xref: /freebsd/sys/kern/kern_descrip.c (revision 2ad872c5794e4c26fdf6ed219ad3f09ca0d5304a)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39  * $Id: kern_descrip.c,v 1.57 1998/11/11 10:55:56 truckman Exp $
40  */
41 
42 #include "opt_compat.h"
43 #include "opt_devfs.h"
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/sysproto.h>
48 #include <sys/conf.h>
49 #include <sys/filedesc.h>
50 #include <sys/kernel.h>
51 #include <sys/sysctl.h>
52 #include <sys/vnode.h>
53 #include <sys/proc.h>
54 #include <sys/file.h>
55 #include <sys/socketvar.h>
56 #include <sys/stat.h>
57 #include <sys/filio.h>
58 #include <sys/ttycom.h>
59 #include <sys/fcntl.h>
60 #include <sys/malloc.h>
61 #include <sys/unistd.h>
62 #include <sys/resourcevar.h>
63 #include <sys/pipe.h>
64 
65 #include <vm/vm.h>
66 #include <vm/vm_extern.h>
67 
68 #ifdef DEVFS
69 #include <sys/devfsext.h>
70 #endif /*DEVFS*/
71 
72 static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
73 MALLOC_DEFINE(M_FILE, "file", "Open file structure");
74 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
75 
76 
77 static	 d_open_t  fdopen;
78 #define NUMFDESC 64
79 
80 #define CDEV_MAJOR 22
81 static struct cdevsw fildesc_cdevsw =
82 	{ fdopen,	noclose,	noread,		nowrite,
83 	  noioc,	nostop,		nullreset,	nodevtotty,
84 	  seltrue,	nommap,		nostrat };
85 
86 static int finishdup __P((struct filedesc *fdp, int old, int new, register_t *retval));
87 /*
88  * Descriptor management.
89  */
90 struct filelist filehead;	/* head of list of open files */
91 int nfiles;			/* actual number of open files */
92 extern int cmask;
93 
94 /*
95  * System calls on descriptors.
96  */
97 #ifndef _SYS_SYSPROTO_H_
98 struct getdtablesize_args {
99 	int	dummy;
100 };
101 #endif
102 /* ARGSUSED */
103 int
104 getdtablesize(p, uap)
105 	struct proc *p;
106 	struct getdtablesize_args *uap;
107 {
108 
109 	p->p_retval[0] =
110 	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
111 	return (0);
112 }
113 
114 /*
115  * Duplicate a file descriptor to a particular value.
116  */
117 #ifndef _SYS_SYSPROTO_H_
118 struct dup2_args {
119 	u_int	from;
120 	u_int	to;
121 };
122 #endif
123 /* ARGSUSED */
124 int
125 dup2(p, uap)
126 	struct proc *p;
127 	struct dup2_args *uap;
128 {
129 	register struct filedesc *fdp = p->p_fd;
130 	register u_int old = uap->from, new = uap->to;
131 	int i, error;
132 
133 	if (old >= fdp->fd_nfiles ||
134 	    fdp->fd_ofiles[old] == NULL ||
135 	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
136 	    new >= maxfilesperproc)
137 		return (EBADF);
138 	if (old == new) {
139 		p->p_retval[0] = new;
140 		return (0);
141 	}
142 	if (new >= fdp->fd_nfiles) {
143 		if ((error = fdalloc(p, new, &i)))
144 			return (error);
145 		if (new != i)
146 			panic("dup2: fdalloc");
147 	} else if (fdp->fd_ofiles[new]) {
148 		if (fdp->fd_ofileflags[new] & UF_MAPPED)
149 			(void) munmapfd(p, new);
150 		/*
151 		 * dup2() must succeed even if the close has an error.
152 		 */
153 		(void) closef(fdp->fd_ofiles[new], p);
154 	}
155 	return (finishdup(fdp, (int)old, (int)new, p->p_retval));
156 }
157 
158 /*
159  * Duplicate a file descriptor.
160  */
161 #ifndef _SYS_SYSPROTO_H_
162 struct dup_args {
163 	u_int	fd;
164 };
165 #endif
166 /* ARGSUSED */
167 int
168 dup(p, uap)
169 	struct proc *p;
170 	struct dup_args *uap;
171 {
172 	register struct filedesc *fdp;
173 	u_int old;
174 	int new, error;
175 
176 	old = uap->fd;
177 
178 #if 0
179 	/*
180 	 * XXX Compatibility
181 	 */
182 	if (old &~ 077) { uap->fd &= 077; return (dup2(p, uap, p->p_retval)); }
183 #endif
184 
185 	fdp = p->p_fd;
186 	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL)
187 		return (EBADF);
188 	if ((error = fdalloc(p, 0, &new)))
189 		return (error);
190 	return (finishdup(fdp, (int)old, new, p->p_retval));
191 }
192 
193 /*
194  * The file control system call.
195  */
196 #ifndef _SYS_SYSPROTO_H_
197 struct fcntl_args {
198 	int	fd;
199 	int	cmd;
200 	long	arg;
201 };
202 #endif
203 /* ARGSUSED */
204 int
205 fcntl(p, uap)
206 	struct proc *p;
207 	register struct fcntl_args *uap;
208 {
209 	register struct filedesc *fdp = p->p_fd;
210 	register struct file *fp;
211 	register char *pop;
212 	struct vnode *vp;
213 	int i, tmp, error, flg = F_POSIX;
214 	struct flock fl;
215 	u_int newmin;
216 
217 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
218 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
219 		return (EBADF);
220 	pop = &fdp->fd_ofileflags[uap->fd];
221 	switch (uap->cmd) {
222 
223 	case F_DUPFD:
224 		newmin = uap->arg;
225 		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
226 		    newmin >= maxfilesperproc)
227 			return (EINVAL);
228 		if ((error = fdalloc(p, newmin, &i)))
229 			return (error);
230 		return (finishdup(fdp, uap->fd, i, p->p_retval));
231 
232 	case F_GETFD:
233 		p->p_retval[0] = *pop & 1;
234 		return (0);
235 
236 	case F_SETFD:
237 		*pop = (*pop &~ 1) | (uap->arg & 1);
238 		return (0);
239 
240 	case F_GETFL:
241 		p->p_retval[0] = OFLAGS(fp->f_flag);
242 		return (0);
243 
244 	case F_SETFL:
245 		fp->f_flag &= ~FCNTLFLAGS;
246 		fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
247 		tmp = fp->f_flag & FNONBLOCK;
248 		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
249 		if (error)
250 			return (error);
251 		tmp = fp->f_flag & FASYNC;
252 		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
253 		if (!error)
254 			return (0);
255 		fp->f_flag &= ~FNONBLOCK;
256 		tmp = 0;
257 		(void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
258 		return (error);
259 
260 	case F_GETOWN:
261 		error = (*fp->f_ops->fo_ioctl)
262 			(fp, FIOGETOWN, (caddr_t)p->p_retval, p);
263 		return (error);
264 
265 	case F_SETOWN:
266 		return ((*fp->f_ops->fo_ioctl)
267 			(fp, FIOSETOWN, (caddr_t)&uap->arg, p));
268 
269 	case F_SETLKW:
270 		flg |= F_WAIT;
271 		/* Fall into F_SETLK */
272 
273 	case F_SETLK:
274 		if (fp->f_type != DTYPE_VNODE)
275 			return (EBADF);
276 		vp = (struct vnode *)fp->f_data;
277 		/* Copy in the lock structure */
278 		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
279 		    sizeof(fl));
280 		if (error)
281 			return (error);
282 		if (fl.l_whence == SEEK_CUR)
283 			fl.l_start += fp->f_offset;
284 		switch (fl.l_type) {
285 
286 		case F_RDLCK:
287 			if ((fp->f_flag & FREAD) == 0)
288 				return (EBADF);
289 			p->p_flag |= P_ADVLOCK;
290 			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
291 
292 		case F_WRLCK:
293 			if ((fp->f_flag & FWRITE) == 0)
294 				return (EBADF);
295 			p->p_flag |= P_ADVLOCK;
296 			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
297 
298 		case F_UNLCK:
299 			return (VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
300 				F_POSIX));
301 
302 		default:
303 			return (EINVAL);
304 		}
305 
306 	case F_GETLK:
307 		if (fp->f_type != DTYPE_VNODE)
308 			return (EBADF);
309 		vp = (struct vnode *)fp->f_data;
310 		/* Copy in the lock structure */
311 		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
312 		    sizeof(fl));
313 		if (error)
314 			return (error);
315 		if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
316 		    fl.l_type != F_UNLCK)
317 			return (EINVAL);
318 		if (fl.l_whence == SEEK_CUR)
319 			fl.l_start += fp->f_offset;
320 		if ((error = VOP_ADVLOCK(vp,(caddr_t)p,F_GETLK,&fl,F_POSIX)))
321 			return (error);
322 		return (copyout((caddr_t)&fl, (caddr_t)(intptr_t)uap->arg,
323 		    sizeof(fl)));
324 
325 	default:
326 		return (EINVAL);
327 	}
328 	/* NOTREACHED */
329 }
330 
331 /*
332  * Common code for dup, dup2, and fcntl(F_DUPFD).
333  */
334 static int
335 finishdup(fdp, old, new, retval)
336 	register struct filedesc *fdp;
337 	register int old, new;
338 	register_t *retval;
339 {
340 	register struct file *fp;
341 
342 	fp = fdp->fd_ofiles[old];
343 	fdp->fd_ofiles[new] = fp;
344 	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
345 	fp->f_count++;
346 	if (new > fdp->fd_lastfile)
347 		fdp->fd_lastfile = new;
348 	*retval = new;
349 	return (0);
350 }
351 
352 /*
353  * If sigio is on the list associated with a process or process group,
354  * disable signalling from the device, remove sigio from the list and
355  * free sigio.
356  */
357 void
358 funsetown(sigio)
359 	struct sigio *sigio;
360 {
361 	int s;
362 
363 	if (sigio == NULL)
364 		return;
365 	s = splhigh();
366 	*(sigio->sio_myref) = NULL;
367 	splx(s);
368 	if (sigio->sio_pgid < 0) {
369 		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
370 			     sigio, sio_pgsigio);
371 	} else /* if ((*sigiop)->sio_pgid > 0) */ {
372 		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
373 			     sigio, sio_pgsigio);
374 	}
375 	crfree(sigio->sio_ucred);
376 	FREE(sigio, M_SIGIO);
377 }
378 
379 /* Free a list of sigio structures. */
380 void
381 funsetownlst(sigiolst)
382 	struct sigiolst *sigiolst;
383 {
384 	struct sigio *sigio;
385 
386 	while ((sigio = sigiolst->slh_first) != NULL)
387 		funsetown(sigio);
388 }
389 
390 /*
391  * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
392  *
393  * After permission checking, add a sigio structure to the sigio list for
394  * the process or process group.
395  */
396 int
397 fsetown(pgid, sigiop)
398 	pid_t pgid;
399 	struct sigio **sigiop;
400 {
401 	struct proc *proc;
402 	struct pgrp *pgrp;
403 	struct sigio *sigio;
404 	int s;
405 
406 	if (pgid == 0) {
407 		funsetown(*sigiop);
408 		return (0);
409 	}
410 	if (pgid > 0) {
411 		proc = pfind(pgid);
412 		if (proc == NULL)
413 			return (ESRCH);
414 		/*
415 		 * Policy - Don't allow a process to FSETOWN a process
416 		 * in another session.
417 		 *
418 		 * Remove this test to allow maximum flexibility or
419 		 * restrict FSETOWN to the current process or process
420 		 * group for maximum safety.
421 		 */
422 		else if (proc->p_session != curproc->p_session)
423 			return (EPERM);
424 		pgrp = NULL;
425 	} else /* if (pgid < 0) */ {
426 		pgrp = pgfind(-pgid);
427 		if (pgrp == NULL)
428 			return (ESRCH);
429 		/*
430 		 * Policy - Don't allow a process to FSETOWN a process
431 		 * in another session.
432 		 *
433 		 * Remove this test to allow maximum flexibility or
434 		 * restrict FSETOWN to the current process or process
435 		 * group for maximum safety.
436 		 */
437 		else if (pgrp->pg_session != curproc->p_session)
438 			return (EPERM);
439 		proc = NULL;
440 	}
441 	funsetown(*sigiop);
442 	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO,
443 	       M_WAITOK);
444 	if (pgid > 0) {
445 		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
446 		sigio->sio_proc = proc;
447 	} else {
448 		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
449 		sigio->sio_pgrp = pgrp;
450 	}
451 	sigio->sio_pgid = pgid;
452 	crhold(curproc->p_ucred);
453 	sigio->sio_ucred = curproc->p_ucred;
454 	/* It would be convenient if p_ruid was in ucred. */
455 	sigio->sio_ruid = curproc->p_cred->p_ruid;
456 	sigio->sio_myref = sigiop;
457 	s = splhigh();
458 	*sigiop = sigio;
459 	splx(s);
460 	return (0);
461 }
462 
463 /*
464  * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
465  */
466 pid_t
467 fgetown(sigio)
468 	struct sigio *sigio;
469 {
470 	return (sigio != NULL ? sigio->sio_pgid : 0);
471 }
472 
473 /*
474  * Close a file descriptor.
475  */
476 #ifndef _SYS_SYSPROTO_H_
477 struct close_args {
478         int     fd;
479 };
480 #endif
481 /* ARGSUSED */
482 int
483 close(p, uap)
484 	struct proc *p;
485 	struct close_args *uap;
486 {
487 	register struct filedesc *fdp = p->p_fd;
488 	register struct file *fp;
489 	register int fd = uap->fd;
490 	register u_char *pf;
491 
492 	if ((unsigned)fd >= fdp->fd_nfiles ||
493 	    (fp = fdp->fd_ofiles[fd]) == NULL)
494 		return (EBADF);
495 	pf = (u_char *)&fdp->fd_ofileflags[fd];
496 	if (*pf & UF_MAPPED)
497 		(void) munmapfd(p, fd);
498 	fdp->fd_ofiles[fd] = NULL;
499 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
500 		fdp->fd_lastfile--;
501 	if (fd < fdp->fd_freefile)
502 		fdp->fd_freefile = fd;
503 	*pf = 0;
504 	return (closef(fp, p));
505 }
506 
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Return status information about a file descriptor, in the old
 * "struct ostat" layout.
 *
 * Returns EBADF if the descriptor is not open, the error from the
 * type-specific stat routine, or the error from copyout().  Panics on
 * a file type it does not know.
 */
#ifndef _SYS_SYSPROTO_H_
struct ofstat_args {
	int	fd;
	struct	ostat *sb;
};
#endif
/* ARGSUSED */
int
ofstat(p, uap)
	struct proc *p;
	register struct ofstat_args *uap;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct stat ub;
	struct ostat oub;
	int error;

	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
		return (EBADF);
	switch (fp->f_type) {

	case DTYPE_FIFO:
	case DTYPE_VNODE:
		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
		break;

	case DTYPE_SOCKET:
		error = soo_stat((struct socket *)fp->f_data, &ub);
		break;

	case DTYPE_PIPE:
		error = pipe_stat((struct pipe *)fp->f_data, &ub);
		break;

	default:
		panic("ofstat");
		/*NOTREACHED*/
	}
	if (error == 0) {
		/*
		 * Convert only after a successful stat; on failure "ub"
		 * has not been filled in and must not be read.
		 */
		cvtstat(&ub, &oub);
		error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub));
	}
	return (error);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
557 
558 /*
559  * Return status information about a file descriptor.
560  */
561 #ifndef _SYS_SYSPROTO_H_
562 struct fstat_args {
563 	int	fd;
564 	struct	stat *sb;
565 };
566 #endif
567 /* ARGSUSED */
568 int
569 fstat(p, uap)
570 	struct proc *p;
571 	register struct fstat_args *uap;
572 {
573 	register struct filedesc *fdp = p->p_fd;
574 	register struct file *fp;
575 	struct stat ub;
576 	int error;
577 
578 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
579 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
580 		return (EBADF);
581 	switch (fp->f_type) {
582 
583 	case DTYPE_FIFO:
584 	case DTYPE_VNODE:
585 		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
586 		break;
587 
588 	case DTYPE_SOCKET:
589 		error = soo_stat((struct socket *)fp->f_data, &ub);
590 		break;
591 
592 	case DTYPE_PIPE:
593 		error = pipe_stat((struct pipe *)fp->f_data, &ub);
594 		break;
595 
596 	default:
597 		panic("fstat");
598 		/*NOTREACHED*/
599 	}
600 	if (error == 0)
601 		error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
602 	return (error);
603 }
604 
605 /*
606  * Return status information about a file descriptor.
607  */
608 #ifndef _SYS_SYSPROTO_H_
609 struct nfstat_args {
610 	int	fd;
611 	struct	nstat *sb;
612 };
613 #endif
614 /* ARGSUSED */
615 int
616 nfstat(p, uap)
617 	struct proc *p;
618 	register struct nfstat_args *uap;
619 {
620 	register struct filedesc *fdp = p->p_fd;
621 	register struct file *fp;
622 	struct stat ub;
623 	struct nstat nub;
624 	int error;
625 
626 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
627 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
628 		return (EBADF);
629 	switch (fp->f_type) {
630 
631 	case DTYPE_FIFO:
632 	case DTYPE_VNODE:
633 		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
634 		break;
635 
636 	case DTYPE_SOCKET:
637 		error = soo_stat((struct socket *)fp->f_data, &ub);
638 		break;
639 
640 	case DTYPE_PIPE:
641 		error = pipe_stat((struct pipe *)fp->f_data, &ub);
642 		break;
643 
644 	default:
645 		panic("fstat");
646 		/*NOTREACHED*/
647 	}
648 	if (error == 0) {
649 		cvtnstat(&ub, &nub);
650 		error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub));
651 	}
652 	return (error);
653 }
654 
655 /*
656  * Return pathconf information about a file descriptor.
657  */
658 #ifndef _SYS_SYSPROTO_H_
659 struct fpathconf_args {
660 	int	fd;
661 	int	name;
662 };
663 #endif
664 /* ARGSUSED */
665 int
666 fpathconf(p, uap)
667 	struct proc *p;
668 	register struct fpathconf_args *uap;
669 {
670 	struct filedesc *fdp = p->p_fd;
671 	struct file *fp;
672 	struct vnode *vp;
673 
674 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
675 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
676 		return (EBADF);
677 	switch (fp->f_type) {
678 
679 	case DTYPE_PIPE:
680 	case DTYPE_SOCKET:
681 		if (uap->name != _PC_PIPE_BUF)
682 			return (EINVAL);
683 		p->p_retval[0] = PIPE_BUF;
684 		return (0);
685 
686 	case DTYPE_FIFO:
687 	case DTYPE_VNODE:
688 		vp = (struct vnode *)fp->f_data;
689 		return (VOP_PATHCONF(vp, uap->name, p->p_retval));
690 
691 	default:
692 		panic("fpathconf");
693 	}
694 	/*NOTREACHED*/
695 }
696 
697 /*
698  * Allocate a file descriptor for the process.
699  */
700 static int fdexpand;
701 SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
702 
703 int
704 fdalloc(p, want, result)
705 	struct proc *p;
706 	int want;
707 	int *result;
708 {
709 	register struct filedesc *fdp = p->p_fd;
710 	register int i;
711 	int lim, last, nfiles;
712 	struct file **newofile;
713 	char *newofileflags;
714 
715 	/*
716 	 * Search for a free descriptor starting at the higher
717 	 * of want or fd_freefile.  If that fails, consider
718 	 * expanding the ofile array.
719 	 */
720 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
721 	for (;;) {
722 		last = min(fdp->fd_nfiles, lim);
723 		if ((i = want) < fdp->fd_freefile)
724 			i = fdp->fd_freefile;
725 		for (; i < last; i++) {
726 			if (fdp->fd_ofiles[i] == NULL) {
727 				fdp->fd_ofileflags[i] = 0;
728 				if (i > fdp->fd_lastfile)
729 					fdp->fd_lastfile = i;
730 				if (want <= fdp->fd_freefile)
731 					fdp->fd_freefile = i;
732 				*result = i;
733 				return (0);
734 			}
735 		}
736 
737 		/*
738 		 * No space in current array.  Expand?
739 		 */
740 		if (fdp->fd_nfiles >= lim)
741 			return (EMFILE);
742 		if (fdp->fd_nfiles < NDEXTENT)
743 			nfiles = NDEXTENT;
744 		else
745 			nfiles = 2 * fdp->fd_nfiles;
746 		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
747 		    M_FILEDESC, M_WAITOK);
748 		newofileflags = (char *) &newofile[nfiles];
749 		/*
750 		 * Copy the existing ofile and ofileflags arrays
751 		 * and zero the new portion of each array.
752 		 */
753 		bcopy(fdp->fd_ofiles, newofile,
754 			(i = sizeof(struct file *) * fdp->fd_nfiles));
755 		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
756 		bcopy(fdp->fd_ofileflags, newofileflags,
757 			(i = sizeof(char) * fdp->fd_nfiles));
758 		bzero(newofileflags + i, nfiles * sizeof(char) - i);
759 		if (fdp->fd_nfiles > NDFILE)
760 			FREE(fdp->fd_ofiles, M_FILEDESC);
761 		fdp->fd_ofiles = newofile;
762 		fdp->fd_ofileflags = newofileflags;
763 		fdp->fd_nfiles = nfiles;
764 		fdexpand++;
765 	}
766 	return (0);
767 }
768 
769 /*
770  * Check to see whether n user file descriptors
771  * are available to the process p.
772  */
773 int
774 fdavail(p, n)
775 	struct proc *p;
776 	register int n;
777 {
778 	register struct filedesc *fdp = p->p_fd;
779 	register struct file **fpp;
780 	register int i, lim, last;
781 
782 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
783 	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
784 		return (1);
785 
786 	last = min(fdp->fd_nfiles, lim);
787 	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
788 	for (i = last - fdp->fd_freefile; --i >= 0; fpp++)
789 		if (*fpp == NULL && --n <= 0)
790 			return (1);
791 	return (0);
792 }
793 
794 /*
795  * Create a new open file structure and allocate
796  * a file decriptor for the process that refers to it.
797  */
798 int
799 falloc(p, resultfp, resultfd)
800 	register struct proc *p;
801 	struct file **resultfp;
802 	int *resultfd;
803 {
804 	register struct file *fp, *fq;
805 	int error, i;
806 
807 	if ((error = fdalloc(p, 0, &i)))
808 		return (error);
809 	if (nfiles >= maxfiles) {
810 		tablefull("file");
811 		return (ENFILE);
812 	}
813 	/*
814 	 * Allocate a new file descriptor.
815 	 * If the process has file descriptor zero open, add to the list
816 	 * of open files at that point, otherwise put it at the front of
817 	 * the list of open files.
818 	 */
819 	nfiles++;
820 	MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK);
821 	bzero(fp, sizeof(struct file));
822 	if ((fq = p->p_fd->fd_ofiles[0])) {
823 		LIST_INSERT_AFTER(fq, fp, f_list);
824 	} else {
825 		LIST_INSERT_HEAD(&filehead, fp, f_list);
826 	}
827 	p->p_fd->fd_ofiles[i] = fp;
828 	fp->f_count = 1;
829 	fp->f_cred = p->p_ucred;
830 	fp->f_seqcount = 1;
831 	crhold(fp->f_cred);
832 	if (resultfp)
833 		*resultfp = fp;
834 	if (resultfd)
835 		*resultfd = i;
836 	return (0);
837 }
838 
839 /*
840  * Free a file descriptor.
841  */
842 void
843 ffree(fp)
844 	register struct file *fp;
845 {
846 	LIST_REMOVE(fp, f_list);
847 	crfree(fp->f_cred);
848 #if defined(DIAGNOSTIC) || defined(INVARIANTS)
849 	fp->f_count = 0;
850 #endif
851 	nfiles--;
852 	FREE(fp, M_FILE);
853 }
854 
855 /*
856  * Build a new filedesc structure.
857  */
858 struct filedesc *
859 fdinit(p)
860 	struct proc *p;
861 {
862 	register struct filedesc0 *newfdp;
863 	register struct filedesc *fdp = p->p_fd;
864 
865 	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
866 	    M_FILEDESC, M_WAITOK);
867 	bzero(newfdp, sizeof(struct filedesc0));
868 	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
869 	VREF(newfdp->fd_fd.fd_cdir);
870 	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
871 	VREF(newfdp->fd_fd.fd_rdir);
872 
873 	/* Create the file descriptor table. */
874 	newfdp->fd_fd.fd_refcnt = 1;
875 	newfdp->fd_fd.fd_cmask = cmask;
876 	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
877 	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
878 	newfdp->fd_fd.fd_nfiles = NDFILE;
879 
880 	newfdp->fd_fd.fd_freefile = 0;
881 	newfdp->fd_fd.fd_lastfile = 0;
882 
883 	return (&newfdp->fd_fd);
884 }
885 
886 /*
887  * Share a filedesc structure.
888  */
889 struct filedesc *
890 fdshare(p)
891 	struct proc *p;
892 {
893 	p->p_fd->fd_refcnt++;
894 	return (p->p_fd);
895 }
896 
897 /*
898  * Copy a filedesc structure.
899  */
900 struct filedesc *
901 fdcopy(p)
902 	struct proc *p;
903 {
904 	register struct filedesc *newfdp, *fdp = p->p_fd;
905 	register struct file **fpp;
906 	register int i;
907 
908 /*
909  * Certain daemons might not have file descriptors
910  */
911 	if (fdp == NULL)
912 		return NULL;
913 
914 	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
915 	    M_FILEDESC, M_WAITOK);
916 	bcopy(fdp, newfdp, sizeof(struct filedesc));
917 	VREF(newfdp->fd_cdir);
918 	VREF(newfdp->fd_rdir);
919 	newfdp->fd_refcnt = 1;
920 
921 	/*
922 	 * If the number of open files fits in the internal arrays
923 	 * of the open file structure, use them, otherwise allocate
924 	 * additional memory for the number of descriptors currently
925 	 * in use.
926 	 */
927 	if (newfdp->fd_lastfile < NDFILE) {
928 		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
929 		newfdp->fd_ofileflags =
930 		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
931 		i = NDFILE;
932 	} else {
933 		/*
934 		 * Compute the smallest multiple of NDEXTENT needed
935 		 * for the file descriptors currently in use,
936 		 * allowing the table to shrink.
937 		 */
938 		i = newfdp->fd_nfiles;
939 		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
940 			i /= 2;
941 		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
942 		    M_FILEDESC, M_WAITOK);
943 		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
944 	}
945 	newfdp->fd_nfiles = i;
946 	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
947 	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
948 	fpp = newfdp->fd_ofiles;
949 	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++)
950 		if (*fpp != NULL)
951 			(*fpp)->f_count++;
952 	return (newfdp);
953 }
954 
955 /*
956  * Release a filedesc structure.
957  */
958 void
959 fdfree(p)
960 	struct proc *p;
961 {
962 	register struct filedesc *fdp = p->p_fd;
963 	struct file **fpp;
964 	register int i;
965 
966 /*
967  * Certain daemons might not have file descriptors
968  */
969 	if (fdp == NULL)
970 		return;
971 
972 	if (--fdp->fd_refcnt > 0)
973 		return;
974 	fpp = fdp->fd_ofiles;
975 	for (i = fdp->fd_lastfile; i-- >= 0; fpp++)
976 		if (*fpp)
977 			(void) closef(*fpp, p);
978 	if (fdp->fd_nfiles > NDFILE)
979 		FREE(fdp->fd_ofiles, M_FILEDESC);
980 	vrele(fdp->fd_cdir);
981 	vrele(fdp->fd_rdir);
982 	FREE(fdp, M_FILEDESC);
983 }
984 
985 /*
986  * Close any files on exec?
987  */
988 void
989 fdcloseexec(p)
990 	struct proc *p;
991 {
992 	struct filedesc *fdp = p->p_fd;
993 	struct file **fpp;
994 	char *fdfp;
995 	register int i;
996 
997 /*
998  * Certain daemons might not have file descriptors
999  */
1000 	if (fdp == NULL)
1001 		return;
1002 
1003 	fpp = fdp->fd_ofiles;
1004 	fdfp = fdp->fd_ofileflags;
1005 	for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++)
1006 		if (*fpp != NULL && (*fdfp & UF_EXCLOSE)) {
1007 			if (*fdfp & UF_MAPPED)
1008 				(void) munmapfd(p, i);
1009 			(void) closef(*fpp, p);
1010 			*fpp = NULL;
1011 			*fdfp = 0;
1012 			if (i < fdp->fd_freefile)
1013 				fdp->fd_freefile = i;
1014 		}
1015 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1016 		fdp->fd_lastfile--;
1017 }
1018 
1019 /*
1020  * Internal form of close.
1021  * Decrement reference count on file structure.
1022  * Note: p may be NULL when closing a file
1023  * that was being passed in a message.
1024  */
1025 int
1026 closef(fp, p)
1027 	register struct file *fp;
1028 	register struct proc *p;
1029 {
1030 	struct vnode *vp;
1031 	struct flock lf;
1032 	int error;
1033 
1034 	if (fp == NULL)
1035 		return (0);
1036 	/*
1037 	 * POSIX record locking dictates that any close releases ALL
1038 	 * locks owned by this process.  This is handled by setting
1039 	 * a flag in the unlock to free ONLY locks obeying POSIX
1040 	 * semantics, and not to free BSD-style file locks.
1041 	 * If the descriptor was in a message, POSIX-style locks
1042 	 * aren't passed with the descriptor.
1043 	 */
1044 	if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
1045 		lf.l_whence = SEEK_SET;
1046 		lf.l_start = 0;
1047 		lf.l_len = 0;
1048 		lf.l_type = F_UNLCK;
1049 		vp = (struct vnode *)fp->f_data;
1050 		(void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX);
1051 	}
1052 	if (--fp->f_count > 0)
1053 		return (0);
1054 	if (fp->f_count < 0)
1055 		panic("closef: count < 0");
1056 	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1057 		lf.l_whence = SEEK_SET;
1058 		lf.l_start = 0;
1059 		lf.l_len = 0;
1060 		lf.l_type = F_UNLCK;
1061 		vp = (struct vnode *)fp->f_data;
1062 		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1063 	}
1064 	if (fp->f_ops)
1065 		error = (*fp->f_ops->fo_close)(fp, p);
1066 	else
1067 		error = 0;
1068 	ffree(fp);
1069 	return (error);
1070 }
1071 
1072 /*
1073  * Apply an advisory lock on a file descriptor.
1074  *
1075  * Just attempt to get a record lock of the requested type on
1076  * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1077  */
1078 #ifndef _SYS_SYSPROTO_H_
1079 struct flock_args {
1080 	int	fd;
1081 	int	how;
1082 };
1083 #endif
1084 /* ARGSUSED */
1085 int
1086 flock(p, uap)
1087 	struct proc *p;
1088 	register struct flock_args *uap;
1089 {
1090 	register struct filedesc *fdp = p->p_fd;
1091 	register struct file *fp;
1092 	struct vnode *vp;
1093 	struct flock lf;
1094 
1095 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
1096 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
1097 		return (EBADF);
1098 	if (fp->f_type != DTYPE_VNODE)
1099 		return (EOPNOTSUPP);
1100 	vp = (struct vnode *)fp->f_data;
1101 	lf.l_whence = SEEK_SET;
1102 	lf.l_start = 0;
1103 	lf.l_len = 0;
1104 	if (uap->how & LOCK_UN) {
1105 		lf.l_type = F_UNLCK;
1106 		fp->f_flag &= ~FHASLOCK;
1107 		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK));
1108 	}
1109 	if (uap->how & LOCK_EX)
1110 		lf.l_type = F_WRLCK;
1111 	else if (uap->how & LOCK_SH)
1112 		lf.l_type = F_RDLCK;
1113 	else
1114 		return (EBADF);
1115 	fp->f_flag |= FHASLOCK;
1116 	if (uap->how & LOCK_NB)
1117 		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK));
1118 	return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT));
1119 }
1120 
1121 /*
1122  * File Descriptor pseudo-device driver (/dev/fd/).
1123  *
1124  * Opening minor device N dup()s the file (if any) connected to file
1125  * descriptor N belonging to the calling process.  Note that this driver
1126  * consists of only the ``open()'' routine, because all subsequent
1127  * references to this file will be direct to the other driver.
1128  */
1129 /* ARGSUSED */
1130 static int
1131 fdopen(dev, mode, type, p)
1132 	dev_t dev;
1133 	int mode, type;
1134 	struct proc *p;
1135 {
1136 
1137 	/*
1138 	 * XXX Kludge: set curproc->p_dupfd to contain the value of the
1139 	 * the file descriptor being sought for duplication. The error
1140 	 * return ensures that the vnode for this device will be released
1141 	 * by vn_open. Open will detect this special error and take the
1142 	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1143 	 * will simply report the error.
1144 	 */
1145 	p->p_dupfd = minor(dev);
1146 	return (ENODEV);
1147 }
1148 
1149 /*
1150  * Duplicate the specified descriptor to a free descriptor.
1151  */
1152 int
1153 dupfdopen(fdp, indx, dfd, mode, error)
1154 	register struct filedesc *fdp;
1155 	register int indx, dfd;
1156 	int mode;
1157 	int error;
1158 {
1159 	register struct file *wfp;
1160 	struct file *fp;
1161 
1162 	/*
1163 	 * If the to-be-dup'd fd number is greater than the allowed number
1164 	 * of file descriptors, or the fd to be dup'd has already been
1165 	 * closed, reject.  Note, check for new == old is necessary as
1166 	 * falloc could allocate an already closed to-be-dup'd descriptor
1167 	 * as the new descriptor.
1168 	 */
1169 	fp = fdp->fd_ofiles[indx];
1170 	if ((u_int)dfd >= fdp->fd_nfiles ||
1171 	    (wfp = fdp->fd_ofiles[dfd]) == NULL || fp == wfp)
1172 		return (EBADF);
1173 
1174 	/*
1175 	 * There are two cases of interest here.
1176 	 *
1177 	 * For ENODEV simply dup (dfd) to file descriptor
1178 	 * (indx) and return.
1179 	 *
1180 	 * For ENXIO steal away the file structure from (dfd) and
1181 	 * store it in (indx).  (dfd) is effectively closed by
1182 	 * this operation.
1183 	 *
1184 	 * Any other error code is just returned.
1185 	 */
1186 	switch (error) {
1187 	case ENODEV:
1188 		/*
1189 		 * Check that the mode the file is being opened for is a
1190 		 * subset of the mode of the existing descriptor.
1191 		 */
1192 		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
1193 			return (EACCES);
1194 		fdp->fd_ofiles[indx] = wfp;
1195 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1196 		wfp->f_count++;
1197 		if (indx > fdp->fd_lastfile)
1198 			fdp->fd_lastfile = indx;
1199 		return (0);
1200 
1201 	case ENXIO:
1202 		/*
1203 		 * Steal away the file pointer from dfd, and stuff it into indx.
1204 		 */
1205 		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1206 		fdp->fd_ofiles[dfd] = NULL;
1207 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1208 		fdp->fd_ofileflags[dfd] = 0;
1209 		/*
1210 		 * Complete the clean up of the filedesc structure by
1211 		 * recomputing the various hints.
1212 		 */
1213 		if (indx > fdp->fd_lastfile)
1214 			fdp->fd_lastfile = indx;
1215 		else
1216 			while (fdp->fd_lastfile > 0 &&
1217 			       fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1218 				fdp->fd_lastfile--;
1219 			if (dfd < fdp->fd_freefile)
1220 				fdp->fd_freefile = dfd;
1221 		return (0);
1222 
1223 	default:
1224 		return (error);
1225 	}
1226 	/* NOTREACHED */
1227 }
1228 
1229 /*
1230  * Get file structures.
1231  */
1232 static int
1233 sysctl_kern_file SYSCTL_HANDLER_ARGS
1234 {
1235 	int error;
1236 	struct file *fp;
1237 
1238 	if (!req->oldptr) {
1239 		/*
1240 		 * overestimate by 10 files
1241 		 */
1242 		return (SYSCTL_OUT(req, 0, sizeof(filehead) +
1243 				(nfiles + 10) * sizeof(struct file)));
1244 	}
1245 
1246 	error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
1247 	if (error)
1248 		return (error);
1249 
1250 	/*
1251 	 * followed by an array of file structures
1252 	 */
1253 	for (fp = filehead.lh_first; fp != NULL; fp = fp->f_list.le_next) {
1254 		error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
1255 		if (error)
1256 			return (error);
1257 	}
1258 	return (0);
1259 }
1260 
1261 SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
1262 	0, 0, sysctl_kern_file, "S,file", "");
1263 
1264 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc,
1265 	CTLFLAG_RW, &maxfilesperproc, 0, "");
1266 
1267 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, &maxfiles, 0, "");
1268 
/*
 * Set to 1 by fildesc_drvinit() once the fildesc cdevsw entry has been
 * added, so that a repeated call does nothing.  The type is spelled out
 * explicitly: implicit int was removed from the language in C99.
 */
static int fildesc_devsw_installed = 0;
#ifdef DEVFS
/* Values returned by devfs_add_devswf() for the nodes created at init. */
static	void *devfs_token_stdin;
static	void *devfs_token_stdout;
static	void *devfs_token_stderr;
static	void *devfs_token_fildesc[NUMFDESC];
#endif
1276 
1277 static void 	fildesc_drvinit(void *unused)
1278 {
1279 	dev_t dev;
1280 #ifdef DEVFS
1281 	int fd;
1282 #endif
1283 
1284 	if( ! fildesc_devsw_installed ) {
1285 		dev = makedev(CDEV_MAJOR,0);
1286 		cdevsw_add(&dev,&fildesc_cdevsw,NULL);
1287 		fildesc_devsw_installed = 1;
1288 #ifdef DEVFS
1289 		for (fd = 0; fd < NUMFDESC; fd++)
1290 			devfs_token_fildesc[fd] =
1291 				devfs_add_devswf(&fildesc_cdevsw, fd, DV_CHR,
1292 						 UID_BIN, GID_BIN, 0666,
1293 						 "fd/%d", fd);
1294 		devfs_token_stdin =
1295 			devfs_add_devswf(&fildesc_cdevsw, 0, DV_CHR,
1296 					 UID_ROOT, GID_WHEEL, 0666,
1297 					 "stdin");
1298 		devfs_token_stdout =
1299 			devfs_add_devswf(&fildesc_cdevsw, 1, DV_CHR,
1300 					 UID_ROOT, GID_WHEEL, 0666,
1301 					 "stdout");
1302 		devfs_token_stderr =
1303 			devfs_add_devswf(&fildesc_cdevsw, 2, DV_CHR,
1304 					 UID_ROOT, GID_WHEEL, 0666,
1305 					 "stderr");
1306 #endif
1307     	}
1308 }
1309 
1310 SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
1311 					fildesc_drvinit,NULL)
1312 
1313 
1314