xref: /freebsd/sys/kern/kern_descrip.c (revision 0fddbf874719b9bd50cf66ac26d1140bb3f2be69)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39  * $FreeBSD$
40  */
41 
42 #include "opt_compat.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/lock.h>
47 #include <sys/mutex.h>
48 #include <sys/sysproto.h>
49 #include <sys/conf.h>
50 #include <sys/filedesc.h>
51 #include <sys/kernel.h>
52 #include <sys/sysctl.h>
53 #include <sys/vnode.h>
54 #include <sys/proc.h>
55 #include <sys/file.h>
56 #include <sys/stat.h>
57 #include <sys/filio.h>
58 #include <sys/fcntl.h>
59 #include <sys/malloc.h>
60 #include <sys/unistd.h>
61 #include <sys/resourcevar.h>
62 #include <sys/event.h>
63 
64 #include <machine/limits.h>
65 
66 #include <vm/vm.h>
67 #include <vm/vm_extern.h>
68 
69 static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
70 MALLOC_DEFINE(M_FILE, "file", "Open file structure");
71 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
72 
/*
 * Character-device glue for the descriptor pseudo-device (driver name "FD").
 * Only the open entry point (fdopen, declared here and defined elsewhere in
 * this file) is implemented; every other slot of the switch below is filled
 * with the matching no* stub, and the flags word is 0.
 */
73 static	 d_open_t  fdopen;
/* NOTE(review): presumably the number of descriptor device nodes; it is not used in the visible part of the file -- confirm. */
74 #define NUMFDESC 64
75 
/* Major device number stored in the "maj" slot of fildesc_cdevsw. */
76 #define CDEV_MAJOR 22
77 static struct cdevsw fildesc_cdevsw = {
78 	/* open */	fdopen,
79 	/* close */	noclose,
80 	/* read */	noread,
81 	/* write */	nowrite,
82 	/* ioctl */	noioctl,
83 	/* poll */	nopoll,
84 	/* mmap */	nommap,
85 	/* strategy */	nostrategy,
86 	/* name */	"FD",
87 	/* maj */	CDEV_MAJOR,
88 	/* dump */	nodump,
89 	/* psize */	nopsize,
90 	/* flags */	0,
91 };
92 
/*
 * Forward declarations of file-local helpers.
 *
 * do_dup() is the shared back end of dup(), dup2() and fcntl(F_DUPFD).
 * The badfo_* routines are defined outside the visible part of this file;
 * NOTE(review): presumably they are the methods behind the badfileops
 * table that falloc() installs on a freshly created file -- confirm.
 */
93 static int do_dup __P((struct filedesc *fdp, int old, int new, register_t *retval, struct proc *p));
94 static int badfo_readwrite __P((struct file *fp, struct uio *uio,
95     struct ucred *cred, int flags, struct proc *p));
96 static int badfo_ioctl __P((struct file *fp, u_long com, caddr_t data,
97     struct proc *p));
98 static int badfo_poll __P((struct file *fp, int events,
99     struct ucred *cred, struct proc *p));
100 static int badfo_kqfilter __P((struct file *fp, struct knote *kn));
101 static int badfo_stat __P((struct file *fp, struct stat *sb, struct proc *p));
102 static int badfo_close __P((struct file *fp, struct proc *p));
103 
104 /*
105  * Descriptor management.
106  */
/*
 * filehead: list that falloc() links every new struct file onto and
 *	ffree() unlinks it from.
 * nfiles: incremented by falloc() and decremented by ffree(); falloc()
 *	refuses to create a file once it reaches maxfiles.
 * cmask: copied into fd_cmask of each table built by fdinit().
 */
107 struct filelist filehead;	/* head of list of open files */
108 int nfiles;			/* actual number of open files */
109 extern int cmask;
110 
111 /*
112  * System calls on descriptors.
113  */
114 #ifndef _SYS_SYSPROTO_H_
115 struct getdtablesize_args {
116 	int	dummy;
117 };
118 #endif
119 /*
120  * MPSAFE
121  */
122 /* ARGSUSED */
123 int
124 getdtablesize(p, uap)
125 	struct proc *p;
126 	struct getdtablesize_args *uap;
127 {
128 
129 	mtx_lock(&Giant);
130 	p->p_retval[0] =
131 	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
132 	mtx_unlock(&Giant);
133 	return (0);
134 }
135 
136 /*
137  * Duplicate a file descriptor to a particular value.
138  *
139  * note: keep in mind that a potential race condition exists when closing
140  * descriptors from a shared descriptor table (via rfork).
141  */
142 #ifndef _SYS_SYSPROTO_H_
143 struct dup2_args {
144 	u_int	from;
145 	u_int	to;
146 };
147 #endif
148 /*
149  * MPSAFE
150  */
151 /* ARGSUSED */
152 int
153 dup2(p, uap)
154 	struct proc *p;
155 	struct dup2_args *uap;
156 {
157 	register struct filedesc *fdp = p->p_fd;
158 	register u_int old = uap->from, new = uap->to;
159 	int i, error;
160 
161 	mtx_lock(&Giant);
162 retry:
163 	if (old >= fdp->fd_nfiles ||
164 	    fdp->fd_ofiles[old] == NULL ||
165 	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
166 	    new >= maxfilesperproc) {
167 		error = EBADF;
168 		goto done2;
169 	}
170 	if (old == new) {
171 		p->p_retval[0] = new;
172 		error = 0;
173 		goto done2;
174 	}
175 	if (new >= fdp->fd_nfiles) {
176 		if ((error = fdalloc(p, new, &i)))
177 			goto done2;
178 		if (new != i)
179 			panic("dup2: fdalloc");
180 		/*
181 		 * fdalloc() may block, retest everything.
182 		 */
183 		goto retry;
184 	}
185 	error = do_dup(fdp, (int)old, (int)new, p->p_retval, p);
186 done2:
187 	mtx_unlock(&Giant);
188 	return(error);
189 }
190 
191 /*
192  * Duplicate a file descriptor.
193  */
194 #ifndef _SYS_SYSPROTO_H_
195 struct dup_args {
196 	u_int	fd;
197 };
198 #endif
199 /*
200  * MPSAFE
201  */
202 /* ARGSUSED */
203 int
204 dup(p, uap)
205 	struct proc *p;
206 	struct dup_args *uap;
207 {
208 	register struct filedesc *fdp;
209 	u_int old;
210 	int new, error;
211 
212 	mtx_lock(&Giant);
213 	old = uap->fd;
214 	fdp = p->p_fd;
215 	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
216 		error = EBADF;
217 		goto done2;
218 	}
219 	if ((error = fdalloc(p, 0, &new)))
220 		goto done2;
221 	error = do_dup(fdp, (int)old, new, p->p_retval, p);
222 done2:
223 	mtx_unlock(&Giant);
224 	return (error);
225 }
226 
227 /*
228  * The file control system call.
229  */
230 #ifndef _SYS_SYSPROTO_H_
231 struct fcntl_args {
232 	int	fd;
233 	int	cmd;
234 	long	arg;
235 };
236 #endif
237 /*
238  * MPSAFE
239  */
240 /* ARGSUSED */
241 int
242 fcntl(p, uap)
243 	struct proc *p;
244 	register struct fcntl_args *uap;
245 {
246 	register struct filedesc *fdp;
247 	register struct file *fp;
248 	register char *pop;
249 	struct vnode *vp;
250 	int i, tmp, error = 0, flg = F_POSIX;
251 	struct flock fl;
252 	u_int newmin;
253 
254 	mtx_lock(&Giant);
255 
256 	fdp = p->p_fd;
257 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
258 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL) {
259 		error = EBADF;
260 		goto done2;
261 	}
262 	pop = &fdp->fd_ofileflags[uap->fd];
263 
264 	switch (uap->cmd) {
265 	case F_DUPFD:
266 		newmin = uap->arg;
267 		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
268 		    newmin >= maxfilesperproc) {
269 			error = EINVAL;
270 			break;
271 		}
272 		if ((error = fdalloc(p, newmin, &i)))
273 			break;
274 		error = do_dup(fdp, uap->fd, i, p->p_retval, p);
275 		break;
276 
277 	case F_GETFD:
278 		p->p_retval[0] = *pop & 1;
279 		break;
280 
281 	case F_SETFD:
282 		*pop = (*pop &~ 1) | (uap->arg & 1);
283 		break;
284 
285 	case F_GETFL:
286 		p->p_retval[0] = OFLAGS(fp->f_flag);
287 		break;
288 
289 	case F_SETFL:
290 		fhold(fp);
291 		fp->f_flag &= ~FCNTLFLAGS;
292 		fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
293 		tmp = fp->f_flag & FNONBLOCK;
294 		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
295 		if (error) {
296 			fdrop(fp, p);
297 			break;
298 		}
299 		tmp = fp->f_flag & FASYNC;
300 		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p);
301 		if (!error) {
302 			fdrop(fp, p);
303 			break;
304 		}
305 		fp->f_flag &= ~FNONBLOCK;
306 		tmp = 0;
307 		(void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
308 		fdrop(fp, p);
309 		break;
310 
311 	case F_GETOWN:
312 		fhold(fp);
313 		error = fo_ioctl(fp, FIOGETOWN, (caddr_t)p->p_retval, p);
314 		fdrop(fp, p);
315 		break;
316 
317 	case F_SETOWN:
318 		fhold(fp);
319 		error = fo_ioctl(fp, FIOSETOWN, (caddr_t)&uap->arg, p);
320 		fdrop(fp, p);
321 		break;
322 
323 	case F_SETLKW:
324 		flg |= F_WAIT;
325 		/* Fall into F_SETLK */
326 
327 	case F_SETLK:
328 		if (fp->f_type != DTYPE_VNODE) {
329 			error = EBADF;
330 			break;
331 		}
332 		vp = (struct vnode *)fp->f_data;
333 
334 		/*
335 		 * copyin/lockop may block
336 		 */
337 		fhold(fp);
338 		/* Copy in the lock structure */
339 		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
340 		    sizeof(fl));
341 		if (error) {
342 			fdrop(fp, p);
343 			break;
344 		}
345 		if (fl.l_whence == SEEK_CUR) {
346 			if (fp->f_offset < 0 ||
347 			    (fl.l_start > 0 &&
348 			     fp->f_offset > OFF_MAX - fl.l_start)) {
349 				fdrop(fp, p);
350 				error = EOVERFLOW;
351 				break;
352 			}
353 			fl.l_start += fp->f_offset;
354 		}
355 
356 		switch (fl.l_type) {
357 		case F_RDLCK:
358 			if ((fp->f_flag & FREAD) == 0) {
359 				error = EBADF;
360 				break;
361 			}
362 			p->p_flag |= P_ADVLOCK;
363 			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
364 			    &fl, flg);
365 			break;
366 		case F_WRLCK:
367 			if ((fp->f_flag & FWRITE) == 0) {
368 				error = EBADF;
369 				break;
370 			}
371 			p->p_flag |= P_ADVLOCK;
372 			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
373 			    &fl, flg);
374 			break;
375 		case F_UNLCK:
376 			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
377 				&fl, F_POSIX);
378 			break;
379 		default:
380 			error = EINVAL;
381 			break;
382 		}
383 		fdrop(fp, p);
384 		break;
385 	case F_GETLK:
386 		if (fp->f_type != DTYPE_VNODE) {
387 			error = EBADF;
388 			break;
389 		}
390 		vp = (struct vnode *)fp->f_data;
391 		/*
392 		 * copyin/lockop may block
393 		 */
394 		fhold(fp);
395 		/* Copy in the lock structure */
396 		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
397 		    sizeof(fl));
398 		if (error) {
399 			fdrop(fp, p);
400 			break;
401 		}
402 		if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
403 		    fl.l_type != F_UNLCK) {
404 			fdrop(fp, p);
405 			error = EINVAL;
406 			break;
407 		}
408 		if (fl.l_whence == SEEK_CUR) {
409 			if ((fl.l_start > 0 &&
410 			     fp->f_offset > OFF_MAX - fl.l_start) ||
411 			    (fl.l_start < 0 &&
412 			     fp->f_offset < OFF_MIN - fl.l_start)) {
413 				fdrop(fp, p);
414 				error = EOVERFLOW;
415 				break;
416 			}
417 			fl.l_start += fp->f_offset;
418 		}
419 		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK,
420 			    &fl, F_POSIX);
421 		fdrop(fp, p);
422 		if (error == 0) {
423 			error = copyout((caddr_t)&fl,
424 				    (caddr_t)(intptr_t)uap->arg, sizeof(fl));
425 		}
426 		break;
427 	default:
428 		error = EINVAL;
429 		break;
430 	}
431 done2:
432 	mtx_unlock(&Giant);
433 	return (error);
434 }
435 
436 /*
437  * Common code for dup, dup2, and fcntl(F_DUPFD).
438  */
439 static int
440 do_dup(fdp, old, new, retval, p)
441 	register struct filedesc *fdp;
442 	register int old, new;
443 	register_t *retval;
444 	struct proc *p;
445 {
446 	struct file *fp;
447 	struct file *delfp;
448 
449 	/*
450 	 * Save info on the descriptor being overwritten.  We have
451 	 * to do the unmap now, but we cannot close it without
452 	 * introducing an ownership race for the slot.
453 	 */
454 	delfp = fdp->fd_ofiles[new];
455 #if 0
456 	if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
457 		(void) munmapfd(p, new);
458 #endif
459 
460 	/*
461 	 * Duplicate the source descriptor, update lastfile
462 	 */
463 	fp = fdp->fd_ofiles[old];
464 	fdp->fd_ofiles[new] = fp;
465 	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
466 	fhold(fp);
467 	if (new > fdp->fd_lastfile)
468 		fdp->fd_lastfile = new;
469 	*retval = new;
470 
471 	/*
472 	 * If we dup'd over a valid file, we now own the reference to it
473 	 * and must dispose of it using closef() semantics (as if a
474 	 * close() were performed on it).
475 	 */
476 	if (delfp)
477 		(void) closef(delfp, p);
478 	return (0);
479 }
480 
481 /*
482  * If sigio is on the list associated with a process or process group,
483  * disable signalling from the device, remove sigio from the list and
484  * free sigio.
485  */
486 void
487 funsetown(sigio)
488 	struct sigio *sigio;
489 {
490 	int s;
491 
492 	if (sigio == NULL)
493 		return;
494 	s = splhigh();
495 	*(sigio->sio_myref) = NULL;
496 	splx(s);
497 	if (sigio->sio_pgid < 0) {
498 		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
499 			     sigio, sio_pgsigio);
500 	} else /* if ((*sigiop)->sio_pgid > 0) */ {
501 		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
502 			     sigio, sio_pgsigio);
503 	}
504 	crfree(sigio->sio_ucred);
505 	FREE(sigio, M_SIGIO);
506 }
507 
/*
 * Free a list of sigio structures.
 *
 * funsetown() unlinks the structure it is given from its list, so the
 * head is released repeatedly until the list is empty.
 */
void
funsetownlst(struct sigiolst *sigiolst)
{
	while (!SLIST_EMPTY(sigiolst))
		funsetown(SLIST_FIRST(sigiolst));
}
518 
519 /*
520  * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
521  *
522  * After permission checking, add a sigio structure to the sigio list for
523  * the process or process group.
524  */
525 int
526 fsetown(pgid, sigiop)
527 	pid_t pgid;
528 	struct sigio **sigiop;
529 {
530 	struct proc *proc;
531 	struct pgrp *pgrp;
532 	struct sigio *sigio;
533 	int s;
534 
535 	if (pgid == 0) {
536 		funsetown(*sigiop);
537 		return (0);
538 	}
539 	if (pgid > 0) {
540 		proc = pfind(pgid);
541 		if (proc == NULL)
542 			return (ESRCH);
543 
544 		/*
545 		 * Policy - Don't allow a process to FSETOWN a process
546 		 * in another session.
547 		 *
548 		 * Remove this test to allow maximum flexibility or
549 		 * restrict FSETOWN to the current process or process
550 		 * group for maximum safety.
551 		 */
552 		if (proc->p_session != curproc->p_session) {
553 			PROC_UNLOCK(proc);
554 			return (EPERM);
555 		}
556 		PROC_UNLOCK(proc);
557 
558 		pgrp = NULL;
559 	} else /* if (pgid < 0) */ {
560 		pgrp = pgfind(-pgid);
561 		if (pgrp == NULL)
562 			return (ESRCH);
563 
564 		/*
565 		 * Policy - Don't allow a process to FSETOWN a process
566 		 * in another session.
567 		 *
568 		 * Remove this test to allow maximum flexibility or
569 		 * restrict FSETOWN to the current process or process
570 		 * group for maximum safety.
571 		 */
572 		if (pgrp->pg_session != curproc->p_session)
573 			return (EPERM);
574 
575 		proc = NULL;
576 	}
577 	funsetown(*sigiop);
578 	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
579 	if (pgid > 0) {
580 		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
581 		sigio->sio_proc = proc;
582 	} else {
583 		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
584 		sigio->sio_pgrp = pgrp;
585 	}
586 	sigio->sio_pgid = pgid;
587 	crhold(curproc->p_ucred);
588 	sigio->sio_ucred = curproc->p_ucred;
589 	sigio->sio_myref = sigiop;
590 	s = splhigh();
591 	*sigiop = sigio;
592 	splx(s);
593 	return (0);
594 }
595 
596 /*
597  * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
598  */
599 pid_t
600 fgetown(sigio)
601 	struct sigio *sigio;
602 {
603 	return (sigio != NULL ? sigio->sio_pgid : 0);
604 }
605 
606 /*
607  * Close a file descriptor.
608  */
609 #ifndef _SYS_SYSPROTO_H_
610 struct close_args {
611         int     fd;
612 };
613 #endif
614 /*
615  * MPSAFE
616  */
617 /* ARGSUSED */
618 int
619 close(p, uap)
620 	struct proc *p;
621 	struct close_args *uap;
622 {
623 	register struct filedesc *fdp;
624 	register struct file *fp;
625 	register int fd = uap->fd;
626 	int error = 0;
627 
628 	mtx_lock(&Giant);
629 	fdp = p->p_fd;
630 	if ((unsigned)fd >= fdp->fd_nfiles ||
631 	    (fp = fdp->fd_ofiles[fd]) == NULL) {
632 		error = EBADF;
633 		goto done2;
634 	}
635 #if 0
636 	if (fdp->fd_ofileflags[fd] & UF_MAPPED)
637 		(void) munmapfd(p, fd);
638 #endif
639 	fdp->fd_ofiles[fd] = NULL;
640 	fdp->fd_ofileflags[fd] = 0;
641 
642 	/*
643 	 * we now hold the fp reference that used to be owned by the descriptor
644 	 * array.
645 	 */
646 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
647 		fdp->fd_lastfile--;
648 	if (fd < fdp->fd_freefile)
649 		fdp->fd_freefile = fd;
650 	if (fd < fdp->fd_knlistsize)
651 		knote_fdclose(p, fd);
652 	error = closef(fp, p);
653 done2:
654 	mtx_unlock(&Giant);
655 	return(error);
656 }
657 
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Return status information about a file descriptor.
 */
#ifndef _SYS_SYSPROTO_H_
struct ofstat_args {
	int	fd;
	struct	ostat *sb;
};
#endif
/*
 * Old-format fstat() system call.
 *
 * Obtains the file's status with fo_stat(), converts it to the old
 * struct ostat layout with cvtstat() and copies the result out to
 * uap->sb.  Returns EBADF for an unopened descriptor, otherwise the
 * error from fo_stat() or copyout().
 *
 * MPSAFE
 */
/* ARGSUSED */
int
ofstat(struct proc *p, struct ofstat_args *uap)
{
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct stat st;
	struct ostat ost;
	int error;

	mtx_lock(&Giant);

	if ((unsigned)uap->fd < fdp->fd_nfiles &&
	    (fp = fdp->fd_ofiles[uap->fd]) != NULL) {
		/* Keep the file alive across the calls below. */
		fhold(fp);
		error = fo_stat(fp, &st, p);
		if (error == 0) {
			cvtstat(&st, &ost);
			error = copyout((caddr_t)&ost, (caddr_t)uap->sb,
			    sizeof (ost));
		}
		fdrop(fp, p);
	} else {
		error = EBADF;
	}
	mtx_unlock(&Giant);
	return (error);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
702 
703 /*
704  * Return status information about a file descriptor.
705  */
706 #ifndef _SYS_SYSPROTO_H_
707 struct fstat_args {
708 	int	fd;
709 	struct	stat *sb;
710 };
711 #endif
712 /*
713  * MPSAFE
714  */
715 /* ARGSUSED */
716 int
717 fstat(p, uap)
718 	struct proc *p;
719 	register struct fstat_args *uap;
720 {
721 	register struct filedesc *fdp;
722 	register struct file *fp;
723 	struct stat ub;
724 	int error;
725 
726 	mtx_lock(&Giant);
727 	fdp = p->p_fd;
728 
729 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
730 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL) {
731 		error = EBADF;
732 		goto done2;
733 	}
734 	fhold(fp);
735 	error = fo_stat(fp, &ub, p);
736 	if (error == 0)
737 		error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
738 	fdrop(fp, p);
739 done2:
740 	mtx_unlock(&Giant);
741 	return (error);
742 }
743 
744 /*
745  * Return status information about a file descriptor.
746  */
747 #ifndef _SYS_SYSPROTO_H_
748 struct nfstat_args {
749 	int	fd;
750 	struct	nstat *sb;
751 };
752 #endif
753 /*
754  * MPSAFE
755  */
756 /* ARGSUSED */
757 int
758 nfstat(p, uap)
759 	struct proc *p;
760 	register struct nfstat_args *uap;
761 {
762 	register struct filedesc *fdp;
763 	register struct file *fp;
764 	struct stat ub;
765 	struct nstat nub;
766 	int error;
767 
768 	mtx_lock(&Giant);
769 
770 	fdp = p->p_fd;
771 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
772 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL) {
773 		error = EBADF;
774 		goto done2;
775 	}
776 	fhold(fp);
777 	error = fo_stat(fp, &ub, p);
778 	if (error == 0) {
779 		cvtnstat(&ub, &nub);
780 		error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub));
781 	}
782 	fdrop(fp, p);
783 done2:
784 	mtx_unlock(&Giant);
785 	return (error);
786 }
787 
788 /*
789  * Return pathconf information about a file descriptor.
790  */
791 #ifndef _SYS_SYSPROTO_H_
792 struct fpathconf_args {
793 	int	fd;
794 	int	name;
795 };
796 #endif
797 /*
798  * MPSAFE
799  */
800 /* ARGSUSED */
801 int
802 fpathconf(p, uap)
803 	struct proc *p;
804 	register struct fpathconf_args *uap;
805 {
806 	struct filedesc *fdp;
807 	struct file *fp;
808 	struct vnode *vp;
809 	int error = 0;
810 
811 	mtx_lock(&Giant);
812 	fdp = p->p_fd;
813 
814 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
815 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL) {
816 		error = EBADF;
817 		goto done2;
818 	}
819 
820 	fhold(fp);
821 
822 	switch (fp->f_type) {
823 	case DTYPE_PIPE:
824 	case DTYPE_SOCKET:
825 		if (uap->name != _PC_PIPE_BUF) {
826 			error = EINVAL;
827 			goto done2;
828 		}
829 		p->p_retval[0] = PIPE_BUF;
830 		error = 0;
831 		break;
832 	case DTYPE_FIFO:
833 	case DTYPE_VNODE:
834 		vp = (struct vnode *)fp->f_data;
835 		error = VOP_PATHCONF(vp, uap->name, p->p_retval);
836 		break;
837 	default:
838 		error = EOPNOTSUPP;
839 		break;
840 	}
841 	fdrop(fp, p);
842 done2:
843 	mtx_unlock(&Giant);
844 	return(error);
845 }
846 
847 /*
848  * Allocate a file descriptor for the process.
849  */
850 static int fdexpand;
851 SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
852 
853 int
854 fdalloc(p, want, result)
855 	struct proc *p;
856 	int want;
857 	int *result;
858 {
859 	register struct filedesc *fdp = p->p_fd;
860 	register int i;
861 	int lim, last, nfiles;
862 	struct file **newofile;
863 	char *newofileflags;
864 
865 	/*
866 	 * Search for a free descriptor starting at the higher
867 	 * of want or fd_freefile.  If that fails, consider
868 	 * expanding the ofile array.
869 	 */
870 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
871 	for (;;) {
872 		last = min(fdp->fd_nfiles, lim);
873 		if ((i = want) < fdp->fd_freefile)
874 			i = fdp->fd_freefile;
875 		for (; i < last; i++) {
876 			if (fdp->fd_ofiles[i] == NULL) {
877 				fdp->fd_ofileflags[i] = 0;
878 				if (i > fdp->fd_lastfile)
879 					fdp->fd_lastfile = i;
880 				if (want <= fdp->fd_freefile)
881 					fdp->fd_freefile = i;
882 				*result = i;
883 				return (0);
884 			}
885 		}
886 
887 		/*
888 		 * No space in current array.  Expand?
889 		 */
890 		if (fdp->fd_nfiles >= lim)
891 			return (EMFILE);
892 		if (fdp->fd_nfiles < NDEXTENT)
893 			nfiles = NDEXTENT;
894 		else
895 			nfiles = 2 * fdp->fd_nfiles;
896 		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
897 		    M_FILEDESC, M_WAITOK);
898 
899 		/*
900 		 * deal with file-table extend race that might have occured
901 		 * when malloc was blocked.
902 		 */
903 		if (fdp->fd_nfiles >= nfiles) {
904 			FREE(newofile, M_FILEDESC);
905 			continue;
906 		}
907 		newofileflags = (char *) &newofile[nfiles];
908 		/*
909 		 * Copy the existing ofile and ofileflags arrays
910 		 * and zero the new portion of each array.
911 		 */
912 		bcopy(fdp->fd_ofiles, newofile,
913 			(i = sizeof(struct file *) * fdp->fd_nfiles));
914 		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
915 		bcopy(fdp->fd_ofileflags, newofileflags,
916 			(i = sizeof(char) * fdp->fd_nfiles));
917 		bzero(newofileflags + i, nfiles * sizeof(char) - i);
918 		if (fdp->fd_nfiles > NDFILE)
919 			FREE(fdp->fd_ofiles, M_FILEDESC);
920 		fdp->fd_ofiles = newofile;
921 		fdp->fd_ofileflags = newofileflags;
922 		fdp->fd_nfiles = nfiles;
923 		fdexpand++;
924 	}
925 	return (0);
926 }
927 
928 /*
929  * Check to see whether n user file descriptors
930  * are available to the process p.
931  */
932 int
933 fdavail(p, n)
934 	struct proc *p;
935 	register int n;
936 {
937 	register struct filedesc *fdp = p->p_fd;
938 	register struct file **fpp;
939 	register int i, lim, last;
940 
941 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
942 	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
943 		return (1);
944 
945 	last = min(fdp->fd_nfiles, lim);
946 	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
947 	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
948 		if (*fpp == NULL && --n <= 0)
949 			return (1);
950 	}
951 	return (0);
952 }
953 
954 /*
955  * Create a new open file structure and allocate
956  * a file decriptor for the process that refers to it.
957  */
958 int
959 falloc(p, resultfp, resultfd)
960 	register struct proc *p;
961 	struct file **resultfp;
962 	int *resultfd;
963 {
964 	register struct file *fp, *fq;
965 	int error, i;
966 
967 	if (nfiles >= maxfiles) {
968 		tablefull("file");
969 		return (ENFILE);
970 	}
971 	/*
972 	 * Allocate a new file descriptor.
973 	 * If the process has file descriptor zero open, add to the list
974 	 * of open files at that point, otherwise put it at the front of
975 	 * the list of open files.
976 	 */
977 	nfiles++;
978 	MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK | M_ZERO);
979 
980 	/*
981 	 * wait until after malloc (which may have blocked) returns before
982 	 * allocating the slot, else a race might have shrunk it if we had
983 	 * allocated it before the malloc.
984 	 */
985 	if ((error = fdalloc(p, 0, &i))) {
986 		nfiles--;
987 		FREE(fp, M_FILE);
988 		return (error);
989 	}
990 	fp->f_count = 1;
991 	fp->f_cred = p->p_ucred;
992 	fp->f_ops = &badfileops;
993 	fp->f_seqcount = 1;
994 	crhold(fp->f_cred);
995 	if ((fq = p->p_fd->fd_ofiles[0])) {
996 		LIST_INSERT_AFTER(fq, fp, f_list);
997 	} else {
998 		LIST_INSERT_HEAD(&filehead, fp, f_list);
999 	}
1000 	p->p_fd->fd_ofiles[i] = fp;
1001 	if (resultfp)
1002 		*resultfp = fp;
1003 	if (resultfd)
1004 		*resultfd = i;
1005 	return (0);
1006 }
1007 
1008 /*
1009  * Free a file descriptor.
1010  */
1011 void
1012 ffree(fp)
1013 	register struct file *fp;
1014 {
1015 	KASSERT((fp->f_count == 0), ("ffree: fp_fcount not 0!"));
1016 	LIST_REMOVE(fp, f_list);
1017 	crfree(fp->f_cred);
1018 	nfiles--;
1019 	FREE(fp, M_FILE);
1020 }
1021 
1022 /*
1023  * Build a new filedesc structure.
1024  */
1025 struct filedesc *
1026 fdinit(p)
1027 	struct proc *p;
1028 {
1029 	register struct filedesc0 *newfdp;
1030 	register struct filedesc *fdp = p->p_fd;
1031 
1032 	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
1033 	    M_FILEDESC, M_WAITOK | M_ZERO);
1034 	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
1035 	if (newfdp->fd_fd.fd_cdir)
1036 		VREF(newfdp->fd_fd.fd_cdir);
1037 	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
1038 	if (newfdp->fd_fd.fd_rdir)
1039 		VREF(newfdp->fd_fd.fd_rdir);
1040 	newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
1041 	if (newfdp->fd_fd.fd_jdir)
1042 		VREF(newfdp->fd_fd.fd_jdir);
1043 
1044 	/* Create the file descriptor table. */
1045 	newfdp->fd_fd.fd_refcnt = 1;
1046 	newfdp->fd_fd.fd_cmask = cmask;
1047 	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
1048 	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
1049 	newfdp->fd_fd.fd_nfiles = NDFILE;
1050 	newfdp->fd_fd.fd_knlistsize = -1;
1051 
1052 	return (&newfdp->fd_fd);
1053 }
1054 
1055 /*
1056  * Share a filedesc structure.
1057  */
1058 struct filedesc *
1059 fdshare(p)
1060 	struct proc *p;
1061 {
1062 	p->p_fd->fd_refcnt++;
1063 	return (p->p_fd);
1064 }
1065 
1066 /*
1067  * Copy a filedesc structure.
1068  */
1069 struct filedesc *
1070 fdcopy(p)
1071 	struct proc *p;
1072 {
1073 	register struct filedesc *newfdp, *fdp = p->p_fd;
1074 	register struct file **fpp;
1075 	register int i;
1076 
1077 	/* Certain daemons might not have file descriptors. */
1078 	if (fdp == NULL)
1079 		return (NULL);
1080 
1081 	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
1082 	    M_FILEDESC, M_WAITOK);
1083 	bcopy(fdp, newfdp, sizeof(struct filedesc));
1084 	if (newfdp->fd_cdir)
1085 		VREF(newfdp->fd_cdir);
1086 	if (newfdp->fd_rdir)
1087 		VREF(newfdp->fd_rdir);
1088 	if (newfdp->fd_jdir)
1089 		VREF(newfdp->fd_jdir);
1090 	newfdp->fd_refcnt = 1;
1091 
1092 	/*
1093 	 * If the number of open files fits in the internal arrays
1094 	 * of the open file structure, use them, otherwise allocate
1095 	 * additional memory for the number of descriptors currently
1096 	 * in use.
1097 	 */
1098 	if (newfdp->fd_lastfile < NDFILE) {
1099 		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
1100 		newfdp->fd_ofileflags =
1101 		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
1102 		i = NDFILE;
1103 	} else {
1104 		/*
1105 		 * Compute the smallest multiple of NDEXTENT needed
1106 		 * for the file descriptors currently in use,
1107 		 * allowing the table to shrink.
1108 		 */
1109 		i = newfdp->fd_nfiles;
1110 		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
1111 			i /= 2;
1112 		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
1113 		    M_FILEDESC, M_WAITOK);
1114 		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
1115 	}
1116 	newfdp->fd_nfiles = i;
1117 	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
1118 	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
1119 
1120 	/*
1121 	 * kq descriptors cannot be copied.
1122 	 */
1123 	if (newfdp->fd_knlistsize != -1) {
1124 		fpp = newfdp->fd_ofiles;
1125 		for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
1126 			if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE)
1127 				*fpp = NULL;
1128 		}
1129 		newfdp->fd_knlist = NULL;
1130 		newfdp->fd_knlistsize = -1;
1131 		newfdp->fd_knhash = NULL;
1132 		newfdp->fd_knhashmask = 0;
1133 	}
1134 
1135 	fpp = newfdp->fd_ofiles;
1136 	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
1137 		if (*fpp != NULL)
1138 			fhold(*fpp);
1139 	}
1140 	return (newfdp);
1141 }
1142 
1143 /*
1144  * Release a filedesc structure.
1145  */
1146 void
1147 fdfree(p)
1148 	struct proc *p;
1149 {
1150 	register struct filedesc *fdp = p->p_fd;
1151 	struct file **fpp;
1152 	register int i;
1153 
1154 	/* Certain daemons might not have file descriptors. */
1155 	if (fdp == NULL)
1156 		return;
1157 
1158 	if (--fdp->fd_refcnt > 0)
1159 		return;
1160 	/*
1161 	 * we are the last reference to the structure, we can
1162 	 * safely assume it will not change out from under us.
1163 	 */
1164 	fpp = fdp->fd_ofiles;
1165 	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
1166 		if (*fpp)
1167 			(void) closef(*fpp, p);
1168 	}
1169 	if (fdp->fd_nfiles > NDFILE)
1170 		FREE(fdp->fd_ofiles, M_FILEDESC);
1171 	if (fdp->fd_cdir)
1172 		vrele(fdp->fd_cdir);
1173 	if (fdp->fd_rdir)
1174 		vrele(fdp->fd_rdir);
1175 	if (fdp->fd_jdir)
1176 		vrele(fdp->fd_jdir);
1177 	if (fdp->fd_knlist)
1178 		FREE(fdp->fd_knlist, M_TEMP);
1179 	if (fdp->fd_knhash)
1180 		FREE(fdp->fd_knhash, M_TEMP);
1181 	FREE(fdp, M_FILEDESC);
1182 }
1183 
1184 /*
1185  * For setugid programs, we don't want to people to use that setugidness
1186  * to generate error messages which write to a file which otherwise would
1187  * otherwise be off-limits to the process.
1188  *
1189  * This is a gross hack to plug the hole.  A better solution would involve
1190  * a special vop or other form of generalized access control mechanism.  We
1191  * go ahead and just reject all procfs file systems accesses as dangerous.
1192  *
1193  * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
1194  * sufficient.  We also don't for check setugidness since we know we are.
1195  */
1196 static int
1197 is_unsafe(struct file *fp)
1198 {
1199 	if (fp->f_type == DTYPE_VNODE &&
1200 	    ((struct vnode *)(fp->f_data))->v_tag == VT_PROCFS)
1201 		return (1);
1202 	return (0);
1203 }
1204 
1205 /*
1206  * Make this setguid thing safe, if at all possible.
1207  */
1208 void
1209 setugidsafety(p)
1210 	struct proc *p;
1211 {
1212 	struct filedesc *fdp = p->p_fd;
1213 	register int i;
1214 
1215 	/* Certain daemons might not have file descriptors. */
1216 	if (fdp == NULL)
1217 		return;
1218 
1219 	/*
1220 	 * note: fdp->fd_ofiles may be reallocated out from under us while
1221 	 * we are blocked in a close.  Be careful!
1222 	 */
1223 	for (i = 0; i <= fdp->fd_lastfile; i++) {
1224 		if (i > 2)
1225 			break;
1226 		if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
1227 			struct file *fp;
1228 
1229 #if 0
1230 			if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0)
1231 				(void) munmapfd(p, i);
1232 #endif
1233 			if (i < fdp->fd_knlistsize)
1234 				knote_fdclose(p, i);
1235 			/*
1236 			 * NULL-out descriptor prior to close to avoid
1237 			 * a race while close blocks.
1238 			 */
1239 			fp = fdp->fd_ofiles[i];
1240 			fdp->fd_ofiles[i] = NULL;
1241 			fdp->fd_ofileflags[i] = 0;
1242 			if (i < fdp->fd_freefile)
1243 				fdp->fd_freefile = i;
1244 			(void) closef(fp, p);
1245 		}
1246 	}
1247 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1248 		fdp->fd_lastfile--;
1249 }
1250 
1251 /*
1252  * Close any files on exec?
1253  */
1254 void
1255 fdcloseexec(p)
1256 	struct proc *p;
1257 {
1258 	struct filedesc *fdp = p->p_fd;
1259 	register int i;
1260 
1261 	/* Certain daemons might not have file descriptors. */
1262 	if (fdp == NULL)
1263 		return;
1264 
1265 	/*
1266 	 * We cannot cache fd_ofiles or fd_ofileflags since operations
1267 	 * may block and rip them out from under us.
1268 	 */
1269 	for (i = 0; i <= fdp->fd_lastfile; i++) {
1270 		if (fdp->fd_ofiles[i] != NULL &&
1271 		    (fdp->fd_ofileflags[i] & UF_EXCLOSE)) {
1272 			struct file *fp;
1273 
1274 #if 0
1275 			if (fdp->fd_ofileflags[i] & UF_MAPPED)
1276 				(void) munmapfd(p, i);
1277 #endif
1278 			if (i < fdp->fd_knlistsize)
1279 				knote_fdclose(p, i);
1280 			/*
1281 			 * NULL-out descriptor prior to close to avoid
1282 			 * a race while close blocks.
1283 			 */
1284 			fp = fdp->fd_ofiles[i];
1285 			fdp->fd_ofiles[i] = NULL;
1286 			fdp->fd_ofileflags[i] = 0;
1287 			if (i < fdp->fd_freefile)
1288 				fdp->fd_freefile = i;
1289 			(void) closef(fp, p);
1290 		}
1291 	}
1292 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1293 		fdp->fd_lastfile--;
1294 }
1295 
1296 /*
1297  * Internal form of close.
1298  * Decrement reference count on file structure.
1299  * Note: p may be NULL when closing a file
1300  * that was being passed in a message.
1301  */
1302 int
1303 closef(fp, p)
1304 	register struct file *fp;
1305 	register struct proc *p;
1306 {
1307 	struct vnode *vp;
1308 	struct flock lf;
1309 
1310 	if (fp == NULL)
1311 		return (0);
1312 	/*
1313 	 * POSIX record locking dictates that any close releases ALL
1314 	 * locks owned by this process.  This is handled by setting
1315 	 * a flag in the unlock to free ONLY locks obeying POSIX
1316 	 * semantics, and not to free BSD-style file locks.
1317 	 * If the descriptor was in a message, POSIX-style locks
1318 	 * aren't passed with the descriptor.
1319 	 */
1320 	if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
1321 		lf.l_whence = SEEK_SET;
1322 		lf.l_start = 0;
1323 		lf.l_len = 0;
1324 		lf.l_type = F_UNLCK;
1325 		vp = (struct vnode *)fp->f_data;
1326 		(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, &lf, F_POSIX);
1327 	}
1328 	return (fdrop(fp, p));
1329 }
1330 
1331 int
1332 fdrop(fp, p)
1333 	struct file *fp;
1334 	struct proc *p;
1335 {
1336 	struct flock lf;
1337 	struct vnode *vp;
1338 	int error;
1339 
1340 	if (--fp->f_count > 0)
1341 		return (0);
1342 	if (fp->f_count < 0)
1343 		panic("fdrop: count < 0");
1344 	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1345 		lf.l_whence = SEEK_SET;
1346 		lf.l_start = 0;
1347 		lf.l_len = 0;
1348 		lf.l_type = F_UNLCK;
1349 		vp = (struct vnode *)fp->f_data;
1350 		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1351 	}
1352 	if (fp->f_ops != &badfileops)
1353 		error = fo_close(fp, p);
1354 	else
1355 		error = 0;
1356 	ffree(fp);
1357 	return (error);
1358 }
1359 
1360 /*
1361  * Apply an advisory lock on a file descriptor.
1362  *
1363  * Just attempt to get a record lock of the requested type on
1364  * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1365  */
1366 #ifndef _SYS_SYSPROTO_H_
1367 struct flock_args {
1368 	int	fd;
1369 	int	how;
1370 };
1371 #endif
1372 /*
1373  * MPSAFE
1374  */
1375 /* ARGSUSED */
1376 int
1377 flock(p, uap)
1378 	struct proc *p;
1379 	register struct flock_args *uap;
1380 {
1381 	register struct filedesc *fdp = p->p_fd;
1382 	register struct file *fp;
1383 	struct vnode *vp;
1384 	struct flock lf;
1385 	int error;
1386 
1387 	mtx_lock(&Giant);
1388 
1389 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
1390 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL) {
1391 		error = EBADF;
1392 		goto done2;
1393 	}
1394 	if (fp->f_type != DTYPE_VNODE) {
1395 		error = EOPNOTSUPP;
1396 		goto done2;
1397 	}
1398 	vp = (struct vnode *)fp->f_data;
1399 	lf.l_whence = SEEK_SET;
1400 	lf.l_start = 0;
1401 	lf.l_len = 0;
1402 	if (uap->how & LOCK_UN) {
1403 		lf.l_type = F_UNLCK;
1404 		fp->f_flag &= ~FHASLOCK;
1405 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1406 		goto done2;
1407 	}
1408 	if (uap->how & LOCK_EX)
1409 		lf.l_type = F_WRLCK;
1410 	else if (uap->how & LOCK_SH)
1411 		lf.l_type = F_RDLCK;
1412 	else {
1413 		error = EBADF;
1414 		goto done2;
1415 	}
1416 	fp->f_flag |= FHASLOCK;
1417 	if (uap->how & LOCK_NB)
1418 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK);
1419 	else
1420 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT);
1421 done2:
1422 	mtx_unlock(&Giant);
1423 	return (error);
1424 }
1425 
1426 /*
1427  * File Descriptor pseudo-device driver (/dev/fd/).
1428  *
1429  * Opening minor device N dup()s the file (if any) connected to file
1430  * descriptor N belonging to the calling process.  Note that this driver
1431  * consists of only the ``open()'' routine, because all subsequent
1432  * references to this file will be direct to the other driver.
1433  */
1434 /* ARGSUSED */
1435 static int
1436 fdopen(dev, mode, type, p)
1437 	dev_t dev;
1438 	int mode, type;
1439 	struct proc *p;
1440 {
1441 
1442 	/*
1443 	 * XXX Kludge: set curproc->p_dupfd to contain the value of the
1444 	 * the file descriptor being sought for duplication. The error
1445 	 * return ensures that the vnode for this device will be released
1446 	 * by vn_open. Open will detect this special error and take the
1447 	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1448 	 * will simply report the error.
1449 	 */
1450 	p->p_dupfd = dev2unit(dev);
1451 	return (ENODEV);
1452 }
1453 
1454 /*
1455  * Duplicate the specified descriptor to a free descriptor.
1456  */
1457 int
1458 dupfdopen(p, fdp, indx, dfd, mode, error)
1459 	struct proc *p;
1460 	struct filedesc *fdp;
1461 	int indx, dfd;
1462 	int mode;
1463 	int error;
1464 {
1465 	register struct file *wfp;
1466 	struct file *fp;
1467 
1468 	/*
1469 	 * If the to-be-dup'd fd number is greater than the allowed number
1470 	 * of file descriptors, or the fd to be dup'd has already been
1471 	 * closed, then reject.
1472 	 */
1473 	if ((u_int)dfd >= fdp->fd_nfiles ||
1474 	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
1475 		return (EBADF);
1476 	}
1477 
1478 	/*
1479 	 * There are two cases of interest here.
1480 	 *
1481 	 * For ENODEV simply dup (dfd) to file descriptor
1482 	 * (indx) and return.
1483 	 *
1484 	 * For ENXIO steal away the file structure from (dfd) and
1485 	 * store it in (indx).  (dfd) is effectively closed by
1486 	 * this operation.
1487 	 *
1488 	 * Any other error code is just returned.
1489 	 */
1490 	switch (error) {
1491 	case ENODEV:
1492 		/*
1493 		 * Check that the mode the file is being opened for is a
1494 		 * subset of the mode of the existing descriptor.
1495 		 */
1496 		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
1497 			return (EACCES);
1498 		fp = fdp->fd_ofiles[indx];
1499 #if 0
1500 		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1501 			(void) munmapfd(p, indx);
1502 #endif
1503 		fdp->fd_ofiles[indx] = wfp;
1504 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1505 		fhold(wfp);
1506 		if (indx > fdp->fd_lastfile)
1507 			fdp->fd_lastfile = indx;
1508 		/*
1509 		 * we now own the reference to fp that the ofiles[] array
1510 		 * used to own.  Release it.
1511 		 */
1512 		if (fp)
1513 			fdrop(fp, p);
1514 		return (0);
1515 
1516 	case ENXIO:
1517 		/*
1518 		 * Steal away the file pointer from dfd, and stuff it into indx.
1519 		 */
1520 		fp = fdp->fd_ofiles[indx];
1521 #if 0
1522 		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1523 			(void) munmapfd(p, indx);
1524 #endif
1525 		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1526 		fdp->fd_ofiles[dfd] = NULL;
1527 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1528 		fdp->fd_ofileflags[dfd] = 0;
1529 
1530 		/*
1531 		 * we now own the reference to fp that the ofiles[] array
1532 		 * used to own.  Release it.
1533 		 */
1534 		if (fp)
1535 			fdrop(fp, p);
1536 		/*
1537 		 * Complete the clean up of the filedesc structure by
1538 		 * recomputing the various hints.
1539 		 */
1540 		if (indx > fdp->fd_lastfile) {
1541 			fdp->fd_lastfile = indx;
1542 		} else {
1543 			while (fdp->fd_lastfile > 0 &&
1544 			   fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
1545 				fdp->fd_lastfile--;
1546 			}
1547 			if (dfd < fdp->fd_freefile)
1548 				fdp->fd_freefile = dfd;
1549 		}
1550 		return (0);
1551 
1552 	default:
1553 		return (error);
1554 	}
1555 	/* NOTREACHED */
1556 }
1557 
1558 /*
1559  * Get file structures.
1560  */
1561 static int
1562 sysctl_kern_file(SYSCTL_HANDLER_ARGS)
1563 {
1564 	int error;
1565 	struct file *fp;
1566 
1567 	if (!req->oldptr) {
1568 		/*
1569 		 * overestimate by 10 files
1570 		 */
1571 		return (SYSCTL_OUT(req, 0, sizeof(filehead) +
1572 				(nfiles + 10) * sizeof(struct file)));
1573 	}
1574 
1575 	error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
1576 	if (error)
1577 		return (error);
1578 
1579 	/*
1580 	 * followed by an array of file structures
1581 	 */
1582 	LIST_FOREACH(fp, &filehead, f_list) {
1583 		error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
1584 		if (error)
1585 			return (error);
1586 	}
1587 	return (0);
1588 }
1589 
1590 SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
1591     0, 0, sysctl_kern_file, "S,file", "Entire file table");
1592 
1593 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
1594     &maxfilesperproc, 0, "Maximum files allowed open per process");
1595 
1596 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
1597     &maxfiles, 0, "Maximum number of files");
1598 
1599 SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
1600     &nfiles, 0, "System-wide number of open files");
1601 
1602 static void
1603 fildesc_drvinit(void *unused)
1604 {
1605 	dev_t dev;
1606 
1607 	dev = make_dev(&fildesc_cdevsw, 0, UID_BIN, GID_BIN, 0666, "fd/0");
1608 	make_dev_alias(dev, "stdin");
1609 	dev = make_dev(&fildesc_cdevsw, 1, UID_BIN, GID_BIN, 0666, "fd/1");
1610 	make_dev_alias(dev, "stdout");
1611 	dev = make_dev(&fildesc_cdevsw, 2, UID_BIN, GID_BIN, 0666, "fd/2");
1612 	make_dev_alias(dev, "stderr");
1613 	if (!devfs_present) {
1614 		int fd;
1615 
1616 		for (fd = 3; fd < NUMFDESC; fd++)
1617 			make_dev(&fildesc_cdevsw, fd, UID_BIN, GID_BIN, 0666,
1618 			    "fd/%d", fd);
1619 	}
1620 }
1621 
1622 struct fileops badfileops = {
1623 	badfo_readwrite,
1624 	badfo_readwrite,
1625 	badfo_ioctl,
1626 	badfo_poll,
1627 	badfo_kqfilter,
1628 	badfo_stat,
1629 	badfo_close
1630 };
1631 
/*
 * Stub used for the first two entries of badfileops.
 * Ignores all of its arguments and always fails with EBADF.
 */
static int
badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *cred,
    int flags, struct proc *p)
{

	return (EBADF);
}
1643 
1644 static int
1645 badfo_ioctl(fp, com, data, p)
1646 	struct file *fp;
1647 	u_long com;
1648 	caddr_t data;
1649 	struct proc *p;
1650 {
1651 
1652 	return (EBADF);
1653 }
1654 
/*
 * poll stub of badfileops.
 * Ignores all of its arguments and always returns 0.
 */
static int
badfo_poll(struct file *fp, int events, struct ucred *cred, struct proc *p)
{

	return (0);
}
1665 
/*
 * kqfilter stub of badfileops.
 * Ignores both of its arguments and always returns 0.
 */
static int
badfo_kqfilter(struct file *fp, struct knote *kn)
{

	return (0);
}
1674 
/*
 * stat stub of badfileops.
 * Ignores all of its arguments, leaves *sb untouched and always fails
 * with EBADF.
 */
static int
badfo_stat(struct file *fp, struct stat *sb, struct proc *p)
{

	return (EBADF);
}
1684 
/*
 * close stub of badfileops.
 * Ignores both of its arguments and always fails with EBADF.
 */
static int
badfo_close(struct file *fp, struct proc *p)
{

	return (EBADF);
}
1693 
1694 SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
1695 					fildesc_drvinit,NULL)
1696