xref: /freebsd/sys/fs/devfs/devfs_vnops.c (revision 8b3ae668b13db776ced151f20e9ad3d23eca545d)
1 /*-
2  * Copyright (c) 2000-2004
3  *	Poul-Henning Kamp.  All rights reserved.
4  * Copyright (c) 1989, 1992-1993, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software donated to Berkeley by
8  * Jan-Simon Pendry.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  *	@(#)kernfs_vnops.c	8.15 (Berkeley) 5/21/95
32  * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vnops.c 1.43
33  *
34  * $FreeBSD$
35  */
36 
37 /*
38  * TODO:
39  *	remove empty directories
40  *	mkdir: want it ?
41  */
42 
43 #include "opt_mac.h"
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/conf.h>
48 #include <sys/dirent.h>
49 #include <sys/fcntl.h>
50 #include <sys/file.h>
51 #include <sys/filedesc.h>
52 #include <sys/filio.h>
53 #include <sys/kernel.h>
54 #include <sys/lock.h>
55 #include <sys/mac.h>
56 #include <sys/malloc.h>
57 #include <sys/mount.h>
58 #include <sys/namei.h>
59 #include <sys/proc.h>
60 #include <sys/stat.h>
61 #include <sys/sx.h>
62 #include <sys/time.h>
63 #include <sys/ttycom.h>
64 #include <sys/unistd.h>
65 #include <sys/vnode.h>
66 
/*
 * Forward declarations of the operation tables.  They are defined near
 * the end of this file but referenced earlier (devfs_allocv() installs
 * the vnode operation vectors, devfs_open() installs devfs_ops_f).
 */
static struct vop_vector devfs_vnodeops;
static struct vop_vector devfs_specops;
static struct fileops devfs_ops_f;
70 
71 #include <fs/devfs/devfs.h>
72 #include <fs/devfs/devfs_int.h>
73 
/*
 * Mutex held in this file while the de_vnode <-> v_data association
 * between a dirent and its vnode is looked up or established
 * (devfs_allocv) and while it is torn down (devfs_reclaim, devfs_revoke).
 */
static struct mtx	devfs_de_interlock;
MTX_SYSINIT(devfs_de_interlock, &devfs_de_interlock, "devfs interlock", MTX_DEF);
76 
77 static int
78 devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp)
79 {
80 
81 	*devp = fp->f_vnode->v_rdev;
82 	if (*devp != fp->f_data)
83 		return (ENXIO);
84 	KASSERT((*devp)->si_refcount > 0,
85 	    ("devfs: un-referenced struct cdev *(%s)", devtoname(*devp)));
86 	*dswp = dev_refthread(*devp);
87 	if (*dswp == NULL)
88 		return (ENXIO);
89 	return (0);
90 }
91 
92 /*
93  * Construct the fully qualified path name relative to the mountpoint
94  */
95 static char *
96 devfs_fqpn(char *buf, struct vnode *dvp, struct componentname *cnp)
97 {
98 	int i;
99 	struct devfs_dirent *de, *dd;
100 	struct devfs_mount *dmp;
101 
102 	dmp = VFSTODEVFS(dvp->v_mount);
103 	dd = dvp->v_data;
104 	i = SPECNAMELEN;
105 	buf[i] = '\0';
106 	i -= cnp->cn_namelen;
107 	if (i < 0)
108 		 return (NULL);
109 	bcopy(cnp->cn_nameptr, buf + i, cnp->cn_namelen);
110 	de = dd;
111 	while (de != dmp->dm_rootdir) {
112 		i--;
113 		if (i < 0)
114 			 return (NULL);
115 		buf[i] = '/';
116 		i -= de->de_dirent->d_namlen;
117 		if (i < 0)
118 			 return (NULL);
119 		bcopy(de->de_dirent->d_name, buf + i,
120 		    de->de_dirent->d_namlen);
121 		de = TAILQ_FIRST(&de->de_dlist);	/* "." */
122 		de = TAILQ_NEXT(de, de_list);		/* ".." */
123 		de = de->de_dir;
124 	}
125 	return (buf + i);
126 }
127 
/*
 * Return in *vpp, locked exclusively, the vnode that represents dirent
 * "de" on mount "mp", creating the vnode if the dirent has none yet.
 *
 * An existing vnode is found under devfs_de_interlock and acquired with
 * vget(); if vget() fails the whole lookup is retried.  Otherwise a new
 * vnode is obtained from getnewvnode(), typed according to the dirent's
 * d_type and tied to the dirent under devfs_de_interlock.
 *
 * Returns 0 on success, ENOENT for a character device whose cdev_priv is
 * not CDP_ACTIVE, or the error from getnewvnode().
 */
int
devfs_allocv(struct devfs_dirent *de, struct mount *mp, struct vnode **vpp, struct thread *td)
{
	int error;
	struct vnode *vp;
	struct cdev *dev;

	KASSERT(td == curthread, ("devfs_allocv: td != curthread"));
loop:

	mtx_lock(&devfs_de_interlock);
	vp = de->de_vnode;
	if (vp != NULL) {
		/*
		 * Take the vnode interlock before releasing the dirent
		 * interlock, then hand it to vget() via LK_INTERLOCK.
		 */
		VI_LOCK(vp);
		mtx_unlock(&devfs_de_interlock);
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td))
			goto loop;
		*vpp = vp;
		return (0);
	}
	mtx_unlock(&devfs_de_interlock);
	if (de->de_dirent->d_type == DT_CHR) {
		if (!(de->de_cdp->cdp_flags & CDP_ACTIVE))
			return (ENOENT);
		dev = &de->de_cdp->cdp_c;
	} else {
		dev = NULL;
	}
	error = getnewvnode("devfs", mp, &devfs_vnodeops, &vp);
	if (error != 0) {
		printf("devfs_allocv: failed to allocate new vnode\n");
		return (error);
	}

	if (de->de_dirent->d_type == DT_CHR) {
		vp->v_type = VCHR;
		/*
		 * Reference the cdev on behalf of the vnode and account the
		 * vnode's use count in the cdev's si_usecount; devfs_reclaim()
		 * undoes both.
		 */
		VI_LOCK(vp);
		dev_lock();
		dev_refl(dev);
		vp->v_rdev = dev;
		KASSERT(vp->v_usecount == 1,
		    ("%s %d (%d)\n", __func__, __LINE__, vp->v_usecount));
		dev->si_usecount += vp->v_usecount;
		dev_unlock();
		VI_UNLOCK(vp);
		/* Character devices use the special-file operation vector. */
		vp->v_op = &devfs_specops;
	} else if (de->de_dirent->d_type == DT_DIR) {
		vp->v_type = VDIR;
	} else if (de->de_dirent->d_type == DT_LNK) {
		vp->v_type = VLNK;
	} else {
		vp->v_type = VBAD;
	}
	/* Publish the dirent <-> vnode association. */
	mtx_lock(&devfs_de_interlock);
	vp->v_data = de;
	de->de_vnode = vp;
	mtx_unlock(&devfs_de_interlock);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
#ifdef MAC
	mac_associate_vnode_devfs(mp, de, vp);
#endif
	*vpp = vp;
	return (0);
}
192 
193 static int
194 devfs_access(struct vop_access_args *ap)
195 {
196 	struct vnode *vp = ap->a_vp;
197 	struct devfs_dirent *de;
198 	int error;
199 
200 	de = vp->v_data;
201 	if (vp->v_type == VDIR)
202 		de = de->de_dir;
203 
204 	error = vaccess(vp->v_type, de->de_mode, de->de_uid, de->de_gid,
205 	    ap->a_mode, ap->a_cred, NULL);
206 	if (!error)
207 		return (error);
208 	if (error != EACCES)
209 		return (error);
210 	/* We do, however, allow access to the controlling terminal */
211 	if (!(ap->a_td->td_proc->p_flag & P_CONTROLT))
212 		return (error);
213 	if (ap->a_td->td_proc->p_session->s_ttyvp == de->de_vnode)
214 		return (0);
215 	return (error);
216 }
217 
218 /* ARGSUSED */
219 static int
220 devfs_advlock(struct vop_advlock_args *ap)
221 {
222 
223 	return (ap->a_flags & F_FLOCK ? EOPNOTSUPP : EINVAL);
224 }
225 
/*
 * Close entry point for character-device vnodes.
 *
 * First drops the session's controlling-terminal reference when this is
 * the closing process' controlling tty and only two users remain.  Then
 * calls the driver's d_close, but only on last close, on a doomed vnode
 * or when the driver sets D_TRACKCLOSE.
 *
 * Returns ENXIO if dev_refthread() fails, 0 if the device is still in
 * use, otherwise the driver's d_close result.
 */
/* ARGSUSED */
static int
devfs_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp, *oldvp;
	struct thread *td = ap->a_td;
	struct cdev *dev = vp->v_rdev;
	struct cdevsw *dsw;
	int error;

	/*
	 * Hack: a tty device that is a controlling terminal
	 * has a reference from the session structure.
	 * We cannot easily tell that a character device is
	 * a controlling terminal, unless it is the closing
	 * process' controlling terminal.  In that case,
	 * if the reference count is 2 (this last descriptor
	 * plus the session), release the reference from the session.
	 */
	oldvp = NULL;
	sx_xlock(&proctree_lock);
	if (td && vp == td->td_proc->p_session->s_ttyvp) {
		SESS_LOCK(td->td_proc->p_session);
		VI_LOCK(vp);
		if (count_dev(dev) == 2 && (vp->v_iflag & VI_DOOMED) == 0) {
			td->td_proc->p_session->s_ttyvp = NULL;
			oldvp = vp;
		}
		VI_UNLOCK(vp);
		SESS_UNLOCK(td->td_proc->p_session);
	}
	sx_xunlock(&proctree_lock);
	/* The session's reference is released only after all locks are dropped. */
	if (oldvp != NULL)
		vrele(oldvp);
	/*
	 * We do not want to really close the device if it
	 * is still in use unless we are trying to close it
	 * forcibly. Since every use (buffer, vnode, swap, cmap)
	 * holds a reference to the vnode, and because we mark
	 * any other vnodes that alias this device, when the
	 * sum of the reference counts on all the aliased
	 * vnodes descends to one, we are on last close.
	 */
	dsw = dev_refthread(dev);
	if (dsw == NULL)
		return (ENXIO);
	VI_LOCK(vp);
	if (vp->v_iflag & VI_DOOMED) {
		/* Forced close. */
	} else if (dsw->d_flags & D_TRACKCLOSE) {
		/* Keep device updated on status. */
	} else if (count_dev(dev) > 1) {
		/* Still in use elsewhere: not the last close, nothing to do. */
		VI_UNLOCK(vp);
		dev_relthread(dev);
		return (0);
	}
	VI_UNLOCK(vp);
	KASSERT(dev->si_refcount > 0,
	    ("devfs_close() on un-referenced struct cdev *(%s)", devtoname(dev)));
	/* Drivers without D_NEEDGIANT are called with Giant dropped. */
	if (!(dsw->d_flags & D_NEEDGIANT)) {
		DROP_GIANT();
		error = dsw->d_close(dev, ap->a_fflag, S_IFCHR, td);
		PICKUP_GIANT();
	} else {
		error = dsw->d_close(dev, ap->a_fflag, S_IFCHR, td);
	}
	dev_relthread(dev);
	return (error);
}
295 
296 static int
297 devfs_close_f(struct file *fp, struct thread *td)
298 {
299 
300 	return (vnops.fo_close(fp, td));
301 }
302 
303 /* ARGSUSED */
304 static int
305 devfs_fsync(struct vop_fsync_args *ap)
306 {
307 	if (!vn_isdisk(ap->a_vp, NULL))
308 		return (0);
309 
310 	return (vop_stdfsync(ap));
311 }
312 
313 static int
314 devfs_getattr(struct vop_getattr_args *ap)
315 {
316 	struct vnode *vp = ap->a_vp;
317 	struct vattr *vap = ap->a_vap;
318 	int error = 0;
319 	struct devfs_dirent *de;
320 	struct cdev *dev;
321 
322 	de = vp->v_data;
323 	KASSERT(de != NULL, ("Null dirent in devfs_getattr vp=%p", vp));
324 	if (vp->v_type == VDIR) {
325 		de = de->de_dir;
326 		KASSERT(de != NULL,
327 		    ("Null dir dirent in devfs_getattr vp=%p", vp));
328 	}
329 	bzero((caddr_t) vap, sizeof(*vap));
330 	vattr_null(vap);
331 	vap->va_uid = de->de_uid;
332 	vap->va_gid = de->de_gid;
333 	vap->va_mode = de->de_mode;
334 	if (vp->v_type == VLNK)
335 		vap->va_size = strlen(de->de_symlink);
336 	else if (vp->v_type == VDIR)
337 		vap->va_size = vap->va_bytes = DEV_BSIZE;
338 	else
339 		vap->va_size = 0;
340 	if (vp->v_type != VDIR)
341 		vap->va_bytes = 0;
342 	vap->va_blocksize = DEV_BSIZE;
343 	vap->va_type = vp->v_type;
344 
345 #define fix(aa)							\
346 	do {							\
347 		if ((aa).tv_sec == 0) {				\
348 			(aa).tv_sec = boottime.tv_sec;		\
349 			(aa).tv_nsec = boottime.tv_usec * 1000; \
350 		}						\
351 	} while (0)
352 
353 	if (vp->v_type != VCHR)  {
354 		fix(de->de_atime);
355 		vap->va_atime = de->de_atime;
356 		fix(de->de_mtime);
357 		vap->va_mtime = de->de_mtime;
358 		fix(de->de_ctime);
359 		vap->va_ctime = de->de_ctime;
360 	} else {
361 		dev = vp->v_rdev;
362 		fix(dev->si_atime);
363 		vap->va_atime = dev->si_atime;
364 		fix(dev->si_mtime);
365 		vap->va_mtime = dev->si_mtime;
366 		fix(dev->si_ctime);
367 		vap->va_ctime = dev->si_ctime;
368 
369 		vap->va_rdev = dev->si_priv->cdp_inode;
370 	}
371 	vap->va_gen = 0;
372 	vap->va_flags = 0;
373 	vap->va_nlink = de->de_links;
374 	vap->va_fileid = de->de_inode;
375 
376 	return (error);
377 }
378 
/*
 * File-level ioctl for devfs character devices.
 *
 * FIODTYPE and FIODGNAME are answered here; every other command is
 * passed to the driver's d_ioctl, with ENOIOCTL translated to ENOTTY.
 * After a successful TIOCSCTTY the session's controlling-terminal vnode
 * (s_ttyvp) is switched to this file's vnode.
 */
/* ARGSUSED */
static int
devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, struct thread *td)
{
	struct cdev *dev;
	struct cdevsw *dsw;
	struct vnode *vp;
	struct vnode *vpold;
	int error, i;
	const char *p;
	struct fiodgname_arg *fgn;

	error = devfs_fp_check(fp, &dev, &dsw);
	if (error)
		return (error);

	if (com == FIODTYPE) {
		/* Report the device type bits of the cdevsw flags. */
		*(int *)data = dsw->d_flags & D_TYPEMASK;
		dev_relthread(dev);
		return (0);
	} else if (com == FIODGNAME) {
		/*
		 * Copy the device name, including its terminating NUL, out
		 * to the caller's buffer; EINVAL if the buffer is too small.
		 */
		fgn = data;
		p = devtoname(dev);
		i = strlen(p) + 1;
		if (i > fgn->len)
			error = EINVAL;
		else
			error = copyout(p, fgn->buf, i);
		dev_relthread(dev);
		return (error);
	}
	error = dsw->d_ioctl(dev, com, data, fp->f_flag, td);
	dev_relthread(dev);
	if (error == ENOIOCTL)
		error = ENOTTY;
	if (error == 0 && com == TIOCSCTTY) {
		vp = fp->f_vnode;

		/* Do nothing if reassigning same control tty */
		sx_slock(&proctree_lock);
		if (td->td_proc->p_session->s_ttyvp == vp) {
			sx_sunlock(&proctree_lock);
			return (0);
		}

		mtx_lock(&Giant);

		/* Reference the new vnode before it replaces the old one. */
		vpold = td->td_proc->p_session->s_ttyvp;
		VREF(vp);
		SESS_LOCK(td->td_proc->p_session);
		td->td_proc->p_session->s_ttyvp = vp;
		SESS_UNLOCK(td->td_proc->p_session);

		sx_sunlock(&proctree_lock);

		/* Get rid of reference to old control tty */
		if (vpold)
			vrele(vpold);
		mtx_unlock(&Giant);
	}
	return (error);
}
441 
442 /* ARGSUSED */
443 static int
444 devfs_kqfilter_f(struct file *fp, struct knote *kn)
445 {
446 	struct cdev *dev;
447 	struct cdevsw *dsw;
448 	int error;
449 
450 	error = devfs_fp_check(fp, &dev, &dsw);
451 	if (error)
452 		return (error);
453 	error = dsw->d_kqfilter(dev, kn);
454 	dev_relthread(dev);
455 	return (error);
456 }
457 
/*
 * Body of the lookup operation; devfs_lookup() calls it with the mount's
 * dm_lock held exclusively.
 *
 * Resolves component a_cnp in directory a_dvp.  "." and ".." are handled
 * directly; other names are searched with devfs_find() and, if absent,
 * offered to the dev_clone event handlers so a driver can create the
 * device on demand.  On success *a_vpp holds the resulting vnode.
 */
static int
devfs_lookupx(struct vop_lookup_args *ap)
{
	struct componentname *cnp;
	struct vnode *dvp, **vpp;
	struct thread *td;
	struct devfs_dirent *de, *dd;
	struct devfs_dirent **dde;
	struct devfs_mount *dmp;
	struct cdev *cdev;
	int error, flags, nameiop;
	char specname[SPECNAMELEN + 1], *pname;

	cnp = ap->a_cnp;
	vpp = ap->a_vpp;
	dvp = ap->a_dvp;
	pname = cnp->cn_nameptr;
	td = cnp->cn_thread;
	flags = cnp->cn_flags;
	nameiop = cnp->cn_nameiop;
	dmp = VFSTODEVFS(dvp->v_mount);
	dd = dvp->v_data;
	*vpp = NULLVP;

	/* Renaming is not supported in devfs. */
	if ((flags & ISLASTCN) && nameiop == RENAME)
		return (EOPNOTSUPP);

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT))
		return (EIO);

	/* The caller needs search permission on the directory. */
	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td);
	if (error)
		return (error);

	/* ".": return the directory itself with an extra reference. */
	if (cnp->cn_namelen == 1 && *pname == '.') {
		if ((flags & ISLASTCN) && nameiop != LOOKUP)
			return (EINVAL);
		*vpp = dvp;
		VREF(dvp);
		return (0);
	}

	/*
	 * "..": the directory is unlocked while the parent's vnode is
	 * obtained and relocked afterwards.
	 */
	if (flags & ISDOTDOT) {
		if ((flags & ISLASTCN) && nameiop != LOOKUP)
			return (EINVAL);
		VOP_UNLOCK(dvp, 0, td);
		de = TAILQ_FIRST(&dd->de_dlist);	/* "." */
		de = TAILQ_NEXT(de, de_list);		/* ".." */
		de = de->de_dir;
		error = devfs_allocv(de, dvp->v_mount, vpp, td);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
		return (error);
	}

	devfs_populate(dmp);
	dd = dvp->v_data;
	de = devfs_find(dd, cnp->cn_nameptr, cnp->cn_namelen);
	while (de == NULL) {	/* While(...) so we can use break */

		if (nameiop == DELETE)
			return (ENOENT);

		/*
		 * OK, we didn't have an entry for the name we were asked for
		 * so we try to see if anybody can create it on demand.
		 */
		pname = devfs_fqpn(specname, dvp, cnp);
		if (pname == NULL)
			break;

		cdev = NULL;
		EVENTHANDLER_INVOKE(dev_clone,
		    td->td_ucred, pname, strlen(pname), &cdev);
		if (cdev == NULL)
			break;

		/* Populate again before looking for the cloned device's dirent. */
		devfs_populate(dmp);

		/*
		 * NOTE(review): dde is the address of an array element, so
		 * the "dde != NULL" test below looks always true; only the
		 * "*dde != NULL" half appears to matter.  dm_idx is also not
		 * checked against cdp_maxdirent here - confirm.
		 */
		dev_lock();
		dde = &cdev->si_priv->cdp_dirents[dmp->dm_idx];
		if (dde != NULL && *dde != NULL)
			de = *dde;
		dev_unlock();
		dev_rel(cdev);
		break;
	}

	if (de == NULL || de->de_flags & DE_WHITEOUT) {
		/*
		 * No (visible) entry: for a create or rename of the last
		 * component the name is saved and EJUSTRETURN is returned.
		 */
		if ((nameiop == CREATE || nameiop == RENAME) &&
		    (flags & (LOCKPARENT | WANTPARENT)) && (flags & ISLASTCN)) {
			cnp->cn_flags |= SAVENAME;
			return (EJUSTRETURN);
		}
		return (ENOENT);
	}

	if ((cnp->cn_nameiop == DELETE) && (flags & ISLASTCN)) {
		/* Deleting an entry requires write permission on the directory. */
		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
		if (error)
			return (error);
		/*
		 * NOTE(review): *vpp is still NULLVP on every path that
		 * reaches this point, so this branch looks unreachable -
		 * confirm.
		 */
		if (*vpp == dvp) {
			VREF(dvp);
			*vpp = dvp;
			return (0);
		}
	}
	error = devfs_allocv(de, dvp->v_mount, vpp, td);
	return (error);
}
570 
571 static int
572 devfs_lookup(struct vop_lookup_args *ap)
573 {
574 	int j;
575 	struct devfs_mount *dmp;
576 
577 	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
578 	sx_xlock(&dmp->dm_lock);
579 	j = devfs_lookupx(ap);
580 	sx_xunlock(&dmp->dm_lock);
581 	return (j);
582 }
583 
584 static int
585 devfs_mknod(struct vop_mknod_args *ap)
586 {
587 	struct componentname *cnp;
588 	struct vnode *dvp, **vpp;
589 	struct thread *td;
590 	struct devfs_dirent *dd, *de;
591 	struct devfs_mount *dmp;
592 	int error;
593 
594 	/*
595 	 * The only type of node we should be creating here is a
596 	 * character device, for anything else return EOPNOTSUPP.
597 	 */
598 	if (ap->a_vap->va_type != VCHR)
599 		return (EOPNOTSUPP);
600 	dvp = ap->a_dvp;
601 	dmp = VFSTODEVFS(dvp->v_mount);
602 	sx_xlock(&dmp->dm_lock);
603 
604 	cnp = ap->a_cnp;
605 	vpp = ap->a_vpp;
606 	td = cnp->cn_thread;
607 	dd = dvp->v_data;
608 
609 	error = ENOENT;
610 	TAILQ_FOREACH(de, &dd->de_dlist, de_list) {
611 		if (cnp->cn_namelen != de->de_dirent->d_namlen)
612 			continue;
613 		if (bcmp(cnp->cn_nameptr, de->de_dirent->d_name,
614 		    de->de_dirent->d_namlen) != 0)
615 			continue;
616 		if (de->de_flags & DE_WHITEOUT)
617 			break;
618 		goto notfound;
619 	}
620 	if (de == NULL)
621 		goto notfound;
622 	de->de_flags &= ~DE_WHITEOUT;
623 	error = devfs_allocv(de, dvp->v_mount, vpp, td);
624 notfound:
625 	sx_xunlock(&dmp->dm_lock);
626 	return (error);
627 }
628 
/*
 * Open entry point for character-device vnodes.
 *
 * Calls the driver's d_fdopen (if provided) or d_open with the vnode
 * unlocked, then, when a descriptor index was supplied, switches the
 * file over to devfs_ops_f so later file operations go straight to the
 * driver instead of through the vnode layer.
 *
 * Returns ENXIO for block devices, for a vnode without a cdev, or when
 * dev_refthread() fails; otherwise the driver's open result.
 */
/* ARGSUSED */
static int
devfs_open(struct vop_open_args *ap)
{
	struct thread *td = ap->a_td;
	struct vnode *vp = ap->a_vp;
	struct cdev *dev = vp->v_rdev;
	struct file *fp;
	int error;
	struct cdevsw *dsw;

	if (vp->v_type == VBLK)
		return (ENXIO);

	if (dev == NULL)
		return (ENXIO);

	/* Make this field valid before any I/O in d_open. */
	if (dev->si_iosize_max == 0)
		dev->si_iosize_max = DFLTPHYS;

	dsw = dev_refthread(dev);
	if (dsw == NULL)
		return (ENXIO);

	/* XXX: Special casing of ttys for deadfs.  Probably redundant. */
	if (dsw->d_flags & D_TTY)
		vp->v_vflag |= VV_ISTTY;

	/* The vnode lock is not held across the driver's open routine. */
	VOP_UNLOCK(vp, 0, td);

	/* Drivers without D_NEEDGIANT are called with Giant dropped. */
	if(!(dsw->d_flags & D_NEEDGIANT)) {
		DROP_GIANT();
		if (dsw->d_fdopen != NULL)
			error = dsw->d_fdopen(dev, ap->a_mode, td, ap->a_fdidx);
		else
			error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td);
		PICKUP_GIANT();
	} else {
		if (dsw->d_fdopen != NULL)
			error = dsw->d_fdopen(dev, ap->a_mode, td, ap->a_fdidx);
		else
			error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td);
	}

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);

	dev_relthread(dev);

	if (error)
		return (error);

#if 0	/* /dev/console */
	KASSERT(ap->a_fdidx >= 0,
	     ("Could not vnode bypass device on fd %d", ap->a_fdidx));
#else
	/* Without a descriptor index there is no file to redirect. */
	if(ap->a_fdidx < 0)
		return (error);
#endif
	/*
	 * This is a pretty disgustingly long chain, but I am not
	 * sure there is any better way.  Passing the fdidx into
	 * VOP_OPEN() offers us more information than just passing
	 * the file *.
	 */
	fp = ap->a_td->td_proc->p_fd->fd_ofiles[ap->a_fdidx];
	KASSERT(fp->f_ops == &badfileops,
	     ("Could not vnode bypass device on fdops %p", fp->f_ops));
	/* f_data records the cdev; devfs_fp_check() compares against it. */
	fp->f_ops = &devfs_ops_f;
	fp->f_data = dev;
	return (error);
}
701 
702 static int
703 devfs_pathconf(struct vop_pathconf_args *ap)
704 {
705 
706 	switch (ap->a_name) {
707 	case _PC_MAC_PRESENT:
708 #ifdef MAC
709 		/*
710 		 * If MAC is enabled, devfs automatically supports
711 		 * trivial non-persistant label storage.
712 		 */
713 		*ap->a_retval = 1;
714 #else
715 		*ap->a_retval = 0;
716 #endif
717 		return (0);
718 	default:
719 		return (vop_stdpathconf(ap));
720 	}
721 	/* NOTREACHED */
722 }
723 
724 /* ARGSUSED */
725 static int
726 devfs_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td)
727 {
728 	struct cdev *dev;
729 	struct cdevsw *dsw;
730 	int error;
731 
732 	error = devfs_fp_check(fp, &dev, &dsw);
733 	if (error)
734 		return (error);
735 	error = dsw->d_poll(dev, events, td);
736 	dev_relthread(dev);
737 	return(error);
738 }
739 
740 /*
741  * Print out the contents of a special device vnode.
742  */
743 static int
744 devfs_print(struct vop_print_args *ap)
745 {
746 
747 	printf("\tdev %s\n", devtoname(ap->a_vp->v_rdev));
748 	return (0);
749 }
750 
751 /* ARGSUSED */
752 static int
753 devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td)
754 {
755 	struct cdev *dev;
756 	int ioflag, error, resid;
757 	struct cdevsw *dsw;
758 
759 	error = devfs_fp_check(fp, &dev, &dsw);
760 	if (error)
761 		return (error);
762 	resid = uio->uio_resid;
763 	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT);
764 	if (ioflag & O_DIRECT)
765 		ioflag |= IO_DIRECT;
766 
767 	if ((flags & FOF_OFFSET) == 0)
768 		uio->uio_offset = fp->f_offset;
769 
770 	error = dsw->d_read(dev, uio, ioflag);
771 	dev_relthread(dev);
772 	if (uio->uio_resid != resid || (error == 0 && resid != 0))
773 		vfs_timestamp(&dev->si_atime);
774 
775 	if ((flags & FOF_OFFSET) == 0)
776 		fp->f_offset = uio->uio_offset;
777 	fp->f_nextoff = uio->uio_offset;
778 	return (error);
779 }
780 
781 static int
782 devfs_readdir(struct vop_readdir_args *ap)
783 {
784 	int error;
785 	struct uio *uio;
786 	struct dirent *dp;
787 	struct devfs_dirent *dd;
788 	struct devfs_dirent *de;
789 	struct devfs_mount *dmp;
790 	off_t off, oldoff;
791 	int *tmp_ncookies = NULL;
792 
793 	if (ap->a_vp->v_type != VDIR)
794 		return (ENOTDIR);
795 
796 	uio = ap->a_uio;
797 	if (uio->uio_offset < 0)
798 		return (EINVAL);
799 
800 	/*
801 	 * XXX: This is a temporary hack to get around this filesystem not
802 	 * supporting cookies. We store the location of the ncookies pointer
803 	 * in a temporary variable before calling vfs_subr.c:vfs_read_dirent()
804 	 * and set the number of cookies to 0. We then set the pointer to
805 	 * NULL so that vfs_read_dirent doesn't try to call realloc() on
806 	 * ap->a_cookies. Later in this function, we restore the ap->a_ncookies
807 	 * pointer to its original location before returning to the caller.
808 	 */
809 	if (ap->a_ncookies != NULL) {
810 		tmp_ncookies = ap->a_ncookies;
811 		*ap->a_ncookies = 0;
812 		ap->a_ncookies = NULL;
813 	}
814 
815 	dmp = VFSTODEVFS(ap->a_vp->v_mount);
816 	sx_xlock(&dmp->dm_lock);
817 	devfs_populate(dmp);
818 	error = 0;
819 	de = ap->a_vp->v_data;
820 	off = 0;
821 	oldoff = uio->uio_offset;
822 	TAILQ_FOREACH(dd, &de->de_dlist, de_list) {
823 		KASSERT(dd->de_cdp != (void *)0xdeadc0de, ("%s %d\n", __func__, __LINE__));
824 		if (dd->de_flags & DE_WHITEOUT)
825 			continue;
826 		if (dd->de_dirent->d_type == DT_DIR)
827 			de = dd->de_dir;
828 		else
829 			de = dd;
830 		dp = dd->de_dirent;
831 		if (dp->d_reclen > uio->uio_resid)
832 			break;
833 		dp->d_fileno = de->de_inode;
834 		if (off >= uio->uio_offset) {
835 			error = vfs_read_dirent(ap, dp, off);
836 			if (error)
837 				break;
838 		}
839 		off += dp->d_reclen;
840 	}
841 	sx_xunlock(&dmp->dm_lock);
842 	uio->uio_offset = off;
843 
844 	/*
845 	 * Restore ap->a_ncookies if it wasn't originally NULL in the first
846 	 * place.
847 	 */
848 	if (tmp_ncookies != NULL)
849 		ap->a_ncookies = tmp_ncookies;
850 
851 	return (error);
852 }
853 
854 static int
855 devfs_readlink(struct vop_readlink_args *ap)
856 {
857 	struct devfs_dirent *de;
858 
859 	de = ap->a_vp->v_data;
860 	return (uiomove(de->de_symlink, strlen(de->de_symlink), ap->a_uio));
861 }
862 
/*
 * Reclaim entry point: detach the vnode from its dirent and, for a
 * device vnode, from its cdev.  Undoes the association, the cdev
 * reference and the si_usecount accounting set up by devfs_allocv().
 * Always returns 0.
 */
static int
devfs_reclaim(struct vop_reclaim_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct devfs_dirent *de;
	struct cdev *dev;

	/* Break the dirent <-> vnode link; v_data may already be NULL. */
	mtx_lock(&devfs_de_interlock);
	de = vp->v_data;
	if (de != NULL) {
		de->de_vnode = NULL;
		vp->v_data = NULL;
	}
	mtx_unlock(&devfs_de_interlock);

	vnode_destroy_vobject(vp);

	dev = vp->v_rdev;
	vp->v_rdev = NULL;

	/* Vnodes without a cdev (directories, symlinks) are done here. */
	if (dev == NULL)
		return (0);

	/*
	 * NOTE(review): v_usecount is read here without the vnode
	 * interlock, whereas devfs_allocv() holds VI_LOCK for the
	 * matching addition - confirm this is safe.
	 */
	dev_lock();
	dev->si_usecount -= vp->v_usecount;
	dev_unlock();
	dev_rel(dev);
	return (0);
}
892 
893 static int
894 devfs_remove(struct vop_remove_args *ap)
895 {
896 	struct vnode *vp = ap->a_vp;
897 	struct devfs_dirent *dd;
898 	struct devfs_dirent *de;
899 	struct devfs_mount *dmp = VFSTODEVFS(vp->v_mount);
900 
901 	sx_xlock(&dmp->dm_lock);
902 	dd = ap->a_dvp->v_data;
903 	de = vp->v_data;
904 	if (de->de_cdp == NULL) {
905 		TAILQ_REMOVE(&dd->de_dlist, de, de_list);
906 		devfs_delete(dmp, de);
907 	} else {
908 		de->de_flags |= DE_WHITEOUT;
909 	}
910 	sx_xunlock(&dmp->dm_lock);
911 	return (0);
912 }
913 
/*
 * Revoke is called on a tty when a terminal session ends.  The vnode
 * is orphaned by setting v_op to deadfs so we need to let go of it
 * as well so that we create a new one next time around.
 *
 * Every dirent of the device (one slot per entry in cdp_dirents) that
 * has a vnode attached is detached from it and the vnode is vgone()'d.
 * Always returns 0.
 */
static int
devfs_revoke(struct vop_revoke_args *ap)
{
	struct vnode *vp = ap->a_vp, *vp2;
	struct cdev *dev;
	struct cdev_priv *cdp;
	struct devfs_dirent *de;
	int i;

	KASSERT((ap->a_flags & REVOKEALL) != 0, ("devfs_revoke !REVOKEALL"));

	dev = vp->v_rdev;
	cdp = dev->si_priv;
	/*
	 * Each pass of the outer loop handles at most one vnode.  Both
	 * locks are dropped to call vgone(), so the scan of the dirent
	 * table is restarted from the beginning afterwards; the loop ends
	 * once a full scan finds no dirent with a vnode.
	 */
	for (;;) {
		mtx_lock(&devfs_de_interlock);
		dev_lock();
		vp2 = NULL;
		for (i = 0; i <= cdp->cdp_maxdirent; i++) {
			de = cdp->cdp_dirents[i];
			if (de == NULL)
				continue;

      			vp2 = de->de_vnode;
			if (vp2 != NULL) {
				de->de_vnode = NULL;
				dev_unlock();
				/*
				 * Take the vnode interlock before releasing
				 * the dirent interlock, and hold the vnode
				 * across vgone().
				 */
				VI_LOCK(vp2);
				mtx_unlock(&devfs_de_interlock);
				vholdl(vp2);
				VI_UNLOCK(vp2);
				vgone(vp2);
				vdrop(vp2);
				break;
			}
		}
		if (vp2 != NULL) {
			continue;
		}
		dev_unlock();
		mtx_unlock(&devfs_de_interlock);
		break;
	}
	return (0);
}
964 
965 static int
966 devfs_rioctl(struct vop_ioctl_args *ap)
967 {
968 	int error;
969 	struct devfs_mount *dmp;
970 
971 	dmp = VFSTODEVFS(ap->a_vp->v_mount);
972 	sx_xlock(&dmp->dm_lock);
973 	devfs_populate(dmp);
974 	error = devfs_rules_ioctl(dmp, ap->a_command, ap->a_data, ap->a_td);
975 	sx_xunlock(&dmp->dm_lock);
976 	return (error);
977 }
978 
979 static int
980 devfs_rread(struct vop_read_args *ap)
981 {
982 
983 	if (ap->a_vp->v_type != VDIR)
984 		return (EINVAL);
985 	return (VOP_READDIR(ap->a_vp, ap->a_uio, ap->a_cred, NULL, NULL, NULL));
986 }
987 
988 static int
989 devfs_setattr(struct vop_setattr_args *ap)
990 {
991 	struct devfs_dirent *de;
992 	struct vattr *vap;
993 	struct vnode *vp;
994 	int c, error;
995 	uid_t uid;
996 	gid_t gid;
997 
998 	vap = ap->a_vap;
999 	vp = ap->a_vp;
1000 	if ((vap->va_type != VNON) ||
1001 	    (vap->va_nlink != VNOVAL) ||
1002 	    (vap->va_fsid != VNOVAL) ||
1003 	    (vap->va_fileid != VNOVAL) ||
1004 	    (vap->va_blocksize != VNOVAL) ||
1005 	    (vap->va_flags != VNOVAL && vap->va_flags != 0) ||
1006 	    (vap->va_rdev != VNOVAL) ||
1007 	    ((int)vap->va_bytes != VNOVAL) ||
1008 	    (vap->va_gen != VNOVAL)) {
1009 		return (EINVAL);
1010 	}
1011 
1012 	de = vp->v_data;
1013 	if (vp->v_type == VDIR)
1014 		de = de->de_dir;
1015 
1016 	error = c = 0;
1017 	if (vap->va_uid == (uid_t)VNOVAL)
1018 		uid = de->de_uid;
1019 	else
1020 		uid = vap->va_uid;
1021 	if (vap->va_gid == (gid_t)VNOVAL)
1022 		gid = de->de_gid;
1023 	else
1024 		gid = vap->va_gid;
1025 	if (uid != de->de_uid || gid != de->de_gid) {
1026 		if (((ap->a_cred->cr_uid != de->de_uid) || uid != de->de_uid ||
1027 		    (gid != de->de_gid && !groupmember(gid, ap->a_cred))) &&
1028 		    (error = suser_cred(ap->a_td->td_ucred, SUSER_ALLOWJAIL)) != 0)
1029 			return (error);
1030 		de->de_uid = uid;
1031 		de->de_gid = gid;
1032 		c = 1;
1033 	}
1034 
1035 	if (vap->va_mode != (mode_t)VNOVAL) {
1036 		if ((ap->a_cred->cr_uid != de->de_uid) &&
1037 		    (error = suser_cred(ap->a_td->td_ucred, SUSER_ALLOWJAIL)))
1038 			return (error);
1039 		de->de_mode = vap->va_mode;
1040 		c = 1;
1041 	}
1042 
1043 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
1044 		/* See the comment in ufs_vnops::ufs_setattr(). */
1045 		if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, ap->a_td)) &&
1046 		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
1047 		    (error = VOP_ACCESS(vp, VWRITE, ap->a_cred, ap->a_td))))
1048 			return (error);
1049 		if (vap->va_atime.tv_sec != VNOVAL) {
1050 			if (vp->v_type == VCHR)
1051 				vp->v_rdev->si_atime = vap->va_atime;
1052 			else
1053 				de->de_atime = vap->va_atime;
1054 		}
1055 		if (vap->va_mtime.tv_sec != VNOVAL) {
1056 			if (vp->v_type == VCHR)
1057 				vp->v_rdev->si_mtime = vap->va_mtime;
1058 			else
1059 				de->de_mtime = vap->va_mtime;
1060 		}
1061 		c = 1;
1062 	}
1063 
1064 	if (c) {
1065 		if (vp->v_type == VCHR)
1066 			vfs_timestamp(&vp->v_rdev->si_ctime);
1067 		else
1068 			vfs_timestamp(&de->de_mtime);
1069 	}
1070 	return (0);
1071 }
1072 
#ifdef MAC
/*
 * Setlabel entry point (MAC kernels only): relabel the vnode, then let
 * the MAC framework update the devfs dirent from the vnode.  Always
 * returns 0.
 */
static int
devfs_setlabel(struct vop_setlabel_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct devfs_dirent *de = vp->v_data;

	mac_relabel_vnode(ap->a_cred, vp, ap->a_label);
	mac_update_devfsdirent(vp->v_mount, de, vp);

	return (0);
}
#endif
1089 
1090 static int
1091 devfs_stat_f(struct file *fp, struct stat *sb, struct ucred *cred, struct thread *td)
1092 {
1093 
1094 	return (vnops.fo_stat(fp, sb, cred, td));
1095 }
1096 
1097 static int
1098 devfs_symlink(struct vop_symlink_args *ap)
1099 {
1100 	int i, error;
1101 	struct devfs_dirent *dd;
1102 	struct devfs_dirent *de;
1103 	struct devfs_mount *dmp;
1104 	struct thread *td;
1105 
1106 	td = ap->a_cnp->cn_thread;
1107 	KASSERT(td == curthread, ("devfs_symlink: td != curthread"));
1108 	error = suser(td);
1109 	if (error)
1110 		return(error);
1111 	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
1112 	dd = ap->a_dvp->v_data;
1113 	de = devfs_newdirent(ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen);
1114 	de->de_uid = 0;
1115 	de->de_gid = 0;
1116 	de->de_mode = 0755;
1117 	de->de_inode = alloc_unr(devfs_inos);
1118 	de->de_dirent->d_type = DT_LNK;
1119 	i = strlen(ap->a_target) + 1;
1120 	de->de_symlink = malloc(i, M_DEVFS, M_WAITOK);
1121 	bcopy(ap->a_target, de->de_symlink, i);
1122 	sx_xlock(&dmp->dm_lock);
1123 #ifdef MAC
1124 	mac_create_devfs_symlink(ap->a_cnp->cn_cred, dmp->dm_mount, dd, de);
1125 #endif
1126 	TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list);
1127 	devfs_allocv(de, ap->a_dvp->v_mount, ap->a_vpp, td);
1128 	sx_xunlock(&dmp->dm_lock);
1129 	return (0);
1130 }
1131 
1132 /* ARGSUSED */
1133 static int
1134 devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td)
1135 {
1136 	struct cdev *dev;
1137 	int error, ioflag, resid;
1138 	struct cdevsw *dsw;
1139 
1140 	error = devfs_fp_check(fp, &dev, &dsw);
1141 	if (error)
1142 		return (error);
1143 	KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td));
1144 	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT | O_FSYNC);
1145 	if (ioflag & O_DIRECT)
1146 		ioflag |= IO_DIRECT;
1147 	if ((flags & FOF_OFFSET) == 0)
1148 		uio->uio_offset = fp->f_offset;
1149 
1150 	resid = uio->uio_resid;
1151 
1152 	error = dsw->d_write(dev, uio, ioflag);
1153 	dev_relthread(dev);
1154 	if (uio->uio_resid != resid || (error == 0 && resid != 0)) {
1155 		vfs_timestamp(&dev->si_ctime);
1156 		dev->si_mtime = dev->si_ctime;
1157 	}
1158 
1159 	if ((flags & FOF_OFFSET) == 0)
1160 		fp->f_offset = uio->uio_offset;
1161 	fp->f_nextoff = uio->uio_offset;
1162 	return (error);
1163 }
1164 
1165 dev_t
1166 dev2udev(struct cdev *x)
1167 {
1168 	if (x == NULL)
1169 		return (NODEV);
1170 	return (x->si_priv->cdp_inode);
1171 }
1172 
/*
 * File operations that devfs_open() installs on a file opened on a
 * character device, so that read/write/ioctl/poll/kqfilter reach the
 * driver through the devfs_*_f functions above.  Stat and close are
 * forwarded to the generic vnode file operations.
 */
static struct fileops devfs_ops_f = {
	.fo_read =	devfs_read_f,
	.fo_write =	devfs_write_f,
	.fo_ioctl =	devfs_ioctl_f,
	.fo_poll =	devfs_poll_f,
	.fo_kqfilter =	devfs_kqfilter_f,
	.fo_stat =	devfs_stat_f,
	.fo_close =	devfs_close_f,
	.fo_flags =	DFLAG_PASSABLE | DFLAG_SEEKABLE
};
1183 
/*
 * Vnode operations that devfs_allocv() passes to getnewvnode() for every
 * new devfs vnode; character devices are switched to devfs_specops
 * afterwards.  Operations not listed fall back to default_vnodeops.
 */
static struct vop_vector devfs_vnodeops = {
	.vop_default =		&default_vnodeops,

	.vop_access =		devfs_access,
	.vop_getattr =		devfs_getattr,
	.vop_ioctl =		devfs_rioctl,
	.vop_lookup =		devfs_lookup,
	.vop_mknod =		devfs_mknod,
	.vop_pathconf =		devfs_pathconf,
	.vop_read =		devfs_rread,
	.vop_readdir =		devfs_readdir,
	.vop_readlink =		devfs_readlink,
	.vop_reclaim =		devfs_reclaim,
	.vop_remove =		devfs_remove,
	.vop_revoke =		devfs_revoke,
	.vop_setattr =		devfs_setattr,
#ifdef MAC
	.vop_setlabel =		devfs_setlabel,
#endif
	.vop_symlink =		devfs_symlink,
};
1205 
/*
 * Vnode operations that devfs_allocv() installs on character-device
 * (VCHR) vnodes.  Read and write are VOP_PANIC here; presumably device
 * I/O is expected to arrive through the devfs_ops_f file operations set
 * up by devfs_open() rather than through the vnode layer - confirm.
 * Operations not listed fall back to default_vnodeops.
 */
static struct vop_vector devfs_specops = {
	.vop_default =		&default_vnodeops,

	.vop_access =		devfs_access,
	.vop_advlock =		devfs_advlock,
	.vop_bmap =		VOP_PANIC,
	.vop_close =		devfs_close,
	.vop_create =		VOP_PANIC,
	.vop_fsync =		devfs_fsync,
	.vop_getattr =		devfs_getattr,
	.vop_lease =		VOP_NULL,
	.vop_link =		VOP_PANIC,
	.vop_mkdir =		VOP_PANIC,
	.vop_mknod =		VOP_PANIC,
	.vop_open =		devfs_open,
	.vop_pathconf =		devfs_pathconf,
	.vop_print =		devfs_print,
	.vop_read =		VOP_PANIC,
	.vop_readdir =		VOP_PANIC,
	.vop_readlink =		VOP_PANIC,
	.vop_reallocblks =	VOP_PANIC,
	.vop_reclaim =		devfs_reclaim,
	.vop_remove =		devfs_remove,
	.vop_rename =		VOP_PANIC,
	.vop_revoke =		devfs_revoke,
	.vop_rmdir =		VOP_PANIC,
	.vop_setattr =		devfs_setattr,
#ifdef MAC
	.vop_setlabel =		devfs_setlabel,
#endif
	.vop_strategy =		VOP_PANIC,
	.vop_symlink =		VOP_PANIC,
	.vop_write =		VOP_PANIC,
};
1240 
/*
 * Our calling convention to the device drivers used to be that we passed
 * vnode.h IO_* flags to read()/write(), but we're moving to fcntl.h O_*
 * flags instead, since that is what open(), close() and ioctl() take and
 * we don't really want vnode.h in device drivers.
 * We solved the source compatibility by redefining some vnode flags to
 * be the same as the fcntl ones and by sending down the bitwise OR of
 * the respective fcntl/vnode flags.  These CTASSERTs make sure nobody
 * pulls the rug out from under this.
 *
 * O_DIRECT/IO_DIRECT is not asserted here; devfs_read_f() and
 * devfs_write_f() OR in IO_DIRECT explicitly when O_DIRECT is set.
 */
CTASSERT(O_NONBLOCK == IO_NDELAY);
CTASSERT(O_FSYNC == IO_SYNC);
1253