xref: /freebsd/sys/fs/devfs/devfs_vnops.c (revision 3d11b6c8f01e1fca5936a11d6996448467851a94)
1 /*-
2  * Copyright (c) 2000-2004
3  *	Poul-Henning Kamp.  All rights reserved.
4  * Copyright (c) 1989, 1992-1993, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software donated to Berkeley by
8  * Jan-Simon Pendry.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  *	@(#)kernfs_vnops.c	8.15 (Berkeley) 5/21/95
32  * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vnops.c 1.43
33  *
34  * $FreeBSD$
35  */
36 
37 /*
38  * TODO:
39  *	remove empty directories
40  *	mkdir: want it ?
41  */
42 
43 #include <opt_devfs.h>
44 #include <opt_mac.h>
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/conf.h>
49 #include <sys/dirent.h>
50 #include <sys/fcntl.h>
51 #include <sys/file.h>
52 #include <sys/filedesc.h>
53 #include <sys/filio.h>
54 #include <sys/kernel.h>
55 #include <sys/lock.h>
56 #include <sys/mac.h>
57 #include <sys/malloc.h>
58 #include <sys/mount.h>
59 #include <sys/namei.h>
60 #include <sys/proc.h>
61 #include <sys/stat.h>
62 #include <sys/sx.h>
63 #include <sys/time.h>
64 #include <sys/ttycom.h>
65 #include <sys/unistd.h>
66 #include <sys/vnode.h>
67 
/*
 * Forward declarations of the operation tables defined at the bottom of
 * this file; the functions in between take their addresses.
 */
static struct vop_vector devfs_vnodeops;	/* installed by getnewvnode() in devfs_allocv() */
static struct vop_vector devfs_specops;		/* substituted for DT_CHR entries in devfs_allocv() */
static struct fileops devfs_ops_f;		/* installed on the struct file in devfs_open() */
71 
72 #include <fs/devfs/devfs.h>
73 #include <fs/devfs/devfs_int.h>
74 
75 static int
76 devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp)
77 {
78 
79 	*devp = fp->f_vnode->v_rdev;
80 	if (*devp != fp->f_data)
81 		return (ENXIO);
82 	KASSERT((*devp)->si_refcount > 0,
83 	    ("devfs: un-referenced struct cdev *(%s)", devtoname(*devp)));
84 	*dswp = dev_refthread(*devp);
85 	if (*dswp == NULL)
86 		return (ENXIO);
87 	return (0);
88 }
89 
90 /*
91  * Construct the fully qualified path name relative to the mountpoint
92  */
93 static char *
94 devfs_fqpn(char *buf, struct vnode *dvp, struct componentname *cnp)
95 {
96 	int i;
97 	struct devfs_dirent *de, *dd;
98 	struct devfs_mount *dmp;
99 
100 	dmp = VFSTODEVFS(dvp->v_mount);
101 	dd = dvp->v_data;
102 	i = SPECNAMELEN;
103 	buf[i] = '\0';
104 	i -= cnp->cn_namelen;
105 	if (i < 0)
106 		 return (NULL);
107 	bcopy(cnp->cn_nameptr, buf + i, cnp->cn_namelen);
108 	de = dd;
109 	while (de != dmp->dm_rootdir) {
110 		i--;
111 		if (i < 0)
112 			 return (NULL);
113 		buf[i] = '/';
114 		i -= de->de_dirent->d_namlen;
115 		if (i < 0)
116 			 return (NULL);
117 		bcopy(de->de_dirent->d_name, buf + i,
118 		    de->de_dirent->d_namlen);
119 		de = TAILQ_FIRST(&de->de_dlist);	/* "." */
120 		de = TAILQ_NEXT(de, de_list);		/* ".." */
121 		de = de->de_dir;
122 	}
123 	return (buf + i);
124 }
125 
/*
 * Hand back, in *vpp, an exclusively locked vnode for directory entry "de"
 * on mount "mp".
 *
 * If the entry already has a vnode cached in de->de_vnode it is acquired
 * with vget(); otherwise a new vnode is created with getnewvnode(), typed
 * from the dirent's d_type, linked to the entry and locked.
 *
 * Returns 0 on success, ENOENT when the entry is a character device whose
 * cdev_priv is not flagged CDP_ACTIVE, or the error from getnewvnode().
 * "td" must be curthread (asserted).
 */
int
devfs_allocv(struct devfs_dirent *de, struct mount *mp, struct vnode **vpp, struct thread *td)
{
	int error;
	struct vnode *vp;
	struct cdev *dev;

	KASSERT(td == curthread, ("devfs_allocv: td != curthread"));
loop:
	/* Reuse the cached vnode if there is one; re-read it if vget() fails. */
	vp = de->de_vnode;
	if (vp != NULL) {
		if (vget(vp, LK_EXCLUSIVE, td))
			goto loop;
		*vpp = vp;
		return (0);
	}
	/* Only character-device entries have a cdev behind them. */
	if (de->de_dirent->d_type == DT_CHR) {
		if (!(de->de_cdp->cdp_flags & CDP_ACTIVE))
			return (ENOENT);
		dev = &de->de_cdp->cdp_c;
	} else {
		dev = NULL;
	}
	error = getnewvnode("devfs", mp, &devfs_vnodeops, &vp);
	if (error != 0) {
		printf("devfs_allocv: failed to allocate new vnode\n");
		return (error);
	}

	if (de->de_dirent->d_type == DT_CHR) {
		vp->v_type = VCHR;
		/*
		 * With the vnode interlock and the dev lock held: take a
		 * reference on the cdev, attach it to the vnode, and fold
		 * the vnode's use count into the device's use count.
		 */
		VI_LOCK(vp);
		dev_lock();
		dev_refl(dev);
		vp->v_rdev = dev;
		KASSERT(vp->v_usecount == 1,
		    ("%s %d (%d)\n", __func__, __LINE__, vp->v_usecount));
		dev->si_usecount += vp->v_usecount;
		dev_unlock();
		VI_UNLOCK(vp);
		/* Device vnodes use the special-file operations vector. */
		vp->v_op = &devfs_specops;
	} else if (de->de_dirent->d_type == DT_DIR) {
		vp->v_type = VDIR;
	} else if (de->de_dirent->d_type == DT_LNK) {
		vp->v_type = VLNK;
	} else {
		vp->v_type = VBAD;
	}
	/* Cross-link vnode and dirent, then return the vnode locked. */
	vp->v_data = de;
	de->de_vnode = vp;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
#ifdef MAC
	mac_associate_vnode_devfs(mp, de, vp);
#endif
	*vpp = vp;
	return (0);
}
183 
184 static int
185 devfs_access(struct vop_access_args *ap)
186 {
187 	struct vnode *vp = ap->a_vp;
188 	struct devfs_dirent *de;
189 	int error;
190 
191 	de = vp->v_data;
192 	if (vp->v_type == VDIR)
193 		de = de->de_dir;
194 
195 	error = vaccess(vp->v_type, de->de_mode, de->de_uid, de->de_gid,
196 	    ap->a_mode, ap->a_cred, NULL);
197 	if (!error)
198 		return (error);
199 	if (error != EACCES)
200 		return (error);
201 	/* We do, however, allow access to the controlling terminal */
202 	if (!(ap->a_td->td_proc->p_flag & P_CONTROLT))
203 		return (error);
204 	if (ap->a_td->td_proc->p_session->s_ttyvp == de->de_vnode)
205 		return (0);
206 	return (error);
207 }
208 
209 /* ARGSUSED */
210 static int
211 devfs_advlock(struct vop_advlock_args *ap)
212 {
213 
214 	return (ap->a_flags & F_FLOCK ? EOPNOTSUPP : EINVAL);
215 }
216 
/*
 * VOP_CLOSE for device vnodes.
 *
 * First drops the session's reference on the vnode if this is the closing
 * process' controlling terminal and only that reference plus the closing
 * descriptor remain.  Then calls the driver's d_close, unless the device
 * is still in use elsewhere and neither a forced close (VI_DOOMED) nor
 * D_TRACKCLOSE applies.
 *
 * Returns ENXIO if no cdevsw can be referenced, 0 when the close is
 * skipped, otherwise the driver's d_close result.
 */
/* ARGSUSED */
static int
devfs_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp, *oldvp;
	struct thread *td = ap->a_td;
	struct cdev *dev = vp->v_rdev;
	struct cdevsw *dsw;
	int error;

	/*
	 * Hack: a tty device that is a controlling terminal
	 * has a reference from the session structure.
	 * We cannot easily tell that a character device is
	 * a controlling terminal, unless it is the closing
	 * process' controlling terminal.  In that case,
	 * if the reference count is 2 (this last descriptor
	 * plus the session), release the reference from the session.
	 */
	oldvp = NULL;
	sx_xlock(&proctree_lock);
	if (td && vp == td->td_proc->p_session->s_ttyvp) {
		SESS_LOCK(td->td_proc->p_session);
		VI_LOCK(vp);
		if (count_dev(dev) == 2 && (vp->v_iflag & VI_DOOMED) == 0) {
			td->td_proc->p_session->s_ttyvp = NULL;
			oldvp = vp;
		}
		VI_UNLOCK(vp);
		SESS_UNLOCK(td->td_proc->p_session);
	}
	sx_xunlock(&proctree_lock);
	/* The vrele() is deferred until all of the above locks are dropped. */
	if (oldvp != NULL)
		vrele(oldvp);
	/*
	 * We do not want to really close the device if it
	 * is still in use unless we are trying to close it
	 * forcibly. Since every use (buffer, vnode, swap, cmap)
	 * holds a reference to the vnode, and because we mark
	 * any other vnodes that alias this device, when the
	 * sum of the reference counts on all the aliased
	 * vnodes descends to one, we are on last close.
	 */
	dsw = dev_refthread(dev);
	if (dsw == NULL)
		return (ENXIO);
	VI_LOCK(vp);
	if (vp->v_iflag & VI_DOOMED) {
		/* Forced close. */
	} else if (dsw->d_flags & D_TRACKCLOSE) {
		/* Keep device updated on status. */
	} else if (count_dev(dev) > 1) {
		/* Other users remain: skip the driver close. */
		VI_UNLOCK(vp);
		dev_relthread(dev);
		return (0);
	}
	VI_UNLOCK(vp);
	KASSERT(dev->si_refcount > 0,
	    ("devfs_close() on un-referenced struct cdev *(%s)", devtoname(dev)));
	/* Drivers not flagged D_NEEDGIANT are called with Giant dropped. */
	if (!(dsw->d_flags & D_NEEDGIANT)) {
		DROP_GIANT();
		error = dsw->d_close(dev, ap->a_fflag, S_IFCHR, td);
		PICKUP_GIANT();
	} else {
		error = dsw->d_close(dev, ap->a_fflag, S_IFCHR, td);
	}
	dev_relthread(dev);
	return (error);
}
286 
287 static int
288 devfs_close_f(struct file *fp, struct thread *td)
289 {
290 
291 	return (vnops.fo_close(fp, td));
292 }
293 
294 /* ARGSUSED */
295 static int
296 devfs_fsync(struct vop_fsync_args *ap)
297 {
298 	if (!vn_isdisk(ap->a_vp, NULL))
299 		return (0);
300 
301 	return (vop_stdfsync(ap));
302 }
303 
304 static int
305 devfs_getattr(struct vop_getattr_args *ap)
306 {
307 	struct vnode *vp = ap->a_vp;
308 	struct vattr *vap = ap->a_vap;
309 	int error = 0;
310 	struct devfs_dirent *de;
311 	struct cdev *dev;
312 
313 	de = vp->v_data;
314 	KASSERT(de != NULL, ("Null dirent in devfs_getattr vp=%p", vp));
315 	if (vp->v_type == VDIR) {
316 		de = de->de_dir;
317 		KASSERT(de != NULL,
318 		    ("Null dir dirent in devfs_getattr vp=%p", vp));
319 	}
320 	bzero((caddr_t) vap, sizeof(*vap));
321 	vattr_null(vap);
322 	vap->va_uid = de->de_uid;
323 	vap->va_gid = de->de_gid;
324 	vap->va_mode = de->de_mode;
325 	if (vp->v_type == VLNK)
326 		vap->va_size = strlen(de->de_symlink);
327 	else if (vp->v_type == VDIR)
328 		vap->va_size = vap->va_bytes = DEV_BSIZE;
329 	else
330 		vap->va_size = 0;
331 	if (vp->v_type != VDIR)
332 		vap->va_bytes = 0;
333 	vap->va_blocksize = DEV_BSIZE;
334 	vap->va_type = vp->v_type;
335 
336 #define fix(aa)							\
337 	do {							\
338 		if ((aa).tv_sec == 0) {				\
339 			(aa).tv_sec = boottime.tv_sec;		\
340 			(aa).tv_nsec = boottime.tv_usec * 1000; \
341 		}						\
342 	} while (0)
343 
344 	if (vp->v_type != VCHR)  {
345 		fix(de->de_atime);
346 		vap->va_atime = de->de_atime;
347 		fix(de->de_mtime);
348 		vap->va_mtime = de->de_mtime;
349 		fix(de->de_ctime);
350 		vap->va_ctime = de->de_ctime;
351 	} else {
352 		dev = vp->v_rdev;
353 		fix(dev->si_atime);
354 		vap->va_atime = dev->si_atime;
355 		fix(dev->si_mtime);
356 		vap->va_mtime = dev->si_mtime;
357 		fix(dev->si_ctime);
358 		vap->va_ctime = dev->si_ctime;
359 
360 		vap->va_rdev = dev->si_priv->cdp_inode;
361 	}
362 	vap->va_gen = 0;
363 	vap->va_flags = 0;
364 	vap->va_nlink = de->de_links;
365 	vap->va_fileid = de->de_inode;
366 
367 	return (error);
368 }
369 
/*
 * fileops ioctl hook for device files.
 *
 * FIODTYPE and FIODGNAME are answered here from the cdevsw flags and the
 * device name; every other command goes to the driver's d_ioctl, with
 * ENOIOCTL from the driver translated to ENOTTY.  After a successful
 * TIOCSCTTY the session's controlling-terminal vnode is switched to this
 * file's vnode.
 *
 * Returns ENXIO (via devfs_fp_check()) if the file no longer matches its
 * device, otherwise 0 or an errno.
 */
/* ARGSUSED */
static int
devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, struct thread *td)
{
	struct cdev *dev;
	struct cdevsw *dsw;
	struct vnode *vp;
	struct vnode *vpold;
	int error, i;
	const char *p;
	struct fiodgname_arg *fgn;

	error = devfs_fp_check(fp, &dev, &dsw);
	if (error)
		return (error);

	if (com == FIODTYPE) {
		/* Report the device type bits from the cdevsw flags. */
		*(int *)data = dsw->d_flags & D_TYPEMASK;
		dev_relthread(dev);
		return (0);
	} else if (com == FIODGNAME) {
		/*
		 * Copy the NUL-terminated device name out to fgn->buf;
		 * EINVAL if it does not fit in fgn->len bytes.
		 */
		fgn = data;
		p = devtoname(dev);
		i = strlen(p) + 1;
		if (i > fgn->len)
			error = EINVAL;
		else
			error = copyout(p, fgn->buf, i);
		dev_relthread(dev);
		return (error);
	}
	error = dsw->d_ioctl(dev, com, data, fp->f_flag, td);
	dev_relthread(dev);
	if (error == ENOIOCTL)
		error = ENOTTY;
	if (error == 0 && com == TIOCSCTTY) {
		vp = fp->f_vnode;

		/* Do nothing if reassigning same control tty */
		sx_slock(&proctree_lock);
		if (td->td_proc->p_session->s_ttyvp == vp) {
			sx_sunlock(&proctree_lock);
			return (0);
		}

		mtx_lock(&Giant);

		/*
		 * Reference the new vnode and install it under the session
		 * lock.
		 * NOTE(review): proctree_lock is only held shared while
		 * s_ttyvp is replaced -- confirm that is sufficient.
		 */
		vpold = td->td_proc->p_session->s_ttyvp;
		VREF(vp);
		SESS_LOCK(td->td_proc->p_session);
		td->td_proc->p_session->s_ttyvp = vp;
		SESS_UNLOCK(td->td_proc->p_session);

		sx_sunlock(&proctree_lock);

		/* Get rid of reference to old control tty */
		if (vpold)
			vrele(vpold);
		mtx_unlock(&Giant);
	}
	return (error);
}
432 
433 /* ARGSUSED */
434 static int
435 devfs_kqfilter_f(struct file *fp, struct knote *kn)
436 {
437 	struct cdev *dev;
438 	struct cdevsw *dsw;
439 	int error;
440 
441 	error = devfs_fp_check(fp, &dev, &dsw);
442 	if (error)
443 		return (error);
444 	error = dsw->d_kqfilter(dev, kn);
445 	dev_relthread(dev);
446 	return (error);
447 }
448 
/*
 * Body of VOP_LOOKUP; devfs_lookup() calls it with dmp->dm_lock held.
 *
 * Resolves the component in ap->a_cnp inside directory ap->a_dvp and
 * returns its vnode in *ap->a_vpp.  Handles "." and ".." directly; for
 * other names it searches the directory and, on a miss, gives the
 * dev_clone event handlers a chance to create the device on demand.
 *
 * Returns 0 on success; EOPNOTSUPP for a final-component RENAME; ENOTDIR
 * if dvp is not a directory; EIO for ".." at the filesystem root; EINVAL
 * for a non-LOOKUP operation whose last component is "." or "..";
 * EJUSTRETURN for a missing last component of a CREATE/RENAME with
 * LOCKPARENT or WANTPARENT; ENOENT otherwise when the name is missing or
 * whited out; or an error from VOP_ACCESS() / devfs_allocv().
 */
static int
devfs_lookupx(struct vop_lookup_args *ap)
{
	struct componentname *cnp;
	struct vnode *dvp, **vpp;
	struct thread *td;
	struct devfs_dirent *de, *dd;
	struct devfs_dirent **dde;
	struct devfs_mount *dmp;
	struct cdev *cdev;
	int error, flags, nameiop;
	char specname[SPECNAMELEN + 1], *pname;

	cnp = ap->a_cnp;
	vpp = ap->a_vpp;
	dvp = ap->a_dvp;
	pname = cnp->cn_nameptr;
	td = cnp->cn_thread;
	flags = cnp->cn_flags;
	nameiop = cnp->cn_nameiop;
	dmp = VFSTODEVFS(dvp->v_mount);
	dd = dvp->v_data;
	*vpp = NULLVP;

	if ((flags & ISLASTCN) && nameiop == RENAME)
		return (EOPNOTSUPP);

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT))
		return (EIO);

	/* The caller needs search permission on the directory. */
	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td);
	if (error)
		return (error);

	/* "." -- return the directory itself with an extra reference. */
	if (cnp->cn_namelen == 1 && *pname == '.') {
		if ((flags & ISLASTCN) && nameiop != LOOKUP)
			return (EINVAL);
		*vpp = dvp;
		VREF(dvp);
		return (0);
	}

	/*
	 * ".." -- the parent is found via the second entry of the directory
	 * list.  dvp is unlocked while the parent vnode is obtained and
	 * relocked afterwards.
	 */
	if (flags & ISDOTDOT) {
		if ((flags & ISLASTCN) && nameiop != LOOKUP)
			return (EINVAL);
		VOP_UNLOCK(dvp, 0, td);
		de = TAILQ_FIRST(&dd->de_dlist);	/* "." */
		de = TAILQ_NEXT(de, de_list);		/* ".." */
		de = de->de_dir;
		error = devfs_allocv(de, dvp->v_mount, vpp, td);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
		return (error);
	}

	devfs_populate(dmp);
	dd = dvp->v_data;
	de = devfs_find(dd, cnp->cn_nameptr, cnp->cn_namelen);
	while (de == NULL) {	/* While(...) so we can use break */

		if (nameiop == DELETE)
			return (ENOENT);

		/*
		 * OK, we didn't have an entry for the name we were asked for
		 * so we try to see if anybody can create it on demand.
		 */
		pname = devfs_fqpn(specname, dvp, cnp);
		if (pname == NULL)
			break;

		cdev = NULL;
		EVENTHANDLER_INVOKE(dev_clone,
		    td->td_ucred, pname, strlen(pname), &cdev);
		if (cdev == NULL)
			break;

		/* A device was created: repopulate the mount. */
		devfs_populate(dmp);

		/* Pick up this mount's dirent for the new device, if any. */
		dev_lock();
		dde = &cdev->si_priv->cdp_dirents[dmp->dm_idx];
		if (dde != NULL && *dde != NULL)
			de = *dde;
		dev_unlock();
		dev_rel(cdev);
		break;
	}

	if (de == NULL || de->de_flags & DE_WHITEOUT) {
		if ((nameiop == CREATE || nameiop == RENAME) &&
		    (flags & (LOCKPARENT | WANTPARENT)) && (flags & ISLASTCN)) {
			cnp->cn_flags |= SAVENAME;
			return (EJUSTRETURN);
		}
		return (ENOENT);
	}

	/* Deleting the last component requires write access to the directory. */
	if ((cnp->cn_nameiop == DELETE) && (flags & ISLASTCN)) {
		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
		if (error)
			return (error);
		/*
		 * NOTE(review): *vpp was set to NULLVP above and is not
		 * assigned on any path that reaches this point, so this
		 * branch looks unreachable -- confirm.
		 */
		if (*vpp == dvp) {
			VREF(dvp);
			*vpp = dvp;
			return (0);
		}
	}
	error = devfs_allocv(de, dvp->v_mount, vpp, td);
	return (error);
}
561 
562 static int
563 devfs_lookup(struct vop_lookup_args *ap)
564 {
565 	int j;
566 	struct devfs_mount *dmp;
567 
568 	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
569 	sx_xlock(&dmp->dm_lock);
570 	j = devfs_lookupx(ap);
571 	sx_xunlock(&dmp->dm_lock);
572 	return (j);
573 }
574 
575 static int
576 devfs_mknod(struct vop_mknod_args *ap)
577 {
578 	struct componentname *cnp;
579 	struct vnode *dvp, **vpp;
580 	struct thread *td;
581 	struct devfs_dirent *dd, *de;
582 	struct devfs_mount *dmp;
583 	int error;
584 
585 	/*
586 	 * The only type of node we should be creating here is a
587 	 * character device, for anything else return EOPNOTSUPP.
588 	 */
589 	if (ap->a_vap->va_type != VCHR)
590 		return (EOPNOTSUPP);
591 	dvp = ap->a_dvp;
592 	dmp = VFSTODEVFS(dvp->v_mount);
593 	sx_xlock(&dmp->dm_lock);
594 
595 	cnp = ap->a_cnp;
596 	vpp = ap->a_vpp;
597 	td = cnp->cn_thread;
598 	dd = dvp->v_data;
599 
600 	error = ENOENT;
601 	TAILQ_FOREACH(de, &dd->de_dlist, de_list) {
602 		if (cnp->cn_namelen != de->de_dirent->d_namlen)
603 			continue;
604 		if (bcmp(cnp->cn_nameptr, de->de_dirent->d_name,
605 		    de->de_dirent->d_namlen) != 0)
606 			continue;
607 		if (de->de_flags & DE_WHITEOUT)
608 			break;
609 		goto notfound;
610 	}
611 	if (de == NULL)
612 		goto notfound;
613 	de->de_flags &= ~DE_WHITEOUT;
614 	error = devfs_allocv(de, dvp->v_mount, vpp, td);
615 notfound:
616 	sx_xunlock(&dmp->dm_lock);
617 	return (error);
618 }
619 
/*
 * VOP_OPEN for device vnodes.
 *
 * Calls the driver's d_fdopen (when provided) or d_open with the vnode
 * unlocked.  On success, if a file descriptor index was passed in, the
 * corresponding struct file is switched to devfs_ops_f with f_data
 * pointing at the cdev, so later file operations go through the
 * devfs_*_f functions in this file.
 *
 * Returns ENXIO for block-device vnodes, for vnodes without a cdev and
 * when no cdevsw can be referenced; the securelevel_ge() error for write
 * opens of disks with non-FSCRED credentials; otherwise the driver's
 * result.
 */
/* ARGSUSED */
static int
devfs_open(struct vop_open_args *ap)
{
	struct thread *td = ap->a_td;
	struct vnode *vp = ap->a_vp;
	struct cdev *dev = vp->v_rdev;
	struct file *fp;
	int error;
	struct cdevsw *dsw;

	if (vp->v_type == VBLK)
		return (ENXIO);

	if (dev == NULL)
		return (ENXIO);

	/* Make this field valid before any I/O in d_open. */
	if (dev->si_iosize_max == 0)
		dev->si_iosize_max = DFLTPHYS;

	if (vn_isdisk(vp, NULL) &&
	    ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
		/*
		 * When running in very secure mode, do not allow
		 * opens for writing of any disks.
		 * XXX: should be in geom_dev.c, but we lack the cred there.
		 */
		error = securelevel_ge(td->td_ucred, 2);
		if (error)
			return (error);
	}

	dsw = dev_refthread(dev);
	if (dsw == NULL)
		return (ENXIO);

	/* XXX: Special casing of ttys for deadfs.  Probably redundant. */
	if (dsw->d_flags & D_TTY)
		vp->v_vflag |= VV_ISTTY;

	/* The vnode lock is released for the duration of the driver call. */
	VOP_UNLOCK(vp, 0, td);

	/* Drivers not flagged D_NEEDGIANT are called with Giant dropped. */
	if(!(dsw->d_flags & D_NEEDGIANT)) {
		DROP_GIANT();
		if (dsw->d_fdopen != NULL)
			error = dsw->d_fdopen(dev, ap->a_mode, td, ap->a_fdidx);
		else
			error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td);
		PICKUP_GIANT();
	} else {
		if (dsw->d_fdopen != NULL)
			error = dsw->d_fdopen(dev, ap->a_mode, td, ap->a_fdidx);
		else
			error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td);
	}

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);

	dev_relthread(dev);

	if (error)
		return (error);

#if 0	/* /dev/console */
	KASSERT(ap->a_fdidx >= 0,
	     ("Could not vnode bypass device on fd %d", ap->a_fdidx));
#else
	/* No descriptor index supplied: leave the file operations alone. */
	if(ap->a_fdidx < 0)
		return (error);
#endif
	/*
	 * This is a pretty disgustingly long chain, but I am not
	 * sure there is any better way.  Passing the fdidx into
	 * VOP_OPEN() offers us more information than just passing
	 * the file *.
	 */
	fp = ap->a_td->td_proc->p_fd->fd_ofiles[ap->a_fdidx];
	KASSERT(fp->f_ops == &badfileops,
	     ("Could not vnode bypass device on fdops %p", fp->f_ops));
	fp->f_ops = &devfs_ops_f;
	fp->f_data = dev;
	return (error);
}
704 
705 static int
706 devfs_pathconf(struct vop_pathconf_args *ap)
707 {
708 
709 	switch (ap->a_name) {
710 	case _PC_MAC_PRESENT:
711 #ifdef MAC
712 		/*
713 		 * If MAC is enabled, devfs automatically supports
714 		 * trivial non-persistant label storage.
715 		 */
716 		*ap->a_retval = 1;
717 #else
718 		*ap->a_retval = 0;
719 #endif
720 		return (0);
721 	default:
722 		return (vop_stdpathconf(ap));
723 	}
724 	/* NOTREACHED */
725 }
726 
727 /* ARGSUSED */
728 static int
729 devfs_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td)
730 {
731 	struct cdev *dev;
732 	struct cdevsw *dsw;
733 	int error;
734 
735 	error = devfs_fp_check(fp, &dev, &dsw);
736 	if (error)
737 		return (error);
738 	error = dsw->d_poll(dev, events, td);
739 	dev_relthread(dev);
740 	return(error);
741 }
742 
743 /*
744  * Print out the contents of a special device vnode.
745  */
746 static int
747 devfs_print(struct vop_print_args *ap)
748 {
749 
750 	printf("\tdev %s\n", devtoname(ap->a_vp->v_rdev));
751 	return (0);
752 }
753 
754 /* ARGSUSED */
755 static int
756 devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td)
757 {
758 	struct cdev *dev;
759 	int ioflag, error, resid;
760 	struct cdevsw *dsw;
761 
762 	error = devfs_fp_check(fp, &dev, &dsw);
763 	if (error)
764 		return (error);
765 	resid = uio->uio_resid;
766 	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT);
767 	if (ioflag & O_DIRECT)
768 		ioflag |= IO_DIRECT;
769 
770 	if ((flags & FOF_OFFSET) == 0)
771 		uio->uio_offset = fp->f_offset;
772 
773 	error = dsw->d_read(dev, uio, ioflag);
774 	dev_relthread(dev);
775 	if (uio->uio_resid != resid || (error == 0 && resid != 0))
776 		vfs_timestamp(&dev->si_atime);
777 
778 	if ((flags & FOF_OFFSET) == 0)
779 		fp->f_offset = uio->uio_offset;
780 	fp->f_nextoff = uio->uio_offset;
781 	return (error);
782 }
783 
784 static int
785 devfs_readdir(struct vop_readdir_args *ap)
786 {
787 	int error;
788 	struct uio *uio;
789 	struct dirent *dp;
790 	struct devfs_dirent *dd;
791 	struct devfs_dirent *de;
792 	struct devfs_mount *dmp;
793 	off_t off, oldoff;
794 	int *tmp_ncookies = NULL;
795 
796 	if (ap->a_vp->v_type != VDIR)
797 		return (ENOTDIR);
798 
799 	uio = ap->a_uio;
800 	if (uio->uio_offset < 0)
801 		return (EINVAL);
802 
803 	/*
804 	 * XXX: This is a temporary hack to get around this filesystem not
805 	 * supporting cookies. We store the location of the ncookies pointer
806 	 * in a temporary variable before calling vfs_subr.c:vfs_read_dirent()
807 	 * and set the number of cookies to 0. We then set the pointer to
808 	 * NULL so that vfs_read_dirent doesn't try to call realloc() on
809 	 * ap->a_cookies. Later in this function, we restore the ap->a_ncookies
810 	 * pointer to its original location before returning to the caller.
811 	 */
812 	if (ap->a_ncookies != NULL) {
813 		tmp_ncookies = ap->a_ncookies;
814 		*ap->a_ncookies = 0;
815 		ap->a_ncookies = NULL;
816 	}
817 
818 	dmp = VFSTODEVFS(ap->a_vp->v_mount);
819 	sx_xlock(&dmp->dm_lock);
820 	devfs_populate(dmp);
821 	error = 0;
822 	de = ap->a_vp->v_data;
823 	off = 0;
824 	oldoff = uio->uio_offset;
825 	TAILQ_FOREACH(dd, &de->de_dlist, de_list) {
826 		KASSERT(dd->de_cdp != (void *)0xdeadc0de, ("%s %d\n", __func__, __LINE__));
827 		if (dd->de_flags & DE_WHITEOUT)
828 			continue;
829 		if (dd->de_dirent->d_type == DT_DIR)
830 			de = dd->de_dir;
831 		else
832 			de = dd;
833 		dp = dd->de_dirent;
834 		if (dp->d_reclen > uio->uio_resid)
835 			break;
836 		dp->d_fileno = de->de_inode;
837 		if (off >= uio->uio_offset) {
838 			error = vfs_read_dirent(ap, dp, off);
839 			if (error)
840 				break;
841 		}
842 		off += dp->d_reclen;
843 	}
844 	sx_xunlock(&dmp->dm_lock);
845 	uio->uio_offset = off;
846 
847 	/*
848 	 * Restore ap->a_ncookies if it wasn't originally NULL in the first
849 	 * place.
850 	 */
851 	if (tmp_ncookies != NULL)
852 		ap->a_ncookies = tmp_ncookies;
853 
854 	return (error);
855 }
856 
857 static int
858 devfs_readlink(struct vop_readlink_args *ap)
859 {
860 	struct devfs_dirent *de;
861 
862 	de = ap->a_vp->v_data;
863 	return (uiomove(de->de_symlink, strlen(de->de_symlink), ap->a_uio));
864 }
865 
866 static int
867 devfs_reclaim(struct vop_reclaim_args *ap)
868 {
869 	struct vnode *vp = ap->a_vp;
870 	struct devfs_dirent *de;
871 	struct cdev *dev;
872 
873 	de = vp->v_data;
874 	if (de != NULL)
875 		de->de_vnode = NULL;
876 	vp->v_data = NULL;
877 	vnode_destroy_vobject(vp);
878 
879 	dev = vp->v_rdev;
880 	vp->v_rdev = NULL;
881 
882 	if (dev == NULL)
883 		return (0);
884 
885 	dev_lock();
886 	dev->si_usecount -= vp->v_usecount;
887 	dev_unlock();
888 	dev_rel(dev);
889 	return (0);
890 }
891 
892 static int
893 devfs_remove(struct vop_remove_args *ap)
894 {
895 	struct vnode *vp = ap->a_vp;
896 	struct devfs_dirent *dd;
897 	struct devfs_dirent *de;
898 	struct devfs_mount *dmp = VFSTODEVFS(vp->v_mount);
899 
900 	sx_xlock(&dmp->dm_lock);
901 	dd = ap->a_dvp->v_data;
902 	de = vp->v_data;
903 	if (de->de_cdp == NULL) {
904 		TAILQ_REMOVE(&dd->de_dlist, de, de_list);
905 		devfs_delete(dmp, de);
906 	} else {
907 		de->de_flags |= DE_WHITEOUT;
908 	}
909 	sx_xunlock(&dmp->dm_lock);
910 	return (0);
911 }
912 
913 /*
914  * Revoke is called on a tty when a terminal session ends.  The vnode
915  * is orphaned by setting v_op to deadfs so we need to let go of it
916  * as well so that we create a new one next time around.
917  *
918  * XXX: locking :-(
919  * XXX: We mess around with other mountpoints without holding their sxlock.
920  * XXX: We hold the devlock() when we zero their vnode pointer, but is that
921  * XXX: enough ?
922  */
923 static int
924 devfs_revoke(struct vop_revoke_args *ap)
925 {
926 	struct vnode *vp = ap->a_vp, *vp2;
927 	struct cdev *dev;
928 	struct cdev_priv *cdp;
929 	struct devfs_dirent *de;
930 	int i;
931 
932 	KASSERT((ap->a_flags & REVOKEALL) != 0, ("devfs_revoke !REVOKEALL"));
933 
934 	dev = vp->v_rdev;
935 	cdp = dev->si_priv;
936 	for (;;) {
937 		dev_lock();
938 		vp2 = NULL;
939 		for (i = 0; i <= cdp->cdp_maxdirent; i++) {
940 			de = cdp->cdp_dirents[i];
941 			if (de == NULL)
942 				continue;
943 			vp2 = de->de_vnode;
944 			de->de_vnode = NULL;
945 			if (vp2 != NULL)
946 				break;
947 		}
948 		dev_unlock();
949 		if (vp2 != NULL) {
950 			/* XXX */
951 			vhold(vp2);
952 			vgone(vp2);
953 			vdrop(vp2);
954 			continue;
955 		}
956 		break;
957 	}
958 	return (0);
959 }
960 
961 static int
962 devfs_rioctl(struct vop_ioctl_args *ap)
963 {
964 	int error;
965 	struct devfs_mount *dmp;
966 
967 	dmp = VFSTODEVFS(ap->a_vp->v_mount);
968 	sx_xlock(&dmp->dm_lock);
969 	devfs_populate(dmp);
970 	error = devfs_rules_ioctl(dmp, ap->a_command, ap->a_data, ap->a_td);
971 	sx_xunlock(&dmp->dm_lock);
972 	return (error);
973 }
974 
975 static int
976 devfs_rread(struct vop_read_args *ap)
977 {
978 
979 	if (ap->a_vp->v_type != VDIR)
980 		return (EINVAL);
981 	return (VOP_READDIR(ap->a_vp, ap->a_uio, ap->a_cred, NULL, NULL, NULL));
982 }
983 
984 static int
985 devfs_setattr(struct vop_setattr_args *ap)
986 {
987 	struct devfs_dirent *de;
988 	struct vattr *vap;
989 	struct vnode *vp;
990 	int c, error;
991 	uid_t uid;
992 	gid_t gid;
993 
994 	vap = ap->a_vap;
995 	vp = ap->a_vp;
996 	if ((vap->va_type != VNON) ||
997 	    (vap->va_nlink != VNOVAL) ||
998 	    (vap->va_fsid != VNOVAL) ||
999 	    (vap->va_fileid != VNOVAL) ||
1000 	    (vap->va_blocksize != VNOVAL) ||
1001 	    (vap->va_flags != VNOVAL && vap->va_flags != 0) ||
1002 	    (vap->va_rdev != VNOVAL) ||
1003 	    ((int)vap->va_bytes != VNOVAL) ||
1004 	    (vap->va_gen != VNOVAL)) {
1005 		return (EINVAL);
1006 	}
1007 
1008 	de = vp->v_data;
1009 	if (vp->v_type == VDIR)
1010 		de = de->de_dir;
1011 
1012 	error = c = 0;
1013 	if (vap->va_uid == (uid_t)VNOVAL)
1014 		uid = de->de_uid;
1015 	else
1016 		uid = vap->va_uid;
1017 	if (vap->va_gid == (gid_t)VNOVAL)
1018 		gid = de->de_gid;
1019 	else
1020 		gid = vap->va_gid;
1021 	if (uid != de->de_uid || gid != de->de_gid) {
1022 		if (((ap->a_cred->cr_uid != de->de_uid) || uid != de->de_uid ||
1023 		    (gid != de->de_gid && !groupmember(gid, ap->a_cred))) &&
1024 		    (error = suser_cred(ap->a_td->td_ucred, SUSER_ALLOWJAIL)) != 0)
1025 			return (error);
1026 		de->de_uid = uid;
1027 		de->de_gid = gid;
1028 		c = 1;
1029 	}
1030 
1031 	if (vap->va_mode != (mode_t)VNOVAL) {
1032 		if ((ap->a_cred->cr_uid != de->de_uid) &&
1033 		    (error = suser_cred(ap->a_td->td_ucred, SUSER_ALLOWJAIL)))
1034 			return (error);
1035 		de->de_mode = vap->va_mode;
1036 		c = 1;
1037 	}
1038 
1039 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
1040 		/* See the comment in ufs_vnops::ufs_setattr(). */
1041 		if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, ap->a_td)) &&
1042 		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
1043 		    (error = VOP_ACCESS(vp, VWRITE, ap->a_cred, ap->a_td))))
1044 			return (error);
1045 		if (vap->va_atime.tv_sec != VNOVAL) {
1046 			if (vp->v_type == VCHR)
1047 				vp->v_rdev->si_atime = vap->va_atime;
1048 			else
1049 				de->de_atime = vap->va_atime;
1050 		}
1051 		if (vap->va_mtime.tv_sec != VNOVAL) {
1052 			if (vp->v_type == VCHR)
1053 				vp->v_rdev->si_mtime = vap->va_mtime;
1054 			else
1055 				de->de_mtime = vap->va_mtime;
1056 		}
1057 		c = 1;
1058 	}
1059 
1060 	if (c) {
1061 		if (vp->v_type == VCHR)
1062 			vfs_timestamp(&vp->v_rdev->si_ctime);
1063 		else
1064 			vfs_timestamp(&de->de_mtime);
1065 	}
1066 	return (0);
1067 }
1068 
1069 #ifdef MAC
1070 static int
1071 devfs_setlabel(struct vop_setlabel_args *ap)
1072 {
1073 	struct vnode *vp;
1074 	struct devfs_dirent *de;
1075 
1076 	vp = ap->a_vp;
1077 	de = vp->v_data;
1078 
1079 	mac_relabel_vnode(ap->a_cred, vp, ap->a_label);
1080 	mac_update_devfsdirent(vp->v_mount, de, vp);
1081 
1082 	return (0);
1083 }
1084 #endif
1085 
1086 static int
1087 devfs_stat_f(struct file *fp, struct stat *sb, struct ucred *cred, struct thread *td)
1088 {
1089 
1090 	return (vnops.fo_stat(fp, sb, cred, td));
1091 }
1092 
1093 static int
1094 devfs_symlink(struct vop_symlink_args *ap)
1095 {
1096 	int i, error;
1097 	struct devfs_dirent *dd;
1098 	struct devfs_dirent *de;
1099 	struct devfs_mount *dmp;
1100 	struct thread *td;
1101 
1102 	td = ap->a_cnp->cn_thread;
1103 	KASSERT(td == curthread, ("devfs_symlink: td != curthread"));
1104 	error = suser(td);
1105 	if (error)
1106 		return(error);
1107 	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
1108 	dd = ap->a_dvp->v_data;
1109 	de = devfs_newdirent(ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen);
1110 	de->de_uid = 0;
1111 	de->de_gid = 0;
1112 	de->de_mode = 0755;
1113 	de->de_inode = alloc_unr(devfs_inos);
1114 	de->de_dirent->d_type = DT_LNK;
1115 	i = strlen(ap->a_target) + 1;
1116 	de->de_symlink = malloc(i, M_DEVFS, M_WAITOK);
1117 	bcopy(ap->a_target, de->de_symlink, i);
1118 	sx_xlock(&dmp->dm_lock);
1119 #ifdef MAC
1120 	mac_create_devfs_symlink(ap->a_cnp->cn_cred, dmp->dm_mount, dd, de);
1121 #endif
1122 	TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list);
1123 	devfs_allocv(de, ap->a_dvp->v_mount, ap->a_vpp, td);
1124 	sx_xunlock(&dmp->dm_lock);
1125 	return (0);
1126 }
1127 
1128 /* ARGSUSED */
1129 static int
1130 devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td)
1131 {
1132 	struct cdev *dev;
1133 	int error, ioflag, resid;
1134 	struct cdevsw *dsw;
1135 
1136 	error = devfs_fp_check(fp, &dev, &dsw);
1137 	if (error)
1138 		return (error);
1139 	KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td));
1140 	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT | O_FSYNC);
1141 	if (ioflag & O_DIRECT)
1142 		ioflag |= IO_DIRECT;
1143 	if ((flags & FOF_OFFSET) == 0)
1144 		uio->uio_offset = fp->f_offset;
1145 
1146 	resid = uio->uio_resid;
1147 
1148 	error = dsw->d_write(dev, uio, ioflag);
1149 	dev_relthread(dev);
1150 	if (uio->uio_resid != resid || (error == 0 && resid != 0)) {
1151 		vfs_timestamp(&dev->si_ctime);
1152 		dev->si_mtime = dev->si_ctime;
1153 	}
1154 
1155 	if ((flags & FOF_OFFSET) == 0)
1156 		fp->f_offset = uio->uio_offset;
1157 	fp->f_nextoff = uio->uio_offset;
1158 	return (error);
1159 }
1160 
1161 dev_t
1162 dev2udev(struct cdev *x)
1163 {
1164 	if (x == NULL)
1165 		return (NODEV);
1166 	return (x->si_priv->cdp_inode);
1167 }
1168 
/*
 * File operations installed by devfs_open() on files opened on devices.
 * read/write/ioctl/poll/kqfilter call the driver directly; stat and close
 * are handed to the generic vnode file operations.
 */
static struct fileops devfs_ops_f = {
	.fo_read =	devfs_read_f,
	.fo_write =	devfs_write_f,
	.fo_ioctl =	devfs_ioctl_f,
	.fo_poll =	devfs_poll_f,
	.fo_kqfilter =	devfs_kqfilter_f,
	.fo_stat =	devfs_stat_f,
	.fo_close =	devfs_close_f,
	.fo_flags =	DFLAG_PASSABLE | DFLAG_SEEKABLE
};
1179 
/*
 * Vnode operations that devfs_allocv() passes to getnewvnode(); they
 * remain in effect for every entry that is not a character device
 * (directories, symlinks).  Operations not listed fall through to
 * default_vnodeops.
 */
static struct vop_vector devfs_vnodeops = {
	.vop_default =		&default_vnodeops,

	.vop_access =		devfs_access,
	.vop_getattr =		devfs_getattr,
	.vop_ioctl =		devfs_rioctl,
	.vop_lookup =		devfs_lookup,
	.vop_mknod =		devfs_mknod,
	.vop_pathconf =		devfs_pathconf,
	.vop_read =		devfs_rread,
	.vop_readdir =		devfs_readdir,
	.vop_readlink =		devfs_readlink,
	.vop_reclaim =		devfs_reclaim,
	.vop_remove =		devfs_remove,
	.vop_revoke =		devfs_revoke,
	.vop_setattr =		devfs_setattr,
#ifdef MAC
	.vop_setlabel =		devfs_setlabel,
#endif
	.vop_symlink =		devfs_symlink,
};
1201 
/*
 * Vnode operations for character-device vnodes; devfs_allocv() switches
 * v_op to this table for DT_CHR entries.  Operations that are not
 * expected on a device vnode are wired to VOP_PANIC; read and write are
 * among them because device I/O goes through devfs_ops_f instead.
 */
static struct vop_vector devfs_specops = {
	.vop_default =		&default_vnodeops,

	.vop_access =		devfs_access,
	.vop_advlock =		devfs_advlock,
	.vop_bmap =		VOP_PANIC,
	.vop_close =		devfs_close,
	.vop_create =		VOP_PANIC,
	.vop_fsync =		devfs_fsync,
	.vop_getattr =		devfs_getattr,
	.vop_lease =		VOP_NULL,
	.vop_link =		VOP_PANIC,
	.vop_mkdir =		VOP_PANIC,
	.vop_mknod =		VOP_PANIC,
	.vop_open =		devfs_open,
	.vop_pathconf =		devfs_pathconf,
	.vop_print =		devfs_print,
	.vop_read =		VOP_PANIC,
	.vop_readdir =		VOP_PANIC,
	.vop_readlink =		VOP_PANIC,
	.vop_reallocblks =	VOP_PANIC,
	.vop_reclaim =		devfs_reclaim,
	.vop_remove =		devfs_remove,
	.vop_rename =		VOP_PANIC,
	.vop_revoke =		devfs_revoke,
	.vop_rmdir =		VOP_PANIC,
	.vop_setattr =		devfs_setattr,
#ifdef MAC
	.vop_setlabel =		devfs_setlabel,
#endif
	.vop_strategy =		VOP_PANIC,
	.vop_symlink =		VOP_PANIC,
	.vop_write =		VOP_PANIC,
};
1236 
1237 /*
1238  * Our calling convention to the device drivers used to be that we passed
1239  * vnode.h IO_* flags to read()/write(), but we're moving to fcntl.h O_
1240  * flags instead since that's what open(), close() and ioctl() takes and
1241  * we don't really want vnode.h in device drivers.
1242  * We solved the source compatibility by redefining some vnode flags to
1243  * be the same as the fcntl ones and by sending down the bitwise OR of
1244  * the respective fcntl/vnode flags.  These CTASSERTS make sure nobody
1245  * pulls the rug out under this.
1246  */
/* devfs_read_f()/devfs_write_f() pass O_NONBLOCK straight through as the ioflag. */
CTASSERT(O_NONBLOCK == IO_NDELAY);
/* devfs_write_f() passes O_FSYNC straight through as the ioflag. */
CTASSERT(O_FSYNC == IO_SYNC);
1249