xref: /freebsd/sys/fs/devfs/devfs_vnops.c (revision d056fa046c6a91b90cd98165face0e42a33a5173)
1 /*-
2  * Copyright (c) 2000-2004
3  *	Poul-Henning Kamp.  All rights reserved.
4  * Copyright (c) 1989, 1992-1993, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software donated to Berkeley by
8  * Jan-Simon Pendry.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  *	@(#)kernfs_vnops.c	8.15 (Berkeley) 5/21/95
32  * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vnops.c 1.43
33  *
34  * $FreeBSD$
35  */
36 
37 /*
38  * TODO:
39  *	remove empty directories
40  *	mkdir: want it ?
41  */
42 
43 #include "opt_devfs.h"
44 #include "opt_mac.h"
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/conf.h>
49 #include <sys/dirent.h>
50 #include <sys/fcntl.h>
51 #include <sys/file.h>
52 #include <sys/filedesc.h>
53 #include <sys/filio.h>
54 #include <sys/kernel.h>
55 #include <sys/lock.h>
56 #include <sys/mac.h>
57 #include <sys/malloc.h>
58 #include <sys/mount.h>
59 #include <sys/namei.h>
60 #include <sys/proc.h>
61 #include <sys/stat.h>
62 #include <sys/sx.h>
63 #include <sys/time.h>
64 #include <sys/ttycom.h>
65 #include <sys/unistd.h>
66 #include <sys/vnode.h>
67 
/*
 * Forward declarations.  The tables themselves are defined at the bottom
 * of this file, but devfs_allocv() and devfs_open() refer to them first.
 */
static struct vop_vector devfs_vnodeops;
static struct vop_vector devfs_specops;
static struct fileops devfs_ops_f;
71 
72 #include <fs/devfs/devfs.h>
73 #include <fs/devfs/devfs_int.h>
74 
/*
 * Mutex held while the de_vnode <-> v_data association between a dirent
 * and its vnode is inspected or changed (see devfs_allocv(),
 * devfs_reclaim() and devfs_revoke()).
 */
static struct mtx	devfs_de_interlock;
MTX_SYSINIT(devfs_de_interlock, &devfs_de_interlock, "devfs interlock", MTX_DEF);
77 
78 static int
79 devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp)
80 {
81 
82 	*devp = fp->f_vnode->v_rdev;
83 	if (*devp != fp->f_data)
84 		return (ENXIO);
85 	KASSERT((*devp)->si_refcount > 0,
86 	    ("devfs: un-referenced struct cdev *(%s)", devtoname(*devp)));
87 	*dswp = dev_refthread(*devp);
88 	if (*dswp == NULL)
89 		return (ENXIO);
90 	return (0);
91 }
92 
93 /*
94  * Construct the fully qualified path name relative to the mountpoint
95  */
96 static char *
97 devfs_fqpn(char *buf, struct vnode *dvp, struct componentname *cnp)
98 {
99 	int i;
100 	struct devfs_dirent *de, *dd;
101 	struct devfs_mount *dmp;
102 
103 	dmp = VFSTODEVFS(dvp->v_mount);
104 	dd = dvp->v_data;
105 	i = SPECNAMELEN;
106 	buf[i] = '\0';
107 	i -= cnp->cn_namelen;
108 	if (i < 0)
109 		 return (NULL);
110 	bcopy(cnp->cn_nameptr, buf + i, cnp->cn_namelen);
111 	de = dd;
112 	while (de != dmp->dm_rootdir) {
113 		i--;
114 		if (i < 0)
115 			 return (NULL);
116 		buf[i] = '/';
117 		i -= de->de_dirent->d_namlen;
118 		if (i < 0)
119 			 return (NULL);
120 		bcopy(de->de_dirent->d_name, buf + i,
121 		    de->de_dirent->d_namlen);
122 		de = TAILQ_FIRST(&de->de_dlist);	/* "." */
123 		de = TAILQ_NEXT(de, de_list);		/* ".." */
124 		de = de->de_dir;
125 	}
126 	return (buf + i);
127 }
128 
/*
 * Return in *vpp an exclusively locked vnode for the directory entry de.
 *
 * If de already has a vnode it is re-acquired with vget(); otherwise a
 * new vnode is obtained from getnewvnode(), typed according to the
 * dirent's d_type, and linked to de.  td must be curthread.
 *
 * Returns 0 on success, ENOENT for a character device whose cdev_priv is
 * no longer CDP_ACTIVE, or the error from getnewvnode().
 */
int
devfs_allocv(struct devfs_dirent *de, struct mount *mp, struct vnode **vpp, struct thread *td)
{
	int error;
	struct vnode *vp;
	struct cdev *dev;

	KASSERT(td == curthread, ("devfs_allocv: td != curthread"));
loop:

	mtx_lock(&devfs_de_interlock);
	vp = de->de_vnode;
	if (vp != NULL) {
		/*
		 * Take the vnode interlock before dropping the devfs
		 * interlock, then hand it to vget() via LK_INTERLOCK.
		 * If vget() fails, start over from the top.
		 */
		VI_LOCK(vp);
		mtx_unlock(&devfs_de_interlock);
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td))
			goto loop;
		*vpp = vp;
		return (0);
	}
	mtx_unlock(&devfs_de_interlock);
	if (de->de_dirent->d_type == DT_CHR) {
		/* Do not hand out vnodes for devices that are not active. */
		if (!(de->de_cdp->cdp_flags & CDP_ACTIVE))
			return (ENOENT);
		dev = &de->de_cdp->cdp_c;
	} else {
		dev = NULL;
	}
	error = getnewvnode("devfs", mp, &devfs_vnodeops, &vp);
	if (error != 0) {
		printf("devfs_allocv: failed to allocate new vnode\n");
		return (error);
	}

	if (de->de_dirent->d_type == DT_CHR) {
		vp->v_type = VCHR;
		/*
		 * Reference the cdev on behalf of the vnode and fold the
		 * vnode's use count into the device's, all under both the
		 * vnode interlock and the dev lock.
		 */
		VI_LOCK(vp);
		dev_lock();
		dev_refl(dev);
		vp->v_rdev = dev;
		KASSERT(vp->v_usecount == 1,
		    ("%s %d (%d)\n", __func__, __LINE__, vp->v_usecount));
		dev->si_usecount += vp->v_usecount;
		dev_unlock();
		VI_UNLOCK(vp);
		/* Character devices use the special-file operations vector. */
		vp->v_op = &devfs_specops;
	} else if (de->de_dirent->d_type == DT_DIR) {
		vp->v_type = VDIR;
	} else if (de->de_dirent->d_type == DT_LNK) {
		vp->v_type = VLNK;
	} else {
		vp->v_type = VBAD;
	}
	/* Publish the dirent <-> vnode linkage under the devfs interlock. */
	mtx_lock(&devfs_de_interlock);
	vp->v_data = de;
	de->de_vnode = vp;
	mtx_unlock(&devfs_de_interlock);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
#ifdef MAC
	mac_associate_vnode_devfs(mp, de, vp);
#endif
	*vpp = vp;
	return (0);
}
193 
194 static int
195 devfs_access(struct vop_access_args *ap)
196 {
197 	struct vnode *vp = ap->a_vp;
198 	struct devfs_dirent *de;
199 	int error;
200 
201 	de = vp->v_data;
202 	if (vp->v_type == VDIR)
203 		de = de->de_dir;
204 
205 	error = vaccess(vp->v_type, de->de_mode, de->de_uid, de->de_gid,
206 	    ap->a_mode, ap->a_cred, NULL);
207 	if (!error)
208 		return (error);
209 	if (error != EACCES)
210 		return (error);
211 	/* We do, however, allow access to the controlling terminal */
212 	if (!(ap->a_td->td_proc->p_flag & P_CONTROLT))
213 		return (error);
214 	if (ap->a_td->td_proc->p_session->s_ttyvp == de->de_vnode)
215 		return (0);
216 	return (error);
217 }
218 
219 /* ARGSUSED */
220 static int
221 devfs_advlock(struct vop_advlock_args *ap)
222 {
223 
224 	return (ap->a_flags & F_FLOCK ? EOPNOTSUPP : EINVAL);
225 }
226 
/*
 * Close a character device vnode.
 *
 * First drops the session's controlling-terminal reference when this is
 * effectively the last user, then calls the driver's d_close() unless
 * other users remain (and the close is neither forced nor tracked).
 * Returns ENXIO if no cdevsw can be referenced, 0 when the driver close
 * is skipped, otherwise the value returned by d_close().
 */
/* ARGSUSED */
static int
devfs_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp, *oldvp;
	struct thread *td = ap->a_td;
	struct cdev *dev = vp->v_rdev;
	struct cdevsw *dsw;
	int error;

	/*
	 * Hack: a tty device that is a controlling terminal
	 * has a reference from the session structure.
	 * We cannot easily tell that a character device is
	 * a controlling terminal, unless it is the closing
	 * process' controlling terminal.  In that case,
	 * if the reference count is 2 (this last descriptor
	 * plus the session), release the reference from the session.
	 */
	oldvp = NULL;
	sx_xlock(&proctree_lock);
	if (td && vp == td->td_proc->p_session->s_ttyvp) {
		SESS_LOCK(td->td_proc->p_session);
		VI_LOCK(vp);
		if (count_dev(dev) == 2 && (vp->v_iflag & VI_DOOMED) == 0) {
			td->td_proc->p_session->s_ttyvp = NULL;
			oldvp = vp;
		}
		VI_UNLOCK(vp);
		SESS_UNLOCK(td->td_proc->p_session);
	}
	sx_xunlock(&proctree_lock);
	/* The session's reference is released only after all locks are dropped. */
	if (oldvp != NULL)
		vrele(oldvp);
	/*
	 * We do not want to really close the device if it
	 * is still in use unless we are trying to close it
	 * forcibly. Since every use (buffer, vnode, swap, cmap)
	 * holds a reference to the vnode, and because we mark
	 * any other vnodes that alias this device, when the
	 * sum of the reference counts on all the aliased
	 * vnodes descends to one, we are on last close.
	 */
	dsw = dev_refthread(dev);
	if (dsw == NULL)
		return (ENXIO);
	VI_LOCK(vp);
	if (vp->v_iflag & VI_DOOMED) {
		/* Forced close. */
	} else if (dsw->d_flags & D_TRACKCLOSE) {
		/* Keep device updated on status. */
	} else if (count_dev(dev) > 1) {
		/* Other users remain: skip the driver's close routine. */
		VI_UNLOCK(vp);
		dev_relthread(dev);
		return (0);
	}
	VI_UNLOCK(vp);
	KASSERT(dev->si_refcount > 0,
	    ("devfs_close() on un-referenced struct cdev *(%s)", devtoname(dev)));
	/* Drivers without D_NEEDGIANT are called with Giant dropped. */
	if (!(dsw->d_flags & D_NEEDGIANT)) {
		DROP_GIANT();
		error = dsw->d_close(dev, ap->a_fflag, S_IFCHR, td);
		PICKUP_GIANT();
	} else {
		error = dsw->d_close(dev, ap->a_fflag, S_IFCHR, td);
	}
	dev_relthread(dev);
	return (error);
}
296 
297 static int
298 devfs_close_f(struct file *fp, struct thread *td)
299 {
300 
301 	return (vnops.fo_close(fp, td));
302 }
303 
304 /* ARGSUSED */
305 static int
306 devfs_fsync(struct vop_fsync_args *ap)
307 {
308 	if (!vn_isdisk(ap->a_vp, NULL))
309 		return (0);
310 
311 	return (vop_stdfsync(ap));
312 }
313 
314 static int
315 devfs_getattr(struct vop_getattr_args *ap)
316 {
317 	struct vnode *vp = ap->a_vp;
318 	struct vattr *vap = ap->a_vap;
319 	int error = 0;
320 	struct devfs_dirent *de;
321 	struct cdev *dev;
322 
323 	de = vp->v_data;
324 	KASSERT(de != NULL, ("Null dirent in devfs_getattr vp=%p", vp));
325 	if (vp->v_type == VDIR) {
326 		de = de->de_dir;
327 		KASSERT(de != NULL,
328 		    ("Null dir dirent in devfs_getattr vp=%p", vp));
329 	}
330 	bzero((caddr_t) vap, sizeof(*vap));
331 	vattr_null(vap);
332 	vap->va_uid = de->de_uid;
333 	vap->va_gid = de->de_gid;
334 	vap->va_mode = de->de_mode;
335 	if (vp->v_type == VLNK)
336 		vap->va_size = strlen(de->de_symlink);
337 	else if (vp->v_type == VDIR)
338 		vap->va_size = vap->va_bytes = DEV_BSIZE;
339 	else
340 		vap->va_size = 0;
341 	if (vp->v_type != VDIR)
342 		vap->va_bytes = 0;
343 	vap->va_blocksize = DEV_BSIZE;
344 	vap->va_type = vp->v_type;
345 
346 #define fix(aa)							\
347 	do {							\
348 		if ((aa).tv_sec == 0) {				\
349 			(aa).tv_sec = boottime.tv_sec;		\
350 			(aa).tv_nsec = boottime.tv_usec * 1000; \
351 		}						\
352 	} while (0)
353 
354 	if (vp->v_type != VCHR)  {
355 		fix(de->de_atime);
356 		vap->va_atime = de->de_atime;
357 		fix(de->de_mtime);
358 		vap->va_mtime = de->de_mtime;
359 		fix(de->de_ctime);
360 		vap->va_ctime = de->de_ctime;
361 	} else {
362 		dev = vp->v_rdev;
363 		fix(dev->si_atime);
364 		vap->va_atime = dev->si_atime;
365 		fix(dev->si_mtime);
366 		vap->va_mtime = dev->si_mtime;
367 		fix(dev->si_ctime);
368 		vap->va_ctime = dev->si_ctime;
369 
370 		vap->va_rdev = dev->si_priv->cdp_inode;
371 	}
372 	vap->va_gen = 0;
373 	vap->va_flags = 0;
374 	vap->va_nlink = de->de_links;
375 	vap->va_fileid = de->de_inode;
376 
377 	return (error);
378 }
379 
/*
 * File-level ioctl on an open device.
 *
 * FIODTYPE and FIODGNAME are answered here without calling the driver;
 * every other command goes to the driver's d_ioctl(), with ENOIOCTL
 * mapped to ENOTTY.  After a successful TIOCSCTTY the session's
 * controlling-terminal vnode is switched to this file's vnode.
 */
/* ARGSUSED */
static int
devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, struct thread *td)
{
	struct cdev *dev;
	struct cdevsw *dsw;
	struct vnode *vp;
	struct vnode *vpold;
	int error, i;
	const char *p;
	struct fiodgname_arg *fgn;

	error = devfs_fp_check(fp, &dev, &dsw);
	if (error)
		return (error);

	if (com == FIODTYPE) {
		/* Report the device type bits from the cdevsw flags. */
		*(int *)data = dsw->d_flags & D_TYPEMASK;
		dev_relthread(dev);
		return (0);
	} else if (com == FIODGNAME) {
		/*
		 * Copy the device name, including its terminating NUL, out
		 * to the caller's buffer; EINVAL if the buffer is too small.
		 */
		fgn = data;
		p = devtoname(dev);
		i = strlen(p) + 1;
		if (i > fgn->len)
			error = EINVAL;
		else
			error = copyout(p, fgn->buf, i);
		dev_relthread(dev);
		return (error);
	}
	error = dsw->d_ioctl(dev, com, data, fp->f_flag, td);
	dev_relthread(dev);
	if (error == ENOIOCTL)
		error = ENOTTY;
	if (error == 0 && com == TIOCSCTTY) {
		vp = fp->f_vnode;

		/* Do nothing if reassigning same control tty */
		sx_slock(&proctree_lock);
		if (td->td_proc->p_session->s_ttyvp == vp) {
			sx_sunlock(&proctree_lock);
			return (0);
		}

		mtx_lock(&Giant);

		/* Reference the new vnode before installing it in the session. */
		vpold = td->td_proc->p_session->s_ttyvp;
		VREF(vp);
		SESS_LOCK(td->td_proc->p_session);
		td->td_proc->p_session->s_ttyvp = vp;
		SESS_UNLOCK(td->td_proc->p_session);

		sx_sunlock(&proctree_lock);

		/* Get rid of reference to old control tty */
		if (vpold)
			vrele(vpold);
		mtx_unlock(&Giant);
	}
	return (error);
}
442 
443 /* ARGSUSED */
444 static int
445 devfs_kqfilter_f(struct file *fp, struct knote *kn)
446 {
447 	struct cdev *dev;
448 	struct cdevsw *dsw;
449 	int error;
450 
451 	error = devfs_fp_check(fp, &dev, &dsw);
452 	if (error)
453 		return (error);
454 	error = dsw->d_kqfilter(dev, kn);
455 	dev_relthread(dev);
456 	return (error);
457 }
458 
/*
 * Body of the lookup operation; devfs_lookup() calls it with the mount's
 * dm_lock held exclusively.
 *
 * Resolves cnp in directory dvp.  "." and ".." are handled specially.
 * A name with no existing dirent is offered to the dev_clone event
 * handlers so a driver may create the device on demand.  On success
 * *a_vpp holds the resulting vnode.  Returns EJUSTRETURN for a missing
 * (or whited-out) last component of a CREATE/RENAME with
 * LOCKPARENT/WANTPARENT, ENOENT for other misses, or another errno.
 */
static int
devfs_lookupx(struct vop_lookup_args *ap)
{
	struct componentname *cnp;
	struct vnode *dvp, **vpp;
	struct thread *td;
	struct devfs_dirent *de, *dd;
	struct devfs_dirent **dde;
	struct devfs_mount *dmp;
	struct cdev *cdev;
	int error, flags, nameiop;
	char specname[SPECNAMELEN + 1], *pname;

	cnp = ap->a_cnp;
	vpp = ap->a_vpp;
	dvp = ap->a_dvp;
	pname = cnp->cn_nameptr;
	td = cnp->cn_thread;
	flags = cnp->cn_flags;
	nameiop = cnp->cn_nameiop;
	dmp = VFSTODEVFS(dvp->v_mount);
	dd = dvp->v_data;
	*vpp = NULLVP;

	/* Renaming within devfs is not supported. */
	if ((flags & ISLASTCN) && nameiop == RENAME)
		return (EOPNOTSUPP);

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT))
		return (EIO);

	/* The caller needs search permission on the directory. */
	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td);
	if (error)
		return (error);

	/* "." resolves to the directory itself, with an extra reference. */
	if (cnp->cn_namelen == 1 && *pname == '.') {
		if ((flags & ISLASTCN) && nameiop != LOOKUP)
			return (EINVAL);
		*vpp = dvp;
		VREF(dvp);
		return (0);
	}

	if (flags & ISDOTDOT) {
		if ((flags & ISLASTCN) && nameiop != LOOKUP)
			return (EINVAL);
		/*
		 * The directory is unlocked while the parent's vnode is
		 * obtained and relocked afterwards.
		 */
		VOP_UNLOCK(dvp, 0, td);
		de = TAILQ_FIRST(&dd->de_dlist);	/* "." */
		de = TAILQ_NEXT(de, de_list);		/* ".." */
		de = de->de_dir;
		error = devfs_allocv(de, dvp->v_mount, vpp, td);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
		return (error);
	}

	devfs_populate(dmp);
	dd = dvp->v_data;
	de = devfs_find(dd, cnp->cn_nameptr, cnp->cn_namelen);
	while (de == NULL) {	/* While(...) so we can use break */

		if (nameiop == DELETE)
			return (ENOENT);

		/*
		 * OK, we didn't have an entry for the name we were asked for
		 * so we try to see if anybody can create it on demand.
		 */
		pname = devfs_fqpn(specname, dvp, cnp);
		if (pname == NULL)
			break;

		cdev = NULL;
		EVENTHANDLER_INVOKE(dev_clone,
		    td->td_ucred, pname, strlen(pname), &cdev);
		if (cdev == NULL)
			break;

		/* A device was created: populate again so its dirent exists. */
		devfs_populate(dmp);

		dev_lock();
		dde = &cdev->si_priv->cdp_dirents[dmp->dm_idx];
		if (dde != NULL && *dde != NULL)
			de = *dde;
		dev_unlock();
		dev_rel(cdev);
		break;
	}

	if (de == NULL || de->de_flags & DE_WHITEOUT) {
		if ((nameiop == CREATE || nameiop == RENAME) &&
		    (flags & (LOCKPARENT | WANTPARENT)) && (flags & ISLASTCN)) {
			cnp->cn_flags |= SAVENAME;
			return (EJUSTRETURN);
		}
		return (ENOENT);
	}

	if ((cnp->cn_nameiop == DELETE) && (flags & ISLASTCN)) {
		/* Deleting an entry requires write permission on the directory. */
		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
		if (error)
			return (error);
		/*
		 * NOTE(review): *vpp was set to NULLVP at the top and no
		 * path reaching this point reassigns it, so this comparison
		 * looks like it can never be true -- confirm before relying
		 * on this branch.
		 */
		if (*vpp == dvp) {
			VREF(dvp);
			*vpp = dvp;
			return (0);
		}
	}
	error = devfs_allocv(de, dvp->v_mount, vpp, td);
	return (error);
}
571 
572 static int
573 devfs_lookup(struct vop_lookup_args *ap)
574 {
575 	int j;
576 	struct devfs_mount *dmp;
577 
578 	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
579 	sx_xlock(&dmp->dm_lock);
580 	j = devfs_lookupx(ap);
581 	sx_xunlock(&dmp->dm_lock);
582 	return (j);
583 }
584 
585 static int
586 devfs_mknod(struct vop_mknod_args *ap)
587 {
588 	struct componentname *cnp;
589 	struct vnode *dvp, **vpp;
590 	struct thread *td;
591 	struct devfs_dirent *dd, *de;
592 	struct devfs_mount *dmp;
593 	int error;
594 
595 	/*
596 	 * The only type of node we should be creating here is a
597 	 * character device, for anything else return EOPNOTSUPP.
598 	 */
599 	if (ap->a_vap->va_type != VCHR)
600 		return (EOPNOTSUPP);
601 	dvp = ap->a_dvp;
602 	dmp = VFSTODEVFS(dvp->v_mount);
603 	sx_xlock(&dmp->dm_lock);
604 
605 	cnp = ap->a_cnp;
606 	vpp = ap->a_vpp;
607 	td = cnp->cn_thread;
608 	dd = dvp->v_data;
609 
610 	error = ENOENT;
611 	TAILQ_FOREACH(de, &dd->de_dlist, de_list) {
612 		if (cnp->cn_namelen != de->de_dirent->d_namlen)
613 			continue;
614 		if (bcmp(cnp->cn_nameptr, de->de_dirent->d_name,
615 		    de->de_dirent->d_namlen) != 0)
616 			continue;
617 		if (de->de_flags & DE_WHITEOUT)
618 			break;
619 		goto notfound;
620 	}
621 	if (de == NULL)
622 		goto notfound;
623 	de->de_flags &= ~DE_WHITEOUT;
624 	error = devfs_allocv(de, dvp->v_mount, vpp, td);
625 notfound:
626 	sx_xunlock(&dmp->dm_lock);
627 	return (error);
628 }
629 
/*
 * Open the character device behind a vnode.
 *
 * Calls the driver's d_fdopen() when it has one, else d_open(), with the
 * vnode unlocked for the duration of the call.  When a descriptor index
 * was supplied (a_fdidx >= 0) and the open succeeded, the file's f_ops
 * and f_data are switched to devfs_ops_f and the cdev so that later file
 * operations go straight to the device.
 *
 * Returns ENXIO for VBLK vnodes, a missing cdev or an unavailable cdevsw,
 * the securelevel error for write opens of disks, or the driver's result.
 */
/* ARGSUSED */
static int
devfs_open(struct vop_open_args *ap)
{
	struct thread *td = ap->a_td;
	struct vnode *vp = ap->a_vp;
	struct cdev *dev = vp->v_rdev;
	struct file *fp;
	int error;
	struct cdevsw *dsw;

	if (vp->v_type == VBLK)
		return (ENXIO);

	if (dev == NULL)
		return (ENXIO);

	/* Make this field valid before any I/O in d_open. */
	if (dev->si_iosize_max == 0)
		dev->si_iosize_max = DFLTPHYS;

	if (vn_isdisk(vp, NULL) &&
	    ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
		/*
		* When running in very secure mode, do not allow
		* opens for writing of any disks.
		* XXX: should be in geom_dev.c, but we lack the cred there.
		*/
		error = securelevel_ge(td->td_ucred, 2);
		if (error)
			return (error);
	}

	dsw = dev_refthread(dev);
	if (dsw == NULL)
		return (ENXIO);

	/* XXX: Special casing of ttys for deadfs.  Probably redundant. */
	if (dsw->d_flags & D_TTY)
		vp->v_vflag |= VV_ISTTY;

	/* The driver's open routine is called with the vnode unlocked. */
	VOP_UNLOCK(vp, 0, td);

	/* Drivers without D_NEEDGIANT are called with Giant dropped. */
	if(!(dsw->d_flags & D_NEEDGIANT)) {
		DROP_GIANT();
		if (dsw->d_fdopen != NULL)
			error = dsw->d_fdopen(dev, ap->a_mode, td, ap->a_fdidx);
		else
			error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td);
		PICKUP_GIANT();
	} else {
		if (dsw->d_fdopen != NULL)
			error = dsw->d_fdopen(dev, ap->a_mode, td, ap->a_fdidx);
		else
			error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td);
	}

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);

	dev_relthread(dev);

	if (error)
		return (error);

#if 0	/* /dev/console */
	KASSERT(ap->a_fdidx >= 0,
	     ("Could not vnode bypass device on fd %d", ap->a_fdidx));
#else
	/* No descriptor was supplied: there is no struct file to rebind. */
	if(ap->a_fdidx < 0)
		return (error);
#endif
	/*
	 * This is a pretty disgustingly long chain, but I am not
	 * sure there is any better way.  Passing the fdidx into
	 * VOP_OPEN() offers us more information than just passing
	 * the file *.
	 */
	fp = ap->a_td->td_proc->p_fd->fd_ofiles[ap->a_fdidx];
	KASSERT(fp->f_ops == &badfileops,
	     ("Could not vnode bypass device on fdops %p", fp->f_ops));
	fp->f_ops = &devfs_ops_f;
	fp->f_data = dev;
	return (error);
}
714 
715 static int
716 devfs_pathconf(struct vop_pathconf_args *ap)
717 {
718 
719 	switch (ap->a_name) {
720 	case _PC_MAC_PRESENT:
721 #ifdef MAC
722 		/*
723 		 * If MAC is enabled, devfs automatically supports
724 		 * trivial non-persistant label storage.
725 		 */
726 		*ap->a_retval = 1;
727 #else
728 		*ap->a_retval = 0;
729 #endif
730 		return (0);
731 	default:
732 		return (vop_stdpathconf(ap));
733 	}
734 	/* NOTREACHED */
735 }
736 
737 /* ARGSUSED */
738 static int
739 devfs_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td)
740 {
741 	struct cdev *dev;
742 	struct cdevsw *dsw;
743 	int error;
744 
745 	error = devfs_fp_check(fp, &dev, &dsw);
746 	if (error)
747 		return (error);
748 	error = dsw->d_poll(dev, events, td);
749 	dev_relthread(dev);
750 	return(error);
751 }
752 
753 /*
754  * Print out the contents of a special device vnode.
755  */
756 static int
757 devfs_print(struct vop_print_args *ap)
758 {
759 
760 	printf("\tdev %s\n", devtoname(ap->a_vp->v_rdev));
761 	return (0);
762 }
763 
764 /* ARGSUSED */
765 static int
766 devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td)
767 {
768 	struct cdev *dev;
769 	int ioflag, error, resid;
770 	struct cdevsw *dsw;
771 
772 	error = devfs_fp_check(fp, &dev, &dsw);
773 	if (error)
774 		return (error);
775 	resid = uio->uio_resid;
776 	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT);
777 	if (ioflag & O_DIRECT)
778 		ioflag |= IO_DIRECT;
779 
780 	if ((flags & FOF_OFFSET) == 0)
781 		uio->uio_offset = fp->f_offset;
782 
783 	error = dsw->d_read(dev, uio, ioflag);
784 	dev_relthread(dev);
785 	if (uio->uio_resid != resid || (error == 0 && resid != 0))
786 		vfs_timestamp(&dev->si_atime);
787 
788 	if ((flags & FOF_OFFSET) == 0)
789 		fp->f_offset = uio->uio_offset;
790 	fp->f_nextoff = uio->uio_offset;
791 	return (error);
792 }
793 
794 static int
795 devfs_readdir(struct vop_readdir_args *ap)
796 {
797 	int error;
798 	struct uio *uio;
799 	struct dirent *dp;
800 	struct devfs_dirent *dd;
801 	struct devfs_dirent *de;
802 	struct devfs_mount *dmp;
803 	off_t off, oldoff;
804 	int *tmp_ncookies = NULL;
805 
806 	if (ap->a_vp->v_type != VDIR)
807 		return (ENOTDIR);
808 
809 	uio = ap->a_uio;
810 	if (uio->uio_offset < 0)
811 		return (EINVAL);
812 
813 	/*
814 	 * XXX: This is a temporary hack to get around this filesystem not
815 	 * supporting cookies. We store the location of the ncookies pointer
816 	 * in a temporary variable before calling vfs_subr.c:vfs_read_dirent()
817 	 * and set the number of cookies to 0. We then set the pointer to
818 	 * NULL so that vfs_read_dirent doesn't try to call realloc() on
819 	 * ap->a_cookies. Later in this function, we restore the ap->a_ncookies
820 	 * pointer to its original location before returning to the caller.
821 	 */
822 	if (ap->a_ncookies != NULL) {
823 		tmp_ncookies = ap->a_ncookies;
824 		*ap->a_ncookies = 0;
825 		ap->a_ncookies = NULL;
826 	}
827 
828 	dmp = VFSTODEVFS(ap->a_vp->v_mount);
829 	sx_xlock(&dmp->dm_lock);
830 	devfs_populate(dmp);
831 	error = 0;
832 	de = ap->a_vp->v_data;
833 	off = 0;
834 	oldoff = uio->uio_offset;
835 	TAILQ_FOREACH(dd, &de->de_dlist, de_list) {
836 		KASSERT(dd->de_cdp != (void *)0xdeadc0de, ("%s %d\n", __func__, __LINE__));
837 		if (dd->de_flags & DE_WHITEOUT)
838 			continue;
839 		if (dd->de_dirent->d_type == DT_DIR)
840 			de = dd->de_dir;
841 		else
842 			de = dd;
843 		dp = dd->de_dirent;
844 		if (dp->d_reclen > uio->uio_resid)
845 			break;
846 		dp->d_fileno = de->de_inode;
847 		if (off >= uio->uio_offset) {
848 			error = vfs_read_dirent(ap, dp, off);
849 			if (error)
850 				break;
851 		}
852 		off += dp->d_reclen;
853 	}
854 	sx_xunlock(&dmp->dm_lock);
855 	uio->uio_offset = off;
856 
857 	/*
858 	 * Restore ap->a_ncookies if it wasn't originally NULL in the first
859 	 * place.
860 	 */
861 	if (tmp_ncookies != NULL)
862 		ap->a_ncookies = tmp_ncookies;
863 
864 	return (error);
865 }
866 
867 static int
868 devfs_readlink(struct vop_readlink_args *ap)
869 {
870 	struct devfs_dirent *de;
871 
872 	de = ap->a_vp->v_data;
873 	return (uiomove(de->de_symlink, strlen(de->de_symlink), ap->a_uio));
874 }
875 
876 static int
877 devfs_reclaim(struct vop_reclaim_args *ap)
878 {
879 	struct vnode *vp = ap->a_vp;
880 	struct devfs_dirent *de;
881 	struct cdev *dev;
882 
883 	mtx_lock(&devfs_de_interlock);
884 	de = vp->v_data;
885 	if (de != NULL) {
886 		de->de_vnode = NULL;
887 		vp->v_data = NULL;
888 	}
889 	mtx_unlock(&devfs_de_interlock);
890 
891 	vnode_destroy_vobject(vp);
892 
893 	dev = vp->v_rdev;
894 	vp->v_rdev = NULL;
895 
896 	if (dev == NULL)
897 		return (0);
898 
899 	dev_lock();
900 	dev->si_usecount -= vp->v_usecount;
901 	dev_unlock();
902 	dev_rel(dev);
903 	return (0);
904 }
905 
906 static int
907 devfs_remove(struct vop_remove_args *ap)
908 {
909 	struct vnode *vp = ap->a_vp;
910 	struct devfs_dirent *dd;
911 	struct devfs_dirent *de;
912 	struct devfs_mount *dmp = VFSTODEVFS(vp->v_mount);
913 
914 	sx_xlock(&dmp->dm_lock);
915 	dd = ap->a_dvp->v_data;
916 	de = vp->v_data;
917 	if (de->de_cdp == NULL) {
918 		TAILQ_REMOVE(&dd->de_dlist, de, de_list);
919 		devfs_delete(dmp, de);
920 	} else {
921 		de->de_flags |= DE_WHITEOUT;
922 	}
923 	sx_xunlock(&dmp->dm_lock);
924 	return (0);
925 }
926 
/*
 * Revoke is called on a tty when a terminal session ends.  The vnode
 * is orphaned by setting v_op to deadfs so we need to let go of it
 * as well so that we create a new one next time around.
 *
 * Every dirent of the device (slots 0..cdp_maxdirent) that still has a
 * vnode is detached from it and the vnode is passed to vgone().  The
 * scan restarts from the beginning after each vnode because the locks
 * are dropped to call vgone().  Always returns 0.
 */
static int
devfs_revoke(struct vop_revoke_args *ap)
{
	struct vnode *vp = ap->a_vp, *vp2;
	struct cdev *dev;
	struct cdev_priv *cdp;
	struct devfs_dirent *de;
	int i;

	KASSERT((ap->a_flags & REVOKEALL) != 0, ("devfs_revoke !REVOKEALL"));

	dev = vp->v_rdev;
	cdp = dev->si_priv;
	for (;;) {
		mtx_lock(&devfs_de_interlock);
		dev_lock();
		vp2 = NULL;
		for (i = 0; i <= cdp->cdp_maxdirent; i++) {
			de = cdp->cdp_dirents[i];
			if (de == NULL)
				continue;

      			vp2 = de->de_vnode;
			if (vp2 != NULL) {
				/*
				 * Unhook the vnode, then trade the dev lock
				 * and devfs interlock for a hold on the
				 * vnode so it stays around across vgone().
				 */
				de->de_vnode = NULL;
				dev_unlock();
				VI_LOCK(vp2);
				mtx_unlock(&devfs_de_interlock);
				vholdl(vp2);
				VI_UNLOCK(vp2);
				vgone(vp2);
				vdrop(vp2);
				break;
			}
		}
		/* A vnode was handled with the locks dropped: rescan. */
		if (vp2 != NULL) {
			continue;
		}
		/* A full pass found no vnode: done. */
		dev_unlock();
		mtx_unlock(&devfs_de_interlock);
		break;
	}
	return (0);
}
977 
978 static int
979 devfs_rioctl(struct vop_ioctl_args *ap)
980 {
981 	int error;
982 	struct devfs_mount *dmp;
983 
984 	dmp = VFSTODEVFS(ap->a_vp->v_mount);
985 	sx_xlock(&dmp->dm_lock);
986 	devfs_populate(dmp);
987 	error = devfs_rules_ioctl(dmp, ap->a_command, ap->a_data, ap->a_td);
988 	sx_xunlock(&dmp->dm_lock);
989 	return (error);
990 }
991 
992 static int
993 devfs_rread(struct vop_read_args *ap)
994 {
995 
996 	if (ap->a_vp->v_type != VDIR)
997 		return (EINVAL);
998 	return (VOP_READDIR(ap->a_vp, ap->a_uio, ap->a_cred, NULL, NULL, NULL));
999 }
1000 
1001 static int
1002 devfs_setattr(struct vop_setattr_args *ap)
1003 {
1004 	struct devfs_dirent *de;
1005 	struct vattr *vap;
1006 	struct vnode *vp;
1007 	int c, error;
1008 	uid_t uid;
1009 	gid_t gid;
1010 
1011 	vap = ap->a_vap;
1012 	vp = ap->a_vp;
1013 	if ((vap->va_type != VNON) ||
1014 	    (vap->va_nlink != VNOVAL) ||
1015 	    (vap->va_fsid != VNOVAL) ||
1016 	    (vap->va_fileid != VNOVAL) ||
1017 	    (vap->va_blocksize != VNOVAL) ||
1018 	    (vap->va_flags != VNOVAL && vap->va_flags != 0) ||
1019 	    (vap->va_rdev != VNOVAL) ||
1020 	    ((int)vap->va_bytes != VNOVAL) ||
1021 	    (vap->va_gen != VNOVAL)) {
1022 		return (EINVAL);
1023 	}
1024 
1025 	de = vp->v_data;
1026 	if (vp->v_type == VDIR)
1027 		de = de->de_dir;
1028 
1029 	error = c = 0;
1030 	if (vap->va_uid == (uid_t)VNOVAL)
1031 		uid = de->de_uid;
1032 	else
1033 		uid = vap->va_uid;
1034 	if (vap->va_gid == (gid_t)VNOVAL)
1035 		gid = de->de_gid;
1036 	else
1037 		gid = vap->va_gid;
1038 	if (uid != de->de_uid || gid != de->de_gid) {
1039 		if (((ap->a_cred->cr_uid != de->de_uid) || uid != de->de_uid ||
1040 		    (gid != de->de_gid && !groupmember(gid, ap->a_cred))) &&
1041 		    (error = suser_cred(ap->a_td->td_ucred, SUSER_ALLOWJAIL)) != 0)
1042 			return (error);
1043 		de->de_uid = uid;
1044 		de->de_gid = gid;
1045 		c = 1;
1046 	}
1047 
1048 	if (vap->va_mode != (mode_t)VNOVAL) {
1049 		if ((ap->a_cred->cr_uid != de->de_uid) &&
1050 		    (error = suser_cred(ap->a_td->td_ucred, SUSER_ALLOWJAIL)))
1051 			return (error);
1052 		de->de_mode = vap->va_mode;
1053 		c = 1;
1054 	}
1055 
1056 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
1057 		/* See the comment in ufs_vnops::ufs_setattr(). */
1058 		if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, ap->a_td)) &&
1059 		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
1060 		    (error = VOP_ACCESS(vp, VWRITE, ap->a_cred, ap->a_td))))
1061 			return (error);
1062 		if (vap->va_atime.tv_sec != VNOVAL) {
1063 			if (vp->v_type == VCHR)
1064 				vp->v_rdev->si_atime = vap->va_atime;
1065 			else
1066 				de->de_atime = vap->va_atime;
1067 		}
1068 		if (vap->va_mtime.tv_sec != VNOVAL) {
1069 			if (vp->v_type == VCHR)
1070 				vp->v_rdev->si_mtime = vap->va_mtime;
1071 			else
1072 				de->de_mtime = vap->va_mtime;
1073 		}
1074 		c = 1;
1075 	}
1076 
1077 	if (c) {
1078 		if (vp->v_type == VCHR)
1079 			vfs_timestamp(&vp->v_rdev->si_ctime);
1080 		else
1081 			vfs_timestamp(&de->de_mtime);
1082 	}
1083 	return (0);
1084 }
1085 
#ifdef MAC
/*
 * Relabel a devfs vnode and push the new label down into its dirent.
 * Always returns 0.
 */
static int
devfs_setlabel(struct vop_setlabel_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct devfs_dirent *de = vp->v_data;

	mac_relabel_vnode(ap->a_cred, vp, ap->a_label);
	mac_update_devfsdirent(vp->v_mount, de, vp);
	return (0);
}
#endif
1102 
1103 static int
1104 devfs_stat_f(struct file *fp, struct stat *sb, struct ucred *cred, struct thread *td)
1105 {
1106 
1107 	return (vnops.fo_stat(fp, sb, cred, td));
1108 }
1109 
1110 static int
1111 devfs_symlink(struct vop_symlink_args *ap)
1112 {
1113 	int i, error;
1114 	struct devfs_dirent *dd;
1115 	struct devfs_dirent *de;
1116 	struct devfs_mount *dmp;
1117 	struct thread *td;
1118 
1119 	td = ap->a_cnp->cn_thread;
1120 	KASSERT(td == curthread, ("devfs_symlink: td != curthread"));
1121 	error = suser(td);
1122 	if (error)
1123 		return(error);
1124 	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
1125 	dd = ap->a_dvp->v_data;
1126 	de = devfs_newdirent(ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen);
1127 	de->de_uid = 0;
1128 	de->de_gid = 0;
1129 	de->de_mode = 0755;
1130 	de->de_inode = alloc_unr(devfs_inos);
1131 	de->de_dirent->d_type = DT_LNK;
1132 	i = strlen(ap->a_target) + 1;
1133 	de->de_symlink = malloc(i, M_DEVFS, M_WAITOK);
1134 	bcopy(ap->a_target, de->de_symlink, i);
1135 	sx_xlock(&dmp->dm_lock);
1136 #ifdef MAC
1137 	mac_create_devfs_symlink(ap->a_cnp->cn_cred, dmp->dm_mount, dd, de);
1138 #endif
1139 	TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list);
1140 	devfs_allocv(de, ap->a_dvp->v_mount, ap->a_vpp, td);
1141 	sx_xunlock(&dmp->dm_lock);
1142 	return (0);
1143 }
1144 
1145 /* ARGSUSED */
1146 static int
1147 devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td)
1148 {
1149 	struct cdev *dev;
1150 	int error, ioflag, resid;
1151 	struct cdevsw *dsw;
1152 
1153 	error = devfs_fp_check(fp, &dev, &dsw);
1154 	if (error)
1155 		return (error);
1156 	KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td));
1157 	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT | O_FSYNC);
1158 	if (ioflag & O_DIRECT)
1159 		ioflag |= IO_DIRECT;
1160 	if ((flags & FOF_OFFSET) == 0)
1161 		uio->uio_offset = fp->f_offset;
1162 
1163 	resid = uio->uio_resid;
1164 
1165 	error = dsw->d_write(dev, uio, ioflag);
1166 	dev_relthread(dev);
1167 	if (uio->uio_resid != resid || (error == 0 && resid != 0)) {
1168 		vfs_timestamp(&dev->si_ctime);
1169 		dev->si_mtime = dev->si_ctime;
1170 	}
1171 
1172 	if ((flags & FOF_OFFSET) == 0)
1173 		fp->f_offset = uio->uio_offset;
1174 	fp->f_nextoff = uio->uio_offset;
1175 	return (error);
1176 }
1177 
1178 dev_t
1179 dev2udev(struct cdev *x)
1180 {
1181 	if (x == NULL)
1182 		return (NODEV);
1183 	return (x->si_priv->cdp_inode);
1184 }
1185 
/*
 * File operations installed on a struct file by devfs_open(), so that
 * read/write/ioctl/poll/kqfilter on an open device go directly to the
 * driver.  stat and close are forwarded to the generic vnode file ops.
 */
static struct fileops devfs_ops_f = {
	.fo_read =	devfs_read_f,
	.fo_write =	devfs_write_f,
	.fo_ioctl =	devfs_ioctl_f,
	.fo_poll =	devfs_poll_f,
	.fo_kqfilter =	devfs_kqfilter_f,
	.fo_stat =	devfs_stat_f,
	.fo_close =	devfs_close_f,
	.fo_flags =	DFLAG_PASSABLE | DFLAG_SEEKABLE
};
1196 
/*
 * Vnode operations that devfs_allocv() passes to getnewvnode() for every
 * new devfs vnode.  Character device vnodes are switched to
 * devfs_specops right after creation.  Operations not listed fall back
 * to default_vnodeops.
 */
static struct vop_vector devfs_vnodeops = {
	.vop_default =		&default_vnodeops,

	.vop_access =		devfs_access,
	.vop_getattr =		devfs_getattr,
	.vop_ioctl =		devfs_rioctl,
	.vop_lookup =		devfs_lookup,
	.vop_mknod =		devfs_mknod,
	.vop_pathconf =		devfs_pathconf,
	.vop_read =		devfs_rread,
	.vop_readdir =		devfs_readdir,
	.vop_readlink =		devfs_readlink,
	.vop_reclaim =		devfs_reclaim,
	.vop_remove =		devfs_remove,
	.vop_revoke =		devfs_revoke,
	.vop_setattr =		devfs_setattr,
#ifdef MAC
	.vop_setlabel =		devfs_setlabel,
#endif
	.vop_symlink =		devfs_symlink,
};
1218 
/*
 * Vnode operations for character device vnodes (installed by
 * devfs_allocv() for DT_CHR entries).  Operations that make no sense on
 * a device node are wired to VOP_PANIC.  That includes vop_read and
 * vop_write: I/O on an open device is handled by the devfs_ops_f file
 * operations that devfs_open() installs.
 */
static struct vop_vector devfs_specops = {
	.vop_default =		&default_vnodeops,

	.vop_access =		devfs_access,
	.vop_advlock =		devfs_advlock,
	.vop_bmap =		VOP_PANIC,
	.vop_close =		devfs_close,
	.vop_create =		VOP_PANIC,
	.vop_fsync =		devfs_fsync,
	.vop_getattr =		devfs_getattr,
	.vop_lease =		VOP_NULL,
	.vop_link =		VOP_PANIC,
	.vop_mkdir =		VOP_PANIC,
	.vop_mknod =		VOP_PANIC,
	.vop_open =		devfs_open,
	.vop_pathconf =		devfs_pathconf,
	.vop_print =		devfs_print,
	.vop_read =		VOP_PANIC,
	.vop_readdir =		VOP_PANIC,
	.vop_readlink =		VOP_PANIC,
	.vop_reallocblks =	VOP_PANIC,
	.vop_reclaim =		devfs_reclaim,
	.vop_remove =		devfs_remove,
	.vop_rename =		VOP_PANIC,
	.vop_revoke =		devfs_revoke,
	.vop_rmdir =		VOP_PANIC,
	.vop_setattr =		devfs_setattr,
#ifdef MAC
	.vop_setlabel =		devfs_setlabel,
#endif
	.vop_strategy =		VOP_PANIC,
	.vop_symlink =		VOP_PANIC,
	.vop_write =		VOP_PANIC,
};
1253 
/*
 * Our calling convention to the device drivers used to be that we passed
 * vnode.h IO_* flags to read()/write(), but we're moving to fcntl.h O_
 * flags instead since that's what open(), close() and ioctl() take and
 * we don't really want vnode.h in device drivers.
 * We solved the source compatibility by redefining some vnode flags to
 * be the same as the fcntl ones and by sending down the bitwise OR of
 * the respective fcntl/vnode flags (see devfs_read_f() and
 * devfs_write_f()).  These CTASSERTs make sure nobody pulls the rug out
 * from under this.
 */
CTASSERT(O_NONBLOCK == IO_NDELAY);
CTASSERT(O_FSYNC == IO_SYNC);
1266