xref: /freebsd/sys/fs/devfs/devfs_vnops.c (revision 90e2fc863a95ebc1fc12ae656dbbc42c16e17a83)
1 /*-
2  * Copyright (c) 2000-2004
3  *	Poul-Henning Kamp.  All rights reserved.
4  * Copyright (c) 1989, 1992-1993, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software donated to Berkeley by
8  * Jan-Simon Pendry.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  *	@(#)kernfs_vnops.c	8.15 (Berkeley) 5/21/95
32  * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vnops.c 1.43
33  *
34  * $FreeBSD$
35  */
36 
37 /*
38  * TODO:
39  *	remove empty directories
40  *	mkdir: want it ?
41  */
42 
43 #include <opt_devfs.h>
44 #include <opt_mac.h>
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/conf.h>
49 #include <sys/dirent.h>
50 #include <sys/fcntl.h>
51 #include <sys/file.h>
52 #include <sys/filedesc.h>
53 #include <sys/filio.h>
54 #include <sys/kernel.h>
55 #include <sys/lock.h>
56 #include <sys/mac.h>
57 #include <sys/malloc.h>
58 #include <sys/mount.h>
59 #include <sys/namei.h>
60 #include <sys/proc.h>
61 #include <sys/stat.h>
62 #include <sys/sx.h>
63 #include <sys/time.h>
64 #include <sys/ttycom.h>
65 #include <sys/unistd.h>
66 #include <sys/vnode.h>
67 
/*
 * Forward declarations of the operation tables.  They are defined at the
 * bottom of this file but are referenced earlier: devfs_allocv() uses the
 * two vop vectors and devfs_open() installs devfs_ops_f on the file.
 */
static struct vop_vector devfs_vnodeops;
static struct vop_vector devfs_specops;
static struct fileops devfs_ops_f;
71 
72 #include <fs/devfs/devfs.h>
73 #include <fs/devfs/devfs_int.h>
74 
75 static int
76 devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp)
77 {
78 
79 	*devp = fp->f_vnode->v_rdev;
80 	if (*devp != fp->f_data)
81 		return (ENXIO);
82 	KASSERT((*devp)->si_refcount > 0,
83 	    ("devfs: un-referenced struct cdev *(%s)", devtoname(*devp)));
84 	*dswp = dev_refthread(*devp);
85 	if (*dswp == NULL)
86 		return (ENXIO);
87 	return (0);
88 }
89 
90 /*
91  * Construct the fully qualified path name relative to the mountpoint
92  */
93 static char *
94 devfs_fqpn(char *buf, struct vnode *dvp, struct componentname *cnp)
95 {
96 	int i;
97 	struct devfs_dirent *de, *dd;
98 	struct devfs_mount *dmp;
99 
100 	dmp = VFSTODEVFS(dvp->v_mount);
101 	dd = dvp->v_data;
102 	i = SPECNAMELEN;
103 	buf[i] = '\0';
104 	i -= cnp->cn_namelen;
105 	if (i < 0)
106 		 return (NULL);
107 	bcopy(cnp->cn_nameptr, buf + i, cnp->cn_namelen);
108 	de = dd;
109 	while (de != dmp->dm_rootdir) {
110 		i--;
111 		if (i < 0)
112 			 return (NULL);
113 		buf[i] = '/';
114 		i -= de->de_dirent->d_namlen;
115 		if (i < 0)
116 			 return (NULL);
117 		bcopy(de->de_dirent->d_name, buf + i,
118 		    de->de_dirent->d_namlen);
119 		de = TAILQ_FIRST(&de->de_dlist);	/* "." */
120 		de = TAILQ_NEXT(de, de_list);		/* ".." */
121 		de = de->de_dir;
122 	}
123 	return (buf + i);
124 }
125 
/*
 * Return, in *vpp, an exclusively locked vnode for the devfs directory
 * entry de on mount mp.
 *
 * If the entry already has a vnode it is acquired with vget(); otherwise a
 * new vnode is obtained from getnewvnode(), typed according to the
 * dirent's d_type, linked to the entry and locked.  Returns 0 on success,
 * ENOENT for a character device whose cdev is not CDP_ACTIVE, or the
 * error from getnewvnode().  td must be curthread (asserted).
 */
int
devfs_allocv(struct devfs_dirent *de, struct mount *mp, struct vnode **vpp, struct thread *td)
{
	int error;
	struct vnode *vp;
	struct cdev *dev;

	KASSERT(td == curthread, ("devfs_allocv: td != curthread"));
loop:
	/* Re-read the cached vnode on every pass; a failed vget() retries. */
	vp = de->de_vnode;
	if (vp != NULL) {
		if (vget(vp, LK_EXCLUSIVE, td))
			goto loop;
		*vpp = vp;
		return (0);
	}
	if (de->de_dirent->d_type == DT_CHR) {
		/* Refuse to create a vnode for a device that is not active. */
		if (!(de->de_cdp->cdp_flags & CDP_ACTIVE))
			return (ENOENT);
		dev = &de->de_cdp->cdp_c;
	} else {
		dev = NULL;
	}
	error = getnewvnode("devfs", mp, &devfs_vnodeops, &vp);
	if (error != 0) {
		printf("devfs_allocv: failed to allocate new vnode\n");
		return (error);
	}

	if (de->de_dirent->d_type == DT_CHR) {
		vp->v_type = VCHR;
		/*
		 * Under the vnode interlock and the dev lock: reference the
		 * cdev, attach it to the vnode and fold the vnode's use
		 * count into the device's use count.
		 */
		VI_LOCK(vp);
		dev_lock();
		dev_refl(dev);
		vp->v_rdev = dev;
		KASSERT(vp->v_usecount == 1,
		    ("%s %d (%d)\n", __func__, __LINE__, vp->v_usecount));
		dev->si_usecount += vp->v_usecount;
		dev_unlock();
		VI_UNLOCK(vp);
		/* Device vnodes use the special-file operations vector. */
		vp->v_op = &devfs_specops;
	} else if (de->de_dirent->d_type == DT_DIR) {
		vp->v_type = VDIR;
	} else if (de->de_dirent->d_type == DT_LNK) {
		vp->v_type = VLNK;
	} else {
		vp->v_type = VBAD;
	}
	/* Cross-link the vnode and the directory entry. */
	vp->v_data = de;
	de->de_vnode = vp;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
#ifdef MAC
	mac_associate_vnode_devfs(mp, de, vp);
#endif
	*vpp = vp;
	return (0);
}
183 
184 static int
185 devfs_access(struct vop_access_args *ap)
186 {
187 	struct vnode *vp = ap->a_vp;
188 	struct devfs_dirent *de;
189 	int error;
190 
191 	de = vp->v_data;
192 	if (vp->v_type == VDIR)
193 		de = de->de_dir;
194 
195 	error = vaccess(vp->v_type, de->de_mode, de->de_uid, de->de_gid,
196 	    ap->a_mode, ap->a_cred, NULL);
197 	if (!error)
198 		return (error);
199 	if (error != EACCES)
200 		return (error);
201 	/* We do, however, allow access to the controlling terminal */
202 	if (!(ap->a_td->td_proc->p_flag & P_CONTROLT))
203 		return (error);
204 	if (ap->a_td->td_proc->p_session->s_ttyvp == de->de_vnode)
205 		return (0);
206 	return (error);
207 }
208 
209 /* ARGSUSED */
210 static int
211 devfs_advlock(struct vop_advlock_args *ap)
212 {
213 
214 	return (ap->a_flags & F_FLOCK ? EOPNOTSUPP : EINVAL);
215 }
216 
/*
 * VOP_CLOSE for device vnodes (installed in devfs_specops).
 *
 * First drops the session's reference on the vnode when this is the
 * closing process' controlling terminal and only two references remain.
 * Then calls the driver's d_close, but only when the vnode is doomed, the
 * driver sets D_TRACKCLOSE, or count_dev() reports no more than one user.
 * Returns ENXIO when no thread reference on the device can be obtained,
 * 0 when the driver close is skipped, otherwise d_close's result.
 */
/* ARGSUSED */
static int
devfs_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp, *oldvp;
	struct thread *td = ap->a_td;
	struct cdev *dev = vp->v_rdev;
	struct cdevsw *dsw;
	int error;

	/*
	 * Hack: a tty device that is a controlling terminal
	 * has a reference from the session structure.
	 * We cannot easily tell that a character device is
	 * a controlling terminal, unless it is the closing
	 * process' controlling terminal.  In that case,
	 * if the reference count is 2 (this last descriptor
	 * plus the session), release the reference from the session.
	 */

	/*
	 * This needs to be rewritten to take the vp interlock into
	 * consideration.
	 */

	oldvp = NULL;
	sx_xlock(&proctree_lock);
	if (td && vp == td->td_proc->p_session->s_ttyvp) {
		SESS_LOCK(td->td_proc->p_session);
		VI_LOCK(vp);
		if (count_dev(dev) == 2 && (vp->v_iflag & VI_DOOMED) == 0) {
			td->td_proc->p_session->s_ttyvp = NULL;
			oldvp = vp;
		}
		VI_UNLOCK(vp);
		SESS_UNLOCK(td->td_proc->p_session);
	}
	sx_xunlock(&proctree_lock);
	/* The session's reference is released only after the locks are dropped. */
	if (oldvp != NULL)
		vrele(oldvp);
	/*
	 * We do not want to really close the device if it
	 * is still in use unless we are trying to close it
	 * forcibly. Since every use (buffer, vnode, swap, cmap)
	 * holds a reference to the vnode, and because we mark
	 * any other vnodes that alias this device, when the
	 * sum of the reference counts on all the aliased
	 * vnodes descends to one, we are on last close.
	 */
	dsw = dev_refthread(dev);
	if (dsw == NULL)
		return (ENXIO);
	VI_LOCK(vp);
	if (vp->v_iflag & VI_DOOMED) {
		/* Forced close. */
	} else if (dsw->d_flags & D_TRACKCLOSE) {
		/* Keep device updated on status. */
	} else if (count_dev(dev) > 1) {
		/* Still in use elsewhere: skip the driver close. */
		VI_UNLOCK(vp);
		dev_relthread(dev);
		return (0);
	}
	VI_UNLOCK(vp);
	KASSERT(dev->si_refcount > 0,
	    ("devfs_close() on un-referenced struct cdev *(%s)", devtoname(dev)));
	/* Giant is dropped around d_close unless the driver sets D_NEEDGIANT. */
	if (!(dsw->d_flags & D_NEEDGIANT)) {
		DROP_GIANT();
		error = dsw->d_close(dev, ap->a_fflag, S_IFCHR, td);
		PICKUP_GIANT();
	} else {
		error = dsw->d_close(dev, ap->a_fflag, S_IFCHR, td);
	}
	dev_relthread(dev);
	return (error);
}
292 
293 static int
294 devfs_close_f(struct file *fp, struct thread *td)
295 {
296 
297 	return (vnops.fo_close(fp, td));
298 }
299 
300 /* ARGSUSED */
301 static int
302 devfs_fsync(struct vop_fsync_args *ap)
303 {
304 	if (!vn_isdisk(ap->a_vp, NULL))
305 		return (0);
306 
307 	return (vop_stdfsync(ap));
308 }
309 
310 static int
311 devfs_getattr(struct vop_getattr_args *ap)
312 {
313 	struct vnode *vp = ap->a_vp;
314 	struct vattr *vap = ap->a_vap;
315 	int error = 0;
316 	struct devfs_dirent *de;
317 	struct cdev *dev;
318 
319 	de = vp->v_data;
320 	KASSERT(de != NULL, ("Null dirent in devfs_getattr vp=%p", vp));
321 	if (vp->v_type == VDIR) {
322 		de = de->de_dir;
323 		KASSERT(de != NULL,
324 		    ("Null dir dirent in devfs_getattr vp=%p", vp));
325 	}
326 	bzero((caddr_t) vap, sizeof(*vap));
327 	vattr_null(vap);
328 	vap->va_uid = de->de_uid;
329 	vap->va_gid = de->de_gid;
330 	vap->va_mode = de->de_mode;
331 	if (vp->v_type == VLNK)
332 		vap->va_size = strlen(de->de_symlink);
333 	else if (vp->v_type == VDIR)
334 		vap->va_size = vap->va_bytes = DEV_BSIZE;
335 	else
336 		vap->va_size = 0;
337 	if (vp->v_type != VDIR)
338 		vap->va_bytes = 0;
339 	vap->va_blocksize = DEV_BSIZE;
340 	vap->va_type = vp->v_type;
341 
342 #define fix(aa)							\
343 	do {							\
344 		if ((aa).tv_sec == 0) {				\
345 			(aa).tv_sec = boottime.tv_sec;		\
346 			(aa).tv_nsec = boottime.tv_usec * 1000; \
347 		}						\
348 	} while (0)
349 
350 	if (vp->v_type != VCHR)  {
351 		fix(de->de_atime);
352 		vap->va_atime = de->de_atime;
353 		fix(de->de_mtime);
354 		vap->va_mtime = de->de_mtime;
355 		fix(de->de_ctime);
356 		vap->va_ctime = de->de_ctime;
357 	} else {
358 		dev = vp->v_rdev;
359 		fix(dev->si_atime);
360 		vap->va_atime = dev->si_atime;
361 		fix(dev->si_mtime);
362 		vap->va_mtime = dev->si_mtime;
363 		fix(dev->si_ctime);
364 		vap->va_ctime = dev->si_ctime;
365 
366 		vap->va_rdev = dev->si_priv->cdp_inode;
367 	}
368 	vap->va_gen = 0;
369 	vap->va_flags = 0;
370 	vap->va_nlink = de->de_links;
371 	vap->va_fileid = de->de_inode;
372 
373 	return (error);
374 }
375 
/*
 * fileops ioctl for devices opened through devfs.
 *
 * FIODTYPE and FIODGNAME are answered here without calling the driver;
 * everything else goes to the driver's d_ioctl.  A driver result of
 * ENOIOCTL is reported as ENOTTY.  After a successful TIOCSCTTY the
 * session's controlling-terminal vnode is switched to this file's vnode.
 */
/* ARGSUSED */
static int
devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, struct thread *td)
{
	struct cdev *dev;
	struct cdevsw *dsw;
	struct vnode *vp;
	struct vnode *vpold;
	int error, i;
	const char *p;
	struct fiodgname_arg *fgn;

	error = devfs_fp_check(fp, &dev, &dsw);
	if (error)
		return (error);

	if (com == FIODTYPE) {
		/* Report the device type bits from the driver's flags. */
		*(int *)data = dsw->d_flags & D_TYPEMASK;
		dev_relthread(dev);
		return (0);
	} else if (com == FIODGNAME) {
		/* Copy out the device name, NUL included, if the buffer fits it. */
		fgn = data;
		p = devtoname(dev);
		i = strlen(p) + 1;
		if (i > fgn->len)
			error = EINVAL;
		else
			error = copyout(p, fgn->buf, i);
		dev_relthread(dev);
		return (error);
	}
	error = dsw->d_ioctl(dev, com, data, fp->f_flag, td);
	dev_relthread(dev);
	if (error == ENOIOCTL)
		error = ENOTTY;
	if (error == 0 && com == TIOCSCTTY) {
		vp = fp->f_vnode;

		/* Do nothing if reassigning same control tty */
		sx_slock(&proctree_lock);
		if (td->td_proc->p_session->s_ttyvp == vp) {
			sx_sunlock(&proctree_lock);
			return (0);
		}

		mtx_lock(&Giant);

		/* Take a reference for the session before publishing vp. */
		vpold = td->td_proc->p_session->s_ttyvp;
		VREF(vp);
		SESS_LOCK(td->td_proc->p_session);
		td->td_proc->p_session->s_ttyvp = vp;
		SESS_UNLOCK(td->td_proc->p_session);

		sx_sunlock(&proctree_lock);

		/* Get rid of reference to old control tty */
		if (vpold)
			vrele(vpold);
		mtx_unlock(&Giant);
	}
	return (error);
}
438 
439 /* ARGSUSED */
440 static int
441 devfs_kqfilter_f(struct file *fp, struct knote *kn)
442 {
443 	struct cdev *dev;
444 	struct cdevsw *dsw;
445 	int error;
446 
447 	error = devfs_fp_check(fp, &dev, &dsw);
448 	if (error)
449 		return (error);
450 	error = dsw->d_kqfilter(dev, kn);
451 	dev_relthread(dev);
452 	return (error);
453 }
454 
/*
 * Worker for devfs_lookup(), which calls it with the mount's dm_lock held
 * exclusively.
 *
 * Resolves the component in ap->a_cnp inside directory ap->a_dvp.  "." and
 * ".." are handled specially.  For other names the directory is searched
 * and, if nothing is found, the dev_clone event handlers are given a
 * chance to create the device on demand.  On success *ap->a_vpp holds the
 * resulting vnode and 0 is returned.  Errors returned directly from here
 * are EOPNOTSUPP (RENAME of the last component), ENOTDIR, EIO (".." at
 * the root), EINVAL, ENOENT and EJUSTRETURN, plus whatever VOP_ACCESS()
 * or devfs_allocv() report.
 */
static int
devfs_lookupx(struct vop_lookup_args *ap)
{
	struct componentname *cnp;
	struct vnode *dvp, **vpp;
	struct thread *td;
	struct devfs_dirent *de, *dd;
	struct devfs_dirent **dde;
	struct devfs_mount *dmp;
	struct cdev *cdev;
	int error, flags, nameiop;
	char specname[SPECNAMELEN + 1], *pname;

	cnp = ap->a_cnp;
	vpp = ap->a_vpp;
	dvp = ap->a_dvp;
	pname = cnp->cn_nameptr;
	td = cnp->cn_thread;
	flags = cnp->cn_flags;
	nameiop = cnp->cn_nameiop;
	dmp = VFSTODEVFS(dvp->v_mount);
	dd = dvp->v_data;
	*vpp = NULLVP;

	if ((flags & ISLASTCN) && nameiop == RENAME)
		return (EOPNOTSUPP);

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT))
		return (EIO);

	/* The caller needs search permission on the directory. */
	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td);
	if (error)
		return (error);

	/* "." resolves to the directory itself, with an extra reference. */
	if (cnp->cn_namelen == 1 && *pname == '.') {
		if ((flags & ISLASTCN) && nameiop != LOOKUP)
			return (EINVAL);
		*vpp = dvp;
		VREF(dvp);
		return (0);
	}

	if (flags & ISDOTDOT) {
		if ((flags & ISLASTCN) && nameiop != LOOKUP)
			return (EINVAL);
		/* dvp is unlocked while the parent's vnode is obtained. */
		VOP_UNLOCK(dvp, 0, td);
		de = TAILQ_FIRST(&dd->de_dlist);	/* "." */
		de = TAILQ_NEXT(de, de_list);		/* ".." */
		de = de->de_dir;
		error = devfs_allocv(de, dvp->v_mount, vpp, td);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
		return (error);
	}

	devfs_populate(dmp);
	dd = dvp->v_data;
	de = devfs_find(dd, cnp->cn_nameptr, cnp->cn_namelen);
	while (de == NULL) {	/* While(...) so we can use break */

		if (nameiop == DELETE)
			return (ENOENT);

		/*
		 * OK, we didn't have an entry for the name we were asked for
		 * so we try to see if anybody can create it on demand.
		 */
		pname = devfs_fqpn(specname, dvp, cnp);
		if (pname == NULL)
			break;

		cdev = NULL;
		EVENTHANDLER_INVOKE(dev_clone,
		    td->td_ucred, pname, strlen(pname), &cdev);
		if (cdev == NULL)
			break;

		/* A device was cloned: repopulate so it gets a dirent. */
		devfs_populate(dmp);

		dev_lock();
		/*
		 * NOTE(review): dde is the address of an array slot, so the
		 * "dde != NULL" half of the test looks redundant -- confirm.
		 */
		dde = &cdev->si_priv->cdp_dirents[dmp->dm_idx];
		if (dde != NULL && *dde != NULL)
			de = *dde;
		dev_unlock();
		dev_rel(cdev);
		break;
	}

	if (de == NULL || de->de_flags & DE_WHITEOUT) {
		/* Nothing there: let a create of the last component proceed. */
		if ((nameiop == CREATE || nameiop == RENAME) &&
		    (flags & (LOCKPARENT | WANTPARENT)) && (flags & ISLASTCN)) {
			cnp->cn_flags |= SAVENAME;
			return (EJUSTRETURN);
		}
		return (ENOENT);
	}

	if ((cnp->cn_nameiop == DELETE) && (flags & ISLASTCN)) {
		/* Deleting an entry requires write permission on the directory. */
		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
		if (error)
			return (error);
		/*
		 * NOTE(review): *vpp was set to NULLVP at the top and is not
		 * assigned on any path that reaches here, so this branch
		 * looks unreachable -- confirm.
		 */
		if (*vpp == dvp) {
			VREF(dvp);
			*vpp = dvp;
			return (0);
		}
	}
	error = devfs_allocv(de, dvp->v_mount, vpp, td);
	return (error);
}
567 
568 static int
569 devfs_lookup(struct vop_lookup_args *ap)
570 {
571 	int j;
572 	struct devfs_mount *dmp;
573 
574 	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
575 	sx_xlock(&dmp->dm_lock);
576 	j = devfs_lookupx(ap);
577 	sx_xunlock(&dmp->dm_lock);
578 	return (j);
579 }
580 
581 static int
582 devfs_mknod(struct vop_mknod_args *ap)
583 {
584 	struct componentname *cnp;
585 	struct vnode *dvp, **vpp;
586 	struct thread *td;
587 	struct devfs_dirent *dd, *de;
588 	struct devfs_mount *dmp;
589 	int error;
590 
591 	/*
592 	 * The only type of node we should be creating here is a
593 	 * character device, for anything else return EOPNOTSUPP.
594 	 */
595 	if (ap->a_vap->va_type != VCHR)
596 		return (EOPNOTSUPP);
597 	dvp = ap->a_dvp;
598 	dmp = VFSTODEVFS(dvp->v_mount);
599 	sx_xlock(&dmp->dm_lock);
600 
601 	cnp = ap->a_cnp;
602 	vpp = ap->a_vpp;
603 	td = cnp->cn_thread;
604 	dd = dvp->v_data;
605 
606 	error = ENOENT;
607 	TAILQ_FOREACH(de, &dd->de_dlist, de_list) {
608 		if (cnp->cn_namelen != de->de_dirent->d_namlen)
609 			continue;
610 		if (bcmp(cnp->cn_nameptr, de->de_dirent->d_name,
611 		    de->de_dirent->d_namlen) != 0)
612 			continue;
613 		if (de->de_flags & DE_WHITEOUT)
614 			break;
615 		goto notfound;
616 	}
617 	if (de == NULL)
618 		goto notfound;
619 	de->de_flags &= ~DE_WHITEOUT;
620 	error = devfs_allocv(de, dvp->v_mount, vpp, td);
621 notfound:
622 	sx_xunlock(&dmp->dm_lock);
623 	return (error);
624 }
625 
/*
 * VOP_OPEN for device vnodes (installed in devfs_specops).
 *
 * Calls the driver's d_fdopen if it has one, else d_open, with the vnode
 * unlocked for the duration of the call.  When the open succeeds and a
 * file descriptor index was supplied, the file's ops are switched to
 * devfs_ops_f and f_data is set to the cdev, so that later file
 * operations go through the devfs_*_f functions in this file.
 *
 * Returns ENXIO for VBLK vnodes, a missing cdev or a failed
 * dev_refthread(); the securelevel_ge() error for write opens of disks;
 * otherwise the driver's result.
 */
/* ARGSUSED */
static int
devfs_open(struct vop_open_args *ap)
{
	struct thread *td = ap->a_td;
	struct vnode *vp = ap->a_vp;
	struct cdev *dev = vp->v_rdev;
	struct file *fp;
	int error;
	struct cdevsw *dsw;

	if (vp->v_type == VBLK)
		return (ENXIO);

	if (dev == NULL)
		return (ENXIO);

	/* Make this field valid before any I/O in d_open. */
	if (dev->si_iosize_max == 0)
		dev->si_iosize_max = DFLTPHYS;

	if (vn_isdisk(vp, NULL) &&
	    ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
		/*
		* When running in very secure mode, do not allow
		* opens for writing of any disks.
		* XXX: should be in geom_dev.c, but we lack the cred there.
		*/
		error = securelevel_ge(td->td_ucred, 2);
		if (error)
			return (error);
	}

	dsw = dev_refthread(dev);
	if (dsw == NULL)
		return (ENXIO);

	/* XXX: Special casing of ttys for deadfs.  Probably redundant. */
	if (dsw->d_flags & D_TTY)
		vp->v_vflag |= VV_ISTTY;

	/* The vnode lock is not held across the driver's open routine. */
	VOP_UNLOCK(vp, 0, td);

	/* Giant is dropped around the call unless the driver sets D_NEEDGIANT. */
	if(!(dsw->d_flags & D_NEEDGIANT)) {
		DROP_GIANT();
		if (dsw->d_fdopen != NULL)
			error = dsw->d_fdopen(dev, ap->a_mode, td, ap->a_fdidx);
		else
			error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td);
		PICKUP_GIANT();
	} else {
		if (dsw->d_fdopen != NULL)
			error = dsw->d_fdopen(dev, ap->a_mode, td, ap->a_fdidx);
		else
			error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td);
	}

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);

	dev_relthread(dev);

	if (error)
		return (error);

#if 0	/* /dev/console */
	KASSERT(ap->a_fdidx >= 0,
	     ("Could not vnode bypass device on fd %d", ap->a_fdidx));
#else
	/* No descriptor index was supplied: leave the file ops alone. */
	if(ap->a_fdidx < 0)
		return (error);
#endif
	/*
	 * This is a pretty disgustingly long chain, but I am not
	 * sure there is any better way.  Passing the fdidx into
	 * VOP_OPEN() offers us more information than just passing
	 * the file *.
	 */
	fp = ap->a_td->td_proc->p_fd->fd_ofiles[ap->a_fdidx];
	KASSERT(fp->f_ops == &badfileops,
	     ("Could not vnode bypass device on fdops %p", fp->f_ops));
	fp->f_ops = &devfs_ops_f;
	fp->f_data = dev;
	return (error);
}
710 
711 static int
712 devfs_pathconf(struct vop_pathconf_args *ap)
713 {
714 
715 	switch (ap->a_name) {
716 	case _PC_MAC_PRESENT:
717 #ifdef MAC
718 		/*
719 		 * If MAC is enabled, devfs automatically supports
720 		 * trivial non-persistant label storage.
721 		 */
722 		*ap->a_retval = 1;
723 #else
724 		*ap->a_retval = 0;
725 #endif
726 		return (0);
727 	default:
728 		return (vop_stdpathconf(ap));
729 	}
730 	/* NOTREACHED */
731 }
732 
733 /* ARGSUSED */
734 static int
735 devfs_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td)
736 {
737 	struct cdev *dev;
738 	struct cdevsw *dsw;
739 	int error;
740 
741 	error = devfs_fp_check(fp, &dev, &dsw);
742 	if (error)
743 		return (error);
744 	error = dsw->d_poll(dev, events, td);
745 	dev_relthread(dev);
746 	return(error);
747 }
748 
749 /*
750  * Print out the contents of a special device vnode.
751  */
752 static int
753 devfs_print(struct vop_print_args *ap)
754 {
755 
756 	printf("\tdev %s\n", devtoname(ap->a_vp->v_rdev));
757 	return (0);
758 }
759 
760 /* ARGSUSED */
761 static int
762 devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td)
763 {
764 	struct cdev *dev;
765 	int ioflag, error, resid;
766 	struct cdevsw *dsw;
767 
768 	error = devfs_fp_check(fp, &dev, &dsw);
769 	if (error)
770 		return (error);
771 	resid = uio->uio_resid;
772 	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT);
773 	if (ioflag & O_DIRECT)
774 		ioflag |= IO_DIRECT;
775 
776 	if ((flags & FOF_OFFSET) == 0)
777 		uio->uio_offset = fp->f_offset;
778 
779 	error = dsw->d_read(dev, uio, ioflag);
780 	dev_relthread(dev);
781 	if (uio->uio_resid != resid || (error == 0 && resid != 0))
782 		vfs_timestamp(&dev->si_atime);
783 
784 	if ((flags & FOF_OFFSET) == 0)
785 		fp->f_offset = uio->uio_offset;
786 	fp->f_nextoff = uio->uio_offset;
787 	return (error);
788 }
789 
790 static int
791 devfs_readdir(struct vop_readdir_args *ap)
792 {
793 	int error;
794 	struct uio *uio;
795 	struct dirent *dp;
796 	struct devfs_dirent *dd;
797 	struct devfs_dirent *de;
798 	struct devfs_mount *dmp;
799 	off_t off, oldoff;
800 
801 	if (ap->a_vp->v_type != VDIR)
802 		return (ENOTDIR);
803 
804 	uio = ap->a_uio;
805 	if (uio->uio_offset < 0)
806 		return (EINVAL);
807 
808 	dmp = VFSTODEVFS(ap->a_vp->v_mount);
809 	sx_xlock(&dmp->dm_lock);
810 	devfs_populate(dmp);
811 	error = 0;
812 	de = ap->a_vp->v_data;
813 	off = 0;
814 	oldoff = uio->uio_offset;
815 	TAILQ_FOREACH(dd, &de->de_dlist, de_list) {
816 		KASSERT(dd->de_cdp != (void *)0xdeadc0de, ("%s %d\n", __func__, __LINE__));
817 		if (dd->de_flags & DE_WHITEOUT)
818 			continue;
819 		if (dd->de_dirent->d_type == DT_DIR)
820 			de = dd->de_dir;
821 		else
822 			de = dd;
823 		dp = dd->de_dirent;
824 		if (dp->d_reclen > uio->uio_resid)
825 			break;
826 		dp->d_fileno = de->de_inode;
827 		if (off >= uio->uio_offset) {
828 			error = vfs_read_dirent(ap, dp, off);
829 			if (error)
830 				break;
831 		}
832 		off += dp->d_reclen;
833 	}
834 	sx_xunlock(&dmp->dm_lock);
835 	uio->uio_offset = off;
836 	return (error);
837 }
838 
839 static int
840 devfs_readlink(struct vop_readlink_args *ap)
841 {
842 	struct devfs_dirent *de;
843 
844 	de = ap->a_vp->v_data;
845 	return (uiomove(de->de_symlink, strlen(de->de_symlink), ap->a_uio));
846 }
847 
848 static int
849 devfs_reclaim(struct vop_reclaim_args *ap)
850 {
851 	struct vnode *vp = ap->a_vp;
852 	struct devfs_dirent *de;
853 	struct cdev *dev;
854 
855 	de = vp->v_data;
856 	if (de != NULL)
857 		de->de_vnode = NULL;
858 	vp->v_data = NULL;
859 	vnode_destroy_vobject(vp);
860 
861 	dev = vp->v_rdev;
862 	vp->v_rdev = NULL;
863 
864 	if (dev == NULL)
865 		return (0);
866 
867 	dev_lock();
868 	dev->si_usecount -= vp->v_usecount;
869 	dev_unlock();
870 	dev_rel(dev);
871 	return (0);
872 }
873 
874 static int
875 devfs_remove(struct vop_remove_args *ap)
876 {
877 	struct vnode *vp = ap->a_vp;
878 	struct devfs_dirent *dd;
879 	struct devfs_dirent *de;
880 	struct devfs_mount *dmp = VFSTODEVFS(vp->v_mount);
881 
882 	sx_xlock(&dmp->dm_lock);
883 	dd = ap->a_dvp->v_data;
884 	de = vp->v_data;
885 	if (de->de_cdp == NULL) {
886 		TAILQ_REMOVE(&dd->de_dlist, de, de_list);
887 		devfs_delete(dmp, de);
888 	} else {
889 		de->de_flags |= DE_WHITEOUT;
890 	}
891 	sx_xunlock(&dmp->dm_lock);
892 	return (0);
893 }
894 
895 /*
896  * Revoke is called on a tty when a terminal session ends.  The vnode
897  * is orphaned by setting v_op to deadfs so we need to let go of it
898  * as well so that we create a new one next time around.
899  *
900  * XXX: locking :-(
901  * XXX: We mess around with other mountpoints without holding their sxlock.
902  * XXX: We hold the devlock() when we zero their vnode pointer, but is that
903  * XXX: enough ?
904  */
905 static int
906 devfs_revoke(struct vop_revoke_args *ap)
907 {
908 	struct vnode *vp = ap->a_vp, *vp2;
909 	struct cdev *dev;
910 	struct cdev_priv *cdp;
911 	struct devfs_dirent *de;
912 	int i;
913 
914 	KASSERT((ap->a_flags & REVOKEALL) != 0, ("devfs_revoke !REVOKEALL"));
915 
916 	dev = vp->v_rdev;
917 	cdp = dev->si_priv;
918 	for (;;) {
919 		dev_lock();
920 		vp2 = NULL;
921 		for (i = 0; i <= cdp->cdp_maxdirent; i++) {
922 			de = cdp->cdp_dirents[i];
923 			if (de == NULL)
924 				continue;
925 			vp2 = de->de_vnode;
926 			de->de_vnode = NULL;
927 			if (vp2 != NULL)
928 				break;
929 		}
930 		dev_unlock();
931 		if (vp2 != NULL) {
932 			vgone(vp2);
933 			continue;
934 		}
935 		break;
936 	}
937 	return (0);
938 }
939 
940 static int
941 devfs_rioctl(struct vop_ioctl_args *ap)
942 {
943 	int error;
944 	struct devfs_mount *dmp;
945 
946 	dmp = VFSTODEVFS(ap->a_vp->v_mount);
947 	sx_xlock(&dmp->dm_lock);
948 	devfs_populate(dmp);
949 	error = devfs_rules_ioctl(dmp, ap->a_command, ap->a_data, ap->a_td);
950 	sx_xunlock(&dmp->dm_lock);
951 	return (error);
952 }
953 
954 static int
955 devfs_rread(struct vop_read_args *ap)
956 {
957 
958 	if (ap->a_vp->v_type != VDIR)
959 		return (EINVAL);
960 	return (VOP_READDIR(ap->a_vp, ap->a_uio, ap->a_cred, NULL, NULL, NULL));
961 }
962 
/*
 * VOP_SETATTR: change owner, group, mode and access/modification times.
 *
 * Any attempt to set another attribute (type, link count, fsid, fileid,
 * block size, non-zero flags, rdev, bytes, generation) is rejected with
 * EINVAL.  Ownership and mode live in the directory entry (for a
 * directory vnode, in its de_dir entry); times live in the cdev for
 * character devices and in the directory entry otherwise.  Returns 0 on
 * success or the error from the failing permission check.
 */
static int
devfs_setattr(struct vop_setattr_args *ap)
{
	struct devfs_dirent *de;
	struct vattr *vap;
	struct vnode *vp;
	int c, error;
	uid_t uid;
	gid_t gid;

	vap = ap->a_vap;
	vp = ap->a_vp;
	/* Reject every attribute that devfs cannot change. */
	if ((vap->va_type != VNON) ||
	    (vap->va_nlink != VNOVAL) ||
	    (vap->va_fsid != VNOVAL) ||
	    (vap->va_fileid != VNOVAL) ||
	    (vap->va_blocksize != VNOVAL) ||
	    (vap->va_flags != VNOVAL && vap->va_flags != 0) ||
	    (vap->va_rdev != VNOVAL) ||
	    ((int)vap->va_bytes != VNOVAL) ||
	    (vap->va_gen != VNOVAL)) {
		return (EINVAL);
	}

	de = vp->v_data;
	if (vp->v_type == VDIR)
		de = de->de_dir;

	/* c records whether anything was changed. */
	error = c = 0;
	/* An unset uid/gid means "keep the current value". */
	if (vap->va_uid == (uid_t)VNOVAL)
		uid = de->de_uid;
	else
		uid = vap->va_uid;
	if (vap->va_gid == (gid_t)VNOVAL)
		gid = de->de_gid;
	else
		gid = vap->va_gid;
	if (uid != de->de_uid || gid != de->de_gid) {
		/*
		 * Without privilege, only the owner may change the group,
		 * and only to a group the credential is a member of; a
		 * change of owner always needs privilege.
		 */
		if (((ap->a_cred->cr_uid != de->de_uid) || uid != de->de_uid ||
		    (gid != de->de_gid && !groupmember(gid, ap->a_cred))) &&
		    (error = suser_cred(ap->a_td->td_ucred, SUSER_ALLOWJAIL)) != 0)
			return (error);
		de->de_uid = uid;
		de->de_gid = gid;
		c = 1;
	}

	if (vap->va_mode != (mode_t)VNOVAL) {
		/* Only the owner or a privileged caller may change the mode. */
		if ((ap->a_cred->cr_uid != de->de_uid) &&
		    (error = suser_cred(ap->a_td->td_ucred, SUSER_ALLOWJAIL)))
			return (error);
		de->de_mode = vap->va_mode;
		c = 1;
	}

	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
		/* See the comment in ufs_vnops::ufs_setattr(). */
		if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, ap->a_td)) &&
		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
		    (error = VOP_ACCESS(vp, VWRITE, ap->a_cred, ap->a_td))))
			return (error);
		if (vap->va_atime.tv_sec != VNOVAL) {
			if (vp->v_type == VCHR)
				vp->v_rdev->si_atime = vap->va_atime;
			else
				de->de_atime = vap->va_atime;
		}
		if (vap->va_mtime.tv_sec != VNOVAL) {
			if (vp->v_type == VCHR)
				vp->v_rdev->si_mtime = vap->va_mtime;
			else
				de->de_mtime = vap->va_mtime;
		}
		c = 1;
	}

	if (c) {
		/*
		 * NOTE(review): the VCHR branch stamps si_ctime while the
		 * other branch stamps de_mtime (not de_ctime) -- confirm
		 * that this asymmetry is intended.
		 */
		if (vp->v_type == VCHR)
			vfs_timestamp(&vp->v_rdev->si_ctime);
		else
			vfs_timestamp(&de->de_mtime);
	}
	return (0);
}
1047 
#ifdef MAC
/*
 * VOP_SETLABEL (MAC kernels only): relabel the vnode, then push the
 * vnode's label into its devfs directory entry.  Always returns 0.
 */
static int
devfs_setlabel(struct vop_setlabel_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct devfs_dirent *de = vp->v_data;

	mac_relabel_vnode(ap->a_cred, vp, ap->a_label);
	mac_update_devfsdirent(vp->v_mount, de, vp);

	return (0);
}
#endif
1064 
1065 static int
1066 devfs_stat_f(struct file *fp, struct stat *sb, struct ucred *cred, struct thread *td)
1067 {
1068 
1069 	return (vnops.fo_stat(fp, sb, cred, td));
1070 }
1071 
1072 static int
1073 devfs_symlink(struct vop_symlink_args *ap)
1074 {
1075 	int i, error;
1076 	struct devfs_dirent *dd;
1077 	struct devfs_dirent *de;
1078 	struct devfs_mount *dmp;
1079 	struct thread *td;
1080 
1081 	td = ap->a_cnp->cn_thread;
1082 	KASSERT(td == curthread, ("devfs_symlink: td != curthread"));
1083 	error = suser(td);
1084 	if (error)
1085 		return(error);
1086 	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
1087 	dd = ap->a_dvp->v_data;
1088 	de = devfs_newdirent(ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen);
1089 	de->de_uid = 0;
1090 	de->de_gid = 0;
1091 	de->de_mode = 0755;
1092 	de->de_inode = alloc_unr(devfs_inos);
1093 	de->de_dirent->d_type = DT_LNK;
1094 	i = strlen(ap->a_target) + 1;
1095 	de->de_symlink = malloc(i, M_DEVFS, M_WAITOK);
1096 	bcopy(ap->a_target, de->de_symlink, i);
1097 	sx_xlock(&dmp->dm_lock);
1098 #ifdef MAC
1099 	mac_create_devfs_symlink(ap->a_cnp->cn_cred, dmp->dm_mount, dd, de);
1100 #endif
1101 	TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list);
1102 	devfs_allocv(de, ap->a_dvp->v_mount, ap->a_vpp, td);
1103 	sx_xunlock(&dmp->dm_lock);
1104 	return (0);
1105 }
1106 
1107 /* ARGSUSED */
1108 static int
1109 devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td)
1110 {
1111 	struct cdev *dev;
1112 	int error, ioflag, resid;
1113 	struct cdevsw *dsw;
1114 
1115 	error = devfs_fp_check(fp, &dev, &dsw);
1116 	if (error)
1117 		return (error);
1118 	KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td));
1119 	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT | O_FSYNC);
1120 	if (ioflag & O_DIRECT)
1121 		ioflag |= IO_DIRECT;
1122 	if ((flags & FOF_OFFSET) == 0)
1123 		uio->uio_offset = fp->f_offset;
1124 
1125 	resid = uio->uio_resid;
1126 
1127 	error = dsw->d_write(dev, uio, ioflag);
1128 	dev_relthread(dev);
1129 	if (uio->uio_resid != resid || (error == 0 && resid != 0)) {
1130 		vfs_timestamp(&dev->si_ctime);
1131 		dev->si_mtime = dev->si_ctime;
1132 	}
1133 
1134 	if ((flags & FOF_OFFSET) == 0)
1135 		fp->f_offset = uio->uio_offset;
1136 	fp->f_nextoff = uio->uio_offset;
1137 	return (error);
1138 }
1139 
1140 dev_t
1141 dev2udev(struct cdev *x)
1142 {
1143 	if (x == NULL)
1144 		return (NODEV);
1145 	return (x->si_priv->cdp_inode);
1146 }
1147 
/*
 * File operations installed by devfs_open() on files that refer to a
 * device.  Read, write, ioctl, poll and kqfilter call the driver directly
 * through devfs_fp_check(); stat and close are forwarded to the generic
 * vnode file operations.
 */
static struct fileops devfs_ops_f = {
	.fo_read =	devfs_read_f,
	.fo_write =	devfs_write_f,
	.fo_ioctl =	devfs_ioctl_f,
	.fo_poll =	devfs_poll_f,
	.fo_kqfilter =	devfs_kqfilter_f,
	.fo_stat =	devfs_stat_f,
	.fo_close =	devfs_close_f,
	.fo_flags =	DFLAG_PASSABLE | DFLAG_SEEKABLE
};
1158 
/*
 * Vnode operations given to every vnode by getnewvnode() in
 * devfs_allocv(); character device vnodes are then switched to
 * devfs_specops.  Operations not listed here fall through to
 * default_vnodeops.
 */
static struct vop_vector devfs_vnodeops = {
	.vop_default =		&default_vnodeops,

	.vop_access =		devfs_access,
	.vop_getattr =		devfs_getattr,
	.vop_ioctl =		devfs_rioctl,
	.vop_lookup =		devfs_lookup,
	.vop_mknod =		devfs_mknod,
	.vop_pathconf =		devfs_pathconf,
	.vop_read =		devfs_rread,
	.vop_readdir =		devfs_readdir,
	.vop_readlink =		devfs_readlink,
	.vop_reclaim =		devfs_reclaim,
	.vop_remove =		devfs_remove,
	.vop_revoke =		devfs_revoke,
	.vop_setattr =		devfs_setattr,
#ifdef MAC
	.vop_setlabel =		devfs_setlabel,
#endif
	.vop_symlink =		devfs_symlink,
};
1180 
/*
 * Vnode operations for character device vnodes; devfs_allocv() installs
 * this vector when the directory entry is of type DT_CHR.  Operations
 * that make no sense on a device node are wired to VOP_PANIC; operations
 * not listed fall through to default_vnodeops.
 */
static struct vop_vector devfs_specops = {
	.vop_default =		&default_vnodeops,

	.vop_access =		devfs_access,
	.vop_advlock =		devfs_advlock,
	.vop_bmap =		VOP_PANIC,
	.vop_close =		devfs_close,
	.vop_create =		VOP_PANIC,
	.vop_fsync =		devfs_fsync,
	.vop_getattr =		devfs_getattr,
	.vop_lease =		VOP_NULL,
	.vop_link =		VOP_PANIC,
	.vop_mkdir =		VOP_PANIC,
	.vop_mknod =		VOP_PANIC,
	.vop_open =		devfs_open,
	.vop_pathconf =		devfs_pathconf,
	.vop_print =		devfs_print,
	.vop_read =		VOP_PANIC,
	.vop_readdir =		VOP_PANIC,
	.vop_readlink =		VOP_PANIC,
	.vop_reallocblks =	VOP_PANIC,
	.vop_reclaim =		devfs_reclaim,
	.vop_remove =		devfs_remove,
	.vop_rename =		VOP_PANIC,
	.vop_revoke =		devfs_revoke,
	.vop_rmdir =		VOP_PANIC,
	.vop_setattr =		devfs_setattr,
#ifdef MAC
	.vop_setlabel =		devfs_setlabel,
#endif
	.vop_strategy =		VOP_PANIC,
	.vop_symlink =		VOP_PANIC,
	.vop_write =		VOP_PANIC,
};
1215 
/*
 * Our calling convention to the device drivers used to be that we passed
 * vnode.h IO_* flags to read()/write(), but we're moving to fcntl.h O_
 * flags instead since that's what open(), close() and ioctl() take and
 * we don't really want vnode.h in device drivers.
 * We solved the source compatibility by redefining some vnode flags to
 * be the same as the fcntl ones and by sending down the bitwise OR of
 * the respective fcntl/vnode flags.  These CTASSERTs make sure nobody
 * pulls the rug out from under this: the build fails if either pair of
 * constants ever stops being equal.
 */
CTASSERT(O_NONBLOCK == IO_NDELAY);
CTASSERT(O_FSYNC == IO_SYNC);
1228