xref: /freebsd/sys/fs/devfs/devfs_vnops.c (revision 262e143bd46171a6415a5b28af260a5efa2a3db8)
1 /*-
2  * Copyright (c) 2000-2004
3  *	Poul-Henning Kamp.  All rights reserved.
4  * Copyright (c) 1989, 1992-1993, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software donated to Berkeley by
8  * Jan-Simon Pendry.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  *	@(#)kernfs_vnops.c	8.15 (Berkeley) 5/21/95
32  * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vnops.c 1.43
33  *
34  * $FreeBSD$
35  */
36 
37 /*
38  * TODO:
39  *	remove empty directories
40  *	mkdir: want it ?
41  */
42 
43 #include <opt_devfs.h>
44 #include <opt_mac.h>
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/conf.h>
49 #include <sys/dirent.h>
50 #include <sys/fcntl.h>
51 #include <sys/file.h>
52 #include <sys/filedesc.h>
53 #include <sys/filio.h>
54 #include <sys/kernel.h>
55 #include <sys/lock.h>
56 #include <sys/mac.h>
57 #include <sys/malloc.h>
58 #include <sys/mount.h>
59 #include <sys/namei.h>
60 #include <sys/proc.h>
61 #include <sys/stat.h>
62 #include <sys/sx.h>
63 #include <sys/time.h>
64 #include <sys/ttycom.h>
65 #include <sys/unistd.h>
66 #include <sys/vnode.h>
67 
/*
 * Forward declarations of the operation tables that are defined further
 * down in this file, so that the functions in between (devfs_allocv(),
 * devfs_open()) can take their addresses.
 */
static struct vop_vector devfs_vnodeops;
static struct vop_vector devfs_specops;
static struct fileops devfs_ops_f;
71 
72 #include <fs/devfs/devfs.h>
73 #include <fs/devfs/devfs_int.h>
74 
75 static int
76 devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp)
77 {
78 
79 	*devp = fp->f_vnode->v_rdev;
80 	if (*devp != fp->f_data)
81 		return (ENXIO);
82 	KASSERT((*devp)->si_refcount > 0,
83 	    ("devfs: un-referenced struct cdev *(%s)", devtoname(*devp)));
84 	*dswp = dev_refthread(*devp);
85 	if (*dswp == NULL)
86 		return (ENXIO);
87 	return (0);
88 }
89 
90 /*
91  * Construct the fully qualified path name relative to the mountpoint
92  */
93 static char *
94 devfs_fqpn(char *buf, struct vnode *dvp, struct componentname *cnp)
95 {
96 	int i;
97 	struct devfs_dirent *de, *dd;
98 	struct devfs_mount *dmp;
99 
100 	dmp = VFSTODEVFS(dvp->v_mount);
101 	dd = dvp->v_data;
102 	i = SPECNAMELEN;
103 	buf[i] = '\0';
104 	i -= cnp->cn_namelen;
105 	if (i < 0)
106 		 return (NULL);
107 	bcopy(cnp->cn_nameptr, buf + i, cnp->cn_namelen);
108 	de = dd;
109 	while (de != dmp->dm_rootdir) {
110 		i--;
111 		if (i < 0)
112 			 return (NULL);
113 		buf[i] = '/';
114 		i -= de->de_dirent->d_namlen;
115 		if (i < 0)
116 			 return (NULL);
117 		bcopy(de->de_dirent->d_name, buf + i,
118 		    de->de_dirent->d_namlen);
119 		de = TAILQ_FIRST(&de->de_dlist);	/* "." */
120 		de = TAILQ_NEXT(de, de_list);		/* ".." */
121 		de = de->de_dir;
122 	}
123 	return (buf + i);
124 }
125 
/*
 * Produce a vnode for the devfs directory entry de on mount mp.
 *
 * If the entry already has a vnode it is acquired with
 * vget(LK_EXCLUSIVE); otherwise a new vnode is created with
 * getnewvnode(), typed according to the dirent's d_type, attached to the
 * entry and locked exclusively.  The vnode is returned through *vpp.
 *
 * Returns 0 on success, ENOENT for a character device whose cdev_priv is
 * not CDP_ACTIVE, or the error from getnewvnode().  td must be curthread.
 */
int
devfs_allocv(struct devfs_dirent *de, struct mount *mp, struct vnode **vpp, struct thread *td)
{
	int error;
	struct vnode *vp;
	struct cdev *dev;

	KASSERT(td == curthread, ("devfs_allocv: td != curthread"));
loop:
	vp = de->de_vnode;
	if (vp != NULL) {
		/* Re-read de->de_vnode and try again whenever vget() fails. */
		if (vget(vp, LK_EXCLUSIVE, td))
			goto loop;
		*vpp = vp;
		return (0);
	}
	if (de->de_dirent->d_type == DT_CHR) {
		if (!(de->de_cdp->cdp_flags & CDP_ACTIVE))
			return (ENOENT);
		dev = &de->de_cdp->cdp_c;
	} else {
		dev = NULL;
	}
	error = getnewvnode("devfs", mp, &devfs_vnodeops, &vp);
	if (error != 0) {
		printf("devfs_allocv: failed to allocate new vnode\n");
		return (error);
	}

	if (de->de_dirent->d_type == DT_CHR) {
		vp->v_type = VCHR;
		/*
		 * Reference the cdev and fold the vnode's use count into
		 * the device's, under both the vnode interlock and the
		 * device lock.
		 */
		VI_LOCK(vp);
		dev_lock();
		dev_refl(dev);
		vp->v_rdev = dev;
		KASSERT(vp->v_usecount == 1,
		    ("%s %d (%d)\n", __func__, __LINE__, vp->v_usecount));
		dev->si_usecount += vp->v_usecount;
		dev_unlock();
		VI_UNLOCK(vp);
		/* Device vnodes use the special-file operations vector. */
		vp->v_op = &devfs_specops;
	} else if (de->de_dirent->d_type == DT_DIR) {
		vp->v_type = VDIR;
	} else if (de->de_dirent->d_type == DT_LNK) {
		vp->v_type = VLNK;
	} else {
		vp->v_type = VBAD;
	}
	/* Cross-link the vnode and the directory entry. */
	vp->v_data = de;
	de->de_vnode = vp;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
#ifdef MAC
	mac_associate_vnode_devfs(mp, de, vp);
#endif
	*vpp = vp;
	return (0);
}
183 
184 static int
185 devfs_access(struct vop_access_args *ap)
186 {
187 	struct vnode *vp = ap->a_vp;
188 	struct devfs_dirent *de;
189 	int error;
190 
191 	de = vp->v_data;
192 	if (vp->v_type == VDIR)
193 		de = de->de_dir;
194 
195 	error = vaccess(vp->v_type, de->de_mode, de->de_uid, de->de_gid,
196 	    ap->a_mode, ap->a_cred, NULL);
197 	if (!error)
198 		return (error);
199 	if (error != EACCES)
200 		return (error);
201 	/* We do, however, allow access to the controlling terminal */
202 	if (!(ap->a_td->td_proc->p_flag & P_CONTROLT))
203 		return (error);
204 	if (ap->a_td->td_proc->p_session->s_ttyvp == de->de_vnode)
205 		return (0);
206 	return (error);
207 }
208 
209 /* ARGSUSED */
210 static int
211 devfs_advlock(struct vop_advlock_args *ap)
212 {
213 
214 	return (ap->a_flags & F_FLOCK ? EOPNOTSUPP : EINVAL);
215 }
216 
217 /* ARGSUSED */
/*
 * Close a device vnode.
 *
 * First drops the session's reference to the vnode when the caller is
 * closing its own controlling terminal and only two device references
 * remain.  The driver's d_close() is then invoked when the vnode is
 * doomed, when the driver sets D_TRACKCLOSE, or when count_dev() is not
 * greater than one; otherwise 0 is returned without calling the driver.
 *
 * Returns ENXIO if dev_refthread() fails, otherwise d_close()'s result.
 */
static int
devfs_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp, *oldvp;
	struct thread *td = ap->a_td;
	struct cdev *dev = vp->v_rdev;
	struct cdevsw *dsw;
	int error;

	/*
	 * Hack: a tty device that is a controlling terminal
	 * has a reference from the session structure.
	 * We cannot easily tell that a character device is
	 * a controlling terminal, unless it is the closing
	 * process' controlling terminal.  In that case,
	 * if the reference count is 2 (this last descriptor
	 * plus the session), release the reference from the session.
	 */

	/*
	 * This needs to be rewritten to take the vp interlock into
	 * consideration.
	 */

	oldvp = NULL;
	sx_xlock(&proctree_lock);
	if (td && vp == td->td_proc->p_session->s_ttyvp) {
		SESS_LOCK(td->td_proc->p_session);
		VI_LOCK(vp);
		if (count_dev(dev) == 2 && (vp->v_iflag & VI_DOOMED) == 0) {
			td->td_proc->p_session->s_ttyvp = NULL;
			oldvp = vp;
		}
		VI_UNLOCK(vp);
		SESS_UNLOCK(td->td_proc->p_session);
	}
	sx_xunlock(&proctree_lock);
	/* The vrele() is deferred until all of the locks above are dropped. */
	if (oldvp != NULL)
		vrele(oldvp);
	/*
	 * We do not want to really close the device if it
	 * is still in use unless we are trying to close it
	 * forcibly. Since every use (buffer, vnode, swap, cmap)
	 * holds a reference to the vnode, and because we mark
	 * any other vnodes that alias this device, when the
	 * sum of the reference counts on all the aliased
	 * vnodes descends to one, we are on last close.
	 */
	dsw = dev_refthread(dev);
	if (dsw == NULL)
		return (ENXIO);
	VI_LOCK(vp);
	if (vp->v_iflag & VI_DOOMED) {
		/* Forced close. */
	} else if (dsw->d_flags & D_TRACKCLOSE) {
		/* Keep device updated on status. */
	} else if (count_dev(dev) > 1) {
		/* Not the last close: nothing to tell the driver. */
		VI_UNLOCK(vp);
		dev_relthread(dev);
		return (0);
	}
	VI_UNLOCK(vp);
	KASSERT(dev->si_refcount > 0,
	    ("devfs_close() on un-referenced struct cdev *(%s)", devtoname(dev)));
	/* Drivers without D_NEEDGIANT are called with Giant dropped. */
	if (!(dsw->d_flags & D_NEEDGIANT)) {
		DROP_GIANT();
		error = dsw->d_close(dev, ap->a_fflag, S_IFCHR, td);
		PICKUP_GIANT();
	} else {
		error = dsw->d_close(dev, ap->a_fflag, S_IFCHR, td);
	}
	dev_relthread(dev);
	return (error);
}
292 
293 static int
294 devfs_close_f(struct file *fp, struct thread *td)
295 {
296 
297 	return (vnops.fo_close(fp, td));
298 }
299 
300 /* ARGSUSED */
301 static int
302 devfs_fsync(struct vop_fsync_args *ap)
303 {
304 	if (!vn_isdisk(ap->a_vp, NULL))
305 		return (0);
306 
307 	return (vop_stdfsync(ap));
308 }
309 
310 static int
311 devfs_getattr(struct vop_getattr_args *ap)
312 {
313 	struct vnode *vp = ap->a_vp;
314 	struct vattr *vap = ap->a_vap;
315 	int error = 0;
316 	struct devfs_dirent *de;
317 	struct cdev *dev;
318 
319 	de = vp->v_data;
320 	KASSERT(de != NULL, ("Null dirent in devfs_getattr vp=%p", vp));
321 	if (vp->v_type == VDIR) {
322 		de = de->de_dir;
323 		KASSERT(de != NULL,
324 		    ("Null dir dirent in devfs_getattr vp=%p", vp));
325 	}
326 	bzero((caddr_t) vap, sizeof(*vap));
327 	vattr_null(vap);
328 	vap->va_uid = de->de_uid;
329 	vap->va_gid = de->de_gid;
330 	vap->va_mode = de->de_mode;
331 	if (vp->v_type == VLNK)
332 		vap->va_size = strlen(de->de_symlink);
333 	else if (vp->v_type == VDIR)
334 		vap->va_size = vap->va_bytes = DEV_BSIZE;
335 	else
336 		vap->va_size = 0;
337 	if (vp->v_type != VDIR)
338 		vap->va_bytes = 0;
339 	vap->va_blocksize = DEV_BSIZE;
340 	vap->va_type = vp->v_type;
341 
342 #define fix(aa)							\
343 	do {							\
344 		if ((aa).tv_sec == 0) {				\
345 			(aa).tv_sec = boottime.tv_sec;		\
346 			(aa).tv_nsec = boottime.tv_usec * 1000; \
347 		}						\
348 	} while (0)
349 
350 	if (vp->v_type != VCHR)  {
351 		fix(de->de_atime);
352 		vap->va_atime = de->de_atime;
353 		fix(de->de_mtime);
354 		vap->va_mtime = de->de_mtime;
355 		fix(de->de_ctime);
356 		vap->va_ctime = de->de_ctime;
357 	} else {
358 		dev = vp->v_rdev;
359 		fix(dev->si_atime);
360 		vap->va_atime = dev->si_atime;
361 		fix(dev->si_mtime);
362 		vap->va_mtime = dev->si_mtime;
363 		fix(dev->si_ctime);
364 		vap->va_ctime = dev->si_ctime;
365 
366 		vap->va_rdev = dev->si_priv->cdp_inode;
367 	}
368 	vap->va_gen = 0;
369 	vap->va_flags = 0;
370 	vap->va_nlink = de->de_links;
371 	vap->va_fileid = de->de_inode;
372 
373 	return (error);
374 }
375 
376 /* ARGSUSED */
/*
 * fo_ioctl for devfs files.
 *
 * FIODTYPE and FIODGNAME are answered here from the cdevsw flags and the
 * device name; every other command is passed to the driver's d_ioctl().
 * A driver result of ENOIOCTL is reported as ENOTTY.  After a successful
 * TIOCSCTTY the session's controlling-terminal vnode is switched to this
 * file's vnode and the reference on the previous one is released.
 */
static int
devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, struct thread *td)
{
	struct cdev *dev;
	struct cdevsw *dsw;
	struct vnode *vp;
	struct vnode *vpold;
	int error, i;
	const char *p;
	struct fiodgname_arg *fgn;

	error = devfs_fp_check(fp, &dev, &dsw);
	if (error)
		return (error);

	if (com == FIODTYPE) {
		*(int *)data = dsw->d_flags & D_TYPEMASK;
		dev_relthread(dev);
		return (0);
	} else if (com == FIODGNAME) {
		fgn = data;
		p = devtoname(dev);
		/* The name is copied out including its terminating NUL. */
		i = strlen(p) + 1;
		if (i > fgn->len)
			error = EINVAL;
		else
			error = copyout(p, fgn->buf, i);
		dev_relthread(dev);
		return (error);
	}
	error = dsw->d_ioctl(dev, com, data, fp->f_flag, td);
	dev_relthread(dev);
	if (error == ENOIOCTL)
		error = ENOTTY;
	if (error == 0 && com == TIOCSCTTY) {
		vp = fp->f_vnode;

		/* Do nothing if reassigning same control tty */
		sx_slock(&proctree_lock);
		if (td->td_proc->p_session->s_ttyvp == vp) {
			sx_sunlock(&proctree_lock);
			return (0);
		}

		mtx_lock(&Giant);

		/* Reference the new vnode before publishing it in the session. */
		vpold = td->td_proc->p_session->s_ttyvp;
		VREF(vp);
		SESS_LOCK(td->td_proc->p_session);
		td->td_proc->p_session->s_ttyvp = vp;
		SESS_UNLOCK(td->td_proc->p_session);

		sx_sunlock(&proctree_lock);

		/* Get rid of reference to old control tty */
		if (vpold)
			vrele(vpold);
		mtx_unlock(&Giant);
	}
	return (error);
}
438 
439 /* ARGSUSED */
440 static int
441 devfs_kqfilter_f(struct file *fp, struct knote *kn)
442 {
443 	struct cdev *dev;
444 	struct cdevsw *dsw;
445 	int error;
446 
447 	error = devfs_fp_check(fp, &dev, &dsw);
448 	if (error)
449 		return (error);
450 	error = dsw->d_kqfilter(dev, kn);
451 	dev_relthread(dev);
452 	return (error);
453 }
454 
/*
 * Body of the devfs lookup operation; devfs_lookup() calls this with the
 * mount's dm_lock held exclusively.
 *
 * Resolves the component described by ap->a_cnp in directory ap->a_dvp
 * and returns the resulting vnode through *ap->a_vpp.  When no entry
 * exists, the dev_clone event handlers are given a chance to create the
 * device on demand.  Returns 0 on success, EJUSTRETURN for a missing last
 * component of a CREATE/RENAME with LOCKPARENT or WANTPARENT set, or an
 * errno value.
 */
static int
devfs_lookupx(struct vop_lookup_args *ap)
{
	struct componentname *cnp;
	struct vnode *dvp, **vpp;
	struct thread *td;
	struct devfs_dirent *de, *dd;
	struct devfs_dirent **dde;
	struct devfs_mount *dmp;
	struct cdev *cdev;
	int error, flags, nameiop;
	char specname[SPECNAMELEN + 1], *pname;

	cnp = ap->a_cnp;
	vpp = ap->a_vpp;
	dvp = ap->a_dvp;
	pname = cnp->cn_nameptr;
	td = cnp->cn_thread;
	flags = cnp->cn_flags;
	nameiop = cnp->cn_nameiop;
	dmp = VFSTODEVFS(dvp->v_mount);
	dd = dvp->v_data;
	*vpp = NULLVP;

	/* Renaming is not supported in devfs. */
	if ((flags & ISLASTCN) && nameiop == RENAME)
		return (EOPNOTSUPP);

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT))
		return (EIO);

	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td);
	if (error)
		return (error);

	/* "." resolves to the directory itself, with an extra reference. */
	if (cnp->cn_namelen == 1 && *pname == '.') {
		if ((flags & ISLASTCN) && nameiop != LOOKUP)
			return (EINVAL);
		*vpp = dvp;
		VREF(dvp);
		return (0);
	}

	if (flags & ISDOTDOT) {
		if ((flags & ISLASTCN) && nameiop != LOOKUP)
			return (EINVAL);
		/* dvp is unlocked while the parent's vnode is obtained. */
		VOP_UNLOCK(dvp, 0, td);
		de = TAILQ_FIRST(&dd->de_dlist);	/* "." */
		de = TAILQ_NEXT(de, de_list);		/* ".." */
		de = de->de_dir;
		error = devfs_allocv(de, dvp->v_mount, vpp, td);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
		return (error);
	}

	devfs_populate(dmp);
	dd = dvp->v_data;
	de = devfs_find(dd, cnp->cn_nameptr, cnp->cn_namelen);
	while (de == NULL) {	/* While(...) so we can use break */

		if (nameiop == DELETE)
			return (ENOENT);

		/*
		 * OK, we didn't have an entry for the name we were asked for
		 * so we try to see if anybody can create it on demand.
		 */
		pname = devfs_fqpn(specname, dvp, cnp);
		if (pname == NULL)
			break;

		cdev = NULL;
		EVENTHANDLER_INVOKE(dev_clone,
		    td->td_ucred, pname, strlen(pname), &cdev);
		if (cdev == NULL)
			break;

		/* Populate again, then look for the clone's entry on this mount. */
		devfs_populate(dmp);

		dev_lock();
		dde = &cdev->si_priv->cdp_dirents[dmp->dm_idx];
		if (dde != NULL && *dde != NULL)
			de = *dde;
		dev_unlock();
		dev_rel(cdev);
		break;
	}

	if (de == NULL || de->de_flags & DE_WHITEOUT) {
		if ((nameiop == CREATE || nameiop == RENAME) &&
		    (flags & (LOCKPARENT | WANTPARENT)) && (flags & ISLASTCN)) {
			cnp->cn_flags |= SAVENAME;
			return (EJUSTRETURN);
		}
		return (ENOENT);
	}

	if ((cnp->cn_nameiop == DELETE) && (flags & ISLASTCN)) {
		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
		if (error)
			return (error);
		/*
		 * NOTE(review): *vpp is still the NULLVP stored at the top of
		 * this function when control gets here, so this comparison
		 * does not look like it can succeed -- confirm.
		 */
		if (*vpp == dvp) {
			VREF(dvp);
			*vpp = dvp;
			return (0);
		}
	}
	error = devfs_allocv(de, dvp->v_mount, vpp, td);
	return (error);
}
567 
568 static int
569 devfs_lookup(struct vop_lookup_args *ap)
570 {
571 	int j;
572 	struct devfs_mount *dmp;
573 
574 	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
575 	sx_xlock(&dmp->dm_lock);
576 	j = devfs_lookupx(ap);
577 	sx_xunlock(&dmp->dm_lock);
578 	return (j);
579 }
580 
581 static int
582 devfs_mknod(struct vop_mknod_args *ap)
583 {
584 	struct componentname *cnp;
585 	struct vnode *dvp, **vpp;
586 	struct thread *td;
587 	struct devfs_dirent *dd, *de;
588 	struct devfs_mount *dmp;
589 	int error;
590 
591 	/*
592 	 * The only type of node we should be creating here is a
593 	 * character device, for anything else return EOPNOTSUPP.
594 	 */
595 	if (ap->a_vap->va_type != VCHR)
596 		return (EOPNOTSUPP);
597 	dvp = ap->a_dvp;
598 	dmp = VFSTODEVFS(dvp->v_mount);
599 	sx_xlock(&dmp->dm_lock);
600 
601 	cnp = ap->a_cnp;
602 	vpp = ap->a_vpp;
603 	td = cnp->cn_thread;
604 	dd = dvp->v_data;
605 
606 	error = ENOENT;
607 	TAILQ_FOREACH(de, &dd->de_dlist, de_list) {
608 		if (cnp->cn_namelen != de->de_dirent->d_namlen)
609 			continue;
610 		if (bcmp(cnp->cn_nameptr, de->de_dirent->d_name,
611 		    de->de_dirent->d_namlen) != 0)
612 			continue;
613 		if (de->de_flags & DE_WHITEOUT)
614 			break;
615 		goto notfound;
616 	}
617 	if (de == NULL)
618 		goto notfound;
619 	de->de_flags &= ~DE_WHITEOUT;
620 	error = devfs_allocv(de, dvp->v_mount, vpp, td);
621 notfound:
622 	sx_xunlock(&dmp->dm_lock);
623 	return (error);
624 }
625 
626 /* ARGSUSED */
/*
 * Open a device vnode.
 *
 * Calls the driver's d_fdopen() when it provides one, d_open()
 * otherwise, with the vnode unlocked for the duration of the call.  On
 * success, and when a file descriptor index was supplied, the file's
 * operations vector is switched to devfs_ops_f and f_data is pointed at
 * the cdev.
 *
 * Returns ENXIO for VBLK vnodes, for a vnode without a cdev and when
 * dev_refthread() fails; the securelevel_ge() error for write opens of
 * disks; otherwise the driver's result.
 */
static int
devfs_open(struct vop_open_args *ap)
{
	struct thread *td = ap->a_td;
	struct vnode *vp = ap->a_vp;
	struct cdev *dev = vp->v_rdev;
	struct file *fp;
	int error;
	struct cdevsw *dsw;

	if (vp->v_type == VBLK)
		return (ENXIO);

	if (dev == NULL)
		return (ENXIO);

	/* Make this field valid before any I/O in d_open. */
	if (dev->si_iosize_max == 0)
		dev->si_iosize_max = DFLTPHYS;

	if (vn_isdisk(vp, NULL) &&
	    ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
		/*
		* When running in very secure mode, do not allow
		* opens for writing of any disks.
		* XXX: should be in geom_dev.c, but we lack the cred there.
		*/
		error = securelevel_ge(td->td_ucred, 2);
		if (error)
			return (error);
	}

	dsw = dev_refthread(dev);
	if (dsw == NULL)
		return (ENXIO);

	/* XXX: Special casing of ttys for deadfs.  Probably redundant. */
	if (dsw->d_flags & D_TTY)
		vp->v_vflag |= VV_ISTTY;

	/* The vnode lock is not held across the driver's open routine. */
	VOP_UNLOCK(vp, 0, td);

	if(!(dsw->d_flags & D_NEEDGIANT)) {
		DROP_GIANT();
		if (dsw->d_fdopen != NULL)
			error = dsw->d_fdopen(dev, ap->a_mode, td, ap->a_fdidx);
		else
			error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td);
		PICKUP_GIANT();
	} else {
		if (dsw->d_fdopen != NULL)
			error = dsw->d_fdopen(dev, ap->a_mode, td, ap->a_fdidx);
		else
			error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td);
	}

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);

	dev_relthread(dev);

	if (error)
		return (error);

#if 0	/* /dev/console */
	KASSERT(ap->a_fdidx >= 0,
	     ("Could not vnode bypass device on fd %d", ap->a_fdidx));
#else
	/* Without a descriptor index there is no file to redirect. */
	if(ap->a_fdidx < 0)
		return (error);
#endif
	/*
	 * This is a pretty disgustingly long chain, but I am not
	 * sure there is any better way.  Passing the fdidx into
	 * VOP_OPEN() offers us more information than just passing
	 * the file *.
	 */
	fp = ap->a_td->td_proc->p_fd->fd_ofiles[ap->a_fdidx];
	KASSERT(fp->f_ops == &badfileops,
	     ("Could not vnode bypass device on fdops %p", fp->f_ops));
	/* From now on file operations go straight to the devfs_*_f routines. */
	fp->f_ops = &devfs_ops_f;
	fp->f_data = dev;
	return (error);
}
710 
711 static int
712 devfs_pathconf(struct vop_pathconf_args *ap)
713 {
714 
715 	switch (ap->a_name) {
716 	case _PC_MAC_PRESENT:
717 #ifdef MAC
718 		/*
719 		 * If MAC is enabled, devfs automatically supports
720 		 * trivial non-persistant label storage.
721 		 */
722 		*ap->a_retval = 1;
723 #else
724 		*ap->a_retval = 0;
725 #endif
726 		return (0);
727 	default:
728 		return (vop_stdpathconf(ap));
729 	}
730 	/* NOTREACHED */
731 }
732 
733 /* ARGSUSED */
734 static int
735 devfs_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td)
736 {
737 	struct cdev *dev;
738 	struct cdevsw *dsw;
739 	int error;
740 
741 	error = devfs_fp_check(fp, &dev, &dsw);
742 	if (error)
743 		return (error);
744 	error = dsw->d_poll(dev, events, td);
745 	dev_relthread(dev);
746 	return(error);
747 }
748 
749 /*
750  * Print out the contents of a special device vnode.
751  */
752 static int
753 devfs_print(struct vop_print_args *ap)
754 {
755 
756 	printf("\tdev %s\n", devtoname(ap->a_vp->v_rdev));
757 	return (0);
758 }
759 
760 /* ARGSUSED */
761 static int
762 devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td)
763 {
764 	struct cdev *dev;
765 	int ioflag, error, resid;
766 	struct cdevsw *dsw;
767 
768 	error = devfs_fp_check(fp, &dev, &dsw);
769 	if (error)
770 		return (error);
771 	resid = uio->uio_resid;
772 	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT);
773 	if (ioflag & O_DIRECT)
774 		ioflag |= IO_DIRECT;
775 
776 	if ((flags & FOF_OFFSET) == 0)
777 		uio->uio_offset = fp->f_offset;
778 
779 	error = dsw->d_read(dev, uio, ioflag);
780 	dev_relthread(dev);
781 	if (uio->uio_resid != resid || (error == 0 && resid != 0))
782 		vfs_timestamp(&dev->si_atime);
783 
784 	if ((flags & FOF_OFFSET) == 0)
785 		fp->f_offset = uio->uio_offset;
786 	fp->f_nextoff = uio->uio_offset;
787 	return (error);
788 }
789 
/*
 * VOP_READDIR for devfs directories.
 *
 * Walks the directory's entry list under the mount's dm_lock, skipping
 * whited-out entries, and passes every entry at or beyond the requested
 * offset to vfs_read_dirent().  The walk stops when the next record no
 * longer fits in the remaining space or vfs_read_dirent() fails.  On
 * return uio_offset is the offset of the first entry not consumed.
 *
 * Returns ENOTDIR for non-directories, EINVAL for a negative offset,
 * otherwise 0 or the error from vfs_read_dirent().
 */
static int
devfs_readdir(struct vop_readdir_args *ap)
{
	int error;
	struct uio *uio;
	struct dirent *dp;
	struct devfs_dirent *dd;
	struct devfs_dirent *de;
	struct devfs_mount *dmp;
	off_t off, oldoff;
	int *tmp_ncookies = NULL;

	if (ap->a_vp->v_type != VDIR)
		return (ENOTDIR);

	uio = ap->a_uio;
	if (uio->uio_offset < 0)
		return (EINVAL);

	/*
	 * XXX: This is a temporary hack to get around this filesystem not
	 * supporting cookies. We store the location of the ncookies pointer
	 * in a temporary variable before calling vfs_subr.c:vfs_read_dirent()
	 * and set the number of cookies to 0. We then set the pointer to
	 * NULL so that vfs_read_dirent doesn't try to call realloc() on
	 * ap->a_cookies. Later in this function, we restore the ap->a_ncookies
	 * pointer to its original location before returning to the caller.
	 */
	if (ap->a_ncookies != NULL) {
		tmp_ncookies = ap->a_ncookies;
		*ap->a_ncookies = 0;
		ap->a_ncookies = NULL;
	}

	dmp = VFSTODEVFS(ap->a_vp->v_mount);
	sx_xlock(&dmp->dm_lock);
	devfs_populate(dmp);
	error = 0;
	de = ap->a_vp->v_data;
	off = 0;
	oldoff = uio->uio_offset;
	/*
	 * Here dd is the entry being listed, while de is reused inside the
	 * loop for the entry that supplies the inode number.
	 */
	TAILQ_FOREACH(dd, &de->de_dlist, de_list) {
		KASSERT(dd->de_cdp != (void *)0xdeadc0de, ("%s %d\n", __func__, __LINE__));
		if (dd->de_flags & DE_WHITEOUT)
			continue;
		/* Directories report the inode of their de_dir entry. */
		if (dd->de_dirent->d_type == DT_DIR)
			de = dd->de_dir;
		else
			de = dd;
		dp = dd->de_dirent;
		if (dp->d_reclen > uio->uio_resid)
			break;
		dp->d_fileno = de->de_inode;
		/* Entries before the requested offset are only counted. */
		if (off >= uio->uio_offset) {
			error = vfs_read_dirent(ap, dp, off);
			if (error)
				break;
		}
		off += dp->d_reclen;
	}
	sx_xunlock(&dmp->dm_lock);
	uio->uio_offset = off;

	/*
	 * Restore ap->a_ncookies if it wasn't originally NULL in the first
	 * place.
	 */
	if (tmp_ncookies != NULL)
		ap->a_ncookies = tmp_ncookies;

	return (error);
}
862 
863 static int
864 devfs_readlink(struct vop_readlink_args *ap)
865 {
866 	struct devfs_dirent *de;
867 
868 	de = ap->a_vp->v_data;
869 	return (uiomove(de->de_symlink, strlen(de->de_symlink), ap->a_uio));
870 }
871 
872 static int
873 devfs_reclaim(struct vop_reclaim_args *ap)
874 {
875 	struct vnode *vp = ap->a_vp;
876 	struct devfs_dirent *de;
877 	struct cdev *dev;
878 
879 	de = vp->v_data;
880 	if (de != NULL)
881 		de->de_vnode = NULL;
882 	vp->v_data = NULL;
883 	vnode_destroy_vobject(vp);
884 
885 	dev = vp->v_rdev;
886 	vp->v_rdev = NULL;
887 
888 	if (dev == NULL)
889 		return (0);
890 
891 	dev_lock();
892 	dev->si_usecount -= vp->v_usecount;
893 	dev_unlock();
894 	dev_rel(dev);
895 	return (0);
896 }
897 
898 static int
899 devfs_remove(struct vop_remove_args *ap)
900 {
901 	struct vnode *vp = ap->a_vp;
902 	struct devfs_dirent *dd;
903 	struct devfs_dirent *de;
904 	struct devfs_mount *dmp = VFSTODEVFS(vp->v_mount);
905 
906 	sx_xlock(&dmp->dm_lock);
907 	dd = ap->a_dvp->v_data;
908 	de = vp->v_data;
909 	if (de->de_cdp == NULL) {
910 		TAILQ_REMOVE(&dd->de_dlist, de, de_list);
911 		devfs_delete(dmp, de);
912 	} else {
913 		de->de_flags |= DE_WHITEOUT;
914 	}
915 	sx_xunlock(&dmp->dm_lock);
916 	return (0);
917 }
918 
919 /*
920  * Revoke is called on a tty when a terminal session ends.  The vnode
921  * is orphaned by setting v_op to deadfs so we need to let go of it
922  * as well so that we create a new one next time around.
923  *
924  * XXX: locking :-(
925  * XXX: We mess around with other mountpoints without holding their sxlock.
926  * XXX: We hold the devlock() when we zero their vnode pointer, but is that
927  * XXX: enough ?
928  */
929 static int
930 devfs_revoke(struct vop_revoke_args *ap)
931 {
932 	struct vnode *vp = ap->a_vp, *vp2;
933 	struct cdev *dev;
934 	struct cdev_priv *cdp;
935 	struct devfs_dirent *de;
936 	int i;
937 
938 	KASSERT((ap->a_flags & REVOKEALL) != 0, ("devfs_revoke !REVOKEALL"));
939 
940 	dev = vp->v_rdev;
941 	cdp = dev->si_priv;
942 	for (;;) {
943 		dev_lock();
944 		vp2 = NULL;
945 		for (i = 0; i <= cdp->cdp_maxdirent; i++) {
946 			de = cdp->cdp_dirents[i];
947 			if (de == NULL)
948 				continue;
949 			vp2 = de->de_vnode;
950 			de->de_vnode = NULL;
951 			if (vp2 != NULL)
952 				break;
953 		}
954 		dev_unlock();
955 		if (vp2 != NULL) {
956 			vgone(vp2);
957 			continue;
958 		}
959 		break;
960 	}
961 	return (0);
962 }
963 
964 static int
965 devfs_rioctl(struct vop_ioctl_args *ap)
966 {
967 	int error;
968 	struct devfs_mount *dmp;
969 
970 	dmp = VFSTODEVFS(ap->a_vp->v_mount);
971 	sx_xlock(&dmp->dm_lock);
972 	devfs_populate(dmp);
973 	error = devfs_rules_ioctl(dmp, ap->a_command, ap->a_data, ap->a_td);
974 	sx_xunlock(&dmp->dm_lock);
975 	return (error);
976 }
977 
978 static int
979 devfs_rread(struct vop_read_args *ap)
980 {
981 
982 	if (ap->a_vp->v_type != VDIR)
983 		return (EINVAL);
984 	return (VOP_READDIR(ap->a_vp, ap->a_uio, ap->a_cred, NULL, NULL, NULL));
985 }
986 
987 static int
988 devfs_setattr(struct vop_setattr_args *ap)
989 {
990 	struct devfs_dirent *de;
991 	struct vattr *vap;
992 	struct vnode *vp;
993 	int c, error;
994 	uid_t uid;
995 	gid_t gid;
996 
997 	vap = ap->a_vap;
998 	vp = ap->a_vp;
999 	if ((vap->va_type != VNON) ||
1000 	    (vap->va_nlink != VNOVAL) ||
1001 	    (vap->va_fsid != VNOVAL) ||
1002 	    (vap->va_fileid != VNOVAL) ||
1003 	    (vap->va_blocksize != VNOVAL) ||
1004 	    (vap->va_flags != VNOVAL && vap->va_flags != 0) ||
1005 	    (vap->va_rdev != VNOVAL) ||
1006 	    ((int)vap->va_bytes != VNOVAL) ||
1007 	    (vap->va_gen != VNOVAL)) {
1008 		return (EINVAL);
1009 	}
1010 
1011 	de = vp->v_data;
1012 	if (vp->v_type == VDIR)
1013 		de = de->de_dir;
1014 
1015 	error = c = 0;
1016 	if (vap->va_uid == (uid_t)VNOVAL)
1017 		uid = de->de_uid;
1018 	else
1019 		uid = vap->va_uid;
1020 	if (vap->va_gid == (gid_t)VNOVAL)
1021 		gid = de->de_gid;
1022 	else
1023 		gid = vap->va_gid;
1024 	if (uid != de->de_uid || gid != de->de_gid) {
1025 		if (((ap->a_cred->cr_uid != de->de_uid) || uid != de->de_uid ||
1026 		    (gid != de->de_gid && !groupmember(gid, ap->a_cred))) &&
1027 		    (error = suser_cred(ap->a_td->td_ucred, SUSER_ALLOWJAIL)) != 0)
1028 			return (error);
1029 		de->de_uid = uid;
1030 		de->de_gid = gid;
1031 		c = 1;
1032 	}
1033 
1034 	if (vap->va_mode != (mode_t)VNOVAL) {
1035 		if ((ap->a_cred->cr_uid != de->de_uid) &&
1036 		    (error = suser_cred(ap->a_td->td_ucred, SUSER_ALLOWJAIL)))
1037 			return (error);
1038 		de->de_mode = vap->va_mode;
1039 		c = 1;
1040 	}
1041 
1042 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
1043 		/* See the comment in ufs_vnops::ufs_setattr(). */
1044 		if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, ap->a_td)) &&
1045 		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
1046 		    (error = VOP_ACCESS(vp, VWRITE, ap->a_cred, ap->a_td))))
1047 			return (error);
1048 		if (vap->va_atime.tv_sec != VNOVAL) {
1049 			if (vp->v_type == VCHR)
1050 				vp->v_rdev->si_atime = vap->va_atime;
1051 			else
1052 				de->de_atime = vap->va_atime;
1053 		}
1054 		if (vap->va_mtime.tv_sec != VNOVAL) {
1055 			if (vp->v_type == VCHR)
1056 				vp->v_rdev->si_mtime = vap->va_mtime;
1057 			else
1058 				de->de_mtime = vap->va_mtime;
1059 		}
1060 		c = 1;
1061 	}
1062 
1063 	if (c) {
1064 		if (vp->v_type == VCHR)
1065 			vfs_timestamp(&vp->v_rdev->si_ctime);
1066 		else
1067 			vfs_timestamp(&de->de_mtime);
1068 	}
1069 	return (0);
1070 }
1071 
#ifdef MAC
/*
 * VOP_SETLABEL for devfs: relabel the vnode, then propagate the vnode's
 * label to its directory entry.  Always returns 0.
 */
static int
devfs_setlabel(struct vop_setlabel_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct devfs_dirent *de = vp->v_data;

	mac_relabel_vnode(ap->a_cred, vp, ap->a_label);
	mac_update_devfsdirent(vp->v_mount, de, vp);

	return (0);
}
#endif
1088 
1089 static int
1090 devfs_stat_f(struct file *fp, struct stat *sb, struct ucred *cred, struct thread *td)
1091 {
1092 
1093 	return (vnops.fo_stat(fp, sb, cred, td));
1094 }
1095 
1096 static int
1097 devfs_symlink(struct vop_symlink_args *ap)
1098 {
1099 	int i, error;
1100 	struct devfs_dirent *dd;
1101 	struct devfs_dirent *de;
1102 	struct devfs_mount *dmp;
1103 	struct thread *td;
1104 
1105 	td = ap->a_cnp->cn_thread;
1106 	KASSERT(td == curthread, ("devfs_symlink: td != curthread"));
1107 	error = suser(td);
1108 	if (error)
1109 		return(error);
1110 	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
1111 	dd = ap->a_dvp->v_data;
1112 	de = devfs_newdirent(ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen);
1113 	de->de_uid = 0;
1114 	de->de_gid = 0;
1115 	de->de_mode = 0755;
1116 	de->de_inode = alloc_unr(devfs_inos);
1117 	de->de_dirent->d_type = DT_LNK;
1118 	i = strlen(ap->a_target) + 1;
1119 	de->de_symlink = malloc(i, M_DEVFS, M_WAITOK);
1120 	bcopy(ap->a_target, de->de_symlink, i);
1121 	sx_xlock(&dmp->dm_lock);
1122 #ifdef MAC
1123 	mac_create_devfs_symlink(ap->a_cnp->cn_cred, dmp->dm_mount, dd, de);
1124 #endif
1125 	TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list);
1126 	devfs_allocv(de, ap->a_dvp->v_mount, ap->a_vpp, td);
1127 	sx_xunlock(&dmp->dm_lock);
1128 	return (0);
1129 }
1130 
1131 /* ARGSUSED */
1132 static int
1133 devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td)
1134 {
1135 	struct cdev *dev;
1136 	int error, ioflag, resid;
1137 	struct cdevsw *dsw;
1138 
1139 	error = devfs_fp_check(fp, &dev, &dsw);
1140 	if (error)
1141 		return (error);
1142 	KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td));
1143 	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT | O_FSYNC);
1144 	if (ioflag & O_DIRECT)
1145 		ioflag |= IO_DIRECT;
1146 	if ((flags & FOF_OFFSET) == 0)
1147 		uio->uio_offset = fp->f_offset;
1148 
1149 	resid = uio->uio_resid;
1150 
1151 	error = dsw->d_write(dev, uio, ioflag);
1152 	dev_relthread(dev);
1153 	if (uio->uio_resid != resid || (error == 0 && resid != 0)) {
1154 		vfs_timestamp(&dev->si_ctime);
1155 		dev->si_mtime = dev->si_ctime;
1156 	}
1157 
1158 	if ((flags & FOF_OFFSET) == 0)
1159 		fp->f_offset = uio->uio_offset;
1160 	fp->f_nextoff = uio->uio_offset;
1161 	return (error);
1162 }
1163 
1164 dev_t
1165 dev2udev(struct cdev *x)
1166 {
1167 	if (x == NULL)
1168 		return (NODEV);
1169 	return (x->si_priv->cdp_inode);
1170 }
1171 
/*
 * File operations that devfs_open() installs on a file opened on a
 * device, so that file-level requests reach the devfs_*_f routines above
 * directly.
 */
static struct fileops devfs_ops_f = {
	.fo_read =	devfs_read_f,
	.fo_write =	devfs_write_f,
	.fo_ioctl =	devfs_ioctl_f,
	.fo_poll =	devfs_poll_f,
	.fo_kqfilter =	devfs_kqfilter_f,
	.fo_stat =	devfs_stat_f,
	.fo_close =	devfs_close_f,
	.fo_flags =	DFLAG_PASSABLE | DFLAG_SEEKABLE
};
1182 
/*
 * Vnode operations that devfs_allocv() passes to getnewvnode() for every
 * new devfs vnode.  Character device vnodes are switched to
 * devfs_specops afterwards.  Operations not listed here fall through to
 * default_vnodeops.
 */
static struct vop_vector devfs_vnodeops = {
	.vop_default =		&default_vnodeops,

	.vop_access =		devfs_access,
	.vop_getattr =		devfs_getattr,
	.vop_ioctl =		devfs_rioctl,
	.vop_lookup =		devfs_lookup,
	.vop_mknod =		devfs_mknod,
	.vop_pathconf =		devfs_pathconf,
	.vop_read =		devfs_rread,
	.vop_readdir =		devfs_readdir,
	.vop_readlink =		devfs_readlink,
	.vop_reclaim =		devfs_reclaim,
	.vop_remove =		devfs_remove,
	.vop_revoke =		devfs_revoke,
	.vop_setattr =		devfs_setattr,
#ifdef MAC
	.vop_setlabel =		devfs_setlabel,
#endif
	.vop_symlink =		devfs_symlink,
};
1204 
/*
 * Vnode operations that devfs_allocv() installs on character device
 * vnodes.  Operations that are not expected to be issued on a device
 * vnode are wired to VOP_PANIC.  Note that vop_read and vop_write are
 * among them: device I/O is served through devfs_ops_f instead.
 */
static struct vop_vector devfs_specops = {
	.vop_default =		&default_vnodeops,

	.vop_access =		devfs_access,
	.vop_advlock =		devfs_advlock,
	.vop_bmap =		VOP_PANIC,
	.vop_close =		devfs_close,
	.vop_create =		VOP_PANIC,
	.vop_fsync =		devfs_fsync,
	.vop_getattr =		devfs_getattr,
	.vop_lease =		VOP_NULL,
	.vop_link =		VOP_PANIC,
	.vop_mkdir =		VOP_PANIC,
	.vop_mknod =		VOP_PANIC,
	.vop_open =		devfs_open,
	.vop_pathconf =		devfs_pathconf,
	.vop_print =		devfs_print,
	.vop_read =		VOP_PANIC,
	.vop_readdir =		VOP_PANIC,
	.vop_readlink =		VOP_PANIC,
	.vop_reallocblks =	VOP_PANIC,
	.vop_reclaim =		devfs_reclaim,
	.vop_remove =		devfs_remove,
	.vop_rename =		VOP_PANIC,
	.vop_revoke =		devfs_revoke,
	.vop_rmdir =		VOP_PANIC,
	.vop_setattr =		devfs_setattr,
#ifdef MAC
	.vop_setlabel =		devfs_setlabel,
#endif
	.vop_strategy =		VOP_PANIC,
	.vop_symlink =		VOP_PANIC,
	.vop_write =		VOP_PANIC,
};
1239 
/*
 * Our calling convention to the device drivers used to be that we passed
 * vnode.h IO_* flags to read()/write(), but we're moving to fcntl.h O_*
 * flags instead, since that's what open(), close() and ioctl() take and
 * we don't really want vnode.h in device drivers.
 * We solved the source compatibility by redefining some vnode flags to
 * be the same as the fcntl ones and by sending down the bitwise OR of
 * the respective fcntl/vnode flags.  These CTASSERTs make sure nobody
 * pulls the rug out from under this.
 */
CTASSERT(O_NONBLOCK == IO_NDELAY);
CTASSERT(O_FSYNC == IO_SYNC);
1252