xref: /freebsd/sys/fs/devfs/devfs_vnops.c (revision 87569f75a91f298c52a71823c04d41cf53c88889)
1 /*-
2  * Copyright (c) 2000-2004
3  *	Poul-Henning Kamp.  All rights reserved.
4  * Copyright (c) 1989, 1992-1993, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software donated to Berkeley by
8  * Jan-Simon Pendry.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  *	@(#)kernfs_vnops.c	8.15 (Berkeley) 5/21/95
32  * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vnops.c 1.43
33  *
34  * $FreeBSD$
35  */
36 
37 /*
38  * TODO:
39  *	remove empty directories
40  *	mkdir: want it ?
41  */
42 
43 #include <opt_devfs.h>
44 #include <opt_mac.h>
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/conf.h>
49 #include <sys/dirent.h>
50 #include <sys/fcntl.h>
51 #include <sys/file.h>
52 #include <sys/filedesc.h>
53 #include <sys/filio.h>
54 #include <sys/kernel.h>
55 #include <sys/lock.h>
56 #include <sys/mac.h>
57 #include <sys/malloc.h>
58 #include <sys/mount.h>
59 #include <sys/namei.h>
60 #include <sys/proc.h>
61 #include <sys/stat.h>
62 #include <sys/sx.h>
63 #include <sys/time.h>
64 #include <sys/ttycom.h>
65 #include <sys/unistd.h>
66 #include <sys/vnode.h>
67 
/*
 * Forward declarations of the operation tables defined near the end of
 * this file; devfs_allocv() and devfs_open() refer to them before their
 * definitions appear.
 */
static struct vop_vector devfs_vnodeops;
static struct vop_vector devfs_specops;
static struct fileops devfs_ops_f;
71 
72 #include <fs/devfs/devfs.h>
73 #include <fs/devfs/devfs_int.h>
74 
75 static int
76 devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp)
77 {
78 
79 	*devp = fp->f_vnode->v_rdev;
80 	if (*devp != fp->f_data)
81 		return (ENXIO);
82 	KASSERT((*devp)->si_refcount > 0,
83 	    ("devfs: un-referenced struct cdev *(%s)", devtoname(*devp)));
84 	*dswp = dev_refthread(*devp);
85 	if (*dswp == NULL)
86 		return (ENXIO);
87 	return (0);
88 }
89 
90 /*
91  * Construct the fully qualified path name relative to the mountpoint
92  */
93 static char *
94 devfs_fqpn(char *buf, struct vnode *dvp, struct componentname *cnp)
95 {
96 	int i;
97 	struct devfs_dirent *de, *dd;
98 	struct devfs_mount *dmp;
99 
100 	dmp = VFSTODEVFS(dvp->v_mount);
101 	dd = dvp->v_data;
102 	i = SPECNAMELEN;
103 	buf[i] = '\0';
104 	i -= cnp->cn_namelen;
105 	if (i < 0)
106 		 return (NULL);
107 	bcopy(cnp->cn_nameptr, buf + i, cnp->cn_namelen);
108 	de = dd;
109 	while (de != dmp->dm_rootdir) {
110 		i--;
111 		if (i < 0)
112 			 return (NULL);
113 		buf[i] = '/';
114 		i -= de->de_dirent->d_namlen;
115 		if (i < 0)
116 			 return (NULL);
117 		bcopy(de->de_dirent->d_name, buf + i,
118 		    de->de_dirent->d_namlen);
119 		de = TAILQ_FIRST(&de->de_dlist);	/* "." */
120 		de = TAILQ_NEXT(de, de_list);		/* ".." */
121 		de = de->de_dir;
122 	}
123 	return (buf + i);
124 }
125 
126 int
127 devfs_allocv(struct devfs_dirent *de, struct mount *mp, struct vnode **vpp, struct thread *td)
128 {
129 	int error;
130 	struct vnode *vp;
131 	struct cdev *dev;
132 
133 	KASSERT(td == curthread, ("devfs_allocv: td != curthread"));
134 loop:
135 	vp = de->de_vnode;
136 	if (vp != NULL) {
137 		if (vget(vp, LK_EXCLUSIVE, td))
138 			goto loop;
139 		*vpp = vp;
140 		return (0);
141 	}
142 	if (de->de_dirent->d_type == DT_CHR) {
143 		if (!(de->de_cdp->cdp_flags & CDP_ACTIVE))
144 			return (ENOENT);
145 		dev = &de->de_cdp->cdp_c;
146 	} else {
147 		dev = NULL;
148 	}
149 	error = getnewvnode("devfs", mp, &devfs_vnodeops, &vp);
150 	if (error != 0) {
151 		printf("devfs_allocv: failed to allocate new vnode\n");
152 		return (error);
153 	}
154 
155 	if (de->de_dirent->d_type == DT_CHR) {
156 		vp->v_type = VCHR;
157 		VI_LOCK(vp);
158 		dev_lock();
159 		dev_refl(dev);
160 		vp->v_rdev = dev;
161 		KASSERT(vp->v_usecount == 1,
162 		    ("%s %d (%d)\n", __func__, __LINE__, vp->v_usecount));
163 		dev->si_usecount += vp->v_usecount;
164 		dev_unlock();
165 		VI_UNLOCK(vp);
166 		vp->v_op = &devfs_specops;
167 	} else if (de->de_dirent->d_type == DT_DIR) {
168 		vp->v_type = VDIR;
169 	} else if (de->de_dirent->d_type == DT_LNK) {
170 		vp->v_type = VLNK;
171 	} else {
172 		vp->v_type = VBAD;
173 	}
174 	vp->v_data = de;
175 	de->de_vnode = vp;
176 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
177 #ifdef MAC
178 	mac_associate_vnode_devfs(mp, de, vp);
179 #endif
180 	*vpp = vp;
181 	return (0);
182 }
183 
184 static int
185 devfs_access(struct vop_access_args *ap)
186 {
187 	struct vnode *vp = ap->a_vp;
188 	struct devfs_dirent *de;
189 	int error;
190 
191 	de = vp->v_data;
192 	if (vp->v_type == VDIR)
193 		de = de->de_dir;
194 
195 	error = vaccess(vp->v_type, de->de_mode, de->de_uid, de->de_gid,
196 	    ap->a_mode, ap->a_cred, NULL);
197 	if (!error)
198 		return (error);
199 	if (error != EACCES)
200 		return (error);
201 	/* We do, however, allow access to the controlling terminal */
202 	if (!(ap->a_td->td_proc->p_flag & P_CONTROLT))
203 		return (error);
204 	if (ap->a_td->td_proc->p_session->s_ttyvp == de->de_vnode)
205 		return (0);
206 	return (error);
207 }
208 
209 /* ARGSUSED */
210 static int
211 devfs_advlock(struct vop_advlock_args *ap)
212 {
213 
214 	return (ap->a_flags & F_FLOCK ? EOPNOTSUPP : EINVAL);
215 }
216 
/*
 * VOP_CLOSE for character device vnodes.
 *
 * First releases the session's controlling-terminal reference when this
 * close leaves only that reference, then calls the driver's d_close
 * unless the device is still in use.
 *
 * Returns ENXIO when dev_refthread() yields no cdevsw, 0 when the driver
 * close is skipped because other users remain, otherwise the value
 * returned by d_close.
 */
/* ARGSUSED */
static int
devfs_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp, *oldvp;
	struct thread *td = ap->a_td;
	struct cdev *dev = vp->v_rdev;
	struct cdevsw *dsw;
	int error;

	/*
	 * Hack: a tty device that is a controlling terminal
	 * has a reference from the session structure.
	 * We cannot easily tell that a character device is
	 * a controlling terminal, unless it is the closing
	 * process' controlling terminal.  In that case,
	 * if the reference count is 2 (this last descriptor
	 * plus the session), release the reference from the session.
	 */
	oldvp = NULL;
	sx_xlock(&proctree_lock);
	if (td && vp == td->td_proc->p_session->s_ttyvp) {
		SESS_LOCK(td->td_proc->p_session);
		VI_LOCK(vp);
		if (count_dev(dev) == 2 && (vp->v_iflag & VI_DOOMED) == 0) {
			td->td_proc->p_session->s_ttyvp = NULL;
			oldvp = vp;
		}
		VI_UNLOCK(vp);
		SESS_UNLOCK(td->td_proc->p_session);
	}
	sx_xunlock(&proctree_lock);
	/* The vrele() is deferred until all of the locks above are dropped. */
	if (oldvp != NULL)
		vrele(oldvp);
	/*
	 * We do not want to really close the device if it
	 * is still in use unless we are trying to close it
	 * forcibly. Since every use (buffer, vnode, swap, cmap)
	 * holds a reference to the vnode, and because we mark
	 * any other vnodes that alias this device, when the
	 * sum of the reference counts on all the aliased
	 * vnodes descends to one, we are on last close.
	 */
	/* Every exit below this point pairs this with dev_relthread(). */
	dsw = dev_refthread(dev);
	if (dsw == NULL)
		return (ENXIO);
	VI_LOCK(vp);
	if (vp->v_iflag & VI_DOOMED) {
		/* Forced close. */
	} else if (dsw->d_flags & D_TRACKCLOSE) {
		/* Keep device updated on status. */
	} else if (count_dev(dev) > 1) {
		/* Still in use elsewhere: skip the driver's close routine. */
		VI_UNLOCK(vp);
		dev_relthread(dev);
		return (0);
	}
	VI_UNLOCK(vp);
	KASSERT(dev->si_refcount > 0,
	    ("devfs_close() on un-referenced struct cdev *(%s)", devtoname(dev)));
	/*
	 * Drivers without D_NEEDGIANT are called with the
	 * DROP_GIANT()/PICKUP_GIANT() pair bracketing the call.
	 */
	if (!(dsw->d_flags & D_NEEDGIANT)) {
		DROP_GIANT();
		error = dsw->d_close(dev, ap->a_fflag, S_IFCHR, td);
		PICKUP_GIANT();
	} else {
		error = dsw->d_close(dev, ap->a_fflag, S_IFCHR, td);
	}
	dev_relthread(dev);
	return (error);
}
286 
287 static int
288 devfs_close_f(struct file *fp, struct thread *td)
289 {
290 
291 	return (vnops.fo_close(fp, td));
292 }
293 
294 /* ARGSUSED */
295 static int
296 devfs_fsync(struct vop_fsync_args *ap)
297 {
298 	if (!vn_isdisk(ap->a_vp, NULL))
299 		return (0);
300 
301 	return (vop_stdfsync(ap));
302 }
303 
304 static int
305 devfs_getattr(struct vop_getattr_args *ap)
306 {
307 	struct vnode *vp = ap->a_vp;
308 	struct vattr *vap = ap->a_vap;
309 	int error = 0;
310 	struct devfs_dirent *de;
311 	struct cdev *dev;
312 
313 	de = vp->v_data;
314 	KASSERT(de != NULL, ("Null dirent in devfs_getattr vp=%p", vp));
315 	if (vp->v_type == VDIR) {
316 		de = de->de_dir;
317 		KASSERT(de != NULL,
318 		    ("Null dir dirent in devfs_getattr vp=%p", vp));
319 	}
320 	bzero((caddr_t) vap, sizeof(*vap));
321 	vattr_null(vap);
322 	vap->va_uid = de->de_uid;
323 	vap->va_gid = de->de_gid;
324 	vap->va_mode = de->de_mode;
325 	if (vp->v_type == VLNK)
326 		vap->va_size = strlen(de->de_symlink);
327 	else if (vp->v_type == VDIR)
328 		vap->va_size = vap->va_bytes = DEV_BSIZE;
329 	else
330 		vap->va_size = 0;
331 	if (vp->v_type != VDIR)
332 		vap->va_bytes = 0;
333 	vap->va_blocksize = DEV_BSIZE;
334 	vap->va_type = vp->v_type;
335 
336 #define fix(aa)							\
337 	do {							\
338 		if ((aa).tv_sec == 0) {				\
339 			(aa).tv_sec = boottime.tv_sec;		\
340 			(aa).tv_nsec = boottime.tv_usec * 1000; \
341 		}						\
342 	} while (0)
343 
344 	if (vp->v_type != VCHR)  {
345 		fix(de->de_atime);
346 		vap->va_atime = de->de_atime;
347 		fix(de->de_mtime);
348 		vap->va_mtime = de->de_mtime;
349 		fix(de->de_ctime);
350 		vap->va_ctime = de->de_ctime;
351 	} else {
352 		dev = vp->v_rdev;
353 		fix(dev->si_atime);
354 		vap->va_atime = dev->si_atime;
355 		fix(dev->si_mtime);
356 		vap->va_mtime = dev->si_mtime;
357 		fix(dev->si_ctime);
358 		vap->va_ctime = dev->si_ctime;
359 
360 		vap->va_rdev = dev->si_priv->cdp_inode;
361 	}
362 	vap->va_gen = 0;
363 	vap->va_flags = 0;
364 	vap->va_nlink = de->de_links;
365 	vap->va_fileid = de->de_inode;
366 
367 	return (error);
368 }
369 
/*
 * fo_ioctl for devfs files.
 *
 * FIODTYPE and FIODGNAME are answered here; every other command is passed
 * to the driver's d_ioctl, with ENOIOCTL translated to ENOTTY.  After a
 * successful TIOCSCTTY the file's vnode is recorded as the controlling
 * terminal vnode of the caller's session.
 *
 * Returns the devfs_fp_check() error, EINVAL when the FIODGNAME buffer is
 * too small, the copyout() result, or the (translated) driver result.
 */
/* ARGSUSED */
static int
devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, struct thread *td)
{
	struct cdev *dev;
	struct cdevsw *dsw;
	struct vnode *vp;
	struct vnode *vpold;
	int error, i;
	const char *p;
	struct fiodgname_arg *fgn;

	error = devfs_fp_check(fp, &dev, &dsw);
	if (error)
		return (error);

	if (com == FIODTYPE) {
		/* Report the type bits of the driver's d_flags. */
		*(int *)data = dsw->d_flags & D_TYPEMASK;
		dev_relthread(dev);
		return (0);
	} else if (com == FIODGNAME) {
		/* Copy out the device name, including its terminating NUL. */
		fgn = data;
		p = devtoname(dev);
		i = strlen(p) + 1;
		if (i > fgn->len)
			error = EINVAL;
		else
			error = copyout(p, fgn->buf, i);
		dev_relthread(dev);
		return (error);
	}
	error = dsw->d_ioctl(dev, com, data, fp->f_flag, td);
	dev_relthread(dev);
	if (error == ENOIOCTL)
		error = ENOTTY;
	if (error == 0 && com == TIOCSCTTY) {
		vp = fp->f_vnode;

		/* Do nothing if reassigning same control tty */
		sx_slock(&proctree_lock);
		if (td->td_proc->p_session->s_ttyvp == vp) {
			sx_sunlock(&proctree_lock);
			return (0);
		}

		mtx_lock(&Giant);

		/* Take the new reference before publishing vp in the session. */
		vpold = td->td_proc->p_session->s_ttyvp;
		VREF(vp);
		SESS_LOCK(td->td_proc->p_session);
		td->td_proc->p_session->s_ttyvp = vp;
		SESS_UNLOCK(td->td_proc->p_session);

		sx_sunlock(&proctree_lock);

		/* Get rid of reference to old control tty */
		if (vpold)
			vrele(vpold);
		mtx_unlock(&Giant);
	}
	return (error);
}
432 
433 /* ARGSUSED */
434 static int
435 devfs_kqfilter_f(struct file *fp, struct knote *kn)
436 {
437 	struct cdev *dev;
438 	struct cdevsw *dsw;
439 	int error;
440 
441 	error = devfs_fp_check(fp, &dev, &dsw);
442 	if (error)
443 		return (error);
444 	error = dsw->d_kqfilter(dev, kn);
445 	dev_relthread(dev);
446 	return (error);
447 }
448 
/*
 * Body of VOP_LOOKUP; devfs_lookup() calls it with the mount's dm_lock
 * held exclusively.
 *
 * Looks up the component described by ap->a_cnp in directory ap->a_dvp
 * and returns its vnode in *ap->a_vpp.  A name that has no entry may be
 * created on demand by the dev_clone event handlers.
 *
 * Returns 0 on success; EOPNOTSUPP for RENAME of the last component;
 * ENOTDIR, EIO, EINVAL or ENOENT for the cases checked below; EJUSTRETURN
 * when the last component is missing during CREATE/RENAME with
 * LOCKPARENT or WANTPARENT set; otherwise the error from VOP_ACCESS() or
 * devfs_allocv().
 */
static int
devfs_lookupx(struct vop_lookup_args *ap)
{
	struct componentname *cnp;
	struct vnode *dvp, **vpp;
	struct thread *td;
	struct devfs_dirent *de, *dd;
	struct devfs_dirent **dde;
	struct devfs_mount *dmp;
	struct cdev *cdev;
	int error, flags, nameiop;
	char specname[SPECNAMELEN + 1], *pname;

	cnp = ap->a_cnp;
	vpp = ap->a_vpp;
	dvp = ap->a_dvp;
	pname = cnp->cn_nameptr;
	td = cnp->cn_thread;
	flags = cnp->cn_flags;
	nameiop = cnp->cn_nameiop;
	dmp = VFSTODEVFS(dvp->v_mount);
	dd = dvp->v_data;
	*vpp = NULLVP;

	if ((flags & ISLASTCN) && nameiop == RENAME)
		return (EOPNOTSUPP);

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT))
		return (EIO);

	/* The caller needs search permission on the directory. */
	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td);
	if (error)
		return (error);

	/* "." resolves to the directory itself, with an extra reference. */
	if (cnp->cn_namelen == 1 && *pname == '.') {
		if ((flags & ISLASTCN) && nameiop != LOOKUP)
			return (EINVAL);
		*vpp = dvp;
		VREF(dvp);
		return (0);
	}

	/*
	 * "..": the directory's lock is dropped while the parent's vnode
	 * is obtained and is re-taken before returning.
	 */
	if (flags & ISDOTDOT) {
		if ((flags & ISLASTCN) && nameiop != LOOKUP)
			return (EINVAL);
		VOP_UNLOCK(dvp, 0, td);
		de = TAILQ_FIRST(&dd->de_dlist);	/* "." */
		de = TAILQ_NEXT(de, de_list);		/* ".." */
		de = de->de_dir;
		error = devfs_allocv(de, dvp->v_mount, vpp, td);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
		return (error);
	}

	devfs_populate(dmp);
	dd = dvp->v_data;
	de = devfs_find(dd, cnp->cn_nameptr, cnp->cn_namelen);
	while (de == NULL) {	/* While(...) so we can use break */

		if (nameiop == DELETE)
			return (ENOENT);

		/*
		 * OK, we didn't have an entry for the name we were asked for
		 * so we try to see if anybody can create it on demand.
		 */
		pname = devfs_fqpn(specname, dvp, cnp);
		if (pname == NULL)
			break;

		cdev = NULL;
		EVENTHANDLER_INVOKE(dev_clone,
		    td->td_ucred, pname, strlen(pname), &cdev);
		if (cdev == NULL)
			break;

		/* A device was cloned; repopulate and look for its entry. */
		devfs_populate(dmp);

		dev_lock();
		dde = &cdev->si_priv->cdp_dirents[dmp->dm_idx];
		if (dde != NULL && *dde != NULL)
			de = *dde;
		dev_unlock();
		dev_rel(cdev);
		break;
	}

	/* A missing or whited-out entry does not exist for the caller. */
	if (de == NULL || de->de_flags & DE_WHITEOUT) {
		if ((nameiop == CREATE || nameiop == RENAME) &&
		    (flags & (LOCKPARENT | WANTPARENT)) && (flags & ISLASTCN)) {
			cnp->cn_flags |= SAVENAME;
			return (EJUSTRETURN);
		}
		return (ENOENT);
	}

	/* Deleting the last component requires write access to dvp. */
	if ((cnp->cn_nameiop == DELETE) && (flags & ISLASTCN)) {
		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
		if (error)
			return (error);
		/*
		 * NOTE(review): *vpp was set to NULLVP on entry and is not
		 * reassigned on this path, so this comparison looks like it
		 * can never be true -- confirm before relying on it.
		 */
		if (*vpp == dvp) {
			VREF(dvp);
			*vpp = dvp;
			return (0);
		}
	}
	error = devfs_allocv(de, dvp->v_mount, vpp, td);
	return (error);
}
561 
562 static int
563 devfs_lookup(struct vop_lookup_args *ap)
564 {
565 	int j;
566 	struct devfs_mount *dmp;
567 
568 	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
569 	sx_xlock(&dmp->dm_lock);
570 	j = devfs_lookupx(ap);
571 	sx_xunlock(&dmp->dm_lock);
572 	return (j);
573 }
574 
575 static int
576 devfs_mknod(struct vop_mknod_args *ap)
577 {
578 	struct componentname *cnp;
579 	struct vnode *dvp, **vpp;
580 	struct thread *td;
581 	struct devfs_dirent *dd, *de;
582 	struct devfs_mount *dmp;
583 	int error;
584 
585 	/*
586 	 * The only type of node we should be creating here is a
587 	 * character device, for anything else return EOPNOTSUPP.
588 	 */
589 	if (ap->a_vap->va_type != VCHR)
590 		return (EOPNOTSUPP);
591 	dvp = ap->a_dvp;
592 	dmp = VFSTODEVFS(dvp->v_mount);
593 	sx_xlock(&dmp->dm_lock);
594 
595 	cnp = ap->a_cnp;
596 	vpp = ap->a_vpp;
597 	td = cnp->cn_thread;
598 	dd = dvp->v_data;
599 
600 	error = ENOENT;
601 	TAILQ_FOREACH(de, &dd->de_dlist, de_list) {
602 		if (cnp->cn_namelen != de->de_dirent->d_namlen)
603 			continue;
604 		if (bcmp(cnp->cn_nameptr, de->de_dirent->d_name,
605 		    de->de_dirent->d_namlen) != 0)
606 			continue;
607 		if (de->de_flags & DE_WHITEOUT)
608 			break;
609 		goto notfound;
610 	}
611 	if (de == NULL)
612 		goto notfound;
613 	de->de_flags &= ~DE_WHITEOUT;
614 	error = devfs_allocv(de, dvp->v_mount, vpp, td);
615 notfound:
616 	sx_xunlock(&dmp->dm_lock);
617 	return (error);
618 }
619 
/*
 * VOP_OPEN for character device vnodes.
 *
 * Calls the driver's d_fdopen (when provided) or d_open with the vnode
 * unlocked and, on success with a valid descriptor index, switches the
 * file over to devfs_ops_f with the cdev stored in f_data.
 *
 * Returns ENXIO for VBLK vnodes, a missing cdev or a missing cdevsw; the
 * securelevel_ge() error for write opens of disks; otherwise the
 * driver's result.
 */
/* ARGSUSED */
static int
devfs_open(struct vop_open_args *ap)
{
	struct thread *td = ap->a_td;
	struct vnode *vp = ap->a_vp;
	struct cdev *dev = vp->v_rdev;
	struct file *fp;
	int error;
	struct cdevsw *dsw;

	if (vp->v_type == VBLK)
		return (ENXIO);

	if (dev == NULL)
		return (ENXIO);

	/* Make this field valid before any I/O in d_open. */
	if (dev->si_iosize_max == 0)
		dev->si_iosize_max = DFLTPHYS;

	if (vn_isdisk(vp, NULL) &&
	    ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
		/*
		 * When running in very secure mode, do not allow
		 * opens for writing of any disks.
		 * XXX: should be in geom_dev.c, but we lack the cred there.
		 */
		error = securelevel_ge(td->td_ucred, 2);
		if (error)
			return (error);
	}

	dsw = dev_refthread(dev);
	if (dsw == NULL)
		return (ENXIO);

	/* XXX: Special casing of ttys for deadfs.  Probably redundant. */
	if (dsw->d_flags & D_TTY)
		vp->v_vflag |= VV_ISTTY;

	/* The vnode lock is not held across the driver's open routine. */
	VOP_UNLOCK(vp, 0, td);

	/*
	 * Drivers without D_NEEDGIANT are called with the
	 * DROP_GIANT()/PICKUP_GIANT() pair bracketing the call.
	 */
	if(!(dsw->d_flags & D_NEEDGIANT)) {
		DROP_GIANT();
		if (dsw->d_fdopen != NULL)
			error = dsw->d_fdopen(dev, ap->a_mode, td, ap->a_fdidx);
		else
			error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td);
		PICKUP_GIANT();
	} else {
		if (dsw->d_fdopen != NULL)
			error = dsw->d_fdopen(dev, ap->a_mode, td, ap->a_fdidx);
		else
			error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td);
	}

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);

	dev_relthread(dev);

	if (error)
		return (error);

#if 0	/* /dev/console */
	KASSERT(ap->a_fdidx >= 0,
	     ("Could not vnode bypass device on fd %d", ap->a_fdidx));
#else
	/* Without a descriptor index there is no file to redirect. */
	if(ap->a_fdidx < 0)
		return (error);
#endif
	/*
	 * This is a pretty disgustingly long chain, but I am not
	 * sure there is any better way.  Passing the fdidx into
	 * VOP_OPEN() offers us more information than just passing
	 * the file *.
	 */
	fp = ap->a_td->td_proc->p_fd->fd_ofiles[ap->a_fdidx];
	KASSERT(fp->f_ops == &badfileops,
	     ("Could not vnode bypass device on fdops %p", fp->f_ops));
	/* From here on, file operations on fp go through devfs_ops_f. */
	fp->f_ops = &devfs_ops_f;
	fp->f_data = dev;
	return (error);
}
704 
705 static int
706 devfs_pathconf(struct vop_pathconf_args *ap)
707 {
708 
709 	switch (ap->a_name) {
710 	case _PC_MAC_PRESENT:
711 #ifdef MAC
712 		/*
713 		 * If MAC is enabled, devfs automatically supports
714 		 * trivial non-persistant label storage.
715 		 */
716 		*ap->a_retval = 1;
717 #else
718 		*ap->a_retval = 0;
719 #endif
720 		return (0);
721 	default:
722 		return (vop_stdpathconf(ap));
723 	}
724 	/* NOTREACHED */
725 }
726 
727 /* ARGSUSED */
728 static int
729 devfs_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td)
730 {
731 	struct cdev *dev;
732 	struct cdevsw *dsw;
733 	int error;
734 
735 	error = devfs_fp_check(fp, &dev, &dsw);
736 	if (error)
737 		return (error);
738 	error = dsw->d_poll(dev, events, td);
739 	dev_relthread(dev);
740 	return(error);
741 }
742 
743 /*
744  * Print out the contents of a special device vnode.
745  */
746 static int
747 devfs_print(struct vop_print_args *ap)
748 {
749 
750 	printf("\tdev %s\n", devtoname(ap->a_vp->v_rdev));
751 	return (0);
752 }
753 
754 /* ARGSUSED */
755 static int
756 devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td)
757 {
758 	struct cdev *dev;
759 	int ioflag, error, resid;
760 	struct cdevsw *dsw;
761 
762 	error = devfs_fp_check(fp, &dev, &dsw);
763 	if (error)
764 		return (error);
765 	resid = uio->uio_resid;
766 	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT);
767 	if (ioflag & O_DIRECT)
768 		ioflag |= IO_DIRECT;
769 
770 	if ((flags & FOF_OFFSET) == 0)
771 		uio->uio_offset = fp->f_offset;
772 
773 	error = dsw->d_read(dev, uio, ioflag);
774 	dev_relthread(dev);
775 	if (uio->uio_resid != resid || (error == 0 && resid != 0))
776 		vfs_timestamp(&dev->si_atime);
777 
778 	if ((flags & FOF_OFFSET) == 0)
779 		fp->f_offset = uio->uio_offset;
780 	fp->f_nextoff = uio->uio_offset;
781 	return (error);
782 }
783 
/*
 * VOP_READDIR: copy the directory's entries to ap->a_uio.
 *
 * Entries flagged DE_WHITEOUT are skipped.  Returns ENOTDIR for a
 * non-directory vnode, EINVAL for a negative offset, or the error from
 * vfs_read_dirent(); 0 otherwise.  Cookies are not supported: when the
 * caller supplied a_ncookies, a cookie count of 0 is reported.
 */
static int
devfs_readdir(struct vop_readdir_args *ap)
{
	int error;
	struct uio *uio;
	struct dirent *dp;
	struct devfs_dirent *dd;
	struct devfs_dirent *de;
	struct devfs_mount *dmp;
	off_t off, oldoff;
	int *tmp_ncookies = NULL;

	if (ap->a_vp->v_type != VDIR)
		return (ENOTDIR);

	uio = ap->a_uio;
	if (uio->uio_offset < 0)
		return (EINVAL);

	/*
	 * XXX: This is a temporary hack to get around this filesystem not
	 * supporting cookies. We store the location of the ncookies pointer
	 * in a temporary variable before calling vfs_subr.c:vfs_read_dirent()
	 * and set the number of cookies to 0. We then set the pointer to
	 * NULL so that vfs_read_dirent doesn't try to call realloc() on
	 * ap->a_cookies. Later in this function, we restore the ap->a_ncookies
	 * pointer to its original location before returning to the caller.
	 */
	if (ap->a_ncookies != NULL) {
		tmp_ncookies = ap->a_ncookies;
		*ap->a_ncookies = 0;
		ap->a_ncookies = NULL;
	}

	dmp = VFSTODEVFS(ap->a_vp->v_mount);
	sx_xlock(&dmp->dm_lock);
	devfs_populate(dmp);
	error = 0;
	de = ap->a_vp->v_data;
	/* off counts the record lengths of the entries walked so far. */
	off = 0;
	/* NOTE(review): oldoff is assigned here but not read again below. */
	oldoff = uio->uio_offset;
	/*
	 * NOTE(review): de names the list head here and is reassigned inside
	 * the loop -- presumably safe because TAILQ_FOREACH evaluates the
	 * head only once; confirm against sys/queue.h.
	 */
	TAILQ_FOREACH(dd, &de->de_dlist, de_list) {
		KASSERT(dd->de_cdp != (void *)0xdeadc0de, ("%s %d\n", __func__, __LINE__));
		if (dd->de_flags & DE_WHITEOUT)
			continue;
		/* Directories report the inode number of their de_dir. */
		if (dd->de_dirent->d_type == DT_DIR)
			de = dd->de_dir;
		else
			de = dd;
		dp = dd->de_dirent;
		/* Stop once the next record no longer fits the request. */
		if (dp->d_reclen > uio->uio_resid)
			break;
		dp->d_fileno = de->de_inode;
		/* Entries before the requested offset are only counted. */
		if (off >= uio->uio_offset) {
			error = vfs_read_dirent(ap, dp, off);
			if (error)
				break;
		}
		off += dp->d_reclen;
	}
	sx_xunlock(&dmp->dm_lock);
	uio->uio_offset = off;

	/*
	 * Restore ap->a_ncookies if it wasn't originally NULL in the first
	 * place.
	 */
	if (tmp_ncookies != NULL)
		ap->a_ncookies = tmp_ncookies;

	return (error);
}
856 
857 static int
858 devfs_readlink(struct vop_readlink_args *ap)
859 {
860 	struct devfs_dirent *de;
861 
862 	de = ap->a_vp->v_data;
863 	return (uiomove(de->de_symlink, strlen(de->de_symlink), ap->a_uio));
864 }
865 
866 static int
867 devfs_reclaim(struct vop_reclaim_args *ap)
868 {
869 	struct vnode *vp = ap->a_vp;
870 	struct devfs_dirent *de;
871 	struct cdev *dev;
872 
873 	de = vp->v_data;
874 	if (de != NULL)
875 		de->de_vnode = NULL;
876 	vp->v_data = NULL;
877 	vnode_destroy_vobject(vp);
878 
879 	dev = vp->v_rdev;
880 	vp->v_rdev = NULL;
881 
882 	if (dev == NULL)
883 		return (0);
884 
885 	dev_lock();
886 	dev->si_usecount -= vp->v_usecount;
887 	dev_unlock();
888 	dev_rel(dev);
889 	return (0);
890 }
891 
892 static int
893 devfs_remove(struct vop_remove_args *ap)
894 {
895 	struct vnode *vp = ap->a_vp;
896 	struct devfs_dirent *dd;
897 	struct devfs_dirent *de;
898 	struct devfs_mount *dmp = VFSTODEVFS(vp->v_mount);
899 
900 	sx_xlock(&dmp->dm_lock);
901 	dd = ap->a_dvp->v_data;
902 	de = vp->v_data;
903 	if (de->de_cdp == NULL) {
904 		TAILQ_REMOVE(&dd->de_dlist, de, de_list);
905 		devfs_delete(dmp, de);
906 	} else {
907 		de->de_flags |= DE_WHITEOUT;
908 	}
909 	sx_xunlock(&dmp->dm_lock);
910 	return (0);
911 }
912 
/*
 * Revoke is called on a tty when a terminal session ends.  The vnode
 * is orphaned by setting v_op to deadfs so we need to let go of it
 * as well so that we create a new one next time around.
 *
 * Every directory entry recorded for the device in cdp_dirents[] has its
 * vnode pointer cleared, and each vnode found that way is passed to
 * vgone().  Always returns 0.
 *
 * XXX: locking :-(
 * XXX: We mess around with other mountpoints without holding their sxlock.
 * XXX: We hold the devlock() when we zero their vnode pointer, but is that
 * XXX: enough ?
 */
static int
devfs_revoke(struct vop_revoke_args *ap)
{
	struct vnode *vp = ap->a_vp, *vp2;
	struct cdev *dev;
	struct cdev_priv *cdp;
	struct devfs_dirent *de;
	int i;

	KASSERT((ap->a_flags & REVOKEALL) != 0, ("devfs_revoke !REVOKEALL"));

	dev = vp->v_rdev;
	cdp = dev->si_priv;
	/*
	 * Each pass detaches at most one vnode while holding the dev lock,
	 * then calls vgone() on it with the lock dropped.  The scan is
	 * restarted until a pass finds no vnode.
	 */
	for (;;) {
		dev_lock();
		vp2 = NULL;
		for (i = 0; i <= cdp->cdp_maxdirent; i++) {
			de = cdp->cdp_dirents[i];
			if (de == NULL)
				continue;
			vp2 = de->de_vnode;
			de->de_vnode = NULL;
			if (vp2 != NULL)
				break;
		}
		dev_unlock();
		if (vp2 != NULL) {
			vgone(vp2);
			continue;
		}
		break;
	}
	return (0);
}
957 
958 static int
959 devfs_rioctl(struct vop_ioctl_args *ap)
960 {
961 	int error;
962 	struct devfs_mount *dmp;
963 
964 	dmp = VFSTODEVFS(ap->a_vp->v_mount);
965 	sx_xlock(&dmp->dm_lock);
966 	devfs_populate(dmp);
967 	error = devfs_rules_ioctl(dmp, ap->a_command, ap->a_data, ap->a_td);
968 	sx_xunlock(&dmp->dm_lock);
969 	return (error);
970 }
971 
972 static int
973 devfs_rread(struct vop_read_args *ap)
974 {
975 
976 	if (ap->a_vp->v_type != VDIR)
977 		return (EINVAL);
978 	return (VOP_READDIR(ap->a_vp, ap->a_uio, ap->a_cred, NULL, NULL, NULL));
979 }
980 
/*
 * VOP_SETATTR: change owner, group, mode and access/modification times
 * of a devfs node.
 *
 * A request that tries to set any other attribute is rejected with
 * EINVAL.  Ownership and mode live in the directory entry (de_dir for
 * directories); the times of character devices live in the cdev.
 * Returns 0 on success, or the error from suser_cred() / VOP_ACCESS()
 * when the caller lacks the required privilege.
 */
static int
devfs_setattr(struct vop_setattr_args *ap)
{
	struct devfs_dirent *de;
	struct vattr *vap;
	struct vnode *vp;
	int c, error;
	uid_t uid;
	gid_t gid;

	vap = ap->a_vap;
	vp = ap->a_vp;
	/* Refuse attempts to set attributes that are not handled below. */
	if ((vap->va_type != VNON) ||
	    (vap->va_nlink != VNOVAL) ||
	    (vap->va_fsid != VNOVAL) ||
	    (vap->va_fileid != VNOVAL) ||
	    (vap->va_blocksize != VNOVAL) ||
	    (vap->va_flags != VNOVAL && vap->va_flags != 0) ||
	    (vap->va_rdev != VNOVAL) ||
	    ((int)vap->va_bytes != VNOVAL) ||
	    (vap->va_gen != VNOVAL)) {
		return (EINVAL);
	}

	de = vp->v_data;
	if (vp->v_type == VDIR)
		de = de->de_dir;

	/* c records whether anything was changed. */
	error = c = 0;
	/* VNOVAL means "leave unchanged": fall back to the current value. */
	if (vap->va_uid == (uid_t)VNOVAL)
		uid = de->de_uid;
	else
		uid = vap->va_uid;
	if (vap->va_gid == (gid_t)VNOVAL)
		gid = de->de_gid;
	else
		gid = vap->va_gid;
	if (uid != de->de_uid || gid != de->de_gid) {
		/*
		 * Without privilege, only the owner may change the group,
		 * and only to a group the credential is a member of.
		 */
		if (((ap->a_cred->cr_uid != de->de_uid) || uid != de->de_uid ||
		    (gid != de->de_gid && !groupmember(gid, ap->a_cred))) &&
		    (error = suser_cred(ap->a_td->td_ucred, SUSER_ALLOWJAIL)) != 0)
			return (error);
		de->de_uid = uid;
		de->de_gid = gid;
		c = 1;
	}

	if (vap->va_mode != (mode_t)VNOVAL) {
		/* Only the owner or a privileged caller may change the mode. */
		if ((ap->a_cred->cr_uid != de->de_uid) &&
		    (error = suser_cred(ap->a_td->td_ucred, SUSER_ALLOWJAIL)))
			return (error);
		de->de_mode = vap->va_mode;
		c = 1;
	}

	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
		/* See the comment in ufs_vnops::ufs_setattr(). */
		if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, ap->a_td)) &&
		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
		    (error = VOP_ACCESS(vp, VWRITE, ap->a_cred, ap->a_td))))
			return (error);
		if (vap->va_atime.tv_sec != VNOVAL) {
			if (vp->v_type == VCHR)
				vp->v_rdev->si_atime = vap->va_atime;
			else
				de->de_atime = vap->va_atime;
		}
		if (vap->va_mtime.tv_sec != VNOVAL) {
			if (vp->v_type == VCHR)
				vp->v_rdev->si_mtime = vap->va_mtime;
			else
				de->de_mtime = vap->va_mtime;
		}
		c = 1;
	}

	/*
	 * NOTE(review): the VCHR branch stamps si_ctime while the other
	 * branch stamps de_mtime, which also overwrites an mtime stored
	 * just above -- confirm whether de_ctime was intended.
	 */
	if (c) {
		if (vp->v_type == VCHR)
			vfs_timestamp(&vp->v_rdev->si_ctime);
		else
			vfs_timestamp(&de->de_mtime);
	}
	return (0);
}
1065 
#ifdef MAC
/*
 * VOP_SETLABEL (MAC kernels only): relabel the vnode and then update the
 * devfs directory entry behind it from the vnode.  Always returns 0.
 */
static int
devfs_setlabel(struct vop_setlabel_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct devfs_dirent *de = vp->v_data;

	mac_relabel_vnode(ap->a_cred, vp, ap->a_label);
	mac_update_devfsdirent(vp->v_mount, de, vp);
	return (0);
}
#endif
1082 
1083 static int
1084 devfs_stat_f(struct file *fp, struct stat *sb, struct ucred *cred, struct thread *td)
1085 {
1086 
1087 	return (vnops.fo_stat(fp, sb, cred, td));
1088 }
1089 
1090 static int
1091 devfs_symlink(struct vop_symlink_args *ap)
1092 {
1093 	int i, error;
1094 	struct devfs_dirent *dd;
1095 	struct devfs_dirent *de;
1096 	struct devfs_mount *dmp;
1097 	struct thread *td;
1098 
1099 	td = ap->a_cnp->cn_thread;
1100 	KASSERT(td == curthread, ("devfs_symlink: td != curthread"));
1101 	error = suser(td);
1102 	if (error)
1103 		return(error);
1104 	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
1105 	dd = ap->a_dvp->v_data;
1106 	de = devfs_newdirent(ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen);
1107 	de->de_uid = 0;
1108 	de->de_gid = 0;
1109 	de->de_mode = 0755;
1110 	de->de_inode = alloc_unr(devfs_inos);
1111 	de->de_dirent->d_type = DT_LNK;
1112 	i = strlen(ap->a_target) + 1;
1113 	de->de_symlink = malloc(i, M_DEVFS, M_WAITOK);
1114 	bcopy(ap->a_target, de->de_symlink, i);
1115 	sx_xlock(&dmp->dm_lock);
1116 #ifdef MAC
1117 	mac_create_devfs_symlink(ap->a_cnp->cn_cred, dmp->dm_mount, dd, de);
1118 #endif
1119 	TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list);
1120 	devfs_allocv(de, ap->a_dvp->v_mount, ap->a_vpp, td);
1121 	sx_xunlock(&dmp->dm_lock);
1122 	return (0);
1123 }
1124 
1125 /* ARGSUSED */
1126 static int
1127 devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td)
1128 {
1129 	struct cdev *dev;
1130 	int error, ioflag, resid;
1131 	struct cdevsw *dsw;
1132 
1133 	error = devfs_fp_check(fp, &dev, &dsw);
1134 	if (error)
1135 		return (error);
1136 	KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td));
1137 	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT | O_FSYNC);
1138 	if (ioflag & O_DIRECT)
1139 		ioflag |= IO_DIRECT;
1140 	if ((flags & FOF_OFFSET) == 0)
1141 		uio->uio_offset = fp->f_offset;
1142 
1143 	resid = uio->uio_resid;
1144 
1145 	error = dsw->d_write(dev, uio, ioflag);
1146 	dev_relthread(dev);
1147 	if (uio->uio_resid != resid || (error == 0 && resid != 0)) {
1148 		vfs_timestamp(&dev->si_ctime);
1149 		dev->si_mtime = dev->si_ctime;
1150 	}
1151 
1152 	if ((flags & FOF_OFFSET) == 0)
1153 		fp->f_offset = uio->uio_offset;
1154 	fp->f_nextoff = uio->uio_offset;
1155 	return (error);
1156 }
1157 
1158 dev_t
1159 dev2udev(struct cdev *x)
1160 {
1161 	if (x == NULL)
1162 		return (NODEV);
1163 	return (x->si_priv->cdp_inode);
1164 }
1165 
/*
 * File operations installed on a file by devfs_open()
 * (fp->f_ops = &devfs_ops_f).  Close and stat are forwarded to the
 * generic vnops by their handlers; the other entries call into the
 * driver's cdevsw.
 */
static struct fileops devfs_ops_f = {
	.fo_read =	devfs_read_f,
	.fo_write =	devfs_write_f,
	.fo_ioctl =	devfs_ioctl_f,
	.fo_poll =	devfs_poll_f,
	.fo_kqfilter =	devfs_kqfilter_f,
	.fo_stat =	devfs_stat_f,
	.fo_close =	devfs_close_f,
	.fo_flags =	DFLAG_PASSABLE | DFLAG_SEEKABLE
};
1176 
/*
 * Vnode operations passed to getnewvnode() in devfs_allocv(), i.e. the
 * vector every devfs vnode starts with; character devices are then
 * switched to devfs_specops.  Operations not listed here presumably fall
 * back to default_vnodeops through vop_default -- confirm against the
 * vop_vector implementation.
 */
static struct vop_vector devfs_vnodeops = {
	.vop_default =		&default_vnodeops,

	.vop_access =		devfs_access,
	.vop_getattr =		devfs_getattr,
	.vop_ioctl =		devfs_rioctl,
	.vop_lookup =		devfs_lookup,
	.vop_mknod =		devfs_mknod,
	.vop_pathconf =		devfs_pathconf,
	.vop_read =		devfs_rread,
	.vop_readdir =		devfs_readdir,
	.vop_readlink =		devfs_readlink,
	.vop_reclaim =		devfs_reclaim,
	.vop_remove =		devfs_remove,
	.vop_revoke =		devfs_revoke,
	.vop_setattr =		devfs_setattr,
#ifdef MAC
	.vop_setlabel =		devfs_setlabel,
#endif
	.vop_symlink =		devfs_symlink,
};
1198 
/*
 * Vnode operations for character device vnodes; devfs_allocv() installs
 * this vector on DT_CHR entries.  Operations set to VOP_PANIC are not
 * expected to be called on a device vnode through this vector (read and
 * write, for example, are provided by devfs_ops_f on the file instead).
 */
static struct vop_vector devfs_specops = {
	.vop_default =		&default_vnodeops,

	.vop_access =		devfs_access,
	.vop_advlock =		devfs_advlock,
	.vop_bmap =		VOP_PANIC,
	.vop_close =		devfs_close,
	.vop_create =		VOP_PANIC,
	.vop_fsync =		devfs_fsync,
	.vop_getattr =		devfs_getattr,
	.vop_lease =		VOP_NULL,
	.vop_link =		VOP_PANIC,
	.vop_mkdir =		VOP_PANIC,
	.vop_mknod =		VOP_PANIC,
	.vop_open =		devfs_open,
	.vop_pathconf =		devfs_pathconf,
	.vop_print =		devfs_print,
	.vop_read =		VOP_PANIC,
	.vop_readdir =		VOP_PANIC,
	.vop_readlink =		VOP_PANIC,
	.vop_reallocblks =	VOP_PANIC,
	.vop_reclaim =		devfs_reclaim,
	.vop_remove =		devfs_remove,
	.vop_rename =		VOP_PANIC,
	.vop_revoke =		devfs_revoke,
	.vop_rmdir =		VOP_PANIC,
	.vop_setattr =		devfs_setattr,
#ifdef MAC
	.vop_setlabel =		devfs_setlabel,
#endif
	.vop_strategy =		VOP_PANIC,
	.vop_symlink =		VOP_PANIC,
	.vop_write =		VOP_PANIC,
};
1233 
/*
 * Our calling convention to the device drivers used to be that we passed
 * vnode.h IO_* flags to read()/write(), but we're moving to fcntl.h O_*
 * flags instead, since that's what open(), close() and ioctl() take and
 * we don't really want vnode.h in device drivers.
 * We solved the source compatibility by redefining some vnode flags to
 * be the same as the fcntl ones and by sending down the bitwise OR of
 * the respective fcntl/vnode flags.  These CTASSERTs make sure nobody
 * pulls the rug out from under this.
 */
CTASSERT(O_NONBLOCK == IO_NDELAY);
CTASSERT(O_FSYNC == IO_SYNC);
1246