xref: /freebsd/sys/fs/devfs/devfs_vnops.c (revision a8e92198f854c2766eedec5a2ea3cc23c64d7b12)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2000-2004
5  *	Poul-Henning Kamp.  All rights reserved.
6  * Copyright (c) 1989, 1992-1993, 1995
7  *	The Regents of the University of California.  All rights reserved.
8  *
9  * This code is derived from software donated to Berkeley by
10  * Jan-Simon Pendry.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vnops.c 1.43
33  */
34 
35 /*
36  * TODO:
37  *	mkdir: want it ?
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/conf.h>
43 #include <sys/dirent.h>
44 #include <sys/eventhandler.h>
45 #include <sys/fcntl.h>
46 #include <sys/file.h>
47 #include <sys/filedesc.h>
48 #include <sys/filio.h>
49 #include <sys/jail.h>
50 #include <sys/kernel.h>
51 #include <sys/limits.h>
52 #include <sys/lock.h>
53 #include <sys/malloc.h>
54 #include <sys/mman.h>
55 #include <sys/mount.h>
56 #include <sys/namei.h>
57 #include <sys/priv.h>
58 #include <sys/proc.h>
59 #include <sys/stat.h>
60 #include <sys/sx.h>
61 #include <sys/sysctl.h>
62 #include <sys/time.h>
63 #include <sys/ttycom.h>
64 #include <sys/unistd.h>
65 #include <sys/vnode.h>
66 
67 static struct vop_vector devfs_vnodeops;
68 static struct vop_vector devfs_specops;
69 static const struct fileops devfs_ops_f;
70 
71 #include <fs/devfs/devfs.h>
72 #include <fs/devfs/devfs_int.h>
73 
74 #include <security/mac/mac_framework.h>
75 
76 #include <vm/vm.h>
77 #include <vm/vm_extern.h>
78 #include <vm/vm_object.h>
79 
/* Backing storage for per-descriptor cdevpriv records. */
static MALLOC_DEFINE(M_CDEVPDATA, "DEVFSP", "Metainfo for cdev-fp data");

/* Protects de_vnode/v_data linkage and the devfs use counts. */
struct mtx	devfs_de_interlock;
MTX_SYSINIT(devfs_de_interlock, &devfs_de_interlock, "devfs interlock", MTX_DEF);
/* Protects f_cdevpriv and the per-cdev cdp_fdpriv lists. */
struct mtx	cdevpriv_mtx;
MTX_SYSINIT(cdevpriv_mtx, &cdevpriv_mtx, "cdevpriv lock", MTX_DEF);

SYSCTL_DECL(_vfs_devfs);

/* vfs.devfs.dotimes: use full-precision timestamps (see devfs_timestamp). */
static int devfs_dotimes;
SYSCTL_INT(_vfs_devfs, OID_AUTO, dotimes, CTLFLAG_RW,
    &devfs_dotimes, 0, "Update timestamps on DEVFS with default precision");
92 
93 /*
94  * Update devfs node timestamp.  Note that updates are unlocked and
95  * stat(2) could see partially updated times.
96  */
97 static void
98 devfs_timestamp(struct timespec *tsp)
99 {
100 	time_t ts;
101 
102 	if (devfs_dotimes) {
103 		vfs_timestamp(tsp);
104 	} else {
105 		ts = time_second;
106 		if (tsp->tv_sec != ts) {
107 			tsp->tv_sec = ts;
108 			tsp->tv_nsec = 0;
109 		}
110 	}
111 }
112 
113 static int
114 devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp,
115     int *ref)
116 {
117 	*dswp = devvn_refthread(fp->f_vnode, devp, ref);
118 	if (*dswp == NULL || *devp != fp->f_data) {
119 		if (*dswp != NULL)
120 			dev_relthread(*devp, *ref);
121 		return (ENXIO);
122 	}
123 	KASSERT((*devp)->si_refcount > 0,
124 	    ("devfs: un-referenced struct cdev *(%s)", devtoname(*devp)));
125 	if (*dswp == NULL)
126 		return (ENXIO);
127 	curthread->td_fpop = fp;
128 	return (0);
129 }
130 
131 int
132 devfs_get_cdevpriv(void **datap)
133 {
134 	struct file *fp;
135 	struct cdev_privdata *p;
136 	int error;
137 
138 	fp = curthread->td_fpop;
139 	if (fp == NULL)
140 		return (EBADF);
141 	p = fp->f_cdevpriv;
142 	if (p != NULL) {
143 		error = 0;
144 		*datap = p->cdpd_data;
145 	} else
146 		error = ENOENT;
147 	return (error);
148 }
149 
150 int
151 devfs_set_cdevpriv(void *priv, d_priv_dtor_t *priv_dtr)
152 {
153 	struct file *fp;
154 	struct cdev_priv *cdp;
155 	struct cdev_privdata *p;
156 	int error;
157 
158 	fp = curthread->td_fpop;
159 	if (fp == NULL)
160 		return (ENOENT);
161 	cdp = cdev2priv((struct cdev *)fp->f_data);
162 	p = malloc(sizeof(struct cdev_privdata), M_CDEVPDATA, M_WAITOK);
163 	p->cdpd_data = priv;
164 	p->cdpd_dtr = priv_dtr;
165 	p->cdpd_fp = fp;
166 	mtx_lock(&cdevpriv_mtx);
167 	if (fp->f_cdevpriv == NULL) {
168 		LIST_INSERT_HEAD(&cdp->cdp_fdpriv, p, cdpd_list);
169 		fp->f_cdevpriv = p;
170 		mtx_unlock(&cdevpriv_mtx);
171 		error = 0;
172 	} else {
173 		mtx_unlock(&cdevpriv_mtx);
174 		free(p, M_CDEVPDATA);
175 		error = EBUSY;
176 	}
177 	return (error);
178 }
179 
180 int
181 devfs_foreach_cdevpriv(struct cdev *dev, int (*cb)(void *data, void *arg),
182     void *arg)
183 {
184 	struct cdev_priv *cdp;
185 	struct cdev_privdata *p;
186 	int error;
187 
188 	cdp = cdev2priv(dev);
189 	error = 0;
190 	mtx_lock(&cdevpriv_mtx);
191 	LIST_FOREACH(p, &cdp->cdp_fdpriv, cdpd_list) {
192 		error = cb(p->cdpd_data, arg);
193 		if (error != 0)
194 			break;
195 	}
196 	mtx_unlock(&cdevpriv_mtx);
197 	return (error);
198 }
199 
/*
 * Tear down one cdevpriv record and run its destructor.  Entered with
 * cdevpriv_mtx held; the lock is dropped around the destructor callback
 * and the function returns with the lock released.
 */
void
devfs_destroy_cdevpriv(struct cdev_privdata *p)
{
	struct file *fp;
	struct cdev_priv *cdp;

	mtx_assert(&cdevpriv_mtx, MA_OWNED);
	fp = p->cdpd_fp;
	KASSERT(fp->f_cdevpriv == p,
	    ("devfs_destoy_cdevpriv %p != %p", fp->f_cdevpriv, p));
	cdp = cdev2priv((struct cdev *)fp->f_data);
	/*
	 * Count in-flight destructors so that waiters on
	 * cdp_fdpriv_dtrc can detect when all of them have drained.
	 */
	cdp->cdp_fdpriv_dtrc++;
	fp->f_cdevpriv = NULL;
	LIST_REMOVE(p, cdpd_list);
	/* The destructor may sleep; call it without the mutex. */
	mtx_unlock(&cdevpriv_mtx);
	(p->cdpd_dtr)(p->cdpd_data);
	mtx_lock(&cdevpriv_mtx);
	MPASS(cdp->cdp_fdpriv_dtrc >= 1);
	cdp->cdp_fdpriv_dtrc--;
	if (cdp->cdp_fdpriv_dtrc == 0)
		wakeup(&cdp->cdp_fdpriv_dtrc);
	mtx_unlock(&cdevpriv_mtx);
	free(p, M_CDEVPDATA);
}
224 
225 static void
226 devfs_fpdrop(struct file *fp)
227 {
228 	struct cdev_privdata *p;
229 
230 	mtx_lock(&cdevpriv_mtx);
231 	if ((p = fp->f_cdevpriv) == NULL) {
232 		mtx_unlock(&cdevpriv_mtx);
233 		return;
234 	}
235 	devfs_destroy_cdevpriv(p);
236 }
237 
238 void
239 devfs_clear_cdevpriv(void)
240 {
241 	struct file *fp;
242 
243 	fp = curthread->td_fpop;
244 	if (fp == NULL)
245 		return;
246 	devfs_fpdrop(fp);
247 }
248 
/*
 * Bump the use counts on both the vnode's dirent and its cdev.  The
 * two counters are kept in sync under devfs_de_interlock plus the
 * vnode interlock; a doomed vnode is left untouched.
 */
static void
devfs_usecount_add(struct vnode *vp)
{
	struct devfs_dirent *de;
	struct cdev *dev;

	mtx_lock(&devfs_de_interlock);
	VI_LOCK(vp);
	VNPASS(vp->v_type == VCHR || vp->v_type == VBAD, vp);
	if (VN_IS_DOOMED(vp)) {
		goto out_unlock;
	}

	de = vp->v_data;
	dev = vp->v_rdev;
	MPASS(de != NULL);
	MPASS(dev != NULL);
	dev->si_usecount++;
	de->de_usecount++;
out_unlock:
	VI_UNLOCK(vp);
	mtx_unlock(&devfs_de_interlock);
}
272 
/*
 * Decrement the per-dirent and per-cdev use counts; underflow panics.
 * For a doomed vnode the dirent's entire remaining count is folded
 * back into the cdev at once.  Caller holds devfs_de_interlock and
 * the vnode interlock.
 */
static void
devfs_usecount_subl(struct vnode *vp)
{
	struct devfs_dirent *de;
	struct cdev *dev;

	mtx_assert(&devfs_de_interlock, MA_OWNED);
	ASSERT_VI_LOCKED(vp, __func__);
	VNPASS(vp->v_type == VCHR || vp->v_type == VBAD, vp);

	de = vp->v_data;
	dev = vp->v_rdev;
	/* Nothing to account for if the linkage was already broken. */
	if (de == NULL)
		return;
	if (dev == NULL) {
		MPASS(de->de_usecount == 0);
		return;
	}
	if (dev->si_usecount < de->de_usecount)
		panic("%s: si_usecount underflow for dev %p "
		    "(has %ld, dirent has %d)\n",
		    __func__, dev, dev->si_usecount, de->de_usecount);
	if (VN_IS_DOOMED(vp)) {
		/* Reclaimed vnode: return all of its uses in one go. */
		dev->si_usecount -= de->de_usecount;
		de->de_usecount = 0;
	} else {
		if (de->de_usecount == 0)
			panic("%s: de_usecount underflow for dev %p\n",
			    __func__, dev);
		dev->si_usecount--;
		de->de_usecount--;
	}
}
306 
/*
 * Locked wrapper around devfs_usecount_subl().
 */
static void
devfs_usecount_sub(struct vnode *vp)
{

	mtx_lock(&devfs_de_interlock);
	VI_LOCK(vp);
	devfs_usecount_subl(vp);
	VI_UNLOCK(vp);
	mtx_unlock(&devfs_de_interlock);
}
317 
/*
 * Return the cdev's use count.  Caller holds devfs_de_interlock and
 * the vnode interlock.
 */
static int
devfs_usecountl(struct vnode *vp)
{

	VNPASS(vp->v_type == VCHR, vp);
	mtx_assert(&devfs_de_interlock, MA_OWNED);
	ASSERT_VI_LOCKED(vp, __func__);
	return (vp->v_rdev->si_usecount);
}
327 
/*
 * Locked wrapper around devfs_usecountl().
 */
int
devfs_usecount(struct vnode *vp)
{
	int count;

	VNPASS(vp->v_type == VCHR, vp);
	mtx_lock(&devfs_de_interlock);
	VI_LOCK(vp);
	count = devfs_usecountl(vp);
	VI_UNLOCK(vp);
	mtx_unlock(&devfs_de_interlock);
	return (count);
}
341 
/*
 * Take a vnode reference plus a devfs use count on behalf of a
 * session's controlling terminal.
 */
void
devfs_ctty_ref(struct vnode *vp)
{

	vrefact(vp);
	devfs_usecount_add(vp);
}
349 
/*
 * Release the controlling-terminal use count and vnode reference
 * taken by devfs_ctty_ref().
 */
void
devfs_ctty_unref(struct vnode *vp)
{

	devfs_usecount_sub(vp);
	vrele(vp);
}
357 
358 /*
359  * On success devfs_populate_vp() returns with dmp->dm_lock held.
360  */
361 static int
362 devfs_populate_vp(struct vnode *vp)
363 {
364 	struct devfs_dirent *de;
365 	struct devfs_mount *dmp;
366 	int locked;
367 
368 	ASSERT_VOP_LOCKED(vp, "devfs_populate_vp");
369 
370 	if (VN_IS_DOOMED(vp))
371 		return (ENOENT);
372 
373 	dmp = VFSTODEVFS(vp->v_mount);
374 	if (!devfs_populate_needed(dmp)) {
375 		sx_xlock(&dmp->dm_lock);
376 		goto out_nopopulate;
377 	}
378 
379 	locked = VOP_ISLOCKED(vp);
380 
381 	sx_xlock(&dmp->dm_lock);
382 	DEVFS_DMP_HOLD(dmp);
383 
384 	/* Can't call devfs_populate() with the vnode lock held. */
385 	VOP_UNLOCK(vp);
386 	devfs_populate(dmp);
387 
388 	sx_xunlock(&dmp->dm_lock);
389 	vn_lock(vp, locked | LK_RETRY);
390 	sx_xlock(&dmp->dm_lock);
391 	if (DEVFS_DMP_DROP(dmp)) {
392 		sx_xunlock(&dmp->dm_lock);
393 		devfs_unmount_final(dmp);
394 		return (ERESTART);
395 	}
396 out_nopopulate:
397 	if (VN_IS_DOOMED(vp)) {
398 		sx_xunlock(&dmp->dm_lock);
399 		return (ERESTART);
400 	}
401 	de = vp->v_data;
402 	KASSERT(de != NULL,
403 	    ("devfs_populate_vp: vp->v_data == NULL but vnode not doomed"));
404 	if ((de->de_flags & DE_DOOMED) != 0) {
405 		sx_xunlock(&dmp->dm_lock);
406 		return (ERESTART);
407 	}
408 
409 	return (0);
410 }
411 
/*
 * vop_vptocnp for devfs: copy the entry's name into the caller's
 * buffer (filling backwards from *buflen) and return a referenced
 * vnode for the parent directory in *a_vpp.  The root directory
 * resolves to itself.
 */
static int
devfs_vptocnp(struct vop_vptocnp_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode **dvp = ap->a_vpp;
	struct devfs_mount *dmp;
	char *buf = ap->a_buf;
	size_t *buflen = ap->a_buflen;
	struct devfs_dirent *dd, *de;
	int i, error;

	dmp = VFSTODEVFS(vp->v_mount);

	/* On success this returns with dm_lock held. */
	error = devfs_populate_vp(vp);
	if (error != 0)
		return (error);

	if (vp->v_type != VCHR && vp->v_type != VDIR) {
		error = ENOENT;
		goto finished;
	}

	dd = vp->v_data;
	if (vp->v_type == VDIR && dd == dmp->dm_rootdir) {
		*dvp = vp;
		vref(*dvp);
		goto finished;
	}

	/* Prepend the name into the caller-supplied buffer. */
	i = *buflen;
	i -= dd->de_dirent->d_namlen;
	if (i < 0) {
		error = ENOMEM;
		goto finished;
	}
	bcopy(dd->de_dirent->d_name, buf + i, dd->de_dirent->d_namlen);
	*buflen = i;
	de = devfs_parent_dirent(dd);
	if (de == NULL) {
		error = ENOENT;
		goto finished;
	}
	/*
	 * Take a hold on the parent vnode under its interlock so that
	 * it cannot be freed before we vref() it.
	 */
	mtx_lock(&devfs_de_interlock);
	*dvp = de->de_vnode;
	if (*dvp != NULL) {
		VI_LOCK(*dvp);
		mtx_unlock(&devfs_de_interlock);
		vholdl(*dvp);
		VI_UNLOCK(*dvp);
		vref(*dvp);
		vdrop(*dvp);
	} else {
		mtx_unlock(&devfs_de_interlock);
		error = ENOENT;
	}
finished:
	sx_xunlock(&dmp->dm_lock);
	return (error);
}
471 
472 /*
473  * Construct the fully qualified path name relative to the mountpoint.
474  * If a NULL cnp is provided, no '/' is appended to the resulting path.
475  */
476 char *
477 devfs_fqpn(char *buf, struct devfs_mount *dmp, struct devfs_dirent *dd,
478     struct componentname *cnp)
479 {
480 	int i;
481 	struct devfs_dirent *de;
482 
483 	sx_assert(&dmp->dm_lock, SA_LOCKED);
484 
485 	i = SPECNAMELEN;
486 	buf[i] = '\0';
487 	if (cnp != NULL)
488 		i -= cnp->cn_namelen;
489 	if (i < 0)
490 		 return (NULL);
491 	if (cnp != NULL)
492 		bcopy(cnp->cn_nameptr, buf + i, cnp->cn_namelen);
493 	de = dd;
494 	while (de != dmp->dm_rootdir) {
495 		if (cnp != NULL || i < SPECNAMELEN) {
496 			i--;
497 			if (i < 0)
498 				 return (NULL);
499 			buf[i] = '/';
500 		}
501 		i -= de->de_dirent->d_namlen;
502 		if (i < 0)
503 			 return (NULL);
504 		bcopy(de->de_dirent->d_name, buf + i,
505 		    de->de_dirent->d_namlen);
506 		de = devfs_parent_dirent(de);
507 		if (de == NULL)
508 			return (NULL);
509 	}
510 	return (buf + i);
511 }
512 
/*
 * Drop the dirent and mount holds taken by devfs_allocv() before it
 * slept.  Returns non-zero iff the dirent was doomed in the meantime;
 * the value 2 additionally means the mount itself was torn down (and
 * dm_lock is already gone).  dm_lock is released when drop_dm_lock is
 * set or the dirent was doomed, unless the mount was freed.
 */
static int
devfs_allocv_drop_refs(int drop_dm_lock, struct devfs_mount *dmp,
	struct devfs_dirent *de)
{
	int not_found;

	not_found = 0;
	if (de->de_flags & DE_DOOMED)
		not_found = 1;
	if (DEVFS_DE_DROP(de)) {
		KASSERT(not_found == 1, ("DEVFS de dropped but not doomed"));
		devfs_dirent_free(de);
	}
	if (DEVFS_DMP_DROP(dmp)) {
		KASSERT(not_found == 1,
			("DEVFS mount struct freed before dirent"));
		not_found = 2;
		sx_xunlock(&dmp->dm_lock);
		devfs_unmount_final(dmp);
	}
	if (not_found == 1 || (drop_dm_lock && not_found != 2))
		sx_unlock(&dmp->dm_lock);
	return (not_found);
}
537 
538 /*
539  * devfs_allocv shall be entered with dmp->dm_lock held, and it drops
540  * it on return.
541  */
542 int
543 devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode,
544     struct vnode **vpp)
545 {
546 	int error;
547 	struct vnode *vp;
548 	struct cdev *dev;
549 	struct devfs_mount *dmp;
550 	struct cdevsw *dsw;
551 	enum vgetstate vs;
552 
553 	dmp = VFSTODEVFS(mp);
554 	if (de->de_flags & DE_DOOMED) {
555 		sx_xunlock(&dmp->dm_lock);
556 		return (ENOENT);
557 	}
558 loop:
559 	DEVFS_DE_HOLD(de);
560 	DEVFS_DMP_HOLD(dmp);
561 	mtx_lock(&devfs_de_interlock);
562 	vp = de->de_vnode;
563 	if (vp != NULL) {
564 		vs = vget_prep(vp);
565 		mtx_unlock(&devfs_de_interlock);
566 		sx_xunlock(&dmp->dm_lock);
567 		vget_finish(vp, lockmode | LK_RETRY, vs);
568 		sx_xlock(&dmp->dm_lock);
569 		if (devfs_allocv_drop_refs(0, dmp, de)) {
570 			vput(vp);
571 			return (ENOENT);
572 		} else if (VN_IS_DOOMED(vp)) {
573 			mtx_lock(&devfs_de_interlock);
574 			if (de->de_vnode == vp) {
575 				de->de_vnode = NULL;
576 				vp->v_data = NULL;
577 			}
578 			mtx_unlock(&devfs_de_interlock);
579 			vput(vp);
580 			goto loop;
581 		}
582 		sx_xunlock(&dmp->dm_lock);
583 		*vpp = vp;
584 		return (0);
585 	}
586 	mtx_unlock(&devfs_de_interlock);
587 	if (de->de_dirent->d_type == DT_CHR) {
588 		if (!(de->de_cdp->cdp_flags & CDP_ACTIVE)) {
589 			devfs_allocv_drop_refs(1, dmp, de);
590 			return (ENOENT);
591 		}
592 		dev = &de->de_cdp->cdp_c;
593 	} else {
594 		dev = NULL;
595 	}
596 	error = getnewvnode("devfs", mp, &devfs_vnodeops, &vp);
597 	if (error != 0) {
598 		devfs_allocv_drop_refs(1, dmp, de);
599 		printf("devfs_allocv: failed to allocate new vnode\n");
600 		return (error);
601 	}
602 
603 	if (de->de_dirent->d_type == DT_CHR) {
604 		vp->v_type = VCHR;
605 		VI_LOCK(vp);
606 		dev_lock();
607 		dev_refl(dev);
608 		/* XXX: v_rdev should be protect by vnode lock */
609 		vp->v_rdev = dev;
610 		VNPASS(vp->v_usecount == 1, vp);
611 		/* Special casing of ttys for deadfs.  Probably redundant. */
612 		dsw = dev->si_devsw;
613 		if (dsw != NULL && (dsw->d_flags & D_TTY) != 0)
614 			vp->v_vflag |= VV_ISTTY;
615 		dev_unlock();
616 		VI_UNLOCK(vp);
617 		if ((dev->si_flags & SI_ETERNAL) != 0)
618 			vp->v_vflag |= VV_ETERNALDEV;
619 		vp->v_op = &devfs_specops;
620 	} else if (de->de_dirent->d_type == DT_DIR) {
621 		vp->v_type = VDIR;
622 	} else if (de->de_dirent->d_type == DT_LNK) {
623 		vp->v_type = VLNK;
624 	} else {
625 		vp->v_type = VBAD;
626 	}
627 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_NOWITNESS);
628 	VN_LOCK_ASHARE(vp);
629 	mtx_lock(&devfs_de_interlock);
630 	vp->v_data = de;
631 	de->de_vnode = vp;
632 	mtx_unlock(&devfs_de_interlock);
633 	error = insmntque1(vp, mp);
634 	if (error != 0) {
635 		mtx_lock(&devfs_de_interlock);
636 		vp->v_data = NULL;
637 		de->de_vnode = NULL;
638 		mtx_unlock(&devfs_de_interlock);
639 		vgone(vp);
640 		vput(vp);
641 		(void) devfs_allocv_drop_refs(1, dmp, de);
642 		return (error);
643 	}
644 	if (devfs_allocv_drop_refs(0, dmp, de)) {
645 		vgone(vp);
646 		vput(vp);
647 		return (ENOENT);
648 	}
649 #ifdef MAC
650 	mac_devfs_vnode_associate(mp, de, vp);
651 #endif
652 	sx_xunlock(&dmp->dm_lock);
653 	vn_set_state(vp, VSTATE_CONSTRUCTED);
654 	*vpp = vp;
655 	return (0);
656 }
657 
/*
 * vop_access for devfs: check against the dirent's uid/gid/mode via
 * vaccess().  A process that would get EACCES is still granted access
 * to its own controlling terminal.
 */
static int
devfs_access(struct vop_access_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct devfs_dirent *de;
	struct proc *p;
	int error;

	de = vp->v_data;
	/* Directories keep their attributes on the companion dirent. */
	if (vp->v_type == VDIR)
		de = de->de_dir;

	error = vaccess(vp->v_type, de->de_mode, de->de_uid, de->de_gid,
	    ap->a_accmode, ap->a_cred);
	if (error == 0)
		return (0);
	if (error != EACCES)
		return (error);
	p = ap->a_td->td_proc;
	/* We do, however, allow access to the controlling terminal */
	PROC_LOCK(p);
	if (!(p->p_flag & P_CONTROLT)) {
		PROC_UNLOCK(p);
		return (error);
	}
	if (p->p_session->s_ttydp == de->de_cdp)
		error = 0;
	PROC_UNLOCK(p);
	return (error);
}
688 
/*
 * FLASTCLOSE and FREVOKE are devfs-private flag values passed to
 * d_close() in devfs_close(); ensure they cannot collide with the
 * generic file flag bits.
 */
_Static_assert(((FMASK | FCNTLFLAGS) & (FLASTCLOSE | FREVOKE)) == 0,
    "devfs-only flag reuse failed");
691 
/*
 * vop_close for devfs.  Drops the devfs use count, releases the
 * session's controlling-terminal reference when this close would
 * leave only that reference, and invokes the driver's d_close() on
 * last close, on forced close (revoke), or whenever the driver set
 * D_TRACKCLOSE.
 */
static int
devfs_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp, *oldvp;
	struct thread *td = ap->a_td;
	struct proc *p;
	struct cdev *dev = vp->v_rdev;
	struct cdevsw *dsw;
	struct devfs_dirent *de = vp->v_data;
	int dflags, error, ref, vp_locked;

	/*
	 * XXX: Don't call d_close() if we were called because of
	 * XXX: insmntque() failure.
	 */
	if (vp->v_data == NULL)
		return (0);

	/*
	 * Hack: a tty device that is a controlling terminal
	 * has a reference from the session structure.
	 * We cannot easily tell that a character device is
	 * a controlling terminal, unless it is the closing
	 * process' controlling terminal.  In that case,
	 * if the reference count is 2 (this last descriptor
	 * plus the session), release the reference from the session.
	 */
	if (de->de_usecount == 2 && td != NULL) {
		p = td->td_proc;
		PROC_LOCK(p);
		if (vp == p->p_session->s_ttyvp) {
			PROC_UNLOCK(p);
			oldvp = NULL;
			sx_xlock(&proctree_lock);
			/* Re-check after acquiring proctree_lock. */
			if (vp == p->p_session->s_ttyvp) {
				SESS_LOCK(p->p_session);
				mtx_lock(&devfs_de_interlock);
				VI_LOCK(vp);
				if (devfs_usecountl(vp) == 2 && !VN_IS_DOOMED(vp)) {
					p->p_session->s_ttyvp = NULL;
					p->p_session->s_ttydp = NULL;
					oldvp = vp;
				}
				VI_UNLOCK(vp);
				mtx_unlock(&devfs_de_interlock);
				SESS_UNLOCK(p->p_session);
			}
			sx_xunlock(&proctree_lock);
			if (oldvp != NULL)
				devfs_ctty_unref(oldvp);
		} else
			PROC_UNLOCK(p);
	}
	/*
	 * We do not want to really close the device if it
	 * is still in use unless we are trying to close it
	 * forcibly. Since every use (buffer, vnode, swap, cmap)
	 * holds a reference to the vnode, and because we mark
	 * any other vnodes that alias this device, when the
	 * sum of the reference counts on all the aliased
	 * vnodes descends to one, we are on last close.
	 */
	dsw = dev_refthread(dev, &ref);
	if (dsw == NULL)
		return (ENXIO);
	dflags = 0;
	mtx_lock(&devfs_de_interlock);
	VI_LOCK(vp);
	if (devfs_usecountl(vp) == 1)
		dflags |= FLASTCLOSE;
	devfs_usecount_subl(vp);
	mtx_unlock(&devfs_de_interlock);
	if (VN_IS_DOOMED(vp)) {
		/* Forced close. */
		dflags |= FREVOKE | FNONBLOCK;
	} else if (dsw->d_flags & D_TRACKCLOSE) {
		/* Keep device updated on status. */
	} else if ((dflags & FLASTCLOSE) == 0) {
		VI_UNLOCK(vp);
		dev_relthread(dev, ref);
		return (0);
	}
	/* Keep the vnode from being freed while unlocked for d_close(). */
	vholdnz(vp);
	VI_UNLOCK(vp);
	vp_locked = VOP_ISLOCKED(vp);
	VOP_UNLOCK(vp);
	KASSERT(dev->si_refcount > 0,
	    ("devfs_close() on un-referenced struct cdev *(%s)", devtoname(dev)));
	error = dsw->d_close(dev, ap->a_fflag | dflags, S_IFCHR, td);
	dev_relthread(dev, ref);
	vn_lock(vp, vp_locked | LK_RETRY);
	vdrop(vp);
	return (error);
}
786 
/*
 * fo_close for devfs file descriptors: forward to the generic vnode
 * fileops with td_fpop set, then drop any remaining cdevpriv data.
 */
static int
devfs_close_f(struct file *fp, struct thread *td)
{
	int error;
	struct file *fpop;

	/*
	 * NB: td may be NULL if this descriptor is closed due to
	 * garbage collection from a closed UNIX domain socket.
	 */
	fpop = curthread->td_fpop;
	curthread->td_fpop = fp;
	error = vnops.fo_close(fp, td);
	curthread->td_fpop = fpop;

	/*
	 * The f_cdevpriv cannot be assigned non-NULL value while we
	 * are destroying the file.
	 */
	if (fp->f_cdevpriv != NULL)
		devfs_fpdrop(fp);
	return (error);
}
810 
/*
 * vop_getattr for devfs.  Attributes come from the devfs dirent;
 * timestamps for character devices come from the cdev itself.
 * Timestamps that look unset (<= 3600 seconds) are reported as the
 * system boot time.
 */
static int
devfs_getattr(struct vop_getattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vattr *vap = ap->a_vap;
	struct devfs_dirent *de;
	struct devfs_mount *dmp;
	struct cdev *dev;
	struct timeval boottime;
	int error;

	/* Make sure the tree is up to date; returns with dm_lock held. */
	error = devfs_populate_vp(vp);
	if (error != 0)
		return (error);

	dmp = VFSTODEVFS(vp->v_mount);
	sx_xunlock(&dmp->dm_lock);

	de = vp->v_data;
	KASSERT(de != NULL, ("Null dirent in devfs_getattr vp=%p", vp));
	if (vp->v_type == VDIR) {
		de = de->de_dir;
		KASSERT(de != NULL,
		    ("Null dir dirent in devfs_getattr vp=%p", vp));
	}
	vap->va_uid = de->de_uid;
	vap->va_gid = de->de_gid;
	vap->va_mode = de->de_mode;
	if (vp->v_type == VLNK)
		vap->va_size = strlen(de->de_symlink);
	else if (vp->v_type == VDIR)
		vap->va_size = vap->va_bytes = DEV_BSIZE;
	else
		vap->va_size = 0;
	if (vp->v_type != VDIR)
		vap->va_bytes = 0;
	vap->va_blocksize = DEV_BSIZE;
	vap->va_type = vp->v_type;

	getboottime(&boottime);
/* Substitute boot time for timestamps that were never set. */
#define fix(aa)							\
	do {							\
		if ((aa).tv_sec <= 3600) {			\
			(aa).tv_sec = boottime.tv_sec;		\
			(aa).tv_nsec = boottime.tv_usec * 1000; \
		}						\
	} while (0)

	if (vp->v_type != VCHR)  {
		fix(de->de_atime);
		vap->va_atime = de->de_atime;
		fix(de->de_mtime);
		vap->va_mtime = de->de_mtime;
		fix(de->de_ctime);
		vap->va_ctime = de->de_ctime;
	} else {
		dev = vp->v_rdev;
		fix(dev->si_atime);
		vap->va_atime = dev->si_atime;
		fix(dev->si_mtime);
		vap->va_mtime = dev->si_mtime;
		fix(dev->si_ctime);
		vap->va_ctime = dev->si_ctime;

		vap->va_rdev = cdev2priv(dev)->cdp_inode;
	}
	vap->va_gen = 0;
	vap->va_flags = 0;
	vap->va_filerev = 0;
	vap->va_nlink = de->de_links;
	vap->va_fileid = de->de_inode;

	return (error);
}
885 
/* ARGSUSED */
/*
 * fo_ioctl for devfs file descriptors: forward to the generic vnode
 * fileops with td_fpop set so cdevpriv lookups work in the driver.
 */
static int
devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, struct thread *td)
{
	struct file *fpop;
	int error;

	fpop = td->td_fpop;
	td->td_fpop = fp;
	error = vnops.fo_ioctl(fp, com, data, cred, td);
	td->td_fpop = fpop;
	return (error);
}
899 
900 void *
901 fiodgname_buf_get_ptr(void *fgnp, u_long com)
902 {
903 	union {
904 		struct fiodgname_arg	fgn;
905 #ifdef COMPAT_FREEBSD32
906 		struct fiodgname_arg32	fgn32;
907 #endif
908 	} *fgnup;
909 
910 	fgnup = fgnp;
911 	switch (com) {
912 	case FIODGNAME:
913 		return (fgnup->fgn.buf);
914 #ifdef COMPAT_FREEBSD32
915 	case FIODGNAME_32:
916 		return ((void *)(uintptr_t)fgnup->fgn32.buf);
917 #endif
918 	default:
919 		panic("Unhandled ioctl command %ld", com);
920 	}
921 }
922 
/*
 * vop_ioctl for devfs.  FIODTYPE and FIODGNAME are handled here;
 * everything else goes to the driver's d_ioctl().  A successful
 * TIOCSCTTY additionally records this vnode as the session's
 * controlling terminal, releasing the old one.
 */
static int
devfs_ioctl(struct vop_ioctl_args *ap)
{
	struct fiodgname_arg *fgn;
	struct vnode *vpold, *vp;
	struct cdevsw *dsw;
	struct thread *td;
	struct session *sess;
	struct cdev *dev;
	int error, ref, i;
	const char *p;
	u_long com;

	vp = ap->a_vp;
	com = ap->a_command;
	td = ap->a_td;

	dsw = devvn_refthread(vp, &dev, &ref);
	if (dsw == NULL)
		return (ENXIO);
	KASSERT(dev->si_refcount > 0,
	    ("devfs: un-referenced struct cdev *(%s)", devtoname(dev)));

	switch (com) {
	case FIODTYPE:
		*(int *)ap->a_data = dsw->d_flags & D_TYPEMASK;
		error = 0;
		break;
	case FIODGNAME:
#ifdef	COMPAT_FREEBSD32
	case FIODGNAME_32:
#endif
		/* Copy the device name out to the user-supplied buffer. */
		fgn = ap->a_data;
		p = devtoname(dev);
		i = strlen(p) + 1;
		if (i > fgn->len)
			error = EINVAL;
		else
			error = copyout(p, fiodgname_buf_get_ptr(fgn, com), i);
		break;
	default:
		error = dsw->d_ioctl(dev, com, ap->a_data, ap->a_fflag, td);
	}

	dev_relthread(dev, ref);
	if (error == ENOIOCTL)
		error = ENOTTY;

	if (error == 0 && com == TIOCSCTTY) {
		/*
		 * Do nothing if reassigning same control tty, or if the
		 * control tty has already disappeared.  If it disappeared,
		 * it's because we were racing with TIOCNOTTY.  TIOCNOTTY
		 * already took care of releasing the old vnode and we have
		 * nothing left to do.
		 */
		sx_slock(&proctree_lock);
		sess = td->td_proc->p_session;
		if (sess->s_ttyvp == vp || sess->s_ttyp == NULL) {
			sx_sunlock(&proctree_lock);
			return (0);
		}

		devfs_ctty_ref(vp);
		SESS_LOCK(sess);
		vpold = sess->s_ttyvp;
		sess->s_ttyvp = vp;
		sess->s_ttydp = cdev2priv(dev);
		SESS_UNLOCK(sess);

		sx_sunlock(&proctree_lock);

		/* Get rid of reference to old control tty */
		if (vpold)
			devfs_ctty_unref(vpold);
	}
	return (error);
}
1001 
/* ARGSUSED */
/*
 * fo_kqfilter for devfs file descriptors: resolve the device and
 * forward to the driver's d_kqfilter().  devfs_fp_check() sets
 * td_fpop, which is restored afterwards.
 */
static int
devfs_kqfilter_f(struct file *fp, struct knote *kn)
{
	struct cdev *dev;
	struct cdevsw *dsw;
	int error, ref;
	struct file *fpop;
	struct thread *td;

	td = curthread;
	fpop = td->td_fpop;
	error = devfs_fp_check(fp, &dev, &dsw, &ref);
	if (error)
		return (error);
	error = dsw->d_kqfilter(dev, kn);
	td->td_fpop = fpop;
	dev_relthread(dev, ref);
	return (error);
}
1022 
/*
 * Hide devices whose creator's prison does not match the calling
 * thread's, except for the process' controlling terminal.  Returns 0
 * when access is allowed.
 */
static inline int
devfs_prison_check(struct devfs_dirent *de, struct thread *td)
{
	struct cdev_priv *cdp;
	struct ucred *dcr;
	struct proc *p;
	int error;

	cdp = de->de_cdp;
	/* Directories and symlinks carry no cdev and are always visible. */
	if (cdp == NULL)
		return (0);
	dcr = cdp->cdp_c.si_cred;
	if (dcr == NULL)
		return (0);

	error = prison_check(td->td_ucred, dcr);
	if (error == 0)
		return (0);
	/* We do, however, allow access to the controlling terminal */
	p = td->td_proc;
	PROC_LOCK(p);
	if (!(p->p_flag & P_CONTROLT)) {
		PROC_UNLOCK(p);
		return (error);
	}
	if (p->p_session->s_ttydp == cdp)
		error = 0;
	PROC_UNLOCK(p);
	return (error);
}
1053 
/*
 * Worker for devfs_lookup().  Entered with dmp->dm_lock held; *dm_unlock
 * is cleared whenever the lock has already been consumed (by
 * devfs_allocv() or by a failed repopulation) so the caller must not
 * unlock it again.  Missing names may be created on demand via the
 * dev_clone event handler.
 */
static int
devfs_lookupx(struct vop_lookup_args *ap, int *dm_unlock)
{
	struct componentname *cnp;
	struct vnode *dvp, **vpp;
	struct thread *td;
	struct devfs_dirent *de, *dd;
	struct devfs_dirent **dde;
	struct devfs_mount *dmp;
	struct mount *mp;
	struct cdev *cdev;
	int error, flags, nameiop, dvplocked;
	char specname[SPECNAMELEN + 1], *pname;

	td = curthread;
	cnp = ap->a_cnp;
	vpp = ap->a_vpp;
	dvp = ap->a_dvp;
	pname = cnp->cn_nameptr;
	flags = cnp->cn_flags;
	nameiop = cnp->cn_nameiop;
	mp = dvp->v_mount;
	dmp = VFSTODEVFS(mp);
	dd = dvp->v_data;
	*vpp = NULL;

	/* Renaming devfs nodes is not supported. */
	if ((flags & ISLASTCN) && nameiop == RENAME)
		return (EOPNOTSUPP);

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT))
		return (EIO);

	error = vn_dir_check_exec(dvp, cnp);
	if (error != 0)
		return (error);

	/* "." resolves to the directory itself. */
	if (cnp->cn_namelen == 1 && *pname == '.') {
		if ((flags & ISLASTCN) && nameiop != LOOKUP)
			return (EINVAL);
		*vpp = dvp;
		vref(dvp);
		return (0);
	}

	if (flags & ISDOTDOT) {
		if ((flags & ISLASTCN) && nameiop != LOOKUP)
			return (EINVAL);
		de = devfs_parent_dirent(dd);
		if (de == NULL)
			return (ENOENT);
		dvplocked = VOP_ISLOCKED(dvp);
		VOP_UNLOCK(dvp);
		/* devfs_allocv() consumes dm_lock. */
		error = devfs_allocv(de, mp, cnp->cn_lkflags & LK_TYPE_MASK,
		    vpp);
		*dm_unlock = 0;
		vn_lock(dvp, dvplocked | LK_RETRY);
		return (error);
	}

	dd = dvp->v_data;
	de = devfs_find(dd, cnp->cn_nameptr, cnp->cn_namelen, 0);
	while (de == NULL) {	/* While(...) so we can use break */

		if (nameiop == DELETE)
			return (ENOENT);

		/*
		 * OK, we didn't have an entry for the name we were asked for
		 * so we try to see if anybody can create it on demand.
		 */
		pname = devfs_fqpn(specname, dmp, dd, cnp);
		if (pname == NULL)
			break;

		cdev = NULL;
		DEVFS_DMP_HOLD(dmp);
		sx_xunlock(&dmp->dm_lock);
		dvplocked = VOP_ISLOCKED(dvp);

		/*
		 * Invoke the dev_clone handler.  Unlock dvp around it
		 * to simplify the cloner operations.
		 *
		 * If dvp is reclaimed while we unlocked it, we return
		 * with ENOENT by some of the paths below.  If cloner
		 * returned cdev, then devfs_populate_vp() notes the
		 * reclamation.  Otherwise, note that either our devfs
		 * mount is being unmounted, then DEVFS_DMP_DROP()
		 * returns true, and we return ENOENT this way.  Or,
		 * because de == NULL, the check for it after the loop
		 * returns ENOENT.
		 */
		VOP_UNLOCK(dvp);
		EVENTHANDLER_INVOKE(dev_clone,
		    td->td_ucred, pname, strlen(pname), &cdev);
		vn_lock(dvp, dvplocked | LK_RETRY);

		if (cdev == NULL)
			sx_xlock(&dmp->dm_lock);
		else if (devfs_populate_vp(dvp) != 0) {
			*dm_unlock = 0;
			sx_xlock(&dmp->dm_lock);
			if (DEVFS_DMP_DROP(dmp)) {
				sx_xunlock(&dmp->dm_lock);
				devfs_unmount_final(dmp);
			} else
				sx_xunlock(&dmp->dm_lock);
			dev_rel(cdev);
			return (ENOENT);
		}
		if (DEVFS_DMP_DROP(dmp)) {
			*dm_unlock = 0;
			sx_xunlock(&dmp->dm_lock);
			devfs_unmount_final(dmp);
			if (cdev != NULL)
				dev_rel(cdev);
			return (ENOENT);
		}

		if (cdev == NULL)
			break;

		/* Pick up the dirent the cloner may have created. */
		dev_lock();
		dde = &cdev2priv(cdev)->cdp_dirents[dmp->dm_idx];
		if (dde != NULL && *dde != NULL)
			de = *dde;
		dev_unlock();
		dev_rel(cdev);
		break;
	}

	if (de == NULL || de->de_flags & DE_WHITEOUT) {
		if ((nameiop == CREATE || nameiop == RENAME) &&
		    (flags & (LOCKPARENT | WANTPARENT)) && (flags & ISLASTCN)) {
			return (EJUSTRETURN);
		}
		return (ENOENT);
	}

	if (devfs_prison_check(de, td))
		return (ENOENT);

	if ((cnp->cn_nameiop == DELETE) && (flags & ISLASTCN)) {
		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
		if (error)
			return (error);
		if (*vpp == dvp) {
			vref(dvp);
			*vpp = dvp;
			return (0);
		}
	}
	/* devfs_allocv() consumes dm_lock. */
	error = devfs_allocv(de, mp, cnp->cn_lkflags & LK_TYPE_MASK, vpp);
	*dm_unlock = 0;
	return (error);
}
1213 
1214 static int
1215 devfs_lookup(struct vop_lookup_args *ap)
1216 {
1217 	int j;
1218 	struct devfs_mount *dmp;
1219 	int dm_unlock;
1220 
1221 	if (devfs_populate_vp(ap->a_dvp) != 0)
1222 		return (ENOTDIR);
1223 
1224 	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
1225 	dm_unlock = 1;
1226 	j = devfs_lookupx(ap, &dm_unlock);
1227 	if (dm_unlock == 1)
1228 		sx_xunlock(&dmp->dm_lock);
1229 	return (j);
1230 }
1231 
/*
 * VOP_MKNOD for devfs.  Real device nodes are created by the kernel via
 * make_dev(9); the only operation supported here is "re-creating" an
 * existing cdev entry that was previously whited-out by devfs_remove().
 */
static int
devfs_mknod(struct vop_mknod_args *ap)
{
	struct componentname *cnp;
	struct vnode *dvp, **vpp;
	struct devfs_dirent *dd, *de;
	struct devfs_mount *dmp;
	int error;

	/*
	 * The only type of node we should be creating here is a
	 * character device, for anything else return EOPNOTSUPP.
	 */
	if (ap->a_vap->va_type != VCHR)
		return (EOPNOTSUPP);
	dvp = ap->a_dvp;
	dmp = VFSTODEVFS(dvp->v_mount);

	cnp = ap->a_cnp;
	vpp = ap->a_vpp;
	dd = dvp->v_data;

	error = ENOENT;
	sx_xlock(&dmp->dm_lock);
	TAILQ_FOREACH(de, &dd->de_dlist, de_list) {
		/* Cheap length check before the byte-wise name compare. */
		if (cnp->cn_namelen != de->de_dirent->d_namlen)
			continue;
		/* Skip cdev entries whose device is no longer active. */
		if (de->de_dirent->d_type == DT_CHR &&
		    (de->de_cdp->cdp_flags & CDP_ACTIVE) == 0)
			continue;
		if (bcmp(cnp->cn_nameptr, de->de_dirent->d_name,
		    de->de_dirent->d_namlen) != 0)
			continue;
		/* Found; only a whited-out entry may be resurrected. */
		if (de->de_flags & DE_WHITEOUT)
			break;
		goto notfound;
	}
	if (de == NULL)
		goto notfound;
	de->de_flags &= ~DE_WHITEOUT;
	/*
	 * No sx_xunlock() on this path: devfs_allocv() is responsible for
	 * dropping dm_lock (cf. the dm_unlock handling in devfs_lookup()).
	 */
	error = devfs_allocv(de, dvp->v_mount, LK_EXCLUSIVE, vpp);
	return (error);
notfound:
	sx_xunlock(&dmp->dm_lock);
	return (error);
}
1278 
/* ARGSUSED */
/*
 * VOP_OPEN for VCHR devfs vnodes: forward the open to the driver's
 * d_fdopen/d_open entry point with the vnode unlocked, then (when an
 * fp is supplied) switch the file to devfs_ops_f so subsequent file
 * operations bypass the vnode layer.
 */
static int
devfs_open(struct vop_open_args *ap)
{
	struct thread *td = ap->a_td;
	struct vnode *vp = ap->a_vp;
	struct cdev *dev = vp->v_rdev;
	struct file *fp = ap->a_fp;
	int error, ref, vlocked;
	struct cdevsw *dsw;
	struct file *fpop;

	if (vp->v_type == VBLK)
		return (ENXIO);

	if (dev == NULL)
		return (ENXIO);

	/* Make this field valid before any I/O in d_open. */
	if (dev->si_iosize_max == 0)
		dev->si_iosize_max = DFLTPHYS;

	dsw = dev_refthread(dev, &ref);
	if (dsw == NULL)
		return (ENXIO);
	/* A d_fdopen driver needs a struct file to operate on. */
	if (fp == NULL && dsw->d_fdopen != NULL) {
		dev_relthread(dev, ref);
		return (ENXIO);
	}

	if (vp->v_type == VCHR)
		devfs_usecount_add(vp);

	/* Drop the vnode lock across the driver call; restore mode later. */
	vlocked = VOP_ISLOCKED(vp);
	VOP_UNLOCK(vp);

	/* Publish fp via td_fpop so the driver can find it (cdevpriv). */
	fpop = td->td_fpop;
	td->td_fpop = fp;
	if (fp != NULL) {
		fp->f_data = dev;
		fp->f_vnode = vp;
	}
	if (dsw->d_fdopen != NULL)
		error = dsw->d_fdopen(dev, ap->a_mode, td, fp);
	else
		error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td);
	/* Clean up any cdevpriv upon error. */
	if (error != 0)
		devfs_clear_cdevpriv();
	td->td_fpop = fpop;

	vn_lock(vp, vlocked | LK_RETRY);
	if (error != 0 && vp->v_type == VCHR)
		devfs_usecount_sub(vp);

	dev_relthread(dev, ref);
	if (error != 0) {
		if (error == ERESTART)
			error = EINTR;
		return (error);
	}

#if 0	/* /dev/console */
	KASSERT(fp != NULL, ("Could not vnode bypass device on NULL fp"));
#else
	if (fp == NULL)
		return (error);
#endif
	/* Install the vnode-bypass fileops unless fp was already set up. */
	if (fp->f_ops == &badfileops)
		finit(fp, fp->f_flag, DTYPE_VNODE, dev, &devfs_ops_f);
	return (error);
}
1351 
1352 static int
1353 devfs_pathconf(struct vop_pathconf_args *ap)
1354 {
1355 
1356 	switch (ap->a_name) {
1357 	case _PC_FILESIZEBITS:
1358 		*ap->a_retval = 64;
1359 		return (0);
1360 	case _PC_NAME_MAX:
1361 		*ap->a_retval = NAME_MAX;
1362 		return (0);
1363 	case _PC_LINK_MAX:
1364 		*ap->a_retval = INT_MAX;
1365 		return (0);
1366 	case _PC_SYMLINK_MAX:
1367 		*ap->a_retval = MAXPATHLEN;
1368 		return (0);
1369 	case _PC_MAX_CANON:
1370 		if (ap->a_vp->v_vflag & VV_ISTTY) {
1371 			*ap->a_retval = MAX_CANON;
1372 			return (0);
1373 		}
1374 		return (EINVAL);
1375 	case _PC_MAX_INPUT:
1376 		if (ap->a_vp->v_vflag & VV_ISTTY) {
1377 			*ap->a_retval = MAX_INPUT;
1378 			return (0);
1379 		}
1380 		return (EINVAL);
1381 	case _PC_VDISABLE:
1382 		if (ap->a_vp->v_vflag & VV_ISTTY) {
1383 			*ap->a_retval = _POSIX_VDISABLE;
1384 			return (0);
1385 		}
1386 		return (EINVAL);
1387 	case _PC_MAC_PRESENT:
1388 #ifdef MAC
1389 		/*
1390 		 * If MAC is enabled, devfs automatically supports
1391 		 * trivial non-persistent label storage.
1392 		 */
1393 		*ap->a_retval = 1;
1394 #else
1395 		*ap->a_retval = 0;
1396 #endif
1397 		return (0);
1398 	case _PC_CHOWN_RESTRICTED:
1399 		*ap->a_retval = 1;
1400 		return (0);
1401 	default:
1402 		return (vop_stdpathconf(ap));
1403 	}
1404 	/* NOTREACHED */
1405 }
1406 
/* ARGSUSED */
/*
 * fo_poll for vnode-bypass devfs files: dispatch to the driver's
 * d_poll.  If the device reference cannot be obtained (destroyed or
 * revoked device), fall back to the generic vnode fileops.
 */
static int
devfs_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td)
{
	struct cdev *dev;
	struct cdevsw *dsw;
	int error, ref;
	struct file *fpop;

	/* devfs_fp_check() sets td->td_fpop = fp; save it for restore. */
	fpop = td->td_fpop;
	error = devfs_fp_check(fp, &dev, &dsw, &ref);
	if (error != 0) {
		error = vnops.fo_poll(fp, events, cred, td);
		return (error);
	}
	error = dsw->d_poll(dev, events, td);
	td->td_fpop = fpop;
	dev_relthread(dev, ref);
	return(error);
}
1427 
1428 /*
1429  * Print out the contents of a special device vnode.
1430  */
1431 static int
1432 devfs_print(struct vop_print_args *ap)
1433 {
1434 
1435 	printf("\tdev %s\n", devtoname(ap->a_vp->v_rdev));
1436 	return (0);
1437 }
1438 
/*
 * fo_read for vnode-bypass devfs files: translate file flags into
 * ioflags and call the driver's d_read directly, updating the device
 * access timestamp when any data moved.
 */
static int
devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred,
    int flags, struct thread *td)
{
	struct cdev *dev;
	int ioflag, error, ref;
	ssize_t resid;
	struct cdevsw *dsw;
	struct file *fpop;

	if (uio->uio_resid > DEVFS_IOSIZE_MAX)
		return (EINVAL);
	/* devfs_fp_check() sets td->td_fpop = fp; save it for restore. */
	fpop = td->td_fpop;
	error = devfs_fp_check(fp, &dev, &dsw, &ref);
	if (error != 0) {
		/* Device gone/revoked: fall back to generic vnode fileops. */
		error = vnops.fo_read(fp, uio, cred, flags, td);
		return (error);
	}
	resid = uio->uio_resid;
	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT);
	if (ioflag & O_DIRECT)
		ioflag |= IO_DIRECT;

	foffset_lock_uio(fp, uio, flags | FOF_NOLOCK);
	error = dsw->d_read(dev, uio, ioflag);
	/* Timestamp if bytes were transferred, or on a clean zero-resid read. */
	if (uio->uio_resid != resid || (error == 0 && resid != 0))
		devfs_timestamp(&dev->si_atime);
	td->td_fpop = fpop;
	dev_relthread(dev, ref);

	foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF_R);
	return (error);
}
1472 
/*
 * VOP_READDIR for devfs directories.  Walks the in-memory dirent list,
 * skipping covered/whited-out entries and entries hidden from the
 * caller's prison, and copies out fixed-format struct dirents.
 */
static int
devfs_readdir(struct vop_readdir_args *ap)
{
	int error;
	struct uio *uio;
	struct dirent *dp;
	struct devfs_dirent *dd;
	struct devfs_dirent *de;
	struct devfs_mount *dmp;
	off_t off;
	int *tmp_ncookies = NULL;
	ssize_t startresid;

	if (ap->a_vp->v_type != VDIR)
		return (ENOTDIR);

	uio = ap->a_uio;
	if (uio->uio_offset < 0)
		return (EINVAL);

	/*
	 * XXX: This is a temporary hack to get around this filesystem not
	 * supporting cookies. We store the location of the ncookies pointer
	 * in a temporary variable before calling vfs_subr.c:vfs_read_dirent()
	 * and set the number of cookies to 0. We then set the pointer to
	 * NULL so that vfs_read_dirent doesn't try to call realloc() on
	 * ap->a_cookies. Later in this function, we restore the ap->a_ncookies
	 * pointer to its original location before returning to the caller.
	 */
	if (ap->a_ncookies != NULL) {
		tmp_ncookies = ap->a_ncookies;
		*ap->a_ncookies = 0;
		ap->a_ncookies = NULL;
	}

	dmp = VFSTODEVFS(ap->a_vp->v_mount);
	/* On success devfs_populate_vp() leaves dm_lock held. */
	if (devfs_populate_vp(ap->a_vp) != 0) {
		if (tmp_ncookies != NULL)
			ap->a_ncookies = tmp_ncookies;
		return (EIO);
	}
	error = 0;
	de = ap->a_vp->v_data;
	off = 0;
	startresid = uio->uio_resid;
	TAILQ_FOREACH(dd, &de->de_dlist, de_list) {
		/*
		 * NOTE(review): 0xdeadc0de looks like a use-after-free
		 * poison/canary check on de_cdp — confirm before relying
		 * on it.
		 */
		KASSERT(dd->de_cdp != (void *)0xdeadc0de, ("%s %d\n", __func__, __LINE__));
		if (dd->de_flags & (DE_COVERED | DE_WHITEOUT))
			continue;
		if (devfs_prison_check(dd, uio->uio_td))
			continue;
		/* For subdirectories, report the directory's own inode. */
		if (dd->de_dirent->d_type == DT_DIR)
			de = dd->de_dir;
		else
			de = dd;
		dp = dd->de_dirent;
		MPASS(dp->d_reclen == GENERIC_DIRSIZ(dp));
		if (dp->d_reclen > uio->uio_resid) {
			/* Nothing was copied out, return EINVAL. */
			if (uio->uio_resid == startresid)
				error = EINVAL;
			/* Otherwise stop. */
			break;
		}
		dp->d_fileno = de->de_inode;
		/* NOTE: d_off is the offset for the *next* entry. */
		dp->d_off = off + dp->d_reclen;
		if (off >= uio->uio_offset) {
			error = vfs_read_dirent(ap, dp, off);
			if (error)
				break;
		}
		off += dp->d_reclen;
	}
	sx_xunlock(&dmp->dm_lock);
	uio->uio_offset = off;

	/*
	 * Restore ap->a_ncookies if it wasn't originally NULL in the first
	 * place.
	 */
	if (tmp_ncookies != NULL)
		ap->a_ncookies = tmp_ncookies;
	/* dd == NULL means the loop ran off the end: report EOF. */
	if (dd == NULL && error == 0 && ap->a_eofflag != NULL)
		*ap->a_eofflag = 1;

	return (error);
}
1561 
1562 static int
1563 devfs_readlink(struct vop_readlink_args *ap)
1564 {
1565 	struct devfs_dirent *de;
1566 
1567 	de = ap->a_vp->v_data;
1568 	return (uiomove(de->de_symlink, strlen(de->de_symlink), ap->a_uio));
1569 }
1570 
1571 static void
1572 devfs_reclaiml(struct vnode *vp)
1573 {
1574 	struct devfs_dirent *de;
1575 
1576 	mtx_assert(&devfs_de_interlock, MA_OWNED);
1577 	de = vp->v_data;
1578 	if (de != NULL) {
1579 		MPASS(de->de_usecount == 0);
1580 		de->de_vnode = NULL;
1581 		vp->v_data = NULL;
1582 	}
1583 }
1584 
1585 static int
1586 devfs_reclaim(struct vop_reclaim_args *ap)
1587 {
1588 	struct vnode *vp;
1589 
1590 	vp = ap->a_vp;
1591 	mtx_lock(&devfs_de_interlock);
1592 	devfs_reclaiml(vp);
1593 	mtx_unlock(&devfs_de_interlock);
1594 	return (0);
1595 }
1596 
/*
 * VOP_RECLAIM for VCHR devfs vnodes: drop the vnode's device usecount,
 * sever the vnode/dirent link, then clear v_rdev and release the cdev
 * reference.  Note the lock ordering: devfs_de_interlock, then the
 * vnode interlock, then dev_lock().
 */
static int
devfs_reclaim_vchr(struct vop_reclaim_args *ap)
{
	struct vnode *vp;
	struct cdev *dev;

	vp = ap->a_vp;
	MPASS(vp->v_type == VCHR);

	mtx_lock(&devfs_de_interlock);
	VI_LOCK(vp);
	devfs_usecount_subl(vp);
	devfs_reclaiml(vp);
	mtx_unlock(&devfs_de_interlock);
	/* Detach the cdev under dev_lock before releasing our reference. */
	dev_lock();
	dev = vp->v_rdev;
	vp->v_rdev = NULL;
	dev_unlock();
	VI_UNLOCK(vp);
	if (dev != NULL)
		dev_rel(dev);
	return (0);
}
1620 
/*
 * VOP_REMOVE for devfs.  User-created entries (de_cdp == NULL, e.g.
 * symlinks) are physically deleted; entries backed by a cdev are only
 * marked DE_WHITEOUT so the kernel-owned node can be restored later
 * (see devfs_mknod()).
 */
static int
devfs_remove(struct vop_remove_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vnode *vp = ap->a_vp;
	struct devfs_dirent *dd;
	struct devfs_dirent *de, *de_covered;
	struct devfs_mount *dmp = VFSTODEVFS(vp->v_mount);

	ASSERT_VOP_ELOCKED(dvp, "devfs_remove");
	ASSERT_VOP_ELOCKED(vp, "devfs_remove");

	sx_xlock(&dmp->dm_lock);
	dd = ap->a_dvp->v_data;
	de = vp->v_data;
	if (de->de_cdp == NULL) {
		TAILQ_REMOVE(&dd->de_dlist, de, de_list);
		if (de->de_dirent->d_type == DT_LNK) {
			/* Re-expose any cdev entry this symlink shadowed. */
			de_covered = devfs_find(dd, de->de_dirent->d_name,
			    de->de_dirent->d_namlen, 0);
			if (de_covered != NULL)
				de_covered->de_flags &= ~DE_COVERED;
		}
		/* We need to unlock dvp because devfs_delete() may lock it. */
		VOP_UNLOCK(vp);
		if (dvp != vp)
			VOP_UNLOCK(dvp);
		devfs_delete(dmp, de, 0);
		sx_xunlock(&dmp->dm_lock);
		/* Re-acquire in the same order the caller expects on return. */
		if (dvp != vp)
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	} else {
		de->de_flags |= DE_WHITEOUT;
		sx_xunlock(&dmp->dm_lock);
	}
	return (0);
}
1659 
1660 /*
1661  * Revoke is called on a tty when a terminal session ends.  The vnode
1662  * is orphaned by setting v_op to deadfs so we need to let go of it
1663  * as well so that we create a new one next time around.
1664  *
1665  */
1666 static int
1667 devfs_revoke(struct vop_revoke_args *ap)
1668 {
1669 	struct vnode *vp = ap->a_vp, *vp2;
1670 	struct cdev *dev;
1671 	struct cdev_priv *cdp;
1672 	struct devfs_dirent *de;
1673 	enum vgetstate vs;
1674 	u_int i;
1675 
1676 	KASSERT((ap->a_flags & REVOKEALL) != 0, ("devfs_revoke !REVOKEALL"));
1677 
1678 	dev = vp->v_rdev;
1679 	cdp = cdev2priv(dev);
1680 
1681 	dev_lock();
1682 	cdp->cdp_inuse++;
1683 	dev_unlock();
1684 
1685 	vhold(vp);
1686 	vgone(vp);
1687 	vdrop(vp);
1688 
1689 	VOP_UNLOCK(vp);
1690  loop:
1691 	for (;;) {
1692 		mtx_lock(&devfs_de_interlock);
1693 		dev_lock();
1694 		vp2 = NULL;
1695 		for (i = 0; i <= cdp->cdp_maxdirent; i++) {
1696 			de = cdp->cdp_dirents[i];
1697 			if (de == NULL)
1698 				continue;
1699 
1700 			vp2 = de->de_vnode;
1701 			if (vp2 != NULL) {
1702 				dev_unlock();
1703 				vs = vget_prep(vp2);
1704 				mtx_unlock(&devfs_de_interlock);
1705 				if (vget_finish(vp2, LK_EXCLUSIVE, vs) != 0)
1706 					goto loop;
1707 				vhold(vp2);
1708 				vgone(vp2);
1709 				vdrop(vp2);
1710 				vput(vp2);
1711 				break;
1712 			}
1713 		}
1714 		if (vp2 != NULL) {
1715 			continue;
1716 		}
1717 		dev_unlock();
1718 		mtx_unlock(&devfs_de_interlock);
1719 		break;
1720 	}
1721 	dev_lock();
1722 	cdp->cdp_inuse--;
1723 	if (!(cdp->cdp_flags & CDP_ACTIVE) && cdp->cdp_inuse == 0) {
1724 		KASSERT((cdp->cdp_flags & CDP_ON_ACTIVE_LIST) != 0,
1725 		    ("%s: cdp %p (%s) not on active list",
1726 		    __func__, cdp, dev->si_name));
1727 		cdp->cdp_flags &= ~CDP_ON_ACTIVE_LIST;
1728 		TAILQ_REMOVE(&cdevp_list, cdp, cdp_list);
1729 		dev_unlock();
1730 		dev_rel(&cdp->cdp_c);
1731 	} else
1732 		dev_unlock();
1733 
1734 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1735 	return (0);
1736 }
1737 
/*
 * VOP_IOCTL on the devfs root/non-device vnodes: administers devfs
 * rules.  The mount is re-populated first; the DMP hold/drop pair
 * detects a concurrent unmount while dm_lock was held.
 */
static int
devfs_rioctl(struct vop_ioctl_args *ap)
{
	struct vnode *vp;
	struct devfs_mount *dmp;
	int error;

	vp = ap->a_vp;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	if (VN_IS_DOOMED(vp)) {
		VOP_UNLOCK(vp);
		return (EBADF);
	}
	dmp = VFSTODEVFS(vp->v_mount);
	sx_xlock(&dmp->dm_lock);
	VOP_UNLOCK(vp);
	DEVFS_DMP_HOLD(dmp);
	devfs_populate(dmp);
	/* If we are the last holder, the mount went away: finish unmount. */
	if (DEVFS_DMP_DROP(dmp)) {
		sx_xunlock(&dmp->dm_lock);
		devfs_unmount_final(dmp);
		return (ENOENT);
	}
	error = devfs_rules_ioctl(dmp, ap->a_command, ap->a_data, ap->a_td);
	sx_xunlock(&dmp->dm_lock);
	return (error);
}
1765 
1766 static int
1767 devfs_rread(struct vop_read_args *ap)
1768 {
1769 
1770 	if (ap->a_vp->v_type != VDIR)
1771 		return (EINVAL);
1772 	return (VOP_READDIR(ap->a_vp, ap->a_uio, ap->a_cred, NULL, NULL, NULL));
1773 }
1774 
/*
 * VOP_SETATTR for devfs.  Only ownership, mode and timestamps may be
 * changed; any attempt to set other attributes is rejected with EINVAL.
 * For VCHR vnodes the timestamps live on the cdev (si_*), otherwise on
 * the devfs_dirent (de_*).
 */
static int
devfs_setattr(struct vop_setattr_args *ap)
{
	struct devfs_dirent *de;
	struct vattr *vap;
	struct vnode *vp;
	struct thread *td;
	int c, error;
	uid_t uid;
	gid_t gid;

	vap = ap->a_vap;
	vp = ap->a_vp;
	td = curthread;
	/* Refuse every attribute that devfs cannot store. */
	if ((vap->va_type != VNON) ||
	    (vap->va_nlink != VNOVAL) ||
	    (vap->va_fsid != VNOVAL) ||
	    (vap->va_fileid != VNOVAL) ||
	    (vap->va_blocksize != VNOVAL) ||
	    (vap->va_flags != VNOVAL && vap->va_flags != 0) ||
	    (vap->va_rdev != VNOVAL) ||
	    ((int)vap->va_bytes != VNOVAL) ||
	    (vap->va_gen != VNOVAL)) {
		return (EINVAL);
	}

	/* On success dm_lock is held until the "ret" label below. */
	error = devfs_populate_vp(vp);
	if (error != 0)
		return (error);

	de = vp->v_data;
	if (vp->v_type == VDIR)
		de = de->de_dir;

	/* "c" tracks whether anything changed, to update ctime once. */
	c = 0;
	if (vap->va_uid == (uid_t)VNOVAL)
		uid = de->de_uid;
	else
		uid = vap->va_uid;
	if (vap->va_gid == (gid_t)VNOVAL)
		gid = de->de_gid;
	else
		gid = vap->va_gid;
	if (uid != de->de_uid || gid != de->de_gid) {
		/* Non-owner changes require PRIV_VFS_CHOWN. */
		if ((ap->a_cred->cr_uid != de->de_uid) || uid != de->de_uid ||
		    (gid != de->de_gid && !groupmember(gid, ap->a_cred))) {
			error = priv_check(td, PRIV_VFS_CHOWN);
			if (error != 0)
				goto ret;
		}
		de->de_uid = uid;
		de->de_gid = gid;
		c = 1;
	}

	if (vap->va_mode != (mode_t)VNOVAL) {
		/* Non-owner chmod requires PRIV_VFS_ADMIN. */
		if (ap->a_cred->cr_uid != de->de_uid) {
			error = priv_check(td, PRIV_VFS_ADMIN);
			if (error != 0)
				goto ret;
		}
		de->de_mode = vap->va_mode;
		c = 1;
	}

	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
		error = vn_utimes_perm(vp, vap, ap->a_cred, td);
		if (error != 0)
			goto ret;
		if (vap->va_atime.tv_sec != VNOVAL) {
			if (vp->v_type == VCHR)
				vp->v_rdev->si_atime = vap->va_atime;
			else
				de->de_atime = vap->va_atime;
		}
		if (vap->va_mtime.tv_sec != VNOVAL) {
			if (vp->v_type == VCHR)
				vp->v_rdev->si_mtime = vap->va_mtime;
			else
				de->de_mtime = vap->va_mtime;
		}
		c = 1;
	}

	if (c) {
		if (vp->v_type == VCHR)
			vfs_timestamp(&vp->v_rdev->si_ctime);
		else
			vfs_timestamp(&de->de_mtime);
	}

ret:
	sx_xunlock(&VFSTODEVFS(vp->v_mount)->dm_lock);
	return (error);
}
1870 
#ifdef MAC
/*
 * VOP_SETLABEL (MAC only): relabel the vnode and push the new label
 * down into the devfs_dirent so it survives vnode reclamation.
 */
static int
devfs_setlabel(struct vop_setlabel_args *ap)
{
	struct vnode *vp;
	struct devfs_dirent *de;

	vp = ap->a_vp;
	de = vp->v_data;

	mac_vnode_relabel(ap->a_cred, vp, ap->a_label);
	mac_devfs_update(vp->v_mount, de, vp);

	return (0);
}
#endif
1887 
/* fo_stat: devfs has nothing special to add; use the vnode fileops. */
static int
devfs_stat_f(struct file *fp, struct stat *sb, struct ucred *cred)
{

	return (vnops.fo_stat(fp, sb, cred));
}
1894 
/*
 * VOP_SYMLINK for devfs (privileged).  Builds a user-created (DE_USER)
 * dirent carrying the target string and inserts it after "." and ".."
 * in the parent's list; an existing non-user entry of the same name is
 * marked DE_COVERED rather than replaced.
 */
static int
devfs_symlink(struct vop_symlink_args *ap)
{
	int i, error;
	struct devfs_dirent *dd;
	struct devfs_dirent *de, *de_covered, *de_dotdot;
	struct devfs_mount *dmp;

	error = priv_check(curthread, PRIV_DEVFS_SYMLINK);
	if (error)
		return(error);
	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
	/* On success dm_lock is held; devfs_allocv() drops it at the end. */
	if (devfs_populate_vp(ap->a_dvp) != 0)
		return (ENOENT);

	dd = ap->a_dvp->v_data;
	de = devfs_newdirent(ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen);
	de->de_flags = DE_USER;
	de->de_uid = 0;
	de->de_gid = 0;
	de->de_mode = 0755;
	de->de_inode = alloc_unr(devfs_inos);
	de->de_dir = dd;
	de->de_dirent->d_type = DT_LNK;
	/* Copy the target including its NUL terminator. */
	i = strlen(ap->a_target) + 1;
	de->de_symlink = malloc(i, M_DEVFS, M_WAITOK);
	bcopy(ap->a_target, de->de_symlink, i);
#ifdef MAC
	mac_devfs_create_symlink(ap->a_cnp->cn_cred, dmp->dm_mount, dd, de);
#endif
	de_covered = devfs_find(dd, de->de_dirent->d_name,
	    de->de_dirent->d_namlen, 0);
	if (de_covered != NULL) {
		/* Another user entry with this name already exists. */
		if ((de_covered->de_flags & DE_USER) != 0) {
			devfs_delete(dmp, de, DEVFS_DEL_NORECURSE);
			sx_xunlock(&dmp->dm_lock);
			return (EEXIST);
		}
		KASSERT((de_covered->de_flags & DE_COVERED) == 0,
		    ("devfs_symlink: entry %p already covered", de_covered));
		de_covered->de_flags |= DE_COVERED;
	}

	de_dotdot = TAILQ_FIRST(&dd->de_dlist);		/* "." */
	de_dotdot = TAILQ_NEXT(de_dotdot, de_list);	/* ".." */
	TAILQ_INSERT_AFTER(&dd->de_dlist, de_dotdot, de, de_list);
	devfs_dir_ref_de(dmp, dd);
	devfs_rules_apply(dmp, de);

	return (devfs_allocv(de, ap->a_dvp->v_mount, LK_EXCLUSIVE, ap->a_vpp));
}
1946 
/* fo_truncate: devfs has nothing special to add; use the vnode fileops. */
static int
devfs_truncate_f(struct file *fp, off_t length, struct ucred *cred, struct thread *td)
{

	return (vnops.fo_truncate(fp, length, cred, td));
}
1953 
/*
 * fo_write for vnode-bypass devfs files: mirror of devfs_read_f(),
 * dispatching to the driver's d_write and updating mtime/ctime when
 * any data moved.
 */
static int
devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred,
    int flags, struct thread *td)
{
	struct cdev *dev;
	int error, ioflag, ref;
	ssize_t resid;
	struct cdevsw *dsw;
	struct file *fpop;

	if (uio->uio_resid > DEVFS_IOSIZE_MAX)
		return (EINVAL);
	/* devfs_fp_check() sets td->td_fpop = fp; save it for restore. */
	fpop = td->td_fpop;
	error = devfs_fp_check(fp, &dev, &dsw, &ref);
	if (error != 0) {
		/* Device gone/revoked: fall back to generic vnode fileops. */
		error = vnops.fo_write(fp, uio, cred, flags, td);
		return (error);
	}
	KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td));
	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT | O_FSYNC);
	if (ioflag & O_DIRECT)
		ioflag |= IO_DIRECT;
	foffset_lock_uio(fp, uio, flags | FOF_NOLOCK);

	resid = uio->uio_resid;

	error = dsw->d_write(dev, uio, ioflag);
	/* Timestamp if bytes were transferred, or on a clean zero-resid write. */
	if (uio->uio_resid != resid || (error == 0 && resid != 0)) {
		devfs_timestamp(&dev->si_ctime);
		dev->si_mtime = dev->si_ctime;
	}
	td->td_fpop = fpop;
	dev_relthread(dev, ref);

	foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF_W);
	return (error);
}
1991 
/*
 * fo_mmap for vnode-bypass devfs files: derive the maximum protection
 * from the file flags and mount options, then map the device through
 * vm_mmap_cdev()/vm_mmap_object().
 */
static int
devfs_mmap_f(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size,
    vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff,
    struct thread *td)
{
	struct cdev *dev;
	struct cdevsw *dsw;
	struct mount *mp;
	struct vnode *vp;
	struct file *fpop;
	vm_object_t object;
	vm_prot_t maxprot;
	int error, ref;

	vp = fp->f_vnode;

	/*
	 * Ensure that file and memory protections are
	 * compatible.
	 */
	mp = vp->v_mount;
	if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0) {
		maxprot = VM_PROT_NONE;
		if ((prot & VM_PROT_EXECUTE) != 0)
			return (EACCES);
	} else
		maxprot = VM_PROT_EXECUTE;
	if ((fp->f_flag & FREAD) != 0)
		maxprot |= VM_PROT_READ;
	else if ((prot & VM_PROT_READ) != 0)
		return (EACCES);

	/*
	 * If we are sharing potential changes via MAP_SHARED and we
	 * are trying to get write permission although we opened it
	 * without asking for it, bail out.
	 *
	 * Note that most character devices always share mappings.
	 * The one exception is that D_MMAP_ANON devices
	 * (i.e. /dev/zero) permit private writable mappings.
	 *
	 * Rely on vm_mmap_cdev() to fail invalid MAP_PRIVATE requests
	 * as well as updating maxprot to permit writing for
	 * D_MMAP_ANON devices rather than doing that here.
	 */
	if ((flags & MAP_SHARED) != 0) {
		if ((fp->f_flag & FWRITE) != 0)
			maxprot |= VM_PROT_WRITE;
		else if ((prot & VM_PROT_WRITE) != 0)
			return (EACCES);
	}
	/* Capability-mode rights further clamp the protection. */
	maxprot &= cap_maxprot;

	/* devfs_fp_check() sets td->td_fpop = fp; save it for restore. */
	fpop = td->td_fpop;
	error = devfs_fp_check(fp, &dev, &dsw, &ref);
	if (error != 0)
		return (error);

	error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, dev, dsw, &foff,
	    &object);
	td->td_fpop = fpop;
	dev_relthread(dev, ref);
	if (error != 0)
		return (error);

	error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
	    foff, FALSE, td);
	/* vm_mmap_object() did not consume the reference on failure. */
	if (error != 0)
		vm_object_deallocate(object);
	return (error);
}
2063 
2064 dev_t
2065 dev2udev(struct cdev *x)
2066 {
2067 	if (x == NULL)
2068 		return (NODEV);
2069 	return (cdev2priv(x)->cdp_inode);
2070 }
2071 
2072 static int
2073 devfs_cmp_f(struct file *fp1, struct file *fp2, struct thread *td)
2074 {
2075 	if (fp2->f_type != DTYPE_VNODE || fp2->f_ops != &devfs_ops_f)
2076 		return (3);
2077 	return (kcmp_cmp((uintptr_t)fp1->f_data, (uintptr_t)fp2->f_data));
2078 }
2079 
/*
 * Fileops installed by devfs_open() for open character devices so that
 * file-level operations bypass the vnode layer (see finit() call there).
 */
static const struct fileops devfs_ops_f = {
	.fo_read =	devfs_read_f,
	.fo_write =	devfs_write_f,
	.fo_truncate =	devfs_truncate_f,
	.fo_ioctl =	devfs_ioctl_f,
	.fo_poll =	devfs_poll_f,
	.fo_kqfilter =	devfs_kqfilter_f,
	.fo_stat =	devfs_stat_f,
	.fo_close =	devfs_close_f,
	.fo_chmod =	vn_chmod,
	.fo_chown =	vn_chown,
	.fo_sendfile =	vn_sendfile,
	.fo_seek =	vn_seek,
	.fo_fill_kinfo = vn_fill_kinfo,
	.fo_mmap =	devfs_mmap_f,
	.fo_cmp =	devfs_cmp_f,
	.fo_flags =	DFLAG_PASSABLE | DFLAG_SEEKABLE
};
2098 
/* Vops for non-CHR vnodes in /dev (directories, symlinks, the root). */
static struct vop_vector devfs_vnodeops = {
	.vop_default =		&default_vnodeops,

	.vop_access =		devfs_access,
	.vop_getattr =		devfs_getattr,
	.vop_ioctl =		devfs_rioctl,
	.vop_lookup =		devfs_lookup,
	.vop_mknod =		devfs_mknod,
	.vop_pathconf =		devfs_pathconf,
	.vop_read =		devfs_rread,
	.vop_readdir =		devfs_readdir,
	.vop_readlink =		devfs_readlink,
	.vop_reclaim =		devfs_reclaim,
	.vop_remove =		devfs_remove,
	.vop_revoke =		devfs_revoke,
	.vop_setattr =		devfs_setattr,
#ifdef MAC
	.vop_setlabel =		devfs_setlabel,
#endif
	.vop_symlink =		devfs_symlink,
	.vop_vptocnp =		devfs_vptocnp,
	.vop_lock1 =		vop_lock,
	.vop_unlock =		vop_unlock,
	.vop_islocked =		vop_islocked,
	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
};
VFS_VOP_VECTOR_REGISTER(devfs_vnodeops);
2127 
/*
 * Vops for VCHR vnodes in /dev.  Read/write/poll are mostly serviced
 * through devfs_ops_f above; operations that make no sense on a device
 * node are wired to VOP_PANIC.
 */
static struct vop_vector devfs_specops = {
	.vop_default =		&default_vnodeops,

	.vop_access =		devfs_access,
	.vop_bmap =		VOP_PANIC,
	.vop_close =		devfs_close,
	.vop_create =		VOP_PANIC,
	.vop_fsync =		vop_stdfsync,
	.vop_getattr =		devfs_getattr,
	.vop_ioctl =		devfs_ioctl,
	.vop_link =		VOP_PANIC,
	.vop_mkdir =		VOP_PANIC,
	.vop_mknod =		VOP_PANIC,
	.vop_open =		devfs_open,
	.vop_pathconf =		devfs_pathconf,
	.vop_poll =		dead_poll,
	.vop_print =		devfs_print,
	.vop_read =		dead_read,
	.vop_readdir =		VOP_PANIC,
	.vop_readlink =		VOP_PANIC,
	.vop_reallocblks =	VOP_PANIC,
	.vop_reclaim =		devfs_reclaim_vchr,
	.vop_remove =		devfs_remove,
	.vop_rename =		VOP_PANIC,
	.vop_revoke =		devfs_revoke,
	.vop_rmdir =		VOP_PANIC,
	.vop_setattr =		devfs_setattr,
#ifdef MAC
	.vop_setlabel =		devfs_setlabel,
#endif
	.vop_strategy =		VOP_PANIC,
	.vop_symlink =		VOP_PANIC,
	.vop_vptocnp =		devfs_vptocnp,
	.vop_write =		dead_write,
	.vop_lock1 =		vop_lock,
	.vop_unlock =		vop_unlock,
	.vop_islocked =		vop_islocked,
	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
};
VFS_VOP_VECTOR_REGISTER(devfs_specops);
2169 
2170 /*
2171  * Our calling convention to the device drivers used to be that we passed
2172  * vnode.h IO_* flags to read()/write(), but we're moving to fcntl.h O_
2173  * flags instead since that's what open(), close() and ioctl() takes and
2174  * we don't really want vnode.h in device drivers.
2175  * We solved the source compatibility by redefining some vnode flags to
2176  * be the same as the fcntl ones and by sending down the bitwise OR of
2177  * the respective fcntl/vnode flags.  These CTASSERTS make sure nobody
2178  * pulls the rug out under this.
2179  */
2180 CTASSERT(O_NONBLOCK == IO_NDELAY);
2181 CTASSERT(O_FSYNC == IO_SYNC);
2182