xref: /freebsd/sys/fs/devfs/devfs_vnops.c (revision 4dbe6628179d8e6bf400bfdb4bfa869bdc102a56)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2000-2004
5  *	Poul-Henning Kamp.  All rights reserved.
6  * Copyright (c) 1989, 1992-1993, 1995
7  *	The Regents of the University of California.  All rights reserved.
8  *
9  * This code is derived from software donated to Berkeley by
10  * Jan-Simon Pendry.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vnops.c 1.43
33  */
34 
35 /*
36  * TODO:
37  *	mkdir: want it ?
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/conf.h>
43 #include <sys/dirent.h>
44 #include <sys/eventhandler.h>
45 #include <sys/fcntl.h>
46 #include <sys/file.h>
47 #include <sys/filedesc.h>
48 #include <sys/filio.h>
49 #include <sys/jail.h>
50 #include <sys/kernel.h>
51 #include <sys/limits.h>
52 #include <sys/lock.h>
53 #include <sys/malloc.h>
54 #include <sys/mman.h>
55 #include <sys/mount.h>
56 #include <sys/namei.h>
57 #include <sys/priv.h>
58 #include <sys/proc.h>
59 #include <sys/stat.h>
60 #include <sys/sx.h>
61 #include <sys/sysctl.h>
62 #include <sys/time.h>
63 #include <sys/ttycom.h>
64 #include <sys/unistd.h>
65 #include <sys/vnode.h>
66 
67 static struct vop_vector devfs_vnodeops;
68 static struct vop_vector devfs_specops;
69 static const struct fileops devfs_ops_f;
70 
71 #include <fs/devfs/devfs.h>
72 #include <fs/devfs/devfs_int.h>
73 
74 #include <security/mac/mac_framework.h>
75 
76 #include <vm/vm.h>
77 #include <vm/vm_extern.h>
78 #include <vm/vm_object.h>
79 
80 static MALLOC_DEFINE(M_CDEVPDATA, "DEVFSP", "Metainfo for cdev-fp data");
81 
82 struct mtx	devfs_de_interlock;
83 MTX_SYSINIT(devfs_de_interlock, &devfs_de_interlock, "devfs interlock", MTX_DEF);
84 struct mtx	cdevpriv_mtx;
85 MTX_SYSINIT(cdevpriv_mtx, &cdevpriv_mtx, "cdevpriv lock", MTX_DEF);
86 
87 SYSCTL_DECL(_vfs_devfs);
88 
89 static int devfs_dotimes;
90 SYSCTL_INT(_vfs_devfs, OID_AUTO, dotimes, CTLFLAG_RW,
91     &devfs_dotimes, 0, "Update timestamps on DEVFS with default precision");
92 
93 /*
94  * Update devfs node timestamp.  Note that updates are unlocked and
95  * stat(2) could see partially updated times.
96  */
97 static void
devfs_timestamp(struct timespec * tsp)98 devfs_timestamp(struct timespec *tsp)
99 {
100 	time_t ts;
101 
102 	if (devfs_dotimes) {
103 		vfs_timestamp(tsp);
104 	} else {
105 		ts = time_second;
106 		if (tsp->tv_sec != ts) {
107 			tsp->tv_sec = ts;
108 			tsp->tv_nsec = 0;
109 		}
110 	}
111 }
112 
113 static int
devfs_fp_check(struct file * fp,struct cdev ** devp,struct cdevsw ** dswp,int * ref)114 devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp,
115     int *ref)
116 {
117 	*dswp = devvn_refthread(fp->f_vnode, devp, ref);
118 	if (*dswp == NULL || *devp != fp->f_data) {
119 		if (*dswp != NULL)
120 			dev_relthread(*devp, *ref);
121 		return (ENXIO);
122 	}
123 	KASSERT((*devp)->si_refcount > 0,
124 	    ("devfs: un-referenced struct cdev *(%s)", devtoname(*devp)));
125 	if (*dswp == NULL)
126 		return (ENXIO);
127 	curthread->td_fpop = fp;
128 	return (0);
129 }
130 
131 int
devfs_get_cdevpriv(void ** datap)132 devfs_get_cdevpriv(void **datap)
133 {
134 	struct file *fp;
135 	struct cdev_privdata *p;
136 	int error;
137 
138 	fp = curthread->td_fpop;
139 	if (fp == NULL)
140 		return (EBADF);
141 	p = fp->f_cdevpriv;
142 	if (p != NULL) {
143 		error = 0;
144 		*datap = p->cdpd_data;
145 	} else
146 		error = ENOENT;
147 	return (error);
148 }
149 
150 int
devfs_set_cdevpriv(void * priv,d_priv_dtor_t * priv_dtr)151 devfs_set_cdevpriv(void *priv, d_priv_dtor_t *priv_dtr)
152 {
153 	struct file *fp;
154 	struct cdev_priv *cdp;
155 	struct cdev_privdata *p;
156 	int error;
157 
158 	fp = curthread->td_fpop;
159 	if (fp == NULL)
160 		return (ENOENT);
161 	cdp = cdev2priv((struct cdev *)fp->f_data);
162 	p = malloc(sizeof(struct cdev_privdata), M_CDEVPDATA, M_WAITOK);
163 	p->cdpd_data = priv;
164 	p->cdpd_dtr = priv_dtr;
165 	p->cdpd_fp = fp;
166 	mtx_lock(&cdevpriv_mtx);
167 	if (fp->f_cdevpriv == NULL) {
168 		LIST_INSERT_HEAD(&cdp->cdp_fdpriv, p, cdpd_list);
169 		fp->f_cdevpriv = p;
170 		mtx_unlock(&cdevpriv_mtx);
171 		error = 0;
172 	} else {
173 		mtx_unlock(&cdevpriv_mtx);
174 		free(p, M_CDEVPDATA);
175 		error = EBUSY;
176 	}
177 	return (error);
178 }
179 
180 int
devfs_foreach_cdevpriv(struct cdev * dev,int (* cb)(void * data,void * arg),void * arg)181 devfs_foreach_cdevpriv(struct cdev *dev, int (*cb)(void *data, void *arg),
182     void *arg)
183 {
184 	struct cdev_priv *cdp;
185 	struct cdev_privdata *p;
186 	int error;
187 
188 	cdp = cdev2priv(dev);
189 	error = 0;
190 	mtx_lock(&cdevpriv_mtx);
191 	LIST_FOREACH(p, &cdp->cdp_fdpriv, cdpd_list) {
192 		error = cb(p->cdpd_data, arg);
193 		if (error != 0)
194 			break;
195 	}
196 	mtx_unlock(&cdevpriv_mtx);
197 	return (error);
198 }
199 
200 void
devfs_destroy_cdevpriv(struct cdev_privdata * p)201 devfs_destroy_cdevpriv(struct cdev_privdata *p)
202 {
203 	struct file *fp;
204 	struct cdev_priv *cdp;
205 
206 	mtx_assert(&cdevpriv_mtx, MA_OWNED);
207 	fp = p->cdpd_fp;
208 	KASSERT(fp->f_cdevpriv == p,
209 	    ("devfs_destoy_cdevpriv %p != %p", fp->f_cdevpriv, p));
210 	cdp = cdev2priv((struct cdev *)fp->f_data);
211 	cdp->cdp_fdpriv_dtrc++;
212 	fp->f_cdevpriv = NULL;
213 	LIST_REMOVE(p, cdpd_list);
214 	mtx_unlock(&cdevpriv_mtx);
215 	(p->cdpd_dtr)(p->cdpd_data);
216 	mtx_lock(&cdevpriv_mtx);
217 	MPASS(cdp->cdp_fdpriv_dtrc >= 1);
218 	cdp->cdp_fdpriv_dtrc--;
219 	if (cdp->cdp_fdpriv_dtrc == 0)
220 		wakeup(&cdp->cdp_fdpriv_dtrc);
221 	mtx_unlock(&cdevpriv_mtx);
222 	free(p, M_CDEVPDATA);
223 }
224 
225 static void
devfs_fpdrop(struct file * fp)226 devfs_fpdrop(struct file *fp)
227 {
228 	struct cdev_privdata *p;
229 
230 	mtx_lock(&cdevpriv_mtx);
231 	if ((p = fp->f_cdevpriv) == NULL) {
232 		mtx_unlock(&cdevpriv_mtx);
233 		return;
234 	}
235 	devfs_destroy_cdevpriv(p);
236 }
237 
238 void
devfs_clear_cdevpriv(void)239 devfs_clear_cdevpriv(void)
240 {
241 	struct file *fp;
242 
243 	fp = curthread->td_fpop;
244 	if (fp == NULL)
245 		return;
246 	devfs_fpdrop(fp);
247 }
248 
249 static void
devfs_usecount_add(struct vnode * vp)250 devfs_usecount_add(struct vnode *vp)
251 {
252 	struct devfs_dirent *de;
253 	struct cdev *dev;
254 
255 	mtx_lock(&devfs_de_interlock);
256 	VI_LOCK(vp);
257 	VNPASS(vp->v_type == VCHR || vp->v_type == VBAD, vp);
258 	if (VN_IS_DOOMED(vp)) {
259 		goto out_unlock;
260 	}
261 
262 	de = vp->v_data;
263 	dev = vp->v_rdev;
264 	MPASS(de != NULL);
265 	MPASS(dev != NULL);
266 	dev->si_usecount++;
267 	de->de_usecount++;
268 out_unlock:
269 	VI_UNLOCK(vp);
270 	mtx_unlock(&devfs_de_interlock);
271 }
272 
273 static void
devfs_usecount_subl(struct vnode * vp)274 devfs_usecount_subl(struct vnode *vp)
275 {
276 	struct devfs_dirent *de;
277 	struct cdev *dev;
278 
279 	mtx_assert(&devfs_de_interlock, MA_OWNED);
280 	ASSERT_VI_LOCKED(vp, __func__);
281 	VNPASS(vp->v_type == VCHR || vp->v_type == VBAD, vp);
282 
283 	de = vp->v_data;
284 	dev = vp->v_rdev;
285 	if (de == NULL)
286 		return;
287 	if (dev == NULL) {
288 		MPASS(de->de_usecount == 0);
289 		return;
290 	}
291 	if (dev->si_usecount < de->de_usecount)
292 		panic("%s: si_usecount underflow for dev %p "
293 		    "(has %ld, dirent has %d)\n",
294 		    __func__, dev, dev->si_usecount, de->de_usecount);
295 	if (VN_IS_DOOMED(vp)) {
296 		dev->si_usecount -= de->de_usecount;
297 		de->de_usecount = 0;
298 	} else {
299 		if (de->de_usecount == 0)
300 			panic("%s: de_usecount underflow for dev %p\n",
301 			    __func__, dev);
302 		dev->si_usecount--;
303 		de->de_usecount--;
304 	}
305 }
306 
307 static void
devfs_usecount_sub(struct vnode * vp)308 devfs_usecount_sub(struct vnode *vp)
309 {
310 
311 	mtx_lock(&devfs_de_interlock);
312 	VI_LOCK(vp);
313 	devfs_usecount_subl(vp);
314 	VI_UNLOCK(vp);
315 	mtx_unlock(&devfs_de_interlock);
316 }
317 
318 static int
devfs_usecountl(struct vnode * vp)319 devfs_usecountl(struct vnode *vp)
320 {
321 
322 	VNPASS(vp->v_type == VCHR, vp);
323 	mtx_assert(&devfs_de_interlock, MA_OWNED);
324 	ASSERT_VI_LOCKED(vp, __func__);
325 	return (vp->v_rdev->si_usecount);
326 }
327 
328 int
devfs_usecount(struct vnode * vp)329 devfs_usecount(struct vnode *vp)
330 {
331 	int count;
332 
333 	VNPASS(vp->v_type == VCHR, vp);
334 	mtx_lock(&devfs_de_interlock);
335 	VI_LOCK(vp);
336 	count = devfs_usecountl(vp);
337 	VI_UNLOCK(vp);
338 	mtx_unlock(&devfs_de_interlock);
339 	return (count);
340 }
341 
/*
 * Take a controlling-terminal reference on vp: a vnode reference via
 * vrefact() plus one devfs use count.
 */
void
devfs_ctty_ref(struct vnode *vp)
{
	vrefact(vp);
	devfs_usecount_add(vp);
}
349 
/*
 * Undo devfs_ctty_ref(): give back the devfs use count first, then the
 * vnode reference.
 */
void
devfs_ctty_unref(struct vnode *vp)
{
	devfs_usecount_sub(vp);
	vrele(vp);
}
357 
358 /*
359  * On success devfs_populate_vp() returns with dmp->dm_lock held.
360  */
361 static int
devfs_populate_vp(struct vnode * vp)362 devfs_populate_vp(struct vnode *vp)
363 {
364 	struct devfs_dirent *de;
365 	struct devfs_mount *dmp;
366 	int locked;
367 
368 	ASSERT_VOP_LOCKED(vp, "devfs_populate_vp");
369 
370 	dmp = VFSTODEVFS(vp->v_mount);
371 	if (!devfs_populate_needed(dmp)) {
372 		sx_xlock(&dmp->dm_lock);
373 		goto out_nopopulate;
374 	}
375 
376 	locked = VOP_ISLOCKED(vp);
377 
378 	sx_xlock(&dmp->dm_lock);
379 	DEVFS_DMP_HOLD(dmp);
380 
381 	/* Can't call devfs_populate() with the vnode lock held. */
382 	VOP_UNLOCK(vp);
383 	devfs_populate(dmp);
384 
385 	sx_xunlock(&dmp->dm_lock);
386 	vn_lock(vp, locked | LK_RETRY);
387 	sx_xlock(&dmp->dm_lock);
388 	if (DEVFS_DMP_DROP(dmp)) {
389 		sx_xunlock(&dmp->dm_lock);
390 		devfs_unmount_final(dmp);
391 		return (ERESTART);
392 	}
393 out_nopopulate:
394 	if (VN_IS_DOOMED(vp)) {
395 		sx_xunlock(&dmp->dm_lock);
396 		return (ERESTART);
397 	}
398 	de = vp->v_data;
399 	KASSERT(de != NULL,
400 	    ("devfs_populate_vp: vp->v_data == NULL but vnode not doomed"));
401 	if ((de->de_flags & DE_DOOMED) != 0) {
402 		sx_xunlock(&dmp->dm_lock);
403 		return (ERESTART);
404 	}
405 
406 	return (0);
407 }
408 
409 static int
devfs_vptocnp(struct vop_vptocnp_args * ap)410 devfs_vptocnp(struct vop_vptocnp_args *ap)
411 {
412 	struct vnode *vp = ap->a_vp;
413 	struct vnode **dvp = ap->a_vpp;
414 	struct devfs_mount *dmp;
415 	char *buf = ap->a_buf;
416 	size_t *buflen = ap->a_buflen;
417 	struct devfs_dirent *dd, *de;
418 	int i, error;
419 
420 	dmp = VFSTODEVFS(vp->v_mount);
421 
422 	error = devfs_populate_vp(vp);
423 	if (error != 0)
424 		return (error);
425 
426 	if (vp->v_type != VCHR && vp->v_type != VDIR) {
427 		error = ENOENT;
428 		goto finished;
429 	}
430 
431 	dd = vp->v_data;
432 	if (vp->v_type == VDIR && dd == dmp->dm_rootdir) {
433 		*dvp = vp;
434 		vref(*dvp);
435 		goto finished;
436 	}
437 
438 	i = *buflen;
439 	i -= dd->de_dirent->d_namlen;
440 	if (i < 0) {
441 		error = ENOMEM;
442 		goto finished;
443 	}
444 	bcopy(dd->de_dirent->d_name, buf + i, dd->de_dirent->d_namlen);
445 	*buflen = i;
446 	de = devfs_parent_dirent(dd);
447 	if (de == NULL) {
448 		error = ENOENT;
449 		goto finished;
450 	}
451 	mtx_lock(&devfs_de_interlock);
452 	*dvp = de->de_vnode;
453 	if (*dvp != NULL) {
454 		VI_LOCK(*dvp);
455 		mtx_unlock(&devfs_de_interlock);
456 		vholdl(*dvp);
457 		VI_UNLOCK(*dvp);
458 		vref(*dvp);
459 		vdrop(*dvp);
460 	} else {
461 		mtx_unlock(&devfs_de_interlock);
462 		error = ENOENT;
463 	}
464 finished:
465 	sx_xunlock(&dmp->dm_lock);
466 	return (error);
467 }
468 
469 /*
470  * Construct the fully qualified path name relative to the mountpoint.
471  * If a NULL cnp is provided, no '/' is appended to the resulting path.
472  */
473 char *
devfs_fqpn(char * buf,struct devfs_mount * dmp,struct devfs_dirent * dd,struct componentname * cnp)474 devfs_fqpn(char *buf, struct devfs_mount *dmp, struct devfs_dirent *dd,
475     struct componentname *cnp)
476 {
477 	int i;
478 	struct devfs_dirent *de;
479 
480 	sx_assert(&dmp->dm_lock, SA_LOCKED);
481 
482 	i = SPECNAMELEN;
483 	buf[i] = '\0';
484 	if (cnp != NULL)
485 		i -= cnp->cn_namelen;
486 	if (i < 0)
487 		 return (NULL);
488 	if (cnp != NULL)
489 		bcopy(cnp->cn_nameptr, buf + i, cnp->cn_namelen);
490 	de = dd;
491 	while (de != dmp->dm_rootdir) {
492 		if (cnp != NULL || i < SPECNAMELEN) {
493 			i--;
494 			if (i < 0)
495 				 return (NULL);
496 			buf[i] = '/';
497 		}
498 		i -= de->de_dirent->d_namlen;
499 		if (i < 0)
500 			 return (NULL);
501 		bcopy(de->de_dirent->d_name, buf + i,
502 		    de->de_dirent->d_namlen);
503 		de = devfs_parent_dirent(de);
504 		if (de == NULL)
505 			return (NULL);
506 	}
507 	return (buf + i);
508 }
509 
510 static int
devfs_allocv_drop_refs(int drop_dm_lock,struct devfs_mount * dmp,struct devfs_dirent * de)511 devfs_allocv_drop_refs(int drop_dm_lock, struct devfs_mount *dmp,
512 	struct devfs_dirent *de)
513 {
514 	int not_found;
515 
516 	not_found = 0;
517 	if (de->de_flags & DE_DOOMED)
518 		not_found = 1;
519 	if (DEVFS_DE_DROP(de)) {
520 		KASSERT(not_found == 1, ("DEVFS de dropped but not doomed"));
521 		devfs_dirent_free(de);
522 	}
523 	if (DEVFS_DMP_DROP(dmp)) {
524 		KASSERT(not_found == 1,
525 			("DEVFS mount struct freed before dirent"));
526 		not_found = 2;
527 		sx_xunlock(&dmp->dm_lock);
528 		devfs_unmount_final(dmp);
529 	}
530 	if (not_found == 1 || (drop_dm_lock && not_found != 2))
531 		sx_unlock(&dmp->dm_lock);
532 	return (not_found);
533 }
534 
535 /*
536  * devfs_allocv shall be entered with dmp->dm_lock held, and it drops
537  * it on return.
538  */
539 int
devfs_allocv(struct devfs_dirent * de,struct mount * mp,int lockmode,struct vnode ** vpp)540 devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode,
541     struct vnode **vpp)
542 {
543 	int error;
544 	struct vnode *vp;
545 	struct cdev *dev;
546 	struct devfs_mount *dmp;
547 	struct cdevsw *dsw;
548 	enum vgetstate vs;
549 
550 	dmp = VFSTODEVFS(mp);
551 	if (de->de_flags & DE_DOOMED) {
552 		sx_xunlock(&dmp->dm_lock);
553 		return (ENOENT);
554 	}
555 loop:
556 	DEVFS_DE_HOLD(de);
557 	DEVFS_DMP_HOLD(dmp);
558 	mtx_lock(&devfs_de_interlock);
559 	vp = de->de_vnode;
560 	if (vp != NULL) {
561 		vs = vget_prep(vp);
562 		mtx_unlock(&devfs_de_interlock);
563 		sx_xunlock(&dmp->dm_lock);
564 		vget_finish(vp, lockmode | LK_RETRY, vs);
565 		sx_xlock(&dmp->dm_lock);
566 		if (devfs_allocv_drop_refs(0, dmp, de)) {
567 			vput(vp);
568 			return (ENOENT);
569 		} else if (VN_IS_DOOMED(vp)) {
570 			mtx_lock(&devfs_de_interlock);
571 			if (de->de_vnode == vp) {
572 				de->de_vnode = NULL;
573 				vp->v_data = NULL;
574 			}
575 			mtx_unlock(&devfs_de_interlock);
576 			vput(vp);
577 			goto loop;
578 		}
579 		sx_xunlock(&dmp->dm_lock);
580 		*vpp = vp;
581 		return (0);
582 	}
583 	mtx_unlock(&devfs_de_interlock);
584 	if (de->de_dirent->d_type == DT_CHR) {
585 		if (!(de->de_cdp->cdp_flags & CDP_ACTIVE)) {
586 			devfs_allocv_drop_refs(1, dmp, de);
587 			return (ENOENT);
588 		}
589 		dev = &de->de_cdp->cdp_c;
590 	} else {
591 		dev = NULL;
592 	}
593 	error = getnewvnode("devfs", mp, &devfs_vnodeops, &vp);
594 	if (error != 0) {
595 		devfs_allocv_drop_refs(1, dmp, de);
596 		printf("devfs_allocv: failed to allocate new vnode\n");
597 		return (error);
598 	}
599 
600 	if (de->de_dirent->d_type == DT_CHR) {
601 		vp->v_type = VCHR;
602 		VI_LOCK(vp);
603 		dev_lock();
604 		dev_refl(dev);
605 		/* XXX: v_rdev should be protect by vnode lock */
606 		vp->v_rdev = dev;
607 		VNPASS(vp->v_usecount == 1, vp);
608 		/* Special casing of ttys for deadfs.  Probably redundant. */
609 		dsw = dev->si_devsw;
610 		if (dsw != NULL && (dsw->d_flags & D_TTY) != 0)
611 			vp->v_vflag |= VV_ISTTY;
612 		dev_unlock();
613 		VI_UNLOCK(vp);
614 		if ((dev->si_flags & SI_ETERNAL) != 0)
615 			vp->v_vflag |= VV_ETERNALDEV;
616 		vp->v_op = &devfs_specops;
617 	} else if (de->de_dirent->d_type == DT_DIR) {
618 		vp->v_type = VDIR;
619 	} else if (de->de_dirent->d_type == DT_LNK) {
620 		vp->v_type = VLNK;
621 	} else {
622 		vp->v_type = VBAD;
623 	}
624 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_NOWITNESS);
625 	VN_LOCK_ASHARE(vp);
626 	mtx_lock(&devfs_de_interlock);
627 	vp->v_data = de;
628 	de->de_vnode = vp;
629 	mtx_unlock(&devfs_de_interlock);
630 	error = insmntque1(vp, mp);
631 	if (error != 0) {
632 		mtx_lock(&devfs_de_interlock);
633 		vp->v_data = NULL;
634 		de->de_vnode = NULL;
635 		mtx_unlock(&devfs_de_interlock);
636 		vgone(vp);
637 		vput(vp);
638 		(void) devfs_allocv_drop_refs(1, dmp, de);
639 		return (error);
640 	}
641 	if (devfs_allocv_drop_refs(0, dmp, de)) {
642 		vgone(vp);
643 		vput(vp);
644 		return (ENOENT);
645 	}
646 #ifdef MAC
647 	mac_devfs_vnode_associate(mp, de, vp);
648 #endif
649 	sx_xunlock(&dmp->dm_lock);
650 	vn_set_state(vp, VSTATE_CONSTRUCTED);
651 	*vpp = vp;
652 	return (0);
653 }
654 
655 static int
devfs_access(struct vop_access_args * ap)656 devfs_access(struct vop_access_args *ap)
657 {
658 	struct vnode *vp = ap->a_vp;
659 	struct devfs_dirent *de;
660 	struct proc *p;
661 	int error;
662 
663 	de = vp->v_data;
664 	if (vp->v_type == VDIR)
665 		de = de->de_dir;
666 
667 	error = vaccess(vp->v_type, de->de_mode, de->de_uid, de->de_gid,
668 	    ap->a_accmode, ap->a_cred);
669 	if (error == 0)
670 		return (0);
671 	if (error != EACCES)
672 		return (error);
673 	p = ap->a_td->td_proc;
674 	/* We do, however, allow access to the controlling terminal */
675 	PROC_LOCK(p);
676 	if (!(p->p_flag & P_CONTROLT)) {
677 		PROC_UNLOCK(p);
678 		return (error);
679 	}
680 	if (p->p_session->s_ttydp == de->de_cdp)
681 		error = 0;
682 	PROC_UNLOCK(p);
683 	return (error);
684 }
685 
686 _Static_assert(((FMASK | FCNTLFLAGS) & (FLASTCLOSE | FREVOKE)) == 0,
687     "devfs-only flag reuse failed");
688 
689 static int
devfs_close(struct vop_close_args * ap)690 devfs_close(struct vop_close_args *ap)
691 {
692 	struct vnode *vp = ap->a_vp, *oldvp;
693 	struct thread *td = ap->a_td;
694 	struct proc *p;
695 	struct cdev *dev = vp->v_rdev;
696 	struct cdevsw *dsw;
697 	struct devfs_dirent *de = vp->v_data;
698 	int dflags, error, ref, vp_locked;
699 
700 	/*
701 	 * XXX: Don't call d_close() if we were called because of
702 	 * XXX: insmntque() failure.
703 	 */
704 	if (vp->v_data == NULL)
705 		return (0);
706 
707 	/*
708 	 * Hack: a tty device that is a controlling terminal
709 	 * has a reference from the session structure.
710 	 * We cannot easily tell that a character device is
711 	 * a controlling terminal, unless it is the closing
712 	 * process' controlling terminal.  In that case,
713 	 * if the reference count is 2 (this last descriptor
714 	 * plus the session), release the reference from the session.
715 	 */
716 	if (de->de_usecount == 2 && td != NULL) {
717 		p = td->td_proc;
718 		PROC_LOCK(p);
719 		if (vp == p->p_session->s_ttyvp) {
720 			PROC_UNLOCK(p);
721 			oldvp = NULL;
722 			sx_xlock(&proctree_lock);
723 			if (vp == p->p_session->s_ttyvp) {
724 				SESS_LOCK(p->p_session);
725 				mtx_lock(&devfs_de_interlock);
726 				VI_LOCK(vp);
727 				if (devfs_usecountl(vp) == 2 && !VN_IS_DOOMED(vp)) {
728 					p->p_session->s_ttyvp = NULL;
729 					p->p_session->s_ttydp = NULL;
730 					oldvp = vp;
731 				}
732 				VI_UNLOCK(vp);
733 				mtx_unlock(&devfs_de_interlock);
734 				SESS_UNLOCK(p->p_session);
735 			}
736 			sx_xunlock(&proctree_lock);
737 			if (oldvp != NULL)
738 				devfs_ctty_unref(oldvp);
739 		} else
740 			PROC_UNLOCK(p);
741 	}
742 	/*
743 	 * We do not want to really close the device if it
744 	 * is still in use unless we are trying to close it
745 	 * forcibly. Since every use (buffer, vnode, swap, cmap)
746 	 * holds a reference to the vnode, and because we mark
747 	 * any other vnodes that alias this device, when the
748 	 * sum of the reference counts on all the aliased
749 	 * vnodes descends to one, we are on last close.
750 	 */
751 	dsw = dev_refthread(dev, &ref);
752 	if (dsw == NULL)
753 		return (ENXIO);
754 	dflags = 0;
755 	mtx_lock(&devfs_de_interlock);
756 	VI_LOCK(vp);
757 	if (devfs_usecountl(vp) == 1)
758 		dflags |= FLASTCLOSE;
759 	devfs_usecount_subl(vp);
760 	mtx_unlock(&devfs_de_interlock);
761 	if (VN_IS_DOOMED(vp)) {
762 		/* Forced close. */
763 		dflags |= FREVOKE | FNONBLOCK;
764 	} else if (dsw->d_flags & D_TRACKCLOSE) {
765 		/* Keep device updated on status. */
766 	} else if ((dflags & FLASTCLOSE) == 0) {
767 		VI_UNLOCK(vp);
768 		dev_relthread(dev, ref);
769 		return (0);
770 	}
771 	vholdnz(vp);
772 	VI_UNLOCK(vp);
773 	vp_locked = VOP_ISLOCKED(vp);
774 	VOP_UNLOCK(vp);
775 	KASSERT(dev->si_refcount > 0,
776 	    ("devfs_close() on un-referenced struct cdev *(%s)", devtoname(dev)));
777 	error = dsw->d_close(dev, ap->a_fflag | dflags, S_IFCHR, td);
778 	dev_relthread(dev, ref);
779 	vn_lock(vp, vp_locked | LK_RETRY);
780 	vdrop(vp);
781 	return (error);
782 }
783 
784 static int
devfs_close_f(struct file * fp,struct thread * td)785 devfs_close_f(struct file *fp, struct thread *td)
786 {
787 	int error;
788 	struct file *fpop;
789 
790 	/*
791 	 * NB: td may be NULL if this descriptor is closed due to
792 	 * garbage collection from a closed UNIX domain socket.
793 	 */
794 	fpop = curthread->td_fpop;
795 	curthread->td_fpop = fp;
796 	error = vnops.fo_close(fp, td);
797 	curthread->td_fpop = fpop;
798 
799 	/*
800 	 * The f_cdevpriv cannot be assigned non-NULL value while we
801 	 * are destroying the file.
802 	 */
803 	if (fp->f_cdevpriv != NULL)
804 		devfs_fpdrop(fp);
805 	return (error);
806 }
807 
808 static int
devfs_getattr(struct vop_getattr_args * ap)809 devfs_getattr(struct vop_getattr_args *ap)
810 {
811 	struct vnode *vp = ap->a_vp;
812 	struct vattr *vap = ap->a_vap;
813 	struct devfs_dirent *de;
814 	struct devfs_mount *dmp;
815 	struct cdev *dev;
816 	struct timeval boottime;
817 	int error;
818 
819 	error = devfs_populate_vp(vp);
820 	if (error != 0)
821 		return (error);
822 
823 	dmp = VFSTODEVFS(vp->v_mount);
824 	sx_xunlock(&dmp->dm_lock);
825 
826 	de = vp->v_data;
827 	KASSERT(de != NULL, ("Null dirent in devfs_getattr vp=%p", vp));
828 	if (vp->v_type == VDIR) {
829 		de = de->de_dir;
830 		KASSERT(de != NULL,
831 		    ("Null dir dirent in devfs_getattr vp=%p", vp));
832 	}
833 	vap->va_uid = de->de_uid;
834 	vap->va_gid = de->de_gid;
835 	vap->va_mode = de->de_mode;
836 	if (vp->v_type == VLNK)
837 		vap->va_size = strlen(de->de_symlink);
838 	else if (vp->v_type == VDIR)
839 		vap->va_size = vap->va_bytes = DEV_BSIZE;
840 	else
841 		vap->va_size = 0;
842 	if (vp->v_type != VDIR)
843 		vap->va_bytes = 0;
844 	vap->va_blocksize = DEV_BSIZE;
845 	vap->va_type = vp->v_type;
846 
847 	getboottime(&boottime);
848 #define fix(aa)							\
849 	do {							\
850 		if ((aa).tv_sec <= 3600) {			\
851 			(aa).tv_sec = boottime.tv_sec;		\
852 			(aa).tv_nsec = boottime.tv_usec * 1000; \
853 		}						\
854 	} while (0)
855 
856 	if (vp->v_type != VCHR)  {
857 		fix(de->de_atime);
858 		vap->va_atime = de->de_atime;
859 		fix(de->de_mtime);
860 		vap->va_mtime = de->de_mtime;
861 		fix(de->de_ctime);
862 		vap->va_ctime = de->de_ctime;
863 	} else {
864 		dev = vp->v_rdev;
865 		fix(dev->si_atime);
866 		vap->va_atime = dev->si_atime;
867 		fix(dev->si_mtime);
868 		vap->va_mtime = dev->si_mtime;
869 		fix(dev->si_ctime);
870 		vap->va_ctime = dev->si_ctime;
871 
872 		vap->va_rdev = cdev2priv(dev)->cdp_inode;
873 	}
874 	vap->va_gen = 0;
875 	vap->va_flags = 0;
876 	vap->va_filerev = 0;
877 	vap->va_nlink = de->de_links;
878 	vap->va_fileid = de->de_inode;
879 
880 	return (error);
881 }
882 
883 /* ARGSUSED */
884 static int
devfs_ioctl_f(struct file * fp,u_long com,void * data,struct ucred * cred,struct thread * td)885 devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, struct thread *td)
886 {
887 	struct file *fpop;
888 	int error;
889 
890 	fpop = td->td_fpop;
891 	td->td_fpop = fp;
892 	error = vnops.fo_ioctl(fp, com, data, cred, td);
893 	td->td_fpop = fpop;
894 	return (error);
895 }
896 
897 void *
fiodgname_buf_get_ptr(void * fgnp,u_long com)898 fiodgname_buf_get_ptr(void *fgnp, u_long com)
899 {
900 	union {
901 		struct fiodgname_arg	fgn;
902 #ifdef COMPAT_FREEBSD32
903 		struct fiodgname_arg32	fgn32;
904 #endif
905 	} *fgnup;
906 
907 	fgnup = fgnp;
908 	switch (com) {
909 	case FIODGNAME:
910 		return (fgnup->fgn.buf);
911 #ifdef COMPAT_FREEBSD32
912 	case FIODGNAME_32:
913 		return ((void *)(uintptr_t)fgnup->fgn32.buf);
914 #endif
915 	default:
916 		panic("Unhandled ioctl command %ld", com);
917 	}
918 }
919 
920 static int
devfs_ioctl(struct vop_ioctl_args * ap)921 devfs_ioctl(struct vop_ioctl_args *ap)
922 {
923 	struct fiodgname_arg *fgn;
924 	struct vnode *vpold, *vp;
925 	struct cdevsw *dsw;
926 	struct thread *td;
927 	struct session *sess;
928 	struct cdev *dev;
929 	int error, ref, i;
930 	const char *p;
931 	u_long com;
932 
933 	vp = ap->a_vp;
934 	com = ap->a_command;
935 	td = ap->a_td;
936 
937 	dsw = devvn_refthread(vp, &dev, &ref);
938 	if (dsw == NULL)
939 		return (ENXIO);
940 	KASSERT(dev->si_refcount > 0,
941 	    ("devfs: un-referenced struct cdev *(%s)", devtoname(dev)));
942 
943 	switch (com) {
944 	case FIODTYPE:
945 		*(int *)ap->a_data = dsw->d_flags & D_TYPEMASK;
946 		error = 0;
947 		break;
948 	case FIODGNAME:
949 #ifdef	COMPAT_FREEBSD32
950 	case FIODGNAME_32:
951 #endif
952 		fgn = ap->a_data;
953 		p = devtoname(dev);
954 		i = strlen(p) + 1;
955 		if (i > fgn->len)
956 			error = EINVAL;
957 		else
958 			error = copyout(p, fiodgname_buf_get_ptr(fgn, com), i);
959 		break;
960 	default:
961 		error = dsw->d_ioctl(dev, com, ap->a_data, ap->a_fflag, td);
962 	}
963 
964 	dev_relthread(dev, ref);
965 	if (error == ENOIOCTL)
966 		error = ENOTTY;
967 
968 	if (error == 0 && com == TIOCSCTTY) {
969 		/*
970 		 * Do nothing if reassigning same control tty, or if the
971 		 * control tty has already disappeared.  If it disappeared,
972 		 * it's because we were racing with TIOCNOTTY.  TIOCNOTTY
973 		 * already took care of releasing the old vnode and we have
974 		 * nothing left to do.
975 		 */
976 		sx_slock(&proctree_lock);
977 		sess = td->td_proc->p_session;
978 		if (sess->s_ttyvp == vp || sess->s_ttyp == NULL) {
979 			sx_sunlock(&proctree_lock);
980 			return (0);
981 		}
982 
983 		devfs_ctty_ref(vp);
984 		SESS_LOCK(sess);
985 		vpold = sess->s_ttyvp;
986 		sess->s_ttyvp = vp;
987 		sess->s_ttydp = cdev2priv(dev);
988 		SESS_UNLOCK(sess);
989 
990 		sx_sunlock(&proctree_lock);
991 
992 		/* Get rid of reference to old control tty */
993 		if (vpold)
994 			devfs_ctty_unref(vpold);
995 	}
996 	return (error);
997 }
998 
999 /* ARGSUSED */
1000 static int
devfs_kqfilter_f(struct file * fp,struct knote * kn)1001 devfs_kqfilter_f(struct file *fp, struct knote *kn)
1002 {
1003 	struct cdev *dev;
1004 	struct cdevsw *dsw;
1005 	int error, ref;
1006 	struct file *fpop;
1007 	struct thread *td;
1008 
1009 	td = curthread;
1010 	fpop = td->td_fpop;
1011 	error = devfs_fp_check(fp, &dev, &dsw, &ref);
1012 	if (error)
1013 		return (error);
1014 	error = dsw->d_kqfilter(dev, kn);
1015 	td->td_fpop = fpop;
1016 	dev_relthread(dev, ref);
1017 	return (error);
1018 }
1019 
1020 static inline int
devfs_prison_check(struct devfs_dirent * de,struct thread * td)1021 devfs_prison_check(struct devfs_dirent *de, struct thread *td)
1022 {
1023 	struct cdev_priv *cdp;
1024 	struct ucred *dcr;
1025 	struct proc *p;
1026 	int error;
1027 
1028 	cdp = de->de_cdp;
1029 	if (cdp == NULL)
1030 		return (0);
1031 	dcr = cdp->cdp_c.si_cred;
1032 	if (dcr == NULL)
1033 		return (0);
1034 
1035 	error = prison_check(td->td_ucred, dcr);
1036 	if (error == 0)
1037 		return (0);
1038 	/* We do, however, allow access to the controlling terminal */
1039 	p = td->td_proc;
1040 	PROC_LOCK(p);
1041 	if (!(p->p_flag & P_CONTROLT)) {
1042 		PROC_UNLOCK(p);
1043 		return (error);
1044 	}
1045 	if (p->p_session->s_ttydp == cdp)
1046 		error = 0;
1047 	PROC_UNLOCK(p);
1048 	return (error);
1049 }
1050 
1051 static int
devfs_lookupx(struct vop_lookup_args * ap,int * dm_unlock)1052 devfs_lookupx(struct vop_lookup_args *ap, int *dm_unlock)
1053 {
1054 	struct componentname *cnp;
1055 	struct vnode *dvp, **vpp;
1056 	struct thread *td;
1057 	struct devfs_dirent *de, *dd;
1058 	struct devfs_dirent **dde;
1059 	struct devfs_mount *dmp;
1060 	struct mount *mp;
1061 	struct cdev *cdev;
1062 	int error, flags, nameiop, dvplocked;
1063 	char specname[SPECNAMELEN + 1], *pname;
1064 
1065 	td = curthread;
1066 	cnp = ap->a_cnp;
1067 	vpp = ap->a_vpp;
1068 	dvp = ap->a_dvp;
1069 	pname = cnp->cn_nameptr;
1070 	flags = cnp->cn_flags;
1071 	nameiop = cnp->cn_nameiop;
1072 	mp = dvp->v_mount;
1073 	dmp = VFSTODEVFS(mp);
1074 	dd = dvp->v_data;
1075 	*vpp = NULL;
1076 
1077 	if ((flags & ISLASTCN) && nameiop == RENAME)
1078 		return (EOPNOTSUPP);
1079 
1080 	if (dvp->v_type != VDIR)
1081 		return (ENOTDIR);
1082 
1083 	if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT))
1084 		return (EIO);
1085 
1086 	error = vn_dir_check_exec(dvp, cnp);
1087 	if (error != 0)
1088 		return (error);
1089 
1090 	if (cnp->cn_namelen == 1 && *pname == '.') {
1091 		if ((flags & ISLASTCN) && nameiop != LOOKUP)
1092 			return (EINVAL);
1093 		*vpp = dvp;
1094 		vref(dvp);
1095 		return (0);
1096 	}
1097 
1098 	if (flags & ISDOTDOT) {
1099 		if ((flags & ISLASTCN) && nameiop != LOOKUP)
1100 			return (EINVAL);
1101 		de = devfs_parent_dirent(dd);
1102 		if (de == NULL)
1103 			return (ENOENT);
1104 		dvplocked = VOP_ISLOCKED(dvp);
1105 		VOP_UNLOCK(dvp);
1106 		error = devfs_allocv(de, mp, cnp->cn_lkflags & LK_TYPE_MASK,
1107 		    vpp);
1108 		*dm_unlock = 0;
1109 		vn_lock(dvp, dvplocked | LK_RETRY);
1110 		return (error);
1111 	}
1112 
1113 	dd = dvp->v_data;
1114 	de = devfs_find(dd, cnp->cn_nameptr, cnp->cn_namelen, 0);
1115 	while (de == NULL) {	/* While(...) so we can use break */
1116 
1117 		if (nameiop == DELETE)
1118 			return (ENOENT);
1119 
1120 		/*
1121 		 * OK, we didn't have an entry for the name we were asked for
1122 		 * so we try to see if anybody can create it on demand.
1123 		 */
1124 		pname = devfs_fqpn(specname, dmp, dd, cnp);
1125 		if (pname == NULL)
1126 			break;
1127 
1128 		cdev = NULL;
1129 		DEVFS_DMP_HOLD(dmp);
1130 		sx_xunlock(&dmp->dm_lock);
1131 		EVENTHANDLER_INVOKE(dev_clone,
1132 		    td->td_ucred, pname, strlen(pname), &cdev);
1133 
1134 		if (cdev == NULL)
1135 			sx_xlock(&dmp->dm_lock);
1136 		else if (devfs_populate_vp(dvp) != 0) {
1137 			*dm_unlock = 0;
1138 			sx_xlock(&dmp->dm_lock);
1139 			if (DEVFS_DMP_DROP(dmp)) {
1140 				sx_xunlock(&dmp->dm_lock);
1141 				devfs_unmount_final(dmp);
1142 			} else
1143 				sx_xunlock(&dmp->dm_lock);
1144 			dev_rel(cdev);
1145 			return (ENOENT);
1146 		}
1147 		if (DEVFS_DMP_DROP(dmp)) {
1148 			*dm_unlock = 0;
1149 			sx_xunlock(&dmp->dm_lock);
1150 			devfs_unmount_final(dmp);
1151 			if (cdev != NULL)
1152 				dev_rel(cdev);
1153 			return (ENOENT);
1154 		}
1155 
1156 		if (cdev == NULL)
1157 			break;
1158 
1159 		dev_lock();
1160 		dde = &cdev2priv(cdev)->cdp_dirents[dmp->dm_idx];
1161 		if (dde != NULL && *dde != NULL)
1162 			de = *dde;
1163 		dev_unlock();
1164 		dev_rel(cdev);
1165 		break;
1166 	}
1167 
1168 	if (de == NULL || de->de_flags & DE_WHITEOUT) {
1169 		if ((nameiop == CREATE || nameiop == RENAME) &&
1170 		    (flags & (LOCKPARENT | WANTPARENT)) && (flags & ISLASTCN)) {
1171 			return (EJUSTRETURN);
1172 		}
1173 		return (ENOENT);
1174 	}
1175 
1176 	if (devfs_prison_check(de, td))
1177 		return (ENOENT);
1178 
1179 	if ((cnp->cn_nameiop == DELETE) && (flags & ISLASTCN)) {
1180 		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
1181 		if (error)
1182 			return (error);
1183 		if (*vpp == dvp) {
1184 			vref(dvp);
1185 			*vpp = dvp;
1186 			return (0);
1187 		}
1188 	}
1189 	error = devfs_allocv(de, mp, cnp->cn_lkflags & LK_TYPE_MASK, vpp);
1190 	*dm_unlock = 0;
1191 	return (error);
1192 }
1193 
1194 static int
devfs_lookup(struct vop_lookup_args * ap)1195 devfs_lookup(struct vop_lookup_args *ap)
1196 {
1197 	int j;
1198 	struct devfs_mount *dmp;
1199 	int dm_unlock;
1200 
1201 	if (devfs_populate_vp(ap->a_dvp) != 0)
1202 		return (ENOTDIR);
1203 
1204 	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
1205 	dm_unlock = 1;
1206 	j = devfs_lookupx(ap, &dm_unlock);
1207 	if (dm_unlock == 1)
1208 		sx_xunlock(&dmp->dm_lock);
1209 	return (j);
1210 }
1211 
1212 static int
devfs_mknod(struct vop_mknod_args * ap)1213 devfs_mknod(struct vop_mknod_args *ap)
1214 {
1215 	struct componentname *cnp;
1216 	struct vnode *dvp, **vpp;
1217 	struct devfs_dirent *dd, *de;
1218 	struct devfs_mount *dmp;
1219 	int error;
1220 
1221 	/*
1222 	 * The only type of node we should be creating here is a
1223 	 * character device, for anything else return EOPNOTSUPP.
1224 	 */
1225 	if (ap->a_vap->va_type != VCHR)
1226 		return (EOPNOTSUPP);
1227 	dvp = ap->a_dvp;
1228 	dmp = VFSTODEVFS(dvp->v_mount);
1229 
1230 	cnp = ap->a_cnp;
1231 	vpp = ap->a_vpp;
1232 	dd = dvp->v_data;
1233 
1234 	error = ENOENT;
1235 	sx_xlock(&dmp->dm_lock);
1236 	TAILQ_FOREACH(de, &dd->de_dlist, de_list) {
1237 		if (cnp->cn_namelen != de->de_dirent->d_namlen)
1238 			continue;
1239 		if (de->de_dirent->d_type == DT_CHR &&
1240 		    (de->de_cdp->cdp_flags & CDP_ACTIVE) == 0)
1241 			continue;
1242 		if (bcmp(cnp->cn_nameptr, de->de_dirent->d_name,
1243 		    de->de_dirent->d_namlen) != 0)
1244 			continue;
1245 		if (de->de_flags & DE_WHITEOUT)
1246 			break;
1247 		goto notfound;
1248 	}
1249 	if (de == NULL)
1250 		goto notfound;
1251 	de->de_flags &= ~DE_WHITEOUT;
1252 	error = devfs_allocv(de, dvp->v_mount, LK_EXCLUSIVE, vpp);
1253 	return (error);
1254 notfound:
1255 	sx_xunlock(&dmp->dm_lock);
1256 	return (error);
1257 }
1258 
/*
 * VOP_OPEN for devfs vnodes: forward the open to the device driver.
 *
 * Returns ENXIO for VBLK vnodes, when no cdev is attached to the vnode,
 * when dev_refthread() yields no cdevsw, or when the driver provides
 * d_fdopen but the caller supplied no struct file.  Otherwise the result
 * of the driver's d_fdopen (preferred when set) or d_open is returned,
 * with ERESTART translated to EINTR.
 *
 * The vnode lock is dropped around the driver call and reacquired in
 * the mode it was held in before.
 */
/* ARGSUSED */
static int
devfs_open(struct vop_open_args *ap)
{
	struct thread *td = ap->a_td;
	struct vnode *vp = ap->a_vp;
	struct cdev *dev = vp->v_rdev;
	struct file *fp = ap->a_fp;
	int error, ref, vlocked;
	struct cdevsw *dsw;
	struct file *fpop;

	if (vp->v_type == VBLK)
		return (ENXIO);

	if (dev == NULL)
		return (ENXIO);

	/* Make this field valid before any I/O in d_open. */
	if (dev->si_iosize_max == 0)
		dev->si_iosize_max = DFLTPHYS;

	dsw = dev_refthread(dev, &ref);
	if (dsw == NULL)
		return (ENXIO);
	/* d_fdopen needs a struct file to work on. */
	if (fp == NULL && dsw->d_fdopen != NULL) {
		dev_relthread(dev, ref);
		return (ENXIO);
	}

	/* Counted up front; undone below if the driver open fails. */
	if (vp->v_type == VCHR)
		devfs_usecount_add(vp);

	/* Remember the lock mode so it can be restored after the call. */
	vlocked = VOP_ISLOCKED(vp);
	VOP_UNLOCK(vp);

	/* Expose fp through td_fpop for the duration of the driver call. */
	fpop = td->td_fpop;
	td->td_fpop = fp;
	if (fp != NULL) {
		fp->f_data = dev;
		fp->f_vnode = vp;
	}
	if (dsw->d_fdopen != NULL)
		error = dsw->d_fdopen(dev, ap->a_mode, td, fp);
	else
		error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td);
	/* Clean up any cdevpriv upon error. */
	if (error != 0)
		devfs_clear_cdevpriv();
	td->td_fpop = fpop;

	vn_lock(vp, vlocked | LK_RETRY);
	if (error != 0 && vp->v_type == VCHR)
		devfs_usecount_sub(vp);

	dev_relthread(dev, ref);
	if (error != 0) {
		if (error == ERESTART)
			error = EINTR;
		return (error);
	}

#if 0	/* /dev/console */
	KASSERT(fp != NULL, ("Could not vnode bypass device on NULL fp"));
#else
	if (fp == NULL)
		return (error);
#endif
	/*
	 * Install the devfs file operations only if the file still has the
	 * placeholder badfileops (presumably meaning the driver did not
	 * set up its own).
	 */
	if (fp->f_ops == &badfileops)
		finit(fp, fp->f_flag, DTYPE_VNODE, dev, &devfs_ops_f);
	return (error);
}
1331 
1332 static int
devfs_pathconf(struct vop_pathconf_args * ap)1333 devfs_pathconf(struct vop_pathconf_args *ap)
1334 {
1335 
1336 	switch (ap->a_name) {
1337 	case _PC_FILESIZEBITS:
1338 		*ap->a_retval = 64;
1339 		return (0);
1340 	case _PC_NAME_MAX:
1341 		*ap->a_retval = NAME_MAX;
1342 		return (0);
1343 	case _PC_LINK_MAX:
1344 		*ap->a_retval = INT_MAX;
1345 		return (0);
1346 	case _PC_SYMLINK_MAX:
1347 		*ap->a_retval = MAXPATHLEN;
1348 		return (0);
1349 	case _PC_MAX_CANON:
1350 		if (ap->a_vp->v_vflag & VV_ISTTY) {
1351 			*ap->a_retval = MAX_CANON;
1352 			return (0);
1353 		}
1354 		return (EINVAL);
1355 	case _PC_MAX_INPUT:
1356 		if (ap->a_vp->v_vflag & VV_ISTTY) {
1357 			*ap->a_retval = MAX_INPUT;
1358 			return (0);
1359 		}
1360 		return (EINVAL);
1361 	case _PC_VDISABLE:
1362 		if (ap->a_vp->v_vflag & VV_ISTTY) {
1363 			*ap->a_retval = _POSIX_VDISABLE;
1364 			return (0);
1365 		}
1366 		return (EINVAL);
1367 	case _PC_MAC_PRESENT:
1368 #ifdef MAC
1369 		/*
1370 		 * If MAC is enabled, devfs automatically supports
1371 		 * trivial non-persistent label storage.
1372 		 */
1373 		*ap->a_retval = 1;
1374 #else
1375 		*ap->a_retval = 0;
1376 #endif
1377 		return (0);
1378 	case _PC_CHOWN_RESTRICTED:
1379 		*ap->a_retval = 1;
1380 		return (0);
1381 	default:
1382 		return (vop_stdpathconf(ap));
1383 	}
1384 	/* NOTREACHED */
1385 }
1386 
1387 /* ARGSUSED */
1388 static int
devfs_poll_f(struct file * fp,int events,struct ucred * cred,struct thread * td)1389 devfs_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td)
1390 {
1391 	struct cdev *dev;
1392 	struct cdevsw *dsw;
1393 	int error, ref;
1394 	struct file *fpop;
1395 
1396 	fpop = td->td_fpop;
1397 	error = devfs_fp_check(fp, &dev, &dsw, &ref);
1398 	if (error != 0) {
1399 		error = vnops.fo_poll(fp, events, cred, td);
1400 		return (error);
1401 	}
1402 	error = dsw->d_poll(dev, events, td);
1403 	td->td_fpop = fpop;
1404 	dev_relthread(dev, ref);
1405 	return(error);
1406 }
1407 
1408 /*
1409  * Print out the contents of a special device vnode.
1410  */
1411 static int
devfs_print(struct vop_print_args * ap)1412 devfs_print(struct vop_print_args *ap)
1413 {
1414 
1415 	printf("\tdev %s\n", devtoname(ap->a_vp->v_rdev));
1416 	return (0);
1417 }
1418 
1419 static int
devfs_read_f(struct file * fp,struct uio * uio,struct ucred * cred,int flags,struct thread * td)1420 devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred,
1421     int flags, struct thread *td)
1422 {
1423 	struct cdev *dev;
1424 	int ioflag, error, ref;
1425 	ssize_t resid;
1426 	struct cdevsw *dsw;
1427 	struct file *fpop;
1428 
1429 	if (uio->uio_resid > DEVFS_IOSIZE_MAX)
1430 		return (EINVAL);
1431 	fpop = td->td_fpop;
1432 	error = devfs_fp_check(fp, &dev, &dsw, &ref);
1433 	if (error != 0) {
1434 		error = vnops.fo_read(fp, uio, cred, flags, td);
1435 		return (error);
1436 	}
1437 	resid = uio->uio_resid;
1438 	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT);
1439 	if (ioflag & O_DIRECT)
1440 		ioflag |= IO_DIRECT;
1441 
1442 	foffset_lock_uio(fp, uio, flags | FOF_NOLOCK);
1443 	error = dsw->d_read(dev, uio, ioflag);
1444 	if (uio->uio_resid != resid || (error == 0 && resid != 0))
1445 		devfs_timestamp(&dev->si_atime);
1446 	td->td_fpop = fpop;
1447 	dev_relthread(dev, ref);
1448 
1449 	foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF_R);
1450 	return (error);
1451 }
1452 
/*
 * VOP_READDIR for devfs directories.
 *
 * Walks the directory's entry list from the start on every call,
 * skipping covered and whited-out entries and entries rejected by
 * devfs_prison_check(), and passes each entry whose running offset is
 * at or beyond uio->uio_offset to vfs_read_dirent().  On return
 * uio->uio_offset is the offset at which the walk stopped.
 *
 * Returns ENOTDIR if the vnode is not a directory, EINVAL for a
 * negative offset or when not even one entry fits in the caller's
 * buffer, EIO if devfs_populate_vp() fails, or the error from
 * vfs_read_dirent().  *ap->a_eofflag is set when the whole list was
 * walked without error.
 */
static int
devfs_readdir(struct vop_readdir_args *ap)
{
	int error;
	struct uio *uio;
	struct dirent *dp;
	struct devfs_dirent *dd;
	struct devfs_dirent *de;
	struct devfs_mount *dmp;
	off_t off;
	int *tmp_ncookies = NULL;
	ssize_t startresid;

	if (ap->a_vp->v_type != VDIR)
		return (ENOTDIR);

	uio = ap->a_uio;
	if (uio->uio_offset < 0)
		return (EINVAL);

	/*
	 * XXX: This is a temporary hack to get around this filesystem not
	 * supporting cookies. We store the location of the ncookies pointer
	 * in a temporary variable before calling vfs_subr.c:vfs_read_dirent()
	 * and set the number of cookies to 0. We then set the pointer to
	 * NULL so that vfs_read_dirent doesn't try to call realloc() on
	 * ap->a_cookies. Later in this function, we restore the ap->a_ncookies
	 * pointer to its original location before returning to the caller.
	 */
	if (ap->a_ncookies != NULL) {
		tmp_ncookies = ap->a_ncookies;
		*ap->a_ncookies = 0;
		ap->a_ncookies = NULL;
	}

	dmp = VFSTODEVFS(ap->a_vp->v_mount);
	/*
	 * dm_lock is released further down without being taken in this
	 * function, so devfs_populate_vp() evidently returns with it held
	 * on success.
	 */
	if (devfs_populate_vp(ap->a_vp) != 0) {
		if (tmp_ncookies != NULL)
			ap->a_ncookies = tmp_ncookies;
		return (EIO);
	}
	error = 0;
	/* Here "de" is the directory itself and "dd" iterates its entries. */
	de = ap->a_vp->v_data;
	off = 0;
	startresid = uio->uio_resid;
	TAILQ_FOREACH(dd, &de->de_dlist, de_list) {
		KASSERT(dd->de_cdp != (void *)0xdeadc0de, ("%s %d\n", __func__, __LINE__));
		if (dd->de_flags & (DE_COVERED | DE_WHITEOUT))
			continue;
		if (devfs_prison_check(dd, uio->uio_td))
			continue;
		/*
		 * "de" is reused from here on as the dirent that supplies
		 * the inode number: de_dir for subdirectory entries, the
		 * entry itself otherwise.
		 */
		if (dd->de_dirent->d_type == DT_DIR)
			de = dd->de_dir;
		else
			de = dd;
		dp = dd->de_dirent;
		MPASS(dp->d_reclen == GENERIC_DIRSIZ(dp));
		if (dp->d_reclen > uio->uio_resid) {
			/* Nothing was copied out, return EINVAL. */
			if (uio->uio_resid == startresid)
				error = EINVAL;
			/* Otherwise stop. */
			break;
		}
		dp->d_fileno = de->de_inode;
		/* NOTE: d_off is the offset for the *next* entry. */
		dp->d_off = off + dp->d_reclen;
		/* Entries before the requested offset are counted but not copied. */
		if (off >= uio->uio_offset) {
			error = vfs_read_dirent(ap, dp, off);
			if (error)
				break;
		}
		off += dp->d_reclen;
	}
	sx_xunlock(&dmp->dm_lock);
	uio->uio_offset = off;

	/*
	 * Restore ap->a_ncookies if it wasn't originally NULL in the first
	 * place.
	 */
	if (tmp_ncookies != NULL)
		ap->a_ncookies = tmp_ncookies;
	/* dd is NULL only when the loop ran off the end of the list. */
	if (dd == NULL && error == 0 && ap->a_eofflag != NULL)
		*ap->a_eofflag = 1;

	return (error);
}
1541 
1542 static int
devfs_readlink(struct vop_readlink_args * ap)1543 devfs_readlink(struct vop_readlink_args *ap)
1544 {
1545 	struct devfs_dirent *de;
1546 
1547 	de = ap->a_vp->v_data;
1548 	return (uiomove(de->de_symlink, strlen(de->de_symlink), ap->a_uio));
1549 }
1550 
1551 static void
devfs_reclaiml(struct vnode * vp)1552 devfs_reclaiml(struct vnode *vp)
1553 {
1554 	struct devfs_dirent *de;
1555 
1556 	mtx_assert(&devfs_de_interlock, MA_OWNED);
1557 	de = vp->v_data;
1558 	if (de != NULL) {
1559 		MPASS(de->de_usecount == 0);
1560 		de->de_vnode = NULL;
1561 		vp->v_data = NULL;
1562 	}
1563 }
1564 
1565 static int
devfs_reclaim(struct vop_reclaim_args * ap)1566 devfs_reclaim(struct vop_reclaim_args *ap)
1567 {
1568 	struct vnode *vp;
1569 
1570 	vp = ap->a_vp;
1571 	mtx_lock(&devfs_de_interlock);
1572 	devfs_reclaiml(vp);
1573 	mtx_unlock(&devfs_de_interlock);
1574 	return (0);
1575 }
1576 
/*
 * VOP_RECLAIM for VCHR devfs vnodes.
 *
 * Besides detaching the devfs_dirent via devfs_reclaiml(), this calls
 * devfs_usecount_subl() on the vnode, clears vp->v_rdev and, if a cdev
 * was attached, calls dev_rel() on it.  Always returns 0.
 */
static int
devfs_reclaim_vchr(struct vop_reclaim_args *ap)
{
	struct vnode *vp;
	struct cdev *dev;

	vp = ap->a_vp;
	MPASS(vp->v_type == VCHR);

	/* devfs_de_interlock is taken first, then the vnode interlock. */
	mtx_lock(&devfs_de_interlock);
	VI_LOCK(vp);
	devfs_usecount_subl(vp);
	devfs_reclaiml(vp);
	mtx_unlock(&devfs_de_interlock);
	/* Detach the cdev under the dev lock, vnode interlock still held. */
	dev_lock();
	dev = vp->v_rdev;
	vp->v_rdev = NULL;
	dev_unlock();
	VI_UNLOCK(vp);
	/* dev_rel() is called only after every lock taken here is dropped. */
	if (dev != NULL)
		dev_rel(dev);
	return (0);
}
1600 
/*
 * VOP_REMOVE for devfs.
 *
 * An entry with no cdev behind it (de_cdp == NULL) is unlinked from its
 * directory and handed to devfs_delete().  If that entry was a symlink,
 * a remaining entry of the same name has its DE_COVERED flag cleared.
 * An entry that is backed by a cdev is not deleted; it is only marked
 * DE_WHITEOUT.
 *
 * Both dvp and vp must be exclusively locked on entry (asserted) and
 * are exclusively locked again on return.  Always returns 0.
 */
static int
devfs_remove(struct vop_remove_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vnode *vp = ap->a_vp;
	struct devfs_dirent *dd;
	struct devfs_dirent *de, *de_covered;
	struct devfs_mount *dmp = VFSTODEVFS(vp->v_mount);

	ASSERT_VOP_ELOCKED(dvp, "devfs_remove");
	ASSERT_VOP_ELOCKED(vp, "devfs_remove");

	sx_xlock(&dmp->dm_lock);
	dd = ap->a_dvp->v_data;
	de = vp->v_data;
	if (de->de_cdp == NULL) {
		TAILQ_REMOVE(&dd->de_dlist, de, de_list);
		if (de->de_dirent->d_type == DT_LNK) {
			/*
			 * de is already off the list, so this finds another
			 * entry with the same name, if there is one.
			 */
			de_covered = devfs_find(dd, de->de_dirent->d_name,
			    de->de_dirent->d_namlen, 0);
			if (de_covered != NULL)
				de_covered->de_flags &= ~DE_COVERED;
		}
		/* We need to unlock dvp because devfs_delete() may lock it. */
		VOP_UNLOCK(vp);
		if (dvp != vp)
			VOP_UNLOCK(dvp);
		devfs_delete(dmp, de, 0);
		sx_xunlock(&dmp->dm_lock);
		/* Relock in parent-then-child order once dm_lock is dropped. */
		if (dvp != vp)
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	} else {
		de->de_flags |= DE_WHITEOUT;
		sx_xunlock(&dmp->dm_lock);
	}
	return (0);
}
1639 
/*
 * Revoke is called on a tty when a terminal session ends.  The vnode
 * is orphaned by setting v_op to deadfs so we need to let go of it
 * as well so that we create a new one next time around.
 *
 * Besides the vnode passed in, every vnode reachable through the
 * device's cdp_dirents[] array is vgone()'d as well.  Only REVOKEALL
 * is supported (asserted).  The vnode lock on vp is dropped while the
 * other vnodes are processed and retaken exclusively before returning.
 * Always returns 0.
 */
static int
devfs_revoke(struct vop_revoke_args *ap)
{
	struct vnode *vp = ap->a_vp, *vp2;
	struct cdev *dev;
	struct cdev_priv *cdp;
	struct devfs_dirent *de;
	enum vgetstate vs;
	u_int i;

	KASSERT((ap->a_flags & REVOKEALL) != 0, ("devfs_revoke !REVOKEALL"));

	dev = vp->v_rdev;
	cdp = cdev2priv(dev);

	/*
	 * Bump cdp_inuse for the duration of the call; it is dropped again
	 * at the bottom, where the final-release check is made.
	 */
	dev_lock();
	cdp->cdp_inuse++;
	dev_unlock();

	vhold(vp);
	vgone(vp);
	vdrop(vp);

	VOP_UNLOCK(vp);
 loop:
	/*
	 * Each pass scans cdp_dirents[] for an entry that still has a
	 * vnode.  Handling one requires dropping both locks, so the scan
	 * is restarted from the beginning afterwards; the loop ends once a
	 * full pass finds no vnode.
	 */
	for (;;) {
		mtx_lock(&devfs_de_interlock);
		dev_lock();
		vp2 = NULL;
		for (i = 0; i <= cdp->cdp_maxdirent; i++) {
			de = cdp->cdp_dirents[i];
			if (de == NULL)
				continue;

			vp2 = de->de_vnode;
			if (vp2 != NULL) {
				dev_unlock();
				vs = vget_prep(vp2);
				mtx_unlock(&devfs_de_interlock);
				/* Could not get the vnode: rescan from scratch. */
				if (vget_finish(vp2, LK_EXCLUSIVE, vs) != 0)
					goto loop;
				vhold(vp2);
				vgone(vp2);
				vdrop(vp2);
				vput(vp2);
				break;
			}
		}
		/* A vnode was processed (locks already dropped): scan again. */
		if (vp2 != NULL) {
			continue;
		}
		dev_unlock();
		mtx_unlock(&devfs_de_interlock);
		break;
	}
	dev_lock();
	cdp->cdp_inuse--;
	/*
	 * If the device is no longer active and this was the last user,
	 * take it off cdevp_list and call dev_rel() on it.
	 */
	if (!(cdp->cdp_flags & CDP_ACTIVE) && cdp->cdp_inuse == 0) {
		KASSERT((cdp->cdp_flags & CDP_ON_ACTIVE_LIST) != 0,
		    ("%s: cdp %p (%s) not on active list",
		    __func__, cdp, dev->si_name));
		cdp->cdp_flags &= ~CDP_ON_ACTIVE_LIST;
		TAILQ_REMOVE(&cdevp_list, cdp, cdp_list);
		dev_unlock();
		dev_rel(&cdp->cdp_c);
	} else
		dev_unlock();

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	return (0);
}
1717 
/*
 * VOP_IOCTL for the non-VCHR devfs vnodes: hands the command to
 * devfs_rules_ioctl() with dm_lock held exclusively.
 *
 * Returns EBADF if the vnode is doomed, ENOENT if DEVFS_DMP_DROP()
 * reports true after devfs_populate() (in which case
 * devfs_unmount_final() is called), and otherwise the result of
 * devfs_rules_ioctl().
 */
static int
devfs_rioctl(struct vop_ioctl_args *ap)
{
	struct vnode *vp;
	struct devfs_mount *dmp;
	int error;

	vp = ap->a_vp;
	/*
	 * The vnode lock is held only long enough to check the vnode and
	 * to acquire dm_lock; it is released before any further work.
	 */
	vn_lock(vp, LK_SHARED | LK_RETRY);
	if (VN_IS_DOOMED(vp)) {
		VOP_UNLOCK(vp);
		return (EBADF);
	}
	dmp = VFSTODEVFS(vp->v_mount);
	sx_xlock(&dmp->dm_lock);
	VOP_UNLOCK(vp);
	/* Hold the mount structure across devfs_populate(). */
	DEVFS_DMP_HOLD(dmp);
	devfs_populate(dmp);
	if (DEVFS_DMP_DROP(dmp)) {
		sx_xunlock(&dmp->dm_lock);
		devfs_unmount_final(dmp);
		return (ENOENT);
	}
	error = devfs_rules_ioctl(dmp, ap->a_command, ap->a_data, ap->a_td);
	sx_xunlock(&dmp->dm_lock);
	return (error);
}
1745 
1746 static int
devfs_rread(struct vop_read_args * ap)1747 devfs_rread(struct vop_read_args *ap)
1748 {
1749 
1750 	if (ap->a_vp->v_type != VDIR)
1751 		return (EINVAL);
1752 	return (VOP_READDIR(ap->a_vp, ap->a_uio, ap->a_cred, NULL, NULL, NULL));
1753 }
1754 
1755 static int
devfs_setattr(struct vop_setattr_args * ap)1756 devfs_setattr(struct vop_setattr_args *ap)
1757 {
1758 	struct devfs_dirent *de;
1759 	struct vattr *vap;
1760 	struct vnode *vp;
1761 	struct thread *td;
1762 	int c, error;
1763 	uid_t uid;
1764 	gid_t gid;
1765 
1766 	vap = ap->a_vap;
1767 	vp = ap->a_vp;
1768 	td = curthread;
1769 	if ((vap->va_type != VNON) ||
1770 	    (vap->va_nlink != VNOVAL) ||
1771 	    (vap->va_fsid != VNOVAL) ||
1772 	    (vap->va_fileid != VNOVAL) ||
1773 	    (vap->va_blocksize != VNOVAL) ||
1774 	    (vap->va_flags != VNOVAL && vap->va_flags != 0) ||
1775 	    (vap->va_rdev != VNOVAL) ||
1776 	    ((int)vap->va_bytes != VNOVAL) ||
1777 	    (vap->va_gen != VNOVAL)) {
1778 		return (EINVAL);
1779 	}
1780 
1781 	error = devfs_populate_vp(vp);
1782 	if (error != 0)
1783 		return (error);
1784 
1785 	de = vp->v_data;
1786 	if (vp->v_type == VDIR)
1787 		de = de->de_dir;
1788 
1789 	c = 0;
1790 	if (vap->va_uid == (uid_t)VNOVAL)
1791 		uid = de->de_uid;
1792 	else
1793 		uid = vap->va_uid;
1794 	if (vap->va_gid == (gid_t)VNOVAL)
1795 		gid = de->de_gid;
1796 	else
1797 		gid = vap->va_gid;
1798 	if (uid != de->de_uid || gid != de->de_gid) {
1799 		if ((ap->a_cred->cr_uid != de->de_uid) || uid != de->de_uid ||
1800 		    (gid != de->de_gid && !groupmember(gid, ap->a_cred))) {
1801 			error = priv_check(td, PRIV_VFS_CHOWN);
1802 			if (error != 0)
1803 				goto ret;
1804 		}
1805 		de->de_uid = uid;
1806 		de->de_gid = gid;
1807 		c = 1;
1808 	}
1809 
1810 	if (vap->va_mode != (mode_t)VNOVAL) {
1811 		if (ap->a_cred->cr_uid != de->de_uid) {
1812 			error = priv_check(td, PRIV_VFS_ADMIN);
1813 			if (error != 0)
1814 				goto ret;
1815 		}
1816 		de->de_mode = vap->va_mode;
1817 		c = 1;
1818 	}
1819 
1820 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
1821 		error = vn_utimes_perm(vp, vap, ap->a_cred, td);
1822 		if (error != 0)
1823 			goto ret;
1824 		if (vap->va_atime.tv_sec != VNOVAL) {
1825 			if (vp->v_type == VCHR)
1826 				vp->v_rdev->si_atime = vap->va_atime;
1827 			else
1828 				de->de_atime = vap->va_atime;
1829 		}
1830 		if (vap->va_mtime.tv_sec != VNOVAL) {
1831 			if (vp->v_type == VCHR)
1832 				vp->v_rdev->si_mtime = vap->va_mtime;
1833 			else
1834 				de->de_mtime = vap->va_mtime;
1835 		}
1836 		c = 1;
1837 	}
1838 
1839 	if (c) {
1840 		if (vp->v_type == VCHR)
1841 			vfs_timestamp(&vp->v_rdev->si_ctime);
1842 		else
1843 			vfs_timestamp(&de->de_mtime);
1844 	}
1845 
1846 ret:
1847 	sx_xunlock(&VFSTODEVFS(vp->v_mount)->dm_lock);
1848 	return (error);
1849 }
1850 
#ifdef MAC
/*
 * VOP_SETLABEL (MAC kernels only): relabel the vnode with ap->a_label
 * via mac_vnode_relabel() and then call mac_devfs_update() for the
 * vnode's devfs_dirent.  Always returns 0.
 */
static int
devfs_setlabel(struct vop_setlabel_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct devfs_dirent *de = vp->v_data;

	mac_vnode_relabel(ap->a_cred, vp, ap->a_label);
	mac_devfs_update(vp->v_mount, de, vp);
	return (0);
}
#endif
1867 
1868 static int
devfs_stat_f(struct file * fp,struct stat * sb,struct ucred * cred)1869 devfs_stat_f(struct file *fp, struct stat *sb, struct ucred *cred)
1870 {
1871 
1872 	return (vnops.fo_stat(fp, sb, cred));
1873 }
1874 
/*
 * VOP_SYMLINK for devfs directories.
 *
 * Creates a user symlink entry (DE_USER, root:wheel ids 0/0, mode 0755)
 * named by ap->a_cnp pointing at ap->a_target, inserts it right after
 * the first two entries of the directory and allocates a vnode for it.
 * If an entry of the same name already exists it is marked DE_COVERED,
 * unless that entry is itself a user entry, in which case the new
 * dirent is deleted again and EEXIST is returned.
 *
 * Returns the priv_check(PRIV_DEVFS_SYMLINK) error, ENOENT if
 * devfs_populate_vp() fails, EEXIST as above, or the result of
 * devfs_allocv().
 */
static int
devfs_symlink(struct vop_symlink_args *ap)
{
	int i, error;
	struct devfs_dirent *dd;
	struct devfs_dirent *de, *de_covered, *de_dotdot;
	struct devfs_mount *dmp;

	error = priv_check(curthread, PRIV_DEVFS_SYMLINK);
	if (error)
		return(error);
	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
	/*
	 * dm_lock is released on the EEXIST path below without being taken
	 * in this function, so it is evidently held once
	 * devfs_populate_vp() succeeds.
	 */
	if (devfs_populate_vp(ap->a_dvp) != 0)
		return (ENOENT);

	dd = ap->a_dvp->v_data;
	de = devfs_newdirent(ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen);
	de->de_flags = DE_USER;
	de->de_uid = 0;
	de->de_gid = 0;
	de->de_mode = 0755;
	de->de_inode = alloc_unr(devfs_inos);
	de->de_dir = dd;
	de->de_dirent->d_type = DT_LNK;
	/* Private copy of the target, including the terminating NUL. */
	i = strlen(ap->a_target) + 1;
	de->de_symlink = malloc(i, M_DEVFS, M_WAITOK);
	bcopy(ap->a_target, de->de_symlink, i);
#ifdef MAC
	mac_devfs_create_symlink(ap->a_cnp->cn_cred, dmp->dm_mount, dd, de);
#endif
	/* The new entry is not on the list yet, so this finds an older one. */
	de_covered = devfs_find(dd, de->de_dirent->d_name,
	    de->de_dirent->d_namlen, 0);
	if (de_covered != NULL) {
		if ((de_covered->de_flags & DE_USER) != 0) {
			devfs_delete(dmp, de, DEVFS_DEL_NORECURSE);
			sx_xunlock(&dmp->dm_lock);
			return (EEXIST);
		}
		KASSERT((de_covered->de_flags & DE_COVERED) == 0,
		    ("devfs_symlink: entry %p already covered", de_covered));
		de_covered->de_flags |= DE_COVERED;
	}

	de_dotdot = TAILQ_FIRST(&dd->de_dlist);		/* "." */
	de_dotdot = TAILQ_NEXT(de_dotdot, de_list);	/* ".." */
	TAILQ_INSERT_AFTER(&dd->de_dlist, de_dotdot, de, de_list);
	devfs_dir_ref_de(dmp, dd);
	devfs_rules_apply(dmp, de);

	/*
	 * No unlock on this path; presumably devfs_allocv() releases
	 * dm_lock before returning.
	 */
	return (devfs_allocv(de, ap->a_dvp->v_mount, LK_EXCLUSIVE, ap->a_vpp));
}
1926 
1927 static int
devfs_truncate_f(struct file * fp,off_t length,struct ucred * cred,struct thread * td)1928 devfs_truncate_f(struct file *fp, off_t length, struct ucred *cred, struct thread *td)
1929 {
1930 
1931 	return (vnops.fo_truncate(fp, length, cred, td));
1932 }
1933 
1934 static int
devfs_write_f(struct file * fp,struct uio * uio,struct ucred * cred,int flags,struct thread * td)1935 devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred,
1936     int flags, struct thread *td)
1937 {
1938 	struct cdev *dev;
1939 	int error, ioflag, ref;
1940 	ssize_t resid;
1941 	struct cdevsw *dsw;
1942 	struct file *fpop;
1943 
1944 	if (uio->uio_resid > DEVFS_IOSIZE_MAX)
1945 		return (EINVAL);
1946 	fpop = td->td_fpop;
1947 	error = devfs_fp_check(fp, &dev, &dsw, &ref);
1948 	if (error != 0) {
1949 		error = vnops.fo_write(fp, uio, cred, flags, td);
1950 		return (error);
1951 	}
1952 	KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td));
1953 	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT | O_FSYNC);
1954 	if (ioflag & O_DIRECT)
1955 		ioflag |= IO_DIRECT;
1956 	foffset_lock_uio(fp, uio, flags | FOF_NOLOCK);
1957 
1958 	resid = uio->uio_resid;
1959 
1960 	error = dsw->d_write(dev, uio, ioflag);
1961 	if (uio->uio_resid != resid || (error == 0 && resid != 0)) {
1962 		devfs_timestamp(&dev->si_ctime);
1963 		dev->si_mtime = dev->si_ctime;
1964 	}
1965 	td->td_fpop = fpop;
1966 	dev_relthread(dev, ref);
1967 
1968 	foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF_W);
1969 	return (error);
1970 }
1971 
1972 static int
devfs_mmap_f(struct file * fp,vm_map_t map,vm_offset_t * addr,vm_size_t size,vm_prot_t prot,vm_prot_t cap_maxprot,int flags,vm_ooffset_t foff,struct thread * td)1973 devfs_mmap_f(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size,
1974     vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff,
1975     struct thread *td)
1976 {
1977 	struct cdev *dev;
1978 	struct cdevsw *dsw;
1979 	struct mount *mp;
1980 	struct vnode *vp;
1981 	struct file *fpop;
1982 	vm_object_t object;
1983 	vm_prot_t maxprot;
1984 	int error, ref;
1985 
1986 	vp = fp->f_vnode;
1987 
1988 	/*
1989 	 * Ensure that file and memory protections are
1990 	 * compatible.
1991 	 */
1992 	mp = vp->v_mount;
1993 	if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0) {
1994 		maxprot = VM_PROT_NONE;
1995 		if ((prot & VM_PROT_EXECUTE) != 0)
1996 			return (EACCES);
1997 	} else
1998 		maxprot = VM_PROT_EXECUTE;
1999 	if ((fp->f_flag & FREAD) != 0)
2000 		maxprot |= VM_PROT_READ;
2001 	else if ((prot & VM_PROT_READ) != 0)
2002 		return (EACCES);
2003 
2004 	/*
2005 	 * If we are sharing potential changes via MAP_SHARED and we
2006 	 * are trying to get write permission although we opened it
2007 	 * without asking for it, bail out.
2008 	 *
2009 	 * Note that most character devices always share mappings.
2010 	 * The one exception is that D_MMAP_ANON devices
2011 	 * (i.e. /dev/zero) permit private writable mappings.
2012 	 *
2013 	 * Rely on vm_mmap_cdev() to fail invalid MAP_PRIVATE requests
2014 	 * as well as updating maxprot to permit writing for
2015 	 * D_MMAP_ANON devices rather than doing that here.
2016 	 */
2017 	if ((flags & MAP_SHARED) != 0) {
2018 		if ((fp->f_flag & FWRITE) != 0)
2019 			maxprot |= VM_PROT_WRITE;
2020 		else if ((prot & VM_PROT_WRITE) != 0)
2021 			return (EACCES);
2022 	}
2023 	maxprot &= cap_maxprot;
2024 
2025 	fpop = td->td_fpop;
2026 	error = devfs_fp_check(fp, &dev, &dsw, &ref);
2027 	if (error != 0)
2028 		return (error);
2029 
2030 	error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, dev, dsw, &foff,
2031 	    &object);
2032 	td->td_fpop = fpop;
2033 	dev_relthread(dev, ref);
2034 	if (error != 0)
2035 		return (error);
2036 
2037 	error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
2038 	    foff, FALSE, td);
2039 	if (error != 0)
2040 		vm_object_deallocate(object);
2041 	return (error);
2042 }
2043 
2044 dev_t
dev2udev(struct cdev * x)2045 dev2udev(struct cdev *x)
2046 {
2047 	if (x == NULL)
2048 		return (NODEV);
2049 	return (cdev2priv(x)->cdp_inode);
2050 }
2051 
2052 static int
devfs_cmp_f(struct file * fp1,struct file * fp2,struct thread * td)2053 devfs_cmp_f(struct file *fp1, struct file *fp2, struct thread *td)
2054 {
2055 	if (fp2->f_type != DTYPE_VNODE || fp2->f_ops != &devfs_ops_f)
2056 		return (3);
2057 	return (kcmp_cmp((uintptr_t)fp1->f_data, (uintptr_t)fp2->f_data));
2058 }
2059 
/*
 * File operations for files opened on devfs devices.  devfs_open()
 * installs this table with finit() when the file still has badfileops.
 * The devfs_*_f handlers are defined in this file; chmod, chown,
 * sendfile, seek and fill_kinfo use the generic vn_* routines.
 */
static const struct fileops devfs_ops_f = {
	.fo_read =	devfs_read_f,
	.fo_write =	devfs_write_f,
	.fo_truncate =	devfs_truncate_f,
	.fo_ioctl =	devfs_ioctl_f,
	.fo_poll =	devfs_poll_f,
	.fo_kqfilter =	devfs_kqfilter_f,
	.fo_stat =	devfs_stat_f,
	.fo_close =	devfs_close_f,
	.fo_chmod =	vn_chmod,
	.fo_chown =	vn_chown,
	.fo_sendfile =	vn_sendfile,
	.fo_seek =	vn_seek,
	.fo_fill_kinfo = vn_fill_kinfo,
	.fo_mmap =	devfs_mmap_f,
	.fo_cmp =	devfs_cmp_f,
	.fo_flags =	DFLAG_PASSABLE | DFLAG_SEEKABLE
};
2078 
/*
 * Vops for non-CHR vnodes in /dev.
 *
 * Operations not listed here fall through to default_vnodeops.  Note
 * that ioctl and read use the devfs_rioctl()/devfs_rread() variants
 * rather than the device ones.
 */
static struct vop_vector devfs_vnodeops = {
	.vop_default =		&default_vnodeops,

	.vop_access =		devfs_access,
	.vop_getattr =		devfs_getattr,
	.vop_ioctl =		devfs_rioctl,
	.vop_lookup =		devfs_lookup,
	.vop_mknod =		devfs_mknod,
	.vop_pathconf =		devfs_pathconf,
	.vop_read =		devfs_rread,
	.vop_readdir =		devfs_readdir,
	.vop_readlink =		devfs_readlink,
	.vop_reclaim =		devfs_reclaim,
	.vop_remove =		devfs_remove,
	.vop_revoke =		devfs_revoke,
	.vop_setattr =		devfs_setattr,
#ifdef MAC
	.vop_setlabel =		devfs_setlabel,
#endif
	.vop_symlink =		devfs_symlink,
	.vop_vptocnp =		devfs_vptocnp,
	.vop_lock1 =		vop_lock,
	.vop_unlock =		vop_unlock,
	.vop_islocked =		vop_islocked,
	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
};
VFS_VOP_VECTOR_REGISTER(devfs_vnodeops);
2107 
/*
 * Vops for VCHR vnodes in /dev.
 *
 * Operations not listed here fall through to default_vnodeops.
 * Directory and regular-file style operations are wired to VOP_PANIC,
 * and the vnode-level read, write and poll entries point at the dead_*
 * handlers.  Reclaim uses the VCHR-specific devfs_reclaim_vchr().
 */
static struct vop_vector devfs_specops = {
	.vop_default =		&default_vnodeops,

	.vop_access =		devfs_access,
	.vop_bmap =		VOP_PANIC,
	.vop_close =		devfs_close,
	.vop_create =		VOP_PANIC,
	.vop_fsync =		vop_stdfsync,
	.vop_getattr =		devfs_getattr,
	.vop_ioctl =		devfs_ioctl,
	.vop_link =		VOP_PANIC,
	.vop_mkdir =		VOP_PANIC,
	.vop_mknod =		VOP_PANIC,
	.vop_open =		devfs_open,
	.vop_pathconf =		devfs_pathconf,
	.vop_poll =		dead_poll,
	.vop_print =		devfs_print,
	.vop_read =		dead_read,
	.vop_readdir =		VOP_PANIC,
	.vop_readlink =		VOP_PANIC,
	.vop_reallocblks =	VOP_PANIC,
	.vop_reclaim =		devfs_reclaim_vchr,
	.vop_remove =		devfs_remove,
	.vop_rename =		VOP_PANIC,
	.vop_revoke =		devfs_revoke,
	.vop_rmdir =		VOP_PANIC,
	.vop_setattr =		devfs_setattr,
#ifdef MAC
	.vop_setlabel =		devfs_setlabel,
#endif
	.vop_strategy =		VOP_PANIC,
	.vop_symlink =		VOP_PANIC,
	.vop_vptocnp =		devfs_vptocnp,
	.vop_write =		dead_write,
	.vop_lock1 =		vop_lock,
	.vop_unlock =		vop_unlock,
	.vop_islocked =		vop_islocked,
	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
};
VFS_VOP_VECTOR_REGISTER(devfs_specops);
2149 
/*
 * Our calling convention to the device drivers used to be that we passed
 * vnode.h IO_* flags to read()/write(), but we're moving to fcntl.h O_
 * flags instead since that's what open(), close() and ioctl() take and
 * we don't really want vnode.h in device drivers.
 * We solved the source compatibility by redefining some vnode flags to
 * be the same as the fcntl ones and by sending down the bitwise OR of
 * the respective fcntl/vnode flags.  These CTASSERTS make sure nobody
 * pulls the rug out under this.
 */
CTASSERT(O_NONBLOCK == IO_NDELAY);
CTASSERT(O_FSYNC == IO_SYNC);
2162