xref: /illumos-gate/usr/src/uts/common/fs/namefs/namevfs.c (revision f73e1ebf60792a8bdb2d559097c3131b68c09318)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2017 by Delphix. All rights reserved.
24  * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved	*/
29 
30 
31 /*
32  * This file supports the vfs operations for the NAMEFS file system.
33  */
34 
35 #include <sys/types.h>
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/debug.h>
39 #include <sys/errno.h>
40 #include <sys/kmem.h>
41 #include <sys/inline.h>
42 #include <sys/file.h>
43 #include <sys/proc.h>
44 #include <sys/stat.h>
45 #include <sys/statvfs.h>
46 #include <sys/mount.h>
47 #include <sys/sysmacros.h>
48 #include <sys/var.h>
49 #include <sys/vfs.h>
50 #include <sys/vfs_opreg.h>
51 #include <sys/vnode.h>
52 #include <sys/mode.h>
53 #include <sys/pcb.h>
54 #include <sys/signal.h>
55 #include <sys/user.h>
56 #include <sys/uio.h>
57 #include <sys/cred.h>
58 #include <sys/fs/namenode.h>
59 #include <sys/stream.h>
60 #include <sys/strsubr.h>
61 #include <sys/cmn_err.h>
62 #include <sys/modctl.h>
63 #include <fs/fs_subr.h>
64 #include <sys/policy.h>
65 #include <sys/vmem.h>
66 #include <sys/fs/sdev_impl.h>
67 
68 #define	NM_INOQUANT		(64 * 1024)
69 
70 /*
71  * Define global data structures.
72  */
73 dev_t	namedev;
74 int	namefstype;
75 struct	namenode *nm_filevp_hash[NM_FILEVP_HASH_SIZE];
76 struct	vfs namevfs;
77 kmutex_t ntable_lock;
78 
79 static vmem_t	*nm_inoarena;	/* vmem arena to allocate inode no's from */
80 static kmutex_t	nm_inolock;
81 
82 vfsops_t *namefs_vfsops;
83 /*
84  * Functions to allocate node id's starting from 1. Based on vmem routines.
85  * The vmem arena is extended in NM_INOQUANT chunks.
86  */
87 uint64_t
88 namenodeno_alloc(void)
89 {
90 	uint64_t nno;
91 
92 	mutex_enter(&nm_inolock);
93 	nno = (uint64_t)(uintptr_t)
94 	    vmem_alloc(nm_inoarena, 1, VM_NOSLEEP + VM_FIRSTFIT);
95 	if (nno == 0) {
96 		(void) vmem_add(nm_inoarena, (void *)(vmem_size(nm_inoarena,
97 		    VMEM_ALLOC | VMEM_FREE) + 1), NM_INOQUANT, VM_SLEEP);
98 		nno = (uint64_t)(uintptr_t)
99 		    vmem_alloc(nm_inoarena, 1, VM_SLEEP + VM_FIRSTFIT);
100 		ASSERT(nno != 0);
101 	}
102 	mutex_exit(&nm_inolock);
103 	ASSERT32(nno <= ULONG_MAX);
104 	return (nno);
105 }
106 
107 static void
108 namenodeno_init(void)
109 {
110 	nm_inoarena = vmem_create("namefs_inodes", (void *)1, NM_INOQUANT, 1,
111 	    NULL, NULL, NULL, 1, VM_SLEEP);
112 	mutex_init(&nm_inolock, NULL, MUTEX_DEFAULT, NULL);
113 }
114 
115 void
116 namenodeno_free(uint64_t nn)
117 {
118 	void *vaddr = (void *)(uintptr_t)nn;
119 
120 	ASSERT32((uint64_t)(uintptr_t)vaddr == nn);
121 
122 	mutex_enter(&nm_inolock);
123 	vmem_free(nm_inoarena, vaddr, 1);
124 	mutex_exit(&nm_inolock);
125 }
126 
127 /*
128  * Insert a namenode into the nm_filevp_hash table.
129  *
130  * Each link has a unique namenode with a unique nm_mountvp field.
131  * The nm_filevp field of the namenode need not be unique, since a
132  * file descriptor may be mounted to multiple nodes at the same time.
133  * We hash on nm_filevp since that's what discriminates the searches
134  * in namefind() and nm_unmountall().
135  */
136 void
137 nameinsert(struct namenode *nodep)
138 {
139 	struct namenode **bucket;
140 
141 	ASSERT(MUTEX_HELD(&ntable_lock));
142 
143 	bucket = NM_FILEVP_HASH(nodep->nm_filevp);
144 	nodep->nm_nextp = *bucket;
145 	*bucket = nodep;
146 }
147 
148 /*
149  * Remove a namenode from the hash table, if present.
150  */
151 void
152 nameremove(struct namenode *nodep)
153 {
154 	struct namenode *np, **npp;
155 
156 	ASSERT(MUTEX_HELD(&ntable_lock));
157 
158 	for (npp = NM_FILEVP_HASH(nodep->nm_filevp); (np = *npp) != NULL;
159 	    npp = &np->nm_nextp) {
160 		if (np == nodep) {
161 			*npp = np->nm_nextp;
162 			return;
163 		}
164 	}
165 }
166 
167 /*
168  * Search for a namenode that has a nm_filevp == vp and nm_mountpt == mnt.
169  * If mnt is NULL, return the first link with nm_filevp of vp.
170  * Returns namenode pointer on success, NULL on failure.
171  */
172 struct namenode *
173 namefind(vnode_t *vp, vnode_t *mnt)
174 {
175 	struct namenode *np;
176 
177 	ASSERT(MUTEX_HELD(&ntable_lock));
178 	for (np = *NM_FILEVP_HASH(vp); np != NULL; np = np->nm_nextp)
179 		if (np->nm_filevp == vp &&
180 		    (mnt == NULL || np->nm_mountpt == mnt))
181 			break;
182 	return (np);
183 }
184 
185 /*
186  * For each namenode that has nm_filevp == vp, call the provided function
187  * with the namenode as an argument. This finds all of the namefs entries
188  * which are mounted on vp; note that there can be more than one.
189  */
190 int
191 nm_walk_mounts(const vnode_t *vp, nm_walk_mounts_f *func, cred_t *cr, void *arg)
192 {
193 	struct namenode *np;
194 	int ret = 0;
195 
196 	mutex_enter(&ntable_lock);
197 
198 	for (np = *NM_FILEVP_HASH(vp); np != NULL; np = np->nm_nextp) {
199 		if (np->nm_filevp == vp) {
200 			if ((ret = func(np, cr, arg)) != 0)
201 				break;
202 		}
203 	}
204 
205 	mutex_exit(&ntable_lock);
206 
207 	return (ret);
208 }
209 
210 /*
211  * Force the unmouting of a file descriptor from ALL of the nodes
212  * that it was mounted to.
213  * At the present time, the only usage for this routine is in the
214  * event one end of a pipe was mounted. At the time the unmounted
215  * end gets closed down, the mounted end is forced to be unmounted.
216  *
217  * This routine searches the namenode hash list for all namenodes
218  * that have a nm_filevp field equal to vp. Each time one is found,
219  * the dounmount() routine is called. This causes the nm_unmount()
220  * routine to be called and thus, the file descriptor is unmounted
221  * from the node.
222  *
223  * At the start of this routine, the reference count for vp is
224  * incremented to protect the vnode from being released in the
225  * event the mount was the only thing keeping the vnode active.
226  * If that is the case, the VOP_CLOSE operation is applied to
227  * the vnode, prior to it being released.
228  */
229 static int
230 nm_umountall(vnode_t *vp, cred_t *crp)
231 {
232 	vfs_t *vfsp;
233 	struct namenode *nodep;
234 	int error = 0;
235 	int realerr = 0;
236 
237 	/*
238 	 * For each namenode that is associated with the file:
239 	 * If the v_vfsp field is not namevfs, dounmount it.  Otherwise,
240 	 * it was created in nm_open() and will be released in time.
241 	 * The following loop replicates some code from nm_find.  That
242 	 * routine can't be used as is since the list isn't strictly
243 	 * consumed as it is traversed.
244 	 */
245 	mutex_enter(&ntable_lock);
246 	nodep = *NM_FILEVP_HASH(vp);
247 	while (nodep) {
248 		if (nodep->nm_filevp == vp &&
249 		    (vfsp = NMTOV(nodep)->v_vfsp) != NULL &&
250 		    vfsp != &namevfs && (NMTOV(nodep)->v_flag & VROOT)) {
251 
252 			/*
253 			 * If the vn_vfswlock fails, skip the vfs since
254 			 * somebody else may be unmounting it.
255 			 */
256 			if (vn_vfswlock(vfsp->vfs_vnodecovered)) {
257 				realerr = EBUSY;
258 				nodep = nodep->nm_nextp;
259 				continue;
260 			}
261 
262 			/*
263 			 * Can't hold ntable_lock across call to do_unmount
264 			 * because nm_unmount tries to acquire it.  This means
265 			 * there is a window where another mount of vp can
266 			 * happen so it is possible that after nm_unmountall
267 			 * there are still some mounts.  This situation existed
268 			 * without MT locking because dounmount can sleep
269 			 * so another mount could happen during that time.
270 			 * This situation is unlikely and doesn't really cause
271 			 * any problems.
272 			 */
273 			mutex_exit(&ntable_lock);
274 			if ((error = dounmount(vfsp, 0, crp)) != 0)
275 				realerr = error;
276 			mutex_enter(&ntable_lock);
277 			/*
278 			 * Since we dropped the ntable_lock, we
279 			 * have to start over from the beginning.
280 			 * If for some reasons dounmount() fails,
281 			 * start from beginning means that we will keep on
282 			 * trying unless another thread unmounts it for us.
283 			 */
284 			nodep = *NM_FILEVP_HASH(vp);
285 		} else
286 			nodep = nodep->nm_nextp;
287 	}
288 	mutex_exit(&ntable_lock);
289 	return (realerr);
290 }
291 
292 /*
293  * Force the unmouting of a file descriptor from ALL of the nodes
294  * that it was mounted to.  XXX: fifo_close() calls this routine.
295  *
296  * nm_umountall() may return EBUSY.
297  * nm_unmountall() will keep on trying until it succeeds.
298  */
299 int
300 nm_unmountall(vnode_t *vp, cred_t *crp)
301 {
302 	int error;
303 
304 	/*
305 	 * Nm_umuontall() returns only if it succeeds or
306 	 * return with error EBUSY.  If EBUSY, that means
307 	 * it cannot acquire the lock on the covered vnode,
308 	 * and we will keep on trying.
309 	 */
310 	for (;;) {
311 		error = nm_umountall(vp, crp);
312 		if (error != EBUSY)
313 			break;
314 		delay(1);	/* yield cpu briefly, then try again */
315 	}
316 	return (error);
317 }
318 
319 /*
320  * Mount a file descriptor onto the node in the file system.
321  * Create a new vnode, update the attributes with info from the
322  * file descriptor and the mount point.  The mask, mode, uid, gid,
323  * atime, mtime and ctime are taken from the mountpt.  Link count is
324  * set to one, the file system id is namedev and nodeid is unique
325  * for each mounted object.  Other attributes are taken from mount point.
326  * Make sure user is owner (or root) with write permissions on mount point.
327  * Hash the new vnode and return 0.
328  * Upon entry to this routine, the file descriptor is in the
329  * fd field of a struct namefd.  Copy that structure from user
330  * space and retrieve the file descriptor.
331  */
332 static int
333 nm_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *crp)
334 {
335 	struct namefd namefdp;
336 	struct vnode *filevp;		/* file descriptor vnode */
337 	struct file *fp;
338 	struct vnode *newvp;		/* vnode representing this mount */
339 	struct vnode *rvp;		/* realvp (if any) for the mountpt */
340 	struct namenode *nodep;		/* namenode for this mount */
341 	struct vattr filevattr;		/* attributes of file dec.  */
342 	struct vattr *vattrp;		/* attributes of this mount */
343 	char *resource_name;
344 	char *resource_nodetype;
345 	statvfs64_t *svfsp;
346 	int error = 0;
347 
348 	/*
349 	 * Get the file descriptor from user space.
350 	 * Make sure the file descriptor is valid and has an
351 	 * associated file pointer.
352 	 * If so, extract the vnode from the file pointer.
353 	 */
354 	if (uap->datalen != sizeof (struct namefd))
355 		return (EINVAL);
356 
357 	if (copyin(uap->dataptr, &namefdp, uap->datalen))
358 		return (EFAULT);
359 
360 	if ((fp = getf(namefdp.fd)) == NULL)
361 		return (EBADF);
362 
363 	/*
364 	 * If the mount point already has something mounted
365 	 * on it, disallow this mount.  (This restriction may
366 	 * be removed in a later release).
367 	 * Or unmount has completed but the namefs ROOT vnode
368 	 * count has not decremented to zero, disallow this mount.
369 	 */
370 
371 	mutex_enter(&mvp->v_lock);
372 	if ((mvp->v_flag & VROOT) ||
373 	    vfs_matchops(mvp->v_vfsp, namefs_vfsops)) {
374 		mutex_exit(&mvp->v_lock);
375 		releasef(namefdp.fd);
376 		return (EBUSY);
377 	}
378 	mutex_exit(&mvp->v_lock);
379 
380 	/*
381 	 * Cannot allow users to fattach() in /dev/pts.
382 	 * First, there is no need for doing so and secondly
383 	 * we cannot allow arbitrary users to park on a node in
384 	 * /dev/pts or /dev/vt.
385 	 */
386 	rvp = NULLVP;
387 	if (vn_matchops(mvp, spec_getvnodeops()) &&
388 	    VOP_REALVP(mvp, &rvp, NULL) == 0 && rvp &&
389 	    (vn_matchops(rvp, devpts_getvnodeops()) ||
390 	    vn_matchops(rvp, devvt_getvnodeops()))) {
391 		releasef(namefdp.fd);
392 		return (ENOTSUP);
393 	}
394 
395 	filevp = fp->f_vnode;
396 	if (filevp->v_type == VDIR || filevp->v_type == VPORT) {
397 		releasef(namefdp.fd);
398 		return (EINVAL);
399 	}
400 
401 	/*
402 	 * If the fd being mounted refers to neither a door nor a stream,
403 	 * make sure the caller is privileged.
404 	 */
405 	if (filevp->v_type != VDOOR && filevp->v_stream == NULL) {
406 		if (secpolicy_fs_mount(crp, filevp, vfsp) != 0) {
407 			/* fd is neither a stream nor a door */
408 			releasef(namefdp.fd);
409 			return (EINVAL);
410 		}
411 	}
412 
413 	/*
414 	 * Make sure the file descriptor is not the root of some
415 	 * file system.
416 	 * If it's not, create a reference and allocate a namenode
417 	 * to represent this mount request.
418 	 */
419 	if (filevp->v_flag & VROOT) {
420 		releasef(namefdp.fd);
421 		return (EBUSY);
422 	}
423 
424 	nodep = kmem_zalloc(sizeof (struct namenode), KM_SLEEP);
425 
426 	mutex_init(&nodep->nm_lock, NULL, MUTEX_DEFAULT, NULL);
427 	vattrp = &nodep->nm_vattr;
428 	vattrp->va_mask = AT_ALL;
429 	if (error = VOP_GETATTR(mvp, vattrp, 0, crp, NULL))
430 		goto out;
431 
432 	filevattr.va_mask = AT_ALL;
433 	if (error = VOP_GETATTR(filevp, &filevattr, 0, crp, NULL))
434 		goto out;
435 	/*
436 	 * Make sure the user is the owner of the mount point
437 	 * or has sufficient privileges.
438 	 */
439 	if (error = secpolicy_vnode_owner(crp, vattrp->va_uid))
440 		goto out;
441 
442 	/*
443 	 * Make sure the user has write permissions on the
444 	 * mount point (or has sufficient privileges).
445 	 */
446 	if (secpolicy_vnode_access2(crp, mvp, vattrp->va_uid, vattrp->va_mode,
447 	    VWRITE) != 0) {
448 		error = EACCES;
449 		goto out;
450 	}
451 
452 	/*
453 	 * If the file descriptor has file/record locking, don't
454 	 * allow the mount to succeed.
455 	 */
456 	if (vn_has_flocks(filevp)) {
457 		error = EACCES;
458 		goto out;
459 	}
460 
461 	/*
462 	 * Initialize the namenode.
463 	 */
464 	if (filevp->v_stream) {
465 		struct stdata *stp = filevp->v_stream;
466 		mutex_enter(&stp->sd_lock);
467 		stp->sd_flag |= STRMOUNT;
468 		mutex_exit(&stp->sd_lock);
469 	}
470 	nodep->nm_filevp = filevp;
471 	mutex_enter(&fp->f_tlock);
472 	fp->f_count++;
473 	mutex_exit(&fp->f_tlock);
474 
475 	releasef(namefdp.fd);
476 	nodep->nm_filep = fp;
477 	nodep->nm_mountpt = mvp;
478 
479 	/*
480 	 * The attributes for the mounted file descriptor were initialized
481 	 * above by applying VOP_GETATTR to the mount point.  Some of
482 	 * the fields of the attributes structure will be overwritten
483 	 * by the attributes from the file descriptor.
484 	 */
485 	vattrp->va_type    = filevattr.va_type;
486 	vattrp->va_fsid    = namedev;
487 	vattrp->va_nodeid  = namenodeno_alloc();
488 	vattrp->va_nlink   = 1;
489 	vattrp->va_size    = filevattr.va_size;
490 	vattrp->va_rdev    = filevattr.va_rdev;
491 	vattrp->va_blksize = filevattr.va_blksize;
492 	vattrp->va_nblocks = filevattr.va_nblocks;
493 	vattrp->va_seq	   = 0;
494 
495 	/*
496 	 * Initialize new vnode structure for the mounted file descriptor.
497 	 */
498 	nodep->nm_vnode = vn_alloc(KM_SLEEP);
499 	newvp = NMTOV(nodep);
500 
501 	newvp->v_flag = filevp->v_flag | VROOT | VNOMAP | VNOSWAP;
502 	vn_setops(newvp, nm_vnodeops);
503 	newvp->v_vfsp = vfsp;
504 	newvp->v_stream = filevp->v_stream;
505 	newvp->v_type = filevp->v_type;
506 	newvp->v_rdev = filevp->v_rdev;
507 	newvp->v_data = (caddr_t)nodep;
508 	VFS_HOLD(vfsp);
509 	vn_copypath(mvp, newvp);
510 	vn_exists(newvp);
511 
512 	/*
513 	 * Initialize the vfs structure.
514 	 */
515 	vfsp->vfs_vnodecovered = NULL;
516 	vfsp->vfs_flag |= VFS_UNLINKABLE;
517 	vfsp->vfs_bsize = 1024;
518 	vfsp->vfs_fstype = namefstype;
519 	vfs_make_fsid(&vfsp->vfs_fsid, namedev, namefstype);
520 	vfsp->vfs_data = (caddr_t)nodep;
521 	vfsp->vfs_dev = namedev;
522 	vfsp->vfs_bcount = 0;
523 
524 	/*
525 	 * Set the name we mounted from.
526 	 */
527 	switch (filevp->v_type) {
528 	case VPROC:	/* VOP_GETATTR() translates this to VREG */
529 	case VREG:	resource_nodetype = "file"; break;
530 	case VDIR:	resource_nodetype = "directory"; break;
531 	case VBLK:	resource_nodetype = "device"; break;
532 	case VCHR:	resource_nodetype = "device"; break;
533 	case VLNK:	resource_nodetype = "link"; break;
534 	case VFIFO:	resource_nodetype = "fifo"; break;
535 	case VDOOR:	resource_nodetype = "door"; break;
536 	case VSOCK:	resource_nodetype = "socket"; break;
537 	default:	resource_nodetype = "resource"; break;
538 	}
539 
540 #define	RESOURCE_NAME_SZ 128 /* Maximum length of the resource name */
541 	resource_name = kmem_alloc(RESOURCE_NAME_SZ, KM_SLEEP);
542 	svfsp = kmem_alloc(sizeof (statvfs64_t), KM_SLEEP);
543 
544 	error = VFS_STATVFS(filevp->v_vfsp, svfsp);
545 	if (error == 0) {
546 		(void) snprintf(resource_name, RESOURCE_NAME_SZ,
547 		    "unspecified_%s_%s", svfsp->f_basetype, resource_nodetype);
548 	} else {
549 		(void) snprintf(resource_name, RESOURCE_NAME_SZ,
550 		    "unspecified_%s", resource_nodetype);
551 	}
552 
553 	vfs_setresource(vfsp, resource_name, 0);
554 
555 	kmem_free(svfsp, sizeof (statvfs64_t));
556 	kmem_free(resource_name, RESOURCE_NAME_SZ);
557 #undef RESOURCE_NAME_SZ
558 
559 	/*
560 	 * Insert the namenode.
561 	 */
562 	mutex_enter(&ntable_lock);
563 	nameinsert(nodep);
564 	mutex_exit(&ntable_lock);
565 	return (0);
566 out:
567 	releasef(namefdp.fd);
568 	kmem_free(nodep, sizeof (struct namenode));
569 	return (error);
570 }
571 
572 /*
573  * Unmount a file descriptor from a node in the file system.
574  * If the user is not the owner of the file and is not privileged,
575  * the request is denied.
576  * Otherwise, remove the namenode from the hash list.
577  * If the mounted file descriptor was that of a stream and this
578  * was the last mount of the stream, turn off the STRMOUNT flag.
579  * If the rootvp is referenced other than through the mount,
580  * nm_inactive will clean up.
581  */
582 static int
583 nm_unmount(vfs_t *vfsp, int flag, cred_t *crp)
584 {
585 	struct namenode *nodep = (struct namenode *)vfsp->vfs_data;
586 	vnode_t *vp, *thisvp;
587 	struct file *fp = NULL;
588 
589 	ASSERT((nodep->nm_flag & NMNMNT) == 0);
590 
591 	/*
592 	 * forced unmount is not supported by this file system
593 	 * and thus, ENOTSUP, is being returned.
594 	 */
595 	if (flag & MS_FORCE) {
596 		return (ENOTSUP);
597 	}
598 
599 	vp = nodep->nm_filevp;
600 	mutex_enter(&nodep->nm_lock);
601 	if (secpolicy_vnode_owner(crp, nodep->nm_vattr.va_uid) != 0) {
602 		mutex_exit(&nodep->nm_lock);
603 		return (EPERM);
604 	}
605 
606 	mutex_exit(&nodep->nm_lock);
607 
608 	mutex_enter(&ntable_lock);
609 	nameremove(nodep);
610 	thisvp = NMTOV(nodep);
611 	mutex_enter(&thisvp->v_lock);
612 	VN_RELE_LOCKED(thisvp);
613 	if (thisvp->v_count == 0) {
614 		fp = nodep->nm_filep;
615 		mutex_exit(&thisvp->v_lock);
616 		vn_invalid(thisvp);
617 		vn_free(thisvp);
618 		VFS_RELE(vfsp);
619 		namenodeno_free(nodep->nm_vattr.va_nodeid);
620 		kmem_free(nodep, sizeof (struct namenode));
621 	} else {
622 		thisvp->v_flag &= ~VROOT;
623 		mutex_exit(&thisvp->v_lock);
624 	}
625 	if (namefind(vp, NULLVP) == NULL && vp->v_stream) {
626 		struct stdata *stp = vp->v_stream;
627 		mutex_enter(&stp->sd_lock);
628 		stp->sd_flag &= ~STRMOUNT;
629 		mutex_exit(&stp->sd_lock);
630 	}
631 	mutex_exit(&ntable_lock);
632 	if (fp != NULL)
633 		(void) closef(fp);
634 	return (0);
635 }
636 
637 /*
638  * Create a reference to the root of a mounted file descriptor.
639  * This routine is called from lookupname() in the event a path
640  * is being searched that has a mounted file descriptor in it.
641  */
642 static int
643 nm_root(vfs_t *vfsp, vnode_t **vpp)
644 {
645 	struct namenode *nodep = (struct namenode *)vfsp->vfs_data;
646 	struct vnode *vp = NMTOV(nodep);
647 
648 	VN_HOLD(vp);
649 	*vpp = vp;
650 	return (0);
651 }
652 
653 /*
654  * Return in sp the status of this file system.
655  */
656 static int
657 nm_statvfs(vfs_t *vfsp, struct statvfs64 *sp)
658 {
659 	dev32_t d32;
660 
661 	bzero(sp, sizeof (*sp));
662 	sp->f_bsize	= 1024;
663 	sp->f_frsize	= 1024;
664 	(void) cmpldev(&d32, vfsp->vfs_dev);
665 	sp->f_fsid = d32;
666 	(void) strcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name);
667 	sp->f_flag	= vf_to_stf(vfsp->vfs_flag);
668 	return (0);
669 }
670 
671 /*
672  * Since this file system has no disk blocks of its own, apply
673  * the VOP_FSYNC operation on the mounted file descriptor.
674  */
675 static int
676 nm_sync(vfs_t *vfsp, short flag, cred_t *crp)
677 {
678 	struct namenode *nodep;
679 
680 	if (vfsp == NULL)
681 		return (0);
682 
683 	nodep = (struct namenode *)vfsp->vfs_data;
684 	if (flag & SYNC_CLOSE)
685 		return (nm_umountall(nodep->nm_filevp, crp));
686 
687 	return (VOP_FSYNC(nodep->nm_filevp, FSYNC, crp, NULL));
688 }
689 
690 /*
691  * File system initialization routine. Save the file system type,
692  * establish a file system device number and initialize nm_filevp_hash[].
693  */
694 int
695 nameinit(int fstype, char *name)
696 {
697 	static const fs_operation_def_t nm_vfsops_template[] = {
698 		VFSNAME_MOUNT,		{ .vfs_mount = nm_mount },
699 		VFSNAME_UNMOUNT,	{ .vfs_unmount = nm_unmount },
700 		VFSNAME_ROOT,		{ .vfs_root = nm_root },
701 		VFSNAME_STATVFS,	{ .vfs_statvfs = nm_statvfs },
702 		VFSNAME_SYNC,		{ .vfs_sync = nm_sync },
703 		NULL,			NULL
704 	};
705 	static const fs_operation_def_t nm_dummy_vfsops_template[] = {
706 		VFSNAME_STATVFS,	{ .vfs_statvfs = nm_statvfs },
707 		VFSNAME_SYNC,		{ .vfs_sync = nm_sync },
708 		NULL,			NULL
709 	};
710 	int error;
711 	int dev;
712 	vfsops_t *dummy_vfsops;
713 
714 	error = vfs_setfsops(fstype, nm_vfsops_template, &namefs_vfsops);
715 	if (error != 0) {
716 		cmn_err(CE_WARN, "nameinit: bad vfs ops template");
717 		return (error);
718 	}
719 
720 	error = vfs_makefsops(nm_dummy_vfsops_template, &dummy_vfsops);
721 	if (error != 0) {
722 		(void) vfs_freevfsops_by_type(fstype);
723 		cmn_err(CE_WARN, "nameinit: bad dummy vfs ops template");
724 		return (error);
725 	}
726 
727 	error = vn_make_ops(name, nm_vnodeops_template, &nm_vnodeops);
728 	if (error != 0) {
729 		(void) vfs_freevfsops_by_type(fstype);
730 		vfs_freevfsops(dummy_vfsops);
731 		cmn_err(CE_WARN, "nameinit: bad vnode ops template");
732 		return (error);
733 	}
734 
735 	namefstype = fstype;
736 
737 	if ((dev = getudev()) == (major_t)-1) {
738 		cmn_err(CE_WARN, "nameinit: can't get unique device");
739 		dev = 0;
740 	}
741 	mutex_init(&ntable_lock, NULL, MUTEX_DEFAULT, NULL);
742 	namedev = makedevice(dev, 0);
743 	bzero(nm_filevp_hash, sizeof (nm_filevp_hash));
744 	vfs_setops(&namevfs, dummy_vfsops);
745 	namevfs.vfs_vnodecovered = NULL;
746 	namevfs.vfs_bsize = 1024;
747 	namevfs.vfs_fstype = namefstype;
748 	vfs_make_fsid(&namevfs.vfs_fsid, namedev, namefstype);
749 	namevfs.vfs_dev = namedev;
750 	return (0);
751 }
752 
753 static mntopts_t nm_mntopts = {
754 	.mo_count = 0,
755 	.mo_list = NULL
756 };
757 
758 static vfsdef_t vfw = {
759 	VFSDEF_VERSION,
760 	"namefs",
761 	nameinit,
762 	VSW_HASPROTO | VSW_ZMOUNT,
763 	&nm_mntopts
764 };
765 
766 /*
767  * Module linkage information for the kernel.
768  */
769 static struct modlfs modlfs = {
770 	&mod_fsops, "filesystem for namefs", &vfw
771 };
772 
773 static struct modlinkage modlinkage = {
774 	MODREV_1, (void *)&modlfs, NULL
775 };
776 
777 int
778 _init(void)
779 {
780 	namenodeno_init();
781 	return (mod_install(&modlinkage));
782 }
783 
784 int
785 _fini(void)
786 {
787 	return (EBUSY);
788 }
789 
790 int
791 _info(struct modinfo *modinfop)
792 {
793 	return (mod_info(&modlinkage, modinfop));
794 }
795