xref: /illumos-gate/usr/src/uts/common/fs/namefs/namevfs.c (revision 2833423dc59f4c35fe4713dbb942950c82df0437)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2017 by Delphix. All rights reserved.
24  * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
25  * Copyright 2024 Oxide Computer Company
26  */
27 
28 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
29 /*	  All Rights Reserved	*/
30 
31 
32 /*
33  * This file supports the vfs operations for the NAMEFS file system.
34  */
35 
36 #include <sys/types.h>
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/debug.h>
40 #include <sys/errno.h>
41 #include <sys/kmem.h>
42 #include <sys/inline.h>
43 #include <sys/file.h>
44 #include <sys/proc.h>
45 #include <sys/stat.h>
46 #include <sys/statvfs.h>
47 #include <sys/mount.h>
48 #include <sys/sysmacros.h>
49 #include <sys/var.h>
50 #include <sys/vfs.h>
51 #include <sys/vfs_opreg.h>
52 #include <sys/vnode.h>
53 #include <sys/mode.h>
54 #include <sys/pcb.h>
55 #include <sys/signal.h>
56 #include <sys/user.h>
57 #include <sys/uio.h>
58 #include <sys/cred.h>
59 #include <sys/fs/namenode.h>
60 #include <sys/stream.h>
61 #include <sys/strsubr.h>
62 #include <sys/cmn_err.h>
63 #include <sys/modctl.h>
64 #include <fs/fs_subr.h>
65 #include <sys/policy.h>
66 #include <sys/vmem.h>
67 #include <sys/fs/sdev_impl.h>
68 
69 #define	NM_INOQUANT		(64 * 1024)
70 
71 /*
72  * Define global data structures.
73  */
74 dev_t	namedev;
75 int	namefstype;
76 struct	namenode *nm_filevp_hash[NM_FILEVP_HASH_SIZE];
77 struct	vfs namevfs;
78 kmutex_t ntable_lock;
79 
80 static vmem_t	*nm_inoarena;	/* vmem arena to allocate inode no's from */
81 static kmutex_t	nm_inolock;
82 
83 vfsops_t *namefs_vfsops;
84 /*
85  * Functions to allocate node id's starting from 1. Based on vmem routines.
86  * The vmem arena is extended in NM_INOQUANT chunks.
87  */
88 uint64_t
89 namenodeno_alloc(void)
90 {
91 	uint64_t nno;
92 
93 	mutex_enter(&nm_inolock);
94 	nno = (uint64_t)(uintptr_t)
95 	    vmem_alloc(nm_inoarena, 1, VM_NOSLEEP + VM_FIRSTFIT);
96 	if (nno == 0) {
97 		(void) vmem_add(nm_inoarena, (void *)(vmem_size(nm_inoarena,
98 		    VMEM_ALLOC | VMEM_FREE) + 1), NM_INOQUANT, VM_SLEEP);
99 		nno = (uint64_t)(uintptr_t)
100 		    vmem_alloc(nm_inoarena, 1, VM_SLEEP + VM_FIRSTFIT);
101 		ASSERT(nno != 0);
102 	}
103 	mutex_exit(&nm_inolock);
104 	ASSERT32(nno <= ULONG_MAX);
105 	return (nno);
106 }
107 
108 static void
109 namenodeno_init(void)
110 {
111 	nm_inoarena = vmem_create("namefs_inodes", (void *)1, NM_INOQUANT, 1,
112 	    NULL, NULL, NULL, 1, VM_SLEEP);
113 	mutex_init(&nm_inolock, NULL, MUTEX_DEFAULT, NULL);
114 }
115 
116 void
117 namenodeno_free(uint64_t nn)
118 {
119 	void *vaddr = (void *)(uintptr_t)nn;
120 
121 	ASSERT32((uint64_t)(uintptr_t)vaddr == nn);
122 
123 	mutex_enter(&nm_inolock);
124 	vmem_free(nm_inoarena, vaddr, 1);
125 	mutex_exit(&nm_inolock);
126 }
127 
128 /*
129  * Insert a namenode into the nm_filevp_hash table.
130  *
131  * Each link has a unique namenode with a unique nm_mountvp field.
132  * The nm_filevp field of the namenode need not be unique, since a
133  * file descriptor may be mounted to multiple nodes at the same time.
134  * We hash on nm_filevp since that's what discriminates the searches
135  * in namefind() and nm_unmountall().
136  */
137 void
138 nameinsert(struct namenode *nodep)
139 {
140 	struct namenode **bucket;
141 
142 	ASSERT(MUTEX_HELD(&ntable_lock));
143 
144 	bucket = NM_FILEVP_HASH(nodep->nm_filevp);
145 	nodep->nm_nextp = *bucket;
146 	*bucket = nodep;
147 }
148 
149 /*
150  * Remove a namenode from the hash table, if present.
151  */
152 void
153 nameremove(struct namenode *nodep)
154 {
155 	struct namenode *np, **npp;
156 
157 	ASSERT(MUTEX_HELD(&ntable_lock));
158 
159 	for (npp = NM_FILEVP_HASH(nodep->nm_filevp); (np = *npp) != NULL;
160 	    npp = &np->nm_nextp) {
161 		if (np == nodep) {
162 			*npp = np->nm_nextp;
163 			return;
164 		}
165 	}
166 }
167 
168 /*
169  * Search for a namenode that has a nm_filevp == vp and nm_mountpt == mnt.
170  * If mnt is NULL, return the first link with nm_filevp of vp.
171  * Returns namenode pointer on success, NULL on failure.
172  */
173 struct namenode *
174 namefind(vnode_t *vp, vnode_t *mnt)
175 {
176 	struct namenode *np;
177 
178 	ASSERT(MUTEX_HELD(&ntable_lock));
179 	for (np = *NM_FILEVP_HASH(vp); np != NULL; np = np->nm_nextp)
180 		if (np->nm_filevp == vp &&
181 		    (mnt == NULL || np->nm_mountpt == mnt))
182 			break;
183 	return (np);
184 }
185 
186 /*
187  * For each namenode that has nm_filevp == vp, call the provided function
188  * with the namenode as an argument. This finds all of the namefs entries
189  * which are mounted on vp; note that there can be more than one.
190  */
191 int
192 nm_walk_mounts(const vnode_t *vp, nm_walk_mounts_f *func, cred_t *cr, void *arg)
193 {
194 	struct namenode *np;
195 	int ret = 0;
196 
197 	mutex_enter(&ntable_lock);
198 
199 	for (np = *NM_FILEVP_HASH(vp); np != NULL; np = np->nm_nextp) {
200 		if (np->nm_filevp == vp) {
201 			if ((ret = func(np, cr, arg)) != 0)
202 				break;
203 		}
204 	}
205 
206 	mutex_exit(&ntable_lock);
207 
208 	return (ret);
209 }
210 
211 /*
212  * Force the unmouting of a file descriptor from ALL of the nodes
213  * that it was mounted to.
214  * At the present time, the only usage for this routine is in the
215  * event one end of a pipe was mounted. At the time the unmounted
216  * end gets closed down, the mounted end is forced to be unmounted.
217  *
218  * This routine searches the namenode hash list for all namenodes
219  * that have a nm_filevp field equal to vp. Each time one is found,
220  * the dounmount() routine is called. This causes the nm_unmount()
221  * routine to be called and thus, the file descriptor is unmounted
222  * from the node.
223  *
224  * At the start of this routine, the reference count for vp is
225  * incremented to protect the vnode from being released in the
226  * event the mount was the only thing keeping the vnode active.
227  * If that is the case, the VOP_CLOSE operation is applied to
228  * the vnode, prior to it being released.
229  */
230 static int
231 nm_umountall(vnode_t *vp, cred_t *crp)
232 {
233 	vfs_t *vfsp;
234 	struct namenode *nodep;
235 	int error = 0;
236 	int realerr = 0;
237 
238 	/*
239 	 * For each namenode that is associated with the file:
240 	 * If the v_vfsp field is not namevfs, dounmount it.  Otherwise,
241 	 * it was created in nm_open() and will be released in time.
242 	 * The following loop replicates some code from nm_find.  That
243 	 * routine can't be used as is since the list isn't strictly
244 	 * consumed as it is traversed.
245 	 */
246 	mutex_enter(&ntable_lock);
247 	nodep = *NM_FILEVP_HASH(vp);
248 	while (nodep) {
249 		if (nodep->nm_filevp == vp &&
250 		    (vfsp = NMTOV(nodep)->v_vfsp) != NULL &&
251 		    vfsp != &namevfs && (NMTOV(nodep)->v_flag & VROOT)) {
252 
253 			/*
254 			 * If the vn_vfswlock fails, skip the vfs since
255 			 * somebody else may be unmounting it.
256 			 */
257 			if (vn_vfswlock(vfsp->vfs_vnodecovered)) {
258 				realerr = EBUSY;
259 				nodep = nodep->nm_nextp;
260 				continue;
261 			}
262 
263 			/*
264 			 * Can't hold ntable_lock across call to do_unmount
265 			 * because nm_unmount tries to acquire it.  This means
266 			 * there is a window where another mount of vp can
267 			 * happen so it is possible that after nm_unmountall
268 			 * there are still some mounts.  This situation existed
269 			 * without MT locking because dounmount can sleep
270 			 * so another mount could happen during that time.
271 			 * This situation is unlikely and doesn't really cause
272 			 * any problems.
273 			 */
274 			mutex_exit(&ntable_lock);
275 			if ((error = dounmount(vfsp, 0, crp)) != 0)
276 				realerr = error;
277 			mutex_enter(&ntable_lock);
278 			/*
279 			 * Since we dropped the ntable_lock, we
280 			 * have to start over from the beginning.
281 			 * If for some reasons dounmount() fails,
282 			 * start from beginning means that we will keep on
283 			 * trying unless another thread unmounts it for us.
284 			 */
285 			nodep = *NM_FILEVP_HASH(vp);
286 		} else
287 			nodep = nodep->nm_nextp;
288 	}
289 	mutex_exit(&ntable_lock);
290 	return (realerr);
291 }
292 
293 /*
294  * Force the unmouting of a file descriptor from ALL of the nodes
295  * that it was mounted to.  XXX: fifo_close() calls this routine.
296  *
297  * nm_umountall() may return EBUSY.
298  * nm_unmountall() will keep on trying until it succeeds.
299  */
300 int
301 nm_unmountall(vnode_t *vp, cred_t *crp)
302 {
303 	int error;
304 
305 	/*
306 	 * Nm_umuontall() returns only if it succeeds or
307 	 * return with error EBUSY.  If EBUSY, that means
308 	 * it cannot acquire the lock on the covered vnode,
309 	 * and we will keep on trying.
310 	 */
311 	for (;;) {
312 		error = nm_umountall(vp, crp);
313 		if (error != EBUSY)
314 			break;
315 		delay(1);	/* yield cpu briefly, then try again */
316 	}
317 	return (error);
318 }
319 
320 /*
321  * Mount a file descriptor onto the node in the file system.
322  * Create a new vnode, update the attributes with info from the
323  * file descriptor and the mount point.  The mask, mode, uid, gid,
324  * atime, mtime and ctime are taken from the mountpt.  Link count is
325  * set to one, the file system id is namedev and nodeid is unique
326  * for each mounted object.  Other attributes are taken from mount point.
327  * Make sure user is owner (or root) with write permissions on mount point.
328  * Hash the new vnode and return 0.
329  * Upon entry to this routine, the file descriptor is in the
330  * fd field of a struct namefd.  Copy that structure from user
331  * space and retrieve the file descriptor.
332  */
333 static int
334 nm_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *crp)
335 {
336 	struct namefd namefdp;
337 	struct vnode *filevp;		/* file descriptor vnode */
338 	struct file *fp;
339 	struct vnode *newvp;		/* vnode representing this mount */
340 	struct vnode *rvp;		/* realvp (if any) for the mountpt */
341 	struct namenode *nodep;		/* namenode for this mount */
342 	struct vattr filevattr;		/* attributes of file dec.  */
343 	struct vattr *vattrp;		/* attributes of this mount */
344 	char *resource_name;
345 	char *resource_nodetype;
346 	statvfs64_t *svfsp;
347 	int error = 0;
348 
349 	/*
350 	 * Get the file descriptor from user space.
351 	 * Make sure the file descriptor is valid and has an
352 	 * associated file pointer.
353 	 * If so, extract the vnode from the file pointer.
354 	 */
355 	if (uap->datalen != sizeof (struct namefd))
356 		return (EINVAL);
357 
358 	if (copyin(uap->dataptr, &namefdp, uap->datalen))
359 		return (EFAULT);
360 
361 	if ((fp = getf(namefdp.fd)) == NULL)
362 		return (EBADF);
363 
364 	/*
365 	 * If the mount point already has something mounted
366 	 * on it, disallow this mount.  (This restriction may
367 	 * be removed in a later release).
368 	 * Or unmount has completed but the namefs ROOT vnode
369 	 * count has not decremented to zero, disallow this mount.
370 	 */
371 
372 	mutex_enter(&mvp->v_lock);
373 	if ((mvp->v_flag & VROOT) ||
374 	    vfs_matchops(mvp->v_vfsp, namefs_vfsops)) {
375 		mutex_exit(&mvp->v_lock);
376 		releasef(namefdp.fd);
377 		return (EBUSY);
378 	}
379 	mutex_exit(&mvp->v_lock);
380 
381 	/*
382 	 * Cannot allow users to fattach() in /dev/pts.
383 	 * First, there is no need for doing so and secondly
384 	 * we cannot allow arbitrary users to park on a node in
385 	 * /dev/pts or /dev/vt.
386 	 */
387 	rvp = NULLVP;
388 	if (vn_matchops(mvp, spec_getvnodeops()) &&
389 	    VOP_REALVP(mvp, &rvp, NULL) == 0 && rvp &&
390 	    (vn_matchops(rvp, devpts_getvnodeops()) ||
391 	    vn_matchops(rvp, devvt_getvnodeops()))) {
392 		releasef(namefdp.fd);
393 		return (ENOTSUP);
394 	}
395 
396 	filevp = fp->f_vnode;
397 	if (filevp->v_type == VDIR || filevp->v_type == VPORT) {
398 		releasef(namefdp.fd);
399 		return (EINVAL);
400 	}
401 
402 	/*
403 	 * If the fd being mounted refers to neither a door nor a stream,
404 	 * make sure the caller is privileged.
405 	 */
406 	if (filevp->v_type != VDOOR && filevp->v_stream == NULL) {
407 		if (secpolicy_fs_mount(crp, filevp, vfsp) != 0) {
408 			/* fd is neither a stream nor a door */
409 			releasef(namefdp.fd);
410 			return (EINVAL);
411 		}
412 	}
413 
414 	/*
415 	 * Make sure the file descriptor is not the root of some
416 	 * file system.
417 	 * If it's not, create a reference and allocate a namenode
418 	 * to represent this mount request.
419 	 */
420 	if (filevp->v_flag & VROOT) {
421 		releasef(namefdp.fd);
422 		return (EBUSY);
423 	}
424 
425 	nodep = kmem_zalloc(sizeof (struct namenode), KM_SLEEP);
426 
427 	mutex_init(&nodep->nm_lock, NULL, MUTEX_DEFAULT, NULL);
428 	vattrp = &nodep->nm_vattr;
429 	vattrp->va_mask = AT_ALL;
430 	if (error = VOP_GETATTR(mvp, vattrp, 0, crp, NULL))
431 		goto out;
432 
433 	filevattr.va_mask = AT_ALL;
434 	if (error = VOP_GETATTR(filevp, &filevattr, 0, crp, NULL))
435 		goto out;
436 	/*
437 	 * Make sure the user is the owner of the mount point
438 	 * or has sufficient privileges.
439 	 */
440 	if (error = secpolicy_vnode_owner(crp, vattrp->va_uid))
441 		goto out;
442 
443 	/*
444 	 * Make sure the user has write permissions on the
445 	 * mount point (or has sufficient privileges).
446 	 */
447 	if (secpolicy_vnode_access2(crp, mvp, vattrp->va_uid, vattrp->va_mode,
448 	    VWRITE) != 0) {
449 		error = EACCES;
450 		goto out;
451 	}
452 
453 	/*
454 	 * If the file descriptor has file/record locking, don't
455 	 * allow the mount to succeed.
456 	 */
457 	if (vn_has_flocks(filevp)) {
458 		error = EACCES;
459 		goto out;
460 	}
461 
462 	/*
463 	 * Initialize the namenode.
464 	 */
465 	if (filevp->v_stream) {
466 		struct stdata *stp = filevp->v_stream;
467 		mutex_enter(&stp->sd_lock);
468 		stp->sd_flag |= STRMOUNT;
469 		mutex_exit(&stp->sd_lock);
470 	}
471 	nodep->nm_filevp = filevp;
472 	mutex_enter(&fp->f_tlock);
473 	fp->f_count++;
474 	mutex_exit(&fp->f_tlock);
475 
476 	releasef(namefdp.fd);
477 	nodep->nm_filep = fp;
478 	nodep->nm_mountpt = mvp;
479 
480 	/*
481 	 * The attributes for the mounted file descriptor were initialized
482 	 * above by applying VOP_GETATTR to the mount point.  Some of
483 	 * the fields of the attributes structure will be overwritten
484 	 * by the attributes from the file descriptor.
485 	 */
486 	vattrp->va_type    = filevattr.va_type;
487 	vattrp->va_fsid    = namedev;
488 	vattrp->va_nodeid  = namenodeno_alloc();
489 	vattrp->va_nlink   = 1;
490 	vattrp->va_size    = filevattr.va_size;
491 	vattrp->va_rdev    = filevattr.va_rdev;
492 	vattrp->va_blksize = filevattr.va_blksize;
493 	vattrp->va_nblocks = filevattr.va_nblocks;
494 	vattrp->va_seq	   = 0;
495 
496 	/*
497 	 * Initialize new vnode structure for the mounted file descriptor.
498 	 */
499 	nodep->nm_vnode = vn_alloc(KM_SLEEP);
500 	newvp = NMTOV(nodep);
501 
502 	newvp->v_flag = filevp->v_flag | VROOT | VNOMAP | VNOSWAP;
503 	vn_setops(newvp, nm_vnodeops);
504 	newvp->v_vfsp = vfsp;
505 	newvp->v_stream = filevp->v_stream;
506 	newvp->v_type = filevp->v_type;
507 	newvp->v_rdev = filevp->v_rdev;
508 	newvp->v_data = (caddr_t)nodep;
509 	VFS_HOLD(vfsp);
510 	vn_copypath(mvp, newvp);
511 	vn_exists(newvp);
512 
513 	/*
514 	 * Initialize the vfs structure.
515 	 */
516 	vfsp->vfs_vnodecovered = NULL;
517 	vfsp->vfs_flag |= VFS_UNLINKABLE;
518 	vfsp->vfs_bsize = 1024;
519 	vfsp->vfs_fstype = namefstype;
520 	vfs_make_fsid(&vfsp->vfs_fsid, namedev, namefstype);
521 	vfsp->vfs_data = (caddr_t)nodep;
522 	vfsp->vfs_dev = namedev;
523 	vfsp->vfs_bcount = 0;
524 
525 	/*
526 	 * Set the name we mounted from.
527 	 */
528 	switch (filevp->v_type) {
529 	case VPROC:	/* VOP_GETATTR() translates this to VREG */
530 	case VREG:	resource_nodetype = "file"; break;
531 	case VDIR:	resource_nodetype = "directory"; break;
532 	case VBLK:	resource_nodetype = "device"; break;
533 	case VCHR:	resource_nodetype = "device"; break;
534 	case VLNK:	resource_nodetype = "link"; break;
535 	case VFIFO:	resource_nodetype = "fifo"; break;
536 	case VDOOR:	resource_nodetype = "door"; break;
537 	case VSOCK:	resource_nodetype = "socket"; break;
538 	default:	resource_nodetype = "resource"; break;
539 	}
540 
541 #define	RESOURCE_NAME_SZ 128 /* Maximum length of the resource name */
542 	resource_name = kmem_alloc(RESOURCE_NAME_SZ, KM_SLEEP);
543 	svfsp = kmem_alloc(sizeof (statvfs64_t), KM_SLEEP);
544 
545 	error = VFS_STATVFS(filevp->v_vfsp, svfsp);
546 	if (error == 0) {
547 		(void) snprintf(resource_name, RESOURCE_NAME_SZ,
548 		    "unspecified_%s_%s", svfsp->f_basetype, resource_nodetype);
549 	} else {
550 		(void) snprintf(resource_name, RESOURCE_NAME_SZ,
551 		    "unspecified_%s", resource_nodetype);
552 	}
553 
554 	vfs_setresource(vfsp, resource_name, 0);
555 
556 	kmem_free(svfsp, sizeof (statvfs64_t));
557 	kmem_free(resource_name, RESOURCE_NAME_SZ);
558 #undef RESOURCE_NAME_SZ
559 
560 	/*
561 	 * Insert the namenode.
562 	 */
563 	mutex_enter(&ntable_lock);
564 	nameinsert(nodep);
565 	mutex_exit(&ntable_lock);
566 	return (0);
567 out:
568 	releasef(namefdp.fd);
569 	kmem_free(nodep, sizeof (struct namenode));
570 	return (error);
571 }
572 
573 /*
574  * Unmount a file descriptor from a node in the file system.
575  * If the user is not the owner of the file and is not privileged,
576  * the request is denied.
577  * Otherwise, remove the namenode from the hash list.
578  * If the mounted file descriptor was that of a stream and this
579  * was the last mount of the stream, turn off the STRMOUNT flag.
580  * If the rootvp is referenced other than through the mount,
581  * nm_inactive will clean up.
582  */
583 static int
584 nm_unmount(vfs_t *vfsp, int flag, cred_t *crp)
585 {
586 	struct namenode *nodep = (struct namenode *)vfsp->vfs_data;
587 	vnode_t *vp, *thisvp;
588 	struct file *fp = NULL;
589 
590 	ASSERT((nodep->nm_flag & NMNMNT) == 0);
591 
592 	/*
593 	 * forced unmount is not supported by this file system
594 	 * and thus, ENOTSUP, is being returned.
595 	 */
596 	if (flag & MS_FORCE) {
597 		return (ENOTSUP);
598 	}
599 
600 	vp = nodep->nm_filevp;
601 	mutex_enter(&nodep->nm_lock);
602 	if (secpolicy_vnode_owner(crp, nodep->nm_vattr.va_uid) != 0) {
603 		mutex_exit(&nodep->nm_lock);
604 		return (EPERM);
605 	}
606 
607 	mutex_exit(&nodep->nm_lock);
608 
609 	mutex_enter(&ntable_lock);
610 	nameremove(nodep);
611 	thisvp = NMTOV(nodep);
612 	mutex_enter(&thisvp->v_lock);
613 	VN_RELE_LOCKED(thisvp);
614 	if (thisvp->v_count == 0) {
615 		fp = nodep->nm_filep;
616 		mutex_exit(&thisvp->v_lock);
617 		vn_invalid(thisvp);
618 		vn_free(thisvp);
619 		VFS_RELE(vfsp);
620 		namenodeno_free(nodep->nm_vattr.va_nodeid);
621 		kmem_free(nodep, sizeof (struct namenode));
622 	} else {
623 		thisvp->v_flag &= ~VROOT;
624 		mutex_exit(&thisvp->v_lock);
625 	}
626 	if (namefind(vp, NULLVP) == NULL && vp->v_stream) {
627 		struct stdata *stp = vp->v_stream;
628 		mutex_enter(&stp->sd_lock);
629 		stp->sd_flag &= ~STRMOUNT;
630 		mutex_exit(&stp->sd_lock);
631 	}
632 	mutex_exit(&ntable_lock);
633 	if (fp != NULL)
634 		(void) closef(fp);
635 	return (0);
636 }
637 
638 /*
639  * Create a reference to the root of a mounted file descriptor.
640  * This routine is called from lookupname() in the event a path
641  * is being searched that has a mounted file descriptor in it.
642  */
643 static int
644 nm_root(vfs_t *vfsp, vnode_t **vpp)
645 {
646 	struct namenode *nodep = (struct namenode *)vfsp->vfs_data;
647 	struct vnode *vp = NMTOV(nodep);
648 
649 	VN_HOLD(vp);
650 	*vpp = vp;
651 	return (0);
652 }
653 
654 /*
655  * Return in sp the status of this file system.
656  */
657 static int
658 nm_statvfs(vfs_t *vfsp, struct statvfs64 *sp)
659 {
660 	dev32_t d32;
661 
662 	bzero(sp, sizeof (*sp));
663 	sp->f_bsize	= 1024;
664 	sp->f_frsize	= 1024;
665 	(void) cmpldev(&d32, vfsp->vfs_dev);
666 	sp->f_fsid = d32;
667 	(void) strcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name);
668 	sp->f_flag	= vf_to_stf(vfsp->vfs_flag);
669 	return (0);
670 }
671 
672 /*
673  * Since this file system has no disk blocks of its own, apply
674  * the VOP_FSYNC operation on the mounted file descriptor.
675  */
676 static int
677 nm_sync(vfs_t *vfsp, short flag, cred_t *crp)
678 {
679 	struct namenode *nodep;
680 
681 	if (vfsp == NULL)
682 		return (0);
683 
684 	nodep = (struct namenode *)vfsp->vfs_data;
685 	if (flag & SYNC_CLOSE)
686 		return (nm_umountall(nodep->nm_filevp, crp));
687 
688 	return (VOP_FSYNC(nodep->nm_filevp, FSYNC, crp, NULL));
689 }
690 
691 static int
692 nm_syncfs(vfs_t *vfsp, uint64_t flags, cred_t *crp)
693 {
694 	struct namenode *nodep;
695 
696 	if (flags != 0) {
697 		return (ENOTSUP);
698 	}
699 
700 	nodep = (struct namenode *)vfsp->vfs_data;
701 	return (VOP_FSYNC(nodep->nm_filevp, FSYNC, crp, NULL));
702 }
703 
704 /*
705  * File system initialization routine. Save the file system type,
706  * establish a file system device number and initialize nm_filevp_hash[].
707  */
708 int
709 nameinit(int fstype, char *name)
710 {
711 	static const fs_operation_def_t nm_vfsops_template[] = {
712 		VFSNAME_MOUNT,		{ .vfs_mount = nm_mount },
713 		VFSNAME_UNMOUNT,	{ .vfs_unmount = nm_unmount },
714 		VFSNAME_ROOT,		{ .vfs_root = nm_root },
715 		VFSNAME_STATVFS,	{ .vfs_statvfs = nm_statvfs },
716 		VFSNAME_SYNC,		{ .vfs_sync = nm_sync },
717 		VFSNAME_SYNCFS,		{ .vfs_syncfs = nm_syncfs },
718 		NULL,			NULL
719 	};
720 	static const fs_operation_def_t nm_dummy_vfsops_template[] = {
721 		VFSNAME_STATVFS,	{ .vfs_statvfs = nm_statvfs },
722 		VFSNAME_SYNC,		{ .vfs_sync = nm_sync },
723 		NULL,			NULL
724 	};
725 	int error;
726 	int dev;
727 	vfsops_t *dummy_vfsops;
728 
729 	error = vfs_setfsops(fstype, nm_vfsops_template, &namefs_vfsops);
730 	if (error != 0) {
731 		cmn_err(CE_WARN, "nameinit: bad vfs ops template");
732 		return (error);
733 	}
734 
735 	error = vfs_makefsops(nm_dummy_vfsops_template, &dummy_vfsops);
736 	if (error != 0) {
737 		(void) vfs_freevfsops_by_type(fstype);
738 		cmn_err(CE_WARN, "nameinit: bad dummy vfs ops template");
739 		return (error);
740 	}
741 
742 	error = vn_make_ops(name, nm_vnodeops_template, &nm_vnodeops);
743 	if (error != 0) {
744 		(void) vfs_freevfsops_by_type(fstype);
745 		vfs_freevfsops(dummy_vfsops);
746 		cmn_err(CE_WARN, "nameinit: bad vnode ops template");
747 		return (error);
748 	}
749 
750 	namefstype = fstype;
751 
752 	if ((dev = getudev()) == (major_t)-1) {
753 		cmn_err(CE_WARN, "nameinit: can't get unique device");
754 		dev = 0;
755 	}
756 	mutex_init(&ntable_lock, NULL, MUTEX_DEFAULT, NULL);
757 	namedev = makedevice(dev, 0);
758 	bzero(nm_filevp_hash, sizeof (nm_filevp_hash));
759 	vfs_setops(&namevfs, dummy_vfsops);
760 	namevfs.vfs_vnodecovered = NULL;
761 	namevfs.vfs_bsize = 1024;
762 	namevfs.vfs_fstype = namefstype;
763 	vfs_make_fsid(&namevfs.vfs_fsid, namedev, namefstype);
764 	namevfs.vfs_dev = namedev;
765 	return (0);
766 }
767 
768 static mntopts_t nm_mntopts = {
769 	.mo_count = 0,
770 	.mo_list = NULL
771 };
772 
773 static vfsdef_t vfw = {
774 	VFSDEF_VERSION,
775 	"namefs",
776 	nameinit,
777 	VSW_HASPROTO | VSW_ZMOUNT,
778 	&nm_mntopts
779 };
780 
781 /*
782  * Module linkage information for the kernel.
783  */
784 static struct modlfs modlfs = {
785 	&mod_fsops, "filesystem for namefs", &vfw
786 };
787 
788 static struct modlinkage modlinkage = {
789 	MODREV_1, (void *)&modlfs, NULL
790 };
791 
792 int
793 _init(void)
794 {
795 	namenodeno_init();
796 	return (mod_install(&modlinkage));
797 }
798 
/*
 * Module unload entry point.  Always fails with EBUSY, so this module
 * reports itself as not unloadable.
 */
int
_fini(void)
{
	return (EBUSY);
}
804 
805 int
806 _info(struct modinfo *modinfop)
807 {
808 	return (mod_info(&modlinkage, modinfop));
809 }
810