xref: /illumos-gate/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_vfsops.c (revision f20211217f12ce291fd518e61065cd273f23e4ea)
1 /*
2  * Copyright (c) 2000-2001, Boris Popov
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *    This product includes software developed by Boris Popov.
16  * 4. Neither the name of the author nor the names of any co-contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  * $Id: smbfs_vfsops.c,v 1.73.64.1 2005/05/27 02:35:28 lindak Exp $
33  */
34 
35 /*
36  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
37  * Copyright 2012 Nexenta Systems, Inc.  All rights reserved.
38  * Copyright 2013, Joyent, Inc. All rights reserved.
39  * Copyright (c) 2016 by Delphix. All rights reserved.
40  */
41 
42 #include <sys/systm.h>
43 #include <sys/cred.h>
44 #include <sys/time.h>
45 #include <sys/vfs.h>
46 #include <sys/vnode.h>
47 #include <fs/fs_subr.h>
48 #include <sys/sysmacros.h>
49 #include <sys/kmem.h>
50 #include <sys/mkdev.h>
51 #include <sys/mount.h>
52 #include <sys/statvfs.h>
53 #include <sys/errno.h>
54 #include <sys/debug.h>
55 #include <sys/cmn_err.h>
56 #include <sys/modctl.h>
57 #include <sys/policy.h>
58 #include <sys/atomic.h>
59 #include <sys/zone.h>
60 #include <sys/vfs_opreg.h>
61 #include <sys/mntent.h>
62 #include <sys/priv.h>
63 #include <sys/tsol/label.h>
64 #include <sys/tsol/tndb.h>
65 #include <inet/ip.h>
66 
67 #include <netsmb/smb_osdep.h>
68 #include <netsmb/smb.h>
69 #include <netsmb/smb_conn.h>
70 #include <netsmb/smb_subr.h>
71 #include <netsmb/smb_dev.h>
72 
73 #include <smbfs/smbfs.h>
74 #include <smbfs/smbfs_node.h>
75 #include <smbfs/smbfs_subr.h>
76 
/*
 * Should smbfs mount enable "-o acl" by default?  There are good
 * arguments for both.  The most common use case is individual users
 * accessing files on some SMB server, for which "noacl" is the more
 * convenient default.  A less common use case is data migration,
 * where the "acl" option might be a desirable default.  We'll make
 * the common use case the default.  This default can be changed via
 * /etc/system, and/or set per-mount via the "acl" mount option.
 *
 * smbfs_mount() reads this: a non-zero value turns on SMI_ACL even
 * when the "acl" option was not given; an explicit "noacl" option
 * still turns it back off.
 */
int smbfs_default_opt_acl = 0;
87 
88 /*
89  * Local functions definitions.
90  */
91 int		smbfsinit(int fstyp, char *name);
92 void		smbfsfini();
93 static int	smbfs_mount_label_policy(vfs_t *, void *, int, cred_t *);
94 
/*
 * SMBFS Mount options table for MS_OPTIONSTR
 * Note: These are not all the options.
 * Some options come in via MS_DATA.
 * Others are generic (see vfs.c)
 */

/*
 * Cancel lists: each NULL-terminated array names the option that is
 * cancelled by the option it is paired with in mntopts[] below
 * (e.g. "intr" cancels "nointr" and vice versa).
 */
static char *intr_cancel[] = { MNTOPT_NOINTR, NULL };
static char *nointr_cancel[] = { MNTOPT_INTR, NULL };
static char *acl_cancel[] = { MNTOPT_NOACL, NULL };
static char *noacl_cancel[] = { MNTOPT_ACL, NULL };
static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL };
static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL };

/*
 * The options themselves.  "intr" and "xattr" carry MO_DEFAULT;
 * neither "acl" nor "noacl" does (the ACL default is decided in
 * smbfs_mount() using smbfs_default_opt_acl).
 */
static mntopt_t mntopts[] = {
/*
 *	option name		cancel option	default arg	flags
 *		ufs arg flag
 */
	{ MNTOPT_INTR,		intr_cancel,	NULL,	MO_DEFAULT, 0 },
	{ MNTOPT_NOINTR,	nointr_cancel,	NULL,	0,	0 },
	{ MNTOPT_ACL,		acl_cancel,	NULL,	0,	0 },
	{ MNTOPT_NOACL,		noacl_cancel,	NULL,	0,	0 },
	{ MNTOPT_XATTR,		xattr_cancel,	NULL,	MO_DEFAULT, 0 },
	{ MNTOPT_NOXATTR,	noxattr_cancel, NULL,	0,	0 }
};

/* Option table prototype: entry count plus the table; see vfw. */
static mntopts_t smbfs_mntopts = {
	sizeof (mntopts) / sizeof (mntopt_t),
	mntopts
};
125 
/*
 * File system type name.  Registered through vfw below and also
 * copied into f_basetype by smbfs_statvfs().
 */
static const char fs_type_name[FSTYPSZ] = "smbfs";

/*
 * File system definition handed to the module framework via modlfs.
 * The cast on fs_type_name only drops the const qualifier.
 */
static vfsdef_t vfw = {
	VFSDEF_VERSION,
	(char *)fs_type_name,
	smbfsinit,		/* init routine */
	VSW_HASPROTO|VSW_NOTZONESAFE,	/* flags */
	&smbfs_mntopts			/* mount options table prototype */
};

/* Module linkage element describing this file system module. */
static struct modlfs modlfs = {
	&mod_fsops,
	"SMBFS filesystem",
	&vfw
};

/* Linkage passed to mod_install, mod_remove and mod_info. */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlfs, NULL
};
145 
/*
 * Mutex to protect the following variables:
 *	  smbfs_major
 *	  smbfs_minor
 *
 * All three are defined elsewhere; smbfs_mount() takes the lock
 * while it picks an unused device number for a new mount.
 */
extern	kmutex_t	smbfs_minor_lock;
extern	int		smbfs_major;
extern	int		smbfs_minor;

/*
 * Prevent unloads while we have mounts
 *
 * Zeroed in _init(), incremented in smbfs_mount(), decremented in
 * smbfs_freevfs(); _fini() returns EBUSY while it is non-zero.
 */
uint32_t	smbfs_mountcount;

/*
 * smbfs vfs operations.
 * These are wired into smbfs_vfsops_template below.
 */
static int	smbfs_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *);
static int	smbfs_unmount(vfs_t *, int, cred_t *);
static int	smbfs_root(vfs_t *, vnode_t **);
static int	smbfs_statvfs(vfs_t *, statvfs64_t *);
static int	smbfs_sync(vfs_t *, short, cred_t *);
static void	smbfs_freevfs(vfs_t *);
169 
170 /*
171  * Module loading
172  */
173 
174 /*
175  * This routine is invoked automatically when the kernel module
176  * containing this routine is loaded.  This allows module specific
177  * initialization to be done when the module is loaded.
178  */
179 int
180 _init(void)
181 {
182 	int		error;
183 
184 	/*
185 	 * Check compiled-in version of "nsmb"
186 	 * that we're linked with.  (paranoid)
187 	 */
188 	if (nsmb_version != NSMB_VERSION) {
189 		cmn_err(CE_WARN, "_init: nsmb version mismatch");
190 		return (ENOTTY);
191 	}
192 
193 	smbfs_mountcount = 0;
194 
195 	/*
196 	 * NFS calls these two in _clntinit
197 	 * Easier to follow this way.
198 	 */
199 	if ((error = smbfs_subrinit()) != 0) {
200 		cmn_err(CE_WARN, "_init: smbfs_subrinit failed");
201 		return (error);
202 	}
203 
204 	if ((error = smbfs_vfsinit()) != 0) {
205 		cmn_err(CE_WARN, "_init: smbfs_vfsinit failed");
206 		smbfs_subrfini();
207 		return (error);
208 	}
209 
210 	if ((error = smbfs_clntinit()) != 0) {
211 		cmn_err(CE_WARN, "_init: smbfs_clntinit failed");
212 		smbfs_vfsfini();
213 		smbfs_subrfini();
214 		return (error);
215 	}
216 
217 	error = mod_install((struct modlinkage *)&modlinkage);
218 	return (error);
219 }
220 
221 /*
222  * Free kernel module resources that were allocated in _init
223  * and remove the linkage information into the kernel
224  */
225 int
226 _fini(void)
227 {
228 	int	error;
229 
230 	/*
231 	 * If a forcedly unmounted instance is still hanging around,
232 	 * we cannot allow the module to be unloaded because that would
233 	 * cause panics once the VFS framework decides it's time to call
234 	 * into VFS_FREEVFS().
235 	 */
236 	if (smbfs_mountcount)
237 		return (EBUSY);
238 
239 	error = mod_remove(&modlinkage);
240 	if (error)
241 		return (error);
242 
243 	/*
244 	 * Free the allocated smbnodes, etc.
245 	 */
246 	smbfs_clntfini();
247 
248 	/* NFS calls these two in _clntfini */
249 	smbfs_vfsfini();
250 	smbfs_subrfini();
251 
252 	/*
253 	 * Free the ops vectors
254 	 */
255 	smbfsfini();
256 	return (0);
257 }
258 
259 /*
260  * Return information about the module
261  */
262 int
263 _info(struct modinfo *modinfop)
264 {
265 	return (mod_info((struct modlinkage *)&modlinkage, modinfop));
266 }
267 
/*
 * Initialize the vfs structure
 *
 * smbfsfstyp and smbfs_vfsops are filled in by smbfsinit();
 * smbfsfini() uses both to release the registration again.
 */

int smbfsfstyp;
vfsops_t *smbfs_vfsops = NULL;

/*
 * Operation name -> handler table given to vfs_setfsops().
 * VGET and MOUNTROOT are not implemented and map to fs_nosys.
 * The all-NULL entry terminates the table.
 */
static const fs_operation_def_t smbfs_vfsops_template[] = {
	{ VFSNAME_MOUNT, { .vfs_mount = smbfs_mount } },
	{ VFSNAME_UNMOUNT, { .vfs_unmount = smbfs_unmount } },
	{ VFSNAME_ROOT,	{ .vfs_root = smbfs_root } },
	{ VFSNAME_STATVFS, { .vfs_statvfs = smbfs_statvfs } },
	{ VFSNAME_SYNC,	{ .vfs_sync = smbfs_sync } },
	{ VFSNAME_VGET,	{ .error = fs_nosys } },
	{ VFSNAME_MOUNTROOT, { .error = fs_nosys } },
	{ VFSNAME_FREEVFS, { .vfs_freevfs = smbfs_freevfs } },
	{ NULL, NULL }
};
286 
287 int
288 smbfsinit(int fstyp, char *name)
289 {
290 	int		error;
291 
292 	error = vfs_setfsops(fstyp, smbfs_vfsops_template, &smbfs_vfsops);
293 	if (error != 0) {
294 		zcmn_err(GLOBAL_ZONEID, CE_WARN,
295 		    "smbfsinit: bad vfs ops template");
296 		return (error);
297 	}
298 
299 	error = vn_make_ops(name, smbfs_vnodeops_template, &smbfs_vnodeops);
300 	if (error != 0) {
301 		(void) vfs_freevfsops_by_type(fstyp);
302 		zcmn_err(GLOBAL_ZONEID, CE_WARN,
303 		    "smbfsinit: bad vnode ops template");
304 		return (error);
305 	}
306 
307 	smbfsfstyp = fstyp;
308 
309 	return (0);
310 }
311 
312 void
313 smbfsfini()
314 {
315 	if (smbfs_vfsops) {
316 		(void) vfs_freevfsops_by_type(smbfsfstyp);
317 		smbfs_vfsops = NULL;
318 	}
319 	if (smbfs_vnodeops) {
320 		vn_freevnodeops(smbfs_vnodeops);
321 		smbfs_vnodeops = NULL;
322 	}
323 }
324 
325 void
326 smbfs_free_smi(smbmntinfo_t *smi)
327 {
328 	if (smi == NULL)
329 		return;
330 
331 	if (smi->smi_zone_ref.zref_zone != NULL)
332 		zone_rele_ref(&smi->smi_zone_ref, ZONE_REF_SMBFS);
333 
334 	if (smi->smi_share != NULL)
335 		smb_share_rele(smi->smi_share);
336 
337 	avl_destroy(&smi->smi_hash_avl);
338 	rw_destroy(&smi->smi_hash_lk);
339 	cv_destroy(&smi->smi_statvfs_cv);
340 	mutex_destroy(&smi->smi_lock);
341 
342 	kmem_free(smi, sizeof (smbmntinfo_t));
343 }
344 
/*
 * smbfs mount vfsop
 * Set up mount info record and attach it to vfs struct.
 *
 * vfsp: the vfs being mounted; on success vfs_data points at the
 *	 new smbmntinfo_t
 * mvp:	 vnode of the mount point; must be a directory
 * uap:	 mount arguments; dataptr/datalen carry a struct smbfs_args
 * cr:	 credentials of the caller
 *
 * Returns 0 on success.  Errors visible here: the result of
 * secpolicy_fs_mount(), ENOTDIR, EFAULT (copyin), EINVAL (version
 * mismatch), ENOTSUP (remount), EBUSY (busy mount point, zone
 * mismatch or zone shutting down), the result of smb_dev2share(),
 * or a positive error from smbfs_mount_label_policy().
 */
static int
smbfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
{
	char		*data = uap->dataptr;
	int		error;
	smbnode_t	*rtnp = NULL;	/* root of this fs */
	smbmntinfo_t	*smi = NULL;
	dev_t		smbfs_dev;
	int		version;
	int		devfd;
	zone_t		*zone = curproc->p_zone;
	zone_t		*mntzone = NULL;
	smb_share_t	*ssp = NULL;
	smb_cred_t	scred;
	int		flags, sec;

	STRUCT_DECL(smbfs_args, args);		/* smbfs mount arguments */

	if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
		return (error);

	if (mvp->v_type != VDIR)
		return (ENOTDIR);

	/*
	 * get arguments
	 *
	 * uap->datalen might be different from sizeof (args)
	 * in a compatible situation.
	 *
	 * The buffer is zeroed first, and at most the native struct
	 * size is copied in, so a short datalen leaves zeros in the
	 * fields the caller did not supply.
	 */
	STRUCT_INIT(args, get_udatamodel());
	bzero(STRUCT_BUF(args), SIZEOF_STRUCT(smbfs_args, DATAMODEL_NATIVE));
	if (copyin(data, STRUCT_BUF(args), MIN(uap->datalen,
	    SIZEOF_STRUCT(smbfs_args, DATAMODEL_NATIVE))))
		return (EFAULT);

	/*
	 * Check mount program version
	 */
	version = STRUCT_FGET(args, version);
	if (version != SMBFS_VERSION) {
		cmn_err(CE_WARN, "mount version mismatch:"
		    " kernel=%d, mount=%d\n",
		    SMBFS_VERSION, version);
		return (EINVAL);
	}

	/*
	 * Deal with re-mount requests.
	 */
	if (uap->flags & MS_REMOUNT) {
		cmn_err(CE_WARN, "MS_REMOUNT not implemented");
		return (ENOTSUP);
	}

	/*
	 * Check for busy
	 *
	 * Unless MS_OVERLAY was requested, the mount point must have
	 * exactly one reference and must not itself be a root vnode.
	 */
	mutex_enter(&mvp->v_lock);
	if (!(uap->flags & MS_OVERLAY) &&
	    (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
		mutex_exit(&mvp->v_lock);
		return (EBUSY);
	}
	mutex_exit(&mvp->v_lock);

	/*
	 * Get the "share" from the netsmb driver (ssp).
	 * It is returned with a "ref" (hold) for us.
	 * Release this hold: at errout below, or in
	 * smbfs_freevfs().
	 */
	devfd = STRUCT_FGET(args, devfd);
	error = smb_dev2share(devfd, &ssp);
	if (error) {
		cmn_err(CE_WARN, "invalid device handle %d (%d)\n",
		    devfd, error);
		return (error);
	}

	/*
	 * Use "goto errout" from here on.
	 * See: ssp, smi, rtnp, mntzone
	 */

	/*
	 * Determine the zone we're being mounted into.
	 *
	 * A caller in the global zone may only mount onto a path
	 * that belongs to the global zone itself; a path inside
	 * another zone is rejected with EBUSY.
	 */
	zone_hold(mntzone = zone);		/* start with this assumption */
	if (getzoneid() == GLOBAL_ZONEID) {
		zone_rele(mntzone);
		mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
		ASSERT(mntzone != NULL);
		if (mntzone != zone) {
			error = EBUSY;
			goto errout;
		}
	}

	/*
	 * Stop the mount from going any further if the zone is going away.
	 */
	if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) {
		error = EBUSY;
		goto errout;
	}

	/*
	 * On a Trusted Extensions client, we may have to force read-only
	 * for read-down mounts.
	 */
	if (is_system_labeled()) {
		void *addr;
		int ipvers = 0;
		struct smb_vc *vcp;

		vcp = SSTOVC(ssp);
		addr = smb_vc_getipaddr(vcp, &ipvers);
		error = smbfs_mount_label_policy(vfsp, addr, ipvers, cr);

		/* Positive values are errno codes: the mount is denied. */
		if (error > 0)
			goto errout;

		/*
		 * -1 is not an error.  It is left in "error" here but
		 * gets overwritten by the qfsattr call further down.
		 */
		if (error == -1) {
			/* change mount to read-only to prevent write-down */
			vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
		}
	}

	/* Prevent unload. */
	atomic_inc_32(&smbfs_mountcount);

	/*
	 * Create a mount record and link it to the vfs struct.
	 * No more possibilities for errors from here on.
	 * Tear-down of this stuff is in smbfs_free_smi()
	 *
	 * Compare with NFS: nfsrootvp()
	 */
	smi = kmem_zalloc(sizeof (*smi), KM_SLEEP);

	mutex_init(&smi->smi_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&smi->smi_statvfs_cv, NULL, CV_DEFAULT, NULL);

	rw_init(&smi->smi_hash_lk, NULL, RW_DEFAULT, NULL);
	smbfs_init_hash_avl(&smi->smi_hash_avl);

	/* The share hold now belongs to smi; clear ssp accordingly. */
	smi->smi_share = ssp;
	ssp = NULL;

	/*
	 * Convert the anonymous zone hold acquired via zone_hold() above
	 * into a zone reference.
	 */
	zone_init_ref(&smi->smi_zone_ref);
	zone_hold_ref(mntzone, &smi->smi_zone_ref, ZONE_REF_SMBFS);
	zone_rele(mntzone);
	mntzone = NULL;

	/*
	 * Initialize option defaults
	 */
	smi->smi_flags	= SMI_LLOCK;
	smi->smi_acregmin = SEC2HR(SMBFS_ACREGMIN);
	smi->smi_acregmax = SEC2HR(SMBFS_ACREGMAX);
	smi->smi_acdirmin = SEC2HR(SMBFS_ACDIRMIN);
	smi->smi_acdirmax = SEC2HR(SMBFS_ACDIRMAX);

	/*
	 * All "generic" mount options have already been
	 * handled in vfs.c:domount() - see mntopts stuff.
	 * Query generic options using vfs_optionisset().
	 * Give ACL an adjustable system-wide default.
	 *
	 * An explicit "noacl" wins over the system-wide default
	 * because it is applied after the default.
	 */
	if (smbfs_default_opt_acl ||
	    vfs_optionisset(vfsp, MNTOPT_ACL, NULL))
		smi->smi_flags |= SMI_ACL;
	if (vfs_optionisset(vfsp, MNTOPT_NOACL, NULL))
		smi->smi_flags &= ~SMI_ACL;
	if (vfs_optionisset(vfsp, MNTOPT_INTR, NULL))
		smi->smi_flags |= SMI_INT;

	/*
	 * Get the mount options that come in as smbfs_args,
	 * starting with args.flags (SMBFS_MF_xxx)
	 *
	 * Only the low nine permission bits of the modes are kept.
	 */
	flags = STRUCT_FGET(args, flags);
	smi->smi_uid	= STRUCT_FGET(args, uid);
	smi->smi_gid	= STRUCT_FGET(args, gid);
	smi->smi_fmode	= STRUCT_FGET(args, file_mode) & 0777;
	smi->smi_dmode	= STRUCT_FGET(args, dir_mode) & 0777;

	/*
	 * Handle the SMBFS_MF_xxx flags.
	 *
	 * Each attribute-cache time is given in seconds; a negative
	 * or too-large value is replaced by the allowed maximum
	 * before conversion with SEC2HR().
	 */
	if (flags & SMBFS_MF_NOAC)
		smi->smi_flags |= SMI_NOAC;
	if (flags & SMBFS_MF_ACREGMIN) {
		sec = STRUCT_FGET(args, acregmin);
		if (sec < 0 || sec > SMBFS_ACMINMAX)
			sec = SMBFS_ACMINMAX;
		smi->smi_acregmin = SEC2HR(sec);
	}
	if (flags & SMBFS_MF_ACREGMAX) {
		sec = STRUCT_FGET(args, acregmax);
		if (sec < 0 || sec > SMBFS_ACMAXMAX)
			sec = SMBFS_ACMAXMAX;
		smi->smi_acregmax = SEC2HR(sec);
	}
	if (flags & SMBFS_MF_ACDIRMIN) {
		sec = STRUCT_FGET(args, acdirmin);
		if (sec < 0 || sec > SMBFS_ACMINMAX)
			sec = SMBFS_ACMINMAX;
		smi->smi_acdirmin = SEC2HR(sec);
	}
	if (flags & SMBFS_MF_ACDIRMAX) {
		sec = STRUCT_FGET(args, acdirmax);
		if (sec < 0 || sec > SMBFS_ACMAXMAX)
			sec = SMBFS_ACMAXMAX;
		smi->smi_acdirmax = SEC2HR(sec);
	}

	/*
	 * Get attributes of the remote file system,
	 * i.e. ACL support, named streams, etc.
	 *
	 * A failure here is only logged; the mount still proceeds.
	 * NOTE(review): smi_fsattr tested below is presumably an
	 * alias into smi_fsa (confirm in smbfs.h).  If so, a failed
	 * query leaves it zero from kmem_zalloc(), which disables
	 * both xattr and acl below.
	 */
	smb_credinit(&scred, cr);
	error = smbfs_smb_qfsattr(smi->smi_share, &smi->smi_fsa, &scred);
	smb_credrele(&scred);
	if (error) {
		SMBVDEBUG("smbfs_smb_qfsattr error %d\n", error);
	}

	/*
	 * We enable XATTR by default (via smbfs_mntopts)
	 * but if the share does not support named streams,
	 * force the NOXATTR option (also clears XATTR).
	 * Caller will set or clear VFS_XATTR after this.
	 */
	if ((smi->smi_fsattr & FILE_NAMED_STREAMS) == 0)
		vfs_setmntopt(vfsp, MNTOPT_NOXATTR, NULL, 0);

	/*
	 * Ditto ACLs (disable if not supported on this share)
	 */
	if ((smi->smi_fsattr & FILE_PERSISTENT_ACLS) == 0) {
		vfs_setmntopt(vfsp, MNTOPT_NOACL, NULL, 0);
		smi->smi_flags &= ~SMI_ACL;
	}

	/*
	 * Assign a unique device id to the mount
	 *
	 * Step the minor number (wrapping at MAXMIN32) until the
	 * resulting device is not already mounted.
	 */
	mutex_enter(&smbfs_minor_lock);
	do {
		smbfs_minor = (smbfs_minor + 1) & MAXMIN32;
		smbfs_dev = makedevice(smbfs_major, smbfs_minor);
	} while (vfs_devismounted(smbfs_dev));
	mutex_exit(&smbfs_minor_lock);

	vfsp->vfs_dev	= smbfs_dev;
	vfs_make_fsid(&vfsp->vfs_fsid, smbfs_dev, smbfsfstyp);
	vfsp->vfs_data	= (caddr_t)smi;
	vfsp->vfs_fstype = smbfsfstyp;
	vfsp->vfs_bsize = MAXBSIZE;
	vfsp->vfs_bcount = 0;

	smi->smi_vfsp	= vfsp;
	smbfs_zonelist_add(smi);	/* undo in smbfs_freevfs */

	/* PSARC 2007/227 VFS Feature Registration */
	vfs_set_feature(vfsp, VFSFT_XVATTR);
	vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS);

	/*
	 * Create the root vnode, which we need in unmount
	 * for the call to smbfs_check_table(), etc.
	 * Release this hold in smbfs_unmount.
	 */
	rtnp = smbfs_node_findcreate(smi, "\\", 1, NULL, 0, 0,
	    &smbfs_fattr0);
	ASSERT(rtnp != NULL);
	rtnp->r_vnode->v_type = VDIR;
	rtnp->r_vnode->v_flag |= VROOT;
	smi->smi_root = rtnp;

	/*
	 * NFS does other stuff here too:
	 *   async worker threads
	 *   init kstats
	 *
	 * End of code from NFS nfsrootvp()
	 */
	return (0);

	/*
	 * Error exit.  Every "goto errout" above happens before smi is
	 * allocated and before smbfs_mountcount is incremented, so in
	 * practice this drops the zone hold and the share hold only;
	 * the smi test is kept for safety.
	 */
errout:
	vfsp->vfs_data = NULL;
	if (smi != NULL)
		smbfs_free_smi(smi);

	if (mntzone != NULL)
		zone_rele(mntzone);

	if (ssp != NULL)
		smb_share_rele(ssp);

	return (error);
}
657 
658 /*
659  * vfs operations
660  */
661 static int
662 smbfs_unmount(vfs_t *vfsp, int flag, cred_t *cr)
663 {
664 	smbmntinfo_t	*smi;
665 	smbnode_t	*rtnp;
666 
667 	smi = VFTOSMI(vfsp);
668 
669 	if (secpolicy_fs_unmount(cr, vfsp) != 0)
670 		return (EPERM);
671 
672 	if ((flag & MS_FORCE) == 0) {
673 		smbfs_rflush(vfsp, cr);
674 
675 		/*
676 		 * If there are any active vnodes on this file system,
677 		 * (other than the root vnode) then the file system is
678 		 * busy and can't be umounted.
679 		 */
680 		if (smbfs_check_table(vfsp, smi->smi_root))
681 			return (EBUSY);
682 
683 		/*
684 		 * We normally hold a ref to the root vnode, so
685 		 * check for references beyond the one we expect:
686 		 *   smbmntinfo_t -> smi_root
687 		 * Note that NFS does not hold the root vnode.
688 		 */
689 		if (smi->smi_root &&
690 		    smi->smi_root->r_vnode->v_count > 1)
691 			return (EBUSY);
692 	}
693 
694 	/*
695 	 * common code for both forced and non-forced
696 	 *
697 	 * Setting VFS_UNMOUNTED prevents new operations.
698 	 * Operations already underway may continue,
699 	 * but not for long.
700 	 */
701 	vfsp->vfs_flag |= VFS_UNMOUNTED;
702 
703 	/*
704 	 * Shutdown any outstanding I/O requests on this share,
705 	 * and force a tree disconnect.  The share object will
706 	 * continue to hang around until smb_share_rele().
707 	 * This should also cause most active nodes to be
708 	 * released as their operations fail with EIO.
709 	 */
710 	smb_share_kill(smi->smi_share);
711 
712 	/*
713 	 * If we hold the root VP (and we normally do)
714 	 * then it's safe to release it now.
715 	 */
716 	if (smi->smi_root) {
717 		rtnp = smi->smi_root;
718 		smi->smi_root = NULL;
719 		VN_RELE(rtnp->r_vnode);	/* release root vnode */
720 	}
721 
722 	/*
723 	 * Remove all nodes from the node hash tables.
724 	 * This (indirectly) calls: smbfs_addfree, smbinactive,
725 	 * which will try to flush dirty pages, etc. so
726 	 * don't destroy the underlying share just yet.
727 	 *
728 	 * Also, with a forced unmount, some nodes may
729 	 * remain active, and those will get cleaned up
730 	 * after their last vn_rele.
731 	 */
732 	smbfs_destroy_table(vfsp);
733 
734 	/*
735 	 * Delete our kstats...
736 	 *
737 	 * Doing it here, rather than waiting until
738 	 * smbfs_freevfs so these are not visible
739 	 * after the unmount.
740 	 */
741 	if (smi->smi_io_kstats) {
742 		kstat_delete(smi->smi_io_kstats);
743 		smi->smi_io_kstats = NULL;
744 	}
745 	if (smi->smi_ro_kstats) {
746 		kstat_delete(smi->smi_ro_kstats);
747 		smi->smi_ro_kstats = NULL;
748 	}
749 
750 	/*
751 	 * The rest happens in smbfs_freevfs()
752 	 */
753 	return (0);
754 }
755 
756 
757 /*
758  * find root of smbfs
759  */
760 static int
761 smbfs_root(vfs_t *vfsp, vnode_t **vpp)
762 {
763 	smbmntinfo_t	*smi;
764 	vnode_t		*vp;
765 
766 	smi = VFTOSMI(vfsp);
767 
768 	if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
769 		return (EPERM);
770 
771 	if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
772 		return (EIO);
773 
774 	/*
775 	 * The root vp is created in mount and held
776 	 * until unmount, so this is paranoia.
777 	 */
778 	if (smi->smi_root == NULL)
779 		return (EIO);
780 
781 	/* Just take a reference and return it. */
782 	vp = SMBTOV(smi->smi_root);
783 	VN_HOLD(vp);
784 	*vpp = vp;
785 
786 	return (0);
787 }
788 
789 /*
790  * Get file system statistics.
791  */
792 static int
793 smbfs_statvfs(vfs_t *vfsp, statvfs64_t *sbp)
794 {
795 	int		error;
796 	smbmntinfo_t	*smi = VFTOSMI(vfsp);
797 	smb_share_t	*ssp = smi->smi_share;
798 	statvfs64_t	stvfs;
799 	hrtime_t now;
800 	smb_cred_t	scred;
801 
802 	if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
803 		return (EPERM);
804 
805 	if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
806 		return (EIO);
807 
808 	mutex_enter(&smi->smi_lock);
809 
810 	/*
811 	 * Use cached result if still valid.
812 	 */
813 recheck:
814 	now = gethrtime();
815 	if (now < smi->smi_statfstime) {
816 		error = 0;
817 		goto cache_hit;
818 	}
819 
820 	/*
821 	 * FS attributes are stale, so someone
822 	 * needs to do an OTW call to get them.
823 	 * Serialize here so only one thread
824 	 * does the OTW call.
825 	 */
826 	if (smi->smi_status & SM_STATUS_STATFS_BUSY) {
827 		smi->smi_status |= SM_STATUS_STATFS_WANT;
828 		if (!cv_wait_sig(&smi->smi_statvfs_cv, &smi->smi_lock)) {
829 			mutex_exit(&smi->smi_lock);
830 			return (EINTR);
831 		}
832 		/* Hope status is valid now. */
833 		goto recheck;
834 	}
835 	smi->smi_status |= SM_STATUS_STATFS_BUSY;
836 	mutex_exit(&smi->smi_lock);
837 
838 	/*
839 	 * Do the OTW call.  Note: lock NOT held.
840 	 */
841 	smb_credinit(&scred, NULL);
842 	bzero(&stvfs, sizeof (stvfs));
843 	error = smbfs_smb_statfs(ssp, &stvfs, &scred);
844 	smb_credrele(&scred);
845 	if (error) {
846 		SMBVDEBUG("statfs error=%d\n", error);
847 	} else {
848 
849 		/*
850 		 * Set a few things the OTW call didn't get.
851 		 */
852 		stvfs.f_frsize = stvfs.f_bsize;
853 		stvfs.f_favail = stvfs.f_ffree;
854 		stvfs.f_fsid = (unsigned long)vfsp->vfs_fsid.val[0];
855 		bcopy(fs_type_name, stvfs.f_basetype, FSTYPSZ);
856 		stvfs.f_flag	= vf_to_stf(vfsp->vfs_flag);
857 		stvfs.f_namemax	= smi->smi_fsa.fsa_maxname;
858 
859 		/*
860 		 * Save the result, update lifetime
861 		 */
862 		now = gethrtime();
863 		smi->smi_statfstime = now +
864 		    (SM_MAX_STATFSTIME * (hrtime_t)NANOSEC);
865 		smi->smi_statvfsbuf = stvfs; /* struct assign! */
866 	}
867 
868 	mutex_enter(&smi->smi_lock);
869 	if (smi->smi_status & SM_STATUS_STATFS_WANT)
870 		cv_broadcast(&smi->smi_statvfs_cv);
871 	smi->smi_status &= ~(SM_STATUS_STATFS_BUSY | SM_STATUS_STATFS_WANT);
872 
873 	/*
874 	 * Copy the statvfs data to caller's buf.
875 	 * Note: struct assignment
876 	 */
877 cache_hit:
878 	if (error == 0)
879 		*sbp = smi->smi_statvfsbuf;
880 	mutex_exit(&smi->smi_lock);
881 	return (error);
882 }
883 
884 /*
885  * Flush dirty smbfs files for file system vfsp.
886  * If vfsp == NULL, all smbfs files are flushed.
887  */
888 /*ARGSUSED*/
889 static int
890 smbfs_sync(vfs_t *vfsp, short flag, cred_t *cr)
891 {
892 
893 	/*
894 	 * SYNC_ATTR is used by fsflush() to force old filesystems like UFS
895 	 * to sync metadata, which they would otherwise cache indefinitely.
896 	 * Semantically, the only requirement is that the sync be initiated.
897 	 * Assume the server-side takes care of attribute sync.
898 	 */
899 	if (flag & SYNC_ATTR)
900 		return (0);
901 
902 	if (vfsp == NULL) {
903 		/*
904 		 * Flush ALL smbfs mounts in this zone.
905 		 */
906 		smbfs_flushall(cr);
907 		return (0);
908 	}
909 
910 	smbfs_rflush(vfsp, cr);
911 
912 	return (0);
913 }
914 
/*
 * Initialization routine for VFS routines.  Should only be called once
 *
 * Currently there is nothing to set up, so this always returns 0.
 * _init() calls it between smbfs_subrinit() and smbfs_clntinit().
 */
int
smbfs_vfsinit(void)
{
	return (0);
}

/*
 * Shutdown routine for VFS routines.  Should only be called once
 *
 * Counterpart of smbfs_vfsinit(); currently there is nothing to
 * undo.  Called from _fini() and from the _init() failure path.
 */
void
smbfs_vfsfini(void)
{
}
931 
932 void
933 smbfs_freevfs(vfs_t *vfsp)
934 {
935 	smbmntinfo_t    *smi;
936 
937 	/* free up the resources */
938 	smi = VFTOSMI(vfsp);
939 
940 	/*
941 	 * By this time we should have already deleted the
942 	 * smi kstats in the unmount code.  If they are still around
943 	 * something is wrong
944 	 */
945 	ASSERT(smi->smi_io_kstats == NULL);
946 
947 	smbfs_zonelist_remove(smi);
948 
949 	smbfs_free_smi(smi);
950 
951 	/*
952 	 * Allow _fini() to succeed now, if so desired.
953 	 */
954 	atomic_dec_32(&smbfs_mountcount);
955 }
956 
957 /*
958  * smbfs_mount_label_policy:
959  *	Determine whether the mount is allowed according to MAC check,
960  *	by comparing (where appropriate) label of the remote server
961  *	against the label of the zone being mounted into.
962  *
963  *	Returns:
964  *		 0 :	access allowed
965  *		-1 :	read-only access allowed (i.e., read-down)
966  *		>0 :	error code, such as EACCES
967  *
968  * NB:
969  * NFS supports Cipso labels by parsing the vfs_resource
970  * to see what the Solaris server global zone has shared.
971  * We can't support that for CIFS since resource names
972  * contain share names, not paths.
973  */
974 static int
975 smbfs_mount_label_policy(vfs_t *vfsp, void *ipaddr, int addr_type, cred_t *cr)
976 {
977 	bslabel_t	*server_sl, *mntlabel;
978 	zone_t		*mntzone = NULL;
979 	ts_label_t	*zlabel;
980 	tsol_tpc_t	*tp;
981 	ts_label_t	*tsl = NULL;
982 	int		retv;
983 
984 	/*
985 	 * Get the zone's label.  Each zone on a labeled system has a label.
986 	 */
987 	mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE);
988 	zlabel = mntzone->zone_slabel;
989 	ASSERT(zlabel != NULL);
990 	label_hold(zlabel);
991 
992 	retv = EACCES;				/* assume the worst */
993 
994 	/*
995 	 * Next, get the assigned label of the remote server.
996 	 */
997 	tp = find_tpc(ipaddr, addr_type, B_FALSE);
998 	if (tp == NULL)
999 		goto out;			/* error getting host entry */
1000 
1001 	if (tp->tpc_tp.tp_doi != zlabel->tsl_doi)
1002 		goto rel_tpc;			/* invalid domain */
1003 	if ((tp->tpc_tp.host_type != UNLABELED))
1004 		goto rel_tpc;			/* invalid hosttype */
1005 
1006 	server_sl = &tp->tpc_tp.tp_def_label;
1007 	mntlabel = label2bslabel(zlabel);
1008 
1009 	/*
1010 	 * Now compare labels to complete the MAC check.  If the labels
1011 	 * are equal or if the requestor is in the global zone and has
1012 	 * NET_MAC_AWARE, then allow read-write access.   (Except for
1013 	 * mounts into the global zone itself; restrict these to
1014 	 * read-only.)
1015 	 *
1016 	 * If the requestor is in some other zone, but their label
1017 	 * dominates the server, then allow read-down.
1018 	 *
1019 	 * Otherwise, access is denied.
1020 	 */
1021 	if (blequal(mntlabel, server_sl) ||
1022 	    (crgetzoneid(cr) == GLOBAL_ZONEID &&
1023 	    getpflags(NET_MAC_AWARE, cr) != 0)) {
1024 		if ((mntzone == global_zone) ||
1025 		    !blequal(mntlabel, server_sl))
1026 			retv = -1;		/* read-only */
1027 		else
1028 			retv = 0;		/* access OK */
1029 	} else if (bldominates(mntlabel, server_sl)) {
1030 		retv = -1;			/* read-only */
1031 	} else {
1032 		retv = EACCES;
1033 	}
1034 
1035 	if (tsl != NULL)
1036 		label_rele(tsl);
1037 
1038 rel_tpc:
1039 	/*LINTED*/
1040 	TPC_RELE(tp);
1041 out:
1042 	if (mntzone)
1043 		zone_rele(mntzone);
1044 	label_rele(zlabel);
1045 	return (retv);
1046 }
1047