xref: /illumos-gate/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_vfsops.c (revision 5328fc53d11d7151861fa272e4fb0248b8f0e145)
1 /*
2  * Copyright (c) 2000-2001, Boris Popov
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *    This product includes software developed by Boris Popov.
16  * 4. Neither the name of the author nor the names of any co-contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  * $Id: smbfs_vfsops.c,v 1.73.64.1 2005/05/27 02:35:28 lindak Exp $
33  */
34 
35 /*
36  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
37  * Copyright 2013, Joyent, Inc. All rights reserved.
38  * Copyright (c) 2016 by Delphix. All rights reserved.
39  * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
40  */
41 
42 #include <sys/systm.h>
43 #include <sys/cred.h>
44 #include <sys/time.h>
45 #include <sys/vfs.h>
46 #include <sys/vnode.h>
47 #include <fs/fs_subr.h>
48 #include <sys/sysmacros.h>
49 #include <sys/kmem.h>
50 #include <sys/mkdev.h>
51 #include <sys/mount.h>
52 #include <sys/statvfs.h>
53 #include <sys/errno.h>
54 #include <sys/debug.h>
55 #include <sys/disp.h>
56 #include <sys/cmn_err.h>
57 #include <sys/modctl.h>
58 #include <sys/policy.h>
59 #include <sys/atomic.h>
60 #include <sys/zone.h>
61 #include <sys/vfs_opreg.h>
62 #include <sys/mntent.h>
63 #include <sys/priv.h>
64 #include <sys/taskq.h>
65 #include <sys/tsol/label.h>
66 #include <sys/tsol/tndb.h>
67 #include <inet/ip.h>
68 
69 #include <netsmb/smb_osdep.h>
70 #include <netsmb/smb.h>
71 #include <netsmb/smb_conn.h>
72 #include <netsmb/smb_subr.h>
73 #include <netsmb/smb_dev.h>
74 
75 #include <smbfs/smbfs.h>
76 #include <smbfs/smbfs_node.h>
77 #include <smbfs/smbfs_subr.h>
78 
79 #ifndef	_KERNEL
80 
81 #include <libfksmbfs.h>
82 
/*
 * User-level (fakekernel) build only.  Provide plain-struct stand-ins
 * for the STRUCT_DECL/STRUCT_FGET accessors that smbfs_mount() uses on
 * its mount arguments, and rename the module entry points so that
 * _init/_fini below are compiled as fksmbfs_init/fksmbfs_fini.
 */
#define	STRUCT_DECL(s, a) struct s a
#define	STRUCT_FGET(handle, field) ((handle).field)
#define	_init(v)	fksmbfs_init(v)
#define	_fini(v)	fksmbfs_fini(v)
87 
88 #endif	/* !_KERNEL */
89 
90 /*
91  * Should smbfs mount enable "-o acl" by default?  There are good
92  * arguments for both.  The most common use case is individual users
93  * accessing files on some SMB server, for which "noacl" is the more
94  * convenient default.  A less common use case is data migration,
95  * where the "acl" option might be a desirable default.  We'll make
96  * the common use case the default.  This default can be changed via
97  * /etc/system, and/or set per-mount via the "acl" mount option.
98  */
99 int smbfs_default_opt_acl = 0;
100 
101 /*
102  * How many taskq threads per-mount should we use.
103  * Just one is fine (until we do more async work).
104  */
105 int smbfs_tq_nthread = 1;
106 
107 /*
108  * Local functions definitions.
109  */
110 int		smbfsinit(int fstyp, char *name);
111 void		smbfsfini();
112 
113 #ifdef	_KERNEL
114 static int	smbfs_mount_label_policy(vfs_t *, void *, int, cred_t *);
115 #endif	/* _KERNEL */
116 
/*
 * SMBFS Mount options table for MS_OPTIONSTR
 * Note: These are not all the options.
 * Some options come in via MS_DATA.
 * Others are generic (see vfs.c)
 *
 * Each xxx_cancel[] array below is a NULL-terminated list that is
 * used as the "cancel option" column of the matching mntopts[] entry;
 * every on/off pair (intr/nointr, acl/noacl, xattr/noxattr) names
 * its opposite there.
 */
static char *intr_cancel[] = { MNTOPT_NOINTR, NULL };
static char *nointr_cancel[] = { MNTOPT_INTR, NULL };
static char *acl_cancel[] = { MNTOPT_NOACL, NULL };
static char *noacl_cancel[] = { MNTOPT_ACL, NULL };
static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL };
static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL };

/*
 * The option table itself.  "intr" and "xattr" carry MO_DEFAULT.
 * Neither "acl" nor "noacl" is marked as a default here; smbfs_mount()
 * applies the smbfs_default_opt_acl tunable when deciding on ACLs.
 */
static mntopt_t mntopts[] = {
/*
 *	option name		cancel option	default arg	flags
 *		ufs arg flag
 */
	{ MNTOPT_INTR,		intr_cancel,	NULL,	MO_DEFAULT, 0 },
	{ MNTOPT_NOINTR,	nointr_cancel,	NULL,	0,	0 },
	{ MNTOPT_ACL,		acl_cancel,	NULL,	0,	0 },
	{ MNTOPT_NOACL,		noacl_cancel,	NULL,	0,	0 },
	{ MNTOPT_XATTR,		xattr_cancel,	NULL,	MO_DEFAULT, 0 },
	{ MNTOPT_NOXATTR,	noxattr_cancel, NULL,	0,	0 },
#ifndef	_KERNEL
	/* See vfs_optionisset MNTOPT_NOAC below. */
	{ MNTOPT_NOAC,		NULL,		NULL,	0,	0 },
#endif	/* !_KERNEL */
};

/*
 * Entry count plus table pointer; this is the mount options
 * prototype referenced from the vfsdef_t (vfw) below.
 */
static mntopts_t smbfs_mntopts = {
	sizeof (mntopts) / sizeof (mntopt_t),
	mntopts
};
151 
/*
 * File system type name.  Used in the vfsdef_t below, and copied
 * into f_basetype by smbfs_statvfs().
 */
static const char fs_type_name[FSTYPSZ] = "smbfs";

/*
 * VFS definition for smbfs.  It is installed by _init(): through
 * modlfs/mod_install() in the kernel build, or passed directly to
 * fake_installfs() in the user-level build.
 */
static vfsdef_t vfw = {
	VFSDEF_VERSION,
	(char *)fs_type_name,
	smbfsinit,		/* init routine */
	VSW_HASPROTO|VSW_NOTZONESAFE,	/* flags */
	&smbfs_mntopts			/* mount options table prototype */
};
161 
162 #ifdef	_KERNEL
/*
 * Module linkage for the file system: the fs module ops, a
 * descriptive name, and the vfsdef_t defined above.
 */
static struct modlfs modlfs = {
	&mod_fsops,
	"SMBFS filesystem",
	&vfw
};

/*
 * Linkage handed to mod_install(), mod_remove() and mod_info()
 * by _init(), _fini() and _info() respectively.
 */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlfs, NULL
};
172 #endif	/* _KERNEL */
173 
174 /*
175  * Mutex to protect the following variables:
176  *	  smbfs_major
177  *	  smbfs_minor
178  */
179 extern	kmutex_t	smbfs_minor_lock;
180 extern	int		smbfs_major;
181 extern	int		smbfs_minor;
182 
183 /*
184  * Prevent unloads while we have mounts
185  */
186 uint32_t	smbfs_mountcount;
187 
188 /*
189  * smbfs vfs operations.
190  */
191 static int	smbfs_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *);
192 static int	smbfs_unmount(vfs_t *, int, cred_t *);
193 static int	smbfs_root(vfs_t *, vnode_t **);
194 static int	smbfs_statvfs(vfs_t *, statvfs64_t *);
195 static int	smbfs_sync(vfs_t *, short, cred_t *);
196 static void	smbfs_freevfs(vfs_t *);
197 
198 /*
199  * Module loading
200  */
201 
202 /*
203  * This routine is invoked automatically when the kernel module
204  * containing this routine is loaded.  This allows module specific
205  * initialization to be done when the module is loaded.
206  */
207 int
208 _init(void)
209 {
210 	int		error;
211 
212 	/*
213 	 * Check compiled-in version of "nsmb"
214 	 * that we're linked with.  (paranoid)
215 	 */
216 	if (nsmb_version != NSMB_VERSION) {
217 		cmn_err(CE_WARN, "_init: nsmb version mismatch");
218 		return (ENOTTY);
219 	}
220 
221 	smbfs_mountcount = 0;
222 
223 	/*
224 	 * NFS calls these two in _clntinit
225 	 * Easier to follow this way.
226 	 */
227 	if ((error = smbfs_subrinit()) != 0) {
228 		cmn_err(CE_WARN, "_init: smbfs_subrinit failed");
229 		return (error);
230 	}
231 
232 	if ((error = smbfs_vfsinit()) != 0) {
233 		cmn_err(CE_WARN, "_init: smbfs_vfsinit failed");
234 		smbfs_subrfini();
235 		return (error);
236 	}
237 
238 	if ((error = smbfs_clntinit()) != 0) {
239 		cmn_err(CE_WARN, "_init: smbfs_clntinit failed");
240 		smbfs_vfsfini();
241 		smbfs_subrfini();
242 		return (error);
243 	}
244 
245 #ifdef	_KERNEL
246 	error = mod_install((struct modlinkage *)&modlinkage);
247 #else	/* _KERNEL */
248 	error = fake_installfs(&vfw);
249 #endif	/* _KERNEL */
250 
251 	return (error);
252 }
253 
254 /*
255  * Free kernel module resources that were allocated in _init
256  * and remove the linkage information into the kernel
257  */
258 int
259 _fini(void)
260 {
261 	int	error;
262 
263 	/*
264 	 * If a forcedly unmounted instance is still hanging around,
265 	 * we cannot allow the module to be unloaded because that would
266 	 * cause panics once the VFS framework decides it's time to call
267 	 * into VFS_FREEVFS().
268 	 */
269 	if (smbfs_mountcount)
270 		return (EBUSY);
271 
272 #ifdef	_KERNEL
273 	error = mod_remove(&modlinkage);
274 #else	/* _KERNEL */
275 	error = fake_removefs(&vfw);
276 #endif	/* _KERNEL */
277 	if (error)
278 		return (error);
279 
280 	/*
281 	 * Free the allocated smbnodes, etc.
282 	 */
283 	smbfs_clntfini();
284 
285 	/* NFS calls these two in _clntfini */
286 	smbfs_vfsfini();
287 	smbfs_subrfini();
288 
289 	/*
290 	 * Free the ops vectors
291 	 */
292 	smbfsfini();
293 	return (0);
294 }
295 
296 /*
297  * Return information about the module
298  */
299 #ifdef	_KERNEL
300 int
301 _info(struct modinfo *modinfop)
302 {
303 	return (mod_info((struct modlinkage *)&modlinkage, modinfop));
304 }
305 #endif	/* _KERNEL */
306 
307 /*
308  * Initialize the vfs structure
309  */
310 
/* File system type index; recorded by smbfsinit(). */
int smbfs_fstyp;
/* VFS ops vector; set by vfs_setfsops() in smbfsinit(). */
vfsops_t *smbfs_vfsops = NULL;

/*
 * Template given to vfs_setfsops() in smbfsinit().
 * VGET and MOUNTROOT are not supported and map to fs_nosys.
 * The list ends with a NULL-name entry.
 */
static const fs_operation_def_t smbfs_vfsops_template[] = {
	{ VFSNAME_MOUNT, { .vfs_mount = smbfs_mount } },
	{ VFSNAME_UNMOUNT, { .vfs_unmount = smbfs_unmount } },
	{ VFSNAME_ROOT,	{ .vfs_root = smbfs_root } },
	{ VFSNAME_STATVFS, { .vfs_statvfs = smbfs_statvfs } },
	{ VFSNAME_SYNC,	{ .vfs_sync = smbfs_sync } },
	{ VFSNAME_VGET,	{ .error = fs_nosys } },
	{ VFSNAME_MOUNTROOT, { .error = fs_nosys } },
	{ VFSNAME_FREEVFS, { .vfs_freevfs = smbfs_freevfs } },
	{ NULL, NULL }
};
325 
326 /*
327  * This is the VFS switch initialization routine, normally called
328  * via vfssw[x].vsw_init by vfsinit() or mod_install
329  */
330 int
331 smbfsinit(int fstyp, char *name)
332 {
333 	int		error;
334 
335 	error = vfs_setfsops(fstyp, smbfs_vfsops_template, &smbfs_vfsops);
336 	if (error != 0) {
337 		cmn_err(CE_WARN,
338 		    "smbfsinit: bad vfs ops template");
339 		return (error);
340 	}
341 
342 	error = vn_make_ops(name, smbfs_vnodeops_template, &smbfs_vnodeops);
343 	if (error != 0) {
344 		(void) vfs_freevfsops_by_type(fstyp);
345 		cmn_err(CE_WARN,
346 		    "smbfsinit: bad vnode ops template");
347 		return (error);
348 	}
349 
350 	smbfs_fstyp = fstyp;
351 
352 	return (0);
353 }
354 
355 void
356 smbfsfini()
357 {
358 	if (smbfs_vfsops) {
359 		(void) vfs_freevfsops_by_type(smbfs_fstyp);
360 		smbfs_vfsops = NULL;
361 	}
362 	if (smbfs_vnodeops) {
363 		vn_freevnodeops(smbfs_vnodeops);
364 		smbfs_vnodeops = NULL;
365 	}
366 }
367 
368 void
369 smbfs_free_smi(smbmntinfo_t *smi)
370 {
371 	if (smi == NULL)
372 		return;
373 
374 #ifdef	_KERNEL
375 	if (smi->smi_zone_ref.zref_zone != NULL)
376 		zone_rele_ref(&smi->smi_zone_ref, ZONE_REF_SMBFS);
377 #endif	/* _KERNEL */
378 
379 	if (smi->smi_share != NULL)
380 		smb_share_rele(smi->smi_share);
381 
382 	avl_destroy(&smi->smi_hash_avl);
383 	rw_destroy(&smi->smi_hash_lk);
384 	cv_destroy(&smi->smi_statvfs_cv);
385 	mutex_destroy(&smi->smi_lock);
386 
387 	kmem_free(smi, sizeof (smbmntinfo_t));
388 }
389 
390 /*
391  * smbfs mount vfsop
392  * Set up mount info record and attach it to vfs struct.
393  */
394 static int
395 smbfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
396 {
397 	char		*data = uap->dataptr;
398 	int		error;
399 	smbnode_t	*rtnp = NULL;	/* root of this fs */
400 	smbmntinfo_t	*smi = NULL;
401 	dev_t		smbfs_dev;
402 	int		version;
403 	int		devfd;
404 	zone_t		*zone = curzone;
405 #ifdef	_KERNEL
406 	zone_t		*mntzone = NULL;
407 #else	/* _KERNEL */
408 	short		minclsyspri = MINCLSYSPRI;
409 #endif	/* _KERNEL */
410 	smb_share_t	*ssp = NULL;
411 	smb_cred_t	scred;
412 	int		flags, sec;
413 	STRUCT_DECL(smbfs_args, args);		/* smbfs mount arguments */
414 
415 #ifdef	_KERNEL
416 	if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
417 		return (error);
418 #endif	/* _KERNEL */
419 
420 	if (mvp->v_type != VDIR)
421 		return (ENOTDIR);
422 
423 	/*
424 	 * get arguments
425 	 *
426 	 * uap->datalen might be different from sizeof (args)
427 	 * in a compatible situation.
428 	 */
429 #ifdef	_KERNEL
430 	STRUCT_INIT(args, get_udatamodel());
431 	bzero(STRUCT_BUF(args), SIZEOF_STRUCT(smbfs_args, DATAMODEL_NATIVE));
432 	if (copyin(data, STRUCT_BUF(args), MIN(uap->datalen,
433 	    SIZEOF_STRUCT(smbfs_args, DATAMODEL_NATIVE))))
434 		return (EFAULT);
435 #else	/* _KERNEL */
436 	bzero(&args, sizeof (args));
437 	if (copyin(data, &args, MIN(uap->datalen, sizeof (args))))
438 		return (EFAULT);
439 #endif	/* _KERNEL */
440 
441 	/*
442 	 * Check mount program version
443 	 */
444 	version = STRUCT_FGET(args, version);
445 	if (version != SMBFS_VERSION) {
446 		cmn_err(CE_WARN, "mount version mismatch:"
447 		    " kernel=%d, mount=%d\n",
448 		    SMBFS_VERSION, version);
449 		return (EINVAL);
450 	}
451 
452 	/*
453 	 * Deal with re-mount requests.
454 	 */
455 	if (uap->flags & MS_REMOUNT) {
456 		cmn_err(CE_WARN, "MS_REMOUNT not implemented");
457 		return (ENOTSUP);
458 	}
459 
460 	/*
461 	 * Check for busy
462 	 */
463 	mutex_enter(&mvp->v_lock);
464 	if (!(uap->flags & MS_OVERLAY) &&
465 	    (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
466 		mutex_exit(&mvp->v_lock);
467 		return (EBUSY);
468 	}
469 	mutex_exit(&mvp->v_lock);
470 
471 	/*
472 	 * Get the "share" from the netsmb driver (ssp).
473 	 * It is returned with a "ref" (hold) for us.
474 	 * Release this hold: at errout below, or in
475 	 * smbfs_freevfs().
476 	 */
477 	devfd = STRUCT_FGET(args, devfd);
478 	error = smb_dev2share(devfd, &ssp);
479 	if (error) {
480 		cmn_err(CE_WARN, "invalid device handle %d (%d)\n",
481 		    devfd, error);
482 		return (error);
483 	}
484 
485 	/*
486 	 * Use "goto errout" from here on.
487 	 * See: ssp, smi, rtnp, mntzone
488 	 */
489 
490 #ifdef	_KERNEL
491 	/*
492 	 * Determine the zone we're being mounted into.
493 	 */
494 	zone_hold(mntzone = zone);		/* start with this assumption */
495 	if (getzoneid() == GLOBAL_ZONEID) {
496 		zone_rele(mntzone);
497 		mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
498 		ASSERT(mntzone != NULL);
499 		if (mntzone != zone) {
500 			error = EBUSY;
501 			goto errout;
502 		}
503 	}
504 
505 	/*
506 	 * Stop the mount from going any further if the zone is going away.
507 	 */
508 	if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) {
509 		error = EBUSY;
510 		goto errout;
511 	}
512 
513 	/*
514 	 * On a Trusted Extensions client, we may have to force read-only
515 	 * for read-down mounts.
516 	 */
517 	if (is_system_labeled()) {
518 		void *addr;
519 		int ipvers = 0;
520 		struct smb_vc *vcp;
521 
522 		vcp = SSTOVC(ssp);
523 		addr = smb_vc_getipaddr(vcp, &ipvers);
524 		error = smbfs_mount_label_policy(vfsp, addr, ipvers, cr);
525 
526 		if (error > 0)
527 			goto errout;
528 
529 		if (error == -1) {
530 			/* change mount to read-only to prevent write-down */
531 			vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
532 		}
533 	}
534 #endif	/* _KERNEL */
535 
536 	/* Prevent unload. */
537 	atomic_inc_32(&smbfs_mountcount);
538 
539 	/*
540 	 * Create a mount record and link it to the vfs struct.
541 	 * No more possiblities for errors from here on.
542 	 * Tear-down of this stuff is in smbfs_free_smi()
543 	 *
544 	 * Compare with NFS: nfsrootvp()
545 	 */
546 	smi = kmem_zalloc(sizeof (*smi), KM_SLEEP);
547 
548 	mutex_init(&smi->smi_lock, NULL, MUTEX_DEFAULT, NULL);
549 	cv_init(&smi->smi_statvfs_cv, NULL, CV_DEFAULT, NULL);
550 
551 	rw_init(&smi->smi_hash_lk, NULL, RW_DEFAULT, NULL);
552 	smbfs_init_hash_avl(&smi->smi_hash_avl);
553 
554 	smi->smi_share = ssp;
555 	ssp = NULL;
556 
557 #ifdef	_KERNEL
558 	/*
559 	 * Convert the anonymous zone hold acquired via zone_hold() above
560 	 * into a zone reference.
561 	 */
562 	zone_init_ref(&smi->smi_zone_ref);
563 	zone_hold_ref(mntzone, &smi->smi_zone_ref, ZONE_REF_SMBFS);
564 	zone_rele(mntzone);
565 	mntzone = NULL;
566 #else	/* _KERNEL */
567 	smi->smi_zone_ref.zref_zone = curzone;
568 #endif	/* _KERNEL */
569 
570 	/*
571 	 * Initialize option defaults
572 	 */
573 	smi->smi_acregmin = SEC2HR(SMBFS_ACREGMIN);
574 	smi->smi_acregmax = SEC2HR(SMBFS_ACREGMAX);
575 	smi->smi_acdirmin = SEC2HR(SMBFS_ACDIRMIN);
576 	smi->smi_acdirmax = SEC2HR(SMBFS_ACDIRMAX);
577 	smi->smi_flags	= SMI_LLOCK;
578 #ifndef	_KERNEL
579 	/* Always direct IO with fakekernel */
580 	smi->smi_flags	|= SMI_DIRECTIO;
581 #endif	/* _KERNEL */
582 
583 	/*
584 	 * All "generic" mount options have already been
585 	 * handled in vfs.c:domount() - see mntopts stuff.
586 	 * Query generic options using vfs_optionisset().
587 	 * Give ACL an adjustable system-wide default.
588 	 */
589 	if (smbfs_default_opt_acl ||
590 	    vfs_optionisset(vfsp, MNTOPT_ACL, NULL))
591 		smi->smi_flags |= SMI_ACL;
592 	if (vfs_optionisset(vfsp, MNTOPT_NOACL, NULL))
593 		smi->smi_flags &= ~SMI_ACL;
594 	if (vfs_optionisset(vfsp, MNTOPT_INTR, NULL))
595 		smi->smi_flags |= SMI_INT;
596 
597 	/*
598 	 * Get the mount options that come in as smbfs_args,
599 	 * starting with args.flags (SMBFS_MF_xxx)
600 	 */
601 	flags = STRUCT_FGET(args, flags);
602 	smi->smi_fmode	= STRUCT_FGET(args, file_mode) & 0777;
603 	smi->smi_dmode	= STRUCT_FGET(args, dir_mode) & 0777;
604 #ifdef	_KERNEL
605 	smi->smi_uid	= STRUCT_FGET(args, uid);
606 	smi->smi_gid	= STRUCT_FGET(args, gid);
607 #else	/* _KERNEL */
608 	/*
609 	 * Need uid/gid to match our fake cred we'll fail in
610 	 * smbfs_access_rwx later.
611 	 */
612 	smi->smi_uid	= crgetuid(cr);
613 	smi->smi_gid	= crgetgid(cr);
614 
615 	/*
616 	 * Our user-level do_mount() passes the mount options sting
617 	 * as-is, where the real mount program would convert some
618 	 * of those options to bits set in smbfs_args.flags.
619 	 * To avoid replicating all that conversion code, this
620 	 * uses the generic vfs option support to handle those
621 	 * option flag bits we need, i.e.: "noac"
622 	 */
623 	if (vfs_optionisset(vfsp, MNTOPT_NOAC, NULL))
624 		flags |= SMBFS_MF_NOAC;
625 #endif	/* _KERNEL */
626 
627 	/*
628 	 * Hande the SMBFS_MF_xxx flags.
629 	 */
630 	if (flags & SMBFS_MF_NOAC)
631 		smi->smi_flags |= SMI_NOAC;
632 	if (flags & SMBFS_MF_ACREGMIN) {
633 		sec = STRUCT_FGET(args, acregmin);
634 		if (sec < 0 || sec > SMBFS_ACMINMAX)
635 			sec = SMBFS_ACMINMAX;
636 		smi->smi_acregmin = SEC2HR(sec);
637 	}
638 	if (flags & SMBFS_MF_ACREGMAX) {
639 		sec = STRUCT_FGET(args, acregmax);
640 		if (sec < 0 || sec > SMBFS_ACMAXMAX)
641 			sec = SMBFS_ACMAXMAX;
642 		smi->smi_acregmax = SEC2HR(sec);
643 	}
644 	if (flags & SMBFS_MF_ACDIRMIN) {
645 		sec = STRUCT_FGET(args, acdirmin);
646 		if (sec < 0 || sec > SMBFS_ACMINMAX)
647 			sec = SMBFS_ACMINMAX;
648 		smi->smi_acdirmin = SEC2HR(sec);
649 	}
650 	if (flags & SMBFS_MF_ACDIRMAX) {
651 		sec = STRUCT_FGET(args, acdirmax);
652 		if (sec < 0 || sec > SMBFS_ACMAXMAX)
653 			sec = SMBFS_ACMAXMAX;
654 		smi->smi_acdirmax = SEC2HR(sec);
655 	}
656 
657 	/*
658 	 * Get attributes of the remote file system,
659 	 * i.e. ACL support, named streams, etc.
660 	 */
661 	smb_credinit(&scred, cr);
662 	error = smbfs_smb_qfsattr(smi->smi_share, &smi->smi_fsa, &scred);
663 	smb_credrele(&scred);
664 	if (error) {
665 		SMBVDEBUG("smbfs_smb_qfsattr error %d\n", error);
666 	}
667 
668 	/*
669 	 * We enable XATTR by default (via smbfs_mntopts)
670 	 * but if the share does not support named streams,
671 	 * force the NOXATTR option (also clears XATTR).
672 	 * Caller will set or clear VFS_XATTR after this.
673 	 */
674 	if ((smi->smi_fsattr & FILE_NAMED_STREAMS) == 0)
675 		vfs_setmntopt(vfsp, MNTOPT_NOXATTR, NULL, 0);
676 
677 	/*
678 	 * Ditto ACLs (disable if not supported on this share)
679 	 */
680 	if ((smi->smi_fsattr & FILE_PERSISTENT_ACLS) == 0) {
681 		vfs_setmntopt(vfsp, MNTOPT_NOACL, NULL, 0);
682 		smi->smi_flags &= ~SMI_ACL;
683 	}
684 
685 	/*
686 	 * Assign a unique device id to the mount
687 	 */
688 	mutex_enter(&smbfs_minor_lock);
689 	do {
690 		smbfs_minor = (smbfs_minor + 1) & MAXMIN32;
691 		smbfs_dev = makedevice(smbfs_major, smbfs_minor);
692 	} while (vfs_devismounted(smbfs_dev));
693 	mutex_exit(&smbfs_minor_lock);
694 
695 	vfsp->vfs_dev	= smbfs_dev;
696 	vfs_make_fsid(&vfsp->vfs_fsid, smbfs_dev, smbfs_fstyp);
697 	vfsp->vfs_data	= (caddr_t)smi;
698 	vfsp->vfs_fstype = smbfs_fstyp;
699 	vfsp->vfs_bsize = MAXBSIZE;
700 	vfsp->vfs_bcount = 0;
701 
702 	smi->smi_vfsp	= vfsp;
703 	smbfs_zonelist_add(smi);	/* undo in smbfs_freevfs */
704 
705 	/* PSARC 2007/227 VFS Feature Registration */
706 	vfs_set_feature(vfsp, VFSFT_XVATTR);
707 	vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS);
708 
709 	/*
710 	 * Create the root vnode, which we need in unmount
711 	 * for the call to smbfs_check_table(), etc.
712 	 * Release this hold in smbfs_unmount.
713 	 */
714 	rtnp = smbfs_node_findcreate(smi, "\\", 1, NULL, 0, 0,
715 	    &smbfs_fattr0);
716 	ASSERT(rtnp != NULL);
717 	rtnp->r_vnode->v_type = VDIR;
718 	rtnp->r_vnode->v_flag |= VROOT;
719 	smi->smi_root = rtnp;
720 
721 	/*
722 	 * Create a taskq for async work (i.e. putpage)
723 	 */
724 	smi->smi_taskq = taskq_create_proc("smbfs",
725 	    smbfs_tq_nthread, minclsyspri,
726 	    smbfs_tq_nthread, smbfs_tq_nthread * 2,
727 	    zone->zone_zsched, TASKQ_PREPOPULATE);
728 
729 	/*
730 	 * NFS does other stuff here too:
731 	 *   async worker threads
732 	 *   init kstats
733 	 *
734 	 * End of code from NFS nfsrootvp()
735 	 */
736 	return (0);
737 
738 #ifdef	_KERNEL
739 errout:
740 	vfsp->vfs_data = NULL;
741 	if (smi != NULL)
742 		smbfs_free_smi(smi);
743 
744 	if (mntzone != NULL)
745 		zone_rele(mntzone);
746 
747 	if (ssp != NULL)
748 		smb_share_rele(ssp);
749 
750 	return (error);
751 #endif	/* _KERNEL */
752 }
753 
754 /*
755  * vfs operations
756  */
757 static int
758 smbfs_unmount(vfs_t *vfsp, int flag, cred_t *cr)
759 {
760 	smbmntinfo_t	*smi;
761 	smbnode_t	*rtnp;
762 
763 	smi = VFTOSMI(vfsp);
764 
765 #ifdef	_KERNEL
766 	if (secpolicy_fs_unmount(cr, vfsp) != 0)
767 		return (EPERM);
768 #endif	/* _KERNEL */
769 
770 	if ((flag & MS_FORCE) == 0) {
771 		smbfs_rflush(vfsp, cr);
772 
773 		/*
774 		 * If there are any active vnodes on this file system,
775 		 * (other than the root vnode) then the file system is
776 		 * busy and can't be umounted.
777 		 */
778 		if (smbfs_check_table(vfsp, smi->smi_root))
779 			return (EBUSY);
780 
781 		/*
782 		 * We normally hold a ref to the root vnode, so
783 		 * check for references beyond the one we expect:
784 		 *   smbmntinfo_t -> smi_root
785 		 * Note that NFS does not hold the root vnode.
786 		 */
787 		if (smi->smi_root &&
788 		    smi->smi_root->r_vnode->v_count > 1)
789 			return (EBUSY);
790 	}
791 
792 	/*
793 	 * common code for both forced and non-forced
794 	 *
795 	 * Setting VFS_UNMOUNTED prevents new operations.
796 	 * Operations already underway may continue,
797 	 * but not for long.
798 	 */
799 	vfsp->vfs_flag |= VFS_UNMOUNTED;
800 
801 	/*
802 	 * If we hold the root VP (and we normally do)
803 	 * then it's safe to release it now.
804 	 */
805 	if (smi->smi_root) {
806 		rtnp = smi->smi_root;
807 		smi->smi_root = NULL;
808 		VN_RELE(rtnp->r_vnode);	/* release root vnode */
809 	}
810 
811 	/*
812 	 * Remove all nodes from the node hash tables.
813 	 * This (indirectly) calls: smbfs_addfree, smbinactive,
814 	 * which will try to flush dirty pages, etc. so
815 	 * don't destroy the underlying share just yet.
816 	 *
817 	 * Also, with a forced unmount, some nodes may
818 	 * remain active, and those will get cleaned up
819 	 * after their last vn_rele.
820 	 */
821 	smbfs_destroy_table(vfsp);
822 
823 	/*
824 	 * Shutdown any outstanding I/O requests on this share,
825 	 * and force a tree disconnect.  The share object will
826 	 * continue to hang around until smb_share_rele().
827 	 * This should also cause most active nodes to be
828 	 * released as their operations fail with EIO.
829 	 */
830 	smb_share_kill(smi->smi_share);
831 
832 	/*
833 	 * Any async taskq work should be giving up.
834 	 * Wait for those to exit.
835 	 */
836 	taskq_destroy(smi->smi_taskq);
837 
838 	/*
839 	 * Delete our kstats...
840 	 *
841 	 * Doing it here, rather than waiting until
842 	 * smbfs_freevfs so these are not visible
843 	 * after the unmount.
844 	 */
845 	if (smi->smi_io_kstats) {
846 		kstat_delete(smi->smi_io_kstats);
847 		smi->smi_io_kstats = NULL;
848 	}
849 	if (smi->smi_ro_kstats) {
850 		kstat_delete(smi->smi_ro_kstats);
851 		smi->smi_ro_kstats = NULL;
852 	}
853 
854 	/*
855 	 * The rest happens in smbfs_freevfs()
856 	 */
857 	return (0);
858 }
859 
860 
861 /*
862  * find root of smbfs
863  */
864 static int
865 smbfs_root(vfs_t *vfsp, vnode_t **vpp)
866 {
867 	smbmntinfo_t	*smi;
868 	vnode_t		*vp;
869 
870 	smi = VFTOSMI(vfsp);
871 
872 	if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
873 		return (EPERM);
874 
875 	if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
876 		return (EIO);
877 
878 	/*
879 	 * The root vp is created in mount and held
880 	 * until unmount, so this is paranoia.
881 	 */
882 	if (smi->smi_root == NULL)
883 		return (EIO);
884 
885 	/* Just take a reference and return it. */
886 	vp = SMBTOV(smi->smi_root);
887 	VN_HOLD(vp);
888 	*vpp = vp;
889 
890 	return (0);
891 }
892 
893 /*
894  * Get file system statistics.
895  */
896 static int
897 smbfs_statvfs(vfs_t *vfsp, statvfs64_t *sbp)
898 {
899 	int		error;
900 	smbmntinfo_t	*smi = VFTOSMI(vfsp);
901 	smb_share_t	*ssp = smi->smi_share;
902 	statvfs64_t	stvfs;
903 	hrtime_t now;
904 	smb_cred_t	scred;
905 
906 	if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
907 		return (EPERM);
908 
909 	if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
910 		return (EIO);
911 
912 	mutex_enter(&smi->smi_lock);
913 
914 	/*
915 	 * Use cached result if still valid.
916 	 */
917 recheck:
918 	now = gethrtime();
919 	if (now < smi->smi_statfstime) {
920 		error = 0;
921 		goto cache_hit;
922 	}
923 
924 	/*
925 	 * FS attributes are stale, so someone
926 	 * needs to do an OTW call to get them.
927 	 * Serialize here so only one thread
928 	 * does the OTW call.
929 	 */
930 	if (smi->smi_status & SM_STATUS_STATFS_BUSY) {
931 		smi->smi_status |= SM_STATUS_STATFS_WANT;
932 		if (!cv_wait_sig(&smi->smi_statvfs_cv, &smi->smi_lock)) {
933 			mutex_exit(&smi->smi_lock);
934 			return (EINTR);
935 		}
936 		/* Hope status is valid now. */
937 		goto recheck;
938 	}
939 	smi->smi_status |= SM_STATUS_STATFS_BUSY;
940 	mutex_exit(&smi->smi_lock);
941 
942 	/*
943 	 * Do the OTW call.  Note: lock NOT held.
944 	 */
945 	smb_credinit(&scred, NULL);
946 	bzero(&stvfs, sizeof (stvfs));
947 	error = smbfs_smb_statfs(ssp, &stvfs, &scred);
948 	smb_credrele(&scred);
949 	if (error) {
950 		SMBVDEBUG("statfs error=%d\n", error);
951 	} else {
952 
953 		/*
954 		 * Set a few things the OTW call didn't get.
955 		 */
956 		stvfs.f_frsize = stvfs.f_bsize;
957 		stvfs.f_favail = stvfs.f_ffree;
958 		stvfs.f_fsid = (unsigned long)vfsp->vfs_fsid.val[0];
959 		bcopy(fs_type_name, stvfs.f_basetype, FSTYPSZ);
960 		stvfs.f_flag	= vf_to_stf(vfsp->vfs_flag);
961 		stvfs.f_namemax	= smi->smi_fsa.fsa_maxname;
962 
963 		/*
964 		 * Save the result, update lifetime
965 		 */
966 		now = gethrtime();
967 		smi->smi_statfstime = now +
968 		    (SM_MAX_STATFSTIME * (hrtime_t)NANOSEC);
969 		smi->smi_statvfsbuf = stvfs; /* struct assign! */
970 	}
971 
972 	mutex_enter(&smi->smi_lock);
973 	if (smi->smi_status & SM_STATUS_STATFS_WANT)
974 		cv_broadcast(&smi->smi_statvfs_cv);
975 	smi->smi_status &= ~(SM_STATUS_STATFS_BUSY | SM_STATUS_STATFS_WANT);
976 
977 	/*
978 	 * Copy the statvfs data to caller's buf.
979 	 * Note: struct assignment
980 	 */
981 cache_hit:
982 	if (error == 0)
983 		*sbp = smi->smi_statvfsbuf;
984 	mutex_exit(&smi->smi_lock);
985 	return (error);
986 }
987 
988 /*
989  * Flush dirty smbfs files for file system vfsp.
990  * If vfsp == NULL, all smbfs files are flushed.
991  */
992 /*ARGSUSED*/
993 static int
994 smbfs_sync(vfs_t *vfsp, short flag, cred_t *cr)
995 {
996 
997 	/*
998 	 * SYNC_ATTR is used by fsflush() to force old filesystems like UFS
999 	 * to sync metadata, which they would otherwise cache indefinitely.
1000 	 * Semantically, the only requirement is that the sync be initiated.
1001 	 * Assume the server-side takes care of attribute sync.
1002 	 */
1003 	if (flag & SYNC_ATTR)
1004 		return (0);
1005 
1006 	if (vfsp == NULL) {
1007 		/*
1008 		 * Flush ALL smbfs mounts in this zone.
1009 		 */
1010 		smbfs_flushall(cr);
1011 		return (0);
1012 	}
1013 
1014 	smbfs_rflush(vfsp, cr);
1015 
1016 	return (0);
1017 }
1018 
/*
 * One-time setup hook for the VFS-level code, called from _init().
 * There is currently nothing to set up, so this always succeeds.
 */
int
smbfs_vfsinit(void)
{
	int	status = 0;

	return (status);
}
1027 
/*
 * One-time teardown hook for the VFS-level code; the counterpart
 * of smbfs_vfsinit().  There is currently nothing to tear down.
 */
void
smbfs_vfsfini(void)
{
	/* Nothing to do. */
}
1035 
1036 void
1037 smbfs_freevfs(vfs_t *vfsp)
1038 {
1039 	smbmntinfo_t    *smi;
1040 
1041 	/* free up the resources */
1042 	smi = VFTOSMI(vfsp);
1043 
1044 	/*
1045 	 * By this time we should have already deleted the
1046 	 * smi kstats in the unmount code.  If they are still around
1047 	 * something is wrong
1048 	 */
1049 	ASSERT(smi->smi_io_kstats == NULL);
1050 
1051 	smbfs_zonelist_remove(smi);
1052 
1053 	smbfs_free_smi(smi);
1054 
1055 	/*
1056 	 * Allow _fini() to succeed now, if so desired.
1057 	 */
1058 	atomic_dec_32(&smbfs_mountcount);
1059 }
1060 
1061 #ifdef	_KERNEL
1062 /*
1063  * smbfs_mount_label_policy:
1064  *	Determine whether the mount is allowed according to MAC check,
1065  *	by comparing (where appropriate) label of the remote server
1066  *	against the label of the zone being mounted into.
1067  *
1068  *	Returns:
1069  *		 0 :	access allowed
1070  *		-1 :	read-only access allowed (i.e., read-down)
1071  *		>0 :	error code, such as EACCES
1072  *
1073  * NB:
1074  * NFS supports Cipso labels by parsing the vfs_resource
1075  * to see what the Solaris server global zone has shared.
1076  * We can't support that for CIFS since resource names
1077  * contain share names, not paths.
1078  */
1079 static int
1080 smbfs_mount_label_policy(vfs_t *vfsp, void *ipaddr, int addr_type, cred_t *cr)
1081 {
1082 	bslabel_t	*server_sl, *mntlabel;
1083 	zone_t		*mntzone = NULL;
1084 	ts_label_t	*zlabel;
1085 	tsol_tpc_t	*tp;
1086 	ts_label_t	*tsl = NULL;
1087 	int		retv;
1088 
1089 	/*
1090 	 * Get the zone's label.  Each zone on a labeled system has a label.
1091 	 */
1092 	mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE);
1093 	zlabel = mntzone->zone_slabel;
1094 	ASSERT(zlabel != NULL);
1095 	label_hold(zlabel);
1096 
1097 	retv = EACCES;				/* assume the worst */
1098 
1099 	/*
1100 	 * Next, get the assigned label of the remote server.
1101 	 */
1102 	tp = find_tpc(ipaddr, addr_type, B_FALSE);
1103 	if (tp == NULL)
1104 		goto out;			/* error getting host entry */
1105 
1106 	if (tp->tpc_tp.tp_doi != zlabel->tsl_doi)
1107 		goto rel_tpc;			/* invalid domain */
1108 	if ((tp->tpc_tp.host_type != UNLABELED))
1109 		goto rel_tpc;			/* invalid hosttype */
1110 
1111 	server_sl = &tp->tpc_tp.tp_def_label;
1112 	mntlabel = label2bslabel(zlabel);
1113 
1114 	/*
1115 	 * Now compare labels to complete the MAC check.  If the labels
1116 	 * are equal or if the requestor is in the global zone and has
1117 	 * NET_MAC_AWARE, then allow read-write access.   (Except for
1118 	 * mounts into the global zone itself; restrict these to
1119 	 * read-only.)
1120 	 *
1121 	 * If the requestor is in some other zone, but their label
1122 	 * dominates the server, then allow read-down.
1123 	 *
1124 	 * Otherwise, access is denied.
1125 	 */
1126 	if (blequal(mntlabel, server_sl) ||
1127 	    (crgetzoneid(cr) == GLOBAL_ZONEID &&
1128 	    getpflags(NET_MAC_AWARE, cr) != 0)) {
1129 		if ((mntzone == global_zone) ||
1130 		    !blequal(mntlabel, server_sl))
1131 			retv = -1;		/* read-only */
1132 		else
1133 			retv = 0;		/* access OK */
1134 	} else if (bldominates(mntlabel, server_sl)) {
1135 		retv = -1;			/* read-only */
1136 	} else {
1137 		retv = EACCES;
1138 	}
1139 
1140 	if (tsl != NULL)
1141 		label_rele(tsl);
1142 
1143 rel_tpc:
1144 	/*LINTED*/
1145 	TPC_RELE(tp);
1146 out:
1147 	if (mntzone)
1148 		zone_rele(mntzone);
1149 	label_rele(zlabel);
1150 	return (retv);
1151 }
1152 #endif	/* _KERNEL */
1153