xref: /titanic_50/usr/src/uts/common/fs/nfs/nfs3_vfsops.c (revision 8461248208fabd3a8230615f8615e5bf1b4dcdcb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  *	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
29  *	All rights reserved.
30  */
31 
32 #pragma ident	"%Z%%M%	%I%	%E% SMI"
33 
34 #include <sys/param.h>
35 #include <sys/types.h>
36 #include <sys/systm.h>
37 #include <sys/cred.h>
38 #include <sys/vfs.h>
39 #include <sys/vnode.h>
40 #include <sys/pathname.h>
41 #include <sys/sysmacros.h>
42 #include <sys/kmem.h>
43 #include <sys/mkdev.h>
44 #include <sys/mount.h>
45 #include <sys/mntent.h>
46 #include <sys/statvfs.h>
47 #include <sys/errno.h>
48 #include <sys/debug.h>
49 #include <sys/cmn_err.h>
50 #include <sys/utsname.h>
51 #include <sys/bootconf.h>
52 #include <sys/modctl.h>
53 #include <sys/acl.h>
54 #include <sys/flock.h>
55 #include <sys/policy.h>
56 #include <sys/zone.h>
57 #include <sys/class.h>
58 #include <sys/socket.h>
59 #include <sys/netconfig.h>
60 
61 #include <rpc/types.h>
62 #include <rpc/auth.h>
63 #include <rpc/clnt.h>
64 
65 #include <nfs/nfs.h>
66 #include <nfs/nfs_clnt.h>
67 #include <nfs/rnode.h>
68 #include <nfs/mount.h>
69 #include <nfs/nfs_acl.h>
70 
71 #include <fs/fs_subr.h>
72 
73 /*
74  * From rpcsec module (common/rpcsec).
75  */
76 extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t);
77 extern void sec_clnt_freeinfo(struct sec_data *);
78 
/*
 * Printable names of the NFS Version 3 procedures, one per entry.
 * nfs3rootvp() hangs this table off each mntinfo as mi_rfsnames.
 *
 * The order and contents of this structure must be kept in sync with that of
 * rfsreqcnt_v3_tmpl in nfs_stats.c
 */
static char *rfsnames_v3[] = {
	"null",
	"getattr",
	"setattr",
	"lookup",
	"access",
	"readlink",
	"read",
	"write",
	"create",
	"mkdir",
	"symlink",
	"mknod",
	"remove",
	"rmdir",
	"rename",
	"link",
	"readdir",
	"readdirplus",
	"fsstat",
	"fsinfo",
	"pathconf",
	"commit"
};
89 
90 /*
91  * This table maps from NFS protocol number into call type.
92  * Zero means a "Lookup" type call
93  * One  means a "Read" type call
94  * Two  means a "Write" type call
95  * This is used to select a default time-out.
96  */
97 static uchar_t call_type_v3[] = {
98 	0, 0, 1, 0, 0, 0, 1,
99 	2, 2, 2, 2, 2, 2, 2,
100 	2, 2, 1, 2, 0, 0, 0,
101 	2 };
102 
103 /*
104  * Similar table, but to determine which timer to use
105  * (only real reads and writes!)
106  */
107 static uchar_t timer_type_v3[] = {
108 	0, 0, 0, 0, 0, 0, 1,
109 	2, 0, 0, 0, 0, 0, 0,
110 	0, 0, 1, 1, 0, 0, 0,
111 	0 };
112 
113 /*
114  * This table maps from NFS protocol number into a call type
115  * for the semisoft mount option.
116  * Zero means do not repeat operation.
117  * One  means repeat.
118  */
119 static uchar_t ss_call_type_v3[] = {
120 	0, 0, 1, 0, 0, 0, 0,
121 	1, 1, 1, 1, 1, 1, 1,
122 	1, 1, 0, 0, 0, 0, 0,
123 	1 };
124 
125 /*
126  * nfs3 vfs operations.
127  */
/* Entry points that nfs3init() installs through its vfsops template. */
static int	nfs3_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *);
static int	nfs3_unmount(vfs_t *, int, cred_t *);
static int	nfs3_root(vfs_t *, vnode_t **);
static int	nfs3_statvfs(vfs_t *, struct statvfs64 *);
static int	nfs3_sync(vfs_t *, short, cred_t *);
static int	nfs3_vget(vfs_t *, vnode_t **, fid_t *);
static int	nfs3_mountroot(vfs_t *, whymountroot_t);
static void	nfs3_freevfs(vfs_t *);

/* Helper for nfs3_mount(): builds the mntinfo and the root vnode. */
static int	nfs3rootvp(vnode_t **, vfs_t *, struct servinfo *,
		    int, cred_t *, zone_t *);
139 
140 /*
141  * Initialize the vfs structure
142  */
143 
/* File system type index handed to nfs3init(); used to build each fsid. */
static int nfs3fstyp;
/* Passed by address to vfs_setfsops() in nfs3init(). */
vfsops_t *nfs3_vfsops;
146 
147 /*
148  * Debug variable to check for rdma based
149  * transport startup and cleanup. Controlled
150  * through /etc/system. Off by default.
151  */
152 extern int rdma_debug;
153 
154 int
155 nfs3init(int fstyp, char *name)
156 {
157 	static const fs_operation_def_t nfs3_vfsops_template[] = {
158 		VFSNAME_MOUNT, nfs3_mount,
159 		VFSNAME_UNMOUNT, nfs3_unmount,
160 		VFSNAME_ROOT, nfs3_root,
161 		VFSNAME_STATVFS, nfs3_statvfs,
162 		VFSNAME_SYNC, (fs_generic_func_p) nfs3_sync,
163 		VFSNAME_VGET, nfs3_vget,
164 		VFSNAME_MOUNTROOT, nfs3_mountroot,
165 		VFSNAME_FREEVFS, (fs_generic_func_p)nfs3_freevfs,
166 		NULL, NULL
167 	};
168 	int error;
169 
170 	error = vfs_setfsops(fstyp, nfs3_vfsops_template, &nfs3_vfsops);
171 	if (error != 0) {
172 		zcmn_err(GLOBAL_ZONEID, CE_WARN,
173 		    "nfs3init: bad vfs ops template");
174 		return (error);
175 	}
176 
177 	error = vn_make_ops(name, nfs3_vnodeops_template, &nfs3_vnodeops);
178 	if (error != 0) {
179 		(void) vfs_freevfsops_by_type(fstyp);
180 		zcmn_err(GLOBAL_ZONEID, CE_WARN,
181 		    "nfs3init: bad vnode ops template");
182 		return (error);
183 	}
184 
185 	nfs3fstyp = fstyp;
186 
187 	return (0);
188 }
189 
/*
 * Presumably the teardown counterpart of nfs3init() (its callers are
 * outside this part of the file).  There is nothing to undo here.
 */
void
nfs3fini(void)
{
}
194 
195 /*
196  * nfs mount vfsop
197  * Set up mount info record and attach it to vfs struct.
198  */
199 static int
200 nfs3_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
201 {
202 	char *data = uap->dataptr;
203 	int error;
204 	vnode_t *rtvp;			/* the server's root */
205 	mntinfo_t *mi;			/* mount info, pointed at by vfs */
206 	size_t hlen;			/* length of hostname */
207 	size_t nlen;			/* length of netname */
208 	char netname[SYS_NMLN];		/* server's netname */
209 	struct netbuf addr;		/* server's address */
210 	struct netbuf syncaddr;		/* AUTH_DES time sync addr */
211 	struct knetconfig *knconf;	/* transport knetconfig structure */
212 	struct knetconfig *rdma_knconf;	/* rdma transport structure */
213 	rnode_t *rp;
214 	struct servinfo *svp;		/* nfs server info */
215 	struct servinfo *svp_tail = NULL; /* previous nfs server info */
216 	struct servinfo *svp_head;	/* first nfs server info */
217 	struct servinfo *svp_2ndlast;	/* 2nd last in server info list */
218 	struct sec_data *secdata;	/* security data */
219 	STRUCT_DECL(nfs_args, args);	/* nfs mount arguments */
220 	STRUCT_DECL(knetconfig, knconf_tmp);
221 	STRUCT_DECL(netbuf, addr_tmp);
222 	int flags, addr_type;
223 	char *p, *pf;
224 	zone_t *zone = curproc->p_zone;
225 
226 	if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
227 		return (EPERM);
228 
229 	if (mvp->v_type != VDIR)
230 		return (ENOTDIR);
231 
232 	/*
233 	 * get arguments
234 	 *
235 	 * nfs_args is now versioned and is extensible, so
236 	 * uap->datalen might be different from sizeof (args)
237 	 * in a compatible situation.
238 	 */
239 more:
240 	STRUCT_INIT(args, get_udatamodel());
241 	bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE));
242 	if (copyin(data, STRUCT_BUF(args), MIN(uap->datalen,
243 	    STRUCT_SIZE(args))))
244 		return (EFAULT);
245 
246 	flags = STRUCT_FGET(args, flags);
247 
248 	if (uap->flags & MS_REMOUNT) {
249 		size_t n;
250 		char name[FSTYPSZ];
251 
252 		if (uap->flags & MS_SYSSPACE)
253 			error = copystr(uap->fstype, name, FSTYPSZ, &n);
254 		else
255 			error = copyinstr(uap->fstype, name, FSTYPSZ, &n);
256 
257 		if (error) {
258 			if (error == ENAMETOOLONG)
259 				return (EINVAL);
260 			return (error);
261 		}
262 
263 		/*
264 		 * This check is to ensure that the request is a
265 		 * genuine nfs remount request.
266 		 */
267 
268 		if (strncmp(name, "nfs", 3) != 0)
269 			return (EINVAL);
270 
271 		/*
272 		 * If the request changes the locking type, disallow the
273 		 * remount,
274 		 * because it's questionable whether we can transfer the
275 		 * locking state correctly.
276 		 */
277 
278 		if ((mi = VFTOMI(vfsp)) != NULL) {
279 			uint_t new_mi_llock;
280 			uint_t old_mi_llock;
281 
282 			new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0;
283 			old_mi_llock = (mi->mi_flags & MI_LLOCK) ? 1 : 0;
284 			if (old_mi_llock != new_mi_llock)
285 				return (EBUSY);
286 		}
287 		return (0);
288 	}
289 
290 	mutex_enter(&mvp->v_lock);
291 	if (!(uap->flags & MS_OVERLAY) &&
292 	    (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
293 		mutex_exit(&mvp->v_lock);
294 		return (EBUSY);
295 	}
296 	mutex_exit(&mvp->v_lock);
297 
298 	/* make sure things are zeroed for errout: */
299 	rtvp = NULL;
300 	mi = NULL;
301 	addr.buf = NULL;
302 	syncaddr.buf = NULL;
303 	secdata = NULL;
304 
305 	/*
306 	 * A valid knetconfig structure is required.
307 	 */
308 	if (!(flags & NFSMNT_KNCONF))
309 		return (EINVAL);
310 
311 	/*
312 	 * Allocate a servinfo struct.
313 	 */
314 	svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
315 	mutex_init(&svp->sv_lock, NULL, MUTEX_DEFAULT, NULL);
316 	if (svp_tail) {
317 		svp_2ndlast = svp_tail;
318 		svp_tail->sv_next = svp;
319 	} else {
320 		svp_head = svp;
321 		svp_2ndlast = svp;
322 	}
323 
324 	svp_tail = svp;
325 
326 	/*
327 	 * Allocate space for a knetconfig structure and
328 	 * its strings and copy in from user-land.
329 	 */
330 	knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP);
331 	svp->sv_knconf = knconf;
332 	STRUCT_INIT(knconf_tmp, get_udatamodel());
333 	if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp),
334 	    STRUCT_SIZE(knconf_tmp))) {
335 		sv_free(svp_head);
336 		return (EFAULT);
337 	}
338 
339 	knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics);
340 	knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly);
341 	knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto);
342 	if (get_udatamodel() != DATAMODEL_LP64) {
343 		knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev));
344 	} else {
345 		knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev);
346 	}
347 
348 	pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
349 	p = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
350 	error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL);
351 	if (error) {
352 		kmem_free(pf, KNC_STRSIZE);
353 		kmem_free(p, KNC_STRSIZE);
354 		sv_free(svp_head);
355 		return (error);
356 	}
357 	error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL);
358 	if (error) {
359 		kmem_free(pf, KNC_STRSIZE);
360 		kmem_free(p, KNC_STRSIZE);
361 		sv_free(svp_head);
362 		return (error);
363 	}
364 	knconf->knc_protofmly = pf;
365 	knconf->knc_proto = p;
366 
367 	/*
368 	 * Get server address
369 	 */
370 	STRUCT_INIT(addr_tmp, get_udatamodel());
371 	if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp),
372 	    STRUCT_SIZE(addr_tmp))) {
373 		addr.buf = NULL;
374 		error = EFAULT;
375 	} else {
376 		char *userbufptr;
377 
378 		userbufptr = addr.buf = STRUCT_FGETP(addr_tmp, buf);
379 		addr.len = STRUCT_FGET(addr_tmp, len);
380 		addr.buf = kmem_alloc(addr.len, KM_SLEEP);
381 		addr.maxlen = addr.len;
382 		if (copyin(userbufptr, addr.buf, addr.len))
383 			error = EFAULT;
384 	}
385 	svp->sv_addr = addr;
386 	if (error)
387 		goto errout;
388 
389 	/*
390 	 * Get the root fhandle
391 	 */
392 	if (copyin(STRUCT_FGETP(args, fh), &svp->sv_fhandle,
393 	    sizeof (svp->sv_fhandle))) {
394 		error = EFAULT;
395 		goto errout;
396 	}
397 
398 	/*
399 	 * Check the root fhandle length
400 	 */
401 	if (svp->sv_fhandle.fh_len > NFS3_FHSIZE) {
402 		error = EINVAL;
403 #ifdef DEBUG
404 		zcmn_err(getzoneid(), CE_WARN,
405 		    "nfs3_mount: got an invalid fhandle. fh_len = %d",
406 		    svp->sv_fhandle.fh_len);
407 		svp->sv_fhandle.fh_len = NFS_FHANDLE_LEN;
408 		nfs_printfhandle(&svp->sv_fhandle);
409 #endif
410 		goto errout;
411 	}
412 
413 	/*
414 	 * Get server's hostname
415 	 */
416 	if (flags & NFSMNT_HOSTNAME) {
417 		error = copyinstr(STRUCT_FGETP(args, hostname),
418 		    netname, sizeof (netname), &hlen);
419 		if (error)
420 			goto errout;
421 	} else {
422 		char *p = "unknown-host";
423 		hlen = strlen(p) + 1;
424 		(void) strcpy(netname, p);
425 	}
426 	svp->sv_hostnamelen = hlen;
427 	svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP);
428 	(void) strcpy(svp->sv_hostname, netname);
429 
430 	/*
431 	 * RDMA MOUNT SUPPORT FOR NFS v3:
432 	 * Establish, is it possible to use RDMA, if so overload the
433 	 * knconf with rdma specific knconf and free the orignal.
434 	 */
435 	if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) {
436 		/*
437 		 * Determine the addr type for RDMA, IPv4 or v6.
438 		 */
439 		if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0)
440 			addr_type = AF_INET;
441 		else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0)
442 			addr_type = AF_INET6;
443 
444 		if (rdma_reachable(addr_type, &svp->sv_addr,
445 			&rdma_knconf) == 0) {
446 			/*
447 			 * If successful, hijack the orignal knconf and
448 			 * replace with a new one, depending on the flags.
449 			 */
450 			svp->sv_origknconf = svp->sv_knconf;
451 			svp->sv_knconf = rdma_knconf;
452 			knconf = rdma_knconf;
453 		} else {
454 			if (flags & NFSMNT_TRYRDMA) {
455 #ifdef	DEBUG
456 				if (rdma_debug)
457 					zcmn_err(getzoneid(), CE_WARN,
458 					    "no RDMA onboard, revert\n");
459 #endif
460 			}
461 
462 			if (flags & NFSMNT_DORDMA) {
463 				/*
464 				 * If proto=rdma is specified and no RDMA
465 				 * path to this server is avialable then
466 				 * ditch this server.
467 				 * This is not included in the mountable
468 				 * server list or the replica list.
469 				 * Check if more servers are specified;
470 				 * Failover case, otherwise bail out of mount.
471 				 */
472 				if (STRUCT_FGET(args, nfs_args_ext) ==
473 				    NFS_ARGS_EXTB && STRUCT_FGETP(args,
474 					nfs_ext_u.nfs_extB.next) != NULL) {
475 					if (uap->flags & MS_RDONLY &&
476 					    !(flags & NFSMNT_SOFT)) {
477 						data = (char *)
478 						    STRUCT_FGETP(args,
479 						nfs_ext_u.nfs_extB.next);
480 						if (svp_head->sv_next == NULL) {
481 							svp_tail = NULL;
482 							svp_2ndlast = NULL;
483 							sv_free(svp_head);
484 							goto more;
485 						} else {
486 							svp_tail = svp_2ndlast;
487 							svp_2ndlast->sv_next =
488 							    NULL;
489 							sv_free(svp);
490 							goto more;
491 						}
492 					}
493 				} else {
494 					/*
495 					 * This is the last server specified
496 					 * in the nfs_args list passed down
497 					 * and its not rdma capable.
498 					 */
499 					if (svp_head->sv_next == NULL) {
500 						/*
501 						 * Is this the only one
502 						 */
503 						error = EINVAL;
504 #ifdef	DEBUG
505 						if (rdma_debug)
506 							zcmn_err(getzoneid(),
507 							    CE_WARN,
508 							    "No RDMA srv");
509 #endif
510 						goto errout;
511 					} else {
512 						/*
513 						 * There is list, since some
514 						 * servers specified before
515 						 * this passed all requirements
516 						 */
517 						svp_tail = svp_2ndlast;
518 						svp_2ndlast->sv_next = NULL;
519 						sv_free(svp);
520 						goto proceed;
521 					}
522 				}
523 			}
524 		}
525 	}
526 
527 	/*
528 	 * Get the extention data which has the new security data structure.
529 	 */
530 	if (flags & NFSMNT_NEWARGS) {
531 		switch (STRUCT_FGET(args, nfs_args_ext)) {
532 		case NFS_ARGS_EXTA:
533 		case NFS_ARGS_EXTB:
534 			/*
535 			 * Indicating the application is using the new
536 			 * sec_data structure to pass in the security
537 			 * data.
538 			 */
539 			if (STRUCT_FGETP(args,
540 			    nfs_ext_u.nfs_extA.secdata) == NULL) {
541 				error = EINVAL;
542 			} else {
543 				error = sec_clnt_loadinfo(
544 				    (struct sec_data *)STRUCT_FGETP(args,
545 					nfs_ext_u.nfs_extA.secdata),
546 				    &secdata, get_udatamodel());
547 			}
548 			break;
549 
550 		default:
551 			error = EINVAL;
552 			break;
553 		}
554 	} else if (flags & NFSMNT_SECURE) {
555 		/*
556 		 * Keep this for backward compatibility to support
557 		 * NFSMNT_SECURE/NFSMNT_RPCTIMESYNC flags.
558 		 */
559 		if (STRUCT_FGETP(args, syncaddr) == NULL) {
560 			error = EINVAL;
561 		} else {
562 			/*
563 			 * get time sync address.
564 			 */
565 			if (copyin(STRUCT_FGETP(args, syncaddr), &addr_tmp,
566 			    STRUCT_SIZE(addr_tmp))) {
567 				syncaddr.buf = NULL;
568 				error = EFAULT;
569 			} else {
570 				char *userbufptr;
571 
572 				userbufptr = syncaddr.buf =
573 				    STRUCT_FGETP(addr_tmp, buf);
574 				syncaddr.len =
575 				    STRUCT_FGET(addr_tmp, len);
576 				syncaddr.buf = kmem_alloc(syncaddr.len,
577 				    KM_SLEEP);
578 				syncaddr.maxlen = syncaddr.len;
579 
580 				if (copyin(userbufptr, syncaddr.buf,
581 				    syncaddr.len))
582 					error = EFAULT;
583 			}
584 
585 			/*
586 			 * get server's netname
587 			 */
588 			if (!error) {
589 				error = copyinstr(STRUCT_FGETP(args, netname),
590 				    netname, sizeof (netname), &nlen);
591 				netname[nlen] = '\0';
592 			}
593 
594 			if (error && syncaddr.buf != NULL) {
595 				kmem_free(syncaddr.buf, syncaddr.len);
596 				syncaddr.buf = NULL;
597 			}
598 		}
599 
600 		/*
601 		 * Move security related data to the sec_data structure.
602 		 */
603 		if (!error) {
604 			dh_k4_clntdata_t *data;
605 			char *pf, *p;
606 
607 			secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP);
608 			if (flags & NFSMNT_RPCTIMESYNC)
609 				secdata->flags |= AUTH_F_RPCTIMESYNC;
610 			data = kmem_alloc(sizeof (*data), KM_SLEEP);
611 			data->syncaddr = syncaddr;
612 
613 			/*
614 			 * duplicate the knconf information for the
615 			 * new opaque data.
616 			 */
617 			data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP);
618 			*data->knconf = *knconf;
619 			pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
620 			p = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
621 			bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE);
622 			bcopy(knconf->knc_proto, pf, KNC_STRSIZE);
623 			data->knconf->knc_protofmly = pf;
624 			data->knconf->knc_proto = p;
625 
626 			/* move server netname to the sec_data structure */
627 			if (nlen != 0) {
628 				data->netname = kmem_alloc(nlen, KM_SLEEP);
629 				bcopy(netname, data->netname, nlen);
630 				data->netnamelen = (int)nlen;
631 			}
632 			secdata->secmod = secdata->rpcflavor = AUTH_DES;
633 			secdata->data = (caddr_t)data;
634 		}
635 	} else {
636 		secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP);
637 		secdata->secmod = secdata->rpcflavor = AUTH_UNIX;
638 		secdata->data = NULL;
639 	}
640 	svp->sv_secdata = secdata;
641 	if (error)
642 		goto errout;
643 
644 	/*
645 	 * See bug 1180236.
646 	 * If mount secure failed, we will fall back to AUTH_NONE
647 	 * and try again.  nfs3rootvp() will turn this back off.
648 	 *
649 	 * The NFS Version 3 mount uses the FSINFO and GETATTR
650 	 * procedures.  The server should not care if these procedures
651 	 * have the proper security flavor, so if mount retries using
652 	 * AUTH_NONE that does not require a credential setup for root
653 	 * then the automounter would work without requiring root to be
654 	 * keylogged into AUTH_DES.
655 	 */
656 	if (secdata->rpcflavor != AUTH_UNIX &&
657 	    secdata->rpcflavor != AUTH_LOOPBACK)
658 		secdata->flags |= AUTH_F_TRYNONE;
659 
660 	/*
661 	 * Failover support:
662 	 *
663 	 * We may have a linked list of nfs_args structures,
664 	 * which means the user is looking for failover.  If
665 	 * the mount is either not "read-only" or "soft",
666 	 * we want to bail out with EINVAL.
667 	 */
668 	if (STRUCT_FGET(args, nfs_args_ext) == NFS_ARGS_EXTB &&
669 	    STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next) != NULL) {
670 		if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) {
671 			data = (char *)STRUCT_FGETP(args,
672 			    nfs_ext_u.nfs_extB.next);
673 			goto more;
674 		}
675 		error = EINVAL;
676 		goto errout;
677 	}
678 
679 	/*
680 	 * Determine the zone we're being mounted into.
681 	 */
682 	if (getzoneid() == GLOBAL_ZONEID) {
683 		zone_t *mntzone;
684 
685 		mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
686 		ASSERT(mntzone != NULL);
687 		zone_rele(mntzone);
688 		if (mntzone != zone) {
689 			error = EBUSY;
690 			goto errout;
691 		}
692 	}
693 
694 	/*
695 	 * Stop the mount from going any further if the zone is going away.
696 	 */
697 	if (zone_status_get(zone) >= ZONE_IS_SHUTTING_DOWN) {
698 		error = EBUSY;
699 		goto errout;
700 	}
701 
702 	/*
703 	 * Get root vnode.
704 	 */
705 proceed:
706 	error = nfs3rootvp(&rtvp, vfsp, svp_head, flags, cr, zone);
707 
708 	if (error)
709 		goto errout;
710 
711 	/*
712 	 * Set option fields in the mount info record
713 	 */
714 	mi = VTOMI(rtvp);
715 
716 	if (svp_head->sv_next)
717 		mi->mi_flags |= MI_LLOCK;
718 
719 	error = nfs_setopts(rtvp, get_udatamodel(), STRUCT_BUF(args));
720 
721 errout:
722 	if (error) {
723 		if (rtvp != NULL) {
724 			rp = VTOR(rtvp);
725 			if (rp->r_flags & RHASHED)
726 				rp_rmhash(rp);
727 		}
728 		sv_free(svp_head);
729 		if (mi != NULL) {
730 			nfs_async_stop(vfsp);
731 			nfs_async_manager_stop(vfsp);
732 			if (mi->mi_io_kstats) {
733 				kstat_delete(mi->mi_io_kstats);
734 				mi->mi_io_kstats = NULL;
735 			}
736 			if (mi->mi_ro_kstats) {
737 				kstat_delete(mi->mi_ro_kstats);
738 				mi->mi_ro_kstats = NULL;
739 			}
740 			nfs_free_mi(mi);
741 		}
742 	}
743 
744 	if (rtvp != NULL)
745 		VN_RELE(rtvp);
746 
747 	return (error);
748 }
749 
/*
 * Defaults that nfs3rootvp() applies to every new NFS Version 3 mount.
 */
static int nfs3_dynamic = 0;	/* global variable to enable dynamic retrans. */
static ushort_t nfs3_max_threads = 8;	/* max number of active async threads */
static uint_t nfs3_bsize = 32 * 1024;	/* client `block' size */
static uint_t nfs3_async_clusters = 1;	/* # of reqs from each async queue */
/* mi_timeo used when the transport semantics are COTS or COTS_ORD */
static uint_t nfs3_cots_timeo = NFS_COTS_TIMEO;
755 
/*
 * Create the mount info record for a new NFS Version 3 mount, attach
 * it to the vfs and produce the root vnode.
 *
 *	rtvpp	out: root vnode on success, NULL on failure
 *	vfsp	vfs being mounted; vfs_dev, vfs_fsid, vfs_data,
 *		vfs_fstype and vfs_bsize are filled in here
 *	svp	head of the servinfo list for this mount
 *	flags	NFSMNT_* flags from the mount arguments
 *	cr	credentials of the caller
 *	zone	zone the mount belongs to; must be the caller's zone
 *
 * An FSINFO call is made to every server on the list so that the
 * transfer sizes and maximum file size end up as the minimum over all
 * servers.  If no FSINFO reply supplied the type of the root, a
 * GETATTR is used to obtain it.
 *
 * Returns 0 on success, otherwise an errno value; on failure the root
 * vnode is released and the mntinfo is freed.  The servinfo list is
 * not freed here.
 */
static int
nfs3rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo *svp,
	int flags, cred_t *cr, zone_t *zone)
{
	vnode_t *rtvp;
	mntinfo_t *mi;
	dev_t nfs_dev;
	struct vattr va;
	struct FSINFO3args args;
	struct FSINFO3res res;
	int error;
	int douprintf;
	rnode_t *rp;
	int i;
	uint_t max_transfer_size;
	struct nfs_stats *nfsstatsp;
	cred_t *lcr = NULL, *tcr = cr;	/* tcr is what the RPCs are sent with */

	nfsstatsp = zone_getspecific(nfsstat_zone_key, curproc->p_zone);
	ASSERT(nfsstatsp != NULL);

	ASSERT(curproc->p_zone == zone);
	/*
	 * Create a mount record and link it to the vfs struct.
	 */
	mi = kmem_zalloc(sizeof (*mi), KM_SLEEP);
	mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL);
	mi->mi_flags = MI_ACL | MI_EXTATTR;
	if (!(flags & NFSMNT_SOFT))
		mi->mi_flags |= MI_HARD;
	if ((flags & NFSMNT_SEMISOFT))
		mi->mi_flags |= MI_SEMISOFT;
	if ((flags & NFSMNT_NOPRINT))
		mi->mi_flags |= MI_NOPRINT;
	if (flags & NFSMNT_INT)
		mi->mi_flags |= MI_INT;
	mi->mi_retrans = NFS_RETRIES;
	/* The default time-out depends on the transport semantics. */
	if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD ||
	    svp->sv_knconf->knc_semantics == NC_TPI_COTS)
		mi->mi_timeo = nfs3_cots_timeo;
	else
		mi->mi_timeo = NFS_TIMEO;
	mi->mi_prog = NFS_PROGRAM;
	mi->mi_vers = NFS_V3;
	mi->mi_rfsnames = rfsnames_v3;
	mi->mi_reqs = nfsstatsp->nfs_stats_v3.rfsreqcnt_ptr;
	mi->mi_call_type = call_type_v3;
	mi->mi_ss_call_type = ss_call_type_v3;
	mi->mi_timer_type = timer_type_v3;
	mi->mi_aclnames = aclnames_v3;
	mi->mi_aclreqs = nfsstatsp->nfs_stats_v3.aclreqcnt_ptr;
	mi->mi_acl_call_type = acl_call_type_v3;
	mi->mi_acl_ss_call_type = acl_ss_call_type_v3;
	mi->mi_acl_timer_type = acl_timer_type_v3;
	cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL);
	mi->mi_servers = svp;
	mi->mi_curr_serv = svp;
	mi->mi_acregmin = SEC2HR(ACREGMIN);
	mi->mi_acregmax = SEC2HR(ACREGMAX);
	mi->mi_acdirmin = SEC2HR(ACDIRMIN);
	mi->mi_acdirmax = SEC2HR(ACDIRMAX);

	if (nfs3_dynamic)
		mi->mi_flags |= MI_DYNAMIC;

	if (flags & NFSMNT_DIRECTIO)
		mi->mi_flags |= MI_DIRECTIO;

	/*
	 * Make a vfs struct for nfs.  We do this here instead of below
	 * because rtvp needs a vfs before we can do a getattr on it.
	 *
	 * Assign a unique device id to the mount
	 */
	mutex_enter(&nfs_minor_lock);
	do {
		nfs_minor = (nfs_minor + 1) & MAXMIN32;
		nfs_dev = makedevice(nfs_major, nfs_minor);
	} while (vfs_devismounted(nfs_dev));
	mutex_exit(&nfs_minor_lock);

	vfsp->vfs_dev = nfs_dev;
	vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfs3fstyp);
	vfsp->vfs_data = (caddr_t)mi;
	/*
	 * NOTE(review): the fsid above is built from nfs3fstyp while
	 * vfs_fstype is set from nfsfstyp; confirm this is intended.
	 */
	vfsp->vfs_fstype = nfsfstyp;

	/*
	 * Verify that nfs3_bsize tuneable is set to an
	 * acceptable value.  It must be a multiple of PAGESIZE or
	 * file corruption can occur.
	 */
	if (nfs3_bsize & PAGEOFFSET)
		nfs3_bsize &= PAGEMASK;
	if (nfs3_bsize < PAGESIZE)
		nfs3_bsize = PAGESIZE;
	vfsp->vfs_bsize = nfs3_bsize;

	/*
	 * Initialize fields used to support async putpage operations.
	 */
	for (i = 0; i < NFS_ASYNC_TYPES; i++)
		mi->mi_async_clusters[i] = nfs3_async_clusters;
	mi->mi_async_init_clusters = nfs3_async_clusters;
	mi->mi_async_curr = &mi->mi_async_reqs[0];
	mi->mi_max_threads = nfs3_max_threads;
	mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&mi->mi_async_work_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL);

	mi->mi_vfsp = vfsp;
	/* The mntinfo keeps its own hold on the zone. */
	zone_hold(mi->mi_zone = zone);
	nfs_mi_zonelist_add(mi);

	/*
	 * Make the root vnode, use it to get attributes,
	 * then remake it with the attributes.
	 */
	rtvp = makenfs3node((nfs_fh3 *)&svp->sv_fhandle,
	    NULL, vfsp, gethrtime(), cr, NULL, NULL);

	/*
	 * Make the FSINFO calls, primarily at this point to
	 * determine the transfer size.  For client failover,
	 * we'll want this to be the minimum bid from any
	 * server, so that we don't overrun stated limits.
	 *
	 * While we're looping, we'll turn off AUTH_F_TRYNONE,
	 * which is only for the mount operation.
	 */

	mi->mi_tsize = nfs3_tsize(svp->sv_knconf);
	mi->mi_stsize = mi->mi_tsize;

	mi->mi_curread = nfs3_bsize;
	mi->mi_curwrite = mi->mi_curread;

	/*
	 * If the uid is set then set the creds for secure mounts
	 * by proxy processes such as automountd.
	 */
	if (svp->sv_secdata->uid != 0 &&
	    svp->sv_secdata->rpcflavor == RPCSEC_GSS) {
		lcr = crdup(cr);
		(void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr));
		tcr = lcr;
	}

	/* svp is reused as the loop cursor; mi->mi_servers keeps the head. */
	for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) {
		douprintf = 1;
		mi->mi_curr_serv = svp;
		max_transfer_size = nfs3_tsize(svp->sv_knconf);
		mi->mi_tsize = MIN(max_transfer_size, mi->mi_tsize);
		mi->mi_stsize = MIN(max_transfer_size, mi->mi_stsize);
		mi->mi_curread = MIN(max_transfer_size, mi->mi_curread);
		mi->mi_curwrite = MIN(max_transfer_size, mi->mi_curwrite);
		args.fsroot = *(nfs_fh3 *)&svp->sv_fhandle;

		error = rfs3call(mi, NFSPROC3_FSINFO,
		    xdr_nfs_fh3, (caddr_t)&args,
		    xdr_FSINFO3res, (caddr_t)&res, tcr,
		    &douprintf, &res.status, 0, NULL);
		if (error)
			goto bad;
		error = geterrno3(res.status);
		if (error)
			goto bad;

		/* get type of root node */
		if (res.resok.obj_attributes.attributes) {
			if (res.resok.obj_attributes.attr.type < NF3REG ||
			    res.resok.obj_attributes.attr.type > NF3FIFO) {
#ifdef DEBUG
				zcmn_err(getzoneid(), CE_WARN,
			    "NFS3 server %s returned a bad file type for root",
				    svp->sv_hostname);
#else
				zcmn_err(getzoneid(), CE_WARN,
			    "NFS server %s returned a bad file type for root",
				    svp->sv_hostname);
#endif
				error = EINVAL;
				goto bad;
			} else {
				/*
				 * Every server must report the same type
				 * for the root once one has been seen.
				 */
				if (rtvp->v_type != VNON &&
		rtvp->v_type != nf3_to_vt[res.resok.obj_attributes.attr.type]) {
#ifdef DEBUG
					zcmn_err(getzoneid(), CE_WARN,
		"NFS3 server %s returned a different file type for root",
					    svp->sv_hostname);
#else
					zcmn_err(getzoneid(), CE_WARN,
		"NFS server %s returned a different file type for root",
					    svp->sv_hostname);
#endif
					error = EINVAL;
					goto bad;
				}
				rtvp->v_type =
				nf3_to_vt[res.resok.obj_attributes.attr.type];
			}
		}

		/*
		 * Read sizes: rtmax bounds mi_tsize, and rtpref (or rtmax
		 * when rtpref is 0) bounds mi_curread.  Both being 0 is
		 * an error.
		 */
		if (res.resok.rtmax != 0) {
			mi->mi_tsize = MIN(res.resok.rtmax, mi->mi_tsize);
			if (res.resok.rtpref != 0) {
				mi->mi_curread = MIN(res.resok.rtpref,
						    mi->mi_curread);
			} else {
				mi->mi_curread = MIN(res.resok.rtmax,
						    mi->mi_curread);
			}
		} else if (res.resok.rtpref != 0) {
			mi->mi_tsize = MIN(res.resok.rtpref, mi->mi_tsize);
			mi->mi_curread = MIN(res.resok.rtpref, mi->mi_curread);
		} else {
#ifdef DEBUG
			zcmn_err(getzoneid(), CE_WARN,
			    "NFS3 server %s returned 0 for read transfer sizes",
			    svp->sv_hostname);
#else
			zcmn_err(getzoneid(), CE_WARN,
			    "NFS server %s returned 0 for read transfer sizes",
			    svp->sv_hostname);
#endif
			error = EIO;
			goto bad;
		}
		/* Write sizes: same scheme using wtmax and wtpref. */
		if (res.resok.wtmax != 0) {
			mi->mi_stsize = MIN(res.resok.wtmax, mi->mi_stsize);
			if (res.resok.wtpref != 0) {
				mi->mi_curwrite = MIN(res.resok.wtpref,
						    mi->mi_curwrite);
			} else {
				mi->mi_curwrite = MIN(res.resok.wtmax,
						    mi->mi_curwrite);
			}
		} else if (res.resok.wtpref != 0) {
			mi->mi_stsize = MIN(res.resok.wtpref, mi->mi_stsize);
			mi->mi_curwrite = MIN(res.resok.wtpref,
					    mi->mi_curwrite);
		} else {
#ifdef DEBUG
			zcmn_err(getzoneid(), CE_WARN,
			"NFS3 server %s returned 0 for write transfer sizes",
			    svp->sv_hostname);
#else
			zcmn_err(getzoneid(), CE_WARN,
			"NFS server %s returned 0 for write transfer sizes",
			    svp->sv_hostname);
#endif
			error = EIO;
			goto bad;
		}

		/*
		 * These signal the ability of the server to create
		 * hard links and symbolic links, so they really
		 * aren't relevant if there is more than one server.
		 * We'll set them here, though it probably looks odd.
		 */
		if (res.resok.properties & FSF3_LINK)
			mi->mi_flags |= MI_LINK;
		if (res.resok.properties & FSF3_SYMLINK)
			mi->mi_flags |= MI_SYMLINK;

		/* Pick up smallest non-zero maxfilesize value */
		if (res.resok.maxfilesize) {
			if (mi->mi_maxfilesize) {
				mi->mi_maxfilesize = MIN(mi->mi_maxfilesize,
							res.resok.maxfilesize);
			} else
				mi->mi_maxfilesize = res.resok.maxfilesize;
		}

		/*
		 * AUTH_F_TRYNONE is only for the mount operation,
		 * so turn it back off.
		 */
		svp->sv_secdata->flags &= ~AUTH_F_TRYNONE;
	}
	/* Done probing; make the first server the current one again. */
	mi->mi_curr_serv = mi->mi_servers;

	/*
	 * Start the thread responsible for handling async worker threads.
	 */
	VFS_HOLD(vfsp);	/* add reference for thread */
	mi->mi_manager_thread = zthread_create(NULL, 0, nfs_async_manager,
					vfsp, 0, minclsyspri);
	ASSERT(mi->mi_manager_thread != NULL);

	/*
	 * Initialize kstats
	 */
	nfs_mnt_kstat_init(vfsp);

	/* If we didn't get a type, get one now */
	if (rtvp->v_type == VNON) {
		va.va_mask = AT_ALL;

		error = nfs3getattr(rtvp, &va, tcr);
		if (error)
			goto bad;
		rtvp->v_type = va.va_type;
	}

	mi->mi_type = rtvp->v_type;

	*rtvpp = rtvp;
	if (lcr != NULL)
		crfree(lcr);

	return (0);
bad:
	/*
	 * An error occurred somewhere, need to clean up...
	 * We need to release our reference to the root vnode and
	 * destroy the mntinfo struct that we just created.
	 *
	 * NOTE(review): this path is also reached from inside the FSINFO
	 * loop, i.e. before the manager thread and the kstats exist;
	 * confirm the stop routines below tolerate that.
	 */
	if (lcr != NULL)
		crfree(lcr);
	rp = VTOR(rtvp);
	if (rp->r_flags & RHASHED)
		rp_rmhash(rp);
	VN_RELE(rtvp);
	nfs_async_stop(vfsp);
	nfs_async_manager_stop(vfsp);
	if (mi->mi_io_kstats) {
		kstat_delete(mi->mi_io_kstats);
		mi->mi_io_kstats = NULL;
	}
	if (mi->mi_ro_kstats) {
		kstat_delete(mi->mi_ro_kstats);
		mi->mi_ro_kstats = NULL;
	}
	nfs_free_mi(mi);
	*rtvpp = NULL;
	return (error);
}
1095 
1096 /*
1097  * vfs operations
1098  */
1099 static int
1100 nfs3_unmount(vfs_t *vfsp, int flag, cred_t *cr)
1101 {
1102 	mntinfo_t *mi;
1103 	ushort_t omax;
1104 
1105 	if (secpolicy_fs_unmount(cr, vfsp) != 0)
1106 		return (EPERM);
1107 
1108 	mi = VFTOMI(vfsp);
1109 	if (flag & MS_FORCE) {
1110 		vfsp->vfs_flag |= VFS_UNMOUNTED;
1111 		/*
1112 		 * We need to stop the manager thread explicitly; the worker
1113 		 * threads can time out and exit on their own.
1114 		 */
1115 		nfs_async_manager_stop(vfsp);
1116 		destroy_rtable(vfsp, cr);
1117 		if (mi->mi_io_kstats) {
1118 			kstat_delete(mi->mi_io_kstats);
1119 			mi->mi_io_kstats = NULL;
1120 		}
1121 		if (mi->mi_ro_kstats) {
1122 			kstat_delete(mi->mi_ro_kstats);
1123 			mi->mi_ro_kstats = NULL;
1124 		}
1125 		return (0);
1126 	}
1127 	/*
1128 	 * Wait until all asynchronous putpage operations on
1129 	 * this file system are complete before flushing rnodes
1130 	 * from the cache.
1131 	 */
1132 	omax = mi->mi_max_threads;
1133 	if (nfs_async_stop_sig(vfsp)) {
1134 		return (EINTR);
1135 	}
1136 	rflush(vfsp, cr);
1137 	/*
1138 	 * If there are any active vnodes on this file system,
1139 	 * then the file system is busy and can't be umounted.
1140 	 */
1141 	if (check_rtable(vfsp)) {
1142 		mutex_enter(&mi->mi_async_lock);
1143 		mi->mi_max_threads = omax;
1144 		mutex_exit(&mi->mi_async_lock);
1145 		return (EBUSY);
1146 	}
1147 	/*
1148 	 * The unmount can't fail from now on; stop the worker thread manager.
1149 	 */
1150 	nfs_async_manager_stop(vfsp);
1151 	/*
1152 	 * Destroy all rnodes belonging to this file system from the
1153 	 * rnode hash queues and purge any resources allocated to
1154 	 * them.
1155 	 */
1156 	destroy_rtable(vfsp, cr);
1157 	if (mi->mi_io_kstats) {
1158 		kstat_delete(mi->mi_io_kstats);
1159 		mi->mi_io_kstats = NULL;
1160 	}
1161 	if (mi->mi_ro_kstats) {
1162 		kstat_delete(mi->mi_ro_kstats);
1163 		mi->mi_ro_kstats = NULL;
1164 	}
1165 	return (0);
1166 }
1167 
1168 /*
1169  * find root of nfs
1170  */
1171 static int
1172 nfs3_root(vfs_t *vfsp, vnode_t **vpp)
1173 {
1174 	mntinfo_t *mi;
1175 	vnode_t *vp;
1176 	servinfo_t *svp;
1177 
1178 	mi = VFTOMI(vfsp);
1179 
1180 	if (curproc->p_zone != mi->mi_zone)
1181 		return (EPERM);
1182 
1183 	svp = mi->mi_curr_serv;
1184 	if (svp && (svp->sv_flags & SV_ROOT_STALE)) {
1185 		mutex_enter(&svp->sv_lock);
1186 		svp->sv_flags &= ~SV_ROOT_STALE;
1187 		mutex_exit(&svp->sv_lock);
1188 		return (ENOENT);
1189 	}
1190 
1191 	vp = makenfs3node((nfs_fh3 *)&mi->mi_curr_serv->sv_fhandle,
1192 	    NULL, vfsp, gethrtime(), CRED(), NULL, NULL);
1193 
1194 	if (VTOR(vp)->r_flags & RSTALE) {
1195 		VN_RELE(vp);
1196 		return (ENOENT);
1197 	}
1198 
1199 	ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type);
1200 
1201 	vp->v_type = mi->mi_type;
1202 
1203 	*vpp = vp;
1204 
1205 	return (0);
1206 }
1207 
1208 /*
1209  * Get file system statistics.
1210  */
1211 static int
1212 nfs3_statvfs(vfs_t *vfsp, struct statvfs64 *sbp)
1213 {
1214 	int error;
1215 	struct mntinfo *mi;
1216 	struct FSSTAT3args args;
1217 	struct FSSTAT3res res;
1218 	int douprintf;
1219 	failinfo_t fi;
1220 	vnode_t *vp;
1221 	cred_t *cr;
1222 	hrtime_t t;
1223 
1224 	mi = VFTOMI(vfsp);
1225 	if (curproc->p_zone != mi->mi_zone)
1226 		return (EPERM);
1227 	error = nfs3_root(vfsp, &vp);
1228 	if (error)
1229 		return (error);
1230 
1231 	cr = CRED();
1232 
1233 	args.fsroot = *VTOFH3(vp);
1234 	fi.vp = vp;
1235 	fi.fhp = (caddr_t)&args.fsroot;
1236 	fi.copyproc = nfs3copyfh;
1237 	fi.lookupproc = nfs3lookup;
1238 	fi.xattrdirproc = acl_getxattrdir3;
1239 
1240 	douprintf = 1;
1241 
1242 	t = gethrtime();
1243 
1244 	error = rfs3call(mi, NFSPROC3_FSSTAT,
1245 	    xdr_nfs_fh3, (caddr_t)&args,
1246 	    xdr_FSSTAT3res, (caddr_t)&res, cr,
1247 	    &douprintf, &res.status, 0, &fi);
1248 
1249 	if (error) {
1250 		VN_RELE(vp);
1251 		return (error);
1252 	}
1253 
1254 	error = geterrno3(res.status);
1255 	if (!error) {
1256 		nfs3_cache_post_op_attr(vp, &res.resok.obj_attributes, t, cr);
1257 		sbp->f_bsize = MAXBSIZE;
1258 		sbp->f_frsize = DEV_BSIZE;
1259 		/*
1260 		 * Allow -1 fields to pass through unconverted.  These
1261 		 * indicate "don't know" fields.
1262 		 */
1263 		if (res.resok.tbytes == (size3)-1)
1264 			sbp->f_blocks = (fsblkcnt64_t)res.resok.tbytes;
1265 		else {
1266 			sbp->f_blocks = (fsblkcnt64_t)
1267 			    (res.resok.tbytes / DEV_BSIZE);
1268 		}
1269 		if (res.resok.fbytes == (size3)-1)
1270 			sbp->f_bfree = (fsblkcnt64_t)res.resok.fbytes;
1271 		else {
1272 			sbp->f_bfree = (fsblkcnt64_t)
1273 			    (res.resok.fbytes / DEV_BSIZE);
1274 		}
1275 		if (res.resok.abytes == (size3)-1)
1276 			sbp->f_bavail = (fsblkcnt64_t)res.resok.abytes;
1277 		else {
1278 			sbp->f_bavail = (fsblkcnt64_t)
1279 			    (res.resok.abytes / DEV_BSIZE);
1280 		}
1281 		sbp->f_files = (fsfilcnt64_t)res.resok.tfiles;
1282 		sbp->f_ffree = (fsfilcnt64_t)res.resok.ffiles;
1283 		sbp->f_favail = (fsfilcnt64_t)res.resok.afiles;
1284 		sbp->f_fsid = (unsigned long)vfsp->vfs_fsid.val[0];
1285 		(void) strncpy(sbp->f_basetype,
1286 		    vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ);
1287 		sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
1288 		sbp->f_namemax = (ulong_t)-1;
1289 	} else {
1290 		nfs3_cache_post_op_attr(vp, &res.resfail.obj_attributes, t, cr);
1291 		PURGE_STALE_FH(error, vp, cr);
1292 	}
1293 
1294 	VN_RELE(vp);
1295 
1296 	return (error);
1297 }
1298 
1299 static kmutex_t nfs3_syncbusy;
1300 
1301 /*
1302  * Flush dirty nfs files for file system vfsp.
1303  * If vfsp == NULL, all nfs files are flushed.
1304  */
1305 /* ARGSUSED */
1306 static int
1307 nfs3_sync(vfs_t *vfsp, short flag, cred_t *cr)
1308 {
1309 	/*
1310 	 * Cross-zone calls are OK here, since this translates to a
1311 	 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone.
1312 	 */
1313 	if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs3_syncbusy) != 0) {
1314 		rflush(vfsp, cr);
1315 		mutex_exit(&nfs3_syncbusy);
1316 	}
1317 	return (0);
1318 }
1319 
1320 /* ARGSUSED */
1321 static int
1322 nfs3_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
1323 {
1324 	int error;
1325 	nfs_fh3 fh;
1326 	vnode_t *vp;
1327 	struct vattr va;
1328 
1329 	if (fidp->fid_len > NFS3_FHSIZE) {
1330 		*vpp = NULL;
1331 		return (ESTALE);
1332 	}
1333 
1334 	if (curproc->p_zone != VFTOMI(vfsp)->mi_zone)
1335 		return (EPERM);
1336 	fh.fh3_length = fidp->fid_len;
1337 	bcopy(fidp->fid_data, fh.fh3_u.data, fh.fh3_length);
1338 
1339 	vp = makenfs3node(&fh, NULL, vfsp, gethrtime(), CRED(), NULL, NULL);
1340 
1341 	if (VTOR(vp)->r_flags & RSTALE) {
1342 		VN_RELE(vp);
1343 		*vpp = NULL;
1344 		return (ENOENT);
1345 	}
1346 
1347 	if (vp->v_type == VNON) {
1348 		va.va_mask = AT_ALL;
1349 		error = nfs3getattr(vp, &va, CRED());
1350 		if (error) {
1351 			VN_RELE(vp);
1352 			*vpp = NULL;
1353 			return (error);
1354 		}
1355 		vp->v_type = va.va_type;
1356 	}
1357 
1358 	*vpp = vp;
1359 
1360 	return (0);
1361 }
1362 
1363 /* ARGSUSED */
1364 static int
1365 nfs3_mountroot(vfs_t *vfsp, whymountroot_t why)
1366 {
1367 	vnode_t *rtvp;
1368 	char root_hostname[SYS_NMLN+1];
1369 	struct servinfo *svp;
1370 	int error;
1371 	int vfsflags;
1372 	size_t size;
1373 	char *root_path;
1374 	struct pathname pn;
1375 	char *name;
1376 	cred_t *cr;
1377 	struct nfs_args args;		/* nfs mount arguments */
1378 	static char token[10];
1379 
1380 	bzero(&args, sizeof (args));
1381 
1382 	/* do this BEFORE getfile which causes xid stamps to be initialized */
1383 	clkset(-1L);		/* hack for now - until we get time svc? */
1384 
1385 	if (why == ROOT_REMOUNT) {
1386 		/*
1387 		 * Shouldn't happen.
1388 		 */
1389 		panic("nfs3_mountroot: why == ROOT_REMOUNT");
1390 	}
1391 
1392 	if (why == ROOT_UNMOUNT) {
1393 		/*
1394 		 * Nothing to do for NFS.
1395 		 */
1396 		return (0);
1397 	}
1398 
1399 	/*
1400 	 * why == ROOT_INIT
1401 	 */
1402 
1403 	name = token;
1404 	*name = 0;
1405 	getfsname("root", name, sizeof (token));
1406 
1407 	pn_alloc(&pn);
1408 	root_path = pn.pn_path;
1409 
1410 	svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
1411 	svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP);
1412 	svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
1413 	svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
1414 
1415 	/*
1416 	 * Get server address
1417 	 * Get the root fhandle
1418 	 * Get server's transport
1419 	 * Get server's hostname
1420 	 * Get options
1421 	 */
1422 	args.addr = &svp->sv_addr;
1423 	args.fh = (char *)&svp->sv_fhandle;
1424 	args.knconf = svp->sv_knconf;
1425 	args.hostname = root_hostname;
1426 	vfsflags = 0;
1427 	if (error = mount_root(*name ? name : "root", root_path, NFS_V3,
1428 	    &args, &vfsflags)) {
1429 		if (error == EPROTONOSUPPORT)
1430 			nfs_cmn_err(error, CE_WARN, "nfs3_mountroot: "
1431 			    "mount_root failed: server doesn't support NFS V3");
1432 		else
1433 			nfs_cmn_err(error, CE_WARN,
1434 			    "nfs3_mountroot: mount_root failed: %m");
1435 		sv_free(svp);
1436 		pn_free(&pn);
1437 		return (error);
1438 	}
1439 	svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1);
1440 	svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP);
1441 	(void) strcpy(svp->sv_hostname, root_hostname);
1442 
1443 	/*
1444 	 * Force root partition to always be mounted with AUTH_UNIX for now
1445 	 */
1446 	svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP);
1447 	svp->sv_secdata->secmod = AUTH_UNIX;
1448 	svp->sv_secdata->rpcflavor = AUTH_UNIX;
1449 	svp->sv_secdata->data = NULL;
1450 
1451 	cr = crgetcred();
1452 	rtvp = NULL;
1453 
1454 	error = nfs3rootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone);
1455 
1456 	crfree(cr);
1457 
1458 	if (error) {
1459 		pn_free(&pn);
1460 		goto errout;
1461 	}
1462 
1463 	error = nfs_setopts(rtvp, DATAMODEL_NATIVE, &args);
1464 	if (error) {
1465 		nfs_cmn_err(error, CE_WARN,
1466 		    "nfs3_mountroot: invalid root mount options");
1467 		pn_free(&pn);
1468 		goto errout;
1469 	}
1470 
1471 	(void) vfs_lock_wait(vfsp);
1472 	vfs_add(NULL, vfsp, vfsflags);
1473 	vfs_unlock(vfsp);
1474 
1475 	size = strlen(svp->sv_hostname);
1476 	(void) strcpy(rootfs.bo_name, svp->sv_hostname);
1477 	rootfs.bo_name[size] = ':';
1478 	(void) strcpy(&rootfs.bo_name[size + 1], root_path);
1479 
1480 	pn_free(&pn);
1481 
1482 errout:
1483 	if (error) {
1484 		sv_free(svp);
1485 		nfs_async_stop(vfsp);
1486 		nfs_async_manager_stop(vfsp);
1487 	}
1488 
1489 	if (rtvp != NULL)
1490 		VN_RELE(rtvp);
1491 
1492 	return (error);
1493 }
1494 
1495 /*
1496  * Initialization routine for VFS routines.  Should only be called once
1497  */
1498 int
1499 nfs3_vfsinit(void)
1500 {
1501 	mutex_init(&nfs3_syncbusy, NULL, MUTEX_DEFAULT, NULL);
1502 	return (0);
1503 }
1504 
1505 void
1506 nfs3_vfsfini(void)
1507 {
1508 	mutex_destroy(&nfs3_syncbusy);
1509 }
1510 
1511 void
1512 nfs3_freevfs(vfs_t *vfsp)
1513 {
1514 	mntinfo_t *mi;
1515 	servinfo_t *svp;
1516 
1517 	/* free up the resources */
1518 	mi = VFTOMI(vfsp);
1519 	svp = mi->mi_servers;
1520 	mi->mi_servers = mi->mi_curr_serv = NULL;
1521 	sv_free(svp);
1522 
1523 	/*
1524 	 * By this time we should have already deleted the
1525 	 * mi kstats in the unmount code. If they are still around
1526 	 * somethings wrong
1527 	 */
1528 	ASSERT(mi->mi_io_kstats == NULL);
1529 	nfs_free_mi(mi);
1530 }
1531