xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision cbd30a72ca196976c1c700400ecd424baa1b9c16)
1 /*-
2  * Copyright (c) 1989, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/limits.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/mount.h>
55 #include <sys/proc.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/sockio.h>
59 #include <sys/sysctl.h>
60 #include <sys/vnode.h>
61 #include <sys/signalvar.h>
62 
63 #include <vm/vm.h>
64 #include <vm/vm_extern.h>
65 #include <vm/uma.h>
66 
67 #include <net/if.h>
68 #include <net/route.h>
69 #include <netinet/in.h>
70 
71 #include <fs/nfs/nfsport.h>
72 #include <fs/nfsclient/nfsnode.h>
73 #include <fs/nfsclient/nfsmount.h>
74 #include <fs/nfsclient/nfs.h>
75 #include <nfs/nfsdiskless.h>
76 
/* Advertise the "nfscl" kernel feature. */
FEATURE(nfscl, "NFSv4 client");

/* Globals defined in other NFS client source files. */
extern int nfscl_ticks;
extern struct timeval nfsboottime;
extern int nfsrv_useacl;
extern int nfscl_debuglevel;
extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
extern struct mtx ncl_iod_mutex;
NFSCLSTATEMUTEX;

/* malloc(9) types used by this file for request headers and mount data. */
MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header");
MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct");

/* Tunables published under the _vfs_nfs sysctl node. */
SYSCTL_DECL(_vfs_nfs);
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0, "");
/*
 * NOTE(review): the two "downdelay" sysctls below pass the default-value
 * macro as the OID number where the neighbouring entries use OID_AUTO --
 * confirm this is intentional before changing it.
 */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
/* how long between console messages "nfs server foo not responding" */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
#ifdef NFS_DEBUG
/* Debug toggle, only built when NFS_DEBUG is defined. */
int nfs_debug;
SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
    "Toggle debug flag");
#endif
107 
/*
 * Forward declarations for the file-local helpers and for the VFS entry
 * points that are installed in nfs_vfsops below.
 */
static int	nfs_mountroot(struct mount *);
static void	nfs_sec_name(char *, int *);
static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
		    struct nfs_args *argp, const char *, struct ucred *,
		    struct thread *);
static int	mountnfs(struct nfs_args *, struct mount *,
		    struct sockaddr *, char *, u_char *, int, u_char *, int,
		    u_char *, int, struct vnode **, struct ucred *,
		    struct thread *, int, int, int);
static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
		    struct sockaddr_storage *, int *, off_t *,
		    struct timeval *);
static vfs_mount_t nfs_mount;
static vfs_cmount_t nfs_cmount;
static vfs_unmount_t nfs_unmount;
static vfs_root_t nfs_root;
static vfs_statfs_t nfs_statfs;
static vfs_sync_t nfs_sync;
static vfs_sysctl_t nfs_sysctl;
static vfs_purge_t nfs_purge;
128 
/*
 * nfs vfs operations.
 *
 * Maps each VFS operation onto its NFS client implementation and
 * registers the table under the file system name "nfs" with the
 * VFCF_NETWORK and VFCF_SBDRY flags.
 */
static struct vfsops nfs_vfsops = {
	.vfs_init =		ncl_init,
	.vfs_mount =		nfs_mount,
	.vfs_cmount =		nfs_cmount,
	.vfs_root =		nfs_root,
	.vfs_statfs =		nfs_statfs,
	.vfs_sync =		nfs_sync,
	.vfs_uninit =		ncl_uninit,
	.vfs_unmount =		nfs_unmount,
	.vfs_sysctl =		nfs_sysctl,
	.vfs_purge =		nfs_purge,
};
VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
145 
/*
 * So that loader and kldload(2) can find us, wherever we are.
 * The "nfs" module is version 1 and depends on version 1 of the
 * nfscommon, krpc, nfssvc and nfslock modules.
 */
MODULE_VERSION(nfs, 1);
MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
MODULE_DEPEND(nfs, krpc, 1, 1, 1);
MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
152 
/*
 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
 * can be shared by both NFS clients. It is declared here so that it
 * will be defined for kernels built without NFS_ROOT, although it
 * isn't used in that case.
 */
#if !defined(NFS_ROOT)
struct nfs_diskless	nfs_diskless = { { { 0 } } };
struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
int			nfs_diskless_valid = 0;
#endif

/*
 * Read-only sysctls under the _vfs_nfs node that expose the diskless
 * boot state: the validity indicator, the root path string and the
 * root server socket address.
 */
SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &nfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");

SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
    "%Ssockaddr_in", "Diskless root nfs address");
175 
176 
/* Prototypes for the diskless (NFS root) mount helpers. */
void		newnfsargs_ntoh(struct nfs_args *);
static int	nfs_mountdiskless(char *,
		    struct sockaddr_in *, struct nfs_args *,
		    struct thread *, struct vnode **, struct mount *);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
184 
185 int
186 newnfs_iosize(struct nfsmount *nmp)
187 {
188 	int iosize, maxio;
189 
190 	/* First, set the upper limit for iosize */
191 	if (nmp->nm_flag & NFSMNT_NFSV4) {
192 		maxio = NFS_MAXBSIZE;
193 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
194 		if (nmp->nm_sotype == SOCK_DGRAM)
195 			maxio = NFS_MAXDGRAMDATA;
196 		else
197 			maxio = NFS_MAXBSIZE;
198 	} else {
199 		maxio = NFS_V2MAXDATA;
200 	}
201 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
202 		nmp->nm_rsize = maxio;
203 	if (nmp->nm_rsize > NFS_MAXBSIZE)
204 		nmp->nm_rsize = NFS_MAXBSIZE;
205 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
206 		nmp->nm_readdirsize = maxio;
207 	if (nmp->nm_readdirsize > nmp->nm_rsize)
208 		nmp->nm_readdirsize = nmp->nm_rsize;
209 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
210 		nmp->nm_wsize = maxio;
211 	if (nmp->nm_wsize > NFS_MAXBSIZE)
212 		nmp->nm_wsize = NFS_MAXBSIZE;
213 
214 	/*
215 	 * Calculate the size used for io buffers.  Use the larger
216 	 * of the two sizes to minimise nfs requests but make sure
217 	 * that it is at least one VM page to avoid wasting buffer
218 	 * space.  It must also be at least NFS_DIRBLKSIZ, since
219 	 * that is the buffer size used for directories.
220 	 */
221 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
222 	iosize = imax(iosize, PAGE_SIZE);
223 	iosize = imax(iosize, NFS_DIRBLKSIZ);
224 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
225 	return (iosize);
226 }
227 
228 static void
229 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
230 {
231 
232 	args->version = NFS_ARGSVERSION;
233 	args->addr = oargs->addr;
234 	args->addrlen = oargs->addrlen;
235 	args->sotype = oargs->sotype;
236 	args->proto = oargs->proto;
237 	args->fh = oargs->fh;
238 	args->fhsize = oargs->fhsize;
239 	args->flags = oargs->flags;
240 	args->wsize = oargs->wsize;
241 	args->rsize = oargs->rsize;
242 	args->readdirsize = oargs->readdirsize;
243 	args->timeo = oargs->timeo;
244 	args->retrans = oargs->retrans;
245 	args->readahead = oargs->readahead;
246 	args->hostname = oargs->hostname;
247 }
248 
249 static void
250 nfs_convert_diskless(void)
251 {
252 
253 	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
254 		sizeof(struct ifaliasreq));
255 	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
256 		sizeof(struct sockaddr_in));
257 	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
258 	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
259 		nfsv3_diskless.root_fhsize = NFSX_MYFH;
260 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
261 	} else {
262 		nfsv3_diskless.root_fhsize = NFSX_V2FH;
263 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
264 	}
265 	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
266 		sizeof(struct sockaddr_in));
267 	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
268 	nfsv3_diskless.root_time = nfs_diskless.root_time;
269 	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
270 		MAXHOSTNAMELEN);
271 	nfs_diskless_valid = 3;
272 }
273 
274 /*
275  * nfs statfs call
276  */
277 static int
278 nfs_statfs(struct mount *mp, struct statfs *sbp)
279 {
280 	struct vnode *vp;
281 	struct thread *td;
282 	struct nfsmount *nmp = VFSTONFS(mp);
283 	struct nfsvattr nfsva;
284 	struct nfsfsinfo fs;
285 	struct nfsstatfs sb;
286 	int error = 0, attrflag, gotfsinfo = 0, ret;
287 	struct nfsnode *np;
288 
289 	td = curthread;
290 
291 	error = vfs_busy(mp, MBF_NOWAIT);
292 	if (error)
293 		return (error);
294 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
295 	if (error) {
296 		vfs_unbusy(mp);
297 		return (error);
298 	}
299 	vp = NFSTOV(np);
300 	mtx_lock(&nmp->nm_mtx);
301 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
302 		mtx_unlock(&nmp->nm_mtx);
303 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
304 		    &attrflag, NULL);
305 		if (!error)
306 			gotfsinfo = 1;
307 	} else
308 		mtx_unlock(&nmp->nm_mtx);
309 	if (!error)
310 		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
311 		    &attrflag, NULL);
312 	if (error != 0)
313 		NFSCL_DEBUG(2, "statfs=%d\n", error);
314 	if (attrflag == 0) {
315 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
316 		    td->td_ucred, td, &nfsva, NULL, NULL);
317 		if (ret) {
318 			/*
319 			 * Just set default values to get things going.
320 			 */
321 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
322 			nfsva.na_vattr.va_type = VDIR;
323 			nfsva.na_vattr.va_mode = 0777;
324 			nfsva.na_vattr.va_nlink = 100;
325 			nfsva.na_vattr.va_uid = (uid_t)0;
326 			nfsva.na_vattr.va_gid = (gid_t)0;
327 			nfsva.na_vattr.va_fileid = 2;
328 			nfsva.na_vattr.va_gen = 1;
329 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
330 			nfsva.na_vattr.va_size = 512 * 1024;
331 		}
332 	}
333 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
334 	if (!error) {
335 	    mtx_lock(&nmp->nm_mtx);
336 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
337 		nfscl_loadfsinfo(nmp, &fs);
338 	    nfscl_loadsbinfo(nmp, &sb, sbp);
339 	    sbp->f_iosize = newnfs_iosize(nmp);
340 	    mtx_unlock(&nmp->nm_mtx);
341 	    if (sbp != &mp->mnt_stat) {
342 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
343 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
344 	    }
345 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
346 	} else if (NFS_ISV4(vp)) {
347 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
348 	}
349 	vput(vp);
350 	vfs_unbusy(mp);
351 	return (error);
352 }
353 
354 /*
355  * nfs version 3 fsinfo rpc call
356  */
357 int
358 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
359     struct thread *td)
360 {
361 	struct nfsfsinfo fs;
362 	struct nfsvattr nfsva;
363 	int error, attrflag;
364 
365 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
366 	if (!error) {
367 		if (attrflag)
368 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
369 			    1);
370 		mtx_lock(&nmp->nm_mtx);
371 		nfscl_loadfsinfo(nmp, &fs);
372 		mtx_unlock(&nmp->nm_mtx);
373 	}
374 	return (error);
375 }
376 
377 /*
378  * Mount a remote root fs via. nfs. This depends on the info in the
379  * nfs_diskless structure that has been filled in properly by some primary
380  * bootstrap.
381  * It goes something like this:
382  * - do enough of "ifconfig" by calling ifioctl() so that the system
383  *   can talk to the server
384  * - If nfs_diskless.mygateway is filled in, use that address as
385  *   a default gateway.
386  * - build the rootfs mount point and call mountnfs() to do the rest.
387  *
388  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
389  * structure, as well as other global NFS client variables here, as
390  * nfs_mountroot() will be called once in the boot before any other NFS
391  * client activity occurs.
392  */
393 static int
394 nfs_mountroot(struct mount *mp)
395 {
396 	struct thread *td = curthread;
397 	struct nfsv3_diskless *nd = &nfsv3_diskless;
398 	struct socket *so;
399 	struct vnode *vp;
400 	struct ifreq ir;
401 	int error;
402 	u_long l;
403 	char buf[128];
404 	char *cp;
405 
406 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
407 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
408 #elif defined(NFS_ROOT)
409 	nfs_setup_diskless();
410 #endif
411 
412 	if (nfs_diskless_valid == 0)
413 		return (-1);
414 	if (nfs_diskless_valid == 1)
415 		nfs_convert_diskless();
416 
417 	/*
418 	 * XXX splnet, so networks will receive...
419 	 */
420 	splnet();
421 
422 	/*
423 	 * Do enough of ifconfig(8) so that the critical net interface can
424 	 * talk to the server.
425 	 */
426 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
427 	    td->td_ucred, td);
428 	if (error)
429 		panic("nfs_mountroot: socreate(%04x): %d",
430 			nd->myif.ifra_addr.sa_family, error);
431 
432 #if 0 /* XXX Bad idea */
433 	/*
434 	 * We might not have been told the right interface, so we pass
435 	 * over the first ten interfaces of the same kind, until we get
436 	 * one of them configured.
437 	 */
438 
439 	for (i = strlen(nd->myif.ifra_name) - 1;
440 		nd->myif.ifra_name[i] >= '0' &&
441 		nd->myif.ifra_name[i] <= '9';
442 		nd->myif.ifra_name[i] ++) {
443 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
444 		if(!error)
445 			break;
446 	}
447 #endif
448 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
449 	if (error)
450 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
451 	if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
452 		ir.ifr_mtu = strtol(cp, NULL, 10);
453 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
454 		freeenv(cp);
455 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
456 		if (error)
457 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
458 	}
459 	soclose(so);
460 
461 	/*
462 	 * If the gateway field is filled in, set it as the default route.
463 	 * Note that pxeboot will set a default route of 0 if the route
464 	 * is not set by the DHCP server.  Check also for a value of 0
465 	 * to avoid panicking inappropriately in that situation.
466 	 */
467 	if (nd->mygateway.sin_len != 0 &&
468 	    nd->mygateway.sin_addr.s_addr != 0) {
469 		struct sockaddr_in mask, sin;
470 
471 		bzero((caddr_t)&mask, sizeof(mask));
472 		sin = mask;
473 		sin.sin_family = AF_INET;
474 		sin.sin_len = sizeof(sin);
475                 /* XXX MRT use table 0 for this sort of thing */
476 		CURVNET_SET(TD_TO_VNET(td));
477 		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
478 		    (struct sockaddr *)&nd->mygateway,
479 		    (struct sockaddr *)&mask,
480 		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
481 		CURVNET_RESTORE();
482 		if (error)
483 			panic("nfs_mountroot: RTM_ADD: %d", error);
484 	}
485 
486 	/*
487 	 * Create the rootfs mount point.
488 	 */
489 	nd->root_args.fh = nd->root_fh;
490 	nd->root_args.fhsize = nd->root_fhsize;
491 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
492 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
493 		(l >> 24) & 0xff, (l >> 16) & 0xff,
494 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
495 	printf("NFS ROOT: %s\n", buf);
496 	nd->root_args.hostname = buf;
497 	if ((error = nfs_mountdiskless(buf,
498 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
499 		return (error);
500 	}
501 
502 	/*
503 	 * This is not really an nfs issue, but it is much easier to
504 	 * set hostname here and then let the "/etc/rc.xxx" files
505 	 * mount the right /var based upon its preset value.
506 	 */
507 	mtx_lock(&prison0.pr_mtx);
508 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
509 	    sizeof(prison0.pr_hostname));
510 	mtx_unlock(&prison0.pr_mtx);
511 	inittodr(ntohl(nd->root_time));
512 	return (0);
513 }
514 
515 /*
516  * Internal version of mount system call for diskless setup.
517  */
518 static int
519 nfs_mountdiskless(char *path,
520     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
521     struct vnode **vpp, struct mount *mp)
522 {
523 	struct sockaddr *nam;
524 	int dirlen, error;
525 	char *dirpath;
526 
527 	/*
528 	 * Find the directory path in "path", which also has the server's
529 	 * name/ip address in it.
530 	 */
531 	dirpath = strchr(path, ':');
532 	if (dirpath != NULL)
533 		dirlen = strlen(++dirpath);
534 	else
535 		dirlen = 0;
536 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
537 	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
538 	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
539 	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
540 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
541 		return (error);
542 	}
543 	return (0);
544 }
545 
546 static void
547 nfs_sec_name(char *sec, int *flagsp)
548 {
549 	if (!strcmp(sec, "krb5"))
550 		*flagsp |= NFSMNT_KERB;
551 	else if (!strcmp(sec, "krb5i"))
552 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
553 	else if (!strcmp(sec, "krb5p"))
554 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
555 }
556 
/*
 * Apply the mount arguments in "argp" to the NFS mount "nmp".
 *
 * In order, this updates the read-only flag on "mp", sets transport
 * dependent defaults, merges the argument flags into nm_flag, and then
 * loads timeouts, retry count, transfer sizes, attribute cache limits,
 * readahead and write commit size.  If the socket parameters changed on
 * a mount that already has a client handle, the connection is torn down
 * and (for datagram sockets) re-established.  Finally, when "hostname"
 * is non-NULL, the part before the first ':' is saved in nm_hostname.
 *
 * "cred" and "td" are only used for the reconnect.
 */
static void
nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
    const char *hostname, struct ucred *cred, struct thread *td)
{
	int s;
	int adjsock;
	char *p;

	s = splnet();

	/*
	 * Set read-only flag if requested; otherwise, clear it if this is
	 * an update.  If this is not an update, then either the read-only
	 * flag is already clear, or this is a root mount and it was set
	 * intentionally at some previous point.
	 */
	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
		MNT_ILOCK(mp);
		mp->mnt_flag |= MNT_RDONLY;
		MNT_IUNLOCK(mp);
	} else if (mp->mnt_flag & MNT_UPDATE) {
		MNT_ILOCK(mp);
		mp->mnt_flag &= ~MNT_RDONLY;
		MNT_IUNLOCK(mp);
	}

	/*
	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
	 * no sense in that context.  Also, set up appropriate retransmit
	 * and soft timeout behavior.
	 */
	if (argp->sotype == SOCK_STREAM) {
		nmp->nm_flag &= ~NFSMNT_NOCONN;
		nmp->nm_timeo = NFS_MAXTIMEO;
		if ((argp->flags & NFSMNT_NFSV4) != 0)
			nmp->nm_retry = INT_MAX;
		else
			nmp->nm_retry = NFS_RETRANS_TCP;
	}

	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
		argp->flags &= ~NFSMNT_RDIRPLUS;
		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
	}

	/* Re-bind if rsrvd port requested and wasn't on one */
	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
		  && (argp->flags & NFSMNT_RESVPORT);
	/* Also re-bind if we're switching to/from a connected UDP socket */
	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
		    (argp->flags & NFSMNT_NOCONN));

	/* Update flags atomically.  Don't change the lock bits. */
	nmp->nm_flag = argp->flags | nmp->nm_flag;
	splx(s);

	/* Convert the timeout to NFS_HZ ticks (rounded), then clamp it. */
	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
		if (nmp->nm_timeo < NFS_MINTIMEO)
			nmp->nm_timeo = NFS_MINTIMEO;
		else if (nmp->nm_timeo > NFS_MAXTIMEO)
			nmp->nm_timeo = NFS_MAXTIMEO;
	}

	/* A retransmit count is only honoured when it is greater than 1. */
	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
		nmp->nm_retry = argp->retrans;
		if (nmp->nm_retry > NFS_MAXREXMIT)
			nmp->nm_retry = NFS_MAXREXMIT;
	}

	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
		nmp->nm_wsize = argp->wsize;
		/*
		 * Clip at the power of 2 below the size. There is an
		 * issue (not isolated) that causes intermittent page
		 * faults if this is not done.
		 */
		if (nmp->nm_wsize > NFS_FABLKSIZE)
			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
		else
			nmp->nm_wsize = NFS_FABLKSIZE;
	}

	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
		nmp->nm_rsize = argp->rsize;
		/*
		 * Clip at the power of 2 below the size. There is an
		 * issue (not isolated) that causes intermittent page
		 * faults if this is not done.
		 */
		if (nmp->nm_rsize > NFS_FABLKSIZE)
			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
		else
			nmp->nm_rsize = NFS_FABLKSIZE;
	}

	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
		nmp->nm_readdirsize = argp->readdirsize;
	}

	/*
	 * Attribute cache limits: each one takes the supplied value when
	 * its flag is set and the value is non-negative, and the compiled
	 * in default otherwise.  A minimum is never left above its maximum.
	 */
	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
		nmp->nm_acregmin = argp->acregmin;
	else
		nmp->nm_acregmin = NFS_MINATTRTIMO;
	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
		nmp->nm_acregmax = argp->acregmax;
	else
		nmp->nm_acregmax = NFS_MAXATTRTIMO;
	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
		nmp->nm_acdirmin = argp->acdirmin;
	else
		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
		nmp->nm_acdirmax = argp->acdirmax;
	else
		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
		nmp->nm_acdirmin = nmp->nm_acdirmax;
	if (nmp->nm_acregmin > nmp->nm_acregmax)
		nmp->nm_acregmin = nmp->nm_acregmax;

	/* Readahead is capped at NFS_MAXRAHEAD. */
	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
		if (argp->readahead <= NFS_MAXRAHEAD)
			nmp->nm_readahead = argp->readahead;
		else
			nmp->nm_readahead = NFS_MAXRAHEAD;
	}
	/* The write commit size is never set below the write size. */
	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
		if (argp->wcommitsize < nmp->nm_wsize)
			nmp->nm_wcommitsize = nmp->nm_wsize;
		else
			nmp->nm_wcommitsize = argp->wcommitsize;
	}

	/* A change of socket type or protocol also needs a new socket. */
	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
		    (nmp->nm_soproto != argp->proto));

	if (nmp->nm_client != NULL && adjsock) {
		int haslock = 0, error = 0;

		/* Stream sockets take the send lock around the disconnect. */
		if (nmp->nm_sotype == SOCK_STREAM) {
			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
			if (!error)
				haslock = 1;
		}
		if (!error) {
		    newnfs_disconnect(&nmp->nm_sockreq);
		    if (haslock)
			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
		    nmp->nm_sotype = argp->sotype;
		    nmp->nm_soproto = argp->proto;
		    /* Datagram sockets are reconnected here, retrying until it works. */
		    if (nmp->nm_sotype == SOCK_DGRAM)
			while (newnfs_connect(nmp, &nmp->nm_sockreq,
			    cred, td, 0)) {
				printf("newnfs_args: retrying connect\n");
				(void) nfs_catnap(PSOCK, 0, "nfscon");
			}
		}
	} else {
		nmp->nm_sotype = argp->sotype;
		nmp->nm_soproto = argp->proto;
	}

	/* Keep only the host part of a "host:/path" style name. */
	if (hostname != NULL) {
		strlcpy(nmp->nm_hostname, hostname,
		    sizeof(nmp->nm_hostname));
		p = strchr(nmp->nm_hostname, ':');
		if (p != NULL)
			*p = '\0';
	}
}
729 
/*
 * Mount option names accepted by nfs_mount().  This NULL-terminated
 * list is handed to vfs_filteropt() there, and the mount fails with
 * EINVAL when that check does not pass.
 */
static const char *nfs_opts[] = { "from", "nfs_args",
    "noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
    "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
    "async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
    "readdirsize", "soft", "hard", "mntudp", "tcp", "udp", "wsize", "rsize",
    "retrans", "actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax",
    "resvport", "readahead", "hostname", "timeo", "timeout", "addr", "fh",
    "nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname", "dirpath",
    "minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr",
    "pnfs", "wcommitsize",
    NULL };
741 
742 /*
743  * Parse the "from" mountarg, passed by the generic mount(8) program
744  * or the mountroot code.  This is used when rerooting into NFS.
745  *
746  * Note that the "hostname" is actually a "hostname:/share/path" string.
747  */
748 static int
749 nfs_mount_parse_from(struct vfsoptlist *opts, char **hostnamep,
750     struct sockaddr_in **sinp, char *dirpath, size_t dirpathsize, int *dirlenp)
751 {
752 	char *nam, *delimp, *hostp, *spec;
753 	int error, have_bracket = 0, offset, rv, speclen;
754 	struct sockaddr_in *sin;
755 	size_t len;
756 
757 	error = vfs_getopt(opts, "from", (void **)&spec, &speclen);
758 	if (error != 0)
759 		return (error);
760 	nam = malloc(MNAMELEN + 1, M_TEMP, M_WAITOK);
761 
762 	/*
763 	 * This part comes from sbin/mount_nfs/mount_nfs.c:getnfsargs().
764 	 */
765 	if (*spec == '[' && (delimp = strchr(spec + 1, ']')) != NULL &&
766 	    *(delimp + 1) == ':') {
767 		hostp = spec + 1;
768 		spec = delimp + 2;
769 		have_bracket = 1;
770 	} else if ((delimp = strrchr(spec, ':')) != NULL) {
771 		hostp = spec;
772 		spec = delimp + 1;
773 	} else if ((delimp = strrchr(spec, '@')) != NULL) {
774 		printf("%s: path@server syntax is deprecated, "
775 		    "use server:path\n", __func__);
776 		hostp = delimp + 1;
777 	} else {
778 		printf("%s: no <host>:<dirpath> nfs-name\n", __func__);
779 		free(nam, M_TEMP);
780 		return (EINVAL);
781 	}
782 	*delimp = '\0';
783 
784 	/*
785 	 * If there has been a trailing slash at mounttime it seems
786 	 * that some mountd implementations fail to remove the mount
787 	 * entries from their mountlist while unmounting.
788 	 */
789 	for (speclen = strlen(spec);
790 	    speclen > 1 && spec[speclen - 1] == '/';
791 	    speclen--)
792 		spec[speclen - 1] = '\0';
793 	if (strlen(hostp) + strlen(spec) + 1 > MNAMELEN) {
794 		printf("%s: %s:%s: name too long", __func__, hostp, spec);
795 		free(nam, M_TEMP);
796 		return (EINVAL);
797 	}
798 	/* Make both '@' and ':' notations equal */
799 	if (*hostp != '\0') {
800 		len = strlen(hostp);
801 		offset = 0;
802 		if (have_bracket)
803 			nam[offset++] = '[';
804 		memmove(nam + offset, hostp, len);
805 		if (have_bracket)
806 			nam[len + offset++] = ']';
807 		nam[len + offset++] = ':';
808 		memmove(nam + len + offset, spec, speclen);
809 		nam[len + speclen + offset] = '\0';
810 	} else
811 		nam[0] = '\0';
812 
813 	/*
814 	 * XXX: IPv6
815 	 */
816 	sin = malloc(sizeof(*sin), M_SONAME, M_WAITOK);
817 	rv = inet_pton(AF_INET, hostp, &sin->sin_addr);
818 	if (rv != 1) {
819 		printf("%s: cannot parse '%s', inet_pton() returned %d\n",
820 		    __func__, hostp, rv);
821 		free(nam, M_TEMP);
822 		free(sin, M_SONAME);
823 		return (EINVAL);
824 	}
825 
826 	sin->sin_len = sizeof(*sin);
827 	sin->sin_family = AF_INET;
828 	/*
829 	 * XXX: hardcoded port number.
830 	 */
831 	sin->sin_port = htons(2049);
832 
833 	*hostnamep = strdup(nam, M_NEWNFSMNT);
834 	*sinp = sin;
835 	strlcpy(dirpath, spec, dirpathsize);
836 	*dirlenp = strlen(dirpath);
837 
838 	free(nam, M_TEMP);
839 	return (0);
840 }
841 
842 /*
843  * VFS Operations.
844  *
845  * mount system call
846  * It seems a bit dumb to copyinstr() the host and path here and then
847  * bcopy() them in mountnfs(), but I wanted to detect errors before
848  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
849  * an error after that means that I have to release the mbuf.
850  */
851 /* ARGSUSED */
852 static int
853 nfs_mount(struct mount *mp)
854 {
855 	struct nfs_args args = {
856 	    .version = NFS_ARGSVERSION,
857 	    .addr = NULL,
858 	    .addrlen = sizeof (struct sockaddr_in),
859 	    .sotype = SOCK_STREAM,
860 	    .proto = 0,
861 	    .fh = NULL,
862 	    .fhsize = 0,
863 	    .flags = NFSMNT_RESVPORT,
864 	    .wsize = NFS_WSIZE,
865 	    .rsize = NFS_RSIZE,
866 	    .readdirsize = NFS_READDIRSIZE,
867 	    .timeo = 10,
868 	    .retrans = NFS_RETRANS,
869 	    .readahead = NFS_DEFRAHEAD,
870 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
871 	    .hostname = NULL,
872 	    .acregmin = NFS_MINATTRTIMO,
873 	    .acregmax = NFS_MAXATTRTIMO,
874 	    .acdirmin = NFS_MINDIRATTRTIMO,
875 	    .acdirmax = NFS_MAXDIRATTRTIMO,
876 	};
877 	int error = 0, ret, len;
878 	struct sockaddr *nam = NULL;
879 	struct vnode *vp;
880 	struct thread *td;
881 	char *hst;
882 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
883 	char *cp, *opt, *name, *secname;
884 	int nametimeo = NFS_DEFAULT_NAMETIMEO;
885 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
886 	int minvers = 0;
887 	int dirlen, has_nfs_args_opt, has_nfs_from_opt,
888 	    krbnamelen, srvkrbnamelen;
889 	size_t hstlen;
890 
891 	has_nfs_args_opt = 0;
892 	has_nfs_from_opt = 0;
893 	hst = malloc(MNAMELEN, M_TEMP, M_WAITOK);
894 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
895 		error = EINVAL;
896 		goto out;
897 	}
898 
899 	td = curthread;
900 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS &&
901 	    nfs_diskless_valid != 0) {
902 		error = nfs_mountroot(mp);
903 		goto out;
904 	}
905 
906 	nfscl_init();
907 
908 	/*
909 	 * The old mount_nfs program passed the struct nfs_args
910 	 * from userspace to kernel.  The new mount_nfs program
911 	 * passes string options via nmount() from userspace to kernel
912 	 * and we populate the struct nfs_args in the kernel.
913 	 */
914 	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
915 		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
916 		    sizeof(args));
917 		if (error != 0)
918 			goto out;
919 
920 		if (args.version != NFS_ARGSVERSION) {
921 			error = EPROGMISMATCH;
922 			goto out;
923 		}
924 		has_nfs_args_opt = 1;
925 	}
926 
927 	/* Handle the new style options. */
928 	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
929 		args.acdirmin = args.acdirmax =
930 		    args.acregmin = args.acregmax = 0;
931 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
932 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
933 	}
934 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
935 		args.flags |= NFSMNT_NOCONN;
936 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
937 		args.flags &= ~NFSMNT_NOCONN;
938 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
939 		args.flags |= NFSMNT_NOLOCKD;
940 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
941 		args.flags &= ~NFSMNT_NOLOCKD;
942 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
943 		args.flags |= NFSMNT_INT;
944 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
945 		args.flags |= NFSMNT_RDIRPLUS;
946 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
947 		args.flags |= NFSMNT_RESVPORT;
948 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
949 		args.flags &= ~NFSMNT_RESVPORT;
950 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
951 		args.flags |= NFSMNT_SOFT;
952 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
953 		args.flags &= ~NFSMNT_SOFT;
954 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
955 		args.sotype = SOCK_DGRAM;
956 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
957 		args.sotype = SOCK_DGRAM;
958 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
959 		args.sotype = SOCK_STREAM;
960 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
961 		args.flags |= NFSMNT_NFSV3;
962 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
963 		args.flags |= NFSMNT_NFSV4;
964 		args.sotype = SOCK_STREAM;
965 	}
966 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
967 		args.flags |= NFSMNT_ALLGSSNAME;
968 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
969 		args.flags |= NFSMNT_NOCTO;
970 	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
971 		args.flags |= NFSMNT_NONCONTIGWR;
972 	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
973 		args.flags |= NFSMNT_PNFS;
974 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
975 		if (opt == NULL) {
976 			vfs_mount_error(mp, "illegal readdirsize");
977 			error = EINVAL;
978 			goto out;
979 		}
980 		ret = sscanf(opt, "%d", &args.readdirsize);
981 		if (ret != 1 || args.readdirsize <= 0) {
982 			vfs_mount_error(mp, "illegal readdirsize: %s",
983 			    opt);
984 			error = EINVAL;
985 			goto out;
986 		}
987 		args.flags |= NFSMNT_READDIRSIZE;
988 	}
989 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
990 		if (opt == NULL) {
991 			vfs_mount_error(mp, "illegal readahead");
992 			error = EINVAL;
993 			goto out;
994 		}
995 		ret = sscanf(opt, "%d", &args.readahead);
996 		if (ret != 1 || args.readahead <= 0) {
997 			vfs_mount_error(mp, "illegal readahead: %s",
998 			    opt);
999 			error = EINVAL;
1000 			goto out;
1001 		}
1002 		args.flags |= NFSMNT_READAHEAD;
1003 	}
1004 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
1005 		if (opt == NULL) {
1006 			vfs_mount_error(mp, "illegal wsize");
1007 			error = EINVAL;
1008 			goto out;
1009 		}
1010 		ret = sscanf(opt, "%d", &args.wsize);
1011 		if (ret != 1 || args.wsize <= 0) {
1012 			vfs_mount_error(mp, "illegal wsize: %s",
1013 			    opt);
1014 			error = EINVAL;
1015 			goto out;
1016 		}
1017 		args.flags |= NFSMNT_WSIZE;
1018 	}
1019 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
1020 		if (opt == NULL) {
1021 			vfs_mount_error(mp, "illegal rsize");
1022 			error = EINVAL;
1023 			goto out;
1024 		}
1025 		ret = sscanf(opt, "%d", &args.rsize);
1026 		if (ret != 1 || args.rsize <= 0) {
1027 			vfs_mount_error(mp, "illegal wsize: %s",
1028 			    opt);
1029 			error = EINVAL;
1030 			goto out;
1031 		}
1032 		args.flags |= NFSMNT_RSIZE;
1033 	}
1034 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
1035 		if (opt == NULL) {
1036 			vfs_mount_error(mp, "illegal retrans");
1037 			error = EINVAL;
1038 			goto out;
1039 		}
1040 		ret = sscanf(opt, "%d", &args.retrans);
1041 		if (ret != 1 || args.retrans <= 0) {
1042 			vfs_mount_error(mp, "illegal retrans: %s",
1043 			    opt);
1044 			error = EINVAL;
1045 			goto out;
1046 		}
1047 		args.flags |= NFSMNT_RETRANS;
1048 	}
1049 	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
1050 		ret = sscanf(opt, "%d", &args.acregmin);
1051 		if (ret != 1 || args.acregmin < 0) {
1052 			vfs_mount_error(mp, "illegal actimeo: %s",
1053 			    opt);
1054 			error = EINVAL;
1055 			goto out;
1056 		}
1057 		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
1058 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
1059 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
1060 	}
1061 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
1062 		ret = sscanf(opt, "%d", &args.acregmin);
1063 		if (ret != 1 || args.acregmin < 0) {
1064 			vfs_mount_error(mp, "illegal acregmin: %s",
1065 			    opt);
1066 			error = EINVAL;
1067 			goto out;
1068 		}
1069 		args.flags |= NFSMNT_ACREGMIN;
1070 	}
1071 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
1072 		ret = sscanf(opt, "%d", &args.acregmax);
1073 		if (ret != 1 || args.acregmax < 0) {
1074 			vfs_mount_error(mp, "illegal acregmax: %s",
1075 			    opt);
1076 			error = EINVAL;
1077 			goto out;
1078 		}
1079 		args.flags |= NFSMNT_ACREGMAX;
1080 	}
1081 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
1082 		ret = sscanf(opt, "%d", &args.acdirmin);
1083 		if (ret != 1 || args.acdirmin < 0) {
1084 			vfs_mount_error(mp, "illegal acdirmin: %s",
1085 			    opt);
1086 			error = EINVAL;
1087 			goto out;
1088 		}
1089 		args.flags |= NFSMNT_ACDIRMIN;
1090 	}
1091 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
1092 		ret = sscanf(opt, "%d", &args.acdirmax);
1093 		if (ret != 1 || args.acdirmax < 0) {
1094 			vfs_mount_error(mp, "illegal acdirmax: %s",
1095 			    opt);
1096 			error = EINVAL;
1097 			goto out;
1098 		}
1099 		args.flags |= NFSMNT_ACDIRMAX;
1100 	}
1101 	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
1102 		ret = sscanf(opt, "%d", &args.wcommitsize);
1103 		if (ret != 1 || args.wcommitsize < 0) {
1104 			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1105 			error = EINVAL;
1106 			goto out;
1107 		}
1108 		args.flags |= NFSMNT_WCOMMITSIZE;
1109 	}
1110 	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1111 		ret = sscanf(opt, "%d", &args.timeo);
1112 		if (ret != 1 || args.timeo <= 0) {
1113 			vfs_mount_error(mp, "illegal timeo: %s",
1114 			    opt);
1115 			error = EINVAL;
1116 			goto out;
1117 		}
1118 		args.flags |= NFSMNT_TIMEO;
1119 	}
1120 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1121 		ret = sscanf(opt, "%d", &args.timeo);
1122 		if (ret != 1 || args.timeo <= 0) {
1123 			vfs_mount_error(mp, "illegal timeout: %s",
1124 			    opt);
1125 			error = EINVAL;
1126 			goto out;
1127 		}
1128 		args.flags |= NFSMNT_TIMEO;
1129 	}
1130 	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1131 		ret = sscanf(opt, "%d", &nametimeo);
1132 		if (ret != 1 || nametimeo < 0) {
1133 			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1134 			error = EINVAL;
1135 			goto out;
1136 		}
1137 	}
1138 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1139 	    == 0) {
1140 		ret = sscanf(opt, "%d", &negnametimeo);
1141 		if (ret != 1 || negnametimeo < 0) {
1142 			vfs_mount_error(mp, "illegal negnametimeo: %s",
1143 			    opt);
1144 			error = EINVAL;
1145 			goto out;
1146 		}
1147 	}
1148 	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1149 	    0) {
1150 		ret = sscanf(opt, "%d", &minvers);
1151 		if (ret != 1 || minvers < 0 || minvers > 1 ||
1152 		    (args.flags & NFSMNT_NFSV4) == 0) {
1153 			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1154 			error = EINVAL;
1155 			goto out;
1156 		}
1157 	}
1158 	if (vfs_getopt(mp->mnt_optnew, "sec",
1159 		(void **) &secname, NULL) == 0)
1160 		nfs_sec_name(secname, &args.flags);
1161 
1162 	if (mp->mnt_flag & MNT_UPDATE) {
1163 		struct nfsmount *nmp = VFSTONFS(mp);
1164 
1165 		if (nmp == NULL) {
1166 			error = EIO;
1167 			goto out;
1168 		}
1169 
1170 		/*
1171 		 * If a change from TCP->UDP is done and there are thread(s)
1172 		 * that have I/O RPC(s) in progress with a transfer size
1173 		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1174 		 * hung, retrying the RPC(s) forever. Usually these threads
1175 		 * will be seen doing an uninterruptible sleep on wait channel
1176 		 * "nfsreq".
1177 		 */
1178 		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1179 			tprintf(td->td_proc, LOG_WARNING,
1180 	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1181 
1182 		/*
1183 		 * When doing an update, we can't change version,
1184 		 * security, switch lockd strategies or change cookie
1185 		 * translation
1186 		 */
1187 		args.flags = (args.flags &
1188 		    ~(NFSMNT_NFSV3 |
1189 		      NFSMNT_NFSV4 |
1190 		      NFSMNT_KERB |
1191 		      NFSMNT_INTEGRITY |
1192 		      NFSMNT_PRIVACY |
1193 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1194 		    (nmp->nm_flag &
1195 			(NFSMNT_NFSV3 |
1196 			 NFSMNT_NFSV4 |
1197 			 NFSMNT_KERB |
1198 			 NFSMNT_INTEGRITY |
1199 			 NFSMNT_PRIVACY |
1200 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1201 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1202 		goto out;
1203 	}
1204 
1205 	/*
1206 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1207 	 * or no-connection mode for those protocols that support
1208 	 * no-connection mode (the flag will be cleared later for protocols
1209 	 * that do not support no-connection mode).  This will allow a client
1210 	 * to receive replies from a different IP then the request was
1211 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1212 	 * not 0.
1213 	 */
1214 	if (nfs_ip_paranoia == 0)
1215 		args.flags |= NFSMNT_NOCONN;
1216 
1217 	if (has_nfs_args_opt != 0) {
1218 		/*
1219 		 * In the 'nfs_args' case, the pointers in the args
1220 		 * structure are in userland - we copy them in here.
1221 		 */
1222 		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1223 			vfs_mount_error(mp, "Bad file handle");
1224 			error = EINVAL;
1225 			goto out;
1226 		}
1227 		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1228 		    args.fhsize);
1229 		if (error != 0)
1230 			goto out;
1231 		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1232 		if (error != 0)
1233 			goto out;
1234 		bzero(&hst[hstlen], MNAMELEN - hstlen);
1235 		args.hostname = hst;
1236 		/* getsockaddr() call must be after above copyin() calls */
1237 		error = getsockaddr(&nam, (caddr_t)args.addr,
1238 		    args.addrlen);
1239 		if (error != 0)
1240 			goto out;
1241 	} else if (nfs_mount_parse_from(mp->mnt_optnew,
1242 	    &args.hostname, (struct sockaddr_in **)&nam, dirpath,
1243 	    sizeof(dirpath), &dirlen) == 0) {
1244 		has_nfs_from_opt = 1;
1245 		bcopy(args.hostname, hst, MNAMELEN);
1246 		hst[MNAMELEN - 1] = '\0';
1247 
1248 		/*
1249 		 * This only works with NFSv4 for now.
1250 		 */
1251 		args.fhsize = 0;
1252 		args.flags |= NFSMNT_NFSV4;
1253 		args.sotype = SOCK_STREAM;
1254 	} else {
1255 		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1256 		    &args.fhsize) == 0) {
1257 			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1258 				vfs_mount_error(mp, "Bad file handle");
1259 				error = EINVAL;
1260 				goto out;
1261 			}
1262 			bcopy(args.fh, nfh, args.fhsize);
1263 		} else {
1264 			args.fhsize = 0;
1265 		}
1266 		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1267 		    (void **)&args.hostname, &len);
1268 		if (args.hostname == NULL) {
1269 			vfs_mount_error(mp, "Invalid hostname");
1270 			error = EINVAL;
1271 			goto out;
1272 		}
1273 		if (len >= MNAMELEN) {
1274 			vfs_mount_error(mp, "Hostname too long");
1275 			error = EINVAL;
1276 			goto out;
1277 		}
1278 		bcopy(args.hostname, hst, len);
1279 		hst[len] = '\0';
1280 	}
1281 
1282 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1283 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1284 	else {
1285 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1286 		cp = strchr(srvkrbname, ':');
1287 		if (cp != NULL)
1288 			*cp = '\0';
1289 	}
1290 	srvkrbnamelen = strlen(srvkrbname);
1291 
1292 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1293 		strlcpy(krbname, name, sizeof (krbname));
1294 	else
1295 		krbname[0] = '\0';
1296 	krbnamelen = strlen(krbname);
1297 
1298 	if (has_nfs_from_opt == 0) {
1299 		if (vfs_getopt(mp->mnt_optnew,
1300 		    "dirpath", (void **)&name, NULL) == 0)
1301 			strlcpy(dirpath, name, sizeof (dirpath));
1302 		else
1303 			dirpath[0] = '\0';
1304 		dirlen = strlen(dirpath);
1305 	}
1306 
1307 	if (has_nfs_args_opt == 0 && has_nfs_from_opt == 0) {
1308 		if (vfs_getopt(mp->mnt_optnew, "addr",
1309 		    (void **)&args.addr, &args.addrlen) == 0) {
1310 			if (args.addrlen > SOCK_MAXADDRLEN) {
1311 				error = ENAMETOOLONG;
1312 				goto out;
1313 			}
1314 			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1315 			bcopy(args.addr, nam, args.addrlen);
1316 			nam->sa_len = args.addrlen;
1317 		} else {
1318 			vfs_mount_error(mp, "No server address");
1319 			error = EINVAL;
1320 			goto out;
1321 		}
1322 	}
1323 
1324 	args.fh = nfh;
1325 	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1326 	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1327 	    nametimeo, negnametimeo, minvers);
1328 out:
1329 	if (!error) {
1330 		MNT_ILOCK(mp);
1331 		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1332 		    MNTK_USES_BCACHE;
1333 		if ((VFSTONFS(mp)->nm_flag & NFSMNT_NFSV4) != 0)
1334 			mp->mnt_kern_flag |= MNTK_NULL_NOCACHE;
1335 		MNT_IUNLOCK(mp);
1336 	}
1337 	free(hst, M_TEMP);
1338 	return (error);
1339 }
1340 
1341 
1342 /*
1343  * VFS Operations.
1344  *
1345  * mount system call
1346  * It seems a bit dumb to copyinstr() the host and path here and then
1347  * bcopy() them in mountnfs(), but I wanted to detect errors before
1348  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
1349  * an error after that means that I have to release the mbuf.
1350  */
1351 /* ARGSUSED */
1352 static int
1353 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1354 {
1355 	int error;
1356 	struct nfs_args args;
1357 
1358 	error = copyin(data, &args, sizeof (struct nfs_args));
1359 	if (error)
1360 		return error;
1361 
1362 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1363 
1364 	error = kernel_mount(ma, flags);
1365 	return (error);
1366 }
1367 
/*
 * Common code for mount and mountroot
 *
 * Allocate and initialize the struct nfsmount for a new mount, connect to
 * the server with newnfs_connect() and obtain the nfsnode/vnode for the
 * root of the mount.
 *
 * Arguments:
 *	argp		mount arguments; flags, fhsize, fh, sotype and proto
 *			are read here and the structure is also handed to
 *			nfs_decode_args().
 *	mp		mount point; mp->mnt_data is set to the new nfsmount.
 *	nam		server address.  It is stored in nm_nam; on any
 *			failure, and in the MNT_UPDATE case, it is freed
 *			here.
 *	hst		host name buffer; MNAMELEN bytes of it are copied to
 *			mp->mnt_stat.f_mntfromname.
 *	krbname, dirpath, srvkrbname (and their lengths)
 *			copied into the name area that follows the nfsmount.
 *	vpp		on success, set to the root vnode.
 *	cred, td	credentials and thread used for the RPCs; an extra
 *			reference on cred is held in nm_sockreq.nr_cred.
 *	nametimeo, negnametimeo
 *			stored in nm_nametimeo / nm_negnametimeo.
 *	minvers		NFSv4 minor version; only used when NFSMNT_NFSV4
 *			is set in argp->flags.
 *
 * Returns 0 on success, with *vpp unlocked but still referenced.  On
 * failure an errno value is returned (EIO when no root file handle is
 * available) and everything set up here is torn down at "bad".
 * An MNT_UPDATE request is not handled here: a message is printed, nam is
 * freed and 0 is returned.
 */
static int
mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
    struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
    int minvers)
{
	struct nfsmount *nmp;
	struct nfsnode *np;
	int error, trycnt, ret;
	struct nfsvattr nfsva;
	struct nfsclclient *clp;
	struct nfsclds *dsp, *tdsp;
	uint32_t lease;
	/* Source of nm_clval values; seeded from nfsboottime on first use. */
	static u_int64_t clval = 0;

	NFSCL_DEBUG(3, "in mnt\n");
	clp = NULL;
	if (mp->mnt_flag & MNT_UPDATE) {
		nmp = VFSTONFS(mp);
		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
		FREE(nam, M_SONAME);
		return (0);
	} else {
		/*
		 * The three names are stored in the space following the
		 * structure, addressed through nm_name.
		 * NOTE(review): the last terminator written below is at
		 * nm_name[krbnamelen + dirlen + srvkrbnamelen + 2], so the
		 * "+ 2" relies on nm_name itself contributing at least one
		 * byte to sizeof(struct nfsmount) -- confirm against the
		 * structure definition.
		 */
		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
		    krbnamelen + dirlen + srvkrbnamelen + 2,
		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
		TAILQ_INIT(&nmp->nm_bufq);
		TAILQ_INIT(&nmp->nm_sess);
		if (clval == 0)
			clval = (u_int64_t)nfsboottime.tv_sec;
		nmp->nm_clval = clval++;
		nmp->nm_krbnamelen = krbnamelen;
		nmp->nm_dirpathlen = dirlen;
		nmp->nm_srvkrbnamelen = srvkrbnamelen;
		if (td->td_ucred->cr_uid != (uid_t)0) {
			/*
			 * nm_uid is used to get KerberosV credentials for
			 * the nfsv4 state handling operations if there is
			 * no host based principal set. Use the uid of
			 * this user if not root, since they are doing the
			 * mount. I don't think setting this for root will
			 * work, since root normally does not have user
			 * credentials in a credentials cache.
			 */
			nmp->nm_uid = td->td_ucred->cr_uid;
		} else {
			/*
			 * Just set to -1, so it won't be used.
			 */
			nmp->nm_uid = (uid_t)-1;
		}

		/* Copy and null terminate all the names */
		if (nmp->nm_krbnamelen > 0) {
			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
		}
		if (nmp->nm_dirpathlen > 0) {
			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
			    nmp->nm_dirpathlen);
			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
			    + 1] = '\0';
		}
		if (nmp->nm_srvkrbnamelen > 0) {
			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
			    nmp->nm_srvkrbnamelen);
			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
			    + nmp->nm_srvkrbnamelen + 2] = '\0';
		}
		/* This reference is dropped by crfree() at "bad" on failure. */
		nmp->nm_sockreq.nr_cred = crhold(cred);
		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
		mp->mnt_data = nmp;
		nmp->nm_getinfo = nfs_getnlminfo;
		nmp->nm_vinvalbuf = ncl_vinvalbuf;
	}
	vfs_getnewfsid(mp);
	nmp->nm_mountp = mp;
	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);

	/*
	 * Since nfs_decode_args() might optionally set them, these
	 * need to be set to defaults before the call, so that the
	 * optional settings aren't overwritten.
	 */
	nmp->nm_nametimeo = nametimeo;
	nmp->nm_negnametimeo = negnametimeo;
	nmp->nm_timeo = NFS_TIMEO;
	nmp->nm_retry = NFS_RETRANS;
	nmp->nm_readahead = NFS_DEFRAHEAD;

	/*
	 * This is empirical approximation of sqrt(hibufspace) * 256.
	 * The loop doubles the value until its square reaches hibufspace.
	 */
	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
		nmp->nm_wcommitsize *= 2;
	nmp->nm_wcommitsize *= 256;

	/* The minor version only applies to NFSv4 mounts. */
	if ((argp->flags & NFSMNT_NFSV4) != 0)
		nmp->nm_minorvers = minvers;
	else
		nmp->nm_minorvers = 0;

	nfs_decode_args(mp, nmp, argp, hst, cred, td);

	/*
	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
	 * high, depending on whether we end up with negative offsets in
	 * the client or server somewhere.  2GB-1 may be safer.
	 *
	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
	 * that we can handle until we find out otherwise.
	 */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
		nmp->nm_maxfilesize = 0xffffffffLL;
	else
		nmp->nm_maxfilesize = OFF_MAX;

	/*
	 * For V2 (neither the V3 nor the V4 flag set) the transfer sizes
	 * are set to the NFS_WSIZE/NFS_RSIZE/NFS_READDIRSIZE constants
	 * here, i.e. after nfs_decode_args() has run.
	 */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
		nmp->nm_wsize = NFS_WSIZE;
		nmp->nm_rsize = NFS_RSIZE;
		nmp->nm_readdirsize = NFS_READDIRSIZE;
	}
	nmp->nm_numgrps = NFS_MAXGRPS;
	/* Negative tprintf delays are clamped to 0. */
	nmp->nm_tprintf_delay = nfs_tprintf_delay;
	if (nmp->nm_tprintf_delay < 0)
		nmp->nm_tprintf_delay = 0;
	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
	if (nmp->nm_tprintf_initial_delay < 0)
		nmp->nm_tprintf_initial_delay = 0;
	nmp->nm_fhsize = argp->fhsize;
	if (nmp->nm_fhsize > 0)
		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
	/* nam is kept in the nfsmount; it is freed at "bad" on failure. */
	nmp->nm_nam = nam;
	/* Set up the sockets and per-host congestion */
	nmp->nm_sotype = argp->sotype;
	nmp->nm_soproto = argp->proto;
	nmp->nm_sockreq.nr_prog = NFS_PROG;
	if ((argp->flags & NFSMNT_NFSV4))
		nmp->nm_sockreq.nr_vers = NFS_VER4;
	else if ((argp->flags & NFSMNT_NFSV3))
		nmp->nm_sockreq.nr_vers = NFS_VER3;
	else
		nmp->nm_sockreq.nr_vers = NFS_VER2;


	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
		goto bad;
	/* For NFSv4.1, get the clientid now. */
	if (nmp->nm_minorvers > 0) {
		NFSCL_DEBUG(3, "at getcl\n");
		error = nfscl_getcl(mp, cred, td, 0, &clp);
		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
		if (error != 0)
			goto bad;
	}

	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
	    nmp->nm_dirpathlen > 0) {
		NFSCL_DEBUG(3, "in dirp\n");
		/*
		 * If the fhsize on the mount point == 0 for V4, the mount
		 * path needs to be looked up.
		 * The lookup is attempted up to 3 times, with a
		 * nfs_catnap() after each failed attempt.
		 */
		trycnt = 3;
		do {
			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
			    cred, td);
			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
			if (error)
				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
		} while (error && --trycnt > 0);
		if (error) {
			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
			goto bad;
		}
	}

	/*
	 * A reference count is needed on the nfsnode representing the
	 * remote root.  If this object is not persistent, then backward
	 * traversals of the mount point (i.e. "..") will not work if
	 * the nfsnode gets flushed out of the cache. Ufs does not have
	 * this problem, because one can identify root inodes by their
	 * number == UFS_ROOTINO (2).
	 */
	if (nmp->nm_fhsize > 0) {
		/*
		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
		 * non-zero for the root vnode. f_iosize will be set correctly
		 * by nfs_statfs() before any I/O occurs.
		 */
		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
		    LK_EXCLUSIVE);
		if (error)
			goto bad;
		*vpp = NFSTOV(np);

		/*
		 * Get file attributes and transfer parameters for the
		 * mountpoint.  This has the side effect of filling in
		 * (*vpp)->v_type with the correct value.
		 * A failure here (ret != 0) does not fail the mount.
		 */
		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
		    cred, td, &nfsva, NULL, &lease);
		if (ret) {
			/*
			 * Just set default values to get things going.
			 */
			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
			nfsva.na_vattr.va_type = VDIR;
			nfsva.na_vattr.va_mode = 0777;
			nfsva.na_vattr.va_nlink = 100;
			nfsva.na_vattr.va_uid = (uid_t)0;
			nfsva.na_vattr.va_gid = (gid_t)0;
			nfsva.na_vattr.va_fileid = 2;
			nfsva.na_vattr.va_gen = 1;
			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
			nfsva.na_vattr.va_size = 512 * 1024;
			lease = 60;
		}
		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
		if (nmp->nm_minorvers > 0) {
			/*
			 * clp was obtained by nfscl_getcl() above, under the
			 * same nm_minorvers > 0 condition.
			 */
			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
			NFSLOCKCLSTATE();
			clp->nfsc_renew = NFSCL_RENEW(lease);
			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
			/* Bump the revision, skipping the value 0. */
			clp->nfsc_clientidrev++;
			if (clp->nfsc_clientidrev == 0)
				clp->nfsc_clientidrev++;
			NFSUNLOCKCLSTATE();
			/*
			 * Mount will succeed, so the renew thread can be
			 * started now.
			 */
			nfscl_start_renewthread(clp);
			nfscl_clientrelease(clp);
		}
		if (argp->flags & NFSMNT_NFSV3)
			ncl_fsinfo(nmp, *vpp, cred, td);

		/*
		 * Mark if the mount point supports NFSv4 ACLs.
		 * Only done when the getattr above succeeded (ret == 0),
		 * since nfsva holds made-up defaults otherwise.
		 */
		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
		    ret == 0 &&
		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_NFS4ACLS;
			MNT_IUNLOCK(mp);
		}

		/*
		 * Lose the lock but keep the ref.
		 */
		NFSVOPUNLOCK(*vpp, 0);
		return (0);
	}
	/* Reached only when no root file handle is available. */
	error = EIO;

bad:
	/*
	 * Failure: undo everything set up above, then free the nfsmount
	 * and the server address.
	 */
	if (clp != NULL)
		nfscl_clientrelease(clp);
	newnfs_disconnect(&nmp->nm_sockreq);
	crfree(nmp->nm_sockreq.nr_cred);
	if (nmp->nm_sockreq.nr_auth != NULL)
		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
	mtx_destroy(&nmp->nm_mtx);
	if (nmp->nm_clp != NULL) {
		NFSLOCKCLSTATE();
		LIST_REMOVE(nmp->nm_clp, nfsc_list);
		NFSUNLOCKCLSTATE();
		free(nmp->nm_clp, M_NFSCLCLIENT);
	}
	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
		nfscl_freenfsclds(dsp);
	FREE(nmp, M_NEWNFSMNT);
	FREE(nam, M_SONAME);
	return (error);
}
1651 
1652 /*
1653  * unmount system call
1654  */
1655 static int
1656 nfs_unmount(struct mount *mp, int mntflags)
1657 {
1658 	struct thread *td;
1659 	struct nfsmount *nmp;
1660 	int error, flags = 0, i, trycnt = 0;
1661 	struct nfsclds *dsp, *tdsp;
1662 
1663 	td = curthread;
1664 
1665 	if (mntflags & MNT_FORCE)
1666 		flags |= FORCECLOSE;
1667 	nmp = VFSTONFS(mp);
1668 	/*
1669 	 * Goes something like this..
1670 	 * - Call vflush() to clear out vnodes for this filesystem
1671 	 * - Close the socket
1672 	 * - Free up the data structures
1673 	 */
1674 	/* In the forced case, cancel any outstanding requests. */
1675 	if (mntflags & MNT_FORCE) {
1676 		error = newnfs_nmcancelreqs(nmp);
1677 		if (error)
1678 			goto out;
1679 		/* For a forced close, get rid of the renew thread now */
1680 		nfscl_umount(nmp, td);
1681 	}
1682 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1683 	do {
1684 		error = vflush(mp, 1, flags, td);
1685 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1686 			(void) nfs_catnap(PSOCK, error, "newndm");
1687 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1688 	if (error)
1689 		goto out;
1690 
1691 	/*
1692 	 * We are now committed to the unmount.
1693 	 */
1694 	if ((mntflags & MNT_FORCE) == 0)
1695 		nfscl_umount(nmp, td);
1696 	/* Make sure no nfsiods are assigned to this mount. */
1697 	mtx_lock(&ncl_iod_mutex);
1698 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1699 		if (ncl_iodmount[i] == nmp) {
1700 			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1701 			ncl_iodmount[i] = NULL;
1702 		}
1703 	mtx_unlock(&ncl_iod_mutex);
1704 	newnfs_disconnect(&nmp->nm_sockreq);
1705 	crfree(nmp->nm_sockreq.nr_cred);
1706 	FREE(nmp->nm_nam, M_SONAME);
1707 	if (nmp->nm_sockreq.nr_auth != NULL)
1708 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1709 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1710 	mtx_destroy(&nmp->nm_mtx);
1711 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1712 		nfscl_freenfsclds(dsp);
1713 	FREE(nmp, M_NEWNFSMNT);
1714 out:
1715 	return (error);
1716 }
1717 
1718 /*
1719  * Return root of a filesystem
1720  */
1721 static int
1722 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1723 {
1724 	struct vnode *vp;
1725 	struct nfsmount *nmp;
1726 	struct nfsnode *np;
1727 	int error;
1728 
1729 	nmp = VFSTONFS(mp);
1730 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1731 	if (error)
1732 		return error;
1733 	vp = NFSTOV(np);
1734 	/*
1735 	 * Get transfer parameters and attributes for root vnode once.
1736 	 */
1737 	mtx_lock(&nmp->nm_mtx);
1738 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1739 		mtx_unlock(&nmp->nm_mtx);
1740 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1741 	} else
1742 		mtx_unlock(&nmp->nm_mtx);
1743 	if (vp->v_type == VNON)
1744 	    vp->v_type = VDIR;
1745 	vp->v_vflag |= VV_ROOT;
1746 	*vpp = vp;
1747 	return (0);
1748 }
1749 
1750 /*
1751  * Flush out the buffer cache
1752  */
1753 /* ARGSUSED */
1754 static int
1755 nfs_sync(struct mount *mp, int waitfor)
1756 {
1757 	struct vnode *vp, *mvp;
1758 	struct thread *td;
1759 	int error, allerror = 0;
1760 
1761 	td = curthread;
1762 
1763 	MNT_ILOCK(mp);
1764 	/*
1765 	 * If a forced dismount is in progress, return from here so that
1766 	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1767 	 * calling VFS_UNMOUNT().
1768 	 */
1769 	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1770 		MNT_IUNLOCK(mp);
1771 		return (EBADF);
1772 	}
1773 	MNT_IUNLOCK(mp);
1774 
1775 	/*
1776 	 * Force stale buffer cache information to be flushed.
1777 	 */
1778 loop:
1779 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1780 		/* XXX Racy bv_cnt check. */
1781 		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1782 		    waitfor == MNT_LAZY) {
1783 			VI_UNLOCK(vp);
1784 			continue;
1785 		}
1786 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1787 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1788 			goto loop;
1789 		}
1790 		error = VOP_FSYNC(vp, waitfor, td);
1791 		if (error)
1792 			allerror = error;
1793 		NFSVOPUNLOCK(vp, 0);
1794 		vrele(vp);
1795 	}
1796 	return (allerror);
1797 }
1798 
1799 static int
1800 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1801 {
1802 	struct nfsmount *nmp = VFSTONFS(mp);
1803 	struct vfsquery vq;
1804 	int error;
1805 
1806 	bzero(&vq, sizeof(vq));
1807 	switch (op) {
1808 #if 0
1809 	case VFS_CTL_NOLOCKS:
1810 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1811  		if (req->oldptr != NULL) {
1812  			error = SYSCTL_OUT(req, &val, sizeof(val));
1813  			if (error)
1814  				return (error);
1815  		}
1816  		if (req->newptr != NULL) {
1817  			error = SYSCTL_IN(req, &val, sizeof(val));
1818  			if (error)
1819  				return (error);
1820 			if (val)
1821 				nmp->nm_flag |= NFSMNT_NOLOCKS;
1822 			else
1823 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1824  		}
1825 		break;
1826 #endif
1827 	case VFS_CTL_QUERY:
1828 		mtx_lock(&nmp->nm_mtx);
1829 		if (nmp->nm_state & NFSSTA_TIMEO)
1830 			vq.vq_flags |= VQ_NOTRESP;
1831 		mtx_unlock(&nmp->nm_mtx);
1832 #if 0
1833 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1834 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1835 			vq.vq_flags |= VQ_NOTRESPLOCK;
1836 #endif
1837 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1838 		break;
1839  	case VFS_CTL_TIMEO:
1840  		if (req->oldptr != NULL) {
1841  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1842  			    sizeof(nmp->nm_tprintf_initial_delay));
1843  			if (error)
1844  				return (error);
1845  		}
1846  		if (req->newptr != NULL) {
1847 			error = vfs_suser(mp, req->td);
1848 			if (error)
1849 				return (error);
1850  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1851  			    sizeof(nmp->nm_tprintf_initial_delay));
1852  			if (error)
1853  				return (error);
1854  			if (nmp->nm_tprintf_initial_delay < 0)
1855  				nmp->nm_tprintf_initial_delay = 0;
1856  		}
1857 		break;
1858 	default:
1859 		return (ENOTSUP);
1860 	}
1861 	return (0);
1862 }
1863 
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.
 * The return value of newnfs_nmcancelreqs() is not used.
 */
static void
nfs_purge(struct mount *mp)
{

	(void)newnfs_nmcancelreqs(VFSTONFS(mp));
}
1876 
1877 /*
1878  * Extract the information needed by the nlm from the nfs vnode.
1879  */
1880 static void
1881 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1882     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1883     struct timeval *timeop)
1884 {
1885 	struct nfsmount *nmp;
1886 	struct nfsnode *np = VTONFS(vp);
1887 
1888 	nmp = VFSTONFS(vp->v_mount);
1889 	if (fhlenp != NULL)
1890 		*fhlenp = (size_t)np->n_fhp->nfh_len;
1891 	if (fhp != NULL)
1892 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1893 	if (sp != NULL)
1894 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1895 	if (is_v3p != NULL)
1896 		*is_v3p = NFS_ISV3(vp);
1897 	if (sizep != NULL)
1898 		*sizep = np->n_size;
1899 	if (timeop != NULL) {
1900 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1901 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1902 	}
1903 }
1904 
/*
 * This function prints out an option name, based on the conditional
 * argument.
 *
 * When testval is non-zero and more than strlen(opt) bytes remain, opt is
 * appended at *buf, and *buf / *blen are advanced past it.  Otherwise
 * nothing is written.  nmp is not used.
 */
static __inline void nfscl_printopt(struct nfsmount *nmp, int testval,
    char *opt, char **buf, size_t *blen)
{
	size_t optlen;
	int written;

	if (testval == 0)
		return;
	optlen = strlen(opt);
	/* Require room for the name and its terminating NUL. */
	if (*blen <= optlen)
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	if ((size_t)written != optlen)
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
1922 
/*
 * Append "opt=optval" to the output buffer, with "optval" printed as a
 * signed decimal integer.
 *
 * *buf is the current write position and *blen is the space remaining
 * there, including the byte needed for the terminating NUL.  When the
 * complete text fits, *buf is advanced to the new terminating NUL and
 * *blen is reduced by the number of characters added.  When it does not
 * fit, nothing is appended and *buf / *blen are left unchanged, so that a
 * cut-off number can never be mistaken for the real value.
 * "nmp" is not used.
 */
static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
    char *opt, char **buf, size_t *blen)
{
	int len;

	/* Cheap pre-check: skip when not even the name and '=' would fit. */
	if (*blen <= strlen(opt) + 1)
		return;

	len = snprintf(*buf, *blen, "%s=%d", opt, optval);
	if (len >= 0 && (size_t)len < *blen) {
		*buf += len;
		*blen -= len;
	} else {
		/*
		 * The output was truncated.  Discard the partial text by
		 * terminating the string at the current position again;
		 * *blen is at least 2 here, so the store is in bounds.
		 */
		**buf = '\0';
	}
}
1940 
1941 /*
1942  * Load the option flags and values into the buffer.
1943  */
1944 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1945 {
1946 	char *buf;
1947 	size_t blen;
1948 
1949 	buf = buffer;
1950 	blen = buflen;
1951 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1952 	    &blen);
1953 	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1954 		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1955 		    &blen);
1956 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1957 		    &buf, &blen);
1958 	}
1959 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1960 	    &blen);
1961 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1962 	    "nfsv2", &buf, &blen);
1963 	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1964 	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1965 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1966 	    &buf, &blen);
1967 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1968 	    &buf, &blen);
1969 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1970 	    &blen);
1971 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1972 	    &blen);
1973 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1974 	    &blen);
1975 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1976 	    &blen);
1977 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1978 	    &blen);
1979 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
1980 	    ",noncontigwr", &buf, &blen);
1981 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1982 	    0, ",lockd", &buf, &blen);
1983 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1984 	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1985 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1986 	    &buf, &blen);
1987 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1988 	    &buf, &blen);
1989 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1990 	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
1991 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1992 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1993 	    &buf, &blen);
1994 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1995 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
1996 	    &buf, &blen);
1997 	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1998 	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1999 	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
2000 	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
2001 	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
2002 	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
2003 	    &blen);
2004 	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
2005 	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
2006 	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
2007 	    &blen);
2008 	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
2009 	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
2010 	    &blen);
2011 	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
2012 	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
2013 }
2014 
2015