xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision 8b25e8410533a6e69cceff910546b2dc485a5059)
1 /*-
2  * Copyright (c) 1989, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/limits.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/mount.h>
55 #include <sys/proc.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/sockio.h>
59 #include <sys/sysctl.h>
60 #include <sys/vnode.h>
61 #include <sys/signalvar.h>
62 
63 #include <vm/vm.h>
64 #include <vm/vm_extern.h>
65 #include <vm/uma.h>
66 
67 #include <net/if.h>
68 #include <net/route.h>
69 #include <netinet/in.h>
70 
71 #include <fs/nfs/nfsport.h>
72 #include <fs/nfsclient/nfsnode.h>
73 #include <fs/nfsclient/nfsmount.h>
74 #include <fs/nfsclient/nfs.h>
75 #include <nfs/nfsdiskless.h>
76 
77 FEATURE(nfscl, "NFSv4 client");
78 
79 extern int nfscl_ticks;
80 extern struct timeval nfsboottime;
81 extern int nfsrv_useacl;
82 extern int nfscl_debuglevel;
83 extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
84 extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
85 extern struct mtx ncl_iod_mutex;
86 NFSCLSTATEMUTEX;
87 
88 MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header");
89 MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct");
90 
91 SYSCTL_DECL(_vfs_nfs);
92 static int nfs_ip_paranoia = 1;
93 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
94     &nfs_ip_paranoia, 0, "");
95 static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
96 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
97         downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
98 /* how long between console messages "nfs server foo not responding" */
99 static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
100 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
101         downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
102 #ifdef NFS_DEBUG
103 int nfs_debug;
104 SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
105     "Toggle debug flag");
106 #endif
107 
108 static int	nfs_mountroot(struct mount *);
109 static void	nfs_sec_name(char *, int *);
110 static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
111 		    struct nfs_args *argp, const char *, struct ucred *,
112 		    struct thread *);
113 static int	mountnfs(struct nfs_args *, struct mount *,
114 		    struct sockaddr *, char *, u_char *, int, u_char *, int,
115 		    u_char *, int, struct vnode **, struct ucred *,
116 		    struct thread *, int, int, int);
117 static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
118 		    struct sockaddr_storage *, int *, off_t *,
119 		    struct timeval *);
120 static vfs_mount_t nfs_mount;
121 static vfs_cmount_t nfs_cmount;
122 static vfs_unmount_t nfs_unmount;
123 static vfs_root_t nfs_root;
124 static vfs_statfs_t nfs_statfs;
125 static vfs_sync_t nfs_sync;
126 static vfs_sysctl_t nfs_sysctl;
127 static vfs_purge_t nfs_purge;
128 
129 /*
130  * nfs vfs operations.
131  */
132 static struct vfsops nfs_vfsops = {
133 	.vfs_init =		ncl_init,
134 	.vfs_mount =		nfs_mount,
135 	.vfs_cmount =		nfs_cmount,
136 	.vfs_root =		nfs_root,
137 	.vfs_statfs =		nfs_statfs,
138 	.vfs_sync =		nfs_sync,
139 	.vfs_uninit =		ncl_uninit,
140 	.vfs_unmount =		nfs_unmount,
141 	.vfs_sysctl =		nfs_sysctl,
142 	.vfs_purge =		nfs_purge,
143 };
144 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
145 
146 /* So that loader and kldload(2) can find us, wherever we are.. */
147 MODULE_VERSION(nfs, 1);
148 MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
149 MODULE_DEPEND(nfs, krpc, 1, 1, 1);
150 MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
151 MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
152 
153 /*
154  * This structure is now defined in sys/nfs/nfs_diskless.c so that it
155  * can be shared by both NFS clients. It is declared here so that it
156  * will be defined for kernels built without NFS_ROOT, although it
157  * isn't used in that case.
158  */
159 #if !defined(NFS_ROOT)
160 struct nfs_diskless	nfs_diskless = { { { 0 } } };
161 struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
162 int			nfs_diskless_valid = 0;
163 #endif
164 
165 SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
166     &nfs_diskless_valid, 0,
167     "Has the diskless struct been filled correctly");
168 
169 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
170     nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
171 
172 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
173     &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
174     "%Ssockaddr_in", "Diskless root nfs address");
175 
176 
177 void		newnfsargs_ntoh(struct nfs_args *);
178 static int	nfs_mountdiskless(char *,
179 		    struct sockaddr_in *, struct nfs_args *,
180 		    struct thread *, struct vnode **, struct mount *);
181 static void	nfs_convert_diskless(void);
182 static void	nfs_convert_oargs(struct nfs_args *args,
183 		    struct onfs_args *oargs);
184 
185 int
186 newnfs_iosize(struct nfsmount *nmp)
187 {
188 	int iosize, maxio;
189 
190 	/* First, set the upper limit for iosize */
191 	if (nmp->nm_flag & NFSMNT_NFSV4) {
192 		maxio = NFS_MAXBSIZE;
193 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
194 		if (nmp->nm_sotype == SOCK_DGRAM)
195 			maxio = NFS_MAXDGRAMDATA;
196 		else
197 			maxio = NFS_MAXBSIZE;
198 	} else {
199 		maxio = NFS_V2MAXDATA;
200 	}
201 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
202 		nmp->nm_rsize = maxio;
203 	if (nmp->nm_rsize > NFS_MAXBSIZE)
204 		nmp->nm_rsize = NFS_MAXBSIZE;
205 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
206 		nmp->nm_readdirsize = maxio;
207 	if (nmp->nm_readdirsize > nmp->nm_rsize)
208 		nmp->nm_readdirsize = nmp->nm_rsize;
209 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
210 		nmp->nm_wsize = maxio;
211 	if (nmp->nm_wsize > NFS_MAXBSIZE)
212 		nmp->nm_wsize = NFS_MAXBSIZE;
213 
214 	/*
215 	 * Calculate the size used for io buffers.  Use the larger
216 	 * of the two sizes to minimise nfs requests but make sure
217 	 * that it is at least one VM page to avoid wasting buffer
218 	 * space.  It must also be at least NFS_DIRBLKSIZ, since
219 	 * that is the buffer size used for directories.
220 	 */
221 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
222 	iosize = imax(iosize, PAGE_SIZE);
223 	iosize = imax(iosize, NFS_DIRBLKSIZ);
224 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
225 	return (iosize);
226 }
227 
228 static void
229 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
230 {
231 
232 	args->version = NFS_ARGSVERSION;
233 	args->addr = oargs->addr;
234 	args->addrlen = oargs->addrlen;
235 	args->sotype = oargs->sotype;
236 	args->proto = oargs->proto;
237 	args->fh = oargs->fh;
238 	args->fhsize = oargs->fhsize;
239 	args->flags = oargs->flags;
240 	args->wsize = oargs->wsize;
241 	args->rsize = oargs->rsize;
242 	args->readdirsize = oargs->readdirsize;
243 	args->timeo = oargs->timeo;
244 	args->retrans = oargs->retrans;
245 	args->readahead = oargs->readahead;
246 	args->hostname = oargs->hostname;
247 }
248 
249 static void
250 nfs_convert_diskless(void)
251 {
252 
253 	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
254 		sizeof(struct ifaliasreq));
255 	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
256 		sizeof(struct sockaddr_in));
257 	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
258 	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
259 		nfsv3_diskless.root_fhsize = NFSX_MYFH;
260 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
261 	} else {
262 		nfsv3_diskless.root_fhsize = NFSX_V2FH;
263 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
264 	}
265 	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
266 		sizeof(struct sockaddr_in));
267 	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
268 	nfsv3_diskless.root_time = nfs_diskless.root_time;
269 	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
270 		MAXHOSTNAMELEN);
271 	nfs_diskless_valid = 3;
272 }
273 
274 /*
275  * nfs statfs call
276  */
277 static int
278 nfs_statfs(struct mount *mp, struct statfs *sbp)
279 {
280 	struct vnode *vp;
281 	struct thread *td;
282 	struct nfsmount *nmp = VFSTONFS(mp);
283 	struct nfsvattr nfsva;
284 	struct nfsfsinfo fs;
285 	struct nfsstatfs sb;
286 	int error = 0, attrflag, gotfsinfo = 0, ret;
287 	struct nfsnode *np;
288 
289 	td = curthread;
290 
291 	error = vfs_busy(mp, MBF_NOWAIT);
292 	if (error)
293 		return (error);
294 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
295 	if (error) {
296 		vfs_unbusy(mp);
297 		return (error);
298 	}
299 	vp = NFSTOV(np);
300 	mtx_lock(&nmp->nm_mtx);
301 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
302 		mtx_unlock(&nmp->nm_mtx);
303 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
304 		    &attrflag, NULL);
305 		if (!error)
306 			gotfsinfo = 1;
307 	} else
308 		mtx_unlock(&nmp->nm_mtx);
309 	if (!error)
310 		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
311 		    &attrflag, NULL);
312 	if (error != 0)
313 		NFSCL_DEBUG(2, "statfs=%d\n", error);
314 	if (attrflag == 0) {
315 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
316 		    td->td_ucred, td, &nfsva, NULL, NULL);
317 		if (ret) {
318 			/*
319 			 * Just set default values to get things going.
320 			 */
321 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
322 			nfsva.na_vattr.va_type = VDIR;
323 			nfsva.na_vattr.va_mode = 0777;
324 			nfsva.na_vattr.va_nlink = 100;
325 			nfsva.na_vattr.va_uid = (uid_t)0;
326 			nfsva.na_vattr.va_gid = (gid_t)0;
327 			nfsva.na_vattr.va_fileid = 2;
328 			nfsva.na_vattr.va_gen = 1;
329 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
330 			nfsva.na_vattr.va_size = 512 * 1024;
331 		}
332 	}
333 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
334 	if (!error) {
335 	    mtx_lock(&nmp->nm_mtx);
336 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
337 		nfscl_loadfsinfo(nmp, &fs);
338 	    nfscl_loadsbinfo(nmp, &sb, sbp);
339 	    sbp->f_iosize = newnfs_iosize(nmp);
340 	    mtx_unlock(&nmp->nm_mtx);
341 	    if (sbp != &mp->mnt_stat) {
342 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
343 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
344 	    }
345 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
346 	} else if (NFS_ISV4(vp)) {
347 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
348 	}
349 	vput(vp);
350 	vfs_unbusy(mp);
351 	return (error);
352 }
353 
354 /*
355  * nfs version 3 fsinfo rpc call
356  */
357 int
358 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
359     struct thread *td)
360 {
361 	struct nfsfsinfo fs;
362 	struct nfsvattr nfsva;
363 	int error, attrflag;
364 
365 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
366 	if (!error) {
367 		if (attrflag)
368 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
369 			    1);
370 		mtx_lock(&nmp->nm_mtx);
371 		nfscl_loadfsinfo(nmp, &fs);
372 		mtx_unlock(&nmp->nm_mtx);
373 	}
374 	return (error);
375 }
376 
377 /*
378  * Mount a remote root fs via. nfs. This depends on the info in the
379  * nfs_diskless structure that has been filled in properly by some primary
380  * bootstrap.
381  * It goes something like this:
382  * - do enough of "ifconfig" by calling ifioctl() so that the system
383  *   can talk to the server
384  * - If nfs_diskless.mygateway is filled in, use that address as
385  *   a default gateway.
386  * - build the rootfs mount point and call mountnfs() to do the rest.
387  *
388  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
389  * structure, as well as other global NFS client variables here, as
390  * nfs_mountroot() will be called once in the boot before any other NFS
391  * client activity occurs.
392  */
393 static int
394 nfs_mountroot(struct mount *mp)
395 {
396 	struct thread *td = curthread;
397 	struct nfsv3_diskless *nd = &nfsv3_diskless;
398 	struct socket *so;
399 	struct vnode *vp;
400 	struct ifreq ir;
401 	int error;
402 	u_long l;
403 	char buf[128];
404 	char *cp;
405 
406 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
407 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
408 #elif defined(NFS_ROOT)
409 	nfs_setup_diskless();
410 #endif
411 
412 	if (nfs_diskless_valid == 0)
413 		return (-1);
414 	if (nfs_diskless_valid == 1)
415 		nfs_convert_diskless();
416 
417 	/*
418 	 * XXX splnet, so networks will receive...
419 	 */
420 	splnet();
421 
422 	/*
423 	 * Do enough of ifconfig(8) so that the critical net interface can
424 	 * talk to the server.
425 	 */
426 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
427 	    td->td_ucred, td);
428 	if (error)
429 		panic("nfs_mountroot: socreate(%04x): %d",
430 			nd->myif.ifra_addr.sa_family, error);
431 
432 #if 0 /* XXX Bad idea */
433 	/*
434 	 * We might not have been told the right interface, so we pass
435 	 * over the first ten interfaces of the same kind, until we get
436 	 * one of them configured.
437 	 */
438 
439 	for (i = strlen(nd->myif.ifra_name) - 1;
440 		nd->myif.ifra_name[i] >= '0' &&
441 		nd->myif.ifra_name[i] <= '9';
442 		nd->myif.ifra_name[i] ++) {
443 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
444 		if(!error)
445 			break;
446 	}
447 #endif
448 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
449 	if (error)
450 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
451 	if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
452 		ir.ifr_mtu = strtol(cp, NULL, 10);
453 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
454 		freeenv(cp);
455 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
456 		if (error)
457 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
458 	}
459 	soclose(so);
460 
461 	/*
462 	 * If the gateway field is filled in, set it as the default route.
463 	 * Note that pxeboot will set a default route of 0 if the route
464 	 * is not set by the DHCP server.  Check also for a value of 0
465 	 * to avoid panicking inappropriately in that situation.
466 	 */
467 	if (nd->mygateway.sin_len != 0 &&
468 	    nd->mygateway.sin_addr.s_addr != 0) {
469 		struct sockaddr_in mask, sin;
470 
471 		bzero((caddr_t)&mask, sizeof(mask));
472 		sin = mask;
473 		sin.sin_family = AF_INET;
474 		sin.sin_len = sizeof(sin);
475                 /* XXX MRT use table 0 for this sort of thing */
476 		CURVNET_SET(TD_TO_VNET(td));
477 		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
478 		    (struct sockaddr *)&nd->mygateway,
479 		    (struct sockaddr *)&mask,
480 		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
481 		CURVNET_RESTORE();
482 		if (error)
483 			panic("nfs_mountroot: RTM_ADD: %d", error);
484 	}
485 
486 	/*
487 	 * Create the rootfs mount point.
488 	 */
489 	nd->root_args.fh = nd->root_fh;
490 	nd->root_args.fhsize = nd->root_fhsize;
491 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
492 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
493 		(l >> 24) & 0xff, (l >> 16) & 0xff,
494 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
495 	printf("NFS ROOT: %s\n", buf);
496 	nd->root_args.hostname = buf;
497 	if ((error = nfs_mountdiskless(buf,
498 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
499 		return (error);
500 	}
501 
502 	/*
503 	 * This is not really an nfs issue, but it is much easier to
504 	 * set hostname here and then let the "/etc/rc.xxx" files
505 	 * mount the right /var based upon its preset value.
506 	 */
507 	mtx_lock(&prison0.pr_mtx);
508 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
509 	    sizeof(prison0.pr_hostname));
510 	mtx_unlock(&prison0.pr_mtx);
511 	inittodr(ntohl(nd->root_time));
512 	return (0);
513 }
514 
515 /*
516  * Internal version of mount system call for diskless setup.
517  */
518 static int
519 nfs_mountdiskless(char *path,
520     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
521     struct vnode **vpp, struct mount *mp)
522 {
523 	struct sockaddr *nam;
524 	int dirlen, error;
525 	char *dirpath;
526 
527 	/*
528 	 * Find the directory path in "path", which also has the server's
529 	 * name/ip address in it.
530 	 */
531 	dirpath = strchr(path, ':');
532 	if (dirpath != NULL)
533 		dirlen = strlen(++dirpath);
534 	else
535 		dirlen = 0;
536 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
537 	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
538 	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
539 	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
540 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
541 		return (error);
542 	}
543 	return (0);
544 }
545 
546 static void
547 nfs_sec_name(char *sec, int *flagsp)
548 {
549 	if (!strcmp(sec, "krb5"))
550 		*flagsp |= NFSMNT_KERB;
551 	else if (!strcmp(sec, "krb5i"))
552 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
553 	else if (!strcmp(sec, "krb5p"))
554 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
555 }
556 
557 static void
558 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
559     const char *hostname, struct ucred *cred, struct thread *td)
560 {
561 	int s;
562 	int adjsock;
563 	char *p;
564 
565 	s = splnet();
566 
567 	/*
568 	 * Set read-only flag if requested; otherwise, clear it if this is
569 	 * an update.  If this is not an update, then either the read-only
570 	 * flag is already clear, or this is a root mount and it was set
571 	 * intentionally at some previous point.
572 	 */
573 	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
574 		MNT_ILOCK(mp);
575 		mp->mnt_flag |= MNT_RDONLY;
576 		MNT_IUNLOCK(mp);
577 	} else if (mp->mnt_flag & MNT_UPDATE) {
578 		MNT_ILOCK(mp);
579 		mp->mnt_flag &= ~MNT_RDONLY;
580 		MNT_IUNLOCK(mp);
581 	}
582 
583 	/*
584 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
585 	 * no sense in that context.  Also, set up appropriate retransmit
586 	 * and soft timeout behavior.
587 	 */
588 	if (argp->sotype == SOCK_STREAM) {
589 		nmp->nm_flag &= ~NFSMNT_NOCONN;
590 		nmp->nm_timeo = NFS_MAXTIMEO;
591 		if ((argp->flags & NFSMNT_NFSV4) != 0)
592 			nmp->nm_retry = INT_MAX;
593 		else
594 			nmp->nm_retry = NFS_RETRANS_TCP;
595 	}
596 
597 	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
598 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
599 		argp->flags &= ~NFSMNT_RDIRPLUS;
600 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
601 	}
602 
603 	/* Re-bind if rsrvd port requested and wasn't on one */
604 	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
605 		  && (argp->flags & NFSMNT_RESVPORT);
606 	/* Also re-bind if we're switching to/from a connected UDP socket */
607 	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
608 		    (argp->flags & NFSMNT_NOCONN));
609 
610 	/* Update flags atomically.  Don't change the lock bits. */
611 	nmp->nm_flag = argp->flags | nmp->nm_flag;
612 	splx(s);
613 
614 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
615 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
616 		if (nmp->nm_timeo < NFS_MINTIMEO)
617 			nmp->nm_timeo = NFS_MINTIMEO;
618 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
619 			nmp->nm_timeo = NFS_MAXTIMEO;
620 	}
621 
622 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
623 		nmp->nm_retry = argp->retrans;
624 		if (nmp->nm_retry > NFS_MAXREXMIT)
625 			nmp->nm_retry = NFS_MAXREXMIT;
626 	}
627 
628 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
629 		nmp->nm_wsize = argp->wsize;
630 		/*
631 		 * Clip at the power of 2 below the size. There is an
632 		 * issue (not isolated) that causes intermittent page
633 		 * faults if this is not done.
634 		 */
635 		if (nmp->nm_wsize > NFS_FABLKSIZE)
636 			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
637 		else
638 			nmp->nm_wsize = NFS_FABLKSIZE;
639 	}
640 
641 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
642 		nmp->nm_rsize = argp->rsize;
643 		/*
644 		 * Clip at the power of 2 below the size. There is an
645 		 * issue (not isolated) that causes intermittent page
646 		 * faults if this is not done.
647 		 */
648 		if (nmp->nm_rsize > NFS_FABLKSIZE)
649 			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
650 		else
651 			nmp->nm_rsize = NFS_FABLKSIZE;
652 	}
653 
654 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
655 		nmp->nm_readdirsize = argp->readdirsize;
656 	}
657 
658 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
659 		nmp->nm_acregmin = argp->acregmin;
660 	else
661 		nmp->nm_acregmin = NFS_MINATTRTIMO;
662 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
663 		nmp->nm_acregmax = argp->acregmax;
664 	else
665 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
666 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
667 		nmp->nm_acdirmin = argp->acdirmin;
668 	else
669 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
670 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
671 		nmp->nm_acdirmax = argp->acdirmax;
672 	else
673 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
674 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
675 		nmp->nm_acdirmin = nmp->nm_acdirmax;
676 	if (nmp->nm_acregmin > nmp->nm_acregmax)
677 		nmp->nm_acregmin = nmp->nm_acregmax;
678 
679 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
680 		if (argp->readahead <= NFS_MAXRAHEAD)
681 			nmp->nm_readahead = argp->readahead;
682 		else
683 			nmp->nm_readahead = NFS_MAXRAHEAD;
684 	}
685 	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
686 		if (argp->wcommitsize < nmp->nm_wsize)
687 			nmp->nm_wcommitsize = nmp->nm_wsize;
688 		else
689 			nmp->nm_wcommitsize = argp->wcommitsize;
690 	}
691 
692 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
693 		    (nmp->nm_soproto != argp->proto));
694 
695 	if (nmp->nm_client != NULL && adjsock) {
696 		int haslock = 0, error = 0;
697 
698 		if (nmp->nm_sotype == SOCK_STREAM) {
699 			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
700 			if (!error)
701 				haslock = 1;
702 		}
703 		if (!error) {
704 		    newnfs_disconnect(&nmp->nm_sockreq);
705 		    if (haslock)
706 			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
707 		    nmp->nm_sotype = argp->sotype;
708 		    nmp->nm_soproto = argp->proto;
709 		    if (nmp->nm_sotype == SOCK_DGRAM)
710 			while (newnfs_connect(nmp, &nmp->nm_sockreq,
711 			    cred, td, 0)) {
712 				printf("newnfs_args: retrying connect\n");
713 				(void) nfs_catnap(PSOCK, 0, "nfscon");
714 			}
715 		}
716 	} else {
717 		nmp->nm_sotype = argp->sotype;
718 		nmp->nm_soproto = argp->proto;
719 	}
720 
721 	if (hostname != NULL) {
722 		strlcpy(nmp->nm_hostname, hostname,
723 		    sizeof(nmp->nm_hostname));
724 		p = strchr(nmp->nm_hostname, ':');
725 		if (p != NULL)
726 			*p = '\0';
727 	}
728 }
729 
/*
 * NULL-terminated list of the mount option names nfs_mount() passes to
 * vfs_filteropt() as the set of legal options.
 */
static const char *nfs_opts[] = {
	"from", "nfs_args",
	"noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow",
	"union",
	"noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
	"async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize", "soft", "hard", "mntudp", "tcp", "udp", "wsize",
	"rsize",
	"retrans", "actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeo", "timeout", "addr", "fh",
	"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath",
	"minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr",
	"pnfs", "wcommitsize",
	NULL
};
741 
742 /*
743  * Parse the "from" mountarg, passed by the generic mount(8) program
744  * or the mountroot code.  This is used when rerooting into NFS.
745  *
746  * Note that the "hostname" is actually a "hostname:/share/path" string.
747  */
748 static int
749 nfs_mount_parse_from(struct vfsoptlist *opts, char **hostnamep,
750     struct sockaddr_in **sinp, char *dirpath, size_t dirpathsize, int *dirlenp)
751 {
752 	char nam[MNAMELEN + 1];
753 	char *delimp, *hostp, *spec;
754 	int error, have_bracket = 0, offset, rv, speclen;
755 	struct sockaddr_in *sin;
756 	size_t len;
757 
758 	error = vfs_getopt(opts, "from", (void **)&spec, &speclen);
759 	if (error != 0)
760 		return (error);
761 
762 	/*
763 	 * This part comes from sbin/mount_nfs/mount_nfs.c:getnfsargs().
764 	 */
765 	if (*spec == '[' && (delimp = strchr(spec + 1, ']')) != NULL &&
766 	    *(delimp + 1) == ':') {
767 		hostp = spec + 1;
768 		spec = delimp + 2;
769 		have_bracket = 1;
770 	} else if ((delimp = strrchr(spec, ':')) != NULL) {
771 		hostp = spec;
772 		spec = delimp + 1;
773 	} else if ((delimp = strrchr(spec, '@')) != NULL) {
774 		printf("%s: path@server syntax is deprecated, "
775 		    "use server:path\n", __func__);
776 		hostp = delimp + 1;
777 	} else {
778 		printf("%s: no <host>:<dirpath> nfs-name\n", __func__);
779 		return (EINVAL);
780 	}
781 	*delimp = '\0';
782 
783 	/*
784 	 * If there has been a trailing slash at mounttime it seems
785 	 * that some mountd implementations fail to remove the mount
786 	 * entries from their mountlist while unmounting.
787 	 */
788 	for (speclen = strlen(spec);
789 	    speclen > 1 && spec[speclen - 1] == '/';
790 	    speclen--)
791 		spec[speclen - 1] = '\0';
792 	if (strlen(hostp) + strlen(spec) + 1 > MNAMELEN) {
793 		printf("%s: %s:%s: name too long", __func__, hostp, spec);
794 		return (EINVAL);
795 	}
796 	/* Make both '@' and ':' notations equal */
797 	if (*hostp != '\0') {
798 		len = strlen(hostp);
799 		offset = 0;
800 		if (have_bracket)
801 			nam[offset++] = '[';
802 		memmove(nam + offset, hostp, len);
803 		if (have_bracket)
804 			nam[len + offset++] = ']';
805 		nam[len + offset++] = ':';
806 		memmove(nam + len + offset, spec, speclen);
807 		nam[len + speclen + offset] = '\0';
808 	} else
809 		nam[0] = '\0';
810 
811 	/*
812 	 * XXX: IPv6
813 	 */
814 	sin = malloc(sizeof(*sin), M_SONAME, M_WAITOK);
815 	rv = inet_pton(AF_INET, hostp, &sin->sin_addr);
816 	if (rv != 1) {
817 		printf("%s: cannot parse '%s', inet_pton() returned %d\n",
818 		    __func__, hostp, rv);
819 		free(sin, M_SONAME);
820 		return (EINVAL);
821 	}
822 
823 	sin->sin_len = sizeof(*sin);
824 	sin->sin_family = AF_INET;
825 	/*
826 	 * XXX: hardcoded port number.
827 	 */
828 	sin->sin_port = htons(2049);
829 
830 	*hostnamep = strdup(nam, M_NEWNFSMNT);
831 	*sinp = sin;
832 	strlcpy(dirpath, spec, dirpathsize);
833 	*dirlenp = strlen(dirpath);
834 
835 	return (0);
836 }
837 
838 /*
839  * VFS Operations.
840  *
841  * mount system call
842  * It seems a bit dumb to copyinstr() the host and path here and then
843  * bcopy() them in mountnfs(), but I wanted to detect errors before
844  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
845  * an error after that means that I have to release the mbuf.
846  */
847 /* ARGSUSED */
848 static int
849 nfs_mount(struct mount *mp)
850 {
851 	struct nfs_args args = {
852 	    .version = NFS_ARGSVERSION,
853 	    .addr = NULL,
854 	    .addrlen = sizeof (struct sockaddr_in),
855 	    .sotype = SOCK_STREAM,
856 	    .proto = 0,
857 	    .fh = NULL,
858 	    .fhsize = 0,
859 	    .flags = NFSMNT_RESVPORT,
860 	    .wsize = NFS_WSIZE,
861 	    .rsize = NFS_RSIZE,
862 	    .readdirsize = NFS_READDIRSIZE,
863 	    .timeo = 10,
864 	    .retrans = NFS_RETRANS,
865 	    .readahead = NFS_DEFRAHEAD,
866 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
867 	    .hostname = NULL,
868 	    .acregmin = NFS_MINATTRTIMO,
869 	    .acregmax = NFS_MAXATTRTIMO,
870 	    .acdirmin = NFS_MINDIRATTRTIMO,
871 	    .acdirmax = NFS_MAXDIRATTRTIMO,
872 	};
873 	int error = 0, ret, len;
874 	struct sockaddr *nam = NULL;
875 	struct vnode *vp;
876 	struct thread *td;
877 	char hst[MNAMELEN];
878 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
879 	char *cp, *opt, *name, *secname;
880 	int nametimeo = NFS_DEFAULT_NAMETIMEO;
881 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
882 	int minvers = 0;
883 	int dirlen, has_nfs_args_opt, has_nfs_from_opt,
884 	    krbnamelen, srvkrbnamelen;
885 	size_t hstlen;
886 
887 	has_nfs_args_opt = 0;
888 	has_nfs_from_opt = 0;
889 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
890 		error = EINVAL;
891 		goto out;
892 	}
893 
894 	td = curthread;
895 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS &&
896 	    nfs_diskless_valid != 0) {
897 		error = nfs_mountroot(mp);
898 		goto out;
899 	}
900 
901 	nfscl_init();
902 
903 	/*
904 	 * The old mount_nfs program passed the struct nfs_args
905 	 * from userspace to kernel.  The new mount_nfs program
906 	 * passes string options via nmount() from userspace to kernel
907 	 * and we populate the struct nfs_args in the kernel.
908 	 */
909 	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
910 		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
911 		    sizeof(args));
912 		if (error != 0)
913 			goto out;
914 
915 		if (args.version != NFS_ARGSVERSION) {
916 			error = EPROGMISMATCH;
917 			goto out;
918 		}
919 		has_nfs_args_opt = 1;
920 	}
921 
922 	/* Handle the new style options. */
923 	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
924 		args.acdirmin = args.acdirmax =
925 		    args.acregmin = args.acregmax = 0;
926 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
927 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
928 	}
929 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
930 		args.flags |= NFSMNT_NOCONN;
931 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
932 		args.flags &= ~NFSMNT_NOCONN;
933 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
934 		args.flags |= NFSMNT_NOLOCKD;
935 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
936 		args.flags &= ~NFSMNT_NOLOCKD;
937 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
938 		args.flags |= NFSMNT_INT;
939 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
940 		args.flags |= NFSMNT_RDIRPLUS;
941 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
942 		args.flags |= NFSMNT_RESVPORT;
943 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
944 		args.flags &= ~NFSMNT_RESVPORT;
945 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
946 		args.flags |= NFSMNT_SOFT;
947 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
948 		args.flags &= ~NFSMNT_SOFT;
949 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
950 		args.sotype = SOCK_DGRAM;
951 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
952 		args.sotype = SOCK_DGRAM;
953 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
954 		args.sotype = SOCK_STREAM;
955 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
956 		args.flags |= NFSMNT_NFSV3;
957 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
958 		args.flags |= NFSMNT_NFSV4;
959 		args.sotype = SOCK_STREAM;
960 	}
961 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
962 		args.flags |= NFSMNT_ALLGSSNAME;
963 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
964 		args.flags |= NFSMNT_NOCTO;
965 	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
966 		args.flags |= NFSMNT_NONCONTIGWR;
967 	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
968 		args.flags |= NFSMNT_PNFS;
969 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
970 		if (opt == NULL) {
971 			vfs_mount_error(mp, "illegal readdirsize");
972 			error = EINVAL;
973 			goto out;
974 		}
975 		ret = sscanf(opt, "%d", &args.readdirsize);
976 		if (ret != 1 || args.readdirsize <= 0) {
977 			vfs_mount_error(mp, "illegal readdirsize: %s",
978 			    opt);
979 			error = EINVAL;
980 			goto out;
981 		}
982 		args.flags |= NFSMNT_READDIRSIZE;
983 	}
984 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
985 		if (opt == NULL) {
986 			vfs_mount_error(mp, "illegal readahead");
987 			error = EINVAL;
988 			goto out;
989 		}
990 		ret = sscanf(opt, "%d", &args.readahead);
991 		if (ret != 1 || args.readahead <= 0) {
992 			vfs_mount_error(mp, "illegal readahead: %s",
993 			    opt);
994 			error = EINVAL;
995 			goto out;
996 		}
997 		args.flags |= NFSMNT_READAHEAD;
998 	}
999 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
1000 		if (opt == NULL) {
1001 			vfs_mount_error(mp, "illegal wsize");
1002 			error = EINVAL;
1003 			goto out;
1004 		}
1005 		ret = sscanf(opt, "%d", &args.wsize);
1006 		if (ret != 1 || args.wsize <= 0) {
1007 			vfs_mount_error(mp, "illegal wsize: %s",
1008 			    opt);
1009 			error = EINVAL;
1010 			goto out;
1011 		}
1012 		args.flags |= NFSMNT_WSIZE;
1013 	}
1014 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
1015 		if (opt == NULL) {
1016 			vfs_mount_error(mp, "illegal rsize");
1017 			error = EINVAL;
1018 			goto out;
1019 		}
1020 		ret = sscanf(opt, "%d", &args.rsize);
1021 		if (ret != 1 || args.rsize <= 0) {
1022 			vfs_mount_error(mp, "illegal wsize: %s",
1023 			    opt);
1024 			error = EINVAL;
1025 			goto out;
1026 		}
1027 		args.flags |= NFSMNT_RSIZE;
1028 	}
1029 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
1030 		if (opt == NULL) {
1031 			vfs_mount_error(mp, "illegal retrans");
1032 			error = EINVAL;
1033 			goto out;
1034 		}
1035 		ret = sscanf(opt, "%d", &args.retrans);
1036 		if (ret != 1 || args.retrans <= 0) {
1037 			vfs_mount_error(mp, "illegal retrans: %s",
1038 			    opt);
1039 			error = EINVAL;
1040 			goto out;
1041 		}
1042 		args.flags |= NFSMNT_RETRANS;
1043 	}
1044 	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
1045 		ret = sscanf(opt, "%d", &args.acregmin);
1046 		if (ret != 1 || args.acregmin < 0) {
1047 			vfs_mount_error(mp, "illegal actimeo: %s",
1048 			    opt);
1049 			error = EINVAL;
1050 			goto out;
1051 		}
1052 		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
1053 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
1054 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
1055 	}
1056 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
1057 		ret = sscanf(opt, "%d", &args.acregmin);
1058 		if (ret != 1 || args.acregmin < 0) {
1059 			vfs_mount_error(mp, "illegal acregmin: %s",
1060 			    opt);
1061 			error = EINVAL;
1062 			goto out;
1063 		}
1064 		args.flags |= NFSMNT_ACREGMIN;
1065 	}
1066 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
1067 		ret = sscanf(opt, "%d", &args.acregmax);
1068 		if (ret != 1 || args.acregmax < 0) {
1069 			vfs_mount_error(mp, "illegal acregmax: %s",
1070 			    opt);
1071 			error = EINVAL;
1072 			goto out;
1073 		}
1074 		args.flags |= NFSMNT_ACREGMAX;
1075 	}
1076 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
1077 		ret = sscanf(opt, "%d", &args.acdirmin);
1078 		if (ret != 1 || args.acdirmin < 0) {
1079 			vfs_mount_error(mp, "illegal acdirmin: %s",
1080 			    opt);
1081 			error = EINVAL;
1082 			goto out;
1083 		}
1084 		args.flags |= NFSMNT_ACDIRMIN;
1085 	}
1086 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
1087 		ret = sscanf(opt, "%d", &args.acdirmax);
1088 		if (ret != 1 || args.acdirmax < 0) {
1089 			vfs_mount_error(mp, "illegal acdirmax: %s",
1090 			    opt);
1091 			error = EINVAL;
1092 			goto out;
1093 		}
1094 		args.flags |= NFSMNT_ACDIRMAX;
1095 	}
1096 	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
1097 		ret = sscanf(opt, "%d", &args.wcommitsize);
1098 		if (ret != 1 || args.wcommitsize < 0) {
1099 			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1100 			error = EINVAL;
1101 			goto out;
1102 		}
1103 		args.flags |= NFSMNT_WCOMMITSIZE;
1104 	}
1105 	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1106 		ret = sscanf(opt, "%d", &args.timeo);
1107 		if (ret != 1 || args.timeo <= 0) {
1108 			vfs_mount_error(mp, "illegal timeo: %s",
1109 			    opt);
1110 			error = EINVAL;
1111 			goto out;
1112 		}
1113 		args.flags |= NFSMNT_TIMEO;
1114 	}
1115 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1116 		ret = sscanf(opt, "%d", &args.timeo);
1117 		if (ret != 1 || args.timeo <= 0) {
1118 			vfs_mount_error(mp, "illegal timeout: %s",
1119 			    opt);
1120 			error = EINVAL;
1121 			goto out;
1122 		}
1123 		args.flags |= NFSMNT_TIMEO;
1124 	}
1125 	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1126 		ret = sscanf(opt, "%d", &nametimeo);
1127 		if (ret != 1 || nametimeo < 0) {
1128 			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1129 			error = EINVAL;
1130 			goto out;
1131 		}
1132 	}
1133 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1134 	    == 0) {
1135 		ret = sscanf(opt, "%d", &negnametimeo);
1136 		if (ret != 1 || negnametimeo < 0) {
1137 			vfs_mount_error(mp, "illegal negnametimeo: %s",
1138 			    opt);
1139 			error = EINVAL;
1140 			goto out;
1141 		}
1142 	}
1143 	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1144 	    0) {
1145 		ret = sscanf(opt, "%d", &minvers);
1146 		if (ret != 1 || minvers < 0 || minvers > 1 ||
1147 		    (args.flags & NFSMNT_NFSV4) == 0) {
1148 			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1149 			error = EINVAL;
1150 			goto out;
1151 		}
1152 	}
1153 	if (vfs_getopt(mp->mnt_optnew, "sec",
1154 		(void **) &secname, NULL) == 0)
1155 		nfs_sec_name(secname, &args.flags);
1156 
1157 	if (mp->mnt_flag & MNT_UPDATE) {
1158 		struct nfsmount *nmp = VFSTONFS(mp);
1159 
1160 		if (nmp == NULL) {
1161 			error = EIO;
1162 			goto out;
1163 		}
1164 
1165 		/*
1166 		 * If a change from TCP->UDP is done and there are thread(s)
1167 		 * that have I/O RPC(s) in progress with a transfer size
1168 		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1169 		 * hung, retrying the RPC(s) forever. Usually these threads
1170 		 * will be seen doing an uninterruptible sleep on wait channel
1171 		 * "nfsreq".
1172 		 */
1173 		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1174 			tprintf(td->td_proc, LOG_WARNING,
1175 	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1176 
1177 		/*
1178 		 * When doing an update, we can't change version,
1179 		 * security, switch lockd strategies or change cookie
1180 		 * translation
1181 		 */
1182 		args.flags = (args.flags &
1183 		    ~(NFSMNT_NFSV3 |
1184 		      NFSMNT_NFSV4 |
1185 		      NFSMNT_KERB |
1186 		      NFSMNT_INTEGRITY |
1187 		      NFSMNT_PRIVACY |
1188 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1189 		    (nmp->nm_flag &
1190 			(NFSMNT_NFSV3 |
1191 			 NFSMNT_NFSV4 |
1192 			 NFSMNT_KERB |
1193 			 NFSMNT_INTEGRITY |
1194 			 NFSMNT_PRIVACY |
1195 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1196 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1197 		goto out;
1198 	}
1199 
1200 	/*
1201 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1202 	 * or no-connection mode for those protocols that support
1203 	 * no-connection mode (the flag will be cleared later for protocols
1204 	 * that do not support no-connection mode).  This will allow a client
1205 	 * to receive replies from a different IP then the request was
1206 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1207 	 * not 0.
1208 	 */
1209 	if (nfs_ip_paranoia == 0)
1210 		args.flags |= NFSMNT_NOCONN;
1211 
1212 	if (has_nfs_args_opt != 0) {
1213 		/*
1214 		 * In the 'nfs_args' case, the pointers in the args
1215 		 * structure are in userland - we copy them in here.
1216 		 */
1217 		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1218 			vfs_mount_error(mp, "Bad file handle");
1219 			error = EINVAL;
1220 			goto out;
1221 		}
1222 		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1223 		    args.fhsize);
1224 		if (error != 0)
1225 			goto out;
1226 		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1227 		if (error != 0)
1228 			goto out;
1229 		bzero(&hst[hstlen], MNAMELEN - hstlen);
1230 		args.hostname = hst;
1231 		/* getsockaddr() call must be after above copyin() calls */
1232 		error = getsockaddr(&nam, (caddr_t)args.addr,
1233 		    args.addrlen);
1234 		if (error != 0)
1235 			goto out;
1236 	} else if (nfs_mount_parse_from(mp->mnt_optnew,
1237 	    &args.hostname, (struct sockaddr_in **)&nam, dirpath,
1238 	    sizeof(dirpath), &dirlen) == 0) {
1239 		has_nfs_from_opt = 1;
1240 		bcopy(args.hostname, hst, MNAMELEN);
1241 		hst[MNAMELEN - 1] = '\0';
1242 
1243 		/*
1244 		 * This only works with NFSv4 for now.
1245 		 */
1246 		args.fhsize = 0;
1247 		args.flags |= NFSMNT_NFSV4;
1248 		args.sotype = SOCK_STREAM;
1249 	} else {
1250 		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1251 		    &args.fhsize) == 0) {
1252 			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1253 				vfs_mount_error(mp, "Bad file handle");
1254 				error = EINVAL;
1255 				goto out;
1256 			}
1257 			bcopy(args.fh, nfh, args.fhsize);
1258 		} else {
1259 			args.fhsize = 0;
1260 		}
1261 		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1262 		    (void **)&args.hostname, &len);
1263 		if (args.hostname == NULL) {
1264 			vfs_mount_error(mp, "Invalid hostname");
1265 			error = EINVAL;
1266 			goto out;
1267 		}
1268 		bcopy(args.hostname, hst, MNAMELEN);
1269 		hst[MNAMELEN - 1] = '\0';
1270 	}
1271 
1272 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1273 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1274 	else {
1275 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1276 		cp = strchr(srvkrbname, ':');
1277 		if (cp != NULL)
1278 			*cp = '\0';
1279 	}
1280 	srvkrbnamelen = strlen(srvkrbname);
1281 
1282 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1283 		strlcpy(krbname, name, sizeof (krbname));
1284 	else
1285 		krbname[0] = '\0';
1286 	krbnamelen = strlen(krbname);
1287 
1288 	if (has_nfs_from_opt == 0) {
1289 		if (vfs_getopt(mp->mnt_optnew,
1290 		    "dirpath", (void **)&name, NULL) == 0)
1291 			strlcpy(dirpath, name, sizeof (dirpath));
1292 		else
1293 			dirpath[0] = '\0';
1294 		dirlen = strlen(dirpath);
1295 	}
1296 
1297 	if (has_nfs_args_opt == 0 && has_nfs_from_opt == 0) {
1298 		if (vfs_getopt(mp->mnt_optnew, "addr",
1299 		    (void **)&args.addr, &args.addrlen) == 0) {
1300 			if (args.addrlen > SOCK_MAXADDRLEN) {
1301 				error = ENAMETOOLONG;
1302 				goto out;
1303 			}
1304 			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1305 			bcopy(args.addr, nam, args.addrlen);
1306 			nam->sa_len = args.addrlen;
1307 		} else {
1308 			vfs_mount_error(mp, "No server address");
1309 			error = EINVAL;
1310 			goto out;
1311 		}
1312 	}
1313 
1314 	args.fh = nfh;
1315 	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1316 	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1317 	    nametimeo, negnametimeo, minvers);
1318 out:
1319 	if (!error) {
1320 		MNT_ILOCK(mp);
1321 		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1322 		    MNTK_USES_BCACHE;
1323 		if ((VFSTONFS(mp)->nm_flag & NFSMNT_NFSV4) != 0)
1324 			mp->mnt_kern_flag |= MNTK_NULL_NOCACHE;
1325 		MNT_IUNLOCK(mp);
1326 	}
1327 	return (error);
1328 }
1329 
1330 
1331 /*
1332  * VFS Operations.
1333  *
1334  * mount system call
1335  * It seems a bit dumb to copyinstr() the host and path here and then
1336  * bcopy() them in mountnfs(), but I wanted to detect errors before
1337  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
1338  * an error after that means that I have to release the mbuf.
1339  */
1340 /* ARGSUSED */
1341 static int
1342 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1343 {
1344 	int error;
1345 	struct nfs_args args;
1346 
1347 	error = copyin(data, &args, sizeof (struct nfs_args));
1348 	if (error)
1349 		return error;
1350 
1351 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1352 
1353 	error = kernel_mount(ma, flags);
1354 	return (error);
1355 }
1356 
1357 /*
1358  * Common code for mount and mountroot
1359  */
1360 static int
1361 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1362     char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1363     u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1364     struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1365     int minvers)
1366 {
1367 	struct nfsmount *nmp;
1368 	struct nfsnode *np;
1369 	int error, trycnt, ret;
1370 	struct nfsvattr nfsva;
1371 	struct nfsclclient *clp;
1372 	struct nfsclds *dsp, *tdsp;
1373 	uint32_t lease;
1374 	static u_int64_t clval = 0;
1375 
1376 	NFSCL_DEBUG(3, "in mnt\n");
1377 	clp = NULL;
1378 	if (mp->mnt_flag & MNT_UPDATE) {
1379 		nmp = VFSTONFS(mp);
1380 		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1381 		FREE(nam, M_SONAME);
1382 		return (0);
1383 	} else {
1384 		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1385 		    krbnamelen + dirlen + srvkrbnamelen + 2,
1386 		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1387 		TAILQ_INIT(&nmp->nm_bufq);
1388 		TAILQ_INIT(&nmp->nm_sess);
1389 		if (clval == 0)
1390 			clval = (u_int64_t)nfsboottime.tv_sec;
1391 		nmp->nm_clval = clval++;
1392 		nmp->nm_krbnamelen = krbnamelen;
1393 		nmp->nm_dirpathlen = dirlen;
1394 		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1395 		if (td->td_ucred->cr_uid != (uid_t)0) {
1396 			/*
1397 			 * nm_uid is used to get KerberosV credentials for
1398 			 * the nfsv4 state handling operations if there is
1399 			 * no host based principal set. Use the uid of
1400 			 * this user if not root, since they are doing the
1401 			 * mount. I don't think setting this for root will
1402 			 * work, since root normally does not have user
1403 			 * credentials in a credentials cache.
1404 			 */
1405 			nmp->nm_uid = td->td_ucred->cr_uid;
1406 		} else {
1407 			/*
1408 			 * Just set to -1, so it won't be used.
1409 			 */
1410 			nmp->nm_uid = (uid_t)-1;
1411 		}
1412 
1413 		/* Copy and null terminate all the names */
1414 		if (nmp->nm_krbnamelen > 0) {
1415 			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1416 			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1417 		}
1418 		if (nmp->nm_dirpathlen > 0) {
1419 			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1420 			    nmp->nm_dirpathlen);
1421 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1422 			    + 1] = '\0';
1423 		}
1424 		if (nmp->nm_srvkrbnamelen > 0) {
1425 			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1426 			    nmp->nm_srvkrbnamelen);
1427 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1428 			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1429 		}
1430 		nmp->nm_sockreq.nr_cred = crhold(cred);
1431 		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1432 		mp->mnt_data = nmp;
1433 		nmp->nm_getinfo = nfs_getnlminfo;
1434 		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1435 	}
1436 	vfs_getnewfsid(mp);
1437 	nmp->nm_mountp = mp;
1438 	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1439 
1440 	/*
1441 	 * Since nfs_decode_args() might optionally set them, these
1442 	 * need to be set to defaults before the call, so that the
1443 	 * optional settings aren't overwritten.
1444 	 */
1445 	nmp->nm_nametimeo = nametimeo;
1446 	nmp->nm_negnametimeo = negnametimeo;
1447 	nmp->nm_timeo = NFS_TIMEO;
1448 	nmp->nm_retry = NFS_RETRANS;
1449 	nmp->nm_readahead = NFS_DEFRAHEAD;
1450 
1451 	/* This is empirical approximation of sqrt(hibufspace) * 256. */
1452 	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
1453 	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
1454 		nmp->nm_wcommitsize *= 2;
1455 	nmp->nm_wcommitsize *= 256;
1456 
1457 	if ((argp->flags & NFSMNT_NFSV4) != 0)
1458 		nmp->nm_minorvers = minvers;
1459 	else
1460 		nmp->nm_minorvers = 0;
1461 
1462 	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1463 
1464 	/*
1465 	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1466 	 * high, depending on whether we end up with negative offsets in
1467 	 * the client or server somewhere.  2GB-1 may be safer.
1468 	 *
1469 	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1470 	 * that we can handle until we find out otherwise.
1471 	 */
1472 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1473 		nmp->nm_maxfilesize = 0xffffffffLL;
1474 	else
1475 		nmp->nm_maxfilesize = OFF_MAX;
1476 
1477 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1478 		nmp->nm_wsize = NFS_WSIZE;
1479 		nmp->nm_rsize = NFS_RSIZE;
1480 		nmp->nm_readdirsize = NFS_READDIRSIZE;
1481 	}
1482 	nmp->nm_numgrps = NFS_MAXGRPS;
1483 	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1484 	if (nmp->nm_tprintf_delay < 0)
1485 		nmp->nm_tprintf_delay = 0;
1486 	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1487 	if (nmp->nm_tprintf_initial_delay < 0)
1488 		nmp->nm_tprintf_initial_delay = 0;
1489 	nmp->nm_fhsize = argp->fhsize;
1490 	if (nmp->nm_fhsize > 0)
1491 		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1492 	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1493 	nmp->nm_nam = nam;
1494 	/* Set up the sockets and per-host congestion */
1495 	nmp->nm_sotype = argp->sotype;
1496 	nmp->nm_soproto = argp->proto;
1497 	nmp->nm_sockreq.nr_prog = NFS_PROG;
1498 	if ((argp->flags & NFSMNT_NFSV4))
1499 		nmp->nm_sockreq.nr_vers = NFS_VER4;
1500 	else if ((argp->flags & NFSMNT_NFSV3))
1501 		nmp->nm_sockreq.nr_vers = NFS_VER3;
1502 	else
1503 		nmp->nm_sockreq.nr_vers = NFS_VER2;
1504 
1505 
1506 	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1507 		goto bad;
1508 	/* For NFSv4.1, get the clientid now. */
1509 	if (nmp->nm_minorvers > 0) {
1510 		NFSCL_DEBUG(3, "at getcl\n");
1511 		error = nfscl_getcl(mp, cred, td, 0, &clp);
1512 		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1513 		if (error != 0)
1514 			goto bad;
1515 	}
1516 
1517 	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1518 	    nmp->nm_dirpathlen > 0) {
1519 		NFSCL_DEBUG(3, "in dirp\n");
1520 		/*
1521 		 * If the fhsize on the mount point == 0 for V4, the mount
1522 		 * path needs to be looked up.
1523 		 */
1524 		trycnt = 3;
1525 		do {
1526 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1527 			    cred, td);
1528 			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
1529 			if (error)
1530 				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1531 		} while (error && --trycnt > 0);
1532 		if (error) {
1533 			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1534 			goto bad;
1535 		}
1536 	}
1537 
1538 	/*
1539 	 * A reference count is needed on the nfsnode representing the
1540 	 * remote root.  If this object is not persistent, then backward
1541 	 * traversals of the mount point (i.e. "..") will not work if
1542 	 * the nfsnode gets flushed out of the cache. Ufs does not have
1543 	 * this problem, because one can identify root inodes by their
1544 	 * number == ROOTINO (2).
1545 	 */
1546 	if (nmp->nm_fhsize > 0) {
1547 		/*
1548 		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1549 		 * non-zero for the root vnode. f_iosize will be set correctly
1550 		 * by nfs_statfs() before any I/O occurs.
1551 		 */
1552 		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1553 		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1554 		    LK_EXCLUSIVE);
1555 		if (error)
1556 			goto bad;
1557 		*vpp = NFSTOV(np);
1558 
1559 		/*
1560 		 * Get file attributes and transfer parameters for the
1561 		 * mountpoint.  This has the side effect of filling in
1562 		 * (*vpp)->v_type with the correct value.
1563 		 */
1564 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1565 		    cred, td, &nfsva, NULL, &lease);
1566 		if (ret) {
1567 			/*
1568 			 * Just set default values to get things going.
1569 			 */
1570 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1571 			nfsva.na_vattr.va_type = VDIR;
1572 			nfsva.na_vattr.va_mode = 0777;
1573 			nfsva.na_vattr.va_nlink = 100;
1574 			nfsva.na_vattr.va_uid = (uid_t)0;
1575 			nfsva.na_vattr.va_gid = (gid_t)0;
1576 			nfsva.na_vattr.va_fileid = 2;
1577 			nfsva.na_vattr.va_gen = 1;
1578 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1579 			nfsva.na_vattr.va_size = 512 * 1024;
1580 			lease = 60;
1581 		}
1582 		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1583 		if (nmp->nm_minorvers > 0) {
1584 			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1585 			NFSLOCKCLSTATE();
1586 			clp->nfsc_renew = NFSCL_RENEW(lease);
1587 			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1588 			clp->nfsc_clientidrev++;
1589 			if (clp->nfsc_clientidrev == 0)
1590 				clp->nfsc_clientidrev++;
1591 			NFSUNLOCKCLSTATE();
1592 			/*
1593 			 * Mount will succeed, so the renew thread can be
1594 			 * started now.
1595 			 */
1596 			nfscl_start_renewthread(clp);
1597 			nfscl_clientrelease(clp);
1598 		}
1599 		if (argp->flags & NFSMNT_NFSV3)
1600 			ncl_fsinfo(nmp, *vpp, cred, td);
1601 
1602 		/* Mark if the mount point supports NFSv4 ACLs. */
1603 		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1604 		    ret == 0 &&
1605 		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1606 			MNT_ILOCK(mp);
1607 			mp->mnt_flag |= MNT_NFS4ACLS;
1608 			MNT_IUNLOCK(mp);
1609 		}
1610 
1611 		/*
1612 		 * Lose the lock but keep the ref.
1613 		 */
1614 		NFSVOPUNLOCK(*vpp, 0);
1615 		return (0);
1616 	}
1617 	error = EIO;
1618 
1619 bad:
1620 	if (clp != NULL)
1621 		nfscl_clientrelease(clp);
1622 	newnfs_disconnect(&nmp->nm_sockreq);
1623 	crfree(nmp->nm_sockreq.nr_cred);
1624 	if (nmp->nm_sockreq.nr_auth != NULL)
1625 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1626 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1627 	mtx_destroy(&nmp->nm_mtx);
1628 	if (nmp->nm_clp != NULL) {
1629 		NFSLOCKCLSTATE();
1630 		LIST_REMOVE(nmp->nm_clp, nfsc_list);
1631 		NFSUNLOCKCLSTATE();
1632 		free(nmp->nm_clp, M_NFSCLCLIENT);
1633 	}
1634 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1635 		nfscl_freenfsclds(dsp);
1636 	FREE(nmp, M_NEWNFSMNT);
1637 	FREE(nam, M_SONAME);
1638 	return (error);
1639 }
1640 
1641 /*
1642  * unmount system call
1643  */
1644 static int
1645 nfs_unmount(struct mount *mp, int mntflags)
1646 {
1647 	struct thread *td;
1648 	struct nfsmount *nmp;
1649 	int error, flags = 0, i, trycnt = 0;
1650 	struct nfsclds *dsp, *tdsp;
1651 
1652 	td = curthread;
1653 
1654 	if (mntflags & MNT_FORCE)
1655 		flags |= FORCECLOSE;
1656 	nmp = VFSTONFS(mp);
1657 	/*
1658 	 * Goes something like this..
1659 	 * - Call vflush() to clear out vnodes for this filesystem
1660 	 * - Close the socket
1661 	 * - Free up the data structures
1662 	 */
1663 	/* In the forced case, cancel any outstanding requests. */
1664 	if (mntflags & MNT_FORCE) {
1665 		error = newnfs_nmcancelreqs(nmp);
1666 		if (error)
1667 			goto out;
1668 		/* For a forced close, get rid of the renew thread now */
1669 		nfscl_umount(nmp, td);
1670 	}
1671 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1672 	do {
1673 		error = vflush(mp, 1, flags, td);
1674 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1675 			(void) nfs_catnap(PSOCK, error, "newndm");
1676 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1677 	if (error)
1678 		goto out;
1679 
1680 	/*
1681 	 * We are now committed to the unmount.
1682 	 */
1683 	if ((mntflags & MNT_FORCE) == 0)
1684 		nfscl_umount(nmp, td);
1685 	/* Make sure no nfsiods are assigned to this mount. */
1686 	mtx_lock(&ncl_iod_mutex);
1687 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1688 		if (ncl_iodmount[i] == nmp) {
1689 			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1690 			ncl_iodmount[i] = NULL;
1691 		}
1692 	mtx_unlock(&ncl_iod_mutex);
1693 	newnfs_disconnect(&nmp->nm_sockreq);
1694 	crfree(nmp->nm_sockreq.nr_cred);
1695 	FREE(nmp->nm_nam, M_SONAME);
1696 	if (nmp->nm_sockreq.nr_auth != NULL)
1697 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1698 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1699 	mtx_destroy(&nmp->nm_mtx);
1700 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1701 		nfscl_freenfsclds(dsp);
1702 	FREE(nmp, M_NEWNFSMNT);
1703 out:
1704 	return (error);
1705 }
1706 
1707 /*
1708  * Return root of a filesystem
1709  */
1710 static int
1711 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1712 {
1713 	struct vnode *vp;
1714 	struct nfsmount *nmp;
1715 	struct nfsnode *np;
1716 	int error;
1717 
1718 	nmp = VFSTONFS(mp);
1719 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1720 	if (error)
1721 		return error;
1722 	vp = NFSTOV(np);
1723 	/*
1724 	 * Get transfer parameters and attributes for root vnode once.
1725 	 */
1726 	mtx_lock(&nmp->nm_mtx);
1727 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1728 		mtx_unlock(&nmp->nm_mtx);
1729 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1730 	} else
1731 		mtx_unlock(&nmp->nm_mtx);
1732 	if (vp->v_type == VNON)
1733 	    vp->v_type = VDIR;
1734 	vp->v_vflag |= VV_ROOT;
1735 	*vpp = vp;
1736 	return (0);
1737 }
1738 
1739 /*
1740  * Flush out the buffer cache
1741  */
1742 /* ARGSUSED */
1743 static int
1744 nfs_sync(struct mount *mp, int waitfor)
1745 {
1746 	struct vnode *vp, *mvp;
1747 	struct thread *td;
1748 	int error, allerror = 0;
1749 
1750 	td = curthread;
1751 
1752 	MNT_ILOCK(mp);
1753 	/*
1754 	 * If a forced dismount is in progress, return from here so that
1755 	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1756 	 * calling VFS_UNMOUNT().
1757 	 */
1758 	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1759 		MNT_IUNLOCK(mp);
1760 		return (EBADF);
1761 	}
1762 	MNT_IUNLOCK(mp);
1763 
1764 	/*
1765 	 * Force stale buffer cache information to be flushed.
1766 	 */
1767 loop:
1768 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1769 		/* XXX Racy bv_cnt check. */
1770 		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1771 		    waitfor == MNT_LAZY) {
1772 			VI_UNLOCK(vp);
1773 			continue;
1774 		}
1775 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1776 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1777 			goto loop;
1778 		}
1779 		error = VOP_FSYNC(vp, waitfor, td);
1780 		if (error)
1781 			allerror = error;
1782 		NFSVOPUNLOCK(vp, 0);
1783 		vrele(vp);
1784 	}
1785 	return (allerror);
1786 }
1787 
1788 static int
1789 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1790 {
1791 	struct nfsmount *nmp = VFSTONFS(mp);
1792 	struct vfsquery vq;
1793 	int error;
1794 
1795 	bzero(&vq, sizeof(vq));
1796 	switch (op) {
1797 #if 0
1798 	case VFS_CTL_NOLOCKS:
1799 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1800  		if (req->oldptr != NULL) {
1801  			error = SYSCTL_OUT(req, &val, sizeof(val));
1802  			if (error)
1803  				return (error);
1804  		}
1805  		if (req->newptr != NULL) {
1806  			error = SYSCTL_IN(req, &val, sizeof(val));
1807  			if (error)
1808  				return (error);
1809 			if (val)
1810 				nmp->nm_flag |= NFSMNT_NOLOCKS;
1811 			else
1812 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1813  		}
1814 		break;
1815 #endif
1816 	case VFS_CTL_QUERY:
1817 		mtx_lock(&nmp->nm_mtx);
1818 		if (nmp->nm_state & NFSSTA_TIMEO)
1819 			vq.vq_flags |= VQ_NOTRESP;
1820 		mtx_unlock(&nmp->nm_mtx);
1821 #if 0
1822 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1823 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1824 			vq.vq_flags |= VQ_NOTRESPLOCK;
1825 #endif
1826 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1827 		break;
1828  	case VFS_CTL_TIMEO:
1829  		if (req->oldptr != NULL) {
1830  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1831  			    sizeof(nmp->nm_tprintf_initial_delay));
1832  			if (error)
1833  				return (error);
1834  		}
1835  		if (req->newptr != NULL) {
1836 			error = vfs_suser(mp, req->td);
1837 			if (error)
1838 				return (error);
1839  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1840  			    sizeof(nmp->nm_tprintf_initial_delay));
1841  			if (error)
1842  				return (error);
1843  			if (nmp->nm_tprintf_initial_delay < 0)
1844  				nmp->nm_tprintf_initial_delay = 0;
1845  		}
1846 		break;
1847 	default:
1848 		return (ENOTSUP);
1849 	}
1850 	return (0);
1851 }
1852 
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.
 *
 * The result of newnfs_nmcancelreqs() is not looked at.
 */
static void
nfs_purge(struct mount *mp)
{

	(void) newnfs_nmcancelreqs(VFSTONFS(mp));
}
1865 
1866 /*
1867  * Extract the information needed by the nlm from the nfs vnode.
1868  */
1869 static void
1870 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1871     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1872     struct timeval *timeop)
1873 {
1874 	struct nfsmount *nmp;
1875 	struct nfsnode *np = VTONFS(vp);
1876 
1877 	nmp = VFSTONFS(vp->v_mount);
1878 	if (fhlenp != NULL)
1879 		*fhlenp = (size_t)np->n_fhp->nfh_len;
1880 	if (fhp != NULL)
1881 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1882 	if (sp != NULL)
1883 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1884 	if (is_v3p != NULL)
1885 		*is_v3p = NFS_ISV3(vp);
1886 	if (sizep != NULL)
1887 		*sizep = np->n_size;
1888 	if (timeop != NULL) {
1889 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1890 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1891 	}
1892 }
1893 
/*
 * Append the option name "opt" at *buf when "testval" is non-zero and
 * the remaining space *blen is large enough for the name plus its
 * terminating NUL.  *buf and *blen are then moved past the text written
 * (the NUL is not counted); otherwise nothing is touched.
 * The nmp argument is not used.
 */
static __inline void nfscl_printopt(struct nfsmount *nmp, int testval,
    char *opt, char **buf, size_t *blen)
{
	int written;

	/* Nothing to print, or no room for it. */
	if (testval == 0 || *blen <= strlen(opt))
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	/* Cannot happen given the size check above; complain if it does. */
	if (written != strlen(opt))
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
1911 
/*
 * This function prints out an option's integer value as "opt=optval".
 *
 * The text is written at *buf, which has *blen bytes available.  When
 * the whole string (and its terminating NUL) fits, *buf and *blen are
 * moved past it.  When it does not fit, nothing is appended: any partial
 * text is erased again so that a cut-off value can never show up in the
 * output, and *buf and *blen are left as they were.
 * The nmp argument is not used.
 */
static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
    char *opt, char **buf, size_t *blen)
{
	int len;

	/* Need room for at least the name, the '=' and the NUL. */
	if (*blen <= strlen(opt) + 1)
		return;

	len = snprintf(*buf, *blen, "%s=%d", opt, optval);
	if (len >= 0 && (size_t)len < *blen) {
		*buf += len;
		*blen -= len;
	} else {
		/*
		 * Truncated (or failed): snprintf() may have left a partial
		 * "opt=12..." behind, so put the terminator back.
		 */
		**buf = '\0';
	}
}
1929 
1930 /*
1931  * Load the option flags and values into the buffer.
1932  */
1933 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1934 {
1935 	char *buf;
1936 	size_t blen;
1937 
1938 	buf = buffer;
1939 	blen = buflen;
1940 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1941 	    &blen);
1942 	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1943 		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1944 		    &blen);
1945 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1946 		    &buf, &blen);
1947 	}
1948 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1949 	    &blen);
1950 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1951 	    "nfsv2", &buf, &blen);
1952 	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1953 	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1954 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1955 	    &buf, &blen);
1956 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1957 	    &buf, &blen);
1958 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1959 	    &blen);
1960 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1961 	    &blen);
1962 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1963 	    &blen);
1964 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1965 	    &blen);
1966 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1967 	    &blen);
1968 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
1969 	    ",noncontigwr", &buf, &blen);
1970 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1971 	    0, ",lockd", &buf, &blen);
1972 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1973 	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1974 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1975 	    &buf, &blen);
1976 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1977 	    &buf, &blen);
1978 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1979 	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
1980 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1981 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1982 	    &buf, &blen);
1983 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1984 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
1985 	    &buf, &blen);
1986 	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1987 	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1988 	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
1989 	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
1990 	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
1991 	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
1992 	    &blen);
1993 	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
1994 	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
1995 	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
1996 	    &blen);
1997 	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
1998 	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
1999 	    &blen);
2000 	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
2001 	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
2002 }
2003 
2004