xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision 18849b5da0c5eaa88500b457be05b038813b51b1)
1 /*-
2  * Copyright (c) 1989, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/limits.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/mount.h>
55 #include <sys/proc.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/sockio.h>
59 #include <sys/sysctl.h>
60 #include <sys/vnode.h>
61 #include <sys/signalvar.h>
62 
63 #include <vm/vm.h>
64 #include <vm/vm_extern.h>
65 #include <vm/uma.h>
66 
67 #include <net/if.h>
68 #include <net/route.h>
69 #include <netinet/in.h>
70 
71 #include <fs/nfs/nfsport.h>
72 #include <fs/nfsclient/nfsnode.h>
73 #include <fs/nfsclient/nfsmount.h>
74 #include <fs/nfsclient/nfs.h>
75 #include <nfs/nfsdiskless.h>
76 
77 FEATURE(nfscl, "NFSv4 client");
78 
79 extern int nfscl_ticks;
80 extern struct timeval nfsboottime;
81 extern struct nfsstats	newnfsstats;
82 extern int nfsrv_useacl;
83 extern int nfscl_debuglevel;
84 extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
85 extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
86 extern struct mtx ncl_iod_mutex;
87 NFSCLSTATEMUTEX;
88 
89 MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header");
90 MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct");
91 
92 SYSCTL_DECL(_vfs_nfs);
93 static int nfs_ip_paranoia = 1;
94 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
95     &nfs_ip_paranoia, 0, "");
96 static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
97 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
98         downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
99 /* how long between console messages "nfs server foo not responding" */
100 static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
101 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
102         downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
103 #ifdef NFS_DEBUG
104 int nfs_debug;
105 SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
106     "Toggle debug flag");
107 #endif
108 
109 static int	nfs_mountroot(struct mount *);
110 static void	nfs_sec_name(char *, int *);
111 static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
112 		    struct nfs_args *argp, const char *, struct ucred *,
113 		    struct thread *);
114 static int	mountnfs(struct nfs_args *, struct mount *,
115 		    struct sockaddr *, char *, u_char *, int, u_char *, int,
116 		    u_char *, int, struct vnode **, struct ucred *,
117 		    struct thread *, int, int, int);
118 static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
119 		    struct sockaddr_storage *, int *, off_t *,
120 		    struct timeval *);
121 static vfs_mount_t nfs_mount;
122 static vfs_cmount_t nfs_cmount;
123 static vfs_unmount_t nfs_unmount;
124 static vfs_root_t nfs_root;
125 static vfs_statfs_t nfs_statfs;
126 static vfs_sync_t nfs_sync;
127 static vfs_sysctl_t nfs_sysctl;
128 static vfs_purge_t nfs_purge;
129 
130 /*
131  * nfs vfs operations.
132  */
133 static struct vfsops nfs_vfsops = {
134 	.vfs_init =		ncl_init,
135 	.vfs_mount =		nfs_mount,
136 	.vfs_cmount =		nfs_cmount,
137 	.vfs_root =		nfs_root,
138 	.vfs_statfs =		nfs_statfs,
139 	.vfs_sync =		nfs_sync,
140 	.vfs_uninit =		ncl_uninit,
141 	.vfs_unmount =		nfs_unmount,
142 	.vfs_sysctl =		nfs_sysctl,
143 	.vfs_purge =		nfs_purge,
144 };
145 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
146 
147 /* So that loader and kldload(2) can find us, wherever we are.. */
148 MODULE_VERSION(nfs, 1);
149 MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
150 MODULE_DEPEND(nfs, krpc, 1, 1, 1);
151 MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
152 MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
153 
154 /*
155  * This structure is now defined in sys/nfs/nfs_diskless.c so that it
156  * can be shared by both NFS clients. It is declared here so that it
157  * will be defined for kernels built without NFS_ROOT, although it
158  * isn't used in that case.
159  */
160 #if !defined(NFS_ROOT)
161 struct nfs_diskless	nfs_diskless = { { { 0 } } };
162 struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
163 int			nfs_diskless_valid = 0;
164 #endif
165 
166 SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
167     &nfs_diskless_valid, 0,
168     "Has the diskless struct been filled correctly");
169 
170 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
171     nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
172 
173 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
174     &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
175     "%Ssockaddr_in", "Diskless root nfs address");
176 
177 
/* Prototypes for the diskless-boot and argument conversion helpers. */
void		newnfsargs_ntoh(struct nfs_args *);
static int	nfs_mountdiskless(char *path, struct sockaddr_in *sin,
		    struct nfs_args *args, struct thread *td,
		    struct vnode **vpp, struct mount *mp);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
185 
186 int
187 newnfs_iosize(struct nfsmount *nmp)
188 {
189 	int iosize, maxio;
190 
191 	/* First, set the upper limit for iosize */
192 	if (nmp->nm_flag & NFSMNT_NFSV4) {
193 		maxio = NFS_MAXBSIZE;
194 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
195 		if (nmp->nm_sotype == SOCK_DGRAM)
196 			maxio = NFS_MAXDGRAMDATA;
197 		else
198 			maxio = NFS_MAXBSIZE;
199 	} else {
200 		maxio = NFS_V2MAXDATA;
201 	}
202 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
203 		nmp->nm_rsize = maxio;
204 	if (nmp->nm_rsize > NFS_MAXBSIZE)
205 		nmp->nm_rsize = NFS_MAXBSIZE;
206 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
207 		nmp->nm_readdirsize = maxio;
208 	if (nmp->nm_readdirsize > nmp->nm_rsize)
209 		nmp->nm_readdirsize = nmp->nm_rsize;
210 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
211 		nmp->nm_wsize = maxio;
212 	if (nmp->nm_wsize > NFS_MAXBSIZE)
213 		nmp->nm_wsize = NFS_MAXBSIZE;
214 
215 	/*
216 	 * Calculate the size used for io buffers.  Use the larger
217 	 * of the two sizes to minimise nfs requests but make sure
218 	 * that it is at least one VM page to avoid wasting buffer
219 	 * space.  It must also be at least NFS_DIRBLKSIZ, since
220 	 * that is the buffer size used for directories.
221 	 */
222 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
223 	iosize = imax(iosize, PAGE_SIZE);
224 	iosize = imax(iosize, NFS_DIRBLKSIZ);
225 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
226 	return (iosize);
227 }
228 
229 static void
230 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
231 {
232 
233 	args->version = NFS_ARGSVERSION;
234 	args->addr = oargs->addr;
235 	args->addrlen = oargs->addrlen;
236 	args->sotype = oargs->sotype;
237 	args->proto = oargs->proto;
238 	args->fh = oargs->fh;
239 	args->fhsize = oargs->fhsize;
240 	args->flags = oargs->flags;
241 	args->wsize = oargs->wsize;
242 	args->rsize = oargs->rsize;
243 	args->readdirsize = oargs->readdirsize;
244 	args->timeo = oargs->timeo;
245 	args->retrans = oargs->retrans;
246 	args->readahead = oargs->readahead;
247 	args->hostname = oargs->hostname;
248 }
249 
250 static void
251 nfs_convert_diskless(void)
252 {
253 
254 	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
255 		sizeof(struct ifaliasreq));
256 	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
257 		sizeof(struct sockaddr_in));
258 	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
259 	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
260 		nfsv3_diskless.root_fhsize = NFSX_MYFH;
261 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
262 	} else {
263 		nfsv3_diskless.root_fhsize = NFSX_V2FH;
264 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
265 	}
266 	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
267 		sizeof(struct sockaddr_in));
268 	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
269 	nfsv3_diskless.root_time = nfs_diskless.root_time;
270 	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
271 		MAXHOSTNAMELEN);
272 	nfs_diskless_valid = 3;
273 }
274 
275 /*
276  * nfs statfs call
277  */
278 static int
279 nfs_statfs(struct mount *mp, struct statfs *sbp)
280 {
281 	struct vnode *vp;
282 	struct thread *td;
283 	struct nfsmount *nmp = VFSTONFS(mp);
284 	struct nfsvattr nfsva;
285 	struct nfsfsinfo fs;
286 	struct nfsstatfs sb;
287 	int error = 0, attrflag, gotfsinfo = 0, ret;
288 	struct nfsnode *np;
289 
290 	td = curthread;
291 
292 	error = vfs_busy(mp, MBF_NOWAIT);
293 	if (error)
294 		return (error);
295 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
296 	if (error) {
297 		vfs_unbusy(mp);
298 		return (error);
299 	}
300 	vp = NFSTOV(np);
301 	mtx_lock(&nmp->nm_mtx);
302 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
303 		mtx_unlock(&nmp->nm_mtx);
304 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
305 		    &attrflag, NULL);
306 		if (!error)
307 			gotfsinfo = 1;
308 	} else
309 		mtx_unlock(&nmp->nm_mtx);
310 	if (!error)
311 		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
312 		    &attrflag, NULL);
313 	if (error != 0)
314 		NFSCL_DEBUG(2, "statfs=%d\n", error);
315 	if (attrflag == 0) {
316 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
317 		    td->td_ucred, td, &nfsva, NULL, NULL);
318 		if (ret) {
319 			/*
320 			 * Just set default values to get things going.
321 			 */
322 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
323 			nfsva.na_vattr.va_type = VDIR;
324 			nfsva.na_vattr.va_mode = 0777;
325 			nfsva.na_vattr.va_nlink = 100;
326 			nfsva.na_vattr.va_uid = (uid_t)0;
327 			nfsva.na_vattr.va_gid = (gid_t)0;
328 			nfsva.na_vattr.va_fileid = 2;
329 			nfsva.na_vattr.va_gen = 1;
330 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
331 			nfsva.na_vattr.va_size = 512 * 1024;
332 		}
333 	}
334 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
335 	if (!error) {
336 	    mtx_lock(&nmp->nm_mtx);
337 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
338 		nfscl_loadfsinfo(nmp, &fs);
339 	    nfscl_loadsbinfo(nmp, &sb, sbp);
340 	    sbp->f_iosize = newnfs_iosize(nmp);
341 	    mtx_unlock(&nmp->nm_mtx);
342 	    if (sbp != &mp->mnt_stat) {
343 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
344 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
345 	    }
346 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
347 	} else if (NFS_ISV4(vp)) {
348 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
349 	}
350 	vput(vp);
351 	vfs_unbusy(mp);
352 	return (error);
353 }
354 
355 /*
356  * nfs version 3 fsinfo rpc call
357  */
358 int
359 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
360     struct thread *td)
361 {
362 	struct nfsfsinfo fs;
363 	struct nfsvattr nfsva;
364 	int error, attrflag;
365 
366 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
367 	if (!error) {
368 		if (attrflag)
369 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
370 			    1);
371 		mtx_lock(&nmp->nm_mtx);
372 		nfscl_loadfsinfo(nmp, &fs);
373 		mtx_unlock(&nmp->nm_mtx);
374 	}
375 	return (error);
376 }
377 
378 /*
379  * Mount a remote root fs via. nfs. This depends on the info in the
380  * nfs_diskless structure that has been filled in properly by some primary
381  * bootstrap.
382  * It goes something like this:
383  * - do enough of "ifconfig" by calling ifioctl() so that the system
384  *   can talk to the server
385  * - If nfs_diskless.mygateway is filled in, use that address as
386  *   a default gateway.
387  * - build the rootfs mount point and call mountnfs() to do the rest.
388  *
389  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
390  * structure, as well as other global NFS client variables here, as
391  * nfs_mountroot() will be called once in the boot before any other NFS
392  * client activity occurs.
393  */
394 static int
395 nfs_mountroot(struct mount *mp)
396 {
397 	struct thread *td = curthread;
398 	struct nfsv3_diskless *nd = &nfsv3_diskless;
399 	struct socket *so;
400 	struct vnode *vp;
401 	struct ifreq ir;
402 	int error;
403 	u_long l;
404 	char buf[128];
405 	char *cp;
406 
407 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
408 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
409 #elif defined(NFS_ROOT)
410 	nfs_setup_diskless();
411 #endif
412 
413 	if (nfs_diskless_valid == 0)
414 		return (-1);
415 	if (nfs_diskless_valid == 1)
416 		nfs_convert_diskless();
417 
418 	/*
419 	 * XXX splnet, so networks will receive...
420 	 */
421 	splnet();
422 
423 	/*
424 	 * Do enough of ifconfig(8) so that the critical net interface can
425 	 * talk to the server.
426 	 */
427 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
428 	    td->td_ucred, td);
429 	if (error)
430 		panic("nfs_mountroot: socreate(%04x): %d",
431 			nd->myif.ifra_addr.sa_family, error);
432 
433 #if 0 /* XXX Bad idea */
434 	/*
435 	 * We might not have been told the right interface, so we pass
436 	 * over the first ten interfaces of the same kind, until we get
437 	 * one of them configured.
438 	 */
439 
440 	for (i = strlen(nd->myif.ifra_name) - 1;
441 		nd->myif.ifra_name[i] >= '0' &&
442 		nd->myif.ifra_name[i] <= '9';
443 		nd->myif.ifra_name[i] ++) {
444 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
445 		if(!error)
446 			break;
447 	}
448 #endif
449 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
450 	if (error)
451 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
452 	if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
453 		ir.ifr_mtu = strtol(cp, NULL, 10);
454 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
455 		freeenv(cp);
456 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
457 		if (error)
458 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
459 	}
460 	soclose(so);
461 
462 	/*
463 	 * If the gateway field is filled in, set it as the default route.
464 	 * Note that pxeboot will set a default route of 0 if the route
465 	 * is not set by the DHCP server.  Check also for a value of 0
466 	 * to avoid panicking inappropriately in that situation.
467 	 */
468 	if (nd->mygateway.sin_len != 0 &&
469 	    nd->mygateway.sin_addr.s_addr != 0) {
470 		struct sockaddr_in mask, sin;
471 
472 		bzero((caddr_t)&mask, sizeof(mask));
473 		sin = mask;
474 		sin.sin_family = AF_INET;
475 		sin.sin_len = sizeof(sin);
476                 /* XXX MRT use table 0 for this sort of thing */
477 		CURVNET_SET(TD_TO_VNET(td));
478 		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
479 		    (struct sockaddr *)&nd->mygateway,
480 		    (struct sockaddr *)&mask,
481 		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
482 		CURVNET_RESTORE();
483 		if (error)
484 			panic("nfs_mountroot: RTM_ADD: %d", error);
485 	}
486 
487 	/*
488 	 * Create the rootfs mount point.
489 	 */
490 	nd->root_args.fh = nd->root_fh;
491 	nd->root_args.fhsize = nd->root_fhsize;
492 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
493 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
494 		(l >> 24) & 0xff, (l >> 16) & 0xff,
495 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
496 	printf("NFS ROOT: %s\n", buf);
497 	nd->root_args.hostname = buf;
498 	if ((error = nfs_mountdiskless(buf,
499 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
500 		return (error);
501 	}
502 
503 	/*
504 	 * This is not really an nfs issue, but it is much easier to
505 	 * set hostname here and then let the "/etc/rc.xxx" files
506 	 * mount the right /var based upon its preset value.
507 	 */
508 	mtx_lock(&prison0.pr_mtx);
509 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
510 	    sizeof(prison0.pr_hostname));
511 	mtx_unlock(&prison0.pr_mtx);
512 	inittodr(ntohl(nd->root_time));
513 	return (0);
514 }
515 
516 /*
517  * Internal version of mount system call for diskless setup.
518  */
519 static int
520 nfs_mountdiskless(char *path,
521     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
522     struct vnode **vpp, struct mount *mp)
523 {
524 	struct sockaddr *nam;
525 	int dirlen, error;
526 	char *dirpath;
527 
528 	/*
529 	 * Find the directory path in "path", which also has the server's
530 	 * name/ip address in it.
531 	 */
532 	dirpath = strchr(path, ':');
533 	if (dirpath != NULL)
534 		dirlen = strlen(++dirpath);
535 	else
536 		dirlen = 0;
537 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
538 	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
539 	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
540 	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
541 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
542 		return (error);
543 	}
544 	return (0);
545 }
546 
547 static void
548 nfs_sec_name(char *sec, int *flagsp)
549 {
550 	if (!strcmp(sec, "krb5"))
551 		*flagsp |= NFSMNT_KERB;
552 	else if (!strcmp(sec, "krb5i"))
553 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
554 	else if (!strcmp(sec, "krb5p"))
555 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
556 }
557 
558 static void
559 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
560     const char *hostname, struct ucred *cred, struct thread *td)
561 {
562 	int s;
563 	int adjsock;
564 	char *p;
565 
566 	s = splnet();
567 
568 	/*
569 	 * Set read-only flag if requested; otherwise, clear it if this is
570 	 * an update.  If this is not an update, then either the read-only
571 	 * flag is already clear, or this is a root mount and it was set
572 	 * intentionally at some previous point.
573 	 */
574 	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
575 		MNT_ILOCK(mp);
576 		mp->mnt_flag |= MNT_RDONLY;
577 		MNT_IUNLOCK(mp);
578 	} else if (mp->mnt_flag & MNT_UPDATE) {
579 		MNT_ILOCK(mp);
580 		mp->mnt_flag &= ~MNT_RDONLY;
581 		MNT_IUNLOCK(mp);
582 	}
583 
584 	/*
585 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
586 	 * no sense in that context.  Also, set up appropriate retransmit
587 	 * and soft timeout behavior.
588 	 */
589 	if (argp->sotype == SOCK_STREAM) {
590 		nmp->nm_flag &= ~NFSMNT_NOCONN;
591 		nmp->nm_timeo = NFS_MAXTIMEO;
592 		if ((argp->flags & NFSMNT_NFSV4) != 0)
593 			nmp->nm_retry = INT_MAX;
594 		else
595 			nmp->nm_retry = NFS_RETRANS_TCP;
596 	}
597 
598 	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
599 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
600 		argp->flags &= ~NFSMNT_RDIRPLUS;
601 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
602 	}
603 
604 	/* Re-bind if rsrvd port requested and wasn't on one */
605 	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
606 		  && (argp->flags & NFSMNT_RESVPORT);
607 	/* Also re-bind if we're switching to/from a connected UDP socket */
608 	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
609 		    (argp->flags & NFSMNT_NOCONN));
610 
611 	/* Update flags atomically.  Don't change the lock bits. */
612 	nmp->nm_flag = argp->flags | nmp->nm_flag;
613 	splx(s);
614 
615 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
616 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
617 		if (nmp->nm_timeo < NFS_MINTIMEO)
618 			nmp->nm_timeo = NFS_MINTIMEO;
619 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
620 			nmp->nm_timeo = NFS_MAXTIMEO;
621 	}
622 
623 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
624 		nmp->nm_retry = argp->retrans;
625 		if (nmp->nm_retry > NFS_MAXREXMIT)
626 			nmp->nm_retry = NFS_MAXREXMIT;
627 	}
628 
629 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
630 		nmp->nm_wsize = argp->wsize;
631 		/*
632 		 * Clip at the power of 2 below the size. There is an
633 		 * issue (not isolated) that causes intermittent page
634 		 * faults if this is not done.
635 		 */
636 		if (nmp->nm_wsize > NFS_FABLKSIZE)
637 			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
638 		else
639 			nmp->nm_wsize = NFS_FABLKSIZE;
640 	}
641 
642 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
643 		nmp->nm_rsize = argp->rsize;
644 		/*
645 		 * Clip at the power of 2 below the size. There is an
646 		 * issue (not isolated) that causes intermittent page
647 		 * faults if this is not done.
648 		 */
649 		if (nmp->nm_rsize > NFS_FABLKSIZE)
650 			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
651 		else
652 			nmp->nm_rsize = NFS_FABLKSIZE;
653 	}
654 
655 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
656 		nmp->nm_readdirsize = argp->readdirsize;
657 	}
658 
659 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
660 		nmp->nm_acregmin = argp->acregmin;
661 	else
662 		nmp->nm_acregmin = NFS_MINATTRTIMO;
663 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
664 		nmp->nm_acregmax = argp->acregmax;
665 	else
666 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
667 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
668 		nmp->nm_acdirmin = argp->acdirmin;
669 	else
670 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
671 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
672 		nmp->nm_acdirmax = argp->acdirmax;
673 	else
674 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
675 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
676 		nmp->nm_acdirmin = nmp->nm_acdirmax;
677 	if (nmp->nm_acregmin > nmp->nm_acregmax)
678 		nmp->nm_acregmin = nmp->nm_acregmax;
679 
680 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
681 		if (argp->readahead <= NFS_MAXRAHEAD)
682 			nmp->nm_readahead = argp->readahead;
683 		else
684 			nmp->nm_readahead = NFS_MAXRAHEAD;
685 	}
686 	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
687 		if (argp->wcommitsize < nmp->nm_wsize)
688 			nmp->nm_wcommitsize = nmp->nm_wsize;
689 		else
690 			nmp->nm_wcommitsize = argp->wcommitsize;
691 	}
692 
693 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
694 		    (nmp->nm_soproto != argp->proto));
695 
696 	if (nmp->nm_client != NULL && adjsock) {
697 		int haslock = 0, error = 0;
698 
699 		if (nmp->nm_sotype == SOCK_STREAM) {
700 			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
701 			if (!error)
702 				haslock = 1;
703 		}
704 		if (!error) {
705 		    newnfs_disconnect(&nmp->nm_sockreq);
706 		    if (haslock)
707 			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
708 		    nmp->nm_sotype = argp->sotype;
709 		    nmp->nm_soproto = argp->proto;
710 		    if (nmp->nm_sotype == SOCK_DGRAM)
711 			while (newnfs_connect(nmp, &nmp->nm_sockreq,
712 			    cred, td, 0)) {
713 				printf("newnfs_args: retrying connect\n");
714 				(void) nfs_catnap(PSOCK, 0, "nfscon");
715 			}
716 		}
717 	} else {
718 		nmp->nm_sotype = argp->sotype;
719 		nmp->nm_soproto = argp->proto;
720 	}
721 
722 	if (hostname != NULL) {
723 		strlcpy(nmp->nm_hostname, hostname,
724 		    sizeof(nmp->nm_hostname));
725 		p = strchr(nmp->nm_hostname, ':');
726 		if (p != NULL)
727 			*p = '\0';
728 	}
729 }
730 
/*
 * NULL-terminated list of the mount option names nfs_mount() hands to
 * vfs_filteropt().
 */
static const char *nfs_opts[] = {
	"from", "nfs_args",
	"noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow",
	"union", "noclusterr", "noclusterw", "multilabel", "acls", "force",
	"update", "async",
	"noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize", "soft", "hard", "mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans",
	"actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeo", "timeout",
	"addr", "fh", "nfsv3", "sec", "principal", "nfsv4",
	"gssname", "allgssname", "dirpath", "minorversion",
	"nametimeo", "negnametimeo", "nocto", "noncontigwr",
	"pnfs", "wcommitsize",
	NULL
};
742 
743 /*
744  * Parse the "from" mountarg, passed by the generic mount(8) program
745  * or the mountroot code.  This is used when rerooting into NFS.
746  *
747  * Note that the "hostname" is actually a "hostname:/share/path" string.
748  */
749 static int
750 nfs_mount_parse_from(struct vfsoptlist *opts, char **hostnamep,
751     struct sockaddr_in **sinp, char *dirpath, size_t dirpathsize, int *dirlenp)
752 {
753 	char nam[MNAMELEN + 1];
754 	char *delimp, *hostp, *spec;
755 	int error, have_bracket = 0, offset, rv, speclen;
756 	struct sockaddr_in *sin;
757 	size_t len;
758 
759 	error = vfs_getopt(opts, "from", (void **)&spec, &speclen);
760 	if (error != 0)
761 		return (error);
762 
763 	/*
764 	 * This part comes from sbin/mount_nfs/mount_nfs.c:getnfsargs().
765 	 */
766         if (*spec == '[' && (delimp = strchr(spec + 1, ']')) != NULL &&
767             *(delimp + 1) == ':') {
768                 hostp = spec + 1;
769                 spec = delimp + 2;
770                 have_bracket = 1;
771         } else if ((delimp = strrchr(spec, ':')) != NULL) {
772                 hostp = spec;
773                 spec = delimp + 1;
774         } else if ((delimp = strrchr(spec, '@')) != NULL) {
775                 printf("%s: path@server syntax is deprecated, "
776 		    "use server:path\n", __func__);
777                 hostp = delimp + 1;
778         } else {
779                 printf("%s: no <host>:<dirpath> nfs-name\n", __func__);
780                 return (EINVAL);
781         }
782         *delimp = '\0';
783 
784         /*
785          * If there has been a trailing slash at mounttime it seems
786          * that some mountd implementations fail to remove the mount
787          * entries from their mountlist while unmounting.
788          */
789         for (speclen = strlen(spec);
790                 speclen > 1 && spec[speclen - 1] == '/';
791                 speclen--)
792                 spec[speclen - 1] = '\0';
793         if (strlen(hostp) + strlen(spec) + 1 > MNAMELEN) {
794                 printf("%s: %s:%s: name too long", __func__, hostp, spec);
795                 return (EINVAL);
796         }
797 	/* Make both '@' and ':' notations equal */
798 	if (*hostp != '\0') {
799 		len = strlen(hostp);
800 		offset = 0;
801 		if (have_bracket)
802 			nam[offset++] = '[';
803 		memmove(nam + offset, hostp, len);
804 		if (have_bracket)
805 			nam[len + offset++] = ']';
806 		nam[len + offset++] = ':';
807 		memmove(nam + len + offset, spec, speclen);
808 		nam[len + speclen + offset] = '\0';
809 	}
810 
811 	/*
812 	 * XXX: IPv6
813 	 */
814 	sin = malloc(sizeof(*sin), M_SONAME, M_WAITOK);
815 	rv = inet_pton(AF_INET, hostp, &sin->sin_addr);
816 	if (rv != 1) {
817 		printf("%s: cannot parse '%s', inet_pton() returned %d\n",
818 		    __func__, hostp, rv);
819 		free(sin, M_SONAME);
820 		return (EINVAL);
821 	}
822 
823 	sin->sin_len = sizeof(*sin);
824 	sin->sin_family = AF_INET;
825 	/*
826 	 * XXX: hardcoded port number.
827 	 */
828 	sin->sin_port = htons(2049);
829 
830 	*hostnamep = strdup(nam, M_NEWNFSMNT);
831 	*sinp = sin;
832 	strlcpy(dirpath, spec, dirpathsize);
833 	*dirlenp = strlen(dirpath);
834 
835 	return (0);
836 }
837 
838 /*
839  * VFS Operations.
840  *
841  * mount system call
842  * It seems a bit dumb to copyinstr() the host and path here and then
843  * bcopy() them in mountnfs(), but I wanted to detect errors before
844  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
845  * an error after that means that I have to release the mbuf.
846  */
847 /* ARGSUSED */
848 static int
849 nfs_mount(struct mount *mp)
850 {
851 	struct nfs_args args = {
852 	    .version = NFS_ARGSVERSION,
853 	    .addr = NULL,
854 	    .addrlen = sizeof (struct sockaddr_in),
855 	    .sotype = SOCK_STREAM,
856 	    .proto = 0,
857 	    .fh = NULL,
858 	    .fhsize = 0,
859 	    .flags = NFSMNT_RESVPORT,
860 	    .wsize = NFS_WSIZE,
861 	    .rsize = NFS_RSIZE,
862 	    .readdirsize = NFS_READDIRSIZE,
863 	    .timeo = 10,
864 	    .retrans = NFS_RETRANS,
865 	    .readahead = NFS_DEFRAHEAD,
866 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
867 	    .hostname = NULL,
868 	    .acregmin = NFS_MINATTRTIMO,
869 	    .acregmax = NFS_MAXATTRTIMO,
870 	    .acdirmin = NFS_MINDIRATTRTIMO,
871 	    .acdirmax = NFS_MAXDIRATTRTIMO,
872 	};
873 	int error = 0, ret, len;
874 	struct sockaddr *nam = NULL;
875 	struct vnode *vp;
876 	struct thread *td;
877 	char hst[MNAMELEN];
878 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
879 	char *cp, *opt, *name, *secname;
880 	int nametimeo = NFS_DEFAULT_NAMETIMEO;
881 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
882 	int minvers = 0;
883 	int dirlen, has_nfs_args_opt, has_nfs_from_opt,
884 	    krbnamelen, srvkrbnamelen;
885 	size_t hstlen;
886 
887 	has_nfs_args_opt = 0;
888 	has_nfs_from_opt = 0;
889 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
890 		error = EINVAL;
891 		goto out;
892 	}
893 
894 	td = curthread;
895 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS &&
896 	    nfs_diskless_valid != 0) {
897 		error = nfs_mountroot(mp);
898 		goto out;
899 	}
900 
901 	nfscl_init();
902 
903 	/*
904 	 * The old mount_nfs program passed the struct nfs_args
905 	 * from userspace to kernel.  The new mount_nfs program
906 	 * passes string options via nmount() from userspace to kernel
907 	 * and we populate the struct nfs_args in the kernel.
908 	 */
909 	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
910 		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
911 		    sizeof(args));
912 		if (error != 0)
913 			goto out;
914 
915 		if (args.version != NFS_ARGSVERSION) {
916 			error = EPROGMISMATCH;
917 			goto out;
918 		}
919 		has_nfs_args_opt = 1;
920 	}
921 
922 	/* Handle the new style options. */
923 	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
924 		args.acdirmin = args.acdirmax =
925 		    args.acregmin = args.acregmax = 0;
926 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
927 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
928 	}
929 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
930 		args.flags |= NFSMNT_NOCONN;
931 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
932 		args.flags &= ~NFSMNT_NOCONN;
933 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
934 		args.flags |= NFSMNT_NOLOCKD;
935 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
936 		args.flags &= ~NFSMNT_NOLOCKD;
937 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
938 		args.flags |= NFSMNT_INT;
939 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
940 		args.flags |= NFSMNT_RDIRPLUS;
941 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
942 		args.flags |= NFSMNT_RESVPORT;
943 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
944 		args.flags &= ~NFSMNT_RESVPORT;
945 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
946 		args.flags |= NFSMNT_SOFT;
947 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
948 		args.flags &= ~NFSMNT_SOFT;
949 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
950 		args.sotype = SOCK_DGRAM;
951 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
952 		args.sotype = SOCK_DGRAM;
953 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
954 		args.sotype = SOCK_STREAM;
955 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
956 		args.flags |= NFSMNT_NFSV3;
957 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
958 		args.flags |= NFSMNT_NFSV4;
959 		args.sotype = SOCK_STREAM;
960 	}
961 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
962 		args.flags |= NFSMNT_ALLGSSNAME;
963 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
964 		args.flags |= NFSMNT_NOCTO;
965 	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
966 		args.flags |= NFSMNT_NONCONTIGWR;
967 	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
968 		args.flags |= NFSMNT_PNFS;
969 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
970 		if (opt == NULL) {
971 			vfs_mount_error(mp, "illegal readdirsize");
972 			error = EINVAL;
973 			goto out;
974 		}
975 		ret = sscanf(opt, "%d", &args.readdirsize);
976 		if (ret != 1 || args.readdirsize <= 0) {
977 			vfs_mount_error(mp, "illegal readdirsize: %s",
978 			    opt);
979 			error = EINVAL;
980 			goto out;
981 		}
982 		args.flags |= NFSMNT_READDIRSIZE;
983 	}
984 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
985 		if (opt == NULL) {
986 			vfs_mount_error(mp, "illegal readahead");
987 			error = EINVAL;
988 			goto out;
989 		}
990 		ret = sscanf(opt, "%d", &args.readahead);
991 		if (ret != 1 || args.readahead <= 0) {
992 			vfs_mount_error(mp, "illegal readahead: %s",
993 			    opt);
994 			error = EINVAL;
995 			goto out;
996 		}
997 		args.flags |= NFSMNT_READAHEAD;
998 	}
999 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
1000 		if (opt == NULL) {
1001 			vfs_mount_error(mp, "illegal wsize");
1002 			error = EINVAL;
1003 			goto out;
1004 		}
1005 		ret = sscanf(opt, "%d", &args.wsize);
1006 		if (ret != 1 || args.wsize <= 0) {
1007 			vfs_mount_error(mp, "illegal wsize: %s",
1008 			    opt);
1009 			error = EINVAL;
1010 			goto out;
1011 		}
1012 		args.flags |= NFSMNT_WSIZE;
1013 	}
1014 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
1015 		if (opt == NULL) {
1016 			vfs_mount_error(mp, "illegal rsize");
1017 			error = EINVAL;
1018 			goto out;
1019 		}
1020 		ret = sscanf(opt, "%d", &args.rsize);
1021 		if (ret != 1 || args.rsize <= 0) {
1022 			vfs_mount_error(mp, "illegal wsize: %s",
1023 			    opt);
1024 			error = EINVAL;
1025 			goto out;
1026 		}
1027 		args.flags |= NFSMNT_RSIZE;
1028 	}
1029 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
1030 		if (opt == NULL) {
1031 			vfs_mount_error(mp, "illegal retrans");
1032 			error = EINVAL;
1033 			goto out;
1034 		}
1035 		ret = sscanf(opt, "%d", &args.retrans);
1036 		if (ret != 1 || args.retrans <= 0) {
1037 			vfs_mount_error(mp, "illegal retrans: %s",
1038 			    opt);
1039 			error = EINVAL;
1040 			goto out;
1041 		}
1042 		args.flags |= NFSMNT_RETRANS;
1043 	}
1044 	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
1045 		ret = sscanf(opt, "%d", &args.acregmin);
1046 		if (ret != 1 || args.acregmin < 0) {
1047 			vfs_mount_error(mp, "illegal actimeo: %s",
1048 			    opt);
1049 			error = EINVAL;
1050 			goto out;
1051 		}
1052 		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
1053 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
1054 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
1055 	}
1056 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
1057 		ret = sscanf(opt, "%d", &args.acregmin);
1058 		if (ret != 1 || args.acregmin < 0) {
1059 			vfs_mount_error(mp, "illegal acregmin: %s",
1060 			    opt);
1061 			error = EINVAL;
1062 			goto out;
1063 		}
1064 		args.flags |= NFSMNT_ACREGMIN;
1065 	}
1066 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
1067 		ret = sscanf(opt, "%d", &args.acregmax);
1068 		if (ret != 1 || args.acregmax < 0) {
1069 			vfs_mount_error(mp, "illegal acregmax: %s",
1070 			    opt);
1071 			error = EINVAL;
1072 			goto out;
1073 		}
1074 		args.flags |= NFSMNT_ACREGMAX;
1075 	}
1076 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
1077 		ret = sscanf(opt, "%d", &args.acdirmin);
1078 		if (ret != 1 || args.acdirmin < 0) {
1079 			vfs_mount_error(mp, "illegal acdirmin: %s",
1080 			    opt);
1081 			error = EINVAL;
1082 			goto out;
1083 		}
1084 		args.flags |= NFSMNT_ACDIRMIN;
1085 	}
1086 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
1087 		ret = sscanf(opt, "%d", &args.acdirmax);
1088 		if (ret != 1 || args.acdirmax < 0) {
1089 			vfs_mount_error(mp, "illegal acdirmax: %s",
1090 			    opt);
1091 			error = EINVAL;
1092 			goto out;
1093 		}
1094 		args.flags |= NFSMNT_ACDIRMAX;
1095 	}
1096 	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
1097 		ret = sscanf(opt, "%d", &args.wcommitsize);
1098 		if (ret != 1 || args.wcommitsize < 0) {
1099 			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1100 			error = EINVAL;
1101 			goto out;
1102 		}
1103 		args.flags |= NFSMNT_WCOMMITSIZE;
1104 	}
1105 	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1106 		ret = sscanf(opt, "%d", &args.timeo);
1107 		if (ret != 1 || args.timeo <= 0) {
1108 			vfs_mount_error(mp, "illegal timeo: %s",
1109 			    opt);
1110 			error = EINVAL;
1111 			goto out;
1112 		}
1113 		args.flags |= NFSMNT_TIMEO;
1114 	}
1115 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1116 		ret = sscanf(opt, "%d", &args.timeo);
1117 		if (ret != 1 || args.timeo <= 0) {
1118 			vfs_mount_error(mp, "illegal timeout: %s",
1119 			    opt);
1120 			error = EINVAL;
1121 			goto out;
1122 		}
1123 		args.flags |= NFSMNT_TIMEO;
1124 	}
1125 	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1126 		ret = sscanf(opt, "%d", &nametimeo);
1127 		if (ret != 1 || nametimeo < 0) {
1128 			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1129 			error = EINVAL;
1130 			goto out;
1131 		}
1132 	}
1133 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1134 	    == 0) {
1135 		ret = sscanf(opt, "%d", &negnametimeo);
1136 		if (ret != 1 || negnametimeo < 0) {
1137 			vfs_mount_error(mp, "illegal negnametimeo: %s",
1138 			    opt);
1139 			error = EINVAL;
1140 			goto out;
1141 		}
1142 	}
1143 	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1144 	    0) {
1145 		ret = sscanf(opt, "%d", &minvers);
1146 		if (ret != 1 || minvers < 0 || minvers > 1 ||
1147 		    (args.flags & NFSMNT_NFSV4) == 0) {
1148 			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1149 			error = EINVAL;
1150 			goto out;
1151 		}
1152 	}
1153 	if (vfs_getopt(mp->mnt_optnew, "sec",
1154 		(void **) &secname, NULL) == 0)
1155 		nfs_sec_name(secname, &args.flags);
1156 
1157 	if (mp->mnt_flag & MNT_UPDATE) {
1158 		struct nfsmount *nmp = VFSTONFS(mp);
1159 
1160 		if (nmp == NULL) {
1161 			error = EIO;
1162 			goto out;
1163 		}
1164 
1165 		/*
1166 		 * If a change from TCP->UDP is done and there are thread(s)
1167 		 * that have I/O RPC(s) in progress with a transfer size
1168 		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1169 		 * hung, retrying the RPC(s) forever. Usually these threads
1170 		 * will be seen doing an uninterruptible sleep on wait channel
1171 		 * "nfsreq".
1172 		 */
1173 		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1174 			tprintf(td->td_proc, LOG_WARNING,
1175 	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1176 
1177 		/*
1178 		 * When doing an update, we can't change version,
1179 		 * security, switch lockd strategies or change cookie
1180 		 * translation
1181 		 */
1182 		args.flags = (args.flags &
1183 		    ~(NFSMNT_NFSV3 |
1184 		      NFSMNT_NFSV4 |
1185 		      NFSMNT_KERB |
1186 		      NFSMNT_INTEGRITY |
1187 		      NFSMNT_PRIVACY |
1188 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1189 		    (nmp->nm_flag &
1190 			(NFSMNT_NFSV3 |
1191 			 NFSMNT_NFSV4 |
1192 			 NFSMNT_KERB |
1193 			 NFSMNT_INTEGRITY |
1194 			 NFSMNT_PRIVACY |
1195 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1196 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1197 		goto out;
1198 	}
1199 
1200 	/*
1201 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1202 	 * or no-connection mode for those protocols that support
1203 	 * no-connection mode (the flag will be cleared later for protocols
1204 	 * that do not support no-connection mode).  This will allow a client
1205 	 * to receive replies from a different IP then the request was
1206 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1207 	 * not 0.
1208 	 */
1209 	if (nfs_ip_paranoia == 0)
1210 		args.flags |= NFSMNT_NOCONN;
1211 
1212 	if (has_nfs_args_opt != 0) {
1213 		/*
1214 		 * In the 'nfs_args' case, the pointers in the args
1215 		 * structure are in userland - we copy them in here.
1216 		 */
1217 		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1218 			vfs_mount_error(mp, "Bad file handle");
1219 			error = EINVAL;
1220 			goto out;
1221 		}
1222 		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1223 		    args.fhsize);
1224 		if (error != 0)
1225 			goto out;
1226 		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1227 		if (error != 0)
1228 			goto out;
1229 		bzero(&hst[hstlen], MNAMELEN - hstlen);
1230 		args.hostname = hst;
1231 		/* getsockaddr() call must be after above copyin() calls */
1232 		error = getsockaddr(&nam, (caddr_t)args.addr,
1233 		    args.addrlen);
1234 		if (error != 0)
1235 			goto out;
1236 	} else if (nfs_mount_parse_from(mp->mnt_optnew,
1237 	    &args.hostname, (struct sockaddr_in **)&nam, dirpath,
1238 	    sizeof(dirpath), &dirlen) == 0) {
1239 		has_nfs_from_opt = 1;
1240 		bcopy(args.hostname, hst, MNAMELEN);
1241 		hst[MNAMELEN - 1] = '\0';
1242 
1243 		/*
1244 		 * This only works with NFSv4 for now.
1245 		 */
1246 		args.fhsize = 0;
1247 		args.flags |= NFSMNT_NFSV4;
1248 		args.sotype = SOCK_STREAM;
1249 	} else {
1250 		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1251 		    &args.fhsize) == 0) {
1252 			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1253 				vfs_mount_error(mp, "Bad file handle");
1254 				error = EINVAL;
1255 				goto out;
1256 			}
1257 			bcopy(args.fh, nfh, args.fhsize);
1258 		} else {
1259 			args.fhsize = 0;
1260 		}
1261 		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1262 		    (void **)&args.hostname, &len);
1263 		if (args.hostname == NULL) {
1264 			vfs_mount_error(mp, "Invalid hostname");
1265 			error = EINVAL;
1266 			goto out;
1267 		}
1268 		bcopy(args.hostname, hst, MNAMELEN);
1269 		hst[MNAMELEN - 1] = '\0';
1270 	}
1271 
1272 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1273 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1274 	else {
1275 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1276 		cp = strchr(srvkrbname, ':');
1277 		if (cp != NULL)
1278 			*cp = '\0';
1279 	}
1280 	srvkrbnamelen = strlen(srvkrbname);
1281 
1282 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1283 		strlcpy(krbname, name, sizeof (krbname));
1284 	else
1285 		krbname[0] = '\0';
1286 	krbnamelen = strlen(krbname);
1287 
1288 	if (has_nfs_from_opt == 0) {
1289 		if (vfs_getopt(mp->mnt_optnew,
1290 		    "dirpath", (void **)&name, NULL) == 0)
1291 			strlcpy(dirpath, name, sizeof (dirpath));
1292 		else
1293 			dirpath[0] = '\0';
1294 		dirlen = strlen(dirpath);
1295 	}
1296 
1297 	if (has_nfs_args_opt == 0 && has_nfs_from_opt == 0) {
1298 		if (vfs_getopt(mp->mnt_optnew, "addr",
1299 		    (void **)&args.addr, &args.addrlen) == 0) {
1300 			if (args.addrlen > SOCK_MAXADDRLEN) {
1301 				error = ENAMETOOLONG;
1302 				goto out;
1303 			}
1304 			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1305 			bcopy(args.addr, nam, args.addrlen);
1306 			nam->sa_len = args.addrlen;
1307 		} else {
1308 			vfs_mount_error(mp, "No server address");
1309 			error = EINVAL;
1310 			goto out;
1311 		}
1312 	}
1313 
1314 	args.fh = nfh;
1315 	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1316 	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1317 	    nametimeo, negnametimeo, minvers);
1318 out:
1319 	if (!error) {
1320 		MNT_ILOCK(mp);
1321 		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1322 		    MNTK_USES_BCACHE;
1323 		MNT_IUNLOCK(mp);
1324 	}
1325 	return (error);
1326 }
1327 
1328 
1329 /*
1330  * VFS Operations.
1331  *
1332  * mount system call
1333  * It seems a bit dumb to copyinstr() the host and path here and then
1334  * bcopy() them in mountnfs(), but I wanted to detect errors before
1335  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
1336  * an error after that means that I have to release the mbuf.
1337  */
1338 /* ARGSUSED */
1339 static int
1340 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1341 {
1342 	int error;
1343 	struct nfs_args args;
1344 
1345 	error = copyin(data, &args, sizeof (struct nfs_args));
1346 	if (error)
1347 		return error;
1348 
1349 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1350 
1351 	error = kernel_mount(ma, flags);
1352 	return (error);
1353 }
1354 
/*
 * Common code for mount and mountroot
 *
 * Allocates and fills in the nfsmount structure for a new mount, connects
 * to the server and obtains the root vnode of the mount.
 *
 *	argp		mount arguments (flags, file handle, socket type, ...)
 *	mp		mount point being set up; mp->mnt_data is pointed at
 *			the new nfsmount
 *	nam		server address; kept in nm_nam on success, freed here
 *			on failure and in the MNT_UPDATE case
 *	hst		host name, copied (MNAMELEN bytes) into
 *			mp->mnt_stat.f_mntfromname
 *	krbname, dirpath, srvkrbname (and their lengths)
 *			strings copied into the space allocated after the
 *			nfsmount structure
 *	vpp		on success, set to the root vnode, which is returned
 *			unlocked but still referenced
 *	cred, td	credentials and thread used for the connection and
 *			the RPCs done here
 *	nametimeo, negnametimeo
 *			stored in nm_nametimeo and nm_negnametimeo
 *	minvers		NFSv4 minor version; only used when NFSMNT_NFSV4 is
 *			set in argp->flags, otherwise 0 is recorded
 *
 * Returns 0 on success or an errno value on failure.  On failure the
 * nfsmount and "nam" have been freed.
 */
static int
mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
    struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
    int minvers)
{
	struct nfsmount *nmp;
	struct nfsnode *np;
	int error, trycnt, ret;
	struct nfsvattr nfsva;
	struct nfsclclient *clp;
	struct nfsclds *dsp, *tdsp;
	uint32_t lease;
	/*
	 * Source of the nm_clval values; seeded from nfsboottime.tv_sec on
	 * first use and incremented for every new mount.
	 */
	static u_int64_t clval = 0;

	NFSCL_DEBUG(3, "in mnt\n");
	clp = NULL;
	if (mp->mnt_flag & MNT_UPDATE) {
		/* Updates are not done here: just complain and drop nam. */
		nmp = VFSTONFS(mp);
		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
		FREE(nam, M_SONAME);
		return (0);
	} else {
		/*
		 * The three names are stored after the structure, so the
		 * allocation is sized to hold them as well.
		 */
		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
		    krbnamelen + dirlen + srvkrbnamelen + 2,
		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
		TAILQ_INIT(&nmp->nm_bufq);
		if (clval == 0)
			clval = (u_int64_t)nfsboottime.tv_sec;
		nmp->nm_clval = clval++;
		nmp->nm_krbnamelen = krbnamelen;
		nmp->nm_dirpathlen = dirlen;
		nmp->nm_srvkrbnamelen = srvkrbnamelen;
		if (td->td_ucred->cr_uid != (uid_t)0) {
			/*
			 * nm_uid is used to get KerberosV credentials for
			 * the nfsv4 state handling operations if there is
			 * no host based principal set. Use the uid of
			 * this user if not root, since they are doing the
			 * mount. I don't think setting this for root will
			 * work, since root normally does not have user
			 * credentials in a credentials cache.
			 */
			nmp->nm_uid = td->td_ucred->cr_uid;
		} else {
			/*
			 * Just set to -1, so it won't be used.
			 */
			nmp->nm_uid = (uid_t)-1;
		}

		/* Copy and null terminate all the names */
		if (nmp->nm_krbnamelen > 0) {
			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
		}
		if (nmp->nm_dirpathlen > 0) {
			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
			    nmp->nm_dirpathlen);
			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
			    + 1] = '\0';
		}
		if (nmp->nm_srvkrbnamelen > 0) {
			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
			    nmp->nm_srvkrbnamelen);
			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
			    + nmp->nm_srvkrbnamelen + 2] = '\0';
		}
		/* The mount keeps its own reference on the credentials. */
		nmp->nm_sockreq.nr_cred = crhold(cred);
		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
		mp->mnt_data = nmp;
		nmp->nm_getinfo = nfs_getnlminfo;
		nmp->nm_vinvalbuf = ncl_vinvalbuf;
	}
	vfs_getnewfsid(mp);
	nmp->nm_mountp = mp;
	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);

	/*
	 * Since nfs_decode_args() might optionally set them, these
	 * need to be set to defaults before the call, so that the
	 * optional settings aren't overwritten.
	 */
	nmp->nm_nametimeo = nametimeo;
	nmp->nm_negnametimeo = negnametimeo;
	nmp->nm_timeo = NFS_TIMEO;
	nmp->nm_retry = NFS_RETRANS;
	nmp->nm_readahead = NFS_DEFRAHEAD;

	/* This is empirical approximation of sqrt(hibufspace) * 256. */
	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
		nmp->nm_wcommitsize *= 2;
	nmp->nm_wcommitsize *= 256;

	/* The minor version is only meaningful for NFSv4 mounts. */
	if ((argp->flags & NFSMNT_NFSV4) != 0)
		nmp->nm_minorvers = minvers;
	else
		nmp->nm_minorvers = 0;

	nfs_decode_args(mp, nmp, argp, hst, cred, td);

	/*
	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
	 * high, depending on whether we end up with negative offsets in
	 * the client or server somewhere.  2GB-1 may be safer.
	 *
	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
	 * that we can handle until we find out otherwise.
	 */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
		nmp->nm_maxfilesize = 0xffffffffLL;
	else
		nmp->nm_maxfilesize = OFF_MAX;

	/* Neither V3 nor V4 flag set: use the default transfer sizes. */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
		nmp->nm_wsize = NFS_WSIZE;
		nmp->nm_rsize = NFS_RSIZE;
		nmp->nm_readdirsize = NFS_READDIRSIZE;
	}
	nmp->nm_numgrps = NFS_MAXGRPS;
	/* Negative tprintf delays are clamped to zero. */
	nmp->nm_tprintf_delay = nfs_tprintf_delay;
	if (nmp->nm_tprintf_delay < 0)
		nmp->nm_tprintf_delay = 0;
	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
	if (nmp->nm_tprintf_initial_delay < 0)
		nmp->nm_tprintf_initial_delay = 0;
	nmp->nm_fhsize = argp->fhsize;
	if (nmp->nm_fhsize > 0)
		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
	nmp->nm_nam = nam;
	/* Set up the sockets and per-host congestion */
	nmp->nm_sotype = argp->sotype;
	nmp->nm_soproto = argp->proto;
	nmp->nm_sockreq.nr_prog = NFS_PROG;
	if ((argp->flags & NFSMNT_NFSV4))
		nmp->nm_sockreq.nr_vers = NFS_VER4;
	else if ((argp->flags & NFSMNT_NFSV3))
		nmp->nm_sockreq.nr_vers = NFS_VER3;
	else
		nmp->nm_sockreq.nr_vers = NFS_VER2;


	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
		goto bad;
	/* For NFSv4.1, get the clientid now. */
	if (nmp->nm_minorvers > 0) {
		NFSCL_DEBUG(3, "at getcl\n");
		error = nfscl_getcl(mp, cred, td, 0, &clp);
		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
		if (error != 0)
			goto bad;
	}

	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
	    nmp->nm_dirpathlen > 0) {
		NFSCL_DEBUG(3, "in dirp\n");
		/*
		 * If the fhsize on the mount point == 0 for V4, the mount
		 * path needs to be looked up.
		 */
		/* Up to three attempts, napping after each failed one. */
		trycnt = 3;
		do {
			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
			    cred, td);
			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
			if (error)
				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
		} while (error && --trycnt > 0);
		if (error) {
			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
			goto bad;
		}
	}

	/*
	 * A reference count is needed on the nfsnode representing the
	 * remote root.  If this object is not persistent, then backward
	 * traversals of the mount point (i.e. "..") will not work if
	 * the nfsnode gets flushed out of the cache. Ufs does not have
	 * this problem, because one can identify root inodes by their
	 * number == ROOTINO (2).
	 */
	if (nmp->nm_fhsize > 0) {
		/*
		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
		 * non-zero for the root vnode. f_iosize will be set correctly
		 * by nfs_statfs() before any I/O occurs.
		 */
		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
		    LK_EXCLUSIVE);
		if (error)
			goto bad;
		*vpp = NFSTOV(np);

		/*
		 * Get file attributes and transfer parameters for the
		 * mountpoint.  This has the side effect of filling in
		 * (*vpp)->v_type with the correct value.
		 */
		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
		    cred, td, &nfsva, NULL, &lease);
		if (ret) {
			/*
			 * Just set default values to get things going.
			 */
			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
			nfsva.na_vattr.va_type = VDIR;
			nfsva.na_vattr.va_mode = 0777;
			nfsva.na_vattr.va_nlink = 100;
			nfsva.na_vattr.va_uid = (uid_t)0;
			nfsva.na_vattr.va_gid = (gid_t)0;
			nfsva.na_vattr.va_fileid = 2;
			nfsva.na_vattr.va_gen = 1;
			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
			nfsva.na_vattr.va_size = 512 * 1024;
			lease = 60;
		}
		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
		if (nmp->nm_minorvers > 0) {
			/*
			 * clp was obtained by nfscl_getcl() above; record the
			 * lease based renew/expire times in it.
			 */
			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
			NFSLOCKCLSTATE();
			clp->nfsc_renew = NFSCL_RENEW(lease);
			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
			/* Bump the revision, skipping over zero on wrap. */
			clp->nfsc_clientidrev++;
			if (clp->nfsc_clientidrev == 0)
				clp->nfsc_clientidrev++;
			NFSUNLOCKCLSTATE();
			/*
			 * Mount will succeed, so the renew thread can be
			 * started now.
			 */
			nfscl_start_renewthread(clp);
			nfscl_clientrelease(clp);
		}
		if (argp->flags & NFSMNT_NFSV3)
			ncl_fsinfo(nmp, *vpp, cred, td);

		/* Mark if the mount point supports NFSv4 ACLs. */
		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
		    ret == 0 &&
		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_NFS4ACLS;
			MNT_IUNLOCK(mp);
		}

		/*
		 * Lose the lock but keep the ref.
		 */
		NFSVOPUNLOCK(*vpp, 0);
		return (0);
	}
	/* Still no root file handle, so the mount cannot be completed. */
	error = EIO;

bad:
	/*
	 * Failure: undo the setup done above and free both the nfsmount
	 * and the server address.
	 * NOTE(review): mp->mnt_data was pointed at nmp earlier and is not
	 * cleared here - confirm that no caller looks at it after a failed
	 * mount.
	 */
	if (clp != NULL)
		nfscl_clientrelease(clp);
	newnfs_disconnect(&nmp->nm_sockreq);
	crfree(nmp->nm_sockreq.nr_cred);
	if (nmp->nm_sockreq.nr_auth != NULL)
		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
	mtx_destroy(&nmp->nm_mtx);
	if (nmp->nm_clp != NULL) {
		NFSLOCKCLSTATE();
		LIST_REMOVE(nmp->nm_clp, nfsc_list);
		NFSUNLOCKCLSTATE();
		free(nmp->nm_clp, M_NFSCLCLIENT);
	}
	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
		nfscl_freenfsclds(dsp);
	FREE(nmp, M_NEWNFSMNT);
	FREE(nam, M_SONAME);
	return (error);
}
1637 
1638 /*
1639  * unmount system call
1640  */
1641 static int
1642 nfs_unmount(struct mount *mp, int mntflags)
1643 {
1644 	struct thread *td;
1645 	struct nfsmount *nmp;
1646 	int error, flags = 0, i, trycnt = 0;
1647 	struct nfsclds *dsp, *tdsp;
1648 
1649 	td = curthread;
1650 
1651 	if (mntflags & MNT_FORCE)
1652 		flags |= FORCECLOSE;
1653 	nmp = VFSTONFS(mp);
1654 	/*
1655 	 * Goes something like this..
1656 	 * - Call vflush() to clear out vnodes for this filesystem
1657 	 * - Close the socket
1658 	 * - Free up the data structures
1659 	 */
1660 	/* In the forced case, cancel any outstanding requests. */
1661 	if (mntflags & MNT_FORCE) {
1662 		error = newnfs_nmcancelreqs(nmp);
1663 		if (error)
1664 			goto out;
1665 		/* For a forced close, get rid of the renew thread now */
1666 		nfscl_umount(nmp, td);
1667 	}
1668 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1669 	do {
1670 		error = vflush(mp, 1, flags, td);
1671 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1672 			(void) nfs_catnap(PSOCK, error, "newndm");
1673 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1674 	if (error)
1675 		goto out;
1676 
1677 	/*
1678 	 * We are now committed to the unmount.
1679 	 */
1680 	if ((mntflags & MNT_FORCE) == 0)
1681 		nfscl_umount(nmp, td);
1682 	/* Make sure no nfsiods are assigned to this mount. */
1683 	mtx_lock(&ncl_iod_mutex);
1684 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1685 		if (ncl_iodmount[i] == nmp) {
1686 			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1687 			ncl_iodmount[i] = NULL;
1688 		}
1689 	mtx_unlock(&ncl_iod_mutex);
1690 	newnfs_disconnect(&nmp->nm_sockreq);
1691 	crfree(nmp->nm_sockreq.nr_cred);
1692 	FREE(nmp->nm_nam, M_SONAME);
1693 	if (nmp->nm_sockreq.nr_auth != NULL)
1694 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1695 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1696 	mtx_destroy(&nmp->nm_mtx);
1697 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1698 		nfscl_freenfsclds(dsp);
1699 	FREE(nmp, M_NEWNFSMNT);
1700 out:
1701 	return (error);
1702 }
1703 
1704 /*
1705  * Return root of a filesystem
1706  */
1707 static int
1708 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1709 {
1710 	struct vnode *vp;
1711 	struct nfsmount *nmp;
1712 	struct nfsnode *np;
1713 	int error;
1714 
1715 	nmp = VFSTONFS(mp);
1716 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1717 	if (error)
1718 		return error;
1719 	vp = NFSTOV(np);
1720 	/*
1721 	 * Get transfer parameters and attributes for root vnode once.
1722 	 */
1723 	mtx_lock(&nmp->nm_mtx);
1724 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1725 		mtx_unlock(&nmp->nm_mtx);
1726 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1727 	} else
1728 		mtx_unlock(&nmp->nm_mtx);
1729 	if (vp->v_type == VNON)
1730 	    vp->v_type = VDIR;
1731 	vp->v_vflag |= VV_ROOT;
1732 	*vpp = vp;
1733 	return (0);
1734 }
1735 
1736 /*
1737  * Flush out the buffer cache
1738  */
1739 /* ARGSUSED */
1740 static int
1741 nfs_sync(struct mount *mp, int waitfor)
1742 {
1743 	struct vnode *vp, *mvp;
1744 	struct thread *td;
1745 	int error, allerror = 0;
1746 
1747 	td = curthread;
1748 
1749 	MNT_ILOCK(mp);
1750 	/*
1751 	 * If a forced dismount is in progress, return from here so that
1752 	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1753 	 * calling VFS_UNMOUNT().
1754 	 */
1755 	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1756 		MNT_IUNLOCK(mp);
1757 		return (EBADF);
1758 	}
1759 	MNT_IUNLOCK(mp);
1760 
1761 	/*
1762 	 * Force stale buffer cache information to be flushed.
1763 	 */
1764 loop:
1765 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1766 		/* XXX Racy bv_cnt check. */
1767 		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1768 		    waitfor == MNT_LAZY) {
1769 			VI_UNLOCK(vp);
1770 			continue;
1771 		}
1772 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1773 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1774 			goto loop;
1775 		}
1776 		error = VOP_FSYNC(vp, waitfor, td);
1777 		if (error)
1778 			allerror = error;
1779 		NFSVOPUNLOCK(vp, 0);
1780 		vrele(vp);
1781 	}
1782 	return (allerror);
1783 }
1784 
1785 static int
1786 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1787 {
1788 	struct nfsmount *nmp = VFSTONFS(mp);
1789 	struct vfsquery vq;
1790 	int error;
1791 
1792 	bzero(&vq, sizeof(vq));
1793 	switch (op) {
1794 #if 0
1795 	case VFS_CTL_NOLOCKS:
1796 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1797  		if (req->oldptr != NULL) {
1798  			error = SYSCTL_OUT(req, &val, sizeof(val));
1799  			if (error)
1800  				return (error);
1801  		}
1802  		if (req->newptr != NULL) {
1803  			error = SYSCTL_IN(req, &val, sizeof(val));
1804  			if (error)
1805  				return (error);
1806 			if (val)
1807 				nmp->nm_flag |= NFSMNT_NOLOCKS;
1808 			else
1809 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1810  		}
1811 		break;
1812 #endif
1813 	case VFS_CTL_QUERY:
1814 		mtx_lock(&nmp->nm_mtx);
1815 		if (nmp->nm_state & NFSSTA_TIMEO)
1816 			vq.vq_flags |= VQ_NOTRESP;
1817 		mtx_unlock(&nmp->nm_mtx);
1818 #if 0
1819 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1820 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1821 			vq.vq_flags |= VQ_NOTRESPLOCK;
1822 #endif
1823 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1824 		break;
1825  	case VFS_CTL_TIMEO:
1826  		if (req->oldptr != NULL) {
1827  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1828  			    sizeof(nmp->nm_tprintf_initial_delay));
1829  			if (error)
1830  				return (error);
1831  		}
1832  		if (req->newptr != NULL) {
1833 			error = vfs_suser(mp, req->td);
1834 			if (error)
1835 				return (error);
1836  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1837  			    sizeof(nmp->nm_tprintf_initial_delay));
1838  			if (error)
1839  				return (error);
1840  			if (nmp->nm_tprintf_initial_delay < 0)
1841  				nmp->nm_tprintf_initial_delay = 0;
1842  		}
1843 		break;
1844 	default:
1845 		return (ENOTSUP);
1846 	}
1847 	return (0);
1848 }
1849 
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.
 *
 * The value returned by newnfs_nmcancelreqs() is not used.
 */
static void
nfs_purge(struct mount *mp)
{

	(void)newnfs_nmcancelreqs(VFSTONFS(mp));
}
1862 
1863 /*
1864  * Extract the information needed by the nlm from the nfs vnode.
1865  */
1866 static void
1867 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1868     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1869     struct timeval *timeop)
1870 {
1871 	struct nfsmount *nmp;
1872 	struct nfsnode *np = VTONFS(vp);
1873 
1874 	nmp = VFSTONFS(vp->v_mount);
1875 	if (fhlenp != NULL)
1876 		*fhlenp = (size_t)np->n_fhp->nfh_len;
1877 	if (fhp != NULL)
1878 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1879 	if (sp != NULL)
1880 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1881 	if (is_v3p != NULL)
1882 		*is_v3p = NFS_ISV3(vp);
1883 	if (sizep != NULL)
1884 		*sizep = np->n_size;
1885 	if (timeop != NULL) {
1886 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1887 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1888 	}
1889 }
1890 
/*
 * Append the option name "opt" to the output buffer when "testval" is
 * non-zero and the name, plus its terminating NUL, fits in the *blen
 * bytes that are left.  On output *buf is advanced past the text written
 * and *blen is reduced by the same amount; otherwise both are left
 * untouched.  "nmp" is not used.
 */
static __inline void
nfscl_printopt(struct nfsmount *nmp, int testval, char *opt, char **buf,
    size_t *blen)
{
	size_t optlen;
	int written;

	if (testval == 0)
		return;
	optlen = strlen(opt);
	if (*blen <= optlen)
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	/* The size check above means the whole name should have fit. */
	if ((size_t)written != optlen)
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
1908 
/*
 * Append "opt=optval" (optval printed as a decimal integer) to the
 * output buffer.
 *
 * The cheap size test only guarantees room for the name, the '=' and a
 * NUL, so the formatted value may still not fit.  When it fits, *buf is
 * advanced past the text and *blen reduced accordingly.  When it does not
 * fit, the partial text is discarded by restoring the NUL at *buf, so a
 * cut-off number can never be read back as a valid option value; *buf
 * and *blen are left unchanged in that case.
 * The nmp argument is not used here.
 */
static __inline void
nfscl_printoptval(struct nfsmount *nmp, int optval,
    char *opt, char **buf, size_t *blen)
{
	int len;

	if (*blen > strlen(opt) + 1) {
		len = snprintf(*buf, *blen, "%s=%d", opt, optval);
		if (len >= 0 && (size_t)len < *blen) {
			*buf += len;
			*blen -= len;
		} else {
			/* Truncated (or failed): drop the partial option. */
			**buf = '\0';
		}
	}
}
1926 
1927 /*
1928  * Load the option flags and values into the buffer.
1929  */
1930 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1931 {
1932 	char *buf;
1933 	size_t blen;
1934 
1935 	buf = buffer;
1936 	blen = buflen;
1937 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1938 	    &blen);
1939 	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1940 		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1941 		    &blen);
1942 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1943 		    &buf, &blen);
1944 	}
1945 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1946 	    &blen);
1947 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1948 	    "nfsv2", &buf, &blen);
1949 	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1950 	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1951 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1952 	    &buf, &blen);
1953 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1954 	    &buf, &blen);
1955 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1956 	    &blen);
1957 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1958 	    &blen);
1959 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1960 	    &blen);
1961 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1962 	    &blen);
1963 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1964 	    &blen);
1965 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
1966 	    ",noncontigwr", &buf, &blen);
1967 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1968 	    0, ",lockd", &buf, &blen);
1969 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1970 	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1971 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1972 	    &buf, &blen);
1973 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1974 	    &buf, &blen);
1975 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1976 	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
1977 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1978 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1979 	    &buf, &blen);
1980 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1981 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
1982 	    &buf, &blen);
1983 	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1984 	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1985 	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
1986 	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
1987 	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
1988 	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
1989 	    &blen);
1990 	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
1991 	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
1992 	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
1993 	    &blen);
1994 	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
1995 	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
1996 	    &blen);
1997 	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
1998 	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
1999 }
2000 
2001