xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision f18976136625a7d016e97bfd9eabddf640b3e06d)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1993, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Rick Macklem at The University of Guelph.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 
41 #include "opt_bootp.h"
42 #include "opt_nfsroot.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/kernel.h>
47 #include <sys/bio.h>
48 #include <sys/buf.h>
49 #include <sys/clock.h>
50 #include <sys/jail.h>
51 #include <sys/limits.h>
52 #include <sys/lock.h>
53 #include <sys/malloc.h>
54 #include <sys/mbuf.h>
55 #include <sys/module.h>
56 #include <sys/mount.h>
57 #include <sys/proc.h>
58 #include <sys/socket.h>
59 #include <sys/socketvar.h>
60 #include <sys/sockio.h>
61 #include <sys/sysctl.h>
62 #include <sys/vnode.h>
63 #include <sys/signalvar.h>
64 
65 #include <vm/vm.h>
66 #include <vm/vm_extern.h>
67 #include <vm/uma.h>
68 
69 #include <net/if.h>
70 #include <net/route.h>
71 #include <netinet/in.h>
72 
73 #include <fs/nfs/nfsport.h>
74 #include <fs/nfsclient/nfsnode.h>
75 #include <fs/nfsclient/nfsmount.h>
76 #include <fs/nfsclient/nfs.h>
77 #include <nfs/nfsdiskless.h>
78 
79 FEATURE(nfscl, "NFSv4 client");
80 
81 extern int nfscl_ticks;
82 extern struct timeval nfsboottime;
83 extern int nfsrv_useacl;
84 extern int nfscl_debuglevel;
85 extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
86 extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
87 extern struct mtx ncl_iod_mutex;
88 NFSCLSTATEMUTEX;
89 extern struct mtx nfsrv_dslock_mtx;
90 
91 MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header");
92 MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct");
93 
94 SYSCTL_DECL(_vfs_nfs);
95 static int nfs_ip_paranoia = 1;
96 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
97     &nfs_ip_paranoia, 0, "");
98 static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
99 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
100         downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
101 /* how long between console messages "nfs server foo not responding" */
102 static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
103 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
104         downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
105 #ifdef NFS_DEBUG
106 int nfs_debug;
107 SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
108     "Toggle debug flag");
109 #endif
110 
111 static int	nfs_mountroot(struct mount *);
112 static void	nfs_sec_name(char *, int *);
113 static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
114 		    struct nfs_args *argp, const char *, struct ucred *,
115 		    struct thread *);
116 static int	mountnfs(struct nfs_args *, struct mount *,
117 		    struct sockaddr *, char *, u_char *, int, u_char *, int,
118 		    u_char *, int, struct vnode **, struct ucred *,
119 		    struct thread *, int, int, int);
120 static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
121 		    struct sockaddr_storage *, int *, off_t *,
122 		    struct timeval *);
123 static vfs_mount_t nfs_mount;
124 static vfs_cmount_t nfs_cmount;
125 static vfs_unmount_t nfs_unmount;
126 static vfs_root_t nfs_root;
127 static vfs_statfs_t nfs_statfs;
128 static vfs_sync_t nfs_sync;
129 static vfs_sysctl_t nfs_sysctl;
130 static vfs_purge_t nfs_purge;
131 
132 /*
133  * nfs vfs operations.
134  */
135 static struct vfsops nfs_vfsops = {
136 	.vfs_init =		ncl_init,
137 	.vfs_mount =		nfs_mount,
138 	.vfs_cmount =		nfs_cmount,
139 	.vfs_root =		vfs_cache_root,
140 	.vfs_cachedroot =	nfs_root,
141 	.vfs_statfs =		nfs_statfs,
142 	.vfs_sync =		nfs_sync,
143 	.vfs_uninit =		ncl_uninit,
144 	.vfs_unmount =		nfs_unmount,
145 	.vfs_sysctl =		nfs_sysctl,
146 	.vfs_purge =		nfs_purge,
147 };
148 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
149 
150 /* So that loader and kldload(2) can find us, wherever we are.. */
151 MODULE_VERSION(nfs, 1);
152 MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
153 MODULE_DEPEND(nfs, krpc, 1, 1, 1);
154 MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
155 MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
156 
157 /*
158  * This structure is now defined in sys/nfs/nfs_diskless.c so that it
159  * can be shared by both NFS clients. It is declared here so that it
160  * will be defined for kernels built without NFS_ROOT, although it
161  * isn't used in that case.
162  */
163 #if !defined(NFS_ROOT)
164 struct nfs_diskless	nfs_diskless = { { { 0 } } };
165 struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
166 int			nfs_diskless_valid = 0;
167 #endif
168 
169 SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
170     &nfs_diskless_valid, 0,
171     "Has the diskless struct been filled correctly");
172 
173 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
174     nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
175 
176 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
177     &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
178     "%Ssockaddr_in", "Diskless root nfs address");
179 
180 
181 void		newnfsargs_ntoh(struct nfs_args *);
182 static int	nfs_mountdiskless(char *,
183 		    struct sockaddr_in *, struct nfs_args *,
184 		    struct thread *, struct vnode **, struct mount *);
185 static void	nfs_convert_diskless(void);
186 static void	nfs_convert_oargs(struct nfs_args *args,
187 		    struct onfs_args *oargs);
188 
189 int
190 newnfs_iosize(struct nfsmount *nmp)
191 {
192 	int iosize, maxio;
193 
194 	/* First, set the upper limit for iosize */
195 	if (nmp->nm_flag & NFSMNT_NFSV4) {
196 		maxio = NFS_MAXBSIZE;
197 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
198 		if (nmp->nm_sotype == SOCK_DGRAM)
199 			maxio = NFS_MAXDGRAMDATA;
200 		else
201 			maxio = NFS_MAXBSIZE;
202 	} else {
203 		maxio = NFS_V2MAXDATA;
204 	}
205 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
206 		nmp->nm_rsize = maxio;
207 	if (nmp->nm_rsize > NFS_MAXBSIZE)
208 		nmp->nm_rsize = NFS_MAXBSIZE;
209 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
210 		nmp->nm_readdirsize = maxio;
211 	if (nmp->nm_readdirsize > nmp->nm_rsize)
212 		nmp->nm_readdirsize = nmp->nm_rsize;
213 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
214 		nmp->nm_wsize = maxio;
215 	if (nmp->nm_wsize > NFS_MAXBSIZE)
216 		nmp->nm_wsize = NFS_MAXBSIZE;
217 
218 	/*
219 	 * Calculate the size used for io buffers.  Use the larger
220 	 * of the two sizes to minimise nfs requests but make sure
221 	 * that it is at least one VM page to avoid wasting buffer
222 	 * space.  It must also be at least NFS_DIRBLKSIZ, since
223 	 * that is the buffer size used for directories.
224 	 */
225 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
226 	iosize = imax(iosize, PAGE_SIZE);
227 	iosize = imax(iosize, NFS_DIRBLKSIZ);
228 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
229 	return (iosize);
230 }
231 
232 static void
233 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
234 {
235 
236 	args->version = NFS_ARGSVERSION;
237 	args->addr = oargs->addr;
238 	args->addrlen = oargs->addrlen;
239 	args->sotype = oargs->sotype;
240 	args->proto = oargs->proto;
241 	args->fh = oargs->fh;
242 	args->fhsize = oargs->fhsize;
243 	args->flags = oargs->flags;
244 	args->wsize = oargs->wsize;
245 	args->rsize = oargs->rsize;
246 	args->readdirsize = oargs->readdirsize;
247 	args->timeo = oargs->timeo;
248 	args->retrans = oargs->retrans;
249 	args->readahead = oargs->readahead;
250 	args->hostname = oargs->hostname;
251 }
252 
253 static void
254 nfs_convert_diskless(void)
255 {
256 
257 	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
258 		sizeof(struct ifaliasreq));
259 	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
260 		sizeof(struct sockaddr_in));
261 	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
262 	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
263 		nfsv3_diskless.root_fhsize = NFSX_MYFH;
264 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
265 	} else {
266 		nfsv3_diskless.root_fhsize = NFSX_V2FH;
267 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
268 	}
269 	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
270 		sizeof(struct sockaddr_in));
271 	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
272 	nfsv3_diskless.root_time = nfs_diskless.root_time;
273 	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
274 		MAXHOSTNAMELEN);
275 	nfs_diskless_valid = 3;
276 }
277 
278 /*
279  * nfs statfs call
280  */
281 static int
282 nfs_statfs(struct mount *mp, struct statfs *sbp)
283 {
284 	struct vnode *vp;
285 	struct thread *td;
286 	struct nfsmount *nmp = VFSTONFS(mp);
287 	struct nfsvattr nfsva;
288 	struct nfsfsinfo fs;
289 	struct nfsstatfs sb;
290 	int error = 0, attrflag, gotfsinfo = 0, ret;
291 	struct nfsnode *np;
292 
293 	td = curthread;
294 
295 	error = vfs_busy(mp, MBF_NOWAIT);
296 	if (error)
297 		return (error);
298 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
299 	if (error) {
300 		vfs_unbusy(mp);
301 		return (error);
302 	}
303 	vp = NFSTOV(np);
304 	mtx_lock(&nmp->nm_mtx);
305 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
306 		mtx_unlock(&nmp->nm_mtx);
307 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
308 		    &attrflag, NULL);
309 		if (!error)
310 			gotfsinfo = 1;
311 	} else
312 		mtx_unlock(&nmp->nm_mtx);
313 	if (!error)
314 		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
315 		    &attrflag, NULL);
316 	if (error != 0)
317 		NFSCL_DEBUG(2, "statfs=%d\n", error);
318 	if (attrflag == 0) {
319 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
320 		    td->td_ucred, td, &nfsva, NULL, NULL);
321 		if (ret) {
322 			/*
323 			 * Just set default values to get things going.
324 			 */
325 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
326 			nfsva.na_vattr.va_type = VDIR;
327 			nfsva.na_vattr.va_mode = 0777;
328 			nfsva.na_vattr.va_nlink = 100;
329 			nfsva.na_vattr.va_uid = (uid_t)0;
330 			nfsva.na_vattr.va_gid = (gid_t)0;
331 			nfsva.na_vattr.va_fileid = 2;
332 			nfsva.na_vattr.va_gen = 1;
333 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
334 			nfsva.na_vattr.va_size = 512 * 1024;
335 		}
336 	}
337 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
338 	if (!error) {
339 	    mtx_lock(&nmp->nm_mtx);
340 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
341 		nfscl_loadfsinfo(nmp, &fs);
342 	    nfscl_loadsbinfo(nmp, &sb, sbp);
343 	    sbp->f_iosize = newnfs_iosize(nmp);
344 	    mtx_unlock(&nmp->nm_mtx);
345 	    if (sbp != &mp->mnt_stat) {
346 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
347 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
348 	    }
349 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
350 	} else if (NFS_ISV4(vp)) {
351 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
352 	}
353 	vput(vp);
354 	vfs_unbusy(mp);
355 	return (error);
356 }
357 
358 /*
359  * nfs version 3 fsinfo rpc call
360  */
361 int
362 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
363     struct thread *td)
364 {
365 	struct nfsfsinfo fs;
366 	struct nfsvattr nfsva;
367 	int error, attrflag;
368 
369 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
370 	if (!error) {
371 		if (attrflag)
372 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
373 			    1);
374 		mtx_lock(&nmp->nm_mtx);
375 		nfscl_loadfsinfo(nmp, &fs);
376 		mtx_unlock(&nmp->nm_mtx);
377 	}
378 	return (error);
379 }
380 
381 /*
382  * Mount a remote root fs via. nfs. This depends on the info in the
383  * nfs_diskless structure that has been filled in properly by some primary
384  * bootstrap.
385  * It goes something like this:
386  * - do enough of "ifconfig" by calling ifioctl() so that the system
387  *   can talk to the server
388  * - If nfs_diskless.mygateway is filled in, use that address as
389  *   a default gateway.
390  * - build the rootfs mount point and call mountnfs() to do the rest.
391  *
392  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
393  * structure, as well as other global NFS client variables here, as
394  * nfs_mountroot() will be called once in the boot before any other NFS
395  * client activity occurs.
396  */
397 static int
398 nfs_mountroot(struct mount *mp)
399 {
400 	struct thread *td = curthread;
401 	struct nfsv3_diskless *nd = &nfsv3_diskless;
402 	struct socket *so;
403 	struct vnode *vp;
404 	struct ifreq ir;
405 	int error;
406 	u_long l;
407 	char buf[128];
408 	char *cp;
409 
410 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
411 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
412 #elif defined(NFS_ROOT)
413 	nfs_setup_diskless();
414 #endif
415 
416 	if (nfs_diskless_valid == 0)
417 		return (-1);
418 	if (nfs_diskless_valid == 1)
419 		nfs_convert_diskless();
420 
421 	/*
422 	 * Do enough of ifconfig(8) so that the critical net interface can
423 	 * talk to the server.
424 	 */
425 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
426 	    td->td_ucred, td);
427 	if (error)
428 		panic("nfs_mountroot: socreate(%04x): %d",
429 			nd->myif.ifra_addr.sa_family, error);
430 
431 #if 0 /* XXX Bad idea */
432 	/*
433 	 * We might not have been told the right interface, so we pass
434 	 * over the first ten interfaces of the same kind, until we get
435 	 * one of them configured.
436 	 */
437 
438 	for (i = strlen(nd->myif.ifra_name) - 1;
439 		nd->myif.ifra_name[i] >= '0' &&
440 		nd->myif.ifra_name[i] <= '9';
441 		nd->myif.ifra_name[i] ++) {
442 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
443 		if(!error)
444 			break;
445 	}
446 #endif
447 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
448 	if (error)
449 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
450 	if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
451 		ir.ifr_mtu = strtol(cp, NULL, 10);
452 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
453 		freeenv(cp);
454 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
455 		if (error)
456 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
457 	}
458 	soclose(so);
459 
460 	/*
461 	 * If the gateway field is filled in, set it as the default route.
462 	 * Note that pxeboot will set a default route of 0 if the route
463 	 * is not set by the DHCP server.  Check also for a value of 0
464 	 * to avoid panicking inappropriately in that situation.
465 	 */
466 	if (nd->mygateway.sin_len != 0 &&
467 	    nd->mygateway.sin_addr.s_addr != 0) {
468 		struct sockaddr_in mask, sin;
469 
470 		bzero((caddr_t)&mask, sizeof(mask));
471 		sin = mask;
472 		sin.sin_family = AF_INET;
473 		sin.sin_len = sizeof(sin);
474                 /* XXX MRT use table 0 for this sort of thing */
475 		CURVNET_SET(TD_TO_VNET(td));
476 		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
477 		    (struct sockaddr *)&nd->mygateway,
478 		    (struct sockaddr *)&mask,
479 		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
480 		CURVNET_RESTORE();
481 		if (error)
482 			panic("nfs_mountroot: RTM_ADD: %d", error);
483 	}
484 
485 	/*
486 	 * Create the rootfs mount point.
487 	 */
488 	nd->root_args.fh = nd->root_fh;
489 	nd->root_args.fhsize = nd->root_fhsize;
490 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
491 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
492 		(l >> 24) & 0xff, (l >> 16) & 0xff,
493 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
494 	printf("NFS ROOT: %s\n", buf);
495 	nd->root_args.hostname = buf;
496 	if ((error = nfs_mountdiskless(buf,
497 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
498 		return (error);
499 	}
500 
501 	/*
502 	 * This is not really an nfs issue, but it is much easier to
503 	 * set hostname here and then let the "/etc/rc.xxx" files
504 	 * mount the right /var based upon its preset value.
505 	 */
506 	mtx_lock(&prison0.pr_mtx);
507 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
508 	    sizeof(prison0.pr_hostname));
509 	mtx_unlock(&prison0.pr_mtx);
510 	inittodr(ntohl(nd->root_time));
511 	return (0);
512 }
513 
514 /*
515  * Internal version of mount system call for diskless setup.
516  */
517 static int
518 nfs_mountdiskless(char *path,
519     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
520     struct vnode **vpp, struct mount *mp)
521 {
522 	struct sockaddr *nam;
523 	int dirlen, error;
524 	char *dirpath;
525 
526 	/*
527 	 * Find the directory path in "path", which also has the server's
528 	 * name/ip address in it.
529 	 */
530 	dirpath = strchr(path, ':');
531 	if (dirpath != NULL)
532 		dirlen = strlen(++dirpath);
533 	else
534 		dirlen = 0;
535 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
536 	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
537 	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
538 	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
539 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
540 		return (error);
541 	}
542 	return (0);
543 }
544 
545 static void
546 nfs_sec_name(char *sec, int *flagsp)
547 {
548 	if (!strcmp(sec, "krb5"))
549 		*flagsp |= NFSMNT_KERB;
550 	else if (!strcmp(sec, "krb5i"))
551 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
552 	else if (!strcmp(sec, "krb5p"))
553 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
554 }
555 
556 static void
557 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
558     const char *hostname, struct ucred *cred, struct thread *td)
559 {
560 	int adjsock;
561 	char *p;
562 
563 	/*
564 	 * Set read-only flag if requested; otherwise, clear it if this is
565 	 * an update.  If this is not an update, then either the read-only
566 	 * flag is already clear, or this is a root mount and it was set
567 	 * intentionally at some previous point.
568 	 */
569 	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
570 		MNT_ILOCK(mp);
571 		mp->mnt_flag |= MNT_RDONLY;
572 		MNT_IUNLOCK(mp);
573 	} else if (mp->mnt_flag & MNT_UPDATE) {
574 		MNT_ILOCK(mp);
575 		mp->mnt_flag &= ~MNT_RDONLY;
576 		MNT_IUNLOCK(mp);
577 	}
578 
579 	/*
580 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
581 	 * no sense in that context.  Also, set up appropriate retransmit
582 	 * and soft timeout behavior.
583 	 */
584 	if (argp->sotype == SOCK_STREAM) {
585 		nmp->nm_flag &= ~NFSMNT_NOCONN;
586 		nmp->nm_timeo = NFS_MAXTIMEO;
587 		if ((argp->flags & NFSMNT_NFSV4) != 0)
588 			nmp->nm_retry = INT_MAX;
589 		else
590 			nmp->nm_retry = NFS_RETRANS_TCP;
591 	}
592 
593 	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
594 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
595 		argp->flags &= ~NFSMNT_RDIRPLUS;
596 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
597 	}
598 
599 	/* Clear ONEOPENOWN for NFSv2, 3 and 4.0. */
600 	if (nmp->nm_minorvers == 0) {
601 		argp->flags &= ~NFSMNT_ONEOPENOWN;
602 		nmp->nm_flag &= ~NFSMNT_ONEOPENOWN;
603 	}
604 
605 	/* Re-bind if rsrvd port requested and wasn't on one */
606 	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
607 		  && (argp->flags & NFSMNT_RESVPORT);
608 	/* Also re-bind if we're switching to/from a connected UDP socket */
609 	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
610 		    (argp->flags & NFSMNT_NOCONN));
611 
612 	/* Update flags atomically.  Don't change the lock bits. */
613 	nmp->nm_flag = argp->flags | nmp->nm_flag;
614 
615 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
616 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
617 		if (nmp->nm_timeo < NFS_MINTIMEO)
618 			nmp->nm_timeo = NFS_MINTIMEO;
619 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
620 			nmp->nm_timeo = NFS_MAXTIMEO;
621 	}
622 
623 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
624 		nmp->nm_retry = argp->retrans;
625 		if (nmp->nm_retry > NFS_MAXREXMIT)
626 			nmp->nm_retry = NFS_MAXREXMIT;
627 	}
628 
629 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
630 		nmp->nm_wsize = argp->wsize;
631 		/*
632 		 * Clip at the power of 2 below the size. There is an
633 		 * issue (not isolated) that causes intermittent page
634 		 * faults if this is not done.
635 		 */
636 		if (nmp->nm_wsize > NFS_FABLKSIZE)
637 			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
638 		else
639 			nmp->nm_wsize = NFS_FABLKSIZE;
640 	}
641 
642 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
643 		nmp->nm_rsize = argp->rsize;
644 		/*
645 		 * Clip at the power of 2 below the size. There is an
646 		 * issue (not isolated) that causes intermittent page
647 		 * faults if this is not done.
648 		 */
649 		if (nmp->nm_rsize > NFS_FABLKSIZE)
650 			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
651 		else
652 			nmp->nm_rsize = NFS_FABLKSIZE;
653 	}
654 
655 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
656 		nmp->nm_readdirsize = argp->readdirsize;
657 	}
658 
659 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
660 		nmp->nm_acregmin = argp->acregmin;
661 	else
662 		nmp->nm_acregmin = NFS_MINATTRTIMO;
663 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
664 		nmp->nm_acregmax = argp->acregmax;
665 	else
666 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
667 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
668 		nmp->nm_acdirmin = argp->acdirmin;
669 	else
670 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
671 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
672 		nmp->nm_acdirmax = argp->acdirmax;
673 	else
674 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
675 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
676 		nmp->nm_acdirmin = nmp->nm_acdirmax;
677 	if (nmp->nm_acregmin > nmp->nm_acregmax)
678 		nmp->nm_acregmin = nmp->nm_acregmax;
679 
680 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
681 		if (argp->readahead <= NFS_MAXRAHEAD)
682 			nmp->nm_readahead = argp->readahead;
683 		else
684 			nmp->nm_readahead = NFS_MAXRAHEAD;
685 	}
686 	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
687 		if (argp->wcommitsize < nmp->nm_wsize)
688 			nmp->nm_wcommitsize = nmp->nm_wsize;
689 		else
690 			nmp->nm_wcommitsize = argp->wcommitsize;
691 	}
692 
693 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
694 		    (nmp->nm_soproto != argp->proto));
695 
696 	if (nmp->nm_client != NULL && adjsock) {
697 		int haslock = 0, error = 0;
698 
699 		if (nmp->nm_sotype == SOCK_STREAM) {
700 			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
701 			if (!error)
702 				haslock = 1;
703 		}
704 		if (!error) {
705 		    newnfs_disconnect(&nmp->nm_sockreq);
706 		    if (haslock)
707 			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
708 		    nmp->nm_sotype = argp->sotype;
709 		    nmp->nm_soproto = argp->proto;
710 		    if (nmp->nm_sotype == SOCK_DGRAM)
711 			while (newnfs_connect(nmp, &nmp->nm_sockreq,
712 			    cred, td, 0)) {
713 				printf("newnfs_args: retrying connect\n");
714 				(void) nfs_catnap(PSOCK, 0, "nfscon");
715 			}
716 		}
717 	} else {
718 		nmp->nm_sotype = argp->sotype;
719 		nmp->nm_soproto = argp->proto;
720 	}
721 
722 	if (hostname != NULL) {
723 		strlcpy(nmp->nm_hostname, hostname,
724 		    sizeof(nmp->nm_hostname));
725 		p = strchr(nmp->nm_hostname, ':');
726 		if (p != NULL)
727 			*p = '\0';
728 	}
729 }
730 
/*
 * NULL-terminated list of the mount option names accepted by nfs_mount();
 * it is handed to vfs_filteropt() to reject anything else.
 */
static const char *nfs_opts[] = {
	"from", "nfs_args",
	"noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow",
	"union",
	"noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
	"async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize", "soft", "hard", "mntudp", "tcp", "udp", "wsize",
	"rsize",
	"retrans", "actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeo", "timeout", "addr", "fh",
	"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath",
	"minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr",
	"pnfs", "wcommitsize", "oneopenown",
	NULL
};
742 
743 /*
744  * Parse the "from" mountarg, passed by the generic mount(8) program
745  * or the mountroot code.  This is used when rerooting into NFS.
746  *
747  * Note that the "hostname" is actually a "hostname:/share/path" string.
748  */
749 static int
750 nfs_mount_parse_from(struct vfsoptlist *opts, char **hostnamep,
751     struct sockaddr_in **sinp, char *dirpath, size_t dirpathsize, int *dirlenp)
752 {
753 	char *nam, *delimp, *hostp, *spec;
754 	int error, have_bracket = 0, offset, rv, speclen;
755 	struct sockaddr_in *sin;
756 	size_t len;
757 
758 	error = vfs_getopt(opts, "from", (void **)&spec, &speclen);
759 	if (error != 0)
760 		return (error);
761 	nam = malloc(MNAMELEN + 1, M_TEMP, M_WAITOK);
762 
763 	/*
764 	 * This part comes from sbin/mount_nfs/mount_nfs.c:getnfsargs().
765 	 */
766 	if (*spec == '[' && (delimp = strchr(spec + 1, ']')) != NULL &&
767 	    *(delimp + 1) == ':') {
768 		hostp = spec + 1;
769 		spec = delimp + 2;
770 		have_bracket = 1;
771 	} else if ((delimp = strrchr(spec, ':')) != NULL) {
772 		hostp = spec;
773 		spec = delimp + 1;
774 	} else if ((delimp = strrchr(spec, '@')) != NULL) {
775 		printf("%s: path@server syntax is deprecated, "
776 		    "use server:path\n", __func__);
777 		hostp = delimp + 1;
778 	} else {
779 		printf("%s: no <host>:<dirpath> nfs-name\n", __func__);
780 		free(nam, M_TEMP);
781 		return (EINVAL);
782 	}
783 	*delimp = '\0';
784 
785 	/*
786 	 * If there has been a trailing slash at mounttime it seems
787 	 * that some mountd implementations fail to remove the mount
788 	 * entries from their mountlist while unmounting.
789 	 */
790 	for (speclen = strlen(spec);
791 	    speclen > 1 && spec[speclen - 1] == '/';
792 	    speclen--)
793 		spec[speclen - 1] = '\0';
794 	if (strlen(hostp) + strlen(spec) + 1 > MNAMELEN) {
795 		printf("%s: %s:%s: name too long", __func__, hostp, spec);
796 		free(nam, M_TEMP);
797 		return (EINVAL);
798 	}
799 	/* Make both '@' and ':' notations equal */
800 	if (*hostp != '\0') {
801 		len = strlen(hostp);
802 		offset = 0;
803 		if (have_bracket)
804 			nam[offset++] = '[';
805 		memmove(nam + offset, hostp, len);
806 		if (have_bracket)
807 			nam[len + offset++] = ']';
808 		nam[len + offset++] = ':';
809 		memmove(nam + len + offset, spec, speclen);
810 		nam[len + speclen + offset] = '\0';
811 	} else
812 		nam[0] = '\0';
813 
814 	/*
815 	 * XXX: IPv6
816 	 */
817 	sin = malloc(sizeof(*sin), M_SONAME, M_WAITOK);
818 	rv = inet_pton(AF_INET, hostp, &sin->sin_addr);
819 	if (rv != 1) {
820 		printf("%s: cannot parse '%s', inet_pton() returned %d\n",
821 		    __func__, hostp, rv);
822 		free(nam, M_TEMP);
823 		free(sin, M_SONAME);
824 		return (EINVAL);
825 	}
826 
827 	sin->sin_len = sizeof(*sin);
828 	sin->sin_family = AF_INET;
829 	/*
830 	 * XXX: hardcoded port number.
831 	 */
832 	sin->sin_port = htons(2049);
833 
834 	*hostnamep = strdup(nam, M_NEWNFSMNT);
835 	*sinp = sin;
836 	strlcpy(dirpath, spec, dirpathsize);
837 	*dirlenp = strlen(dirpath);
838 
839 	free(nam, M_TEMP);
840 	return (0);
841 }
842 
843 /*
844  * VFS Operations.
845  *
846  * mount system call
847  * It seems a bit dumb to copyinstr() the host and path here and then
848  * bcopy() them in mountnfs(), but I wanted to detect errors before
849  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
850  * an error after that means that I have to release the mbuf.
851  */
852 /* ARGSUSED */
853 static int
854 nfs_mount(struct mount *mp)
855 {
856 	struct nfs_args args = {
857 	    .version = NFS_ARGSVERSION,
858 	    .addr = NULL,
859 	    .addrlen = sizeof (struct sockaddr_in),
860 	    .sotype = SOCK_STREAM,
861 	    .proto = 0,
862 	    .fh = NULL,
863 	    .fhsize = 0,
864 	    .flags = NFSMNT_RESVPORT,
865 	    .wsize = NFS_WSIZE,
866 	    .rsize = NFS_RSIZE,
867 	    .readdirsize = NFS_READDIRSIZE,
868 	    .timeo = 10,
869 	    .retrans = NFS_RETRANS,
870 	    .readahead = NFS_DEFRAHEAD,
871 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
872 	    .hostname = NULL,
873 	    .acregmin = NFS_MINATTRTIMO,
874 	    .acregmax = NFS_MAXATTRTIMO,
875 	    .acdirmin = NFS_MINDIRATTRTIMO,
876 	    .acdirmax = NFS_MAXDIRATTRTIMO,
877 	};
878 	int error = 0, ret, len;
879 	struct sockaddr *nam = NULL;
880 	struct vnode *vp;
881 	struct thread *td;
882 	char *hst;
883 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
884 	char *cp, *opt, *name, *secname;
885 	int nametimeo = NFS_DEFAULT_NAMETIMEO;
886 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
887 	int minvers = 0;
888 	int dirlen, has_nfs_args_opt, has_nfs_from_opt,
889 	    krbnamelen, srvkrbnamelen;
890 	size_t hstlen;
891 
892 	has_nfs_args_opt = 0;
893 	has_nfs_from_opt = 0;
894 	hst = malloc(MNAMELEN, M_TEMP, M_WAITOK);
895 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
896 		error = EINVAL;
897 		goto out;
898 	}
899 
900 	td = curthread;
901 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS &&
902 	    nfs_diskless_valid != 0) {
903 		error = nfs_mountroot(mp);
904 		goto out;
905 	}
906 
907 	nfscl_init();
908 
909 	/*
910 	 * The old mount_nfs program passed the struct nfs_args
911 	 * from userspace to kernel.  The new mount_nfs program
912 	 * passes string options via nmount() from userspace to kernel
913 	 * and we populate the struct nfs_args in the kernel.
914 	 */
915 	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
916 		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
917 		    sizeof(args));
918 		if (error != 0)
919 			goto out;
920 
921 		if (args.version != NFS_ARGSVERSION) {
922 			error = EPROGMISMATCH;
923 			goto out;
924 		}
925 		has_nfs_args_opt = 1;
926 	}
927 
928 	/* Handle the new style options. */
929 	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
930 		args.acdirmin = args.acdirmax =
931 		    args.acregmin = args.acregmax = 0;
932 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
933 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
934 	}
935 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
936 		args.flags |= NFSMNT_NOCONN;
937 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
938 		args.flags &= ~NFSMNT_NOCONN;
939 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
940 		args.flags |= NFSMNT_NOLOCKD;
941 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
942 		args.flags &= ~NFSMNT_NOLOCKD;
943 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
944 		args.flags |= NFSMNT_INT;
945 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
946 		args.flags |= NFSMNT_RDIRPLUS;
947 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
948 		args.flags |= NFSMNT_RESVPORT;
949 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
950 		args.flags &= ~NFSMNT_RESVPORT;
951 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
952 		args.flags |= NFSMNT_SOFT;
953 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
954 		args.flags &= ~NFSMNT_SOFT;
955 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
956 		args.sotype = SOCK_DGRAM;
957 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
958 		args.sotype = SOCK_DGRAM;
959 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
960 		args.sotype = SOCK_STREAM;
961 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
962 		args.flags |= NFSMNT_NFSV3;
963 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
964 		args.flags |= NFSMNT_NFSV4;
965 		args.sotype = SOCK_STREAM;
966 	}
967 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
968 		args.flags |= NFSMNT_ALLGSSNAME;
969 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
970 		args.flags |= NFSMNT_NOCTO;
971 	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
972 		args.flags |= NFSMNT_NONCONTIGWR;
973 	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
974 		args.flags |= NFSMNT_PNFS;
975 	if (vfs_getopt(mp->mnt_optnew, "oneopenown", NULL, NULL) == 0)
976 		args.flags |= NFSMNT_ONEOPENOWN;
977 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
978 		if (opt == NULL) {
979 			vfs_mount_error(mp, "illegal readdirsize");
980 			error = EINVAL;
981 			goto out;
982 		}
983 		ret = sscanf(opt, "%d", &args.readdirsize);
984 		if (ret != 1 || args.readdirsize <= 0) {
985 			vfs_mount_error(mp, "illegal readdirsize: %s",
986 			    opt);
987 			error = EINVAL;
988 			goto out;
989 		}
990 		args.flags |= NFSMNT_READDIRSIZE;
991 	}
992 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
993 		if (opt == NULL) {
994 			vfs_mount_error(mp, "illegal readahead");
995 			error = EINVAL;
996 			goto out;
997 		}
998 		ret = sscanf(opt, "%d", &args.readahead);
999 		if (ret != 1 || args.readahead <= 0) {
1000 			vfs_mount_error(mp, "illegal readahead: %s",
1001 			    opt);
1002 			error = EINVAL;
1003 			goto out;
1004 		}
1005 		args.flags |= NFSMNT_READAHEAD;
1006 	}
1007 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
1008 		if (opt == NULL) {
1009 			vfs_mount_error(mp, "illegal wsize");
1010 			error = EINVAL;
1011 			goto out;
1012 		}
1013 		ret = sscanf(opt, "%d", &args.wsize);
1014 		if (ret != 1 || args.wsize <= 0) {
1015 			vfs_mount_error(mp, "illegal wsize: %s",
1016 			    opt);
1017 			error = EINVAL;
1018 			goto out;
1019 		}
1020 		args.flags |= NFSMNT_WSIZE;
1021 	}
1022 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
1023 		if (opt == NULL) {
1024 			vfs_mount_error(mp, "illegal rsize");
1025 			error = EINVAL;
1026 			goto out;
1027 		}
1028 		ret = sscanf(opt, "%d", &args.rsize);
1029 		if (ret != 1 || args.rsize <= 0) {
1030 			vfs_mount_error(mp, "illegal wsize: %s",
1031 			    opt);
1032 			error = EINVAL;
1033 			goto out;
1034 		}
1035 		args.flags |= NFSMNT_RSIZE;
1036 	}
1037 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
1038 		if (opt == NULL) {
1039 			vfs_mount_error(mp, "illegal retrans");
1040 			error = EINVAL;
1041 			goto out;
1042 		}
1043 		ret = sscanf(opt, "%d", &args.retrans);
1044 		if (ret != 1 || args.retrans <= 0) {
1045 			vfs_mount_error(mp, "illegal retrans: %s",
1046 			    opt);
1047 			error = EINVAL;
1048 			goto out;
1049 		}
1050 		args.flags |= NFSMNT_RETRANS;
1051 	}
1052 	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
1053 		ret = sscanf(opt, "%d", &args.acregmin);
1054 		if (ret != 1 || args.acregmin < 0) {
1055 			vfs_mount_error(mp, "illegal actimeo: %s",
1056 			    opt);
1057 			error = EINVAL;
1058 			goto out;
1059 		}
1060 		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
1061 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
1062 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
1063 	}
1064 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
1065 		ret = sscanf(opt, "%d", &args.acregmin);
1066 		if (ret != 1 || args.acregmin < 0) {
1067 			vfs_mount_error(mp, "illegal acregmin: %s",
1068 			    opt);
1069 			error = EINVAL;
1070 			goto out;
1071 		}
1072 		args.flags |= NFSMNT_ACREGMIN;
1073 	}
1074 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
1075 		ret = sscanf(opt, "%d", &args.acregmax);
1076 		if (ret != 1 || args.acregmax < 0) {
1077 			vfs_mount_error(mp, "illegal acregmax: %s",
1078 			    opt);
1079 			error = EINVAL;
1080 			goto out;
1081 		}
1082 		args.flags |= NFSMNT_ACREGMAX;
1083 	}
1084 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
1085 		ret = sscanf(opt, "%d", &args.acdirmin);
1086 		if (ret != 1 || args.acdirmin < 0) {
1087 			vfs_mount_error(mp, "illegal acdirmin: %s",
1088 			    opt);
1089 			error = EINVAL;
1090 			goto out;
1091 		}
1092 		args.flags |= NFSMNT_ACDIRMIN;
1093 	}
1094 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
1095 		ret = sscanf(opt, "%d", &args.acdirmax);
1096 		if (ret != 1 || args.acdirmax < 0) {
1097 			vfs_mount_error(mp, "illegal acdirmax: %s",
1098 			    opt);
1099 			error = EINVAL;
1100 			goto out;
1101 		}
1102 		args.flags |= NFSMNT_ACDIRMAX;
1103 	}
1104 	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
1105 		ret = sscanf(opt, "%d", &args.wcommitsize);
1106 		if (ret != 1 || args.wcommitsize < 0) {
1107 			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1108 			error = EINVAL;
1109 			goto out;
1110 		}
1111 		args.flags |= NFSMNT_WCOMMITSIZE;
1112 	}
1113 	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1114 		ret = sscanf(opt, "%d", &args.timeo);
1115 		if (ret != 1 || args.timeo <= 0) {
1116 			vfs_mount_error(mp, "illegal timeo: %s",
1117 			    opt);
1118 			error = EINVAL;
1119 			goto out;
1120 		}
1121 		args.flags |= NFSMNT_TIMEO;
1122 	}
1123 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1124 		ret = sscanf(opt, "%d", &args.timeo);
1125 		if (ret != 1 || args.timeo <= 0) {
1126 			vfs_mount_error(mp, "illegal timeout: %s",
1127 			    opt);
1128 			error = EINVAL;
1129 			goto out;
1130 		}
1131 		args.flags |= NFSMNT_TIMEO;
1132 	}
1133 	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1134 		ret = sscanf(opt, "%d", &nametimeo);
1135 		if (ret != 1 || nametimeo < 0) {
1136 			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1137 			error = EINVAL;
1138 			goto out;
1139 		}
1140 	}
1141 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1142 	    == 0) {
1143 		ret = sscanf(opt, "%d", &negnametimeo);
1144 		if (ret != 1 || negnametimeo < 0) {
1145 			vfs_mount_error(mp, "illegal negnametimeo: %s",
1146 			    opt);
1147 			error = EINVAL;
1148 			goto out;
1149 		}
1150 	}
1151 	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1152 	    0) {
1153 		ret = sscanf(opt, "%d", &minvers);
1154 		if (ret != 1 || minvers < 0 || minvers > 1 ||
1155 		    (args.flags & NFSMNT_NFSV4) == 0) {
1156 			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1157 			error = EINVAL;
1158 			goto out;
1159 		}
1160 	}
1161 	if (vfs_getopt(mp->mnt_optnew, "sec",
1162 		(void **) &secname, NULL) == 0)
1163 		nfs_sec_name(secname, &args.flags);
1164 
1165 	if (mp->mnt_flag & MNT_UPDATE) {
1166 		struct nfsmount *nmp = VFSTONFS(mp);
1167 
1168 		if (nmp == NULL) {
1169 			error = EIO;
1170 			goto out;
1171 		}
1172 
1173 		/*
1174 		 * If a change from TCP->UDP is done and there are thread(s)
1175 		 * that have I/O RPC(s) in progress with a transfer size
1176 		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1177 		 * hung, retrying the RPC(s) forever. Usually these threads
1178 		 * will be seen doing an uninterruptible sleep on wait channel
1179 		 * "nfsreq".
1180 		 */
1181 		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1182 			tprintf(td->td_proc, LOG_WARNING,
1183 	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1184 
1185 		/*
1186 		 * When doing an update, we can't change version,
1187 		 * security, switch lockd strategies, change cookie
1188 		 * translation or switch oneopenown.
1189 		 */
1190 		args.flags = (args.flags &
1191 		    ~(NFSMNT_NFSV3 |
1192 		      NFSMNT_NFSV4 |
1193 		      NFSMNT_KERB |
1194 		      NFSMNT_INTEGRITY |
1195 		      NFSMNT_PRIVACY |
1196 		      NFSMNT_ONEOPENOWN |
1197 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1198 		    (nmp->nm_flag &
1199 			(NFSMNT_NFSV3 |
1200 			 NFSMNT_NFSV4 |
1201 			 NFSMNT_KERB |
1202 			 NFSMNT_INTEGRITY |
1203 			 NFSMNT_PRIVACY |
1204 			 NFSMNT_ONEOPENOWN |
1205 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1206 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1207 		goto out;
1208 	}
1209 
1210 	/*
1211 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1212 	 * or no-connection mode for those protocols that support
1213 	 * no-connection mode (the flag will be cleared later for protocols
1214 	 * that do not support no-connection mode).  This will allow a client
1215 	 * to receive replies from a different IP than the request was
1216 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1217 	 * not 0.
1218 	 */
1219 	if (nfs_ip_paranoia == 0)
1220 		args.flags |= NFSMNT_NOCONN;
1221 
1222 	if (has_nfs_args_opt != 0) {
1223 		/*
1224 		 * In the 'nfs_args' case, the pointers in the args
1225 		 * structure are in userland - we copy them in here.
1226 		 */
1227 		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1228 			vfs_mount_error(mp, "Bad file handle");
1229 			error = EINVAL;
1230 			goto out;
1231 		}
1232 		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1233 		    args.fhsize);
1234 		if (error != 0)
1235 			goto out;
1236 		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1237 		if (error != 0)
1238 			goto out;
1239 		bzero(&hst[hstlen], MNAMELEN - hstlen);
1240 		args.hostname = hst;
1241 		/* getsockaddr() call must be after above copyin() calls */
1242 		error = getsockaddr(&nam, args.addr, args.addrlen);
1243 		if (error != 0)
1244 			goto out;
1245 	} else if (nfs_mount_parse_from(mp->mnt_optnew,
1246 	    &args.hostname, (struct sockaddr_in **)&nam, dirpath,
1247 	    sizeof(dirpath), &dirlen) == 0) {
1248 		has_nfs_from_opt = 1;
1249 		bcopy(args.hostname, hst, MNAMELEN);
1250 		hst[MNAMELEN - 1] = '\0';
1251 
1252 		/*
1253 		 * This only works with NFSv4 for now.
1254 		 */
1255 		args.fhsize = 0;
1256 		args.flags |= NFSMNT_NFSV4;
1257 		args.sotype = SOCK_STREAM;
1258 	} else {
1259 		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1260 		    &args.fhsize) == 0) {
1261 			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1262 				vfs_mount_error(mp, "Bad file handle");
1263 				error = EINVAL;
1264 				goto out;
1265 			}
1266 			bcopy(args.fh, nfh, args.fhsize);
1267 		} else {
1268 			args.fhsize = 0;
1269 		}
1270 		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1271 		    (void **)&args.hostname, &len);
1272 		if (args.hostname == NULL) {
1273 			vfs_mount_error(mp, "Invalid hostname");
1274 			error = EINVAL;
1275 			goto out;
1276 		}
1277 		if (len >= MNAMELEN) {
1278 			vfs_mount_error(mp, "Hostname too long");
1279 			error = EINVAL;
1280 			goto out;
1281 		}
1282 		bcopy(args.hostname, hst, len);
1283 		hst[len] = '\0';
1284 	}
1285 
1286 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1287 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1288 	else {
1289 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1290 		cp = strchr(srvkrbname, ':');
1291 		if (cp != NULL)
1292 			*cp = '\0';
1293 	}
1294 	srvkrbnamelen = strlen(srvkrbname);
1295 
1296 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1297 		strlcpy(krbname, name, sizeof (krbname));
1298 	else
1299 		krbname[0] = '\0';
1300 	krbnamelen = strlen(krbname);
1301 
1302 	if (has_nfs_from_opt == 0) {
1303 		if (vfs_getopt(mp->mnt_optnew,
1304 		    "dirpath", (void **)&name, NULL) == 0)
1305 			strlcpy(dirpath, name, sizeof (dirpath));
1306 		else
1307 			dirpath[0] = '\0';
1308 		dirlen = strlen(dirpath);
1309 	}
1310 
1311 	if (has_nfs_args_opt == 0 && has_nfs_from_opt == 0) {
1312 		if (vfs_getopt(mp->mnt_optnew, "addr",
1313 		    (void **)&args.addr, &args.addrlen) == 0) {
1314 			if (args.addrlen > SOCK_MAXADDRLEN) {
1315 				error = ENAMETOOLONG;
1316 				goto out;
1317 			}
1318 			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1319 			bcopy(args.addr, nam, args.addrlen);
1320 			nam->sa_len = args.addrlen;
1321 		} else {
1322 			vfs_mount_error(mp, "No server address");
1323 			error = EINVAL;
1324 			goto out;
1325 		}
1326 	}
1327 
1328 	args.fh = nfh;
1329 	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1330 	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1331 	    nametimeo, negnametimeo, minvers);
1332 out:
1333 	if (!error) {
1334 		MNT_ILOCK(mp);
1335 		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1336 		    MNTK_USES_BCACHE;
1337 		if ((VFSTONFS(mp)->nm_flag & NFSMNT_NFSV4) != 0)
1338 			mp->mnt_kern_flag |= MNTK_NULL_NOCACHE;
1339 		MNT_IUNLOCK(mp);
1340 	}
1341 	free(hst, M_TEMP);
1342 	return (error);
1343 }
1344 
1345 
1346 /*
1347  * VFS Operations.
1348  *
1349  * mount system call
1350  * It seems a bit dumb to copyinstr() the host and path here and then
1351  * bcopy() them in mountnfs(), but I wanted to detect errors before
1352  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
1353  * an error after that means that I have to release the mbuf.
1354  */
1355 /* ARGSUSED */
1356 static int
1357 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1358 {
1359 	int error;
1360 	struct nfs_args args;
1361 
1362 	error = copyin(data, &args, sizeof (struct nfs_args));
1363 	if (error)
1364 		return error;
1365 
1366 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1367 
1368 	error = kernel_mount(ma, flags);
1369 	return (error);
1370 }
1371 
1372 /*
1373  * Common code for mount and mountroot
 *
 * Allocates and fills in a struct nfsmount for mp (the krbname, dirpath
 * and srvkrbname strings are stored in the same allocation), applies the
 * mount arguments with nfs_decode_args(), connects to the server with
 * newnfs_connect() and, when a root file handle is available, gets the
 * root vnode.  On success the root vnode is returned through vpp
 * unlocked but still referenced.
 *
 * nam is consumed by this function: on success it is stored in
 * nmp->nm_nam, on the MNT_UPDATE and failure paths it is freed here.
 *
 * Returns 0 on success (and for MNT_UPDATE, which is only logged here),
 * otherwise an errno value after everything set up here has been torn
 * down again.  EIO is returned when no root file handle is available.
1374  */
1375 static int
1376 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1377     char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1378     u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1379     struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1380     int minvers)
1381 {
1382 	struct nfsmount *nmp;
1383 	struct nfsnode *np;
1384 	int error, trycnt, ret;
1385 	struct nfsvattr nfsva;
1386 	struct nfsclclient *clp;
1387 	struct nfsclds *dsp, *tdsp;
1388 	uint32_t lease;
1389 	static u_int64_t clval = 0;
1390 
1391 	NFSCL_DEBUG(3, "in mnt\n");
1392 	clp = NULL;
1393 	if (mp->mnt_flag & MNT_UPDATE) {
		/* Nothing is updated here; just log, drop nam and succeed. */
1394 		nmp = VFSTONFS(mp);
1395 		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1396 		free(nam, M_SONAME);
1397 		return (0);
1398 	} else {
		/*
		 * The allocation is sized for the structure plus the three
		 * name strings; M_ZERO means it starts out all zeroes.
		 */
1399 		nmp = malloc(sizeof (struct nfsmount) +
1400 		    krbnamelen + dirlen + srvkrbnamelen + 2,
1401 		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1402 		TAILQ_INIT(&nmp->nm_bufq);
1403 		TAILQ_INIT(&nmp->nm_sess);
		/* clval is seeded from nfsboottime once, then bumped per mount. */
1404 		if (clval == 0)
1405 			clval = (u_int64_t)nfsboottime.tv_sec;
1406 		nmp->nm_clval = clval++;
1407 		nmp->nm_krbnamelen = krbnamelen;
1408 		nmp->nm_dirpathlen = dirlen;
1409 		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1410 		if (td->td_ucred->cr_uid != (uid_t)0) {
1411 			/*
1412 			 * nm_uid is used to get KerberosV credentials for
1413 			 * the nfsv4 state handling operations if there is
1414 			 * no host based principal set. Use the uid of
1415 			 * this user if not root, since they are doing the
1416 			 * mount. I don't think setting this for root will
1417 			 * work, since root normally does not have user
1418 			 * credentials in a credentials cache.
1419 			 */
1420 			nmp->nm_uid = td->td_ucred->cr_uid;
1421 		} else {
1422 			/*
1423 			 * Just set to -1, so it won't be used.
1424 			 */
1425 			nmp->nm_uid = (uid_t)-1;
1426 		}
1427 
1428 		/* Copy and null terminate all the names */
1429 		if (nmp->nm_krbnamelen > 0) {
1430 			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1431 			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1432 		}
1433 		if (nmp->nm_dirpathlen > 0) {
1434 			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1435 			    nmp->nm_dirpathlen);
1436 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1437 			    + 1] = '\0';
1438 		}
1439 		if (nmp->nm_srvkrbnamelen > 0) {
1440 			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1441 			    nmp->nm_srvkrbnamelen);
1442 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1443 			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1444 		}
		/* Reference on cred is dropped by crfree() at "bad" below. */
1445 		nmp->nm_sockreq.nr_cred = crhold(cred);
1446 		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1447 		mp->mnt_data = nmp;
1448 		nmp->nm_getinfo = nfs_getnlminfo;
1449 		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1450 	}
1451 	vfs_getnewfsid(mp);
1452 	nmp->nm_mountp = mp;
1453 	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1454 
1455 	/*
1456 	 * Since nfs_decode_args() might optionally set them, these
1457 	 * need to be set to defaults before the call, so that the
1458 	 * optional settings aren't overwritten.
1459 	 */
1460 	nmp->nm_nametimeo = nametimeo;
1461 	nmp->nm_negnametimeo = negnametimeo;
1462 	nmp->nm_timeo = NFS_TIMEO;
1463 	nmp->nm_retry = NFS_RETRANS;
1464 	nmp->nm_readahead = NFS_DEFRAHEAD;
1465 
1466 	/* This is empirical approximation of sqrt(hibufspace) * 256. */
1467 	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
1468 	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
1469 		nmp->nm_wcommitsize *= 2;
1470 	nmp->nm_wcommitsize *= 256;
1471 
	/* The minor version only applies to NFSv4 mounts. */
1472 	if ((argp->flags & NFSMNT_NFSV4) != 0)
1473 		nmp->nm_minorvers = minvers;
1474 	else
1475 		nmp->nm_minorvers = 0;
1476 
1477 	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1478 
1479 	/*
1480 	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1481 	 * high, depending on whether we end up with negative offsets in
1482 	 * the client or server somewhere.  2GB-1 may be safer.
1483 	 *
1484 	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1485 	 * that we can handle until we find out otherwise.
1486 	 */
1487 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1488 		nmp->nm_maxfilesize = 0xffffffffLL;
1489 	else
1490 		nmp->nm_maxfilesize = OFF_MAX;
1491 
	/* Neither V3 nor V4 flag set, i.e. NFSv2: use the fixed defaults. */
1492 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1493 		nmp->nm_wsize = NFS_WSIZE;
1494 		nmp->nm_rsize = NFS_RSIZE;
1495 		nmp->nm_readdirsize = NFS_READDIRSIZE;
1496 	}
1497 	nmp->nm_numgrps = NFS_MAXGRPS;
	/* Negative tprintf delays are clamped to 0. */
1498 	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1499 	if (nmp->nm_tprintf_delay < 0)
1500 		nmp->nm_tprintf_delay = 0;
1501 	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1502 	if (nmp->nm_tprintf_initial_delay < 0)
1503 		nmp->nm_tprintf_initial_delay = 0;
1504 	nmp->nm_fhsize = argp->fhsize;
1505 	if (nmp->nm_fhsize > 0)
1506 		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
	/* Copies a full MNAMELEN bytes, so hst must be at least that large. */
1507 	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1508 	nmp->nm_nam = nam;
1509 	/* Set up the sockets and per-host congestion */
1510 	nmp->nm_sotype = argp->sotype;
1511 	nmp->nm_soproto = argp->proto;
1512 	nmp->nm_sockreq.nr_prog = NFS_PROG;
1513 	if ((argp->flags & NFSMNT_NFSV4))
1514 		nmp->nm_sockreq.nr_vers = NFS_VER4;
1515 	else if ((argp->flags & NFSMNT_NFSV3))
1516 		nmp->nm_sockreq.nr_vers = NFS_VER3;
1517 	else
1518 		nmp->nm_sockreq.nr_vers = NFS_VER2;
1519 
1520 
1521 	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1522 		goto bad;
1523 	/* For NFSv4.1, get the clientid now. */
1524 	if (nmp->nm_minorvers > 0) {
1525 		NFSCL_DEBUG(3, "at getcl\n");
1526 		error = nfscl_getcl(mp, cred, td, 0, &clp);
1527 		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1528 		if (error != 0)
1529 			goto bad;
1530 	}
1531 
1532 	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1533 	    nmp->nm_dirpathlen > 0) {
1534 		NFSCL_DEBUG(3, "in dirp\n");
1535 		/*
1536 		 * If the fhsize on the mount point == 0 for V4, the mount
1537 		 * path needs to be looked up.
1538 		 */
		/* Up to three attempts, napping after each failure. */
1539 		trycnt = 3;
1540 		do {
1541 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1542 			    cred, td);
1543 			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
1544 			if (error)
1545 				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1546 		} while (error && --trycnt > 0);
1547 		if (error) {
1548 			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1549 			goto bad;
1550 		}
1551 	}
1552 
1553 	/*
1554 	 * A reference count is needed on the nfsnode representing the
1555 	 * remote root.  If this object is not persistent, then backward
1556 	 * traversals of the mount point (i.e. "..") will not work if
1557 	 * the nfsnode gets flushed out of the cache. Ufs does not have
1558 	 * this problem, because one can identify root inodes by their
1559 	 * number == UFS_ROOTINO (2).
1560 	 */
1561 	if (nmp->nm_fhsize > 0) {
1562 		/*
1563 		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1564 		 * non-zero for the root vnode. f_iosize will be set correctly
1565 		 * by nfs_statfs() before any I/O occurs.
1566 		 */
1567 		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1568 		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1569 		    LK_EXCLUSIVE);
1570 		if (error)
1571 			goto bad;
1572 		*vpp = NFSTOV(np);
1573 
1574 		/*
1575 		 * Get file attributes and transfer parameters for the
1576 		 * mountpoint.  This has the side effect of filling in
1577 		 * (*vpp)->v_type with the correct value.
1578 		 */
		/* A getattr failure (ret != 0) does not fail the mount. */
1579 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1580 		    cred, td, &nfsva, NULL, &lease);
1581 		if (ret) {
1582 			/*
1583 			 * Just set default values to get things going.
1584 			 */
1585 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1586 			nfsva.na_vattr.va_type = VDIR;
1587 			nfsva.na_vattr.va_mode = 0777;
1588 			nfsva.na_vattr.va_nlink = 100;
1589 			nfsva.na_vattr.va_uid = (uid_t)0;
1590 			nfsva.na_vattr.va_gid = (gid_t)0;
1591 			nfsva.na_vattr.va_fileid = 2;
1592 			nfsva.na_vattr.va_gen = 1;
1593 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1594 			nfsva.na_vattr.va_size = 512 * 1024;
1595 			lease = 60;
1596 		}
1597 		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
		/* clp was obtained by nfscl_getcl() above for minorvers > 0. */
1598 		if (nmp->nm_minorvers > 0) {
1599 			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1600 			NFSLOCKCLSTATE();
1601 			clp->nfsc_renew = NFSCL_RENEW(lease);
1602 			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
			/* Bump the revision, skipping over 0 if it wraps. */
1603 			clp->nfsc_clientidrev++;
1604 			if (clp->nfsc_clientidrev == 0)
1605 				clp->nfsc_clientidrev++;
1606 			NFSUNLOCKCLSTATE();
1607 			/*
1608 			 * Mount will succeed, so the renew thread can be
1609 			 * started now.
1610 			 */
1611 			nfscl_start_renewthread(clp);
1612 			nfscl_clientrelease(clp);
1613 		}
1614 		if (argp->flags & NFSMNT_NFSV3)
1615 			ncl_fsinfo(nmp, *vpp, cred, td);
1616 
1617 		/* Mark if the mount point supports NFSv4 ACLs. */
1618 		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1619 		    ret == 0 &&
1620 		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1621 			MNT_ILOCK(mp);
1622 			mp->mnt_flag |= MNT_NFS4ACLS;
1623 			MNT_IUNLOCK(mp);
1624 		}
1625 
1626 		/*
1627 		 * Lose the lock but keep the ref.
1628 		 */
1629 		NFSVOPUNLOCK(*vpp, 0);
1630 		vfs_cache_root_set(mp, *vpp);
1631 		return (0);
1632 	}
	/* Reached only when there is still no root file handle. */
1633 	error = EIO;
1634 
	/*
	 * Failure path: undo the setup done above, then free nmp and nam.
	 * NOTE(review): mp->mnt_data is left pointing at the freed nmp here;
	 * confirm that callers never look at it after a failed mount.
	 */
1635 bad:
1636 	if (clp != NULL)
1637 		nfscl_clientrelease(clp);
1638 	newnfs_disconnect(&nmp->nm_sockreq);
1639 	crfree(nmp->nm_sockreq.nr_cred);
1640 	if (nmp->nm_sockreq.nr_auth != NULL)
1641 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1642 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1643 	mtx_destroy(&nmp->nm_mtx);
1644 	if (nmp->nm_clp != NULL) {
1645 		NFSLOCKCLSTATE();
1646 		LIST_REMOVE(nmp->nm_clp, nfsc_list);
1647 		NFSUNLOCKCLSTATE();
1648 		free(nmp->nm_clp, M_NFSCLCLIENT);
1649 	}
	/*
	 * Free every nm_sess entry; all but the first one also have their
	 * socket disconnected (presumably the first shares nm_sockreq,
	 * which was disconnected above -- verify).
	 */
1650 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
1651 		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1652 		    dsp->nfsclds_sockp != NULL)
1653 			newnfs_disconnect(dsp->nfsclds_sockp);
1654 		nfscl_freenfsclds(dsp);
1655 	}
1656 	free(nmp, M_NEWNFSMNT);
1657 	free(nam, M_SONAME);
1658 	return (error);
1659 }
1660 
1661 /*
1662  * unmount system call
 *
 * With MNT_FORCE set, outstanding requests are cancelled and
 * nfscl_umount() is called up front, and a failing vflush() is retried
 * (up to 30 attempts with a nap in between).  A forced unmount fails
 * with ENXIO when nfsv4_findmirror(nmp) returns non-NULL.
 *
 * Once vflush() has succeeded the unmount is committed: the nfsiod slots
 * assigned to this mount are released, mnt_data is cleared, the socket
 * is disconnected and the nfsmount with its nm_sess entries is freed.
 *
 * Returns 0 on success or an errno value, in which case nmp has not been
 * freed.
1663  */
1664 static int
1665 nfs_unmount(struct mount *mp, int mntflags)
1666 {
1667 	struct thread *td;
1668 	struct nfsmount *nmp;
1669 	int error, flags = 0, i, trycnt = 0;
1670 	struct nfsclds *dsp, *tdsp;
1671 
1672 	td = curthread;
1673 
1674 	if (mntflags & MNT_FORCE)
1675 		flags |= FORCECLOSE;
1676 	nmp = VFSTONFS(mp);
1677 	error = 0;
1678 	/*
1679 	 * Goes something like this..
1680 	 * - Call vflush() to clear out vnodes for this filesystem
1681 	 * - Close the socket
1682 	 * - Free up the data structures
1683 	 */
1684 	/* In the forced case, cancel any outstanding requests. */
1685 	if (mntflags & MNT_FORCE) {
		/* Refuse the forced unmount if nfsv4_findmirror() finds nmp. */
1686 		NFSDDSLOCK();
1687 		if (nfsv4_findmirror(nmp) != NULL)
1688 			error = ENXIO;
1689 		NFSDDSUNLOCK();
1690 		if (error)
1691 			goto out;
1692 		error = newnfs_nmcancelreqs(nmp);
1693 		if (error)
1694 			goto out;
1695 		/* For a forced close, get rid of the renew thread now */
1696 		nfscl_umount(nmp, td);
1697 	}
1698 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
	/*
	 * Unforced: a single vflush() attempt.  Forced: keep retrying a
	 * failing vflush(), napping in between, until trycnt reaches 30.
	 */
1699 	do {
1700 		error = vflush(mp, 1, flags, td);
1701 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1702 			(void) nfs_catnap(PSOCK, error, "newndm");
1703 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1704 	if (error)
1705 		goto out;
1706 
1707 	/*
1708 	 * We are now committed to the unmount.
1709 	 */
	/* In the forced case nfscl_umount() was already called above. */
1710 	if ((mntflags & MNT_FORCE) == 0)
1711 		nfscl_umount(nmp, td);
1712 	else {
1713 		mtx_lock(&nmp->nm_mtx);
1714 		nmp->nm_privflag |= NFSMNTP_FORCEDISM;
1715 		mtx_unlock(&nmp->nm_mtx);
1716 	}
1717 	/* Make sure no nfsiods are assigned to this mount. */
1718 	NFSLOCKIOD();
1719 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1720 		if (ncl_iodmount[i] == nmp) {
1721 			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1722 			ncl_iodmount[i] = NULL;
1723 		}
1724 	NFSUNLOCKIOD();
1725 
1726 	/*
1727 	 * We can now set mnt_data to NULL and wait for
1728 	 * nfssvc(NFSSVC_FORCEDISM) to complete.
1729 	 */
	/*
	 * mnt_data is cleared with both mountlist_mtx and nm_mtx held;
	 * nm_mtx stays held across the msleep() loop below.
	 */
1730 	mtx_lock(&mountlist_mtx);
1731 	mtx_lock(&nmp->nm_mtx);
1732 	mp->mnt_data = NULL;
1733 	mtx_unlock(&mountlist_mtx);
1734 	while ((nmp->nm_privflag & NFSMNTP_CANCELRPCS) != 0)
1735 		msleep(nmp, &nmp->nm_mtx, PVFS, "nfsfdism", 0);
1736 	mtx_unlock(&nmp->nm_mtx);
1737 
	/* Tear down the connection state and free the mount structure. */
1738 	newnfs_disconnect(&nmp->nm_sockreq);
1739 	crfree(nmp->nm_sockreq.nr_cred);
1740 	free(nmp->nm_nam, M_SONAME);
1741 	if (nmp->nm_sockreq.nr_auth != NULL)
1742 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1743 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1744 	mtx_destroy(&nmp->nm_mtx);
	/*
	 * Free every nm_sess entry; all but the first one also have their
	 * socket disconnected (presumably the first shares nm_sockreq,
	 * which was disconnected above -- verify).
	 */
1745 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
1746 		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1747 		    dsp->nfsclds_sockp != NULL)
1748 			newnfs_disconnect(dsp->nfsclds_sockp);
1749 		nfscl_freenfsclds(dsp);
1750 	}
1751 	free(nmp, M_NEWNFSMNT);
1752 out:
1753 	return (error);
1754 }
1755 
1756 /*
1757  * Return root of a filesystem
1758  */
1759 static int
1760 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1761 {
1762 	struct vnode *vp;
1763 	struct nfsmount *nmp;
1764 	struct nfsnode *np;
1765 	int error;
1766 
1767 	nmp = VFSTONFS(mp);
1768 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1769 	if (error)
1770 		return error;
1771 	vp = NFSTOV(np);
1772 	/*
1773 	 * Get transfer parameters and attributes for root vnode once.
1774 	 */
1775 	mtx_lock(&nmp->nm_mtx);
1776 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1777 		mtx_unlock(&nmp->nm_mtx);
1778 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1779 	} else
1780 		mtx_unlock(&nmp->nm_mtx);
1781 	if (vp->v_type == VNON)
1782 	    vp->v_type = VDIR;
1783 	vp->v_vflag |= VV_ROOT;
1784 	*vpp = vp;
1785 	return (0);
1786 }
1787 
1788 /*
1789  * Flush out the buffer cache
1790  */
1791 /* ARGSUSED */
1792 static int
1793 nfs_sync(struct mount *mp, int waitfor)
1794 {
1795 	struct vnode *vp, *mvp;
1796 	struct thread *td;
1797 	int error, allerror = 0;
1798 
1799 	td = curthread;
1800 
1801 	MNT_ILOCK(mp);
1802 	/*
1803 	 * If a forced dismount is in progress, return from here so that
1804 	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1805 	 * calling VFS_UNMOUNT().
1806 	 */
1807 	if (NFSCL_FORCEDISM(mp)) {
1808 		MNT_IUNLOCK(mp);
1809 		return (EBADF);
1810 	}
1811 	MNT_IUNLOCK(mp);
1812 
1813 	/*
1814 	 * Force stale buffer cache information to be flushed.
1815 	 */
1816 loop:
1817 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1818 		/* XXX Racy bv_cnt check. */
1819 		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1820 		    waitfor == MNT_LAZY) {
1821 			VI_UNLOCK(vp);
1822 			continue;
1823 		}
1824 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1825 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1826 			goto loop;
1827 		}
1828 		error = VOP_FSYNC(vp, waitfor, td);
1829 		if (error)
1830 			allerror = error;
1831 		NFSVOPUNLOCK(vp, 0);
1832 		vrele(vp);
1833 	}
1834 	return (allerror);
1835 }
1836 
1837 static int
1838 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1839 {
1840 	struct nfsmount *nmp = VFSTONFS(mp);
1841 	struct vfsquery vq;
1842 	int error;
1843 
1844 	bzero(&vq, sizeof(vq));
1845 	switch (op) {
1846 #if 0
1847 	case VFS_CTL_NOLOCKS:
1848 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1849  		if (req->oldptr != NULL) {
1850  			error = SYSCTL_OUT(req, &val, sizeof(val));
1851  			if (error)
1852  				return (error);
1853  		}
1854  		if (req->newptr != NULL) {
1855  			error = SYSCTL_IN(req, &val, sizeof(val));
1856  			if (error)
1857  				return (error);
1858 			if (val)
1859 				nmp->nm_flag |= NFSMNT_NOLOCKS;
1860 			else
1861 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1862  		}
1863 		break;
1864 #endif
1865 	case VFS_CTL_QUERY:
1866 		mtx_lock(&nmp->nm_mtx);
1867 		if (nmp->nm_state & NFSSTA_TIMEO)
1868 			vq.vq_flags |= VQ_NOTRESP;
1869 		mtx_unlock(&nmp->nm_mtx);
1870 #if 0
1871 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1872 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1873 			vq.vq_flags |= VQ_NOTRESPLOCK;
1874 #endif
1875 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1876 		break;
1877  	case VFS_CTL_TIMEO:
1878  		if (req->oldptr != NULL) {
1879  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1880  			    sizeof(nmp->nm_tprintf_initial_delay));
1881  			if (error)
1882  				return (error);
1883  		}
1884  		if (req->newptr != NULL) {
1885 			error = vfs_suser(mp, req->td);
1886 			if (error)
1887 				return (error);
1888  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1889  			    sizeof(nmp->nm_tprintf_initial_delay));
1890  			if (error)
1891  				return (error);
1892  			if (nmp->nm_tprintf_initial_delay < 0)
1893  				nmp->nm_tprintf_initial_delay = 0;
1894  		}
1895 		break;
1896 	default:
1897 		return (ENOTSUP);
1898 	}
1899 	return (0);
1900 }
1901 
/*
 * Cancel the RPCs that are in progress on this mount so that they all
 * return errors.  dounmount() relies on this to get as far as
 * VFS_UNMOUNT() when doing a forced dismount.
 */
static void
nfs_purge(struct mount *mp)
{

	/* The cancel status is not reported to the caller. */
	(void)newnfs_nmcancelreqs(VFSTONFS(mp));
}
1914 
1915 /*
1916  * Extract the information needed by the nlm from the nfs vnode.
1917  */
1918 static void
1919 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1920     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1921     struct timeval *timeop)
1922 {
1923 	struct nfsmount *nmp;
1924 	struct nfsnode *np = VTONFS(vp);
1925 
1926 	nmp = VFSTONFS(vp->v_mount);
1927 	if (fhlenp != NULL)
1928 		*fhlenp = (size_t)np->n_fhp->nfh_len;
1929 	if (fhp != NULL)
1930 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1931 	if (sp != NULL)
1932 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1933 	if (is_v3p != NULL)
1934 		*is_v3p = NFS_ISV3(vp);
1935 	if (sizep != NULL)
1936 		*sizep = np->n_size;
1937 	if (timeop != NULL) {
1938 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1939 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1940 	}
1941 }
1942 
/*
 * Append the option name "opt" to the output when "testval" is non-zero
 * and the name plus its terminating NUL fits in the *blen bytes left at
 * *buf.  On output, *buf is advanced past the name and *blen is reduced
 * by the same amount.  Nothing is touched when the test is false or the
 * name does not fit.  "nmp" is not used.
 */
static __inline void nfscl_printopt(struct nfsmount *nmp, int testval,
    char *opt, char **buf, size_t *blen)
{
	size_t optlen;
	int written;

	if (testval == 0)
		return;

	optlen = strlen(opt);
	if (*blen <= optlen)
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	/* The space check above means the whole name should have fit. */
	if ((size_t)written != optlen)
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
1960 
/*
 * This function prints out an option's integer value as "<opt>=<optval>".
 *
 * The text is appended at *buf, which has *blen bytes available.  When
 * the whole string (plus terminating NUL) fits, *buf is advanced past it
 * and *blen is reduced accordingly.  When it does not fit, the option is
 * dropped as a whole: *buf and *blen are left unchanged and the string
 * at *buf is reset to empty, so that a cut-off number (for example
 * "rsize=6553" for 65536) can never be reported.  "nmp" is not used.
 */
static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
    char *opt, char **buf, size_t *blen)
{
	int len;

	/* Cheap pre-check: need room for the name, '=' and the NUL. */
	if (*blen <= strlen(opt) + 1)
		return;

	len = snprintf(*buf, *blen, "%s=%d", opt, optval);
	if (len >= 0 && (size_t)len < *blen) {
		*buf += len;
		*blen -= len;
	} else {
		/*
		 * The output was truncated.  Erase the partial text
		 * instead of leaving a misleading value behind; *blen is
		 * known to be at least 2 here, so **buf is writable.
		 */
		**buf = '\0';
	}
}
1978 
1979 /*
1980  * Load the option flags and values into the buffer.
1981  */
1982 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1983 {
1984 	char *buf;
1985 	size_t blen;
1986 
1987 	buf = buffer;
1988 	blen = buflen;
1989 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1990 	    &blen);
1991 	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1992 		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1993 		    &blen);
1994 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1995 		    &buf, &blen);
1996 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_ONEOPENOWN) != 0 &&
1997 		    nmp->nm_minorvers > 0, ",oneopenown", &buf, &blen);
1998 	}
1999 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
2000 	    &blen);
2001 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
2002 	    "nfsv2", &buf, &blen);
2003 	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
2004 	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
2005 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
2006 	    &buf, &blen);
2007 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
2008 	    &buf, &blen);
2009 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
2010 	    &blen);
2011 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
2012 	    &blen);
2013 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
2014 	    &blen);
2015 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
2016 	    &blen);
2017 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
2018 	    &blen);
2019 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
2020 	    ",noncontigwr", &buf, &blen);
2021 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
2022 	    0, ",lockd", &buf, &blen);
2023 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
2024 	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
2025 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
2026 	    &buf, &blen);
2027 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
2028 	    &buf, &blen);
2029 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2030 	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
2031 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2032 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
2033 	    &buf, &blen);
2034 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2035 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
2036 	    &buf, &blen);
2037 	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
2038 	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
2039 	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
2040 	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
2041 	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
2042 	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
2043 	    &blen);
2044 	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
2045 	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
2046 	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
2047 	    &blen);
2048 	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
2049 	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
2050 	    &blen);
2051 	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
2052 	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
2053 }
2054 
2055