xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision 1f4bcc459a76b7aa664f3fd557684cd0ba6da352)
1 /*-
2  * Copyright (c) 1989, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/limits.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/mount.h>
55 #include <sys/proc.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/sockio.h>
59 #include <sys/sysctl.h>
60 #include <sys/vnode.h>
61 #include <sys/signalvar.h>
62 
63 #include <vm/vm.h>
64 #include <vm/vm_extern.h>
65 #include <vm/uma.h>
66 
67 #include <net/if.h>
68 #include <net/route.h>
69 #include <netinet/in.h>
70 
71 #include <fs/nfs/nfsport.h>
72 #include <fs/nfsclient/nfsnode.h>
73 #include <fs/nfsclient/nfsmount.h>
74 #include <fs/nfsclient/nfs.h>
75 #include <nfs/nfsdiskless.h>
76 
/* Advertise the "nfscl" kernel feature, described as the NFSv4 client. */
FEATURE(nfscl, "NFSv4 client");

/*
 * Client-wide state that is only declared here; none of these objects
 * is defined in this part of the file.
 */
extern int nfscl_ticks;
extern struct timeval nfsboottime;
extern struct nfsstats	newnfsstats;
extern int nfsrv_useacl;
extern int nfscl_debuglevel;
extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
extern struct mtx ncl_iod_mutex;
/* NOTE(review): presumably expands to the client state mutex declaration. */
NFSCLSTATEMUTEX;

/* malloc(9) types: short name, then the description shown for the type. */
MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header");
MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct");
91 
/*
 * Read/write integer knobs published under the _vfs_nfs sysctl node.
 * Each knob is backed by a file-local variable initialised to its default.
 */
SYSCTL_DECL(_vfs_nfs);
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0, "");
/*
 * NOTE(review): the second SYSCTL_INT argument is the OID number.  The two
 * "downdelay" knobs below pass their default-value constant there instead
 * of OID_AUTO, which pins the numeric OID to that value.  This looks
 * unintentional, but changing it would renumber the MIB entries; confirm
 * before touching it.
 */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
/* how long between console messages "nfs server foo not responding" */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
#ifdef NFS_DEBUG
/* Only built with NFS_DEBUG: a global debug toggle exported read/write. */
int nfs_debug;
SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
    "Toggle debug flag");
#endif
108 
/*
 * Forward declarations for the file-local helpers and for the VFS entry
 * points that are wired into nfs_vfsops below.
 */
static int	nfs_mountroot(struct mount *);
static void	nfs_sec_name(char *, int *);
static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
		    struct nfs_args *argp, const char *, struct ucred *,
		    struct thread *);
static int	mountnfs(struct nfs_args *, struct mount *,
		    struct sockaddr *, char *, u_char *, int, u_char *, int,
		    u_char *, int, struct vnode **, struct ucred *,
		    struct thread *, int, int, int);
static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
		    struct sockaddr_storage *, int *, off_t *,
		    struct timeval *);
static vfs_mount_t nfs_mount;
static vfs_cmount_t nfs_cmount;
static vfs_unmount_t nfs_unmount;
static vfs_root_t nfs_root;
static vfs_statfs_t nfs_statfs;
static vfs_sync_t nfs_sync;
static vfs_sysctl_t nfs_sysctl;
static vfs_purge_t nfs_purge;

/*
 * nfs vfs operations.
 *
 * The init/uninit hooks are ncl_init and ncl_uninit, which are not static
 * to this file; every other slot points at a static function declared
 * above.
 */
static struct vfsops nfs_vfsops = {
	.vfs_init =		ncl_init,
	.vfs_mount =		nfs_mount,
	.vfs_cmount =		nfs_cmount,
	.vfs_root =		nfs_root,
	.vfs_statfs =		nfs_statfs,
	.vfs_sync =		nfs_sync,
	.vfs_uninit =		ncl_uninit,
	.vfs_unmount =		nfs_unmount,
	.vfs_sysctl =		nfs_sysctl,
	.vfs_purge =		nfs_purge,
};
/* Register the operations under the name "nfs" with the flags given. */
VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);

/* So that loader and kldload(2) can find us, wherever we are.. */
MODULE_VERSION(nfs, 1);
/* Modules this one depends on; each is pinned to version 1. */
MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
MODULE_DEPEND(nfs, krpc, 1, 1, 1);
MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
153 
/*
 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
 * can be shared by both NFS clients. It is declared here so that it
 * will be defined for kernels built without NFS_ROOT, although it
 * isn't used in that case.
 */
#if !defined(NFS_ROOT)
/* Zero-filled stand-ins; nfs_diskless_valid == 0 marks them as unset. */
struct nfs_diskless	nfs_diskless = { { { 0 } } };
struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
int			nfs_diskless_valid = 0;
#endif

/* Read-only views of the diskless boot state under the _vfs_nfs node. */
SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &nfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

/* Exports nfsv3_diskless.root_hostnam as a string. */
SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");

/* Exports the raw bytes of nfsv3_diskless.root_saddr. */
SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
    "%Ssockaddr_in", "Diskless root nfs address");


/* Declared with external linkage; the definition is not in this part. */
void		newnfsargs_ntoh(struct nfs_args *);
/* File-local helpers used by the diskless (NFS root) mount path. */
static int	nfs_mountdiskless(char *,
		    struct sockaddr_in *, struct nfs_args *,
		    struct thread *, struct vnode **, struct mount *);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
185 
186 int
187 newnfs_iosize(struct nfsmount *nmp)
188 {
189 	int iosize, maxio;
190 
191 	/* First, set the upper limit for iosize */
192 	if (nmp->nm_flag & NFSMNT_NFSV4) {
193 		maxio = NFS_MAXBSIZE;
194 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
195 		if (nmp->nm_sotype == SOCK_DGRAM)
196 			maxio = NFS_MAXDGRAMDATA;
197 		else
198 			maxio = NFS_MAXBSIZE;
199 	} else {
200 		maxio = NFS_V2MAXDATA;
201 	}
202 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
203 		nmp->nm_rsize = maxio;
204 	if (nmp->nm_rsize > NFS_MAXBSIZE)
205 		nmp->nm_rsize = NFS_MAXBSIZE;
206 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
207 		nmp->nm_readdirsize = maxio;
208 	if (nmp->nm_readdirsize > nmp->nm_rsize)
209 		nmp->nm_readdirsize = nmp->nm_rsize;
210 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
211 		nmp->nm_wsize = maxio;
212 	if (nmp->nm_wsize > NFS_MAXBSIZE)
213 		nmp->nm_wsize = NFS_MAXBSIZE;
214 
215 	/*
216 	 * Calculate the size used for io buffers.  Use the larger
217 	 * of the two sizes to minimise nfs requests but make sure
218 	 * that it is at least one VM page to avoid wasting buffer
219 	 * space.  It must also be at least NFS_DIRBLKSIZ, since
220 	 * that is the buffer size used for directories.
221 	 */
222 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
223 	iosize = imax(iosize, PAGE_SIZE);
224 	iosize = imax(iosize, NFS_DIRBLKSIZ);
225 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
226 	return (iosize);
227 }
228 
229 static void
230 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
231 {
232 
233 	args->version = NFS_ARGSVERSION;
234 	args->addr = oargs->addr;
235 	args->addrlen = oargs->addrlen;
236 	args->sotype = oargs->sotype;
237 	args->proto = oargs->proto;
238 	args->fh = oargs->fh;
239 	args->fhsize = oargs->fhsize;
240 	args->flags = oargs->flags;
241 	args->wsize = oargs->wsize;
242 	args->rsize = oargs->rsize;
243 	args->readdirsize = oargs->readdirsize;
244 	args->timeo = oargs->timeo;
245 	args->retrans = oargs->retrans;
246 	args->readahead = oargs->readahead;
247 	args->hostname = oargs->hostname;
248 }
249 
250 static void
251 nfs_convert_diskless(void)
252 {
253 
254 	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
255 		sizeof(struct ifaliasreq));
256 	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
257 		sizeof(struct sockaddr_in));
258 	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
259 	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
260 		nfsv3_diskless.root_fhsize = NFSX_MYFH;
261 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
262 	} else {
263 		nfsv3_diskless.root_fhsize = NFSX_V2FH;
264 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
265 	}
266 	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
267 		sizeof(struct sockaddr_in));
268 	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
269 	nfsv3_diskless.root_time = nfs_diskless.root_time;
270 	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
271 		MAXHOSTNAMELEN);
272 	nfs_diskless_valid = 3;
273 }
274 
275 /*
276  * nfs statfs call
277  */
278 static int
279 nfs_statfs(struct mount *mp, struct statfs *sbp)
280 {
281 	struct vnode *vp;
282 	struct thread *td;
283 	struct nfsmount *nmp = VFSTONFS(mp);
284 	struct nfsvattr nfsva;
285 	struct nfsfsinfo fs;
286 	struct nfsstatfs sb;
287 	int error = 0, attrflag, gotfsinfo = 0, ret;
288 	struct nfsnode *np;
289 
290 	td = curthread;
291 
292 	error = vfs_busy(mp, MBF_NOWAIT);
293 	if (error)
294 		return (error);
295 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
296 	if (error) {
297 		vfs_unbusy(mp);
298 		return (error);
299 	}
300 	vp = NFSTOV(np);
301 	mtx_lock(&nmp->nm_mtx);
302 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
303 		mtx_unlock(&nmp->nm_mtx);
304 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
305 		    &attrflag, NULL);
306 		if (!error)
307 			gotfsinfo = 1;
308 	} else
309 		mtx_unlock(&nmp->nm_mtx);
310 	if (!error)
311 		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
312 		    &attrflag, NULL);
313 	if (error != 0)
314 		NFSCL_DEBUG(2, "statfs=%d\n", error);
315 	if (attrflag == 0) {
316 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
317 		    td->td_ucred, td, &nfsva, NULL, NULL);
318 		if (ret) {
319 			/*
320 			 * Just set default values to get things going.
321 			 */
322 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
323 			nfsva.na_vattr.va_type = VDIR;
324 			nfsva.na_vattr.va_mode = 0777;
325 			nfsva.na_vattr.va_nlink = 100;
326 			nfsva.na_vattr.va_uid = (uid_t)0;
327 			nfsva.na_vattr.va_gid = (gid_t)0;
328 			nfsva.na_vattr.va_fileid = 2;
329 			nfsva.na_vattr.va_gen = 1;
330 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
331 			nfsva.na_vattr.va_size = 512 * 1024;
332 		}
333 	}
334 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
335 	if (!error) {
336 	    mtx_lock(&nmp->nm_mtx);
337 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
338 		nfscl_loadfsinfo(nmp, &fs);
339 	    nfscl_loadsbinfo(nmp, &sb, sbp);
340 	    sbp->f_iosize = newnfs_iosize(nmp);
341 	    mtx_unlock(&nmp->nm_mtx);
342 	    if (sbp != &mp->mnt_stat) {
343 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
344 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
345 	    }
346 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
347 	} else if (NFS_ISV4(vp)) {
348 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
349 	}
350 	vput(vp);
351 	vfs_unbusy(mp);
352 	return (error);
353 }
354 
355 /*
356  * nfs version 3 fsinfo rpc call
357  */
358 int
359 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
360     struct thread *td)
361 {
362 	struct nfsfsinfo fs;
363 	struct nfsvattr nfsva;
364 	int error, attrflag;
365 
366 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
367 	if (!error) {
368 		if (attrflag)
369 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
370 			    1);
371 		mtx_lock(&nmp->nm_mtx);
372 		nfscl_loadfsinfo(nmp, &fs);
373 		mtx_unlock(&nmp->nm_mtx);
374 	}
375 	return (error);
376 }
377 
378 /*
379  * Mount a remote root fs via. nfs. This depends on the info in the
380  * nfs_diskless structure that has been filled in properly by some primary
381  * bootstrap.
382  * It goes something like this:
383  * - do enough of "ifconfig" by calling ifioctl() so that the system
384  *   can talk to the server
385  * - If nfs_diskless.mygateway is filled in, use that address as
386  *   a default gateway.
387  * - build the rootfs mount point and call mountnfs() to do the rest.
388  *
389  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
390  * structure, as well as other global NFS client variables here, as
391  * nfs_mountroot() will be called once in the boot before any other NFS
392  * client activity occurs.
393  */
394 static int
395 nfs_mountroot(struct mount *mp)
396 {
397 	struct thread *td = curthread;
398 	struct nfsv3_diskless *nd = &nfsv3_diskless;
399 	struct socket *so;
400 	struct vnode *vp;
401 	struct ifreq ir;
402 	int error;
403 	u_long l;
404 	char buf[128];
405 	char *cp;
406 
407 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
408 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
409 #elif defined(NFS_ROOT)
410 	nfs_setup_diskless();
411 #endif
412 
413 	if (nfs_diskless_valid == 0)
414 		return (-1);
415 	if (nfs_diskless_valid == 1)
416 		nfs_convert_diskless();
417 
418 	/*
419 	 * XXX splnet, so networks will receive...
420 	 */
421 	splnet();
422 
423 	/*
424 	 * Do enough of ifconfig(8) so that the critical net interface can
425 	 * talk to the server.
426 	 */
427 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
428 	    td->td_ucred, td);
429 	if (error)
430 		panic("nfs_mountroot: socreate(%04x): %d",
431 			nd->myif.ifra_addr.sa_family, error);
432 
433 #if 0 /* XXX Bad idea */
434 	/*
435 	 * We might not have been told the right interface, so we pass
436 	 * over the first ten interfaces of the same kind, until we get
437 	 * one of them configured.
438 	 */
439 
440 	for (i = strlen(nd->myif.ifra_name) - 1;
441 		nd->myif.ifra_name[i] >= '0' &&
442 		nd->myif.ifra_name[i] <= '9';
443 		nd->myif.ifra_name[i] ++) {
444 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
445 		if(!error)
446 			break;
447 	}
448 #endif
449 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
450 	if (error)
451 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
452 	if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
453 		ir.ifr_mtu = strtol(cp, NULL, 10);
454 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
455 		freeenv(cp);
456 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
457 		if (error)
458 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
459 	}
460 	soclose(so);
461 
462 	/*
463 	 * If the gateway field is filled in, set it as the default route.
464 	 * Note that pxeboot will set a default route of 0 if the route
465 	 * is not set by the DHCP server.  Check also for a value of 0
466 	 * to avoid panicking inappropriately in that situation.
467 	 */
468 	if (nd->mygateway.sin_len != 0 &&
469 	    nd->mygateway.sin_addr.s_addr != 0) {
470 		struct sockaddr_in mask, sin;
471 
472 		bzero((caddr_t)&mask, sizeof(mask));
473 		sin = mask;
474 		sin.sin_family = AF_INET;
475 		sin.sin_len = sizeof(sin);
476                 /* XXX MRT use table 0 for this sort of thing */
477 		CURVNET_SET(TD_TO_VNET(td));
478 		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
479 		    (struct sockaddr *)&nd->mygateway,
480 		    (struct sockaddr *)&mask,
481 		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
482 		CURVNET_RESTORE();
483 		if (error)
484 			panic("nfs_mountroot: RTM_ADD: %d", error);
485 	}
486 
487 	/*
488 	 * Create the rootfs mount point.
489 	 */
490 	nd->root_args.fh = nd->root_fh;
491 	nd->root_args.fhsize = nd->root_fhsize;
492 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
493 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
494 		(l >> 24) & 0xff, (l >> 16) & 0xff,
495 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
496 	printf("NFS ROOT: %s\n", buf);
497 	nd->root_args.hostname = buf;
498 	if ((error = nfs_mountdiskless(buf,
499 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
500 		return (error);
501 	}
502 
503 	/*
504 	 * This is not really an nfs issue, but it is much easier to
505 	 * set hostname here and then let the "/etc/rc.xxx" files
506 	 * mount the right /var based upon its preset value.
507 	 */
508 	mtx_lock(&prison0.pr_mtx);
509 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
510 	    sizeof(prison0.pr_hostname));
511 	mtx_unlock(&prison0.pr_mtx);
512 	inittodr(ntohl(nd->root_time));
513 	return (0);
514 }
515 
516 /*
517  * Internal version of mount system call for diskless setup.
518  */
519 static int
520 nfs_mountdiskless(char *path,
521     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
522     struct vnode **vpp, struct mount *mp)
523 {
524 	struct sockaddr *nam;
525 	int dirlen, error;
526 	char *dirpath;
527 
528 	/*
529 	 * Find the directory path in "path", which also has the server's
530 	 * name/ip address in it.
531 	 */
532 	dirpath = strchr(path, ':');
533 	if (dirpath != NULL)
534 		dirlen = strlen(++dirpath);
535 	else
536 		dirlen = 0;
537 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
538 	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
539 	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
540 	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
541 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
542 		return (error);
543 	}
544 	return (0);
545 }
546 
547 static void
548 nfs_sec_name(char *sec, int *flagsp)
549 {
550 	if (!strcmp(sec, "krb5"))
551 		*flagsp |= NFSMNT_KERB;
552 	else if (!strcmp(sec, "krb5i"))
553 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
554 	else if (!strcmp(sec, "krb5p"))
555 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
556 }
557 
558 static void
559 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
560     const char *hostname, struct ucred *cred, struct thread *td)
561 {
562 	int s;
563 	int adjsock;
564 	char *p;
565 
566 	s = splnet();
567 
568 	/*
569 	 * Set read-only flag if requested; otherwise, clear it if this is
570 	 * an update.  If this is not an update, then either the read-only
571 	 * flag is already clear, or this is a root mount and it was set
572 	 * intentionally at some previous point.
573 	 */
574 	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
575 		MNT_ILOCK(mp);
576 		mp->mnt_flag |= MNT_RDONLY;
577 		MNT_IUNLOCK(mp);
578 	} else if (mp->mnt_flag & MNT_UPDATE) {
579 		MNT_ILOCK(mp);
580 		mp->mnt_flag &= ~MNT_RDONLY;
581 		MNT_IUNLOCK(mp);
582 	}
583 
584 	/*
585 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
586 	 * no sense in that context.  Also, set up appropriate retransmit
587 	 * and soft timeout behavior.
588 	 */
589 	if (argp->sotype == SOCK_STREAM) {
590 		nmp->nm_flag &= ~NFSMNT_NOCONN;
591 		nmp->nm_timeo = NFS_MAXTIMEO;
592 		if ((argp->flags & NFSMNT_NFSV4) != 0)
593 			nmp->nm_retry = INT_MAX;
594 		else
595 			nmp->nm_retry = NFS_RETRANS_TCP;
596 	}
597 
598 	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
599 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
600 		argp->flags &= ~NFSMNT_RDIRPLUS;
601 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
602 	}
603 
604 	/* Re-bind if rsrvd port requested and wasn't on one */
605 	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
606 		  && (argp->flags & NFSMNT_RESVPORT);
607 	/* Also re-bind if we're switching to/from a connected UDP socket */
608 	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
609 		    (argp->flags & NFSMNT_NOCONN));
610 
611 	/* Update flags atomically.  Don't change the lock bits. */
612 	nmp->nm_flag = argp->flags | nmp->nm_flag;
613 	splx(s);
614 
615 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
616 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
617 		if (nmp->nm_timeo < NFS_MINTIMEO)
618 			nmp->nm_timeo = NFS_MINTIMEO;
619 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
620 			nmp->nm_timeo = NFS_MAXTIMEO;
621 	}
622 
623 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
624 		nmp->nm_retry = argp->retrans;
625 		if (nmp->nm_retry > NFS_MAXREXMIT)
626 			nmp->nm_retry = NFS_MAXREXMIT;
627 	}
628 
629 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
630 		nmp->nm_wsize = argp->wsize;
631 		/*
632 		 * Clip at the power of 2 below the size. There is an
633 		 * issue (not isolated) that causes intermittent page
634 		 * faults if this is not done.
635 		 */
636 		if (nmp->nm_wsize > NFS_FABLKSIZE)
637 			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
638 		else
639 			nmp->nm_wsize = NFS_FABLKSIZE;
640 	}
641 
642 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
643 		nmp->nm_rsize = argp->rsize;
644 		/*
645 		 * Clip at the power of 2 below the size. There is an
646 		 * issue (not isolated) that causes intermittent page
647 		 * faults if this is not done.
648 		 */
649 		if (nmp->nm_rsize > NFS_FABLKSIZE)
650 			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
651 		else
652 			nmp->nm_rsize = NFS_FABLKSIZE;
653 	}
654 
655 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
656 		nmp->nm_readdirsize = argp->readdirsize;
657 	}
658 
659 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
660 		nmp->nm_acregmin = argp->acregmin;
661 	else
662 		nmp->nm_acregmin = NFS_MINATTRTIMO;
663 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
664 		nmp->nm_acregmax = argp->acregmax;
665 	else
666 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
667 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
668 		nmp->nm_acdirmin = argp->acdirmin;
669 	else
670 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
671 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
672 		nmp->nm_acdirmax = argp->acdirmax;
673 	else
674 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
675 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
676 		nmp->nm_acdirmin = nmp->nm_acdirmax;
677 	if (nmp->nm_acregmin > nmp->nm_acregmax)
678 		nmp->nm_acregmin = nmp->nm_acregmax;
679 
680 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
681 		if (argp->readahead <= NFS_MAXRAHEAD)
682 			nmp->nm_readahead = argp->readahead;
683 		else
684 			nmp->nm_readahead = NFS_MAXRAHEAD;
685 	}
686 	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
687 		if (argp->wcommitsize < nmp->nm_wsize)
688 			nmp->nm_wcommitsize = nmp->nm_wsize;
689 		else
690 			nmp->nm_wcommitsize = argp->wcommitsize;
691 	}
692 
693 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
694 		    (nmp->nm_soproto != argp->proto));
695 
696 	if (nmp->nm_client != NULL && adjsock) {
697 		int haslock = 0, error = 0;
698 
699 		if (nmp->nm_sotype == SOCK_STREAM) {
700 			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
701 			if (!error)
702 				haslock = 1;
703 		}
704 		if (!error) {
705 		    newnfs_disconnect(&nmp->nm_sockreq);
706 		    if (haslock)
707 			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
708 		    nmp->nm_sotype = argp->sotype;
709 		    nmp->nm_soproto = argp->proto;
710 		    if (nmp->nm_sotype == SOCK_DGRAM)
711 			while (newnfs_connect(nmp, &nmp->nm_sockreq,
712 			    cred, td, 0)) {
713 				printf("newnfs_args: retrying connect\n");
714 				(void) nfs_catnap(PSOCK, 0, "nfscon");
715 			}
716 		}
717 	} else {
718 		nmp->nm_sotype = argp->sotype;
719 		nmp->nm_soproto = argp->proto;
720 	}
721 
722 	if (hostname != NULL) {
723 		strlcpy(nmp->nm_hostname, hostname,
724 		    sizeof(nmp->nm_hostname));
725 		p = strchr(nmp->nm_hostname, ':');
726 		if (p != NULL)
727 			*p = '\0';
728 	}
729 }
730 
/*
 * NULL-terminated list of mount option names.  nfs_mount() passes it to
 * vfs_filteropt() together with the new option list and fails the mount
 * with EINVAL when that call returns non-zero.
 */
static const char *nfs_opts[] = { "from", "nfs_args",
    "noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
    "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
    "async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
    "readdirsize", "soft", "hard", "mntudp", "tcp", "udp", "wsize", "rsize",
    "retrans", "actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax",
    "resvport", "readahead", "hostname", "timeo", "timeout", "addr", "fh",
    "nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname", "dirpath",
    "minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr",
    "pnfs", "wcommitsize",
    NULL };
742 
743 /*
744  * VFS Operations.
745  *
746  * mount system call
747  * It seems a bit dumb to copyinstr() the host and path here and then
748  * bcopy() them in mountnfs(), but I wanted to detect errors before
749  * doing the sockargs() call because sockargs() allocates an mbuf and
750  * an error after that means that I have to release the mbuf.
751  */
752 /* ARGSUSED */
753 static int
754 nfs_mount(struct mount *mp)
755 {
756 	struct nfs_args args = {
757 	    .version = NFS_ARGSVERSION,
758 	    .addr = NULL,
759 	    .addrlen = sizeof (struct sockaddr_in),
760 	    .sotype = SOCK_STREAM,
761 	    .proto = 0,
762 	    .fh = NULL,
763 	    .fhsize = 0,
764 	    .flags = NFSMNT_RESVPORT,
765 	    .wsize = NFS_WSIZE,
766 	    .rsize = NFS_RSIZE,
767 	    .readdirsize = NFS_READDIRSIZE,
768 	    .timeo = 10,
769 	    .retrans = NFS_RETRANS,
770 	    .readahead = NFS_DEFRAHEAD,
771 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
772 	    .hostname = NULL,
773 	    .acregmin = NFS_MINATTRTIMO,
774 	    .acregmax = NFS_MAXATTRTIMO,
775 	    .acdirmin = NFS_MINDIRATTRTIMO,
776 	    .acdirmax = NFS_MAXDIRATTRTIMO,
777 	};
778 	int error = 0, ret, len;
779 	struct sockaddr *nam = NULL;
780 	struct vnode *vp;
781 	struct thread *td;
782 	char hst[MNAMELEN];
783 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
784 	char *cp, *opt, *name, *secname;
785 	int nametimeo = NFS_DEFAULT_NAMETIMEO;
786 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
787 	int minvers = 0;
788 	int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
789 	size_t hstlen;
790 
791 	has_nfs_args_opt = 0;
792 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
793 		error = EINVAL;
794 		goto out;
795 	}
796 
797 	td = curthread;
798 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
799 		error = nfs_mountroot(mp);
800 		goto out;
801 	}
802 
803 	nfscl_init();
804 
805 	/*
806 	 * The old mount_nfs program passed the struct nfs_args
807 	 * from userspace to kernel.  The new mount_nfs program
808 	 * passes string options via nmount() from userspace to kernel
809 	 * and we populate the struct nfs_args in the kernel.
810 	 */
811 	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
812 		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
813 		    sizeof(args));
814 		if (error != 0)
815 			goto out;
816 
817 		if (args.version != NFS_ARGSVERSION) {
818 			error = EPROGMISMATCH;
819 			goto out;
820 		}
821 		has_nfs_args_opt = 1;
822 	}
823 
824 	/* Handle the new style options. */
825 	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
826 		args.acdirmin = args.acdirmax =
827 		    args.acregmin = args.acregmax = 0;
828 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
829 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
830 	}
831 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
832 		args.flags |= NFSMNT_NOCONN;
833 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
834 		args.flags &= ~NFSMNT_NOCONN;
835 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
836 		args.flags |= NFSMNT_NOLOCKD;
837 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
838 		args.flags &= ~NFSMNT_NOLOCKD;
839 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
840 		args.flags |= NFSMNT_INT;
841 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
842 		args.flags |= NFSMNT_RDIRPLUS;
843 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
844 		args.flags |= NFSMNT_RESVPORT;
845 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
846 		args.flags &= ~NFSMNT_RESVPORT;
847 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
848 		args.flags |= NFSMNT_SOFT;
849 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
850 		args.flags &= ~NFSMNT_SOFT;
851 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
852 		args.sotype = SOCK_DGRAM;
853 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
854 		args.sotype = SOCK_DGRAM;
855 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
856 		args.sotype = SOCK_STREAM;
857 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
858 		args.flags |= NFSMNT_NFSV3;
859 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
860 		args.flags |= NFSMNT_NFSV4;
861 		args.sotype = SOCK_STREAM;
862 	}
863 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
864 		args.flags |= NFSMNT_ALLGSSNAME;
865 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
866 		args.flags |= NFSMNT_NOCTO;
867 	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
868 		args.flags |= NFSMNT_NONCONTIGWR;
869 	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
870 		args.flags |= NFSMNT_PNFS;
871 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
872 		if (opt == NULL) {
873 			vfs_mount_error(mp, "illegal readdirsize");
874 			error = EINVAL;
875 			goto out;
876 		}
877 		ret = sscanf(opt, "%d", &args.readdirsize);
878 		if (ret != 1 || args.readdirsize <= 0) {
879 			vfs_mount_error(mp, "illegal readdirsize: %s",
880 			    opt);
881 			error = EINVAL;
882 			goto out;
883 		}
884 		args.flags |= NFSMNT_READDIRSIZE;
885 	}
886 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
887 		if (opt == NULL) {
888 			vfs_mount_error(mp, "illegal readahead");
889 			error = EINVAL;
890 			goto out;
891 		}
892 		ret = sscanf(opt, "%d", &args.readahead);
893 		if (ret != 1 || args.readahead <= 0) {
894 			vfs_mount_error(mp, "illegal readahead: %s",
895 			    opt);
896 			error = EINVAL;
897 			goto out;
898 		}
899 		args.flags |= NFSMNT_READAHEAD;
900 	}
901 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
902 		if (opt == NULL) {
903 			vfs_mount_error(mp, "illegal wsize");
904 			error = EINVAL;
905 			goto out;
906 		}
907 		ret = sscanf(opt, "%d", &args.wsize);
908 		if (ret != 1 || args.wsize <= 0) {
909 			vfs_mount_error(mp, "illegal wsize: %s",
910 			    opt);
911 			error = EINVAL;
912 			goto out;
913 		}
914 		args.flags |= NFSMNT_WSIZE;
915 	}
916 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
917 		if (opt == NULL) {
918 			vfs_mount_error(mp, "illegal rsize");
919 			error = EINVAL;
920 			goto out;
921 		}
922 		ret = sscanf(opt, "%d", &args.rsize);
923 		if (ret != 1 || args.rsize <= 0) {
924 			vfs_mount_error(mp, "illegal wsize: %s",
925 			    opt);
926 			error = EINVAL;
927 			goto out;
928 		}
929 		args.flags |= NFSMNT_RSIZE;
930 	}
931 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
932 		if (opt == NULL) {
933 			vfs_mount_error(mp, "illegal retrans");
934 			error = EINVAL;
935 			goto out;
936 		}
937 		ret = sscanf(opt, "%d", &args.retrans);
938 		if (ret != 1 || args.retrans <= 0) {
939 			vfs_mount_error(mp, "illegal retrans: %s",
940 			    opt);
941 			error = EINVAL;
942 			goto out;
943 		}
944 		args.flags |= NFSMNT_RETRANS;
945 	}
946 	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
947 		ret = sscanf(opt, "%d", &args.acregmin);
948 		if (ret != 1 || args.acregmin < 0) {
949 			vfs_mount_error(mp, "illegal actimeo: %s",
950 			    opt);
951 			error = EINVAL;
952 			goto out;
953 		}
954 		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
955 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
956 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
957 	}
958 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
959 		ret = sscanf(opt, "%d", &args.acregmin);
960 		if (ret != 1 || args.acregmin < 0) {
961 			vfs_mount_error(mp, "illegal acregmin: %s",
962 			    opt);
963 			error = EINVAL;
964 			goto out;
965 		}
966 		args.flags |= NFSMNT_ACREGMIN;
967 	}
968 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
969 		ret = sscanf(opt, "%d", &args.acregmax);
970 		if (ret != 1 || args.acregmax < 0) {
971 			vfs_mount_error(mp, "illegal acregmax: %s",
972 			    opt);
973 			error = EINVAL;
974 			goto out;
975 		}
976 		args.flags |= NFSMNT_ACREGMAX;
977 	}
978 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
979 		ret = sscanf(opt, "%d", &args.acdirmin);
980 		if (ret != 1 || args.acdirmin < 0) {
981 			vfs_mount_error(mp, "illegal acdirmin: %s",
982 			    opt);
983 			error = EINVAL;
984 			goto out;
985 		}
986 		args.flags |= NFSMNT_ACDIRMIN;
987 	}
988 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
989 		ret = sscanf(opt, "%d", &args.acdirmax);
990 		if (ret != 1 || args.acdirmax < 0) {
991 			vfs_mount_error(mp, "illegal acdirmax: %s",
992 			    opt);
993 			error = EINVAL;
994 			goto out;
995 		}
996 		args.flags |= NFSMNT_ACDIRMAX;
997 	}
998 	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
999 		ret = sscanf(opt, "%d", &args.wcommitsize);
1000 		if (ret != 1 || args.wcommitsize < 0) {
1001 			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1002 			error = EINVAL;
1003 			goto out;
1004 		}
1005 		args.flags |= NFSMNT_WCOMMITSIZE;
1006 	}
1007 	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1008 		ret = sscanf(opt, "%d", &args.timeo);
1009 		if (ret != 1 || args.timeo <= 0) {
1010 			vfs_mount_error(mp, "illegal timeo: %s",
1011 			    opt);
1012 			error = EINVAL;
1013 			goto out;
1014 		}
1015 		args.flags |= NFSMNT_TIMEO;
1016 	}
1017 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1018 		ret = sscanf(opt, "%d", &args.timeo);
1019 		if (ret != 1 || args.timeo <= 0) {
1020 			vfs_mount_error(mp, "illegal timeout: %s",
1021 			    opt);
1022 			error = EINVAL;
1023 			goto out;
1024 		}
1025 		args.flags |= NFSMNT_TIMEO;
1026 	}
1027 	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1028 		ret = sscanf(opt, "%d", &nametimeo);
1029 		if (ret != 1 || nametimeo < 0) {
1030 			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1031 			error = EINVAL;
1032 			goto out;
1033 		}
1034 	}
1035 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1036 	    == 0) {
1037 		ret = sscanf(opt, "%d", &negnametimeo);
1038 		if (ret != 1 || negnametimeo < 0) {
1039 			vfs_mount_error(mp, "illegal negnametimeo: %s",
1040 			    opt);
1041 			error = EINVAL;
1042 			goto out;
1043 		}
1044 	}
1045 	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1046 	    0) {
1047 		ret = sscanf(opt, "%d", &minvers);
1048 		if (ret != 1 || minvers < 0 || minvers > 1 ||
1049 		    (args.flags & NFSMNT_NFSV4) == 0) {
1050 			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1051 			error = EINVAL;
1052 			goto out;
1053 		}
1054 	}
1055 	if (vfs_getopt(mp->mnt_optnew, "sec",
1056 		(void **) &secname, NULL) == 0)
1057 		nfs_sec_name(secname, &args.flags);
1058 
1059 	if (mp->mnt_flag & MNT_UPDATE) {
1060 		struct nfsmount *nmp = VFSTONFS(mp);
1061 
1062 		if (nmp == NULL) {
1063 			error = EIO;
1064 			goto out;
1065 		}
1066 
1067 		/*
1068 		 * If a change from TCP->UDP is done and there are thread(s)
1069 		 * that have I/O RPC(s) in progress with a tranfer size
1070 		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1071 		 * hung, retrying the RPC(s) forever. Usually these threads
1072 		 * will be seen doing an uninterruptible sleep on wait channel
1073 		 * "nfsreq".
1074 		 */
1075 		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1076 			tprintf(td->td_proc, LOG_WARNING,
1077 	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1078 
1079 		/*
1080 		 * When doing an update, we can't change version,
1081 		 * security, switch lockd strategies or change cookie
1082 		 * translation
1083 		 */
1084 		args.flags = (args.flags &
1085 		    ~(NFSMNT_NFSV3 |
1086 		      NFSMNT_NFSV4 |
1087 		      NFSMNT_KERB |
1088 		      NFSMNT_INTEGRITY |
1089 		      NFSMNT_PRIVACY |
1090 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1091 		    (nmp->nm_flag &
1092 			(NFSMNT_NFSV3 |
1093 			 NFSMNT_NFSV4 |
1094 			 NFSMNT_KERB |
1095 			 NFSMNT_INTEGRITY |
1096 			 NFSMNT_PRIVACY |
1097 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1098 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1099 		goto out;
1100 	}
1101 
1102 	/*
1103 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1104 	 * or no-connection mode for those protocols that support
1105 	 * no-connection mode (the flag will be cleared later for protocols
1106 	 * that do not support no-connection mode).  This will allow a client
1107 	 * to receive replies from a different IP then the request was
1108 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1109 	 * not 0.
1110 	 */
1111 	if (nfs_ip_paranoia == 0)
1112 		args.flags |= NFSMNT_NOCONN;
1113 
1114 	if (has_nfs_args_opt != 0) {
1115 		/*
1116 		 * In the 'nfs_args' case, the pointers in the args
1117 		 * structure are in userland - we copy them in here.
1118 		 */
1119 		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1120 			vfs_mount_error(mp, "Bad file handle");
1121 			error = EINVAL;
1122 			goto out;
1123 		}
1124 		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1125 		    args.fhsize);
1126 		if (error != 0)
1127 			goto out;
1128 		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1129 		if (error != 0)
1130 			goto out;
1131 		bzero(&hst[hstlen], MNAMELEN - hstlen);
1132 		args.hostname = hst;
1133 		/* sockargs() call must be after above copyin() calls */
1134 		error = getsockaddr(&nam, (caddr_t)args.addr,
1135 		    args.addrlen);
1136 		if (error != 0)
1137 			goto out;
1138 	} else {
1139 		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1140 		    &args.fhsize) == 0) {
1141 			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1142 				vfs_mount_error(mp, "Bad file handle");
1143 				error = EINVAL;
1144 				goto out;
1145 			}
1146 			bcopy(args.fh, nfh, args.fhsize);
1147 		} else {
1148 			args.fhsize = 0;
1149 		}
1150 		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1151 		    (void **)&args.hostname, &len);
1152 		if (args.hostname == NULL) {
1153 			vfs_mount_error(mp, "Invalid hostname");
1154 			error = EINVAL;
1155 			goto out;
1156 		}
1157 		bcopy(args.hostname, hst, MNAMELEN);
1158 		hst[MNAMELEN - 1] = '\0';
1159 	}
1160 
1161 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1162 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1163 	else {
1164 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1165 		cp = strchr(srvkrbname, ':');
1166 		if (cp != NULL)
1167 			*cp = '\0';
1168 	}
1169 	srvkrbnamelen = strlen(srvkrbname);
1170 
1171 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1172 		strlcpy(krbname, name, sizeof (krbname));
1173 	else
1174 		krbname[0] = '\0';
1175 	krbnamelen = strlen(krbname);
1176 
1177 	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1178 		strlcpy(dirpath, name, sizeof (dirpath));
1179 	else
1180 		dirpath[0] = '\0';
1181 	dirlen = strlen(dirpath);
1182 
1183 	if (has_nfs_args_opt == 0) {
1184 		if (vfs_getopt(mp->mnt_optnew, "addr",
1185 		    (void **)&args.addr, &args.addrlen) == 0) {
1186 			if (args.addrlen > SOCK_MAXADDRLEN) {
1187 				error = ENAMETOOLONG;
1188 				goto out;
1189 			}
1190 			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1191 			bcopy(args.addr, nam, args.addrlen);
1192 			nam->sa_len = args.addrlen;
1193 		} else {
1194 			vfs_mount_error(mp, "No server address");
1195 			error = EINVAL;
1196 			goto out;
1197 		}
1198 	}
1199 
1200 	args.fh = nfh;
1201 	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1202 	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1203 	    nametimeo, negnametimeo, minvers);
1204 out:
1205 	if (!error) {
1206 		MNT_ILOCK(mp);
1207 		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1208 		    MNTK_USES_BCACHE;
1209 		MNT_IUNLOCK(mp);
1210 	}
1211 	return (error);
1212 }
1213 
1214 
1215 /*
1216  * VFS Operations.
1217  *
1218  * mount system call
1219  * It seems a bit dumb to copyinstr() the host and path here and then
1220  * bcopy() them in mountnfs(), but I wanted to detect errors before
1221  * doing the sockargs() call because sockargs() allocates an mbuf and
1222  * an error after that means that I have to release the mbuf.
1223  */
1224 /* ARGSUSED */
1225 static int
1226 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1227 {
1228 	int error;
1229 	struct nfs_args args;
1230 
1231 	error = copyin(data, &args, sizeof (struct nfs_args));
1232 	if (error)
1233 		return error;
1234 
1235 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1236 
1237 	error = kernel_mount(ma, flags);
1238 	return (error);
1239 }
1240 
/*
 * Common code for mount and mountroot
 *
 * For a new mount this allocates and fills in a struct nfsmount, connects
 * to the server, optionally looks up the NFSv4 mount path, and obtains the
 * root vnode.
 *
 *	argp		mount arguments; flags, fh, fhsize, sotype and proto
 *			are read here and the structure is also handed to
 *			nfs_decode_args().
 *	mp		mount point being set up; mnt_data is pointed at the
 *			new nfsmount.
 *	nam		server address.  Stored in nm_nam on the normal path;
 *			freed here on the MNT_UPDATE path and on every
 *			failure.
 *	hst		host name; MNAMELEN bytes are copied into
 *			mnt_stat.f_mntfromname.
 *	krbname, dirpath, srvkrbname (and their lengths)
 *			copied into the name storage allocated together with
 *			the nfsmount.
 *	vpp		on success, set to the root vnode, returned
 *			referenced but unlocked.
 *	cred		a reference is taken and kept in nm_sockreq.nr_cred.
 *	nametimeo, negnametimeo
 *			stored as the initial nm_nametimeo/nm_negnametimeo.
 *	minvers		NFSv4 minor version; ignored unless NFSMNT_NFSV4 is
 *			set in argp->flags.
 *
 * Returns 0 on success.  On failure returns the error from
 * newnfs_connect(), nfscl_getcl(), nfsrpc_getdirpath() (mapped through
 * nfscl_maperr()) or ncl_nget(), or EIO when no root file handle is
 * available; in that case everything allocated here has been released.
 */
static int
mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
    struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
    int minvers)
{
	struct nfsmount *nmp;
	struct nfsnode *np;
	int error, trycnt, ret;
	struct nfsvattr nfsva;
	struct nfsclclient *clp;
	struct nfsclds *dsp, *tdsp;
	uint32_t lease;
	/* Shared by all mounts; seeded from nfsboottime on first use. */
	static u_int64_t clval = 0;

	NFSCL_DEBUG(3, "in mnt\n");
	clp = NULL;
	if (mp->mnt_flag & MNT_UPDATE) {
		/*
		 * Updates are not processed here: log the fact, release
		 * the address and report success.  The nmp value fetched
		 * below is not used before returning.
		 */
		nmp = VFSTONFS(mp);
		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
		FREE(nam, M_SONAME);
		return (0);
	} else {
		/*
		 * One zeroed allocation holds the nfsmount followed by
		 * the three name strings.
		 */
		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
		    krbnamelen + dirlen + srvkrbnamelen + 2,
		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
		TAILQ_INIT(&nmp->nm_bufq);
		if (clval == 0)
			clval = (u_int64_t)nfsboottime.tv_sec;
		nmp->nm_clval = clval++;
		nmp->nm_krbnamelen = krbnamelen;
		nmp->nm_dirpathlen = dirlen;
		nmp->nm_srvkrbnamelen = srvkrbnamelen;
		if (td->td_ucred->cr_uid != (uid_t)0) {
			/*
			 * nm_uid is used to get KerberosV credentials for
			 * the nfsv4 state handling operations if there is
			 * no host based principal set. Use the uid of
			 * this user if not root, since they are doing the
			 * mount. I don't think setting this for root will
			 * work, since root normally does not have user
			 * credentials in a credentials cache.
			 */
			nmp->nm_uid = td->td_ucred->cr_uid;
		} else {
			/*
			 * Just set to -1, so it won't be used.
			 */
			nmp->nm_uid = (uid_t)-1;
		}

		/* Copy and null terminate all the names */
		if (nmp->nm_krbnamelen > 0) {
			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
		}
		if (nmp->nm_dirpathlen > 0) {
			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
			    nmp->nm_dirpathlen);
			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
			    + 1] = '\0';
		}
		if (nmp->nm_srvkrbnamelen > 0) {
			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
			    nmp->nm_srvkrbnamelen);
			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
			    + nmp->nm_srvkrbnamelen + 2] = '\0';
		}
		/* Keep our own reference on the caller's credentials. */
		nmp->nm_sockreq.nr_cred = crhold(cred);
		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
		mp->mnt_data = nmp;
		nmp->nm_getinfo = nfs_getnlminfo;
		nmp->nm_vinvalbuf = ncl_vinvalbuf;
	}
	vfs_getnewfsid(mp);
	nmp->nm_mountp = mp;
	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);

	/*
	 * Since nfs_decode_args() might optionally set them, these
	 * need to be set to defaults before the call, so that the
	 * optional settings aren't overwritten.
	 */
	nmp->nm_nametimeo = nametimeo;
	nmp->nm_negnametimeo = negnametimeo;
	nmp->nm_timeo = NFS_TIMEO;
	nmp->nm_retry = NFS_RETRANS;
	nmp->nm_readahead = NFS_DEFRAHEAD;

	/* This is empirical approximation of sqrt(hibufspace) * 256. */
	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
		nmp->nm_wcommitsize *= 2;
	nmp->nm_wcommitsize *= 256;

	/* The minor version only applies to NFSv4 mounts. */
	if ((argp->flags & NFSMNT_NFSV4) != 0)
		nmp->nm_minorvers = minvers;
	else
		nmp->nm_minorvers = 0;

	nfs_decode_args(mp, nmp, argp, hst, cred, td);

	/*
	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
	 * high, depending on whether we end up with negative offsets in
	 * the client or server somewhere.  2GB-1 may be safer.
	 *
	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
	 * that we can handle until we find out otherwise.
	 */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
		nmp->nm_maxfilesize = 0xffffffffLL;
	else
		nmp->nm_maxfilesize = OFF_MAX;

	/* Neither V3 nor V4 requested: use the fixed transfer sizes. */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
		nmp->nm_wsize = NFS_WSIZE;
		nmp->nm_rsize = NFS_RSIZE;
		nmp->nm_readdirsize = NFS_READDIRSIZE;
	}
	nmp->nm_numgrps = NFS_MAXGRPS;
	/* Negative delays taken from the global settings are clamped to 0. */
	nmp->nm_tprintf_delay = nfs_tprintf_delay;
	if (nmp->nm_tprintf_delay < 0)
		nmp->nm_tprintf_delay = 0;
	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
	if (nmp->nm_tprintf_initial_delay < 0)
		nmp->nm_tprintf_initial_delay = 0;
	nmp->nm_fhsize = argp->fhsize;
	if (nmp->nm_fhsize > 0)
		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
	/* nam is kept in the nfsmount from here on. */
	nmp->nm_nam = nam;
	/* Set up the sockets and per-host congestion */
	nmp->nm_sotype = argp->sotype;
	nmp->nm_soproto = argp->proto;
	nmp->nm_sockreq.nr_prog = NFS_PROG;
	if ((argp->flags & NFSMNT_NFSV4))
		nmp->nm_sockreq.nr_vers = NFS_VER4;
	else if ((argp->flags & NFSMNT_NFSV3))
		nmp->nm_sockreq.nr_vers = NFS_VER3;
	else
		nmp->nm_sockreq.nr_vers = NFS_VER2;


	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
		goto bad;
	/* For NFSv4.1, get the clientid now. */
	if (nmp->nm_minorvers > 0) {
		NFSCL_DEBUG(3, "at getcl\n");
		error = nfscl_getcl(mp, cred, td, 0, &clp);
		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
		if (error != 0)
			goto bad;
	}

	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
	    nmp->nm_dirpathlen > 0) {
		NFSCL_DEBUG(3, "in dirp\n");
		/*
		 * If the fhsize on the mount point == 0 for V4, the mount
		 * path needs to be looked up.
		 */
		/* Up to three attempts, with a nap after each failure. */
		trycnt = 3;
		do {
			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
			    cred, td);
			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
			if (error)
				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
		} while (error && --trycnt > 0);
		if (error) {
			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
			goto bad;
		}
	}

	/*
	 * A reference count is needed on the nfsnode representing the
	 * remote root.  If this object is not persistent, then backward
	 * traversals of the mount point (i.e. "..") will not work if
	 * the nfsnode gets flushed out of the cache. Ufs does not have
	 * this problem, because one can identify root inodes by their
	 * number == ROOTINO (2).
	 */
	if (nmp->nm_fhsize > 0) {
		/*
		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
		 * non-zero for the root vnode. f_iosize will be set correctly
		 * by nfs_statfs() before any I/O occurs.
		 */
		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
		    LK_EXCLUSIVE);
		if (error)
			goto bad;
		*vpp = NFSTOV(np);

		/*
		 * Get file attributes and transfer parameters for the
		 * mountpoint.  This has the side effect of filling in
		 * (*vpp)->v_type with the correct value.
		 */
		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
		    cred, td, &nfsva, NULL, &lease);
		if (ret) {
			/*
			 * Just set default values to get things going.
			 */
			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
			nfsva.na_vattr.va_type = VDIR;
			nfsva.na_vattr.va_mode = 0777;
			nfsva.na_vattr.va_nlink = 100;
			nfsva.na_vattr.va_uid = (uid_t)0;
			nfsva.na_vattr.va_gid = (gid_t)0;
			nfsva.na_vattr.va_fileid = 2;
			nfsva.na_vattr.va_gen = 1;
			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
			nfsva.na_vattr.va_size = 512 * 1024;
			lease = 60;
		}
		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
		if (nmp->nm_minorvers > 0) {
			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
			NFSLOCKCLSTATE();
			clp->nfsc_renew = NFSCL_RENEW(lease);
			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
			/* Bump the revision, skipping over 0 on wrap. */
			clp->nfsc_clientidrev++;
			if (clp->nfsc_clientidrev == 0)
				clp->nfsc_clientidrev++;
			NFSUNLOCKCLSTATE();
			/*
			 * Mount will succeed, so the renew thread can be
			 * started now.
			 */
			nfscl_start_renewthread(clp);
			nfscl_clientrelease(clp);
		}
		if (argp->flags & NFSMNT_NFSV3)
			ncl_fsinfo(nmp, *vpp, cred, td);

		/* Mark if the mount point supports NFSv4 ACLs. */
		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
		    ret == 0 &&
		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_NFS4ACLS;
			MNT_IUNLOCK(mp);
		}

		/*
		 * Lose the lock but keep the ref.
		 */
		NFSVOPUNLOCK(*vpp, 0);
		return (0);
	}
	/* No root file handle was supplied or could be looked up. */
	error = EIO;

bad:
	/*
	 * Undo everything set up above.
	 * NOTE(review): mp->mnt_data was pointed at nmp earlier and is not
	 * cleared before nmp is freed; confirm that nothing dereferences it
	 * after a failed mount.
	 */
	if (clp != NULL)
		nfscl_clientrelease(clp);
	newnfs_disconnect(&nmp->nm_sockreq);
	crfree(nmp->nm_sockreq.nr_cred);
	if (nmp->nm_sockreq.nr_auth != NULL)
		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
	mtx_destroy(&nmp->nm_mtx);
	if (nmp->nm_clp != NULL) {
		NFSLOCKCLSTATE();
		LIST_REMOVE(nmp->nm_clp, nfsc_list);
		NFSUNLOCKCLSTATE();
		free(nmp->nm_clp, M_NFSCLCLIENT);
	}
	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
		nfscl_freenfsclds(dsp);
	FREE(nmp, M_NEWNFSMNT);
	FREE(nam, M_SONAME);
	return (error);
}
1523 
1524 /*
1525  * unmount system call
1526  */
1527 static int
1528 nfs_unmount(struct mount *mp, int mntflags)
1529 {
1530 	struct thread *td;
1531 	struct nfsmount *nmp;
1532 	int error, flags = 0, i, trycnt = 0;
1533 	struct nfsclds *dsp, *tdsp;
1534 
1535 	td = curthread;
1536 
1537 	if (mntflags & MNT_FORCE)
1538 		flags |= FORCECLOSE;
1539 	nmp = VFSTONFS(mp);
1540 	/*
1541 	 * Goes something like this..
1542 	 * - Call vflush() to clear out vnodes for this filesystem
1543 	 * - Close the socket
1544 	 * - Free up the data structures
1545 	 */
1546 	/* In the forced case, cancel any outstanding requests. */
1547 	if (mntflags & MNT_FORCE) {
1548 		error = newnfs_nmcancelreqs(nmp);
1549 		if (error)
1550 			goto out;
1551 		/* For a forced close, get rid of the renew thread now */
1552 		nfscl_umount(nmp, td);
1553 	}
1554 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1555 	do {
1556 		error = vflush(mp, 1, flags, td);
1557 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1558 			(void) nfs_catnap(PSOCK, error, "newndm");
1559 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1560 	if (error)
1561 		goto out;
1562 
1563 	/*
1564 	 * We are now committed to the unmount.
1565 	 */
1566 	if ((mntflags & MNT_FORCE) == 0)
1567 		nfscl_umount(nmp, td);
1568 	/* Make sure no nfsiods are assigned to this mount. */
1569 	mtx_lock(&ncl_iod_mutex);
1570 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1571 		if (ncl_iodmount[i] == nmp) {
1572 			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1573 			ncl_iodmount[i] = NULL;
1574 		}
1575 	mtx_unlock(&ncl_iod_mutex);
1576 	newnfs_disconnect(&nmp->nm_sockreq);
1577 	crfree(nmp->nm_sockreq.nr_cred);
1578 	FREE(nmp->nm_nam, M_SONAME);
1579 	if (nmp->nm_sockreq.nr_auth != NULL)
1580 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1581 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1582 	mtx_destroy(&nmp->nm_mtx);
1583 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1584 		nfscl_freenfsclds(dsp);
1585 	FREE(nmp, M_NEWNFSMNT);
1586 out:
1587 	return (error);
1588 }
1589 
1590 /*
1591  * Return root of a filesystem
1592  */
1593 static int
1594 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1595 {
1596 	struct vnode *vp;
1597 	struct nfsmount *nmp;
1598 	struct nfsnode *np;
1599 	int error;
1600 
1601 	nmp = VFSTONFS(mp);
1602 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1603 	if (error)
1604 		return error;
1605 	vp = NFSTOV(np);
1606 	/*
1607 	 * Get transfer parameters and attributes for root vnode once.
1608 	 */
1609 	mtx_lock(&nmp->nm_mtx);
1610 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1611 		mtx_unlock(&nmp->nm_mtx);
1612 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1613 	} else
1614 		mtx_unlock(&nmp->nm_mtx);
1615 	if (vp->v_type == VNON)
1616 	    vp->v_type = VDIR;
1617 	vp->v_vflag |= VV_ROOT;
1618 	*vpp = vp;
1619 	return (0);
1620 }
1621 
1622 /*
1623  * Flush out the buffer cache
1624  */
1625 /* ARGSUSED */
1626 static int
1627 nfs_sync(struct mount *mp, int waitfor)
1628 {
1629 	struct vnode *vp, *mvp;
1630 	struct thread *td;
1631 	int error, allerror = 0;
1632 
1633 	td = curthread;
1634 
1635 	MNT_ILOCK(mp);
1636 	/*
1637 	 * If a forced dismount is in progress, return from here so that
1638 	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1639 	 * calling VFS_UNMOUNT().
1640 	 */
1641 	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1642 		MNT_IUNLOCK(mp);
1643 		return (EBADF);
1644 	}
1645 	MNT_IUNLOCK(mp);
1646 
1647 	/*
1648 	 * Force stale buffer cache information to be flushed.
1649 	 */
1650 loop:
1651 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1652 		/* XXX Racy bv_cnt check. */
1653 		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1654 		    waitfor == MNT_LAZY) {
1655 			VI_UNLOCK(vp);
1656 			continue;
1657 		}
1658 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1659 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1660 			goto loop;
1661 		}
1662 		error = VOP_FSYNC(vp, waitfor, td);
1663 		if (error)
1664 			allerror = error;
1665 		NFSVOPUNLOCK(vp, 0);
1666 		vrele(vp);
1667 	}
1668 	return (allerror);
1669 }
1670 
1671 static int
1672 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1673 {
1674 	struct nfsmount *nmp = VFSTONFS(mp);
1675 	struct vfsquery vq;
1676 	int error;
1677 
1678 	bzero(&vq, sizeof(vq));
1679 	switch (op) {
1680 #if 0
1681 	case VFS_CTL_NOLOCKS:
1682 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1683  		if (req->oldptr != NULL) {
1684  			error = SYSCTL_OUT(req, &val, sizeof(val));
1685  			if (error)
1686  				return (error);
1687  		}
1688  		if (req->newptr != NULL) {
1689  			error = SYSCTL_IN(req, &val, sizeof(val));
1690  			if (error)
1691  				return (error);
1692 			if (val)
1693 				nmp->nm_flag |= NFSMNT_NOLOCKS;
1694 			else
1695 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1696  		}
1697 		break;
1698 #endif
1699 	case VFS_CTL_QUERY:
1700 		mtx_lock(&nmp->nm_mtx);
1701 		if (nmp->nm_state & NFSSTA_TIMEO)
1702 			vq.vq_flags |= VQ_NOTRESP;
1703 		mtx_unlock(&nmp->nm_mtx);
1704 #if 0
1705 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1706 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1707 			vq.vq_flags |= VQ_NOTRESPLOCK;
1708 #endif
1709 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1710 		break;
1711  	case VFS_CTL_TIMEO:
1712  		if (req->oldptr != NULL) {
1713  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1714  			    sizeof(nmp->nm_tprintf_initial_delay));
1715  			if (error)
1716  				return (error);
1717  		}
1718  		if (req->newptr != NULL) {
1719 			error = vfs_suser(mp, req->td);
1720 			if (error)
1721 				return (error);
1722  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1723  			    sizeof(nmp->nm_tprintf_initial_delay));
1724  			if (error)
1725  				return (error);
1726  			if (nmp->nm_tprintf_initial_delay < 0)
1727  				nmp->nm_tprintf_initial_delay = 0;
1728  		}
1729 		break;
1730 	default:
1731 		return (ENOTSUP);
1732 	}
1733 	return (0);
1734 }
1735 
/*
 * Cancel every RPC that is in progress on this mount so that each of
 * them returns an error.  A forced dismount relies on this to let
 * dounmount() get as far as VFS_UNMOUNT().  The result of the
 * cancellation is not reported to the caller.
 */
static void
nfs_purge(struct mount *mp)
{

	(void) newnfs_nmcancelreqs(VFSTONFS(mp));
}
1748 
1749 /*
1750  * Extract the information needed by the nlm from the nfs vnode.
1751  */
1752 static void
1753 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1754     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1755     struct timeval *timeop)
1756 {
1757 	struct nfsmount *nmp;
1758 	struct nfsnode *np = VTONFS(vp);
1759 
1760 	nmp = VFSTONFS(vp->v_mount);
1761 	if (fhlenp != NULL)
1762 		*fhlenp = (size_t)np->n_fhp->nfh_len;
1763 	if (fhp != NULL)
1764 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1765 	if (sp != NULL)
1766 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1767 	if (is_v3p != NULL)
1768 		*is_v3p = NFS_ISV3(vp);
1769 	if (sizep != NULL)
1770 		*sizep = np->n_size;
1771 	if (timeop != NULL) {
1772 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1773 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1774 	}
1775 }
1776 
/*
 * Append the option name opt to the output when testval is non-zero.
 *
 * *buf is the current write position and *blen the space left there.
 * The name is only written when it fits together with its terminating
 * NUL; both are then advanced past the text.  Otherwise nothing changes.
 * nmp is not used.
 */
static __inline void
nfscl_printopt(struct nfsmount *nmp, int testval, char *opt, char **buf,
    size_t *blen)
{
	int written;

	if (testval == 0)
		return;
	if (*blen <= strlen(opt))
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	/* Cannot happen given the size check above; complain if it does. */
	if (written != strlen(opt))
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
1794 
/*
 * Append "opt=optval" (optval in decimal) to the output.
 *
 * *buf is the current write position and *blen the space left there.
 * Nothing is attempted unless there is room for more than the name and
 * the '='.  When the complete text and its terminating NUL fit, *buf and
 * *blen are advanced past the text.  When the number does not fit, the
 * partial text is erased again, so that the output never ends in an
 * option showing only the leading digits of its value; *buf and *blen
 * are then left unchanged.  nmp is not used.
 */
static __inline void
nfscl_printoptval(struct nfsmount *nmp, int optval, char *opt, char **buf,
    size_t *blen)
{
	int len;

	if (*blen <= strlen(opt) + 1)
		return;

	len = snprintf(*buf, *blen, "%s=%d", opt, optval);
	if (len >= 0 && (size_t)len < *blen) {
		*buf += len;
		*blen -= len;
	} else {
		/* Truncated: drop the fragment snprintf() left behind. */
		**buf = '\0';
	}
}
1812 
1813 /*
1814  * Load the option flags and values into the buffer.
1815  */
1816 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1817 {
1818 	char *buf;
1819 	size_t blen;
1820 
1821 	buf = buffer;
1822 	blen = buflen;
1823 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1824 	    &blen);
1825 	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1826 		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1827 		    &blen);
1828 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1829 		    &buf, &blen);
1830 	}
1831 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1832 	    &blen);
1833 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1834 	    "nfsv2", &buf, &blen);
1835 	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1836 	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1837 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1838 	    &buf, &blen);
1839 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1840 	    &buf, &blen);
1841 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1842 	    &blen);
1843 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1844 	    &blen);
1845 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1846 	    &blen);
1847 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1848 	    &blen);
1849 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1850 	    &blen);
1851 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
1852 	    ",noncontigwr", &buf, &blen);
1853 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1854 	    0, ",lockd", &buf, &blen);
1855 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1856 	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1857 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1858 	    &buf, &blen);
1859 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1860 	    &buf, &blen);
1861 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1862 	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
1863 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1864 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1865 	    &buf, &blen);
1866 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1867 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
1868 	    &buf, &blen);
1869 	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1870 	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1871 	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
1872 	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
1873 	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
1874 	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
1875 	    &blen);
1876 	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
1877 	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
1878 	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
1879 	    &blen);
1880 	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
1881 	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
1882 	    &blen);
1883 	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
1884 	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
1885 }
1886 
1887