xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision b78ee15e9f04ae15c3e1200df974473167524d17)
1 /*-
2  * Copyright (c) 1989, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/limits.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/mount.h>
55 #include <sys/proc.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/sockio.h>
59 #include <sys/sysctl.h>
60 #include <sys/vnode.h>
61 #include <sys/signalvar.h>
62 
63 #include <vm/vm.h>
64 #include <vm/vm_extern.h>
65 #include <vm/uma.h>
66 
67 #include <net/if.h>
68 #include <net/route.h>
69 #include <netinet/in.h>
70 
71 #include <fs/nfs/nfsport.h>
72 #include <fs/nfsclient/nfsnode.h>
73 #include <fs/nfsclient/nfsmount.h>
74 #include <fs/nfsclient/nfs.h>
75 #include <nfs/nfsdiskless.h>
76 
/* Register the "nfscl" feature together with its description string. */
FEATURE(nfscl, "NFSv4 client");

/*
 * Globals that are only declared here (extern); their definitions are not
 * in this part of the file.
 */
extern int nfscl_ticks;
extern struct timeval nfsboottime;
extern struct nfsstats	newnfsstats;
extern int nfsrv_useacl;
extern int nfscl_debuglevel;
extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
extern struct mtx ncl_iod_mutex;
NFSCLSTATEMUTEX;

/* malloc types used by the client: (type, short name, description). */
MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header");
MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct");
91 
/* Tunables hung off the _vfs_nfs sysctl node (declared elsewhere). */
SYSCTL_DECL(_vfs_nfs);
/* Read/write integer knob, default 1; not referenced in this part of the file. */
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0, "");
/*
 * Read/write integer knob "downdelayinitial", default
 * NFS_TPRINTF_INITIAL_DELAY.
 * NOTE(review): the second SYSCTL_INT argument is the delay constant
 * itself, where the neighbouring knobs pass OID_AUTO; it looks like the
 * default value is doubling as the OID number -- confirm this is intended.
 */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
/*
 * how long between console messages "nfs server foo not responding"
 * NOTE(review): same OID-number remark as for downdelayinitial above.
 */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
#ifdef NFS_DEBUG
/* Debug toggle, only compiled in when NFS_DEBUG is defined. */
int nfs_debug;
SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
    "Toggle debug flag");
#endif
108 
/*
 * Forward declarations: file-local helpers first, then the VFS entry
 * points that are wired into nfs_vfsops below.
 */
static int	nfs_mountroot(struct mount *);
static void	nfs_sec_name(char *, int *);
static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
		    struct nfs_args *argp, const char *, struct ucred *,
		    struct thread *);
static int	mountnfs(struct nfs_args *, struct mount *,
		    struct sockaddr *, char *, u_char *, int, u_char *, int,
		    u_char *, int, struct vnode **, struct ucred *,
		    struct thread *, int, int, int);
static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
		    struct sockaddr_storage *, int *, off_t *,
		    struct timeval *);
static vfs_mount_t nfs_mount;
static vfs_cmount_t nfs_cmount;
static vfs_unmount_t nfs_unmount;
static vfs_root_t nfs_root;
static vfs_statfs_t nfs_statfs;
static vfs_sync_t nfs_sync;
static vfs_sysctl_t nfs_sysctl;
static vfs_purge_t nfs_purge;
129 
/*
 * nfs vfs operations.
 *
 * Operations vector for this filesystem.  ncl_init/ncl_uninit are not
 * defined in this file; the remaining entries are the static functions
 * declared above.  VFS_SET registers the vector under the name "nfs"
 * with the VFCF_NETWORK and VFCF_SBDRY flags.
 */
static struct vfsops nfs_vfsops = {
	.vfs_init =		ncl_init,
	.vfs_mount =		nfs_mount,
	.vfs_cmount =		nfs_cmount,
	.vfs_root =		nfs_root,
	.vfs_statfs =		nfs_statfs,
	.vfs_sync =		nfs_sync,
	.vfs_uninit =		ncl_uninit,
	.vfs_unmount =		nfs_unmount,
	.vfs_sysctl =		nfs_sysctl,
	.vfs_purge =		nfs_purge,
};
VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
146 
/* So that loader and kldload(2) can find us, wherever we are. */
MODULE_VERSION(nfs, 1);
/*
 * Dependencies of the "nfs" module on nfscommon, krpc, nfssvc and nfslock;
 * all three version arguments are 1 for each of them.
 */
MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
MODULE_DEPEND(nfs, krpc, 1, 1, 1);
MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
153 
/*
 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
 * can be shared by both NFS clients. It is declared here so that it
 * will be defined for kernels built without NFS_ROOT, although it
 * isn't used in that case.
 *
 * nfs_diskless_valid is interpreted by nfs_mountroot(): 0 means there is
 * no diskless information, 1 means the old-format nfs_diskless must be
 * converted first; nfs_convert_diskless() sets it to 3 when it is done.
 */
#if !defined(NFS_ROOT)
struct nfs_diskless	nfs_diskless = { { { 0 } } };
struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
int			nfs_diskless_valid = 0;
#endif

/* Read-only sysctls exporting the diskless boot state. */
SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &nfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

/* Exports the nfsv3_diskless.root_hostnam string. */
SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");

/* Exports nfsv3_diskless.root_saddr as an opaque blob of its full size. */
SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
    "%Ssockaddr_in", "Diskless root nfs address");
176 
177 
/*
 * Prototypes for the diskless-root helpers defined further down, plus
 * newnfsargs_ntoh(), which is not defined in this part of the file.
 */
void		newnfsargs_ntoh(struct nfs_args *);
static int	nfs_mountdiskless(char *,
		    struct sockaddr_in *, struct nfs_args *,
		    struct thread *, struct vnode **, struct mount *);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
185 
186 int
187 newnfs_iosize(struct nfsmount *nmp)
188 {
189 	int iosize, maxio;
190 
191 	/* First, set the upper limit for iosize */
192 	if (nmp->nm_flag & NFSMNT_NFSV4) {
193 		maxio = NFS_MAXBSIZE;
194 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
195 		if (nmp->nm_sotype == SOCK_DGRAM)
196 			maxio = NFS_MAXDGRAMDATA;
197 		else
198 			maxio = NFS_MAXBSIZE;
199 	} else {
200 		maxio = NFS_V2MAXDATA;
201 	}
202 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
203 		nmp->nm_rsize = maxio;
204 	if (nmp->nm_rsize > NFS_MAXBSIZE)
205 		nmp->nm_rsize = NFS_MAXBSIZE;
206 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
207 		nmp->nm_readdirsize = maxio;
208 	if (nmp->nm_readdirsize > nmp->nm_rsize)
209 		nmp->nm_readdirsize = nmp->nm_rsize;
210 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
211 		nmp->nm_wsize = maxio;
212 	if (nmp->nm_wsize > NFS_MAXBSIZE)
213 		nmp->nm_wsize = NFS_MAXBSIZE;
214 
215 	/*
216 	 * Calculate the size used for io buffers.  Use the larger
217 	 * of the two sizes to minimise nfs requests but make sure
218 	 * that it is at least one VM page to avoid wasting buffer
219 	 * space.
220 	 */
221 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
222 	iosize = imax(iosize, PAGE_SIZE);
223 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
224 	return (iosize);
225 }
226 
227 static void
228 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
229 {
230 
231 	args->version = NFS_ARGSVERSION;
232 	args->addr = oargs->addr;
233 	args->addrlen = oargs->addrlen;
234 	args->sotype = oargs->sotype;
235 	args->proto = oargs->proto;
236 	args->fh = oargs->fh;
237 	args->fhsize = oargs->fhsize;
238 	args->flags = oargs->flags;
239 	args->wsize = oargs->wsize;
240 	args->rsize = oargs->rsize;
241 	args->readdirsize = oargs->readdirsize;
242 	args->timeo = oargs->timeo;
243 	args->retrans = oargs->retrans;
244 	args->readahead = oargs->readahead;
245 	args->hostname = oargs->hostname;
246 }
247 
248 static void
249 nfs_convert_diskless(void)
250 {
251 
252 	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
253 		sizeof(struct ifaliasreq));
254 	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
255 		sizeof(struct sockaddr_in));
256 	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
257 	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
258 		nfsv3_diskless.root_fhsize = NFSX_MYFH;
259 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
260 	} else {
261 		nfsv3_diskless.root_fhsize = NFSX_V2FH;
262 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
263 	}
264 	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
265 		sizeof(struct sockaddr_in));
266 	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
267 	nfsv3_diskless.root_time = nfs_diskless.root_time;
268 	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
269 		MAXHOSTNAMELEN);
270 	nfs_diskless_valid = 3;
271 }
272 
273 /*
274  * nfs statfs call
275  */
276 static int
277 nfs_statfs(struct mount *mp, struct statfs *sbp)
278 {
279 	struct vnode *vp;
280 	struct thread *td;
281 	struct nfsmount *nmp = VFSTONFS(mp);
282 	struct nfsvattr nfsva;
283 	struct nfsfsinfo fs;
284 	struct nfsstatfs sb;
285 	int error = 0, attrflag, gotfsinfo = 0, ret;
286 	struct nfsnode *np;
287 
288 	td = curthread;
289 
290 	error = vfs_busy(mp, MBF_NOWAIT);
291 	if (error)
292 		return (error);
293 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
294 	if (error) {
295 		vfs_unbusy(mp);
296 		return (error);
297 	}
298 	vp = NFSTOV(np);
299 	mtx_lock(&nmp->nm_mtx);
300 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
301 		mtx_unlock(&nmp->nm_mtx);
302 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
303 		    &attrflag, NULL);
304 		if (!error)
305 			gotfsinfo = 1;
306 	} else
307 		mtx_unlock(&nmp->nm_mtx);
308 	if (!error)
309 		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
310 		    &attrflag, NULL);
311 	if (error != 0)
312 		NFSCL_DEBUG(2, "statfs=%d\n", error);
313 	if (attrflag == 0) {
314 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
315 		    td->td_ucred, td, &nfsva, NULL, NULL);
316 		if (ret) {
317 			/*
318 			 * Just set default values to get things going.
319 			 */
320 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
321 			nfsva.na_vattr.va_type = VDIR;
322 			nfsva.na_vattr.va_mode = 0777;
323 			nfsva.na_vattr.va_nlink = 100;
324 			nfsva.na_vattr.va_uid = (uid_t)0;
325 			nfsva.na_vattr.va_gid = (gid_t)0;
326 			nfsva.na_vattr.va_fileid = 2;
327 			nfsva.na_vattr.va_gen = 1;
328 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
329 			nfsva.na_vattr.va_size = 512 * 1024;
330 		}
331 	}
332 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
333 	if (!error) {
334 	    mtx_lock(&nmp->nm_mtx);
335 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
336 		nfscl_loadfsinfo(nmp, &fs);
337 	    nfscl_loadsbinfo(nmp, &sb, sbp);
338 	    sbp->f_iosize = newnfs_iosize(nmp);
339 	    mtx_unlock(&nmp->nm_mtx);
340 	    if (sbp != &mp->mnt_stat) {
341 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
342 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
343 	    }
344 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
345 	} else if (NFS_ISV4(vp)) {
346 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
347 	}
348 	vput(vp);
349 	vfs_unbusy(mp);
350 	return (error);
351 }
352 
353 /*
354  * nfs version 3 fsinfo rpc call
355  */
356 int
357 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
358     struct thread *td)
359 {
360 	struct nfsfsinfo fs;
361 	struct nfsvattr nfsva;
362 	int error, attrflag;
363 
364 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
365 	if (!error) {
366 		if (attrflag)
367 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
368 			    1);
369 		mtx_lock(&nmp->nm_mtx);
370 		nfscl_loadfsinfo(nmp, &fs);
371 		mtx_unlock(&nmp->nm_mtx);
372 	}
373 	return (error);
374 }
375 
376 /*
377  * Mount a remote root fs via. nfs. This depends on the info in the
378  * nfs_diskless structure that has been filled in properly by some primary
379  * bootstrap.
380  * It goes something like this:
381  * - do enough of "ifconfig" by calling ifioctl() so that the system
382  *   can talk to the server
383  * - If nfs_diskless.mygateway is filled in, use that address as
384  *   a default gateway.
385  * - build the rootfs mount point and call mountnfs() to do the rest.
386  *
387  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
388  * structure, as well as other global NFS client variables here, as
389  * nfs_mountroot() will be called once in the boot before any other NFS
390  * client activity occurs.
391  */
392 static int
393 nfs_mountroot(struct mount *mp)
394 {
395 	struct thread *td = curthread;
396 	struct nfsv3_diskless *nd = &nfsv3_diskless;
397 	struct socket *so;
398 	struct vnode *vp;
399 	struct ifreq ir;
400 	int error;
401 	u_long l;
402 	char buf[128];
403 	char *cp;
404 
405 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
406 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
407 #elif defined(NFS_ROOT)
408 	nfs_setup_diskless();
409 #endif
410 
411 	if (nfs_diskless_valid == 0)
412 		return (-1);
413 	if (nfs_diskless_valid == 1)
414 		nfs_convert_diskless();
415 
416 	/*
417 	 * XXX splnet, so networks will receive...
418 	 */
419 	splnet();
420 
421 	/*
422 	 * Do enough of ifconfig(8) so that the critical net interface can
423 	 * talk to the server.
424 	 */
425 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
426 	    td->td_ucred, td);
427 	if (error)
428 		panic("nfs_mountroot: socreate(%04x): %d",
429 			nd->myif.ifra_addr.sa_family, error);
430 
431 #if 0 /* XXX Bad idea */
432 	/*
433 	 * We might not have been told the right interface, so we pass
434 	 * over the first ten interfaces of the same kind, until we get
435 	 * one of them configured.
436 	 */
437 
438 	for (i = strlen(nd->myif.ifra_name) - 1;
439 		nd->myif.ifra_name[i] >= '0' &&
440 		nd->myif.ifra_name[i] <= '9';
441 		nd->myif.ifra_name[i] ++) {
442 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
443 		if(!error)
444 			break;
445 	}
446 #endif
447 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
448 	if (error)
449 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
450 	if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
451 		ir.ifr_mtu = strtol(cp, NULL, 10);
452 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
453 		freeenv(cp);
454 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
455 		if (error)
456 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
457 	}
458 	soclose(so);
459 
460 	/*
461 	 * If the gateway field is filled in, set it as the default route.
462 	 * Note that pxeboot will set a default route of 0 if the route
463 	 * is not set by the DHCP server.  Check also for a value of 0
464 	 * to avoid panicking inappropriately in that situation.
465 	 */
466 	if (nd->mygateway.sin_len != 0 &&
467 	    nd->mygateway.sin_addr.s_addr != 0) {
468 		struct sockaddr_in mask, sin;
469 
470 		bzero((caddr_t)&mask, sizeof(mask));
471 		sin = mask;
472 		sin.sin_family = AF_INET;
473 		sin.sin_len = sizeof(sin);
474                 /* XXX MRT use table 0 for this sort of thing */
475 		CURVNET_SET(TD_TO_VNET(td));
476 		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
477 		    (struct sockaddr *)&nd->mygateway,
478 		    (struct sockaddr *)&mask,
479 		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
480 		CURVNET_RESTORE();
481 		if (error)
482 			panic("nfs_mountroot: RTM_ADD: %d", error);
483 	}
484 
485 	/*
486 	 * Create the rootfs mount point.
487 	 */
488 	nd->root_args.fh = nd->root_fh;
489 	nd->root_args.fhsize = nd->root_fhsize;
490 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
491 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
492 		(l >> 24) & 0xff, (l >> 16) & 0xff,
493 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
494 	printf("NFS ROOT: %s\n", buf);
495 	nd->root_args.hostname = buf;
496 	if ((error = nfs_mountdiskless(buf,
497 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
498 		return (error);
499 	}
500 
501 	/*
502 	 * This is not really an nfs issue, but it is much easier to
503 	 * set hostname here and then let the "/etc/rc.xxx" files
504 	 * mount the right /var based upon its preset value.
505 	 */
506 	mtx_lock(&prison0.pr_mtx);
507 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
508 	    sizeof(prison0.pr_hostname));
509 	mtx_unlock(&prison0.pr_mtx);
510 	inittodr(ntohl(nd->root_time));
511 	return (0);
512 }
513 
514 /*
515  * Internal version of mount system call for diskless setup.
516  */
517 static int
518 nfs_mountdiskless(char *path,
519     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
520     struct vnode **vpp, struct mount *mp)
521 {
522 	struct sockaddr *nam;
523 	int dirlen, error;
524 	char *dirpath;
525 
526 	/*
527 	 * Find the directory path in "path", which also has the server's
528 	 * name/ip address in it.
529 	 */
530 	dirpath = strchr(path, ':');
531 	if (dirpath != NULL)
532 		dirlen = strlen(++dirpath);
533 	else
534 		dirlen = 0;
535 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
536 	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
537 	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
538 	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
539 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
540 		return (error);
541 	}
542 	return (0);
543 }
544 
545 static void
546 nfs_sec_name(char *sec, int *flagsp)
547 {
548 	if (!strcmp(sec, "krb5"))
549 		*flagsp |= NFSMNT_KERB;
550 	else if (!strcmp(sec, "krb5i"))
551 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
552 	else if (!strcmp(sec, "krb5p"))
553 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
554 }
555 
556 static void
557 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
558     const char *hostname, struct ucred *cred, struct thread *td)
559 {
560 	int s;
561 	int adjsock;
562 	char *p;
563 
564 	s = splnet();
565 
566 	/*
567 	 * Set read-only flag if requested; otherwise, clear it if this is
568 	 * an update.  If this is not an update, then either the read-only
569 	 * flag is already clear, or this is a root mount and it was set
570 	 * intentionally at some previous point.
571 	 */
572 	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
573 		MNT_ILOCK(mp);
574 		mp->mnt_flag |= MNT_RDONLY;
575 		MNT_IUNLOCK(mp);
576 	} else if (mp->mnt_flag & MNT_UPDATE) {
577 		MNT_ILOCK(mp);
578 		mp->mnt_flag &= ~MNT_RDONLY;
579 		MNT_IUNLOCK(mp);
580 	}
581 
582 	/*
583 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
584 	 * no sense in that context.  Also, set up appropriate retransmit
585 	 * and soft timeout behavior.
586 	 */
587 	if (argp->sotype == SOCK_STREAM) {
588 		nmp->nm_flag &= ~NFSMNT_NOCONN;
589 		nmp->nm_timeo = NFS_MAXTIMEO;
590 		if ((argp->flags & NFSMNT_NFSV4) != 0)
591 			nmp->nm_retry = INT_MAX;
592 		else
593 			nmp->nm_retry = NFS_RETRANS_TCP;
594 	}
595 
596 	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
597 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
598 		argp->flags &= ~NFSMNT_RDIRPLUS;
599 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
600 	}
601 
602 	/* Re-bind if rsrvd port requested and wasn't on one */
603 	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
604 		  && (argp->flags & NFSMNT_RESVPORT);
605 	/* Also re-bind if we're switching to/from a connected UDP socket */
606 	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
607 		    (argp->flags & NFSMNT_NOCONN));
608 
609 	/* Update flags atomically.  Don't change the lock bits. */
610 	nmp->nm_flag = argp->flags | nmp->nm_flag;
611 	splx(s);
612 
613 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
614 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
615 		if (nmp->nm_timeo < NFS_MINTIMEO)
616 			nmp->nm_timeo = NFS_MINTIMEO;
617 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
618 			nmp->nm_timeo = NFS_MAXTIMEO;
619 	}
620 
621 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
622 		nmp->nm_retry = argp->retrans;
623 		if (nmp->nm_retry > NFS_MAXREXMIT)
624 			nmp->nm_retry = NFS_MAXREXMIT;
625 	}
626 
627 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
628 		nmp->nm_wsize = argp->wsize;
629 		/*
630 		 * Clip at the power of 2 below the size. There is an
631 		 * issue (not isolated) that causes intermittent page
632 		 * faults if this is not done.
633 		 */
634 		if (nmp->nm_wsize > NFS_FABLKSIZE)
635 			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
636 		else
637 			nmp->nm_wsize = NFS_FABLKSIZE;
638 	}
639 
640 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
641 		nmp->nm_rsize = argp->rsize;
642 		/*
643 		 * Clip at the power of 2 below the size. There is an
644 		 * issue (not isolated) that causes intermittent page
645 		 * faults if this is not done.
646 		 */
647 		if (nmp->nm_rsize > NFS_FABLKSIZE)
648 			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
649 		else
650 			nmp->nm_rsize = NFS_FABLKSIZE;
651 	}
652 
653 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
654 		nmp->nm_readdirsize = argp->readdirsize;
655 	}
656 
657 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
658 		nmp->nm_acregmin = argp->acregmin;
659 	else
660 		nmp->nm_acregmin = NFS_MINATTRTIMO;
661 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
662 		nmp->nm_acregmax = argp->acregmax;
663 	else
664 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
665 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
666 		nmp->nm_acdirmin = argp->acdirmin;
667 	else
668 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
669 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
670 		nmp->nm_acdirmax = argp->acdirmax;
671 	else
672 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
673 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
674 		nmp->nm_acdirmin = nmp->nm_acdirmax;
675 	if (nmp->nm_acregmin > nmp->nm_acregmax)
676 		nmp->nm_acregmin = nmp->nm_acregmax;
677 
678 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
679 		if (argp->readahead <= NFS_MAXRAHEAD)
680 			nmp->nm_readahead = argp->readahead;
681 		else
682 			nmp->nm_readahead = NFS_MAXRAHEAD;
683 	}
684 	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
685 		if (argp->wcommitsize < nmp->nm_wsize)
686 			nmp->nm_wcommitsize = nmp->nm_wsize;
687 		else
688 			nmp->nm_wcommitsize = argp->wcommitsize;
689 	}
690 
691 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
692 		    (nmp->nm_soproto != argp->proto));
693 
694 	if (nmp->nm_client != NULL && adjsock) {
695 		int haslock = 0, error = 0;
696 
697 		if (nmp->nm_sotype == SOCK_STREAM) {
698 			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
699 			if (!error)
700 				haslock = 1;
701 		}
702 		if (!error) {
703 		    newnfs_disconnect(&nmp->nm_sockreq);
704 		    if (haslock)
705 			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
706 		    nmp->nm_sotype = argp->sotype;
707 		    nmp->nm_soproto = argp->proto;
708 		    if (nmp->nm_sotype == SOCK_DGRAM)
709 			while (newnfs_connect(nmp, &nmp->nm_sockreq,
710 			    cred, td, 0)) {
711 				printf("newnfs_args: retrying connect\n");
712 				(void) nfs_catnap(PSOCK, 0, "nfscon");
713 			}
714 		}
715 	} else {
716 		nmp->nm_sotype = argp->sotype;
717 		nmp->nm_soproto = argp->proto;
718 	}
719 
720 	if (hostname != NULL) {
721 		strlcpy(nmp->nm_hostname, hostname,
722 		    sizeof(nmp->nm_hostname));
723 		p = strchr(nmp->nm_hostname, ':');
724 		if (p != NULL)
725 			*p = '\0';
726 	}
727 }
728 
/*
 * NULL-terminated list of mount option names; nfs_mount() passes it to
 * vfs_filteropt() and fails with EINVAL when that call returns non-zero.
 * NOTE(review): nfs_mount() also queries "noresvport", which is not listed
 * here; presumably vfs_filteropt() accepts a "no" prefix on a listed
 * name -- confirm.
 */
static const char *nfs_opts[] = { "from", "nfs_args",
    "noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
    "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
    "async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
    "readdirsize", "soft", "hard", "mntudp", "tcp", "udp", "wsize", "rsize",
    "retrans", "actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax",
    "resvport", "readahead", "hostname", "timeo", "timeout", "addr", "fh",
    "nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname", "dirpath",
    "minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr",
    "pnfs", "wcommitsize",
    NULL };
740 
741 /*
742  * VFS Operations.
743  *
744  * mount system call
745  * It seems a bit dumb to copyinstr() the host and path here and then
746  * bcopy() them in mountnfs(), but I wanted to detect errors before
747  * doing the sockargs() call because sockargs() allocates an mbuf and
748  * an error after that means that I have to release the mbuf.
749  */
750 /* ARGSUSED */
751 static int
752 nfs_mount(struct mount *mp)
753 {
754 	struct nfs_args args = {
755 	    .version = NFS_ARGSVERSION,
756 	    .addr = NULL,
757 	    .addrlen = sizeof (struct sockaddr_in),
758 	    .sotype = SOCK_STREAM,
759 	    .proto = 0,
760 	    .fh = NULL,
761 	    .fhsize = 0,
762 	    .flags = NFSMNT_RESVPORT,
763 	    .wsize = NFS_WSIZE,
764 	    .rsize = NFS_RSIZE,
765 	    .readdirsize = NFS_READDIRSIZE,
766 	    .timeo = 10,
767 	    .retrans = NFS_RETRANS,
768 	    .readahead = NFS_DEFRAHEAD,
769 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
770 	    .hostname = NULL,
771 	    .acregmin = NFS_MINATTRTIMO,
772 	    .acregmax = NFS_MAXATTRTIMO,
773 	    .acdirmin = NFS_MINDIRATTRTIMO,
774 	    .acdirmax = NFS_MAXDIRATTRTIMO,
775 	};
776 	int error = 0, ret, len;
777 	struct sockaddr *nam = NULL;
778 	struct vnode *vp;
779 	struct thread *td;
780 	char hst[MNAMELEN];
781 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
782 	char *opt, *name, *secname;
783 	int nametimeo = NFS_DEFAULT_NAMETIMEO;
784 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
785 	int minvers = 0;
786 	int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
787 	size_t hstlen;
788 
789 	has_nfs_args_opt = 0;
790 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
791 		error = EINVAL;
792 		goto out;
793 	}
794 
795 	td = curthread;
796 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
797 		error = nfs_mountroot(mp);
798 		goto out;
799 	}
800 
801 	nfscl_init();
802 
803 	/*
804 	 * The old mount_nfs program passed the struct nfs_args
805 	 * from userspace to kernel.  The new mount_nfs program
806 	 * passes string options via nmount() from userspace to kernel
807 	 * and we populate the struct nfs_args in the kernel.
808 	 */
809 	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
810 		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
811 		    sizeof(args));
812 		if (error != 0)
813 			goto out;
814 
815 		if (args.version != NFS_ARGSVERSION) {
816 			error = EPROGMISMATCH;
817 			goto out;
818 		}
819 		has_nfs_args_opt = 1;
820 	}
821 
822 	/* Handle the new style options. */
823 	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
824 		args.acdirmin = args.acdirmax =
825 		    args.acregmin = args.acregmax = 0;
826 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
827 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
828 	}
829 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
830 		args.flags |= NFSMNT_NOCONN;
831 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
832 		args.flags &= ~NFSMNT_NOCONN;
833 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
834 		args.flags |= NFSMNT_NOLOCKD;
835 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
836 		args.flags &= ~NFSMNT_NOLOCKD;
837 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
838 		args.flags |= NFSMNT_INT;
839 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
840 		args.flags |= NFSMNT_RDIRPLUS;
841 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
842 		args.flags |= NFSMNT_RESVPORT;
843 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
844 		args.flags &= ~NFSMNT_RESVPORT;
845 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
846 		args.flags |= NFSMNT_SOFT;
847 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
848 		args.flags &= ~NFSMNT_SOFT;
849 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
850 		args.sotype = SOCK_DGRAM;
851 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
852 		args.sotype = SOCK_DGRAM;
853 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
854 		args.sotype = SOCK_STREAM;
855 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
856 		args.flags |= NFSMNT_NFSV3;
857 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
858 		args.flags |= NFSMNT_NFSV4;
859 		args.sotype = SOCK_STREAM;
860 	}
861 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
862 		args.flags |= NFSMNT_ALLGSSNAME;
863 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
864 		args.flags |= NFSMNT_NOCTO;
865 	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
866 		args.flags |= NFSMNT_NONCONTIGWR;
867 	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
868 		args.flags |= NFSMNT_PNFS;
869 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
870 		if (opt == NULL) {
871 			vfs_mount_error(mp, "illegal readdirsize");
872 			error = EINVAL;
873 			goto out;
874 		}
875 		ret = sscanf(opt, "%d", &args.readdirsize);
876 		if (ret != 1 || args.readdirsize <= 0) {
877 			vfs_mount_error(mp, "illegal readdirsize: %s",
878 			    opt);
879 			error = EINVAL;
880 			goto out;
881 		}
882 		args.flags |= NFSMNT_READDIRSIZE;
883 	}
884 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
885 		if (opt == NULL) {
886 			vfs_mount_error(mp, "illegal readahead");
887 			error = EINVAL;
888 			goto out;
889 		}
890 		ret = sscanf(opt, "%d", &args.readahead);
891 		if (ret != 1 || args.readahead <= 0) {
892 			vfs_mount_error(mp, "illegal readahead: %s",
893 			    opt);
894 			error = EINVAL;
895 			goto out;
896 		}
897 		args.flags |= NFSMNT_READAHEAD;
898 	}
899 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
900 		if (opt == NULL) {
901 			vfs_mount_error(mp, "illegal wsize");
902 			error = EINVAL;
903 			goto out;
904 		}
905 		ret = sscanf(opt, "%d", &args.wsize);
906 		if (ret != 1 || args.wsize <= 0) {
907 			vfs_mount_error(mp, "illegal wsize: %s",
908 			    opt);
909 			error = EINVAL;
910 			goto out;
911 		}
912 		args.flags |= NFSMNT_WSIZE;
913 	}
914 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
915 		if (opt == NULL) {
916 			vfs_mount_error(mp, "illegal rsize");
917 			error = EINVAL;
918 			goto out;
919 		}
920 		ret = sscanf(opt, "%d", &args.rsize);
921 		if (ret != 1 || args.rsize <= 0) {
922 			vfs_mount_error(mp, "illegal wsize: %s",
923 			    opt);
924 			error = EINVAL;
925 			goto out;
926 		}
927 		args.flags |= NFSMNT_RSIZE;
928 	}
929 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
930 		if (opt == NULL) {
931 			vfs_mount_error(mp, "illegal retrans");
932 			error = EINVAL;
933 			goto out;
934 		}
935 		ret = sscanf(opt, "%d", &args.retrans);
936 		if (ret != 1 || args.retrans <= 0) {
937 			vfs_mount_error(mp, "illegal retrans: %s",
938 			    opt);
939 			error = EINVAL;
940 			goto out;
941 		}
942 		args.flags |= NFSMNT_RETRANS;
943 	}
944 	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
945 		ret = sscanf(opt, "%d", &args.acregmin);
946 		if (ret != 1 || args.acregmin < 0) {
947 			vfs_mount_error(mp, "illegal actimeo: %s",
948 			    opt);
949 			error = EINVAL;
950 			goto out;
951 		}
952 		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
953 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
954 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
955 	}
956 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
957 		ret = sscanf(opt, "%d", &args.acregmin);
958 		if (ret != 1 || args.acregmin < 0) {
959 			vfs_mount_error(mp, "illegal acregmin: %s",
960 			    opt);
961 			error = EINVAL;
962 			goto out;
963 		}
964 		args.flags |= NFSMNT_ACREGMIN;
965 	}
966 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
967 		ret = sscanf(opt, "%d", &args.acregmax);
968 		if (ret != 1 || args.acregmax < 0) {
969 			vfs_mount_error(mp, "illegal acregmax: %s",
970 			    opt);
971 			error = EINVAL;
972 			goto out;
973 		}
974 		args.flags |= NFSMNT_ACREGMAX;
975 	}
976 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
977 		ret = sscanf(opt, "%d", &args.acdirmin);
978 		if (ret != 1 || args.acdirmin < 0) {
979 			vfs_mount_error(mp, "illegal acdirmin: %s",
980 			    opt);
981 			error = EINVAL;
982 			goto out;
983 		}
984 		args.flags |= NFSMNT_ACDIRMIN;
985 	}
986 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
987 		ret = sscanf(opt, "%d", &args.acdirmax);
988 		if (ret != 1 || args.acdirmax < 0) {
989 			vfs_mount_error(mp, "illegal acdirmax: %s",
990 			    opt);
991 			error = EINVAL;
992 			goto out;
993 		}
994 		args.flags |= NFSMNT_ACDIRMAX;
995 	}
996 	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
997 		ret = sscanf(opt, "%d", &args.wcommitsize);
998 		if (ret != 1 || args.wcommitsize < 0) {
999 			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1000 			error = EINVAL;
1001 			goto out;
1002 		}
1003 		args.flags |= NFSMNT_WCOMMITSIZE;
1004 	}
1005 	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1006 		ret = sscanf(opt, "%d", &args.timeo);
1007 		if (ret != 1 || args.timeo <= 0) {
1008 			vfs_mount_error(mp, "illegal timeo: %s",
1009 			    opt);
1010 			error = EINVAL;
1011 			goto out;
1012 		}
1013 		args.flags |= NFSMNT_TIMEO;
1014 	}
1015 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1016 		ret = sscanf(opt, "%d", &args.timeo);
1017 		if (ret != 1 || args.timeo <= 0) {
1018 			vfs_mount_error(mp, "illegal timeout: %s",
1019 			    opt);
1020 			error = EINVAL;
1021 			goto out;
1022 		}
1023 		args.flags |= NFSMNT_TIMEO;
1024 	}
1025 	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1026 		ret = sscanf(opt, "%d", &nametimeo);
1027 		if (ret != 1 || nametimeo < 0) {
1028 			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1029 			error = EINVAL;
1030 			goto out;
1031 		}
1032 	}
1033 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1034 	    == 0) {
1035 		ret = sscanf(opt, "%d", &negnametimeo);
1036 		if (ret != 1 || negnametimeo < 0) {
1037 			vfs_mount_error(mp, "illegal negnametimeo: %s",
1038 			    opt);
1039 			error = EINVAL;
1040 			goto out;
1041 		}
1042 	}
1043 	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1044 	    0) {
1045 		ret = sscanf(opt, "%d", &minvers);
1046 		if (ret != 1 || minvers < 0 || minvers > 1 ||
1047 		    (args.flags & NFSMNT_NFSV4) == 0) {
1048 			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1049 			error = EINVAL;
1050 			goto out;
1051 		}
1052 	}
1053 	if (vfs_getopt(mp->mnt_optnew, "sec",
1054 		(void **) &secname, NULL) == 0)
1055 		nfs_sec_name(secname, &args.flags);
1056 
1057 	if (mp->mnt_flag & MNT_UPDATE) {
1058 		struct nfsmount *nmp = VFSTONFS(mp);
1059 
1060 		if (nmp == NULL) {
1061 			error = EIO;
1062 			goto out;
1063 		}
1064 
1065 		/*
1066 		 * If a change from TCP->UDP is done and there are thread(s)
1067 		 * that have I/O RPC(s) in progress with a tranfer size
1068 		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1069 		 * hung, retrying the RPC(s) forever. Usually these threads
1070 		 * will be seen doing an uninterruptible sleep on wait channel
1071 		 * "nfsreq".
1072 		 */
1073 		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1074 			tprintf(td->td_proc, LOG_WARNING,
1075 	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1076 
1077 		/*
1078 		 * When doing an update, we can't change version,
1079 		 * security, switch lockd strategies or change cookie
1080 		 * translation
1081 		 */
1082 		args.flags = (args.flags &
1083 		    ~(NFSMNT_NFSV3 |
1084 		      NFSMNT_NFSV4 |
1085 		      NFSMNT_KERB |
1086 		      NFSMNT_INTEGRITY |
1087 		      NFSMNT_PRIVACY |
1088 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1089 		    (nmp->nm_flag &
1090 			(NFSMNT_NFSV3 |
1091 			 NFSMNT_NFSV4 |
1092 			 NFSMNT_KERB |
1093 			 NFSMNT_INTEGRITY |
1094 			 NFSMNT_PRIVACY |
1095 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1096 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1097 		goto out;
1098 	}
1099 
1100 	/*
1101 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1102 	 * or no-connection mode for those protocols that support
1103 	 * no-connection mode (the flag will be cleared later for protocols
1104 	 * that do not support no-connection mode).  This will allow a client
1105 	 * to receive replies from a different IP then the request was
1106 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1107 	 * not 0.
1108 	 */
1109 	if (nfs_ip_paranoia == 0)
1110 		args.flags |= NFSMNT_NOCONN;
1111 
1112 	if (has_nfs_args_opt != 0) {
1113 		/*
1114 		 * In the 'nfs_args' case, the pointers in the args
1115 		 * structure are in userland - we copy them in here.
1116 		 */
1117 		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1118 			vfs_mount_error(mp, "Bad file handle");
1119 			error = EINVAL;
1120 			goto out;
1121 		}
1122 		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1123 		    args.fhsize);
1124 		if (error != 0)
1125 			goto out;
1126 		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1127 		if (error != 0)
1128 			goto out;
1129 		bzero(&hst[hstlen], MNAMELEN - hstlen);
1130 		args.hostname = hst;
1131 		/* sockargs() call must be after above copyin() calls */
1132 		error = getsockaddr(&nam, (caddr_t)args.addr,
1133 		    args.addrlen);
1134 		if (error != 0)
1135 			goto out;
1136 	} else {
1137 		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1138 		    &args.fhsize) == 0) {
1139 			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1140 				vfs_mount_error(mp, "Bad file handle");
1141 				error = EINVAL;
1142 				goto out;
1143 			}
1144 			bcopy(args.fh, nfh, args.fhsize);
1145 		} else {
1146 			args.fhsize = 0;
1147 		}
1148 		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1149 		    (void **)&args.hostname, &len);
1150 		if (args.hostname == NULL) {
1151 			vfs_mount_error(mp, "Invalid hostname");
1152 			error = EINVAL;
1153 			goto out;
1154 		}
1155 		bcopy(args.hostname, hst, MNAMELEN);
1156 		hst[MNAMELEN - 1] = '\0';
1157 	}
1158 
1159 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1160 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1161 	else
1162 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1163 	srvkrbnamelen = strlen(srvkrbname);
1164 
1165 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1166 		strlcpy(krbname, name, sizeof (krbname));
1167 	else
1168 		krbname[0] = '\0';
1169 	krbnamelen = strlen(krbname);
1170 
1171 	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1172 		strlcpy(dirpath, name, sizeof (dirpath));
1173 	else
1174 		dirpath[0] = '\0';
1175 	dirlen = strlen(dirpath);
1176 
1177 	if (has_nfs_args_opt == 0) {
1178 		if (vfs_getopt(mp->mnt_optnew, "addr",
1179 		    (void **)&args.addr, &args.addrlen) == 0) {
1180 			if (args.addrlen > SOCK_MAXADDRLEN) {
1181 				error = ENAMETOOLONG;
1182 				goto out;
1183 			}
1184 			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1185 			bcopy(args.addr, nam, args.addrlen);
1186 			nam->sa_len = args.addrlen;
1187 		} else {
1188 			vfs_mount_error(mp, "No server address");
1189 			error = EINVAL;
1190 			goto out;
1191 		}
1192 	}
1193 
1194 	args.fh = nfh;
1195 	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1196 	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1197 	    nametimeo, negnametimeo, minvers);
1198 out:
1199 	if (!error) {
1200 		MNT_ILOCK(mp);
1201 		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1202 		    MNTK_USES_BCACHE;
1203 		MNT_IUNLOCK(mp);
1204 	}
1205 	return (error);
1206 }
1207 
1208 
1209 /*
1210  * VFS Operations.
1211  *
1212  * mount system call
1213  * It seems a bit dumb to copyinstr() the host and path here and then
1214  * bcopy() them in mountnfs(), but I wanted to detect errors before
1215  * doing the sockargs() call because sockargs() allocates an mbuf and
1216  * an error after that means that I have to release the mbuf.
1217  */
1218 /* ARGSUSED */
1219 static int
1220 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1221 {
1222 	int error;
1223 	struct nfs_args args;
1224 
1225 	error = copyin(data, &args, sizeof (struct nfs_args));
1226 	if (error)
1227 		return error;
1228 
1229 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1230 
1231 	error = kernel_mount(ma, flags);
1232 	return (error);
1233 }
1234 
/*
 * Common code for mount and mountroot
 *
 * Allocates and fills in a new struct nfsmount for "mp", connects to the
 * server with newnfs_connect() and gets the vnode for the remote root.
 *
 * argp		mount arguments (flags, fh/fhsize, sotype, proto); also
 *		passed to nfs_decode_args().
 * mp		mount point being set up; mnt_data is pointed at the new
 *		nfsmount.
 * nam		server address, stored in nm_nam.  It is freed here when
 *		the mount fails and also in the MNT_UPDATE case.
 * hst		host name; MNAMELEN bytes of it are copied to
 *		mnt_stat.f_mntfromname.
 * krbname, dirpath, srvkrbname (and their lengths)
 *		names copied into the space allocated after the nfsmount.
 * vpp		set to the root vnode on success, which is returned
 *		unlocked but still referenced.
 * cred, td	credentials and thread used for the connect and the RPCs.
 * nametimeo, negnametimeo
 *		initial values for nm_nametimeo and nm_negnametimeo.
 * minvers	NFSv4 minor version; only used when NFSMNT_NFSV4 is set
 *		in argp->flags.
 *
 * Returns 0 on success, otherwise an error number after the nfsmount
 * and "nam" have been released.
 */
static int
mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
    struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
    int minvers)
{
	struct nfsmount *nmp;
	struct nfsnode *np;
	int error, trycnt, ret;
	struct nfsvattr nfsva;
	struct nfsclclient *clp;
	struct nfsclds *dsp, *tdsp;
	uint32_t lease;
	/* Seeded from nfsboottime on first use, then bumped for each mount. */
	static u_int64_t clval = 0;

	NFSCL_DEBUG(3, "in mnt\n");
	clp = NULL;
	if (mp->mnt_flag & MNT_UPDATE) {
		/* Nothing to do for an update except dispose of "nam". */
		nmp = VFSTONFS(mp);
		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
		FREE(nam, M_SONAME);
		return (0);
	} else {
		/*
		 * The allocation is zeroed and has room after the structure
		 * for the three names copied below.
		 */
		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
		    krbnamelen + dirlen + srvkrbnamelen + 2,
		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
		TAILQ_INIT(&nmp->nm_bufq);
		if (clval == 0)
			clval = (u_int64_t)nfsboottime.tv_sec;
		nmp->nm_clval = clval++;
		nmp->nm_krbnamelen = krbnamelen;
		nmp->nm_dirpathlen = dirlen;
		nmp->nm_srvkrbnamelen = srvkrbnamelen;
		if (td->td_ucred->cr_uid != (uid_t)0) {
			/*
			 * nm_uid is used to get KerberosV credentials for
			 * the nfsv4 state handling operations if there is
			 * no host based principal set. Use the uid of
			 * this user if not root, since they are doing the
			 * mount. I don't think setting this for root will
			 * work, since root normally does not have user
			 * credentials in a credentials cache.
			 */
			nmp->nm_uid = td->td_ucred->cr_uid;
		} else {
			/*
			 * Just set to -1, so it won't be used.
			 */
			nmp->nm_uid = (uid_t)-1;
		}

		/* Copy and null terminate all the names */
		if (nmp->nm_krbnamelen > 0) {
			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
		}
		if (nmp->nm_dirpathlen > 0) {
			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
			    nmp->nm_dirpathlen);
			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
			    + 1] = '\0';
		}
		if (nmp->nm_srvkrbnamelen > 0) {
			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
			    nmp->nm_srvkrbnamelen);
			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
			    + nmp->nm_srvkrbnamelen + 2] = '\0';
		}
		/* The reference taken here is dropped by crfree() at "bad". */
		nmp->nm_sockreq.nr_cred = crhold(cred);
		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
		mp->mnt_data = nmp;
		nmp->nm_getinfo = nfs_getnlminfo;
		nmp->nm_vinvalbuf = ncl_vinvalbuf;
	}
	vfs_getnewfsid(mp);
	nmp->nm_mountp = mp;
	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);

	/*
	 * Since nfs_decode_args() might optionally set them, these
	 * need to be set to defaults before the call, so that the
	 * optional settings aren't overwritten.
	 */
	nmp->nm_nametimeo = nametimeo;
	nmp->nm_negnametimeo = negnametimeo;
	nmp->nm_timeo = NFS_TIMEO;
	nmp->nm_retry = NFS_RETRANS;
	nmp->nm_readahead = NFS_DEFRAHEAD;

	/* This is empirical approximation of sqrt(hibufspace) * 256. */
	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
		nmp->nm_wcommitsize *= 2;
	nmp->nm_wcommitsize *= 256;

	/* The minor version only applies to NFSv4 mounts. */
	if ((argp->flags & NFSMNT_NFSV4) != 0)
		nmp->nm_minorvers = minvers;
	else
		nmp->nm_minorvers = 0;

	nfs_decode_args(mp, nmp, argp, hst, cred, td);

	/*
	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
	 * high, depending on whether we end up with negative offsets in
	 * the client or server somewhere.  2GB-1 may be safer.
	 *
	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
	 * that we can handle until we find out otherwise.
	 */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
		nmp->nm_maxfilesize = 0xffffffffLL;
	else
		nmp->nm_maxfilesize = OFF_MAX;

	/* Neither V3 nor V4 was requested: use the default I/O sizes. */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
		nmp->nm_wsize = NFS_WSIZE;
		nmp->nm_rsize = NFS_RSIZE;
		nmp->nm_readdirsize = NFS_READDIRSIZE;
	}
	nmp->nm_numgrps = NFS_MAXGRPS;
	/* Negative tprintf delays are clamped to zero. */
	nmp->nm_tprintf_delay = nfs_tprintf_delay;
	if (nmp->nm_tprintf_delay < 0)
		nmp->nm_tprintf_delay = 0;
	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
	if (nmp->nm_tprintf_initial_delay < 0)
		nmp->nm_tprintf_initial_delay = 0;
	nmp->nm_fhsize = argp->fhsize;
	if (nmp->nm_fhsize > 0)
		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
	nmp->nm_nam = nam;
	/* Set up the sockets and per-host congestion */
	nmp->nm_sotype = argp->sotype;
	nmp->nm_soproto = argp->proto;
	nmp->nm_sockreq.nr_prog = NFS_PROG;
	if ((argp->flags & NFSMNT_NFSV4))
		nmp->nm_sockreq.nr_vers = NFS_VER4;
	else if ((argp->flags & NFSMNT_NFSV3))
		nmp->nm_sockreq.nr_vers = NFS_VER3;
	else
		nmp->nm_sockreq.nr_vers = NFS_VER2;


	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
		goto bad;
	/* For NFSv4.1, get the clientid now. */
	if (nmp->nm_minorvers > 0) {
		NFSCL_DEBUG(3, "at getcl\n");
		error = nfscl_getcl(mp, cred, td, 0, &clp);
		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
		if (error != 0)
			goto bad;
	}

	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
	    nmp->nm_dirpathlen > 0) {
		NFSCL_DEBUG(3, "in dirp\n");
		/*
		 * If the fhsize on the mount point == 0 for V4, the mount
		 * path needs to be looked up.
		 */
		/* Up to three attempts, napping after each failed one. */
		trycnt = 3;
		do {
			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
			    cred, td);
			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
			if (error)
				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
		} while (error && --trycnt > 0);
		if (error) {
			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
			goto bad;
		}
	}

	/*
	 * A reference count is needed on the nfsnode representing the
	 * remote root.  If this object is not persistent, then backward
	 * traversals of the mount point (i.e. "..") will not work if
	 * the nfsnode gets flushed out of the cache. Ufs does not have
	 * this problem, because one can identify root inodes by their
	 * number == ROOTINO (2).
	 */
	if (nmp->nm_fhsize > 0) {
		/*
		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
		 * non-zero for the root vnode. f_iosize will be set correctly
		 * by nfs_statfs() before any I/O occurs.
		 */
		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
		    LK_EXCLUSIVE);
		if (error)
			goto bad;
		*vpp = NFSTOV(np);

		/*
		 * Get file attributes and transfer parameters for the
		 * mountpoint.  This has the side effect of filling in
		 * (*vpp)->v_type with the correct value.
		 */
		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
		    cred, td, &nfsva, NULL, &lease);
		if (ret) {
			/*
			 * Just set default values to get things going.
			 */
			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
			nfsva.na_vattr.va_type = VDIR;
			nfsva.na_vattr.va_mode = 0777;
			nfsva.na_vattr.va_nlink = 100;
			nfsva.na_vattr.va_uid = (uid_t)0;
			nfsva.na_vattr.va_gid = (gid_t)0;
			nfsva.na_vattr.va_fileid = 2;
			nfsva.na_vattr.va_gen = 1;
			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
			nfsva.na_vattr.va_size = 512 * 1024;
			lease = 60;
		}
		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
		if (nmp->nm_minorvers > 0) {
			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
			NFSLOCKCLSTATE();
			clp->nfsc_renew = NFSCL_RENEW(lease);
			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
			/* Advance the revision, skipping over zero on wrap. */
			clp->nfsc_clientidrev++;
			if (clp->nfsc_clientidrev == 0)
				clp->nfsc_clientidrev++;
			NFSUNLOCKCLSTATE();
			/*
			 * Mount will succeed, so the renew thread can be
			 * started now.
			 */
			nfscl_start_renewthread(clp);
			nfscl_clientrelease(clp);
		}
		if (argp->flags & NFSMNT_NFSV3)
			ncl_fsinfo(nmp, *vpp, cred, td);

		/* Mark if the mount point supports NFSv4 ACLs. */
		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
		    ret == 0 &&
		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_NFS4ACLS;
			MNT_IUNLOCK(mp);
		}

		/*
		 * Lose the lock but keep the ref.
		 */
		NFSVOPUNLOCK(*vpp, 0);
		return (0);
	}
	/* Still no file handle for the root, so the mount cannot succeed. */
	error = EIO;

bad:
	/* Release everything that was set up above, including "nam". */
	if (clp != NULL)
		nfscl_clientrelease(clp);
	newnfs_disconnect(&nmp->nm_sockreq);
	crfree(nmp->nm_sockreq.nr_cred);
	if (nmp->nm_sockreq.nr_auth != NULL)
		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
	mtx_destroy(&nmp->nm_mtx);
	if (nmp->nm_clp != NULL) {
		NFSLOCKCLSTATE();
		LIST_REMOVE(nmp->nm_clp, nfsc_list);
		NFSUNLOCKCLSTATE();
		free(nmp->nm_clp, M_NFSCLCLIENT);
	}
	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
		nfscl_freenfsclds(dsp);
	FREE(nmp, M_NEWNFSMNT);
	FREE(nam, M_SONAME);
	return (error);
}
1517 
1518 /*
1519  * unmount system call
1520  */
1521 static int
1522 nfs_unmount(struct mount *mp, int mntflags)
1523 {
1524 	struct thread *td;
1525 	struct nfsmount *nmp;
1526 	int error, flags = 0, i, trycnt = 0;
1527 	struct nfsclds *dsp, *tdsp;
1528 
1529 	td = curthread;
1530 
1531 	if (mntflags & MNT_FORCE)
1532 		flags |= FORCECLOSE;
1533 	nmp = VFSTONFS(mp);
1534 	/*
1535 	 * Goes something like this..
1536 	 * - Call vflush() to clear out vnodes for this filesystem
1537 	 * - Close the socket
1538 	 * - Free up the data structures
1539 	 */
1540 	/* In the forced case, cancel any outstanding requests. */
1541 	if (mntflags & MNT_FORCE) {
1542 		error = newnfs_nmcancelreqs(nmp);
1543 		if (error)
1544 			goto out;
1545 		/* For a forced close, get rid of the renew thread now */
1546 		nfscl_umount(nmp, td);
1547 	}
1548 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1549 	do {
1550 		error = vflush(mp, 1, flags, td);
1551 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1552 			(void) nfs_catnap(PSOCK, error, "newndm");
1553 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1554 	if (error)
1555 		goto out;
1556 
1557 	/*
1558 	 * We are now committed to the unmount.
1559 	 */
1560 	if ((mntflags & MNT_FORCE) == 0)
1561 		nfscl_umount(nmp, td);
1562 	/* Make sure no nfsiods are assigned to this mount. */
1563 	mtx_lock(&ncl_iod_mutex);
1564 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1565 		if (ncl_iodmount[i] == nmp) {
1566 			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1567 			ncl_iodmount[i] = NULL;
1568 		}
1569 	mtx_unlock(&ncl_iod_mutex);
1570 	newnfs_disconnect(&nmp->nm_sockreq);
1571 	crfree(nmp->nm_sockreq.nr_cred);
1572 	FREE(nmp->nm_nam, M_SONAME);
1573 	if (nmp->nm_sockreq.nr_auth != NULL)
1574 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1575 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1576 	mtx_destroy(&nmp->nm_mtx);
1577 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1578 		nfscl_freenfsclds(dsp);
1579 	FREE(nmp, M_NEWNFSMNT);
1580 out:
1581 	return (error);
1582 }
1583 
1584 /*
1585  * Return root of a filesystem
1586  */
1587 static int
1588 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1589 {
1590 	struct vnode *vp;
1591 	struct nfsmount *nmp;
1592 	struct nfsnode *np;
1593 	int error;
1594 
1595 	nmp = VFSTONFS(mp);
1596 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1597 	if (error)
1598 		return error;
1599 	vp = NFSTOV(np);
1600 	/*
1601 	 * Get transfer parameters and attributes for root vnode once.
1602 	 */
1603 	mtx_lock(&nmp->nm_mtx);
1604 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1605 		mtx_unlock(&nmp->nm_mtx);
1606 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1607 	} else
1608 		mtx_unlock(&nmp->nm_mtx);
1609 	if (vp->v_type == VNON)
1610 	    vp->v_type = VDIR;
1611 	vp->v_vflag |= VV_ROOT;
1612 	*vpp = vp;
1613 	return (0);
1614 }
1615 
1616 /*
1617  * Flush out the buffer cache
1618  */
1619 /* ARGSUSED */
1620 static int
1621 nfs_sync(struct mount *mp, int waitfor)
1622 {
1623 	struct vnode *vp, *mvp;
1624 	struct thread *td;
1625 	int error, allerror = 0;
1626 
1627 	td = curthread;
1628 
1629 	MNT_ILOCK(mp);
1630 	/*
1631 	 * If a forced dismount is in progress, return from here so that
1632 	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1633 	 * calling VFS_UNMOUNT().
1634 	 */
1635 	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1636 		MNT_IUNLOCK(mp);
1637 		return (EBADF);
1638 	}
1639 	MNT_IUNLOCK(mp);
1640 
1641 	/*
1642 	 * Force stale buffer cache information to be flushed.
1643 	 */
1644 loop:
1645 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1646 		/* XXX Racy bv_cnt check. */
1647 		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1648 		    waitfor == MNT_LAZY) {
1649 			VI_UNLOCK(vp);
1650 			continue;
1651 		}
1652 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1653 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1654 			goto loop;
1655 		}
1656 		error = VOP_FSYNC(vp, waitfor, td);
1657 		if (error)
1658 			allerror = error;
1659 		NFSVOPUNLOCK(vp, 0);
1660 		vrele(vp);
1661 	}
1662 	return (allerror);
1663 }
1664 
1665 static int
1666 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1667 {
1668 	struct nfsmount *nmp = VFSTONFS(mp);
1669 	struct vfsquery vq;
1670 	int error;
1671 
1672 	bzero(&vq, sizeof(vq));
1673 	switch (op) {
1674 #if 0
1675 	case VFS_CTL_NOLOCKS:
1676 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1677  		if (req->oldptr != NULL) {
1678  			error = SYSCTL_OUT(req, &val, sizeof(val));
1679  			if (error)
1680  				return (error);
1681  		}
1682  		if (req->newptr != NULL) {
1683  			error = SYSCTL_IN(req, &val, sizeof(val));
1684  			if (error)
1685  				return (error);
1686 			if (val)
1687 				nmp->nm_flag |= NFSMNT_NOLOCKS;
1688 			else
1689 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1690  		}
1691 		break;
1692 #endif
1693 	case VFS_CTL_QUERY:
1694 		mtx_lock(&nmp->nm_mtx);
1695 		if (nmp->nm_state & NFSSTA_TIMEO)
1696 			vq.vq_flags |= VQ_NOTRESP;
1697 		mtx_unlock(&nmp->nm_mtx);
1698 #if 0
1699 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1700 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1701 			vq.vq_flags |= VQ_NOTRESPLOCK;
1702 #endif
1703 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1704 		break;
1705  	case VFS_CTL_TIMEO:
1706  		if (req->oldptr != NULL) {
1707  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1708  			    sizeof(nmp->nm_tprintf_initial_delay));
1709  			if (error)
1710  				return (error);
1711  		}
1712  		if (req->newptr != NULL) {
1713 			error = vfs_suser(mp, req->td);
1714 			if (error)
1715 				return (error);
1716  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1717  			    sizeof(nmp->nm_tprintf_initial_delay));
1718  			if (error)
1719  				return (error);
1720  			if (nmp->nm_tprintf_initial_delay < 0)
1721  				nmp->nm_tprintf_initial_delay = 0;
1722  		}
1723 		break;
1724 	default:
1725 		return (ENOTSUP);
1726 	}
1727 	return (0);
1728 }
1729 
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.
 *
 * The value returned by newnfs_nmcancelreqs() is not used.
 */
static void
nfs_purge(struct mount *mp)
{
	struct nfsmount *nmp;

	nmp = VFSTONFS(mp);
	(void)newnfs_nmcancelreqs(nmp);
}
1742 
1743 /*
1744  * Extract the information needed by the nlm from the nfs vnode.
1745  */
1746 static void
1747 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1748     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1749     struct timeval *timeop)
1750 {
1751 	struct nfsmount *nmp;
1752 	struct nfsnode *np = VTONFS(vp);
1753 
1754 	nmp = VFSTONFS(vp->v_mount);
1755 	if (fhlenp != NULL)
1756 		*fhlenp = (size_t)np->n_fhp->nfh_len;
1757 	if (fhp != NULL)
1758 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1759 	if (sp != NULL)
1760 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1761 	if (is_v3p != NULL)
1762 		*is_v3p = NFS_ISV3(vp);
1763 	if (sizep != NULL)
1764 		*sizep = np->n_size;
1765 	if (timeop != NULL) {
1766 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1767 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1768 	}
1769 }
1770 
/*
 * Append the option name "opt" to the output buffer when "testval" is
 * non-zero and the buffer has room for the name plus its terminating
 * NUL.  On output "*buf" and "*blen" are advanced past the text that
 * was added; otherwise they are left alone.  "nmp" is not used.
 */
static __inline void
nfscl_printopt(struct nfsmount *nmp, int testval, char *opt, char **buf,
    size_t *blen)
{
	size_t optlen;
	int written;

	if (testval == 0)
		return;
	optlen = strlen(opt);
	if (*blen <= optlen)
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	/* The name was checked to fit, so the lengths should agree. */
	if ((size_t)written != optlen)
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
1788 
/*
 * This function prints out an option's integer value as "<opt>=<optval>".
 *
 * Nothing is attempted unless the buffer has room for more than the
 * option name and one further character.  When the formatted text fits,
 * "*buf" and "*blen" are advanced past it.  When it does not fit, the
 * clipped text that snprintf() left behind is erased again, so that the
 * buffer never ends with a shortened name or a number missing digits;
 * "*buf" and "*blen" are left unchanged in that case.  "nmp" is not used.
 */
static __inline void
nfscl_printoptval(struct nfsmount *nmp, int optval, char *opt, char **buf,
    size_t *blen)
{
	int len;

	if (*blen <= strlen(opt) + 1)
		return;

	len = snprintf(*buf, *blen, "%s=%d", opt, optval);
	if (len < 0 || (size_t)len >= *blen) {
		/* Did not fit: drop the partial output. */
		**buf = '\0';
		return;
	}
	*buf += len;
	*blen -= len;
}
1806 
1807 /*
1808  * Load the option flags and values into the buffer.
1809  */
1810 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1811 {
1812 	char *buf;
1813 	size_t blen;
1814 
1815 	buf = buffer;
1816 	blen = buflen;
1817 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1818 	    &blen);
1819 	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1820 		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1821 		    &blen);
1822 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1823 		    &buf, &blen);
1824 	}
1825 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1826 	    &blen);
1827 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1828 	    "nfsv2", &buf, &blen);
1829 	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1830 	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1831 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1832 	    &buf, &blen);
1833 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1834 	    &buf, &blen);
1835 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1836 	    &blen);
1837 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1838 	    &blen);
1839 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1840 	    &blen);
1841 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1842 	    &blen);
1843 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1844 	    &blen);
1845 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
1846 	    ",noncontigwr", &buf, &blen);
1847 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1848 	    0, ",lockd", &buf, &blen);
1849 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1850 	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1851 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1852 	    &buf, &blen);
1853 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1854 	    &buf, &blen);
1855 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1856 	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
1857 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1858 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1859 	    &buf, &blen);
1860 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1861 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
1862 	    &buf, &blen);
1863 	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1864 	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1865 	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
1866 	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
1867 	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
1868 	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
1869 	    &blen);
1870 	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
1871 	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
1872 	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
1873 	    &blen);
1874 	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
1875 	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
1876 	    &blen);
1877 	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
1878 	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
1879 }
1880 
1881