xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision 4b63a7c678a1291c8056de3770cfa393773b1f94)
1 /*-
2  * Copyright (c) 1989, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/limits.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/mount.h>
55 #include <sys/proc.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/sockio.h>
59 #include <sys/sysctl.h>
60 #include <sys/vnode.h>
61 #include <sys/signalvar.h>
62 
63 #include <vm/vm.h>
64 #include <vm/vm_extern.h>
65 #include <vm/uma.h>
66 
67 #include <net/if.h>
68 #include <net/route.h>
69 #include <netinet/in.h>
70 
71 #include <fs/nfs/nfsport.h>
72 #include <fs/nfsclient/nfsnode.h>
73 #include <fs/nfsclient/nfsmount.h>
74 #include <fs/nfsclient/nfs.h>
75 #include <nfs/nfsdiskless.h>
76 
/* Advertise the "nfscl" feature with the description "NFSv4 client". */
FEATURE(nfscl, "NFSv4 client");

/*
 * Globals referenced by this file; they are only declared here and are
 * presumably defined elsewhere in the NFS client/common code.
 */
extern int nfscl_ticks;
extern struct timeval nfsboottime;
extern struct nfsstats	newnfsstats;
extern int nfsrv_useacl;
extern int nfscl_debuglevel;
extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
extern struct mtx ncl_iod_mutex;
NFSCLSTATEMUTEX;

/* malloc types for NFS request headers and for NFS mount structures. */
MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
91 
/* Read/write integer tunables attached to the _vfs_nfs sysctl node. */
SYSCTL_DECL(_vfs_nfs);
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0, "");
/*
 * NOTE(review): the two "downdelay" entries below pass the delay constant
 * itself as the OID number, where nfs_ip_paranoia uses OID_AUTO.  That
 * looks unintentional, but changing it would renumber the OIDs - confirm
 * before touching.
 */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
/* how long between console messages "nfs server foo not responding" */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
103 
/*
 * Forward declarations of the file-local helpers and of the VFS entry
 * points that are wired into nfs_vfsops below.
 */
static int	nfs_mountroot(struct mount *);
static void	nfs_sec_name(char *, int *);
static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
		    struct nfs_args *argp, const char *, struct ucred *,
		    struct thread *);
static int	mountnfs(struct nfs_args *, struct mount *,
		    struct sockaddr *, char *, u_char *, int, u_char *, int,
		    u_char *, int, struct vnode **, struct ucred *,
		    struct thread *, int, int, int);
static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
		    struct sockaddr_storage *, int *, off_t *,
		    struct timeval *);
static vfs_mount_t nfs_mount;
static vfs_cmount_t nfs_cmount;
static vfs_unmount_t nfs_unmount;
static vfs_root_t nfs_root;
static vfs_statfs_t nfs_statfs;
static vfs_sync_t nfs_sync;
static vfs_sysctl_t nfs_sysctl;
static vfs_purge_t nfs_purge;
124 
/*
 * nfs vfs operations.
 *
 * Maps each VFS operation onto its handler.  The nfs_* handlers are the
 * static functions declared above; ncl_init/ncl_uninit are not declared
 * in this part of the file.
 */
static struct vfsops nfs_vfsops = {
	.vfs_init =		ncl_init,
	.vfs_mount =		nfs_mount,
	.vfs_cmount =		nfs_cmount,
	.vfs_root =		nfs_root,
	.vfs_statfs =		nfs_statfs,
	.vfs_sync =		nfs_sync,
	.vfs_uninit =		ncl_uninit,
	.vfs_unmount =		nfs_unmount,
	.vfs_sysctl =		nfs_sysctl,
	.vfs_purge =		nfs_purge,
};
/* Register the table under the name "nfs" with the NETWORK and SBDRY flags. */
VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);

/* So that loader and kldload(2) can find us, wherever we are.. */
MODULE_VERSION(nfs, 1);
/* Modules this one depends on (each at version 1). */
MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
MODULE_DEPEND(nfs, krpc, 1, 1, 1);
MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
148 
/*
 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
 * can be shared by both NFS clients. It is declared here so that it
 * will be defined for kernels built without NFS_ROOT, although it
 * isn't used in that case.
 */
#if !defined(NFS_ROOT) && !defined(NFSCLIENT)
struct nfs_diskless	nfs_diskless = { { { 0 } } };
struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
int			nfs_diskless_valid = 0;
#endif

/*
 * Read-only sysctls exposing the diskless boot state.  nfs_mountroot()
 * treats nfs_diskless_valid == 0 as "not filled in" and 1 as "old-format
 * data that must be converted"; nfs_convert_diskless() sets it to 3.
 */
SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &nfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

/*
 * Despite its name, root_hostnam is what nfs_mountroot() appends after
 * "a.b.c.d:" when it builds the root mount string, i.e. the root path.
 */
SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");

SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
    "%Ssockaddr_in", "Diskless root nfs address");
171 
172 
/*
 * Prototypes for the diskless-boot helpers.  newnfsargs_ntoh() is the only
 * one with external linkage; the rest are private to this file.
 */
void		newnfsargs_ntoh(struct nfs_args *);
static int	nfs_mountdiskless(char *,
		    struct sockaddr_in *, struct nfs_args *,
		    struct thread *, struct vnode **, struct mount *);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
180 
181 int
182 newnfs_iosize(struct nfsmount *nmp)
183 {
184 	int iosize, maxio;
185 
186 	/* First, set the upper limit for iosize */
187 	if (nmp->nm_flag & NFSMNT_NFSV4) {
188 		maxio = NFS_MAXBSIZE;
189 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
190 		if (nmp->nm_sotype == SOCK_DGRAM)
191 			maxio = NFS_MAXDGRAMDATA;
192 		else
193 			maxio = NFS_MAXBSIZE;
194 	} else {
195 		maxio = NFS_V2MAXDATA;
196 	}
197 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
198 		nmp->nm_rsize = maxio;
199 	if (nmp->nm_rsize > MAXBSIZE)
200 		nmp->nm_rsize = MAXBSIZE;
201 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
202 		nmp->nm_readdirsize = maxio;
203 	if (nmp->nm_readdirsize > nmp->nm_rsize)
204 		nmp->nm_readdirsize = nmp->nm_rsize;
205 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
206 		nmp->nm_wsize = maxio;
207 	if (nmp->nm_wsize > MAXBSIZE)
208 		nmp->nm_wsize = MAXBSIZE;
209 
210 	/*
211 	 * Calculate the size used for io buffers.  Use the larger
212 	 * of the two sizes to minimise nfs requests but make sure
213 	 * that it is at least one VM page to avoid wasting buffer
214 	 * space.
215 	 */
216 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
217 	iosize = imax(iosize, PAGE_SIZE);
218 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
219 	return (iosize);
220 }
221 
222 static void
223 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
224 {
225 
226 	args->version = NFS_ARGSVERSION;
227 	args->addr = oargs->addr;
228 	args->addrlen = oargs->addrlen;
229 	args->sotype = oargs->sotype;
230 	args->proto = oargs->proto;
231 	args->fh = oargs->fh;
232 	args->fhsize = oargs->fhsize;
233 	args->flags = oargs->flags;
234 	args->wsize = oargs->wsize;
235 	args->rsize = oargs->rsize;
236 	args->readdirsize = oargs->readdirsize;
237 	args->timeo = oargs->timeo;
238 	args->retrans = oargs->retrans;
239 	args->readahead = oargs->readahead;
240 	args->hostname = oargs->hostname;
241 }
242 
243 static void
244 nfs_convert_diskless(void)
245 {
246 
247 	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
248 		sizeof(struct ifaliasreq));
249 	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
250 		sizeof(struct sockaddr_in));
251 	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
252 	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
253 		nfsv3_diskless.root_fhsize = NFSX_MYFH;
254 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
255 	} else {
256 		nfsv3_diskless.root_fhsize = NFSX_V2FH;
257 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
258 	}
259 	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
260 		sizeof(struct sockaddr_in));
261 	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
262 	nfsv3_diskless.root_time = nfs_diskless.root_time;
263 	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
264 		MAXHOSTNAMELEN);
265 	nfs_diskless_valid = 3;
266 }
267 
/*
 * nfs statfs call
 *
 * Fill *sbp with file system statistics for the NFS mount mp by issuing
 * the statfs RPC (preceded by an fsinfo RPC for NFSv3 mounts that have not
 * done one yet).  Returns 0 on success, otherwise the error from
 * vfs_busy(), ncl_nget() or the RPCs.
 */
static int
nfs_statfs(struct mount *mp, struct statfs *sbp)
{
	struct vnode *vp;
	struct thread *td;
	struct nfsmount *nmp = VFSTONFS(mp);
	struct nfsvattr nfsva;
	struct nfsfsinfo fs;
	struct nfsstatfs sb;
	int error = 0, attrflag, gotfsinfo = 0, ret;
	struct nfsnode *np;

	td = curthread;

	/* Busy the mount for the duration of the call (MBF_NOWAIT). */
	error = vfs_busy(mp, MBF_NOWAIT);
	if (error)
		return (error);
	/* Get the exclusively locked node for the mount's file handle. */
	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
	if (error) {
		vfs_unbusy(mp);
		return (error);
	}
	vp = NFSTOV(np);
	/*
	 * The mount flags are tested under nm_mtx, but the mutex is dropped
	 * before any RPC is issued.
	 */
	mtx_lock(&nmp->nm_mtx);
	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
		mtx_unlock(&nmp->nm_mtx);
		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
		    &attrflag, NULL);
		if (!error)
			gotfsinfo = 1;
	} else
		mtx_unlock(&nmp->nm_mtx);
	if (!error)
		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
		    &attrflag, NULL);
	if (error != 0)
		NFSCL_DEBUG(2, "statfs=%d\n", error);
	/*
	 * attrflag == 0 presumably means the RPC did not return attributes;
	 * fetch them by file handle and, failing that, make some up.
	 */
	if (attrflag == 0) {
		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
		    td->td_ucred, td, &nfsva, NULL, NULL);
		if (ret) {
			/*
			 * Just set default values to get things going.
			 */
			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
			nfsva.na_vattr.va_type = VDIR;
			nfsva.na_vattr.va_mode = 0777;
			nfsva.na_vattr.va_nlink = 100;
			nfsva.na_vattr.va_uid = (uid_t)0;
			nfsva.na_vattr.va_gid = (gid_t)0;
			nfsva.na_vattr.va_fileid = 2;
			nfsva.na_vattr.va_gen = 1;
			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
			nfsva.na_vattr.va_size = 512 * 1024;
		}
	}
	/* The attribute cache is loaded whether or not the RPCs succeeded. */
	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
	if (!error) {
	    /* Publish the fsinfo/statfs results with nm_mtx held. */
	    mtx_lock(&nmp->nm_mtx);
	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
		nfscl_loadfsinfo(nmp, &fs);
	    nfscl_loadsbinfo(nmp, &sb, sbp);
	    sbp->f_iosize = newnfs_iosize(nmp);
	    mtx_unlock(&nmp->nm_mtx);
	    /* Caller-supplied buffer: copy the mount names into it too. */
	    if (sbp != &mp->mnt_stat) {
		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
	    }
	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
	} else if (NFS_ISV4(vp)) {
		/* For NFSv4 the error is passed through nfscl_maperr(). */
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	}
	/* Unlock and release the vnode, then drop the busy reference. */
	vput(vp);
	vfs_unbusy(mp);
	return (error);
}
347 
348 /*
349  * nfs version 3 fsinfo rpc call
350  */
351 int
352 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
353     struct thread *td)
354 {
355 	struct nfsfsinfo fs;
356 	struct nfsvattr nfsva;
357 	int error, attrflag;
358 
359 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
360 	if (!error) {
361 		if (attrflag)
362 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
363 			    1);
364 		mtx_lock(&nmp->nm_mtx);
365 		nfscl_loadfsinfo(nmp, &fs);
366 		mtx_unlock(&nmp->nm_mtx);
367 	}
368 	return (error);
369 }
370 
371 /*
372  * Mount a remote root fs via. nfs. This depends on the info in the
373  * nfs_diskless structure that has been filled in properly by some primary
374  * bootstrap.
375  * It goes something like this:
376  * - do enough of "ifconfig" by calling ifioctl() so that the system
377  *   can talk to the server
378  * - If nfs_diskless.mygateway is filled in, use that address as
379  *   a default gateway.
380  * - build the rootfs mount point and call mountnfs() to do the rest.
381  *
382  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
383  * structure, as well as other global NFS client variables here, as
384  * nfs_mountroot() will be called once in the boot before any other NFS
385  * client activity occurs.
386  */
387 static int
388 nfs_mountroot(struct mount *mp)
389 {
390 	struct thread *td = curthread;
391 	struct nfsv3_diskless *nd = &nfsv3_diskless;
392 	struct socket *so;
393 	struct vnode *vp;
394 	struct ifreq ir;
395 	int error;
396 	u_long l;
397 	char buf[128];
398 	char *cp;
399 
400 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
401 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
402 #elif defined(NFS_ROOT)
403 	nfs_setup_diskless();
404 #endif
405 
406 	if (nfs_diskless_valid == 0)
407 		return (-1);
408 	if (nfs_diskless_valid == 1)
409 		nfs_convert_diskless();
410 
411 	/*
412 	 * XXX splnet, so networks will receive...
413 	 */
414 	splnet();
415 
416 	/*
417 	 * Do enough of ifconfig(8) so that the critical net interface can
418 	 * talk to the server.
419 	 */
420 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
421 	    td->td_ucred, td);
422 	if (error)
423 		panic("nfs_mountroot: socreate(%04x): %d",
424 			nd->myif.ifra_addr.sa_family, error);
425 
426 #if 0 /* XXX Bad idea */
427 	/*
428 	 * We might not have been told the right interface, so we pass
429 	 * over the first ten interfaces of the same kind, until we get
430 	 * one of them configured.
431 	 */
432 
433 	for (i = strlen(nd->myif.ifra_name) - 1;
434 		nd->myif.ifra_name[i] >= '0' &&
435 		nd->myif.ifra_name[i] <= '9';
436 		nd->myif.ifra_name[i] ++) {
437 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
438 		if(!error)
439 			break;
440 	}
441 #endif
442 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
443 	if (error)
444 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
445 	if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
446 		ir.ifr_mtu = strtol(cp, NULL, 10);
447 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
448 		freeenv(cp);
449 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
450 		if (error)
451 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
452 	}
453 	soclose(so);
454 
455 	/*
456 	 * If the gateway field is filled in, set it as the default route.
457 	 * Note that pxeboot will set a default route of 0 if the route
458 	 * is not set by the DHCP server.  Check also for a value of 0
459 	 * to avoid panicking inappropriately in that situation.
460 	 */
461 	if (nd->mygateway.sin_len != 0 &&
462 	    nd->mygateway.sin_addr.s_addr != 0) {
463 		struct sockaddr_in mask, sin;
464 
465 		bzero((caddr_t)&mask, sizeof(mask));
466 		sin = mask;
467 		sin.sin_family = AF_INET;
468 		sin.sin_len = sizeof(sin);
469                 /* XXX MRT use table 0 for this sort of thing */
470 		CURVNET_SET(TD_TO_VNET(td));
471 		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
472 		    (struct sockaddr *)&nd->mygateway,
473 		    (struct sockaddr *)&mask,
474 		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
475 		CURVNET_RESTORE();
476 		if (error)
477 			panic("nfs_mountroot: RTM_ADD: %d", error);
478 	}
479 
480 	/*
481 	 * Create the rootfs mount point.
482 	 */
483 	nd->root_args.fh = nd->root_fh;
484 	nd->root_args.fhsize = nd->root_fhsize;
485 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
486 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
487 		(l >> 24) & 0xff, (l >> 16) & 0xff,
488 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
489 	printf("NFS ROOT: %s\n", buf);
490 	nd->root_args.hostname = buf;
491 	if ((error = nfs_mountdiskless(buf,
492 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
493 		return (error);
494 	}
495 
496 	/*
497 	 * This is not really an nfs issue, but it is much easier to
498 	 * set hostname here and then let the "/etc/rc.xxx" files
499 	 * mount the right /var based upon its preset value.
500 	 */
501 	mtx_lock(&prison0.pr_mtx);
502 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
503 	    sizeof(prison0.pr_hostname));
504 	mtx_unlock(&prison0.pr_mtx);
505 	inittodr(ntohl(nd->root_time));
506 	return (0);
507 }
508 
509 /*
510  * Internal version of mount system call for diskless setup.
511  */
512 static int
513 nfs_mountdiskless(char *path,
514     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
515     struct vnode **vpp, struct mount *mp)
516 {
517 	struct sockaddr *nam;
518 	int dirlen, error;
519 	char *dirpath;
520 
521 	/*
522 	 * Find the directory path in "path", which also has the server's
523 	 * name/ip address in it.
524 	 */
525 	dirpath = strchr(path, ':');
526 	if (dirpath != NULL)
527 		dirlen = strlen(++dirpath);
528 	else
529 		dirlen = 0;
530 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
531 	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
532 	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
533 	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
534 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
535 		return (error);
536 	}
537 	return (0);
538 }
539 
540 static void
541 nfs_sec_name(char *sec, int *flagsp)
542 {
543 	if (!strcmp(sec, "krb5"))
544 		*flagsp |= NFSMNT_KERB;
545 	else if (!strcmp(sec, "krb5i"))
546 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
547 	else if (!strcmp(sec, "krb5p"))
548 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
549 }
550 
/*
 * Apply the mount arguments in *argp to the NFS mount nmp, and the "ro"
 * option to mp's read-only flag.  Numeric values are range-checked or
 * clamped as they are copied.  If the mount already has a client handle
 * (nm_client != NULL) and the socket type, protocol, reserved-port or
 * connection mode changed, the existing connection is dropped and, for
 * datagram sockets, re-established.  If hostname is non-NULL, the part
 * before the first ':' is saved in nm_hostname.
 */
static void
nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
    const char *hostname, struct ucred *cred, struct thread *td)
{
	int s;
	int adjsock;
	char *p;

	s = splnet();

	/*
	 * Set read-only flag if requested; otherwise, clear it if this is
	 * an update.  If this is not an update, then either the read-only
	 * flag is already clear, or this is a root mount and it was set
	 * intentionally at some previous point.
	 */
	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
		MNT_ILOCK(mp);
		mp->mnt_flag |= MNT_RDONLY;
		MNT_IUNLOCK(mp);
	} else if (mp->mnt_flag & MNT_UPDATE) {
		MNT_ILOCK(mp);
		mp->mnt_flag &= ~MNT_RDONLY;
		MNT_IUNLOCK(mp);
	}

	/*
	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
	 * no sense in that context.  Also, set up appropriate retransmit
	 * and soft timeout behavior.
	 */
	if (argp->sotype == SOCK_STREAM) {
		nmp->nm_flag &= ~NFSMNT_NOCONN;
		nmp->nm_timeo = NFS_MAXTIMEO;
		if ((argp->flags & NFSMNT_NFSV4) != 0)
			nmp->nm_retry = INT_MAX;
		else
			nmp->nm_retry = NFS_RETRANS_TCP;
	}

	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
		argp->flags &= ~NFSMNT_RDIRPLUS;
		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
	}

	/* Re-bind if rsrvd port requested and wasn't on one */
	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
		  && (argp->flags & NFSMNT_RESVPORT);
	/* Also re-bind if we're switching to/from a connected UDP socket */
	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
		    (argp->flags & NFSMNT_NOCONN));

	/*
	 * Update flags atomically.  Don't change the lock bits.
	 * Note that this only ORs bits in: apart from the explicit clears
	 * above, a flag already set in nm_flag is never cleared here.
	 */
	nmp->nm_flag = argp->flags | nmp->nm_flag;
	splx(s);

	/* Timeout: scaled by NFS_HZ / 10 with rounding, then clamped. */
	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
		if (nmp->nm_timeo < NFS_MINTIMEO)
			nmp->nm_timeo = NFS_MINTIMEO;
		else if (nmp->nm_timeo > NFS_MAXTIMEO)
			nmp->nm_timeo = NFS_MAXTIMEO;
	}

	/* Retransmit count: values of 1 or less are ignored. */
	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
		nmp->nm_retry = argp->retrans;
		if (nmp->nm_retry > NFS_MAXREXMIT)
			nmp->nm_retry = NFS_MAXREXMIT;
	}

	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
		nmp->nm_wsize = argp->wsize;
		/*
		 * Clip at the power of 2 below the size. There is an
		 * issue (not isolated) that causes intermittent page
		 * faults if this is not done.
		 */
		if (nmp->nm_wsize > NFS_FABLKSIZE)
			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
		else
			nmp->nm_wsize = NFS_FABLKSIZE;
	}

	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
		nmp->nm_rsize = argp->rsize;
		/*
		 * Clip at the power of 2 below the size. There is an
		 * issue (not isolated) that causes intermittent page
		 * faults if this is not done.
		 */
		if (nmp->nm_rsize > NFS_FABLKSIZE)
			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
		else
			nmp->nm_rsize = NFS_FABLKSIZE;
	}

	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
		nmp->nm_readdirsize = argp->readdirsize;
	}

	/*
	 * Attribute cache timeouts: each one falls back to its compile-time
	 * default when not supplied, and each minimum is then capped at the
	 * corresponding maximum.
	 */
	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
		nmp->nm_acregmin = argp->acregmin;
	else
		nmp->nm_acregmin = NFS_MINATTRTIMO;
	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
		nmp->nm_acregmax = argp->acregmax;
	else
		nmp->nm_acregmax = NFS_MAXATTRTIMO;
	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
		nmp->nm_acdirmin = argp->acdirmin;
	else
		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
		nmp->nm_acdirmax = argp->acdirmax;
	else
		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
		nmp->nm_acdirmin = nmp->nm_acdirmax;
	if (nmp->nm_acregmin > nmp->nm_acregmax)
		nmp->nm_acregmin = nmp->nm_acregmax;

	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
		if (argp->readahead <= NFS_MAXRAHEAD)
			nmp->nm_readahead = argp->readahead;
		else
			nmp->nm_readahead = NFS_MAXRAHEAD;
	}
	/* The write commit size is never set below the write size. */
	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
		if (argp->wcommitsize < nmp->nm_wsize)
			nmp->nm_wcommitsize = nmp->nm_wsize;
		else
			nmp->nm_wcommitsize = argp->wcommitsize;
	}

	/* A change of socket type or protocol also needs a new socket. */
	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
		    (nmp->nm_soproto != argp->proto));

	if (nmp->nm_client != NULL && adjsock) {
		int haslock = 0, error = 0;

		/* For stream sockets the send lock is taken around the disconnect. */
		if (nmp->nm_sotype == SOCK_STREAM) {
			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
			if (!error)
				haslock = 1;
		}
		if (!error) {
		    newnfs_disconnect(&nmp->nm_sockreq);
		    if (haslock)
			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
		    nmp->nm_sotype = argp->sotype;
		    nmp->nm_soproto = argp->proto;
		    /* Datagram sockets are reconnected here, retrying until it works. */
		    if (nmp->nm_sotype == SOCK_DGRAM)
			while (newnfs_connect(nmp, &nmp->nm_sockreq,
			    cred, td, 0)) {
				printf("newnfs_args: retrying connect\n");
				(void) nfs_catnap(PSOCK, 0, "newnfscon");
			}
		}
	} else {
		nmp->nm_sotype = argp->sotype;
		nmp->nm_soproto = argp->proto;
	}

	/* Keep only the host part of "host:/path". */
	if (hostname != NULL) {
		strlcpy(nmp->nm_hostname, hostname,
		    sizeof(nmp->nm_hostname));
		p = strchr(nmp->nm_hostname, ':');
		if (p != NULL)
			*p = '\0';
	}
}
723 
/*
 * Mount option names accepted by nfs_mount(), which passes this list to
 * vfs_filteropt().  The list is terminated by a NULL entry.
 */
static const char *nfs_opts[] = {
	"from", "nfs_args", "noac", "noatime",
	"noexec", "suiddir", "nosuid", "nosymfollow",
	"union", "noclusterr", "noclusterw", "multilabel",
	"acls", "force", "update", "async",
	"noconn", "nolockd", "conn", "lockd",
	"intr", "rdirplus", "readdirsize", "soft",
	"hard", "mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans", "actimeo",
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeo",
	"timeout", "addr", "fh", "nfsv3",
	"sec", "principal", "nfsv4", "gssname",
	"allgssname", "dirpath", "minorversion", "nametimeo",
	"negnametimeo", "nocto", "noncontigwr", "pnfs",
	"wcommitsize",
	NULL
};
735 
736 /*
737  * VFS Operations.
738  *
739  * mount system call
740  * It seems a bit dumb to copyinstr() the host and path here and then
741  * bcopy() them in mountnfs(), but I wanted to detect errors before
742  * doing the sockargs() call because sockargs() allocates an mbuf and
743  * an error after that means that I have to release the mbuf.
744  */
745 /* ARGSUSED */
746 static int
747 nfs_mount(struct mount *mp)
748 {
749 	struct nfs_args args = {
750 	    .version = NFS_ARGSVERSION,
751 	    .addr = NULL,
752 	    .addrlen = sizeof (struct sockaddr_in),
753 	    .sotype = SOCK_STREAM,
754 	    .proto = 0,
755 	    .fh = NULL,
756 	    .fhsize = 0,
757 	    .flags = NFSMNT_RESVPORT,
758 	    .wsize = NFS_WSIZE,
759 	    .rsize = NFS_RSIZE,
760 	    .readdirsize = NFS_READDIRSIZE,
761 	    .timeo = 10,
762 	    .retrans = NFS_RETRANS,
763 	    .readahead = NFS_DEFRAHEAD,
764 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
765 	    .hostname = NULL,
766 	    .acregmin = NFS_MINATTRTIMO,
767 	    .acregmax = NFS_MAXATTRTIMO,
768 	    .acdirmin = NFS_MINDIRATTRTIMO,
769 	    .acdirmax = NFS_MAXDIRATTRTIMO,
770 	};
771 	int error = 0, ret, len;
772 	struct sockaddr *nam = NULL;
773 	struct vnode *vp;
774 	struct thread *td;
775 	char hst[MNAMELEN];
776 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
777 	char *opt, *name, *secname;
778 	int nametimeo = NFS_DEFAULT_NAMETIMEO;
779 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
780 	int minvers = 0;
781 	int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
782 	size_t hstlen;
783 
784 	has_nfs_args_opt = 0;
785 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
786 		error = EINVAL;
787 		goto out;
788 	}
789 
790 	td = curthread;
791 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
792 		error = nfs_mountroot(mp);
793 		goto out;
794 	}
795 
796 	nfscl_init();
797 
798 	/*
799 	 * The old mount_nfs program passed the struct nfs_args
800 	 * from userspace to kernel.  The new mount_nfs program
801 	 * passes string options via nmount() from userspace to kernel
802 	 * and we populate the struct nfs_args in the kernel.
803 	 */
804 	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
805 		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
806 		    sizeof(args));
807 		if (error != 0)
808 			goto out;
809 
810 		if (args.version != NFS_ARGSVERSION) {
811 			error = EPROGMISMATCH;
812 			goto out;
813 		}
814 		has_nfs_args_opt = 1;
815 	}
816 
817 	/* Handle the new style options. */
818 	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
819 		args.acdirmin = args.acdirmax =
820 		    args.acregmin = args.acregmax = 0;
821 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
822 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
823 	}
824 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
825 		args.flags |= NFSMNT_NOCONN;
826 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
827 		args.flags &= ~NFSMNT_NOCONN;
828 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
829 		args.flags |= NFSMNT_NOLOCKD;
830 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
831 		args.flags &= ~NFSMNT_NOLOCKD;
832 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
833 		args.flags |= NFSMNT_INT;
834 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
835 		args.flags |= NFSMNT_RDIRPLUS;
836 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
837 		args.flags |= NFSMNT_RESVPORT;
838 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
839 		args.flags &= ~NFSMNT_RESVPORT;
840 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
841 		args.flags |= NFSMNT_SOFT;
842 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
843 		args.flags &= ~NFSMNT_SOFT;
844 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
845 		args.sotype = SOCK_DGRAM;
846 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
847 		args.sotype = SOCK_DGRAM;
848 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
849 		args.sotype = SOCK_STREAM;
850 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
851 		args.flags |= NFSMNT_NFSV3;
852 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
853 		args.flags |= NFSMNT_NFSV4;
854 		args.sotype = SOCK_STREAM;
855 	}
856 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
857 		args.flags |= NFSMNT_ALLGSSNAME;
858 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
859 		args.flags |= NFSMNT_NOCTO;
860 	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
861 		args.flags |= NFSMNT_NONCONTIGWR;
862 	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
863 		args.flags |= NFSMNT_PNFS;
864 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
865 		if (opt == NULL) {
866 			vfs_mount_error(mp, "illegal readdirsize");
867 			error = EINVAL;
868 			goto out;
869 		}
870 		ret = sscanf(opt, "%d", &args.readdirsize);
871 		if (ret != 1 || args.readdirsize <= 0) {
872 			vfs_mount_error(mp, "illegal readdirsize: %s",
873 			    opt);
874 			error = EINVAL;
875 			goto out;
876 		}
877 		args.flags |= NFSMNT_READDIRSIZE;
878 	}
879 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
880 		if (opt == NULL) {
881 			vfs_mount_error(mp, "illegal readahead");
882 			error = EINVAL;
883 			goto out;
884 		}
885 		ret = sscanf(opt, "%d", &args.readahead);
886 		if (ret != 1 || args.readahead <= 0) {
887 			vfs_mount_error(mp, "illegal readahead: %s",
888 			    opt);
889 			error = EINVAL;
890 			goto out;
891 		}
892 		args.flags |= NFSMNT_READAHEAD;
893 	}
894 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
895 		if (opt == NULL) {
896 			vfs_mount_error(mp, "illegal wsize");
897 			error = EINVAL;
898 			goto out;
899 		}
900 		ret = sscanf(opt, "%d", &args.wsize);
901 		if (ret != 1 || args.wsize <= 0) {
902 			vfs_mount_error(mp, "illegal wsize: %s",
903 			    opt);
904 			error = EINVAL;
905 			goto out;
906 		}
907 		args.flags |= NFSMNT_WSIZE;
908 	}
909 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
910 		if (opt == NULL) {
911 			vfs_mount_error(mp, "illegal rsize");
912 			error = EINVAL;
913 			goto out;
914 		}
915 		ret = sscanf(opt, "%d", &args.rsize);
916 		if (ret != 1 || args.rsize <= 0) {
917 			vfs_mount_error(mp, "illegal wsize: %s",
918 			    opt);
919 			error = EINVAL;
920 			goto out;
921 		}
922 		args.flags |= NFSMNT_RSIZE;
923 	}
924 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
925 		if (opt == NULL) {
926 			vfs_mount_error(mp, "illegal retrans");
927 			error = EINVAL;
928 			goto out;
929 		}
930 		ret = sscanf(opt, "%d", &args.retrans);
931 		if (ret != 1 || args.retrans <= 0) {
932 			vfs_mount_error(mp, "illegal retrans: %s",
933 			    opt);
934 			error = EINVAL;
935 			goto out;
936 		}
937 		args.flags |= NFSMNT_RETRANS;
938 	}
939 	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
940 		ret = sscanf(opt, "%d", &args.acregmin);
941 		if (ret != 1 || args.acregmin < 0) {
942 			vfs_mount_error(mp, "illegal actimeo: %s",
943 			    opt);
944 			error = EINVAL;
945 			goto out;
946 		}
947 		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
948 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
949 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
950 	}
951 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
952 		ret = sscanf(opt, "%d", &args.acregmin);
953 		if (ret != 1 || args.acregmin < 0) {
954 			vfs_mount_error(mp, "illegal acregmin: %s",
955 			    opt);
956 			error = EINVAL;
957 			goto out;
958 		}
959 		args.flags |= NFSMNT_ACREGMIN;
960 	}
961 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
962 		ret = sscanf(opt, "%d", &args.acregmax);
963 		if (ret != 1 || args.acregmax < 0) {
964 			vfs_mount_error(mp, "illegal acregmax: %s",
965 			    opt);
966 			error = EINVAL;
967 			goto out;
968 		}
969 		args.flags |= NFSMNT_ACREGMAX;
970 	}
971 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
972 		ret = sscanf(opt, "%d", &args.acdirmin);
973 		if (ret != 1 || args.acdirmin < 0) {
974 			vfs_mount_error(mp, "illegal acdirmin: %s",
975 			    opt);
976 			error = EINVAL;
977 			goto out;
978 		}
979 		args.flags |= NFSMNT_ACDIRMIN;
980 	}
981 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
982 		ret = sscanf(opt, "%d", &args.acdirmax);
983 		if (ret != 1 || args.acdirmax < 0) {
984 			vfs_mount_error(mp, "illegal acdirmax: %s",
985 			    opt);
986 			error = EINVAL;
987 			goto out;
988 		}
989 		args.flags |= NFSMNT_ACDIRMAX;
990 	}
991 	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
992 		ret = sscanf(opt, "%d", &args.wcommitsize);
993 		if (ret != 1 || args.wcommitsize < 0) {
994 			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
995 			error = EINVAL;
996 			goto out;
997 		}
998 		args.flags |= NFSMNT_WCOMMITSIZE;
999 	}
1000 	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1001 		ret = sscanf(opt, "%d", &args.timeo);
1002 		if (ret != 1 || args.timeo <= 0) {
1003 			vfs_mount_error(mp, "illegal timeo: %s",
1004 			    opt);
1005 			error = EINVAL;
1006 			goto out;
1007 		}
1008 		args.flags |= NFSMNT_TIMEO;
1009 	}
1010 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1011 		ret = sscanf(opt, "%d", &args.timeo);
1012 		if (ret != 1 || args.timeo <= 0) {
1013 			vfs_mount_error(mp, "illegal timeout: %s",
1014 			    opt);
1015 			error = EINVAL;
1016 			goto out;
1017 		}
1018 		args.flags |= NFSMNT_TIMEO;
1019 	}
1020 	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1021 		ret = sscanf(opt, "%d", &nametimeo);
1022 		if (ret != 1 || nametimeo < 0) {
1023 			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1024 			error = EINVAL;
1025 			goto out;
1026 		}
1027 	}
1028 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1029 	    == 0) {
1030 		ret = sscanf(opt, "%d", &negnametimeo);
1031 		if (ret != 1 || negnametimeo < 0) {
1032 			vfs_mount_error(mp, "illegal negnametimeo: %s",
1033 			    opt);
1034 			error = EINVAL;
1035 			goto out;
1036 		}
1037 	}
1038 	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1039 	    0) {
1040 		ret = sscanf(opt, "%d", &minvers);
1041 		if (ret != 1 || minvers < 0 || minvers > 1 ||
1042 		    (args.flags & NFSMNT_NFSV4) == 0) {
1043 			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1044 			error = EINVAL;
1045 			goto out;
1046 		}
1047 	}
1048 	if (vfs_getopt(mp->mnt_optnew, "sec",
1049 		(void **) &secname, NULL) == 0)
1050 		nfs_sec_name(secname, &args.flags);
1051 
1052 	if (mp->mnt_flag & MNT_UPDATE) {
1053 		struct nfsmount *nmp = VFSTONFS(mp);
1054 
1055 		if (nmp == NULL) {
1056 			error = EIO;
1057 			goto out;
1058 		}
1059 
1060 		/*
1061 		 * If a change from TCP->UDP is done and there are thread(s)
1062 		 * that have I/O RPC(s) in progress with a tranfer size
1063 		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1064 		 * hung, retrying the RPC(s) forever. Usually these threads
1065 		 * will be seen doing an uninterruptible sleep on wait channel
1066 		 * "newnfsreq" (truncated to "newnfsre" by procstat).
1067 		 */
1068 		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1069 			tprintf(td->td_proc, LOG_WARNING,
1070 	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1071 
1072 		/*
1073 		 * When doing an update, we can't change version,
1074 		 * security, switch lockd strategies or change cookie
1075 		 * translation
1076 		 */
1077 		args.flags = (args.flags &
1078 		    ~(NFSMNT_NFSV3 |
1079 		      NFSMNT_NFSV4 |
1080 		      NFSMNT_KERB |
1081 		      NFSMNT_INTEGRITY |
1082 		      NFSMNT_PRIVACY |
1083 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1084 		    (nmp->nm_flag &
1085 			(NFSMNT_NFSV3 |
1086 			 NFSMNT_NFSV4 |
1087 			 NFSMNT_KERB |
1088 			 NFSMNT_INTEGRITY |
1089 			 NFSMNT_PRIVACY |
1090 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1091 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1092 		goto out;
1093 	}
1094 
1095 	/*
1096 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1097 	 * or no-connection mode for those protocols that support
1098 	 * no-connection mode (the flag will be cleared later for protocols
1099 	 * that do not support no-connection mode).  This will allow a client
1100 	 * to receive replies from a different IP then the request was
1101 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1102 	 * not 0.
1103 	 */
1104 	if (nfs_ip_paranoia == 0)
1105 		args.flags |= NFSMNT_NOCONN;
1106 
1107 	if (has_nfs_args_opt != 0) {
1108 		/*
1109 		 * In the 'nfs_args' case, the pointers in the args
1110 		 * structure are in userland - we copy them in here.
1111 		 */
1112 		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1113 			vfs_mount_error(mp, "Bad file handle");
1114 			error = EINVAL;
1115 			goto out;
1116 		}
1117 		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1118 		    args.fhsize);
1119 		if (error != 0)
1120 			goto out;
1121 		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1122 		if (error != 0)
1123 			goto out;
1124 		bzero(&hst[hstlen], MNAMELEN - hstlen);
1125 		args.hostname = hst;
1126 		/* sockargs() call must be after above copyin() calls */
1127 		error = getsockaddr(&nam, (caddr_t)args.addr,
1128 		    args.addrlen);
1129 		if (error != 0)
1130 			goto out;
1131 	} else {
1132 		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1133 		    &args.fhsize) == 0) {
1134 			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1135 				vfs_mount_error(mp, "Bad file handle");
1136 				error = EINVAL;
1137 				goto out;
1138 			}
1139 			bcopy(args.fh, nfh, args.fhsize);
1140 		} else {
1141 			args.fhsize = 0;
1142 		}
1143 		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1144 		    (void **)&args.hostname, &len);
1145 		if (args.hostname == NULL) {
1146 			vfs_mount_error(mp, "Invalid hostname");
1147 			error = EINVAL;
1148 			goto out;
1149 		}
1150 		bcopy(args.hostname, hst, MNAMELEN);
1151 		hst[MNAMELEN - 1] = '\0';
1152 	}
1153 
1154 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1155 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1156 	else
1157 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1158 	srvkrbnamelen = strlen(srvkrbname);
1159 
1160 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1161 		strlcpy(krbname, name, sizeof (krbname));
1162 	else
1163 		krbname[0] = '\0';
1164 	krbnamelen = strlen(krbname);
1165 
1166 	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1167 		strlcpy(dirpath, name, sizeof (dirpath));
1168 	else
1169 		dirpath[0] = '\0';
1170 	dirlen = strlen(dirpath);
1171 
1172 	if (has_nfs_args_opt == 0) {
1173 		if (vfs_getopt(mp->mnt_optnew, "addr",
1174 		    (void **)&args.addr, &args.addrlen) == 0) {
1175 			if (args.addrlen > SOCK_MAXADDRLEN) {
1176 				error = ENAMETOOLONG;
1177 				goto out;
1178 			}
1179 			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1180 			bcopy(args.addr, nam, args.addrlen);
1181 			nam->sa_len = args.addrlen;
1182 		} else {
1183 			vfs_mount_error(mp, "No server address");
1184 			error = EINVAL;
1185 			goto out;
1186 		}
1187 	}
1188 
1189 	args.fh = nfh;
1190 	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1191 	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1192 	    nametimeo, negnametimeo, minvers);
1193 out:
1194 	if (!error) {
1195 		MNT_ILOCK(mp);
1196 		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF;
1197 		MNT_IUNLOCK(mp);
1198 	}
1199 	return (error);
1200 }
1201 
1202 
1203 /*
1204  * VFS Operations.
1205  *
1206  * mount system call
1207  * It seems a bit dumb to copyinstr() the host and path here and then
1208  * bcopy() them in mountnfs(), but I wanted to detect errors before
1209  * doing the sockargs() call because sockargs() allocates an mbuf and
1210  * an error after that means that I have to release the mbuf.
1211  */
1212 /* ARGSUSED */
1213 static int
1214 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1215 {
1216 	int error;
1217 	struct nfs_args args;
1218 
1219 	error = copyin(data, &args, sizeof (struct nfs_args));
1220 	if (error)
1221 		return error;
1222 
1223 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1224 
1225 	error = kernel_mount(ma, flags);
1226 	return (error);
1227 }
1228 
1229 /*
1230  * Common code for mount and mountroot
1231  */
1232 static int
1233 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1234     char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1235     u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1236     struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1237     int minvers)
1238 {
1239 	struct nfsmount *nmp;
1240 	struct nfsnode *np;
1241 	int error, trycnt, ret;
1242 	struct nfsvattr nfsva;
1243 	struct nfsclclient *clp;
1244 	struct nfsclds *dsp, *tdsp;
1245 	uint32_t lease;
1246 	static u_int64_t clval = 0;
1247 
1248 	NFSCL_DEBUG(3, "in mnt\n");
1249 	clp = NULL;
1250 	if (mp->mnt_flag & MNT_UPDATE) {
1251 		nmp = VFSTONFS(mp);
1252 		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1253 		FREE(nam, M_SONAME);
1254 		return (0);
1255 	} else {
1256 		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1257 		    krbnamelen + dirlen + srvkrbnamelen + 2,
1258 		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1259 		TAILQ_INIT(&nmp->nm_bufq);
1260 		if (clval == 0)
1261 			clval = (u_int64_t)nfsboottime.tv_sec;
1262 		nmp->nm_clval = clval++;
1263 		nmp->nm_krbnamelen = krbnamelen;
1264 		nmp->nm_dirpathlen = dirlen;
1265 		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1266 		if (td->td_ucred->cr_uid != (uid_t)0) {
1267 			/*
1268 			 * nm_uid is used to get KerberosV credentials for
1269 			 * the nfsv4 state handling operations if there is
1270 			 * no host based principal set. Use the uid of
1271 			 * this user if not root, since they are doing the
1272 			 * mount. I don't think setting this for root will
1273 			 * work, since root normally does not have user
1274 			 * credentials in a credentials cache.
1275 			 */
1276 			nmp->nm_uid = td->td_ucred->cr_uid;
1277 		} else {
1278 			/*
1279 			 * Just set to -1, so it won't be used.
1280 			 */
1281 			nmp->nm_uid = (uid_t)-1;
1282 		}
1283 
1284 		/* Copy and null terminate all the names */
1285 		if (nmp->nm_krbnamelen > 0) {
1286 			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1287 			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1288 		}
1289 		if (nmp->nm_dirpathlen > 0) {
1290 			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1291 			    nmp->nm_dirpathlen);
1292 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1293 			    + 1] = '\0';
1294 		}
1295 		if (nmp->nm_srvkrbnamelen > 0) {
1296 			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1297 			    nmp->nm_srvkrbnamelen);
1298 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1299 			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1300 		}
1301 		nmp->nm_sockreq.nr_cred = crhold(cred);
1302 		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1303 		mp->mnt_data = nmp;
1304 		nmp->nm_getinfo = nfs_getnlminfo;
1305 		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1306 	}
1307 	vfs_getnewfsid(mp);
1308 	nmp->nm_mountp = mp;
1309 	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1310 
1311 	/*
1312 	 * Since nfs_decode_args() might optionally set them, these
1313 	 * need to be set to defaults before the call, so that the
1314 	 * optional settings aren't overwritten.
1315 	 */
1316 	nmp->nm_nametimeo = nametimeo;
1317 	nmp->nm_negnametimeo = negnametimeo;
1318 	nmp->nm_timeo = NFS_TIMEO;
1319 	nmp->nm_retry = NFS_RETRANS;
1320 	nmp->nm_readahead = NFS_DEFRAHEAD;
1321 	if (desiredvnodes >= 11000)
1322 		nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1323 	else
1324 		nmp->nm_wcommitsize = hibufspace / 10;
1325 	if ((argp->flags & NFSMNT_NFSV4) != 0)
1326 		nmp->nm_minorvers = minvers;
1327 	else
1328 		nmp->nm_minorvers = 0;
1329 
1330 	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1331 
1332 	/*
1333 	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1334 	 * high, depending on whether we end up with negative offsets in
1335 	 * the client or server somewhere.  2GB-1 may be safer.
1336 	 *
1337 	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1338 	 * that we can handle until we find out otherwise.
1339 	 */
1340 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1341 		nmp->nm_maxfilesize = 0xffffffffLL;
1342 	else
1343 		nmp->nm_maxfilesize = OFF_MAX;
1344 
1345 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1346 		nmp->nm_wsize = NFS_WSIZE;
1347 		nmp->nm_rsize = NFS_RSIZE;
1348 		nmp->nm_readdirsize = NFS_READDIRSIZE;
1349 	}
1350 	nmp->nm_numgrps = NFS_MAXGRPS;
1351 	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1352 	if (nmp->nm_tprintf_delay < 0)
1353 		nmp->nm_tprintf_delay = 0;
1354 	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1355 	if (nmp->nm_tprintf_initial_delay < 0)
1356 		nmp->nm_tprintf_initial_delay = 0;
1357 	nmp->nm_fhsize = argp->fhsize;
1358 	if (nmp->nm_fhsize > 0)
1359 		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1360 	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1361 	nmp->nm_nam = nam;
1362 	/* Set up the sockets and per-host congestion */
1363 	nmp->nm_sotype = argp->sotype;
1364 	nmp->nm_soproto = argp->proto;
1365 	nmp->nm_sockreq.nr_prog = NFS_PROG;
1366 	if ((argp->flags & NFSMNT_NFSV4))
1367 		nmp->nm_sockreq.nr_vers = NFS_VER4;
1368 	else if ((argp->flags & NFSMNT_NFSV3))
1369 		nmp->nm_sockreq.nr_vers = NFS_VER3;
1370 	else
1371 		nmp->nm_sockreq.nr_vers = NFS_VER2;
1372 
1373 
1374 	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1375 		goto bad;
1376 	/* For NFSv4.1, get the clientid now. */
1377 	if (nmp->nm_minorvers > 0) {
1378 		NFSCL_DEBUG(3, "at getcl\n");
1379 		error = nfscl_getcl(mp, cred, td, 0, &clp);
1380 		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1381 		if (error != 0)
1382 			goto bad;
1383 	}
1384 
1385 	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1386 	    nmp->nm_dirpathlen > 0) {
1387 		NFSCL_DEBUG(3, "in dirp\n");
1388 		/*
1389 		 * If the fhsize on the mount point == 0 for V4, the mount
1390 		 * path needs to be looked up.
1391 		 */
1392 		trycnt = 3;
1393 		do {
1394 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1395 			    cred, td);
1396 			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
1397 			if (error)
1398 				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1399 		} while (error && --trycnt > 0);
1400 		if (error) {
1401 			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1402 			goto bad;
1403 		}
1404 	}
1405 
1406 	/*
1407 	 * A reference count is needed on the nfsnode representing the
1408 	 * remote root.  If this object is not persistent, then backward
1409 	 * traversals of the mount point (i.e. "..") will not work if
1410 	 * the nfsnode gets flushed out of the cache. Ufs does not have
1411 	 * this problem, because one can identify root inodes by their
1412 	 * number == ROOTINO (2).
1413 	 */
1414 	if (nmp->nm_fhsize > 0) {
1415 		/*
1416 		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1417 		 * non-zero for the root vnode. f_iosize will be set correctly
1418 		 * by nfs_statfs() before any I/O occurs.
1419 		 */
1420 		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1421 		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1422 		    LK_EXCLUSIVE);
1423 		if (error)
1424 			goto bad;
1425 		*vpp = NFSTOV(np);
1426 
1427 		/*
1428 		 * Get file attributes and transfer parameters for the
1429 		 * mountpoint.  This has the side effect of filling in
1430 		 * (*vpp)->v_type with the correct value.
1431 		 */
1432 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1433 		    cred, td, &nfsva, NULL, &lease);
1434 		if (ret) {
1435 			/*
1436 			 * Just set default values to get things going.
1437 			 */
1438 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1439 			nfsva.na_vattr.va_type = VDIR;
1440 			nfsva.na_vattr.va_mode = 0777;
1441 			nfsva.na_vattr.va_nlink = 100;
1442 			nfsva.na_vattr.va_uid = (uid_t)0;
1443 			nfsva.na_vattr.va_gid = (gid_t)0;
1444 			nfsva.na_vattr.va_fileid = 2;
1445 			nfsva.na_vattr.va_gen = 1;
1446 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1447 			nfsva.na_vattr.va_size = 512 * 1024;
1448 			lease = 60;
1449 		}
1450 		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1451 		if (nmp->nm_minorvers > 0) {
1452 			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1453 			NFSLOCKCLSTATE();
1454 			clp->nfsc_renew = NFSCL_RENEW(lease);
1455 			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1456 			clp->nfsc_clientidrev++;
1457 			if (clp->nfsc_clientidrev == 0)
1458 				clp->nfsc_clientidrev++;
1459 			NFSUNLOCKCLSTATE();
1460 			/*
1461 			 * Mount will succeed, so the renew thread can be
1462 			 * started now.
1463 			 */
1464 			nfscl_start_renewthread(clp);
1465 			nfscl_clientrelease(clp);
1466 		}
1467 		if (argp->flags & NFSMNT_NFSV3)
1468 			ncl_fsinfo(nmp, *vpp, cred, td);
1469 
1470 		/* Mark if the mount point supports NFSv4 ACLs. */
1471 		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1472 		    ret == 0 &&
1473 		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1474 			MNT_ILOCK(mp);
1475 			mp->mnt_flag |= MNT_NFS4ACLS;
1476 			MNT_IUNLOCK(mp);
1477 		}
1478 
1479 		/*
1480 		 * Lose the lock but keep the ref.
1481 		 */
1482 		NFSVOPUNLOCK(*vpp, 0);
1483 		return (0);
1484 	}
1485 	error = EIO;
1486 
1487 bad:
1488 	if (clp != NULL)
1489 		nfscl_clientrelease(clp);
1490 	newnfs_disconnect(&nmp->nm_sockreq);
1491 	crfree(nmp->nm_sockreq.nr_cred);
1492 	if (nmp->nm_sockreq.nr_auth != NULL)
1493 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1494 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1495 	mtx_destroy(&nmp->nm_mtx);
1496 	if (nmp->nm_clp != NULL) {
1497 		NFSLOCKCLSTATE();
1498 		LIST_REMOVE(nmp->nm_clp, nfsc_list);
1499 		NFSUNLOCKCLSTATE();
1500 		free(nmp->nm_clp, M_NFSCLCLIENT);
1501 	}
1502 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1503 		nfscl_freenfsclds(dsp);
1504 	FREE(nmp, M_NEWNFSMNT);
1505 	FREE(nam, M_SONAME);
1506 	return (error);
1507 }
1508 
1509 /*
1510  * unmount system call
1511  */
1512 static int
1513 nfs_unmount(struct mount *mp, int mntflags)
1514 {
1515 	struct thread *td;
1516 	struct nfsmount *nmp;
1517 	int error, flags = 0, i, trycnt = 0;
1518 	struct nfsclds *dsp, *tdsp;
1519 
1520 	td = curthread;
1521 
1522 	if (mntflags & MNT_FORCE)
1523 		flags |= FORCECLOSE;
1524 	nmp = VFSTONFS(mp);
1525 	/*
1526 	 * Goes something like this..
1527 	 * - Call vflush() to clear out vnodes for this filesystem
1528 	 * - Close the socket
1529 	 * - Free up the data structures
1530 	 */
1531 	/* In the forced case, cancel any outstanding requests. */
1532 	if (mntflags & MNT_FORCE) {
1533 		error = newnfs_nmcancelreqs(nmp);
1534 		if (error)
1535 			goto out;
1536 		/* For a forced close, get rid of the renew thread now */
1537 		nfscl_umount(nmp, td);
1538 	}
1539 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1540 	do {
1541 		error = vflush(mp, 1, flags, td);
1542 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1543 			(void) nfs_catnap(PSOCK, error, "newndm");
1544 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1545 	if (error)
1546 		goto out;
1547 
1548 	/*
1549 	 * We are now committed to the unmount.
1550 	 */
1551 	if ((mntflags & MNT_FORCE) == 0)
1552 		nfscl_umount(nmp, td);
1553 	/* Make sure no nfsiods are assigned to this mount. */
1554 	mtx_lock(&ncl_iod_mutex);
1555 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1556 		if (ncl_iodmount[i] == nmp) {
1557 			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1558 			ncl_iodmount[i] = NULL;
1559 		}
1560 	mtx_unlock(&ncl_iod_mutex);
1561 	newnfs_disconnect(&nmp->nm_sockreq);
1562 	crfree(nmp->nm_sockreq.nr_cred);
1563 	FREE(nmp->nm_nam, M_SONAME);
1564 	if (nmp->nm_sockreq.nr_auth != NULL)
1565 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1566 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1567 	mtx_destroy(&nmp->nm_mtx);
1568 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1569 		nfscl_freenfsclds(dsp);
1570 	FREE(nmp, M_NEWNFSMNT);
1571 out:
1572 	return (error);
1573 }
1574 
1575 /*
1576  * Return root of a filesystem
1577  */
1578 static int
1579 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1580 {
1581 	struct vnode *vp;
1582 	struct nfsmount *nmp;
1583 	struct nfsnode *np;
1584 	int error;
1585 
1586 	nmp = VFSTONFS(mp);
1587 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1588 	if (error)
1589 		return error;
1590 	vp = NFSTOV(np);
1591 	/*
1592 	 * Get transfer parameters and attributes for root vnode once.
1593 	 */
1594 	mtx_lock(&nmp->nm_mtx);
1595 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1596 		mtx_unlock(&nmp->nm_mtx);
1597 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1598 	} else
1599 		mtx_unlock(&nmp->nm_mtx);
1600 	if (vp->v_type == VNON)
1601 	    vp->v_type = VDIR;
1602 	vp->v_vflag |= VV_ROOT;
1603 	*vpp = vp;
1604 	return (0);
1605 }
1606 
1607 /*
1608  * Flush out the buffer cache
1609  */
1610 /* ARGSUSED */
1611 static int
1612 nfs_sync(struct mount *mp, int waitfor)
1613 {
1614 	struct vnode *vp, *mvp;
1615 	struct thread *td;
1616 	int error, allerror = 0;
1617 
1618 	td = curthread;
1619 
1620 	MNT_ILOCK(mp);
1621 	/*
1622 	 * If a forced dismount is in progress, return from here so that
1623 	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1624 	 * calling VFS_UNMOUNT().
1625 	 */
1626 	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1627 		MNT_IUNLOCK(mp);
1628 		return (EBADF);
1629 	}
1630 	MNT_IUNLOCK(mp);
1631 
1632 	/*
1633 	 * Force stale buffer cache information to be flushed.
1634 	 */
1635 loop:
1636 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1637 		/* XXX Racy bv_cnt check. */
1638 		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1639 		    waitfor == MNT_LAZY) {
1640 			VI_UNLOCK(vp);
1641 			continue;
1642 		}
1643 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1644 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1645 			goto loop;
1646 		}
1647 		error = VOP_FSYNC(vp, waitfor, td);
1648 		if (error)
1649 			allerror = error;
1650 		NFSVOPUNLOCK(vp, 0);
1651 		vrele(vp);
1652 	}
1653 	return (allerror);
1654 }
1655 
1656 static int
1657 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1658 {
1659 	struct nfsmount *nmp = VFSTONFS(mp);
1660 	struct vfsquery vq;
1661 	int error;
1662 
1663 	bzero(&vq, sizeof(vq));
1664 	switch (op) {
1665 #if 0
1666 	case VFS_CTL_NOLOCKS:
1667 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1668  		if (req->oldptr != NULL) {
1669  			error = SYSCTL_OUT(req, &val, sizeof(val));
1670  			if (error)
1671  				return (error);
1672  		}
1673  		if (req->newptr != NULL) {
1674  			error = SYSCTL_IN(req, &val, sizeof(val));
1675  			if (error)
1676  				return (error);
1677 			if (val)
1678 				nmp->nm_flag |= NFSMNT_NOLOCKS;
1679 			else
1680 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1681  		}
1682 		break;
1683 #endif
1684 	case VFS_CTL_QUERY:
1685 		mtx_lock(&nmp->nm_mtx);
1686 		if (nmp->nm_state & NFSSTA_TIMEO)
1687 			vq.vq_flags |= VQ_NOTRESP;
1688 		mtx_unlock(&nmp->nm_mtx);
1689 #if 0
1690 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1691 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1692 			vq.vq_flags |= VQ_NOTRESPLOCK;
1693 #endif
1694 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1695 		break;
1696  	case VFS_CTL_TIMEO:
1697  		if (req->oldptr != NULL) {
1698  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1699  			    sizeof(nmp->nm_tprintf_initial_delay));
1700  			if (error)
1701  				return (error);
1702  		}
1703  		if (req->newptr != NULL) {
1704 			error = vfs_suser(mp, req->td);
1705 			if (error)
1706 				return (error);
1707  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1708  			    sizeof(nmp->nm_tprintf_initial_delay));
1709  			if (error)
1710  				return (error);
1711  			if (nmp->nm_tprintf_initial_delay < 0)
1712  				nmp->nm_tprintf_initial_delay = 0;
1713  		}
1714 		break;
1715 	default:
1716 		return (ENOTSUP);
1717 	}
1718 	return (0);
1719 }
1720 
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.  The result of the cancel is deliberately not
 * examined here.
 */
static void
nfs_purge(struct mount *mp)
{

	(void) newnfs_nmcancelreqs(VFSTONFS(mp));
}
1733 
1734 /*
1735  * Extract the information needed by the nlm from the nfs vnode.
1736  */
1737 static void
1738 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1739     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1740     struct timeval *timeop)
1741 {
1742 	struct nfsmount *nmp;
1743 	struct nfsnode *np = VTONFS(vp);
1744 
1745 	nmp = VFSTONFS(vp->v_mount);
1746 	if (fhlenp != NULL)
1747 		*fhlenp = (size_t)np->n_fhp->nfh_len;
1748 	if (fhp != NULL)
1749 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1750 	if (sp != NULL)
1751 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1752 	if (is_v3p != NULL)
1753 		*is_v3p = NFS_ISV3(vp);
1754 	if (sizep != NULL)
1755 		*sizep = np->n_size;
1756 	if (timeop != NULL) {
1757 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1758 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1759 	}
1760 }
1761 
/*
 * Append the option name "opt" to the output buffer when testval is
 * non-zero and the name, plus its terminating NUL, fits in the space left.
 *
 * *buf is the current write position and *blen the bytes remaining there;
 * both are advanced past the text written.  nmp is not used.
 */
static __inline void
nfscl_printopt(struct nfsmount *nmp, int testval, char *opt, char **buf,
    size_t *blen)
{
	size_t optlen;
	int written;

	if (testval == 0)
		return;
	optlen = strlen(opt);
	if (*blen <= optlen)
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	/* The space check above means this should never trigger. */
	if ((size_t)written != optlen)
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
1779 
/*
 * Append "opt=optval" to the output buffer.
 *
 * *buf is the current write position and *blen the bytes remaining there.
 * Nothing is attempted unless there is room for at least the name, the
 * '=' and a terminating NUL.  When the complete text fits, *buf and *blen
 * are advanced past it.  When the formatted value turns out to be too
 * long, the partial text is erased again and *buf/*blen are left alone,
 * so the buffer never ends in a cut-off (and therefore wrong-looking)
 * number.  nmp is not used.
 */
static __inline void
nfscl_printoptval(struct nfsmount *nmp, int optval, char *opt, char **buf,
    size_t *blen)
{
	int len;

	if (*blen <= strlen(opt) + 1)
		return;

	len = snprintf(*buf, *blen, "%s=%d", opt, optval);
	if (len >= 0 && (size_t)len < *blen) {
		*buf += len;
		*blen -= len;
	} else {
		/* Did not fit: drop the truncated fragment. */
		**buf = '\0';
	}
}
1797 
1798 /*
1799  * Load the option flags and values into the buffer.
1800  */
1801 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1802 {
1803 	char *buf;
1804 	size_t blen;
1805 
1806 	buf = buffer;
1807 	blen = buflen;
1808 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1809 	    &blen);
1810 	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1811 		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1812 		    &blen);
1813 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1814 		    &buf, &blen);
1815 	}
1816 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1817 	    &blen);
1818 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1819 	    "nfsv2", &buf, &blen);
1820 	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1821 	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1822 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1823 	    &buf, &blen);
1824 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1825 	    &buf, &blen);
1826 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1827 	    &blen);
1828 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1829 	    &blen);
1830 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1831 	    &blen);
1832 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1833 	    &blen);
1834 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1835 	    &blen);
1836 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
1837 	    ",noncontigwr", &buf, &blen);
1838 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1839 	    0, ",lockd", &buf, &blen);
1840 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1841 	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1842 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1843 	    &buf, &blen);
1844 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1845 	    &buf, &blen);
1846 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1847 	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
1848 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1849 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1850 	    &buf, &blen);
1851 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1852 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
1853 	    &buf, &blen);
1854 	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1855 	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1856 	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
1857 	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
1858 	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
1859 	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
1860 	    &blen);
1861 	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
1862 	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
1863 	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
1864 	    &blen);
1865 	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
1866 	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
1867 	    &blen);
1868 	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
1869 	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
1870 }
1871 
1872