xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision 55620f43deef5c0eb5b4b0f675de18b30c8d1c2d)
1 /*-
2  * Copyright (c) 1989, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/limits.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/mount.h>
55 #include <sys/proc.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/sockio.h>
59 #include <sys/sysctl.h>
60 #include <sys/vnode.h>
61 #include <sys/signalvar.h>
62 
63 #include <vm/vm.h>
64 #include <vm/vm_extern.h>
65 #include <vm/uma.h>
66 
67 #include <net/if.h>
68 #include <net/route.h>
69 #include <netinet/in.h>
70 
71 #include <fs/nfs/nfsport.h>
72 #include <fs/nfsclient/nfsnode.h>
73 #include <fs/nfsclient/nfsmount.h>
74 #include <fs/nfsclient/nfs.h>
75 #include <nfs/nfsdiskless.h>
76 
/* Advertise the "nfscl" kernel feature. */
FEATURE(nfscl, "NFSv4 client");

/*
 * Objects that are only declared here; their definitions live in other
 * translation units.
 */
extern int nfscl_ticks;
extern struct timeval nfsboottime;
extern struct nfsstats	newnfsstats;
extern int nfsrv_useacl;
extern int nfscl_debuglevel;
extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
extern struct mtx ncl_iod_mutex;
NFSCLSTATEMUTEX;

/* malloc(9) types defined by this file. */
MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header");
MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct");

/*
 * Read/write tunables under the vfs.nfs sysctl node.  Each one is backed
 * by the file-scope variable declared immediately before it.
 */
SYSCTL_DECL(_vfs_nfs);
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0, "");
/*
 * NOTE(review): the next two knobs pass the delay constant itself as the
 * OID number, where the other knobs in this file pass OID_AUTO.  Confirm
 * that this is intentional before changing it, since it fixes the
 * numeric OID of these nodes.
 */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
/* how long between console messages "nfs server foo not responding" */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
#ifdef NFS_DEBUG
/* Only present in kernels built with NFS_DEBUG. */
int nfs_debug;
SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
    "Toggle debug flag");
#endif
108 
/*
 * Forward declarations for the file-local helpers and for the VFS entry
 * points that are wired into nfs_vfsops below.
 */
static int	nfs_mountroot(struct mount *);
static void	nfs_sec_name(char *, int *);
static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
		    struct nfs_args *argp, const char *, struct ucred *,
		    struct thread *);
static int	mountnfs(struct nfs_args *, struct mount *,
		    struct sockaddr *, char *, u_char *, int, u_char *, int,
		    u_char *, int, struct vnode **, struct ucred *,
		    struct thread *, int, int, int);
static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
		    struct sockaddr_storage *, int *, off_t *,
		    struct timeval *);
static vfs_mount_t nfs_mount;
static vfs_cmount_t nfs_cmount;
static vfs_unmount_t nfs_unmount;
static vfs_root_t nfs_root;
static vfs_statfs_t nfs_statfs;
static vfs_sync_t nfs_sync;
static vfs_sysctl_t nfs_sysctl;
static vfs_purge_t nfs_purge;

/*
 * nfs vfs operations.
 *
 * Init and uninit are handled by ncl_init()/ncl_uninit(); every other
 * slot that is filled in points at one of the nfs_*() functions declared
 * above.
 */
static struct vfsops nfs_vfsops = {
	.vfs_init =		ncl_init,
	.vfs_mount =		nfs_mount,
	.vfs_cmount =		nfs_cmount,
	.vfs_root =		nfs_root,
	.vfs_statfs =		nfs_statfs,
	.vfs_sync =		nfs_sync,
	.vfs_uninit =		ncl_uninit,
	.vfs_unmount =		nfs_unmount,
	.vfs_sysctl =		nfs_sysctl,
	.vfs_purge =		nfs_purge,
};
/* Register the operations vector under the file system name "nfs". */
VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
146 
/*
 * So that loader and kldload(2) can find us, wherever we are.
 * The module is version 1 of "nfs" and depends on version 1 of each of
 * the modules listed below.
 */
MODULE_VERSION(nfs, 1);
MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
MODULE_DEPEND(nfs, krpc, 1, 1, 1);
MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
MODULE_DEPEND(nfs, nfslock, 1, 1, 1);

/*
 * These structures are normally defined in sys/nfs/nfs_diskless.c so
 * that they can be shared by both NFS clients.  They are defined here,
 * zero-initialized, only for kernels built without NFS_ROOT, so that the
 * references below still resolve even though the data is not used in
 * that configuration.
 */
#if !defined(NFS_ROOT)
struct nfs_diskless	nfs_diskless = { { { 0 } } };
struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
int			nfs_diskless_valid = 0;
#endif

/* Read-only views of the diskless boot state under vfs.nfs. */
SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &nfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");

SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
    "%Ssockaddr_in", "Diskless root nfs address");


/*
 * NOTE(review): newnfsargs_ntoh() is declared with external linkage but
 * is neither defined nor called in the part of the file visible here.
 */
void		newnfsargs_ntoh(struct nfs_args *);
/* Helpers used by the diskless (NFS root) mount path. */
static int	nfs_mountdiskless(char *,
		    struct sockaddr_in *, struct nfs_args *,
		    struct thread *, struct vnode **, struct mount *);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
185 
186 int
187 newnfs_iosize(struct nfsmount *nmp)
188 {
189 	int iosize, maxio;
190 
191 	/* First, set the upper limit for iosize */
192 	if (nmp->nm_flag & NFSMNT_NFSV4) {
193 		maxio = NFS_MAXBSIZE;
194 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
195 		if (nmp->nm_sotype == SOCK_DGRAM)
196 			maxio = NFS_MAXDGRAMDATA;
197 		else
198 			maxio = NFS_MAXBSIZE;
199 	} else {
200 		maxio = NFS_V2MAXDATA;
201 	}
202 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
203 		nmp->nm_rsize = maxio;
204 	if (nmp->nm_rsize > NFS_MAXBSIZE)
205 		nmp->nm_rsize = NFS_MAXBSIZE;
206 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
207 		nmp->nm_readdirsize = maxio;
208 	if (nmp->nm_readdirsize > nmp->nm_rsize)
209 		nmp->nm_readdirsize = nmp->nm_rsize;
210 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
211 		nmp->nm_wsize = maxio;
212 	if (nmp->nm_wsize > NFS_MAXBSIZE)
213 		nmp->nm_wsize = NFS_MAXBSIZE;
214 
215 	/*
216 	 * Calculate the size used for io buffers.  Use the larger
217 	 * of the two sizes to minimise nfs requests but make sure
218 	 * that it is at least one VM page to avoid wasting buffer
219 	 * space.  It must also be at least NFS_DIRBLKSIZ, since
220 	 * that is the buffer size used for directories.
221 	 */
222 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
223 	iosize = imax(iosize, PAGE_SIZE);
224 	iosize = imax(iosize, NFS_DIRBLKSIZ);
225 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
226 	return (iosize);
227 }
228 
229 static void
230 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
231 {
232 
233 	args->version = NFS_ARGSVERSION;
234 	args->addr = oargs->addr;
235 	args->addrlen = oargs->addrlen;
236 	args->sotype = oargs->sotype;
237 	args->proto = oargs->proto;
238 	args->fh = oargs->fh;
239 	args->fhsize = oargs->fhsize;
240 	args->flags = oargs->flags;
241 	args->wsize = oargs->wsize;
242 	args->rsize = oargs->rsize;
243 	args->readdirsize = oargs->readdirsize;
244 	args->timeo = oargs->timeo;
245 	args->retrans = oargs->retrans;
246 	args->readahead = oargs->readahead;
247 	args->hostname = oargs->hostname;
248 }
249 
250 static void
251 nfs_convert_diskless(void)
252 {
253 
254 	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
255 		sizeof(struct ifaliasreq));
256 	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
257 		sizeof(struct sockaddr_in));
258 	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
259 	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
260 		nfsv3_diskless.root_fhsize = NFSX_MYFH;
261 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
262 	} else {
263 		nfsv3_diskless.root_fhsize = NFSX_V2FH;
264 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
265 	}
266 	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
267 		sizeof(struct sockaddr_in));
268 	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
269 	nfsv3_diskless.root_time = nfs_diskless.root_time;
270 	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
271 		MAXHOSTNAMELEN);
272 	nfs_diskless_valid = 3;
273 }
274 
/*
 * nfs statfs call
 *
 * Fill in *sbp for the mount mp using the fsinfo/statfs RPCs issued
 * against the mount's root file handle.
 *
 * Returns 0 on success.  Otherwise returns the error from vfs_busy(),
 * ncl_nget() or the RPCs; for an NFSv4 vnode the RPC error is first
 * passed through nfscl_maperr().
 */
static int
nfs_statfs(struct mount *mp, struct statfs *sbp)
{
	struct vnode *vp;
	struct thread *td;
	struct nfsmount *nmp = VFSTONFS(mp);
	struct nfsvattr nfsva;
	struct nfsfsinfo fs;
	struct nfsstatfs sb;
	int error = 0, attrflag, gotfsinfo = 0, ret;
	struct nfsnode *np;

	td = curthread;

	/* Busy the mount without sleeping; give up if that fails. */
	error = vfs_busy(mp, MBF_NOWAIT);
	if (error)
		return (error);
	/* Get the node for the mount's root file handle, locked exclusively. */
	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
	if (error) {
		vfs_unbusy(mp);
		return (error);
	}
	vp = NFSTOV(np);
	/*
	 * The mount state is tested under nm_mtx, but the mutex is dropped
	 * again before any RPC is issued.
	 */
	mtx_lock(&nmp->nm_mtx);
	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
		mtx_unlock(&nmp->nm_mtx);
		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
		    &attrflag, NULL);
		if (!error)
			gotfsinfo = 1;
	} else
		mtx_unlock(&nmp->nm_mtx);
	if (!error)
		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
		    &attrflag, NULL);
	if (error != 0)
		NFSCL_DEBUG(2, "statfs=%d\n", error);
	/*
	 * No attributes were reported by the RPC(s): fetch them with a
	 * separate getattr on the root file handle.
	 */
	if (attrflag == 0) {
		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
		    td->td_ucred, td, &nfsva, NULL, NULL);
		if (ret) {
			/*
			 * Just set default values to get things going.
			 */
			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
			nfsva.na_vattr.va_type = VDIR;
			nfsva.na_vattr.va_mode = 0777;
			nfsva.na_vattr.va_nlink = 100;
			nfsva.na_vattr.va_uid = (uid_t)0;
			nfsva.na_vattr.va_gid = (gid_t)0;
			nfsva.na_vattr.va_fileid = 2;
			nfsva.na_vattr.va_gen = 1;
			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
			nfsva.na_vattr.va_size = 512 * 1024;
		}
	}
	/* The attribute cache is loaded whether or not the RPCs succeeded. */
	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
	if (!error) {
	    /* Record the results in the mount and in *sbp under nm_mtx. */
	    mtx_lock(&nmp->nm_mtx);
	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
		nfscl_loadfsinfo(nmp, &fs);
	    nfscl_loadsbinfo(nmp, &sb, sbp);
	    sbp->f_iosize = newnfs_iosize(nmp);
	    mtx_unlock(&nmp->nm_mtx);
	    /* A caller-supplied buffer also needs the mount names copied. */
	    if (sbp != &mp->mnt_stat) {
		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
	    }
	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
	} else if (NFS_ISV4(vp)) {
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	}
	/* Release the root vnode and the busy reference taken above. */
	vput(vp);
	vfs_unbusy(mp);
	return (error);
}
354 
355 /*
356  * nfs version 3 fsinfo rpc call
357  */
358 int
359 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
360     struct thread *td)
361 {
362 	struct nfsfsinfo fs;
363 	struct nfsvattr nfsva;
364 	int error, attrflag;
365 
366 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
367 	if (!error) {
368 		if (attrflag)
369 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
370 			    1);
371 		mtx_lock(&nmp->nm_mtx);
372 		nfscl_loadfsinfo(nmp, &fs);
373 		mtx_unlock(&nmp->nm_mtx);
374 	}
375 	return (error);
376 }
377 
378 /*
379  * Mount a remote root fs via. nfs. This depends on the info in the
380  * nfs_diskless structure that has been filled in properly by some primary
381  * bootstrap.
382  * It goes something like this:
383  * - do enough of "ifconfig" by calling ifioctl() so that the system
384  *   can talk to the server
385  * - If nfs_diskless.mygateway is filled in, use that address as
386  *   a default gateway.
387  * - build the rootfs mount point and call mountnfs() to do the rest.
388  *
389  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
390  * structure, as well as other global NFS client variables here, as
391  * nfs_mountroot() will be called once in the boot before any other NFS
392  * client activity occurs.
393  */
394 static int
395 nfs_mountroot(struct mount *mp)
396 {
397 	struct thread *td = curthread;
398 	struct nfsv3_diskless *nd = &nfsv3_diskless;
399 	struct socket *so;
400 	struct vnode *vp;
401 	struct ifreq ir;
402 	int error;
403 	u_long l;
404 	char buf[128];
405 	char *cp;
406 
407 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
408 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
409 #elif defined(NFS_ROOT)
410 	nfs_setup_diskless();
411 #endif
412 
413 	if (nfs_diskless_valid == 0)
414 		return (-1);
415 	if (nfs_diskless_valid == 1)
416 		nfs_convert_diskless();
417 
418 	/*
419 	 * XXX splnet, so networks will receive...
420 	 */
421 	splnet();
422 
423 	/*
424 	 * Do enough of ifconfig(8) so that the critical net interface can
425 	 * talk to the server.
426 	 */
427 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
428 	    td->td_ucred, td);
429 	if (error)
430 		panic("nfs_mountroot: socreate(%04x): %d",
431 			nd->myif.ifra_addr.sa_family, error);
432 
433 #if 0 /* XXX Bad idea */
434 	/*
435 	 * We might not have been told the right interface, so we pass
436 	 * over the first ten interfaces of the same kind, until we get
437 	 * one of them configured.
438 	 */
439 
440 	for (i = strlen(nd->myif.ifra_name) - 1;
441 		nd->myif.ifra_name[i] >= '0' &&
442 		nd->myif.ifra_name[i] <= '9';
443 		nd->myif.ifra_name[i] ++) {
444 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
445 		if(!error)
446 			break;
447 	}
448 #endif
449 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
450 	if (error)
451 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
452 	if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
453 		ir.ifr_mtu = strtol(cp, NULL, 10);
454 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
455 		freeenv(cp);
456 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
457 		if (error)
458 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
459 	}
460 	soclose(so);
461 
462 	/*
463 	 * If the gateway field is filled in, set it as the default route.
464 	 * Note that pxeboot will set a default route of 0 if the route
465 	 * is not set by the DHCP server.  Check also for a value of 0
466 	 * to avoid panicking inappropriately in that situation.
467 	 */
468 	if (nd->mygateway.sin_len != 0 &&
469 	    nd->mygateway.sin_addr.s_addr != 0) {
470 		struct sockaddr_in mask, sin;
471 
472 		bzero((caddr_t)&mask, sizeof(mask));
473 		sin = mask;
474 		sin.sin_family = AF_INET;
475 		sin.sin_len = sizeof(sin);
476                 /* XXX MRT use table 0 for this sort of thing */
477 		CURVNET_SET(TD_TO_VNET(td));
478 		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
479 		    (struct sockaddr *)&nd->mygateway,
480 		    (struct sockaddr *)&mask,
481 		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
482 		CURVNET_RESTORE();
483 		if (error)
484 			panic("nfs_mountroot: RTM_ADD: %d", error);
485 	}
486 
487 	/*
488 	 * Create the rootfs mount point.
489 	 */
490 	nd->root_args.fh = nd->root_fh;
491 	nd->root_args.fhsize = nd->root_fhsize;
492 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
493 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
494 		(l >> 24) & 0xff, (l >> 16) & 0xff,
495 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
496 	printf("NFS ROOT: %s\n", buf);
497 	nd->root_args.hostname = buf;
498 	if ((error = nfs_mountdiskless(buf,
499 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
500 		return (error);
501 	}
502 
503 	/*
504 	 * This is not really an nfs issue, but it is much easier to
505 	 * set hostname here and then let the "/etc/rc.xxx" files
506 	 * mount the right /var based upon its preset value.
507 	 */
508 	mtx_lock(&prison0.pr_mtx);
509 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
510 	    sizeof(prison0.pr_hostname));
511 	mtx_unlock(&prison0.pr_mtx);
512 	inittodr(ntohl(nd->root_time));
513 	return (0);
514 }
515 
516 /*
517  * Internal version of mount system call for diskless setup.
518  */
519 static int
520 nfs_mountdiskless(char *path,
521     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
522     struct vnode **vpp, struct mount *mp)
523 {
524 	struct sockaddr *nam;
525 	int dirlen, error;
526 	char *dirpath;
527 
528 	/*
529 	 * Find the directory path in "path", which also has the server's
530 	 * name/ip address in it.
531 	 */
532 	dirpath = strchr(path, ':');
533 	if (dirpath != NULL)
534 		dirlen = strlen(++dirpath);
535 	else
536 		dirlen = 0;
537 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
538 	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
539 	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
540 	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
541 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
542 		return (error);
543 	}
544 	return (0);
545 }
546 
547 static void
548 nfs_sec_name(char *sec, int *flagsp)
549 {
550 	if (!strcmp(sec, "krb5"))
551 		*flagsp |= NFSMNT_KERB;
552 	else if (!strcmp(sec, "krb5i"))
553 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
554 	else if (!strcmp(sec, "krb5p"))
555 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
556 }
557 
/*
 * Apply the mount arguments in *argp to the NFS mount *nmp.
 *
 * mp       - the mount; its MNT_RDONLY flag is updated from the "ro" option
 * nmp      - per-mount NFS state that receives the decoded values
 * argp     - the arguments to decode; NFSMNT_RDIRPLUS may be cleared in
 *            argp->flags for an NFSv2 mount
 * hostname - if not NULL, copied into nmp->nm_hostname up to the first ':'
 * cred, td - passed to newnfs_connect() when the socket is re-established
 *
 * If the mount already has a client handle (nm_client != NULL) and a
 * socket-related setting changed, the connection is torn down, and for a
 * SOCK_DGRAM mount it is reconnected here.
 */
static void
nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
    const char *hostname, struct ucred *cred, struct thread *td)
{
	int s;
	int adjsock;
	char *p;

	s = splnet();

	/*
	 * Set read-only flag if requested; otherwise, clear it if this is
	 * an update.  If this is not an update, then either the read-only
	 * flag is already clear, or this is a root mount and it was set
	 * intentionally at some previous point.
	 */
	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
		MNT_ILOCK(mp);
		mp->mnt_flag |= MNT_RDONLY;
		MNT_IUNLOCK(mp);
	} else if (mp->mnt_flag & MNT_UPDATE) {
		MNT_ILOCK(mp);
		mp->mnt_flag &= ~MNT_RDONLY;
		MNT_IUNLOCK(mp);
	}

	/*
	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
	 * no sense in that context.  Also, set up appropriate retransmit
	 * and soft timeout behavior.
	 */
	if (argp->sotype == SOCK_STREAM) {
		nmp->nm_flag &= ~NFSMNT_NOCONN;
		nmp->nm_timeo = NFS_MAXTIMEO;
		if ((argp->flags & NFSMNT_NFSV4) != 0)
			nmp->nm_retry = INT_MAX;
		else
			nmp->nm_retry = NFS_RETRANS_TCP;
	}

	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
		argp->flags &= ~NFSMNT_RDIRPLUS;
		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
	}

	/* Re-bind if rsrvd port requested and wasn't on one */
	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
		  && (argp->flags & NFSMNT_RESVPORT);
	/* Also re-bind if we're switching to/from a connected UDP socket */
	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
		    (argp->flags & NFSMNT_NOCONN));

	/*
	 * Update flags atomically.  Don't change the lock bits.
	 * The new flags are OR'ed in, so a bit already set in nm_flag
	 * stays set unless it was explicitly cleared above.
	 */
	nmp->nm_flag = argp->flags | nmp->nm_flag;
	splx(s);

	/* Scale the requested timeout by NFS_HZ / 10, rounded, and clamp it. */
	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
		if (nmp->nm_timeo < NFS_MINTIMEO)
			nmp->nm_timeo = NFS_MINTIMEO;
		else if (nmp->nm_timeo > NFS_MAXTIMEO)
			nmp->nm_timeo = NFS_MAXTIMEO;
	}

	/* A retransmit count of 0 or 1 is ignored; larger ones are capped. */
	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
		nmp->nm_retry = argp->retrans;
		if (nmp->nm_retry > NFS_MAXREXMIT)
			nmp->nm_retry = NFS_MAXREXMIT;
	}

	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
		nmp->nm_wsize = argp->wsize;
		/*
		 * Clip at the power of 2 below the size. There is an
		 * issue (not isolated) that causes intermittent page
		 * faults if this is not done.
		 * Sizes of NFS_FABLKSIZE or less become NFS_FABLKSIZE.
		 */
		if (nmp->nm_wsize > NFS_FABLKSIZE)
			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
		else
			nmp->nm_wsize = NFS_FABLKSIZE;
	}

	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
		nmp->nm_rsize = argp->rsize;
		/*
		 * Clip at the power of 2 below the size. There is an
		 * issue (not isolated) that causes intermittent page
		 * faults if this is not done.
		 * Sizes of NFS_FABLKSIZE or less become NFS_FABLKSIZE.
		 */
		if (nmp->nm_rsize > NFS_FABLKSIZE)
			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
		else
			nmp->nm_rsize = NFS_FABLKSIZE;
	}

	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
		nmp->nm_readdirsize = argp->readdirsize;
	}

	/*
	 * Attribute cache timeouts.  A value that was not supplied (or is
	 * negative) is reset to its default, and each minimum is then
	 * lowered to its maximum if it exceeds it.
	 */
	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
		nmp->nm_acregmin = argp->acregmin;
	else
		nmp->nm_acregmin = NFS_MINATTRTIMO;
	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
		nmp->nm_acregmax = argp->acregmax;
	else
		nmp->nm_acregmax = NFS_MAXATTRTIMO;
	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
		nmp->nm_acdirmin = argp->acdirmin;
	else
		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
		nmp->nm_acdirmax = argp->acdirmax;
	else
		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
		nmp->nm_acdirmin = nmp->nm_acdirmax;
	if (nmp->nm_acregmin > nmp->nm_acregmax)
		nmp->nm_acregmin = nmp->nm_acregmax;

	/* Read-ahead is capped at NFS_MAXRAHEAD. */
	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
		if (argp->readahead <= NFS_MAXRAHEAD)
			nmp->nm_readahead = argp->readahead;
		else
			nmp->nm_readahead = NFS_MAXRAHEAD;
	}
	/* The write commit size is never set below the write size. */
	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
		if (argp->wcommitsize < nmp->nm_wsize)
			nmp->nm_wcommitsize = nmp->nm_wsize;
		else
			nmp->nm_wcommitsize = argp->wcommitsize;
	}

	/* A change of socket type or protocol also needs a new socket. */
	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
		    (nmp->nm_soproto != argp->proto));

	if (nmp->nm_client != NULL && adjsock) {
		int haslock = 0, error = 0;

		/*
		 * For a stream socket the send lock is taken around the
		 * disconnect; if it cannot be taken nothing is changed.
		 */
		if (nmp->nm_sotype == SOCK_STREAM) {
			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
			if (!error)
				haslock = 1;
		}
		if (!error) {
		    newnfs_disconnect(&nmp->nm_sockreq);
		    if (haslock)
			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
		    nmp->nm_sotype = argp->sotype;
		    nmp->nm_soproto = argp->proto;
		    /* Datagram sockets are reconnected here, retrying until it works. */
		    if (nmp->nm_sotype == SOCK_DGRAM)
			while (newnfs_connect(nmp, &nmp->nm_sockreq,
			    cred, td, 0)) {
				printf("newnfs_args: retrying connect\n");
				(void) nfs_catnap(PSOCK, 0, "nfscon");
			}
		}
	} else {
		nmp->nm_sotype = argp->sotype;
		nmp->nm_soproto = argp->proto;
	}

	/* Keep only the part of the name before the first ':'. */
	if (hostname != NULL) {
		strlcpy(nmp->nm_hostname, hostname,
		    sizeof(nmp->nm_hostname));
		p = strchr(nmp->nm_hostname, ':');
		if (p != NULL)
			*p = '\0';
	}
}
730 
/*
 * NULL-terminated list of the option names nfs_mount() accepts.  It is
 * passed to vfs_filteropt() at the top of nfs_mount(), which fails the
 * mount with EINVAL when that call returns non-zero.
 *
 * NOTE(review): nfs_mount() also tests for "noresvport", which is not
 * listed here; presumably vfs_filteropt() accepts it through its
 * handling of a leading "no" against "resvport" -- confirm.
 */
static const char *nfs_opts[] = { "from", "nfs_args",
    "noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
    "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
    "async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
    "readdirsize", "soft", "hard", "mntudp", "tcp", "udp", "wsize", "rsize",
    "retrans", "actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax",
    "resvport", "readahead", "hostname", "timeo", "timeout", "addr", "fh",
    "nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname", "dirpath",
    "minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr",
    "pnfs", "wcommitsize",
    NULL };
742 
743 /*
744  * Parse the "from" mountarg, passed by the generic mount(8) program
745  * or the mountroot code.  This is used when rerooting into NFS.
746  *
747  * Note that the "hostname" is actually a "hostname:/share/path" string.
748  */
749 static int
750 nfs_mount_parse_from(struct vfsoptlist *opts, char **hostnamep,
751     struct sockaddr_in **sinp, char *dirpath, size_t dirpathsize, int *dirlenp)
752 {
753 	char nam[MNAMELEN + 1];
754 	char *delimp, *hostp, *spec;
755 	int error, have_bracket = 0, offset, rv, speclen;
756 	struct sockaddr_in *sin;
757 	size_t len;
758 
759 	error = vfs_getopt(opts, "from", (void **)&spec, &speclen);
760 	if (error != 0)
761 		return (error);
762 
763 	/*
764 	 * This part comes from sbin/mount_nfs/mount_nfs.c:getnfsargs().
765 	 */
766 	if (*spec == '[' && (delimp = strchr(spec + 1, ']')) != NULL &&
767 	    *(delimp + 1) == ':') {
768 		hostp = spec + 1;
769 		spec = delimp + 2;
770 		have_bracket = 1;
771 	} else if ((delimp = strrchr(spec, ':')) != NULL) {
772 		hostp = spec;
773 		spec = delimp + 1;
774 	} else if ((delimp = strrchr(spec, '@')) != NULL) {
775 		printf("%s: path@server syntax is deprecated, "
776 		    "use server:path\n", __func__);
777 		hostp = delimp + 1;
778 	} else {
779 		printf("%s: no <host>:<dirpath> nfs-name\n", __func__);
780 		return (EINVAL);
781 	}
782 	*delimp = '\0';
783 
784 	/*
785 	 * If there has been a trailing slash at mounttime it seems
786 	 * that some mountd implementations fail to remove the mount
787 	 * entries from their mountlist while unmounting.
788 	 */
789 	for (speclen = strlen(spec);
790 	    speclen > 1 && spec[speclen - 1] == '/';
791 	    speclen--)
792 		spec[speclen - 1] = '\0';
793 	if (strlen(hostp) + strlen(spec) + 1 > MNAMELEN) {
794 		printf("%s: %s:%s: name too long", __func__, hostp, spec);
795 		return (EINVAL);
796 	}
797 	/* Make both '@' and ':' notations equal */
798 	if (*hostp != '\0') {
799 		len = strlen(hostp);
800 		offset = 0;
801 		if (have_bracket)
802 			nam[offset++] = '[';
803 		memmove(nam + offset, hostp, len);
804 		if (have_bracket)
805 			nam[len + offset++] = ']';
806 		nam[len + offset++] = ':';
807 		memmove(nam + len + offset, spec, speclen);
808 		nam[len + speclen + offset] = '\0';
809 	} else
810 		nam[0] = '\0';
811 
812 	/*
813 	 * XXX: IPv6
814 	 */
815 	sin = malloc(sizeof(*sin), M_SONAME, M_WAITOK);
816 	rv = inet_pton(AF_INET, hostp, &sin->sin_addr);
817 	if (rv != 1) {
818 		printf("%s: cannot parse '%s', inet_pton() returned %d\n",
819 		    __func__, hostp, rv);
820 		free(sin, M_SONAME);
821 		return (EINVAL);
822 	}
823 
824 	sin->sin_len = sizeof(*sin);
825 	sin->sin_family = AF_INET;
826 	/*
827 	 * XXX: hardcoded port number.
828 	 */
829 	sin->sin_port = htons(2049);
830 
831 	*hostnamep = strdup(nam, M_NEWNFSMNT);
832 	*sinp = sin;
833 	strlcpy(dirpath, spec, dirpathsize);
834 	*dirlenp = strlen(dirpath);
835 
836 	return (0);
837 }
838 
839 /*
840  * VFS Operations.
841  *
842  * mount system call
843  * It seems a bit dumb to copyinstr() the host and path here and then
844  * bcopy() them in mountnfs(), but I wanted to detect errors before
845  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
846  * an error after that means that I have to release the mbuf.
847  */
848 /* ARGSUSED */
849 static int
850 nfs_mount(struct mount *mp)
851 {
852 	struct nfs_args args = {
853 	    .version = NFS_ARGSVERSION,
854 	    .addr = NULL,
855 	    .addrlen = sizeof (struct sockaddr_in),
856 	    .sotype = SOCK_STREAM,
857 	    .proto = 0,
858 	    .fh = NULL,
859 	    .fhsize = 0,
860 	    .flags = NFSMNT_RESVPORT,
861 	    .wsize = NFS_WSIZE,
862 	    .rsize = NFS_RSIZE,
863 	    .readdirsize = NFS_READDIRSIZE,
864 	    .timeo = 10,
865 	    .retrans = NFS_RETRANS,
866 	    .readahead = NFS_DEFRAHEAD,
867 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
868 	    .hostname = NULL,
869 	    .acregmin = NFS_MINATTRTIMO,
870 	    .acregmax = NFS_MAXATTRTIMO,
871 	    .acdirmin = NFS_MINDIRATTRTIMO,
872 	    .acdirmax = NFS_MAXDIRATTRTIMO,
873 	};
874 	int error = 0, ret, len;
875 	struct sockaddr *nam = NULL;
876 	struct vnode *vp;
877 	struct thread *td;
878 	char hst[MNAMELEN];
879 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
880 	char *cp, *opt, *name, *secname;
881 	int nametimeo = NFS_DEFAULT_NAMETIMEO;
882 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
883 	int minvers = 0;
884 	int dirlen, has_nfs_args_opt, has_nfs_from_opt,
885 	    krbnamelen, srvkrbnamelen;
886 	size_t hstlen;
887 
888 	has_nfs_args_opt = 0;
889 	has_nfs_from_opt = 0;
890 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
891 		error = EINVAL;
892 		goto out;
893 	}
894 
895 	td = curthread;
896 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS &&
897 	    nfs_diskless_valid != 0) {
898 		error = nfs_mountroot(mp);
899 		goto out;
900 	}
901 
902 	nfscl_init();
903 
904 	/*
905 	 * The old mount_nfs program passed the struct nfs_args
906 	 * from userspace to kernel.  The new mount_nfs program
907 	 * passes string options via nmount() from userspace to kernel
908 	 * and we populate the struct nfs_args in the kernel.
909 	 */
910 	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
911 		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
912 		    sizeof(args));
913 		if (error != 0)
914 			goto out;
915 
916 		if (args.version != NFS_ARGSVERSION) {
917 			error = EPROGMISMATCH;
918 			goto out;
919 		}
920 		has_nfs_args_opt = 1;
921 	}
922 
923 	/* Handle the new style options. */
924 	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
925 		args.acdirmin = args.acdirmax =
926 		    args.acregmin = args.acregmax = 0;
927 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
928 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
929 	}
930 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
931 		args.flags |= NFSMNT_NOCONN;
932 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
933 		args.flags &= ~NFSMNT_NOCONN;
934 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
935 		args.flags |= NFSMNT_NOLOCKD;
936 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
937 		args.flags &= ~NFSMNT_NOLOCKD;
938 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
939 		args.flags |= NFSMNT_INT;
940 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
941 		args.flags |= NFSMNT_RDIRPLUS;
942 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
943 		args.flags |= NFSMNT_RESVPORT;
944 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
945 		args.flags &= ~NFSMNT_RESVPORT;
946 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
947 		args.flags |= NFSMNT_SOFT;
948 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
949 		args.flags &= ~NFSMNT_SOFT;
950 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
951 		args.sotype = SOCK_DGRAM;
952 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
953 		args.sotype = SOCK_DGRAM;
954 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
955 		args.sotype = SOCK_STREAM;
956 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
957 		args.flags |= NFSMNT_NFSV3;
958 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
959 		args.flags |= NFSMNT_NFSV4;
960 		args.sotype = SOCK_STREAM;
961 	}
962 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
963 		args.flags |= NFSMNT_ALLGSSNAME;
964 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
965 		args.flags |= NFSMNT_NOCTO;
966 	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
967 		args.flags |= NFSMNT_NONCONTIGWR;
968 	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
969 		args.flags |= NFSMNT_PNFS;
970 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
971 		if (opt == NULL) {
972 			vfs_mount_error(mp, "illegal readdirsize");
973 			error = EINVAL;
974 			goto out;
975 		}
976 		ret = sscanf(opt, "%d", &args.readdirsize);
977 		if (ret != 1 || args.readdirsize <= 0) {
978 			vfs_mount_error(mp, "illegal readdirsize: %s",
979 			    opt);
980 			error = EINVAL;
981 			goto out;
982 		}
983 		args.flags |= NFSMNT_READDIRSIZE;
984 	}
985 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
986 		if (opt == NULL) {
987 			vfs_mount_error(mp, "illegal readahead");
988 			error = EINVAL;
989 			goto out;
990 		}
991 		ret = sscanf(opt, "%d", &args.readahead);
992 		if (ret != 1 || args.readahead <= 0) {
993 			vfs_mount_error(mp, "illegal readahead: %s",
994 			    opt);
995 			error = EINVAL;
996 			goto out;
997 		}
998 		args.flags |= NFSMNT_READAHEAD;
999 	}
1000 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
1001 		if (opt == NULL) {
1002 			vfs_mount_error(mp, "illegal wsize");
1003 			error = EINVAL;
1004 			goto out;
1005 		}
1006 		ret = sscanf(opt, "%d", &args.wsize);
1007 		if (ret != 1 || args.wsize <= 0) {
1008 			vfs_mount_error(mp, "illegal wsize: %s",
1009 			    opt);
1010 			error = EINVAL;
1011 			goto out;
1012 		}
1013 		args.flags |= NFSMNT_WSIZE;
1014 	}
1015 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
1016 		if (opt == NULL) {
1017 			vfs_mount_error(mp, "illegal rsize");
1018 			error = EINVAL;
1019 			goto out;
1020 		}
1021 		ret = sscanf(opt, "%d", &args.rsize);
1022 		if (ret != 1 || args.rsize <= 0) {
1023 			vfs_mount_error(mp, "illegal wsize: %s",
1024 			    opt);
1025 			error = EINVAL;
1026 			goto out;
1027 		}
1028 		args.flags |= NFSMNT_RSIZE;
1029 	}
1030 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
1031 		if (opt == NULL) {
1032 			vfs_mount_error(mp, "illegal retrans");
1033 			error = EINVAL;
1034 			goto out;
1035 		}
1036 		ret = sscanf(opt, "%d", &args.retrans);
1037 		if (ret != 1 || args.retrans <= 0) {
1038 			vfs_mount_error(mp, "illegal retrans: %s",
1039 			    opt);
1040 			error = EINVAL;
1041 			goto out;
1042 		}
1043 		args.flags |= NFSMNT_RETRANS;
1044 	}
1045 	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
1046 		ret = sscanf(opt, "%d", &args.acregmin);
1047 		if (ret != 1 || args.acregmin < 0) {
1048 			vfs_mount_error(mp, "illegal actimeo: %s",
1049 			    opt);
1050 			error = EINVAL;
1051 			goto out;
1052 		}
1053 		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
1054 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
1055 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
1056 	}
1057 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
1058 		ret = sscanf(opt, "%d", &args.acregmin);
1059 		if (ret != 1 || args.acregmin < 0) {
1060 			vfs_mount_error(mp, "illegal acregmin: %s",
1061 			    opt);
1062 			error = EINVAL;
1063 			goto out;
1064 		}
1065 		args.flags |= NFSMNT_ACREGMIN;
1066 	}
1067 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
1068 		ret = sscanf(opt, "%d", &args.acregmax);
1069 		if (ret != 1 || args.acregmax < 0) {
1070 			vfs_mount_error(mp, "illegal acregmax: %s",
1071 			    opt);
1072 			error = EINVAL;
1073 			goto out;
1074 		}
1075 		args.flags |= NFSMNT_ACREGMAX;
1076 	}
1077 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
1078 		ret = sscanf(opt, "%d", &args.acdirmin);
1079 		if (ret != 1 || args.acdirmin < 0) {
1080 			vfs_mount_error(mp, "illegal acdirmin: %s",
1081 			    opt);
1082 			error = EINVAL;
1083 			goto out;
1084 		}
1085 		args.flags |= NFSMNT_ACDIRMIN;
1086 	}
1087 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
1088 		ret = sscanf(opt, "%d", &args.acdirmax);
1089 		if (ret != 1 || args.acdirmax < 0) {
1090 			vfs_mount_error(mp, "illegal acdirmax: %s",
1091 			    opt);
1092 			error = EINVAL;
1093 			goto out;
1094 		}
1095 		args.flags |= NFSMNT_ACDIRMAX;
1096 	}
1097 	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
1098 		ret = sscanf(opt, "%d", &args.wcommitsize);
1099 		if (ret != 1 || args.wcommitsize < 0) {
1100 			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1101 			error = EINVAL;
1102 			goto out;
1103 		}
1104 		args.flags |= NFSMNT_WCOMMITSIZE;
1105 	}
1106 	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1107 		ret = sscanf(opt, "%d", &args.timeo);
1108 		if (ret != 1 || args.timeo <= 0) {
1109 			vfs_mount_error(mp, "illegal timeo: %s",
1110 			    opt);
1111 			error = EINVAL;
1112 			goto out;
1113 		}
1114 		args.flags |= NFSMNT_TIMEO;
1115 	}
1116 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1117 		ret = sscanf(opt, "%d", &args.timeo);
1118 		if (ret != 1 || args.timeo <= 0) {
1119 			vfs_mount_error(mp, "illegal timeout: %s",
1120 			    opt);
1121 			error = EINVAL;
1122 			goto out;
1123 		}
1124 		args.flags |= NFSMNT_TIMEO;
1125 	}
1126 	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1127 		ret = sscanf(opt, "%d", &nametimeo);
1128 		if (ret != 1 || nametimeo < 0) {
1129 			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1130 			error = EINVAL;
1131 			goto out;
1132 		}
1133 	}
1134 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1135 	    == 0) {
1136 		ret = sscanf(opt, "%d", &negnametimeo);
1137 		if (ret != 1 || negnametimeo < 0) {
1138 			vfs_mount_error(mp, "illegal negnametimeo: %s",
1139 			    opt);
1140 			error = EINVAL;
1141 			goto out;
1142 		}
1143 	}
1144 	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1145 	    0) {
1146 		ret = sscanf(opt, "%d", &minvers);
1147 		if (ret != 1 || minvers < 0 || minvers > 1 ||
1148 		    (args.flags & NFSMNT_NFSV4) == 0) {
1149 			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1150 			error = EINVAL;
1151 			goto out;
1152 		}
1153 	}
1154 	if (vfs_getopt(mp->mnt_optnew, "sec",
1155 		(void **) &secname, NULL) == 0)
1156 		nfs_sec_name(secname, &args.flags);
1157 
1158 	if (mp->mnt_flag & MNT_UPDATE) {
1159 		struct nfsmount *nmp = VFSTONFS(mp);
1160 
1161 		if (nmp == NULL) {
1162 			error = EIO;
1163 			goto out;
1164 		}
1165 
1166 		/*
1167 		 * If a change from TCP->UDP is done and there are thread(s)
1168 		 * that have I/O RPC(s) in progress with a transfer size
1169 		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1170 		 * hung, retrying the RPC(s) forever. Usually these threads
1171 		 * will be seen doing an uninterruptible sleep on wait channel
1172 		 * "nfsreq".
1173 		 */
1174 		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1175 			tprintf(td->td_proc, LOG_WARNING,
1176 	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1177 
1178 		/*
1179 		 * When doing an update, we can't change version,
1180 		 * security, switch lockd strategies or change cookie
1181 		 * translation
1182 		 */
1183 		args.flags = (args.flags &
1184 		    ~(NFSMNT_NFSV3 |
1185 		      NFSMNT_NFSV4 |
1186 		      NFSMNT_KERB |
1187 		      NFSMNT_INTEGRITY |
1188 		      NFSMNT_PRIVACY |
1189 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1190 		    (nmp->nm_flag &
1191 			(NFSMNT_NFSV3 |
1192 			 NFSMNT_NFSV4 |
1193 			 NFSMNT_KERB |
1194 			 NFSMNT_INTEGRITY |
1195 			 NFSMNT_PRIVACY |
1196 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1197 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1198 		goto out;
1199 	}
1200 
1201 	/*
1202 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1203 	 * or no-connection mode for those protocols that support
1204 	 * no-connection mode (the flag will be cleared later for protocols
1205 	 * that do not support no-connection mode).  This will allow a client
1206 	 * to receive replies from a different IP then the request was
1207 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1208 	 * not 0.
1209 	 */
1210 	if (nfs_ip_paranoia == 0)
1211 		args.flags |= NFSMNT_NOCONN;
1212 
1213 	if (has_nfs_args_opt != 0) {
1214 		/*
1215 		 * In the 'nfs_args' case, the pointers in the args
1216 		 * structure are in userland - we copy them in here.
1217 		 */
1218 		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1219 			vfs_mount_error(mp, "Bad file handle");
1220 			error = EINVAL;
1221 			goto out;
1222 		}
1223 		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1224 		    args.fhsize);
1225 		if (error != 0)
1226 			goto out;
1227 		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1228 		if (error != 0)
1229 			goto out;
1230 		bzero(&hst[hstlen], MNAMELEN - hstlen);
1231 		args.hostname = hst;
1232 		/* getsockaddr() call must be after above copyin() calls */
1233 		error = getsockaddr(&nam, (caddr_t)args.addr,
1234 		    args.addrlen);
1235 		if (error != 0)
1236 			goto out;
1237 	} else if (nfs_mount_parse_from(mp->mnt_optnew,
1238 	    &args.hostname, (struct sockaddr_in **)&nam, dirpath,
1239 	    sizeof(dirpath), &dirlen) == 0) {
1240 		has_nfs_from_opt = 1;
1241 		bcopy(args.hostname, hst, MNAMELEN);
1242 		hst[MNAMELEN - 1] = '\0';
1243 
1244 		/*
1245 		 * This only works with NFSv4 for now.
1246 		 */
1247 		args.fhsize = 0;
1248 		args.flags |= NFSMNT_NFSV4;
1249 		args.sotype = SOCK_STREAM;
1250 	} else {
1251 		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1252 		    &args.fhsize) == 0) {
1253 			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1254 				vfs_mount_error(mp, "Bad file handle");
1255 				error = EINVAL;
1256 				goto out;
1257 			}
1258 			bcopy(args.fh, nfh, args.fhsize);
1259 		} else {
1260 			args.fhsize = 0;
1261 		}
1262 		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1263 		    (void **)&args.hostname, &len);
1264 		if (args.hostname == NULL) {
1265 			vfs_mount_error(mp, "Invalid hostname");
1266 			error = EINVAL;
1267 			goto out;
1268 		}
1269 		bcopy(args.hostname, hst, MNAMELEN);
1270 		hst[MNAMELEN - 1] = '\0';
1271 	}
1272 
1273 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1274 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1275 	else {
1276 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1277 		cp = strchr(srvkrbname, ':');
1278 		if (cp != NULL)
1279 			*cp = '\0';
1280 	}
1281 	srvkrbnamelen = strlen(srvkrbname);
1282 
1283 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1284 		strlcpy(krbname, name, sizeof (krbname));
1285 	else
1286 		krbname[0] = '\0';
1287 	krbnamelen = strlen(krbname);
1288 
1289 	if (has_nfs_from_opt == 0) {
1290 		if (vfs_getopt(mp->mnt_optnew,
1291 		    "dirpath", (void **)&name, NULL) == 0)
1292 			strlcpy(dirpath, name, sizeof (dirpath));
1293 		else
1294 			dirpath[0] = '\0';
1295 		dirlen = strlen(dirpath);
1296 	}
1297 
1298 	if (has_nfs_args_opt == 0 && has_nfs_from_opt == 0) {
1299 		if (vfs_getopt(mp->mnt_optnew, "addr",
1300 		    (void **)&args.addr, &args.addrlen) == 0) {
1301 			if (args.addrlen > SOCK_MAXADDRLEN) {
1302 				error = ENAMETOOLONG;
1303 				goto out;
1304 			}
1305 			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1306 			bcopy(args.addr, nam, args.addrlen);
1307 			nam->sa_len = args.addrlen;
1308 		} else {
1309 			vfs_mount_error(mp, "No server address");
1310 			error = EINVAL;
1311 			goto out;
1312 		}
1313 	}
1314 
1315 	args.fh = nfh;
1316 	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1317 	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1318 	    nametimeo, negnametimeo, minvers);
1319 out:
1320 	if (!error) {
1321 		MNT_ILOCK(mp);
1322 		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1323 		    MNTK_USES_BCACHE;
1324 		MNT_IUNLOCK(mp);
1325 	}
1326 	return (error);
1327 }
1328 
1329 
1330 /*
1331  * VFS Operations.
1332  *
1333  * mount system call
1334  * It seems a bit dumb to copyinstr() the host and path here and then
1335  * bcopy() them in mountnfs(), but I wanted to detect errors before
1336  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
1337  * an error after that means that I have to release the mbuf.
1338  */
1339 /* ARGSUSED */
1340 static int
1341 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1342 {
1343 	int error;
1344 	struct nfs_args args;
1345 
1346 	error = copyin(data, &args, sizeof (struct nfs_args));
1347 	if (error)
1348 		return error;
1349 
1350 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1351 
1352 	error = kernel_mount(ma, flags);
1353 	return (error);
1354 }
1355 
1356 /*
1357  * Common code for mount and mountroot
1358  */
1359 static int
1360 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1361     char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1362     u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1363     struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1364     int minvers)
1365 {
1366 	struct nfsmount *nmp;
1367 	struct nfsnode *np;
1368 	int error, trycnt, ret;
1369 	struct nfsvattr nfsva;
1370 	struct nfsclclient *clp;
1371 	struct nfsclds *dsp, *tdsp;
1372 	uint32_t lease;
1373 	static u_int64_t clval = 0;
1374 
1375 	NFSCL_DEBUG(3, "in mnt\n");
1376 	clp = NULL;
1377 	if (mp->mnt_flag & MNT_UPDATE) {
1378 		nmp = VFSTONFS(mp);
1379 		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1380 		FREE(nam, M_SONAME);
1381 		return (0);
1382 	} else {
1383 		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1384 		    krbnamelen + dirlen + srvkrbnamelen + 2,
1385 		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1386 		TAILQ_INIT(&nmp->nm_bufq);
1387 		if (clval == 0)
1388 			clval = (u_int64_t)nfsboottime.tv_sec;
1389 		nmp->nm_clval = clval++;
1390 		nmp->nm_krbnamelen = krbnamelen;
1391 		nmp->nm_dirpathlen = dirlen;
1392 		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1393 		if (td->td_ucred->cr_uid != (uid_t)0) {
1394 			/*
1395 			 * nm_uid is used to get KerberosV credentials for
1396 			 * the nfsv4 state handling operations if there is
1397 			 * no host based principal set. Use the uid of
1398 			 * this user if not root, since they are doing the
1399 			 * mount. I don't think setting this for root will
1400 			 * work, since root normally does not have user
1401 			 * credentials in a credentials cache.
1402 			 */
1403 			nmp->nm_uid = td->td_ucred->cr_uid;
1404 		} else {
1405 			/*
1406 			 * Just set to -1, so it won't be used.
1407 			 */
1408 			nmp->nm_uid = (uid_t)-1;
1409 		}
1410 
1411 		/* Copy and null terminate all the names */
1412 		if (nmp->nm_krbnamelen > 0) {
1413 			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1414 			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1415 		}
1416 		if (nmp->nm_dirpathlen > 0) {
1417 			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1418 			    nmp->nm_dirpathlen);
1419 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1420 			    + 1] = '\0';
1421 		}
1422 		if (nmp->nm_srvkrbnamelen > 0) {
1423 			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1424 			    nmp->nm_srvkrbnamelen);
1425 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1426 			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1427 		}
1428 		nmp->nm_sockreq.nr_cred = crhold(cred);
1429 		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1430 		mp->mnt_data = nmp;
1431 		nmp->nm_getinfo = nfs_getnlminfo;
1432 		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1433 	}
1434 	vfs_getnewfsid(mp);
1435 	nmp->nm_mountp = mp;
1436 	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1437 
1438 	/*
1439 	 * Since nfs_decode_args() might optionally set them, these
1440 	 * need to be set to defaults before the call, so that the
1441 	 * optional settings aren't overwritten.
1442 	 */
1443 	nmp->nm_nametimeo = nametimeo;
1444 	nmp->nm_negnametimeo = negnametimeo;
1445 	nmp->nm_timeo = NFS_TIMEO;
1446 	nmp->nm_retry = NFS_RETRANS;
1447 	nmp->nm_readahead = NFS_DEFRAHEAD;
1448 
1449 	/* This is empirical approximation of sqrt(hibufspace) * 256. */
1450 	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
1451 	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
1452 		nmp->nm_wcommitsize *= 2;
1453 	nmp->nm_wcommitsize *= 256;
1454 
1455 	if ((argp->flags & NFSMNT_NFSV4) != 0)
1456 		nmp->nm_minorvers = minvers;
1457 	else
1458 		nmp->nm_minorvers = 0;
1459 
1460 	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1461 
1462 	/*
1463 	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1464 	 * high, depending on whether we end up with negative offsets in
1465 	 * the client or server somewhere.  2GB-1 may be safer.
1466 	 *
1467 	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1468 	 * that we can handle until we find out otherwise.
1469 	 */
1470 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1471 		nmp->nm_maxfilesize = 0xffffffffLL;
1472 	else
1473 		nmp->nm_maxfilesize = OFF_MAX;
1474 
1475 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1476 		nmp->nm_wsize = NFS_WSIZE;
1477 		nmp->nm_rsize = NFS_RSIZE;
1478 		nmp->nm_readdirsize = NFS_READDIRSIZE;
1479 	}
1480 	nmp->nm_numgrps = NFS_MAXGRPS;
1481 	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1482 	if (nmp->nm_tprintf_delay < 0)
1483 		nmp->nm_tprintf_delay = 0;
1484 	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1485 	if (nmp->nm_tprintf_initial_delay < 0)
1486 		nmp->nm_tprintf_initial_delay = 0;
1487 	nmp->nm_fhsize = argp->fhsize;
1488 	if (nmp->nm_fhsize > 0)
1489 		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1490 	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1491 	nmp->nm_nam = nam;
1492 	/* Set up the sockets and per-host congestion */
1493 	nmp->nm_sotype = argp->sotype;
1494 	nmp->nm_soproto = argp->proto;
1495 	nmp->nm_sockreq.nr_prog = NFS_PROG;
1496 	if ((argp->flags & NFSMNT_NFSV4))
1497 		nmp->nm_sockreq.nr_vers = NFS_VER4;
1498 	else if ((argp->flags & NFSMNT_NFSV3))
1499 		nmp->nm_sockreq.nr_vers = NFS_VER3;
1500 	else
1501 		nmp->nm_sockreq.nr_vers = NFS_VER2;
1502 
1503 
1504 	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1505 		goto bad;
1506 	/* For NFSv4.1, get the clientid now. */
1507 	if (nmp->nm_minorvers > 0) {
1508 		NFSCL_DEBUG(3, "at getcl\n");
1509 		error = nfscl_getcl(mp, cred, td, 0, &clp);
1510 		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1511 		if (error != 0)
1512 			goto bad;
1513 	}
1514 
1515 	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1516 	    nmp->nm_dirpathlen > 0) {
1517 		NFSCL_DEBUG(3, "in dirp\n");
1518 		/*
1519 		 * If the fhsize on the mount point == 0 for V4, the mount
1520 		 * path needs to be looked up.
1521 		 */
1522 		trycnt = 3;
1523 		do {
1524 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1525 			    cred, td);
1526 			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
1527 			if (error)
1528 				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1529 		} while (error && --trycnt > 0);
1530 		if (error) {
1531 			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1532 			goto bad;
1533 		}
1534 	}
1535 
1536 	/*
1537 	 * A reference count is needed on the nfsnode representing the
1538 	 * remote root.  If this object is not persistent, then backward
1539 	 * traversals of the mount point (i.e. "..") will not work if
1540 	 * the nfsnode gets flushed out of the cache. Ufs does not have
1541 	 * this problem, because one can identify root inodes by their
1542 	 * number == ROOTINO (2).
1543 	 */
1544 	if (nmp->nm_fhsize > 0) {
1545 		/*
1546 		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1547 		 * non-zero for the root vnode. f_iosize will be set correctly
1548 		 * by nfs_statfs() before any I/O occurs.
1549 		 */
1550 		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1551 		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1552 		    LK_EXCLUSIVE);
1553 		if (error)
1554 			goto bad;
1555 		*vpp = NFSTOV(np);
1556 
1557 		/*
1558 		 * Get file attributes and transfer parameters for the
1559 		 * mountpoint.  This has the side effect of filling in
1560 		 * (*vpp)->v_type with the correct value.
1561 		 */
1562 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1563 		    cred, td, &nfsva, NULL, &lease);
1564 		if (ret) {
1565 			/*
1566 			 * Just set default values to get things going.
1567 			 */
1568 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1569 			nfsva.na_vattr.va_type = VDIR;
1570 			nfsva.na_vattr.va_mode = 0777;
1571 			nfsva.na_vattr.va_nlink = 100;
1572 			nfsva.na_vattr.va_uid = (uid_t)0;
1573 			nfsva.na_vattr.va_gid = (gid_t)0;
1574 			nfsva.na_vattr.va_fileid = 2;
1575 			nfsva.na_vattr.va_gen = 1;
1576 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1577 			nfsva.na_vattr.va_size = 512 * 1024;
1578 			lease = 60;
1579 		}
1580 		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1581 		if (nmp->nm_minorvers > 0) {
1582 			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1583 			NFSLOCKCLSTATE();
1584 			clp->nfsc_renew = NFSCL_RENEW(lease);
1585 			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1586 			clp->nfsc_clientidrev++;
1587 			if (clp->nfsc_clientidrev == 0)
1588 				clp->nfsc_clientidrev++;
1589 			NFSUNLOCKCLSTATE();
1590 			/*
1591 			 * Mount will succeed, so the renew thread can be
1592 			 * started now.
1593 			 */
1594 			nfscl_start_renewthread(clp);
1595 			nfscl_clientrelease(clp);
1596 		}
1597 		if (argp->flags & NFSMNT_NFSV3)
1598 			ncl_fsinfo(nmp, *vpp, cred, td);
1599 
1600 		/* Mark if the mount point supports NFSv4 ACLs. */
1601 		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1602 		    ret == 0 &&
1603 		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1604 			MNT_ILOCK(mp);
1605 			mp->mnt_flag |= MNT_NFS4ACLS;
1606 			MNT_IUNLOCK(mp);
1607 		}
1608 
1609 		/*
1610 		 * Lose the lock but keep the ref.
1611 		 */
1612 		NFSVOPUNLOCK(*vpp, 0);
1613 		return (0);
1614 	}
1615 	error = EIO;
1616 
1617 bad:
1618 	if (clp != NULL)
1619 		nfscl_clientrelease(clp);
1620 	newnfs_disconnect(&nmp->nm_sockreq);
1621 	crfree(nmp->nm_sockreq.nr_cred);
1622 	if (nmp->nm_sockreq.nr_auth != NULL)
1623 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1624 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1625 	mtx_destroy(&nmp->nm_mtx);
1626 	if (nmp->nm_clp != NULL) {
1627 		NFSLOCKCLSTATE();
1628 		LIST_REMOVE(nmp->nm_clp, nfsc_list);
1629 		NFSUNLOCKCLSTATE();
1630 		free(nmp->nm_clp, M_NFSCLCLIENT);
1631 	}
1632 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1633 		nfscl_freenfsclds(dsp);
1634 	FREE(nmp, M_NEWNFSMNT);
1635 	FREE(nam, M_SONAME);
1636 	return (error);
1637 }
1638 
1639 /*
1640  * unmount system call
1641  */
1642 static int
1643 nfs_unmount(struct mount *mp, int mntflags)
1644 {
1645 	struct thread *td;
1646 	struct nfsmount *nmp;
1647 	int error, flags = 0, i, trycnt = 0;
1648 	struct nfsclds *dsp, *tdsp;
1649 
1650 	td = curthread;
1651 
1652 	if (mntflags & MNT_FORCE)
1653 		flags |= FORCECLOSE;
1654 	nmp = VFSTONFS(mp);
1655 	/*
1656 	 * Goes something like this..
1657 	 * - Call vflush() to clear out vnodes for this filesystem
1658 	 * - Close the socket
1659 	 * - Free up the data structures
1660 	 */
1661 	/* In the forced case, cancel any outstanding requests. */
1662 	if (mntflags & MNT_FORCE) {
1663 		error = newnfs_nmcancelreqs(nmp);
1664 		if (error)
1665 			goto out;
1666 		/* For a forced close, get rid of the renew thread now */
1667 		nfscl_umount(nmp, td);
1668 	}
1669 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1670 	do {
1671 		error = vflush(mp, 1, flags, td);
1672 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1673 			(void) nfs_catnap(PSOCK, error, "newndm");
1674 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1675 	if (error)
1676 		goto out;
1677 
1678 	/*
1679 	 * We are now committed to the unmount.
1680 	 */
1681 	if ((mntflags & MNT_FORCE) == 0)
1682 		nfscl_umount(nmp, td);
1683 	/* Make sure no nfsiods are assigned to this mount. */
1684 	mtx_lock(&ncl_iod_mutex);
1685 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1686 		if (ncl_iodmount[i] == nmp) {
1687 			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1688 			ncl_iodmount[i] = NULL;
1689 		}
1690 	mtx_unlock(&ncl_iod_mutex);
1691 	newnfs_disconnect(&nmp->nm_sockreq);
1692 	crfree(nmp->nm_sockreq.nr_cred);
1693 	FREE(nmp->nm_nam, M_SONAME);
1694 	if (nmp->nm_sockreq.nr_auth != NULL)
1695 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1696 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1697 	mtx_destroy(&nmp->nm_mtx);
1698 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1699 		nfscl_freenfsclds(dsp);
1700 	FREE(nmp, M_NEWNFSMNT);
1701 out:
1702 	return (error);
1703 }
1704 
1705 /*
1706  * Return root of a filesystem
1707  */
1708 static int
1709 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1710 {
1711 	struct vnode *vp;
1712 	struct nfsmount *nmp;
1713 	struct nfsnode *np;
1714 	int error;
1715 
1716 	nmp = VFSTONFS(mp);
1717 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1718 	if (error)
1719 		return error;
1720 	vp = NFSTOV(np);
1721 	/*
1722 	 * Get transfer parameters and attributes for root vnode once.
1723 	 */
1724 	mtx_lock(&nmp->nm_mtx);
1725 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1726 		mtx_unlock(&nmp->nm_mtx);
1727 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1728 	} else
1729 		mtx_unlock(&nmp->nm_mtx);
1730 	if (vp->v_type == VNON)
1731 	    vp->v_type = VDIR;
1732 	vp->v_vflag |= VV_ROOT;
1733 	*vpp = vp;
1734 	return (0);
1735 }
1736 
1737 /*
1738  * Flush out the buffer cache
1739  */
1740 /* ARGSUSED */
1741 static int
1742 nfs_sync(struct mount *mp, int waitfor)
1743 {
1744 	struct vnode *vp, *mvp;
1745 	struct thread *td;
1746 	int error, allerror = 0;
1747 
1748 	td = curthread;
1749 
1750 	MNT_ILOCK(mp);
1751 	/*
1752 	 * If a forced dismount is in progress, return from here so that
1753 	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1754 	 * calling VFS_UNMOUNT().
1755 	 */
1756 	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1757 		MNT_IUNLOCK(mp);
1758 		return (EBADF);
1759 	}
1760 	MNT_IUNLOCK(mp);
1761 
1762 	/*
1763 	 * Force stale buffer cache information to be flushed.
1764 	 */
1765 loop:
1766 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1767 		/* XXX Racy bv_cnt check. */
1768 		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1769 		    waitfor == MNT_LAZY) {
1770 			VI_UNLOCK(vp);
1771 			continue;
1772 		}
1773 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1774 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1775 			goto loop;
1776 		}
1777 		error = VOP_FSYNC(vp, waitfor, td);
1778 		if (error)
1779 			allerror = error;
1780 		NFSVOPUNLOCK(vp, 0);
1781 		vrele(vp);
1782 	}
1783 	return (allerror);
1784 }
1785 
1786 static int
1787 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1788 {
1789 	struct nfsmount *nmp = VFSTONFS(mp);
1790 	struct vfsquery vq;
1791 	int error;
1792 
1793 	bzero(&vq, sizeof(vq));
1794 	switch (op) {
1795 #if 0
1796 	case VFS_CTL_NOLOCKS:
1797 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1798  		if (req->oldptr != NULL) {
1799  			error = SYSCTL_OUT(req, &val, sizeof(val));
1800  			if (error)
1801  				return (error);
1802  		}
1803  		if (req->newptr != NULL) {
1804  			error = SYSCTL_IN(req, &val, sizeof(val));
1805  			if (error)
1806  				return (error);
1807 			if (val)
1808 				nmp->nm_flag |= NFSMNT_NOLOCKS;
1809 			else
1810 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1811  		}
1812 		break;
1813 #endif
1814 	case VFS_CTL_QUERY:
1815 		mtx_lock(&nmp->nm_mtx);
1816 		if (nmp->nm_state & NFSSTA_TIMEO)
1817 			vq.vq_flags |= VQ_NOTRESP;
1818 		mtx_unlock(&nmp->nm_mtx);
1819 #if 0
1820 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1821 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1822 			vq.vq_flags |= VQ_NOTRESPLOCK;
1823 #endif
1824 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1825 		break;
1826  	case VFS_CTL_TIMEO:
1827  		if (req->oldptr != NULL) {
1828  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1829  			    sizeof(nmp->nm_tprintf_initial_delay));
1830  			if (error)
1831  				return (error);
1832  		}
1833  		if (req->newptr != NULL) {
1834 			error = vfs_suser(mp, req->td);
1835 			if (error)
1836 				return (error);
1837  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1838  			    sizeof(nmp->nm_tprintf_initial_delay));
1839  			if (error)
1840  				return (error);
1841  			if (nmp->nm_tprintf_initial_delay < 0)
1842  				nmp->nm_tprintf_initial_delay = 0;
1843  		}
1844 		break;
1845 	default:
1846 		return (ENOTSUP);
1847 	}
1848 	return (0);
1849 }
1850 
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.
 *
 * The result of newnfs_nmcancelreqs() is not examined.
 */
static void
nfs_purge(struct mount *mp)
{

	(void) newnfs_nmcancelreqs(VFSTONFS(mp));
}
1863 
1864 /*
1865  * Extract the information needed by the nlm from the nfs vnode.
1866  */
1867 static void
1868 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1869     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1870     struct timeval *timeop)
1871 {
1872 	struct nfsmount *nmp;
1873 	struct nfsnode *np = VTONFS(vp);
1874 
1875 	nmp = VFSTONFS(vp->v_mount);
1876 	if (fhlenp != NULL)
1877 		*fhlenp = (size_t)np->n_fhp->nfh_len;
1878 	if (fhp != NULL)
1879 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1880 	if (sp != NULL)
1881 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1882 	if (is_v3p != NULL)
1883 		*is_v3p = NFS_ISV3(vp);
1884 	if (sizep != NULL)
1885 		*sizep = np->n_size;
1886 	if (timeop != NULL) {
1887 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1888 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1889 	}
1890 }
1891 
/*
 * Append the option name "opt" to the output buffer when "testval" is
 * non-zero and the name, plus its terminating NUL, fits in the space
 * that is left.
 *
 * *buf is the current write position and *blen the bytes remaining
 * there; both are advanced past the text that was written.  Nothing is
 * changed when testval is zero or the name does not fit.  "nmp" is not
 * used.
 */
static __inline void
nfscl_printopt(struct nfsmount *nmp, int testval, char *opt, char **buf,
    size_t *blen)
{
	size_t optlen;
	int written;

	if (testval == 0)
		return;
	optlen = strlen(opt);
	if (*blen <= optlen)
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	/* The fit was checked above, so a mismatch should not happen. */
	if (written != optlen)
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
1909 
/*
 * Append "opt=optval" (optval printed as a decimal integer) to the
 * output buffer.
 *
 * *buf is the current write position and *blen the bytes remaining
 * there.  When the whole text fits, both are advanced past it.  When it
 * does not fit, the buffer is re-terminated at *buf so that no partial
 * option (for example a number with trailing digits cut off) is left
 * behind, and *buf and *blen are unchanged.  Nothing at all is written
 * unless more than strlen(opt) + 1 bytes remain.  "nmp" is not used.
 */
static __inline void
nfscl_printoptval(struct nfsmount *nmp, int optval, char *opt, char **buf,
    size_t *blen)
{
	int len;

	if (*blen <= strlen(opt) + 1)
		return;

	len = snprintf(*buf, *blen, "%s=%d", opt, optval);
	if (len >= 0 && (size_t)len < *blen) {
		*buf += len;
		*blen -= len;
	} else {
		/*
		 * snprintf() truncated the output.  Discard the fragment
		 * rather than report a misleading, shortened value.
		 */
		**buf = '\0';
	}
}
1927 
1928 /*
1929  * Load the option flags and values into the buffer.
1930  */
1931 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1932 {
1933 	char *buf;
1934 	size_t blen;
1935 
1936 	buf = buffer;
1937 	blen = buflen;
1938 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1939 	    &blen);
1940 	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1941 		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1942 		    &blen);
1943 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1944 		    &buf, &blen);
1945 	}
1946 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1947 	    &blen);
1948 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1949 	    "nfsv2", &buf, &blen);
1950 	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1951 	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1952 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1953 	    &buf, &blen);
1954 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1955 	    &buf, &blen);
1956 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1957 	    &blen);
1958 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1959 	    &blen);
1960 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1961 	    &blen);
1962 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1963 	    &blen);
1964 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1965 	    &blen);
1966 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
1967 	    ",noncontigwr", &buf, &blen);
1968 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1969 	    0, ",lockd", &buf, &blen);
1970 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1971 	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1972 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1973 	    &buf, &blen);
1974 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1975 	    &buf, &blen);
1976 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1977 	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
1978 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1979 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1980 	    &buf, &blen);
1981 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1982 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
1983 	    &buf, &blen);
1984 	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1985 	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1986 	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
1987 	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
1988 	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
1989 	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
1990 	    &blen);
1991 	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
1992 	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
1993 	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
1994 	    &blen);
1995 	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
1996 	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
1997 	    &blen);
1998 	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
1999 	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
2000 }
2001 
2002