xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision 5944f899a2519c6321bac3c17cc076418643a088)
1 /*-
2  * Copyright (c) 1989, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/limits.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/mount.h>
55 #include <sys/proc.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/sockio.h>
59 #include <sys/sysctl.h>
60 #include <sys/vnode.h>
61 #include <sys/signalvar.h>
62 
63 #include <vm/vm.h>
64 #include <vm/vm_extern.h>
65 #include <vm/uma.h>
66 
67 #include <net/if.h>
68 #include <net/route.h>
69 #include <netinet/in.h>
70 
71 #include <fs/nfs/nfsport.h>
72 #include <fs/nfsclient/nfsnode.h>
73 #include <fs/nfsclient/nfsmount.h>
74 #include <fs/nfsclient/nfs.h>
75 #include <nfs/nfsdiskless.h>
76 
77 FEATURE(nfscl, "NFSv4 client");
78 
79 extern int nfscl_ticks;
80 extern struct timeval nfsboottime;
81 extern int nfsrv_useacl;
82 extern int nfscl_debuglevel;
83 extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
84 extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
85 extern struct mtx ncl_iod_mutex;
86 NFSCLSTATEMUTEX;
87 
88 MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header");
89 MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct");
90 
91 SYSCTL_DECL(_vfs_nfs);
92 static int nfs_ip_paranoia = 1;
93 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
94     &nfs_ip_paranoia, 0, "");
95 static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
96 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
97         downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
98 /* how long between console messages "nfs server foo not responding" */
99 static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
100 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
101         downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
102 #ifdef NFS_DEBUG
103 int nfs_debug;
104 SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
105     "Toggle debug flag");
106 #endif
107 
108 static int	nfs_mountroot(struct mount *);
109 static void	nfs_sec_name(char *, int *);
110 static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
111 		    struct nfs_args *argp, const char *, struct ucred *,
112 		    struct thread *);
113 static int	mountnfs(struct nfs_args *, struct mount *,
114 		    struct sockaddr *, char *, u_char *, int, u_char *, int,
115 		    u_char *, int, struct vnode **, struct ucred *,
116 		    struct thread *, int, int, int);
117 static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
118 		    struct sockaddr_storage *, int *, off_t *,
119 		    struct timeval *);
120 static vfs_mount_t nfs_mount;
121 static vfs_cmount_t nfs_cmount;
122 static vfs_unmount_t nfs_unmount;
123 static vfs_root_t nfs_root;
124 static vfs_statfs_t nfs_statfs;
125 static vfs_sync_t nfs_sync;
126 static vfs_sysctl_t nfs_sysctl;
127 static vfs_purge_t nfs_purge;
128 
129 /*
130  * nfs vfs operations.
131  */
132 static struct vfsops nfs_vfsops = {
133 	.vfs_init =		ncl_init,
134 	.vfs_mount =		nfs_mount,
135 	.vfs_cmount =		nfs_cmount,
136 	.vfs_root =		nfs_root,
137 	.vfs_statfs =		nfs_statfs,
138 	.vfs_sync =		nfs_sync,
139 	.vfs_uninit =		ncl_uninit,
140 	.vfs_unmount =		nfs_unmount,
141 	.vfs_sysctl =		nfs_sysctl,
142 	.vfs_purge =		nfs_purge,
143 };
144 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
145 
146 /* So that loader and kldload(2) can find us, wherever we are.. */
147 MODULE_VERSION(nfs, 1);
148 MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
149 MODULE_DEPEND(nfs, krpc, 1, 1, 1);
150 MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
151 MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
152 
/*
 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
 * can be shared by both NFS clients. It is declared here so that it
 * will be defined for kernels built without NFS_ROOT, although it
 * isn't used in that case.
 *
 * nfs_diskless_valid values as used in this file: nfs_mountroot() treats
 * 0 as "no diskless configuration available" and 1 as "only the old
 * format nfs_diskless is filled in", in which case it calls
 * nfs_convert_diskless(), which copies the data into nfsv3_diskless and
 * sets the value to 3.
 */
#if !defined(NFS_ROOT)
struct nfs_diskless	nfs_diskless = { { { 0 } } };
struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
int			nfs_diskless_valid = 0;
#endif

/* Read-only view of nfs_diskless_valid. */
SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &nfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

/* Read-only view of nfsv3_diskless.root_hostnam. */
SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");

/* Read-only view of nfsv3_diskless.root_saddr, exported as an opaque blob. */
SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
    "%Ssockaddr_in", "Diskless root nfs address");
175 
176 
177 void		newnfsargs_ntoh(struct nfs_args *);
178 static int	nfs_mountdiskless(char *,
179 		    struct sockaddr_in *, struct nfs_args *,
180 		    struct thread *, struct vnode **, struct mount *);
181 static void	nfs_convert_diskless(void);
182 static void	nfs_convert_oargs(struct nfs_args *args,
183 		    struct onfs_args *oargs);
184 
185 int
186 newnfs_iosize(struct nfsmount *nmp)
187 {
188 	int iosize, maxio;
189 
190 	/* First, set the upper limit for iosize */
191 	if (nmp->nm_flag & NFSMNT_NFSV4) {
192 		maxio = NFS_MAXBSIZE;
193 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
194 		if (nmp->nm_sotype == SOCK_DGRAM)
195 			maxio = NFS_MAXDGRAMDATA;
196 		else
197 			maxio = NFS_MAXBSIZE;
198 	} else {
199 		maxio = NFS_V2MAXDATA;
200 	}
201 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
202 		nmp->nm_rsize = maxio;
203 	if (nmp->nm_rsize > NFS_MAXBSIZE)
204 		nmp->nm_rsize = NFS_MAXBSIZE;
205 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
206 		nmp->nm_readdirsize = maxio;
207 	if (nmp->nm_readdirsize > nmp->nm_rsize)
208 		nmp->nm_readdirsize = nmp->nm_rsize;
209 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
210 		nmp->nm_wsize = maxio;
211 	if (nmp->nm_wsize > NFS_MAXBSIZE)
212 		nmp->nm_wsize = NFS_MAXBSIZE;
213 
214 	/*
215 	 * Calculate the size used for io buffers.  Use the larger
216 	 * of the two sizes to minimise nfs requests but make sure
217 	 * that it is at least one VM page to avoid wasting buffer
218 	 * space.  It must also be at least NFS_DIRBLKSIZ, since
219 	 * that is the buffer size used for directories.
220 	 */
221 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
222 	iosize = imax(iosize, PAGE_SIZE);
223 	iosize = imax(iosize, NFS_DIRBLKSIZ);
224 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
225 	return (iosize);
226 }
227 
228 static void
229 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
230 {
231 
232 	args->version = NFS_ARGSVERSION;
233 	args->addr = oargs->addr;
234 	args->addrlen = oargs->addrlen;
235 	args->sotype = oargs->sotype;
236 	args->proto = oargs->proto;
237 	args->fh = oargs->fh;
238 	args->fhsize = oargs->fhsize;
239 	args->flags = oargs->flags;
240 	args->wsize = oargs->wsize;
241 	args->rsize = oargs->rsize;
242 	args->readdirsize = oargs->readdirsize;
243 	args->timeo = oargs->timeo;
244 	args->retrans = oargs->retrans;
245 	args->readahead = oargs->readahead;
246 	args->hostname = oargs->hostname;
247 }
248 
249 static void
250 nfs_convert_diskless(void)
251 {
252 
253 	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
254 		sizeof(struct ifaliasreq));
255 	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
256 		sizeof(struct sockaddr_in));
257 	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
258 	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
259 		nfsv3_diskless.root_fhsize = NFSX_MYFH;
260 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
261 	} else {
262 		nfsv3_diskless.root_fhsize = NFSX_V2FH;
263 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
264 	}
265 	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
266 		sizeof(struct sockaddr_in));
267 	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
268 	nfsv3_diskless.root_time = nfs_diskless.root_time;
269 	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
270 		MAXHOSTNAMELEN);
271 	nfs_diskless_valid = 3;
272 }
273 
274 /*
275  * nfs statfs call
276  */
277 static int
278 nfs_statfs(struct mount *mp, struct statfs *sbp)
279 {
280 	struct vnode *vp;
281 	struct thread *td;
282 	struct nfsmount *nmp = VFSTONFS(mp);
283 	struct nfsvattr nfsva;
284 	struct nfsfsinfo fs;
285 	struct nfsstatfs sb;
286 	int error = 0, attrflag, gotfsinfo = 0, ret;
287 	struct nfsnode *np;
288 
289 	td = curthread;
290 
291 	error = vfs_busy(mp, MBF_NOWAIT);
292 	if (error)
293 		return (error);
294 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
295 	if (error) {
296 		vfs_unbusy(mp);
297 		return (error);
298 	}
299 	vp = NFSTOV(np);
300 	mtx_lock(&nmp->nm_mtx);
301 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
302 		mtx_unlock(&nmp->nm_mtx);
303 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
304 		    &attrflag, NULL);
305 		if (!error)
306 			gotfsinfo = 1;
307 	} else
308 		mtx_unlock(&nmp->nm_mtx);
309 	if (!error)
310 		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
311 		    &attrflag, NULL);
312 	if (error != 0)
313 		NFSCL_DEBUG(2, "statfs=%d\n", error);
314 	if (attrflag == 0) {
315 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
316 		    td->td_ucred, td, &nfsva, NULL, NULL);
317 		if (ret) {
318 			/*
319 			 * Just set default values to get things going.
320 			 */
321 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
322 			nfsva.na_vattr.va_type = VDIR;
323 			nfsva.na_vattr.va_mode = 0777;
324 			nfsva.na_vattr.va_nlink = 100;
325 			nfsva.na_vattr.va_uid = (uid_t)0;
326 			nfsva.na_vattr.va_gid = (gid_t)0;
327 			nfsva.na_vattr.va_fileid = 2;
328 			nfsva.na_vattr.va_gen = 1;
329 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
330 			nfsva.na_vattr.va_size = 512 * 1024;
331 		}
332 	}
333 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
334 	if (!error) {
335 	    mtx_lock(&nmp->nm_mtx);
336 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
337 		nfscl_loadfsinfo(nmp, &fs);
338 	    nfscl_loadsbinfo(nmp, &sb, sbp);
339 	    sbp->f_iosize = newnfs_iosize(nmp);
340 	    mtx_unlock(&nmp->nm_mtx);
341 	    if (sbp != &mp->mnt_stat) {
342 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
343 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
344 	    }
345 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
346 	} else if (NFS_ISV4(vp)) {
347 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
348 	}
349 	vput(vp);
350 	vfs_unbusy(mp);
351 	return (error);
352 }
353 
354 /*
355  * nfs version 3 fsinfo rpc call
356  */
357 int
358 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
359     struct thread *td)
360 {
361 	struct nfsfsinfo fs;
362 	struct nfsvattr nfsva;
363 	int error, attrflag;
364 
365 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
366 	if (!error) {
367 		if (attrflag)
368 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
369 			    1);
370 		mtx_lock(&nmp->nm_mtx);
371 		nfscl_loadfsinfo(nmp, &fs);
372 		mtx_unlock(&nmp->nm_mtx);
373 	}
374 	return (error);
375 }
376 
377 /*
378  * Mount a remote root fs via. nfs. This depends on the info in the
379  * nfs_diskless structure that has been filled in properly by some primary
380  * bootstrap.
381  * It goes something like this:
382  * - do enough of "ifconfig" by calling ifioctl() so that the system
383  *   can talk to the server
384  * - If nfs_diskless.mygateway is filled in, use that address as
385  *   a default gateway.
386  * - build the rootfs mount point and call mountnfs() to do the rest.
387  *
388  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
389  * structure, as well as other global NFS client variables here, as
390  * nfs_mountroot() will be called once in the boot before any other NFS
391  * client activity occurs.
392  */
393 static int
394 nfs_mountroot(struct mount *mp)
395 {
396 	struct thread *td = curthread;
397 	struct nfsv3_diskless *nd = &nfsv3_diskless;
398 	struct socket *so;
399 	struct vnode *vp;
400 	struct ifreq ir;
401 	int error;
402 	u_long l;
403 	char buf[128];
404 	char *cp;
405 
406 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
407 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
408 #elif defined(NFS_ROOT)
409 	nfs_setup_diskless();
410 #endif
411 
412 	if (nfs_diskless_valid == 0)
413 		return (-1);
414 	if (nfs_diskless_valid == 1)
415 		nfs_convert_diskless();
416 
417 	/*
418 	 * Do enough of ifconfig(8) so that the critical net interface can
419 	 * talk to the server.
420 	 */
421 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
422 	    td->td_ucred, td);
423 	if (error)
424 		panic("nfs_mountroot: socreate(%04x): %d",
425 			nd->myif.ifra_addr.sa_family, error);
426 
427 #if 0 /* XXX Bad idea */
428 	/*
429 	 * We might not have been told the right interface, so we pass
430 	 * over the first ten interfaces of the same kind, until we get
431 	 * one of them configured.
432 	 */
433 
434 	for (i = strlen(nd->myif.ifra_name) - 1;
435 		nd->myif.ifra_name[i] >= '0' &&
436 		nd->myif.ifra_name[i] <= '9';
437 		nd->myif.ifra_name[i] ++) {
438 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
439 		if(!error)
440 			break;
441 	}
442 #endif
443 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
444 	if (error)
445 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
446 	if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
447 		ir.ifr_mtu = strtol(cp, NULL, 10);
448 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
449 		freeenv(cp);
450 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
451 		if (error)
452 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
453 	}
454 	soclose(so);
455 
456 	/*
457 	 * If the gateway field is filled in, set it as the default route.
458 	 * Note that pxeboot will set a default route of 0 if the route
459 	 * is not set by the DHCP server.  Check also for a value of 0
460 	 * to avoid panicking inappropriately in that situation.
461 	 */
462 	if (nd->mygateway.sin_len != 0 &&
463 	    nd->mygateway.sin_addr.s_addr != 0) {
464 		struct sockaddr_in mask, sin;
465 
466 		bzero((caddr_t)&mask, sizeof(mask));
467 		sin = mask;
468 		sin.sin_family = AF_INET;
469 		sin.sin_len = sizeof(sin);
470                 /* XXX MRT use table 0 for this sort of thing */
471 		CURVNET_SET(TD_TO_VNET(td));
472 		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
473 		    (struct sockaddr *)&nd->mygateway,
474 		    (struct sockaddr *)&mask,
475 		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
476 		CURVNET_RESTORE();
477 		if (error)
478 			panic("nfs_mountroot: RTM_ADD: %d", error);
479 	}
480 
481 	/*
482 	 * Create the rootfs mount point.
483 	 */
484 	nd->root_args.fh = nd->root_fh;
485 	nd->root_args.fhsize = nd->root_fhsize;
486 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
487 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
488 		(l >> 24) & 0xff, (l >> 16) & 0xff,
489 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
490 	printf("NFS ROOT: %s\n", buf);
491 	nd->root_args.hostname = buf;
492 	if ((error = nfs_mountdiskless(buf,
493 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
494 		return (error);
495 	}
496 
497 	/*
498 	 * This is not really an nfs issue, but it is much easier to
499 	 * set hostname here and then let the "/etc/rc.xxx" files
500 	 * mount the right /var based upon its preset value.
501 	 */
502 	mtx_lock(&prison0.pr_mtx);
503 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
504 	    sizeof(prison0.pr_hostname));
505 	mtx_unlock(&prison0.pr_mtx);
506 	inittodr(ntohl(nd->root_time));
507 	return (0);
508 }
509 
510 /*
511  * Internal version of mount system call for diskless setup.
512  */
513 static int
514 nfs_mountdiskless(char *path,
515     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
516     struct vnode **vpp, struct mount *mp)
517 {
518 	struct sockaddr *nam;
519 	int dirlen, error;
520 	char *dirpath;
521 
522 	/*
523 	 * Find the directory path in "path", which also has the server's
524 	 * name/ip address in it.
525 	 */
526 	dirpath = strchr(path, ':');
527 	if (dirpath != NULL)
528 		dirlen = strlen(++dirpath);
529 	else
530 		dirlen = 0;
531 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
532 	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
533 	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
534 	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
535 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
536 		return (error);
537 	}
538 	return (0);
539 }
540 
541 static void
542 nfs_sec_name(char *sec, int *flagsp)
543 {
544 	if (!strcmp(sec, "krb5"))
545 		*flagsp |= NFSMNT_KERB;
546 	else if (!strcmp(sec, "krb5i"))
547 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
548 	else if (!strcmp(sec, "krb5p"))
549 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
550 }
551 
552 static void
553 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
554     const char *hostname, struct ucred *cred, struct thread *td)
555 {
556 	int adjsock;
557 	char *p;
558 
559 	/*
560 	 * Set read-only flag if requested; otherwise, clear it if this is
561 	 * an update.  If this is not an update, then either the read-only
562 	 * flag is already clear, or this is a root mount and it was set
563 	 * intentionally at some previous point.
564 	 */
565 	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
566 		MNT_ILOCK(mp);
567 		mp->mnt_flag |= MNT_RDONLY;
568 		MNT_IUNLOCK(mp);
569 	} else if (mp->mnt_flag & MNT_UPDATE) {
570 		MNT_ILOCK(mp);
571 		mp->mnt_flag &= ~MNT_RDONLY;
572 		MNT_IUNLOCK(mp);
573 	}
574 
575 	/*
576 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
577 	 * no sense in that context.  Also, set up appropriate retransmit
578 	 * and soft timeout behavior.
579 	 */
580 	if (argp->sotype == SOCK_STREAM) {
581 		nmp->nm_flag &= ~NFSMNT_NOCONN;
582 		nmp->nm_timeo = NFS_MAXTIMEO;
583 		if ((argp->flags & NFSMNT_NFSV4) != 0)
584 			nmp->nm_retry = INT_MAX;
585 		else
586 			nmp->nm_retry = NFS_RETRANS_TCP;
587 	}
588 
589 	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
590 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
591 		argp->flags &= ~NFSMNT_RDIRPLUS;
592 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
593 	}
594 
595 	/* Re-bind if rsrvd port requested and wasn't on one */
596 	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
597 		  && (argp->flags & NFSMNT_RESVPORT);
598 	/* Also re-bind if we're switching to/from a connected UDP socket */
599 	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
600 		    (argp->flags & NFSMNT_NOCONN));
601 
602 	/* Update flags atomically.  Don't change the lock bits. */
603 	nmp->nm_flag = argp->flags | nmp->nm_flag;
604 
605 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
606 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
607 		if (nmp->nm_timeo < NFS_MINTIMEO)
608 			nmp->nm_timeo = NFS_MINTIMEO;
609 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
610 			nmp->nm_timeo = NFS_MAXTIMEO;
611 	}
612 
613 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
614 		nmp->nm_retry = argp->retrans;
615 		if (nmp->nm_retry > NFS_MAXREXMIT)
616 			nmp->nm_retry = NFS_MAXREXMIT;
617 	}
618 
619 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
620 		nmp->nm_wsize = argp->wsize;
621 		/*
622 		 * Clip at the power of 2 below the size. There is an
623 		 * issue (not isolated) that causes intermittent page
624 		 * faults if this is not done.
625 		 */
626 		if (nmp->nm_wsize > NFS_FABLKSIZE)
627 			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
628 		else
629 			nmp->nm_wsize = NFS_FABLKSIZE;
630 	}
631 
632 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
633 		nmp->nm_rsize = argp->rsize;
634 		/*
635 		 * Clip at the power of 2 below the size. There is an
636 		 * issue (not isolated) that causes intermittent page
637 		 * faults if this is not done.
638 		 */
639 		if (nmp->nm_rsize > NFS_FABLKSIZE)
640 			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
641 		else
642 			nmp->nm_rsize = NFS_FABLKSIZE;
643 	}
644 
645 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
646 		nmp->nm_readdirsize = argp->readdirsize;
647 	}
648 
649 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
650 		nmp->nm_acregmin = argp->acregmin;
651 	else
652 		nmp->nm_acregmin = NFS_MINATTRTIMO;
653 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
654 		nmp->nm_acregmax = argp->acregmax;
655 	else
656 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
657 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
658 		nmp->nm_acdirmin = argp->acdirmin;
659 	else
660 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
661 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
662 		nmp->nm_acdirmax = argp->acdirmax;
663 	else
664 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
665 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
666 		nmp->nm_acdirmin = nmp->nm_acdirmax;
667 	if (nmp->nm_acregmin > nmp->nm_acregmax)
668 		nmp->nm_acregmin = nmp->nm_acregmax;
669 
670 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
671 		if (argp->readahead <= NFS_MAXRAHEAD)
672 			nmp->nm_readahead = argp->readahead;
673 		else
674 			nmp->nm_readahead = NFS_MAXRAHEAD;
675 	}
676 	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
677 		if (argp->wcommitsize < nmp->nm_wsize)
678 			nmp->nm_wcommitsize = nmp->nm_wsize;
679 		else
680 			nmp->nm_wcommitsize = argp->wcommitsize;
681 	}
682 
683 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
684 		    (nmp->nm_soproto != argp->proto));
685 
686 	if (nmp->nm_client != NULL && adjsock) {
687 		int haslock = 0, error = 0;
688 
689 		if (nmp->nm_sotype == SOCK_STREAM) {
690 			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
691 			if (!error)
692 				haslock = 1;
693 		}
694 		if (!error) {
695 		    newnfs_disconnect(&nmp->nm_sockreq);
696 		    if (haslock)
697 			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
698 		    nmp->nm_sotype = argp->sotype;
699 		    nmp->nm_soproto = argp->proto;
700 		    if (nmp->nm_sotype == SOCK_DGRAM)
701 			while (newnfs_connect(nmp, &nmp->nm_sockreq,
702 			    cred, td, 0)) {
703 				printf("newnfs_args: retrying connect\n");
704 				(void) nfs_catnap(PSOCK, 0, "nfscon");
705 			}
706 		}
707 	} else {
708 		nmp->nm_sotype = argp->sotype;
709 		nmp->nm_soproto = argp->proto;
710 	}
711 
712 	if (hostname != NULL) {
713 		strlcpy(nmp->nm_hostname, hostname,
714 		    sizeof(nmp->nm_hostname));
715 		p = strchr(nmp->nm_hostname, ':');
716 		if (p != NULL)
717 			*p = '\0';
718 	}
719 }
720 
/*
 * NULL-terminated list of the mount option names that nfs_mount()
 * passes to vfs_filteropt() as the set of acceptable options.
 */
static const char *nfs_opts[] = {
	"from", "nfs_args", "noac", "noatime", "noexec",
	"suiddir", "nosuid", "nosymfollow", "union", "noclusterr",
	"noclusterw", "multilabel", "acls", "force", "update",
	"async", "noconn", "nolockd", "conn", "lockd",
	"intr", "rdirplus", "readdirsize", "soft", "hard",
	"mntudp", "tcp", "udp", "wsize", "rsize",
	"retrans", "actimeo", "acregmin", "acregmax", "acdirmin",
	"acdirmax", "resvport", "readahead", "hostname", "timeo",
	"timeout", "addr", "fh", "nfsv3", "sec",
	"principal", "nfsv4", "gssname", "allgssname", "dirpath",
	"minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr",
	"pnfs", "wcommitsize",
	NULL
};
732 
733 /*
734  * Parse the "from" mountarg, passed by the generic mount(8) program
735  * or the mountroot code.  This is used when rerooting into NFS.
736  *
737  * Note that the "hostname" is actually a "hostname:/share/path" string.
738  */
739 static int
740 nfs_mount_parse_from(struct vfsoptlist *opts, char **hostnamep,
741     struct sockaddr_in **sinp, char *dirpath, size_t dirpathsize, int *dirlenp)
742 {
743 	char *nam, *delimp, *hostp, *spec;
744 	int error, have_bracket = 0, offset, rv, speclen;
745 	struct sockaddr_in *sin;
746 	size_t len;
747 
748 	error = vfs_getopt(opts, "from", (void **)&spec, &speclen);
749 	if (error != 0)
750 		return (error);
751 	nam = malloc(MNAMELEN + 1, M_TEMP, M_WAITOK);
752 
753 	/*
754 	 * This part comes from sbin/mount_nfs/mount_nfs.c:getnfsargs().
755 	 */
756 	if (*spec == '[' && (delimp = strchr(spec + 1, ']')) != NULL &&
757 	    *(delimp + 1) == ':') {
758 		hostp = spec + 1;
759 		spec = delimp + 2;
760 		have_bracket = 1;
761 	} else if ((delimp = strrchr(spec, ':')) != NULL) {
762 		hostp = spec;
763 		spec = delimp + 1;
764 	} else if ((delimp = strrchr(spec, '@')) != NULL) {
765 		printf("%s: path@server syntax is deprecated, "
766 		    "use server:path\n", __func__);
767 		hostp = delimp + 1;
768 	} else {
769 		printf("%s: no <host>:<dirpath> nfs-name\n", __func__);
770 		free(nam, M_TEMP);
771 		return (EINVAL);
772 	}
773 	*delimp = '\0';
774 
775 	/*
776 	 * If there has been a trailing slash at mounttime it seems
777 	 * that some mountd implementations fail to remove the mount
778 	 * entries from their mountlist while unmounting.
779 	 */
780 	for (speclen = strlen(spec);
781 	    speclen > 1 && spec[speclen - 1] == '/';
782 	    speclen--)
783 		spec[speclen - 1] = '\0';
784 	if (strlen(hostp) + strlen(spec) + 1 > MNAMELEN) {
785 		printf("%s: %s:%s: name too long", __func__, hostp, spec);
786 		free(nam, M_TEMP);
787 		return (EINVAL);
788 	}
789 	/* Make both '@' and ':' notations equal */
790 	if (*hostp != '\0') {
791 		len = strlen(hostp);
792 		offset = 0;
793 		if (have_bracket)
794 			nam[offset++] = '[';
795 		memmove(nam + offset, hostp, len);
796 		if (have_bracket)
797 			nam[len + offset++] = ']';
798 		nam[len + offset++] = ':';
799 		memmove(nam + len + offset, spec, speclen);
800 		nam[len + speclen + offset] = '\0';
801 	} else
802 		nam[0] = '\0';
803 
804 	/*
805 	 * XXX: IPv6
806 	 */
807 	sin = malloc(sizeof(*sin), M_SONAME, M_WAITOK);
808 	rv = inet_pton(AF_INET, hostp, &sin->sin_addr);
809 	if (rv != 1) {
810 		printf("%s: cannot parse '%s', inet_pton() returned %d\n",
811 		    __func__, hostp, rv);
812 		free(nam, M_TEMP);
813 		free(sin, M_SONAME);
814 		return (EINVAL);
815 	}
816 
817 	sin->sin_len = sizeof(*sin);
818 	sin->sin_family = AF_INET;
819 	/*
820 	 * XXX: hardcoded port number.
821 	 */
822 	sin->sin_port = htons(2049);
823 
824 	*hostnamep = strdup(nam, M_NEWNFSMNT);
825 	*sinp = sin;
826 	strlcpy(dirpath, spec, dirpathsize);
827 	*dirlenp = strlen(dirpath);
828 
829 	free(nam, M_TEMP);
830 	return (0);
831 }
832 
833 /*
834  * VFS Operations.
835  *
836  * mount system call
837  * It seems a bit dumb to copyinstr() the host and path here and then
838  * bcopy() them in mountnfs(), but I wanted to detect errors before
839  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
840  * an error after that means that I have to release the mbuf.
841  */
842 /* ARGSUSED */
843 static int
844 nfs_mount(struct mount *mp)
845 {
846 	struct nfs_args args = {
847 	    .version = NFS_ARGSVERSION,
848 	    .addr = NULL,
849 	    .addrlen = sizeof (struct sockaddr_in),
850 	    .sotype = SOCK_STREAM,
851 	    .proto = 0,
852 	    .fh = NULL,
853 	    .fhsize = 0,
854 	    .flags = NFSMNT_RESVPORT,
855 	    .wsize = NFS_WSIZE,
856 	    .rsize = NFS_RSIZE,
857 	    .readdirsize = NFS_READDIRSIZE,
858 	    .timeo = 10,
859 	    .retrans = NFS_RETRANS,
860 	    .readahead = NFS_DEFRAHEAD,
861 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
862 	    .hostname = NULL,
863 	    .acregmin = NFS_MINATTRTIMO,
864 	    .acregmax = NFS_MAXATTRTIMO,
865 	    .acdirmin = NFS_MINDIRATTRTIMO,
866 	    .acdirmax = NFS_MAXDIRATTRTIMO,
867 	};
868 	int error = 0, ret, len;
869 	struct sockaddr *nam = NULL;
870 	struct vnode *vp;
871 	struct thread *td;
872 	char *hst;
873 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
874 	char *cp, *opt, *name, *secname;
875 	int nametimeo = NFS_DEFAULT_NAMETIMEO;
876 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
877 	int minvers = 0;
878 	int dirlen, has_nfs_args_opt, has_nfs_from_opt,
879 	    krbnamelen, srvkrbnamelen;
880 	size_t hstlen;
881 
882 	has_nfs_args_opt = 0;
883 	has_nfs_from_opt = 0;
884 	hst = malloc(MNAMELEN, M_TEMP, M_WAITOK);
885 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
886 		error = EINVAL;
887 		goto out;
888 	}
889 
890 	td = curthread;
891 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS &&
892 	    nfs_diskless_valid != 0) {
893 		error = nfs_mountroot(mp);
894 		goto out;
895 	}
896 
897 	nfscl_init();
898 
899 	/*
900 	 * The old mount_nfs program passed the struct nfs_args
901 	 * from userspace to kernel.  The new mount_nfs program
902 	 * passes string options via nmount() from userspace to kernel
903 	 * and we populate the struct nfs_args in the kernel.
904 	 */
905 	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
906 		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
907 		    sizeof(args));
908 		if (error != 0)
909 			goto out;
910 
911 		if (args.version != NFS_ARGSVERSION) {
912 			error = EPROGMISMATCH;
913 			goto out;
914 		}
915 		has_nfs_args_opt = 1;
916 	}
917 
918 	/* Handle the new style options. */
919 	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
920 		args.acdirmin = args.acdirmax =
921 		    args.acregmin = args.acregmax = 0;
922 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
923 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
924 	}
925 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
926 		args.flags |= NFSMNT_NOCONN;
927 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
928 		args.flags &= ~NFSMNT_NOCONN;
929 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
930 		args.flags |= NFSMNT_NOLOCKD;
931 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
932 		args.flags &= ~NFSMNT_NOLOCKD;
933 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
934 		args.flags |= NFSMNT_INT;
935 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
936 		args.flags |= NFSMNT_RDIRPLUS;
937 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
938 		args.flags |= NFSMNT_RESVPORT;
939 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
940 		args.flags &= ~NFSMNT_RESVPORT;
941 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
942 		args.flags |= NFSMNT_SOFT;
943 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
944 		args.flags &= ~NFSMNT_SOFT;
945 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
946 		args.sotype = SOCK_DGRAM;
947 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
948 		args.sotype = SOCK_DGRAM;
949 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
950 		args.sotype = SOCK_STREAM;
951 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
952 		args.flags |= NFSMNT_NFSV3;
953 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
954 		args.flags |= NFSMNT_NFSV4;
955 		args.sotype = SOCK_STREAM;
956 	}
957 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
958 		args.flags |= NFSMNT_ALLGSSNAME;
959 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
960 		args.flags |= NFSMNT_NOCTO;
961 	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
962 		args.flags |= NFSMNT_NONCONTIGWR;
963 	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
964 		args.flags |= NFSMNT_PNFS;
965 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
966 		if (opt == NULL) {
967 			vfs_mount_error(mp, "illegal readdirsize");
968 			error = EINVAL;
969 			goto out;
970 		}
971 		ret = sscanf(opt, "%d", &args.readdirsize);
972 		if (ret != 1 || args.readdirsize <= 0) {
973 			vfs_mount_error(mp, "illegal readdirsize: %s",
974 			    opt);
975 			error = EINVAL;
976 			goto out;
977 		}
978 		args.flags |= NFSMNT_READDIRSIZE;
979 	}
980 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
981 		if (opt == NULL) {
982 			vfs_mount_error(mp, "illegal readahead");
983 			error = EINVAL;
984 			goto out;
985 		}
986 		ret = sscanf(opt, "%d", &args.readahead);
987 		if (ret != 1 || args.readahead <= 0) {
988 			vfs_mount_error(mp, "illegal readahead: %s",
989 			    opt);
990 			error = EINVAL;
991 			goto out;
992 		}
993 		args.flags |= NFSMNT_READAHEAD;
994 	}
995 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
996 		if (opt == NULL) {
997 			vfs_mount_error(mp, "illegal wsize");
998 			error = EINVAL;
999 			goto out;
1000 		}
1001 		ret = sscanf(opt, "%d", &args.wsize);
1002 		if (ret != 1 || args.wsize <= 0) {
1003 			vfs_mount_error(mp, "illegal wsize: %s",
1004 			    opt);
1005 			error = EINVAL;
1006 			goto out;
1007 		}
1008 		args.flags |= NFSMNT_WSIZE;
1009 	}
1010 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
1011 		if (opt == NULL) {
1012 			vfs_mount_error(mp, "illegal rsize");
1013 			error = EINVAL;
1014 			goto out;
1015 		}
1016 		ret = sscanf(opt, "%d", &args.rsize);
1017 		if (ret != 1 || args.rsize <= 0) {
1018 			vfs_mount_error(mp, "illegal wsize: %s",
1019 			    opt);
1020 			error = EINVAL;
1021 			goto out;
1022 		}
1023 		args.flags |= NFSMNT_RSIZE;
1024 	}
1025 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
1026 		if (opt == NULL) {
1027 			vfs_mount_error(mp, "illegal retrans");
1028 			error = EINVAL;
1029 			goto out;
1030 		}
1031 		ret = sscanf(opt, "%d", &args.retrans);
1032 		if (ret != 1 || args.retrans <= 0) {
1033 			vfs_mount_error(mp, "illegal retrans: %s",
1034 			    opt);
1035 			error = EINVAL;
1036 			goto out;
1037 		}
1038 		args.flags |= NFSMNT_RETRANS;
1039 	}
1040 	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
1041 		ret = sscanf(opt, "%d", &args.acregmin);
1042 		if (ret != 1 || args.acregmin < 0) {
1043 			vfs_mount_error(mp, "illegal actimeo: %s",
1044 			    opt);
1045 			error = EINVAL;
1046 			goto out;
1047 		}
1048 		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
1049 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
1050 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
1051 	}
1052 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
1053 		ret = sscanf(opt, "%d", &args.acregmin);
1054 		if (ret != 1 || args.acregmin < 0) {
1055 			vfs_mount_error(mp, "illegal acregmin: %s",
1056 			    opt);
1057 			error = EINVAL;
1058 			goto out;
1059 		}
1060 		args.flags |= NFSMNT_ACREGMIN;
1061 	}
1062 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
1063 		ret = sscanf(opt, "%d", &args.acregmax);
1064 		if (ret != 1 || args.acregmax < 0) {
1065 			vfs_mount_error(mp, "illegal acregmax: %s",
1066 			    opt);
1067 			error = EINVAL;
1068 			goto out;
1069 		}
1070 		args.flags |= NFSMNT_ACREGMAX;
1071 	}
1072 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
1073 		ret = sscanf(opt, "%d", &args.acdirmin);
1074 		if (ret != 1 || args.acdirmin < 0) {
1075 			vfs_mount_error(mp, "illegal acdirmin: %s",
1076 			    opt);
1077 			error = EINVAL;
1078 			goto out;
1079 		}
1080 		args.flags |= NFSMNT_ACDIRMIN;
1081 	}
1082 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
1083 		ret = sscanf(opt, "%d", &args.acdirmax);
1084 		if (ret != 1 || args.acdirmax < 0) {
1085 			vfs_mount_error(mp, "illegal acdirmax: %s",
1086 			    opt);
1087 			error = EINVAL;
1088 			goto out;
1089 		}
1090 		args.flags |= NFSMNT_ACDIRMAX;
1091 	}
1092 	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
1093 		ret = sscanf(opt, "%d", &args.wcommitsize);
1094 		if (ret != 1 || args.wcommitsize < 0) {
1095 			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1096 			error = EINVAL;
1097 			goto out;
1098 		}
1099 		args.flags |= NFSMNT_WCOMMITSIZE;
1100 	}
1101 	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1102 		ret = sscanf(opt, "%d", &args.timeo);
1103 		if (ret != 1 || args.timeo <= 0) {
1104 			vfs_mount_error(mp, "illegal timeo: %s",
1105 			    opt);
1106 			error = EINVAL;
1107 			goto out;
1108 		}
1109 		args.flags |= NFSMNT_TIMEO;
1110 	}
1111 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1112 		ret = sscanf(opt, "%d", &args.timeo);
1113 		if (ret != 1 || args.timeo <= 0) {
1114 			vfs_mount_error(mp, "illegal timeout: %s",
1115 			    opt);
1116 			error = EINVAL;
1117 			goto out;
1118 		}
1119 		args.flags |= NFSMNT_TIMEO;
1120 	}
1121 	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1122 		ret = sscanf(opt, "%d", &nametimeo);
1123 		if (ret != 1 || nametimeo < 0) {
1124 			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1125 			error = EINVAL;
1126 			goto out;
1127 		}
1128 	}
1129 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1130 	    == 0) {
1131 		ret = sscanf(opt, "%d", &negnametimeo);
1132 		if (ret != 1 || negnametimeo < 0) {
1133 			vfs_mount_error(mp, "illegal negnametimeo: %s",
1134 			    opt);
1135 			error = EINVAL;
1136 			goto out;
1137 		}
1138 	}
1139 	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1140 	    0) {
1141 		ret = sscanf(opt, "%d", &minvers);
1142 		if (ret != 1 || minvers < 0 || minvers > 1 ||
1143 		    (args.flags & NFSMNT_NFSV4) == 0) {
1144 			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1145 			error = EINVAL;
1146 			goto out;
1147 		}
1148 	}
1149 	if (vfs_getopt(mp->mnt_optnew, "sec",
1150 		(void **) &secname, NULL) == 0)
1151 		nfs_sec_name(secname, &args.flags);
1152 
1153 	if (mp->mnt_flag & MNT_UPDATE) {
1154 		struct nfsmount *nmp = VFSTONFS(mp);
1155 
1156 		if (nmp == NULL) {
1157 			error = EIO;
1158 			goto out;
1159 		}
1160 
1161 		/*
1162 		 * If a change from TCP->UDP is done and there are thread(s)
1163 		 * that have I/O RPC(s) in progress with a transfer size
1164 		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1165 		 * hung, retrying the RPC(s) forever. Usually these threads
1166 		 * will be seen doing an uninterruptible sleep on wait channel
1167 		 * "nfsreq".
1168 		 */
1169 		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1170 			tprintf(td->td_proc, LOG_WARNING,
1171 	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1172 
1173 		/*
1174 		 * When doing an update, we can't change version,
1175 		 * security, switch lockd strategies or change cookie
1176 		 * translation
1177 		 */
1178 		args.flags = (args.flags &
1179 		    ~(NFSMNT_NFSV3 |
1180 		      NFSMNT_NFSV4 |
1181 		      NFSMNT_KERB |
1182 		      NFSMNT_INTEGRITY |
1183 		      NFSMNT_PRIVACY |
1184 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1185 		    (nmp->nm_flag &
1186 			(NFSMNT_NFSV3 |
1187 			 NFSMNT_NFSV4 |
1188 			 NFSMNT_KERB |
1189 			 NFSMNT_INTEGRITY |
1190 			 NFSMNT_PRIVACY |
1191 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1192 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1193 		goto out;
1194 	}
1195 
1196 	/*
1197 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1198 	 * or no-connection mode for those protocols that support
1199 	 * no-connection mode (the flag will be cleared later for protocols
1200 	 * that do not support no-connection mode).  This will allow a client
1201 	 * to receive replies from a different IP then the request was
1202 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1203 	 * not 0.
1204 	 */
1205 	if (nfs_ip_paranoia == 0)
1206 		args.flags |= NFSMNT_NOCONN;
1207 
1208 	if (has_nfs_args_opt != 0) {
1209 		/*
1210 		 * In the 'nfs_args' case, the pointers in the args
1211 		 * structure are in userland - we copy them in here.
1212 		 */
1213 		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1214 			vfs_mount_error(mp, "Bad file handle");
1215 			error = EINVAL;
1216 			goto out;
1217 		}
1218 		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1219 		    args.fhsize);
1220 		if (error != 0)
1221 			goto out;
1222 		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1223 		if (error != 0)
1224 			goto out;
1225 		bzero(&hst[hstlen], MNAMELEN - hstlen);
1226 		args.hostname = hst;
1227 		/* getsockaddr() call must be after above copyin() calls */
1228 		error = getsockaddr(&nam, (caddr_t)args.addr,
1229 		    args.addrlen);
1230 		if (error != 0)
1231 			goto out;
1232 	} else if (nfs_mount_parse_from(mp->mnt_optnew,
1233 	    &args.hostname, (struct sockaddr_in **)&nam, dirpath,
1234 	    sizeof(dirpath), &dirlen) == 0) {
1235 		has_nfs_from_opt = 1;
1236 		bcopy(args.hostname, hst, MNAMELEN);
1237 		hst[MNAMELEN - 1] = '\0';
1238 
1239 		/*
1240 		 * This only works with NFSv4 for now.
1241 		 */
1242 		args.fhsize = 0;
1243 		args.flags |= NFSMNT_NFSV4;
1244 		args.sotype = SOCK_STREAM;
1245 	} else {
1246 		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1247 		    &args.fhsize) == 0) {
1248 			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1249 				vfs_mount_error(mp, "Bad file handle");
1250 				error = EINVAL;
1251 				goto out;
1252 			}
1253 			bcopy(args.fh, nfh, args.fhsize);
1254 		} else {
1255 			args.fhsize = 0;
1256 		}
1257 		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1258 		    (void **)&args.hostname, &len);
1259 		if (args.hostname == NULL) {
1260 			vfs_mount_error(mp, "Invalid hostname");
1261 			error = EINVAL;
1262 			goto out;
1263 		}
1264 		if (len >= MNAMELEN) {
1265 			vfs_mount_error(mp, "Hostname too long");
1266 			error = EINVAL;
1267 			goto out;
1268 		}
1269 		bcopy(args.hostname, hst, len);
1270 		hst[len] = '\0';
1271 	}
1272 
1273 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1274 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1275 	else {
1276 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1277 		cp = strchr(srvkrbname, ':');
1278 		if (cp != NULL)
1279 			*cp = '\0';
1280 	}
1281 	srvkrbnamelen = strlen(srvkrbname);
1282 
1283 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1284 		strlcpy(krbname, name, sizeof (krbname));
1285 	else
1286 		krbname[0] = '\0';
1287 	krbnamelen = strlen(krbname);
1288 
1289 	if (has_nfs_from_opt == 0) {
1290 		if (vfs_getopt(mp->mnt_optnew,
1291 		    "dirpath", (void **)&name, NULL) == 0)
1292 			strlcpy(dirpath, name, sizeof (dirpath));
1293 		else
1294 			dirpath[0] = '\0';
1295 		dirlen = strlen(dirpath);
1296 	}
1297 
1298 	if (has_nfs_args_opt == 0 && has_nfs_from_opt == 0) {
1299 		if (vfs_getopt(mp->mnt_optnew, "addr",
1300 		    (void **)&args.addr, &args.addrlen) == 0) {
1301 			if (args.addrlen > SOCK_MAXADDRLEN) {
1302 				error = ENAMETOOLONG;
1303 				goto out;
1304 			}
1305 			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1306 			bcopy(args.addr, nam, args.addrlen);
1307 			nam->sa_len = args.addrlen;
1308 		} else {
1309 			vfs_mount_error(mp, "No server address");
1310 			error = EINVAL;
1311 			goto out;
1312 		}
1313 	}
1314 
1315 	args.fh = nfh;
1316 	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1317 	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1318 	    nametimeo, negnametimeo, minvers);
1319 out:
1320 	if (!error) {
1321 		MNT_ILOCK(mp);
1322 		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1323 		    MNTK_USES_BCACHE;
1324 		if ((VFSTONFS(mp)->nm_flag & NFSMNT_NFSV4) != 0)
1325 			mp->mnt_kern_flag |= MNTK_NULL_NOCACHE;
1326 		MNT_IUNLOCK(mp);
1327 	}
1328 	free(hst, M_TEMP);
1329 	return (error);
1330 }
1331 
1332 
1333 /*
1334  * VFS Operations.
1335  *
1336  * mount system call
1337  * It seems a bit dumb to copyinstr() the host and path here and then
1338  * bcopy() them in mountnfs(), but I wanted to detect errors before
1339  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
1340  * an error after that means that I have to release the mbuf.
1341  */
1342 /* ARGSUSED */
1343 static int
1344 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1345 {
1346 	int error;
1347 	struct nfs_args args;
1348 
1349 	error = copyin(data, &args, sizeof (struct nfs_args));
1350 	if (error)
1351 		return error;
1352 
1353 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1354 
1355 	error = kernel_mount(ma, flags);
1356 	return (error);
1357 }
1358 
1359 /*
1360  * Common code for mount and mountroot
1361  */
1362 static int
1363 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1364     char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1365     u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1366     struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1367     int minvers)
1368 {
1369 	struct nfsmount *nmp;
1370 	struct nfsnode *np;
1371 	int error, trycnt, ret;
1372 	struct nfsvattr nfsva;
1373 	struct nfsclclient *clp;
1374 	struct nfsclds *dsp, *tdsp;
1375 	uint32_t lease;
1376 	static u_int64_t clval = 0;
1377 
1378 	NFSCL_DEBUG(3, "in mnt\n");
1379 	clp = NULL;
1380 	if (mp->mnt_flag & MNT_UPDATE) {
1381 		nmp = VFSTONFS(mp);
1382 		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1383 		FREE(nam, M_SONAME);
1384 		return (0);
1385 	} else {
1386 		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1387 		    krbnamelen + dirlen + srvkrbnamelen + 2,
1388 		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1389 		TAILQ_INIT(&nmp->nm_bufq);
1390 		TAILQ_INIT(&nmp->nm_sess);
1391 		if (clval == 0)
1392 			clval = (u_int64_t)nfsboottime.tv_sec;
1393 		nmp->nm_clval = clval++;
1394 		nmp->nm_krbnamelen = krbnamelen;
1395 		nmp->nm_dirpathlen = dirlen;
1396 		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1397 		if (td->td_ucred->cr_uid != (uid_t)0) {
1398 			/*
1399 			 * nm_uid is used to get KerberosV credentials for
1400 			 * the nfsv4 state handling operations if there is
1401 			 * no host based principal set. Use the uid of
1402 			 * this user if not root, since they are doing the
1403 			 * mount. I don't think setting this for root will
1404 			 * work, since root normally does not have user
1405 			 * credentials in a credentials cache.
1406 			 */
1407 			nmp->nm_uid = td->td_ucred->cr_uid;
1408 		} else {
1409 			/*
1410 			 * Just set to -1, so it won't be used.
1411 			 */
1412 			nmp->nm_uid = (uid_t)-1;
1413 		}
1414 
1415 		/* Copy and null terminate all the names */
1416 		if (nmp->nm_krbnamelen > 0) {
1417 			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1418 			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1419 		}
1420 		if (nmp->nm_dirpathlen > 0) {
1421 			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1422 			    nmp->nm_dirpathlen);
1423 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1424 			    + 1] = '\0';
1425 		}
1426 		if (nmp->nm_srvkrbnamelen > 0) {
1427 			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1428 			    nmp->nm_srvkrbnamelen);
1429 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1430 			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1431 		}
1432 		nmp->nm_sockreq.nr_cred = crhold(cred);
1433 		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1434 		mp->mnt_data = nmp;
1435 		nmp->nm_getinfo = nfs_getnlminfo;
1436 		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1437 	}
1438 	vfs_getnewfsid(mp);
1439 	nmp->nm_mountp = mp;
1440 	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1441 
1442 	/*
1443 	 * Since nfs_decode_args() might optionally set them, these
1444 	 * need to be set to defaults before the call, so that the
1445 	 * optional settings aren't overwritten.
1446 	 */
1447 	nmp->nm_nametimeo = nametimeo;
1448 	nmp->nm_negnametimeo = negnametimeo;
1449 	nmp->nm_timeo = NFS_TIMEO;
1450 	nmp->nm_retry = NFS_RETRANS;
1451 	nmp->nm_readahead = NFS_DEFRAHEAD;
1452 
1453 	/* This is empirical approximation of sqrt(hibufspace) * 256. */
1454 	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
1455 	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
1456 		nmp->nm_wcommitsize *= 2;
1457 	nmp->nm_wcommitsize *= 256;
1458 
1459 	if ((argp->flags & NFSMNT_NFSV4) != 0)
1460 		nmp->nm_minorvers = minvers;
1461 	else
1462 		nmp->nm_minorvers = 0;
1463 
1464 	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1465 
1466 	/*
1467 	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1468 	 * high, depending on whether we end up with negative offsets in
1469 	 * the client or server somewhere.  2GB-1 may be safer.
1470 	 *
1471 	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1472 	 * that we can handle until we find out otherwise.
1473 	 */
1474 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1475 		nmp->nm_maxfilesize = 0xffffffffLL;
1476 	else
1477 		nmp->nm_maxfilesize = OFF_MAX;
1478 
1479 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1480 		nmp->nm_wsize = NFS_WSIZE;
1481 		nmp->nm_rsize = NFS_RSIZE;
1482 		nmp->nm_readdirsize = NFS_READDIRSIZE;
1483 	}
1484 	nmp->nm_numgrps = NFS_MAXGRPS;
1485 	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1486 	if (nmp->nm_tprintf_delay < 0)
1487 		nmp->nm_tprintf_delay = 0;
1488 	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1489 	if (nmp->nm_tprintf_initial_delay < 0)
1490 		nmp->nm_tprintf_initial_delay = 0;
1491 	nmp->nm_fhsize = argp->fhsize;
1492 	if (nmp->nm_fhsize > 0)
1493 		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1494 	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1495 	nmp->nm_nam = nam;
1496 	/* Set up the sockets and per-host congestion */
1497 	nmp->nm_sotype = argp->sotype;
1498 	nmp->nm_soproto = argp->proto;
1499 	nmp->nm_sockreq.nr_prog = NFS_PROG;
1500 	if ((argp->flags & NFSMNT_NFSV4))
1501 		nmp->nm_sockreq.nr_vers = NFS_VER4;
1502 	else if ((argp->flags & NFSMNT_NFSV3))
1503 		nmp->nm_sockreq.nr_vers = NFS_VER3;
1504 	else
1505 		nmp->nm_sockreq.nr_vers = NFS_VER2;
1506 
1507 
1508 	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1509 		goto bad;
1510 	/* For NFSv4.1, get the clientid now. */
1511 	if (nmp->nm_minorvers > 0) {
1512 		NFSCL_DEBUG(3, "at getcl\n");
1513 		error = nfscl_getcl(mp, cred, td, 0, &clp);
1514 		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1515 		if (error != 0)
1516 			goto bad;
1517 	}
1518 
1519 	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1520 	    nmp->nm_dirpathlen > 0) {
1521 		NFSCL_DEBUG(3, "in dirp\n");
1522 		/*
1523 		 * If the fhsize on the mount point == 0 for V4, the mount
1524 		 * path needs to be looked up.
1525 		 */
1526 		trycnt = 3;
1527 		do {
1528 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1529 			    cred, td);
1530 			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
1531 			if (error)
1532 				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1533 		} while (error && --trycnt > 0);
1534 		if (error) {
1535 			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1536 			goto bad;
1537 		}
1538 	}
1539 
1540 	/*
1541 	 * A reference count is needed on the nfsnode representing the
1542 	 * remote root.  If this object is not persistent, then backward
1543 	 * traversals of the mount point (i.e. "..") will not work if
1544 	 * the nfsnode gets flushed out of the cache. Ufs does not have
1545 	 * this problem, because one can identify root inodes by their
1546 	 * number == UFS_ROOTINO (2).
1547 	 */
1548 	if (nmp->nm_fhsize > 0) {
1549 		/*
1550 		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1551 		 * non-zero for the root vnode. f_iosize will be set correctly
1552 		 * by nfs_statfs() before any I/O occurs.
1553 		 */
1554 		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1555 		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1556 		    LK_EXCLUSIVE);
1557 		if (error)
1558 			goto bad;
1559 		*vpp = NFSTOV(np);
1560 
1561 		/*
1562 		 * Get file attributes and transfer parameters for the
1563 		 * mountpoint.  This has the side effect of filling in
1564 		 * (*vpp)->v_type with the correct value.
1565 		 */
1566 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1567 		    cred, td, &nfsva, NULL, &lease);
1568 		if (ret) {
1569 			/*
1570 			 * Just set default values to get things going.
1571 			 */
1572 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1573 			nfsva.na_vattr.va_type = VDIR;
1574 			nfsva.na_vattr.va_mode = 0777;
1575 			nfsva.na_vattr.va_nlink = 100;
1576 			nfsva.na_vattr.va_uid = (uid_t)0;
1577 			nfsva.na_vattr.va_gid = (gid_t)0;
1578 			nfsva.na_vattr.va_fileid = 2;
1579 			nfsva.na_vattr.va_gen = 1;
1580 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1581 			nfsva.na_vattr.va_size = 512 * 1024;
1582 			lease = 60;
1583 		}
1584 		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1585 		if (nmp->nm_minorvers > 0) {
1586 			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1587 			NFSLOCKCLSTATE();
1588 			clp->nfsc_renew = NFSCL_RENEW(lease);
1589 			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1590 			clp->nfsc_clientidrev++;
1591 			if (clp->nfsc_clientidrev == 0)
1592 				clp->nfsc_clientidrev++;
1593 			NFSUNLOCKCLSTATE();
1594 			/*
1595 			 * Mount will succeed, so the renew thread can be
1596 			 * started now.
1597 			 */
1598 			nfscl_start_renewthread(clp);
1599 			nfscl_clientrelease(clp);
1600 		}
1601 		if (argp->flags & NFSMNT_NFSV3)
1602 			ncl_fsinfo(nmp, *vpp, cred, td);
1603 
1604 		/* Mark if the mount point supports NFSv4 ACLs. */
1605 		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1606 		    ret == 0 &&
1607 		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1608 			MNT_ILOCK(mp);
1609 			mp->mnt_flag |= MNT_NFS4ACLS;
1610 			MNT_IUNLOCK(mp);
1611 		}
1612 
1613 		/*
1614 		 * Lose the lock but keep the ref.
1615 		 */
1616 		NFSVOPUNLOCK(*vpp, 0);
1617 		return (0);
1618 	}
1619 	error = EIO;
1620 
1621 bad:
1622 	if (clp != NULL)
1623 		nfscl_clientrelease(clp);
1624 	newnfs_disconnect(&nmp->nm_sockreq);
1625 	crfree(nmp->nm_sockreq.nr_cred);
1626 	if (nmp->nm_sockreq.nr_auth != NULL)
1627 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1628 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1629 	mtx_destroy(&nmp->nm_mtx);
1630 	if (nmp->nm_clp != NULL) {
1631 		NFSLOCKCLSTATE();
1632 		LIST_REMOVE(nmp->nm_clp, nfsc_list);
1633 		NFSUNLOCKCLSTATE();
1634 		free(nmp->nm_clp, M_NFSCLCLIENT);
1635 	}
1636 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1637 		nfscl_freenfsclds(dsp);
1638 	FREE(nmp, M_NEWNFSMNT);
1639 	FREE(nam, M_SONAME);
1640 	return (error);
1641 }
1642 
1643 /*
1644  * unmount system call
1645  */
1646 static int
1647 nfs_unmount(struct mount *mp, int mntflags)
1648 {
1649 	struct thread *td;
1650 	struct nfsmount *nmp;
1651 	int error, flags = 0, i, trycnt = 0;
1652 	struct nfsclds *dsp, *tdsp;
1653 
1654 	td = curthread;
1655 
1656 	if (mntflags & MNT_FORCE)
1657 		flags |= FORCECLOSE;
1658 	nmp = VFSTONFS(mp);
1659 	/*
1660 	 * Goes something like this..
1661 	 * - Call vflush() to clear out vnodes for this filesystem
1662 	 * - Close the socket
1663 	 * - Free up the data structures
1664 	 */
1665 	/* In the forced case, cancel any outstanding requests. */
1666 	if (mntflags & MNT_FORCE) {
1667 		error = newnfs_nmcancelreqs(nmp);
1668 		if (error)
1669 			goto out;
1670 		/* For a forced close, get rid of the renew thread now */
1671 		nfscl_umount(nmp, td);
1672 	}
1673 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1674 	do {
1675 		error = vflush(mp, 1, flags, td);
1676 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1677 			(void) nfs_catnap(PSOCK, error, "newndm");
1678 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1679 	if (error)
1680 		goto out;
1681 
1682 	/*
1683 	 * We are now committed to the unmount.
1684 	 */
1685 	if ((mntflags & MNT_FORCE) == 0)
1686 		nfscl_umount(nmp, td);
1687 	/* Make sure no nfsiods are assigned to this mount. */
1688 	mtx_lock(&ncl_iod_mutex);
1689 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1690 		if (ncl_iodmount[i] == nmp) {
1691 			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1692 			ncl_iodmount[i] = NULL;
1693 		}
1694 	mtx_unlock(&ncl_iod_mutex);
1695 	newnfs_disconnect(&nmp->nm_sockreq);
1696 	crfree(nmp->nm_sockreq.nr_cred);
1697 	FREE(nmp->nm_nam, M_SONAME);
1698 	if (nmp->nm_sockreq.nr_auth != NULL)
1699 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1700 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1701 	mtx_destroy(&nmp->nm_mtx);
1702 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1703 		nfscl_freenfsclds(dsp);
1704 	FREE(nmp, M_NEWNFSMNT);
1705 out:
1706 	return (error);
1707 }
1708 
1709 /*
1710  * Return root of a filesystem
1711  */
1712 static int
1713 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1714 {
1715 	struct vnode *vp;
1716 	struct nfsmount *nmp;
1717 	struct nfsnode *np;
1718 	int error;
1719 
1720 	nmp = VFSTONFS(mp);
1721 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1722 	if (error)
1723 		return error;
1724 	vp = NFSTOV(np);
1725 	/*
1726 	 * Get transfer parameters and attributes for root vnode once.
1727 	 */
1728 	mtx_lock(&nmp->nm_mtx);
1729 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1730 		mtx_unlock(&nmp->nm_mtx);
1731 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1732 	} else
1733 		mtx_unlock(&nmp->nm_mtx);
1734 	if (vp->v_type == VNON)
1735 	    vp->v_type = VDIR;
1736 	vp->v_vflag |= VV_ROOT;
1737 	*vpp = vp;
1738 	return (0);
1739 }
1740 
1741 /*
1742  * Flush out the buffer cache
1743  */
1744 /* ARGSUSED */
1745 static int
1746 nfs_sync(struct mount *mp, int waitfor)
1747 {
1748 	struct vnode *vp, *mvp;
1749 	struct thread *td;
1750 	int error, allerror = 0;
1751 
1752 	td = curthread;
1753 
1754 	MNT_ILOCK(mp);
1755 	/*
1756 	 * If a forced dismount is in progress, return from here so that
1757 	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1758 	 * calling VFS_UNMOUNT().
1759 	 */
1760 	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1761 		MNT_IUNLOCK(mp);
1762 		return (EBADF);
1763 	}
1764 	MNT_IUNLOCK(mp);
1765 
1766 	/*
1767 	 * Force stale buffer cache information to be flushed.
1768 	 */
1769 loop:
1770 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1771 		/* XXX Racy bv_cnt check. */
1772 		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1773 		    waitfor == MNT_LAZY) {
1774 			VI_UNLOCK(vp);
1775 			continue;
1776 		}
1777 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1778 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1779 			goto loop;
1780 		}
1781 		error = VOP_FSYNC(vp, waitfor, td);
1782 		if (error)
1783 			allerror = error;
1784 		NFSVOPUNLOCK(vp, 0);
1785 		vrele(vp);
1786 	}
1787 	return (allerror);
1788 }
1789 
1790 static int
1791 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1792 {
1793 	struct nfsmount *nmp = VFSTONFS(mp);
1794 	struct vfsquery vq;
1795 	int error;
1796 
1797 	bzero(&vq, sizeof(vq));
1798 	switch (op) {
1799 #if 0
1800 	case VFS_CTL_NOLOCKS:
1801 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1802  		if (req->oldptr != NULL) {
1803  			error = SYSCTL_OUT(req, &val, sizeof(val));
1804  			if (error)
1805  				return (error);
1806  		}
1807  		if (req->newptr != NULL) {
1808  			error = SYSCTL_IN(req, &val, sizeof(val));
1809  			if (error)
1810  				return (error);
1811 			if (val)
1812 				nmp->nm_flag |= NFSMNT_NOLOCKS;
1813 			else
1814 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1815  		}
1816 		break;
1817 #endif
1818 	case VFS_CTL_QUERY:
1819 		mtx_lock(&nmp->nm_mtx);
1820 		if (nmp->nm_state & NFSSTA_TIMEO)
1821 			vq.vq_flags |= VQ_NOTRESP;
1822 		mtx_unlock(&nmp->nm_mtx);
1823 #if 0
1824 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1825 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1826 			vq.vq_flags |= VQ_NOTRESPLOCK;
1827 #endif
1828 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1829 		break;
1830  	case VFS_CTL_TIMEO:
1831  		if (req->oldptr != NULL) {
1832  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1833  			    sizeof(nmp->nm_tprintf_initial_delay));
1834  			if (error)
1835  				return (error);
1836  		}
1837  		if (req->newptr != NULL) {
1838 			error = vfs_suser(mp, req->td);
1839 			if (error)
1840 				return (error);
1841  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1842  			    sizeof(nmp->nm_tprintf_initial_delay));
1843  			if (error)
1844  				return (error);
1845  			if (nmp->nm_tprintf_initial_delay < 0)
1846  				nmp->nm_tprintf_initial_delay = 0;
1847  		}
1848 		break;
1849 	default:
1850 		return (ENOTSUP);
1851 	}
1852 	return (0);
1853 }
1854 
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.  The result of the cancel request is not examined.
 */
static void
nfs_purge(struct mount *mp)
{

	(void) newnfs_nmcancelreqs(VFSTONFS(mp));
}
1867 
1868 /*
1869  * Extract the information needed by the nlm from the nfs vnode.
1870  */
1871 static void
1872 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1873     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1874     struct timeval *timeop)
1875 {
1876 	struct nfsmount *nmp;
1877 	struct nfsnode *np = VTONFS(vp);
1878 
1879 	nmp = VFSTONFS(vp->v_mount);
1880 	if (fhlenp != NULL)
1881 		*fhlenp = (size_t)np->n_fhp->nfh_len;
1882 	if (fhp != NULL)
1883 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1884 	if (sp != NULL)
1885 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1886 	if (is_v3p != NULL)
1887 		*is_v3p = NFS_ISV3(vp);
1888 	if (sizep != NULL)
1889 		*sizep = np->n_size;
1890 	if (timeop != NULL) {
1891 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1892 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1893 	}
1894 }
1895 
/*
 * Append the option name "opt" to the output buffer when "testval" is
 * non-zero and the name, plus its terminating NUL, fits in the space
 * that is left.  On output *buf and *blen are moved past the text that
 * was written.  "nmp" is not used.
 */
static __inline void
nfscl_printopt(struct nfsmount *nmp, int testval, char *opt, char **buf,
    size_t *blen)
{
	size_t optlen;
	int written;

	if (testval == 0)
		return;
	optlen = strlen(opt);
	if (*blen <= optlen)
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	/* Cannot happen given the length check above; kept as a tripwire. */
	if (written != optlen)
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
1913 
/*
 * Append "opt=optval" (the option name followed by its integer value in
 * decimal) to the output string.
 *
 * *buf is the current write position and *blen the space remaining
 * there; both are advanced past the text that was added.  If the
 * complete "name=value" text plus its terminating NUL does not fit,
 * nothing is added: the output string is left ending at *buf and
 * *buf / *blen are unchanged, so that a clipped (and therefore wrong)
 * numeric value is never reported.  "nmp" is not used.
 */
static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
    char *opt, char **buf, size_t *blen)
{
	int len;

	/* Quick reject: not even "name=" and one digit would fit. */
	if (*blen <= strlen(opt) + 1)
		return;

	len = snprintf(*buf, *blen, "%s=%d", opt, optval);
	if (len < 0 || (size_t)len >= *blen) {
		/*
		 * The value was clipped.  Erase the partial option rather
		 * than leaving something like "rsize=65" for 65536 behind.
		 */
		**buf = '\0';
		return;
	}
	*buf += len;
	*blen -= len;
}
1931 
1932 /*
1933  * Load the option flags and values into the buffer.
1934  */
1935 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1936 {
1937 	char *buf;
1938 	size_t blen;
1939 
1940 	buf = buffer;
1941 	blen = buflen;
1942 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1943 	    &blen);
1944 	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1945 		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1946 		    &blen);
1947 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1948 		    &buf, &blen);
1949 	}
1950 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1951 	    &blen);
1952 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1953 	    "nfsv2", &buf, &blen);
1954 	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1955 	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1956 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1957 	    &buf, &blen);
1958 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1959 	    &buf, &blen);
1960 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1961 	    &blen);
1962 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1963 	    &blen);
1964 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1965 	    &blen);
1966 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1967 	    &blen);
1968 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1969 	    &blen);
1970 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
1971 	    ",noncontigwr", &buf, &blen);
1972 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1973 	    0, ",lockd", &buf, &blen);
1974 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1975 	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1976 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1977 	    &buf, &blen);
1978 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1979 	    &buf, &blen);
1980 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1981 	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
1982 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1983 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1984 	    &buf, &blen);
1985 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1986 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
1987 	    &buf, &blen);
1988 	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1989 	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1990 	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
1991 	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
1992 	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
1993 	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
1994 	    &blen);
1995 	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
1996 	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
1997 	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
1998 	    &blen);
1999 	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
2000 	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
2001 	    &blen);
2002 	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
2003 	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
2004 }
2005 
2006