xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision 28f42739a547ffe0b5dfaaf9f49fb4c4813aa232)
1 /*-
2  * Copyright (c) 1989, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/limits.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/mount.h>
55 #include <sys/proc.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/sockio.h>
59 #include <sys/sysctl.h>
60 #include <sys/vnode.h>
61 #include <sys/signalvar.h>
62 
63 #include <vm/vm.h>
64 #include <vm/vm_extern.h>
65 #include <vm/uma.h>
66 
67 #include <net/if.h>
68 #include <net/route.h>
69 #include <netinet/in.h>
70 
71 #include <fs/nfs/nfsport.h>
72 #include <fs/nfsclient/nfsnode.h>
73 #include <fs/nfsclient/nfsmount.h>
74 #include <fs/nfsclient/nfs.h>
75 #include <nfs/nfsdiskless.h>
76 
/* Register the "nfscl" kernel feature together with its description string. */
FEATURE(nfscl, "NFSv4 client");

/* Objects defined in other source files that this file refers to. */
extern int nfscl_ticks;
extern struct timeval nfsboottime;
extern struct nfsstats	newnfsstats;
extern int nfsrv_useacl;
extern int nfscl_debuglevel;
/* Per-daemon arrays, each with NFS_MAXASYNCDAEMON entries. */
extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
extern struct mtx ncl_iod_mutex;
NFSCLSTATEMUTEX;

/* Memory allocation types for NFS request headers and NFS mount structures. */
MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
91 
/* Read/write integer tunables published under the _vfs_nfs sysctl node. */
SYSCTL_DECL(_vfs_nfs);
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0, "");
/*
 * NOTE(review): the next two entries pass their default-value constant
 * (NFS_TPRINTF_INITIAL_DELAY / NFS_TPRINTF_DELAY) as the OID number
 * argument, where the neighbouring entries pass OID_AUTO -- confirm that
 * the fixed numbers are intentional before changing them.
 */
/* presumably the delay before the first "not responding" message -- confirm */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
/* how long between console messages "nfs server foo not responding" */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
#ifdef NFS_DEBUG
/* Debug flag; only compiled in, and only exported, when NFS_DEBUG is set. */
int nfs_debug;
SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
    "Toggle debug flag");
#endif
108 
/* Forward declarations of the file-local helpers defined further down. */
static int	nfs_mountroot(struct mount *);
static void	nfs_sec_name(char *, int *);
static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
		    struct nfs_args *argp, const char *, struct ucred *,
		    struct thread *);
static int	mountnfs(struct nfs_args *, struct mount *,
		    struct sockaddr *, char *, u_char *, int, u_char *, int,
		    u_char *, int, struct vnode **, struct ucred *,
		    struct thread *, int, int, int);
static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
		    struct sockaddr_storage *, int *, off_t *,
		    struct timeval *);
/* VFS entry points; each one is wired into nfs_vfsops below. */
static vfs_mount_t nfs_mount;
static vfs_cmount_t nfs_cmount;
static vfs_unmount_t nfs_unmount;
static vfs_root_t nfs_root;
static vfs_statfs_t nfs_statfs;
static vfs_sync_t nfs_sync;
static vfs_sysctl_t nfs_sysctl;
static vfs_purge_t nfs_purge;
129 
/*
 * nfs vfs operations.
 *
 * Operation vector for the "nfs" file system type.  The init/uninit hooks
 * are ncl_init()/ncl_uninit(); every other slot points at a static
 * function declared in this file.
 */
static struct vfsops nfs_vfsops = {
	.vfs_init =		ncl_init,
	.vfs_mount =		nfs_mount,
	.vfs_cmount =		nfs_cmount,
	.vfs_root =		nfs_root,
	.vfs_statfs =		nfs_statfs,
	.vfs_sync =		nfs_sync,
	.vfs_uninit =		ncl_uninit,
	.vfs_unmount =		nfs_unmount,
	.vfs_sysctl =		nfs_sysctl,
	.vfs_purge =		nfs_purge,
};
/* Register the vector under the name "nfs" with the network and SBDRY flags. */
VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);

/* So that loader and kldload(2) can find us, wherever we are.. */
MODULE_VERSION(nfs, 1);
/* Modules this one depends on (each pinned to version 1). */
MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
MODULE_DEPEND(nfs, krpc, 1, 1, 1);
MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
153 
/*
 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
 * can be shared by both NFS clients. It is declared here so that it
 * will be defined for kernels built without NFS_ROOT, although it
 * isn't used in that case.
 */
#if !defined(NFS_ROOT)
struct nfs_diskless	nfs_diskless = { { { 0 } } };
struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
int			nfs_diskless_valid = 0;
#endif

/*
 * Read-only sysctls exposing the diskless boot state.  nfs_mountroot()
 * treats nfs_diskless_valid == 0 as "nothing filled in" and == 1 as "old
 * format, needs conversion"; nfs_convert_diskless() sets it to 3.
 */
SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &nfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

/* Exports the root_hostnam string of the v3 diskless structure. */
SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");

/* Exports the raw root_saddr bytes of the v3 diskless structure. */
SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
    "%Ssockaddr_in", "Diskless root nfs address");
176 
177 
/* Prototypes for the diskless-boot helpers used by nfs_mountroot(). */
void		newnfsargs_ntoh(struct nfs_args *);
static int	nfs_mountdiskless(char *,
		    struct sockaddr_in *, struct nfs_args *,
		    struct thread *, struct vnode **, struct mount *);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
185 
186 int
187 newnfs_iosize(struct nfsmount *nmp)
188 {
189 	int iosize, maxio;
190 
191 	/* First, set the upper limit for iosize */
192 	if (nmp->nm_flag & NFSMNT_NFSV4) {
193 		maxio = NFS_MAXBSIZE;
194 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
195 		if (nmp->nm_sotype == SOCK_DGRAM)
196 			maxio = NFS_MAXDGRAMDATA;
197 		else
198 			maxio = NFS_MAXBSIZE;
199 	} else {
200 		maxio = NFS_V2MAXDATA;
201 	}
202 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
203 		nmp->nm_rsize = maxio;
204 	if (nmp->nm_rsize > MAXBSIZE)
205 		nmp->nm_rsize = MAXBSIZE;
206 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
207 		nmp->nm_readdirsize = maxio;
208 	if (nmp->nm_readdirsize > nmp->nm_rsize)
209 		nmp->nm_readdirsize = nmp->nm_rsize;
210 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
211 		nmp->nm_wsize = maxio;
212 	if (nmp->nm_wsize > MAXBSIZE)
213 		nmp->nm_wsize = MAXBSIZE;
214 
215 	/*
216 	 * Calculate the size used for io buffers.  Use the larger
217 	 * of the two sizes to minimise nfs requests but make sure
218 	 * that it is at least one VM page to avoid wasting buffer
219 	 * space.
220 	 */
221 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
222 	iosize = imax(iosize, PAGE_SIZE);
223 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
224 	return (iosize);
225 }
226 
227 static void
228 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
229 {
230 
231 	args->version = NFS_ARGSVERSION;
232 	args->addr = oargs->addr;
233 	args->addrlen = oargs->addrlen;
234 	args->sotype = oargs->sotype;
235 	args->proto = oargs->proto;
236 	args->fh = oargs->fh;
237 	args->fhsize = oargs->fhsize;
238 	args->flags = oargs->flags;
239 	args->wsize = oargs->wsize;
240 	args->rsize = oargs->rsize;
241 	args->readdirsize = oargs->readdirsize;
242 	args->timeo = oargs->timeo;
243 	args->retrans = oargs->retrans;
244 	args->readahead = oargs->readahead;
245 	args->hostname = oargs->hostname;
246 }
247 
248 static void
249 nfs_convert_diskless(void)
250 {
251 
252 	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
253 		sizeof(struct ifaliasreq));
254 	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
255 		sizeof(struct sockaddr_in));
256 	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
257 	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
258 		nfsv3_diskless.root_fhsize = NFSX_MYFH;
259 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
260 	} else {
261 		nfsv3_diskless.root_fhsize = NFSX_V2FH;
262 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
263 	}
264 	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
265 		sizeof(struct sockaddr_in));
266 	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
267 	nfsv3_diskless.root_time = nfs_diskless.root_time;
268 	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
269 		MAXHOSTNAMELEN);
270 	nfs_diskless_valid = 3;
271 }
272 
273 /*
274  * nfs statfs call
275  */
276 static int
277 nfs_statfs(struct mount *mp, struct statfs *sbp)
278 {
279 	struct vnode *vp;
280 	struct thread *td;
281 	struct nfsmount *nmp = VFSTONFS(mp);
282 	struct nfsvattr nfsva;
283 	struct nfsfsinfo fs;
284 	struct nfsstatfs sb;
285 	int error = 0, attrflag, gotfsinfo = 0, ret;
286 	struct nfsnode *np;
287 
288 	td = curthread;
289 
290 	error = vfs_busy(mp, MBF_NOWAIT);
291 	if (error)
292 		return (error);
293 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
294 	if (error) {
295 		vfs_unbusy(mp);
296 		return (error);
297 	}
298 	vp = NFSTOV(np);
299 	mtx_lock(&nmp->nm_mtx);
300 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
301 		mtx_unlock(&nmp->nm_mtx);
302 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
303 		    &attrflag, NULL);
304 		if (!error)
305 			gotfsinfo = 1;
306 	} else
307 		mtx_unlock(&nmp->nm_mtx);
308 	if (!error)
309 		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
310 		    &attrflag, NULL);
311 	if (error != 0)
312 		NFSCL_DEBUG(2, "statfs=%d\n", error);
313 	if (attrflag == 0) {
314 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
315 		    td->td_ucred, td, &nfsva, NULL, NULL);
316 		if (ret) {
317 			/*
318 			 * Just set default values to get things going.
319 			 */
320 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
321 			nfsva.na_vattr.va_type = VDIR;
322 			nfsva.na_vattr.va_mode = 0777;
323 			nfsva.na_vattr.va_nlink = 100;
324 			nfsva.na_vattr.va_uid = (uid_t)0;
325 			nfsva.na_vattr.va_gid = (gid_t)0;
326 			nfsva.na_vattr.va_fileid = 2;
327 			nfsva.na_vattr.va_gen = 1;
328 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
329 			nfsva.na_vattr.va_size = 512 * 1024;
330 		}
331 	}
332 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
333 	if (!error) {
334 	    mtx_lock(&nmp->nm_mtx);
335 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
336 		nfscl_loadfsinfo(nmp, &fs);
337 	    nfscl_loadsbinfo(nmp, &sb, sbp);
338 	    sbp->f_iosize = newnfs_iosize(nmp);
339 	    mtx_unlock(&nmp->nm_mtx);
340 	    if (sbp != &mp->mnt_stat) {
341 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
342 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
343 	    }
344 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
345 	} else if (NFS_ISV4(vp)) {
346 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
347 	}
348 	vput(vp);
349 	vfs_unbusy(mp);
350 	return (error);
351 }
352 
353 /*
354  * nfs version 3 fsinfo rpc call
355  */
356 int
357 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
358     struct thread *td)
359 {
360 	struct nfsfsinfo fs;
361 	struct nfsvattr nfsva;
362 	int error, attrflag;
363 
364 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
365 	if (!error) {
366 		if (attrflag)
367 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
368 			    1);
369 		mtx_lock(&nmp->nm_mtx);
370 		nfscl_loadfsinfo(nmp, &fs);
371 		mtx_unlock(&nmp->nm_mtx);
372 	}
373 	return (error);
374 }
375 
376 /*
377  * Mount a remote root fs via. nfs. This depends on the info in the
378  * nfs_diskless structure that has been filled in properly by some primary
379  * bootstrap.
380  * It goes something like this:
381  * - do enough of "ifconfig" by calling ifioctl() so that the system
382  *   can talk to the server
383  * - If nfs_diskless.mygateway is filled in, use that address as
384  *   a default gateway.
385  * - build the rootfs mount point and call mountnfs() to do the rest.
386  *
387  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
388  * structure, as well as other global NFS client variables here, as
389  * nfs_mountroot() will be called once in the boot before any other NFS
390  * client activity occurs.
391  */
392 static int
393 nfs_mountroot(struct mount *mp)
394 {
395 	struct thread *td = curthread;
396 	struct nfsv3_diskless *nd = &nfsv3_diskless;
397 	struct socket *so;
398 	struct vnode *vp;
399 	struct ifreq ir;
400 	int error;
401 	u_long l;
402 	char buf[128];
403 	char *cp;
404 
405 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
406 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
407 #elif defined(NFS_ROOT)
408 	nfs_setup_diskless();
409 #endif
410 
411 	if (nfs_diskless_valid == 0)
412 		return (-1);
413 	if (nfs_diskless_valid == 1)
414 		nfs_convert_diskless();
415 
416 	/*
417 	 * XXX splnet, so networks will receive...
418 	 */
419 	splnet();
420 
421 	/*
422 	 * Do enough of ifconfig(8) so that the critical net interface can
423 	 * talk to the server.
424 	 */
425 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
426 	    td->td_ucred, td);
427 	if (error)
428 		panic("nfs_mountroot: socreate(%04x): %d",
429 			nd->myif.ifra_addr.sa_family, error);
430 
431 #if 0 /* XXX Bad idea */
432 	/*
433 	 * We might not have been told the right interface, so we pass
434 	 * over the first ten interfaces of the same kind, until we get
435 	 * one of them configured.
436 	 */
437 
438 	for (i = strlen(nd->myif.ifra_name) - 1;
439 		nd->myif.ifra_name[i] >= '0' &&
440 		nd->myif.ifra_name[i] <= '9';
441 		nd->myif.ifra_name[i] ++) {
442 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
443 		if(!error)
444 			break;
445 	}
446 #endif
447 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
448 	if (error)
449 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
450 	if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
451 		ir.ifr_mtu = strtol(cp, NULL, 10);
452 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
453 		freeenv(cp);
454 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
455 		if (error)
456 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
457 	}
458 	soclose(so);
459 
460 	/*
461 	 * If the gateway field is filled in, set it as the default route.
462 	 * Note that pxeboot will set a default route of 0 if the route
463 	 * is not set by the DHCP server.  Check also for a value of 0
464 	 * to avoid panicking inappropriately in that situation.
465 	 */
466 	if (nd->mygateway.sin_len != 0 &&
467 	    nd->mygateway.sin_addr.s_addr != 0) {
468 		struct sockaddr_in mask, sin;
469 
470 		bzero((caddr_t)&mask, sizeof(mask));
471 		sin = mask;
472 		sin.sin_family = AF_INET;
473 		sin.sin_len = sizeof(sin);
474                 /* XXX MRT use table 0 for this sort of thing */
475 		CURVNET_SET(TD_TO_VNET(td));
476 		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
477 		    (struct sockaddr *)&nd->mygateway,
478 		    (struct sockaddr *)&mask,
479 		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
480 		CURVNET_RESTORE();
481 		if (error)
482 			panic("nfs_mountroot: RTM_ADD: %d", error);
483 	}
484 
485 	/*
486 	 * Create the rootfs mount point.
487 	 */
488 	nd->root_args.fh = nd->root_fh;
489 	nd->root_args.fhsize = nd->root_fhsize;
490 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
491 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
492 		(l >> 24) & 0xff, (l >> 16) & 0xff,
493 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
494 	printf("NFS ROOT: %s\n", buf);
495 	nd->root_args.hostname = buf;
496 	if ((error = nfs_mountdiskless(buf,
497 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
498 		return (error);
499 	}
500 
501 	/*
502 	 * This is not really an nfs issue, but it is much easier to
503 	 * set hostname here and then let the "/etc/rc.xxx" files
504 	 * mount the right /var based upon its preset value.
505 	 */
506 	mtx_lock(&prison0.pr_mtx);
507 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
508 	    sizeof(prison0.pr_hostname));
509 	mtx_unlock(&prison0.pr_mtx);
510 	inittodr(ntohl(nd->root_time));
511 	return (0);
512 }
513 
514 /*
515  * Internal version of mount system call for diskless setup.
516  */
517 static int
518 nfs_mountdiskless(char *path,
519     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
520     struct vnode **vpp, struct mount *mp)
521 {
522 	struct sockaddr *nam;
523 	int dirlen, error;
524 	char *dirpath;
525 
526 	/*
527 	 * Find the directory path in "path", which also has the server's
528 	 * name/ip address in it.
529 	 */
530 	dirpath = strchr(path, ':');
531 	if (dirpath != NULL)
532 		dirlen = strlen(++dirpath);
533 	else
534 		dirlen = 0;
535 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
536 	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
537 	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
538 	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
539 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
540 		return (error);
541 	}
542 	return (0);
543 }
544 
545 static void
546 nfs_sec_name(char *sec, int *flagsp)
547 {
548 	if (!strcmp(sec, "krb5"))
549 		*flagsp |= NFSMNT_KERB;
550 	else if (!strcmp(sec, "krb5i"))
551 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
552 	else if (!strcmp(sec, "krb5p"))
553 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
554 }
555 
556 static void
557 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
558     const char *hostname, struct ucred *cred, struct thread *td)
559 {
560 	int s;
561 	int adjsock;
562 	char *p;
563 
564 	s = splnet();
565 
566 	/*
567 	 * Set read-only flag if requested; otherwise, clear it if this is
568 	 * an update.  If this is not an update, then either the read-only
569 	 * flag is already clear, or this is a root mount and it was set
570 	 * intentionally at some previous point.
571 	 */
572 	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
573 		MNT_ILOCK(mp);
574 		mp->mnt_flag |= MNT_RDONLY;
575 		MNT_IUNLOCK(mp);
576 	} else if (mp->mnt_flag & MNT_UPDATE) {
577 		MNT_ILOCK(mp);
578 		mp->mnt_flag &= ~MNT_RDONLY;
579 		MNT_IUNLOCK(mp);
580 	}
581 
582 	/*
583 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
584 	 * no sense in that context.  Also, set up appropriate retransmit
585 	 * and soft timeout behavior.
586 	 */
587 	if (argp->sotype == SOCK_STREAM) {
588 		nmp->nm_flag &= ~NFSMNT_NOCONN;
589 		nmp->nm_timeo = NFS_MAXTIMEO;
590 		if ((argp->flags & NFSMNT_NFSV4) != 0)
591 			nmp->nm_retry = INT_MAX;
592 		else
593 			nmp->nm_retry = NFS_RETRANS_TCP;
594 	}
595 
596 	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
597 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
598 		argp->flags &= ~NFSMNT_RDIRPLUS;
599 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
600 	}
601 
602 	/* Re-bind if rsrvd port requested and wasn't on one */
603 	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
604 		  && (argp->flags & NFSMNT_RESVPORT);
605 	/* Also re-bind if we're switching to/from a connected UDP socket */
606 	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
607 		    (argp->flags & NFSMNT_NOCONN));
608 
609 	/* Update flags atomically.  Don't change the lock bits. */
610 	nmp->nm_flag = argp->flags | nmp->nm_flag;
611 	splx(s);
612 
613 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
614 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
615 		if (nmp->nm_timeo < NFS_MINTIMEO)
616 			nmp->nm_timeo = NFS_MINTIMEO;
617 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
618 			nmp->nm_timeo = NFS_MAXTIMEO;
619 	}
620 
621 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
622 		nmp->nm_retry = argp->retrans;
623 		if (nmp->nm_retry > NFS_MAXREXMIT)
624 			nmp->nm_retry = NFS_MAXREXMIT;
625 	}
626 
627 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
628 		nmp->nm_wsize = argp->wsize;
629 		/*
630 		 * Clip at the power of 2 below the size. There is an
631 		 * issue (not isolated) that causes intermittent page
632 		 * faults if this is not done.
633 		 */
634 		if (nmp->nm_wsize > NFS_FABLKSIZE)
635 			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
636 		else
637 			nmp->nm_wsize = NFS_FABLKSIZE;
638 	}
639 
640 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
641 		nmp->nm_rsize = argp->rsize;
642 		/*
643 		 * Clip at the power of 2 below the size. There is an
644 		 * issue (not isolated) that causes intermittent page
645 		 * faults if this is not done.
646 		 */
647 		if (nmp->nm_rsize > NFS_FABLKSIZE)
648 			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
649 		else
650 			nmp->nm_rsize = NFS_FABLKSIZE;
651 	}
652 
653 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
654 		nmp->nm_readdirsize = argp->readdirsize;
655 	}
656 
657 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
658 		nmp->nm_acregmin = argp->acregmin;
659 	else
660 		nmp->nm_acregmin = NFS_MINATTRTIMO;
661 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
662 		nmp->nm_acregmax = argp->acregmax;
663 	else
664 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
665 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
666 		nmp->nm_acdirmin = argp->acdirmin;
667 	else
668 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
669 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
670 		nmp->nm_acdirmax = argp->acdirmax;
671 	else
672 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
673 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
674 		nmp->nm_acdirmin = nmp->nm_acdirmax;
675 	if (nmp->nm_acregmin > nmp->nm_acregmax)
676 		nmp->nm_acregmin = nmp->nm_acregmax;
677 
678 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
679 		if (argp->readahead <= NFS_MAXRAHEAD)
680 			nmp->nm_readahead = argp->readahead;
681 		else
682 			nmp->nm_readahead = NFS_MAXRAHEAD;
683 	}
684 	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
685 		if (argp->wcommitsize < nmp->nm_wsize)
686 			nmp->nm_wcommitsize = nmp->nm_wsize;
687 		else
688 			nmp->nm_wcommitsize = argp->wcommitsize;
689 	}
690 
691 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
692 		    (nmp->nm_soproto != argp->proto));
693 
694 	if (nmp->nm_client != NULL && adjsock) {
695 		int haslock = 0, error = 0;
696 
697 		if (nmp->nm_sotype == SOCK_STREAM) {
698 			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
699 			if (!error)
700 				haslock = 1;
701 		}
702 		if (!error) {
703 		    newnfs_disconnect(&nmp->nm_sockreq);
704 		    if (haslock)
705 			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
706 		    nmp->nm_sotype = argp->sotype;
707 		    nmp->nm_soproto = argp->proto;
708 		    if (nmp->nm_sotype == SOCK_DGRAM)
709 			while (newnfs_connect(nmp, &nmp->nm_sockreq,
710 			    cred, td, 0)) {
711 				printf("newnfs_args: retrying connect\n");
712 				(void) nfs_catnap(PSOCK, 0, "nfscon");
713 			}
714 		}
715 	} else {
716 		nmp->nm_sotype = argp->sotype;
717 		nmp->nm_soproto = argp->proto;
718 	}
719 
720 	if (hostname != NULL) {
721 		strlcpy(nmp->nm_hostname, hostname,
722 		    sizeof(nmp->nm_hostname));
723 		p = strchr(nmp->nm_hostname, ':');
724 		if (p != NULL)
725 			*p = '\0';
726 	}
727 }
728 
/*
 * NULL-terminated list of option names; nfs_mount() passes it to
 * vfs_filteropt() to validate the options supplied for a mount.
 */
static const char *nfs_opts[] = { "from", "nfs_args",
    "noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
    "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
    "async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
    "readdirsize", "soft", "hard", "mntudp", "tcp", "udp", "wsize", "rsize",
    "retrans", "actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax",
    "resvport", "readahead", "hostname", "timeo", "timeout", "addr", "fh",
    "nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname", "dirpath",
    "minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr",
    "pnfs", "wcommitsize",
    NULL };
740 
741 /*
742  * VFS Operations.
743  *
744  * mount system call
745  * It seems a bit dumb to copyinstr() the host and path here and then
746  * bcopy() them in mountnfs(), but I wanted to detect errors before
747  * doing the sockargs() call because sockargs() allocates an mbuf and
748  * an error after that means that I have to release the mbuf.
749  */
750 /* ARGSUSED */
751 static int
752 nfs_mount(struct mount *mp)
753 {
754 	struct nfs_args args = {
755 	    .version = NFS_ARGSVERSION,
756 	    .addr = NULL,
757 	    .addrlen = sizeof (struct sockaddr_in),
758 	    .sotype = SOCK_STREAM,
759 	    .proto = 0,
760 	    .fh = NULL,
761 	    .fhsize = 0,
762 	    .flags = NFSMNT_RESVPORT,
763 	    .wsize = NFS_WSIZE,
764 	    .rsize = NFS_RSIZE,
765 	    .readdirsize = NFS_READDIRSIZE,
766 	    .timeo = 10,
767 	    .retrans = NFS_RETRANS,
768 	    .readahead = NFS_DEFRAHEAD,
769 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
770 	    .hostname = NULL,
771 	    .acregmin = NFS_MINATTRTIMO,
772 	    .acregmax = NFS_MAXATTRTIMO,
773 	    .acdirmin = NFS_MINDIRATTRTIMO,
774 	    .acdirmax = NFS_MAXDIRATTRTIMO,
775 	};
776 	int error = 0, ret, len;
777 	struct sockaddr *nam = NULL;
778 	struct vnode *vp;
779 	struct thread *td;
780 	char hst[MNAMELEN];
781 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
782 	char *opt, *name, *secname;
783 	int nametimeo = NFS_DEFAULT_NAMETIMEO;
784 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
785 	int minvers = 0;
786 	int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
787 	size_t hstlen;
788 
789 	has_nfs_args_opt = 0;
790 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
791 		error = EINVAL;
792 		goto out;
793 	}
794 
795 	td = curthread;
796 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
797 		error = nfs_mountroot(mp);
798 		goto out;
799 	}
800 
801 	nfscl_init();
802 
803 	/*
804 	 * The old mount_nfs program passed the struct nfs_args
805 	 * from userspace to kernel.  The new mount_nfs program
806 	 * passes string options via nmount() from userspace to kernel
807 	 * and we populate the struct nfs_args in the kernel.
808 	 */
809 	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
810 		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
811 		    sizeof(args));
812 		if (error != 0)
813 			goto out;
814 
815 		if (args.version != NFS_ARGSVERSION) {
816 			error = EPROGMISMATCH;
817 			goto out;
818 		}
819 		has_nfs_args_opt = 1;
820 	}
821 
822 	/* Handle the new style options. */
823 	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
824 		args.acdirmin = args.acdirmax =
825 		    args.acregmin = args.acregmax = 0;
826 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
827 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
828 	}
829 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
830 		args.flags |= NFSMNT_NOCONN;
831 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
832 		args.flags &= ~NFSMNT_NOCONN;
833 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
834 		args.flags |= NFSMNT_NOLOCKD;
835 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
836 		args.flags &= ~NFSMNT_NOLOCKD;
837 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
838 		args.flags |= NFSMNT_INT;
839 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
840 		args.flags |= NFSMNT_RDIRPLUS;
841 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
842 		args.flags |= NFSMNT_RESVPORT;
843 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
844 		args.flags &= ~NFSMNT_RESVPORT;
845 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
846 		args.flags |= NFSMNT_SOFT;
847 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
848 		args.flags &= ~NFSMNT_SOFT;
849 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
850 		args.sotype = SOCK_DGRAM;
851 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
852 		args.sotype = SOCK_DGRAM;
853 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
854 		args.sotype = SOCK_STREAM;
855 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
856 		args.flags |= NFSMNT_NFSV3;
857 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
858 		args.flags |= NFSMNT_NFSV4;
859 		args.sotype = SOCK_STREAM;
860 	}
861 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
862 		args.flags |= NFSMNT_ALLGSSNAME;
863 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
864 		args.flags |= NFSMNT_NOCTO;
865 	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
866 		args.flags |= NFSMNT_NONCONTIGWR;
867 	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
868 		args.flags |= NFSMNT_PNFS;
869 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
870 		if (opt == NULL) {
871 			vfs_mount_error(mp, "illegal readdirsize");
872 			error = EINVAL;
873 			goto out;
874 		}
875 		ret = sscanf(opt, "%d", &args.readdirsize);
876 		if (ret != 1 || args.readdirsize <= 0) {
877 			vfs_mount_error(mp, "illegal readdirsize: %s",
878 			    opt);
879 			error = EINVAL;
880 			goto out;
881 		}
882 		args.flags |= NFSMNT_READDIRSIZE;
883 	}
884 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
885 		if (opt == NULL) {
886 			vfs_mount_error(mp, "illegal readahead");
887 			error = EINVAL;
888 			goto out;
889 		}
890 		ret = sscanf(opt, "%d", &args.readahead);
891 		if (ret != 1 || args.readahead <= 0) {
892 			vfs_mount_error(mp, "illegal readahead: %s",
893 			    opt);
894 			error = EINVAL;
895 			goto out;
896 		}
897 		args.flags |= NFSMNT_READAHEAD;
898 	}
899 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
900 		if (opt == NULL) {
901 			vfs_mount_error(mp, "illegal wsize");
902 			error = EINVAL;
903 			goto out;
904 		}
905 		ret = sscanf(opt, "%d", &args.wsize);
906 		if (ret != 1 || args.wsize <= 0) {
907 			vfs_mount_error(mp, "illegal wsize: %s",
908 			    opt);
909 			error = EINVAL;
910 			goto out;
911 		}
912 		args.flags |= NFSMNT_WSIZE;
913 	}
914 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
915 		if (opt == NULL) {
916 			vfs_mount_error(mp, "illegal rsize");
917 			error = EINVAL;
918 			goto out;
919 		}
920 		ret = sscanf(opt, "%d", &args.rsize);
921 		if (ret != 1 || args.rsize <= 0) {
922 			vfs_mount_error(mp, "illegal wsize: %s",
923 			    opt);
924 			error = EINVAL;
925 			goto out;
926 		}
927 		args.flags |= NFSMNT_RSIZE;
928 	}
929 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
930 		if (opt == NULL) {
931 			vfs_mount_error(mp, "illegal retrans");
932 			error = EINVAL;
933 			goto out;
934 		}
935 		ret = sscanf(opt, "%d", &args.retrans);
936 		if (ret != 1 || args.retrans <= 0) {
937 			vfs_mount_error(mp, "illegal retrans: %s",
938 			    opt);
939 			error = EINVAL;
940 			goto out;
941 		}
942 		args.flags |= NFSMNT_RETRANS;
943 	}
944 	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
945 		ret = sscanf(opt, "%d", &args.acregmin);
946 		if (ret != 1 || args.acregmin < 0) {
947 			vfs_mount_error(mp, "illegal actimeo: %s",
948 			    opt);
949 			error = EINVAL;
950 			goto out;
951 		}
952 		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
953 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
954 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
955 	}
956 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
957 		ret = sscanf(opt, "%d", &args.acregmin);
958 		if (ret != 1 || args.acregmin < 0) {
959 			vfs_mount_error(mp, "illegal acregmin: %s",
960 			    opt);
961 			error = EINVAL;
962 			goto out;
963 		}
964 		args.flags |= NFSMNT_ACREGMIN;
965 	}
966 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
967 		ret = sscanf(opt, "%d", &args.acregmax);
968 		if (ret != 1 || args.acregmax < 0) {
969 			vfs_mount_error(mp, "illegal acregmax: %s",
970 			    opt);
971 			error = EINVAL;
972 			goto out;
973 		}
974 		args.flags |= NFSMNT_ACREGMAX;
975 	}
976 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
977 		ret = sscanf(opt, "%d", &args.acdirmin);
978 		if (ret != 1 || args.acdirmin < 0) {
979 			vfs_mount_error(mp, "illegal acdirmin: %s",
980 			    opt);
981 			error = EINVAL;
982 			goto out;
983 		}
984 		args.flags |= NFSMNT_ACDIRMIN;
985 	}
986 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
987 		ret = sscanf(opt, "%d", &args.acdirmax);
988 		if (ret != 1 || args.acdirmax < 0) {
989 			vfs_mount_error(mp, "illegal acdirmax: %s",
990 			    opt);
991 			error = EINVAL;
992 			goto out;
993 		}
994 		args.flags |= NFSMNT_ACDIRMAX;
995 	}
996 	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
997 		ret = sscanf(opt, "%d", &args.wcommitsize);
998 		if (ret != 1 || args.wcommitsize < 0) {
999 			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1000 			error = EINVAL;
1001 			goto out;
1002 		}
1003 		args.flags |= NFSMNT_WCOMMITSIZE;
1004 	}
1005 	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1006 		ret = sscanf(opt, "%d", &args.timeo);
1007 		if (ret != 1 || args.timeo <= 0) {
1008 			vfs_mount_error(mp, "illegal timeo: %s",
1009 			    opt);
1010 			error = EINVAL;
1011 			goto out;
1012 		}
1013 		args.flags |= NFSMNT_TIMEO;
1014 	}
1015 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1016 		ret = sscanf(opt, "%d", &args.timeo);
1017 		if (ret != 1 || args.timeo <= 0) {
1018 			vfs_mount_error(mp, "illegal timeout: %s",
1019 			    opt);
1020 			error = EINVAL;
1021 			goto out;
1022 		}
1023 		args.flags |= NFSMNT_TIMEO;
1024 	}
1025 	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1026 		ret = sscanf(opt, "%d", &nametimeo);
1027 		if (ret != 1 || nametimeo < 0) {
1028 			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1029 			error = EINVAL;
1030 			goto out;
1031 		}
1032 	}
1033 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1034 	    == 0) {
1035 		ret = sscanf(opt, "%d", &negnametimeo);
1036 		if (ret != 1 || negnametimeo < 0) {
1037 			vfs_mount_error(mp, "illegal negnametimeo: %s",
1038 			    opt);
1039 			error = EINVAL;
1040 			goto out;
1041 		}
1042 	}
1043 	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1044 	    0) {
1045 		ret = sscanf(opt, "%d", &minvers);
1046 		if (ret != 1 || minvers < 0 || minvers > 1 ||
1047 		    (args.flags & NFSMNT_NFSV4) == 0) {
1048 			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1049 			error = EINVAL;
1050 			goto out;
1051 		}
1052 	}
1053 	if (vfs_getopt(mp->mnt_optnew, "sec",
1054 		(void **) &secname, NULL) == 0)
1055 		nfs_sec_name(secname, &args.flags);
1056 
1057 	if (mp->mnt_flag & MNT_UPDATE) {
1058 		struct nfsmount *nmp = VFSTONFS(mp);
1059 
1060 		if (nmp == NULL) {
1061 			error = EIO;
1062 			goto out;
1063 		}
1064 
1065 		/*
1066 		 * If a change from TCP->UDP is done and there are thread(s)
1067 		 * that have I/O RPC(s) in progress with a tranfer size
1068 		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1069 		 * hung, retrying the RPC(s) forever. Usually these threads
1070 		 * will be seen doing an uninterruptible sleep on wait channel
1071 		 * "nfsreq".
1072 		 */
1073 		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1074 			tprintf(td->td_proc, LOG_WARNING,
1075 	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1076 
1077 		/*
1078 		 * When doing an update, we can't change version,
1079 		 * security, switch lockd strategies or change cookie
1080 		 * translation
1081 		 */
1082 		args.flags = (args.flags &
1083 		    ~(NFSMNT_NFSV3 |
1084 		      NFSMNT_NFSV4 |
1085 		      NFSMNT_KERB |
1086 		      NFSMNT_INTEGRITY |
1087 		      NFSMNT_PRIVACY |
1088 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1089 		    (nmp->nm_flag &
1090 			(NFSMNT_NFSV3 |
1091 			 NFSMNT_NFSV4 |
1092 			 NFSMNT_KERB |
1093 			 NFSMNT_INTEGRITY |
1094 			 NFSMNT_PRIVACY |
1095 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1096 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1097 		goto out;
1098 	}
1099 
1100 	/*
1101 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1102 	 * or no-connection mode for those protocols that support
1103 	 * no-connection mode (the flag will be cleared later for protocols
1104 	 * that do not support no-connection mode).  This will allow a client
	 * to receive replies from a different IP than the request was
1106 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1107 	 * not 0.
1108 	 */
1109 	if (nfs_ip_paranoia == 0)
1110 		args.flags |= NFSMNT_NOCONN;
1111 
1112 	if (has_nfs_args_opt != 0) {
1113 		/*
1114 		 * In the 'nfs_args' case, the pointers in the args
1115 		 * structure are in userland - we copy them in here.
1116 		 */
1117 		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1118 			vfs_mount_error(mp, "Bad file handle");
1119 			error = EINVAL;
1120 			goto out;
1121 		}
1122 		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1123 		    args.fhsize);
1124 		if (error != 0)
1125 			goto out;
1126 		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1127 		if (error != 0)
1128 			goto out;
1129 		bzero(&hst[hstlen], MNAMELEN - hstlen);
1130 		args.hostname = hst;
1131 		/* sockargs() call must be after above copyin() calls */
1132 		error = getsockaddr(&nam, (caddr_t)args.addr,
1133 		    args.addrlen);
1134 		if (error != 0)
1135 			goto out;
1136 	} else {
1137 		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1138 		    &args.fhsize) == 0) {
1139 			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1140 				vfs_mount_error(mp, "Bad file handle");
1141 				error = EINVAL;
1142 				goto out;
1143 			}
1144 			bcopy(args.fh, nfh, args.fhsize);
1145 		} else {
1146 			args.fhsize = 0;
1147 		}
1148 		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1149 		    (void **)&args.hostname, &len);
1150 		if (args.hostname == NULL) {
1151 			vfs_mount_error(mp, "Invalid hostname");
1152 			error = EINVAL;
1153 			goto out;
1154 		}
1155 		bcopy(args.hostname, hst, MNAMELEN);
1156 		hst[MNAMELEN - 1] = '\0';
1157 	}
1158 
1159 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1160 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1161 	else
1162 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1163 	srvkrbnamelen = strlen(srvkrbname);
1164 
1165 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1166 		strlcpy(krbname, name, sizeof (krbname));
1167 	else
1168 		krbname[0] = '\0';
1169 	krbnamelen = strlen(krbname);
1170 
1171 	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1172 		strlcpy(dirpath, name, sizeof (dirpath));
1173 	else
1174 		dirpath[0] = '\0';
1175 	dirlen = strlen(dirpath);
1176 
1177 	if (has_nfs_args_opt == 0) {
1178 		if (vfs_getopt(mp->mnt_optnew, "addr",
1179 		    (void **)&args.addr, &args.addrlen) == 0) {
1180 			if (args.addrlen > SOCK_MAXADDRLEN) {
1181 				error = ENAMETOOLONG;
1182 				goto out;
1183 			}
1184 			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1185 			bcopy(args.addr, nam, args.addrlen);
1186 			nam->sa_len = args.addrlen;
1187 		} else {
1188 			vfs_mount_error(mp, "No server address");
1189 			error = EINVAL;
1190 			goto out;
1191 		}
1192 	}
1193 
1194 	args.fh = nfh;
1195 	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1196 	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1197 	    nametimeo, negnametimeo, minvers);
1198 out:
1199 	if (!error) {
1200 		MNT_ILOCK(mp);
1201 		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF;
1202 		MNT_IUNLOCK(mp);
1203 	}
1204 	return (error);
1205 }
1206 
1207 
1208 /*
1209  * VFS Operations.
1210  *
1211  * mount system call
1212  * It seems a bit dumb to copyinstr() the host and path here and then
1213  * bcopy() them in mountnfs(), but I wanted to detect errors before
1214  * doing the sockargs() call because sockargs() allocates an mbuf and
1215  * an error after that means that I have to release the mbuf.
1216  */
1217 /* ARGSUSED */
1218 static int
1219 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1220 {
1221 	int error;
1222 	struct nfs_args args;
1223 
1224 	error = copyin(data, &args, sizeof (struct nfs_args));
1225 	if (error)
1226 		return error;
1227 
1228 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1229 
1230 	error = kernel_mount(ma, flags);
1231 	return (error);
1232 }
1233 
/*
 * Common code for mount and mountroot
 *
 * Builds the nfsmount for a new mount of "mp" and obtains its root vnode.
 * The steps, in order: allocate the zeroed nfsmount with the three names
 * appended, apply defaults and nfs_decode_args(), call newnfs_connect(),
 * for a non-zero minor version get the client via nfscl_getcl(), for
 * NFSv4 with no file handle and a non-empty dirpath look the path up via
 * nfsrpc_getdirpath(), and finally ncl_nget() the root node and load its
 * attributes.
 *
 * Returns 0 with *vpp set to the root vnode, unlocked but still
 * referenced; "nam" is then kept in nm_nam.  On failure everything set up
 * here is torn down, both the nfsmount and "nam" are freed, and the error
 * is returned; EIO is returned when no root file handle is available.
 * If MNT_UPDATE is set, a message is printed, "nam" is freed and 0 is
 * returned without doing any of the above.
 *
 * NOTE(review): mp->mnt_data is not reset on the failure path although
 * the nfsmount it points at is freed; presumably the caller discards mp
 * in that case -- confirm.
 */
static int
mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
    struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
    int minvers)
{
	struct nfsmount *nmp;
	struct nfsnode *np;
	int error, trycnt, ret;
	struct nfsvattr nfsva;
	struct nfsclclient *clp;
	struct nfsclds *dsp, *tdsp;
	uint32_t lease;
	/* Seeded from nfsboottime on first use, then bumped for each mount. */
	static u_int64_t clval = 0;

	NFSCL_DEBUG(3, "in mnt\n");
	clp = NULL;
	if (mp->mnt_flag & MNT_UPDATE) {
		nmp = VFSTONFS(mp);
		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
		FREE(nam, M_SONAME);
		return (0);
	} else {
		/*
		 * The three names are stored after the structure.
		 * NOTE(review): three NUL terminators are written below but
		 * only 2 extra bytes are requested here; presumably
		 * sizeof (struct nfsmount) already covers one byte of
		 * nm_name -- confirm.
		 */
		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
		    krbnamelen + dirlen + srvkrbnamelen + 2,
		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
		TAILQ_INIT(&nmp->nm_bufq);
		if (clval == 0)
			clval = (u_int64_t)nfsboottime.tv_sec;
		nmp->nm_clval = clval++;
		nmp->nm_krbnamelen = krbnamelen;
		nmp->nm_dirpathlen = dirlen;
		nmp->nm_srvkrbnamelen = srvkrbnamelen;
		if (td->td_ucred->cr_uid != (uid_t)0) {
			/*
			 * nm_uid is used to get KerberosV credentials for
			 * the nfsv4 state handling operations if there is
			 * no host based principal set. Use the uid of
			 * this user if not root, since they are doing the
			 * mount. I don't think setting this for root will
			 * work, since root normally does not have user
			 * credentials in a credentials cache.
			 */
			nmp->nm_uid = td->td_ucred->cr_uid;
		} else {
			/*
			 * Just set to -1, so it won't be used.
			 */
			nmp->nm_uid = (uid_t)-1;
		}

		/* Copy and null terminate all the names */
		if (nmp->nm_krbnamelen > 0) {
			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
		}
		if (nmp->nm_dirpathlen > 0) {
			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
			    nmp->nm_dirpathlen);
			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
			    + 1] = '\0';
		}
		if (nmp->nm_srvkrbnamelen > 0) {
			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
			    nmp->nm_srvkrbnamelen);
			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
			    + nmp->nm_srvkrbnamelen + 2] = '\0';
		}
		/* The mount keeps its own reference on the credential. */
		nmp->nm_sockreq.nr_cred = crhold(cred);
		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
		mp->mnt_data = nmp;
		nmp->nm_getinfo = nfs_getnlminfo;
		nmp->nm_vinvalbuf = ncl_vinvalbuf;
	}
	vfs_getnewfsid(mp);
	nmp->nm_mountp = mp;
	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);

	/*
	 * Since nfs_decode_args() might optionally set them, these
	 * need to be set to defaults before the call, so that the
	 * optional settings aren't overwritten.
	 */
	nmp->nm_nametimeo = nametimeo;
	nmp->nm_negnametimeo = negnametimeo;
	nmp->nm_timeo = NFS_TIMEO;
	nmp->nm_retry = NFS_RETRANS;
	nmp->nm_readahead = NFS_DEFRAHEAD;
	if (desiredvnodes >= 11000)
		nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
	else
		nmp->nm_wcommitsize = hibufspace / 10;
	/* The minor version only applies to NFSv4 mounts. */
	if ((argp->flags & NFSMNT_NFSV4) != 0)
		nmp->nm_minorvers = minvers;
	else
		nmp->nm_minorvers = 0;

	nfs_decode_args(mp, nmp, argp, hst, cred, td);

	/*
	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
	 * high, depending on whether we end up with negative offsets in
	 * the client or server somewhere.  2GB-1 may be safer.
	 *
	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
	 * that we can handle until we find out otherwise.
	 */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
		nmp->nm_maxfilesize = 0xffffffffLL;
	else
		nmp->nm_maxfilesize = OFF_MAX;

	/* Neither V3 nor V4 flagged (i.e. V2): force the default sizes. */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
		nmp->nm_wsize = NFS_WSIZE;
		nmp->nm_rsize = NFS_RSIZE;
		nmp->nm_readdirsize = NFS_READDIRSIZE;
	}
	nmp->nm_numgrps = NFS_MAXGRPS;
	/* Negative delays taken from the global settings are clamped to 0. */
	nmp->nm_tprintf_delay = nfs_tprintf_delay;
	if (nmp->nm_tprintf_delay < 0)
		nmp->nm_tprintf_delay = 0;
	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
	if (nmp->nm_tprintf_initial_delay < 0)
		nmp->nm_tprintf_initial_delay = 0;
	nmp->nm_fhsize = argp->fhsize;
	if (nmp->nm_fhsize > 0)
		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
	nmp->nm_nam = nam;
	/* Set up the sockets and per-host congestion */
	nmp->nm_sotype = argp->sotype;
	nmp->nm_soproto = argp->proto;
	nmp->nm_sockreq.nr_prog = NFS_PROG;
	if ((argp->flags & NFSMNT_NFSV4))
		nmp->nm_sockreq.nr_vers = NFS_VER4;
	else if ((argp->flags & NFSMNT_NFSV3))
		nmp->nm_sockreq.nr_vers = NFS_VER3;
	else
		nmp->nm_sockreq.nr_vers = NFS_VER2;


	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
		goto bad;
	/* For NFSv4.1, get the clientid now. */
	if (nmp->nm_minorvers > 0) {
		NFSCL_DEBUG(3, "at getcl\n");
		error = nfscl_getcl(mp, cred, td, 0, &clp);
		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
		if (error != 0)
			goto bad;
	}

	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
	    nmp->nm_dirpathlen > 0) {
		NFSCL_DEBUG(3, "in dirp\n");
		/*
		 * If the fhsize on the mount point == 0 for V4, the mount
		 * path needs to be looked up.
		 */
		/* Up to three attempts, with a nfs_catnap() after each failure. */
		trycnt = 3;
		do {
			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
			    cred, td);
			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
			if (error)
				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
		} while (error && --trycnt > 0);
		if (error) {
			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
			goto bad;
		}
	}

	/*
	 * A reference count is needed on the nfsnode representing the
	 * remote root.  If this object is not persistent, then backward
	 * traversals of the mount point (i.e. "..") will not work if
	 * the nfsnode gets flushed out of the cache. Ufs does not have
	 * this problem, because one can identify root inodes by their
	 * number == ROOTINO (2).
	 */
	if (nmp->nm_fhsize > 0) {
		/*
		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
		 * non-zero for the root vnode. f_iosize will be set correctly
		 * by nfs_statfs() before any I/O occurs.
		 */
		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
		    LK_EXCLUSIVE);
		if (error)
			goto bad;
		*vpp = NFSTOV(np);

		/*
		 * Get file attributes and transfer parameters for the
		 * mountpoint.  This has the side effect of filling in
		 * (*vpp)->v_type with the correct value.
		 */
		/* A failure here is kept in "ret" and does not fail the mount. */
		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
		    cred, td, &nfsva, NULL, &lease);
		if (ret) {
			/*
			 * Just set default values to get things going.
			 */
			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
			nfsva.na_vattr.va_type = VDIR;
			nfsva.na_vattr.va_mode = 0777;
			nfsva.na_vattr.va_nlink = 100;
			nfsva.na_vattr.va_uid = (uid_t)0;
			nfsva.na_vattr.va_gid = (gid_t)0;
			nfsva.na_vattr.va_fileid = 2;
			nfsva.na_vattr.va_gen = 1;
			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
			nfsva.na_vattr.va_size = 512 * 1024;
			lease = 60;
		}
		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
		if (nmp->nm_minorvers > 0) {
			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
			NFSLOCKCLSTATE();
			clp->nfsc_renew = NFSCL_RENEW(lease);
			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
			/* Bump the revision, skipping 0 if it wraps. */
			clp->nfsc_clientidrev++;
			if (clp->nfsc_clientidrev == 0)
				clp->nfsc_clientidrev++;
			NFSUNLOCKCLSTATE();
			/*
			 * Mount will succeed, so the renew thread can be
			 * started now.
			 */
			nfscl_start_renewthread(clp);
			nfscl_clientrelease(clp);
		}
		if (argp->flags & NFSMNT_NFSV3)
			ncl_fsinfo(nmp, *vpp, cred, td);

		/* Mark if the mount point supports NFSv4 ACLs. */
		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
		    ret == 0 &&
		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_NFS4ACLS;
			MNT_IUNLOCK(mp);
		}

		/*
		 * Lose the lock but keep the ref.
		 */
		NFSVOPUNLOCK(*vpp, 0);
		return (0);
	}
	/* Reached only when there is still no root file handle. */
	error = EIO;

bad:
	/* Undo everything set up above, then free the mount and address. */
	if (clp != NULL)
		nfscl_clientrelease(clp);
	newnfs_disconnect(&nmp->nm_sockreq);
	crfree(nmp->nm_sockreq.nr_cred);
	if (nmp->nm_sockreq.nr_auth != NULL)
		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
	mtx_destroy(&nmp->nm_mtx);
	if (nmp->nm_clp != NULL) {
		NFSLOCKCLSTATE();
		LIST_REMOVE(nmp->nm_clp, nfsc_list);
		NFSUNLOCKCLSTATE();
		free(nmp->nm_clp, M_NFSCLCLIENT);
	}
	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
		nfscl_freenfsclds(dsp);
	FREE(nmp, M_NEWNFSMNT);
	FREE(nam, M_SONAME);
	return (error);
}
1513 
1514 /*
1515  * unmount system call
1516  */
1517 static int
1518 nfs_unmount(struct mount *mp, int mntflags)
1519 {
1520 	struct thread *td;
1521 	struct nfsmount *nmp;
1522 	int error, flags = 0, i, trycnt = 0;
1523 	struct nfsclds *dsp, *tdsp;
1524 
1525 	td = curthread;
1526 
1527 	if (mntflags & MNT_FORCE)
1528 		flags |= FORCECLOSE;
1529 	nmp = VFSTONFS(mp);
1530 	/*
1531 	 * Goes something like this..
1532 	 * - Call vflush() to clear out vnodes for this filesystem
1533 	 * - Close the socket
1534 	 * - Free up the data structures
1535 	 */
1536 	/* In the forced case, cancel any outstanding requests. */
1537 	if (mntflags & MNT_FORCE) {
1538 		error = newnfs_nmcancelreqs(nmp);
1539 		if (error)
1540 			goto out;
1541 		/* For a forced close, get rid of the renew thread now */
1542 		nfscl_umount(nmp, td);
1543 	}
1544 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1545 	do {
1546 		error = vflush(mp, 1, flags, td);
1547 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1548 			(void) nfs_catnap(PSOCK, error, "newndm");
1549 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1550 	if (error)
1551 		goto out;
1552 
1553 	/*
1554 	 * We are now committed to the unmount.
1555 	 */
1556 	if ((mntflags & MNT_FORCE) == 0)
1557 		nfscl_umount(nmp, td);
1558 	/* Make sure no nfsiods are assigned to this mount. */
1559 	mtx_lock(&ncl_iod_mutex);
1560 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1561 		if (ncl_iodmount[i] == nmp) {
1562 			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1563 			ncl_iodmount[i] = NULL;
1564 		}
1565 	mtx_unlock(&ncl_iod_mutex);
1566 	newnfs_disconnect(&nmp->nm_sockreq);
1567 	crfree(nmp->nm_sockreq.nr_cred);
1568 	FREE(nmp->nm_nam, M_SONAME);
1569 	if (nmp->nm_sockreq.nr_auth != NULL)
1570 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1571 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1572 	mtx_destroy(&nmp->nm_mtx);
1573 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1574 		nfscl_freenfsclds(dsp);
1575 	FREE(nmp, M_NEWNFSMNT);
1576 out:
1577 	return (error);
1578 }
1579 
1580 /*
1581  * Return root of a filesystem
1582  */
1583 static int
1584 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1585 {
1586 	struct vnode *vp;
1587 	struct nfsmount *nmp;
1588 	struct nfsnode *np;
1589 	int error;
1590 
1591 	nmp = VFSTONFS(mp);
1592 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1593 	if (error)
1594 		return error;
1595 	vp = NFSTOV(np);
1596 	/*
1597 	 * Get transfer parameters and attributes for root vnode once.
1598 	 */
1599 	mtx_lock(&nmp->nm_mtx);
1600 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1601 		mtx_unlock(&nmp->nm_mtx);
1602 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1603 	} else
1604 		mtx_unlock(&nmp->nm_mtx);
1605 	if (vp->v_type == VNON)
1606 	    vp->v_type = VDIR;
1607 	vp->v_vflag |= VV_ROOT;
1608 	*vpp = vp;
1609 	return (0);
1610 }
1611 
1612 /*
1613  * Flush out the buffer cache
1614  */
1615 /* ARGSUSED */
1616 static int
1617 nfs_sync(struct mount *mp, int waitfor)
1618 {
1619 	struct vnode *vp, *mvp;
1620 	struct thread *td;
1621 	int error, allerror = 0;
1622 
1623 	td = curthread;
1624 
1625 	MNT_ILOCK(mp);
1626 	/*
1627 	 * If a forced dismount is in progress, return from here so that
1628 	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1629 	 * calling VFS_UNMOUNT().
1630 	 */
1631 	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1632 		MNT_IUNLOCK(mp);
1633 		return (EBADF);
1634 	}
1635 	MNT_IUNLOCK(mp);
1636 
1637 	/*
1638 	 * Force stale buffer cache information to be flushed.
1639 	 */
1640 loop:
1641 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1642 		/* XXX Racy bv_cnt check. */
1643 		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1644 		    waitfor == MNT_LAZY) {
1645 			VI_UNLOCK(vp);
1646 			continue;
1647 		}
1648 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1649 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1650 			goto loop;
1651 		}
1652 		error = VOP_FSYNC(vp, waitfor, td);
1653 		if (error)
1654 			allerror = error;
1655 		NFSVOPUNLOCK(vp, 0);
1656 		vrele(vp);
1657 	}
1658 	return (allerror);
1659 }
1660 
1661 static int
1662 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1663 {
1664 	struct nfsmount *nmp = VFSTONFS(mp);
1665 	struct vfsquery vq;
1666 	int error;
1667 
1668 	bzero(&vq, sizeof(vq));
1669 	switch (op) {
1670 #if 0
1671 	case VFS_CTL_NOLOCKS:
1672 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1673  		if (req->oldptr != NULL) {
1674  			error = SYSCTL_OUT(req, &val, sizeof(val));
1675  			if (error)
1676  				return (error);
1677  		}
1678  		if (req->newptr != NULL) {
1679  			error = SYSCTL_IN(req, &val, sizeof(val));
1680  			if (error)
1681  				return (error);
1682 			if (val)
1683 				nmp->nm_flag |= NFSMNT_NOLOCKS;
1684 			else
1685 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1686  		}
1687 		break;
1688 #endif
1689 	case VFS_CTL_QUERY:
1690 		mtx_lock(&nmp->nm_mtx);
1691 		if (nmp->nm_state & NFSSTA_TIMEO)
1692 			vq.vq_flags |= VQ_NOTRESP;
1693 		mtx_unlock(&nmp->nm_mtx);
1694 #if 0
1695 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1696 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1697 			vq.vq_flags |= VQ_NOTRESPLOCK;
1698 #endif
1699 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1700 		break;
1701  	case VFS_CTL_TIMEO:
1702  		if (req->oldptr != NULL) {
1703  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1704  			    sizeof(nmp->nm_tprintf_initial_delay));
1705  			if (error)
1706  				return (error);
1707  		}
1708  		if (req->newptr != NULL) {
1709 			error = vfs_suser(mp, req->td);
1710 			if (error)
1711 				return (error);
1712  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1713  			    sizeof(nmp->nm_tprintf_initial_delay));
1714  			if (error)
1715  				return (error);
1716  			if (nmp->nm_tprintf_initial_delay < 0)
1717  				nmp->nm_tprintf_initial_delay = 0;
1718  		}
1719 		break;
1720 	default:
1721 		return (ENOTSUP);
1722 	}
1723 	return (0);
1724 }
1725 
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.
 *
 * The result of newnfs_nmcancelreqs() is deliberately not reported;
 * this function has no way to return it.
 */
static void
nfs_purge(struct mount *mp)
{

	(void)newnfs_nmcancelreqs(VFSTONFS(mp));
}
1738 
1739 /*
1740  * Extract the information needed by the nlm from the nfs vnode.
1741  */
1742 static void
1743 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1744     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1745     struct timeval *timeop)
1746 {
1747 	struct nfsmount *nmp;
1748 	struct nfsnode *np = VTONFS(vp);
1749 
1750 	nmp = VFSTONFS(vp->v_mount);
1751 	if (fhlenp != NULL)
1752 		*fhlenp = (size_t)np->n_fhp->nfh_len;
1753 	if (fhp != NULL)
1754 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1755 	if (sp != NULL)
1756 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1757 	if (is_v3p != NULL)
1758 		*is_v3p = NFS_ISV3(vp);
1759 	if (sizep != NULL)
1760 		*sizep = np->n_size;
1761 	if (timeop != NULL) {
1762 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1763 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1764 	}
1765 }
1766 
/*
 * Append the option name "opt" at *buf when "testval" is non-zero and
 * the name plus its terminating NUL fits in the *blen bytes remaining.
 * On output *buf is advanced past the name and *blen is reduced by the
 * same amount; otherwise nothing is touched.  "nmp" is not used.
 */
static __inline void nfscl_printopt(struct nfsmount *nmp, int testval,
    char *opt, char **buf, size_t *blen)
{
	size_t optlen;
	int written;

	if (testval == 0)
		return;
	optlen = strlen(opt);
	if (*blen <= optlen)
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	/* Cannot happen given the length check above; complain if it does. */
	if ((size_t)written != optlen)
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
1784 
/*
 * Append "opt=optval" (optval in decimal) at *buf.
 *
 * Nothing is attempted unless more than strlen(opt) + 1 bytes remain.
 * When the whole string fits, *buf is advanced past it and *blen is
 * reduced by its length.  When it does not fit, the partial text that
 * snprintf() produced is erased again, so that a cut-off and therefore
 * wrong value (e.g. "rsize=65" for 65536) is never left in the buffer;
 * *buf and *blen are unchanged in that case.  "nmp" is not used.
 */
static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
    char *opt, char **buf, size_t *blen)
{
	int len;

	if (*blen <= strlen(opt) + 1)
		return;

	len = snprintf(*buf, *blen, "%s=%d", opt, optval);
	if (len < 0 || (size_t)len >= *blen) {
		/* Truncated: drop the partial option. */
		**buf = '\0';
		return;
	}
	*buf += len;
	*blen -= len;
}
1802 
1803 /*
1804  * Load the option flags and values into the buffer.
1805  */
1806 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1807 {
1808 	char *buf;
1809 	size_t blen;
1810 
1811 	buf = buffer;
1812 	blen = buflen;
1813 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1814 	    &blen);
1815 	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1816 		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1817 		    &blen);
1818 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1819 		    &buf, &blen);
1820 	}
1821 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1822 	    &blen);
1823 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1824 	    "nfsv2", &buf, &blen);
1825 	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1826 	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1827 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1828 	    &buf, &blen);
1829 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1830 	    &buf, &blen);
1831 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1832 	    &blen);
1833 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1834 	    &blen);
1835 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1836 	    &blen);
1837 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1838 	    &blen);
1839 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1840 	    &blen);
1841 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
1842 	    ",noncontigwr", &buf, &blen);
1843 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1844 	    0, ",lockd", &buf, &blen);
1845 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1846 	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1847 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1848 	    &buf, &blen);
1849 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1850 	    &buf, &blen);
1851 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1852 	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
1853 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1854 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1855 	    &buf, &blen);
1856 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1857 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
1858 	    &buf, &blen);
1859 	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1860 	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1861 	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
1862 	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
1863 	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
1864 	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
1865 	    &blen);
1866 	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
1867 	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
1868 	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
1869 	    &blen);
1870 	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
1871 	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
1872 	    &blen);
1873 	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
1874 	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
1875 }
1876 
1877