xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision 526e1dc1c0d052b9d2a6cd6da7a16eb09c971c54)
1 /*-
2  * Copyright (c) 1989, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/limits.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/mount.h>
55 #include <sys/proc.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/sockio.h>
59 #include <sys/sysctl.h>
60 #include <sys/vnode.h>
61 #include <sys/signalvar.h>
62 
63 #include <vm/vm.h>
64 #include <vm/vm_extern.h>
65 #include <vm/uma.h>
66 
67 #include <net/if.h>
68 #include <net/route.h>
69 #include <netinet/in.h>
70 
71 #include <fs/nfs/nfsport.h>
72 #include <fs/nfsclient/nfsnode.h>
73 #include <fs/nfsclient/nfsmount.h>
74 #include <fs/nfsclient/nfs.h>
75 #include <nfs/nfsdiskless.h>
76 
77 FEATURE(nfscl, "NFSv4 client");
78 
79 extern int nfscl_ticks;
80 extern struct timeval nfsboottime;
81 extern struct nfsstats	newnfsstats;
82 extern int nfsrv_useacl;
83 extern int nfscl_debuglevel;
84 NFSCLSTATEMUTEX;
85 
86 MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
87 MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
88 
89 SYSCTL_DECL(_vfs_nfs);
90 static int nfs_ip_paranoia = 1;
91 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
92     &nfs_ip_paranoia, 0, "");
93 static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
94 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
95         downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
96 /* how long between console messages "nfs server foo not responding" */
97 static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
98 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
99         downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
100 
101 static int	nfs_mountroot(struct mount *);
102 static void	nfs_sec_name(char *, int *);
103 static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
104 		    struct nfs_args *argp, const char *, struct ucred *,
105 		    struct thread *);
106 static int	mountnfs(struct nfs_args *, struct mount *,
107 		    struct sockaddr *, char *, u_char *, int, u_char *, int,
108 		    u_char *, int, struct vnode **, struct ucred *,
109 		    struct thread *, int, int, int);
110 static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
111 		    struct sockaddr_storage *, int *, off_t *,
112 		    struct timeval *);
113 static vfs_mount_t nfs_mount;
114 static vfs_cmount_t nfs_cmount;
115 static vfs_unmount_t nfs_unmount;
116 static vfs_root_t nfs_root;
117 static vfs_statfs_t nfs_statfs;
118 static vfs_sync_t nfs_sync;
119 static vfs_sysctl_t nfs_sysctl;
120 
121 /*
122  * nfs vfs operations.
123  */
124 static struct vfsops nfs_vfsops = {
125 	.vfs_init =		ncl_init,
126 	.vfs_mount =		nfs_mount,
127 	.vfs_cmount =		nfs_cmount,
128 	.vfs_root =		nfs_root,
129 	.vfs_statfs =		nfs_statfs,
130 	.vfs_sync =		nfs_sync,
131 	.vfs_uninit =		ncl_uninit,
132 	.vfs_unmount =		nfs_unmount,
133 	.vfs_sysctl =		nfs_sysctl,
134 };
135 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK);
136 
137 /* So that loader and kldload(2) can find us, wherever we are.. */
138 MODULE_VERSION(nfs, 1);
139 MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
140 MODULE_DEPEND(nfs, krpc, 1, 1, 1);
141 MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
142 MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
143 
144 /*
145  * This structure is now defined in sys/nfs/nfs_diskless.c so that it
146  * can be shared by both NFS clients. It is declared here so that it
147  * will be defined for kernels built without NFS_ROOT, although it
148  * isn't used in that case.
149  */
150 #if !defined(NFS_ROOT) && !defined(NFSCLIENT)
151 struct nfs_diskless	nfs_diskless = { { { 0 } } };
152 struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
153 int			nfs_diskless_valid = 0;
154 #endif
155 
156 SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
157     &nfs_diskless_valid, 0,
158     "Has the diskless struct been filled correctly");
159 
160 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
161     nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
162 
163 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
164     &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
165     "%Ssockaddr_in", "Diskless root nfs address");
166 
167 
/* Prototypes for the diskless (NFS root) support routines. */
void		newnfsargs_ntoh(struct nfs_args *);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
static int	nfs_mountdiskless(char *path, struct sockaddr_in *sin,
		    struct nfs_args *args, struct thread *td,
		    struct vnode **vpp, struct mount *mp);
175 
176 int
177 newnfs_iosize(struct nfsmount *nmp)
178 {
179 	int iosize, maxio;
180 
181 	/* First, set the upper limit for iosize */
182 	if (nmp->nm_flag & NFSMNT_NFSV4) {
183 		maxio = NFS_MAXBSIZE;
184 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
185 		if (nmp->nm_sotype == SOCK_DGRAM)
186 			maxio = NFS_MAXDGRAMDATA;
187 		else
188 			maxio = NFS_MAXBSIZE;
189 	} else {
190 		maxio = NFS_V2MAXDATA;
191 	}
192 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
193 		nmp->nm_rsize = maxio;
194 	if (nmp->nm_rsize > MAXBSIZE)
195 		nmp->nm_rsize = MAXBSIZE;
196 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
197 		nmp->nm_readdirsize = maxio;
198 	if (nmp->nm_readdirsize > nmp->nm_rsize)
199 		nmp->nm_readdirsize = nmp->nm_rsize;
200 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
201 		nmp->nm_wsize = maxio;
202 	if (nmp->nm_wsize > MAXBSIZE)
203 		nmp->nm_wsize = MAXBSIZE;
204 
205 	/*
206 	 * Calculate the size used for io buffers.  Use the larger
207 	 * of the two sizes to minimise nfs requests but make sure
208 	 * that it is at least one VM page to avoid wasting buffer
209 	 * space.
210 	 */
211 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
212 	iosize = imax(iosize, PAGE_SIZE);
213 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
214 	return (iosize);
215 }
216 
217 static void
218 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
219 {
220 
221 	args->version = NFS_ARGSVERSION;
222 	args->addr = oargs->addr;
223 	args->addrlen = oargs->addrlen;
224 	args->sotype = oargs->sotype;
225 	args->proto = oargs->proto;
226 	args->fh = oargs->fh;
227 	args->fhsize = oargs->fhsize;
228 	args->flags = oargs->flags;
229 	args->wsize = oargs->wsize;
230 	args->rsize = oargs->rsize;
231 	args->readdirsize = oargs->readdirsize;
232 	args->timeo = oargs->timeo;
233 	args->retrans = oargs->retrans;
234 	args->readahead = oargs->readahead;
235 	args->hostname = oargs->hostname;
236 }
237 
238 static void
239 nfs_convert_diskless(void)
240 {
241 
242 	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
243 		sizeof(struct ifaliasreq));
244 	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
245 		sizeof(struct sockaddr_in));
246 	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
247 	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
248 		nfsv3_diskless.root_fhsize = NFSX_MYFH;
249 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
250 	} else {
251 		nfsv3_diskless.root_fhsize = NFSX_V2FH;
252 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
253 	}
254 	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
255 		sizeof(struct sockaddr_in));
256 	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
257 	nfsv3_diskless.root_time = nfs_diskless.root_time;
258 	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
259 		MAXHOSTNAMELEN);
260 	nfs_diskless_valid = 3;
261 }
262 
263 /*
264  * nfs statfs call
265  */
266 static int
267 nfs_statfs(struct mount *mp, struct statfs *sbp)
268 {
269 	struct vnode *vp;
270 	struct thread *td;
271 	struct nfsmount *nmp = VFSTONFS(mp);
272 	struct nfsvattr nfsva;
273 	struct nfsfsinfo fs;
274 	struct nfsstatfs sb;
275 	int error = 0, attrflag, gotfsinfo = 0, ret;
276 	struct nfsnode *np;
277 
278 	td = curthread;
279 
280 	error = vfs_busy(mp, MBF_NOWAIT);
281 	if (error)
282 		return (error);
283 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
284 	if (error) {
285 		vfs_unbusy(mp);
286 		return (error);
287 	}
288 	vp = NFSTOV(np);
289 	mtx_lock(&nmp->nm_mtx);
290 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
291 		mtx_unlock(&nmp->nm_mtx);
292 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
293 		    &attrflag, NULL);
294 		if (!error)
295 			gotfsinfo = 1;
296 	} else
297 		mtx_unlock(&nmp->nm_mtx);
298 	if (!error)
299 		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
300 		    &attrflag, NULL);
301 	if (error != 0)
302 		NFSCL_DEBUG(2, "statfs=%d\n", error);
303 	if (attrflag == 0) {
304 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
305 		    td->td_ucred, td, &nfsva, NULL, NULL);
306 		if (ret) {
307 			/*
308 			 * Just set default values to get things going.
309 			 */
310 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
311 			nfsva.na_vattr.va_type = VDIR;
312 			nfsva.na_vattr.va_mode = 0777;
313 			nfsva.na_vattr.va_nlink = 100;
314 			nfsva.na_vattr.va_uid = (uid_t)0;
315 			nfsva.na_vattr.va_gid = (gid_t)0;
316 			nfsva.na_vattr.va_fileid = 2;
317 			nfsva.na_vattr.va_gen = 1;
318 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
319 			nfsva.na_vattr.va_size = 512 * 1024;
320 		}
321 	}
322 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
323 	if (!error) {
324 	    mtx_lock(&nmp->nm_mtx);
325 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
326 		nfscl_loadfsinfo(nmp, &fs);
327 	    nfscl_loadsbinfo(nmp, &sb, sbp);
328 	    sbp->f_iosize = newnfs_iosize(nmp);
329 	    mtx_unlock(&nmp->nm_mtx);
330 	    if (sbp != &mp->mnt_stat) {
331 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
332 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
333 	    }
334 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
335 	} else if (NFS_ISV4(vp)) {
336 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
337 	}
338 	vput(vp);
339 	vfs_unbusy(mp);
340 	return (error);
341 }
342 
343 /*
344  * nfs version 3 fsinfo rpc call
345  */
346 int
347 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
348     struct thread *td)
349 {
350 	struct nfsfsinfo fs;
351 	struct nfsvattr nfsva;
352 	int error, attrflag;
353 
354 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
355 	if (!error) {
356 		if (attrflag)
357 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
358 			    1);
359 		mtx_lock(&nmp->nm_mtx);
360 		nfscl_loadfsinfo(nmp, &fs);
361 		mtx_unlock(&nmp->nm_mtx);
362 	}
363 	return (error);
364 }
365 
366 /*
367  * Mount a remote root fs via. nfs. This depends on the info in the
368  * nfs_diskless structure that has been filled in properly by some primary
369  * bootstrap.
370  * It goes something like this:
371  * - do enough of "ifconfig" by calling ifioctl() so that the system
372  *   can talk to the server
373  * - If nfs_diskless.mygateway is filled in, use that address as
374  *   a default gateway.
375  * - build the rootfs mount point and call mountnfs() to do the rest.
376  *
377  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
378  * structure, as well as other global NFS client variables here, as
379  * nfs_mountroot() will be called once in the boot before any other NFS
380  * client activity occurs.
381  */
382 static int
383 nfs_mountroot(struct mount *mp)
384 {
385 	struct thread *td = curthread;
386 	struct nfsv3_diskless *nd = &nfsv3_diskless;
387 	struct socket *so;
388 	struct vnode *vp;
389 	struct ifreq ir;
390 	int error;
391 	u_long l;
392 	char buf[128];
393 	char *cp;
394 
395 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
396 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
397 #elif defined(NFS_ROOT)
398 	nfs_setup_diskless();
399 #endif
400 
401 	if (nfs_diskless_valid == 0)
402 		return (-1);
403 	if (nfs_diskless_valid == 1)
404 		nfs_convert_diskless();
405 
406 	/*
407 	 * XXX splnet, so networks will receive...
408 	 */
409 	splnet();
410 
411 	/*
412 	 * Do enough of ifconfig(8) so that the critical net interface can
413 	 * talk to the server.
414 	 */
415 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
416 	    td->td_ucred, td);
417 	if (error)
418 		panic("nfs_mountroot: socreate(%04x): %d",
419 			nd->myif.ifra_addr.sa_family, error);
420 
421 #if 0 /* XXX Bad idea */
422 	/*
423 	 * We might not have been told the right interface, so we pass
424 	 * over the first ten interfaces of the same kind, until we get
425 	 * one of them configured.
426 	 */
427 
428 	for (i = strlen(nd->myif.ifra_name) - 1;
429 		nd->myif.ifra_name[i] >= '0' &&
430 		nd->myif.ifra_name[i] <= '9';
431 		nd->myif.ifra_name[i] ++) {
432 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
433 		if(!error)
434 			break;
435 	}
436 #endif
437 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
438 	if (error)
439 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
440 	if ((cp = getenv("boot.netif.mtu")) != NULL) {
441 		ir.ifr_mtu = strtol(cp, NULL, 10);
442 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
443 		freeenv(cp);
444 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
445 		if (error)
446 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
447 	}
448 	soclose(so);
449 
450 	/*
451 	 * If the gateway field is filled in, set it as the default route.
452 	 * Note that pxeboot will set a default route of 0 if the route
453 	 * is not set by the DHCP server.  Check also for a value of 0
454 	 * to avoid panicking inappropriately in that situation.
455 	 */
456 	if (nd->mygateway.sin_len != 0 &&
457 	    nd->mygateway.sin_addr.s_addr != 0) {
458 		struct sockaddr_in mask, sin;
459 
460 		bzero((caddr_t)&mask, sizeof(mask));
461 		sin = mask;
462 		sin.sin_family = AF_INET;
463 		sin.sin_len = sizeof(sin);
464                 /* XXX MRT use table 0 for this sort of thing */
465 		CURVNET_SET(TD_TO_VNET(td));
466 		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
467 		    (struct sockaddr *)&nd->mygateway,
468 		    (struct sockaddr *)&mask,
469 		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
470 		CURVNET_RESTORE();
471 		if (error)
472 			panic("nfs_mountroot: RTM_ADD: %d", error);
473 	}
474 
475 	/*
476 	 * Create the rootfs mount point.
477 	 */
478 	nd->root_args.fh = nd->root_fh;
479 	nd->root_args.fhsize = nd->root_fhsize;
480 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
481 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
482 		(l >> 24) & 0xff, (l >> 16) & 0xff,
483 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
484 	printf("NFS ROOT: %s\n", buf);
485 	nd->root_args.hostname = buf;
486 	if ((error = nfs_mountdiskless(buf,
487 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
488 		return (error);
489 	}
490 
491 	/*
492 	 * This is not really an nfs issue, but it is much easier to
493 	 * set hostname here and then let the "/etc/rc.xxx" files
494 	 * mount the right /var based upon its preset value.
495 	 */
496 	mtx_lock(&prison0.pr_mtx);
497 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
498 	    sizeof(prison0.pr_hostname));
499 	mtx_unlock(&prison0.pr_mtx);
500 	inittodr(ntohl(nd->root_time));
501 	return (0);
502 }
503 
504 /*
505  * Internal version of mount system call for diskless setup.
506  */
507 static int
508 nfs_mountdiskless(char *path,
509     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
510     struct vnode **vpp, struct mount *mp)
511 {
512 	struct sockaddr *nam;
513 	int dirlen, error;
514 	char *dirpath;
515 
516 	/*
517 	 * Find the directory path in "path", which also has the server's
518 	 * name/ip address in it.
519 	 */
520 	dirpath = strchr(path, ':');
521 	if (dirpath != NULL)
522 		dirlen = strlen(++dirpath);
523 	else
524 		dirlen = 0;
525 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
526 	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
527 	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
528 	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
529 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
530 		return (error);
531 	}
532 	return (0);
533 }
534 
535 static void
536 nfs_sec_name(char *sec, int *flagsp)
537 {
538 	if (!strcmp(sec, "krb5"))
539 		*flagsp |= NFSMNT_KERB;
540 	else if (!strcmp(sec, "krb5i"))
541 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
542 	else if (!strcmp(sec, "krb5p"))
543 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
544 }
545 
546 static void
547 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
548     const char *hostname, struct ucred *cred, struct thread *td)
549 {
550 	int s;
551 	int adjsock;
552 	char *p;
553 
554 	s = splnet();
555 
556 	/*
557 	 * Set read-only flag if requested; otherwise, clear it if this is
558 	 * an update.  If this is not an update, then either the read-only
559 	 * flag is already clear, or this is a root mount and it was set
560 	 * intentionally at some previous point.
561 	 */
562 	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
563 		MNT_ILOCK(mp);
564 		mp->mnt_flag |= MNT_RDONLY;
565 		MNT_IUNLOCK(mp);
566 	} else if (mp->mnt_flag & MNT_UPDATE) {
567 		MNT_ILOCK(mp);
568 		mp->mnt_flag &= ~MNT_RDONLY;
569 		MNT_IUNLOCK(mp);
570 	}
571 
572 	/*
573 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
574 	 * no sense in that context.  Also, set up appropriate retransmit
575 	 * and soft timeout behavior.
576 	 */
577 	if (argp->sotype == SOCK_STREAM) {
578 		nmp->nm_flag &= ~NFSMNT_NOCONN;
579 		nmp->nm_timeo = NFS_MAXTIMEO;
580 		if ((argp->flags & NFSMNT_NFSV4) != 0)
581 			nmp->nm_retry = INT_MAX;
582 		else
583 			nmp->nm_retry = NFS_RETRANS_TCP;
584 	}
585 
586 	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
587 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
588 		argp->flags &= ~NFSMNT_RDIRPLUS;
589 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
590 	}
591 
592 	/* Clear NFSMNT_RESVPORT for NFSv4, since it is not required. */
593 	if ((argp->flags & NFSMNT_NFSV4) != 0) {
594 		argp->flags &= ~NFSMNT_RESVPORT;
595 		nmp->nm_flag &= ~NFSMNT_RESVPORT;
596 	}
597 
598 	/* Re-bind if rsrvd port requested and wasn't on one */
599 	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
600 		  && (argp->flags & NFSMNT_RESVPORT);
601 	/* Also re-bind if we're switching to/from a connected UDP socket */
602 	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
603 		    (argp->flags & NFSMNT_NOCONN));
604 
605 	/* Update flags atomically.  Don't change the lock bits. */
606 	nmp->nm_flag = argp->flags | nmp->nm_flag;
607 	splx(s);
608 
609 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
610 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
611 		if (nmp->nm_timeo < NFS_MINTIMEO)
612 			nmp->nm_timeo = NFS_MINTIMEO;
613 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
614 			nmp->nm_timeo = NFS_MAXTIMEO;
615 	}
616 
617 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
618 		nmp->nm_retry = argp->retrans;
619 		if (nmp->nm_retry > NFS_MAXREXMIT)
620 			nmp->nm_retry = NFS_MAXREXMIT;
621 	}
622 
623 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
624 		nmp->nm_wsize = argp->wsize;
625 		/* Round down to multiple of blocksize */
626 		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
627 		if (nmp->nm_wsize <= 0)
628 			nmp->nm_wsize = NFS_FABLKSIZE;
629 	}
630 
631 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
632 		nmp->nm_rsize = argp->rsize;
633 		/* Round down to multiple of blocksize */
634 		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
635 		if (nmp->nm_rsize <= 0)
636 			nmp->nm_rsize = NFS_FABLKSIZE;
637 	}
638 
639 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
640 		nmp->nm_readdirsize = argp->readdirsize;
641 	}
642 
643 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
644 		nmp->nm_acregmin = argp->acregmin;
645 	else
646 		nmp->nm_acregmin = NFS_MINATTRTIMO;
647 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
648 		nmp->nm_acregmax = argp->acregmax;
649 	else
650 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
651 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
652 		nmp->nm_acdirmin = argp->acdirmin;
653 	else
654 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
655 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
656 		nmp->nm_acdirmax = argp->acdirmax;
657 	else
658 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
659 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
660 		nmp->nm_acdirmin = nmp->nm_acdirmax;
661 	if (nmp->nm_acregmin > nmp->nm_acregmax)
662 		nmp->nm_acregmin = nmp->nm_acregmax;
663 
664 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
665 		if (argp->readahead <= NFS_MAXRAHEAD)
666 			nmp->nm_readahead = argp->readahead;
667 		else
668 			nmp->nm_readahead = NFS_MAXRAHEAD;
669 	}
670 	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
671 		if (argp->wcommitsize < nmp->nm_wsize)
672 			nmp->nm_wcommitsize = nmp->nm_wsize;
673 		else
674 			nmp->nm_wcommitsize = argp->wcommitsize;
675 	}
676 
677 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
678 		    (nmp->nm_soproto != argp->proto));
679 
680 	if (nmp->nm_client != NULL && adjsock) {
681 		int haslock = 0, error = 0;
682 
683 		if (nmp->nm_sotype == SOCK_STREAM) {
684 			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
685 			if (!error)
686 				haslock = 1;
687 		}
688 		if (!error) {
689 		    newnfs_disconnect(&nmp->nm_sockreq);
690 		    if (haslock)
691 			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
692 		    nmp->nm_sotype = argp->sotype;
693 		    nmp->nm_soproto = argp->proto;
694 		    if (nmp->nm_sotype == SOCK_DGRAM)
695 			while (newnfs_connect(nmp, &nmp->nm_sockreq,
696 			    cred, td, 0)) {
697 				printf("newnfs_args: retrying connect\n");
698 				(void) nfs_catnap(PSOCK, 0, "newnfscon");
699 			}
700 		}
701 	} else {
702 		nmp->nm_sotype = argp->sotype;
703 		nmp->nm_soproto = argp->proto;
704 	}
705 
706 	if (hostname != NULL) {
707 		strlcpy(nmp->nm_hostname, hostname,
708 		    sizeof(nmp->nm_hostname));
709 		p = strchr(nmp->nm_hostname, ':');
710 		if (p != NULL)
711 			*p = '\0';
712 	}
713 }
714 
/*
 * NULL-terminated list of the option names that nfs_mount() passes to
 * vfs_filteropt() when checking the options of a mount request.
 */
static const char *nfs_opts[] = {
	"from", "nfs_args",
	/* Generic mount options. */
	"noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
	"noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
	"async",
	/* Connection, locking and retry behaviour. */
	"noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize", "soft", "hard", "mntudp", "tcp", "udp",
	/* Transfer sizes and attribute cache timeouts. */
	"wsize", "rsize", "retrans",
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeout", "addr", "fh",
	/* Protocol version and security. */
	"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath", "minorversion",
	/* Name cache, consistency and layout options. */
	"nametimeo", "negnametimeo", "nocto", "pnfs", "wcommitsize",
	NULL
};
725 
726 /*
727  * VFS Operations.
728  *
729  * mount system call
730  * It seems a bit dumb to copyinstr() the host and path here and then
731  * bcopy() them in mountnfs(), but I wanted to detect errors before
732  * doing the sockargs() call because sockargs() allocates an mbuf and
733  * an error after that means that I have to release the mbuf.
734  */
735 /* ARGSUSED */
736 static int
737 nfs_mount(struct mount *mp)
738 {
739 	struct nfs_args args = {
740 	    .version = NFS_ARGSVERSION,
741 	    .addr = NULL,
742 	    .addrlen = sizeof (struct sockaddr_in),
743 	    .sotype = SOCK_STREAM,
744 	    .proto = 0,
745 	    .fh = NULL,
746 	    .fhsize = 0,
747 	    .flags = NFSMNT_RESVPORT,
748 	    .wsize = NFS_WSIZE,
749 	    .rsize = NFS_RSIZE,
750 	    .readdirsize = NFS_READDIRSIZE,
751 	    .timeo = 10,
752 	    .retrans = NFS_RETRANS,
753 	    .readahead = NFS_DEFRAHEAD,
754 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
755 	    .hostname = NULL,
756 	    .acregmin = NFS_MINATTRTIMO,
757 	    .acregmax = NFS_MAXATTRTIMO,
758 	    .acdirmin = NFS_MINDIRATTRTIMO,
759 	    .acdirmax = NFS_MAXDIRATTRTIMO,
760 	};
761 	int error = 0, ret, len;
762 	struct sockaddr *nam = NULL;
763 	struct vnode *vp;
764 	struct thread *td;
765 	char hst[MNAMELEN];
766 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
767 	char *opt, *name, *secname;
768 	int nametimeo = NFS_DEFAULT_NAMETIMEO;
769 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
770 	int minvers = 0;
771 	int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
772 	size_t hstlen;
773 
774 	has_nfs_args_opt = 0;
775 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
776 		error = EINVAL;
777 		goto out;
778 	}
779 
780 	td = curthread;
781 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
782 		error = nfs_mountroot(mp);
783 		goto out;
784 	}
785 
786 	nfscl_init();
787 
788 	/*
789 	 * The old mount_nfs program passed the struct nfs_args
790 	 * from userspace to kernel.  The new mount_nfs program
791 	 * passes string options via nmount() from userspace to kernel
792 	 * and we populate the struct nfs_args in the kernel.
793 	 */
794 	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
795 		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
796 		    sizeof(args));
797 		if (error != 0)
798 			goto out;
799 
800 		if (args.version != NFS_ARGSVERSION) {
801 			error = EPROGMISMATCH;
802 			goto out;
803 		}
804 		has_nfs_args_opt = 1;
805 	}
806 
807 	/* Handle the new style options. */
808 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
809 		args.flags |= NFSMNT_NOCONN;
810 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
811 		args.flags |= NFSMNT_NOCONN;
812 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
813 		args.flags |= NFSMNT_NOLOCKD;
814 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
815 		args.flags &= ~NFSMNT_NOLOCKD;
816 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
817 		args.flags |= NFSMNT_INT;
818 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
819 		args.flags |= NFSMNT_RDIRPLUS;
820 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
821 		args.flags |= NFSMNT_RESVPORT;
822 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
823 		args.flags &= ~NFSMNT_RESVPORT;
824 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
825 		args.flags |= NFSMNT_SOFT;
826 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
827 		args.flags &= ~NFSMNT_SOFT;
828 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
829 		args.sotype = SOCK_DGRAM;
830 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
831 		args.sotype = SOCK_DGRAM;
832 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
833 		args.sotype = SOCK_STREAM;
834 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
835 		args.flags |= NFSMNT_NFSV3;
836 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
837 		args.flags |= NFSMNT_NFSV4;
838 		args.sotype = SOCK_STREAM;
839 	}
840 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
841 		args.flags |= NFSMNT_ALLGSSNAME;
842 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
843 		args.flags |= NFSMNT_NOCTO;
844 	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
845 		args.flags |= NFSMNT_PNFS;
846 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
847 		if (opt == NULL) {
848 			vfs_mount_error(mp, "illegal readdirsize");
849 			error = EINVAL;
850 			goto out;
851 		}
852 		ret = sscanf(opt, "%d", &args.readdirsize);
853 		if (ret != 1 || args.readdirsize <= 0) {
854 			vfs_mount_error(mp, "illegal readdirsize: %s",
855 			    opt);
856 			error = EINVAL;
857 			goto out;
858 		}
859 		args.flags |= NFSMNT_READDIRSIZE;
860 	}
861 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
862 		if (opt == NULL) {
863 			vfs_mount_error(mp, "illegal readahead");
864 			error = EINVAL;
865 			goto out;
866 		}
867 		ret = sscanf(opt, "%d", &args.readahead);
868 		if (ret != 1 || args.readahead <= 0) {
869 			vfs_mount_error(mp, "illegal readahead: %s",
870 			    opt);
871 			error = EINVAL;
872 			goto out;
873 		}
874 		args.flags |= NFSMNT_READAHEAD;
875 	}
876 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
877 		if (opt == NULL) {
878 			vfs_mount_error(mp, "illegal wsize");
879 			error = EINVAL;
880 			goto out;
881 		}
882 		ret = sscanf(opt, "%d", &args.wsize);
883 		if (ret != 1 || args.wsize <= 0) {
884 			vfs_mount_error(mp, "illegal wsize: %s",
885 			    opt);
886 			error = EINVAL;
887 			goto out;
888 		}
889 		args.flags |= NFSMNT_WSIZE;
890 	}
891 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
892 		if (opt == NULL) {
893 			vfs_mount_error(mp, "illegal rsize");
894 			error = EINVAL;
895 			goto out;
896 		}
897 		ret = sscanf(opt, "%d", &args.rsize);
898 		if (ret != 1 || args.rsize <= 0) {
899 			vfs_mount_error(mp, "illegal wsize: %s",
900 			    opt);
901 			error = EINVAL;
902 			goto out;
903 		}
904 		args.flags |= NFSMNT_RSIZE;
905 	}
906 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
907 		if (opt == NULL) {
908 			vfs_mount_error(mp, "illegal retrans");
909 			error = EINVAL;
910 			goto out;
911 		}
912 		ret = sscanf(opt, "%d", &args.retrans);
913 		if (ret != 1 || args.retrans <= 0) {
914 			vfs_mount_error(mp, "illegal retrans: %s",
915 			    opt);
916 			error = EINVAL;
917 			goto out;
918 		}
919 		args.flags |= NFSMNT_RETRANS;
920 	}
921 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
922 		ret = sscanf(opt, "%d", &args.acregmin);
923 		if (ret != 1 || args.acregmin < 0) {
924 			vfs_mount_error(mp, "illegal acregmin: %s",
925 			    opt);
926 			error = EINVAL;
927 			goto out;
928 		}
929 		args.flags |= NFSMNT_ACREGMIN;
930 	}
931 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
932 		ret = sscanf(opt, "%d", &args.acregmax);
933 		if (ret != 1 || args.acregmax < 0) {
934 			vfs_mount_error(mp, "illegal acregmax: %s",
935 			    opt);
936 			error = EINVAL;
937 			goto out;
938 		}
939 		args.flags |= NFSMNT_ACREGMAX;
940 	}
941 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
942 		ret = sscanf(opt, "%d", &args.acdirmin);
943 		if (ret != 1 || args.acdirmin < 0) {
944 			vfs_mount_error(mp, "illegal acdirmin: %s",
945 			    opt);
946 			error = EINVAL;
947 			goto out;
948 		}
949 		args.flags |= NFSMNT_ACDIRMIN;
950 	}
951 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
952 		ret = sscanf(opt, "%d", &args.acdirmax);
953 		if (ret != 1 || args.acdirmax < 0) {
954 			vfs_mount_error(mp, "illegal acdirmax: %s",
955 			    opt);
956 			error = EINVAL;
957 			goto out;
958 		}
959 		args.flags |= NFSMNT_ACDIRMAX;
960 	}
961 	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
962 		ret = sscanf(opt, "%d", &args.wcommitsize);
963 		if (ret != 1 || args.wcommitsize < 0) {
964 			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
965 			error = EINVAL;
966 			goto out;
967 		}
968 		args.flags |= NFSMNT_WCOMMITSIZE;
969 	}
970 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
971 		ret = sscanf(opt, "%d", &args.timeo);
972 		if (ret != 1 || args.timeo <= 0) {
973 			vfs_mount_error(mp, "illegal timeout: %s",
974 			    opt);
975 			error = EINVAL;
976 			goto out;
977 		}
978 		args.flags |= NFSMNT_TIMEO;
979 	}
980 	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
981 		ret = sscanf(opt, "%d", &nametimeo);
982 		if (ret != 1 || nametimeo < 0) {
983 			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
984 			error = EINVAL;
985 			goto out;
986 		}
987 	}
988 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
989 	    == 0) {
990 		ret = sscanf(opt, "%d", &negnametimeo);
991 		if (ret != 1 || negnametimeo < 0) {
992 			vfs_mount_error(mp, "illegal negnametimeo: %s",
993 			    opt);
994 			error = EINVAL;
995 			goto out;
996 		}
997 	}
998 	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
999 	    0) {
1000 		ret = sscanf(opt, "%d", &minvers);
1001 		if (ret != 1 || minvers < 0 || minvers > 1 ||
1002 		    (args.flags & NFSMNT_NFSV4) == 0) {
1003 			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1004 			error = EINVAL;
1005 			goto out;
1006 		}
1007 	}
1008 	if (vfs_getopt(mp->mnt_optnew, "sec",
1009 		(void **) &secname, NULL) == 0)
1010 		nfs_sec_name(secname, &args.flags);
1011 
1012 	if (mp->mnt_flag & MNT_UPDATE) {
1013 		struct nfsmount *nmp = VFSTONFS(mp);
1014 
1015 		if (nmp == NULL) {
1016 			error = EIO;
1017 			goto out;
1018 		}
1019 
1020 		/*
1021 		 * If a change from TCP->UDP is done and there are thread(s)
1022 		 * that have I/O RPC(s) in progress with a transfer size
1023 		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1024 		 * hung, retrying the RPC(s) forever. Usually these threads
1025 		 * will be seen doing an uninterruptible sleep on wait channel
1026 		 * "newnfsreq" (truncated to "newnfsre" by procstat).
1027 		 */
1028 		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1029 			tprintf(td->td_proc, LOG_WARNING,
1030 	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1031 
1032 		/*
1033 		 * When doing an update, we can't change version,
1034 		 * security, switch lockd strategies or change cookie
1035 		 * translation
1036 		 */
1037 		args.flags = (args.flags &
1038 		    ~(NFSMNT_NFSV3 |
1039 		      NFSMNT_NFSV4 |
1040 		      NFSMNT_KERB |
1041 		      NFSMNT_INTEGRITY |
1042 		      NFSMNT_PRIVACY |
1043 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1044 		    (nmp->nm_flag &
1045 			(NFSMNT_NFSV3 |
1046 			 NFSMNT_NFSV4 |
1047 			 NFSMNT_KERB |
1048 			 NFSMNT_INTEGRITY |
1049 			 NFSMNT_PRIVACY |
1050 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1051 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1052 		goto out;
1053 	}
1054 
1055 	/*
1056 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1057 	 * or no-connection mode for those protocols that support
1058 	 * no-connection mode (the flag will be cleared later for protocols
1059 	 * that do not support no-connection mode).  This will allow a client
1060 	 * to receive replies from a different IP than the request was
1061 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1062 	 * not 0.
1063 	 */
1064 	if (nfs_ip_paranoia == 0)
1065 		args.flags |= NFSMNT_NOCONN;
1066 
1067 	if (has_nfs_args_opt != 0) {
1068 		/*
1069 		 * In the 'nfs_args' case, the pointers in the args
1070 		 * structure are in userland - we copy them in here.
1071 		 */
1072 		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1073 			vfs_mount_error(mp, "Bad file handle");
1074 			error = EINVAL;
1075 			goto out;
1076 		}
1077 		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1078 		    args.fhsize);
1079 		if (error != 0)
1080 			goto out;
1081 		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1082 		if (error != 0)
1083 			goto out;
1084 		bzero(&hst[hstlen], MNAMELEN - hstlen);
1085 		args.hostname = hst;
1086 		/* sockargs() call must be after above copyin() calls */
1087 		error = getsockaddr(&nam, (caddr_t)args.addr,
1088 		    args.addrlen);
1089 		if (error != 0)
1090 			goto out;
1091 	} else {
1092 		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1093 		    &args.fhsize) == 0) {
1094 			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1095 				vfs_mount_error(mp, "Bad file handle");
1096 				error = EINVAL;
1097 				goto out;
1098 			}
1099 			bcopy(args.fh, nfh, args.fhsize);
1100 		} else {
1101 			args.fhsize = 0;
1102 		}
1103 		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1104 		    (void **)&args.hostname, &len);
1105 		if (args.hostname == NULL) {
1106 			vfs_mount_error(mp, "Invalid hostname");
1107 			error = EINVAL;
1108 			goto out;
1109 		}
1110 		bcopy(args.hostname, hst, MNAMELEN);
1111 		hst[MNAMELEN - 1] = '\0';
1112 	}
1113 
1114 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1115 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1116 	else
1117 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1118 	srvkrbnamelen = strlen(srvkrbname);
1119 
1120 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1121 		strlcpy(krbname, name, sizeof (krbname));
1122 	else
1123 		krbname[0] = '\0';
1124 	krbnamelen = strlen(krbname);
1125 
1126 	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1127 		strlcpy(dirpath, name, sizeof (dirpath));
1128 	else
1129 		dirpath[0] = '\0';
1130 	dirlen = strlen(dirpath);
1131 
1132 	if (has_nfs_args_opt == 0) {
1133 		if (vfs_getopt(mp->mnt_optnew, "addr",
1134 		    (void **)&args.addr, &args.addrlen) == 0) {
1135 			if (args.addrlen > SOCK_MAXADDRLEN) {
1136 				error = ENAMETOOLONG;
1137 				goto out;
1138 			}
1139 			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1140 			bcopy(args.addr, nam, args.addrlen);
1141 			nam->sa_len = args.addrlen;
1142 		} else {
1143 			vfs_mount_error(mp, "No server address");
1144 			error = EINVAL;
1145 			goto out;
1146 		}
1147 	}
1148 
1149 	args.fh = nfh;
1150 	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1151 	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1152 	    nametimeo, negnametimeo, minvers);
1153 out:
1154 	if (!error) {
1155 		MNT_ILOCK(mp);
1156 		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF;
1157 		MNT_IUNLOCK(mp);
1158 	}
1159 	return (error);
1160 }
1161 
1162 
1163 /*
1164  * VFS Operations.
1165  *
1166  * mount system call
1167  * It seems a bit dumb to copyinstr() the host and path here and then
1168  * bcopy() them in mountnfs(), but I wanted to detect errors before
1169  * doing the sockargs() call because sockargs() allocates an mbuf and
1170  * an error after that means that I have to release the mbuf.
1171  */
1172 /* ARGSUSED */
1173 static int
1174 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1175 {
1176 	int error;
1177 	struct nfs_args args;
1178 
1179 	error = copyin(data, &args, sizeof (struct nfs_args));
1180 	if (error)
1181 		return error;
1182 
1183 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1184 
1185 	error = kernel_mount(ma, flags);
1186 	return (error);
1187 }
1188 
1189 /*
1190  * Common code for mount and mountroot
1191  */
1192 static int
1193 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1194     char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1195     u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1196     struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1197     int minvers)
1198 {
1199 	struct nfsmount *nmp;
1200 	struct nfsnode *np;
1201 	int error, trycnt, ret;
1202 	struct nfsvattr nfsva;
1203 	struct nfsclclient *clp;
1204 	struct nfsclds *dsp, *tdsp;
1205 	uint32_t lease;
1206 	static u_int64_t clval = 0;
1207 
1208 	NFSCL_DEBUG(3, "in mnt\n");
1209 	clp = NULL;
1210 	if (mp->mnt_flag & MNT_UPDATE) {
1211 		nmp = VFSTONFS(mp);
1212 		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1213 		FREE(nam, M_SONAME);
1214 		return (0);
1215 	} else {
1216 		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1217 		    krbnamelen + dirlen + srvkrbnamelen + 2,
1218 		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1219 		TAILQ_INIT(&nmp->nm_bufq);
1220 		if (clval == 0)
1221 			clval = (u_int64_t)nfsboottime.tv_sec;
1222 		nmp->nm_clval = clval++;
1223 		nmp->nm_krbnamelen = krbnamelen;
1224 		nmp->nm_dirpathlen = dirlen;
1225 		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1226 		if (td->td_ucred->cr_uid != (uid_t)0) {
1227 			/*
1228 			 * nm_uid is used to get KerberosV credentials for
1229 			 * the nfsv4 state handling operations if there is
1230 			 * no host based principal set. Use the uid of
1231 			 * this user if not root, since they are doing the
1232 			 * mount. I don't think setting this for root will
1233 			 * work, since root normally does not have user
1234 			 * credentials in a credentials cache.
1235 			 */
1236 			nmp->nm_uid = td->td_ucred->cr_uid;
1237 		} else {
1238 			/*
1239 			 * Just set to -1, so it won't be used.
1240 			 */
1241 			nmp->nm_uid = (uid_t)-1;
1242 		}
1243 
1244 		/* Copy and null terminate all the names */
1245 		if (nmp->nm_krbnamelen > 0) {
1246 			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1247 			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1248 		}
1249 		if (nmp->nm_dirpathlen > 0) {
1250 			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1251 			    nmp->nm_dirpathlen);
1252 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1253 			    + 1] = '\0';
1254 		}
1255 		if (nmp->nm_srvkrbnamelen > 0) {
1256 			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1257 			    nmp->nm_srvkrbnamelen);
1258 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1259 			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1260 		}
1261 		nmp->nm_sockreq.nr_cred = crhold(cred);
1262 		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1263 		mp->mnt_data = nmp;
1264 		nmp->nm_getinfo = nfs_getnlminfo;
1265 		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1266 	}
1267 	vfs_getnewfsid(mp);
1268 	nmp->nm_mountp = mp;
1269 	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1270 
1271 	/*
1272 	 * Since nfs_decode_args() might optionally set them, these
1273 	 * need to be set to defaults before the call, so that the
1274 	 * optional settings aren't overwritten.
1275 	 */
1276 	nmp->nm_nametimeo = nametimeo;
1277 	nmp->nm_negnametimeo = negnametimeo;
1278 	nmp->nm_timeo = NFS_TIMEO;
1279 	nmp->nm_retry = NFS_RETRANS;
1280 	nmp->nm_readahead = NFS_DEFRAHEAD;
1281 	if (desiredvnodes >= 11000)
1282 		nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1283 	else
1284 		nmp->nm_wcommitsize = hibufspace / 10;
1285 	if ((argp->flags & NFSMNT_NFSV4) != 0)
1286 		nmp->nm_minorvers = minvers;
1287 	else
1288 		nmp->nm_minorvers = 0;
1289 
1290 	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1291 
1292 	/*
1293 	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1294 	 * high, depending on whether we end up with negative offsets in
1295 	 * the client or server somewhere.  2GB-1 may be safer.
1296 	 *
1297 	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1298 	 * that we can handle until we find out otherwise.
1299 	 */
1300 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1301 		nmp->nm_maxfilesize = 0xffffffffLL;
1302 	else
1303 		nmp->nm_maxfilesize = OFF_MAX;
1304 
1305 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1306 		nmp->nm_wsize = NFS_WSIZE;
1307 		nmp->nm_rsize = NFS_RSIZE;
1308 		nmp->nm_readdirsize = NFS_READDIRSIZE;
1309 	}
1310 	nmp->nm_numgrps = NFS_MAXGRPS;
1311 	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1312 	if (nmp->nm_tprintf_delay < 0)
1313 		nmp->nm_tprintf_delay = 0;
1314 	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1315 	if (nmp->nm_tprintf_initial_delay < 0)
1316 		nmp->nm_tprintf_initial_delay = 0;
1317 	nmp->nm_fhsize = argp->fhsize;
1318 	if (nmp->nm_fhsize > 0)
1319 		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1320 	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1321 	nmp->nm_nam = nam;
1322 	/* Set up the sockets and per-host congestion */
1323 	nmp->nm_sotype = argp->sotype;
1324 	nmp->nm_soproto = argp->proto;
1325 	nmp->nm_sockreq.nr_prog = NFS_PROG;
1326 	if ((argp->flags & NFSMNT_NFSV4))
1327 		nmp->nm_sockreq.nr_vers = NFS_VER4;
1328 	else if ((argp->flags & NFSMNT_NFSV3))
1329 		nmp->nm_sockreq.nr_vers = NFS_VER3;
1330 	else
1331 		nmp->nm_sockreq.nr_vers = NFS_VER2;
1332 
1333 
1334 	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1335 		goto bad;
1336 	/* For NFSv4.1, get the clientid now. */
1337 	if (nmp->nm_minorvers > 0) {
1338 		NFSCL_DEBUG(3, "at getcl\n");
1339 		error = nfscl_getcl(mp, cred, td, 0, &clp);
1340 		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1341 		if (error != 0)
1342 			goto bad;
1343 	}
1344 
1345 	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1346 	    nmp->nm_dirpathlen > 0) {
1347 		NFSCL_DEBUG(3, "in dirp\n");
1348 		/*
1349 		 * If the fhsize on the mount point == 0 for V4, the mount
1350 		 * path needs to be looked up.
1351 		 */
1352 		trycnt = 3;
1353 		do {
1354 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1355 			    cred, td);
1356 			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
1357 			if (error)
1358 				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1359 		} while (error && --trycnt > 0);
1360 		if (error) {
1361 			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1362 			goto bad;
1363 		}
1364 	}
1365 
1366 	/*
1367 	 * A reference count is needed on the nfsnode representing the
1368 	 * remote root.  If this object is not persistent, then backward
1369 	 * traversals of the mount point (i.e. "..") will not work if
1370 	 * the nfsnode gets flushed out of the cache. Ufs does not have
1371 	 * this problem, because one can identify root inodes by their
1372 	 * number == ROOTINO (2).
1373 	 */
1374 	if (nmp->nm_fhsize > 0) {
1375 		/*
1376 		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1377 		 * non-zero for the root vnode. f_iosize will be set correctly
1378 		 * by nfs_statfs() before any I/O occurs.
1379 		 */
1380 		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1381 		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1382 		    LK_EXCLUSIVE);
1383 		if (error)
1384 			goto bad;
1385 		*vpp = NFSTOV(np);
1386 
1387 		/*
1388 		 * Get file attributes and transfer parameters for the
1389 		 * mountpoint.  This has the side effect of filling in
1390 		 * (*vpp)->v_type with the correct value.
1391 		 */
1392 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1393 		    cred, td, &nfsva, NULL, &lease);
1394 		if (ret) {
1395 			/*
1396 			 * Just set default values to get things going.
1397 			 */
1398 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1399 			nfsva.na_vattr.va_type = VDIR;
1400 			nfsva.na_vattr.va_mode = 0777;
1401 			nfsva.na_vattr.va_nlink = 100;
1402 			nfsva.na_vattr.va_uid = (uid_t)0;
1403 			nfsva.na_vattr.va_gid = (gid_t)0;
1404 			nfsva.na_vattr.va_fileid = 2;
1405 			nfsva.na_vattr.va_gen = 1;
1406 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1407 			nfsva.na_vattr.va_size = 512 * 1024;
1408 			lease = 60;
1409 		}
1410 		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1411 		if (nmp->nm_minorvers > 0) {
1412 			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1413 			NFSLOCKCLSTATE();
1414 			clp->nfsc_renew = NFSCL_RENEW(lease);
1415 			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1416 			clp->nfsc_clientidrev++;
1417 			if (clp->nfsc_clientidrev == 0)
1418 				clp->nfsc_clientidrev++;
1419 			NFSUNLOCKCLSTATE();
1420 			/*
1421 			 * Mount will succeed, so the renew thread can be
1422 			 * started now.
1423 			 */
1424 			nfscl_start_renewthread(clp);
1425 			nfscl_clientrelease(clp);
1426 		}
1427 		if (argp->flags & NFSMNT_NFSV3)
1428 			ncl_fsinfo(nmp, *vpp, cred, td);
1429 
1430 		/* Mark if the mount point supports NFSv4 ACLs. */
1431 		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1432 		    ret == 0 &&
1433 		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1434 			MNT_ILOCK(mp);
1435 			mp->mnt_flag |= MNT_NFS4ACLS;
1436 			MNT_IUNLOCK(mp);
1437 		}
1438 
1439 		/*
1440 		 * Lose the lock but keep the ref.
1441 		 */
1442 		NFSVOPUNLOCK(*vpp, 0);
1443 		return (0);
1444 	}
1445 	error = EIO;
1446 
1447 bad:
1448 	if (clp != NULL)
1449 		nfscl_clientrelease(clp);
1450 	newnfs_disconnect(&nmp->nm_sockreq);
1451 	crfree(nmp->nm_sockreq.nr_cred);
1452 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1453 	mtx_destroy(&nmp->nm_mtx);
1454 	if (nmp->nm_clp != NULL) {
1455 		NFSLOCKCLSTATE();
1456 		LIST_REMOVE(nmp->nm_clp, nfsc_list);
1457 		NFSUNLOCKCLSTATE();
1458 		free(nmp->nm_clp, M_NFSCLCLIENT);
1459 	}
1460 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1461 		nfscl_freenfsclds(dsp);
1462 	FREE(nmp, M_NEWNFSMNT);
1463 	FREE(nam, M_SONAME);
1464 	return (error);
1465 }
1466 
1467 /*
1468  * unmount system call
1469  */
1470 static int
1471 nfs_unmount(struct mount *mp, int mntflags)
1472 {
1473 	struct thread *td;
1474 	struct nfsmount *nmp;
1475 	int error, flags = 0, trycnt = 0;
1476 	struct nfsclds *dsp, *tdsp;
1477 
1478 	td = curthread;
1479 
1480 	if (mntflags & MNT_FORCE)
1481 		flags |= FORCECLOSE;
1482 	nmp = VFSTONFS(mp);
1483 	/*
1484 	 * Goes something like this..
1485 	 * - Call vflush() to clear out vnodes for this filesystem
1486 	 * - Close the socket
1487 	 * - Free up the data structures
1488 	 */
1489 	/* In the forced case, cancel any outstanding requests. */
1490 	if (mntflags & MNT_FORCE) {
1491 		error = newnfs_nmcancelreqs(nmp);
1492 		if (error)
1493 			goto out;
1494 		/* For a forced close, get rid of the renew thread now */
1495 		nfscl_umount(nmp, td);
1496 	}
1497 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1498 	do {
1499 		error = vflush(mp, 1, flags, td);
1500 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1501 			(void) nfs_catnap(PSOCK, error, "newndm");
1502 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1503 	if (error)
1504 		goto out;
1505 
1506 	/*
1507 	 * We are now committed to the unmount.
1508 	 */
1509 	if ((mntflags & MNT_FORCE) == 0)
1510 		nfscl_umount(nmp, td);
1511 	newnfs_disconnect(&nmp->nm_sockreq);
1512 	crfree(nmp->nm_sockreq.nr_cred);
1513 	FREE(nmp->nm_nam, M_SONAME);
1514 
1515 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1516 	mtx_destroy(&nmp->nm_mtx);
1517 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1518 		nfscl_freenfsclds(dsp);
1519 	FREE(nmp, M_NEWNFSMNT);
1520 out:
1521 	return (error);
1522 }
1523 
1524 /*
1525  * Return root of a filesystem
1526  */
1527 static int
1528 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1529 {
1530 	struct vnode *vp;
1531 	struct nfsmount *nmp;
1532 	struct nfsnode *np;
1533 	int error;
1534 
1535 	nmp = VFSTONFS(mp);
1536 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1537 	if (error)
1538 		return error;
1539 	vp = NFSTOV(np);
1540 	/*
1541 	 * Get transfer parameters and attributes for root vnode once.
1542 	 */
1543 	mtx_lock(&nmp->nm_mtx);
1544 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1545 		mtx_unlock(&nmp->nm_mtx);
1546 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1547 	} else
1548 		mtx_unlock(&nmp->nm_mtx);
1549 	if (vp->v_type == VNON)
1550 	    vp->v_type = VDIR;
1551 	vp->v_vflag |= VV_ROOT;
1552 	*vpp = vp;
1553 	return (0);
1554 }
1555 
1556 /*
1557  * Flush out the buffer cache
1558  */
1559 /* ARGSUSED */
1560 static int
1561 nfs_sync(struct mount *mp, int waitfor)
1562 {
1563 	struct vnode *vp, *mvp;
1564 	struct thread *td;
1565 	int error, allerror = 0;
1566 
1567 	td = curthread;
1568 
1569 	MNT_ILOCK(mp);
1570 	/*
1571 	 * If a forced dismount is in progress, return from here so that
1572 	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1573 	 * calling VFS_UNMOUNT().
1574 	 */
1575 	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1576 		MNT_IUNLOCK(mp);
1577 		return (EBADF);
1578 	}
1579 	MNT_IUNLOCK(mp);
1580 
1581 	/*
1582 	 * Force stale buffer cache information to be flushed.
1583 	 */
1584 loop:
1585 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1586 		/* XXX Racy bv_cnt check. */
1587 		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1588 		    waitfor == MNT_LAZY) {
1589 			VI_UNLOCK(vp);
1590 			continue;
1591 		}
1592 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1593 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1594 			goto loop;
1595 		}
1596 		error = VOP_FSYNC(vp, waitfor, td);
1597 		if (error)
1598 			allerror = error;
1599 		NFSVOPUNLOCK(vp, 0);
1600 		vrele(vp);
1601 	}
1602 	return (allerror);
1603 }
1604 
1605 static int
1606 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1607 {
1608 	struct nfsmount *nmp = VFSTONFS(mp);
1609 	struct vfsquery vq;
1610 	int error;
1611 
1612 	bzero(&vq, sizeof(vq));
1613 	switch (op) {
1614 #if 0
1615 	case VFS_CTL_NOLOCKS:
1616 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1617  		if (req->oldptr != NULL) {
1618  			error = SYSCTL_OUT(req, &val, sizeof(val));
1619  			if (error)
1620  				return (error);
1621  		}
1622  		if (req->newptr != NULL) {
1623  			error = SYSCTL_IN(req, &val, sizeof(val));
1624  			if (error)
1625  				return (error);
1626 			if (val)
1627 				nmp->nm_flag |= NFSMNT_NOLOCKS;
1628 			else
1629 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1630  		}
1631 		break;
1632 #endif
1633 	case VFS_CTL_QUERY:
1634 		mtx_lock(&nmp->nm_mtx);
1635 		if (nmp->nm_state & NFSSTA_TIMEO)
1636 			vq.vq_flags |= VQ_NOTRESP;
1637 		mtx_unlock(&nmp->nm_mtx);
1638 #if 0
1639 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1640 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1641 			vq.vq_flags |= VQ_NOTRESPLOCK;
1642 #endif
1643 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1644 		break;
1645  	case VFS_CTL_TIMEO:
1646  		if (req->oldptr != NULL) {
1647  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1648  			    sizeof(nmp->nm_tprintf_initial_delay));
1649  			if (error)
1650  				return (error);
1651  		}
1652  		if (req->newptr != NULL) {
1653 			error = vfs_suser(mp, req->td);
1654 			if (error)
1655 				return (error);
1656  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1657  			    sizeof(nmp->nm_tprintf_initial_delay));
1658  			if (error)
1659  				return (error);
1660  			if (nmp->nm_tprintf_initial_delay < 0)
1661  				nmp->nm_tprintf_initial_delay = 0;
1662  		}
1663 		break;
1664 	default:
1665 		return (ENOTSUP);
1666 	}
1667 	return (0);
1668 }
1669 
1670 /*
1671  * Extract the information needed by the nlm from the nfs vnode.
1672  */
1673 static void
1674 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1675     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1676     struct timeval *timeop)
1677 {
1678 	struct nfsmount *nmp;
1679 	struct nfsnode *np = VTONFS(vp);
1680 
1681 	nmp = VFSTONFS(vp->v_mount);
1682 	if (fhlenp != NULL)
1683 		*fhlenp = (size_t)np->n_fhp->nfh_len;
1684 	if (fhp != NULL)
1685 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1686 	if (sp != NULL)
1687 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1688 	if (is_v3p != NULL)
1689 		*is_v3p = NFS_ISV3(vp);
1690 	if (sizep != NULL)
1691 		*sizep = np->n_size;
1692 	if (timeop != NULL) {
1693 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1694 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1695 	}
1696 }
1697 
/*
 * Append the option name "opt" to the output when "testval" is non-zero.
 *
 * "*buf" is the current write position and "*blen" the space left there.
 * The name is written only if it fits together with its terminating NUL;
 * in that case "*buf" is advanced and "*blen" reduced by the number of
 * characters written.  Otherwise nothing is touched.  "nmp" is not used.
 */
static __inline void nfscl_printopt(struct nfsmount *nmp, int testval,
    char *opt, char **buf, size_t *blen)
{
	int written;

	if (testval == 0 || *blen <= strlen(opt))
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	/* Sanity check: the name was already known to fit. */
	if (written != strlen(opt))
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
1715 
/*
 * Append "opt=optval" (optval printed in decimal) to the output.
 *
 * "*buf" is the current write position and "*blen" the space left there.
 * Nothing is attempted unless there is room for more than the option
 * name plus one further character.  When the complete text fits, "*buf"
 * is advanced and "*blen" reduced by its length.  When it does not fit,
 * the partial text is discarded again so that the output never ends in a
 * cut-off option or number; "*buf" and "*blen" are left unchanged.
 * "nmp" is not used.
 */
static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
    char *opt, char **buf, size_t *blen)
{
	int len;

	if (*blen <= strlen(opt) + 1)
		return;

	len = snprintf(*buf, *blen, "%s=%d", opt, optval);
	if (len < 0 || (size_t)len >= *blen) {
		/*
		 * Truncated (or failed): re-terminate the string at the
		 * old end instead of leaving a misleading fragment behind.
		 */
		**buf = '\0';
		return;
	}
	*buf += len;
	*blen -= len;
}
1733 
1734 /*
1735  * Load the option flags and values into the buffer.
1736  */
1737 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1738 {
1739 	char *buf;
1740 	size_t blen;
1741 
1742 	buf = buffer;
1743 	blen = buflen;
1744 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1745 	    &blen);
1746 	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1747 		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1748 		    &blen);
1749 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1750 		    &buf, &blen);
1751 	}
1752 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1753 	    &blen);
1754 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1755 	    "nfsv2", &buf, &blen);
1756 	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1757 	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1758 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1759 	    &buf, &blen);
1760 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1761 	    &buf, &blen);
1762 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1763 	    &blen);
1764 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1765 	    &blen);
1766 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1767 	    &blen);
1768 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1769 	    &blen);
1770 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1771 	    &blen);
1772 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1773 	    0, ",lockd", &buf, &blen);
1774 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1775 	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1776 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1777 	    &buf, &blen);
1778 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1779 	    &buf, &blen);
1780 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1781 	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
1782 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1783 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1784 	    &buf, &blen);
1785 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1786 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
1787 	    &buf, &blen);
1788 	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1789 	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1790 	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
1791 	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
1792 	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
1793 	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
1794 	    &blen);
1795 	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
1796 	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
1797 	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
1798 	    &blen);
1799 	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
1800 	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
1801 	    &blen);
1802 	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
1803 	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
1804 }
1805 
1806