xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision f0157ce528a128e2abb181a5c766033a2ce49a5f)
1 /*-
2  * Copyright (c) 1989, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/limits.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/mount.h>
55 #include <sys/proc.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/sockio.h>
59 #include <sys/sysctl.h>
60 #include <sys/vnode.h>
61 #include <sys/signalvar.h>
62 
63 #include <vm/vm.h>
64 #include <vm/vm_extern.h>
65 #include <vm/uma.h>
66 
67 #include <net/if.h>
68 #include <net/route.h>
69 #include <netinet/in.h>
70 
71 #include <fs/nfs/nfsport.h>
72 #include <fs/nfsclient/nfsnode.h>
73 #include <fs/nfsclient/nfsmount.h>
74 #include <fs/nfsclient/nfs.h>
75 #include <nfs/nfsdiskless.h>
76 
77 FEATURE(nfscl, "NFSv4 client");
78 
79 extern int nfscl_ticks;
80 extern struct timeval nfsboottime;
81 extern struct nfsstats	newnfsstats;
82 extern int nfsrv_useacl;
83 extern int nfscl_debuglevel;
84 extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
85 extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
86 extern struct mtx ncl_iod_mutex;
87 NFSCLSTATEMUTEX;
88 
89 MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
90 MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
91 
SYSCTL_DECL(_vfs_nfs);
/*
 * Read/write integer tunable registered under the _vfs_nfs sysctl node
 * with an initial value of 1.  It has no description string.
 */
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0, "");
/*
 * Read/write tunable "downdelayinitial", initialised from
 * NFS_TPRINTF_INITIAL_DELAY.
 * NOTE(review): the OID number argument below is the
 * NFS_TPRINTF_INITIAL_DELAY constant itself rather than OID_AUTO as used
 * for the other entries in this file -- confirm that this is intended.
 */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
/*
 * How long between console messages "nfs server foo not responding".
 * NOTE(review): as above, the OID number argument is the NFS_TPRINTF_DELAY
 * constant rather than OID_AUTO -- confirm.
 */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
103 
104 static int	nfs_mountroot(struct mount *);
105 static void	nfs_sec_name(char *, int *);
106 static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
107 		    struct nfs_args *argp, const char *, struct ucred *,
108 		    struct thread *);
109 static int	mountnfs(struct nfs_args *, struct mount *,
110 		    struct sockaddr *, char *, u_char *, int, u_char *, int,
111 		    u_char *, int, struct vnode **, struct ucred *,
112 		    struct thread *, int, int, int);
113 static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
114 		    struct sockaddr_storage *, int *, off_t *,
115 		    struct timeval *);
116 static vfs_mount_t nfs_mount;
117 static vfs_cmount_t nfs_cmount;
118 static vfs_unmount_t nfs_unmount;
119 static vfs_root_t nfs_root;
120 static vfs_statfs_t nfs_statfs;
121 static vfs_sync_t nfs_sync;
122 static vfs_sysctl_t nfs_sysctl;
123 
124 /*
125  * nfs vfs operations.
126  */
127 static struct vfsops nfs_vfsops = {
128 	.vfs_init =		ncl_init,
129 	.vfs_mount =		nfs_mount,
130 	.vfs_cmount =		nfs_cmount,
131 	.vfs_root =		nfs_root,
132 	.vfs_statfs =		nfs_statfs,
133 	.vfs_sync =		nfs_sync,
134 	.vfs_uninit =		ncl_uninit,
135 	.vfs_unmount =		nfs_unmount,
136 	.vfs_sysctl =		nfs_sysctl,
137 };
138 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
139 
140 /* So that loader and kldload(2) can find us, wherever we are.. */
141 MODULE_VERSION(nfs, 1);
142 MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
143 MODULE_DEPEND(nfs, krpc, 1, 1, 1);
144 MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
145 MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
146 
/*
 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
 * can be shared by both NFS clients. It is declared here so that it
 * will be defined for kernels built without NFS_ROOT, although it
 * isn't used in that case.
 *
 * nfs_diskless_valid is consulted by nfs_mountroot(): 0 makes the root
 * mount fail, 1 triggers nfs_convert_diskless(), which fills in
 * nfsv3_diskless from nfs_diskless and sets the value to 3.
 */
#if !defined(NFS_ROOT) && !defined(NFSCLIENT)
struct nfs_diskless	nfs_diskless = { { { 0 } } };
struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
int			nfs_diskless_valid = 0;
#endif

/* Read-only export of nfs_diskless_valid. */
SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &nfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

/* Read-only string export of nfsv3_diskless.root_hostnam. */
SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");

/* Read-only opaque export of nfsv3_diskless.root_saddr. */
SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
    "%Ssockaddr_in", "Diskless root nfs address");
169 
170 
171 void		newnfsargs_ntoh(struct nfs_args *);
172 static int	nfs_mountdiskless(char *,
173 		    struct sockaddr_in *, struct nfs_args *,
174 		    struct thread *, struct vnode **, struct mount *);
175 static void	nfs_convert_diskless(void);
176 static void	nfs_convert_oargs(struct nfs_args *args,
177 		    struct onfs_args *oargs);
178 
179 int
180 newnfs_iosize(struct nfsmount *nmp)
181 {
182 	int iosize, maxio;
183 
184 	/* First, set the upper limit for iosize */
185 	if (nmp->nm_flag & NFSMNT_NFSV4) {
186 		maxio = NFS_MAXBSIZE;
187 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
188 		if (nmp->nm_sotype == SOCK_DGRAM)
189 			maxio = NFS_MAXDGRAMDATA;
190 		else
191 			maxio = NFS_MAXBSIZE;
192 	} else {
193 		maxio = NFS_V2MAXDATA;
194 	}
195 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
196 		nmp->nm_rsize = maxio;
197 	if (nmp->nm_rsize > MAXBSIZE)
198 		nmp->nm_rsize = MAXBSIZE;
199 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
200 		nmp->nm_readdirsize = maxio;
201 	if (nmp->nm_readdirsize > nmp->nm_rsize)
202 		nmp->nm_readdirsize = nmp->nm_rsize;
203 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
204 		nmp->nm_wsize = maxio;
205 	if (nmp->nm_wsize > MAXBSIZE)
206 		nmp->nm_wsize = MAXBSIZE;
207 
208 	/*
209 	 * Calculate the size used for io buffers.  Use the larger
210 	 * of the two sizes to minimise nfs requests but make sure
211 	 * that it is at least one VM page to avoid wasting buffer
212 	 * space.
213 	 */
214 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
215 	iosize = imax(iosize, PAGE_SIZE);
216 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
217 	return (iosize);
218 }
219 
220 static void
221 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
222 {
223 
224 	args->version = NFS_ARGSVERSION;
225 	args->addr = oargs->addr;
226 	args->addrlen = oargs->addrlen;
227 	args->sotype = oargs->sotype;
228 	args->proto = oargs->proto;
229 	args->fh = oargs->fh;
230 	args->fhsize = oargs->fhsize;
231 	args->flags = oargs->flags;
232 	args->wsize = oargs->wsize;
233 	args->rsize = oargs->rsize;
234 	args->readdirsize = oargs->readdirsize;
235 	args->timeo = oargs->timeo;
236 	args->retrans = oargs->retrans;
237 	args->readahead = oargs->readahead;
238 	args->hostname = oargs->hostname;
239 }
240 
241 static void
242 nfs_convert_diskless(void)
243 {
244 
245 	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
246 		sizeof(struct ifaliasreq));
247 	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
248 		sizeof(struct sockaddr_in));
249 	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
250 	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
251 		nfsv3_diskless.root_fhsize = NFSX_MYFH;
252 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
253 	} else {
254 		nfsv3_diskless.root_fhsize = NFSX_V2FH;
255 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
256 	}
257 	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
258 		sizeof(struct sockaddr_in));
259 	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
260 	nfsv3_diskless.root_time = nfs_diskless.root_time;
261 	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
262 		MAXHOSTNAMELEN);
263 	nfs_diskless_valid = 3;
264 }
265 
266 /*
267  * nfs statfs call
268  */
269 static int
270 nfs_statfs(struct mount *mp, struct statfs *sbp)
271 {
272 	struct vnode *vp;
273 	struct thread *td;
274 	struct nfsmount *nmp = VFSTONFS(mp);
275 	struct nfsvattr nfsva;
276 	struct nfsfsinfo fs;
277 	struct nfsstatfs sb;
278 	int error = 0, attrflag, gotfsinfo = 0, ret;
279 	struct nfsnode *np;
280 
281 	td = curthread;
282 
283 	error = vfs_busy(mp, MBF_NOWAIT);
284 	if (error)
285 		return (error);
286 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
287 	if (error) {
288 		vfs_unbusy(mp);
289 		return (error);
290 	}
291 	vp = NFSTOV(np);
292 	mtx_lock(&nmp->nm_mtx);
293 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
294 		mtx_unlock(&nmp->nm_mtx);
295 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
296 		    &attrflag, NULL);
297 		if (!error)
298 			gotfsinfo = 1;
299 	} else
300 		mtx_unlock(&nmp->nm_mtx);
301 	if (!error)
302 		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
303 		    &attrflag, NULL);
304 	if (error != 0)
305 		NFSCL_DEBUG(2, "statfs=%d\n", error);
306 	if (attrflag == 0) {
307 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
308 		    td->td_ucred, td, &nfsva, NULL, NULL);
309 		if (ret) {
310 			/*
311 			 * Just set default values to get things going.
312 			 */
313 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
314 			nfsva.na_vattr.va_type = VDIR;
315 			nfsva.na_vattr.va_mode = 0777;
316 			nfsva.na_vattr.va_nlink = 100;
317 			nfsva.na_vattr.va_uid = (uid_t)0;
318 			nfsva.na_vattr.va_gid = (gid_t)0;
319 			nfsva.na_vattr.va_fileid = 2;
320 			nfsva.na_vattr.va_gen = 1;
321 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
322 			nfsva.na_vattr.va_size = 512 * 1024;
323 		}
324 	}
325 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
326 	if (!error) {
327 	    mtx_lock(&nmp->nm_mtx);
328 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
329 		nfscl_loadfsinfo(nmp, &fs);
330 	    nfscl_loadsbinfo(nmp, &sb, sbp);
331 	    sbp->f_iosize = newnfs_iosize(nmp);
332 	    mtx_unlock(&nmp->nm_mtx);
333 	    if (sbp != &mp->mnt_stat) {
334 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
335 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
336 	    }
337 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
338 	} else if (NFS_ISV4(vp)) {
339 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
340 	}
341 	vput(vp);
342 	vfs_unbusy(mp);
343 	return (error);
344 }
345 
346 /*
347  * nfs version 3 fsinfo rpc call
348  */
349 int
350 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
351     struct thread *td)
352 {
353 	struct nfsfsinfo fs;
354 	struct nfsvattr nfsva;
355 	int error, attrflag;
356 
357 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
358 	if (!error) {
359 		if (attrflag)
360 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
361 			    1);
362 		mtx_lock(&nmp->nm_mtx);
363 		nfscl_loadfsinfo(nmp, &fs);
364 		mtx_unlock(&nmp->nm_mtx);
365 	}
366 	return (error);
367 }
368 
369 /*
370  * Mount a remote root fs via. nfs. This depends on the info in the
371  * nfs_diskless structure that has been filled in properly by some primary
372  * bootstrap.
373  * It goes something like this:
374  * - do enough of "ifconfig" by calling ifioctl() so that the system
375  *   can talk to the server
376  * - If nfs_diskless.mygateway is filled in, use that address as
377  *   a default gateway.
378  * - build the rootfs mount point and call mountnfs() to do the rest.
379  *
380  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
381  * structure, as well as other global NFS client variables here, as
382  * nfs_mountroot() will be called once in the boot before any other NFS
383  * client activity occurs.
384  */
385 static int
386 nfs_mountroot(struct mount *mp)
387 {
388 	struct thread *td = curthread;
389 	struct nfsv3_diskless *nd = &nfsv3_diskless;
390 	struct socket *so;
391 	struct vnode *vp;
392 	struct ifreq ir;
393 	int error;
394 	u_long l;
395 	char buf[128];
396 	char *cp;
397 
398 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
399 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
400 #elif defined(NFS_ROOT)
401 	nfs_setup_diskless();
402 #endif
403 
404 	if (nfs_diskless_valid == 0)
405 		return (-1);
406 	if (nfs_diskless_valid == 1)
407 		nfs_convert_diskless();
408 
409 	/*
410 	 * XXX splnet, so networks will receive...
411 	 */
412 	splnet();
413 
414 	/*
415 	 * Do enough of ifconfig(8) so that the critical net interface can
416 	 * talk to the server.
417 	 */
418 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
419 	    td->td_ucred, td);
420 	if (error)
421 		panic("nfs_mountroot: socreate(%04x): %d",
422 			nd->myif.ifra_addr.sa_family, error);
423 
424 #if 0 /* XXX Bad idea */
425 	/*
426 	 * We might not have been told the right interface, so we pass
427 	 * over the first ten interfaces of the same kind, until we get
428 	 * one of them configured.
429 	 */
430 
431 	for (i = strlen(nd->myif.ifra_name) - 1;
432 		nd->myif.ifra_name[i] >= '0' &&
433 		nd->myif.ifra_name[i] <= '9';
434 		nd->myif.ifra_name[i] ++) {
435 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
436 		if(!error)
437 			break;
438 	}
439 #endif
440 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
441 	if (error)
442 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
443 	if ((cp = getenv("boot.netif.mtu")) != NULL) {
444 		ir.ifr_mtu = strtol(cp, NULL, 10);
445 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
446 		freeenv(cp);
447 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
448 		if (error)
449 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
450 	}
451 	soclose(so);
452 
453 	/*
454 	 * If the gateway field is filled in, set it as the default route.
455 	 * Note that pxeboot will set a default route of 0 if the route
456 	 * is not set by the DHCP server.  Check also for a value of 0
457 	 * to avoid panicking inappropriately in that situation.
458 	 */
459 	if (nd->mygateway.sin_len != 0 &&
460 	    nd->mygateway.sin_addr.s_addr != 0) {
461 		struct sockaddr_in mask, sin;
462 
463 		bzero((caddr_t)&mask, sizeof(mask));
464 		sin = mask;
465 		sin.sin_family = AF_INET;
466 		sin.sin_len = sizeof(sin);
467                 /* XXX MRT use table 0 for this sort of thing */
468 		CURVNET_SET(TD_TO_VNET(td));
469 		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
470 		    (struct sockaddr *)&nd->mygateway,
471 		    (struct sockaddr *)&mask,
472 		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
473 		CURVNET_RESTORE();
474 		if (error)
475 			panic("nfs_mountroot: RTM_ADD: %d", error);
476 	}
477 
478 	/*
479 	 * Create the rootfs mount point.
480 	 */
481 	nd->root_args.fh = nd->root_fh;
482 	nd->root_args.fhsize = nd->root_fhsize;
483 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
484 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
485 		(l >> 24) & 0xff, (l >> 16) & 0xff,
486 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
487 	printf("NFS ROOT: %s\n", buf);
488 	nd->root_args.hostname = buf;
489 	if ((error = nfs_mountdiskless(buf,
490 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
491 		return (error);
492 	}
493 
494 	/*
495 	 * This is not really an nfs issue, but it is much easier to
496 	 * set hostname here and then let the "/etc/rc.xxx" files
497 	 * mount the right /var based upon its preset value.
498 	 */
499 	mtx_lock(&prison0.pr_mtx);
500 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
501 	    sizeof(prison0.pr_hostname));
502 	mtx_unlock(&prison0.pr_mtx);
503 	inittodr(ntohl(nd->root_time));
504 	return (0);
505 }
506 
507 /*
508  * Internal version of mount system call for diskless setup.
509  */
510 static int
511 nfs_mountdiskless(char *path,
512     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
513     struct vnode **vpp, struct mount *mp)
514 {
515 	struct sockaddr *nam;
516 	int dirlen, error;
517 	char *dirpath;
518 
519 	/*
520 	 * Find the directory path in "path", which also has the server's
521 	 * name/ip address in it.
522 	 */
523 	dirpath = strchr(path, ':');
524 	if (dirpath != NULL)
525 		dirlen = strlen(++dirpath);
526 	else
527 		dirlen = 0;
528 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
529 	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
530 	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
531 	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
532 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
533 		return (error);
534 	}
535 	return (0);
536 }
537 
538 static void
539 nfs_sec_name(char *sec, int *flagsp)
540 {
541 	if (!strcmp(sec, "krb5"))
542 		*flagsp |= NFSMNT_KERB;
543 	else if (!strcmp(sec, "krb5i"))
544 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
545 	else if (!strcmp(sec, "krb5p"))
546 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
547 }
548 
549 static void
550 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
551     const char *hostname, struct ucred *cred, struct thread *td)
552 {
553 	int s;
554 	int adjsock;
555 	char *p;
556 
557 	s = splnet();
558 
559 	/*
560 	 * Set read-only flag if requested; otherwise, clear it if this is
561 	 * an update.  If this is not an update, then either the read-only
562 	 * flag is already clear, or this is a root mount and it was set
563 	 * intentionally at some previous point.
564 	 */
565 	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
566 		MNT_ILOCK(mp);
567 		mp->mnt_flag |= MNT_RDONLY;
568 		MNT_IUNLOCK(mp);
569 	} else if (mp->mnt_flag & MNT_UPDATE) {
570 		MNT_ILOCK(mp);
571 		mp->mnt_flag &= ~MNT_RDONLY;
572 		MNT_IUNLOCK(mp);
573 	}
574 
575 	/*
576 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
577 	 * no sense in that context.  Also, set up appropriate retransmit
578 	 * and soft timeout behavior.
579 	 */
580 	if (argp->sotype == SOCK_STREAM) {
581 		nmp->nm_flag &= ~NFSMNT_NOCONN;
582 		nmp->nm_timeo = NFS_MAXTIMEO;
583 		if ((argp->flags & NFSMNT_NFSV4) != 0)
584 			nmp->nm_retry = INT_MAX;
585 		else
586 			nmp->nm_retry = NFS_RETRANS_TCP;
587 	}
588 
589 	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
590 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
591 		argp->flags &= ~NFSMNT_RDIRPLUS;
592 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
593 	}
594 
595 	/* Re-bind if rsrvd port requested and wasn't on one */
596 	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
597 		  && (argp->flags & NFSMNT_RESVPORT);
598 	/* Also re-bind if we're switching to/from a connected UDP socket */
599 	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
600 		    (argp->flags & NFSMNT_NOCONN));
601 
602 	/* Update flags atomically.  Don't change the lock bits. */
603 	nmp->nm_flag = argp->flags | nmp->nm_flag;
604 	splx(s);
605 
606 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
607 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
608 		if (nmp->nm_timeo < NFS_MINTIMEO)
609 			nmp->nm_timeo = NFS_MINTIMEO;
610 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
611 			nmp->nm_timeo = NFS_MAXTIMEO;
612 	}
613 
614 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
615 		nmp->nm_retry = argp->retrans;
616 		if (nmp->nm_retry > NFS_MAXREXMIT)
617 			nmp->nm_retry = NFS_MAXREXMIT;
618 	}
619 
620 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
621 		nmp->nm_wsize = argp->wsize;
622 		/* Round down to multiple of blocksize */
623 		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
624 		if (nmp->nm_wsize <= 0)
625 			nmp->nm_wsize = NFS_FABLKSIZE;
626 	}
627 
628 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
629 		nmp->nm_rsize = argp->rsize;
630 		/* Round down to multiple of blocksize */
631 		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
632 		if (nmp->nm_rsize <= 0)
633 			nmp->nm_rsize = NFS_FABLKSIZE;
634 	}
635 
636 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
637 		nmp->nm_readdirsize = argp->readdirsize;
638 	}
639 
640 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
641 		nmp->nm_acregmin = argp->acregmin;
642 	else
643 		nmp->nm_acregmin = NFS_MINATTRTIMO;
644 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
645 		nmp->nm_acregmax = argp->acregmax;
646 	else
647 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
648 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
649 		nmp->nm_acdirmin = argp->acdirmin;
650 	else
651 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
652 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
653 		nmp->nm_acdirmax = argp->acdirmax;
654 	else
655 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
656 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
657 		nmp->nm_acdirmin = nmp->nm_acdirmax;
658 	if (nmp->nm_acregmin > nmp->nm_acregmax)
659 		nmp->nm_acregmin = nmp->nm_acregmax;
660 
661 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
662 		if (argp->readahead <= NFS_MAXRAHEAD)
663 			nmp->nm_readahead = argp->readahead;
664 		else
665 			nmp->nm_readahead = NFS_MAXRAHEAD;
666 	}
667 	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
668 		if (argp->wcommitsize < nmp->nm_wsize)
669 			nmp->nm_wcommitsize = nmp->nm_wsize;
670 		else
671 			nmp->nm_wcommitsize = argp->wcommitsize;
672 	}
673 
674 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
675 		    (nmp->nm_soproto != argp->proto));
676 
677 	if (nmp->nm_client != NULL && adjsock) {
678 		int haslock = 0, error = 0;
679 
680 		if (nmp->nm_sotype == SOCK_STREAM) {
681 			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
682 			if (!error)
683 				haslock = 1;
684 		}
685 		if (!error) {
686 		    newnfs_disconnect(&nmp->nm_sockreq);
687 		    if (haslock)
688 			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
689 		    nmp->nm_sotype = argp->sotype;
690 		    nmp->nm_soproto = argp->proto;
691 		    if (nmp->nm_sotype == SOCK_DGRAM)
692 			while (newnfs_connect(nmp, &nmp->nm_sockreq,
693 			    cred, td, 0)) {
694 				printf("newnfs_args: retrying connect\n");
695 				(void) nfs_catnap(PSOCK, 0, "newnfscon");
696 			}
697 		}
698 	} else {
699 		nmp->nm_sotype = argp->sotype;
700 		nmp->nm_soproto = argp->proto;
701 	}
702 
703 	if (hostname != NULL) {
704 		strlcpy(nmp->nm_hostname, hostname,
705 		    sizeof(nmp->nm_hostname));
706 		p = strchr(nmp->nm_hostname, ':');
707 		if (p != NULL)
708 			*p = '\0';
709 	}
710 }
711 
/*
 * NULL-terminated list of the mount option names nfs_mount() passes to
 * vfs_filteropt().  The order of the entries is unchanged; they are only
 * laid out in groups for readability.
 */
static const char *nfs_opts[] = {
	"from", "nfs_args",
	"noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
	"noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
	"async",
	"noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize", "soft", "hard",
	"mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans",
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeout", "addr", "fh",
	"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath", "minorversion",
	"nametimeo", "negnametimeo", "nocto", "pnfs", "wcommitsize",
	NULL
};
722 
723 /*
724  * VFS Operations.
725  *
726  * mount system call
727  * It seems a bit dumb to copyinstr() the host and path here and then
728  * bcopy() them in mountnfs(), but I wanted to detect errors before
729  * doing the sockargs() call because sockargs() allocates an mbuf and
730  * an error after that means that I have to release the mbuf.
731  */
732 /* ARGSUSED */
733 static int
734 nfs_mount(struct mount *mp)
735 {
736 	struct nfs_args args = {
737 	    .version = NFS_ARGSVERSION,
738 	    .addr = NULL,
739 	    .addrlen = sizeof (struct sockaddr_in),
740 	    .sotype = SOCK_STREAM,
741 	    .proto = 0,
742 	    .fh = NULL,
743 	    .fhsize = 0,
744 	    .flags = NFSMNT_RESVPORT,
745 	    .wsize = NFS_WSIZE,
746 	    .rsize = NFS_RSIZE,
747 	    .readdirsize = NFS_READDIRSIZE,
748 	    .timeo = 10,
749 	    .retrans = NFS_RETRANS,
750 	    .readahead = NFS_DEFRAHEAD,
751 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
752 	    .hostname = NULL,
753 	    .acregmin = NFS_MINATTRTIMO,
754 	    .acregmax = NFS_MAXATTRTIMO,
755 	    .acdirmin = NFS_MINDIRATTRTIMO,
756 	    .acdirmax = NFS_MAXDIRATTRTIMO,
757 	};
758 	int error = 0, ret, len;
759 	struct sockaddr *nam = NULL;
760 	struct vnode *vp;
761 	struct thread *td;
762 	char hst[MNAMELEN];
763 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
764 	char *opt, *name, *secname;
765 	int nametimeo = NFS_DEFAULT_NAMETIMEO;
766 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
767 	int minvers = 0;
768 	int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
769 	size_t hstlen;
770 
771 	has_nfs_args_opt = 0;
772 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
773 		error = EINVAL;
774 		goto out;
775 	}
776 
777 	td = curthread;
778 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
779 		error = nfs_mountroot(mp);
780 		goto out;
781 	}
782 
783 	nfscl_init();
784 
785 	/*
786 	 * The old mount_nfs program passed the struct nfs_args
787 	 * from userspace to kernel.  The new mount_nfs program
788 	 * passes string options via nmount() from userspace to kernel
789 	 * and we populate the struct nfs_args in the kernel.
790 	 */
791 	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
792 		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
793 		    sizeof(args));
794 		if (error != 0)
795 			goto out;
796 
797 		if (args.version != NFS_ARGSVERSION) {
798 			error = EPROGMISMATCH;
799 			goto out;
800 		}
801 		has_nfs_args_opt = 1;
802 	}
803 
804 	/* Handle the new style options. */
805 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
806 		args.flags |= NFSMNT_NOCONN;
807 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
808 		args.flags |= NFSMNT_NOCONN;
809 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
810 		args.flags |= NFSMNT_NOLOCKD;
811 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
812 		args.flags &= ~NFSMNT_NOLOCKD;
813 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
814 		args.flags |= NFSMNT_INT;
815 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
816 		args.flags |= NFSMNT_RDIRPLUS;
817 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
818 		args.flags |= NFSMNT_RESVPORT;
819 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
820 		args.flags &= ~NFSMNT_RESVPORT;
821 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
822 		args.flags |= NFSMNT_SOFT;
823 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
824 		args.flags &= ~NFSMNT_SOFT;
825 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
826 		args.sotype = SOCK_DGRAM;
827 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
828 		args.sotype = SOCK_DGRAM;
829 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
830 		args.sotype = SOCK_STREAM;
831 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
832 		args.flags |= NFSMNT_NFSV3;
833 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
834 		args.flags |= NFSMNT_NFSV4;
835 		args.sotype = SOCK_STREAM;
836 	}
837 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
838 		args.flags |= NFSMNT_ALLGSSNAME;
839 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
840 		args.flags |= NFSMNT_NOCTO;
841 	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
842 		args.flags |= NFSMNT_PNFS;
843 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
844 		if (opt == NULL) {
845 			vfs_mount_error(mp, "illegal readdirsize");
846 			error = EINVAL;
847 			goto out;
848 		}
849 		ret = sscanf(opt, "%d", &args.readdirsize);
850 		if (ret != 1 || args.readdirsize <= 0) {
851 			vfs_mount_error(mp, "illegal readdirsize: %s",
852 			    opt);
853 			error = EINVAL;
854 			goto out;
855 		}
856 		args.flags |= NFSMNT_READDIRSIZE;
857 	}
858 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
859 		if (opt == NULL) {
860 			vfs_mount_error(mp, "illegal readahead");
861 			error = EINVAL;
862 			goto out;
863 		}
864 		ret = sscanf(opt, "%d", &args.readahead);
865 		if (ret != 1 || args.readahead <= 0) {
866 			vfs_mount_error(mp, "illegal readahead: %s",
867 			    opt);
868 			error = EINVAL;
869 			goto out;
870 		}
871 		args.flags |= NFSMNT_READAHEAD;
872 	}
873 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
874 		if (opt == NULL) {
875 			vfs_mount_error(mp, "illegal wsize");
876 			error = EINVAL;
877 			goto out;
878 		}
879 		ret = sscanf(opt, "%d", &args.wsize);
880 		if (ret != 1 || args.wsize <= 0) {
881 			vfs_mount_error(mp, "illegal wsize: %s",
882 			    opt);
883 			error = EINVAL;
884 			goto out;
885 		}
886 		args.flags |= NFSMNT_WSIZE;
887 	}
888 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
889 		if (opt == NULL) {
890 			vfs_mount_error(mp, "illegal rsize");
891 			error = EINVAL;
892 			goto out;
893 		}
894 		ret = sscanf(opt, "%d", &args.rsize);
895 		if (ret != 1 || args.rsize <= 0) {
896 			vfs_mount_error(mp, "illegal wsize: %s",
897 			    opt);
898 			error = EINVAL;
899 			goto out;
900 		}
901 		args.flags |= NFSMNT_RSIZE;
902 	}
903 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
904 		if (opt == NULL) {
905 			vfs_mount_error(mp, "illegal retrans");
906 			error = EINVAL;
907 			goto out;
908 		}
909 		ret = sscanf(opt, "%d", &args.retrans);
910 		if (ret != 1 || args.retrans <= 0) {
911 			vfs_mount_error(mp, "illegal retrans: %s",
912 			    opt);
913 			error = EINVAL;
914 			goto out;
915 		}
916 		args.flags |= NFSMNT_RETRANS;
917 	}
918 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
919 		ret = sscanf(opt, "%d", &args.acregmin);
920 		if (ret != 1 || args.acregmin < 0) {
921 			vfs_mount_error(mp, "illegal acregmin: %s",
922 			    opt);
923 			error = EINVAL;
924 			goto out;
925 		}
926 		args.flags |= NFSMNT_ACREGMIN;
927 	}
928 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
929 		ret = sscanf(opt, "%d", &args.acregmax);
930 		if (ret != 1 || args.acregmax < 0) {
931 			vfs_mount_error(mp, "illegal acregmax: %s",
932 			    opt);
933 			error = EINVAL;
934 			goto out;
935 		}
936 		args.flags |= NFSMNT_ACREGMAX;
937 	}
938 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
939 		ret = sscanf(opt, "%d", &args.acdirmin);
940 		if (ret != 1 || args.acdirmin < 0) {
941 			vfs_mount_error(mp, "illegal acdirmin: %s",
942 			    opt);
943 			error = EINVAL;
944 			goto out;
945 		}
946 		args.flags |= NFSMNT_ACDIRMIN;
947 	}
948 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
949 		ret = sscanf(opt, "%d", &args.acdirmax);
950 		if (ret != 1 || args.acdirmax < 0) {
951 			vfs_mount_error(mp, "illegal acdirmax: %s",
952 			    opt);
953 			error = EINVAL;
954 			goto out;
955 		}
956 		args.flags |= NFSMNT_ACDIRMAX;
957 	}
958 	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
959 		ret = sscanf(opt, "%d", &args.wcommitsize);
960 		if (ret != 1 || args.wcommitsize < 0) {
961 			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
962 			error = EINVAL;
963 			goto out;
964 		}
965 		args.flags |= NFSMNT_WCOMMITSIZE;
966 	}
967 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
968 		ret = sscanf(opt, "%d", &args.timeo);
969 		if (ret != 1 || args.timeo <= 0) {
970 			vfs_mount_error(mp, "illegal timeout: %s",
971 			    opt);
972 			error = EINVAL;
973 			goto out;
974 		}
975 		args.flags |= NFSMNT_TIMEO;
976 	}
977 	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
978 		ret = sscanf(opt, "%d", &nametimeo);
979 		if (ret != 1 || nametimeo < 0) {
980 			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
981 			error = EINVAL;
982 			goto out;
983 		}
984 	}
985 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
986 	    == 0) {
987 		ret = sscanf(opt, "%d", &negnametimeo);
988 		if (ret != 1 || negnametimeo < 0) {
989 			vfs_mount_error(mp, "illegal negnametimeo: %s",
990 			    opt);
991 			error = EINVAL;
992 			goto out;
993 		}
994 	}
995 	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
996 	    0) {
997 		ret = sscanf(opt, "%d", &minvers);
998 		if (ret != 1 || minvers < 0 || minvers > 1 ||
999 		    (args.flags & NFSMNT_NFSV4) == 0) {
1000 			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1001 			error = EINVAL;
1002 			goto out;
1003 		}
1004 	}
1005 	if (vfs_getopt(mp->mnt_optnew, "sec",
1006 		(void **) &secname, NULL) == 0)
1007 		nfs_sec_name(secname, &args.flags);
1008 
1009 	if (mp->mnt_flag & MNT_UPDATE) {
1010 		struct nfsmount *nmp = VFSTONFS(mp);
1011 
1012 		if (nmp == NULL) {
1013 			error = EIO;
1014 			goto out;
1015 		}
1016 
1017 		/*
1018 		 * If a change from TCP->UDP is done and there are thread(s)
1019 		 * that have I/O RPC(s) in progress with a tranfer size
1020 		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1021 		 * hung, retrying the RPC(s) forever. Usually these threads
1022 		 * will be seen doing an uninterruptible sleep on wait channel
1023 		 * "newnfsreq" (truncated to "newnfsre" by procstat).
1024 		 */
1025 		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1026 			tprintf(td->td_proc, LOG_WARNING,
1027 	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1028 
1029 		/*
1030 		 * When doing an update, we can't change version,
1031 		 * security, switch lockd strategies or change cookie
1032 		 * translation
1033 		 */
1034 		args.flags = (args.flags &
1035 		    ~(NFSMNT_NFSV3 |
1036 		      NFSMNT_NFSV4 |
1037 		      NFSMNT_KERB |
1038 		      NFSMNT_INTEGRITY |
1039 		      NFSMNT_PRIVACY |
1040 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1041 		    (nmp->nm_flag &
1042 			(NFSMNT_NFSV3 |
1043 			 NFSMNT_NFSV4 |
1044 			 NFSMNT_KERB |
1045 			 NFSMNT_INTEGRITY |
1046 			 NFSMNT_PRIVACY |
1047 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1048 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1049 		goto out;
1050 	}
1051 
1052 	/*
1053 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1054 	 * or no-connection mode for those protocols that support
1055 	 * no-connection mode (the flag will be cleared later for protocols
1056 	 * that do not support no-connection mode).  This will allow a client
1057 	 * to receive replies from a different IP then the request was
1058 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1059 	 * not 0.
1060 	 */
1061 	if (nfs_ip_paranoia == 0)
1062 		args.flags |= NFSMNT_NOCONN;
1063 
1064 	if (has_nfs_args_opt != 0) {
1065 		/*
1066 		 * In the 'nfs_args' case, the pointers in the args
1067 		 * structure are in userland - we copy them in here.
1068 		 */
1069 		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1070 			vfs_mount_error(mp, "Bad file handle");
1071 			error = EINVAL;
1072 			goto out;
1073 		}
1074 		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1075 		    args.fhsize);
1076 		if (error != 0)
1077 			goto out;
1078 		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1079 		if (error != 0)
1080 			goto out;
1081 		bzero(&hst[hstlen], MNAMELEN - hstlen);
1082 		args.hostname = hst;
1083 		/* sockargs() call must be after above copyin() calls */
1084 		error = getsockaddr(&nam, (caddr_t)args.addr,
1085 		    args.addrlen);
1086 		if (error != 0)
1087 			goto out;
1088 	} else {
1089 		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1090 		    &args.fhsize) == 0) {
1091 			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1092 				vfs_mount_error(mp, "Bad file handle");
1093 				error = EINVAL;
1094 				goto out;
1095 			}
1096 			bcopy(args.fh, nfh, args.fhsize);
1097 		} else {
1098 			args.fhsize = 0;
1099 		}
1100 		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1101 		    (void **)&args.hostname, &len);
1102 		if (args.hostname == NULL) {
1103 			vfs_mount_error(mp, "Invalid hostname");
1104 			error = EINVAL;
1105 			goto out;
1106 		}
1107 		bcopy(args.hostname, hst, MNAMELEN);
1108 		hst[MNAMELEN - 1] = '\0';
1109 	}
1110 
1111 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1112 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1113 	else
1114 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1115 	srvkrbnamelen = strlen(srvkrbname);
1116 
1117 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1118 		strlcpy(krbname, name, sizeof (krbname));
1119 	else
1120 		krbname[0] = '\0';
1121 	krbnamelen = strlen(krbname);
1122 
1123 	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1124 		strlcpy(dirpath, name, sizeof (dirpath));
1125 	else
1126 		dirpath[0] = '\0';
1127 	dirlen = strlen(dirpath);
1128 
1129 	if (has_nfs_args_opt == 0) {
1130 		if (vfs_getopt(mp->mnt_optnew, "addr",
1131 		    (void **)&args.addr, &args.addrlen) == 0) {
1132 			if (args.addrlen > SOCK_MAXADDRLEN) {
1133 				error = ENAMETOOLONG;
1134 				goto out;
1135 			}
1136 			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1137 			bcopy(args.addr, nam, args.addrlen);
1138 			nam->sa_len = args.addrlen;
1139 		} else {
1140 			vfs_mount_error(mp, "No server address");
1141 			error = EINVAL;
1142 			goto out;
1143 		}
1144 	}
1145 
1146 	args.fh = nfh;
1147 	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1148 	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1149 	    nametimeo, negnametimeo, minvers);
1150 out:
1151 	if (!error) {
1152 		MNT_ILOCK(mp);
1153 		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF;
1154 		MNT_IUNLOCK(mp);
1155 	}
1156 	return (error);
1157 }
1158 
1159 
1160 /*
1161  * VFS Operations.
1162  *
1163  * mount system call
1164  * It seems a bit dumb to copyinstr() the host and path here and then
1165  * bcopy() them in mountnfs(), but I wanted to detect errors before
1166  * doing the sockargs() call because sockargs() allocates an mbuf and
1167  * an error after that means that I have to release the mbuf.
1168  */
1169 /* ARGSUSED */
1170 static int
1171 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1172 {
1173 	int error;
1174 	struct nfs_args args;
1175 
1176 	error = copyin(data, &args, sizeof (struct nfs_args));
1177 	if (error)
1178 		return error;
1179 
1180 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1181 
1182 	error = kernel_mount(ma, flags);
1183 	return (error);
1184 }
1185 
/*
 * Common code for mount and mountroot
 *
 * Allocates and fills in a new struct nfsmount for "mp", connects to the
 * server and obtains a referenced root vnode.
 *
 * argp      - mount arguments (flags, file handle, socket type, ...)
 * mp        - mount point being set up; mnt_data is pointed at the new
 *             nfsmount structure
 * nam       - server address; stored in nm_nam and freed here on every
 *             failure path as well as in the MNT_UPDATE case
 * hst       - host name; passed to nfs_decode_args() and copied into
 *             mnt_stat.f_mntfromname
 * krbname, dirpath, srvkrbname (with their lengths) - names that are
 *             copied into the space allocated together with the
 *             nfsmount structure
 * vpp       - set to the root vnode, which is returned unlocked but
 *             still referenced on success
 * cred, td  - credentials and thread used for the RPCs
 * nametimeo, negnametimeo - stored in nm_nametimeo / nm_negnametimeo
 * minvers   - NFSv4 minor version; only used when NFSMNT_NFSV4 is set
 *
 * Returns 0 on success, otherwise an error number; on failure the
 * nfsmount structure and "nam" have been freed.
 */
static int
mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
    struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
    int minvers)
{
	struct nfsmount *nmp;
	struct nfsnode *np;
	int error, trycnt, ret;
	struct nfsvattr nfsva;
	struct nfsclclient *clp;
	struct nfsclds *dsp, *tdsp;
	uint32_t lease;
	/* Seeded from nfsboottime on first use, then incremented per mount. */
	static u_int64_t clval = 0;

	NFSCL_DEBUG(3, "in mnt\n");
	clp = NULL;
	if (mp->mnt_flag & MNT_UPDATE) {
		/* Updates are not processed here; just release "nam". */
		nmp = VFSTONFS(mp);
		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
		FREE(nam, M_SONAME);
		return (0);
	} else {
		/* The allocation is sized to also hold the three names. */
		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
		    krbnamelen + dirlen + srvkrbnamelen + 2,
		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
		TAILQ_INIT(&nmp->nm_bufq);
		if (clval == 0)
			clval = (u_int64_t)nfsboottime.tv_sec;
		nmp->nm_clval = clval++;
		nmp->nm_krbnamelen = krbnamelen;
		nmp->nm_dirpathlen = dirlen;
		nmp->nm_srvkrbnamelen = srvkrbnamelen;
		if (td->td_ucred->cr_uid != (uid_t)0) {
			/*
			 * nm_uid is used to get KerberosV credentials for
			 * the nfsv4 state handling operations if there is
			 * no host based principal set. Use the uid of
			 * this user if not root, since they are doing the
			 * mount. I don't think setting this for root will
			 * work, since root normally does not have user
			 * credentials in a credentials cache.
			 */
			nmp->nm_uid = td->td_ucred->cr_uid;
		} else {
			/*
			 * Just set to -1, so it won't be used.
			 */
			nmp->nm_uid = (uid_t)-1;
		}

		/* Copy and null terminate all the names */
		if (nmp->nm_krbnamelen > 0) {
			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
		}
		if (nmp->nm_dirpathlen > 0) {
			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
			    nmp->nm_dirpathlen);
			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
			    + 1] = '\0';
		}
		if (nmp->nm_srvkrbnamelen > 0) {
			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
			    nmp->nm_srvkrbnamelen);
			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
			    + nmp->nm_srvkrbnamelen + 2] = '\0';
		}
		/* This reference is dropped with crfree() at "bad" below. */
		nmp->nm_sockreq.nr_cred = crhold(cred);
		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
		mp->mnt_data = nmp;
		nmp->nm_getinfo = nfs_getnlminfo;
		nmp->nm_vinvalbuf = ncl_vinvalbuf;
	}
	vfs_getnewfsid(mp);
	nmp->nm_mountp = mp;
	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);

	/*
	 * Since nfs_decode_args() might optionally set them, these
	 * need to be set to defaults before the call, so that the
	 * optional settings aren't overwritten.
	 */
	nmp->nm_nametimeo = nametimeo;
	nmp->nm_negnametimeo = negnametimeo;
	nmp->nm_timeo = NFS_TIMEO;
	nmp->nm_retry = NFS_RETRANS;
	nmp->nm_readahead = NFS_DEFRAHEAD;
	if (desiredvnodes >= 11000)
		nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
	else
		nmp->nm_wcommitsize = hibufspace / 10;
	if ((argp->flags & NFSMNT_NFSV4) != 0)
		nmp->nm_minorvers = minvers;
	else
		nmp->nm_minorvers = 0;

	nfs_decode_args(mp, nmp, argp, hst, cred, td);

	/*
	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
	 * high, depending on whether we end up with negative offsets in
	 * the client or server somewhere.  2GB-1 may be safer.
	 *
	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
	 * that we can handle until we find out otherwise.
	 */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
		nmp->nm_maxfilesize = 0xffffffffLL;
	else
		nmp->nm_maxfilesize = OFF_MAX;

	/*
	 * For V2 the transfer sizes are set to the NFS_* defaults here,
	 * i.e. after nfs_decode_args() has run.
	 */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
		nmp->nm_wsize = NFS_WSIZE;
		nmp->nm_rsize = NFS_RSIZE;
		nmp->nm_readdirsize = NFS_READDIRSIZE;
	}
	nmp->nm_numgrps = NFS_MAXGRPS;
	/* Negative delay values are treated as 0. */
	nmp->nm_tprintf_delay = nfs_tprintf_delay;
	if (nmp->nm_tprintf_delay < 0)
		nmp->nm_tprintf_delay = 0;
	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
	if (nmp->nm_tprintf_initial_delay < 0)
		nmp->nm_tprintf_initial_delay = 0;
	nmp->nm_fhsize = argp->fhsize;
	if (nmp->nm_fhsize > 0)
		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
	nmp->nm_nam = nam;
	/* Set up the sockets and per-host congestion */
	nmp->nm_sotype = argp->sotype;
	nmp->nm_soproto = argp->proto;
	nmp->nm_sockreq.nr_prog = NFS_PROG;
	if ((argp->flags & NFSMNT_NFSV4))
		nmp->nm_sockreq.nr_vers = NFS_VER4;
	else if ((argp->flags & NFSMNT_NFSV3))
		nmp->nm_sockreq.nr_vers = NFS_VER3;
	else
		nmp->nm_sockreq.nr_vers = NFS_VER2;


	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
		goto bad;
	/* For NFSv4.1, get the clientid now. */
	if (nmp->nm_minorvers > 0) {
		NFSCL_DEBUG(3, "at getcl\n");
		error = nfscl_getcl(mp, cred, td, 0, &clp);
		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
		if (error != 0)
			goto bad;
	}

	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
	    nmp->nm_dirpathlen > 0) {
		NFSCL_DEBUG(3, "in dirp\n");
		/*
		 * If the fhsize on the mount point == 0 for V4, the mount
		 * path needs to be looked up.
		 */
		/* The lookup is attempted at most three times. */
		trycnt = 3;
		do {
			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
			    cred, td);
			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
			if (error)
				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
		} while (error && --trycnt > 0);
		if (error) {
			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
			goto bad;
		}
	}

	/*
	 * A reference count is needed on the nfsnode representing the
	 * remote root.  If this object is not persistent, then backward
	 * traversals of the mount point (i.e. "..") will not work if
	 * the nfsnode gets flushed out of the cache. Ufs does not have
	 * this problem, because one can identify root inodes by their
	 * number == ROOTINO (2).
	 */
	if (nmp->nm_fhsize > 0) {
		/*
		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
		 * non-zero for the root vnode. f_iosize will be set correctly
		 * by nfs_statfs() before any I/O occurs.
		 */
		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
		    LK_EXCLUSIVE);
		if (error)
			goto bad;
		*vpp = NFSTOV(np);

		/*
		 * Get file attributes and transfer parameters for the
		 * mountpoint.  This has the side effect of filling in
		 * (*vpp)->v_type with the correct value.
		 */
		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
		    cred, td, &nfsva, NULL, &lease);
		if (ret) {
			/*
			 * Just set default values to get things going.
			 */
			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
			nfsva.na_vattr.va_type = VDIR;
			nfsva.na_vattr.va_mode = 0777;
			nfsva.na_vattr.va_nlink = 100;
			nfsva.na_vattr.va_uid = (uid_t)0;
			nfsva.na_vattr.va_gid = (gid_t)0;
			nfsva.na_vattr.va_fileid = 2;
			nfsva.na_vattr.va_gen = 1;
			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
			nfsva.na_vattr.va_size = 512 * 1024;
			lease = 60;
		}
		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
		/* clp was obtained by nfscl_getcl() above in this case. */
		if (nmp->nm_minorvers > 0) {
			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
			NFSLOCKCLSTATE();
			clp->nfsc_renew = NFSCL_RENEW(lease);
			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
			/* Skip 0 when the revision counter wraps. */
			clp->nfsc_clientidrev++;
			if (clp->nfsc_clientidrev == 0)
				clp->nfsc_clientidrev++;
			NFSUNLOCKCLSTATE();
			/*
			 * Mount will succeed, so the renew thread can be
			 * started now.
			 */
			nfscl_start_renewthread(clp);
			nfscl_clientrelease(clp);
		}
		if (argp->flags & NFSMNT_NFSV3)
			ncl_fsinfo(nmp, *vpp, cred, td);

		/* Mark if the mount point supports NFSv4 ACLs. */
		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
		    ret == 0 &&
		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_NFS4ACLS;
			MNT_IUNLOCK(mp);
		}

		/*
		 * Lose the lock but keep the ref.
		 */
		NFSVOPUNLOCK(*vpp, 0);
		return (0);
	}
	/* No root file handle is available, so the mount cannot succeed. */
	error = EIO;

bad:
	/* Release everything that was set up above. */
	if (clp != NULL)
		nfscl_clientrelease(clp);
	newnfs_disconnect(&nmp->nm_sockreq);
	crfree(nmp->nm_sockreq.nr_cred);
	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
	mtx_destroy(&nmp->nm_mtx);
	if (nmp->nm_clp != NULL) {
		NFSLOCKCLSTATE();
		LIST_REMOVE(nmp->nm_clp, nfsc_list);
		NFSUNLOCKCLSTATE();
		free(nmp->nm_clp, M_NFSCLCLIENT);
	}
	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
		nfscl_freenfsclds(dsp);
	FREE(nmp, M_NEWNFSMNT);
	FREE(nam, M_SONAME);
	return (error);
}
1463 
1464 /*
1465  * unmount system call
1466  */
1467 static int
1468 nfs_unmount(struct mount *mp, int mntflags)
1469 {
1470 	struct thread *td;
1471 	struct nfsmount *nmp;
1472 	int error, flags = 0, i, trycnt = 0;
1473 	struct nfsclds *dsp, *tdsp;
1474 
1475 	td = curthread;
1476 
1477 	if (mntflags & MNT_FORCE)
1478 		flags |= FORCECLOSE;
1479 	nmp = VFSTONFS(mp);
1480 	/*
1481 	 * Goes something like this..
1482 	 * - Call vflush() to clear out vnodes for this filesystem
1483 	 * - Close the socket
1484 	 * - Free up the data structures
1485 	 */
1486 	/* In the forced case, cancel any outstanding requests. */
1487 	if (mntflags & MNT_FORCE) {
1488 		error = newnfs_nmcancelreqs(nmp);
1489 		if (error)
1490 			goto out;
1491 		/* For a forced close, get rid of the renew thread now */
1492 		nfscl_umount(nmp, td);
1493 	}
1494 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1495 	do {
1496 		error = vflush(mp, 1, flags, td);
1497 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1498 			(void) nfs_catnap(PSOCK, error, "newndm");
1499 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1500 	if (error)
1501 		goto out;
1502 
1503 	/*
1504 	 * We are now committed to the unmount.
1505 	 */
1506 	if ((mntflags & MNT_FORCE) == 0)
1507 		nfscl_umount(nmp, td);
1508 	/* Make sure no nfsiods are assigned to this mount. */
1509 	mtx_lock(&ncl_iod_mutex);
1510 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1511 		if (ncl_iodmount[i] == nmp) {
1512 			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1513 			ncl_iodmount[i] = NULL;
1514 		}
1515 	mtx_unlock(&ncl_iod_mutex);
1516 	newnfs_disconnect(&nmp->nm_sockreq);
1517 	crfree(nmp->nm_sockreq.nr_cred);
1518 	FREE(nmp->nm_nam, M_SONAME);
1519 
1520 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1521 	mtx_destroy(&nmp->nm_mtx);
1522 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1523 		nfscl_freenfsclds(dsp);
1524 	FREE(nmp, M_NEWNFSMNT);
1525 out:
1526 	return (error);
1527 }
1528 
1529 /*
1530  * Return root of a filesystem
1531  */
1532 static int
1533 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1534 {
1535 	struct vnode *vp;
1536 	struct nfsmount *nmp;
1537 	struct nfsnode *np;
1538 	int error;
1539 
1540 	nmp = VFSTONFS(mp);
1541 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1542 	if (error)
1543 		return error;
1544 	vp = NFSTOV(np);
1545 	/*
1546 	 * Get transfer parameters and attributes for root vnode once.
1547 	 */
1548 	mtx_lock(&nmp->nm_mtx);
1549 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1550 		mtx_unlock(&nmp->nm_mtx);
1551 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1552 	} else
1553 		mtx_unlock(&nmp->nm_mtx);
1554 	if (vp->v_type == VNON)
1555 	    vp->v_type = VDIR;
1556 	vp->v_vflag |= VV_ROOT;
1557 	*vpp = vp;
1558 	return (0);
1559 }
1560 
1561 /*
1562  * Flush out the buffer cache
1563  */
1564 /* ARGSUSED */
1565 static int
1566 nfs_sync(struct mount *mp, int waitfor)
1567 {
1568 	struct vnode *vp, *mvp;
1569 	struct thread *td;
1570 	int error, allerror = 0;
1571 
1572 	td = curthread;
1573 
1574 	MNT_ILOCK(mp);
1575 	/*
1576 	 * If a forced dismount is in progress, return from here so that
1577 	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1578 	 * calling VFS_UNMOUNT().
1579 	 */
1580 	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1581 		MNT_IUNLOCK(mp);
1582 		return (EBADF);
1583 	}
1584 	MNT_IUNLOCK(mp);
1585 
1586 	/*
1587 	 * Force stale buffer cache information to be flushed.
1588 	 */
1589 loop:
1590 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1591 		/* XXX Racy bv_cnt check. */
1592 		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1593 		    waitfor == MNT_LAZY) {
1594 			VI_UNLOCK(vp);
1595 			continue;
1596 		}
1597 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1598 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1599 			goto loop;
1600 		}
1601 		error = VOP_FSYNC(vp, waitfor, td);
1602 		if (error)
1603 			allerror = error;
1604 		NFSVOPUNLOCK(vp, 0);
1605 		vrele(vp);
1606 	}
1607 	return (allerror);
1608 }
1609 
1610 static int
1611 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1612 {
1613 	struct nfsmount *nmp = VFSTONFS(mp);
1614 	struct vfsquery vq;
1615 	int error;
1616 
1617 	bzero(&vq, sizeof(vq));
1618 	switch (op) {
1619 #if 0
1620 	case VFS_CTL_NOLOCKS:
1621 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1622  		if (req->oldptr != NULL) {
1623  			error = SYSCTL_OUT(req, &val, sizeof(val));
1624  			if (error)
1625  				return (error);
1626  		}
1627  		if (req->newptr != NULL) {
1628  			error = SYSCTL_IN(req, &val, sizeof(val));
1629  			if (error)
1630  				return (error);
1631 			if (val)
1632 				nmp->nm_flag |= NFSMNT_NOLOCKS;
1633 			else
1634 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1635  		}
1636 		break;
1637 #endif
1638 	case VFS_CTL_QUERY:
1639 		mtx_lock(&nmp->nm_mtx);
1640 		if (nmp->nm_state & NFSSTA_TIMEO)
1641 			vq.vq_flags |= VQ_NOTRESP;
1642 		mtx_unlock(&nmp->nm_mtx);
1643 #if 0
1644 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1645 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1646 			vq.vq_flags |= VQ_NOTRESPLOCK;
1647 #endif
1648 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1649 		break;
1650  	case VFS_CTL_TIMEO:
1651  		if (req->oldptr != NULL) {
1652  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1653  			    sizeof(nmp->nm_tprintf_initial_delay));
1654  			if (error)
1655  				return (error);
1656  		}
1657  		if (req->newptr != NULL) {
1658 			error = vfs_suser(mp, req->td);
1659 			if (error)
1660 				return (error);
1661  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1662  			    sizeof(nmp->nm_tprintf_initial_delay));
1663  			if (error)
1664  				return (error);
1665  			if (nmp->nm_tprintf_initial_delay < 0)
1666  				nmp->nm_tprintf_initial_delay = 0;
1667  		}
1668 		break;
1669 	default:
1670 		return (ENOTSUP);
1671 	}
1672 	return (0);
1673 }
1674 
1675 /*
1676  * Extract the information needed by the nlm from the nfs vnode.
1677  */
1678 static void
1679 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1680     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1681     struct timeval *timeop)
1682 {
1683 	struct nfsmount *nmp;
1684 	struct nfsnode *np = VTONFS(vp);
1685 
1686 	nmp = VFSTONFS(vp->v_mount);
1687 	if (fhlenp != NULL)
1688 		*fhlenp = (size_t)np->n_fhp->nfh_len;
1689 	if (fhp != NULL)
1690 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1691 	if (sp != NULL)
1692 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1693 	if (is_v3p != NULL)
1694 		*is_v3p = NFS_ISV3(vp);
1695 	if (sizep != NULL)
1696 		*sizep = np->n_size;
1697 	if (timeop != NULL) {
1698 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1699 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1700 	}
1701 }
1702 
/*
 * Append the option name "opt" to the output when "testval" is non-zero.
 *
 * *buf is the current write position and *blen the space left there;
 * both are advanced past the text that was written.  Nothing happens
 * when testval is 0 or when the name plus its terminating NUL does not
 * fit in the remaining space.  "nmp" is not used.
 */
static __inline void nfscl_printopt(struct nfsmount *nmp, int testval,
    char *opt, char **buf, size_t *blen)
{
	size_t optlen;
	int written;

	if (testval == 0)
		return;
	optlen = strlen(opt);
	if (*blen <= optlen)
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	if (written != optlen)
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
1720 
/*
 * Append "opt=optval" (optval printed in decimal) to the output.
 *
 * *buf is the current write position and *blen the space left there;
 * both are advanced past the text only when the whole option fitted.
 * When it does not fit, the output is left exactly as it was: a partly
 * written option could otherwise show a cut-off and therefore wrong
 * number.  "nmp" is not used.
 */
static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
    char *opt, char **buf, size_t *blen)
{
	int len;

	/* Cheap pre-check: the name, '=' and at least one digit must fit. */
	if (*blen > strlen(opt) + 1) {
		len = snprintf(*buf, *blen, "%s=%d", opt, optval);
		if (len >= 0 && (size_t)len < *blen) {
			*buf += len;
			*blen -= len;
		} else {
			/* Truncated: discard the partial text again. */
			**buf = '\0';
		}
	}
}
1738 
1739 /*
1740  * Load the option flags and values into the buffer.
1741  */
1742 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1743 {
1744 	char *buf;
1745 	size_t blen;
1746 
1747 	buf = buffer;
1748 	blen = buflen;
1749 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1750 	    &blen);
1751 	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1752 		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1753 		    &blen);
1754 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1755 		    &buf, &blen);
1756 	}
1757 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1758 	    &blen);
1759 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1760 	    "nfsv2", &buf, &blen);
1761 	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1762 	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1763 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1764 	    &buf, &blen);
1765 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1766 	    &buf, &blen);
1767 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1768 	    &blen);
1769 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1770 	    &blen);
1771 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1772 	    &blen);
1773 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1774 	    &blen);
1775 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1776 	    &blen);
1777 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1778 	    0, ",lockd", &buf, &blen);
1779 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1780 	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1781 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1782 	    &buf, &blen);
1783 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1784 	    &buf, &blen);
1785 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1786 	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
1787 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1788 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1789 	    &buf, &blen);
1790 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1791 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
1792 	    &buf, &blen);
1793 	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1794 	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1795 	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
1796 	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
1797 	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
1798 	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
1799 	    &blen);
1800 	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
1801 	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
1802 	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
1803 	    &blen);
1804 	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
1805 	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
1806 	    &blen);
1807 	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
1808 	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
1809 }
1810 
1811