xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision d940bfec8c329dd82d8d54efebd81c8aa420503b)
1 /*-
2  * Copyright (c) 1989, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/limits.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/mount.h>
55 #include <sys/proc.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/sockio.h>
59 #include <sys/sysctl.h>
60 #include <sys/vnode.h>
61 #include <sys/signalvar.h>
62 
63 #include <vm/vm.h>
64 #include <vm/vm_extern.h>
65 #include <vm/uma.h>
66 
67 #include <net/if.h>
68 #include <net/route.h>
69 #include <netinet/in.h>
70 
71 #include <fs/nfs/nfsport.h>
72 #include <fs/nfsclient/nfsnode.h>
73 #include <fs/nfsclient/nfsmount.h>
74 #include <fs/nfsclient/nfs.h>
75 #include <nfs/nfsdiskless.h>
76 
/* Kernel feature declaration for this NFS client. */
FEATURE(nfscl, "NFSv4 client");

/*
 * Objects declared here but defined in other source files of the
 * NFS client.
 */
extern int nfscl_ticks;
extern struct timeval nfsboottime;
extern struct nfsstats	newnfsstats;
extern int nfsrv_useacl;
extern int nfscl_debuglevel;
/* Per-slot async I/O daemon arrays, each NFS_MAXASYNCDAEMON entries long. */
extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
extern struct mtx ncl_iod_mutex;
NFSCLSTATEMUTEX;

/* malloc(9) types used for NFS request headers and per-mount structures. */
MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
91 
/* Read/write integer tunables attached to the _vfs_nfs sysctl parent. */
SYSCTL_DECL(_vfs_nfs);
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0, "");
/*
 * NOTE(review): the two entries below pass the delay constant in the
 * argument position where nfs_ip_paranoia above passes OID_AUTO --
 * confirm that this is intentional before changing it.
 */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
/* how long between console messages "nfs server foo not responding" */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
103 
/* Forward declarations of helpers that are private to this file. */
static int	nfs_mountroot(struct mount *);
static void	nfs_sec_name(char *, int *);
static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
		    struct nfs_args *argp, const char *, struct ucred *,
		    struct thread *);
static int	mountnfs(struct nfs_args *, struct mount *,
		    struct sockaddr *, char *, u_char *, int, u_char *, int,
		    u_char *, int, struct vnode **, struct ucred *,
		    struct thread *, int, int, int);
static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
		    struct sockaddr_storage *, int *, off_t *,
		    struct timeval *);
/*
 * VFS entry points, declared through the vfs_*_t function typedefs so
 * that their signatures match the slots of struct vfsops.
 */
static vfs_mount_t nfs_mount;
static vfs_cmount_t nfs_cmount;
static vfs_unmount_t nfs_unmount;
static vfs_root_t nfs_root;
static vfs_statfs_t nfs_statfs;
static vfs_sync_t nfs_sync;
static vfs_sysctl_t nfs_sysctl;
static vfs_purge_t nfs_purge;
124 
/*
 * nfs vfs operations.
 *
 * Operation vector for the "nfs" file system type.  The init/uninit
 * hooks are the ncl_* routines; every other slot is a static function
 * declared in this file.
 */
static struct vfsops nfs_vfsops = {
	.vfs_init =		ncl_init,
	.vfs_mount =		nfs_mount,
	.vfs_cmount =		nfs_cmount,
	.vfs_root =		nfs_root,
	.vfs_statfs =		nfs_statfs,
	.vfs_sync =		nfs_sync,
	.vfs_uninit =		ncl_uninit,
	.vfs_unmount =		nfs_unmount,
	.vfs_sysctl =		nfs_sysctl,
	.vfs_purge =		nfs_purge,
};
/* Associate the vector with the name "nfs" and the given VFCF_* flags. */
VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);

/* So that loader and kldload(2) can find us, wherever we are.. */
MODULE_VERSION(nfs, 1);
/* Modules that "nfs" depends on; each is pinned to version 1. */
MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
MODULE_DEPEND(nfs, krpc, 1, 1, 1);
MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
148 
/*
 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
 * can be shared by both NFS clients. It is declared here so that it
 * will be defined for kernels built without NFS_ROOT, although it
 * isn't used in that case.
 *
 * The fallback definitions below are only compiled when neither
 * NFS_ROOT nor NFSCLIENT is defined.
 */
#if !defined(NFS_ROOT) && !defined(NFSCLIENT)
struct nfs_diskless	nfs_diskless = { { { 0 } } };
struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
int			nfs_diskless_valid = 0;
#endif

/* Read-only view of nfs_diskless_valid. */
SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &nfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

/* Read-only view of nfsv3_diskless.root_hostnam. */
SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");

/* Read-only opaque view of nfsv3_diskless.root_saddr. */
SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
    "%Ssockaddr_in", "Diskless root nfs address");
171 
172 
/* Non-static prototype; no definition is visible in this part of the file. */
void		newnfsargs_ntoh(struct nfs_args *);
/* File-local helpers used by the diskless (NFS root) mount path. */
static int	nfs_mountdiskless(char *,
		    struct sockaddr_in *, struct nfs_args *,
		    struct thread *, struct vnode **, struct mount *);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
180 
181 int
182 newnfs_iosize(struct nfsmount *nmp)
183 {
184 	int iosize, maxio;
185 
186 	/* First, set the upper limit for iosize */
187 	if (nmp->nm_flag & NFSMNT_NFSV4) {
188 		maxio = NFS_MAXBSIZE;
189 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
190 		if (nmp->nm_sotype == SOCK_DGRAM)
191 			maxio = NFS_MAXDGRAMDATA;
192 		else
193 			maxio = NFS_MAXBSIZE;
194 	} else {
195 		maxio = NFS_V2MAXDATA;
196 	}
197 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
198 		nmp->nm_rsize = maxio;
199 	if (nmp->nm_rsize > MAXBSIZE)
200 		nmp->nm_rsize = MAXBSIZE;
201 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
202 		nmp->nm_readdirsize = maxio;
203 	if (nmp->nm_readdirsize > nmp->nm_rsize)
204 		nmp->nm_readdirsize = nmp->nm_rsize;
205 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
206 		nmp->nm_wsize = maxio;
207 	if (nmp->nm_wsize > MAXBSIZE)
208 		nmp->nm_wsize = MAXBSIZE;
209 
210 	/*
211 	 * Calculate the size used for io buffers.  Use the larger
212 	 * of the two sizes to minimise nfs requests but make sure
213 	 * that it is at least one VM page to avoid wasting buffer
214 	 * space.
215 	 */
216 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
217 	iosize = imax(iosize, PAGE_SIZE);
218 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
219 	return (iosize);
220 }
221 
222 static void
223 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
224 {
225 
226 	args->version = NFS_ARGSVERSION;
227 	args->addr = oargs->addr;
228 	args->addrlen = oargs->addrlen;
229 	args->sotype = oargs->sotype;
230 	args->proto = oargs->proto;
231 	args->fh = oargs->fh;
232 	args->fhsize = oargs->fhsize;
233 	args->flags = oargs->flags;
234 	args->wsize = oargs->wsize;
235 	args->rsize = oargs->rsize;
236 	args->readdirsize = oargs->readdirsize;
237 	args->timeo = oargs->timeo;
238 	args->retrans = oargs->retrans;
239 	args->readahead = oargs->readahead;
240 	args->hostname = oargs->hostname;
241 }
242 
243 static void
244 nfs_convert_diskless(void)
245 {
246 
247 	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
248 		sizeof(struct ifaliasreq));
249 	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
250 		sizeof(struct sockaddr_in));
251 	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
252 	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
253 		nfsv3_diskless.root_fhsize = NFSX_MYFH;
254 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
255 	} else {
256 		nfsv3_diskless.root_fhsize = NFSX_V2FH;
257 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
258 	}
259 	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
260 		sizeof(struct sockaddr_in));
261 	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
262 	nfsv3_diskless.root_time = nfs_diskless.root_time;
263 	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
264 		MAXHOSTNAMELEN);
265 	nfs_diskless_valid = 3;
266 }
267 
268 /*
269  * nfs statfs call
270  */
271 static int
272 nfs_statfs(struct mount *mp, struct statfs *sbp)
273 {
274 	struct vnode *vp;
275 	struct thread *td;
276 	struct nfsmount *nmp = VFSTONFS(mp);
277 	struct nfsvattr nfsva;
278 	struct nfsfsinfo fs;
279 	struct nfsstatfs sb;
280 	int error = 0, attrflag, gotfsinfo = 0, ret;
281 	struct nfsnode *np;
282 
283 	td = curthread;
284 
285 	error = vfs_busy(mp, MBF_NOWAIT);
286 	if (error)
287 		return (error);
288 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
289 	if (error) {
290 		vfs_unbusy(mp);
291 		return (error);
292 	}
293 	vp = NFSTOV(np);
294 	mtx_lock(&nmp->nm_mtx);
295 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
296 		mtx_unlock(&nmp->nm_mtx);
297 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
298 		    &attrflag, NULL);
299 		if (!error)
300 			gotfsinfo = 1;
301 	} else
302 		mtx_unlock(&nmp->nm_mtx);
303 	if (!error)
304 		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
305 		    &attrflag, NULL);
306 	if (error != 0)
307 		NFSCL_DEBUG(2, "statfs=%d\n", error);
308 	if (attrflag == 0) {
309 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
310 		    td->td_ucred, td, &nfsva, NULL, NULL);
311 		if (ret) {
312 			/*
313 			 * Just set default values to get things going.
314 			 */
315 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
316 			nfsva.na_vattr.va_type = VDIR;
317 			nfsva.na_vattr.va_mode = 0777;
318 			nfsva.na_vattr.va_nlink = 100;
319 			nfsva.na_vattr.va_uid = (uid_t)0;
320 			nfsva.na_vattr.va_gid = (gid_t)0;
321 			nfsva.na_vattr.va_fileid = 2;
322 			nfsva.na_vattr.va_gen = 1;
323 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
324 			nfsva.na_vattr.va_size = 512 * 1024;
325 		}
326 	}
327 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
328 	if (!error) {
329 	    mtx_lock(&nmp->nm_mtx);
330 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
331 		nfscl_loadfsinfo(nmp, &fs);
332 	    nfscl_loadsbinfo(nmp, &sb, sbp);
333 	    sbp->f_iosize = newnfs_iosize(nmp);
334 	    mtx_unlock(&nmp->nm_mtx);
335 	    if (sbp != &mp->mnt_stat) {
336 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
337 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
338 	    }
339 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
340 	} else if (NFS_ISV4(vp)) {
341 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
342 	}
343 	vput(vp);
344 	vfs_unbusy(mp);
345 	return (error);
346 }
347 
348 /*
349  * nfs version 3 fsinfo rpc call
350  */
351 int
352 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
353     struct thread *td)
354 {
355 	struct nfsfsinfo fs;
356 	struct nfsvattr nfsva;
357 	int error, attrflag;
358 
359 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
360 	if (!error) {
361 		if (attrflag)
362 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
363 			    1);
364 		mtx_lock(&nmp->nm_mtx);
365 		nfscl_loadfsinfo(nmp, &fs);
366 		mtx_unlock(&nmp->nm_mtx);
367 	}
368 	return (error);
369 }
370 
371 /*
372  * Mount a remote root fs via. nfs. This depends on the info in the
373  * nfs_diskless structure that has been filled in properly by some primary
374  * bootstrap.
375  * It goes something like this:
376  * - do enough of "ifconfig" by calling ifioctl() so that the system
377  *   can talk to the server
378  * - If nfs_diskless.mygateway is filled in, use that address as
379  *   a default gateway.
380  * - build the rootfs mount point and call mountnfs() to do the rest.
381  *
382  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
383  * structure, as well as other global NFS client variables here, as
384  * nfs_mountroot() will be called once in the boot before any other NFS
385  * client activity occurs.
386  */
387 static int
388 nfs_mountroot(struct mount *mp)
389 {
390 	struct thread *td = curthread;
391 	struct nfsv3_diskless *nd = &nfsv3_diskless;
392 	struct socket *so;
393 	struct vnode *vp;
394 	struct ifreq ir;
395 	int error;
396 	u_long l;
397 	char buf[128];
398 	char *cp;
399 
400 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
401 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
402 #elif defined(NFS_ROOT)
403 	nfs_setup_diskless();
404 #endif
405 
406 	if (nfs_diskless_valid == 0)
407 		return (-1);
408 	if (nfs_diskless_valid == 1)
409 		nfs_convert_diskless();
410 
411 	/*
412 	 * XXX splnet, so networks will receive...
413 	 */
414 	splnet();
415 
416 	/*
417 	 * Do enough of ifconfig(8) so that the critical net interface can
418 	 * talk to the server.
419 	 */
420 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
421 	    td->td_ucred, td);
422 	if (error)
423 		panic("nfs_mountroot: socreate(%04x): %d",
424 			nd->myif.ifra_addr.sa_family, error);
425 
426 #if 0 /* XXX Bad idea */
427 	/*
428 	 * We might not have been told the right interface, so we pass
429 	 * over the first ten interfaces of the same kind, until we get
430 	 * one of them configured.
431 	 */
432 
433 	for (i = strlen(nd->myif.ifra_name) - 1;
434 		nd->myif.ifra_name[i] >= '0' &&
435 		nd->myif.ifra_name[i] <= '9';
436 		nd->myif.ifra_name[i] ++) {
437 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
438 		if(!error)
439 			break;
440 	}
441 #endif
442 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
443 	if (error)
444 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
445 	if ((cp = getenv("boot.netif.mtu")) != NULL) {
446 		ir.ifr_mtu = strtol(cp, NULL, 10);
447 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
448 		freeenv(cp);
449 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
450 		if (error)
451 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
452 	}
453 	soclose(so);
454 
455 	/*
456 	 * If the gateway field is filled in, set it as the default route.
457 	 * Note that pxeboot will set a default route of 0 if the route
458 	 * is not set by the DHCP server.  Check also for a value of 0
459 	 * to avoid panicking inappropriately in that situation.
460 	 */
461 	if (nd->mygateway.sin_len != 0 &&
462 	    nd->mygateway.sin_addr.s_addr != 0) {
463 		struct sockaddr_in mask, sin;
464 
465 		bzero((caddr_t)&mask, sizeof(mask));
466 		sin = mask;
467 		sin.sin_family = AF_INET;
468 		sin.sin_len = sizeof(sin);
469                 /* XXX MRT use table 0 for this sort of thing */
470 		CURVNET_SET(TD_TO_VNET(td));
471 		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
472 		    (struct sockaddr *)&nd->mygateway,
473 		    (struct sockaddr *)&mask,
474 		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
475 		CURVNET_RESTORE();
476 		if (error)
477 			panic("nfs_mountroot: RTM_ADD: %d", error);
478 	}
479 
480 	/*
481 	 * Create the rootfs mount point.
482 	 */
483 	nd->root_args.fh = nd->root_fh;
484 	nd->root_args.fhsize = nd->root_fhsize;
485 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
486 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
487 		(l >> 24) & 0xff, (l >> 16) & 0xff,
488 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
489 	printf("NFS ROOT: %s\n", buf);
490 	nd->root_args.hostname = buf;
491 	if ((error = nfs_mountdiskless(buf,
492 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
493 		return (error);
494 	}
495 
496 	/*
497 	 * This is not really an nfs issue, but it is much easier to
498 	 * set hostname here and then let the "/etc/rc.xxx" files
499 	 * mount the right /var based upon its preset value.
500 	 */
501 	mtx_lock(&prison0.pr_mtx);
502 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
503 	    sizeof(prison0.pr_hostname));
504 	mtx_unlock(&prison0.pr_mtx);
505 	inittodr(ntohl(nd->root_time));
506 	return (0);
507 }
508 
509 /*
510  * Internal version of mount system call for diskless setup.
511  */
512 static int
513 nfs_mountdiskless(char *path,
514     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
515     struct vnode **vpp, struct mount *mp)
516 {
517 	struct sockaddr *nam;
518 	int dirlen, error;
519 	char *dirpath;
520 
521 	/*
522 	 * Find the directory path in "path", which also has the server's
523 	 * name/ip address in it.
524 	 */
525 	dirpath = strchr(path, ':');
526 	if (dirpath != NULL)
527 		dirlen = strlen(++dirpath);
528 	else
529 		dirlen = 0;
530 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
531 	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
532 	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
533 	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
534 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
535 		return (error);
536 	}
537 	return (0);
538 }
539 
540 static void
541 nfs_sec_name(char *sec, int *flagsp)
542 {
543 	if (!strcmp(sec, "krb5"))
544 		*flagsp |= NFSMNT_KERB;
545 	else if (!strcmp(sec, "krb5i"))
546 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
547 	else if (!strcmp(sec, "krb5p"))
548 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
549 }
550 
551 static void
552 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
553     const char *hostname, struct ucred *cred, struct thread *td)
554 {
555 	int s;
556 	int adjsock;
557 	char *p;
558 
559 	s = splnet();
560 
561 	/*
562 	 * Set read-only flag if requested; otherwise, clear it if this is
563 	 * an update.  If this is not an update, then either the read-only
564 	 * flag is already clear, or this is a root mount and it was set
565 	 * intentionally at some previous point.
566 	 */
567 	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
568 		MNT_ILOCK(mp);
569 		mp->mnt_flag |= MNT_RDONLY;
570 		MNT_IUNLOCK(mp);
571 	} else if (mp->mnt_flag & MNT_UPDATE) {
572 		MNT_ILOCK(mp);
573 		mp->mnt_flag &= ~MNT_RDONLY;
574 		MNT_IUNLOCK(mp);
575 	}
576 
577 	/*
578 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
579 	 * no sense in that context.  Also, set up appropriate retransmit
580 	 * and soft timeout behavior.
581 	 */
582 	if (argp->sotype == SOCK_STREAM) {
583 		nmp->nm_flag &= ~NFSMNT_NOCONN;
584 		nmp->nm_timeo = NFS_MAXTIMEO;
585 		if ((argp->flags & NFSMNT_NFSV4) != 0)
586 			nmp->nm_retry = INT_MAX;
587 		else
588 			nmp->nm_retry = NFS_RETRANS_TCP;
589 	}
590 
591 	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
592 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
593 		argp->flags &= ~NFSMNT_RDIRPLUS;
594 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
595 	}
596 
597 	/* Re-bind if rsrvd port requested and wasn't on one */
598 	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
599 		  && (argp->flags & NFSMNT_RESVPORT);
600 	/* Also re-bind if we're switching to/from a connected UDP socket */
601 	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
602 		    (argp->flags & NFSMNT_NOCONN));
603 
604 	/* Update flags atomically.  Don't change the lock bits. */
605 	nmp->nm_flag = argp->flags | nmp->nm_flag;
606 	splx(s);
607 
608 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
609 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
610 		if (nmp->nm_timeo < NFS_MINTIMEO)
611 			nmp->nm_timeo = NFS_MINTIMEO;
612 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
613 			nmp->nm_timeo = NFS_MAXTIMEO;
614 	}
615 
616 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
617 		nmp->nm_retry = argp->retrans;
618 		if (nmp->nm_retry > NFS_MAXREXMIT)
619 			nmp->nm_retry = NFS_MAXREXMIT;
620 	}
621 
622 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
623 		nmp->nm_wsize = argp->wsize;
624 		/* Round down to multiple of blocksize */
625 		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
626 		if (nmp->nm_wsize <= 0)
627 			nmp->nm_wsize = NFS_FABLKSIZE;
628 	}
629 
630 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
631 		nmp->nm_rsize = argp->rsize;
632 		/* Round down to multiple of blocksize */
633 		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
634 		if (nmp->nm_rsize <= 0)
635 			nmp->nm_rsize = NFS_FABLKSIZE;
636 	}
637 
638 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
639 		nmp->nm_readdirsize = argp->readdirsize;
640 	}
641 
642 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
643 		nmp->nm_acregmin = argp->acregmin;
644 	else
645 		nmp->nm_acregmin = NFS_MINATTRTIMO;
646 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
647 		nmp->nm_acregmax = argp->acregmax;
648 	else
649 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
650 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
651 		nmp->nm_acdirmin = argp->acdirmin;
652 	else
653 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
654 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
655 		nmp->nm_acdirmax = argp->acdirmax;
656 	else
657 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
658 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
659 		nmp->nm_acdirmin = nmp->nm_acdirmax;
660 	if (nmp->nm_acregmin > nmp->nm_acregmax)
661 		nmp->nm_acregmin = nmp->nm_acregmax;
662 
663 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
664 		if (argp->readahead <= NFS_MAXRAHEAD)
665 			nmp->nm_readahead = argp->readahead;
666 		else
667 			nmp->nm_readahead = NFS_MAXRAHEAD;
668 	}
669 	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
670 		if (argp->wcommitsize < nmp->nm_wsize)
671 			nmp->nm_wcommitsize = nmp->nm_wsize;
672 		else
673 			nmp->nm_wcommitsize = argp->wcommitsize;
674 	}
675 
676 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
677 		    (nmp->nm_soproto != argp->proto));
678 
679 	if (nmp->nm_client != NULL && adjsock) {
680 		int haslock = 0, error = 0;
681 
682 		if (nmp->nm_sotype == SOCK_STREAM) {
683 			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
684 			if (!error)
685 				haslock = 1;
686 		}
687 		if (!error) {
688 		    newnfs_disconnect(&nmp->nm_sockreq);
689 		    if (haslock)
690 			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
691 		    nmp->nm_sotype = argp->sotype;
692 		    nmp->nm_soproto = argp->proto;
693 		    if (nmp->nm_sotype == SOCK_DGRAM)
694 			while (newnfs_connect(nmp, &nmp->nm_sockreq,
695 			    cred, td, 0)) {
696 				printf("newnfs_args: retrying connect\n");
697 				(void) nfs_catnap(PSOCK, 0, "newnfscon");
698 			}
699 		}
700 	} else {
701 		nmp->nm_sotype = argp->sotype;
702 		nmp->nm_soproto = argp->proto;
703 	}
704 
705 	if (hostname != NULL) {
706 		strlcpy(nmp->nm_hostname, hostname,
707 		    sizeof(nmp->nm_hostname));
708 		p = strchr(nmp->nm_hostname, ':');
709 		if (p != NULL)
710 			*p = '\0';
711 	}
712 }
713 
/*
 * NULL-terminated list of mount option names; nfs_mount() hands it to
 * vfs_filteropt() to validate the options supplied for a mount.
 */
static const char *nfs_opts[] = {
	"from", "nfs_args",
	"noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
	"noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
	"async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize", "soft", "hard", "mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans",
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeout", "addr", "fh",
	"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath", "minorversion", "nametimeo", "negnametimeo",
	"nocto", "pnfs", "wcommitsize",
	NULL
};
724 
725 /*
726  * VFS Operations.
727  *
728  * mount system call
729  * It seems a bit dumb to copyinstr() the host and path here and then
730  * bcopy() them in mountnfs(), but I wanted to detect errors before
731  * doing the sockargs() call because sockargs() allocates an mbuf and
732  * an error after that means that I have to release the mbuf.
733  */
734 /* ARGSUSED */
735 static int
736 nfs_mount(struct mount *mp)
737 {
738 	struct nfs_args args = {
739 	    .version = NFS_ARGSVERSION,
740 	    .addr = NULL,
741 	    .addrlen = sizeof (struct sockaddr_in),
742 	    .sotype = SOCK_STREAM,
743 	    .proto = 0,
744 	    .fh = NULL,
745 	    .fhsize = 0,
746 	    .flags = NFSMNT_RESVPORT,
747 	    .wsize = NFS_WSIZE,
748 	    .rsize = NFS_RSIZE,
749 	    .readdirsize = NFS_READDIRSIZE,
750 	    .timeo = 10,
751 	    .retrans = NFS_RETRANS,
752 	    .readahead = NFS_DEFRAHEAD,
753 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
754 	    .hostname = NULL,
755 	    .acregmin = NFS_MINATTRTIMO,
756 	    .acregmax = NFS_MAXATTRTIMO,
757 	    .acdirmin = NFS_MINDIRATTRTIMO,
758 	    .acdirmax = NFS_MAXDIRATTRTIMO,
759 	};
760 	int error = 0, ret, len;
761 	struct sockaddr *nam = NULL;
762 	struct vnode *vp;
763 	struct thread *td;
764 	char hst[MNAMELEN];
765 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
766 	char *opt, *name, *secname;
767 	int nametimeo = NFS_DEFAULT_NAMETIMEO;
768 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
769 	int minvers = 0;
770 	int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
771 	size_t hstlen;
772 
773 	has_nfs_args_opt = 0;
774 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
775 		error = EINVAL;
776 		goto out;
777 	}
778 
779 	td = curthread;
780 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
781 		error = nfs_mountroot(mp);
782 		goto out;
783 	}
784 
785 	nfscl_init();
786 
787 	/*
788 	 * The old mount_nfs program passed the struct nfs_args
789 	 * from userspace to kernel.  The new mount_nfs program
790 	 * passes string options via nmount() from userspace to kernel
791 	 * and we populate the struct nfs_args in the kernel.
792 	 */
793 	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
794 		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
795 		    sizeof(args));
796 		if (error != 0)
797 			goto out;
798 
799 		if (args.version != NFS_ARGSVERSION) {
800 			error = EPROGMISMATCH;
801 			goto out;
802 		}
803 		has_nfs_args_opt = 1;
804 	}
805 
806 	/* Handle the new style options. */
807 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
808 		args.flags |= NFSMNT_NOCONN;
809 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
810 		args.flags |= NFSMNT_NOCONN;
811 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
812 		args.flags |= NFSMNT_NOLOCKD;
813 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
814 		args.flags &= ~NFSMNT_NOLOCKD;
815 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
816 		args.flags |= NFSMNT_INT;
817 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
818 		args.flags |= NFSMNT_RDIRPLUS;
819 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
820 		args.flags |= NFSMNT_RESVPORT;
821 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
822 		args.flags &= ~NFSMNT_RESVPORT;
823 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
824 		args.flags |= NFSMNT_SOFT;
825 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
826 		args.flags &= ~NFSMNT_SOFT;
827 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
828 		args.sotype = SOCK_DGRAM;
829 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
830 		args.sotype = SOCK_DGRAM;
831 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
832 		args.sotype = SOCK_STREAM;
833 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
834 		args.flags |= NFSMNT_NFSV3;
835 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
836 		args.flags |= NFSMNT_NFSV4;
837 		args.sotype = SOCK_STREAM;
838 	}
839 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
840 		args.flags |= NFSMNT_ALLGSSNAME;
841 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
842 		args.flags |= NFSMNT_NOCTO;
843 	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
844 		args.flags |= NFSMNT_PNFS;
845 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
846 		if (opt == NULL) {
847 			vfs_mount_error(mp, "illegal readdirsize");
848 			error = EINVAL;
849 			goto out;
850 		}
851 		ret = sscanf(opt, "%d", &args.readdirsize);
852 		if (ret != 1 || args.readdirsize <= 0) {
853 			vfs_mount_error(mp, "illegal readdirsize: %s",
854 			    opt);
855 			error = EINVAL;
856 			goto out;
857 		}
858 		args.flags |= NFSMNT_READDIRSIZE;
859 	}
860 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
861 		if (opt == NULL) {
862 			vfs_mount_error(mp, "illegal readahead");
863 			error = EINVAL;
864 			goto out;
865 		}
866 		ret = sscanf(opt, "%d", &args.readahead);
867 		if (ret != 1 || args.readahead <= 0) {
868 			vfs_mount_error(mp, "illegal readahead: %s",
869 			    opt);
870 			error = EINVAL;
871 			goto out;
872 		}
873 		args.flags |= NFSMNT_READAHEAD;
874 	}
875 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
876 		if (opt == NULL) {
877 			vfs_mount_error(mp, "illegal wsize");
878 			error = EINVAL;
879 			goto out;
880 		}
881 		ret = sscanf(opt, "%d", &args.wsize);
882 		if (ret != 1 || args.wsize <= 0) {
883 			vfs_mount_error(mp, "illegal wsize: %s",
884 			    opt);
885 			error = EINVAL;
886 			goto out;
887 		}
888 		args.flags |= NFSMNT_WSIZE;
889 	}
890 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
891 		if (opt == NULL) {
892 			vfs_mount_error(mp, "illegal rsize");
893 			error = EINVAL;
894 			goto out;
895 		}
896 		ret = sscanf(opt, "%d", &args.rsize);
897 		if (ret != 1 || args.rsize <= 0) {
898 			vfs_mount_error(mp, "illegal wsize: %s",
899 			    opt);
900 			error = EINVAL;
901 			goto out;
902 		}
903 		args.flags |= NFSMNT_RSIZE;
904 	}
905 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
906 		if (opt == NULL) {
907 			vfs_mount_error(mp, "illegal retrans");
908 			error = EINVAL;
909 			goto out;
910 		}
911 		ret = sscanf(opt, "%d", &args.retrans);
912 		if (ret != 1 || args.retrans <= 0) {
913 			vfs_mount_error(mp, "illegal retrans: %s",
914 			    opt);
915 			error = EINVAL;
916 			goto out;
917 		}
918 		args.flags |= NFSMNT_RETRANS;
919 	}
920 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
921 		ret = sscanf(opt, "%d", &args.acregmin);
922 		if (ret != 1 || args.acregmin < 0) {
923 			vfs_mount_error(mp, "illegal acregmin: %s",
924 			    opt);
925 			error = EINVAL;
926 			goto out;
927 		}
928 		args.flags |= NFSMNT_ACREGMIN;
929 	}
930 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
931 		ret = sscanf(opt, "%d", &args.acregmax);
932 		if (ret != 1 || args.acregmax < 0) {
933 			vfs_mount_error(mp, "illegal acregmax: %s",
934 			    opt);
935 			error = EINVAL;
936 			goto out;
937 		}
938 		args.flags |= NFSMNT_ACREGMAX;
939 	}
940 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
941 		ret = sscanf(opt, "%d", &args.acdirmin);
942 		if (ret != 1 || args.acdirmin < 0) {
943 			vfs_mount_error(mp, "illegal acdirmin: %s",
944 			    opt);
945 			error = EINVAL;
946 			goto out;
947 		}
948 		args.flags |= NFSMNT_ACDIRMIN;
949 	}
950 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
951 		ret = sscanf(opt, "%d", &args.acdirmax);
952 		if (ret != 1 || args.acdirmax < 0) {
953 			vfs_mount_error(mp, "illegal acdirmax: %s",
954 			    opt);
955 			error = EINVAL;
956 			goto out;
957 		}
958 		args.flags |= NFSMNT_ACDIRMAX;
959 	}
960 	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
961 		ret = sscanf(opt, "%d", &args.wcommitsize);
962 		if (ret != 1 || args.wcommitsize < 0) {
963 			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
964 			error = EINVAL;
965 			goto out;
966 		}
967 		args.flags |= NFSMNT_WCOMMITSIZE;
968 	}
969 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
970 		ret = sscanf(opt, "%d", &args.timeo);
971 		if (ret != 1 || args.timeo <= 0) {
972 			vfs_mount_error(mp, "illegal timeout: %s",
973 			    opt);
974 			error = EINVAL;
975 			goto out;
976 		}
977 		args.flags |= NFSMNT_TIMEO;
978 	}
979 	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
980 		ret = sscanf(opt, "%d", &nametimeo);
981 		if (ret != 1 || nametimeo < 0) {
982 			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
983 			error = EINVAL;
984 			goto out;
985 		}
986 	}
987 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
988 	    == 0) {
989 		ret = sscanf(opt, "%d", &negnametimeo);
990 		if (ret != 1 || negnametimeo < 0) {
991 			vfs_mount_error(mp, "illegal negnametimeo: %s",
992 			    opt);
993 			error = EINVAL;
994 			goto out;
995 		}
996 	}
997 	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
998 	    0) {
999 		ret = sscanf(opt, "%d", &minvers);
1000 		if (ret != 1 || minvers < 0 || minvers > 1 ||
1001 		    (args.flags & NFSMNT_NFSV4) == 0) {
1002 			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1003 			error = EINVAL;
1004 			goto out;
1005 		}
1006 	}
1007 	if (vfs_getopt(mp->mnt_optnew, "sec",
1008 		(void **) &secname, NULL) == 0)
1009 		nfs_sec_name(secname, &args.flags);
1010 
1011 	if (mp->mnt_flag & MNT_UPDATE) {
1012 		struct nfsmount *nmp = VFSTONFS(mp);
1013 
1014 		if (nmp == NULL) {
1015 			error = EIO;
1016 			goto out;
1017 		}
1018 
1019 		/*
1020 		 * If a change from TCP->UDP is done and there are thread(s)
1021 		 * that have I/O RPC(s) in progress with a tranfer size
1022 		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1023 		 * hung, retrying the RPC(s) forever. Usually these threads
1024 		 * will be seen doing an uninterruptible sleep on wait channel
1025 		 * "newnfsreq" (truncated to "newnfsre" by procstat).
1026 		 */
1027 		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1028 			tprintf(td->td_proc, LOG_WARNING,
1029 	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1030 
1031 		/*
1032 		 * When doing an update, we can't change version,
1033 		 * security, switch lockd strategies or change cookie
1034 		 * translation
1035 		 */
1036 		args.flags = (args.flags &
1037 		    ~(NFSMNT_NFSV3 |
1038 		      NFSMNT_NFSV4 |
1039 		      NFSMNT_KERB |
1040 		      NFSMNT_INTEGRITY |
1041 		      NFSMNT_PRIVACY |
1042 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1043 		    (nmp->nm_flag &
1044 			(NFSMNT_NFSV3 |
1045 			 NFSMNT_NFSV4 |
1046 			 NFSMNT_KERB |
1047 			 NFSMNT_INTEGRITY |
1048 			 NFSMNT_PRIVACY |
1049 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1050 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1051 		goto out;
1052 	}
1053 
1054 	/*
1055 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1056 	 * or no-connection mode for those protocols that support
1057 	 * no-connection mode (the flag will be cleared later for protocols
1058 	 * that do not support no-connection mode).  This will allow a client
1059 	 * to receive replies from a different IP then the request was
1060 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1061 	 * not 0.
1062 	 */
1063 	if (nfs_ip_paranoia == 0)
1064 		args.flags |= NFSMNT_NOCONN;
1065 
1066 	if (has_nfs_args_opt != 0) {
1067 		/*
1068 		 * In the 'nfs_args' case, the pointers in the args
1069 		 * structure are in userland - we copy them in here.
1070 		 */
1071 		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1072 			vfs_mount_error(mp, "Bad file handle");
1073 			error = EINVAL;
1074 			goto out;
1075 		}
1076 		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1077 		    args.fhsize);
1078 		if (error != 0)
1079 			goto out;
1080 		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1081 		if (error != 0)
1082 			goto out;
1083 		bzero(&hst[hstlen], MNAMELEN - hstlen);
1084 		args.hostname = hst;
1085 		/* sockargs() call must be after above copyin() calls */
1086 		error = getsockaddr(&nam, (caddr_t)args.addr,
1087 		    args.addrlen);
1088 		if (error != 0)
1089 			goto out;
1090 	} else {
1091 		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1092 		    &args.fhsize) == 0) {
1093 			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1094 				vfs_mount_error(mp, "Bad file handle");
1095 				error = EINVAL;
1096 				goto out;
1097 			}
1098 			bcopy(args.fh, nfh, args.fhsize);
1099 		} else {
1100 			args.fhsize = 0;
1101 		}
1102 		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1103 		    (void **)&args.hostname, &len);
1104 		if (args.hostname == NULL) {
1105 			vfs_mount_error(mp, "Invalid hostname");
1106 			error = EINVAL;
1107 			goto out;
1108 		}
1109 		bcopy(args.hostname, hst, MNAMELEN);
1110 		hst[MNAMELEN - 1] = '\0';
1111 	}
1112 
1113 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1114 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1115 	else
1116 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1117 	srvkrbnamelen = strlen(srvkrbname);
1118 
1119 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1120 		strlcpy(krbname, name, sizeof (krbname));
1121 	else
1122 		krbname[0] = '\0';
1123 	krbnamelen = strlen(krbname);
1124 
1125 	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1126 		strlcpy(dirpath, name, sizeof (dirpath));
1127 	else
1128 		dirpath[0] = '\0';
1129 	dirlen = strlen(dirpath);
1130 
1131 	if (has_nfs_args_opt == 0) {
1132 		if (vfs_getopt(mp->mnt_optnew, "addr",
1133 		    (void **)&args.addr, &args.addrlen) == 0) {
1134 			if (args.addrlen > SOCK_MAXADDRLEN) {
1135 				error = ENAMETOOLONG;
1136 				goto out;
1137 			}
1138 			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1139 			bcopy(args.addr, nam, args.addrlen);
1140 			nam->sa_len = args.addrlen;
1141 		} else {
1142 			vfs_mount_error(mp, "No server address");
1143 			error = EINVAL;
1144 			goto out;
1145 		}
1146 	}
1147 
1148 	args.fh = nfh;
1149 	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1150 	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1151 	    nametimeo, negnametimeo, minvers);
1152 out:
1153 	if (!error) {
1154 		MNT_ILOCK(mp);
1155 		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF;
1156 		MNT_IUNLOCK(mp);
1157 	}
1158 	return (error);
1159 }
1160 
1161 
1162 /*
1163  * VFS Operations.
1164  *
1165  * mount system call
1166  * It seems a bit dumb to copyinstr() the host and path here and then
1167  * bcopy() them in mountnfs(), but I wanted to detect errors before
1168  * doing the sockargs() call because sockargs() allocates an mbuf and
1169  * an error after that means that I have to release the mbuf.
1170  */
1171 /* ARGSUSED */
1172 static int
1173 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1174 {
1175 	int error;
1176 	struct nfs_args args;
1177 
1178 	error = copyin(data, &args, sizeof (struct nfs_args));
1179 	if (error)
1180 		return error;
1181 
1182 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1183 
1184 	error = kernel_mount(ma, flags);
1185 	return (error);
1186 }
1187 
1188 /*
1189  * Common code for mount and mountroot
1190  */
1191 static int
1192 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1193     char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1194     u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1195     struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1196     int minvers)
1197 {
1198 	struct nfsmount *nmp;
1199 	struct nfsnode *np;
1200 	int error, trycnt, ret;
1201 	struct nfsvattr nfsva;
1202 	struct nfsclclient *clp;
1203 	struct nfsclds *dsp, *tdsp;
1204 	uint32_t lease;
1205 	static u_int64_t clval = 0;
1206 
1207 	NFSCL_DEBUG(3, "in mnt\n");
1208 	clp = NULL;
1209 	if (mp->mnt_flag & MNT_UPDATE) {
1210 		nmp = VFSTONFS(mp);
1211 		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1212 		FREE(nam, M_SONAME);
1213 		return (0);
1214 	} else {
1215 		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1216 		    krbnamelen + dirlen + srvkrbnamelen + 2,
1217 		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1218 		TAILQ_INIT(&nmp->nm_bufq);
1219 		if (clval == 0)
1220 			clval = (u_int64_t)nfsboottime.tv_sec;
1221 		nmp->nm_clval = clval++;
1222 		nmp->nm_krbnamelen = krbnamelen;
1223 		nmp->nm_dirpathlen = dirlen;
1224 		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1225 		if (td->td_ucred->cr_uid != (uid_t)0) {
1226 			/*
1227 			 * nm_uid is used to get KerberosV credentials for
1228 			 * the nfsv4 state handling operations if there is
1229 			 * no host based principal set. Use the uid of
1230 			 * this user if not root, since they are doing the
1231 			 * mount. I don't think setting this for root will
1232 			 * work, since root normally does not have user
1233 			 * credentials in a credentials cache.
1234 			 */
1235 			nmp->nm_uid = td->td_ucred->cr_uid;
1236 		} else {
1237 			/*
1238 			 * Just set to -1, so it won't be used.
1239 			 */
1240 			nmp->nm_uid = (uid_t)-1;
1241 		}
1242 
1243 		/* Copy and null terminate all the names */
1244 		if (nmp->nm_krbnamelen > 0) {
1245 			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1246 			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1247 		}
1248 		if (nmp->nm_dirpathlen > 0) {
1249 			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1250 			    nmp->nm_dirpathlen);
1251 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1252 			    + 1] = '\0';
1253 		}
1254 		if (nmp->nm_srvkrbnamelen > 0) {
1255 			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1256 			    nmp->nm_srvkrbnamelen);
1257 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1258 			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1259 		}
1260 		nmp->nm_sockreq.nr_cred = crhold(cred);
1261 		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1262 		mp->mnt_data = nmp;
1263 		nmp->nm_getinfo = nfs_getnlminfo;
1264 		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1265 	}
1266 	vfs_getnewfsid(mp);
1267 	nmp->nm_mountp = mp;
1268 	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1269 
1270 	/*
1271 	 * Since nfs_decode_args() might optionally set them, these
1272 	 * need to be set to defaults before the call, so that the
1273 	 * optional settings aren't overwritten.
1274 	 */
1275 	nmp->nm_nametimeo = nametimeo;
1276 	nmp->nm_negnametimeo = negnametimeo;
1277 	nmp->nm_timeo = NFS_TIMEO;
1278 	nmp->nm_retry = NFS_RETRANS;
1279 	nmp->nm_readahead = NFS_DEFRAHEAD;
1280 	if (desiredvnodes >= 11000)
1281 		nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1282 	else
1283 		nmp->nm_wcommitsize = hibufspace / 10;
1284 	if ((argp->flags & NFSMNT_NFSV4) != 0)
1285 		nmp->nm_minorvers = minvers;
1286 	else
1287 		nmp->nm_minorvers = 0;
1288 
1289 	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1290 
1291 	/*
1292 	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1293 	 * high, depending on whether we end up with negative offsets in
1294 	 * the client or server somewhere.  2GB-1 may be safer.
1295 	 *
1296 	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1297 	 * that we can handle until we find out otherwise.
1298 	 */
1299 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1300 		nmp->nm_maxfilesize = 0xffffffffLL;
1301 	else
1302 		nmp->nm_maxfilesize = OFF_MAX;
1303 
1304 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1305 		nmp->nm_wsize = NFS_WSIZE;
1306 		nmp->nm_rsize = NFS_RSIZE;
1307 		nmp->nm_readdirsize = NFS_READDIRSIZE;
1308 	}
1309 	nmp->nm_numgrps = NFS_MAXGRPS;
1310 	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1311 	if (nmp->nm_tprintf_delay < 0)
1312 		nmp->nm_tprintf_delay = 0;
1313 	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1314 	if (nmp->nm_tprintf_initial_delay < 0)
1315 		nmp->nm_tprintf_initial_delay = 0;
1316 	nmp->nm_fhsize = argp->fhsize;
1317 	if (nmp->nm_fhsize > 0)
1318 		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1319 	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1320 	nmp->nm_nam = nam;
1321 	/* Set up the sockets and per-host congestion */
1322 	nmp->nm_sotype = argp->sotype;
1323 	nmp->nm_soproto = argp->proto;
1324 	nmp->nm_sockreq.nr_prog = NFS_PROG;
1325 	if ((argp->flags & NFSMNT_NFSV4))
1326 		nmp->nm_sockreq.nr_vers = NFS_VER4;
1327 	else if ((argp->flags & NFSMNT_NFSV3))
1328 		nmp->nm_sockreq.nr_vers = NFS_VER3;
1329 	else
1330 		nmp->nm_sockreq.nr_vers = NFS_VER2;
1331 
1332 
1333 	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1334 		goto bad;
1335 	/* For NFSv4.1, get the clientid now. */
1336 	if (nmp->nm_minorvers > 0) {
1337 		NFSCL_DEBUG(3, "at getcl\n");
1338 		error = nfscl_getcl(mp, cred, td, 0, &clp);
1339 		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1340 		if (error != 0)
1341 			goto bad;
1342 	}
1343 
1344 	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1345 	    nmp->nm_dirpathlen > 0) {
1346 		NFSCL_DEBUG(3, "in dirp\n");
1347 		/*
1348 		 * If the fhsize on the mount point == 0 for V4, the mount
1349 		 * path needs to be looked up.
1350 		 */
1351 		trycnt = 3;
1352 		do {
1353 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1354 			    cred, td);
1355 			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
1356 			if (error)
1357 				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1358 		} while (error && --trycnt > 0);
1359 		if (error) {
1360 			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1361 			goto bad;
1362 		}
1363 	}
1364 
1365 	/*
1366 	 * A reference count is needed on the nfsnode representing the
1367 	 * remote root.  If this object is not persistent, then backward
1368 	 * traversals of the mount point (i.e. "..") will not work if
1369 	 * the nfsnode gets flushed out of the cache. Ufs does not have
1370 	 * this problem, because one can identify root inodes by their
1371 	 * number == ROOTINO (2).
1372 	 */
1373 	if (nmp->nm_fhsize > 0) {
1374 		/*
1375 		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1376 		 * non-zero for the root vnode. f_iosize will be set correctly
1377 		 * by nfs_statfs() before any I/O occurs.
1378 		 */
1379 		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1380 		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1381 		    LK_EXCLUSIVE);
1382 		if (error)
1383 			goto bad;
1384 		*vpp = NFSTOV(np);
1385 
1386 		/*
1387 		 * Get file attributes and transfer parameters for the
1388 		 * mountpoint.  This has the side effect of filling in
1389 		 * (*vpp)->v_type with the correct value.
1390 		 */
1391 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1392 		    cred, td, &nfsva, NULL, &lease);
1393 		if (ret) {
1394 			/*
1395 			 * Just set default values to get things going.
1396 			 */
1397 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1398 			nfsva.na_vattr.va_type = VDIR;
1399 			nfsva.na_vattr.va_mode = 0777;
1400 			nfsva.na_vattr.va_nlink = 100;
1401 			nfsva.na_vattr.va_uid = (uid_t)0;
1402 			nfsva.na_vattr.va_gid = (gid_t)0;
1403 			nfsva.na_vattr.va_fileid = 2;
1404 			nfsva.na_vattr.va_gen = 1;
1405 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1406 			nfsva.na_vattr.va_size = 512 * 1024;
1407 			lease = 60;
1408 		}
1409 		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1410 		if (nmp->nm_minorvers > 0) {
1411 			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1412 			NFSLOCKCLSTATE();
1413 			clp->nfsc_renew = NFSCL_RENEW(lease);
1414 			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1415 			clp->nfsc_clientidrev++;
1416 			if (clp->nfsc_clientidrev == 0)
1417 				clp->nfsc_clientidrev++;
1418 			NFSUNLOCKCLSTATE();
1419 			/*
1420 			 * Mount will succeed, so the renew thread can be
1421 			 * started now.
1422 			 */
1423 			nfscl_start_renewthread(clp);
1424 			nfscl_clientrelease(clp);
1425 		}
1426 		if (argp->flags & NFSMNT_NFSV3)
1427 			ncl_fsinfo(nmp, *vpp, cred, td);
1428 
1429 		/* Mark if the mount point supports NFSv4 ACLs. */
1430 		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1431 		    ret == 0 &&
1432 		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1433 			MNT_ILOCK(mp);
1434 			mp->mnt_flag |= MNT_NFS4ACLS;
1435 			MNT_IUNLOCK(mp);
1436 		}
1437 
1438 		/*
1439 		 * Lose the lock but keep the ref.
1440 		 */
1441 		NFSVOPUNLOCK(*vpp, 0);
1442 		return (0);
1443 	}
1444 	error = EIO;
1445 
1446 bad:
1447 	if (clp != NULL)
1448 		nfscl_clientrelease(clp);
1449 	newnfs_disconnect(&nmp->nm_sockreq);
1450 	crfree(nmp->nm_sockreq.nr_cred);
1451 	if (nmp->nm_sockreq.nr_auth != NULL)
1452 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1453 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1454 	mtx_destroy(&nmp->nm_mtx);
1455 	if (nmp->nm_clp != NULL) {
1456 		NFSLOCKCLSTATE();
1457 		LIST_REMOVE(nmp->nm_clp, nfsc_list);
1458 		NFSUNLOCKCLSTATE();
1459 		free(nmp->nm_clp, M_NFSCLCLIENT);
1460 	}
1461 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1462 		nfscl_freenfsclds(dsp);
1463 	FREE(nmp, M_NEWNFSMNT);
1464 	FREE(nam, M_SONAME);
1465 	return (error);
1466 }
1467 
1468 /*
1469  * unmount system call
1470  */
1471 static int
1472 nfs_unmount(struct mount *mp, int mntflags)
1473 {
1474 	struct thread *td;
1475 	struct nfsmount *nmp;
1476 	int error, flags = 0, i, trycnt = 0;
1477 	struct nfsclds *dsp, *tdsp;
1478 
1479 	td = curthread;
1480 
1481 	if (mntflags & MNT_FORCE)
1482 		flags |= FORCECLOSE;
1483 	nmp = VFSTONFS(mp);
1484 	/*
1485 	 * Goes something like this..
1486 	 * - Call vflush() to clear out vnodes for this filesystem
1487 	 * - Close the socket
1488 	 * - Free up the data structures
1489 	 */
1490 	/* In the forced case, cancel any outstanding requests. */
1491 	if (mntflags & MNT_FORCE) {
1492 		error = newnfs_nmcancelreqs(nmp);
1493 		if (error)
1494 			goto out;
1495 		/* For a forced close, get rid of the renew thread now */
1496 		nfscl_umount(nmp, td);
1497 	}
1498 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1499 	do {
1500 		error = vflush(mp, 1, flags, td);
1501 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1502 			(void) nfs_catnap(PSOCK, error, "newndm");
1503 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1504 	if (error)
1505 		goto out;
1506 
1507 	/*
1508 	 * We are now committed to the unmount.
1509 	 */
1510 	if ((mntflags & MNT_FORCE) == 0)
1511 		nfscl_umount(nmp, td);
1512 	/* Make sure no nfsiods are assigned to this mount. */
1513 	mtx_lock(&ncl_iod_mutex);
1514 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1515 		if (ncl_iodmount[i] == nmp) {
1516 			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1517 			ncl_iodmount[i] = NULL;
1518 		}
1519 	mtx_unlock(&ncl_iod_mutex);
1520 	newnfs_disconnect(&nmp->nm_sockreq);
1521 	crfree(nmp->nm_sockreq.nr_cred);
1522 	FREE(nmp->nm_nam, M_SONAME);
1523 	if (nmp->nm_sockreq.nr_auth != NULL)
1524 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1525 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1526 	mtx_destroy(&nmp->nm_mtx);
1527 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1528 		nfscl_freenfsclds(dsp);
1529 	FREE(nmp, M_NEWNFSMNT);
1530 out:
1531 	return (error);
1532 }
1533 
1534 /*
1535  * Return root of a filesystem
1536  */
1537 static int
1538 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1539 {
1540 	struct vnode *vp;
1541 	struct nfsmount *nmp;
1542 	struct nfsnode *np;
1543 	int error;
1544 
1545 	nmp = VFSTONFS(mp);
1546 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1547 	if (error)
1548 		return error;
1549 	vp = NFSTOV(np);
1550 	/*
1551 	 * Get transfer parameters and attributes for root vnode once.
1552 	 */
1553 	mtx_lock(&nmp->nm_mtx);
1554 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1555 		mtx_unlock(&nmp->nm_mtx);
1556 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1557 	} else
1558 		mtx_unlock(&nmp->nm_mtx);
1559 	if (vp->v_type == VNON)
1560 	    vp->v_type = VDIR;
1561 	vp->v_vflag |= VV_ROOT;
1562 	*vpp = vp;
1563 	return (0);
1564 }
1565 
1566 /*
1567  * Flush out the buffer cache
1568  */
1569 /* ARGSUSED */
1570 static int
1571 nfs_sync(struct mount *mp, int waitfor)
1572 {
1573 	struct vnode *vp, *mvp;
1574 	struct thread *td;
1575 	int error, allerror = 0;
1576 
1577 	td = curthread;
1578 
1579 	MNT_ILOCK(mp);
1580 	/*
1581 	 * If a forced dismount is in progress, return from here so that
1582 	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1583 	 * calling VFS_UNMOUNT().
1584 	 */
1585 	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1586 		MNT_IUNLOCK(mp);
1587 		return (EBADF);
1588 	}
1589 	MNT_IUNLOCK(mp);
1590 
1591 	/*
1592 	 * Force stale buffer cache information to be flushed.
1593 	 */
1594 loop:
1595 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1596 		/* XXX Racy bv_cnt check. */
1597 		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1598 		    waitfor == MNT_LAZY) {
1599 			VI_UNLOCK(vp);
1600 			continue;
1601 		}
1602 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1603 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1604 			goto loop;
1605 		}
1606 		error = VOP_FSYNC(vp, waitfor, td);
1607 		if (error)
1608 			allerror = error;
1609 		NFSVOPUNLOCK(vp, 0);
1610 		vrele(vp);
1611 	}
1612 	return (allerror);
1613 }
1614 
1615 static int
1616 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1617 {
1618 	struct nfsmount *nmp = VFSTONFS(mp);
1619 	struct vfsquery vq;
1620 	int error;
1621 
1622 	bzero(&vq, sizeof(vq));
1623 	switch (op) {
1624 #if 0
1625 	case VFS_CTL_NOLOCKS:
1626 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1627  		if (req->oldptr != NULL) {
1628  			error = SYSCTL_OUT(req, &val, sizeof(val));
1629  			if (error)
1630  				return (error);
1631  		}
1632  		if (req->newptr != NULL) {
1633  			error = SYSCTL_IN(req, &val, sizeof(val));
1634  			if (error)
1635  				return (error);
1636 			if (val)
1637 				nmp->nm_flag |= NFSMNT_NOLOCKS;
1638 			else
1639 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1640  		}
1641 		break;
1642 #endif
1643 	case VFS_CTL_QUERY:
1644 		mtx_lock(&nmp->nm_mtx);
1645 		if (nmp->nm_state & NFSSTA_TIMEO)
1646 			vq.vq_flags |= VQ_NOTRESP;
1647 		mtx_unlock(&nmp->nm_mtx);
1648 #if 0
1649 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1650 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1651 			vq.vq_flags |= VQ_NOTRESPLOCK;
1652 #endif
1653 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1654 		break;
1655  	case VFS_CTL_TIMEO:
1656  		if (req->oldptr != NULL) {
1657  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1658  			    sizeof(nmp->nm_tprintf_initial_delay));
1659  			if (error)
1660  				return (error);
1661  		}
1662  		if (req->newptr != NULL) {
1663 			error = vfs_suser(mp, req->td);
1664 			if (error)
1665 				return (error);
1666  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1667  			    sizeof(nmp->nm_tprintf_initial_delay));
1668  			if (error)
1669  				return (error);
1670  			if (nmp->nm_tprintf_initial_delay < 0)
1671  				nmp->nm_tprintf_initial_delay = 0;
1672  		}
1673 		break;
1674 	default:
1675 		return (ENOTSUP);
1676 	}
1677 	return (0);
1678 }
1679 
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.
 *
 * The value returned by newnfs_nmcancelreqs() is not looked at.
 */
static void
nfs_purge(struct mount *mp)
{
	struct nfsmount *nmp;

	nmp = VFSTONFS(mp);
	newnfs_nmcancelreqs(nmp);
}
1692 
1693 /*
1694  * Extract the information needed by the nlm from the nfs vnode.
1695  */
1696 static void
1697 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1698     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1699     struct timeval *timeop)
1700 {
1701 	struct nfsmount *nmp;
1702 	struct nfsnode *np = VTONFS(vp);
1703 
1704 	nmp = VFSTONFS(vp->v_mount);
1705 	if (fhlenp != NULL)
1706 		*fhlenp = (size_t)np->n_fhp->nfh_len;
1707 	if (fhp != NULL)
1708 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1709 	if (sp != NULL)
1710 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1711 	if (is_v3p != NULL)
1712 		*is_v3p = NFS_ISV3(vp);
1713 	if (sizep != NULL)
1714 		*sizep = np->n_size;
1715 	if (timeop != NULL) {
1716 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1717 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1718 	}
1719 }
1720 
/*
 * This function prints out an option name, based on the conditional
 * argument.
 *
 * When "testval" is non-zero and the remaining space (*blen) is large
 * enough for "opt" plus its terminating NUL, "opt" is appended at *buf
 * and *buf / *blen are moved past the characters written (the NUL is left
 * in place and gets overwritten by the next append).  Otherwise nothing
 * is touched.  "nmp" is not used.
 */
static __inline void nfscl_printopt(struct nfsmount *nmp, int testval,
    char *opt, char **buf, size_t *blen)
{
	size_t optlen;

	if (testval == 0)
		return;

	optlen = strlen(opt);
	if (*blen <= optlen)
		return;

	/*
	 * For a plain "%s" snprintf() always reports strlen(opt), so the
	 * already known length is used to advance the cursor.
	 */
	(void)snprintf(*buf, *blen, "%s", opt);
	*buf += optlen;
	*blen -= optlen;
}
1738 
/*
 * This function prints out an option's integer value.
 *
 * Appends "<opt>=<optval>" at *buf when it fits completely into the
 * remaining *blen bytes (terminating NUL included) and moves *buf / *blen
 * past the characters written.  When it does not fit, the output is left
 * as it was before the call: a partially formatted fragment is cut off
 * again so that the string never ends in a truncated option.
 * "nmp" is not used.
 */
static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
    char *opt, char **buf, size_t *blen)
{
	int len;

	/* Quick reject: not even room for the name, '=' and the NUL. */
	if (*blen <= strlen(opt) + 1)
		return;

	len = snprintf(*buf, *blen, "%s=%d", opt, optval);
	if (len >= 0 && (size_t)len < *blen) {
		*buf += len;
		*blen -= len;
	} else {
		/* Truncated: drop the fragment, keep what was there before. */
		**buf = '\0';
	}
}
1756 
1757 /*
1758  * Load the option flags and values into the buffer.
1759  */
1760 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1761 {
1762 	char *buf;
1763 	size_t blen;
1764 
1765 	buf = buffer;
1766 	blen = buflen;
1767 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1768 	    &blen);
1769 	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1770 		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1771 		    &blen);
1772 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1773 		    &buf, &blen);
1774 	}
1775 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1776 	    &blen);
1777 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1778 	    "nfsv2", &buf, &blen);
1779 	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1780 	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1781 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1782 	    &buf, &blen);
1783 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1784 	    &buf, &blen);
1785 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1786 	    &blen);
1787 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1788 	    &blen);
1789 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1790 	    &blen);
1791 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1792 	    &blen);
1793 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1794 	    &blen);
1795 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1796 	    0, ",lockd", &buf, &blen);
1797 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1798 	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1799 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1800 	    &buf, &blen);
1801 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1802 	    &buf, &blen);
1803 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1804 	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
1805 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1806 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1807 	    &buf, &blen);
1808 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1809 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
1810 	    &buf, &blen);
1811 	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1812 	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1813 	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
1814 	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
1815 	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
1816 	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
1817 	    &blen);
1818 	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
1819 	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
1820 	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
1821 	    &blen);
1822 	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
1823 	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
1824 	    &blen);
1825 	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
1826 	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
1827 }
1828 
1829