xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision 3823d5e198425b4f5e5a80267d195769d1063773)
1 /*-
2  * Copyright (c) 1989, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/limits.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/mount.h>
55 #include <sys/proc.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/sockio.h>
59 #include <sys/sysctl.h>
60 #include <sys/vnode.h>
61 #include <sys/signalvar.h>
62 
63 #include <vm/vm.h>
64 #include <vm/vm_extern.h>
65 #include <vm/uma.h>
66 
67 #include <net/if.h>
68 #include <net/route.h>
69 #include <netinet/in.h>
70 
71 #include <fs/nfs/nfsport.h>
72 #include <fs/nfsclient/nfsnode.h>
73 #include <fs/nfsclient/nfsmount.h>
74 #include <fs/nfsclient/nfs.h>
75 #include <nfs/nfsdiskless.h>
76 
77 FEATURE(nfscl, "NFSv4 client");
78 
79 extern int nfscl_ticks;
80 extern struct timeval nfsboottime;
81 extern struct nfsstats	newnfsstats;
82 extern int nfsrv_useacl;
83 extern int nfscl_debuglevel;
84 extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
85 extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
86 extern struct mtx ncl_iod_mutex;
87 NFSCLSTATEMUTEX;
88 
89 MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
90 MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
91 
92 SYSCTL_DECL(_vfs_nfs);
93 static int nfs_ip_paranoia = 1;
94 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
95     &nfs_ip_paranoia, 0, "");
96 static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
97 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
98         downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
99 /* how long between console messages "nfs server foo not responding" */
100 static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
101 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
102         downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
103 
104 static int	nfs_mountroot(struct mount *);
105 static void	nfs_sec_name(char *, int *);
106 static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
107 		    struct nfs_args *argp, const char *, struct ucred *,
108 		    struct thread *);
109 static int	mountnfs(struct nfs_args *, struct mount *,
110 		    struct sockaddr *, char *, u_char *, int, u_char *, int,
111 		    u_char *, int, struct vnode **, struct ucred *,
112 		    struct thread *, int, int, int);
113 static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
114 		    struct sockaddr_storage *, int *, off_t *,
115 		    struct timeval *);
116 static vfs_mount_t nfs_mount;
117 static vfs_cmount_t nfs_cmount;
118 static vfs_unmount_t nfs_unmount;
119 static vfs_root_t nfs_root;
120 static vfs_statfs_t nfs_statfs;
121 static vfs_sync_t nfs_sync;
122 static vfs_sysctl_t nfs_sysctl;
123 static vfs_purge_t nfs_purge;
124 
125 /*
126  * nfs vfs operations.
127  */
128 static struct vfsops nfs_vfsops = {
129 	.vfs_init =		ncl_init,
130 	.vfs_mount =		nfs_mount,
131 	.vfs_cmount =		nfs_cmount,
132 	.vfs_root =		nfs_root,
133 	.vfs_statfs =		nfs_statfs,
134 	.vfs_sync =		nfs_sync,
135 	.vfs_uninit =		ncl_uninit,
136 	.vfs_unmount =		nfs_unmount,
137 	.vfs_sysctl =		nfs_sysctl,
138 	.vfs_purge =		nfs_purge,
139 };
140 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
141 
142 /* So that loader and kldload(2) can find us, wherever we are.. */
143 MODULE_VERSION(nfs, 1);
144 MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
145 MODULE_DEPEND(nfs, krpc, 1, 1, 1);
146 MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
147 MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
148 
149 /*
150  * This structure is now defined in sys/nfs/nfs_diskless.c so that it
151  * can be shared by both NFS clients. It is declared here so that it
152  * will be defined for kernels built without NFS_ROOT, although it
153  * isn't used in that case.
154  */
155 #if !defined(NFS_ROOT) && !defined(NFSCLIENT)
156 struct nfs_diskless	nfs_diskless = { { { 0 } } };
157 struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
158 int			nfs_diskless_valid = 0;
159 #endif
160 
161 SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
162     &nfs_diskless_valid, 0,
163     "Has the diskless struct been filled correctly");
164 
165 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
166     nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
167 
168 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
169     &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
170     "%Ssockaddr_in", "Diskless root nfs address");
171 
172 
void		newnfsargs_ntoh(struct nfs_args *);

/* Diskless (NFS root) helpers defined below. */
static int	nfs_mountdiskless(char *path, struct sockaddr_in *sin,
		    struct nfs_args *args, struct thread *td,
		    struct vnode **vpp, struct mount *mp);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
180 
181 int
182 newnfs_iosize(struct nfsmount *nmp)
183 {
184 	int iosize, maxio;
185 
186 	/* First, set the upper limit for iosize */
187 	if (nmp->nm_flag & NFSMNT_NFSV4) {
188 		maxio = NFS_MAXBSIZE;
189 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
190 		if (nmp->nm_sotype == SOCK_DGRAM)
191 			maxio = NFS_MAXDGRAMDATA;
192 		else
193 			maxio = NFS_MAXBSIZE;
194 	} else {
195 		maxio = NFS_V2MAXDATA;
196 	}
197 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
198 		nmp->nm_rsize = maxio;
199 	if (nmp->nm_rsize > MAXBSIZE)
200 		nmp->nm_rsize = MAXBSIZE;
201 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
202 		nmp->nm_readdirsize = maxio;
203 	if (nmp->nm_readdirsize > nmp->nm_rsize)
204 		nmp->nm_readdirsize = nmp->nm_rsize;
205 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
206 		nmp->nm_wsize = maxio;
207 	if (nmp->nm_wsize > MAXBSIZE)
208 		nmp->nm_wsize = MAXBSIZE;
209 
210 	/*
211 	 * Calculate the size used for io buffers.  Use the larger
212 	 * of the two sizes to minimise nfs requests but make sure
213 	 * that it is at least one VM page to avoid wasting buffer
214 	 * space.
215 	 */
216 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
217 	iosize = imax(iosize, PAGE_SIZE);
218 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
219 	return (iosize);
220 }
221 
222 static void
223 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
224 {
225 
226 	args->version = NFS_ARGSVERSION;
227 	args->addr = oargs->addr;
228 	args->addrlen = oargs->addrlen;
229 	args->sotype = oargs->sotype;
230 	args->proto = oargs->proto;
231 	args->fh = oargs->fh;
232 	args->fhsize = oargs->fhsize;
233 	args->flags = oargs->flags;
234 	args->wsize = oargs->wsize;
235 	args->rsize = oargs->rsize;
236 	args->readdirsize = oargs->readdirsize;
237 	args->timeo = oargs->timeo;
238 	args->retrans = oargs->retrans;
239 	args->readahead = oargs->readahead;
240 	args->hostname = oargs->hostname;
241 }
242 
243 static void
244 nfs_convert_diskless(void)
245 {
246 
247 	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
248 		sizeof(struct ifaliasreq));
249 	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
250 		sizeof(struct sockaddr_in));
251 	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
252 	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
253 		nfsv3_diskless.root_fhsize = NFSX_MYFH;
254 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
255 	} else {
256 		nfsv3_diskless.root_fhsize = NFSX_V2FH;
257 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
258 	}
259 	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
260 		sizeof(struct sockaddr_in));
261 	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
262 	nfsv3_diskless.root_time = nfs_diskless.root_time;
263 	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
264 		MAXHOSTNAMELEN);
265 	nfs_diskless_valid = 3;
266 }
267 
268 /*
269  * nfs statfs call
270  */
271 static int
272 nfs_statfs(struct mount *mp, struct statfs *sbp)
273 {
274 	struct vnode *vp;
275 	struct thread *td;
276 	struct nfsmount *nmp = VFSTONFS(mp);
277 	struct nfsvattr nfsva;
278 	struct nfsfsinfo fs;
279 	struct nfsstatfs sb;
280 	int error = 0, attrflag, gotfsinfo = 0, ret;
281 	struct nfsnode *np;
282 
283 	td = curthread;
284 
285 	error = vfs_busy(mp, MBF_NOWAIT);
286 	if (error)
287 		return (error);
288 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
289 	if (error) {
290 		vfs_unbusy(mp);
291 		return (error);
292 	}
293 	vp = NFSTOV(np);
294 	mtx_lock(&nmp->nm_mtx);
295 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
296 		mtx_unlock(&nmp->nm_mtx);
297 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
298 		    &attrflag, NULL);
299 		if (!error)
300 			gotfsinfo = 1;
301 	} else
302 		mtx_unlock(&nmp->nm_mtx);
303 	if (!error)
304 		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
305 		    &attrflag, NULL);
306 	if (error != 0)
307 		NFSCL_DEBUG(2, "statfs=%d\n", error);
308 	if (attrflag == 0) {
309 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
310 		    td->td_ucred, td, &nfsva, NULL, NULL);
311 		if (ret) {
312 			/*
313 			 * Just set default values to get things going.
314 			 */
315 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
316 			nfsva.na_vattr.va_type = VDIR;
317 			nfsva.na_vattr.va_mode = 0777;
318 			nfsva.na_vattr.va_nlink = 100;
319 			nfsva.na_vattr.va_uid = (uid_t)0;
320 			nfsva.na_vattr.va_gid = (gid_t)0;
321 			nfsva.na_vattr.va_fileid = 2;
322 			nfsva.na_vattr.va_gen = 1;
323 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
324 			nfsva.na_vattr.va_size = 512 * 1024;
325 		}
326 	}
327 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
328 	if (!error) {
329 	    mtx_lock(&nmp->nm_mtx);
330 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
331 		nfscl_loadfsinfo(nmp, &fs);
332 	    nfscl_loadsbinfo(nmp, &sb, sbp);
333 	    sbp->f_iosize = newnfs_iosize(nmp);
334 	    mtx_unlock(&nmp->nm_mtx);
335 	    if (sbp != &mp->mnt_stat) {
336 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
337 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
338 	    }
339 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
340 	} else if (NFS_ISV4(vp)) {
341 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
342 	}
343 	vput(vp);
344 	vfs_unbusy(mp);
345 	return (error);
346 }
347 
348 /*
349  * nfs version 3 fsinfo rpc call
350  */
351 int
352 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
353     struct thread *td)
354 {
355 	struct nfsfsinfo fs;
356 	struct nfsvattr nfsva;
357 	int error, attrflag;
358 
359 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
360 	if (!error) {
361 		if (attrflag)
362 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
363 			    1);
364 		mtx_lock(&nmp->nm_mtx);
365 		nfscl_loadfsinfo(nmp, &fs);
366 		mtx_unlock(&nmp->nm_mtx);
367 	}
368 	return (error);
369 }
370 
371 /*
372  * Mount a remote root fs via. nfs. This depends on the info in the
373  * nfs_diskless structure that has been filled in properly by some primary
374  * bootstrap.
375  * It goes something like this:
376  * - do enough of "ifconfig" by calling ifioctl() so that the system
377  *   can talk to the server
378  * - If nfs_diskless.mygateway is filled in, use that address as
379  *   a default gateway.
380  * - build the rootfs mount point and call mountnfs() to do the rest.
381  *
382  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
383  * structure, as well as other global NFS client variables here, as
384  * nfs_mountroot() will be called once in the boot before any other NFS
385  * client activity occurs.
386  */
387 static int
388 nfs_mountroot(struct mount *mp)
389 {
390 	struct thread *td = curthread;
391 	struct nfsv3_diskless *nd = &nfsv3_diskless;
392 	struct socket *so;
393 	struct vnode *vp;
394 	struct ifreq ir;
395 	int error;
396 	u_long l;
397 	char buf[128];
398 	char *cp;
399 
400 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
401 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
402 #elif defined(NFS_ROOT)
403 	nfs_setup_diskless();
404 #endif
405 
406 	if (nfs_diskless_valid == 0)
407 		return (-1);
408 	if (nfs_diskless_valid == 1)
409 		nfs_convert_diskless();
410 
411 	/*
412 	 * XXX splnet, so networks will receive...
413 	 */
414 	splnet();
415 
416 	/*
417 	 * Do enough of ifconfig(8) so that the critical net interface can
418 	 * talk to the server.
419 	 */
420 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
421 	    td->td_ucred, td);
422 	if (error)
423 		panic("nfs_mountroot: socreate(%04x): %d",
424 			nd->myif.ifra_addr.sa_family, error);
425 
426 #if 0 /* XXX Bad idea */
427 	/*
428 	 * We might not have been told the right interface, so we pass
429 	 * over the first ten interfaces of the same kind, until we get
430 	 * one of them configured.
431 	 */
432 
433 	for (i = strlen(nd->myif.ifra_name) - 1;
434 		nd->myif.ifra_name[i] >= '0' &&
435 		nd->myif.ifra_name[i] <= '9';
436 		nd->myif.ifra_name[i] ++) {
437 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
438 		if(!error)
439 			break;
440 	}
441 #endif
442 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
443 	if (error)
444 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
445 	if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
446 		ir.ifr_mtu = strtol(cp, NULL, 10);
447 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
448 		freeenv(cp);
449 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
450 		if (error)
451 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
452 	}
453 	soclose(so);
454 
455 	/*
456 	 * If the gateway field is filled in, set it as the default route.
457 	 * Note that pxeboot will set a default route of 0 if the route
458 	 * is not set by the DHCP server.  Check also for a value of 0
459 	 * to avoid panicking inappropriately in that situation.
460 	 */
461 	if (nd->mygateway.sin_len != 0 &&
462 	    nd->mygateway.sin_addr.s_addr != 0) {
463 		struct sockaddr_in mask, sin;
464 
465 		bzero((caddr_t)&mask, sizeof(mask));
466 		sin = mask;
467 		sin.sin_family = AF_INET;
468 		sin.sin_len = sizeof(sin);
469                 /* XXX MRT use table 0 for this sort of thing */
470 		CURVNET_SET(TD_TO_VNET(td));
471 		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
472 		    (struct sockaddr *)&nd->mygateway,
473 		    (struct sockaddr *)&mask,
474 		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
475 		CURVNET_RESTORE();
476 		if (error)
477 			panic("nfs_mountroot: RTM_ADD: %d", error);
478 	}
479 
480 	/*
481 	 * Create the rootfs mount point.
482 	 */
483 	nd->root_args.fh = nd->root_fh;
484 	nd->root_args.fhsize = nd->root_fhsize;
485 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
486 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
487 		(l >> 24) & 0xff, (l >> 16) & 0xff,
488 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
489 	printf("NFS ROOT: %s\n", buf);
490 	nd->root_args.hostname = buf;
491 	if ((error = nfs_mountdiskless(buf,
492 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
493 		return (error);
494 	}
495 
496 	/*
497 	 * This is not really an nfs issue, but it is much easier to
498 	 * set hostname here and then let the "/etc/rc.xxx" files
499 	 * mount the right /var based upon its preset value.
500 	 */
501 	mtx_lock(&prison0.pr_mtx);
502 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
503 	    sizeof(prison0.pr_hostname));
504 	mtx_unlock(&prison0.pr_mtx);
505 	inittodr(ntohl(nd->root_time));
506 	return (0);
507 }
508 
509 /*
510  * Internal version of mount system call for diskless setup.
511  */
512 static int
513 nfs_mountdiskless(char *path,
514     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
515     struct vnode **vpp, struct mount *mp)
516 {
517 	struct sockaddr *nam;
518 	int dirlen, error;
519 	char *dirpath;
520 
521 	/*
522 	 * Find the directory path in "path", which also has the server's
523 	 * name/ip address in it.
524 	 */
525 	dirpath = strchr(path, ':');
526 	if (dirpath != NULL)
527 		dirlen = strlen(++dirpath);
528 	else
529 		dirlen = 0;
530 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
531 	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
532 	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
533 	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
534 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
535 		return (error);
536 	}
537 	return (0);
538 }
539 
540 static void
541 nfs_sec_name(char *sec, int *flagsp)
542 {
543 	if (!strcmp(sec, "krb5"))
544 		*flagsp |= NFSMNT_KERB;
545 	else if (!strcmp(sec, "krb5i"))
546 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
547 	else if (!strcmp(sec, "krb5p"))
548 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
549 }
550 
551 static void
552 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
553     const char *hostname, struct ucred *cred, struct thread *td)
554 {
555 	int s;
556 	int adjsock;
557 	char *p;
558 
559 	s = splnet();
560 
561 	/*
562 	 * Set read-only flag if requested; otherwise, clear it if this is
563 	 * an update.  If this is not an update, then either the read-only
564 	 * flag is already clear, or this is a root mount and it was set
565 	 * intentionally at some previous point.
566 	 */
567 	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
568 		MNT_ILOCK(mp);
569 		mp->mnt_flag |= MNT_RDONLY;
570 		MNT_IUNLOCK(mp);
571 	} else if (mp->mnt_flag & MNT_UPDATE) {
572 		MNT_ILOCK(mp);
573 		mp->mnt_flag &= ~MNT_RDONLY;
574 		MNT_IUNLOCK(mp);
575 	}
576 
577 	/*
578 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
579 	 * no sense in that context.  Also, set up appropriate retransmit
580 	 * and soft timeout behavior.
581 	 */
582 	if (argp->sotype == SOCK_STREAM) {
583 		nmp->nm_flag &= ~NFSMNT_NOCONN;
584 		nmp->nm_timeo = NFS_MAXTIMEO;
585 		if ((argp->flags & NFSMNT_NFSV4) != 0)
586 			nmp->nm_retry = INT_MAX;
587 		else
588 			nmp->nm_retry = NFS_RETRANS_TCP;
589 	}
590 
591 	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
592 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
593 		argp->flags &= ~NFSMNT_RDIRPLUS;
594 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
595 	}
596 
597 	/* Re-bind if rsrvd port requested and wasn't on one */
598 	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
599 		  && (argp->flags & NFSMNT_RESVPORT);
600 	/* Also re-bind if we're switching to/from a connected UDP socket */
601 	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
602 		    (argp->flags & NFSMNT_NOCONN));
603 
604 	/* Update flags atomically.  Don't change the lock bits. */
605 	nmp->nm_flag = argp->flags | nmp->nm_flag;
606 	splx(s);
607 
608 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
609 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
610 		if (nmp->nm_timeo < NFS_MINTIMEO)
611 			nmp->nm_timeo = NFS_MINTIMEO;
612 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
613 			nmp->nm_timeo = NFS_MAXTIMEO;
614 	}
615 
616 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
617 		nmp->nm_retry = argp->retrans;
618 		if (nmp->nm_retry > NFS_MAXREXMIT)
619 			nmp->nm_retry = NFS_MAXREXMIT;
620 	}
621 
622 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
623 		nmp->nm_wsize = argp->wsize;
624 		/*
625 		 * Clip at the power of 2 below the size. There is an
626 		 * issue (not isolated) that causes intermittent page
627 		 * faults if this is not done.
628 		 */
629 		if (nmp->nm_wsize > NFS_FABLKSIZE)
630 			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
631 		else
632 			nmp->nm_wsize = NFS_FABLKSIZE;
633 	}
634 
635 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
636 		nmp->nm_rsize = argp->rsize;
637 		/*
638 		 * Clip at the power of 2 below the size. There is an
639 		 * issue (not isolated) that causes intermittent page
640 		 * faults if this is not done.
641 		 */
642 		if (nmp->nm_rsize > NFS_FABLKSIZE)
643 			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
644 		else
645 			nmp->nm_rsize = NFS_FABLKSIZE;
646 	}
647 
648 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
649 		nmp->nm_readdirsize = argp->readdirsize;
650 	}
651 
652 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
653 		nmp->nm_acregmin = argp->acregmin;
654 	else
655 		nmp->nm_acregmin = NFS_MINATTRTIMO;
656 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
657 		nmp->nm_acregmax = argp->acregmax;
658 	else
659 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
660 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
661 		nmp->nm_acdirmin = argp->acdirmin;
662 	else
663 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
664 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
665 		nmp->nm_acdirmax = argp->acdirmax;
666 	else
667 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
668 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
669 		nmp->nm_acdirmin = nmp->nm_acdirmax;
670 	if (nmp->nm_acregmin > nmp->nm_acregmax)
671 		nmp->nm_acregmin = nmp->nm_acregmax;
672 
673 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
674 		if (argp->readahead <= NFS_MAXRAHEAD)
675 			nmp->nm_readahead = argp->readahead;
676 		else
677 			nmp->nm_readahead = NFS_MAXRAHEAD;
678 	}
679 	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
680 		if (argp->wcommitsize < nmp->nm_wsize)
681 			nmp->nm_wcommitsize = nmp->nm_wsize;
682 		else
683 			nmp->nm_wcommitsize = argp->wcommitsize;
684 	}
685 
686 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
687 		    (nmp->nm_soproto != argp->proto));
688 
689 	if (nmp->nm_client != NULL && adjsock) {
690 		int haslock = 0, error = 0;
691 
692 		if (nmp->nm_sotype == SOCK_STREAM) {
693 			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
694 			if (!error)
695 				haslock = 1;
696 		}
697 		if (!error) {
698 		    newnfs_disconnect(&nmp->nm_sockreq);
699 		    if (haslock)
700 			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
701 		    nmp->nm_sotype = argp->sotype;
702 		    nmp->nm_soproto = argp->proto;
703 		    if (nmp->nm_sotype == SOCK_DGRAM)
704 			while (newnfs_connect(nmp, &nmp->nm_sockreq,
705 			    cred, td, 0)) {
706 				printf("newnfs_args: retrying connect\n");
707 				(void) nfs_catnap(PSOCK, 0, "newnfscon");
708 			}
709 		}
710 	} else {
711 		nmp->nm_sotype = argp->sotype;
712 		nmp->nm_soproto = argp->proto;
713 	}
714 
715 	if (hostname != NULL) {
716 		strlcpy(nmp->nm_hostname, hostname,
717 		    sizeof(nmp->nm_hostname));
718 		p = strchr(nmp->nm_hostname, ':');
719 		if (p != NULL)
720 			*p = '\0';
721 	}
722 }
723 
/*
 * NULL-terminated list of mount option names; nfs_mount() hands it to
 * vfs_filteropt() to validate the options supplied for the mount.
 */
static const char *nfs_opts[] = {
	"from", "nfs_args",
	"noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
	"noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
	"async",
	"noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize", "soft", "hard",
	"mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans",
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeout", "addr", "fh",
	"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath", "minorversion",
	"nametimeo", "negnametimeo", "nocto", "noncontigwr", "pnfs",
	"wcommitsize",
	NULL
};
735 
736 /*
737  * VFS Operations.
738  *
739  * mount system call
740  * It seems a bit dumb to copyinstr() the host and path here and then
741  * bcopy() them in mountnfs(), but I wanted to detect errors before
742  * doing the sockargs() call because sockargs() allocates an mbuf and
743  * an error after that means that I have to release the mbuf.
744  */
745 /* ARGSUSED */
746 static int
747 nfs_mount(struct mount *mp)
748 {
749 	struct nfs_args args = {
750 	    .version = NFS_ARGSVERSION,
751 	    .addr = NULL,
752 	    .addrlen = sizeof (struct sockaddr_in),
753 	    .sotype = SOCK_STREAM,
754 	    .proto = 0,
755 	    .fh = NULL,
756 	    .fhsize = 0,
757 	    .flags = NFSMNT_RESVPORT,
758 	    .wsize = NFS_WSIZE,
759 	    .rsize = NFS_RSIZE,
760 	    .readdirsize = NFS_READDIRSIZE,
761 	    .timeo = 10,
762 	    .retrans = NFS_RETRANS,
763 	    .readahead = NFS_DEFRAHEAD,
764 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
765 	    .hostname = NULL,
766 	    .acregmin = NFS_MINATTRTIMO,
767 	    .acregmax = NFS_MAXATTRTIMO,
768 	    .acdirmin = NFS_MINDIRATTRTIMO,
769 	    .acdirmax = NFS_MAXDIRATTRTIMO,
770 	};
771 	int error = 0, ret, len;
772 	struct sockaddr *nam = NULL;
773 	struct vnode *vp;
774 	struct thread *td;
775 	char hst[MNAMELEN];
776 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
777 	char *opt, *name, *secname;
778 	int nametimeo = NFS_DEFAULT_NAMETIMEO;
779 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
780 	int minvers = 0;
781 	int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
782 	size_t hstlen;
783 
784 	has_nfs_args_opt = 0;
785 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
786 		error = EINVAL;
787 		goto out;
788 	}
789 
790 	td = curthread;
791 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
792 		error = nfs_mountroot(mp);
793 		goto out;
794 	}
795 
796 	nfscl_init();
797 
798 	/*
799 	 * The old mount_nfs program passed the struct nfs_args
800 	 * from userspace to kernel.  The new mount_nfs program
801 	 * passes string options via nmount() from userspace to kernel
802 	 * and we populate the struct nfs_args in the kernel.
803 	 */
804 	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
805 		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
806 		    sizeof(args));
807 		if (error != 0)
808 			goto out;
809 
810 		if (args.version != NFS_ARGSVERSION) {
811 			error = EPROGMISMATCH;
812 			goto out;
813 		}
814 		has_nfs_args_opt = 1;
815 	}
816 
817 	/* Handle the new style options. */
818 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
819 		args.flags |= NFSMNT_NOCONN;
820 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
821 		args.flags |= NFSMNT_NOCONN;
822 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
823 		args.flags |= NFSMNT_NOLOCKD;
824 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
825 		args.flags &= ~NFSMNT_NOLOCKD;
826 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
827 		args.flags |= NFSMNT_INT;
828 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
829 		args.flags |= NFSMNT_RDIRPLUS;
830 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
831 		args.flags |= NFSMNT_RESVPORT;
832 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
833 		args.flags &= ~NFSMNT_RESVPORT;
834 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
835 		args.flags |= NFSMNT_SOFT;
836 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
837 		args.flags &= ~NFSMNT_SOFT;
838 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
839 		args.sotype = SOCK_DGRAM;
840 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
841 		args.sotype = SOCK_DGRAM;
842 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
843 		args.sotype = SOCK_STREAM;
844 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
845 		args.flags |= NFSMNT_NFSV3;
846 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
847 		args.flags |= NFSMNT_NFSV4;
848 		args.sotype = SOCK_STREAM;
849 	}
850 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
851 		args.flags |= NFSMNT_ALLGSSNAME;
852 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
853 		args.flags |= NFSMNT_NOCTO;
854 	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
855 		args.flags |= NFSMNT_NONCONTIGWR;
856 	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
857 		args.flags |= NFSMNT_PNFS;
858 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
859 		if (opt == NULL) {
860 			vfs_mount_error(mp, "illegal readdirsize");
861 			error = EINVAL;
862 			goto out;
863 		}
864 		ret = sscanf(opt, "%d", &args.readdirsize);
865 		if (ret != 1 || args.readdirsize <= 0) {
866 			vfs_mount_error(mp, "illegal readdirsize: %s",
867 			    opt);
868 			error = EINVAL;
869 			goto out;
870 		}
871 		args.flags |= NFSMNT_READDIRSIZE;
872 	}
873 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
874 		if (opt == NULL) {
875 			vfs_mount_error(mp, "illegal readahead");
876 			error = EINVAL;
877 			goto out;
878 		}
879 		ret = sscanf(opt, "%d", &args.readahead);
880 		if (ret != 1 || args.readahead <= 0) {
881 			vfs_mount_error(mp, "illegal readahead: %s",
882 			    opt);
883 			error = EINVAL;
884 			goto out;
885 		}
886 		args.flags |= NFSMNT_READAHEAD;
887 	}
888 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
889 		if (opt == NULL) {
890 			vfs_mount_error(mp, "illegal wsize");
891 			error = EINVAL;
892 			goto out;
893 		}
894 		ret = sscanf(opt, "%d", &args.wsize);
895 		if (ret != 1 || args.wsize <= 0) {
896 			vfs_mount_error(mp, "illegal wsize: %s",
897 			    opt);
898 			error = EINVAL;
899 			goto out;
900 		}
901 		args.flags |= NFSMNT_WSIZE;
902 	}
903 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
904 		if (opt == NULL) {
905 			vfs_mount_error(mp, "illegal rsize");
906 			error = EINVAL;
907 			goto out;
908 		}
909 		ret = sscanf(opt, "%d", &args.rsize);
910 		if (ret != 1 || args.rsize <= 0) {
911 			vfs_mount_error(mp, "illegal wsize: %s",
912 			    opt);
913 			error = EINVAL;
914 			goto out;
915 		}
916 		args.flags |= NFSMNT_RSIZE;
917 	}
918 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
919 		if (opt == NULL) {
920 			vfs_mount_error(mp, "illegal retrans");
921 			error = EINVAL;
922 			goto out;
923 		}
924 		ret = sscanf(opt, "%d", &args.retrans);
925 		if (ret != 1 || args.retrans <= 0) {
926 			vfs_mount_error(mp, "illegal retrans: %s",
927 			    opt);
928 			error = EINVAL;
929 			goto out;
930 		}
931 		args.flags |= NFSMNT_RETRANS;
932 	}
933 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
934 		ret = sscanf(opt, "%d", &args.acregmin);
935 		if (ret != 1 || args.acregmin < 0) {
936 			vfs_mount_error(mp, "illegal acregmin: %s",
937 			    opt);
938 			error = EINVAL;
939 			goto out;
940 		}
941 		args.flags |= NFSMNT_ACREGMIN;
942 	}
943 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
944 		ret = sscanf(opt, "%d", &args.acregmax);
945 		if (ret != 1 || args.acregmax < 0) {
946 			vfs_mount_error(mp, "illegal acregmax: %s",
947 			    opt);
948 			error = EINVAL;
949 			goto out;
950 		}
951 		args.flags |= NFSMNT_ACREGMAX;
952 	}
953 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
954 		ret = sscanf(opt, "%d", &args.acdirmin);
955 		if (ret != 1 || args.acdirmin < 0) {
956 			vfs_mount_error(mp, "illegal acdirmin: %s",
957 			    opt);
958 			error = EINVAL;
959 			goto out;
960 		}
961 		args.flags |= NFSMNT_ACDIRMIN;
962 	}
963 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
964 		ret = sscanf(opt, "%d", &args.acdirmax);
965 		if (ret != 1 || args.acdirmax < 0) {
966 			vfs_mount_error(mp, "illegal acdirmax: %s",
967 			    opt);
968 			error = EINVAL;
969 			goto out;
970 		}
971 		args.flags |= NFSMNT_ACDIRMAX;
972 	}
973 	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
974 		ret = sscanf(opt, "%d", &args.wcommitsize);
975 		if (ret != 1 || args.wcommitsize < 0) {
976 			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
977 			error = EINVAL;
978 			goto out;
979 		}
980 		args.flags |= NFSMNT_WCOMMITSIZE;
981 	}
982 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
983 		ret = sscanf(opt, "%d", &args.timeo);
984 		if (ret != 1 || args.timeo <= 0) {
985 			vfs_mount_error(mp, "illegal timeout: %s",
986 			    opt);
987 			error = EINVAL;
988 			goto out;
989 		}
990 		args.flags |= NFSMNT_TIMEO;
991 	}
992 	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
993 		ret = sscanf(opt, "%d", &nametimeo);
994 		if (ret != 1 || nametimeo < 0) {
995 			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
996 			error = EINVAL;
997 			goto out;
998 		}
999 	}
1000 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1001 	    == 0) {
1002 		ret = sscanf(opt, "%d", &negnametimeo);
1003 		if (ret != 1 || negnametimeo < 0) {
1004 			vfs_mount_error(mp, "illegal negnametimeo: %s",
1005 			    opt);
1006 			error = EINVAL;
1007 			goto out;
1008 		}
1009 	}
1010 	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1011 	    0) {
1012 		ret = sscanf(opt, "%d", &minvers);
1013 		if (ret != 1 || minvers < 0 || minvers > 1 ||
1014 		    (args.flags & NFSMNT_NFSV4) == 0) {
1015 			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1016 			error = EINVAL;
1017 			goto out;
1018 		}
1019 	}
1020 	if (vfs_getopt(mp->mnt_optnew, "sec",
1021 		(void **) &secname, NULL) == 0)
1022 		nfs_sec_name(secname, &args.flags);
1023 
1024 	if (mp->mnt_flag & MNT_UPDATE) {
1025 		struct nfsmount *nmp = VFSTONFS(mp);
1026 
1027 		if (nmp == NULL) {
1028 			error = EIO;
1029 			goto out;
1030 		}
1031 
1032 		/*
1033 		 * If a change from TCP->UDP is done and there are thread(s)
1034 		 * that have I/O RPC(s) in progress with a tranfer size
1035 		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1036 		 * hung, retrying the RPC(s) forever. Usually these threads
1037 		 * will be seen doing an uninterruptible sleep on wait channel
1038 		 * "newnfsreq" (truncated to "newnfsre" by procstat).
1039 		 */
1040 		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1041 			tprintf(td->td_proc, LOG_WARNING,
1042 	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1043 
1044 		/*
1045 		 * When doing an update, we can't change version,
1046 		 * security, switch lockd strategies or change cookie
1047 		 * translation
1048 		 */
1049 		args.flags = (args.flags &
1050 		    ~(NFSMNT_NFSV3 |
1051 		      NFSMNT_NFSV4 |
1052 		      NFSMNT_KERB |
1053 		      NFSMNT_INTEGRITY |
1054 		      NFSMNT_PRIVACY |
1055 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1056 		    (nmp->nm_flag &
1057 			(NFSMNT_NFSV3 |
1058 			 NFSMNT_NFSV4 |
1059 			 NFSMNT_KERB |
1060 			 NFSMNT_INTEGRITY |
1061 			 NFSMNT_PRIVACY |
1062 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1063 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1064 		goto out;
1065 	}
1066 
1067 	/*
1068 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1069 	 * or no-connection mode for those protocols that support
1070 	 * no-connection mode (the flag will be cleared later for protocols
1071 	 * that do not support no-connection mode).  This will allow a client
1072 	 * to receive replies from a different IP then the request was
1073 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1074 	 * not 0.
1075 	 */
1076 	if (nfs_ip_paranoia == 0)
1077 		args.flags |= NFSMNT_NOCONN;
1078 
1079 	if (has_nfs_args_opt != 0) {
1080 		/*
1081 		 * In the 'nfs_args' case, the pointers in the args
1082 		 * structure are in userland - we copy them in here.
1083 		 */
1084 		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1085 			vfs_mount_error(mp, "Bad file handle");
1086 			error = EINVAL;
1087 			goto out;
1088 		}
1089 		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1090 		    args.fhsize);
1091 		if (error != 0)
1092 			goto out;
1093 		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1094 		if (error != 0)
1095 			goto out;
1096 		bzero(&hst[hstlen], MNAMELEN - hstlen);
1097 		args.hostname = hst;
1098 		/* sockargs() call must be after above copyin() calls */
1099 		error = getsockaddr(&nam, (caddr_t)args.addr,
1100 		    args.addrlen);
1101 		if (error != 0)
1102 			goto out;
1103 	} else {
1104 		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1105 		    &args.fhsize) == 0) {
1106 			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1107 				vfs_mount_error(mp, "Bad file handle");
1108 				error = EINVAL;
1109 				goto out;
1110 			}
1111 			bcopy(args.fh, nfh, args.fhsize);
1112 		} else {
1113 			args.fhsize = 0;
1114 		}
1115 		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1116 		    (void **)&args.hostname, &len);
1117 		if (args.hostname == NULL) {
1118 			vfs_mount_error(mp, "Invalid hostname");
1119 			error = EINVAL;
1120 			goto out;
1121 		}
1122 		bcopy(args.hostname, hst, MNAMELEN);
1123 		hst[MNAMELEN - 1] = '\0';
1124 	}
1125 
1126 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1127 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1128 	else
1129 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1130 	srvkrbnamelen = strlen(srvkrbname);
1131 
1132 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1133 		strlcpy(krbname, name, sizeof (krbname));
1134 	else
1135 		krbname[0] = '\0';
1136 	krbnamelen = strlen(krbname);
1137 
1138 	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1139 		strlcpy(dirpath, name, sizeof (dirpath));
1140 	else
1141 		dirpath[0] = '\0';
1142 	dirlen = strlen(dirpath);
1143 
1144 	if (has_nfs_args_opt == 0) {
1145 		if (vfs_getopt(mp->mnt_optnew, "addr",
1146 		    (void **)&args.addr, &args.addrlen) == 0) {
1147 			if (args.addrlen > SOCK_MAXADDRLEN) {
1148 				error = ENAMETOOLONG;
1149 				goto out;
1150 			}
1151 			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1152 			bcopy(args.addr, nam, args.addrlen);
1153 			nam->sa_len = args.addrlen;
1154 		} else {
1155 			vfs_mount_error(mp, "No server address");
1156 			error = EINVAL;
1157 			goto out;
1158 		}
1159 	}
1160 
1161 	args.fh = nfh;
1162 	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1163 	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1164 	    nametimeo, negnametimeo, minvers);
1165 out:
1166 	if (!error) {
1167 		MNT_ILOCK(mp);
1168 		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF;
1169 		MNT_IUNLOCK(mp);
1170 	}
1171 	return (error);
1172 }
1173 
1174 
1175 /*
1176  * VFS Operations.
1177  *
1178  * mount system call
1179  * It seems a bit dumb to copyinstr() the host and path here and then
1180  * bcopy() them in mountnfs(), but I wanted to detect errors before
1181  * doing the sockargs() call because sockargs() allocates an mbuf and
1182  * an error after that means that I have to release the mbuf.
1183  */
1184 /* ARGSUSED */
1185 static int
1186 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1187 {
1188 	int error;
1189 	struct nfs_args args;
1190 
1191 	error = copyin(data, &args, sizeof (struct nfs_args));
1192 	if (error)
1193 		return error;
1194 
1195 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1196 
1197 	error = kernel_mount(ma, flags);
1198 	return (error);
1199 }
1200 
1201 /*
1202  * Common code for mount and mountroot
1203  */
1204 static int
1205 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1206     char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1207     u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1208     struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1209     int minvers)
1210 {
1211 	struct nfsmount *nmp;
1212 	struct nfsnode *np;
1213 	int error, trycnt, ret;
1214 	struct nfsvattr nfsva;
1215 	struct nfsclclient *clp;
1216 	struct nfsclds *dsp, *tdsp;
1217 	uint32_t lease;
1218 	static u_int64_t clval = 0;
1219 
1220 	NFSCL_DEBUG(3, "in mnt\n");
1221 	clp = NULL;
1222 	if (mp->mnt_flag & MNT_UPDATE) {
1223 		nmp = VFSTONFS(mp);
1224 		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1225 		FREE(nam, M_SONAME);
1226 		return (0);
1227 	} else {
1228 		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1229 		    krbnamelen + dirlen + srvkrbnamelen + 2,
1230 		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1231 		TAILQ_INIT(&nmp->nm_bufq);
1232 		if (clval == 0)
1233 			clval = (u_int64_t)nfsboottime.tv_sec;
1234 		nmp->nm_clval = clval++;
1235 		nmp->nm_krbnamelen = krbnamelen;
1236 		nmp->nm_dirpathlen = dirlen;
1237 		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1238 		if (td->td_ucred->cr_uid != (uid_t)0) {
1239 			/*
1240 			 * nm_uid is used to get KerberosV credentials for
1241 			 * the nfsv4 state handling operations if there is
1242 			 * no host based principal set. Use the uid of
1243 			 * this user if not root, since they are doing the
1244 			 * mount. I don't think setting this for root will
1245 			 * work, since root normally does not have user
1246 			 * credentials in a credentials cache.
1247 			 */
1248 			nmp->nm_uid = td->td_ucred->cr_uid;
1249 		} else {
1250 			/*
1251 			 * Just set to -1, so it won't be used.
1252 			 */
1253 			nmp->nm_uid = (uid_t)-1;
1254 		}
1255 
1256 		/* Copy and null terminate all the names */
1257 		if (nmp->nm_krbnamelen > 0) {
1258 			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1259 			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1260 		}
1261 		if (nmp->nm_dirpathlen > 0) {
1262 			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1263 			    nmp->nm_dirpathlen);
1264 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1265 			    + 1] = '\0';
1266 		}
1267 		if (nmp->nm_srvkrbnamelen > 0) {
1268 			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1269 			    nmp->nm_srvkrbnamelen);
1270 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1271 			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1272 		}
1273 		nmp->nm_sockreq.nr_cred = crhold(cred);
1274 		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1275 		mp->mnt_data = nmp;
1276 		nmp->nm_getinfo = nfs_getnlminfo;
1277 		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1278 	}
1279 	vfs_getnewfsid(mp);
1280 	nmp->nm_mountp = mp;
1281 	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1282 
1283 	/*
1284 	 * Since nfs_decode_args() might optionally set them, these
1285 	 * need to be set to defaults before the call, so that the
1286 	 * optional settings aren't overwritten.
1287 	 */
1288 	nmp->nm_nametimeo = nametimeo;
1289 	nmp->nm_negnametimeo = negnametimeo;
1290 	nmp->nm_timeo = NFS_TIMEO;
1291 	nmp->nm_retry = NFS_RETRANS;
1292 	nmp->nm_readahead = NFS_DEFRAHEAD;
1293 	if (desiredvnodes >= 11000)
1294 		nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1295 	else
1296 		nmp->nm_wcommitsize = hibufspace / 10;
1297 	if ((argp->flags & NFSMNT_NFSV4) != 0)
1298 		nmp->nm_minorvers = minvers;
1299 	else
1300 		nmp->nm_minorvers = 0;
1301 
1302 	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1303 
1304 	/*
1305 	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1306 	 * high, depending on whether we end up with negative offsets in
1307 	 * the client or server somewhere.  2GB-1 may be safer.
1308 	 *
1309 	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1310 	 * that we can handle until we find out otherwise.
1311 	 */
1312 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1313 		nmp->nm_maxfilesize = 0xffffffffLL;
1314 	else
1315 		nmp->nm_maxfilesize = OFF_MAX;
1316 
1317 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1318 		nmp->nm_wsize = NFS_WSIZE;
1319 		nmp->nm_rsize = NFS_RSIZE;
1320 		nmp->nm_readdirsize = NFS_READDIRSIZE;
1321 	}
1322 	nmp->nm_numgrps = NFS_MAXGRPS;
1323 	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1324 	if (nmp->nm_tprintf_delay < 0)
1325 		nmp->nm_tprintf_delay = 0;
1326 	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1327 	if (nmp->nm_tprintf_initial_delay < 0)
1328 		nmp->nm_tprintf_initial_delay = 0;
1329 	nmp->nm_fhsize = argp->fhsize;
1330 	if (nmp->nm_fhsize > 0)
1331 		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1332 	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1333 	nmp->nm_nam = nam;
1334 	/* Set up the sockets and per-host congestion */
1335 	nmp->nm_sotype = argp->sotype;
1336 	nmp->nm_soproto = argp->proto;
1337 	nmp->nm_sockreq.nr_prog = NFS_PROG;
1338 	if ((argp->flags & NFSMNT_NFSV4))
1339 		nmp->nm_sockreq.nr_vers = NFS_VER4;
1340 	else if ((argp->flags & NFSMNT_NFSV3))
1341 		nmp->nm_sockreq.nr_vers = NFS_VER3;
1342 	else
1343 		nmp->nm_sockreq.nr_vers = NFS_VER2;
1344 
1345 
1346 	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1347 		goto bad;
1348 	/* For NFSv4.1, get the clientid now. */
1349 	if (nmp->nm_minorvers > 0) {
1350 		NFSCL_DEBUG(3, "at getcl\n");
1351 		error = nfscl_getcl(mp, cred, td, 0, &clp);
1352 		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1353 		if (error != 0)
1354 			goto bad;
1355 	}
1356 
1357 	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1358 	    nmp->nm_dirpathlen > 0) {
1359 		NFSCL_DEBUG(3, "in dirp\n");
1360 		/*
1361 		 * If the fhsize on the mount point == 0 for V4, the mount
1362 		 * path needs to be looked up.
1363 		 */
1364 		trycnt = 3;
1365 		do {
1366 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1367 			    cred, td);
1368 			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
1369 			if (error)
1370 				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1371 		} while (error && --trycnt > 0);
1372 		if (error) {
1373 			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1374 			goto bad;
1375 		}
1376 	}
1377 
1378 	/*
1379 	 * A reference count is needed on the nfsnode representing the
1380 	 * remote root.  If this object is not persistent, then backward
1381 	 * traversals of the mount point (i.e. "..") will not work if
1382 	 * the nfsnode gets flushed out of the cache. Ufs does not have
1383 	 * this problem, because one can identify root inodes by their
1384 	 * number == ROOTINO (2).
1385 	 */
1386 	if (nmp->nm_fhsize > 0) {
1387 		/*
1388 		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1389 		 * non-zero for the root vnode. f_iosize will be set correctly
1390 		 * by nfs_statfs() before any I/O occurs.
1391 		 */
1392 		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1393 		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1394 		    LK_EXCLUSIVE);
1395 		if (error)
1396 			goto bad;
1397 		*vpp = NFSTOV(np);
1398 
1399 		/*
1400 		 * Get file attributes and transfer parameters for the
1401 		 * mountpoint.  This has the side effect of filling in
1402 		 * (*vpp)->v_type with the correct value.
1403 		 */
1404 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1405 		    cred, td, &nfsva, NULL, &lease);
1406 		if (ret) {
1407 			/*
1408 			 * Just set default values to get things going.
1409 			 */
1410 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1411 			nfsva.na_vattr.va_type = VDIR;
1412 			nfsva.na_vattr.va_mode = 0777;
1413 			nfsva.na_vattr.va_nlink = 100;
1414 			nfsva.na_vattr.va_uid = (uid_t)0;
1415 			nfsva.na_vattr.va_gid = (gid_t)0;
1416 			nfsva.na_vattr.va_fileid = 2;
1417 			nfsva.na_vattr.va_gen = 1;
1418 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1419 			nfsva.na_vattr.va_size = 512 * 1024;
1420 			lease = 60;
1421 		}
1422 		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1423 		if (nmp->nm_minorvers > 0) {
1424 			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1425 			NFSLOCKCLSTATE();
1426 			clp->nfsc_renew = NFSCL_RENEW(lease);
1427 			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1428 			clp->nfsc_clientidrev++;
1429 			if (clp->nfsc_clientidrev == 0)
1430 				clp->nfsc_clientidrev++;
1431 			NFSUNLOCKCLSTATE();
1432 			/*
1433 			 * Mount will succeed, so the renew thread can be
1434 			 * started now.
1435 			 */
1436 			nfscl_start_renewthread(clp);
1437 			nfscl_clientrelease(clp);
1438 		}
1439 		if (argp->flags & NFSMNT_NFSV3)
1440 			ncl_fsinfo(nmp, *vpp, cred, td);
1441 
1442 		/* Mark if the mount point supports NFSv4 ACLs. */
1443 		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1444 		    ret == 0 &&
1445 		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1446 			MNT_ILOCK(mp);
1447 			mp->mnt_flag |= MNT_NFS4ACLS;
1448 			MNT_IUNLOCK(mp);
1449 		}
1450 
1451 		/*
1452 		 * Lose the lock but keep the ref.
1453 		 */
1454 		NFSVOPUNLOCK(*vpp, 0);
1455 		return (0);
1456 	}
1457 	error = EIO;
1458 
1459 bad:
1460 	if (clp != NULL)
1461 		nfscl_clientrelease(clp);
1462 	newnfs_disconnect(&nmp->nm_sockreq);
1463 	crfree(nmp->nm_sockreq.nr_cred);
1464 	if (nmp->nm_sockreq.nr_auth != NULL)
1465 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1466 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1467 	mtx_destroy(&nmp->nm_mtx);
1468 	if (nmp->nm_clp != NULL) {
1469 		NFSLOCKCLSTATE();
1470 		LIST_REMOVE(nmp->nm_clp, nfsc_list);
1471 		NFSUNLOCKCLSTATE();
1472 		free(nmp->nm_clp, M_NFSCLCLIENT);
1473 	}
1474 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1475 		nfscl_freenfsclds(dsp);
1476 	FREE(nmp, M_NEWNFSMNT);
1477 	FREE(nam, M_SONAME);
1478 	return (error);
1479 }
1480 
1481 /*
1482  * unmount system call
1483  */
1484 static int
1485 nfs_unmount(struct mount *mp, int mntflags)
1486 {
1487 	struct thread *td;
1488 	struct nfsmount *nmp;
1489 	int error, flags = 0, i, trycnt = 0;
1490 	struct nfsclds *dsp, *tdsp;
1491 
1492 	td = curthread;
1493 
1494 	if (mntflags & MNT_FORCE)
1495 		flags |= FORCECLOSE;
1496 	nmp = VFSTONFS(mp);
1497 	/*
1498 	 * Goes something like this..
1499 	 * - Call vflush() to clear out vnodes for this filesystem
1500 	 * - Close the socket
1501 	 * - Free up the data structures
1502 	 */
1503 	/* In the forced case, cancel any outstanding requests. */
1504 	if (mntflags & MNT_FORCE) {
1505 		error = newnfs_nmcancelreqs(nmp);
1506 		if (error)
1507 			goto out;
1508 		/* For a forced close, get rid of the renew thread now */
1509 		nfscl_umount(nmp, td);
1510 	}
1511 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1512 	do {
1513 		error = vflush(mp, 1, flags, td);
1514 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1515 			(void) nfs_catnap(PSOCK, error, "newndm");
1516 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1517 	if (error)
1518 		goto out;
1519 
1520 	/*
1521 	 * We are now committed to the unmount.
1522 	 */
1523 	if ((mntflags & MNT_FORCE) == 0)
1524 		nfscl_umount(nmp, td);
1525 	/* Make sure no nfsiods are assigned to this mount. */
1526 	mtx_lock(&ncl_iod_mutex);
1527 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1528 		if (ncl_iodmount[i] == nmp) {
1529 			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1530 			ncl_iodmount[i] = NULL;
1531 		}
1532 	mtx_unlock(&ncl_iod_mutex);
1533 	newnfs_disconnect(&nmp->nm_sockreq);
1534 	crfree(nmp->nm_sockreq.nr_cred);
1535 	FREE(nmp->nm_nam, M_SONAME);
1536 	if (nmp->nm_sockreq.nr_auth != NULL)
1537 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1538 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1539 	mtx_destroy(&nmp->nm_mtx);
1540 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1541 		nfscl_freenfsclds(dsp);
1542 	FREE(nmp, M_NEWNFSMNT);
1543 out:
1544 	return (error);
1545 }
1546 
1547 /*
1548  * Return root of a filesystem
1549  */
1550 static int
1551 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1552 {
1553 	struct vnode *vp;
1554 	struct nfsmount *nmp;
1555 	struct nfsnode *np;
1556 	int error;
1557 
1558 	nmp = VFSTONFS(mp);
1559 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1560 	if (error)
1561 		return error;
1562 	vp = NFSTOV(np);
1563 	/*
1564 	 * Get transfer parameters and attributes for root vnode once.
1565 	 */
1566 	mtx_lock(&nmp->nm_mtx);
1567 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1568 		mtx_unlock(&nmp->nm_mtx);
1569 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1570 	} else
1571 		mtx_unlock(&nmp->nm_mtx);
1572 	if (vp->v_type == VNON)
1573 	    vp->v_type = VDIR;
1574 	vp->v_vflag |= VV_ROOT;
1575 	*vpp = vp;
1576 	return (0);
1577 }
1578 
1579 /*
1580  * Flush out the buffer cache
1581  */
1582 /* ARGSUSED */
1583 static int
1584 nfs_sync(struct mount *mp, int waitfor)
1585 {
1586 	struct vnode *vp, *mvp;
1587 	struct thread *td;
1588 	int error, allerror = 0;
1589 
1590 	td = curthread;
1591 
1592 	MNT_ILOCK(mp);
1593 	/*
1594 	 * If a forced dismount is in progress, return from here so that
1595 	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1596 	 * calling VFS_UNMOUNT().
1597 	 */
1598 	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1599 		MNT_IUNLOCK(mp);
1600 		return (EBADF);
1601 	}
1602 	MNT_IUNLOCK(mp);
1603 
1604 	/*
1605 	 * Force stale buffer cache information to be flushed.
1606 	 */
1607 loop:
1608 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1609 		/* XXX Racy bv_cnt check. */
1610 		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1611 		    waitfor == MNT_LAZY) {
1612 			VI_UNLOCK(vp);
1613 			continue;
1614 		}
1615 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1616 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1617 			goto loop;
1618 		}
1619 		error = VOP_FSYNC(vp, waitfor, td);
1620 		if (error)
1621 			allerror = error;
1622 		NFSVOPUNLOCK(vp, 0);
1623 		vrele(vp);
1624 	}
1625 	return (allerror);
1626 }
1627 
1628 static int
1629 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1630 {
1631 	struct nfsmount *nmp = VFSTONFS(mp);
1632 	struct vfsquery vq;
1633 	int error;
1634 
1635 	bzero(&vq, sizeof(vq));
1636 	switch (op) {
1637 #if 0
1638 	case VFS_CTL_NOLOCKS:
1639 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1640  		if (req->oldptr != NULL) {
1641  			error = SYSCTL_OUT(req, &val, sizeof(val));
1642  			if (error)
1643  				return (error);
1644  		}
1645  		if (req->newptr != NULL) {
1646  			error = SYSCTL_IN(req, &val, sizeof(val));
1647  			if (error)
1648  				return (error);
1649 			if (val)
1650 				nmp->nm_flag |= NFSMNT_NOLOCKS;
1651 			else
1652 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1653  		}
1654 		break;
1655 #endif
1656 	case VFS_CTL_QUERY:
1657 		mtx_lock(&nmp->nm_mtx);
1658 		if (nmp->nm_state & NFSSTA_TIMEO)
1659 			vq.vq_flags |= VQ_NOTRESP;
1660 		mtx_unlock(&nmp->nm_mtx);
1661 #if 0
1662 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1663 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1664 			vq.vq_flags |= VQ_NOTRESPLOCK;
1665 #endif
1666 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1667 		break;
1668  	case VFS_CTL_TIMEO:
1669  		if (req->oldptr != NULL) {
1670  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1671  			    sizeof(nmp->nm_tprintf_initial_delay));
1672  			if (error)
1673  				return (error);
1674  		}
1675  		if (req->newptr != NULL) {
1676 			error = vfs_suser(mp, req->td);
1677 			if (error)
1678 				return (error);
1679  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1680  			    sizeof(nmp->nm_tprintf_initial_delay));
1681  			if (error)
1682  				return (error);
1683  			if (nmp->nm_tprintf_initial_delay < 0)
1684  				nmp->nm_tprintf_initial_delay = 0;
1685  		}
1686 		break;
1687 	default:
1688 		return (ENOTSUP);
1689 	}
1690 	return (0);
1691 }
1692 
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.  The result of the cancel request is not looked at.
 */
static void
nfs_purge(struct mount *mp)
{

	(void) newnfs_nmcancelreqs(VFSTONFS(mp));
}
1705 
1706 /*
1707  * Extract the information needed by the nlm from the nfs vnode.
1708  */
1709 static void
1710 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1711     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1712     struct timeval *timeop)
1713 {
1714 	struct nfsmount *nmp;
1715 	struct nfsnode *np = VTONFS(vp);
1716 
1717 	nmp = VFSTONFS(vp->v_mount);
1718 	if (fhlenp != NULL)
1719 		*fhlenp = (size_t)np->n_fhp->nfh_len;
1720 	if (fhp != NULL)
1721 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1722 	if (sp != NULL)
1723 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1724 	if (is_v3p != NULL)
1725 		*is_v3p = NFS_ISV3(vp);
1726 	if (sizep != NULL)
1727 		*sizep = np->n_size;
1728 	if (timeop != NULL) {
1729 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1730 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1731 	}
1732 }
1733 
/*
 * Append the option name "opt" to the output buffer when "testval" is
 * non-zero and the buffer has room for the name plus its terminating
 * null.  On output *buf and *blen are advanced past the text written.
 * "nmp" is not used.
 */
static __inline void
nfscl_printopt(struct nfsmount *nmp, int testval, char *opt, char **buf,
    size_t *blen)
{
	size_t optlen;
	int written;

	if (testval == 0)
		return;
	optlen = strlen(opt);
	if (*blen <= optlen)
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	/* The room check above means the whole name should have fit. */
	if ((size_t)written != optlen)
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
1751 
/*
 * Append "opt=optval" (optval printed as a decimal integer) to the
 * output buffer.  Nothing is attempted unless the buffer is larger than
 * the option name plus one.  *buf and *blen are only advanced when the
 * complete text fit; otherwise a truncated string may have been written
 * at *buf and the position is left unchanged.  "nmp" is not used.
 */
static __inline void
nfscl_printoptval(struct nfsmount *nmp, int optval, char *opt, char **buf,
    size_t *blen)
{
	int written;

	if (*blen <= strlen(opt) + 1)
		return;

	/* Could result in truncated output string. */
	written = snprintf(*buf, *blen, "%s=%d", opt, optval);
	if ((size_t)written >= *blen)
		return;
	*buf += written;
	*blen -= written;
}
1769 
1770 /*
1771  * Load the option flags and values into the buffer.
1772  */
1773 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1774 {
1775 	char *buf;
1776 	size_t blen;
1777 
1778 	buf = buffer;
1779 	blen = buflen;
1780 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1781 	    &blen);
1782 	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1783 		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1784 		    &blen);
1785 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1786 		    &buf, &blen);
1787 	}
1788 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1789 	    &blen);
1790 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1791 	    "nfsv2", &buf, &blen);
1792 	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1793 	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1794 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1795 	    &buf, &blen);
1796 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1797 	    &buf, &blen);
1798 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1799 	    &blen);
1800 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1801 	    &blen);
1802 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1803 	    &blen);
1804 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1805 	    &blen);
1806 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1807 	    &blen);
1808 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
1809 	    ",noncontigwr", &buf, &blen);
1810 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1811 	    0, ",lockd", &buf, &blen);
1812 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1813 	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1814 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1815 	    &buf, &blen);
1816 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1817 	    &buf, &blen);
1818 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1819 	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
1820 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1821 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1822 	    &buf, &blen);
1823 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1824 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
1825 	    &buf, &blen);
1826 	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1827 	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1828 	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
1829 	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
1830 	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
1831 	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
1832 	    &blen);
1833 	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
1834 	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
1835 	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
1836 	    &blen);
1837 	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
1838 	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
1839 	    &blen);
1840 	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
1841 	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
1842 }
1843 
1844