xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision 31d62a73c2e6ac0ff413a7a17700ffc7dce254ef)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1993, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Rick Macklem at The University of Guelph.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 
41 #include "opt_bootp.h"
42 #include "opt_nfsroot.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/kernel.h>
47 #include <sys/bio.h>
48 #include <sys/buf.h>
49 #include <sys/clock.h>
50 #include <sys/jail.h>
51 #include <sys/limits.h>
52 #include <sys/lock.h>
53 #include <sys/malloc.h>
54 #include <sys/mbuf.h>
55 #include <sys/module.h>
56 #include <sys/mount.h>
57 #include <sys/proc.h>
58 #include <sys/socket.h>
59 #include <sys/socketvar.h>
60 #include <sys/sockio.h>
61 #include <sys/sysctl.h>
62 #include <sys/vnode.h>
63 #include <sys/signalvar.h>
64 
65 #include <vm/vm.h>
66 #include <vm/vm_extern.h>
67 #include <vm/uma.h>
68 
69 #include <net/if.h>
70 #include <net/route.h>
71 #include <netinet/in.h>
72 
73 #include <fs/nfs/nfsport.h>
74 #include <fs/nfsclient/nfsnode.h>
75 #include <fs/nfsclient/nfsmount.h>
76 #include <fs/nfsclient/nfs.h>
77 #include <nfs/nfsdiskless.h>
78 
/* Declare the "nfscl" kernel feature with its description string. */
FEATURE(nfscl, "NFSv4 client");

/*
 * Globals that are defined in other NFS client/common source files and
 * only referenced from this one.
 */
extern int nfscl_ticks;
extern struct timeval nfsboottime;
extern int nfsrv_useacl;
extern int nfscl_debuglevel;
/* Per-daemon state arrays, each NFS_MAXASYNCDAEMON entries long. */
extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
extern struct mtx ncl_iod_mutex;
/*
 * NOTE(review): NFSCLSTATEMUTEX is a macro defined outside this file;
 * presumably it expands to a declaration of the client state mutex --
 * confirm against its definition.
 */
NFSCLSTATEMUTEX;
extern struct mtx nfsrv_dslock_mtx;
90 
/* malloc(9) types owned by the NFS client. */
MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header");
MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct");

/* Tunables published under the vfs.nfs sysctl node. */
SYSCTL_DECL(_vfs_nfs);
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0, "");
/*
 * NOTE(review): the two "downdelay*" knobs below pass their default-value
 * constant in the OID-number argument, where the other knobs in this file
 * pass OID_AUTO.  Confirm that a fixed OID number is really intended.
 */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
/* How long between console messages "nfs server foo not responding". */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
#ifdef NFS_DEBUG
/* Only compiled into kernels built with NFS_DEBUG. */
int nfs_debug;
SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
    "Toggle debug flag");
#endif
110 
/*
 * Forward declarations: file-local helpers first, then the VFS entry
 * points that are wired into nfs_vfsops.
 */
static int	nfs_mountroot(struct mount *);
static void	nfs_sec_name(char *, int *);
static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
		    struct nfs_args *argp, const char *, struct ucred *,
		    struct thread *);
static int	mountnfs(struct nfs_args *, struct mount *,
		    struct sockaddr *, char *, u_char *, int, u_char *, int,
		    u_char *, int, struct vnode **, struct ucred *,
		    struct thread *, int, int, int);
static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
		    struct sockaddr_storage *, int *, off_t *,
		    struct timeval *);
static vfs_mount_t nfs_mount;
static vfs_cmount_t nfs_cmount;
static vfs_unmount_t nfs_unmount;
static vfs_root_t nfs_root;
static vfs_statfs_t nfs_statfs;
static vfs_sync_t nfs_sync;
static vfs_sysctl_t nfs_sysctl;
static vfs_purge_t nfs_purge;
131 
/*
 * nfs vfs operations.
 *
 * Operation vector for the "nfs" file system type.  ncl_init and
 * ncl_uninit are defined outside this file; the remaining handlers are
 * the static functions declared above.
 */
static struct vfsops nfs_vfsops = {
	.vfs_init =		ncl_init,
	.vfs_mount =		nfs_mount,
	.vfs_cmount =		nfs_cmount,
	.vfs_root =		nfs_root,
	.vfs_statfs =		nfs_statfs,
	.vfs_sync =		nfs_sync,
	.vfs_uninit =		ncl_uninit,
	.vfs_unmount =		nfs_unmount,
	.vfs_sysctl =		nfs_sysctl,
	.vfs_purge =		nfs_purge,
};
/* Register the vector under the name "nfs" with the given VFCF_* flags. */
VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
148 
/*
 * So that loader and kldload(2) can find us, wherever we are.
 * The module is version 1 and depends on version 1 of each of the
 * nfscommon, krpc, nfssvc and nfslock modules.
 */
MODULE_VERSION(nfs, 1);
MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
MODULE_DEPEND(nfs, krpc, 1, 1, 1);
MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
155 
/*
 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
 * can be shared by both NFS clients. It is declared here so that it
 * will be defined for kernels built without NFS_ROOT, although it
 * isn't used in that case.
 */
#if !defined(NFS_ROOT)
struct nfs_diskless	nfs_diskless = { { { 0 } } };
struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
int			nfs_diskless_valid = 0;
#endif

/* Read-only sysctls under vfs.nfs exposing the diskless boot state. */
SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &nfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");

SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
    "%Ssockaddr_in", "Diskless root nfs address");
178 
179 
/* newnfsargs_ntoh() has external linkage; it is not defined in the part of the file shown here. */
void		newnfsargs_ntoh(struct nfs_args *);
/* File-local helpers used by the diskless (NFS root) mount path. */
static int	nfs_mountdiskless(char *,
		    struct sockaddr_in *, struct nfs_args *,
		    struct thread *, struct vnode **, struct mount *);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
187 
188 int
189 newnfs_iosize(struct nfsmount *nmp)
190 {
191 	int iosize, maxio;
192 
193 	/* First, set the upper limit for iosize */
194 	if (nmp->nm_flag & NFSMNT_NFSV4) {
195 		maxio = NFS_MAXBSIZE;
196 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
197 		if (nmp->nm_sotype == SOCK_DGRAM)
198 			maxio = NFS_MAXDGRAMDATA;
199 		else
200 			maxio = NFS_MAXBSIZE;
201 	} else {
202 		maxio = NFS_V2MAXDATA;
203 	}
204 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
205 		nmp->nm_rsize = maxio;
206 	if (nmp->nm_rsize > NFS_MAXBSIZE)
207 		nmp->nm_rsize = NFS_MAXBSIZE;
208 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
209 		nmp->nm_readdirsize = maxio;
210 	if (nmp->nm_readdirsize > nmp->nm_rsize)
211 		nmp->nm_readdirsize = nmp->nm_rsize;
212 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
213 		nmp->nm_wsize = maxio;
214 	if (nmp->nm_wsize > NFS_MAXBSIZE)
215 		nmp->nm_wsize = NFS_MAXBSIZE;
216 
217 	/*
218 	 * Calculate the size used for io buffers.  Use the larger
219 	 * of the two sizes to minimise nfs requests but make sure
220 	 * that it is at least one VM page to avoid wasting buffer
221 	 * space.  It must also be at least NFS_DIRBLKSIZ, since
222 	 * that is the buffer size used for directories.
223 	 */
224 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
225 	iosize = imax(iosize, PAGE_SIZE);
226 	iosize = imax(iosize, NFS_DIRBLKSIZ);
227 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
228 	return (iosize);
229 }
230 
231 static void
232 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
233 {
234 
235 	args->version = NFS_ARGSVERSION;
236 	args->addr = oargs->addr;
237 	args->addrlen = oargs->addrlen;
238 	args->sotype = oargs->sotype;
239 	args->proto = oargs->proto;
240 	args->fh = oargs->fh;
241 	args->fhsize = oargs->fhsize;
242 	args->flags = oargs->flags;
243 	args->wsize = oargs->wsize;
244 	args->rsize = oargs->rsize;
245 	args->readdirsize = oargs->readdirsize;
246 	args->timeo = oargs->timeo;
247 	args->retrans = oargs->retrans;
248 	args->readahead = oargs->readahead;
249 	args->hostname = oargs->hostname;
250 }
251 
252 static void
253 nfs_convert_diskless(void)
254 {
255 
256 	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
257 		sizeof(struct ifaliasreq));
258 	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
259 		sizeof(struct sockaddr_in));
260 	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
261 	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
262 		nfsv3_diskless.root_fhsize = NFSX_MYFH;
263 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
264 	} else {
265 		nfsv3_diskless.root_fhsize = NFSX_V2FH;
266 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
267 	}
268 	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
269 		sizeof(struct sockaddr_in));
270 	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
271 	nfsv3_diskless.root_time = nfs_diskless.root_time;
272 	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
273 		MAXHOSTNAMELEN);
274 	nfs_diskless_valid = 3;
275 }
276 
277 /*
278  * nfs statfs call
279  */
280 static int
281 nfs_statfs(struct mount *mp, struct statfs *sbp)
282 {
283 	struct vnode *vp;
284 	struct thread *td;
285 	struct nfsmount *nmp = VFSTONFS(mp);
286 	struct nfsvattr nfsva;
287 	struct nfsfsinfo fs;
288 	struct nfsstatfs sb;
289 	int error = 0, attrflag, gotfsinfo = 0, ret;
290 	struct nfsnode *np;
291 
292 	td = curthread;
293 
294 	error = vfs_busy(mp, MBF_NOWAIT);
295 	if (error)
296 		return (error);
297 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
298 	if (error) {
299 		vfs_unbusy(mp);
300 		return (error);
301 	}
302 	vp = NFSTOV(np);
303 	mtx_lock(&nmp->nm_mtx);
304 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
305 		mtx_unlock(&nmp->nm_mtx);
306 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
307 		    &attrflag, NULL);
308 		if (!error)
309 			gotfsinfo = 1;
310 	} else
311 		mtx_unlock(&nmp->nm_mtx);
312 	if (!error)
313 		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
314 		    &attrflag, NULL);
315 	if (error != 0)
316 		NFSCL_DEBUG(2, "statfs=%d\n", error);
317 	if (attrflag == 0) {
318 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
319 		    td->td_ucred, td, &nfsva, NULL, NULL);
320 		if (ret) {
321 			/*
322 			 * Just set default values to get things going.
323 			 */
324 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
325 			nfsva.na_vattr.va_type = VDIR;
326 			nfsva.na_vattr.va_mode = 0777;
327 			nfsva.na_vattr.va_nlink = 100;
328 			nfsva.na_vattr.va_uid = (uid_t)0;
329 			nfsva.na_vattr.va_gid = (gid_t)0;
330 			nfsva.na_vattr.va_fileid = 2;
331 			nfsva.na_vattr.va_gen = 1;
332 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
333 			nfsva.na_vattr.va_size = 512 * 1024;
334 		}
335 	}
336 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
337 	if (!error) {
338 	    mtx_lock(&nmp->nm_mtx);
339 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
340 		nfscl_loadfsinfo(nmp, &fs);
341 	    nfscl_loadsbinfo(nmp, &sb, sbp);
342 	    sbp->f_iosize = newnfs_iosize(nmp);
343 	    mtx_unlock(&nmp->nm_mtx);
344 	    if (sbp != &mp->mnt_stat) {
345 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
346 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
347 	    }
348 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
349 	} else if (NFS_ISV4(vp)) {
350 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
351 	}
352 	vput(vp);
353 	vfs_unbusy(mp);
354 	return (error);
355 }
356 
357 /*
358  * nfs version 3 fsinfo rpc call
359  */
360 int
361 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
362     struct thread *td)
363 {
364 	struct nfsfsinfo fs;
365 	struct nfsvattr nfsva;
366 	int error, attrflag;
367 
368 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
369 	if (!error) {
370 		if (attrflag)
371 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
372 			    1);
373 		mtx_lock(&nmp->nm_mtx);
374 		nfscl_loadfsinfo(nmp, &fs);
375 		mtx_unlock(&nmp->nm_mtx);
376 	}
377 	return (error);
378 }
379 
380 /*
381  * Mount a remote root fs via. nfs. This depends on the info in the
382  * nfs_diskless structure that has been filled in properly by some primary
383  * bootstrap.
384  * It goes something like this:
385  * - do enough of "ifconfig" by calling ifioctl() so that the system
386  *   can talk to the server
387  * - If nfs_diskless.mygateway is filled in, use that address as
388  *   a default gateway.
389  * - build the rootfs mount point and call mountnfs() to do the rest.
390  *
391  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
392  * structure, as well as other global NFS client variables here, as
393  * nfs_mountroot() will be called once in the boot before any other NFS
394  * client activity occurs.
395  */
396 static int
397 nfs_mountroot(struct mount *mp)
398 {
399 	struct thread *td = curthread;
400 	struct nfsv3_diskless *nd = &nfsv3_diskless;
401 	struct socket *so;
402 	struct vnode *vp;
403 	struct ifreq ir;
404 	int error;
405 	u_long l;
406 	char buf[128];
407 	char *cp;
408 
409 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
410 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
411 #elif defined(NFS_ROOT)
412 	nfs_setup_diskless();
413 #endif
414 
415 	if (nfs_diskless_valid == 0)
416 		return (-1);
417 	if (nfs_diskless_valid == 1)
418 		nfs_convert_diskless();
419 
420 	/*
421 	 * Do enough of ifconfig(8) so that the critical net interface can
422 	 * talk to the server.
423 	 */
424 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
425 	    td->td_ucred, td);
426 	if (error)
427 		panic("nfs_mountroot: socreate(%04x): %d",
428 			nd->myif.ifra_addr.sa_family, error);
429 
430 #if 0 /* XXX Bad idea */
431 	/*
432 	 * We might not have been told the right interface, so we pass
433 	 * over the first ten interfaces of the same kind, until we get
434 	 * one of them configured.
435 	 */
436 
437 	for (i = strlen(nd->myif.ifra_name) - 1;
438 		nd->myif.ifra_name[i] >= '0' &&
439 		nd->myif.ifra_name[i] <= '9';
440 		nd->myif.ifra_name[i] ++) {
441 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
442 		if(!error)
443 			break;
444 	}
445 #endif
446 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
447 	if (error)
448 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
449 	if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
450 		ir.ifr_mtu = strtol(cp, NULL, 10);
451 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
452 		freeenv(cp);
453 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
454 		if (error)
455 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
456 	}
457 	soclose(so);
458 
459 	/*
460 	 * If the gateway field is filled in, set it as the default route.
461 	 * Note that pxeboot will set a default route of 0 if the route
462 	 * is not set by the DHCP server.  Check also for a value of 0
463 	 * to avoid panicking inappropriately in that situation.
464 	 */
465 	if (nd->mygateway.sin_len != 0 &&
466 	    nd->mygateway.sin_addr.s_addr != 0) {
467 		struct sockaddr_in mask, sin;
468 
469 		bzero((caddr_t)&mask, sizeof(mask));
470 		sin = mask;
471 		sin.sin_family = AF_INET;
472 		sin.sin_len = sizeof(sin);
473                 /* XXX MRT use table 0 for this sort of thing */
474 		CURVNET_SET(TD_TO_VNET(td));
475 		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
476 		    (struct sockaddr *)&nd->mygateway,
477 		    (struct sockaddr *)&mask,
478 		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
479 		CURVNET_RESTORE();
480 		if (error)
481 			panic("nfs_mountroot: RTM_ADD: %d", error);
482 	}
483 
484 	/*
485 	 * Create the rootfs mount point.
486 	 */
487 	nd->root_args.fh = nd->root_fh;
488 	nd->root_args.fhsize = nd->root_fhsize;
489 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
490 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
491 		(l >> 24) & 0xff, (l >> 16) & 0xff,
492 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
493 	printf("NFS ROOT: %s\n", buf);
494 	nd->root_args.hostname = buf;
495 	if ((error = nfs_mountdiskless(buf,
496 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
497 		return (error);
498 	}
499 
500 	/*
501 	 * This is not really an nfs issue, but it is much easier to
502 	 * set hostname here and then let the "/etc/rc.xxx" files
503 	 * mount the right /var based upon its preset value.
504 	 */
505 	mtx_lock(&prison0.pr_mtx);
506 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
507 	    sizeof(prison0.pr_hostname));
508 	mtx_unlock(&prison0.pr_mtx);
509 	inittodr(ntohl(nd->root_time));
510 	return (0);
511 }
512 
513 /*
514  * Internal version of mount system call for diskless setup.
515  */
516 static int
517 nfs_mountdiskless(char *path,
518     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
519     struct vnode **vpp, struct mount *mp)
520 {
521 	struct sockaddr *nam;
522 	int dirlen, error;
523 	char *dirpath;
524 
525 	/*
526 	 * Find the directory path in "path", which also has the server's
527 	 * name/ip address in it.
528 	 */
529 	dirpath = strchr(path, ':');
530 	if (dirpath != NULL)
531 		dirlen = strlen(++dirpath);
532 	else
533 		dirlen = 0;
534 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
535 	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
536 	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
537 	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
538 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
539 		return (error);
540 	}
541 	return (0);
542 }
543 
544 static void
545 nfs_sec_name(char *sec, int *flagsp)
546 {
547 	if (!strcmp(sec, "krb5"))
548 		*flagsp |= NFSMNT_KERB;
549 	else if (!strcmp(sec, "krb5i"))
550 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
551 	else if (!strcmp(sec, "krb5p"))
552 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
553 }
554 
/*
 * Apply the mount arguments in *argp to the NFS mount *nmp.
 *
 * mp       - mount being created or updated; its MNT_RDONLY flag may be
 *            set or cleared here.
 * nmp      - NFS mount state that receives the decoded values.
 * argp     - requested arguments; some flag bits are cleared in place.
 * hostname - if not NULL, copied into nmp->nm_hostname, truncated at the
 *            first ':'.
 * cred, td - passed to newnfs_connect() when the socket is re-created.
 *
 * If the mount already has a client handle and the socket-related
 * settings changed, the existing connection is torn down.
 */
static void
nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
    const char *hostname, struct ucred *cred, struct thread *td)
{
	int adjsock;
	char *p;

	/*
	 * Set read-only flag if requested; otherwise, clear it if this is
	 * an update.  If this is not an update, then either the read-only
	 * flag is already clear, or this is a root mount and it was set
	 * intentionally at some previous point.
	 */
	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
		MNT_ILOCK(mp);
		mp->mnt_flag |= MNT_RDONLY;
		MNT_IUNLOCK(mp);
	} else if (mp->mnt_flag & MNT_UPDATE) {
		MNT_ILOCK(mp);
		mp->mnt_flag &= ~MNT_RDONLY;
		MNT_IUNLOCK(mp);
	}

	/*
	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
	 * no sense in that context.  Also, set up appropriate retransmit
	 * and soft timeout behavior.
	 */
	if (argp->sotype == SOCK_STREAM) {
		nmp->nm_flag &= ~NFSMNT_NOCONN;
		nmp->nm_timeo = NFS_MAXTIMEO;
		if ((argp->flags & NFSMNT_NFSV4) != 0)
			nmp->nm_retry = INT_MAX;
		else
			nmp->nm_retry = NFS_RETRANS_TCP;
	}

	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
		argp->flags &= ~NFSMNT_RDIRPLUS;
		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
	}

	/* Clear ONEOPENOWN for NFSv2, 3 and 4.0. */
	if (nmp->nm_minorvers == 0) {
		argp->flags &= ~NFSMNT_ONEOPENOWN;
		nmp->nm_flag &= ~NFSMNT_ONEOPENOWN;
	}

	/* Re-bind if rsrvd port requested and wasn't on one */
	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
		  && (argp->flags & NFSMNT_RESVPORT);
	/* Also re-bind if we're switching to/from a connected UDP socket */
	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
		    (argp->flags & NFSMNT_NOCONN));

	/*
	 * Update flags atomically.  Don't change the lock bits.
	 * The requested flags are OR-ed in, so this statement only ever
	 * sets bits; nothing already set in nm_flag is cleared by it.
	 */
	nmp->nm_flag = argp->flags | nmp->nm_flag;

	/*
	 * Timeout: scaled by NFS_HZ / 10 with rounding, then clamped to
	 * [NFS_MINTIMEO, NFS_MAXTIMEO].
	 * NOTE(review): presumably argp->timeo is in tenths of a second --
	 * confirm against the nfs_args definition.
	 */
	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
		if (nmp->nm_timeo < NFS_MINTIMEO)
			nmp->nm_timeo = NFS_MINTIMEO;
		else if (nmp->nm_timeo > NFS_MAXTIMEO)
			nmp->nm_timeo = NFS_MAXTIMEO;
	}

	/* Retransmit count: honoured only when > 1, capped at NFS_MAXREXMIT. */
	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
		nmp->nm_retry = argp->retrans;
		if (nmp->nm_retry > NFS_MAXREXMIT)
			nmp->nm_retry = NFS_MAXREXMIT;
	}

	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
		nmp->nm_wsize = argp->wsize;
		/*
		 * Clip at the power of 2 below the size. There is an
		 * issue (not isolated) that causes intermittent page
		 * faults if this is not done.
		 */
		if (nmp->nm_wsize > NFS_FABLKSIZE)
			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
		else
			nmp->nm_wsize = NFS_FABLKSIZE;
	}

	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
		nmp->nm_rsize = argp->rsize;
		/*
		 * Clip at the power of 2 below the size. There is an
		 * issue (not isolated) that causes intermittent page
		 * faults if this is not done.
		 */
		if (nmp->nm_rsize > NFS_FABLKSIZE)
			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
		else
			nmp->nm_rsize = NFS_FABLKSIZE;
	}

	/* The readdir size is taken as given; no clamping happens here. */
	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
		nmp->nm_readdirsize = argp->readdirsize;
	}

	/*
	 * Attribute cache timeouts: each one is either the requested
	 * non-negative value or its compile-time default.  Afterwards each
	 * minimum is lowered to its maximum if it exceeds it.
	 */
	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
		nmp->nm_acregmin = argp->acregmin;
	else
		nmp->nm_acregmin = NFS_MINATTRTIMO;
	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
		nmp->nm_acregmax = argp->acregmax;
	else
		nmp->nm_acregmax = NFS_MAXATTRTIMO;
	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
		nmp->nm_acdirmin = argp->acdirmin;
	else
		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
		nmp->nm_acdirmax = argp->acdirmax;
	else
		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
		nmp->nm_acdirmin = nmp->nm_acdirmax;
	if (nmp->nm_acregmin > nmp->nm_acregmax)
		nmp->nm_acregmin = nmp->nm_acregmax;

	/* Read-ahead is capped at NFS_MAXRAHEAD. */
	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
		if (argp->readahead <= NFS_MAXRAHEAD)
			nmp->nm_readahead = argp->readahead;
		else
			nmp->nm_readahead = NFS_MAXRAHEAD;
	}
	/* The write commit size is never allowed below the write size. */
	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
		if (argp->wcommitsize < nmp->nm_wsize)
			nmp->nm_wcommitsize = nmp->nm_wsize;
		else
			nmp->nm_wcommitsize = argp->wcommitsize;
	}

	/* A change of socket type or protocol also needs a new socket. */
	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
		    (nmp->nm_soproto != argp->proto));

	if (nmp->nm_client != NULL && adjsock) {
		int haslock = 0, error = 0;

		/*
		 * For a stream socket the send lock is taken around the
		 * disconnect.  If taking it fails, nothing below runs and
		 * the socket type/protocol are left unchanged.
		 */
		if (nmp->nm_sotype == SOCK_STREAM) {
			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
			if (!error)
				haslock = 1;
		}
		if (!error) {
		    newnfs_disconnect(&nmp->nm_sockreq);
		    if (haslock)
			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
		    nmp->nm_sotype = argp->sotype;
		    nmp->nm_soproto = argp->proto;
		    /* Datagram sockets are reconnected here, retrying until it works. */
		    if (nmp->nm_sotype == SOCK_DGRAM)
			while (newnfs_connect(nmp, &nmp->nm_sockreq,
			    cred, td, 0)) {
				printf("newnfs_args: retrying connect\n");
				(void) nfs_catnap(PSOCK, 0, "nfscon");
			}
		}
	} else {
		/* No live connection to adjust: just record the settings. */
		nmp->nm_sotype = argp->sotype;
		nmp->nm_soproto = argp->proto;
	}

	/* Keep only the part of the name before the first ':'. */
	if (hostname != NULL) {
		strlcpy(nmp->nm_hostname, hostname,
		    sizeof(nmp->nm_hostname));
		p = strchr(nmp->nm_hostname, ':');
		if (p != NULL)
			*p = '\0';
	}
}
729 
/*
 * NULL-terminated list of mount option names, handed to vfs_filteropt()
 * by nfs_mount() to validate the options of a mount request.
 */
static const char *nfs_opts[] = { "from", "nfs_args",
    "noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
    "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
    "async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
    "readdirsize", "soft", "hard", "mntudp", "tcp", "udp", "wsize", "rsize",
    "retrans", "actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax",
    "resvport", "readahead", "hostname", "timeo", "timeout", "addr", "fh",
    "nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname", "dirpath",
    "minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr",
    "pnfs", "wcommitsize", "oneopenown",
    NULL };
741 
742 /*
743  * Parse the "from" mountarg, passed by the generic mount(8) program
744  * or the mountroot code.  This is used when rerooting into NFS.
745  *
746  * Note that the "hostname" is actually a "hostname:/share/path" string.
747  */
748 static int
749 nfs_mount_parse_from(struct vfsoptlist *opts, char **hostnamep,
750     struct sockaddr_in **sinp, char *dirpath, size_t dirpathsize, int *dirlenp)
751 {
752 	char *nam, *delimp, *hostp, *spec;
753 	int error, have_bracket = 0, offset, rv, speclen;
754 	struct sockaddr_in *sin;
755 	size_t len;
756 
757 	error = vfs_getopt(opts, "from", (void **)&spec, &speclen);
758 	if (error != 0)
759 		return (error);
760 	nam = malloc(MNAMELEN + 1, M_TEMP, M_WAITOK);
761 
762 	/*
763 	 * This part comes from sbin/mount_nfs/mount_nfs.c:getnfsargs().
764 	 */
765 	if (*spec == '[' && (delimp = strchr(spec + 1, ']')) != NULL &&
766 	    *(delimp + 1) == ':') {
767 		hostp = spec + 1;
768 		spec = delimp + 2;
769 		have_bracket = 1;
770 	} else if ((delimp = strrchr(spec, ':')) != NULL) {
771 		hostp = spec;
772 		spec = delimp + 1;
773 	} else if ((delimp = strrchr(spec, '@')) != NULL) {
774 		printf("%s: path@server syntax is deprecated, "
775 		    "use server:path\n", __func__);
776 		hostp = delimp + 1;
777 	} else {
778 		printf("%s: no <host>:<dirpath> nfs-name\n", __func__);
779 		free(nam, M_TEMP);
780 		return (EINVAL);
781 	}
782 	*delimp = '\0';
783 
784 	/*
785 	 * If there has been a trailing slash at mounttime it seems
786 	 * that some mountd implementations fail to remove the mount
787 	 * entries from their mountlist while unmounting.
788 	 */
789 	for (speclen = strlen(spec);
790 	    speclen > 1 && spec[speclen - 1] == '/';
791 	    speclen--)
792 		spec[speclen - 1] = '\0';
793 	if (strlen(hostp) + strlen(spec) + 1 > MNAMELEN) {
794 		printf("%s: %s:%s: name too long", __func__, hostp, spec);
795 		free(nam, M_TEMP);
796 		return (EINVAL);
797 	}
798 	/* Make both '@' and ':' notations equal */
799 	if (*hostp != '\0') {
800 		len = strlen(hostp);
801 		offset = 0;
802 		if (have_bracket)
803 			nam[offset++] = '[';
804 		memmove(nam + offset, hostp, len);
805 		if (have_bracket)
806 			nam[len + offset++] = ']';
807 		nam[len + offset++] = ':';
808 		memmove(nam + len + offset, spec, speclen);
809 		nam[len + speclen + offset] = '\0';
810 	} else
811 		nam[0] = '\0';
812 
813 	/*
814 	 * XXX: IPv6
815 	 */
816 	sin = malloc(sizeof(*sin), M_SONAME, M_WAITOK);
817 	rv = inet_pton(AF_INET, hostp, &sin->sin_addr);
818 	if (rv != 1) {
819 		printf("%s: cannot parse '%s', inet_pton() returned %d\n",
820 		    __func__, hostp, rv);
821 		free(nam, M_TEMP);
822 		free(sin, M_SONAME);
823 		return (EINVAL);
824 	}
825 
826 	sin->sin_len = sizeof(*sin);
827 	sin->sin_family = AF_INET;
828 	/*
829 	 * XXX: hardcoded port number.
830 	 */
831 	sin->sin_port = htons(2049);
832 
833 	*hostnamep = strdup(nam, M_NEWNFSMNT);
834 	*sinp = sin;
835 	strlcpy(dirpath, spec, dirpathsize);
836 	*dirlenp = strlen(dirpath);
837 
838 	free(nam, M_TEMP);
839 	return (0);
840 }
841 
842 /*
843  * VFS Operations.
844  *
845  * mount system call
846  * It seems a bit dumb to copyinstr() the host and path here and then
847  * bcopy() them in mountnfs(), but I wanted to detect errors before
848  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
849  * an error after that means that I have to release the mbuf.
850  */
851 /* ARGSUSED */
852 static int
853 nfs_mount(struct mount *mp)
854 {
855 	struct nfs_args args = {
856 	    .version = NFS_ARGSVERSION,
857 	    .addr = NULL,
858 	    .addrlen = sizeof (struct sockaddr_in),
859 	    .sotype = SOCK_STREAM,
860 	    .proto = 0,
861 	    .fh = NULL,
862 	    .fhsize = 0,
863 	    .flags = NFSMNT_RESVPORT,
864 	    .wsize = NFS_WSIZE,
865 	    .rsize = NFS_RSIZE,
866 	    .readdirsize = NFS_READDIRSIZE,
867 	    .timeo = 10,
868 	    .retrans = NFS_RETRANS,
869 	    .readahead = NFS_DEFRAHEAD,
870 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
871 	    .hostname = NULL,
872 	    .acregmin = NFS_MINATTRTIMO,
873 	    .acregmax = NFS_MAXATTRTIMO,
874 	    .acdirmin = NFS_MINDIRATTRTIMO,
875 	    .acdirmax = NFS_MAXDIRATTRTIMO,
876 	};
877 	int error = 0, ret, len;
878 	struct sockaddr *nam = NULL;
879 	struct vnode *vp;
880 	struct thread *td;
881 	char *hst;
882 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
883 	char *cp, *opt, *name, *secname;
884 	int nametimeo = NFS_DEFAULT_NAMETIMEO;
885 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
886 	int minvers = 0;
887 	int dirlen, has_nfs_args_opt, has_nfs_from_opt,
888 	    krbnamelen, srvkrbnamelen;
889 	size_t hstlen;
890 
891 	has_nfs_args_opt = 0;
892 	has_nfs_from_opt = 0;
893 	hst = malloc(MNAMELEN, M_TEMP, M_WAITOK);
894 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
895 		error = EINVAL;
896 		goto out;
897 	}
898 
899 	td = curthread;
900 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS &&
901 	    nfs_diskless_valid != 0) {
902 		error = nfs_mountroot(mp);
903 		goto out;
904 	}
905 
906 	nfscl_init();
907 
908 	/*
909 	 * The old mount_nfs program passed the struct nfs_args
910 	 * from userspace to kernel.  The new mount_nfs program
911 	 * passes string options via nmount() from userspace to kernel
912 	 * and we populate the struct nfs_args in the kernel.
913 	 */
914 	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
915 		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
916 		    sizeof(args));
917 		if (error != 0)
918 			goto out;
919 
920 		if (args.version != NFS_ARGSVERSION) {
921 			error = EPROGMISMATCH;
922 			goto out;
923 		}
924 		has_nfs_args_opt = 1;
925 	}
926 
927 	/* Handle the new style options. */
928 	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
929 		args.acdirmin = args.acdirmax =
930 		    args.acregmin = args.acregmax = 0;
931 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
932 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
933 	}
934 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
935 		args.flags |= NFSMNT_NOCONN;
936 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
937 		args.flags &= ~NFSMNT_NOCONN;
938 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
939 		args.flags |= NFSMNT_NOLOCKD;
940 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
941 		args.flags &= ~NFSMNT_NOLOCKD;
942 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
943 		args.flags |= NFSMNT_INT;
944 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
945 		args.flags |= NFSMNT_RDIRPLUS;
946 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
947 		args.flags |= NFSMNT_RESVPORT;
948 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
949 		args.flags &= ~NFSMNT_RESVPORT;
950 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
951 		args.flags |= NFSMNT_SOFT;
952 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
953 		args.flags &= ~NFSMNT_SOFT;
954 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
955 		args.sotype = SOCK_DGRAM;
956 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
957 		args.sotype = SOCK_DGRAM;
958 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
959 		args.sotype = SOCK_STREAM;
960 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
961 		args.flags |= NFSMNT_NFSV3;
962 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
963 		args.flags |= NFSMNT_NFSV4;
964 		args.sotype = SOCK_STREAM;
965 	}
966 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
967 		args.flags |= NFSMNT_ALLGSSNAME;
968 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
969 		args.flags |= NFSMNT_NOCTO;
970 	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
971 		args.flags |= NFSMNT_NONCONTIGWR;
972 	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
973 		args.flags |= NFSMNT_PNFS;
974 	if (vfs_getopt(mp->mnt_optnew, "oneopenown", NULL, NULL) == 0)
975 		args.flags |= NFSMNT_ONEOPENOWN;
976 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
977 		if (opt == NULL) {
978 			vfs_mount_error(mp, "illegal readdirsize");
979 			error = EINVAL;
980 			goto out;
981 		}
982 		ret = sscanf(opt, "%d", &args.readdirsize);
983 		if (ret != 1 || args.readdirsize <= 0) {
984 			vfs_mount_error(mp, "illegal readdirsize: %s",
985 			    opt);
986 			error = EINVAL;
987 			goto out;
988 		}
989 		args.flags |= NFSMNT_READDIRSIZE;
990 	}
991 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
992 		if (opt == NULL) {
993 			vfs_mount_error(mp, "illegal readahead");
994 			error = EINVAL;
995 			goto out;
996 		}
997 		ret = sscanf(opt, "%d", &args.readahead);
998 		if (ret != 1 || args.readahead <= 0) {
999 			vfs_mount_error(mp, "illegal readahead: %s",
1000 			    opt);
1001 			error = EINVAL;
1002 			goto out;
1003 		}
1004 		args.flags |= NFSMNT_READAHEAD;
1005 	}
1006 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
1007 		if (opt == NULL) {
1008 			vfs_mount_error(mp, "illegal wsize");
1009 			error = EINVAL;
1010 			goto out;
1011 		}
1012 		ret = sscanf(opt, "%d", &args.wsize);
1013 		if (ret != 1 || args.wsize <= 0) {
1014 			vfs_mount_error(mp, "illegal wsize: %s",
1015 			    opt);
1016 			error = EINVAL;
1017 			goto out;
1018 		}
1019 		args.flags |= NFSMNT_WSIZE;
1020 	}
1021 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
1022 		if (opt == NULL) {
1023 			vfs_mount_error(mp, "illegal rsize");
1024 			error = EINVAL;
1025 			goto out;
1026 		}
1027 		ret = sscanf(opt, "%d", &args.rsize);
1028 		if (ret != 1 || args.rsize <= 0) {
1029 			vfs_mount_error(mp, "illegal wsize: %s",
1030 			    opt);
1031 			error = EINVAL;
1032 			goto out;
1033 		}
1034 		args.flags |= NFSMNT_RSIZE;
1035 	}
1036 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
1037 		if (opt == NULL) {
1038 			vfs_mount_error(mp, "illegal retrans");
1039 			error = EINVAL;
1040 			goto out;
1041 		}
1042 		ret = sscanf(opt, "%d", &args.retrans);
1043 		if (ret != 1 || args.retrans <= 0) {
1044 			vfs_mount_error(mp, "illegal retrans: %s",
1045 			    opt);
1046 			error = EINVAL;
1047 			goto out;
1048 		}
1049 		args.flags |= NFSMNT_RETRANS;
1050 	}
1051 	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
1052 		ret = sscanf(opt, "%d", &args.acregmin);
1053 		if (ret != 1 || args.acregmin < 0) {
1054 			vfs_mount_error(mp, "illegal actimeo: %s",
1055 			    opt);
1056 			error = EINVAL;
1057 			goto out;
1058 		}
1059 		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
1060 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
1061 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
1062 	}
1063 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
1064 		ret = sscanf(opt, "%d", &args.acregmin);
1065 		if (ret != 1 || args.acregmin < 0) {
1066 			vfs_mount_error(mp, "illegal acregmin: %s",
1067 			    opt);
1068 			error = EINVAL;
1069 			goto out;
1070 		}
1071 		args.flags |= NFSMNT_ACREGMIN;
1072 	}
1073 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
1074 		ret = sscanf(opt, "%d", &args.acregmax);
1075 		if (ret != 1 || args.acregmax < 0) {
1076 			vfs_mount_error(mp, "illegal acregmax: %s",
1077 			    opt);
1078 			error = EINVAL;
1079 			goto out;
1080 		}
1081 		args.flags |= NFSMNT_ACREGMAX;
1082 	}
1083 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
1084 		ret = sscanf(opt, "%d", &args.acdirmin);
1085 		if (ret != 1 || args.acdirmin < 0) {
1086 			vfs_mount_error(mp, "illegal acdirmin: %s",
1087 			    opt);
1088 			error = EINVAL;
1089 			goto out;
1090 		}
1091 		args.flags |= NFSMNT_ACDIRMIN;
1092 	}
1093 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
1094 		ret = sscanf(opt, "%d", &args.acdirmax);
1095 		if (ret != 1 || args.acdirmax < 0) {
1096 			vfs_mount_error(mp, "illegal acdirmax: %s",
1097 			    opt);
1098 			error = EINVAL;
1099 			goto out;
1100 		}
1101 		args.flags |= NFSMNT_ACDIRMAX;
1102 	}
1103 	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
1104 		ret = sscanf(opt, "%d", &args.wcommitsize);
1105 		if (ret != 1 || args.wcommitsize < 0) {
1106 			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1107 			error = EINVAL;
1108 			goto out;
1109 		}
1110 		args.flags |= NFSMNT_WCOMMITSIZE;
1111 	}
1112 	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1113 		ret = sscanf(opt, "%d", &args.timeo);
1114 		if (ret != 1 || args.timeo <= 0) {
1115 			vfs_mount_error(mp, "illegal timeo: %s",
1116 			    opt);
1117 			error = EINVAL;
1118 			goto out;
1119 		}
1120 		args.flags |= NFSMNT_TIMEO;
1121 	}
1122 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1123 		ret = sscanf(opt, "%d", &args.timeo);
1124 		if (ret != 1 || args.timeo <= 0) {
1125 			vfs_mount_error(mp, "illegal timeout: %s",
1126 			    opt);
1127 			error = EINVAL;
1128 			goto out;
1129 		}
1130 		args.flags |= NFSMNT_TIMEO;
1131 	}
1132 	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1133 		ret = sscanf(opt, "%d", &nametimeo);
1134 		if (ret != 1 || nametimeo < 0) {
1135 			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1136 			error = EINVAL;
1137 			goto out;
1138 		}
1139 	}
1140 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1141 	    == 0) {
1142 		ret = sscanf(opt, "%d", &negnametimeo);
1143 		if (ret != 1 || negnametimeo < 0) {
1144 			vfs_mount_error(mp, "illegal negnametimeo: %s",
1145 			    opt);
1146 			error = EINVAL;
1147 			goto out;
1148 		}
1149 	}
1150 	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1151 	    0) {
1152 		ret = sscanf(opt, "%d", &minvers);
1153 		if (ret != 1 || minvers < 0 || minvers > 1 ||
1154 		    (args.flags & NFSMNT_NFSV4) == 0) {
1155 			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1156 			error = EINVAL;
1157 			goto out;
1158 		}
1159 	}
1160 	if (vfs_getopt(mp->mnt_optnew, "sec",
1161 		(void **) &secname, NULL) == 0)
1162 		nfs_sec_name(secname, &args.flags);
1163 
1164 	if (mp->mnt_flag & MNT_UPDATE) {
1165 		struct nfsmount *nmp = VFSTONFS(mp);
1166 
1167 		if (nmp == NULL) {
1168 			error = EIO;
1169 			goto out;
1170 		}
1171 
1172 		/*
1173 		 * If a change from TCP->UDP is done and there are thread(s)
1174 		 * that have I/O RPC(s) in progress with a transfer size
1175 		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1176 		 * hung, retrying the RPC(s) forever. Usually these threads
1177 		 * will be seen doing an uninterruptible sleep on wait channel
1178 		 * "nfsreq".
1179 		 */
1180 		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1181 			tprintf(td->td_proc, LOG_WARNING,
1182 	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1183 
1184 		/*
1185 		 * When doing an update, we can't change version,
1186 		 * security, switch lockd strategies, change cookie
1187 		 * translation or switch oneopenown.
1188 		 */
1189 		args.flags = (args.flags &
1190 		    ~(NFSMNT_NFSV3 |
1191 		      NFSMNT_NFSV4 |
1192 		      NFSMNT_KERB |
1193 		      NFSMNT_INTEGRITY |
1194 		      NFSMNT_PRIVACY |
1195 		      NFSMNT_ONEOPENOWN |
1196 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1197 		    (nmp->nm_flag &
1198 			(NFSMNT_NFSV3 |
1199 			 NFSMNT_NFSV4 |
1200 			 NFSMNT_KERB |
1201 			 NFSMNT_INTEGRITY |
1202 			 NFSMNT_PRIVACY |
1203 			 NFSMNT_ONEOPENOWN |
1204 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1205 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1206 		goto out;
1207 	}
1208 
1209 	/*
1210 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1211 	 * or no-connection mode for those protocols that support
1212 	 * no-connection mode (the flag will be cleared later for protocols
1213 	 * that do not support no-connection mode).  This will allow a client
1214 	 * to receive replies from a different IP then the request was
1215 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1216 	 * not 0.
1217 	 */
1218 	if (nfs_ip_paranoia == 0)
1219 		args.flags |= NFSMNT_NOCONN;
1220 
1221 	if (has_nfs_args_opt != 0) {
1222 		/*
1223 		 * In the 'nfs_args' case, the pointers in the args
1224 		 * structure are in userland - we copy them in here.
1225 		 */
1226 		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1227 			vfs_mount_error(mp, "Bad file handle");
1228 			error = EINVAL;
1229 			goto out;
1230 		}
1231 		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1232 		    args.fhsize);
1233 		if (error != 0)
1234 			goto out;
1235 		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1236 		if (error != 0)
1237 			goto out;
1238 		bzero(&hst[hstlen], MNAMELEN - hstlen);
1239 		args.hostname = hst;
1240 		/* getsockaddr() call must be after above copyin() calls */
1241 		error = getsockaddr(&nam, (caddr_t)args.addr,
1242 		    args.addrlen);
1243 		if (error != 0)
1244 			goto out;
1245 	} else if (nfs_mount_parse_from(mp->mnt_optnew,
1246 	    &args.hostname, (struct sockaddr_in **)&nam, dirpath,
1247 	    sizeof(dirpath), &dirlen) == 0) {
1248 		has_nfs_from_opt = 1;
1249 		bcopy(args.hostname, hst, MNAMELEN);
1250 		hst[MNAMELEN - 1] = '\0';
1251 
1252 		/*
1253 		 * This only works with NFSv4 for now.
1254 		 */
1255 		args.fhsize = 0;
1256 		args.flags |= NFSMNT_NFSV4;
1257 		args.sotype = SOCK_STREAM;
1258 	} else {
1259 		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1260 		    &args.fhsize) == 0) {
1261 			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1262 				vfs_mount_error(mp, "Bad file handle");
1263 				error = EINVAL;
1264 				goto out;
1265 			}
1266 			bcopy(args.fh, nfh, args.fhsize);
1267 		} else {
1268 			args.fhsize = 0;
1269 		}
1270 		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1271 		    (void **)&args.hostname, &len);
1272 		if (args.hostname == NULL) {
1273 			vfs_mount_error(mp, "Invalid hostname");
1274 			error = EINVAL;
1275 			goto out;
1276 		}
1277 		if (len >= MNAMELEN) {
1278 			vfs_mount_error(mp, "Hostname too long");
1279 			error = EINVAL;
1280 			goto out;
1281 		}
1282 		bcopy(args.hostname, hst, len);
1283 		hst[len] = '\0';
1284 	}
1285 
1286 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1287 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1288 	else {
1289 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1290 		cp = strchr(srvkrbname, ':');
1291 		if (cp != NULL)
1292 			*cp = '\0';
1293 	}
1294 	srvkrbnamelen = strlen(srvkrbname);
1295 
1296 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1297 		strlcpy(krbname, name, sizeof (krbname));
1298 	else
1299 		krbname[0] = '\0';
1300 	krbnamelen = strlen(krbname);
1301 
1302 	if (has_nfs_from_opt == 0) {
1303 		if (vfs_getopt(mp->mnt_optnew,
1304 		    "dirpath", (void **)&name, NULL) == 0)
1305 			strlcpy(dirpath, name, sizeof (dirpath));
1306 		else
1307 			dirpath[0] = '\0';
1308 		dirlen = strlen(dirpath);
1309 	}
1310 
1311 	if (has_nfs_args_opt == 0 && has_nfs_from_opt == 0) {
1312 		if (vfs_getopt(mp->mnt_optnew, "addr",
1313 		    (void **)&args.addr, &args.addrlen) == 0) {
1314 			if (args.addrlen > SOCK_MAXADDRLEN) {
1315 				error = ENAMETOOLONG;
1316 				goto out;
1317 			}
1318 			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1319 			bcopy(args.addr, nam, args.addrlen);
1320 			nam->sa_len = args.addrlen;
1321 		} else {
1322 			vfs_mount_error(mp, "No server address");
1323 			error = EINVAL;
1324 			goto out;
1325 		}
1326 	}
1327 
1328 	args.fh = nfh;
1329 	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1330 	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1331 	    nametimeo, negnametimeo, minvers);
1332 out:
1333 	if (!error) {
1334 		MNT_ILOCK(mp);
1335 		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1336 		    MNTK_USES_BCACHE;
1337 		if ((VFSTONFS(mp)->nm_flag & NFSMNT_NFSV4) != 0)
1338 			mp->mnt_kern_flag |= MNTK_NULL_NOCACHE;
1339 		MNT_IUNLOCK(mp);
1340 	}
1341 	free(hst, M_TEMP);
1342 	return (error);
1343 }
1344 
1345 
1346 /*
1347  * VFS Operations.
1348  *
1349  * mount system call
1350  * It seems a bit dumb to copyinstr() the host and path here and then
1351  * bcopy() them in mountnfs(), but I wanted to detect errors before
1352  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
1353  * an error after that means that I have to release the mbuf.
1354  */
1355 /* ARGSUSED */
1356 static int
1357 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1358 {
1359 	int error;
1360 	struct nfs_args args;
1361 
1362 	error = copyin(data, &args, sizeof (struct nfs_args));
1363 	if (error)
1364 		return error;
1365 
1366 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1367 
1368 	error = kernel_mount(ma, flags);
1369 	return (error);
1370 }
1371 
1372 /*
1373  * Common code for mount and mountroot
1374  */
1375 static int
1376 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1377     char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1378     u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1379     struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1380     int minvers)
1381 {
1382 	struct nfsmount *nmp;
1383 	struct nfsnode *np;
1384 	int error, trycnt, ret;
1385 	struct nfsvattr nfsva;
1386 	struct nfsclclient *clp;
1387 	struct nfsclds *dsp, *tdsp;
1388 	uint32_t lease;
1389 	static u_int64_t clval = 0;
1390 
1391 	NFSCL_DEBUG(3, "in mnt\n");
1392 	clp = NULL;
1393 	if (mp->mnt_flag & MNT_UPDATE) {
1394 		nmp = VFSTONFS(mp);
1395 		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1396 		free(nam, M_SONAME);
1397 		return (0);
1398 	} else {
1399 		nmp = malloc(sizeof (struct nfsmount) +
1400 		    krbnamelen + dirlen + srvkrbnamelen + 2,
1401 		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1402 		TAILQ_INIT(&nmp->nm_bufq);
1403 		TAILQ_INIT(&nmp->nm_sess);
1404 		if (clval == 0)
1405 			clval = (u_int64_t)nfsboottime.tv_sec;
1406 		nmp->nm_clval = clval++;
1407 		nmp->nm_krbnamelen = krbnamelen;
1408 		nmp->nm_dirpathlen = dirlen;
1409 		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1410 		if (td->td_ucred->cr_uid != (uid_t)0) {
1411 			/*
1412 			 * nm_uid is used to get KerberosV credentials for
1413 			 * the nfsv4 state handling operations if there is
1414 			 * no host based principal set. Use the uid of
1415 			 * this user if not root, since they are doing the
1416 			 * mount. I don't think setting this for root will
1417 			 * work, since root normally does not have user
1418 			 * credentials in a credentials cache.
1419 			 */
1420 			nmp->nm_uid = td->td_ucred->cr_uid;
1421 		} else {
1422 			/*
1423 			 * Just set to -1, so it won't be used.
1424 			 */
1425 			nmp->nm_uid = (uid_t)-1;
1426 		}
1427 
1428 		/* Copy and null terminate all the names */
1429 		if (nmp->nm_krbnamelen > 0) {
1430 			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1431 			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1432 		}
1433 		if (nmp->nm_dirpathlen > 0) {
1434 			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1435 			    nmp->nm_dirpathlen);
1436 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1437 			    + 1] = '\0';
1438 		}
1439 		if (nmp->nm_srvkrbnamelen > 0) {
1440 			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1441 			    nmp->nm_srvkrbnamelen);
1442 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1443 			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1444 		}
1445 		nmp->nm_sockreq.nr_cred = crhold(cred);
1446 		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1447 		mp->mnt_data = nmp;
1448 		nmp->nm_getinfo = nfs_getnlminfo;
1449 		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1450 	}
1451 	vfs_getnewfsid(mp);
1452 	nmp->nm_mountp = mp;
1453 	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1454 
1455 	/*
1456 	 * Since nfs_decode_args() might optionally set them, these
1457 	 * need to be set to defaults before the call, so that the
1458 	 * optional settings aren't overwritten.
1459 	 */
1460 	nmp->nm_nametimeo = nametimeo;
1461 	nmp->nm_negnametimeo = negnametimeo;
1462 	nmp->nm_timeo = NFS_TIMEO;
1463 	nmp->nm_retry = NFS_RETRANS;
1464 	nmp->nm_readahead = NFS_DEFRAHEAD;
1465 
1466 	/* This is empirical approximation of sqrt(hibufspace) * 256. */
1467 	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
1468 	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
1469 		nmp->nm_wcommitsize *= 2;
1470 	nmp->nm_wcommitsize *= 256;
1471 
1472 	if ((argp->flags & NFSMNT_NFSV4) != 0)
1473 		nmp->nm_minorvers = minvers;
1474 	else
1475 		nmp->nm_minorvers = 0;
1476 
1477 	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1478 
1479 	/*
1480 	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1481 	 * high, depending on whether we end up with negative offsets in
1482 	 * the client or server somewhere.  2GB-1 may be safer.
1483 	 *
1484 	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1485 	 * that we can handle until we find out otherwise.
1486 	 */
1487 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1488 		nmp->nm_maxfilesize = 0xffffffffLL;
1489 	else
1490 		nmp->nm_maxfilesize = OFF_MAX;
1491 
1492 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1493 		nmp->nm_wsize = NFS_WSIZE;
1494 		nmp->nm_rsize = NFS_RSIZE;
1495 		nmp->nm_readdirsize = NFS_READDIRSIZE;
1496 	}
1497 	nmp->nm_numgrps = NFS_MAXGRPS;
1498 	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1499 	if (nmp->nm_tprintf_delay < 0)
1500 		nmp->nm_tprintf_delay = 0;
1501 	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1502 	if (nmp->nm_tprintf_initial_delay < 0)
1503 		nmp->nm_tprintf_initial_delay = 0;
1504 	nmp->nm_fhsize = argp->fhsize;
1505 	if (nmp->nm_fhsize > 0)
1506 		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1507 	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1508 	nmp->nm_nam = nam;
1509 	/* Set up the sockets and per-host congestion */
1510 	nmp->nm_sotype = argp->sotype;
1511 	nmp->nm_soproto = argp->proto;
1512 	nmp->nm_sockreq.nr_prog = NFS_PROG;
1513 	if ((argp->flags & NFSMNT_NFSV4))
1514 		nmp->nm_sockreq.nr_vers = NFS_VER4;
1515 	else if ((argp->flags & NFSMNT_NFSV3))
1516 		nmp->nm_sockreq.nr_vers = NFS_VER3;
1517 	else
1518 		nmp->nm_sockreq.nr_vers = NFS_VER2;
1519 
1520 
1521 	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1522 		goto bad;
1523 	/* For NFSv4.1, get the clientid now. */
1524 	if (nmp->nm_minorvers > 0) {
1525 		NFSCL_DEBUG(3, "at getcl\n");
1526 		error = nfscl_getcl(mp, cred, td, 0, &clp);
1527 		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1528 		if (error != 0)
1529 			goto bad;
1530 	}
1531 
1532 	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1533 	    nmp->nm_dirpathlen > 0) {
1534 		NFSCL_DEBUG(3, "in dirp\n");
1535 		/*
1536 		 * If the fhsize on the mount point == 0 for V4, the mount
1537 		 * path needs to be looked up.
1538 		 */
1539 		trycnt = 3;
1540 		do {
1541 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1542 			    cred, td);
1543 			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
1544 			if (error)
1545 				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1546 		} while (error && --trycnt > 0);
1547 		if (error) {
1548 			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1549 			goto bad;
1550 		}
1551 	}
1552 
1553 	/*
1554 	 * A reference count is needed on the nfsnode representing the
1555 	 * remote root.  If this object is not persistent, then backward
1556 	 * traversals of the mount point (i.e. "..") will not work if
1557 	 * the nfsnode gets flushed out of the cache. Ufs does not have
1558 	 * this problem, because one can identify root inodes by their
1559 	 * number == UFS_ROOTINO (2).
1560 	 */
1561 	if (nmp->nm_fhsize > 0) {
1562 		/*
1563 		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1564 		 * non-zero for the root vnode. f_iosize will be set correctly
1565 		 * by nfs_statfs() before any I/O occurs.
1566 		 */
1567 		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1568 		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1569 		    LK_EXCLUSIVE);
1570 		if (error)
1571 			goto bad;
1572 		*vpp = NFSTOV(np);
1573 
1574 		/*
1575 		 * Get file attributes and transfer parameters for the
1576 		 * mountpoint.  This has the side effect of filling in
1577 		 * (*vpp)->v_type with the correct value.
1578 		 */
1579 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1580 		    cred, td, &nfsva, NULL, &lease);
1581 		if (ret) {
1582 			/*
1583 			 * Just set default values to get things going.
1584 			 */
1585 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1586 			nfsva.na_vattr.va_type = VDIR;
1587 			nfsva.na_vattr.va_mode = 0777;
1588 			nfsva.na_vattr.va_nlink = 100;
1589 			nfsva.na_vattr.va_uid = (uid_t)0;
1590 			nfsva.na_vattr.va_gid = (gid_t)0;
1591 			nfsva.na_vattr.va_fileid = 2;
1592 			nfsva.na_vattr.va_gen = 1;
1593 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1594 			nfsva.na_vattr.va_size = 512 * 1024;
1595 			lease = 60;
1596 		}
1597 		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1598 		if (nmp->nm_minorvers > 0) {
1599 			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1600 			NFSLOCKCLSTATE();
1601 			clp->nfsc_renew = NFSCL_RENEW(lease);
1602 			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1603 			clp->nfsc_clientidrev++;
1604 			if (clp->nfsc_clientidrev == 0)
1605 				clp->nfsc_clientidrev++;
1606 			NFSUNLOCKCLSTATE();
1607 			/*
1608 			 * Mount will succeed, so the renew thread can be
1609 			 * started now.
1610 			 */
1611 			nfscl_start_renewthread(clp);
1612 			nfscl_clientrelease(clp);
1613 		}
1614 		if (argp->flags & NFSMNT_NFSV3)
1615 			ncl_fsinfo(nmp, *vpp, cred, td);
1616 
1617 		/* Mark if the mount point supports NFSv4 ACLs. */
1618 		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1619 		    ret == 0 &&
1620 		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1621 			MNT_ILOCK(mp);
1622 			mp->mnt_flag |= MNT_NFS4ACLS;
1623 			MNT_IUNLOCK(mp);
1624 		}
1625 
1626 		/*
1627 		 * Lose the lock but keep the ref.
1628 		 */
1629 		NFSVOPUNLOCK(*vpp, 0);
1630 		return (0);
1631 	}
1632 	error = EIO;
1633 
1634 bad:
1635 	if (clp != NULL)
1636 		nfscl_clientrelease(clp);
1637 	newnfs_disconnect(&nmp->nm_sockreq);
1638 	crfree(nmp->nm_sockreq.nr_cred);
1639 	if (nmp->nm_sockreq.nr_auth != NULL)
1640 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1641 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1642 	mtx_destroy(&nmp->nm_mtx);
1643 	if (nmp->nm_clp != NULL) {
1644 		NFSLOCKCLSTATE();
1645 		LIST_REMOVE(nmp->nm_clp, nfsc_list);
1646 		NFSUNLOCKCLSTATE();
1647 		free(nmp->nm_clp, M_NFSCLCLIENT);
1648 	}
1649 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
1650 		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1651 		    dsp->nfsclds_sockp != NULL)
1652 			newnfs_disconnect(dsp->nfsclds_sockp);
1653 		nfscl_freenfsclds(dsp);
1654 	}
1655 	free(nmp, M_NEWNFSMNT);
1656 	free(nam, M_SONAME);
1657 	return (error);
1658 }
1659 
1660 /*
1661  * unmount system call
1662  */
1663 static int
1664 nfs_unmount(struct mount *mp, int mntflags)
1665 {
1666 	struct thread *td;
1667 	struct nfsmount *nmp;
1668 	int error, flags = 0, i, trycnt = 0;
1669 	struct nfsclds *dsp, *tdsp;
1670 
1671 	td = curthread;
1672 
1673 	if (mntflags & MNT_FORCE)
1674 		flags |= FORCECLOSE;
1675 	nmp = VFSTONFS(mp);
1676 	error = 0;
1677 	/*
1678 	 * Goes something like this..
1679 	 * - Call vflush() to clear out vnodes for this filesystem
1680 	 * - Close the socket
1681 	 * - Free up the data structures
1682 	 */
1683 	/* In the forced case, cancel any outstanding requests. */
1684 	if (mntflags & MNT_FORCE) {
1685 		NFSDDSLOCK();
1686 		if (nfsv4_findmirror(nmp) != NULL)
1687 			error = ENXIO;
1688 		NFSDDSUNLOCK();
1689 		if (error)
1690 			goto out;
1691 		error = newnfs_nmcancelreqs(nmp);
1692 		if (error)
1693 			goto out;
1694 		/* For a forced close, get rid of the renew thread now */
1695 		nfscl_umount(nmp, td);
1696 	}
1697 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1698 	do {
1699 		error = vflush(mp, 1, flags, td);
1700 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1701 			(void) nfs_catnap(PSOCK, error, "newndm");
1702 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1703 	if (error)
1704 		goto out;
1705 
1706 	/*
1707 	 * We are now committed to the unmount.
1708 	 */
1709 	if ((mntflags & MNT_FORCE) == 0)
1710 		nfscl_umount(nmp, td);
1711 	else {
1712 		mtx_lock(&nmp->nm_mtx);
1713 		nmp->nm_privflag |= NFSMNTP_FORCEDISM;
1714 		mtx_unlock(&nmp->nm_mtx);
1715 	}
1716 	/* Make sure no nfsiods are assigned to this mount. */
1717 	mtx_lock(&ncl_iod_mutex);
1718 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1719 		if (ncl_iodmount[i] == nmp) {
1720 			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1721 			ncl_iodmount[i] = NULL;
1722 		}
1723 	mtx_unlock(&ncl_iod_mutex);
1724 
1725 	/*
1726 	 * We can now set mnt_data to NULL and wait for
1727 	 * nfssvc(NFSSVC_FORCEDISM) to complete.
1728 	 */
1729 	mtx_lock(&mountlist_mtx);
1730 	mtx_lock(&nmp->nm_mtx);
1731 	mp->mnt_data = NULL;
1732 	mtx_unlock(&mountlist_mtx);
1733 	while ((nmp->nm_privflag & NFSMNTP_CANCELRPCS) != 0)
1734 		msleep(nmp, &nmp->nm_mtx, PVFS, "nfsfdism", 0);
1735 	mtx_unlock(&nmp->nm_mtx);
1736 
1737 	newnfs_disconnect(&nmp->nm_sockreq);
1738 	crfree(nmp->nm_sockreq.nr_cred);
1739 	free(nmp->nm_nam, M_SONAME);
1740 	if (nmp->nm_sockreq.nr_auth != NULL)
1741 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1742 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1743 	mtx_destroy(&nmp->nm_mtx);
1744 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
1745 		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1746 		    dsp->nfsclds_sockp != NULL)
1747 			newnfs_disconnect(dsp->nfsclds_sockp);
1748 		nfscl_freenfsclds(dsp);
1749 	}
1750 	free(nmp, M_NEWNFSMNT);
1751 out:
1752 	return (error);
1753 }
1754 
1755 /*
1756  * Return root of a filesystem
1757  */
1758 static int
1759 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1760 {
1761 	struct vnode *vp;
1762 	struct nfsmount *nmp;
1763 	struct nfsnode *np;
1764 	int error;
1765 
1766 	nmp = VFSTONFS(mp);
1767 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1768 	if (error)
1769 		return error;
1770 	vp = NFSTOV(np);
1771 	/*
1772 	 * Get transfer parameters and attributes for root vnode once.
1773 	 */
1774 	mtx_lock(&nmp->nm_mtx);
1775 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1776 		mtx_unlock(&nmp->nm_mtx);
1777 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1778 	} else
1779 		mtx_unlock(&nmp->nm_mtx);
1780 	if (vp->v_type == VNON)
1781 	    vp->v_type = VDIR;
1782 	vp->v_vflag |= VV_ROOT;
1783 	*vpp = vp;
1784 	return (0);
1785 }
1786 
1787 /*
1788  * Flush out the buffer cache
1789  */
1790 /* ARGSUSED */
1791 static int
1792 nfs_sync(struct mount *mp, int waitfor)
1793 {
1794 	struct vnode *vp, *mvp;
1795 	struct thread *td;
1796 	int error, allerror = 0;
1797 
1798 	td = curthread;
1799 
1800 	MNT_ILOCK(mp);
1801 	/*
1802 	 * If a forced dismount is in progress, return from here so that
1803 	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1804 	 * calling VFS_UNMOUNT().
1805 	 */
1806 	if (NFSCL_FORCEDISM(mp)) {
1807 		MNT_IUNLOCK(mp);
1808 		return (EBADF);
1809 	}
1810 	MNT_IUNLOCK(mp);
1811 
1812 	/*
1813 	 * Force stale buffer cache information to be flushed.
1814 	 */
1815 loop:
1816 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1817 		/* XXX Racy bv_cnt check. */
1818 		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1819 		    waitfor == MNT_LAZY) {
1820 			VI_UNLOCK(vp);
1821 			continue;
1822 		}
1823 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1824 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1825 			goto loop;
1826 		}
1827 		error = VOP_FSYNC(vp, waitfor, td);
1828 		if (error)
1829 			allerror = error;
1830 		NFSVOPUNLOCK(vp, 0);
1831 		vrele(vp);
1832 	}
1833 	return (allerror);
1834 }
1835 
1836 static int
1837 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1838 {
1839 	struct nfsmount *nmp = VFSTONFS(mp);
1840 	struct vfsquery vq;
1841 	int error;
1842 
1843 	bzero(&vq, sizeof(vq));
1844 	switch (op) {
1845 #if 0
1846 	case VFS_CTL_NOLOCKS:
1847 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1848  		if (req->oldptr != NULL) {
1849  			error = SYSCTL_OUT(req, &val, sizeof(val));
1850  			if (error)
1851  				return (error);
1852  		}
1853  		if (req->newptr != NULL) {
1854  			error = SYSCTL_IN(req, &val, sizeof(val));
1855  			if (error)
1856  				return (error);
1857 			if (val)
1858 				nmp->nm_flag |= NFSMNT_NOLOCKS;
1859 			else
1860 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1861  		}
1862 		break;
1863 #endif
1864 	case VFS_CTL_QUERY:
1865 		mtx_lock(&nmp->nm_mtx);
1866 		if (nmp->nm_state & NFSSTA_TIMEO)
1867 			vq.vq_flags |= VQ_NOTRESP;
1868 		mtx_unlock(&nmp->nm_mtx);
1869 #if 0
1870 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1871 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1872 			vq.vq_flags |= VQ_NOTRESPLOCK;
1873 #endif
1874 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1875 		break;
1876  	case VFS_CTL_TIMEO:
1877  		if (req->oldptr != NULL) {
1878  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1879  			    sizeof(nmp->nm_tprintf_initial_delay));
1880  			if (error)
1881  				return (error);
1882  		}
1883  		if (req->newptr != NULL) {
1884 			error = vfs_suser(mp, req->td);
1885 			if (error)
1886 				return (error);
1887  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1888  			    sizeof(nmp->nm_tprintf_initial_delay));
1889  			if (error)
1890  				return (error);
1891  			if (nmp->nm_tprintf_initial_delay < 0)
1892  				nmp->nm_tprintf_initial_delay = 0;
1893  		}
1894 		break;
1895 	default:
1896 		return (ENOTSUP);
1897 	}
1898 	return (0);
1899 }
1900 
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.  The result of the cancel request is not reported.
 */
static void
nfs_purge(struct mount *mp)
{

	(void)newnfs_nmcancelreqs(VFSTONFS(mp));
}
1913 
1914 /*
1915  * Extract the information needed by the nlm from the nfs vnode.
1916  */
1917 static void
1918 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1919     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1920     struct timeval *timeop)
1921 {
1922 	struct nfsmount *nmp;
1923 	struct nfsnode *np = VTONFS(vp);
1924 
1925 	nmp = VFSTONFS(vp->v_mount);
1926 	if (fhlenp != NULL)
1927 		*fhlenp = (size_t)np->n_fhp->nfh_len;
1928 	if (fhp != NULL)
1929 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1930 	if (sp != NULL)
1931 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1932 	if (is_v3p != NULL)
1933 		*is_v3p = NFS_ISV3(vp);
1934 	if (sizep != NULL)
1935 		*sizep = np->n_size;
1936 	if (timeop != NULL) {
1937 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1938 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1939 	}
1940 }
1941 
/*
 * Append the option name "opt" to the string being built at *buf, but
 * only when "testval" is non-zero and the name plus its terminating NUL
 * fits in the *blen bytes that remain.  On output *buf and *blen are
 * moved past the text that was added.  "nmp" is not used.
 */
static __inline void
nfscl_printopt(struct nfsmount *nmp, int testval, char *opt, char **buf,
    size_t *blen)
{
	size_t optlen;
	int written;

	/* Option not selected: nothing to emit. */
	if (testval == 0)
		return;

	/* The name and its NUL terminator must fit in what is left. */
	optlen = strlen(opt);
	if (*blen <= optlen)
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	/* Sanity check: snprintf() should have produced the whole name. */
	if ((size_t)written != optlen)
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
1959 
/*
 * This function prints out an option's integer value, appending
 * "<opt>=<optval>" to the string being built at *buf.
 *
 * The text is kept only when all of it, including the terminating NUL,
 * fits in the *blen bytes that remain; *buf and *blen are then moved
 * past it.  When it does not fit, the string is left exactly as it was
 * before the call, so that a cut-off (and therefore wrong) number never
 * shows up in the output.  "nmp" is not used.
 */
static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
    char *opt, char **buf, size_t *blen)
{
	int len;

	/* Quick reject: not even the name, '=' and a NUL would fit. */
	if (*blen <= strlen(opt) + 1)
		return;

	len = snprintf(*buf, *blen, "%s=%d", opt, optval);
	if (len >= 0 && (size_t)len < *blen) {
		*buf += len;
		*blen -= (size_t)len;
	} else {
		/*
		 * snprintf() truncated the value.  Put the terminator back
		 * at the append position to discard the partial text.
		 */
		**buf = '\0';
	}
}
1977 
1978 /*
1979  * Load the option flags and values into the buffer.
1980  */
1981 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1982 {
1983 	char *buf;
1984 	size_t blen;
1985 
1986 	buf = buffer;
1987 	blen = buflen;
1988 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1989 	    &blen);
1990 	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1991 		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1992 		    &blen);
1993 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1994 		    &buf, &blen);
1995 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_ONEOPENOWN) != 0 &&
1996 		    nmp->nm_minorvers > 0, ",oneopenown", &buf, &blen);
1997 	}
1998 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1999 	    &blen);
2000 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
2001 	    "nfsv2", &buf, &blen);
2002 	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
2003 	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
2004 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
2005 	    &buf, &blen);
2006 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
2007 	    &buf, &blen);
2008 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
2009 	    &blen);
2010 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
2011 	    &blen);
2012 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
2013 	    &blen);
2014 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
2015 	    &blen);
2016 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
2017 	    &blen);
2018 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
2019 	    ",noncontigwr", &buf, &blen);
2020 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
2021 	    0, ",lockd", &buf, &blen);
2022 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
2023 	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
2024 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
2025 	    &buf, &blen);
2026 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
2027 	    &buf, &blen);
2028 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2029 	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
2030 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2031 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
2032 	    &buf, &blen);
2033 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2034 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
2035 	    &buf, &blen);
2036 	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
2037 	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
2038 	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
2039 	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
2040 	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
2041 	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
2042 	    &blen);
2043 	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
2044 	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
2045 	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
2046 	    &blen);
2047 	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
2048 	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
2049 	    &blen);
2050 	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
2051 	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
2052 }
2053 
2054