xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision 884a2a699669ec61e2366e3e358342dbc94be24a)
1 /*-
2  * Copyright (c) 1989, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/limits.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/mount.h>
55 #include <sys/proc.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/sockio.h>
59 #include <sys/sysctl.h>
60 #include <sys/vnode.h>
61 #include <sys/signalvar.h>
62 
63 #include <vm/vm.h>
64 #include <vm/vm_extern.h>
65 #include <vm/uma.h>
66 
67 #include <net/if.h>
68 #include <net/route.h>
69 #include <netinet/in.h>
70 
71 #include <fs/nfs/nfsport.h>
72 #include <fs/nfsclient/nfsnode.h>
73 #include <fs/nfsclient/nfsmount.h>
74 #include <fs/nfsclient/nfs.h>
75 #include <nfs/nfsdiskless.h>
76 
/* Register the "nfscl" kernel feature, described as the NFSv4 client. */
FEATURE(nfscl, "NFSv4 client");

/* Globals that are defined outside this file. */
extern int nfscl_ticks;
extern struct timeval nfsboottime;
extern struct nfsstats	newnfsstats;
extern int nfsrv_useacl;

/* malloc(9) types for NFS request headers and per-mount structures. */
MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
86 
/*
 * Tunables published under the vfs.nfs sysctl node.  All three are
 * read/write integers backed by file-local variables.
 */
SYSCTL_DECL(_vfs_nfs);
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0, "");
/*
 * NOTE(review): the next two knobs pass their default value
 * (NFS_TPRINTF_INITIAL_DELAY / NFS_TPRINTF_DELAY) as the OID number
 * instead of OID_AUTO -- confirm that this is intentional.
 */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
/* how long between console messages "nfs server foo not responding" */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
98 
/*
 * Forward declarations: file-local helpers first, then the handlers
 * that are plugged into nfs_vfsops below.
 */
static int	nfs_mountroot(struct mount *);
static void	nfs_sec_name(char *, int *);
static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
		    struct nfs_args *argp, const char *, struct ucred *,
		    struct thread *);
static int	mountnfs(struct nfs_args *, struct mount *,
		    struct sockaddr *, char *, u_char *, int, u_char *, int,
		    u_char *, int, struct vnode **, struct ucred *,
		    struct thread *, int);
static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
		    struct sockaddr_storage *, int *, off_t *,
		    struct timeval *);
static vfs_mount_t nfs_mount;
static vfs_cmount_t nfs_cmount;
static vfs_unmount_t nfs_unmount;
static vfs_root_t nfs_root;
static vfs_statfs_t nfs_statfs;
static vfs_sync_t nfs_sync;
static vfs_sysctl_t nfs_sysctl;
118 
/*
 * nfs vfs operations.
 *
 * Operation table for the "nfs" filesystem type, registered through
 * VFS_SET() with the VFCF_NETWORK flag.  The init/uninit hooks are the
 * ncl_* routines; every other entry is a static handler from this file.
 */
static struct vfsops nfs_vfsops = {
	.vfs_init =		ncl_init,
	.vfs_mount =		nfs_mount,
	.vfs_cmount =		nfs_cmount,
	.vfs_root =		nfs_root,
	.vfs_statfs =		nfs_statfs,
	.vfs_sync =		nfs_sync,
	.vfs_uninit =		ncl_uninit,
	.vfs_unmount =		nfs_unmount,
	.vfs_sysctl =		nfs_sysctl,
};
VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK);
134 
/*
 * So that loader and kldload(2) can find us, wherever we are..
 * The module is version 1 and declares dependencies on version 1 of
 * the nfscommon, krpc, nfssvc and nfslock modules.
 */
MODULE_VERSION(nfs, 1);
MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
MODULE_DEPEND(nfs, krpc, 1, 1, 1);
MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
141 
/*
 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
 * can be shared by both NFS clients. It is declared here so that it
 * will be defined for kernels built without NFS_ROOT, although it
 * isn't used in that case.
 */
#if !defined(NFS_ROOT) && !defined(NFSCLIENT)
/* Zero-filled fallback definitions; nfs_diskless_valid == 0 means unset. */
struct nfs_diskless	nfs_diskless = { { { 0 } } };
struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
int			nfs_diskless_valid = 0;
#endif

/*
 * Read-only sysctls under vfs.nfs that expose the diskless state:
 * the validity indicator, the root_hostnam string, and the raw root
 * server socket address.
 */
SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &nfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");

SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
    "%Ssockaddr_in", "Diskless root nfs address");
164 
165 
/*
 * Prototypes for the diskless-boot helpers.  newnfsargs_ntoh() is the
 * only one declared with external linkage.
 */
void		newnfsargs_ntoh(struct nfs_args *);
static int	nfs_mountdiskless(char *,
		    struct sockaddr_in *, struct nfs_args *,
		    struct thread *, struct vnode **, struct mount *);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
173 
174 int
175 newnfs_iosize(struct nfsmount *nmp)
176 {
177 	int iosize, maxio;
178 
179 	/* First, set the upper limit for iosize */
180 	if (nmp->nm_flag & NFSMNT_NFSV4) {
181 		maxio = NFS_MAXBSIZE;
182 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
183 		if (nmp->nm_sotype == SOCK_DGRAM)
184 			maxio = NFS_MAXDGRAMDATA;
185 		else
186 			maxio = NFS_MAXBSIZE;
187 	} else {
188 		maxio = NFS_V2MAXDATA;
189 	}
190 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
191 		nmp->nm_rsize = maxio;
192 	if (nmp->nm_rsize > MAXBSIZE)
193 		nmp->nm_rsize = MAXBSIZE;
194 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
195 		nmp->nm_readdirsize = maxio;
196 	if (nmp->nm_readdirsize > nmp->nm_rsize)
197 		nmp->nm_readdirsize = nmp->nm_rsize;
198 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
199 		nmp->nm_wsize = maxio;
200 	if (nmp->nm_wsize > MAXBSIZE)
201 		nmp->nm_wsize = MAXBSIZE;
202 
203 	/*
204 	 * Calculate the size used for io buffers.  Use the larger
205 	 * of the two sizes to minimise nfs requests but make sure
206 	 * that it is at least one VM page to avoid wasting buffer
207 	 * space.
208 	 */
209 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
210 	iosize = imax(iosize, PAGE_SIZE);
211 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
212 	return (iosize);
213 }
214 
215 static void
216 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
217 {
218 
219 	args->version = NFS_ARGSVERSION;
220 	args->addr = oargs->addr;
221 	args->addrlen = oargs->addrlen;
222 	args->sotype = oargs->sotype;
223 	args->proto = oargs->proto;
224 	args->fh = oargs->fh;
225 	args->fhsize = oargs->fhsize;
226 	args->flags = oargs->flags;
227 	args->wsize = oargs->wsize;
228 	args->rsize = oargs->rsize;
229 	args->readdirsize = oargs->readdirsize;
230 	args->timeo = oargs->timeo;
231 	args->retrans = oargs->retrans;
232 	args->readahead = oargs->readahead;
233 	args->hostname = oargs->hostname;
234 }
235 
236 static void
237 nfs_convert_diskless(void)
238 {
239 
240 	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
241 		sizeof(struct ifaliasreq));
242 	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
243 		sizeof(struct sockaddr_in));
244 	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
245 	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
246 		nfsv3_diskless.root_fhsize = NFSX_MYFH;
247 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
248 	} else {
249 		nfsv3_diskless.root_fhsize = NFSX_V2FH;
250 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
251 	}
252 	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
253 		sizeof(struct sockaddr_in));
254 	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
255 	nfsv3_diskless.root_time = nfs_diskless.root_time;
256 	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
257 		MAXHOSTNAMELEN);
258 	nfs_diskless_valid = 3;
259 }
260 
261 /*
262  * nfs statfs call
263  */
264 static int
265 nfs_statfs(struct mount *mp, struct statfs *sbp)
266 {
267 	struct vnode *vp;
268 	struct thread *td;
269 	struct nfsmount *nmp = VFSTONFS(mp);
270 	struct nfsvattr nfsva;
271 	struct nfsfsinfo fs;
272 	struct nfsstatfs sb;
273 	int error = 0, attrflag, gotfsinfo = 0, ret;
274 	struct nfsnode *np;
275 
276 	td = curthread;
277 
278 	error = vfs_busy(mp, MBF_NOWAIT);
279 	if (error)
280 		return (error);
281 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
282 	if (error) {
283 		vfs_unbusy(mp);
284 		return (error);
285 	}
286 	vp = NFSTOV(np);
287 	mtx_lock(&nmp->nm_mtx);
288 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
289 		mtx_unlock(&nmp->nm_mtx);
290 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
291 		    &attrflag, NULL);
292 		if (!error)
293 			gotfsinfo = 1;
294 	} else
295 		mtx_unlock(&nmp->nm_mtx);
296 	if (!error)
297 		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
298 		    &attrflag, NULL);
299 	if (attrflag == 0) {
300 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
301 		    td->td_ucred, td, &nfsva, NULL);
302 		if (ret) {
303 			/*
304 			 * Just set default values to get things going.
305 			 */
306 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
307 			nfsva.na_vattr.va_type = VDIR;
308 			nfsva.na_vattr.va_mode = 0777;
309 			nfsva.na_vattr.va_nlink = 100;
310 			nfsva.na_vattr.va_uid = (uid_t)0;
311 			nfsva.na_vattr.va_gid = (gid_t)0;
312 			nfsva.na_vattr.va_fileid = 2;
313 			nfsva.na_vattr.va_gen = 1;
314 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
315 			nfsva.na_vattr.va_size = 512 * 1024;
316 		}
317 	}
318 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
319 	if (!error) {
320 	    mtx_lock(&nmp->nm_mtx);
321 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
322 		nfscl_loadfsinfo(nmp, &fs);
323 	    nfscl_loadsbinfo(nmp, &sb, sbp);
324 	    sbp->f_iosize = newnfs_iosize(nmp);
325 	    mtx_unlock(&nmp->nm_mtx);
326 	    if (sbp != &mp->mnt_stat) {
327 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
328 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
329 	    }
330 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
331 	} else if (NFS_ISV4(vp)) {
332 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
333 	}
334 	vput(vp);
335 	vfs_unbusy(mp);
336 	return (error);
337 }
338 
339 /*
340  * nfs version 3 fsinfo rpc call
341  */
342 int
343 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
344     struct thread *td)
345 {
346 	struct nfsfsinfo fs;
347 	struct nfsvattr nfsva;
348 	int error, attrflag;
349 
350 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
351 	if (!error) {
352 		if (attrflag)
353 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
354 			    1);
355 		mtx_lock(&nmp->nm_mtx);
356 		nfscl_loadfsinfo(nmp, &fs);
357 		mtx_unlock(&nmp->nm_mtx);
358 	}
359 	return (error);
360 }
361 
362 /*
363  * Mount a remote root fs via. nfs. This depends on the info in the
364  * nfs_diskless structure that has been filled in properly by some primary
365  * bootstrap.
366  * It goes something like this:
367  * - do enough of "ifconfig" by calling ifioctl() so that the system
368  *   can talk to the server
369  * - If nfs_diskless.mygateway is filled in, use that address as
370  *   a default gateway.
371  * - build the rootfs mount point and call mountnfs() to do the rest.
372  *
373  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
374  * structure, as well as other global NFS client variables here, as
375  * nfs_mountroot() will be called once in the boot before any other NFS
376  * client activity occurs.
377  */
378 static int
379 nfs_mountroot(struct mount *mp)
380 {
381 	struct thread *td = curthread;
382 	struct nfsv3_diskless *nd = &nfsv3_diskless;
383 	struct socket *so;
384 	struct vnode *vp;
385 	struct ifreq ir;
386 	int error;
387 	u_long l;
388 	char buf[128];
389 	char *cp;
390 
391 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
392 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
393 #elif defined(NFS_ROOT)
394 	nfs_setup_diskless();
395 #endif
396 
397 	if (nfs_diskless_valid == 0)
398 		return (-1);
399 	if (nfs_diskless_valid == 1)
400 		nfs_convert_diskless();
401 
402 	/*
403 	 * XXX splnet, so networks will receive...
404 	 */
405 	splnet();
406 
407 	/*
408 	 * Do enough of ifconfig(8) so that the critical net interface can
409 	 * talk to the server.
410 	 */
411 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
412 	    td->td_ucred, td);
413 	if (error)
414 		panic("nfs_mountroot: socreate(%04x): %d",
415 			nd->myif.ifra_addr.sa_family, error);
416 
417 #if 0 /* XXX Bad idea */
418 	/*
419 	 * We might not have been told the right interface, so we pass
420 	 * over the first ten interfaces of the same kind, until we get
421 	 * one of them configured.
422 	 */
423 
424 	for (i = strlen(nd->myif.ifra_name) - 1;
425 		nd->myif.ifra_name[i] >= '0' &&
426 		nd->myif.ifra_name[i] <= '9';
427 		nd->myif.ifra_name[i] ++) {
428 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
429 		if(!error)
430 			break;
431 	}
432 #endif
433 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
434 	if (error)
435 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
436 	if ((cp = getenv("boot.netif.mtu")) != NULL) {
437 		ir.ifr_mtu = strtol(cp, NULL, 10);
438 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
439 		freeenv(cp);
440 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
441 		if (error)
442 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
443 	}
444 	soclose(so);
445 
446 	/*
447 	 * If the gateway field is filled in, set it as the default route.
448 	 * Note that pxeboot will set a default route of 0 if the route
449 	 * is not set by the DHCP server.  Check also for a value of 0
450 	 * to avoid panicking inappropriately in that situation.
451 	 */
452 	if (nd->mygateway.sin_len != 0 &&
453 	    nd->mygateway.sin_addr.s_addr != 0) {
454 		struct sockaddr_in mask, sin;
455 
456 		bzero((caddr_t)&mask, sizeof(mask));
457 		sin = mask;
458 		sin.sin_family = AF_INET;
459 		sin.sin_len = sizeof(sin);
460                 /* XXX MRT use table 0 for this sort of thing */
461 		CURVNET_SET(TD_TO_VNET(td));
462 		error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
463 		    (struct sockaddr *)&nd->mygateway,
464 		    (struct sockaddr *)&mask,
465 		    RTF_UP | RTF_GATEWAY, NULL);
466 		CURVNET_RESTORE();
467 		if (error)
468 			panic("nfs_mountroot: RTM_ADD: %d", error);
469 	}
470 
471 	/*
472 	 * Create the rootfs mount point.
473 	 */
474 	nd->root_args.fh = nd->root_fh;
475 	nd->root_args.fhsize = nd->root_fhsize;
476 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
477 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
478 		(l >> 24) & 0xff, (l >> 16) & 0xff,
479 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
480 	printf("NFS ROOT: %s\n", buf);
481 	nd->root_args.hostname = buf;
482 	if ((error = nfs_mountdiskless(buf,
483 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
484 		return (error);
485 	}
486 
487 	/*
488 	 * This is not really an nfs issue, but it is much easier to
489 	 * set hostname here and then let the "/etc/rc.xxx" files
490 	 * mount the right /var based upon its preset value.
491 	 */
492 	mtx_lock(&prison0.pr_mtx);
493 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
494 	    sizeof(prison0.pr_hostname));
495 	mtx_unlock(&prison0.pr_mtx);
496 	inittodr(ntohl(nd->root_time));
497 	return (0);
498 }
499 
500 /*
501  * Internal version of mount system call for diskless setup.
502  */
503 static int
504 nfs_mountdiskless(char *path,
505     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
506     struct vnode **vpp, struct mount *mp)
507 {
508 	struct sockaddr *nam;
509 	int dirlen, error;
510 	char *dirpath;
511 
512 	/*
513 	 * Find the directory path in "path", which also has the server's
514 	 * name/ip address in it.
515 	 */
516 	dirpath = strchr(path, ':');
517 	if (dirpath != NULL)
518 		dirlen = strlen(++dirpath);
519 	else
520 		dirlen = 0;
521 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
522 	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
523 	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NEGNAMETIMEO)) != 0) {
524 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
525 		return (error);
526 	}
527 	return (0);
528 }
529 
530 static void
531 nfs_sec_name(char *sec, int *flagsp)
532 {
533 	if (!strcmp(sec, "krb5"))
534 		*flagsp |= NFSMNT_KERB;
535 	else if (!strcmp(sec, "krb5i"))
536 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
537 	else if (!strcmp(sec, "krb5p"))
538 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
539 }
540 
541 static void
542 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
543     const char *hostname, struct ucred *cred, struct thread *td)
544 {
545 	int s;
546 	int adjsock;
547 	char *p;
548 
549 	s = splnet();
550 
551 	/*
552 	 * Set read-only flag if requested; otherwise, clear it if this is
553 	 * an update.  If this is not an update, then either the read-only
554 	 * flag is already clear, or this is a root mount and it was set
555 	 * intentionally at some previous point.
556 	 */
557 	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
558 		MNT_ILOCK(mp);
559 		mp->mnt_flag |= MNT_RDONLY;
560 		MNT_IUNLOCK(mp);
561 	} else if (mp->mnt_flag & MNT_UPDATE) {
562 		MNT_ILOCK(mp);
563 		mp->mnt_flag &= ~MNT_RDONLY;
564 		MNT_IUNLOCK(mp);
565 	}
566 
567 	/*
568 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
569 	 * no sense in that context.  Also, set up appropriate retransmit
570 	 * and soft timeout behavior.
571 	 */
572 	if (argp->sotype == SOCK_STREAM) {
573 		nmp->nm_flag &= ~NFSMNT_NOCONN;
574 		nmp->nm_timeo = NFS_MAXTIMEO;
575 		if ((argp->flags & NFSMNT_NFSV4) != 0)
576 			nmp->nm_retry = INT_MAX;
577 		else
578 			nmp->nm_retry = NFS_RETRANS_TCP;
579 	}
580 
581 	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
582 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
583 		argp->flags &= ~NFSMNT_RDIRPLUS;
584 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
585 	}
586 
587 	/* Clear NFSMNT_RESVPORT for NFSv4, since it is not required. */
588 	if ((argp->flags & NFSMNT_NFSV4) != 0) {
589 		argp->flags &= ~NFSMNT_RESVPORT;
590 		nmp->nm_flag &= ~NFSMNT_RESVPORT;
591 	}
592 
593 	/* Re-bind if rsrvd port requested and wasn't on one */
594 	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
595 		  && (argp->flags & NFSMNT_RESVPORT);
596 	/* Also re-bind if we're switching to/from a connected UDP socket */
597 	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
598 		    (argp->flags & NFSMNT_NOCONN));
599 
600 	/* Update flags atomically.  Don't change the lock bits. */
601 	nmp->nm_flag = argp->flags | nmp->nm_flag;
602 	splx(s);
603 
604 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
605 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
606 		if (nmp->nm_timeo < NFS_MINTIMEO)
607 			nmp->nm_timeo = NFS_MINTIMEO;
608 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
609 			nmp->nm_timeo = NFS_MAXTIMEO;
610 	}
611 
612 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
613 		nmp->nm_retry = argp->retrans;
614 		if (nmp->nm_retry > NFS_MAXREXMIT)
615 			nmp->nm_retry = NFS_MAXREXMIT;
616 	}
617 
618 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
619 		nmp->nm_wsize = argp->wsize;
620 		/* Round down to multiple of blocksize */
621 		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
622 		if (nmp->nm_wsize <= 0)
623 			nmp->nm_wsize = NFS_FABLKSIZE;
624 	}
625 
626 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
627 		nmp->nm_rsize = argp->rsize;
628 		/* Round down to multiple of blocksize */
629 		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
630 		if (nmp->nm_rsize <= 0)
631 			nmp->nm_rsize = NFS_FABLKSIZE;
632 	}
633 
634 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
635 		nmp->nm_readdirsize = argp->readdirsize;
636 	}
637 
638 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
639 		nmp->nm_acregmin = argp->acregmin;
640 	else
641 		nmp->nm_acregmin = NFS_MINATTRTIMO;
642 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
643 		nmp->nm_acregmax = argp->acregmax;
644 	else
645 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
646 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
647 		nmp->nm_acdirmin = argp->acdirmin;
648 	else
649 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
650 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
651 		nmp->nm_acdirmax = argp->acdirmax;
652 	else
653 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
654 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
655 		nmp->nm_acdirmin = nmp->nm_acdirmax;
656 	if (nmp->nm_acregmin > nmp->nm_acregmax)
657 		nmp->nm_acregmin = nmp->nm_acregmax;
658 
659 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
660 		if (argp->readahead <= NFS_MAXRAHEAD)
661 			nmp->nm_readahead = argp->readahead;
662 		else
663 			nmp->nm_readahead = NFS_MAXRAHEAD;
664 	}
665 	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
666 		if (argp->wcommitsize < nmp->nm_wsize)
667 			nmp->nm_wcommitsize = nmp->nm_wsize;
668 		else
669 			nmp->nm_wcommitsize = argp->wcommitsize;
670 	}
671 
672 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
673 		    (nmp->nm_soproto != argp->proto));
674 
675 	if (nmp->nm_client != NULL && adjsock) {
676 		int haslock = 0, error = 0;
677 
678 		if (nmp->nm_sotype == SOCK_STREAM) {
679 			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
680 			if (!error)
681 				haslock = 1;
682 		}
683 		if (!error) {
684 		    newnfs_disconnect(&nmp->nm_sockreq);
685 		    if (haslock)
686 			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
687 		    nmp->nm_sotype = argp->sotype;
688 		    nmp->nm_soproto = argp->proto;
689 		    if (nmp->nm_sotype == SOCK_DGRAM)
690 			while (newnfs_connect(nmp, &nmp->nm_sockreq,
691 			    cred, td, 0)) {
692 				printf("newnfs_args: retrying connect\n");
693 				(void) nfs_catnap(PSOCK, 0, "newnfscon");
694 			}
695 		}
696 	} else {
697 		nmp->nm_sotype = argp->sotype;
698 		nmp->nm_soproto = argp->proto;
699 	}
700 
701 	if (hostname != NULL) {
702 		strlcpy(nmp->nm_hostname, hostname,
703 		    sizeof(nmp->nm_hostname));
704 		p = strchr(nmp->nm_hostname, ':');
705 		if (p != NULL)
706 			*p = '\0';
707 	}
708 }
709 
/*
 * NULL-terminated list of mount option names; nfs_mount() passes it
 * to vfs_filteropt() to validate the options supplied by the caller.
 */
static const char *nfs_opts[] = {
	/* Source and legacy argument blob. */
	"from", "nfs_args",
	/* Generic mount options. */
	"noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
	"noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
	"async",
	/* Connection, locking and retry behavior. */
	"noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize", "soft", "hard",
	/* Transport and transfer sizes. */
	"mntudp", "tcp", "udp", "wsize", "rsize", "retrans",
	/* Attribute cache timeouts. */
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	/* Remaining NFS specific options. */
	"resvport", "readahead", "hostname", "timeout", "addr", "fh",
	"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath", "negnametimeo", "nocto",
	NULL
};
720 
721 /*
722  * VFS Operations.
723  *
724  * mount system call
725  * It seems a bit dumb to copyinstr() the host and path here and then
726  * bcopy() them in mountnfs(), but I wanted to detect errors before
727  * doing the sockargs() call because sockargs() allocates an mbuf and
728  * an error after that means that I have to release the mbuf.
729  */
730 /* ARGSUSED */
731 static int
732 nfs_mount(struct mount *mp)
733 {
734 	struct nfs_args args = {
735 	    .version = NFS_ARGSVERSION,
736 	    .addr = NULL,
737 	    .addrlen = sizeof (struct sockaddr_in),
738 	    .sotype = SOCK_STREAM,
739 	    .proto = 0,
740 	    .fh = NULL,
741 	    .fhsize = 0,
742 	    .flags = NFSMNT_RESVPORT,
743 	    .wsize = NFS_WSIZE,
744 	    .rsize = NFS_RSIZE,
745 	    .readdirsize = NFS_READDIRSIZE,
746 	    .timeo = 10,
747 	    .retrans = NFS_RETRANS,
748 	    .readahead = NFS_DEFRAHEAD,
749 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
750 	    .hostname = NULL,
751 	    .acregmin = NFS_MINATTRTIMO,
752 	    .acregmax = NFS_MAXATTRTIMO,
753 	    .acdirmin = NFS_MINDIRATTRTIMO,
754 	    .acdirmax = NFS_MAXDIRATTRTIMO,
755 	};
756 	int error = 0, ret, len;
757 	struct sockaddr *nam = NULL;
758 	struct vnode *vp;
759 	struct thread *td;
760 	char hst[MNAMELEN];
761 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
762 	char *opt, *name, *secname;
763 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
764 	int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
765 	size_t hstlen;
766 
767 	has_nfs_args_opt = 0;
768 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
769 		error = EINVAL;
770 		goto out;
771 	}
772 
773 	td = curthread;
774 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
775 		error = nfs_mountroot(mp);
776 		goto out;
777 	}
778 
779 	nfscl_init();
780 
781 	/*
782 	 * The old mount_nfs program passed the struct nfs_args
783 	 * from userspace to kernel.  The new mount_nfs program
784 	 * passes string options via nmount() from userspace to kernel
785 	 * and we populate the struct nfs_args in the kernel.
786 	 */
787 	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
788 		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
789 		    sizeof(args));
790 		if (error != 0)
791 			goto out;
792 
793 		if (args.version != NFS_ARGSVERSION) {
794 			error = EPROGMISMATCH;
795 			goto out;
796 		}
797 		has_nfs_args_opt = 1;
798 	}
799 
800 	/* Handle the new style options. */
801 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
802 		args.flags |= NFSMNT_NOCONN;
803 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
804 		args.flags |= NFSMNT_NOCONN;
805 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
806 		args.flags |= NFSMNT_NOLOCKD;
807 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
808 		args.flags &= ~NFSMNT_NOLOCKD;
809 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
810 		args.flags |= NFSMNT_INT;
811 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
812 		args.flags |= NFSMNT_RDIRPLUS;
813 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
814 		args.flags |= NFSMNT_RESVPORT;
815 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
816 		args.flags &= ~NFSMNT_RESVPORT;
817 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
818 		args.flags |= NFSMNT_SOFT;
819 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
820 		args.flags &= ~NFSMNT_SOFT;
821 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
822 		args.sotype = SOCK_DGRAM;
823 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
824 		args.sotype = SOCK_DGRAM;
825 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
826 		args.sotype = SOCK_STREAM;
827 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
828 		args.flags |= NFSMNT_NFSV3;
829 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
830 		args.flags |= NFSMNT_NFSV4;
831 		args.sotype = SOCK_STREAM;
832 	}
833 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
834 		args.flags |= NFSMNT_ALLGSSNAME;
835 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
836 		args.flags |= NFSMNT_NOCTO;
837 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
838 		if (opt == NULL) {
839 			vfs_mount_error(mp, "illegal readdirsize");
840 			error = EINVAL;
841 			goto out;
842 		}
843 		ret = sscanf(opt, "%d", &args.readdirsize);
844 		if (ret != 1 || args.readdirsize <= 0) {
845 			vfs_mount_error(mp, "illegal readdirsize: %s",
846 			    opt);
847 			error = EINVAL;
848 			goto out;
849 		}
850 		args.flags |= NFSMNT_READDIRSIZE;
851 	}
852 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
853 		if (opt == NULL) {
854 			vfs_mount_error(mp, "illegal readahead");
855 			error = EINVAL;
856 			goto out;
857 		}
858 		ret = sscanf(opt, "%d", &args.readahead);
859 		if (ret != 1 || args.readahead <= 0) {
860 			vfs_mount_error(mp, "illegal readahead: %s",
861 			    opt);
862 			error = EINVAL;
863 			goto out;
864 		}
865 		args.flags |= NFSMNT_READAHEAD;
866 	}
867 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
868 		if (opt == NULL) {
869 			vfs_mount_error(mp, "illegal wsize");
870 			error = EINVAL;
871 			goto out;
872 		}
873 		ret = sscanf(opt, "%d", &args.wsize);
874 		if (ret != 1 || args.wsize <= 0) {
875 			vfs_mount_error(mp, "illegal wsize: %s",
876 			    opt);
877 			error = EINVAL;
878 			goto out;
879 		}
880 		args.flags |= NFSMNT_WSIZE;
881 	}
882 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
883 		if (opt == NULL) {
884 			vfs_mount_error(mp, "illegal rsize");
885 			error = EINVAL;
886 			goto out;
887 		}
888 		ret = sscanf(opt, "%d", &args.rsize);
889 		if (ret != 1 || args.rsize <= 0) {
890 			vfs_mount_error(mp, "illegal wsize: %s",
891 			    opt);
892 			error = EINVAL;
893 			goto out;
894 		}
895 		args.flags |= NFSMNT_RSIZE;
896 	}
897 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
898 		if (opt == NULL) {
899 			vfs_mount_error(mp, "illegal retrans");
900 			error = EINVAL;
901 			goto out;
902 		}
903 		ret = sscanf(opt, "%d", &args.retrans);
904 		if (ret != 1 || args.retrans <= 0) {
905 			vfs_mount_error(mp, "illegal retrans: %s",
906 			    opt);
907 			error = EINVAL;
908 			goto out;
909 		}
910 		args.flags |= NFSMNT_RETRANS;
911 	}
912 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
913 		ret = sscanf(opt, "%d", &args.acregmin);
914 		if (ret != 1 || args.acregmin < 0) {
915 			vfs_mount_error(mp, "illegal acregmin: %s",
916 			    opt);
917 			error = EINVAL;
918 			goto out;
919 		}
920 		args.flags |= NFSMNT_ACREGMIN;
921 	}
922 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
923 		ret = sscanf(opt, "%d", &args.acregmax);
924 		if (ret != 1 || args.acregmax < 0) {
925 			vfs_mount_error(mp, "illegal acregmax: %s",
926 			    opt);
927 			error = EINVAL;
928 			goto out;
929 		}
930 		args.flags |= NFSMNT_ACREGMAX;
931 	}
932 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
933 		ret = sscanf(opt, "%d", &args.acdirmin);
934 		if (ret != 1 || args.acdirmin < 0) {
935 			vfs_mount_error(mp, "illegal acdirmin: %s",
936 			    opt);
937 			error = EINVAL;
938 			goto out;
939 		}
940 		args.flags |= NFSMNT_ACDIRMIN;
941 	}
942 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
943 		ret = sscanf(opt, "%d", &args.acdirmax);
944 		if (ret != 1 || args.acdirmax < 0) {
945 			vfs_mount_error(mp, "illegal acdirmax: %s",
946 			    opt);
947 			error = EINVAL;
948 			goto out;
949 		}
950 		args.flags |= NFSMNT_ACDIRMAX;
951 	}
952 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
953 		ret = sscanf(opt, "%d", &args.timeo);
954 		if (ret != 1 || args.timeo <= 0) {
955 			vfs_mount_error(mp, "illegal timeout: %s",
956 			    opt);
957 			error = EINVAL;
958 			goto out;
959 		}
960 		args.flags |= NFSMNT_TIMEO;
961 	}
962 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
963 	    == 0) {
964 		ret = sscanf(opt, "%d", &negnametimeo);
965 		if (ret != 1 || negnametimeo < 0) {
966 			vfs_mount_error(mp, "illegal negnametimeo: %s",
967 			    opt);
968 			error = EINVAL;
969 			goto out;
970 		}
971 	}
972 	if (vfs_getopt(mp->mnt_optnew, "sec",
973 		(void **) &secname, NULL) == 0)
974 		nfs_sec_name(secname, &args.flags);
975 
976 	if (mp->mnt_flag & MNT_UPDATE) {
977 		struct nfsmount *nmp = VFSTONFS(mp);
978 
979 		if (nmp == NULL) {
980 			error = EIO;
981 			goto out;
982 		}
983 		/*
984 		 * When doing an update, we can't change version,
985 		 * security, switch lockd strategies or change cookie
986 		 * translation
987 		 */
988 		args.flags = (args.flags &
989 		    ~(NFSMNT_NFSV3 |
990 		      NFSMNT_NFSV4 |
991 		      NFSMNT_KERB |
992 		      NFSMNT_INTEGRITY |
993 		      NFSMNT_PRIVACY |
994 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
995 		    (nmp->nm_flag &
996 			(NFSMNT_NFSV3 |
997 			 NFSMNT_NFSV4 |
998 			 NFSMNT_KERB |
999 			 NFSMNT_INTEGRITY |
1000 			 NFSMNT_PRIVACY |
1001 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1002 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1003 		goto out;
1004 	}
1005 
1006 	/*
1007 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1008 	 * or no-connection mode for those protocols that support
1009 	 * no-connection mode (the flag will be cleared later for protocols
1010 	 * that do not support no-connection mode).  This will allow a client
1011 	 * to receive replies from a different IP then the request was
1012 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1013 	 * not 0.
1014 	 */
1015 	if (nfs_ip_paranoia == 0)
1016 		args.flags |= NFSMNT_NOCONN;
1017 
1018 	if (has_nfs_args_opt != 0) {
1019 		/*
1020 		 * In the 'nfs_args' case, the pointers in the args
1021 		 * structure are in userland - we copy them in here.
1022 		 */
1023 		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1024 			vfs_mount_error(mp, "Bad file handle");
1025 			error = EINVAL;
1026 			goto out;
1027 		}
1028 		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1029 		    args.fhsize);
1030 		if (error != 0)
1031 			goto out;
1032 		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1033 		if (error != 0)
1034 			goto out;
1035 		bzero(&hst[hstlen], MNAMELEN - hstlen);
1036 		args.hostname = hst;
1037 		/* sockargs() call must be after above copyin() calls */
1038 		error = getsockaddr(&nam, (caddr_t)args.addr,
1039 		    args.addrlen);
1040 		if (error != 0)
1041 			goto out;
1042 	} else {
1043 		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1044 		    &args.fhsize) == 0) {
1045 			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1046 				vfs_mount_error(mp, "Bad file handle");
1047 				error = EINVAL;
1048 				goto out;
1049 			}
1050 			bcopy(args.fh, nfh, args.fhsize);
1051 		} else {
1052 			args.fhsize = 0;
1053 		}
1054 		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1055 		    (void **)&args.hostname, &len);
1056 		if (args.hostname == NULL) {
1057 			vfs_mount_error(mp, "Invalid hostname");
1058 			error = EINVAL;
1059 			goto out;
1060 		}
1061 		bcopy(args.hostname, hst, MNAMELEN);
1062 		hst[MNAMELEN - 1] = '\0';
1063 	}
1064 
1065 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1066 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1067 	else
1068 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1069 	srvkrbnamelen = strlen(srvkrbname);
1070 
1071 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1072 		strlcpy(krbname, name, sizeof (krbname));
1073 	else
1074 		krbname[0] = '\0';
1075 	krbnamelen = strlen(krbname);
1076 
1077 	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1078 		strlcpy(dirpath, name, sizeof (dirpath));
1079 	else
1080 		dirpath[0] = '\0';
1081 	dirlen = strlen(dirpath);
1082 
1083 	if (has_nfs_args_opt == 0) {
1084 		if (vfs_getopt(mp->mnt_optnew, "addr",
1085 		    (void **)&args.addr, &args.addrlen) == 0) {
1086 			if (args.addrlen > SOCK_MAXADDRLEN) {
1087 				error = ENAMETOOLONG;
1088 				goto out;
1089 			}
1090 			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1091 			bcopy(args.addr, nam, args.addrlen);
1092 			nam->sa_len = args.addrlen;
1093 		} else {
1094 			vfs_mount_error(mp, "No server address");
1095 			error = EINVAL;
1096 			goto out;
1097 		}
1098 	}
1099 
1100 	args.fh = nfh;
1101 	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1102 	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1103 	    negnametimeo);
1104 out:
1105 	if (!error) {
1106 		MNT_ILOCK(mp);
1107 		mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
1108 		MNT_IUNLOCK(mp);
1109 	}
1110 	return (error);
1111 }
1112 
1113 
1114 /*
1115  * VFS Operations.
1116  *
1117  * mount system call
1118  * It seems a bit dumb to copyinstr() the host and path here and then
1119  * bcopy() them in mountnfs(), but I wanted to detect errors before
1120  * doing the sockargs() call because sockargs() allocates an mbuf and
1121  * an error after that means that I have to release the mbuf.
1122  */
1123 /* ARGSUSED */
1124 static int
1125 nfs_cmount(struct mntarg *ma, void *data, int flags)
1126 {
1127 	int error;
1128 	struct nfs_args args;
1129 
1130 	error = copyin(data, &args, sizeof (struct nfs_args));
1131 	if (error)
1132 		return error;
1133 
1134 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1135 
1136 	error = kernel_mount(ma, flags);
1137 	return (error);
1138 }
1139 
1140 /*
1141  * Common code for mount and mountroot
1142  */
1143 static int
1144 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1145     char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1146     u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1147     struct ucred *cred, struct thread *td, int negnametimeo)
1148 {
1149 	struct nfsmount *nmp;
1150 	struct nfsnode *np;
1151 	int error, trycnt, ret;
1152 	struct nfsvattr nfsva;
1153 	static u_int64_t clval = 0;
1154 
1155 	if (mp->mnt_flag & MNT_UPDATE) {
1156 		nmp = VFSTONFS(mp);
1157 		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1158 		FREE(nam, M_SONAME);
1159 		return (0);
1160 	} else {
1161 		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1162 		    krbnamelen + dirlen + srvkrbnamelen + 2,
1163 		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1164 		TAILQ_INIT(&nmp->nm_bufq);
1165 		if (clval == 0)
1166 			clval = (u_int64_t)nfsboottime.tv_sec;
1167 		nmp->nm_clval = clval++;
1168 		nmp->nm_krbnamelen = krbnamelen;
1169 		nmp->nm_dirpathlen = dirlen;
1170 		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1171 		if (td->td_ucred->cr_uid != (uid_t)0) {
1172 			/*
1173 			 * nm_uid is used to get KerberosV credentials for
1174 			 * the nfsv4 state handling operations if there is
1175 			 * no host based principal set. Use the uid of
1176 			 * this user if not root, since they are doing the
1177 			 * mount. I don't think setting this for root will
1178 			 * work, since root normally does not have user
1179 			 * credentials in a credentials cache.
1180 			 */
1181 			nmp->nm_uid = td->td_ucred->cr_uid;
1182 		} else {
1183 			/*
1184 			 * Just set to -1, so it won't be used.
1185 			 */
1186 			nmp->nm_uid = (uid_t)-1;
1187 		}
1188 
1189 		/* Copy and null terminate all the names */
1190 		if (nmp->nm_krbnamelen > 0) {
1191 			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1192 			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1193 		}
1194 		if (nmp->nm_dirpathlen > 0) {
1195 			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1196 			    nmp->nm_dirpathlen);
1197 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1198 			    + 1] = '\0';
1199 		}
1200 		if (nmp->nm_srvkrbnamelen > 0) {
1201 			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1202 			    nmp->nm_srvkrbnamelen);
1203 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1204 			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1205 		}
1206 		nmp->nm_sockreq.nr_cred = crhold(cred);
1207 		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1208 		mp->mnt_data = nmp;
1209 		nmp->nm_getinfo = nfs_getnlminfo;
1210 		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1211 	}
1212 	vfs_getnewfsid(mp);
1213 	nmp->nm_mountp = mp;
1214 	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1215 	nmp->nm_negnametimeo = negnametimeo;
1216 
1217 	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1218 
1219 	/*
1220 	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1221 	 * high, depending on whether we end up with negative offsets in
1222 	 * the client or server somewhere.  2GB-1 may be safer.
1223 	 *
1224 	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1225 	 * that we can handle until we find out otherwise.
1226 	 * XXX Our "safe" limit on the client is what we can store in our
1227 	 * buffer cache using signed(!) block numbers.
1228 	 */
1229 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1230 		nmp->nm_maxfilesize = 0xffffffffLL;
1231 	else
1232 		nmp->nm_maxfilesize = OFF_MAX;
1233 
1234 	nmp->nm_timeo = NFS_TIMEO;
1235 	nmp->nm_retry = NFS_RETRANS;
1236 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1237 		nmp->nm_wsize = NFS_WSIZE;
1238 		nmp->nm_rsize = NFS_RSIZE;
1239 		nmp->nm_readdirsize = NFS_READDIRSIZE;
1240 	}
1241 	nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1242 	nmp->nm_numgrps = NFS_MAXGRPS;
1243 	nmp->nm_readahead = NFS_DEFRAHEAD;
1244 	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1245 	if (nmp->nm_tprintf_delay < 0)
1246 		nmp->nm_tprintf_delay = 0;
1247 	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1248 	if (nmp->nm_tprintf_initial_delay < 0)
1249 		nmp->nm_tprintf_initial_delay = 0;
1250 	nmp->nm_fhsize = argp->fhsize;
1251 	if (nmp->nm_fhsize > 0)
1252 		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1253 	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1254 	nmp->nm_nam = nam;
1255 	/* Set up the sockets and per-host congestion */
1256 	nmp->nm_sotype = argp->sotype;
1257 	nmp->nm_soproto = argp->proto;
1258 	nmp->nm_sockreq.nr_prog = NFS_PROG;
1259 	if ((argp->flags & NFSMNT_NFSV4))
1260 		nmp->nm_sockreq.nr_vers = NFS_VER4;
1261 	else if ((argp->flags & NFSMNT_NFSV3))
1262 		nmp->nm_sockreq.nr_vers = NFS_VER3;
1263 	else
1264 		nmp->nm_sockreq.nr_vers = NFS_VER2;
1265 
1266 
1267 	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1268 		goto bad;
1269 
1270 	/*
1271 	 * A reference count is needed on the nfsnode representing the
1272 	 * remote root.  If this object is not persistent, then backward
1273 	 * traversals of the mount point (i.e. "..") will not work if
1274 	 * the nfsnode gets flushed out of the cache. Ufs does not have
1275 	 * this problem, because one can identify root inodes by their
1276 	 * number == ROOTINO (2).
1277 	 */
1278 	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1279 	    nmp->nm_dirpathlen > 0) {
1280 		/*
1281 		 * If the fhsize on the mount point == 0 for V4, the mount
1282 		 * path needs to be looked up.
1283 		 */
1284 		trycnt = 3;
1285 		do {
1286 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1287 			    cred, td);
1288 			if (error)
1289 				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1290 		} while (error && --trycnt > 0);
1291 		if (error) {
1292 			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1293 			goto bad;
1294 		}
1295 	}
1296 	if (nmp->nm_fhsize > 0) {
1297 		/*
1298 		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1299 		 * non-zero for the root vnode. f_iosize will be set correctly
1300 		 * by nfs_statfs() before any I/O occurs.
1301 		 */
1302 		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1303 		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1304 		    LK_EXCLUSIVE);
1305 		if (error)
1306 			goto bad;
1307 		*vpp = NFSTOV(np);
1308 
1309 		/*
1310 		 * Get file attributes and transfer parameters for the
1311 		 * mountpoint.  This has the side effect of filling in
1312 		 * (*vpp)->v_type with the correct value.
1313 		 */
1314 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1315 		    cred, td, &nfsva, NULL);
1316 		if (ret) {
1317 			/*
1318 			 * Just set default values to get things going.
1319 			 */
1320 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1321 			nfsva.na_vattr.va_type = VDIR;
1322 			nfsva.na_vattr.va_mode = 0777;
1323 			nfsva.na_vattr.va_nlink = 100;
1324 			nfsva.na_vattr.va_uid = (uid_t)0;
1325 			nfsva.na_vattr.va_gid = (gid_t)0;
1326 			nfsva.na_vattr.va_fileid = 2;
1327 			nfsva.na_vattr.va_gen = 1;
1328 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1329 			nfsva.na_vattr.va_size = 512 * 1024;
1330 		}
1331 		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1332 		if (argp->flags & NFSMNT_NFSV3)
1333 			ncl_fsinfo(nmp, *vpp, cred, td);
1334 
1335 		/* Mark if the mount point supports NFSv4 ACLs. */
1336 		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1337 		    ret == 0 &&
1338 		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1339 			MNT_ILOCK(mp);
1340 			mp->mnt_flag |= MNT_NFS4ACLS;
1341 			MNT_IUNLOCK(mp);
1342 		}
1343 
1344 		/*
1345 		 * Lose the lock but keep the ref.
1346 		 */
1347 		VOP_UNLOCK(*vpp, 0);
1348 		return (0);
1349 	}
1350 	error = EIO;
1351 
1352 bad:
1353 	newnfs_disconnect(&nmp->nm_sockreq);
1354 	crfree(nmp->nm_sockreq.nr_cred);
1355 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1356 	mtx_destroy(&nmp->nm_mtx);
1357 	FREE(nmp, M_NEWNFSMNT);
1358 	FREE(nam, M_SONAME);
1359 	return (error);
1360 }
1361 
1362 /*
1363  * unmount system call
1364  */
1365 static int
1366 nfs_unmount(struct mount *mp, int mntflags)
1367 {
1368 	struct thread *td;
1369 	struct nfsmount *nmp;
1370 	int error, flags = 0, trycnt = 0;
1371 
1372 	td = curthread;
1373 
1374 	if (mntflags & MNT_FORCE)
1375 		flags |= FORCECLOSE;
1376 	nmp = VFSTONFS(mp);
1377 	/*
1378 	 * Goes something like this..
1379 	 * - Call vflush() to clear out vnodes for this filesystem
1380 	 * - Close the socket
1381 	 * - Free up the data structures
1382 	 */
1383 	/* In the forced case, cancel any outstanding requests. */
1384 	if (mntflags & MNT_FORCE) {
1385 		error = newnfs_nmcancelreqs(nmp);
1386 		if (error)
1387 			goto out;
1388 		/* For a forced close, get rid of the renew thread now */
1389 		nfscl_umount(nmp, td);
1390 	}
1391 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1392 	do {
1393 		error = vflush(mp, 1, flags, td);
1394 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1395 			(void) nfs_catnap(PSOCK, error, "newndm");
1396 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1397 	if (error)
1398 		goto out;
1399 
1400 	/*
1401 	 * We are now committed to the unmount.
1402 	 */
1403 	if ((mntflags & MNT_FORCE) == 0)
1404 		nfscl_umount(nmp, td);
1405 	newnfs_disconnect(&nmp->nm_sockreq);
1406 	crfree(nmp->nm_sockreq.nr_cred);
1407 	FREE(nmp->nm_nam, M_SONAME);
1408 
1409 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1410 	mtx_destroy(&nmp->nm_mtx);
1411 	FREE(nmp, M_NEWNFSMNT);
1412 out:
1413 	return (error);
1414 }
1415 
1416 /*
1417  * Return root of a filesystem
1418  */
1419 static int
1420 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1421 {
1422 	struct vnode *vp;
1423 	struct nfsmount *nmp;
1424 	struct nfsnode *np;
1425 	int error;
1426 
1427 	nmp = VFSTONFS(mp);
1428 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1429 	if (error)
1430 		return error;
1431 	vp = NFSTOV(np);
1432 	/*
1433 	 * Get transfer parameters and attributes for root vnode once.
1434 	 */
1435 	mtx_lock(&nmp->nm_mtx);
1436 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1437 		mtx_unlock(&nmp->nm_mtx);
1438 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1439 	} else
1440 		mtx_unlock(&nmp->nm_mtx);
1441 	if (vp->v_type == VNON)
1442 	    vp->v_type = VDIR;
1443 	vp->v_vflag |= VV_ROOT;
1444 	*vpp = vp;
1445 	return (0);
1446 }
1447 
1448 /*
1449  * Flush out the buffer cache
1450  */
1451 /* ARGSUSED */
1452 static int
1453 nfs_sync(struct mount *mp, int waitfor)
1454 {
1455 	struct vnode *vp, *mvp;
1456 	struct thread *td;
1457 	int error, allerror = 0;
1458 
1459 	td = curthread;
1460 
1461 	MNT_ILOCK(mp);
1462 	/*
1463 	 * If a forced dismount is in progress, return from here so that
1464 	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1465 	 * calling VFS_UNMOUNT().
1466 	 */
1467 	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1468 		MNT_IUNLOCK(mp);
1469 		return (EBADF);
1470 	}
1471 
1472 	/*
1473 	 * Force stale buffer cache information to be flushed.
1474 	 */
1475 loop:
1476 	MNT_VNODE_FOREACH(vp, mp, mvp) {
1477 		VI_LOCK(vp);
1478 		MNT_IUNLOCK(mp);
1479 		/* XXX Racy bv_cnt check. */
1480 		if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1481 		    waitfor == MNT_LAZY) {
1482 			VI_UNLOCK(vp);
1483 			MNT_ILOCK(mp);
1484 			continue;
1485 		}
1486 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1487 			MNT_ILOCK(mp);
1488 			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1489 			goto loop;
1490 		}
1491 		error = VOP_FSYNC(vp, waitfor, td);
1492 		if (error)
1493 			allerror = error;
1494 		VOP_UNLOCK(vp, 0);
1495 		vrele(vp);
1496 
1497 		MNT_ILOCK(mp);
1498 	}
1499 	MNT_IUNLOCK(mp);
1500 	return (allerror);
1501 }
1502 
1503 static int
1504 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1505 {
1506 	struct nfsmount *nmp = VFSTONFS(mp);
1507 	struct vfsquery vq;
1508 	int error;
1509 
1510 	bzero(&vq, sizeof(vq));
1511 	switch (op) {
1512 #if 0
1513 	case VFS_CTL_NOLOCKS:
1514 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1515  		if (req->oldptr != NULL) {
1516  			error = SYSCTL_OUT(req, &val, sizeof(val));
1517  			if (error)
1518  				return (error);
1519  		}
1520  		if (req->newptr != NULL) {
1521  			error = SYSCTL_IN(req, &val, sizeof(val));
1522  			if (error)
1523  				return (error);
1524 			if (val)
1525 				nmp->nm_flag |= NFSMNT_NOLOCKS;
1526 			else
1527 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1528  		}
1529 		break;
1530 #endif
1531 	case VFS_CTL_QUERY:
1532 		mtx_lock(&nmp->nm_mtx);
1533 		if (nmp->nm_state & NFSSTA_TIMEO)
1534 			vq.vq_flags |= VQ_NOTRESP;
1535 		mtx_unlock(&nmp->nm_mtx);
1536 #if 0
1537 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1538 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1539 			vq.vq_flags |= VQ_NOTRESPLOCK;
1540 #endif
1541 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1542 		break;
1543  	case VFS_CTL_TIMEO:
1544  		if (req->oldptr != NULL) {
1545  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1546  			    sizeof(nmp->nm_tprintf_initial_delay));
1547  			if (error)
1548  				return (error);
1549  		}
1550  		if (req->newptr != NULL) {
1551 			error = vfs_suser(mp, req->td);
1552 			if (error)
1553 				return (error);
1554  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1555  			    sizeof(nmp->nm_tprintf_initial_delay));
1556  			if (error)
1557  				return (error);
1558  			if (nmp->nm_tprintf_initial_delay < 0)
1559  				nmp->nm_tprintf_initial_delay = 0;
1560  		}
1561 		break;
1562 	default:
1563 		return (ENOTSUP);
1564 	}
1565 	return (0);
1566 }
1567 
1568 /*
1569  * Extract the information needed by the nlm from the nfs vnode.
1570  */
1571 static void
1572 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1573     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1574     struct timeval *timeop)
1575 {
1576 	struct nfsmount *nmp;
1577 	struct nfsnode *np = VTONFS(vp);
1578 
1579 	nmp = VFSTONFS(vp->v_mount);
1580 	if (fhlenp != NULL)
1581 		*fhlenp = (size_t)np->n_fhp->nfh_len;
1582 	if (fhp != NULL)
1583 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1584 	if (sp != NULL)
1585 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1586 	if (is_v3p != NULL)
1587 		*is_v3p = NFS_ISV3(vp);
1588 	if (sizep != NULL)
1589 		*sizep = np->n_size;
1590 	if (timeop != NULL) {
1591 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1592 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1593 	}
1594 }
1595 
1596