xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision 8f861da99cb9865b2f1ef6098ad074150f368c23)
1 /*-
2  * Copyright (c) 1989, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/lock.h>
50 #include <sys/malloc.h>
51 #include <sys/mbuf.h>
52 #include <sys/module.h>
53 #include <sys/mount.h>
54 #include <sys/proc.h>
55 #include <sys/socket.h>
56 #include <sys/socketvar.h>
57 #include <sys/sockio.h>
58 #include <sys/sysctl.h>
59 #include <sys/vnode.h>
60 #include <sys/signalvar.h>
61 
62 #include <vm/vm.h>
63 #include <vm/vm_extern.h>
64 #include <vm/uma.h>
65 
66 #include <net/if.h>
67 #include <net/route.h>
68 #include <netinet/in.h>
69 
70 #include <fs/nfs/nfsport.h>
71 #include <fs/nfsclient/nfsnode.h>
72 #include <fs/nfsclient/nfsmount.h>
73 #include <fs/nfsclient/nfs.h>
74 #include <fs/nfsclient/nfsdiskless.h>
75 
/* State defined elsewhere in the NFS client and only referenced here. */
extern int nfscl_ticks;
extern struct timeval nfsboottime;
extern struct nfsstats	newnfsstats;

/* Malloc types for NFS request headers and per-mount structures. */
MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");

/* Tunables and statistics exported under the vfs.newnfs sysctl node. */
SYSCTL_DECL(_vfs_newnfs);
SYSCTL_STRUCT(_vfs_newnfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
	&newnfsstats, nfsstats, "S,nfsstats");
/*
 * Defaults to 1.  When set to 0, nfs_mount() adds NFSMNT_NOCONN to the
 * flags of every new (non-update) mount.
 */
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_newnfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0, "");
/*
 * NOTE(review): the same macro supplies both the initial value and the
 * sysctl OID number argument below (likewise for NFS_TPRINTF_DELAY) --
 * confirm that this is intended.
 */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_INITIAL_DELAY,
        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
/* How long between console messages "nfs server foo not responding". */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_DELAY,
        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
96 
/*
 * Forward declarations for the file-local helpers and for the VFS
 * operations that are installed in nfs_vfsops below.
 */
static void	nfs_sec_name(char *, int *);
static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
		    struct nfs_args *argp, const char *, struct ucred *,
		    struct thread *);
static int	mountnfs(struct nfs_args *, struct mount *,
		    struct sockaddr *, char *, u_char *, u_char *, u_char *,
		    struct vnode **, struct ucred *, struct thread *, int);
static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
		    struct sockaddr_storage *, int *, off_t *,
		    struct timeval *);
/* VFS entry points, typed through the vfs_*_t function typedefs. */
static vfs_mount_t nfs_mount;
static vfs_cmount_t nfs_cmount;
static vfs_unmount_t nfs_unmount;
static vfs_root_t nfs_root;
static vfs_statfs_t nfs_statfs;
static vfs_sync_t nfs_sync;
static vfs_sysctl_t nfs_sysctl;
114 
/*
 * NFS VFS operations vector.  Initialisation and teardown are handled by
 * ncl_init()/ncl_uninit(); the remaining entries are the static functions
 * declared above.
 */
static struct vfsops nfs_vfsops = {
	.vfs_init =		ncl_init,
	.vfs_mount =		nfs_mount,
	.vfs_cmount =		nfs_cmount,
	.vfs_root =		nfs_root,
	.vfs_statfs =		nfs_statfs,
	.vfs_sync =		nfs_sync,
	.vfs_uninit =		ncl_uninit,
	.vfs_unmount =		nfs_unmount,
	.vfs_sysctl =		nfs_sysctl,
};
/* Register the vector under the name "newnfs", flagged VFCF_NETWORK. */
VFS_SET(nfs_vfsops, newnfs, VFCF_NETWORK);

/* So that loader and kldload(2) can find us, wherever we are. */
MODULE_VERSION(newnfs, 1);
133 
/*
 * This structure must be filled in by a primary bootstrap or bootstrap
 * server for a diskless/dataless machine. It is initialized below just
 * to ensure that it is allocated to initialized data (.data not .bss).
 *
 * newnfs_diskless_valid tells ncl_mountroot() what it has to work with:
 * 0 means nothing was filled in (the root mount is refused), 1 means only
 * the old-style newnfs_diskless is filled in and must first be converted
 * by nfs_convert_diskless(), which then sets the value to 3.
 */
struct nfs_diskless newnfs_diskless = { { { 0 } } };
struct nfsv3_diskless newnfsv3_diskless = { { { 0 } } };
int newnfs_diskless_valid = 0;

/* Read-only view of the validity flag described above. */
SYSCTL_INT(_vfs_newnfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &newnfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

/* Read-only export of newnfsv3_diskless.root_hostnam. */
SYSCTL_STRING(_vfs_newnfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    newnfsv3_diskless.root_hostnam, 0, "Path to nfs root");

/* Read-only export of the raw root server socket address. */
SYSCTL_OPAQUE(_vfs_newnfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &newnfsv3_diskless.root_saddr, sizeof newnfsv3_diskless.root_saddr,
    "%Ssockaddr_in", "Diskless root nfs address");
153 
154 
/* Prototypes for the diskless (NFS root) support routines. */
void		newnfsargs_ntoh(struct nfs_args *);
static int	nfs_mountdiskless(char *,
		    struct sockaddr_in *, struct nfs_args *,
		    struct thread *, struct vnode **, struct mount *);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
162 
163 int
164 newnfs_iosize(struct nfsmount *nmp)
165 {
166 	int iosize, maxio;
167 
168 	/* First, set the upper limit for iosize */
169 	if (nmp->nm_flag & NFSMNT_NFSV4) {
170 		maxio = NFS_MAXBSIZE;
171 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
172 		if (nmp->nm_sotype == SOCK_DGRAM)
173 			maxio = NFS_MAXDGRAMDATA;
174 		else
175 			maxio = NFS_MAXBSIZE;
176 	} else {
177 		maxio = NFS_V2MAXDATA;
178 	}
179 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
180 		nmp->nm_rsize = maxio;
181 	if (nmp->nm_rsize > MAXBSIZE)
182 		nmp->nm_rsize = MAXBSIZE;
183 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
184 		nmp->nm_readdirsize = maxio;
185 	if (nmp->nm_readdirsize > nmp->nm_rsize)
186 		nmp->nm_readdirsize = nmp->nm_rsize;
187 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
188 		nmp->nm_wsize = maxio;
189 	if (nmp->nm_wsize > MAXBSIZE)
190 		nmp->nm_wsize = MAXBSIZE;
191 
192 	/*
193 	 * Calculate the size used for io buffers.  Use the larger
194 	 * of the two sizes to minimise nfs requests but make sure
195 	 * that it is at least one VM page to avoid wasting buffer
196 	 * space.
197 	 */
198 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
199 	iosize = imax(iosize, PAGE_SIZE);
200 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
201 	return (iosize);
202 }
203 
204 static void
205 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
206 {
207 
208 	args->version = NFS_ARGSVERSION;
209 	args->addr = oargs->addr;
210 	args->addrlen = oargs->addrlen;
211 	args->sotype = oargs->sotype;
212 	args->proto = oargs->proto;
213 	args->fh = oargs->fh;
214 	args->fhsize = oargs->fhsize;
215 	args->flags = oargs->flags;
216 	args->wsize = oargs->wsize;
217 	args->rsize = oargs->rsize;
218 	args->readdirsize = oargs->readdirsize;
219 	args->timeo = oargs->timeo;
220 	args->retrans = oargs->retrans;
221 	args->readahead = oargs->readahead;
222 	args->hostname = oargs->hostname;
223 }
224 
225 static void
226 nfs_convert_diskless(void)
227 {
228 
229 	bcopy(&newnfs_diskless.myif, &newnfsv3_diskless.myif,
230 	    sizeof (struct ifaliasreq));
231 	bcopy(&newnfs_diskless.mygateway, &newnfsv3_diskless.mygateway,
232 	    sizeof (struct sockaddr_in));
233 	nfs_convert_oargs(&newnfsv3_diskless.root_args,
234 	    &newnfs_diskless.root_args);
235 	if (newnfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
236 		newnfsv3_diskless.root_fhsize = NFSX_MYFH;
237 		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh,
238 		    NFSX_MYFH);
239 	} else {
240 		newnfsv3_diskless.root_fhsize = NFSX_V2FH;
241 		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh,
242 		    NFSX_V2FH);
243 	}
244 	bcopy(&newnfs_diskless.root_saddr,&newnfsv3_diskless.root_saddr,
245 	    sizeof(struct sockaddr_in));
246 	bcopy(newnfs_diskless.root_hostnam, newnfsv3_diskless.root_hostnam,
247 	    MNAMELEN);
248 	newnfsv3_diskless.root_time = newnfs_diskless.root_time;
249 	bcopy(newnfs_diskless.my_hostnam, newnfsv3_diskless.my_hostnam,
250 	    MAXHOSTNAMELEN);
251 	newnfs_diskless_valid = 3;
252 }
253 
254 /*
255  * nfs statfs call
256  */
257 static int
258 nfs_statfs(struct mount *mp, struct statfs *sbp)
259 {
260 	struct vnode *vp;
261 	struct thread *td;
262 	struct nfsmount *nmp = VFSTONFS(mp);
263 	struct nfsvattr nfsva;
264 	struct nfsfsinfo fs;
265 	struct nfsstatfs sb;
266 	int error = 0, attrflag, gotfsinfo = 0, ret;
267 	struct nfsnode *np;
268 
269 	td = curthread;
270 
271 	error = vfs_busy(mp, MBF_NOWAIT);
272 	if (error)
273 		return (error);
274 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
275 	if (error) {
276 		vfs_unbusy(mp);
277 		return (error);
278 	}
279 	vp = NFSTOV(np);
280 	mtx_lock(&nmp->nm_mtx);
281 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
282 		mtx_unlock(&nmp->nm_mtx);
283 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
284 		    &attrflag, NULL);
285 		if (!error)
286 			gotfsinfo = 1;
287 	} else
288 		mtx_unlock(&nmp->nm_mtx);
289 	if (!error)
290 		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
291 		    &attrflag, NULL);
292 	if (attrflag == 0) {
293 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
294 		    td->td_ucred, td, &nfsva, NULL);
295 		if (ret) {
296 			/*
297 			 * Just set default values to get things going.
298 			 */
299 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
300 			nfsva.na_vattr.va_type = VDIR;
301 			nfsva.na_vattr.va_mode = 0777;
302 			nfsva.na_vattr.va_nlink = 100;
303 			nfsva.na_vattr.va_uid = (uid_t)0;
304 			nfsva.na_vattr.va_gid = (gid_t)0;
305 			nfsva.na_vattr.va_fileid = 2;
306 			nfsva.na_vattr.va_gen = 1;
307 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
308 			nfsva.na_vattr.va_size = 512 * 1024;
309 		}
310 	}
311 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
312 	if (!error) {
313 	    mtx_lock(&nmp->nm_mtx);
314 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
315 		nfscl_loadfsinfo(nmp, &fs);
316 	    nfscl_loadsbinfo(nmp, &sb, sbp);
317 	    sbp->f_flags = nmp->nm_flag;
318 	    sbp->f_iosize = newnfs_iosize(nmp);
319 	    mtx_unlock(&nmp->nm_mtx);
320 	    if (sbp != &mp->mnt_stat) {
321 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
322 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
323 	    }
324 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
325 	} else if (NFS_ISV4(vp)) {
326 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
327 	}
328 	vput(vp);
329 	vfs_unbusy(mp);
330 	return (error);
331 }
332 
333 /*
334  * nfs version 3 fsinfo rpc call
335  */
336 int
337 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
338     struct thread *td)
339 {
340 	struct nfsfsinfo fs;
341 	struct nfsvattr nfsva;
342 	int error, attrflag;
343 
344 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
345 	if (!error) {
346 		if (attrflag)
347 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
348 			    1);
349 		mtx_lock(&nmp->nm_mtx);
350 		nfscl_loadfsinfo(nmp, &fs);
351 		mtx_unlock(&nmp->nm_mtx);
352 	}
353 	return (error);
354 }
355 
356 /*
357  * Mount a remote root fs via. nfs. This depends on the info in the
358  * newnfs_diskless structure that has been filled in properly by some primary
359  * bootstrap.
360  * It goes something like this:
361  * - do enough of "ifconfig" by calling ifioctl() so that the system
362  *   can talk to the server
363  * - If newnfs_diskless.mygateway is filled in, use that address as
364  *   a default gateway.
365  * - build the rootfs mount point and call mountnfs() to do the rest.
366  *
367  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
368  * structure, as well as other global NFS client variables here, as
369  * nfs_mountroot() will be called once in the boot before any other NFS
370  * client activity occurs.
371  */
372 int
373 ncl_mountroot(struct mount *mp)
374 {
375 	struct thread *td = curthread;
376 	struct nfsv3_diskless *nd = &newnfsv3_diskless;
377 	struct socket *so;
378 	struct vnode *vp;
379 	struct ifreq ir;
380 	int error;
381 	u_long l;
382 	char buf[128];
383 	char *cp;
384 
385 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
386 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
387 #elif defined(NFS_ROOT)
388 	nfs_setup_diskless();
389 #endif
390 
391 	if (newnfs_diskless_valid == 0)
392 		return (-1);
393 	if (newnfs_diskless_valid == 1)
394 		nfs_convert_diskless();
395 
396 	/*
397 	 * XXX splnet, so networks will receive...
398 	 */
399 	splnet();
400 
401 	/*
402 	 * Do enough of ifconfig(8) so that the critical net interface can
403 	 * talk to the server.
404 	 */
405 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
406 	    td->td_ucred, td);
407 	if (error)
408 		panic("nfs_mountroot: socreate(%04x): %d",
409 			nd->myif.ifra_addr.sa_family, error);
410 
411 #if 0 /* XXX Bad idea */
412 	/*
413 	 * We might not have been told the right interface, so we pass
414 	 * over the first ten interfaces of the same kind, until we get
415 	 * one of them configured.
416 	 */
417 
418 	for (i = strlen(nd->myif.ifra_name) - 1;
419 		nd->myif.ifra_name[i] >= '0' &&
420 		nd->myif.ifra_name[i] <= '9';
421 		nd->myif.ifra_name[i] ++) {
422 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
423 		if(!error)
424 			break;
425 	}
426 #endif
427 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
428 	if (error)
429 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
430 	if ((cp = getenv("boot.netif.mtu")) != NULL) {
431 		ir.ifr_mtu = strtol(cp, NULL, 10);
432 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
433 		freeenv(cp);
434 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
435 		if (error)
436 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
437 	}
438 	soclose(so);
439 
440 	/*
441 	 * If the gateway field is filled in, set it as the default route.
442 	 * Note that pxeboot will set a default route of 0 if the route
443 	 * is not set by the DHCP server.  Check also for a value of 0
444 	 * to avoid panicking inappropriately in that situation.
445 	 */
446 	if (nd->mygateway.sin_len != 0 &&
447 	    nd->mygateway.sin_addr.s_addr != 0) {
448 		struct sockaddr_in mask, sin;
449 
450 		bzero((caddr_t)&mask, sizeof(mask));
451 		sin = mask;
452 		sin.sin_family = AF_INET;
453 		sin.sin_len = sizeof(sin);
454                 /* XXX MRT use table 0 for this sort of thing */
455 		error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
456 		    (struct sockaddr *)&nd->mygateway,
457 		    (struct sockaddr *)&mask,
458 		    RTF_UP | RTF_GATEWAY, NULL);
459 		if (error)
460 			panic("nfs_mountroot: RTM_ADD: %d", error);
461 	}
462 
463 	/*
464 	 * Create the rootfs mount point.
465 	 */
466 	nd->root_args.fh = nd->root_fh;
467 	nd->root_args.fhsize = nd->root_fhsize;
468 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
469 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
470 		(l >> 24) & 0xff, (l >> 16) & 0xff,
471 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
472 	printf("NFS ROOT: %s\n", buf);
473 	nd->root_args.hostname = buf;
474 	if ((error = nfs_mountdiskless(buf,
475 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
476 		return (error);
477 	}
478 
479 	/*
480 	 * This is not really an nfs issue, but it is much easier to
481 	 * set hostname here and then let the "/etc/rc.xxx" files
482 	 * mount the right /var based upon its preset value.
483 	 */
484 	mtx_lock(&prison0.pr_mtx);
485 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
486 	    sizeof(prison0.pr_hostname));
487 	mtx_unlock(&prison0.pr_mtx);
488 	inittodr(ntohl(nd->root_time));
489 	return (0);
490 }
491 
492 /*
493  * Internal version of mount system call for diskless setup.
494  */
495 static int
496 nfs_mountdiskless(char *path,
497     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
498     struct vnode **vpp, struct mount *mp)
499 {
500 	struct sockaddr *nam;
501 	int error;
502 
503 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
504 	if ((error = mountnfs(args, mp, nam, path, NULL, NULL, NULL, vpp,
505 	    td->td_ucred, td, NFS_DEFAULT_NEGNAMETIMEO)) != 0) {
506 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
507 		return (error);
508 	}
509 	return (0);
510 }
511 
512 static void
513 nfs_sec_name(char *sec, int *flagsp)
514 {
515 	if (!strcmp(sec, "krb5"))
516 		*flagsp |= NFSMNT_KERB;
517 	else if (!strcmp(sec, "krb5i"))
518 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
519 	else if (!strcmp(sec, "krb5p"))
520 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
521 }
522 
523 static void
524 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
525     const char *hostname, struct ucred *cred, struct thread *td)
526 {
527 	int s;
528 	int adjsock;
529 	char *p;
530 
531 	s = splnet();
532 
533 	/*
534 	 * Set read-only flag if requested; otherwise, clear it if this is
535 	 * an update.  If this is not an update, then either the read-only
536 	 * flag is already clear, or this is a root mount and it was set
537 	 * intentionally at some previous point.
538 	 */
539 	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
540 		MNT_ILOCK(mp);
541 		mp->mnt_flag |= MNT_RDONLY;
542 		MNT_IUNLOCK(mp);
543 	} else if (mp->mnt_flag & MNT_UPDATE) {
544 		MNT_ILOCK(mp);
545 		mp->mnt_flag &= ~MNT_RDONLY;
546 		MNT_IUNLOCK(mp);
547 	}
548 
549 	/*
550 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
551 	 * no sense in that context.  Also, set up appropriate retransmit
552 	 * and soft timeout behavior.
553 	 */
554 	if (argp->sotype == SOCK_STREAM) {
555 		nmp->nm_flag &= ~NFSMNT_NOCONN;
556 		nmp->nm_timeo = NFS_MAXTIMEO;
557 	}
558 
559 	/* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
560 	if ((argp->flags & NFSMNT_NFSV3) == 0)
561 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
562 
563 	/* Also re-bind if we're switching to/from a connected UDP socket */
564 	adjsock = ((nmp->nm_flag & NFSMNT_NOCONN) !=
565 		    (argp->flags & NFSMNT_NOCONN));
566 
567 	/* Update flags atomically.  Don't change the lock bits. */
568 	nmp->nm_flag = argp->flags | nmp->nm_flag;
569 	splx(s);
570 
571 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
572 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
573 		if (nmp->nm_timeo < NFS_MINTIMEO)
574 			nmp->nm_timeo = NFS_MINTIMEO;
575 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
576 			nmp->nm_timeo = NFS_MAXTIMEO;
577 	}
578 
579 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
580 		nmp->nm_retry = argp->retrans;
581 		if (nmp->nm_retry > NFS_MAXREXMIT)
582 			nmp->nm_retry = NFS_MAXREXMIT;
583 	}
584 
585 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
586 		nmp->nm_wsize = argp->wsize;
587 		/* Round down to multiple of blocksize */
588 		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
589 		if (nmp->nm_wsize <= 0)
590 			nmp->nm_wsize = NFS_FABLKSIZE;
591 	}
592 
593 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
594 		nmp->nm_rsize = argp->rsize;
595 		/* Round down to multiple of blocksize */
596 		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
597 		if (nmp->nm_rsize <= 0)
598 			nmp->nm_rsize = NFS_FABLKSIZE;
599 	}
600 
601 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
602 		nmp->nm_readdirsize = argp->readdirsize;
603 	}
604 
605 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
606 		nmp->nm_acregmin = argp->acregmin;
607 	else
608 		nmp->nm_acregmin = NFS_MINATTRTIMO;
609 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
610 		nmp->nm_acregmax = argp->acregmax;
611 	else
612 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
613 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
614 		nmp->nm_acdirmin = argp->acdirmin;
615 	else
616 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
617 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
618 		nmp->nm_acdirmax = argp->acdirmax;
619 	else
620 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
621 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
622 		nmp->nm_acdirmin = nmp->nm_acdirmax;
623 	if (nmp->nm_acregmin > nmp->nm_acregmax)
624 		nmp->nm_acregmin = nmp->nm_acregmax;
625 
626 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
627 		if (argp->readahead <= NFS_MAXRAHEAD)
628 			nmp->nm_readahead = argp->readahead;
629 		else
630 			nmp->nm_readahead = NFS_MAXRAHEAD;
631 	}
632 	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
633 		if (argp->wcommitsize < nmp->nm_wsize)
634 			nmp->nm_wcommitsize = nmp->nm_wsize;
635 		else
636 			nmp->nm_wcommitsize = argp->wcommitsize;
637 	}
638 
639 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
640 		    (nmp->nm_soproto != argp->proto));
641 
642 	if (nmp->nm_client != NULL && adjsock) {
643 		int haslock = 0, error = 0;
644 
645 		if (nmp->nm_sotype == SOCK_STREAM) {
646 			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
647 			if (!error)
648 				haslock = 1;
649 		}
650 		if (!error) {
651 		    newnfs_disconnect(&nmp->nm_sockreq);
652 		    if (haslock)
653 			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
654 		    nmp->nm_sotype = argp->sotype;
655 		    nmp->nm_soproto = argp->proto;
656 		    if (nmp->nm_sotype == SOCK_DGRAM)
657 			while (newnfs_connect(nmp, &nmp->nm_sockreq,
658 			    cred, td, 0)) {
659 				printf("newnfs_args: retrying connect\n");
660 				(void) nfs_catnap(PSOCK, 0, "newnfscon");
661 			}
662 		}
663 	} else {
664 		nmp->nm_sotype = argp->sotype;
665 		nmp->nm_soproto = argp->proto;
666 	}
667 
668 	if (hostname != NULL) {
669 		strlcpy(nmp->nm_hostname, hostname,
670 		    sizeof(nmp->nm_hostname));
671 		p = strchr(nmp->nm_hostname, ':');
672 		if (p != NULL)
673 			*p = '\0';
674 	}
675 }
676 
/*
 * NULL-terminated list of the mount option names accepted by nfs_mount();
 * it is handed to vfs_filteropt() there and a mount request that fails
 * the check is rejected with EINVAL.
 */
static const char *nfs_opts[] = {
	/* mount source */
	"from",
	/* generic mount options */
	"noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
	"noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
	"async",
	/* connection, locking and retry behaviour */
	"noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize", "soft", "hard",
	/* transport and transfer sizes */
	"mntudp", "tcp", "udp", "wsize", "rsize", "retrans",
	/* attribute cache timeouts */
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	/* remaining NFS specific options */
	"resvport", "readahead", "hostname", "timeout", "addr", "fh",
	"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath", "negnametimeo",
	NULL
};
687 
688 /*
689  * VFS Operations.
690  *
691  * mount system call
692  * It seems a bit dumb to copyinstr() the host and path here and then
693  * bcopy() them in mountnfs(), but I wanted to detect errors before
694  * doing the sockargs() call because sockargs() allocates an mbuf and
695  * an error after that means that I have to release the mbuf.
696  */
697 /* ARGSUSED */
698 static int
699 nfs_mount(struct mount *mp)
700 {
701 	struct nfs_args args = {
702 	    .version = NFS_ARGSVERSION,
703 	    .addr = NULL,
704 	    .addrlen = sizeof (struct sockaddr_in),
705 	    .sotype = SOCK_STREAM,
706 	    .proto = 0,
707 	    .fh = NULL,
708 	    .fhsize = 0,
709 	    .flags = 0,
710 	    .wsize = NFS_WSIZE,
711 	    .rsize = NFS_RSIZE,
712 	    .readdirsize = NFS_READDIRSIZE,
713 	    .timeo = 10,
714 	    .retrans = NFS_RETRANS,
715 	    .readahead = NFS_DEFRAHEAD,
716 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
717 	    .hostname = NULL,
718 	    /* args version 4 */
719 	    .acregmin = NFS_MINATTRTIMO,
720 	    .acregmax = NFS_MAXATTRTIMO,
721 	    .acdirmin = NFS_MINDIRATTRTIMO,
722 	    .acdirmax = NFS_MAXDIRATTRTIMO,
723 	    .dirlen = 0,
724 	    .krbnamelen = 0,
725 	    .srvkrbnamelen = 0,
726 	};
727 	int error = 0, ret, len;
728 	struct sockaddr *nam = NULL;
729 	struct vnode *vp;
730 	struct thread *td;
731 	char hst[MNAMELEN];
732 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
733 	char *opt, *name, *secname;
734 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
735 
736 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
737 		error = EINVAL;
738 		goto out;
739 	}
740 
741 	td = curthread;
742 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
743 		error = ncl_mountroot(mp);
744 		goto out;
745 	}
746 
747 	nfscl_init();
748 
749 	/* Handle the new style options. */
750 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
751 		args.flags |= NFSMNT_NOCONN;
752 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
753 		args.flags |= NFSMNT_NOCONN;
754 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
755 		args.flags |= NFSMNT_NOLOCKD;
756 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
757 		args.flags &= ~NFSMNT_NOLOCKD;
758 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
759 		args.flags |= NFSMNT_INT;
760 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
761 		args.flags |= NFSMNT_RDIRPLUS;
762 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
763 		args.flags |= NFSMNT_RESVPORT;
764 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
765 		args.flags &= ~NFSMNT_RESVPORT;
766 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
767 		args.flags |= NFSMNT_SOFT;
768 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
769 		args.flags &= ~NFSMNT_SOFT;
770 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
771 		args.sotype = SOCK_DGRAM;
772 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
773 		args.sotype = SOCK_DGRAM;
774 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
775 		args.sotype = SOCK_STREAM;
776 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
777 		args.flags |= NFSMNT_NFSV3;
778 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
779 		args.flags |= NFSMNT_NFSV4;
780 		args.sotype = SOCK_STREAM;
781 	}
782 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
783 		args.flags |= NFSMNT_ALLGSSNAME;
784 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
785 		if (opt == NULL) {
786 			vfs_mount_error(mp, "illegal readdirsize");
787 			error = EINVAL;
788 			goto out;
789 		}
790 		ret = sscanf(opt, "%d", &args.readdirsize);
791 		if (ret != 1 || args.readdirsize <= 0) {
792 			vfs_mount_error(mp, "illegal readdirsize: %s",
793 			    opt);
794 			error = EINVAL;
795 			goto out;
796 		}
797 		args.flags |= NFSMNT_READDIRSIZE;
798 	}
799 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
800 		if (opt == NULL) {
801 			vfs_mount_error(mp, "illegal readahead");
802 			error = EINVAL;
803 			goto out;
804 		}
805 		ret = sscanf(opt, "%d", &args.readahead);
806 		if (ret != 1 || args.readahead <= 0) {
807 			vfs_mount_error(mp, "illegal readahead: %s",
808 			    opt);
809 			error = EINVAL;
810 			goto out;
811 		}
812 		args.flags |= NFSMNT_READAHEAD;
813 	}
814 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
815 		if (opt == NULL) {
816 			vfs_mount_error(mp, "illegal wsize");
817 			error = EINVAL;
818 			goto out;
819 		}
820 		ret = sscanf(opt, "%d", &args.wsize);
821 		if (ret != 1 || args.wsize <= 0) {
822 			vfs_mount_error(mp, "illegal wsize: %s",
823 			    opt);
824 			error = EINVAL;
825 			goto out;
826 		}
827 		args.flags |= NFSMNT_WSIZE;
828 	}
829 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
830 		if (opt == NULL) {
831 			vfs_mount_error(mp, "illegal rsize");
832 			error = EINVAL;
833 			goto out;
834 		}
835 		ret = sscanf(opt, "%d", &args.rsize);
836 		if (ret != 1 || args.rsize <= 0) {
837 			vfs_mount_error(mp, "illegal wsize: %s",
838 			    opt);
839 			error = EINVAL;
840 			goto out;
841 		}
842 		args.flags |= NFSMNT_RSIZE;
843 	}
844 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
845 		if (opt == NULL) {
846 			vfs_mount_error(mp, "illegal retrans");
847 			error = EINVAL;
848 			goto out;
849 		}
850 		ret = sscanf(opt, "%d", &args.retrans);
851 		if (ret != 1 || args.retrans <= 0) {
852 			vfs_mount_error(mp, "illegal retrans: %s",
853 			    opt);
854 			error = EINVAL;
855 			goto out;
856 		}
857 		args.flags |= NFSMNT_RETRANS;
858 	}
859 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
860 		ret = sscanf(opt, "%d", &args.acregmin);
861 		if (ret != 1 || args.acregmin < 0) {
862 			vfs_mount_error(mp, "illegal acregmin: %s",
863 			    opt);
864 			error = EINVAL;
865 			goto out;
866 		}
867 		args.flags |= NFSMNT_ACREGMIN;
868 	}
869 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
870 		ret = sscanf(opt, "%d", &args.acregmax);
871 		if (ret != 1 || args.acregmax < 0) {
872 			vfs_mount_error(mp, "illegal acregmax: %s",
873 			    opt);
874 			error = EINVAL;
875 			goto out;
876 		}
877 		args.flags |= NFSMNT_ACREGMAX;
878 	}
879 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
880 		ret = sscanf(opt, "%d", &args.acdirmin);
881 		if (ret != 1 || args.acdirmin < 0) {
882 			vfs_mount_error(mp, "illegal acdirmin: %s",
883 			    opt);
884 			error = EINVAL;
885 			goto out;
886 		}
887 		args.flags |= NFSMNT_ACDIRMIN;
888 	}
889 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
890 		ret = sscanf(opt, "%d", &args.acdirmax);
891 		if (ret != 1 || args.acdirmax < 0) {
892 			vfs_mount_error(mp, "illegal acdirmax: %s",
893 			    opt);
894 			error = EINVAL;
895 			goto out;
896 		}
897 		args.flags |= NFSMNT_ACDIRMAX;
898 	}
899 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
900 		ret = sscanf(opt, "%d", &args.timeo);
901 		if (ret != 1 || args.timeo <= 0) {
902 			vfs_mount_error(mp, "illegal timeout: %s",
903 			    opt);
904 			error = EINVAL;
905 			goto out;
906 		}
907 		args.flags |= NFSMNT_TIMEO;
908 	}
909 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
910 	    == 0) {
911 		ret = sscanf(opt, "%d", &negnametimeo);
912 		if (ret != 1 || negnametimeo < 0) {
913 			vfs_mount_error(mp, "illegal negnametimeo: %s",
914 			    opt);
915 			error = EINVAL;
916 			goto out;
917 		}
918 	}
919 	if (vfs_getopt(mp->mnt_optnew, "sec",
920 		(void **) &secname, NULL) == 0)
921 		nfs_sec_name(secname, &args.flags);
922 
923 	if (mp->mnt_flag & MNT_UPDATE) {
924 		struct nfsmount *nmp = VFSTONFS(mp);
925 
926 		if (nmp == NULL) {
927 			error = EIO;
928 			goto out;
929 		}
930 		/*
931 		 * When doing an update, we can't change version,
932 		 * security, switch lockd strategies or change cookie
933 		 * translation
934 		 */
935 		args.flags = (args.flags &
936 		    ~(NFSMNT_NFSV3 |
937 		      NFSMNT_NFSV4 |
938 		      NFSMNT_KERB |
939 		      NFSMNT_INTEGRITY |
940 		      NFSMNT_PRIVACY |
941 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
942 		    (nmp->nm_flag &
943 			(NFSMNT_NFSV3 |
944 			 NFSMNT_NFSV4 |
945 			 NFSMNT_KERB |
946 			 NFSMNT_INTEGRITY |
947 			 NFSMNT_PRIVACY |
948 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
949 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
950 		goto out;
951 	}
952 
953 	/*
954 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
955 	 * or no-connection mode for those protocols that support
956 	 * no-connection mode (the flag will be cleared later for protocols
957 	 * that do not support no-connection mode).  This will allow a client
958 	 * to receive replies from a different IP then the request was
959 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
960 	 * not 0.
961 	 */
962 	if (nfs_ip_paranoia == 0)
963 		args.flags |= NFSMNT_NOCONN;
964 
965 	if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
966 	    &args.fhsize) == 0) {
967 		if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
968 			vfs_mount_error(mp, "Bad file handle");
969 			error = EINVAL;
970 			goto out;
971 		}
972 		bcopy(args.fh, nfh, args.fhsize);
973 	} else {
974 		args.fhsize = 0;
975 	}
976 
977 	(void) vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname,
978 	    &len);
979 	if (args.hostname == NULL) {
980 		vfs_mount_error(mp, "Invalid hostname");
981 		error = EINVAL;
982 		goto out;
983 	}
984 	bcopy(args.hostname, hst, MNAMELEN);
985 	hst[MNAMELEN - 1] = '\0';
986 
987 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
988 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
989 	else
990 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
991 	args.srvkrbnamelen = strlen(srvkrbname);
992 
993 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
994 		strlcpy(krbname, name, sizeof (krbname));
995 	else
996 		krbname[0] = '\0';
997 	args.krbnamelen = strlen(krbname);
998 
999 	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1000 		strlcpy(dirpath, name, sizeof (dirpath));
1001 	else
1002 		dirpath[0] = '\0';
1003 	args.dirlen = strlen(dirpath);
1004 
1005 	if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr,
1006 	    &args.addrlen) == 0) {
1007 		if (args.addrlen > SOCK_MAXADDRLEN) {
1008 			error = ENAMETOOLONG;
1009 			goto out;
1010 		}
1011 		nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1012 		bcopy(args.addr, nam, args.addrlen);
1013 		nam->sa_len = args.addrlen;
1014 	}
1015 
1016 	args.fh = nfh;
1017 	error = mountnfs(&args, mp, nam, hst, krbname, dirpath, srvkrbname,
1018 	    &vp, td->td_ucred, td, negnametimeo);
1019 out:
1020 	if (!error) {
1021 		MNT_ILOCK(mp);
1022 		mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
1023 		MNT_IUNLOCK(mp);
1024 	}
1025 	return (error);
1026 }
1027 
1028 
1029 /*
1030  * VFS Operations.
1031  *
1032  * mount system call
1033  * It seems a bit dumb to copyinstr() the host and path here and then
1034  * bcopy() them in mountnfs(), but I wanted to detect errors before
1035  * doing the sockargs() call because sockargs() allocates an mbuf and
1036  * an error after that means that I have to release the mbuf.
1037  */
1038 /* ARGSUSED */
1039 static int
1040 nfs_cmount(struct mntarg *ma, void *data, int flags)
1041 {
1042 	int error;
1043 	struct nfs_args args;
1044 
1045 	error = copyin(data, &args, sizeof (struct nfs_args));
1046 	if (error)
1047 		return error;
1048 
1049 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1050 
1051 	error = kernel_mount(ma, flags);
1052 	return (error);
1053 }
1054 
1055 /*
1056  * Common code for mount and mountroot
1057  */
1058 static int
1059 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1060     char *hst, u_char *krbname, u_char *dirpath, u_char *srvkrbname,
1061     struct vnode **vpp, struct ucred *cred, struct thread *td,
1062     int negnametimeo)
1063 {
1064 	struct nfsmount *nmp;
1065 	struct nfsnode *np;
1066 	int error, trycnt, ret;
1067 	struct nfsvattr nfsva;
1068 	static u_int64_t clval = 0;
1069 
1070 	if (mp->mnt_flag & MNT_UPDATE) {
1071 		nmp = VFSTONFS(mp);
1072 		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1073 		FREE(nam, M_SONAME);
1074 		return (0);
1075 	} else {
1076 		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1077 		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2,
1078 		    M_NEWNFSMNT, M_WAITOK);
1079 		bzero((caddr_t)nmp, sizeof (struct nfsmount) +
1080 		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2);
1081 		TAILQ_INIT(&nmp->nm_bufq);
1082 		if (clval == 0)
1083 			clval = (u_int64_t)nfsboottime.tv_sec;
1084 		nmp->nm_clval = clval++;
1085 		nmp->nm_krbnamelen = argp->krbnamelen;
1086 		nmp->nm_dirpathlen = argp->dirlen;
1087 		nmp->nm_srvkrbnamelen = argp->srvkrbnamelen;
1088 		if (td->td_ucred->cr_uid != (uid_t)0) {
1089 			/*
1090 			 * nm_uid is used to get KerberosV credentials for
1091 			 * the nfsv4 state handling operations if there is
1092 			 * no host based principal set. Use the uid of
1093 			 * this user if not root, since they are doing the
1094 			 * mount. I don't think setting this for root will
1095 			 * work, since root normally does not have user
1096 			 * credentials in a credentials cache.
1097 			 */
1098 			nmp->nm_uid = td->td_ucred->cr_uid;
1099 		} else {
1100 			/*
1101 			 * Just set to -1, so it won't be used.
1102 			 */
1103 			nmp->nm_uid = (uid_t)-1;
1104 		}
1105 
1106 		/* Copy and null terminate all the names */
1107 		if (nmp->nm_krbnamelen > 0) {
1108 			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1109 			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1110 		}
1111 		if (nmp->nm_dirpathlen > 0) {
1112 			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1113 			    nmp->nm_dirpathlen);
1114 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1115 			    + 1] = '\0';
1116 		}
1117 		if (nmp->nm_srvkrbnamelen > 0) {
1118 			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1119 			    nmp->nm_srvkrbnamelen);
1120 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1121 			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1122 		}
1123 		nmp->nm_sockreq.nr_cred = crhold(cred);
1124 		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1125 		mp->mnt_data = nmp;
1126 		nmp->nm_getinfo = nfs_getnlminfo;
1127 		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1128 	}
1129 	vfs_getnewfsid(mp);
1130 	nmp->nm_mountp = mp;
1131 	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1132 	nmp->nm_negnametimeo = negnametimeo;
1133 
1134 	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1135 
1136 	/*
1137 	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1138 	 * high, depending on whether we end up with negative offsets in
1139 	 * the client or server somewhere.  2GB-1 may be safer.
1140 	 *
1141 	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1142 	 * that we can handle until we find out otherwise.
1143 	 * XXX Our "safe" limit on the client is what we can store in our
1144 	 * buffer cache using signed(!) block numbers.
1145 	 */
1146 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1147 		nmp->nm_maxfilesize = 0xffffffffLL;
1148 	else
1149 		nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
1150 
1151 	nmp->nm_timeo = NFS_TIMEO;
1152 	nmp->nm_retry = NFS_RETRANS;
1153 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1154 		nmp->nm_wsize = NFS_WSIZE;
1155 		nmp->nm_rsize = NFS_RSIZE;
1156 		nmp->nm_readdirsize = NFS_READDIRSIZE;
1157 	}
1158 	nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1159 	nmp->nm_numgrps = NFS_MAXGRPS;
1160 	nmp->nm_readahead = NFS_DEFRAHEAD;
1161 	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1162 	if (nmp->nm_tprintf_delay < 0)
1163 		nmp->nm_tprintf_delay = 0;
1164 	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1165 	if (nmp->nm_tprintf_initial_delay < 0)
1166 		nmp->nm_tprintf_initial_delay = 0;
1167 	nmp->nm_fhsize = argp->fhsize;
1168 	if (nmp->nm_fhsize > 0)
1169 		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1170 	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1171 	nmp->nm_nam = nam;
1172 	/* Set up the sockets and per-host congestion */
1173 	nmp->nm_sotype = argp->sotype;
1174 	nmp->nm_soproto = argp->proto;
1175 	nmp->nm_sockreq.nr_prog = NFS_PROG;
1176 	if ((argp->flags & NFSMNT_NFSV4))
1177 		nmp->nm_sockreq.nr_vers = NFS_VER4;
1178 	else if ((argp->flags & NFSMNT_NFSV3))
1179 		nmp->nm_sockreq.nr_vers = NFS_VER3;
1180 	else
1181 		nmp->nm_sockreq.nr_vers = NFS_VER2;
1182 
1183 
1184 	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1185 		goto bad;
1186 
1187 	/*
1188 	 * A reference count is needed on the nfsnode representing the
1189 	 * remote root.  If this object is not persistent, then backward
1190 	 * traversals of the mount point (i.e. "..") will not work if
1191 	 * the nfsnode gets flushed out of the cache. Ufs does not have
1192 	 * this problem, because one can identify root inodes by their
1193 	 * number == ROOTINO (2).
1194 	 */
1195 	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1196 	    nmp->nm_dirpathlen > 0) {
1197 		/*
1198 		 * If the fhsize on the mount point == 0 for V4, the mount
1199 		 * path needs to be looked up.
1200 		 */
1201 		trycnt = 3;
1202 		do {
1203 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1204 			    cred, td);
1205 			if (error)
1206 				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1207 		} while (error && --trycnt > 0);
1208 		if (error) {
1209 			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1210 			goto bad;
1211 		}
1212 	}
1213 	if (nmp->nm_fhsize > 0) {
1214 		/*
1215 		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1216 		 * non-zero for the root vnode. f_iosize will be set correctly
1217 		 * by nfs_statfs() before any I/O occurs.
1218 		 */
1219 		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1220 		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
1221 		if (error)
1222 			goto bad;
1223 		*vpp = NFSTOV(np);
1224 
1225 		/*
1226 		 * Get file attributes and transfer parameters for the
1227 		 * mountpoint.  This has the side effect of filling in
1228 		 * (*vpp)->v_type with the correct value.
1229 		 */
1230 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1231 		    cred, td, &nfsva, NULL);
1232 		if (ret) {
1233 			/*
1234 			 * Just set default values to get things going.
1235 			 */
1236 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1237 			nfsva.na_vattr.va_type = VDIR;
1238 			nfsva.na_vattr.va_mode = 0777;
1239 			nfsva.na_vattr.va_nlink = 100;
1240 			nfsva.na_vattr.va_uid = (uid_t)0;
1241 			nfsva.na_vattr.va_gid = (gid_t)0;
1242 			nfsva.na_vattr.va_fileid = 2;
1243 			nfsva.na_vattr.va_gen = 1;
1244 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1245 			nfsva.na_vattr.va_size = 512 * 1024;
1246 		}
1247 		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1248 		if (argp->flags & NFSMNT_NFSV3)
1249 			ncl_fsinfo(nmp, *vpp, cred, td);
1250 
1251 		/*
1252 		 * Lose the lock but keep the ref.
1253 		 */
1254 		VOP_UNLOCK(*vpp, 0);
1255 		return (0);
1256 	}
1257 	error = EIO;
1258 
1259 bad:
1260 	newnfs_disconnect(&nmp->nm_sockreq);
1261 	crfree(nmp->nm_sockreq.nr_cred);
1262 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1263 	mtx_destroy(&nmp->nm_mtx);
1264 	FREE(nmp, M_NEWNFSMNT);
1265 	FREE(nam, M_SONAME);
1266 	return (error);
1267 }
1268 
1269 /*
1270  * unmount system call
1271  */
1272 static int
1273 nfs_unmount(struct mount *mp, int mntflags)
1274 {
1275 	struct thread *td;
1276 	struct nfsmount *nmp;
1277 	int error, flags = 0, trycnt = 0;
1278 
1279 	td = curthread;
1280 
1281 	if (mntflags & MNT_FORCE)
1282 		flags |= FORCECLOSE;
1283 	nmp = VFSTONFS(mp);
1284 	/*
1285 	 * Goes something like this..
1286 	 * - Call vflush() to clear out vnodes for this filesystem
1287 	 * - Close the socket
1288 	 * - Free up the data structures
1289 	 */
1290 	/* In the forced case, cancel any outstanding requests. */
1291 	if (mntflags & MNT_FORCE) {
1292 		error = newnfs_nmcancelreqs(nmp);
1293 		if (error)
1294 			goto out;
1295 		/* For a forced close, get rid of the renew thread now */
1296 		nfscl_umount(nmp, td);
1297 	}
1298 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1299 	do {
1300 		error = vflush(mp, 1, flags, td);
1301 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1302 			(void) nfs_catnap(PSOCK, error, "newndm");
1303 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1304 	if (error)
1305 		goto out;
1306 
1307 	/*
1308 	 * We are now committed to the unmount.
1309 	 */
1310 	if ((mntflags & MNT_FORCE) == 0)
1311 		nfscl_umount(nmp, td);
1312 	newnfs_disconnect(&nmp->nm_sockreq);
1313 	crfree(nmp->nm_sockreq.nr_cred);
1314 	FREE(nmp->nm_nam, M_SONAME);
1315 
1316 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1317 	mtx_destroy(&nmp->nm_mtx);
1318 	FREE(nmp, M_NEWNFSMNT);
1319 out:
1320 	return (error);
1321 }
1322 
1323 /*
1324  * Return root of a filesystem
1325  */
1326 static int
1327 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1328 {
1329 	struct vnode *vp;
1330 	struct nfsmount *nmp;
1331 	struct nfsnode *np;
1332 	int error;
1333 
1334 	nmp = VFSTONFS(mp);
1335 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
1336 	if (error)
1337 		return error;
1338 	vp = NFSTOV(np);
1339 	/*
1340 	 * Get transfer parameters and attributes for root vnode once.
1341 	 */
1342 	mtx_lock(&nmp->nm_mtx);
1343 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1344 		mtx_unlock(&nmp->nm_mtx);
1345 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1346 	} else
1347 		mtx_unlock(&nmp->nm_mtx);
1348 	if (vp->v_type == VNON)
1349 	    vp->v_type = VDIR;
1350 	vp->v_vflag |= VV_ROOT;
1351 	*vpp = vp;
1352 	return (0);
1353 }
1354 
1355 /*
1356  * Flush out the buffer cache
1357  */
1358 /* ARGSUSED */
1359 static int
1360 nfs_sync(struct mount *mp, int waitfor)
1361 {
1362 	struct vnode *vp, *mvp;
1363 	struct thread *td;
1364 	int error, allerror = 0;
1365 
1366 	td = curthread;
1367 
1368 	/*
1369 	 * Force stale buffer cache information to be flushed.
1370 	 */
1371 	MNT_ILOCK(mp);
1372 loop:
1373 	MNT_VNODE_FOREACH(vp, mp, mvp) {
1374 		VI_LOCK(vp);
1375 		MNT_IUNLOCK(mp);
1376 		/* XXX Racy bv_cnt check. */
1377 		if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1378 		    waitfor == MNT_LAZY) {
1379 			VI_UNLOCK(vp);
1380 			MNT_ILOCK(mp);
1381 			continue;
1382 		}
1383 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1384 			MNT_ILOCK(mp);
1385 			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1386 			goto loop;
1387 		}
1388 		error = VOP_FSYNC(vp, waitfor, td);
1389 		if (error)
1390 			allerror = error;
1391 		VOP_UNLOCK(vp, 0);
1392 		vrele(vp);
1393 
1394 		MNT_ILOCK(mp);
1395 	}
1396 	MNT_IUNLOCK(mp);
1397 	return (allerror);
1398 }
1399 
1400 static int
1401 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1402 {
1403 	struct nfsmount *nmp = VFSTONFS(mp);
1404 	struct vfsquery vq;
1405 	int error;
1406 
1407 	bzero(&vq, sizeof(vq));
1408 	switch (op) {
1409 #if 0
1410 	case VFS_CTL_NOLOCKS:
1411 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1412  		if (req->oldptr != NULL) {
1413  			error = SYSCTL_OUT(req, &val, sizeof(val));
1414  			if (error)
1415  				return (error);
1416  		}
1417  		if (req->newptr != NULL) {
1418  			error = SYSCTL_IN(req, &val, sizeof(val));
1419  			if (error)
1420  				return (error);
1421 			if (val)
1422 				nmp->nm_flag |= NFSMNT_NOLOCKS;
1423 			else
1424 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1425  		}
1426 		break;
1427 #endif
1428 	case VFS_CTL_QUERY:
1429 		mtx_lock(&nmp->nm_mtx);
1430 		if (nmp->nm_state & NFSSTA_TIMEO)
1431 			vq.vq_flags |= VQ_NOTRESP;
1432 		mtx_unlock(&nmp->nm_mtx);
1433 #if 0
1434 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1435 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1436 			vq.vq_flags |= VQ_NOTRESPLOCK;
1437 #endif
1438 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1439 		break;
1440  	case VFS_CTL_TIMEO:
1441  		if (req->oldptr != NULL) {
1442  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1443  			    sizeof(nmp->nm_tprintf_initial_delay));
1444  			if (error)
1445  				return (error);
1446  		}
1447  		if (req->newptr != NULL) {
1448 			error = vfs_suser(mp, req->td);
1449 			if (error)
1450 				return (error);
1451  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1452  			    sizeof(nmp->nm_tprintf_initial_delay));
1453  			if (error)
1454  				return (error);
1455  			if (nmp->nm_tprintf_initial_delay < 0)
1456  				nmp->nm_tprintf_initial_delay = 0;
1457  		}
1458 		break;
1459 	default:
1460 		return (ENOTSUP);
1461 	}
1462 	return (0);
1463 }
1464 
1465 /*
1466  * Extract the information needed by the nlm from the nfs vnode.
1467  */
1468 static void
1469 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1470     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1471     struct timeval *timeop)
1472 {
1473 	struct nfsmount *nmp;
1474 	struct nfsnode *np = VTONFS(vp);
1475 
1476 	nmp = VFSTONFS(vp->v_mount);
1477 	if (fhlenp != NULL)
1478 		*fhlenp = (size_t)np->n_fhp->nfh_len;
1479 	if (fhp != NULL)
1480 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1481 	if (sp != NULL)
1482 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1483 	if (is_v3p != NULL)
1484 		*is_v3p = NFS_ISV3(vp);
1485 	if (sizep != NULL)
1486 		*sizep = np->n_size;
1487 	if (timeop != NULL) {
1488 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1489 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1490 	}
1491 }
1492 
1493