xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision a3cf0ef5a295c885c895fabfd56470c0d1db322d)
1 /*-
2  * Copyright (c) 1989, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/lock.h>
50 #include <sys/malloc.h>
51 #include <sys/mbuf.h>
52 #include <sys/module.h>
53 #include <sys/mount.h>
54 #include <sys/proc.h>
55 #include <sys/socket.h>
56 #include <sys/socketvar.h>
57 #include <sys/sockio.h>
58 #include <sys/sysctl.h>
59 #include <sys/vnode.h>
60 #include <sys/signalvar.h>
61 
62 #include <vm/vm.h>
63 #include <vm/vm_extern.h>
64 #include <vm/uma.h>
65 
66 #include <net/if.h>
67 #include <net/route.h>
68 #include <netinet/in.h>
69 
70 #include <fs/nfs/nfsport.h>
71 #include <fs/nfsclient/nfsnode.h>
72 #include <fs/nfsclient/nfsmount.h>
73 #include <fs/nfsclient/nfs.h>
74 #include <fs/nfsclient/nfsdiskless.h>
75 
/* Declarations of NFS client globals. */
extern int nfscl_ticks;
extern struct timeval nfsboottime;
extern struct nfsstats	newnfsstats;

/* malloc(9) types defined by this file. */
MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");

/* Statistics and tunables exported under the _vfs_newnfs sysctl node. */
SYSCTL_DECL(_vfs_newnfs);
SYSCTL_STRUCT(_vfs_newnfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
	&newnfsstats, nfsstats, "S,nfsstats");
/*
 * When this is 0, nfs_mount() adds NFSMNT_NOCONN to the flags of a new
 * mount by default.  The default value is 1.
 */
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_newnfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0, "");
/*
 * NOTE(review): presumably the delay before the first "not responding"
 * console message; no user of this variable is visible in this part of
 * the file -- confirm.
 */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_INITIAL_DELAY,
        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
/* how long between console messages "nfs server foo not responding" */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_DELAY,
        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");

/* Forward declarations of file-local helpers and the VFS entry points. */
static void	nfs_sec_name(char *, int *);
static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
		    struct nfs_args *argp, const char *, struct ucred *,
		    struct thread *);
static int	mountnfs(struct nfs_args *, struct mount *,
		    struct sockaddr *, char *, u_char *, u_char *, u_char *,
		    struct vnode **, struct ucred *, struct thread *, int);
static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
		    struct sockaddr_storage *, int *, off_t *);
static vfs_mount_t nfs_mount;
static vfs_cmount_t nfs_cmount;
static vfs_unmount_t nfs_unmount;
static vfs_root_t nfs_root;
static vfs_statfs_t nfs_statfs;
static vfs_sync_t nfs_sync;
static vfs_sysctl_t nfs_sysctl;
113 
114 /*
115  * nfs vfs operations.
116  */
117 static struct vfsops nfs_vfsops = {
118 	.vfs_init =		ncl_init,
119 	.vfs_mount =		nfs_mount,
120 	.vfs_cmount =		nfs_cmount,
121 	.vfs_root =		nfs_root,
122 	.vfs_statfs =		nfs_statfs,
123 	.vfs_sync =		nfs_sync,
124 	.vfs_uninit =		ncl_uninit,
125 	.vfs_unmount =		nfs_unmount,
126 	.vfs_sysctl =		nfs_sysctl,
127 };
128 VFS_SET(nfs_vfsops, newnfs, VFCF_NETWORK);
129 
130 /* So that loader and kldload(2) can find us, wherever we are.. */
131 MODULE_VERSION(newnfs, 1);
132 
/*
 * This structure must be filled in by a primary bootstrap or bootstrap
 * server for a diskless/dataless machine. It is initialized below just
 * to ensure that it is allocated to initialized data (.data not .bss).
 *
 * newnfs_diskless carries its root mount arguments as a struct onfs_args;
 * nfs_convert_diskless() copies it into newnfsv3_diskless, which is the
 * structure ncl_mountroot() works from.
 */
struct nfs_diskless newnfs_diskless = { { { 0 } } };
struct nfsv3_diskless newnfsv3_diskless = { { { 0 } } };
/*
 * State of the diskless information as used in this file:
 *   0 - nothing has been filled in; ncl_mountroot() returns -1.
 *   1 - ncl_mountroot() calls nfs_convert_diskless() before proceeding.
 *   3 - set by nfs_convert_diskless() once newnfsv3_diskless is populated.
 */
int newnfs_diskless_valid = 0;

SYSCTL_INT(_vfs_newnfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &newnfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

SYSCTL_STRING(_vfs_newnfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    newnfsv3_diskless.root_hostnam, 0, "Path to nfs root");

SYSCTL_OPAQUE(_vfs_newnfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &newnfsv3_diskless.root_saddr, sizeof newnfsv3_diskless.root_saddr,
    "%Ssockaddr_in", "Diskless root nfs address");


/*
 * Prototypes for the diskless-boot helpers.
 * NOTE(review): no definition or caller of newnfsargs_ntoh() is visible in
 * this part of the file -- confirm it is still needed.
 */
void		newnfsargs_ntoh(struct nfs_args *);
static int	nfs_mountdiskless(char *,
		    struct sockaddr_in *, struct nfs_args *,
		    struct thread *, struct vnode **, struct mount *);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
161 
162 int
163 newnfs_iosize(struct nfsmount *nmp)
164 {
165 	int iosize, maxio;
166 
167 	/* First, set the upper limit for iosize */
168 	if (nmp->nm_flag & NFSMNT_NFSV4) {
169 		maxio = NFS_MAXBSIZE;
170 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
171 		if (nmp->nm_sotype == SOCK_DGRAM)
172 			maxio = NFS_MAXDGRAMDATA;
173 		else
174 			maxio = NFS_MAXBSIZE;
175 	} else {
176 		maxio = NFS_V2MAXDATA;
177 	}
178 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
179 		nmp->nm_rsize = maxio;
180 	if (nmp->nm_rsize > MAXBSIZE)
181 		nmp->nm_rsize = MAXBSIZE;
182 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
183 		nmp->nm_readdirsize = maxio;
184 	if (nmp->nm_readdirsize > nmp->nm_rsize)
185 		nmp->nm_readdirsize = nmp->nm_rsize;
186 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
187 		nmp->nm_wsize = maxio;
188 	if (nmp->nm_wsize > MAXBSIZE)
189 		nmp->nm_wsize = MAXBSIZE;
190 
191 	/*
192 	 * Calculate the size used for io buffers.  Use the larger
193 	 * of the two sizes to minimise nfs requests but make sure
194 	 * that it is at least one VM page to avoid wasting buffer
195 	 * space.
196 	 */
197 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
198 	iosize = imax(iosize, PAGE_SIZE);
199 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
200 	return (iosize);
201 }
202 
203 static void
204 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
205 {
206 
207 	args->version = NFS_ARGSVERSION;
208 	args->addr = oargs->addr;
209 	args->addrlen = oargs->addrlen;
210 	args->sotype = oargs->sotype;
211 	args->proto = oargs->proto;
212 	args->fh = oargs->fh;
213 	args->fhsize = oargs->fhsize;
214 	args->flags = oargs->flags;
215 	args->wsize = oargs->wsize;
216 	args->rsize = oargs->rsize;
217 	args->readdirsize = oargs->readdirsize;
218 	args->timeo = oargs->timeo;
219 	args->retrans = oargs->retrans;
220 	args->readahead = oargs->readahead;
221 	args->hostname = oargs->hostname;
222 }
223 
224 static void
225 nfs_convert_diskless(void)
226 {
227 
228 	bcopy(&newnfs_diskless.myif, &newnfsv3_diskless.myif,
229 	    sizeof (struct ifaliasreq));
230 	bcopy(&newnfs_diskless.mygateway, &newnfsv3_diskless.mygateway,
231 	    sizeof (struct sockaddr_in));
232 	nfs_convert_oargs(&newnfsv3_diskless.root_args,
233 	    &newnfs_diskless.root_args);
234 	if (newnfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
235 		newnfsv3_diskless.root_fhsize = NFSX_MYFH;
236 		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh,
237 		    NFSX_MYFH);
238 	} else {
239 		newnfsv3_diskless.root_fhsize = NFSX_V2FH;
240 		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh,
241 		    NFSX_V2FH);
242 	}
243 	bcopy(&newnfs_diskless.root_saddr,&newnfsv3_diskless.root_saddr,
244 	    sizeof(struct sockaddr_in));
245 	bcopy(newnfs_diskless.root_hostnam, newnfsv3_diskless.root_hostnam,
246 	    MNAMELEN);
247 	newnfsv3_diskless.root_time = newnfs_diskless.root_time;
248 	bcopy(newnfs_diskless.my_hostnam, newnfsv3_diskless.my_hostnam,
249 	    MAXHOSTNAMELEN);
250 	newnfs_diskless_valid = 3;
251 }
252 
253 /*
254  * nfs statfs call
255  */
256 static int
257 nfs_statfs(struct mount *mp, struct statfs *sbp)
258 {
259 	struct vnode *vp;
260 	struct thread *td;
261 	struct nfsmount *nmp = VFSTONFS(mp);
262 	struct nfsvattr nfsva;
263 	struct nfsfsinfo fs;
264 	struct nfsstatfs sb;
265 	int error = 0, attrflag, gotfsinfo = 0, ret;
266 	struct nfsnode *np;
267 
268 	td = curthread;
269 
270 	error = vfs_busy(mp, MBF_NOWAIT);
271 	if (error)
272 		return (error);
273 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
274 	if (error) {
275 		vfs_unbusy(mp);
276 		return (error);
277 	}
278 	vp = NFSTOV(np);
279 	mtx_lock(&nmp->nm_mtx);
280 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
281 		mtx_unlock(&nmp->nm_mtx);
282 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
283 		    &attrflag, NULL);
284 		if (!error)
285 			gotfsinfo = 1;
286 	} else
287 		mtx_unlock(&nmp->nm_mtx);
288 	if (!error)
289 		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
290 		    &attrflag, NULL);
291 	if (attrflag == 0) {
292 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
293 		    td->td_ucred, td, &nfsva, NULL);
294 		if (ret) {
295 			/*
296 			 * Just set default values to get things going.
297 			 */
298 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
299 			nfsva.na_vattr.va_type = VDIR;
300 			nfsva.na_vattr.va_mode = 0777;
301 			nfsva.na_vattr.va_nlink = 100;
302 			nfsva.na_vattr.va_uid = (uid_t)0;
303 			nfsva.na_vattr.va_gid = (gid_t)0;
304 			nfsva.na_vattr.va_fileid = 2;
305 			nfsva.na_vattr.va_gen = 1;
306 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
307 			nfsva.na_vattr.va_size = 512 * 1024;
308 		}
309 	}
310 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
311 	if (!error) {
312 	    mtx_lock(&nmp->nm_mtx);
313 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
314 		nfscl_loadfsinfo(nmp, &fs);
315 	    nfscl_loadsbinfo(nmp, &sb, sbp);
316 	    sbp->f_flags = nmp->nm_flag;
317 	    sbp->f_iosize = newnfs_iosize(nmp);
318 	    mtx_unlock(&nmp->nm_mtx);
319 	    if (sbp != &mp->mnt_stat) {
320 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
321 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
322 	    }
323 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
324 	} else if (NFS_ISV4(vp)) {
325 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
326 	}
327 	vput(vp);
328 	vfs_unbusy(mp);
329 	return (error);
330 }
331 
332 /*
333  * nfs version 3 fsinfo rpc call
334  */
335 int
336 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
337     struct thread *td)
338 {
339 	struct nfsfsinfo fs;
340 	struct nfsvattr nfsva;
341 	int error, attrflag;
342 
343 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
344 	if (!error) {
345 		if (attrflag)
346 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
347 			    1);
348 		mtx_lock(&nmp->nm_mtx);
349 		nfscl_loadfsinfo(nmp, &fs);
350 		mtx_unlock(&nmp->nm_mtx);
351 	}
352 	return (error);
353 }
354 
355 /*
356  * Mount a remote root fs via. nfs. This depends on the info in the
357  * newnfs_diskless structure that has been filled in properly by some primary
358  * bootstrap.
359  * It goes something like this:
360  * - do enough of "ifconfig" by calling ifioctl() so that the system
361  *   can talk to the server
362  * - If newnfs_diskless.mygateway is filled in, use that address as
363  *   a default gateway.
364  * - build the rootfs mount point and call mountnfs() to do the rest.
365  *
366  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
367  * structure, as well as other global NFS client variables here, as
368  * nfs_mountroot() will be called once in the boot before any other NFS
369  * client activity occurs.
370  */
371 int
372 ncl_mountroot(struct mount *mp)
373 {
374 	struct thread *td = curthread;
375 	struct nfsv3_diskless *nd = &newnfsv3_diskless;
376 	struct socket *so;
377 	struct vnode *vp;
378 	struct ifreq ir;
379 	int error;
380 	u_long l;
381 	char buf[128];
382 	char *cp;
383 
384 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
385 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
386 #elif defined(NFS_ROOT)
387 	nfs_setup_diskless();
388 #endif
389 
390 	if (newnfs_diskless_valid == 0)
391 		return (-1);
392 	if (newnfs_diskless_valid == 1)
393 		nfs_convert_diskless();
394 
395 	/*
396 	 * XXX splnet, so networks will receive...
397 	 */
398 	splnet();
399 
400 	/*
401 	 * Do enough of ifconfig(8) so that the critical net interface can
402 	 * talk to the server.
403 	 */
404 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
405 	    td->td_ucred, td);
406 	if (error)
407 		panic("nfs_mountroot: socreate(%04x): %d",
408 			nd->myif.ifra_addr.sa_family, error);
409 
410 #if 0 /* XXX Bad idea */
411 	/*
412 	 * We might not have been told the right interface, so we pass
413 	 * over the first ten interfaces of the same kind, until we get
414 	 * one of them configured.
415 	 */
416 
417 	for (i = strlen(nd->myif.ifra_name) - 1;
418 		nd->myif.ifra_name[i] >= '0' &&
419 		nd->myif.ifra_name[i] <= '9';
420 		nd->myif.ifra_name[i] ++) {
421 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
422 		if(!error)
423 			break;
424 	}
425 #endif
426 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
427 	if (error)
428 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
429 	if ((cp = getenv("boot.netif.mtu")) != NULL) {
430 		ir.ifr_mtu = strtol(cp, NULL, 10);
431 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
432 		freeenv(cp);
433 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
434 		if (error)
435 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
436 	}
437 	soclose(so);
438 
439 	/*
440 	 * If the gateway field is filled in, set it as the default route.
441 	 * Note that pxeboot will set a default route of 0 if the route
442 	 * is not set by the DHCP server.  Check also for a value of 0
443 	 * to avoid panicking inappropriately in that situation.
444 	 */
445 	if (nd->mygateway.sin_len != 0 &&
446 	    nd->mygateway.sin_addr.s_addr != 0) {
447 		struct sockaddr_in mask, sin;
448 
449 		bzero((caddr_t)&mask, sizeof(mask));
450 		sin = mask;
451 		sin.sin_family = AF_INET;
452 		sin.sin_len = sizeof(sin);
453                 /* XXX MRT use table 0 for this sort of thing */
454 		error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
455 		    (struct sockaddr *)&nd->mygateway,
456 		    (struct sockaddr *)&mask,
457 		    RTF_UP | RTF_GATEWAY, NULL);
458 		if (error)
459 			panic("nfs_mountroot: RTM_ADD: %d", error);
460 	}
461 
462 	/*
463 	 * Create the rootfs mount point.
464 	 */
465 	nd->root_args.fh = nd->root_fh;
466 	nd->root_args.fhsize = nd->root_fhsize;
467 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
468 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
469 		(l >> 24) & 0xff, (l >> 16) & 0xff,
470 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
471 	printf("NFS ROOT: %s\n", buf);
472 	nd->root_args.hostname = buf;
473 	if ((error = nfs_mountdiskless(buf,
474 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
475 		return (error);
476 	}
477 
478 	/*
479 	 * This is not really an nfs issue, but it is much easier to
480 	 * set hostname here and then let the "/etc/rc.xxx" files
481 	 * mount the right /var based upon its preset value.
482 	 */
483 	mtx_lock(&prison0.pr_mtx);
484 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
485 	    sizeof(prison0.pr_hostname));
486 	mtx_unlock(&prison0.pr_mtx);
487 	inittodr(ntohl(nd->root_time));
488 	return (0);
489 }
490 
491 /*
492  * Internal version of mount system call for diskless setup.
493  */
494 static int
495 nfs_mountdiskless(char *path,
496     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
497     struct vnode **vpp, struct mount *mp)
498 {
499 	struct sockaddr *nam;
500 	int error;
501 
502 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
503 	if ((error = mountnfs(args, mp, nam, path, NULL, NULL, NULL, vpp,
504 	    td->td_ucred, td, NFS_DEFAULT_NEGNAMETIMEO)) != 0) {
505 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
506 		return (error);
507 	}
508 	return (0);
509 }
510 
511 static void
512 nfs_sec_name(char *sec, int *flagsp)
513 {
514 	if (!strcmp(sec, "krb5"))
515 		*flagsp |= NFSMNT_KERB;
516 	else if (!strcmp(sec, "krb5i"))
517 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
518 	else if (!strcmp(sec, "krb5p"))
519 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
520 }
521 
522 static void
523 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
524     const char *hostname, struct ucred *cred, struct thread *td)
525 {
526 	int s;
527 	int adjsock;
528 	char *p;
529 
530 	s = splnet();
531 
532 	/*
533 	 * Set read-only flag if requested; otherwise, clear it if this is
534 	 * an update.  If this is not an update, then either the read-only
535 	 * flag is already clear, or this is a root mount and it was set
536 	 * intentionally at some previous point.
537 	 */
538 	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
539 		MNT_ILOCK(mp);
540 		mp->mnt_flag |= MNT_RDONLY;
541 		MNT_IUNLOCK(mp);
542 	} else if (mp->mnt_flag & MNT_UPDATE) {
543 		MNT_ILOCK(mp);
544 		mp->mnt_flag &= ~MNT_RDONLY;
545 		MNT_IUNLOCK(mp);
546 	}
547 
548 	/*
549 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
550 	 * no sense in that context.  Also, set up appropriate retransmit
551 	 * and soft timeout behavior.
552 	 */
553 	if (argp->sotype == SOCK_STREAM) {
554 		nmp->nm_flag &= ~NFSMNT_NOCONN;
555 		nmp->nm_timeo = NFS_MAXTIMEO;
556 	}
557 
558 	/* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
559 	if ((argp->flags & NFSMNT_NFSV3) == 0)
560 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
561 
562 	/* Also re-bind if we're switching to/from a connected UDP socket */
563 	adjsock = ((nmp->nm_flag & NFSMNT_NOCONN) !=
564 		    (argp->flags & NFSMNT_NOCONN));
565 
566 	/* Update flags atomically.  Don't change the lock bits. */
567 	nmp->nm_flag = argp->flags | nmp->nm_flag;
568 	splx(s);
569 
570 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
571 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
572 		if (nmp->nm_timeo < NFS_MINTIMEO)
573 			nmp->nm_timeo = NFS_MINTIMEO;
574 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
575 			nmp->nm_timeo = NFS_MAXTIMEO;
576 	}
577 
578 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
579 		nmp->nm_retry = argp->retrans;
580 		if (nmp->nm_retry > NFS_MAXREXMIT)
581 			nmp->nm_retry = NFS_MAXREXMIT;
582 	}
583 
584 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
585 		nmp->nm_wsize = argp->wsize;
586 		/* Round down to multiple of blocksize */
587 		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
588 		if (nmp->nm_wsize <= 0)
589 			nmp->nm_wsize = NFS_FABLKSIZE;
590 	}
591 
592 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
593 		nmp->nm_rsize = argp->rsize;
594 		/* Round down to multiple of blocksize */
595 		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
596 		if (nmp->nm_rsize <= 0)
597 			nmp->nm_rsize = NFS_FABLKSIZE;
598 	}
599 
600 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
601 		nmp->nm_readdirsize = argp->readdirsize;
602 	}
603 
604 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
605 		nmp->nm_acregmin = argp->acregmin;
606 	else
607 		nmp->nm_acregmin = NFS_MINATTRTIMO;
608 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
609 		nmp->nm_acregmax = argp->acregmax;
610 	else
611 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
612 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
613 		nmp->nm_acdirmin = argp->acdirmin;
614 	else
615 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
616 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
617 		nmp->nm_acdirmax = argp->acdirmax;
618 	else
619 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
620 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
621 		nmp->nm_acdirmin = nmp->nm_acdirmax;
622 	if (nmp->nm_acregmin > nmp->nm_acregmax)
623 		nmp->nm_acregmin = nmp->nm_acregmax;
624 
625 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
626 		if (argp->readahead <= NFS_MAXRAHEAD)
627 			nmp->nm_readahead = argp->readahead;
628 		else
629 			nmp->nm_readahead = NFS_MAXRAHEAD;
630 	}
631 	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
632 		if (argp->wcommitsize < nmp->nm_wsize)
633 			nmp->nm_wcommitsize = nmp->nm_wsize;
634 		else
635 			nmp->nm_wcommitsize = argp->wcommitsize;
636 	}
637 
638 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
639 		    (nmp->nm_soproto != argp->proto));
640 
641 	if (nmp->nm_client != NULL && adjsock) {
642 		int haslock = 0, error = 0;
643 
644 		if (nmp->nm_sotype == SOCK_STREAM) {
645 			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
646 			if (!error)
647 				haslock = 1;
648 		}
649 		if (!error) {
650 		    newnfs_disconnect(&nmp->nm_sockreq);
651 		    if (haslock)
652 			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
653 		    nmp->nm_sotype = argp->sotype;
654 		    nmp->nm_soproto = argp->proto;
655 		    if (nmp->nm_sotype == SOCK_DGRAM)
656 			while (newnfs_connect(nmp, &nmp->nm_sockreq,
657 			    cred, td, 0)) {
658 				printf("newnfs_args: retrying connect\n");
659 				(void) nfs_catnap(PSOCK, 0, "newnfscon");
660 			}
661 		}
662 	} else {
663 		nmp->nm_sotype = argp->sotype;
664 		nmp->nm_soproto = argp->proto;
665 	}
666 
667 	if (hostname != NULL) {
668 		strlcpy(nmp->nm_hostname, hostname,
669 		    sizeof(nmp->nm_hostname));
670 		p = strchr(nmp->nm_hostname, ':');
671 		if (p != NULL)
672 			*p = '\0';
673 	}
674 }
675 
/*
 * Names of the mount options this file system accepts, terminated by
 * NULL.  nfs_mount() hands this list to vfs_filteropt() and fails with
 * EINVAL when that call returns non-zero.
 */
static const char *nfs_opts[] = {
	"from",
	"noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
	"noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
	"async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize", "soft", "hard", "mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans",
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeout", "addr", "fh",
	"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath", "negnametimeo",
	NULL
};
686 
687 /*
688  * VFS Operations.
689  *
690  * mount system call
691  * It seems a bit dumb to copyinstr() the host and path here and then
692  * bcopy() them in mountnfs(), but I wanted to detect errors before
693  * doing the sockargs() call because sockargs() allocates an mbuf and
694  * an error after that means that I have to release the mbuf.
695  */
696 /* ARGSUSED */
697 static int
698 nfs_mount(struct mount *mp)
699 {
700 	struct nfs_args args = {
701 	    .version = NFS_ARGSVERSION,
702 	    .addr = NULL,
703 	    .addrlen = sizeof (struct sockaddr_in),
704 	    .sotype = SOCK_STREAM,
705 	    .proto = 0,
706 	    .fh = NULL,
707 	    .fhsize = 0,
708 	    .flags = 0,
709 	    .wsize = NFS_WSIZE,
710 	    .rsize = NFS_RSIZE,
711 	    .readdirsize = NFS_READDIRSIZE,
712 	    .timeo = 10,
713 	    .retrans = NFS_RETRANS,
714 	    .readahead = NFS_DEFRAHEAD,
715 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
716 	    .hostname = NULL,
717 	    /* args version 4 */
718 	    .acregmin = NFS_MINATTRTIMO,
719 	    .acregmax = NFS_MAXATTRTIMO,
720 	    .acdirmin = NFS_MINDIRATTRTIMO,
721 	    .acdirmax = NFS_MAXDIRATTRTIMO,
722 	    .dirlen = 0,
723 	    .krbnamelen = 0,
724 	    .srvkrbnamelen = 0,
725 	};
726 	int error = 0, ret, len;
727 	struct sockaddr *nam = NULL;
728 	struct vnode *vp;
729 	struct thread *td;
730 	char hst[MNAMELEN];
731 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
732 	char *opt, *name, *secname;
733 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
734 
735 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
736 		error = EINVAL;
737 		goto out;
738 	}
739 
740 	td = curthread;
741 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
742 		error = ncl_mountroot(mp);
743 		goto out;
744 	}
745 
746 	nfscl_init();
747 
748 	/* Handle the new style options. */
749 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
750 		args.flags |= NFSMNT_NOCONN;
751 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
752 		args.flags |= NFSMNT_NOCONN;
753 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
754 		args.flags |= NFSMNT_NOLOCKD;
755 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
756 		args.flags &= ~NFSMNT_NOLOCKD;
757 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
758 		args.flags |= NFSMNT_INT;
759 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
760 		args.flags |= NFSMNT_RDIRPLUS;
761 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
762 		args.flags |= NFSMNT_RESVPORT;
763 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
764 		args.flags &= ~NFSMNT_RESVPORT;
765 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
766 		args.flags |= NFSMNT_SOFT;
767 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
768 		args.flags &= ~NFSMNT_SOFT;
769 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
770 		args.sotype = SOCK_DGRAM;
771 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
772 		args.sotype = SOCK_DGRAM;
773 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
774 		args.sotype = SOCK_STREAM;
775 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
776 		args.flags |= NFSMNT_NFSV3;
777 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
778 		args.flags |= NFSMNT_NFSV4;
779 		args.sotype = SOCK_STREAM;
780 	}
781 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
782 		args.flags |= NFSMNT_ALLGSSNAME;
783 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
784 		if (opt == NULL) {
785 			vfs_mount_error(mp, "illegal readdirsize");
786 			error = EINVAL;
787 			goto out;
788 		}
789 		ret = sscanf(opt, "%d", &args.readdirsize);
790 		if (ret != 1 || args.readdirsize <= 0) {
791 			vfs_mount_error(mp, "illegal readdirsize: %s",
792 			    opt);
793 			error = EINVAL;
794 			goto out;
795 		}
796 		args.flags |= NFSMNT_READDIRSIZE;
797 	}
798 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
799 		if (opt == NULL) {
800 			vfs_mount_error(mp, "illegal readahead");
801 			error = EINVAL;
802 			goto out;
803 		}
804 		ret = sscanf(opt, "%d", &args.readahead);
805 		if (ret != 1 || args.readahead <= 0) {
806 			vfs_mount_error(mp, "illegal readahead: %s",
807 			    opt);
808 			error = EINVAL;
809 			goto out;
810 		}
811 		args.flags |= NFSMNT_READAHEAD;
812 	}
813 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
814 		if (opt == NULL) {
815 			vfs_mount_error(mp, "illegal wsize");
816 			error = EINVAL;
817 			goto out;
818 		}
819 		ret = sscanf(opt, "%d", &args.wsize);
820 		if (ret != 1 || args.wsize <= 0) {
821 			vfs_mount_error(mp, "illegal wsize: %s",
822 			    opt);
823 			error = EINVAL;
824 			goto out;
825 		}
826 		args.flags |= NFSMNT_WSIZE;
827 	}
828 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
829 		if (opt == NULL) {
830 			vfs_mount_error(mp, "illegal rsize");
831 			error = EINVAL;
832 			goto out;
833 		}
834 		ret = sscanf(opt, "%d", &args.rsize);
835 		if (ret != 1 || args.rsize <= 0) {
836 			vfs_mount_error(mp, "illegal wsize: %s",
837 			    opt);
838 			error = EINVAL;
839 			goto out;
840 		}
841 		args.flags |= NFSMNT_RSIZE;
842 	}
843 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
844 		if (opt == NULL) {
845 			vfs_mount_error(mp, "illegal retrans");
846 			error = EINVAL;
847 			goto out;
848 		}
849 		ret = sscanf(opt, "%d", &args.retrans);
850 		if (ret != 1 || args.retrans <= 0) {
851 			vfs_mount_error(mp, "illegal retrans: %s",
852 			    opt);
853 			error = EINVAL;
854 			goto out;
855 		}
856 		args.flags |= NFSMNT_RETRANS;
857 	}
858 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
859 		ret = sscanf(opt, "%d", &args.acregmin);
860 		if (ret != 1 || args.acregmin < 0) {
861 			vfs_mount_error(mp, "illegal acregmin: %s",
862 			    opt);
863 			error = EINVAL;
864 			goto out;
865 		}
866 		args.flags |= NFSMNT_ACREGMIN;
867 	}
868 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
869 		ret = sscanf(opt, "%d", &args.acregmax);
870 		if (ret != 1 || args.acregmax < 0) {
871 			vfs_mount_error(mp, "illegal acregmax: %s",
872 			    opt);
873 			error = EINVAL;
874 			goto out;
875 		}
876 		args.flags |= NFSMNT_ACREGMAX;
877 	}
878 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
879 		ret = sscanf(opt, "%d", &args.acdirmin);
880 		if (ret != 1 || args.acdirmin < 0) {
881 			vfs_mount_error(mp, "illegal acdirmin: %s",
882 			    opt);
883 			error = EINVAL;
884 			goto out;
885 		}
886 		args.flags |= NFSMNT_ACDIRMIN;
887 	}
888 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
889 		ret = sscanf(opt, "%d", &args.acdirmax);
890 		if (ret != 1 || args.acdirmax < 0) {
891 			vfs_mount_error(mp, "illegal acdirmax: %s",
892 			    opt);
893 			error = EINVAL;
894 			goto out;
895 		}
896 		args.flags |= NFSMNT_ACDIRMAX;
897 	}
898 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
899 		ret = sscanf(opt, "%d", &args.timeo);
900 		if (ret != 1 || args.timeo <= 0) {
901 			vfs_mount_error(mp, "illegal timeout: %s",
902 			    opt);
903 			error = EINVAL;
904 			goto out;
905 		}
906 		args.flags |= NFSMNT_TIMEO;
907 	}
908 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
909 	    == 0) {
910 		ret = sscanf(opt, "%d", &negnametimeo);
911 		if (ret != 1 || negnametimeo < 0) {
912 			vfs_mount_error(mp, "illegal negnametimeo: %s",
913 			    opt);
914 			error = EINVAL;
915 			goto out;
916 		}
917 	}
918 	if (vfs_getopt(mp->mnt_optnew, "sec",
919 		(void **) &secname, NULL) == 0)
920 		nfs_sec_name(secname, &args.flags);
921 
922 	if (mp->mnt_flag & MNT_UPDATE) {
923 		struct nfsmount *nmp = VFSTONFS(mp);
924 
925 		if (nmp == NULL) {
926 			error = EIO;
927 			goto out;
928 		}
929 		/*
930 		 * When doing an update, we can't change version,
931 		 * security, switch lockd strategies or change cookie
932 		 * translation
933 		 */
934 		args.flags = (args.flags &
935 		    ~(NFSMNT_NFSV3 |
936 		      NFSMNT_NFSV4 |
937 		      NFSMNT_KERB |
938 		      NFSMNT_INTEGRITY |
939 		      NFSMNT_PRIVACY |
940 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
941 		    (nmp->nm_flag &
942 			(NFSMNT_NFSV3 |
943 			 NFSMNT_NFSV4 |
944 			 NFSMNT_KERB |
945 			 NFSMNT_INTEGRITY |
946 			 NFSMNT_PRIVACY |
947 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
948 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
949 		goto out;
950 	}
951 
952 	/*
953 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
954 	 * or no-connection mode for those protocols that support
955 	 * no-connection mode (the flag will be cleared later for protocols
956 	 * that do not support no-connection mode).  This will allow a client
957 	 * to receive replies from a different IP then the request was
958 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
959 	 * not 0.
960 	 */
961 	if (nfs_ip_paranoia == 0)
962 		args.flags |= NFSMNT_NOCONN;
963 
964 	if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
965 	    &args.fhsize) == 0) {
966 		if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
967 			vfs_mount_error(mp, "Bad file handle");
968 			error = EINVAL;
969 			goto out;
970 		}
971 		bcopy(args.fh, nfh, args.fhsize);
972 	} else {
973 		args.fhsize = 0;
974 	}
975 
976 	(void) vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname,
977 	    &len);
978 	if (args.hostname == NULL) {
979 		vfs_mount_error(mp, "Invalid hostname");
980 		error = EINVAL;
981 		goto out;
982 	}
983 	bcopy(args.hostname, hst, MNAMELEN);
984 	hst[MNAMELEN - 1] = '\0';
985 
986 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
987 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
988 	else
989 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
990 	args.srvkrbnamelen = strlen(srvkrbname);
991 
992 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
993 		strlcpy(krbname, name, sizeof (krbname));
994 	else
995 		krbname[0] = '\0';
996 	args.krbnamelen = strlen(krbname);
997 
998 	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
999 		strlcpy(dirpath, name, sizeof (dirpath));
1000 	else
1001 		dirpath[0] = '\0';
1002 	args.dirlen = strlen(dirpath);
1003 
1004 	if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr,
1005 	    &args.addrlen) == 0) {
1006 		if (args.addrlen > SOCK_MAXADDRLEN) {
1007 			error = ENAMETOOLONG;
1008 			goto out;
1009 		}
1010 		nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1011 		bcopy(args.addr, nam, args.addrlen);
1012 		nam->sa_len = args.addrlen;
1013 	}
1014 
1015 	args.fh = nfh;
1016 	error = mountnfs(&args, mp, nam, hst, krbname, dirpath, srvkrbname,
1017 	    &vp, td->td_ucred, td, negnametimeo);
1018 out:
1019 	if (!error) {
1020 		MNT_ILOCK(mp);
1021 		mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
1022 		MNT_IUNLOCK(mp);
1023 	}
1024 	return (error);
1025 }
1026 
1027 
1028 /*
1029  * VFS Operations.
1030  *
1031  * mount system call
1032  * It seems a bit dumb to copyinstr() the host and path here and then
1033  * bcopy() them in mountnfs(), but I wanted to detect errors before
1034  * doing the sockargs() call because sockargs() allocates an mbuf and
1035  * an error after that means that I have to release the mbuf.
1036  */
1037 /* ARGSUSED */
1038 static int
1039 nfs_cmount(struct mntarg *ma, void *data, int flags)
1040 {
1041 	int error;
1042 	struct nfs_args args;
1043 
1044 	error = copyin(data, &args, sizeof (struct nfs_args));
1045 	if (error)
1046 		return error;
1047 
1048 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1049 
1050 	error = kernel_mount(ma, flags);
1051 	return (error);
1052 }
1053 
1054 /*
1055  * Common code for mount and mountroot
1056  */
1057 static int
1058 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1059     char *hst, u_char *krbname, u_char *dirpath, u_char *srvkrbname,
1060     struct vnode **vpp, struct ucred *cred, struct thread *td,
1061     int negnametimeo)
1062 {
1063 	struct nfsmount *nmp;
1064 	struct nfsnode *np;
1065 	int error, trycnt, ret;
1066 	struct nfsvattr nfsva;
1067 	static u_int64_t clval = 0;
1068 
1069 	if (mp->mnt_flag & MNT_UPDATE) {
1070 		nmp = VFSTONFS(mp);
1071 		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1072 		FREE(nam, M_SONAME);
1073 		return (0);
1074 	} else {
1075 		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1076 		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2,
1077 		    M_NEWNFSMNT, M_WAITOK);
1078 		bzero((caddr_t)nmp, sizeof (struct nfsmount) +
1079 		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2);
1080 		TAILQ_INIT(&nmp->nm_bufq);
1081 		if (clval == 0)
1082 			clval = (u_int64_t)nfsboottime.tv_sec;
1083 		nmp->nm_clval = clval++;
1084 		nmp->nm_krbnamelen = argp->krbnamelen;
1085 		nmp->nm_dirpathlen = argp->dirlen;
1086 		nmp->nm_srvkrbnamelen = argp->srvkrbnamelen;
1087 		if (td->td_ucred->cr_uid != (uid_t)0) {
1088 			/*
1089 			 * nm_uid is used to get KerberosV credentials for
1090 			 * the nfsv4 state handling operations if there is
1091 			 * no host based principal set. Use the uid of
1092 			 * this user if not root, since they are doing the
1093 			 * mount. I don't think setting this for root will
1094 			 * work, since root normally does not have user
1095 			 * credentials in a credentials cache.
1096 			 */
1097 			nmp->nm_uid = td->td_ucred->cr_uid;
1098 		} else {
1099 			/*
1100 			 * Just set to -1, so it won't be used.
1101 			 */
1102 			nmp->nm_uid = (uid_t)-1;
1103 		}
1104 
1105 		/* Copy and null terminate all the names */
1106 		if (nmp->nm_krbnamelen > 0) {
1107 			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1108 			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1109 		}
1110 		if (nmp->nm_dirpathlen > 0) {
1111 			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1112 			    nmp->nm_dirpathlen);
1113 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1114 			    + 1] = '\0';
1115 		}
1116 		if (nmp->nm_srvkrbnamelen > 0) {
1117 			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1118 			    nmp->nm_srvkrbnamelen);
1119 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1120 			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1121 		}
1122 		nmp->nm_sockreq.nr_cred = crhold(cred);
1123 		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1124 		mp->mnt_data = nmp;
1125 		nmp->nm_getinfo = nfs_getnlminfo;
1126 	}
1127 	vfs_getnewfsid(mp);
1128 	nmp->nm_mountp = mp;
1129 	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1130 	nmp->nm_negnametimeo = negnametimeo;
1131 
1132 	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1133 
1134 	/*
1135 	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1136 	 * high, depending on whether we end up with negative offsets in
1137 	 * the client or server somewhere.  2GB-1 may be safer.
1138 	 *
1139 	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1140 	 * that we can handle until we find out otherwise.
1141 	 * XXX Our "safe" limit on the client is what we can store in our
1142 	 * buffer cache using signed(!) block numbers.
1143 	 */
1144 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1145 		nmp->nm_maxfilesize = 0xffffffffLL;
1146 	else
1147 		nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
1148 
1149 	nmp->nm_timeo = NFS_TIMEO;
1150 	nmp->nm_retry = NFS_RETRANS;
1151 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1152 		nmp->nm_wsize = NFS_WSIZE;
1153 		nmp->nm_rsize = NFS_RSIZE;
1154 		nmp->nm_readdirsize = NFS_READDIRSIZE;
1155 	}
1156 	nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1157 	nmp->nm_numgrps = NFS_MAXGRPS;
1158 	nmp->nm_readahead = NFS_DEFRAHEAD;
1159 	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1160 	if (nmp->nm_tprintf_delay < 0)
1161 		nmp->nm_tprintf_delay = 0;
1162 	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1163 	if (nmp->nm_tprintf_initial_delay < 0)
1164 		nmp->nm_tprintf_initial_delay = 0;
1165 	nmp->nm_fhsize = argp->fhsize;
1166 	if (nmp->nm_fhsize > 0)
1167 		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1168 	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1169 	nmp->nm_nam = nam;
1170 	/* Set up the sockets and per-host congestion */
1171 	nmp->nm_sotype = argp->sotype;
1172 	nmp->nm_soproto = argp->proto;
1173 	nmp->nm_sockreq.nr_prog = NFS_PROG;
1174 	if ((argp->flags & NFSMNT_NFSV4))
1175 		nmp->nm_sockreq.nr_vers = NFS_VER4;
1176 	else if ((argp->flags & NFSMNT_NFSV3))
1177 		nmp->nm_sockreq.nr_vers = NFS_VER3;
1178 	else
1179 		nmp->nm_sockreq.nr_vers = NFS_VER2;
1180 
1181 
1182 	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1183 		goto bad;
1184 
1185 	/*
1186 	 * A reference count is needed on the nfsnode representing the
1187 	 * remote root.  If this object is not persistent, then backward
1188 	 * traversals of the mount point (i.e. "..") will not work if
1189 	 * the nfsnode gets flushed out of the cache. Ufs does not have
1190 	 * this problem, because one can identify root inodes by their
1191 	 * number == ROOTINO (2).
1192 	 */
1193 	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1194 	    nmp->nm_dirpathlen > 0) {
1195 		/*
1196 		 * If the fhsize on the mount point == 0 for V4, the mount
1197 		 * path needs to be looked up.
1198 		 */
1199 		trycnt = 3;
1200 		do {
1201 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1202 			    cred, td);
1203 			if (error)
1204 				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1205 		} while (error && --trycnt > 0);
1206 		if (error) {
1207 			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1208 			goto bad;
1209 		}
1210 	}
1211 	if (nmp->nm_fhsize > 0) {
1212 		/*
1213 		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1214 		 * non-zero for the root vnode. f_iosize will be set correctly
1215 		 * by nfs_statfs() before any I/O occurs.
1216 		 */
1217 		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1218 		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
1219 		if (error)
1220 			goto bad;
1221 		*vpp = NFSTOV(np);
1222 
1223 		/*
1224 		 * Get file attributes and transfer parameters for the
1225 		 * mountpoint.  This has the side effect of filling in
1226 		 * (*vpp)->v_type with the correct value.
1227 		 */
1228 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1229 		    cred, td, &nfsva, NULL);
1230 		if (ret) {
1231 			/*
1232 			 * Just set default values to get things going.
1233 			 */
1234 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1235 			nfsva.na_vattr.va_type = VDIR;
1236 			nfsva.na_vattr.va_mode = 0777;
1237 			nfsva.na_vattr.va_nlink = 100;
1238 			nfsva.na_vattr.va_uid = (uid_t)0;
1239 			nfsva.na_vattr.va_gid = (gid_t)0;
1240 			nfsva.na_vattr.va_fileid = 2;
1241 			nfsva.na_vattr.va_gen = 1;
1242 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1243 			nfsva.na_vattr.va_size = 512 * 1024;
1244 		}
1245 		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1246 		if (argp->flags & NFSMNT_NFSV3)
1247 			ncl_fsinfo(nmp, *vpp, cred, td);
1248 
1249 		/*
1250 		 * Lose the lock but keep the ref.
1251 		 */
1252 		VOP_UNLOCK(*vpp, 0);
1253 		return (0);
1254 	}
1255 	error = EIO;
1256 
1257 bad:
1258 	newnfs_disconnect(&nmp->nm_sockreq);
1259 	crfree(nmp->nm_sockreq.nr_cred);
1260 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1261 	mtx_destroy(&nmp->nm_mtx);
1262 	FREE(nmp, M_NEWNFSMNT);
1263 	FREE(nam, M_SONAME);
1264 	return (error);
1265 }
1266 
1267 /*
1268  * unmount system call
1269  */
1270 static int
1271 nfs_unmount(struct mount *mp, int mntflags)
1272 {
1273 	struct thread *td;
1274 	struct nfsmount *nmp;
1275 	int error, flags = 0, trycnt = 0;
1276 
1277 	td = curthread;
1278 
1279 	if (mntflags & MNT_FORCE)
1280 		flags |= FORCECLOSE;
1281 	nmp = VFSTONFS(mp);
1282 	/*
1283 	 * Goes something like this..
1284 	 * - Call vflush() to clear out vnodes for this filesystem
1285 	 * - Close the socket
1286 	 * - Free up the data structures
1287 	 */
1288 	/* In the forced case, cancel any outstanding requests. */
1289 	if (mntflags & MNT_FORCE) {
1290 		error = newnfs_nmcancelreqs(nmp);
1291 		if (error)
1292 			goto out;
1293 		/* For a forced close, get rid of the renew thread now */
1294 		nfscl_umount(nmp, td);
1295 	}
1296 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1297 	do {
1298 		error = vflush(mp, 1, flags, td);
1299 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1300 			(void) nfs_catnap(PSOCK, error, "newndm");
1301 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1302 	if (error)
1303 		goto out;
1304 
1305 	/*
1306 	 * We are now committed to the unmount.
1307 	 */
1308 	if ((mntflags & MNT_FORCE) == 0)
1309 		nfscl_umount(nmp, td);
1310 	newnfs_disconnect(&nmp->nm_sockreq);
1311 	crfree(nmp->nm_sockreq.nr_cred);
1312 	FREE(nmp->nm_nam, M_SONAME);
1313 
1314 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1315 	mtx_destroy(&nmp->nm_mtx);
1316 	FREE(nmp, M_NEWNFSMNT);
1317 out:
1318 	return (error);
1319 }
1320 
1321 /*
1322  * Return root of a filesystem
1323  */
1324 static int
1325 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1326 {
1327 	struct vnode *vp;
1328 	struct nfsmount *nmp;
1329 	struct nfsnode *np;
1330 	int error;
1331 
1332 	nmp = VFSTONFS(mp);
1333 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
1334 	if (error)
1335 		return error;
1336 	vp = NFSTOV(np);
1337 	/*
1338 	 * Get transfer parameters and attributes for root vnode once.
1339 	 */
1340 	mtx_lock(&nmp->nm_mtx);
1341 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1342 		mtx_unlock(&nmp->nm_mtx);
1343 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1344 	} else
1345 		mtx_unlock(&nmp->nm_mtx);
1346 	if (vp->v_type == VNON)
1347 	    vp->v_type = VDIR;
1348 	vp->v_vflag |= VV_ROOT;
1349 	*vpp = vp;
1350 	return (0);
1351 }
1352 
1353 /*
1354  * Flush out the buffer cache
1355  */
1356 /* ARGSUSED */
1357 static int
1358 nfs_sync(struct mount *mp, int waitfor)
1359 {
1360 	struct vnode *vp, *mvp;
1361 	struct thread *td;
1362 	int error, allerror = 0;
1363 
1364 	td = curthread;
1365 
1366 	/*
1367 	 * Force stale buffer cache information to be flushed.
1368 	 */
1369 	MNT_ILOCK(mp);
1370 loop:
1371 	MNT_VNODE_FOREACH(vp, mp, mvp) {
1372 		VI_LOCK(vp);
1373 		MNT_IUNLOCK(mp);
1374 		/* XXX Racy bv_cnt check. */
1375 		if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1376 		    waitfor == MNT_LAZY) {
1377 			VI_UNLOCK(vp);
1378 			MNT_ILOCK(mp);
1379 			continue;
1380 		}
1381 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1382 			MNT_ILOCK(mp);
1383 			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1384 			goto loop;
1385 		}
1386 		error = VOP_FSYNC(vp, waitfor, td);
1387 		if (error)
1388 			allerror = error;
1389 		VOP_UNLOCK(vp, 0);
1390 		vrele(vp);
1391 
1392 		MNT_ILOCK(mp);
1393 	}
1394 	MNT_IUNLOCK(mp);
1395 	return (allerror);
1396 }
1397 
1398 static int
1399 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1400 {
1401 	struct nfsmount *nmp = VFSTONFS(mp);
1402 	struct vfsquery vq;
1403 	int error;
1404 
1405 	bzero(&vq, sizeof(vq));
1406 	switch (op) {
1407 #if 0
1408 	case VFS_CTL_NOLOCKS:
1409 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1410  		if (req->oldptr != NULL) {
1411  			error = SYSCTL_OUT(req, &val, sizeof(val));
1412  			if (error)
1413  				return (error);
1414  		}
1415  		if (req->newptr != NULL) {
1416  			error = SYSCTL_IN(req, &val, sizeof(val));
1417  			if (error)
1418  				return (error);
1419 			if (val)
1420 				nmp->nm_flag |= NFSMNT_NOLOCKS;
1421 			else
1422 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1423  		}
1424 		break;
1425 #endif
1426 	case VFS_CTL_QUERY:
1427 		mtx_lock(&nmp->nm_mtx);
1428 		if (nmp->nm_state & NFSSTA_TIMEO)
1429 			vq.vq_flags |= VQ_NOTRESP;
1430 		mtx_unlock(&nmp->nm_mtx);
1431 #if 0
1432 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1433 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1434 			vq.vq_flags |= VQ_NOTRESPLOCK;
1435 #endif
1436 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1437 		break;
1438  	case VFS_CTL_TIMEO:
1439  		if (req->oldptr != NULL) {
1440  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1441  			    sizeof(nmp->nm_tprintf_initial_delay));
1442  			if (error)
1443  				return (error);
1444  		}
1445  		if (req->newptr != NULL) {
1446 			error = vfs_suser(mp, req->td);
1447 			if (error)
1448 				return (error);
1449  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1450  			    sizeof(nmp->nm_tprintf_initial_delay));
1451  			if (error)
1452  				return (error);
1453  			if (nmp->nm_tprintf_initial_delay < 0)
1454  				nmp->nm_tprintf_initial_delay = 0;
1455  		}
1456 		break;
1457 	default:
1458 		return (ENOTSUP);
1459 	}
1460 	return (0);
1461 }
1462 
1463 /*
1464  * Extract the information needed by the nlm from the nfs vnode.
1465  */
1466 static void
1467 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1468     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep)
1469 {
1470 	struct nfsmount *nmp;
1471 	struct nfsnode *np = VTONFS(vp);
1472 
1473 	nmp = VFSTONFS(vp->v_mount);
1474 	if (fhlenp != NULL)
1475 		*fhlenp = (size_t)np->n_fhp->nfh_len;
1476 	if (fhp != NULL)
1477 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1478 	if (sp != NULL)
1479 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1480 	if (is_v3p != NULL)
1481 		*is_v3p = NFS_ISV3(vp);
1482 	if (sizep != NULL)
1483 		*sizep = np->n_size;
1484 }
1485 
1486