xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision 830940567b49bb0c08dfaed40418999e76616909)
1 /*-
2  * Copyright (c) 1989, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/lock.h>
50 #include <sys/malloc.h>
51 #include <sys/mbuf.h>
52 #include <sys/module.h>
53 #include <sys/mount.h>
54 #include <sys/proc.h>
55 #include <sys/socket.h>
56 #include <sys/socketvar.h>
57 #include <sys/sockio.h>
58 #include <sys/sysctl.h>
59 #include <sys/vnode.h>
60 #include <sys/signalvar.h>
61 
62 #include <vm/vm.h>
63 #include <vm/vm_extern.h>
64 #include <vm/uma.h>
65 
66 #include <net/if.h>
67 #include <net/route.h>
68 #include <netinet/in.h>
69 
70 #include <fs/nfs/nfsport.h>
71 #include <fs/nfsclient/nfsnode.h>
72 #include <fs/nfsclient/nfsmount.h>
73 #include <fs/nfsclient/nfs.h>
74 #include <fs/nfsclient/nfsdiskless.h>
75 
76 extern int nfscl_ticks;
77 extern struct timeval nfsboottime;
78 extern struct nfsstats	newnfsstats;
79 
80 MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
81 MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
82 
83 SYSCTL_DECL(_vfs_newnfs);
84 SYSCTL_STRUCT(_vfs_newnfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
85 	&newnfsstats, nfsstats, "S,nfsstats");
86 static int nfs_ip_paranoia = 1;
87 SYSCTL_INT(_vfs_newnfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
88     &nfs_ip_paranoia, 0, "");
89 static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
90 SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_INITIAL_DELAY,
91         downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
92 /* how long between console messages "nfs server foo not responding" */
93 static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
94 SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_DELAY,
95         downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
96 
97 static void	nfs_sec_name(char *, int *);
98 static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
99 		    struct nfs_args *argp, struct ucred *, struct thread *);
100 static int	mountnfs(struct nfs_args *, struct mount *,
101 		    struct sockaddr *, char *, u_char *, u_char *, u_char *,
102 		    struct vnode **, struct ucred *, struct thread *);
103 static vfs_mount_t nfs_mount;
104 static vfs_cmount_t nfs_cmount;
105 static vfs_unmount_t nfs_unmount;
106 static vfs_root_t nfs_root;
107 static vfs_statfs_t nfs_statfs;
108 static vfs_sync_t nfs_sync;
109 static vfs_sysctl_t nfs_sysctl;
110 
111 /*
112  * nfs vfs operations.
113  */
114 static struct vfsops nfs_vfsops = {
115 	.vfs_init =		ncl_init,
116 	.vfs_mount =		nfs_mount,
117 	.vfs_cmount =		nfs_cmount,
118 	.vfs_root =		nfs_root,
119 	.vfs_statfs =		nfs_statfs,
120 	.vfs_sync =		nfs_sync,
121 	.vfs_uninit =		ncl_uninit,
122 	.vfs_unmount =		nfs_unmount,
123 	.vfs_sysctl =		nfs_sysctl,
124 };
125 VFS_SET(nfs_vfsops, newnfs, VFCF_NETWORK);
126 
127 /* So that loader and kldload(2) can find us, wherever we are.. */
128 MODULE_VERSION(newnfs, 1);
129 
130 /*
131  * This structure must be filled in by a primary bootstrap or bootstrap
132  * server for a diskless/dataless machine. It is initialized below just
133  * to ensure that it is allocated to initialized data (.data not .bss).
134  */
135 struct nfs_diskless newnfs_diskless = { { { 0 } } };
136 struct nfsv3_diskless newnfsv3_diskless = { { { 0 } } };
137 int newnfs_diskless_valid = 0;
138 
139 SYSCTL_INT(_vfs_newnfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
140     &newnfs_diskless_valid, 0,
141     "Has the diskless struct been filled correctly");
142 
143 SYSCTL_STRING(_vfs_newnfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
144     newnfsv3_diskless.root_hostnam, 0, "Path to nfs root");
145 
146 SYSCTL_OPAQUE(_vfs_newnfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
147     &newnfsv3_diskless.root_saddr, sizeof newnfsv3_diskless.root_saddr,
148     "%Ssockaddr_in", "Diskless root nfs address");
149 
150 
151 void		newnfsargs_ntoh(struct nfs_args *);
152 static int	nfs_mountdiskless(char *,
153 		    struct sockaddr_in *, struct nfs_args *,
154 		    struct thread *, struct vnode **, struct mount *);
155 static void	nfs_convert_diskless(void);
156 static void	nfs_convert_oargs(struct nfs_args *args,
157 		    struct onfs_args *oargs);
158 
159 int
160 newnfs_iosize(struct nfsmount *nmp)
161 {
162 	int iosize, maxio;
163 
164 	/* First, set the upper limit for iosize */
165 	if (nmp->nm_flag & NFSMNT_NFSV4) {
166 		maxio = NFS_MAXBSIZE;
167 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
168 		if (nmp->nm_sotype == SOCK_DGRAM)
169 			maxio = NFS_MAXDGRAMDATA;
170 		else
171 			maxio = NFS_MAXBSIZE;
172 	} else {
173 		maxio = NFS_V2MAXDATA;
174 	}
175 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
176 		nmp->nm_rsize = maxio;
177 	if (nmp->nm_rsize > MAXBSIZE)
178 		nmp->nm_rsize = MAXBSIZE;
179 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
180 		nmp->nm_readdirsize = maxio;
181 	if (nmp->nm_readdirsize > nmp->nm_rsize)
182 		nmp->nm_readdirsize = nmp->nm_rsize;
183 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
184 		nmp->nm_wsize = maxio;
185 	if (nmp->nm_wsize > MAXBSIZE)
186 		nmp->nm_wsize = MAXBSIZE;
187 
188 	/*
189 	 * Calculate the size used for io buffers.  Use the larger
190 	 * of the two sizes to minimise nfs requests but make sure
191 	 * that it is at least one VM page to avoid wasting buffer
192 	 * space.
193 	 */
194 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
195 	iosize = imax(iosize, PAGE_SIZE);
196 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
197 	return (iosize);
198 }
199 
200 static void
201 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
202 {
203 
204 	args->version = NFS_ARGSVERSION;
205 	args->addr = oargs->addr;
206 	args->addrlen = oargs->addrlen;
207 	args->sotype = oargs->sotype;
208 	args->proto = oargs->proto;
209 	args->fh = oargs->fh;
210 	args->fhsize = oargs->fhsize;
211 	args->flags = oargs->flags;
212 	args->wsize = oargs->wsize;
213 	args->rsize = oargs->rsize;
214 	args->readdirsize = oargs->readdirsize;
215 	args->timeo = oargs->timeo;
216 	args->retrans = oargs->retrans;
217 	args->readahead = oargs->readahead;
218 	args->hostname = oargs->hostname;
219 }
220 
221 static void
222 nfs_convert_diskless(void)
223 {
224 
225 	bcopy(&newnfs_diskless.myif, &newnfsv3_diskless.myif,
226 	    sizeof (struct ifaliasreq));
227 	bcopy(&newnfs_diskless.mygateway, &newnfsv3_diskless.mygateway,
228 	    sizeof (struct sockaddr_in));
229 	nfs_convert_oargs(&newnfsv3_diskless.root_args,
230 	    &newnfs_diskless.root_args);
231 	if (newnfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
232 		newnfsv3_diskless.root_fhsize = NFSX_MYFH;
233 		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh,
234 		    NFSX_MYFH);
235 	} else {
236 		newnfsv3_diskless.root_fhsize = NFSX_V2FH;
237 		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh,
238 		    NFSX_V2FH);
239 	}
240 	bcopy(&newnfs_diskless.root_saddr,&newnfsv3_diskless.root_saddr,
241 	    sizeof(struct sockaddr_in));
242 	bcopy(newnfs_diskless.root_hostnam, newnfsv3_diskless.root_hostnam,
243 	    MNAMELEN);
244 	newnfsv3_diskless.root_time = newnfs_diskless.root_time;
245 	bcopy(newnfs_diskless.my_hostnam, newnfsv3_diskless.my_hostnam,
246 	    MAXHOSTNAMELEN);
247 	newnfs_diskless_valid = 3;
248 }
249 
250 /*
251  * nfs statfs call
252  */
253 static int
254 nfs_statfs(struct mount *mp, struct statfs *sbp)
255 {
256 	struct vnode *vp;
257 	struct thread *td;
258 	struct nfsmount *nmp = VFSTONFS(mp);
259 	struct nfsvattr nfsva;
260 	struct nfsfsinfo fs;
261 	struct nfsstatfs sb;
262 	int error = 0, attrflag, gotfsinfo = 0, ret;
263 	struct nfsnode *np;
264 
265 	td = curthread;
266 
267 	error = vfs_busy(mp, MBF_NOWAIT);
268 	if (error)
269 		return (error);
270 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
271 	if (error) {
272 		vfs_unbusy(mp);
273 		return (error);
274 	}
275 	vp = NFSTOV(np);
276 	mtx_lock(&nmp->nm_mtx);
277 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
278 		mtx_unlock(&nmp->nm_mtx);
279 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
280 		    &attrflag, NULL);
281 		if (!error)
282 			gotfsinfo = 1;
283 	} else
284 		mtx_unlock(&nmp->nm_mtx);
285 	if (!error)
286 		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
287 		    &attrflag, NULL);
288 	if (attrflag == 0) {
289 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
290 		    td->td_ucred, td, &nfsva, NULL);
291 		if (ret) {
292 			/*
293 			 * Just set default values to get things going.
294 			 */
295 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
296 			nfsva.na_vattr.va_type = VDIR;
297 			nfsva.na_vattr.va_mode = 0777;
298 			nfsva.na_vattr.va_nlink = 100;
299 			nfsva.na_vattr.va_uid = (uid_t)0;
300 			nfsva.na_vattr.va_gid = (gid_t)0;
301 			nfsva.na_vattr.va_fileid = 2;
302 			nfsva.na_vattr.va_gen = 1;
303 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
304 			nfsva.na_vattr.va_size = 512 * 1024;
305 		}
306 	}
307 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
308 	if (!error) {
309 	    mtx_lock(&nmp->nm_mtx);
310 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
311 		nfscl_loadfsinfo(nmp, &fs);
312 	    nfscl_loadsbinfo(nmp, &sb, sbp);
313 	    sbp->f_flags = nmp->nm_flag;
314 	    sbp->f_iosize = newnfs_iosize(nmp);
315 	    mtx_unlock(&nmp->nm_mtx);
316 	    if (sbp != &mp->mnt_stat) {
317 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
318 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
319 	    }
320 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
321 	} else if (NFS_ISV4(vp)) {
322 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
323 	}
324 	vput(vp);
325 	vfs_unbusy(mp);
326 	return (error);
327 }
328 
329 /*
330  * nfs version 3 fsinfo rpc call
331  */
332 int
333 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
334     struct thread *td)
335 {
336 	struct nfsfsinfo fs;
337 	struct nfsvattr nfsva;
338 	int error, attrflag;
339 
340 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
341 	if (!error) {
342 		if (attrflag)
343 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
344 			    1);
345 		mtx_lock(&nmp->nm_mtx);
346 		nfscl_loadfsinfo(nmp, &fs);
347 		mtx_unlock(&nmp->nm_mtx);
348 	}
349 	return (error);
350 }
351 
352 /*
353  * Mount a remote root fs via. nfs. This depends on the info in the
354  * newnfs_diskless structure that has been filled in properly by some primary
355  * bootstrap.
356  * It goes something like this:
357  * - do enough of "ifconfig" by calling ifioctl() so that the system
358  *   can talk to the server
359  * - If newnfs_diskless.mygateway is filled in, use that address as
360  *   a default gateway.
361  * - build the rootfs mount point and call mountnfs() to do the rest.
362  *
363  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
364  * structure, as well as other global NFS client variables here, as
365  * nfs_mountroot() will be called once in the boot before any other NFS
366  * client activity occurs.
367  */
368 int
369 ncl_mountroot(struct mount *mp)
370 {
371 	struct thread *td = curthread;
372 	struct nfsv3_diskless *nd = &newnfsv3_diskless;
373 	struct socket *so;
374 	struct vnode *vp;
375 	struct ifreq ir;
376 	int error;
377 	u_long l;
378 	char buf[128];
379 	char *cp;
380 
381 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
382 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
383 #elif defined(NFS_ROOT)
384 	nfs_setup_diskless();
385 #endif
386 
387 	if (newnfs_diskless_valid == 0)
388 		return (-1);
389 	if (newnfs_diskless_valid == 1)
390 		nfs_convert_diskless();
391 
392 	/*
393 	 * XXX splnet, so networks will receive...
394 	 */
395 	splnet();
396 
397 	/*
398 	 * Do enough of ifconfig(8) so that the critical net interface can
399 	 * talk to the server.
400 	 */
401 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
402 	    td->td_ucred, td);
403 	if (error)
404 		panic("nfs_mountroot: socreate(%04x): %d",
405 			nd->myif.ifra_addr.sa_family, error);
406 
407 #if 0 /* XXX Bad idea */
408 	/*
409 	 * We might not have been told the right interface, so we pass
410 	 * over the first ten interfaces of the same kind, until we get
411 	 * one of them configured.
412 	 */
413 
414 	for (i = strlen(nd->myif.ifra_name) - 1;
415 		nd->myif.ifra_name[i] >= '0' &&
416 		nd->myif.ifra_name[i] <= '9';
417 		nd->myif.ifra_name[i] ++) {
418 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
419 		if(!error)
420 			break;
421 	}
422 #endif
423 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
424 	if (error)
425 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
426 	if ((cp = getenv("boot.netif.mtu")) != NULL) {
427 		ir.ifr_mtu = strtol(cp, NULL, 10);
428 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
429 		freeenv(cp);
430 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
431 		if (error)
432 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
433 	}
434 	soclose(so);
435 
436 	/*
437 	 * If the gateway field is filled in, set it as the default route.
438 	 * Note that pxeboot will set a default route of 0 if the route
439 	 * is not set by the DHCP server.  Check also for a value of 0
440 	 * to avoid panicking inappropriately in that situation.
441 	 */
442 	if (nd->mygateway.sin_len != 0 &&
443 	    nd->mygateway.sin_addr.s_addr != 0) {
444 		struct sockaddr_in mask, sin;
445 
446 		bzero((caddr_t)&mask, sizeof(mask));
447 		sin = mask;
448 		sin.sin_family = AF_INET;
449 		sin.sin_len = sizeof(sin);
450                 /* XXX MRT use table 0 for this sort of thing */
451 		error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
452 		    (struct sockaddr *)&nd->mygateway,
453 		    (struct sockaddr *)&mask,
454 		    RTF_UP | RTF_GATEWAY, NULL);
455 		if (error)
456 			panic("nfs_mountroot: RTM_ADD: %d", error);
457 	}
458 
459 	/*
460 	 * Create the rootfs mount point.
461 	 */
462 	nd->root_args.fh = nd->root_fh;
463 	nd->root_args.fhsize = nd->root_fhsize;
464 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
465 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
466 		(l >> 24) & 0xff, (l >> 16) & 0xff,
467 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
468 	printf("NFS ROOT: %s\n", buf);
469 	nd->root_args.hostname = buf;
470 	if ((error = nfs_mountdiskless(buf,
471 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
472 		return (error);
473 	}
474 
475 	/*
476 	 * This is not really an nfs issue, but it is much easier to
477 	 * set hostname here and then let the "/etc/rc.xxx" files
478 	 * mount the right /var based upon its preset value.
479 	 */
480 	mtx_lock(&prison0.pr_mtx);
481 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
482 	    sizeof(prison0.pr_hostname));
483 	mtx_unlock(&prison0.pr_mtx);
484 	inittodr(ntohl(nd->root_time));
485 	return (0);
486 }
487 
488 /*
489  * Internal version of mount system call for diskless setup.
490  */
491 static int
492 nfs_mountdiskless(char *path,
493     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
494     struct vnode **vpp, struct mount *mp)
495 {
496 	struct sockaddr *nam;
497 	int error;
498 
499 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
500 	if ((error = mountnfs(args, mp, nam, path, NULL, NULL, NULL, vpp,
501 	    td->td_ucred, td)) != 0) {
502 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
503 		return (error);
504 	}
505 	return (0);
506 }
507 
508 static void
509 nfs_sec_name(char *sec, int *flagsp)
510 {
511 	if (!strcmp(sec, "krb5"))
512 		*flagsp |= NFSMNT_KERB;
513 	else if (!strcmp(sec, "krb5i"))
514 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
515 	else if (!strcmp(sec, "krb5p"))
516 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
517 }
518 
519 static void
520 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
521     struct ucred *cred, struct thread *td)
522 {
523 	int s;
524 	int adjsock;
525 
526 	s = splnet();
527 
528 	/*
529 	 * Set read-only flag if requested; otherwise, clear it if this is
530 	 * an update.  If this is not an update, then either the read-only
531 	 * flag is already clear, or this is a root mount and it was set
532 	 * intentionally at some previous point.
533 	 */
534 	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
535 		MNT_ILOCK(mp);
536 		mp->mnt_flag |= MNT_RDONLY;
537 		MNT_IUNLOCK(mp);
538 	} else if (mp->mnt_flag & MNT_UPDATE) {
539 		MNT_ILOCK(mp);
540 		mp->mnt_flag &= ~MNT_RDONLY;
541 		MNT_IUNLOCK(mp);
542 	}
543 
544 	/*
545 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
546 	 * no sense in that context.  Also, set up appropriate retransmit
547 	 * and soft timeout behavior.
548 	 */
549 	if (argp->sotype == SOCK_STREAM) {
550 		nmp->nm_flag &= ~NFSMNT_NOCONN;
551 		nmp->nm_timeo = NFS_MAXTIMEO;
552 	}
553 
554 	/* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
555 	if ((argp->flags & NFSMNT_NFSV3) == 0)
556 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
557 
558 	/* Also re-bind if we're switching to/from a connected UDP socket */
559 	adjsock = ((nmp->nm_flag & NFSMNT_NOCONN) !=
560 		    (argp->flags & NFSMNT_NOCONN));
561 
562 	/* Update flags atomically.  Don't change the lock bits. */
563 	nmp->nm_flag = argp->flags | nmp->nm_flag;
564 	splx(s);
565 
566 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
567 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
568 		if (nmp->nm_timeo < NFS_MINTIMEO)
569 			nmp->nm_timeo = NFS_MINTIMEO;
570 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
571 			nmp->nm_timeo = NFS_MAXTIMEO;
572 	}
573 
574 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
575 		nmp->nm_retry = argp->retrans;
576 		if (nmp->nm_retry > NFS_MAXREXMIT)
577 			nmp->nm_retry = NFS_MAXREXMIT;
578 	}
579 
580 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
581 		nmp->nm_wsize = argp->wsize;
582 		/* Round down to multiple of blocksize */
583 		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
584 		if (nmp->nm_wsize <= 0)
585 			nmp->nm_wsize = NFS_FABLKSIZE;
586 	}
587 
588 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
589 		nmp->nm_rsize = argp->rsize;
590 		/* Round down to multiple of blocksize */
591 		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
592 		if (nmp->nm_rsize <= 0)
593 			nmp->nm_rsize = NFS_FABLKSIZE;
594 	}
595 
596 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
597 		nmp->nm_readdirsize = argp->readdirsize;
598 	}
599 
600 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
601 		nmp->nm_acregmin = argp->acregmin;
602 	else
603 		nmp->nm_acregmin = NFS_MINATTRTIMO;
604 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
605 		nmp->nm_acregmax = argp->acregmax;
606 	else
607 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
608 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
609 		nmp->nm_acdirmin = argp->acdirmin;
610 	else
611 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
612 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
613 		nmp->nm_acdirmax = argp->acdirmax;
614 	else
615 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
616 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
617 		nmp->nm_acdirmin = nmp->nm_acdirmax;
618 	if (nmp->nm_acregmin > nmp->nm_acregmax)
619 		nmp->nm_acregmin = nmp->nm_acregmax;
620 
621 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
622 		if (argp->readahead <= NFS_MAXRAHEAD)
623 			nmp->nm_readahead = argp->readahead;
624 		else
625 			nmp->nm_readahead = NFS_MAXRAHEAD;
626 	}
627 	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
628 		if (argp->wcommitsize < nmp->nm_wsize)
629 			nmp->nm_wcommitsize = nmp->nm_wsize;
630 		else
631 			nmp->nm_wcommitsize = argp->wcommitsize;
632 	}
633 
634 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
635 		    (nmp->nm_soproto != argp->proto));
636 
637 	if (nmp->nm_client != NULL && adjsock) {
638 		int haslock = 0, error = 0;
639 
640 		if (nmp->nm_sotype == SOCK_STREAM) {
641 			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
642 			if (!error)
643 				haslock = 1;
644 		}
645 		if (!error) {
646 		    newnfs_disconnect(&nmp->nm_sockreq);
647 		    if (haslock)
648 			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
649 		    nmp->nm_sotype = argp->sotype;
650 		    nmp->nm_soproto = argp->proto;
651 		    if (nmp->nm_sotype == SOCK_DGRAM)
652 			while (newnfs_connect(nmp, &nmp->nm_sockreq,
653 			    cred, td, 0)) {
654 				printf("newnfs_args: retrying connect\n");
655 				(void) nfs_catnap(PSOCK, "newnfscon");
656 			}
657 		}
658 	} else {
659 		nmp->nm_sotype = argp->sotype;
660 		nmp->nm_soproto = argp->proto;
661 	}
662 }
663 
664 static const char *nfs_opts[] = { "from",
665     "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
666     "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
667     "async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
668     "readdirsize", "soft", "hard", "mntudp", "tcp", "udp", "wsize", "rsize",
669     "retrans", "acregmin", "acregmax", "acdirmin", "acdirmax", "resvport",
670     "readahead", "hostname", "timeout", "addr", "fh", "nfsv3", "sec",
671     "principal", "nfsv4", "gssname", "allgssname", "dirpath",
672     NULL };
673 
674 /*
675  * VFS Operations.
676  *
677  * mount system call
678  * It seems a bit dumb to copyinstr() the host and path here and then
679  * bcopy() them in mountnfs(), but I wanted to detect errors before
680  * doing the sockargs() call because sockargs() allocates an mbuf and
681  * an error after that means that I have to release the mbuf.
682  */
683 /* ARGSUSED */
684 static int
685 nfs_mount(struct mount *mp)
686 {
687 	struct nfs_args args = {
688 	    .version = NFS_ARGSVERSION,
689 	    .addr = NULL,
690 	    .addrlen = sizeof (struct sockaddr_in),
691 	    .sotype = SOCK_STREAM,
692 	    .proto = 0,
693 	    .fh = NULL,
694 	    .fhsize = 0,
695 	    .flags = 0,
696 	    .wsize = NFS_WSIZE,
697 	    .rsize = NFS_RSIZE,
698 	    .readdirsize = NFS_READDIRSIZE,
699 	    .timeo = 10,
700 	    .retrans = NFS_RETRANS,
701 	    .readahead = NFS_DEFRAHEAD,
702 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
703 	    .hostname = NULL,
704 	    /* args version 4 */
705 	    .acregmin = NFS_MINATTRTIMO,
706 	    .acregmax = NFS_MAXATTRTIMO,
707 	    .acdirmin = NFS_MINDIRATTRTIMO,
708 	    .acdirmax = NFS_MAXDIRATTRTIMO,
709 	    .dirlen = 0,
710 	    .krbnamelen = 0,
711 	    .srvkrbnamelen = 0,
712 	};
713 	int error = 0, ret, len;
714 	struct sockaddr *nam = NULL;
715 	struct vnode *vp;
716 	struct thread *td;
717 	char hst[MNAMELEN];
718 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
719 	char *opt, *name, *secname;
720 
721 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
722 		error = EINVAL;
723 		goto out;
724 	}
725 
726 	td = curthread;
727 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
728 		error = ncl_mountroot(mp);
729 		goto out;
730 	}
731 
732 	nfscl_init();
733 
734 	/* Handle the new style options. */
735 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
736 		args.flags |= NFSMNT_NOCONN;
737 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
738 		args.flags |= NFSMNT_NOCONN;
739 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
740 		args.flags |= NFSMNT_NOLOCKD;
741 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
742 		args.flags &= ~NFSMNT_NOLOCKD;
743 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
744 		args.flags |= NFSMNT_INT;
745 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
746 		args.flags |= NFSMNT_RDIRPLUS;
747 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
748 		args.flags |= NFSMNT_RESVPORT;
749 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
750 		args.flags &= ~NFSMNT_RESVPORT;
751 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
752 		args.flags |= NFSMNT_SOFT;
753 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
754 		args.flags &= ~NFSMNT_SOFT;
755 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
756 		args.sotype = SOCK_DGRAM;
757 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
758 		args.sotype = SOCK_DGRAM;
759 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
760 		args.sotype = SOCK_STREAM;
761 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
762 		args.flags |= NFSMNT_NFSV3;
763 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
764 		args.flags |= NFSMNT_NFSV4;
765 		args.sotype = SOCK_STREAM;
766 	}
767 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
768 		args.flags |= NFSMNT_ALLGSSNAME;
769 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
770 		if (opt == NULL) {
771 			vfs_mount_error(mp, "illegal readdirsize");
772 			error = EINVAL;
773 			goto out;
774 		}
775 		ret = sscanf(opt, "%d", &args.readdirsize);
776 		if (ret != 1 || args.readdirsize <= 0) {
777 			vfs_mount_error(mp, "illegal readdirsize: %s",
778 			    opt);
779 			error = EINVAL;
780 			goto out;
781 		}
782 		args.flags |= NFSMNT_READDIRSIZE;
783 	}
784 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
785 		if (opt == NULL) {
786 			vfs_mount_error(mp, "illegal readahead");
787 			error = EINVAL;
788 			goto out;
789 		}
790 		ret = sscanf(opt, "%d", &args.readahead);
791 		if (ret != 1 || args.readahead <= 0) {
792 			vfs_mount_error(mp, "illegal readahead: %s",
793 			    opt);
794 			error = EINVAL;
795 			goto out;
796 		}
797 		args.flags |= NFSMNT_READAHEAD;
798 	}
799 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
800 		if (opt == NULL) {
801 			vfs_mount_error(mp, "illegal wsize");
802 			error = EINVAL;
803 			goto out;
804 		}
805 		ret = sscanf(opt, "%d", &args.wsize);
806 		if (ret != 1 || args.wsize <= 0) {
807 			vfs_mount_error(mp, "illegal wsize: %s",
808 			    opt);
809 			error = EINVAL;
810 			goto out;
811 		}
812 		args.flags |= NFSMNT_WSIZE;
813 	}
814 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
815 		if (opt == NULL) {
816 			vfs_mount_error(mp, "illegal rsize");
817 			error = EINVAL;
818 			goto out;
819 		}
820 		ret = sscanf(opt, "%d", &args.rsize);
821 		if (ret != 1 || args.rsize <= 0) {
822 			vfs_mount_error(mp, "illegal wsize: %s",
823 			    opt);
824 			error = EINVAL;
825 			goto out;
826 		}
827 		args.flags |= NFSMNT_RSIZE;
828 	}
829 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
830 		if (opt == NULL) {
831 			vfs_mount_error(mp, "illegal retrans");
832 			error = EINVAL;
833 			goto out;
834 		}
835 		ret = sscanf(opt, "%d", &args.retrans);
836 		if (ret != 1 || args.retrans <= 0) {
837 			vfs_mount_error(mp, "illegal retrans: %s",
838 			    opt);
839 			error = EINVAL;
840 			goto out;
841 		}
842 		args.flags |= NFSMNT_RETRANS;
843 	}
844 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
845 		ret = sscanf(opt, "%d", &args.acregmin);
846 		if (ret != 1 || args.acregmin < 0) {
847 			vfs_mount_error(mp, "illegal acregmin: %s",
848 			    opt);
849 			error = EINVAL;
850 			goto out;
851 		}
852 		args.flags |= NFSMNT_ACREGMIN;
853 	}
854 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
855 		ret = sscanf(opt, "%d", &args.acregmax);
856 		if (ret != 1 || args.acregmax < 0) {
857 			vfs_mount_error(mp, "illegal acregmax: %s",
858 			    opt);
859 			error = EINVAL;
860 			goto out;
861 		}
862 		args.flags |= NFSMNT_ACREGMAX;
863 	}
864 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
865 		ret = sscanf(opt, "%d", &args.acdirmin);
866 		if (ret != 1 || args.acdirmin < 0) {
867 			vfs_mount_error(mp, "illegal acdirmin: %s",
868 			    opt);
869 			error = EINVAL;
870 			goto out;
871 		}
872 		args.flags |= NFSMNT_ACDIRMIN;
873 	}
874 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
875 		ret = sscanf(opt, "%d", &args.acdirmax);
876 		if (ret != 1 || args.acdirmax < 0) {
877 			vfs_mount_error(mp, "illegal acdirmax: %s",
878 			    opt);
879 			error = EINVAL;
880 			goto out;
881 		}
882 		args.flags |= NFSMNT_ACDIRMAX;
883 	}
884 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
885 		ret = sscanf(opt, "%d", &args.timeo);
886 		if (ret != 1 || args.timeo <= 0) {
887 			vfs_mount_error(mp, "illegal timeout: %s",
888 			    opt);
889 			error = EINVAL;
890 			goto out;
891 		}
892 		args.flags |= NFSMNT_TIMEO;
893 	}
894 	if (vfs_getopt(mp->mnt_optnew, "sec",
895 		(void **) &secname, NULL) == 0)
896 		nfs_sec_name(secname, &args.flags);
897 
898 	if (mp->mnt_flag & MNT_UPDATE) {
899 		struct nfsmount *nmp = VFSTONFS(mp);
900 
901 		if (nmp == NULL) {
902 			error = EIO;
903 			goto out;
904 		}
905 		/*
906 		 * When doing an update, we can't change version,
907 		 * security, switch lockd strategies or change cookie
908 		 * translation
909 		 */
910 		args.flags = (args.flags &
911 		    ~(NFSMNT_NFSV3 |
912 		      NFSMNT_NFSV4 |
913 		      NFSMNT_KERB |
914 		      NFSMNT_INTEGRITY |
915 		      NFSMNT_PRIVACY |
916 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
917 		    (nmp->nm_flag &
918 			(NFSMNT_NFSV3 |
919 			 NFSMNT_NFSV4 |
920 			 NFSMNT_KERB |
921 			 NFSMNT_INTEGRITY |
922 			 NFSMNT_PRIVACY |
923 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
924 		nfs_decode_args(mp, nmp, &args, td->td_ucred, td);
925 		goto out;
926 	}
927 
928 	/*
929 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
930 	 * or no-connection mode for those protocols that support
931 	 * no-connection mode (the flag will be cleared later for protocols
932 	 * that do not support no-connection mode).  This will allow a client
933 	 * to receive replies from a different IP then the request was
934 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
935 	 * not 0.
936 	 */
937 	if (nfs_ip_paranoia == 0)
938 		args.flags |= NFSMNT_NOCONN;
939 
940 	if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
941 	    &args.fhsize) == 0) {
942 		if (args.fhsize > NFSX_FHMAX) {
943 			vfs_mount_error(mp, "Bad file handle");
944 			error = EINVAL;
945 			goto out;
946 		}
947 		bcopy(args.fh, nfh, args.fhsize);
948 	} else {
949 		args.fhsize = 0;
950 	}
951 
952 	(void) vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname,
953 	    &len);
954 	if (args.hostname == NULL) {
955 		vfs_mount_error(mp, "Invalid hostname");
956 		error = EINVAL;
957 		goto out;
958 	}
959 	bcopy(args.hostname, hst, MNAMELEN);
960 	hst[MNAMELEN - 1] = '\0';
961 
962 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
963 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
964 	else
965 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
966 	args.srvkrbnamelen = strlen(srvkrbname);
967 
968 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
969 		strlcpy(krbname, name, sizeof (krbname));
970 	else
971 		krbname[0] = '\0';
972 	args.krbnamelen = strlen(krbname);
973 
974 	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
975 		strlcpy(dirpath, name, sizeof (dirpath));
976 	else
977 		dirpath[0] = '\0';
978 	args.dirlen = strlen(dirpath);
979 
980 	if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr,
981 	    &args.addrlen) == 0) {
982 		if (args.addrlen > SOCK_MAXADDRLEN) {
983 			error = ENAMETOOLONG;
984 			goto out;
985 		}
986 		nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
987 		bcopy(args.addr, nam, args.addrlen);
988 		nam->sa_len = args.addrlen;
989 	}
990 
991 	args.fh = nfh;
992 	error = mountnfs(&args, mp, nam, hst, krbname, dirpath, srvkrbname,
993 	    &vp, td->td_ucred, td);
994 out:
995 	if (!error) {
996 		MNT_ILOCK(mp);
997 		mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
998 		MNT_IUNLOCK(mp);
999 	}
1000 	return (error);
1001 }
1002 
1003 
1004 /*
1005  * VFS Operations.
1006  *
1007  * mount system call
1008  * It seems a bit dumb to copyinstr() the host and path here and then
1009  * bcopy() them in mountnfs(), but I wanted to detect errors before
1010  * doing the sockargs() call because sockargs() allocates an mbuf and
1011  * an error after that means that I have to release the mbuf.
1012  */
1013 /* ARGSUSED */
1014 static int
1015 nfs_cmount(struct mntarg *ma, void *data, int flags)
1016 {
1017 	int error;
1018 	struct nfs_args args;
1019 
1020 	error = copyin(data, &args, sizeof (struct nfs_args));
1021 	if (error)
1022 		return error;
1023 
1024 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1025 
1026 	error = kernel_mount(ma, flags);
1027 	return (error);
1028 }
1029 
1030 /*
1031  * Common code for mount and mountroot
1032  */
1033 static int
1034 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1035     char *hst, u_char *krbname, u_char *dirpath, u_char *srvkrbname,
1036     struct vnode **vpp, struct ucred *cred, struct thread *td)
1037 {
1038 	struct nfsmount *nmp;
1039 	struct nfsnode *np;
1040 	int error, trycnt, ret;
1041 	struct nfsvattr nfsva;
1042 	static u_int64_t clval = 0;
1043 
1044 	if (mp->mnt_flag & MNT_UPDATE) {
1045 		nmp = VFSTONFS(mp);
1046 		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1047 		FREE(nam, M_SONAME);
1048 		return (0);
1049 	} else {
1050 		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1051 		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2,
1052 		    M_NEWNFSMNT, M_WAITOK);
1053 		bzero((caddr_t)nmp, sizeof (struct nfsmount) +
1054 		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2);
1055 		TAILQ_INIT(&nmp->nm_bufq);
1056 		if (clval == 0)
1057 			clval = (u_int64_t)nfsboottime.tv_sec;
1058 		nmp->nm_clval = clval++;
1059 		nmp->nm_krbnamelen = argp->krbnamelen;
1060 		nmp->nm_dirpathlen = argp->dirlen;
1061 		nmp->nm_srvkrbnamelen = argp->srvkrbnamelen;
1062 		if (td->td_ucred->cr_uid != (uid_t)0) {
1063 			/*
1064 			 * nm_uid is used to get KerberosV credentials for
1065 			 * the nfsv4 state handling operations if there is
1066 			 * no host based principal set. Use the uid of
1067 			 * this user if not root, since they are doing the
1068 			 * mount. I don't think setting this for root will
1069 			 * work, since root normally does not have user
1070 			 * credentials in a credentials cache.
1071 			 */
1072 			nmp->nm_uid = td->td_ucred->cr_uid;
1073 		} else {
1074 			/*
1075 			 * Just set to -1, so it won't be used.
1076 			 */
1077 			nmp->nm_uid = (uid_t)-1;
1078 		}
1079 
1080 		/* Copy and null terminate all the names */
1081 		if (nmp->nm_krbnamelen > 0) {
1082 			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1083 			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1084 		}
1085 		if (nmp->nm_dirpathlen > 0) {
1086 			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1087 			    nmp->nm_dirpathlen);
1088 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1089 			    + 1] = '\0';
1090 		}
1091 		if (nmp->nm_srvkrbnamelen > 0) {
1092 			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1093 			    nmp->nm_srvkrbnamelen);
1094 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1095 			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1096 		}
1097 		nmp->nm_sockreq.nr_cred = crhold(cred);
1098 		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1099 		mp->mnt_data = nmp;
1100 	}
1101 	vfs_getnewfsid(mp);
1102 	nmp->nm_mountp = mp;
1103 	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1104 
1105 	nfs_decode_args(mp, nmp, argp, cred, td);
1106 
1107 	/*
1108 	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1109 	 * high, depending on whether we end up with negative offsets in
1110 	 * the client or server somewhere.  2GB-1 may be safer.
1111 	 *
1112 	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1113 	 * that we can handle until we find out otherwise.
1114 	 * XXX Our "safe" limit on the client is what we can store in our
1115 	 * buffer cache using signed(!) block numbers.
1116 	 */
1117 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1118 		nmp->nm_maxfilesize = 0xffffffffLL;
1119 	else
1120 		nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
1121 
1122 	nmp->nm_timeo = NFS_TIMEO;
1123 	nmp->nm_retry = NFS_RETRANS;
1124 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1125 		nmp->nm_wsize = NFS_WSIZE;
1126 		nmp->nm_rsize = NFS_RSIZE;
1127 		nmp->nm_readdirsize = NFS_READDIRSIZE;
1128 	}
1129 	nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1130 	nmp->nm_numgrps = NFS_MAXGRPS;
1131 	nmp->nm_readahead = NFS_DEFRAHEAD;
1132 	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1133 	if (nmp->nm_tprintf_delay < 0)
1134 		nmp->nm_tprintf_delay = 0;
1135 	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1136 	if (nmp->nm_tprintf_initial_delay < 0)
1137 		nmp->nm_tprintf_initial_delay = 0;
1138 	nmp->nm_fhsize = argp->fhsize;
1139 	if (nmp->nm_fhsize > 0)
1140 		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1141 	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1142 	nmp->nm_nam = nam;
1143 	/* Set up the sockets and per-host congestion */
1144 	nmp->nm_sotype = argp->sotype;
1145 	nmp->nm_soproto = argp->proto;
1146 	nmp->nm_sockreq.nr_prog = NFS_PROG;
1147 	if ((argp->flags & NFSMNT_NFSV4))
1148 		nmp->nm_sockreq.nr_vers = NFS_VER4;
1149 	else if ((argp->flags & NFSMNT_NFSV3))
1150 		nmp->nm_sockreq.nr_vers = NFS_VER3;
1151 	else
1152 		nmp->nm_sockreq.nr_vers = NFS_VER2;
1153 
1154 
1155 	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1156 		goto bad;
1157 
1158 	/*
1159 	 * A reference count is needed on the nfsnode representing the
1160 	 * remote root.  If this object is not persistent, then backward
1161 	 * traversals of the mount point (i.e. "..") will not work if
1162 	 * the nfsnode gets flushed out of the cache. Ufs does not have
1163 	 * this problem, because one can identify root inodes by their
1164 	 * number == ROOTINO (2).
1165 	 */
1166 	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1167 	    nmp->nm_dirpathlen > 0) {
1168 		/*
1169 		 * If the fhsize on the mount point == 0 for V4, the mount
1170 		 * path needs to be looked up.
1171 		 */
1172 		trycnt = 3;
1173 		do {
1174 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1175 			    cred, td);
1176 			if (error)
1177 				(void) nfs_catnap(PZERO, "nfsgetdirp");
1178 		} while (error && --trycnt > 0);
1179 		if (error) {
1180 			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1181 			goto bad;
1182 		}
1183 	}
1184 	if (nmp->nm_fhsize > 0) {
1185 		/*
1186 		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1187 		 * non-zero for the root vnode. f_iosize will be set correctly
1188 		 * by nfs_statfs() before any I/O occurs.
1189 		 */
1190 		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1191 		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
1192 		if (error)
1193 			goto bad;
1194 		*vpp = NFSTOV(np);
1195 
1196 		/*
1197 		 * Get file attributes and transfer parameters for the
1198 		 * mountpoint.  This has the side effect of filling in
1199 		 * (*vpp)->v_type with the correct value.
1200 		 */
1201 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1202 		    cred, td, &nfsva, NULL);
1203 		if (ret) {
1204 			/*
1205 			 * Just set default values to get things going.
1206 			 */
1207 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1208 			nfsva.na_vattr.va_type = VDIR;
1209 			nfsva.na_vattr.va_mode = 0777;
1210 			nfsva.na_vattr.va_nlink = 100;
1211 			nfsva.na_vattr.va_uid = (uid_t)0;
1212 			nfsva.na_vattr.va_gid = (gid_t)0;
1213 			nfsva.na_vattr.va_fileid = 2;
1214 			nfsva.na_vattr.va_gen = 1;
1215 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1216 			nfsva.na_vattr.va_size = 512 * 1024;
1217 		}
1218 		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1219 		if (argp->flags & NFSMNT_NFSV3)
1220 			ncl_fsinfo(nmp, *vpp, cred, td);
1221 
1222 		/*
1223 		 * Lose the lock but keep the ref.
1224 		 */
1225 		VOP_UNLOCK(*vpp, 0);
1226 		return (0);
1227 	}
1228 	error = EIO;
1229 
1230 bad:
1231 	newnfs_disconnect(&nmp->nm_sockreq);
1232 	crfree(nmp->nm_sockreq.nr_cred);
1233 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1234 	mtx_destroy(&nmp->nm_mtx);
1235 	FREE(nmp, M_NEWNFSMNT);
1236 	FREE(nam, M_SONAME);
1237 	return (error);
1238 }
1239 
1240 /*
1241  * unmount system call
1242  */
1243 static int
1244 nfs_unmount(struct mount *mp, int mntflags)
1245 {
1246 	struct thread *td;
1247 	struct nfsmount *nmp;
1248 	int error, flags = 0, trycnt = 0;
1249 
1250 	td = curthread;
1251 
1252 	if (mntflags & MNT_FORCE)
1253 		flags |= FORCECLOSE;
1254 	nmp = VFSTONFS(mp);
1255 	/*
1256 	 * Goes something like this..
1257 	 * - Call vflush() to clear out vnodes for this filesystem
1258 	 * - Close the socket
1259 	 * - Free up the data structures
1260 	 */
1261 	/* In the forced case, cancel any outstanding requests. */
1262 	if (mntflags & MNT_FORCE) {
1263 		error = newnfs_nmcancelreqs(nmp);
1264 		if (error)
1265 			goto out;
1266 		/* For a forced close, get rid of the renew thread now */
1267 		nfscl_umount(nmp, td);
1268 	}
1269 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1270 	do {
1271 		error = vflush(mp, 1, flags, td);
1272 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1273 			(void) nfs_catnap(PSOCK, "newndm");
1274 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1275 	if (error)
1276 		goto out;
1277 
1278 	/*
1279 	 * We are now committed to the unmount.
1280 	 */
1281 	if ((mntflags & MNT_FORCE) == 0)
1282 		nfscl_umount(nmp, td);
1283 	newnfs_disconnect(&nmp->nm_sockreq);
1284 	crfree(nmp->nm_sockreq.nr_cred);
1285 	FREE(nmp->nm_nam, M_SONAME);
1286 
1287 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1288 	mtx_destroy(&nmp->nm_mtx);
1289 	FREE(nmp, M_NEWNFSMNT);
1290 out:
1291 	return (error);
1292 }
1293 
1294 /*
1295  * Return root of a filesystem
1296  */
1297 static int
1298 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1299 {
1300 	struct vnode *vp;
1301 	struct nfsmount *nmp;
1302 	struct nfsnode *np;
1303 	int error;
1304 
1305 	nmp = VFSTONFS(mp);
1306 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
1307 	if (error)
1308 		return error;
1309 	vp = NFSTOV(np);
1310 	/*
1311 	 * Get transfer parameters and attributes for root vnode once.
1312 	 */
1313 	mtx_lock(&nmp->nm_mtx);
1314 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1315 		mtx_unlock(&nmp->nm_mtx);
1316 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1317 	} else
1318 		mtx_unlock(&nmp->nm_mtx);
1319 	if (vp->v_type == VNON)
1320 	    vp->v_type = VDIR;
1321 	vp->v_vflag |= VV_ROOT;
1322 	*vpp = vp;
1323 	return (0);
1324 }
1325 
1326 /*
1327  * Flush out the buffer cache
1328  */
1329 /* ARGSUSED */
1330 static int
1331 nfs_sync(struct mount *mp, int waitfor)
1332 {
1333 	struct vnode *vp, *mvp;
1334 	struct thread *td;
1335 	int error, allerror = 0;
1336 
1337 	td = curthread;
1338 
1339 	/*
1340 	 * Force stale buffer cache information to be flushed.
1341 	 */
1342 	MNT_ILOCK(mp);
1343 loop:
1344 	MNT_VNODE_FOREACH(vp, mp, mvp) {
1345 		VI_LOCK(vp);
1346 		MNT_IUNLOCK(mp);
1347 		/* XXX Racy bv_cnt check. */
1348 		if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1349 		    waitfor == MNT_LAZY) {
1350 			VI_UNLOCK(vp);
1351 			MNT_ILOCK(mp);
1352 			continue;
1353 		}
1354 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1355 			MNT_ILOCK(mp);
1356 			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1357 			goto loop;
1358 		}
1359 		error = VOP_FSYNC(vp, waitfor, td);
1360 		if (error)
1361 			allerror = error;
1362 		VOP_UNLOCK(vp, 0);
1363 		vrele(vp);
1364 
1365 		MNT_ILOCK(mp);
1366 	}
1367 	MNT_IUNLOCK(mp);
1368 	return (allerror);
1369 }
1370 
1371 static int
1372 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1373 {
1374 	struct nfsmount *nmp = VFSTONFS(mp);
1375 	struct vfsquery vq;
1376 	int error;
1377 
1378 	bzero(&vq, sizeof(vq));
1379 	switch (op) {
1380 #if 0
1381 	case VFS_CTL_NOLOCKS:
1382 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1383  		if (req->oldptr != NULL) {
1384  			error = SYSCTL_OUT(req, &val, sizeof(val));
1385  			if (error)
1386  				return (error);
1387  		}
1388  		if (req->newptr != NULL) {
1389  			error = SYSCTL_IN(req, &val, sizeof(val));
1390  			if (error)
1391  				return (error);
1392 			if (val)
1393 				nmp->nm_flag |= NFSMNT_NOLOCKS;
1394 			else
1395 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1396  		}
1397 		break;
1398 #endif
1399 	case VFS_CTL_QUERY:
1400 		mtx_lock(&nmp->nm_mtx);
1401 		if (nmp->nm_state & NFSSTA_TIMEO)
1402 			vq.vq_flags |= VQ_NOTRESP;
1403 		mtx_unlock(&nmp->nm_mtx);
1404 #if 0
1405 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1406 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1407 			vq.vq_flags |= VQ_NOTRESPLOCK;
1408 #endif
1409 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1410 		break;
1411  	case VFS_CTL_TIMEO:
1412  		if (req->oldptr != NULL) {
1413  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1414  			    sizeof(nmp->nm_tprintf_initial_delay));
1415  			if (error)
1416  				return (error);
1417  		}
1418  		if (req->newptr != NULL) {
1419 			error = vfs_suser(mp, req->td);
1420 			if (error)
1421 				return (error);
1422  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1423  			    sizeof(nmp->nm_tprintf_initial_delay));
1424  			if (error)
1425  				return (error);
1426  			if (nmp->nm_tprintf_initial_delay < 0)
1427  				nmp->nm_tprintf_initial_delay = 0;
1428  		}
1429 		break;
1430 	default:
1431 		return (ENOTSUP);
1432 	}
1433 	return (0);
1434 }
1435 
1436