xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision aa64588d28258aef88cc33b8043112e8856948d0)
1 /*-
2  * Copyright (c) 1989, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/lock.h>
50 #include <sys/malloc.h>
51 #include <sys/mbuf.h>
52 #include <sys/module.h>
53 #include <sys/mount.h>
54 #include <sys/proc.h>
55 #include <sys/socket.h>
56 #include <sys/socketvar.h>
57 #include <sys/sockio.h>
58 #include <sys/sysctl.h>
59 #include <sys/vnode.h>
60 #include <sys/signalvar.h>
61 
62 #include <vm/vm.h>
63 #include <vm/vm_extern.h>
64 #include <vm/uma.h>
65 
66 #include <net/if.h>
67 #include <net/route.h>
68 #include <netinet/in.h>
69 
70 #include <fs/nfs/nfsport.h>
71 #include <fs/nfsclient/nfsnode.h>
72 #include <fs/nfsclient/nfsmount.h>
73 #include <fs/nfsclient/nfs.h>
74 #include <fs/nfsclient/nfsdiskless.h>
75 
76 extern int nfscl_ticks;
77 extern struct timeval nfsboottime;
78 extern struct nfsstats	newnfsstats;
79 
80 MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
81 MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
82 
83 SYSCTL_DECL(_vfs_newnfs);
84 SYSCTL_STRUCT(_vfs_newnfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
85 	&newnfsstats, nfsstats, "S,nfsstats");
86 static int nfs_ip_paranoia = 1;
87 SYSCTL_INT(_vfs_newnfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
88     &nfs_ip_paranoia, 0, "");
89 static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
90 SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_INITIAL_DELAY,
91         downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
92 /* how long between console messages "nfs server foo not responding" */
93 static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
94 SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_DELAY,
95         downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
96 
97 static void	nfs_sec_name(char *, int *);
98 static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
99 		    struct nfs_args *argp, struct ucred *, struct thread *);
100 static int	mountnfs(struct nfs_args *, struct mount *,
101 		    struct sockaddr *, char *, u_char *, u_char *, u_char *,
102 		    struct vnode **, struct ucred *, struct thread *, int);
103 static vfs_mount_t nfs_mount;
104 static vfs_cmount_t nfs_cmount;
105 static vfs_unmount_t nfs_unmount;
106 static vfs_root_t nfs_root;
107 static vfs_statfs_t nfs_statfs;
108 static vfs_sync_t nfs_sync;
109 static vfs_sysctl_t nfs_sysctl;
110 
111 /*
112  * nfs vfs operations.
113  */
114 static struct vfsops nfs_vfsops = {
115 	.vfs_init =		ncl_init,
116 	.vfs_mount =		nfs_mount,
117 	.vfs_cmount =		nfs_cmount,
118 	.vfs_root =		nfs_root,
119 	.vfs_statfs =		nfs_statfs,
120 	.vfs_sync =		nfs_sync,
121 	.vfs_uninit =		ncl_uninit,
122 	.vfs_unmount =		nfs_unmount,
123 	.vfs_sysctl =		nfs_sysctl,
124 };
125 VFS_SET(nfs_vfsops, newnfs, VFCF_NETWORK);
126 
127 /* So that loader and kldload(2) can find us, wherever we are.. */
128 MODULE_VERSION(newnfs, 1);
129 
130 /*
131  * This structure must be filled in by a primary bootstrap or bootstrap
132  * server for a diskless/dataless machine. It is initialized below just
133  * to ensure that it is allocated to initialized data (.data not .bss).
134  */
135 struct nfs_diskless newnfs_diskless = { { { 0 } } };
136 struct nfsv3_diskless newnfsv3_diskless = { { { 0 } } };
137 int newnfs_diskless_valid = 0;
138 
139 SYSCTL_INT(_vfs_newnfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
140     &newnfs_diskless_valid, 0,
141     "Has the diskless struct been filled correctly");
142 
143 SYSCTL_STRING(_vfs_newnfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
144     newnfsv3_diskless.root_hostnam, 0, "Path to nfs root");
145 
146 SYSCTL_OPAQUE(_vfs_newnfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
147     &newnfsv3_diskless.root_saddr, sizeof newnfsv3_diskless.root_saddr,
148     "%Ssockaddr_in", "Diskless root nfs address");
149 
150 
void		newnfsargs_ntoh(struct nfs_args *);

/* Diskless (NFS root) helpers defined later in this file. */
static int	nfs_mountdiskless(char *path, struct sockaddr_in *sin,
		    struct nfs_args *args, struct thread *td,
		    struct vnode **vpp, struct mount *mp);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
158 
159 int
160 newnfs_iosize(struct nfsmount *nmp)
161 {
162 	int iosize, maxio;
163 
164 	/* First, set the upper limit for iosize */
165 	if (nmp->nm_flag & NFSMNT_NFSV4) {
166 		maxio = NFS_MAXBSIZE;
167 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
168 		if (nmp->nm_sotype == SOCK_DGRAM)
169 			maxio = NFS_MAXDGRAMDATA;
170 		else
171 			maxio = NFS_MAXBSIZE;
172 	} else {
173 		maxio = NFS_V2MAXDATA;
174 	}
175 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
176 		nmp->nm_rsize = maxio;
177 	if (nmp->nm_rsize > MAXBSIZE)
178 		nmp->nm_rsize = MAXBSIZE;
179 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
180 		nmp->nm_readdirsize = maxio;
181 	if (nmp->nm_readdirsize > nmp->nm_rsize)
182 		nmp->nm_readdirsize = nmp->nm_rsize;
183 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
184 		nmp->nm_wsize = maxio;
185 	if (nmp->nm_wsize > MAXBSIZE)
186 		nmp->nm_wsize = MAXBSIZE;
187 
188 	/*
189 	 * Calculate the size used for io buffers.  Use the larger
190 	 * of the two sizes to minimise nfs requests but make sure
191 	 * that it is at least one VM page to avoid wasting buffer
192 	 * space.
193 	 */
194 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
195 	iosize = imax(iosize, PAGE_SIZE);
196 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
197 	return (iosize);
198 }
199 
200 static void
201 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
202 {
203 
204 	args->version = NFS_ARGSVERSION;
205 	args->addr = oargs->addr;
206 	args->addrlen = oargs->addrlen;
207 	args->sotype = oargs->sotype;
208 	args->proto = oargs->proto;
209 	args->fh = oargs->fh;
210 	args->fhsize = oargs->fhsize;
211 	args->flags = oargs->flags;
212 	args->wsize = oargs->wsize;
213 	args->rsize = oargs->rsize;
214 	args->readdirsize = oargs->readdirsize;
215 	args->timeo = oargs->timeo;
216 	args->retrans = oargs->retrans;
217 	args->readahead = oargs->readahead;
218 	args->hostname = oargs->hostname;
219 }
220 
221 static void
222 nfs_convert_diskless(void)
223 {
224 
225 	bcopy(&newnfs_diskless.myif, &newnfsv3_diskless.myif,
226 	    sizeof (struct ifaliasreq));
227 	bcopy(&newnfs_diskless.mygateway, &newnfsv3_diskless.mygateway,
228 	    sizeof (struct sockaddr_in));
229 	nfs_convert_oargs(&newnfsv3_diskless.root_args,
230 	    &newnfs_diskless.root_args);
231 	if (newnfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
232 		newnfsv3_diskless.root_fhsize = NFSX_MYFH;
233 		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh,
234 		    NFSX_MYFH);
235 	} else {
236 		newnfsv3_diskless.root_fhsize = NFSX_V2FH;
237 		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh,
238 		    NFSX_V2FH);
239 	}
240 	bcopy(&newnfs_diskless.root_saddr,&newnfsv3_diskless.root_saddr,
241 	    sizeof(struct sockaddr_in));
242 	bcopy(newnfs_diskless.root_hostnam, newnfsv3_diskless.root_hostnam,
243 	    MNAMELEN);
244 	newnfsv3_diskless.root_time = newnfs_diskless.root_time;
245 	bcopy(newnfs_diskless.my_hostnam, newnfsv3_diskless.my_hostnam,
246 	    MAXHOSTNAMELEN);
247 	newnfs_diskless_valid = 3;
248 }
249 
250 /*
251  * nfs statfs call
252  */
253 static int
254 nfs_statfs(struct mount *mp, struct statfs *sbp)
255 {
256 	struct vnode *vp;
257 	struct thread *td;
258 	struct nfsmount *nmp = VFSTONFS(mp);
259 	struct nfsvattr nfsva;
260 	struct nfsfsinfo fs;
261 	struct nfsstatfs sb;
262 	int error = 0, attrflag, gotfsinfo = 0, ret;
263 	struct nfsnode *np;
264 
265 	td = curthread;
266 
267 	error = vfs_busy(mp, MBF_NOWAIT);
268 	if (error)
269 		return (error);
270 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
271 	if (error) {
272 		vfs_unbusy(mp);
273 		return (error);
274 	}
275 	vp = NFSTOV(np);
276 	mtx_lock(&nmp->nm_mtx);
277 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
278 		mtx_unlock(&nmp->nm_mtx);
279 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
280 		    &attrflag, NULL);
281 		if (!error)
282 			gotfsinfo = 1;
283 	} else
284 		mtx_unlock(&nmp->nm_mtx);
285 	if (!error)
286 		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
287 		    &attrflag, NULL);
288 	if (attrflag == 0) {
289 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
290 		    td->td_ucred, td, &nfsva, NULL);
291 		if (ret) {
292 			/*
293 			 * Just set default values to get things going.
294 			 */
295 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
296 			nfsva.na_vattr.va_type = VDIR;
297 			nfsva.na_vattr.va_mode = 0777;
298 			nfsva.na_vattr.va_nlink = 100;
299 			nfsva.na_vattr.va_uid = (uid_t)0;
300 			nfsva.na_vattr.va_gid = (gid_t)0;
301 			nfsva.na_vattr.va_fileid = 2;
302 			nfsva.na_vattr.va_gen = 1;
303 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
304 			nfsva.na_vattr.va_size = 512 * 1024;
305 		}
306 	}
307 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
308 	if (!error) {
309 	    mtx_lock(&nmp->nm_mtx);
310 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
311 		nfscl_loadfsinfo(nmp, &fs);
312 	    nfscl_loadsbinfo(nmp, &sb, sbp);
313 	    sbp->f_flags = nmp->nm_flag;
314 	    sbp->f_iosize = newnfs_iosize(nmp);
315 	    mtx_unlock(&nmp->nm_mtx);
316 	    if (sbp != &mp->mnt_stat) {
317 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
318 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
319 	    }
320 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
321 	} else if (NFS_ISV4(vp)) {
322 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
323 	}
324 	vput(vp);
325 	vfs_unbusy(mp);
326 	return (error);
327 }
328 
329 /*
330  * nfs version 3 fsinfo rpc call
331  */
332 int
333 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
334     struct thread *td)
335 {
336 	struct nfsfsinfo fs;
337 	struct nfsvattr nfsva;
338 	int error, attrflag;
339 
340 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
341 	if (!error) {
342 		if (attrflag)
343 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
344 			    1);
345 		mtx_lock(&nmp->nm_mtx);
346 		nfscl_loadfsinfo(nmp, &fs);
347 		mtx_unlock(&nmp->nm_mtx);
348 	}
349 	return (error);
350 }
351 
352 /*
353  * Mount a remote root fs via. nfs. This depends on the info in the
354  * newnfs_diskless structure that has been filled in properly by some primary
355  * bootstrap.
356  * It goes something like this:
357  * - do enough of "ifconfig" by calling ifioctl() so that the system
358  *   can talk to the server
359  * - If newnfs_diskless.mygateway is filled in, use that address as
360  *   a default gateway.
361  * - build the rootfs mount point and call mountnfs() to do the rest.
362  *
363  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
364  * structure, as well as other global NFS client variables here, as
365  * nfs_mountroot() will be called once in the boot before any other NFS
366  * client activity occurs.
367  */
368 int
369 ncl_mountroot(struct mount *mp)
370 {
371 	struct thread *td = curthread;
372 	struct nfsv3_diskless *nd = &newnfsv3_diskless;
373 	struct socket *so;
374 	struct vnode *vp;
375 	struct ifreq ir;
376 	int error;
377 	u_long l;
378 	char buf[128];
379 	char *cp;
380 
381 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
382 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
383 #elif defined(NFS_ROOT)
384 	nfs_setup_diskless();
385 #endif
386 
387 	if (newnfs_diskless_valid == 0)
388 		return (-1);
389 	if (newnfs_diskless_valid == 1)
390 		nfs_convert_diskless();
391 
392 	/*
393 	 * XXX splnet, so networks will receive...
394 	 */
395 	splnet();
396 
397 	/*
398 	 * Do enough of ifconfig(8) so that the critical net interface can
399 	 * talk to the server.
400 	 */
401 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
402 	    td->td_ucred, td);
403 	if (error)
404 		panic("nfs_mountroot: socreate(%04x): %d",
405 			nd->myif.ifra_addr.sa_family, error);
406 
407 #if 0 /* XXX Bad idea */
408 	/*
409 	 * We might not have been told the right interface, so we pass
410 	 * over the first ten interfaces of the same kind, until we get
411 	 * one of them configured.
412 	 */
413 
414 	for (i = strlen(nd->myif.ifra_name) - 1;
415 		nd->myif.ifra_name[i] >= '0' &&
416 		nd->myif.ifra_name[i] <= '9';
417 		nd->myif.ifra_name[i] ++) {
418 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
419 		if(!error)
420 			break;
421 	}
422 #endif
423 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
424 	if (error)
425 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
426 	if ((cp = getenv("boot.netif.mtu")) != NULL) {
427 		ir.ifr_mtu = strtol(cp, NULL, 10);
428 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
429 		freeenv(cp);
430 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
431 		if (error)
432 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
433 	}
434 	soclose(so);
435 
436 	/*
437 	 * If the gateway field is filled in, set it as the default route.
438 	 * Note that pxeboot will set a default route of 0 if the route
439 	 * is not set by the DHCP server.  Check also for a value of 0
440 	 * to avoid panicking inappropriately in that situation.
441 	 */
442 	if (nd->mygateway.sin_len != 0 &&
443 	    nd->mygateway.sin_addr.s_addr != 0) {
444 		struct sockaddr_in mask, sin;
445 
446 		bzero((caddr_t)&mask, sizeof(mask));
447 		sin = mask;
448 		sin.sin_family = AF_INET;
449 		sin.sin_len = sizeof(sin);
450                 /* XXX MRT use table 0 for this sort of thing */
451 		error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
452 		    (struct sockaddr *)&nd->mygateway,
453 		    (struct sockaddr *)&mask,
454 		    RTF_UP | RTF_GATEWAY, NULL);
455 		if (error)
456 			panic("nfs_mountroot: RTM_ADD: %d", error);
457 	}
458 
459 	/*
460 	 * Create the rootfs mount point.
461 	 */
462 	nd->root_args.fh = nd->root_fh;
463 	nd->root_args.fhsize = nd->root_fhsize;
464 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
465 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
466 		(l >> 24) & 0xff, (l >> 16) & 0xff,
467 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
468 	printf("NFS ROOT: %s\n", buf);
469 	nd->root_args.hostname = buf;
470 	if ((error = nfs_mountdiskless(buf,
471 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
472 		return (error);
473 	}
474 
475 	/*
476 	 * This is not really an nfs issue, but it is much easier to
477 	 * set hostname here and then let the "/etc/rc.xxx" files
478 	 * mount the right /var based upon its preset value.
479 	 */
480 	mtx_lock(&prison0.pr_mtx);
481 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
482 	    sizeof(prison0.pr_hostname));
483 	mtx_unlock(&prison0.pr_mtx);
484 	inittodr(ntohl(nd->root_time));
485 	return (0);
486 }
487 
488 /*
489  * Internal version of mount system call for diskless setup.
490  */
491 static int
492 nfs_mountdiskless(char *path,
493     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
494     struct vnode **vpp, struct mount *mp)
495 {
496 	struct sockaddr *nam;
497 	int error;
498 
499 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
500 	if ((error = mountnfs(args, mp, nam, path, NULL, NULL, NULL, vpp,
501 	    td->td_ucred, td, NFS_DEFAULT_NEGNAMETIMEO)) != 0) {
502 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
503 		return (error);
504 	}
505 	return (0);
506 }
507 
508 static void
509 nfs_sec_name(char *sec, int *flagsp)
510 {
511 	if (!strcmp(sec, "krb5"))
512 		*flagsp |= NFSMNT_KERB;
513 	else if (!strcmp(sec, "krb5i"))
514 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
515 	else if (!strcmp(sec, "krb5p"))
516 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
517 }
518 
519 static void
520 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
521     struct ucred *cred, struct thread *td)
522 {
523 	int s;
524 	int adjsock;
525 
526 	s = splnet();
527 
528 	/*
529 	 * Set read-only flag if requested; otherwise, clear it if this is
530 	 * an update.  If this is not an update, then either the read-only
531 	 * flag is already clear, or this is a root mount and it was set
532 	 * intentionally at some previous point.
533 	 */
534 	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
535 		MNT_ILOCK(mp);
536 		mp->mnt_flag |= MNT_RDONLY;
537 		MNT_IUNLOCK(mp);
538 	} else if (mp->mnt_flag & MNT_UPDATE) {
539 		MNT_ILOCK(mp);
540 		mp->mnt_flag &= ~MNT_RDONLY;
541 		MNT_IUNLOCK(mp);
542 	}
543 
544 	/*
545 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
546 	 * no sense in that context.  Also, set up appropriate retransmit
547 	 * and soft timeout behavior.
548 	 */
549 	if (argp->sotype == SOCK_STREAM) {
550 		nmp->nm_flag &= ~NFSMNT_NOCONN;
551 		nmp->nm_timeo = NFS_MAXTIMEO;
552 	}
553 
554 	/* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
555 	if ((argp->flags & NFSMNT_NFSV3) == 0)
556 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
557 
558 	/* Also re-bind if we're switching to/from a connected UDP socket */
559 	adjsock = ((nmp->nm_flag & NFSMNT_NOCONN) !=
560 		    (argp->flags & NFSMNT_NOCONN));
561 
562 	/* Update flags atomically.  Don't change the lock bits. */
563 	nmp->nm_flag = argp->flags | nmp->nm_flag;
564 	splx(s);
565 
566 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
567 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
568 		if (nmp->nm_timeo < NFS_MINTIMEO)
569 			nmp->nm_timeo = NFS_MINTIMEO;
570 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
571 			nmp->nm_timeo = NFS_MAXTIMEO;
572 	}
573 
574 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
575 		nmp->nm_retry = argp->retrans;
576 		if (nmp->nm_retry > NFS_MAXREXMIT)
577 			nmp->nm_retry = NFS_MAXREXMIT;
578 	}
579 
580 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
581 		nmp->nm_wsize = argp->wsize;
582 		/* Round down to multiple of blocksize */
583 		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
584 		if (nmp->nm_wsize <= 0)
585 			nmp->nm_wsize = NFS_FABLKSIZE;
586 	}
587 
588 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
589 		nmp->nm_rsize = argp->rsize;
590 		/* Round down to multiple of blocksize */
591 		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
592 		if (nmp->nm_rsize <= 0)
593 			nmp->nm_rsize = NFS_FABLKSIZE;
594 	}
595 
596 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
597 		nmp->nm_readdirsize = argp->readdirsize;
598 	}
599 
600 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
601 		nmp->nm_acregmin = argp->acregmin;
602 	else
603 		nmp->nm_acregmin = NFS_MINATTRTIMO;
604 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
605 		nmp->nm_acregmax = argp->acregmax;
606 	else
607 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
608 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
609 		nmp->nm_acdirmin = argp->acdirmin;
610 	else
611 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
612 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
613 		nmp->nm_acdirmax = argp->acdirmax;
614 	else
615 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
616 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
617 		nmp->nm_acdirmin = nmp->nm_acdirmax;
618 	if (nmp->nm_acregmin > nmp->nm_acregmax)
619 		nmp->nm_acregmin = nmp->nm_acregmax;
620 
621 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
622 		if (argp->readahead <= NFS_MAXRAHEAD)
623 			nmp->nm_readahead = argp->readahead;
624 		else
625 			nmp->nm_readahead = NFS_MAXRAHEAD;
626 	}
627 	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
628 		if (argp->wcommitsize < nmp->nm_wsize)
629 			nmp->nm_wcommitsize = nmp->nm_wsize;
630 		else
631 			nmp->nm_wcommitsize = argp->wcommitsize;
632 	}
633 
634 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
635 		    (nmp->nm_soproto != argp->proto));
636 
637 	if (nmp->nm_client != NULL && adjsock) {
638 		int haslock = 0, error = 0;
639 
640 		if (nmp->nm_sotype == SOCK_STREAM) {
641 			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
642 			if (!error)
643 				haslock = 1;
644 		}
645 		if (!error) {
646 		    newnfs_disconnect(&nmp->nm_sockreq);
647 		    if (haslock)
648 			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
649 		    nmp->nm_sotype = argp->sotype;
650 		    nmp->nm_soproto = argp->proto;
651 		    if (nmp->nm_sotype == SOCK_DGRAM)
652 			while (newnfs_connect(nmp, &nmp->nm_sockreq,
653 			    cred, td, 0)) {
654 				printf("newnfs_args: retrying connect\n");
655 				(void) nfs_catnap(PSOCK, 0, "newnfscon");
656 			}
657 		}
658 	} else {
659 		nmp->nm_sotype = argp->sotype;
660 		nmp->nm_soproto = argp->proto;
661 	}
662 }
663 
/*
 * NULL-terminated list of the mount option names that nfs_mount() passes
 * to vfs_filteropt() as the legal set for this file system.
 */
static const char *nfs_opts[] = {
	/* Source and generic mount options. */
	"from", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow",
	"union", "noclusterr", "noclusterw", "multilabel", "acls", "force",
	"update", "async",
	/* Connection, locking and interruptibility. */
	"noconn", "nolockd", "conn", "lockd", "intr",
	/* Directory reading and soft/hard behaviour. */
	"rdirplus", "readdirsize", "soft", "hard",
	/* Transport selection. */
	"mntudp", "tcp", "udp",
	/* Transfer sizes and retransmission. */
	"wsize", "rsize", "retrans",
	/* Attribute cache timeouts. */
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	/* Remaining NFS-specific options. */
	"resvport", "readahead", "hostname", "timeout", "addr", "fh",
	"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath", "negnametimeo",
	NULL
};
674 
675 /*
676  * VFS Operations.
677  *
678  * mount system call
679  * It seems a bit dumb to copyinstr() the host and path here and then
680  * bcopy() them in mountnfs(), but I wanted to detect errors before
681  * doing the sockargs() call because sockargs() allocates an mbuf and
682  * an error after that means that I have to release the mbuf.
683  */
684 /* ARGSUSED */
685 static int
686 nfs_mount(struct mount *mp)
687 {
688 	struct nfs_args args = {
689 	    .version = NFS_ARGSVERSION,
690 	    .addr = NULL,
691 	    .addrlen = sizeof (struct sockaddr_in),
692 	    .sotype = SOCK_STREAM,
693 	    .proto = 0,
694 	    .fh = NULL,
695 	    .fhsize = 0,
696 	    .flags = 0,
697 	    .wsize = NFS_WSIZE,
698 	    .rsize = NFS_RSIZE,
699 	    .readdirsize = NFS_READDIRSIZE,
700 	    .timeo = 10,
701 	    .retrans = NFS_RETRANS,
702 	    .readahead = NFS_DEFRAHEAD,
703 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
704 	    .hostname = NULL,
705 	    /* args version 4 */
706 	    .acregmin = NFS_MINATTRTIMO,
707 	    .acregmax = NFS_MAXATTRTIMO,
708 	    .acdirmin = NFS_MINDIRATTRTIMO,
709 	    .acdirmax = NFS_MAXDIRATTRTIMO,
710 	    .dirlen = 0,
711 	    .krbnamelen = 0,
712 	    .srvkrbnamelen = 0,
713 	};
714 	int error = 0, ret, len;
715 	struct sockaddr *nam = NULL;
716 	struct vnode *vp;
717 	struct thread *td;
718 	char hst[MNAMELEN];
719 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
720 	char *opt, *name, *secname;
721 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
722 
723 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
724 		error = EINVAL;
725 		goto out;
726 	}
727 
728 	td = curthread;
729 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
730 		error = ncl_mountroot(mp);
731 		goto out;
732 	}
733 
734 	nfscl_init();
735 
736 	/* Handle the new style options. */
737 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
738 		args.flags |= NFSMNT_NOCONN;
739 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
740 		args.flags |= NFSMNT_NOCONN;
741 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
742 		args.flags |= NFSMNT_NOLOCKD;
743 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
744 		args.flags &= ~NFSMNT_NOLOCKD;
745 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
746 		args.flags |= NFSMNT_INT;
747 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
748 		args.flags |= NFSMNT_RDIRPLUS;
749 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
750 		args.flags |= NFSMNT_RESVPORT;
751 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
752 		args.flags &= ~NFSMNT_RESVPORT;
753 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
754 		args.flags |= NFSMNT_SOFT;
755 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
756 		args.flags &= ~NFSMNT_SOFT;
757 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
758 		args.sotype = SOCK_DGRAM;
759 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
760 		args.sotype = SOCK_DGRAM;
761 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
762 		args.sotype = SOCK_STREAM;
763 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
764 		args.flags |= NFSMNT_NFSV3;
765 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
766 		args.flags |= NFSMNT_NFSV4;
767 		args.sotype = SOCK_STREAM;
768 	}
769 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
770 		args.flags |= NFSMNT_ALLGSSNAME;
771 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
772 		if (opt == NULL) {
773 			vfs_mount_error(mp, "illegal readdirsize");
774 			error = EINVAL;
775 			goto out;
776 		}
777 		ret = sscanf(opt, "%d", &args.readdirsize);
778 		if (ret != 1 || args.readdirsize <= 0) {
779 			vfs_mount_error(mp, "illegal readdirsize: %s",
780 			    opt);
781 			error = EINVAL;
782 			goto out;
783 		}
784 		args.flags |= NFSMNT_READDIRSIZE;
785 	}
786 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
787 		if (opt == NULL) {
788 			vfs_mount_error(mp, "illegal readahead");
789 			error = EINVAL;
790 			goto out;
791 		}
792 		ret = sscanf(opt, "%d", &args.readahead);
793 		if (ret != 1 || args.readahead <= 0) {
794 			vfs_mount_error(mp, "illegal readahead: %s",
795 			    opt);
796 			error = EINVAL;
797 			goto out;
798 		}
799 		args.flags |= NFSMNT_READAHEAD;
800 	}
801 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
802 		if (opt == NULL) {
803 			vfs_mount_error(mp, "illegal wsize");
804 			error = EINVAL;
805 			goto out;
806 		}
807 		ret = sscanf(opt, "%d", &args.wsize);
808 		if (ret != 1 || args.wsize <= 0) {
809 			vfs_mount_error(mp, "illegal wsize: %s",
810 			    opt);
811 			error = EINVAL;
812 			goto out;
813 		}
814 		args.flags |= NFSMNT_WSIZE;
815 	}
816 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
817 		if (opt == NULL) {
818 			vfs_mount_error(mp, "illegal rsize");
819 			error = EINVAL;
820 			goto out;
821 		}
822 		ret = sscanf(opt, "%d", &args.rsize);
823 		if (ret != 1 || args.rsize <= 0) {
824 			vfs_mount_error(mp, "illegal wsize: %s",
825 			    opt);
826 			error = EINVAL;
827 			goto out;
828 		}
829 		args.flags |= NFSMNT_RSIZE;
830 	}
831 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
832 		if (opt == NULL) {
833 			vfs_mount_error(mp, "illegal retrans");
834 			error = EINVAL;
835 			goto out;
836 		}
837 		ret = sscanf(opt, "%d", &args.retrans);
838 		if (ret != 1 || args.retrans <= 0) {
839 			vfs_mount_error(mp, "illegal retrans: %s",
840 			    opt);
841 			error = EINVAL;
842 			goto out;
843 		}
844 		args.flags |= NFSMNT_RETRANS;
845 	}
846 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
847 		ret = sscanf(opt, "%d", &args.acregmin);
848 		if (ret != 1 || args.acregmin < 0) {
849 			vfs_mount_error(mp, "illegal acregmin: %s",
850 			    opt);
851 			error = EINVAL;
852 			goto out;
853 		}
854 		args.flags |= NFSMNT_ACREGMIN;
855 	}
856 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
857 		ret = sscanf(opt, "%d", &args.acregmax);
858 		if (ret != 1 || args.acregmax < 0) {
859 			vfs_mount_error(mp, "illegal acregmax: %s",
860 			    opt);
861 			error = EINVAL;
862 			goto out;
863 		}
864 		args.flags |= NFSMNT_ACREGMAX;
865 	}
866 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
867 		ret = sscanf(opt, "%d", &args.acdirmin);
868 		if (ret != 1 || args.acdirmin < 0) {
869 			vfs_mount_error(mp, "illegal acdirmin: %s",
870 			    opt);
871 			error = EINVAL;
872 			goto out;
873 		}
874 		args.flags |= NFSMNT_ACDIRMIN;
875 	}
876 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
877 		ret = sscanf(opt, "%d", &args.acdirmax);
878 		if (ret != 1 || args.acdirmax < 0) {
879 			vfs_mount_error(mp, "illegal acdirmax: %s",
880 			    opt);
881 			error = EINVAL;
882 			goto out;
883 		}
884 		args.flags |= NFSMNT_ACDIRMAX;
885 	}
886 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
887 		ret = sscanf(opt, "%d", &args.timeo);
888 		if (ret != 1 || args.timeo <= 0) {
889 			vfs_mount_error(mp, "illegal timeout: %s",
890 			    opt);
891 			error = EINVAL;
892 			goto out;
893 		}
894 		args.flags |= NFSMNT_TIMEO;
895 	}
896 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
897 	    == 0) {
898 		ret = sscanf(opt, "%d", &negnametimeo);
899 		if (ret != 1 || negnametimeo < 0) {
900 			vfs_mount_error(mp, "illegal negnametimeo: %s",
901 			    opt);
902 			error = EINVAL;
903 			goto out;
904 		}
905 	}
906 	if (vfs_getopt(mp->mnt_optnew, "sec",
907 		(void **) &secname, NULL) == 0)
908 		nfs_sec_name(secname, &args.flags);
909 
910 	if (mp->mnt_flag & MNT_UPDATE) {
911 		struct nfsmount *nmp = VFSTONFS(mp);
912 
913 		if (nmp == NULL) {
914 			error = EIO;
915 			goto out;
916 		}
917 		/*
918 		 * When doing an update, we can't change version,
919 		 * security, switch lockd strategies or change cookie
920 		 * translation
921 		 */
922 		args.flags = (args.flags &
923 		    ~(NFSMNT_NFSV3 |
924 		      NFSMNT_NFSV4 |
925 		      NFSMNT_KERB |
926 		      NFSMNT_INTEGRITY |
927 		      NFSMNT_PRIVACY |
928 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
929 		    (nmp->nm_flag &
930 			(NFSMNT_NFSV3 |
931 			 NFSMNT_NFSV4 |
932 			 NFSMNT_KERB |
933 			 NFSMNT_INTEGRITY |
934 			 NFSMNT_PRIVACY |
935 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
936 		nfs_decode_args(mp, nmp, &args, td->td_ucred, td);
937 		goto out;
938 	}
939 
940 	/*
941 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
942 	 * or no-connection mode for those protocols that support
943 	 * no-connection mode (the flag will be cleared later for protocols
944 	 * that do not support no-connection mode).  This will allow a client
945 	 * to receive replies from a different IP then the request was
946 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
947 	 * not 0.
948 	 */
949 	if (nfs_ip_paranoia == 0)
950 		args.flags |= NFSMNT_NOCONN;
951 
952 	if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
953 	    &args.fhsize) == 0) {
954 		if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
955 			vfs_mount_error(mp, "Bad file handle");
956 			error = EINVAL;
957 			goto out;
958 		}
959 		bcopy(args.fh, nfh, args.fhsize);
960 	} else {
961 		args.fhsize = 0;
962 	}
963 
964 	(void) vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname,
965 	    &len);
966 	if (args.hostname == NULL) {
967 		vfs_mount_error(mp, "Invalid hostname");
968 		error = EINVAL;
969 		goto out;
970 	}
971 	bcopy(args.hostname, hst, MNAMELEN);
972 	hst[MNAMELEN - 1] = '\0';
973 
974 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
975 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
976 	else
977 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
978 	args.srvkrbnamelen = strlen(srvkrbname);
979 
980 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
981 		strlcpy(krbname, name, sizeof (krbname));
982 	else
983 		krbname[0] = '\0';
984 	args.krbnamelen = strlen(krbname);
985 
986 	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
987 		strlcpy(dirpath, name, sizeof (dirpath));
988 	else
989 		dirpath[0] = '\0';
990 	args.dirlen = strlen(dirpath);
991 
992 	if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr,
993 	    &args.addrlen) == 0) {
994 		if (args.addrlen > SOCK_MAXADDRLEN) {
995 			error = ENAMETOOLONG;
996 			goto out;
997 		}
998 		nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
999 		bcopy(args.addr, nam, args.addrlen);
1000 		nam->sa_len = args.addrlen;
1001 	}
1002 
1003 	args.fh = nfh;
1004 	error = mountnfs(&args, mp, nam, hst, krbname, dirpath, srvkrbname,
1005 	    &vp, td->td_ucred, td, negnametimeo);
1006 out:
1007 	if (!error) {
1008 		MNT_ILOCK(mp);
1009 		mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
1010 		MNT_IUNLOCK(mp);
1011 	}
1012 	return (error);
1013 }
1014 
1015 
1016 /*
1017  * VFS Operations.
1018  *
1019  * mount system call
1020  * It seems a bit dumb to copyinstr() the host and path here and then
1021  * bcopy() them in mountnfs(), but I wanted to detect errors before
1022  * doing the sockargs() call because sockargs() allocates an mbuf and
1023  * an error after that means that I have to release the mbuf.
1024  */
1025 /* ARGSUSED */
1026 static int
1027 nfs_cmount(struct mntarg *ma, void *data, int flags)
1028 {
1029 	int error;
1030 	struct nfs_args args;
1031 
1032 	error = copyin(data, &args, sizeof (struct nfs_args));
1033 	if (error)
1034 		return error;
1035 
1036 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1037 
1038 	error = kernel_mount(ma, flags);
1039 	return (error);
1040 }
1041 
1042 /*
1043  * Common code for mount and mountroot
1044  */
1045 static int
1046 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1047     char *hst, u_char *krbname, u_char *dirpath, u_char *srvkrbname,
1048     struct vnode **vpp, struct ucred *cred, struct thread *td,
1049     int negnametimeo)
1050 {
1051 	struct nfsmount *nmp;
1052 	struct nfsnode *np;
1053 	int error, trycnt, ret;
1054 	struct nfsvattr nfsva;
1055 	static u_int64_t clval = 0;
1056 
1057 	if (mp->mnt_flag & MNT_UPDATE) {
1058 		nmp = VFSTONFS(mp);
1059 		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1060 		FREE(nam, M_SONAME);
1061 		return (0);
1062 	} else {
1063 		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1064 		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2,
1065 		    M_NEWNFSMNT, M_WAITOK);
1066 		bzero((caddr_t)nmp, sizeof (struct nfsmount) +
1067 		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2);
1068 		TAILQ_INIT(&nmp->nm_bufq);
1069 		if (clval == 0)
1070 			clval = (u_int64_t)nfsboottime.tv_sec;
1071 		nmp->nm_clval = clval++;
1072 		nmp->nm_krbnamelen = argp->krbnamelen;
1073 		nmp->nm_dirpathlen = argp->dirlen;
1074 		nmp->nm_srvkrbnamelen = argp->srvkrbnamelen;
1075 		if (td->td_ucred->cr_uid != (uid_t)0) {
1076 			/*
1077 			 * nm_uid is used to get KerberosV credentials for
1078 			 * the nfsv4 state handling operations if there is
1079 			 * no host based principal set. Use the uid of
1080 			 * this user if not root, since they are doing the
1081 			 * mount. I don't think setting this for root will
1082 			 * work, since root normally does not have user
1083 			 * credentials in a credentials cache.
1084 			 */
1085 			nmp->nm_uid = td->td_ucred->cr_uid;
1086 		} else {
1087 			/*
1088 			 * Just set to -1, so it won't be used.
1089 			 */
1090 			nmp->nm_uid = (uid_t)-1;
1091 		}
1092 
1093 		/* Copy and null terminate all the names */
1094 		if (nmp->nm_krbnamelen > 0) {
1095 			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1096 			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1097 		}
1098 		if (nmp->nm_dirpathlen > 0) {
1099 			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1100 			    nmp->nm_dirpathlen);
1101 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1102 			    + 1] = '\0';
1103 		}
1104 		if (nmp->nm_srvkrbnamelen > 0) {
1105 			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1106 			    nmp->nm_srvkrbnamelen);
1107 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1108 			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1109 		}
1110 		nmp->nm_sockreq.nr_cred = crhold(cred);
1111 		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1112 		mp->mnt_data = nmp;
1113 	}
1114 	vfs_getnewfsid(mp);
1115 	nmp->nm_mountp = mp;
1116 	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1117 	nmp->nm_negnametimeo = negnametimeo;
1118 
1119 	nfs_decode_args(mp, nmp, argp, cred, td);
1120 
1121 	/*
1122 	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1123 	 * high, depending on whether we end up with negative offsets in
1124 	 * the client or server somewhere.  2GB-1 may be safer.
1125 	 *
1126 	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1127 	 * that we can handle until we find out otherwise.
1128 	 * XXX Our "safe" limit on the client is what we can store in our
1129 	 * buffer cache using signed(!) block numbers.
1130 	 */
1131 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1132 		nmp->nm_maxfilesize = 0xffffffffLL;
1133 	else
1134 		nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
1135 
1136 	nmp->nm_timeo = NFS_TIMEO;
1137 	nmp->nm_retry = NFS_RETRANS;
1138 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1139 		nmp->nm_wsize = NFS_WSIZE;
1140 		nmp->nm_rsize = NFS_RSIZE;
1141 		nmp->nm_readdirsize = NFS_READDIRSIZE;
1142 	}
1143 	nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1144 	nmp->nm_numgrps = NFS_MAXGRPS;
1145 	nmp->nm_readahead = NFS_DEFRAHEAD;
1146 	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1147 	if (nmp->nm_tprintf_delay < 0)
1148 		nmp->nm_tprintf_delay = 0;
1149 	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1150 	if (nmp->nm_tprintf_initial_delay < 0)
1151 		nmp->nm_tprintf_initial_delay = 0;
1152 	nmp->nm_fhsize = argp->fhsize;
1153 	if (nmp->nm_fhsize > 0)
1154 		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1155 	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1156 	nmp->nm_nam = nam;
1157 	/* Set up the sockets and per-host congestion */
1158 	nmp->nm_sotype = argp->sotype;
1159 	nmp->nm_soproto = argp->proto;
1160 	nmp->nm_sockreq.nr_prog = NFS_PROG;
1161 	if ((argp->flags & NFSMNT_NFSV4))
1162 		nmp->nm_sockreq.nr_vers = NFS_VER4;
1163 	else if ((argp->flags & NFSMNT_NFSV3))
1164 		nmp->nm_sockreq.nr_vers = NFS_VER3;
1165 	else
1166 		nmp->nm_sockreq.nr_vers = NFS_VER2;
1167 
1168 
1169 	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1170 		goto bad;
1171 
1172 	/*
1173 	 * A reference count is needed on the nfsnode representing the
1174 	 * remote root.  If this object is not persistent, then backward
1175 	 * traversals of the mount point (i.e. "..") will not work if
1176 	 * the nfsnode gets flushed out of the cache. Ufs does not have
1177 	 * this problem, because one can identify root inodes by their
1178 	 * number == ROOTINO (2).
1179 	 */
1180 	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1181 	    nmp->nm_dirpathlen > 0) {
1182 		/*
1183 		 * If the fhsize on the mount point == 0 for V4, the mount
1184 		 * path needs to be looked up.
1185 		 */
1186 		trycnt = 3;
1187 		do {
1188 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1189 			    cred, td);
1190 			if (error)
1191 				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1192 		} while (error && --trycnt > 0);
1193 		if (error) {
1194 			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1195 			goto bad;
1196 		}
1197 	}
1198 	if (nmp->nm_fhsize > 0) {
1199 		/*
1200 		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1201 		 * non-zero for the root vnode. f_iosize will be set correctly
1202 		 * by nfs_statfs() before any I/O occurs.
1203 		 */
1204 		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1205 		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
1206 		if (error)
1207 			goto bad;
1208 		*vpp = NFSTOV(np);
1209 
1210 		/*
1211 		 * Get file attributes and transfer parameters for the
1212 		 * mountpoint.  This has the side effect of filling in
1213 		 * (*vpp)->v_type with the correct value.
1214 		 */
1215 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1216 		    cred, td, &nfsva, NULL);
1217 		if (ret) {
1218 			/*
1219 			 * Just set default values to get things going.
1220 			 */
1221 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1222 			nfsva.na_vattr.va_type = VDIR;
1223 			nfsva.na_vattr.va_mode = 0777;
1224 			nfsva.na_vattr.va_nlink = 100;
1225 			nfsva.na_vattr.va_uid = (uid_t)0;
1226 			nfsva.na_vattr.va_gid = (gid_t)0;
1227 			nfsva.na_vattr.va_fileid = 2;
1228 			nfsva.na_vattr.va_gen = 1;
1229 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1230 			nfsva.na_vattr.va_size = 512 * 1024;
1231 		}
1232 		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1233 		if (argp->flags & NFSMNT_NFSV3)
1234 			ncl_fsinfo(nmp, *vpp, cred, td);
1235 
1236 		/*
1237 		 * Lose the lock but keep the ref.
1238 		 */
1239 		VOP_UNLOCK(*vpp, 0);
1240 		return (0);
1241 	}
1242 	error = EIO;
1243 
1244 bad:
1245 	newnfs_disconnect(&nmp->nm_sockreq);
1246 	crfree(nmp->nm_sockreq.nr_cred);
1247 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1248 	mtx_destroy(&nmp->nm_mtx);
1249 	FREE(nmp, M_NEWNFSMNT);
1250 	FREE(nam, M_SONAME);
1251 	return (error);
1252 }
1253 
1254 /*
1255  * unmount system call
1256  */
1257 static int
1258 nfs_unmount(struct mount *mp, int mntflags)
1259 {
1260 	struct thread *td;
1261 	struct nfsmount *nmp;
1262 	int error, flags = 0, trycnt = 0;
1263 
1264 	td = curthread;
1265 
1266 	if (mntflags & MNT_FORCE)
1267 		flags |= FORCECLOSE;
1268 	nmp = VFSTONFS(mp);
1269 	/*
1270 	 * Goes something like this..
1271 	 * - Call vflush() to clear out vnodes for this filesystem
1272 	 * - Close the socket
1273 	 * - Free up the data structures
1274 	 */
1275 	/* In the forced case, cancel any outstanding requests. */
1276 	if (mntflags & MNT_FORCE) {
1277 		error = newnfs_nmcancelreqs(nmp);
1278 		if (error)
1279 			goto out;
1280 		/* For a forced close, get rid of the renew thread now */
1281 		nfscl_umount(nmp, td);
1282 	}
1283 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1284 	do {
1285 		error = vflush(mp, 1, flags, td);
1286 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1287 			(void) nfs_catnap(PSOCK, error, "newndm");
1288 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1289 	if (error)
1290 		goto out;
1291 
1292 	/*
1293 	 * We are now committed to the unmount.
1294 	 */
1295 	if ((mntflags & MNT_FORCE) == 0)
1296 		nfscl_umount(nmp, td);
1297 	newnfs_disconnect(&nmp->nm_sockreq);
1298 	crfree(nmp->nm_sockreq.nr_cred);
1299 	FREE(nmp->nm_nam, M_SONAME);
1300 
1301 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1302 	mtx_destroy(&nmp->nm_mtx);
1303 	FREE(nmp, M_NEWNFSMNT);
1304 out:
1305 	return (error);
1306 }
1307 
1308 /*
1309  * Return root of a filesystem
1310  */
1311 static int
1312 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1313 {
1314 	struct vnode *vp;
1315 	struct nfsmount *nmp;
1316 	struct nfsnode *np;
1317 	int error;
1318 
1319 	nmp = VFSTONFS(mp);
1320 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
1321 	if (error)
1322 		return error;
1323 	vp = NFSTOV(np);
1324 	/*
1325 	 * Get transfer parameters and attributes for root vnode once.
1326 	 */
1327 	mtx_lock(&nmp->nm_mtx);
1328 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1329 		mtx_unlock(&nmp->nm_mtx);
1330 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1331 	} else
1332 		mtx_unlock(&nmp->nm_mtx);
1333 	if (vp->v_type == VNON)
1334 	    vp->v_type = VDIR;
1335 	vp->v_vflag |= VV_ROOT;
1336 	*vpp = vp;
1337 	return (0);
1338 }
1339 
1340 /*
1341  * Flush out the buffer cache
1342  */
1343 /* ARGSUSED */
1344 static int
1345 nfs_sync(struct mount *mp, int waitfor)
1346 {
1347 	struct vnode *vp, *mvp;
1348 	struct thread *td;
1349 	int error, allerror = 0;
1350 
1351 	td = curthread;
1352 
1353 	/*
1354 	 * Force stale buffer cache information to be flushed.
1355 	 */
1356 	MNT_ILOCK(mp);
1357 loop:
1358 	MNT_VNODE_FOREACH(vp, mp, mvp) {
1359 		VI_LOCK(vp);
1360 		MNT_IUNLOCK(mp);
1361 		/* XXX Racy bv_cnt check. */
1362 		if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1363 		    waitfor == MNT_LAZY) {
1364 			VI_UNLOCK(vp);
1365 			MNT_ILOCK(mp);
1366 			continue;
1367 		}
1368 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1369 			MNT_ILOCK(mp);
1370 			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1371 			goto loop;
1372 		}
1373 		error = VOP_FSYNC(vp, waitfor, td);
1374 		if (error)
1375 			allerror = error;
1376 		VOP_UNLOCK(vp, 0);
1377 		vrele(vp);
1378 
1379 		MNT_ILOCK(mp);
1380 	}
1381 	MNT_IUNLOCK(mp);
1382 	return (allerror);
1383 }
1384 
1385 static int
1386 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1387 {
1388 	struct nfsmount *nmp = VFSTONFS(mp);
1389 	struct vfsquery vq;
1390 	int error;
1391 
1392 	bzero(&vq, sizeof(vq));
1393 	switch (op) {
1394 #if 0
1395 	case VFS_CTL_NOLOCKS:
1396 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1397  		if (req->oldptr != NULL) {
1398  			error = SYSCTL_OUT(req, &val, sizeof(val));
1399  			if (error)
1400  				return (error);
1401  		}
1402  		if (req->newptr != NULL) {
1403  			error = SYSCTL_IN(req, &val, sizeof(val));
1404  			if (error)
1405  				return (error);
1406 			if (val)
1407 				nmp->nm_flag |= NFSMNT_NOLOCKS;
1408 			else
1409 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1410  		}
1411 		break;
1412 #endif
1413 	case VFS_CTL_QUERY:
1414 		mtx_lock(&nmp->nm_mtx);
1415 		if (nmp->nm_state & NFSSTA_TIMEO)
1416 			vq.vq_flags |= VQ_NOTRESP;
1417 		mtx_unlock(&nmp->nm_mtx);
1418 #if 0
1419 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1420 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1421 			vq.vq_flags |= VQ_NOTRESPLOCK;
1422 #endif
1423 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1424 		break;
1425  	case VFS_CTL_TIMEO:
1426  		if (req->oldptr != NULL) {
1427  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1428  			    sizeof(nmp->nm_tprintf_initial_delay));
1429  			if (error)
1430  				return (error);
1431  		}
1432  		if (req->newptr != NULL) {
1433 			error = vfs_suser(mp, req->td);
1434 			if (error)
1435 				return (error);
1436  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1437  			    sizeof(nmp->nm_tprintf_initial_delay));
1438  			if (error)
1439  				return (error);
1440  			if (nmp->nm_tprintf_initial_delay < 0)
1441  				nmp->nm_tprintf_initial_delay = 0;
1442  		}
1443 		break;
1444 	default:
1445 		return (ENOTSUP);
1446 	}
1447 	return (0);
1448 }
1449 
1450