xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision f4f8f02054f3abb6ceb84aefcdecc78d5c8b462f)
1 /*-
2  * Copyright (c) 1989, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/lock.h>
49 #include <sys/malloc.h>
50 #include <sys/mbuf.h>
51 #include <sys/module.h>
52 #include <sys/mount.h>
53 #include <sys/proc.h>
54 #include <sys/socket.h>
55 #include <sys/socketvar.h>
56 #include <sys/sockio.h>
57 #include <sys/sysctl.h>
58 #include <sys/vnode.h>
59 #include <sys/signalvar.h>
60 
61 #include <vm/vm.h>
62 #include <vm/vm_extern.h>
63 #include <vm/uma.h>
64 
65 #include <net/if.h>
66 #include <net/route.h>
67 #include <netinet/in.h>
68 
69 #include <fs/nfs/nfsport.h>
70 #include <fs/nfsclient/nfsnode.h>
71 #include <fs/nfsclient/nfsmount.h>
72 #include <fs/nfsclient/nfs.h>
73 #include <fs/nfsclient/nfsdiskless.h>
74 
75 extern int nfscl_ticks;
76 extern struct timeval nfsboottime;
77 extern struct nfsstats	newnfsstats;
78 
79 MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
80 MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
81 
82 SYSCTL_DECL(_vfs_newnfs);
83 SYSCTL_STRUCT(_vfs_newnfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
84 	&newnfsstats, nfsstats, "S,nfsstats");
85 static int nfs_ip_paranoia = 1;
86 SYSCTL_INT(_vfs_newnfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
87     &nfs_ip_paranoia, 0, "");
88 static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
89 SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_INITIAL_DELAY,
90         downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
91 /* how long between console messages "nfs server foo not responding" */
92 static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
93 SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_DELAY,
94         downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
95 
96 static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
97 		    struct nfs_args *argp, struct ucred *, struct thread *);
98 static int	mountnfs(struct nfs_args *, struct mount *,
99 		    struct sockaddr *, char *, u_char *, u_char *, u_char *,
100 		    struct vnode **, struct ucred *, struct thread *);
101 static vfs_mount_t nfs_mount;
102 static vfs_cmount_t nfs_cmount;
103 static vfs_unmount_t nfs_unmount;
104 static vfs_root_t nfs_root;
105 static vfs_statfs_t nfs_statfs;
106 static vfs_sync_t nfs_sync;
107 static vfs_sysctl_t nfs_sysctl;
108 
109 /*
110  * nfs vfs operations.
111  */
112 static struct vfsops nfs_vfsops = {
113 	.vfs_init =		ncl_init,
114 	.vfs_mount =		nfs_mount,
115 	.vfs_cmount =		nfs_cmount,
116 	.vfs_root =		nfs_root,
117 	.vfs_statfs =		nfs_statfs,
118 	.vfs_sync =		nfs_sync,
119 	.vfs_uninit =		ncl_uninit,
120 	.vfs_unmount =		nfs_unmount,
121 	.vfs_sysctl =		nfs_sysctl,
122 };
123 VFS_SET(nfs_vfsops, newnfs, VFCF_NETWORK);
124 
125 /* So that loader and kldload(2) can find us, wherever we are.. */
126 MODULE_VERSION(newnfs, 1);
127 
128 /*
129  * This structure must be filled in by a primary bootstrap or bootstrap
130  * server for a diskless/dataless machine. It is initialized below just
131  * to ensure that it is allocated to initialized data (.data not .bss).
132  */
133 struct nfs_diskless newnfs_diskless = { { { 0 } } };
134 struct nfsv3_diskless newnfsv3_diskless = { { { 0 } } };
135 int newnfs_diskless_valid = 0;
136 
137 SYSCTL_INT(_vfs_newnfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
138 	&newnfs_diskless_valid, 0, "");
139 
140 SYSCTL_STRING(_vfs_newnfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
141 	newnfsv3_diskless.root_hostnam, 0, "");
142 
143 SYSCTL_OPAQUE(_vfs_newnfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
144 	&newnfsv3_diskless.root_saddr, sizeof newnfsv3_diskless.root_saddr,
145 	"%Ssockaddr_in", "");
146 
147 
/* Prototypes for the diskless-boot helpers. */
void		newnfsargs_ntoh(struct nfs_args *args);
static int	nfs_mountdiskless(char *path, struct sockaddr_in *sin,
		    struct nfs_args *args, struct thread *td,
		    struct vnode **vpp, struct mount *mp);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
155 
156 int
157 newnfs_iosize(struct nfsmount *nmp)
158 {
159 	int iosize, maxio;
160 
161 	/* First, set the upper limit for iosize */
162 	if (nmp->nm_flag & NFSMNT_NFSV4) {
163 		maxio = NFS_MAXBSIZE;
164 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
165 		if (nmp->nm_sotype == SOCK_DGRAM)
166 			maxio = NFS_MAXDGRAMDATA;
167 		else
168 			maxio = NFS_MAXBSIZE;
169 	} else {
170 		maxio = NFS_V2MAXDATA;
171 	}
172 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
173 		nmp->nm_rsize = maxio;
174 	if (nmp->nm_rsize > MAXBSIZE)
175 		nmp->nm_rsize = MAXBSIZE;
176 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
177 		nmp->nm_readdirsize = maxio;
178 	if (nmp->nm_readdirsize > nmp->nm_rsize)
179 		nmp->nm_readdirsize = nmp->nm_rsize;
180 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
181 		nmp->nm_wsize = maxio;
182 	if (nmp->nm_wsize > MAXBSIZE)
183 		nmp->nm_wsize = MAXBSIZE;
184 
185 	/*
186 	 * Calculate the size used for io buffers.  Use the larger
187 	 * of the two sizes to minimise nfs requests but make sure
188 	 * that it is at least one VM page to avoid wasting buffer
189 	 * space.
190 	 */
191 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
192 	iosize = imax(iosize, PAGE_SIZE);
193 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
194 	return (iosize);
195 }
196 
197 static void
198 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
199 {
200 
201 	args->version = NFS_ARGSVERSION;
202 	args->addr = oargs->addr;
203 	args->addrlen = oargs->addrlen;
204 	args->sotype = oargs->sotype;
205 	args->proto = oargs->proto;
206 	args->fh = oargs->fh;
207 	args->fhsize = oargs->fhsize;
208 	args->flags = oargs->flags;
209 	args->wsize = oargs->wsize;
210 	args->rsize = oargs->rsize;
211 	args->readdirsize = oargs->readdirsize;
212 	args->timeo = oargs->timeo;
213 	args->retrans = oargs->retrans;
214 	args->readahead = oargs->readahead;
215 	args->hostname = oargs->hostname;
216 }
217 
218 static void
219 nfs_convert_diskless(void)
220 {
221 
222 	bcopy(&newnfs_diskless.myif, &newnfsv3_diskless.myif,
223 		sizeof(struct ifaliasreq));
224 	bcopy(&newnfs_diskless.mygateway, &newnfsv3_diskless.mygateway,
225 		sizeof(struct sockaddr_in));
226 	nfs_convert_oargs(&newnfsv3_diskless.root_args,&newnfs_diskless.root_args);
227 	if (newnfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
228 		newnfsv3_diskless.root_fhsize = NFSX_MYFH;
229 		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh, NFSX_MYFH);
230 	} else {
231 		newnfsv3_diskless.root_fhsize = NFSX_V2FH;
232 		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh, NFSX_V2FH);
233 	}
234 	bcopy(&newnfs_diskless.root_saddr,&newnfsv3_diskless.root_saddr,
235 		sizeof(struct sockaddr_in));
236 	bcopy(newnfs_diskless.root_hostnam, newnfsv3_diskless.root_hostnam, MNAMELEN);
237 	newnfsv3_diskless.root_time = newnfs_diskless.root_time;
238 	bcopy(newnfs_diskless.my_hostnam, newnfsv3_diskless.my_hostnam,
239 		MAXHOSTNAMELEN);
240 	newnfs_diskless_valid = 3;
241 }
242 
243 /*
244  * nfs statfs call
245  */
246 static int
247 nfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
248 {
249 	struct vnode *vp;
250 	struct nfsmount *nmp = VFSTONFS(mp);
251 	struct nfsvattr nfsva;
252 	struct nfsfsinfo fs;
253 	struct nfsstatfs sb;
254 	int error = 0, attrflag, gotfsinfo = 0, ret;
255 	struct nfsnode *np;
256 
257 	error = vfs_busy(mp, MBF_NOWAIT);
258 	if (error)
259 		return (error);
260 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
261 	if (error) {
262 		vfs_unbusy(mp);
263 		return (error);
264 	}
265 	vp = NFSTOV(np);
266 	mtx_lock(&nmp->nm_mtx);
267 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
268 		mtx_unlock(&nmp->nm_mtx);
269 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
270 		    &attrflag, NULL);
271 		if (!error)
272 			gotfsinfo = 1;
273 	} else
274 		mtx_unlock(&nmp->nm_mtx);
275 	if (!error)
276 		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
277 		    &attrflag, NULL);
278 	if (attrflag == 0) {
279 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
280 		    td->td_ucred, td, &nfsva, NULL);
281 		if (ret) {
282 			/*
283 			 * Just set default values to get things going.
284 			 */
285 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
286 			nfsva.na_vattr.va_type = VDIR;
287 			nfsva.na_vattr.va_mode = 0777;
288 			nfsva.na_vattr.va_nlink = 100;
289 			nfsva.na_vattr.va_uid = (uid_t)0;
290 			nfsva.na_vattr.va_gid = (gid_t)0;
291 			nfsva.na_vattr.va_fileid = 2;
292 			nfsva.na_vattr.va_gen = 1;
293 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
294 			nfsva.na_vattr.va_size = 512 * 1024;
295 		}
296 	}
297 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
298 	if (!error) {
299 	    mtx_lock(&nmp->nm_mtx);
300 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
301 		nfscl_loadfsinfo(nmp, &fs);
302 	    nfscl_loadsbinfo(nmp, &sb, sbp);
303 	    sbp->f_flags = nmp->nm_flag;
304 	    sbp->f_iosize = newnfs_iosize(nmp);
305 	    mtx_unlock(&nmp->nm_mtx);
306 	    if (sbp != &mp->mnt_stat) {
307 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
308 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
309 	    }
310 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
311 	} else if (NFS_ISV4(vp)) {
312 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
313 	}
314 	vput(vp);
315 	vfs_unbusy(mp);
316 	return (error);
317 }
318 
319 /*
320  * nfs version 3 fsinfo rpc call
321  */
322 int
323 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
324     struct thread *td)
325 {
326 	struct nfsfsinfo fs;
327 	struct nfsvattr nfsva;
328 	int error, attrflag;
329 
330 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
331 	if (!error) {
332 		if (attrflag)
333 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
334 			    1);
335 		mtx_lock(&nmp->nm_mtx);
336 		nfscl_loadfsinfo(nmp, &fs);
337 		mtx_unlock(&nmp->nm_mtx);
338 	}
339 	return (error);
340 }
341 
342 /*
343  * Mount a remote root fs via. nfs. This depends on the info in the
344  * newnfs_diskless structure that has been filled in properly by some primary
345  * bootstrap.
346  * It goes something like this:
347  * - do enough of "ifconfig" by calling ifioctl() so that the system
348  *   can talk to the server
349  * - If newnfs_diskless.mygateway is filled in, use that address as
350  *   a default gateway.
351  * - build the rootfs mount point and call mountnfs() to do the rest.
352  *
353  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
354  * structure, as well as other global NFS client variables here, as
355  * ncl_mountroot() will be called once in the boot before any other NFS
356  * client activity occurs.
357  */
358 int
359 ncl_mountroot(struct mount *mp, struct thread *td)
360 {
361 	struct nfsv3_diskless *nd = &newnfsv3_diskless;
362 	struct socket *so;
363 	struct vnode *vp;
364 	struct ifreq ir;
365 	int error, i;
366 	u_long l;
367 	char buf[128];
368 	char *cp;
369 
370 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
371 	bootpc_init();		/* use bootp to get newnfs_diskless filled in */
372 #elif defined(NFS_ROOT)
373 	nfs_setup_diskless();
374 #endif
375 
376 	nfscl_init();
377 
378 	if (newnfs_diskless_valid == 0)
379 		return (-1);
380 	if (newnfs_diskless_valid == 1)
381 		nfs_convert_diskless();
382 
383 	/*
384 	 * XXX splnet, so networks will receive...
385 	 */
386 	splnet();
387 
388 	/*
389 	 * Do enough of ifconfig(8) so that the critical net interface can
390 	 * talk to the server.
391 	 */
392 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
393 	    td->td_ucred, td);
394 	if (error)
395 		panic("ncl_mountroot: socreate(%04x): %d",
396 			nd->myif.ifra_addr.sa_family, error);
397 
398 #if 0 /* XXX Bad idea */
399 	/*
400 	 * We might not have been told the right interface, so we pass
401 	 * over the first ten interfaces of the same kind, until we get
402 	 * one of them configured.
403 	 */
404 
405 	for (i = strlen(nd->myif.ifra_name) - 1;
406 		nd->myif.ifra_name[i] >= '0' &&
407 		nd->myif.ifra_name[i] <= '9';
408 		nd->myif.ifra_name[i] ++) {
409 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
410 		if(!error)
411 			break;
412 	}
413 #endif
414 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
415 	if (error)
416 		panic("ncl_mountroot: SIOCAIFADDR: %d", error);
417 	if ((cp = getenv("boot.netif.mtu")) != NULL) {
418 		ir.ifr_mtu = strtol(cp, NULL, 10);
419 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
420 		freeenv(cp);
421 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
422 		if (error)
423 			printf("ncl_mountroot: SIOCSIFMTU: %d", error);
424 	}
425 	soclose(so);
426 
427 	/*
428 	 * If the gateway field is filled in, set it as the default route.
429 	 * Note that pxeboot will set a default route of 0 if the route
430 	 * is not set by the DHCP server.  Check also for a value of 0
431 	 * to avoid panicking inappropriately in that situation.
432 	 */
433 	if (nd->mygateway.sin_len != 0 &&
434 	    nd->mygateway.sin_addr.s_addr != 0) {
435 		struct sockaddr_in mask, sin;
436 
437 		bzero((caddr_t)&mask, sizeof(mask));
438 		sin = mask;
439 		sin.sin_family = AF_INET;
440 		sin.sin_len = sizeof(sin);
441 		error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
442 		    (struct sockaddr *)&nd->mygateway,
443 		    (struct sockaddr *)&mask,
444 		    RTF_UP | RTF_GATEWAY, NULL);
445 		if (error)
446 			panic("ncl_mountroot: RTM_ADD: %d", error);
447 	}
448 
449 	/*
450 	 * Create the rootfs mount point.
451 	 */
452 	nd->root_args.fh = nd->root_fh;
453 	nd->root_args.fhsize = nd->root_fhsize;
454 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
455 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
456 		(l >> 24) & 0xff, (l >> 16) & 0xff,
457 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
458 	printf("NFS ROOT: %s\n", buf);
459 	if ((error = nfs_mountdiskless(buf,
460 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
461 		return (error);
462 	}
463 
464 	/*
465 	 * This is not really an nfs issue, but it is much easier to
466 	 * set hostname here and then let the "/etc/rc.xxx" files
467 	 * mount the right /var based upon its preset value.
468 	 */
469 	bcopy(nd->my_hostnam, hostname, MAXHOSTNAMELEN);
470 	hostname[MAXHOSTNAMELEN - 1] = '\0';
471 	for (i = 0; i < MAXHOSTNAMELEN; i++)
472 		if (hostname[i] == '\0')
473 			break;
474 	inittodr(ntohl(nd->root_time));
475 	return (0);
476 }
477 
478 /*
479  * Internal version of mount system call for diskless setup.
480  */
481 static int
482 nfs_mountdiskless(char *path,
483     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
484     struct vnode **vpp, struct mount *mp)
485 {
486 	struct sockaddr *nam;
487 	int error;
488 
489 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
490 	if ((error = mountnfs(args, mp, nam, path, NULL, NULL, NULL, vpp,
491 	    td->td_ucred, td)) != 0) {
492 		printf("ncl_mountroot: mount %s on /: %d\n", path, error);
493 		return (error);
494 	}
495 	return (0);
496 }
497 
498 static void
499 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
500     struct ucred *cred, struct thread *td)
501 {
502 	int s;
503 	int adjsock;
504 
505 	s = splnet();
506 
507 	/*
508 	 * Set read-only flag if requested; otherwise, clear it if this is
509 	 * an update.  If this is not an update, then either the read-only
510 	 * flag is already clear, or this is a root mount and it was set
511 	 * intentionally at some previous point.
512 	 */
513 	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
514 		MNT_ILOCK(mp);
515 		mp->mnt_flag |= MNT_RDONLY;
516 		MNT_IUNLOCK(mp);
517 	} else if (mp->mnt_flag & MNT_UPDATE) {
518 		MNT_ILOCK(mp);
519 		mp->mnt_flag &= ~MNT_RDONLY;
520 		MNT_IUNLOCK(mp);
521 	}
522 
523 	/*
524 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
525 	 * no sense in that context.  Also, set up appropriate retransmit
526 	 * and soft timeout behavior.
527 	 */
528 	if (argp->sotype == SOCK_STREAM) {
529 		nmp->nm_flag &= ~NFSMNT_NOCONN;
530 		nmp->nm_timeo = NFS_MAXTIMEO;
531 	}
532 
533 	/* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
534 	if ((argp->flags & NFSMNT_NFSV3) == 0)
535 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
536 
537 	/* Also re-bind if we're switching to/from a connected UDP socket */
538 	adjsock = ((nmp->nm_flag & NFSMNT_NOCONN) !=
539 		    (argp->flags & NFSMNT_NOCONN));
540 
541 	/* Update flags atomically.  Don't change the lock bits. */
542 	nmp->nm_flag = argp->flags | nmp->nm_flag;
543 	splx(s);
544 
545 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
546 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
547 		if (nmp->nm_timeo < NFS_MINTIMEO)
548 			nmp->nm_timeo = NFS_MINTIMEO;
549 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
550 			nmp->nm_timeo = NFS_MAXTIMEO;
551 	}
552 
553 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
554 		nmp->nm_retry = argp->retrans;
555 		if (nmp->nm_retry > NFS_MAXREXMIT)
556 			nmp->nm_retry = NFS_MAXREXMIT;
557 	}
558 
559 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
560 		nmp->nm_wsize = argp->wsize;
561 		/* Round down to multiple of blocksize */
562 		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
563 		if (nmp->nm_wsize <= 0)
564 			nmp->nm_wsize = NFS_FABLKSIZE;
565 	}
566 
567 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
568 		nmp->nm_rsize = argp->rsize;
569 		/* Round down to multiple of blocksize */
570 		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
571 		if (nmp->nm_rsize <= 0)
572 			nmp->nm_rsize = NFS_FABLKSIZE;
573 	}
574 
575 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
576 		nmp->nm_readdirsize = argp->readdirsize;
577 	}
578 
579 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
580 		nmp->nm_acregmin = argp->acregmin;
581 	else
582 		nmp->nm_acregmin = NFS_MINATTRTIMO;
583 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
584 		nmp->nm_acregmax = argp->acregmax;
585 	else
586 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
587 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
588 		nmp->nm_acdirmin = argp->acdirmin;
589 	else
590 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
591 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
592 		nmp->nm_acdirmax = argp->acdirmax;
593 	else
594 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
595 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
596 		nmp->nm_acdirmin = nmp->nm_acdirmax;
597 	if (nmp->nm_acregmin > nmp->nm_acregmax)
598 		nmp->nm_acregmin = nmp->nm_acregmax;
599 
600 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
601 		if (argp->readahead <= NFS_MAXRAHEAD)
602 			nmp->nm_readahead = argp->readahead;
603 		else
604 			nmp->nm_readahead = NFS_MAXRAHEAD;
605 	}
606 	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
607 		if (argp->wcommitsize < nmp->nm_wsize)
608 			nmp->nm_wcommitsize = nmp->nm_wsize;
609 		else
610 			nmp->nm_wcommitsize = argp->wcommitsize;
611 	}
612 
613 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
614 		    (nmp->nm_soproto != argp->proto));
615 
616 	if (nmp->nm_client != NULL && adjsock) {
617 		int haslock = 0, error = 0;
618 
619 		if (nmp->nm_sotype == SOCK_STREAM) {
620 			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
621 			if (!error)
622 				haslock = 1;
623 		}
624 		if (!error) {
625 		    newnfs_disconnect(&nmp->nm_sockreq);
626 		    if (haslock)
627 			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
628 		    nmp->nm_sotype = argp->sotype;
629 		    nmp->nm_soproto = argp->proto;
630 		    if (nmp->nm_sotype == SOCK_DGRAM)
631 			while (newnfs_connect(nmp, &nmp->nm_sockreq,
632 			    cred, td, 0)) {
633 				printf("newnfs_args: retrying connect\n");
634 				(void) nfs_catnap(PSOCK, "newnfscon");
635 			}
636 		}
637 	} else {
638 		nmp->nm_sotype = argp->sotype;
639 		nmp->nm_soproto = argp->proto;
640 	}
641 }
642 
/*
 * NULL-terminated list of mount option names that nfs_mount() passes to
 * vfs_filteropt() when it checks the options of a mount request.
 */
static const char *nfs_opts[] = {
	"from", "nfs_args",
	"noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
	"noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
	"async",
	"dumbtimer", "noconn", "nolockd", "intr", "rdirplus", "resvport",
	"readdirsize", "soft", "hard", "mntudp", "tcp",
	"wsize", "rsize", "retrans",
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	NULL
};
650 
651 /*
652  * VFS Operations.
653  *
654  * mount system call
655  * It seems a bit dumb to copyinstr() the host and path here and then
656  * bcopy() them in mountnfs(), but I wanted to detect errors before
657  * doing the sockargs() call because sockargs() allocates an mbuf and
658  * an error after that means that I have to release the mbuf.
659  */
660 /* ARGSUSED */
661 static int
662 nfs_mount(struct mount *mp, struct thread *td)
663 {
664 	struct nfs_args args = {
665 	    .version = NFS_ARGSVERSION,
666 	    .addr = NULL,
667 	    .addrlen = sizeof (struct sockaddr_in),
668 	    .sotype = SOCK_STREAM,
669 	    .proto = 0,
670 	    .fh = NULL,
671 	    .fhsize = 0,
672 	    .flags = 0,
673 	    .wsize = NFS_WSIZE,
674 	    .rsize = NFS_RSIZE,
675 	    .readdirsize = NFS_READDIRSIZE,
676 	    .timeo = 10,
677 	    .retrans = NFS_RETRANS,
678 	    .readahead = NFS_DEFRAHEAD,
679 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
680 	    .hostname = NULL,
681 	    /* args version 4 */
682 	    .acregmin = NFS_MINATTRTIMO,
683 	    .acregmax = NFS_MAXATTRTIMO,
684 	    .acdirmin = NFS_MINDIRATTRTIMO,
685 	    .acdirmax = NFS_MAXDIRATTRTIMO,
686 	    .dirlen = 0,
687 	    .krbnamelen = 0,
688 	};
689 	int error;
690 	struct sockaddr *nam;
691 	struct vnode *vp;
692 	char hst[MNAMELEN];
693 	size_t len;
694 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
695 
696 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
697 		error = EINVAL;
698 		goto out;
699 	}
700 
701 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
702 		error = ncl_mountroot(mp, td);
703 		goto out;
704 	}
705 
706 	error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args, sizeof args);
707 	if (error)
708 		goto out;
709 
710 	if (args.version != NFS_ARGSVERSION) {
711 		error = EPROGMISMATCH;
712 		goto out;
713 	}
714 
715 	nfscl_init();
716 
717 	if (mp->mnt_flag & MNT_UPDATE) {
718 		struct nfsmount *nmp = VFSTONFS(mp);
719 
720 		if (nmp == NULL) {
721 			error = EIO;
722 			goto out;
723 		}
724 		/*
725 		 * When doing an update, we can't change version,
726 		 * security, switch lockd strategies or change cookie
727 		 * translation
728 		 */
729 		args.flags = (args.flags &
730 		    ~(NFSMNT_NFSV3 |
731 		      NFSMNT_NFSV4 |
732 		      NFSMNT_KERB |
733 		      NFSMNT_INTEGRITY |
734 		      NFSMNT_PRIVACY |
735 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
736 		    (nmp->nm_flag &
737 			(NFSMNT_NFSV3 |
738 			 NFSMNT_NFSV4 |
739 			 NFSMNT_KERB |
740 			 NFSMNT_INTEGRITY |
741 			 NFSMNT_PRIVACY |
742 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
743 		nfs_decode_args(mp, nmp, &args, td->td_ucred, td);
744 		goto out;
745 	}
746 
747 	/*
748 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
749 	 * or no-connection mode for those protocols that support
750 	 * no-connection mode (the flag will be cleared later for protocols
751 	 * that do not support no-connection mode).  This will allow a client
752 	 * to receive replies from a different IP then the request was
753 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
754 	 * not 0.
755 	 */
756 	if (nfs_ip_paranoia == 0)
757 		args.flags |= NFSMNT_NOCONN;
758 	if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
759 		error = EINVAL;
760 		goto out;
761 	}
762 	if (args.fhsize > 0) {
763 		error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize);
764 		if (error)
765 			goto out;
766 	}
767 	error = copyinstr(args.hostname, hst, MNAMELEN-1, &len);
768 	if (error)
769 		goto out;
770 	bzero(&hst[len], MNAMELEN - len);
771 	if (args.krbnamelen > 0) {
772 		if (args.krbnamelen >= 100) {
773 			error = EINVAL;
774 			goto out;
775 		}
776 		error = copyin(args.krbname, krbname, args.krbnamelen);
777 		if (error)
778 			goto out;
779 		krbname[args.krbnamelen] = '\0';
780 	} else {
781 		krbname[0] = '\0';
782 		args.krbnamelen = 0;
783 	}
784 	if (args.dirlen > 0) {
785 		if (args.dirlen >= 100) {
786 			error = EINVAL;
787 			goto out;
788 		}
789 		error = copyin(args.dirpath, dirpath, args.dirlen);
790 		if (error)
791 			goto out;
792 		dirpath[args.dirlen] = '\0';
793 	} else {
794 		dirpath[0] = '\0';
795 		args.dirlen = 0;
796 	}
797 	if (args.srvkrbnamelen > 0) {
798 		if (args.srvkrbnamelen >= 100) {
799 			error = EINVAL;
800 			goto out;
801 		}
802 		error = copyin(args.srvkrbname, srvkrbname, args.srvkrbnamelen);
803 		if (error)
804 			goto out;
805 		srvkrbname[args.srvkrbnamelen] = '\0';
806 	} else {
807 		srvkrbname[0] = '\0';
808 		args.srvkrbnamelen = 0;
809 	}
810 	/* sockargs() call must be after above copyin() calls */
811 	error = getsockaddr(&nam, (caddr_t)args.addr, args.addrlen);
812 	if (error)
813 		goto out;
814 	args.fh = nfh;
815 	error = mountnfs(&args, mp, nam, hst, krbname, dirpath, srvkrbname,
816 	    &vp, td->td_ucred, td);
817 out:
818 	if (!error) {
819 		MNT_ILOCK(mp);
820 		mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
821 		MNT_IUNLOCK(mp);
822 	}
823 	return (error);
824 }
825 
826 
827 /*
828  * VFS Operations.
829  *
830  * mount system call
831  * It seems a bit dumb to copyinstr() the host and path here and then
832  * bcopy() them in mountnfs(), but I wanted to detect errors before
833  * doing the sockargs() call because sockargs() allocates an mbuf and
834  * an error after that means that I have to release the mbuf.
835  */
836 /* ARGSUSED */
837 static int
838 nfs_cmount(struct mntarg *ma, void *data, int flags, struct thread *td)
839 {
840 	int error;
841 	struct nfs_args args;
842 
843 	error = copyin(data, &args, sizeof (struct nfs_args));
844 	if (error)
845 		return error;
846 
847 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
848 
849 	error = kernel_mount(ma, flags);
850 	return (error);
851 }
852 
853 /*
854  * Common code for mount and mountroot
855  */
856 static int
857 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
858     char *hst, u_char *krbname, u_char *dirpath, u_char *srvkrbname,
859     struct vnode **vpp, struct ucred *cred, struct thread *td)
860 {
861 	struct nfsmount *nmp;
862 	struct nfsnode *np;
863 	int error, trycnt, ret, clearintr;
864 	struct nfsvattr nfsva;
865 	static u_int64_t clval = 0;
866 
867 	if (mp->mnt_flag & MNT_UPDATE) {
868 		nmp = VFSTONFS(mp);
869 		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
870 		FREE(nam, M_SONAME);
871 		return (0);
872 	} else {
873 		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
874 		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2,
875 		    M_NEWNFSMNT, M_WAITOK);
876 		bzero((caddr_t)nmp, sizeof (struct nfsmount) +
877 		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2);
878 		TAILQ_INIT(&nmp->nm_bufq);
879 		if (clval == 0)
880 			clval = (u_int64_t)nfsboottime.tv_sec;
881 		nmp->nm_clval = clval++;
882 		nmp->nm_krbnamelen = argp->krbnamelen;
883 		nmp->nm_dirpathlen = argp->dirlen;
884 		nmp->nm_srvkrbnamelen = argp->srvkrbnamelen;
885 		if (nmp->nm_dirpathlen > 0) {
886 			/*
887 			 * Since we will be doing dirpath as root,
888 			 * set nm_uid to the real uid doing the mount,
889 			 * since that is normally the user with a valid TGT.
890 			 */
891 			nmp->nm_uid = td->td_ucred->cr_ruid;
892 		} else {
893 			/*
894 			 * Just set to -1, so the first Op
895 			 * will set it later, to the uid of
896 			 * the process doing that (usually
897 			 * from a first open in the mount
898 			 * point).
899 			 */
900 			nmp->nm_uid = (uid_t)-1;
901 		}
902 
903 		/* Copy and null terminate all the names */
904 		if (nmp->nm_krbnamelen > 0) {
905 			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
906 			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
907 		}
908 		if (nmp->nm_dirpathlen > 0) {
909 			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
910 			    nmp->nm_dirpathlen);
911 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
912 			    + 1] = '\0';
913 		}
914 		if (nmp->nm_srvkrbnamelen > 0) {
915 			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
916 			    nmp->nm_srvkrbnamelen);
917 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
918 			    + nmp->nm_srvkrbnamelen + 2] = '\0';
919 		}
920 		nmp->nm_sockreq.nr_cred = crhold(cred);
921 		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
922 		mp->mnt_data = nmp;
923 	}
924 	vfs_getnewfsid(mp);
925 	nmp->nm_mountp = mp;
926 	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
927 
928 	/*
929 	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
930 	 * high, depending on whether we end up with negative offsets in
931 	 * the client or server somewhere.  2GB-1 may be safer.
932 	 *
933 	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
934 	 * that we can handle until we find out otherwise.
935 	 * XXX Our "safe" limit on the client is what we can store in our
936 	 * buffer cache using signed(!) block numbers.
937 	 */
938 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
939 		nmp->nm_maxfilesize = 0xffffffffLL;
940 	else
941 		nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
942 
943 	nmp->nm_timeo = NFS_TIMEO;
944 	nmp->nm_retry = NFS_RETRANS;
945 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
946 		nmp->nm_wsize = NFS_WSIZE;
947 		nmp->nm_rsize = NFS_RSIZE;
948 		nmp->nm_readdirsize = NFS_READDIRSIZE;
949 	}
950 	nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
951 	nmp->nm_numgrps = NFS_MAXGRPS;
952 	nmp->nm_readahead = NFS_DEFRAHEAD;
953 	nmp->nm_tprintf_delay = nfs_tprintf_delay;
954 	if (nmp->nm_tprintf_delay < 0)
955 		nmp->nm_tprintf_delay = 0;
956 	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
957 	if (nmp->nm_tprintf_initial_delay < 0)
958 		nmp->nm_tprintf_initial_delay = 0;
959 	nmp->nm_fhsize = argp->fhsize;
960 	if (nmp->nm_fhsize > 0)
961 		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
962 	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
963 	nmp->nm_nam = nam;
964 	/* Set up the sockets and per-host congestion */
965 	nmp->nm_sotype = argp->sotype;
966 	nmp->nm_soproto = argp->proto;
967 	nmp->nm_sockreq.nr_prog = NFS_PROG;
968 	if ((argp->flags & NFSMNT_NFSV4))
969 		nmp->nm_sockreq.nr_vers = NFS_VER4;
970 	else if ((argp->flags & NFSMNT_NFSV3))
971 		nmp->nm_sockreq.nr_vers = NFS_VER3;
972 	else
973 		nmp->nm_sockreq.nr_vers = NFS_VER2;
974 
975 	nfs_decode_args(mp, nmp, argp, cred, td);
976 
977 	/*
978 	 * For Connection based sockets (TCP,...) do the connect here,
979 	 * but make it interruptible, even for non-interruptible mounts.
980 	 */
981 	if ((nmp->nm_flag & NFSMNT_INT) == 0) {
982 		nmp->nm_flag |= NFSMNT_INT;
983 		clearintr = 1;
984 	} else {
985 		clearintr = 0;
986 	}
987 	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
988 		goto bad;
989 	if (clearintr)
990 		nmp->nm_flag &= ~NFSMNT_INT;
991 
992 	/*
993 	 * A reference count is needed on the nfsnode representing the
994 	 * remote root.  If this object is not persistent, then backward
995 	 * traversals of the mount point (i.e. "..") will not work if
996 	 * the nfsnode gets flushed out of the cache. Ufs does not have
997 	 * this problem, because one can identify root inodes by their
998 	 * number == ROOTINO (2).
999 	 */
1000 	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1001 	    nmp->nm_dirpathlen > 0) {
1002 		/*
1003 		 * If the fhsize on the mount point == 0 for V4, the mount
1004 		 * path needs to be looked up.
1005 		 */
1006 		trycnt = 3;
1007 		do {
1008 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1009 			    cred, td);
1010 			if (error)
1011 				(void) nfs_catnap(PZERO, "nfsgetdirp");
1012 		} while (error && --trycnt > 0);
1013 		if (error) {
1014 			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1015 			goto bad;
1016 		}
1017 	}
1018 	if (nmp->nm_fhsize > 0) {
1019 		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
1020 		if (error)
1021 			goto bad;
1022 		*vpp = NFSTOV(np);
1023 
1024 		/*
1025 		 * Get file attributes and transfer parameters for the
1026 		 * mountpoint.  This has the side effect of filling in
1027 		 * (*vpp)->v_type with the correct value.
1028 		 */
1029 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1030 		    cred, td, &nfsva, NULL);
1031 		if (ret) {
1032 			/*
1033 			 * Just set default values to get things going.
1034 			 */
1035 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1036 			nfsva.na_vattr.va_type = VDIR;
1037 			nfsva.na_vattr.va_mode = 0777;
1038 			nfsva.na_vattr.va_nlink = 100;
1039 			nfsva.na_vattr.va_uid = (uid_t)0;
1040 			nfsva.na_vattr.va_gid = (gid_t)0;
1041 			nfsva.na_vattr.va_fileid = 2;
1042 			nfsva.na_vattr.va_gen = 1;
1043 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1044 			nfsva.na_vattr.va_size = 512 * 1024;
1045 		}
1046 		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1047 		if (argp->flags & NFSMNT_NFSV3)
1048 			ncl_fsinfo(nmp, *vpp, cred, td);
1049 
1050 		/*
1051 		 * Lose the lock but keep the ref.
1052 		 */
1053 		VOP_UNLOCK(*vpp, 0);
1054 		return (0);
1055 	}
1056 	error = EIO;
1057 
1058 bad:
1059 	newnfs_disconnect(&nmp->nm_sockreq);
1060 	crfree(nmp->nm_sockreq.nr_cred);
1061 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1062 	mtx_destroy(&nmp->nm_mtx);
1063 	FREE(nmp, M_NEWNFSMNT);
1064 	FREE(nam, M_SONAME);
1065 	return (error);
1066 }
1067 
1068 /*
1069  * unmount system call
1070  */
1071 static int
1072 nfs_unmount(struct mount *mp, int mntflags, struct thread *td)
1073 {
1074 	struct nfsmount *nmp;
1075 	int error, flags = 0, trycnt = 0;
1076 
1077 	if (mntflags & MNT_FORCE)
1078 		flags |= FORCECLOSE;
1079 	nmp = VFSTONFS(mp);
1080 	/*
1081 	 * Goes something like this..
1082 	 * - Call vflush() to clear out vnodes for this filesystem
1083 	 * - Close the socket
1084 	 * - Free up the data structures
1085 	 */
1086 	/* In the forced case, cancel any outstanding requests. */
1087 	if (mntflags & MNT_FORCE) {
1088 		error = newnfs_nmcancelreqs(nmp);
1089 		if (error)
1090 			goto out;
1091 		/* For a forced close, get rid of the renew thread now */
1092 		nfscl_umount(nmp, td);
1093 	}
1094 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1095 	do {
1096 		error = vflush(mp, 1, flags, td);
1097 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1098 			(void) nfs_catnap(PSOCK, "newndm");
1099 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1100 	if (error)
1101 		goto out;
1102 
1103 	/*
1104 	 * We are now committed to the unmount.
1105 	 */
1106 	if ((mntflags & MNT_FORCE) == 0)
1107 		nfscl_umount(nmp, td);
1108 	newnfs_disconnect(&nmp->nm_sockreq);
1109 	crfree(nmp->nm_sockreq.nr_cred);
1110 	FREE(nmp->nm_nam, M_SONAME);
1111 
1112 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1113 	mtx_destroy(&nmp->nm_mtx);
1114 	FREE(nmp, M_NEWNFSMNT);
1115 out:
1116 	return (error);
1117 }
1118 
1119 /*
1120  * Return root of a filesystem
1121  */
1122 static int
1123 nfs_root(struct mount *mp, int flags, struct vnode **vpp, struct thread *td)
1124 {
1125 	struct vnode *vp;
1126 	struct nfsmount *nmp;
1127 	struct nfsnode *np;
1128 	int error;
1129 
1130 	nmp = VFSTONFS(mp);
1131 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
1132 	if (error)
1133 		return error;
1134 	vp = NFSTOV(np);
1135 	/*
1136 	 * Get transfer parameters and attributes for root vnode once.
1137 	 */
1138 	mtx_lock(&nmp->nm_mtx);
1139 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1140 		mtx_unlock(&nmp->nm_mtx);
1141 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1142 	} else
1143 		mtx_unlock(&nmp->nm_mtx);
1144 	if (vp->v_type == VNON)
1145 	    vp->v_type = VDIR;
1146 	vp->v_vflag |= VV_ROOT;
1147 	*vpp = vp;
1148 	return (0);
1149 }
1150 
1151 /*
1152  * Flush out the buffer cache
1153  */
1154 /* ARGSUSED */
1155 static int
1156 nfs_sync(struct mount *mp, int waitfor, struct thread *td)
1157 {
1158 	struct vnode *vp, *mvp;
1159 	int error, allerror = 0;
1160 
1161 	/*
1162 	 * Force stale buffer cache information to be flushed.
1163 	 */
1164 	MNT_ILOCK(mp);
1165 loop:
1166 	MNT_VNODE_FOREACH(vp, mp, mvp) {
1167 		VI_LOCK(vp);
1168 		MNT_IUNLOCK(mp);
1169 		/* XXX Racy bv_cnt check. */
1170 		if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1171 		    waitfor == MNT_LAZY) {
1172 			VI_UNLOCK(vp);
1173 			MNT_ILOCK(mp);
1174 			continue;
1175 		}
1176 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1177 			MNT_ILOCK(mp);
1178 			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1179 			goto loop;
1180 		}
1181 		error = VOP_FSYNC(vp, waitfor, td);
1182 		if (error)
1183 			allerror = error;
1184 		VOP_UNLOCK(vp, 0);
1185 		vrele(vp);
1186 
1187 		MNT_ILOCK(mp);
1188 	}
1189 	MNT_IUNLOCK(mp);
1190 	return (allerror);
1191 }
1192 
1193 static int
1194 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1195 {
1196 	struct nfsmount *nmp = VFSTONFS(mp);
1197 	struct vfsquery vq;
1198 	int error;
1199 
1200 	bzero(&vq, sizeof(vq));
1201 	switch (op) {
1202 #if 0
1203 	case VFS_CTL_NOLOCKS:
1204 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1205  		if (req->oldptr != NULL) {
1206  			error = SYSCTL_OUT(req, &val, sizeof(val));
1207  			if (error)
1208  				return (error);
1209  		}
1210  		if (req->newptr != NULL) {
1211  			error = SYSCTL_IN(req, &val, sizeof(val));
1212  			if (error)
1213  				return (error);
1214 			if (val)
1215 				nmp->nm_flag |= NFSMNT_NOLOCKS;
1216 			else
1217 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1218  		}
1219 		break;
1220 #endif
1221 	case VFS_CTL_QUERY:
1222 		mtx_lock(&nmp->nm_mtx);
1223 		if (nmp->nm_state & NFSSTA_TIMEO)
1224 			vq.vq_flags |= VQ_NOTRESP;
1225 		mtx_unlock(&nmp->nm_mtx);
1226 #if 0
1227 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1228 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1229 			vq.vq_flags |= VQ_NOTRESPLOCK;
1230 #endif
1231 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1232 		break;
1233  	case VFS_CTL_TIMEO:
1234  		if (req->oldptr != NULL) {
1235  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1236  			    sizeof(nmp->nm_tprintf_initial_delay));
1237  			if (error)
1238  				return (error);
1239  		}
1240  		if (req->newptr != NULL) {
1241 			error = vfs_suser(mp, req->td);
1242 			if (error)
1243 				return (error);
1244  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1245  			    sizeof(nmp->nm_tprintf_initial_delay));
1246  			if (error)
1247  				return (error);
1248  			if (nmp->nm_tprintf_initial_delay < 0)
1249  				nmp->nm_tprintf_initial_delay = 0;
1250  		}
1251 		break;
1252 	default:
1253 		return (ENOTSUP);
1254 	}
1255 	return (0);
1256 }
1257 
1258