xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision f02f7422801bb39f5eaab8fc383fa7b70c467ff9)
1 /*-
2  * Copyright (c) 1989, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/limits.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/mount.h>
55 #include <sys/proc.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/sockio.h>
59 #include <sys/sysctl.h>
60 #include <sys/vnode.h>
61 #include <sys/signalvar.h>
62 
63 #include <vm/vm.h>
64 #include <vm/vm_extern.h>
65 #include <vm/uma.h>
66 
67 #include <net/if.h>
68 #include <net/route.h>
69 #include <netinet/in.h>
70 
71 #include <fs/nfs/nfsport.h>
72 #include <fs/nfsclient/nfsnode.h>
73 #include <fs/nfsclient/nfsmount.h>
74 #include <fs/nfsclient/nfs.h>
75 #include <nfs/nfsdiskless.h>
76 
/* Register the "nfscl" feature name with its description string. */
FEATURE(nfscl, "NFSv4 client");

/* Objects that are only declared here; their definitions are not in view. */
extern int nfscl_ticks;
extern struct timeval nfsboottime;
extern struct nfsstats	newnfsstats;
extern int nfsrv_useacl;
extern int nfscl_debuglevel;
extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
extern struct mtx ncl_iod_mutex;
NFSCLSTATEMUTEX;

/* malloc(9) type tags declared by this file. */
MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");

/* Read/write integer knobs attached to the _vfs_nfs sysctl node. */
SYSCTL_DECL(_vfs_nfs);
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0, "");
/*
 * NOTE(review): the two knobs below pass their default-value constant as
 * the OID number argument where nfs_ip_paranoia uses OID_AUTO.  Presumably
 * historical; confirm before changing, since the number is externally
 * visible.
 */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
/* how long between console messages "nfs server foo not responding" */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
103 
/* Prototypes for helpers that are private to this file. */
static int	nfs_mountroot(struct mount *);
static void	nfs_sec_name(char *, int *);
static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
		    struct nfs_args *argp, const char *, struct ucred *,
		    struct thread *);
static int	mountnfs(struct nfs_args *, struct mount *,
		    struct sockaddr *, char *, u_char *, int, u_char *, int,
		    u_char *, int, struct vnode **, struct ucred *,
		    struct thread *, int, int, int);
static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
		    struct sockaddr_storage *, int *, off_t *,
		    struct timeval *);
/* VFS entry points, declared through the vfs_*_t function typedefs. */
static vfs_mount_t nfs_mount;
static vfs_cmount_t nfs_cmount;
static vfs_unmount_t nfs_unmount;
static vfs_root_t nfs_root;
static vfs_statfs_t nfs_statfs;
static vfs_sync_t nfs_sync;
static vfs_sysctl_t nfs_sysctl;
static vfs_purge_t nfs_purge;
124 
/*
 * nfs vfs operations.
 *
 * Method table for this filesystem; it is registered under the name
 * "nfs" by the VFS_SET() invocation that follows it.
 */
static struct vfsops nfs_vfsops = {
	.vfs_init =		ncl_init,
	.vfs_mount =		nfs_mount,
	.vfs_cmount =		nfs_cmount,
	.vfs_root =		nfs_root,
	.vfs_statfs =		nfs_statfs,
	.vfs_sync =		nfs_sync,
	.vfs_uninit =		ncl_uninit,
	.vfs_unmount =		nfs_unmount,
	.vfs_sysctl =		nfs_sysctl,
	.vfs_purge =		nfs_purge,
};
VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);

/* So that loader and kldload(2) can find us, wherever we are.. */
MODULE_VERSION(nfs, 1);
/* Modules this one depends on; every version bound given is 1. */
MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
MODULE_DEPEND(nfs, krpc, 1, 1, 1);
MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
148 
149 /*
150  * This structure is now defined in sys/nfs/nfs_diskless.c so that it
151  * can be shared by both NFS clients. It is declared here so that it
152  * will be defined for kernels built without NFS_ROOT, although it
153  * isn't used in that case.
154  */
/*
 * Zero-initialised fallback definitions, compiled only when neither
 * NFS_ROOT nor NFSCLIENT is defined.
 */
#if !defined(NFS_ROOT) && !defined(NFSCLIENT)
struct nfs_diskless	nfs_diskless = { { { 0 } } };
struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
int			nfs_diskless_valid = 0;
#endif

/* Read-only sysctl views of the diskless boot state. */
SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &nfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

/* Note: "diskless_rootpath" exports the root_hostnam member. */
SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");

SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
    "%Ssockaddr_in", "Diskless root nfs address");


/* Non-static prototype; its definition is not in the visible part of the file. */
void		newnfsargs_ntoh(struct nfs_args *);
/* Helpers used by the diskless (NFS root) mount path below. */
static int	nfs_mountdiskless(char *,
		    struct sockaddr_in *, struct nfs_args *,
		    struct thread *, struct vnode **, struct mount *);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
180 
181 int
182 newnfs_iosize(struct nfsmount *nmp)
183 {
184 	int iosize, maxio;
185 
186 	/* First, set the upper limit for iosize */
187 	if (nmp->nm_flag & NFSMNT_NFSV4) {
188 		maxio = NFS_MAXBSIZE;
189 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
190 		if (nmp->nm_sotype == SOCK_DGRAM)
191 			maxio = NFS_MAXDGRAMDATA;
192 		else
193 			maxio = NFS_MAXBSIZE;
194 	} else {
195 		maxio = NFS_V2MAXDATA;
196 	}
197 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
198 		nmp->nm_rsize = maxio;
199 	if (nmp->nm_rsize > MAXBSIZE)
200 		nmp->nm_rsize = MAXBSIZE;
201 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
202 		nmp->nm_readdirsize = maxio;
203 	if (nmp->nm_readdirsize > nmp->nm_rsize)
204 		nmp->nm_readdirsize = nmp->nm_rsize;
205 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
206 		nmp->nm_wsize = maxio;
207 	if (nmp->nm_wsize > MAXBSIZE)
208 		nmp->nm_wsize = MAXBSIZE;
209 
210 	/*
211 	 * Calculate the size used for io buffers.  Use the larger
212 	 * of the two sizes to minimise nfs requests but make sure
213 	 * that it is at least one VM page to avoid wasting buffer
214 	 * space.
215 	 */
216 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
217 	iosize = imax(iosize, PAGE_SIZE);
218 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
219 	return (iosize);
220 }
221 
222 static void
223 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
224 {
225 
226 	args->version = NFS_ARGSVERSION;
227 	args->addr = oargs->addr;
228 	args->addrlen = oargs->addrlen;
229 	args->sotype = oargs->sotype;
230 	args->proto = oargs->proto;
231 	args->fh = oargs->fh;
232 	args->fhsize = oargs->fhsize;
233 	args->flags = oargs->flags;
234 	args->wsize = oargs->wsize;
235 	args->rsize = oargs->rsize;
236 	args->readdirsize = oargs->readdirsize;
237 	args->timeo = oargs->timeo;
238 	args->retrans = oargs->retrans;
239 	args->readahead = oargs->readahead;
240 	args->hostname = oargs->hostname;
241 }
242 
243 static void
244 nfs_convert_diskless(void)
245 {
246 
247 	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
248 		sizeof(struct ifaliasreq));
249 	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
250 		sizeof(struct sockaddr_in));
251 	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
252 	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
253 		nfsv3_diskless.root_fhsize = NFSX_MYFH;
254 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
255 	} else {
256 		nfsv3_diskless.root_fhsize = NFSX_V2FH;
257 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
258 	}
259 	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
260 		sizeof(struct sockaddr_in));
261 	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
262 	nfsv3_diskless.root_time = nfs_diskless.root_time;
263 	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
264 		MAXHOSTNAMELEN);
265 	nfs_diskless_valid = 3;
266 }
267 
268 /*
269  * nfs statfs call
270  */
271 static int
272 nfs_statfs(struct mount *mp, struct statfs *sbp)
273 {
274 	struct vnode *vp;
275 	struct thread *td;
276 	struct nfsmount *nmp = VFSTONFS(mp);
277 	struct nfsvattr nfsva;
278 	struct nfsfsinfo fs;
279 	struct nfsstatfs sb;
280 	int error = 0, attrflag, gotfsinfo = 0, ret;
281 	struct nfsnode *np;
282 
283 	td = curthread;
284 
285 	error = vfs_busy(mp, MBF_NOWAIT);
286 	if (error)
287 		return (error);
288 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
289 	if (error) {
290 		vfs_unbusy(mp);
291 		return (error);
292 	}
293 	vp = NFSTOV(np);
294 	mtx_lock(&nmp->nm_mtx);
295 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
296 		mtx_unlock(&nmp->nm_mtx);
297 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
298 		    &attrflag, NULL);
299 		if (!error)
300 			gotfsinfo = 1;
301 	} else
302 		mtx_unlock(&nmp->nm_mtx);
303 	if (!error)
304 		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
305 		    &attrflag, NULL);
306 	if (error != 0)
307 		NFSCL_DEBUG(2, "statfs=%d\n", error);
308 	if (attrflag == 0) {
309 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
310 		    td->td_ucred, td, &nfsva, NULL, NULL);
311 		if (ret) {
312 			/*
313 			 * Just set default values to get things going.
314 			 */
315 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
316 			nfsva.na_vattr.va_type = VDIR;
317 			nfsva.na_vattr.va_mode = 0777;
318 			nfsva.na_vattr.va_nlink = 100;
319 			nfsva.na_vattr.va_uid = (uid_t)0;
320 			nfsva.na_vattr.va_gid = (gid_t)0;
321 			nfsva.na_vattr.va_fileid = 2;
322 			nfsva.na_vattr.va_gen = 1;
323 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
324 			nfsva.na_vattr.va_size = 512 * 1024;
325 		}
326 	}
327 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
328 	if (!error) {
329 	    mtx_lock(&nmp->nm_mtx);
330 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
331 		nfscl_loadfsinfo(nmp, &fs);
332 	    nfscl_loadsbinfo(nmp, &sb, sbp);
333 	    sbp->f_iosize = newnfs_iosize(nmp);
334 	    mtx_unlock(&nmp->nm_mtx);
335 	    if (sbp != &mp->mnt_stat) {
336 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
337 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
338 	    }
339 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
340 	} else if (NFS_ISV4(vp)) {
341 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
342 	}
343 	vput(vp);
344 	vfs_unbusy(mp);
345 	return (error);
346 }
347 
348 /*
349  * nfs version 3 fsinfo rpc call
350  */
351 int
352 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
353     struct thread *td)
354 {
355 	struct nfsfsinfo fs;
356 	struct nfsvattr nfsva;
357 	int error, attrflag;
358 
359 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
360 	if (!error) {
361 		if (attrflag)
362 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
363 			    1);
364 		mtx_lock(&nmp->nm_mtx);
365 		nfscl_loadfsinfo(nmp, &fs);
366 		mtx_unlock(&nmp->nm_mtx);
367 	}
368 	return (error);
369 }
370 
371 /*
372  * Mount a remote root fs via. nfs. This depends on the info in the
373  * nfs_diskless structure that has been filled in properly by some primary
374  * bootstrap.
375  * It goes something like this:
376  * - do enough of "ifconfig" by calling ifioctl() so that the system
377  *   can talk to the server
378  * - If nfs_diskless.mygateway is filled in, use that address as
379  *   a default gateway.
380  * - build the rootfs mount point and call mountnfs() to do the rest.
381  *
382  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
383  * structure, as well as other global NFS client variables here, as
384  * nfs_mountroot() will be called once in the boot before any other NFS
385  * client activity occurs.
386  */
387 static int
388 nfs_mountroot(struct mount *mp)
389 {
390 	struct thread *td = curthread;
391 	struct nfsv3_diskless *nd = &nfsv3_diskless;
392 	struct socket *so;
393 	struct vnode *vp;
394 	struct ifreq ir;
395 	int error;
396 	u_long l;
397 	char buf[128];
398 	char *cp;
399 
400 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
401 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
402 #elif defined(NFS_ROOT)
403 	nfs_setup_diskless();
404 #endif
405 
406 	if (nfs_diskless_valid == 0)
407 		return (-1);
408 	if (nfs_diskless_valid == 1)
409 		nfs_convert_diskless();
410 
411 	/*
412 	 * XXX splnet, so networks will receive...
413 	 */
414 	splnet();
415 
416 	/*
417 	 * Do enough of ifconfig(8) so that the critical net interface can
418 	 * talk to the server.
419 	 */
420 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
421 	    td->td_ucred, td);
422 	if (error)
423 		panic("nfs_mountroot: socreate(%04x): %d",
424 			nd->myif.ifra_addr.sa_family, error);
425 
426 #if 0 /* XXX Bad idea */
427 	/*
428 	 * We might not have been told the right interface, so we pass
429 	 * over the first ten interfaces of the same kind, until we get
430 	 * one of them configured.
431 	 */
432 
433 	for (i = strlen(nd->myif.ifra_name) - 1;
434 		nd->myif.ifra_name[i] >= '0' &&
435 		nd->myif.ifra_name[i] <= '9';
436 		nd->myif.ifra_name[i] ++) {
437 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
438 		if(!error)
439 			break;
440 	}
441 #endif
442 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
443 	if (error)
444 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
445 	if ((cp = getenv("boot.netif.mtu")) != NULL) {
446 		ir.ifr_mtu = strtol(cp, NULL, 10);
447 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
448 		freeenv(cp);
449 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
450 		if (error)
451 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
452 	}
453 	soclose(so);
454 
455 	/*
456 	 * If the gateway field is filled in, set it as the default route.
457 	 * Note that pxeboot will set a default route of 0 if the route
458 	 * is not set by the DHCP server.  Check also for a value of 0
459 	 * to avoid panicking inappropriately in that situation.
460 	 */
461 	if (nd->mygateway.sin_len != 0 &&
462 	    nd->mygateway.sin_addr.s_addr != 0) {
463 		struct sockaddr_in mask, sin;
464 
465 		bzero((caddr_t)&mask, sizeof(mask));
466 		sin = mask;
467 		sin.sin_family = AF_INET;
468 		sin.sin_len = sizeof(sin);
469                 /* XXX MRT use table 0 for this sort of thing */
470 		CURVNET_SET(TD_TO_VNET(td));
471 		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
472 		    (struct sockaddr *)&nd->mygateway,
473 		    (struct sockaddr *)&mask,
474 		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
475 		CURVNET_RESTORE();
476 		if (error)
477 			panic("nfs_mountroot: RTM_ADD: %d", error);
478 	}
479 
480 	/*
481 	 * Create the rootfs mount point.
482 	 */
483 	nd->root_args.fh = nd->root_fh;
484 	nd->root_args.fhsize = nd->root_fhsize;
485 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
486 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
487 		(l >> 24) & 0xff, (l >> 16) & 0xff,
488 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
489 	printf("NFS ROOT: %s\n", buf);
490 	nd->root_args.hostname = buf;
491 	if ((error = nfs_mountdiskless(buf,
492 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
493 		return (error);
494 	}
495 
496 	/*
497 	 * This is not really an nfs issue, but it is much easier to
498 	 * set hostname here and then let the "/etc/rc.xxx" files
499 	 * mount the right /var based upon its preset value.
500 	 */
501 	mtx_lock(&prison0.pr_mtx);
502 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
503 	    sizeof(prison0.pr_hostname));
504 	mtx_unlock(&prison0.pr_mtx);
505 	inittodr(ntohl(nd->root_time));
506 	return (0);
507 }
508 
509 /*
510  * Internal version of mount system call for diskless setup.
511  */
512 static int
513 nfs_mountdiskless(char *path,
514     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
515     struct vnode **vpp, struct mount *mp)
516 {
517 	struct sockaddr *nam;
518 	int dirlen, error;
519 	char *dirpath;
520 
521 	/*
522 	 * Find the directory path in "path", which also has the server's
523 	 * name/ip address in it.
524 	 */
525 	dirpath = strchr(path, ':');
526 	if (dirpath != NULL)
527 		dirlen = strlen(++dirpath);
528 	else
529 		dirlen = 0;
530 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
531 	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
532 	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
533 	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
534 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
535 		return (error);
536 	}
537 	return (0);
538 }
539 
540 static void
541 nfs_sec_name(char *sec, int *flagsp)
542 {
543 	if (!strcmp(sec, "krb5"))
544 		*flagsp |= NFSMNT_KERB;
545 	else if (!strcmp(sec, "krb5i"))
546 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
547 	else if (!strcmp(sec, "krb5p"))
548 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
549 }
550 
551 static void
552 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
553     const char *hostname, struct ucred *cred, struct thread *td)
554 {
555 	int s;
556 	int adjsock;
557 	char *p;
558 
559 	s = splnet();
560 
561 	/*
562 	 * Set read-only flag if requested; otherwise, clear it if this is
563 	 * an update.  If this is not an update, then either the read-only
564 	 * flag is already clear, or this is a root mount and it was set
565 	 * intentionally at some previous point.
566 	 */
567 	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
568 		MNT_ILOCK(mp);
569 		mp->mnt_flag |= MNT_RDONLY;
570 		MNT_IUNLOCK(mp);
571 	} else if (mp->mnt_flag & MNT_UPDATE) {
572 		MNT_ILOCK(mp);
573 		mp->mnt_flag &= ~MNT_RDONLY;
574 		MNT_IUNLOCK(mp);
575 	}
576 
577 	/*
578 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
579 	 * no sense in that context.  Also, set up appropriate retransmit
580 	 * and soft timeout behavior.
581 	 */
582 	if (argp->sotype == SOCK_STREAM) {
583 		nmp->nm_flag &= ~NFSMNT_NOCONN;
584 		nmp->nm_timeo = NFS_MAXTIMEO;
585 		if ((argp->flags & NFSMNT_NFSV4) != 0)
586 			nmp->nm_retry = INT_MAX;
587 		else
588 			nmp->nm_retry = NFS_RETRANS_TCP;
589 	}
590 
591 	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
592 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
593 		argp->flags &= ~NFSMNT_RDIRPLUS;
594 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
595 	}
596 
597 	/* Re-bind if rsrvd port requested and wasn't on one */
598 	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
599 		  && (argp->flags & NFSMNT_RESVPORT);
600 	/* Also re-bind if we're switching to/from a connected UDP socket */
601 	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
602 		    (argp->flags & NFSMNT_NOCONN));
603 
604 	/* Update flags atomically.  Don't change the lock bits. */
605 	nmp->nm_flag = argp->flags | nmp->nm_flag;
606 	splx(s);
607 
608 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
609 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
610 		if (nmp->nm_timeo < NFS_MINTIMEO)
611 			nmp->nm_timeo = NFS_MINTIMEO;
612 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
613 			nmp->nm_timeo = NFS_MAXTIMEO;
614 	}
615 
616 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
617 		nmp->nm_retry = argp->retrans;
618 		if (nmp->nm_retry > NFS_MAXREXMIT)
619 			nmp->nm_retry = NFS_MAXREXMIT;
620 	}
621 
622 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
623 		nmp->nm_wsize = argp->wsize;
624 		/* Round down to multiple of blocksize */
625 		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
626 		if (nmp->nm_wsize <= 0)
627 			nmp->nm_wsize = NFS_FABLKSIZE;
628 	}
629 
630 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
631 		nmp->nm_rsize = argp->rsize;
632 		/* Round down to multiple of blocksize */
633 		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
634 		if (nmp->nm_rsize <= 0)
635 			nmp->nm_rsize = NFS_FABLKSIZE;
636 	}
637 
638 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
639 		nmp->nm_readdirsize = argp->readdirsize;
640 	}
641 
642 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
643 		nmp->nm_acregmin = argp->acregmin;
644 	else
645 		nmp->nm_acregmin = NFS_MINATTRTIMO;
646 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
647 		nmp->nm_acregmax = argp->acregmax;
648 	else
649 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
650 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
651 		nmp->nm_acdirmin = argp->acdirmin;
652 	else
653 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
654 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
655 		nmp->nm_acdirmax = argp->acdirmax;
656 	else
657 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
658 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
659 		nmp->nm_acdirmin = nmp->nm_acdirmax;
660 	if (nmp->nm_acregmin > nmp->nm_acregmax)
661 		nmp->nm_acregmin = nmp->nm_acregmax;
662 
663 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
664 		if (argp->readahead <= NFS_MAXRAHEAD)
665 			nmp->nm_readahead = argp->readahead;
666 		else
667 			nmp->nm_readahead = NFS_MAXRAHEAD;
668 	}
669 	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
670 		if (argp->wcommitsize < nmp->nm_wsize)
671 			nmp->nm_wcommitsize = nmp->nm_wsize;
672 		else
673 			nmp->nm_wcommitsize = argp->wcommitsize;
674 	}
675 
676 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
677 		    (nmp->nm_soproto != argp->proto));
678 
679 	if (nmp->nm_client != NULL && adjsock) {
680 		int haslock = 0, error = 0;
681 
682 		if (nmp->nm_sotype == SOCK_STREAM) {
683 			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
684 			if (!error)
685 				haslock = 1;
686 		}
687 		if (!error) {
688 		    newnfs_disconnect(&nmp->nm_sockreq);
689 		    if (haslock)
690 			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
691 		    nmp->nm_sotype = argp->sotype;
692 		    nmp->nm_soproto = argp->proto;
693 		    if (nmp->nm_sotype == SOCK_DGRAM)
694 			while (newnfs_connect(nmp, &nmp->nm_sockreq,
695 			    cred, td, 0)) {
696 				printf("newnfs_args: retrying connect\n");
697 				(void) nfs_catnap(PSOCK, 0, "newnfscon");
698 			}
699 		}
700 	} else {
701 		nmp->nm_sotype = argp->sotype;
702 		nmp->nm_soproto = argp->proto;
703 	}
704 
705 	if (hostname != NULL) {
706 		strlcpy(nmp->nm_hostname, hostname,
707 		    sizeof(nmp->nm_hostname));
708 		p = strchr(nmp->nm_hostname, ':');
709 		if (p != NULL)
710 			*p = '\0';
711 	}
712 }
713 
/*
 * NULL-terminated list of option names; nfs_mount() passes it to
 * vfs_filteropt() to validate the options of a mount request.
 */
static const char *nfs_opts[] = {
	"from",
	"nfs_args",
	"noatime",
	"noexec",
	"suiddir",
	"nosuid",
	"nosymfollow",
	"union",
	"noclusterr",
	"noclusterw",
	"multilabel",
	"acls",
	"force",
	"update",
	"async",
	"noconn",
	"nolockd",
	"conn",
	"lockd",
	"intr",
	"rdirplus",
	"readdirsize",
	"soft",
	"hard",
	"mntudp",
	"tcp",
	"udp",
	"wsize",
	"rsize",
	"retrans",
	"acregmin",
	"acregmax",
	"acdirmin",
	"acdirmax",
	"resvport",
	"readahead",
	"hostname",
	"timeout",
	"addr",
	"fh",
	"nfsv3",
	"sec",
	"principal",
	"nfsv4",
	"gssname",
	"allgssname",
	"dirpath",
	"minorversion",
	"nametimeo",
	"negnametimeo",
	"nocto",
	"noncontigwr",
	"pnfs",
	"wcommitsize",
	NULL
};
725 
726 /*
727  * VFS Operations.
728  *
729  * mount system call
730  * It seems a bit dumb to copyinstr() the host and path here and then
731  * bcopy() them in mountnfs(), but I wanted to detect errors before
732  * doing the sockargs() call because sockargs() allocates an mbuf and
733  * an error after that means that I have to release the mbuf.
734  */
735 /* ARGSUSED */
736 static int
737 nfs_mount(struct mount *mp)
738 {
739 	struct nfs_args args = {
740 	    .version = NFS_ARGSVERSION,
741 	    .addr = NULL,
742 	    .addrlen = sizeof (struct sockaddr_in),
743 	    .sotype = SOCK_STREAM,
744 	    .proto = 0,
745 	    .fh = NULL,
746 	    .fhsize = 0,
747 	    .flags = NFSMNT_RESVPORT,
748 	    .wsize = NFS_WSIZE,
749 	    .rsize = NFS_RSIZE,
750 	    .readdirsize = NFS_READDIRSIZE,
751 	    .timeo = 10,
752 	    .retrans = NFS_RETRANS,
753 	    .readahead = NFS_DEFRAHEAD,
754 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
755 	    .hostname = NULL,
756 	    .acregmin = NFS_MINATTRTIMO,
757 	    .acregmax = NFS_MAXATTRTIMO,
758 	    .acdirmin = NFS_MINDIRATTRTIMO,
759 	    .acdirmax = NFS_MAXDIRATTRTIMO,
760 	};
761 	int error = 0, ret, len;
762 	struct sockaddr *nam = NULL;
763 	struct vnode *vp;
764 	struct thread *td;
765 	char hst[MNAMELEN];
766 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
767 	char *opt, *name, *secname;
768 	int nametimeo = NFS_DEFAULT_NAMETIMEO;
769 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
770 	int minvers = 0;
771 	int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
772 	size_t hstlen;
773 
774 	has_nfs_args_opt = 0;
775 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
776 		error = EINVAL;
777 		goto out;
778 	}
779 
780 	td = curthread;
781 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
782 		error = nfs_mountroot(mp);
783 		goto out;
784 	}
785 
786 	nfscl_init();
787 
788 	/*
789 	 * The old mount_nfs program passed the struct nfs_args
790 	 * from userspace to kernel.  The new mount_nfs program
791 	 * passes string options via nmount() from userspace to kernel
792 	 * and we populate the struct nfs_args in the kernel.
793 	 */
794 	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
795 		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
796 		    sizeof(args));
797 		if (error != 0)
798 			goto out;
799 
800 		if (args.version != NFS_ARGSVERSION) {
801 			error = EPROGMISMATCH;
802 			goto out;
803 		}
804 		has_nfs_args_opt = 1;
805 	}
806 
807 	/* Handle the new style options. */
808 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
809 		args.flags |= NFSMNT_NOCONN;
810 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
811 		args.flags |= NFSMNT_NOCONN;
812 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
813 		args.flags |= NFSMNT_NOLOCKD;
814 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
815 		args.flags &= ~NFSMNT_NOLOCKD;
816 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
817 		args.flags |= NFSMNT_INT;
818 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
819 		args.flags |= NFSMNT_RDIRPLUS;
820 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
821 		args.flags |= NFSMNT_RESVPORT;
822 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
823 		args.flags &= ~NFSMNT_RESVPORT;
824 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
825 		args.flags |= NFSMNT_SOFT;
826 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
827 		args.flags &= ~NFSMNT_SOFT;
828 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
829 		args.sotype = SOCK_DGRAM;
830 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
831 		args.sotype = SOCK_DGRAM;
832 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
833 		args.sotype = SOCK_STREAM;
834 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
835 		args.flags |= NFSMNT_NFSV3;
836 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
837 		args.flags |= NFSMNT_NFSV4;
838 		args.sotype = SOCK_STREAM;
839 	}
840 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
841 		args.flags |= NFSMNT_ALLGSSNAME;
842 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
843 		args.flags |= NFSMNT_NOCTO;
844 	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
845 		args.flags |= NFSMNT_NONCONTIGWR;
846 	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
847 		args.flags |= NFSMNT_PNFS;
848 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
849 		if (opt == NULL) {
850 			vfs_mount_error(mp, "illegal readdirsize");
851 			error = EINVAL;
852 			goto out;
853 		}
854 		ret = sscanf(opt, "%d", &args.readdirsize);
855 		if (ret != 1 || args.readdirsize <= 0) {
856 			vfs_mount_error(mp, "illegal readdirsize: %s",
857 			    opt);
858 			error = EINVAL;
859 			goto out;
860 		}
861 		args.flags |= NFSMNT_READDIRSIZE;
862 	}
863 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
864 		if (opt == NULL) {
865 			vfs_mount_error(mp, "illegal readahead");
866 			error = EINVAL;
867 			goto out;
868 		}
869 		ret = sscanf(opt, "%d", &args.readahead);
870 		if (ret != 1 || args.readahead <= 0) {
871 			vfs_mount_error(mp, "illegal readahead: %s",
872 			    opt);
873 			error = EINVAL;
874 			goto out;
875 		}
876 		args.flags |= NFSMNT_READAHEAD;
877 	}
878 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
879 		if (opt == NULL) {
880 			vfs_mount_error(mp, "illegal wsize");
881 			error = EINVAL;
882 			goto out;
883 		}
884 		ret = sscanf(opt, "%d", &args.wsize);
885 		if (ret != 1 || args.wsize <= 0) {
886 			vfs_mount_error(mp, "illegal wsize: %s",
887 			    opt);
888 			error = EINVAL;
889 			goto out;
890 		}
891 		args.flags |= NFSMNT_WSIZE;
892 	}
893 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
894 		if (opt == NULL) {
895 			vfs_mount_error(mp, "illegal rsize");
896 			error = EINVAL;
897 			goto out;
898 		}
899 		ret = sscanf(opt, "%d", &args.rsize);
900 		if (ret != 1 || args.rsize <= 0) {
901 			vfs_mount_error(mp, "illegal wsize: %s",
902 			    opt);
903 			error = EINVAL;
904 			goto out;
905 		}
906 		args.flags |= NFSMNT_RSIZE;
907 	}
908 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
909 		if (opt == NULL) {
910 			vfs_mount_error(mp, "illegal retrans");
911 			error = EINVAL;
912 			goto out;
913 		}
914 		ret = sscanf(opt, "%d", &args.retrans);
915 		if (ret != 1 || args.retrans <= 0) {
916 			vfs_mount_error(mp, "illegal retrans: %s",
917 			    opt);
918 			error = EINVAL;
919 			goto out;
920 		}
921 		args.flags |= NFSMNT_RETRANS;
922 	}
923 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
924 		ret = sscanf(opt, "%d", &args.acregmin);
925 		if (ret != 1 || args.acregmin < 0) {
926 			vfs_mount_error(mp, "illegal acregmin: %s",
927 			    opt);
928 			error = EINVAL;
929 			goto out;
930 		}
931 		args.flags |= NFSMNT_ACREGMIN;
932 	}
933 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
934 		ret = sscanf(opt, "%d", &args.acregmax);
935 		if (ret != 1 || args.acregmax < 0) {
936 			vfs_mount_error(mp, "illegal acregmax: %s",
937 			    opt);
938 			error = EINVAL;
939 			goto out;
940 		}
941 		args.flags |= NFSMNT_ACREGMAX;
942 	}
943 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
944 		ret = sscanf(opt, "%d", &args.acdirmin);
945 		if (ret != 1 || args.acdirmin < 0) {
946 			vfs_mount_error(mp, "illegal acdirmin: %s",
947 			    opt);
948 			error = EINVAL;
949 			goto out;
950 		}
951 		args.flags |= NFSMNT_ACDIRMIN;
952 	}
953 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
954 		ret = sscanf(opt, "%d", &args.acdirmax);
955 		if (ret != 1 || args.acdirmax < 0) {
956 			vfs_mount_error(mp, "illegal acdirmax: %s",
957 			    opt);
958 			error = EINVAL;
959 			goto out;
960 		}
961 		args.flags |= NFSMNT_ACDIRMAX;
962 	}
963 	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
964 		ret = sscanf(opt, "%d", &args.wcommitsize);
965 		if (ret != 1 || args.wcommitsize < 0) {
966 			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
967 			error = EINVAL;
968 			goto out;
969 		}
970 		args.flags |= NFSMNT_WCOMMITSIZE;
971 	}
972 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
973 		ret = sscanf(opt, "%d", &args.timeo);
974 		if (ret != 1 || args.timeo <= 0) {
975 			vfs_mount_error(mp, "illegal timeout: %s",
976 			    opt);
977 			error = EINVAL;
978 			goto out;
979 		}
980 		args.flags |= NFSMNT_TIMEO;
981 	}
982 	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
983 		ret = sscanf(opt, "%d", &nametimeo);
984 		if (ret != 1 || nametimeo < 0) {
985 			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
986 			error = EINVAL;
987 			goto out;
988 		}
989 	}
990 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
991 	    == 0) {
992 		ret = sscanf(opt, "%d", &negnametimeo);
993 		if (ret != 1 || negnametimeo < 0) {
994 			vfs_mount_error(mp, "illegal negnametimeo: %s",
995 			    opt);
996 			error = EINVAL;
997 			goto out;
998 		}
999 	}
1000 	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1001 	    0) {
1002 		ret = sscanf(opt, "%d", &minvers);
1003 		if (ret != 1 || minvers < 0 || minvers > 1 ||
1004 		    (args.flags & NFSMNT_NFSV4) == 0) {
1005 			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1006 			error = EINVAL;
1007 			goto out;
1008 		}
1009 	}
1010 	if (vfs_getopt(mp->mnt_optnew, "sec",
1011 		(void **) &secname, NULL) == 0)
1012 		nfs_sec_name(secname, &args.flags);
1013 
1014 	if (mp->mnt_flag & MNT_UPDATE) {
1015 		struct nfsmount *nmp = VFSTONFS(mp);
1016 
1017 		if (nmp == NULL) {
1018 			error = EIO;
1019 			goto out;
1020 		}
1021 
1022 		/*
1023 		 * If a change from TCP->UDP is done and there are thread(s)
1024 		 * that have I/O RPC(s) in progress with a tranfer size
1025 		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1026 		 * hung, retrying the RPC(s) forever. Usually these threads
1027 		 * will be seen doing an uninterruptible sleep on wait channel
1028 		 * "newnfsreq" (truncated to "newnfsre" by procstat).
1029 		 */
1030 		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1031 			tprintf(td->td_proc, LOG_WARNING,
1032 	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1033 
1034 		/*
1035 		 * When doing an update, we can't change version,
1036 		 * security, switch lockd strategies or change cookie
1037 		 * translation
1038 		 */
1039 		args.flags = (args.flags &
1040 		    ~(NFSMNT_NFSV3 |
1041 		      NFSMNT_NFSV4 |
1042 		      NFSMNT_KERB |
1043 		      NFSMNT_INTEGRITY |
1044 		      NFSMNT_PRIVACY |
1045 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1046 		    (nmp->nm_flag &
1047 			(NFSMNT_NFSV3 |
1048 			 NFSMNT_NFSV4 |
1049 			 NFSMNT_KERB |
1050 			 NFSMNT_INTEGRITY |
1051 			 NFSMNT_PRIVACY |
1052 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1053 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1054 		goto out;
1055 	}
1056 
1057 	/*
1058 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1059 	 * or no-connection mode for those protocols that support
1060 	 * no-connection mode (the flag will be cleared later for protocols
1061 	 * that do not support no-connection mode).  This will allow a client
1062 	 * to receive replies from a different IP then the request was
1063 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1064 	 * not 0.
1065 	 */
1066 	if (nfs_ip_paranoia == 0)
1067 		args.flags |= NFSMNT_NOCONN;
1068 
1069 	if (has_nfs_args_opt != 0) {
1070 		/*
1071 		 * In the 'nfs_args' case, the pointers in the args
1072 		 * structure are in userland - we copy them in here.
1073 		 */
1074 		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1075 			vfs_mount_error(mp, "Bad file handle");
1076 			error = EINVAL;
1077 			goto out;
1078 		}
1079 		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1080 		    args.fhsize);
1081 		if (error != 0)
1082 			goto out;
1083 		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1084 		if (error != 0)
1085 			goto out;
1086 		bzero(&hst[hstlen], MNAMELEN - hstlen);
1087 		args.hostname = hst;
1088 		/* sockargs() call must be after above copyin() calls */
1089 		error = getsockaddr(&nam, (caddr_t)args.addr,
1090 		    args.addrlen);
1091 		if (error != 0)
1092 			goto out;
1093 	} else {
1094 		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1095 		    &args.fhsize) == 0) {
1096 			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1097 				vfs_mount_error(mp, "Bad file handle");
1098 				error = EINVAL;
1099 				goto out;
1100 			}
1101 			bcopy(args.fh, nfh, args.fhsize);
1102 		} else {
1103 			args.fhsize = 0;
1104 		}
1105 		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1106 		    (void **)&args.hostname, &len);
1107 		if (args.hostname == NULL) {
1108 			vfs_mount_error(mp, "Invalid hostname");
1109 			error = EINVAL;
1110 			goto out;
1111 		}
1112 		bcopy(args.hostname, hst, MNAMELEN);
1113 		hst[MNAMELEN - 1] = '\0';
1114 	}
1115 
1116 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1117 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1118 	else
1119 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1120 	srvkrbnamelen = strlen(srvkrbname);
1121 
1122 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1123 		strlcpy(krbname, name, sizeof (krbname));
1124 	else
1125 		krbname[0] = '\0';
1126 	krbnamelen = strlen(krbname);
1127 
1128 	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1129 		strlcpy(dirpath, name, sizeof (dirpath));
1130 	else
1131 		dirpath[0] = '\0';
1132 	dirlen = strlen(dirpath);
1133 
1134 	if (has_nfs_args_opt == 0) {
1135 		if (vfs_getopt(mp->mnt_optnew, "addr",
1136 		    (void **)&args.addr, &args.addrlen) == 0) {
1137 			if (args.addrlen > SOCK_MAXADDRLEN) {
1138 				error = ENAMETOOLONG;
1139 				goto out;
1140 			}
1141 			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1142 			bcopy(args.addr, nam, args.addrlen);
1143 			nam->sa_len = args.addrlen;
1144 		} else {
1145 			vfs_mount_error(mp, "No server address");
1146 			error = EINVAL;
1147 			goto out;
1148 		}
1149 	}
1150 
1151 	args.fh = nfh;
1152 	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1153 	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1154 	    nametimeo, negnametimeo, minvers);
1155 out:
1156 	if (!error) {
1157 		MNT_ILOCK(mp);
1158 		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF;
1159 		MNT_IUNLOCK(mp);
1160 	}
1161 	return (error);
1162 }
1163 
1164 
1165 /*
1166  * VFS Operations.
1167  *
1168  * mount system call
1169  * It seems a bit dumb to copyinstr() the host and path here and then
1170  * bcopy() them in mountnfs(), but I wanted to detect errors before
1171  * doing the sockargs() call because sockargs() allocates an mbuf and
1172  * an error after that means that I have to release the mbuf.
1173  */
1174 /* ARGSUSED */
1175 static int
1176 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1177 {
1178 	int error;
1179 	struct nfs_args args;
1180 
1181 	error = copyin(data, &args, sizeof (struct nfs_args));
1182 	if (error)
1183 		return error;
1184 
1185 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1186 
1187 	error = kernel_mount(ma, flags);
1188 	return (error);
1189 }
1190 
/*
 * Common code for mount and mountroot
 *
 * Builds a new struct nfsmount for "mp", connects to the server described
 * by "nam" and obtains the root vnode of the mount.
 *
 *	argp		- mount arguments (flags, file handle, socket type...)
 *	mp		- mount point being set up
 *	nam		- server address; this function takes ownership of it:
 *			  it is stored in nmp->nm_nam on success and released
 *			  with FREE(nam, M_SONAME) on every other return path
 *	hst		- host name, copied into mp->mnt_stat.f_mntfromname
 *	krbname, dirpath, srvkrbname (and their lengths)
 *			- names copied into the tail of the nfsmount structure
 *	vpp		- set to the root vnode on success
 *	cred, td	- credentials and thread used for the RPCs
 *	nametimeo, negnametimeo, minvers
 *			- defaults stored in the nfsmount before
 *			  nfs_decode_args() is called
 *
 * Returns 0 on success, with *vpp referenced and unlocked.  Returns 0
 * without doing anything (other than freeing "nam") when MNT_UPDATE is set.
 * Otherwise returns an error after tearing down everything that was set up;
 * EIO is returned when no root file handle is available.
 */
static int
mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
    struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
    int minvers)
{
	struct nfsmount *nmp;
	struct nfsnode *np;
	int error, trycnt, ret;
	struct nfsvattr nfsva;
	struct nfsclclient *clp;
	struct nfsclds *dsp, *tdsp;
	uint32_t lease;
	/* Per-mount counter; seeded from nfsboottime on first use below. */
	static u_int64_t clval = 0;

	NFSCL_DEBUG(3, "in mnt\n");
	clp = NULL;
	if (mp->mnt_flag & MNT_UPDATE) {
		/* Updates are not processed here; just drop the address. */
		nmp = VFSTONFS(mp);
		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
		FREE(nam, M_SONAME);
		return (0);
	} else {
		/*
		 * The three names are stored after the structure, each
		 * followed by a NUL (see the copies below).
		 * NOTE(review): only 2 extra bytes are requested for 3
		 * terminators; presumably nm_name[] in struct nfsmount
		 * supplies the remaining byte - confirm.
		 */
		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
		    krbnamelen + dirlen + srvkrbnamelen + 2,
		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
		TAILQ_INIT(&nmp->nm_bufq);
		if (clval == 0)
			clval = (u_int64_t)nfsboottime.tv_sec;
		nmp->nm_clval = clval++;
		nmp->nm_krbnamelen = krbnamelen;
		nmp->nm_dirpathlen = dirlen;
		nmp->nm_srvkrbnamelen = srvkrbnamelen;
		if (td->td_ucred->cr_uid != (uid_t)0) {
			/*
			 * nm_uid is used to get KerberosV credentials for
			 * the nfsv4 state handling operations if there is
			 * no host based principal set. Use the uid of
			 * this user if not root, since they are doing the
			 * mount. I don't think setting this for root will
			 * work, since root normally does not have user
			 * credentials in a credentials cache.
			 */
			nmp->nm_uid = td->td_ucred->cr_uid;
		} else {
			/*
			 * Just set to -1, so it won't be used.
			 */
			nmp->nm_uid = (uid_t)-1;
		}

		/* Copy and null terminate all the names */
		if (nmp->nm_krbnamelen > 0) {
			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
		}
		if (nmp->nm_dirpathlen > 0) {
			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
			    nmp->nm_dirpathlen);
			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
			    + 1] = '\0';
		}
		if (nmp->nm_srvkrbnamelen > 0) {
			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
			    nmp->nm_srvkrbnamelen);
			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
			    + nmp->nm_srvkrbnamelen + 2] = '\0';
		}
		/* The reference taken here is dropped by crfree() at "bad". */
		nmp->nm_sockreq.nr_cred = crhold(cred);
		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
		mp->mnt_data = nmp;
		nmp->nm_getinfo = nfs_getnlminfo;
		nmp->nm_vinvalbuf = ncl_vinvalbuf;
	}
	vfs_getnewfsid(mp);
	nmp->nm_mountp = mp;
	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);

	/*
	 * Since nfs_decode_args() might optionally set them, these
	 * need to be set to defaults before the call, so that the
	 * optional settings aren't overwritten.
	 */
	nmp->nm_nametimeo = nametimeo;
	nmp->nm_negnametimeo = negnametimeo;
	nmp->nm_timeo = NFS_TIMEO;
	nmp->nm_retry = NFS_RETRANS;
	nmp->nm_readahead = NFS_DEFRAHEAD;
	if (desiredvnodes >= 11000)
		nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
	else
		nmp->nm_wcommitsize = hibufspace / 10;
	/* The minor version is only recorded for NFSv4 mounts. */
	if ((argp->flags & NFSMNT_NFSV4) != 0)
		nmp->nm_minorvers = minvers;
	else
		nmp->nm_minorvers = 0;

	nfs_decode_args(mp, nmp, argp, hst, cred, td);

	/*
	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
	 * high, depending on whether we end up with negative offsets in
	 * the client or server somewhere.  2GB-1 may be safer.
	 *
	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
	 * that we can handle until we find out otherwise.
	 */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
		nmp->nm_maxfilesize = 0xffffffffLL;
	else
		nmp->nm_maxfilesize = OFF_MAX;

	/* For V2 (neither V3 nor V4 flag set) force the default I/O sizes. */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
		nmp->nm_wsize = NFS_WSIZE;
		nmp->nm_rsize = NFS_RSIZE;
		nmp->nm_readdirsize = NFS_READDIRSIZE;
	}
	nmp->nm_numgrps = NFS_MAXGRPS;
	/* Negative delay settings are clamped to 0. */
	nmp->nm_tprintf_delay = nfs_tprintf_delay;
	if (nmp->nm_tprintf_delay < 0)
		nmp->nm_tprintf_delay = 0;
	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
	if (nmp->nm_tprintf_initial_delay < 0)
		nmp->nm_tprintf_initial_delay = 0;
	nmp->nm_fhsize = argp->fhsize;
	if (nmp->nm_fhsize > 0)
		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
	nmp->nm_nam = nam;
	/* Set up the sockets and per-host congestion */
	nmp->nm_sotype = argp->sotype;
	nmp->nm_soproto = argp->proto;
	nmp->nm_sockreq.nr_prog = NFS_PROG;
	if ((argp->flags & NFSMNT_NFSV4))
		nmp->nm_sockreq.nr_vers = NFS_VER4;
	else if ((argp->flags & NFSMNT_NFSV3))
		nmp->nm_sockreq.nr_vers = NFS_VER3;
	else
		nmp->nm_sockreq.nr_vers = NFS_VER2;


	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
		goto bad;
	/* For NFSv4.1, get the clientid now. */
	if (nmp->nm_minorvers > 0) {
		NFSCL_DEBUG(3, "at getcl\n");
		error = nfscl_getcl(mp, cred, td, 0, &clp);
		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
		if (error != 0)
			goto bad;
	}

	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
	    nmp->nm_dirpathlen > 0) {
		NFSCL_DEBUG(3, "in dirp\n");
		/*
		 * If the fhsize on the mount point == 0 for V4, the mount
		 * path needs to be looked up.
		 */
		/* Up to 3 attempts, napping after each failed one. */
		trycnt = 3;
		do {
			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
			    cred, td);
			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
			if (error)
				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
		} while (error && --trycnt > 0);
		if (error) {
			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
			goto bad;
		}
	}

	/*
	 * A reference count is needed on the nfsnode representing the
	 * remote root.  If this object is not persistent, then backward
	 * traversals of the mount point (i.e. "..") will not work if
	 * the nfsnode gets flushed out of the cache. Ufs does not have
	 * this problem, because one can identify root inodes by their
	 * number == ROOTINO (2).
	 */
	if (nmp->nm_fhsize > 0) {
		/*
		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
		 * non-zero for the root vnode. f_iosize will be set correctly
		 * by nfs_statfs() before any I/O occurs.
		 */
		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
		    LK_EXCLUSIVE);
		if (error)
			goto bad;
		*vpp = NFSTOV(np);

		/*
		 * Get file attributes and transfer parameters for the
		 * mountpoint.  This has the side effect of filling in
		 * (*vpp)->v_type with the correct value.
		 */
		/* A failure here is not fatal: "ret" is kept apart from "error". */
		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
		    cred, td, &nfsva, NULL, &lease);
		if (ret) {
			/*
			 * Just set default values to get things going.
			 */
			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
			nfsva.na_vattr.va_type = VDIR;
			nfsva.na_vattr.va_mode = 0777;
			nfsva.na_vattr.va_nlink = 100;
			nfsva.na_vattr.va_uid = (uid_t)0;
			nfsva.na_vattr.va_gid = (gid_t)0;
			nfsva.na_vattr.va_fileid = 2;
			nfsva.na_vattr.va_gen = 1;
			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
			nfsva.na_vattr.va_size = 512 * 1024;
			lease = 60;
		}
		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
		if (nmp->nm_minorvers > 0) {
			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
			NFSLOCKCLSTATE();
			clp->nfsc_renew = NFSCL_RENEW(lease);
			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
			/* Bump the revision, skipping 0 if it wraps. */
			clp->nfsc_clientidrev++;
			if (clp->nfsc_clientidrev == 0)
				clp->nfsc_clientidrev++;
			NFSUNLOCKCLSTATE();
			/*
			 * Mount will succeed, so the renew thread can be
			 * started now.
			 */
			nfscl_start_renewthread(clp);
			nfscl_clientrelease(clp);
		}
		if (argp->flags & NFSMNT_NFSV3)
			ncl_fsinfo(nmp, *vpp, cred, td);

		/* Mark if the mount point supports NFSv4 ACLs. */
		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
		    ret == 0 &&
		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_NFS4ACLS;
			MNT_IUNLOCK(mp);
		}

		/*
		 * Lose the lock but keep the ref.
		 */
		NFSVOPUNLOCK(*vpp, 0);
		return (0);
	}
	/* Only reached when there is still no root file handle. */
	error = EIO;

bad:
	/* Undo everything set up above; "error" holds the failure code. */
	if (clp != NULL)
		nfscl_clientrelease(clp);
	newnfs_disconnect(&nmp->nm_sockreq);
	crfree(nmp->nm_sockreq.nr_cred);
	if (nmp->nm_sockreq.nr_auth != NULL)
		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
	mtx_destroy(&nmp->nm_mtx);
	if (nmp->nm_clp != NULL) {
		NFSLOCKCLSTATE();
		LIST_REMOVE(nmp->nm_clp, nfsc_list);
		NFSUNLOCKCLSTATE();
		free(nmp->nm_clp, M_NFSCLCLIENT);
	}
	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
		nfscl_freenfsclds(dsp);
	FREE(nmp, M_NEWNFSMNT);
	FREE(nam, M_SONAME);
	return (error);
}
1470 
1471 /*
1472  * unmount system call
1473  */
1474 static int
1475 nfs_unmount(struct mount *mp, int mntflags)
1476 {
1477 	struct thread *td;
1478 	struct nfsmount *nmp;
1479 	int error, flags = 0, i, trycnt = 0;
1480 	struct nfsclds *dsp, *tdsp;
1481 
1482 	td = curthread;
1483 
1484 	if (mntflags & MNT_FORCE)
1485 		flags |= FORCECLOSE;
1486 	nmp = VFSTONFS(mp);
1487 	/*
1488 	 * Goes something like this..
1489 	 * - Call vflush() to clear out vnodes for this filesystem
1490 	 * - Close the socket
1491 	 * - Free up the data structures
1492 	 */
1493 	/* In the forced case, cancel any outstanding requests. */
1494 	if (mntflags & MNT_FORCE) {
1495 		error = newnfs_nmcancelreqs(nmp);
1496 		if (error)
1497 			goto out;
1498 		/* For a forced close, get rid of the renew thread now */
1499 		nfscl_umount(nmp, td);
1500 	}
1501 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1502 	do {
1503 		error = vflush(mp, 1, flags, td);
1504 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1505 			(void) nfs_catnap(PSOCK, error, "newndm");
1506 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1507 	if (error)
1508 		goto out;
1509 
1510 	/*
1511 	 * We are now committed to the unmount.
1512 	 */
1513 	if ((mntflags & MNT_FORCE) == 0)
1514 		nfscl_umount(nmp, td);
1515 	/* Make sure no nfsiods are assigned to this mount. */
1516 	mtx_lock(&ncl_iod_mutex);
1517 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1518 		if (ncl_iodmount[i] == nmp) {
1519 			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1520 			ncl_iodmount[i] = NULL;
1521 		}
1522 	mtx_unlock(&ncl_iod_mutex);
1523 	newnfs_disconnect(&nmp->nm_sockreq);
1524 	crfree(nmp->nm_sockreq.nr_cred);
1525 	FREE(nmp->nm_nam, M_SONAME);
1526 	if (nmp->nm_sockreq.nr_auth != NULL)
1527 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1528 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1529 	mtx_destroy(&nmp->nm_mtx);
1530 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1531 		nfscl_freenfsclds(dsp);
1532 	FREE(nmp, M_NEWNFSMNT);
1533 out:
1534 	return (error);
1535 }
1536 
1537 /*
1538  * Return root of a filesystem
1539  */
1540 static int
1541 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1542 {
1543 	struct vnode *vp;
1544 	struct nfsmount *nmp;
1545 	struct nfsnode *np;
1546 	int error;
1547 
1548 	nmp = VFSTONFS(mp);
1549 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1550 	if (error)
1551 		return error;
1552 	vp = NFSTOV(np);
1553 	/*
1554 	 * Get transfer parameters and attributes for root vnode once.
1555 	 */
1556 	mtx_lock(&nmp->nm_mtx);
1557 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1558 		mtx_unlock(&nmp->nm_mtx);
1559 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1560 	} else
1561 		mtx_unlock(&nmp->nm_mtx);
1562 	if (vp->v_type == VNON)
1563 	    vp->v_type = VDIR;
1564 	vp->v_vflag |= VV_ROOT;
1565 	*vpp = vp;
1566 	return (0);
1567 }
1568 
1569 /*
1570  * Flush out the buffer cache
1571  */
1572 /* ARGSUSED */
1573 static int
1574 nfs_sync(struct mount *mp, int waitfor)
1575 {
1576 	struct vnode *vp, *mvp;
1577 	struct thread *td;
1578 	int error, allerror = 0;
1579 
1580 	td = curthread;
1581 
1582 	MNT_ILOCK(mp);
1583 	/*
1584 	 * If a forced dismount is in progress, return from here so that
1585 	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1586 	 * calling VFS_UNMOUNT().
1587 	 */
1588 	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1589 		MNT_IUNLOCK(mp);
1590 		return (EBADF);
1591 	}
1592 	MNT_IUNLOCK(mp);
1593 
1594 	/*
1595 	 * Force stale buffer cache information to be flushed.
1596 	 */
1597 loop:
1598 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1599 		/* XXX Racy bv_cnt check. */
1600 		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1601 		    waitfor == MNT_LAZY) {
1602 			VI_UNLOCK(vp);
1603 			continue;
1604 		}
1605 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1606 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1607 			goto loop;
1608 		}
1609 		error = VOP_FSYNC(vp, waitfor, td);
1610 		if (error)
1611 			allerror = error;
1612 		NFSVOPUNLOCK(vp, 0);
1613 		vrele(vp);
1614 	}
1615 	return (allerror);
1616 }
1617 
1618 static int
1619 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1620 {
1621 	struct nfsmount *nmp = VFSTONFS(mp);
1622 	struct vfsquery vq;
1623 	int error;
1624 
1625 	bzero(&vq, sizeof(vq));
1626 	switch (op) {
1627 #if 0
1628 	case VFS_CTL_NOLOCKS:
1629 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1630  		if (req->oldptr != NULL) {
1631  			error = SYSCTL_OUT(req, &val, sizeof(val));
1632  			if (error)
1633  				return (error);
1634  		}
1635  		if (req->newptr != NULL) {
1636  			error = SYSCTL_IN(req, &val, sizeof(val));
1637  			if (error)
1638  				return (error);
1639 			if (val)
1640 				nmp->nm_flag |= NFSMNT_NOLOCKS;
1641 			else
1642 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1643  		}
1644 		break;
1645 #endif
1646 	case VFS_CTL_QUERY:
1647 		mtx_lock(&nmp->nm_mtx);
1648 		if (nmp->nm_state & NFSSTA_TIMEO)
1649 			vq.vq_flags |= VQ_NOTRESP;
1650 		mtx_unlock(&nmp->nm_mtx);
1651 #if 0
1652 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1653 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1654 			vq.vq_flags |= VQ_NOTRESPLOCK;
1655 #endif
1656 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1657 		break;
1658  	case VFS_CTL_TIMEO:
1659  		if (req->oldptr != NULL) {
1660  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1661  			    sizeof(nmp->nm_tprintf_initial_delay));
1662  			if (error)
1663  				return (error);
1664  		}
1665  		if (req->newptr != NULL) {
1666 			error = vfs_suser(mp, req->td);
1667 			if (error)
1668 				return (error);
1669  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1670  			    sizeof(nmp->nm_tprintf_initial_delay));
1671  			if (error)
1672  				return (error);
1673  			if (nmp->nm_tprintf_initial_delay < 0)
1674  				nmp->nm_tprintf_initial_delay = 0;
1675  		}
1676 		break;
1677 	default:
1678 		return (ENOTSUP);
1679 	}
1680 	return (0);
1681 }
1682 
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.  The result of the cancel call is not examined.
 */
static void
nfs_purge(struct mount *mp)
{

	(void)newnfs_nmcancelreqs(VFSTONFS(mp));
}
1695 
1696 /*
1697  * Extract the information needed by the nlm from the nfs vnode.
1698  */
1699 static void
1700 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1701     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1702     struct timeval *timeop)
1703 {
1704 	struct nfsmount *nmp;
1705 	struct nfsnode *np = VTONFS(vp);
1706 
1707 	nmp = VFSTONFS(vp->v_mount);
1708 	if (fhlenp != NULL)
1709 		*fhlenp = (size_t)np->n_fhp->nfh_len;
1710 	if (fhp != NULL)
1711 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1712 	if (sp != NULL)
1713 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1714 	if (is_v3p != NULL)
1715 		*is_v3p = NFS_ISV3(vp);
1716 	if (sizep != NULL)
1717 		*sizep = np->n_size;
1718 	if (timeop != NULL) {
1719 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1720 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1721 	}
1722 }
1723 
/*
 * Append the option name "opt" to the output string when "testval" is
 * non-zero and the name, plus its terminating NUL, fits in the space
 * that is left.
 *
 * *buf is the current write position and *blen the number of bytes left
 * there; both are advanced past the text written, so that *buf ends up
 * pointing at the terminating NUL.  "nmp" is not used.
 */
static __inline void
nfscl_printopt(struct nfsmount *nmp, int testval,
    char *opt, char **buf, size_t *blen)
{
	size_t optlen;
	int written;

	if (testval == 0)
		return;
	optlen = strlen(opt);
	if (*blen <= optlen)
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	/* Cannot differ given the space check above; flag it if it does. */
	if (written != optlen)
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
1741 
/*
 * Append "opt=optval" (optval in decimal) to the output string.
 *
 * *buf is the current write position and *blen the number of bytes left
 * there.  When the whole text, plus its terminating NUL, fits, both are
 * advanced past it.  When it does not fit, nothing is appended: any
 * partial text produced by snprintf() is erased again, so that the
 * string never ends in an option whose value has lost digits.
 * "nmp" is not used.
 */
static __inline void
nfscl_printoptval(struct nfsmount *nmp, int optval,
    char *opt, char **buf, size_t *blen)
{
	int len;

	/* Need room for at least the name, the '=' and the NUL. */
	if (*blen <= strlen(opt) + 1)
		return;

	len = snprintf(*buf, *blen, "%s=%d", opt, optval);
	if (len < 0 || (size_t)len >= *blen) {
		/* Truncated: drop the partial option, leave position as is. */
		**buf = '\0';
		return;
	}
	*buf += len;
	*blen -= len;
}
1759 
1760 /*
1761  * Load the option flags and values into the buffer.
1762  */
1763 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1764 {
1765 	char *buf;
1766 	size_t blen;
1767 
1768 	buf = buffer;
1769 	blen = buflen;
1770 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1771 	    &blen);
1772 	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1773 		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1774 		    &blen);
1775 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1776 		    &buf, &blen);
1777 	}
1778 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1779 	    &blen);
1780 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1781 	    "nfsv2", &buf, &blen);
1782 	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1783 	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1784 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1785 	    &buf, &blen);
1786 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1787 	    &buf, &blen);
1788 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1789 	    &blen);
1790 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1791 	    &blen);
1792 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1793 	    &blen);
1794 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1795 	    &blen);
1796 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1797 	    &blen);
1798 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
1799 	    ",noncontigwr", &buf, &blen);
1800 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1801 	    0, ",lockd", &buf, &blen);
1802 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1803 	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1804 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1805 	    &buf, &blen);
1806 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1807 	    &buf, &blen);
1808 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1809 	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
1810 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1811 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1812 	    &buf, &blen);
1813 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1814 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
1815 	    &buf, &blen);
1816 	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1817 	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1818 	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
1819 	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
1820 	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
1821 	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
1822 	    &blen);
1823 	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
1824 	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
1825 	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
1826 	    &blen);
1827 	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
1828 	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
1829 	    &blen);
1830 	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
1831 	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
1832 }
1833 
1834