xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision 5dcd9c10612684d1c823670cbb5b4715028784e7)
1 /*-
2  * Copyright (c) 1989, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/lock.h>
50 #include <sys/malloc.h>
51 #include <sys/mbuf.h>
52 #include <sys/module.h>
53 #include <sys/mount.h>
54 #include <sys/proc.h>
55 #include <sys/socket.h>
56 #include <sys/socketvar.h>
57 #include <sys/sockio.h>
58 #include <sys/sysctl.h>
59 #include <sys/vnode.h>
60 #include <sys/signalvar.h>
61 
62 #include <vm/vm.h>
63 #include <vm/vm_extern.h>
64 #include <vm/uma.h>
65 
66 #include <net/if.h>
67 #include <net/route.h>
68 #include <netinet/in.h>
69 
70 #include <fs/nfs/nfsport.h>
71 #include <fs/nfsclient/nfsnode.h>
72 #include <fs/nfsclient/nfsmount.h>
73 #include <fs/nfsclient/nfs.h>
74 #include <fs/nfsclient/nfsdiskless.h>
75 
/* Register the "nfscl" kernel feature string. */
FEATURE(nfscl, "NFSv4 client");

/* External declarations of NFS client globals referenced from this file. */
extern int nfscl_ticks;
extern struct timeval nfsboottime;
extern struct nfsstats	newnfsstats;

/* malloc(9) types for NFS request headers and per-mount structures. */
MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");

SYSCTL_DECL(_vfs_newnfs);
/* Expose the newnfsstats structure (read/write) under _vfs_newnfs. */
SYSCTL_STRUCT(_vfs_newnfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
	&newnfsstats, nfsstats, "S,nfsstats");
/*
 * When this is 0, nfs_mount() adds NFSMNT_NOCONN to the flags of a new
 * mount; the default of 1 leaves the flag as requested by the caller.
 */
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_newnfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0, "");
/*
 * NOTE(review): presumably the delay before the first "not responding"
 * console message; not used in the visible part of this file -- confirm.
 */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_INITIAL_DELAY,
        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
/* how long between console messages "nfs server foo not responding" */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_DELAY,
        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
98 
/* Forward declarations of file-local helpers. */
static void	nfs_sec_name(char *, int *);
static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
		    struct nfs_args *argp, const char *, struct ucred *,
		    struct thread *);
static int	mountnfs(struct nfs_args *, struct mount *,
		    struct sockaddr *, char *, u_char *, u_char *, u_char *,
		    struct vnode **, struct ucred *, struct thread *, int);
static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
		    struct sockaddr_storage *, int *, off_t *,
		    struct timeval *);
/* VFS entry points implemented in this file and wired into nfs_vfsops. */
static vfs_mount_t nfs_mount;
static vfs_cmount_t nfs_cmount;
static vfs_unmount_t nfs_unmount;
static vfs_root_t nfs_root;
static vfs_statfs_t nfs_statfs;
static vfs_sync_t nfs_sync;
static vfs_sysctl_t nfs_sysctl;

/*
 * nfs vfs operations.
 *
 * The init/uninit hooks (ncl_init, ncl_uninit) are not defined in the
 * visible part of this file; the remaining hooks are the static
 * functions declared above.
 */
static struct vfsops nfs_vfsops = {
	.vfs_init =		ncl_init,
	.vfs_mount =		nfs_mount,
	.vfs_cmount =		nfs_cmount,
	.vfs_root =		nfs_root,
	.vfs_statfs =		nfs_statfs,
	.vfs_sync =		nfs_sync,
	.vfs_uninit =		ncl_uninit,
	.vfs_unmount =		nfs_unmount,
	.vfs_sysctl =		nfs_sysctl,
};
/* Register the operations table under the name "newnfs". */
VFS_SET(nfs_vfsops, newnfs, VFCF_NETWORK);

/* So that loader and kldload(2) can find us, wherever we are.. */
MODULE_VERSION(newnfs, 1);
135 
/*
 * This structure must be filled in by a primary bootstrap or bootstrap
 * server for a diskless/dataless machine. It is initialized below just
 * to ensure that it is allocated to initialized data (.data not .bss).
 */
struct nfs_diskless newnfs_diskless = { { { 0 } } };
struct nfsv3_diskless newnfsv3_diskless = { { { 0 } } };
/*
 * State of the diskless information as used in this file:
 *   0 - nothing filled in; ncl_mountroot() returns -1.
 *   1 - newnfs_diskless holds the data and ncl_mountroot() converts it
 *       with nfs_convert_diskless().
 *   3 - set by nfs_convert_diskless() once newnfsv3_diskless is populated.
 */
int newnfs_diskless_valid = 0;

SYSCTL_INT(_vfs_newnfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &newnfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

/* Read-only view of newnfsv3_diskless.root_hostnam. */
SYSCTL_STRING(_vfs_newnfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    newnfsv3_diskless.root_hostnam, 0, "Path to nfs root");

/* Read-only opaque view of newnfsv3_diskless.root_saddr. */
SYSCTL_OPAQUE(_vfs_newnfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &newnfsv3_diskless.root_saddr, sizeof newnfsv3_diskless.root_saddr,
    "%Ssockaddr_in", "Diskless root nfs address");


/* Prototypes for the diskless-boot helpers. */
void		newnfsargs_ntoh(struct nfs_args *);
static int	nfs_mountdiskless(char *,
		    struct sockaddr_in *, struct nfs_args *,
		    struct thread *, struct vnode **, struct mount *);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
164 
165 int
166 newnfs_iosize(struct nfsmount *nmp)
167 {
168 	int iosize, maxio;
169 
170 	/* First, set the upper limit for iosize */
171 	if (nmp->nm_flag & NFSMNT_NFSV4) {
172 		maxio = NFS_MAXBSIZE;
173 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
174 		if (nmp->nm_sotype == SOCK_DGRAM)
175 			maxio = NFS_MAXDGRAMDATA;
176 		else
177 			maxio = NFS_MAXBSIZE;
178 	} else {
179 		maxio = NFS_V2MAXDATA;
180 	}
181 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
182 		nmp->nm_rsize = maxio;
183 	if (nmp->nm_rsize > MAXBSIZE)
184 		nmp->nm_rsize = MAXBSIZE;
185 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
186 		nmp->nm_readdirsize = maxio;
187 	if (nmp->nm_readdirsize > nmp->nm_rsize)
188 		nmp->nm_readdirsize = nmp->nm_rsize;
189 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
190 		nmp->nm_wsize = maxio;
191 	if (nmp->nm_wsize > MAXBSIZE)
192 		nmp->nm_wsize = MAXBSIZE;
193 
194 	/*
195 	 * Calculate the size used for io buffers.  Use the larger
196 	 * of the two sizes to minimise nfs requests but make sure
197 	 * that it is at least one VM page to avoid wasting buffer
198 	 * space.
199 	 */
200 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
201 	iosize = imax(iosize, PAGE_SIZE);
202 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
203 	return (iosize);
204 }
205 
206 static void
207 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
208 {
209 
210 	args->version = NFS_ARGSVERSION;
211 	args->addr = oargs->addr;
212 	args->addrlen = oargs->addrlen;
213 	args->sotype = oargs->sotype;
214 	args->proto = oargs->proto;
215 	args->fh = oargs->fh;
216 	args->fhsize = oargs->fhsize;
217 	args->flags = oargs->flags;
218 	args->wsize = oargs->wsize;
219 	args->rsize = oargs->rsize;
220 	args->readdirsize = oargs->readdirsize;
221 	args->timeo = oargs->timeo;
222 	args->retrans = oargs->retrans;
223 	args->readahead = oargs->readahead;
224 	args->hostname = oargs->hostname;
225 }
226 
227 static void
228 nfs_convert_diskless(void)
229 {
230 
231 	bcopy(&newnfs_diskless.myif, &newnfsv3_diskless.myif,
232 	    sizeof (struct ifaliasreq));
233 	bcopy(&newnfs_diskless.mygateway, &newnfsv3_diskless.mygateway,
234 	    sizeof (struct sockaddr_in));
235 	nfs_convert_oargs(&newnfsv3_diskless.root_args,
236 	    &newnfs_diskless.root_args);
237 	if (newnfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
238 		newnfsv3_diskless.root_fhsize = NFSX_MYFH;
239 		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh,
240 		    NFSX_MYFH);
241 	} else {
242 		newnfsv3_diskless.root_fhsize = NFSX_V2FH;
243 		bcopy(newnfs_diskless.root_fh, newnfsv3_diskless.root_fh,
244 		    NFSX_V2FH);
245 	}
246 	bcopy(&newnfs_diskless.root_saddr,&newnfsv3_diskless.root_saddr,
247 	    sizeof(struct sockaddr_in));
248 	bcopy(newnfs_diskless.root_hostnam, newnfsv3_diskless.root_hostnam,
249 	    MNAMELEN);
250 	newnfsv3_diskless.root_time = newnfs_diskless.root_time;
251 	bcopy(newnfs_diskless.my_hostnam, newnfsv3_diskless.my_hostnam,
252 	    MAXHOSTNAMELEN);
253 	newnfs_diskless_valid = 3;
254 }
255 
256 /*
257  * nfs statfs call
258  */
259 static int
260 nfs_statfs(struct mount *mp, struct statfs *sbp)
261 {
262 	struct vnode *vp;
263 	struct thread *td;
264 	struct nfsmount *nmp = VFSTONFS(mp);
265 	struct nfsvattr nfsva;
266 	struct nfsfsinfo fs;
267 	struct nfsstatfs sb;
268 	int error = 0, attrflag, gotfsinfo = 0, ret;
269 	struct nfsnode *np;
270 
271 	td = curthread;
272 
273 	error = vfs_busy(mp, MBF_NOWAIT);
274 	if (error)
275 		return (error);
276 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
277 	if (error) {
278 		vfs_unbusy(mp);
279 		return (error);
280 	}
281 	vp = NFSTOV(np);
282 	mtx_lock(&nmp->nm_mtx);
283 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
284 		mtx_unlock(&nmp->nm_mtx);
285 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
286 		    &attrflag, NULL);
287 		if (!error)
288 			gotfsinfo = 1;
289 	} else
290 		mtx_unlock(&nmp->nm_mtx);
291 	if (!error)
292 		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
293 		    &attrflag, NULL);
294 	if (attrflag == 0) {
295 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
296 		    td->td_ucred, td, &nfsva, NULL);
297 		if (ret) {
298 			/*
299 			 * Just set default values to get things going.
300 			 */
301 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
302 			nfsva.na_vattr.va_type = VDIR;
303 			nfsva.na_vattr.va_mode = 0777;
304 			nfsva.na_vattr.va_nlink = 100;
305 			nfsva.na_vattr.va_uid = (uid_t)0;
306 			nfsva.na_vattr.va_gid = (gid_t)0;
307 			nfsva.na_vattr.va_fileid = 2;
308 			nfsva.na_vattr.va_gen = 1;
309 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
310 			nfsva.na_vattr.va_size = 512 * 1024;
311 		}
312 	}
313 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
314 	if (!error) {
315 	    mtx_lock(&nmp->nm_mtx);
316 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
317 		nfscl_loadfsinfo(nmp, &fs);
318 	    nfscl_loadsbinfo(nmp, &sb, sbp);
319 	    sbp->f_flags = nmp->nm_flag;
320 	    sbp->f_iosize = newnfs_iosize(nmp);
321 	    mtx_unlock(&nmp->nm_mtx);
322 	    if (sbp != &mp->mnt_stat) {
323 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
324 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
325 	    }
326 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
327 	} else if (NFS_ISV4(vp)) {
328 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
329 	}
330 	vput(vp);
331 	vfs_unbusy(mp);
332 	return (error);
333 }
334 
335 /*
336  * nfs version 3 fsinfo rpc call
337  */
338 int
339 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
340     struct thread *td)
341 {
342 	struct nfsfsinfo fs;
343 	struct nfsvattr nfsva;
344 	int error, attrflag;
345 
346 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
347 	if (!error) {
348 		if (attrflag)
349 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
350 			    1);
351 		mtx_lock(&nmp->nm_mtx);
352 		nfscl_loadfsinfo(nmp, &fs);
353 		mtx_unlock(&nmp->nm_mtx);
354 	}
355 	return (error);
356 }
357 
358 /*
359  * Mount a remote root fs via. nfs. This depends on the info in the
360  * newnfs_diskless structure that has been filled in properly by some primary
361  * bootstrap.
362  * It goes something like this:
363  * - do enough of "ifconfig" by calling ifioctl() so that the system
364  *   can talk to the server
365  * - If newnfs_diskless.mygateway is filled in, use that address as
366  *   a default gateway.
367  * - build the rootfs mount point and call mountnfs() to do the rest.
368  *
369  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
370  * structure, as well as other global NFS client variables here, as
371  * nfs_mountroot() will be called once in the boot before any other NFS
372  * client activity occurs.
373  */
374 int
375 ncl_mountroot(struct mount *mp)
376 {
377 	struct thread *td = curthread;
378 	struct nfsv3_diskless *nd = &newnfsv3_diskless;
379 	struct socket *so;
380 	struct vnode *vp;
381 	struct ifreq ir;
382 	int error;
383 	u_long l;
384 	char buf[128];
385 	char *cp;
386 
387 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
388 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
389 #elif defined(NFS_ROOT)
390 	nfs_setup_diskless();
391 #endif
392 
393 	if (newnfs_diskless_valid == 0)
394 		return (-1);
395 	if (newnfs_diskless_valid == 1)
396 		nfs_convert_diskless();
397 
398 	/*
399 	 * XXX splnet, so networks will receive...
400 	 */
401 	splnet();
402 
403 	/*
404 	 * Do enough of ifconfig(8) so that the critical net interface can
405 	 * talk to the server.
406 	 */
407 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
408 	    td->td_ucred, td);
409 	if (error)
410 		panic("nfs_mountroot: socreate(%04x): %d",
411 			nd->myif.ifra_addr.sa_family, error);
412 
413 #if 0 /* XXX Bad idea */
414 	/*
415 	 * We might not have been told the right interface, so we pass
416 	 * over the first ten interfaces of the same kind, until we get
417 	 * one of them configured.
418 	 */
419 
420 	for (i = strlen(nd->myif.ifra_name) - 1;
421 		nd->myif.ifra_name[i] >= '0' &&
422 		nd->myif.ifra_name[i] <= '9';
423 		nd->myif.ifra_name[i] ++) {
424 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
425 		if(!error)
426 			break;
427 	}
428 #endif
429 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
430 	if (error)
431 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
432 	if ((cp = getenv("boot.netif.mtu")) != NULL) {
433 		ir.ifr_mtu = strtol(cp, NULL, 10);
434 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
435 		freeenv(cp);
436 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
437 		if (error)
438 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
439 	}
440 	soclose(so);
441 
442 	/*
443 	 * If the gateway field is filled in, set it as the default route.
444 	 * Note that pxeboot will set a default route of 0 if the route
445 	 * is not set by the DHCP server.  Check also for a value of 0
446 	 * to avoid panicking inappropriately in that situation.
447 	 */
448 	if (nd->mygateway.sin_len != 0 &&
449 	    nd->mygateway.sin_addr.s_addr != 0) {
450 		struct sockaddr_in mask, sin;
451 
452 		bzero((caddr_t)&mask, sizeof(mask));
453 		sin = mask;
454 		sin.sin_family = AF_INET;
455 		sin.sin_len = sizeof(sin);
456                 /* XXX MRT use table 0 for this sort of thing */
457 		CURVNET_SET(TD_TO_VNET(td));
458 		error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
459 		    (struct sockaddr *)&nd->mygateway,
460 		    (struct sockaddr *)&mask,
461 		    RTF_UP | RTF_GATEWAY, NULL);
462 		CURVNET_RESTORE();
463 		if (error)
464 			panic("nfs_mountroot: RTM_ADD: %d", error);
465 	}
466 
467 	/*
468 	 * Create the rootfs mount point.
469 	 */
470 	nd->root_args.fh = nd->root_fh;
471 	nd->root_args.fhsize = nd->root_fhsize;
472 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
473 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
474 		(l >> 24) & 0xff, (l >> 16) & 0xff,
475 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
476 	printf("NFS ROOT: %s\n", buf);
477 	nd->root_args.hostname = buf;
478 	if ((error = nfs_mountdiskless(buf,
479 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
480 		return (error);
481 	}
482 
483 	/*
484 	 * This is not really an nfs issue, but it is much easier to
485 	 * set hostname here and then let the "/etc/rc.xxx" files
486 	 * mount the right /var based upon its preset value.
487 	 */
488 	mtx_lock(&prison0.pr_mtx);
489 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
490 	    sizeof(prison0.pr_hostname));
491 	mtx_unlock(&prison0.pr_mtx);
492 	inittodr(ntohl(nd->root_time));
493 	return (0);
494 }
495 
496 /*
497  * Internal version of mount system call for diskless setup.
498  */
499 static int
500 nfs_mountdiskless(char *path,
501     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
502     struct vnode **vpp, struct mount *mp)
503 {
504 	struct sockaddr *nam;
505 	int error;
506 
507 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
508 	if ((error = mountnfs(args, mp, nam, path, NULL, NULL, NULL, vpp,
509 	    td->td_ucred, td, NFS_DEFAULT_NEGNAMETIMEO)) != 0) {
510 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
511 		return (error);
512 	}
513 	return (0);
514 }
515 
516 static void
517 nfs_sec_name(char *sec, int *flagsp)
518 {
519 	if (!strcmp(sec, "krb5"))
520 		*flagsp |= NFSMNT_KERB;
521 	else if (!strcmp(sec, "krb5i"))
522 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
523 	else if (!strcmp(sec, "krb5p"))
524 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
525 }
526 
527 static void
528 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
529     const char *hostname, struct ucred *cred, struct thread *td)
530 {
531 	int s;
532 	int adjsock;
533 	char *p;
534 
535 	s = splnet();
536 
537 	/*
538 	 * Set read-only flag if requested; otherwise, clear it if this is
539 	 * an update.  If this is not an update, then either the read-only
540 	 * flag is already clear, or this is a root mount and it was set
541 	 * intentionally at some previous point.
542 	 */
543 	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
544 		MNT_ILOCK(mp);
545 		mp->mnt_flag |= MNT_RDONLY;
546 		MNT_IUNLOCK(mp);
547 	} else if (mp->mnt_flag & MNT_UPDATE) {
548 		MNT_ILOCK(mp);
549 		mp->mnt_flag &= ~MNT_RDONLY;
550 		MNT_IUNLOCK(mp);
551 	}
552 
553 	/*
554 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
555 	 * no sense in that context.  Also, set up appropriate retransmit
556 	 * and soft timeout behavior.
557 	 */
558 	if (argp->sotype == SOCK_STREAM) {
559 		nmp->nm_flag &= ~NFSMNT_NOCONN;
560 		nmp->nm_timeo = NFS_MAXTIMEO;
561 	}
562 
563 	/* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
564 	if ((argp->flags & NFSMNT_NFSV3) == 0)
565 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
566 
567 	/* Also re-bind if we're switching to/from a connected UDP socket */
568 	adjsock = ((nmp->nm_flag & NFSMNT_NOCONN) !=
569 		    (argp->flags & NFSMNT_NOCONN));
570 
571 	/* Update flags atomically.  Don't change the lock bits. */
572 	nmp->nm_flag = argp->flags | nmp->nm_flag;
573 	splx(s);
574 
575 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
576 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
577 		if (nmp->nm_timeo < NFS_MINTIMEO)
578 			nmp->nm_timeo = NFS_MINTIMEO;
579 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
580 			nmp->nm_timeo = NFS_MAXTIMEO;
581 	}
582 
583 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
584 		nmp->nm_retry = argp->retrans;
585 		if (nmp->nm_retry > NFS_MAXREXMIT)
586 			nmp->nm_retry = NFS_MAXREXMIT;
587 	}
588 
589 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
590 		nmp->nm_wsize = argp->wsize;
591 		/* Round down to multiple of blocksize */
592 		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
593 		if (nmp->nm_wsize <= 0)
594 			nmp->nm_wsize = NFS_FABLKSIZE;
595 	}
596 
597 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
598 		nmp->nm_rsize = argp->rsize;
599 		/* Round down to multiple of blocksize */
600 		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
601 		if (nmp->nm_rsize <= 0)
602 			nmp->nm_rsize = NFS_FABLKSIZE;
603 	}
604 
605 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
606 		nmp->nm_readdirsize = argp->readdirsize;
607 	}
608 
609 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
610 		nmp->nm_acregmin = argp->acregmin;
611 	else
612 		nmp->nm_acregmin = NFS_MINATTRTIMO;
613 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
614 		nmp->nm_acregmax = argp->acregmax;
615 	else
616 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
617 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
618 		nmp->nm_acdirmin = argp->acdirmin;
619 	else
620 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
621 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
622 		nmp->nm_acdirmax = argp->acdirmax;
623 	else
624 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
625 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
626 		nmp->nm_acdirmin = nmp->nm_acdirmax;
627 	if (nmp->nm_acregmin > nmp->nm_acregmax)
628 		nmp->nm_acregmin = nmp->nm_acregmax;
629 
630 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
631 		if (argp->readahead <= NFS_MAXRAHEAD)
632 			nmp->nm_readahead = argp->readahead;
633 		else
634 			nmp->nm_readahead = NFS_MAXRAHEAD;
635 	}
636 	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
637 		if (argp->wcommitsize < nmp->nm_wsize)
638 			nmp->nm_wcommitsize = nmp->nm_wsize;
639 		else
640 			nmp->nm_wcommitsize = argp->wcommitsize;
641 	}
642 
643 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
644 		    (nmp->nm_soproto != argp->proto));
645 
646 	if (nmp->nm_client != NULL && adjsock) {
647 		int haslock = 0, error = 0;
648 
649 		if (nmp->nm_sotype == SOCK_STREAM) {
650 			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
651 			if (!error)
652 				haslock = 1;
653 		}
654 		if (!error) {
655 		    newnfs_disconnect(&nmp->nm_sockreq);
656 		    if (haslock)
657 			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
658 		    nmp->nm_sotype = argp->sotype;
659 		    nmp->nm_soproto = argp->proto;
660 		    if (nmp->nm_sotype == SOCK_DGRAM)
661 			while (newnfs_connect(nmp, &nmp->nm_sockreq,
662 			    cred, td, 0)) {
663 				printf("newnfs_args: retrying connect\n");
664 				(void) nfs_catnap(PSOCK, 0, "newnfscon");
665 			}
666 		}
667 	} else {
668 		nmp->nm_sotype = argp->sotype;
669 		nmp->nm_soproto = argp->proto;
670 	}
671 
672 	if (hostname != NULL) {
673 		strlcpy(nmp->nm_hostname, hostname,
674 		    sizeof(nmp->nm_hostname));
675 		p = strchr(nmp->nm_hostname, ':');
676 		if (p != NULL)
677 			*p = '\0';
678 	}
679 }
680 
/*
 * NULL-terminated list of mount option names that nfs_mount() hands to
 * vfs_filteropt(); nfs_mount() fails with EINVAL when that check
 * returns non-zero.
 */
static const char *nfs_opts[] = {
	"from", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow",
	"union", "noclusterr", "noclusterw", "multilabel", "acls", "force",
	"update", "async",
	"noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize", "soft", "hard", "mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans",
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeout", "addr", "fh",
	"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath", "negnametimeo",
	NULL
};
691 
692 /*
693  * VFS Operations.
694  *
695  * mount system call
696  * It seems a bit dumb to copyinstr() the host and path here and then
697  * bcopy() them in mountnfs(), but I wanted to detect errors before
698  * doing the sockargs() call because sockargs() allocates an mbuf and
699  * an error after that means that I have to release the mbuf.
700  */
701 /* ARGSUSED */
702 static int
703 nfs_mount(struct mount *mp)
704 {
705 	struct nfs_args args = {
706 	    .version = NFS_ARGSVERSION,
707 	    .addr = NULL,
708 	    .addrlen = sizeof (struct sockaddr_in),
709 	    .sotype = SOCK_STREAM,
710 	    .proto = 0,
711 	    .fh = NULL,
712 	    .fhsize = 0,
713 	    .flags = 0,
714 	    .wsize = NFS_WSIZE,
715 	    .rsize = NFS_RSIZE,
716 	    .readdirsize = NFS_READDIRSIZE,
717 	    .timeo = 10,
718 	    .retrans = NFS_RETRANS,
719 	    .readahead = NFS_DEFRAHEAD,
720 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
721 	    .hostname = NULL,
722 	    /* args version 4 */
723 	    .acregmin = NFS_MINATTRTIMO,
724 	    .acregmax = NFS_MAXATTRTIMO,
725 	    .acdirmin = NFS_MINDIRATTRTIMO,
726 	    .acdirmax = NFS_MAXDIRATTRTIMO,
727 	    .dirlen = 0,
728 	    .krbnamelen = 0,
729 	    .srvkrbnamelen = 0,
730 	};
731 	int error = 0, ret, len;
732 	struct sockaddr *nam = NULL;
733 	struct vnode *vp;
734 	struct thread *td;
735 	char hst[MNAMELEN];
736 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
737 	char *opt, *name, *secname;
738 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
739 
740 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
741 		error = EINVAL;
742 		goto out;
743 	}
744 
745 	td = curthread;
746 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
747 		error = ncl_mountroot(mp);
748 		goto out;
749 	}
750 
751 	nfscl_init();
752 
753 	/* Handle the new style options. */
754 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
755 		args.flags |= NFSMNT_NOCONN;
756 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
757 		args.flags |= NFSMNT_NOCONN;
758 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
759 		args.flags |= NFSMNT_NOLOCKD;
760 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
761 		args.flags &= ~NFSMNT_NOLOCKD;
762 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
763 		args.flags |= NFSMNT_INT;
764 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
765 		args.flags |= NFSMNT_RDIRPLUS;
766 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
767 		args.flags |= NFSMNT_RESVPORT;
768 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
769 		args.flags &= ~NFSMNT_RESVPORT;
770 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
771 		args.flags |= NFSMNT_SOFT;
772 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
773 		args.flags &= ~NFSMNT_SOFT;
774 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
775 		args.sotype = SOCK_DGRAM;
776 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
777 		args.sotype = SOCK_DGRAM;
778 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
779 		args.sotype = SOCK_STREAM;
780 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
781 		args.flags |= NFSMNT_NFSV3;
782 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
783 		args.flags |= NFSMNT_NFSV4;
784 		args.sotype = SOCK_STREAM;
785 	}
786 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
787 		args.flags |= NFSMNT_ALLGSSNAME;
788 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
789 		if (opt == NULL) {
790 			vfs_mount_error(mp, "illegal readdirsize");
791 			error = EINVAL;
792 			goto out;
793 		}
794 		ret = sscanf(opt, "%d", &args.readdirsize);
795 		if (ret != 1 || args.readdirsize <= 0) {
796 			vfs_mount_error(mp, "illegal readdirsize: %s",
797 			    opt);
798 			error = EINVAL;
799 			goto out;
800 		}
801 		args.flags |= NFSMNT_READDIRSIZE;
802 	}
803 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
804 		if (opt == NULL) {
805 			vfs_mount_error(mp, "illegal readahead");
806 			error = EINVAL;
807 			goto out;
808 		}
809 		ret = sscanf(opt, "%d", &args.readahead);
810 		if (ret != 1 || args.readahead <= 0) {
811 			vfs_mount_error(mp, "illegal readahead: %s",
812 			    opt);
813 			error = EINVAL;
814 			goto out;
815 		}
816 		args.flags |= NFSMNT_READAHEAD;
817 	}
818 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
819 		if (opt == NULL) {
820 			vfs_mount_error(mp, "illegal wsize");
821 			error = EINVAL;
822 			goto out;
823 		}
824 		ret = sscanf(opt, "%d", &args.wsize);
825 		if (ret != 1 || args.wsize <= 0) {
826 			vfs_mount_error(mp, "illegal wsize: %s",
827 			    opt);
828 			error = EINVAL;
829 			goto out;
830 		}
831 		args.flags |= NFSMNT_WSIZE;
832 	}
833 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
834 		if (opt == NULL) {
835 			vfs_mount_error(mp, "illegal rsize");
836 			error = EINVAL;
837 			goto out;
838 		}
839 		ret = sscanf(opt, "%d", &args.rsize);
840 		if (ret != 1 || args.rsize <= 0) {
841 			vfs_mount_error(mp, "illegal wsize: %s",
842 			    opt);
843 			error = EINVAL;
844 			goto out;
845 		}
846 		args.flags |= NFSMNT_RSIZE;
847 	}
848 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
849 		if (opt == NULL) {
850 			vfs_mount_error(mp, "illegal retrans");
851 			error = EINVAL;
852 			goto out;
853 		}
854 		ret = sscanf(opt, "%d", &args.retrans);
855 		if (ret != 1 || args.retrans <= 0) {
856 			vfs_mount_error(mp, "illegal retrans: %s",
857 			    opt);
858 			error = EINVAL;
859 			goto out;
860 		}
861 		args.flags |= NFSMNT_RETRANS;
862 	}
863 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
864 		ret = sscanf(opt, "%d", &args.acregmin);
865 		if (ret != 1 || args.acregmin < 0) {
866 			vfs_mount_error(mp, "illegal acregmin: %s",
867 			    opt);
868 			error = EINVAL;
869 			goto out;
870 		}
871 		args.flags |= NFSMNT_ACREGMIN;
872 	}
873 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
874 		ret = sscanf(opt, "%d", &args.acregmax);
875 		if (ret != 1 || args.acregmax < 0) {
876 			vfs_mount_error(mp, "illegal acregmax: %s",
877 			    opt);
878 			error = EINVAL;
879 			goto out;
880 		}
881 		args.flags |= NFSMNT_ACREGMAX;
882 	}
883 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
884 		ret = sscanf(opt, "%d", &args.acdirmin);
885 		if (ret != 1 || args.acdirmin < 0) {
886 			vfs_mount_error(mp, "illegal acdirmin: %s",
887 			    opt);
888 			error = EINVAL;
889 			goto out;
890 		}
891 		args.flags |= NFSMNT_ACDIRMIN;
892 	}
893 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
894 		ret = sscanf(opt, "%d", &args.acdirmax);
895 		if (ret != 1 || args.acdirmax < 0) {
896 			vfs_mount_error(mp, "illegal acdirmax: %s",
897 			    opt);
898 			error = EINVAL;
899 			goto out;
900 		}
901 		args.flags |= NFSMNT_ACDIRMAX;
902 	}
903 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
904 		ret = sscanf(opt, "%d", &args.timeo);
905 		if (ret != 1 || args.timeo <= 0) {
906 			vfs_mount_error(mp, "illegal timeout: %s",
907 			    opt);
908 			error = EINVAL;
909 			goto out;
910 		}
911 		args.flags |= NFSMNT_TIMEO;
912 	}
913 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
914 	    == 0) {
915 		ret = sscanf(opt, "%d", &negnametimeo);
916 		if (ret != 1 || negnametimeo < 0) {
917 			vfs_mount_error(mp, "illegal negnametimeo: %s",
918 			    opt);
919 			error = EINVAL;
920 			goto out;
921 		}
922 	}
923 	if (vfs_getopt(mp->mnt_optnew, "sec",
924 		(void **) &secname, NULL) == 0)
925 		nfs_sec_name(secname, &args.flags);
926 
927 	if (mp->mnt_flag & MNT_UPDATE) {
928 		struct nfsmount *nmp = VFSTONFS(mp);
929 
930 		if (nmp == NULL) {
931 			error = EIO;
932 			goto out;
933 		}
934 		/*
935 		 * When doing an update, we can't change version,
936 		 * security, switch lockd strategies or change cookie
937 		 * translation
938 		 */
939 		args.flags = (args.flags &
940 		    ~(NFSMNT_NFSV3 |
941 		      NFSMNT_NFSV4 |
942 		      NFSMNT_KERB |
943 		      NFSMNT_INTEGRITY |
944 		      NFSMNT_PRIVACY |
945 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
946 		    (nmp->nm_flag &
947 			(NFSMNT_NFSV3 |
948 			 NFSMNT_NFSV4 |
949 			 NFSMNT_KERB |
950 			 NFSMNT_INTEGRITY |
951 			 NFSMNT_PRIVACY |
952 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
953 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
954 		goto out;
955 	}
956 
957 	/*
958 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
959 	 * or no-connection mode for those protocols that support
960 	 * no-connection mode (the flag will be cleared later for protocols
961 	 * that do not support no-connection mode).  This will allow a client
962 	 * to receive replies from a different IP than the request was
963 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
964 	 * not 0.
965 	 */
966 	if (nfs_ip_paranoia == 0)
967 		args.flags |= NFSMNT_NOCONN;
968 
969 	if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
970 	    &args.fhsize) == 0) {
971 		if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
972 			vfs_mount_error(mp, "Bad file handle");
973 			error = EINVAL;
974 			goto out;
975 		}
976 		bcopy(args.fh, nfh, args.fhsize);
977 	} else {
978 		args.fhsize = 0;
979 	}
980 
981 	(void) vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname,
982 	    &len);
983 	if (args.hostname == NULL) {
984 		vfs_mount_error(mp, "Invalid hostname");
985 		error = EINVAL;
986 		goto out;
987 	}
988 	bcopy(args.hostname, hst, MNAMELEN);
989 	hst[MNAMELEN - 1] = '\0';
990 
991 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
992 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
993 	else
994 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
995 	args.srvkrbnamelen = strlen(srvkrbname);
996 
997 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
998 		strlcpy(krbname, name, sizeof (krbname));
999 	else
1000 		krbname[0] = '\0';
1001 	args.krbnamelen = strlen(krbname);
1002 
1003 	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1004 		strlcpy(dirpath, name, sizeof (dirpath));
1005 	else
1006 		dirpath[0] = '\0';
1007 	args.dirlen = strlen(dirpath);
1008 
1009 	if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr,
1010 	    &args.addrlen) == 0) {
1011 		if (args.addrlen > SOCK_MAXADDRLEN) {
1012 			error = ENAMETOOLONG;
1013 			goto out;
1014 		}
1015 		nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1016 		bcopy(args.addr, nam, args.addrlen);
1017 		nam->sa_len = args.addrlen;
1018 	}
1019 
1020 	args.fh = nfh;
1021 	error = mountnfs(&args, mp, nam, hst, krbname, dirpath, srvkrbname,
1022 	    &vp, td->td_ucred, td, negnametimeo);
1023 out:
1024 	if (!error) {
1025 		MNT_ILOCK(mp);
1026 		mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
1027 		MNT_IUNLOCK(mp);
1028 	}
1029 	return (error);
1030 }
1031 
1032 
1033 /*
1034  * VFS Operations.
1035  *
1036  * mount system call
1037  * It seems a bit dumb to copyinstr() the host and path here and then
1038  * bcopy() them in mountnfs(), but I wanted to detect errors before
1039  * doing the sockargs() call because sockargs() allocates an mbuf and
1040  * an error after that means that I have to release the mbuf.
1041  */
1042 /* ARGSUSED */
1043 static int
1044 nfs_cmount(struct mntarg *ma, void *data, int flags)
1045 {
1046 	int error;
1047 	struct nfs_args args;
1048 
1049 	error = copyin(data, &args, sizeof (struct nfs_args));
1050 	if (error)
1051 		return error;
1052 
1053 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1054 
1055 	error = kernel_mount(ma, flags);
1056 	return (error);
1057 }
1058 
1059 /*
1060  * Common code for mount and mountroot
1061  */
1062 static int
1063 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1064     char *hst, u_char *krbname, u_char *dirpath, u_char *srvkrbname,
1065     struct vnode **vpp, struct ucred *cred, struct thread *td,
1066     int negnametimeo)
1067 {
1068 	struct nfsmount *nmp;
1069 	struct nfsnode *np;
1070 	int error, trycnt, ret;
1071 	struct nfsvattr nfsva;
1072 	static u_int64_t clval = 0;
1073 
1074 	if (mp->mnt_flag & MNT_UPDATE) {
1075 		nmp = VFSTONFS(mp);
1076 		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1077 		FREE(nam, M_SONAME);
1078 		return (0);
1079 	} else {
1080 		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1081 		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2,
1082 		    M_NEWNFSMNT, M_WAITOK);
1083 		bzero((caddr_t)nmp, sizeof (struct nfsmount) +
1084 		    argp->krbnamelen + argp->dirlen + argp->srvkrbnamelen + 2);
1085 		TAILQ_INIT(&nmp->nm_bufq);
1086 		if (clval == 0)
1087 			clval = (u_int64_t)nfsboottime.tv_sec;
1088 		nmp->nm_clval = clval++;
1089 		nmp->nm_krbnamelen = argp->krbnamelen;
1090 		nmp->nm_dirpathlen = argp->dirlen;
1091 		nmp->nm_srvkrbnamelen = argp->srvkrbnamelen;
1092 		if (td->td_ucred->cr_uid != (uid_t)0) {
1093 			/*
1094 			 * nm_uid is used to get KerberosV credentials for
1095 			 * the nfsv4 state handling operations if there is
1096 			 * no host based principal set. Use the uid of
1097 			 * this user if not root, since they are doing the
1098 			 * mount. I don't think setting this for root will
1099 			 * work, since root normally does not have user
1100 			 * credentials in a credentials cache.
1101 			 */
1102 			nmp->nm_uid = td->td_ucred->cr_uid;
1103 		} else {
1104 			/*
1105 			 * Just set to -1, so it won't be used.
1106 			 */
1107 			nmp->nm_uid = (uid_t)-1;
1108 		}
1109 
1110 		/* Copy and null terminate all the names */
1111 		if (nmp->nm_krbnamelen > 0) {
1112 			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1113 			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1114 		}
1115 		if (nmp->nm_dirpathlen > 0) {
1116 			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1117 			    nmp->nm_dirpathlen);
1118 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1119 			    + 1] = '\0';
1120 		}
1121 		if (nmp->nm_srvkrbnamelen > 0) {
1122 			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1123 			    nmp->nm_srvkrbnamelen);
1124 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1125 			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1126 		}
1127 		nmp->nm_sockreq.nr_cred = crhold(cred);
1128 		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1129 		mp->mnt_data = nmp;
1130 		nmp->nm_getinfo = nfs_getnlminfo;
1131 		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1132 	}
1133 	vfs_getnewfsid(mp);
1134 	nmp->nm_mountp = mp;
1135 	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1136 	nmp->nm_negnametimeo = negnametimeo;
1137 
1138 	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1139 
1140 	/*
1141 	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1142 	 * high, depending on whether we end up with negative offsets in
1143 	 * the client or server somewhere.  2GB-1 may be safer.
1144 	 *
1145 	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1146 	 * that we can handle until we find out otherwise.
1147 	 * XXX Our "safe" limit on the client is what we can store in our
1148 	 * buffer cache using signed(!) block numbers.
1149 	 */
1150 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1151 		nmp->nm_maxfilesize = 0xffffffffLL;
1152 	else
1153 		nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
1154 
1155 	nmp->nm_timeo = NFS_TIMEO;
1156 	nmp->nm_retry = NFS_RETRANS;
1157 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1158 		nmp->nm_wsize = NFS_WSIZE;
1159 		nmp->nm_rsize = NFS_RSIZE;
1160 		nmp->nm_readdirsize = NFS_READDIRSIZE;
1161 	}
1162 	nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1163 	nmp->nm_numgrps = NFS_MAXGRPS;
1164 	nmp->nm_readahead = NFS_DEFRAHEAD;
1165 	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1166 	if (nmp->nm_tprintf_delay < 0)
1167 		nmp->nm_tprintf_delay = 0;
1168 	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1169 	if (nmp->nm_tprintf_initial_delay < 0)
1170 		nmp->nm_tprintf_initial_delay = 0;
1171 	nmp->nm_fhsize = argp->fhsize;
1172 	if (nmp->nm_fhsize > 0)
1173 		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1174 	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1175 	nmp->nm_nam = nam;
1176 	/* Set up the sockets and per-host congestion */
1177 	nmp->nm_sotype = argp->sotype;
1178 	nmp->nm_soproto = argp->proto;
1179 	nmp->nm_sockreq.nr_prog = NFS_PROG;
1180 	if ((argp->flags & NFSMNT_NFSV4))
1181 		nmp->nm_sockreq.nr_vers = NFS_VER4;
1182 	else if ((argp->flags & NFSMNT_NFSV3))
1183 		nmp->nm_sockreq.nr_vers = NFS_VER3;
1184 	else
1185 		nmp->nm_sockreq.nr_vers = NFS_VER2;
1186 
1187 
1188 	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1189 		goto bad;
1190 
1191 	/*
1192 	 * A reference count is needed on the nfsnode representing the
1193 	 * remote root.  If this object is not persistent, then backward
1194 	 * traversals of the mount point (i.e. "..") will not work if
1195 	 * the nfsnode gets flushed out of the cache. Ufs does not have
1196 	 * this problem, because one can identify root inodes by their
1197 	 * number == ROOTINO (2).
1198 	 */
1199 	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1200 	    nmp->nm_dirpathlen > 0) {
1201 		/*
1202 		 * If the fhsize on the mount point == 0 for V4, the mount
1203 		 * path needs to be looked up.
1204 		 */
1205 		trycnt = 3;
1206 		do {
1207 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1208 			    cred, td);
1209 			if (error)
1210 				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1211 		} while (error && --trycnt > 0);
1212 		if (error) {
1213 			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1214 			goto bad;
1215 		}
1216 	}
1217 	if (nmp->nm_fhsize > 0) {
1218 		/*
1219 		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1220 		 * non-zero for the root vnode. f_iosize will be set correctly
1221 		 * by nfs_statfs() before any I/O occurs.
1222 		 */
1223 		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1224 		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
1225 		if (error)
1226 			goto bad;
1227 		*vpp = NFSTOV(np);
1228 
1229 		/*
1230 		 * Get file attributes and transfer parameters for the
1231 		 * mountpoint.  This has the side effect of filling in
1232 		 * (*vpp)->v_type with the correct value.
1233 		 */
1234 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1235 		    cred, td, &nfsva, NULL);
1236 		if (ret) {
1237 			/*
1238 			 * Just set default values to get things going.
1239 			 */
1240 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1241 			nfsva.na_vattr.va_type = VDIR;
1242 			nfsva.na_vattr.va_mode = 0777;
1243 			nfsva.na_vattr.va_nlink = 100;
1244 			nfsva.na_vattr.va_uid = (uid_t)0;
1245 			nfsva.na_vattr.va_gid = (gid_t)0;
1246 			nfsva.na_vattr.va_fileid = 2;
1247 			nfsva.na_vattr.va_gen = 1;
1248 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1249 			nfsva.na_vattr.va_size = 512 * 1024;
1250 		}
1251 		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1252 		if (argp->flags & NFSMNT_NFSV3)
1253 			ncl_fsinfo(nmp, *vpp, cred, td);
1254 
1255 		/*
1256 		 * Lose the lock but keep the ref.
1257 		 */
1258 		VOP_UNLOCK(*vpp, 0);
1259 		return (0);
1260 	}
1261 	error = EIO;
1262 
1263 bad:
1264 	newnfs_disconnect(&nmp->nm_sockreq);
1265 	crfree(nmp->nm_sockreq.nr_cred);
1266 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1267 	mtx_destroy(&nmp->nm_mtx);
1268 	FREE(nmp, M_NEWNFSMNT);
1269 	FREE(nam, M_SONAME);
1270 	return (error);
1271 }
1272 
1273 /*
1274  * unmount system call
1275  */
1276 static int
1277 nfs_unmount(struct mount *mp, int mntflags)
1278 {
1279 	struct thread *td;
1280 	struct nfsmount *nmp;
1281 	int error, flags = 0, trycnt = 0;
1282 
1283 	td = curthread;
1284 
1285 	if (mntflags & MNT_FORCE)
1286 		flags |= FORCECLOSE;
1287 	nmp = VFSTONFS(mp);
1288 	/*
1289 	 * Goes something like this..
1290 	 * - Call vflush() to clear out vnodes for this filesystem
1291 	 * - Close the socket
1292 	 * - Free up the data structures
1293 	 */
1294 	/* In the forced case, cancel any outstanding requests. */
1295 	if (mntflags & MNT_FORCE) {
1296 		error = newnfs_nmcancelreqs(nmp);
1297 		if (error)
1298 			goto out;
1299 		/* For a forced close, get rid of the renew thread now */
1300 		nfscl_umount(nmp, td);
1301 	}
1302 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1303 	do {
1304 		error = vflush(mp, 1, flags, td);
1305 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1306 			(void) nfs_catnap(PSOCK, error, "newndm");
1307 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1308 	if (error)
1309 		goto out;
1310 
1311 	/*
1312 	 * We are now committed to the unmount.
1313 	 */
1314 	if ((mntflags & MNT_FORCE) == 0)
1315 		nfscl_umount(nmp, td);
1316 	newnfs_disconnect(&nmp->nm_sockreq);
1317 	crfree(nmp->nm_sockreq.nr_cred);
1318 	FREE(nmp->nm_nam, M_SONAME);
1319 
1320 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1321 	mtx_destroy(&nmp->nm_mtx);
1322 	FREE(nmp, M_NEWNFSMNT);
1323 out:
1324 	return (error);
1325 }
1326 
1327 /*
1328  * Return root of a filesystem
1329  */
1330 static int
1331 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1332 {
1333 	struct vnode *vp;
1334 	struct nfsmount *nmp;
1335 	struct nfsnode *np;
1336 	int error;
1337 
1338 	nmp = VFSTONFS(mp);
1339 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np);
1340 	if (error)
1341 		return error;
1342 	vp = NFSTOV(np);
1343 	/*
1344 	 * Get transfer parameters and attributes for root vnode once.
1345 	 */
1346 	mtx_lock(&nmp->nm_mtx);
1347 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1348 		mtx_unlock(&nmp->nm_mtx);
1349 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1350 	} else
1351 		mtx_unlock(&nmp->nm_mtx);
1352 	if (vp->v_type == VNON)
1353 	    vp->v_type = VDIR;
1354 	vp->v_vflag |= VV_ROOT;
1355 	*vpp = vp;
1356 	return (0);
1357 }
1358 
1359 /*
1360  * Flush out the buffer cache
1361  */
1362 /* ARGSUSED */
1363 static int
1364 nfs_sync(struct mount *mp, int waitfor)
1365 {
1366 	struct vnode *vp, *mvp;
1367 	struct thread *td;
1368 	int error, allerror = 0;
1369 
1370 	td = curthread;
1371 
1372 	/*
1373 	 * Force stale buffer cache information to be flushed.
1374 	 */
1375 	MNT_ILOCK(mp);
1376 loop:
1377 	MNT_VNODE_FOREACH(vp, mp, mvp) {
1378 		VI_LOCK(vp);
1379 		MNT_IUNLOCK(mp);
1380 		/* XXX Racy bv_cnt check. */
1381 		if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1382 		    waitfor == MNT_LAZY) {
1383 			VI_UNLOCK(vp);
1384 			MNT_ILOCK(mp);
1385 			continue;
1386 		}
1387 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1388 			MNT_ILOCK(mp);
1389 			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1390 			goto loop;
1391 		}
1392 		error = VOP_FSYNC(vp, waitfor, td);
1393 		if (error)
1394 			allerror = error;
1395 		VOP_UNLOCK(vp, 0);
1396 		vrele(vp);
1397 
1398 		MNT_ILOCK(mp);
1399 	}
1400 	MNT_IUNLOCK(mp);
1401 	return (allerror);
1402 }
1403 
1404 static int
1405 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1406 {
1407 	struct nfsmount *nmp = VFSTONFS(mp);
1408 	struct vfsquery vq;
1409 	int error;
1410 
1411 	bzero(&vq, sizeof(vq));
1412 	switch (op) {
1413 #if 0
1414 	case VFS_CTL_NOLOCKS:
1415 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1416  		if (req->oldptr != NULL) {
1417  			error = SYSCTL_OUT(req, &val, sizeof(val));
1418  			if (error)
1419  				return (error);
1420  		}
1421  		if (req->newptr != NULL) {
1422  			error = SYSCTL_IN(req, &val, sizeof(val));
1423  			if (error)
1424  				return (error);
1425 			if (val)
1426 				nmp->nm_flag |= NFSMNT_NOLOCKS;
1427 			else
1428 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1429  		}
1430 		break;
1431 #endif
1432 	case VFS_CTL_QUERY:
1433 		mtx_lock(&nmp->nm_mtx);
1434 		if (nmp->nm_state & NFSSTA_TIMEO)
1435 			vq.vq_flags |= VQ_NOTRESP;
1436 		mtx_unlock(&nmp->nm_mtx);
1437 #if 0
1438 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1439 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1440 			vq.vq_flags |= VQ_NOTRESPLOCK;
1441 #endif
1442 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1443 		break;
1444  	case VFS_CTL_TIMEO:
1445  		if (req->oldptr != NULL) {
1446  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1447  			    sizeof(nmp->nm_tprintf_initial_delay));
1448  			if (error)
1449  				return (error);
1450  		}
1451  		if (req->newptr != NULL) {
1452 			error = vfs_suser(mp, req->td);
1453 			if (error)
1454 				return (error);
1455  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1456  			    sizeof(nmp->nm_tprintf_initial_delay));
1457  			if (error)
1458  				return (error);
1459  			if (nmp->nm_tprintf_initial_delay < 0)
1460  				nmp->nm_tprintf_initial_delay = 0;
1461  		}
1462 		break;
1463 	default:
1464 		return (ENOTSUP);
1465 	}
1466 	return (0);
1467 }
1468 
1469 /*
1470  * Extract the information needed by the nlm from the nfs vnode.
1471  */
1472 static void
1473 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1474     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1475     struct timeval *timeop)
1476 {
1477 	struct nfsmount *nmp;
1478 	struct nfsnode *np = VTONFS(vp);
1479 
1480 	nmp = VFSTONFS(vp->v_mount);
1481 	if (fhlenp != NULL)
1482 		*fhlenp = (size_t)np->n_fhp->nfh_len;
1483 	if (fhp != NULL)
1484 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1485 	if (sp != NULL)
1486 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1487 	if (is_v3p != NULL)
1488 		*is_v3p = NFS_ISV3(vp);
1489 	if (sizep != NULL)
1490 		*sizep = np->n_size;
1491 	if (timeop != NULL) {
1492 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1493 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1494 	}
1495 }
1496 
1497