xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision 8b9775912cbc7bb3c05c1fdfc3597dc4b68a9b9e)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1993, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Rick Macklem at The University of Guelph.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
35  */
36 
37 #include <sys/cdefs.h>
38 #include "opt_bootp.h"
39 #include "opt_nfsroot.h"
40 #include "opt_kern_tls.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/limits.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/mount.h>
54 #include <sys/proc.h>
55 #include <sys/socket.h>
56 #include <sys/socketvar.h>
57 #include <sys/sockio.h>
58 #include <sys/sysctl.h>
59 #include <sys/vnode.h>
60 #include <sys/signalvar.h>
61 
62 #include <vm/vm.h>
63 #include <vm/vm_extern.h>
64 #include <vm/uma.h>
65 
66 #include <net/if.h>
67 #include <net/route.h>
68 #include <net/route/route_ctl.h>
69 #include <netinet/in.h>
70 
71 #include <fs/nfs/nfsport.h>
72 #include <fs/nfsclient/nfsnode.h>
73 #include <fs/nfsclient/nfsmount.h>
74 #include <fs/nfsclient/nfs.h>
75 #include <nfs/nfsdiskless.h>
76 
77 #include <rpc/rpcsec_tls.h>
78 
/* Register "nfscl" as a kernel feature with the description given here. */
79 FEATURE(nfscl, "NFSv4 client");
80 
/*
 * Objects defined outside this file that the code here refers to.
 * NOTE(review): NFSCLSTATEMUTEX is a macro from the NFS port headers;
 * presumably it expands to the declaration of the client state mutex --
 * confirm against its definition.
 */
81 extern int nfscl_ticks;
82 extern struct timeval nfsboottime;
83 extern int nfsrv_useacl;
84 extern int nfscl_debuglevel;
85 extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
86 extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
87 extern struct mtx ncl_iod_mutex;
88 NFSCLSTATEMUTEX;
89 extern struct mtx nfsrv_dslock_mtx;
90 
/*
 * malloc(9) types defined by this file.  M_NEWNFSMNT is the type used for
 * the host name string duplicated in nfs_mount_parse_from().
 */
91 MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header");
92 MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct");
93 
/*
 * Read/write integer knobs published under the _vfs_nfs sysctl node.
 * Each knob is a file-local variable paired with the SYSCTL_INT() that
 * exports it.
 *
 * NOTE(review): the two "downdelay*" entries pass the NFS_TPRINTF_*
 * default value in the argument slot where the other entries pass
 * OID_AUTO.  That looks unintentional, but changing it would renumber the
 * OIDs -- confirm before touching.
 */
94 SYSCTL_DECL(_vfs_nfs);
95 static int nfs_ip_paranoia = 1;
96 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
97     &nfs_ip_paranoia, 0, "");
98 static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
99 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
100         downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
101 /* how long between console messages "nfs server foo not responding" */
102 static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
103 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
104         downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
/* The debug toggle is only compiled in when NFS_DEBUG is defined. */
105 #ifdef NFS_DEBUG
106 int nfs_debug;
107 SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
108     "Toggle debug flag");
109 #endif
110 
/*
 * Forward declarations of functions private to this file.  mountnfs()
 * takes a long positional argument list; the call in nfs_mountdiskless()
 * shows how each position is filled in.
 */
111 static int	nfs_mountroot(struct mount *);
112 static void	nfs_sec_name(char *, int *);
113 static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
114 		    struct nfs_args *argp, const char *, struct ucred *,
115 		    struct thread *);
116 static int	mountnfs(struct nfs_args *, struct mount *,
117 		    struct sockaddr *, char *, u_char *, int, u_char *, int,
118 		    u_char *, int, struct vnode **, struct ucred *,
119 		    struct thread *, int, int, int, uint32_t, char *, int);
120 static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
121 		    struct sockaddr_storage *, int *, off_t *,
122 		    struct timeval *);
/*
 * VFS entry points, declared through the vfs_*_t function typedefs so
 * their signatures match the members of struct vfsops they are assigned
 * to in nfs_vfsops.
 */
123 static vfs_mount_t nfs_mount;
124 static vfs_cmount_t nfs_cmount;
125 static vfs_unmount_t nfs_unmount;
126 static vfs_root_t nfs_root;
127 static vfs_statfs_t nfs_statfs;
128 static vfs_sync_t nfs_sync;
129 static vfs_sysctl_t nfs_sysctl;
130 static vfs_purge_t nfs_purge;
131 
132 /*
133  * nfs vfs operations.
134  */
/*
 * Note that nfs_root is installed as the vfs_cachedroot handler, while
 * vfs_root itself is set to vfs_cache_root.  ncl_init and ncl_uninit are
 * not defined in this part of the file.
 */
135 static struct vfsops nfs_vfsops = {
136 	.vfs_init =		ncl_init,
137 	.vfs_mount =		nfs_mount,
138 	.vfs_cmount =		nfs_cmount,
139 	.vfs_root =		vfs_cache_root,
140 	.vfs_cachedroot =	nfs_root,
141 	.vfs_statfs =		nfs_statfs,
142 	.vfs_sync =		nfs_sync,
143 	.vfs_uninit =		ncl_uninit,
144 	.vfs_unmount =		nfs_unmount,
145 	.vfs_sysctl =		nfs_sysctl,
146 	.vfs_purge =		nfs_purge,
147 };
148 /*
149  * This macro declares that the file system type is named "nfs".
150  * It also declares a module name of "nfs" and uses vfs_modevent()
151  * as the event handling function.
152  * The main module declaration is found in sys/fs/nfsclient/nfs_clport.c
153  * for "nfscl" and is needed so that a custom event handling
154  * function gets called.  MODULE_DEPEND() macros are found there.
155  */
156 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
157 
/* Declare version 1 of the "nfs" module. */
158 MODULE_VERSION(nfs, 1);
159 
160 /*
161  * This structure is now defined in sys/nfs/nfs_diskless.c so that it
162  * can be shared by both NFS clients. It is declared here so that it
163  * will be defined for kernels built without NFS_ROOT, although it
164  * isn't used in that case.
165  */
/*
 * How nfs_diskless_valid is used in this file: nfs_mountroot() gives up
 * when it is 0, calls nfs_convert_diskless() when it is 1, and
 * nfs_convert_diskless() sets it to 3 once nfsv3_diskless is populated.
 */
166 #if !defined(NFS_ROOT)
167 struct nfs_diskless	nfs_diskless = { { { 0 } } };
168 struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
169 int			nfs_diskless_valid = 0;
170 #endif
171 
/* Read-only sysctls under _vfs_nfs exposing the diskless boot state. */
172 SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
173     &nfs_diskless_valid, 0,
174     "Has the diskless struct been filled correctly");
175 
/* Exports the root_hostnam string of nfsv3_diskless. */
176 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
177     nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
178 
/* Exports the raw root_saddr member as an opaque blob. */
179 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
180     &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
181     "%Ssockaddr_in", "Diskless root nfs address");
182 
/*
 * Prototypes for the diskless-root helpers.  newnfsargs_ntoh() has
 * external linkage and is not defined in this part of the file; the
 * three static functions are defined below.
 */
183 void		newnfsargs_ntoh(struct nfs_args *);
184 static int	nfs_mountdiskless(char *,
185 		    struct sockaddr_in *, struct nfs_args *,
186 		    struct thread *, struct vnode **, struct mount *);
187 static void	nfs_convert_diskless(void);
188 static void	nfs_convert_oargs(struct nfs_args *args,
189 		    struct onfs_args *oargs);
190 
191 int
newnfs_iosize(struct nfsmount * nmp)192 newnfs_iosize(struct nfsmount *nmp)
193 {
194 	int iosize, maxio;
195 
196 	/* First, set the upper limit for iosize */
197 	if (nmp->nm_flag & NFSMNT_NFSV4) {
198 		maxio = NFS_MAXBSIZE;
199 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
200 		if (nmp->nm_sotype == SOCK_DGRAM)
201 			maxio = NFS_MAXDGRAMDATA;
202 		else
203 			maxio = NFS_MAXBSIZE;
204 	} else {
205 		maxio = NFS_V2MAXDATA;
206 	}
207 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
208 		nmp->nm_rsize = maxio;
209 	if (nmp->nm_rsize > NFS_MAXBSIZE)
210 		nmp->nm_rsize = NFS_MAXBSIZE;
211 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
212 		nmp->nm_readdirsize = maxio;
213 	if (nmp->nm_readdirsize > nmp->nm_rsize)
214 		nmp->nm_readdirsize = nmp->nm_rsize;
215 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
216 		nmp->nm_wsize = maxio;
217 	if (nmp->nm_wsize > NFS_MAXBSIZE)
218 		nmp->nm_wsize = NFS_MAXBSIZE;
219 
220 	/*
221 	 * Calculate the size used for io buffers.  Use the larger
222 	 * of the two sizes to minimise nfs requests but make sure
223 	 * that it is at least one VM page to avoid wasting buffer
224 	 * space.  It must also be at least NFS_DIRBLKSIZ, since
225 	 * that is the buffer size used for directories.
226 	 */
227 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
228 	iosize = imax(iosize, PAGE_SIZE);
229 	iosize = imax(iosize, NFS_DIRBLKSIZ);
230 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
231 	return (iosize);
232 }
233 
234 static void
nfs_convert_oargs(struct nfs_args * args,struct onfs_args * oargs)235 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
236 {
237 
238 	args->version = NFS_ARGSVERSION;
239 	args->addr = oargs->addr;
240 	args->addrlen = oargs->addrlen;
241 	args->sotype = oargs->sotype;
242 	args->proto = oargs->proto;
243 	args->fh = oargs->fh;
244 	args->fhsize = oargs->fhsize;
245 	args->flags = oargs->flags;
246 	args->wsize = oargs->wsize;
247 	args->rsize = oargs->rsize;
248 	args->readdirsize = oargs->readdirsize;
249 	args->timeo = oargs->timeo;
250 	args->retrans = oargs->retrans;
251 	args->readahead = oargs->readahead;
252 	args->hostname = oargs->hostname;
253 }
254 
255 static void
nfs_convert_diskless(void)256 nfs_convert_diskless(void)
257 {
258 
259 	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
260 		sizeof(struct ifaliasreq));
261 	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
262 		sizeof(struct sockaddr_in));
263 	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
264 	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
265 		nfsv3_diskless.root_fhsize = NFSX_MYFH;
266 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
267 	} else {
268 		nfsv3_diskless.root_fhsize = NFSX_V2FH;
269 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
270 	}
271 	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
272 		sizeof(struct sockaddr_in));
273 	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
274 	nfsv3_diskless.root_time = nfs_diskless.root_time;
275 	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
276 		MAXHOSTNAMELEN);
277 	nfs_diskless_valid = 3;
278 }
279 
280 /*
281  * nfs statfs call
282  */
283 static int
nfs_statfs(struct mount * mp,struct statfs * sbp)284 nfs_statfs(struct mount *mp, struct statfs *sbp)
285 {
286 	struct vnode *vp;
287 	struct thread *td;
288 	struct nfsmount *nmp = VFSTONFS(mp);
289 	struct nfsvattr nfsva;
290 	struct nfsfsinfo fs;
291 	struct nfsstatfs sb;
292 	int error = 0, attrflag, gotfsinfo = 0, ret;
293 	struct nfsnode *np;
294 	char *fakefh;
295 	uint32_t clone_blksize;
296 
297 	td = curthread;
298 	clone_blksize = 0;
299 
300 	error = vfs_busy(mp, MBF_NOWAIT);
301 	if (error)
302 		return (error);
303 	if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0) {
304 		if (nmp->nm_fhsize == 0) {
305 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
306 			    td->td_ucred, td);
307 			if (error != 0) {
308 				/*
309 				 * We cannot do anything yet.  Hopefully what
310 				 * is in mnt_stat is sufficient.
311 				 */
312 				if (sbp != &mp->mnt_stat)
313 					*sbp = mp->mnt_stat;
314 				strncpy(&sbp->f_fstypename[0],
315 				    mp->mnt_vfc->vfc_name, MFSNAMELEN);
316 				vfs_unbusy(mp);
317 				return (0);
318 			}
319 		}
320 		fakefh = malloc(NFSX_FHMAX + 1, M_TEMP, M_WAITOK | M_ZERO);
321 		error = ncl_nget(mp, fakefh, NFSX_FHMAX + 1, &np, LK_EXCLUSIVE);
322 		free(fakefh, M_TEMP);
323 	} else {
324 		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
325 		    LK_EXCLUSIVE);
326 	}
327 	if (error) {
328 		vfs_unbusy(mp);
329 		return (error);
330 	}
331 	vp = NFSTOV(np);
332 	mtx_lock(&nmp->nm_mtx);
333 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
334 		mtx_unlock(&nmp->nm_mtx);
335 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
336 		    &attrflag);
337 		if (!error)
338 			gotfsinfo = 1;
339 	} else
340 		mtx_unlock(&nmp->nm_mtx);
341 	if (!error)
342 		error = nfsrpc_statfs(vp, &sb, &fs, NULL, &clone_blksize,
343 		    td->td_ucred, td, &nfsva, &attrflag);
344 	if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0 &&
345 	    error == NFSERR_WRONGSEC) {
346 		/* Cannot get new stats, so return what is in mnt_stat. */
347 		if (sbp != &mp->mnt_stat)
348 			*sbp = mp->mnt_stat;
349 		strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name,
350 		    MFSNAMELEN);
351 		vput(vp);
352 		vfs_unbusy(mp);
353 		return (0);
354 	}
355 	if (error != 0)
356 		NFSCL_DEBUG(2, "statfs=%d\n", error);
357 	if (attrflag == 0) {
358 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
359 		    td->td_ucred, td, &nfsva, NULL, NULL);
360 		if (ret) {
361 			/*
362 			 * Just set default values to get things going.
363 			 */
364 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
365 			nfsva.na_vattr.va_type = VDIR;
366 			nfsva.na_vattr.va_mode = 0777;
367 			nfsva.na_vattr.va_nlink = 100;
368 			nfsva.na_vattr.va_uid = (uid_t)0;
369 			nfsva.na_vattr.va_gid = (gid_t)0;
370 			nfsva.na_vattr.va_fileid = 2;
371 			nfsva.na_vattr.va_gen = 1;
372 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
373 			nfsva.na_vattr.va_size = 512 * 1024;
374 		}
375 	}
376 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
377 	if (!error) {
378 	    mtx_lock(&nmp->nm_mtx);
379 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
380 		nfscl_loadfsinfo(nmp, &fs, clone_blksize);
381 	    nfscl_loadsbinfo(nmp, &sb, sbp);
382 	    sbp->f_iosize = newnfs_iosize(nmp);
383 	    mtx_unlock(&nmp->nm_mtx);
384 	    if (sbp != &mp->mnt_stat) {
385 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
386 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
387 	    }
388 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
389 	} else if (NFS_ISV4(vp)) {
390 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
391 	}
392 	vput(vp);
393 	vfs_unbusy(mp);
394 	return (error);
395 }
396 
397 /*
398  * nfs version 3 fsinfo rpc call
399  */
400 int
ncl_fsinfo(struct nfsmount * nmp,struct vnode * vp,struct ucred * cred,struct thread * td)401 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
402     struct thread *td)
403 {
404 	struct nfsfsinfo fs;
405 	struct nfsvattr nfsva;
406 	int error, attrflag;
407 
408 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag);
409 	if (!error) {
410 		if (attrflag)
411 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
412 		mtx_lock(&nmp->nm_mtx);
413 		nfscl_loadfsinfo(nmp, &fs, 0);
414 		mtx_unlock(&nmp->nm_mtx);
415 	}
416 	return (error);
417 }
418 
419 /*
420  * Mount a remote root fs via nfs. This depends on the info in the
421  * nfs_diskless structure that has been filled in properly by some primary
422  * bootstrap.
423  * It goes something like this:
424  * - do enough of "ifconfig" by calling ifioctl() so that the system
425  *   can talk to the server
426  * - If nfs_diskless.mygateway is filled in, use that address as
427  *   a default gateway.
428  * - build the rootfs mount point and call mountnfs() to do the rest.
429  *
430  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
431  * structure, as well as other global NFS client variables here, as
432  * nfs_mountroot() will be called once in the boot before any other NFS
433  * client activity occurs.
434  */
435 static int
nfs_mountroot(struct mount * mp)436 nfs_mountroot(struct mount *mp)
437 {
438 	struct thread *td = curthread;
439 	struct nfsv3_diskless *nd = &nfsv3_diskless;
440 	struct socket *so;
441 	struct vnode *vp;
442 	struct ifreq ir;
443 	int error;
444 	u_long l;
445 	char buf[128];
446 	char *cp;
447 
448 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
449 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
450 #elif defined(NFS_ROOT)
451 	nfs_setup_diskless();
452 #endif
453 
454 	if (nfs_diskless_valid == 0)
455 		return (-1);
456 	if (nfs_diskless_valid == 1)
457 		nfs_convert_diskless();
458 
459 	/*
460 	 * Do enough of ifconfig(8) so that the critical net interface can
461 	 * talk to the server.
462 	 */
463 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
464 	    td->td_ucred, td);
465 	if (error)
466 		panic("nfs_mountroot: socreate(%04x): %d",
467 			nd->myif.ifra_addr.sa_family, error);
468 
469 #if 0 /* XXX Bad idea */
470 	/*
471 	 * We might not have been told the right interface, so we pass
472 	 * over the first ten interfaces of the same kind, until we get
473 	 * one of them configured.
474 	 */
475 
476 	for (i = strlen(nd->myif.ifra_name) - 1;
477 		nd->myif.ifra_name[i] >= '0' &&
478 		nd->myif.ifra_name[i] <= '9';
479 		nd->myif.ifra_name[i] ++) {
480 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
481 		if(!error)
482 			break;
483 	}
484 #endif
485 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
486 	if (error)
487 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
488 	if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
489 		ir.ifr_mtu = strtol(cp, NULL, 10);
490 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
491 		freeenv(cp);
492 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
493 		if (error)
494 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
495 	}
496 	soclose(so);
497 
498 	/*
499 	 * If the gateway field is filled in, set it as the default route.
500 	 * Note that pxeboot will set a default route of 0 if the route
501 	 * is not set by the DHCP server.  Check also for a value of 0
502 	 * to avoid panicking inappropriately in that situation.
503 	 */
504 	if (nd->mygateway.sin_len != 0 &&
505 	    nd->mygateway.sin_addr.s_addr != 0) {
506 		struct sockaddr_in mask, sin;
507 		struct epoch_tracker et;
508 		struct rt_addrinfo info;
509 		struct rib_cmd_info rc;
510 
511 		bzero((caddr_t)&mask, sizeof(mask));
512 		sin = mask;
513 		sin.sin_family = AF_INET;
514 		sin.sin_len = sizeof(sin);
515                 /* XXX MRT use table 0 for this sort of thing */
516 		NET_EPOCH_ENTER(et);
517 		CURVNET_SET(TD_TO_VNET(td));
518 
519 		bzero((caddr_t)&info, sizeof(info));
520 		info.rti_flags = RTF_UP | RTF_GATEWAY;
521 		info.rti_info[RTAX_DST] = (struct sockaddr *)&sin;
522 		info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&nd->mygateway;
523 		info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&mask;
524 
525 		error = rib_action(RT_DEFAULT_FIB, RTM_ADD, &info, &rc);
526 		CURVNET_RESTORE();
527 		NET_EPOCH_EXIT(et);
528 		if (error)
529 			panic("nfs_mountroot: RTM_ADD: %d", error);
530 	}
531 
532 	/*
533 	 * Create the rootfs mount point.
534 	 */
535 	nd->root_args.fh = nd->root_fh;
536 	nd->root_args.fhsize = nd->root_fhsize;
537 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
538 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
539 		(l >> 24) & 0xff, (l >> 16) & 0xff,
540 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
541 	printf("NFS ROOT: %s\n", buf);
542 	nd->root_args.hostname = buf;
543 	if ((error = nfs_mountdiskless(buf,
544 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
545 		return (error);
546 	}
547 
548 	/*
549 	 * This is not really an nfs issue, but it is much easier to
550 	 * set hostname here and then let the "/etc/rc.xxx" files
551 	 * mount the right /var based upon its preset value.
552 	 */
553 	mtx_lock(&prison0.pr_mtx);
554 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
555 	    sizeof(prison0.pr_hostname));
556 	mtx_unlock(&prison0.pr_mtx);
557 	inittodr(ntohl(nd->root_time));
558 	return (0);
559 }
560 
561 /*
562  * Internal version of mount system call for diskless setup.
563  */
564 static int
nfs_mountdiskless(char * path,struct sockaddr_in * sin,struct nfs_args * args,struct thread * td,struct vnode ** vpp,struct mount * mp)565 nfs_mountdiskless(char *path,
566     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
567     struct vnode **vpp, struct mount *mp)
568 {
569 	struct sockaddr *nam;
570 	int dirlen, error, minvers;
571 	char *dirpath;
572 
573 	/*
574 	 * Find the directory path in "path", which also has the server's
575 	 * name/ip address in it.
576 	 */
577 	dirpath = strchr(path, ':');
578 	if (dirpath != NULL)
579 		dirlen = strlen(++dirpath);
580 	else
581 		dirlen = 0;
582 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
583 	minvers = 0;
584 	if ((args->flags & NFSMNT_NFSV4) != 0)
585 		minvers = -1;
586 	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
587 	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
588 	    NFS_DEFAULT_NEGNAMETIMEO, minvers, 0, NULL, 0)) != 0) {
589 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
590 		return (error);
591 	}
592 	return (0);
593 }
594 
595 static void
nfs_sec_name(char * sec,int * flagsp)596 nfs_sec_name(char *sec, int *flagsp)
597 {
598 	if (!strcmp(sec, "krb5"))
599 		*flagsp |= NFSMNT_KERB;
600 	else if (!strcmp(sec, "krb5i"))
601 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
602 	else if (!strcmp(sec, "krb5p"))
603 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
604 }
605 
606 static void
nfs_decode_args(struct mount * mp,struct nfsmount * nmp,struct nfs_args * argp,const char * hostname,struct ucred * cred,struct thread * td)607 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
608     const char *hostname, struct ucred *cred, struct thread *td)
609 {
610 	int adjsock;
611 	char *p;
612 
613 	/*
614 	 * Set read-only flag if requested; otherwise, clear it if this is
615 	 * an update.  If this is not an update, then either the read-only
616 	 * flag is already clear, or this is a root mount and it was set
617 	 * intentionally at some previous point.
618 	 */
619 	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
620 		MNT_ILOCK(mp);
621 		mp->mnt_flag |= MNT_RDONLY;
622 		MNT_IUNLOCK(mp);
623 	} else if (mp->mnt_flag & MNT_UPDATE) {
624 		MNT_ILOCK(mp);
625 		mp->mnt_flag &= ~MNT_RDONLY;
626 		MNT_IUNLOCK(mp);
627 	}
628 
629 	/*
630 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
631 	 * no sense in that context.  Also, set up appropriate retransmit
632 	 * and soft timeout behavior.
633 	 */
634 	if (argp->sotype == SOCK_STREAM) {
635 		nmp->nm_flag &= ~NFSMNT_NOCONN;
636 		nmp->nm_timeo = NFS_MAXTIMEO;
637 		if ((argp->flags & NFSMNT_NFSV4) != 0)
638 			nmp->nm_retry = INT_MAX;
639 		else
640 			nmp->nm_retry = NFS_RETRANS_TCP;
641 	}
642 
643 	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
644 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
645 		argp->flags &= ~NFSMNT_RDIRPLUS;
646 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
647 	}
648 
649 	/* Clear ONEOPENOWN for NFSv2, 3 and 4.0. */
650 	if (nmp->nm_minorvers == 0) {
651 		argp->flags &= ~NFSMNT_ONEOPENOWN;
652 		nmp->nm_flag &= ~NFSMNT_ONEOPENOWN;
653 	}
654 
655 	/* Re-bind if rsrvd port requested and wasn't on one */
656 	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
657 		  && (argp->flags & NFSMNT_RESVPORT);
658 	/* Also re-bind if we're switching to/from a connected UDP socket */
659 	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
660 		    (argp->flags & NFSMNT_NOCONN));
661 
662 	/* Update flags atomically.  Don't change the lock bits. */
663 	nmp->nm_flag = argp->flags | nmp->nm_flag;
664 
665 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
666 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
667 		if (nmp->nm_timeo < NFS_MINTIMEO)
668 			nmp->nm_timeo = NFS_MINTIMEO;
669 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
670 			nmp->nm_timeo = NFS_MAXTIMEO;
671 	}
672 
673 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
674 		nmp->nm_retry = argp->retrans;
675 		if (nmp->nm_retry > NFS_MAXREXMIT)
676 			nmp->nm_retry = NFS_MAXREXMIT;
677 	}
678 
679 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
680 		nmp->nm_wsize = argp->wsize;
681 		/*
682 		 * Clip at the power of 2 below the size. There is an
683 		 * issue (not isolated) that causes intermittent page
684 		 * faults if this is not done.
685 		 */
686 		if (nmp->nm_wsize > NFS_FABLKSIZE)
687 			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
688 		else
689 			nmp->nm_wsize = NFS_FABLKSIZE;
690 	}
691 
692 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
693 		nmp->nm_rsize = argp->rsize;
694 		/*
695 		 * Clip at the power of 2 below the size. There is an
696 		 * issue (not isolated) that causes intermittent page
697 		 * faults if this is not done.
698 		 */
699 		if (nmp->nm_rsize > NFS_FABLKSIZE)
700 			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
701 		else
702 			nmp->nm_rsize = NFS_FABLKSIZE;
703 	}
704 
705 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
706 		nmp->nm_readdirsize = argp->readdirsize;
707 	}
708 
709 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
710 		nmp->nm_acregmin = argp->acregmin;
711 	else
712 		nmp->nm_acregmin = NFS_MINATTRTIMO;
713 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
714 		nmp->nm_acregmax = argp->acregmax;
715 	else
716 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
717 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
718 		nmp->nm_acdirmin = argp->acdirmin;
719 	else
720 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
721 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
722 		nmp->nm_acdirmax = argp->acdirmax;
723 	else
724 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
725 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
726 		nmp->nm_acdirmin = nmp->nm_acdirmax;
727 	if (nmp->nm_acregmin > nmp->nm_acregmax)
728 		nmp->nm_acregmin = nmp->nm_acregmax;
729 
730 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
731 		if (argp->readahead <= NFS_MAXRAHEAD)
732 			nmp->nm_readahead = argp->readahead;
733 		else
734 			nmp->nm_readahead = NFS_MAXRAHEAD;
735 	}
736 	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
737 		if (argp->wcommitsize < nmp->nm_wsize)
738 			nmp->nm_wcommitsize = nmp->nm_wsize;
739 		else
740 			nmp->nm_wcommitsize = argp->wcommitsize;
741 	}
742 
743 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
744 		    (nmp->nm_soproto != argp->proto));
745 
746 	if (nmp->nm_client != NULL && adjsock) {
747 		int haslock = 0, error = 0;
748 
749 		if (nmp->nm_sotype == SOCK_STREAM) {
750 			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
751 			if (!error)
752 				haslock = 1;
753 		}
754 		if (!error) {
755 		    newnfs_disconnect(nmp, &nmp->nm_sockreq);
756 		    if (haslock)
757 			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
758 		    nmp->nm_sotype = argp->sotype;
759 		    nmp->nm_soproto = argp->proto;
760 		    if (nmp->nm_sotype == SOCK_DGRAM)
761 			while (newnfs_connect(nmp, &nmp->nm_sockreq,
762 			    cred, td, 0, false, &nmp->nm_sockreq.nr_client)) {
763 				printf("newnfs_args: retrying connect\n");
764 				(void) nfs_catnap(PSOCK, 0, "nfscon");
765 			}
766 		}
767 	} else {
768 		nmp->nm_sotype = argp->sotype;
769 		nmp->nm_soproto = argp->proto;
770 	}
771 
772 	if (hostname != NULL) {
773 		strlcpy(nmp->nm_hostname, hostname,
774 		    sizeof(nmp->nm_hostname));
775 		p = strchr(nmp->nm_hostname, ':');
776 		if (p != NULL)
777 			*p = '\0';
778 	}
779 }
780 
/*
 * Mount option names recognized by this file system.  nfs_mount() passes
 * the list to vfs_filteropt() and fails with EINVAL when that call
 * returns non-zero.  The list is terminated by NULL.
 */
static const char *nfs_opts[] = {
	"from", "nfs_args", "noac", "noatime", "noexec",
	"suiddir", "nosuid", "nosymfollow", "union", "noclusterr",
	"noclusterw", "multilabel", "acls", "force", "update",
	"async", "noconn", "nolockd", "conn", "lockd",
	"intr", "rdirplus", "readdirsize", "soft", "hard",
	"mntudp", "tcp", "udp", "wsize", "rsize",
	"retrans", "actimeo", "acregmin", "acregmax", "acdirmin",
	"acdirmax", "resvport", "readahead", "hostname", "timeo",
	"timeout", "addr", "fh", "nfsv3", "sec",
	"principal", "nfsv4", "gssname", "allgssname", "dirpath",
	"minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr",
	"pnfs", "wcommitsize", "oneopenown", "tls", "tlscertname",
	"nconnect", "syskrb5",
	NULL
};
792 
793 /*
794  * Parse the "from" mountarg, passed by the generic mount(8) program
795  * or the mountroot code.  This is used when rerooting into NFS.
796  *
797  * Note that the "hostname" is actually a "hostname:/share/path" string.
798  */
799 static int
nfs_mount_parse_from(struct vfsoptlist * opts,char ** hostnamep,struct sockaddr_in ** sinp,char * dirpath,size_t dirpathsize,int * dirlenp)800 nfs_mount_parse_from(struct vfsoptlist *opts, char **hostnamep,
801     struct sockaddr_in **sinp, char *dirpath, size_t dirpathsize, int *dirlenp)
802 {
803 	char *nam, *delimp, *hostp, *spec;
804 	int error, have_bracket = 0, offset, rv, speclen;
805 	struct sockaddr_in *sin;
806 	size_t len;
807 
808 	error = vfs_getopt(opts, "from", (void **)&spec, &speclen);
809 	if (error != 0)
810 		return (error);
811 	nam = malloc(MNAMELEN + 1, M_TEMP, M_WAITOK);
812 
813 	/*
814 	 * This part comes from sbin/mount_nfs/mount_nfs.c:getnfsargs().
815 	 */
816 	if (*spec == '[' && (delimp = strchr(spec + 1, ']')) != NULL &&
817 	    *(delimp + 1) == ':') {
818 		hostp = spec + 1;
819 		spec = delimp + 2;
820 		have_bracket = 1;
821 	} else if ((delimp = strrchr(spec, ':')) != NULL) {
822 		hostp = spec;
823 		spec = delimp + 1;
824 	} else if ((delimp = strrchr(spec, '@')) != NULL) {
825 		printf("%s: path@server syntax is deprecated, "
826 		    "use server:path\n", __func__);
827 		hostp = delimp + 1;
828 	} else {
829 		printf("%s: no <host>:<dirpath> nfs-name\n", __func__);
830 		free(nam, M_TEMP);
831 		return (EINVAL);
832 	}
833 	*delimp = '\0';
834 
835 	/*
836 	 * If there has been a trailing slash at mounttime it seems
837 	 * that some mountd implementations fail to remove the mount
838 	 * entries from their mountlist while unmounting.
839 	 */
840 	for (speclen = strlen(spec);
841 	    speclen > 1 && spec[speclen - 1] == '/';
842 	    speclen--)
843 		spec[speclen - 1] = '\0';
844 	if (strlen(hostp) + strlen(spec) + 1 > MNAMELEN) {
845 		printf("%s: %s:%s: name too long", __func__, hostp, spec);
846 		free(nam, M_TEMP);
847 		return (EINVAL);
848 	}
849 	/* Make both '@' and ':' notations equal */
850 	if (*hostp != '\0') {
851 		len = strlen(hostp);
852 		offset = 0;
853 		if (have_bracket)
854 			nam[offset++] = '[';
855 		memmove(nam + offset, hostp, len);
856 		if (have_bracket)
857 			nam[len + offset++] = ']';
858 		nam[len + offset++] = ':';
859 		memmove(nam + len + offset, spec, speclen);
860 		nam[len + speclen + offset] = '\0';
861 	} else
862 		nam[0] = '\0';
863 
864 	/*
865 	 * XXX: IPv6
866 	 */
867 	sin = malloc(sizeof(*sin), M_SONAME, M_WAITOK);
868 	rv = inet_pton(AF_INET, hostp, &sin->sin_addr);
869 	if (rv != 1) {
870 		printf("%s: cannot parse '%s', inet_pton() returned %d\n",
871 		    __func__, hostp, rv);
872 		free(nam, M_TEMP);
873 		free(sin, M_SONAME);
874 		return (EINVAL);
875 	}
876 
877 	sin->sin_len = sizeof(*sin);
878 	sin->sin_family = AF_INET;
879 	/*
880 	 * XXX: hardcoded port number.
881 	 */
882 	sin->sin_port = htons(2049);
883 
884 	*hostnamep = strdup(nam, M_NEWNFSMNT);
885 	*sinp = sin;
886 	strlcpy(dirpath, spec, dirpathsize);
887 	*dirlenp = strlen(dirpath);
888 
889 	free(nam, M_TEMP);
890 	return (0);
891 }
892 
893 /*
894  * VFS Operations.
895  *
896  * mount system call
897  * It seems a bit dumb to copyinstr() the host and path here and then
898  * bcopy() them in mountnfs(), but I wanted to detect errors before
899  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
900  * an error after that means that I have to release the mbuf.
901  */
902 /* ARGSUSED */
903 static int
nfs_mount(struct mount * mp)904 nfs_mount(struct mount *mp)
905 {
906 	struct nfs_args args = {
907 	    .version = NFS_ARGSVERSION,
908 	    .addr = NULL,
909 	    .addrlen = sizeof (struct sockaddr_in),
910 	    .sotype = SOCK_STREAM,
911 	    .proto = 0,
912 	    .fh = NULL,
913 	    .fhsize = 0,
914 	    .flags = NFSMNT_RESVPORT,
915 	    .wsize = NFS_WSIZE,
916 	    .rsize = NFS_RSIZE,
917 	    .readdirsize = NFS_READDIRSIZE,
918 	    .timeo = 10,
919 	    .retrans = NFS_RETRANS,
920 	    .readahead = NFS_DEFRAHEAD,
921 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
922 	    .hostname = NULL,
923 	    .acregmin = NFS_MINATTRTIMO,
924 	    .acregmax = NFS_MAXATTRTIMO,
925 	    .acdirmin = NFS_MINDIRATTRTIMO,
926 	    .acdirmax = NFS_MAXDIRATTRTIMO,
927 	};
928 	int error = 0, ret, len;
929 	struct sockaddr *nam = NULL;
930 	struct vnode *vp;
931 	struct thread *td;
932 	char *hst;
933 	u_char nfh[NFSX_FHMAX], krbname[100], *dirpath, srvkrbname[100];
934 	char *cp, *opt, *name, *secname, *tlscertname;
935 	int nametimeo = NFS_DEFAULT_NAMETIMEO;
936 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
937 	int minvers = -1;
938 	int dirlen, has_nfs_args_opt, has_nfs_from_opt,
939 	    krbnamelen, srvkrbnamelen;
940 	size_t hstlen;
941 	uint32_t newflag;
942 	int aconn = 0;
943 
944 	has_nfs_args_opt = 0;
945 	has_nfs_from_opt = 0;
946 	newflag = 0;
947 	tlscertname = NULL;
948 	hst = malloc(MNAMELEN, M_TEMP, M_WAITOK);
949 	dirpath = malloc(MNAMELEN, M_TEMP, M_WAITOK);
950 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
951 		error = EINVAL;
952 		goto out;
953 	}
954 
955 	td = curthread;
956 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS &&
957 	    nfs_diskless_valid != 0) {
958 		error = nfs_mountroot(mp);
959 		goto out;
960 	}
961 
962 	nfscl_init();
963 
964 	/*
965 	 * The old mount_nfs program passed the struct nfs_args
966 	 * from userspace to kernel.  The new mount_nfs program
967 	 * passes string options via nmount() from userspace to kernel
968 	 * and we populate the struct nfs_args in the kernel.
969 	 */
970 	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
971 		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
972 		    sizeof(args));
973 		if (error != 0)
974 			goto out;
975 
976 		if (args.version != NFS_ARGSVERSION) {
977 			error = EPROGMISMATCH;
978 			goto out;
979 		}
980 		has_nfs_args_opt = 1;
981 	}
982 
983 	/* Handle the new style options. */
984 	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
985 		args.acdirmin = args.acdirmax =
986 		    args.acregmin = args.acregmax = 0;
987 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
988 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
989 	}
990 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
991 		args.flags |= NFSMNT_NOCONN;
992 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
993 		args.flags &= ~NFSMNT_NOCONN;
994 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
995 		args.flags |= NFSMNT_NOLOCKD;
996 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
997 		args.flags &= ~NFSMNT_NOLOCKD;
998 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
999 		args.flags |= NFSMNT_INT;
1000 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
1001 		args.flags |= NFSMNT_RDIRPLUS;
1002 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
1003 		args.flags |= NFSMNT_RESVPORT;
1004 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
1005 		args.flags &= ~NFSMNT_RESVPORT;
1006 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
1007 		args.flags |= NFSMNT_SOFT;
1008 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
1009 		args.flags &= ~NFSMNT_SOFT;
1010 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
1011 		args.sotype = SOCK_DGRAM;
1012 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
1013 		args.sotype = SOCK_DGRAM;
1014 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
1015 		args.sotype = SOCK_STREAM;
1016 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
1017 		args.flags |= NFSMNT_NFSV3;
1018 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
1019 		args.flags |= NFSMNT_NFSV4;
1020 		args.sotype = SOCK_STREAM;
1021 	}
1022 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
1023 		args.flags |= NFSMNT_ALLGSSNAME;
1024 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
1025 		args.flags |= NFSMNT_NOCTO;
1026 	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
1027 		args.flags |= NFSMNT_NONCONTIGWR;
1028 	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
1029 		args.flags |= NFSMNT_PNFS;
1030 	if (vfs_getopt(mp->mnt_optnew, "oneopenown", NULL, NULL) == 0)
1031 		args.flags |= NFSMNT_ONEOPENOWN;
1032 	if (vfs_getopt(mp->mnt_optnew, "tls", NULL, NULL) == 0)
1033 		newflag |= NFSMNT_TLS;
1034 	if (vfs_getopt(mp->mnt_optnew, "tlscertname", (void **)&opt, &len) ==
1035 	    0) {
1036 		/*
1037 		 * tlscertname with "key.pem" appended to it forms a file
1038 		 * name.  As such, the maximum allowable strlen(tlscertname) is
1039 		 * NAME_MAX - 7. However, "len" includes the nul termination
1040 		 * byte so it can be up to NAME_MAX - 6.
1041 		 */
1042 		if (opt == NULL || len <= 1 || len > NAME_MAX - 6) {
1043 			vfs_mount_error(mp, "invalid tlscertname");
1044 			error = EINVAL;
1045 			goto out;
1046 		}
1047 		tlscertname = malloc(len, M_NEWNFSMNT, M_WAITOK);
1048 		strlcpy(tlscertname, opt, len);
1049 	}
1050 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
1051 		if (opt == NULL) {
1052 			vfs_mount_error(mp, "illegal readdirsize");
1053 			error = EINVAL;
1054 			goto out;
1055 		}
1056 		ret = sscanf(opt, "%d", &args.readdirsize);
1057 		if (ret != 1 || args.readdirsize <= 0) {
1058 			vfs_mount_error(mp, "illegal readdirsize: %s",
1059 			    opt);
1060 			error = EINVAL;
1061 			goto out;
1062 		}
1063 		args.flags |= NFSMNT_READDIRSIZE;
1064 	}
1065 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
1066 		if (opt == NULL) {
1067 			vfs_mount_error(mp, "illegal readahead");
1068 			error = EINVAL;
1069 			goto out;
1070 		}
1071 		ret = sscanf(opt, "%d", &args.readahead);
1072 		if (ret != 1 || args.readahead <= 0) {
1073 			vfs_mount_error(mp, "illegal readahead: %s",
1074 			    opt);
1075 			error = EINVAL;
1076 			goto out;
1077 		}
1078 		args.flags |= NFSMNT_READAHEAD;
1079 	}
1080 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
1081 		if (opt == NULL) {
1082 			vfs_mount_error(mp, "illegal wsize");
1083 			error = EINVAL;
1084 			goto out;
1085 		}
1086 		ret = sscanf(opt, "%d", &args.wsize);
1087 		if (ret != 1 || args.wsize <= 0) {
1088 			vfs_mount_error(mp, "illegal wsize: %s",
1089 			    opt);
1090 			error = EINVAL;
1091 			goto out;
1092 		}
1093 		args.flags |= NFSMNT_WSIZE;
1094 	}
1095 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
1096 		if (opt == NULL) {
1097 			vfs_mount_error(mp, "illegal rsize");
1098 			error = EINVAL;
1099 			goto out;
1100 		}
1101 		ret = sscanf(opt, "%d", &args.rsize);
1102 		if (ret != 1 || args.rsize <= 0) {
1103 			vfs_mount_error(mp, "illegal wsize: %s",
1104 			    opt);
1105 			error = EINVAL;
1106 			goto out;
1107 		}
1108 		args.flags |= NFSMNT_RSIZE;
1109 	}
1110 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
1111 		if (opt == NULL) {
1112 			vfs_mount_error(mp, "illegal retrans");
1113 			error = EINVAL;
1114 			goto out;
1115 		}
1116 		ret = sscanf(opt, "%d", &args.retrans);
1117 		if (ret != 1 || args.retrans <= 0) {
1118 			vfs_mount_error(mp, "illegal retrans: %s",
1119 			    opt);
1120 			error = EINVAL;
1121 			goto out;
1122 		}
1123 		args.flags |= NFSMNT_RETRANS;
1124 	}
1125 	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
1126 		ret = sscanf(opt, "%d", &args.acregmin);
1127 		if (ret != 1 || args.acregmin < 0) {
1128 			vfs_mount_error(mp, "illegal actimeo: %s",
1129 			    opt);
1130 			error = EINVAL;
1131 			goto out;
1132 		}
1133 		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
1134 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
1135 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
1136 	}
1137 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
1138 		ret = sscanf(opt, "%d", &args.acregmin);
1139 		if (ret != 1 || args.acregmin < 0) {
1140 			vfs_mount_error(mp, "illegal acregmin: %s",
1141 			    opt);
1142 			error = EINVAL;
1143 			goto out;
1144 		}
1145 		args.flags |= NFSMNT_ACREGMIN;
1146 	}
1147 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
1148 		ret = sscanf(opt, "%d", &args.acregmax);
1149 		if (ret != 1 || args.acregmax < 0) {
1150 			vfs_mount_error(mp, "illegal acregmax: %s",
1151 			    opt);
1152 			error = EINVAL;
1153 			goto out;
1154 		}
1155 		args.flags |= NFSMNT_ACREGMAX;
1156 	}
1157 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
1158 		ret = sscanf(opt, "%d", &args.acdirmin);
1159 		if (ret != 1 || args.acdirmin < 0) {
1160 			vfs_mount_error(mp, "illegal acdirmin: %s",
1161 			    opt);
1162 			error = EINVAL;
1163 			goto out;
1164 		}
1165 		args.flags |= NFSMNT_ACDIRMIN;
1166 	}
1167 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
1168 		ret = sscanf(opt, "%d", &args.acdirmax);
1169 		if (ret != 1 || args.acdirmax < 0) {
1170 			vfs_mount_error(mp, "illegal acdirmax: %s",
1171 			    opt);
1172 			error = EINVAL;
1173 			goto out;
1174 		}
1175 		args.flags |= NFSMNT_ACDIRMAX;
1176 	}
1177 	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
1178 		ret = sscanf(opt, "%d", &args.wcommitsize);
1179 		if (ret != 1 || args.wcommitsize < 0) {
1180 			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1181 			error = EINVAL;
1182 			goto out;
1183 		}
1184 		args.flags |= NFSMNT_WCOMMITSIZE;
1185 	}
1186 	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1187 		ret = sscanf(opt, "%d", &args.timeo);
1188 		if (ret != 1 || args.timeo <= 0) {
1189 			vfs_mount_error(mp, "illegal timeo: %s",
1190 			    opt);
1191 			error = EINVAL;
1192 			goto out;
1193 		}
1194 		args.flags |= NFSMNT_TIMEO;
1195 	}
1196 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1197 		ret = sscanf(opt, "%d", &args.timeo);
1198 		if (ret != 1 || args.timeo <= 0) {
1199 			vfs_mount_error(mp, "illegal timeout: %s",
1200 			    opt);
1201 			error = EINVAL;
1202 			goto out;
1203 		}
1204 		args.flags |= NFSMNT_TIMEO;
1205 	}
1206 	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1207 		ret = sscanf(opt, "%d", &nametimeo);
1208 		if (ret != 1 || nametimeo < 0) {
1209 			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1210 			error = EINVAL;
1211 			goto out;
1212 		}
1213 	}
1214 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1215 	    == 0) {
1216 		ret = sscanf(opt, "%d", &negnametimeo);
1217 		if (ret != 1 || negnametimeo < 0) {
1218 			vfs_mount_error(mp, "illegal negnametimeo: %s",
1219 			    opt);
1220 			error = EINVAL;
1221 			goto out;
1222 		}
1223 	}
1224 	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1225 	    0) {
1226 		ret = sscanf(opt, "%d", &minvers);
1227 		if (ret != 1 || minvers < 0 || minvers > 2 ||
1228 		    (args.flags & NFSMNT_NFSV4) == 0) {
1229 			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1230 			error = EINVAL;
1231 			goto out;
1232 		}
1233 	}
1234 	if (vfs_getopt(mp->mnt_optnew, "nconnect", (void **)&opt, NULL) ==
1235 	    0) {
1236 		ret = sscanf(opt, "%d", &aconn);
1237 		if (ret != 1 || aconn < 1 || aconn > NFS_MAXNCONN) {
1238 			vfs_mount_error(mp, "illegal nconnect: %s", opt);
1239 			error = EINVAL;
1240 			goto out;
1241 		}
1242 		/*
1243 		 * Setting nconnect=1 is a no-op, allowed so that
1244 		 * the option can be used in a Linux compatible way.
1245 		 */
1246 		aconn--;
1247 	}
1248 	if (vfs_getopt(mp->mnt_optnew, "syskrb5", NULL, NULL) == 0)
1249 		newflag |= NFSMNT_SYSKRB5;
1250 	if (vfs_getopt(mp->mnt_optnew, "sec",
1251 		(void **) &secname, NULL) == 0)
1252 		nfs_sec_name(secname, &args.flags);
1253 
1254 	if (mp->mnt_flag & MNT_UPDATE) {
1255 		struct nfsmount *nmp = VFSTONFS(mp);
1256 
1257 		if (nmp == NULL) {
1258 			error = EIO;
1259 			goto out;
1260 		}
1261 
1262 		/*
1263 		 * If a change from TCP->UDP is done and there are thread(s)
1264 		 * that have I/O RPC(s) in progress with a transfer size
1265 		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1266 		 * hung, retrying the RPC(s) forever. Usually these threads
1267 		 * will be seen doing an uninterruptible sleep on wait channel
1268 		 * "nfsreq".
1269 		 */
1270 		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1271 			tprintf(td->td_proc, LOG_WARNING,
1272 	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1273 
1274 		/*
1275 		 * When doing an update, we can't change version,
1276 		 * security, switch lockd strategies, change cookie
1277 		 * translation or switch oneopenown.
1278 		 */
1279 		args.flags = (args.flags &
1280 		    ~(NFSMNT_NFSV3 |
1281 		      NFSMNT_NFSV4 |
1282 		      NFSMNT_KERB |
1283 		      NFSMNT_INTEGRITY |
1284 		      NFSMNT_PRIVACY |
1285 		      NFSMNT_ONEOPENOWN |
1286 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1287 		    (nmp->nm_flag &
1288 			(NFSMNT_NFSV3 |
1289 			 NFSMNT_NFSV4 |
1290 			 NFSMNT_KERB |
1291 			 NFSMNT_INTEGRITY |
1292 			 NFSMNT_PRIVACY |
1293 			 NFSMNT_ONEOPENOWN |
1294 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1295 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1296 		goto out;
1297 	}
1298 
1299 	/*
1300 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1301 	 * or no-connection mode for those protocols that support
1302 	 * no-connection mode (the flag will be cleared later for protocols
1303 	 * that do not support no-connection mode).  This will allow a client
1304 	 * to receive replies from a different IP then the request was
1305 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1306 	 * not 0.
1307 	 */
1308 	if (nfs_ip_paranoia == 0)
1309 		args.flags |= NFSMNT_NOCONN;
1310 
1311 	if (has_nfs_args_opt != 0) {
1312 		/*
1313 		 * In the 'nfs_args' case, the pointers in the args
1314 		 * structure are in userland - we copy them in here.
1315 		 */
1316 		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1317 			vfs_mount_error(mp, "Bad file handle");
1318 			error = EINVAL;
1319 			goto out;
1320 		}
1321 		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1322 		    args.fhsize);
1323 		if (error != 0)
1324 			goto out;
1325 		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1326 		if (error != 0)
1327 			goto out;
1328 		bzero(&hst[hstlen], MNAMELEN - hstlen);
1329 		args.hostname = hst;
1330 		/* getsockaddr() call must be after above copyin() calls */
1331 		error = getsockaddr(&nam, args.addr, args.addrlen);
1332 		if (error != 0)
1333 			goto out;
1334 	} else if (nfs_mount_parse_from(mp->mnt_optnew,
1335 	    &args.hostname, (struct sockaddr_in **)&nam, dirpath,
1336 	    MNAMELEN, &dirlen) == 0) {
1337 		has_nfs_from_opt = 1;
1338 		bcopy(args.hostname, hst, MNAMELEN);
1339 		hst[MNAMELEN - 1] = '\0';
1340 
1341 		/*
1342 		 * This only works with NFSv4 for now.
1343 		 */
1344 		args.fhsize = 0;
1345 		args.flags |= NFSMNT_NFSV4;
1346 		args.sotype = SOCK_STREAM;
1347 	} else {
1348 		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1349 		    &args.fhsize) == 0) {
1350 			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1351 				vfs_mount_error(mp, "Bad file handle");
1352 				error = EINVAL;
1353 				goto out;
1354 			}
1355 			bcopy(args.fh, nfh, args.fhsize);
1356 		} else {
1357 			args.fhsize = 0;
1358 		}
1359 		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1360 		    (void **)&args.hostname, &len);
1361 		if (args.hostname == NULL) {
1362 			vfs_mount_error(mp, "Invalid hostname");
1363 			error = EINVAL;
1364 			goto out;
1365 		}
1366 		if (len >= MNAMELEN) {
1367 			vfs_mount_error(mp, "Hostname too long");
1368 			error = EINVAL;
1369 			goto out;
1370 		}
1371 		bcopy(args.hostname, hst, len);
1372 		hst[len] = '\0';
1373 	}
1374 
1375 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1376 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1377 	else {
1378 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1379 		cp = strchr(srvkrbname, ':');
1380 		if (cp != NULL)
1381 			*cp = '\0';
1382 	}
1383 	srvkrbnamelen = strlen(srvkrbname);
1384 
1385 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1386 		strlcpy(krbname, name, sizeof (krbname));
1387 	else
1388 		krbname[0] = '\0';
1389 	krbnamelen = strlen(krbname);
1390 
1391 	if (has_nfs_from_opt == 0) {
1392 		if (vfs_getopt(mp->mnt_optnew,
1393 		    "dirpath", (void **)&name, NULL) == 0)
1394 			strlcpy(dirpath, name, MNAMELEN);
1395 		else
1396 			dirpath[0] = '\0';
1397 		dirlen = strlen(dirpath);
1398 	}
1399 
1400 	if (has_nfs_args_opt == 0 && has_nfs_from_opt == 0) {
1401 		if (vfs_getopt(mp->mnt_optnew, "addr",
1402 		    (void **)&args.addr, &args.addrlen) == 0) {
1403 			if (args.addrlen > SOCK_MAXADDRLEN) {
1404 				error = ENAMETOOLONG;
1405 				goto out;
1406 			}
1407 			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1408 			bcopy(args.addr, nam, args.addrlen);
1409 			nam->sa_len = args.addrlen;
1410 		} else {
1411 			vfs_mount_error(mp, "No server address");
1412 			error = EINVAL;
1413 			goto out;
1414 		}
1415 	}
1416 
1417 	if (aconn > 0 && (args.sotype != SOCK_STREAM ||
1418 	    (args.flags & NFSMNT_NFSV4) == 0 || minvers == 0)) {
1419 		/*
1420 		 * RFC 5661 requires that an NFSv4.1/4.2 server
1421 		 * send an RPC reply on the same TCP connection
1422 		 * as the one it received the request on.
1423 		 * This property in required for "nconnect" and
1424 		 * might not be the case for NFSv3 or NFSv4.0 servers.
1425 		 */
1426 		vfs_mount_error(mp, "nconnect should only be used "
1427 		    "for NFSv4.1/4.2 mounts");
1428 		error = EINVAL;
1429 		goto out;
1430 	}
1431 
1432 	if ((newflag & NFSMNT_SYSKRB5) != 0 &&
1433 	    ((args.flags & NFSMNT_NFSV4) == 0 || minvers == 0)) {
1434 		/*
1435 		 * This option requires the use of SP4_NONE, which
1436 		 * is only in NFSv4.1/4.2.
1437 		 */
1438 		vfs_mount_error(mp, "syskrb5 should only be used "
1439 		    "for NFSv4.1/4.2 mounts");
1440 		error = EINVAL;
1441 		goto out;
1442 	}
1443 
1444 	if ((newflag & NFSMNT_SYSKRB5) != 0 &&
1445 	    (args.flags & NFSMNT_KERB) == 0) {
1446 		/*
1447 		 * This option modifies the behaviour of sec=krb5[ip].
1448 		 */
1449 		vfs_mount_error(mp, "syskrb5 should only be used "
1450 		    "for sec=krb5[ip] mounts");
1451 		error = EINVAL;
1452 		goto out;
1453 	}
1454 
1455 	if ((newflag & NFSMNT_SYSKRB5) != 0 && krbname[0] != '\0') {
1456 		/*
1457 		 * This option is used as an alternative to "gssname".
1458 		 */
1459 		vfs_mount_error(mp, "syskrb5 should not be used "
1460 		    "with the gssname option");
1461 		error = EINVAL;
1462 		goto out;
1463 	}
1464 
1465 	args.fh = nfh;
1466 	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1467 	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1468 	    nametimeo, negnametimeo, minvers, newflag, tlscertname, aconn);
1469 out:
1470 	if (!error) {
1471 		MNT_ILOCK(mp);
1472 		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1473 		    MNTK_USES_BCACHE;
1474 		if ((VFSTONFS(mp)->nm_flag & NFSMNT_NFSV4) != 0)
1475 			mp->mnt_kern_flag |= MNTK_NULL_NOCACHE;
1476 		MNT_IUNLOCK(mp);
1477 	}
1478 	free(hst, M_TEMP);
1479 	free(dirpath, M_TEMP);
1480 	return (error);
1481 }
1482 
1483 /*
1484  * VFS Operations.
1485  *
1486  * mount system call
1487  * It seems a bit dumb to copyinstr() the host and path here and then
1488  * bcopy() them in mountnfs(), but I wanted to detect errors before
1489  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
1490  * an error after that means that I have to release the mbuf.
1491  */
1492 /* ARGSUSED */
1493 static int
nfs_cmount(struct mntarg * ma,void * data,uint64_t flags)1494 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1495 {
1496 	int error;
1497 	struct nfs_args args;
1498 
1499 	error = copyin(data, &args, sizeof (struct nfs_args));
1500 	if (error)
1501 		return error;
1502 
1503 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1504 
1505 	error = kernel_mount(ma, flags);
1506 	return (error);
1507 }
1508 
1509 /*
1510  * Common code for mount and mountroot
1511  */
1512 static int
mountnfs(struct nfs_args * argp,struct mount * mp,struct sockaddr * nam,char * hst,u_char * krbname,int krbnamelen,u_char * dirpath,int dirlen,u_char * srvkrbname,int srvkrbnamelen,struct vnode ** vpp,struct ucred * cred,struct thread * td,int nametimeo,int negnametimeo,int minvers,uint32_t newflag,char * tlscertname,int aconn)1513 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1514     char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1515     u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1516     struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1517     int minvers, uint32_t newflag, char *tlscertname, int aconn)
1518 {
1519 	struct nfsmount *nmp;
1520 	struct nfsnode *np;
1521 	int error, trycnt, ret;
1522 	struct nfsvattr nfsva;
1523 	struct nfsclclient *clp;
1524 	struct nfsclds *dsp, *tdsp;
1525 	uint32_t lease;
1526 	bool tryminvers;
1527 	char *fakefh;
1528 	static u_int64_t clval = 0;
1529 #ifdef KERN_TLS
1530 	u_int maxlen;
1531 #endif
1532 
1533 	NFSCL_DEBUG(3, "in mnt\n");
1534 	CURVNET_SET(CRED_TO_VNET(cred));
1535 	clp = NULL;
1536 	if (mp->mnt_flag & MNT_UPDATE) {
1537 		nmp = VFSTONFS(mp);
1538 		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1539 		free(nam, M_SONAME);
1540 		free(tlscertname, M_NEWNFSMNT);
1541 		CURVNET_RESTORE();
1542 		return (0);
1543 	} else {
1544 		/* NFS-over-TLS requires that rpctls be functioning. */
1545 		if ((newflag & NFSMNT_TLS) != 0) {
1546 			error = EINVAL;
1547 #ifdef KERN_TLS
1548 			/* KERN_TLS is only supported for TCP. */
1549 			if (argp->sotype == SOCK_STREAM &&
1550 			    rpctls_getinfo(&maxlen, true, false))
1551 				error = 0;
1552 #endif
1553 			if (error != 0) {
1554 				free(nam, M_SONAME);
1555 				free(tlscertname, M_NEWNFSMNT);
1556 				CURVNET_RESTORE();
1557 				return (error);
1558 			}
1559 		}
1560 		nmp = malloc(sizeof (struct nfsmount) +
1561 		    krbnamelen + dirlen + srvkrbnamelen + 2,
1562 		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1563 		nmp->nm_tlscertname = tlscertname;
1564 		nmp->nm_newflag = newflag;
1565 		TAILQ_INIT(&nmp->nm_bufq);
1566 		TAILQ_INIT(&nmp->nm_sess);
1567 		if (clval == 0)
1568 			clval = (u_int64_t)nfsboottime.tv_sec;
1569 		nmp->nm_clval = clval++;
1570 		nmp->nm_krbnamelen = krbnamelen;
1571 		nmp->nm_dirpathlen = dirlen;
1572 		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1573 		if (td->td_ucred->cr_uid != (uid_t)0) {
1574 			/*
1575 			 * nm_uid is used to get KerberosV credentials for
1576 			 * the nfsv4 state handling operations if there is
1577 			 * no host based principal set. Use the uid of
1578 			 * this user if not root, since they are doing the
1579 			 * mount. I don't think setting this for root will
1580 			 * work, since root normally does not have user
1581 			 * credentials in a credentials cache.
1582 			 */
1583 			nmp->nm_uid = td->td_ucred->cr_uid;
1584 		} else {
1585 			/*
1586 			 * Just set to -1, so it won't be used.
1587 			 */
1588 			nmp->nm_uid = (uid_t)-1;
1589 		}
1590 
1591 		/* Copy and null terminate all the names */
1592 		if (nmp->nm_krbnamelen > 0) {
1593 			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1594 			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1595 		}
1596 		if (nmp->nm_dirpathlen > 0) {
1597 			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1598 			    nmp->nm_dirpathlen);
1599 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1600 			    + 1] = '\0';
1601 		}
1602 		if (nmp->nm_srvkrbnamelen > 0) {
1603 			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1604 			    nmp->nm_srvkrbnamelen);
1605 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1606 			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1607 		}
1608 		nmp->nm_sockreq.nr_cred = crhold(cred);
1609 		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1610 		mp->mnt_data = nmp;
1611 		nmp->nm_getinfo = nfs_getnlminfo;
1612 		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1613 	}
1614 	vfs_getnewfsid(mp);
1615 	nmp->nm_mountp = mp;
1616 	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1617 
1618 	/*
1619 	 * Since nfs_decode_args() might optionally set them, these
1620 	 * need to be set to defaults before the call, so that the
1621 	 * optional settings aren't overwritten.
1622 	 */
1623 	nmp->nm_nametimeo = nametimeo;
1624 	nmp->nm_negnametimeo = negnametimeo;
1625 	nmp->nm_timeo = NFS_TIMEO;
1626 	nmp->nm_retry = NFS_RETRANS;
1627 	nmp->nm_readahead = NFS_DEFRAHEAD;
1628 
1629 	/* This is empirical approximation of sqrt(hibufspace) * 256. */
1630 	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
1631 	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
1632 		nmp->nm_wcommitsize *= 2;
1633 	nmp->nm_wcommitsize *= 256;
1634 
1635 	tryminvers = false;
1636 	if ((argp->flags & NFSMNT_NFSV4) != 0) {
1637 		if (minvers < 0) {
1638 			tryminvers = true;
1639 			minvers = NFSV42_MINORVERSION;
1640 		}
1641 		nmp->nm_minorvers = minvers;
1642 	} else
1643 		nmp->nm_minorvers = 0;
1644 
1645 	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1646 
1647 	/*
1648 	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1649 	 * high, depending on whether we end up with negative offsets in
1650 	 * the client or server somewhere.  2GB-1 may be safer.
1651 	 *
1652 	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1653 	 * that we can handle until we find out otherwise.
1654 	 */
1655 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1656 		nmp->nm_maxfilesize = 0xffffffffLL;
1657 	else
1658 		nmp->nm_maxfilesize = OFF_MAX;
1659 
1660 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1661 		nmp->nm_wsize = NFS_WSIZE;
1662 		nmp->nm_rsize = NFS_RSIZE;
1663 		nmp->nm_readdirsize = NFS_READDIRSIZE;
1664 	}
1665 	nmp->nm_numgrps = NFS_MAXGRPS;
1666 	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1667 	if (nmp->nm_tprintf_delay < 0)
1668 		nmp->nm_tprintf_delay = 0;
1669 	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1670 	if (nmp->nm_tprintf_initial_delay < 0)
1671 		nmp->nm_tprintf_initial_delay = 0;
1672 	nmp->nm_fhsize = argp->fhsize;
1673 	if (nmp->nm_fhsize > 0)
1674 		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1675 	strlcpy(mp->mnt_stat.f_mntfromname, hst, MNAMELEN);
1676 	nmp->nm_nam = nam;
1677 	/* Set up the sockets and per-host congestion */
1678 	nmp->nm_sotype = argp->sotype;
1679 	nmp->nm_soproto = argp->proto;
1680 	nmp->nm_sockreq.nr_prog = NFS_PROG;
1681 	if ((argp->flags & NFSMNT_NFSV4))
1682 		nmp->nm_sockreq.nr_vers = NFS_VER4;
1683 	else if ((argp->flags & NFSMNT_NFSV3))
1684 		nmp->nm_sockreq.nr_vers = NFS_VER3;
1685 	else
1686 		nmp->nm_sockreq.nr_vers = NFS_VER2;
1687 
1688 	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0, false,
1689 	    &nmp->nm_sockreq.nr_client)))
1690 		goto bad;
1691 	/* For NFSv4, get the clientid now. */
1692 	if ((argp->flags & NFSMNT_NFSV4) != 0) {
1693 		NFSCL_DEBUG(3, "at getcl\n");
1694 		error = nfscl_getcl(mp, cred, td, tryminvers, true, &clp);
1695 		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1696 		if (error != 0)
1697 			goto bad;
1698 		if (aconn > 0 && nmp->nm_minorvers == 0) {
1699 			vfs_mount_error(mp, "nconnect should only be used "
1700 			    "for NFSv4.1/4.2 mounts");
1701 			error = EINVAL;
1702 			goto bad;
1703 		}
1704 		if (NFSHASSYSKRB5(nmp) && nmp->nm_minorvers == 0) {
1705 			vfs_mount_error(mp, "syskrb5 should only be used "
1706 			    "for NFSv4.1/4.2 mounts");
1707 			error = EINVAL;
1708 			goto bad;
1709 		}
1710 	}
1711 
1712 	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1713 	    nmp->nm_dirpathlen > 0) {
1714 		NFSCL_DEBUG(3, "in dirp\n");
1715 		/*
1716 		 * If the fhsize on the mount point == 0 for V4, the mount
1717 		 * path needs to be looked up.
1718 		 */
1719 		trycnt = 3;
1720 		do {
1721 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1722 			    cred, td);
1723 			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
1724 			if (error != 0 && (!NFSHASSYSKRB5(nmp) ||
1725 			    error != NFSERR_WRONGSEC))
1726 				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1727 		} while (error != 0 && --trycnt > 0 &&
1728 		    (!NFSHASSYSKRB5(nmp) || error != NFSERR_WRONGSEC));
1729 		if (error != 0 && (!NFSHASSYSKRB5(nmp) ||
1730 		    error != NFSERR_WRONGSEC))
1731 			goto bad;
1732 	}
1733 
1734 	/*
1735 	 * A reference count is needed on the nfsnode representing the
1736 	 * remote root.  If this object is not persistent, then backward
1737 	 * traversals of the mount point (i.e. "..") will not work if
1738 	 * the nfsnode gets flushed out of the cache. Ufs does not have
1739 	 * this problem, because one can identify root inodes by their
1740 	 * number == UFS_ROOTINO (2).
1741 	 * For the "syskrb5" mount, the file handle might not have
1742 	 * been acquired.  As such, use a "fake" file handle which
1743 	 * can never be returned by a server for the root vnode.
1744 	 */
1745 	if (nmp->nm_fhsize > 0 || NFSHASSYSKRB5(nmp)) {
1746 		/*
1747 		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1748 		 * non-zero for the root vnode. f_iosize will be set correctly
1749 		 * by nfs_statfs() before any I/O occurs.
1750 		 */
1751 		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1752 		if (nmp->nm_fhsize == 0) {
1753 			fakefh = malloc(NFSX_FHMAX + 1, M_TEMP, M_WAITOK |
1754 			    M_ZERO);
1755 			error = ncl_nget(mp, fakefh, NFSX_FHMAX + 1, &np,
1756 			    LK_EXCLUSIVE);
1757 			free(fakefh, M_TEMP);
1758 			nmp->nm_privflag |= NFSMNTP_FAKEROOTFH;
1759 		} else
1760 			error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1761 			    LK_EXCLUSIVE);
1762 		if (error)
1763 			goto bad;
1764 		*vpp = NFSTOV(np);
1765 
1766 		/*
1767 		 * Get file attributes and transfer parameters for the
1768 		 * mountpoint.  This has the side effect of filling in
1769 		 * (*vpp)->v_type with the correct value.
1770 		 */
1771 		ret = ENXIO;
1772 		if (nmp->nm_fhsize > 0)
1773 			ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh,
1774 			    nmp->nm_fhsize, 1, cred, td, &nfsva, NULL, &lease);
1775 		if (ret) {
1776 			/*
1777 			 * Just set default values to get things going.
1778 			 */
1779 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1780 			nfsva.na_vattr.va_type = VDIR;
1781 			nfsva.na_vattr.va_mode = 0777;
1782 			nfsva.na_vattr.va_nlink = 100;
1783 			nfsva.na_vattr.va_uid = (uid_t)0;
1784 			nfsva.na_vattr.va_gid = (gid_t)0;
1785 			nfsva.na_vattr.va_fileid = 2;
1786 			nfsva.na_vattr.va_gen = 1;
1787 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1788 			nfsva.na_vattr.va_size = 512 * 1024;
1789 			lease = 20;
1790 		}
1791 		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, 0, 1);
1792 		if ((argp->flags & NFSMNT_NFSV4) != 0) {
1793 			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1794 			NFSLOCKCLSTATE();
1795 			clp->nfsc_renew = NFSCL_RENEW(lease);
1796 			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1797 			clp->nfsc_clientidrev++;
1798 			if (clp->nfsc_clientidrev == 0)
1799 				clp->nfsc_clientidrev++;
1800 			NFSUNLOCKCLSTATE();
1801 			/*
1802 			 * Mount will succeed, so the renew thread can be
1803 			 * started now.
1804 			 */
1805 			nfscl_start_renewthread(clp);
1806 			nfscl_clientrelease(clp);
1807 		}
1808 		if (argp->flags & NFSMNT_NFSV3)
1809 			ncl_fsinfo(nmp, *vpp, cred, td);
1810 
1811 		/*
1812 		 * Mark if the mount point supports NFSv4 ACLs and
1813 		 * named attributes.
1814 		 */
1815 		if ((argp->flags & NFSMNT_NFSV4) != 0) {
1816 			MNT_ILOCK(mp);
1817 			if (ret == 0 && nfsrv_useacl != 0 &&
1818 			    NFSISSET_ATTRBIT(&nfsva.na_suppattr,
1819 			    NFSATTRBIT_ACL))
1820 				mp->mnt_flag |= MNT_NFS4ACLS;
1821 			if (nmp->nm_minorvers > 0)
1822 				mp->mnt_flag |= MNT_NAMEDATTR;
1823 			MNT_IUNLOCK(mp);
1824 		}
1825 
1826 		/* Can now allow additional connections. */
1827 		if (aconn > 0)
1828 			nmp->nm_aconnect = aconn;
1829 
1830 		/*
1831 		 * Lose the lock but keep the ref.
1832 		 */
1833 		NFSVOPUNLOCK(*vpp);
1834 		vfs_cache_root_set(mp, *vpp);
1835 		CURVNET_RESTORE();
1836 		return (0);
1837 	}
1838 	error = EIO;
1839 
1840 bad:
1841 	if (clp != NULL)
1842 		nfscl_clientrelease(clp);
1843 	newnfs_disconnect(NULL, &nmp->nm_sockreq);
1844 	crfree(nmp->nm_sockreq.nr_cred);
1845 	if (nmp->nm_sockreq.nr_auth != NULL)
1846 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1847 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1848 	mtx_destroy(&nmp->nm_mtx);
1849 	if (nmp->nm_clp != NULL) {
1850 		NFSLOCKCLSTATE();
1851 		LIST_REMOVE(nmp->nm_clp, nfsc_list);
1852 		NFSUNLOCKCLSTATE();
1853 		free(nmp->nm_clp, M_NFSCLCLIENT);
1854 	}
1855 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
1856 		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1857 		    dsp->nfsclds_sockp != NULL)
1858 			newnfs_disconnect(NULL, dsp->nfsclds_sockp);
1859 		nfscl_freenfsclds(dsp);
1860 	}
1861 	free(nmp->nm_tlscertname, M_NEWNFSMNT);
1862 	free(nmp, M_NEWNFSMNT);
1863 	free(nam, M_SONAME);
1864 	CURVNET_RESTORE();
1865 	return (error);
1866 }
1867 
1868 /*
1869  * unmount system call
1870  */
1871 static int
nfs_unmount(struct mount * mp,int mntflags)1872 nfs_unmount(struct mount *mp, int mntflags)
1873 {
1874 	struct thread *td;
1875 	struct nfsmount *nmp;
1876 	int error, flags = 0, i, trycnt = 0;
1877 	struct nfsclds *dsp, *tdsp;
1878 	struct nfscldeleg *dp, *ndp;
1879 	struct nfscldeleghead dh;
1880 
1881 	td = curthread;
1882 	TAILQ_INIT(&dh);
1883 
1884 	if (mntflags & MNT_FORCE)
1885 		flags |= FORCECLOSE;
1886 	nmp = VFSTONFS(mp);
1887 	error = 0;
1888 	/*
1889 	 * Goes something like this..
1890 	 * - Call vflush() to clear out vnodes for this filesystem
1891 	 * - Close the socket
1892 	 * - Free up the data structures
1893 	 */
1894 	/* In the forced case, cancel any outstanding requests. */
1895 	if (mntflags & MNT_FORCE) {
1896 		NFSDDSLOCK();
1897 		if (nfsv4_findmirror(nmp) != NULL)
1898 			error = ENXIO;
1899 		NFSDDSUNLOCK();
1900 		if (error)
1901 			goto out;
1902 		error = newnfs_nmcancelreqs(nmp);
1903 		if (error)
1904 			goto out;
1905 		/* For a forced close, get rid of the renew thread now */
1906 		nfscl_umount(nmp, td, &dh);
1907 	}
1908 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1909 	do {
1910 		error = vflush(mp, 1, flags, td);
1911 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1912 			(void) nfs_catnap(PSOCK, error, "newndm");
1913 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1914 	if (error)
1915 		goto out;
1916 
1917 	/*
1918 	 * We are now committed to the unmount.
1919 	 */
1920 	if ((mntflags & MNT_FORCE) == 0)
1921 		nfscl_umount(nmp, td, NULL);
1922 	else {
1923 		mtx_lock(&nmp->nm_mtx);
1924 		nmp->nm_privflag |= NFSMNTP_FORCEDISM;
1925 		mtx_unlock(&nmp->nm_mtx);
1926 	}
1927 	/* Make sure no nfsiods are assigned to this mount. */
1928 	NFSLOCKIOD();
1929 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1930 		if (ncl_iodmount[i] == nmp) {
1931 			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1932 			ncl_iodmount[i] = NULL;
1933 		}
1934 	NFSUNLOCKIOD();
1935 
1936 	/*
1937 	 * We can now set mnt_data to NULL and wait for
1938 	 * nfssvc(NFSSVC_FORCEDISM) to complete.
1939 	 */
1940 	mtx_lock(&mountlist_mtx);
1941 	mtx_lock(&nmp->nm_mtx);
1942 	mp->mnt_data = NULL;
1943 	mtx_unlock(&mountlist_mtx);
1944 	while ((nmp->nm_privflag & NFSMNTP_CANCELRPCS) != 0)
1945 		msleep(nmp, &nmp->nm_mtx, PVFS, "nfsfdism", 0);
1946 	mtx_unlock(&nmp->nm_mtx);
1947 
1948 	newnfs_disconnect(nmp, &nmp->nm_sockreq);
1949 	crfree(nmp->nm_sockreq.nr_cred);
1950 	free(nmp->nm_nam, M_SONAME);
1951 	if (nmp->nm_sockreq.nr_auth != NULL)
1952 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1953 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1954 	mtx_destroy(&nmp->nm_mtx);
1955 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
1956 		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1957 		    dsp->nfsclds_sockp != NULL)
1958 			newnfs_disconnect(NULL, dsp->nfsclds_sockp);
1959 		nfscl_freenfsclds(dsp);
1960 	}
1961 	free(nmp->nm_tlscertname, M_NEWNFSMNT);
1962 	free(nmp, M_NEWNFSMNT);
1963 
1964 	/* Free up the delegation structures for forced dismounts. */
1965 	TAILQ_FOREACH_SAFE(dp, &dh, nfsdl_list, ndp) {
1966 		TAILQ_REMOVE(&dh, dp, nfsdl_list);
1967 		free(dp, M_NFSCLDELEG);
1968 	}
1969 out:
1970 	return (error);
1971 }
1972 
1973 /*
1974  * Return root of a filesystem
1975  */
1976 static int
nfs_root(struct mount * mp,int flags,struct vnode ** vpp)1977 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1978 {
1979 	struct vnode *vp;
1980 	struct nfsmount *nmp;
1981 	struct nfsnode *np;
1982 	int error;
1983 	char *fakefh;
1984 
1985 	nmp = VFSTONFS(mp);
1986 	if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0) {
1987 		/* Attempt to get the actual root file handle. */
1988 		if (nmp->nm_fhsize == 0)
1989 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1990 			    curthread->td_ucred, curthread);
1991 		fakefh = malloc(NFSX_FHMAX + 1, M_TEMP, M_WAITOK | M_ZERO);
1992 		error = ncl_nget(mp, fakefh, NFSX_FHMAX + 1, &np, flags);
1993 		free(fakefh, M_TEMP);
1994 	} else {
1995 		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1996 	}
1997 	if (error)
1998 		return error;
1999 	vp = NFSTOV(np);
2000 	/*
2001 	 * Get transfer parameters and attributes for root vnode once.
2002 	 */
2003 	mtx_lock(&nmp->nm_mtx);
2004 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
2005 		mtx_unlock(&nmp->nm_mtx);
2006 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
2007 	} else
2008 		mtx_unlock(&nmp->nm_mtx);
2009 	if (vp->v_type == VNON)
2010 	    vp->v_type = VDIR;
2011 	vp->v_vflag |= VV_ROOT;
2012 	*vpp = vp;
2013 	return (0);
2014 }
2015 
2016 /*
2017  * Flush out the buffer cache
2018  */
2019 /* ARGSUSED */
2020 static int
nfs_sync(struct mount * mp,int waitfor)2021 nfs_sync(struct mount *mp, int waitfor)
2022 {
2023 	struct vnode *vp, *mvp;
2024 	struct thread *td;
2025 	int error, allerror = 0;
2026 
2027 	td = curthread;
2028 
2029 	MNT_ILOCK(mp);
2030 	/*
2031 	 * If a forced dismount is in progress, return from here so that
2032 	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
2033 	 * calling VFS_UNMOUNT().
2034 	 */
2035 	if (NFSCL_FORCEDISM(mp)) {
2036 		MNT_IUNLOCK(mp);
2037 		return (EBADF);
2038 	}
2039 	MNT_IUNLOCK(mp);
2040 
2041 	if (waitfor == MNT_LAZY)
2042 		return (0);
2043 
2044 	/*
2045 	 * Force stale buffer cache information to be flushed.
2046 	 */
2047 loop:
2048 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
2049 		/* XXX Racy bv_cnt check. */
2050 		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0) {
2051 			VI_UNLOCK(vp);
2052 			continue;
2053 		}
2054 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
2055 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
2056 			goto loop;
2057 		}
2058 		error = VOP_FSYNC(vp, waitfor, td);
2059 		if (error)
2060 			allerror = error;
2061 		NFSVOPUNLOCK(vp);
2062 		vrele(vp);
2063 	}
2064 	return (allerror);
2065 }
2066 
2067 static int
nfs_sysctl(struct mount * mp,fsctlop_t op,struct sysctl_req * req)2068 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
2069 {
2070 	struct nfsmount *nmp = VFSTONFS(mp);
2071 	struct vfsquery vq;
2072 	int error;
2073 
2074 	bzero(&vq, sizeof(vq));
2075 	switch (op) {
2076 #if 0
2077 	case VFS_CTL_NOLOCKS:
2078 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
2079  		if (req->oldptr != NULL) {
2080  			error = SYSCTL_OUT(req, &val, sizeof(val));
2081  			if (error)
2082  				return (error);
2083  		}
2084  		if (req->newptr != NULL) {
2085  			error = SYSCTL_IN(req, &val, sizeof(val));
2086  			if (error)
2087  				return (error);
2088 			if (val)
2089 				nmp->nm_flag |= NFSMNT_NOLOCKS;
2090 			else
2091 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
2092  		}
2093 		break;
2094 #endif
2095 	case VFS_CTL_QUERY:
2096 		mtx_lock(&nmp->nm_mtx);
2097 		if (nmp->nm_state & NFSSTA_TIMEO)
2098 			vq.vq_flags |= VQ_NOTRESP;
2099 		mtx_unlock(&nmp->nm_mtx);
2100 #if 0
2101 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
2102 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
2103 			vq.vq_flags |= VQ_NOTRESPLOCK;
2104 #endif
2105 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
2106 		break;
2107  	case VFS_CTL_TIMEO:
2108  		if (req->oldptr != NULL) {
2109  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
2110  			    sizeof(nmp->nm_tprintf_initial_delay));
2111  			if (error)
2112  				return (error);
2113  		}
2114  		if (req->newptr != NULL) {
2115 			error = vfs_suser(mp, req->td);
2116 			if (error)
2117 				return (error);
2118  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
2119  			    sizeof(nmp->nm_tprintf_initial_delay));
2120  			if (error)
2121  				return (error);
2122  			if (nmp->nm_tprintf_initial_delay < 0)
2123  				nmp->nm_tprintf_initial_delay = 0;
2124  		}
2125 		break;
2126 	default:
2127 		return (ENOTSUP);
2128 	}
2129 	return (0);
2130 }
2131 
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.  The result of the cancel is not reported.
 */
static void
nfs_purge(struct mount *mp)
{

	(void)newnfs_nmcancelreqs(VFSTONFS(mp));
}
2144 
2145 /*
2146  * Extract the information needed by the nlm from the nfs vnode.
2147  */
2148 static void
nfs_getnlminfo(struct vnode * vp,uint8_t * fhp,size_t * fhlenp,struct sockaddr_storage * sp,int * is_v3p,off_t * sizep,struct timeval * timeop)2149 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
2150     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
2151     struct timeval *timeop)
2152 {
2153 	struct nfsmount *nmp;
2154 	struct nfsnode *np = VTONFS(vp);
2155 
2156 	nmp = VFSTONFS(vp->v_mount);
2157 	if (fhlenp != NULL)
2158 		*fhlenp = (size_t)np->n_fhp->nfh_len;
2159 	if (fhp != NULL)
2160 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
2161 	if (sp != NULL)
2162 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
2163 	if (is_v3p != NULL)
2164 		*is_v3p = NFS_ISV3(vp);
2165 	if (sizep != NULL)
2166 		*sizep = np->n_size;
2167 	if (timeop != NULL) {
2168 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
2169 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
2170 	}
2171 }
2172 
/*
 * Append the option name "opt" at *buf when testval is non-zero and the
 * name plus its terminating NUL fits in the *blen bytes that remain.
 * On success *buf is advanced and *blen reduced by the length of the
 * name; otherwise nothing is changed.  nmp is not used.
 */
static __inline void
nfscl_printopt(struct nfsmount *nmp, int testval, char *opt, char **buf,
    size_t *blen)
{
	size_t optlen;
	int written;

	if (testval == 0)
		return;
	optlen = strlen(opt);
	if (*blen <= optlen)
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	/* Cannot happen given the size check above; kept as a sanity check. */
	if (written != optlen)
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
2190 
/*
 * Append "opt=optval" (optval in decimal) at *buf.
 *
 * Nothing is attempted unless more than strlen(opt) + 1 bytes remain in
 * *blen.  When the whole string fits, *buf is advanced and *blen reduced
 * by its length.  When the value does not fit, the partial output is
 * discarded by restoring the terminator at *buf, so a truncated (and
 * therefore misleading) value is never left in the buffer.  nmp is not
 * used.
 */
static __inline void
nfscl_printoptval(struct nfsmount *nmp, int optval, char *opt, char **buf,
    size_t *blen)
{
	int len;

	if (*blen > strlen(opt) + 1) {
		len = snprintf(*buf, *blen, "%s=%d", opt, optval);
		if (len >= 0 && (size_t)len < *blen) {
			*buf += len;
			*blen -= len;
		} else {
			/* Truncated: roll back what snprintf() wrote. */
			**buf = '\0';
		}
	}
}
2208 
2209 /*
2210  * Load the option flags and values into the buffer.
2211  */
nfscl_retopts(struct nfsmount * nmp,char * buffer,size_t buflen)2212 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
2213 {
2214 	char *buf;
2215 	size_t blen;
2216 
2217 	buf = buffer;
2218 	blen = buflen;
2219 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
2220 	    &blen);
2221 	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
2222 		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
2223 		    &blen);
2224 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
2225 		    &buf, &blen);
2226 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_ONEOPENOWN) != 0 &&
2227 		    nmp->nm_minorvers > 0, ",oneopenown", &buf, &blen);
2228 	}
2229 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
2230 	    &blen);
2231 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
2232 	    "nfsv2", &buf, &blen);
2233 	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
2234 	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
2235 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
2236 	    &buf, &blen);
2237 	nfscl_printopt(nmp, (nmp->nm_newflag & NFSMNT_TLS) != 0, ",tls", &buf,
2238 	    &blen);
2239 	nfscl_printopt(nmp, (nmp->nm_newflag & NFSMNT_SYSKRB5) != 0,
2240 	    ",syskrb5", &buf, &blen);
2241 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
2242 	    &buf, &blen);
2243 	nfscl_printoptval(nmp, nmp->nm_aconnect + 1, ",nconnect", &buf, &blen);
2244 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
2245 	    &blen);
2246 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
2247 	    &blen);
2248 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
2249 	    &blen);
2250 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
2251 	    &blen);
2252 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
2253 	    &blen);
2254 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
2255 	    ",noncontigwr", &buf, &blen);
2256 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
2257 	    0, ",lockd", &buf, &blen);
2258 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOLOCKD) != 0, ",nolockd",
2259 	    &buf, &blen);
2260 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
2261 	    &buf, &blen);
2262 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
2263 	    &buf, &blen);
2264 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2265 	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
2266 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2267 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
2268 	    &buf, &blen);
2269 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2270 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
2271 	    &buf, &blen);
2272 	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
2273 	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
2274 	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
2275 	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
2276 	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
2277 	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
2278 	    &blen);
2279 	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
2280 	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
2281 	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
2282 	    &blen);
2283 	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
2284 	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
2285 	    &blen);
2286 	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
2287 	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
2288 }
2289