xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision cce64f2e68511652a0edc3e3482d801115e91b43)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1993, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Rick Macklem at The University of Guelph.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
35  */
36 
37 #include <sys/cdefs.h>
38 #include "opt_bootp.h"
39 #include "opt_nfsroot.h"
40 #include "opt_kern_tls.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/limits.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/mount.h>
54 #include <sys/proc.h>
55 #include <sys/socket.h>
56 #include <sys/socketvar.h>
57 #include <sys/sockio.h>
58 #include <sys/sysctl.h>
59 #include <sys/vnode.h>
60 #include <sys/signalvar.h>
61 
62 #include <vm/vm.h>
63 #include <vm/vm_extern.h>
64 #include <vm/uma.h>
65 
66 #include <net/if.h>
67 #include <net/route.h>
68 #include <net/route/route_ctl.h>
69 #include <netinet/in.h>
70 
71 #include <fs/nfs/nfsport.h>
72 #include <fs/nfsclient/nfsnode.h>
73 #include <fs/nfsclient/nfsmount.h>
74 #include <fs/nfsclient/nfs.h>
75 #include <nfs/nfsdiskless.h>
76 
77 #include <rpc/rpcsec_tls.h>
78 
/* Register the "nfscl" kernel feature together with its description. */
FEATURE(nfscl, "NFSv4 client");

/* Objects defined in other files and referenced from this one. */
extern int nfscl_ticks;
extern struct timeval nfsboottime;
extern int nfsrv_useacl;
extern int nfscl_debuglevel;
extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
extern struct mtx ncl_iod_mutex;
/*
 * NOTE(review): the expansion of this macro is not visible in this file;
 * presumably it declares the NFS client state mutex -- confirm.
 */
NFSCLSTATEMUTEX;
extern struct mtx nfsrv_dslock_mtx;

/* malloc(9) types defined by this file. */
MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header");
MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct");

/*
 * Read/write integer knobs attached to the _vfs_nfs sysctl node.  None of
 * them carries a description string.
 */
SYSCTL_DECL(_vfs_nfs);
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0, "");
/*
 * NOTE(review): the next two entries pass NFS_TPRINTF_INITIAL_DELAY and
 * NFS_TPRINTF_DELAY in the argument position where the other entries in
 * this file pass OID_AUTO -- confirm that this is intended.
 */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
/* how long between console messages "nfs server foo not responding" */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
#ifdef NFS_DEBUG
/* Debug toggle, only built (and exported) when NFS_DEBUG is defined. */
int nfs_debug;
SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
    "Toggle debug flag");
#endif
110 
/*
 * Forward declarations of the file-local helpers and of the VFS entry
 * points that are wired into nfs_vfsops below.
 */
static int	nfs_mountroot(struct mount *);
static void	nfs_sec_name(char *, int *);
static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
		    struct nfs_args *argp, const char *, struct ucred *,
		    struct thread *);
static int	mountnfs(struct nfs_args *, struct mount *,
		    struct sockaddr *, char *, u_char *, int, u_char *, int,
		    u_char *, int, struct vnode **, struct ucred *,
		    struct thread *, int, int, int, uint32_t, char *, int);
static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
		    struct sockaddr_storage *, int *, off_t *,
		    struct timeval *);
/* VFS operation handlers, declared through the vfs_*_t function types. */
static vfs_mount_t nfs_mount;
static vfs_cmount_t nfs_cmount;
static vfs_unmount_t nfs_unmount;
static vfs_root_t nfs_root;
static vfs_statfs_t nfs_statfs;
static vfs_sync_t nfs_sync;
static vfs_sysctl_t nfs_sysctl;
static vfs_purge_t nfs_purge;
131 
/*
 * nfs vfs operations.
 *
 * Operation vector handed to VFS_SET() below.  Note that .vfs_root is
 * vfs_cache_root while this file's nfs_root is installed as
 * .vfs_cachedroot; init/uninit are ncl_init/ncl_uninit, which are not
 * defined in this file.
 */
static struct vfsops nfs_vfsops = {
	.vfs_init =		ncl_init,
	.vfs_mount =		nfs_mount,
	.vfs_cmount =		nfs_cmount,
	.vfs_root =		vfs_cache_root,
	.vfs_cachedroot =	nfs_root,
	.vfs_statfs =		nfs_statfs,
	.vfs_sync =		nfs_sync,
	.vfs_uninit =		ncl_uninit,
	.vfs_unmount =		nfs_unmount,
	.vfs_sysctl =		nfs_sysctl,
	.vfs_purge =		nfs_purge,
};
/*
 * This macro declares that the file system type is named "nfs".
 * It also declares a module name of "nfs" and uses vfs_modevent()
 * as the event handling function.
 * The main module declaration is found in sys/fs/nfsclient/nfs_clport.c
 * for "nfscl" and is needed so that a custom event handling
 * function gets called.  MODULE_DEPEND() macros are found there.
 */
VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);

/* The "nfs" module is declared with version number 1. */
MODULE_VERSION(nfs, 1);
159 
/*
 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
 * can be shared by both NFS clients. It is declared here so that it
 * will be defined for kernels built without NFS_ROOT, although it
 * isn't used in that case.
 */
#if !defined(NFS_ROOT)
/* Zero-filled fallback definitions; nfs_diskless_valid == 0 means "no data". */
struct nfs_diskless	nfs_diskless = { { { 0 } } };
struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
int			nfs_diskless_valid = 0;
#endif

/* Read-only views of the diskless state under the _vfs_nfs sysctl node. */
SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &nfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

/* Exports nfsv3_diskless.root_hostnam as a string. */
SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");

/* Exports the raw bytes of nfsv3_diskless.root_saddr. */
SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
    "%Ssockaddr_in", "Diskless root nfs address");
182 
/* Non-static prototype; no definition or call is visible in this part of the file. */
void		newnfsargs_ntoh(struct nfs_args *);
/* Helpers used by the diskless (NFS root) mount path below. */
static int	nfs_mountdiskless(char *,
		    struct sockaddr_in *, struct nfs_args *,
		    struct thread *, struct vnode **, struct mount *);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
190 
191 int
newnfs_iosize(struct nfsmount * nmp)192 newnfs_iosize(struct nfsmount *nmp)
193 {
194 	int iosize, maxio;
195 
196 	/* First, set the upper limit for iosize */
197 	if (nmp->nm_flag & NFSMNT_NFSV4) {
198 		maxio = NFS_MAXBSIZE;
199 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
200 		if (nmp->nm_sotype == SOCK_DGRAM)
201 			maxio = NFS_MAXDGRAMDATA;
202 		else
203 			maxio = NFS_MAXBSIZE;
204 	} else {
205 		maxio = NFS_V2MAXDATA;
206 	}
207 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
208 		nmp->nm_rsize = maxio;
209 	if (nmp->nm_rsize > NFS_MAXBSIZE)
210 		nmp->nm_rsize = NFS_MAXBSIZE;
211 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
212 		nmp->nm_readdirsize = maxio;
213 	if (nmp->nm_readdirsize > nmp->nm_rsize)
214 		nmp->nm_readdirsize = nmp->nm_rsize;
215 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
216 		nmp->nm_wsize = maxio;
217 	if (nmp->nm_wsize > NFS_MAXBSIZE)
218 		nmp->nm_wsize = NFS_MAXBSIZE;
219 
220 	/*
221 	 * Calculate the size used for io buffers.  Use the larger
222 	 * of the two sizes to minimise nfs requests but make sure
223 	 * that it is at least one VM page to avoid wasting buffer
224 	 * space.  It must also be at least NFS_DIRBLKSIZ, since
225 	 * that is the buffer size used for directories.
226 	 */
227 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
228 	iosize = imax(iosize, PAGE_SIZE);
229 	iosize = imax(iosize, NFS_DIRBLKSIZ);
230 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
231 	return (iosize);
232 }
233 
234 static void
nfs_convert_oargs(struct nfs_args * args,struct onfs_args * oargs)235 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
236 {
237 
238 	args->version = NFS_ARGSVERSION;
239 	args->addr = oargs->addr;
240 	args->addrlen = oargs->addrlen;
241 	args->sotype = oargs->sotype;
242 	args->proto = oargs->proto;
243 	args->fh = oargs->fh;
244 	args->fhsize = oargs->fhsize;
245 	args->flags = oargs->flags;
246 	args->wsize = oargs->wsize;
247 	args->rsize = oargs->rsize;
248 	args->readdirsize = oargs->readdirsize;
249 	args->timeo = oargs->timeo;
250 	args->retrans = oargs->retrans;
251 	args->readahead = oargs->readahead;
252 	args->hostname = oargs->hostname;
253 }
254 
255 static void
nfs_convert_diskless(void)256 nfs_convert_diskless(void)
257 {
258 
259 	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
260 		sizeof(struct ifaliasreq));
261 	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
262 		sizeof(struct sockaddr_in));
263 	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
264 	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
265 		nfsv3_diskless.root_fhsize = NFSX_MYFH;
266 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
267 	} else {
268 		nfsv3_diskless.root_fhsize = NFSX_V2FH;
269 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
270 	}
271 	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
272 		sizeof(struct sockaddr_in));
273 	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
274 	nfsv3_diskless.root_time = nfs_diskless.root_time;
275 	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
276 		MAXHOSTNAMELEN);
277 	nfs_diskless_valid = 3;
278 }
279 
/*
 * nfs statfs call
 *
 * Fill in *sbp with the file system statistics of the mount mp.
 *
 * The mount is busied (MBF_NOWAIT) for the duration of the call and a
 * locked vnode for the mount's root file handle is obtained through
 * ncl_nget() so that the fsinfo/statfs RPCs can be issued against it.
 *
 * Returns 0 on success, including the two fallback cases below in which
 * the contents of mp->mnt_stat are handed back instead of fresh data.
 * Otherwise returns the error from vfs_busy(), ncl_nget() or the RPCs;
 * for an NFSv4 vnode the RPC error is first passed through
 * nfscl_maperr().
 */
static int
nfs_statfs(struct mount *mp, struct statfs *sbp)
{
	struct vnode *vp;
	struct thread *td;
	struct nfsmount *nmp = VFSTONFS(mp);
	struct nfsvattr nfsva;
	struct nfsfsinfo fs;
	struct nfsstatfs sb;
	int error = 0, attrflag, gotfsinfo = 0, ret;
	struct nfsnode *np;
	char *fakefh;
	uint32_t clone_blksize;

	td = curthread;
	clone_blksize = 0;

	/* Do not sleep waiting for the mount; fail instead. */
	error = vfs_busy(mp, MBF_NOWAIT);
	if (error)
		return (error);
	if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0) {
		/*
		 * nm_fhsize == 0: presumably the real root file handle has
		 * not been acquired yet, so try to get it now.
		 */
		if (nmp->nm_fhsize == 0) {
			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
			    td->td_ucred, td);
			if (error != 0) {
				/*
				 * We cannot do anything yet.  Hopefully what
				 * is in mnt_stat is sufficient.
				 */
				if (sbp != &mp->mnt_stat)
					*sbp = mp->mnt_stat;
				strncpy(&sbp->f_fstypename[0],
				    mp->mnt_vfc->vfc_name, MFSNAMELEN);
				vfs_unbusy(mp);
				return (0);
			}
		}
		/*
		 * Look the root node up using a temporary, zero-filled
		 * handle of NFSX_FHMAX + 1 bytes; the buffer is released
		 * as soon as ncl_nget() returns.
		 */
		fakefh = malloc(NFSX_FHMAX + 1, M_TEMP, M_WAITOK | M_ZERO);
		error = ncl_nget(mp, fakefh, NFSX_FHMAX + 1, &np, LK_EXCLUSIVE);
		free(fakefh, M_TEMP);
	} else {
		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
		    LK_EXCLUSIVE);
	}
	if (error) {
		vfs_unbusy(mp);
		return (error);
	}
	vp = NFSTOV(np);
	/*
	 * nm_mtx is held only while the mount flags are tested; it is
	 * dropped before any RPC is issued.
	 */
	mtx_lock(&nmp->nm_mtx);
	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
		mtx_unlock(&nmp->nm_mtx);
		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
		    &attrflag);
		if (!error)
			gotfsinfo = 1;
	} else
		mtx_unlock(&nmp->nm_mtx);
	if (!error)
		error = nfsrpc_statfs(vp, &sb, &fs, NULL, &clone_blksize,
		    td->td_ucred, td, &nfsva, &attrflag);
	if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0 &&
	    error == NFSERR_WRONGSEC) {
		/* Cannot get new stats, so return what is in mnt_stat. */
		if (sbp != &mp->mnt_stat)
			*sbp = mp->mnt_stat;
		strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name,
		    MFSNAMELEN);
		vput(vp);
		vfs_unbusy(mp);
		return (0);
	}
	if (error != 0)
		NFSCL_DEBUG(2, "statfs=%d\n", error);
	/*
	 * attrflag == 0: presumably no attributes came back with the RPC
	 * reply, so fetch them with a separate getattr.
	 */
	if (attrflag == 0) {
		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
		    td->td_ucred, td, &nfsva, NULL, NULL);
		if (ret) {
			/*
			 * Just set default values to get things going.
			 */
			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
			nfsva.na_vattr.va_type = VDIR;
			nfsva.na_vattr.va_mode = 0777;
			nfsva.na_vattr.va_nlink = 100;
			nfsva.na_vattr.va_uid = (uid_t)0;
			nfsva.na_vattr.va_gid = (gid_t)0;
			nfsva.na_vattr.va_fileid = 2;
			nfsva.na_vattr.va_gen = 1;
			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
			nfsva.na_vattr.va_size = 512 * 1024;
		}
	}
	/* The attribute cache is loaded whether or not the RPCs succeeded. */
	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
	if (!error) {
	    /* Publish the results into nmp and *sbp with nm_mtx held. */
	    mtx_lock(&nmp->nm_mtx);
	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
		nfscl_loadfsinfo(nmp, &fs, clone_blksize);
	    nfscl_loadsbinfo(nmp, &sb, sbp);
	    sbp->f_iosize = newnfs_iosize(nmp);
	    mtx_unlock(&nmp->nm_mtx);
	    /* A caller-supplied statfs also needs the mount point names. */
	    if (sbp != &mp->mnt_stat) {
		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
	    }
	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
	} else if (NFS_ISV4(vp)) {
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	}
	/* Unlock and release the root vnode, then unbusy the mount. */
	vput(vp);
	vfs_unbusy(mp);
	return (error);
}
396 
397 /*
398  * nfs version 3 fsinfo rpc call
399  */
400 int
ncl_fsinfo(struct nfsmount * nmp,struct vnode * vp,struct ucred * cred,struct thread * td)401 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
402     struct thread *td)
403 {
404 	struct nfsfsinfo fs;
405 	struct nfsvattr nfsva;
406 	int error, attrflag;
407 
408 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag);
409 	if (!error) {
410 		if (attrflag)
411 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
412 		mtx_lock(&nmp->nm_mtx);
413 		nfscl_loadfsinfo(nmp, &fs, 0);
414 		mtx_unlock(&nmp->nm_mtx);
415 	}
416 	return (error);
417 }
418 
419 /*
420  * Mount a remote root fs via nfs. This depends on the info in the
421  * nfs_diskless structure that has been filled in properly by some primary
422  * bootstrap.
423  * It goes something like this:
424  * - do enough of "ifconfig" by calling ifioctl() so that the system
425  *   can talk to the server
426  * - If nfs_diskless.mygateway is filled in, use that address as
427  *   a default gateway.
428  * - build the rootfs mount point and call mountnfs() to do the rest.
429  *
430  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
431  * structure, as well as other global NFS client variables here, as
432  * nfs_mountroot() will be called once in the boot before any other NFS
433  * client activity occurs.
434  */
435 static int
nfs_mountroot(struct mount * mp)436 nfs_mountroot(struct mount *mp)
437 {
438 	struct thread *td = curthread;
439 	struct nfsv3_diskless *nd = &nfsv3_diskless;
440 	struct socket *so;
441 	struct vnode *vp;
442 	struct ifreq ir;
443 	int error;
444 	u_long l;
445 	char buf[128];
446 	char *cp;
447 
448 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
449 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
450 #elif defined(NFS_ROOT)
451 	nfs_setup_diskless();
452 #endif
453 
454 	if (nfs_diskless_valid == 0)
455 		return (-1);
456 	if (nfs_diskless_valid == 1)
457 		nfs_convert_diskless();
458 
459 	/*
460 	 * Do enough of ifconfig(8) so that the critical net interface can
461 	 * talk to the server.
462 	 */
463 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
464 	    td->td_ucred, td);
465 	if (error)
466 		panic("nfs_mountroot: socreate(%04x): %d",
467 			nd->myif.ifra_addr.sa_family, error);
468 
469 #if 0 /* XXX Bad idea */
470 	/*
471 	 * We might not have been told the right interface, so we pass
472 	 * over the first ten interfaces of the same kind, until we get
473 	 * one of them configured.
474 	 */
475 
476 	for (i = strlen(nd->myif.ifra_name) - 1;
477 		nd->myif.ifra_name[i] >= '0' &&
478 		nd->myif.ifra_name[i] <= '9';
479 		nd->myif.ifra_name[i] ++) {
480 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
481 		if(!error)
482 			break;
483 	}
484 #endif
485 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
486 	if (error)
487 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
488 	if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
489 		ir.ifr_mtu = strtol(cp, NULL, 10);
490 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
491 		freeenv(cp);
492 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
493 		if (error)
494 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
495 	}
496 	soclose(so);
497 
498 	/*
499 	 * If the gateway field is filled in, set it as the default route.
500 	 * Note that pxeboot will set a default route of 0 if the route
501 	 * is not set by the DHCP server.  Check also for a value of 0
502 	 * to avoid panicking inappropriately in that situation.
503 	 */
504 	if (nd->mygateway.sin_len != 0 &&
505 	    nd->mygateway.sin_addr.s_addr != 0) {
506 		struct sockaddr_in mask, sin;
507 		struct epoch_tracker et;
508 		struct rt_addrinfo info;
509 		struct rib_cmd_info rc;
510 
511 		bzero((caddr_t)&mask, sizeof(mask));
512 		sin = mask;
513 		sin.sin_family = AF_INET;
514 		sin.sin_len = sizeof(sin);
515                 /* XXX MRT use table 0 for this sort of thing */
516 		NET_EPOCH_ENTER(et);
517 		CURVNET_SET(TD_TO_VNET(td));
518 
519 		bzero((caddr_t)&info, sizeof(info));
520 		info.rti_flags = RTF_UP | RTF_GATEWAY;
521 		info.rti_info[RTAX_DST] = (struct sockaddr *)&sin;
522 		info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&nd->mygateway;
523 		info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&mask;
524 
525 		error = rib_action(RT_DEFAULT_FIB, RTM_ADD, &info, &rc);
526 		CURVNET_RESTORE();
527 		NET_EPOCH_EXIT(et);
528 		if (error)
529 			panic("nfs_mountroot: RTM_ADD: %d", error);
530 	}
531 
532 	/*
533 	 * Create the rootfs mount point.
534 	 */
535 	nd->root_args.fh = nd->root_fh;
536 	nd->root_args.fhsize = nd->root_fhsize;
537 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
538 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
539 		(l >> 24) & 0xff, (l >> 16) & 0xff,
540 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
541 	printf("NFS ROOT: %s\n", buf);
542 	nd->root_args.hostname = buf;
543 	if ((error = nfs_mountdiskless(buf,
544 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
545 		return (error);
546 	}
547 
548 	/*
549 	 * This is not really an nfs issue, but it is much easier to
550 	 * set hostname here and then let the "/etc/rc.xxx" files
551 	 * mount the right /var based upon its preset value.
552 	 */
553 	mtx_lock(&prison0.pr_mtx);
554 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
555 	    sizeof(prison0.pr_hostname));
556 	mtx_unlock(&prison0.pr_mtx);
557 	inittodr(ntohl(nd->root_time));
558 	return (0);
559 }
560 
561 /*
562  * Internal version of mount system call for diskless setup.
563  */
564 static int
nfs_mountdiskless(char * path,struct sockaddr_in * sin,struct nfs_args * args,struct thread * td,struct vnode ** vpp,struct mount * mp)565 nfs_mountdiskless(char *path,
566     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
567     struct vnode **vpp, struct mount *mp)
568 {
569 	struct sockaddr *nam;
570 	int dirlen, error;
571 	char *dirpath;
572 
573 	/*
574 	 * Find the directory path in "path", which also has the server's
575 	 * name/ip address in it.
576 	 */
577 	dirpath = strchr(path, ':');
578 	if (dirpath != NULL)
579 		dirlen = strlen(++dirpath);
580 	else
581 		dirlen = 0;
582 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
583 	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
584 	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
585 	    NFS_DEFAULT_NEGNAMETIMEO, 0, 0, NULL, 0)) != 0) {
586 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
587 		return (error);
588 	}
589 	return (0);
590 }
591 
592 static void
nfs_sec_name(char * sec,int * flagsp)593 nfs_sec_name(char *sec, int *flagsp)
594 {
595 	if (!strcmp(sec, "krb5"))
596 		*flagsp |= NFSMNT_KERB;
597 	else if (!strcmp(sec, "krb5i"))
598 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
599 	else if (!strcmp(sec, "krb5p"))
600 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
601 }
602 
/*
 * Apply the mount arguments in *argp to the nfsmount *nmp.
 *
 * mp is consulted for the "ro" option and for MNT_UPDATE.  hostname, when
 * not NULL, is copied into nm_hostname and cut at its first ':'.  cred
 * and td are only used when the socket has to be reconnected.
 *
 * Note that argp->flags itself may be modified (NFSMNT_RDIRPLUS and
 * NFSMNT_ONEOPENOWN can be cleared), and that the order of the steps
 * matters: "adjsock" is computed from the flag values as they are before
 * argp->flags is merged into nm_flag.
 */
static void
nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
    const char *hostname, struct ucred *cred, struct thread *td)
{
	int adjsock;
	char *p;

	/*
	 * Set read-only flag if requested; otherwise, clear it if this is
	 * an update.  If this is not an update, then either the read-only
	 * flag is already clear, or this is a root mount and it was set
	 * intentionally at some previous point.
	 */
	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
		MNT_ILOCK(mp);
		mp->mnt_flag |= MNT_RDONLY;
		MNT_IUNLOCK(mp);
	} else if (mp->mnt_flag & MNT_UPDATE) {
		MNT_ILOCK(mp);
		mp->mnt_flag &= ~MNT_RDONLY;
		MNT_IUNLOCK(mp);
	}

	/*
	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
	 * no sense in that context.  Also, set up appropriate retransmit
	 * and soft timeout behavior.
	 */
	if (argp->sotype == SOCK_STREAM) {
		nmp->nm_flag &= ~NFSMNT_NOCONN;
		nmp->nm_timeo = NFS_MAXTIMEO;
		if ((argp->flags & NFSMNT_NFSV4) != 0)
			nmp->nm_retry = INT_MAX;
		else
			nmp->nm_retry = NFS_RETRANS_TCP;
	}

	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
		argp->flags &= ~NFSMNT_RDIRPLUS;
		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
	}

	/* Clear ONEOPENOWN for NFSv2, 3 and 4.0. */
	if (nmp->nm_minorvers == 0) {
		argp->flags &= ~NFSMNT_ONEOPENOWN;
		nmp->nm_flag &= ~NFSMNT_ONEOPENOWN;
	}

	/* Re-bind if rsrvd port requested and wasn't on one */
	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
		  && (argp->flags & NFSMNT_RESVPORT);
	/* Also re-bind if we're switching to/from a connected UDP socket */
	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
		    (argp->flags & NFSMNT_NOCONN));

	/* Update flags atomically.  Don't change the lock bits. */
	/*
	 * NOTE(review): this only ORs the new flags into nm_flag; flags
	 * already set in nm_flag are never cleared here.
	 */
	nmp->nm_flag = argp->flags | nmp->nm_flag;

	/* The scaled timeout is clamped to NFS_MINTIMEO..NFS_MAXTIMEO. */
	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
		if (nmp->nm_timeo < NFS_MINTIMEO)
			nmp->nm_timeo = NFS_MINTIMEO;
		else if (nmp->nm_timeo > NFS_MAXTIMEO)
			nmp->nm_timeo = NFS_MAXTIMEO;
	}

	/* A retransmit count of 0 or 1 is ignored; larger ones are capped. */
	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
		nmp->nm_retry = argp->retrans;
		if (nmp->nm_retry > NFS_MAXREXMIT)
			nmp->nm_retry = NFS_MAXREXMIT;
	}

	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
		nmp->nm_wsize = argp->wsize;
		/*
		 * Clip at the power of 2 below the size. There is an
		 * issue (not isolated) that causes intermittent page
		 * faults if this is not done.
		 */
		if (nmp->nm_wsize > NFS_FABLKSIZE)
			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
		else
			nmp->nm_wsize = NFS_FABLKSIZE;
	}

	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
		nmp->nm_rsize = argp->rsize;
		/*
		 * Clip at the power of 2 below the size. There is an
		 * issue (not isolated) that causes intermittent page
		 * faults if this is not done.
		 */
		if (nmp->nm_rsize > NFS_FABLKSIZE)
			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
		else
			nmp->nm_rsize = NFS_FABLKSIZE;
	}

	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
		nmp->nm_readdirsize = argp->readdirsize;
	}

	/*
	 * Attribute cache timeouts: each one takes the caller's value when
	 * the matching flag is set and the value is not negative, and the
	 * compiled-in default otherwise.  Afterwards each minimum is
	 * lowered to its maximum if it exceeds it.
	 */
	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
		nmp->nm_acregmin = argp->acregmin;
	else
		nmp->nm_acregmin = NFS_MINATTRTIMO;
	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
		nmp->nm_acregmax = argp->acregmax;
	else
		nmp->nm_acregmax = NFS_MAXATTRTIMO;
	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
		nmp->nm_acdirmin = argp->acdirmin;
	else
		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
		nmp->nm_acdirmax = argp->acdirmax;
	else
		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
		nmp->nm_acdirmin = nmp->nm_acdirmax;
	if (nmp->nm_acregmin > nmp->nm_acregmax)
		nmp->nm_acregmin = nmp->nm_acregmax;

	/* Read-ahead is capped at NFS_MAXRAHEAD. */
	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
		if (argp->readahead <= NFS_MAXRAHEAD)
			nmp->nm_readahead = argp->readahead;
		else
			nmp->nm_readahead = NFS_MAXRAHEAD;
	}
	/* The write commit size is never set below the write size. */
	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
		if (argp->wcommitsize < nmp->nm_wsize)
			nmp->nm_wcommitsize = nmp->nm_wsize;
		else
			nmp->nm_wcommitsize = argp->wcommitsize;
	}

	/* A change of socket type or protocol also needs a new socket. */
	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
		    (nmp->nm_soproto != argp->proto));

	if (nmp->nm_client != NULL && adjsock) {
		int haslock = 0, error = 0;

		/* The send lock is only taken for a stream socket. */
		if (nmp->nm_sotype == SOCK_STREAM) {
			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
			if (!error)
				haslock = 1;
		}
		/*
		 * If taking the lock failed, nothing is changed: the old
		 * connection and the old sotype/soproto stay in place.
		 */
		if (!error) {
		    newnfs_disconnect(nmp, &nmp->nm_sockreq);
		    if (haslock)
			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
		    nmp->nm_sotype = argp->sotype;
		    nmp->nm_soproto = argp->proto;
		    /* Only a datagram socket is reconnected here; retry until it works. */
		    if (nmp->nm_sotype == SOCK_DGRAM)
			while (newnfs_connect(nmp, &nmp->nm_sockreq,
			    cred, td, 0, false, &nmp->nm_sockreq.nr_client)) {
				printf("newnfs_args: retrying connect\n");
				(void) nfs_catnap(PSOCK, 0, "nfscon");
			}
		}
	} else {
		nmp->nm_sotype = argp->sotype;
		nmp->nm_soproto = argp->proto;
	}

	/* Keep only the part of "host:/path" that precedes the first ':'. */
	if (hostname != NULL) {
		strlcpy(nmp->nm_hostname, hostname,
		    sizeof(nmp->nm_hostname));
		p = strchr(nmp->nm_hostname, ':');
		if (p != NULL)
			*p = '\0';
	}
}
777 
/*
 * NULL-terminated list of option names; nfs_mount() passes it to
 * vfs_filteropt() together with the mount's new option list.
 */
static const char *nfs_opts[] = {
	"from", "nfs_args",
	"noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow",
	"union", "noclusterr", "noclusterw", "multilabel", "acls", "force",
	"update", "async",
	"noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize", "soft", "hard",
	"mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans",
	"actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeo", "timeout",
	"addr", "fh", "nfsv3",
	"sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath", "minorversion", "nametimeo", "negnametimeo",
	"nocto", "noncontigwr", "pnfs", "wcommitsize", "oneopenown",
	"tls", "tlscertname", "nconnect", "syskrb5",
	NULL
};
789 
790 /*
791  * Parse the "from" mountarg, passed by the generic mount(8) program
792  * or the mountroot code.  This is used when rerooting into NFS.
793  *
794  * Note that the "hostname" is actually a "hostname:/share/path" string.
795  */
796 static int
nfs_mount_parse_from(struct vfsoptlist * opts,char ** hostnamep,struct sockaddr_in ** sinp,char * dirpath,size_t dirpathsize,int * dirlenp)797 nfs_mount_parse_from(struct vfsoptlist *opts, char **hostnamep,
798     struct sockaddr_in **sinp, char *dirpath, size_t dirpathsize, int *dirlenp)
799 {
800 	char *nam, *delimp, *hostp, *spec;
801 	int error, have_bracket = 0, offset, rv, speclen;
802 	struct sockaddr_in *sin;
803 	size_t len;
804 
805 	error = vfs_getopt(opts, "from", (void **)&spec, &speclen);
806 	if (error != 0)
807 		return (error);
808 	nam = malloc(MNAMELEN + 1, M_TEMP, M_WAITOK);
809 
810 	/*
811 	 * This part comes from sbin/mount_nfs/mount_nfs.c:getnfsargs().
812 	 */
813 	if (*spec == '[' && (delimp = strchr(spec + 1, ']')) != NULL &&
814 	    *(delimp + 1) == ':') {
815 		hostp = spec + 1;
816 		spec = delimp + 2;
817 		have_bracket = 1;
818 	} else if ((delimp = strrchr(spec, ':')) != NULL) {
819 		hostp = spec;
820 		spec = delimp + 1;
821 	} else if ((delimp = strrchr(spec, '@')) != NULL) {
822 		printf("%s: path@server syntax is deprecated, "
823 		    "use server:path\n", __func__);
824 		hostp = delimp + 1;
825 	} else {
826 		printf("%s: no <host>:<dirpath> nfs-name\n", __func__);
827 		free(nam, M_TEMP);
828 		return (EINVAL);
829 	}
830 	*delimp = '\0';
831 
832 	/*
833 	 * If there has been a trailing slash at mounttime it seems
834 	 * that some mountd implementations fail to remove the mount
835 	 * entries from their mountlist while unmounting.
836 	 */
837 	for (speclen = strlen(spec);
838 	    speclen > 1 && spec[speclen - 1] == '/';
839 	    speclen--)
840 		spec[speclen - 1] = '\0';
841 	if (strlen(hostp) + strlen(spec) + 1 > MNAMELEN) {
842 		printf("%s: %s:%s: name too long", __func__, hostp, spec);
843 		free(nam, M_TEMP);
844 		return (EINVAL);
845 	}
846 	/* Make both '@' and ':' notations equal */
847 	if (*hostp != '\0') {
848 		len = strlen(hostp);
849 		offset = 0;
850 		if (have_bracket)
851 			nam[offset++] = '[';
852 		memmove(nam + offset, hostp, len);
853 		if (have_bracket)
854 			nam[len + offset++] = ']';
855 		nam[len + offset++] = ':';
856 		memmove(nam + len + offset, spec, speclen);
857 		nam[len + speclen + offset] = '\0';
858 	} else
859 		nam[0] = '\0';
860 
861 	/*
862 	 * XXX: IPv6
863 	 */
864 	sin = malloc(sizeof(*sin), M_SONAME, M_WAITOK);
865 	rv = inet_pton(AF_INET, hostp, &sin->sin_addr);
866 	if (rv != 1) {
867 		printf("%s: cannot parse '%s', inet_pton() returned %d\n",
868 		    __func__, hostp, rv);
869 		free(nam, M_TEMP);
870 		free(sin, M_SONAME);
871 		return (EINVAL);
872 	}
873 
874 	sin->sin_len = sizeof(*sin);
875 	sin->sin_family = AF_INET;
876 	/*
877 	 * XXX: hardcoded port number.
878 	 */
879 	sin->sin_port = htons(2049);
880 
881 	*hostnamep = strdup(nam, M_NEWNFSMNT);
882 	*sinp = sin;
883 	strlcpy(dirpath, spec, dirpathsize);
884 	*dirlenp = strlen(dirpath);
885 
886 	free(nam, M_TEMP);
887 	return (0);
888 }
889 
890 /*
891  * VFS Operations.
892  *
893  * mount system call
894  * It seems a bit dumb to copyinstr() the host and path here and then
895  * bcopy() them in mountnfs(), but I wanted to detect errors before
896  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
897  * an error after that means that I have to release the mbuf.
898  */
899 /* ARGSUSED */
900 static int
nfs_mount(struct mount * mp)901 nfs_mount(struct mount *mp)
902 {
903 	struct nfs_args args = {
904 	    .version = NFS_ARGSVERSION,
905 	    .addr = NULL,
906 	    .addrlen = sizeof (struct sockaddr_in),
907 	    .sotype = SOCK_STREAM,
908 	    .proto = 0,
909 	    .fh = NULL,
910 	    .fhsize = 0,
911 	    .flags = NFSMNT_RESVPORT,
912 	    .wsize = NFS_WSIZE,
913 	    .rsize = NFS_RSIZE,
914 	    .readdirsize = NFS_READDIRSIZE,
915 	    .timeo = 10,
916 	    .retrans = NFS_RETRANS,
917 	    .readahead = NFS_DEFRAHEAD,
918 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
919 	    .hostname = NULL,
920 	    .acregmin = NFS_MINATTRTIMO,
921 	    .acregmax = NFS_MAXATTRTIMO,
922 	    .acdirmin = NFS_MINDIRATTRTIMO,
923 	    .acdirmax = NFS_MAXDIRATTRTIMO,
924 	};
925 	int error = 0, ret, len;
926 	struct sockaddr *nam = NULL;
927 	struct vnode *vp;
928 	struct thread *td;
929 	char *hst;
930 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
931 	char *cp, *opt, *name, *secname, *tlscertname;
932 	int nametimeo = NFS_DEFAULT_NAMETIMEO;
933 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
934 	int minvers = -1;
935 	int dirlen, has_nfs_args_opt, has_nfs_from_opt,
936 	    krbnamelen, srvkrbnamelen;
937 	size_t hstlen;
938 	uint32_t newflag;
939 	int aconn = 0;
940 
941 	has_nfs_args_opt = 0;
942 	has_nfs_from_opt = 0;
943 	newflag = 0;
944 	tlscertname = NULL;
945 	hst = malloc(MNAMELEN, M_TEMP, M_WAITOK);
946 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
947 		error = EINVAL;
948 		goto out;
949 	}
950 
951 	td = curthread;
952 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS &&
953 	    nfs_diskless_valid != 0) {
954 		error = nfs_mountroot(mp);
955 		goto out;
956 	}
957 
958 	nfscl_init();
959 
960 	/*
961 	 * The old mount_nfs program passed the struct nfs_args
962 	 * from userspace to kernel.  The new mount_nfs program
963 	 * passes string options via nmount() from userspace to kernel
964 	 * and we populate the struct nfs_args in the kernel.
965 	 */
966 	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
967 		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
968 		    sizeof(args));
969 		if (error != 0)
970 			goto out;
971 
972 		if (args.version != NFS_ARGSVERSION) {
973 			error = EPROGMISMATCH;
974 			goto out;
975 		}
976 		has_nfs_args_opt = 1;
977 	}
978 
979 	/* Handle the new style options. */
980 	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
981 		args.acdirmin = args.acdirmax =
982 		    args.acregmin = args.acregmax = 0;
983 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
984 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
985 	}
986 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
987 		args.flags |= NFSMNT_NOCONN;
988 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
989 		args.flags &= ~NFSMNT_NOCONN;
990 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
991 		args.flags |= NFSMNT_NOLOCKD;
992 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
993 		args.flags &= ~NFSMNT_NOLOCKD;
994 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
995 		args.flags |= NFSMNT_INT;
996 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
997 		args.flags |= NFSMNT_RDIRPLUS;
998 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
999 		args.flags |= NFSMNT_RESVPORT;
1000 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
1001 		args.flags &= ~NFSMNT_RESVPORT;
1002 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
1003 		args.flags |= NFSMNT_SOFT;
1004 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
1005 		args.flags &= ~NFSMNT_SOFT;
1006 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
1007 		args.sotype = SOCK_DGRAM;
1008 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
1009 		args.sotype = SOCK_DGRAM;
1010 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
1011 		args.sotype = SOCK_STREAM;
1012 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
1013 		args.flags |= NFSMNT_NFSV3;
1014 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
1015 		args.flags |= NFSMNT_NFSV4;
1016 		args.sotype = SOCK_STREAM;
1017 	}
1018 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
1019 		args.flags |= NFSMNT_ALLGSSNAME;
1020 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
1021 		args.flags |= NFSMNT_NOCTO;
1022 	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
1023 		args.flags |= NFSMNT_NONCONTIGWR;
1024 	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
1025 		args.flags |= NFSMNT_PNFS;
1026 	if (vfs_getopt(mp->mnt_optnew, "oneopenown", NULL, NULL) == 0)
1027 		args.flags |= NFSMNT_ONEOPENOWN;
1028 	if (vfs_getopt(mp->mnt_optnew, "tls", NULL, NULL) == 0)
1029 		newflag |= NFSMNT_TLS;
1030 	if (vfs_getopt(mp->mnt_optnew, "tlscertname", (void **)&opt, &len) ==
1031 	    0) {
1032 		/*
1033 		 * tlscertname with "key.pem" appended to it forms a file
1034 		 * name.  As such, the maximum allowable strlen(tlscertname) is
1035 		 * NAME_MAX - 7. However, "len" includes the nul termination
1036 		 * byte so it can be up to NAME_MAX - 6.
1037 		 */
1038 		if (opt == NULL || len <= 1 || len > NAME_MAX - 6) {
1039 			vfs_mount_error(mp, "invalid tlscertname");
1040 			error = EINVAL;
1041 			goto out;
1042 		}
1043 		tlscertname = malloc(len, M_NEWNFSMNT, M_WAITOK);
1044 		strlcpy(tlscertname, opt, len);
1045 	}
1046 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
1047 		if (opt == NULL) {
1048 			vfs_mount_error(mp, "illegal readdirsize");
1049 			error = EINVAL;
1050 			goto out;
1051 		}
1052 		ret = sscanf(opt, "%d", &args.readdirsize);
1053 		if (ret != 1 || args.readdirsize <= 0) {
1054 			vfs_mount_error(mp, "illegal readdirsize: %s",
1055 			    opt);
1056 			error = EINVAL;
1057 			goto out;
1058 		}
1059 		args.flags |= NFSMNT_READDIRSIZE;
1060 	}
1061 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
1062 		if (opt == NULL) {
1063 			vfs_mount_error(mp, "illegal readahead");
1064 			error = EINVAL;
1065 			goto out;
1066 		}
1067 		ret = sscanf(opt, "%d", &args.readahead);
1068 		if (ret != 1 || args.readahead <= 0) {
1069 			vfs_mount_error(mp, "illegal readahead: %s",
1070 			    opt);
1071 			error = EINVAL;
1072 			goto out;
1073 		}
1074 		args.flags |= NFSMNT_READAHEAD;
1075 	}
1076 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
1077 		if (opt == NULL) {
1078 			vfs_mount_error(mp, "illegal wsize");
1079 			error = EINVAL;
1080 			goto out;
1081 		}
1082 		ret = sscanf(opt, "%d", &args.wsize);
1083 		if (ret != 1 || args.wsize <= 0) {
1084 			vfs_mount_error(mp, "illegal wsize: %s",
1085 			    opt);
1086 			error = EINVAL;
1087 			goto out;
1088 		}
1089 		args.flags |= NFSMNT_WSIZE;
1090 	}
1091 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
1092 		if (opt == NULL) {
1093 			vfs_mount_error(mp, "illegal rsize");
1094 			error = EINVAL;
1095 			goto out;
1096 		}
1097 		ret = sscanf(opt, "%d", &args.rsize);
1098 		if (ret != 1 || args.rsize <= 0) {
1099 			vfs_mount_error(mp, "illegal wsize: %s",
1100 			    opt);
1101 			error = EINVAL;
1102 			goto out;
1103 		}
1104 		args.flags |= NFSMNT_RSIZE;
1105 	}
1106 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
1107 		if (opt == NULL) {
1108 			vfs_mount_error(mp, "illegal retrans");
1109 			error = EINVAL;
1110 			goto out;
1111 		}
1112 		ret = sscanf(opt, "%d", &args.retrans);
1113 		if (ret != 1 || args.retrans <= 0) {
1114 			vfs_mount_error(mp, "illegal retrans: %s",
1115 			    opt);
1116 			error = EINVAL;
1117 			goto out;
1118 		}
1119 		args.flags |= NFSMNT_RETRANS;
1120 	}
1121 	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
1122 		ret = sscanf(opt, "%d", &args.acregmin);
1123 		if (ret != 1 || args.acregmin < 0) {
1124 			vfs_mount_error(mp, "illegal actimeo: %s",
1125 			    opt);
1126 			error = EINVAL;
1127 			goto out;
1128 		}
1129 		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
1130 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
1131 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
1132 	}
1133 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
1134 		ret = sscanf(opt, "%d", &args.acregmin);
1135 		if (ret != 1 || args.acregmin < 0) {
1136 			vfs_mount_error(mp, "illegal acregmin: %s",
1137 			    opt);
1138 			error = EINVAL;
1139 			goto out;
1140 		}
1141 		args.flags |= NFSMNT_ACREGMIN;
1142 	}
1143 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
1144 		ret = sscanf(opt, "%d", &args.acregmax);
1145 		if (ret != 1 || args.acregmax < 0) {
1146 			vfs_mount_error(mp, "illegal acregmax: %s",
1147 			    opt);
1148 			error = EINVAL;
1149 			goto out;
1150 		}
1151 		args.flags |= NFSMNT_ACREGMAX;
1152 	}
1153 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
1154 		ret = sscanf(opt, "%d", &args.acdirmin);
1155 		if (ret != 1 || args.acdirmin < 0) {
1156 			vfs_mount_error(mp, "illegal acdirmin: %s",
1157 			    opt);
1158 			error = EINVAL;
1159 			goto out;
1160 		}
1161 		args.flags |= NFSMNT_ACDIRMIN;
1162 	}
1163 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
1164 		ret = sscanf(opt, "%d", &args.acdirmax);
1165 		if (ret != 1 || args.acdirmax < 0) {
1166 			vfs_mount_error(mp, "illegal acdirmax: %s",
1167 			    opt);
1168 			error = EINVAL;
1169 			goto out;
1170 		}
1171 		args.flags |= NFSMNT_ACDIRMAX;
1172 	}
1173 	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
1174 		ret = sscanf(opt, "%d", &args.wcommitsize);
1175 		if (ret != 1 || args.wcommitsize < 0) {
1176 			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1177 			error = EINVAL;
1178 			goto out;
1179 		}
1180 		args.flags |= NFSMNT_WCOMMITSIZE;
1181 	}
1182 	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1183 		ret = sscanf(opt, "%d", &args.timeo);
1184 		if (ret != 1 || args.timeo <= 0) {
1185 			vfs_mount_error(mp, "illegal timeo: %s",
1186 			    opt);
1187 			error = EINVAL;
1188 			goto out;
1189 		}
1190 		args.flags |= NFSMNT_TIMEO;
1191 	}
1192 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1193 		ret = sscanf(opt, "%d", &args.timeo);
1194 		if (ret != 1 || args.timeo <= 0) {
1195 			vfs_mount_error(mp, "illegal timeout: %s",
1196 			    opt);
1197 			error = EINVAL;
1198 			goto out;
1199 		}
1200 		args.flags |= NFSMNT_TIMEO;
1201 	}
1202 	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1203 		ret = sscanf(opt, "%d", &nametimeo);
1204 		if (ret != 1 || nametimeo < 0) {
1205 			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1206 			error = EINVAL;
1207 			goto out;
1208 		}
1209 	}
1210 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1211 	    == 0) {
1212 		ret = sscanf(opt, "%d", &negnametimeo);
1213 		if (ret != 1 || negnametimeo < 0) {
1214 			vfs_mount_error(mp, "illegal negnametimeo: %s",
1215 			    opt);
1216 			error = EINVAL;
1217 			goto out;
1218 		}
1219 	}
1220 	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1221 	    0) {
1222 		ret = sscanf(opt, "%d", &minvers);
1223 		if (ret != 1 || minvers < 0 || minvers > 2 ||
1224 		    (args.flags & NFSMNT_NFSV4) == 0) {
1225 			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1226 			error = EINVAL;
1227 			goto out;
1228 		}
1229 	}
1230 	if (vfs_getopt(mp->mnt_optnew, "nconnect", (void **)&opt, NULL) ==
1231 	    0) {
1232 		ret = sscanf(opt, "%d", &aconn);
1233 		if (ret != 1 || aconn < 1 || aconn > NFS_MAXNCONN) {
1234 			vfs_mount_error(mp, "illegal nconnect: %s", opt);
1235 			error = EINVAL;
1236 			goto out;
1237 		}
1238 		/*
1239 		 * Setting nconnect=1 is a no-op, allowed so that
1240 		 * the option can be used in a Linux compatible way.
1241 		 */
1242 		aconn--;
1243 	}
1244 	if (vfs_getopt(mp->mnt_optnew, "syskrb5", NULL, NULL) == 0)
1245 		newflag |= NFSMNT_SYSKRB5;
1246 	if (vfs_getopt(mp->mnt_optnew, "sec",
1247 		(void **) &secname, NULL) == 0)
1248 		nfs_sec_name(secname, &args.flags);
1249 
1250 	if (mp->mnt_flag & MNT_UPDATE) {
1251 		struct nfsmount *nmp = VFSTONFS(mp);
1252 
1253 		if (nmp == NULL) {
1254 			error = EIO;
1255 			goto out;
1256 		}
1257 
1258 		/*
1259 		 * If a change from TCP->UDP is done and there are thread(s)
1260 		 * that have I/O RPC(s) in progress with a transfer size
1261 		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1262 		 * hung, retrying the RPC(s) forever. Usually these threads
1263 		 * will be seen doing an uninterruptible sleep on wait channel
1264 		 * "nfsreq".
1265 		 */
1266 		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1267 			tprintf(td->td_proc, LOG_WARNING,
1268 	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1269 
1270 		/*
1271 		 * When doing an update, we can't change version,
1272 		 * security, switch lockd strategies, change cookie
1273 		 * translation or switch oneopenown.
1274 		 */
1275 		args.flags = (args.flags &
1276 		    ~(NFSMNT_NFSV3 |
1277 		      NFSMNT_NFSV4 |
1278 		      NFSMNT_KERB |
1279 		      NFSMNT_INTEGRITY |
1280 		      NFSMNT_PRIVACY |
1281 		      NFSMNT_ONEOPENOWN |
1282 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1283 		    (nmp->nm_flag &
1284 			(NFSMNT_NFSV3 |
1285 			 NFSMNT_NFSV4 |
1286 			 NFSMNT_KERB |
1287 			 NFSMNT_INTEGRITY |
1288 			 NFSMNT_PRIVACY |
1289 			 NFSMNT_ONEOPENOWN |
1290 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1291 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1292 		goto out;
1293 	}
1294 
1295 	/*
1296 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1297 	 * or no-connection mode for those protocols that support
1298 	 * no-connection mode (the flag will be cleared later for protocols
1299 	 * that do not support no-connection mode).  This will allow a client
1300 	 * to receive replies from a different IP then the request was
1301 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1302 	 * not 0.
1303 	 */
1304 	if (nfs_ip_paranoia == 0)
1305 		args.flags |= NFSMNT_NOCONN;
1306 
1307 	if (has_nfs_args_opt != 0) {
1308 		/*
1309 		 * In the 'nfs_args' case, the pointers in the args
1310 		 * structure are in userland - we copy them in here.
1311 		 */
1312 		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1313 			vfs_mount_error(mp, "Bad file handle");
1314 			error = EINVAL;
1315 			goto out;
1316 		}
1317 		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1318 		    args.fhsize);
1319 		if (error != 0)
1320 			goto out;
1321 		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1322 		if (error != 0)
1323 			goto out;
1324 		bzero(&hst[hstlen], MNAMELEN - hstlen);
1325 		args.hostname = hst;
1326 		/* getsockaddr() call must be after above copyin() calls */
1327 		error = getsockaddr(&nam, args.addr, args.addrlen);
1328 		if (error != 0)
1329 			goto out;
1330 	} else if (nfs_mount_parse_from(mp->mnt_optnew,
1331 	    &args.hostname, (struct sockaddr_in **)&nam, dirpath,
1332 	    sizeof(dirpath), &dirlen) == 0) {
1333 		has_nfs_from_opt = 1;
1334 		bcopy(args.hostname, hst, MNAMELEN);
1335 		hst[MNAMELEN - 1] = '\0';
1336 
1337 		/*
1338 		 * This only works with NFSv4 for now.
1339 		 */
1340 		args.fhsize = 0;
1341 		args.flags |= NFSMNT_NFSV4;
1342 		args.sotype = SOCK_STREAM;
1343 	} else {
1344 		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1345 		    &args.fhsize) == 0) {
1346 			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1347 				vfs_mount_error(mp, "Bad file handle");
1348 				error = EINVAL;
1349 				goto out;
1350 			}
1351 			bcopy(args.fh, nfh, args.fhsize);
1352 		} else {
1353 			args.fhsize = 0;
1354 		}
1355 		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1356 		    (void **)&args.hostname, &len);
1357 		if (args.hostname == NULL) {
1358 			vfs_mount_error(mp, "Invalid hostname");
1359 			error = EINVAL;
1360 			goto out;
1361 		}
1362 		if (len >= MNAMELEN) {
1363 			vfs_mount_error(mp, "Hostname too long");
1364 			error = EINVAL;
1365 			goto out;
1366 		}
1367 		bcopy(args.hostname, hst, len);
1368 		hst[len] = '\0';
1369 	}
1370 
1371 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1372 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1373 	else {
1374 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1375 		cp = strchr(srvkrbname, ':');
1376 		if (cp != NULL)
1377 			*cp = '\0';
1378 	}
1379 	srvkrbnamelen = strlen(srvkrbname);
1380 
1381 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1382 		strlcpy(krbname, name, sizeof (krbname));
1383 	else
1384 		krbname[0] = '\0';
1385 	krbnamelen = strlen(krbname);
1386 
1387 	if (has_nfs_from_opt == 0) {
1388 		if (vfs_getopt(mp->mnt_optnew,
1389 		    "dirpath", (void **)&name, NULL) == 0)
1390 			strlcpy(dirpath, name, sizeof (dirpath));
1391 		else
1392 			dirpath[0] = '\0';
1393 		dirlen = strlen(dirpath);
1394 	}
1395 
1396 	if (has_nfs_args_opt == 0 && has_nfs_from_opt == 0) {
1397 		if (vfs_getopt(mp->mnt_optnew, "addr",
1398 		    (void **)&args.addr, &args.addrlen) == 0) {
1399 			if (args.addrlen > SOCK_MAXADDRLEN) {
1400 				error = ENAMETOOLONG;
1401 				goto out;
1402 			}
1403 			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1404 			bcopy(args.addr, nam, args.addrlen);
1405 			nam->sa_len = args.addrlen;
1406 		} else {
1407 			vfs_mount_error(mp, "No server address");
1408 			error = EINVAL;
1409 			goto out;
1410 		}
1411 	}
1412 
1413 	if (aconn > 0 && (args.sotype != SOCK_STREAM ||
1414 	    (args.flags & NFSMNT_NFSV4) == 0 || minvers == 0)) {
1415 		/*
1416 		 * RFC 5661 requires that an NFSv4.1/4.2 server
1417 		 * send an RPC reply on the same TCP connection
1418 		 * as the one it received the request on.
1419 		 * This property in required for "nconnect" and
1420 		 * might not be the case for NFSv3 or NFSv4.0 servers.
1421 		 */
1422 		vfs_mount_error(mp, "nconnect should only be used "
1423 		    "for NFSv4.1/4.2 mounts");
1424 		error = EINVAL;
1425 		goto out;
1426 	}
1427 
1428 	if ((newflag & NFSMNT_SYSKRB5) != 0 &&
1429 	    ((args.flags & NFSMNT_NFSV4) == 0 || minvers == 0)) {
1430 		/*
1431 		 * This option requires the use of SP4_NONE, which
1432 		 * is only in NFSv4.1/4.2.
1433 		 */
1434 		vfs_mount_error(mp, "syskrb5 should only be used "
1435 		    "for NFSv4.1/4.2 mounts");
1436 		error = EINVAL;
1437 		goto out;
1438 	}
1439 
1440 	if ((newflag & NFSMNT_SYSKRB5) != 0 &&
1441 	    (args.flags & NFSMNT_KERB) == 0) {
1442 		/*
1443 		 * This option modifies the behaviour of sec=krb5[ip].
1444 		 */
1445 		vfs_mount_error(mp, "syskrb5 should only be used "
1446 		    "for sec=krb5[ip] mounts");
1447 		error = EINVAL;
1448 		goto out;
1449 	}
1450 
1451 	if ((newflag & NFSMNT_SYSKRB5) != 0 && krbname[0] != '\0') {
1452 		/*
1453 		 * This option is used as an alternative to "gssname".
1454 		 */
1455 		vfs_mount_error(mp, "syskrb5 should not be used "
1456 		    "with the gssname option");
1457 		error = EINVAL;
1458 		goto out;
1459 	}
1460 
1461 	args.fh = nfh;
1462 	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1463 	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1464 	    nametimeo, negnametimeo, minvers, newflag, tlscertname, aconn);
1465 out:
1466 	if (!error) {
1467 		MNT_ILOCK(mp);
1468 		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1469 		    MNTK_USES_BCACHE;
1470 		if ((VFSTONFS(mp)->nm_flag & NFSMNT_NFSV4) != 0)
1471 			mp->mnt_kern_flag |= MNTK_NULL_NOCACHE;
1472 		MNT_IUNLOCK(mp);
1473 	}
1474 	free(hst, M_TEMP);
1475 	return (error);
1476 }
1477 
1478 /*
1479  * VFS Operations.
1480  *
1481  * mount system call
1482  * It seems a bit dumb to copyinstr() the host and path here and then
1483  * bcopy() them in mountnfs(), but I wanted to detect errors before
1484  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
1485  * an error after that means that I have to release the mbuf.
1486  */
1487 /* ARGSUSED */
1488 static int
nfs_cmount(struct mntarg * ma,void * data,uint64_t flags)1489 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1490 {
1491 	int error;
1492 	struct nfs_args args;
1493 
1494 	error = copyin(data, &args, sizeof (struct nfs_args));
1495 	if (error)
1496 		return error;
1497 
1498 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1499 
1500 	error = kernel_mount(ma, flags);
1501 	return (error);
1502 }
1503 
1504 /*
1505  * Common code for mount and mountroot
1506  */
1507 static int
mountnfs(struct nfs_args * argp,struct mount * mp,struct sockaddr * nam,char * hst,u_char * krbname,int krbnamelen,u_char * dirpath,int dirlen,u_char * srvkrbname,int srvkrbnamelen,struct vnode ** vpp,struct ucred * cred,struct thread * td,int nametimeo,int negnametimeo,int minvers,uint32_t newflag,char * tlscertname,int aconn)1508 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1509     char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1510     u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1511     struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1512     int minvers, uint32_t newflag, char *tlscertname, int aconn)
1513 {
1514 	struct nfsmount *nmp;
1515 	struct nfsnode *np;
1516 	int error, trycnt, ret;
1517 	struct nfsvattr nfsva;
1518 	struct nfsclclient *clp;
1519 	struct nfsclds *dsp, *tdsp;
1520 	uint32_t lease;
1521 	bool tryminvers;
1522 	char *fakefh;
1523 	static u_int64_t clval = 0;
1524 #ifdef KERN_TLS
1525 	u_int maxlen;
1526 #endif
1527 
1528 	NFSCL_DEBUG(3, "in mnt\n");
1529 	CURVNET_SET(CRED_TO_VNET(cred));
1530 	clp = NULL;
1531 	if (mp->mnt_flag & MNT_UPDATE) {
1532 		nmp = VFSTONFS(mp);
1533 		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1534 		free(nam, M_SONAME);
1535 		free(tlscertname, M_NEWNFSMNT);
1536 		CURVNET_RESTORE();
1537 		return (0);
1538 	} else {
1539 		/* NFS-over-TLS requires that rpctls be functioning. */
1540 		if ((newflag & NFSMNT_TLS) != 0) {
1541 			error = EINVAL;
1542 #ifdef KERN_TLS
1543 			/* KERN_TLS is only supported for TCP. */
1544 			if (argp->sotype == SOCK_STREAM &&
1545 			    rpctls_getinfo(&maxlen, true, false))
1546 				error = 0;
1547 #endif
1548 			if (error != 0) {
1549 				free(nam, M_SONAME);
1550 				free(tlscertname, M_NEWNFSMNT);
1551 				CURVNET_RESTORE();
1552 				return (error);
1553 			}
1554 		}
1555 		nmp = malloc(sizeof (struct nfsmount) +
1556 		    krbnamelen + dirlen + srvkrbnamelen + 2,
1557 		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1558 		nmp->nm_tlscertname = tlscertname;
1559 		nmp->nm_newflag = newflag;
1560 		TAILQ_INIT(&nmp->nm_bufq);
1561 		TAILQ_INIT(&nmp->nm_sess);
1562 		if (clval == 0)
1563 			clval = (u_int64_t)nfsboottime.tv_sec;
1564 		nmp->nm_clval = clval++;
1565 		nmp->nm_krbnamelen = krbnamelen;
1566 		nmp->nm_dirpathlen = dirlen;
1567 		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1568 		if (td->td_ucred->cr_uid != (uid_t)0) {
1569 			/*
1570 			 * nm_uid is used to get KerberosV credentials for
1571 			 * the nfsv4 state handling operations if there is
1572 			 * no host based principal set. Use the uid of
1573 			 * this user if not root, since they are doing the
1574 			 * mount. I don't think setting this for root will
1575 			 * work, since root normally does not have user
1576 			 * credentials in a credentials cache.
1577 			 */
1578 			nmp->nm_uid = td->td_ucred->cr_uid;
1579 		} else {
1580 			/*
1581 			 * Just set to -1, so it won't be used.
1582 			 */
1583 			nmp->nm_uid = (uid_t)-1;
1584 		}
1585 
1586 		/* Copy and null terminate all the names */
1587 		if (nmp->nm_krbnamelen > 0) {
1588 			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1589 			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1590 		}
1591 		if (nmp->nm_dirpathlen > 0) {
1592 			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1593 			    nmp->nm_dirpathlen);
1594 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1595 			    + 1] = '\0';
1596 		}
1597 		if (nmp->nm_srvkrbnamelen > 0) {
1598 			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1599 			    nmp->nm_srvkrbnamelen);
1600 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1601 			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1602 		}
1603 		nmp->nm_sockreq.nr_cred = crhold(cred);
1604 		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1605 		mp->mnt_data = nmp;
1606 		nmp->nm_getinfo = nfs_getnlminfo;
1607 		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1608 	}
1609 	vfs_getnewfsid(mp);
1610 	nmp->nm_mountp = mp;
1611 	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1612 
1613 	/*
1614 	 * Since nfs_decode_args() might optionally set them, these
1615 	 * need to be set to defaults before the call, so that the
1616 	 * optional settings aren't overwritten.
1617 	 */
1618 	nmp->nm_nametimeo = nametimeo;
1619 	nmp->nm_negnametimeo = negnametimeo;
1620 	nmp->nm_timeo = NFS_TIMEO;
1621 	nmp->nm_retry = NFS_RETRANS;
1622 	nmp->nm_readahead = NFS_DEFRAHEAD;
1623 
1624 	/* This is empirical approximation of sqrt(hibufspace) * 256. */
1625 	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
1626 	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
1627 		nmp->nm_wcommitsize *= 2;
1628 	nmp->nm_wcommitsize *= 256;
1629 
1630 	tryminvers = false;
1631 	if ((argp->flags & NFSMNT_NFSV4) != 0) {
1632 		if (minvers < 0) {
1633 			tryminvers = true;
1634 			minvers = NFSV42_MINORVERSION;
1635 		}
1636 		nmp->nm_minorvers = minvers;
1637 	} else
1638 		nmp->nm_minorvers = 0;
1639 
1640 	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1641 
1642 	/*
1643 	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1644 	 * high, depending on whether we end up with negative offsets in
1645 	 * the client or server somewhere.  2GB-1 may be safer.
1646 	 *
1647 	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1648 	 * that we can handle until we find out otherwise.
1649 	 */
1650 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1651 		nmp->nm_maxfilesize = 0xffffffffLL;
1652 	else
1653 		nmp->nm_maxfilesize = OFF_MAX;
1654 
1655 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1656 		nmp->nm_wsize = NFS_WSIZE;
1657 		nmp->nm_rsize = NFS_RSIZE;
1658 		nmp->nm_readdirsize = NFS_READDIRSIZE;
1659 	}
1660 	nmp->nm_numgrps = NFS_MAXGRPS;
1661 	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1662 	if (nmp->nm_tprintf_delay < 0)
1663 		nmp->nm_tprintf_delay = 0;
1664 	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1665 	if (nmp->nm_tprintf_initial_delay < 0)
1666 		nmp->nm_tprintf_initial_delay = 0;
1667 	nmp->nm_fhsize = argp->fhsize;
1668 	if (nmp->nm_fhsize > 0)
1669 		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1670 	strlcpy(mp->mnt_stat.f_mntfromname, hst, MNAMELEN);
1671 	nmp->nm_nam = nam;
1672 	/* Set up the sockets and per-host congestion */
1673 	nmp->nm_sotype = argp->sotype;
1674 	nmp->nm_soproto = argp->proto;
1675 	nmp->nm_sockreq.nr_prog = NFS_PROG;
1676 	if ((argp->flags & NFSMNT_NFSV4))
1677 		nmp->nm_sockreq.nr_vers = NFS_VER4;
1678 	else if ((argp->flags & NFSMNT_NFSV3))
1679 		nmp->nm_sockreq.nr_vers = NFS_VER3;
1680 	else
1681 		nmp->nm_sockreq.nr_vers = NFS_VER2;
1682 
1683 	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0, false,
1684 	    &nmp->nm_sockreq.nr_client)))
1685 		goto bad;
1686 	/* For NFSv4, get the clientid now. */
1687 	if ((argp->flags & NFSMNT_NFSV4) != 0) {
1688 		NFSCL_DEBUG(3, "at getcl\n");
1689 		error = nfscl_getcl(mp, cred, td, tryminvers, true, &clp);
1690 		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1691 		if (error != 0)
1692 			goto bad;
1693 		if (aconn > 0 && nmp->nm_minorvers == 0) {
1694 			vfs_mount_error(mp, "nconnect should only be used "
1695 			    "for NFSv4.1/4.2 mounts");
1696 			error = EINVAL;
1697 			goto bad;
1698 		}
1699 		if (NFSHASSYSKRB5(nmp) && nmp->nm_minorvers == 0) {
1700 			vfs_mount_error(mp, "syskrb5 should only be used "
1701 			    "for NFSv4.1/4.2 mounts");
1702 			error = EINVAL;
1703 			goto bad;
1704 		}
1705 	}
1706 
1707 	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1708 	    nmp->nm_dirpathlen > 0) {
1709 		NFSCL_DEBUG(3, "in dirp\n");
1710 		/*
1711 		 * If the fhsize on the mount point == 0 for V4, the mount
1712 		 * path needs to be looked up.
1713 		 */
1714 		trycnt = 3;
1715 		do {
1716 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1717 			    cred, td);
1718 			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
1719 			if (error != 0 && (!NFSHASSYSKRB5(nmp) ||
1720 			    error != NFSERR_WRONGSEC))
1721 				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1722 		} while (error != 0 && --trycnt > 0 &&
1723 		    (!NFSHASSYSKRB5(nmp) || error != NFSERR_WRONGSEC));
1724 		if (error != 0 && (!NFSHASSYSKRB5(nmp) ||
1725 		    error != NFSERR_WRONGSEC))
1726 			goto bad;
1727 	}
1728 
1729 	/*
1730 	 * A reference count is needed on the nfsnode representing the
1731 	 * remote root.  If this object is not persistent, then backward
1732 	 * traversals of the mount point (i.e. "..") will not work if
1733 	 * the nfsnode gets flushed out of the cache. Ufs does not have
1734 	 * this problem, because one can identify root inodes by their
1735 	 * number == UFS_ROOTINO (2).
1736 	 * For the "syskrb5" mount, the file handle might not have
1737 	 * been acquired.  As such, use a "fake" file handle which
1738 	 * can never be returned by a server for the root vnode.
1739 	 */
1740 	if (nmp->nm_fhsize > 0 || NFSHASSYSKRB5(nmp)) {
1741 		/*
1742 		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1743 		 * non-zero for the root vnode. f_iosize will be set correctly
1744 		 * by nfs_statfs() before any I/O occurs.
1745 		 */
1746 		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1747 		if (nmp->nm_fhsize == 0) {
1748 			fakefh = malloc(NFSX_FHMAX + 1, M_TEMP, M_WAITOK |
1749 			    M_ZERO);
1750 			error = ncl_nget(mp, fakefh, NFSX_FHMAX + 1, &np,
1751 			    LK_EXCLUSIVE);
1752 			free(fakefh, M_TEMP);
1753 			nmp->nm_privflag |= NFSMNTP_FAKEROOTFH;
1754 		} else
1755 			error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1756 			    LK_EXCLUSIVE);
1757 		if (error)
1758 			goto bad;
1759 		*vpp = NFSTOV(np);
1760 
1761 		/*
1762 		 * Get file attributes and transfer parameters for the
1763 		 * mountpoint.  This has the side effect of filling in
1764 		 * (*vpp)->v_type with the correct value.
1765 		 */
1766 		ret = ENXIO;
1767 		if (nmp->nm_fhsize > 0)
1768 			ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh,
1769 			    nmp->nm_fhsize, 1, cred, td, &nfsva, NULL, &lease);
1770 		if (ret) {
1771 			/*
1772 			 * Just set default values to get things going.
1773 			 */
1774 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1775 			nfsva.na_vattr.va_type = VDIR;
1776 			nfsva.na_vattr.va_mode = 0777;
1777 			nfsva.na_vattr.va_nlink = 100;
1778 			nfsva.na_vattr.va_uid = (uid_t)0;
1779 			nfsva.na_vattr.va_gid = (gid_t)0;
1780 			nfsva.na_vattr.va_fileid = 2;
1781 			nfsva.na_vattr.va_gen = 1;
1782 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1783 			nfsva.na_vattr.va_size = 512 * 1024;
1784 			lease = 20;
1785 		}
1786 		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, 0, 1);
1787 		if ((argp->flags & NFSMNT_NFSV4) != 0) {
1788 			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1789 			NFSLOCKCLSTATE();
1790 			clp->nfsc_renew = NFSCL_RENEW(lease);
1791 			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1792 			clp->nfsc_clientidrev++;
1793 			if (clp->nfsc_clientidrev == 0)
1794 				clp->nfsc_clientidrev++;
1795 			NFSUNLOCKCLSTATE();
1796 			/*
1797 			 * Mount will succeed, so the renew thread can be
1798 			 * started now.
1799 			 */
1800 			nfscl_start_renewthread(clp);
1801 			nfscl_clientrelease(clp);
1802 		}
1803 		if (argp->flags & NFSMNT_NFSV3)
1804 			ncl_fsinfo(nmp, *vpp, cred, td);
1805 
1806 		/*
1807 		 * Mark if the mount point supports NFSv4 ACLs and
1808 		 * named attributes.
1809 		 */
1810 		if ((argp->flags & NFSMNT_NFSV4) != 0) {
1811 			MNT_ILOCK(mp);
1812 			if (ret == 0 && nfsrv_useacl != 0 &&
1813 			    NFSISSET_ATTRBIT(&nfsva.na_suppattr,
1814 			    NFSATTRBIT_ACL))
1815 				mp->mnt_flag |= MNT_NFS4ACLS;
1816 			if (nmp->nm_minorvers > 0)
1817 				mp->mnt_flag |= MNT_NAMEDATTR;
1818 			MNT_IUNLOCK(mp);
1819 		}
1820 
1821 		/* Can now allow additional connections. */
1822 		if (aconn > 0)
1823 			nmp->nm_aconnect = aconn;
1824 
1825 		/*
1826 		 * Lose the lock but keep the ref.
1827 		 */
1828 		NFSVOPUNLOCK(*vpp);
1829 		vfs_cache_root_set(mp, *vpp);
1830 		CURVNET_RESTORE();
1831 		return (0);
1832 	}
1833 	error = EIO;
1834 
1835 bad:
1836 	if (clp != NULL)
1837 		nfscl_clientrelease(clp);
1838 	newnfs_disconnect(NULL, &nmp->nm_sockreq);
1839 	crfree(nmp->nm_sockreq.nr_cred);
1840 	if (nmp->nm_sockreq.nr_auth != NULL)
1841 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1842 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1843 	mtx_destroy(&nmp->nm_mtx);
1844 	if (nmp->nm_clp != NULL) {
1845 		NFSLOCKCLSTATE();
1846 		LIST_REMOVE(nmp->nm_clp, nfsc_list);
1847 		NFSUNLOCKCLSTATE();
1848 		free(nmp->nm_clp, M_NFSCLCLIENT);
1849 	}
1850 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
1851 		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1852 		    dsp->nfsclds_sockp != NULL)
1853 			newnfs_disconnect(NULL, dsp->nfsclds_sockp);
1854 		nfscl_freenfsclds(dsp);
1855 	}
1856 	free(nmp->nm_tlscertname, M_NEWNFSMNT);
1857 	free(nmp, M_NEWNFSMNT);
1858 	free(nam, M_SONAME);
1859 	CURVNET_RESTORE();
1860 	return (error);
1861 }
1862 
1863 /*
1864  * unmount system call
1865  */
1866 static int
nfs_unmount(struct mount * mp,int mntflags)1867 nfs_unmount(struct mount *mp, int mntflags)
1868 {
1869 	struct thread *td;
1870 	struct nfsmount *nmp;
1871 	int error, flags = 0, i, trycnt = 0;
1872 	struct nfsclds *dsp, *tdsp;
1873 	struct nfscldeleg *dp, *ndp;
1874 	struct nfscldeleghead dh;
1875 
1876 	td = curthread;
1877 	TAILQ_INIT(&dh);
1878 
1879 	if (mntflags & MNT_FORCE)
1880 		flags |= FORCECLOSE;
1881 	nmp = VFSTONFS(mp);
1882 	error = 0;
1883 	/*
1884 	 * Goes something like this..
1885 	 * - Call vflush() to clear out vnodes for this filesystem
1886 	 * - Close the socket
1887 	 * - Free up the data structures
1888 	 */
1889 	/* In the forced case, cancel any outstanding requests. */
1890 	if (mntflags & MNT_FORCE) {
1891 		NFSDDSLOCK();
1892 		if (nfsv4_findmirror(nmp) != NULL)
1893 			error = ENXIO;
1894 		NFSDDSUNLOCK();
1895 		if (error)
1896 			goto out;
1897 		error = newnfs_nmcancelreqs(nmp);
1898 		if (error)
1899 			goto out;
1900 		/* For a forced close, get rid of the renew thread now */
1901 		nfscl_umount(nmp, td, &dh);
1902 	}
1903 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1904 	do {
1905 		error = vflush(mp, 1, flags, td);
1906 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1907 			(void) nfs_catnap(PSOCK, error, "newndm");
1908 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1909 	if (error)
1910 		goto out;
1911 
1912 	/*
1913 	 * We are now committed to the unmount.
1914 	 */
1915 	if ((mntflags & MNT_FORCE) == 0)
1916 		nfscl_umount(nmp, td, NULL);
1917 	else {
1918 		mtx_lock(&nmp->nm_mtx);
1919 		nmp->nm_privflag |= NFSMNTP_FORCEDISM;
1920 		mtx_unlock(&nmp->nm_mtx);
1921 	}
1922 	/* Make sure no nfsiods are assigned to this mount. */
1923 	NFSLOCKIOD();
1924 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1925 		if (ncl_iodmount[i] == nmp) {
1926 			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1927 			ncl_iodmount[i] = NULL;
1928 		}
1929 	NFSUNLOCKIOD();
1930 
1931 	/*
1932 	 * We can now set mnt_data to NULL and wait for
1933 	 * nfssvc(NFSSVC_FORCEDISM) to complete.
1934 	 */
1935 	mtx_lock(&mountlist_mtx);
1936 	mtx_lock(&nmp->nm_mtx);
1937 	mp->mnt_data = NULL;
1938 	mtx_unlock(&mountlist_mtx);
1939 	while ((nmp->nm_privflag & NFSMNTP_CANCELRPCS) != 0)
1940 		msleep(nmp, &nmp->nm_mtx, PVFS, "nfsfdism", 0);
1941 	mtx_unlock(&nmp->nm_mtx);
1942 
1943 	newnfs_disconnect(nmp, &nmp->nm_sockreq);
1944 	crfree(nmp->nm_sockreq.nr_cred);
1945 	free(nmp->nm_nam, M_SONAME);
1946 	if (nmp->nm_sockreq.nr_auth != NULL)
1947 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1948 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1949 	mtx_destroy(&nmp->nm_mtx);
1950 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
1951 		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1952 		    dsp->nfsclds_sockp != NULL)
1953 			newnfs_disconnect(NULL, dsp->nfsclds_sockp);
1954 		nfscl_freenfsclds(dsp);
1955 	}
1956 	free(nmp->nm_tlscertname, M_NEWNFSMNT);
1957 	free(nmp, M_NEWNFSMNT);
1958 
1959 	/* Free up the delegation structures for forced dismounts. */
1960 	TAILQ_FOREACH_SAFE(dp, &dh, nfsdl_list, ndp) {
1961 		TAILQ_REMOVE(&dh, dp, nfsdl_list);
1962 		free(dp, M_NFSCLDELEG);
1963 	}
1964 out:
1965 	return (error);
1966 }
1967 
1968 /*
1969  * Return root of a filesystem
1970  */
1971 static int
nfs_root(struct mount * mp,int flags,struct vnode ** vpp)1972 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1973 {
1974 	struct vnode *vp;
1975 	struct nfsmount *nmp;
1976 	struct nfsnode *np;
1977 	int error;
1978 	char *fakefh;
1979 
1980 	nmp = VFSTONFS(mp);
1981 	if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0) {
1982 		/* Attempt to get the actual root file handle. */
1983 		if (nmp->nm_fhsize == 0)
1984 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1985 			    curthread->td_ucred, curthread);
1986 		fakefh = malloc(NFSX_FHMAX + 1, M_TEMP, M_WAITOK | M_ZERO);
1987 		error = ncl_nget(mp, fakefh, NFSX_FHMAX + 1, &np, flags);
1988 		free(fakefh, M_TEMP);
1989 	} else {
1990 		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1991 	}
1992 	if (error)
1993 		return error;
1994 	vp = NFSTOV(np);
1995 	/*
1996 	 * Get transfer parameters and attributes for root vnode once.
1997 	 */
1998 	mtx_lock(&nmp->nm_mtx);
1999 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
2000 		mtx_unlock(&nmp->nm_mtx);
2001 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
2002 	} else
2003 		mtx_unlock(&nmp->nm_mtx);
2004 	if (vp->v_type == VNON)
2005 	    vp->v_type = VDIR;
2006 	vp->v_vflag |= VV_ROOT;
2007 	*vpp = vp;
2008 	return (0);
2009 }
2010 
2011 /*
2012  * Flush out the buffer cache
2013  */
2014 /* ARGSUSED */
2015 static int
nfs_sync(struct mount * mp,int waitfor)2016 nfs_sync(struct mount *mp, int waitfor)
2017 {
2018 	struct vnode *vp, *mvp;
2019 	struct thread *td;
2020 	int error, allerror = 0;
2021 
2022 	td = curthread;
2023 
2024 	MNT_ILOCK(mp);
2025 	/*
2026 	 * If a forced dismount is in progress, return from here so that
2027 	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
2028 	 * calling VFS_UNMOUNT().
2029 	 */
2030 	if (NFSCL_FORCEDISM(mp)) {
2031 		MNT_IUNLOCK(mp);
2032 		return (EBADF);
2033 	}
2034 	MNT_IUNLOCK(mp);
2035 
2036 	if (waitfor == MNT_LAZY)
2037 		return (0);
2038 
2039 	/*
2040 	 * Force stale buffer cache information to be flushed.
2041 	 */
2042 loop:
2043 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
2044 		/* XXX Racy bv_cnt check. */
2045 		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0) {
2046 			VI_UNLOCK(vp);
2047 			continue;
2048 		}
2049 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
2050 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
2051 			goto loop;
2052 		}
2053 		error = VOP_FSYNC(vp, waitfor, td);
2054 		if (error)
2055 			allerror = error;
2056 		NFSVOPUNLOCK(vp);
2057 		vrele(vp);
2058 	}
2059 	return (allerror);
2060 }
2061 
2062 static int
nfs_sysctl(struct mount * mp,fsctlop_t op,struct sysctl_req * req)2063 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
2064 {
2065 	struct nfsmount *nmp = VFSTONFS(mp);
2066 	struct vfsquery vq;
2067 	int error;
2068 
2069 	bzero(&vq, sizeof(vq));
2070 	switch (op) {
2071 #if 0
2072 	case VFS_CTL_NOLOCKS:
2073 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
2074  		if (req->oldptr != NULL) {
2075  			error = SYSCTL_OUT(req, &val, sizeof(val));
2076  			if (error)
2077  				return (error);
2078  		}
2079  		if (req->newptr != NULL) {
2080  			error = SYSCTL_IN(req, &val, sizeof(val));
2081  			if (error)
2082  				return (error);
2083 			if (val)
2084 				nmp->nm_flag |= NFSMNT_NOLOCKS;
2085 			else
2086 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
2087  		}
2088 		break;
2089 #endif
2090 	case VFS_CTL_QUERY:
2091 		mtx_lock(&nmp->nm_mtx);
2092 		if (nmp->nm_state & NFSSTA_TIMEO)
2093 			vq.vq_flags |= VQ_NOTRESP;
2094 		mtx_unlock(&nmp->nm_mtx);
2095 #if 0
2096 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
2097 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
2098 			vq.vq_flags |= VQ_NOTRESPLOCK;
2099 #endif
2100 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
2101 		break;
2102  	case VFS_CTL_TIMEO:
2103  		if (req->oldptr != NULL) {
2104  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
2105  			    sizeof(nmp->nm_tprintf_initial_delay));
2106  			if (error)
2107  				return (error);
2108  		}
2109  		if (req->newptr != NULL) {
2110 			error = vfs_suser(mp, req->td);
2111 			if (error)
2112 				return (error);
2113  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
2114  			    sizeof(nmp->nm_tprintf_initial_delay));
2115  			if (error)
2116  				return (error);
2117  			if (nmp->nm_tprintf_initial_delay < 0)
2118  				nmp->nm_tprintf_initial_delay = 0;
2119  		}
2120 		break;
2121 	default:
2122 		return (ENOTSUP);
2123 	}
2124 	return (0);
2125 }
2126 
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.  The result of the cancel request is not reported
 * to the caller.
 */
static void
nfs_purge(struct mount *mp)
{

	(void)newnfs_nmcancelreqs(VFSTONFS(mp));
}
2139 
2140 /*
2141  * Extract the information needed by the nlm from the nfs vnode.
2142  */
2143 static void
nfs_getnlminfo(struct vnode * vp,uint8_t * fhp,size_t * fhlenp,struct sockaddr_storage * sp,int * is_v3p,off_t * sizep,struct timeval * timeop)2144 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
2145     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
2146     struct timeval *timeop)
2147 {
2148 	struct nfsmount *nmp;
2149 	struct nfsnode *np = VTONFS(vp);
2150 
2151 	nmp = VFSTONFS(vp->v_mount);
2152 	if (fhlenp != NULL)
2153 		*fhlenp = (size_t)np->n_fhp->nfh_len;
2154 	if (fhp != NULL)
2155 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
2156 	if (sp != NULL)
2157 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
2158 	if (is_v3p != NULL)
2159 		*is_v3p = NFS_ISV3(vp);
2160 	if (sizep != NULL)
2161 		*sizep = np->n_size;
2162 	if (timeop != NULL) {
2163 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
2164 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
2165 	}
2166 }
2167 
/*
 * Append an option name to the output buffer when testval is non-zero.
 *
 * nmp is not used.  *buf is the current write position and *blen the
 * space left there.  The name is written only when it fits together with
 * its terminating NUL (*blen > strlen(opt)); *buf and *blen are then
 * advanced past the name so that the next option overwrites the NUL.
 * Otherwise the buffer, *buf and *blen are left untouched.
 */
static __inline void
nfscl_printopt(struct nfsmount *nmp, int testval, char *opt, char **buf,
    size_t *blen)
{
	size_t optlen;

	if (testval == 0)
		return;
	optlen = strlen(opt);
	if (*blen <= optlen)
		return;
	/* Cannot truncate: the check above guarantees room for the NUL. */
	(void)snprintf(*buf, *blen, "%s", opt);
	*buf += optlen;
	*blen -= optlen;
}
2185 
/*
 * Append "<opt>=<optval>" to the output buffer.
 *
 * nmp is not used.  *buf is the current write position and *blen the
 * space left there.  Nothing is attempted unless *blen > strlen(opt) + 1.
 * When the complete text fits, *buf and *blen are advanced past it.
 * When it does not fit, the partial text that snprintf() produced is
 * discarded by restoring the terminating NUL at *buf, so that a cut-off
 * (and therefore wrong) number is never left in the caller's string;
 * *buf and *blen are left unchanged in that case.
 */
static __inline void
nfscl_printoptval(struct nfsmount *nmp, int optval, char *opt, char **buf,
    size_t *blen)
{
	int len;

	if (*blen <= strlen(opt) + 1)
		return;
	len = snprintf(*buf, *blen, "%s=%d", opt, optval);
	if (len >= 0 && (size_t)len < *blen) {
		*buf += len;
		*blen -= len;
	} else {
		/* Truncated: drop the partial option again. */
		**buf = '\0';
	}
}
2203 
2204 /*
2205  * Load the option flags and values into the buffer.
2206  */
nfscl_retopts(struct nfsmount * nmp,char * buffer,size_t buflen)2207 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
2208 {
2209 	char *buf;
2210 	size_t blen;
2211 
2212 	buf = buffer;
2213 	blen = buflen;
2214 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
2215 	    &blen);
2216 	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
2217 		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
2218 		    &blen);
2219 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
2220 		    &buf, &blen);
2221 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_ONEOPENOWN) != 0 &&
2222 		    nmp->nm_minorvers > 0, ",oneopenown", &buf, &blen);
2223 	}
2224 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
2225 	    &blen);
2226 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
2227 	    "nfsv2", &buf, &blen);
2228 	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
2229 	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
2230 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
2231 	    &buf, &blen);
2232 	nfscl_printopt(nmp, (nmp->nm_newflag & NFSMNT_TLS) != 0, ",tls", &buf,
2233 	    &blen);
2234 	nfscl_printopt(nmp, (nmp->nm_newflag & NFSMNT_SYSKRB5) != 0,
2235 	    ",syskrb5", &buf, &blen);
2236 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
2237 	    &buf, &blen);
2238 	nfscl_printoptval(nmp, nmp->nm_aconnect + 1, ",nconnect", &buf, &blen);
2239 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
2240 	    &blen);
2241 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
2242 	    &blen);
2243 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
2244 	    &blen);
2245 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
2246 	    &blen);
2247 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
2248 	    &blen);
2249 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
2250 	    ",noncontigwr", &buf, &blen);
2251 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
2252 	    0, ",lockd", &buf, &blen);
2253 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOLOCKD) != 0, ",nolockd",
2254 	    &buf, &blen);
2255 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
2256 	    &buf, &blen);
2257 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
2258 	    &buf, &blen);
2259 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2260 	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
2261 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2262 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
2263 	    &buf, &blen);
2264 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2265 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
2266 	    &buf, &blen);
2267 	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
2268 	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
2269 	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
2270 	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
2271 	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
2272 	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
2273 	    &blen);
2274 	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
2275 	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
2276 	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
2277 	    &blen);
2278 	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
2279 	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
2280 	    &blen);
2281 	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
2282 	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
2283 }
2284