xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision 8d5a88ac95b23b0a8c4943be0aef1f93e3902bfb)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1993, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Rick Macklem at The University of Guelph.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
35  */
36 
37 #include <sys/cdefs.h>
38 #include "opt_bootp.h"
39 #include "opt_nfsroot.h"
40 #include "opt_kern_tls.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/limits.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/mount.h>
54 #include <sys/proc.h>
55 #include <sys/socket.h>
56 #include <sys/socketvar.h>
57 #include <sys/sockio.h>
58 #include <sys/sysctl.h>
59 #include <sys/vnode.h>
60 #include <sys/signalvar.h>
61 
62 #include <vm/vm.h>
63 #include <vm/vm_extern.h>
64 #include <vm/uma.h>
65 
66 #include <net/if.h>
67 #include <net/route.h>
68 #include <net/route/route_ctl.h>
69 #include <netinet/in.h>
70 
71 #include <fs/nfs/nfsport.h>
72 #include <fs/nfsclient/nfsnode.h>
73 #include <fs/nfsclient/nfsmount.h>
74 #include <fs/nfsclient/nfs.h>
75 #include <nfs/nfsdiskless.h>
76 
77 #include <rpc/rpcsec_tls.h>
78 
79 FEATURE(nfscl, "NFSv4 client");
80 
81 extern int nfscl_ticks;
82 extern struct timeval nfsboottime;
83 extern int nfsrv_useacl;
84 extern int nfscl_debuglevel;
85 extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
86 extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
87 extern struct mtx ncl_iod_mutex;
88 NFSCLSTATEMUTEX;
89 extern struct mtx nfsrv_dslock_mtx;
90 
91 MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header");
92 MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct");
93 
94 SYSCTL_DECL(_vfs_nfs);
95 static int nfs_ip_paranoia = 1;
96 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
97     &nfs_ip_paranoia, 0, "");
98 static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
99 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
100         downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
101 /* how long between console messages "nfs server foo not responding" */
102 static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
103 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
104         downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
105 #ifdef NFS_DEBUG
106 int nfs_debug;
107 SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
108     "Toggle debug flag");
109 #endif
110 
111 static int	nfs_mountroot(struct mount *);
112 static void	nfs_sec_name(char *, int *);
113 static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
114 		    struct nfs_args *argp, const char *, struct ucred *,
115 		    struct thread *);
116 static int	mountnfs(struct nfs_args *, struct mount *,
117 		    struct sockaddr *, char *, u_char *, int, u_char *, int,
118 		    u_char *, int, struct vnode **, struct ucred *,
119 		    struct thread *, int, int, int, uint32_t, char *, int);
120 static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
121 		    struct sockaddr_storage *, int *, off_t *,
122 		    struct timeval *);
123 static vfs_mount_t nfs_mount;
124 static vfs_cmount_t nfs_cmount;
125 static vfs_unmount_t nfs_unmount;
126 static vfs_root_t nfs_root;
127 static vfs_statfs_t nfs_statfs;
128 static vfs_sync_t nfs_sync;
129 static vfs_sysctl_t nfs_sysctl;
130 static vfs_purge_t nfs_purge;
131 
132 /*
133  * nfs vfs operations.
134  */
135 static struct vfsops nfs_vfsops = {
136 	.vfs_init =		ncl_init,
137 	.vfs_mount =		nfs_mount,
138 	.vfs_cmount =		nfs_cmount,
139 	.vfs_root =		vfs_cache_root,
140 	.vfs_cachedroot =	nfs_root,
141 	.vfs_statfs =		nfs_statfs,
142 	.vfs_sync =		nfs_sync,
143 	.vfs_uninit =		ncl_uninit,
144 	.vfs_unmount =		nfs_unmount,
145 	.vfs_sysctl =		nfs_sysctl,
146 	.vfs_purge =		nfs_purge,
147 };
148 /*
149  * This macro declares that the file system type is named "nfs".
150  * It also declares a module name of "nfs" and uses vfs_modevent()
151  * as the event handling function.
152  * The main module declaration is found in sys/fs/nfsclient/nfs_clport.c
153  * for "nfscl" and is needed so that a custom event handling
154  * function gets called.  MODULE_DEPEND() macros are found there.
155  */
156 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
157 
158 MODULE_VERSION(nfs, 1);
159 
160 /*
161  * This structure is now defined in sys/nfs/nfs_diskless.c so that it
162  * can be shared by both NFS clients. It is declared here so that it
163  * will be defined for kernels built without NFS_ROOT, although it
164  * isn't used in that case.
165  */
166 #if !defined(NFS_ROOT)
167 struct nfs_diskless	nfs_diskless = { { { 0 } } };
168 struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
169 int			nfs_diskless_valid = 0;
170 #endif
171 
172 SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
173     &nfs_diskless_valid, 0,
174     "Has the diskless struct been filled correctly");
175 
176 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
177     nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
178 
179 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
180     &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
181     "%Ssockaddr_in", "Diskless root nfs address");
182 
183 void		newnfsargs_ntoh(struct nfs_args *);
184 static int	nfs_mountdiskless(char *,
185 		    struct sockaddr_in *, struct nfs_args *,
186 		    struct thread *, struct vnode **, struct mount *);
187 static void	nfs_convert_diskless(void);
188 static void	nfs_convert_oargs(struct nfs_args *args,
189 		    struct onfs_args *oargs);
190 
191 int
newnfs_iosize(struct nfsmount * nmp)192 newnfs_iosize(struct nfsmount *nmp)
193 {
194 	int iosize, maxio;
195 
196 	/* First, set the upper limit for iosize */
197 	if (nmp->nm_flag & NFSMNT_NFSV4) {
198 		maxio = NFS_MAXBSIZE;
199 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
200 		if (nmp->nm_sotype == SOCK_DGRAM)
201 			maxio = NFS_MAXDGRAMDATA;
202 		else
203 			maxio = NFS_MAXBSIZE;
204 	} else {
205 		maxio = NFS_V2MAXDATA;
206 	}
207 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
208 		nmp->nm_rsize = maxio;
209 	if (nmp->nm_rsize > NFS_MAXBSIZE)
210 		nmp->nm_rsize = NFS_MAXBSIZE;
211 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
212 		nmp->nm_readdirsize = maxio;
213 	if (nmp->nm_readdirsize > nmp->nm_rsize)
214 		nmp->nm_readdirsize = nmp->nm_rsize;
215 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
216 		nmp->nm_wsize = maxio;
217 	if (nmp->nm_wsize > NFS_MAXBSIZE)
218 		nmp->nm_wsize = NFS_MAXBSIZE;
219 
220 	/*
221 	 * Calculate the size used for io buffers.  Use the larger
222 	 * of the two sizes to minimise nfs requests but make sure
223 	 * that it is at least one VM page to avoid wasting buffer
224 	 * space.  It must also be at least NFS_DIRBLKSIZ, since
225 	 * that is the buffer size used for directories.
226 	 */
227 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
228 	iosize = imax(iosize, PAGE_SIZE);
229 	iosize = imax(iosize, NFS_DIRBLKSIZ);
230 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
231 	return (iosize);
232 }
233 
234 static void
nfs_convert_oargs(struct nfs_args * args,struct onfs_args * oargs)235 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
236 {
237 
238 	args->version = NFS_ARGSVERSION;
239 	args->addr = oargs->addr;
240 	args->addrlen = oargs->addrlen;
241 	args->sotype = oargs->sotype;
242 	args->proto = oargs->proto;
243 	args->fh = oargs->fh;
244 	args->fhsize = oargs->fhsize;
245 	args->flags = oargs->flags;
246 	args->wsize = oargs->wsize;
247 	args->rsize = oargs->rsize;
248 	args->readdirsize = oargs->readdirsize;
249 	args->timeo = oargs->timeo;
250 	args->retrans = oargs->retrans;
251 	args->readahead = oargs->readahead;
252 	args->hostname = oargs->hostname;
253 }
254 
255 static void
nfs_convert_diskless(void)256 nfs_convert_diskless(void)
257 {
258 
259 	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
260 		sizeof(struct ifaliasreq));
261 	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
262 		sizeof(struct sockaddr_in));
263 	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
264 	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
265 		nfsv3_diskless.root_fhsize = NFSX_MYFH;
266 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
267 	} else {
268 		nfsv3_diskless.root_fhsize = NFSX_V2FH;
269 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
270 	}
271 	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
272 		sizeof(struct sockaddr_in));
273 	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
274 	nfsv3_diskless.root_time = nfs_diskless.root_time;
275 	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
276 		MAXHOSTNAMELEN);
277 	nfs_diskless_valid = 3;
278 }
279 
280 /*
281  * nfs statfs call
282  */
283 static int
nfs_statfs(struct mount * mp,struct statfs * sbp)284 nfs_statfs(struct mount *mp, struct statfs *sbp)
285 {
286 	struct vnode *vp;
287 	struct thread *td;
288 	struct nfsmount *nmp = VFSTONFS(mp);
289 	struct nfsvattr nfsva;
290 	struct nfsfsinfo fs;
291 	struct nfsstatfs sb;
292 	int error = 0, attrflag, gotfsinfo = 0, ret;
293 	struct nfsnode *np;
294 	char *fakefh;
295 	uint32_t clone_blksize;
296 
297 	td = curthread;
298 	clone_blksize = 0;
299 
300 	error = vfs_busy(mp, MBF_NOWAIT);
301 	if (error)
302 		return (error);
303 	if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0) {
304 		if (nmp->nm_fhsize == 0) {
305 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
306 			    td->td_ucred, td);
307 			if (error != 0) {
308 				/*
309 				 * We cannot do anything yet.  Hopefully what
310 				 * is in mnt_stat is sufficient.
311 				 */
312 				if (sbp != &mp->mnt_stat)
313 					*sbp = mp->mnt_stat;
314 				strncpy(&sbp->f_fstypename[0],
315 				    mp->mnt_vfc->vfc_name, MFSNAMELEN);
316 				vfs_unbusy(mp);
317 				return (0);
318 			}
319 		}
320 		fakefh = malloc(NFSX_FHMAX + 1, M_TEMP, M_WAITOK | M_ZERO);
321 		error = ncl_nget(mp, fakefh, NFSX_FHMAX + 1, &np, LK_EXCLUSIVE);
322 		free(fakefh, M_TEMP);
323 	} else {
324 		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
325 		    LK_EXCLUSIVE);
326 	}
327 	if (error) {
328 		vfs_unbusy(mp);
329 		return (error);
330 	}
331 	vp = NFSTOV(np);
332 	mtx_lock(&nmp->nm_mtx);
333 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
334 		mtx_unlock(&nmp->nm_mtx);
335 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
336 		    &attrflag);
337 		if (!error)
338 			gotfsinfo = 1;
339 	} else
340 		mtx_unlock(&nmp->nm_mtx);
341 	if (!error)
342 		error = nfsrpc_statfs(vp, &sb, &fs, NULL, &clone_blksize,
343 		    td->td_ucred, td, &nfsva, &attrflag);
344 	if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0 &&
345 	    error == NFSERR_WRONGSEC) {
346 		/* Cannot get new stats, so return what is in mnt_stat. */
347 		if (sbp != &mp->mnt_stat)
348 			*sbp = mp->mnt_stat;
349 		strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name,
350 		    MFSNAMELEN);
351 		vput(vp);
352 		vfs_unbusy(mp);
353 		return (0);
354 	}
355 	if (error != 0)
356 		NFSCL_DEBUG(2, "statfs=%d\n", error);
357 	if (attrflag == 0) {
358 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
359 		    td->td_ucred, td, &nfsva, NULL, NULL);
360 		if (ret) {
361 			/*
362 			 * Just set default values to get things going.
363 			 */
364 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
365 			nfsva.na_vattr.va_type = VDIR;
366 			nfsva.na_vattr.va_mode = 0777;
367 			nfsva.na_vattr.va_nlink = 100;
368 			nfsva.na_vattr.va_uid = (uid_t)0;
369 			nfsva.na_vattr.va_gid = (gid_t)0;
370 			nfsva.na_vattr.va_fileid = 2;
371 			nfsva.na_vattr.va_gen = 1;
372 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
373 			nfsva.na_vattr.va_size = 512 * 1024;
374 		}
375 	}
376 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
377 	if (!error) {
378 	    mtx_lock(&nmp->nm_mtx);
379 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
380 		nfscl_loadfsinfo(nmp, &fs, clone_blksize);
381 	    nfscl_loadsbinfo(nmp, &sb, sbp);
382 	    sbp->f_iosize = newnfs_iosize(nmp);
383 	    mtx_unlock(&nmp->nm_mtx);
384 	    if (sbp != &mp->mnt_stat) {
385 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
386 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
387 	    }
388 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
389 	} else if (NFS_ISV4(vp)) {
390 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
391 	}
392 	vput(vp);
393 	vfs_unbusy(mp);
394 	return (error);
395 }
396 
397 /*
398  * nfs version 3 fsinfo rpc call
399  */
400 int
ncl_fsinfo(struct nfsmount * nmp,struct vnode * vp,struct ucred * cred,struct thread * td)401 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
402     struct thread *td)
403 {
404 	struct nfsfsinfo fs;
405 	struct nfsvattr nfsva;
406 	int error, attrflag;
407 
408 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag);
409 	if (!error) {
410 		if (attrflag)
411 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
412 		mtx_lock(&nmp->nm_mtx);
413 		nfscl_loadfsinfo(nmp, &fs, 0);
414 		mtx_unlock(&nmp->nm_mtx);
415 	}
416 	return (error);
417 }
418 
419 /*
420  * Mount a remote root fs via nfs. This depends on the info in the
421  * nfs_diskless structure that has been filled in properly by some primary
422  * bootstrap.
423  * It goes something like this:
424  * - do enough of "ifconfig" by calling ifioctl() so that the system
425  *   can talk to the server
426  * - If nfs_diskless.mygateway is filled in, use that address as
427  *   a default gateway.
428  * - build the rootfs mount point and call mountnfs() to do the rest.
429  *
430  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
431  * structure, as well as other global NFS client variables here, as
432  * nfs_mountroot() will be called once in the boot before any other NFS
433  * client activity occurs.
434  */
435 static int
nfs_mountroot(struct mount * mp)436 nfs_mountroot(struct mount *mp)
437 {
438 	struct thread *td = curthread;
439 	struct nfsv3_diskless *nd = &nfsv3_diskless;
440 	struct socket *so;
441 	struct vnode *vp;
442 	struct ifreq ir;
443 	int error;
444 	u_long l;
445 	char buf[128];
446 	char *cp;
447 
448 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
449 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
450 #elif defined(NFS_ROOT)
451 	nfs_setup_diskless();
452 #endif
453 
454 	if (nfs_diskless_valid == 0)
455 		return (-1);
456 	if (nfs_diskless_valid == 1)
457 		nfs_convert_diskless();
458 
459 	/*
460 	 * Do enough of ifconfig(8) so that the critical net interface can
461 	 * talk to the server.
462 	 */
463 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
464 	    td->td_ucred, td);
465 	if (error)
466 		panic("nfs_mountroot: socreate(%04x): %d",
467 			nd->myif.ifra_addr.sa_family, error);
468 
469 #if 0 /* XXX Bad idea */
470 	/*
471 	 * We might not have been told the right interface, so we pass
472 	 * over the first ten interfaces of the same kind, until we get
473 	 * one of them configured.
474 	 */
475 
476 	for (i = strlen(nd->myif.ifra_name) - 1;
477 		nd->myif.ifra_name[i] >= '0' &&
478 		nd->myif.ifra_name[i] <= '9';
479 		nd->myif.ifra_name[i] ++) {
480 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
481 		if(!error)
482 			break;
483 	}
484 #endif
485 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
486 	if (error)
487 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
488 	if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
489 		ir.ifr_mtu = strtol(cp, NULL, 10);
490 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
491 		freeenv(cp);
492 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
493 		if (error)
494 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
495 	}
496 	soclose(so);
497 
498 	/*
499 	 * If the gateway field is filled in, set it as the default route.
500 	 * Note that pxeboot will set a default route of 0 if the route
501 	 * is not set by the DHCP server.  Check also for a value of 0
502 	 * to avoid panicking inappropriately in that situation.
503 	 */
504 	if (nd->mygateway.sin_len != 0 &&
505 	    nd->mygateway.sin_addr.s_addr != 0) {
506 		struct sockaddr_in mask, sin;
507 		struct epoch_tracker et;
508 		struct rt_addrinfo info;
509 		struct rib_cmd_info rc;
510 
511 		bzero((caddr_t)&mask, sizeof(mask));
512 		sin = mask;
513 		sin.sin_family = AF_INET;
514 		sin.sin_len = sizeof(sin);
515                 /* XXX MRT use table 0 for this sort of thing */
516 		NET_EPOCH_ENTER(et);
517 		CURVNET_SET(TD_TO_VNET(td));
518 
519 		bzero((caddr_t)&info, sizeof(info));
520 		info.rti_flags = RTF_UP | RTF_GATEWAY;
521 		info.rti_info[RTAX_DST] = (struct sockaddr *)&sin;
522 		info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&nd->mygateway;
523 		info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&mask;
524 
525 		error = rib_action(RT_DEFAULT_FIB, RTM_ADD, &info, &rc);
526 		CURVNET_RESTORE();
527 		NET_EPOCH_EXIT(et);
528 		if (error)
529 			panic("nfs_mountroot: RTM_ADD: %d", error);
530 	}
531 
532 	/*
533 	 * Create the rootfs mount point.
534 	 */
535 	nd->root_args.fh = nd->root_fh;
536 	nd->root_args.fhsize = nd->root_fhsize;
537 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
538 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
539 		(l >> 24) & 0xff, (l >> 16) & 0xff,
540 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
541 	printf("NFS ROOT: %s\n", buf);
542 	nd->root_args.hostname = buf;
543 	if ((error = nfs_mountdiskless(buf,
544 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
545 		return (error);
546 	}
547 
548 	/*
549 	 * This is not really an nfs issue, but it is much easier to
550 	 * set hostname here and then let the "/etc/rc.xxx" files
551 	 * mount the right /var based upon its preset value.
552 	 */
553 	mtx_lock(&prison0.pr_mtx);
554 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
555 	    sizeof(prison0.pr_hostname));
556 	mtx_unlock(&prison0.pr_mtx);
557 	inittodr(ntohl(nd->root_time));
558 	return (0);
559 }
560 
561 /*
562  * Internal version of mount system call for diskless setup.
563  */
564 static int
nfs_mountdiskless(char * path,struct sockaddr_in * sin,struct nfs_args * args,struct thread * td,struct vnode ** vpp,struct mount * mp)565 nfs_mountdiskless(char *path,
566     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
567     struct vnode **vpp, struct mount *mp)
568 {
569 	struct sockaddr *nam;
570 	int dirlen, error;
571 	char *dirpath;
572 
573 	/*
574 	 * Find the directory path in "path", which also has the server's
575 	 * name/ip address in it.
576 	 */
577 	dirpath = strchr(path, ':');
578 	if (dirpath != NULL)
579 		dirlen = strlen(++dirpath);
580 	else
581 		dirlen = 0;
582 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
583 	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
584 	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
585 	    NFS_DEFAULT_NEGNAMETIMEO, 0, 0, NULL, 0)) != 0) {
586 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
587 		return (error);
588 	}
589 	return (0);
590 }
591 
592 static void
nfs_sec_name(char * sec,int * flagsp)593 nfs_sec_name(char *sec, int *flagsp)
594 {
595 	if (!strcmp(sec, "krb5"))
596 		*flagsp |= NFSMNT_KERB;
597 	else if (!strcmp(sec, "krb5i"))
598 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
599 	else if (!strcmp(sec, "krb5p"))
600 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
601 }
602 
603 static void
nfs_decode_args(struct mount * mp,struct nfsmount * nmp,struct nfs_args * argp,const char * hostname,struct ucred * cred,struct thread * td)604 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
605     const char *hostname, struct ucred *cred, struct thread *td)
606 {
607 	int adjsock;
608 	char *p;
609 
610 	/*
611 	 * Set read-only flag if requested; otherwise, clear it if this is
612 	 * an update.  If this is not an update, then either the read-only
613 	 * flag is already clear, or this is a root mount and it was set
614 	 * intentionally at some previous point.
615 	 */
616 	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
617 		MNT_ILOCK(mp);
618 		mp->mnt_flag |= MNT_RDONLY;
619 		MNT_IUNLOCK(mp);
620 	} else if (mp->mnt_flag & MNT_UPDATE) {
621 		MNT_ILOCK(mp);
622 		mp->mnt_flag &= ~MNT_RDONLY;
623 		MNT_IUNLOCK(mp);
624 	}
625 
626 	/*
627 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
628 	 * no sense in that context.  Also, set up appropriate retransmit
629 	 * and soft timeout behavior.
630 	 */
631 	if (argp->sotype == SOCK_STREAM) {
632 		nmp->nm_flag &= ~NFSMNT_NOCONN;
633 		nmp->nm_timeo = NFS_MAXTIMEO;
634 		if ((argp->flags & NFSMNT_NFSV4) != 0)
635 			nmp->nm_retry = INT_MAX;
636 		else
637 			nmp->nm_retry = NFS_RETRANS_TCP;
638 	}
639 
640 	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
641 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
642 		argp->flags &= ~NFSMNT_RDIRPLUS;
643 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
644 	}
645 
646 	/* Clear ONEOPENOWN for NFSv2, 3 and 4.0. */
647 	if (nmp->nm_minorvers == 0) {
648 		argp->flags &= ~NFSMNT_ONEOPENOWN;
649 		nmp->nm_flag &= ~NFSMNT_ONEOPENOWN;
650 	}
651 
652 	/* Re-bind if rsrvd port requested and wasn't on one */
653 	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
654 		  && (argp->flags & NFSMNT_RESVPORT);
655 	/* Also re-bind if we're switching to/from a connected UDP socket */
656 	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
657 		    (argp->flags & NFSMNT_NOCONN));
658 
659 	/* Update flags atomically.  Don't change the lock bits. */
660 	nmp->nm_flag = argp->flags | nmp->nm_flag;
661 
662 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
663 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
664 		if (nmp->nm_timeo < NFS_MINTIMEO)
665 			nmp->nm_timeo = NFS_MINTIMEO;
666 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
667 			nmp->nm_timeo = NFS_MAXTIMEO;
668 	}
669 
670 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
671 		nmp->nm_retry = argp->retrans;
672 		if (nmp->nm_retry > NFS_MAXREXMIT)
673 			nmp->nm_retry = NFS_MAXREXMIT;
674 	}
675 
676 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
677 		nmp->nm_wsize = argp->wsize;
678 		/*
679 		 * Clip at the power of 2 below the size. There is an
680 		 * issue (not isolated) that causes intermittent page
681 		 * faults if this is not done.
682 		 */
683 		if (nmp->nm_wsize > NFS_FABLKSIZE)
684 			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
685 		else
686 			nmp->nm_wsize = NFS_FABLKSIZE;
687 	}
688 
689 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
690 		nmp->nm_rsize = argp->rsize;
691 		/*
692 		 * Clip at the power of 2 below the size. There is an
693 		 * issue (not isolated) that causes intermittent page
694 		 * faults if this is not done.
695 		 */
696 		if (nmp->nm_rsize > NFS_FABLKSIZE)
697 			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
698 		else
699 			nmp->nm_rsize = NFS_FABLKSIZE;
700 	}
701 
702 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
703 		nmp->nm_readdirsize = argp->readdirsize;
704 	}
705 
706 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
707 		nmp->nm_acregmin = argp->acregmin;
708 	else
709 		nmp->nm_acregmin = NFS_MINATTRTIMO;
710 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
711 		nmp->nm_acregmax = argp->acregmax;
712 	else
713 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
714 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
715 		nmp->nm_acdirmin = argp->acdirmin;
716 	else
717 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
718 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
719 		nmp->nm_acdirmax = argp->acdirmax;
720 	else
721 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
722 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
723 		nmp->nm_acdirmin = nmp->nm_acdirmax;
724 	if (nmp->nm_acregmin > nmp->nm_acregmax)
725 		nmp->nm_acregmin = nmp->nm_acregmax;
726 
727 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
728 		if (argp->readahead <= NFS_MAXRAHEAD)
729 			nmp->nm_readahead = argp->readahead;
730 		else
731 			nmp->nm_readahead = NFS_MAXRAHEAD;
732 	}
733 	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
734 		if (argp->wcommitsize < nmp->nm_wsize)
735 			nmp->nm_wcommitsize = nmp->nm_wsize;
736 		else
737 			nmp->nm_wcommitsize = argp->wcommitsize;
738 	}
739 
740 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
741 		    (nmp->nm_soproto != argp->proto));
742 
743 	if (nmp->nm_client != NULL && adjsock) {
744 		int haslock = 0, error = 0;
745 
746 		if (nmp->nm_sotype == SOCK_STREAM) {
747 			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
748 			if (!error)
749 				haslock = 1;
750 		}
751 		if (!error) {
752 		    newnfs_disconnect(nmp, &nmp->nm_sockreq);
753 		    if (haslock)
754 			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
755 		    nmp->nm_sotype = argp->sotype;
756 		    nmp->nm_soproto = argp->proto;
757 		    if (nmp->nm_sotype == SOCK_DGRAM)
758 			while (newnfs_connect(nmp, &nmp->nm_sockreq,
759 			    cred, td, 0, false, &nmp->nm_sockreq.nr_client)) {
760 				printf("newnfs_args: retrying connect\n");
761 				(void) nfs_catnap(PSOCK, 0, "nfscon");
762 			}
763 		}
764 	} else {
765 		nmp->nm_sotype = argp->sotype;
766 		nmp->nm_soproto = argp->proto;
767 	}
768 
769 	if (hostname != NULL) {
770 		strlcpy(nmp->nm_hostname, hostname,
771 		    sizeof(nmp->nm_hostname));
772 		p = strchr(nmp->nm_hostname, ':');
773 		if (p != NULL)
774 			*p = '\0';
775 	}
776 }
777 
/*
 * NULL-terminated list of the mount option names that nfs_mount()
 * passes to vfs_filteropt() to validate the options of a mount request.
 */
static const char *nfs_opts[] = {
	"from", "nfs_args",
	"noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow",
	"union", "noclusterr", "noclusterw", "multilabel", "acls", "force",
	"update", "async",
	"noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize", "soft", "hard", "mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans",
	"actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeo", "timeout",
	"addr", "fh", "nfsv3", "sec", "principal", "nfsv4",
	"gssname", "allgssname", "dirpath", "minorversion",
	"nametimeo", "negnametimeo", "nocto", "noncontigwr", "pnfs",
	"wcommitsize", "oneopenown", "tls", "tlscertname", "nconnect",
	"syskrb5",
	NULL
};
789 
790 /*
791  * Parse the "from" mountarg, passed by the generic mount(8) program
792  * or the mountroot code.  This is used when rerooting into NFS.
793  *
794  * Note that the "hostname" is actually a "hostname:/share/path" string.
795  */
796 static int
nfs_mount_parse_from(struct vfsoptlist * opts,char ** hostnamep,struct sockaddr_in ** sinp,char * dirpath,size_t dirpathsize,int * dirlenp)797 nfs_mount_parse_from(struct vfsoptlist *opts, char **hostnamep,
798     struct sockaddr_in **sinp, char *dirpath, size_t dirpathsize, int *dirlenp)
799 {
800 	char *nam, *delimp, *hostp, *spec;
801 	int error, have_bracket = 0, offset, rv, speclen;
802 	struct sockaddr_in *sin;
803 	size_t len;
804 
805 	error = vfs_getopt(opts, "from", (void **)&spec, &speclen);
806 	if (error != 0)
807 		return (error);
808 	nam = malloc(MNAMELEN + 1, M_TEMP, M_WAITOK);
809 
810 	/*
811 	 * This part comes from sbin/mount_nfs/mount_nfs.c:getnfsargs().
812 	 */
813 	if (*spec == '[' && (delimp = strchr(spec + 1, ']')) != NULL &&
814 	    *(delimp + 1) == ':') {
815 		hostp = spec + 1;
816 		spec = delimp + 2;
817 		have_bracket = 1;
818 	} else if ((delimp = strrchr(spec, ':')) != NULL) {
819 		hostp = spec;
820 		spec = delimp + 1;
821 	} else if ((delimp = strrchr(spec, '@')) != NULL) {
822 		printf("%s: path@server syntax is deprecated, "
823 		    "use server:path\n", __func__);
824 		hostp = delimp + 1;
825 	} else {
826 		printf("%s: no <host>:<dirpath> nfs-name\n", __func__);
827 		free(nam, M_TEMP);
828 		return (EINVAL);
829 	}
830 	*delimp = '\0';
831 
832 	/*
833 	 * If there has been a trailing slash at mounttime it seems
834 	 * that some mountd implementations fail to remove the mount
835 	 * entries from their mountlist while unmounting.
836 	 */
837 	for (speclen = strlen(spec);
838 	    speclen > 1 && spec[speclen - 1] == '/';
839 	    speclen--)
840 		spec[speclen - 1] = '\0';
841 	if (strlen(hostp) + strlen(spec) + 1 > MNAMELEN) {
842 		printf("%s: %s:%s: name too long", __func__, hostp, spec);
843 		free(nam, M_TEMP);
844 		return (EINVAL);
845 	}
846 	/* Make both '@' and ':' notations equal */
847 	if (*hostp != '\0') {
848 		len = strlen(hostp);
849 		offset = 0;
850 		if (have_bracket)
851 			nam[offset++] = '[';
852 		memmove(nam + offset, hostp, len);
853 		if (have_bracket)
854 			nam[len + offset++] = ']';
855 		nam[len + offset++] = ':';
856 		memmove(nam + len + offset, spec, speclen);
857 		nam[len + speclen + offset] = '\0';
858 	} else
859 		nam[0] = '\0';
860 
861 	/*
862 	 * XXX: IPv6
863 	 */
864 	sin = malloc(sizeof(*sin), M_SONAME, M_WAITOK);
865 	rv = inet_pton(AF_INET, hostp, &sin->sin_addr);
866 	if (rv != 1) {
867 		printf("%s: cannot parse '%s', inet_pton() returned %d\n",
868 		    __func__, hostp, rv);
869 		free(nam, M_TEMP);
870 		free(sin, M_SONAME);
871 		return (EINVAL);
872 	}
873 
874 	sin->sin_len = sizeof(*sin);
875 	sin->sin_family = AF_INET;
876 	/*
877 	 * XXX: hardcoded port number.
878 	 */
879 	sin->sin_port = htons(2049);
880 
881 	*hostnamep = strdup(nam, M_NEWNFSMNT);
882 	*sinp = sin;
883 	strlcpy(dirpath, spec, dirpathsize);
884 	*dirlenp = strlen(dirpath);
885 
886 	free(nam, M_TEMP);
887 	return (0);
888 }
889 
890 /*
891  * VFS Operations.
892  *
893  * mount system call
894  * It seems a bit dumb to copyinstr() the host and path here and then
895  * bcopy() them in mountnfs(), but I wanted to detect errors before
896  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
897  * an error after that means that I have to release the mbuf.
898  */
899 /* ARGSUSED */
900 static int
nfs_mount(struct mount * mp)901 nfs_mount(struct mount *mp)
902 {
903 	struct nfs_args args = {
904 	    .version = NFS_ARGSVERSION,
905 	    .addr = NULL,
906 	    .addrlen = sizeof (struct sockaddr_in),
907 	    .sotype = SOCK_STREAM,
908 	    .proto = 0,
909 	    .fh = NULL,
910 	    .fhsize = 0,
911 	    .flags = NFSMNT_RESVPORT,
912 	    .wsize = NFS_WSIZE,
913 	    .rsize = NFS_RSIZE,
914 	    .readdirsize = NFS_READDIRSIZE,
915 	    .timeo = 10,
916 	    .retrans = NFS_RETRANS,
917 	    .readahead = NFS_DEFRAHEAD,
918 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
919 	    .hostname = NULL,
920 	    .acregmin = NFS_MINATTRTIMO,
921 	    .acregmax = NFS_MAXATTRTIMO,
922 	    .acdirmin = NFS_MINDIRATTRTIMO,
923 	    .acdirmax = NFS_MAXDIRATTRTIMO,
924 	};
925 	int error = 0, ret, len;
926 	struct sockaddr *nam = NULL;
927 	struct vnode *vp;
928 	struct thread *td;
929 	char *hst;
930 	u_char nfh[NFSX_FHMAX], krbname[100], *dirpath, srvkrbname[100];
931 	char *cp, *opt, *name, *secname, *tlscertname;
932 	int nametimeo = NFS_DEFAULT_NAMETIMEO;
933 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
934 	int minvers = -1;
935 	int dirlen, has_nfs_args_opt, has_nfs_from_opt,
936 	    krbnamelen, srvkrbnamelen;
937 	size_t hstlen;
938 	uint32_t newflag;
939 	int aconn = 0;
940 
941 	has_nfs_args_opt = 0;
942 	has_nfs_from_opt = 0;
943 	newflag = 0;
944 	tlscertname = NULL;
945 	hst = malloc(MNAMELEN, M_TEMP, M_WAITOK);
946 	dirpath = malloc(MNAMELEN, M_TEMP, M_WAITOK);
947 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
948 		error = EINVAL;
949 		goto out;
950 	}
951 
952 	td = curthread;
953 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS &&
954 	    nfs_diskless_valid != 0) {
955 		error = nfs_mountroot(mp);
956 		goto out;
957 	}
958 
959 	nfscl_init();
960 
961 	/*
962 	 * The old mount_nfs program passed the struct nfs_args
963 	 * from userspace to kernel.  The new mount_nfs program
964 	 * passes string options via nmount() from userspace to kernel
965 	 * and we populate the struct nfs_args in the kernel.
966 	 */
967 	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
968 		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
969 		    sizeof(args));
970 		if (error != 0)
971 			goto out;
972 
973 		if (args.version != NFS_ARGSVERSION) {
974 			error = EPROGMISMATCH;
975 			goto out;
976 		}
977 		has_nfs_args_opt = 1;
978 	}
979 
980 	/* Handle the new style options. */
981 	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
982 		args.acdirmin = args.acdirmax =
983 		    args.acregmin = args.acregmax = 0;
984 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
985 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
986 	}
987 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
988 		args.flags |= NFSMNT_NOCONN;
989 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
990 		args.flags &= ~NFSMNT_NOCONN;
991 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
992 		args.flags |= NFSMNT_NOLOCKD;
993 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
994 		args.flags &= ~NFSMNT_NOLOCKD;
995 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
996 		args.flags |= NFSMNT_INT;
997 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
998 		args.flags |= NFSMNT_RDIRPLUS;
999 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
1000 		args.flags |= NFSMNT_RESVPORT;
1001 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
1002 		args.flags &= ~NFSMNT_RESVPORT;
1003 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
1004 		args.flags |= NFSMNT_SOFT;
1005 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
1006 		args.flags &= ~NFSMNT_SOFT;
1007 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
1008 		args.sotype = SOCK_DGRAM;
1009 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
1010 		args.sotype = SOCK_DGRAM;
1011 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
1012 		args.sotype = SOCK_STREAM;
1013 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
1014 		args.flags |= NFSMNT_NFSV3;
1015 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
1016 		args.flags |= NFSMNT_NFSV4;
1017 		args.sotype = SOCK_STREAM;
1018 	}
1019 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
1020 		args.flags |= NFSMNT_ALLGSSNAME;
1021 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
1022 		args.flags |= NFSMNT_NOCTO;
1023 	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
1024 		args.flags |= NFSMNT_NONCONTIGWR;
1025 	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
1026 		args.flags |= NFSMNT_PNFS;
1027 	if (vfs_getopt(mp->mnt_optnew, "oneopenown", NULL, NULL) == 0)
1028 		args.flags |= NFSMNT_ONEOPENOWN;
1029 	if (vfs_getopt(mp->mnt_optnew, "tls", NULL, NULL) == 0)
1030 		newflag |= NFSMNT_TLS;
1031 	if (vfs_getopt(mp->mnt_optnew, "tlscertname", (void **)&opt, &len) ==
1032 	    0) {
1033 		/*
1034 		 * tlscertname with "key.pem" appended to it forms a file
1035 		 * name.  As such, the maximum allowable strlen(tlscertname) is
1036 		 * NAME_MAX - 7. However, "len" includes the nul termination
1037 		 * byte so it can be up to NAME_MAX - 6.
1038 		 */
1039 		if (opt == NULL || len <= 1 || len > NAME_MAX - 6) {
1040 			vfs_mount_error(mp, "invalid tlscertname");
1041 			error = EINVAL;
1042 			goto out;
1043 		}
1044 		tlscertname = malloc(len, M_NEWNFSMNT, M_WAITOK);
1045 		strlcpy(tlscertname, opt, len);
1046 	}
1047 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
1048 		if (opt == NULL) {
1049 			vfs_mount_error(mp, "illegal readdirsize");
1050 			error = EINVAL;
1051 			goto out;
1052 		}
1053 		ret = sscanf(opt, "%d", &args.readdirsize);
1054 		if (ret != 1 || args.readdirsize <= 0) {
1055 			vfs_mount_error(mp, "illegal readdirsize: %s",
1056 			    opt);
1057 			error = EINVAL;
1058 			goto out;
1059 		}
1060 		args.flags |= NFSMNT_READDIRSIZE;
1061 	}
1062 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
1063 		if (opt == NULL) {
1064 			vfs_mount_error(mp, "illegal readahead");
1065 			error = EINVAL;
1066 			goto out;
1067 		}
1068 		ret = sscanf(opt, "%d", &args.readahead);
1069 		if (ret != 1 || args.readahead <= 0) {
1070 			vfs_mount_error(mp, "illegal readahead: %s",
1071 			    opt);
1072 			error = EINVAL;
1073 			goto out;
1074 		}
1075 		args.flags |= NFSMNT_READAHEAD;
1076 	}
1077 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
1078 		if (opt == NULL) {
1079 			vfs_mount_error(mp, "illegal wsize");
1080 			error = EINVAL;
1081 			goto out;
1082 		}
1083 		ret = sscanf(opt, "%d", &args.wsize);
1084 		if (ret != 1 || args.wsize <= 0) {
1085 			vfs_mount_error(mp, "illegal wsize: %s",
1086 			    opt);
1087 			error = EINVAL;
1088 			goto out;
1089 		}
1090 		args.flags |= NFSMNT_WSIZE;
1091 	}
1092 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
1093 		if (opt == NULL) {
1094 			vfs_mount_error(mp, "illegal rsize");
1095 			error = EINVAL;
1096 			goto out;
1097 		}
1098 		ret = sscanf(opt, "%d", &args.rsize);
1099 		if (ret != 1 || args.rsize <= 0) {
1100 			vfs_mount_error(mp, "illegal wsize: %s",
1101 			    opt);
1102 			error = EINVAL;
1103 			goto out;
1104 		}
1105 		args.flags |= NFSMNT_RSIZE;
1106 	}
1107 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
1108 		if (opt == NULL) {
1109 			vfs_mount_error(mp, "illegal retrans");
1110 			error = EINVAL;
1111 			goto out;
1112 		}
1113 		ret = sscanf(opt, "%d", &args.retrans);
1114 		if (ret != 1 || args.retrans <= 0) {
1115 			vfs_mount_error(mp, "illegal retrans: %s",
1116 			    opt);
1117 			error = EINVAL;
1118 			goto out;
1119 		}
1120 		args.flags |= NFSMNT_RETRANS;
1121 	}
1122 	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
1123 		ret = sscanf(opt, "%d", &args.acregmin);
1124 		if (ret != 1 || args.acregmin < 0) {
1125 			vfs_mount_error(mp, "illegal actimeo: %s",
1126 			    opt);
1127 			error = EINVAL;
1128 			goto out;
1129 		}
1130 		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
1131 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
1132 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
1133 	}
1134 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
1135 		ret = sscanf(opt, "%d", &args.acregmin);
1136 		if (ret != 1 || args.acregmin < 0) {
1137 			vfs_mount_error(mp, "illegal acregmin: %s",
1138 			    opt);
1139 			error = EINVAL;
1140 			goto out;
1141 		}
1142 		args.flags |= NFSMNT_ACREGMIN;
1143 	}
1144 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
1145 		ret = sscanf(opt, "%d", &args.acregmax);
1146 		if (ret != 1 || args.acregmax < 0) {
1147 			vfs_mount_error(mp, "illegal acregmax: %s",
1148 			    opt);
1149 			error = EINVAL;
1150 			goto out;
1151 		}
1152 		args.flags |= NFSMNT_ACREGMAX;
1153 	}
1154 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
1155 		ret = sscanf(opt, "%d", &args.acdirmin);
1156 		if (ret != 1 || args.acdirmin < 0) {
1157 			vfs_mount_error(mp, "illegal acdirmin: %s",
1158 			    opt);
1159 			error = EINVAL;
1160 			goto out;
1161 		}
1162 		args.flags |= NFSMNT_ACDIRMIN;
1163 	}
1164 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
1165 		ret = sscanf(opt, "%d", &args.acdirmax);
1166 		if (ret != 1 || args.acdirmax < 0) {
1167 			vfs_mount_error(mp, "illegal acdirmax: %s",
1168 			    opt);
1169 			error = EINVAL;
1170 			goto out;
1171 		}
1172 		args.flags |= NFSMNT_ACDIRMAX;
1173 	}
1174 	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
1175 		ret = sscanf(opt, "%d", &args.wcommitsize);
1176 		if (ret != 1 || args.wcommitsize < 0) {
1177 			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1178 			error = EINVAL;
1179 			goto out;
1180 		}
1181 		args.flags |= NFSMNT_WCOMMITSIZE;
1182 	}
1183 	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1184 		ret = sscanf(opt, "%d", &args.timeo);
1185 		if (ret != 1 || args.timeo <= 0) {
1186 			vfs_mount_error(mp, "illegal timeo: %s",
1187 			    opt);
1188 			error = EINVAL;
1189 			goto out;
1190 		}
1191 		args.flags |= NFSMNT_TIMEO;
1192 	}
1193 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1194 		ret = sscanf(opt, "%d", &args.timeo);
1195 		if (ret != 1 || args.timeo <= 0) {
1196 			vfs_mount_error(mp, "illegal timeout: %s",
1197 			    opt);
1198 			error = EINVAL;
1199 			goto out;
1200 		}
1201 		args.flags |= NFSMNT_TIMEO;
1202 	}
1203 	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1204 		ret = sscanf(opt, "%d", &nametimeo);
1205 		if (ret != 1 || nametimeo < 0) {
1206 			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1207 			error = EINVAL;
1208 			goto out;
1209 		}
1210 	}
1211 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1212 	    == 0) {
1213 		ret = sscanf(opt, "%d", &negnametimeo);
1214 		if (ret != 1 || negnametimeo < 0) {
1215 			vfs_mount_error(mp, "illegal negnametimeo: %s",
1216 			    opt);
1217 			error = EINVAL;
1218 			goto out;
1219 		}
1220 	}
1221 	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1222 	    0) {
1223 		ret = sscanf(opt, "%d", &minvers);
1224 		if (ret != 1 || minvers < 0 || minvers > 2 ||
1225 		    (args.flags & NFSMNT_NFSV4) == 0) {
1226 			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1227 			error = EINVAL;
1228 			goto out;
1229 		}
1230 	}
1231 	if (vfs_getopt(mp->mnt_optnew, "nconnect", (void **)&opt, NULL) ==
1232 	    0) {
1233 		ret = sscanf(opt, "%d", &aconn);
1234 		if (ret != 1 || aconn < 1 || aconn > NFS_MAXNCONN) {
1235 			vfs_mount_error(mp, "illegal nconnect: %s", opt);
1236 			error = EINVAL;
1237 			goto out;
1238 		}
1239 		/*
1240 		 * Setting nconnect=1 is a no-op, allowed so that
1241 		 * the option can be used in a Linux compatible way.
1242 		 */
1243 		aconn--;
1244 	}
1245 	if (vfs_getopt(mp->mnt_optnew, "syskrb5", NULL, NULL) == 0)
1246 		newflag |= NFSMNT_SYSKRB5;
1247 	if (vfs_getopt(mp->mnt_optnew, "sec",
1248 		(void **) &secname, NULL) == 0)
1249 		nfs_sec_name(secname, &args.flags);
1250 
1251 	if (mp->mnt_flag & MNT_UPDATE) {
1252 		struct nfsmount *nmp = VFSTONFS(mp);
1253 
1254 		if (nmp == NULL) {
1255 			error = EIO;
1256 			goto out;
1257 		}
1258 
1259 		/*
1260 		 * If a change from TCP->UDP is done and there are thread(s)
1261 		 * that have I/O RPC(s) in progress with a transfer size
1262 		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1263 		 * hung, retrying the RPC(s) forever. Usually these threads
1264 		 * will be seen doing an uninterruptible sleep on wait channel
1265 		 * "nfsreq".
1266 		 */
1267 		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1268 			tprintf(td->td_proc, LOG_WARNING,
1269 	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1270 
1271 		/*
1272 		 * When doing an update, we can't change version,
1273 		 * security, switch lockd strategies, change cookie
1274 		 * translation or switch oneopenown.
1275 		 */
1276 		args.flags = (args.flags &
1277 		    ~(NFSMNT_NFSV3 |
1278 		      NFSMNT_NFSV4 |
1279 		      NFSMNT_KERB |
1280 		      NFSMNT_INTEGRITY |
1281 		      NFSMNT_PRIVACY |
1282 		      NFSMNT_ONEOPENOWN |
1283 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1284 		    (nmp->nm_flag &
1285 			(NFSMNT_NFSV3 |
1286 			 NFSMNT_NFSV4 |
1287 			 NFSMNT_KERB |
1288 			 NFSMNT_INTEGRITY |
1289 			 NFSMNT_PRIVACY |
1290 			 NFSMNT_ONEOPENOWN |
1291 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1292 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1293 		goto out;
1294 	}
1295 
1296 	/*
1297 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1298 	 * or no-connection mode for those protocols that support
1299 	 * no-connection mode (the flag will be cleared later for protocols
1300 	 * that do not support no-connection mode).  This will allow a client
1301 	 * to receive replies from a different IP then the request was
1302 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1303 	 * not 0.
1304 	 */
1305 	if (nfs_ip_paranoia == 0)
1306 		args.flags |= NFSMNT_NOCONN;
1307 
1308 	if (has_nfs_args_opt != 0) {
1309 		/*
1310 		 * In the 'nfs_args' case, the pointers in the args
1311 		 * structure are in userland - we copy them in here.
1312 		 */
1313 		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1314 			vfs_mount_error(mp, "Bad file handle");
1315 			error = EINVAL;
1316 			goto out;
1317 		}
1318 		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1319 		    args.fhsize);
1320 		if (error != 0)
1321 			goto out;
1322 		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1323 		if (error != 0)
1324 			goto out;
1325 		bzero(&hst[hstlen], MNAMELEN - hstlen);
1326 		args.hostname = hst;
1327 		/* getsockaddr() call must be after above copyin() calls */
1328 		error = getsockaddr(&nam, args.addr, args.addrlen);
1329 		if (error != 0)
1330 			goto out;
1331 	} else if (nfs_mount_parse_from(mp->mnt_optnew,
1332 	    &args.hostname, (struct sockaddr_in **)&nam, dirpath,
1333 	    MNAMELEN, &dirlen) == 0) {
1334 		has_nfs_from_opt = 1;
1335 		bcopy(args.hostname, hst, MNAMELEN);
1336 		hst[MNAMELEN - 1] = '\0';
1337 
1338 		/*
1339 		 * This only works with NFSv4 for now.
1340 		 */
1341 		args.fhsize = 0;
1342 		args.flags |= NFSMNT_NFSV4;
1343 		args.sotype = SOCK_STREAM;
1344 	} else {
1345 		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1346 		    &args.fhsize) == 0) {
1347 			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1348 				vfs_mount_error(mp, "Bad file handle");
1349 				error = EINVAL;
1350 				goto out;
1351 			}
1352 			bcopy(args.fh, nfh, args.fhsize);
1353 		} else {
1354 			args.fhsize = 0;
1355 		}
1356 		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1357 		    (void **)&args.hostname, &len);
1358 		if (args.hostname == NULL) {
1359 			vfs_mount_error(mp, "Invalid hostname");
1360 			error = EINVAL;
1361 			goto out;
1362 		}
1363 		if (len >= MNAMELEN) {
1364 			vfs_mount_error(mp, "Hostname too long");
1365 			error = EINVAL;
1366 			goto out;
1367 		}
1368 		bcopy(args.hostname, hst, len);
1369 		hst[len] = '\0';
1370 	}
1371 
1372 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1373 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1374 	else {
1375 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1376 		cp = strchr(srvkrbname, ':');
1377 		if (cp != NULL)
1378 			*cp = '\0';
1379 	}
1380 	srvkrbnamelen = strlen(srvkrbname);
1381 
1382 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1383 		strlcpy(krbname, name, sizeof (krbname));
1384 	else
1385 		krbname[0] = '\0';
1386 	krbnamelen = strlen(krbname);
1387 
1388 	if (has_nfs_from_opt == 0) {
1389 		if (vfs_getopt(mp->mnt_optnew,
1390 		    "dirpath", (void **)&name, NULL) == 0)
1391 			strlcpy(dirpath, name, MNAMELEN);
1392 		else
1393 			dirpath[0] = '\0';
1394 		dirlen = strlen(dirpath);
1395 	}
1396 
1397 	if (has_nfs_args_opt == 0 && has_nfs_from_opt == 0) {
1398 		if (vfs_getopt(mp->mnt_optnew, "addr",
1399 		    (void **)&args.addr, &args.addrlen) == 0) {
1400 			if (args.addrlen > SOCK_MAXADDRLEN) {
1401 				error = ENAMETOOLONG;
1402 				goto out;
1403 			}
1404 			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1405 			bcopy(args.addr, nam, args.addrlen);
1406 			nam->sa_len = args.addrlen;
1407 		} else {
1408 			vfs_mount_error(mp, "No server address");
1409 			error = EINVAL;
1410 			goto out;
1411 		}
1412 	}
1413 
1414 	if (aconn > 0 && (args.sotype != SOCK_STREAM ||
1415 	    (args.flags & NFSMNT_NFSV4) == 0 || minvers == 0)) {
1416 		/*
1417 		 * RFC 5661 requires that an NFSv4.1/4.2 server
1418 		 * send an RPC reply on the same TCP connection
1419 		 * as the one it received the request on.
1420 		 * This property in required for "nconnect" and
1421 		 * might not be the case for NFSv3 or NFSv4.0 servers.
1422 		 */
1423 		vfs_mount_error(mp, "nconnect should only be used "
1424 		    "for NFSv4.1/4.2 mounts");
1425 		error = EINVAL;
1426 		goto out;
1427 	}
1428 
1429 	if ((newflag & NFSMNT_SYSKRB5) != 0 &&
1430 	    ((args.flags & NFSMNT_NFSV4) == 0 || minvers == 0)) {
1431 		/*
1432 		 * This option requires the use of SP4_NONE, which
1433 		 * is only in NFSv4.1/4.2.
1434 		 */
1435 		vfs_mount_error(mp, "syskrb5 should only be used "
1436 		    "for NFSv4.1/4.2 mounts");
1437 		error = EINVAL;
1438 		goto out;
1439 	}
1440 
1441 	if ((newflag & NFSMNT_SYSKRB5) != 0 &&
1442 	    (args.flags & NFSMNT_KERB) == 0) {
1443 		/*
1444 		 * This option modifies the behaviour of sec=krb5[ip].
1445 		 */
1446 		vfs_mount_error(mp, "syskrb5 should only be used "
1447 		    "for sec=krb5[ip] mounts");
1448 		error = EINVAL;
1449 		goto out;
1450 	}
1451 
1452 	if ((newflag & NFSMNT_SYSKRB5) != 0 && krbname[0] != '\0') {
1453 		/*
1454 		 * This option is used as an alternative to "gssname".
1455 		 */
1456 		vfs_mount_error(mp, "syskrb5 should not be used "
1457 		    "with the gssname option");
1458 		error = EINVAL;
1459 		goto out;
1460 	}
1461 
1462 	args.fh = nfh;
1463 	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1464 	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1465 	    nametimeo, negnametimeo, minvers, newflag, tlscertname, aconn);
1466 out:
1467 	if (!error) {
1468 		MNT_ILOCK(mp);
1469 		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1470 		    MNTK_USES_BCACHE;
1471 		if ((VFSTONFS(mp)->nm_flag & NFSMNT_NFSV4) != 0)
1472 			mp->mnt_kern_flag |= MNTK_NULL_NOCACHE;
1473 		MNT_IUNLOCK(mp);
1474 	}
1475 	free(hst, M_TEMP);
1476 	free(dirpath, M_TEMP);
1477 	return (error);
1478 }
1479 
1480 /*
1481  * VFS Operations.
1482  *
1483  * mount system call
1484  * It seems a bit dumb to copyinstr() the host and path here and then
1485  * bcopy() them in mountnfs(), but I wanted to detect errors before
1486  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
1487  * an error after that means that I have to release the mbuf.
1488  */
1489 /* ARGSUSED */
1490 static int
nfs_cmount(struct mntarg * ma,void * data,uint64_t flags)1491 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1492 {
1493 	int error;
1494 	struct nfs_args args;
1495 
1496 	error = copyin(data, &args, sizeof (struct nfs_args));
1497 	if (error)
1498 		return error;
1499 
1500 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1501 
1502 	error = kernel_mount(ma, flags);
1503 	return (error);
1504 }
1505 
1506 /*
1507  * Common code for mount and mountroot
1508  */
1509 static int
mountnfs(struct nfs_args * argp,struct mount * mp,struct sockaddr * nam,char * hst,u_char * krbname,int krbnamelen,u_char * dirpath,int dirlen,u_char * srvkrbname,int srvkrbnamelen,struct vnode ** vpp,struct ucred * cred,struct thread * td,int nametimeo,int negnametimeo,int minvers,uint32_t newflag,char * tlscertname,int aconn)1510 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1511     char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1512     u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1513     struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1514     int minvers, uint32_t newflag, char *tlscertname, int aconn)
1515 {
1516 	struct nfsmount *nmp;
1517 	struct nfsnode *np;
1518 	int error, trycnt, ret;
1519 	struct nfsvattr nfsva;
1520 	struct nfsclclient *clp;
1521 	struct nfsclds *dsp, *tdsp;
1522 	uint32_t lease;
1523 	bool tryminvers;
1524 	char *fakefh;
1525 	static u_int64_t clval = 0;
1526 #ifdef KERN_TLS
1527 	u_int maxlen;
1528 #endif
1529 
1530 	NFSCL_DEBUG(3, "in mnt\n");
1531 	CURVNET_SET(CRED_TO_VNET(cred));
1532 	clp = NULL;
1533 	if (mp->mnt_flag & MNT_UPDATE) {
1534 		nmp = VFSTONFS(mp);
1535 		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1536 		free(nam, M_SONAME);
1537 		free(tlscertname, M_NEWNFSMNT);
1538 		CURVNET_RESTORE();
1539 		return (0);
1540 	} else {
1541 		/* NFS-over-TLS requires that rpctls be functioning. */
1542 		if ((newflag & NFSMNT_TLS) != 0) {
1543 			error = EINVAL;
1544 #ifdef KERN_TLS
1545 			/* KERN_TLS is only supported for TCP. */
1546 			if (argp->sotype == SOCK_STREAM &&
1547 			    rpctls_getinfo(&maxlen, true, false))
1548 				error = 0;
1549 #endif
1550 			if (error != 0) {
1551 				free(nam, M_SONAME);
1552 				free(tlscertname, M_NEWNFSMNT);
1553 				CURVNET_RESTORE();
1554 				return (error);
1555 			}
1556 		}
1557 		nmp = malloc(sizeof (struct nfsmount) +
1558 		    krbnamelen + dirlen + srvkrbnamelen + 2,
1559 		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1560 		nmp->nm_tlscertname = tlscertname;
1561 		nmp->nm_newflag = newflag;
1562 		TAILQ_INIT(&nmp->nm_bufq);
1563 		TAILQ_INIT(&nmp->nm_sess);
1564 		if (clval == 0)
1565 			clval = (u_int64_t)nfsboottime.tv_sec;
1566 		nmp->nm_clval = clval++;
1567 		nmp->nm_krbnamelen = krbnamelen;
1568 		nmp->nm_dirpathlen = dirlen;
1569 		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1570 		if (td->td_ucred->cr_uid != (uid_t)0) {
1571 			/*
1572 			 * nm_uid is used to get KerberosV credentials for
1573 			 * the nfsv4 state handling operations if there is
1574 			 * no host based principal set. Use the uid of
1575 			 * this user if not root, since they are doing the
1576 			 * mount. I don't think setting this for root will
1577 			 * work, since root normally does not have user
1578 			 * credentials in a credentials cache.
1579 			 */
1580 			nmp->nm_uid = td->td_ucred->cr_uid;
1581 		} else {
1582 			/*
1583 			 * Just set to -1, so it won't be used.
1584 			 */
1585 			nmp->nm_uid = (uid_t)-1;
1586 		}
1587 
1588 		/* Copy and null terminate all the names */
1589 		if (nmp->nm_krbnamelen > 0) {
1590 			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1591 			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1592 		}
1593 		if (nmp->nm_dirpathlen > 0) {
1594 			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1595 			    nmp->nm_dirpathlen);
1596 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1597 			    + 1] = '\0';
1598 		}
1599 		if (nmp->nm_srvkrbnamelen > 0) {
1600 			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1601 			    nmp->nm_srvkrbnamelen);
1602 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1603 			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1604 		}
1605 		nmp->nm_sockreq.nr_cred = crhold(cred);
1606 		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1607 		mp->mnt_data = nmp;
1608 		nmp->nm_getinfo = nfs_getnlminfo;
1609 		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1610 	}
1611 	vfs_getnewfsid(mp);
1612 	nmp->nm_mountp = mp;
1613 	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1614 
1615 	/*
1616 	 * Since nfs_decode_args() might optionally set them, these
1617 	 * need to be set to defaults before the call, so that the
1618 	 * optional settings aren't overwritten.
1619 	 */
1620 	nmp->nm_nametimeo = nametimeo;
1621 	nmp->nm_negnametimeo = negnametimeo;
1622 	nmp->nm_timeo = NFS_TIMEO;
1623 	nmp->nm_retry = NFS_RETRANS;
1624 	nmp->nm_readahead = NFS_DEFRAHEAD;
1625 
1626 	/* This is empirical approximation of sqrt(hibufspace) * 256. */
1627 	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
1628 	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
1629 		nmp->nm_wcommitsize *= 2;
1630 	nmp->nm_wcommitsize *= 256;
1631 
1632 	tryminvers = false;
1633 	if ((argp->flags & NFSMNT_NFSV4) != 0) {
1634 		if (minvers < 0) {
1635 			tryminvers = true;
1636 			minvers = NFSV42_MINORVERSION;
1637 		}
1638 		nmp->nm_minorvers = minvers;
1639 	} else
1640 		nmp->nm_minorvers = 0;
1641 
1642 	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1643 
1644 	/*
1645 	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1646 	 * high, depending on whether we end up with negative offsets in
1647 	 * the client or server somewhere.  2GB-1 may be safer.
1648 	 *
1649 	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1650 	 * that we can handle until we find out otherwise.
1651 	 */
1652 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1653 		nmp->nm_maxfilesize = 0xffffffffLL;
1654 	else
1655 		nmp->nm_maxfilesize = OFF_MAX;
1656 
1657 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1658 		nmp->nm_wsize = NFS_WSIZE;
1659 		nmp->nm_rsize = NFS_RSIZE;
1660 		nmp->nm_readdirsize = NFS_READDIRSIZE;
1661 	}
1662 	nmp->nm_numgrps = NFS_MAXGRPS;
1663 	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1664 	if (nmp->nm_tprintf_delay < 0)
1665 		nmp->nm_tprintf_delay = 0;
1666 	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1667 	if (nmp->nm_tprintf_initial_delay < 0)
1668 		nmp->nm_tprintf_initial_delay = 0;
1669 	nmp->nm_fhsize = argp->fhsize;
1670 	if (nmp->nm_fhsize > 0)
1671 		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1672 	strlcpy(mp->mnt_stat.f_mntfromname, hst, MNAMELEN);
1673 	nmp->nm_nam = nam;
1674 	/* Set up the sockets and per-host congestion */
1675 	nmp->nm_sotype = argp->sotype;
1676 	nmp->nm_soproto = argp->proto;
1677 	nmp->nm_sockreq.nr_prog = NFS_PROG;
1678 	if ((argp->flags & NFSMNT_NFSV4))
1679 		nmp->nm_sockreq.nr_vers = NFS_VER4;
1680 	else if ((argp->flags & NFSMNT_NFSV3))
1681 		nmp->nm_sockreq.nr_vers = NFS_VER3;
1682 	else
1683 		nmp->nm_sockreq.nr_vers = NFS_VER2;
1684 
1685 	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0, false,
1686 	    &nmp->nm_sockreq.nr_client)))
1687 		goto bad;
1688 	/* For NFSv4, get the clientid now. */
1689 	if ((argp->flags & NFSMNT_NFSV4) != 0) {
1690 		NFSCL_DEBUG(3, "at getcl\n");
1691 		error = nfscl_getcl(mp, cred, td, tryminvers, true, &clp);
1692 		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1693 		if (error != 0)
1694 			goto bad;
1695 		if (aconn > 0 && nmp->nm_minorvers == 0) {
1696 			vfs_mount_error(mp, "nconnect should only be used "
1697 			    "for NFSv4.1/4.2 mounts");
1698 			error = EINVAL;
1699 			goto bad;
1700 		}
1701 		if (NFSHASSYSKRB5(nmp) && nmp->nm_minorvers == 0) {
1702 			vfs_mount_error(mp, "syskrb5 should only be used "
1703 			    "for NFSv4.1/4.2 mounts");
1704 			error = EINVAL;
1705 			goto bad;
1706 		}
1707 	}
1708 
1709 	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1710 	    nmp->nm_dirpathlen > 0) {
1711 		NFSCL_DEBUG(3, "in dirp\n");
1712 		/*
1713 		 * If the fhsize on the mount point == 0 for V4, the mount
1714 		 * path needs to be looked up.
1715 		 */
1716 		trycnt = 3;
1717 		do {
1718 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1719 			    cred, td);
1720 			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
1721 			if (error != 0 && (!NFSHASSYSKRB5(nmp) ||
1722 			    error != NFSERR_WRONGSEC))
1723 				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1724 		} while (error != 0 && --trycnt > 0 &&
1725 		    (!NFSHASSYSKRB5(nmp) || error != NFSERR_WRONGSEC));
1726 		if (error != 0 && (!NFSHASSYSKRB5(nmp) ||
1727 		    error != NFSERR_WRONGSEC))
1728 			goto bad;
1729 	}
1730 
1731 	/*
1732 	 * A reference count is needed on the nfsnode representing the
1733 	 * remote root.  If this object is not persistent, then backward
1734 	 * traversals of the mount point (i.e. "..") will not work if
1735 	 * the nfsnode gets flushed out of the cache. Ufs does not have
1736 	 * this problem, because one can identify root inodes by their
1737 	 * number == UFS_ROOTINO (2).
1738 	 * For the "syskrb5" mount, the file handle might not have
1739 	 * been acquired.  As such, use a "fake" file handle which
1740 	 * can never be returned by a server for the root vnode.
1741 	 */
1742 	if (nmp->nm_fhsize > 0 || NFSHASSYSKRB5(nmp)) {
1743 		/*
1744 		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1745 		 * non-zero for the root vnode. f_iosize will be set correctly
1746 		 * by nfs_statfs() before any I/O occurs.
1747 		 */
1748 		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1749 		if (nmp->nm_fhsize == 0) {
1750 			fakefh = malloc(NFSX_FHMAX + 1, M_TEMP, M_WAITOK |
1751 			    M_ZERO);
1752 			error = ncl_nget(mp, fakefh, NFSX_FHMAX + 1, &np,
1753 			    LK_EXCLUSIVE);
1754 			free(fakefh, M_TEMP);
1755 			nmp->nm_privflag |= NFSMNTP_FAKEROOTFH;
1756 		} else
1757 			error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1758 			    LK_EXCLUSIVE);
1759 		if (error)
1760 			goto bad;
1761 		*vpp = NFSTOV(np);
1762 
1763 		/*
1764 		 * Get file attributes and transfer parameters for the
1765 		 * mountpoint.  This has the side effect of filling in
1766 		 * (*vpp)->v_type with the correct value.
1767 		 */
1768 		ret = ENXIO;
1769 		if (nmp->nm_fhsize > 0)
1770 			ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh,
1771 			    nmp->nm_fhsize, 1, cred, td, &nfsva, NULL, &lease);
1772 		if (ret) {
1773 			/*
1774 			 * Just set default values to get things going.
1775 			 */
1776 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1777 			nfsva.na_vattr.va_type = VDIR;
1778 			nfsva.na_vattr.va_mode = 0777;
1779 			nfsva.na_vattr.va_nlink = 100;
1780 			nfsva.na_vattr.va_uid = (uid_t)0;
1781 			nfsva.na_vattr.va_gid = (gid_t)0;
1782 			nfsva.na_vattr.va_fileid = 2;
1783 			nfsva.na_vattr.va_gen = 1;
1784 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1785 			nfsva.na_vattr.va_size = 512 * 1024;
1786 			lease = 20;
1787 		}
1788 		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, 0, 1);
1789 		if ((argp->flags & NFSMNT_NFSV4) != 0) {
1790 			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1791 			NFSLOCKCLSTATE();
1792 			clp->nfsc_renew = NFSCL_RENEW(lease);
1793 			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1794 			clp->nfsc_clientidrev++;
1795 			if (clp->nfsc_clientidrev == 0)
1796 				clp->nfsc_clientidrev++;
1797 			NFSUNLOCKCLSTATE();
1798 			/*
1799 			 * Mount will succeed, so the renew thread can be
1800 			 * started now.
1801 			 */
1802 			nfscl_start_renewthread(clp);
1803 			nfscl_clientrelease(clp);
1804 		}
1805 		if (argp->flags & NFSMNT_NFSV3)
1806 			ncl_fsinfo(nmp, *vpp, cred, td);
1807 
1808 		/*
1809 		 * Mark if the mount point supports NFSv4 ACLs and
1810 		 * named attributes.
1811 		 */
1812 		if ((argp->flags & NFSMNT_NFSV4) != 0) {
1813 			MNT_ILOCK(mp);
1814 			if (ret == 0 && nfsrv_useacl != 0 &&
1815 			    NFSISSET_ATTRBIT(&nfsva.na_suppattr,
1816 			    NFSATTRBIT_ACL))
1817 				mp->mnt_flag |= MNT_NFS4ACLS;
1818 			if (nmp->nm_minorvers > 0)
1819 				mp->mnt_flag |= MNT_NAMEDATTR;
1820 			MNT_IUNLOCK(mp);
1821 		}
1822 
1823 		/* Can now allow additional connections. */
1824 		if (aconn > 0)
1825 			nmp->nm_aconnect = aconn;
1826 
1827 		/*
1828 		 * Lose the lock but keep the ref.
1829 		 */
1830 		NFSVOPUNLOCK(*vpp);
1831 		vfs_cache_root_set(mp, *vpp);
1832 		CURVNET_RESTORE();
1833 		return (0);
1834 	}
1835 	error = EIO;
1836 
1837 bad:
1838 	if (clp != NULL)
1839 		nfscl_clientrelease(clp);
1840 	newnfs_disconnect(NULL, &nmp->nm_sockreq);
1841 	crfree(nmp->nm_sockreq.nr_cred);
1842 	if (nmp->nm_sockreq.nr_auth != NULL)
1843 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1844 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1845 	mtx_destroy(&nmp->nm_mtx);
1846 	if (nmp->nm_clp != NULL) {
1847 		NFSLOCKCLSTATE();
1848 		LIST_REMOVE(nmp->nm_clp, nfsc_list);
1849 		NFSUNLOCKCLSTATE();
1850 		free(nmp->nm_clp, M_NFSCLCLIENT);
1851 	}
1852 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
1853 		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1854 		    dsp->nfsclds_sockp != NULL)
1855 			newnfs_disconnect(NULL, dsp->nfsclds_sockp);
1856 		nfscl_freenfsclds(dsp);
1857 	}
1858 	free(nmp->nm_tlscertname, M_NEWNFSMNT);
1859 	free(nmp, M_NEWNFSMNT);
1860 	free(nam, M_SONAME);
1861 	CURVNET_RESTORE();
1862 	return (error);
1863 }
1864 
1865 /*
1866  * unmount system call
1867  */
1868 static int
nfs_unmount(struct mount * mp,int mntflags)1869 nfs_unmount(struct mount *mp, int mntflags)
1870 {
1871 	struct thread *td;
1872 	struct nfsmount *nmp;
1873 	int error, flags = 0, i, trycnt = 0;
1874 	struct nfsclds *dsp, *tdsp;
1875 	struct nfscldeleg *dp, *ndp;
1876 	struct nfscldeleghead dh;
1877 
1878 	td = curthread;
1879 	TAILQ_INIT(&dh);
1880 
1881 	if (mntflags & MNT_FORCE)
1882 		flags |= FORCECLOSE;
1883 	nmp = VFSTONFS(mp);
1884 	error = 0;
1885 	/*
1886 	 * Goes something like this..
1887 	 * - Call vflush() to clear out vnodes for this filesystem
1888 	 * - Close the socket
1889 	 * - Free up the data structures
1890 	 */
1891 	/* In the forced case, cancel any outstanding requests. */
1892 	if (mntflags & MNT_FORCE) {
1893 		NFSDDSLOCK();
1894 		if (nfsv4_findmirror(nmp) != NULL)
1895 			error = ENXIO;
1896 		NFSDDSUNLOCK();
1897 		if (error)
1898 			goto out;
1899 		error = newnfs_nmcancelreqs(nmp);
1900 		if (error)
1901 			goto out;
1902 		/* For a forced close, get rid of the renew thread now */
1903 		nfscl_umount(nmp, td, &dh);
1904 	}
1905 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1906 	do {
1907 		error = vflush(mp, 1, flags, td);
1908 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1909 			(void) nfs_catnap(PSOCK, error, "newndm");
1910 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1911 	if (error)
1912 		goto out;
1913 
1914 	/*
1915 	 * We are now committed to the unmount.
1916 	 */
1917 	if ((mntflags & MNT_FORCE) == 0)
1918 		nfscl_umount(nmp, td, NULL);
1919 	else {
1920 		mtx_lock(&nmp->nm_mtx);
1921 		nmp->nm_privflag |= NFSMNTP_FORCEDISM;
1922 		mtx_unlock(&nmp->nm_mtx);
1923 	}
1924 	/* Make sure no nfsiods are assigned to this mount. */
1925 	NFSLOCKIOD();
1926 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1927 		if (ncl_iodmount[i] == nmp) {
1928 			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1929 			ncl_iodmount[i] = NULL;
1930 		}
1931 	NFSUNLOCKIOD();
1932 
1933 	/*
1934 	 * We can now set mnt_data to NULL and wait for
1935 	 * nfssvc(NFSSVC_FORCEDISM) to complete.
1936 	 */
1937 	mtx_lock(&mountlist_mtx);
1938 	mtx_lock(&nmp->nm_mtx);
1939 	mp->mnt_data = NULL;
1940 	mtx_unlock(&mountlist_mtx);
1941 	while ((nmp->nm_privflag & NFSMNTP_CANCELRPCS) != 0)
1942 		msleep(nmp, &nmp->nm_mtx, PVFS, "nfsfdism", 0);
1943 	mtx_unlock(&nmp->nm_mtx);
1944 
1945 	newnfs_disconnect(nmp, &nmp->nm_sockreq);
1946 	crfree(nmp->nm_sockreq.nr_cred);
1947 	free(nmp->nm_nam, M_SONAME);
1948 	if (nmp->nm_sockreq.nr_auth != NULL)
1949 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1950 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1951 	mtx_destroy(&nmp->nm_mtx);
1952 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
1953 		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1954 		    dsp->nfsclds_sockp != NULL)
1955 			newnfs_disconnect(NULL, dsp->nfsclds_sockp);
1956 		nfscl_freenfsclds(dsp);
1957 	}
1958 	free(nmp->nm_tlscertname, M_NEWNFSMNT);
1959 	free(nmp, M_NEWNFSMNT);
1960 
1961 	/* Free up the delegation structures for forced dismounts. */
1962 	TAILQ_FOREACH_SAFE(dp, &dh, nfsdl_list, ndp) {
1963 		TAILQ_REMOVE(&dh, dp, nfsdl_list);
1964 		free(dp, M_NFSCLDELEG);
1965 	}
1966 out:
1967 	return (error);
1968 }
1969 
1970 /*
1971  * Return root of a filesystem
1972  */
1973 static int
nfs_root(struct mount * mp,int flags,struct vnode ** vpp)1974 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1975 {
1976 	struct vnode *vp;
1977 	struct nfsmount *nmp;
1978 	struct nfsnode *np;
1979 	int error;
1980 	char *fakefh;
1981 
1982 	nmp = VFSTONFS(mp);
1983 	if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0) {
1984 		/* Attempt to get the actual root file handle. */
1985 		if (nmp->nm_fhsize == 0)
1986 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1987 			    curthread->td_ucred, curthread);
1988 		fakefh = malloc(NFSX_FHMAX + 1, M_TEMP, M_WAITOK | M_ZERO);
1989 		error = ncl_nget(mp, fakefh, NFSX_FHMAX + 1, &np, flags);
1990 		free(fakefh, M_TEMP);
1991 	} else {
1992 		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1993 	}
1994 	if (error)
1995 		return error;
1996 	vp = NFSTOV(np);
1997 	/*
1998 	 * Get transfer parameters and attributes for root vnode once.
1999 	 */
2000 	mtx_lock(&nmp->nm_mtx);
2001 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
2002 		mtx_unlock(&nmp->nm_mtx);
2003 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
2004 	} else
2005 		mtx_unlock(&nmp->nm_mtx);
2006 	if (vp->v_type == VNON)
2007 	    vp->v_type = VDIR;
2008 	vp->v_vflag |= VV_ROOT;
2009 	*vpp = vp;
2010 	return (0);
2011 }
2012 
2013 /*
2014  * Flush out the buffer cache
2015  */
2016 /* ARGSUSED */
2017 static int
nfs_sync(struct mount * mp,int waitfor)2018 nfs_sync(struct mount *mp, int waitfor)
2019 {
2020 	struct vnode *vp, *mvp;
2021 	struct thread *td;
2022 	int error, allerror = 0;
2023 
2024 	td = curthread;
2025 
2026 	MNT_ILOCK(mp);
2027 	/*
2028 	 * If a forced dismount is in progress, return from here so that
2029 	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
2030 	 * calling VFS_UNMOUNT().
2031 	 */
2032 	if (NFSCL_FORCEDISM(mp)) {
2033 		MNT_IUNLOCK(mp);
2034 		return (EBADF);
2035 	}
2036 	MNT_IUNLOCK(mp);
2037 
2038 	if (waitfor == MNT_LAZY)
2039 		return (0);
2040 
2041 	/*
2042 	 * Force stale buffer cache information to be flushed.
2043 	 */
2044 loop:
2045 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
2046 		/* XXX Racy bv_cnt check. */
2047 		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0) {
2048 			VI_UNLOCK(vp);
2049 			continue;
2050 		}
2051 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
2052 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
2053 			goto loop;
2054 		}
2055 		error = VOP_FSYNC(vp, waitfor, td);
2056 		if (error)
2057 			allerror = error;
2058 		NFSVOPUNLOCK(vp);
2059 		vrele(vp);
2060 	}
2061 	return (allerror);
2062 }
2063 
2064 static int
nfs_sysctl(struct mount * mp,fsctlop_t op,struct sysctl_req * req)2065 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
2066 {
2067 	struct nfsmount *nmp = VFSTONFS(mp);
2068 	struct vfsquery vq;
2069 	int error;
2070 
2071 	bzero(&vq, sizeof(vq));
2072 	switch (op) {
2073 #if 0
2074 	case VFS_CTL_NOLOCKS:
2075 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
2076  		if (req->oldptr != NULL) {
2077  			error = SYSCTL_OUT(req, &val, sizeof(val));
2078  			if (error)
2079  				return (error);
2080  		}
2081  		if (req->newptr != NULL) {
2082  			error = SYSCTL_IN(req, &val, sizeof(val));
2083  			if (error)
2084  				return (error);
2085 			if (val)
2086 				nmp->nm_flag |= NFSMNT_NOLOCKS;
2087 			else
2088 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
2089  		}
2090 		break;
2091 #endif
2092 	case VFS_CTL_QUERY:
2093 		mtx_lock(&nmp->nm_mtx);
2094 		if (nmp->nm_state & NFSSTA_TIMEO)
2095 			vq.vq_flags |= VQ_NOTRESP;
2096 		mtx_unlock(&nmp->nm_mtx);
2097 #if 0
2098 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
2099 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
2100 			vq.vq_flags |= VQ_NOTRESPLOCK;
2101 #endif
2102 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
2103 		break;
2104  	case VFS_CTL_TIMEO:
2105  		if (req->oldptr != NULL) {
2106  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
2107  			    sizeof(nmp->nm_tprintf_initial_delay));
2108  			if (error)
2109  				return (error);
2110  		}
2111  		if (req->newptr != NULL) {
2112 			error = vfs_suser(mp, req->td);
2113 			if (error)
2114 				return (error);
2115  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
2116  			    sizeof(nmp->nm_tprintf_initial_delay));
2117  			if (error)
2118  				return (error);
2119  			if (nmp->nm_tprintf_initial_delay < 0)
2120  				nmp->nm_tprintf_initial_delay = 0;
2121  		}
2122 		break;
2123 	default:
2124 		return (ENOTSUP);
2125 	}
2126 	return (0);
2127 }
2128 
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.
 *
 * The value returned by newnfs_nmcancelreqs() is not used.
 */
static void
nfs_purge(struct mount *mp)
{

	(void)newnfs_nmcancelreqs(VFSTONFS(mp));
}
2141 
2142 /*
2143  * Extract the information needed by the nlm from the nfs vnode.
2144  */
2145 static void
nfs_getnlminfo(struct vnode * vp,uint8_t * fhp,size_t * fhlenp,struct sockaddr_storage * sp,int * is_v3p,off_t * sizep,struct timeval * timeop)2146 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
2147     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
2148     struct timeval *timeop)
2149 {
2150 	struct nfsmount *nmp;
2151 	struct nfsnode *np = VTONFS(vp);
2152 
2153 	nmp = VFSTONFS(vp->v_mount);
2154 	if (fhlenp != NULL)
2155 		*fhlenp = (size_t)np->n_fhp->nfh_len;
2156 	if (fhp != NULL)
2157 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
2158 	if (sp != NULL)
2159 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
2160 	if (is_v3p != NULL)
2161 		*is_v3p = NFS_ISV3(vp);
2162 	if (sizep != NULL)
2163 		*sizep = np->n_size;
2164 	if (timeop != NULL) {
2165 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
2166 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
2167 	}
2168 }
2169 
/*
 * Append the option name "opt" to the output buffer when "testval" is
 * non-zero and the name (plus its terminating NUL) fits in the space
 * that is left.  On output *buf is advanced past the text written and
 * *blen is reduced by the same amount.  The nmp argument is not used.
 */
static __inline void
nfscl_printopt(struct nfsmount *nmp, int testval, char *opt, char **buf,
    size_t *blen)
{
	size_t optlen;
	int written;

	if (testval == 0)
		return;
	optlen = strlen(opt);
	if (*blen <= optlen)
		return;		/* No room for the name and its NUL. */

	written = snprintf(*buf, *blen, "%s", opt);
	if (written != optlen)
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
2187 
/*
 * Append "opt=optval" (optval printed in decimal) to the output buffer.
 *
 * Nothing is attempted unless there is room for at least the option name
 * and one more character.  When the whole "opt=optval" text fits, *buf is
 * advanced past it and *blen is reduced accordingly.  When it does not
 * fit, the partially written text is erased again so that the buffer
 * never ends with an option whose value has lost digits; *buf and *blen
 * are left unchanged in that case.  The nmp argument is not used.
 */
static __inline void
nfscl_printoptval(struct nfsmount *nmp, int optval, char *opt, char **buf,
    size_t *blen)
{
	int len;

	if (*blen > strlen(opt) + 1) {
		len = snprintf(*buf, *blen, "%s=%d", opt, optval);
		if (len >= 0 && (size_t)len < *blen) {
			*buf += len;
			*blen -= len;
		} else {
			/*
			 * Truncated: drop the partial option.  *blen is at
			 * least 2 here, so there is room for the NUL.
			 */
			**buf = '\0';
		}
	}
}
2205 
2206 /*
2207  * Load the option flags and values into the buffer.
2208  */
nfscl_retopts(struct nfsmount * nmp,char * buffer,size_t buflen)2209 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
2210 {
2211 	char *buf;
2212 	size_t blen;
2213 
2214 	buf = buffer;
2215 	blen = buflen;
2216 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
2217 	    &blen);
2218 	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
2219 		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
2220 		    &blen);
2221 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
2222 		    &buf, &blen);
2223 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_ONEOPENOWN) != 0 &&
2224 		    nmp->nm_minorvers > 0, ",oneopenown", &buf, &blen);
2225 	}
2226 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
2227 	    &blen);
2228 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
2229 	    "nfsv2", &buf, &blen);
2230 	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
2231 	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
2232 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
2233 	    &buf, &blen);
2234 	nfscl_printopt(nmp, (nmp->nm_newflag & NFSMNT_TLS) != 0, ",tls", &buf,
2235 	    &blen);
2236 	nfscl_printopt(nmp, (nmp->nm_newflag & NFSMNT_SYSKRB5) != 0,
2237 	    ",syskrb5", &buf, &blen);
2238 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
2239 	    &buf, &blen);
2240 	nfscl_printoptval(nmp, nmp->nm_aconnect + 1, ",nconnect", &buf, &blen);
2241 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
2242 	    &blen);
2243 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
2244 	    &blen);
2245 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
2246 	    &blen);
2247 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
2248 	    &blen);
2249 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
2250 	    &blen);
2251 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
2252 	    ",noncontigwr", &buf, &blen);
2253 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
2254 	    0, ",lockd", &buf, &blen);
2255 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOLOCKD) != 0, ",nolockd",
2256 	    &buf, &blen);
2257 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
2258 	    &buf, &blen);
2259 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
2260 	    &buf, &blen);
2261 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2262 	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
2263 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2264 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
2265 	    &buf, &blen);
2266 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2267 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
2268 	    &buf, &blen);
2269 	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
2270 	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
2271 	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
2272 	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
2273 	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
2274 	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
2275 	    &blen);
2276 	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
2277 	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
2278 	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
2279 	    &blen);
2280 	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
2281 	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
2282 	    &blen);
2283 	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
2284 	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
2285 }
2286