xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision b837f100e1c854d1f805b615f0bce5ede85f8552)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1993, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Rick Macklem at The University of Guelph.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
35  */
36 
37 #include <sys/cdefs.h>
38 #include "opt_bootp.h"
39 #include "opt_nfsroot.h"
40 #include "opt_kern_tls.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/limits.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/mount.h>
54 #include <sys/proc.h>
55 #include <sys/socket.h>
56 #include <sys/socketvar.h>
57 #include <sys/sockio.h>
58 #include <sys/sysctl.h>
59 #include <sys/vnode.h>
60 #include <sys/signalvar.h>
61 
62 #include <vm/vm.h>
63 #include <vm/vm_extern.h>
64 #include <vm/uma.h>
65 
66 #include <net/if.h>
67 #include <net/route.h>
68 #include <net/route/route_ctl.h>
69 #include <netinet/in.h>
70 
71 #include <fs/nfs/nfsport.h>
72 #include <fs/nfsclient/nfsnode.h>
73 #include <fs/nfsclient/nfsmount.h>
74 #include <fs/nfsclient/nfs.h>
75 #include <nfs/nfsdiskless.h>
76 
77 #include <rpc/rpcsec_tls.h>
78 
79 FEATURE(nfscl, "NFSv4 client");
80 
81 extern int nfscl_ticks;
82 extern struct timeval nfsboottime;
83 extern int nfsrv_useacl;
84 extern int nfscl_debuglevel;
85 extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
86 extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
87 extern struct mtx ncl_iod_mutex;
88 NFSCLSTATEMUTEX;
89 extern struct mtx nfsrv_dslock_mtx;
90 
91 MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header");
92 MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct");
93 
94 SYSCTL_DECL(_vfs_nfs);
/*
 * Tunables exported under the vfs.nfs sysctl node.  All of them are
 * declared read-write (CTLFLAG_RW).
 *
 * nfs_ip_paranoia defaults to 1; it is only declared and exported in
 * this part of the file.
 */
95 static int nfs_ip_paranoia = 1;
96 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
97     &nfs_ip_paranoia, 0, "");
/*
 * NOTE(review): the two "downdelay" sysctls below pass the
 * NFS_TPRINTF_*DELAY constants in the position where the other
 * declarations pass OID_AUTO.  Presumably historical; confirm before
 * changing, since that argument selects the numeric OID.
 */
98 static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
99 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
100         downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
101 /* how long between console messages "nfs server foo not responding" */
102 static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
103 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
104         downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
/* The debug flag and its sysctl exist only in kernels built with NFS_DEBUG. */
105 #ifdef NFS_DEBUG
106 int nfs_debug;
107 SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
108     "Toggle debug flag");
109 #endif
110 
111 static int	nfs_mountroot(struct mount *);
112 static void	nfs_sec_name(char *, int *);
113 static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
114 		    struct nfs_args *argp, const char *, struct ucred *,
115 		    struct thread *);
116 static int	mountnfs(struct nfs_args *, struct mount *,
117 		    struct sockaddr *, char *, u_char *, int, u_char *, int,
118 		    u_char *, int, struct vnode **, struct ucred *,
119 		    struct thread *, int, int, int, uint32_t, char *, int);
120 static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
121 		    struct sockaddr_storage *, int *, off_t *,
122 		    struct timeval *);
123 static vfs_mount_t nfs_mount;
124 static vfs_cmount_t nfs_cmount;
125 static vfs_unmount_t nfs_unmount;
126 static vfs_root_t nfs_root;
127 static vfs_statfs_t nfs_statfs;
128 static vfs_sync_t nfs_sync;
129 static vfs_sysctl_t nfs_sysctl;
130 static vfs_purge_t nfs_purge;
131 
132 /*
133  * nfs vfs operations.
134  */
/*
 * Operations vector for the "nfs" file system type; it is handed to
 * VFS_SET() below.  The root vnode is looked up through
 * vfs_cache_root(), with nfs_root installed as the .vfs_cachedroot
 * handler.
 */
135 static struct vfsops nfs_vfsops = {
136 	.vfs_init =		ncl_init,
137 	.vfs_mount =		nfs_mount,
138 	.vfs_cmount =		nfs_cmount,
139 	.vfs_root =		vfs_cache_root,
140 	.vfs_cachedroot =	nfs_root,
141 	.vfs_statfs =		nfs_statfs,
142 	.vfs_sync =		nfs_sync,
143 	.vfs_uninit =		ncl_uninit,
144 	.vfs_unmount =		nfs_unmount,
145 	.vfs_sysctl =		nfs_sysctl,
146 	.vfs_purge =		nfs_purge,
147 };
148 /*
149  * This macro declares that the file system type is named "nfs".
150  * It also declares a module name of "nfs" and uses vfs_modevent()
151  * as the event handling function.
152  * The main module declaration is found in sys/fs/nfsclient/nfs_clport.c
153  * for "nfscl" and is needed so that a custom event handling
154  * function gets called.  MODULE_DEPEND() macros are found there.
155  */
156 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
157 
158 MODULE_VERSION(nfs, 1);
159 
160 /*
161  * This structure is now defined in sys/nfs/nfs_diskless.c so that it
162  * can be shared by both NFS clients. It is declared here so that it
163  * will be defined for kernels built without NFS_ROOT, although it
164  * isn't used in that case.
165  */
166 #if !defined(NFS_ROOT)
167 struct nfs_diskless	nfs_diskless = { { { 0 } } };
168 struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
169 int			nfs_diskless_valid = 0;
170 #endif
171 
172 SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
173     &nfs_diskless_valid, 0,
174     "Has the diskless struct been filled correctly");
175 
176 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
177     nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
178 
179 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
180     &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
181     "%Ssockaddr_in", "Diskless root nfs address");
182 
183 void		newnfsargs_ntoh(struct nfs_args *);
184 static int	nfs_mountdiskless(char *,
185 		    struct sockaddr_in *, struct nfs_args *,
186 		    struct thread *, struct vnode **, struct mount *);
187 static void	nfs_convert_diskless(void);
188 static void	nfs_convert_oargs(struct nfs_args *args,
189 		    struct onfs_args *oargs);
190 
191 int
newnfs_iosize(struct nfsmount * nmp)192 newnfs_iosize(struct nfsmount *nmp)
193 {
194 	int iosize, maxio;
195 
196 	/* First, set the upper limit for iosize */
197 	if (nmp->nm_flag & NFSMNT_NFSV4) {
198 		maxio = NFS_MAXBSIZE;
199 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
200 		if (nmp->nm_sotype == SOCK_DGRAM)
201 			maxio = NFS_MAXDGRAMDATA;
202 		else
203 			maxio = NFS_MAXBSIZE;
204 	} else {
205 		maxio = NFS_V2MAXDATA;
206 	}
207 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
208 		nmp->nm_rsize = maxio;
209 	if (nmp->nm_rsize > NFS_MAXBSIZE)
210 		nmp->nm_rsize = NFS_MAXBSIZE;
211 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
212 		nmp->nm_readdirsize = maxio;
213 	if (nmp->nm_readdirsize > nmp->nm_rsize)
214 		nmp->nm_readdirsize = nmp->nm_rsize;
215 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
216 		nmp->nm_wsize = maxio;
217 	if (nmp->nm_wsize > NFS_MAXBSIZE)
218 		nmp->nm_wsize = NFS_MAXBSIZE;
219 
220 	/*
221 	 * Calculate the size used for io buffers.  Use the larger
222 	 * of the two sizes to minimise nfs requests but make sure
223 	 * that it is at least one VM page to avoid wasting buffer
224 	 * space.  It must also be at least NFS_DIRBLKSIZ, since
225 	 * that is the buffer size used for directories.
226 	 */
227 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
228 	iosize = imax(iosize, PAGE_SIZE);
229 	iosize = imax(iosize, NFS_DIRBLKSIZ);
230 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
231 	return (iosize);
232 }
233 
234 static void
nfs_convert_oargs(struct nfs_args * args,struct onfs_args * oargs)235 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
236 {
237 
238 	args->version = NFS_ARGSVERSION;
239 	args->addr = oargs->addr;
240 	args->addrlen = oargs->addrlen;
241 	args->sotype = oargs->sotype;
242 	args->proto = oargs->proto;
243 	args->fh = oargs->fh;
244 	args->fhsize = oargs->fhsize;
245 	args->flags = oargs->flags;
246 	args->wsize = oargs->wsize;
247 	args->rsize = oargs->rsize;
248 	args->readdirsize = oargs->readdirsize;
249 	args->timeo = oargs->timeo;
250 	args->retrans = oargs->retrans;
251 	args->readahead = oargs->readahead;
252 	args->hostname = oargs->hostname;
253 }
254 
255 static void
nfs_convert_diskless(void)256 nfs_convert_diskless(void)
257 {
258 
259 	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
260 		sizeof(struct ifaliasreq));
261 	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
262 		sizeof(struct sockaddr_in));
263 	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
264 	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
265 		nfsv3_diskless.root_fhsize = NFSX_MYFH;
266 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
267 	} else {
268 		nfsv3_diskless.root_fhsize = NFSX_V2FH;
269 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
270 	}
271 	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
272 		sizeof(struct sockaddr_in));
273 	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
274 	nfsv3_diskless.root_time = nfs_diskless.root_time;
275 	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
276 		MAXHOSTNAMELEN);
277 	nfs_diskless_valid = 3;
278 }
279 
280 /*
281  * nfs statfs call
282  */
283 static int
nfs_statfs(struct mount * mp,struct statfs * sbp)284 nfs_statfs(struct mount *mp, struct statfs *sbp)
285 {
286 	struct vnode *vp;
287 	struct thread *td;
288 	struct nfsmount *nmp = VFSTONFS(mp);
289 	struct nfsvattr nfsva;
290 	struct nfsfsinfo fs;
291 	struct nfsstatfs sb;
292 	int error = 0, attrflag, gotfsinfo = 0, ret;
293 	struct nfsnode *np;
294 	char *fakefh;
295 
296 	td = curthread;
297 
298 	error = vfs_busy(mp, MBF_NOWAIT);
299 	if (error)
300 		return (error);
301 	if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0) {
302 		if (nmp->nm_fhsize == 0) {
303 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
304 			    td->td_ucred, td);
305 			if (error != 0) {
306 				/*
307 				 * We cannot do anything yet.  Hopefully what
308 				 * is in mnt_stat is sufficient.
309 				 */
310 				if (sbp != &mp->mnt_stat)
311 					*sbp = mp->mnt_stat;
312 				strncpy(&sbp->f_fstypename[0],
313 				    mp->mnt_vfc->vfc_name, MFSNAMELEN);
314 				vfs_unbusy(mp);
315 				return (0);
316 			}
317 		}
318 		fakefh = malloc(NFSX_FHMAX + 1, M_TEMP, M_WAITOK | M_ZERO);
319 		error = ncl_nget(mp, fakefh, NFSX_FHMAX + 1, &np, LK_EXCLUSIVE);
320 		free(fakefh, M_TEMP);
321 	} else {
322 		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
323 		    LK_EXCLUSIVE);
324 	}
325 	if (error) {
326 		vfs_unbusy(mp);
327 		return (error);
328 	}
329 	vp = NFSTOV(np);
330 	mtx_lock(&nmp->nm_mtx);
331 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
332 		mtx_unlock(&nmp->nm_mtx);
333 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
334 		    &attrflag);
335 		if (!error)
336 			gotfsinfo = 1;
337 	} else
338 		mtx_unlock(&nmp->nm_mtx);
339 	if (!error)
340 		error = nfsrpc_statfs(vp, &sb, &fs, NULL, td->td_ucred, td,
341 		    &nfsva, &attrflag);
342 	if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0 &&
343 	    error == NFSERR_WRONGSEC) {
344 		/* Cannot get new stats, so return what is in mnt_stat. */
345 		if (sbp != &mp->mnt_stat)
346 			*sbp = mp->mnt_stat;
347 		strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name,
348 		    MFSNAMELEN);
349 		vput(vp);
350 		vfs_unbusy(mp);
351 		return (0);
352 	}
353 	if (error != 0)
354 		NFSCL_DEBUG(2, "statfs=%d\n", error);
355 	if (attrflag == 0) {
356 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
357 		    td->td_ucred, td, &nfsva, NULL, NULL);
358 		if (ret) {
359 			/*
360 			 * Just set default values to get things going.
361 			 */
362 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
363 			nfsva.na_vattr.va_type = VDIR;
364 			nfsva.na_vattr.va_mode = 0777;
365 			nfsva.na_vattr.va_nlink = 100;
366 			nfsva.na_vattr.va_uid = (uid_t)0;
367 			nfsva.na_vattr.va_gid = (gid_t)0;
368 			nfsva.na_vattr.va_fileid = 2;
369 			nfsva.na_vattr.va_gen = 1;
370 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
371 			nfsva.na_vattr.va_size = 512 * 1024;
372 		}
373 	}
374 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
375 	if (!error) {
376 	    mtx_lock(&nmp->nm_mtx);
377 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
378 		nfscl_loadfsinfo(nmp, &fs);
379 	    nfscl_loadsbinfo(nmp, &sb, sbp);
380 	    sbp->f_iosize = newnfs_iosize(nmp);
381 	    mtx_unlock(&nmp->nm_mtx);
382 	    if (sbp != &mp->mnt_stat) {
383 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
384 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
385 	    }
386 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
387 	} else if (NFS_ISV4(vp)) {
388 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
389 	}
390 	vput(vp);
391 	vfs_unbusy(mp);
392 	return (error);
393 }
394 
395 /*
396  * nfs version 3 fsinfo rpc call
397  */
398 int
ncl_fsinfo(struct nfsmount * nmp,struct vnode * vp,struct ucred * cred,struct thread * td)399 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
400     struct thread *td)
401 {
402 	struct nfsfsinfo fs;
403 	struct nfsvattr nfsva;
404 	int error, attrflag;
405 
406 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag);
407 	if (!error) {
408 		if (attrflag)
409 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
410 		mtx_lock(&nmp->nm_mtx);
411 		nfscl_loadfsinfo(nmp, &fs);
412 		mtx_unlock(&nmp->nm_mtx);
413 	}
414 	return (error);
415 }
416 
417 /*
418  * Mount a remote root fs via nfs. This depends on the info in the
419  * nfs_diskless structure that has been filled in properly by some primary
420  * bootstrap.
421  * It goes something like this:
422  * - do enough of "ifconfig" by calling ifioctl() so that the system
423  *   can talk to the server
424  * - If nfs_diskless.mygateway is filled in, use that address as
425  *   a default gateway.
426  * - build the rootfs mount point and call mountnfs() to do the rest.
427  *
428  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
429  * structure, as well as other global NFS client variables here, as
430  * nfs_mountroot() will be called once in the boot before any other NFS
431  * client activity occurs.
432  */
433 static int
nfs_mountroot(struct mount * mp)434 nfs_mountroot(struct mount *mp)
435 {
436 	struct thread *td = curthread;
437 	struct nfsv3_diskless *nd = &nfsv3_diskless;
438 	struct socket *so;
439 	struct vnode *vp;
440 	struct ifreq ir;
441 	int error;
442 	u_long l;
443 	char buf[128];
444 	char *cp;
445 
446 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
447 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
448 #elif defined(NFS_ROOT)
449 	nfs_setup_diskless();
450 #endif
451 
452 	if (nfs_diskless_valid == 0)
453 		return (-1);
454 	if (nfs_diskless_valid == 1)
455 		nfs_convert_diskless();
456 
457 	/*
458 	 * Do enough of ifconfig(8) so that the critical net interface can
459 	 * talk to the server.
460 	 */
461 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
462 	    td->td_ucred, td);
463 	if (error)
464 		panic("nfs_mountroot: socreate(%04x): %d",
465 			nd->myif.ifra_addr.sa_family, error);
466 
467 #if 0 /* XXX Bad idea */
468 	/*
469 	 * We might not have been told the right interface, so we pass
470 	 * over the first ten interfaces of the same kind, until we get
471 	 * one of them configured.
472 	 */
473 
474 	for (i = strlen(nd->myif.ifra_name) - 1;
475 		nd->myif.ifra_name[i] >= '0' &&
476 		nd->myif.ifra_name[i] <= '9';
477 		nd->myif.ifra_name[i] ++) {
478 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
479 		if(!error)
480 			break;
481 	}
482 #endif
483 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
484 	if (error)
485 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
486 	if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
487 		ir.ifr_mtu = strtol(cp, NULL, 10);
488 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
489 		freeenv(cp);
490 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
491 		if (error)
492 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
493 	}
494 	soclose(so);
495 
496 	/*
497 	 * If the gateway field is filled in, set it as the default route.
498 	 * Note that pxeboot will set a default route of 0 if the route
499 	 * is not set by the DHCP server.  Check also for a value of 0
500 	 * to avoid panicking inappropriately in that situation.
501 	 */
502 	if (nd->mygateway.sin_len != 0 &&
503 	    nd->mygateway.sin_addr.s_addr != 0) {
504 		struct sockaddr_in mask, sin;
505 		struct epoch_tracker et;
506 		struct rt_addrinfo info;
507 		struct rib_cmd_info rc;
508 
509 		bzero((caddr_t)&mask, sizeof(mask));
510 		sin = mask;
511 		sin.sin_family = AF_INET;
512 		sin.sin_len = sizeof(sin);
513                 /* XXX MRT use table 0 for this sort of thing */
514 		NET_EPOCH_ENTER(et);
515 		CURVNET_SET(TD_TO_VNET(td));
516 
517 		bzero((caddr_t)&info, sizeof(info));
518 		info.rti_flags = RTF_UP | RTF_GATEWAY;
519 		info.rti_info[RTAX_DST] = (struct sockaddr *)&sin;
520 		info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&nd->mygateway;
521 		info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&mask;
522 
523 		error = rib_action(RT_DEFAULT_FIB, RTM_ADD, &info, &rc);
524 		CURVNET_RESTORE();
525 		NET_EPOCH_EXIT(et);
526 		if (error)
527 			panic("nfs_mountroot: RTM_ADD: %d", error);
528 	}
529 
530 	/*
531 	 * Create the rootfs mount point.
532 	 */
533 	nd->root_args.fh = nd->root_fh;
534 	nd->root_args.fhsize = nd->root_fhsize;
535 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
536 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
537 		(l >> 24) & 0xff, (l >> 16) & 0xff,
538 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
539 	printf("NFS ROOT: %s\n", buf);
540 	nd->root_args.hostname = buf;
541 	if ((error = nfs_mountdiskless(buf,
542 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
543 		return (error);
544 	}
545 
546 	/*
547 	 * This is not really an nfs issue, but it is much easier to
548 	 * set hostname here and then let the "/etc/rc.xxx" files
549 	 * mount the right /var based upon its preset value.
550 	 */
551 	mtx_lock(&prison0.pr_mtx);
552 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
553 	    sizeof(prison0.pr_hostname));
554 	mtx_unlock(&prison0.pr_mtx);
555 	inittodr(ntohl(nd->root_time));
556 	return (0);
557 }
558 
559 /*
560  * Internal version of mount system call for diskless setup.
561  */
562 static int
nfs_mountdiskless(char * path,struct sockaddr_in * sin,struct nfs_args * args,struct thread * td,struct vnode ** vpp,struct mount * mp)563 nfs_mountdiskless(char *path,
564     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
565     struct vnode **vpp, struct mount *mp)
566 {
567 	struct sockaddr *nam;
568 	int dirlen, error;
569 	char *dirpath;
570 
571 	/*
572 	 * Find the directory path in "path", which also has the server's
573 	 * name/ip address in it.
574 	 */
575 	dirpath = strchr(path, ':');
576 	if (dirpath != NULL)
577 		dirlen = strlen(++dirpath);
578 	else
579 		dirlen = 0;
580 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
581 	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
582 	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
583 	    NFS_DEFAULT_NEGNAMETIMEO, 0, 0, NULL, 0)) != 0) {
584 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
585 		return (error);
586 	}
587 	return (0);
588 }
589 
590 static void
nfs_sec_name(char * sec,int * flagsp)591 nfs_sec_name(char *sec, int *flagsp)
592 {
593 	if (!strcmp(sec, "krb5"))
594 		*flagsp |= NFSMNT_KERB;
595 	else if (!strcmp(sec, "krb5i"))
596 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
597 	else if (!strcmp(sec, "krb5p"))
598 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
599 }
600 
601 static void
nfs_decode_args(struct mount * mp,struct nfsmount * nmp,struct nfs_args * argp,const char * hostname,struct ucred * cred,struct thread * td)602 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
603     const char *hostname, struct ucred *cred, struct thread *td)
604 {
605 	int adjsock;
606 	char *p;
607 
608 	/*
609 	 * Set read-only flag if requested; otherwise, clear it if this is
610 	 * an update.  If this is not an update, then either the read-only
611 	 * flag is already clear, or this is a root mount and it was set
612 	 * intentionally at some previous point.
613 	 */
614 	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
615 		MNT_ILOCK(mp);
616 		mp->mnt_flag |= MNT_RDONLY;
617 		MNT_IUNLOCK(mp);
618 	} else if (mp->mnt_flag & MNT_UPDATE) {
619 		MNT_ILOCK(mp);
620 		mp->mnt_flag &= ~MNT_RDONLY;
621 		MNT_IUNLOCK(mp);
622 	}
623 
624 	/*
625 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
626 	 * no sense in that context.  Also, set up appropriate retransmit
627 	 * and soft timeout behavior.
628 	 */
629 	if (argp->sotype == SOCK_STREAM) {
630 		nmp->nm_flag &= ~NFSMNT_NOCONN;
631 		nmp->nm_timeo = NFS_MAXTIMEO;
632 		if ((argp->flags & NFSMNT_NFSV4) != 0)
633 			nmp->nm_retry = INT_MAX;
634 		else
635 			nmp->nm_retry = NFS_RETRANS_TCP;
636 	}
637 
638 	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
639 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
640 		argp->flags &= ~NFSMNT_RDIRPLUS;
641 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
642 	}
643 
644 	/* Clear ONEOPENOWN for NFSv2, 3 and 4.0. */
645 	if (nmp->nm_minorvers == 0) {
646 		argp->flags &= ~NFSMNT_ONEOPENOWN;
647 		nmp->nm_flag &= ~NFSMNT_ONEOPENOWN;
648 	}
649 
650 	/* Re-bind if rsrvd port requested and wasn't on one */
651 	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
652 		  && (argp->flags & NFSMNT_RESVPORT);
653 	/* Also re-bind if we're switching to/from a connected UDP socket */
654 	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
655 		    (argp->flags & NFSMNT_NOCONN));
656 
657 	/* Update flags atomically.  Don't change the lock bits. */
658 	nmp->nm_flag = argp->flags | nmp->nm_flag;
659 
660 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
661 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
662 		if (nmp->nm_timeo < NFS_MINTIMEO)
663 			nmp->nm_timeo = NFS_MINTIMEO;
664 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
665 			nmp->nm_timeo = NFS_MAXTIMEO;
666 	}
667 
668 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
669 		nmp->nm_retry = argp->retrans;
670 		if (nmp->nm_retry > NFS_MAXREXMIT)
671 			nmp->nm_retry = NFS_MAXREXMIT;
672 	}
673 
674 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
675 		nmp->nm_wsize = argp->wsize;
676 		/*
677 		 * Clip at the power of 2 below the size. There is an
678 		 * issue (not isolated) that causes intermittent page
679 		 * faults if this is not done.
680 		 */
681 		if (nmp->nm_wsize > NFS_FABLKSIZE)
682 			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
683 		else
684 			nmp->nm_wsize = NFS_FABLKSIZE;
685 	}
686 
687 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
688 		nmp->nm_rsize = argp->rsize;
689 		/*
690 		 * Clip at the power of 2 below the size. There is an
691 		 * issue (not isolated) that causes intermittent page
692 		 * faults if this is not done.
693 		 */
694 		if (nmp->nm_rsize > NFS_FABLKSIZE)
695 			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
696 		else
697 			nmp->nm_rsize = NFS_FABLKSIZE;
698 	}
699 
700 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
701 		nmp->nm_readdirsize = argp->readdirsize;
702 	}
703 
704 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
705 		nmp->nm_acregmin = argp->acregmin;
706 	else
707 		nmp->nm_acregmin = NFS_MINATTRTIMO;
708 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
709 		nmp->nm_acregmax = argp->acregmax;
710 	else
711 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
712 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
713 		nmp->nm_acdirmin = argp->acdirmin;
714 	else
715 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
716 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
717 		nmp->nm_acdirmax = argp->acdirmax;
718 	else
719 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
720 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
721 		nmp->nm_acdirmin = nmp->nm_acdirmax;
722 	if (nmp->nm_acregmin > nmp->nm_acregmax)
723 		nmp->nm_acregmin = nmp->nm_acregmax;
724 
725 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
726 		if (argp->readahead <= NFS_MAXRAHEAD)
727 			nmp->nm_readahead = argp->readahead;
728 		else
729 			nmp->nm_readahead = NFS_MAXRAHEAD;
730 	}
731 	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
732 		if (argp->wcommitsize < nmp->nm_wsize)
733 			nmp->nm_wcommitsize = nmp->nm_wsize;
734 		else
735 			nmp->nm_wcommitsize = argp->wcommitsize;
736 	}
737 
738 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
739 		    (nmp->nm_soproto != argp->proto));
740 
741 	if (nmp->nm_client != NULL && adjsock) {
742 		int haslock = 0, error = 0;
743 
744 		if (nmp->nm_sotype == SOCK_STREAM) {
745 			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
746 			if (!error)
747 				haslock = 1;
748 		}
749 		if (!error) {
750 		    newnfs_disconnect(nmp, &nmp->nm_sockreq);
751 		    if (haslock)
752 			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
753 		    nmp->nm_sotype = argp->sotype;
754 		    nmp->nm_soproto = argp->proto;
755 		    if (nmp->nm_sotype == SOCK_DGRAM)
756 			while (newnfs_connect(nmp, &nmp->nm_sockreq,
757 			    cred, td, 0, false, &nmp->nm_sockreq.nr_client)) {
758 				printf("newnfs_args: retrying connect\n");
759 				(void) nfs_catnap(PSOCK, 0, "nfscon");
760 			}
761 		}
762 	} else {
763 		nmp->nm_sotype = argp->sotype;
764 		nmp->nm_soproto = argp->proto;
765 	}
766 
767 	if (hostname != NULL) {
768 		strlcpy(nmp->nm_hostname, hostname,
769 		    sizeof(nmp->nm_hostname));
770 		p = strchr(nmp->nm_hostname, ':');
771 		if (p != NULL)
772 			*p = '\0';
773 	}
774 }
775 
/*
 * NULL-terminated list of mount option names.  nfs_mount() passes it to
 * vfs_filteropt() and fails the mount with EINVAL when that check
 * reports a problem.
 */
776 static const char *nfs_opts[] = { "from", "nfs_args",
777     "noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
778     "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
779     "async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
780     "readdirsize", "soft", "hard", "mntudp", "tcp", "udp", "wsize", "rsize",
781     "retrans", "actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax",
782     "resvport", "readahead", "hostname", "timeo", "timeout", "addr", "fh",
783     "nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname", "dirpath",
784     "minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr",
785     "pnfs", "wcommitsize", "oneopenown", "tls", "tlscertname", "nconnect",
786     "syskrb5", NULL };
787 
788 /*
789  * Parse the "from" mountarg, passed by the generic mount(8) program
790  * or the mountroot code.  This is used when rerooting into NFS.
791  *
792  * Note that the "hostname" is actually a "hostname:/share/path" string.
793  */
794 static int
nfs_mount_parse_from(struct vfsoptlist * opts,char ** hostnamep,struct sockaddr_in ** sinp,char * dirpath,size_t dirpathsize,int * dirlenp)795 nfs_mount_parse_from(struct vfsoptlist *opts, char **hostnamep,
796     struct sockaddr_in **sinp, char *dirpath, size_t dirpathsize, int *dirlenp)
797 {
798 	char *nam, *delimp, *hostp, *spec;
799 	int error, have_bracket = 0, offset, rv, speclen;
800 	struct sockaddr_in *sin;
801 	size_t len;
802 
803 	error = vfs_getopt(opts, "from", (void **)&spec, &speclen);
804 	if (error != 0)
805 		return (error);
806 	nam = malloc(MNAMELEN + 1, M_TEMP, M_WAITOK);
807 
808 	/*
809 	 * This part comes from sbin/mount_nfs/mount_nfs.c:getnfsargs().
810 	 */
811 	if (*spec == '[' && (delimp = strchr(spec + 1, ']')) != NULL &&
812 	    *(delimp + 1) == ':') {
813 		hostp = spec + 1;
814 		spec = delimp + 2;
815 		have_bracket = 1;
816 	} else if ((delimp = strrchr(spec, ':')) != NULL) {
817 		hostp = spec;
818 		spec = delimp + 1;
819 	} else if ((delimp = strrchr(spec, '@')) != NULL) {
820 		printf("%s: path@server syntax is deprecated, "
821 		    "use server:path\n", __func__);
822 		hostp = delimp + 1;
823 	} else {
824 		printf("%s: no <host>:<dirpath> nfs-name\n", __func__);
825 		free(nam, M_TEMP);
826 		return (EINVAL);
827 	}
828 	*delimp = '\0';
829 
830 	/*
831 	 * If there has been a trailing slash at mounttime it seems
832 	 * that some mountd implementations fail to remove the mount
833 	 * entries from their mountlist while unmounting.
834 	 */
835 	for (speclen = strlen(spec);
836 	    speclen > 1 && spec[speclen - 1] == '/';
837 	    speclen--)
838 		spec[speclen - 1] = '\0';
839 	if (strlen(hostp) + strlen(spec) + 1 > MNAMELEN) {
840 		printf("%s: %s:%s: name too long", __func__, hostp, spec);
841 		free(nam, M_TEMP);
842 		return (EINVAL);
843 	}
844 	/* Make both '@' and ':' notations equal */
845 	if (*hostp != '\0') {
846 		len = strlen(hostp);
847 		offset = 0;
848 		if (have_bracket)
849 			nam[offset++] = '[';
850 		memmove(nam + offset, hostp, len);
851 		if (have_bracket)
852 			nam[len + offset++] = ']';
853 		nam[len + offset++] = ':';
854 		memmove(nam + len + offset, spec, speclen);
855 		nam[len + speclen + offset] = '\0';
856 	} else
857 		nam[0] = '\0';
858 
859 	/*
860 	 * XXX: IPv6
861 	 */
862 	sin = malloc(sizeof(*sin), M_SONAME, M_WAITOK);
863 	rv = inet_pton(AF_INET, hostp, &sin->sin_addr);
864 	if (rv != 1) {
865 		printf("%s: cannot parse '%s', inet_pton() returned %d\n",
866 		    __func__, hostp, rv);
867 		free(nam, M_TEMP);
868 		free(sin, M_SONAME);
869 		return (EINVAL);
870 	}
871 
872 	sin->sin_len = sizeof(*sin);
873 	sin->sin_family = AF_INET;
874 	/*
875 	 * XXX: hardcoded port number.
876 	 */
877 	sin->sin_port = htons(2049);
878 
879 	*hostnamep = strdup(nam, M_NEWNFSMNT);
880 	*sinp = sin;
881 	strlcpy(dirpath, spec, dirpathsize);
882 	*dirlenp = strlen(dirpath);
883 
884 	free(nam, M_TEMP);
885 	return (0);
886 }
887 
888 /*
889  * VFS Operations.
890  *
891  * mount system call
892  * It seems a bit dumb to copyinstr() the host and path here and then
893  * bcopy() them in mountnfs(), but I wanted to detect errors before
894  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
895  * an error after that means that I have to release the mbuf.
896  */
897 /* ARGSUSED */
898 static int
nfs_mount(struct mount * mp)899 nfs_mount(struct mount *mp)
900 {
901 	struct nfs_args args = {
902 	    .version = NFS_ARGSVERSION,
903 	    .addr = NULL,
904 	    .addrlen = sizeof (struct sockaddr_in),
905 	    .sotype = SOCK_STREAM,
906 	    .proto = 0,
907 	    .fh = NULL,
908 	    .fhsize = 0,
909 	    .flags = NFSMNT_RESVPORT,
910 	    .wsize = NFS_WSIZE,
911 	    .rsize = NFS_RSIZE,
912 	    .readdirsize = NFS_READDIRSIZE,
913 	    .timeo = 10,
914 	    .retrans = NFS_RETRANS,
915 	    .readahead = NFS_DEFRAHEAD,
916 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
917 	    .hostname = NULL,
918 	    .acregmin = NFS_MINATTRTIMO,
919 	    .acregmax = NFS_MAXATTRTIMO,
920 	    .acdirmin = NFS_MINDIRATTRTIMO,
921 	    .acdirmax = NFS_MAXDIRATTRTIMO,
922 	};
923 	int error = 0, ret, len;
924 	struct sockaddr *nam = NULL;
925 	struct vnode *vp;
926 	struct thread *td;
927 	char *hst;
928 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
929 	char *cp, *opt, *name, *secname, *tlscertname;
930 	int nametimeo = NFS_DEFAULT_NAMETIMEO;
931 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
932 	int minvers = -1;
933 	int dirlen, has_nfs_args_opt, has_nfs_from_opt,
934 	    krbnamelen, srvkrbnamelen;
935 	size_t hstlen;
936 	uint32_t newflag;
937 	int aconn = 0;
938 
939 	has_nfs_args_opt = 0;
940 	has_nfs_from_opt = 0;
941 	newflag = 0;
942 	tlscertname = NULL;
943 	hst = malloc(MNAMELEN, M_TEMP, M_WAITOK);
944 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
945 		error = EINVAL;
946 		goto out;
947 	}
948 
949 	td = curthread;
950 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS &&
951 	    nfs_diskless_valid != 0) {
952 		error = nfs_mountroot(mp);
953 		goto out;
954 	}
955 
956 	nfscl_init();
957 
958 	/*
959 	 * The old mount_nfs program passed the struct nfs_args
960 	 * from userspace to kernel.  The new mount_nfs program
961 	 * passes string options via nmount() from userspace to kernel
962 	 * and we populate the struct nfs_args in the kernel.
963 	 */
964 	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
965 		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
966 		    sizeof(args));
967 		if (error != 0)
968 			goto out;
969 
970 		if (args.version != NFS_ARGSVERSION) {
971 			error = EPROGMISMATCH;
972 			goto out;
973 		}
974 		has_nfs_args_opt = 1;
975 	}
976 
977 	/* Handle the new style options. */
978 	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
979 		args.acdirmin = args.acdirmax =
980 		    args.acregmin = args.acregmax = 0;
981 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
982 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
983 	}
984 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
985 		args.flags |= NFSMNT_NOCONN;
986 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
987 		args.flags &= ~NFSMNT_NOCONN;
988 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
989 		args.flags |= NFSMNT_NOLOCKD;
990 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
991 		args.flags &= ~NFSMNT_NOLOCKD;
992 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
993 		args.flags |= NFSMNT_INT;
994 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
995 		args.flags |= NFSMNT_RDIRPLUS;
996 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
997 		args.flags |= NFSMNT_RESVPORT;
998 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
999 		args.flags &= ~NFSMNT_RESVPORT;
1000 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
1001 		args.flags |= NFSMNT_SOFT;
1002 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
1003 		args.flags &= ~NFSMNT_SOFT;
1004 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
1005 		args.sotype = SOCK_DGRAM;
1006 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
1007 		args.sotype = SOCK_DGRAM;
1008 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
1009 		args.sotype = SOCK_STREAM;
1010 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
1011 		args.flags |= NFSMNT_NFSV3;
1012 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
1013 		args.flags |= NFSMNT_NFSV4;
1014 		args.sotype = SOCK_STREAM;
1015 	}
1016 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
1017 		args.flags |= NFSMNT_ALLGSSNAME;
1018 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
1019 		args.flags |= NFSMNT_NOCTO;
1020 	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
1021 		args.flags |= NFSMNT_NONCONTIGWR;
1022 	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
1023 		args.flags |= NFSMNT_PNFS;
1024 	if (vfs_getopt(mp->mnt_optnew, "oneopenown", NULL, NULL) == 0)
1025 		args.flags |= NFSMNT_ONEOPENOWN;
1026 	if (vfs_getopt(mp->mnt_optnew, "tls", NULL, NULL) == 0)
1027 		newflag |= NFSMNT_TLS;
1028 	if (vfs_getopt(mp->mnt_optnew, "tlscertname", (void **)&opt, &len) ==
1029 	    0) {
1030 		/*
1031 		 * tlscertname with "key.pem" appended to it forms a file
1032 		 * name.  As such, the maximum allowable strlen(tlscertname) is
1033 		 * NAME_MAX - 7. However, "len" includes the nul termination
1034 		 * byte so it can be up to NAME_MAX - 6.
1035 		 */
1036 		if (opt == NULL || len <= 1 || len > NAME_MAX - 6) {
1037 			vfs_mount_error(mp, "invalid tlscertname");
1038 			error = EINVAL;
1039 			goto out;
1040 		}
1041 		tlscertname = malloc(len, M_NEWNFSMNT, M_WAITOK);
1042 		strlcpy(tlscertname, opt, len);
1043 	}
1044 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
1045 		if (opt == NULL) {
1046 			vfs_mount_error(mp, "illegal readdirsize");
1047 			error = EINVAL;
1048 			goto out;
1049 		}
1050 		ret = sscanf(opt, "%d", &args.readdirsize);
1051 		if (ret != 1 || args.readdirsize <= 0) {
1052 			vfs_mount_error(mp, "illegal readdirsize: %s",
1053 			    opt);
1054 			error = EINVAL;
1055 			goto out;
1056 		}
1057 		args.flags |= NFSMNT_READDIRSIZE;
1058 	}
1059 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
1060 		if (opt == NULL) {
1061 			vfs_mount_error(mp, "illegal readahead");
1062 			error = EINVAL;
1063 			goto out;
1064 		}
1065 		ret = sscanf(opt, "%d", &args.readahead);
1066 		if (ret != 1 || args.readahead <= 0) {
1067 			vfs_mount_error(mp, "illegal readahead: %s",
1068 			    opt);
1069 			error = EINVAL;
1070 			goto out;
1071 		}
1072 		args.flags |= NFSMNT_READAHEAD;
1073 	}
1074 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
1075 		if (opt == NULL) {
1076 			vfs_mount_error(mp, "illegal wsize");
1077 			error = EINVAL;
1078 			goto out;
1079 		}
1080 		ret = sscanf(opt, "%d", &args.wsize);
1081 		if (ret != 1 || args.wsize <= 0) {
1082 			vfs_mount_error(mp, "illegal wsize: %s",
1083 			    opt);
1084 			error = EINVAL;
1085 			goto out;
1086 		}
1087 		args.flags |= NFSMNT_WSIZE;
1088 	}
1089 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
1090 		if (opt == NULL) {
1091 			vfs_mount_error(mp, "illegal rsize");
1092 			error = EINVAL;
1093 			goto out;
1094 		}
1095 		ret = sscanf(opt, "%d", &args.rsize);
1096 		if (ret != 1 || args.rsize <= 0) {
1097 			vfs_mount_error(mp, "illegal wsize: %s",
1098 			    opt);
1099 			error = EINVAL;
1100 			goto out;
1101 		}
1102 		args.flags |= NFSMNT_RSIZE;
1103 	}
1104 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
1105 		if (opt == NULL) {
1106 			vfs_mount_error(mp, "illegal retrans");
1107 			error = EINVAL;
1108 			goto out;
1109 		}
1110 		ret = sscanf(opt, "%d", &args.retrans);
1111 		if (ret != 1 || args.retrans <= 0) {
1112 			vfs_mount_error(mp, "illegal retrans: %s",
1113 			    opt);
1114 			error = EINVAL;
1115 			goto out;
1116 		}
1117 		args.flags |= NFSMNT_RETRANS;
1118 	}
1119 	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
1120 		ret = sscanf(opt, "%d", &args.acregmin);
1121 		if (ret != 1 || args.acregmin < 0) {
1122 			vfs_mount_error(mp, "illegal actimeo: %s",
1123 			    opt);
1124 			error = EINVAL;
1125 			goto out;
1126 		}
1127 		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
1128 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
1129 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
1130 	}
1131 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
1132 		ret = sscanf(opt, "%d", &args.acregmin);
1133 		if (ret != 1 || args.acregmin < 0) {
1134 			vfs_mount_error(mp, "illegal acregmin: %s",
1135 			    opt);
1136 			error = EINVAL;
1137 			goto out;
1138 		}
1139 		args.flags |= NFSMNT_ACREGMIN;
1140 	}
1141 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
1142 		ret = sscanf(opt, "%d", &args.acregmax);
1143 		if (ret != 1 || args.acregmax < 0) {
1144 			vfs_mount_error(mp, "illegal acregmax: %s",
1145 			    opt);
1146 			error = EINVAL;
1147 			goto out;
1148 		}
1149 		args.flags |= NFSMNT_ACREGMAX;
1150 	}
1151 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
1152 		ret = sscanf(opt, "%d", &args.acdirmin);
1153 		if (ret != 1 || args.acdirmin < 0) {
1154 			vfs_mount_error(mp, "illegal acdirmin: %s",
1155 			    opt);
1156 			error = EINVAL;
1157 			goto out;
1158 		}
1159 		args.flags |= NFSMNT_ACDIRMIN;
1160 	}
1161 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
1162 		ret = sscanf(opt, "%d", &args.acdirmax);
1163 		if (ret != 1 || args.acdirmax < 0) {
1164 			vfs_mount_error(mp, "illegal acdirmax: %s",
1165 			    opt);
1166 			error = EINVAL;
1167 			goto out;
1168 		}
1169 		args.flags |= NFSMNT_ACDIRMAX;
1170 	}
1171 	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
1172 		ret = sscanf(opt, "%d", &args.wcommitsize);
1173 		if (ret != 1 || args.wcommitsize < 0) {
1174 			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1175 			error = EINVAL;
1176 			goto out;
1177 		}
1178 		args.flags |= NFSMNT_WCOMMITSIZE;
1179 	}
1180 	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1181 		ret = sscanf(opt, "%d", &args.timeo);
1182 		if (ret != 1 || args.timeo <= 0) {
1183 			vfs_mount_error(mp, "illegal timeo: %s",
1184 			    opt);
1185 			error = EINVAL;
1186 			goto out;
1187 		}
1188 		args.flags |= NFSMNT_TIMEO;
1189 	}
1190 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1191 		ret = sscanf(opt, "%d", &args.timeo);
1192 		if (ret != 1 || args.timeo <= 0) {
1193 			vfs_mount_error(mp, "illegal timeout: %s",
1194 			    opt);
1195 			error = EINVAL;
1196 			goto out;
1197 		}
1198 		args.flags |= NFSMNT_TIMEO;
1199 	}
1200 	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1201 		ret = sscanf(opt, "%d", &nametimeo);
1202 		if (ret != 1 || nametimeo < 0) {
1203 			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1204 			error = EINVAL;
1205 			goto out;
1206 		}
1207 	}
1208 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1209 	    == 0) {
1210 		ret = sscanf(opt, "%d", &negnametimeo);
1211 		if (ret != 1 || negnametimeo < 0) {
1212 			vfs_mount_error(mp, "illegal negnametimeo: %s",
1213 			    opt);
1214 			error = EINVAL;
1215 			goto out;
1216 		}
1217 	}
1218 	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1219 	    0) {
1220 		ret = sscanf(opt, "%d", &minvers);
1221 		if (ret != 1 || minvers < 0 || minvers > 2 ||
1222 		    (args.flags & NFSMNT_NFSV4) == 0) {
1223 			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1224 			error = EINVAL;
1225 			goto out;
1226 		}
1227 	}
1228 	if (vfs_getopt(mp->mnt_optnew, "nconnect", (void **)&opt, NULL) ==
1229 	    0) {
1230 		ret = sscanf(opt, "%d", &aconn);
1231 		if (ret != 1 || aconn < 1 || aconn > NFS_MAXNCONN) {
1232 			vfs_mount_error(mp, "illegal nconnect: %s", opt);
1233 			error = EINVAL;
1234 			goto out;
1235 		}
1236 		/*
1237 		 * Setting nconnect=1 is a no-op, allowed so that
1238 		 * the option can be used in a Linux compatible way.
1239 		 */
1240 		aconn--;
1241 	}
1242 	if (vfs_getopt(mp->mnt_optnew, "syskrb5", NULL, NULL) == 0)
1243 		newflag |= NFSMNT_SYSKRB5;
1244 	if (vfs_getopt(mp->mnt_optnew, "sec",
1245 		(void **) &secname, NULL) == 0)
1246 		nfs_sec_name(secname, &args.flags);
1247 
1248 	if (mp->mnt_flag & MNT_UPDATE) {
1249 		struct nfsmount *nmp = VFSTONFS(mp);
1250 
1251 		if (nmp == NULL) {
1252 			error = EIO;
1253 			goto out;
1254 		}
1255 
1256 		/*
1257 		 * If a change from TCP->UDP is done and there are thread(s)
1258 		 * that have I/O RPC(s) in progress with a transfer size
1259 		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1260 		 * hung, retrying the RPC(s) forever. Usually these threads
1261 		 * will be seen doing an uninterruptible sleep on wait channel
1262 		 * "nfsreq".
1263 		 */
1264 		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1265 			tprintf(td->td_proc, LOG_WARNING,
1266 	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1267 
1268 		/*
1269 		 * When doing an update, we can't change version,
1270 		 * security, switch lockd strategies, change cookie
1271 		 * translation or switch oneopenown.
1272 		 */
1273 		args.flags = (args.flags &
1274 		    ~(NFSMNT_NFSV3 |
1275 		      NFSMNT_NFSV4 |
1276 		      NFSMNT_KERB |
1277 		      NFSMNT_INTEGRITY |
1278 		      NFSMNT_PRIVACY |
1279 		      NFSMNT_ONEOPENOWN |
1280 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1281 		    (nmp->nm_flag &
1282 			(NFSMNT_NFSV3 |
1283 			 NFSMNT_NFSV4 |
1284 			 NFSMNT_KERB |
1285 			 NFSMNT_INTEGRITY |
1286 			 NFSMNT_PRIVACY |
1287 			 NFSMNT_ONEOPENOWN |
1288 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1289 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1290 		goto out;
1291 	}
1292 
1293 	/*
1294 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1295 	 * or no-connection mode for those protocols that support
1296 	 * no-connection mode (the flag will be cleared later for protocols
1297 	 * that do not support no-connection mode).  This will allow a client
1298 	 * to receive replies from a different IP then the request was
1299 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1300 	 * not 0.
1301 	 */
1302 	if (nfs_ip_paranoia == 0)
1303 		args.flags |= NFSMNT_NOCONN;
1304 
1305 	if (has_nfs_args_opt != 0) {
1306 		/*
1307 		 * In the 'nfs_args' case, the pointers in the args
1308 		 * structure are in userland - we copy them in here.
1309 		 */
1310 		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1311 			vfs_mount_error(mp, "Bad file handle");
1312 			error = EINVAL;
1313 			goto out;
1314 		}
1315 		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1316 		    args.fhsize);
1317 		if (error != 0)
1318 			goto out;
1319 		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1320 		if (error != 0)
1321 			goto out;
1322 		bzero(&hst[hstlen], MNAMELEN - hstlen);
1323 		args.hostname = hst;
1324 		/* getsockaddr() call must be after above copyin() calls */
1325 		error = getsockaddr(&nam, args.addr, args.addrlen);
1326 		if (error != 0)
1327 			goto out;
1328 	} else if (nfs_mount_parse_from(mp->mnt_optnew,
1329 	    &args.hostname, (struct sockaddr_in **)&nam, dirpath,
1330 	    sizeof(dirpath), &dirlen) == 0) {
1331 		has_nfs_from_opt = 1;
1332 		bcopy(args.hostname, hst, MNAMELEN);
1333 		hst[MNAMELEN - 1] = '\0';
1334 
1335 		/*
1336 		 * This only works with NFSv4 for now.
1337 		 */
1338 		args.fhsize = 0;
1339 		args.flags |= NFSMNT_NFSV4;
1340 		args.sotype = SOCK_STREAM;
1341 	} else {
1342 		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1343 		    &args.fhsize) == 0) {
1344 			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1345 				vfs_mount_error(mp, "Bad file handle");
1346 				error = EINVAL;
1347 				goto out;
1348 			}
1349 			bcopy(args.fh, nfh, args.fhsize);
1350 		} else {
1351 			args.fhsize = 0;
1352 		}
1353 		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1354 		    (void **)&args.hostname, &len);
1355 		if (args.hostname == NULL) {
1356 			vfs_mount_error(mp, "Invalid hostname");
1357 			error = EINVAL;
1358 			goto out;
1359 		}
1360 		if (len >= MNAMELEN) {
1361 			vfs_mount_error(mp, "Hostname too long");
1362 			error = EINVAL;
1363 			goto out;
1364 		}
1365 		bcopy(args.hostname, hst, len);
1366 		hst[len] = '\0';
1367 	}
1368 
1369 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1370 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1371 	else {
1372 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1373 		cp = strchr(srvkrbname, ':');
1374 		if (cp != NULL)
1375 			*cp = '\0';
1376 	}
1377 	srvkrbnamelen = strlen(srvkrbname);
1378 
1379 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1380 		strlcpy(krbname, name, sizeof (krbname));
1381 	else
1382 		krbname[0] = '\0';
1383 	krbnamelen = strlen(krbname);
1384 
1385 	if (has_nfs_from_opt == 0) {
1386 		if (vfs_getopt(mp->mnt_optnew,
1387 		    "dirpath", (void **)&name, NULL) == 0)
1388 			strlcpy(dirpath, name, sizeof (dirpath));
1389 		else
1390 			dirpath[0] = '\0';
1391 		dirlen = strlen(dirpath);
1392 	}
1393 
1394 	if (has_nfs_args_opt == 0 && has_nfs_from_opt == 0) {
1395 		if (vfs_getopt(mp->mnt_optnew, "addr",
1396 		    (void **)&args.addr, &args.addrlen) == 0) {
1397 			if (args.addrlen > SOCK_MAXADDRLEN) {
1398 				error = ENAMETOOLONG;
1399 				goto out;
1400 			}
1401 			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1402 			bcopy(args.addr, nam, args.addrlen);
1403 			nam->sa_len = args.addrlen;
1404 		} else {
1405 			vfs_mount_error(mp, "No server address");
1406 			error = EINVAL;
1407 			goto out;
1408 		}
1409 	}
1410 
1411 	if (aconn > 0 && (args.sotype != SOCK_STREAM ||
1412 	    (args.flags & NFSMNT_NFSV4) == 0 || minvers == 0)) {
1413 		/*
1414 		 * RFC 5661 requires that an NFSv4.1/4.2 server
1415 		 * send an RPC reply on the same TCP connection
1416 		 * as the one it received the request on.
1417 		 * This property in required for "nconnect" and
1418 		 * might not be the case for NFSv3 or NFSv4.0 servers.
1419 		 */
1420 		vfs_mount_error(mp, "nconnect should only be used "
1421 		    "for NFSv4.1/4.2 mounts");
1422 		error = EINVAL;
1423 		goto out;
1424 	}
1425 
1426 	if ((newflag & NFSMNT_SYSKRB5) != 0 &&
1427 	    ((args.flags & NFSMNT_NFSV4) == 0 || minvers == 0)) {
1428 		/*
1429 		 * This option requires the use of SP4_NONE, which
1430 		 * is only in NFSv4.1/4.2.
1431 		 */
1432 		vfs_mount_error(mp, "syskrb5 should only be used "
1433 		    "for NFSv4.1/4.2 mounts");
1434 		error = EINVAL;
1435 		goto out;
1436 	}
1437 
1438 	if ((newflag & NFSMNT_SYSKRB5) != 0 &&
1439 	    (args.flags & NFSMNT_KERB) == 0) {
1440 		/*
1441 		 * This option modifies the behaviour of sec=krb5[ip].
1442 		 */
1443 		vfs_mount_error(mp, "syskrb5 should only be used "
1444 		    "for sec=krb5[ip] mounts");
1445 		error = EINVAL;
1446 		goto out;
1447 	}
1448 
1449 	if ((newflag & NFSMNT_SYSKRB5) != 0 && krbname[0] != '\0') {
1450 		/*
1451 		 * This option is used as an alternative to "gssname".
1452 		 */
1453 		vfs_mount_error(mp, "syskrb5 should not be used "
1454 		    "with the gssname option");
1455 		error = EINVAL;
1456 		goto out;
1457 	}
1458 
1459 	args.fh = nfh;
1460 	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1461 	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1462 	    nametimeo, negnametimeo, minvers, newflag, tlscertname, aconn);
1463 out:
1464 	if (!error) {
1465 		MNT_ILOCK(mp);
1466 		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1467 		    MNTK_USES_BCACHE;
1468 		if ((VFSTONFS(mp)->nm_flag & NFSMNT_NFSV4) != 0)
1469 			mp->mnt_kern_flag |= MNTK_NULL_NOCACHE;
1470 		MNT_IUNLOCK(mp);
1471 	}
1472 	free(hst, M_TEMP);
1473 	return (error);
1474 }
1475 
1476 /*
1477  * VFS Operations.
1478  *
1479  * mount system call
1480  * It seems a bit dumb to copyinstr() the host and path here and then
1481  * bcopy() them in mountnfs(), but I wanted to detect errors before
1482  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
1483  * an error after that means that I have to release the mbuf.
1484  */
1485 /* ARGSUSED */
1486 static int
nfs_cmount(struct mntarg * ma,void * data,uint64_t flags)1487 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1488 {
1489 	int error;
1490 	struct nfs_args args;
1491 
1492 	error = copyin(data, &args, sizeof (struct nfs_args));
1493 	if (error)
1494 		return error;
1495 
1496 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1497 
1498 	error = kernel_mount(ma, flags);
1499 	return (error);
1500 }
1501 
1502 /*
1503  * Common code for mount and mountroot
1504  */
1505 static int
mountnfs(struct nfs_args * argp,struct mount * mp,struct sockaddr * nam,char * hst,u_char * krbname,int krbnamelen,u_char * dirpath,int dirlen,u_char * srvkrbname,int srvkrbnamelen,struct vnode ** vpp,struct ucred * cred,struct thread * td,int nametimeo,int negnametimeo,int minvers,uint32_t newflag,char * tlscertname,int aconn)1506 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1507     char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1508     u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1509     struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1510     int minvers, uint32_t newflag, char *tlscertname, int aconn)
1511 {
1512 	struct nfsmount *nmp;
1513 	struct nfsnode *np;
1514 	int error, trycnt, ret;
1515 	struct nfsvattr nfsva;
1516 	struct nfsclclient *clp;
1517 	struct nfsclds *dsp, *tdsp;
1518 	uint32_t lease;
1519 	bool tryminvers;
1520 	char *fakefh;
1521 	static u_int64_t clval = 0;
1522 #ifdef KERN_TLS
1523 	u_int maxlen;
1524 #endif
1525 
1526 	NFSCL_DEBUG(3, "in mnt\n");
1527 	CURVNET_SET(CRED_TO_VNET(cred));
1528 	clp = NULL;
1529 	if (mp->mnt_flag & MNT_UPDATE) {
1530 		nmp = VFSTONFS(mp);
1531 		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1532 		free(nam, M_SONAME);
1533 		free(tlscertname, M_NEWNFSMNT);
1534 		CURVNET_RESTORE();
1535 		return (0);
1536 	} else {
1537 		/* NFS-over-TLS requires that rpctls be functioning. */
1538 		if ((newflag & NFSMNT_TLS) != 0) {
1539 			error = EINVAL;
1540 #ifdef KERN_TLS
1541 			/* KERN_TLS is only supported for TCP. */
1542 			if (argp->sotype == SOCK_STREAM &&
1543 			    rpctls_getinfo(&maxlen, true, false))
1544 				error = 0;
1545 #endif
1546 			if (error != 0) {
1547 				free(nam, M_SONAME);
1548 				free(tlscertname, M_NEWNFSMNT);
1549 				CURVNET_RESTORE();
1550 				return (error);
1551 			}
1552 		}
1553 		nmp = malloc(sizeof (struct nfsmount) +
1554 		    krbnamelen + dirlen + srvkrbnamelen + 2,
1555 		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1556 		nmp->nm_tlscertname = tlscertname;
1557 		nmp->nm_newflag = newflag;
1558 		TAILQ_INIT(&nmp->nm_bufq);
1559 		TAILQ_INIT(&nmp->nm_sess);
1560 		if (clval == 0)
1561 			clval = (u_int64_t)nfsboottime.tv_sec;
1562 		nmp->nm_clval = clval++;
1563 		nmp->nm_krbnamelen = krbnamelen;
1564 		nmp->nm_dirpathlen = dirlen;
1565 		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1566 		if (td->td_ucred->cr_uid != (uid_t)0) {
1567 			/*
1568 			 * nm_uid is used to get KerberosV credentials for
1569 			 * the nfsv4 state handling operations if there is
1570 			 * no host based principal set. Use the uid of
1571 			 * this user if not root, since they are doing the
1572 			 * mount. I don't think setting this for root will
1573 			 * work, since root normally does not have user
1574 			 * credentials in a credentials cache.
1575 			 */
1576 			nmp->nm_uid = td->td_ucred->cr_uid;
1577 		} else {
1578 			/*
1579 			 * Just set to -1, so it won't be used.
1580 			 */
1581 			nmp->nm_uid = (uid_t)-1;
1582 		}
1583 
1584 		/* Copy and null terminate all the names */
1585 		if (nmp->nm_krbnamelen > 0) {
1586 			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1587 			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1588 		}
1589 		if (nmp->nm_dirpathlen > 0) {
1590 			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1591 			    nmp->nm_dirpathlen);
1592 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1593 			    + 1] = '\0';
1594 		}
1595 		if (nmp->nm_srvkrbnamelen > 0) {
1596 			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1597 			    nmp->nm_srvkrbnamelen);
1598 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1599 			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1600 		}
1601 		nmp->nm_sockreq.nr_cred = crhold(cred);
1602 		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1603 		mp->mnt_data = nmp;
1604 		nmp->nm_getinfo = nfs_getnlminfo;
1605 		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1606 	}
1607 	vfs_getnewfsid(mp);
1608 	nmp->nm_mountp = mp;
1609 	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1610 
1611 	/*
1612 	 * Since nfs_decode_args() might optionally set them, these
1613 	 * need to be set to defaults before the call, so that the
1614 	 * optional settings aren't overwritten.
1615 	 */
1616 	nmp->nm_nametimeo = nametimeo;
1617 	nmp->nm_negnametimeo = negnametimeo;
1618 	nmp->nm_timeo = NFS_TIMEO;
1619 	nmp->nm_retry = NFS_RETRANS;
1620 	nmp->nm_readahead = NFS_DEFRAHEAD;
1621 
1622 	/* This is empirical approximation of sqrt(hibufspace) * 256. */
1623 	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
1624 	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
1625 		nmp->nm_wcommitsize *= 2;
1626 	nmp->nm_wcommitsize *= 256;
1627 
1628 	tryminvers = false;
1629 	if ((argp->flags & NFSMNT_NFSV4) != 0) {
1630 		if (minvers < 0) {
1631 			tryminvers = true;
1632 			minvers = NFSV42_MINORVERSION;
1633 		}
1634 		nmp->nm_minorvers = minvers;
1635 	} else
1636 		nmp->nm_minorvers = 0;
1637 
1638 	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1639 
1640 	/*
1641 	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1642 	 * high, depending on whether we end up with negative offsets in
1643 	 * the client or server somewhere.  2GB-1 may be safer.
1644 	 *
1645 	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1646 	 * that we can handle until we find out otherwise.
1647 	 */
1648 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1649 		nmp->nm_maxfilesize = 0xffffffffLL;
1650 	else
1651 		nmp->nm_maxfilesize = OFF_MAX;
1652 
1653 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1654 		nmp->nm_wsize = NFS_WSIZE;
1655 		nmp->nm_rsize = NFS_RSIZE;
1656 		nmp->nm_readdirsize = NFS_READDIRSIZE;
1657 	}
1658 	nmp->nm_numgrps = NFS_MAXGRPS;
1659 	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1660 	if (nmp->nm_tprintf_delay < 0)
1661 		nmp->nm_tprintf_delay = 0;
1662 	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1663 	if (nmp->nm_tprintf_initial_delay < 0)
1664 		nmp->nm_tprintf_initial_delay = 0;
1665 	nmp->nm_fhsize = argp->fhsize;
1666 	if (nmp->nm_fhsize > 0)
1667 		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1668 	strlcpy(mp->mnt_stat.f_mntfromname, hst, MNAMELEN);
1669 	nmp->nm_nam = nam;
1670 	/* Set up the sockets and per-host congestion */
1671 	nmp->nm_sotype = argp->sotype;
1672 	nmp->nm_soproto = argp->proto;
1673 	nmp->nm_sockreq.nr_prog = NFS_PROG;
1674 	if ((argp->flags & NFSMNT_NFSV4))
1675 		nmp->nm_sockreq.nr_vers = NFS_VER4;
1676 	else if ((argp->flags & NFSMNT_NFSV3))
1677 		nmp->nm_sockreq.nr_vers = NFS_VER3;
1678 	else
1679 		nmp->nm_sockreq.nr_vers = NFS_VER2;
1680 
1681 	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0, false,
1682 	    &nmp->nm_sockreq.nr_client)))
1683 		goto bad;
1684 	/* For NFSv4, get the clientid now. */
1685 	if ((argp->flags & NFSMNT_NFSV4) != 0) {
1686 		NFSCL_DEBUG(3, "at getcl\n");
1687 		error = nfscl_getcl(mp, cred, td, tryminvers, true, &clp);
1688 		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1689 		if (error != 0)
1690 			goto bad;
1691 		if (aconn > 0 && nmp->nm_minorvers == 0) {
1692 			vfs_mount_error(mp, "nconnect should only be used "
1693 			    "for NFSv4.1/4.2 mounts");
1694 			error = EINVAL;
1695 			goto bad;
1696 		}
1697 		if (NFSHASSYSKRB5(nmp) && nmp->nm_minorvers == 0) {
1698 			vfs_mount_error(mp, "syskrb5 should only be used "
1699 			    "for NFSv4.1/4.2 mounts");
1700 			error = EINVAL;
1701 			goto bad;
1702 		}
1703 	}
1704 
1705 	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1706 	    nmp->nm_dirpathlen > 0) {
1707 		NFSCL_DEBUG(3, "in dirp\n");
1708 		/*
1709 		 * If the fhsize on the mount point == 0 for V4, the mount
1710 		 * path needs to be looked up.
1711 		 */
1712 		trycnt = 3;
1713 		do {
1714 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1715 			    cred, td);
1716 			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
1717 			if (error != 0 && (!NFSHASSYSKRB5(nmp) ||
1718 			    error != NFSERR_WRONGSEC))
1719 				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1720 		} while (error != 0 && --trycnt > 0 &&
1721 		    (!NFSHASSYSKRB5(nmp) || error != NFSERR_WRONGSEC));
1722 		if (error != 0 && (!NFSHASSYSKRB5(nmp) ||
1723 		    error != NFSERR_WRONGSEC))
1724 			goto bad;
1725 	}
1726 
1727 	/*
1728 	 * A reference count is needed on the nfsnode representing the
1729 	 * remote root.  If this object is not persistent, then backward
1730 	 * traversals of the mount point (i.e. "..") will not work if
1731 	 * the nfsnode gets flushed out of the cache. Ufs does not have
1732 	 * this problem, because one can identify root inodes by their
1733 	 * number == UFS_ROOTINO (2).
1734 	 * For the "syskrb5" mount, the file handle might not have
1735 	 * been acquired.  As such, use a "fake" file handle which
1736 	 * can never be returned by a server for the root vnode.
1737 	 */
1738 	if (nmp->nm_fhsize > 0 || NFSHASSYSKRB5(nmp)) {
1739 		/*
1740 		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1741 		 * non-zero for the root vnode. f_iosize will be set correctly
1742 		 * by nfs_statfs() before any I/O occurs.
1743 		 */
1744 		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1745 		if (nmp->nm_fhsize == 0) {
1746 			fakefh = malloc(NFSX_FHMAX + 1, M_TEMP, M_WAITOK |
1747 			    M_ZERO);
1748 			error = ncl_nget(mp, fakefh, NFSX_FHMAX + 1, &np,
1749 			    LK_EXCLUSIVE);
1750 			free(fakefh, M_TEMP);
1751 			nmp->nm_privflag |= NFSMNTP_FAKEROOTFH;
1752 		} else
1753 			error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1754 			    LK_EXCLUSIVE);
1755 		if (error)
1756 			goto bad;
1757 		*vpp = NFSTOV(np);
1758 
1759 		/*
1760 		 * Get file attributes and transfer parameters for the
1761 		 * mountpoint.  This has the side effect of filling in
1762 		 * (*vpp)->v_type with the correct value.
1763 		 */
1764 		ret = ENXIO;
1765 		if (nmp->nm_fhsize > 0)
1766 			ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh,
1767 			    nmp->nm_fhsize, 1, cred, td, &nfsva, NULL, &lease);
1768 		if (ret) {
1769 			/*
1770 			 * Just set default values to get things going.
1771 			 */
1772 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1773 			nfsva.na_vattr.va_type = VDIR;
1774 			nfsva.na_vattr.va_mode = 0777;
1775 			nfsva.na_vattr.va_nlink = 100;
1776 			nfsva.na_vattr.va_uid = (uid_t)0;
1777 			nfsva.na_vattr.va_gid = (gid_t)0;
1778 			nfsva.na_vattr.va_fileid = 2;
1779 			nfsva.na_vattr.va_gen = 1;
1780 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1781 			nfsva.na_vattr.va_size = 512 * 1024;
1782 			lease = 20;
1783 		}
1784 		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, 0, 1);
1785 		if ((argp->flags & NFSMNT_NFSV4) != 0) {
1786 			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1787 			NFSLOCKCLSTATE();
1788 			clp->nfsc_renew = NFSCL_RENEW(lease);
1789 			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1790 			clp->nfsc_clientidrev++;
1791 			if (clp->nfsc_clientidrev == 0)
1792 				clp->nfsc_clientidrev++;
1793 			NFSUNLOCKCLSTATE();
1794 			/*
1795 			 * Mount will succeed, so the renew thread can be
1796 			 * started now.
1797 			 */
1798 			nfscl_start_renewthread(clp);
1799 			nfscl_clientrelease(clp);
1800 		}
1801 		if (argp->flags & NFSMNT_NFSV3)
1802 			ncl_fsinfo(nmp, *vpp, cred, td);
1803 
1804 		/* Mark if the mount point supports NFSv4 ACLs. */
1805 		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1806 		    ret == 0 &&
1807 		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1808 			MNT_ILOCK(mp);
1809 			mp->mnt_flag |= MNT_NFS4ACLS;
1810 			MNT_IUNLOCK(mp);
1811 		}
1812 
1813 		/* Can now allow additional connections. */
1814 		if (aconn > 0)
1815 			nmp->nm_aconnect = aconn;
1816 
1817 		/*
1818 		 * Lose the lock but keep the ref.
1819 		 */
1820 		NFSVOPUNLOCK(*vpp);
1821 		vfs_cache_root_set(mp, *vpp);
1822 		CURVNET_RESTORE();
1823 		return (0);
1824 	}
1825 	error = EIO;
1826 
1827 bad:
1828 	if (clp != NULL)
1829 		nfscl_clientrelease(clp);
1830 	newnfs_disconnect(NULL, &nmp->nm_sockreq);
1831 	crfree(nmp->nm_sockreq.nr_cred);
1832 	if (nmp->nm_sockreq.nr_auth != NULL)
1833 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1834 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1835 	mtx_destroy(&nmp->nm_mtx);
1836 	if (nmp->nm_clp != NULL) {
1837 		NFSLOCKCLSTATE();
1838 		LIST_REMOVE(nmp->nm_clp, nfsc_list);
1839 		NFSUNLOCKCLSTATE();
1840 		free(nmp->nm_clp, M_NFSCLCLIENT);
1841 	}
1842 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
1843 		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1844 		    dsp->nfsclds_sockp != NULL)
1845 			newnfs_disconnect(NULL, dsp->nfsclds_sockp);
1846 		nfscl_freenfsclds(dsp);
1847 	}
1848 	free(nmp->nm_tlscertname, M_NEWNFSMNT);
1849 	free(nmp, M_NEWNFSMNT);
1850 	free(nam, M_SONAME);
1851 	CURVNET_RESTORE();
1852 	return (error);
1853 }
1854 
1855 /*
1856  * unmount system call
1857  */
1858 static int
nfs_unmount(struct mount * mp,int mntflags)1859 nfs_unmount(struct mount *mp, int mntflags)
1860 {
1861 	struct thread *td;
1862 	struct nfsmount *nmp;
1863 	int error, flags = 0, i, trycnt = 0;
1864 	struct nfsclds *dsp, *tdsp;
1865 	struct nfscldeleg *dp, *ndp;
1866 	struct nfscldeleghead dh;
1867 
1868 	td = curthread;
1869 	TAILQ_INIT(&dh);
1870 
1871 	if (mntflags & MNT_FORCE)
1872 		flags |= FORCECLOSE;
1873 	nmp = VFSTONFS(mp);
1874 	error = 0;
1875 	/*
1876 	 * Goes something like this..
1877 	 * - Call vflush() to clear out vnodes for this filesystem
1878 	 * - Close the socket
1879 	 * - Free up the data structures
1880 	 */
1881 	/* In the forced case, cancel any outstanding requests. */
1882 	if (mntflags & MNT_FORCE) {
1883 		NFSDDSLOCK();
1884 		if (nfsv4_findmirror(nmp) != NULL)
1885 			error = ENXIO;
1886 		NFSDDSUNLOCK();
1887 		if (error)
1888 			goto out;
1889 		error = newnfs_nmcancelreqs(nmp);
1890 		if (error)
1891 			goto out;
1892 		/* For a forced close, get rid of the renew thread now */
1893 		nfscl_umount(nmp, td, &dh);
1894 	}
1895 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1896 	do {
1897 		error = vflush(mp, 1, flags, td);
1898 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1899 			(void) nfs_catnap(PSOCK, error, "newndm");
1900 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1901 	if (error)
1902 		goto out;
1903 
1904 	/*
1905 	 * We are now committed to the unmount.
1906 	 */
1907 	if ((mntflags & MNT_FORCE) == 0)
1908 		nfscl_umount(nmp, td, NULL);
1909 	else {
1910 		mtx_lock(&nmp->nm_mtx);
1911 		nmp->nm_privflag |= NFSMNTP_FORCEDISM;
1912 		mtx_unlock(&nmp->nm_mtx);
1913 	}
1914 	/* Make sure no nfsiods are assigned to this mount. */
1915 	NFSLOCKIOD();
1916 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1917 		if (ncl_iodmount[i] == nmp) {
1918 			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1919 			ncl_iodmount[i] = NULL;
1920 		}
1921 	NFSUNLOCKIOD();
1922 
1923 	/*
1924 	 * We can now set mnt_data to NULL and wait for
1925 	 * nfssvc(NFSSVC_FORCEDISM) to complete.
1926 	 */
1927 	mtx_lock(&mountlist_mtx);
1928 	mtx_lock(&nmp->nm_mtx);
1929 	mp->mnt_data = NULL;
1930 	mtx_unlock(&mountlist_mtx);
1931 	while ((nmp->nm_privflag & NFSMNTP_CANCELRPCS) != 0)
1932 		msleep(nmp, &nmp->nm_mtx, PVFS, "nfsfdism", 0);
1933 	mtx_unlock(&nmp->nm_mtx);
1934 
1935 	newnfs_disconnect(nmp, &nmp->nm_sockreq);
1936 	crfree(nmp->nm_sockreq.nr_cred);
1937 	free(nmp->nm_nam, M_SONAME);
1938 	if (nmp->nm_sockreq.nr_auth != NULL)
1939 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1940 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1941 	mtx_destroy(&nmp->nm_mtx);
1942 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
1943 		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1944 		    dsp->nfsclds_sockp != NULL)
1945 			newnfs_disconnect(NULL, dsp->nfsclds_sockp);
1946 		nfscl_freenfsclds(dsp);
1947 	}
1948 	free(nmp->nm_tlscertname, M_NEWNFSMNT);
1949 	free(nmp, M_NEWNFSMNT);
1950 
1951 	/* Free up the delegation structures for forced dismounts. */
1952 	TAILQ_FOREACH_SAFE(dp, &dh, nfsdl_list, ndp) {
1953 		TAILQ_REMOVE(&dh, dp, nfsdl_list);
1954 		free(dp, M_NFSCLDELEG);
1955 	}
1956 out:
1957 	return (error);
1958 }
1959 
1960 /*
1961  * Return root of a filesystem
1962  */
1963 static int
nfs_root(struct mount * mp,int flags,struct vnode ** vpp)1964 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1965 {
1966 	struct vnode *vp;
1967 	struct nfsmount *nmp;
1968 	struct nfsnode *np;
1969 	int error;
1970 	char *fakefh;
1971 
1972 	nmp = VFSTONFS(mp);
1973 	if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0) {
1974 		/* Attempt to get the actual root file handle. */
1975 		if (nmp->nm_fhsize == 0)
1976 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1977 			    curthread->td_ucred, curthread);
1978 		fakefh = malloc(NFSX_FHMAX + 1, M_TEMP, M_WAITOK | M_ZERO);
1979 		error = ncl_nget(mp, fakefh, NFSX_FHMAX + 1, &np, flags);
1980 		free(fakefh, M_TEMP);
1981 	} else {
1982 		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1983 	}
1984 	if (error)
1985 		return error;
1986 	vp = NFSTOV(np);
1987 	/*
1988 	 * Get transfer parameters and attributes for root vnode once.
1989 	 */
1990 	mtx_lock(&nmp->nm_mtx);
1991 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1992 		mtx_unlock(&nmp->nm_mtx);
1993 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1994 	} else
1995 		mtx_unlock(&nmp->nm_mtx);
1996 	if (vp->v_type == VNON)
1997 	    vp->v_type = VDIR;
1998 	vp->v_vflag |= VV_ROOT;
1999 	*vpp = vp;
2000 	return (0);
2001 }
2002 
2003 /*
2004  * Flush out the buffer cache
2005  */
2006 /* ARGSUSED */
2007 static int
nfs_sync(struct mount * mp,int waitfor)2008 nfs_sync(struct mount *mp, int waitfor)
2009 {
2010 	struct vnode *vp, *mvp;
2011 	struct thread *td;
2012 	int error, allerror = 0;
2013 
2014 	td = curthread;
2015 
2016 	MNT_ILOCK(mp);
2017 	/*
2018 	 * If a forced dismount is in progress, return from here so that
2019 	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
2020 	 * calling VFS_UNMOUNT().
2021 	 */
2022 	if (NFSCL_FORCEDISM(mp)) {
2023 		MNT_IUNLOCK(mp);
2024 		return (EBADF);
2025 	}
2026 	MNT_IUNLOCK(mp);
2027 
2028 	if (waitfor == MNT_LAZY)
2029 		return (0);
2030 
2031 	/*
2032 	 * Force stale buffer cache information to be flushed.
2033 	 */
2034 loop:
2035 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
2036 		/* XXX Racy bv_cnt check. */
2037 		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0) {
2038 			VI_UNLOCK(vp);
2039 			continue;
2040 		}
2041 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
2042 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
2043 			goto loop;
2044 		}
2045 		error = VOP_FSYNC(vp, waitfor, td);
2046 		if (error)
2047 			allerror = error;
2048 		NFSVOPUNLOCK(vp);
2049 		vrele(vp);
2050 	}
2051 	return (allerror);
2052 }
2053 
2054 static int
nfs_sysctl(struct mount * mp,fsctlop_t op,struct sysctl_req * req)2055 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
2056 {
2057 	struct nfsmount *nmp = VFSTONFS(mp);
2058 	struct vfsquery vq;
2059 	int error;
2060 
2061 	bzero(&vq, sizeof(vq));
2062 	switch (op) {
2063 #if 0
2064 	case VFS_CTL_NOLOCKS:
2065 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
2066  		if (req->oldptr != NULL) {
2067  			error = SYSCTL_OUT(req, &val, sizeof(val));
2068  			if (error)
2069  				return (error);
2070  		}
2071  		if (req->newptr != NULL) {
2072  			error = SYSCTL_IN(req, &val, sizeof(val));
2073  			if (error)
2074  				return (error);
2075 			if (val)
2076 				nmp->nm_flag |= NFSMNT_NOLOCKS;
2077 			else
2078 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
2079  		}
2080 		break;
2081 #endif
2082 	case VFS_CTL_QUERY:
2083 		mtx_lock(&nmp->nm_mtx);
2084 		if (nmp->nm_state & NFSSTA_TIMEO)
2085 			vq.vq_flags |= VQ_NOTRESP;
2086 		mtx_unlock(&nmp->nm_mtx);
2087 #if 0
2088 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
2089 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
2090 			vq.vq_flags |= VQ_NOTRESPLOCK;
2091 #endif
2092 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
2093 		break;
2094  	case VFS_CTL_TIMEO:
2095  		if (req->oldptr != NULL) {
2096  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
2097  			    sizeof(nmp->nm_tprintf_initial_delay));
2098  			if (error)
2099  				return (error);
2100  		}
2101  		if (req->newptr != NULL) {
2102 			error = vfs_suser(mp, req->td);
2103 			if (error)
2104 				return (error);
2105  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
2106  			    sizeof(nmp->nm_tprintf_initial_delay));
2107  			if (error)
2108  				return (error);
2109  			if (nmp->nm_tprintf_initial_delay < 0)
2110  				nmp->nm_tprintf_initial_delay = 0;
2111  		}
2112 		break;
2113 	default:
2114 		return (ENOTSUP);
2115 	}
2116 	return (0);
2117 }
2118 
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.  The result of the cancel request is not reported
 * to the caller.
 */
static void
nfs_purge(struct mount *mp)
{

	(void)newnfs_nmcancelreqs(VFSTONFS(mp));
}
2131 
2132 /*
2133  * Extract the information needed by the nlm from the nfs vnode.
2134  */
2135 static void
nfs_getnlminfo(struct vnode * vp,uint8_t * fhp,size_t * fhlenp,struct sockaddr_storage * sp,int * is_v3p,off_t * sizep,struct timeval * timeop)2136 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
2137     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
2138     struct timeval *timeop)
2139 {
2140 	struct nfsmount *nmp;
2141 	struct nfsnode *np = VTONFS(vp);
2142 
2143 	nmp = VFSTONFS(vp->v_mount);
2144 	if (fhlenp != NULL)
2145 		*fhlenp = (size_t)np->n_fhp->nfh_len;
2146 	if (fhp != NULL)
2147 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
2148 	if (sp != NULL)
2149 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
2150 	if (is_v3p != NULL)
2151 		*is_v3p = NFS_ISV3(vp);
2152 	if (sizep != NULL)
2153 		*sizep = np->n_size;
2154 	if (timeop != NULL) {
2155 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
2156 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
2157 	}
2158 }
2159 
/*
 * Conditionally append an option name to the output buffer.
 *
 * When testval is non-zero and strictly more than strlen(opt) bytes
 * remain, opt is written at *buf, *buf is advanced past it and *blen is
 * reduced by the same amount.  Otherwise nothing is touched.  The nmp
 * argument is not used.
 */
static __inline void
nfscl_printopt(struct nfsmount *nmp, int testval, char *opt, char **buf,
    size_t *blen)
{
	size_t optlen;
	int written;

	if (testval == 0)
		return;
	optlen = strlen(opt);
	/* The name plus its terminating NUL must fit in what remains. */
	if (*blen <= optlen)
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	if (written != optlen)
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
2177 
/*
 * Append "<opt>=<optval>" to the output buffer.
 *
 * Nothing is attempted unless more than strlen(opt) + 1 bytes remain.
 * When the formatted text fits, *buf is advanced past it and *blen is
 * reduced by its length.  When it does not fit, the partial text is
 * discarded by re-terminating the string at *buf, so the output never
 * ends in a cut-off (and therefore wrong) number; *buf and *blen are
 * left unchanged in that case.  The nmp argument is not used.
 */
static __inline void
nfscl_printoptval(struct nfsmount *nmp, int optval, char *opt, char **buf,
    size_t *blen)
{
	int len;

	if (*blen <= strlen(opt) + 1)
		return;

	len = snprintf(*buf, *blen, "%s=%d", opt, optval);
	if (len < 0 || (size_t)len >= *blen) {
		/* Did not fit: drop the truncated option entirely. */
		**buf = '\0';
		return;
	}
	*buf += len;
	*blen -= len;
}
2195 
2196 /*
2197  * Load the option flags and values into the buffer.
2198  */
nfscl_retopts(struct nfsmount * nmp,char * buffer,size_t buflen)2199 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
2200 {
2201 	char *buf;
2202 	size_t blen;
2203 
2204 	buf = buffer;
2205 	blen = buflen;
2206 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
2207 	    &blen);
2208 	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
2209 		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
2210 		    &blen);
2211 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
2212 		    &buf, &blen);
2213 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_ONEOPENOWN) != 0 &&
2214 		    nmp->nm_minorvers > 0, ",oneopenown", &buf, &blen);
2215 	}
2216 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
2217 	    &blen);
2218 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
2219 	    "nfsv2", &buf, &blen);
2220 	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
2221 	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
2222 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
2223 	    &buf, &blen);
2224 	nfscl_printopt(nmp, (nmp->nm_newflag & NFSMNT_TLS) != 0, ",tls", &buf,
2225 	    &blen);
2226 	nfscl_printopt(nmp, (nmp->nm_newflag & NFSMNT_SYSKRB5) != 0,
2227 	    ",syskrb5", &buf, &blen);
2228 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
2229 	    &buf, &blen);
2230 	nfscl_printoptval(nmp, nmp->nm_aconnect + 1, ",nconnect", &buf, &blen);
2231 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
2232 	    &blen);
2233 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
2234 	    &blen);
2235 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
2236 	    &blen);
2237 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
2238 	    &blen);
2239 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
2240 	    &blen);
2241 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
2242 	    ",noncontigwr", &buf, &blen);
2243 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
2244 	    0, ",lockd", &buf, &blen);
2245 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOLOCKD) != 0, ",nolockd",
2246 	    &buf, &blen);
2247 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
2248 	    &buf, &blen);
2249 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
2250 	    &buf, &blen);
2251 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2252 	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
2253 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2254 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
2255 	    &buf, &blen);
2256 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2257 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
2258 	    &buf, &blen);
2259 	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
2260 	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
2261 	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
2262 	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
2263 	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
2264 	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
2265 	    &blen);
2266 	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
2267 	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
2268 	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
2269 	    &blen);
2270 	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
2271 	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
2272 	    &blen);
2273 	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
2274 	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
2275 }
2276