xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision 52d895fe63eb22c1265362c54983db6038ebea3c)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1993, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Rick Macklem at The University of Guelph.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
35  */
36 
37 #include <sys/cdefs.h>
38 #include "opt_bootp.h"
39 #include "opt_nfsroot.h"
40 #include "opt_kern_tls.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/limits.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/mount.h>
54 #include <sys/proc.h>
55 #include <sys/socket.h>
56 #include <sys/socketvar.h>
57 #include <sys/sockio.h>
58 #include <sys/sysctl.h>
59 #include <sys/vnode.h>
60 #include <sys/signalvar.h>
61 
62 #include <vm/vm.h>
63 #include <vm/vm_extern.h>
64 #include <vm/uma.h>
65 
66 #include <net/if.h>
67 #include <net/route.h>
68 #include <net/route/route_ctl.h>
69 #include <netinet/in.h>
70 
71 #include <fs/nfs/nfsport.h>
72 #include <fs/nfsclient/nfsnode.h>
73 #include <fs/nfsclient/nfsmount.h>
74 #include <fs/nfsclient/nfs.h>
75 #include <nfs/nfsdiskless.h>
76 
77 #include <rpc/rpcsec_tls.h>
78 
79 FEATURE(nfscl, "NFSv4 client");
80 
81 extern int nfscl_ticks;
82 extern struct timeval nfsboottime;
83 extern int nfsrv_useacl;
84 extern int nfscl_debuglevel;
85 extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
86 extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
87 extern struct mtx ncl_iod_mutex;
88 NFSCLSTATEMUTEX;
89 extern struct mtx nfsrv_dslock_mtx;
90 
91 MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header");
92 MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct");
93 
94 SYSCTL_DECL(_vfs_nfs);
95 static int nfs_ip_paranoia = 1;
96 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
97     &nfs_ip_paranoia, 0, "");
98 static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
99 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
100         downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
101 /* how long between console messages "nfs server foo not responding" */
102 static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
103 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
104         downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
105 #ifdef NFS_DEBUG
106 int nfs_debug;
107 SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
108     "Toggle debug flag");
109 #endif
110 
111 static int	nfs_mountroot(struct mount *);
112 static void	nfs_sec_name(char *, int *);
113 static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
114 		    struct nfs_args *argp, const char *, struct ucred *,
115 		    struct thread *);
116 static int	mountnfs(struct nfs_args *, struct mount *,
117 		    struct sockaddr *, char *, u_char *, int, u_char *, int,
118 		    u_char *, int, struct vnode **, struct ucred *,
119 		    struct thread *, int, int, int, uint32_t, char *, int);
120 static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
121 		    struct sockaddr_storage *, int *, off_t *,
122 		    struct timeval *);
123 static vfs_mount_t nfs_mount;
124 static vfs_cmount_t nfs_cmount;
125 static vfs_unmount_t nfs_unmount;
126 static vfs_root_t nfs_root;
127 static vfs_statfs_t nfs_statfs;
128 static vfs_sync_t nfs_sync;
129 static vfs_sysctl_t nfs_sysctl;
130 static vfs_purge_t nfs_purge;
131 
132 /*
133  * nfs vfs operations.
134  */
135 static struct vfsops nfs_vfsops = {
136 	.vfs_init =		ncl_init,
137 	.vfs_mount =		nfs_mount,
138 	.vfs_cmount =		nfs_cmount,
139 	.vfs_root =		vfs_cache_root,
140 	.vfs_cachedroot =	nfs_root,
141 	.vfs_statfs =		nfs_statfs,
142 	.vfs_sync =		nfs_sync,
143 	.vfs_uninit =		ncl_uninit,
144 	.vfs_unmount =		nfs_unmount,
145 	.vfs_sysctl =		nfs_sysctl,
146 	.vfs_purge =		nfs_purge,
147 };
148 /*
149  * This macro declares that the file system type is named "nfs".
150  * It also declares a module name of "nfs" and uses vfs_modevent()
151  * as the event handling function.
152  * The main module declaration is found in sys/fs/nfsclient/nfs_clport.c
153  * for "nfscl" and is needed so that a custom event handling
154  * function gets called.  MODULE_DEPEND() macros are found there.
155  */
156 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
157 
158 MODULE_VERSION(nfs, 1);
159 
160 /*
161  * This structure is now defined in sys/nfs/nfs_diskless.c so that it
162  * can be shared by both NFS clients. It is declared here so that it
163  * will be defined for kernels built without NFS_ROOT, although it
164  * isn't used in that case.
165  */
166 #if !defined(NFS_ROOT)
167 struct nfs_diskless	nfs_diskless = { { { 0 } } };
168 struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
169 int			nfs_diskless_valid = 0;
170 #endif
171 
172 SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
173     &nfs_diskless_valid, 0,
174     "Has the diskless struct been filled correctly");
175 
176 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
177     nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
178 
179 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
180     &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
181     "%Ssockaddr_in", "Diskless root nfs address");
182 
183 void		newnfsargs_ntoh(struct nfs_args *);
184 static int	nfs_mountdiskless(char *,
185 		    struct sockaddr_in *, struct nfs_args *,
186 		    struct thread *, struct vnode **, struct mount *);
187 static void	nfs_convert_diskless(void);
188 static void	nfs_convert_oargs(struct nfs_args *args,
189 		    struct onfs_args *oargs);
190 
191 int
newnfs_iosize(struct nfsmount * nmp)192 newnfs_iosize(struct nfsmount *nmp)
193 {
194 	int iosize, maxio;
195 
196 	/* First, set the upper limit for iosize */
197 	if (nmp->nm_flag & NFSMNT_NFSV4) {
198 		maxio = NFS_MAXBSIZE;
199 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
200 		if (nmp->nm_sotype == SOCK_DGRAM)
201 			maxio = NFS_MAXDGRAMDATA;
202 		else
203 			maxio = NFS_MAXBSIZE;
204 	} else {
205 		maxio = NFS_V2MAXDATA;
206 	}
207 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
208 		nmp->nm_rsize = maxio;
209 	if (nmp->nm_rsize > NFS_MAXBSIZE)
210 		nmp->nm_rsize = NFS_MAXBSIZE;
211 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
212 		nmp->nm_readdirsize = maxio;
213 	if (nmp->nm_readdirsize > nmp->nm_rsize)
214 		nmp->nm_readdirsize = nmp->nm_rsize;
215 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
216 		nmp->nm_wsize = maxio;
217 	if (nmp->nm_wsize > NFS_MAXBSIZE)
218 		nmp->nm_wsize = NFS_MAXBSIZE;
219 
220 	/*
221 	 * Calculate the size used for io buffers.  Use the larger
222 	 * of the two sizes to minimise nfs requests but make sure
223 	 * that it is at least one VM page to avoid wasting buffer
224 	 * space.  It must also be at least NFS_DIRBLKSIZ, since
225 	 * that is the buffer size used for directories.
226 	 */
227 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
228 	iosize = imax(iosize, PAGE_SIZE);
229 	iosize = imax(iosize, NFS_DIRBLKSIZ);
230 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
231 	return (iosize);
232 }
233 
234 static void
nfs_convert_oargs(struct nfs_args * args,struct onfs_args * oargs)235 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
236 {
237 
238 	args->version = NFS_ARGSVERSION;
239 	args->addr = oargs->addr;
240 	args->addrlen = oargs->addrlen;
241 	args->sotype = oargs->sotype;
242 	args->proto = oargs->proto;
243 	args->fh = oargs->fh;
244 	args->fhsize = oargs->fhsize;
245 	args->flags = oargs->flags;
246 	args->wsize = oargs->wsize;
247 	args->rsize = oargs->rsize;
248 	args->readdirsize = oargs->readdirsize;
249 	args->timeo = oargs->timeo;
250 	args->retrans = oargs->retrans;
251 	args->readahead = oargs->readahead;
252 	args->hostname = oargs->hostname;
253 }
254 
255 static void
nfs_convert_diskless(void)256 nfs_convert_diskless(void)
257 {
258 
259 	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
260 		sizeof(struct ifaliasreq));
261 	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
262 		sizeof(struct sockaddr_in));
263 	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
264 	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
265 		nfsv3_diskless.root_fhsize = NFSX_MYFH;
266 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
267 	} else {
268 		nfsv3_diskless.root_fhsize = NFSX_V2FH;
269 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
270 	}
271 	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
272 		sizeof(struct sockaddr_in));
273 	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
274 	nfsv3_diskless.root_time = nfs_diskless.root_time;
275 	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
276 		MAXHOSTNAMELEN);
277 	nfs_diskless_valid = 3;
278 }
279 
280 /*
281  * nfs statfs call
282  */
283 static int
nfs_statfs(struct mount * mp,struct statfs * sbp)284 nfs_statfs(struct mount *mp, struct statfs *sbp)
285 {
286 	struct vnode *vp;
287 	struct thread *td;
288 	struct nfsmount *nmp = VFSTONFS(mp);
289 	struct nfsvattr nfsva;
290 	struct nfsfsinfo fs;
291 	struct nfsstatfs sb;
292 	int error = 0, attrflag, gotfsinfo = 0, ret;
293 	struct nfsnode *np;
294 	char *fakefh;
295 
296 	td = curthread;
297 
298 	error = vfs_busy(mp, MBF_NOWAIT);
299 	if (error)
300 		return (error);
301 	if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0) {
302 		if (nmp->nm_fhsize == 0) {
303 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
304 			    td->td_ucred, td);
305 			if (error != 0) {
306 				/*
307 				 * We cannot do anything yet.  Hopefully what
308 				 * is in mnt_stat is sufficient.
309 				 */
310 				if (sbp != &mp->mnt_stat)
311 					*sbp = mp->mnt_stat;
312 				strncpy(&sbp->f_fstypename[0],
313 				    mp->mnt_vfc->vfc_name, MFSNAMELEN);
314 				vfs_unbusy(mp);
315 				return (0);
316 			}
317 		}
318 		fakefh = malloc(NFSX_FHMAX + 1, M_TEMP, M_WAITOK | M_ZERO);
319 		error = ncl_nget(mp, fakefh, NFSX_FHMAX + 1, &np, LK_EXCLUSIVE);
320 		free(fakefh, M_TEMP);
321 	} else {
322 		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
323 		    LK_EXCLUSIVE);
324 	}
325 	if (error) {
326 		vfs_unbusy(mp);
327 		return (error);
328 	}
329 	vp = NFSTOV(np);
330 	mtx_lock(&nmp->nm_mtx);
331 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
332 		mtx_unlock(&nmp->nm_mtx);
333 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
334 		    &attrflag);
335 		if (!error)
336 			gotfsinfo = 1;
337 	} else
338 		mtx_unlock(&nmp->nm_mtx);
339 	if (!error)
340 		error = nfsrpc_statfs(vp, &sb, &fs, NULL, td->td_ucred, td,
341 		    &nfsva, &attrflag);
342 	if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0 &&
343 	    error == NFSERR_WRONGSEC) {
344 		/* Cannot get new stats, so return what is in mnt_stat. */
345 		if (sbp != &mp->mnt_stat)
346 			*sbp = mp->mnt_stat;
347 		strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name,
348 		    MFSNAMELEN);
349 		vput(vp);
350 		vfs_unbusy(mp);
351 		return (0);
352 	}
353 	if (error != 0)
354 		NFSCL_DEBUG(2, "statfs=%d\n", error);
355 	if (attrflag == 0) {
356 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
357 		    td->td_ucred, td, &nfsva, NULL, NULL);
358 		if (ret) {
359 			/*
360 			 * Just set default values to get things going.
361 			 */
362 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
363 			nfsva.na_vattr.va_type = VDIR;
364 			nfsva.na_vattr.va_mode = 0777;
365 			nfsva.na_vattr.va_nlink = 100;
366 			nfsva.na_vattr.va_uid = (uid_t)0;
367 			nfsva.na_vattr.va_gid = (gid_t)0;
368 			nfsva.na_vattr.va_fileid = 2;
369 			nfsva.na_vattr.va_gen = 1;
370 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
371 			nfsva.na_vattr.va_size = 512 * 1024;
372 		}
373 	}
374 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
375 	if (!error) {
376 	    mtx_lock(&nmp->nm_mtx);
377 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
378 		nfscl_loadfsinfo(nmp, &fs);
379 	    nfscl_loadsbinfo(nmp, &sb, sbp);
380 	    sbp->f_iosize = newnfs_iosize(nmp);
381 	    mtx_unlock(&nmp->nm_mtx);
382 	    if (sbp != &mp->mnt_stat) {
383 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
384 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
385 	    }
386 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
387 	} else if (NFS_ISV4(vp)) {
388 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
389 	}
390 	vput(vp);
391 	vfs_unbusy(mp);
392 	return (error);
393 }
394 
395 /*
396  * nfs version 3 fsinfo rpc call
397  */
398 int
ncl_fsinfo(struct nfsmount * nmp,struct vnode * vp,struct ucred * cred,struct thread * td)399 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
400     struct thread *td)
401 {
402 	struct nfsfsinfo fs;
403 	struct nfsvattr nfsva;
404 	int error, attrflag;
405 
406 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag);
407 	if (!error) {
408 		if (attrflag)
409 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
410 		mtx_lock(&nmp->nm_mtx);
411 		nfscl_loadfsinfo(nmp, &fs);
412 		mtx_unlock(&nmp->nm_mtx);
413 	}
414 	return (error);
415 }
416 
417 /*
418  * Mount a remote root fs via nfs. This depends on the info in the
419  * nfs_diskless structure that has been filled in properly by some primary
420  * bootstrap.
421  * It goes something like this:
422  * - do enough of "ifconfig" by calling ifioctl() so that the system
423  *   can talk to the server
424  * - If nfs_diskless.mygateway is filled in, use that address as
425  *   a default gateway.
426  * - build the rootfs mount point and call mountnfs() to do the rest.
427  *
428  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
429  * structure, as well as other global NFS client variables here, as
430  * nfs_mountroot() will be called once in the boot before any other NFS
431  * client activity occurs.
432  */
433 static int
nfs_mountroot(struct mount * mp)434 nfs_mountroot(struct mount *mp)
435 {
436 	struct thread *td = curthread;
437 	struct nfsv3_diskless *nd = &nfsv3_diskless;
438 	struct socket *so;
439 	struct vnode *vp;
440 	struct ifreq ir;
441 	int error;
442 	u_long l;
443 	char buf[128];
444 	char *cp;
445 
446 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
447 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
448 #elif defined(NFS_ROOT)
449 	nfs_setup_diskless();
450 #endif
451 
452 	if (nfs_diskless_valid == 0)
453 		return (-1);
454 	if (nfs_diskless_valid == 1)
455 		nfs_convert_diskless();
456 
457 	/*
458 	 * Do enough of ifconfig(8) so that the critical net interface can
459 	 * talk to the server.
460 	 */
461 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
462 	    td->td_ucred, td);
463 	if (error)
464 		panic("nfs_mountroot: socreate(%04x): %d",
465 			nd->myif.ifra_addr.sa_family, error);
466 
467 #if 0 /* XXX Bad idea */
468 	/*
469 	 * We might not have been told the right interface, so we pass
470 	 * over the first ten interfaces of the same kind, until we get
471 	 * one of them configured.
472 	 */
473 
474 	for (i = strlen(nd->myif.ifra_name) - 1;
475 		nd->myif.ifra_name[i] >= '0' &&
476 		nd->myif.ifra_name[i] <= '9';
477 		nd->myif.ifra_name[i] ++) {
478 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
479 		if(!error)
480 			break;
481 	}
482 #endif
483 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
484 	if (error)
485 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
486 	if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
487 		ir.ifr_mtu = strtol(cp, NULL, 10);
488 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
489 		freeenv(cp);
490 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
491 		if (error)
492 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
493 	}
494 	soclose(so);
495 
496 	/*
497 	 * If the gateway field is filled in, set it as the default route.
498 	 * Note that pxeboot will set a default route of 0 if the route
499 	 * is not set by the DHCP server.  Check also for a value of 0
500 	 * to avoid panicking inappropriately in that situation.
501 	 */
502 	if (nd->mygateway.sin_len != 0 &&
503 	    nd->mygateway.sin_addr.s_addr != 0) {
504 		struct sockaddr_in mask, sin;
505 		struct epoch_tracker et;
506 		struct rt_addrinfo info;
507 		struct rib_cmd_info rc;
508 
509 		bzero((caddr_t)&mask, sizeof(mask));
510 		sin = mask;
511 		sin.sin_family = AF_INET;
512 		sin.sin_len = sizeof(sin);
513                 /* XXX MRT use table 0 for this sort of thing */
514 		NET_EPOCH_ENTER(et);
515 		CURVNET_SET(TD_TO_VNET(td));
516 
517 		bzero((caddr_t)&info, sizeof(info));
518 		info.rti_flags = RTF_UP | RTF_GATEWAY;
519 		info.rti_info[RTAX_DST] = (struct sockaddr *)&sin;
520 		info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&nd->mygateway;
521 		info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&mask;
522 
523 		error = rib_action(RT_DEFAULT_FIB, RTM_ADD, &info, &rc);
524 		CURVNET_RESTORE();
525 		NET_EPOCH_EXIT(et);
526 		if (error)
527 			panic("nfs_mountroot: RTM_ADD: %d", error);
528 	}
529 
530 	/*
531 	 * Create the rootfs mount point.
532 	 */
533 	nd->root_args.fh = nd->root_fh;
534 	nd->root_args.fhsize = nd->root_fhsize;
535 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
536 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
537 		(l >> 24) & 0xff, (l >> 16) & 0xff,
538 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
539 	printf("NFS ROOT: %s\n", buf);
540 	nd->root_args.hostname = buf;
541 	if ((error = nfs_mountdiskless(buf,
542 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
543 		return (error);
544 	}
545 
546 	/*
547 	 * This is not really an nfs issue, but it is much easier to
548 	 * set hostname here and then let the "/etc/rc.xxx" files
549 	 * mount the right /var based upon its preset value.
550 	 */
551 	mtx_lock(&prison0.pr_mtx);
552 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
553 	    sizeof(prison0.pr_hostname));
554 	mtx_unlock(&prison0.pr_mtx);
555 	inittodr(ntohl(nd->root_time));
556 	return (0);
557 }
558 
559 /*
560  * Internal version of mount system call for diskless setup.
561  */
562 static int
nfs_mountdiskless(char * path,struct sockaddr_in * sin,struct nfs_args * args,struct thread * td,struct vnode ** vpp,struct mount * mp)563 nfs_mountdiskless(char *path,
564     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
565     struct vnode **vpp, struct mount *mp)
566 {
567 	struct sockaddr *nam;
568 	int dirlen, error;
569 	char *dirpath;
570 
571 	/*
572 	 * Find the directory path in "path", which also has the server's
573 	 * name/ip address in it.
574 	 */
575 	dirpath = strchr(path, ':');
576 	if (dirpath != NULL)
577 		dirlen = strlen(++dirpath);
578 	else
579 		dirlen = 0;
580 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
581 	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
582 	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
583 	    NFS_DEFAULT_NEGNAMETIMEO, 0, 0, NULL, 0)) != 0) {
584 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
585 		return (error);
586 	}
587 	return (0);
588 }
589 
590 static void
nfs_sec_name(char * sec,int * flagsp)591 nfs_sec_name(char *sec, int *flagsp)
592 {
593 	if (!strcmp(sec, "krb5"))
594 		*flagsp |= NFSMNT_KERB;
595 	else if (!strcmp(sec, "krb5i"))
596 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
597 	else if (!strcmp(sec, "krb5p"))
598 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
599 }
600 
601 static void
nfs_decode_args(struct mount * mp,struct nfsmount * nmp,struct nfs_args * argp,const char * hostname,struct ucred * cred,struct thread * td)602 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
603     const char *hostname, struct ucred *cred, struct thread *td)
604 {
605 	int adjsock;
606 	char *p;
607 
608 	/*
609 	 * Set read-only flag if requested; otherwise, clear it if this is
610 	 * an update.  If this is not an update, then either the read-only
611 	 * flag is already clear, or this is a root mount and it was set
612 	 * intentionally at some previous point.
613 	 */
614 	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
615 		MNT_ILOCK(mp);
616 		mp->mnt_flag |= MNT_RDONLY;
617 		MNT_IUNLOCK(mp);
618 	} else if (mp->mnt_flag & MNT_UPDATE) {
619 		MNT_ILOCK(mp);
620 		mp->mnt_flag &= ~MNT_RDONLY;
621 		MNT_IUNLOCK(mp);
622 	}
623 
624 	/*
625 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
626 	 * no sense in that context.  Also, set up appropriate retransmit
627 	 * and soft timeout behavior.
628 	 */
629 	if (argp->sotype == SOCK_STREAM) {
630 		nmp->nm_flag &= ~NFSMNT_NOCONN;
631 		nmp->nm_timeo = NFS_MAXTIMEO;
632 		if ((argp->flags & NFSMNT_NFSV4) != 0)
633 			nmp->nm_retry = INT_MAX;
634 		else
635 			nmp->nm_retry = NFS_RETRANS_TCP;
636 	}
637 
638 	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
639 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
640 		argp->flags &= ~NFSMNT_RDIRPLUS;
641 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
642 	}
643 
644 	/* Clear ONEOPENOWN for NFSv2, 3 and 4.0. */
645 	if (nmp->nm_minorvers == 0) {
646 		argp->flags &= ~NFSMNT_ONEOPENOWN;
647 		nmp->nm_flag &= ~NFSMNT_ONEOPENOWN;
648 	}
649 
650 	/* Re-bind if rsrvd port requested and wasn't on one */
651 	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
652 		  && (argp->flags & NFSMNT_RESVPORT);
653 	/* Also re-bind if we're switching to/from a connected UDP socket */
654 	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
655 		    (argp->flags & NFSMNT_NOCONN));
656 
657 	/* Update flags atomically.  Don't change the lock bits. */
658 	nmp->nm_flag = argp->flags | nmp->nm_flag;
659 
660 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
661 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
662 		if (nmp->nm_timeo < NFS_MINTIMEO)
663 			nmp->nm_timeo = NFS_MINTIMEO;
664 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
665 			nmp->nm_timeo = NFS_MAXTIMEO;
666 	}
667 
668 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
669 		nmp->nm_retry = argp->retrans;
670 		if (nmp->nm_retry > NFS_MAXREXMIT)
671 			nmp->nm_retry = NFS_MAXREXMIT;
672 	}
673 
674 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
675 		nmp->nm_wsize = argp->wsize;
676 		/*
677 		 * Clip at the power of 2 below the size. There is an
678 		 * issue (not isolated) that causes intermittent page
679 		 * faults if this is not done.
680 		 */
681 		if (nmp->nm_wsize > NFS_FABLKSIZE)
682 			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
683 		else
684 			nmp->nm_wsize = NFS_FABLKSIZE;
685 	}
686 
687 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
688 		nmp->nm_rsize = argp->rsize;
689 		/*
690 		 * Clip at the power of 2 below the size. There is an
691 		 * issue (not isolated) that causes intermittent page
692 		 * faults if this is not done.
693 		 */
694 		if (nmp->nm_rsize > NFS_FABLKSIZE)
695 			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
696 		else
697 			nmp->nm_rsize = NFS_FABLKSIZE;
698 	}
699 
700 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
701 		nmp->nm_readdirsize = argp->readdirsize;
702 	}
703 
704 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
705 		nmp->nm_acregmin = argp->acregmin;
706 	else
707 		nmp->nm_acregmin = NFS_MINATTRTIMO;
708 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
709 		nmp->nm_acregmax = argp->acregmax;
710 	else
711 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
712 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
713 		nmp->nm_acdirmin = argp->acdirmin;
714 	else
715 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
716 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
717 		nmp->nm_acdirmax = argp->acdirmax;
718 	else
719 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
720 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
721 		nmp->nm_acdirmin = nmp->nm_acdirmax;
722 	if (nmp->nm_acregmin > nmp->nm_acregmax)
723 		nmp->nm_acregmin = nmp->nm_acregmax;
724 
725 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
726 		if (argp->readahead <= NFS_MAXRAHEAD)
727 			nmp->nm_readahead = argp->readahead;
728 		else
729 			nmp->nm_readahead = NFS_MAXRAHEAD;
730 	}
731 	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
732 		if (argp->wcommitsize < nmp->nm_wsize)
733 			nmp->nm_wcommitsize = nmp->nm_wsize;
734 		else
735 			nmp->nm_wcommitsize = argp->wcommitsize;
736 	}
737 
738 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
739 		    (nmp->nm_soproto != argp->proto));
740 
741 	if (nmp->nm_client != NULL && adjsock) {
742 		int haslock = 0, error = 0;
743 
744 		if (nmp->nm_sotype == SOCK_STREAM) {
745 			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
746 			if (!error)
747 				haslock = 1;
748 		}
749 		if (!error) {
750 		    newnfs_disconnect(nmp, &nmp->nm_sockreq);
751 		    if (haslock)
752 			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
753 		    nmp->nm_sotype = argp->sotype;
754 		    nmp->nm_soproto = argp->proto;
755 		    if (nmp->nm_sotype == SOCK_DGRAM)
756 			while (newnfs_connect(nmp, &nmp->nm_sockreq,
757 			    cred, td, 0, false, &nmp->nm_sockreq.nr_client)) {
758 				printf("newnfs_args: retrying connect\n");
759 				(void) nfs_catnap(PSOCK, 0, "nfscon");
760 			}
761 		}
762 	} else {
763 		nmp->nm_sotype = argp->sotype;
764 		nmp->nm_soproto = argp->proto;
765 	}
766 
767 	if (hostname != NULL) {
768 		strlcpy(nmp->nm_hostname, hostname,
769 		    sizeof(nmp->nm_hostname));
770 		p = strchr(nmp->nm_hostname, ':');
771 		if (p != NULL)
772 			*p = '\0';
773 	}
774 }
775 
/*
 * NULL-terminated list of the mount option names accepted by nfs_mount(),
 * which hands it to vfs_filteropt().
 */
static const char *nfs_opts[] = {
	"from", "nfs_args", "noac", "noatime",
	"noexec", "suiddir", "nosuid", "nosymfollow",
	"union", "noclusterr", "noclusterw", "multilabel",
	"acls", "force", "update", "async",
	"noconn", "nolockd", "conn", "lockd",
	"intr", "rdirplus", "readdirsize", "soft",
	"hard", "mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans", "actimeo",
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeo",
	"timeout", "addr", "fh", "nfsv3",
	"sec", "principal", "nfsv4", "gssname",
	"allgssname", "dirpath", "minorversion", "nametimeo",
	"negnametimeo", "nocto", "noncontigwr", "pnfs",
	"wcommitsize", "oneopenown", "tls", "tlscertname",
	"nconnect", "syskrb5",
	NULL
};
787 
788 /*
789  * Parse the "from" mountarg, passed by the generic mount(8) program
790  * or the mountroot code.  This is used when rerooting into NFS.
791  *
792  * Note that the "hostname" is actually a "hostname:/share/path" string.
793  */
794 static int
nfs_mount_parse_from(struct vfsoptlist * opts,char ** hostnamep,struct sockaddr_in ** sinp,char * dirpath,size_t dirpathsize,int * dirlenp)795 nfs_mount_parse_from(struct vfsoptlist *opts, char **hostnamep,
796     struct sockaddr_in **sinp, char *dirpath, size_t dirpathsize, int *dirlenp)
797 {
798 	char *nam, *delimp, *hostp, *spec;
799 	int error, have_bracket = 0, offset, rv, speclen;
800 	struct sockaddr_in *sin;
801 	size_t len;
802 
803 	error = vfs_getopt(opts, "from", (void **)&spec, &speclen);
804 	if (error != 0)
805 		return (error);
806 	nam = malloc(MNAMELEN + 1, M_TEMP, M_WAITOK);
807 
808 	/*
809 	 * This part comes from sbin/mount_nfs/mount_nfs.c:getnfsargs().
810 	 */
811 	if (*spec == '[' && (delimp = strchr(spec + 1, ']')) != NULL &&
812 	    *(delimp + 1) == ':') {
813 		hostp = spec + 1;
814 		spec = delimp + 2;
815 		have_bracket = 1;
816 	} else if ((delimp = strrchr(spec, ':')) != NULL) {
817 		hostp = spec;
818 		spec = delimp + 1;
819 	} else if ((delimp = strrchr(spec, '@')) != NULL) {
820 		printf("%s: path@server syntax is deprecated, "
821 		    "use server:path\n", __func__);
822 		hostp = delimp + 1;
823 	} else {
824 		printf("%s: no <host>:<dirpath> nfs-name\n", __func__);
825 		free(nam, M_TEMP);
826 		return (EINVAL);
827 	}
828 	*delimp = '\0';
829 
830 	/*
831 	 * If there has been a trailing slash at mounttime it seems
832 	 * that some mountd implementations fail to remove the mount
833 	 * entries from their mountlist while unmounting.
834 	 */
835 	for (speclen = strlen(spec);
836 	    speclen > 1 && spec[speclen - 1] == '/';
837 	    speclen--)
838 		spec[speclen - 1] = '\0';
839 	if (strlen(hostp) + strlen(spec) + 1 > MNAMELEN) {
840 		printf("%s: %s:%s: name too long", __func__, hostp, spec);
841 		free(nam, M_TEMP);
842 		return (EINVAL);
843 	}
844 	/* Make both '@' and ':' notations equal */
845 	if (*hostp != '\0') {
846 		len = strlen(hostp);
847 		offset = 0;
848 		if (have_bracket)
849 			nam[offset++] = '[';
850 		memmove(nam + offset, hostp, len);
851 		if (have_bracket)
852 			nam[len + offset++] = ']';
853 		nam[len + offset++] = ':';
854 		memmove(nam + len + offset, spec, speclen);
855 		nam[len + speclen + offset] = '\0';
856 	} else
857 		nam[0] = '\0';
858 
859 	/*
860 	 * XXX: IPv6
861 	 */
862 	sin = malloc(sizeof(*sin), M_SONAME, M_WAITOK);
863 	rv = inet_pton(AF_INET, hostp, &sin->sin_addr);
864 	if (rv != 1) {
865 		printf("%s: cannot parse '%s', inet_pton() returned %d\n",
866 		    __func__, hostp, rv);
867 		free(nam, M_TEMP);
868 		free(sin, M_SONAME);
869 		return (EINVAL);
870 	}
871 
872 	sin->sin_len = sizeof(*sin);
873 	sin->sin_family = AF_INET;
874 	/*
875 	 * XXX: hardcoded port number.
876 	 */
877 	sin->sin_port = htons(2049);
878 
879 	*hostnamep = strdup(nam, M_NEWNFSMNT);
880 	*sinp = sin;
881 	strlcpy(dirpath, spec, dirpathsize);
882 	*dirlenp = strlen(dirpath);
883 
884 	free(nam, M_TEMP);
885 	return (0);
886 }
887 
888 /*
889  * VFS Operations.
890  *
891  * mount system call
892  * It seems a bit dumb to copyinstr() the host and path here and then
893  * bcopy() them in mountnfs(), but I wanted to detect errors before
894  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
895  * an error after that means that I have to release the mbuf.
896  */
897 /* ARGSUSED */
898 static int
nfs_mount(struct mount * mp)899 nfs_mount(struct mount *mp)
900 {
901 	struct nfs_args args = {
902 	    .version = NFS_ARGSVERSION,
903 	    .addr = NULL,
904 	    .addrlen = sizeof (struct sockaddr_in),
905 	    .sotype = SOCK_STREAM,
906 	    .proto = 0,
907 	    .fh = NULL,
908 	    .fhsize = 0,
909 	    .flags = NFSMNT_RESVPORT,
910 	    .wsize = NFS_WSIZE,
911 	    .rsize = NFS_RSIZE,
912 	    .readdirsize = NFS_READDIRSIZE,
913 	    .timeo = 10,
914 	    .retrans = NFS_RETRANS,
915 	    .readahead = NFS_DEFRAHEAD,
916 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
917 	    .hostname = NULL,
918 	    .acregmin = NFS_MINATTRTIMO,
919 	    .acregmax = NFS_MAXATTRTIMO,
920 	    .acdirmin = NFS_MINDIRATTRTIMO,
921 	    .acdirmax = NFS_MAXDIRATTRTIMO,
922 	};
923 	int error = 0, ret, len;
924 	struct sockaddr *nam = NULL;
925 	struct vnode *vp;
926 	struct thread *td;
927 	char *hst;
928 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
929 	char *cp, *opt, *name, *secname, *tlscertname;
930 	int nametimeo = NFS_DEFAULT_NAMETIMEO;
931 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
932 	int minvers = -1;
933 	int dirlen, has_nfs_args_opt, has_nfs_from_opt,
934 	    krbnamelen, srvkrbnamelen;
935 	size_t hstlen;
936 	uint32_t newflag;
937 	int aconn = 0;
938 
939 	has_nfs_args_opt = 0;
940 	has_nfs_from_opt = 0;
941 	newflag = 0;
942 	tlscertname = NULL;
943 	hst = malloc(MNAMELEN, M_TEMP, M_WAITOK);
944 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
945 		error = EINVAL;
946 		goto out;
947 	}
948 
949 	td = curthread;
950 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS &&
951 	    nfs_diskless_valid != 0) {
952 		error = nfs_mountroot(mp);
953 		goto out;
954 	}
955 
956 	nfscl_init();
957 
958 	/*
959 	 * The old mount_nfs program passed the struct nfs_args
960 	 * from userspace to kernel.  The new mount_nfs program
961 	 * passes string options via nmount() from userspace to kernel
962 	 * and we populate the struct nfs_args in the kernel.
963 	 */
964 	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
965 		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
966 		    sizeof(args));
967 		if (error != 0)
968 			goto out;
969 
970 		if (args.version != NFS_ARGSVERSION) {
971 			error = EPROGMISMATCH;
972 			goto out;
973 		}
974 		has_nfs_args_opt = 1;
975 	}
976 
977 	/* Handle the new style options. */
978 	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
979 		args.acdirmin = args.acdirmax =
980 		    args.acregmin = args.acregmax = 0;
981 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
982 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
983 	}
984 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
985 		args.flags |= NFSMNT_NOCONN;
986 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
987 		args.flags &= ~NFSMNT_NOCONN;
988 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
989 		args.flags |= NFSMNT_NOLOCKD;
990 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
991 		args.flags &= ~NFSMNT_NOLOCKD;
992 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
993 		args.flags |= NFSMNT_INT;
994 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
995 		args.flags |= NFSMNT_RDIRPLUS;
996 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
997 		args.flags |= NFSMNT_RESVPORT;
998 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
999 		args.flags &= ~NFSMNT_RESVPORT;
1000 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
1001 		args.flags |= NFSMNT_SOFT;
1002 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
1003 		args.flags &= ~NFSMNT_SOFT;
1004 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
1005 		args.sotype = SOCK_DGRAM;
1006 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
1007 		args.sotype = SOCK_DGRAM;
1008 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
1009 		args.sotype = SOCK_STREAM;
1010 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
1011 		args.flags |= NFSMNT_NFSV3;
1012 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
1013 		args.flags |= NFSMNT_NFSV4;
1014 		args.sotype = SOCK_STREAM;
1015 	}
1016 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
1017 		args.flags |= NFSMNT_ALLGSSNAME;
1018 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
1019 		args.flags |= NFSMNT_NOCTO;
1020 	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
1021 		args.flags |= NFSMNT_NONCONTIGWR;
1022 	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
1023 		args.flags |= NFSMNT_PNFS;
1024 	if (vfs_getopt(mp->mnt_optnew, "oneopenown", NULL, NULL) == 0)
1025 		args.flags |= NFSMNT_ONEOPENOWN;
1026 	if (vfs_getopt(mp->mnt_optnew, "tls", NULL, NULL) == 0)
1027 		newflag |= NFSMNT_TLS;
1028 	if (vfs_getopt(mp->mnt_optnew, "tlscertname", (void **)&opt, &len) ==
1029 	    0) {
1030 		/*
1031 		 * tlscertname with "key.pem" appended to it forms a file
1032 		 * name.  As such, the maximum allowable strlen(tlscertname) is
1033 		 * NAME_MAX - 7. However, "len" includes the nul termination
1034 		 * byte so it can be up to NAME_MAX - 6.
1035 		 */
1036 		if (opt == NULL || len <= 1 || len > NAME_MAX - 6) {
1037 			vfs_mount_error(mp, "invalid tlscertname");
1038 			error = EINVAL;
1039 			goto out;
1040 		}
1041 		tlscertname = malloc(len, M_NEWNFSMNT, M_WAITOK);
1042 		strlcpy(tlscertname, opt, len);
1043 	}
1044 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
1045 		if (opt == NULL) {
1046 			vfs_mount_error(mp, "illegal readdirsize");
1047 			error = EINVAL;
1048 			goto out;
1049 		}
1050 		ret = sscanf(opt, "%d", &args.readdirsize);
1051 		if (ret != 1 || args.readdirsize <= 0) {
1052 			vfs_mount_error(mp, "illegal readdirsize: %s",
1053 			    opt);
1054 			error = EINVAL;
1055 			goto out;
1056 		}
1057 		args.flags |= NFSMNT_READDIRSIZE;
1058 	}
1059 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
1060 		if (opt == NULL) {
1061 			vfs_mount_error(mp, "illegal readahead");
1062 			error = EINVAL;
1063 			goto out;
1064 		}
1065 		ret = sscanf(opt, "%d", &args.readahead);
1066 		if (ret != 1 || args.readahead <= 0) {
1067 			vfs_mount_error(mp, "illegal readahead: %s",
1068 			    opt);
1069 			error = EINVAL;
1070 			goto out;
1071 		}
1072 		args.flags |= NFSMNT_READAHEAD;
1073 	}
1074 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
1075 		if (opt == NULL) {
1076 			vfs_mount_error(mp, "illegal wsize");
1077 			error = EINVAL;
1078 			goto out;
1079 		}
1080 		ret = sscanf(opt, "%d", &args.wsize);
1081 		if (ret != 1 || args.wsize <= 0) {
1082 			vfs_mount_error(mp, "illegal wsize: %s",
1083 			    opt);
1084 			error = EINVAL;
1085 			goto out;
1086 		}
1087 		args.flags |= NFSMNT_WSIZE;
1088 	}
1089 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
1090 		if (opt == NULL) {
1091 			vfs_mount_error(mp, "illegal rsize");
1092 			error = EINVAL;
1093 			goto out;
1094 		}
1095 		ret = sscanf(opt, "%d", &args.rsize);
1096 		if (ret != 1 || args.rsize <= 0) {
1097 			vfs_mount_error(mp, "illegal wsize: %s",
1098 			    opt);
1099 			error = EINVAL;
1100 			goto out;
1101 		}
1102 		args.flags |= NFSMNT_RSIZE;
1103 	}
1104 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
1105 		if (opt == NULL) {
1106 			vfs_mount_error(mp, "illegal retrans");
1107 			error = EINVAL;
1108 			goto out;
1109 		}
1110 		ret = sscanf(opt, "%d", &args.retrans);
1111 		if (ret != 1 || args.retrans <= 0) {
1112 			vfs_mount_error(mp, "illegal retrans: %s",
1113 			    opt);
1114 			error = EINVAL;
1115 			goto out;
1116 		}
1117 		args.flags |= NFSMNT_RETRANS;
1118 	}
1119 	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
1120 		ret = sscanf(opt, "%d", &args.acregmin);
1121 		if (ret != 1 || args.acregmin < 0) {
1122 			vfs_mount_error(mp, "illegal actimeo: %s",
1123 			    opt);
1124 			error = EINVAL;
1125 			goto out;
1126 		}
1127 		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
1128 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
1129 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
1130 	}
1131 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
1132 		ret = sscanf(opt, "%d", &args.acregmin);
1133 		if (ret != 1 || args.acregmin < 0) {
1134 			vfs_mount_error(mp, "illegal acregmin: %s",
1135 			    opt);
1136 			error = EINVAL;
1137 			goto out;
1138 		}
1139 		args.flags |= NFSMNT_ACREGMIN;
1140 	}
1141 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
1142 		ret = sscanf(opt, "%d", &args.acregmax);
1143 		if (ret != 1 || args.acregmax < 0) {
1144 			vfs_mount_error(mp, "illegal acregmax: %s",
1145 			    opt);
1146 			error = EINVAL;
1147 			goto out;
1148 		}
1149 		args.flags |= NFSMNT_ACREGMAX;
1150 	}
1151 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
1152 		ret = sscanf(opt, "%d", &args.acdirmin);
1153 		if (ret != 1 || args.acdirmin < 0) {
1154 			vfs_mount_error(mp, "illegal acdirmin: %s",
1155 			    opt);
1156 			error = EINVAL;
1157 			goto out;
1158 		}
1159 		args.flags |= NFSMNT_ACDIRMIN;
1160 	}
1161 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
1162 		ret = sscanf(opt, "%d", &args.acdirmax);
1163 		if (ret != 1 || args.acdirmax < 0) {
1164 			vfs_mount_error(mp, "illegal acdirmax: %s",
1165 			    opt);
1166 			error = EINVAL;
1167 			goto out;
1168 		}
1169 		args.flags |= NFSMNT_ACDIRMAX;
1170 	}
1171 	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
1172 		ret = sscanf(opt, "%d", &args.wcommitsize);
1173 		if (ret != 1 || args.wcommitsize < 0) {
1174 			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1175 			error = EINVAL;
1176 			goto out;
1177 		}
1178 		args.flags |= NFSMNT_WCOMMITSIZE;
1179 	}
1180 	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1181 		ret = sscanf(opt, "%d", &args.timeo);
1182 		if (ret != 1 || args.timeo <= 0) {
1183 			vfs_mount_error(mp, "illegal timeo: %s",
1184 			    opt);
1185 			error = EINVAL;
1186 			goto out;
1187 		}
1188 		args.flags |= NFSMNT_TIMEO;
1189 	}
1190 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1191 		ret = sscanf(opt, "%d", &args.timeo);
1192 		if (ret != 1 || args.timeo <= 0) {
1193 			vfs_mount_error(mp, "illegal timeout: %s",
1194 			    opt);
1195 			error = EINVAL;
1196 			goto out;
1197 		}
1198 		args.flags |= NFSMNT_TIMEO;
1199 	}
1200 	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1201 		ret = sscanf(opt, "%d", &nametimeo);
1202 		if (ret != 1 || nametimeo < 0) {
1203 			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1204 			error = EINVAL;
1205 			goto out;
1206 		}
1207 	}
1208 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1209 	    == 0) {
1210 		ret = sscanf(opt, "%d", &negnametimeo);
1211 		if (ret != 1 || negnametimeo < 0) {
1212 			vfs_mount_error(mp, "illegal negnametimeo: %s",
1213 			    opt);
1214 			error = EINVAL;
1215 			goto out;
1216 		}
1217 	}
1218 	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1219 	    0) {
1220 		ret = sscanf(opt, "%d", &minvers);
1221 		if (ret != 1 || minvers < 0 || minvers > 2 ||
1222 		    (args.flags & NFSMNT_NFSV4) == 0) {
1223 			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1224 			error = EINVAL;
1225 			goto out;
1226 		}
1227 	}
1228 	if (vfs_getopt(mp->mnt_optnew, "nconnect", (void **)&opt, NULL) ==
1229 	    0) {
1230 		ret = sscanf(opt, "%d", &aconn);
1231 		if (ret != 1 || aconn < 1 || aconn > NFS_MAXNCONN) {
1232 			vfs_mount_error(mp, "illegal nconnect: %s", opt);
1233 			error = EINVAL;
1234 			goto out;
1235 		}
1236 		/*
1237 		 * Setting nconnect=1 is a no-op, allowed so that
1238 		 * the option can be used in a Linux compatible way.
1239 		 */
1240 		aconn--;
1241 	}
1242 	if (vfs_getopt(mp->mnt_optnew, "syskrb5", NULL, NULL) == 0)
1243 		newflag |= NFSMNT_SYSKRB5;
1244 	if (vfs_getopt(mp->mnt_optnew, "sec",
1245 		(void **) &secname, NULL) == 0)
1246 		nfs_sec_name(secname, &args.flags);
1247 
1248 	if (mp->mnt_flag & MNT_UPDATE) {
1249 		struct nfsmount *nmp = VFSTONFS(mp);
1250 
1251 		if (nmp == NULL) {
1252 			error = EIO;
1253 			goto out;
1254 		}
1255 
1256 		/*
1257 		 * If a change from TCP->UDP is done and there are thread(s)
1258 		 * that have I/O RPC(s) in progress with a transfer size
1259 		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1260 		 * hung, retrying the RPC(s) forever. Usually these threads
1261 		 * will be seen doing an uninterruptible sleep on wait channel
1262 		 * "nfsreq".
1263 		 */
1264 		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1265 			tprintf(td->td_proc, LOG_WARNING,
1266 	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1267 
1268 		/*
1269 		 * When doing an update, we can't change version,
1270 		 * security, switch lockd strategies, change cookie
1271 		 * translation or switch oneopenown.
1272 		 */
1273 		args.flags = (args.flags &
1274 		    ~(NFSMNT_NFSV3 |
1275 		      NFSMNT_NFSV4 |
1276 		      NFSMNT_KERB |
1277 		      NFSMNT_INTEGRITY |
1278 		      NFSMNT_PRIVACY |
1279 		      NFSMNT_ONEOPENOWN |
1280 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1281 		    (nmp->nm_flag &
1282 			(NFSMNT_NFSV3 |
1283 			 NFSMNT_NFSV4 |
1284 			 NFSMNT_KERB |
1285 			 NFSMNT_INTEGRITY |
1286 			 NFSMNT_PRIVACY |
1287 			 NFSMNT_ONEOPENOWN |
1288 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1289 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1290 		goto out;
1291 	}
1292 
1293 	/*
1294 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1295 	 * or no-connection mode for those protocols that support
1296 	 * no-connection mode (the flag will be cleared later for protocols
1297 	 * that do not support no-connection mode).  This will allow a client
1298 	 * to receive replies from a different IP then the request was
1299 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1300 	 * not 0.
1301 	 */
1302 	if (nfs_ip_paranoia == 0)
1303 		args.flags |= NFSMNT_NOCONN;
1304 
1305 	if (has_nfs_args_opt != 0) {
1306 		/*
1307 		 * In the 'nfs_args' case, the pointers in the args
1308 		 * structure are in userland - we copy them in here.
1309 		 */
1310 		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1311 			vfs_mount_error(mp, "Bad file handle");
1312 			error = EINVAL;
1313 			goto out;
1314 		}
1315 		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1316 		    args.fhsize);
1317 		if (error != 0)
1318 			goto out;
1319 		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1320 		if (error != 0)
1321 			goto out;
1322 		bzero(&hst[hstlen], MNAMELEN - hstlen);
1323 		args.hostname = hst;
1324 		/* getsockaddr() call must be after above copyin() calls */
1325 		error = getsockaddr(&nam, args.addr, args.addrlen);
1326 		if (error != 0)
1327 			goto out;
1328 	} else if (nfs_mount_parse_from(mp->mnt_optnew,
1329 	    &args.hostname, (struct sockaddr_in **)&nam, dirpath,
1330 	    sizeof(dirpath), &dirlen) == 0) {
1331 		has_nfs_from_opt = 1;
1332 		bcopy(args.hostname, hst, MNAMELEN);
1333 		hst[MNAMELEN - 1] = '\0';
1334 
1335 		/*
1336 		 * This only works with NFSv4 for now.
1337 		 */
1338 		args.fhsize = 0;
1339 		args.flags |= NFSMNT_NFSV4;
1340 		args.sotype = SOCK_STREAM;
1341 	} else {
1342 		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1343 		    &args.fhsize) == 0) {
1344 			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1345 				vfs_mount_error(mp, "Bad file handle");
1346 				error = EINVAL;
1347 				goto out;
1348 			}
1349 			bcopy(args.fh, nfh, args.fhsize);
1350 		} else {
1351 			args.fhsize = 0;
1352 		}
1353 		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1354 		    (void **)&args.hostname, &len);
1355 		if (args.hostname == NULL) {
1356 			vfs_mount_error(mp, "Invalid hostname");
1357 			error = EINVAL;
1358 			goto out;
1359 		}
1360 		if (len >= MNAMELEN) {
1361 			vfs_mount_error(mp, "Hostname too long");
1362 			error = EINVAL;
1363 			goto out;
1364 		}
1365 		bcopy(args.hostname, hst, len);
1366 		hst[len] = '\0';
1367 	}
1368 
1369 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1370 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1371 	else {
1372 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1373 		cp = strchr(srvkrbname, ':');
1374 		if (cp != NULL)
1375 			*cp = '\0';
1376 	}
1377 	srvkrbnamelen = strlen(srvkrbname);
1378 
1379 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1380 		strlcpy(krbname, name, sizeof (krbname));
1381 	else
1382 		krbname[0] = '\0';
1383 	krbnamelen = strlen(krbname);
1384 
1385 	if (has_nfs_from_opt == 0) {
1386 		if (vfs_getopt(mp->mnt_optnew,
1387 		    "dirpath", (void **)&name, NULL) == 0)
1388 			strlcpy(dirpath, name, sizeof (dirpath));
1389 		else
1390 			dirpath[0] = '\0';
1391 		dirlen = strlen(dirpath);
1392 	}
1393 
1394 	if (has_nfs_args_opt == 0 && has_nfs_from_opt == 0) {
1395 		if (vfs_getopt(mp->mnt_optnew, "addr",
1396 		    (void **)&args.addr, &args.addrlen) == 0) {
1397 			if (args.addrlen > SOCK_MAXADDRLEN) {
1398 				error = ENAMETOOLONG;
1399 				goto out;
1400 			}
1401 			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1402 			bcopy(args.addr, nam, args.addrlen);
1403 			nam->sa_len = args.addrlen;
1404 		} else {
1405 			vfs_mount_error(mp, "No server address");
1406 			error = EINVAL;
1407 			goto out;
1408 		}
1409 	}
1410 
1411 	if (aconn > 0 && (args.sotype != SOCK_STREAM ||
1412 	    (args.flags & NFSMNT_NFSV4) == 0 || minvers == 0)) {
1413 		/*
1414 		 * RFC 5661 requires that an NFSv4.1/4.2 server
1415 		 * send an RPC reply on the same TCP connection
1416 		 * as the one it received the request on.
1417 		 * This property in required for "nconnect" and
1418 		 * might not be the case for NFSv3 or NFSv4.0 servers.
1419 		 */
1420 		vfs_mount_error(mp, "nconnect should only be used "
1421 		    "for NFSv4.1/4.2 mounts");
1422 		error = EINVAL;
1423 		goto out;
1424 	}
1425 
1426 	if ((newflag & NFSMNT_SYSKRB5) != 0 &&
1427 	    ((args.flags & NFSMNT_NFSV4) == 0 || minvers == 0)) {
1428 		/*
1429 		 * This option requires the use of SP4_NONE, which
1430 		 * is only in NFSv4.1/4.2.
1431 		 */
1432 		vfs_mount_error(mp, "syskrb5 should only be used "
1433 		    "for NFSv4.1/4.2 mounts");
1434 		error = EINVAL;
1435 		goto out;
1436 	}
1437 
1438 	if ((newflag & NFSMNT_SYSKRB5) != 0 &&
1439 	    (args.flags & NFSMNT_KERB) == 0) {
1440 		/*
1441 		 * This option modifies the behaviour of sec=krb5[ip].
1442 		 */
1443 		vfs_mount_error(mp, "syskrb5 should only be used "
1444 		    "for sec=krb5[ip] mounts");
1445 		error = EINVAL;
1446 		goto out;
1447 	}
1448 
1449 	if ((newflag & NFSMNT_SYSKRB5) != 0 && krbname[0] != '\0') {
1450 		/*
1451 		 * This option is used as an alternative to "gssname".
1452 		 */
1453 		vfs_mount_error(mp, "syskrb5 should not be used "
1454 		    "with the gssname option");
1455 		error = EINVAL;
1456 		goto out;
1457 	}
1458 
1459 	args.fh = nfh;
1460 	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1461 	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1462 	    nametimeo, negnametimeo, minvers, newflag, tlscertname, aconn);
1463 out:
1464 	if (!error) {
1465 		MNT_ILOCK(mp);
1466 		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1467 		    MNTK_USES_BCACHE;
1468 		if ((VFSTONFS(mp)->nm_flag & NFSMNT_NFSV4) != 0)
1469 			mp->mnt_kern_flag |= MNTK_NULL_NOCACHE;
1470 		MNT_IUNLOCK(mp);
1471 	}
1472 	free(hst, M_TEMP);
1473 	return (error);
1474 }
1475 
1476 /*
1477  * VFS Operations.
1478  *
1479  * mount system call
1480  * It seems a bit dumb to copyinstr() the host and path here and then
1481  * bcopy() them in mountnfs(), but I wanted to detect errors before
1482  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
1483  * an error after that means that I have to release the mbuf.
1484  */
1485 /* ARGSUSED */
1486 static int
nfs_cmount(struct mntarg * ma,void * data,uint64_t flags)1487 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1488 {
1489 	int error;
1490 	struct nfs_args args;
1491 
1492 	error = copyin(data, &args, sizeof (struct nfs_args));
1493 	if (error)
1494 		return error;
1495 
1496 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1497 
1498 	error = kernel_mount(ma, flags);
1499 	return (error);
1500 }
1501 
1502 /*
1503  * Common code for mount and mountroot
1504  */
1505 static int
mountnfs(struct nfs_args * argp,struct mount * mp,struct sockaddr * nam,char * hst,u_char * krbname,int krbnamelen,u_char * dirpath,int dirlen,u_char * srvkrbname,int srvkrbnamelen,struct vnode ** vpp,struct ucred * cred,struct thread * td,int nametimeo,int negnametimeo,int minvers,uint32_t newflag,char * tlscertname,int aconn)1506 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1507     char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1508     u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1509     struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1510     int minvers, uint32_t newflag, char *tlscertname, int aconn)
1511 {
1512 	struct nfsmount *nmp;
1513 	struct nfsnode *np;
1514 	int error, trycnt, ret;
1515 	struct nfsvattr nfsva;
1516 	struct nfsclclient *clp;
1517 	struct nfsclds *dsp, *tdsp;
1518 	uint32_t lease;
1519 	bool tryminvers;
1520 	char *fakefh;
1521 	static u_int64_t clval = 0;
1522 #ifdef KERN_TLS
1523 	u_int maxlen;
1524 #endif
1525 
1526 	NFSCL_DEBUG(3, "in mnt\n");
1527 	clp = NULL;
1528 	if (mp->mnt_flag & MNT_UPDATE) {
1529 		nmp = VFSTONFS(mp);
1530 		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1531 		free(nam, M_SONAME);
1532 		free(tlscertname, M_NEWNFSMNT);
1533 		return (0);
1534 	} else {
1535 		/* NFS-over-TLS requires that rpctls be functioning. */
1536 		if ((newflag & NFSMNT_TLS) != 0) {
1537 			error = EINVAL;
1538 #ifdef KERN_TLS
1539 			/* KERN_TLS is only supported for TCP. */
1540 			if (argp->sotype == SOCK_STREAM &&
1541 			    rpctls_getinfo(&maxlen, true, false))
1542 				error = 0;
1543 #endif
1544 			if (error != 0) {
1545 				free(nam, M_SONAME);
1546 				free(tlscertname, M_NEWNFSMNT);
1547 				return (error);
1548 			}
1549 		}
1550 		nmp = malloc(sizeof (struct nfsmount) +
1551 		    krbnamelen + dirlen + srvkrbnamelen + 2,
1552 		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1553 		nmp->nm_tlscertname = tlscertname;
1554 		nmp->nm_newflag = newflag;
1555 		TAILQ_INIT(&nmp->nm_bufq);
1556 		TAILQ_INIT(&nmp->nm_sess);
1557 		if (clval == 0)
1558 			clval = (u_int64_t)nfsboottime.tv_sec;
1559 		nmp->nm_clval = clval++;
1560 		nmp->nm_krbnamelen = krbnamelen;
1561 		nmp->nm_dirpathlen = dirlen;
1562 		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1563 		if (td->td_ucred->cr_uid != (uid_t)0) {
1564 			/*
1565 			 * nm_uid is used to get KerberosV credentials for
1566 			 * the nfsv4 state handling operations if there is
1567 			 * no host based principal set. Use the uid of
1568 			 * this user if not root, since they are doing the
1569 			 * mount. I don't think setting this for root will
1570 			 * work, since root normally does not have user
1571 			 * credentials in a credentials cache.
1572 			 */
1573 			nmp->nm_uid = td->td_ucred->cr_uid;
1574 		} else {
1575 			/*
1576 			 * Just set to -1, so it won't be used.
1577 			 */
1578 			nmp->nm_uid = (uid_t)-1;
1579 		}
1580 
1581 		/* Copy and null terminate all the names */
1582 		if (nmp->nm_krbnamelen > 0) {
1583 			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1584 			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1585 		}
1586 		if (nmp->nm_dirpathlen > 0) {
1587 			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1588 			    nmp->nm_dirpathlen);
1589 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1590 			    + 1] = '\0';
1591 		}
1592 		if (nmp->nm_srvkrbnamelen > 0) {
1593 			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1594 			    nmp->nm_srvkrbnamelen);
1595 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1596 			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1597 		}
1598 		nmp->nm_sockreq.nr_cred = crhold(cred);
1599 		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1600 		mp->mnt_data = nmp;
1601 		nmp->nm_getinfo = nfs_getnlminfo;
1602 		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1603 	}
1604 	vfs_getnewfsid(mp);
1605 	nmp->nm_mountp = mp;
1606 	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1607 
1608 	/*
1609 	 * Since nfs_decode_args() might optionally set them, these
1610 	 * need to be set to defaults before the call, so that the
1611 	 * optional settings aren't overwritten.
1612 	 */
1613 	nmp->nm_nametimeo = nametimeo;
1614 	nmp->nm_negnametimeo = negnametimeo;
1615 	nmp->nm_timeo = NFS_TIMEO;
1616 	nmp->nm_retry = NFS_RETRANS;
1617 	nmp->nm_readahead = NFS_DEFRAHEAD;
1618 
1619 	/* This is empirical approximation of sqrt(hibufspace) * 256. */
1620 	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
1621 	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
1622 		nmp->nm_wcommitsize *= 2;
1623 	nmp->nm_wcommitsize *= 256;
1624 
1625 	tryminvers = false;
1626 	if ((argp->flags & NFSMNT_NFSV4) != 0) {
1627 		if (minvers < 0) {
1628 			tryminvers = true;
1629 			minvers = NFSV42_MINORVERSION;
1630 		}
1631 		nmp->nm_minorvers = minvers;
1632 	} else
1633 		nmp->nm_minorvers = 0;
1634 
1635 	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1636 
1637 	/*
1638 	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1639 	 * high, depending on whether we end up with negative offsets in
1640 	 * the client or server somewhere.  2GB-1 may be safer.
1641 	 *
1642 	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1643 	 * that we can handle until we find out otherwise.
1644 	 */
1645 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1646 		nmp->nm_maxfilesize = 0xffffffffLL;
1647 	else
1648 		nmp->nm_maxfilesize = OFF_MAX;
1649 
1650 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1651 		nmp->nm_wsize = NFS_WSIZE;
1652 		nmp->nm_rsize = NFS_RSIZE;
1653 		nmp->nm_readdirsize = NFS_READDIRSIZE;
1654 	}
1655 	nmp->nm_numgrps = NFS_MAXGRPS;
1656 	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1657 	if (nmp->nm_tprintf_delay < 0)
1658 		nmp->nm_tprintf_delay = 0;
1659 	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1660 	if (nmp->nm_tprintf_initial_delay < 0)
1661 		nmp->nm_tprintf_initial_delay = 0;
1662 	nmp->nm_fhsize = argp->fhsize;
1663 	if (nmp->nm_fhsize > 0)
1664 		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1665 	strlcpy(mp->mnt_stat.f_mntfromname, hst, MNAMELEN);
1666 	nmp->nm_nam = nam;
1667 	/* Set up the sockets and per-host congestion */
1668 	nmp->nm_sotype = argp->sotype;
1669 	nmp->nm_soproto = argp->proto;
1670 	nmp->nm_sockreq.nr_prog = NFS_PROG;
1671 	if ((argp->flags & NFSMNT_NFSV4))
1672 		nmp->nm_sockreq.nr_vers = NFS_VER4;
1673 	else if ((argp->flags & NFSMNT_NFSV3))
1674 		nmp->nm_sockreq.nr_vers = NFS_VER3;
1675 	else
1676 		nmp->nm_sockreq.nr_vers = NFS_VER2;
1677 
1678 	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0, false,
1679 	    &nmp->nm_sockreq.nr_client)))
1680 		goto bad;
1681 	/* For NFSv4, get the clientid now. */
1682 	if ((argp->flags & NFSMNT_NFSV4) != 0) {
1683 		NFSCL_DEBUG(3, "at getcl\n");
1684 		error = nfscl_getcl(mp, cred, td, tryminvers, true, &clp);
1685 		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1686 		if (error != 0)
1687 			goto bad;
1688 		if (aconn > 0 && nmp->nm_minorvers == 0) {
1689 			vfs_mount_error(mp, "nconnect should only be used "
1690 			    "for NFSv4.1/4.2 mounts");
1691 			error = EINVAL;
1692 			goto bad;
1693 		}
1694 		if (NFSHASSYSKRB5(nmp) && nmp->nm_minorvers == 0) {
1695 			vfs_mount_error(mp, "syskrb5 should only be used "
1696 			    "for NFSv4.1/4.2 mounts");
1697 			error = EINVAL;
1698 			goto bad;
1699 		}
1700 	}
1701 
1702 	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1703 	    nmp->nm_dirpathlen > 0) {
1704 		NFSCL_DEBUG(3, "in dirp\n");
1705 		/*
1706 		 * If the fhsize on the mount point == 0 for V4, the mount
1707 		 * path needs to be looked up.
1708 		 */
1709 		trycnt = 3;
1710 		do {
1711 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1712 			    cred, td);
1713 			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
1714 			if (error != 0 && (!NFSHASSYSKRB5(nmp) ||
1715 			    error != NFSERR_WRONGSEC))
1716 				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1717 		} while (error != 0 && --trycnt > 0 &&
1718 		    (!NFSHASSYSKRB5(nmp) || error != NFSERR_WRONGSEC));
1719 		if (error != 0 && (!NFSHASSYSKRB5(nmp) ||
1720 		    error != NFSERR_WRONGSEC))
1721 			goto bad;
1722 	}
1723 
1724 	/*
1725 	 * A reference count is needed on the nfsnode representing the
1726 	 * remote root.  If this object is not persistent, then backward
1727 	 * traversals of the mount point (i.e. "..") will not work if
1728 	 * the nfsnode gets flushed out of the cache. Ufs does not have
1729 	 * this problem, because one can identify root inodes by their
1730 	 * number == UFS_ROOTINO (2).
1731 	 * For the "syskrb5" mount, the file handle might not have
1732 	 * been acquired.  As such, use a "fake" file handle which
1733 	 * can never be returned by a server for the root vnode.
1734 	 */
1735 	if (nmp->nm_fhsize > 0 || NFSHASSYSKRB5(nmp)) {
1736 		/*
1737 		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1738 		 * non-zero for the root vnode. f_iosize will be set correctly
1739 		 * by nfs_statfs() before any I/O occurs.
1740 		 */
1741 		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1742 		if (nmp->nm_fhsize == 0) {
1743 			fakefh = malloc(NFSX_FHMAX + 1, M_TEMP, M_WAITOK |
1744 			    M_ZERO);
1745 			error = ncl_nget(mp, fakefh, NFSX_FHMAX + 1, &np,
1746 			    LK_EXCLUSIVE);
1747 			free(fakefh, M_TEMP);
1748 			nmp->nm_privflag |= NFSMNTP_FAKEROOTFH;
1749 		} else
1750 			error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1751 			    LK_EXCLUSIVE);
1752 		if (error)
1753 			goto bad;
1754 		*vpp = NFSTOV(np);
1755 
1756 		/*
1757 		 * Get file attributes and transfer parameters for the
1758 		 * mountpoint.  This has the side effect of filling in
1759 		 * (*vpp)->v_type with the correct value.
1760 		 */
1761 		ret = ENXIO;
1762 		if (nmp->nm_fhsize > 0)
1763 			ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh,
1764 			    nmp->nm_fhsize, 1, cred, td, &nfsva, NULL, &lease);
1765 		if (ret) {
1766 			/*
1767 			 * Just set default values to get things going.
1768 			 */
1769 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1770 			nfsva.na_vattr.va_type = VDIR;
1771 			nfsva.na_vattr.va_mode = 0777;
1772 			nfsva.na_vattr.va_nlink = 100;
1773 			nfsva.na_vattr.va_uid = (uid_t)0;
1774 			nfsva.na_vattr.va_gid = (gid_t)0;
1775 			nfsva.na_vattr.va_fileid = 2;
1776 			nfsva.na_vattr.va_gen = 1;
1777 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1778 			nfsva.na_vattr.va_size = 512 * 1024;
1779 			lease = 20;
1780 		}
1781 		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, 0, 1);
1782 		if ((argp->flags & NFSMNT_NFSV4) != 0) {
1783 			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1784 			NFSLOCKCLSTATE();
1785 			clp->nfsc_renew = NFSCL_RENEW(lease);
1786 			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1787 			clp->nfsc_clientidrev++;
1788 			if (clp->nfsc_clientidrev == 0)
1789 				clp->nfsc_clientidrev++;
1790 			NFSUNLOCKCLSTATE();
1791 			/*
1792 			 * Mount will succeed, so the renew thread can be
1793 			 * started now.
1794 			 */
1795 			nfscl_start_renewthread(clp);
1796 			nfscl_clientrelease(clp);
1797 		}
1798 		if (argp->flags & NFSMNT_NFSV3)
1799 			ncl_fsinfo(nmp, *vpp, cred, td);
1800 
1801 		/* Mark if the mount point supports NFSv4 ACLs. */
1802 		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1803 		    ret == 0 &&
1804 		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1805 			MNT_ILOCK(mp);
1806 			mp->mnt_flag |= MNT_NFS4ACLS;
1807 			MNT_IUNLOCK(mp);
1808 		}
1809 
1810 		/* Can now allow additional connections. */
1811 		if (aconn > 0)
1812 			nmp->nm_aconnect = aconn;
1813 
1814 		/*
1815 		 * Lose the lock but keep the ref.
1816 		 */
1817 		NFSVOPUNLOCK(*vpp);
1818 		vfs_cache_root_set(mp, *vpp);
1819 		return (0);
1820 	}
1821 	error = EIO;
1822 
1823 bad:
1824 	if (clp != NULL)
1825 		nfscl_clientrelease(clp);
1826 	newnfs_disconnect(NULL, &nmp->nm_sockreq);
1827 	crfree(nmp->nm_sockreq.nr_cred);
1828 	if (nmp->nm_sockreq.nr_auth != NULL)
1829 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1830 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1831 	mtx_destroy(&nmp->nm_mtx);
1832 	if (nmp->nm_clp != NULL) {
1833 		NFSLOCKCLSTATE();
1834 		LIST_REMOVE(nmp->nm_clp, nfsc_list);
1835 		NFSUNLOCKCLSTATE();
1836 		free(nmp->nm_clp, M_NFSCLCLIENT);
1837 	}
1838 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
1839 		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1840 		    dsp->nfsclds_sockp != NULL)
1841 			newnfs_disconnect(NULL, dsp->nfsclds_sockp);
1842 		nfscl_freenfsclds(dsp);
1843 	}
1844 	free(nmp->nm_tlscertname, M_NEWNFSMNT);
1845 	free(nmp, M_NEWNFSMNT);
1846 	free(nam, M_SONAME);
1847 	return (error);
1848 }
1849 
1850 /*
1851  * unmount system call
1852  */
1853 static int
nfs_unmount(struct mount * mp,int mntflags)1854 nfs_unmount(struct mount *mp, int mntflags)
1855 {
1856 	struct thread *td;
1857 	struct nfsmount *nmp;
1858 	int error, flags = 0, i, trycnt = 0;
1859 	struct nfsclds *dsp, *tdsp;
1860 	struct nfscldeleg *dp, *ndp;
1861 	struct nfscldeleghead dh;
1862 
1863 	td = curthread;
1864 	TAILQ_INIT(&dh);
1865 
1866 	if (mntflags & MNT_FORCE)
1867 		flags |= FORCECLOSE;
1868 	nmp = VFSTONFS(mp);
1869 	error = 0;
1870 	/*
1871 	 * Goes something like this..
1872 	 * - Call vflush() to clear out vnodes for this filesystem
1873 	 * - Close the socket
1874 	 * - Free up the data structures
1875 	 */
1876 	/* In the forced case, cancel any outstanding requests. */
1877 	if (mntflags & MNT_FORCE) {
1878 		NFSDDSLOCK();
1879 		if (nfsv4_findmirror(nmp) != NULL)
1880 			error = ENXIO;
1881 		NFSDDSUNLOCK();
1882 		if (error)
1883 			goto out;
1884 		error = newnfs_nmcancelreqs(nmp);
1885 		if (error)
1886 			goto out;
1887 		/* For a forced close, get rid of the renew thread now */
1888 		nfscl_umount(nmp, td, &dh);
1889 	}
1890 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1891 	do {
1892 		error = vflush(mp, 1, flags, td);
1893 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1894 			(void) nfs_catnap(PSOCK, error, "newndm");
1895 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1896 	if (error)
1897 		goto out;
1898 
1899 	/*
1900 	 * We are now committed to the unmount.
1901 	 */
1902 	if ((mntflags & MNT_FORCE) == 0)
1903 		nfscl_umount(nmp, td, NULL);
1904 	else {
1905 		mtx_lock(&nmp->nm_mtx);
1906 		nmp->nm_privflag |= NFSMNTP_FORCEDISM;
1907 		mtx_unlock(&nmp->nm_mtx);
1908 	}
1909 	/* Make sure no nfsiods are assigned to this mount. */
1910 	NFSLOCKIOD();
1911 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1912 		if (ncl_iodmount[i] == nmp) {
1913 			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1914 			ncl_iodmount[i] = NULL;
1915 		}
1916 	NFSUNLOCKIOD();
1917 
1918 	/*
1919 	 * We can now set mnt_data to NULL and wait for
1920 	 * nfssvc(NFSSVC_FORCEDISM) to complete.
1921 	 */
1922 	mtx_lock(&mountlist_mtx);
1923 	mtx_lock(&nmp->nm_mtx);
1924 	mp->mnt_data = NULL;
1925 	mtx_unlock(&mountlist_mtx);
1926 	while ((nmp->nm_privflag & NFSMNTP_CANCELRPCS) != 0)
1927 		msleep(nmp, &nmp->nm_mtx, PVFS, "nfsfdism", 0);
1928 	mtx_unlock(&nmp->nm_mtx);
1929 
1930 	newnfs_disconnect(nmp, &nmp->nm_sockreq);
1931 	crfree(nmp->nm_sockreq.nr_cred);
1932 	free(nmp->nm_nam, M_SONAME);
1933 	if (nmp->nm_sockreq.nr_auth != NULL)
1934 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1935 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1936 	mtx_destroy(&nmp->nm_mtx);
1937 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
1938 		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1939 		    dsp->nfsclds_sockp != NULL)
1940 			newnfs_disconnect(NULL, dsp->nfsclds_sockp);
1941 		nfscl_freenfsclds(dsp);
1942 	}
1943 	free(nmp->nm_tlscertname, M_NEWNFSMNT);
1944 	free(nmp, M_NEWNFSMNT);
1945 
1946 	/* Free up the delegation structures for forced dismounts. */
1947 	TAILQ_FOREACH_SAFE(dp, &dh, nfsdl_list, ndp) {
1948 		TAILQ_REMOVE(&dh, dp, nfsdl_list);
1949 		free(dp, M_NFSCLDELEG);
1950 	}
1951 out:
1952 	return (error);
1953 }
1954 
1955 /*
1956  * Return root of a filesystem
1957  */
1958 static int
nfs_root(struct mount * mp,int flags,struct vnode ** vpp)1959 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1960 {
1961 	struct vnode *vp;
1962 	struct nfsmount *nmp;
1963 	struct nfsnode *np;
1964 	int error;
1965 	char *fakefh;
1966 
1967 	nmp = VFSTONFS(mp);
1968 	if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0) {
1969 		/* Attempt to get the actual root file handle. */
1970 		if (nmp->nm_fhsize == 0)
1971 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1972 			    curthread->td_ucred, curthread);
1973 		fakefh = malloc(NFSX_FHMAX + 1, M_TEMP, M_WAITOK | M_ZERO);
1974 		error = ncl_nget(mp, fakefh, NFSX_FHMAX + 1, &np, flags);
1975 		free(fakefh, M_TEMP);
1976 	} else {
1977 		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1978 	}
1979 	if (error)
1980 		return error;
1981 	vp = NFSTOV(np);
1982 	/*
1983 	 * Get transfer parameters and attributes for root vnode once.
1984 	 */
1985 	mtx_lock(&nmp->nm_mtx);
1986 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1987 		mtx_unlock(&nmp->nm_mtx);
1988 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1989 	} else
1990 		mtx_unlock(&nmp->nm_mtx);
1991 	if (vp->v_type == VNON)
1992 	    vp->v_type = VDIR;
1993 	vp->v_vflag |= VV_ROOT;
1994 	*vpp = vp;
1995 	return (0);
1996 }
1997 
1998 /*
1999  * Flush out the buffer cache
2000  */
2001 /* ARGSUSED */
2002 static int
nfs_sync(struct mount * mp,int waitfor)2003 nfs_sync(struct mount *mp, int waitfor)
2004 {
2005 	struct vnode *vp, *mvp;
2006 	struct thread *td;
2007 	int error, allerror = 0;
2008 
2009 	td = curthread;
2010 
2011 	MNT_ILOCK(mp);
2012 	/*
2013 	 * If a forced dismount is in progress, return from here so that
2014 	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
2015 	 * calling VFS_UNMOUNT().
2016 	 */
2017 	if (NFSCL_FORCEDISM(mp)) {
2018 		MNT_IUNLOCK(mp);
2019 		return (EBADF);
2020 	}
2021 	MNT_IUNLOCK(mp);
2022 
2023 	if (waitfor == MNT_LAZY)
2024 		return (0);
2025 
2026 	/*
2027 	 * Force stale buffer cache information to be flushed.
2028 	 */
2029 loop:
2030 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
2031 		/* XXX Racy bv_cnt check. */
2032 		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0) {
2033 			VI_UNLOCK(vp);
2034 			continue;
2035 		}
2036 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
2037 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
2038 			goto loop;
2039 		}
2040 		error = VOP_FSYNC(vp, waitfor, td);
2041 		if (error)
2042 			allerror = error;
2043 		NFSVOPUNLOCK(vp);
2044 		vrele(vp);
2045 	}
2046 	return (allerror);
2047 }
2048 
2049 static int
nfs_sysctl(struct mount * mp,fsctlop_t op,struct sysctl_req * req)2050 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
2051 {
2052 	struct nfsmount *nmp = VFSTONFS(mp);
2053 	struct vfsquery vq;
2054 	int error;
2055 
2056 	bzero(&vq, sizeof(vq));
2057 	switch (op) {
2058 #if 0
2059 	case VFS_CTL_NOLOCKS:
2060 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
2061  		if (req->oldptr != NULL) {
2062  			error = SYSCTL_OUT(req, &val, sizeof(val));
2063  			if (error)
2064  				return (error);
2065  		}
2066  		if (req->newptr != NULL) {
2067  			error = SYSCTL_IN(req, &val, sizeof(val));
2068  			if (error)
2069  				return (error);
2070 			if (val)
2071 				nmp->nm_flag |= NFSMNT_NOLOCKS;
2072 			else
2073 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
2074  		}
2075 		break;
2076 #endif
2077 	case VFS_CTL_QUERY:
2078 		mtx_lock(&nmp->nm_mtx);
2079 		if (nmp->nm_state & NFSSTA_TIMEO)
2080 			vq.vq_flags |= VQ_NOTRESP;
2081 		mtx_unlock(&nmp->nm_mtx);
2082 #if 0
2083 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
2084 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
2085 			vq.vq_flags |= VQ_NOTRESPLOCK;
2086 #endif
2087 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
2088 		break;
2089  	case VFS_CTL_TIMEO:
2090  		if (req->oldptr != NULL) {
2091  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
2092  			    sizeof(nmp->nm_tprintf_initial_delay));
2093  			if (error)
2094  				return (error);
2095  		}
2096  		if (req->newptr != NULL) {
2097 			error = vfs_suser(mp, req->td);
2098 			if (error)
2099 				return (error);
2100  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
2101  			    sizeof(nmp->nm_tprintf_initial_delay));
2102  			if (error)
2103  				return (error);
2104  			if (nmp->nm_tprintf_initial_delay < 0)
2105  				nmp->nm_tprintf_initial_delay = 0;
2106  		}
2107 		break;
2108 	default:
2109 		return (ENOTSUP);
2110 	}
2111 	return (0);
2112 }
2113 
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.
 *
 * The return value of newnfs_nmcancelreqs() is not used.
 */
static void
nfs_purge(struct mount *mp)
{

	(void)newnfs_nmcancelreqs(VFSTONFS(mp));
}
2126 
2127 /*
2128  * Extract the information needed by the nlm from the nfs vnode.
2129  */
2130 static void
nfs_getnlminfo(struct vnode * vp,uint8_t * fhp,size_t * fhlenp,struct sockaddr_storage * sp,int * is_v3p,off_t * sizep,struct timeval * timeop)2131 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
2132     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
2133     struct timeval *timeop)
2134 {
2135 	struct nfsmount *nmp;
2136 	struct nfsnode *np = VTONFS(vp);
2137 
2138 	nmp = VFSTONFS(vp->v_mount);
2139 	if (fhlenp != NULL)
2140 		*fhlenp = (size_t)np->n_fhp->nfh_len;
2141 	if (fhp != NULL)
2142 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
2143 	if (sp != NULL)
2144 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
2145 	if (is_v3p != NULL)
2146 		*is_v3p = NFS_ISV3(vp);
2147 	if (sizep != NULL)
2148 		*sizep = np->n_size;
2149 	if (timeop != NULL) {
2150 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
2151 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
2152 	}
2153 }
2154 
/*
 * This function prints out an option name, based on the conditional
 * argument.
 *
 * Nothing is written unless testval is non-zero and the space left
 * (*blen) is larger than strlen(opt), so the name and its terminating
 * NUL always fit.  On a write, *buf is advanced and *blen reduced by the
 * number of characters produced.  nmp is not used.
 */
static __inline void nfscl_printopt(struct nfsmount *nmp, int testval,
    char *opt, char **buf, size_t *blen)
{
	int written;

	if (testval == 0)
		return;
	if (*blen <= strlen(opt))
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	/* Given the size check above, a mismatch is not expected. */
	if (written != strlen(opt))
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
2172 
/*
 * This function prints out an option's integer value as "<opt>=<optval>".
 *
 * Nothing is written unless the space left (*blen) is larger than
 * strlen(opt) + 1.  *buf and *blen are only updated when the formatted
 * text fit completely; if it was truncated they are left alone, so the
 * truncated text stays at *buf.  nmp is not used.
 */
static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
    char *opt, char **buf, size_t *blen)
{
	int written;

	if (*blen <= strlen(opt) + 1)
		return;

	/* Could result in truncated output string. */
	written = snprintf(*buf, *blen, "%s=%d", opt, optval);
	if (written >= *blen)
		return;
	*buf += written;
	*blen -= written;
}
2190 
2191 /*
2192  * Load the option flags and values into the buffer.
2193  */
nfscl_retopts(struct nfsmount * nmp,char * buffer,size_t buflen)2194 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
2195 {
2196 	char *buf;
2197 	size_t blen;
2198 
2199 	buf = buffer;
2200 	blen = buflen;
2201 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
2202 	    &blen);
2203 	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
2204 		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
2205 		    &blen);
2206 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
2207 		    &buf, &blen);
2208 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_ONEOPENOWN) != 0 &&
2209 		    nmp->nm_minorvers > 0, ",oneopenown", &buf, &blen);
2210 	}
2211 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
2212 	    &blen);
2213 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
2214 	    "nfsv2", &buf, &blen);
2215 	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
2216 	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
2217 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
2218 	    &buf, &blen);
2219 	nfscl_printopt(nmp, (nmp->nm_newflag & NFSMNT_TLS) != 0, ",tls", &buf,
2220 	    &blen);
2221 	nfscl_printopt(nmp, (nmp->nm_newflag & NFSMNT_SYSKRB5) != 0,
2222 	    ",syskrb5", &buf, &blen);
2223 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
2224 	    &buf, &blen);
2225 	nfscl_printoptval(nmp, nmp->nm_aconnect + 1, ",nconnect", &buf, &blen);
2226 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
2227 	    &blen);
2228 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
2229 	    &blen);
2230 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
2231 	    &blen);
2232 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
2233 	    &blen);
2234 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
2235 	    &blen);
2236 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
2237 	    ",noncontigwr", &buf, &blen);
2238 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
2239 	    0, ",lockd", &buf, &blen);
2240 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOLOCKD) != 0, ",nolockd",
2241 	    &buf, &blen);
2242 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
2243 	    &buf, &blen);
2244 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
2245 	    &buf, &blen);
2246 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2247 	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
2248 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2249 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
2250 	    &buf, &blen);
2251 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2252 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
2253 	    &buf, &blen);
2254 	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
2255 	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
2256 	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
2257 	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
2258 	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
2259 	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
2260 	    &blen);
2261 	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
2262 	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
2263 	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
2264 	    &blen);
2265 	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
2266 	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
2267 	    &blen);
2268 	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
2269 	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
2270 }
2271