xref: /freebsd/sys/fs/nfsclient/nfs_clport.c (revision b1f92fa22938fe29ab7e53692ffe0ed7a0ecc4d0)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #include "opt_inet6.h"
38 
39 #include <sys/capsicum.h>
40 
41 /*
42  * generally, I don't like #includes inside .h files, but it seems to
43  * be the easiest way to handle the port.
44  */
45 #include <sys/fail.h>
46 #include <sys/hash.h>
47 #include <sys/sysctl.h>
48 #include <fs/nfs/nfsport.h>
49 #include <netinet/if_ether.h>
50 #include <net/if_types.h>
51 
52 #include <fs/nfsclient/nfs_kdtrace.h>
53 
54 #ifdef KDTRACE_HOOKS
55 dtrace_nfsclient_attrcache_flush_probe_func_t
56 		dtrace_nfscl_attrcache_flush_done_probe;
57 uint32_t	nfscl_attrcache_flush_done_id;
58 
59 dtrace_nfsclient_attrcache_get_hit_probe_func_t
60 		dtrace_nfscl_attrcache_get_hit_probe;
61 uint32_t	nfscl_attrcache_get_hit_id;
62 
63 dtrace_nfsclient_attrcache_get_miss_probe_func_t
64 		dtrace_nfscl_attrcache_get_miss_probe;
65 uint32_t	nfscl_attrcache_get_miss_id;
66 
67 dtrace_nfsclient_attrcache_load_probe_func_t
68 		dtrace_nfscl_attrcache_load_done_probe;
69 uint32_t	nfscl_attrcache_load_done_id;
70 #endif /* KDTRACE_HOOKS */
71 
72 extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1;
73 extern struct vop_vector newnfs_vnodeops;
74 extern struct vop_vector newnfs_fifoops;
75 extern uma_zone_t newnfsnode_zone;
76 extern struct buf_ops buf_ops_newnfs;
77 extern int ncl_pbuf_freecnt;
78 extern short nfsv4_cbport;
79 extern int nfscl_enablecallb;
80 extern int nfs_numnfscbd;
81 extern int nfscl_inited;
82 struct mtx nfs_clstate_mutex;
83 struct mtx ncl_iod_mutex;
84 NFSDLOCKMUTEX;
85 
86 extern void (*ncl_call_invalcaches)(struct vnode *);
87 
88 SYSCTL_DECL(_vfs_nfs);
89 static int ncl_fileid_maxwarnings = 10;
90 SYSCTL_INT(_vfs_nfs, OID_AUTO, fileid_maxwarnings, CTLFLAG_RWTUN,
91     &ncl_fileid_maxwarnings, 0,
92     "Limit fileid corruption warnings; 0 is off; -1 is unlimited");
93 static volatile int ncl_fileid_nwarnings;
94 
95 static void nfscl_warn_fileid(struct nfsmount *, struct nfsvattr *,
96     struct nfsvattr *);
97 
98 /*
99  * Comparison function for vfs_hash functions.
100  */
101 int
102 newnfs_vncmpf(struct vnode *vp, void *arg)
103 {
104 	struct nfsfh *nfhp = (struct nfsfh *)arg;
105 	struct nfsnode *np = VTONFS(vp);
106 
107 	if (np->n_fhp->nfh_len != nfhp->nfh_len ||
108 	    NFSBCMP(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len))
109 		return (1);
110 	return (0);
111 }
112 
113 /*
114  * Look up a vnode/nfsnode by file handle.
115  * Callers must check for mount points!!
116  * In all cases, a pointer to a
117  * nfsnode structure is returned.
118  * This variant takes a "struct nfsfh *" as second argument and uses
119  * that structure up, either by hanging off the nfsnode or FREEing it.
120  */
121 int
122 nfscl_nget(struct mount *mntp, struct vnode *dvp, struct nfsfh *nfhp,
123     struct componentname *cnp, struct thread *td, struct nfsnode **npp,
124     void *stuff, int lkflags)
125 {
126 	struct nfsnode *np, *dnp;
127 	struct vnode *vp, *nvp;
128 	struct nfsv4node *newd, *oldd;
129 	int error;
130 	u_int hash;
131 	struct nfsmount *nmp;
132 
133 	nmp = VFSTONFS(mntp);
134 	dnp = VTONFS(dvp);
135 	*npp = NULL;
136 
137 	hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len, FNV1_32_INIT);
138 
139 	error = vfs_hash_get(mntp, hash, lkflags,
140 	    td, &nvp, newnfs_vncmpf, nfhp);
141 	if (error == 0 && nvp != NULL) {
142 		/*
143 		 * I believe there is a slight chance that vgonel() could
144 		 * get called on this vnode between when NFSVOPLOCK() drops
145 		 * the VI_LOCK() and vget() acquires it again, so that it
146 		 * hasn't yet had v_usecount incremented. If this were to
147 		 * happen, the VI_DOOMED flag would be set, so check for
148 		 * that here. Since we now have the v_usecount incremented,
149 		 * we should be ok until we vrele() it, if the VI_DOOMED
150 		 * flag isn't set now.
151 		 */
152 		VI_LOCK(nvp);
153 		if ((nvp->v_iflag & VI_DOOMED)) {
154 			VI_UNLOCK(nvp);
155 			vrele(nvp);
156 			error = ENOENT;
157 		} else {
158 			VI_UNLOCK(nvp);
159 		}
160 	}
161 	if (error) {
162 		FREE((caddr_t)nfhp, M_NFSFH);
163 		return (error);
164 	}
165 	if (nvp != NULL) {
166 		np = VTONFS(nvp);
167 		/*
168 		 * For NFSv4, check to see if it is the same name and
169 		 * replace the name, if it is different.
170 		 */
171 		oldd = newd = NULL;
172 		if ((nmp->nm_flag & NFSMNT_NFSV4) && np->n_v4 != NULL &&
173 		    nvp->v_type == VREG &&
174 		    (np->n_v4->n4_namelen != cnp->cn_namelen ||
175 		     NFSBCMP(cnp->cn_nameptr, NFS4NODENAME(np->n_v4),
176 		     cnp->cn_namelen) ||
177 		     dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen ||
178 		     NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
179 		     dnp->n_fhp->nfh_len))) {
180 		    MALLOC(newd, struct nfsv4node *,
181 			sizeof (struct nfsv4node) + dnp->n_fhp->nfh_len +
182 			+ cnp->cn_namelen - 1, M_NFSV4NODE, M_WAITOK);
183 		    NFSLOCKNODE(np);
184 		    if (newd != NULL && np->n_v4 != NULL && nvp->v_type == VREG
185 			&& (np->n_v4->n4_namelen != cnp->cn_namelen ||
186 			 NFSBCMP(cnp->cn_nameptr, NFS4NODENAME(np->n_v4),
187 			 cnp->cn_namelen) ||
188 			 dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen ||
189 			 NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
190 			 dnp->n_fhp->nfh_len))) {
191 			oldd = np->n_v4;
192 			np->n_v4 = newd;
193 			newd = NULL;
194 			np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len;
195 			np->n_v4->n4_namelen = cnp->cn_namelen;
196 			NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
197 			    dnp->n_fhp->nfh_len);
198 			NFSBCOPY(cnp->cn_nameptr, NFS4NODENAME(np->n_v4),
199 			    cnp->cn_namelen);
200 		    }
201 		    NFSUNLOCKNODE(np);
202 		}
203 		if (newd != NULL)
204 			FREE((caddr_t)newd, M_NFSV4NODE);
205 		if (oldd != NULL)
206 			FREE((caddr_t)oldd, M_NFSV4NODE);
207 		*npp = np;
208 		FREE((caddr_t)nfhp, M_NFSFH);
209 		return (0);
210 	}
211 	np = uma_zalloc(newnfsnode_zone, M_WAITOK | M_ZERO);
212 
213 	error = getnewvnode(nfs_vnode_tag, mntp, &newnfs_vnodeops, &nvp);
214 	if (error) {
215 		uma_zfree(newnfsnode_zone, np);
216 		FREE((caddr_t)nfhp, M_NFSFH);
217 		return (error);
218 	}
219 	vp = nvp;
220 	KASSERT(vp->v_bufobj.bo_bsize != 0, ("nfscl_nget: bo_bsize == 0"));
221 	vp->v_bufobj.bo_ops = &buf_ops_newnfs;
222 	vp->v_data = np;
223 	np->n_vnode = vp;
224 	/*
225 	 * Initialize the mutex even if the vnode is going to be a loser.
226 	 * This simplifies the logic in reclaim, which can then unconditionally
227 	 * destroy the mutex (in the case of the loser, or if hash_insert
228 	 * happened to return an error no special casing is needed).
229 	 */
230 	mtx_init(&np->n_mtx, "NEWNFSnode lock", NULL, MTX_DEF | MTX_DUPOK);
231 
232 	/*
233 	 * Are we getting the root? If so, make sure the vnode flags
234 	 * are correct
235 	 */
236 	if ((nfhp->nfh_len == nmp->nm_fhsize) &&
237 	    !bcmp(nfhp->nfh_fh, nmp->nm_fh, nfhp->nfh_len)) {
238 		if (vp->v_type == VNON)
239 			vp->v_type = VDIR;
240 		vp->v_vflag |= VV_ROOT;
241 	}
242 
243 	np->n_fhp = nfhp;
244 	/*
245 	 * For NFSv4, we have to attach the directory file handle and
246 	 * file name, so that Open Ops can be done later.
247 	 */
248 	if (nmp->nm_flag & NFSMNT_NFSV4) {
249 		MALLOC(np->n_v4, struct nfsv4node *, sizeof (struct nfsv4node)
250 		    + dnp->n_fhp->nfh_len + cnp->cn_namelen - 1, M_NFSV4NODE,
251 		    M_WAITOK);
252 		np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len;
253 		np->n_v4->n4_namelen = cnp->cn_namelen;
254 		NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
255 		    dnp->n_fhp->nfh_len);
256 		NFSBCOPY(cnp->cn_nameptr, NFS4NODENAME(np->n_v4),
257 		    cnp->cn_namelen);
258 	} else {
259 		np->n_v4 = NULL;
260 	}
261 
262 	/*
263 	 * NFS supports recursive and shared locking.
264 	 */
265 	lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_NOWITNESS, NULL);
266 	VN_LOCK_AREC(vp);
267 	VN_LOCK_ASHARE(vp);
268 	error = insmntque(vp, mntp);
269 	if (error != 0) {
270 		*npp = NULL;
271 		mtx_destroy(&np->n_mtx);
272 		FREE((caddr_t)nfhp, M_NFSFH);
273 		if (np->n_v4 != NULL)
274 			FREE((caddr_t)np->n_v4, M_NFSV4NODE);
275 		uma_zfree(newnfsnode_zone, np);
276 		return (error);
277 	}
278 	error = vfs_hash_insert(vp, hash, lkflags,
279 	    td, &nvp, newnfs_vncmpf, nfhp);
280 	if (error)
281 		return (error);
282 	if (nvp != NULL) {
283 		*npp = VTONFS(nvp);
284 		/* vfs_hash_insert() vput()'s the losing vnode */
285 		return (0);
286 	}
287 	*npp = np;
288 
289 	return (0);
290 }
291 
292 /*
293  * Anothe variant of nfs_nget(). This one is only used by reopen. It
294  * takes almost the same args as nfs_nget(), but only succeeds if an entry
295  * exists in the cache. (Since files should already be "open" with a
296  * vnode ref cnt on the node when reopen calls this, it should always
297  * succeed.)
298  * Also, don't get a vnode lock, since it may already be locked by some
299  * other process that is handling it. This is ok, since all other threads
300  * on the client are blocked by the nfsc_lock being exclusively held by the
301  * caller of this function.
302  */
303 int
304 nfscl_ngetreopen(struct mount *mntp, u_int8_t *fhp, int fhsize,
305     struct thread *td, struct nfsnode **npp)
306 {
307 	struct vnode *nvp;
308 	u_int hash;
309 	struct nfsfh *nfhp;
310 	int error;
311 
312 	*npp = NULL;
313 	/* For forced dismounts, just return error. */
314 	if ((mntp->mnt_kern_flag & MNTK_UNMOUNTF))
315 		return (EINTR);
316 	MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) + fhsize,
317 	    M_NFSFH, M_WAITOK);
318 	bcopy(fhp, &nfhp->nfh_fh[0], fhsize);
319 	nfhp->nfh_len = fhsize;
320 
321 	hash = fnv_32_buf(fhp, fhsize, FNV1_32_INIT);
322 
323 	/*
324 	 * First, try to get the vnode locked, but don't block for the lock.
325 	 */
326 	error = vfs_hash_get(mntp, hash, (LK_EXCLUSIVE | LK_NOWAIT), td, &nvp,
327 	    newnfs_vncmpf, nfhp);
328 	if (error == 0 && nvp != NULL) {
329 		NFSVOPUNLOCK(nvp, 0);
330 	} else if (error == EBUSY) {
331 		/*
332 		 * The LK_EXCLOTHER lock type tells nfs_lock1() to not try
333 		 * and lock the vnode, but just get a v_usecount on it.
334 		 * LK_NOWAIT is set so that when vget() returns ENOENT,
335 		 * vfs_hash_get() fails instead of looping.
336 		 * If this succeeds, it is safe so long as a vflush() with
337 		 * FORCECLOSE has not been done. Since the Renew thread is
338 		 * stopped and the MNTK_UNMOUNTF flag is set before doing
339 		 * a vflush() with FORCECLOSE, we should be ok here.
340 		 */
341 		if ((mntp->mnt_kern_flag & MNTK_UNMOUNTF))
342 			error = EINTR;
343 		else
344 			error = vfs_hash_get(mntp, hash,
345 			    (LK_EXCLOTHER | LK_NOWAIT), td, &nvp,
346 			    newnfs_vncmpf, nfhp);
347 	}
348 	FREE(nfhp, M_NFSFH);
349 	if (error)
350 		return (error);
351 	if (nvp != NULL) {
352 		*npp = VTONFS(nvp);
353 		return (0);
354 	}
355 	return (EINVAL);
356 }
357 
358 static void
359 nfscl_warn_fileid(struct nfsmount *nmp, struct nfsvattr *oldnap,
360     struct nfsvattr *newnap)
361 {
362 	int off;
363 
364 	if (ncl_fileid_maxwarnings >= 0 &&
365 	    ncl_fileid_nwarnings >= ncl_fileid_maxwarnings)
366 		return;
367 	off = 0;
368 	if (ncl_fileid_maxwarnings >= 0) {
369 		if (++ncl_fileid_nwarnings >= ncl_fileid_maxwarnings)
370 			off = 1;
371 	}
372 
373 	printf("newnfs: server '%s' error: fileid changed. "
374 	    "fsid %jx:%jx: expected fileid %#jx, got %#jx. "
375 	    "(BROKEN NFS SERVER OR MIDDLEWARE)\n",
376 	    nmp->nm_com.nmcom_hostname,
377 	    (uintmax_t)nmp->nm_fsid[0],
378 	    (uintmax_t)nmp->nm_fsid[1],
379 	    (uintmax_t)oldnap->na_fileid,
380 	    (uintmax_t)newnap->na_fileid);
381 
382 	if (off)
383 		printf("newnfs: Logged %d times about fileid corruption; "
384 		    "going quiet to avoid spamming logs excessively. (Limit "
385 		    "is: %d).\n", ncl_fileid_nwarnings,
386 		    ncl_fileid_maxwarnings);
387 }
388 
389 /*
390  * Load the attribute cache (that lives in the nfsnode entry) with
391  * the attributes of the second argument and
392  * Iff vaper not NULL
393  *    copy the attributes to *vaper
394  * Similar to nfs_loadattrcache(), except the attributes are passed in
395  * instead of being parsed out of the mbuf list.
396  */
397 int
398 nfscl_loadattrcache(struct vnode **vpp, struct nfsvattr *nap, void *nvaper,
399     void *stuff, int writeattr, int dontshrink)
400 {
401 	struct vnode *vp = *vpp;
402 	struct vattr *vap, *nvap = &nap->na_vattr, *vaper = nvaper;
403 	struct nfsnode *np;
404 	struct nfsmount *nmp;
405 	struct timespec mtime_save;
406 	u_quad_t nsize;
407 	int setnsize, error, force_fid_err;
408 
409 	error = 0;
410 	setnsize = 0;
411 	nsize = 0;
412 
413 	/*
414 	 * If v_type == VNON it is a new node, so fill in the v_type,
415 	 * n_mtime fields. Check to see if it represents a special
416 	 * device, and if so, check for a possible alias. Once the
417 	 * correct vnode has been obtained, fill in the rest of the
418 	 * information.
419 	 */
420 	np = VTONFS(vp);
421 	NFSLOCKNODE(np);
422 	if (vp->v_type != nvap->va_type) {
423 		vp->v_type = nvap->va_type;
424 		if (vp->v_type == VFIFO)
425 			vp->v_op = &newnfs_fifoops;
426 		np->n_mtime = nvap->va_mtime;
427 	}
428 	nmp = VFSTONFS(vp->v_mount);
429 	vap = &np->n_vattr.na_vattr;
430 	mtime_save = vap->va_mtime;
431 	if (writeattr) {
432 		np->n_vattr.na_filerev = nap->na_filerev;
433 		np->n_vattr.na_size = nap->na_size;
434 		np->n_vattr.na_mtime = nap->na_mtime;
435 		np->n_vattr.na_ctime = nap->na_ctime;
436 		np->n_vattr.na_fsid = nap->na_fsid;
437 		np->n_vattr.na_mode = nap->na_mode;
438 	} else {
439 		force_fid_err = 0;
440 		KFAIL_POINT_ERROR(DEBUG_FP, nfscl_force_fileid_warning,
441 		    force_fid_err);
442 		/*
443 		 * BROKEN NFS SERVER OR MIDDLEWARE
444 		 *
445 		 * Certain NFS servers (certain old proprietary filers ca.
446 		 * 2006) or broken middleboxes (e.g. WAN accelerator products)
447 		 * will respond to GETATTR requests with results for a
448 		 * different fileid.
449 		 *
450 		 * The WAN accelerator we've observed not only serves stale
451 		 * cache results for a given file, it also occasionally serves
452 		 * results for wholly different files.  This causes surprising
453 		 * problems; for example the cached size attribute of a file
454 		 * may truncate down and then back up, resulting in zero
455 		 * regions in file contents read by applications.  We observed
456 		 * this reliably with Clang and .c files during parallel build.
457 		 * A pcap revealed packet fragmentation and GETATTR RPC
458 		 * responses with wholly wrong fileids.
459 		 */
460 		if ((np->n_vattr.na_fileid != 0 &&
461 		     np->n_vattr.na_fileid != nap->na_fileid) ||
462 		    force_fid_err) {
463 			nfscl_warn_fileid(nmp, &np->n_vattr, nap);
464 			error = EIDRM;
465 			goto out;
466 		}
467 		NFSBCOPY((caddr_t)nap, (caddr_t)&np->n_vattr,
468 		    sizeof (struct nfsvattr));
469 	}
470 
471 	/*
472 	 * For NFSv4, if the node's fsid is not equal to the mount point's
473 	 * fsid, return the low order 32bits of the node's fsid. This
474 	 * allows getcwd(3) to work. There is a chance that the fsid might
475 	 * be the same as a local fs, but since this is in an NFS mount
476 	 * point, I don't think that will cause any problems?
477 	 */
478 	if (NFSHASNFSV4(nmp) && NFSHASHASSETFSID(nmp) &&
479 	    (nmp->nm_fsid[0] != np->n_vattr.na_filesid[0] ||
480 	     nmp->nm_fsid[1] != np->n_vattr.na_filesid[1])) {
481 		/*
482 		 * va_fsid needs to be set to some value derived from
483 		 * np->n_vattr.na_filesid that is not equal
484 		 * vp->v_mount->mnt_stat.f_fsid[0], so that it changes
485 		 * from the value used for the top level server volume
486 		 * in the mounted subtree.
487 		 */
488 		if (vp->v_mount->mnt_stat.f_fsid.val[0] !=
489 		    (uint32_t)np->n_vattr.na_filesid[0])
490 			vap->va_fsid = (uint32_t)np->n_vattr.na_filesid[0];
491 		else
492 			vap->va_fsid = (uint32_t)hash32_buf(
493 			    np->n_vattr.na_filesid, 2 * sizeof(uint64_t), 0);
494 	} else
495 		vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
496 	np->n_attrstamp = time_second;
497 	if (vap->va_size != np->n_size) {
498 		if (vap->va_type == VREG) {
499 			if (dontshrink && vap->va_size < np->n_size) {
500 				/*
501 				 * We've been told not to shrink the file;
502 				 * zero np->n_attrstamp to indicate that
503 				 * the attributes are stale.
504 				 */
505 				vap->va_size = np->n_size;
506 				np->n_attrstamp = 0;
507 				KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
508 				vnode_pager_setsize(vp, np->n_size);
509 			} else if (np->n_flag & NMODIFIED) {
510 				/*
511 				 * We've modified the file: Use the larger
512 				 * of our size, and the server's size.
513 				 */
514 				if (vap->va_size < np->n_size) {
515 					vap->va_size = np->n_size;
516 				} else {
517 					np->n_size = vap->va_size;
518 					np->n_flag |= NSIZECHANGED;
519 				}
520 				vnode_pager_setsize(vp, np->n_size);
521 			} else if (vap->va_size < np->n_size) {
522 				/*
523 				 * When shrinking the size, the call to
524 				 * vnode_pager_setsize() cannot be done
525 				 * with the mutex held, so delay it until
526 				 * after the mtx_unlock call.
527 				 */
528 				nsize = np->n_size = vap->va_size;
529 				np->n_flag |= NSIZECHANGED;
530 				setnsize = 1;
531 			} else {
532 				np->n_size = vap->va_size;
533 				np->n_flag |= NSIZECHANGED;
534 				vnode_pager_setsize(vp, np->n_size);
535 			}
536 		} else {
537 			np->n_size = vap->va_size;
538 		}
539 	}
540 	/*
541 	 * The following checks are added to prevent a race between (say)
542 	 * a READDIR+ and a WRITE.
543 	 * READDIR+, WRITE requests sent out.
544 	 * READDIR+ resp, WRITE resp received on client.
545 	 * However, the WRITE resp was handled before the READDIR+ resp
546 	 * causing the post op attrs from the write to be loaded first
547 	 * and the attrs from the READDIR+ to be loaded later. If this
548 	 * happens, we have stale attrs loaded into the attrcache.
549 	 * We detect this by for the mtime moving back. We invalidate the
550 	 * attrcache when this happens.
551 	 */
552 	if (timespeccmp(&mtime_save, &vap->va_mtime, >)) {
553 		/* Size changed or mtime went backwards */
554 		np->n_attrstamp = 0;
555 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
556 	}
557 	if (vaper != NULL) {
558 		NFSBCOPY((caddr_t)vap, (caddr_t)vaper, sizeof(*vap));
559 		if (np->n_flag & NCHG) {
560 			if (np->n_flag & NACC)
561 				vaper->va_atime = np->n_atim;
562 			if (np->n_flag & NUPD)
563 				vaper->va_mtime = np->n_mtim;
564 		}
565 	}
566 
567 out:
568 #ifdef KDTRACE_HOOKS
569 	if (np->n_attrstamp != 0)
570 		KDTRACE_NFS_ATTRCACHE_LOAD_DONE(vp, vap, error);
571 #endif
572 	NFSUNLOCKNODE(np);
573 	if (setnsize)
574 		vnode_pager_setsize(vp, nsize);
575 	return (error);
576 }
577 
578 /*
579  * Fill in the client id name. For these bytes:
580  * 1 - they must be unique
581  * 2 - they should be persistent across client reboots
582  * 1 is more critical than 2
583  * Use the mount point's unique id plus either the uuid or, if that
584  * isn't set, random junk.
585  */
586 void
587 nfscl_fillclid(u_int64_t clval, char *uuid, u_int8_t *cp, u_int16_t idlen)
588 {
589 	int uuidlen;
590 
591 	/*
592 	 * First, put in the 64bit mount point identifier.
593 	 */
594 	if (idlen >= sizeof (u_int64_t)) {
595 		NFSBCOPY((caddr_t)&clval, cp, sizeof (u_int64_t));
596 		cp += sizeof (u_int64_t);
597 		idlen -= sizeof (u_int64_t);
598 	}
599 
600 	/*
601 	 * If uuid is non-zero length, use it.
602 	 */
603 	uuidlen = strlen(uuid);
604 	if (uuidlen > 0 && idlen >= uuidlen) {
605 		NFSBCOPY(uuid, cp, uuidlen);
606 		cp += uuidlen;
607 		idlen -= uuidlen;
608 	}
609 
610 	/*
611 	 * This only normally happens if the uuid isn't set.
612 	 */
613 	while (idlen > 0) {
614 		*cp++ = (u_int8_t)(arc4random() % 256);
615 		idlen--;
616 	}
617 }
618 
619 /*
620  * Fill in a lock owner name. For now, pid + the process's creation time.
621  */
622 void
623 nfscl_filllockowner(void *id, u_int8_t *cp, int flags)
624 {
625 	union {
626 		u_int32_t	lval;
627 		u_int8_t	cval[4];
628 	} tl;
629 	struct proc *p;
630 
631 	if (id == NULL) {
632 		printf("NULL id\n");
633 		bzero(cp, NFSV4CL_LOCKNAMELEN);
634 		return;
635 	}
636 	if ((flags & F_POSIX) != 0) {
637 		p = (struct proc *)id;
638 		tl.lval = p->p_pid;
639 		*cp++ = tl.cval[0];
640 		*cp++ = tl.cval[1];
641 		*cp++ = tl.cval[2];
642 		*cp++ = tl.cval[3];
643 		tl.lval = p->p_stats->p_start.tv_sec;
644 		*cp++ = tl.cval[0];
645 		*cp++ = tl.cval[1];
646 		*cp++ = tl.cval[2];
647 		*cp++ = tl.cval[3];
648 		tl.lval = p->p_stats->p_start.tv_usec;
649 		*cp++ = tl.cval[0];
650 		*cp++ = tl.cval[1];
651 		*cp++ = tl.cval[2];
652 		*cp = tl.cval[3];
653 	} else if ((flags & F_FLOCK) != 0) {
654 		bcopy(&id, cp, sizeof(id));
655 		bzero(&cp[sizeof(id)], NFSV4CL_LOCKNAMELEN - sizeof(id));
656 	} else {
657 		printf("nfscl_filllockowner: not F_POSIX or F_FLOCK\n");
658 		bzero(cp, NFSV4CL_LOCKNAMELEN);
659 	}
660 }
661 
662 /*
663  * Find the parent process for the thread passed in as an argument.
664  * If none exists, return NULL, otherwise return a thread for the parent.
665  * (Can be any of the threads, since it is only used for td->td_proc.)
666  */
667 NFSPROC_T *
668 nfscl_getparent(struct thread *td)
669 {
670 	struct proc *p;
671 	struct thread *ptd;
672 
673 	if (td == NULL)
674 		return (NULL);
675 	p = td->td_proc;
676 	if (p->p_pid == 0)
677 		return (NULL);
678 	p = p->p_pptr;
679 	if (p == NULL)
680 		return (NULL);
681 	ptd = TAILQ_FIRST(&p->p_threads);
682 	return (ptd);
683 }
684 
685 /*
686  * Start up the renew kernel thread.
687  */
688 static void
689 start_nfscl(void *arg)
690 {
691 	struct nfsclclient *clp;
692 	struct thread *td;
693 
694 	clp = (struct nfsclclient *)arg;
695 	td = TAILQ_FIRST(&clp->nfsc_renewthread->p_threads);
696 	nfscl_renewthread(clp, td);
697 	kproc_exit(0);
698 }
699 
700 void
701 nfscl_start_renewthread(struct nfsclclient *clp)
702 {
703 
704 	kproc_create(start_nfscl, (void *)clp, &clp->nfsc_renewthread, 0, 0,
705 	    "nfscl");
706 }
707 
708 /*
709  * Handle wcc_data.
710  * For NFSv4, it assumes that nfsv4_wccattr() was used to set up the getattr
711  * as the first Op after PutFH.
712  * (For NFSv4, the postop attributes are after the Op, so they can't be
713  *  parsed here. A separate call to nfscl_postop_attr() is required.)
714  */
715 int
716 nfscl_wcc_data(struct nfsrv_descript *nd, struct vnode *vp,
717     struct nfsvattr *nap, int *flagp, int *wccflagp, void *stuff)
718 {
719 	u_int32_t *tl;
720 	struct nfsnode *np = VTONFS(vp);
721 	struct nfsvattr nfsva;
722 	int error = 0;
723 
724 	if (wccflagp != NULL)
725 		*wccflagp = 0;
726 	if (nd->nd_flag & ND_NFSV3) {
727 		*flagp = 0;
728 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
729 		if (*tl == newnfs_true) {
730 			NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
731 			if (wccflagp != NULL) {
732 				mtx_lock(&np->n_mtx);
733 				*wccflagp = (np->n_mtime.tv_sec ==
734 				    fxdr_unsigned(u_int32_t, *(tl + 2)) &&
735 				    np->n_mtime.tv_nsec ==
736 				    fxdr_unsigned(u_int32_t, *(tl + 3)));
737 				mtx_unlock(&np->n_mtx);
738 			}
739 		}
740 		error = nfscl_postop_attr(nd, nap, flagp, stuff);
741 	} else if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR))
742 	    == (ND_NFSV4 | ND_V4WCCATTR)) {
743 		error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
744 		    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
745 		    NULL, NULL, NULL, NULL, NULL);
746 		if (error)
747 			return (error);
748 		/*
749 		 * Get rid of Op# and status for next op.
750 		 */
751 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
752 		if (*++tl)
753 			nd->nd_flag |= ND_NOMOREDATA;
754 		if (wccflagp != NULL &&
755 		    nfsva.na_vattr.va_mtime.tv_sec != 0) {
756 			mtx_lock(&np->n_mtx);
757 			*wccflagp = (np->n_mtime.tv_sec ==
758 			    nfsva.na_vattr.va_mtime.tv_sec &&
759 			    np->n_mtime.tv_nsec ==
760 			    nfsva.na_vattr.va_mtime.tv_sec);
761 			mtx_unlock(&np->n_mtx);
762 		}
763 	}
764 nfsmout:
765 	return (error);
766 }
767 
768 /*
769  * Get postop attributes.
770  */
771 int
772 nfscl_postop_attr(struct nfsrv_descript *nd, struct nfsvattr *nap, int *retp,
773     void *stuff)
774 {
775 	u_int32_t *tl;
776 	int error = 0;
777 
778 	*retp = 0;
779 	if (nd->nd_flag & ND_NOMOREDATA)
780 		return (error);
781 	if (nd->nd_flag & ND_NFSV3) {
782 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
783 		*retp = fxdr_unsigned(int, *tl);
784 	} else if (nd->nd_flag & ND_NFSV4) {
785 		/*
786 		 * For NFSv4, the postop attr are at the end, so no point
787 		 * in looking if nd_repstat != 0.
788 		 */
789 		if (!nd->nd_repstat) {
790 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
791 			if (*(tl + 1))
792 				/* should never happen since nd_repstat != 0 */
793 				nd->nd_flag |= ND_NOMOREDATA;
794 			else
795 				*retp = 1;
796 		}
797 	} else if (!nd->nd_repstat) {
798 		/* For NFSv2, the attributes are here iff nd_repstat == 0 */
799 		*retp = 1;
800 	}
801 	if (*retp) {
802 		error = nfsm_loadattr(nd, nap);
803 		if (error)
804 			*retp = 0;
805 	}
806 nfsmout:
807 	return (error);
808 }
809 
810 /*
811  * Fill in the setable attributes. The full argument indicates whether
812  * to fill in them all or just mode and time.
813  */
814 void
815 nfscl_fillsattr(struct nfsrv_descript *nd, struct vattr *vap,
816     struct vnode *vp, int flags, u_int32_t rdev)
817 {
818 	u_int32_t *tl;
819 	struct nfsv2_sattr *sp;
820 	nfsattrbit_t attrbits;
821 
822 	switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) {
823 	case ND_NFSV2:
824 		NFSM_BUILD(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
825 		if (vap->va_mode == (mode_t)VNOVAL)
826 			sp->sa_mode = newnfs_xdrneg1;
827 		else
828 			sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
829 		if (vap->va_uid == (uid_t)VNOVAL)
830 			sp->sa_uid = newnfs_xdrneg1;
831 		else
832 			sp->sa_uid = txdr_unsigned(vap->va_uid);
833 		if (vap->va_gid == (gid_t)VNOVAL)
834 			sp->sa_gid = newnfs_xdrneg1;
835 		else
836 			sp->sa_gid = txdr_unsigned(vap->va_gid);
837 		if (flags & NFSSATTR_SIZE0)
838 			sp->sa_size = 0;
839 		else if (flags & NFSSATTR_SIZENEG1)
840 			sp->sa_size = newnfs_xdrneg1;
841 		else if (flags & NFSSATTR_SIZERDEV)
842 			sp->sa_size = txdr_unsigned(rdev);
843 		else
844 			sp->sa_size = txdr_unsigned(vap->va_size);
845 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
846 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
847 		break;
848 	case ND_NFSV3:
849 		if (vap->va_mode != (mode_t)VNOVAL) {
850 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
851 			*tl++ = newnfs_true;
852 			*tl = txdr_unsigned(vap->va_mode);
853 		} else {
854 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
855 			*tl = newnfs_false;
856 		}
857 		if ((flags & NFSSATTR_FULL) && vap->va_uid != (uid_t)VNOVAL) {
858 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
859 			*tl++ = newnfs_true;
860 			*tl = txdr_unsigned(vap->va_uid);
861 		} else {
862 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
863 			*tl = newnfs_false;
864 		}
865 		if ((flags & NFSSATTR_FULL) && vap->va_gid != (gid_t)VNOVAL) {
866 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
867 			*tl++ = newnfs_true;
868 			*tl = txdr_unsigned(vap->va_gid);
869 		} else {
870 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
871 			*tl = newnfs_false;
872 		}
873 		if ((flags & NFSSATTR_FULL) && vap->va_size != VNOVAL) {
874 			NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
875 			*tl++ = newnfs_true;
876 			txdr_hyper(vap->va_size, tl);
877 		} else {
878 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
879 			*tl = newnfs_false;
880 		}
881 		if (vap->va_atime.tv_sec != VNOVAL) {
882 			if ((vap->va_vaflags & VA_UTIMES_NULL) == 0) {
883 				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
884 				*tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT);
885 				txdr_nfsv3time(&vap->va_atime, tl);
886 			} else {
887 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
888 				*tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER);
889 			}
890 		} else {
891 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
892 			*tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE);
893 		}
894 		if (vap->va_mtime.tv_sec != VNOVAL) {
895 			if ((vap->va_vaflags & VA_UTIMES_NULL) == 0) {
896 				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
897 				*tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT);
898 				txdr_nfsv3time(&vap->va_mtime, tl);
899 			} else {
900 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
901 				*tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER);
902 			}
903 		} else {
904 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
905 			*tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE);
906 		}
907 		break;
908 	case ND_NFSV4:
909 		NFSZERO_ATTRBIT(&attrbits);
910 		if (vap->va_mode != (mode_t)VNOVAL)
911 			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MODE);
912 		if ((flags & NFSSATTR_FULL) && vap->va_uid != (uid_t)VNOVAL)
913 			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER);
914 		if ((flags & NFSSATTR_FULL) && vap->va_gid != (gid_t)VNOVAL)
915 			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNERGROUP);
916 		if ((flags & NFSSATTR_FULL) && vap->va_size != VNOVAL)
917 			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
918 		if (vap->va_atime.tv_sec != VNOVAL)
919 			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET);
920 		if (vap->va_mtime.tv_sec != VNOVAL)
921 			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFYSET);
922 		(void) nfsv4_fillattr(nd, vp->v_mount, vp, NULL, vap, NULL, 0,
923 		    &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0);
924 		break;
925 	};
926 }
927 
928 /*
929  * nfscl_request() - mostly a wrapper for newnfs_request().
930  */
931 int
932 nfscl_request(struct nfsrv_descript *nd, struct vnode *vp, NFSPROC_T *p,
933     struct ucred *cred, void *stuff)
934 {
935 	int ret, vers;
936 	struct nfsmount *nmp;
937 
938 	nmp = VFSTONFS(vp->v_mount);
939 	if (nd->nd_flag & ND_NFSV4)
940 		vers = NFS_VER4;
941 	else if (nd->nd_flag & ND_NFSV3)
942 		vers = NFS_VER3;
943 	else
944 		vers = NFS_VER2;
945 	ret = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
946 		NFS_PROG, vers, NULL, 1, NULL, NULL);
947 	return (ret);
948 }
949 
950 /*
951  * fill in this bsden's variant of statfs using nfsstatfs.
952  */
953 void
954 nfscl_loadsbinfo(struct nfsmount *nmp, struct nfsstatfs *sfp, void *statfs)
955 {
956 	struct statfs *sbp = (struct statfs *)statfs;
957 
958 	if (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) {
959 		sbp->f_bsize = NFS_FABLKSIZE;
960 		sbp->f_blocks = sfp->sf_tbytes / NFS_FABLKSIZE;
961 		sbp->f_bfree = sfp->sf_fbytes / NFS_FABLKSIZE;
962 		/*
963 		 * Although sf_abytes is uint64_t and f_bavail is int64_t,
964 		 * the value after dividing by NFS_FABLKSIZE is small
965 		 * enough that it will fit in 63bits, so it is ok to
966 		 * assign it to f_bavail without fear that it will become
967 		 * negative.
968 		 */
969 		sbp->f_bavail = sfp->sf_abytes / NFS_FABLKSIZE;
970 		sbp->f_files = sfp->sf_tfiles;
971 		/* Since f_ffree is int64_t, clip it to 63bits. */
972 		if (sfp->sf_ffiles > INT64_MAX)
973 			sbp->f_ffree = INT64_MAX;
974 		else
975 			sbp->f_ffree = sfp->sf_ffiles;
976 	} else if ((nmp->nm_flag & NFSMNT_NFSV4) == 0) {
977 		/*
978 		 * The type casts to (int32_t) ensure that this code is
979 		 * compatible with the old NFS client, in that it will
980 		 * propagate bit31 to the high order bits. This may or may
981 		 * not be correct for NFSv2, but since it is a legacy
982 		 * environment, I'd rather retain backwards compatibility.
983 		 */
984 		sbp->f_bsize = (int32_t)sfp->sf_bsize;
985 		sbp->f_blocks = (int32_t)sfp->sf_blocks;
986 		sbp->f_bfree = (int32_t)sfp->sf_bfree;
987 		sbp->f_bavail = (int32_t)sfp->sf_bavail;
988 		sbp->f_files = 0;
989 		sbp->f_ffree = 0;
990 	}
991 }
992 
993 /*
994  * Use the fsinfo stuff to update the mount point.
995  */
996 void
997 nfscl_loadfsinfo(struct nfsmount *nmp, struct nfsfsinfo *fsp)
998 {
999 
1000 	if ((nmp->nm_wsize == 0 || fsp->fs_wtpref < nmp->nm_wsize) &&
1001 	    fsp->fs_wtpref >= NFS_FABLKSIZE)
1002 		nmp->nm_wsize = (fsp->fs_wtpref + NFS_FABLKSIZE - 1) &
1003 		    ~(NFS_FABLKSIZE - 1);
1004 	if (fsp->fs_wtmax < nmp->nm_wsize && fsp->fs_wtmax > 0) {
1005 		nmp->nm_wsize = fsp->fs_wtmax & ~(NFS_FABLKSIZE - 1);
1006 		if (nmp->nm_wsize == 0)
1007 			nmp->nm_wsize = fsp->fs_wtmax;
1008 	}
1009 	if (nmp->nm_wsize < NFS_FABLKSIZE)
1010 		nmp->nm_wsize = NFS_FABLKSIZE;
1011 	if ((nmp->nm_rsize == 0 || fsp->fs_rtpref < nmp->nm_rsize) &&
1012 	    fsp->fs_rtpref >= NFS_FABLKSIZE)
1013 		nmp->nm_rsize = (fsp->fs_rtpref + NFS_FABLKSIZE - 1) &
1014 		    ~(NFS_FABLKSIZE - 1);
1015 	if (fsp->fs_rtmax < nmp->nm_rsize && fsp->fs_rtmax > 0) {
1016 		nmp->nm_rsize = fsp->fs_rtmax & ~(NFS_FABLKSIZE - 1);
1017 		if (nmp->nm_rsize == 0)
1018 			nmp->nm_rsize = fsp->fs_rtmax;
1019 	}
1020 	if (nmp->nm_rsize < NFS_FABLKSIZE)
1021 		nmp->nm_rsize = NFS_FABLKSIZE;
1022 	if ((nmp->nm_readdirsize == 0 || fsp->fs_dtpref < nmp->nm_readdirsize)
1023 	    && fsp->fs_dtpref >= NFS_DIRBLKSIZ)
1024 		nmp->nm_readdirsize = (fsp->fs_dtpref + NFS_DIRBLKSIZ - 1) &
1025 		    ~(NFS_DIRBLKSIZ - 1);
1026 	if (fsp->fs_rtmax < nmp->nm_readdirsize && fsp->fs_rtmax > 0) {
1027 		nmp->nm_readdirsize = fsp->fs_rtmax & ~(NFS_DIRBLKSIZ - 1);
1028 		if (nmp->nm_readdirsize == 0)
1029 			nmp->nm_readdirsize = fsp->fs_rtmax;
1030 	}
1031 	if (nmp->nm_readdirsize < NFS_DIRBLKSIZ)
1032 		nmp->nm_readdirsize = NFS_DIRBLKSIZ;
1033 	if (fsp->fs_maxfilesize > 0 &&
1034 	    fsp->fs_maxfilesize < nmp->nm_maxfilesize)
1035 		nmp->nm_maxfilesize = fsp->fs_maxfilesize;
1036 	nmp->nm_mountp->mnt_stat.f_iosize = newnfs_iosize(nmp);
1037 	nmp->nm_state |= NFSSTA_GOTFSINFO;
1038 }
1039 
1040 /*
1041  * Get a pointer to my IP addrress and return it.
1042  * Return NULL if you can't find one.
1043  */
1044 u_int8_t *
1045 nfscl_getmyip(struct nfsmount *nmp, int *isinet6p)
1046 {
1047 	struct sockaddr_in sad, *sin;
1048 	struct rtentry *rt;
1049 	u_int8_t *retp = NULL;
1050 	static struct in_addr laddr;
1051 
1052 	*isinet6p = 0;
1053 	/*
1054 	 * Loop up a route for the destination address.
1055 	 */
1056 	if (nmp->nm_nam->sa_family == AF_INET) {
1057 		bzero(&sad, sizeof (sad));
1058 		sin = (struct sockaddr_in *)nmp->nm_nam;
1059 		sad.sin_family = AF_INET;
1060 		sad.sin_len = sizeof (struct sockaddr_in);
1061 		sad.sin_addr.s_addr = sin->sin_addr.s_addr;
1062 		CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred));
1063 		rt = rtalloc1_fib((struct sockaddr *)&sad, 0, 0UL,
1064 		     curthread->td_proc->p_fibnum);
1065 		if (rt != NULL) {
1066 			if (rt->rt_ifp != NULL &&
1067 			    rt->rt_ifa != NULL &&
1068 			    ((rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) &&
1069 			    rt->rt_ifa->ifa_addr->sa_family == AF_INET) {
1070 				sin = (struct sockaddr_in *)
1071 				    rt->rt_ifa->ifa_addr;
1072 				laddr.s_addr = sin->sin_addr.s_addr;
1073 				retp = (u_int8_t *)&laddr;
1074 			}
1075 			RTFREE_LOCKED(rt);
1076 		}
1077 		CURVNET_RESTORE();
1078 #ifdef INET6
1079 	} else if (nmp->nm_nam->sa_family == AF_INET6) {
1080 		struct sockaddr_in6 sad6, *sin6;
1081 		static struct in6_addr laddr6;
1082 
1083 		bzero(&sad6, sizeof (sad6));
1084 		sin6 = (struct sockaddr_in6 *)nmp->nm_nam;
1085 		sad6.sin6_family = AF_INET6;
1086 		sad6.sin6_len = sizeof (struct sockaddr_in6);
1087 		sad6.sin6_addr = sin6->sin6_addr;
1088 		CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred));
1089 		rt = rtalloc1_fib((struct sockaddr *)&sad6, 0, 0UL,
1090 		     curthread->td_proc->p_fibnum);
1091 		if (rt != NULL) {
1092 			if (rt->rt_ifp != NULL &&
1093 			    rt->rt_ifa != NULL &&
1094 			    ((rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) &&
1095 			    rt->rt_ifa->ifa_addr->sa_family == AF_INET6) {
1096 				sin6 = (struct sockaddr_in6 *)
1097 				    rt->rt_ifa->ifa_addr;
1098 				laddr6 = sin6->sin6_addr;
1099 				retp = (u_int8_t *)&laddr6;
1100 				*isinet6p = 1;
1101 			}
1102 			RTFREE_LOCKED(rt);
1103 		}
1104 		CURVNET_RESTORE();
1105 #endif
1106 	}
1107 	return (retp);
1108 }
1109 
1110 /*
1111  * Copy NFS uid, gids from the cred structure.
1112  */
1113 void
1114 newnfs_copyincred(struct ucred *cr, struct nfscred *nfscr)
1115 {
1116 	int i;
1117 
1118 	KASSERT(cr->cr_ngroups >= 0,
1119 	    ("newnfs_copyincred: negative cr_ngroups"));
1120 	nfscr->nfsc_uid = cr->cr_uid;
1121 	nfscr->nfsc_ngroups = MIN(cr->cr_ngroups, NFS_MAXGRPS + 1);
1122 	for (i = 0; i < nfscr->nfsc_ngroups; i++)
1123 		nfscr->nfsc_groups[i] = cr->cr_groups[i];
1124 }
1125 
1126 
1127 /*
1128  * Do any client specific initialization.
1129  */
1130 void
1131 nfscl_init(void)
1132 {
1133 	static int inited = 0;
1134 
1135 	if (inited)
1136 		return;
1137 	inited = 1;
1138 	nfscl_inited = 1;
1139 	ncl_pbuf_freecnt = nswbuf / 2 + 1;
1140 }
1141 
1142 /*
1143  * Check each of the attributes to be set, to ensure they aren't already
1144  * the correct value. Disable setting ones already correct.
1145  */
1146 int
1147 nfscl_checksattr(struct vattr *vap, struct nfsvattr *nvap)
1148 {
1149 
1150 	if (vap->va_mode != (mode_t)VNOVAL) {
1151 		if (vap->va_mode == nvap->na_mode)
1152 			vap->va_mode = (mode_t)VNOVAL;
1153 	}
1154 	if (vap->va_uid != (uid_t)VNOVAL) {
1155 		if (vap->va_uid == nvap->na_uid)
1156 			vap->va_uid = (uid_t)VNOVAL;
1157 	}
1158 	if (vap->va_gid != (gid_t)VNOVAL) {
1159 		if (vap->va_gid == nvap->na_gid)
1160 			vap->va_gid = (gid_t)VNOVAL;
1161 	}
1162 	if (vap->va_size != VNOVAL) {
1163 		if (vap->va_size == nvap->na_size)
1164 			vap->va_size = VNOVAL;
1165 	}
1166 
1167 	/*
1168 	 * We are normally called with only a partially initialized
1169 	 * VAP.  Since the NFSv3 spec says that server may use the
1170 	 * file attributes to store the verifier, the spec requires
1171 	 * us to do a SETATTR RPC. FreeBSD servers store the verifier
1172 	 * in atime, but we can't really assume that all servers will
1173 	 * so we ensure that our SETATTR sets both atime and mtime.
1174 	 * Set the VA_UTIMES_NULL flag for this case, so that
1175 	 * the server's time will be used.  This is needed to
1176 	 * work around a bug in some Solaris servers, where
1177 	 * setting the time TOCLIENT causes the Setattr RPC
1178 	 * to return NFS_OK, but not set va_mode.
1179 	 */
1180 	if (vap->va_mtime.tv_sec == VNOVAL) {
1181 		vfs_timestamp(&vap->va_mtime);
1182 		vap->va_vaflags |= VA_UTIMES_NULL;
1183 	}
1184 	if (vap->va_atime.tv_sec == VNOVAL)
1185 		vap->va_atime = vap->va_mtime;
1186 	return (1);
1187 }
1188 
1189 /*
1190  * Map nfsv4 errors to errno.h errors.
1191  * The uid and gid arguments are only used for NFSERR_BADOWNER and that
1192  * error should only be returned for the Open, Create and Setattr Ops.
1193  * As such, most calls can just pass in 0 for those arguments.
1194  */
1195 APPLESTATIC int
1196 nfscl_maperr(struct thread *td, int error, uid_t uid, gid_t gid)
1197 {
1198 	struct proc *p;
1199 
1200 	if (error < 10000)
1201 		return (error);
1202 	if (td != NULL)
1203 		p = td->td_proc;
1204 	else
1205 		p = NULL;
1206 	switch (error) {
1207 	case NFSERR_BADOWNER:
1208 		tprintf(p, LOG_INFO,
1209 		    "No name and/or group mapping for uid,gid:(%d,%d)\n",
1210 		    uid, gid);
1211 		return (EPERM);
1212 	case NFSERR_BADNAME:
1213 	case NFSERR_BADCHAR:
1214 		printf("nfsv4 char/name not handled by server\n");
1215 		return (ENOENT);
1216 	case NFSERR_STALECLIENTID:
1217 	case NFSERR_STALESTATEID:
1218 	case NFSERR_EXPIRED:
1219 	case NFSERR_BADSTATEID:
1220 	case NFSERR_BADSESSION:
1221 		printf("nfsv4 recover err returned %d\n", error);
1222 		return (EIO);
1223 	case NFSERR_BADHANDLE:
1224 	case NFSERR_SERVERFAULT:
1225 	case NFSERR_BADTYPE:
1226 	case NFSERR_FHEXPIRED:
1227 	case NFSERR_RESOURCE:
1228 	case NFSERR_MOVED:
1229 	case NFSERR_NOFILEHANDLE:
1230 	case NFSERR_MINORVERMISMATCH:
1231 	case NFSERR_OLDSTATEID:
1232 	case NFSERR_BADSEQID:
1233 	case NFSERR_LEASEMOVED:
1234 	case NFSERR_RECLAIMBAD:
1235 	case NFSERR_BADXDR:
1236 	case NFSERR_OPILLEGAL:
1237 		printf("nfsv4 client/server protocol prob err=%d\n",
1238 		    error);
1239 		return (EIO);
1240 	default:
1241 		tprintf(p, LOG_INFO, "nfsv4 err=%d\n", error);
1242 		return (EIO);
1243 	};
1244 }
1245 
1246 /*
1247  * Check to see if the process for this owner exists. Return 1 if it doesn't
1248  * and 0 otherwise.
1249  */
1250 int
1251 nfscl_procdoesntexist(u_int8_t *own)
1252 {
1253 	union {
1254 		u_int32_t	lval;
1255 		u_int8_t	cval[4];
1256 	} tl;
1257 	struct proc *p;
1258 	pid_t pid;
1259 	int ret = 0;
1260 
1261 	tl.cval[0] = *own++;
1262 	tl.cval[1] = *own++;
1263 	tl.cval[2] = *own++;
1264 	tl.cval[3] = *own++;
1265 	pid = tl.lval;
1266 	p = pfind_locked(pid);
1267 	if (p == NULL)
1268 		return (1);
1269 	if (p->p_stats == NULL) {
1270 		PROC_UNLOCK(p);
1271 		return (0);
1272 	}
1273 	tl.cval[0] = *own++;
1274 	tl.cval[1] = *own++;
1275 	tl.cval[2] = *own++;
1276 	tl.cval[3] = *own++;
1277 	if (tl.lval != p->p_stats->p_start.tv_sec) {
1278 		ret = 1;
1279 	} else {
1280 		tl.cval[0] = *own++;
1281 		tl.cval[1] = *own++;
1282 		tl.cval[2] = *own++;
1283 		tl.cval[3] = *own;
1284 		if (tl.lval != p->p_stats->p_start.tv_usec)
1285 			ret = 1;
1286 	}
1287 	PROC_UNLOCK(p);
1288 	return (ret);
1289 }
1290 
1291 /*
1292  * - nfs pseudo system call for the client
1293  */
1294 /*
1295  * MPSAFE
1296  */
1297 static int
1298 nfssvc_nfscl(struct thread *td, struct nfssvc_args *uap)
1299 {
1300 	struct file *fp;
1301 	struct nfscbd_args nfscbdarg;
1302 	struct nfsd_nfscbd_args nfscbdarg2;
1303 	struct nameidata nd;
1304 	struct nfscl_dumpmntopts dumpmntopts;
1305 	cap_rights_t rights;
1306 	char *buf;
1307 	int error;
1308 
1309 	if (uap->flag & NFSSVC_CBADDSOCK) {
1310 		error = copyin(uap->argp, (caddr_t)&nfscbdarg, sizeof(nfscbdarg));
1311 		if (error)
1312 			return (error);
1313 		/*
1314 		 * Since we don't know what rights might be required,
1315 		 * pretend that we need them all. It is better to be too
1316 		 * careful than too reckless.
1317 		 */
1318 		error = fget(td, nfscbdarg.sock,
1319 		    cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp);
1320 		if (error)
1321 			return (error);
1322 		if (fp->f_type != DTYPE_SOCKET) {
1323 			fdrop(fp, td);
1324 			return (EPERM);
1325 		}
1326 		error = nfscbd_addsock(fp);
1327 		fdrop(fp, td);
1328 		if (!error && nfscl_enablecallb == 0) {
1329 			nfsv4_cbport = nfscbdarg.port;
1330 			nfscl_enablecallb = 1;
1331 		}
1332 	} else if (uap->flag & NFSSVC_NFSCBD) {
1333 		if (uap->argp == NULL)
1334 			return (EINVAL);
1335 		error = copyin(uap->argp, (caddr_t)&nfscbdarg2,
1336 		    sizeof(nfscbdarg2));
1337 		if (error)
1338 			return (error);
1339 		error = nfscbd_nfsd(td, &nfscbdarg2);
1340 	} else if (uap->flag & NFSSVC_DUMPMNTOPTS) {
1341 		error = copyin(uap->argp, &dumpmntopts, sizeof(dumpmntopts));
1342 		if (error == 0 && (dumpmntopts.ndmnt_blen < 256 ||
1343 		    dumpmntopts.ndmnt_blen > 1024))
1344 			error = EINVAL;
1345 		if (error == 0)
1346 			error = nfsrv_lookupfilename(&nd,
1347 			    dumpmntopts.ndmnt_fname, td);
1348 		if (error == 0 && strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name,
1349 		    "nfs") != 0) {
1350 			vput(nd.ni_vp);
1351 			error = EINVAL;
1352 		}
1353 		if (error == 0) {
1354 			buf = malloc(dumpmntopts.ndmnt_blen, M_TEMP, M_WAITOK);
1355 			nfscl_retopts(VFSTONFS(nd.ni_vp->v_mount), buf,
1356 			    dumpmntopts.ndmnt_blen);
1357 			vput(nd.ni_vp);
1358 			error = copyout(buf, dumpmntopts.ndmnt_buf,
1359 			    dumpmntopts.ndmnt_blen);
1360 			free(buf, M_TEMP);
1361 		}
1362 	} else {
1363 		error = EINVAL;
1364 	}
1365 	return (error);
1366 }
1367 
1368 extern int (*nfsd_call_nfscl)(struct thread *, struct nfssvc_args *);
1369 
1370 /*
1371  * Called once to initialize data structures...
1372  */
1373 static int
1374 nfscl_modevent(module_t mod, int type, void *data)
1375 {
1376 	int error = 0;
1377 	static int loaded = 0;
1378 
1379 	switch (type) {
1380 	case MOD_LOAD:
1381 		if (loaded)
1382 			return (0);
1383 		newnfs_portinit();
1384 		mtx_init(&nfs_clstate_mutex, "nfs_clstate_mutex", NULL,
1385 		    MTX_DEF);
1386 		mtx_init(&ncl_iod_mutex, "ncl_iod_mutex", NULL, MTX_DEF);
1387 		nfscl_init();
1388 		NFSD_LOCK();
1389 		nfsrvd_cbinit(0);
1390 		NFSD_UNLOCK();
1391 		ncl_call_invalcaches = ncl_invalcaches;
1392 		nfsd_call_nfscl = nfssvc_nfscl;
1393 		loaded = 1;
1394 		break;
1395 
1396 	case MOD_UNLOAD:
1397 		if (nfs_numnfscbd != 0) {
1398 			error = EBUSY;
1399 			break;
1400 		}
1401 
1402 		/*
1403 		 * XXX: Unloading of nfscl module is unsupported.
1404 		 */
1405 #if 0
1406 		ncl_call_invalcaches = NULL;
1407 		nfsd_call_nfscl = NULL;
1408 		/* and get rid of the mutexes */
1409 		mtx_destroy(&nfs_clstate_mutex);
1410 		mtx_destroy(&ncl_iod_mutex);
1411 		loaded = 0;
1412 		break;
1413 #else
1414 		/* FALLTHROUGH */
1415 #endif
1416 	default:
1417 		error = EOPNOTSUPP;
1418 		break;
1419 	}
1420 	return error;
1421 }
1422 static moduledata_t nfscl_mod = {
1423 	"nfscl",
1424 	nfscl_modevent,
1425 	NULL,
1426 };
1427 DECLARE_MODULE(nfscl, nfscl_mod, SI_SUB_VFS, SI_ORDER_FIRST);
1428 
1429 /* So that loader and kldload(2) can find us, wherever we are.. */
1430 MODULE_VERSION(nfscl, 1);
1431 MODULE_DEPEND(nfscl, nfscommon, 1, 1, 1);
1432 MODULE_DEPEND(nfscl, krpc, 1, 1, 1);
1433 MODULE_DEPEND(nfscl, nfssvc, 1, 1, 1);
1434 MODULE_DEPEND(nfscl, nfslock, 1, 1, 1);
1435 
1436