xref: /freebsd/sys/fs/nfsclient/nfs_clrpcops.c (revision edf8578117e8844e02c0121147f45e4609b30680)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Rick Macklem at The University of Guelph.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  */
35 
36 #include <sys/cdefs.h>
37 /*
38  * Rpc op calls, generally called from the vnode op calls or through the
39  * buffer cache, for NFS v2, 3 and 4.
40  * These do not normally make any changes to vnode arguments or use
41  * structures that might change between the VFS variants. The returned
42  * arguments are all at the end, after the NFSPROC_T *p one.
43  */
44 
45 #include "opt_inet6.h"
46 
47 #include <fs/nfs/nfsport.h>
48 #include <fs/nfsclient/nfs.h>
49 #include <sys/extattr.h>
50 #include <sys/sysctl.h>
51 #include <sys/taskqueue.h>
52 
/*
 * Run-time tunables attached to the _vfs_nfs sysctl node.  All three are
 * declared CTLFLAG_RW, i.e. they can be read and written while running.
 */
53 SYSCTL_DECL(_vfs_nfs);
54 
/* "ignore_eexist" knob; initial value 0. */
55 static int	nfsignore_eexist = 0;
56 SYSCTL_INT(_vfs_nfs, OID_AUTO, ignore_eexist, CTLFLAG_RW,
57     &nfsignore_eexist, 0, "NFS ignore EEXIST replies for mkdir/symlink");
58 
/* "dssameconn" knob; initial value 0. */
59 static int	nfscl_dssameconn = 0;
60 SYSCTL_INT(_vfs_nfs, OID_AUTO, dssameconn, CTLFLAG_RW,
61     &nfscl_dssameconn, 0, "Use same TCP connection to multiple DSs");
62 
/* "maxcopyrange" knob (64 bit); initial value SSIZE_MAX. */
63 static uint64_t nfs_maxcopyrange = SSIZE_MAX;
64 SYSCTL_U64(_vfs_nfs, OID_AUTO, maxcopyrange, CTLFLAG_RW,
65     &nfs_maxcopyrange, 0, "Max size of a Copy so RPC times reasonable");
66 
67 /*
68  * Global variables
69  */
/* Variables that are defined in other files. */
70 extern struct nfsstatsv1 nfsstatsv1;
71 extern int nfs_numnfscbd;
72 extern struct timeval nfsboottime;
73 extern u_int32_t newnfs_false, newnfs_true;
74 extern nfstype nfsv34_type[9];
75 extern int nfsrv_useacl;
76 extern char nfsv4_callbackaddr[INET6_ADDRSTRLEN];
77 extern int nfscl_debuglevel;
78 extern int nfs_pnfsiothreads;
79 extern u_long sb_max_adj;
/*
 * NOTE(review): presumably expands to the declaration of the client state
 * mutex taken by NFSLOCKCLSTATE() - confirm against the port header.
 */
79 NFSCLSTATEMUTEX;
80 int nfstest_outofseq = 0;
/*
 * When non-zero, nfsrpc_openrpc() marks every Open as having POSIX lock
 * semantics, whether or not the reply has NFSV4OPEN_LOCKTYPEPOSIX set.
 */
81 int nfscl_assumeposixlocks = 1;
/*
 * nfsrpc_open() only tries to get a layout along with the Open when this
 * is non-zero.
 */
82 int nfscl_enablecallb = 0;
83 short nfsv4_cbport = NFSV4_CBPORT;
84 int nfstest_openallsetattr = 0;
85 
/* Byte offset of d_name within struct dirent (size of what precedes it). */
86 #define	DIRHDSIZ	offsetof(struct dirent, d_name)
88 
89 /*
90  * nfscl_getsameserver() can return one of three values:
91  * NFSDSP_USETHISSESSION - Use this session for the DS.
92  * NFSDSP_SEQTHISSESSION - Use the nfsclds_sequence field of this dsp for new
93  *     session.
94  * NFSDSP_NOTFOUND - No matching server was found.
95  */
enum nfsclds_state {
	NFSDSP_USETHISSESSION,	/* 0: use this session for the DS */
	NFSDSP_SEQTHISSESSION,	/* 1: use this dsp's nfsclds_sequence */
	NFSDSP_NOTFOUND		/* 2: no matching server was found */
};
101 
102 /*
103  * Do a write RPC on a DS data file, using this structure for the arguments,
104  * so that this function can be executed by a separate kernel process.
105  */
106 struct nfsclwritedsdorpc {
	/*
	 * NOTE(review): done, inprog and err look like completion/status
	 * bookkeeping for the task in tsk - confirm against the code that
	 * queues and waits for it.
	 */
107 	int			done;
108 	int			inprog;
109 	struct task		tsk;
	/*
	 * From vp through p the fields have the same types, in the same
	 * order, as the parameters of nfsrpc_writedsmir() (advise, which
	 * is only compiled in under "notyet", being the exception).
	 */
110 	struct vnode		*vp;
111 	int			iomode;
112 	int			must_commit;
113 	nfsv4stateid_t		*stateidp;
114 	struct nfsclds		*dsp;
115 	uint64_t		off;
116 	int			len;
117 #ifdef notyet
118 	int			advise;
119 #endif
120 	struct nfsfh		*fhp;
121 	struct mbuf		*m;
122 	int			vers;
123 	int			minorvers;
124 	struct ucred		*cred;
125 	NFSPROC_T		*p;
126 	int			err;
127 };
128 
129 static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *,
130     struct ucred *, NFSPROC_T *, struct nfsvattr *, int *);
131 static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *,
132     nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *);
133 static int nfsrpc_writerpc(vnode_t , struct uio *, int *, int *,
134     struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *,
135     int);
136 static int nfsrpc_deallocaterpc(vnode_t, off_t, off_t, nfsv4stateid_t *,
137     struct nfsvattr *, int *, struct ucred *, NFSPROC_T *);
138 static int nfsrpc_createv23(vnode_t , char *, int, struct vattr *,
139     nfsquad_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *,
140     struct nfsvattr *, struct nfsfh **, int *, int *);
141 static int nfsrpc_createv4(vnode_t , char *, int, struct vattr *,
142     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *,
143     NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *,
144     int *, int *);
145 static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *,
146     struct nfscllockowner *, u_int64_t, u_int64_t,
147     u_int32_t, struct ucred *, NFSPROC_T *, int);
148 static int nfsrpc_setaclrpc(vnode_t, struct ucred *, NFSPROC_T *,
149     struct acl *, nfsv4stateid_t *);
150 static int nfsrpc_layouterror(struct nfsmount *, uint8_t *, int, uint64_t,
151     uint64_t, nfsv4stateid_t *, struct ucred *, NFSPROC_T *, uint32_t,
152     uint32_t, char *);
153 static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int,
154     uint32_t, uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **,
155     struct ucred *, NFSPROC_T *);
156 static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_in *,
157     struct sockaddr_in6 *, sa_family_t, int, int, struct nfsclds **,
158     NFSPROC_T *);
159 static void nfscl_initsessionslots(struct nfsclsession *);
160 static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *,
161     nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
162     struct nfsclflayout *, uint64_t, uint64_t, int, struct ucred *,
163     NFSPROC_T *);
164 static int nfscl_dofflayoutio(vnode_t, struct uio *, int *, int *, int *,
165     nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
166     struct nfsclflayout *, uint64_t, uint64_t, int, int, struct mbuf *,
167     struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
168 static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *,
169     struct nfsclds *, uint64_t, int, struct nfsfh *, int, int, int,
170     struct ucred *, NFSPROC_T *);
171 static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *,
172     nfsv4stateid_t *, struct nfsclds *, uint64_t, int,
173     struct nfsfh *, int, int, int, int, struct ucred *, NFSPROC_T *);
174 static int nfsio_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
175     struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
176     struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
177 static int nfsrpc_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
178     struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
179     struct ucred *, NFSPROC_T *);
180 static enum nfsclds_state nfscl_getsameserver(struct nfsmount *,
181     struct nfsclds *, struct nfsclds **, uint32_t *);
182 static int nfsio_commitds(vnode_t, uint64_t, int, struct nfsclds *,
183     struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *,
184     NFSPROC_T *);
185 static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *,
186     struct nfsfh *, int, int, struct ucred *, NFSPROC_T *);
187 #ifdef notyet
188 static int nfsio_adviseds(vnode_t, uint64_t, int, int, struct nfsclds *,
189     struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *,
190     NFSPROC_T *);
191 static int nfsrpc_adviseds(vnode_t, uint64_t, int, int, struct nfsclds *,
192     struct nfsfh *, int, int, struct ucred *, NFSPROC_T *);
193 #endif
194 static int nfsrpc_allocaterpc(vnode_t, off_t, off_t, nfsv4stateid_t *,
195     struct nfsvattr *, int *, struct ucred *, NFSPROC_T *);
196 static void nfsrv_setuplayoutget(struct nfsrv_descript *, int, uint64_t,
197     uint64_t, uint64_t, nfsv4stateid_t *, int, int, int);
198 static int nfsrv_parseug(struct nfsrv_descript *, int, uid_t *, gid_t *,
199     NFSPROC_T *);
200 static int nfsrv_parselayoutget(struct nfsmount *, struct nfsrv_descript *,
201     nfsv4stateid_t *, int *, struct nfsclflayouthead *);
202 static int nfsrpc_getopenlayout(struct nfsmount *, vnode_t, u_int8_t *,
203     int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
204     struct nfscldeleg **, struct ucred *, NFSPROC_T *);
205 static int nfsrpc_getcreatelayout(vnode_t, char *, int, struct vattr *,
206     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
207     struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
208     struct nfsfh **, int *, int *, int *);
209 static int nfsrpc_openlayoutrpc(struct nfsmount *, vnode_t, u_int8_t *,
210     int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
211     struct nfscldeleg **, nfsv4stateid_t *, int, int, int, int *,
212     struct nfsclflayouthead *, int *, struct ucred *, NFSPROC_T *);
213 static int nfsrpc_createlayout(vnode_t, char *, int, struct vattr *,
214     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
215     struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
216     struct nfsfh **, int *, int *, int *, nfsv4stateid_t *,
217     int, int, int, int *, struct nfsclflayouthead *, int *);
218 static int nfsrpc_layoutget(struct nfsmount *, uint8_t *, int, int, uint64_t,
219     uint64_t, uint64_t, int, int, nfsv4stateid_t *, int *,
220     struct nfsclflayouthead *, struct ucred *, NFSPROC_T *);
221 static int nfsrpc_layoutgetres(struct nfsmount *, vnode_t, uint8_t *,
222     int, nfsv4stateid_t *, int, uint32_t *, struct nfscllayout **,
223     struct nfsclflayouthead *, int, int, int *, struct ucred *, NFSPROC_T *);
224 static int nfsrpc_copyrpc(vnode_t, off_t, vnode_t, off_t, size_t *,
225     nfsv4stateid_t *, nfsv4stateid_t *, struct nfsvattr *, int *,
226     struct nfsvattr *, int *, bool, int *, struct ucred *, NFSPROC_T *);
227 static int nfsrpc_seekrpc(vnode_t, off_t *, nfsv4stateid_t *, bool *,
228     int, struct nfsvattr *, int *, struct ucred *);
229 static struct mbuf *nfsm_split(struct mbuf *, uint64_t);
230 static void nfscl_statfs(struct vnode *, struct ucred *, NFSPROC_T *);
231 
232 int nfs_pnfsio(task_fn_t *, void *);
233 
234 /*
235  * nfs null call from vfs.
236  */
237 int
238 nfsrpc_null(vnode_t vp, struct ucred *cred, NFSPROC_T *p)
239 {
240 	int error;
241 	struct nfsrv_descript nfsd, *nd = &nfsd;
242 
243 	NFSCL_REQSTART(nd, NFSPROC_NULL, vp, NULL);
244 	error = nfscl_request(nd, vp, p, cred);
245 	if (nd->nd_repstat && !error)
246 		error = nd->nd_repstat;
247 	m_freem(nd->nd_mrep);
248 	return (error);
249 }
250 
251 /*
252  * nfs access rpc op.
253  * For nfs version 3 and 4, use the access rpc to check accessibility. If file
254  * modes are changed on the server, accesses might still fail later.
255  */
256 int
257 nfsrpc_access(vnode_t vp, int acmode, struct ucred *cred,
258     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
259 {
260 	int error;
261 	u_int32_t mode, rmode;
262 
263 	if (acmode & VREAD)
264 		mode = NFSACCESS_READ;
265 	else
266 		mode = 0;
267 	if (vp->v_type == VDIR) {
268 		if (acmode & VWRITE)
269 			mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND |
270 				 NFSACCESS_DELETE);
271 		if (acmode & VEXEC)
272 			mode |= NFSACCESS_LOOKUP;
273 	} else {
274 		if (acmode & VWRITE)
275 			mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
276 		if (acmode & VEXEC)
277 			mode |= NFSACCESS_EXECUTE;
278 	}
279 
280 	/*
281 	 * Now, just call nfsrpc_accessrpc() to do the actual RPC.
282 	 */
283 	error = nfsrpc_accessrpc(vp, mode, cred, p, nap, attrflagp, &rmode);
284 
285 	/*
286 	 * The NFS V3 spec does not clarify whether or not
287 	 * the returned access bits can be a superset of
288 	 * the ones requested, so...
289 	 */
290 	if (!error && (rmode & mode) != mode)
291 		error = EACCES;
292 	return (error);
293 }
294 
295 /*
296  * The actual rpc, separated out for Darwin.
297  */
298 int
299 nfsrpc_accessrpc(vnode_t vp, u_int32_t mode, struct ucred *cred,
300     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, u_int32_t *rmodep)
301 {
302 	u_int32_t *tl;
303 	u_int32_t supported, rmode;
304 	int error;
305 	struct nfsrv_descript nfsd, *nd = &nfsd;
306 	nfsattrbit_t attrbits;
307 	struct nfsmount *nmp;
308 	struct nfsnode *np;
309 
310 	*attrflagp = 0;
311 	supported = mode;
312 	nmp = VFSTONFS(vp->v_mount);
313 	np = VTONFS(vp);
314 	if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0 &&
315 	    nmp->nm_fhsize == 0) {
316 		/* Attempt to get the actual root file handle. */
317 		error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp), cred, p);
318 		if (error != 0)
319 			return (EACCES);
320 		if (np->n_fhp->nfh_len == NFSX_FHMAX + 1)
321 			nfscl_statfs(vp, cred, p);
322 	}
323 	NFSCL_REQSTART(nd, NFSPROC_ACCESS, vp, cred);
324 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
325 	*tl = txdr_unsigned(mode);
326 	if (nd->nd_flag & ND_NFSV4) {
327 		/*
328 		 * And do a Getattr op.
329 		 */
330 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
331 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
332 		NFSGETATTR_ATTRBIT(&attrbits);
333 		(void) nfsrv_putattrbit(nd, &attrbits);
334 	}
335 	error = nfscl_request(nd, vp, p, cred);
336 	if (error)
337 		return (error);
338 	if (nd->nd_flag & ND_NFSV3) {
339 		error = nfscl_postop_attr(nd, nap, attrflagp);
340 		if (error)
341 			goto nfsmout;
342 	}
343 	if (!nd->nd_repstat) {
344 		if (nd->nd_flag & ND_NFSV4) {
345 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
346 			supported = fxdr_unsigned(u_int32_t, *tl++);
347 		} else {
348 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
349 		}
350 		rmode = fxdr_unsigned(u_int32_t, *tl);
351 		if (nd->nd_flag & ND_NFSV4)
352 			error = nfscl_postop_attr(nd, nap, attrflagp);
353 
354 		/*
355 		 * It's not obvious what should be done about
356 		 * unsupported access modes. For now, be paranoid
357 		 * and clear the unsupported ones.
358 		 */
359 		rmode &= supported;
360 		*rmodep = rmode;
361 	} else
362 		error = nd->nd_repstat;
363 nfsmout:
364 	m_freem(nd->nd_mrep);
365 	return (error);
366 }
367 
368 /*
 * nfs open rpc
 *
 * Get an NFSv4 Open for vp.  Anything that is not a regular file returns 0
 * without doing any work.  FREAD/FWRITE in amode select
 * NFSV4OPEN_ACCESSREAD/NFSV4OPEN_ACCESSWRITE.
 *
 * Each pass of the loop calls nfscl_open(); what happens next depends on
 * the "ret" value it hands back:
 * - NFSCLOPEN_DOOPEN: an Open is done against the server, either with
 *   nfsrpc_openrpc() or, on the first attempt for a pNFS mount with
 *   callbacks enabled, with nfsrpc_getopenlayout().  A delegation returned
 *   by that call is passed to nfscl_deleg().
 * - NFSCLOPEN_SETCRED: only the credentials are copied into the open.
 *
 * The loop repeats, after a nfs_catnap(), for NFSERR_GRACE,
 * NFSERR_STALECLIENTID, NFSERR_STALEDONTRECOVER, NFSERR_DELAY and
 * NFSERR_BADSESSION.  For NFSERR_EXPIRED/NFSERR_BADSTATEID it repeats
 * while nfscl_hasexpired() returns 0, for fewer than 4 such retries.
 *
 * Returns 0 or an error; the error is replaced by EIO once 4 of the
 * expired/bad stateid retries have been used up.
 */
371 int
372 nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p)
373 {
374 	struct nfsclopen *op;
375 	struct nfscldeleg *dp;
376 	struct nfsfh *nfhp;
377 	struct nfsnode *np = VTONFS(vp);
378 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
379 	u_int32_t mode, clidrev;
380 	int ret, newone, error, expireret = 0, retrycnt;
381 
382 	/*
383 	 * For NFSv4, Open Ops are only done on Regular Files.
384 	 */
385 	if (vp->v_type != VREG)
386 		return (0);
387 	mode = 0;
388 	if (amode & FREAD)
389 		mode |= NFSV4OPEN_ACCESSREAD;
390 	if (amode & FWRITE)
391 		mode |= NFSV4OPEN_ACCESSWRITE;
392 	nfhp = np->n_fhp;
393 
394 	retrycnt = 0;
395 #ifdef notdef
396 { char name[100]; int namel;
397 namel = (np->n_v4->n4_namelen < 100) ? np->n_v4->n4_namelen : 99;
398 bcopy(NFS4NODENAME(np->n_v4), name, namel);
399 name[namel] = '\0';
400 printf("rpcopen p=0x%x name=%s",p->p_pid,name);
401 if (nfhp->nfh_len > 0) printf(" fh=0x%x\n",nfhp->nfh_fh[12]);
402 else printf(" fhl=0\n");
403 }
404 #endif
405 	do {
406 	    dp = NULL;
407 	    error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 1,
408 		cred, p, NULL, &op, &newone, &ret, 1, true);
409 	    if (error) {
410 		return (error);
411 	    }
	    /*
	     * Remember the clientid revision for the nfscl_hasexpired()
	     * call below; 0 means there is no client structure.
	     */
412 	    if (nmp->nm_clp != NULL)
413 		clidrev = nmp->nm_clp->nfsc_clientidrev;
414 	    else
415 		clidrev = 0;
416 	    if (ret == NFSCLOPEN_DOOPEN) {
417 		if (np->n_v4 != NULL) {
418 			/*
419 			 * For the first attempt, try and get a layout, if
420 			 * pNFS is enabled for the mount.
421 			 */
422 			if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
423 			    nfs_numnfscbd == 0 ||
424 			    (np->n_flag & NNOLAYOUT) != 0 || retrycnt > 0)
425 				error = nfsrpc_openrpc(nmp, vp,
426 				    np->n_v4->n4_data,
427 				    np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
428 				    np->n_fhp->nfh_len, mode, op,
429 				    NFS4NODENAME(np->n_v4),
430 				    np->n_v4->n4_namelen,
431 				    &dp, 0, 0x0, cred, p, 0, 0);
432 			else
433 				error = nfsrpc_getopenlayout(nmp, vp,
434 				    np->n_v4->n4_data,
435 				    np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
436 				    np->n_fhp->nfh_len, mode, op,
437 				    NFS4NODENAME(np->n_v4),
438 				    np->n_v4->n4_namelen, &dp, cred, p);
439 			if (dp != NULL) {
440 				NFSLOCKNODE(np);
441 				np->n_flag &= ~NDELEGMOD;
442 				/*
443 				 * Invalidate the attribute cache, so that
444 				 * attributes that pre-date the issue of a
445 				 * delegation are not cached, since the
446 				 * cached attributes will remain valid while
447 				 * the delegation is held.
448 				 */
449 				NFSINVALATTRCACHE(np);
450 				NFSUNLOCKNODE(np);
451 				(void) nfscl_deleg(nmp->nm_mountp,
452 				    op->nfso_own->nfsow_clp,
453 				    nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp);
454 			}
455 		} else {
			/* No n_v4 data on the node, so no Open can be built. */
456 			error = EIO;
457 		}
458 		newnfs_copyincred(cred, &op->nfso_cred);
459 	    } else if (ret == NFSCLOPEN_SETCRED)
460 		/*
461 		 * This is a new local open on a delegation. It needs
462 		 * to have credentials so that an open can be done
463 		 * against the server during recovery.
464 		 */
465 		newnfs_copyincred(cred, &op->nfso_cred);
466 
467 	    /*
468 	     * nfso_opencnt is the count of how many VOP_OPEN()s have
469 	     * been done on this Open successfully and a VOP_CLOSE()
470 	     * is expected for each of these.
471 	     * If error is non-zero, don't increment it, since the Open
472 	     * hasn't succeeded yet.
473 	     */
474 	    if (!error) {
475 		op->nfso_opencnt++;
476 		if (NFSHASNFSV4N(nmp) && NFSHASONEOPENOWN(nmp)) {
477 		    NFSLOCKNODE(np);
478 		    np->n_openstateid = op;
479 		    NFSUNLOCKNODE(np);
480 		}
481 	    }
482 	    nfscl_openrelease(nmp, op, error, newone);
483 	    if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
484 		error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
485 		error == NFSERR_BADSESSION) {
486 		(void) nfs_catnap(PZERO, error, "nfs_open");
487 	    } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
488 		&& clidrev != 0) {
489 		expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
490 		retrycnt++;
491 	    }
492 	} while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
493 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
494 	    error == NFSERR_BADSESSION ||
495 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
496 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
	/*
	 * retrycnt only counts the expired/bad stateid retries; once those
	 * are used up, report a plain EIO.
	 */
497 	if (error && retrycnt >= 4)
498 		error = EIO;
499 	return (error);
500 }
501 
502 /*
 * the actual open rpc
 *
 * Builds an NFSv4 Open (NFSV4OPEN_NOCREATE) against the file handle
 * nfhp/fhlen, followed by a Getattr for the change and modify time
 * attributes, and parses the reply.
 *
 * The claim type depends on the arguments:
 * - reclaim != 0: NFSV4OPEN_CLAIMPREVIOUS with delegtype.
 * - *dpp != NULL: NFSV4OPEN_CLAIMDELEGATECUR with that delegation's
 *   stateid, followed by the name.
 * - otherwise: NFSV4OPEN_CLAIMNULL followed by the name.
 *
 * On a successful reply the open stateid is stored in op->nfso_stateid and
 * op->nfso_posixlock is set.  If the reply carries a read or write
 * delegation, a struct nfscldeleg is allocated (M_NFSCLDELEG), filled in
 * with newfhp/newfhlen, the credentials and the delegation details and,
 * when the function returns 0, handed back through *dpp.  In every other
 * case *dpp is NULL on return (the incoming value is consumed).
 *
 * If the reply asks for confirmation (NFSV4OPEN_RESULTCONFIRM) and this is
 * not a reclaim, nfsrpc_openconfirm() is done, retrying on NFSERR_DELAY.
 *
 * syscred != 0 sets ND_USEGSSNAME on the request.  recursed != 0 stops the
 * function from calling itself to retry the Open for a delegation.
 *
 * Returns 0, an error from the request or from parsing the reply, or the
 * status in the reply (nd_repstat).  NFSERR_STALECLIENTID additionally
 * triggers nfscl_initiate_recovery().
 */
505 int
506 nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen,
507     u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op,
508     u_int8_t *name, int namelen, struct nfscldeleg **dpp,
509     int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p,
510     int syscred, int recursed)
511 {
512 	u_int32_t *tl;
513 	struct nfsrv_descript nfsd, *nd = &nfsd;
514 	struct nfscldeleg *dp, *ndp = NULL;
515 	struct nfsvattr nfsva;
516 	u_int32_t rflags, deleg;
517 	nfsattrbit_t attrbits;
518 	int error, ret, acesize, limitby;
519 	struct nfsclsession *tsep;
520 
	/* Take the caller's delegation; *dpp is only set again on success. */
521 	dp = *dpp;
522 	*dpp = NULL;
523 	nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL, NULL, 0, 0,
524 	    cred);
	/* Owner seqid, share access, share deny and the 2 word clientid. */
525 	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
526 	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
527 	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
528 	*tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
529 	tsep = nfsmnt_mdssession(nmp);
530 	*tl++ = tsep->nfsess_clientid.lval[0];
531 	*tl = tsep->nfsess_clientid.lval[1];
532 	(void) nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
533 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
534 	*tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
535 	if (reclaim) {
536 		*tl = txdr_unsigned(NFSV4OPEN_CLAIMPREVIOUS);
537 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
538 		*tl = txdr_unsigned(delegtype);
539 	} else {
540 		if (dp != NULL) {
541 			*tl = txdr_unsigned(NFSV4OPEN_CLAIMDELEGATECUR);
542 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
			/* The stateid seqid goes out as 0 for NFSHASNFSV4N(). */
543 			if (NFSHASNFSV4N(nmp))
544 				*tl++ = 0;
545 			else
546 				*tl++ = dp->nfsdl_stateid.seqid;
547 			*tl++ = dp->nfsdl_stateid.other[0];
548 			*tl++ = dp->nfsdl_stateid.other[1];
549 			*tl = dp->nfsdl_stateid.other[2];
550 		} else {
551 			*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
552 		}
553 		(void) nfsm_strtom(nd, name, namelen);
554 	}
555 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
556 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
557 	NFSZERO_ATTRBIT(&attrbits);
558 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
559 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
560 	(void) nfsrv_putattrbit(nd, &attrbits);
561 	if (syscred)
562 		nd->nd_flag |= ND_USEGSSNAME;
563 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
564 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
565 	if (error)
566 		return (error);
567 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
568 	if (!nd->nd_repstat) {
569 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
570 		    6 * NFSX_UNSIGNED);
571 		op->nfso_stateid.seqid = *tl++;
572 		op->nfso_stateid.other[0] = *tl++;
573 		op->nfso_stateid.other[1] = *tl++;
574 		op->nfso_stateid.other[2] = *tl;
		/*
		 * tl still points at the last stateid word; the result
		 * flags are the last of the 6 words dissected after it.
		 */
575 		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
576 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
577 		if (error)
578 			goto nfsmout;
579 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
580 		deleg = fxdr_unsigned(u_int32_t, *tl);
581 		if (deleg == NFSV4OPEN_DELEGATEREAD ||
582 		    deleg == NFSV4OPEN_DELEGATEWRITE) {
583 			if (!(op->nfso_own->nfsow_clp->nfsc_flags &
584 			      NFSCLFLAGS_FIRSTDELEG))
585 				op->nfso_own->nfsow_clp->nfsc_flags |=
586 				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
			/* The file handle is stored right after the structure. */
587 			ndp = malloc(
588 			    sizeof (struct nfscldeleg) + newfhlen,
589 			    M_NFSCLDELEG, M_WAITOK);
590 			LIST_INIT(&ndp->nfsdl_owner);
591 			LIST_INIT(&ndp->nfsdl_lock);
592 			ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
593 			ndp->nfsdl_fhlen = newfhlen;
594 			NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
595 			newnfs_copyincred(cred, &ndp->nfsdl_cred);
596 			nfscl_lockinit(&ndp->nfsdl_rwlock);
597 			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
598 			    NFSX_UNSIGNED);
599 			ndp->nfsdl_stateid.seqid = *tl++;
600 			ndp->nfsdl_stateid.other[0] = *tl++;
601 			ndp->nfsdl_stateid.other[1] = *tl++;
602 			ndp->nfsdl_stateid.other[2] = *tl++;
			/* Non-zero means NFSCLDL_RECALL gets set below. */
603 			ret = fxdr_unsigned(int, *tl);
604 			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
605 				ndp->nfsdl_flags = NFSCLDL_WRITE;
606 				/*
607 				 * Indicates how much the file can grow.
608 				 */
609 				NFSM_DISSECT(tl, u_int32_t *,
610 				    3 * NFSX_UNSIGNED);
611 				limitby = fxdr_unsigned(int, *tl++);
612 				switch (limitby) {
613 				case NFSV4OPEN_LIMITSIZE:
614 					ndp->nfsdl_sizelimit = fxdr_hyper(tl);
615 					break;
616 				case NFSV4OPEN_LIMITBLOCKS:
					/* Product of the two 32 bit words. */
617 					ndp->nfsdl_sizelimit =
618 					    fxdr_unsigned(u_int64_t, *tl++);
619 					ndp->nfsdl_sizelimit *=
620 					    fxdr_unsigned(u_int64_t, *tl);
621 					break;
622 				default:
623 					error = NFSERR_BADXDR;
624 					goto nfsmout;
625 				}
626 			} else {
627 				ndp->nfsdl_flags = NFSCLDL_READ;
628 			}
629 			if (ret)
630 				ndp->nfsdl_flags |= NFSCLDL_RECALL;
631 			error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, false,
632 			    &ret, &acesize, p);
633 			if (error)
634 				goto nfsmout;
635 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
636 			error = NFSERR_BADXDR;
637 			goto nfsmout;
638 		}
639 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
640 		error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
641 		    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
642 		    NULL, NULL, NULL, p, cred);
643 		if (error)
644 			goto nfsmout;
645 		if (ndp != NULL) {
646 			ndp->nfsdl_change = nfsva.na_filerev;
647 			ndp->nfsdl_modtime = nfsva.na_mtime;
648 			ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
649 		}
650 		if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM)) {
651 		    do {
652 			ret = nfsrpc_openconfirm(vp, newfhp, newfhlen, op,
653 			    cred, p);
654 			if (ret == NFSERR_DELAY)
655 			    (void) nfs_catnap(PZERO, ret, "nfs_open");
656 		    } while (ret == NFSERR_DELAY);
657 		    error = ret;
658 		}
659 		if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) ||
660 		    nfscl_assumeposixlocks)
661 		    op->nfso_posixlock = 1;
662 		else
663 		    op->nfso_posixlock = 0;
664 
665 		/*
666 		 * If the server is handing out delegations, but we didn't
667 		 * get one because an OpenConfirm was required, try the
668 		 * Open again, to get a delegation. This is a harmless no-op,
669 		 * from a server's point of view.
670 		 */
671 		if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM) &&
672 		    (op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG)
673 		    && !error && dp == NULL && ndp == NULL && !recursed) {
674 		    do {
675 			ret = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp,
676 			    newfhlen, mode, op, name, namelen, &ndp, 0, 0x0,
677 			    cred, p, syscred, 1);
678 			if (ret == NFSERR_DELAY)
679 			    (void) nfs_catnap(PZERO, ret, "nfs_open2");
680 		    } while (ret == NFSERR_DELAY);
		    /*
		     * A failure of the second Open is only passed on for the
		     * three errors checked below.
		     */
681 		    if (ret) {
682 			if (ndp != NULL) {
683 				free(ndp, M_NFSCLDELEG);
684 				ndp = NULL;
685 			}
686 			if (ret == NFSERR_STALECLIENTID ||
687 			    ret == NFSERR_STALEDONTRECOVER ||
688 			    ret == NFSERR_BADSESSION)
689 				error = ret;
690 		    }
691 		}
692 	}
693 	if (nd->nd_repstat != 0 && error == 0)
694 		error = nd->nd_repstat;
695 	if (error == NFSERR_STALECLIENTID)
696 		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
697 nfsmout:
	/* Hand the new delegation back only on success, else free it. */
698 	if (!error)
699 		*dpp = ndp;
700 	else if (ndp != NULL)
701 		free(ndp, M_NFSCLDELEG);
702 	m_freem(nd->nd_mrep);
703 	return (error);
704 }
705 
706 /*
707  * open downgrade rpc
708  */
709 int
710 nfsrpc_opendowngrade(vnode_t vp, u_int32_t mode, struct nfsclopen *op,
711     struct ucred *cred, NFSPROC_T *p)
712 {
713 	u_int32_t *tl;
714 	struct nfsrv_descript nfsd, *nd = &nfsd;
715 	int error;
716 
717 	NFSCL_REQSTART(nd, NFSPROC_OPENDOWNGRADE, vp, cred);
718 	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED);
719 	if (NFSHASNFSV4N(VFSTONFS(vp->v_mount)))
720 		*tl++ = 0;
721 	else
722 		*tl++ = op->nfso_stateid.seqid;
723 	*tl++ = op->nfso_stateid.other[0];
724 	*tl++ = op->nfso_stateid.other[1];
725 	*tl++ = op->nfso_stateid.other[2];
726 	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
727 	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
728 	*tl = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
729 	error = nfscl_request(nd, vp, p, cred);
730 	if (error)
731 		return (error);
732 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
733 	if (!nd->nd_repstat) {
734 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
735 		op->nfso_stateid.seqid = *tl++;
736 		op->nfso_stateid.other[0] = *tl++;
737 		op->nfso_stateid.other[1] = *tl++;
738 		op->nfso_stateid.other[2] = *tl;
739 	}
740 	if (nd->nd_repstat && error == 0)
741 		error = nd->nd_repstat;
742 	if (error == NFSERR_STALESTATEID)
743 		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
744 nfsmout:
745 	m_freem(nd->nd_mrep);
746 	return (error);
747 }
748 
749 /*
750  * V4 Close operation.
751  */
752 int
753 nfsrpc_close(vnode_t vp, int doclose, NFSPROC_T *p)
754 {
755 	struct nfsclclient *clp;
756 	int error;
757 
758 	if (vp->v_type != VREG)
759 		return (0);
760 	if (doclose)
761 		error = nfscl_doclose(vp, &clp, p);
762 	else {
763 		error = nfscl_getclose(vp, &clp);
764 		if (error == 0)
765 			nfscl_clientrelease(clp);
766 	}
767 	return (error);
768 }
769 
770 /*
 * Close the open.
 *
 * Works with a temporary credential copied from op->nfso_cred, in three
 * steps:
 * 1. For every lock owner on the open, unlock its byte ranges with
 *    nfsrpc_locku().  When the open has POSIX lock semantics a single
 *    unlock of the whole range (offset 0, length NFS64BITSSET) is done per
 *    lock owner, otherwise one unlock per lock.  An unlock is retried a
 *    bounded number of times while the reply is NFSERR_GRACE or
 *    NFSERR_DELAY.  Each lock is freed and a ReleaseLockOwner is done for
 *    the lock owner.
 * 2. With the open owner's nfsow_rwlock held exclusively, call
 *    nfscl_tryclose(), repeating while it returns NFSERR_GRACE.
 * 3. Free the lock owners and, if freeop is set and the close did not end
 *    with NFSERR_DELAY, the open itself.
 *
 * loop_on_delayed is passed through to nfscl_tryclose().
 * Returns the result of the last nfscl_tryclose() call; unlock failures
 * are not reported.
 */
773 int
774 nfsrpc_doclose(struct nfsmount *nmp, struct nfsclopen *op, NFSPROC_T *p,
775     bool loop_on_delayed, bool freeop)
776 {
777 	struct nfsrv_descript nfsd, *nd = &nfsd;
778 	struct nfscllockowner *lp, *nlp;
779 	struct nfscllock *lop, *nlop;
780 	struct ucred *tcred;
781 	u_int64_t off = 0, len = 0;
782 	u_int32_t type = NFSV4LOCKT_READ;
783 	int error, do_unlock, trycnt;
784 
785 	tcred = newnfs_getcred();
786 	newnfs_copycred(&op->nfso_cred, tcred);
787 	/*
788 	 * (Theoretically this could be done in the same
789 	 *  compound as the close, but having multiple
790 	 *  sequenced Ops in the same compound might be
791 	 *  too scary for some servers.)
792 	 */
793 	if (op->nfso_posixlock) {
794 		off = 0;
795 		len = NFS64BITSSET;
796 		type = NFSV4LOCKT_READ;
797 	}
798 
799 	/*
800 	 * Since this function is only called from VOP_INACTIVE(), no
801 	 * other thread will be manipulating this Open. As such, the
802 	 * lock lists are not being changed by other threads, so it should
803 	 * be safe to do this without locking.
804 	 */
805 	LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
806 		do_unlock = 1;
807 		LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
			/* Non-POSIX: unlock exactly the range of this lock. */
808 			if (op->nfso_posixlock == 0) {
809 				off = lop->nfslo_first;
810 				len = lop->nfslo_end - lop->nfslo_first;
811 				if (lop->nfslo_type == F_WRLCK)
812 					type = NFSV4LOCKT_WRITE;
813 				else
814 					type = NFSV4LOCKT_READ;
815 			}
816 			if (do_unlock) {
817 				trycnt = 0;
818 				do {
819 					error = nfsrpc_locku(nd, nmp, lp, off,
820 					    len, type, tcred, p, 0);
821 					if ((nd->nd_repstat == NFSERR_GRACE ||
822 					    nd->nd_repstat == NFSERR_DELAY) &&
823 					    error == 0)
824 						(void) nfs_catnap(PZERO,
825 						    (int)nd->nd_repstat,
826 						    "nfs_close");
827 				} while ((nd->nd_repstat == NFSERR_GRACE ||
828 				    nd->nd_repstat == NFSERR_DELAY) &&
829 				    error == 0 && trycnt++ < 5);
				/* POSIX: the whole range was unlocked in one go. */
830 				if (op->nfso_posixlock)
831 					do_unlock = 0;
832 			}
833 			nfscl_freelock(lop, 0);
834 		}
835 		/*
836 		 * Do a ReleaseLockOwner.
837 		 * The lock owner name nfsl_owner may be used by other opens for
838 		 * other files but the lock_owner4 name that nfsrpc_rellockown()
839 		 * puts on the wire has the file handle for this file appended
840 		 * to it, so it can be done now.
841 		 */
842 		(void)nfsrpc_rellockown(nmp, lp, lp->nfsl_open->nfso_fh,
843 		    lp->nfsl_open->nfso_fhlen, tcred, p);
844 	}
845 
846 	/*
847 	 * There could be other Opens for different files on the same
848 	 * OpenOwner, so locking is required.
849 	 */
850 	NFSLOCKCLSTATE();
851 	nfscl_lockexcl(&op->nfso_own->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
852 	NFSUNLOCKCLSTATE();
853 	do {
854 		error = nfscl_tryclose(op, tcred, nmp, p, loop_on_delayed);
855 		if (error == NFSERR_GRACE)
856 			(void) nfs_catnap(PZERO, error, "nfs_close");
857 	} while (error == NFSERR_GRACE);
858 	NFSLOCKCLSTATE();
859 	nfscl_lockunlock(&op->nfso_own->nfsow_rwlock);
860 
861 	LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp)
862 		nfscl_freelockowner(lp, 0);
	/* The open is kept when the close ended with NFSERR_DELAY. */
863 	if (freeop && error != NFSERR_DELAY)
864 		nfscl_freeopen(op, 0, true);
865 	NFSUNLOCKCLSTATE();
866 	NFSFREECRED(tcred);
867 	return (error);
868 }
869 
870 /*
871  * The actual Close RPC.
872  */
873 int
874 nfsrpc_closerpc(struct nfsrv_descript *nd, struct nfsmount *nmp,
875     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p,
876     int syscred)
877 {
878 	u_int32_t *tl;
879 	int error;
880 
881 	nfscl_reqstart(nd, NFSPROC_CLOSE, nmp, op->nfso_fh,
882 	    op->nfso_fhlen, NULL, NULL, 0, 0, cred);
883 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
884 	if (NFSHASNFSV4N(nmp)) {
885 		*tl++ = 0;
886 		*tl++ = 0;
887 	} else {
888 		*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
889 		*tl++ = op->nfso_stateid.seqid;
890 	}
891 	*tl++ = op->nfso_stateid.other[0];
892 	*tl++ = op->nfso_stateid.other[1];
893 	*tl = op->nfso_stateid.other[2];
894 	if (syscred)
895 		nd->nd_flag |= ND_USEGSSNAME;
896 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
897 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
898 	if (error)
899 		return (error);
900 	if (!NFSHASNFSV4N(nmp))
901 		NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
902 	if (nd->nd_repstat == 0)
903 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
904 	error = nd->nd_repstat;
905 	if (!NFSHASNFSV4N(nmp) && error == NFSERR_STALESTATEID)
906 		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
907 nfsmout:
908 	m_freem(nd->nd_mrep);
909 	return (error);
910 }
911 
912 /*
913  * V4 Open Confirm RPC.
914  */
915 int
916 nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen,
917     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p)
918 {
919 	u_int32_t *tl;
920 	struct nfsrv_descript nfsd, *nd = &nfsd;
921 	struct nfsmount *nmp;
922 	int error;
923 
924 	nmp = VFSTONFS(vp->v_mount);
925 	if (NFSHASNFSV4N(nmp))
926 		return (0);		/* No confirmation for NFSv4.1. */
927 	nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, nmp, nfhp, fhlen, NULL, NULL,
928 	    0, 0, NULL);
929 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
930 	*tl++ = op->nfso_stateid.seqid;
931 	*tl++ = op->nfso_stateid.other[0];
932 	*tl++ = op->nfso_stateid.other[1];
933 	*tl++ = op->nfso_stateid.other[2];
934 	*tl = txdr_unsigned(op->nfso_own->nfsow_seqid);
935 	error = nfscl_request(nd, vp, p, cred);
936 	if (error)
937 		return (error);
938 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
939 	if (!nd->nd_repstat) {
940 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
941 		op->nfso_stateid.seqid = *tl++;
942 		op->nfso_stateid.other[0] = *tl++;
943 		op->nfso_stateid.other[1] = *tl++;
944 		op->nfso_stateid.other[2] = *tl;
945 	}
946 	error = nd->nd_repstat;
947 	if (error == NFSERR_STALESTATEID)
948 		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
949 nfsmout:
950 	m_freem(nd->nd_mrep);
951 	return (error);
952 }
953 
/*
 * Do the setclientid and setclientid confirm RPCs. Called from nfs_statfs()
 * when a mount has just occurred and when the server replies NFSERR_EXPIRED.
 *
 * Two separate paths exist, selected by NFSHASNFSV4N(nmp):
 * - When it is true, a session is set up instead.  If retokp is non-NULL
 *   and a session is already on nm_sess, a CreateSession using the extant
 *   ClientID is tried first.  If that is not possible or fails, an
 *   ExchangeID followed by a CreateSession is done.  On success the new
 *   session is inserted at the head of nm_sess, any previous head session
 *   is marked defunct and, when "reclaim" is 0, a ReclaimComplete is done.
 * - Otherwise, a nfsclds structure is allocated and put at the head of
 *   nm_sess, and the SetClientID and SetClientIDConfirm RPCs are done,
 *   including the callback address advertised to the server.
 *
 * retokp may be NULL.  When it is non-NULL and *retokp is already true,
 * NFSERR_IO is returned wherever this function would otherwise fall back
 * to acquiring a new ClientID.  *retokp is set true when the CreateSession
 * with the extant ClientID succeeds.
 *
 * Returns 0 upon success or an error value otherwise.
 */
int
nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim,
    bool *retokp, struct ucred *cred, NFSPROC_T *p)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd;
	struct nfsrv_descript *nd = &nfsd;
	u_int8_t *cp = NULL, *cp2, addr[INET6_ADDRSTRLEN + 9];
	u_short port;
	int error, isinet6 = 0, callblen;
	nfsquad_t confirm;
	/* Counter shared by all calls; copied into clp->nfsc_rev below. */
	static u_int32_t rev = 0;
	struct nfsclds *dsp, *odsp;
	struct in6_addr a6;
	struct nfsclsession *tsep;
	struct rpc_reconupcall recon;
	struct nfscl_reconarg *rcp;

	if (nfsboottime.tv_sec == 0)
		NFSSETBOOTTIME(nfsboottime);
	if (NFSHASNFSV4N(nmp)) {
		/*
		 * Start with a non-zero error, so that the ExchangeID
		 * path below is taken unless the CreateSession with the
		 * extant ClientID succeeds.
		 */
		error = NFSERR_BADSESSION;
		odsp = dsp = NULL;
		/* Only look for an existing session when retokp is given. */
		if (retokp != NULL) {
			NFSLOCKMNT(nmp);
			odsp = TAILQ_FIRST(&nmp->nm_sess);
			NFSUNLOCKMNT(nmp);
		}
		if (odsp != NULL) {
			/*
			 * When a session already exists, first try a
			 * CreateSession with the extant ClientID.
			 * The new structure copies the ClientID, flags and
			 * server owner from the old one and uses the old
			 * sequenceid plus one.
			 */
			dsp = malloc(sizeof(struct nfsclds) +
			    odsp->nfsclds_servownlen + 1, M_NFSCLDS,
			    M_WAITOK | M_ZERO);
			dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
			dsp->nfsclds_servownlen = odsp->nfsclds_servownlen;
			dsp->nfsclds_sess.nfsess_clientid =
			    odsp->nfsclds_sess.nfsess_clientid;
			dsp->nfsclds_sess.nfsess_sequenceid =
			    odsp->nfsclds_sess.nfsess_sequenceid + 1;
			dsp->nfsclds_flags = odsp->nfsclds_flags;
			if (dsp->nfsclds_servownlen > 0)
				memcpy(dsp->nfsclds_serverown,
				    odsp->nfsclds_serverown,
				    dsp->nfsclds_servownlen + 1);
			mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
			mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
			    NULL, MTX_DEF);
			nfscl_initsessionslots(&dsp->nfsclds_sess);
			error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
			    &nmp->nm_sockreq, NULL,
			    dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p);
			NFSCL_DEBUG(1, "create session for extant "
			    "ClientID=%d\n", error);
			if (error != 0) {
				nfscl_freenfsclds(dsp);
				dsp = NULL;
				/*
				 * If *retokp is true, return any error other
				 * than NFSERR_STALECLIENTID,
				 * NFSERR_BADSESSION or NFSERR_STALEDONTRECOVER
				 * so that nfscl_recover() will not loop.
				 */
				if (*retokp)
					return (NFSERR_IO);
			} else
				*retokp = true;
		} else if (retokp != NULL && *retokp)
			return (NFSERR_IO);
		if (error != 0) {
			/*
			 * Either there was no previous session or the
			 * CreateSession attempt failed, so...
			 * do an ExchangeID followed by the CreateSession.
			 */
			clp->nfsc_rev = rev++;
			error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq, 0,
			    NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp,
			    cred, p);
			NFSCL_DEBUG(1, "aft exch=%d\n", error);
			if (error == 0)
				error = nfsrpc_createsession(nmp,
				    &dsp->nfsclds_sess, &nmp->nm_sockreq, NULL,
				    dsp->nfsclds_sess.nfsess_sequenceid, 1,
				    cred, p);
			NFSCL_DEBUG(1, "aft createsess=%d\n", error);
		}
		if (error == 0) {
			/*
			 * If the session supports a backchannel, set up
			 * the BindConnectionToSession call in the krpc
			 * so that it is done on a reconnection.
			 */
			if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0) {
				/*
				 * rcp is handed to the krpc via recon.arg.
				 * NOTE(review): presumably the krpc takes
				 * over ownership of it -- confirm.
				 */
				rcp = mem_alloc(sizeof(*rcp));
				rcp->minorvers = nmp->nm_minorvers;
				memcpy(rcp->sessionid,
				    dsp->nfsclds_sess.nfsess_sessionid,
				    NFSX_V4SESSIONID);
				recon.call = nfsrpc_bindconnsess;
				recon.arg = rcp;
				CLNT_CONTROL(nmp->nm_client, CLSET_RECONUPCALL,
				    &recon);
			}

			NFSLOCKMNT(nmp);
			/*
			 * The old sessions cannot be safely free'd
			 * here, since they may still be used by
			 * in-progress RPCs.
			 */
			tsep = NULL;
			if (TAILQ_FIRST(&nmp->nm_sess) != NULL) {
				/*
				 * Mark the old session defunct.  Needed
				 * when called from nfscl_hasexpired().
				 */
				tsep = NFSMNT_MDSSESSION(nmp);
				tsep->nfsess_defunct = 1;
			}
			TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp,
			    nfsclds_list);
			/*
			 * Wake up RPCs waiting for a slot on the
			 * old session. These will then fail with
			 * NFSERR_BADSESSION and be retried with the
			 * new session by nfsv4_setsequence().
			 * Also wakeup() processes waiting for the
			 * new session.
			 */
			if (tsep != NULL)
				wakeup(&tsep->nfsess_slots);
			wakeup(&nmp->nm_sess);
			NFSUNLOCKMNT(nmp);
		} else if (dsp != NULL)
			/* The new session structure was never installed. */
			nfscl_freenfsclds(dsp);
		if (error == 0 && reclaim == 0) {
			error = nfsrpc_reclaimcomplete(nmp, cred, p);
			NFSCL_DEBUG(1, "aft reclaimcomp=%d\n", error);
			if (error == NFSERR_COMPLETEALREADY ||
			    error == NFSERR_NOTSUPP)
				/* Ignore this error. */
				error = 0;
		}
		return (error);
	} else if (retokp != NULL && *retokp)
		return (NFSERR_IO);
	clp->nfsc_rev = rev++;

	/*
	 * Allocate a single session structure for NFSv4.0, because some of
	 * the fields are used by NFSv4.0 although it doesn't do a session.
	 */
	dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO);
	mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
	mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF);
	NFSLOCKMNT(nmp);
	TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list);
	/*
	 * NOTE(review): tsep presumably refers to the session structure
	 * within the dsp just inserted at the head of the list -- confirm
	 * against the definition of NFSMNT_MDSSESSION().
	 */
	tsep = NFSMNT_MDSSESSION(nmp);
	NFSUNLOCKMNT(nmp);

	nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL, NULL, 0, 0,
	    NULL);
	/* Two words (boot time seconds and nfsc_rev), then the client id. */
	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(nfsboottime.tv_sec);
	*tl = txdr_unsigned(clp->nfsc_rev);
	(void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);

	/*
	 * set up the callback address
	 * If nfsv4_callbackaddr is an empty string, nfscl_getmyip() is
	 * used to get an address.  When callbacks are not enabled, no
	 * callback daemons are running or no address is available, the
	 * address "0.0.0.0.0.0" is sent instead.
	 */
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFS_CALLBCKPROG);
	callblen = strlen(nfsv4_callbackaddr);
	if (callblen == 0)
		cp = nfscl_getmyip(nmp, &a6, &isinet6);
	if (nfscl_enablecallb && nfs_numnfscbd > 0 &&
	    (callblen > 0 || cp != NULL)) {
		/*
		 * cp2 addresses the two bytes of the port in network byte
		 * order; they become the last two fields of the address
		 * string.
		 */
		port = htons(nfsv4_cbport);
		cp2 = (u_int8_t *)&port;
#ifdef INET6
		/* A ':' in the configured address is taken to mean IPv6. */
		if ((callblen > 0 &&
		     strchr(nfsv4_callbackaddr, ':')) || isinet6) {
			char ip6buf[INET6_ADDRSTRLEN], *ip6add;

			(void) nfsm_strtom(nd, "tcp6", 4);
			if (callblen == 0) {
				ip6_sprintf(ip6buf, (struct in6_addr *)cp);
				ip6add = ip6buf;
			} else {
				ip6add = nfsv4_callbackaddr;
			}
			snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d",
			    ip6add, cp2[0], cp2[1]);
		} else
#endif
		{
			(void) nfsm_strtom(nd, "tcp", 3);
			if (callblen == 0)
				snprintf(addr, INET6_ADDRSTRLEN + 9,
				    "%d.%d.%d.%d.%d.%d", cp[0], cp[1],
				    cp[2], cp[3], cp2[0], cp2[1]);
			else
				snprintf(addr, INET6_ADDRSTRLEN + 9,
				    "%s.%d.%d", nfsv4_callbackaddr,
				    cp2[0], cp2[1]);
		}
		(void) nfsm_strtom(nd, addr, strlen(addr));
	} else {
		(void) nfsm_strtom(nd, "tcp", 3);
		(void) nfsm_strtom(nd, "0.0.0.0.0.0", 11);
	}
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(clp->nfsc_cbident);
	nd->nd_flag |= ND_USEGSSNAME;
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
		NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error)
		return (error);
	if (nd->nd_repstat == 0) {
	    /* The reply holds the clientid followed by the confirm value. */
	    NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
	    tsep->nfsess_clientid.lval[0] = *tl++;
	    tsep->nfsess_clientid.lval[1] = *tl++;
	    confirm.lval[0] = *tl++;
	    confirm.lval[1] = *tl;
	    m_freem(nd->nd_mrep);
	    nd->nd_mrep = NULL;

	    /*
	     * and confirm it.
	     */
	    nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL,
		NULL, 0, 0, NULL);
	    NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
	    *tl++ = tsep->nfsess_clientid.lval[0];
	    *tl++ = tsep->nfsess_clientid.lval[1];
	    *tl++ = confirm.lval[0];
	    *tl = confirm.lval[1];
	    nd->nd_flag |= ND_USEGSSNAME;
	    error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
		cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	    if (error)
		return (error);
	    /* Set nd_mrep NULL, so the m_freem() at nfsmout is harmless. */
	    m_freem(nd->nd_mrep);
	    nd->nd_mrep = NULL;
	}
	error = nd->nd_repstat;
nfsmout:
	m_freem(nd->nd_mrep);
	return (error);
}
1211 
1212 /*
1213  * nfs getattr call.
1214  */
1215 int
1216 nfsrpc_getattr(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
1217     struct nfsvattr *nap)
1218 {
1219 	struct nfsrv_descript nfsd, *nd = &nfsd;
1220 	int error;
1221 	nfsattrbit_t attrbits;
1222 	struct nfsnode *np;
1223 	struct nfsmount *nmp;
1224 
1225 	nmp = VFSTONFS(vp->v_mount);
1226 	np = VTONFS(vp);
1227 	if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0 &&
1228 	    nmp->nm_fhsize == 0) {
1229 		/* Attempt to get the actual root file handle. */
1230 		error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp), cred, p);
1231 		if (error != 0)
1232 			return (EACCES);
1233 		if (np->n_fhp->nfh_len == NFSX_FHMAX + 1)
1234 			nfscl_statfs(vp, cred, p);
1235 	}
1236 	NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp, cred);
1237 	if (nd->nd_flag & ND_NFSV4) {
1238 		NFSGETATTR_ATTRBIT(&attrbits);
1239 		(void) nfsrv_putattrbit(nd, &attrbits);
1240 	}
1241 	error = nfscl_request(nd, vp, p, cred);
1242 	if (error)
1243 		return (error);
1244 	if (!nd->nd_repstat)
1245 		error = nfsm_loadattr(nd, nap);
1246 	else
1247 		error = nd->nd_repstat;
1248 	m_freem(nd->nd_mrep);
1249 	return (error);
1250 }
1251 
1252 /*
1253  * nfs getattr call with non-vnode arguments.
1254  */
1255 int
1256 nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred,
1257     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp,
1258     uint32_t *leasep)
1259 {
1260 	struct nfsrv_descript nfsd, *nd = &nfsd;
1261 	int error, vers = NFS_VER2;
1262 	nfsattrbit_t attrbits;
1263 
1264 	nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL, NULL, 0, 0,
1265 	    cred);
1266 	if (nd->nd_flag & ND_NFSV4) {
1267 		vers = NFS_VER4;
1268 		NFSGETATTR_ATTRBIT(&attrbits);
1269 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
1270 		(void) nfsrv_putattrbit(nd, &attrbits);
1271 	} else if (nd->nd_flag & ND_NFSV3) {
1272 		vers = NFS_VER3;
1273 	}
1274 	if (syscred)
1275 		nd->nd_flag |= ND_USEGSSNAME;
1276 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
1277 	    NFS_PROG, vers, NULL, 1, xidp, NULL);
1278 	if (error)
1279 		return (error);
1280 	if (nd->nd_repstat == 0) {
1281 		if ((nd->nd_flag & ND_NFSV4) != 0)
1282 			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
1283 			    NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL,
1284 			    NULL, NULL);
1285 		else
1286 			error = nfsm_loadattr(nd, nap);
1287 	} else
1288 		error = nd->nd_repstat;
1289 	m_freem(nd->nd_mrep);
1290 	return (error);
1291 }
1292 
1293 /*
1294  * Do an nfs setattr operation.
1295  */
1296 int
1297 nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp,
1298     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp)
1299 {
1300 	int error, expireret = 0, openerr, retrycnt;
1301 	u_int32_t clidrev = 0, mode;
1302 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1303 	struct nfsfh *nfhp;
1304 	nfsv4stateid_t stateid;
1305 	void *lckp;
1306 
1307 	if (nmp->nm_clp != NULL)
1308 		clidrev = nmp->nm_clp->nfsc_clientidrev;
1309 	if (vap != NULL && NFSATTRISSET(u_quad_t, vap, va_size))
1310 		mode = NFSV4OPEN_ACCESSWRITE;
1311 	else
1312 		mode = NFSV4OPEN_ACCESSREAD;
1313 	retrycnt = 0;
1314 	do {
1315 		lckp = NULL;
1316 		openerr = 1;
1317 		if (NFSHASNFSV4(nmp)) {
1318 			nfhp = VTONFS(vp)->n_fhp;
1319 			error = nfscl_getstateid(vp, nfhp->nfh_fh,
1320 			    nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp);
1321 			if (error && vp->v_type == VREG &&
1322 			    (mode == NFSV4OPEN_ACCESSWRITE ||
1323 			     nfstest_openallsetattr)) {
1324 				/*
1325 				 * No Open stateid, so try and open the file
1326 				 * now.
1327 				 */
1328 				if (mode == NFSV4OPEN_ACCESSWRITE)
1329 					openerr = nfsrpc_open(vp, FWRITE, cred,
1330 					    p);
1331 				else
1332 					openerr = nfsrpc_open(vp, FREAD, cred,
1333 					    p);
1334 				if (!openerr)
1335 					(void) nfscl_getstateid(vp,
1336 					    nfhp->nfh_fh, nfhp->nfh_len,
1337 					    mode, 0, cred, p, &stateid, &lckp);
1338 			}
1339 		}
1340 		if (vap != NULL)
1341 			error = nfsrpc_setattrrpc(vp, vap, &stateid, cred, p,
1342 			    rnap, attrflagp);
1343 		else
1344 			error = nfsrpc_setaclrpc(vp, cred, p, aclp, &stateid);
1345 		if (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD) {
1346 			NFSLOCKMNT(nmp);
1347 			nmp->nm_state |= NFSSTA_OPENMODE;
1348 			NFSUNLOCKMNT(nmp);
1349 		}
1350 		if (error == NFSERR_STALESTATEID)
1351 			nfscl_initiate_recovery(nmp->nm_clp);
1352 		if (lckp != NULL)
1353 			nfscl_lockderef(lckp);
1354 		if (!openerr)
1355 			(void) nfsrpc_close(vp, 0, p);
1356 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1357 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1358 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1359 			(void) nfs_catnap(PZERO, error, "nfs_setattr");
1360 		} else if ((error == NFSERR_EXPIRED ||
1361 		    ((!NFSHASINT(nmp) || !NFSHASNFSV4N(nmp)) &&
1362 		    error == NFSERR_BADSTATEID)) && clidrev != 0) {
1363 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1364 		} else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp) &&
1365 		    NFSHASNFSV4N(nmp)) {
1366 			error = EIO;
1367 		}
1368 		retrycnt++;
1369 	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1370 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1371 	    error == NFSERR_BADSESSION ||
1372 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1373 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1374 	     expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1375 	    (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD &&
1376 	     retrycnt < 4));
1377 	if (error && retrycnt >= 4)
1378 		error = EIO;
1379 	return (error);
1380 }
1381 
1382 static int
1383 nfsrpc_setattrrpc(vnode_t vp, struct vattr *vap,
1384     nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
1385     struct nfsvattr *rnap, int *attrflagp)
1386 {
1387 	u_int32_t *tl;
1388 	struct nfsrv_descript nfsd, *nd = &nfsd;
1389 	int error;
1390 	nfsattrbit_t attrbits;
1391 
1392 	*attrflagp = 0;
1393 	NFSCL_REQSTART(nd, NFSPROC_SETATTR, vp, cred);
1394 	if (nd->nd_flag & ND_NFSV4)
1395 		nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1396 	vap->va_type = vp->v_type;
1397 	nfscl_fillsattr(nd, vap, vp, NFSSATTR_FULL, 0);
1398 	if (nd->nd_flag & ND_NFSV3) {
1399 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1400 		*tl = newnfs_false;
1401 	} else if (nd->nd_flag & ND_NFSV4) {
1402 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1403 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1404 		NFSGETATTR_ATTRBIT(&attrbits);
1405 		(void) nfsrv_putattrbit(nd, &attrbits);
1406 	}
1407 	error = nfscl_request(nd, vp, p, cred);
1408 	if (error)
1409 		return (error);
1410 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
1411 		error = nfscl_wcc_data(nd, vp, rnap, attrflagp, NULL, NULL);
1412 	if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 && !error)
1413 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1414 	if (!(nd->nd_flag & ND_NFSV3) && !nd->nd_repstat && !error)
1415 		error = nfscl_postop_attr(nd, rnap, attrflagp);
1416 	m_freem(nd->nd_mrep);
1417 	if (nd->nd_repstat && !error)
1418 		error = nd->nd_repstat;
1419 	return (error);
1420 }
1421 
/*
 * nfs lookup rpc
 *
 * Looks up "name" (of length "len") in the directory "dvp".
 * - ENOTDIR is returned if dvp is not a directory and ENAMETOOLONG if len
 *   exceeds NFS_MAXNAMLEN.
 * - For NFSv4, "." is handled locally by returning a copy of dvp's file
 *   handle and ".." is done as a Lookupp.  When the Lookupp fails with
 *   NFSERR_NOENT, a copy of dvp's file handle is returned as well.
 * - A non-zero "openmode" (ignored for the NFSv4 ".." case) selects
 *   NFSPROC_LOOKUPOPEN, where Verify and Open operations are added to
 *   the request when ND_NFSV4 is set, so that the file is also opened
 *   with that mode.
 *
 * *attrflagp and *dattrflagp are cleared on entry.  *dattrflagp is set to
 * 1 when the NFSv4 directory attributes were loaded into *dnap; for NFSv3
 * it is passed to nfscl_postop_attr().  *attrflagp is passed to
 * nfscl_postop_attr() for the attributes of the file (*nap).
 * The file handle is returned via *nfhpp.  For the two cases handled
 * locally it is allocated here with type M_NFSFH.
 * NOTE(review): presumably nfsm_getfh() allocates it for the other cases
 * and the caller is responsible for freeing it -- confirm.
 *
 * Returns 0 upon success or an error value otherwise.
 */
int
nfsrpc_lookup(vnode_t dvp, char *name, int len, struct ucred *cred,
    NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap,
    struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, uint32_t openmode)
{
	uint32_t deleg, rflags, *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfsmount *nmp;
	struct nfsnode *np;
	struct nfsfh *nfhp;
	nfsattrbit_t attrbits;
	int error = 0, lookupp = 0, newone, ret, retop;
	uint8_t own[NFSV4CL_LOCKNAMELEN];
	struct nfsclopen *op;
	struct nfscldeleg *ndp;
	nfsv4stateid_t stateid;

	*attrflagp = 0;
	*dattrflagp = 0;
	if (dvp->v_type != VDIR)
		return (ENOTDIR);
	nmp = VFSTONFS(dvp->v_mount);
	if (len > NFS_MAXNAMLEN)
		return (ENAMETOOLONG);
	if (NFSHASNFSV4(nmp) && len == 1 &&
		name[0] == '.') {
		/*
		 * Just return the current dir's fh.
		 */
		np = VTONFS(dvp);
		nfhp = malloc(sizeof (struct nfsfh) +
			np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
		nfhp->nfh_len = np->n_fhp->nfh_len;
		NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
		*nfhpp = nfhp;
		return (0);
	}
	if (NFSHASNFSV4(nmp) && len == 2 &&
		name[0] == '.' && name[1] == '.') {
		/* An Open is never combined with a Lookupp. */
		lookupp = 1;
		openmode = 0;
		NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, dvp, cred);
	} else if (openmode != 0) {
		NFSCL_REQSTART(nd, NFSPROC_LOOKUPOPEN, dvp, cred);
		nfsm_strtom(nd, name, len);
	} else {
		NFSCL_REQSTART(nd, NFSPROC_LOOKUP, dvp, cred);
		(void) nfsm_strtom(nd, name, len);
	}
	if (nd->nd_flag & ND_NFSV4) {
		/* Add GetFH and Getattr operations after the lookup. */
		NFSGETATTR_ATTRBIT(&attrbits);
		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
		*tl = txdr_unsigned(NFSV4OP_GETATTR);
		(void) nfsrv_putattrbit(nd, &attrbits);
		if (openmode != 0) {
			/* Test for a VREG file. */
			NFSZERO_ATTRBIT(&attrbits);
			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE);
			NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
			*tl = txdr_unsigned(NFSV4OP_VERIFY);
			nfsrv_putattrbit(nd, &attrbits);
			NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(NFSX_UNSIGNED);
			*tl = vtonfsv34_type(VREG);

			/* Attempt the Open for VREG. */
			nfscl_filllockowner(NULL, own, F_POSIX);
			NFSM_BUILD(tl, uint32_t *, 6 * NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(NFSV4OP_OPEN);
			*tl++ = 0;		/* seqid, ignored. */
			*tl++ = txdr_unsigned(openmode);
			*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
			*tl++ = 0;		/* ClientID, ignored. */
			*tl = 0;
			nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN);
			NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
			*tl = txdr_unsigned(NFSV4OPEN_CLAIMFH);
		}
	}
	error = nfscl_request(nd, dvp, p, cred);
	if (error)
		return (error);
	/* ndp must be NULL here, since it is free'd at nfsmout. */
	ndp = NULL;
	if (nd->nd_repstat) {
		/*
		 * When an NFSv4 Lookupp returns ENOENT, it means that
		 * the lookup is at the root of an fs, so return this dir.
		 */
		if (nd->nd_repstat == NFSERR_NOENT && lookupp) {
		    np = VTONFS(dvp);
		    nfhp = malloc(sizeof (struct nfsfh) +
			np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
		    nfhp->nfh_len = np->n_fhp->nfh_len;
		    NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
		    *nfhpp = nfhp;
		    m_freem(nd->nd_mrep);
		    return (0);
		}
		if (nd->nd_flag & ND_NFSV3)
		    error = nfscl_postop_attr(nd, dnap, dattrflagp);
		else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
		    ND_NFSV4) {
			/* Load the directory attributes. */
			error = nfsm_loadattr(nd, dnap);
			if (error != 0)
				goto nfsmout;
			*dattrflagp = 1;
		}
		/*
		 * For the openmode != 0 case, the reply is walked one
		 * operation at a time.  Each step dissects two words and
		 * gives up, via nfsmout, if the second word (the status
		 * of that operation) is non-zero.
		 */
		/* Check Lookup operation reply status. */
		if (openmode != 0 && (nd->nd_flag & ND_NOMOREDATA) == 0) {
			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
			if (*++tl != 0)
				goto nfsmout;
		}
		/* Look for GetFH reply. */
		if (openmode != 0 && (nd->nd_flag & ND_NOMOREDATA) == 0) {
			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
			if (*++tl != 0)
				goto nfsmout;
			error = nfsm_getfh(nd, nfhpp);
			if (error)
				goto nfsmout;
		}
		/* Look for Getattr reply. */
		if (openmode != 0 && (nd->nd_flag & ND_NOMOREDATA) == 0) {
			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
			if (*++tl != 0)
				goto nfsmout;
			error = nfsm_loadattr(nd, nap);
			if (error == 0) {
				/*
				 * We have now successfully completed the
				 * lookup, so set nd_repstat to 0.
				 */
				nd->nd_repstat = 0;
				*attrflagp = 1;
			}
		}
		goto nfsmout;
	}
	if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
		/* Load the directory attributes. */
		error = nfsm_loadattr(nd, dnap);
		if (error != 0)
			goto nfsmout;
		*dattrflagp = 1;
		/* Skip over the Lookup and GetFH operation status values. */
		NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
	}
	error = nfsm_getfh(nd, nfhpp);
	if (error)
		goto nfsmout;

	error = nfscl_postop_attr(nd, nap, attrflagp);
	if (openmode != 0 && error == 0) {
		/*
		 * The Open succeeded, so get the open stateid, the result
		 * flags and any delegation out of the reply.
		 */
		NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID +
		    10 * NFSX_UNSIGNED);
		tl += 4;	/* Skip over Verify+Open status. */
		stateid.seqid = *tl++;
		stateid.other[0] = *tl++;
		stateid.other[1] = *tl++;
		stateid.other[2] = *tl;
		/* rflags is six words past the last word of the stateid. */
		rflags = fxdr_unsigned(uint32_t, *(tl + 6));
		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
		if (error != 0)
			goto nfsmout;
		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
		deleg = fxdr_unsigned(uint32_t, *tl);
		if (deleg == NFSV4OPEN_DELEGATEREAD ||
		    deleg == NFSV4OPEN_DELEGATEWRITE) {
			/*
			 * Just need to fill in the fields used by
			 * nfscl_trydelegreturn().
			 * Mark the mount point as acquiring
			 * delegations, so NFSPROC_LOOKUPOPEN will
			 * no longer be done.
			 */
			NFSLOCKMNT(nmp);
			nmp->nm_privflag |= NFSMNTP_DELEGISSUED;
			NFSUNLOCKMNT(nmp);
			ndp = malloc(sizeof(struct nfscldeleg) +
			    (*nfhpp)->nfh_len, M_NFSCLDELEG, M_WAITOK);
			ndp->nfsdl_fhlen = (*nfhpp)->nfh_len;
			NFSBCOPY((*nfhpp)->nfh_fh, ndp->nfsdl_fh,
			    ndp->nfsdl_fhlen);
			newnfs_copyincred(cred, &ndp->nfsdl_cred);
			NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
			ndp->nfsdl_stateid.seqid = *tl++;
			ndp->nfsdl_stateid.other[0] = *tl++;
			ndp->nfsdl_stateid.other[1] = *tl++;
			ndp->nfsdl_stateid.other[2] = *tl++;
		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
			error = NFSERR_BADXDR;
			goto nfsmout;
		}
		/*
		 * Get the open structure for the file handle.  A failure
		 * here is not returned as the error for the lookup, since
		 * "error" is still 0 at this point.
		 */
		ret = nfscl_open(dvp, (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len,
		    openmode, 0, cred, p, NULL, &op, &newone, &retop, 1, true);
		if (ret != 0)
			goto nfsmout;
		if (newone != 0) {
			/* A new open, so fill in the entire stateid. */
			op->nfso_stateid.seqid = stateid.seqid;
			op->nfso_stateid.other[0] = stateid.other[0];
			op->nfso_stateid.other[1] = stateid.other[1];
			op->nfso_stateid.other[2] = stateid.other[2];
			op->nfso_mode = openmode;
		} else {
			/* An extant open, so only the seqid is updated. */
			op->nfso_stateid.seqid = stateid.seqid;
			if (retop == NFSCLOPEN_DOOPEN)
				op->nfso_mode |= openmode;
		}
		if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) != 0 ||
		    nfscl_assumeposixlocks)
			op->nfso_posixlock = 1;
		else
			op->nfso_posixlock = 0;
		nfscl_openrelease(nmp, op, 0, 0);
		if (ndp != NULL) {
			/*
			 * Since we do not have the vnode, we
			 * cannot invalidate cached attributes.
			 * Just return the delegation.
			 */
			nfscl_trydelegreturn(ndp, cred, nmp, p);
		}
	}
	if ((nd->nd_flag & ND_NFSV3) && !error)
		error = nfscl_postop_attr(nd, dnap, dattrflagp);
nfsmout:
	m_freem(nd->nd_mrep);
	if (!error && nd->nd_repstat)
		error = nd->nd_repstat;
	/* ndp is either NULL or the temporary delegation structure. */
	free(ndp, M_NFSCLDELEG);
	return (error);
}
1661 
1662 /*
1663  * Do a readlink rpc.
1664  */
1665 int
1666 nfsrpc_readlink(vnode_t vp, struct uio *uiop, struct ucred *cred,
1667     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
1668 {
1669 	u_int32_t *tl;
1670 	struct nfsrv_descript nfsd, *nd = &nfsd;
1671 	struct nfsnode *np = VTONFS(vp);
1672 	nfsattrbit_t attrbits;
1673 	int error, len, cangetattr = 1;
1674 
1675 	*attrflagp = 0;
1676 	NFSCL_REQSTART(nd, NFSPROC_READLINK, vp, cred);
1677 	if (nd->nd_flag & ND_NFSV4) {
1678 		/*
1679 		 * And do a Getattr op.
1680 		 */
1681 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1682 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1683 		NFSGETATTR_ATTRBIT(&attrbits);
1684 		(void) nfsrv_putattrbit(nd, &attrbits);
1685 	}
1686 	error = nfscl_request(nd, vp, p, cred);
1687 	if (error)
1688 		return (error);
1689 	if (nd->nd_flag & ND_NFSV3)
1690 		error = nfscl_postop_attr(nd, nap, attrflagp);
1691 	if (!nd->nd_repstat && !error) {
1692 		NFSM_STRSIZ(len, NFS_MAXPATHLEN);
1693 		/*
1694 		 * This seems weird to me, but must have been added to
1695 		 * FreeBSD for some reason. The only thing I can think of
1696 		 * is that there was/is some server that replies with
1697 		 * more link data than it should?
1698 		 */
1699 		if (len == NFS_MAXPATHLEN) {
1700 			NFSLOCKNODE(np);
1701 			if (np->n_size > 0 && np->n_size < NFS_MAXPATHLEN) {
1702 				len = np->n_size;
1703 				cangetattr = 0;
1704 			}
1705 			NFSUNLOCKNODE(np);
1706 		}
1707 		error = nfsm_mbufuio(nd, uiop, len);
1708 		if ((nd->nd_flag & ND_NFSV4) && !error && cangetattr)
1709 			error = nfscl_postop_attr(nd, nap, attrflagp);
1710 	}
1711 	if (nd->nd_repstat && !error)
1712 		error = nd->nd_repstat;
1713 nfsmout:
1714 	m_freem(nd->nd_mrep);
1715 	return (error);
1716 }
1717 
1718 /*
1719  * Read operation.
1720  */
1721 int
1722 nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred,
1723     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
1724 {
1725 	int error, expireret = 0, retrycnt;
1726 	u_int32_t clidrev = 0;
1727 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1728 	struct nfsnode *np = VTONFS(vp);
1729 	struct ucred *newcred;
1730 	struct nfsfh *nfhp = NULL;
1731 	nfsv4stateid_t stateid;
1732 	void *lckp;
1733 
1734 	if (nmp->nm_clp != NULL)
1735 		clidrev = nmp->nm_clp->nfsc_clientidrev;
1736 	newcred = cred;
1737 	if (NFSHASNFSV4(nmp)) {
1738 		nfhp = np->n_fhp;
1739 		newcred = NFSNEWCRED(cred);
1740 	}
1741 	retrycnt = 0;
1742 	do {
1743 		lckp = NULL;
1744 		if (NFSHASNFSV4(nmp))
1745 			(void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1746 			    NFSV4OPEN_ACCESSREAD, 0, newcred, p, &stateid,
1747 			    &lckp);
1748 		error = nfsrpc_readrpc(vp, uiop, newcred, &stateid, p, nap,
1749 		    attrflagp);
1750 		if (error == NFSERR_OPENMODE) {
1751 			NFSLOCKMNT(nmp);
1752 			nmp->nm_state |= NFSSTA_OPENMODE;
1753 			NFSUNLOCKMNT(nmp);
1754 		}
1755 		if (error == NFSERR_STALESTATEID)
1756 			nfscl_initiate_recovery(nmp->nm_clp);
1757 		if (lckp != NULL)
1758 			nfscl_lockderef(lckp);
1759 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1760 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1761 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1762 			(void) nfs_catnap(PZERO, error, "nfs_read");
1763 		} else if ((error == NFSERR_EXPIRED ||
1764 		    ((!NFSHASINT(nmp) || !NFSHASNFSV4N(nmp)) &&
1765 		    error == NFSERR_BADSTATEID)) && clidrev != 0) {
1766 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1767 		} else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp) &&
1768 		    NFSHASNFSV4N(nmp)) {
1769 			error = EIO;
1770 		}
1771 		retrycnt++;
1772 	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1773 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1774 	    error == NFSERR_BADSESSION ||
1775 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1776 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1777 	     expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1778 	    (error == NFSERR_OPENMODE && retrycnt < 4));
1779 	if (error && retrycnt >= 4)
1780 		error = EIO;
1781 	if (NFSHASNFSV4(nmp))
1782 		NFSFREECRED(newcred);
1783 	return (error);
1784 }
1785 
1786 /*
1787  * The actual read RPC.
 *
 * Reads the range described by uiop, issuing one Read request per loop
 * iteration for at most nm_rsize bytes each (nm_rsize is sampled once,
 * under the mount lock).
 * - cred is used for every request; for NFSv4 the stateid pointed to by
 *   stateidp is put in each request.
 * - nap/attrflagp: *attrflagp is cleared on entry and before every
 *   request.  For NFSv2 it is set to 1 once nfsm_loadattr() has filled
 *   in *nap; for NFSv3 both are handed to nfscl_postop_attr().  Nothing
 *   in this function sets them for NFSv4.
 * Returns EFBIG, without doing any RPC, if the end of the range is
 * beyond nm_maxfilesize or wraps around.  Otherwise returns 0 on success
 * or the error from the request, the reply status or the reply parsing.
1788  */
1789 static int
1790 nfsrpc_readrpc(vnode_t vp, struct uio *uiop, struct ucred *cred,
1791     nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap,
1792     int *attrflagp)
1793 {
1794 	u_int32_t *tl;
1795 	int error = 0, len, retlen, tsiz, eof = 0;
1796 	struct nfsrv_descript nfsd;
1797 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1798 	struct nfsrv_descript *nd = &nfsd;
1799 	int rsize;
1800 	off_t tmp_off;
1801 
1802 	*attrflagp = 0;
1803 	tsiz = uiop->uio_resid;
1804 	tmp_off = uiop->uio_offset + tsiz;
1805 	NFSLOCKMNT(nmp);
	/* Reject a range ending past the maximum file size or wrapping. */
1806 	if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1807 		NFSUNLOCKMNT(nmp);
1808 		return (EFBIG);
1809 	}
1810 	rsize = nmp->nm_rsize;
1811 	NFSUNLOCKMNT(nmp);
	/* nfsmout only frees a reply that is still attached to nd. */
1812 	nd->nd_mrep = NULL;
1813 	while (tsiz > 0) {
1814 		*attrflagp = 0;
1815 		len = (tsiz > rsize) ? rsize : tsiz;
1816 		NFSCL_REQSTART(nd, NFSPROC_READ, vp, cred);
1817 		if (nd->nd_flag & ND_NFSV4)
1818 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
		/*
		 * Three words of arguments: NFSv2 sends a 32 bit offset,
		 * the count and a zero word; NFSv3/4 send a 64 bit offset
		 * followed by the count.
		 */
1819 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED * 3);
1820 		if (nd->nd_flag & ND_NFSV2) {
1821 			*tl++ = txdr_unsigned(uiop->uio_offset);
1822 			*tl++ = txdr_unsigned(len);
1823 			*tl = 0;
1824 		} else {
1825 			txdr_hyper(uiop->uio_offset, tl);
1826 			*(tl + 2) = txdr_unsigned(len);
1827 		}
1828 		/*
1829 		 * Since I can't do a Getattr for NFSv4 for Write, there
1830 		 * doesn't seem any point in doing one here, either.
1831 		 * (See the comment in nfsrpc_writerpc() for more info.)
1832 		 */
1833 		error = nfscl_request(nd, vp, p, cred);
		/*
		 * NOTE(review): this path returns without touching nd_mrep;
		 * presumably nfscl_request() leaves no reply to free when it
		 * fails - confirm.
		 */
1834 		if (error)
1835 			return (error);
1836 		if (nd->nd_flag & ND_NFSV3) {
1837 			error = nfscl_postop_attr(nd, nap, attrflagp);
1838 		} else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV2)) {
1839 			error = nfsm_loadattr(nd, nap);
1840 			if (!error)
1841 				*attrflagp = 1;
1842 		}
		/* A parsing error takes precedence over the reply status. */
1843 		if (nd->nd_repstat || error) {
1844 			if (!error)
1845 				error = nd->nd_repstat;
1846 			goto nfsmout;
1847 		}
		/*
		 * The eof flag is the second of two words for NFSv3 and a
		 * single word for NFSv4; NFSv2 has none, so eof stays 0.
		 * NOTE(review): NFSM_DISSECT()/NFSM_STRSIZ() are defined
		 * elsewhere; presumably they set error and bail out to
		 * nfsmout on a malformed reply - confirm.
		 */
1848 		if (nd->nd_flag & ND_NFSV3) {
1849 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1850 			eof = fxdr_unsigned(int, *(tl + 1));
1851 		} else if (nd->nd_flag & ND_NFSV4) {
1852 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
1853 			eof = fxdr_unsigned(int, *tl);
1854 		}
1855 		NFSM_STRSIZ(retlen, len);
1856 		error = nfsm_mbufuio(nd, uiop, retlen);
1857 		if (error)
1858 			goto nfsmout;
1859 		m_freem(nd->nd_mrep);
1860 		nd->nd_mrep = NULL;
1861 		tsiz -= retlen;
		/*
		 * Stop early: for NFSv3/4 on eof or an empty reply, for
		 * NFSv2 on a reply shorter than what was asked for.
		 */
1862 		if (!(nd->nd_flag & ND_NFSV2)) {
1863 			if (eof || retlen == 0)
1864 				tsiz = 0;
1865 		} else if (retlen < len)
1866 			tsiz = 0;
1867 	}
1868 	return (0);
1869 nfsmout:
1870 	if (nd->nd_mrep != NULL)
1871 		m_freem(nd->nd_mrep);
1872 	return (error);
1873 }
1874 
1875 /*
1876  * nfs write operation
1877  * When called_from_strategy != 0, it should return EIO for an error that
1878  * indicates recovery is in progress, so that the buffer will be left
1879  * dirty and be written back to the server later. If it loops around,
1880  * the recovery thread could get stuck waiting for the buffer and recovery
1881  * will then deadlock.
 *
 * This is the retry wrapper around nfsrpc_writerpc(); uiop, iomode,
 * must_commit, nap, attrflagp and ioflag are passed through to it.
 * *must_commit must be in the range 0..2 on entry (asserted).
 * For an NFSv4 mount, a new credential is obtained with NFSNEWCRED() and
 * freed before returning, and a stateid for write access is looked up
 * before every attempt.
 * Retry policy, as coded in the loop condition:
 * - NFSERR_GRACE and NFSERR_DELAY are always retried;
 * - NFSERR_STALESTATEID, NFSERR_BADSESSION and NFSERR_STALEDONTRECOVER
 *   are retried only when called_from_strategy == 0;
 * - NFSERR_OLDSTATEID is retried while retrycnt < 20;
 * - NFSERR_EXPIRED and NFSERR_BADSTATEID are retried while
 *   expireret == 0, clidrev != 0 and retrycnt < 4.
 * Returns 0 on success.  Any error left after 4 or more attempts, and
 * the recovery errors when called_from_strategy != 0, are returned as
 * EIO; other errors are returned unchanged.
1882  */
1883 int
1884 nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
1885     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
1886     int called_from_strategy, int ioflag)
1887 {
1888 	int error, expireret = 0, retrycnt, nostateid;
1889 	u_int32_t clidrev = 0;
1890 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1891 	struct nfsnode *np = VTONFS(vp);
1892 	struct ucred *newcred;
1893 	struct nfsfh *nfhp = NULL;
1894 	nfsv4stateid_t stateid;
1895 	void *lckp;
1896 
1897 	KASSERT(*must_commit >= 0 && *must_commit <= 2,
1898 	    ("nfsrpc_write: must_commit out of range=%d", *must_commit));
	/* clidrev stays 0 when the mount has no client structure. */
1899 	if (nmp->nm_clp != NULL)
1900 		clidrev = nmp->nm_clp->nfsc_clientidrev;
1901 	newcred = cred;
1902 	if (NFSHASNFSV4(nmp)) {
1903 		newcred = NFSNEWCRED(cred);
1904 		nfhp = np->n_fhp;
1905 	}
1906 	retrycnt = 0;
1907 	do {
1908 		lckp = NULL;
1909 		nostateid = 0;
1910 		if (NFSHASNFSV4(nmp)) {
1911 			(void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1912 			    NFSV4OPEN_ACCESSWRITE, 0, newcred, p, &stateid,
1913 			    &lckp);
			/* All three "other" words zero: no stateid. */
1914 			if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
1915 			    stateid.other[2] == 0) {
1916 				nostateid = 1;
1917 				NFSCL_DEBUG(1, "stateid0 in write\n");
1918 			}
1919 		}
1920 
1921 		/*
1922 		 * If there is no stateid for NFSv4, it means this is an
1923 		 * extraneous write after close. Basically a poorly
1924 		 * implemented buffer cache. Just don't do the write.
1925 		 */
1926 		if (nostateid)
1927 			error = 0;
1928 		else
1929 			error = nfsrpc_writerpc(vp, uiop, iomode, must_commit,
1930 			    newcred, &stateid, p, nap, attrflagp, ioflag);
1931 		if (error == NFSERR_STALESTATEID)
1932 			nfscl_initiate_recovery(nmp->nm_clp);
		/* Release what nfscl_getstateid() returned through lckp. */
1933 		if (lckp != NULL)
1934 			nfscl_lockderef(lckp);
		/*
		 * Nap for the errors that are waited out, check for lease
		 * expiry on NFSERR_EXPIRED (and on NFSERR_BADSTATEID unless
		 * this is an "intr" NFSv4.1+ mount, which gets EIO instead).
		 */
1935 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1936 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1937 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1938 			(void) nfs_catnap(PZERO, error, "nfs_write");
1939 		} else if ((error == NFSERR_EXPIRED ||
1940 		    ((!NFSHASINT(nmp) || !NFSHASNFSV4N(nmp)) &&
1941 		    error == NFSERR_BADSTATEID)) && clidrev != 0) {
1942 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1943 		} else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp) &&
1944 		    NFSHASNFSV4N(nmp)) {
1945 			error = EIO;
1946 		}
1947 		retrycnt++;
1948 	} while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
1949 	    ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
1950 	      error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) ||
1951 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1952 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1953 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
	/*
	 * Map to EIO once the retries are used up, or when a recovery
	 * related error must not be retried here (see the header comment).
	 */
1954 	if (error != 0 && (retrycnt >= 4 ||
1955 	    ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
1956 	      error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0)))
1957 		error = EIO;
1958 	if (NFSHASNFSV4(nmp))
1959 		NFSFREECRED(newcred);
1960 	return (error);
1961 }
1962 
1963 /*
1964  * The actual write RPC.
 *
 * Writes the data described by uiop (which must have exactly one iovec)
 * in chunks of at most nm_wsize bytes, one request per loop iteration.
 * - *iomode supplies the stability level put in NFSv3/NFSv4 requests.
 *   On every exit through nfsmout it is overwritten with the lowest
 *   commitment level reported by the replies (NFSWRITE_FILESYNC if no
 *   reply reported one).  It is left untouched by the EFBIG return and
 *   by the return taken when nfscl_request() fails.
 * - *must_commit is set to 1 when a reply's write verifier differs from
 *   the one saved in nm_verf, unless *must_commit is already 2.
 * - When ioflag has IO_APPEND set on an NFSv4 mount without pNFS, the
 *   request is built as NFSPROC_APPENDWRITE, carrying the Size attribute
 *   bit and uio_offset ahead of the Write.  If the reply status is
 *   NFSERR_NOTSAME, the write is redone as a plain Write at the offset
 *   returned by nfscl_wcc_data().
 * - nap/attrflagp: for NFSv3 and NFSv4 both are handed to
 *   nfscl_wcc_data(); for NFSv2 and NFSv4 *attrflagp is set to
 *   NFS_LATTR_NOSHRINK once nfsm_loadattr() has filled in *nap.
 * Returns 0 on success, EFBIG if the range ends beyond nm_maxfilesize or
 * wraps around, or the error from the request, the reply parsing or the
 * reply status.
1965  */
1966 static int
1967 nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode,
1968     int *must_commit, struct ucred *cred, nfsv4stateid_t *stateidp,
1969     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, int ioflag)
1970 {
1971 	u_int32_t *tl;
1972 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1973 	struct nfsnode *np = VTONFS(vp);
1974 	int error = 0, len, rlen, commit, committed = NFSWRITE_FILESYNC;
1975 	int wccflag = 0;
1976 	int32_t backup;
1977 	struct nfsrv_descript *nd;
1978 	nfsattrbit_t attrbits;
1979 	uint64_t tmp_off;
1980 	ssize_t tsiz, wsize;
1981 	bool do_append;
1982 
1983 	KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
1984 	*attrflagp = 0;
1985 	tsiz = uiop->uio_resid;
1986 	tmp_off = uiop->uio_offset + tsiz;
1987 	NFSLOCKMNT(nmp);
	/* Reject a range ending past the maximum file size or wrapping. */
1988 	if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1989 		NFSUNLOCKMNT(nmp);
1990 		return (EFBIG);
1991 	}
1992 	wsize = nmp->nm_wsize;
1993 	do_append = false;
1994 	if ((ioflag & IO_APPEND) != 0 && NFSHASNFSV4(nmp) && !NFSHASPNFS(nmp))
1995 		do_append = true;
1996 	NFSUNLOCKMNT(nmp);
	/* nd is heap allocated here and freed on every return below. */
1997 	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK);
1998 	nd->nd_mrep = NULL;	/* NFSv2 sometimes does a write with */
1999 	nd->nd_repstat = 0;	/* uio_resid == 0, so the while is not done */
2000 	while (tsiz > 0) {
2001 		*attrflagp = 0;
2002 		len = (tsiz > wsize) ? wsize : tsiz;
2003 		if (do_append)
2004 			NFSCL_REQSTART(nd, NFSPROC_APPENDWRITE, vp, cred);
2005 		else
2006 			NFSCL_REQSTART(nd, NFSPROC_WRITE, vp, cred);
2007 		if (nd->nd_flag & ND_NFSV4) {
2008 			if (do_append) {
				/*
				 * Size attribute bit, then the length and
				 * value of the size (uio_offset), then the
				 * Write operation number.
				 */
2009 				NFSZERO_ATTRBIT(&attrbits);
2010 				NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
2011 				nfsrv_putattrbit(nd, &attrbits);
2012 				NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED +
2013 				    NFSX_HYPER);
2014 				*tl++ = txdr_unsigned(NFSX_HYPER);
2015 				txdr_hyper(uiop->uio_offset, tl); tl += 2;
2016 				*tl = txdr_unsigned(NFSV4OP_WRITE);
2017 			}
			/* NFSv4: stateid, offset, stability, length. */
2018 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
2019 			NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+2*NFSX_UNSIGNED);
2020 			txdr_hyper(uiop->uio_offset, tl);
2021 			tl += 2;
2022 			*tl++ = txdr_unsigned(*iomode);
2023 			*tl = txdr_unsigned(len);
2024 		} else if (nd->nd_flag & ND_NFSV3) {
			/* NFSv3: offset, count, stability, data length. */
2025 			NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+3*NFSX_UNSIGNED);
2026 			txdr_hyper(uiop->uio_offset, tl);
2027 			tl += 2;
2028 			*tl++ = txdr_unsigned(len);
2029 			*tl++ = txdr_unsigned(*iomode);
2030 			*tl = txdr_unsigned(len);
2031 		} else {
2032 			u_int32_t x;
2033 
2034 			NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2035 			/*
2036 			 * Not sure why someone changed this, since the
2037 			 * RFC clearly states that "beginoffset" and
2038 			 * "totalcount" are ignored, but it wouldn't
2039 			 * surprise me if there's a busted server out there.
2040 			 */
2041 			/* Set both "begin" and "current" to non-garbage. */
2042 			x = txdr_unsigned((u_int32_t)uiop->uio_offset);
2043 			*tl++ = x;      /* "begin offset" */
2044 			*tl++ = x;      /* "current offset" */
2045 			x = txdr_unsigned(len);
2046 			*tl++ = x;      /* total to this offset */
2047 			*tl = x;        /* size of this write */
2048 		}
		/* Append the data; this advances the uio fields by len. */
2049 		nfsm_uiombuf(nd, uiop, len);
2050 		/*
2051 		 * Although it is tempting to do a normal Getattr Op in the
2052 		 * NFSv4 compound, the result can be a nearly hung client
2053 		 * system if the Getattr asks for Owner and/or OwnerGroup.
2054 		 * It occurs when the client can't map either the Owner or
2055 		 * Owner_group name in the Getattr reply to a uid/gid. When
2056 		 * there is a cache miss, the kernel does an upcall to the
2057 		 * nfsuserd. Then, it can try and read the local /etc/passwd
2058 		 * or /etc/group file. It can then block in getnewbuf(),
2059 		 * waiting for dirty writes to be pushed to the NFS server.
2060 		 * The only reason this doesn't result in a complete
2061 		 * deadlock, is that the upcall times out and allows
2062 		 * the write to complete. However, progress is so slow
2063 		 * that it might just as well be deadlocked.
2064 		 * As such, we get the rest of the attributes, but not
2065 		 * Owner or Owner_group.
2066 		 * nb: nfscl_loadattrcache() needs to be told that these
2067 		 *     partial attributes from a write rpc are being
2068 		 *     passed in, via a argument flag.
2069 		 */
2070 		if (nd->nd_flag & ND_NFSV4) {
2071 			NFSWRITEGETATTR_ATTRBIT(&attrbits);
2072 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2073 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
2074 			(void) nfsrv_putattrbit(nd, &attrbits);
2075 		}
2076 		error = nfscl_request(nd, vp, p, cred);
		/*
		 * NOTE(review): on this path the uio fields advanced by
		 * nfsm_uiombuf() are not rolled back and *iomode is not
		 * updated - confirm that callers expect this.
		 */
2077 		if (error) {
2078 			free(nd, M_TEMP);
2079 			return (error);
2080 		}
2081 		if (nd->nd_repstat) {
2082 			/*
2083 			 * In case the rpc gets retried, roll
2084 			 * the uio fields changed by nfsm_uiombuf()
2085 			 * back.
2086 			 */
2087 			uiop->uio_offset -= len;
2088 			uiop->uio_resid += len;
2089 			uiop->uio_iov->iov_base =
2090 			    (char *)uiop->uio_iov->iov_base - len;
2091 			uiop->uio_iov->iov_len += len;
2092 		}
		/* tmp_off is reused here as an output of nfscl_wcc_data(). */
2093 		if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2094 			error = nfscl_wcc_data(nd, vp, nap, attrflagp,
2095 			    &wccflag, &tmp_off);
2096 			if (error)
2097 				goto nfsmout;
2098 		}
2099 		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2100 		    (ND_NFSV4 | ND_NOMOREDATA) &&
2101 		    nd->nd_repstat == NFSERR_NOTSAME && do_append) {
2102 			/*
2103 			 * Verify of the file's size failed, so redo the
2104 			 * write using the file's size as returned in
2105 			 * the wcc attributes.
2106 			 */
2107 			if (tmp_off + tsiz <= nmp->nm_maxfilesize) {
2108 				do_append = false;
2109 				uiop->uio_offset = tmp_off;
2110 				m_freem(nd->nd_mrep);
2111 				nd->nd_mrep = NULL;
2112 				continue;
2113 			} else
2114 				nd->nd_repstat = EFBIG;
2115 		}
2116 		if (!nd->nd_repstat) {
2117 			if (do_append) {
2118 				/* Strip off the Write reply status. */
2119 				do_append = false;
2120 				NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
2121 			}
2122 			if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
				/* Reply: count written, commit level, verifier. */
2123 				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED
2124 					+ NFSX_VERF);
2125 				rlen = fxdr_unsigned(int, *tl++);
				/*
				 * A zero count makes no progress, so fail.
				 * A short count gives the unwritten tail
				 * back to the uio so that the next iteration
				 * sends it again.
				 */
2126 				if (rlen == 0) {
2127 					error = NFSERR_IO;
2128 					goto nfsmout;
2129 				} else if (rlen < len) {
2130 					backup = len - rlen;
2131 					uiop->uio_iov->iov_base =
2132 					    (char *)uiop->uio_iov->iov_base -
2133 					    backup;
2134 					uiop->uio_iov->iov_len += backup;
2135 					uiop->uio_offset -= backup;
2136 					uiop->uio_resid += backup;
2137 					len = rlen;
2138 				}
2139 				commit = fxdr_unsigned(int, *tl++);
2140 
2141 				/*
2142 				 * Return the lowest commitment level
2143 				 * obtained by any of the RPCs.
2144 				 */
2145 				if (committed == NFSWRITE_FILESYNC)
2146 					committed = commit;
2147 				else if (committed == NFSWRITE_DATASYNC &&
2148 					commit == NFSWRITE_UNSTABLE)
2149 					committed = commit;
				/*
				 * Save the first write verifier seen for the
				 * mount.  After that, a verifier that differs
				 * from the saved one is saved in its place
				 * and sets *must_commit, unless *must_commit
				 * is already 2.
				 */
2150 				NFSLOCKMNT(nmp);
2151 				if (!NFSHASWRITEVERF(nmp)) {
2152 					NFSBCOPY((caddr_t)tl,
2153 					    (caddr_t)&nmp->nm_verf[0],
2154 					    NFSX_VERF);
2155 					NFSSETWRITEVERF(nmp);
2156 	    			} else if (NFSBCMP(tl, nmp->nm_verf,
2157 				    NFSX_VERF) && *must_commit != 2) {
2158 					*must_commit = 1;
2159 					NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
2160 				}
2161 				NFSUNLOCKMNT(nmp);
2162 			}
			/*
			 * NOTE(review): presumably skips the operation number
			 * and status of the Getattr added to the request
			 * above - confirm.
			 */
2163 			if (nd->nd_flag & ND_NFSV4)
2164 				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2165 			if (nd->nd_flag & (ND_NFSV2 | ND_NFSV4)) {
2166 				error = nfsm_loadattr(nd, nap);
2167 				if (!error)
2168 					*attrflagp = NFS_LATTR_NOSHRINK;
2169 			}
2170 		} else {
2171 			error = nd->nd_repstat;
2172 		}
2173 		if (error)
2174 			goto nfsmout;
2175 		NFSWRITERPC_SETTIME(wccflag, np, nap, (nd->nd_flag & ND_NFSV4));
2176 		m_freem(nd->nd_mrep);
2177 		nd->nd_mrep = NULL;
2178 		tsiz -= len;
2179 	}
	/* The success path falls through to here with error == 0. */
2180 nfsmout:
2181 	if (nd->nd_mrep != NULL)
2182 		m_freem(nd->nd_mrep);
2183 	*iomode = committed;
2184 	if (nd->nd_repstat && !error)
2185 		error = nd->nd_repstat;
2186 	free(nd, M_TEMP);
2187 	return (error);
2188 }
2189 
2190 /*
2191  * Do an nfs deallocate operation.
 *
 * This is the retry wrapper around nfsrpc_deallocaterpc(); offs, len,
 * nap and attrflagp are passed through to it.
 * Before each attempt a stateid for write access is looked up.  If that
 * fails, the file is opened with nfsrpc_open(vp, FWRITE, ...) and, when
 * the open succeeds, the lookup is repeated; an open done here is closed
 * again with nfsrpc_close() after the attempt.
 * NFSERR_GRACE, NFSERR_STALESTATEID, NFSERR_STALEDONTRECOVER,
 * NFSERR_DELAY and NFSERR_BADSESSION are always retried,
 * NFSERR_OLDSTATEID while retrycnt < 20, and NFSERR_EXPIRED and
 * NFSERR_BADSTATEID while expireret == 0, clidrev != 0 and retrycnt < 4.
 * Returns 0 on success; any error left after 4 or more attempts is
 * returned as EIO.
2192  */
2193 int
2194 nfsrpc_deallocate(vnode_t vp, off_t offs, off_t len, struct nfsvattr *nap,
2195     int *attrflagp, struct ucred *cred, NFSPROC_T *p)
2196 {
2197 	int error, expireret = 0, openerr, retrycnt;
2198 	uint32_t clidrev = 0;
2199 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2200 	struct nfsfh *nfhp;
2201 	nfsv4stateid_t stateid;
2202 	void *lckp;
2203 
	/* clidrev stays 0 when the mount has no client structure. */
2204 	if (nmp->nm_clp != NULL)
2205 		clidrev = nmp->nm_clp->nfsc_clientidrev;
2206 	retrycnt = 0;
2207 	do {
2208 		lckp = NULL;
		/* Non-zero means "no open done here", so no close below. */
2209 		openerr = 1;
2210 		nfhp = VTONFS(vp)->n_fhp;
2211 		error = nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
2212 		    NFSV4OPEN_ACCESSWRITE, 0, cred, p, &stateid, &lckp);
2213 		if (error != 0) {
2214 			/*
2215 			 * No Open stateid, so try and open the file
2216 			 * now.
2217 			 */
2218 			openerr = nfsrpc_open(vp, FWRITE, cred, p);
2219 			if (openerr == 0)
2220 				nfscl_getstateid(vp, nfhp->nfh_fh,
2221 				    nfhp->nfh_len, NFSV4OPEN_ACCESSWRITE, 0,
2222 				    cred, p, &stateid, &lckp);
2223 		}
		/*
		 * The RPC is attempted with whatever stateid the lookups
		 * above left in "stateid", even if the open failed.
		 */
2224 		error = nfsrpc_deallocaterpc(vp, offs, len, &stateid, nap,
2225 		    attrflagp, cred, p);
2226 		if (error == NFSERR_STALESTATEID)
2227 			nfscl_initiate_recovery(nmp->nm_clp);
		/* Release what nfscl_getstateid() returned through lckp. */
2228 		if (lckp != NULL)
2229 			nfscl_lockderef(lckp);
2230 		if (openerr == 0)
2231 			nfsrpc_close(vp, 0, p);
		/*
		 * Nap for the errors that are waited out, check for lease
		 * expiry on NFSERR_EXPIRED (and on NFSERR_BADSTATEID unless
		 * this is an "intr" mount, which gets EIO instead).
		 */
2232 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
2233 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2234 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
2235 			(void) nfs_catnap(PZERO, error, "nfs_deallocate");
2236 		} else if ((error == NFSERR_EXPIRED || (!NFSHASINT(nmp) &&
2237 		    error == NFSERR_BADSTATEID)) && clidrev != 0) {
2238 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
2239 		} else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp)) {
2240 			error = EIO;
2241 		}
2242 		retrycnt++;
2243 	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
2244 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2245 	    error == NFSERR_BADSESSION ||
2246 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
2247 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
2248 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
2249 	if (error && retrycnt >= 4)
2250 		error = EIO;
2251 	return (error);
2252 }
2253 
2254 /*
2255  * The actual deallocate RPC.
 *
 * Builds a single NFSPROC_DEALLOCATE request holding the stateid, the
 * 64 bit offset (offs) and the 64 bit length (len), followed by a
 * Getattr for the NFSWRITEGETATTR_ATTRBIT() attributes.
 * *attrflagp is cleared on entry; nap and attrflagp are handed to
 * nfscl_wcc_data() and, when the reply status is 0, *nap is filled in by
 * nfsm_loadattr() and *attrflagp is set to NFS_LATTR_NOSHRINK.
 * Returns 0 on success, or the error from the request, the reply parsing
 * or the reply status.
2256  */
2257 static int
2258 nfsrpc_deallocaterpc(vnode_t vp, off_t offs, off_t len,
2259     nfsv4stateid_t *stateidp, struct nfsvattr *nap, int *attrflagp,
2260     struct ucred *cred, NFSPROC_T *p)
2261 {
2262 	uint32_t *tl;
2263 	struct nfsnode *np = VTONFS(vp);
2264 	int error, wccflag;
2265 	struct nfsrv_descript nfsd;
2266 	struct nfsrv_descript *nd = &nfsd;
2267 	nfsattrbit_t attrbits;
2268 
2269 	*attrflagp = 0;
2270 	NFSCL_REQSTART(nd, NFSPROC_DEALLOCATE, vp, cred);
2271 	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
	/* Offset and length, each as a 64 bit value. */
2272 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER);
2273 	txdr_hyper(offs, tl);
2274 	tl += 2;
2275 	txdr_hyper(len, tl);
	/* Ask for the same attributes as a write does. */
2276 	NFSWRITEGETATTR_ATTRBIT(&attrbits);
2277 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
2278 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
2279 	nfsrv_putattrbit(nd, &attrbits);
2280 	error = nfscl_request(nd, vp, p, cred);
2281 	if (error != 0)
2282 		return (error);
2283 	wccflag = 0;
2284 	error = nfscl_wcc_data(nd, vp, nap, attrflagp, &wccflag, NULL);
2285 	if (error != 0)
2286 		goto nfsmout;
2287 	if (nd->nd_repstat == 0) {
		/*
		 * NOTE(review): presumably skips the operation number and
		 * status of the Getattr before its attributes - confirm.
		 */
2288 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
2289 		error = nfsm_loadattr(nd, nap);
2290 		if (error != 0)
2291 			goto nfsmout;
2292 		*attrflagp = NFS_LATTR_NOSHRINK;
2293 	}
2294 	NFSWRITERPC_SETTIME(wccflag, np, nap, 1);
2295 nfsmout:
2296 	m_freem(nd->nd_mrep);
	/* A parsing error takes precedence over the reply status. */
2297 	if (nd->nd_repstat != 0 && error == 0)
2298 		error = nd->nd_repstat;
2299 	return (error);
2300 }
2301 
2302 /*
2303  * nfs mknod rpc
2304  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
2305  * mode set to specify the file type and the size field for rdev.
 *
 * Creates the node "name" (namelen bytes) of type vtyp in directory dvp,
 * with the attributes in vap and, for VBLK/VCHR, the device number rdev.
 * Request layout, by protocol version:
 * - NFSv4: type (plus major/minor for VBLK/VCHR), name, attributes, then
 *   Getfh and Getattr operations;
 * - NFSv3: name, type, attributes, plus major/minor for VBLK/VCHR;
 * - NFSv2: name, then attributes built with NFSSATTR_SIZERDEV and rdev.
 * *nfhpp, *attrflagp and *dattrflagp are cleared on entry.  When the
 * reply status is 0, nfhpp, nnap and attrflagp are handed to
 * nfscl_mtofh() for the new node; dnap and dattrflagp are handed to
 * nfscl_wcc_data() for the directory (NFSv3 and NFSv4 only).
 * Returns ENAMETOOLONG if namelen > NFS_MAXNAMLEN, 0 on success, or the
 * error from the request, the reply parsing or the reply status.
2306  */
2307 int
2308 nfsrpc_mknod(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2309     u_int32_t rdev, __enum_uint8(vtype) vtyp, struct ucred *cred, NFSPROC_T *p,
2310     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2311     int *attrflagp, int *dattrflagp)
2312 {
2313 	u_int32_t *tl;
2314 	int error = 0;
2315 	struct nfsrv_descript nfsd, *nd = &nfsd;
2316 	nfsattrbit_t attrbits;
2317 
2318 	*nfhpp = NULL;
2319 	*attrflagp = 0;
2320 	*dattrflagp = 0;
2321 	if (namelen > NFS_MAXNAMLEN)
2322 		return (ENAMETOOLONG);
2323 	NFSCL_REQSTART(nd, NFSPROC_MKNOD, dvp, cred);
	/* NFSv4 puts the type (and device numbers) ahead of the name. */
2324 	if (nd->nd_flag & ND_NFSV4) {
2325 		if (vtyp == VBLK || vtyp == VCHR) {
2326 			NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2327 			*tl++ = vtonfsv34_type(vtyp);
2328 			*tl++ = txdr_unsigned(NFSMAJOR(rdev));
2329 			*tl = txdr_unsigned(NFSMINOR(rdev));
2330 		} else {
2331 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2332 			*tl = vtonfsv34_type(vtyp);
2333 		}
2334 	}
2335 	(void) nfsm_strtom(nd, name, namelen);
	/* NFSv3 puts the type after the name. */
2336 	if (nd->nd_flag & ND_NFSV3) {
2337 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2338 		*tl = vtonfsv34_type(vtyp);
2339 	}
2340 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
2341 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
	/* NFSv3 puts the device numbers after the attributes. */
2342 	if ((nd->nd_flag & ND_NFSV3) &&
2343 	    (vtyp == VCHR || vtyp == VBLK)) {
2344 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2345 		*tl++ = txdr_unsigned(NFSMAJOR(rdev));
2346 		*tl = txdr_unsigned(NFSMINOR(rdev));
2347 	}
	/* NFSv4: also ask for the new node's file handle and attributes. */
2348 	if (nd->nd_flag & ND_NFSV4) {
2349 		NFSGETATTR_ATTRBIT(&attrbits);
2350 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2351 		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
2352 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2353 		(void) nfsrv_putattrbit(nd, &attrbits);
2354 	}
	/* NFSv2: rdev is passed along with the attributes (see above). */
2355 	if (nd->nd_flag & ND_NFSV2)
2356 		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZERDEV, rdev);
2357 	error = nfscl_request(nd, dvp, p, cred);
2358 	if (error)
2359 		return (error);
	/*
	 * NOTE(review): when nd_repstat is 0, the result of this call is
	 * overwritten by nfsrv_getattrbits() below without being checked -
	 * confirm that this is intended.
	 */
2360 	if (nd->nd_flag & ND_NFSV4)
2361 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
2362 	if (!nd->nd_repstat) {
2363 		if (nd->nd_flag & ND_NFSV4) {
			/*
			 * NOTE(review): presumably skips the change info of
			 * the create reply ahead of its attribute bits -
			 * confirm against the reply layout.
			 */
2364 			NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2365 			error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2366 			if (error)
2367 				goto nfsmout;
2368 		}
2369 		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2370 		if (error)
2371 			goto nfsmout;
2372 	}
2373 	if (nd->nd_flag & ND_NFSV3)
2374 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
	/* A parsing error takes precedence over the reply status. */
2375 	if (!error && nd->nd_repstat)
2376 		error = nd->nd_repstat;
2377 nfsmout:
2378 	m_freem(nd->nd_mrep);
2379 	return (error);
2380 }
2381 
2382 /*
2383  * nfs file create call
2384  * Mostly just call the appropriate routine. (I separated out v4, so that
2385  * error recovery wouldn't be as difficult.)
 *
 * For a mount that is not NFSv4, this is a single call to
 * nfsrpc_createv23().
 * For NFSv4, each attempt calls nfscl_open() (returning the owner
 * through owp), does the create, and then releases the owner with
 * nfscl_ownerrelease().  The create is done by nfsrpc_getcreatelayout()
 * on the first attempt when the mount uses pNFS, nfscl_enablecallb is
 * non-zero and nfs_numnfscbd is non-zero; otherwise by nfsrpc_createv4().
 * NFSERR_GRACE, NFSERR_STALECLIENTID, NFSERR_STALEDONTRECOVER,
 * NFSERR_DELAY and NFSERR_BADSESSION are retried after a nap, without
 * advancing retrycnt.  NFSERR_EXPIRED and NFSERR_BADSTATEID are retried
 * while expireret == 0, clidrev != 0 and retrycnt < 4.
 * Returns 0 on success.  An error from nfscl_open() is returned at once;
 * any other error left once retrycnt has reached 4 is returned as EIO.
2386  */
2387 int
2388 nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2389     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
2390     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2391     int *attrflagp, int *dattrflagp)
2392 {
2393 	int error = 0, newone, expireret = 0, retrycnt, unlocked;
2394 	struct nfsclowner *owp;
2395 	struct nfscldeleg *dp;
2396 	struct nfsmount *nmp = VFSTONFS(dvp->v_mount);
2397 	u_int32_t clidrev;
2398 
2399 	if (NFSHASNFSV4(nmp)) {
2400 	    retrycnt = 0;
2401 	    do {
2402 		dp = NULL;
2403 		error = nfscl_open(dvp, NULL, 0, (NFSV4OPEN_ACCESSWRITE |
2404 		    NFSV4OPEN_ACCESSREAD), 0, cred, p, &owp, NULL, &newone,
2405 		    NULL, 1, true);
2406 		if (error)
2407 			return (error);
		/* clidrev is sampled after nfscl_open(), on every attempt. */
2408 		if (nmp->nm_clp != NULL)
2409 			clidrev = nmp->nm_clp->nfsc_clientidrev;
2410 		else
2411 			clidrev = 0;
		/*
		 * nfsrpc_getcreatelayout() is only tried while retrycnt is
		 * still 0, on a pNFS mount with callbacks enabled and at
		 * least one nfscbd running.
		 */
2412 		if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
2413 		    nfs_numnfscbd == 0 || retrycnt > 0)
2414 			error = nfsrpc_createv4(dvp, name, namelen, vap, cverf,
2415 			  fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
2416 			  attrflagp, dattrflagp, &unlocked);
2417 		else
2418 			error = nfsrpc_getcreatelayout(dvp, name, namelen, vap,
2419 			  cverf, fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
2420 			  attrflagp, dattrflagp, &unlocked);
2421 		/*
2422 		 * There is no need to invalidate cached attributes here,
2423 		 * since new post-delegation issue attributes are always
2424 		 * returned by nfsrpc_createv4() and these will update the
2425 		 * attribute cache.
2426 		 */
		/* A delegation came back: hand it to nfscl_deleg(). */
2427 		if (dp != NULL)
2428 			(void) nfscl_deleg(nmp->nm_mountp, owp->nfsow_clp,
2429 			    (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp);
2430 		nfscl_ownerrelease(nmp, owp, error, newone, unlocked);
2431 		if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
2432 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2433 		    error == NFSERR_BADSESSION) {
2434 			(void) nfs_catnap(PZERO, error, "nfs_open");
2435 		} else if ((error == NFSERR_EXPIRED ||
2436 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
2437 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
			/* Only this path counts against the retry limit. */
2438 			retrycnt++;
2439 		}
2440 	    } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
2441 		error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2442 		error == NFSERR_BADSESSION ||
2443 		((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
2444 		 expireret == 0 && clidrev != 0 && retrycnt < 4));
2445 	    if (error && retrycnt >= 4)
2446 		    error = EIO;
2447 	} else {
2448 		error = nfsrpc_createv23(dvp, name, namelen, vap, cverf,
2449 		    fmode, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp);
2450 	}
2451 	return (error);
2452 }
2453 
2454 /*
2455  * The create rpc for v2 and 3.
 *
 * Creates the file "name" (namelen bytes) in directory dvp.
 * - NFSv3 with O_EXCL set in fmode: an NFSCREATE_EXCLUSIVE create that
 *   carries the two words of cverf and no attributes.
 * - NFSv3 otherwise: an NFSCREATE_UNCHECKED create with the attributes
 *   from vap.
 * - NFSv2: the attributes from vap, built with NFSSATTR_SIZE0.
 * *nfhpp, *attrflagp and *dattrflagp are cleared on entry.  When the
 * reply status is 0, nfhpp, nnap and attrflagp are handed to
 * nfscl_mtofh() for the new file; for NFSv3, dnap and dattrflagp are
 * handed to nfscl_wcc_data() for the directory.
 * Returns ENAMETOOLONG if namelen > NFS_MAXNAMLEN, 0 on success, or the
 * error from the request, the reply parsing or the reply status.
2456  */
2457 static int
2458 nfsrpc_createv23(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2459     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
2460     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2461     int *attrflagp, int *dattrflagp)
2462 {
2463 	u_int32_t *tl;
2464 	int error = 0;
2465 	struct nfsrv_descript nfsd, *nd = &nfsd;
2466 
2467 	*nfhpp = NULL;
2468 	*attrflagp = 0;
2469 	*dattrflagp = 0;
2470 	if (namelen > NFS_MAXNAMLEN)
2471 		return (ENAMETOOLONG);
2472 	NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp, cred);
2473 	(void) nfsm_strtom(nd, name, namelen);
2474 	if (nd->nd_flag & ND_NFSV3) {
		/* The create mode word, then verifier or attributes. */
2475 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2476 		if (fmode & O_EXCL) {
2477 			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2478 			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2479 			*tl++ = cverf.lval[0];
2480 			*tl = cverf.lval[1];
2481 		} else {
2482 			*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2483 			nfscl_fillsattr(nd, vap, dvp, 0, 0);
2484 		}
2485 	} else {
2486 		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZE0, 0);
2487 	}
2488 	error = nfscl_request(nd, dvp, p, cred);
2489 	if (error)
2490 		return (error);
2491 	if (nd->nd_repstat == 0) {
2492 		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2493 		if (error)
2494 			goto nfsmout;
2495 	}
2496 	if (nd->nd_flag & ND_NFSV3)
2497 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
	/* A parsing error takes precedence over the reply status. */
2498 	if (nd->nd_repstat != 0 && error == 0)
2499 		error = nd->nd_repstat;
2500 nfsmout:
2501 	m_freem(nd->nd_mrep);
2502 	return (error);
2503 }
2504 
2505 static int
2506 nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2507     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
2508     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2509     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2510     int *dattrflagp, int *unlockedp)
2511 {
2512 	u_int32_t *tl;
2513 	int error = 0, deleg, newone, ret, acesize, limitby;
2514 	struct nfsrv_descript nfsd, *nd = &nfsd;
2515 	struct nfsclopen *op;
2516 	struct nfscldeleg *dp = NULL;
2517 	struct nfsnode *np;
2518 	struct nfsfh *nfhp;
2519 	nfsattrbit_t attrbits;
2520 	nfsv4stateid_t stateid;
2521 	u_int32_t rflags;
2522 	struct nfsmount *nmp;
2523 	struct nfsclsession *tsep;
2524 
2525 	nmp = VFSTONFS(dvp->v_mount);
2526 	np = VTONFS(dvp);
2527 	*unlockedp = 0;
2528 	*nfhpp = NULL;
2529 	*dpp = NULL;
2530 	*attrflagp = 0;
2531 	*dattrflagp = 0;
2532 	if (namelen > NFS_MAXNAMLEN)
2533 		return (ENAMETOOLONG);
2534 	NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp, cred);
2535 	/*
2536 	 * For V4, this is actually an Open op.
2537 	 */
2538 	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2539 	*tl++ = txdr_unsigned(owp->nfsow_seqid);
2540 	*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
2541 	    NFSV4OPEN_ACCESSREAD);
2542 	*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
2543 	tsep = nfsmnt_mdssession(nmp);
2544 	*tl++ = tsep->nfsess_clientid.lval[0];
2545 	*tl = tsep->nfsess_clientid.lval[1];
2546 	(void) nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
2547 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2548 	*tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
2549 	if (fmode & O_EXCL) {
2550 		if (NFSHASNFSV4N(nmp)) {
2551 			if (NFSHASSESSPERSIST(nmp)) {
2552 				/* Use GUARDED for persistent sessions. */
2553 				*tl = txdr_unsigned(NFSCREATE_GUARDED);
2554 				nfscl_fillsattr(nd, vap, dvp, 0, 0);
2555 			} else {
2556 				/* Otherwise, use EXCLUSIVE4_1. */
2557 				*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
2558 				NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2559 				*tl++ = cverf.lval[0];
2560 				*tl = cverf.lval[1];
2561 				nfscl_fillsattr(nd, vap, dvp, 0, 0);
2562 			}
2563 		} else {
2564 			/* NFSv4.0 */
2565 			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2566 			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2567 			*tl++ = cverf.lval[0];
2568 			*tl = cverf.lval[1];
2569 		}
2570 	} else {
2571 		*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2572 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
2573 	}
2574 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2575 	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
2576 	(void) nfsm_strtom(nd, name, namelen);
2577 	/* Get the new file's handle and attributes. */
2578 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2579 	*tl++ = txdr_unsigned(NFSV4OP_GETFH);
2580 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
2581 	NFSGETATTR_ATTRBIT(&attrbits);
2582 	(void) nfsrv_putattrbit(nd, &attrbits);
2583 	/* Get the directory's post-op attributes. */
2584 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2585 	*tl = txdr_unsigned(NFSV4OP_PUTFH);
2586 	(void)nfsm_fhtom(nmp, nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
2587 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2588 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
2589 	(void) nfsrv_putattrbit(nd, &attrbits);
2590 	error = nfscl_request(nd, dvp, p, cred);
2591 	if (error)
2592 		return (error);
2593 	NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
2594 	if (nd->nd_repstat == 0) {
2595 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2596 		    6 * NFSX_UNSIGNED);
2597 		stateid.seqid = *tl++;
2598 		stateid.other[0] = *tl++;
2599 		stateid.other[1] = *tl++;
2600 		stateid.other[2] = *tl;
2601 		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
2602 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2603 		if (error)
2604 			goto nfsmout;
2605 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2606 		deleg = fxdr_unsigned(int, *tl);
2607 		if (deleg == NFSV4OPEN_DELEGATEREAD ||
2608 		    deleg == NFSV4OPEN_DELEGATEWRITE) {
2609 			if (!(owp->nfsow_clp->nfsc_flags &
2610 			      NFSCLFLAGS_FIRSTDELEG))
2611 				owp->nfsow_clp->nfsc_flags |=
2612 				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
2613 			dp = malloc(
2614 			    sizeof (struct nfscldeleg) + NFSX_V4FHMAX,
2615 			    M_NFSCLDELEG, M_WAITOK);
2616 			LIST_INIT(&dp->nfsdl_owner);
2617 			LIST_INIT(&dp->nfsdl_lock);
2618 			dp->nfsdl_clp = owp->nfsow_clp;
2619 			newnfs_copyincred(cred, &dp->nfsdl_cred);
2620 			nfscl_lockinit(&dp->nfsdl_rwlock);
2621 			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2622 			    NFSX_UNSIGNED);
2623 			dp->nfsdl_stateid.seqid = *tl++;
2624 			dp->nfsdl_stateid.other[0] = *tl++;
2625 			dp->nfsdl_stateid.other[1] = *tl++;
2626 			dp->nfsdl_stateid.other[2] = *tl++;
2627 			ret = fxdr_unsigned(int, *tl);
2628 			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
2629 				dp->nfsdl_flags = NFSCLDL_WRITE;
2630 				/*
2631 				 * Indicates how much the file can grow.
2632 				 */
2633 				NFSM_DISSECT(tl, u_int32_t *,
2634 				    3 * NFSX_UNSIGNED);
2635 				limitby = fxdr_unsigned(int, *tl++);
2636 				switch (limitby) {
2637 				case NFSV4OPEN_LIMITSIZE:
2638 					dp->nfsdl_sizelimit = fxdr_hyper(tl);
2639 					break;
2640 				case NFSV4OPEN_LIMITBLOCKS:
2641 					dp->nfsdl_sizelimit =
2642 					    fxdr_unsigned(u_int64_t, *tl++);
2643 					dp->nfsdl_sizelimit *=
2644 					    fxdr_unsigned(u_int64_t, *tl);
2645 					break;
2646 				default:
2647 					error = NFSERR_BADXDR;
2648 					goto nfsmout;
2649 				}
2650 			} else {
2651 				dp->nfsdl_flags = NFSCLDL_READ;
2652 			}
2653 			if (ret)
2654 				dp->nfsdl_flags |= NFSCLDL_RECALL;
2655 			error = nfsrv_dissectace(nd, &dp->nfsdl_ace, false,
2656 			    &ret, &acesize, p);
2657 			if (error)
2658 				goto nfsmout;
2659 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
2660 			error = NFSERR_BADXDR;
2661 			goto nfsmout;
2662 		}
2663 		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2664 		if (error)
2665 			goto nfsmout;
2666 		/* Get rid of the PutFH and Getattr status values. */
2667 		NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2668 		/* Load the directory attributes. */
2669 		error = nfsm_loadattr(nd, dnap);
2670 		if (error)
2671 			goto nfsmout;
2672 		*dattrflagp = 1;
2673 		if (dp != NULL && *attrflagp) {
2674 			dp->nfsdl_change = nnap->na_filerev;
2675 			dp->nfsdl_modtime = nnap->na_mtime;
2676 			dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
2677 		}
2678 		/*
2679 		 * We can now complete the Open state.
2680 		 */
2681 		nfhp = *nfhpp;
2682 		if (dp != NULL) {
2683 			dp->nfsdl_fhlen = nfhp->nfh_len;
2684 			NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh, nfhp->nfh_len);
2685 		}
2686 		/*
2687 		 * Get an Open structure that will be
2688 		 * attached to the OpenOwner, acquired already.
2689 		 */
2690 		error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len,
2691 		    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
2692 		    cred, p, NULL, &op, &newone, NULL, 0, false);
2693 		if (error)
2694 			goto nfsmout;
2695 		op->nfso_stateid = stateid;
2696 		newnfs_copyincred(cred, &op->nfso_cred);
2697 		if ((rflags & NFSV4OPEN_RESULTCONFIRM)) {
2698 		    do {
2699 			ret = nfsrpc_openconfirm(dvp, nfhp->nfh_fh,
2700 			    nfhp->nfh_len, op, cred, p);
2701 			if (ret == NFSERR_DELAY)
2702 			    (void) nfs_catnap(PZERO, ret, "nfs_create");
2703 		    } while (ret == NFSERR_DELAY);
2704 		    error = ret;
2705 		}
2706 
2707 		/*
2708 		 * If the server is handing out delegations, but we didn't
2709 		 * get one because an OpenConfirm was required, try the
2710 		 * Open again, to get a delegation. This is a harmless no-op,
2711 		 * from a server's point of view.
2712 		 */
2713 		if ((rflags & NFSV4OPEN_RESULTCONFIRM) &&
2714 		    (owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) &&
2715 		    !error && dp == NULL) {
2716 		    do {
2717 			ret = nfsrpc_openrpc(VFSTONFS(dvp->v_mount), dvp,
2718 			    np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
2719 			    nfhp->nfh_fh, nfhp->nfh_len,
2720 			    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), op,
2721 			    name, namelen, &dp, 0, 0x0, cred, p, 0, 1);
2722 			if (ret == NFSERR_DELAY)
2723 			    (void) nfs_catnap(PZERO, ret, "nfs_crt2");
2724 		    } while (ret == NFSERR_DELAY);
2725 		    if (ret) {
2726 			if (dp != NULL) {
2727 				free(dp, M_NFSCLDELEG);
2728 				dp = NULL;
2729 			}
2730 			if (ret == NFSERR_STALECLIENTID ||
2731 			    ret == NFSERR_STALEDONTRECOVER ||
2732 			    ret == NFSERR_BADSESSION)
2733 				error = ret;
2734 		    }
2735 		}
2736 		nfscl_openrelease(nmp, op, error, newone);
2737 		*unlockedp = 1;
2738 	}
2739 	if (nd->nd_repstat != 0 && error == 0)
2740 		error = nd->nd_repstat;
2741 	if (error == NFSERR_STALECLIENTID)
2742 		nfscl_initiate_recovery(owp->nfsow_clp);
2743 nfsmout:
2744 	if (!error)
2745 		*dpp = dp;
2746 	else if (dp != NULL)
2747 		free(dp, M_NFSCLDELEG);
2748 	m_freem(nd->nd_mrep);
2749 	return (error);
2750 }
2751 
2752 /*
2753  * Nfs remove rpc
2754  */
2755 int
2756 nfsrpc_remove(vnode_t dvp, char *name, int namelen, vnode_t vp,
2757     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp)
2758 {
2759 	u_int32_t *tl;
2760 	struct nfsrv_descript nfsd, *nd = &nfsd;
2761 	struct nfsnode *np;
2762 	struct nfsmount *nmp;
2763 	nfsv4stateid_t dstateid;
2764 	int error, ret = 0, i;
2765 
2766 	*dattrflagp = 0;
2767 	if (namelen > NFS_MAXNAMLEN)
2768 		return (ENAMETOOLONG);
2769 	nmp = VFSTONFS(dvp->v_mount);
2770 tryagain:
2771 	if (NFSHASNFSV4(nmp) && ret == 0) {
2772 		ret = nfscl_removedeleg(vp, p, &dstateid);
2773 		if (ret == 1) {
2774 			NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp, cred);
2775 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
2776 			    NFSX_UNSIGNED);
2777 			if (NFSHASNFSV4N(nmp))
2778 				*tl++ = 0;
2779 			else
2780 				*tl++ = dstateid.seqid;
2781 			*tl++ = dstateid.other[0];
2782 			*tl++ = dstateid.other[1];
2783 			*tl++ = dstateid.other[2];
2784 			*tl = txdr_unsigned(NFSV4OP_PUTFH);
2785 			np = VTONFS(dvp);
2786 			(void)nfsm_fhtom(nmp, nd, np->n_fhp->nfh_fh,
2787 			    np->n_fhp->nfh_len, 0);
2788 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2789 			*tl = txdr_unsigned(NFSV4OP_REMOVE);
2790 		}
2791 	} else {
2792 		ret = 0;
2793 	}
2794 	if (ret == 0)
2795 		NFSCL_REQSTART(nd, NFSPROC_REMOVE, dvp, cred);
2796 	(void) nfsm_strtom(nd, name, namelen);
2797 	error = nfscl_request(nd, dvp, p, cred);
2798 	if (error)
2799 		return (error);
2800 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2801 		/* For NFSv4, parse out any Delereturn replies. */
2802 		if (ret > 0 && nd->nd_repstat != 0 &&
2803 		    (nd->nd_flag & ND_NOMOREDATA)) {
2804 			/*
2805 			 * If the Delegreturn failed, try again without
2806 			 * it. The server will Recall, as required.
2807 			 */
2808 			m_freem(nd->nd_mrep);
2809 			goto tryagain;
2810 		}
2811 		for (i = 0; i < (ret * 2); i++) {
2812 			if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2813 			    ND_NFSV4) {
2814 			    NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2815 			    if (*(tl + 1))
2816 				nd->nd_flag |= ND_NOMOREDATA;
2817 			}
2818 		}
2819 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
2820 	}
2821 	if (nd->nd_repstat && !error)
2822 		error = nd->nd_repstat;
2823 nfsmout:
2824 	m_freem(nd->nd_mrep);
2825 	return (error);
2826 }
2827 
2828 /*
2829  * Do an nfs rename rpc.
2830  */
2831 int
2832 nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen,
2833     vnode_t tdvp, vnode_t tvp, char *tnameptr, int tnamelen, struct ucred *cred,
2834     NFSPROC_T *p, struct nfsvattr *fnap, struct nfsvattr *tnap,
2835     int *fattrflagp, int *tattrflagp)
2836 {
2837 	u_int32_t *tl;
2838 	struct nfsrv_descript nfsd, *nd = &nfsd;
2839 	struct nfsmount *nmp;
2840 	struct nfsnode *np;
2841 	nfsattrbit_t attrbits;
2842 	nfsv4stateid_t fdstateid, tdstateid;
2843 	int error = 0, ret = 0, gottd = 0, gotfd = 0, i;
2844 
2845 	*fattrflagp = 0;
2846 	*tattrflagp = 0;
2847 	nmp = VFSTONFS(fdvp->v_mount);
2848 	if (fnamelen > NFS_MAXNAMLEN || tnamelen > NFS_MAXNAMLEN)
2849 		return (ENAMETOOLONG);
2850 tryagain:
2851 	if (NFSHASNFSV4(nmp) && ret == 0) {
2852 		ret = nfscl_renamedeleg(fvp, &fdstateid, &gotfd, tvp,
2853 		    &tdstateid, &gottd, p);
2854 		if (gotfd && gottd) {
2855 			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME2, fvp, cred);
2856 		} else if (gotfd) {
2857 			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, fvp, cred);
2858 		} else if (gottd) {
2859 			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, tvp, cred);
2860 		}
2861 		if (gotfd) {
2862 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
2863 			if (NFSHASNFSV4N(nmp))
2864 				*tl++ = 0;
2865 			else
2866 				*tl++ = fdstateid.seqid;
2867 			*tl++ = fdstateid.other[0];
2868 			*tl++ = fdstateid.other[1];
2869 			*tl = fdstateid.other[2];
2870 			if (gottd) {
2871 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2872 				*tl = txdr_unsigned(NFSV4OP_PUTFH);
2873 				np = VTONFS(tvp);
2874 				(void)nfsm_fhtom(nmp, nd, np->n_fhp->nfh_fh,
2875 				    np->n_fhp->nfh_len, 0);
2876 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2877 				*tl = txdr_unsigned(NFSV4OP_DELEGRETURN);
2878 			}
2879 		}
2880 		if (gottd) {
2881 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
2882 			if (NFSHASNFSV4N(nmp))
2883 				*tl++ = 0;
2884 			else
2885 				*tl++ = tdstateid.seqid;
2886 			*tl++ = tdstateid.other[0];
2887 			*tl++ = tdstateid.other[1];
2888 			*tl = tdstateid.other[2];
2889 		}
2890 		if (ret > 0) {
2891 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2892 			*tl = txdr_unsigned(NFSV4OP_PUTFH);
2893 			np = VTONFS(fdvp);
2894 			(void)nfsm_fhtom(nmp, nd, np->n_fhp->nfh_fh,
2895 			    np->n_fhp->nfh_len, 0);
2896 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2897 			*tl = txdr_unsigned(NFSV4OP_SAVEFH);
2898 		}
2899 	} else {
2900 		ret = 0;
2901 	}
2902 	if (ret == 0)
2903 		NFSCL_REQSTART(nd, NFSPROC_RENAME, fdvp, cred);
2904 	if (nd->nd_flag & ND_NFSV4) {
2905 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2906 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2907 		NFSWCCATTR_ATTRBIT(&attrbits);
2908 		(void) nfsrv_putattrbit(nd, &attrbits);
2909 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2910 		*tl = txdr_unsigned(NFSV4OP_PUTFH);
2911 		(void)nfsm_fhtom(nmp, nd, VTONFS(tdvp)->n_fhp->nfh_fh,
2912 		    VTONFS(tdvp)->n_fhp->nfh_len, 0);
2913 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2914 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2915 		(void) nfsrv_putattrbit(nd, &attrbits);
2916 		nd->nd_flag |= ND_V4WCCATTR;
2917 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2918 		*tl = txdr_unsigned(NFSV4OP_RENAME);
2919 	}
2920 	(void) nfsm_strtom(nd, fnameptr, fnamelen);
2921 	if (!(nd->nd_flag & ND_NFSV4))
2922 		(void)nfsm_fhtom(nmp, nd, VTONFS(tdvp)->n_fhp->nfh_fh,
2923 			VTONFS(tdvp)->n_fhp->nfh_len, 0);
2924 	(void) nfsm_strtom(nd, tnameptr, tnamelen);
2925 	error = nfscl_request(nd, fdvp, p, cred);
2926 	if (error)
2927 		return (error);
2928 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2929 		/* For NFSv4, parse out any Delereturn replies. */
2930 		if (ret > 0 && nd->nd_repstat != 0 &&
2931 		    (nd->nd_flag & ND_NOMOREDATA)) {
2932 			/*
2933 			 * If the Delegreturn failed, try again without
2934 			 * it. The server will Recall, as required.
2935 			 */
2936 			m_freem(nd->nd_mrep);
2937 			goto tryagain;
2938 		}
2939 		for (i = 0; i < (ret * 2); i++) {
2940 			if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2941 			    ND_NFSV4) {
2942 			    NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2943 			    if (*(tl + 1)) {
2944 				if (i == 0 && ret > 1) {
2945 				    /*
2946 				     * If the Delegreturn failed, try again
2947 				     * without it. The server will Recall, as
2948 				     * required.
2949 				     * If ret > 1, the first iteration of this
2950 				     * loop is the second DelegReturn result.
2951 				     */
2952 				    m_freem(nd->nd_mrep);
2953 				    goto tryagain;
2954 				} else {
2955 				    nd->nd_flag |= ND_NOMOREDATA;
2956 				}
2957 			    }
2958 			}
2959 		}
2960 		/* Now, the first wcc attribute reply. */
2961 		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
2962 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2963 			if (*(tl + 1))
2964 				nd->nd_flag |= ND_NOMOREDATA;
2965 		}
2966 		error = nfscl_wcc_data(nd, fdvp, fnap, fattrflagp, NULL, NULL);
2967 		/* and the second wcc attribute reply. */
2968 		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 &&
2969 		    !error) {
2970 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2971 			if (*(tl + 1))
2972 				nd->nd_flag |= ND_NOMOREDATA;
2973 		}
2974 		if (!error)
2975 			error = nfscl_wcc_data(nd, tdvp, tnap, tattrflagp,
2976 			    NULL, NULL);
2977 	}
2978 	if (nd->nd_repstat && !error)
2979 		error = nd->nd_repstat;
2980 nfsmout:
2981 	m_freem(nd->nd_mrep);
2982 	return (error);
2983 }
2984 
2985 /*
2986  * nfs hard link create rpc
2987  */
2988 int
2989 nfsrpc_link(vnode_t dvp, vnode_t vp, char *name, int namelen,
2990     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2991     struct nfsvattr *nap, int *attrflagp, int *dattrflagp)
2992 {
2993 	u_int32_t *tl;
2994 	struct nfsrv_descript nfsd, *nd = &nfsd;
2995 	nfsattrbit_t attrbits;
2996 	int error = 0;
2997 
2998 	*attrflagp = 0;
2999 	*dattrflagp = 0;
3000 	if (namelen > NFS_MAXNAMLEN)
3001 		return (ENAMETOOLONG);
3002 	NFSCL_REQSTART(nd, NFSPROC_LINK, vp, cred);
3003 	if (nd->nd_flag & ND_NFSV4) {
3004 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3005 		*tl = txdr_unsigned(NFSV4OP_PUTFH);
3006 	}
3007 	(void)nfsm_fhtom(VFSTONFS(dvp->v_mount), nd, VTONFS(dvp)->n_fhp->nfh_fh,
3008 		VTONFS(dvp)->n_fhp->nfh_len, 0);
3009 	if (nd->nd_flag & ND_NFSV4) {
3010 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3011 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
3012 		NFSWCCATTR_ATTRBIT(&attrbits);
3013 		(void) nfsrv_putattrbit(nd, &attrbits);
3014 		nd->nd_flag |= ND_V4WCCATTR;
3015 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3016 		*tl = txdr_unsigned(NFSV4OP_LINK);
3017 	}
3018 	(void) nfsm_strtom(nd, name, namelen);
3019 	error = nfscl_request(nd, vp, p, cred);
3020 	if (error)
3021 		return (error);
3022 	if (nd->nd_flag & ND_NFSV3) {
3023 		error = nfscl_postop_attr(nd, nap, attrflagp);
3024 		if (!error)
3025 			error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
3026 			    NULL, NULL);
3027 	} else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
3028 		/*
3029 		 * First, parse out the PutFH and Getattr result.
3030 		 */
3031 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3032 		if (!(*(tl + 1)))
3033 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3034 		if (*(tl + 1))
3035 			nd->nd_flag |= ND_NOMOREDATA;
3036 		/*
3037 		 * Get the pre-op attributes.
3038 		 */
3039 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
3040 	}
3041 	if (nd->nd_repstat && !error)
3042 		error = nd->nd_repstat;
3043 nfsmout:
3044 	m_freem(nd->nd_mrep);
3045 	return (error);
3046 }
3047 
3048 /*
3049  * nfs symbolic link create rpc
3050  */
3051 int
3052 nfsrpc_symlink(vnode_t dvp, char *name, int namelen, const char *target,
3053     struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
3054     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
3055     int *dattrflagp)
3056 {
3057 	u_int32_t *tl;
3058 	struct nfsrv_descript nfsd, *nd = &nfsd;
3059 	struct nfsmount *nmp;
3060 	int slen, error = 0;
3061 
3062 	*nfhpp = NULL;
3063 	*attrflagp = 0;
3064 	*dattrflagp = 0;
3065 	nmp = VFSTONFS(dvp->v_mount);
3066 	slen = strlen(target);
3067 	if (slen > NFS_MAXPATHLEN || namelen > NFS_MAXNAMLEN)
3068 		return (ENAMETOOLONG);
3069 	NFSCL_REQSTART(nd, NFSPROC_SYMLINK, dvp, cred);
3070 	if (nd->nd_flag & ND_NFSV4) {
3071 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3072 		*tl = txdr_unsigned(NFLNK);
3073 		(void) nfsm_strtom(nd, target, slen);
3074 	}
3075 	(void) nfsm_strtom(nd, name, namelen);
3076 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
3077 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
3078 	if (!(nd->nd_flag & ND_NFSV4))
3079 		(void) nfsm_strtom(nd, target, slen);
3080 	if (nd->nd_flag & ND_NFSV2)
3081 		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
3082 	error = nfscl_request(nd, dvp, p, cred);
3083 	if (error)
3084 		return (error);
3085 	if (nd->nd_flag & ND_NFSV4)
3086 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
3087 	if ((nd->nd_flag & ND_NFSV3) && !error) {
3088 		if (!nd->nd_repstat)
3089 			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
3090 		if (!error)
3091 			error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
3092 			    NULL, NULL);
3093 	}
3094 	if (nd->nd_repstat && !error)
3095 		error = nd->nd_repstat;
3096 	m_freem(nd->nd_mrep);
3097 	/*
3098 	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
3099 	 * Only do this if vfs.nfs.ignore_eexist is set.
3100 	 * Never do this for NFSv4.1 or later minor versions, since sessions
3101 	 * should guarantee "exactly once" RPC semantics.
3102 	 */
3103 	if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
3104 	    nmp->nm_minorvers == 0))
3105 		error = 0;
3106 	return (error);
3107 }
3108 
3109 /*
3110  * nfs make dir rpc
3111  */
3112 int
3113 nfsrpc_mkdir(vnode_t dvp, char *name, int namelen, struct vattr *vap,
3114     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
3115     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
3116     int *dattrflagp)
3117 {
3118 	u_int32_t *tl;
3119 	struct nfsrv_descript nfsd, *nd = &nfsd;
3120 	nfsattrbit_t attrbits;
3121 	int error = 0;
3122 	struct nfsfh *fhp;
3123 	struct nfsmount *nmp;
3124 
3125 	*nfhpp = NULL;
3126 	*attrflagp = 0;
3127 	*dattrflagp = 0;
3128 	nmp = VFSTONFS(dvp->v_mount);
3129 	fhp = VTONFS(dvp)->n_fhp;
3130 	if (namelen > NFS_MAXNAMLEN)
3131 		return (ENAMETOOLONG);
3132 	NFSCL_REQSTART(nd, NFSPROC_MKDIR, dvp, cred);
3133 	if (nd->nd_flag & ND_NFSV4) {
3134 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3135 		*tl = txdr_unsigned(NFDIR);
3136 	}
3137 	(void) nfsm_strtom(nd, name, namelen);
3138 	nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
3139 	if (nd->nd_flag & ND_NFSV4) {
3140 		NFSGETATTR_ATTRBIT(&attrbits);
3141 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3142 		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
3143 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
3144 		(void) nfsrv_putattrbit(nd, &attrbits);
3145 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3146 		*tl = txdr_unsigned(NFSV4OP_PUTFH);
3147 		(void)nfsm_fhtom(nmp, nd, fhp->nfh_fh, fhp->nfh_len, 0);
3148 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3149 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
3150 		(void) nfsrv_putattrbit(nd, &attrbits);
3151 	}
3152 	error = nfscl_request(nd, dvp, p, cred);
3153 	if (error)
3154 		return (error);
3155 	if (nd->nd_flag & ND_NFSV4)
3156 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
3157 	if (!nd->nd_repstat && !error) {
3158 		if (nd->nd_flag & ND_NFSV4) {
3159 			NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
3160 			error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
3161 		}
3162 		if (!error)
3163 			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
3164 		if (error == 0 && (nd->nd_flag & ND_NFSV4) != 0) {
3165 			/* Get rid of the PutFH and Getattr status values. */
3166 			NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
3167 			/* Load the directory attributes. */
3168 			error = nfsm_loadattr(nd, dnap);
3169 			if (error == 0)
3170 				*dattrflagp = 1;
3171 		}
3172 	}
3173 	if ((nd->nd_flag & ND_NFSV3) && !error)
3174 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
3175 	if (nd->nd_repstat && !error)
3176 		error = nd->nd_repstat;
3177 nfsmout:
3178 	m_freem(nd->nd_mrep);
3179 	/*
3180 	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
3181 	 * Only do this if vfs.nfs.ignore_eexist is set.
3182 	 * Never do this for NFSv4.1 or later minor versions, since sessions
3183 	 * should guarantee "exactly once" RPC semantics.
3184 	 */
3185 	if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
3186 	    nmp->nm_minorvers == 0))
3187 		error = 0;
3188 	return (error);
3189 }
3190 
3191 /*
3192  * nfs remove directory call
3193  */
3194 int
3195 nfsrpc_rmdir(vnode_t dvp, char *name, int namelen, struct ucred *cred,
3196     NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp)
3197 {
3198 	struct nfsrv_descript nfsd, *nd = &nfsd;
3199 	int error = 0;
3200 
3201 	*dattrflagp = 0;
3202 	if (namelen > NFS_MAXNAMLEN)
3203 		return (ENAMETOOLONG);
3204 	NFSCL_REQSTART(nd, NFSPROC_RMDIR, dvp, cred);
3205 	(void) nfsm_strtom(nd, name, namelen);
3206 	error = nfscl_request(nd, dvp, p, cred);
3207 	if (error)
3208 		return (error);
3209 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
3210 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
3211 	if (nd->nd_repstat && !error)
3212 		error = nd->nd_repstat;
3213 	m_freem(nd->nd_mrep);
3214 	/*
3215 	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
3216 	 */
3217 	if (error == ENOENT)
3218 		error = 0;
3219 	return (error);
3220 }
3221 
3222 /*
3223  * Readdir rpc.
3224  * Always returns with either uio_resid unchanged, if you are at the
3225  * end of the directory, or uio_resid == 0, with all DIRBLKSIZ chunks
3226  * filled in.
3227  * I felt this would allow caching of directory blocks more easily
3228  * than returning a partially filled block.
3229  * Directory offset cookies:
3230  * Oh my, what to do with them...
3231  * I can think of three ways to deal with them:
3232  * 1 - have the layer above these RPCs maintain a map between logical
3233  *     directory byte offsets and the NFS directory offset cookies
3234  * 2 - pass the opaque directory offset cookies up into userland
3235  *     and let the libc functions deal with them, via the system call
3236  * 3 - return them to userland in the "struct dirent", so future versions
3237  *     of libc can use them and do whatever is necessary to make things work
3238  *     above these rpc calls, in the meantime
3239  * For now, I do #3 by "hiding" the directory offset cookies after the
3240  * d_name field in struct dirent. This is space inside d_reclen that
3241  * will be ignored by anything that doesn't know about them.
3242  * The directory offset cookies are filled in as the last 8 bytes of
3243  * each directory entry, after d_name. Someday, the userland libc
3244  * functions may be able to use these. In the meantime, it satisfies
3245  * OpenBSD's requirements for cookies being returned.
3246  * It expects the directory offset cookie for the read to be in uio_offset
3247  * and returns the one for the next entry after this directory block in
3248  * there, as well.
3249  */
3250 int
3251 nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
3252     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
3253     int *eofp)
3254 {
3255 	int len, left;
3256 	struct dirent *dp = NULL;
3257 	u_int32_t *tl;
3258 	nfsquad_t cookie, ncookie;
3259 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3260 	struct nfsnode *dnp = VTONFS(vp);
3261 	struct nfsvattr nfsva;
3262 	struct nfsrv_descript nfsd, *nd = &nfsd;
3263 	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
3264 	int reqsize, tryformoredirs = 1, readsize, eof = 0, gotmnton = 0;
3265 	u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
3266 	char *cp;
3267 	nfsattrbit_t attrbits, dattrbits;
3268 	u_int32_t rderr, *tl2 = NULL;
3269 	size_t tresid;
3270 
3271 	KASSERT(uiop->uio_iovcnt == 1 &&
3272 	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
3273 	    ("nfs readdirrpc bad uio"));
3274 	ncookie.lval[0] = ncookie.lval[1] = 0;
3275 	/*
3276 	 * There is no point in reading a lot more than uio_resid, however
3277 	 * adding one additional DIRBLKSIZ makes sense. Since uio_resid
3278 	 * and nm_readdirsize are both exact multiples of DIRBLKSIZ, this
3279 	 * will never make readsize > nm_readdirsize.
3280 	 */
3281 	readsize = nmp->nm_readdirsize;
3282 	if (readsize > uiop->uio_resid)
3283 		readsize = uiop->uio_resid + DIRBLKSIZ;
3284 
3285 	*attrflagp = 0;
3286 	if (eofp)
3287 		*eofp = 0;
3288 	tresid = uiop->uio_resid;
3289 	cookie.lval[0] = cookiep->nfsuquad[0];
3290 	cookie.lval[1] = cookiep->nfsuquad[1];
3291 	nd->nd_mrep = NULL;
3292 
3293 	/*
3294 	 * For NFSv4, first create the "." and ".." entries.
3295 	 */
3296 	if (NFSHASNFSV4(nmp)) {
3297 		reqsize = 6 * NFSX_UNSIGNED;
3298 		NFSGETATTR_ATTRBIT(&dattrbits);
3299 		NFSZERO_ATTRBIT(&attrbits);
3300 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
3301 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE);
3302 		if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3303 		    NFSATTRBIT_MOUNTEDONFILEID)) {
3304 			NFSSETBIT_ATTRBIT(&attrbits,
3305 			    NFSATTRBIT_MOUNTEDONFILEID);
3306 			gotmnton = 1;
3307 		} else {
3308 			/*
3309 			 * Must fake it. Use the fileno, except when the
3310 			 * fsid is != to that of the directory. For that
3311 			 * case, generate a fake fileno that is not the same.
3312 			 */
3313 			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
3314 			gotmnton = 0;
3315 		}
3316 
3317 		/*
3318 		 * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
3319 		 */
3320 		if (uiop->uio_offset == 0) {
3321 			NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp, cred);
3322 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3323 			*tl++ = txdr_unsigned(NFSV4OP_GETFH);
3324 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
3325 			(void) nfsrv_putattrbit(nd, &attrbits);
3326 			error = nfscl_request(nd, vp, p, cred);
3327 			if (error)
3328 			    return (error);
3329 			dotfileid = 0;	/* Fake out the compiler. */
3330 			if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
3331 			    error = nfsm_loadattr(nd, &nfsva);
3332 			    if (error != 0)
3333 				goto nfsmout;
3334 			    dotfileid = nfsva.na_fileid;
3335 			}
3336 			if (nd->nd_repstat == 0) {
3337 			    NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
3338 			    len = fxdr_unsigned(int, *(tl + 4));
3339 			    if (len > 0 && len <= NFSX_V4FHMAX)
3340 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3341 			    else
3342 				error = EPERM;
3343 			    if (!error) {
3344 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3345 				nfsva.na_mntonfileno = UINT64_MAX;
3346 				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3347 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3348 				    NULL, NULL, NULL, p, cred);
3349 				if (error) {
3350 				    dotdotfileid = dotfileid;
3351 				} else if (gotmnton) {
3352 				    if (nfsva.na_mntonfileno != UINT64_MAX)
3353 					dotdotfileid = nfsva.na_mntonfileno;
3354 				    else
3355 					dotdotfileid = nfsva.na_fileid;
3356 				} else if (nfsva.na_filesid[0] ==
3357 				    dnp->n_vattr.na_filesid[0] &&
3358 				    nfsva.na_filesid[1] ==
3359 				    dnp->n_vattr.na_filesid[1]) {
3360 				    dotdotfileid = nfsva.na_fileid;
3361 				} else {
3362 				    do {
3363 					fakefileno--;
3364 				    } while (fakefileno ==
3365 					nfsva.na_fileid);
3366 				    dotdotfileid = fakefileno;
3367 				}
3368 			    }
3369 			} else if (nd->nd_repstat == NFSERR_NOENT) {
3370 			    /*
3371 			     * Lookupp returns NFSERR_NOENT when we are
3372 			     * at the root, so just use the current dir.
3373 			     */
3374 			    nd->nd_repstat = 0;
3375 			    dotdotfileid = dotfileid;
3376 			} else {
3377 			    error = nd->nd_repstat;
3378 			}
3379 			m_freem(nd->nd_mrep);
3380 			if (error)
3381 			    return (error);
3382 			nd->nd_mrep = NULL;
3383 			dp = (struct dirent *)uiop->uio_iov->iov_base;
3384 			dp->d_pad0 = dp->d_pad1 = 0;
3385 			dp->d_off = 0;
3386 			dp->d_type = DT_DIR;
3387 			dp->d_fileno = dotfileid;
3388 			dp->d_namlen = 1;
3389 			*((uint64_t *)dp->d_name) = 0;	/* Zero pad it. */
3390 			dp->d_name[0] = '.';
3391 			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3392 			/*
3393 			 * Just make these offset cookie 0.
3394 			 */
3395 			tl = (u_int32_t *)&dp->d_name[8];
3396 			*tl++ = 0;
3397 			*tl = 0;
3398 			blksiz += dp->d_reclen;
3399 			uiop->uio_resid -= dp->d_reclen;
3400 			uiop->uio_offset += dp->d_reclen;
3401 			uiop->uio_iov->iov_base =
3402 			    (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3403 			uiop->uio_iov->iov_len -= dp->d_reclen;
3404 			dp = (struct dirent *)uiop->uio_iov->iov_base;
3405 			dp->d_pad0 = dp->d_pad1 = 0;
3406 			dp->d_off = 0;
3407 			dp->d_type = DT_DIR;
3408 			dp->d_fileno = dotdotfileid;
3409 			dp->d_namlen = 2;
3410 			*((uint64_t *)dp->d_name) = 0;
3411 			dp->d_name[0] = '.';
3412 			dp->d_name[1] = '.';
3413 			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3414 			/*
3415 			 * Just make these offset cookie 0.
3416 			 */
3417 			tl = (u_int32_t *)&dp->d_name[8];
3418 			*tl++ = 0;
3419 			*tl = 0;
3420 			blksiz += dp->d_reclen;
3421 			uiop->uio_resid -= dp->d_reclen;
3422 			uiop->uio_offset += dp->d_reclen;
3423 			uiop->uio_iov->iov_base =
3424 			    (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3425 			uiop->uio_iov->iov_len -= dp->d_reclen;
3426 		}
3427 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR);
3428 	} else {
3429 		reqsize = 5 * NFSX_UNSIGNED;
3430 	}
3431 
3432 	/*
3433 	 * Loop around doing readdir rpc's of size readsize.
3434 	 * The stopping criteria is EOF or buffer full.
3435 	 */
3436 	while (more_dirs && bigenough) {
3437 		*attrflagp = 0;
3438 		NFSCL_REQSTART(nd, NFSPROC_READDIR, vp, cred);
3439 		if (nd->nd_flag & ND_NFSV2) {
3440 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3441 			*tl++ = cookie.lval[1];
3442 			*tl = txdr_unsigned(readsize);
3443 		} else {
3444 			NFSM_BUILD(tl, u_int32_t *, reqsize);
3445 			*tl++ = cookie.lval[0];
3446 			*tl++ = cookie.lval[1];
3447 			if (cookie.qval == 0) {
3448 				*tl++ = 0;
3449 				*tl++ = 0;
3450 			} else {
3451 				NFSLOCKNODE(dnp);
3452 				*tl++ = dnp->n_cookieverf.nfsuquad[0];
3453 				*tl++ = dnp->n_cookieverf.nfsuquad[1];
3454 				NFSUNLOCKNODE(dnp);
3455 			}
3456 			if (nd->nd_flag & ND_NFSV4) {
3457 				*tl++ = txdr_unsigned(readsize);
3458 				*tl = txdr_unsigned(readsize);
3459 				(void) nfsrv_putattrbit(nd, &attrbits);
3460 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3461 				*tl = txdr_unsigned(NFSV4OP_GETATTR);
3462 				(void) nfsrv_putattrbit(nd, &dattrbits);
3463 			} else {
3464 				*tl = txdr_unsigned(readsize);
3465 			}
3466 		}
3467 		error = nfscl_request(nd, vp, p, cred);
3468 		if (error)
3469 			return (error);
3470 		if (!(nd->nd_flag & ND_NFSV2)) {
3471 			if (nd->nd_flag & ND_NFSV3)
3472 				error = nfscl_postop_attr(nd, nap, attrflagp);
3473 			if (!nd->nd_repstat && !error) {
3474 				NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
3475 				NFSLOCKNODE(dnp);
3476 				dnp->n_cookieverf.nfsuquad[0] = *tl++;
3477 				dnp->n_cookieverf.nfsuquad[1] = *tl;
3478 				NFSUNLOCKNODE(dnp);
3479 			}
3480 		}
3481 		if (nd->nd_repstat || error) {
3482 			if (!error)
3483 				error = nd->nd_repstat;
3484 			goto nfsmout;
3485 		}
3486 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3487 		more_dirs = fxdr_unsigned(int, *tl);
3488 		if (!more_dirs)
3489 			tryformoredirs = 0;
3490 
3491 		/* loop through the dir entries, doctoring them to 4bsd form */
3492 		while (more_dirs && bigenough) {
3493 			if (nd->nd_flag & ND_NFSV4) {
3494 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3495 				ncookie.lval[0] = *tl++;
3496 				ncookie.lval[1] = *tl++;
3497 				len = fxdr_unsigned(int, *tl);
3498 			} else if (nd->nd_flag & ND_NFSV3) {
3499 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3500 				nfsva.na_fileid = fxdr_hyper(tl);
3501 				tl += 2;
3502 				len = fxdr_unsigned(int, *tl);
3503 			} else {
3504 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3505 				nfsva.na_fileid = fxdr_unsigned(uint64_t,
3506 				    *tl++);
3507 				len = fxdr_unsigned(int, *tl);
3508 			}
3509 			if (len <= 0 || len > NFS_MAXNAMLEN) {
3510 				error = EBADRPC;
3511 				goto nfsmout;
3512 			}
3513 			tlen = roundup2(len, 8);
3514 			if (tlen == len)
3515 				tlen += 8;  /* To ensure null termination. */
3516 			left = DIRBLKSIZ - blksiz;
3517 			if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
3518 				NFSBZERO(uiop->uio_iov->iov_base, left);
3519 				dp->d_reclen += left;
3520 				uiop->uio_iov->iov_base =
3521 				    (char *)uiop->uio_iov->iov_base + left;
3522 				uiop->uio_iov->iov_len -= left;
3523 				uiop->uio_resid -= left;
3524 				uiop->uio_offset += left;
3525 				blksiz = 0;
3526 			}
3527 			if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
3528 			    uiop->uio_resid)
3529 				bigenough = 0;
3530 			if (bigenough) {
3531 				dp = (struct dirent *)uiop->uio_iov->iov_base;
3532 				dp->d_pad0 = dp->d_pad1 = 0;
3533 				dp->d_off = 0;
3534 				dp->d_namlen = len;
3535 				dp->d_reclen = _GENERIC_DIRLEN(len) +
3536 				    NFSX_HYPER;
3537 				dp->d_type = DT_UNKNOWN;
3538 				blksiz += dp->d_reclen;
3539 				if (blksiz == DIRBLKSIZ)
3540 					blksiz = 0;
3541 				uiop->uio_resid -= DIRHDSIZ;
3542 				uiop->uio_offset += DIRHDSIZ;
3543 				uiop->uio_iov->iov_base =
3544 				    (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
3545 				uiop->uio_iov->iov_len -= DIRHDSIZ;
3546 				error = nfsm_mbufuio(nd, uiop, len);
3547 				if (error)
3548 					goto nfsmout;
3549 				cp = uiop->uio_iov->iov_base;
3550 				tlen -= len;
3551 				NFSBZERO(cp, tlen);
3552 				cp += tlen;	/* points to cookie storage */
3553 				tl2 = (u_int32_t *)cp;
3554 				uiop->uio_iov->iov_base =
3555 				    (char *)uiop->uio_iov->iov_base + tlen +
3556 				    NFSX_HYPER;
3557 				uiop->uio_iov->iov_len -= tlen + NFSX_HYPER;
3558 				uiop->uio_resid -= tlen + NFSX_HYPER;
3559 				uiop->uio_offset += (tlen + NFSX_HYPER);
3560 			} else {
3561 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3562 				if (error)
3563 					goto nfsmout;
3564 			}
3565 			if (nd->nd_flag & ND_NFSV4) {
3566 				rderr = 0;
3567 				nfsva.na_mntonfileno = UINT64_MAX;
3568 				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3569 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3570 				    NULL, NULL, &rderr, p, cred);
3571 				if (error)
3572 					goto nfsmout;
3573 				NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3574 			} else if (nd->nd_flag & ND_NFSV3) {
3575 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3576 				ncookie.lval[0] = *tl++;
3577 				ncookie.lval[1] = *tl++;
3578 			} else {
3579 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3580 				ncookie.lval[0] = 0;
3581 				ncookie.lval[1] = *tl++;
3582 			}
3583 			if (bigenough) {
3584 			    if (nd->nd_flag & ND_NFSV4) {
3585 				if (rderr) {
3586 				    dp->d_fileno = 0;
3587 				} else {
3588 				    if (gotmnton) {
3589 					if (nfsva.na_mntonfileno != UINT64_MAX)
3590 					    dp->d_fileno = nfsva.na_mntonfileno;
3591 					else
3592 					    dp->d_fileno = nfsva.na_fileid;
3593 				    } else if (nfsva.na_filesid[0] ==
3594 					dnp->n_vattr.na_filesid[0] &&
3595 					nfsva.na_filesid[1] ==
3596 					dnp->n_vattr.na_filesid[1]) {
3597 					dp->d_fileno = nfsva.na_fileid;
3598 				    } else {
3599 					do {
3600 					    fakefileno--;
3601 					} while (fakefileno ==
3602 					    nfsva.na_fileid);
3603 					dp->d_fileno = fakefileno;
3604 				    }
3605 				    dp->d_type = vtonfs_dtype(nfsva.na_type);
3606 				}
3607 			    } else {
3608 				dp->d_fileno = nfsva.na_fileid;
3609 			    }
3610 			    *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
3611 				ncookie.lval[0];
3612 			    *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
3613 				ncookie.lval[1];
3614 			}
3615 			more_dirs = fxdr_unsigned(int, *tl);
3616 		}
3617 		/*
3618 		 * If at end of rpc data, get the eof boolean
3619 		 */
3620 		if (!more_dirs) {
3621 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3622 			eof = fxdr_unsigned(int, *tl);
3623 			if (tryformoredirs)
3624 				more_dirs = !eof;
3625 			if (nd->nd_flag & ND_NFSV4) {
3626 				error = nfscl_postop_attr(nd, nap, attrflagp);
3627 				if (error)
3628 					goto nfsmout;
3629 			}
3630 		}
3631 		m_freem(nd->nd_mrep);
3632 		nd->nd_mrep = NULL;
3633 	}
3634 	/*
3635 	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
3636 	 * by increasing d_reclen for the last record.
3637 	 */
3638 	if (blksiz > 0) {
3639 		left = DIRBLKSIZ - blksiz;
3640 		NFSBZERO(uiop->uio_iov->iov_base, left);
3641 		dp->d_reclen += left;
3642 		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
3643 		    left;
3644 		uiop->uio_iov->iov_len -= left;
3645 		uiop->uio_resid -= left;
3646 		uiop->uio_offset += left;
3647 	}
3648 
3649 	/*
3650 	 * If returning no data, assume end of file.
3651 	 * If not bigenough, return not end of file, since you aren't
3652 	 *    returning all the data
3653 	 * Otherwise, return the eof flag from the server.
3654 	 */
3655 	if (eofp) {
3656 		if (tresid == ((size_t)(uiop->uio_resid)))
3657 			*eofp = 1;
3658 		else if (!bigenough)
3659 			*eofp = 0;
3660 		else
3661 			*eofp = eof;
3662 	}
3663 
3664 	/*
3665 	 * Add extra empty records to any remaining DIRBLKSIZ chunks.
3666 	 */
3667 	while (uiop->uio_resid > 0 && uiop->uio_resid != tresid) {
3668 		dp = (struct dirent *)uiop->uio_iov->iov_base;
3669 		NFSBZERO(dp, DIRBLKSIZ);
3670 		dp->d_type = DT_UNKNOWN;
3671 		tl = (u_int32_t *)&dp->d_name[4];
3672 		*tl++ = cookie.lval[0];
3673 		*tl = cookie.lval[1];
3674 		dp->d_reclen = DIRBLKSIZ;
3675 		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
3676 		    DIRBLKSIZ;
3677 		uiop->uio_iov->iov_len -= DIRBLKSIZ;
3678 		uiop->uio_resid -= DIRBLKSIZ;
3679 		uiop->uio_offset += DIRBLKSIZ;
3680 	}
3681 
3682 nfsmout:
3683 	if (nd->nd_mrep != NULL)
3684 		m_freem(nd->nd_mrep);
3685 	return (error);
3686 }
3687 
3688 /*
3689  * NFS V3 readdir plus RPC. Used in place of nfsrpc_readdir().
3690  * (Also used for NFS V4 when mount flag set.)
3691  * (ditto above w.r.t. multiple of DIRBLKSIZ, etc.)
3692  */
3693 int
3694 nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
3695     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
3696     int *eofp)
3697 {
3698 	int len, left;
3699 	struct dirent *dp = NULL;
3700 	u_int32_t *tl;
3701 	vnode_t newvp = NULLVP;
3702 	struct nfsrv_descript nfsd, *nd = &nfsd;
3703 	struct nameidata nami, *ndp = &nami;
3704 	struct componentname *cnp = &ndp->ni_cnd;
3705 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3706 	struct nfsnode *dnp = VTONFS(vp), *np;
3707 	struct nfsvattr nfsva;
3708 	struct nfsfh *nfhp;
3709 	nfsquad_t cookie, ncookie;
3710 	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
3711 	int attrflag, tryformoredirs = 1, eof = 0, gotmnton = 0;
3712 	int isdotdot = 0, unlocknewvp = 0;
3713 	u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
3714 	u_int64_t fileno = 0;
3715 	char *cp;
3716 	nfsattrbit_t attrbits, dattrbits;
3717 	size_t tresid;
3718 	u_int32_t *tl2 = NULL, rderr;
3719 	struct timespec dctime, ts;
3720 	bool attr_ok;
3721 
3722 	KASSERT(uiop->uio_iovcnt == 1 &&
3723 	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
3724 	    ("nfs readdirplusrpc bad uio"));
3725 	ncookie.lval[0] = ncookie.lval[1] = 0;
3726 	timespecclear(&dctime);
3727 	*attrflagp = 0;
3728 	if (eofp != NULL)
3729 		*eofp = 0;
3730 	ndp->ni_dvp = vp;
3731 	nd->nd_mrep = NULL;
3732 	cookie.lval[0] = cookiep->nfsuquad[0];
3733 	cookie.lval[1] = cookiep->nfsuquad[1];
3734 	tresid = uiop->uio_resid;
3735 
3736 	/*
3737 	 * For NFSv4, first create the "." and ".." entries.
3738 	 */
3739 	if (NFSHASNFSV4(nmp)) {
3740 		NFSGETATTR_ATTRBIT(&dattrbits);
3741 		NFSZERO_ATTRBIT(&attrbits);
3742 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
3743 		if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3744 		    NFSATTRBIT_MOUNTEDONFILEID)) {
3745 			NFSSETBIT_ATTRBIT(&attrbits,
3746 			    NFSATTRBIT_MOUNTEDONFILEID);
3747 			gotmnton = 1;
3748 		} else {
3749 			/*
3750 			 * Must fake it. Use the fileno, except when the
3751 			 * fsid is != to that of the directory. For that
3752 			 * case, generate a fake fileno that is not the same.
3753 			 */
3754 			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
3755 			gotmnton = 0;
3756 		}
3757 
3758 		/*
3759 		 * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
3760 		 */
3761 		if (uiop->uio_offset == 0) {
3762 			NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp, cred);
3763 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3764 			*tl++ = txdr_unsigned(NFSV4OP_GETFH);
3765 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
3766 			(void) nfsrv_putattrbit(nd, &attrbits);
3767 			error = nfscl_request(nd, vp, p, cred);
3768 			if (error)
3769 			    return (error);
3770 			dotfileid = 0;	/* Fake out the compiler. */
3771 			if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
3772 			    error = nfsm_loadattr(nd, &nfsva);
3773 			    if (error != 0)
3774 				goto nfsmout;
3775 			    dctime = nfsva.na_ctime;
3776 			    dotfileid = nfsva.na_fileid;
3777 			}
3778 			if (nd->nd_repstat == 0) {
3779 			    NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
3780 			    len = fxdr_unsigned(int, *(tl + 4));
3781 			    if (len > 0 && len <= NFSX_V4FHMAX)
3782 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3783 			    else
3784 				error = EPERM;
3785 			    if (!error) {
3786 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3787 				nfsva.na_mntonfileno = UINT64_MAX;
3788 				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3789 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3790 				    NULL, NULL, NULL, p, cred);
3791 				if (error) {
3792 				    dotdotfileid = dotfileid;
3793 				} else if (gotmnton) {
3794 				    if (nfsva.na_mntonfileno != UINT64_MAX)
3795 					dotdotfileid = nfsva.na_mntonfileno;
3796 				    else
3797 					dotdotfileid = nfsva.na_fileid;
3798 				} else if (nfsva.na_filesid[0] ==
3799 				    dnp->n_vattr.na_filesid[0] &&
3800 				    nfsva.na_filesid[1] ==
3801 				    dnp->n_vattr.na_filesid[1]) {
3802 				    dotdotfileid = nfsva.na_fileid;
3803 				} else {
3804 				    do {
3805 					fakefileno--;
3806 				    } while (fakefileno ==
3807 					nfsva.na_fileid);
3808 				    dotdotfileid = fakefileno;
3809 				}
3810 			    }
3811 			} else if (nd->nd_repstat == NFSERR_NOENT) {
3812 			    /*
3813 			     * Lookupp returns NFSERR_NOENT when we are
3814 			     * at the root, so just use the current dir.
3815 			     */
3816 			    nd->nd_repstat = 0;
3817 			    dotdotfileid = dotfileid;
3818 			} else {
3819 			    error = nd->nd_repstat;
3820 			}
3821 			m_freem(nd->nd_mrep);
3822 			if (error)
3823 			    return (error);
3824 			nd->nd_mrep = NULL;
3825 			dp = (struct dirent *)uiop->uio_iov->iov_base;
3826 			dp->d_pad0 = dp->d_pad1 = 0;
3827 			dp->d_off = 0;
3828 			dp->d_type = DT_DIR;
3829 			dp->d_fileno = dotfileid;
3830 			dp->d_namlen = 1;
3831 			*((uint64_t *)dp->d_name) = 0;	/* Zero pad it. */
3832 			dp->d_name[0] = '.';
3833 			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3834 			/*
3835 			 * Just make these offset cookie 0.
3836 			 */
3837 			tl = (u_int32_t *)&dp->d_name[8];
3838 			*tl++ = 0;
3839 			*tl = 0;
3840 			blksiz += dp->d_reclen;
3841 			uiop->uio_resid -= dp->d_reclen;
3842 			uiop->uio_offset += dp->d_reclen;
3843 			uiop->uio_iov->iov_base =
3844 			    (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3845 			uiop->uio_iov->iov_len -= dp->d_reclen;
3846 			dp = (struct dirent *)uiop->uio_iov->iov_base;
3847 			dp->d_pad0 = dp->d_pad1 = 0;
3848 			dp->d_off = 0;
3849 			dp->d_type = DT_DIR;
3850 			dp->d_fileno = dotdotfileid;
3851 			dp->d_namlen = 2;
3852 			*((uint64_t *)dp->d_name) = 0;
3853 			dp->d_name[0] = '.';
3854 			dp->d_name[1] = '.';
3855 			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3856 			/*
3857 			 * Just make these offset cookie 0.
3858 			 */
3859 			tl = (u_int32_t *)&dp->d_name[8];
3860 			*tl++ = 0;
3861 			*tl = 0;
3862 			blksiz += dp->d_reclen;
3863 			uiop->uio_resid -= dp->d_reclen;
3864 			uiop->uio_offset += dp->d_reclen;
3865 			uiop->uio_iov->iov_base =
3866 			    (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3867 			uiop->uio_iov->iov_len -= dp->d_reclen;
3868 		}
3869 		NFSREADDIRPLUS_ATTRBIT(&attrbits);
3870 		if (gotmnton)
3871 			NFSSETBIT_ATTRBIT(&attrbits,
3872 			    NFSATTRBIT_MOUNTEDONFILEID);
3873 		if (!NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3874 		    NFSATTRBIT_TIMECREATE))
3875 			NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMECREATE);
3876 	}
3877 
3878 	/*
3879 	 * Loop around doing readdir rpc's of size nm_readdirsize.
3880 	 * The stopping criteria is EOF or buffer full.
3881 	 */
3882 	while (more_dirs && bigenough) {
3883 		*attrflagp = 0;
3884 		NFSCL_REQSTART(nd, NFSPROC_READDIRPLUS, vp, cred);
3885  		NFSM_BUILD(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
3886 		*tl++ = cookie.lval[0];
3887 		*tl++ = cookie.lval[1];
3888 		if (cookie.qval == 0) {
3889 			*tl++ = 0;
3890 			*tl++ = 0;
3891 		} else {
3892 			NFSLOCKNODE(dnp);
3893 			*tl++ = dnp->n_cookieverf.nfsuquad[0];
3894 			*tl++ = dnp->n_cookieverf.nfsuquad[1];
3895 			NFSUNLOCKNODE(dnp);
3896 		}
3897 		*tl++ = txdr_unsigned(nmp->nm_readdirsize);
3898 		*tl = txdr_unsigned(nmp->nm_readdirsize);
3899 		if (nd->nd_flag & ND_NFSV4) {
3900 			(void) nfsrv_putattrbit(nd, &attrbits);
3901 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3902 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
3903 			(void) nfsrv_putattrbit(nd, &dattrbits);
3904 		}
3905 		nanouptime(&ts);
3906 		error = nfscl_request(nd, vp, p, cred);
3907 		if (error)
3908 			return (error);
3909 		if (nd->nd_flag & ND_NFSV3)
3910 			error = nfscl_postop_attr(nd, nap, attrflagp);
3911 		if (nd->nd_repstat || error) {
3912 			if (!error)
3913 				error = nd->nd_repstat;
3914 			goto nfsmout;
3915 		}
3916 		if ((nd->nd_flag & ND_NFSV3) != 0 && *attrflagp != 0)
3917 			dctime = nap->na_ctime;
3918 		NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3919 		NFSLOCKNODE(dnp);
3920 		dnp->n_cookieverf.nfsuquad[0] = *tl++;
3921 		dnp->n_cookieverf.nfsuquad[1] = *tl++;
3922 		NFSUNLOCKNODE(dnp);
3923 		more_dirs = fxdr_unsigned(int, *tl);
3924 		if (!more_dirs)
3925 			tryformoredirs = 0;
3926 
3927 		/* loop through the dir entries, doctoring them to 4bsd form */
3928 		while (more_dirs && bigenough) {
3929 			NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3930 			if (nd->nd_flag & ND_NFSV4) {
3931 				ncookie.lval[0] = *tl++;
3932 				ncookie.lval[1] = *tl++;
3933 			} else {
3934 				fileno = fxdr_hyper(tl);
3935 				tl += 2;
3936 			}
3937 			len = fxdr_unsigned(int, *tl);
3938 			if (len <= 0 || len > NFS_MAXNAMLEN) {
3939 				error = EBADRPC;
3940 				goto nfsmout;
3941 			}
3942 			tlen = roundup2(len, 8);
3943 			if (tlen == len)
3944 				tlen += 8;  /* To ensure null termination. */
3945 			left = DIRBLKSIZ - blksiz;
3946 			if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
3947 				NFSBZERO(uiop->uio_iov->iov_base, left);
3948 				dp->d_reclen += left;
3949 				uiop->uio_iov->iov_base =
3950 				    (char *)uiop->uio_iov->iov_base + left;
3951 				uiop->uio_iov->iov_len -= left;
3952 				uiop->uio_resid -= left;
3953 				uiop->uio_offset += left;
3954 				blksiz = 0;
3955 			}
3956 			if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
3957 			    uiop->uio_resid)
3958 				bigenough = 0;
3959 			if (bigenough) {
3960 				dp = (struct dirent *)uiop->uio_iov->iov_base;
3961 				dp->d_pad0 = dp->d_pad1 = 0;
3962 				dp->d_off = 0;
3963 				dp->d_namlen = len;
3964 				dp->d_reclen = _GENERIC_DIRLEN(len) +
3965 				    NFSX_HYPER;
3966 				dp->d_type = DT_UNKNOWN;
3967 				blksiz += dp->d_reclen;
3968 				if (blksiz == DIRBLKSIZ)
3969 					blksiz = 0;
3970 				uiop->uio_resid -= DIRHDSIZ;
3971 				uiop->uio_offset += DIRHDSIZ;
3972 				uiop->uio_iov->iov_base =
3973 				    (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
3974 				uiop->uio_iov->iov_len -= DIRHDSIZ;
3975 				cnp->cn_nameptr = uiop->uio_iov->iov_base;
3976 				cnp->cn_namelen = len;
3977 				NFSCNHASHZERO(cnp);
3978 				error = nfsm_mbufuio(nd, uiop, len);
3979 				if (error)
3980 					goto nfsmout;
3981 				cp = uiop->uio_iov->iov_base;
3982 				tlen -= len;
3983 				NFSBZERO(cp, tlen);
3984 				cp += tlen;	/* points to cookie storage */
3985 				tl2 = (u_int32_t *)cp;
3986 				if (len == 2 && cnp->cn_nameptr[0] == '.' &&
3987 				    cnp->cn_nameptr[1] == '.')
3988 					isdotdot = 1;
3989 				else
3990 					isdotdot = 0;
3991 				uiop->uio_iov->iov_base =
3992 				    (char *)uiop->uio_iov->iov_base + tlen +
3993 				    NFSX_HYPER;
3994 				uiop->uio_iov->iov_len -= tlen + NFSX_HYPER;
3995 				uiop->uio_resid -= tlen + NFSX_HYPER;
3996 				uiop->uio_offset += (tlen + NFSX_HYPER);
3997 			} else {
3998 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3999 				if (error)
4000 					goto nfsmout;
4001 			}
4002 			nfhp = NULL;
4003 			if (nd->nd_flag & ND_NFSV3) {
4004 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
4005 				ncookie.lval[0] = *tl++;
4006 				ncookie.lval[1] = *tl++;
4007 				attrflag = fxdr_unsigned(int, *tl);
4008 				if (attrflag) {
4009 				  error = nfsm_loadattr(nd, &nfsva);
4010 				  if (error)
4011 					goto nfsmout;
4012 				}
4013 				NFSM_DISSECT(tl,u_int32_t *,NFSX_UNSIGNED);
4014 				if (*tl) {
4015 					error = nfsm_getfh(nd, &nfhp);
4016 					if (error)
4017 					    goto nfsmout;
4018 				}
4019 				if (!attrflag && nfhp != NULL) {
4020 					free(nfhp, M_NFSFH);
4021 					nfhp = NULL;
4022 				}
4023 			} else {
4024 				rderr = 0;
4025 				nfsva.na_mntonfileno = 0xffffffff;
4026 				error = nfsv4_loadattr(nd, NULL, &nfsva, &nfhp,
4027 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
4028 				    NULL, NULL, &rderr, p, cred);
4029 				if (error)
4030 					goto nfsmout;
4031 			}
4032 
4033 			if (bigenough) {
4034 			    if (nd->nd_flag & ND_NFSV4) {
4035 				if (rderr) {
4036 				    dp->d_fileno = 0;
4037 				} else if (gotmnton) {
4038 				    if (nfsva.na_mntonfileno != 0xffffffff)
4039 					dp->d_fileno = nfsva.na_mntonfileno;
4040 				    else
4041 					dp->d_fileno = nfsva.na_fileid;
4042 				} else if (nfsva.na_filesid[0] ==
4043 				    dnp->n_vattr.na_filesid[0] &&
4044 				    nfsva.na_filesid[1] ==
4045 				    dnp->n_vattr.na_filesid[1]) {
4046 				    dp->d_fileno = nfsva.na_fileid;
4047 				} else {
4048 				    do {
4049 					fakefileno--;
4050 				    } while (fakefileno ==
4051 					nfsva.na_fileid);
4052 				    dp->d_fileno = fakefileno;
4053 				}
4054 			    } else {
4055 				dp->d_fileno = fileno;
4056 			    }
4057 			    *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
4058 				ncookie.lval[0];
4059 			    *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
4060 				ncookie.lval[1];
4061 
4062 			    if (nfhp != NULL) {
4063 				attr_ok = true;
4064 				if (NFSRV_CMPFH(nfhp->nfh_fh, nfhp->nfh_len,
4065 				    dnp->n_fhp->nfh_fh, dnp->n_fhp->nfh_len)) {
4066 				    VREF(vp);
4067 				    newvp = vp;
4068 				    unlocknewvp = 0;
4069 				    free(nfhp, M_NFSFH);
4070 				    np = dnp;
4071 				} else if (isdotdot != 0) {
4072 				    /*
4073 				     * Skip doing a nfscl_nget() call for "..".
4074 				     * There's a race between acquiring the nfs
4075 				     * node here and lookups that look for the
4076 				     * directory being read (in the parent).
4077 				     * It would try to get a lock on ".." here,
4078 				     * owning the lock on the directory being
4079 				     * read. Lookup will hold the lock on ".."
4080 				     * and try to acquire the lock on the
4081 				     * directory being read.
4082 				     * If the directory is unlocked/relocked,
4083 				     * then there is a LOR with the buflock
4084 				     * vp is relocked.
4085 				     */
4086 				    free(nfhp, M_NFSFH);
4087 				} else {
4088 				    error = nfscl_nget(vp->v_mount, vp,
4089 				      nfhp, cnp, p, &np, LK_EXCLUSIVE);
4090 				    if (!error) {
4091 					newvp = NFSTOV(np);
4092 					unlocknewvp = 1;
4093 					/*
4094 					 * If n_localmodtime >= time before RPC,
4095 					 * then a file modification operation,
4096 					 * such as VOP_SETATTR() of size, has
4097 					 * occurred while the Lookup RPC and
4098 					 * acquisition of the vnode happened. As
4099 					 * such, the attributes might be stale,
4100 					 * with possibly an incorrect size.
4101 					 */
4102 					NFSLOCKNODE(np);
4103 					if (timespecisset(
4104 					    &np->n_localmodtime) &&
4105 					    timespeccmp(&np->n_localmodtime,
4106 					    &ts, >=)) {
4107 					    NFSCL_DEBUG(4, "nfsrpc_readdirplus:"
4108 						" localmod stale attributes\n");
4109 					    attr_ok = false;
4110 					}
4111 					NFSUNLOCKNODE(np);
4112 				    }
4113 				}
4114 				nfhp = NULL;
4115 				if (newvp != NULLVP) {
4116 				    if (attr_ok)
4117 					error = nfscl_loadattrcache(&newvp,
4118 					    &nfsva, NULL, 0, 0);
4119 				    if (error) {
4120 					if (unlocknewvp)
4121 					    vput(newvp);
4122 					else
4123 					    vrele(newvp);
4124 					goto nfsmout;
4125 				    }
4126 				    dp->d_type =
4127 					vtonfs_dtype(np->n_vattr.na_type);
4128 				    ndp->ni_vp = newvp;
4129 				    NFSCNHASH(cnp, HASHINIT);
4130 				    if (cnp->cn_namelen <= NCHNAMLEN &&
4131 					ndp->ni_dvp != ndp->ni_vp &&
4132 					(newvp->v_type != VDIR ||
4133 					 dctime.tv_sec != 0)) {
4134 					cache_enter_time_flags(ndp->ni_dvp,
4135 					    ndp->ni_vp, cnp,
4136 					    &nfsva.na_ctime,
4137 					    newvp->v_type != VDIR ? NULL :
4138 					    &dctime, VFS_CACHE_DROPOLD);
4139 				    }
4140 				    if (unlocknewvp)
4141 					vput(newvp);
4142 				    else
4143 					vrele(newvp);
4144 				    newvp = NULLVP;
4145 				}
4146 			    }
4147 			} else if (nfhp != NULL) {
4148 			    free(nfhp, M_NFSFH);
4149 			}
4150 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
4151 			more_dirs = fxdr_unsigned(int, *tl);
4152 		}
4153 		/*
4154 		 * If at end of rpc data, get the eof boolean
4155 		 */
4156 		if (!more_dirs) {
4157 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
4158 			eof = fxdr_unsigned(int, *tl);
4159 			if (tryformoredirs)
4160 				more_dirs = !eof;
4161 			if (nd->nd_flag & ND_NFSV4) {
4162 				error = nfscl_postop_attr(nd, nap, attrflagp);
4163 				if (error)
4164 					goto nfsmout;
4165 			}
4166 		}
4167 		m_freem(nd->nd_mrep);
4168 		nd->nd_mrep = NULL;
4169 	}
4170 	/*
4171 	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
4172 	 * by increasing d_reclen for the last record.
4173 	 */
4174 	if (blksiz > 0) {
4175 		left = DIRBLKSIZ - blksiz;
4176 		NFSBZERO(uiop->uio_iov->iov_base, left);
4177 		dp->d_reclen += left;
4178 		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
4179 		    left;
4180 		uiop->uio_iov->iov_len -= left;
4181 		uiop->uio_resid -= left;
4182 		uiop->uio_offset += left;
4183 	}
4184 
4185 	/*
4186 	 * If returning no data, assume end of file.
4187 	 * If not bigenough, return not end of file, since you aren't
4188 	 *    returning all the data
4189 	 * Otherwise, return the eof flag from the server.
4190 	 */
4191 	if (eofp != NULL) {
4192 		if (tresid == uiop->uio_resid)
4193 			*eofp = 1;
4194 		else if (!bigenough)
4195 			*eofp = 0;
4196 		else
4197 			*eofp = eof;
4198 	}
4199 
4200 	/*
4201 	 * Add extra empty records to any remaining DIRBLKSIZ chunks.
4202 	 */
4203 	while (uiop->uio_resid > 0 && uiop->uio_resid != tresid) {
4204 		dp = (struct dirent *)uiop->uio_iov->iov_base;
4205 		NFSBZERO(dp, DIRBLKSIZ);
4206 		dp->d_type = DT_UNKNOWN;
4207 		tl = (u_int32_t *)&dp->d_name[4];
4208 		*tl++ = cookie.lval[0];
4209 		*tl = cookie.lval[1];
4210 		dp->d_reclen = DIRBLKSIZ;
4211 		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
4212 		    DIRBLKSIZ;
4213 		uiop->uio_iov->iov_len -= DIRBLKSIZ;
4214 		uiop->uio_resid -= DIRBLKSIZ;
4215 		uiop->uio_offset += DIRBLKSIZ;
4216 	}
4217 
4218 nfsmout:
4219 	if (nd->nd_mrep != NULL)
4220 		m_freem(nd->nd_mrep);
4221 	return (error);
4222 }
4223 
4224 /*
4225  * Nfs commit rpc
4226  */
4227 int
4228 nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred,
4229     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
4230 {
4231 	u_int32_t *tl;
4232 	struct nfsrv_descript nfsd, *nd = &nfsd;
4233 	nfsattrbit_t attrbits;
4234 	int error;
4235 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4236 
4237 	*attrflagp = 0;
4238 	NFSCL_REQSTART(nd, NFSPROC_COMMIT, vp, cred);
4239 	NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
4240 	txdr_hyper(offset, tl);
4241 	tl += 2;
4242 	*tl = txdr_unsigned(cnt);
4243 	if (nd->nd_flag & ND_NFSV4) {
4244 		/*
4245 		 * And do a Getattr op.
4246 		 */
4247 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4248 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
4249 		NFSGETATTR_ATTRBIT(&attrbits);
4250 		(void) nfsrv_putattrbit(nd, &attrbits);
4251 	}
4252 	error = nfscl_request(nd, vp, p, cred);
4253 	if (error)
4254 		return (error);
4255 	error = nfscl_wcc_data(nd, vp, nap, attrflagp, NULL, NULL);
4256 	if (!error && !nd->nd_repstat) {
4257 		NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
4258 		NFSLOCKMNT(nmp);
4259 		if (NFSBCMP(nmp->nm_verf, tl, NFSX_VERF)) {
4260 			NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
4261 			nd->nd_repstat = NFSERR_STALEWRITEVERF;
4262 		}
4263 		NFSUNLOCKMNT(nmp);
4264 		if (nd->nd_flag & ND_NFSV4)
4265 			error = nfscl_postop_attr(nd, nap, attrflagp);
4266 	}
4267 nfsmout:
4268 	if (!error && nd->nd_repstat)
4269 		error = nd->nd_repstat;
4270 	m_freem(nd->nd_mrep);
4271 	return (error);
4272 }
4273 
4274 /*
4275  * NFS byte range lock rpc.
4276  * (Mostly just calls one of the three lower level RPC routines.)
4277  */
4278 int
4279 nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
4280     int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags)
4281 {
4282 	struct nfscllockowner *lp;
4283 	struct nfsclclient *clp;
4284 	struct nfsfh *nfhp;
4285 	struct nfsrv_descript nfsd, *nd = &nfsd;
4286 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4287 	u_int64_t off, len;
4288 	off_t start, end;
4289 	u_int32_t clidrev = 0;
4290 	int error = 0, newone = 0, expireret = 0, retrycnt, donelocally;
4291 	int callcnt, dorpc;
4292 
4293 	/*
4294 	 * Convert the flock structure into a start and end and do POSIX
4295 	 * bounds checking.
4296 	 */
4297 	switch (fl->l_whence) {
4298 	case SEEK_SET:
4299 	case SEEK_CUR:
4300 		/*
4301 		 * Caller is responsible for adding any necessary offset
4302 		 * when SEEK_CUR is used.
4303 		 */
4304 		start = fl->l_start;
4305 		off = fl->l_start;
4306 		break;
4307 	case SEEK_END:
4308 		start = size + fl->l_start;
4309 		off = size + fl->l_start;
4310 		break;
4311 	default:
4312 		return (EINVAL);
4313 	}
4314 	if (start < 0)
4315 		return (EINVAL);
4316 	if (fl->l_len != 0) {
4317 		end = start + fl->l_len - 1;
4318 		if (end < start)
4319 			return (EINVAL);
4320 	}
4321 
4322 	len = fl->l_len;
4323 	if (len == 0)
4324 		len = NFS64BITSSET;
4325 	retrycnt = 0;
4326 	do {
4327 	    nd->nd_repstat = 0;
4328 	    if (op == F_GETLK) {
4329 		error = nfscl_getcl(vp->v_mount, cred, p, false, true, &clp);
4330 		if (error)
4331 			return (error);
4332 		error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags);
4333 		if (!error) {
4334 			clidrev = clp->nfsc_clientidrev;
4335 			error = nfsrpc_lockt(nd, vp, clp, off, len, fl, cred,
4336 			    p, id, flags);
4337 		} else if (error == -1) {
4338 			error = 0;
4339 		}
4340 		nfscl_clientrelease(clp);
4341 	    } else if (op == F_UNLCK && fl->l_type == F_UNLCK) {
4342 		/*
4343 		 * We must loop around for all lockowner cases.
4344 		 */
4345 		callcnt = 0;
4346 		error = nfscl_getcl(vp->v_mount, cred, p, false, true, &clp);
4347 		if (error)
4348 			return (error);
4349 		do {
4350 		    error = nfscl_relbytelock(vp, off, len, cred, p, callcnt,
4351 			clp, id, flags, &lp, &dorpc);
4352 		    /*
4353 		     * If it returns a NULL lp, we're done.
4354 		     */
4355 		    if (lp == NULL) {
4356 			if (callcnt == 0)
4357 			    nfscl_clientrelease(clp);
4358 			else
4359 			    nfscl_releasealllocks(clp, vp, p, id, flags);
4360 			return (error);
4361 		    }
4362 		    if (nmp->nm_clp != NULL)
4363 			clidrev = nmp->nm_clp->nfsc_clientidrev;
4364 		    else
4365 			clidrev = 0;
4366 		    /*
4367 		     * If the server doesn't support Posix lock semantics,
4368 		     * only allow locks on the entire file, since it won't
4369 		     * handle overlapping byte ranges.
4370 		     * There might still be a problem when a lock
4371 		     * upgrade/downgrade (read<->write) occurs, since the
4372 		     * server "might" expect an unlock first?
4373 		     */
4374 		    if (dorpc && (lp->nfsl_open->nfso_posixlock ||
4375 			(off == 0 && len == NFS64BITSSET))) {
4376 			/*
4377 			 * Since the lock records will go away, we must
4378 			 * wait for grace and delay here.
4379 			 */
4380 			do {
4381 			    error = nfsrpc_locku(nd, nmp, lp, off, len,
4382 				NFSV4LOCKT_READ, cred, p, 0);
4383 			    if ((nd->nd_repstat == NFSERR_GRACE ||
4384 				 nd->nd_repstat == NFSERR_DELAY) &&
4385 				error == 0)
4386 				(void) nfs_catnap(PZERO, (int)nd->nd_repstat,
4387 				    "nfs_advlock");
4388 			} while ((nd->nd_repstat == NFSERR_GRACE ||
4389 			    nd->nd_repstat == NFSERR_DELAY) && error == 0);
4390 		    }
4391 		    callcnt++;
4392 		} while (error == 0 && nd->nd_repstat == 0);
4393 		nfscl_releasealllocks(clp, vp, p, id, flags);
4394 	    } else if (op == F_SETLK) {
4395 		error = nfscl_getbytelock(vp, off, len, fl->l_type, cred, p,
4396 		    NULL, 0, id, flags, NULL, NULL, &lp, &newone, &donelocally);
4397 		if (error || donelocally) {
4398 			return (error);
4399 		}
4400 		if (nmp->nm_clp != NULL)
4401 			clidrev = nmp->nm_clp->nfsc_clientidrev;
4402 		else
4403 			clidrev = 0;
4404 		nfhp = VTONFS(vp)->n_fhp;
4405 		if (!lp->nfsl_open->nfso_posixlock &&
4406 		    (off != 0 || len != NFS64BITSSET)) {
4407 			error = EINVAL;
4408 		} else {
4409 			error = nfsrpc_lock(nd, nmp, vp, nfhp->nfh_fh,
4410 			    nfhp->nfh_len, lp, newone, reclaim, off,
4411 			    len, fl->l_type, cred, p, 0);
4412 		}
4413 		if (!error)
4414 			error = nd->nd_repstat;
4415 		nfscl_lockrelease(lp, error, newone);
4416 	    } else {
4417 		error = EINVAL;
4418 	    }
4419 	    if (!error)
4420 	        error = nd->nd_repstat;
4421 	    if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
4422 		error == NFSERR_STALEDONTRECOVER ||
4423 		error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
4424 		error == NFSERR_BADSESSION) {
4425 		(void) nfs_catnap(PZERO, error, "nfs_advlock");
4426 	    } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
4427 		&& clidrev != 0) {
4428 		expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
4429 		retrycnt++;
4430 	    }
4431 	} while (error == NFSERR_GRACE ||
4432 	    error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
4433 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALESTATEID ||
4434 	    error == NFSERR_BADSESSION ||
4435 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
4436 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
4437 	if (error && retrycnt >= 4)
4438 		error = EIO;
4439 	return (error);
4440 }
4441 
4442 /*
4443  * The lower level routine for the LockT case.
4444  */
4445 int
4446 nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp,
4447     struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl,
4448     struct ucred *cred, NFSPROC_T *p, void *id, int flags)
4449 {
4450 	u_int32_t *tl;
4451 	int error, type, size;
4452 	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4453 	struct nfsnode *np;
4454 	struct nfsmount *nmp;
4455 	struct nfsclsession *tsep;
4456 
4457 	nmp = VFSTONFS(vp->v_mount);
4458 	NFSCL_REQSTART(nd, NFSPROC_LOCKT, vp, cred);
4459 	NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
4460 	if (fl->l_type == F_RDLCK)
4461 		*tl++ = txdr_unsigned(NFSV4LOCKT_READ);
4462 	else
4463 		*tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
4464 	txdr_hyper(off, tl);
4465 	tl += 2;
4466 	txdr_hyper(len, tl);
4467 	tl += 2;
4468 	tsep = nfsmnt_mdssession(nmp);
4469 	*tl++ = tsep->nfsess_clientid.lval[0];
4470 	*tl = tsep->nfsess_clientid.lval[1];
4471 	nfscl_filllockowner(id, own, flags);
4472 	np = VTONFS(vp);
4473 	NFSBCOPY(np->n_fhp->nfh_fh, &own[NFSV4CL_LOCKNAMELEN],
4474 	    np->n_fhp->nfh_len);
4475 	(void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + np->n_fhp->nfh_len);
4476 	error = nfscl_request(nd, vp, p, cred);
4477 	if (error)
4478 		return (error);
4479 	if (nd->nd_repstat == 0) {
4480 		fl->l_type = F_UNLCK;
4481 	} else if (nd->nd_repstat == NFSERR_DENIED) {
4482 		nd->nd_repstat = 0;
4483 		fl->l_whence = SEEK_SET;
4484 		NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
4485 		fl->l_start = fxdr_hyper(tl);
4486 		tl += 2;
4487 		len = fxdr_hyper(tl);
4488 		tl += 2;
4489 		if (len == NFS64BITSSET)
4490 			fl->l_len = 0;
4491 		else
4492 			fl->l_len = len;
4493 		type = fxdr_unsigned(int, *tl++);
4494 		if (type == NFSV4LOCKT_WRITE)
4495 			fl->l_type = F_WRLCK;
4496 		else
4497 			fl->l_type = F_RDLCK;
4498 		/*
4499 		 * XXX For now, I have no idea what to do with the
4500 		 * conflicting lock_owner, so I'll just set the pid == 0
4501 		 * and skip over the lock_owner.
4502 		 */
4503 		fl->l_pid = (pid_t)0;
4504 		tl += 2;
4505 		size = fxdr_unsigned(int, *tl);
4506 		if (size < 0 || size > NFSV4_OPAQUELIMIT)
4507 			error = EBADRPC;
4508 		if (!error)
4509 			error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
4510 	} else if (nd->nd_repstat == NFSERR_STALECLIENTID)
4511 		nfscl_initiate_recovery(clp);
4512 nfsmout:
4513 	m_freem(nd->nd_mrep);
4514 	return (error);
4515 }
4516 
4517 /*
4518  * Lower level function that performs the LockU RPC.
4519  */
4520 static int
4521 nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp,
4522     struct nfscllockowner *lp, u_int64_t off, u_int64_t len,
4523     u_int32_t type, struct ucred *cred, NFSPROC_T *p, int syscred)
4524 {
4525 	u_int32_t *tl;
4526 	int error;
4527 
4528 	nfscl_reqstart(nd, NFSPROC_LOCKU, nmp, lp->nfsl_open->nfso_fh,
4529 	    lp->nfsl_open->nfso_fhlen, NULL, NULL, 0, 0, cred);
4530 	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED);
4531 	*tl++ = txdr_unsigned(type);
4532 	*tl = txdr_unsigned(lp->nfsl_seqid);
4533 	if (nfstest_outofseq &&
4534 	    (arc4random() % nfstest_outofseq) == 0)
4535 		*tl = txdr_unsigned(lp->nfsl_seqid + 1);
4536 	tl++;
4537 	if (NFSHASNFSV4N(nmp))
4538 		*tl++ = 0;
4539 	else
4540 		*tl++ = lp->nfsl_stateid.seqid;
4541 	*tl++ = lp->nfsl_stateid.other[0];
4542 	*tl++ = lp->nfsl_stateid.other[1];
4543 	*tl++ = lp->nfsl_stateid.other[2];
4544 	txdr_hyper(off, tl);
4545 	tl += 2;
4546 	txdr_hyper(len, tl);
4547 	if (syscred)
4548 		nd->nd_flag |= ND_USEGSSNAME;
4549 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4550 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4551 	NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
4552 	if (error)
4553 		return (error);
4554 	if (nd->nd_repstat == 0) {
4555 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
4556 		lp->nfsl_stateid.seqid = *tl++;
4557 		lp->nfsl_stateid.other[0] = *tl++;
4558 		lp->nfsl_stateid.other[1] = *tl++;
4559 		lp->nfsl_stateid.other[2] = *tl;
4560 	} else if (nd->nd_repstat == NFSERR_STALESTATEID)
4561 		nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
4562 nfsmout:
4563 	m_freem(nd->nd_mrep);
4564 	return (error);
4565 }
4566 
4567 /*
4568  * The actual Lock RPC.
4569  */
4570 int
4571 nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp,
4572     u_int8_t *nfhp, int fhlen, struct nfscllockowner *lp, int newone,
4573     int reclaim, u_int64_t off, u_int64_t len, short type, struct ucred *cred,
4574     NFSPROC_T *p, int syscred)
4575 {
4576 	u_int32_t *tl;
4577 	int error, size;
4578 	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4579 	struct nfsclsession *tsep;
4580 
4581 	nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL, NULL, 0, 0,
4582 	    cred);
4583 	NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
4584 	if (type == F_RDLCK)
4585 		*tl++ = txdr_unsigned(NFSV4LOCKT_READ);
4586 	else
4587 		*tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
4588 	*tl++ = txdr_unsigned(reclaim);
4589 	txdr_hyper(off, tl);
4590 	tl += 2;
4591 	txdr_hyper(len, tl);
4592 	tl += 2;
4593 	if (newone) {
4594 	    *tl = newnfs_true;
4595 	    NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
4596 		2 * NFSX_UNSIGNED + NFSX_HYPER);
4597 	    *tl++ = txdr_unsigned(lp->nfsl_open->nfso_own->nfsow_seqid);
4598 	    if (NFSHASNFSV4N(nmp))
4599 		*tl++ = 0;
4600 	    else
4601 		*tl++ = lp->nfsl_open->nfso_stateid.seqid;
4602 	    *tl++ = lp->nfsl_open->nfso_stateid.other[0];
4603 	    *tl++ = lp->nfsl_open->nfso_stateid.other[1];
4604 	    *tl++ = lp->nfsl_open->nfso_stateid.other[2];
4605 	    *tl++ = txdr_unsigned(lp->nfsl_seqid);
4606 	    tsep = nfsmnt_mdssession(nmp);
4607 	    *tl++ = tsep->nfsess_clientid.lval[0];
4608 	    *tl = tsep->nfsess_clientid.lval[1];
4609 	    NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4610 	    NFSBCOPY(nfhp, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4611 	    (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4612 	} else {
4613 	    *tl = newnfs_false;
4614 	    NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED);
4615 	    if (NFSHASNFSV4N(nmp))
4616 		*tl++ = 0;
4617 	    else
4618 		*tl++ = lp->nfsl_stateid.seqid;
4619 	    *tl++ = lp->nfsl_stateid.other[0];
4620 	    *tl++ = lp->nfsl_stateid.other[1];
4621 	    *tl++ = lp->nfsl_stateid.other[2];
4622 	    *tl = txdr_unsigned(lp->nfsl_seqid);
4623 	    if (nfstest_outofseq &&
4624 		(arc4random() % nfstest_outofseq) == 0)
4625 		    *tl = txdr_unsigned(lp->nfsl_seqid + 1);
4626 	}
4627 	if (syscred)
4628 		nd->nd_flag |= ND_USEGSSNAME;
4629 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
4630 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4631 	if (error)
4632 		return (error);
4633 	if (newone)
4634 	    NFSCL_INCRSEQID(lp->nfsl_open->nfso_own->nfsow_seqid, nd);
4635 	NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
4636 	if (nd->nd_repstat == 0) {
4637 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
4638 		lp->nfsl_stateid.seqid = *tl++;
4639 		lp->nfsl_stateid.other[0] = *tl++;
4640 		lp->nfsl_stateid.other[1] = *tl++;
4641 		lp->nfsl_stateid.other[2] = *tl;
4642 	} else if (nd->nd_repstat == NFSERR_DENIED) {
4643 		NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
4644 		size = fxdr_unsigned(int, *(tl + 7));
4645 		if (size < 0 || size > NFSV4_OPAQUELIMIT)
4646 			error = EBADRPC;
4647 		if (!error)
4648 			error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
4649 	} else if (nd->nd_repstat == NFSERR_STALESTATEID)
4650 		nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
4651 nfsmout:
4652 	m_freem(nd->nd_mrep);
4653 	return (error);
4654 }
4655 
4656 /*
4657  * nfs statfs rpc
4658  * (always called with the vp for the mount point)
4659  */
4660 int
4661 nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp,
4662     uint32_t *leasep, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap,
4663     int *attrflagp)
4664 {
4665 	u_int32_t *tl = NULL;
4666 	struct nfsrv_descript nfsd, *nd = &nfsd;
4667 	struct nfsmount *nmp;
4668 	nfsattrbit_t attrbits;
4669 	int error;
4670 
4671 	*attrflagp = 0;
4672 	nmp = VFSTONFS(vp->v_mount);
4673 	if (NFSHASNFSV4(nmp)) {
4674 		/*
4675 		 * For V4, you actually do a getattr.
4676 		 */
4677 		NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp, cred);
4678 		if (leasep != NULL)
4679 			NFSROOTFS_GETATTRBIT(&attrbits);
4680 		else
4681 			NFSSTATFS_GETATTRBIT(&attrbits);
4682 		(void) nfsrv_putattrbit(nd, &attrbits);
4683 		nd->nd_flag |= ND_USEGSSNAME;
4684 		error = nfscl_request(nd, vp, p, cred);
4685 		if (error)
4686 			return (error);
4687 		if (nd->nd_repstat == 0) {
4688 			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4689 			    NULL, NULL, sbp, fsp, NULL, 0, NULL, leasep, NULL,
4690 			    p, cred);
4691 			if (!error) {
4692 				nmp->nm_fsid[0] = nap->na_filesid[0];
4693 				nmp->nm_fsid[1] = nap->na_filesid[1];
4694 				NFSSETHASSETFSID(nmp);
4695 				*attrflagp = 1;
4696 			}
4697 		} else {
4698 			error = nd->nd_repstat;
4699 		}
4700 		if (error)
4701 			goto nfsmout;
4702 	} else {
4703 		NFSCL_REQSTART(nd, NFSPROC_FSSTAT, vp, NULL);
4704 		error = nfscl_request(nd, vp, p, cred);
4705 		if (error)
4706 			return (error);
4707 		if (nd->nd_flag & ND_NFSV3) {
4708 			error = nfscl_postop_attr(nd, nap, attrflagp);
4709 			if (error)
4710 				goto nfsmout;
4711 		}
4712 		if (nd->nd_repstat) {
4713 			error = nd->nd_repstat;
4714 			goto nfsmout;
4715 		}
4716 		NFSM_DISSECT(tl, u_int32_t *,
4717 		    NFSX_STATFS(nd->nd_flag & ND_NFSV3));
4718 	}
4719 	if (NFSHASNFSV3(nmp)) {
4720 		sbp->sf_tbytes = fxdr_hyper(tl); tl += 2;
4721 		sbp->sf_fbytes = fxdr_hyper(tl); tl += 2;
4722 		sbp->sf_abytes = fxdr_hyper(tl); tl += 2;
4723 		sbp->sf_tfiles = fxdr_hyper(tl); tl += 2;
4724 		sbp->sf_ffiles = fxdr_hyper(tl); tl += 2;
4725 		sbp->sf_afiles = fxdr_hyper(tl); tl += 2;
4726 		sbp->sf_invarsec = fxdr_unsigned(u_int32_t, *tl);
4727 	} else if (NFSHASNFSV4(nmp) == 0) {
4728 		sbp->sf_tsize = fxdr_unsigned(u_int32_t, *tl++);
4729 		sbp->sf_bsize = fxdr_unsigned(u_int32_t, *tl++);
4730 		sbp->sf_blocks = fxdr_unsigned(u_int32_t, *tl++);
4731 		sbp->sf_bfree = fxdr_unsigned(u_int32_t, *tl++);
4732 		sbp->sf_bavail = fxdr_unsigned(u_int32_t, *tl);
4733 	}
4734 nfsmout:
4735 	m_freem(nd->nd_mrep);
4736 	return (error);
4737 }
4738 
4739 /*
4740  * nfs pathconf rpc
4741  */
4742 int
4743 nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc,
4744     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
4745 {
4746 	struct nfsrv_descript nfsd, *nd = &nfsd;
4747 	struct nfsmount *nmp;
4748 	u_int32_t *tl;
4749 	nfsattrbit_t attrbits;
4750 	int error;
4751 	struct nfsnode *np;
4752 
4753 	*attrflagp = 0;
4754 	nmp = VFSTONFS(vp->v_mount);
4755 	if (NFSHASNFSV4(nmp)) {
4756 		np = VTONFS(vp);
4757 		if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0 &&
4758 		    nmp->nm_fhsize == 0) {
4759 			/* Attempt to get the actual root file handle. */
4760 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
4761 			    cred, p);
4762 			if (error != 0)
4763 				return (EACCES);
4764 			if (np->n_fhp->nfh_len == NFSX_FHMAX + 1)
4765 				nfscl_statfs(vp, cred, p);
4766 		}
4767 		/*
4768 		 * For V4, you actually do a getattr.
4769 		 */
4770 		NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp, cred);
4771 		NFSPATHCONF_GETATTRBIT(&attrbits);
4772 		(void) nfsrv_putattrbit(nd, &attrbits);
4773 		nd->nd_flag |= ND_USEGSSNAME;
4774 		error = nfscl_request(nd, vp, p, cred);
4775 		if (error)
4776 			return (error);
4777 		if (nd->nd_repstat == 0) {
4778 			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4779 			    pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p,
4780 			    cred);
4781 			if (!error)
4782 				*attrflagp = 1;
4783 		} else {
4784 			error = nd->nd_repstat;
4785 		}
4786 	} else {
4787 		NFSCL_REQSTART(nd, NFSPROC_PATHCONF, vp, NULL);
4788 		error = nfscl_request(nd, vp, p, cred);
4789 		if (error)
4790 			return (error);
4791 		error = nfscl_postop_attr(nd, nap, attrflagp);
4792 		if (nd->nd_repstat && !error)
4793 			error = nd->nd_repstat;
4794 		if (!error) {
4795 			NFSM_DISSECT(tl, u_int32_t *, NFSX_V3PATHCONF);
4796 			pc->pc_linkmax = fxdr_unsigned(u_int32_t, *tl++);
4797 			pc->pc_namemax = fxdr_unsigned(u_int32_t, *tl++);
4798 			pc->pc_notrunc = fxdr_unsigned(u_int32_t, *tl++);
4799 			pc->pc_chownrestricted =
4800 			    fxdr_unsigned(u_int32_t, *tl++);
4801 			pc->pc_caseinsensitive =
4802 			    fxdr_unsigned(u_int32_t, *tl++);
4803 			pc->pc_casepreserving = fxdr_unsigned(u_int32_t, *tl);
4804 		}
4805 	}
4806 nfsmout:
4807 	m_freem(nd->nd_mrep);
4808 	return (error);
4809 }
4810 
4811 /*
4812  * nfs version 3 fsinfo rpc call
4813  */
4814 int
4815 nfsrpc_fsinfo(vnode_t vp, struct nfsfsinfo *fsp, struct ucred *cred,
4816     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
4817 {
4818 	u_int32_t *tl;
4819 	struct nfsrv_descript nfsd, *nd = &nfsd;
4820 	int error;
4821 
4822 	*attrflagp = 0;
4823 	NFSCL_REQSTART(nd, NFSPROC_FSINFO, vp, NULL);
4824 	error = nfscl_request(nd, vp, p, cred);
4825 	if (error)
4826 		return (error);
4827 	error = nfscl_postop_attr(nd, nap, attrflagp);
4828 	if (nd->nd_repstat && !error)
4829 		error = nd->nd_repstat;
4830 	if (!error) {
4831 		NFSM_DISSECT(tl, u_int32_t *, NFSX_V3FSINFO);
4832 		fsp->fs_rtmax = fxdr_unsigned(u_int32_t, *tl++);
4833 		fsp->fs_rtpref = fxdr_unsigned(u_int32_t, *tl++);
4834 		fsp->fs_rtmult = fxdr_unsigned(u_int32_t, *tl++);
4835 		fsp->fs_wtmax = fxdr_unsigned(u_int32_t, *tl++);
4836 		fsp->fs_wtpref = fxdr_unsigned(u_int32_t, *tl++);
4837 		fsp->fs_wtmult = fxdr_unsigned(u_int32_t, *tl++);
4838 		fsp->fs_dtpref = fxdr_unsigned(u_int32_t, *tl++);
4839 		fsp->fs_maxfilesize = fxdr_hyper(tl);
4840 		tl += 2;
4841 		fxdr_nfsv3time(tl, &fsp->fs_timedelta);
4842 		tl += 2;
4843 		fsp->fs_properties = fxdr_unsigned(u_int32_t, *tl);
4844 	}
4845 nfsmout:
4846 	m_freem(nd->nd_mrep);
4847 	return (error);
4848 }
4849 
4850 /*
4851  * This function performs the Renew RPC.
4852  */
4853 int
4854 nfsrpc_renew(struct nfsclclient *clp, struct nfsclds *dsp, struct ucred *cred,
4855     NFSPROC_T *p)
4856 {
4857 	u_int32_t *tl;
4858 	struct nfsrv_descript nfsd;
4859 	struct nfsrv_descript *nd = &nfsd;
4860 	struct nfsmount *nmp;
4861 	int error;
4862 	struct nfssockreq *nrp;
4863 	struct nfsclsession *tsep;
4864 
4865 	nmp = clp->nfsc_nmp;
4866 	if (nmp == NULL)
4867 		return (0);
4868 	if (dsp == NULL)
4869 		nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, NULL, 0,
4870 		    0, cred);
4871 	else
4872 		nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL,
4873 		    &dsp->nfsclds_sess, 0, 0, NULL);
4874 	if (!NFSHASNFSV4N(nmp)) {
4875 		/* NFSv4.1 just uses a Sequence Op and not a Renew. */
4876 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4877 		tsep = nfsmnt_mdssession(nmp);
4878 		*tl++ = tsep->nfsess_clientid.lval[0];
4879 		*tl = tsep->nfsess_clientid.lval[1];
4880 	}
4881 	nrp = NULL;
4882 	if (dsp != NULL)
4883 		nrp = dsp->nfsclds_sockp;
4884 	if (nrp == NULL)
4885 		/* If NULL, use the MDS socket. */
4886 		nrp = &nmp->nm_sockreq;
4887 	nd->nd_flag |= ND_USEGSSNAME;
4888 	if (dsp == NULL)
4889 		error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4890 		    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4891 	else {
4892 		error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4893 		    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
4894 		if (error == ENXIO)
4895 			nfscl_cancelreqs(dsp);
4896 	}
4897 	if (error)
4898 		return (error);
4899 	error = nd->nd_repstat;
4900 	m_freem(nd->nd_mrep);
4901 	return (error);
4902 }
4903 
4904 /*
4905  * This function performs the Releaselockowner RPC.
4906  */
4907 int
4908 nfsrpc_rellockown(struct nfsmount *nmp, struct nfscllockowner *lp,
4909     uint8_t *fh, int fhlen, struct ucred *cred, NFSPROC_T *p)
4910 {
4911 	struct nfsrv_descript nfsd, *nd = &nfsd;
4912 	u_int32_t *tl;
4913 	int error;
4914 	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4915 	struct nfsclsession *tsep;
4916 
4917 	if (NFSHASNFSV4N(nmp)) {
4918 		/* For NFSv4.1, do a FreeStateID. */
4919 		nfscl_reqstart(nd, NFSPROC_FREESTATEID, nmp, NULL, 0, NULL,
4920 		    NULL, 0, 0, cred);
4921 		nfsm_stateidtom(nd, &lp->nfsl_stateid, NFSSTATEID_PUTSTATEID);
4922 	} else {
4923 		nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL,
4924 		    NULL, 0, 0, NULL);
4925 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4926 		tsep = nfsmnt_mdssession(nmp);
4927 		*tl++ = tsep->nfsess_clientid.lval[0];
4928 		*tl = tsep->nfsess_clientid.lval[1];
4929 		NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4930 		NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4931 		(void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4932 	}
4933 	nd->nd_flag |= ND_USEGSSNAME;
4934 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4935 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4936 	if (error)
4937 		return (error);
4938 	error = nd->nd_repstat;
4939 	m_freem(nd->nd_mrep);
4940 	return (error);
4941 }
4942 
4943 /*
4944  * This function performs the Compound to get the mount pt FH.
4945  */
4946 int
4947 nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred,
4948     NFSPROC_T *p)
4949 {
4950 	u_int32_t *tl;
4951 	struct nfsrv_descript nfsd;
4952 	struct nfsrv_descript *nd = &nfsd;
4953 	u_char *cp, *cp2, *fhp;
4954 	int error, cnt, len, setnil;
4955 	u_int32_t *opcntp;
4956 
4957 	nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL, 0,
4958 	    0, NULL);
4959 	cp = dirpath;
4960 	cnt = 0;
4961 	do {
4962 		setnil = 0;
4963 		while (*cp == '/')
4964 			cp++;
4965 		cp2 = cp;
4966 		while (*cp2 != '\0' && *cp2 != '/')
4967 			cp2++;
4968 		if (*cp2 == '/') {
4969 			setnil = 1;
4970 			*cp2 = '\0';
4971 		}
4972 		if (cp2 != cp) {
4973 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4974 			*tl = txdr_unsigned(NFSV4OP_LOOKUP);
4975 			nfsm_strtom(nd, cp, strlen(cp));
4976 			cnt++;
4977 		}
4978 		if (setnil)
4979 			*cp2++ = '/';
4980 		cp = cp2;
4981 	} while (*cp != '\0');
4982 	if (NFSHASNFSV4N(nmp))
4983 		/* Has a Sequence Op done by nfscl_reqstart(). */
4984 		*opcntp = txdr_unsigned(3 + cnt);
4985 	else
4986 		*opcntp = txdr_unsigned(2 + cnt);
4987 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4988 	*tl = txdr_unsigned(NFSV4OP_GETFH);
4989 	nd->nd_flag |= ND_USEGSSNAME;
4990 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4991 		NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4992 	if (error)
4993 		return (error);
4994 	if (nd->nd_repstat == 0) {
4995 		NFSM_DISSECT(tl, u_int32_t *, (3 + 2 * cnt) * NFSX_UNSIGNED);
4996 		tl += (2 + 2 * cnt);
4997 		if ((len = fxdr_unsigned(int, *tl)) <= 0 ||
4998 			len > NFSX_FHMAX) {
4999 			nd->nd_repstat = NFSERR_BADXDR;
5000 		} else {
5001 			fhp = malloc(len + 1, M_TEMP, M_WAITOK);
5002 			nd->nd_repstat = nfsrv_mtostr(nd, fhp, len);
5003 			if (nd->nd_repstat == 0) {
5004 				NFSLOCKMNT(nmp);
5005 				if (nmp->nm_fhsize == 0) {
5006 					NFSBCOPY(fhp, nmp->nm_fh, len);
5007 					nmp->nm_fhsize = len;
5008 				}
5009 				NFSUNLOCKMNT(nmp);
5010 			}
5011 			free(fhp, M_TEMP);
5012 		}
5013 	}
5014 	error = nd->nd_repstat;
5015 nfsmout:
5016 	m_freem(nd->nd_mrep);
5017 	return (error);
5018 }
5019 
5020 /*
5021  * This function performs the Delegreturn RPC.
5022  */
5023 int
5024 nfsrpc_delegreturn(struct nfscldeleg *dp, struct ucred *cred,
5025     struct nfsmount *nmp, NFSPROC_T *p, int syscred)
5026 {
5027 	u_int32_t *tl;
5028 	struct nfsrv_descript nfsd;
5029 	struct nfsrv_descript *nd = &nfsd;
5030 	int error;
5031 
5032 	nfscl_reqstart(nd, NFSPROC_DELEGRETURN, nmp, dp->nfsdl_fh,
5033 	    dp->nfsdl_fhlen, NULL, NULL, 0, 0, cred);
5034 	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
5035 	if (NFSHASNFSV4N(nmp))
5036 		*tl++ = 0;
5037 	else
5038 		*tl++ = dp->nfsdl_stateid.seqid;
5039 	*tl++ = dp->nfsdl_stateid.other[0];
5040 	*tl++ = dp->nfsdl_stateid.other[1];
5041 	*tl = dp->nfsdl_stateid.other[2];
5042 	if (syscred)
5043 		nd->nd_flag |= ND_USEGSSNAME;
5044 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5045 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5046 	if (error)
5047 		return (error);
5048 	error = nd->nd_repstat;
5049 	m_freem(nd->nd_mrep);
5050 	return (error);
5051 }
5052 
5053 /*
5054  * nfs getacl call.
5055  */
5056 int
5057 nfsrpc_getacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp)
5058 {
5059 	struct nfsrv_descript nfsd, *nd = &nfsd;
5060 	int error;
5061 	nfsattrbit_t attrbits;
5062 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
5063 
5064 	if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
5065 		return (EOPNOTSUPP);
5066 	NFSCL_REQSTART(nd, NFSPROC_GETACL, vp, cred);
5067 	NFSZERO_ATTRBIT(&attrbits);
5068 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
5069 	(void) nfsrv_putattrbit(nd, &attrbits);
5070 	error = nfscl_request(nd, vp, p, cred);
5071 	if (error)
5072 		return (error);
5073 	if (!nd->nd_repstat)
5074 		error = nfsv4_loadattr(nd, vp, NULL, NULL, NULL, 0, NULL,
5075 		    NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, p, cred);
5076 	else
5077 		error = nd->nd_repstat;
5078 	m_freem(nd->nd_mrep);
5079 	return (error);
5080 }
5081 
5082 /*
5083  * nfs setacl call.
5084  */
5085 int
5086 nfsrpc_setacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp)
5087 {
5088 	int error;
5089 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
5090 
5091 	if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
5092 		return (EOPNOTSUPP);
5093 	error = nfsrpc_setattr(vp, NULL, aclp, cred, p, NULL, NULL);
5094 	return (error);
5095 }
5096 
5097 /*
5098  * nfs setacl call.
5099  */
5100 static int
5101 nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
5102     struct acl *aclp, nfsv4stateid_t *stateidp)
5103 {
5104 	struct nfsrv_descript nfsd, *nd = &nfsd;
5105 	int error;
5106 	nfsattrbit_t attrbits;
5107 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
5108 
5109 	if (!NFSHASNFSV4(nmp))
5110 		return (EOPNOTSUPP);
5111 	NFSCL_REQSTART(nd, NFSPROC_SETACL, vp, cred);
5112 	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
5113 	NFSZERO_ATTRBIT(&attrbits);
5114 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
5115 	(void) nfsv4_fillattr(nd, vp->v_mount, vp, aclp, NULL, NULL, 0,
5116 	    &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL);
5117 	error = nfscl_request(nd, vp, p, cred);
5118 	if (error)
5119 		return (error);
5120 	/* Don't care about the pre/postop attributes */
5121 	m_freem(nd->nd_mrep);
5122 	return (nd->nd_repstat);
5123 }
5124 
5125 /*
5126  * Do the NFSv4.1 Exchange ID.
5127  */
5128 int
5129 nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp,
5130     struct nfssockreq *nrp, int minorvers, uint32_t exchflags,
5131     struct nfsclds **dspp, struct ucred *cred, NFSPROC_T *p)
5132 {
5133 	uint32_t *tl, v41flags;
5134 	struct nfsrv_descript nfsd;
5135 	struct nfsrv_descript *nd = &nfsd;
5136 	struct nfsclds *dsp;
5137 	struct timespec verstime;
5138 	int error, len;
5139 
5140 	*dspp = NULL;
5141 	if (minorvers == 0)
5142 		minorvers = nmp->nm_minorvers;
5143 	nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL,
5144 	    NFS_VER4, minorvers, NULL);
5145 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5146 	*tl++ = txdr_unsigned(nfsboottime.tv_sec);	/* Client owner */
5147 	*tl = txdr_unsigned(clp->nfsc_rev);
5148 	(void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
5149 
5150 	NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED);
5151 	*tl++ = txdr_unsigned(exchflags);
5152 	*tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE);
5153 
5154 	/* Set the implementation id4 */
5155 	*tl = txdr_unsigned(1);
5156 	(void) nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org"));
5157 	(void) nfsm_strtom(nd, version, strlen(version));
5158 	NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME);
5159 	verstime.tv_sec = 1293840000;		/* Jan 1, 2011 */
5160 	verstime.tv_nsec = 0;
5161 	txdr_nfsv4time(&verstime, tl);
5162 	nd->nd_flag |= ND_USEGSSNAME;
5163 	error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
5164 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5165 	NFSCL_DEBUG(1, "exchangeid err=%d reps=%d\n", error,
5166 	    (int)nd->nd_repstat);
5167 	if (error != 0)
5168 		return (error);
5169 	if (nd->nd_repstat == 0) {
5170 		NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_UNSIGNED + NFSX_HYPER);
5171 		len = fxdr_unsigned(int, *(tl + 7));
5172 		if (len < 0 || len > NFSV4_OPAQUELIMIT) {
5173 			error = NFSERR_BADXDR;
5174 			goto nfsmout;
5175 		}
5176 		dsp = malloc(sizeof(struct nfsclds) + len + 1, M_NFSCLDS,
5177 		    M_WAITOK | M_ZERO);
5178 		dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
5179 		dsp->nfsclds_servownlen = len;
5180 		dsp->nfsclds_sess.nfsess_clientid.lval[0] = *tl++;
5181 		dsp->nfsclds_sess.nfsess_clientid.lval[1] = *tl++;
5182 		dsp->nfsclds_sess.nfsess_sequenceid =
5183 		    fxdr_unsigned(uint32_t, *tl++);
5184 		v41flags = fxdr_unsigned(uint32_t, *tl);
5185 		if ((v41flags & NFSV4EXCH_USEPNFSMDS) != 0 &&
5186 		    NFSHASPNFSOPT(nmp)) {
5187 			NFSCL_DEBUG(1, "set PNFS\n");
5188 			NFSLOCKMNT(nmp);
5189 			nmp->nm_state |= NFSSTA_PNFS;
5190 			NFSUNLOCKMNT(nmp);
5191 			dsp->nfsclds_flags |= NFSCLDS_MDS;
5192 		}
5193 		if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0)
5194 			dsp->nfsclds_flags |= NFSCLDS_DS;
5195 		if (minorvers == NFSV42_MINORVERSION)
5196 			dsp->nfsclds_flags |= NFSCLDS_MINORV2;
5197 		if (len > 0)
5198 			nd->nd_repstat = nfsrv_mtostr(nd,
5199 			    dsp->nfsclds_serverown, len);
5200 		if (nd->nd_repstat == 0) {
5201 			mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
5202 			mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
5203 			    NULL, MTX_DEF);
5204 			nfscl_initsessionslots(&dsp->nfsclds_sess);
5205 			*dspp = dsp;
5206 		} else
5207 			free(dsp, M_NFSCLDS);
5208 	}
5209 	error = nd->nd_repstat;
5210 nfsmout:
5211 	m_freem(nd->nd_mrep);
5212 	return (error);
5213 }
5214 
5215 /*
5216  * Do the NFSv4.1 Create Session.
5217  */
5218 int
5219 nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep,
5220     struct nfssockreq *nrp, struct nfsclds *dsp, uint32_t sequenceid, int mds,
5221     struct ucred *cred, NFSPROC_T *p)
5222 {
5223 	uint32_t crflags, maxval, *tl;
5224 	struct nfsrv_descript nfsd;
5225 	struct nfsrv_descript *nd = &nfsd;
5226 	int error, irdcnt, minorvers;
5227 
5228 	/* Make sure nm_rsize, nm_wsize is set. */
5229 	if (nmp->nm_rsize > NFS_MAXBSIZE || nmp->nm_rsize == 0)
5230 		nmp->nm_rsize = NFS_MAXBSIZE;
5231 	if (nmp->nm_wsize > NFS_MAXBSIZE || nmp->nm_wsize == 0)
5232 		nmp->nm_wsize = NFS_MAXBSIZE;
5233 	if (dsp == NULL)
5234 		minorvers = nmp->nm_minorvers;
5235 	else if ((dsp->nfsclds_flags & NFSCLDS_MINORV2) != 0)
5236 		minorvers = NFSV42_MINORVERSION;
5237 	else
5238 		minorvers = NFSV41_MINORVERSION;
5239 	nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL,
5240 	    NFS_VER4, minorvers, NULL);
5241 	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
5242 	*tl++ = sep->nfsess_clientid.lval[0];
5243 	*tl++ = sep->nfsess_clientid.lval[1];
5244 	*tl++ = txdr_unsigned(sequenceid);
5245 	crflags = (NFSMNT_RDONLY(nmp->nm_mountp) ? 0 : NFSV4CRSESS_PERSIST);
5246 	if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0 && mds != 0)
5247 		crflags |= NFSV4CRSESS_CONNBACKCHAN;
5248 	*tl = txdr_unsigned(crflags);
5249 
5250 	/* Fill in fore channel attributes. */
5251 	NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
5252 	*tl++ = 0;				/* Header pad size */
5253 	if ((nd->nd_flag & ND_NFSV42) != 0 && mds != 0 && sb_max_adj >=
5254 	    nmp->nm_wsize && sb_max_adj >= nmp->nm_rsize) {
5255 		/*
5256 		 * NFSv4.2 Extended Attribute operations may want to do
5257 		 * requests/replies that are larger than nm_rsize/nm_wsize.
5258 		 */
5259 		*tl++ = txdr_unsigned(sb_max_adj - NFS_MAXXDR);
5260 		*tl++ = txdr_unsigned(sb_max_adj - NFS_MAXXDR);
5261 	} else {
5262 		*tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);
5263 		*tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);
5264 	}
5265 	*tl++ = txdr_unsigned(4096);		/* Max response size cached */
5266 	*tl++ = txdr_unsigned(20);		/* Max operations */
5267 	*tl++ = txdr_unsigned(64);		/* Max slots */
5268 	*tl = 0;				/* No rdma ird */
5269 
5270 	/* Fill in back channel attributes. */
5271 	NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
5272 	*tl++ = 0;				/* Header pad size */
5273 	*tl++ = txdr_unsigned(10000);		/* Max request size */
5274 	*tl++ = txdr_unsigned(10000);		/* Max response size */
5275 	*tl++ = txdr_unsigned(4096);		/* Max response size cached */
5276 	*tl++ = txdr_unsigned(4);		/* Max operations */
5277 	*tl++ = txdr_unsigned(NFSV4_CBSLOTS);	/* Max slots */
5278 	*tl = 0;				/* No rdma ird */
5279 
5280 	NFSM_BUILD(tl, uint32_t *, 8 * NFSX_UNSIGNED);
5281 	*tl++ = txdr_unsigned(NFS_CALLBCKPROG);	/* Call back prog # */
5282 
5283 	/* Allow AUTH_SYS callbacks as uid, gid == 0. */
5284 	*tl++ = txdr_unsigned(1);		/* Auth_sys only */
5285 	*tl++ = txdr_unsigned(AUTH_SYS);	/* AUTH_SYS type */
5286 	*tl++ = txdr_unsigned(nfsboottime.tv_sec); /* time stamp */
5287 	*tl++ = 0;				/* Null machine name */
5288 	*tl++ = 0;				/* Uid == 0 */
5289 	*tl++ = 0;				/* Gid == 0 */
5290 	*tl = 0;				/* No additional gids */
5291 	nd->nd_flag |= ND_USEGSSNAME;
5292 	error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG,
5293 	    NFS_VER4, NULL, 1, NULL, NULL);
5294 	if (error != 0)
5295 		return (error);
5296 	if (nd->nd_repstat == 0) {
5297 		NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
5298 		    2 * NFSX_UNSIGNED);
5299 		bcopy(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID);
5300 		tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
5301 		sep->nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++);
5302 		crflags = fxdr_unsigned(uint32_t, *tl);
5303 		if ((crflags & NFSV4CRSESS_PERSIST) != 0 && mds != 0) {
5304 			NFSLOCKMNT(nmp);
5305 			nmp->nm_state |= NFSSTA_SESSPERSIST;
5306 			NFSUNLOCKMNT(nmp);
5307 		}
5308 
5309 		/* Get the fore channel slot count. */
5310 		NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
5311 		tl++;			/* Skip the header pad size. */
5312 
5313 		/* Make sure nm_wsize is small enough. */
5314 		maxval = fxdr_unsigned(uint32_t, *tl++);
5315 		while (maxval < nmp->nm_wsize + NFS_MAXXDR) {
5316 			if (nmp->nm_wsize > 8096)
5317 				nmp->nm_wsize /= 2;
5318 			else
5319 				break;
5320 		}
5321 		sep->nfsess_maxreq = maxval;
5322 
5323 		/* Make sure nm_rsize is small enough. */
5324 		maxval = fxdr_unsigned(uint32_t, *tl++);
5325 		while (maxval < nmp->nm_rsize + NFS_MAXXDR) {
5326 			if (nmp->nm_rsize > 8096)
5327 				nmp->nm_rsize /= 2;
5328 			else
5329 				break;
5330 		}
5331 		sep->nfsess_maxresp = maxval;
5332 
5333 		sep->nfsess_maxcache = fxdr_unsigned(int, *tl++);
5334 		tl++;
5335 		sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++);
5336 		NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots);
5337 		irdcnt = fxdr_unsigned(int, *tl);
5338 		if (irdcnt < 0 || irdcnt > 1) {
5339 			error = NFSERR_BADXDR;
5340 			goto nfsmout;
5341 		}
5342 		if (irdcnt > 0)
5343 			NFSM_DISSECT(tl, uint32_t *, irdcnt * NFSX_UNSIGNED);
5344 
5345 		/* and the back channel slot count. */
5346 		NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
5347 		tl += 5;
5348 		sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl);
5349 		NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots);
5350 	}
5351 	error = nd->nd_repstat;
5352 nfsmout:
5353 	m_freem(nd->nd_mrep);
5354 	return (error);
5355 }
5356 
5357 /*
5358  * Do the NFSv4.1 Destroy Client.
5359  */
5360 int
5361 nfsrpc_destroyclient(struct nfsmount *nmp, struct nfsclclient *clp,
5362     struct ucred *cred, NFSPROC_T *p)
5363 {
5364 	uint32_t *tl;
5365 	struct nfsrv_descript nfsd;
5366 	struct nfsrv_descript *nd = &nfsd;
5367 	int error;
5368 	struct nfsclsession *tsep;
5369 
5370 	nfscl_reqstart(nd, NFSPROC_DESTROYCLIENT, nmp, NULL, 0, NULL, NULL, 0,
5371 	    0, NULL);
5372 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5373 	tsep = nfsmnt_mdssession(nmp);
5374 	*tl++ = tsep->nfsess_clientid.lval[0];
5375 	*tl = tsep->nfsess_clientid.lval[1];
5376 	nd->nd_flag |= ND_USEGSSNAME;
5377 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5378 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5379 	if (error != 0)
5380 		return (error);
5381 	error = nd->nd_repstat;
5382 	m_freem(nd->nd_mrep);
5383 	return (error);
5384 }
5385 
5386 /*
5387  * Do the NFSv4.1 LayoutGet.
5388  */
5389 static int
5390 nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode,
5391     uint64_t offset, uint64_t len, uint64_t minlen, int layouttype,
5392     int layoutlen, nfsv4stateid_t *stateidp, int *retonclosep,
5393     struct nfsclflayouthead *flhp, struct ucred *cred, NFSPROC_T *p)
5394 {
5395 	struct nfsrv_descript nfsd, *nd = &nfsd;
5396 	int error;
5397 
5398 	nfscl_reqstart(nd, NFSPROC_LAYOUTGET, nmp, fhp, fhlen, NULL, NULL, 0,
5399 	    0, cred);
5400 	nfsrv_setuplayoutget(nd, iomode, offset, len, minlen, stateidp,
5401 	    layouttype, layoutlen, 0);
5402 	nd->nd_flag |= ND_USEGSSNAME;
5403 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5404 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5405 	NFSCL_DEBUG(4, "layget err=%d st=%d\n", error, nd->nd_repstat);
5406 	if (error != 0)
5407 		return (error);
5408 	if (nd->nd_repstat == 0)
5409 		error = nfsrv_parselayoutget(nmp, nd, stateidp, retonclosep,
5410 		    flhp);
5411 	if (error == 0 && nd->nd_repstat != 0)
5412 		error = nd->nd_repstat;
5413 	m_freem(nd->nd_mrep);
5414 	return (error);
5415 }
5416 
5417 /*
5418  * Do the NFSv4.1 Get Device Info.
5419  */
5420 int
5421 nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype,
5422     uint32_t *notifybitsp, struct nfscldevinfo **ndip, struct ucred *cred,
5423     NFSPROC_T *p)
5424 {
5425 	uint32_t cnt, *tl, vers, minorvers;
5426 	struct nfsrv_descript nfsd;
5427 	struct nfsrv_descript *nd = &nfsd;
5428 	struct sockaddr_in sin, ssin;
5429 	struct sockaddr_in6 sin6, ssin6;
5430 	struct nfsclds *dsp = NULL, **dspp, **gotdspp;
5431 	struct nfscldevinfo *ndi;
5432 	int addrcnt = 0, bitcnt, error, gotminor, gotvers, i, isudp, j;
5433 	int stripecnt;
5434 	uint8_t stripeindex;
5435 	sa_family_t af, safilled;
5436 
5437 	ssin.sin_port = 0;		/* To shut up compiler. */
5438 	ssin.sin_addr.s_addr = 0;	/* ditto */
5439 	*ndip = NULL;
5440 	ndi = NULL;
5441 	gotdspp = NULL;
5442 	nfscl_reqstart(nd, NFSPROC_GETDEVICEINFO, nmp, NULL, 0, NULL, NULL, 0,
5443 	    0, cred);
5444 	NFSM_BUILD(tl, uint32_t *, NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED);
5445 	NFSBCOPY(deviceid, tl, NFSX_V4DEVICEID);
5446 	tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
5447 	*tl++ = txdr_unsigned(layouttype);
5448 	*tl++ = txdr_unsigned(100000);
5449 	if (notifybitsp != NULL && *notifybitsp != 0) {
5450 		*tl = txdr_unsigned(1);		/* One word of bits. */
5451 		NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
5452 		*tl = txdr_unsigned(*notifybitsp);
5453 	} else
5454 		*tl = txdr_unsigned(0);
5455 	nd->nd_flag |= ND_USEGSSNAME;
5456 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5457 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5458 	if (error != 0)
5459 		return (error);
5460 	if (nd->nd_repstat == 0) {
5461 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5462 		if (layouttype != fxdr_unsigned(int, *tl))
5463 			printf("EEK! devinfo layout type not same!\n");
5464 		if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
5465 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5466 			stripecnt = fxdr_unsigned(int, *tl);
5467 			NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt);
5468 			if (stripecnt < 1 || stripecnt > 4096) {
5469 				printf("pNFS File layout devinfo stripecnt %d:"
5470 				    " out of range\n", stripecnt);
5471 				error = NFSERR_BADXDR;
5472 				goto nfsmout;
5473 			}
5474 			NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) *
5475 			    NFSX_UNSIGNED);
5476 			addrcnt = fxdr_unsigned(int, *(tl + stripecnt));
5477 			NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt);
5478 			if (addrcnt < 1 || addrcnt > 128) {
5479 				printf("NFS devinfo addrcnt %d: out of range\n",
5480 				    addrcnt);
5481 				error = NFSERR_BADXDR;
5482 				goto nfsmout;
5483 			}
5484 
5485 			/*
5486 			 * Now we know how many stripe indices and addresses, so
5487 			 * we can allocate the structure the correct size.
5488 			 */
5489 			i = (stripecnt * sizeof(uint8_t)) /
5490 			    sizeof(struct nfsclds *) + 1;
5491 			NFSCL_DEBUG(4, "stripeindices=%d\n", i);
5492 			ndi = malloc(sizeof(*ndi) + (addrcnt + i) *
5493 			    sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK |
5494 			    M_ZERO);
5495 			NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
5496 			    NFSX_V4DEVICEID);
5497 			ndi->nfsdi_refcnt = 0;
5498 			ndi->nfsdi_flags = NFSDI_FILELAYOUT;
5499 			ndi->nfsdi_stripecnt = stripecnt;
5500 			ndi->nfsdi_addrcnt = addrcnt;
5501 			/* Fill in the stripe indices. */
5502 			for (i = 0; i < stripecnt; i++) {
5503 				stripeindex = fxdr_unsigned(uint8_t, *tl++);
5504 				NFSCL_DEBUG(4, "stripeind=%d\n", stripeindex);
5505 				if (stripeindex >= addrcnt) {
5506 					printf("pNFS File Layout devinfo"
5507 					    " stripeindex %d: too big\n",
5508 					    (int)stripeindex);
5509 					error = NFSERR_BADXDR;
5510 					goto nfsmout;
5511 				}
5512 				nfsfldi_setstripeindex(ndi, i, stripeindex);
5513 			}
5514 		} else if (layouttype == NFSLAYOUT_FLEXFILE) {
5515 			/* For Flex File, we only get one address list. */
5516 			ndi = malloc(sizeof(*ndi) + sizeof(struct nfsclds *),
5517 			    M_NFSDEVINFO, M_WAITOK | M_ZERO);
5518 			NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
5519 			    NFSX_V4DEVICEID);
5520 			ndi->nfsdi_refcnt = 0;
5521 			ndi->nfsdi_flags = NFSDI_FLEXFILE;
5522 			addrcnt = ndi->nfsdi_addrcnt = 1;
5523 		}
5524 
5525 		/* Now, dissect the server address(es). */
5526 		safilled = AF_UNSPEC;
5527 		for (i = 0; i < addrcnt; i++) {
5528 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5529 			cnt = fxdr_unsigned(uint32_t, *tl);
5530 			if (cnt == 0) {
5531 				printf("NFS devinfo 0 len addrlist\n");
5532 				error = NFSERR_BADXDR;
5533 				goto nfsmout;
5534 			}
5535 			dspp = nfsfldi_addr(ndi, i);
5536 			safilled = AF_UNSPEC;
5537 			for (j = 0; j < cnt; j++) {
5538 				error = nfsv4_getipaddr(nd, &sin, &sin6, &af,
5539 				    &isudp);
5540 				if (error != 0 && error != EPERM) {
5541 					error = NFSERR_BADXDR;
5542 					goto nfsmout;
5543 				}
5544 				if (error == 0 && isudp == 0) {
5545 					/*
5546 					 * The priority is:
5547 					 * - Same address family.
5548 					 * Save the address and dspp, so that
5549 					 * the connection can be done after
5550 					 * parsing is complete.
5551 					 */
5552 					if (safilled == AF_UNSPEC ||
5553 					    (af == nmp->nm_nam->sa_family &&
5554 					     safilled != nmp->nm_nam->sa_family)
5555 					   ) {
5556 						if (af == AF_INET)
5557 							ssin = sin;
5558 						else
5559 							ssin6 = sin6;
5560 						safilled = af;
5561 						gotdspp = dspp;
5562 					}
5563 				}
5564 			}
5565 		}
5566 
5567 		gotvers = NFS_VER4;	/* Default NFSv4.1 for File Layout. */
5568 		gotminor = NFSV41_MINORVERSION;
5569 		/* For Flex File, we will take one of the versions to use. */
5570 		if (layouttype == NFSLAYOUT_FLEXFILE) {
5571 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5572 			j = fxdr_unsigned(int, *tl);
5573 			if (j < 1 || j > NFSDEV_MAXVERS) {
5574 				printf("pNFS: too many versions\n");
5575 				error = NFSERR_BADXDR;
5576 				goto nfsmout;
5577 			}
5578 			gotvers = 0;
5579 			gotminor = 0;
5580 			for (i = 0; i < j; i++) {
5581 				NFSM_DISSECT(tl, uint32_t *, 5 * NFSX_UNSIGNED);
5582 				vers = fxdr_unsigned(uint32_t, *tl++);
5583 				minorvers = fxdr_unsigned(uint32_t, *tl++);
5584 				if (vers == NFS_VER3)
5585 					minorvers = 0;
5586 				if ((vers == NFS_VER4 && ((minorvers ==
5587 				    NFSV41_MINORVERSION && gotminor == 0) ||
5588 				    minorvers == NFSV42_MINORVERSION)) ||
5589 				    (vers == NFS_VER3 && gotvers == 0)) {
5590 					gotvers = vers;
5591 					gotminor = minorvers;
5592 					/* We'll take this one. */
5593 					ndi->nfsdi_versindex = i;
5594 					ndi->nfsdi_vers = vers;
5595 					ndi->nfsdi_minorvers = minorvers;
5596 					ndi->nfsdi_rsize = fxdr_unsigned(
5597 					    uint32_t, *tl++);
5598 					ndi->nfsdi_wsize = fxdr_unsigned(
5599 					    uint32_t, *tl++);
5600 					if (*tl == newnfs_true)
5601 						ndi->nfsdi_flags |=
5602 						    NFSDI_TIGHTCOUPLED;
5603 					else
5604 						ndi->nfsdi_flags &=
5605 						    ~NFSDI_TIGHTCOUPLED;
5606 				}
5607 			}
5608 			if (gotvers == 0) {
5609 				printf("pNFS: no NFSv3, NFSv4.1 or NFSv4.2\n");
5610 				error = NFSERR_BADXDR;
5611 				goto nfsmout;
5612 			}
5613 		}
5614 
5615 		/* And the notify bits. */
5616 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5617 		bitcnt = fxdr_unsigned(int, *tl);
5618 		if (bitcnt > 0) {
5619 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5620 			if (notifybitsp != NULL)
5621 				*notifybitsp =
5622 				    fxdr_unsigned(uint32_t, *tl);
5623 		}
5624 		if (safilled != AF_UNSPEC) {
5625 			KASSERT(ndi != NULL, ("ndi is NULL"));
5626 			*ndip = ndi;
5627 		} else
5628 			error = EPERM;
5629 		if (error == 0) {
5630 			/*
5631 			 * Now we can do a TCP connection for the correct
5632 			 * NFS version and IP address.
5633 			 */
5634 			error = nfsrpc_fillsa(nmp, &ssin, &ssin6, safilled,
5635 			    gotvers, gotminor, &dsp, p);
5636 		}
5637 		if (error == 0) {
5638 			KASSERT(gotdspp != NULL, ("gotdspp is NULL"));
5639 			*gotdspp = dsp;
5640 		}
5641 	}
5642 	if (nd->nd_repstat != 0 && error == 0)
5643 		error = nd->nd_repstat;
5644 nfsmout:
5645 	if (error != 0 && ndi != NULL)
5646 		nfscl_freedevinfo(ndi);
5647 	m_freem(nd->nd_mrep);
5648 	return (error);
5649 }
5650 
5651 /*
5652  * Do the NFSv4.1 LayoutCommit.
5653  */
5654 int
5655 nfsrpc_layoutcommit(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5656     uint64_t off, uint64_t len, uint64_t lastbyte, nfsv4stateid_t *stateidp,
5657     int layouttype, struct ucred *cred, NFSPROC_T *p)
5658 {
5659 	uint32_t *tl;
5660 	struct nfsrv_descript nfsd, *nd = &nfsd;
5661 	int error;
5662 
5663 	nfscl_reqstart(nd, NFSPROC_LAYOUTCOMMIT, nmp, fh, fhlen, NULL, NULL,
5664 	    0, 0, cred);
5665 	NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
5666 	    NFSX_STATEID);
5667 	txdr_hyper(off, tl);
5668 	tl += 2;
5669 	txdr_hyper(len, tl);
5670 	tl += 2;
5671 	if (reclaim != 0)
5672 		*tl++ = newnfs_true;
5673 	else
5674 		*tl++ = newnfs_false;
5675 	*tl++ = txdr_unsigned(stateidp->seqid);
5676 	*tl++ = stateidp->other[0];
5677 	*tl++ = stateidp->other[1];
5678 	*tl++ = stateidp->other[2];
5679 	*tl++ = newnfs_true;
5680 	if (lastbyte < off)
5681 		lastbyte = off;
5682 	else if (lastbyte >= (off + len))
5683 		lastbyte = off + len - 1;
5684 	txdr_hyper(lastbyte, tl);
5685 	tl += 2;
5686 	*tl++ = newnfs_false;
5687 	*tl++ = txdr_unsigned(layouttype);
5688 	/* All supported layouts are 0 length. */
5689 	*tl = txdr_unsigned(0);
5690 	nd->nd_flag |= ND_USEGSSNAME;
5691 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5692 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5693 	if (error != 0)
5694 		return (error);
5695 	error = nd->nd_repstat;
5696 	m_freem(nd->nd_mrep);
5697 	return (error);
5698 }
5699 
5700 /*
5701  * Do the NFSv4.1 LayoutReturn.
5702  */
5703 int
5704 nfsrpc_layoutreturn(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5705     int layouttype, uint32_t iomode, int layoutreturn, uint64_t offset,
5706     uint64_t len, nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
5707     uint32_t stat, uint32_t op, char *devid)
5708 {
5709 	uint32_t *tl;
5710 	struct nfsrv_descript nfsd, *nd = &nfsd;
5711 	uint64_t tu64;
5712 	int error;
5713 
5714 	nfscl_reqstart(nd, NFSPROC_LAYOUTRETURN, nmp, fh, fhlen, NULL, NULL,
5715 	    0, 0, cred);
5716 	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
5717 	if (reclaim != 0)
5718 		*tl++ = newnfs_true;
5719 	else
5720 		*tl++ = newnfs_false;
5721 	*tl++ = txdr_unsigned(layouttype);
5722 	*tl++ = txdr_unsigned(iomode);
5723 	*tl = txdr_unsigned(layoutreturn);
5724 	if (layoutreturn == NFSLAYOUTRETURN_FILE) {
5725 		NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
5726 		    NFSX_UNSIGNED);
5727 		txdr_hyper(offset, tl);
5728 		tl += 2;
5729 		txdr_hyper(len, tl);
5730 		tl += 2;
5731 		NFSCL_DEBUG(4, "layoutret stseq=%d\n", (int)stateidp->seqid);
5732 		*tl++ = txdr_unsigned(stateidp->seqid);
5733 		*tl++ = stateidp->other[0];
5734 		*tl++ = stateidp->other[1];
5735 		*tl++ = stateidp->other[2];
5736 		if (layouttype == NFSLAYOUT_NFSV4_1_FILES)
5737 			*tl = txdr_unsigned(0);
5738 		else if (layouttype == NFSLAYOUT_FLEXFILE) {
5739 			if (stat != 0) {
5740 				*tl = txdr_unsigned(2 * NFSX_HYPER +
5741 				    NFSX_STATEID + NFSX_V4DEVICEID + 5 *
5742 				    NFSX_UNSIGNED);
5743 				NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER +
5744 				    NFSX_STATEID + NFSX_V4DEVICEID + 5 *
5745 				    NFSX_UNSIGNED);
5746 				*tl++ = txdr_unsigned(1);	/* One error. */
5747 				tu64 = 0;			/* Offset. */
5748 				txdr_hyper(tu64, tl); tl += 2;
5749 				tu64 = UINT64_MAX;		/* Length. */
5750 				txdr_hyper(tu64, tl); tl += 2;
5751 				NFSBCOPY(stateidp, tl, NFSX_STATEID);
5752 				tl += (NFSX_STATEID / NFSX_UNSIGNED);
5753 				*tl++ = txdr_unsigned(1);	/* One error. */
5754 				NFSBCOPY(devid, tl, NFSX_V4DEVICEID);
5755 				tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
5756 				*tl++ = txdr_unsigned(stat);
5757 				*tl++ = txdr_unsigned(op);
5758 			} else {
5759 				*tl = txdr_unsigned(2 * NFSX_UNSIGNED);
5760 				NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5761 				/* No ioerrs. */
5762 				*tl++ = 0;
5763 			}
5764 			*tl = 0;	/* No stats yet. */
5765 		}
5766 	}
5767 	nd->nd_flag |= ND_USEGSSNAME;
5768 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5769 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5770 	if (error != 0)
5771 		return (error);
5772 	if (nd->nd_repstat == 0) {
5773 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5774 		if (*tl != 0) {
5775 			NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
5776 			stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
5777 			stateidp->other[0] = *tl++;
5778 			stateidp->other[1] = *tl++;
5779 			stateidp->other[2] = *tl;
5780 		}
5781 	} else
5782 		error = nd->nd_repstat;
5783 nfsmout:
5784 	m_freem(nd->nd_mrep);
5785 	return (error);
5786 }
5787 
5788 /*
5789  * Do the NFSv4.2 LayoutError.
5790  */
5791 static int
5792 nfsrpc_layouterror(struct nfsmount *nmp, uint8_t *fh, int fhlen, uint64_t offset,
5793     uint64_t len, nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
5794     uint32_t stat, uint32_t op, char *devid)
5795 {
5796 	uint32_t *tl;
5797 	struct nfsrv_descript nfsd, *nd = &nfsd;
5798 	int error;
5799 
5800 	nfscl_reqstart(nd, NFSPROC_LAYOUTERROR, nmp, fh, fhlen, NULL, NULL,
5801 	    0, 0, cred);
5802 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
5803 	    NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED);
5804 	txdr_hyper(offset, tl); tl += 2;
5805 	txdr_hyper(len, tl); tl += 2;
5806 	*tl++ = txdr_unsigned(stateidp->seqid);
5807 	*tl++ = stateidp->other[0];
5808 	*tl++ = stateidp->other[1];
5809 	*tl++ = stateidp->other[2];
5810 	*tl++ = txdr_unsigned(1);
5811 	NFSBCOPY(devid, tl, NFSX_V4DEVICEID);
5812 	tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
5813 	*tl++ = txdr_unsigned(stat);
5814 	*tl = txdr_unsigned(op);
5815 	nd->nd_flag |= ND_USEGSSNAME;
5816 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5817 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5818 	if (error != 0)
5819 		return (error);
5820 	if (nd->nd_repstat != 0)
5821 		error = nd->nd_repstat;
5822 	m_freem(nd->nd_mrep);
5823 	return (error);
5824 }
5825 
5826 /*
5827  * Acquire a layout and devinfo, if possible. The caller must have acquired
5828  * a reference count on the nfsclclient structure before calling this.
5829  * Return the layout in lypp with a reference count on it, if successful.
5830  */
5831 static int
5832 nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp,
5833     int iomode, uint32_t rw, uint32_t *notifybitsp, nfsv4stateid_t *stateidp,
5834     uint64_t off, struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p)
5835 {
5836 	struct nfscllayout *lyp;
5837 	struct nfsclflayout *flp;
5838 	struct nfsclflayouthead flh;
5839 	int error = 0, islocked, layoutlen, layouttype, recalled, retonclose;
5840 	nfsv4stateid_t stateid;
5841 	struct nfsclsession *tsep;
5842 
5843 	*lypp = NULL;
5844 	if (NFSHASFLEXFILE(nmp))
5845 		layouttype = NFSLAYOUT_FLEXFILE;
5846 	else
5847 		layouttype = NFSLAYOUT_NFSV4_1_FILES;
5848 	/*
5849 	 * If lyp is returned non-NULL, there will be a refcnt (shared lock)
5850 	 * on it, iff flp != NULL or a lock (exclusive lock) on it iff
5851 	 * flp == NULL.
5852 	 */
5853 	lyp = nfscl_getlayout(nmp->nm_clp, nfhp->nfh_fh, nfhp->nfh_len,
5854 	    off, rw, &flp, &recalled);
5855 	islocked = 0;
5856 	if (lyp == NULL || flp == NULL) {
5857 		if (recalled != 0)
5858 			return (EIO);
5859 		LIST_INIT(&flh);
5860 		tsep = nfsmnt_mdssession(nmp);
5861 		layoutlen = tsep->nfsess_maxcache -
5862 		    (NFSX_STATEID + 3 * NFSX_UNSIGNED);
5863 		if (lyp == NULL) {
5864 			stateid.seqid = 0;
5865 			stateid.other[0] = stateidp->other[0];
5866 			stateid.other[1] = stateidp->other[1];
5867 			stateid.other[2] = stateidp->other[2];
5868 			error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5869 			    nfhp->nfh_len, iomode, (uint64_t)0, UINT64_MAX,
5870 			    (uint64_t)0, layouttype, layoutlen, &stateid,
5871 			    &retonclose, &flh, cred, p);
5872 		} else {
5873 			islocked = 1;
5874 			stateid.seqid = lyp->nfsly_stateid.seqid;
5875 			stateid.other[0] = lyp->nfsly_stateid.other[0];
5876 			stateid.other[1] = lyp->nfsly_stateid.other[1];
5877 			stateid.other[2] = lyp->nfsly_stateid.other[2];
5878 			error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5879 			    nfhp->nfh_len, iomode, off, UINT64_MAX,
5880 			    (uint64_t)0, layouttype, layoutlen, &stateid,
5881 			    &retonclose, &flh, cred, p);
5882 		}
5883 		error = nfsrpc_layoutgetres(nmp, vp, nfhp->nfh_fh,
5884 		    nfhp->nfh_len, &stateid, retonclose, notifybitsp, &lyp,
5885 		    &flh, layouttype, error, NULL, cred, p);
5886 		if (error == 0)
5887 			*lypp = lyp;
5888 		else if (islocked != 0)
5889 			nfscl_rellayout(lyp, 1);
5890 	} else
5891 		*lypp = lyp;
5892 	return (error);
5893 }
5894 
5895 /*
5896  * Do a TCP connection plus exchange id and create session.
5897  * If successful, a "struct nfsclds" is linked into the list for the
5898  * mount point and a pointer to it is returned.
5899  */
5900 static int
5901 nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_in *sin,
5902     struct sockaddr_in6 *sin6, sa_family_t af, int vers, int minorvers,
5903     struct nfsclds **dspp, NFSPROC_T *p)
5904 {
5905 	struct sockaddr_in *msad, *sad;
5906 	struct sockaddr_in6 *msad6, *sad6;
5907 	struct nfsclclient *clp;
5908 	struct nfssockreq *nrp;
5909 	struct nfsclds *dsp, *tdsp;
5910 	int error, firsttry;
5911 	enum nfsclds_state retv;
5912 	uint32_t sequenceid = 0;
5913 
5914 	KASSERT(nmp->nm_sockreq.nr_cred != NULL,
5915 	    ("nfsrpc_fillsa: NULL nr_cred"));
5916 	NFSLOCKCLSTATE();
5917 	clp = nmp->nm_clp;
5918 	NFSUNLOCKCLSTATE();
5919 	if (clp == NULL)
5920 		return (EPERM);
5921 	if (af == AF_INET) {
5922 		NFSLOCKMNT(nmp);
5923 		/*
5924 		 * Check to see if we already have a session for this
5925 		 * address that is usable for a DS.
5926 		 * Note that the MDS's address is in a different place
5927 		 * than the sessions already acquired for DS's.
5928 		 */
5929 		msad = (struct sockaddr_in *)nmp->nm_sockreq.nr_nam;
5930 		tdsp = TAILQ_FIRST(&nmp->nm_sess);
5931 		while (tdsp != NULL) {
5932 			if (msad != NULL && msad->sin_family == AF_INET &&
5933 			    sin->sin_addr.s_addr == msad->sin_addr.s_addr &&
5934 			    sin->sin_port == msad->sin_port &&
5935 			    (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
5936 			    tdsp->nfsclds_sess.nfsess_defunct == 0) {
5937 				*dspp = tdsp;
5938 				NFSUNLOCKMNT(nmp);
5939 				NFSCL_DEBUG(4, "fnd same addr\n");
5940 				return (0);
5941 			}
5942 			tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
5943 			if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
5944 				msad = (struct sockaddr_in *)
5945 				    tdsp->nfsclds_sockp->nr_nam;
5946 			else
5947 				msad = NULL;
5948 		}
5949 		NFSUNLOCKMNT(nmp);
5950 
5951 		/* No IP address match, so look for new/trunked one. */
5952 		sad = malloc(sizeof(*sad), M_SONAME, M_WAITOK | M_ZERO);
5953 		sad->sin_len = sizeof(*sad);
5954 		sad->sin_family = AF_INET;
5955 		sad->sin_port = sin->sin_port;
5956 		sad->sin_addr.s_addr = sin->sin_addr.s_addr;
5957 		nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
5958 		nrp->nr_nam = (struct sockaddr *)sad;
5959 	} else if (af == AF_INET6) {
5960 		NFSLOCKMNT(nmp);
5961 		/*
5962 		 * Check to see if we already have a session for this
5963 		 * address that is usable for a DS.
5964 		 * Note that the MDS's address is in a different place
5965 		 * than the sessions already acquired for DS's.
5966 		 */
5967 		msad6 = (struct sockaddr_in6 *)nmp->nm_sockreq.nr_nam;
5968 		tdsp = TAILQ_FIRST(&nmp->nm_sess);
5969 		while (tdsp != NULL) {
5970 			if (msad6 != NULL && msad6->sin6_family == AF_INET6 &&
5971 			    IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
5972 			    &msad6->sin6_addr) &&
5973 			    sin6->sin6_port == msad6->sin6_port &&
5974 			    (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
5975 			    tdsp->nfsclds_sess.nfsess_defunct == 0) {
5976 				*dspp = tdsp;
5977 				NFSUNLOCKMNT(nmp);
5978 				return (0);
5979 			}
5980 			tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
5981 			if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
5982 				msad6 = (struct sockaddr_in6 *)
5983 				    tdsp->nfsclds_sockp->nr_nam;
5984 			else
5985 				msad6 = NULL;
5986 		}
5987 		NFSUNLOCKMNT(nmp);
5988 
5989 		/* No IP address match, so look for new/trunked one. */
5990 		sad6 = malloc(sizeof(*sad6), M_SONAME, M_WAITOK | M_ZERO);
5991 		sad6->sin6_len = sizeof(*sad6);
5992 		sad6->sin6_family = AF_INET6;
5993 		sad6->sin6_port = sin6->sin6_port;
5994 		NFSBCOPY(&sin6->sin6_addr, &sad6->sin6_addr,
5995 		    sizeof(struct in6_addr));
5996 		nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
5997 		nrp->nr_nam = (struct sockaddr *)sad6;
5998 	} else
5999 		return (EPERM);
6000 
6001 	nrp->nr_sotype = SOCK_STREAM;
6002 	mtx_init(&nrp->nr_mtx, "nfssock", NULL, MTX_DEF);
6003 	nrp->nr_prog = NFS_PROG;
6004 	nrp->nr_vers = vers;
6005 
6006 	/*
6007 	 * Use the credentials that were used for the mount, which are
6008 	 * in nmp->nm_sockreq.nr_cred for newnfs_connect() etc.
6009 	 * Ref. counting the credentials with crhold() is probably not
6010 	 * necessary, since nm_sockreq.nr_cred won't be crfree()'d until
6011 	 * unmount, but I did it anyhow.
6012 	 */
6013 	nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred);
6014 	error = newnfs_connect(nmp, nrp, NULL, p, 0, false, &nrp->nr_client);
6015 	NFSCL_DEBUG(3, "DS connect=%d\n", error);
6016 
6017 	dsp = NULL;
6018 	/* Now, do the exchangeid and create session. */
6019 	if (error == 0) {
6020 		if (vers == NFS_VER4) {
6021 			firsttry = 0;
6022 			do {
6023 				error = nfsrpc_exchangeid(nmp, clp, nrp,
6024 				    minorvers, NFSV4EXCH_USEPNFSDS, &dsp,
6025 				    nrp->nr_cred, p);
6026 				NFSCL_DEBUG(3, "DS exchangeid=%d\n", error);
6027 				if (error == NFSERR_MINORVERMISMATCH)
6028 					minorvers = NFSV42_MINORVERSION;
6029 			} while (error == NFSERR_MINORVERMISMATCH &&
6030 			    firsttry++ == 0);
6031 			if (error != 0)
6032 				newnfs_disconnect(NULL, nrp);
6033 		} else {
6034 			dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS,
6035 			    M_WAITOK | M_ZERO);
6036 			dsp->nfsclds_flags |= NFSCLDS_DS;
6037 			dsp->nfsclds_expire = INT32_MAX; /* No renews needed. */
6038 			mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
6039 			mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
6040 			    NULL, MTX_DEF);
6041 		}
6042 	}
6043 	if (error == 0) {
6044 		dsp->nfsclds_sockp = nrp;
6045 		if (vers == NFS_VER4) {
6046 			NFSLOCKMNT(nmp);
6047 			retv = nfscl_getsameserver(nmp, dsp, &tdsp,
6048 			    &sequenceid);
6049 			NFSCL_DEBUG(3, "getsame ret=%d\n", retv);
6050 			if (retv == NFSDSP_USETHISSESSION &&
6051 			    nfscl_dssameconn != 0) {
6052 				NFSLOCKDS(tdsp);
6053 				tdsp->nfsclds_flags |= NFSCLDS_SAMECONN;
6054 				NFSUNLOCKDS(tdsp);
6055 				NFSUNLOCKMNT(nmp);
6056 				/*
6057 				 * If there is already a session for this
6058 				 * server, use it.
6059 				 */
6060 				newnfs_disconnect(NULL, nrp);
6061 				nfscl_freenfsclds(dsp);
6062 				*dspp = tdsp;
6063 				return (0);
6064 			}
6065 			if (retv == NFSDSP_NOTFOUND)
6066 				sequenceid =
6067 				    dsp->nfsclds_sess.nfsess_sequenceid;
6068 			NFSUNLOCKMNT(nmp);
6069 			error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
6070 			    nrp, dsp, sequenceid, 0, nrp->nr_cred, p);
6071 			NFSCL_DEBUG(3, "DS createsess=%d\n", error);
6072 		}
6073 	} else {
6074 		NFSFREECRED(nrp->nr_cred);
6075 		NFSFREEMUTEX(&nrp->nr_mtx);
6076 		free(nrp->nr_nam, M_SONAME);
6077 		free(nrp, M_NFSSOCKREQ);
6078 	}
6079 	if (error == 0) {
6080 		NFSCL_DEBUG(3, "add DS session\n");
6081 		/*
6082 		 * Put it at the end of the list. That way the list
6083 		 * is ordered by when the entry was added. This matters
6084 		 * since the one done first is the one that should be
6085 		 * used for sequencid'ing any subsequent create sessions.
6086 		 */
6087 		NFSLOCKMNT(nmp);
6088 		TAILQ_INSERT_TAIL(&nmp->nm_sess, dsp, nfsclds_list);
6089 		NFSUNLOCKMNT(nmp);
6090 		*dspp = dsp;
6091 	} else if (dsp != NULL) {
6092 		newnfs_disconnect(NULL, nrp);
6093 		nfscl_freenfsclds(dsp);
6094 	}
6095 	return (error);
6096 }
6097 
6098 /*
6099  * Do the NFSv4.1 Reclaim Complete.
6100  */
6101 int
6102 nfsrpc_reclaimcomplete(struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
6103 {
6104 	uint32_t *tl;
6105 	struct nfsrv_descript nfsd;
6106 	struct nfsrv_descript *nd = &nfsd;
6107 	int error;
6108 
6109 	nfscl_reqstart(nd, NFSPROC_RECLAIMCOMPL, nmp, NULL, 0, NULL, NULL, 0,
6110 	    0, cred);
6111 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
6112 	*tl = newnfs_false;
6113 	nd->nd_flag |= ND_USEGSSNAME;
6114 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
6115 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
6116 	if (error != 0)
6117 		return (error);
6118 	error = nd->nd_repstat;
6119 	m_freem(nd->nd_mrep);
6120 	return (error);
6121 }
6122 
6123 /*
6124  * Initialize the slot tables for a session.
6125  */
6126 static void
6127 nfscl_initsessionslots(struct nfsclsession *sep)
6128 {
6129 	int i;
6130 
6131 	for (i = 0; i < NFSV4_CBSLOTS; i++) {
6132 		if (sep->nfsess_cbslots[i].nfssl_reply != NULL)
6133 			m_freem(sep->nfsess_cbslots[i].nfssl_reply);
6134 		NFSBZERO(&sep->nfsess_cbslots[i], sizeof(struct nfsslot));
6135 	}
6136 	for (i = 0; i < 64; i++)
6137 		sep->nfsess_slotseq[i] = 0;
6138 	sep->nfsess_slots = 0;
6139 	sep->nfsess_badslots = 0;
6140 }
6141 
6142 /*
6143  * Called to try and do an I/O operation via an NFSv4.1 Data Server (DS).
6144  */
6145 int
6146 nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6147     uint32_t rwaccess, int docommit, struct ucred *cred, NFSPROC_T *p)
6148 {
6149 	struct nfsnode *np = VTONFS(vp);
6150 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6151 	struct nfscllayout *layp;
6152 	struct nfscldevinfo *dip;
6153 	struct nfsclflayout *rflp;
6154 	struct mbuf *m, *m2;
6155 	struct nfsclwritedsdorpc *drpc, *tdrpc;
6156 	nfsv4stateid_t stateid;
6157 	struct ucred *newcred;
6158 	uint64_t lastbyte, len, off, oresid, xfer;
6159 	int eof, error, firstmirror, i, iolaymode, mirrorcnt, recalled, timo;
6160 	void *lckp;
6161 	uint8_t *dev;
6162 	void *iovbase = NULL;
6163 	size_t iovlen = 0;
6164 	off_t offs = 0;
6165 	ssize_t resid = 0;
6166 	uint32_t op;
6167 
6168 	if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
6169 	    (np->n_flag & NNOLAYOUT) != 0)
6170 		return (EIO);
6171 	/* Now, get a reference cnt on the clientid for this mount. */
6172 	if (nfscl_getref(nmp) == 0)
6173 		return (EIO);
6174 
6175 	/* Find an appropriate stateid. */
6176 	newcred = NFSNEWCRED(cred);
6177 	error = nfscl_getstateid(vp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
6178 	    rwaccess, 1, newcred, p, &stateid, &lckp);
6179 	if (error != 0) {
6180 		NFSFREECRED(newcred);
6181 		nfscl_relref(nmp);
6182 		return (error);
6183 	}
6184 	/* Search for a layout for this file. */
6185 	off = uiop->uio_offset;
6186 	layp = nfscl_getlayout(nmp->nm_clp, np->n_fhp->nfh_fh,
6187 	    np->n_fhp->nfh_len, off, rwaccess, &rflp, &recalled);
6188 	if (layp == NULL || rflp == NULL) {
6189 		if (recalled != 0) {
6190 			NFSFREECRED(newcred);
6191 			if (lckp != NULL)
6192 				nfscl_lockderef(lckp);
6193 			nfscl_relref(nmp);
6194 			return (EIO);
6195 		}
6196 		if (layp != NULL) {
6197 			nfscl_rellayout(layp, (rflp == NULL) ? 1 : 0);
6198 			layp = NULL;
6199 		}
6200 		/* Try and get a Layout, if it is supported. */
6201 		if (rwaccess == NFSV4OPEN_ACCESSWRITE ||
6202 		    (np->n_flag & NWRITEOPENED) != 0)
6203 			iolaymode = NFSLAYOUTIOMODE_RW;
6204 		else
6205 			iolaymode = NFSLAYOUTIOMODE_READ;
6206 		error = nfsrpc_getlayout(nmp, vp, np->n_fhp, iolaymode,
6207 		    rwaccess, NULL, &stateid, off, &layp, newcred, p);
6208 		if (error != 0) {
6209 			NFSLOCKNODE(np);
6210 			np->n_flag |= NNOLAYOUT;
6211 			NFSUNLOCKNODE(np);
6212 			if (lckp != NULL)
6213 				nfscl_lockderef(lckp);
6214 			NFSFREECRED(newcred);
6215 			if (layp != NULL)
6216 				nfscl_rellayout(layp, 0);
6217 			nfscl_relref(nmp);
6218 			return (error);
6219 		}
6220 	}
6221 
6222 	/*
6223 	 * Loop around finding a layout that works for the first part of
6224 	 * this I/O operation, and then call the function that actually
6225 	 * does the RPC.
6226 	 */
6227 	eof = 0;
6228 	len = (uint64_t)uiop->uio_resid;
6229 	while (len > 0 && error == 0 && eof == 0) {
6230 		off = uiop->uio_offset;
6231 		error = nfscl_findlayoutforio(layp, off, rwaccess, &rflp);
6232 		if (error == 0) {
6233 			oresid = xfer = (uint64_t)uiop->uio_resid;
6234 			if (xfer > (rflp->nfsfl_end - rflp->nfsfl_off))
6235 				xfer = rflp->nfsfl_end - rflp->nfsfl_off;
6236 			/*
6237 			 * For Flex File layout with mirrored DSs, select one
6238 			 * of them at random for reads. For writes and commits,
6239 			 * do all mirrors.
6240 			 */
6241 			m = NULL;
6242 			tdrpc = drpc = NULL;
6243 			firstmirror = 0;
6244 			mirrorcnt = 1;
6245 			if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0 &&
6246 			    (mirrorcnt = rflp->nfsfl_mirrorcnt) > 1) {
6247 				if (rwaccess == NFSV4OPEN_ACCESSREAD) {
6248 					firstmirror = arc4random() % mirrorcnt;
6249 					mirrorcnt = firstmirror + 1;
6250 				} else {
6251 					if (docommit == 0) {
6252 						/*
6253 						 * Save values, so uiop can be
6254 						 * rolled back upon a write
6255 						 * error.
6256 						 */
6257 						offs = uiop->uio_offset;
6258 						resid = uiop->uio_resid;
6259 						iovbase =
6260 						    uiop->uio_iov->iov_base;
6261 						iovlen = uiop->uio_iov->iov_len;
6262 						m = nfsm_uiombuflist(uiop, len,
6263 						    0);
6264 					}
6265 					tdrpc = drpc = malloc(sizeof(*drpc) *
6266 					    (mirrorcnt - 1), M_TEMP, M_WAITOK |
6267 					    M_ZERO);
6268 				}
6269 			}
6270 			for (i = firstmirror; i < mirrorcnt && error == 0; i++){
6271 				m2 = NULL;
6272 				if (m != NULL && i < mirrorcnt - 1)
6273 					m2 = m_copym(m, 0, M_COPYALL, M_WAITOK);
6274 				else {
6275 					m2 = m;
6276 					m = NULL;
6277 				}
6278 				if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0) {
6279 					dev = rflp->nfsfl_ffm[i].dev;
6280 					dip = nfscl_getdevinfo(nmp->nm_clp, dev,
6281 					    rflp->nfsfl_ffm[i].devp);
6282 				} else {
6283 					dev = rflp->nfsfl_dev;
6284 					dip = nfscl_getdevinfo(nmp->nm_clp, dev,
6285 					    rflp->nfsfl_devp);
6286 				}
6287 				if (dip != NULL) {
6288 					if ((rflp->nfsfl_flags & NFSFL_FLEXFILE)
6289 					    != 0)
6290 						error = nfscl_dofflayoutio(vp,
6291 						    uiop, iomode, must_commit,
6292 						    &eof, &stateid, rwaccess,
6293 						    dip, layp, rflp, off, xfer,
6294 						    i, docommit, m2, tdrpc,
6295 						    newcred, p);
6296 					else
6297 						error = nfscl_doflayoutio(vp,
6298 						    uiop, iomode, must_commit,
6299 						    &eof, &stateid, rwaccess,
6300 						    dip, layp, rflp, off, xfer,
6301 						    docommit, newcred, p);
6302 					nfscl_reldevinfo(dip);
6303 				} else {
6304 					if (m2 != NULL)
6305 						m_freem(m2);
6306 					error = EIO;
6307 				}
6308 				tdrpc++;
6309 			}
6310 			if (m != NULL)
6311 				m_freem(m);
6312 			tdrpc = drpc;
6313 			timo = hz / 50;		/* Wait for 20msec. */
6314 			if (timo < 1)
6315 				timo = 1;
6316 			for (i = firstmirror; i < mirrorcnt - 1 &&
6317 			    tdrpc != NULL; i++, tdrpc++) {
6318 				/*
6319 				 * For the unused drpc entries, both inprog and
6320 				 * err == 0, so this loop won't break.
6321 				 */
6322 				while (tdrpc->inprog != 0 && tdrpc->done == 0)
6323 					tsleep(&tdrpc->tsk, PVFS, "clrpcio",
6324 					    timo);
6325 				if (error == 0 && tdrpc->err != 0)
6326 					error = tdrpc->err;
6327 				if (rwaccess != NFSV4OPEN_ACCESSREAD &&
6328 				    docommit == 0 && *must_commit == 0 &&
6329 				    tdrpc->must_commit == 1)
6330 					*must_commit = 1;
6331 			}
6332 			free(drpc, M_TEMP);
6333 			if (error == 0) {
6334 				if (mirrorcnt > 1 && rwaccess ==
6335 				    NFSV4OPEN_ACCESSWRITE && docommit == 0) {
6336 					NFSLOCKCLSTATE();
6337 					layp->nfsly_flags |= NFSLY_WRITTEN;
6338 					NFSUNLOCKCLSTATE();
6339 				}
6340 				lastbyte = off + xfer - 1;
6341 				NFSLOCKCLSTATE();
6342 				if (lastbyte > layp->nfsly_lastbyte)
6343 					layp->nfsly_lastbyte = lastbyte;
6344 				NFSUNLOCKCLSTATE();
6345 			} else if (error == NFSERR_OPENMODE &&
6346 			    rwaccess == NFSV4OPEN_ACCESSREAD) {
6347 				NFSLOCKMNT(nmp);
6348 				nmp->nm_state |= NFSSTA_OPENMODE;
6349 				NFSUNLOCKMNT(nmp);
6350 			} else if ((error == NFSERR_NOSPC ||
6351 			    error == NFSERR_IO || error == NFSERR_NXIO) &&
6352 			    nmp->nm_minorvers == NFSV42_MINORVERSION) {
6353 				if (docommit != 0)
6354 					op = NFSV4OP_COMMIT;
6355 				else if (rwaccess == NFSV4OPEN_ACCESSREAD)
6356 					op = NFSV4OP_READ;
6357 				else
6358 					op = NFSV4OP_WRITE;
6359 				nfsrpc_layouterror(nmp, np->n_fhp->nfh_fh,
6360 				    np->n_fhp->nfh_len, off, xfer,
6361 				    &layp->nfsly_stateid, newcred, p, error, op,
6362 				    dip->nfsdi_deviceid);
6363 				error = EIO;
6364 			} else
6365 				error = EIO;
6366 			if (error == 0)
6367 				len -= (oresid - (uint64_t)uiop->uio_resid);
6368 			else if (mirrorcnt > 1 && rwaccess ==
6369 			    NFSV4OPEN_ACCESSWRITE && docommit == 0) {
6370 				/*
6371 				 * In case the rpc gets retried, roll the
6372 				 * uio fields changed by nfsm_uiombuflist()
6373 				 * back.
6374 				 */
6375 				uiop->uio_offset = offs;
6376 				uiop->uio_resid = resid;
6377 				uiop->uio_iov->iov_base = iovbase;
6378 				uiop->uio_iov->iov_len = iovlen;
6379 			}
6380 		}
6381 	}
6382 	if (lckp != NULL)
6383 		nfscl_lockderef(lckp);
6384 	NFSFREECRED(newcred);
6385 	nfscl_rellayout(layp, 0);
6386 	nfscl_relref(nmp);
6387 	return (error);
6388 }
6389 
6390 /*
6391  * Find a file layout that will handle the first bytes of the requested
6392  * range and return the information from it needed to the I/O operation.
6393  */
6394 int
6395 nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess,
6396     struct nfsclflayout **retflpp)
6397 {
6398 	struct nfsclflayout *flp, *nflp, *rflp;
6399 	uint32_t rw;
6400 
6401 	rflp = NULL;
6402 	rw = rwaccess;
6403 	/* For reading, do the Read list first and then the Write list. */
6404 	do {
6405 		if (rw == NFSV4OPEN_ACCESSREAD)
6406 			flp = LIST_FIRST(&lyp->nfsly_flayread);
6407 		else
6408 			flp = LIST_FIRST(&lyp->nfsly_flayrw);
6409 		while (flp != NULL) {
6410 			nflp = LIST_NEXT(flp, nfsfl_list);
6411 			if (flp->nfsfl_off > off)
6412 				break;
6413 			if (flp->nfsfl_end > off &&
6414 			    (rflp == NULL || rflp->nfsfl_end < flp->nfsfl_end))
6415 				rflp = flp;
6416 			flp = nflp;
6417 		}
6418 		if (rw == NFSV4OPEN_ACCESSREAD)
6419 			rw = NFSV4OPEN_ACCESSWRITE;
6420 		else
6421 			rw = 0;
6422 	} while (rw != 0);
6423 	if (rflp != NULL) {
6424 		/* This one covers the most bytes starting at off. */
6425 		*retflpp = rflp;
6426 		return (0);
6427 	}
6428 	return (EIO);
6429 }
6430 
6431 /*
6432  * Do I/O using an NFSv4.1 or NFSv4.2 file layout.
6433  */
6434 static int
6435 nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6436     int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
6437     struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
6438     uint64_t len, int docommit, struct ucred *cred, NFSPROC_T *p)
6439 {
6440 	uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer;
6441 	int commit_thru_mds, error, stripe_index, stripe_pos, minorvers;
6442 	struct nfsnode *np;
6443 	struct nfsfh *fhp;
6444 	struct nfsclds **dspp;
6445 
6446 	np = VTONFS(vp);
6447 	rel_off = off - flp->nfsfl_patoff;
6448 	stripe_unit_size = flp->nfsfl_util & NFSFLAYUTIL_STRIPE_MASK;
6449 	stripe_pos = (rel_off / stripe_unit_size + flp->nfsfl_stripe1) %
6450 	    dp->nfsdi_stripecnt;
6451 	transfer = stripe_unit_size - (rel_off % stripe_unit_size);
6452 	error = 0;
6453 
6454 	/* Loop around, doing I/O for each stripe unit. */
6455 	while (len > 0 && error == 0) {
6456 		stripe_index = nfsfldi_stripeindex(dp, stripe_pos);
6457 		dspp = nfsfldi_addr(dp, stripe_index);
6458 		if (((*dspp)->nfsclds_flags & NFSCLDS_MINORV2) != 0)
6459 			minorvers = NFSV42_MINORVERSION;
6460 		else
6461 			minorvers = NFSV41_MINORVERSION;
6462 		if (len > transfer && docommit == 0)
6463 			xfer = transfer;
6464 		else
6465 			xfer = len;
6466 		if ((flp->nfsfl_util & NFSFLAYUTIL_DENSE) != 0) {
6467 			/* Dense layout. */
6468 			if (stripe_pos >= flp->nfsfl_fhcnt)
6469 				return (EIO);
6470 			fhp = flp->nfsfl_fh[stripe_pos];
6471 			io_off = (rel_off / (stripe_unit_size *
6472 			    dp->nfsdi_stripecnt)) * stripe_unit_size +
6473 			    rel_off % stripe_unit_size;
6474 		} else {
6475 			/* Sparse layout. */
6476 			if (flp->nfsfl_fhcnt > 1) {
6477 				if (stripe_index >= flp->nfsfl_fhcnt)
6478 					return (EIO);
6479 				fhp = flp->nfsfl_fh[stripe_index];
6480 			} else if (flp->nfsfl_fhcnt == 1)
6481 				fhp = flp->nfsfl_fh[0];
6482 			else
6483 				fhp = np->n_fhp;
6484 			io_off = off;
6485 		}
6486 		if ((flp->nfsfl_util & NFSFLAYUTIL_COMMIT_THRU_MDS) != 0) {
6487 			commit_thru_mds = 1;
6488 			if (docommit != 0)
6489 				error = EIO;
6490 		} else {
6491 			commit_thru_mds = 0;
6492 			NFSLOCKNODE(np);
6493 			np->n_flag |= NDSCOMMIT;
6494 			NFSUNLOCKNODE(np);
6495 		}
6496 		if (docommit != 0) {
6497 			if (error == 0)
6498 				error = nfsrpc_commitds(vp, io_off, xfer,
6499 				    *dspp, fhp, NFS_VER4, minorvers, cred, p);
6500 			if (error == 0) {
6501 				/*
6502 				 * Set both eof and uio_resid = 0 to end any
6503 				 * loops.
6504 				 */
6505 				*eofp = 1;
6506 				uiop->uio_resid = 0;
6507 			} else {
6508 				NFSLOCKNODE(np);
6509 				np->n_flag &= ~NDSCOMMIT;
6510 				NFSUNLOCKNODE(np);
6511 			}
6512 		} else if (rwflag == NFSV4OPEN_ACCESSREAD)
6513 			error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
6514 			    io_off, xfer, fhp, 0, NFS_VER4, minorvers, cred, p);
6515 		else {
6516 			error = nfsrpc_writeds(vp, uiop, iomode, must_commit,
6517 			    stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds,
6518 			    0, NFS_VER4, minorvers, cred, p);
6519 			if (error == 0) {
6520 				NFSLOCKCLSTATE();
6521 				lyp->nfsly_flags |= NFSLY_WRITTEN;
6522 				NFSUNLOCKCLSTATE();
6523 			}
6524 		}
6525 		if (error == 0) {
6526 			transfer = stripe_unit_size;
6527 			stripe_pos = (stripe_pos + 1) % dp->nfsdi_stripecnt;
6528 			len -= xfer;
6529 			off += xfer;
6530 		}
6531 	}
6532 	return (error);
6533 }
6534 
/*
 * Do I/O using an NFSv4.1 flex file layout.
 *
 * Performs a commit (docommit != 0), read (rwflag ==
 * NFSV4OPEN_ACCESSREAD) or write of [off, off + len) against the DS for
 * the given "mirror" of the flex file layout "flp".  The range is done
 * in pieces of at most the DS read/write size from "dp", except for a
 * commit, which is done as one RPC.
 *
 * Note that the stateidp argument is overwritten with the address of
 * the mirror's stateid from the layout before it is used.
 *
 * For mirrored writes (flp->nfsfl_mirrorcnt > 1) the data comes from the
 * mbuf list "mp" instead of "uiop".  For commits and mirrored writes,
 * all but the last mirror are handed to nfsio_commitds()/
 * nfsio_writedsmir() along with "drpc"; the last mirror is done
 * directly via nfsrpc_commitds()/nfsrpc_writedsmir().
 *
 * Returns 0 or the error from the failing RPC (EIO if the mbuf list
 * could not be split).
 */
static int
nfscl_dofflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
    int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
    struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
    uint64_t len, int mirror, int docommit, struct mbuf *mp,
    struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
{
	uint64_t xfer;
	int error;
	struct nfsnode *np;
	struct nfsfh *fhp;
	struct nfsclds **dspp;
	struct ucred *tcred;
	struct mbuf *m, *m2;
	uint32_t copylen;

	np = VTONFS(vp);
	error = 0;
	NFSCL_DEBUG(4, "nfscl_dofflayoutio: off=%ju len=%ju\n", (uintmax_t)off,
	    (uintmax_t)len);
	/* Loop around, doing I/O for each piece of at most copylen bytes. */
	while (len > 0 && error == 0) {
		/* The DS, file handle and stateid come from this mirror. */
		dspp = nfsfldi_addr(dp, 0);
		fhp = flp->nfsfl_ffm[mirror].fh[dp->nfsdi_versindex];
		stateidp = &flp->nfsfl_ffm[mirror].st;
		NFSCL_DEBUG(4, "mirror=%d vind=%d fhlen=%d st.seqid=0x%x\n",
		    mirror, dp->nfsdi_versindex, fhp->nfh_len, stateidp->seqid);
		if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0) {
			/*
			 * Not tightly coupled: use a copy of the credentials
			 * with the uid and single group taken from the
			 * layout's mirror entry.  Released at the end of
			 * each loop iteration.
			 */
			tcred = NFSNEWCRED(cred);
			tcred->cr_uid = flp->nfsfl_ffm[mirror].user;
			tcred->cr_groups[0] = flp->nfsfl_ffm[mirror].group;
			tcred->cr_ngroups = 1;
		} else
			tcred = cred;
		if (rwflag == NFSV4OPEN_ACCESSREAD)
			copylen = dp->nfsdi_rsize;
		else {
			copylen = dp->nfsdi_wsize;
			if (len > copylen && mp != NULL) {
				/*
				 * When a mirrored configuration needs to do
				 * multiple writes to each mirror, all writes
				 * except the last one must be a multiple of
				 * 4 bytes.  This is required so that the XDR
				 * does not need padding.
				 * If possible, clip the size to an exact
				 * multiple of the mbuf length, so that the
				 * split will be on an mbuf boundary.
				 */
				copylen &= 0xfffffffc;
				if (copylen > mp->m_len)
					copylen = copylen / mp->m_len *
					    mp->m_len;
			}
		}
		NFSLOCKNODE(np);
		np->n_flag |= NDSCOMMIT;
		NFSUNLOCKNODE(np);
		/* A commit always covers the entire remaining range. */
		if (len > copylen && docommit == 0)
			xfer = copylen;
		else
			xfer = len;
		if (docommit != 0) {
			if (error == 0) {
				/*
				 * Do last mirrored DS commit with this thread.
				 */
				if (mirror < flp->nfsfl_mirrorcnt - 1)
					error = nfsio_commitds(vp, off, xfer,
					    *dspp, fhp, dp->nfsdi_vers,
					    dp->nfsdi_minorvers, drpc, tcred,
					    p);
				else
					error = nfsrpc_commitds(vp, off, xfer,
					    *dspp, fhp, dp->nfsdi_vers,
					    dp->nfsdi_minorvers, tcred, p);
				NFSCL_DEBUG(4, "commitds=%d\n", error);
				/* Report errors other than EACCES/ESTALE. */
				if (error != 0 && error != EACCES && error !=
				    ESTALE) {
					NFSCL_DEBUG(4,
					    "DS layreterr for commit\n");
					nfscl_dserr(NFSV4OP_COMMIT, error, dp,
					    lyp, *dspp);
				}
			}
			NFSCL_DEBUG(4, "aft nfsio_commitds=%d\n", error);
			if (error == 0) {
				/*
				 * Set both eof and uio_resid = 0 to end any
				 * loops.
				 */
				*eofp = 1;
				uiop->uio_resid = 0;
			} else {
				/* The commit failed, so clear the flag again. */
				NFSLOCKNODE(np);
				np->n_flag &= ~NDSCOMMIT;
				NFSUNLOCKNODE(np);
			}
		} else if (rwflag == NFSV4OPEN_ACCESSREAD) {
			error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
			    off, xfer, fhp, 1, dp->nfsdi_vers,
			    dp->nfsdi_minorvers, tcred, p);
			NFSCL_DEBUG(4, "readds=%d\n", error);
			/* Report errors other than EACCES/ESTALE. */
			if (error != 0 && error != EACCES && error != ESTALE) {
				NFSCL_DEBUG(4, "DS layreterr for read\n");
				nfscl_dserr(NFSV4OP_READ, error, dp, lyp,
				    *dspp);
			}
		} else {
			if (flp->nfsfl_mirrorcnt == 1) {
				/* Not mirrored: write directly from the uio. */
				error = nfsrpc_writeds(vp, uiop, iomode,
				    must_commit, stateidp, *dspp, off, xfer,
				    fhp, 0, 1, dp->nfsdi_vers,
				    dp->nfsdi_minorvers, tcred, p);
				if (error == 0) {
					NFSLOCKCLSTATE();
					lyp->nfsly_flags |= NFSLY_WRITTEN;
					NFSUNLOCKCLSTATE();
				}
			} else {
				/*
				 * Mirrored: "m" is the data for this write and
				 * "mp" is advanced to whatever remains after
				 * the split.
				 */
				m = mp;
				if (xfer < len) {
					/* The mbuf list must be split. */
					m2 = nfsm_split(mp, xfer);
					if (m2 != NULL)
						mp = m2;
					else {
						m_freem(mp);
						error = EIO;
					}
				}
				NFSCL_DEBUG(4, "mcopy len=%jd xfer=%jd\n",
				    (uintmax_t)len, (uintmax_t)xfer);
				/*
				 * Do last write to a mirrored DS with this
				 * thread.
				 */
				if (error == 0) {
					if (mirror < flp->nfsfl_mirrorcnt - 1)
						error = nfsio_writedsmir(vp,
						    iomode, must_commit,
						    stateidp, *dspp, off,
						    xfer, fhp, m,
						    dp->nfsdi_vers,
						    dp->nfsdi_minorvers, drpc,
						    tcred, p);
					else
						error = nfsrpc_writedsmir(vp,
						    iomode, must_commit,
						    stateidp, *dspp, off,
						    xfer, fhp, m,
						    dp->nfsdi_vers,
						    dp->nfsdi_minorvers, tcred,
						    p);
				}
				NFSCL_DEBUG(4, "nfsio_writedsmir=%d\n", error);
				/* Report errors other than EACCES/ESTALE. */
				if (error != 0 && error != EACCES && error !=
				    ESTALE) {
					NFSCL_DEBUG(4,
					    "DS layreterr for write\n");
					nfscl_dserr(NFSV4OP_WRITE, error, dp,
					    lyp, *dspp);
				}
			}
		}
		NFSCL_DEBUG(4, "aft read/writeds=%d\n", error);
		if (error == 0) {
			len -= xfer;
			off += xfer;
		}
		/* Release the per-iteration credentials, if allocated. */
		if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0)
			NFSFREECRED(tcred);
	}
	NFSCL_DEBUG(4, "eo nfscl_dofflayoutio=%d\n", error);
	return (error);
}
6714 
/*
 * The actual read RPC done to a DS.
 *
 * Reads up to "len" bytes at DS offset "io_off" from the file "fhp" on
 * the data server "dsp" and copies the reply data into "uiop".  A "vers"
 * of 0 or NFS_VER4 results in an NFSv4 READDS request that carries the
 * stateid; any other value results in a READ request without a stateid.
 * "flex" selects how the stateid is put in the request.
 * *eofp is set from the eof value in the reply.
 *
 * Returns 0, the error from newnfs_request(), the status in the reply
 * or a reply parsing error.
 */
static int
nfsrpc_readds(vnode_t vp, struct uio *uiop, nfsv4stateid_t *stateidp, int *eofp,
    struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, int flex,
    int vers, int minorvers, struct ucred *cred, NFSPROC_T *p)
{
	uint32_t *tl;
	int attrflag, error, retlen;
	struct nfsrv_descript nfsd;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	struct nfsrv_descript *nd = &nfsd;
	struct nfssockreq *nrp;
	struct nfsvattr na;

	nd->nd_mrep = NULL;
	if (vers == 0 || vers == NFS_VER4) {
		nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh,
		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
		    NULL);
		vers = NFS_VER4;
		NFSCL_DEBUG(4, "nfsrpc_readds: vers4 minvers=%d\n", minorvers);
		if (flex != 0)
			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
		else
			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
	} else {
		nfscl_reqstart(nd, NFSPROC_READ, nmp, fhp->nfh_fh,
		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
		    NULL);
		/* Move the count from the READ to the READDS statistic. */
		NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_READ]);
		NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_READDS]);
		NFSCL_DEBUG(4, "nfsrpc_readds: vers3\n");
	}
	/* Arguments: 64bit offset followed by a 32bit count. */
	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
	txdr_hyper(io_off, tl);
	*(tl + 2) = txdr_unsigned(len);
	nrp = dsp->nfsclds_sockp;
	NFSCL_DEBUG(4, "nfsrpc_readds: nrp=%p\n", nrp);
	if (nrp == NULL)
		/* If NULL, use the MDS socket. */
		nrp = &nmp->nm_sockreq;
	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
	NFSCL_DEBUG(4, "nfsrpc_readds: stat=%d err=%d\n", nd->nd_repstat,
	    error);
	if (error != 0)
		return (error);
	if (vers == NFS_VER3) {
		/* The post-op attributes are parsed, but "na" is not used. */
		error = nfscl_postop_attr(nd, &na, &attrflag);
		NFSCL_DEBUG(4, "nfsrpc_readds: postop=%d\n", error);
		if (error != 0)
			goto nfsmout;
	}
	if (nd->nd_repstat != 0) {
		error = nd->nd_repstat;
		goto nfsmout;
	}
	/*
	 * NOTE(review): NFSM_DISSECT() and NFSM_STRSIZ() presumably set
	 * error and jump to nfsmout when the reply is malformed -- confirm
	 * against the macro definitions.
	 */
	if (vers == NFS_VER3) {
		/* Two words are parsed; eof is taken from the second. */
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
		*eofp = fxdr_unsigned(int, *(tl + 1));
	} else {
		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
		*eofp = fxdr_unsigned(int, *tl);
	}
	NFSM_STRSIZ(retlen, len);
	NFSCL_DEBUG(4, "nfsrpc_readds: retlen=%d eof=%d\n", retlen, *eofp);
	/* Copy the retlen bytes of data from the reply into the uio. */
	error = nfsm_mbufuio(nd, uiop, retlen);
nfsmout:
	if (nd->nd_mrep != NULL)
		m_freem(nd->nd_mrep);
	return (error);
}
6789 
/*
 * The actual write RPC done to a DS.
 *
 * Writes "len" bytes taken from "uiop" (which must have a single iovec)
 * at DS offset "io_off" to the file "fhp" on the data server "dsp".
 * A "vers" of 0 or NFS_VER4 results in an NFSv4 WRITEDS request that
 * carries the stateid; any other value results in a WRITE request
 * without one.  "flex" selects how the stateid is put in the request.
 *
 * On entry *iomode is the stable-how value sent in the request; once a
 * reply has been received it is set to the commitment level from that
 * reply (or left as NFSWRITE_FILESYNC when the reply was not parsed
 * that far).  The write verifier in the reply is compared against the
 * one saved in the mount point (commit_thru_mds != 0) or in "dsp";
 * if it differs and *must_commit is not 2, *must_commit is set to 1.
 *
 * If the server reports an error or a short write, the uio fields are
 * backed up by the number of bytes that were not written.
 *
 * Returns 0, the error from newnfs_request(), the status in the reply,
 * NFSERR_IO when zero bytes were written or a reply parsing error.
 */
static int
nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
    nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
    struct nfsfh *fhp, int commit_thru_mds, int flex, int vers, int minorvers,
    struct ucred *cred, NFSPROC_T *p)
{
	uint32_t *tl;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int attrflag, error, rlen, commit, committed = NFSWRITE_FILESYNC;
	int32_t backup;
	struct nfsrv_descript nfsd;
	struct nfsrv_descript *nd = &nfsd;
	struct nfssockreq *nrp;
	struct nfsvattr na;

	KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
	nd->nd_mrep = NULL;
	if (vers == 0 || vers == NFS_VER4) {
		nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
		    NULL);
		NFSCL_DEBUG(4, "nfsrpc_writeds: vers4 minvers=%d\n", minorvers);
		vers = NFS_VER4;
		if (flex != 0)
			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
		else
			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
	} else {
		nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
		    NULL);
		/* Move the count from the WRITE to the WRITEDS statistic. */
		NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITE]);
		NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITEDS]);
		NFSCL_DEBUG(4, "nfsrpc_writeds: vers3\n");
		/* One more word than NFSv4, for the extra length below. */
		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
	}
	/* Arguments: offset, (v3 only) length, stable-how, data length. */
	txdr_hyper(io_off, tl);
	tl += 2;
	if (vers == NFS_VER3)
		*tl++ = txdr_unsigned(len);
	*tl++ = txdr_unsigned(*iomode);
	*tl = txdr_unsigned(len);
	/* Append the data; this advances the uio fields by len. */
	nfsm_uiombuf(nd, uiop, len);
	nrp = dsp->nfsclds_sockp;
	if (nrp == NULL)
		/* If NULL, use the MDS socket. */
		nrp = &nmp->nm_sockreq;
	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
	NFSCL_DEBUG(4, "nfsrpc_writeds: err=%d stat=%d\n", error,
	    nd->nd_repstat);
	if (error != 0)
		return (error);
	if (nd->nd_repstat != 0) {
		/*
		 * In case the rpc gets retried, roll
		 * the uio fields changed by nfsm_uiombuf()
		 * back.
		 */
		uiop->uio_offset -= len;
		uiop->uio_resid += len;
		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base - len;
		uiop->uio_iov->iov_len += len;
		error = nd->nd_repstat;
	} else {
		if (vers == NFS_VER3) {
			error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
			    NULL);
			NFSCL_DEBUG(4, "nfsrpc_writeds: wcc_data=%d\n", error);
			if (error != 0)
				goto nfsmout;
		}
		/* Reply: bytes written, commitment level, write verifier. */
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
		rlen = fxdr_unsigned(int, *tl++);
		NFSCL_DEBUG(4, "nfsrpc_writeds: len=%d rlen=%d\n", len, rlen);
		if (rlen == 0) {
			error = NFSERR_IO;
			goto nfsmout;
		} else if (rlen < len) {
			/* Short write: back the uio up by the shortfall. */
			backup = len - rlen;
			uiop->uio_iov->iov_base =
			    (char *)uiop->uio_iov->iov_base - backup;
			uiop->uio_iov->iov_len += backup;
			uiop->uio_offset -= backup;
			uiop->uio_resid += backup;
			len = rlen;
		}
		commit = fxdr_unsigned(int, *tl++);

		/*
		 * Return the lowest commitment level
		 * obtained by any of the RPCs.
		 * (Only one RPC is done here and committed starts out as
		 * NFSWRITE_FILESYNC, so this always takes the level from
		 * the reply.)
		 */
		if (committed == NFSWRITE_FILESYNC)
			committed = commit;
		else if (committed == NFSWRITE_DATASYNC &&
		    commit == NFSWRITE_UNSTABLE)
			committed = commit;
		/*
		 * tl now points at the write verifier.  Save it if none has
		 * been saved yet; otherwise, a change of verifier is noted
		 * via *must_commit and the new verifier is saved.
		 */
		if (commit_thru_mds != 0) {
			NFSLOCKMNT(nmp);
			if (!NFSHASWRITEVERF(nmp)) {
				NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
				NFSSETWRITEVERF(nmp);
			} else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF) &&
			    *must_commit != 2) {
				*must_commit = 1;
				NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
			}
			NFSUNLOCKMNT(nmp);
		} else {
			NFSLOCKDS(dsp);
			if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
				NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
				dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
			} else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF) &&
			    *must_commit != 2) {
				*must_commit = 1;
				NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
			}
			NFSUNLOCKDS(dsp);
		}
	}
nfsmout:
	if (nd->nd_mrep != NULL)
		m_freem(nd->nd_mrep);
	*iomode = committed;
	if (nd->nd_repstat != 0 && error == 0)
		error = nd->nd_repstat;
	return (error);
}
6924 
/*
 * The actual write RPC done to a DS.
 * This variant is called from a separate kernel process for mirrors.
 * Any short write is considered an IO error.
 *
 * The "len" bytes of data to be written are supplied as the mbuf list
 * "m", which is linked onto the end of the request (when len > 0),
 * instead of being copied from a uio.  The stateid is always put in
 * NFSv4 requests.
 *
 * On entry *iomode is the stable-how value sent in the request; once a
 * reply has been received it is set to the commitment level from that
 * reply (or left as NFSWRITE_FILESYNC when the reply was not parsed
 * that far).  If the write verifier in the reply differs from the one
 * saved in "dsp" and *must_commit is not 2, *must_commit is set to 1.
 *
 * Returns 0, the error from newnfs_request(), the status in the reply,
 * NFSERR_IO when the byte count written is not "len" or a reply parsing
 * error.
 */
static int
nfsrpc_writedsmir(vnode_t vp, int *iomode, int *must_commit,
    nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
    struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers,
    struct ucred *cred, NFSPROC_T *p)
{
	uint32_t *tl;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int attrflag, error, commit, committed = NFSWRITE_FILESYNC, rlen;
	struct nfsrv_descript nfsd;
	struct nfsrv_descript *nd = &nfsd;
	struct nfssockreq *nrp;
	struct nfsvattr na;

	nd->nd_mrep = NULL;
	if (vers == 0 || vers == NFS_VER4) {
		nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
		    NULL);
		vers = NFS_VER4;
		NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers4 minvers=%d\n",
		    minorvers);
		nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
	} else {
		nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
		    NULL);
		/* Move the count from the WRITE to the WRITEDS statistic. */
		NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITE]);
		NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITEDS]);
		NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers3\n");
		/* One more word than NFSv4, for the extra length below. */
		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
	}
	/* Arguments: offset, (v3 only) length, stable-how, data length. */
	txdr_hyper(io_off, tl);
	tl += 2;
	if (vers == NFS_VER3)
		*tl++ = txdr_unsigned(len);
	*tl++ = txdr_unsigned(*iomode);
	*tl = txdr_unsigned(len);
	if (len > 0) {
		/* Put data in mbuf chain. */
		nd->nd_mb->m_next = m;
	}
	nrp = dsp->nfsclds_sockp;
	if (nrp == NULL)
		/* If NULL, use the MDS socket. */
		nrp = &nmp->nm_sockreq;
	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
	NFSCL_DEBUG(4, "nfsrpc_writedsmir: err=%d stat=%d\n", error,
	    nd->nd_repstat);
	if (error != 0)
		return (error);
	if (nd->nd_repstat != 0)
		error = nd->nd_repstat;
	else {
		if (vers == NFS_VER3) {
			error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
			    NULL);
			NFSCL_DEBUG(4, "nfsrpc_writedsmir: wcc_data=%d\n",
			    error);
			if (error != 0)
				goto nfsmout;
		}
		/* Reply: bytes written, commitment level, write verifier. */
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
		rlen = fxdr_unsigned(int, *tl++);
		NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n", len,
		    rlen);
		/* Anything other than a complete write is a failure. */
		if (rlen != len) {
			error = NFSERR_IO;
			NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n",
			    len, rlen);
			goto nfsmout;
		}
		commit = fxdr_unsigned(int, *tl++);

		/*
		 * Return the lowest commitment level
		 * obtained by any of the RPCs.
		 * (Only one RPC is done here and committed starts out as
		 * NFSWRITE_FILESYNC, so this always takes the level from
		 * the reply.)
		 */
		if (committed == NFSWRITE_FILESYNC)
			committed = commit;
		else if (committed == NFSWRITE_DATASYNC &&
		    commit == NFSWRITE_UNSTABLE)
			committed = commit;
		/*
		 * tl now points at the write verifier.  Save it if none has
		 * been saved for this DS yet; otherwise, a change of verifier
		 * is noted via *must_commit and the new verifier is saved.
		 */
		NFSLOCKDS(dsp);
		if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
			NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
			dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
		} else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF) &&
		    *must_commit != 2) {
			*must_commit = 1;
			NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
		}
		NFSUNLOCKDS(dsp);
	}
nfsmout:
	if (nd->nd_mrep != NULL)
		m_freem(nd->nd_mrep);
	*iomode = committed;
	if (nd->nd_repstat != 0 && error == 0)
		error = nd->nd_repstat;
	return (error);
}
7034 
7035 /*
7036  * Start up the thread that will execute nfsrpc_writedsmir().
7037  */
7038 static void
7039 start_writedsmir(void *arg, int pending)
7040 {
7041 	struct nfsclwritedsdorpc *drpc;
7042 
7043 	drpc = (struct nfsclwritedsdorpc *)arg;
7044 	drpc->err = nfsrpc_writedsmir(drpc->vp, &drpc->iomode,
7045 	    &drpc->must_commit, drpc->stateidp, drpc->dsp, drpc->off, drpc->len,
7046 	    drpc->fhp, drpc->m, drpc->vers, drpc->minorvers, drpc->cred,
7047 	    drpc->p);
7048 	drpc->done = 1;
7049 	crfree(drpc->cred);
7050 	NFSCL_DEBUG(4, "start_writedsmir: err=%d\n", drpc->err);
7051 }
7052 
7053 /*
7054  * Set up the write DS mirror call for the pNFS I/O thread.
7055  */
7056 static int
7057 nfsio_writedsmir(vnode_t vp, int *iomode, int *must_commit,
7058     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t off, int len,
7059     struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers,
7060     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
7061 {
7062 	int error, ret;
7063 
7064 	error = 0;
7065 	drpc->done = 0;
7066 	drpc->vp = vp;
7067 	drpc->iomode = *iomode;
7068 	drpc->must_commit = *must_commit;
7069 	drpc->stateidp = stateidp;
7070 	drpc->dsp = dsp;
7071 	drpc->off = off;
7072 	drpc->len = len;
7073 	drpc->fhp = fhp;
7074 	drpc->m = m;
7075 	drpc->vers = vers;
7076 	drpc->minorvers = minorvers;
7077 	drpc->cred = crhold(cred);
7078 	drpc->p = p;
7079 	drpc->inprog = 0;
7080 	ret = EIO;
7081 	if (nfs_pnfsiothreads != 0) {
7082 		ret = nfs_pnfsio(start_writedsmir, drpc);
7083 		NFSCL_DEBUG(4, "nfsio_writedsmir: nfs_pnfsio=%d\n", ret);
7084 	}
7085 	if (ret != 0) {
7086 		error = nfsrpc_writedsmir(vp, iomode, &drpc->must_commit,
7087 		    stateidp, dsp, off, len, fhp, m, vers, minorvers, cred, p);
7088 		crfree(drpc->cred);
7089 	}
7090 	NFSCL_DEBUG(4, "nfsio_writedsmir: error=%d\n", error);
7091 	return (error);
7092 }
7093 
7094 /*
7095  * Free up the nfsclds structure.
7096  */
7097 void
7098 nfscl_freenfsclds(struct nfsclds *dsp)
7099 {
7100 	int i;
7101 
7102 	if (dsp == NULL)
7103 		return;
7104 	if (dsp->nfsclds_sockp != NULL) {
7105 		NFSFREECRED(dsp->nfsclds_sockp->nr_cred);
7106 		NFSFREEMUTEX(&dsp->nfsclds_sockp->nr_mtx);
7107 		free(dsp->nfsclds_sockp->nr_nam, M_SONAME);
7108 		free(dsp->nfsclds_sockp, M_NFSSOCKREQ);
7109 	}
7110 	NFSFREEMUTEX(&dsp->nfsclds_mtx);
7111 	NFSFREEMUTEX(&dsp->nfsclds_sess.nfsess_mtx);
7112 	for (i = 0; i < NFSV4_CBSLOTS; i++) {
7113 		if (dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply != NULL)
7114 			m_freem(
7115 			    dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply);
7116 	}
7117 	free(dsp, M_NFSCLDS);
7118 }
7119 
7120 static enum nfsclds_state
7121 nfscl_getsameserver(struct nfsmount *nmp, struct nfsclds *newdsp,
7122     struct nfsclds **retdspp, uint32_t *sequencep)
7123 {
7124 	struct nfsclds *dsp;
7125 	int fndseq;
7126 
7127 	/*
7128 	 * Search the list of nfsclds structures for one with the same
7129 	 * server.
7130 	 */
7131 	fndseq = 0;
7132 	TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) {
7133 		if (dsp->nfsclds_servownlen == newdsp->nfsclds_servownlen &&
7134 		    dsp->nfsclds_servownlen != 0 &&
7135 		    !NFSBCMP(dsp->nfsclds_serverown, newdsp->nfsclds_serverown,
7136 		    dsp->nfsclds_servownlen) &&
7137 		    dsp->nfsclds_sess.nfsess_defunct == 0) {
7138 			NFSCL_DEBUG(4, "fnd same fdsp=%p dsp=%p flg=0x%x\n",
7139 			    TAILQ_FIRST(&nmp->nm_sess), dsp,
7140 			    dsp->nfsclds_flags);
7141 			if (fndseq == 0) {
7142 				/* Get sequenceid# from first entry. */
7143 				*sequencep =
7144 				    dsp->nfsclds_sess.nfsess_sequenceid;
7145 				fndseq = 1;
7146 			}
7147 			/* Server major id matches. */
7148 			if ((dsp->nfsclds_flags & NFSCLDS_DS) != 0) {
7149 				*retdspp = dsp;
7150 				return (NFSDSP_USETHISSESSION);
7151 			}
7152 		}
7153 	}
7154 	if (fndseq != 0)
7155 		return (NFSDSP_SEQTHISSESSION);
7156 	return (NFSDSP_NOTFOUND);
7157 }
7158 
/*
 * NFS commit rpc to a NFSv4.1 DS.
 *
 * Commits "cnt" bytes starting at "offset" of the file "fhp" on the data
 * server "dsp".  A "vers" of 0 or NFS_VER4 results in an NFSv4 COMMITDS
 * request; any other value results in a COMMIT request.
 *
 * The write verifier in the reply is compared with the one saved in
 * "dsp".  If it differs, the saved one is replaced and
 * NFSERR_STALEWRITEVERF is returned.
 *
 * Returns 0, the error from newnfs_request(), the status in the reply,
 * NFSERR_STALEWRITEVERF or a reply parsing error.
 */
static int
nfsrpc_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
    struct nfsfh *fhp, int vers, int minorvers, struct ucred *cred,
    NFSPROC_T *p)
{
	uint32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	struct nfssockreq *nrp;
	struct nfsvattr na;
	int attrflag, error;

	nd->nd_mrep = NULL;
	if (vers == 0 || vers == NFS_VER4) {
		nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh,
		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
		    NULL);
		vers = NFS_VER4;
	} else {
		nfscl_reqstart(nd, NFSPROC_COMMIT, nmp, fhp->nfh_fh,
		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
		    NULL);
		/* Move the count from the COMMIT to the COMMITDS statistic. */
		NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_COMMIT]);
		NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_COMMITDS]);
	}
	NFSCL_DEBUG(4, "nfsrpc_commitds: vers=%d minvers=%d\n", vers,
	    minorvers);
	/* Arguments: 64bit offset followed by a 32bit count. */
	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
	txdr_hyper(offset, tl);
	tl += 2;
	*tl = txdr_unsigned(cnt);
	nrp = dsp->nfsclds_sockp;
	if (nrp == NULL)
		/* If NULL, use the MDS socket. */
		nrp = &nmp->nm_sockreq;
	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
	NFSCL_DEBUG(4, "nfsrpc_commitds: err=%d stat=%d\n", error,
	    nd->nd_repstat);
	if (error != 0)
		return (error);
	if (nd->nd_repstat == 0) {
		if (vers == NFS_VER3) {
			error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
			    NULL);
			NFSCL_DEBUG(4, "nfsrpc_commitds: wccdata=%d\n", error);
			if (error != 0)
				goto nfsmout;
		}
		/* The reply holds the write verifier. */
		NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
		NFSLOCKDS(dsp);
		if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
			/* The verifier changed: save it and report that. */
			NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
			error = NFSERR_STALEWRITEVERF;
		}
		NFSUNLOCKDS(dsp);
	}
nfsmout:
	if (error == 0 && nd->nd_repstat != 0)
		error = nd->nd_repstat;
	m_freem(nd->nd_mrep);
	return (error);
}
7225 
7226 /*
7227  * Start up the thread that will execute nfsrpc_commitds().
7228  */
7229 static void
7230 start_commitds(void *arg, int pending)
7231 {
7232 	struct nfsclwritedsdorpc *drpc;
7233 
7234 	drpc = (struct nfsclwritedsdorpc *)arg;
7235 	drpc->err = nfsrpc_commitds(drpc->vp, drpc->off, drpc->len,
7236 	    drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers, drpc->cred,
7237 	    drpc->p);
7238 	drpc->done = 1;
7239 	crfree(drpc->cred);
7240 	NFSCL_DEBUG(4, "start_commitds: err=%d\n", drpc->err);
7241 }
7242 
7243 /*
7244  * Set up the commit DS mirror call for the pNFS I/O thread.
7245  */
7246 static int
7247 nfsio_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
7248     struct nfsfh *fhp, int vers, int minorvers,
7249     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
7250 {
7251 	int error, ret;
7252 
7253 	error = 0;
7254 	drpc->done = 0;
7255 	drpc->vp = vp;
7256 	drpc->off = offset;
7257 	drpc->len = cnt;
7258 	drpc->dsp = dsp;
7259 	drpc->fhp = fhp;
7260 	drpc->vers = vers;
7261 	drpc->minorvers = minorvers;
7262 	drpc->cred = crhold(cred);
7263 	drpc->p = p;
7264 	drpc->inprog = 0;
7265 	ret = EIO;
7266 	if (nfs_pnfsiothreads != 0) {
7267 		ret = nfs_pnfsio(start_commitds, drpc);
7268 		NFSCL_DEBUG(4, "nfsio_commitds: nfs_pnfsio=%d\n", ret);
7269 	}
7270 	if (ret != 0) {
7271 		error = nfsrpc_commitds(vp, offset, cnt, dsp, fhp, vers,
7272 		    minorvers, cred, p);
7273 		crfree(drpc->cred);
7274 	}
7275 	NFSCL_DEBUG(4, "nfsio_commitds: error=%d\n", error);
7276 	return (error);
7277 }
7278 
7279 /*
7280  * NFS Advise rpc
7281  */
7282 int
7283 nfsrpc_advise(vnode_t vp, off_t offset, uint64_t cnt, int advise,
7284     struct ucred *cred, NFSPROC_T *p)
7285 {
7286 	u_int32_t *tl;
7287 	struct nfsrv_descript nfsd, *nd = &nfsd;
7288 	nfsattrbit_t hints;
7289 	int error;
7290 
7291 	NFSZERO_ATTRBIT(&hints);
7292 	if (advise == POSIX_FADV_WILLNEED)
7293 		NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED);
7294 	else if (advise == POSIX_FADV_DONTNEED)
7295 		NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED);
7296 	else
7297 		return (0);
7298 	NFSCL_REQSTART(nd, NFSPROC_IOADVISE, vp, cred);
7299 	nfsm_stateidtom(nd, NULL, NFSSTATEID_PUTALLZERO);
7300 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER);
7301 	txdr_hyper(offset, tl);
7302 	tl += 2;
7303 	txdr_hyper(cnt, tl);
7304 	nfsrv_putattrbit(nd, &hints);
7305 	error = nfscl_request(nd, vp, p, cred);
7306 	if (error != 0)
7307 		return (error);
7308 	if (nd->nd_repstat != 0)
7309 		error = nd->nd_repstat;
7310 	m_freem(nd->nd_mrep);
7311 	return (error);
7312 }
7313 
7314 #ifdef notyet
7315 /*
7316  * NFS advise rpc to a NFSv4.2 DS.
7317  */
7318 static int
7319 nfsrpc_adviseds(vnode_t vp, uint64_t offset, int cnt, int advise,
7320     struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers,
7321     struct ucred *cred, NFSPROC_T *p)
7322 {
7323 	uint32_t *tl;
7324 	struct nfsrv_descript nfsd, *nd = &nfsd;
7325 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
7326 	struct nfssockreq *nrp;
7327 	nfsattrbit_t hints;
7328 	int error;
7329 
7330 	/* For NFS DSs prior to NFSv4.2, just return OK. */
7331 	if (vers == NFS_VER3 || minorversion < NFSV42_MINORVERSION)
7332 		return (0);
7333 	NFSZERO_ATTRBIT(&hints);
7334 	if (advise == POSIX_FADV_WILLNEED)
7335 		NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED);
7336 	else if (advise == POSIX_FADV_DONTNEED)
7337 		NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED);
7338 	else
7339 		return (0);
7340 	nd->nd_mrep = NULL;
7341 	nfscl_reqstart(nd, NFSPROC_IOADVISEDS, nmp, fhp->nfh_fh,
7342 	    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers, NULL);
7343 	vers = NFS_VER4;
7344 	NFSCL_DEBUG(4, "nfsrpc_adviseds: vers=%d minvers=%d\n", vers,
7345 	    minorvers);
7346 	nfsm_stateidtom(nd, NULL, NFSSTATEID_PUTALLZERO);
7347 	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
7348 	txdr_hyper(offset, tl);
7349 	tl += 2;
7350 	*tl = txdr_unsigned(cnt);
7351 	nfsrv_putattrbit(nd, &hints);
7352 	nrp = dsp->nfsclds_sockp;
7353 	if (nrp == NULL)
7354 		/* If NULL, use the MDS socket. */
7355 		nrp = &nmp->nm_sockreq;
7356 	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
7357 	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
7358 	NFSCL_DEBUG(4, "nfsrpc_adviseds: err=%d stat=%d\n", error,
7359 	    nd->nd_repstat);
7360 	if (error != 0)
7361 		return (error);
7362 	if (nd->nd_repstat != 0)
7363 		error = nd->nd_repstat;
7364 	m_freem(nd->nd_mrep);
7365 	return (error);
7366 }
7367 
7368 /*
7369  * Start up the thread that will execute nfsrpc_commitds().
7370  */
7371 static void
7372 start_adviseds(void *arg, int pending)
7373 {
7374 	struct nfsclwritedsdorpc *drpc;
7375 
7376 	drpc = (struct nfsclwritedsdorpc *)arg;
7377 	drpc->err = nfsrpc_adviseds(drpc->vp, drpc->off, drpc->len,
7378 	    drpc->advise, drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers,
7379 	    drpc->cred, drpc->p);
7380 	drpc->done = 1;
7381 	crfree(drpc->cred);
7382 	NFSCL_DEBUG(4, "start_adviseds: err=%d\n", drpc->err);
7383 }
7384 
7385 /*
7386  * Set up the advise DS mirror call for the pNFS I/O thread.
7387  */
7388 static int
7389 nfsio_adviseds(vnode_t vp, uint64_t offset, int cnt, int advise,
7390     struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers,
7391     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
7392 {
7393 	int error, ret;
7394 
7395 	error = 0;
7396 	drpc->done = 0;
7397 	drpc->vp = vp;
7398 	drpc->off = offset;
7399 	drpc->len = cnt;
7400 	drpc->advise = advise;
7401 	drpc->dsp = dsp;
7402 	drpc->fhp = fhp;
7403 	drpc->vers = vers;
7404 	drpc->minorvers = minorvers;
7405 	drpc->cred = crhold(cred);
7406 	drpc->p = p;
7407 	drpc->inprog = 0;
7408 	ret = EIO;
7409 	if (nfs_pnfsiothreads != 0) {
7410 		ret = nfs_pnfsio(start_adviseds, drpc);
7411 		NFSCL_DEBUG(4, "nfsio_adviseds: nfs_pnfsio=%d\n", ret);
7412 	}
7413 	if (ret != 0) {
7414 		error = nfsrpc_adviseds(vp, offset, cnt, advise, dsp, fhp, vers,
7415 		    minorvers, cred, p);
7416 		crfree(drpc->cred);
7417 	}
7418 	NFSCL_DEBUG(4, "nfsio_adviseds: error=%d\n", error);
7419 	return (error);
7420 }
7421 #endif	/* notyet */
7422 
7423 /*
7424  * Do the Allocate operation, retrying for recovery.
7425  */
7426 int
7427 nfsrpc_allocate(vnode_t vp, off_t off, off_t len, struct nfsvattr *nap,
7428     int *attrflagp, struct ucred *cred, NFSPROC_T *p)
7429 {
7430 	int error, expireret = 0, retrycnt, nostateid;
7431 	uint32_t clidrev = 0;
7432 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
7433 	struct nfsfh *nfhp = NULL;
7434 	nfsv4stateid_t stateid;
7435 	off_t tmp_off;
7436 	void *lckp;
7437 
7438 	if (len < 0)
7439 		return (EINVAL);
7440 	if (len == 0)
7441 		return (0);
7442 	tmp_off = off + len;
7443 	NFSLOCKMNT(nmp);
7444 	if (tmp_off > nmp->nm_maxfilesize || tmp_off < off) {
7445 		NFSUNLOCKMNT(nmp);
7446 		return (EFBIG);
7447 	}
7448 	if (nmp->nm_clp != NULL)
7449 		clidrev = nmp->nm_clp->nfsc_clientidrev;
7450 	NFSUNLOCKMNT(nmp);
7451 	nfhp = VTONFS(vp)->n_fhp;
7452 	retrycnt = 0;
7453 	do {
7454 		lckp = NULL;
7455 		nostateid = 0;
7456 		nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
7457 		    NFSV4OPEN_ACCESSWRITE, 0, cred, p, &stateid, &lckp);
7458 		if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
7459 		    stateid.other[2] == 0) {
7460 			nostateid = 1;
7461 			NFSCL_DEBUG(1, "stateid0 in allocate\n");
7462 		}
7463 
7464 		/*
7465 		 * Not finding a stateid should probably never happen,
7466 		 * but just return an error for this case.
7467 		 */
7468 		if (nostateid != 0)
7469 			error = EIO;
7470 		else
7471 			error = nfsrpc_allocaterpc(vp, off, len, &stateid,
7472 			    nap, attrflagp, cred, p);
7473 		if (error == NFSERR_STALESTATEID)
7474 			nfscl_initiate_recovery(nmp->nm_clp);
7475 		if (lckp != NULL)
7476 			nfscl_lockderef(lckp);
7477 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
7478 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
7479 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
7480 			(void) nfs_catnap(PZERO, error, "nfs_allocate");
7481 		} else if ((error == NFSERR_EXPIRED || (!NFSHASINT(nmp) &&
7482 		    error == NFSERR_BADSTATEID)) && clidrev != 0) {
7483 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
7484 		} else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp)) {
7485 			error = EIO;
7486 		}
7487 		retrycnt++;
7488 	} while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
7489 	    error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
7490 	    error == NFSERR_STALEDONTRECOVER ||
7491 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
7492 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
7493 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
7494 	if (error != 0 && retrycnt >= 4)
7495 		error = EIO;
7496 	return (error);
7497 }
7498 
7499 /*
7500  * The allocate RPC.
7501  */
7502 static int
7503 nfsrpc_allocaterpc(vnode_t vp, off_t off, off_t len, nfsv4stateid_t *stateidp,
7504     struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p)
7505 {
7506 	uint32_t *tl;
7507 	int error;
7508 	struct nfsrv_descript nfsd;
7509 	struct nfsrv_descript *nd = &nfsd;
7510 	nfsattrbit_t attrbits;
7511 
7512 	*attrflagp = 0;
7513 	NFSCL_REQSTART(nd, NFSPROC_ALLOCATE, vp, cred);
7514 	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
7515 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED);
7516 	txdr_hyper(off, tl); tl += 2;
7517 	txdr_hyper(len, tl); tl += 2;
7518 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
7519 	NFSGETATTR_ATTRBIT(&attrbits);
7520 	nfsrv_putattrbit(nd, &attrbits);
7521 	error = nfscl_request(nd, vp, p, cred);
7522 	if (error != 0)
7523 		return (error);
7524 	if (nd->nd_repstat == 0) {
7525 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7526 		error = nfsm_loadattr(nd, nap);
7527 		if (error == 0)
7528 			*attrflagp = NFS_LATTR_NOSHRINK;
7529 	} else
7530 		error = nd->nd_repstat;
7531 nfsmout:
7532 	m_freem(nd->nd_mrep);
7533 	return (error);
7534 }
7535 
7536 /*
7537  * Set up the XDR arguments for the LayoutGet operation.
7538  */
7539 static void
7540 nfsrv_setuplayoutget(struct nfsrv_descript *nd, int iomode, uint64_t offset,
7541     uint64_t len, uint64_t minlen, nfsv4stateid_t *stateidp, int layouttype,
7542     int layoutlen, int usecurstateid)
7543 {
7544 	uint32_t *tl;
7545 
7546 	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
7547 	    NFSX_STATEID);
7548 	*tl++ = newnfs_false;		/* Don't signal availability. */
7549 	*tl++ = txdr_unsigned(layouttype);
7550 	*tl++ = txdr_unsigned(iomode);
7551 	txdr_hyper(offset, tl);
7552 	tl += 2;
7553 	txdr_hyper(len, tl);
7554 	tl += 2;
7555 	txdr_hyper(minlen, tl);
7556 	tl += 2;
7557 	if (usecurstateid != 0) {
7558 		/* Special stateid for Current stateid. */
7559 		*tl++ = txdr_unsigned(1);
7560 		*tl++ = 0;
7561 		*tl++ = 0;
7562 		*tl++ = 0;
7563 	} else {
7564 		*tl++ = txdr_unsigned(stateidp->seqid);
7565 		NFSCL_DEBUG(4, "layget seq=%d\n", (int)stateidp->seqid);
7566 		*tl++ = stateidp->other[0];
7567 		*tl++ = stateidp->other[1];
7568 		*tl++ = stateidp->other[2];
7569 	}
7570 	*tl = txdr_unsigned(layoutlen);
7571 }
7572 
7573 /*
7574  * Parse the reply for a successful LayoutGet operation.
7575  */
7576 static int
7577 nfsrv_parselayoutget(struct nfsmount *nmp, struct nfsrv_descript *nd,
7578     nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp)
7579 {
7580 	uint32_t *tl;
7581 	struct nfsclflayout *flp, *prevflp, *tflp;
7582 	int cnt, error, fhcnt, gotiomode, i, iomode, j, k, l, laytype, nfhlen;
7583 	int m, mirrorcnt;
7584 	uint64_t retlen, off;
7585 	struct nfsfh *nfhp;
7586 	uint8_t *cp;
7587 	uid_t user;
7588 	gid_t grp;
7589 
7590 	NFSCL_DEBUG(4, "in nfsrv_parselayoutget\n");
7591 	error = 0;
7592 	flp = NULL;
7593 	gotiomode = -1;
7594 	NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_STATEID);
7595 	if (*tl++ != 0)
7596 		*retonclosep = 1;
7597 	else
7598 		*retonclosep = 0;
7599 	stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
7600 	NFSCL_DEBUG(4, "retoncls=%d stseq=%d\n", *retonclosep,
7601 	    (int)stateidp->seqid);
7602 	stateidp->other[0] = *tl++;
7603 	stateidp->other[1] = *tl++;
7604 	stateidp->other[2] = *tl++;
7605 	cnt = fxdr_unsigned(int, *tl);
7606 	NFSCL_DEBUG(4, "layg cnt=%d\n", cnt);
7607 	if (cnt <= 0 || cnt > 10000) {
7608 		/* Don't accept more than 10000 layouts in reply. */
7609 		error = NFSERR_BADXDR;
7610 		goto nfsmout;
7611 	}
7612 	for (i = 0; i < cnt; i++) {
7613 		/* Dissect to the layout type. */
7614 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER +
7615 		    3 * NFSX_UNSIGNED);
7616 		off = fxdr_hyper(tl); tl += 2;
7617 		retlen = fxdr_hyper(tl); tl += 2;
7618 		iomode = fxdr_unsigned(int, *tl++);
7619 		laytype = fxdr_unsigned(int, *tl);
7620 		NFSCL_DEBUG(4, "layt=%d off=%ju len=%ju iom=%d\n", laytype,
7621 		    (uintmax_t)off, (uintmax_t)retlen, iomode);
7622 		/* Ignore length of layout body for now. */
7623 		if (laytype == NFSLAYOUT_NFSV4_1_FILES) {
7624 			/* Parse the File layout up to fhcnt. */
7625 			NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED +
7626 			    NFSX_HYPER + NFSX_V4DEVICEID);
7627 			fhcnt = fxdr_unsigned(int, *(tl + 4 +
7628 			    NFSX_V4DEVICEID / NFSX_UNSIGNED));
7629 			NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
7630 			if (fhcnt < 0 || fhcnt > 100) {
7631 				/* Don't accept more than 100 file handles. */
7632 				error = NFSERR_BADXDR;
7633 				goto nfsmout;
7634 			}
7635 			if (fhcnt > 0)
7636 				flp = malloc(sizeof(*flp) + fhcnt *
7637 				    sizeof(struct nfsfh *), M_NFSFLAYOUT,
7638 				    M_WAITOK);
7639 			else
7640 				flp = malloc(sizeof(*flp), M_NFSFLAYOUT,
7641 				    M_WAITOK);
7642 			flp->nfsfl_flags = NFSFL_FILE;
7643 			flp->nfsfl_fhcnt = 0;
7644 			flp->nfsfl_devp = NULL;
7645 			flp->nfsfl_off = off;
7646 			if (flp->nfsfl_off + retlen < flp->nfsfl_off)
7647 				flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
7648 			else
7649 				flp->nfsfl_end = flp->nfsfl_off + retlen;
7650 			flp->nfsfl_iomode = iomode;
7651 			if (gotiomode == -1)
7652 				gotiomode = flp->nfsfl_iomode;
7653 			/* Ignore layout body length for now. */
7654 			NFSBCOPY(tl, flp->nfsfl_dev, NFSX_V4DEVICEID);
7655 			tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
7656 			flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++);
7657 			NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util);
7658 			mtx_lock(&nmp->nm_mtx);
7659 			if (nmp->nm_minorvers > 1 && (flp->nfsfl_util &
7660 			    NFSFLAYUTIL_IOADVISE_THRU_MDS) != 0)
7661 				nmp->nm_privflag |= NFSMNTP_IOADVISETHRUMDS;
7662 			mtx_unlock(&nmp->nm_mtx);
7663 			flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++);
7664 			flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2;
7665 			NFSCL_DEBUG(4, "stripe1=%u poff=%ju\n",
7666 			    flp->nfsfl_stripe1, (uintmax_t)flp->nfsfl_patoff);
7667 			for (j = 0; j < fhcnt; j++) {
7668 				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7669 				nfhlen = fxdr_unsigned(int, *tl);
7670 				if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) {
7671 					error = NFSERR_BADXDR;
7672 					goto nfsmout;
7673 				}
7674 				nfhp = malloc(sizeof(*nfhp) + nfhlen - 1,
7675 				    M_NFSFH, M_WAITOK);
7676 				flp->nfsfl_fh[j] = nfhp;
7677 				flp->nfsfl_fhcnt++;
7678 				nfhp->nfh_len = nfhlen;
7679 				NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen));
7680 				NFSBCOPY(cp, nfhp->nfh_fh, nfhlen);
7681 			}
7682 		} else if (laytype == NFSLAYOUT_FLEXFILE) {
7683 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED +
7684 			    NFSX_HYPER);
7685 			mirrorcnt = fxdr_unsigned(int, *(tl + 2));
7686 			NFSCL_DEBUG(4, "mirrorcnt=%d\n", mirrorcnt);
7687 			if (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS) {
7688 				error = NFSERR_BADXDR;
7689 				goto nfsmout;
7690 			}
7691 			flp = malloc(sizeof(*flp) + mirrorcnt *
7692 			    sizeof(struct nfsffm), M_NFSFLAYOUT, M_WAITOK);
7693 			flp->nfsfl_flags = NFSFL_FLEXFILE;
7694 			flp->nfsfl_mirrorcnt = mirrorcnt;
7695 			for (j = 0; j < mirrorcnt; j++)
7696 				flp->nfsfl_ffm[j].devp = NULL;
7697 			flp->nfsfl_off = off;
7698 			if (flp->nfsfl_off + retlen < flp->nfsfl_off)
7699 				flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
7700 			else
7701 				flp->nfsfl_end = flp->nfsfl_off + retlen;
7702 			flp->nfsfl_iomode = iomode;
7703 			if (gotiomode == -1)
7704 				gotiomode = flp->nfsfl_iomode;
7705 			flp->nfsfl_stripeunit = fxdr_hyper(tl);
7706 			NFSCL_DEBUG(4, "stripeunit=%ju\n",
7707 			    (uintmax_t)flp->nfsfl_stripeunit);
7708 			for (j = 0; j < mirrorcnt; j++) {
7709 				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7710 				k = fxdr_unsigned(int, *tl);
7711 				if (k < 1 || k > 128) {
7712 					error = NFSERR_BADXDR;
7713 					goto nfsmout;
7714 				}
7715 				NFSCL_DEBUG(4, "servercnt=%d\n", k);
7716 				for (l = 0; l < k; l++) {
7717 					NFSM_DISSECT(tl, uint32_t *,
7718 					    NFSX_V4DEVICEID + NFSX_STATEID +
7719 					    2 * NFSX_UNSIGNED);
7720 					if (l == 0) {
7721 						/* Just use the first server. */
7722 						NFSBCOPY(tl,
7723 						    flp->nfsfl_ffm[j].dev,
7724 						    NFSX_V4DEVICEID);
7725 						tl += (NFSX_V4DEVICEID /
7726 						    NFSX_UNSIGNED);
7727 						tl++;
7728 						flp->nfsfl_ffm[j].st.seqid =
7729 						    *tl++;
7730 						flp->nfsfl_ffm[j].st.other[0] =
7731 						    *tl++;
7732 						flp->nfsfl_ffm[j].st.other[1] =
7733 						    *tl++;
7734 						flp->nfsfl_ffm[j].st.other[2] =
7735 						    *tl++;
7736 						NFSCL_DEBUG(4, "st.seqid=%u "
7737 						 "st.o0=0x%x st.o1=0x%x "
7738 						 "st.o2=0x%x\n",
7739 						 flp->nfsfl_ffm[j].st.seqid,
7740 						 flp->nfsfl_ffm[j].st.other[0],
7741 						 flp->nfsfl_ffm[j].st.other[1],
7742 						 flp->nfsfl_ffm[j].st.other[2]);
7743 					} else
7744 						tl += ((NFSX_V4DEVICEID +
7745 						    NFSX_STATEID +
7746 						    NFSX_UNSIGNED) /
7747 						    NFSX_UNSIGNED);
7748 					fhcnt = fxdr_unsigned(int, *tl);
7749 					NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
7750 					if (fhcnt < 1 ||
7751 					    fhcnt > NFSDEV_MAXVERS) {
7752 						error = NFSERR_BADXDR;
7753 						goto nfsmout;
7754 					}
7755 					for (m = 0; m < fhcnt; m++) {
7756 						NFSM_DISSECT(tl, uint32_t *,
7757 						    NFSX_UNSIGNED);
7758 						nfhlen = fxdr_unsigned(int,
7759 						    *tl);
7760 						NFSCL_DEBUG(4, "nfhlen=%d\n",
7761 						    nfhlen);
7762 						if (nfhlen <= 0 || nfhlen >
7763 						    NFSX_V4FHMAX) {
7764 							error = NFSERR_BADXDR;
7765 							goto nfsmout;
7766 						}
7767 						NFSM_DISSECT(cp, uint8_t *,
7768 						    NFSM_RNDUP(nfhlen));
7769 						if (l == 0) {
7770 							flp->nfsfl_ffm[j].fhcnt
7771 							    = fhcnt;
7772 							nfhp = malloc(
7773 							    sizeof(*nfhp) +
7774 							    nfhlen - 1, M_NFSFH,
7775 							    M_WAITOK);
7776 							flp->nfsfl_ffm[j].fh[m]
7777 							    = nfhp;
7778 							nfhp->nfh_len = nfhlen;
7779 							NFSBCOPY(cp,
7780 							    nfhp->nfh_fh,
7781 							    nfhlen);
7782 							NFSCL_DEBUG(4,
7783 							    "got fh\n");
7784 						}
7785 					}
7786 					/* Now, get the ffsd_user/ffds_group. */
7787 					error = nfsrv_parseug(nd, 0, &user,
7788 					    &grp, curthread);
7789 					NFSCL_DEBUG(4, "after parseu=%d\n",
7790 					    error);
7791 					if (error == 0)
7792 						error = nfsrv_parseug(nd, 1,
7793 						    &user, &grp, curthread);
7794 					NFSCL_DEBUG(4, "aft parseg=%d\n",
7795 					    grp);
7796 					if (error != 0)
7797 						goto nfsmout;
7798 					NFSCL_DEBUG(4, "user=%d group=%d\n",
7799 					    user, grp);
7800 					if (l == 0) {
7801 						flp->nfsfl_ffm[j].user = user;
7802 						flp->nfsfl_ffm[j].group = grp;
7803 						NFSCL_DEBUG(4,
7804 						    "usr=%d grp=%d\n", user,
7805 						    grp);
7806 					}
7807 				}
7808 			}
7809 			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7810 			flp->nfsfl_fflags = fxdr_unsigned(uint32_t, *tl++);
7811 #ifdef notnow
7812 			/*
7813 			 * At this time, there is no flag.
7814 			 * NFSFLEXFLAG_IOADVISE_THRU_MDS might need to be
7815 			 * added, or it may never exist?
7816 			 */
7817 			mtx_lock(&nmp->nm_mtx);
7818 			if (nmp->nm_minorvers > 1 && (flp->nfsfl_fflags &
7819 			    NFSFLEXFLAG_IOADVISE_THRU_MDS) != 0)
7820 				nmp->nm_privflag |= NFSMNTP_IOADVISETHRUMDS;
7821 			mtx_unlock(&nmp->nm_mtx);
7822 #endif
7823 			flp->nfsfl_statshint = fxdr_unsigned(uint32_t, *tl);
7824 			NFSCL_DEBUG(4, "fflags=0x%x statshint=%d\n",
7825 			    flp->nfsfl_fflags, flp->nfsfl_statshint);
7826 		} else {
7827 			error = NFSERR_BADXDR;
7828 			goto nfsmout;
7829 		}
7830 		if (flp->nfsfl_iomode == gotiomode) {
7831 			/* Keep the list in increasing offset order. */
7832 			tflp = LIST_FIRST(flhp);
7833 			prevflp = NULL;
7834 			while (tflp != NULL &&
7835 			    tflp->nfsfl_off < flp->nfsfl_off) {
7836 				prevflp = tflp;
7837 				tflp = LIST_NEXT(tflp, nfsfl_list);
7838 			}
7839 			if (prevflp == NULL)
7840 				LIST_INSERT_HEAD(flhp, flp, nfsfl_list);
7841 			else
7842 				LIST_INSERT_AFTER(prevflp, flp,
7843 				    nfsfl_list);
7844 			NFSCL_DEBUG(4, "flp inserted\n");
7845 		} else {
7846 			printf("nfscl_layoutget(): got wrong iomode\n");
7847 			nfscl_freeflayout(flp);
7848 		}
7849 		flp = NULL;
7850 	}
7851 nfsmout:
7852 	NFSCL_DEBUG(4, "eo nfsrv_parselayoutget=%d\n", error);
7853 	if (error != 0 && flp != NULL)
7854 		nfscl_freeflayout(flp);
7855 	return (error);
7856 }
7857 
7858 /*
7859  * Parse a user/group digit string.
7860  */
7861 static int
7862 nfsrv_parseug(struct nfsrv_descript *nd, int dogrp, uid_t *uidp, gid_t *gidp,
7863     NFSPROC_T *p)
7864 {
7865 	uint32_t *tl;
7866 	char *cp, *str, str0[NFSV4_SMALLSTR + 1];
7867 	uint32_t len = 0;
7868 	int error = 0;
7869 
7870 	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7871 	len = fxdr_unsigned(uint32_t, *tl);
7872 	str = NULL;
7873 	if (len > NFSV4_OPAQUELIMIT) {
7874 		error = NFSERR_BADXDR;
7875 		goto nfsmout;
7876 	}
7877 	NFSCL_DEBUG(4, "nfsrv_parseug: len=%d\n", len);
7878 	if (len == 0) {
7879 		if (dogrp != 0)
7880 			*gidp = GID_NOGROUP;
7881 		else
7882 			*uidp = UID_NOBODY;
7883 		return (0);
7884 	}
7885 	if (len > NFSV4_SMALLSTR)
7886 		str = malloc(len + 1, M_TEMP, M_WAITOK);
7887 	else
7888 		str = str0;
7889 	NFSM_DISSECT(cp, char *, NFSM_RNDUP(len));
7890 	NFSBCOPY(cp, str, len);
7891 	str[len] = '\0';
7892 	NFSCL_DEBUG(4, "nfsrv_parseug: str=%s\n", str);
7893 	if (dogrp != 0)
7894 		error = nfsv4_strtogid(nd, str, len, gidp);
7895 	else
7896 		error = nfsv4_strtouid(nd, str, len, uidp);
7897 nfsmout:
7898 	if (len > NFSV4_SMALLSTR)
7899 		free(str, M_TEMP);
7900 	NFSCL_DEBUG(4, "eo nfsrv_parseug=%d\n", error);
7901 	return (error);
7902 }
7903 
7904 /*
7905  * Similar to nfsrpc_getlayout(), except that it uses nfsrpc_openlayget(),
7906  * so that it does both an Open and a Layoutget.
7907  */
7908 static int
7909 nfsrpc_getopenlayout(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
7910     int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
7911     struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
7912     struct ucred *cred, NFSPROC_T *p)
7913 {
7914 	struct nfscllayout *lyp;
7915 	struct nfsclflayout *flp;
7916 	struct nfsclflayouthead flh;
7917 	int error, islocked, layoutlen, recalled, retonclose, usecurstateid;
7918 	int layouttype, laystat;
7919 	nfsv4stateid_t stateid;
7920 	struct nfsclsession *tsep;
7921 
7922 	error = 0;
7923 	if (NFSHASFLEXFILE(nmp))
7924 		layouttype = NFSLAYOUT_FLEXFILE;
7925 	else
7926 		layouttype = NFSLAYOUT_NFSV4_1_FILES;
7927 	/*
7928 	 * If lyp is returned non-NULL, there will be a refcnt (shared lock)
7929 	 * on it, iff flp != NULL or a lock (exclusive lock) on it iff
7930 	 * flp == NULL.
7931 	 */
7932 	lyp = nfscl_getlayout(nmp->nm_clp, newfhp, newfhlen, 0, mode, &flp,
7933 	    &recalled);
7934 	NFSCL_DEBUG(4, "nfsrpc_getopenlayout nfscl_getlayout lyp=%p\n", lyp);
7935 	if (lyp == NULL)
7936 		islocked = 0;
7937 	else if (flp != NULL)
7938 		islocked = 1;
7939 	else
7940 		islocked = 2;
7941 	if ((lyp == NULL || flp == NULL) && recalled == 0) {
7942 		LIST_INIT(&flh);
7943 		tsep = nfsmnt_mdssession(nmp);
7944 		layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID +
7945 		    3 * NFSX_UNSIGNED);
7946 		if (lyp == NULL)
7947 			usecurstateid = 1;
7948 		else {
7949 			usecurstateid = 0;
7950 			stateid.seqid = lyp->nfsly_stateid.seqid;
7951 			stateid.other[0] = lyp->nfsly_stateid.other[0];
7952 			stateid.other[1] = lyp->nfsly_stateid.other[1];
7953 			stateid.other[2] = lyp->nfsly_stateid.other[2];
7954 		}
7955 		error = nfsrpc_openlayoutrpc(nmp, vp, nfhp, fhlen,
7956 		    newfhp, newfhlen, mode, op, name, namelen,
7957 		    dpp, &stateid, usecurstateid, layouttype, layoutlen,
7958 		    &retonclose, &flh, &laystat, cred, p);
7959 		NFSCL_DEBUG(4, "aft nfsrpc_openlayoutrpc laystat=%d err=%d\n",
7960 		    laystat, error);
7961 		laystat = nfsrpc_layoutgetres(nmp, vp, newfhp, newfhlen,
7962 		    &stateid, retonclose, NULL, &lyp, &flh, layouttype, laystat,
7963 		    &islocked, cred, p);
7964 	} else
7965 		error = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen,
7966 		    mode, op, name, namelen, dpp, 0, 0, cred, p, 0, 0);
7967 	if (islocked == 2)
7968 		nfscl_rellayout(lyp, 1);
7969 	else if (islocked == 1)
7970 		nfscl_rellayout(lyp, 0);
7971 	return (error);
7972 }
7973 
7974 /*
7975  * This function does an Open+LayoutGet for an NFSv4.1 mount with pNFS
7976  * enabled, only for the CLAIM_NULL case.  All other NFSv4 Opens are
7977  * handled by nfsrpc_openrpc().
7978  * For the case where op == NULL, dvp is the directory.  When op != NULL, it
7979  * can be NULL.
7980  */
7981 static int
7982 nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
7983     int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
7984     struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
7985     nfsv4stateid_t *stateidp, int usecurstateid, int layouttype,
7986     int layoutlen, int *retonclosep, struct nfsclflayouthead *flhp,
7987     int *laystatp, struct ucred *cred, NFSPROC_T *p)
7988 {
7989 	uint32_t *tl;
7990 	struct nfsrv_descript nfsd, *nd = &nfsd;
7991 	struct nfscldeleg *ndp = NULL;
7992 	struct nfsvattr nfsva;
7993 	struct nfsclsession *tsep;
7994 	uint32_t rflags, deleg;
7995 	nfsattrbit_t attrbits;
7996 	int error, ret, acesize, limitby, iomode;
7997 
7998 	*dpp = NULL;
7999 	*laystatp = ENXIO;
8000 	nfscl_reqstart(nd, NFSPROC_OPENLAYGET, nmp, nfhp, fhlen, NULL, NULL,
8001 	    0, 0, cred);
8002 	NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED);
8003 	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
8004 	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
8005 	*tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
8006 	tsep = nfsmnt_mdssession(nmp);
8007 	*tl++ = tsep->nfsess_clientid.lval[0];
8008 	*tl = tsep->nfsess_clientid.lval[1];
8009 	nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
8010 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8011 	*tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
8012 	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
8013 	nfsm_strtom(nd, name, namelen);
8014 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8015 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8016 	NFSZERO_ATTRBIT(&attrbits);
8017 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
8018 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
8019 	nfsrv_putattrbit(nd, &attrbits);
8020 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8021 	*tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
8022 	if ((mode & NFSV4OPEN_ACCESSWRITE) != 0)
8023 		iomode = NFSLAYOUTIOMODE_RW;
8024 	else
8025 		iomode = NFSLAYOUTIOMODE_READ;
8026 	nfsrv_setuplayoutget(nd, iomode, 0, UINT64_MAX, 0, stateidp,
8027 	    layouttype, layoutlen, usecurstateid);
8028 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
8029 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
8030 	if (error != 0)
8031 		return (error);
8032 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
8033 	if (nd->nd_repstat != 0)
8034 		*laystatp = nd->nd_repstat;
8035 	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8036 		/* ND_NOMOREDATA will be set if the Open operation failed. */
8037 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
8038 		    6 * NFSX_UNSIGNED);
8039 		op->nfso_stateid.seqid = *tl++;
8040 		op->nfso_stateid.other[0] = *tl++;
8041 		op->nfso_stateid.other[1] = *tl++;
8042 		op->nfso_stateid.other[2] = *tl;
8043 		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
8044 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
8045 		if (error != 0)
8046 			goto nfsmout;
8047 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
8048 		deleg = fxdr_unsigned(u_int32_t, *tl);
8049 		if (deleg == NFSV4OPEN_DELEGATEREAD ||
8050 		    deleg == NFSV4OPEN_DELEGATEWRITE) {
8051 			if (!(op->nfso_own->nfsow_clp->nfsc_flags &
8052 			      NFSCLFLAGS_FIRSTDELEG))
8053 				op->nfso_own->nfsow_clp->nfsc_flags |=
8054 				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
8055 			ndp = malloc(sizeof(struct nfscldeleg) + newfhlen,
8056 			    M_NFSCLDELEG, M_WAITOK);
8057 			LIST_INIT(&ndp->nfsdl_owner);
8058 			LIST_INIT(&ndp->nfsdl_lock);
8059 			ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
8060 			ndp->nfsdl_fhlen = newfhlen;
8061 			NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
8062 			newnfs_copyincred(cred, &ndp->nfsdl_cred);
8063 			nfscl_lockinit(&ndp->nfsdl_rwlock);
8064 			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
8065 			    NFSX_UNSIGNED);
8066 			ndp->nfsdl_stateid.seqid = *tl++;
8067 			ndp->nfsdl_stateid.other[0] = *tl++;
8068 			ndp->nfsdl_stateid.other[1] = *tl++;
8069 			ndp->nfsdl_stateid.other[2] = *tl++;
8070 			ret = fxdr_unsigned(int, *tl);
8071 			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
8072 				ndp->nfsdl_flags = NFSCLDL_WRITE;
8073 				/*
8074 				 * Indicates how much the file can grow.
8075 				 */
8076 				NFSM_DISSECT(tl, u_int32_t *,
8077 				    3 * NFSX_UNSIGNED);
8078 				limitby = fxdr_unsigned(int, *tl++);
8079 				switch (limitby) {
8080 				case NFSV4OPEN_LIMITSIZE:
8081 					ndp->nfsdl_sizelimit = fxdr_hyper(tl);
8082 					break;
8083 				case NFSV4OPEN_LIMITBLOCKS:
8084 					ndp->nfsdl_sizelimit =
8085 					    fxdr_unsigned(u_int64_t, *tl++);
8086 					ndp->nfsdl_sizelimit *=
8087 					    fxdr_unsigned(u_int64_t, *tl);
8088 					break;
8089 				default:
8090 					error = NFSERR_BADXDR;
8091 					goto nfsmout;
8092 				};
8093 			} else
8094 				ndp->nfsdl_flags = NFSCLDL_READ;
8095 			if (ret != 0)
8096 				ndp->nfsdl_flags |= NFSCLDL_RECALL;
8097 			error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, false,
8098 			    &ret, &acesize, p);
8099 			if (error != 0)
8100 				goto nfsmout;
8101 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
8102 			error = NFSERR_BADXDR;
8103 			goto nfsmout;
8104 		}
8105 		if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) != 0 ||
8106 		    nfscl_assumeposixlocks)
8107 			op->nfso_posixlock = 1;
8108 		else
8109 			op->nfso_posixlock = 0;
8110 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
8111 		/* If the 2nd element == NFS_OK, the Getattr succeeded. */
8112 		if (*++tl == 0) {
8113 			error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
8114 			    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
8115 			    NULL, NULL, NULL, p, cred);
8116 			if (error != 0)
8117 				goto nfsmout;
8118 			if (ndp != NULL) {
8119 				ndp->nfsdl_change = nfsva.na_filerev;
8120 				ndp->nfsdl_modtime = nfsva.na_mtime;
8121 				ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
8122 				*dpp = ndp;
8123 				ndp = NULL;
8124 			}
8125 			/*
8126 			 * At this point, the Open has succeeded, so set
8127 			 * nd_repstat = NFS_OK.  If the Layoutget failed,
8128 			 * this function just won't return a layout.
8129 			 */
8130 			if (nd->nd_repstat == 0) {
8131 				NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8132 				*laystatp = fxdr_unsigned(int, *++tl);
8133 				if (*laystatp == 0) {
8134 					error = nfsrv_parselayoutget(nmp, nd,
8135 					    stateidp, retonclosep, flhp);
8136 					if (error != 0)
8137 						*laystatp = error;
8138 				}
8139 			} else
8140 				nd->nd_repstat = 0;	/* Return 0 for Open. */
8141 		}
8142 	}
8143 	if (nd->nd_repstat != 0 && error == 0)
8144 		error = nd->nd_repstat;
8145 nfsmout:
8146 	free(ndp, M_NFSCLDELEG);
8147 	m_freem(nd->nd_mrep);
8148 	return (error);
8149 }
8150 
8151 /*
8152  * Similar nfsrpc_createv4(), but also does the LayoutGet operation.
8153  * Used only for mounts with pNFS enabled.
8154  */
8155 static int
8156 nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
8157     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
8158     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
8159     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
8160     int *dattrflagp, int *unlockedp, nfsv4stateid_t *stateidp,
8161     int usecurstateid, int layouttype, int layoutlen, int *retonclosep,
8162     struct nfsclflayouthead *flhp, int *laystatp)
8163 {
8164 	uint32_t *tl;
8165 	int error = 0, deleg, newone, ret, acesize, limitby;
8166 	struct nfsrv_descript nfsd, *nd = &nfsd;
8167 	struct nfsclopen *op;
8168 	struct nfscldeleg *dp = NULL;
8169 	struct nfsnode *np;
8170 	struct nfsfh *nfhp;
8171 	struct nfsclsession *tsep;
8172 	nfsattrbit_t attrbits;
8173 	nfsv4stateid_t stateid;
8174 	struct nfsmount *nmp;
8175 
8176 	nmp = VFSTONFS(dvp->v_mount);
8177 	np = VTONFS(dvp);
8178 	*laystatp = ENXIO;
8179 	*unlockedp = 0;
8180 	*nfhpp = NULL;
8181 	*dpp = NULL;
8182 	*attrflagp = 0;
8183 	*dattrflagp = 0;
8184 	if (namelen > NFS_MAXNAMLEN)
8185 		return (ENAMETOOLONG);
8186 	NFSCL_REQSTART(nd, NFSPROC_CREATELAYGET, dvp, cred);
8187 	/*
8188 	 * For V4, this is actually an Open op.
8189 	 */
8190 	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
8191 	*tl++ = txdr_unsigned(owp->nfsow_seqid);
8192 	*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
8193 	    NFSV4OPEN_ACCESSREAD);
8194 	*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
8195 	tsep = nfsmnt_mdssession(nmp);
8196 	*tl++ = tsep->nfsess_clientid.lval[0];
8197 	*tl = tsep->nfsess_clientid.lval[1];
8198 	nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
8199 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
8200 	*tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
8201 	if ((fmode & O_EXCL) != 0) {
8202 		if (NFSHASSESSPERSIST(nmp)) {
8203 			/* Use GUARDED for persistent sessions. */
8204 			*tl = txdr_unsigned(NFSCREATE_GUARDED);
8205 			nfscl_fillsattr(nd, vap, dvp, 0, 0);
8206 		} else {
8207 			/* Otherwise, use EXCLUSIVE4_1. */
8208 			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
8209 			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
8210 			*tl++ = cverf.lval[0];
8211 			*tl = cverf.lval[1];
8212 			nfscl_fillsattr(nd, vap, dvp, 0, 0);
8213 		}
8214 	} else {
8215 		*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
8216 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
8217 	}
8218 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
8219 	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
8220 	nfsm_strtom(nd, name, namelen);
8221 	/* Get the new file's handle and attributes, plus save the FH. */
8222 	NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
8223 	*tl++ = txdr_unsigned(NFSV4OP_SAVEFH);
8224 	*tl++ = txdr_unsigned(NFSV4OP_GETFH);
8225 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8226 	NFSGETATTR_ATTRBIT(&attrbits);
8227 	nfsrv_putattrbit(nd, &attrbits);
8228 	/* Get the directory's post-op attributes. */
8229 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
8230 	*tl = txdr_unsigned(NFSV4OP_PUTFH);
8231 	(void)nfsm_fhtom(nmp, nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
8232 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
8233 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8234 	nfsrv_putattrbit(nd, &attrbits);
8235 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
8236 	*tl++ = txdr_unsigned(NFSV4OP_RESTOREFH);
8237 	*tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
8238 	nfsrv_setuplayoutget(nd, NFSLAYOUTIOMODE_RW, 0, UINT64_MAX, 0, stateidp,
8239 	    layouttype, layoutlen, usecurstateid);
8240 	error = nfscl_request(nd, dvp, p, cred);
8241 	if (error != 0)
8242 		return (error);
8243 	NFSCL_DEBUG(4, "nfsrpc_createlayout stat=%d err=%d\n", nd->nd_repstat,
8244 	    error);
8245 	if (nd->nd_repstat != 0)
8246 		*laystatp = nd->nd_repstat;
8247 	NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
8248 	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8249 		NFSCL_DEBUG(4, "nfsrpc_createlayout open succeeded\n");
8250 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
8251 		    6 * NFSX_UNSIGNED);
8252 		stateid.seqid = *tl++;
8253 		stateid.other[0] = *tl++;
8254 		stateid.other[1] = *tl++;
8255 		stateid.other[2] = *tl;
8256 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
8257 		if (error != 0)
8258 			goto nfsmout;
8259 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
8260 		deleg = fxdr_unsigned(int, *tl);
8261 		if (deleg == NFSV4OPEN_DELEGATEREAD ||
8262 		    deleg == NFSV4OPEN_DELEGATEWRITE) {
8263 			if (!(owp->nfsow_clp->nfsc_flags &
8264 			      NFSCLFLAGS_FIRSTDELEG))
8265 				owp->nfsow_clp->nfsc_flags |=
8266 				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
8267 			dp = malloc(sizeof(struct nfscldeleg) + NFSX_V4FHMAX,
8268 			    M_NFSCLDELEG, M_WAITOK);
8269 			LIST_INIT(&dp->nfsdl_owner);
8270 			LIST_INIT(&dp->nfsdl_lock);
8271 			dp->nfsdl_clp = owp->nfsow_clp;
8272 			newnfs_copyincred(cred, &dp->nfsdl_cred);
8273 			nfscl_lockinit(&dp->nfsdl_rwlock);
8274 			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
8275 			    NFSX_UNSIGNED);
8276 			dp->nfsdl_stateid.seqid = *tl++;
8277 			dp->nfsdl_stateid.other[0] = *tl++;
8278 			dp->nfsdl_stateid.other[1] = *tl++;
8279 			dp->nfsdl_stateid.other[2] = *tl++;
8280 			ret = fxdr_unsigned(int, *tl);
8281 			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
8282 				dp->nfsdl_flags = NFSCLDL_WRITE;
8283 				/*
8284 				 * Indicates how much the file can grow.
8285 				 */
8286 				NFSM_DISSECT(tl, u_int32_t *,
8287 				    3 * NFSX_UNSIGNED);
8288 				limitby = fxdr_unsigned(int, *tl++);
8289 				switch (limitby) {
8290 				case NFSV4OPEN_LIMITSIZE:
8291 					dp->nfsdl_sizelimit = fxdr_hyper(tl);
8292 					break;
8293 				case NFSV4OPEN_LIMITBLOCKS:
8294 					dp->nfsdl_sizelimit =
8295 					    fxdr_unsigned(u_int64_t, *tl++);
8296 					dp->nfsdl_sizelimit *=
8297 					    fxdr_unsigned(u_int64_t, *tl);
8298 					break;
8299 				default:
8300 					error = NFSERR_BADXDR;
8301 					goto nfsmout;
8302 				};
8303 			} else {
8304 				dp->nfsdl_flags = NFSCLDL_READ;
8305 			}
8306 			if (ret != 0)
8307 				dp->nfsdl_flags |= NFSCLDL_RECALL;
8308 			error = nfsrv_dissectace(nd, &dp->nfsdl_ace, false,
8309 			    &ret, &acesize, p);
8310 			if (error != 0)
8311 				goto nfsmout;
8312 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
8313 			error = NFSERR_BADXDR;
8314 			goto nfsmout;
8315 		}
8316 
8317 		/* Now, we should have the status for the SaveFH. */
8318 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8319 		if (*++tl == 0) {
8320 			NFSCL_DEBUG(4, "nfsrpc_createlayout SaveFH ok\n");
8321 			/*
8322 			 * Now, process the GetFH and Getattr for the newly
8323 			 * created file. nfscl_mtofh() will set
8324 			 * ND_NOMOREDATA if these weren't successful.
8325 			 */
8326 			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
8327 			NFSCL_DEBUG(4, "aft nfscl_mtofh err=%d\n", error);
8328 			if (error != 0)
8329 				goto nfsmout;
8330 		} else
8331 			nd->nd_flag |= ND_NOMOREDATA;
8332 		/* Now we have the PutFH and Getattr for the directory. */
8333 		if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8334 			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8335 			if (*++tl != 0)
8336 				nd->nd_flag |= ND_NOMOREDATA;
8337 			else {
8338 				NFSM_DISSECT(tl, uint32_t *, 2 *
8339 				    NFSX_UNSIGNED);
8340 				if (*++tl != 0)
8341 					nd->nd_flag |= ND_NOMOREDATA;
8342 			}
8343 		}
8344 		if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8345 			/* Load the directory attributes. */
8346 			error = nfsm_loadattr(nd, dnap);
8347 			NFSCL_DEBUG(4, "aft nfsm_loadattr err=%d\n", error);
8348 			if (error != 0)
8349 				goto nfsmout;
8350 			*dattrflagp = 1;
8351 			if (dp != NULL && *attrflagp != 0) {
8352 				dp->nfsdl_change = nnap->na_filerev;
8353 				dp->nfsdl_modtime = nnap->na_mtime;
8354 				dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
8355 			}
8356 			/*
8357 			 * We can now complete the Open state.
8358 			 */
8359 			nfhp = *nfhpp;
8360 			if (dp != NULL) {
8361 				dp->nfsdl_fhlen = nfhp->nfh_len;
8362 				NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh,
8363 				    nfhp->nfh_len);
8364 			}
8365 			/*
8366 			 * Get an Open structure that will be
8367 			 * attached to the OpenOwner, acquired already.
8368 			 */
8369 			error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len,
8370 			    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
8371 			    cred, p, NULL, &op, &newone, NULL, 0, false);
8372 			if (error != 0)
8373 				goto nfsmout;
8374 			op->nfso_stateid = stateid;
8375 			newnfs_copyincred(cred, &op->nfso_cred);
8376 
8377 			nfscl_openrelease(nmp, op, error, newone);
8378 			*unlockedp = 1;
8379 
8380 			/* Now, handle the RestoreFH and LayoutGet. */
8381 			if (nd->nd_repstat == 0) {
8382 				NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
8383 				*laystatp = fxdr_unsigned(int, *(tl + 3));
8384 				if (*laystatp == 0) {
8385 					error = nfsrv_parselayoutget(nmp, nd,
8386 					    stateidp, retonclosep, flhp);
8387 					if (error != 0)
8388 						*laystatp = error;
8389 				}
8390 				NFSCL_DEBUG(4, "aft nfsrv_parselayout err=%d\n",
8391 				    error);
8392 			} else
8393 				nd->nd_repstat = 0;
8394 		}
8395 	}
8396 	if (nd->nd_repstat != 0 && error == 0)
8397 		error = nd->nd_repstat;
8398 	if (error == NFSERR_STALECLIENTID)
8399 		nfscl_initiate_recovery(owp->nfsow_clp);
8400 nfsmout:
8401 	NFSCL_DEBUG(4, "eo nfsrpc_createlayout err=%d\n", error);
8402 	if (error == 0)
8403 		*dpp = dp;
8404 	else
8405 		free(dp, M_NFSCLDELEG);
8406 	m_freem(nd->nd_mrep);
8407 	return (error);
8408 }
8409 
8410 /*
8411  * Similar to nfsrpc_getopenlayout(), except that it used for the Create case.
8412  */
8413 static int
8414 nfsrpc_getcreatelayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
8415     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
8416     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
8417     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
8418     int *dattrflagp, int *unlockedp)
8419 {
8420 	struct nfscllayout *lyp;
8421 	struct nfsclflayouthead flh;
8422 	struct nfsfh *nfhp;
8423 	struct nfsclsession *tsep;
8424 	struct nfsmount *nmp;
8425 	nfsv4stateid_t stateid;
8426 	int error, layoutlen, layouttype, retonclose, laystat;
8427 
8428 	error = 0;
8429 	nmp = VFSTONFS(dvp->v_mount);
8430 	if (NFSHASFLEXFILE(nmp))
8431 		layouttype = NFSLAYOUT_FLEXFILE;
8432 	else
8433 		layouttype = NFSLAYOUT_NFSV4_1_FILES;
8434 	LIST_INIT(&flh);
8435 	tsep = nfsmnt_mdssession(nmp);
8436 	layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID + 3 * NFSX_UNSIGNED);
8437 	error = nfsrpc_createlayout(dvp, name, namelen, vap, cverf, fmode,
8438 	    owp, dpp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
8439 	    unlockedp, &stateid, 1, layouttype, layoutlen, &retonclose,
8440 	    &flh, &laystat);
8441 	NFSCL_DEBUG(4, "aft nfsrpc_createlayoutrpc laystat=%d err=%d\n",
8442 	    laystat, error);
8443 	lyp = NULL;
8444 	if (laystat == 0) {
8445 		nfhp = *nfhpp;
8446 		laystat = nfsrpc_layoutgetres(nmp, dvp, nfhp->nfh_fh,
8447 		    nfhp->nfh_len, &stateid, retonclose, NULL, &lyp, &flh,
8448 		    layouttype, laystat, NULL, cred, p);
8449 	} else
8450 		laystat = nfsrpc_layoutgetres(nmp, dvp, NULL, 0, &stateid,
8451 		    retonclose, NULL, &lyp, &flh, layouttype, laystat, NULL,
8452 		    cred, p);
8453 	if (laystat == 0)
8454 		nfscl_rellayout(lyp, 0);
8455 	return (error);
8456 }
8457 
8458 /*
8459  * Process the results of a layoutget() operation.
8460  */
8461 static int
8462 nfsrpc_layoutgetres(struct nfsmount *nmp, vnode_t vp, uint8_t *newfhp,
8463     int newfhlen, nfsv4stateid_t *stateidp, int retonclose, uint32_t *notifybit,
8464     struct nfscllayout **lypp, struct nfsclflayouthead *flhp, int layouttype,
8465     int laystat, int *islockedp, struct ucred *cred, NFSPROC_T *p)
8466 {
8467 	struct nfsclflayout *tflp;
8468 	struct nfscldevinfo *dip;
8469 	uint8_t *dev;
8470 	int i, mirrorcnt;
8471 
8472 	if (laystat == NFSERR_UNKNLAYOUTTYPE) {
8473 		NFSLOCKMNT(nmp);
8474 		if (!NFSHASFLEXFILE(nmp)) {
8475 			/* Switch to using Flex File Layout. */
8476 			nmp->nm_state |= NFSSTA_FLEXFILE;
8477 		} else if (layouttype == NFSLAYOUT_FLEXFILE) {
8478 			/* Disable pNFS. */
8479 			NFSCL_DEBUG(1, "disable PNFS\n");
8480 			nmp->nm_state &= ~(NFSSTA_PNFS | NFSSTA_FLEXFILE);
8481 		}
8482 		NFSUNLOCKMNT(nmp);
8483 	}
8484 	if (laystat == 0) {
8485 		NFSCL_DEBUG(4, "nfsrpc_layoutgetres at FOREACH\n");
8486 		LIST_FOREACH(tflp, flhp, nfsfl_list) {
8487 			if (layouttype == NFSLAYOUT_FLEXFILE)
8488 				mirrorcnt = tflp->nfsfl_mirrorcnt;
8489 			else
8490 				mirrorcnt = 1;
8491 			for (i = 0; i < mirrorcnt; i++) {
8492 				laystat = nfscl_adddevinfo(nmp, NULL, i, tflp);
8493 				NFSCL_DEBUG(4, "aft adddev=%d\n", laystat);
8494 				if (laystat != 0) {
8495 					if (layouttype == NFSLAYOUT_FLEXFILE)
8496 						dev = tflp->nfsfl_ffm[i].dev;
8497 					else
8498 						dev = tflp->nfsfl_dev;
8499 					laystat = nfsrpc_getdeviceinfo(nmp, dev,
8500 					    layouttype, notifybit, &dip, cred,
8501 					    p);
8502 					NFSCL_DEBUG(4, "aft nfsrpc_gdi=%d\n",
8503 					    laystat);
8504 					if (laystat != 0)
8505 						goto out;
8506 					laystat = nfscl_adddevinfo(nmp, dip, i,
8507 					    tflp);
8508 					if (laystat != 0)
8509 						printf("nfsrpc_layoutgetresout"
8510 						    ": cannot add\n");
8511 				}
8512 			}
8513 		}
8514 	}
8515 out:
8516 	if (laystat == 0) {
8517 		/*
8518 		 * nfscl_layout() always returns with the nfsly_lock
8519 		 * set to a refcnt (shared lock).
8520 		 * Passing in dvp is sufficient, since it is only used to
8521 		 * get the fsid for the file system.
8522 		 */
8523 		laystat = nfscl_layout(nmp, vp, newfhp, newfhlen, stateidp,
8524 		    layouttype, retonclose, flhp, lypp, cred, p);
8525 		NFSCL_DEBUG(4, "nfsrpc_layoutgetres: aft nfscl_layout=%d\n",
8526 		    laystat);
8527 		if (laystat == 0 && islockedp != NULL)
8528 			*islockedp = 1;
8529 	}
8530 	return (laystat);
8531 }
8532 
8533 /*
8534  * nfs copy_file_range operation.
8535  */
8536 int
8537 nfsrpc_copy_file_range(vnode_t invp, off_t *inoffp, vnode_t outvp,
8538     off_t *outoffp, size_t *lenp, unsigned int flags, int *inattrflagp,
8539     struct nfsvattr *innap, int *outattrflagp, struct nfsvattr *outnap,
8540     struct ucred *cred, bool consecutive, bool *must_commitp)
8541 {
8542 	int commit, error, expireret = 0, retrycnt;
8543 	u_int32_t clidrev = 0;
8544 	struct nfsmount *nmp = VFSTONFS(invp->v_mount);
8545 	struct nfsfh *innfhp = NULL, *outnfhp = NULL;
8546 	nfsv4stateid_t instateid, outstateid;
8547 	void *inlckp, *outlckp;
8548 
8549 	if (nmp->nm_clp != NULL)
8550 		clidrev = nmp->nm_clp->nfsc_clientidrev;
8551 	innfhp = VTONFS(invp)->n_fhp;
8552 	outnfhp = VTONFS(outvp)->n_fhp;
8553 	retrycnt = 0;
8554 	do {
8555 		/* Get both stateids. */
8556 		inlckp = NULL;
8557 		nfscl_getstateid(invp, innfhp->nfh_fh, innfhp->nfh_len,
8558 		    NFSV4OPEN_ACCESSREAD, 0, NULL, curthread, &instateid,
8559 		    &inlckp);
8560 		outlckp = NULL;
8561 		nfscl_getstateid(outvp, outnfhp->nfh_fh, outnfhp->nfh_len,
8562 		    NFSV4OPEN_ACCESSWRITE, 0, NULL, curthread, &outstateid,
8563 		    &outlckp);
8564 
8565 		error = nfsrpc_copyrpc(invp, *inoffp, outvp, *outoffp, lenp,
8566 		    &instateid, &outstateid, innap, inattrflagp, outnap,
8567 		    outattrflagp, consecutive, &commit, cred, curthread);
8568 		if (error == 0) {
8569 			if (commit != NFSWRITE_FILESYNC)
8570 				*must_commitp = true;
8571 			*inoffp += *lenp;
8572 			*outoffp += *lenp;
8573 		} else if (error == NFSERR_STALESTATEID)
8574 			nfscl_initiate_recovery(nmp->nm_clp);
8575 		if (inlckp != NULL)
8576 			nfscl_lockderef(inlckp);
8577 		if (outlckp != NULL)
8578 			nfscl_lockderef(outlckp);
8579 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8580 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8581 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
8582 			(void) nfs_catnap(PZERO, error, "nfs_cfr");
8583 		} else if ((error == NFSERR_EXPIRED || (!NFSHASINT(nmp) &&
8584 		    error == NFSERR_BADSTATEID)) && clidrev != 0) {
8585 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev,
8586 			    curthread);
8587 		} else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp)) {
8588 			error = EIO;
8589 		}
8590 		retrycnt++;
8591 	} while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
8592 	    error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
8593 	      error == NFSERR_STALEDONTRECOVER ||
8594 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
8595 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
8596 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
8597 	if (error != 0 && (retrycnt >= 4 ||
8598 	    error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
8599 	      error == NFSERR_STALEDONTRECOVER))
8600 		error = EIO;
8601 	return (error);
8602 }
8603 
8604 /*
8605  * The copy RPC.
8606  */
8607 static int
8608 nfsrpc_copyrpc(vnode_t invp, off_t inoff, vnode_t outvp, off_t outoff,
8609     size_t *lenp, nfsv4stateid_t *instateidp, nfsv4stateid_t *outstateidp,
8610     struct nfsvattr *innap, int *inattrflagp, struct nfsvattr *outnap,
8611     int *outattrflagp, bool consecutive, int *commitp, struct ucred *cred,
8612     NFSPROC_T *p)
8613 {
8614 	uint32_t *tl;
8615 	int error;
8616 	struct nfsrv_descript nfsd;
8617 	struct nfsrv_descript *nd = &nfsd;
8618 	struct nfsmount *nmp;
8619 	nfsattrbit_t attrbits;
8620 	uint64_t len;
8621 
8622 	nmp = VFSTONFS(outvp->v_mount);
8623 	*inattrflagp = *outattrflagp = 0;
8624 	*commitp = NFSWRITE_UNSTABLE;
8625 	len = *lenp;
8626 	*lenp = 0;
8627 	if (len > nfs_maxcopyrange)
8628 		len = nfs_maxcopyrange;
8629 	NFSCL_REQSTART(nd, NFSPROC_COPY, invp, cred);
8630 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8631 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8632 	NFSGETATTR_ATTRBIT(&attrbits);
8633 	nfsrv_putattrbit(nd, &attrbits);
8634 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8635 	*tl = txdr_unsigned(NFSV4OP_PUTFH);
8636 	(void)nfsm_fhtom(nmp, nd, VTONFS(outvp)->n_fhp->nfh_fh,
8637 	    VTONFS(outvp)->n_fhp->nfh_len, 0);
8638 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8639 	*tl = txdr_unsigned(NFSV4OP_COPY);
8640 	nfsm_stateidtom(nd, instateidp, NFSSTATEID_PUTSTATEID);
8641 	nfsm_stateidtom(nd, outstateidp, NFSSTATEID_PUTSTATEID);
8642 	NFSM_BUILD(tl, uint32_t *, 3 * NFSX_HYPER + 4 * NFSX_UNSIGNED);
8643 	txdr_hyper(inoff, tl); tl += 2;
8644 	txdr_hyper(outoff, tl); tl += 2;
8645 	txdr_hyper(len, tl); tl += 2;
8646 	if (consecutive)
8647 		*tl++ = newnfs_true;
8648 	else
8649 		*tl++ = newnfs_false;
8650 	*tl++ = newnfs_true;
8651 	*tl++ = 0;
8652 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8653 	NFSWRITEGETATTR_ATTRBIT(&attrbits);
8654 	nfsrv_putattrbit(nd, &attrbits);
8655 	error = nfscl_request(nd, invp, p, cred);
8656 	if (error != 0)
8657 		return (error);
8658 	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8659 		/* Get the input file's attributes. */
8660 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8661 		if (*(tl + 1) == 0) {
8662 			error = nfsm_loadattr(nd, innap);
8663 			if (error != 0)
8664 				goto nfsmout;
8665 			*inattrflagp = 1;
8666 		} else
8667 			nd->nd_flag |= ND_NOMOREDATA;
8668 	}
8669 	/* Skip over return stat for PutFH. */
8670 	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8671 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8672 		if (*++tl != 0)
8673 			nd->nd_flag |= ND_NOMOREDATA;
8674 	}
8675 	/* Skip over return stat for Copy. */
8676 	if ((nd->nd_flag & ND_NOMOREDATA) == 0)
8677 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8678 	if (nd->nd_repstat == 0) {
8679 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
8680 		if (*tl != 0) {
8681 			/* There should be no callback ids. */
8682 			error = NFSERR_BADXDR;
8683 			goto nfsmout;
8684 		}
8685 		NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED +
8686 		    NFSX_VERF);
8687 		len = fxdr_hyper(tl); tl += 2;
8688 		*commitp = fxdr_unsigned(int, *tl++);
8689 		NFSLOCKMNT(nmp);
8690 		if (!NFSHASWRITEVERF(nmp)) {
8691 			NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
8692 			NFSSETWRITEVERF(nmp);
8693 	    	} else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) {
8694 			NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
8695 			nd->nd_repstat = NFSERR_STALEWRITEVERF;
8696 		}
8697 		NFSUNLOCKMNT(nmp);
8698 		tl += (NFSX_VERF / NFSX_UNSIGNED);
8699 		if (nd->nd_repstat == 0 && *++tl != newnfs_true)
8700 			/* Must be a synchronous copy. */
8701 			nd->nd_repstat = NFSERR_NOTSUPP;
8702 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8703 		error = nfsm_loadattr(nd, outnap);
8704 		if (error == 0)
8705 			*outattrflagp = NFS_LATTR_NOSHRINK;
8706 		if (nd->nd_repstat == 0)
8707 			*lenp = len;
8708 	} else if (nd->nd_repstat == NFSERR_OFFLOADNOREQS) {
8709 		/*
8710 		 * For the case where consecutive is not supported, but
8711 		 * synchronous is supported, we can try consecutive == false
8712 		 * by returning this error.  Otherwise, return NFSERR_NOTSUPP,
8713 		 * since Copy cannot be done.
8714 		 */
8715 		if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8716 			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8717 			if (!consecutive || *++tl == newnfs_false)
8718 				nd->nd_repstat = NFSERR_NOTSUPP;
8719 		} else
8720 			nd->nd_repstat = NFSERR_BADXDR;
8721 	}
8722 	if (error == 0)
8723 		error = nd->nd_repstat;
8724 nfsmout:
8725 	m_freem(nd->nd_mrep);
8726 	return (error);
8727 }
8728 
8729 /*
8730  * Seek operation.
8731  */
8732 int
8733 nfsrpc_seek(vnode_t vp, off_t *offp, bool *eofp, int content,
8734     struct ucred *cred, struct nfsvattr *nap, int *attrflagp)
8735 {
8736 	int error, expireret = 0, retrycnt;
8737 	u_int32_t clidrev = 0;
8738 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
8739 	struct nfsnode *np = VTONFS(vp);
8740 	struct nfsfh *nfhp = NULL;
8741 	nfsv4stateid_t stateid;
8742 	void *lckp;
8743 
8744 	if (nmp->nm_clp != NULL)
8745 		clidrev = nmp->nm_clp->nfsc_clientidrev;
8746 	nfhp = np->n_fhp;
8747 	retrycnt = 0;
8748 	do {
8749 		lckp = NULL;
8750 		nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
8751 		    NFSV4OPEN_ACCESSREAD, 0, cred, curthread, &stateid, &lckp);
8752 		error = nfsrpc_seekrpc(vp, offp, &stateid, eofp, content,
8753 		    nap, attrflagp, cred);
8754 		if (error == NFSERR_STALESTATEID)
8755 			nfscl_initiate_recovery(nmp->nm_clp);
8756 		if (lckp != NULL)
8757 			nfscl_lockderef(lckp);
8758 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8759 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8760 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
8761 			(void) nfs_catnap(PZERO, error, "nfs_seek");
8762 		} else if ((error == NFSERR_EXPIRED || (!NFSHASINT(nmp) &&
8763 		    error == NFSERR_BADSTATEID)) && clidrev != 0) {
8764 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev,
8765 			    curthread);
8766 		} else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp)) {
8767 			error = EIO;
8768 		}
8769 		retrycnt++;
8770 	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8771 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8772 	    error == NFSERR_BADSESSION ||
8773 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
8774 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
8775 	     expireret == 0 && clidrev != 0 && retrycnt < 4) ||
8776 	    (error == NFSERR_OPENMODE && retrycnt < 4));
8777 	if (error && retrycnt >= 4)
8778 		error = EIO;
8779 	return (error);
8780 }
8781 
8782 /*
8783  * The seek RPC.
8784  */
8785 static int
8786 nfsrpc_seekrpc(vnode_t vp, off_t *offp, nfsv4stateid_t *stateidp, bool *eofp,
8787     int content, struct nfsvattr *nap, int *attrflagp, struct ucred *cred)
8788 {
8789 	uint32_t *tl;
8790 	int error;
8791 	struct nfsrv_descript nfsd;
8792 	struct nfsrv_descript *nd = &nfsd;
8793 	nfsattrbit_t attrbits;
8794 
8795 	*attrflagp = 0;
8796 	NFSCL_REQSTART(nd, NFSPROC_SEEK, vp, cred);
8797 	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
8798 	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
8799 	txdr_hyper(*offp, tl); tl += 2;
8800 	*tl++ = txdr_unsigned(content);
8801 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8802 	NFSGETATTR_ATTRBIT(&attrbits);
8803 	nfsrv_putattrbit(nd, &attrbits);
8804 	error = nfscl_request(nd, vp, curthread, cred);
8805 	if (error != 0)
8806 		return (error);
8807 	if (nd->nd_repstat == 0) {
8808 		NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED + NFSX_HYPER);
8809 		if (*tl++ == newnfs_true)
8810 			*eofp = true;
8811 		else
8812 			*eofp = false;
8813 		*offp = fxdr_hyper(tl);
8814 		/* Just skip over Getattr op status. */
8815 		error = nfsm_loadattr(nd, nap);
8816 		if (error == 0)
8817 			*attrflagp = 1;
8818 	}
8819 	error = nd->nd_repstat;
8820 nfsmout:
8821 	m_freem(nd->nd_mrep);
8822 	return (error);
8823 }
8824 
8825 /*
8826  * The getextattr RPC.
8827  */
8828 int
8829 nfsrpc_getextattr(vnode_t vp, const char *name, struct uio *uiop, ssize_t *lenp,
8830     struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p)
8831 {
8832 	uint32_t *tl;
8833 	int error;
8834 	struct nfsrv_descript nfsd;
8835 	struct nfsrv_descript *nd = &nfsd;
8836 	nfsattrbit_t attrbits;
8837 	uint32_t len, len2;
8838 
8839 	*attrflagp = 0;
8840 	NFSCL_REQSTART(nd, NFSPROC_GETEXTATTR, vp, cred);
8841 	nfsm_strtom(nd, name, strlen(name));
8842 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8843 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8844 	NFSGETATTR_ATTRBIT(&attrbits);
8845 	nfsrv_putattrbit(nd, &attrbits);
8846 	error = nfscl_request(nd, vp, p, cred);
8847 	if (error != 0)
8848 		return (error);
8849 	if (nd->nd_repstat == 0) {
8850 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
8851 		len = fxdr_unsigned(uint32_t, *tl);
8852 		/* Sanity check lengths. */
8853 		if (uiop != NULL && len > 0 && len <= IOSIZE_MAX &&
8854 		    uiop->uio_resid <= UINT32_MAX) {
8855 			len2 = uiop->uio_resid;
8856 			if (len2 >= len)
8857 				error = nfsm_mbufuio(nd, uiop, len);
8858 			else {
8859 				error = nfsm_mbufuio(nd, uiop, len2);
8860 				if (error == 0) {
8861 					/*
8862 					 * nfsm_mbufuio() advances to a multiple
8863 					 * of 4, so round up len2 as well.  Then
8864 					 * we need to advance over the rest of
8865 					 * the data, rounding up the remaining
8866 					 * length.
8867 					 */
8868 					len2 = NFSM_RNDUP(len2);
8869 					len2 = NFSM_RNDUP(len - len2);
8870 					if (len2 > 0)
8871 						error = nfsm_advance(nd, len2,
8872 						    -1);
8873 				}
8874 			}
8875 		} else if (uiop == NULL && len > 0) {
8876 			/* Just wants the length and not the data. */
8877 			error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
8878 		} else if (len > 0)
8879 			error = ENOATTR;
8880 		if (error != 0)
8881 			goto nfsmout;
8882 		*lenp = len;
8883 		/* Just skip over Getattr op status. */
8884 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8885 		error = nfsm_loadattr(nd, nap);
8886 		if (error == 0)
8887 			*attrflagp = 1;
8888 	}
8889 	if (error == 0)
8890 		error = nd->nd_repstat;
8891 nfsmout:
8892 	m_freem(nd->nd_mrep);
8893 	return (error);
8894 }
8895 
8896 /*
8897  * The setextattr RPC.
8898  */
8899 int
8900 nfsrpc_setextattr(vnode_t vp, const char *name, struct uio *uiop,
8901     struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p)
8902 {
8903 	uint32_t *tl;
8904 	int error;
8905 	struct nfsrv_descript nfsd;
8906 	struct nfsrv_descript *nd = &nfsd;
8907 	nfsattrbit_t attrbits;
8908 
8909 	*attrflagp = 0;
8910 	NFSCL_REQSTART(nd, NFSPROC_SETEXTATTR, vp, cred);
8911 	if (uiop->uio_resid > nd->nd_maxreq) {
8912 		/* nd_maxreq is set by NFSCL_REQSTART(). */
8913 		m_freem(nd->nd_mreq);
8914 		return (EINVAL);
8915 	}
8916 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8917 	*tl = txdr_unsigned(NFSV4SXATTR_EITHER);
8918 	nfsm_strtom(nd, name, strlen(name));
8919 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8920 	*tl = txdr_unsigned(uiop->uio_resid);
8921 	nfsm_uiombuf(nd, uiop, uiop->uio_resid);
8922 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8923 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8924 	NFSGETATTR_ATTRBIT(&attrbits);
8925 	nfsrv_putattrbit(nd, &attrbits);
8926 	error = nfscl_request(nd, vp, p, cred);
8927 	if (error != 0)
8928 		return (error);
8929 	if (nd->nd_repstat == 0) {
8930 		/* Just skip over the reply and Getattr op status. */
8931 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + 3 *
8932 		    NFSX_UNSIGNED);
8933 		error = nfsm_loadattr(nd, nap);
8934 		if (error == 0)
8935 			*attrflagp = 1;
8936 	}
8937 	if (error == 0)
8938 		error = nd->nd_repstat;
8939 nfsmout:
8940 	m_freem(nd->nd_mrep);
8941 	return (error);
8942 }
8943 
8944 /*
8945  * The removeextattr RPC.
8946  */
8947 int
8948 nfsrpc_rmextattr(vnode_t vp, const char *name, struct nfsvattr *nap,
8949     int *attrflagp, struct ucred *cred, NFSPROC_T *p)
8950 {
8951 	uint32_t *tl;
8952 	int error;
8953 	struct nfsrv_descript nfsd;
8954 	struct nfsrv_descript *nd = &nfsd;
8955 	nfsattrbit_t attrbits;
8956 
8957 	*attrflagp = 0;
8958 	NFSCL_REQSTART(nd, NFSPROC_RMEXTATTR, vp, cred);
8959 	nfsm_strtom(nd, name, strlen(name));
8960 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8961 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8962 	NFSGETATTR_ATTRBIT(&attrbits);
8963 	nfsrv_putattrbit(nd, &attrbits);
8964 	error = nfscl_request(nd, vp, p, cred);
8965 	if (error != 0)
8966 		return (error);
8967 	if (nd->nd_repstat == 0) {
8968 		/* Just skip over the reply and Getattr op status. */
8969 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + 3 *
8970 		    NFSX_UNSIGNED);
8971 		error = nfsm_loadattr(nd, nap);
8972 		if (error == 0)
8973 			*attrflagp = 1;
8974 	}
8975 	if (error == 0)
8976 		error = nd->nd_repstat;
8977 nfsmout:
8978 	m_freem(nd->nd_mrep);
8979 	return (error);
8980 }
8981 
8982 /*
8983  * The listextattr RPC.
8984  */
8985 int
8986 nfsrpc_listextattr(vnode_t vp, uint64_t *cookiep, struct uio *uiop,
8987     size_t *lenp, bool *eofp, struct nfsvattr *nap, int *attrflagp,
8988     struct ucred *cred, NFSPROC_T *p)
8989 {
8990 	uint32_t *tl;
8991 	int cnt, error, i, len;
8992 	struct nfsrv_descript nfsd;
8993 	struct nfsrv_descript *nd = &nfsd;
8994 	nfsattrbit_t attrbits;
8995 	u_char c;
8996 
8997 	*attrflagp = 0;
8998 	NFSCL_REQSTART(nd, NFSPROC_LISTEXTATTR, vp, cred);
8999 	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
9000 	txdr_hyper(*cookiep, tl); tl += 2;
9001 	*tl++ = txdr_unsigned(*lenp);
9002 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
9003 	NFSGETATTR_ATTRBIT(&attrbits);
9004 	nfsrv_putattrbit(nd, &attrbits);
9005 	error = nfscl_request(nd, vp, p, cred);
9006 	if (error != 0)
9007 		return (error);
9008 	*eofp = true;
9009 	*lenp = 0;
9010 	if (nd->nd_repstat == 0) {
9011 		NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
9012 		*cookiep = fxdr_hyper(tl); tl += 2;
9013 		cnt = fxdr_unsigned(int, *tl);
9014 		if (cnt < 0) {
9015 			error = EBADRPC;
9016 			goto nfsmout;
9017 		}
9018 		for (i = 0; i < cnt; i++) {
9019 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
9020 			len = fxdr_unsigned(int, *tl);
9021 			if (len <= 0 || len > EXTATTR_MAXNAMELEN) {
9022 				error = EBADRPC;
9023 				goto nfsmout;
9024 			}
9025 			if (uiop == NULL)
9026 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
9027 			else if (uiop->uio_resid >= len + 1) {
9028 				c = len;
9029 				error = uiomove(&c, sizeof(c), uiop);
9030 				if (error == 0)
9031 					error = nfsm_mbufuio(nd, uiop, len);
9032 			} else {
9033 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
9034 				*eofp = false;
9035 			}
9036 			if (error != 0)
9037 				goto nfsmout;
9038 			*lenp += (len + 1);
9039 		}
9040 		/* Get the eof and skip over the Getattr op status. */
9041 		NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED);
9042 		/*
9043 		 * *eofp is set false above, because it wasn't able to copy
9044 		 * all of the reply.
9045 		 */
9046 		if (*eofp && *tl == 0)
9047 			*eofp = false;
9048 		error = nfsm_loadattr(nd, nap);
9049 		if (error == 0)
9050 			*attrflagp = 1;
9051 	}
9052 	if (error == 0)
9053 		error = nd->nd_repstat;
9054 nfsmout:
9055 	m_freem(nd->nd_mrep);
9056 	return (error);
9057 }
9058 
9059 /*
9060  * Split an mbuf list.  For non-M_EXTPG mbufs, just use m_split().
9061  */
9062 static struct mbuf *
9063 nfsm_split(struct mbuf *mp, uint64_t xfer)
9064 {
9065 	struct mbuf *m, *m2;
9066 	vm_page_t pg;
9067 	int i, j, left, pgno, plen, trim;
9068 	char *cp, *cp2;
9069 
9070 	if ((mp->m_flags & M_EXTPG) == 0) {
9071 		m = m_split(mp, xfer, M_WAITOK);
9072 		return (m);
9073 	}
9074 
9075 	/* Find the correct mbuf to split at. */
9076 	for (m = mp; m != NULL && xfer > m->m_len; m = m->m_next)
9077 		xfer -= m->m_len;
9078 	if (m == NULL)
9079 		return (NULL);
9080 
9081 	/* If xfer == m->m_len, we can just split the mbuf list. */
9082 	if (xfer == m->m_len) {
9083 		m2 = m->m_next;
9084 		m->m_next = NULL;
9085 		return (m2);
9086 	}
9087 
9088 	/* Find the page to split at. */
9089 	pgno = 0;
9090 	left = xfer;
9091 	do {
9092 		if (pgno == 0)
9093 			plen = m_epg_pagelen(m, 0, m->m_epg_1st_off);
9094 		else
9095 			plen = m_epg_pagelen(m, pgno, 0);
9096 		if (left <= plen)
9097 			break;
9098 		left -= plen;
9099 		pgno++;
9100 	} while (pgno < m->m_epg_npgs);
9101 	if (pgno == m->m_epg_npgs)
9102 		panic("nfsm_split: eroneous ext_pgs mbuf");
9103 
9104 	m2 = mb_alloc_ext_pgs(M_WAITOK, mb_free_mext_pgs);
9105 	m2->m_epg_flags |= EPG_FLAG_ANON;
9106 
9107 	/*
9108 	 * If left < plen, allocate a new page for the new mbuf
9109 	 * and copy the data after left in the page to this new
9110 	 * page.
9111 	 */
9112 	if (left < plen) {
9113 		pg = vm_page_alloc_noobj(VM_ALLOC_WAITOK | VM_ALLOC_NODUMP |
9114 		    VM_ALLOC_WIRED);
9115 		m2->m_epg_pa[0] = VM_PAGE_TO_PHYS(pg);
9116 		m2->m_epg_npgs = 1;
9117 
9118 		/* Copy the data after left to the new page. */
9119 		trim = plen - left;
9120 		cp = (char *)(void *)PHYS_TO_DMAP(m->m_epg_pa[pgno]);
9121 		if (pgno == 0)
9122 			cp += m->m_epg_1st_off;
9123 		cp += left;
9124 		cp2 = (char *)(void *)PHYS_TO_DMAP(m2->m_epg_pa[0]);
9125 		if (pgno == m->m_epg_npgs - 1)
9126 			m2->m_epg_last_len = trim;
9127 		else {
9128 			cp2 += PAGE_SIZE - trim;
9129 			m2->m_epg_1st_off = PAGE_SIZE - trim;
9130 			m2->m_epg_last_len = m->m_epg_last_len;
9131 		}
9132 		memcpy(cp2, cp, trim);
9133 		m2->m_len = trim;
9134 	} else {
9135 		m2->m_len = 0;
9136 		m2->m_epg_last_len = m->m_epg_last_len;
9137 	}
9138 
9139 	/* Move the pages beyond pgno to the new mbuf. */
9140 	for (i = pgno + 1, j = m2->m_epg_npgs; i < m->m_epg_npgs; i++, j++) {
9141 		m2->m_epg_pa[j] = m->m_epg_pa[i];
9142 		/* Never moves page 0. */
9143 		m2->m_len += m_epg_pagelen(m, i, 0);
9144 	}
9145 	m2->m_epg_npgs = j;
9146 	m->m_epg_npgs = pgno + 1;
9147 	m->m_epg_last_len = left;
9148 	m->m_len = xfer;
9149 
9150 	m2->m_next = m->m_next;
9151 	m->m_next = NULL;
9152 	return (m2);
9153 }
9154 
9155 /*
9156  * Do the NFSv4.1 Bind Connection to Session.
9157  * Called from the reconnect layer of the krpc (sys/rpc/clnt_rc.c).
9158  */
9159 void
9160 nfsrpc_bindconnsess(CLIENT *cl, void *arg, struct ucred *cr)
9161 {
9162 	struct nfscl_reconarg *rcp = (struct nfscl_reconarg *)arg;
9163 	uint32_t res, *tl;
9164 	struct nfsrv_descript nfsd;
9165 	struct nfsrv_descript *nd = &nfsd;
9166 	struct rpc_callextra ext;
9167 	struct timeval utimeout;
9168 	enum clnt_stat stat;
9169 	int error;
9170 
9171 	nfscl_reqstart(nd, NFSPROC_BINDCONNTOSESS, NULL, NULL, 0, NULL, NULL,
9172 	    NFS_VER4, rcp->minorvers, NULL);
9173 	NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 2 * NFSX_UNSIGNED);
9174 	memcpy(tl, rcp->sessionid, NFSX_V4SESSIONID);
9175 	tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
9176 	*tl++ = txdr_unsigned(NFSCDFC4_FORE_OR_BOTH);
9177 	*tl = newnfs_false;
9178 
9179 	memset(&ext, 0, sizeof(ext));
9180 	utimeout.tv_sec = 30;
9181 	utimeout.tv_usec = 0;
9182 	ext.rc_auth = authunix_create(cr);
9183 	nd->nd_mrep = NULL;
9184 	stat = CLNT_CALL_MBUF(cl, &ext, NFSV4PROC_COMPOUND, nd->nd_mreq,
9185 	    &nd->nd_mrep, utimeout);
9186 	AUTH_DESTROY(ext.rc_auth);
9187 	if (stat != RPC_SUCCESS) {
9188 		printf("nfsrpc_bindconnsess: call failed stat=%d\n", stat);
9189 		return;
9190 	}
9191 	if (nd->nd_mrep == NULL) {
9192 		printf("nfsrpc_bindconnsess: no reply args\n");
9193 		return;
9194 	}
9195 	error = 0;
9196 	newnfs_realign(&nd->nd_mrep, M_WAITOK);
9197 	nd->nd_md = nd->nd_mrep;
9198 	nd->nd_dpos = mtod(nd->nd_md, char *);
9199 	NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
9200 	nd->nd_repstat = fxdr_unsigned(uint32_t, *tl++);
9201 	if (nd->nd_repstat == NFSERR_OK) {
9202 		res = fxdr_unsigned(uint32_t, *tl);
9203 		if (res > 0 && (error = nfsm_advance(nd, NFSM_RNDUP(res),
9204 		    -1)) != 0)
9205 			goto nfsmout;
9206 		NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
9207 		    4 * NFSX_UNSIGNED);
9208 		tl += 3;
9209 		if (!NFSBCMP(tl, rcp->sessionid, NFSX_V4SESSIONID)) {
9210 			tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
9211 			res = fxdr_unsigned(uint32_t, *tl);
9212 			if (res != NFSCDFS4_BOTH)
9213 				printf("nfsrpc_bindconnsess: did not "
9214 				    "return FS4_BOTH\n");
9215 		} else
9216 			printf("nfsrpc_bindconnsess: not same "
9217 			    "sessionid\n");
9218 	} else if (nd->nd_repstat != NFSERR_BADSESSION)
9219 		printf("nfsrpc_bindconnsess: returned %d\n", nd->nd_repstat);
9220 nfsmout:
9221 	if (error != 0)
9222 		printf("nfsrpc_bindconnsess: reply bad xdr\n");
9223 	m_freem(nd->nd_mrep);
9224 }
9225 
9226 /*
9227  * Do roughly what nfs_statfs() does for NFSv4, but when called with a shared
9228  * locked vnode.
9229  */
9230 static void
9231 nfscl_statfs(struct vnode *vp, struct ucred *cred, NFSPROC_T *td)
9232 {
9233 	struct nfsvattr nfsva;
9234 	struct nfsfsinfo fs;
9235 	struct nfsstatfs sb;
9236 	struct mount *mp;
9237 	struct nfsmount *nmp;
9238 	uint32_t lease;
9239 	int attrflag, error;
9240 
9241 	mp = vp->v_mount;
9242 	nmp = VFSTONFS(mp);
9243 	error = nfsrpc_statfs(vp, &sb, &fs, &lease, cred, td, &nfsva,
9244 	    &attrflag);
9245 	if (attrflag != 0)
9246 		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
9247 	if (error == 0) {
9248 		NFSLOCKCLSTATE();
9249 		if (nmp->nm_clp != NULL)
9250 			nmp->nm_clp->nfsc_renew = NFSCL_RENEW(lease);
9251 		NFSUNLOCKCLSTATE();
9252 		mtx_lock(&nmp->nm_mtx);
9253 		nfscl_loadfsinfo(nmp, &fs);
9254 		nfscl_loadsbinfo(nmp, &sb, &mp->mnt_stat);
9255 		mp->mnt_stat.f_iosize = newnfs_iosize(nmp);
9256 		mtx_unlock(&nmp->nm_mtx);
9257 	}
9258 }
9259