xref: /freebsd/sys/fs/nfsclient/nfs_clrpcops.c (revision 2a243b9539a45b392a515569cab2091844cf2bdf)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 /*
38  * Rpc op calls, generally called from the vnode op calls or through the
39  * buffer cache, for NFS v2, 3 and 4.
40  * These do not normally make any changes to vnode arguments or use
41  * structures that might change between the VFS variants. The returned
42  * arguments are all at the end, after the NFSPROC_T *p one.
43  */
44 
45 #ifndef APPLEKEXT
46 #include "opt_inet6.h"
47 
48 #include <fs/nfs/nfsport.h>
49 #include <sys/sysctl.h>
50 
51 SYSCTL_DECL(_vfs_nfs);
52 
53 static int	nfsignore_eexist = 0;
54 SYSCTL_INT(_vfs_nfs, OID_AUTO, ignore_eexist, CTLFLAG_RW,
55     &nfsignore_eexist, 0, "NFS ignore EEXIST replies for mkdir/symlink");
56 
57 /*
58  * Global variables
59  */
60 extern int nfs_numnfscbd;
61 extern struct timeval nfsboottime;
62 extern u_int32_t newnfs_false, newnfs_true;
63 extern nfstype nfsv34_type[9];
64 extern int nfsrv_useacl;
65 extern char nfsv4_callbackaddr[INET6_ADDRSTRLEN];
66 extern int nfscl_debuglevel;
67 NFSCLSTATEMUTEX;
68 int nfstest_outofseq = 0;
69 int nfscl_assumeposixlocks = 1;
70 int nfscl_enablecallb = 0;
71 short nfsv4_cbport = NFSV4_CBPORT;
72 int nfstest_openallsetattr = 0;
73 #endif	/* !APPLEKEXT */
74 
75 #define	DIRHDSIZ	offsetof(struct dirent, d_name)
76 
/*
 * The three possible results of nfscl_getsameserver():
 * NFSDSP_USETHISSESSION - Use this session for the DS.
 * NFSDSP_SEQTHISSESSION - Use the nfsclds_sequence field of this dsp for
 *     the new session.
 * NFSDSP_NOTFOUND - No matching server was found.
 */
enum nfsclds_state {
	NFSDSP_USETHISSESSION,		/* 0 */
	NFSDSP_SEQTHISSESSION,		/* 1 */
	NFSDSP_NOTFOUND,		/* 2 */
};
89 
90 static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *,
91     struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *);
92 static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *,
93     nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *, void *);
94 static int nfsrpc_writerpc(vnode_t , struct uio *, int *, int *,
95     struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *,
96     void *);
97 static int nfsrpc_createv23(vnode_t , char *, int, struct vattr *,
98     nfsquad_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *,
99     struct nfsvattr *, struct nfsfh **, int *, int *, void *);
100 static int nfsrpc_createv4(vnode_t , char *, int, struct vattr *,
101     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *,
102     NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *,
103     int *, void *, int *);
104 static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *,
105     struct nfscllockowner *, u_int64_t, u_int64_t,
106     u_int32_t, struct ucred *, NFSPROC_T *, int);
107 static int nfsrpc_setaclrpc(vnode_t, struct ucred *, NFSPROC_T *,
108     struct acl *, nfsv4stateid_t *, void *);
109 static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int,
110     uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **,
111     struct ucred *, NFSPROC_T *);
112 static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_in *,
113     struct sockaddr_in6 *, sa_family_t, int, struct nfsclds **, NFSPROC_T *);
114 static void nfscl_initsessionslots(struct nfsclsession *);
115 static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *,
116     nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
117     struct nfsclflayout *, uint64_t, uint64_t, int, struct ucred *,
118     NFSPROC_T *);
119 static int nfscl_dofflayoutio(vnode_t, struct uio *, int *, int *, int *,
120     nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
121     struct nfsclflayout *, uint64_t, uint64_t, int, int, struct mbuf *,
122     struct ucred *, NFSPROC_T *);
123 static struct mbuf *nfsm_copym(struct mbuf *, int, int);
124 static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *,
125     struct nfsclds *, uint64_t, int, struct nfsfh *, int, int, int,
126     struct ucred *, NFSPROC_T *);
127 static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *,
128     nfsv4stateid_t *, struct nfsclds *, uint64_t, int,
129     struct nfsfh *, int, int, int, int, struct ucred *, NFSPROC_T *);
130 static int nfsrpc_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
131     struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
132     struct ucred *, NFSPROC_T *);
133 static enum nfsclds_state nfscl_getsameserver(struct nfsmount *,
134     struct nfsclds *, struct nfsclds **);
135 static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *,
136     struct nfsfh *, int, int, struct ucred *, NFSPROC_T *);
137 static void nfsrv_setuplayoutget(struct nfsrv_descript *, int, uint64_t,
138     uint64_t, uint64_t, nfsv4stateid_t *, int, int, int);
139 static int nfsrv_parseug(struct nfsrv_descript *, int, uid_t *, gid_t *,
140     NFSPROC_T *);
141 static int nfsrv_parselayoutget(struct nfsrv_descript *, nfsv4stateid_t *,
142     int *, struct nfsclflayouthead *);
143 static int nfsrpc_getopenlayout(struct nfsmount *, vnode_t, u_int8_t *,
144     int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
145     struct nfscldeleg **, struct ucred *, NFSPROC_T *);
146 static int nfsrpc_getcreatelayout(vnode_t, char *, int, struct vattr *,
147     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
148     struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
149     struct nfsfh **, int *, int *, void *, int *);
150 static int nfsrpc_openlayoutrpc(struct nfsmount *, vnode_t, u_int8_t *,
151     int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
152     struct nfscldeleg **, nfsv4stateid_t *, int, int, int, int *,
153     struct nfsclflayouthead *, int *, struct ucred *, NFSPROC_T *);
154 static int nfsrpc_createlayout(vnode_t, char *, int, struct vattr *,
155     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
156     struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
157     struct nfsfh **, int *, int *, void *, int *, nfsv4stateid_t *,
158     int, int, int, int *, struct nfsclflayouthead *, int *);
159 static int nfsrpc_layoutget(struct nfsmount *, uint8_t *, int, int, uint64_t,
160     uint64_t, uint64_t, int, int, nfsv4stateid_t *, int *,
161     struct nfsclflayouthead *, struct ucred *, NFSPROC_T *, void *);
162 static int nfsrpc_layoutgetres(struct nfsmount *, vnode_t, uint8_t *,
163     int, nfsv4stateid_t *, int, uint32_t *, struct nfscllayout **,
164     struct nfsclflayouthead *, int, int, int *, struct ucred *, NFSPROC_T *);
165 
166 /*
167  * nfs null call from vfs.
168  */
169 APPLESTATIC int
170 nfsrpc_null(vnode_t vp, struct ucred *cred, NFSPROC_T *p)
171 {
172 	int error;
173 	struct nfsrv_descript nfsd, *nd = &nfsd;
174 
175 	NFSCL_REQSTART(nd, NFSPROC_NULL, vp);
176 	error = nfscl_request(nd, vp, p, cred, NULL);
177 	if (nd->nd_repstat && !error)
178 		error = nd->nd_repstat;
179 	mbuf_freem(nd->nd_mrep);
180 	return (error);
181 }
182 
183 /*
184  * nfs access rpc op.
185  * For nfs version 3 and 4, use the access rpc to check accessibility. If file
186  * modes are changed on the server, accesses might still fail later.
187  */
188 APPLESTATIC int
189 nfsrpc_access(vnode_t vp, int acmode, struct ucred *cred,
190     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
191 {
192 	int error;
193 	u_int32_t mode, rmode;
194 
195 	if (acmode & VREAD)
196 		mode = NFSACCESS_READ;
197 	else
198 		mode = 0;
199 	if (vnode_vtype(vp) == VDIR) {
200 		if (acmode & VWRITE)
201 			mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND |
202 				 NFSACCESS_DELETE);
203 		if (acmode & VEXEC)
204 			mode |= NFSACCESS_LOOKUP;
205 	} else {
206 		if (acmode & VWRITE)
207 			mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
208 		if (acmode & VEXEC)
209 			mode |= NFSACCESS_EXECUTE;
210 	}
211 
212 	/*
213 	 * Now, just call nfsrpc_accessrpc() to do the actual RPC.
214 	 */
215 	error = nfsrpc_accessrpc(vp, mode, cred, p, nap, attrflagp, &rmode,
216 	    NULL);
217 
218 	/*
219 	 * The NFS V3 spec does not clarify whether or not
220 	 * the returned access bits can be a superset of
221 	 * the ones requested, so...
222 	 */
223 	if (!error && (rmode & mode) != mode)
224 		error = EACCES;
225 	return (error);
226 }
227 
228 /*
229  * The actual rpc, separated out for Darwin.
230  */
231 APPLESTATIC int
232 nfsrpc_accessrpc(vnode_t vp, u_int32_t mode, struct ucred *cred,
233     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, u_int32_t *rmodep,
234     void *stuff)
235 {
236 	u_int32_t *tl;
237 	u_int32_t supported, rmode;
238 	int error;
239 	struct nfsrv_descript nfsd, *nd = &nfsd;
240 	nfsattrbit_t attrbits;
241 
242 	*attrflagp = 0;
243 	supported = mode;
244 	NFSCL_REQSTART(nd, NFSPROC_ACCESS, vp);
245 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
246 	*tl = txdr_unsigned(mode);
247 	if (nd->nd_flag & ND_NFSV4) {
248 		/*
249 		 * And do a Getattr op.
250 		 */
251 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
252 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
253 		NFSGETATTR_ATTRBIT(&attrbits);
254 		(void) nfsrv_putattrbit(nd, &attrbits);
255 	}
256 	error = nfscl_request(nd, vp, p, cred, stuff);
257 	if (error)
258 		return (error);
259 	if (nd->nd_flag & ND_NFSV3) {
260 		error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
261 		if (error)
262 			goto nfsmout;
263 	}
264 	if (!nd->nd_repstat) {
265 		if (nd->nd_flag & ND_NFSV4) {
266 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
267 			supported = fxdr_unsigned(u_int32_t, *tl++);
268 		} else {
269 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
270 		}
271 		rmode = fxdr_unsigned(u_int32_t, *tl);
272 		if (nd->nd_flag & ND_NFSV4)
273 			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
274 
275 		/*
276 		 * It's not obvious what should be done about
277 		 * unsupported access modes. For now, be paranoid
278 		 * and clear the unsupported ones.
279 		 */
280 		rmode &= supported;
281 		*rmodep = rmode;
282 	} else
283 		error = nd->nd_repstat;
284 nfsmout:
285 	mbuf_freem(nd->nd_mrep);
286 	return (error);
287 }
288 
289 /*
290  * nfs open rpc
291  */
292 APPLESTATIC int
293 nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p)
294 {
295 	struct nfsclopen *op;
296 	struct nfscldeleg *dp;
297 	struct nfsfh *nfhp;
298 	struct nfsnode *np = VTONFS(vp);
299 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
300 	u_int32_t mode, clidrev;
301 	int ret, newone, error, expireret = 0, retrycnt;
302 
303 	/*
304 	 * For NFSv4, Open Ops are only done on Regular Files.
305 	 */
306 	if (vnode_vtype(vp) != VREG)
307 		return (0);
308 	mode = 0;
309 	if (amode & FREAD)
310 		mode |= NFSV4OPEN_ACCESSREAD;
311 	if (amode & FWRITE)
312 		mode |= NFSV4OPEN_ACCESSWRITE;
313 	nfhp = np->n_fhp;
314 
315 	retrycnt = 0;
316 #ifdef notdef
317 { char name[100]; int namel;
318 namel = (np->n_v4->n4_namelen < 100) ? np->n_v4->n4_namelen : 99;
319 bcopy(NFS4NODENAME(np->n_v4), name, namel);
320 name[namel] = '\0';
321 printf("rpcopen p=0x%x name=%s",p->p_pid,name);
322 if (nfhp->nfh_len > 0) printf(" fh=0x%x\n",nfhp->nfh_fh[12]);
323 else printf(" fhl=0\n");
324 }
325 #endif
326 	do {
327 	    dp = NULL;
328 	    error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 1,
329 		cred, p, NULL, &op, &newone, &ret, 1);
330 	    if (error) {
331 		return (error);
332 	    }
333 	    if (nmp->nm_clp != NULL)
334 		clidrev = nmp->nm_clp->nfsc_clientidrev;
335 	    else
336 		clidrev = 0;
337 	    if (ret == NFSCLOPEN_DOOPEN) {
338 		if (np->n_v4 != NULL) {
339 			/*
340 			 * For the first attempt, try and get a layout, if
341 			 * pNFS is enabled for the mount.
342 			 */
343 			if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
344 			    nfs_numnfscbd == 0 ||
345 			    (np->n_flag & NNOLAYOUT) != 0 || retrycnt > 0)
346 				error = nfsrpc_openrpc(nmp, vp,
347 				    np->n_v4->n4_data,
348 				    np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
349 				    np->n_fhp->nfh_len, mode, op,
350 				    NFS4NODENAME(np->n_v4),
351 				    np->n_v4->n4_namelen,
352 				    &dp, 0, 0x0, cred, p, 0, 0);
353 			else
354 				error = nfsrpc_getopenlayout(nmp, vp,
355 				    np->n_v4->n4_data,
356 				    np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
357 				    np->n_fhp->nfh_len, mode, op,
358 				    NFS4NODENAME(np->n_v4),
359 				    np->n_v4->n4_namelen, &dp, cred, p);
360 			if (dp != NULL) {
361 #ifdef APPLE
362 				OSBitAndAtomic((int32_t)~NDELEGMOD, (UInt32 *)&np->n_flag);
363 #else
364 				NFSLOCKNODE(np);
365 				np->n_flag &= ~NDELEGMOD;
366 				/*
367 				 * Invalidate the attribute cache, so that
368 				 * attributes that pre-date the issue of a
369 				 * delegation are not cached, since the
370 				 * cached attributes will remain valid while
371 				 * the delegation is held.
372 				 */
373 				NFSINVALATTRCACHE(np);
374 				NFSUNLOCKNODE(np);
375 #endif
376 				(void) nfscl_deleg(nmp->nm_mountp,
377 				    op->nfso_own->nfsow_clp,
378 				    nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp);
379 			}
380 		} else {
381 			error = EIO;
382 		}
383 		newnfs_copyincred(cred, &op->nfso_cred);
384 	    } else if (ret == NFSCLOPEN_SETCRED)
385 		/*
386 		 * This is a new local open on a delegation. It needs
387 		 * to have credentials so that an open can be done
388 		 * against the server during recovery.
389 		 */
390 		newnfs_copyincred(cred, &op->nfso_cred);
391 
392 	    /*
393 	     * nfso_opencnt is the count of how many VOP_OPEN()s have
394 	     * been done on this Open successfully and a VOP_CLOSE()
395 	     * is expected for each of these.
396 	     * If error is non-zero, don't increment it, since the Open
397 	     * hasn't succeeded yet.
398 	     */
399 	    if (!error)
400 		op->nfso_opencnt++;
401 	    nfscl_openrelease(nmp, op, error, newone);
402 	    if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
403 		error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
404 		error == NFSERR_BADSESSION) {
405 		(void) nfs_catnap(PZERO, error, "nfs_open");
406 	    } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
407 		&& clidrev != 0) {
408 		expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
409 		retrycnt++;
410 	    }
411 	} while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
412 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
413 	    error == NFSERR_BADSESSION ||
414 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
415 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
416 	if (error && retrycnt >= 4)
417 		error = EIO;
418 	return (error);
419 }
420 
421 /*
422  * the actual open rpc
423  */
424 APPLESTATIC int
425 nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen,
426     u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op,
427     u_int8_t *name, int namelen, struct nfscldeleg **dpp,
428     int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p,
429     int syscred, int recursed)
430 {
431 	u_int32_t *tl;
432 	struct nfsrv_descript nfsd, *nd = &nfsd;
433 	struct nfscldeleg *dp, *ndp = NULL;
434 	struct nfsvattr nfsva;
435 	u_int32_t rflags, deleg;
436 	nfsattrbit_t attrbits;
437 	int error, ret, acesize, limitby;
438 	struct nfsclsession *tsep;
439 
440 	dp = *dpp;
441 	*dpp = NULL;
442 	nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL, NULL, 0, 0);
443 	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
444 	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
445 	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
446 	*tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
447 	tsep = nfsmnt_mdssession(nmp);
448 	*tl++ = tsep->nfsess_clientid.lval[0];
449 	*tl = tsep->nfsess_clientid.lval[1];
450 	(void) nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
451 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
452 	*tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
453 	if (reclaim) {
454 		*tl = txdr_unsigned(NFSV4OPEN_CLAIMPREVIOUS);
455 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
456 		*tl = txdr_unsigned(delegtype);
457 	} else {
458 		if (dp != NULL) {
459 			*tl = txdr_unsigned(NFSV4OPEN_CLAIMDELEGATECUR);
460 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
461 			if (NFSHASNFSV4N(nmp))
462 				*tl++ = 0;
463 			else
464 				*tl++ = dp->nfsdl_stateid.seqid;
465 			*tl++ = dp->nfsdl_stateid.other[0];
466 			*tl++ = dp->nfsdl_stateid.other[1];
467 			*tl = dp->nfsdl_stateid.other[2];
468 		} else {
469 			*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
470 		}
471 		(void) nfsm_strtom(nd, name, namelen);
472 	}
473 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
474 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
475 	NFSZERO_ATTRBIT(&attrbits);
476 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
477 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
478 	(void) nfsrv_putattrbit(nd, &attrbits);
479 	if (syscred)
480 		nd->nd_flag |= ND_USEGSSNAME;
481 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
482 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
483 	if (error)
484 		return (error);
485 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
486 	if (!nd->nd_repstat) {
487 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
488 		    6 * NFSX_UNSIGNED);
489 		op->nfso_stateid.seqid = *tl++;
490 		op->nfso_stateid.other[0] = *tl++;
491 		op->nfso_stateid.other[1] = *tl++;
492 		op->nfso_stateid.other[2] = *tl;
493 		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
494 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
495 		if (error)
496 			goto nfsmout;
497 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
498 		deleg = fxdr_unsigned(u_int32_t, *tl);
499 		if (deleg == NFSV4OPEN_DELEGATEREAD ||
500 		    deleg == NFSV4OPEN_DELEGATEWRITE) {
501 			if (!(op->nfso_own->nfsow_clp->nfsc_flags &
502 			      NFSCLFLAGS_FIRSTDELEG))
503 				op->nfso_own->nfsow_clp->nfsc_flags |=
504 				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
505 			MALLOC(ndp, struct nfscldeleg *,
506 			    sizeof (struct nfscldeleg) + newfhlen,
507 			    M_NFSCLDELEG, M_WAITOK);
508 			LIST_INIT(&ndp->nfsdl_owner);
509 			LIST_INIT(&ndp->nfsdl_lock);
510 			ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
511 			ndp->nfsdl_fhlen = newfhlen;
512 			NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
513 			newnfs_copyincred(cred, &ndp->nfsdl_cred);
514 			nfscl_lockinit(&ndp->nfsdl_rwlock);
515 			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
516 			    NFSX_UNSIGNED);
517 			ndp->nfsdl_stateid.seqid = *tl++;
518 			ndp->nfsdl_stateid.other[0] = *tl++;
519 			ndp->nfsdl_stateid.other[1] = *tl++;
520 			ndp->nfsdl_stateid.other[2] = *tl++;
521 			ret = fxdr_unsigned(int, *tl);
522 			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
523 				ndp->nfsdl_flags = NFSCLDL_WRITE;
524 				/*
525 				 * Indicates how much the file can grow.
526 				 */
527 				NFSM_DISSECT(tl, u_int32_t *,
528 				    3 * NFSX_UNSIGNED);
529 				limitby = fxdr_unsigned(int, *tl++);
530 				switch (limitby) {
531 				case NFSV4OPEN_LIMITSIZE:
532 					ndp->nfsdl_sizelimit = fxdr_hyper(tl);
533 					break;
534 				case NFSV4OPEN_LIMITBLOCKS:
535 					ndp->nfsdl_sizelimit =
536 					    fxdr_unsigned(u_int64_t, *tl++);
537 					ndp->nfsdl_sizelimit *=
538 					    fxdr_unsigned(u_int64_t, *tl);
539 					break;
540 				default:
541 					error = NFSERR_BADXDR;
542 					goto nfsmout;
543 				}
544 			} else {
545 				ndp->nfsdl_flags = NFSCLDL_READ;
546 			}
547 			if (ret)
548 				ndp->nfsdl_flags |= NFSCLDL_RECALL;
549 			error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret,
550 			    &acesize, p);
551 			if (error)
552 				goto nfsmout;
553 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
554 			error = NFSERR_BADXDR;
555 			goto nfsmout;
556 		}
557 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
558 		error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
559 		    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
560 		    NULL, NULL, NULL, p, cred);
561 		if (error)
562 			goto nfsmout;
563 		if (ndp != NULL) {
564 			ndp->nfsdl_change = nfsva.na_filerev;
565 			ndp->nfsdl_modtime = nfsva.na_mtime;
566 			ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
567 		}
568 		if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM)) {
569 		    do {
570 			ret = nfsrpc_openconfirm(vp, newfhp, newfhlen, op,
571 			    cred, p);
572 			if (ret == NFSERR_DELAY)
573 			    (void) nfs_catnap(PZERO, ret, "nfs_open");
574 		    } while (ret == NFSERR_DELAY);
575 		    error = ret;
576 		}
577 		if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) ||
578 		    nfscl_assumeposixlocks)
579 		    op->nfso_posixlock = 1;
580 		else
581 		    op->nfso_posixlock = 0;
582 
583 		/*
584 		 * If the server is handing out delegations, but we didn't
585 		 * get one because an OpenConfirm was required, try the
586 		 * Open again, to get a delegation. This is a harmless no-op,
587 		 * from a server's point of view.
588 		 */
589 		if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM) &&
590 		    (op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG)
591 		    && !error && dp == NULL && ndp == NULL && !recursed) {
592 		    do {
593 			ret = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp,
594 			    newfhlen, mode, op, name, namelen, &ndp, 0, 0x0,
595 			    cred, p, syscred, 1);
596 			if (ret == NFSERR_DELAY)
597 			    (void) nfs_catnap(PZERO, ret, "nfs_open2");
598 		    } while (ret == NFSERR_DELAY);
599 		    if (ret) {
600 			if (ndp != NULL) {
601 				FREE((caddr_t)ndp, M_NFSCLDELEG);
602 				ndp = NULL;
603 			}
604 			if (ret == NFSERR_STALECLIENTID ||
605 			    ret == NFSERR_STALEDONTRECOVER ||
606 			    ret == NFSERR_BADSESSION)
607 				error = ret;
608 		    }
609 		}
610 	}
611 	if (nd->nd_repstat != 0 && error == 0)
612 		error = nd->nd_repstat;
613 	if (error == NFSERR_STALECLIENTID)
614 		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
615 nfsmout:
616 	if (!error)
617 		*dpp = ndp;
618 	else if (ndp != NULL)
619 		FREE((caddr_t)ndp, M_NFSCLDELEG);
620 	mbuf_freem(nd->nd_mrep);
621 	return (error);
622 }
623 
624 /*
625  * open downgrade rpc
626  */
627 APPLESTATIC int
628 nfsrpc_opendowngrade(vnode_t vp, u_int32_t mode, struct nfsclopen *op,
629     struct ucred *cred, NFSPROC_T *p)
630 {
631 	u_int32_t *tl;
632 	struct nfsrv_descript nfsd, *nd = &nfsd;
633 	int error;
634 
635 	NFSCL_REQSTART(nd, NFSPROC_OPENDOWNGRADE, vp);
636 	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED);
637 	if (NFSHASNFSV4N(VFSTONFS(vnode_mount(vp))))
638 		*tl++ = 0;
639 	else
640 		*tl++ = op->nfso_stateid.seqid;
641 	*tl++ = op->nfso_stateid.other[0];
642 	*tl++ = op->nfso_stateid.other[1];
643 	*tl++ = op->nfso_stateid.other[2];
644 	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
645 	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
646 	*tl = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
647 	error = nfscl_request(nd, vp, p, cred, NULL);
648 	if (error)
649 		return (error);
650 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
651 	if (!nd->nd_repstat) {
652 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
653 		op->nfso_stateid.seqid = *tl++;
654 		op->nfso_stateid.other[0] = *tl++;
655 		op->nfso_stateid.other[1] = *tl++;
656 		op->nfso_stateid.other[2] = *tl;
657 	}
658 	if (nd->nd_repstat && error == 0)
659 		error = nd->nd_repstat;
660 	if (error == NFSERR_STALESTATEID)
661 		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
662 nfsmout:
663 	mbuf_freem(nd->nd_mrep);
664 	return (error);
665 }
666 
667 /*
668  * V4 Close operation.
669  */
670 APPLESTATIC int
671 nfsrpc_close(vnode_t vp, int doclose, NFSPROC_T *p)
672 {
673 	struct nfsclclient *clp;
674 	int error;
675 
676 	if (vnode_vtype(vp) != VREG)
677 		return (0);
678 	if (doclose)
679 		error = nfscl_doclose(vp, &clp, p);
680 	else
681 		error = nfscl_getclose(vp, &clp);
682 	if (error)
683 		return (error);
684 
685 	nfscl_clientrelease(clp);
686 	return (0);
687 }
688 
689 /*
690  * Close the open.
691  */
692 APPLESTATIC void
693 nfsrpc_doclose(struct nfsmount *nmp, struct nfsclopen *op, NFSPROC_T *p)
694 {
695 	struct nfsrv_descript nfsd, *nd = &nfsd;
696 	struct nfscllockowner *lp, *nlp;
697 	struct nfscllock *lop, *nlop;
698 	struct ucred *tcred;
699 	u_int64_t off = 0, len = 0;
700 	u_int32_t type = NFSV4LOCKT_READ;
701 	int error, do_unlock, trycnt;
702 
703 	tcred = newnfs_getcred();
704 	newnfs_copycred(&op->nfso_cred, tcred);
705 	/*
706 	 * (Theoretically this could be done in the same
707 	 *  compound as the close, but having multiple
708 	 *  sequenced Ops in the same compound might be
709 	 *  too scary for some servers.)
710 	 */
711 	if (op->nfso_posixlock) {
712 		off = 0;
713 		len = NFS64BITSSET;
714 		type = NFSV4LOCKT_READ;
715 	}
716 
717 	/*
718 	 * Since this function is only called from VOP_INACTIVE(), no
719 	 * other thread will be manipulating this Open. As such, the
720 	 * lock lists are not being changed by other threads, so it should
721 	 * be safe to do this without locking.
722 	 */
723 	LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
724 		do_unlock = 1;
725 		LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
726 			if (op->nfso_posixlock == 0) {
727 				off = lop->nfslo_first;
728 				len = lop->nfslo_end - lop->nfslo_first;
729 				if (lop->nfslo_type == F_WRLCK)
730 					type = NFSV4LOCKT_WRITE;
731 				else
732 					type = NFSV4LOCKT_READ;
733 			}
734 			if (do_unlock) {
735 				trycnt = 0;
736 				do {
737 					error = nfsrpc_locku(nd, nmp, lp, off,
738 					    len, type, tcred, p, 0);
739 					if ((nd->nd_repstat == NFSERR_GRACE ||
740 					    nd->nd_repstat == NFSERR_DELAY) &&
741 					    error == 0)
742 						(void) nfs_catnap(PZERO,
743 						    (int)nd->nd_repstat,
744 						    "nfs_close");
745 				} while ((nd->nd_repstat == NFSERR_GRACE ||
746 				    nd->nd_repstat == NFSERR_DELAY) &&
747 				    error == 0 && trycnt++ < 5);
748 				if (op->nfso_posixlock)
749 					do_unlock = 0;
750 			}
751 			nfscl_freelock(lop, 0);
752 		}
753 		/*
754 		 * Do a ReleaseLockOwner.
755 		 * The lock owner name nfsl_owner may be used by other opens for
756 		 * other files but the lock_owner4 name that nfsrpc_rellockown()
757 		 * puts on the wire has the file handle for this file appended
758 		 * to it, so it can be done now.
759 		 */
760 		(void)nfsrpc_rellockown(nmp, lp, lp->nfsl_open->nfso_fh,
761 		    lp->nfsl_open->nfso_fhlen, tcred, p);
762 	}
763 
764 	/*
765 	 * There could be other Opens for different files on the same
766 	 * OpenOwner, so locking is required.
767 	 */
768 	NFSLOCKCLSTATE();
769 	nfscl_lockexcl(&op->nfso_own->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
770 	NFSUNLOCKCLSTATE();
771 	do {
772 		error = nfscl_tryclose(op, tcred, nmp, p);
773 		if (error == NFSERR_GRACE)
774 			(void) nfs_catnap(PZERO, error, "nfs_close");
775 	} while (error == NFSERR_GRACE);
776 	NFSLOCKCLSTATE();
777 	nfscl_lockunlock(&op->nfso_own->nfsow_rwlock);
778 
779 	LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp)
780 		nfscl_freelockowner(lp, 0);
781 	nfscl_freeopen(op, 0);
782 	NFSUNLOCKCLSTATE();
783 	NFSFREECRED(tcred);
784 }
785 
786 /*
787  * The actual Close RPC.
788  */
789 APPLESTATIC int
790 nfsrpc_closerpc(struct nfsrv_descript *nd, struct nfsmount *nmp,
791     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p,
792     int syscred)
793 {
794 	u_int32_t *tl;
795 	int error;
796 
797 	nfscl_reqstart(nd, NFSPROC_CLOSE, nmp, op->nfso_fh,
798 	    op->nfso_fhlen, NULL, NULL, 0, 0);
799 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
800 	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
801 	if (NFSHASNFSV4N(nmp))
802 		*tl++ = 0;
803 	else
804 		*tl++ = op->nfso_stateid.seqid;
805 	*tl++ = op->nfso_stateid.other[0];
806 	*tl++ = op->nfso_stateid.other[1];
807 	*tl = op->nfso_stateid.other[2];
808 	if (syscred)
809 		nd->nd_flag |= ND_USEGSSNAME;
810 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
811 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
812 	if (error)
813 		return (error);
814 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
815 	if (nd->nd_repstat == 0)
816 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
817 	error = nd->nd_repstat;
818 	if (error == NFSERR_STALESTATEID)
819 		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
820 nfsmout:
821 	mbuf_freem(nd->nd_mrep);
822 	return (error);
823 }
824 
825 /*
826  * V4 Open Confirm RPC.
827  */
828 APPLESTATIC int
829 nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen,
830     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p)
831 {
832 	u_int32_t *tl;
833 	struct nfsrv_descript nfsd, *nd = &nfsd;
834 	struct nfsmount *nmp;
835 	int error;
836 
837 	nmp = VFSTONFS(vnode_mount(vp));
838 	if (NFSHASNFSV4N(nmp))
839 		return (0);		/* No confirmation for NFSv4.1. */
840 	nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, nmp, nfhp, fhlen, NULL, NULL,
841 	    0, 0);
842 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
843 	*tl++ = op->nfso_stateid.seqid;
844 	*tl++ = op->nfso_stateid.other[0];
845 	*tl++ = op->nfso_stateid.other[1];
846 	*tl++ = op->nfso_stateid.other[2];
847 	*tl = txdr_unsigned(op->nfso_own->nfsow_seqid);
848 	error = nfscl_request(nd, vp, p, cred, NULL);
849 	if (error)
850 		return (error);
851 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
852 	if (!nd->nd_repstat) {
853 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
854 		op->nfso_stateid.seqid = *tl++;
855 		op->nfso_stateid.other[0] = *tl++;
856 		op->nfso_stateid.other[1] = *tl++;
857 		op->nfso_stateid.other[2] = *tl;
858 	}
859 	error = nd->nd_repstat;
860 	if (error == NFSERR_STALESTATEID)
861 		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
862 nfsmout:
863 	mbuf_freem(nd->nd_mrep);
864 	return (error);
865 }
866 
867 /*
868  * Do the setclientid and setclientid confirm RPCs. Called from nfs_statfs()
869  * when a mount has just occurred and when the server replies NFSERR_EXPIRED.
870  */
871 APPLESTATIC int
872 nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim,
873     struct ucred *cred, NFSPROC_T *p)
874 {
875 	u_int32_t *tl;
876 	struct nfsrv_descript nfsd;
877 	struct nfsrv_descript *nd = &nfsd;
878 	nfsattrbit_t attrbits;
879 	u_int8_t *cp = NULL, *cp2, addr[INET6_ADDRSTRLEN + 9];
880 	u_short port;
881 	int error, isinet6 = 0, callblen;
882 	nfsquad_t confirm;
883 	u_int32_t lease;
884 	static u_int32_t rev = 0;
885 	struct nfsclds *dsp;
886 	struct in6_addr a6;
887 	struct nfsclsession *tsep;
888 
889 	if (nfsboottime.tv_sec == 0)
890 		NFSSETBOOTTIME(nfsboottime);
891 	clp->nfsc_rev = rev++;
892 	if (NFSHASNFSV4N(nmp)) {
893 		/*
894 		 * Either there was no previous session or the
895 		 * previous session has failed, so...
896 		 * do an ExchangeID followed by the CreateSession.
897 		 */
898 		error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq,
899 		    NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp, cred, p);
900 		NFSCL_DEBUG(1, "aft exch=%d\n", error);
901 		if (error == 0)
902 			error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
903 			    &nmp->nm_sockreq,
904 			    dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p);
905 		if (error == 0) {
906 			NFSLOCKMNT(nmp);
907 			/*
908 			 * The old sessions cannot be safely free'd
909 			 * here, since they may still be used by
910 			 * in-progress RPCs.
911 			 */
912 			tsep = NULL;
913 			if (TAILQ_FIRST(&nmp->nm_sess) != NULL)
914 				tsep = NFSMNT_MDSSESSION(nmp);
915 			TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp,
916 			    nfsclds_list);
917 			/*
918 			 * Wake up RPCs waiting for a slot on the
919 			 * old session. These will then fail with
920 			 * NFSERR_BADSESSION and be retried with the
921 			 * new session by nfsv4_setsequence().
922 			 * Also wakeup() processes waiting for the
923 			 * new session.
924 			 */
925 			if (tsep != NULL)
926 				wakeup(&tsep->nfsess_slots);
927 			wakeup(&nmp->nm_sess);
928 			NFSUNLOCKMNT(nmp);
929 		} else
930 			nfscl_freenfsclds(dsp);
931 		NFSCL_DEBUG(1, "aft createsess=%d\n", error);
932 		if (error == 0 && reclaim == 0) {
933 			error = nfsrpc_reclaimcomplete(nmp, cred, p);
934 			NFSCL_DEBUG(1, "aft reclaimcomp=%d\n", error);
935 			if (error == NFSERR_COMPLETEALREADY ||
936 			    error == NFSERR_NOTSUPP)
937 				/* Ignore this error. */
938 				error = 0;
939 		}
940 		return (error);
941 	}
942 
943 	/*
944 	 * Allocate a single session structure for NFSv4.0, because some of
945 	 * the fields are used by NFSv4.0 although it doesn't do a session.
946 	 */
947 	dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO);
948 	mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
949 	mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF);
950 	NFSLOCKMNT(nmp);
951 	TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list);
952 	tsep = NFSMNT_MDSSESSION(nmp);
953 	NFSUNLOCKMNT(nmp);
954 
955 	nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL, NULL, 0, 0);
956 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
957 	*tl++ = txdr_unsigned(nfsboottime.tv_sec);
958 	*tl = txdr_unsigned(clp->nfsc_rev);
959 	(void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
960 
961 	/*
962 	 * set up the callback address
963 	 */
964 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
965 	*tl = txdr_unsigned(NFS_CALLBCKPROG);
966 	callblen = strlen(nfsv4_callbackaddr);
967 	if (callblen == 0)
968 		cp = nfscl_getmyip(nmp, &a6, &isinet6);
969 	if (nfscl_enablecallb && nfs_numnfscbd > 0 &&
970 	    (callblen > 0 || cp != NULL)) {
971 		port = htons(nfsv4_cbport);
972 		cp2 = (u_int8_t *)&port;
973 #ifdef INET6
974 		if ((callblen > 0 &&
975 		     strchr(nfsv4_callbackaddr, ':')) || isinet6) {
976 			char ip6buf[INET6_ADDRSTRLEN], *ip6add;
977 
978 			(void) nfsm_strtom(nd, "tcp6", 4);
979 			if (callblen == 0) {
980 				ip6_sprintf(ip6buf, (struct in6_addr *)cp);
981 				ip6add = ip6buf;
982 			} else {
983 				ip6add = nfsv4_callbackaddr;
984 			}
985 			snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d",
986 			    ip6add, cp2[0], cp2[1]);
987 		} else
988 #endif
989 		{
990 			(void) nfsm_strtom(nd, "tcp", 3);
991 			if (callblen == 0)
992 				snprintf(addr, INET6_ADDRSTRLEN + 9,
993 				    "%d.%d.%d.%d.%d.%d", cp[0], cp[1],
994 				    cp[2], cp[3], cp2[0], cp2[1]);
995 			else
996 				snprintf(addr, INET6_ADDRSTRLEN + 9,
997 				    "%s.%d.%d", nfsv4_callbackaddr,
998 				    cp2[0], cp2[1]);
999 		}
1000 		(void) nfsm_strtom(nd, addr, strlen(addr));
1001 	} else {
1002 		(void) nfsm_strtom(nd, "tcp", 3);
1003 		(void) nfsm_strtom(nd, "0.0.0.0.0.0", 11);
1004 	}
1005 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1006 	*tl = txdr_unsigned(clp->nfsc_cbident);
1007 	nd->nd_flag |= ND_USEGSSNAME;
1008 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
1009 		NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1010 	if (error)
1011 		return (error);
1012 	if (nd->nd_repstat == 0) {
1013 	    NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1014 	    tsep->nfsess_clientid.lval[0] = *tl++;
1015 	    tsep->nfsess_clientid.lval[1] = *tl++;
1016 	    confirm.lval[0] = *tl++;
1017 	    confirm.lval[1] = *tl;
1018 	    mbuf_freem(nd->nd_mrep);
1019 	    nd->nd_mrep = NULL;
1020 
1021 	    /*
1022 	     * and confirm it.
1023 	     */
1024 	    nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL,
1025 		NULL, 0, 0);
1026 	    NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1027 	    *tl++ = tsep->nfsess_clientid.lval[0];
1028 	    *tl++ = tsep->nfsess_clientid.lval[1];
1029 	    *tl++ = confirm.lval[0];
1030 	    *tl = confirm.lval[1];
1031 	    nd->nd_flag |= ND_USEGSSNAME;
1032 	    error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
1033 		cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1034 	    if (error)
1035 		return (error);
1036 	    mbuf_freem(nd->nd_mrep);
1037 	    nd->nd_mrep = NULL;
1038 	    if (nd->nd_repstat == 0) {
1039 		nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, nmp->nm_fh,
1040 		    nmp->nm_fhsize, NULL, NULL, 0, 0);
1041 		NFSZERO_ATTRBIT(&attrbits);
1042 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
1043 		(void) nfsrv_putattrbit(nd, &attrbits);
1044 		nd->nd_flag |= ND_USEGSSNAME;
1045 		error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
1046 		    cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1047 		if (error)
1048 		    return (error);
1049 		if (nd->nd_repstat == 0) {
1050 		    error = nfsv4_loadattr(nd, NULL, NULL, NULL, NULL, 0, NULL,
1051 			NULL, NULL, NULL, NULL, 0, NULL, &lease, NULL, p, cred);
1052 		    if (error)
1053 			goto nfsmout;
1054 		    clp->nfsc_renew = NFSCL_RENEW(lease);
1055 		    clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1056 		    clp->nfsc_clientidrev++;
1057 		    if (clp->nfsc_clientidrev == 0)
1058 			clp->nfsc_clientidrev++;
1059 		}
1060 	    }
1061 	}
1062 	error = nd->nd_repstat;
1063 nfsmout:
1064 	mbuf_freem(nd->nd_mrep);
1065 	return (error);
1066 }
1067 
1068 /*
1069  * nfs getattr call.
1070  */
1071 APPLESTATIC int
1072 nfsrpc_getattr(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
1073     struct nfsvattr *nap, void *stuff)
1074 {
1075 	struct nfsrv_descript nfsd, *nd = &nfsd;
1076 	int error;
1077 	nfsattrbit_t attrbits;
1078 
1079 	NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
1080 	if (nd->nd_flag & ND_NFSV4) {
1081 		NFSGETATTR_ATTRBIT(&attrbits);
1082 		(void) nfsrv_putattrbit(nd, &attrbits);
1083 	}
1084 	error = nfscl_request(nd, vp, p, cred, stuff);
1085 	if (error)
1086 		return (error);
1087 	if (!nd->nd_repstat)
1088 		error = nfsm_loadattr(nd, nap);
1089 	else
1090 		error = nd->nd_repstat;
1091 	mbuf_freem(nd->nd_mrep);
1092 	return (error);
1093 }
1094 
1095 /*
1096  * nfs getattr call with non-vnode arguemnts.
1097  */
1098 APPLESTATIC int
1099 nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred,
1100     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp,
1101     uint32_t *leasep)
1102 {
1103 	struct nfsrv_descript nfsd, *nd = &nfsd;
1104 	int error, vers = NFS_VER2;
1105 	nfsattrbit_t attrbits;
1106 
1107 	nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL, NULL, 0, 0);
1108 	if (nd->nd_flag & ND_NFSV4) {
1109 		vers = NFS_VER4;
1110 		NFSGETATTR_ATTRBIT(&attrbits);
1111 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
1112 		(void) nfsrv_putattrbit(nd, &attrbits);
1113 	} else if (nd->nd_flag & ND_NFSV3) {
1114 		vers = NFS_VER3;
1115 	}
1116 	if (syscred)
1117 		nd->nd_flag |= ND_USEGSSNAME;
1118 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
1119 	    NFS_PROG, vers, NULL, 1, xidp, NULL);
1120 	if (error)
1121 		return (error);
1122 	if (nd->nd_repstat == 0) {
1123 		if ((nd->nd_flag & ND_NFSV4) != 0)
1124 			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
1125 			    NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL,
1126 			    NULL, NULL);
1127 		else
1128 			error = nfsm_loadattr(nd, nap);
1129 	} else
1130 		error = nd->nd_repstat;
1131 	mbuf_freem(nd->nd_mrep);
1132 	return (error);
1133 }
1134 
1135 /*
1136  * Do an nfs setattr operation.
1137  */
1138 APPLESTATIC int
1139 nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp,
1140     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp,
1141     void *stuff)
1142 {
1143 	int error, expireret = 0, openerr, retrycnt;
1144 	u_int32_t clidrev = 0, mode;
1145 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
1146 	struct nfsfh *nfhp;
1147 	nfsv4stateid_t stateid;
1148 	void *lckp;
1149 
1150 	if (nmp->nm_clp != NULL)
1151 		clidrev = nmp->nm_clp->nfsc_clientidrev;
1152 	if (vap != NULL && NFSATTRISSET(u_quad_t, vap, va_size))
1153 		mode = NFSV4OPEN_ACCESSWRITE;
1154 	else
1155 		mode = NFSV4OPEN_ACCESSREAD;
1156 	retrycnt = 0;
1157 	do {
1158 		lckp = NULL;
1159 		openerr = 1;
1160 		if (NFSHASNFSV4(nmp)) {
1161 			nfhp = VTONFS(vp)->n_fhp;
1162 			error = nfscl_getstateid(vp, nfhp->nfh_fh,
1163 			    nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp);
1164 			if (error && vnode_vtype(vp) == VREG &&
1165 			    (mode == NFSV4OPEN_ACCESSWRITE ||
1166 			     nfstest_openallsetattr)) {
1167 				/*
1168 				 * No Open stateid, so try and open the file
1169 				 * now.
1170 				 */
1171 				if (mode == NFSV4OPEN_ACCESSWRITE)
1172 					openerr = nfsrpc_open(vp, FWRITE, cred,
1173 					    p);
1174 				else
1175 					openerr = nfsrpc_open(vp, FREAD, cred,
1176 					    p);
1177 				if (!openerr)
1178 					(void) nfscl_getstateid(vp,
1179 					    nfhp->nfh_fh, nfhp->nfh_len,
1180 					    mode, 0, cred, p, &stateid, &lckp);
1181 			}
1182 		}
1183 		if (vap != NULL)
1184 			error = nfsrpc_setattrrpc(vp, vap, &stateid, cred, p,
1185 			    rnap, attrflagp, stuff);
1186 		else
1187 			error = nfsrpc_setaclrpc(vp, cred, p, aclp, &stateid,
1188 			    stuff);
1189 		if (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD) {
1190 			NFSLOCKMNT(nmp);
1191 			nmp->nm_state |= NFSSTA_OPENMODE;
1192 			NFSUNLOCKMNT(nmp);
1193 		}
1194 		if (error == NFSERR_STALESTATEID)
1195 			nfscl_initiate_recovery(nmp->nm_clp);
1196 		if (lckp != NULL)
1197 			nfscl_lockderef(lckp);
1198 		if (!openerr)
1199 			(void) nfsrpc_close(vp, 0, p);
1200 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1201 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1202 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1203 			(void) nfs_catnap(PZERO, error, "nfs_setattr");
1204 		} else if ((error == NFSERR_EXPIRED ||
1205 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
1206 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1207 		}
1208 		retrycnt++;
1209 	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1210 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1211 	    error == NFSERR_BADSESSION ||
1212 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1213 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1214 	     expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1215 	    (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD &&
1216 	     retrycnt < 4));
1217 	if (error && retrycnt >= 4)
1218 		error = EIO;
1219 	return (error);
1220 }
1221 
1222 static int
1223 nfsrpc_setattrrpc(vnode_t vp, struct vattr *vap,
1224     nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
1225     struct nfsvattr *rnap, int *attrflagp, void *stuff)
1226 {
1227 	u_int32_t *tl;
1228 	struct nfsrv_descript nfsd, *nd = &nfsd;
1229 	int error;
1230 	nfsattrbit_t attrbits;
1231 
1232 	*attrflagp = 0;
1233 	NFSCL_REQSTART(nd, NFSPROC_SETATTR, vp);
1234 	if (nd->nd_flag & ND_NFSV4)
1235 		nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1236 	vap->va_type = vnode_vtype(vp);
1237 	nfscl_fillsattr(nd, vap, vp, NFSSATTR_FULL, 0);
1238 	if (nd->nd_flag & ND_NFSV3) {
1239 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1240 		*tl = newnfs_false;
1241 	} else if (nd->nd_flag & ND_NFSV4) {
1242 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1243 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1244 		NFSGETATTR_ATTRBIT(&attrbits);
1245 		(void) nfsrv_putattrbit(nd, &attrbits);
1246 	}
1247 	error = nfscl_request(nd, vp, p, cred, stuff);
1248 	if (error)
1249 		return (error);
1250 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
1251 		error = nfscl_wcc_data(nd, vp, rnap, attrflagp, NULL, stuff);
1252 	if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 && !error)
1253 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1254 	if (!(nd->nd_flag & ND_NFSV3) && !nd->nd_repstat && !error)
1255 		error = nfscl_postop_attr(nd, rnap, attrflagp, stuff);
1256 	mbuf_freem(nd->nd_mrep);
1257 	if (nd->nd_repstat && !error)
1258 		error = nd->nd_repstat;
1259 	return (error);
1260 }
1261 
1262 /*
1263  * nfs lookup rpc
1264  */
1265 APPLESTATIC int
1266 nfsrpc_lookup(vnode_t dvp, char *name, int len, struct ucred *cred,
1267     NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap,
1268     struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *stuff)
1269 {
1270 	u_int32_t *tl;
1271 	struct nfsrv_descript nfsd, *nd = &nfsd;
1272 	struct nfsmount *nmp;
1273 	struct nfsnode *np;
1274 	struct nfsfh *nfhp;
1275 	nfsattrbit_t attrbits;
1276 	int error = 0, lookupp = 0;
1277 
1278 	*attrflagp = 0;
1279 	*dattrflagp = 0;
1280 	if (vnode_vtype(dvp) != VDIR)
1281 		return (ENOTDIR);
1282 	nmp = VFSTONFS(vnode_mount(dvp));
1283 	if (len > NFS_MAXNAMLEN)
1284 		return (ENAMETOOLONG);
1285 	if (NFSHASNFSV4(nmp) && len == 1 &&
1286 		name[0] == '.') {
1287 		/*
1288 		 * Just return the current dir's fh.
1289 		 */
1290 		np = VTONFS(dvp);
1291 		MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) +
1292 			np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
1293 		nfhp->nfh_len = np->n_fhp->nfh_len;
1294 		NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
1295 		*nfhpp = nfhp;
1296 		return (0);
1297 	}
1298 	if (NFSHASNFSV4(nmp) && len == 2 &&
1299 		name[0] == '.' && name[1] == '.') {
1300 		lookupp = 1;
1301 		NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, dvp);
1302 	} else {
1303 		NFSCL_REQSTART(nd, NFSPROC_LOOKUP, dvp);
1304 		(void) nfsm_strtom(nd, name, len);
1305 	}
1306 	if (nd->nd_flag & ND_NFSV4) {
1307 		NFSGETATTR_ATTRBIT(&attrbits);
1308 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1309 		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
1310 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1311 		(void) nfsrv_putattrbit(nd, &attrbits);
1312 	}
1313 	error = nfscl_request(nd, dvp, p, cred, stuff);
1314 	if (error)
1315 		return (error);
1316 	if (nd->nd_repstat) {
1317 		/*
1318 		 * When an NFSv4 Lookupp returns ENOENT, it means that
1319 		 * the lookup is at the root of an fs, so return this dir.
1320 		 */
1321 		if (nd->nd_repstat == NFSERR_NOENT && lookupp) {
1322 		    np = VTONFS(dvp);
1323 		    MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) +
1324 			np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
1325 		    nfhp->nfh_len = np->n_fhp->nfh_len;
1326 		    NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
1327 		    *nfhpp = nfhp;
1328 		    mbuf_freem(nd->nd_mrep);
1329 		    return (0);
1330 		}
1331 		if (nd->nd_flag & ND_NFSV3)
1332 		    error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff);
1333 		else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
1334 		    ND_NFSV4) {
1335 			/* Load the directory attributes. */
1336 			error = nfsm_loadattr(nd, dnap);
1337 			if (error == 0)
1338 				*dattrflagp = 1;
1339 		}
1340 		goto nfsmout;
1341 	}
1342 	if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
1343 		/* Load the directory attributes. */
1344 		error = nfsm_loadattr(nd, dnap);
1345 		if (error != 0)
1346 			goto nfsmout;
1347 		*dattrflagp = 1;
1348 		/* Skip over the Lookup and GetFH operation status values. */
1349 		NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1350 	}
1351 	error = nfsm_getfh(nd, nfhpp);
1352 	if (error)
1353 		goto nfsmout;
1354 
1355 	error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1356 	if ((nd->nd_flag & ND_NFSV3) && !error)
1357 		error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff);
1358 nfsmout:
1359 	mbuf_freem(nd->nd_mrep);
1360 	if (!error && nd->nd_repstat)
1361 		error = nd->nd_repstat;
1362 	return (error);
1363 }
1364 
1365 /*
1366  * Do a readlink rpc.
1367  */
1368 APPLESTATIC int
1369 nfsrpc_readlink(vnode_t vp, struct uio *uiop, struct ucred *cred,
1370     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1371 {
1372 	u_int32_t *tl;
1373 	struct nfsrv_descript nfsd, *nd = &nfsd;
1374 	struct nfsnode *np = VTONFS(vp);
1375 	nfsattrbit_t attrbits;
1376 	int error, len, cangetattr = 1;
1377 
1378 	*attrflagp = 0;
1379 	NFSCL_REQSTART(nd, NFSPROC_READLINK, vp);
1380 	if (nd->nd_flag & ND_NFSV4) {
1381 		/*
1382 		 * And do a Getattr op.
1383 		 */
1384 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1385 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1386 		NFSGETATTR_ATTRBIT(&attrbits);
1387 		(void) nfsrv_putattrbit(nd, &attrbits);
1388 	}
1389 	error = nfscl_request(nd, vp, p, cred, stuff);
1390 	if (error)
1391 		return (error);
1392 	if (nd->nd_flag & ND_NFSV3)
1393 		error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1394 	if (!nd->nd_repstat && !error) {
1395 		NFSM_STRSIZ(len, NFS_MAXPATHLEN);
1396 		/*
1397 		 * This seems weird to me, but must have been added to
1398 		 * FreeBSD for some reason. The only thing I can think of
1399 		 * is that there was/is some server that replies with
1400 		 * more link data than it should?
1401 		 */
1402 		if (len == NFS_MAXPATHLEN) {
1403 			NFSLOCKNODE(np);
1404 			if (np->n_size > 0 && np->n_size < NFS_MAXPATHLEN) {
1405 				len = np->n_size;
1406 				cangetattr = 0;
1407 			}
1408 			NFSUNLOCKNODE(np);
1409 		}
1410 		error = nfsm_mbufuio(nd, uiop, len);
1411 		if ((nd->nd_flag & ND_NFSV4) && !error && cangetattr)
1412 			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1413 	}
1414 	if (nd->nd_repstat && !error)
1415 		error = nd->nd_repstat;
1416 nfsmout:
1417 	mbuf_freem(nd->nd_mrep);
1418 	return (error);
1419 }
1420 
1421 /*
1422  * Read operation.
1423  */
1424 APPLESTATIC int
1425 nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred,
1426     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1427 {
1428 	int error, expireret = 0, retrycnt;
1429 	u_int32_t clidrev = 0;
1430 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
1431 	struct nfsnode *np = VTONFS(vp);
1432 	struct ucred *newcred;
1433 	struct nfsfh *nfhp = NULL;
1434 	nfsv4stateid_t stateid;
1435 	void *lckp;
1436 
1437 	if (nmp->nm_clp != NULL)
1438 		clidrev = nmp->nm_clp->nfsc_clientidrev;
1439 	newcred = cred;
1440 	if (NFSHASNFSV4(nmp)) {
1441 		nfhp = np->n_fhp;
1442 		newcred = NFSNEWCRED(cred);
1443 	}
1444 	retrycnt = 0;
1445 	do {
1446 		lckp = NULL;
1447 		if (NFSHASNFSV4(nmp))
1448 			(void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1449 			    NFSV4OPEN_ACCESSREAD, 0, newcred, p, &stateid,
1450 			    &lckp);
1451 		error = nfsrpc_readrpc(vp, uiop, newcred, &stateid, p, nap,
1452 		    attrflagp, stuff);
1453 		if (error == NFSERR_OPENMODE) {
1454 			NFSLOCKMNT(nmp);
1455 			nmp->nm_state |= NFSSTA_OPENMODE;
1456 			NFSUNLOCKMNT(nmp);
1457 		}
1458 		if (error == NFSERR_STALESTATEID)
1459 			nfscl_initiate_recovery(nmp->nm_clp);
1460 		if (lckp != NULL)
1461 			nfscl_lockderef(lckp);
1462 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1463 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1464 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1465 			(void) nfs_catnap(PZERO, error, "nfs_read");
1466 		} else if ((error == NFSERR_EXPIRED ||
1467 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
1468 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1469 		}
1470 		retrycnt++;
1471 	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1472 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1473 	    error == NFSERR_BADSESSION ||
1474 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1475 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1476 	     expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1477 	    (error == NFSERR_OPENMODE && retrycnt < 4));
1478 	if (error && retrycnt >= 4)
1479 		error = EIO;
1480 	if (NFSHASNFSV4(nmp))
1481 		NFSFREECRED(newcred);
1482 	return (error);
1483 }
1484 
1485 /*
1486  * The actual read RPC.
1487  */
1488 static int
1489 nfsrpc_readrpc(vnode_t vp, struct uio *uiop, struct ucred *cred,
1490     nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap,
1491     int *attrflagp, void *stuff)
1492 {
1493 	u_int32_t *tl;
1494 	int error = 0, len, retlen, tsiz, eof = 0;
1495 	struct nfsrv_descript nfsd;
1496 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
1497 	struct nfsrv_descript *nd = &nfsd;
1498 	int rsize;
1499 	off_t tmp_off;
1500 
1501 	*attrflagp = 0;
1502 	tsiz = uio_uio_resid(uiop);
1503 	tmp_off = uiop->uio_offset + tsiz;
1504 	NFSLOCKMNT(nmp);
1505 	if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1506 		NFSUNLOCKMNT(nmp);
1507 		return (EFBIG);
1508 	}
1509 	rsize = nmp->nm_rsize;
1510 	NFSUNLOCKMNT(nmp);
1511 	nd->nd_mrep = NULL;
1512 	while (tsiz > 0) {
1513 		*attrflagp = 0;
1514 		len = (tsiz > rsize) ? rsize : tsiz;
1515 		NFSCL_REQSTART(nd, NFSPROC_READ, vp);
1516 		if (nd->nd_flag & ND_NFSV4)
1517 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1518 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED * 3);
1519 		if (nd->nd_flag & ND_NFSV2) {
1520 			*tl++ = txdr_unsigned(uiop->uio_offset);
1521 			*tl++ = txdr_unsigned(len);
1522 			*tl = 0;
1523 		} else {
1524 			txdr_hyper(uiop->uio_offset, tl);
1525 			*(tl + 2) = txdr_unsigned(len);
1526 		}
1527 		/*
1528 		 * Since I can't do a Getattr for NFSv4 for Write, there
1529 		 * doesn't seem any point in doing one here, either.
1530 		 * (See the comment in nfsrpc_writerpc() for more info.)
1531 		 */
1532 		error = nfscl_request(nd, vp, p, cred, stuff);
1533 		if (error)
1534 			return (error);
1535 		if (nd->nd_flag & ND_NFSV3) {
1536 			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1537 		} else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV2)) {
1538 			error = nfsm_loadattr(nd, nap);
1539 			if (!error)
1540 				*attrflagp = 1;
1541 		}
1542 		if (nd->nd_repstat || error) {
1543 			if (!error)
1544 				error = nd->nd_repstat;
1545 			goto nfsmout;
1546 		}
1547 		if (nd->nd_flag & ND_NFSV3) {
1548 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1549 			eof = fxdr_unsigned(int, *(tl + 1));
1550 		} else if (nd->nd_flag & ND_NFSV4) {
1551 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
1552 			eof = fxdr_unsigned(int, *tl);
1553 		}
1554 		NFSM_STRSIZ(retlen, len);
1555 		error = nfsm_mbufuio(nd, uiop, retlen);
1556 		if (error)
1557 			goto nfsmout;
1558 		mbuf_freem(nd->nd_mrep);
1559 		nd->nd_mrep = NULL;
1560 		tsiz -= retlen;
1561 		if (!(nd->nd_flag & ND_NFSV2)) {
1562 			if (eof || retlen == 0)
1563 				tsiz = 0;
1564 		} else if (retlen < len)
1565 			tsiz = 0;
1566 	}
1567 	return (0);
1568 nfsmout:
1569 	if (nd->nd_mrep != NULL)
1570 		mbuf_freem(nd->nd_mrep);
1571 	return (error);
1572 }
1573 
1574 /*
1575  * nfs write operation
1576  * When called_from_strategy != 0, it should return EIO for an error that
1577  * indicates recovery is in progress, so that the buffer will be left
1578  * dirty and be written back to the server later. If it loops around,
1579  * the recovery thread could get stuck waiting for the buffer and recovery
1580  * will then deadlock.
1581  */
1582 APPLESTATIC int
1583 nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
1584     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
1585     void *stuff, int called_from_strategy)
1586 {
1587 	int error, expireret = 0, retrycnt, nostateid;
1588 	u_int32_t clidrev = 0;
1589 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
1590 	struct nfsnode *np = VTONFS(vp);
1591 	struct ucred *newcred;
1592 	struct nfsfh *nfhp = NULL;
1593 	nfsv4stateid_t stateid;
1594 	void *lckp;
1595 
1596 	*must_commit = 0;
1597 	if (nmp->nm_clp != NULL)
1598 		clidrev = nmp->nm_clp->nfsc_clientidrev;
1599 	newcred = cred;
1600 	if (NFSHASNFSV4(nmp)) {
1601 		newcred = NFSNEWCRED(cred);
1602 		nfhp = np->n_fhp;
1603 	}
1604 	retrycnt = 0;
1605 	do {
1606 		lckp = NULL;
1607 		nostateid = 0;
1608 		if (NFSHASNFSV4(nmp)) {
1609 			(void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1610 			    NFSV4OPEN_ACCESSWRITE, 0, newcred, p, &stateid,
1611 			    &lckp);
1612 			if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
1613 			    stateid.other[2] == 0) {
1614 				nostateid = 1;
1615 				NFSCL_DEBUG(1, "stateid0 in write\n");
1616 			}
1617 		}
1618 
1619 		/*
1620 		 * If there is no stateid for NFSv4, it means this is an
1621 		 * extraneous write after close. Basically a poorly
1622 		 * implemented buffer cache. Just don't do the write.
1623 		 */
1624 		if (nostateid)
1625 			error = 0;
1626 		else
1627 			error = nfsrpc_writerpc(vp, uiop, iomode, must_commit,
1628 			    newcred, &stateid, p, nap, attrflagp, stuff);
1629 		if (error == NFSERR_STALESTATEID)
1630 			nfscl_initiate_recovery(nmp->nm_clp);
1631 		if (lckp != NULL)
1632 			nfscl_lockderef(lckp);
1633 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1634 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1635 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1636 			(void) nfs_catnap(PZERO, error, "nfs_write");
1637 		} else if ((error == NFSERR_EXPIRED ||
1638 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
1639 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1640 		}
1641 		retrycnt++;
1642 	} while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
1643 	    ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
1644 	      error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) ||
1645 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1646 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1647 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
1648 	if (error != 0 && (retrycnt >= 4 ||
1649 	    ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
1650 	      error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0)))
1651 		error = EIO;
1652 	if (NFSHASNFSV4(nmp))
1653 		NFSFREECRED(newcred);
1654 	return (error);
1655 }
1656 
1657 /*
1658  * The actual write RPC.
1659  */
1660 static int
1661 nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode,
1662     int *must_commit, struct ucred *cred, nfsv4stateid_t *stateidp,
1663     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1664 {
1665 	u_int32_t *tl;
1666 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
1667 	struct nfsnode *np = VTONFS(vp);
1668 	int error = 0, len, tsiz, rlen, commit, committed = NFSWRITE_FILESYNC;
1669 	int wccflag = 0, wsize;
1670 	int32_t backup;
1671 	struct nfsrv_descript nfsd;
1672 	struct nfsrv_descript *nd = &nfsd;
1673 	nfsattrbit_t attrbits;
1674 	off_t tmp_off;
1675 
1676 	KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
1677 	*attrflagp = 0;
1678 	tsiz = uio_uio_resid(uiop);
1679 	tmp_off = uiop->uio_offset + tsiz;
1680 	NFSLOCKMNT(nmp);
1681 	if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1682 		NFSUNLOCKMNT(nmp);
1683 		return (EFBIG);
1684 	}
1685 	wsize = nmp->nm_wsize;
1686 	NFSUNLOCKMNT(nmp);
1687 	nd->nd_mrep = NULL;	/* NFSv2 sometimes does a write with */
1688 	nd->nd_repstat = 0;	/* uio_resid == 0, so the while is not done */
1689 	while (tsiz > 0) {
1690 		*attrflagp = 0;
1691 		len = (tsiz > wsize) ? wsize : tsiz;
1692 		NFSCL_REQSTART(nd, NFSPROC_WRITE, vp);
1693 		if (nd->nd_flag & ND_NFSV4) {
1694 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1695 			NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+2*NFSX_UNSIGNED);
1696 			txdr_hyper(uiop->uio_offset, tl);
1697 			tl += 2;
1698 			*tl++ = txdr_unsigned(*iomode);
1699 			*tl = txdr_unsigned(len);
1700 		} else if (nd->nd_flag & ND_NFSV3) {
1701 			NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+3*NFSX_UNSIGNED);
1702 			txdr_hyper(uiop->uio_offset, tl);
1703 			tl += 2;
1704 			*tl++ = txdr_unsigned(len);
1705 			*tl++ = txdr_unsigned(*iomode);
1706 			*tl = txdr_unsigned(len);
1707 		} else {
1708 			u_int32_t x;
1709 
1710 			NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1711 			/*
1712 			 * Not sure why someone changed this, since the
1713 			 * RFC clearly states that "beginoffset" and
1714 			 * "totalcount" are ignored, but it wouldn't
1715 			 * surprise me if there's a busted server out there.
1716 			 */
1717 			/* Set both "begin" and "current" to non-garbage. */
1718 			x = txdr_unsigned((u_int32_t)uiop->uio_offset);
1719 			*tl++ = x;      /* "begin offset" */
1720 			*tl++ = x;      /* "current offset" */
1721 			x = txdr_unsigned(len);
1722 			*tl++ = x;      /* total to this offset */
1723 			*tl = x;        /* size of this write */
1724 
1725 		}
1726 		nfsm_uiombuf(nd, uiop, len);
1727 		/*
1728 		 * Although it is tempting to do a normal Getattr Op in the
1729 		 * NFSv4 compound, the result can be a nearly hung client
1730 		 * system if the Getattr asks for Owner and/or OwnerGroup.
1731 		 * It occurs when the client can't map either the Owner or
1732 		 * Owner_group name in the Getattr reply to a uid/gid. When
1733 		 * there is a cache miss, the kernel does an upcall to the
1734 		 * nfsuserd. Then, it can try and read the local /etc/passwd
1735 		 * or /etc/group file. It can then block in getnewbuf(),
1736 		 * waiting for dirty writes to be pushed to the NFS server.
1737 		 * The only reason this doesn't result in a complete
1738 		 * deadlock, is that the upcall times out and allows
1739 		 * the write to complete. However, progress is so slow
1740 		 * that it might just as well be deadlocked.
1741 		 * As such, we get the rest of the attributes, but not
1742 		 * Owner or Owner_group.
1743 		 * nb: nfscl_loadattrcache() needs to be told that these
1744 		 *     partial attributes from a write rpc are being
1745 		 *     passed in, via a argument flag.
1746 		 */
1747 		if (nd->nd_flag & ND_NFSV4) {
1748 			NFSWRITEGETATTR_ATTRBIT(&attrbits);
1749 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1750 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
1751 			(void) nfsrv_putattrbit(nd, &attrbits);
1752 		}
1753 		error = nfscl_request(nd, vp, p, cred, stuff);
1754 		if (error)
1755 			return (error);
1756 		if (nd->nd_repstat) {
1757 			/*
1758 			 * In case the rpc gets retried, roll
1759 			 * the uio fileds changed by nfsm_uiombuf()
1760 			 * back.
1761 			 */
1762 			uiop->uio_offset -= len;
1763 			uio_uio_resid_add(uiop, len);
1764 			uio_iov_base_add(uiop, -len);
1765 			uio_iov_len_add(uiop, len);
1766 		}
1767 		if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
1768 			error = nfscl_wcc_data(nd, vp, nap, attrflagp,
1769 			    &wccflag, stuff);
1770 			if (error)
1771 				goto nfsmout;
1772 		}
1773 		if (!nd->nd_repstat) {
1774 			if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
1775 				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED
1776 					+ NFSX_VERF);
1777 				rlen = fxdr_unsigned(int, *tl++);
1778 				if (rlen == 0) {
1779 					error = NFSERR_IO;
1780 					goto nfsmout;
1781 				} else if (rlen < len) {
1782 					backup = len - rlen;
1783 					uio_iov_base_add(uiop, -(backup));
1784 					uio_iov_len_add(uiop, backup);
1785 					uiop->uio_offset -= backup;
1786 					uio_uio_resid_add(uiop, backup);
1787 					len = rlen;
1788 				}
1789 				commit = fxdr_unsigned(int, *tl++);
1790 
1791 				/*
1792 				 * Return the lowest commitment level
1793 				 * obtained by any of the RPCs.
1794 				 */
1795 				if (committed == NFSWRITE_FILESYNC)
1796 					committed = commit;
1797 				else if (committed == NFSWRITE_DATASYNC &&
1798 					commit == NFSWRITE_UNSTABLE)
1799 					committed = commit;
1800 				NFSLOCKMNT(nmp);
1801 				if (!NFSHASWRITEVERF(nmp)) {
1802 					NFSBCOPY((caddr_t)tl,
1803 					    (caddr_t)&nmp->nm_verf[0],
1804 					    NFSX_VERF);
1805 					NFSSETWRITEVERF(nmp);
1806 	    			} else if (NFSBCMP(tl, nmp->nm_verf,
1807 				    NFSX_VERF)) {
1808 					*must_commit = 1;
1809 					NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
1810 				}
1811 				NFSUNLOCKMNT(nmp);
1812 			}
1813 			if (nd->nd_flag & ND_NFSV4)
1814 				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1815 			if (nd->nd_flag & (ND_NFSV2 | ND_NFSV4)) {
1816 				error = nfsm_loadattr(nd, nap);
1817 				if (!error)
1818 					*attrflagp = NFS_LATTR_NOSHRINK;
1819 			}
1820 		} else {
1821 			error = nd->nd_repstat;
1822 		}
1823 		if (error)
1824 			goto nfsmout;
1825 		NFSWRITERPC_SETTIME(wccflag, np, nap, (nd->nd_flag & ND_NFSV4));
1826 		mbuf_freem(nd->nd_mrep);
1827 		nd->nd_mrep = NULL;
1828 		tsiz -= len;
1829 	}
1830 nfsmout:
1831 	if (nd->nd_mrep != NULL)
1832 		mbuf_freem(nd->nd_mrep);
1833 	*iomode = committed;
1834 	if (nd->nd_repstat && !error)
1835 		error = nd->nd_repstat;
1836 	return (error);
1837 }
1838 
1839 /*
1840  * nfs mknod rpc
1841  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
1842  * mode set to specify the file type and the size field for rdev.
1843  */
1844 APPLESTATIC int
1845 nfsrpc_mknod(vnode_t dvp, char *name, int namelen, struct vattr *vap,
1846     u_int32_t rdev, enum vtype vtyp, struct ucred *cred, NFSPROC_T *p,
1847     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
1848     int *attrflagp, int *dattrflagp, void *dstuff)
1849 {
1850 	u_int32_t *tl;
1851 	int error = 0;
1852 	struct nfsrv_descript nfsd, *nd = &nfsd;
1853 	nfsattrbit_t attrbits;
1854 
1855 	*nfhpp = NULL;
1856 	*attrflagp = 0;
1857 	*dattrflagp = 0;
1858 	if (namelen > NFS_MAXNAMLEN)
1859 		return (ENAMETOOLONG);
1860 	NFSCL_REQSTART(nd, NFSPROC_MKNOD, dvp);
1861 	if (nd->nd_flag & ND_NFSV4) {
1862 		if (vtyp == VBLK || vtyp == VCHR) {
1863 			NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1864 			*tl++ = vtonfsv34_type(vtyp);
1865 			*tl++ = txdr_unsigned(NFSMAJOR(rdev));
1866 			*tl = txdr_unsigned(NFSMINOR(rdev));
1867 		} else {
1868 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1869 			*tl = vtonfsv34_type(vtyp);
1870 		}
1871 	}
1872 	(void) nfsm_strtom(nd, name, namelen);
1873 	if (nd->nd_flag & ND_NFSV3) {
1874 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1875 		*tl = vtonfsv34_type(vtyp);
1876 	}
1877 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
1878 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
1879 	if ((nd->nd_flag & ND_NFSV3) &&
1880 	    (vtyp == VCHR || vtyp == VBLK)) {
1881 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1882 		*tl++ = txdr_unsigned(NFSMAJOR(rdev));
1883 		*tl = txdr_unsigned(NFSMINOR(rdev));
1884 	}
1885 	if (nd->nd_flag & ND_NFSV4) {
1886 		NFSGETATTR_ATTRBIT(&attrbits);
1887 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1888 		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
1889 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1890 		(void) nfsrv_putattrbit(nd, &attrbits);
1891 	}
1892 	if (nd->nd_flag & ND_NFSV2)
1893 		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZERDEV, rdev);
1894 	error = nfscl_request(nd, dvp, p, cred, dstuff);
1895 	if (error)
1896 		return (error);
1897 	if (nd->nd_flag & ND_NFSV4)
1898 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
1899 	if (!nd->nd_repstat) {
1900 		if (nd->nd_flag & ND_NFSV4) {
1901 			NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
1902 			error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1903 			if (error)
1904 				goto nfsmout;
1905 		}
1906 		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
1907 		if (error)
1908 			goto nfsmout;
1909 	}
1910 	if (nd->nd_flag & ND_NFSV3)
1911 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
1912 	if (!error && nd->nd_repstat)
1913 		error = nd->nd_repstat;
1914 nfsmout:
1915 	mbuf_freem(nd->nd_mrep);
1916 	return (error);
1917 }
1918 
1919 /*
1920  * nfs file create call
1921  * Mostly just call the approriate routine. (I separated out v4, so that
1922  * error recovery wouldn't be as difficult.)
1923  */
1924 APPLESTATIC int
1925 nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap,
1926     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
1927     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
1928     int *attrflagp, int *dattrflagp, void *dstuff)
1929 {
1930 	int error = 0, newone, expireret = 0, retrycnt, unlocked;
1931 	struct nfsclowner *owp;
1932 	struct nfscldeleg *dp;
1933 	struct nfsmount *nmp = VFSTONFS(vnode_mount(dvp));
1934 	u_int32_t clidrev;
1935 
1936 	if (NFSHASNFSV4(nmp)) {
1937 	    retrycnt = 0;
1938 	    do {
1939 		dp = NULL;
1940 		error = nfscl_open(dvp, NULL, 0, (NFSV4OPEN_ACCESSWRITE |
1941 		    NFSV4OPEN_ACCESSREAD), 0, cred, p, &owp, NULL, &newone,
1942 		    NULL, 1);
1943 		if (error)
1944 			return (error);
1945 		if (nmp->nm_clp != NULL)
1946 			clidrev = nmp->nm_clp->nfsc_clientidrev;
1947 		else
1948 			clidrev = 0;
1949 		if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
1950 		    nfs_numnfscbd == 0 || retrycnt > 0)
1951 			error = nfsrpc_createv4(dvp, name, namelen, vap, cverf,
1952 			  fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
1953 			  attrflagp, dattrflagp, dstuff, &unlocked);
1954 		else
1955 			error = nfsrpc_getcreatelayout(dvp, name, namelen, vap,
1956 			  cverf, fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
1957 			  attrflagp, dattrflagp, dstuff, &unlocked);
1958 		/*
1959 		 * There is no need to invalidate cached attributes here,
1960 		 * since new post-delegation issue attributes are always
1961 		 * returned by nfsrpc_createv4() and these will update the
1962 		 * attribute cache.
1963 		 */
1964 		if (dp != NULL)
1965 			(void) nfscl_deleg(nmp->nm_mountp, owp->nfsow_clp,
1966 			    (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp);
1967 		nfscl_ownerrelease(nmp, owp, error, newone, unlocked);
1968 		if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
1969 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1970 		    error == NFSERR_BADSESSION) {
1971 			(void) nfs_catnap(PZERO, error, "nfs_open");
1972 		} else if ((error == NFSERR_EXPIRED ||
1973 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
1974 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1975 			retrycnt++;
1976 		}
1977 	    } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
1978 		error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1979 		error == NFSERR_BADSESSION ||
1980 		((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1981 		 expireret == 0 && clidrev != 0 && retrycnt < 4));
1982 	    if (error && retrycnt >= 4)
1983 		    error = EIO;
1984 	} else {
1985 		error = nfsrpc_createv23(dvp, name, namelen, vap, cverf,
1986 		    fmode, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
1987 		    dstuff);
1988 	}
1989 	return (error);
1990 }
1991 
1992 /*
1993  * The create rpc for v2 and 3.
1994  */
1995 static int
1996 nfsrpc_createv23(vnode_t dvp, char *name, int namelen, struct vattr *vap,
1997     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
1998     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
1999     int *attrflagp, int *dattrflagp, void *dstuff)
2000 {
2001 	u_int32_t *tl;
2002 	int error = 0;
2003 	struct nfsrv_descript nfsd, *nd = &nfsd;
2004 
2005 	*nfhpp = NULL;
2006 	*attrflagp = 0;
2007 	*dattrflagp = 0;
2008 	if (namelen > NFS_MAXNAMLEN)
2009 		return (ENAMETOOLONG);
2010 	NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp);
2011 	(void) nfsm_strtom(nd, name, namelen);
2012 	if (nd->nd_flag & ND_NFSV3) {
2013 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2014 		if (fmode & O_EXCL) {
2015 			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2016 			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2017 			*tl++ = cverf.lval[0];
2018 			*tl = cverf.lval[1];
2019 		} else {
2020 			*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2021 			nfscl_fillsattr(nd, vap, dvp, 0, 0);
2022 		}
2023 	} else {
2024 		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZE0, 0);
2025 	}
2026 	error = nfscl_request(nd, dvp, p, cred, dstuff);
2027 	if (error)
2028 		return (error);
2029 	if (nd->nd_repstat == 0) {
2030 		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2031 		if (error)
2032 			goto nfsmout;
2033 	}
2034 	if (nd->nd_flag & ND_NFSV3)
2035 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2036 	if (nd->nd_repstat != 0 && error == 0)
2037 		error = nd->nd_repstat;
2038 nfsmout:
2039 	mbuf_freem(nd->nd_mrep);
2040 	return (error);
2041 }
2042 
2043 static int
2044 nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2045     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
2046     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2047     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2048     int *dattrflagp, void *dstuff, int *unlockedp)
2049 {
2050 	u_int32_t *tl;
2051 	int error = 0, deleg, newone, ret, acesize, limitby;
2052 	struct nfsrv_descript nfsd, *nd = &nfsd;
2053 	struct nfsclopen *op;
2054 	struct nfscldeleg *dp = NULL;
2055 	struct nfsnode *np;
2056 	struct nfsfh *nfhp;
2057 	nfsattrbit_t attrbits;
2058 	nfsv4stateid_t stateid;
2059 	u_int32_t rflags;
2060 	struct nfsmount *nmp;
2061 	struct nfsclsession *tsep;
2062 
2063 	nmp = VFSTONFS(dvp->v_mount);
2064 	np = VTONFS(dvp);
2065 	*unlockedp = 0;
2066 	*nfhpp = NULL;
2067 	*dpp = NULL;
2068 	*attrflagp = 0;
2069 	*dattrflagp = 0;
2070 	if (namelen > NFS_MAXNAMLEN)
2071 		return (ENAMETOOLONG);
2072 	NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp);
2073 	/*
2074 	 * For V4, this is actually an Open op.
2075 	 */
2076 	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2077 	*tl++ = txdr_unsigned(owp->nfsow_seqid);
2078 	*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
2079 	    NFSV4OPEN_ACCESSREAD);
2080 	*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
2081 	tsep = nfsmnt_mdssession(nmp);
2082 	*tl++ = tsep->nfsess_clientid.lval[0];
2083 	*tl = tsep->nfsess_clientid.lval[1];
2084 	(void) nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
2085 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2086 	*tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
2087 	if (fmode & O_EXCL) {
2088 		if (NFSHASNFSV4N(nmp)) {
2089 			if (NFSHASSESSPERSIST(nmp)) {
2090 				/* Use GUARDED for persistent sessions. */
2091 				*tl = txdr_unsigned(NFSCREATE_GUARDED);
2092 				nfscl_fillsattr(nd, vap, dvp, 0, 0);
2093 			} else {
2094 				/* Otherwise, use EXCLUSIVE4_1. */
2095 				*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
2096 				NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2097 				*tl++ = cverf.lval[0];
2098 				*tl = cverf.lval[1];
2099 				nfscl_fillsattr(nd, vap, dvp, 0, 0);
2100 			}
2101 		} else {
2102 			/* NFSv4.0 */
2103 			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2104 			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2105 			*tl++ = cverf.lval[0];
2106 			*tl = cverf.lval[1];
2107 		}
2108 	} else {
2109 		*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2110 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
2111 	}
2112 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2113 	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
2114 	(void) nfsm_strtom(nd, name, namelen);
2115 	/* Get the new file's handle and attributes. */
2116 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2117 	*tl++ = txdr_unsigned(NFSV4OP_GETFH);
2118 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
2119 	NFSGETATTR_ATTRBIT(&attrbits);
2120 	(void) nfsrv_putattrbit(nd, &attrbits);
2121 	/* Get the directory's post-op attributes. */
2122 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2123 	*tl = txdr_unsigned(NFSV4OP_PUTFH);
2124 	(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
2125 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2126 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
2127 	(void) nfsrv_putattrbit(nd, &attrbits);
2128 	error = nfscl_request(nd, dvp, p, cred, dstuff);
2129 	if (error)
2130 		return (error);
2131 	NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
2132 	if (nd->nd_repstat == 0) {
2133 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2134 		    6 * NFSX_UNSIGNED);
2135 		stateid.seqid = *tl++;
2136 		stateid.other[0] = *tl++;
2137 		stateid.other[1] = *tl++;
2138 		stateid.other[2] = *tl;
2139 		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
2140 		(void) nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2141 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2142 		deleg = fxdr_unsigned(int, *tl);
2143 		if (deleg == NFSV4OPEN_DELEGATEREAD ||
2144 		    deleg == NFSV4OPEN_DELEGATEWRITE) {
2145 			if (!(owp->nfsow_clp->nfsc_flags &
2146 			      NFSCLFLAGS_FIRSTDELEG))
2147 				owp->nfsow_clp->nfsc_flags |=
2148 				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
2149 			MALLOC(dp, struct nfscldeleg *,
2150 			    sizeof (struct nfscldeleg) + NFSX_V4FHMAX,
2151 			    M_NFSCLDELEG, M_WAITOK);
2152 			LIST_INIT(&dp->nfsdl_owner);
2153 			LIST_INIT(&dp->nfsdl_lock);
2154 			dp->nfsdl_clp = owp->nfsow_clp;
2155 			newnfs_copyincred(cred, &dp->nfsdl_cred);
2156 			nfscl_lockinit(&dp->nfsdl_rwlock);
2157 			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2158 			    NFSX_UNSIGNED);
2159 			dp->nfsdl_stateid.seqid = *tl++;
2160 			dp->nfsdl_stateid.other[0] = *tl++;
2161 			dp->nfsdl_stateid.other[1] = *tl++;
2162 			dp->nfsdl_stateid.other[2] = *tl++;
2163 			ret = fxdr_unsigned(int, *tl);
2164 			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
2165 				dp->nfsdl_flags = NFSCLDL_WRITE;
2166 				/*
2167 				 * Indicates how much the file can grow.
2168 				 */
2169 				NFSM_DISSECT(tl, u_int32_t *,
2170 				    3 * NFSX_UNSIGNED);
2171 				limitby = fxdr_unsigned(int, *tl++);
2172 				switch (limitby) {
2173 				case NFSV4OPEN_LIMITSIZE:
2174 					dp->nfsdl_sizelimit = fxdr_hyper(tl);
2175 					break;
2176 				case NFSV4OPEN_LIMITBLOCKS:
2177 					dp->nfsdl_sizelimit =
2178 					    fxdr_unsigned(u_int64_t, *tl++);
2179 					dp->nfsdl_sizelimit *=
2180 					    fxdr_unsigned(u_int64_t, *tl);
2181 					break;
2182 				default:
2183 					error = NFSERR_BADXDR;
2184 					goto nfsmout;
2185 				}
2186 			} else {
2187 				dp->nfsdl_flags = NFSCLDL_READ;
2188 			}
2189 			if (ret)
2190 				dp->nfsdl_flags |= NFSCLDL_RECALL;
2191 			error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret,
2192 			    &acesize, p);
2193 			if (error)
2194 				goto nfsmout;
2195 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
2196 			error = NFSERR_BADXDR;
2197 			goto nfsmout;
2198 		}
2199 		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2200 		if (error)
2201 			goto nfsmout;
2202 		/* Get rid of the PutFH and Getattr status values. */
2203 		NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2204 		/* Load the directory attributes. */
2205 		error = nfsm_loadattr(nd, dnap);
2206 		if (error)
2207 			goto nfsmout;
2208 		*dattrflagp = 1;
2209 		if (dp != NULL && *attrflagp) {
2210 			dp->nfsdl_change = nnap->na_filerev;
2211 			dp->nfsdl_modtime = nnap->na_mtime;
2212 			dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
2213 		}
2214 		/*
2215 		 * We can now complete the Open state.
2216 		 */
2217 		nfhp = *nfhpp;
2218 		if (dp != NULL) {
2219 			dp->nfsdl_fhlen = nfhp->nfh_len;
2220 			NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh, nfhp->nfh_len);
2221 		}
2222 		/*
2223 		 * Get an Open structure that will be
2224 		 * attached to the OpenOwner, acquired already.
2225 		 */
2226 		error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len,
2227 		    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
2228 		    cred, p, NULL, &op, &newone, NULL, 0);
2229 		if (error)
2230 			goto nfsmout;
2231 		op->nfso_stateid = stateid;
2232 		newnfs_copyincred(cred, &op->nfso_cred);
2233 		if ((rflags & NFSV4OPEN_RESULTCONFIRM)) {
2234 		    do {
2235 			ret = nfsrpc_openconfirm(dvp, nfhp->nfh_fh,
2236 			    nfhp->nfh_len, op, cred, p);
2237 			if (ret == NFSERR_DELAY)
2238 			    (void) nfs_catnap(PZERO, ret, "nfs_create");
2239 		    } while (ret == NFSERR_DELAY);
2240 		    error = ret;
2241 		}
2242 
2243 		/*
2244 		 * If the server is handing out delegations, but we didn't
2245 		 * get one because an OpenConfirm was required, try the
2246 		 * Open again, to get a delegation. This is a harmless no-op,
2247 		 * from a server's point of view.
2248 		 */
2249 		if ((rflags & NFSV4OPEN_RESULTCONFIRM) &&
2250 		    (owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) &&
2251 		    !error && dp == NULL) {
2252 		    do {
2253 			ret = nfsrpc_openrpc(VFSTONFS(vnode_mount(dvp)), dvp,
2254 			    np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
2255 			    nfhp->nfh_fh, nfhp->nfh_len,
2256 			    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), op,
2257 			    name, namelen, &dp, 0, 0x0, cred, p, 0, 1);
2258 			if (ret == NFSERR_DELAY)
2259 			    (void) nfs_catnap(PZERO, ret, "nfs_crt2");
2260 		    } while (ret == NFSERR_DELAY);
2261 		    if (ret) {
2262 			if (dp != NULL) {
2263 				FREE((caddr_t)dp, M_NFSCLDELEG);
2264 				dp = NULL;
2265 			}
2266 			if (ret == NFSERR_STALECLIENTID ||
2267 			    ret == NFSERR_STALEDONTRECOVER ||
2268 			    ret == NFSERR_BADSESSION)
2269 				error = ret;
2270 		    }
2271 		}
2272 		nfscl_openrelease(nmp, op, error, newone);
2273 		*unlockedp = 1;
2274 	}
2275 	if (nd->nd_repstat != 0 && error == 0)
2276 		error = nd->nd_repstat;
2277 	if (error == NFSERR_STALECLIENTID)
2278 		nfscl_initiate_recovery(owp->nfsow_clp);
2279 nfsmout:
2280 	if (!error)
2281 		*dpp = dp;
2282 	else if (dp != NULL)
2283 		FREE((caddr_t)dp, M_NFSCLDELEG);
2284 	mbuf_freem(nd->nd_mrep);
2285 	return (error);
2286 }
2287 
2288 /*
2289  * Nfs remove rpc
2290  */
2291 APPLESTATIC int
2292 nfsrpc_remove(vnode_t dvp, char *name, int namelen, vnode_t vp,
2293     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp,
2294     void *dstuff)
2295 {
2296 	u_int32_t *tl;
2297 	struct nfsrv_descript nfsd, *nd = &nfsd;
2298 	struct nfsnode *np;
2299 	struct nfsmount *nmp;
2300 	nfsv4stateid_t dstateid;
2301 	int error, ret = 0, i;
2302 
2303 	*dattrflagp = 0;
2304 	if (namelen > NFS_MAXNAMLEN)
2305 		return (ENAMETOOLONG);
2306 	nmp = VFSTONFS(vnode_mount(dvp));
2307 tryagain:
2308 	if (NFSHASNFSV4(nmp) && ret == 0) {
2309 		ret = nfscl_removedeleg(vp, p, &dstateid);
2310 		if (ret == 1) {
2311 			NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp);
2312 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
2313 			    NFSX_UNSIGNED);
2314 			if (NFSHASNFSV4N(nmp))
2315 				*tl++ = 0;
2316 			else
2317 				*tl++ = dstateid.seqid;
2318 			*tl++ = dstateid.other[0];
2319 			*tl++ = dstateid.other[1];
2320 			*tl++ = dstateid.other[2];
2321 			*tl = txdr_unsigned(NFSV4OP_PUTFH);
2322 			np = VTONFS(dvp);
2323 			(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2324 			    np->n_fhp->nfh_len, 0);
2325 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2326 			*tl = txdr_unsigned(NFSV4OP_REMOVE);
2327 		}
2328 	} else {
2329 		ret = 0;
2330 	}
2331 	if (ret == 0)
2332 		NFSCL_REQSTART(nd, NFSPROC_REMOVE, dvp);
2333 	(void) nfsm_strtom(nd, name, namelen);
2334 	error = nfscl_request(nd, dvp, p, cred, dstuff);
2335 	if (error)
2336 		return (error);
2337 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2338 		/* For NFSv4, parse out any Delereturn replies. */
2339 		if (ret > 0 && nd->nd_repstat != 0 &&
2340 		    (nd->nd_flag & ND_NOMOREDATA)) {
2341 			/*
2342 			 * If the Delegreturn failed, try again without
2343 			 * it. The server will Recall, as required.
2344 			 */
2345 			mbuf_freem(nd->nd_mrep);
2346 			goto tryagain;
2347 		}
2348 		for (i = 0; i < (ret * 2); i++) {
2349 			if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2350 			    ND_NFSV4) {
2351 			    NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2352 			    if (*(tl + 1))
2353 				nd->nd_flag |= ND_NOMOREDATA;
2354 			}
2355 		}
2356 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2357 	}
2358 	if (nd->nd_repstat && !error)
2359 		error = nd->nd_repstat;
2360 nfsmout:
2361 	mbuf_freem(nd->nd_mrep);
2362 	return (error);
2363 }
2364 
2365 /*
2366  * Do an nfs rename rpc.
2367  */
2368 APPLESTATIC int
2369 nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen,
2370     vnode_t tdvp, vnode_t tvp, char *tnameptr, int tnamelen, struct ucred *cred,
2371     NFSPROC_T *p, struct nfsvattr *fnap, struct nfsvattr *tnap,
2372     int *fattrflagp, int *tattrflagp, void *fstuff, void *tstuff)
2373 {
2374 	u_int32_t *tl;
2375 	struct nfsrv_descript nfsd, *nd = &nfsd;
2376 	struct nfsmount *nmp;
2377 	struct nfsnode *np;
2378 	nfsattrbit_t attrbits;
2379 	nfsv4stateid_t fdstateid, tdstateid;
2380 	int error = 0, ret = 0, gottd = 0, gotfd = 0, i;
2381 
2382 	*fattrflagp = 0;
2383 	*tattrflagp = 0;
2384 	nmp = VFSTONFS(vnode_mount(fdvp));
2385 	if (fnamelen > NFS_MAXNAMLEN || tnamelen > NFS_MAXNAMLEN)
2386 		return (ENAMETOOLONG);
2387 tryagain:
2388 	if (NFSHASNFSV4(nmp) && ret == 0) {
2389 		ret = nfscl_renamedeleg(fvp, &fdstateid, &gotfd, tvp,
2390 		    &tdstateid, &gottd, p);
2391 		if (gotfd && gottd) {
2392 			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME2, fvp);
2393 		} else if (gotfd) {
2394 			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, fvp);
2395 		} else if (gottd) {
2396 			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, tvp);
2397 		}
2398 		if (gotfd) {
2399 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
2400 			if (NFSHASNFSV4N(nmp))
2401 				*tl++ = 0;
2402 			else
2403 				*tl++ = fdstateid.seqid;
2404 			*tl++ = fdstateid.other[0];
2405 			*tl++ = fdstateid.other[1];
2406 			*tl = fdstateid.other[2];
2407 			if (gottd) {
2408 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2409 				*tl = txdr_unsigned(NFSV4OP_PUTFH);
2410 				np = VTONFS(tvp);
2411 				(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2412 				    np->n_fhp->nfh_len, 0);
2413 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2414 				*tl = txdr_unsigned(NFSV4OP_DELEGRETURN);
2415 			}
2416 		}
2417 		if (gottd) {
2418 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
2419 			if (NFSHASNFSV4N(nmp))
2420 				*tl++ = 0;
2421 			else
2422 				*tl++ = tdstateid.seqid;
2423 			*tl++ = tdstateid.other[0];
2424 			*tl++ = tdstateid.other[1];
2425 			*tl = tdstateid.other[2];
2426 		}
2427 		if (ret > 0) {
2428 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2429 			*tl = txdr_unsigned(NFSV4OP_PUTFH);
2430 			np = VTONFS(fdvp);
2431 			(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2432 			    np->n_fhp->nfh_len, 0);
2433 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2434 			*tl = txdr_unsigned(NFSV4OP_SAVEFH);
2435 		}
2436 	} else {
2437 		ret = 0;
2438 	}
2439 	if (ret == 0)
2440 		NFSCL_REQSTART(nd, NFSPROC_RENAME, fdvp);
2441 	if (nd->nd_flag & ND_NFSV4) {
2442 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2443 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2444 		NFSWCCATTR_ATTRBIT(&attrbits);
2445 		(void) nfsrv_putattrbit(nd, &attrbits);
2446 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2447 		*tl = txdr_unsigned(NFSV4OP_PUTFH);
2448 		(void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh,
2449 		    VTONFS(tdvp)->n_fhp->nfh_len, 0);
2450 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2451 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2452 		(void) nfsrv_putattrbit(nd, &attrbits);
2453 		nd->nd_flag |= ND_V4WCCATTR;
2454 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2455 		*tl = txdr_unsigned(NFSV4OP_RENAME);
2456 	}
2457 	(void) nfsm_strtom(nd, fnameptr, fnamelen);
2458 	if (!(nd->nd_flag & ND_NFSV4))
2459 		(void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh,
2460 			VTONFS(tdvp)->n_fhp->nfh_len, 0);
2461 	(void) nfsm_strtom(nd, tnameptr, tnamelen);
2462 	error = nfscl_request(nd, fdvp, p, cred, fstuff);
2463 	if (error)
2464 		return (error);
2465 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2466 		/* For NFSv4, parse out any Delereturn replies. */
2467 		if (ret > 0 && nd->nd_repstat != 0 &&
2468 		    (nd->nd_flag & ND_NOMOREDATA)) {
2469 			/*
2470 			 * If the Delegreturn failed, try again without
2471 			 * it. The server will Recall, as required.
2472 			 */
2473 			mbuf_freem(nd->nd_mrep);
2474 			goto tryagain;
2475 		}
2476 		for (i = 0; i < (ret * 2); i++) {
2477 			if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2478 			    ND_NFSV4) {
2479 			    NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2480 			    if (*(tl + 1)) {
2481 				if (i == 0 && ret > 1) {
2482 				    /*
2483 				     * If the Delegreturn failed, try again
2484 				     * without it. The server will Recall, as
2485 				     * required.
2486 				     * If ret > 1, the first iteration of this
2487 				     * loop is the second DelegReturn result.
2488 				     */
2489 				    mbuf_freem(nd->nd_mrep);
2490 				    goto tryagain;
2491 				} else {
2492 				    nd->nd_flag |= ND_NOMOREDATA;
2493 				}
2494 			    }
2495 			}
2496 		}
2497 		/* Now, the first wcc attribute reply. */
2498 		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
2499 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2500 			if (*(tl + 1))
2501 				nd->nd_flag |= ND_NOMOREDATA;
2502 		}
2503 		error = nfscl_wcc_data(nd, fdvp, fnap, fattrflagp, NULL,
2504 		    fstuff);
2505 		/* and the second wcc attribute reply. */
2506 		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 &&
2507 		    !error) {
2508 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2509 			if (*(tl + 1))
2510 				nd->nd_flag |= ND_NOMOREDATA;
2511 		}
2512 		if (!error)
2513 			error = nfscl_wcc_data(nd, tdvp, tnap, tattrflagp,
2514 			    NULL, tstuff);
2515 	}
2516 	if (nd->nd_repstat && !error)
2517 		error = nd->nd_repstat;
2518 nfsmout:
2519 	mbuf_freem(nd->nd_mrep);
2520 	return (error);
2521 }
2522 
2523 /*
2524  * nfs hard link create rpc
2525  */
2526 APPLESTATIC int
2527 nfsrpc_link(vnode_t dvp, vnode_t vp, char *name, int namelen,
2528     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2529     struct nfsvattr *nap, int *attrflagp, int *dattrflagp, void *dstuff)
2530 {
2531 	u_int32_t *tl;
2532 	struct nfsrv_descript nfsd, *nd = &nfsd;
2533 	nfsattrbit_t attrbits;
2534 	int error = 0;
2535 
2536 	*attrflagp = 0;
2537 	*dattrflagp = 0;
2538 	if (namelen > NFS_MAXNAMLEN)
2539 		return (ENAMETOOLONG);
2540 	NFSCL_REQSTART(nd, NFSPROC_LINK, vp);
2541 	if (nd->nd_flag & ND_NFSV4) {
2542 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2543 		*tl = txdr_unsigned(NFSV4OP_PUTFH);
2544 	}
2545 	(void) nfsm_fhtom(nd, VTONFS(dvp)->n_fhp->nfh_fh,
2546 		VTONFS(dvp)->n_fhp->nfh_len, 0);
2547 	if (nd->nd_flag & ND_NFSV4) {
2548 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2549 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2550 		NFSWCCATTR_ATTRBIT(&attrbits);
2551 		(void) nfsrv_putattrbit(nd, &attrbits);
2552 		nd->nd_flag |= ND_V4WCCATTR;
2553 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2554 		*tl = txdr_unsigned(NFSV4OP_LINK);
2555 	}
2556 	(void) nfsm_strtom(nd, name, namelen);
2557 	error = nfscl_request(nd, vp, p, cred, dstuff);
2558 	if (error)
2559 		return (error);
2560 	if (nd->nd_flag & ND_NFSV3) {
2561 		error = nfscl_postop_attr(nd, nap, attrflagp, dstuff);
2562 		if (!error)
2563 			error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
2564 			    NULL, dstuff);
2565 	} else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
2566 		/*
2567 		 * First, parse out the PutFH and Getattr result.
2568 		 */
2569 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2570 		if (!(*(tl + 1)))
2571 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2572 		if (*(tl + 1))
2573 			nd->nd_flag |= ND_NOMOREDATA;
2574 		/*
2575 		 * Get the pre-op attributes.
2576 		 */
2577 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2578 	}
2579 	if (nd->nd_repstat && !error)
2580 		error = nd->nd_repstat;
2581 nfsmout:
2582 	mbuf_freem(nd->nd_mrep);
2583 	return (error);
2584 }
2585 
2586 /*
2587  * nfs symbolic link create rpc
2588  */
2589 APPLESTATIC int
2590 nfsrpc_symlink(vnode_t dvp, char *name, int namelen, char *target,
2591     struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2592     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2593     int *dattrflagp, void *dstuff)
2594 {
2595 	u_int32_t *tl;
2596 	struct nfsrv_descript nfsd, *nd = &nfsd;
2597 	struct nfsmount *nmp;
2598 	int slen, error = 0;
2599 
2600 	*nfhpp = NULL;
2601 	*attrflagp = 0;
2602 	*dattrflagp = 0;
2603 	nmp = VFSTONFS(vnode_mount(dvp));
2604 	slen = strlen(target);
2605 	if (slen > NFS_MAXPATHLEN || namelen > NFS_MAXNAMLEN)
2606 		return (ENAMETOOLONG);
2607 	NFSCL_REQSTART(nd, NFSPROC_SYMLINK, dvp);
2608 	if (nd->nd_flag & ND_NFSV4) {
2609 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2610 		*tl = txdr_unsigned(NFLNK);
2611 		(void) nfsm_strtom(nd, target, slen);
2612 	}
2613 	(void) nfsm_strtom(nd, name, namelen);
2614 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
2615 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
2616 	if (!(nd->nd_flag & ND_NFSV4))
2617 		(void) nfsm_strtom(nd, target, slen);
2618 	if (nd->nd_flag & ND_NFSV2)
2619 		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
2620 	error = nfscl_request(nd, dvp, p, cred, dstuff);
2621 	if (error)
2622 		return (error);
2623 	if (nd->nd_flag & ND_NFSV4)
2624 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2625 	if ((nd->nd_flag & ND_NFSV3) && !error) {
2626 		if (!nd->nd_repstat)
2627 			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2628 		if (!error)
2629 			error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
2630 			    NULL, dstuff);
2631 	}
2632 	if (nd->nd_repstat && !error)
2633 		error = nd->nd_repstat;
2634 	mbuf_freem(nd->nd_mrep);
2635 	/*
2636 	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
2637 	 * Only do this if vfs.nfs.ignore_eexist is set.
2638 	 * Never do this for NFSv4.1 or later minor versions, since sessions
2639 	 * should guarantee "exactly once" RPC semantics.
2640 	 */
2641 	if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
2642 	    nmp->nm_minorvers == 0))
2643 		error = 0;
2644 	return (error);
2645 }
2646 
2647 /*
2648  * nfs make dir rpc
2649  */
2650 APPLESTATIC int
2651 nfsrpc_mkdir(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2652     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2653     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2654     int *dattrflagp, void *dstuff)
2655 {
2656 	u_int32_t *tl;
2657 	struct nfsrv_descript nfsd, *nd = &nfsd;
2658 	nfsattrbit_t attrbits;
2659 	int error = 0;
2660 	struct nfsfh *fhp;
2661 	struct nfsmount *nmp;
2662 
2663 	*nfhpp = NULL;
2664 	*attrflagp = 0;
2665 	*dattrflagp = 0;
2666 	nmp = VFSTONFS(vnode_mount(dvp));
2667 	fhp = VTONFS(dvp)->n_fhp;
2668 	if (namelen > NFS_MAXNAMLEN)
2669 		return (ENAMETOOLONG);
2670 	NFSCL_REQSTART(nd, NFSPROC_MKDIR, dvp);
2671 	if (nd->nd_flag & ND_NFSV4) {
2672 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2673 		*tl = txdr_unsigned(NFDIR);
2674 	}
2675 	(void) nfsm_strtom(nd, name, namelen);
2676 	nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
2677 	if (nd->nd_flag & ND_NFSV4) {
2678 		NFSGETATTR_ATTRBIT(&attrbits);
2679 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2680 		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
2681 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2682 		(void) nfsrv_putattrbit(nd, &attrbits);
2683 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2684 		*tl = txdr_unsigned(NFSV4OP_PUTFH);
2685 		(void) nfsm_fhtom(nd, fhp->nfh_fh, fhp->nfh_len, 0);
2686 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2687 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2688 		(void) nfsrv_putattrbit(nd, &attrbits);
2689 	}
2690 	error = nfscl_request(nd, dvp, p, cred, dstuff);
2691 	if (error)
2692 		return (error);
2693 	if (nd->nd_flag & ND_NFSV4)
2694 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2695 	if (!nd->nd_repstat && !error) {
2696 		if (nd->nd_flag & ND_NFSV4) {
2697 			NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2698 			error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2699 		}
2700 		if (!error)
2701 			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2702 		if (error == 0 && (nd->nd_flag & ND_NFSV4) != 0) {
2703 			/* Get rid of the PutFH and Getattr status values. */
2704 			NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2705 			/* Load the directory attributes. */
2706 			error = nfsm_loadattr(nd, dnap);
2707 			if (error == 0)
2708 				*dattrflagp = 1;
2709 		}
2710 	}
2711 	if ((nd->nd_flag & ND_NFSV3) && !error)
2712 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2713 	if (nd->nd_repstat && !error)
2714 		error = nd->nd_repstat;
2715 nfsmout:
2716 	mbuf_freem(nd->nd_mrep);
2717 	/*
2718 	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
2719 	 * Only do this if vfs.nfs.ignore_eexist is set.
2720 	 * Never do this for NFSv4.1 or later minor versions, since sessions
2721 	 * should guarantee "exactly once" RPC semantics.
2722 	 */
2723 	if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
2724 	    nmp->nm_minorvers == 0))
2725 		error = 0;
2726 	return (error);
2727 }
2728 
2729 /*
2730  * nfs remove directory call
2731  */
2732 APPLESTATIC int
2733 nfsrpc_rmdir(vnode_t dvp, char *name, int namelen, struct ucred *cred,
2734     NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp, void *dstuff)
2735 {
2736 	struct nfsrv_descript nfsd, *nd = &nfsd;
2737 	int error = 0;
2738 
2739 	*dattrflagp = 0;
2740 	if (namelen > NFS_MAXNAMLEN)
2741 		return (ENAMETOOLONG);
2742 	NFSCL_REQSTART(nd, NFSPROC_RMDIR, dvp);
2743 	(void) nfsm_strtom(nd, name, namelen);
2744 	error = nfscl_request(nd, dvp, p, cred, dstuff);
2745 	if (error)
2746 		return (error);
2747 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
2748 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2749 	if (nd->nd_repstat && !error)
2750 		error = nd->nd_repstat;
2751 	mbuf_freem(nd->nd_mrep);
2752 	/*
2753 	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
2754 	 */
2755 	if (error == ENOENT)
2756 		error = 0;
2757 	return (error);
2758 }
2759 
2760 /*
2761  * Readdir rpc.
2762  * Always returns with either uio_resid unchanged, if you are at the
2763  * end of the directory, or uio_resid == 0, with all DIRBLKSIZ chunks
2764  * filled in.
2765  * I felt this would allow caching of directory blocks more easily
2766  * than returning a pertially filled block.
2767  * Directory offset cookies:
2768  * Oh my, what to do with them...
2769  * I can think of three ways to deal with them:
2770  * 1 - have the layer above these RPCs maintain a map between logical
2771  *     directory byte offsets and the NFS directory offset cookies
2772  * 2 - pass the opaque directory offset cookies up into userland
2773  *     and let the libc functions deal with them, via the system call
2774  * 3 - return them to userland in the "struct dirent", so future versions
2775  *     of libc can use them and do whatever is necessary to make things work
2776  *     above these rpc calls, in the meantime
2777  * For now, I do #3 by "hiding" the directory offset cookies after the
2778  * d_name field in struct dirent. This is space inside d_reclen that
2779  * will be ignored by anything that doesn't know about them.
2780  * The directory offset cookies are filled in as the last 8 bytes of
2781  * each directory entry, after d_name. Someday, the userland libc
2782  * functions may be able to use these. In the meantime, it satisfies
2783  * OpenBSD's requirements for cookies being returned.
2784  * If expects the directory offset cookie for the read to be in uio_offset
2785  * and returns the one for the next entry after this directory block in
2786  * there, as well.
2787  */
2788 APPLESTATIC int
2789 nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
2790     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
2791     int *eofp, void *stuff)
2792 {
2793 	int len, left;
2794 	struct dirent *dp = NULL;
2795 	u_int32_t *tl;
2796 	nfsquad_t cookie, ncookie;
2797 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
2798 	struct nfsnode *dnp = VTONFS(vp);
2799 	struct nfsvattr nfsva;
2800 	struct nfsrv_descript nfsd, *nd = &nfsd;
2801 	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
2802 	int reqsize, tryformoredirs = 1, readsize, eof = 0, gotmnton = 0;
2803 	u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
2804 	char *cp;
2805 	nfsattrbit_t attrbits, dattrbits;
2806 	u_int32_t rderr, *tl2 = NULL;
2807 	size_t tresid;
2808 
2809 	KASSERT(uiop->uio_iovcnt == 1 &&
2810 	    (uio_uio_resid(uiop) & (DIRBLKSIZ - 1)) == 0,
2811 	    ("nfs readdirrpc bad uio"));
2812 
2813 	/*
2814 	 * There is no point in reading a lot more than uio_resid, however
2815 	 * adding one additional DIRBLKSIZ makes sense. Since uio_resid
2816 	 * and nm_readdirsize are both exact multiples of DIRBLKSIZ, this
2817 	 * will never make readsize > nm_readdirsize.
2818 	 */
2819 	readsize = nmp->nm_readdirsize;
2820 	if (readsize > uio_uio_resid(uiop))
2821 		readsize = uio_uio_resid(uiop) + DIRBLKSIZ;
2822 
2823 	*attrflagp = 0;
2824 	if (eofp)
2825 		*eofp = 0;
2826 	tresid = uio_uio_resid(uiop);
2827 	cookie.lval[0] = cookiep->nfsuquad[0];
2828 	cookie.lval[1] = cookiep->nfsuquad[1];
2829 	nd->nd_mrep = NULL;
2830 
2831 	/*
2832 	 * For NFSv4, first create the "." and ".." entries.
2833 	 */
2834 	if (NFSHASNFSV4(nmp)) {
2835 		reqsize = 6 * NFSX_UNSIGNED;
2836 		NFSGETATTR_ATTRBIT(&dattrbits);
2837 		NFSZERO_ATTRBIT(&attrbits);
2838 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
2839 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE);
2840 		if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
2841 		    NFSATTRBIT_MOUNTEDONFILEID)) {
2842 			NFSSETBIT_ATTRBIT(&attrbits,
2843 			    NFSATTRBIT_MOUNTEDONFILEID);
2844 			gotmnton = 1;
2845 		} else {
2846 			/*
2847 			 * Must fake it. Use the fileno, except when the
2848 			 * fsid is != to that of the directory. For that
2849 			 * case, generate a fake fileno that is not the same.
2850 			 */
2851 			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
2852 			gotmnton = 0;
2853 		}
2854 
2855 		/*
2856 		 * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
2857 		 */
2858 		if (uiop->uio_offset == 0) {
2859 			NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp);
2860 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2861 			*tl++ = txdr_unsigned(NFSV4OP_GETFH);
2862 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
2863 			(void) nfsrv_putattrbit(nd, &attrbits);
2864 			error = nfscl_request(nd, vp, p, cred, stuff);
2865 			if (error)
2866 			    return (error);
2867 			dotfileid = 0;	/* Fake out the compiler. */
2868 			if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
2869 			    error = nfsm_loadattr(nd, &nfsva);
2870 			    if (error != 0)
2871 				goto nfsmout;
2872 			    dotfileid = nfsva.na_fileid;
2873 			}
2874 			if (nd->nd_repstat == 0) {
2875 			    NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2876 			    len = fxdr_unsigned(int, *(tl + 4));
2877 			    if (len > 0 && len <= NFSX_V4FHMAX)
2878 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
2879 			    else
2880 				error = EPERM;
2881 			    if (!error) {
2882 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
2883 				nfsva.na_mntonfileno = UINT64_MAX;
2884 				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
2885 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
2886 				    NULL, NULL, NULL, p, cred);
2887 				if (error) {
2888 				    dotdotfileid = dotfileid;
2889 				} else if (gotmnton) {
2890 				    if (nfsva.na_mntonfileno != UINT64_MAX)
2891 					dotdotfileid = nfsva.na_mntonfileno;
2892 				    else
2893 					dotdotfileid = nfsva.na_fileid;
2894 				} else if (nfsva.na_filesid[0] ==
2895 				    dnp->n_vattr.na_filesid[0] &&
2896 				    nfsva.na_filesid[1] ==
2897 				    dnp->n_vattr.na_filesid[1]) {
2898 				    dotdotfileid = nfsva.na_fileid;
2899 				} else {
2900 				    do {
2901 					fakefileno--;
2902 				    } while (fakefileno ==
2903 					nfsva.na_fileid);
2904 				    dotdotfileid = fakefileno;
2905 				}
2906 			    }
2907 			} else if (nd->nd_repstat == NFSERR_NOENT) {
2908 			    /*
2909 			     * Lookupp returns NFSERR_NOENT when we are
2910 			     * at the root, so just use the current dir.
2911 			     */
2912 			    nd->nd_repstat = 0;
2913 			    dotdotfileid = dotfileid;
2914 			} else {
2915 			    error = nd->nd_repstat;
2916 			}
2917 			mbuf_freem(nd->nd_mrep);
2918 			if (error)
2919 			    return (error);
2920 			nd->nd_mrep = NULL;
2921 			dp = (struct dirent *)uio_iov_base(uiop);
2922 			dp->d_off = 0;
2923 			dp->d_type = DT_DIR;
2924 			dp->d_fileno = dotfileid;
2925 			dp->d_namlen = 1;
2926 			*((uint64_t *)dp->d_name) = 0;	/* Zero pad it. */
2927 			dp->d_name[0] = '.';
2928 			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
2929 			/*
2930 			 * Just make these offset cookie 0.
2931 			 */
2932 			tl = (u_int32_t *)&dp->d_name[8];
2933 			*tl++ = 0;
2934 			*tl = 0;
2935 			blksiz += dp->d_reclen;
2936 			uio_uio_resid_add(uiop, -(dp->d_reclen));
2937 			uiop->uio_offset += dp->d_reclen;
2938 			uio_iov_base_add(uiop, dp->d_reclen);
2939 			uio_iov_len_add(uiop, -(dp->d_reclen));
2940 			dp = (struct dirent *)uio_iov_base(uiop);
2941 			dp->d_off = 0;
2942 			dp->d_type = DT_DIR;
2943 			dp->d_fileno = dotdotfileid;
2944 			dp->d_namlen = 2;
2945 			*((uint64_t *)dp->d_name) = 0;
2946 			dp->d_name[0] = '.';
2947 			dp->d_name[1] = '.';
2948 			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
2949 			/*
2950 			 * Just make these offset cookie 0.
2951 			 */
2952 			tl = (u_int32_t *)&dp->d_name[8];
2953 			*tl++ = 0;
2954 			*tl = 0;
2955 			blksiz += dp->d_reclen;
2956 			uio_uio_resid_add(uiop, -(dp->d_reclen));
2957 			uiop->uio_offset += dp->d_reclen;
2958 			uio_iov_base_add(uiop, dp->d_reclen);
2959 			uio_iov_len_add(uiop, -(dp->d_reclen));
2960 		}
2961 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR);
2962 	} else {
2963 		reqsize = 5 * NFSX_UNSIGNED;
2964 	}
2965 
2966 
2967 	/*
2968 	 * Loop around doing readdir rpc's of size readsize.
2969 	 * The stopping criteria is EOF or buffer full.
2970 	 */
2971 	while (more_dirs && bigenough) {
2972 		*attrflagp = 0;
2973 		NFSCL_REQSTART(nd, NFSPROC_READDIR, vp);
2974 		if (nd->nd_flag & ND_NFSV2) {
2975 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2976 			*tl++ = cookie.lval[1];
2977 			*tl = txdr_unsigned(readsize);
2978 		} else {
2979 			NFSM_BUILD(tl, u_int32_t *, reqsize);
2980 			*tl++ = cookie.lval[0];
2981 			*tl++ = cookie.lval[1];
2982 			if (cookie.qval == 0) {
2983 				*tl++ = 0;
2984 				*tl++ = 0;
2985 			} else {
2986 				NFSLOCKNODE(dnp);
2987 				*tl++ = dnp->n_cookieverf.nfsuquad[0];
2988 				*tl++ = dnp->n_cookieverf.nfsuquad[1];
2989 				NFSUNLOCKNODE(dnp);
2990 			}
2991 			if (nd->nd_flag & ND_NFSV4) {
2992 				*tl++ = txdr_unsigned(readsize);
2993 				*tl = txdr_unsigned(readsize);
2994 				(void) nfsrv_putattrbit(nd, &attrbits);
2995 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2996 				*tl = txdr_unsigned(NFSV4OP_GETATTR);
2997 				(void) nfsrv_putattrbit(nd, &dattrbits);
2998 			} else {
2999 				*tl = txdr_unsigned(readsize);
3000 			}
3001 		}
3002 		error = nfscl_request(nd, vp, p, cred, stuff);
3003 		if (error)
3004 			return (error);
3005 		if (!(nd->nd_flag & ND_NFSV2)) {
3006 			if (nd->nd_flag & ND_NFSV3)
3007 				error = nfscl_postop_attr(nd, nap, attrflagp,
3008 				    stuff);
3009 			if (!nd->nd_repstat && !error) {
3010 				NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
3011 				NFSLOCKNODE(dnp);
3012 				dnp->n_cookieverf.nfsuquad[0] = *tl++;
3013 				dnp->n_cookieverf.nfsuquad[1] = *tl;
3014 				NFSUNLOCKNODE(dnp);
3015 			}
3016 		}
3017 		if (nd->nd_repstat || error) {
3018 			if (!error)
3019 				error = nd->nd_repstat;
3020 			goto nfsmout;
3021 		}
3022 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3023 		more_dirs = fxdr_unsigned(int, *tl);
3024 		if (!more_dirs)
3025 			tryformoredirs = 0;
3026 
3027 		/* loop through the dir entries, doctoring them to 4bsd form */
3028 		while (more_dirs && bigenough) {
3029 			if (nd->nd_flag & ND_NFSV4) {
3030 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3031 				ncookie.lval[0] = *tl++;
3032 				ncookie.lval[1] = *tl++;
3033 				len = fxdr_unsigned(int, *tl);
3034 			} else if (nd->nd_flag & ND_NFSV3) {
3035 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3036 				nfsva.na_fileid = fxdr_hyper(tl);
3037 				tl += 2;
3038 				len = fxdr_unsigned(int, *tl);
3039 			} else {
3040 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3041 				nfsva.na_fileid = fxdr_unsigned(uint64_t,
3042 				    *tl++);
3043 				len = fxdr_unsigned(int, *tl);
3044 			}
3045 			if (len <= 0 || len > NFS_MAXNAMLEN) {
3046 				error = EBADRPC;
3047 				goto nfsmout;
3048 			}
3049 			tlen = roundup2(len, 8);
3050 			if (tlen == len)
3051 				tlen += 8;  /* To ensure null termination. */
3052 			left = DIRBLKSIZ - blksiz;
3053 			if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
3054 				dp->d_reclen += left;
3055 				uio_iov_base_add(uiop, left);
3056 				uio_iov_len_add(uiop, -(left));
3057 				uio_uio_resid_add(uiop, -(left));
3058 				uiop->uio_offset += left;
3059 				blksiz = 0;
3060 			}
3061 			if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
3062 			    uio_uio_resid(uiop))
3063 				bigenough = 0;
3064 			if (bigenough) {
3065 				dp = (struct dirent *)uio_iov_base(uiop);
3066 				dp->d_off = 0;
3067 				dp->d_namlen = len;
3068 				dp->d_reclen = _GENERIC_DIRLEN(len) +
3069 				    NFSX_HYPER;
3070 				dp->d_type = DT_UNKNOWN;
3071 				blksiz += dp->d_reclen;
3072 				if (blksiz == DIRBLKSIZ)
3073 					blksiz = 0;
3074 				uio_uio_resid_add(uiop, -(DIRHDSIZ));
3075 				uiop->uio_offset += DIRHDSIZ;
3076 				uio_iov_base_add(uiop, DIRHDSIZ);
3077 				uio_iov_len_add(uiop, -(DIRHDSIZ));
3078 				error = nfsm_mbufuio(nd, uiop, len);
3079 				if (error)
3080 					goto nfsmout;
3081 				cp = uio_iov_base(uiop);
3082 				tlen -= len;
3083 				*cp = '\0';	/* null terminate */
3084 				cp += tlen;	/* points to cookie storage */
3085 				tl2 = (u_int32_t *)cp;
3086 				uio_iov_base_add(uiop, (tlen + NFSX_HYPER));
3087 				uio_iov_len_add(uiop, -(tlen + NFSX_HYPER));
3088 				uio_uio_resid_add(uiop, -(tlen + NFSX_HYPER));
3089 				uiop->uio_offset += (tlen + NFSX_HYPER);
3090 			} else {
3091 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3092 				if (error)
3093 					goto nfsmout;
3094 			}
3095 			if (nd->nd_flag & ND_NFSV4) {
3096 				rderr = 0;
3097 				nfsva.na_mntonfileno = UINT64_MAX;
3098 				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3099 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3100 				    NULL, NULL, &rderr, p, cred);
3101 				if (error)
3102 					goto nfsmout;
3103 				NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3104 			} else if (nd->nd_flag & ND_NFSV3) {
3105 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3106 				ncookie.lval[0] = *tl++;
3107 				ncookie.lval[1] = *tl++;
3108 			} else {
3109 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3110 				ncookie.lval[0] = 0;
3111 				ncookie.lval[1] = *tl++;
3112 			}
3113 			if (bigenough) {
3114 			    if (nd->nd_flag & ND_NFSV4) {
3115 				if (rderr) {
3116 				    dp->d_fileno = 0;
3117 				} else {
3118 				    if (gotmnton) {
3119 					if (nfsva.na_mntonfileno != UINT64_MAX)
3120 					    dp->d_fileno = nfsva.na_mntonfileno;
3121 					else
3122 					    dp->d_fileno = nfsva.na_fileid;
3123 				    } else if (nfsva.na_filesid[0] ==
3124 					dnp->n_vattr.na_filesid[0] &&
3125 					nfsva.na_filesid[1] ==
3126 					dnp->n_vattr.na_filesid[1]) {
3127 					dp->d_fileno = nfsva.na_fileid;
3128 				    } else {
3129 					do {
3130 					    fakefileno--;
3131 					} while (fakefileno ==
3132 					    nfsva.na_fileid);
3133 					dp->d_fileno = fakefileno;
3134 				    }
3135 				    dp->d_type = vtonfs_dtype(nfsva.na_type);
3136 				}
3137 			    } else {
3138 				dp->d_fileno = nfsva.na_fileid;
3139 			    }
3140 			    *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
3141 				ncookie.lval[0];
3142 			    *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
3143 				ncookie.lval[1];
3144 			}
3145 			more_dirs = fxdr_unsigned(int, *tl);
3146 		}
3147 		/*
3148 		 * If at end of rpc data, get the eof boolean
3149 		 */
3150 		if (!more_dirs) {
3151 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3152 			eof = fxdr_unsigned(int, *tl);
3153 			if (tryformoredirs)
3154 				more_dirs = !eof;
3155 			if (nd->nd_flag & ND_NFSV4) {
3156 				error = nfscl_postop_attr(nd, nap, attrflagp,
3157 				    stuff);
3158 				if (error)
3159 					goto nfsmout;
3160 			}
3161 		}
3162 		mbuf_freem(nd->nd_mrep);
3163 		nd->nd_mrep = NULL;
3164 	}
3165 	/*
3166 	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
3167 	 * by increasing d_reclen for the last record.
3168 	 */
3169 	if (blksiz > 0) {
3170 		left = DIRBLKSIZ - blksiz;
3171 		dp->d_reclen += left;
3172 		uio_iov_base_add(uiop, left);
3173 		uio_iov_len_add(uiop, -(left));
3174 		uio_uio_resid_add(uiop, -(left));
3175 		uiop->uio_offset += left;
3176 	}
3177 
3178 	/*
3179 	 * If returning no data, assume end of file.
3180 	 * If not bigenough, return not end of file, since you aren't
3181 	 *    returning all the data
3182 	 * Otherwise, return the eof flag from the server.
3183 	 */
3184 	if (eofp) {
3185 		if (tresid == ((size_t)(uio_uio_resid(uiop))))
3186 			*eofp = 1;
3187 		else if (!bigenough)
3188 			*eofp = 0;
3189 		else
3190 			*eofp = eof;
3191 	}
3192 
3193 	/*
3194 	 * Add extra empty records to any remaining DIRBLKSIZ chunks.
3195 	 */
3196 	while (uio_uio_resid(uiop) > 0 && uio_uio_resid(uiop) != tresid) {
3197 		dp = (struct dirent *)uio_iov_base(uiop);
3198 		dp->d_type = DT_UNKNOWN;
3199 		dp->d_fileno = 0;
3200 		dp->d_namlen = 0;
3201 		dp->d_name[0] = '\0';
3202 		tl = (u_int32_t *)&dp->d_name[4];
3203 		*tl++ = cookie.lval[0];
3204 		*tl = cookie.lval[1];
3205 		dp->d_reclen = DIRBLKSIZ;
3206 		uio_iov_base_add(uiop, DIRBLKSIZ);
3207 		uio_iov_len_add(uiop, -(DIRBLKSIZ));
3208 		uio_uio_resid_add(uiop, -(DIRBLKSIZ));
3209 		uiop->uio_offset += DIRBLKSIZ;
3210 	}
3211 
3212 nfsmout:
3213 	if (nd->nd_mrep != NULL)
3214 		mbuf_freem(nd->nd_mrep);
3215 	return (error);
3216 }
3217 
3218 #ifndef APPLE
3219 /*
3220  * NFS V3 readdir plus RPC. Used in place of nfsrpc_readdir().
3221  * (Also used for NFS V4 when mount flag set.)
3222  * (ditto above w.r.t. multiple of DIRBLKSIZ, etc.)
3223  */
3224 APPLESTATIC int
3225 nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
3226     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
3227     int *eofp, void *stuff)
3228 {
3229 	int len, left;
3230 	struct dirent *dp = NULL;
3231 	u_int32_t *tl;
3232 	vnode_t newvp = NULLVP;
3233 	struct nfsrv_descript nfsd, *nd = &nfsd;
3234 	struct nameidata nami, *ndp = &nami;
3235 	struct componentname *cnp = &ndp->ni_cnd;
3236 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
3237 	struct nfsnode *dnp = VTONFS(vp), *np;
3238 	struct nfsvattr nfsva;
3239 	struct nfsfh *nfhp;
3240 	nfsquad_t cookie, ncookie;
3241 	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
3242 	int attrflag, tryformoredirs = 1, eof = 0, gotmnton = 0;
3243 	int isdotdot = 0, unlocknewvp = 0;
3244 	u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
3245 	u_int64_t fileno = 0;
3246 	char *cp;
3247 	nfsattrbit_t attrbits, dattrbits;
3248 	size_t tresid;
3249 	u_int32_t *tl2 = NULL, rderr;
3250 	struct timespec dctime;
3251 
3252 	KASSERT(uiop->uio_iovcnt == 1 &&
3253 	    (uio_uio_resid(uiop) & (DIRBLKSIZ - 1)) == 0,
3254 	    ("nfs readdirplusrpc bad uio"));
3255 	timespecclear(&dctime);
3256 	*attrflagp = 0;
3257 	if (eofp != NULL)
3258 		*eofp = 0;
3259 	ndp->ni_dvp = vp;
3260 	nd->nd_mrep = NULL;
3261 	cookie.lval[0] = cookiep->nfsuquad[0];
3262 	cookie.lval[1] = cookiep->nfsuquad[1];
3263 	tresid = uio_uio_resid(uiop);
3264 
3265 	/*
3266 	 * For NFSv4, first create the "." and ".." entries.
3267 	 */
3268 	if (NFSHASNFSV4(nmp)) {
3269 		NFSGETATTR_ATTRBIT(&dattrbits);
3270 		NFSZERO_ATTRBIT(&attrbits);
3271 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
3272 		if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3273 		    NFSATTRBIT_MOUNTEDONFILEID)) {
3274 			NFSSETBIT_ATTRBIT(&attrbits,
3275 			    NFSATTRBIT_MOUNTEDONFILEID);
3276 			gotmnton = 1;
3277 		} else {
3278 			/*
3279 			 * Must fake it. Use the fileno, except when the
3280 			 * fsid is != to that of the directory. For that
3281 			 * case, generate a fake fileno that is not the same.
3282 			 */
3283 			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
3284 			gotmnton = 0;
3285 		}
3286 
3287 		/*
3288 		 * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
3289 		 */
3290 		if (uiop->uio_offset == 0) {
3291 			NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp);
3292 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3293 			*tl++ = txdr_unsigned(NFSV4OP_GETFH);
3294 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
3295 			(void) nfsrv_putattrbit(nd, &attrbits);
3296 			error = nfscl_request(nd, vp, p, cred, stuff);
3297 			if (error)
3298 			    return (error);
3299 			dotfileid = 0;	/* Fake out the compiler. */
3300 			if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
3301 			    error = nfsm_loadattr(nd, &nfsva);
3302 			    if (error != 0)
3303 				goto nfsmout;
3304 			    dctime = nfsva.na_ctime;
3305 			    dotfileid = nfsva.na_fileid;
3306 			}
3307 			if (nd->nd_repstat == 0) {
3308 			    NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
3309 			    len = fxdr_unsigned(int, *(tl + 4));
3310 			    if (len > 0 && len <= NFSX_V4FHMAX)
3311 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3312 			    else
3313 				error = EPERM;
3314 			    if (!error) {
3315 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3316 				nfsva.na_mntonfileno = UINT64_MAX;
3317 				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3318 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3319 				    NULL, NULL, NULL, p, cred);
3320 				if (error) {
3321 				    dotdotfileid = dotfileid;
3322 				} else if (gotmnton) {
3323 				    if (nfsva.na_mntonfileno != UINT64_MAX)
3324 					dotdotfileid = nfsva.na_mntonfileno;
3325 				    else
3326 					dotdotfileid = nfsva.na_fileid;
3327 				} else if (nfsva.na_filesid[0] ==
3328 				    dnp->n_vattr.na_filesid[0] &&
3329 				    nfsva.na_filesid[1] ==
3330 				    dnp->n_vattr.na_filesid[1]) {
3331 				    dotdotfileid = nfsva.na_fileid;
3332 				} else {
3333 				    do {
3334 					fakefileno--;
3335 				    } while (fakefileno ==
3336 					nfsva.na_fileid);
3337 				    dotdotfileid = fakefileno;
3338 				}
3339 			    }
3340 			} else if (nd->nd_repstat == NFSERR_NOENT) {
3341 			    /*
3342 			     * Lookupp returns NFSERR_NOENT when we are
3343 			     * at the root, so just use the current dir.
3344 			     */
3345 			    nd->nd_repstat = 0;
3346 			    dotdotfileid = dotfileid;
3347 			} else {
3348 			    error = nd->nd_repstat;
3349 			}
3350 			mbuf_freem(nd->nd_mrep);
3351 			if (error)
3352 			    return (error);
3353 			nd->nd_mrep = NULL;
3354 			dp = (struct dirent *)uio_iov_base(uiop);
3355 			dp->d_off = 0;
3356 			dp->d_type = DT_DIR;
3357 			dp->d_fileno = dotfileid;
3358 			dp->d_namlen = 1;
3359 			*((uint64_t *)dp->d_name) = 0;	/* Zero pad it. */
3360 			dp->d_name[0] = '.';
3361 			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3362 			/*
3363 			 * Just make these offset cookie 0.
3364 			 */
3365 			tl = (u_int32_t *)&dp->d_name[8];
3366 			*tl++ = 0;
3367 			*tl = 0;
3368 			blksiz += dp->d_reclen;
3369 			uio_uio_resid_add(uiop, -(dp->d_reclen));
3370 			uiop->uio_offset += dp->d_reclen;
3371 			uio_iov_base_add(uiop, dp->d_reclen);
3372 			uio_iov_len_add(uiop, -(dp->d_reclen));
3373 			dp = (struct dirent *)uio_iov_base(uiop);
3374 			dp->d_off = 0;
3375 			dp->d_type = DT_DIR;
3376 			dp->d_fileno = dotdotfileid;
3377 			dp->d_namlen = 2;
3378 			*((uint64_t *)dp->d_name) = 0;
3379 			dp->d_name[0] = '.';
3380 			dp->d_name[1] = '.';
3381 			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3382 			/*
3383 			 * Just make these offset cookie 0.
3384 			 */
3385 			tl = (u_int32_t *)&dp->d_name[8];
3386 			*tl++ = 0;
3387 			*tl = 0;
3388 			blksiz += dp->d_reclen;
3389 			uio_uio_resid_add(uiop, -(dp->d_reclen));
3390 			uiop->uio_offset += dp->d_reclen;
3391 			uio_iov_base_add(uiop, dp->d_reclen);
3392 			uio_iov_len_add(uiop, -(dp->d_reclen));
3393 		}
3394 		NFSREADDIRPLUS_ATTRBIT(&attrbits);
3395 		if (gotmnton)
3396 			NFSSETBIT_ATTRBIT(&attrbits,
3397 			    NFSATTRBIT_MOUNTEDONFILEID);
3398 	}
3399 
3400 	/*
3401 	 * Loop around doing readdir rpc's of size nm_readdirsize.
3402 	 * The stopping criteria is EOF or buffer full.
3403 	 */
3404 	while (more_dirs && bigenough) {
3405 		*attrflagp = 0;
3406 		NFSCL_REQSTART(nd, NFSPROC_READDIRPLUS, vp);
3407  		NFSM_BUILD(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
3408 		*tl++ = cookie.lval[0];
3409 		*tl++ = cookie.lval[1];
3410 		if (cookie.qval == 0) {
3411 			*tl++ = 0;
3412 			*tl++ = 0;
3413 		} else {
3414 			NFSLOCKNODE(dnp);
3415 			*tl++ = dnp->n_cookieverf.nfsuquad[0];
3416 			*tl++ = dnp->n_cookieverf.nfsuquad[1];
3417 			NFSUNLOCKNODE(dnp);
3418 		}
3419 		*tl++ = txdr_unsigned(nmp->nm_readdirsize);
3420 		*tl = txdr_unsigned(nmp->nm_readdirsize);
3421 		if (nd->nd_flag & ND_NFSV4) {
3422 			(void) nfsrv_putattrbit(nd, &attrbits);
3423 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3424 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
3425 			(void) nfsrv_putattrbit(nd, &dattrbits);
3426 		}
3427 		error = nfscl_request(nd, vp, p, cred, stuff);
3428 		if (error)
3429 			return (error);
3430 		if (nd->nd_flag & ND_NFSV3)
3431 			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
3432 		if (nd->nd_repstat || error) {
3433 			if (!error)
3434 				error = nd->nd_repstat;
3435 			goto nfsmout;
3436 		}
3437 		if ((nd->nd_flag & ND_NFSV3) != 0 && *attrflagp != 0)
3438 			dctime = nap->na_ctime;
3439 		NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3440 		NFSLOCKNODE(dnp);
3441 		dnp->n_cookieverf.nfsuquad[0] = *tl++;
3442 		dnp->n_cookieverf.nfsuquad[1] = *tl++;
3443 		NFSUNLOCKNODE(dnp);
3444 		more_dirs = fxdr_unsigned(int, *tl);
3445 		if (!more_dirs)
3446 			tryformoredirs = 0;
3447 
3448 		/* loop through the dir entries, doctoring them to 4bsd form */
3449 		while (more_dirs && bigenough) {
3450 			NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3451 			if (nd->nd_flag & ND_NFSV4) {
3452 				ncookie.lval[0] = *tl++;
3453 				ncookie.lval[1] = *tl++;
3454 			} else {
3455 				fileno = fxdr_hyper(tl);
3456 				tl += 2;
3457 			}
3458 			len = fxdr_unsigned(int, *tl);
3459 			if (len <= 0 || len > NFS_MAXNAMLEN) {
3460 				error = EBADRPC;
3461 				goto nfsmout;
3462 			}
3463 			tlen = roundup2(len, 8);
3464 			if (tlen == len)
3465 				tlen += 8;  /* To ensure null termination. */
3466 			left = DIRBLKSIZ - blksiz;
3467 			if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
3468 				dp->d_reclen += left;
3469 				uio_iov_base_add(uiop, left);
3470 				uio_iov_len_add(uiop, -(left));
3471 				uio_uio_resid_add(uiop, -(left));
3472 				uiop->uio_offset += left;
3473 				blksiz = 0;
3474 			}
3475 			if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
3476 			    uio_uio_resid(uiop))
3477 				bigenough = 0;
3478 			if (bigenough) {
3479 				dp = (struct dirent *)uio_iov_base(uiop);
3480 				dp->d_off = 0;
3481 				dp->d_namlen = len;
3482 				dp->d_reclen = _GENERIC_DIRLEN(len) +
3483 				    NFSX_HYPER;
3484 				dp->d_type = DT_UNKNOWN;
3485 				blksiz += dp->d_reclen;
3486 				if (blksiz == DIRBLKSIZ)
3487 					blksiz = 0;
3488 				uio_uio_resid_add(uiop, -(DIRHDSIZ));
3489 				uiop->uio_offset += DIRHDSIZ;
3490 				uio_iov_base_add(uiop, DIRHDSIZ);
3491 				uio_iov_len_add(uiop, -(DIRHDSIZ));
3492 				cnp->cn_nameptr = uio_iov_base(uiop);
3493 				cnp->cn_namelen = len;
3494 				NFSCNHASHZERO(cnp);
3495 				error = nfsm_mbufuio(nd, uiop, len);
3496 				if (error)
3497 					goto nfsmout;
3498 				cp = uio_iov_base(uiop);
3499 				tlen -= len;
3500 				*cp = '\0';
3501 				cp += tlen;	/* points to cookie storage */
3502 				tl2 = (u_int32_t *)cp;
3503 				if (len == 2 && cnp->cn_nameptr[0] == '.' &&
3504 				    cnp->cn_nameptr[1] == '.')
3505 					isdotdot = 1;
3506 				else
3507 					isdotdot = 0;
3508 				uio_iov_base_add(uiop, (tlen + NFSX_HYPER));
3509 				uio_iov_len_add(uiop, -(tlen + NFSX_HYPER));
3510 				uio_uio_resid_add(uiop, -(tlen + NFSX_HYPER));
3511 				uiop->uio_offset += (tlen + NFSX_HYPER);
3512 			} else {
3513 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3514 				if (error)
3515 					goto nfsmout;
3516 			}
3517 			nfhp = NULL;
3518 			if (nd->nd_flag & ND_NFSV3) {
3519 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3520 				ncookie.lval[0] = *tl++;
3521 				ncookie.lval[1] = *tl++;
3522 				attrflag = fxdr_unsigned(int, *tl);
3523 				if (attrflag) {
3524 				  error = nfsm_loadattr(nd, &nfsva);
3525 				  if (error)
3526 					goto nfsmout;
3527 				}
3528 				NFSM_DISSECT(tl,u_int32_t *,NFSX_UNSIGNED);
3529 				if (*tl) {
3530 					error = nfsm_getfh(nd, &nfhp);
3531 					if (error)
3532 					    goto nfsmout;
3533 				}
3534 				if (!attrflag && nfhp != NULL) {
3535 					FREE((caddr_t)nfhp, M_NFSFH);
3536 					nfhp = NULL;
3537 				}
3538 			} else {
3539 				rderr = 0;
3540 				nfsva.na_mntonfileno = 0xffffffff;
3541 				error = nfsv4_loadattr(nd, NULL, &nfsva, &nfhp,
3542 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3543 				    NULL, NULL, &rderr, p, cred);
3544 				if (error)
3545 					goto nfsmout;
3546 			}
3547 
3548 			if (bigenough) {
3549 			    if (nd->nd_flag & ND_NFSV4) {
3550 				if (rderr) {
3551 				    dp->d_fileno = 0;
3552 				} else if (gotmnton) {
3553 				    if (nfsva.na_mntonfileno != 0xffffffff)
3554 					dp->d_fileno = nfsva.na_mntonfileno;
3555 				    else
3556 					dp->d_fileno = nfsva.na_fileid;
3557 				} else if (nfsva.na_filesid[0] ==
3558 				    dnp->n_vattr.na_filesid[0] &&
3559 				    nfsva.na_filesid[1] ==
3560 				    dnp->n_vattr.na_filesid[1]) {
3561 				    dp->d_fileno = nfsva.na_fileid;
3562 				} else {
3563 				    do {
3564 					fakefileno--;
3565 				    } while (fakefileno ==
3566 					nfsva.na_fileid);
3567 				    dp->d_fileno = fakefileno;
3568 				}
3569 			    } else {
3570 				dp->d_fileno = fileno;
3571 			    }
3572 			    *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
3573 				ncookie.lval[0];
3574 			    *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
3575 				ncookie.lval[1];
3576 
3577 			    if (nfhp != NULL) {
3578 				if (NFSRV_CMPFH(nfhp->nfh_fh, nfhp->nfh_len,
3579 				    dnp->n_fhp->nfh_fh, dnp->n_fhp->nfh_len)) {
3580 				    VREF(vp);
3581 				    newvp = vp;
3582 				    unlocknewvp = 0;
3583 				    FREE((caddr_t)nfhp, M_NFSFH);
3584 				    np = dnp;
3585 				} else if (isdotdot != 0) {
3586 				    /*
3587 				     * Skip doing a nfscl_nget() call for "..".
3588 				     * There's a race between acquiring the nfs
3589 				     * node here and lookups that look for the
3590 				     * directory being read (in the parent).
3591 				     * It would try to get a lock on ".." here,
3592 				     * owning the lock on the directory being
3593 				     * read. Lookup will hold the lock on ".."
3594 				     * and try to acquire the lock on the
3595 				     * directory being read.
3596 				     * If the directory is unlocked/relocked,
3597 				     * then there is a LOR with the buflock
3598 				     * vp is relocked.
3599 				     */
3600 				    free(nfhp, M_NFSFH);
3601 				} else {
3602 				    error = nfscl_nget(vnode_mount(vp), vp,
3603 				      nfhp, cnp, p, &np, NULL, LK_EXCLUSIVE);
3604 				    if (!error) {
3605 					newvp = NFSTOV(np);
3606 					unlocknewvp = 1;
3607 				    }
3608 				}
3609 				nfhp = NULL;
3610 				if (newvp != NULLVP) {
3611 				    error = nfscl_loadattrcache(&newvp,
3612 					&nfsva, NULL, NULL, 0, 0);
3613 				    if (error) {
3614 					if (unlocknewvp)
3615 					    vput(newvp);
3616 					else
3617 					    vrele(newvp);
3618 					goto nfsmout;
3619 				    }
3620 				    dp->d_type =
3621 					vtonfs_dtype(np->n_vattr.na_type);
3622 				    ndp->ni_vp = newvp;
3623 				    NFSCNHASH(cnp, HASHINIT);
3624 				    if (cnp->cn_namelen <= NCHNAMLEN &&
3625 					(newvp->v_type != VDIR ||
3626 					 dctime.tv_sec != 0)) {
3627 					cache_enter_time(ndp->ni_dvp,
3628 					    ndp->ni_vp, cnp,
3629 					    &nfsva.na_ctime,
3630 					    newvp->v_type != VDIR ? NULL :
3631 					    &dctime);
3632 				    }
3633 				    if (unlocknewvp)
3634 					vput(newvp);
3635 				    else
3636 					vrele(newvp);
3637 				    newvp = NULLVP;
3638 				}
3639 			    }
3640 			} else if (nfhp != NULL) {
3641 			    FREE((caddr_t)nfhp, M_NFSFH);
3642 			}
3643 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3644 			more_dirs = fxdr_unsigned(int, *tl);
3645 		}
3646 		/*
3647 		 * If at end of rpc data, get the eof boolean
3648 		 */
3649 		if (!more_dirs) {
3650 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3651 			eof = fxdr_unsigned(int, *tl);
3652 			if (tryformoredirs)
3653 				more_dirs = !eof;
3654 			if (nd->nd_flag & ND_NFSV4) {
3655 				error = nfscl_postop_attr(nd, nap, attrflagp,
3656 				    stuff);
3657 				if (error)
3658 					goto nfsmout;
3659 			}
3660 		}
3661 		mbuf_freem(nd->nd_mrep);
3662 		nd->nd_mrep = NULL;
3663 	}
3664 	/*
3665 	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
3666 	 * by increasing d_reclen for the last record.
3667 	 */
3668 	if (blksiz > 0) {
3669 		left = DIRBLKSIZ - blksiz;
3670 		dp->d_reclen += left;
3671 		uio_iov_base_add(uiop, left);
3672 		uio_iov_len_add(uiop, -(left));
3673 		uio_uio_resid_add(uiop, -(left));
3674 		uiop->uio_offset += left;
3675 	}
3676 
3677 	/*
3678 	 * If returning no data, assume end of file.
3679 	 * If not bigenough, return not end of file, since you aren't
3680 	 *    returning all the data
3681 	 * Otherwise, return the eof flag from the server.
3682 	 */
3683 	if (eofp != NULL) {
3684 		if (tresid == uio_uio_resid(uiop))
3685 			*eofp = 1;
3686 		else if (!bigenough)
3687 			*eofp = 0;
3688 		else
3689 			*eofp = eof;
3690 	}
3691 
3692 	/*
3693 	 * Add extra empty records to any remaining DIRBLKSIZ chunks.
3694 	 */
3695 	while (uio_uio_resid(uiop) > 0 && uio_uio_resid(uiop) != tresid) {
3696 		dp = (struct dirent *)uio_iov_base(uiop);
3697 		dp->d_type = DT_UNKNOWN;
3698 		dp->d_fileno = 0;
3699 		dp->d_namlen = 0;
3700 		dp->d_name[0] = '\0';
3701 		tl = (u_int32_t *)&dp->d_name[4];
3702 		*tl++ = cookie.lval[0];
3703 		*tl = cookie.lval[1];
3704 		dp->d_reclen = DIRBLKSIZ;
3705 		uio_iov_base_add(uiop, DIRBLKSIZ);
3706 		uio_iov_len_add(uiop, -(DIRBLKSIZ));
3707 		uio_uio_resid_add(uiop, -(DIRBLKSIZ));
3708 		uiop->uio_offset += DIRBLKSIZ;
3709 	}
3710 
3711 nfsmout:
3712 	if (nd->nd_mrep != NULL)
3713 		mbuf_freem(nd->nd_mrep);
3714 	return (error);
3715 }
3716 #endif	/* !APPLE */
3717 
3718 /*
3719  * Nfs commit rpc
3720  */
3721 APPLESTATIC int
3722 nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred,
3723     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
3724 {
3725 	u_int32_t *tl;
3726 	struct nfsrv_descript nfsd, *nd = &nfsd;
3727 	nfsattrbit_t attrbits;
3728 	int error;
3729 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
3730 
3731 	*attrflagp = 0;
3732 	NFSCL_REQSTART(nd, NFSPROC_COMMIT, vp);
3733 	NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3734 	txdr_hyper(offset, tl);
3735 	tl += 2;
3736 	*tl = txdr_unsigned(cnt);
3737 	if (nd->nd_flag & ND_NFSV4) {
3738 		/*
3739 		 * And do a Getattr op.
3740 		 */
3741 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3742 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
3743 		NFSGETATTR_ATTRBIT(&attrbits);
3744 		(void) nfsrv_putattrbit(nd, &attrbits);
3745 	}
3746 	error = nfscl_request(nd, vp, p, cred, stuff);
3747 	if (error)
3748 		return (error);
3749 	error = nfscl_wcc_data(nd, vp, nap, attrflagp, NULL, stuff);
3750 	if (!error && !nd->nd_repstat) {
3751 		NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
3752 		NFSLOCKMNT(nmp);
3753 		if (NFSBCMP(nmp->nm_verf, tl, NFSX_VERF)) {
3754 			NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
3755 			nd->nd_repstat = NFSERR_STALEWRITEVERF;
3756 		}
3757 		NFSUNLOCKMNT(nmp);
3758 		if (nd->nd_flag & ND_NFSV4)
3759 			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
3760 	}
3761 nfsmout:
3762 	if (!error && nd->nd_repstat)
3763 		error = nd->nd_repstat;
3764 	mbuf_freem(nd->nd_mrep);
3765 	return (error);
3766 }
3767 
/*
 * NFS byte range lock rpc.
 * (Mostly just calls one of the three lower level RPC routines.)
 *
 * The flock structure is first converted to a starting offset and length,
 * using "size" as the base when l_whence is SEEK_END.  Then, depending on op:
 * - F_GETLK: nfscl_lockt() followed, when it returns 0, by nfsrpc_lockt().
 * - F_UNLCK (with fl->l_type == F_UNLCK): loops calling nfscl_relbytelock()
 *   and, when required, nfsrpc_locku().
 * - F_SETLK: nfscl_getbytelock() followed by nfsrpc_lock(); "reclaim" is
 *   handed to nfsrpc_lock().
 * - anything else: EINVAL.
 * The whole operation is retried for the error values tested at the bottom
 * of the outer loop.
 * "id" and "flags" are passed through to the lock owner handling functions.
 * Returns 0 or an error; EINVAL for a bad l_whence, a negative start or an
 * end before the start, and EIO when an error remains after retrycnt has
 * reached 4.
 */
APPLESTATIC int
nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
    int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags)
{
	struct nfscllockowner *lp;
	struct nfsclclient *clp;
	struct nfsfh *nfhp;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
	u_int64_t off, len;
	off_t start, end;
	u_int32_t clidrev = 0;
	int error = 0, newone = 0, expireret = 0, retrycnt, donelocally;
	int callcnt, dorpc;

	/*
	 * Convert the flock structure into a start and end and do POSIX
	 * bounds checking.
	 */
	switch (fl->l_whence) {
	case SEEK_SET:
	case SEEK_CUR:
		/*
		 * Caller is responsible for adding any necessary offset
		 * when SEEK_CUR is used.
		 */
		start = fl->l_start;
		off = fl->l_start;
		break;
	case SEEK_END:
		start = size + fl->l_start;
		off = size + fl->l_start;
		break;
	default:
		return (EINVAL);
	}
	if (start < 0)
		return (EINVAL);
	if (fl->l_len != 0) {
		/* "end" is only used for this range sanity check. */
		end = start + fl->l_len - 1;
		if (end < start)
			return (EINVAL);
	}

	/* A zero l_len is replaced by NFS64BITSSET for the RPCs. */
	len = fl->l_len;
	if (len == 0)
		len = NFS64BITSSET;
	retrycnt = 0;
	do {
	    /* Start each attempt with a clean reply status. */
	    nd->nd_repstat = 0;
	    if (op == F_GETLK) {
		error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp);
		if (error)
			return (error);
		error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags);
		if (!error) {
			clidrev = clp->nfsc_clientidrev;
			error = nfsrpc_lockt(nd, vp, clp, off, len, fl, cred,
			    p, id, flags);
		} else if (error == -1) {
			/* -1 from nfscl_lockt(): succeed without the RPC. */
			error = 0;
		}
		nfscl_clientrelease(clp);
	    } else if (op == F_UNLCK && fl->l_type == F_UNLCK) {
		/*
		 * We must loop around for all lockowner cases.
		 */
		callcnt = 0;
		error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp);
		if (error)
			return (error);
		do {
		    error = nfscl_relbytelock(vp, off, len, cred, p, callcnt,
			clp, id, flags, &lp, &dorpc);
		    /*
		     * If it returns a NULL lp, we're done.
		     */
		    if (lp == NULL) {
			/*
			 * On the first call only the client reference is
			 * released, otherwise nfscl_releasealllocks() is
			 * used.  Note that this returns directly, without
			 * going through the retry logic below.
			 */
			if (callcnt == 0)
			    nfscl_clientrelease(clp);
			else
			    nfscl_releasealllocks(clp, vp, p, id, flags);
			return (error);
		    }
		    /* Remember the clientid revision for nfscl_hasexpired(). */
		    if (nmp->nm_clp != NULL)
			clidrev = nmp->nm_clp->nfsc_clientidrev;
		    else
			clidrev = 0;
		    /*
		     * If the server doesn't support Posix lock semantics,
		     * only allow locks on the entire file, since it won't
		     * handle overlapping byte ranges.
		     * There might still be a problem when a lock
		     * upgrade/downgrade (read<->write) occurs, since the
		     * server "might" expect an unlock first?
		     */
		    if (dorpc && (lp->nfsl_open->nfso_posixlock ||
			(off == 0 && len == NFS64BITSSET))) {
			/*
			 * Since the lock records will go away, we must
			 * wait for grace and delay here.
			 */
			do {
			    error = nfsrpc_locku(nd, nmp, lp, off, len,
				NFSV4LOCKT_READ, cred, p, 0);
			    if ((nd->nd_repstat == NFSERR_GRACE ||
				 nd->nd_repstat == NFSERR_DELAY) &&
				error == 0)
				(void) nfs_catnap(PZERO, (int)nd->nd_repstat,
				    "nfs_advlock");
			} while ((nd->nd_repstat == NFSERR_GRACE ||
			    nd->nd_repstat == NFSERR_DELAY) && error == 0);
		    }
		    callcnt++;
		} while (error == 0 && nd->nd_repstat == 0);
		nfscl_releasealllocks(clp, vp, p, id, flags);
	    } else if (op == F_SETLK) {
		error = nfscl_getbytelock(vp, off, len, fl->l_type, cred, p,
		    NULL, 0, id, flags, NULL, NULL, &lp, &newone, &donelocally);
		/* Nothing more to do on failure or when handled locally. */
		if (error || donelocally) {
			return (error);
		}
		if (nmp->nm_clp != NULL)
			clidrev = nmp->nm_clp->nfsc_clientidrev;
		else
			clidrev = 0;
		nfhp = VTONFS(vp)->n_fhp;
		/*
		 * Without nfso_posixlock set, only a lock on the entire
		 * file (off == 0, len == NFS64BITSSET) is attempted.
		 */
		if (!lp->nfsl_open->nfso_posixlock &&
		    (off != 0 || len != NFS64BITSSET)) {
			error = EINVAL;
		} else {
			error = nfsrpc_lock(nd, nmp, vp, nfhp->nfh_fh,
			    nfhp->nfh_len, lp, newone, reclaim, off,
			    len, fl->l_type, cred, p, 0);
		}
		if (!error)
			error = nd->nd_repstat;
		nfscl_lockrelease(lp, error, newone);
	    } else {
		error = EINVAL;
	    }
	    /* Fold the reply status into error for the retry tests below. */
	    if (!error)
	        error = nd->nd_repstat;
	    if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
		error == NFSERR_STALEDONTRECOVER ||
		error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
		error == NFSERR_BADSESSION) {
		/* These errors are always retried, after nfs_catnap(). */
		(void) nfs_catnap(PZERO, error, "nfs_advlock");
	    } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
		&& clidrev != 0) {
		/*
		 * These are retried only while nfscl_hasexpired() returns 0
		 * and fewer than 4 such retries have been counted.
		 */
		expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
		retrycnt++;
	    }
	} while (error == NFSERR_GRACE ||
	    error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALESTATEID ||
	    error == NFSERR_BADSESSION ||
	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
	     expireret == 0 && clidrev != 0 && retrycnt < 4));
	/* Out of retries: report a generic I/O error. */
	if (error && retrycnt >= 4)
		error = EIO;
	return (error);
}
3935 
3936 /*
3937  * The lower level routine for the LockT case.
3938  */
3939 APPLESTATIC int
3940 nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp,
3941     struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl,
3942     struct ucred *cred, NFSPROC_T *p, void *id, int flags)
3943 {
3944 	u_int32_t *tl;
3945 	int error, type, size;
3946 	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
3947 	struct nfsnode *np;
3948 	struct nfsmount *nmp;
3949 	struct nfsclsession *tsep;
3950 
3951 	nmp = VFSTONFS(vp->v_mount);
3952 	NFSCL_REQSTART(nd, NFSPROC_LOCKT, vp);
3953 	NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
3954 	if (fl->l_type == F_RDLCK)
3955 		*tl++ = txdr_unsigned(NFSV4LOCKT_READ);
3956 	else
3957 		*tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
3958 	txdr_hyper(off, tl);
3959 	tl += 2;
3960 	txdr_hyper(len, tl);
3961 	tl += 2;
3962 	tsep = nfsmnt_mdssession(nmp);
3963 	*tl++ = tsep->nfsess_clientid.lval[0];
3964 	*tl = tsep->nfsess_clientid.lval[1];
3965 	nfscl_filllockowner(id, own, flags);
3966 	np = VTONFS(vp);
3967 	NFSBCOPY(np->n_fhp->nfh_fh, &own[NFSV4CL_LOCKNAMELEN],
3968 	    np->n_fhp->nfh_len);
3969 	(void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + np->n_fhp->nfh_len);
3970 	error = nfscl_request(nd, vp, p, cred, NULL);
3971 	if (error)
3972 		return (error);
3973 	if (nd->nd_repstat == 0) {
3974 		fl->l_type = F_UNLCK;
3975 	} else if (nd->nd_repstat == NFSERR_DENIED) {
3976 		nd->nd_repstat = 0;
3977 		fl->l_whence = SEEK_SET;
3978 		NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
3979 		fl->l_start = fxdr_hyper(tl);
3980 		tl += 2;
3981 		len = fxdr_hyper(tl);
3982 		tl += 2;
3983 		if (len == NFS64BITSSET)
3984 			fl->l_len = 0;
3985 		else
3986 			fl->l_len = len;
3987 		type = fxdr_unsigned(int, *tl++);
3988 		if (type == NFSV4LOCKT_WRITE)
3989 			fl->l_type = F_WRLCK;
3990 		else
3991 			fl->l_type = F_RDLCK;
3992 		/*
3993 		 * XXX For now, I have no idea what to do with the
3994 		 * conflicting lock_owner, so I'll just set the pid == 0
3995 		 * and skip over the lock_owner.
3996 		 */
3997 		fl->l_pid = (pid_t)0;
3998 		tl += 2;
3999 		size = fxdr_unsigned(int, *tl);
4000 		if (size < 0 || size > NFSV4_OPAQUELIMIT)
4001 			error = EBADRPC;
4002 		if (!error)
4003 			error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
4004 	} else if (nd->nd_repstat == NFSERR_STALECLIENTID)
4005 		nfscl_initiate_recovery(clp);
4006 nfsmout:
4007 	mbuf_freem(nd->nd_mrep);
4008 	return (error);
4009 }
4010 
4011 /*
4012  * Lower level function that performs the LockU RPC.
4013  */
4014 static int
4015 nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp,
4016     struct nfscllockowner *lp, u_int64_t off, u_int64_t len,
4017     u_int32_t type, struct ucred *cred, NFSPROC_T *p, int syscred)
4018 {
4019 	u_int32_t *tl;
4020 	int error;
4021 
4022 	nfscl_reqstart(nd, NFSPROC_LOCKU, nmp, lp->nfsl_open->nfso_fh,
4023 	    lp->nfsl_open->nfso_fhlen, NULL, NULL, 0, 0);
4024 	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED);
4025 	*tl++ = txdr_unsigned(type);
4026 	*tl = txdr_unsigned(lp->nfsl_seqid);
4027 	if (nfstest_outofseq &&
4028 	    (arc4random() % nfstest_outofseq) == 0)
4029 		*tl = txdr_unsigned(lp->nfsl_seqid + 1);
4030 	tl++;
4031 	if (NFSHASNFSV4N(nmp))
4032 		*tl++ = 0;
4033 	else
4034 		*tl++ = lp->nfsl_stateid.seqid;
4035 	*tl++ = lp->nfsl_stateid.other[0];
4036 	*tl++ = lp->nfsl_stateid.other[1];
4037 	*tl++ = lp->nfsl_stateid.other[2];
4038 	txdr_hyper(off, tl);
4039 	tl += 2;
4040 	txdr_hyper(len, tl);
4041 	if (syscred)
4042 		nd->nd_flag |= ND_USEGSSNAME;
4043 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4044 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4045 	NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
4046 	if (error)
4047 		return (error);
4048 	if (nd->nd_repstat == 0) {
4049 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
4050 		lp->nfsl_stateid.seqid = *tl++;
4051 		lp->nfsl_stateid.other[0] = *tl++;
4052 		lp->nfsl_stateid.other[1] = *tl++;
4053 		lp->nfsl_stateid.other[2] = *tl;
4054 	} else if (nd->nd_repstat == NFSERR_STALESTATEID)
4055 		nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
4056 nfsmout:
4057 	mbuf_freem(nd->nd_mrep);
4058 	return (error);
4059 }
4060 
4061 /*
4062  * The actual Lock RPC.
4063  */
4064 APPLESTATIC int
4065 nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp,
4066     u_int8_t *nfhp, int fhlen, struct nfscllockowner *lp, int newone,
4067     int reclaim, u_int64_t off, u_int64_t len, short type, struct ucred *cred,
4068     NFSPROC_T *p, int syscred)
4069 {
4070 	u_int32_t *tl;
4071 	int error, size;
4072 	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4073 	struct nfsclsession *tsep;
4074 
4075 	nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL, NULL, 0, 0);
4076 	NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
4077 	if (type == F_RDLCK)
4078 		*tl++ = txdr_unsigned(NFSV4LOCKT_READ);
4079 	else
4080 		*tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
4081 	*tl++ = txdr_unsigned(reclaim);
4082 	txdr_hyper(off, tl);
4083 	tl += 2;
4084 	txdr_hyper(len, tl);
4085 	tl += 2;
4086 	if (newone) {
4087 	    *tl = newnfs_true;
4088 	    NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
4089 		2 * NFSX_UNSIGNED + NFSX_HYPER);
4090 	    *tl++ = txdr_unsigned(lp->nfsl_open->nfso_own->nfsow_seqid);
4091 	    if (NFSHASNFSV4N(nmp))
4092 		*tl++ = 0;
4093 	    else
4094 		*tl++ = lp->nfsl_open->nfso_stateid.seqid;
4095 	    *tl++ = lp->nfsl_open->nfso_stateid.other[0];
4096 	    *tl++ = lp->nfsl_open->nfso_stateid.other[1];
4097 	    *tl++ = lp->nfsl_open->nfso_stateid.other[2];
4098 	    *tl++ = txdr_unsigned(lp->nfsl_seqid);
4099 	    tsep = nfsmnt_mdssession(nmp);
4100 	    *tl++ = tsep->nfsess_clientid.lval[0];
4101 	    *tl = tsep->nfsess_clientid.lval[1];
4102 	    NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4103 	    NFSBCOPY(nfhp, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4104 	    (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4105 	} else {
4106 	    *tl = newnfs_false;
4107 	    NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED);
4108 	    if (NFSHASNFSV4N(nmp))
4109 		*tl++ = 0;
4110 	    else
4111 		*tl++ = lp->nfsl_stateid.seqid;
4112 	    *tl++ = lp->nfsl_stateid.other[0];
4113 	    *tl++ = lp->nfsl_stateid.other[1];
4114 	    *tl++ = lp->nfsl_stateid.other[2];
4115 	    *tl = txdr_unsigned(lp->nfsl_seqid);
4116 	    if (nfstest_outofseq &&
4117 		(arc4random() % nfstest_outofseq) == 0)
4118 		    *tl = txdr_unsigned(lp->nfsl_seqid + 1);
4119 	}
4120 	if (syscred)
4121 		nd->nd_flag |= ND_USEGSSNAME;
4122 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
4123 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4124 	if (error)
4125 		return (error);
4126 	if (newone)
4127 	    NFSCL_INCRSEQID(lp->nfsl_open->nfso_own->nfsow_seqid, nd);
4128 	NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
4129 	if (nd->nd_repstat == 0) {
4130 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
4131 		lp->nfsl_stateid.seqid = *tl++;
4132 		lp->nfsl_stateid.other[0] = *tl++;
4133 		lp->nfsl_stateid.other[1] = *tl++;
4134 		lp->nfsl_stateid.other[2] = *tl;
4135 	} else if (nd->nd_repstat == NFSERR_DENIED) {
4136 		NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
4137 		size = fxdr_unsigned(int, *(tl + 7));
4138 		if (size < 0 || size > NFSV4_OPAQUELIMIT)
4139 			error = EBADRPC;
4140 		if (!error)
4141 			error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
4142 	} else if (nd->nd_repstat == NFSERR_STALESTATEID)
4143 		nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
4144 nfsmout:
4145 	mbuf_freem(nd->nd_mrep);
4146 	return (error);
4147 }
4148 
4149 /*
4150  * nfs statfs rpc
4151  * (always called with the vp for the mount point)
4152  */
4153 APPLESTATIC int
4154 nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp,
4155     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
4156     void *stuff)
4157 {
4158 	u_int32_t *tl = NULL;
4159 	struct nfsrv_descript nfsd, *nd = &nfsd;
4160 	struct nfsmount *nmp;
4161 	nfsattrbit_t attrbits;
4162 	int error;
4163 
4164 	*attrflagp = 0;
4165 	nmp = VFSTONFS(vnode_mount(vp));
4166 	if (NFSHASNFSV4(nmp)) {
4167 		/*
4168 		 * For V4, you actually do a getattr.
4169 		 */
4170 		NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
4171 		NFSSTATFS_GETATTRBIT(&attrbits);
4172 		(void) nfsrv_putattrbit(nd, &attrbits);
4173 		nd->nd_flag |= ND_USEGSSNAME;
4174 		error = nfscl_request(nd, vp, p, cred, stuff);
4175 		if (error)
4176 			return (error);
4177 		if (nd->nd_repstat == 0) {
4178 			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4179 			    NULL, NULL, sbp, fsp, NULL, 0, NULL, NULL, NULL, p,
4180 			    cred);
4181 			if (!error) {
4182 				nmp->nm_fsid[0] = nap->na_filesid[0];
4183 				nmp->nm_fsid[1] = nap->na_filesid[1];
4184 				NFSSETHASSETFSID(nmp);
4185 				*attrflagp = 1;
4186 			}
4187 		} else {
4188 			error = nd->nd_repstat;
4189 		}
4190 		if (error)
4191 			goto nfsmout;
4192 	} else {
4193 		NFSCL_REQSTART(nd, NFSPROC_FSSTAT, vp);
4194 		error = nfscl_request(nd, vp, p, cred, stuff);
4195 		if (error)
4196 			return (error);
4197 		if (nd->nd_flag & ND_NFSV3) {
4198 			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4199 			if (error)
4200 				goto nfsmout;
4201 		}
4202 		if (nd->nd_repstat) {
4203 			error = nd->nd_repstat;
4204 			goto nfsmout;
4205 		}
4206 		NFSM_DISSECT(tl, u_int32_t *,
4207 		    NFSX_STATFS(nd->nd_flag & ND_NFSV3));
4208 	}
4209 	if (NFSHASNFSV3(nmp)) {
4210 		sbp->sf_tbytes = fxdr_hyper(tl); tl += 2;
4211 		sbp->sf_fbytes = fxdr_hyper(tl); tl += 2;
4212 		sbp->sf_abytes = fxdr_hyper(tl); tl += 2;
4213 		sbp->sf_tfiles = fxdr_hyper(tl); tl += 2;
4214 		sbp->sf_ffiles = fxdr_hyper(tl); tl += 2;
4215 		sbp->sf_afiles = fxdr_hyper(tl); tl += 2;
4216 		sbp->sf_invarsec = fxdr_unsigned(u_int32_t, *tl);
4217 	} else if (NFSHASNFSV4(nmp) == 0) {
4218 		sbp->sf_tsize = fxdr_unsigned(u_int32_t, *tl++);
4219 		sbp->sf_bsize = fxdr_unsigned(u_int32_t, *tl++);
4220 		sbp->sf_blocks = fxdr_unsigned(u_int32_t, *tl++);
4221 		sbp->sf_bfree = fxdr_unsigned(u_int32_t, *tl++);
4222 		sbp->sf_bavail = fxdr_unsigned(u_int32_t, *tl);
4223 	}
4224 nfsmout:
4225 	mbuf_freem(nd->nd_mrep);
4226 	return (error);
4227 }
4228 
4229 /*
4230  * nfs pathconf rpc
4231  */
4232 APPLESTATIC int
4233 nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc,
4234     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
4235     void *stuff)
4236 {
4237 	struct nfsrv_descript nfsd, *nd = &nfsd;
4238 	struct nfsmount *nmp;
4239 	u_int32_t *tl;
4240 	nfsattrbit_t attrbits;
4241 	int error;
4242 
4243 	*attrflagp = 0;
4244 	nmp = VFSTONFS(vnode_mount(vp));
4245 	if (NFSHASNFSV4(nmp)) {
4246 		/*
4247 		 * For V4, you actually do a getattr.
4248 		 */
4249 		NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
4250 		NFSPATHCONF_GETATTRBIT(&attrbits);
4251 		(void) nfsrv_putattrbit(nd, &attrbits);
4252 		nd->nd_flag |= ND_USEGSSNAME;
4253 		error = nfscl_request(nd, vp, p, cred, stuff);
4254 		if (error)
4255 			return (error);
4256 		if (nd->nd_repstat == 0) {
4257 			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4258 			    pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p,
4259 			    cred);
4260 			if (!error)
4261 				*attrflagp = 1;
4262 		} else {
4263 			error = nd->nd_repstat;
4264 		}
4265 	} else {
4266 		NFSCL_REQSTART(nd, NFSPROC_PATHCONF, vp);
4267 		error = nfscl_request(nd, vp, p, cred, stuff);
4268 		if (error)
4269 			return (error);
4270 		error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4271 		if (nd->nd_repstat && !error)
4272 			error = nd->nd_repstat;
4273 		if (!error) {
4274 			NFSM_DISSECT(tl, u_int32_t *, NFSX_V3PATHCONF);
4275 			pc->pc_linkmax = fxdr_unsigned(u_int32_t, *tl++);
4276 			pc->pc_namemax = fxdr_unsigned(u_int32_t, *tl++);
4277 			pc->pc_notrunc = fxdr_unsigned(u_int32_t, *tl++);
4278 			pc->pc_chownrestricted =
4279 			    fxdr_unsigned(u_int32_t, *tl++);
4280 			pc->pc_caseinsensitive =
4281 			    fxdr_unsigned(u_int32_t, *tl++);
4282 			pc->pc_casepreserving = fxdr_unsigned(u_int32_t, *tl);
4283 		}
4284 	}
4285 nfsmout:
4286 	mbuf_freem(nd->nd_mrep);
4287 	return (error);
4288 }
4289 
4290 /*
4291  * nfs version 3 fsinfo rpc call
4292  */
4293 APPLESTATIC int
4294 nfsrpc_fsinfo(vnode_t vp, struct nfsfsinfo *fsp, struct ucred *cred,
4295     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
4296 {
4297 	u_int32_t *tl;
4298 	struct nfsrv_descript nfsd, *nd = &nfsd;
4299 	int error;
4300 
4301 	*attrflagp = 0;
4302 	NFSCL_REQSTART(nd, NFSPROC_FSINFO, vp);
4303 	error = nfscl_request(nd, vp, p, cred, stuff);
4304 	if (error)
4305 		return (error);
4306 	error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4307 	if (nd->nd_repstat && !error)
4308 		error = nd->nd_repstat;
4309 	if (!error) {
4310 		NFSM_DISSECT(tl, u_int32_t *, NFSX_V3FSINFO);
4311 		fsp->fs_rtmax = fxdr_unsigned(u_int32_t, *tl++);
4312 		fsp->fs_rtpref = fxdr_unsigned(u_int32_t, *tl++);
4313 		fsp->fs_rtmult = fxdr_unsigned(u_int32_t, *tl++);
4314 		fsp->fs_wtmax = fxdr_unsigned(u_int32_t, *tl++);
4315 		fsp->fs_wtpref = fxdr_unsigned(u_int32_t, *tl++);
4316 		fsp->fs_wtmult = fxdr_unsigned(u_int32_t, *tl++);
4317 		fsp->fs_dtpref = fxdr_unsigned(u_int32_t, *tl++);
4318 		fsp->fs_maxfilesize = fxdr_hyper(tl);
4319 		tl += 2;
4320 		fxdr_nfsv3time(tl, &fsp->fs_timedelta);
4321 		tl += 2;
4322 		fsp->fs_properties = fxdr_unsigned(u_int32_t, *tl);
4323 	}
4324 nfsmout:
4325 	mbuf_freem(nd->nd_mrep);
4326 	return (error);
4327 }
4328 
4329 /*
4330  * This function performs the Renew RPC.
4331  */
4332 APPLESTATIC int
4333 nfsrpc_renew(struct nfsclclient *clp, struct nfsclds *dsp, struct ucred *cred,
4334     NFSPROC_T *p)
4335 {
4336 	u_int32_t *tl;
4337 	struct nfsrv_descript nfsd;
4338 	struct nfsrv_descript *nd = &nfsd;
4339 	struct nfsmount *nmp;
4340 	int error;
4341 	struct nfssockreq *nrp;
4342 	struct nfsclsession *tsep;
4343 
4344 	nmp = clp->nfsc_nmp;
4345 	if (nmp == NULL)
4346 		return (0);
4347 	if (dsp == NULL)
4348 		nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, NULL, 0,
4349 		    0);
4350 	else
4351 		nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL,
4352 		    &dsp->nfsclds_sess, 0, 0);
4353 	if (!NFSHASNFSV4N(nmp)) {
4354 		/* NFSv4.1 just uses a Sequence Op and not a Renew. */
4355 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4356 		tsep = nfsmnt_mdssession(nmp);
4357 		*tl++ = tsep->nfsess_clientid.lval[0];
4358 		*tl = tsep->nfsess_clientid.lval[1];
4359 	}
4360 	nrp = NULL;
4361 	if (dsp != NULL)
4362 		nrp = dsp->nfsclds_sockp;
4363 	if (nrp == NULL)
4364 		/* If NULL, use the MDS socket. */
4365 		nrp = &nmp->nm_sockreq;
4366 	nd->nd_flag |= ND_USEGSSNAME;
4367 	if (dsp == NULL)
4368 		error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4369 		    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4370 	else
4371 		error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4372 		    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
4373 	if (error)
4374 		return (error);
4375 	error = nd->nd_repstat;
4376 	mbuf_freem(nd->nd_mrep);
4377 	return (error);
4378 }
4379 
4380 /*
4381  * This function performs the Releaselockowner RPC.
4382  */
4383 APPLESTATIC int
4384 nfsrpc_rellockown(struct nfsmount *nmp, struct nfscllockowner *lp,
4385     uint8_t *fh, int fhlen, struct ucred *cred, NFSPROC_T *p)
4386 {
4387 	struct nfsrv_descript nfsd, *nd = &nfsd;
4388 	u_int32_t *tl;
4389 	int error;
4390 	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4391 	struct nfsclsession *tsep;
4392 
4393 	if (NFSHASNFSV4N(nmp)) {
4394 		/* For NFSv4.1, do a FreeStateID. */
4395 		nfscl_reqstart(nd, NFSPROC_FREESTATEID, nmp, NULL, 0, NULL,
4396 		    NULL, 0, 0);
4397 		nfsm_stateidtom(nd, &lp->nfsl_stateid, NFSSTATEID_PUTSTATEID);
4398 	} else {
4399 		nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL,
4400 		    NULL, 0, 0);
4401 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4402 		tsep = nfsmnt_mdssession(nmp);
4403 		*tl++ = tsep->nfsess_clientid.lval[0];
4404 		*tl = tsep->nfsess_clientid.lval[1];
4405 		NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4406 		NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4407 		(void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4408 	}
4409 	nd->nd_flag |= ND_USEGSSNAME;
4410 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4411 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4412 	if (error)
4413 		return (error);
4414 	error = nd->nd_repstat;
4415 	mbuf_freem(nd->nd_mrep);
4416 	return (error);
4417 }
4418 
4419 /*
4420  * This function performs the Compound to get the mount pt FH.
4421  */
4422 APPLESTATIC int
4423 nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred,
4424     NFSPROC_T *p)
4425 {
4426 	u_int32_t *tl;
4427 	struct nfsrv_descript nfsd;
4428 	struct nfsrv_descript *nd = &nfsd;
4429 	u_char *cp, *cp2;
4430 	int error, cnt, len, setnil;
4431 	u_int32_t *opcntp;
4432 
4433 	nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL, 0,
4434 	    0);
4435 	cp = dirpath;
4436 	cnt = 0;
4437 	do {
4438 		setnil = 0;
4439 		while (*cp == '/')
4440 			cp++;
4441 		cp2 = cp;
4442 		while (*cp2 != '\0' && *cp2 != '/')
4443 			cp2++;
4444 		if (*cp2 == '/') {
4445 			setnil = 1;
4446 			*cp2 = '\0';
4447 		}
4448 		if (cp2 != cp) {
4449 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4450 			*tl = txdr_unsigned(NFSV4OP_LOOKUP);
4451 			nfsm_strtom(nd, cp, strlen(cp));
4452 			cnt++;
4453 		}
4454 		if (setnil)
4455 			*cp2++ = '/';
4456 		cp = cp2;
4457 	} while (*cp != '\0');
4458 	if (NFSHASNFSV4N(nmp))
4459 		/* Has a Sequence Op done by nfscl_reqstart(). */
4460 		*opcntp = txdr_unsigned(3 + cnt);
4461 	else
4462 		*opcntp = txdr_unsigned(2 + cnt);
4463 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4464 	*tl = txdr_unsigned(NFSV4OP_GETFH);
4465 	nd->nd_flag |= ND_USEGSSNAME;
4466 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4467 		NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4468 	if (error)
4469 		return (error);
4470 	if (nd->nd_repstat == 0) {
4471 		NFSM_DISSECT(tl, u_int32_t *, (3 + 2 * cnt) * NFSX_UNSIGNED);
4472 		tl += (2 + 2 * cnt);
4473 		if ((len = fxdr_unsigned(int, *tl)) <= 0 ||
4474 			len > NFSX_FHMAX) {
4475 			nd->nd_repstat = NFSERR_BADXDR;
4476 		} else {
4477 			nd->nd_repstat = nfsrv_mtostr(nd, nmp->nm_fh, len);
4478 			if (nd->nd_repstat == 0)
4479 				nmp->nm_fhsize = len;
4480 		}
4481 	}
4482 	error = nd->nd_repstat;
4483 nfsmout:
4484 	mbuf_freem(nd->nd_mrep);
4485 	return (error);
4486 }
4487 
4488 /*
4489  * This function performs the Delegreturn RPC.
4490  */
4491 APPLESTATIC int
4492 nfsrpc_delegreturn(struct nfscldeleg *dp, struct ucred *cred,
4493     struct nfsmount *nmp, NFSPROC_T *p, int syscred)
4494 {
4495 	u_int32_t *tl;
4496 	struct nfsrv_descript nfsd;
4497 	struct nfsrv_descript *nd = &nfsd;
4498 	int error;
4499 
4500 	nfscl_reqstart(nd, NFSPROC_DELEGRETURN, nmp, dp->nfsdl_fh,
4501 	    dp->nfsdl_fhlen, NULL, NULL, 0, 0);
4502 	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
4503 	if (NFSHASNFSV4N(nmp))
4504 		*tl++ = 0;
4505 	else
4506 		*tl++ = dp->nfsdl_stateid.seqid;
4507 	*tl++ = dp->nfsdl_stateid.other[0];
4508 	*tl++ = dp->nfsdl_stateid.other[1];
4509 	*tl = dp->nfsdl_stateid.other[2];
4510 	if (syscred)
4511 		nd->nd_flag |= ND_USEGSSNAME;
4512 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4513 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4514 	if (error)
4515 		return (error);
4516 	error = nd->nd_repstat;
4517 	mbuf_freem(nd->nd_mrep);
4518 	return (error);
4519 }
4520 
4521 /*
4522  * nfs getacl call.
4523  */
4524 APPLESTATIC int
4525 nfsrpc_getacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4526     struct acl *aclp, void *stuff)
4527 {
4528 	struct nfsrv_descript nfsd, *nd = &nfsd;
4529 	int error;
4530 	nfsattrbit_t attrbits;
4531 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
4532 
4533 	if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
4534 		return (EOPNOTSUPP);
4535 	NFSCL_REQSTART(nd, NFSPROC_GETACL, vp);
4536 	NFSZERO_ATTRBIT(&attrbits);
4537 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
4538 	(void) nfsrv_putattrbit(nd, &attrbits);
4539 	error = nfscl_request(nd, vp, p, cred, stuff);
4540 	if (error)
4541 		return (error);
4542 	if (!nd->nd_repstat)
4543 		error = nfsv4_loadattr(nd, vp, NULL, NULL, NULL, 0, NULL,
4544 		    NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, p, cred);
4545 	else
4546 		error = nd->nd_repstat;
4547 	mbuf_freem(nd->nd_mrep);
4548 	return (error);
4549 }
4550 
4551 /*
4552  * nfs setacl call.
4553  */
4554 APPLESTATIC int
4555 nfsrpc_setacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4556     struct acl *aclp, void *stuff)
4557 {
4558 	int error;
4559 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
4560 
4561 	if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
4562 		return (EOPNOTSUPP);
4563 	error = nfsrpc_setattr(vp, NULL, aclp, cred, p, NULL, NULL, stuff);
4564 	return (error);
4565 }
4566 
4567 /*
4568  * nfs setacl call.
4569  */
4570 static int
4571 nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4572     struct acl *aclp, nfsv4stateid_t *stateidp, void *stuff)
4573 {
4574 	struct nfsrv_descript nfsd, *nd = &nfsd;
4575 	int error;
4576 	nfsattrbit_t attrbits;
4577 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
4578 
4579 	if (!NFSHASNFSV4(nmp))
4580 		return (EOPNOTSUPP);
4581 	NFSCL_REQSTART(nd, NFSPROC_SETACL, vp);
4582 	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
4583 	NFSZERO_ATTRBIT(&attrbits);
4584 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
4585 	(void) nfsv4_fillattr(nd, vnode_mount(vp), vp, aclp, NULL, NULL, 0,
4586 	    &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0);
4587 	error = nfscl_request(nd, vp, p, cred, stuff);
4588 	if (error)
4589 		return (error);
4590 	/* Don't care about the pre/postop attributes */
4591 	mbuf_freem(nd->nd_mrep);
4592 	return (nd->nd_repstat);
4593 }
4594 
4595 /*
4596  * Do the NFSv4.1 Exchange ID.
4597  */
4598 int
4599 nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp,
4600     struct nfssockreq *nrp, uint32_t exchflags, struct nfsclds **dspp,
4601     struct ucred *cred, NFSPROC_T *p)
4602 {
4603 	uint32_t *tl, v41flags;
4604 	struct nfsrv_descript nfsd;
4605 	struct nfsrv_descript *nd = &nfsd;
4606 	struct nfsclds *dsp;
4607 	struct timespec verstime;
4608 	int error, len;
4609 
4610 	*dspp = NULL;
4611 	nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL, 0, 0);
4612 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
4613 	*tl++ = txdr_unsigned(nfsboottime.tv_sec);	/* Client owner */
4614 	*tl = txdr_unsigned(clp->nfsc_rev);
4615 	(void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
4616 
4617 	NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED);
4618 	*tl++ = txdr_unsigned(exchflags);
4619 	*tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE);
4620 
4621 	/* Set the implementation id4 */
4622 	*tl = txdr_unsigned(1);
4623 	(void) nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org"));
4624 	(void) nfsm_strtom(nd, version, strlen(version));
4625 	NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME);
4626 	verstime.tv_sec = 1293840000;		/* Jan 1, 2011 */
4627 	verstime.tv_nsec = 0;
4628 	txdr_nfsv4time(&verstime, tl);
4629 	nd->nd_flag |= ND_USEGSSNAME;
4630 	error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4631 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4632 	NFSCL_DEBUG(1, "exchangeid err=%d reps=%d\n", error,
4633 	    (int)nd->nd_repstat);
4634 	if (error != 0)
4635 		return (error);
4636 	if (nd->nd_repstat == 0) {
4637 		NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_UNSIGNED + NFSX_HYPER);
4638 		len = fxdr_unsigned(int, *(tl + 7));
4639 		if (len < 0 || len > NFSV4_OPAQUELIMIT) {
4640 			error = NFSERR_BADXDR;
4641 			goto nfsmout;
4642 		}
4643 		dsp = malloc(sizeof(struct nfsclds) + len + 1, M_NFSCLDS,
4644 		    M_WAITOK | M_ZERO);
4645 		dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
4646 		dsp->nfsclds_servownlen = len;
4647 		dsp->nfsclds_sess.nfsess_clientid.lval[0] = *tl++;
4648 		dsp->nfsclds_sess.nfsess_clientid.lval[1] = *tl++;
4649 		dsp->nfsclds_sess.nfsess_sequenceid =
4650 		    fxdr_unsigned(uint32_t, *tl++);
4651 		v41flags = fxdr_unsigned(uint32_t, *tl);
4652 		if ((v41flags & NFSV4EXCH_USEPNFSMDS) != 0 &&
4653 		    NFSHASPNFSOPT(nmp)) {
4654 			NFSCL_DEBUG(1, "set PNFS\n");
4655 			NFSLOCKMNT(nmp);
4656 			nmp->nm_state |= NFSSTA_PNFS;
4657 			NFSUNLOCKMNT(nmp);
4658 			dsp->nfsclds_flags |= NFSCLDS_MDS;
4659 		}
4660 		if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0)
4661 			dsp->nfsclds_flags |= NFSCLDS_DS;
4662 		if (len > 0)
4663 			nd->nd_repstat = nfsrv_mtostr(nd,
4664 			    dsp->nfsclds_serverown, len);
4665 		if (nd->nd_repstat == 0) {
4666 			mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
4667 			mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
4668 			    NULL, MTX_DEF);
4669 			nfscl_initsessionslots(&dsp->nfsclds_sess);
4670 			*dspp = dsp;
4671 		} else
4672 			free(dsp, M_NFSCLDS);
4673 	}
4674 	error = nd->nd_repstat;
4675 nfsmout:
4676 	mbuf_freem(nd->nd_mrep);
4677 	return (error);
4678 }
4679 
4680 /*
4681  * Do the NFSv4.1 Create Session.
4682  */
4683 int
4684 nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep,
4685     struct nfssockreq *nrp, uint32_t sequenceid, int mds, struct ucred *cred,
4686     NFSPROC_T *p)
4687 {
4688 	uint32_t crflags, maxval, *tl;
4689 	struct nfsrv_descript nfsd;
4690 	struct nfsrv_descript *nd = &nfsd;
4691 	int error, irdcnt;
4692 
4693 	/* Make sure nm_rsize, nm_wsize is set. */
4694 	if (nmp->nm_rsize > NFS_MAXBSIZE || nmp->nm_rsize == 0)
4695 		nmp->nm_rsize = NFS_MAXBSIZE;
4696 	if (nmp->nm_wsize > NFS_MAXBSIZE || nmp->nm_wsize == 0)
4697 		nmp->nm_wsize = NFS_MAXBSIZE;
4698 	nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL, 0,
4699 	    0);
4700 	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
4701 	*tl++ = sep->nfsess_clientid.lval[0];
4702 	*tl++ = sep->nfsess_clientid.lval[1];
4703 	*tl++ = txdr_unsigned(sequenceid);
4704 	crflags = (NFSMNT_RDONLY(nmp->nm_mountp) ? 0 : NFSV4CRSESS_PERSIST);
4705 	if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0 && mds != 0)
4706 		crflags |= NFSV4CRSESS_CONNBACKCHAN;
4707 	*tl = txdr_unsigned(crflags);
4708 
4709 	/* Fill in fore channel attributes. */
4710 	NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4711 	*tl++ = 0;				/* Header pad size */
4712 	*tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);/* Max request size */
4713 	*tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);/* Max reply size */
4714 	*tl++ = txdr_unsigned(4096);		/* Max response size cached */
4715 	*tl++ = txdr_unsigned(20);		/* Max operations */
4716 	*tl++ = txdr_unsigned(64);		/* Max slots */
4717 	*tl = 0;				/* No rdma ird */
4718 
4719 	/* Fill in back channel attributes. */
4720 	NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4721 	*tl++ = 0;				/* Header pad size */
4722 	*tl++ = txdr_unsigned(10000);		/* Max request size */
4723 	*tl++ = txdr_unsigned(10000);		/* Max response size */
4724 	*tl++ = txdr_unsigned(4096);		/* Max response size cached */
4725 	*tl++ = txdr_unsigned(4);		/* Max operations */
4726 	*tl++ = txdr_unsigned(NFSV4_CBSLOTS);	/* Max slots */
4727 	*tl = 0;				/* No rdma ird */
4728 
4729 	NFSM_BUILD(tl, uint32_t *, 8 * NFSX_UNSIGNED);
4730 	*tl++ = txdr_unsigned(NFS_CALLBCKPROG);	/* Call back prog # */
4731 
4732 	/* Allow AUTH_SYS callbacks as uid, gid == 0. */
4733 	*tl++ = txdr_unsigned(1);		/* Auth_sys only */
4734 	*tl++ = txdr_unsigned(AUTH_SYS);	/* AUTH_SYS type */
4735 	*tl++ = txdr_unsigned(nfsboottime.tv_sec); /* time stamp */
4736 	*tl++ = 0;				/* Null machine name */
4737 	*tl++ = 0;				/* Uid == 0 */
4738 	*tl++ = 0;				/* Gid == 0 */
4739 	*tl = 0;				/* No additional gids */
4740 	nd->nd_flag |= ND_USEGSSNAME;
4741 	error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG,
4742 	    NFS_VER4, NULL, 1, NULL, NULL);
4743 	if (error != 0)
4744 		return (error);
4745 	if (nd->nd_repstat == 0) {
4746 		NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
4747 		    2 * NFSX_UNSIGNED);
4748 		bcopy(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID);
4749 		tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
4750 		sep->nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++);
4751 		crflags = fxdr_unsigned(uint32_t, *tl);
4752 		if ((crflags & NFSV4CRSESS_PERSIST) != 0 && mds != 0) {
4753 			NFSLOCKMNT(nmp);
4754 			nmp->nm_state |= NFSSTA_SESSPERSIST;
4755 			NFSUNLOCKMNT(nmp);
4756 		}
4757 
4758 		/* Get the fore channel slot count. */
4759 		NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4760 		tl++;			/* Skip the header pad size. */
4761 
4762 		/* Make sure nm_wsize is small enough. */
4763 		maxval = fxdr_unsigned(uint32_t, *tl++);
4764 		while (maxval < nmp->nm_wsize + NFS_MAXXDR) {
4765 			if (nmp->nm_wsize > 8096)
4766 				nmp->nm_wsize /= 2;
4767 			else
4768 				break;
4769 		}
4770 
4771 		/* Make sure nm_rsize is small enough. */
4772 		maxval = fxdr_unsigned(uint32_t, *tl++);
4773 		while (maxval < nmp->nm_rsize + NFS_MAXXDR) {
4774 			if (nmp->nm_rsize > 8096)
4775 				nmp->nm_rsize /= 2;
4776 			else
4777 				break;
4778 		}
4779 
4780 		sep->nfsess_maxcache = fxdr_unsigned(int, *tl++);
4781 		tl++;
4782 		sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++);
4783 		NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots);
4784 		irdcnt = fxdr_unsigned(int, *tl);
4785 		if (irdcnt > 0)
4786 			NFSM_DISSECT(tl, uint32_t *, irdcnt * NFSX_UNSIGNED);
4787 
4788 		/* and the back channel slot count. */
4789 		NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4790 		tl += 5;
4791 		sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl);
4792 		NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots);
4793 	}
4794 	error = nd->nd_repstat;
4795 nfsmout:
4796 	mbuf_freem(nd->nd_mrep);
4797 	return (error);
4798 }
4799 
4800 /*
4801  * Do the NFSv4.1 Destroy Session.
4802  */
4803 int
4804 nfsrpc_destroysession(struct nfsmount *nmp, struct nfsclclient *clp,
4805     struct ucred *cred, NFSPROC_T *p)
4806 {
4807 	uint32_t *tl;
4808 	struct nfsrv_descript nfsd;
4809 	struct nfsrv_descript *nd = &nfsd;
4810 	int error;
4811 	struct nfsclsession *tsep;
4812 
4813 	nfscl_reqstart(nd, NFSPROC_DESTROYSESSION, nmp, NULL, 0, NULL, NULL, 0,
4814 	    0);
4815 	NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID);
4816 	tsep = nfsmnt_mdssession(nmp);
4817 	bcopy(tsep->nfsess_sessionid, tl, NFSX_V4SESSIONID);
4818 	nd->nd_flag |= ND_USEGSSNAME;
4819 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4820 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4821 	if (error != 0)
4822 		return (error);
4823 	error = nd->nd_repstat;
4824 	mbuf_freem(nd->nd_mrep);
4825 	return (error);
4826 }
4827 
4828 /*
4829  * Do the NFSv4.1 Destroy Client.
4830  */
4831 int
4832 nfsrpc_destroyclient(struct nfsmount *nmp, struct nfsclclient *clp,
4833     struct ucred *cred, NFSPROC_T *p)
4834 {
4835 	uint32_t *tl;
4836 	struct nfsrv_descript nfsd;
4837 	struct nfsrv_descript *nd = &nfsd;
4838 	int error;
4839 	struct nfsclsession *tsep;
4840 
4841 	nfscl_reqstart(nd, NFSPROC_DESTROYCLIENT, nmp, NULL, 0, NULL, NULL, 0,
4842 	    0);
4843 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
4844 	tsep = nfsmnt_mdssession(nmp);
4845 	*tl++ = tsep->nfsess_clientid.lval[0];
4846 	*tl = tsep->nfsess_clientid.lval[1];
4847 	nd->nd_flag |= ND_USEGSSNAME;
4848 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4849 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4850 	if (error != 0)
4851 		return (error);
4852 	error = nd->nd_repstat;
4853 	mbuf_freem(nd->nd_mrep);
4854 	return (error);
4855 }
4856 
4857 /*
4858  * Do the NFSv4.1 LayoutGet.
4859  */
4860 static int
4861 nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode,
4862     uint64_t offset, uint64_t len, uint64_t minlen, int layouttype,
4863     int layoutlen, nfsv4stateid_t *stateidp, int *retonclosep,
4864     struct nfsclflayouthead *flhp, struct ucred *cred, NFSPROC_T *p,
4865     void *stuff)
4866 {
4867 	struct nfsrv_descript nfsd, *nd = &nfsd;
4868 	int error;
4869 
4870 	nfscl_reqstart(nd, NFSPROC_LAYOUTGET, nmp, fhp, fhlen, NULL, NULL, 0,
4871 	    0);
4872 	nfsrv_setuplayoutget(nd, iomode, offset, len, minlen, stateidp,
4873 	    layouttype, layoutlen, 0);
4874 	nd->nd_flag |= ND_USEGSSNAME;
4875 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4876 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4877 	NFSCL_DEBUG(4, "layget err=%d st=%d\n", error, nd->nd_repstat);
4878 	if (error != 0)
4879 		return (error);
4880 	if (nd->nd_repstat == 0)
4881 		error = nfsrv_parselayoutget(nd, stateidp, retonclosep, flhp);
4882 	if (error == 0 && nd->nd_repstat != 0)
4883 		error = nd->nd_repstat;
4884 	mbuf_freem(nd->nd_mrep);
4885 	return (error);
4886 }
4887 
4888 /*
4889  * Do the NFSv4.1 Get Device Info.
4890  */
4891 int
4892 nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype,
4893     uint32_t *notifybitsp, struct nfscldevinfo **ndip, struct ucred *cred,
4894     NFSPROC_T *p)
4895 {
4896 	uint32_t cnt, *tl, vers, minorvers;
4897 	struct nfsrv_descript nfsd;
4898 	struct nfsrv_descript *nd = &nfsd;
4899 	struct sockaddr_in sin, ssin;
4900 	struct sockaddr_in6 sin6, ssin6;
4901 	struct nfsclds *dsp = NULL, **dspp, **gotdspp;
4902 	struct nfscldevinfo *ndi;
4903 	int addrcnt = 0, bitcnt, error, gotvers, i, isudp, j, stripecnt;
4904 	uint8_t stripeindex;
4905 	sa_family_t af, safilled;
4906 
4907 	*ndip = NULL;
4908 	ndi = NULL;
4909 	gotdspp = NULL;
4910 	nfscl_reqstart(nd, NFSPROC_GETDEVICEINFO, nmp, NULL, 0, NULL, NULL, 0,
4911 	    0);
4912 	NFSM_BUILD(tl, uint32_t *, NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED);
4913 	NFSBCOPY(deviceid, tl, NFSX_V4DEVICEID);
4914 	tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
4915 	*tl++ = txdr_unsigned(layouttype);
4916 	*tl++ = txdr_unsigned(100000);
4917 	if (notifybitsp != NULL && *notifybitsp != 0) {
4918 		*tl = txdr_unsigned(1);		/* One word of bits. */
4919 		NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
4920 		*tl = txdr_unsigned(*notifybitsp);
4921 	} else
4922 		*tl = txdr_unsigned(0);
4923 	nd->nd_flag |= ND_USEGSSNAME;
4924 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4925 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4926 	if (error != 0)
4927 		return (error);
4928 	if (nd->nd_repstat == 0) {
4929 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
4930 		if (layouttype != fxdr_unsigned(int, *tl))
4931 			printf("EEK! devinfo layout type not same!\n");
4932 		if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
4933 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
4934 			stripecnt = fxdr_unsigned(int, *tl);
4935 			NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt);
4936 			if (stripecnt < 1 || stripecnt > 4096) {
4937 				printf("pNFS File layout devinfo stripecnt %d:"
4938 				    " out of range\n", stripecnt);
4939 				error = NFSERR_BADXDR;
4940 				goto nfsmout;
4941 			}
4942 			NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) *
4943 			    NFSX_UNSIGNED);
4944 			addrcnt = fxdr_unsigned(int, *(tl + stripecnt));
4945 			NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt);
4946 			if (addrcnt < 1 || addrcnt > 128) {
4947 				printf("NFS devinfo addrcnt %d: out of range\n",
4948 				    addrcnt);
4949 				error = NFSERR_BADXDR;
4950 				goto nfsmout;
4951 			}
4952 
4953 			/*
4954 			 * Now we know how many stripe indices and addresses, so
4955 			 * we can allocate the structure the correct size.
4956 			 */
4957 			i = (stripecnt * sizeof(uint8_t)) /
4958 			    sizeof(struct nfsclds *) + 1;
4959 			NFSCL_DEBUG(4, "stripeindices=%d\n", i);
4960 			ndi = malloc(sizeof(*ndi) + (addrcnt + i) *
4961 			    sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK |
4962 			    M_ZERO);
4963 			NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
4964 			    NFSX_V4DEVICEID);
4965 			ndi->nfsdi_refcnt = 0;
4966 			ndi->nfsdi_flags = NFSDI_FILELAYOUT;
4967 			ndi->nfsdi_stripecnt = stripecnt;
4968 			ndi->nfsdi_addrcnt = addrcnt;
4969 			/* Fill in the stripe indices. */
4970 			for (i = 0; i < stripecnt; i++) {
4971 				stripeindex = fxdr_unsigned(uint8_t, *tl++);
4972 				NFSCL_DEBUG(4, "stripeind=%d\n", stripeindex);
4973 				if (stripeindex >= addrcnt) {
4974 					printf("pNFS File Layout devinfo"
4975 					    " stripeindex %d: too big\n",
4976 					    (int)stripeindex);
4977 					error = NFSERR_BADXDR;
4978 					goto nfsmout;
4979 				}
4980 				nfsfldi_setstripeindex(ndi, i, stripeindex);
4981 			}
4982 		} else if (layouttype == NFSLAYOUT_FLEXFILE) {
4983 			/* For Flex File, we only get one address list. */
4984 			ndi = malloc(sizeof(*ndi) + sizeof(struct nfsclds *),
4985 			    M_NFSDEVINFO, M_WAITOK | M_ZERO);
4986 			NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
4987 			    NFSX_V4DEVICEID);
4988 			ndi->nfsdi_refcnt = 0;
4989 			ndi->nfsdi_flags = NFSDI_FLEXFILE;
4990 			addrcnt = ndi->nfsdi_addrcnt = 1;
4991 		}
4992 
4993 		/* Now, dissect the server address(es). */
4994 		safilled = AF_UNSPEC;
4995 		for (i = 0; i < addrcnt; i++) {
4996 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
4997 			cnt = fxdr_unsigned(uint32_t, *tl);
4998 			if (cnt == 0) {
4999 				printf("NFS devinfo 0 len addrlist\n");
5000 				error = NFSERR_BADXDR;
5001 				goto nfsmout;
5002 			}
5003 			dspp = nfsfldi_addr(ndi, i);
5004 			safilled = AF_UNSPEC;
5005 			for (j = 0; j < cnt; j++) {
5006 				error = nfsv4_getipaddr(nd, &sin, &sin6, &af,
5007 				    &isudp);
5008 				if (error != 0 && error != EPERM) {
5009 					error = NFSERR_BADXDR;
5010 					goto nfsmout;
5011 				}
5012 				if (error == 0 && isudp == 0) {
5013 					/*
5014 					 * The priority is:
5015 					 * - Same address family.
5016 					 * Save the address and dspp, so that
5017 					 * the connection can be done after
5018 					 * parsing is complete.
5019 					 */
5020 					if (safilled == AF_UNSPEC ||
5021 					    (af == nmp->nm_nam->sa_family &&
5022 					     safilled != nmp->nm_nam->sa_family)
5023 					   ) {
5024 						if (af == AF_INET)
5025 							ssin = sin;
5026 						else
5027 							ssin6 = sin6;
5028 						safilled = af;
5029 						gotdspp = dspp;
5030 					}
5031 				}
5032 			}
5033 		}
5034 
5035 		gotvers = NFS_VER4;	/* Always NFSv4 for File Layout. */
5036 		/* For Flex File, we will take one of the versions to use. */
5037 		if (layouttype == NFSLAYOUT_FLEXFILE) {
5038 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5039 			j = fxdr_unsigned(int, *tl);
5040 			if (j < 1 || j > NFSDEV_MAXVERS) {
5041 				printf("pNFS: too many versions\n");
5042 				error = NFSERR_BADXDR;
5043 				goto nfsmout;
5044 			}
5045 			gotvers = 0;
5046 			for (i = 0; i < j; i++) {
5047 				NFSM_DISSECT(tl, uint32_t *, 5 * NFSX_UNSIGNED);
5048 				vers = fxdr_unsigned(uint32_t, *tl++);
5049 				minorvers = fxdr_unsigned(uint32_t, *tl++);
5050 				if ((vers == NFS_VER4 && minorvers ==
5051 				    NFSV41_MINORVERSION) || (vers == NFS_VER3 &&
5052 				    gotvers == 0)) {
5053 					gotvers = vers;
5054 					/* We'll take this one. */
5055 					ndi->nfsdi_versindex = i;
5056 					ndi->nfsdi_vers = vers;
5057 					ndi->nfsdi_minorvers = minorvers;
5058 					ndi->nfsdi_rsize = fxdr_unsigned(
5059 					    uint32_t, *tl++);
5060 					ndi->nfsdi_wsize = fxdr_unsigned(
5061 					    uint32_t, *tl++);
5062 					if (*tl == newnfs_true)
5063 						ndi->nfsdi_flags |=
5064 						    NFSDI_TIGHTCOUPLED;
5065 					else
5066 						ndi->nfsdi_flags &=
5067 						    ~NFSDI_TIGHTCOUPLED;
5068 				}
5069 			}
5070 			if (gotvers == 0) {
5071 				printf("pNFS: no NFSv3 or NFSv4.1\n");
5072 				error = NFSERR_BADXDR;
5073 				goto nfsmout;
5074 			}
5075 		}
5076 
5077 		/* And the notify bits. */
5078 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5079 		bitcnt = fxdr_unsigned(int, *tl);
5080 		if (bitcnt > 0) {
5081 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5082 			if (notifybitsp != NULL)
5083 				*notifybitsp =
5084 				    fxdr_unsigned(uint32_t, *tl);
5085 		}
5086 		if (safilled != AF_UNSPEC) {
5087 			KASSERT(ndi != NULL, ("ndi is NULL"));
5088 			*ndip = ndi;
5089 		} else
5090 			error = EPERM;
5091 		if (error == 0) {
5092 			/*
5093 			 * Now we can do a TCP connection for the correct
5094 			 * NFS version and IP address.
5095 			 */
5096 			error = nfsrpc_fillsa(nmp, &ssin, &ssin6, safilled,
5097 			    gotvers, &dsp, p);
5098 		}
5099 		if (error == 0) {
5100 			KASSERT(gotdspp != NULL, ("gotdspp is NULL"));
5101 			*gotdspp = dsp;
5102 		}
5103 	}
5104 	if (nd->nd_repstat != 0 && error == 0)
5105 		error = nd->nd_repstat;
5106 nfsmout:
5107 	if (error != 0 && ndi != NULL)
5108 		nfscl_freedevinfo(ndi);
5109 	mbuf_freem(nd->nd_mrep);
5110 	return (error);
5111 }
5112 
5113 /*
5114  * Do the NFSv4.1 LayoutCommit.
5115  */
5116 int
5117 nfsrpc_layoutcommit(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5118     uint64_t off, uint64_t len, uint64_t lastbyte, nfsv4stateid_t *stateidp,
5119     int layouttype, struct ucred *cred, NFSPROC_T *p, void *stuff)
5120 {
5121 	uint32_t *tl;
5122 	struct nfsrv_descript nfsd, *nd = &nfsd;
5123 	int error;
5124 
5125 	nfscl_reqstart(nd, NFSPROC_LAYOUTCOMMIT, nmp, fh, fhlen, NULL, NULL,
5126 	    0, 0);
5127 	NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
5128 	    NFSX_STATEID);
5129 	txdr_hyper(off, tl);
5130 	tl += 2;
5131 	txdr_hyper(len, tl);
5132 	tl += 2;
5133 	if (reclaim != 0)
5134 		*tl++ = newnfs_true;
5135 	else
5136 		*tl++ = newnfs_false;
5137 	*tl++ = txdr_unsigned(stateidp->seqid);
5138 	*tl++ = stateidp->other[0];
5139 	*tl++ = stateidp->other[1];
5140 	*tl++ = stateidp->other[2];
5141 	*tl++ = newnfs_true;
5142 	if (lastbyte < off)
5143 		lastbyte = off;
5144 	else if (lastbyte >= (off + len))
5145 		lastbyte = off + len - 1;
5146 	txdr_hyper(lastbyte, tl);
5147 	tl += 2;
5148 	*tl++ = newnfs_false;
5149 	*tl++ = txdr_unsigned(layouttype);
5150 	/* All supported layouts are 0 length. */
5151 	*tl = txdr_unsigned(0);
5152 	nd->nd_flag |= ND_USEGSSNAME;
5153 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5154 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5155 	if (error != 0)
5156 		return (error);
5157 	error = nd->nd_repstat;
5158 	mbuf_freem(nd->nd_mrep);
5159 	return (error);
5160 }
5161 
5162 /*
5163  * Do the NFSv4.1 LayoutReturn.
5164  */
5165 int
5166 nfsrpc_layoutreturn(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5167     int layouttype, uint32_t iomode, int layoutreturn, uint64_t offset,
5168     uint64_t len, nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
5169     void *stuff)
5170 {
5171 	uint32_t *tl;
5172 	struct nfsrv_descript nfsd, *nd = &nfsd;
5173 	int error;
5174 
5175 	nfscl_reqstart(nd, NFSPROC_LAYOUTRETURN, nmp, fh, fhlen, NULL, NULL,
5176 	    0, 0);
5177 	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
5178 	if (reclaim != 0)
5179 		*tl++ = newnfs_true;
5180 	else
5181 		*tl++ = newnfs_false;
5182 	*tl++ = txdr_unsigned(layouttype);
5183 	*tl++ = txdr_unsigned(iomode);
5184 	*tl = txdr_unsigned(layoutreturn);
5185 	if (layoutreturn == NFSLAYOUTRETURN_FILE) {
5186 		NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
5187 		    NFSX_UNSIGNED);
5188 		txdr_hyper(offset, tl);
5189 		tl += 2;
5190 		txdr_hyper(len, tl);
5191 		tl += 2;
5192 		NFSCL_DEBUG(4, "layoutret stseq=%d\n", (int)stateidp->seqid);
5193 		*tl++ = txdr_unsigned(stateidp->seqid);
5194 		*tl++ = stateidp->other[0];
5195 		*tl++ = stateidp->other[1];
5196 		*tl++ = stateidp->other[2];
5197 		if (layouttype == NFSLAYOUT_NFSV4_1_FILES)
5198 			*tl = txdr_unsigned(0);
5199 		else if (layouttype == NFSLAYOUT_FLEXFILE) {
5200 			*tl = txdr_unsigned(2 * NFSX_UNSIGNED);
5201 			NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5202 			/* No ioerrs or stats yet. */
5203 			*tl++ = 0;
5204 			*tl = 0;
5205 		}
5206 	}
5207 	nd->nd_flag |= ND_USEGSSNAME;
5208 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5209 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5210 	if (error != 0)
5211 		return (error);
5212 	if (nd->nd_repstat == 0) {
5213 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5214 		if (*tl != 0) {
5215 			NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
5216 			stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
5217 			stateidp->other[0] = *tl++;
5218 			stateidp->other[1] = *tl++;
5219 			stateidp->other[2] = *tl;
5220 		}
5221 	} else
5222 		error = nd->nd_repstat;
5223 nfsmout:
5224 	mbuf_freem(nd->nd_mrep);
5225 	return (error);
5226 }
5227 
5228 /*
5229  * Acquire a layout and devinfo, if possible. The caller must have acquired
5230  * a reference count on the nfsclclient structure before calling this.
5231  * Return the layout in lypp with a reference count on it, if successful.
5232  */
5233 static int
5234 nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp,
5235     int iomode, uint32_t *notifybitsp, nfsv4stateid_t *stateidp, uint64_t off,
5236     struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p)
5237 {
5238 	struct nfscllayout *lyp;
5239 	struct nfsclflayout *flp;
5240 	struct nfsclflayouthead flh;
5241 	int error = 0, islocked, layoutlen, layouttype, recalled, retonclose;
5242 	nfsv4stateid_t stateid;
5243 	struct nfsclsession *tsep;
5244 
5245 	*lypp = NULL;
5246 	if (NFSHASFLEXFILE(nmp))
5247 		layouttype = NFSLAYOUT_FLEXFILE;
5248 	else
5249 		layouttype = NFSLAYOUT_NFSV4_1_FILES;
5250 	/*
5251 	 * If lyp is returned non-NULL, there will be a refcnt (shared lock)
5252 	 * on it, iff flp != NULL or a lock (exclusive lock) on it iff
5253 	 * flp == NULL.
5254 	 */
5255 	lyp = nfscl_getlayout(nmp->nm_clp, nfhp->nfh_fh, nfhp->nfh_len,
5256 	    off, &flp, &recalled);
5257 	islocked = 0;
5258 	if (lyp == NULL || flp == NULL) {
5259 		if (recalled != 0)
5260 			return (EIO);
5261 		LIST_INIT(&flh);
5262 		tsep = nfsmnt_mdssession(nmp);
5263 		layoutlen = tsep->nfsess_maxcache -
5264 		    (NFSX_STATEID + 3 * NFSX_UNSIGNED);
5265 		if (lyp == NULL) {
5266 			stateid.seqid = 0;
5267 			stateid.other[0] = stateidp->other[0];
5268 			stateid.other[1] = stateidp->other[1];
5269 			stateid.other[2] = stateidp->other[2];
5270 			error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5271 			    nfhp->nfh_len, iomode, (uint64_t)0, UINT64_MAX,
5272 			    (uint64_t)0, layouttype, layoutlen, &stateid,
5273 			    &retonclose, &flh, cred, p, NULL);
5274 		} else {
5275 			islocked = 1;
5276 			stateid.seqid = lyp->nfsly_stateid.seqid;
5277 			stateid.other[0] = lyp->nfsly_stateid.other[0];
5278 			stateid.other[1] = lyp->nfsly_stateid.other[1];
5279 			stateid.other[2] = lyp->nfsly_stateid.other[2];
5280 			error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5281 			    nfhp->nfh_len, iomode, off, UINT64_MAX,
5282 			    (uint64_t)0, layouttype, layoutlen, &stateid,
5283 			    &retonclose, &flh, cred, p, NULL);
5284 		}
5285 		error = nfsrpc_layoutgetres(nmp, vp, nfhp->nfh_fh,
5286 		    nfhp->nfh_len, &stateid, retonclose, notifybitsp, &lyp,
5287 		    &flh, layouttype, error, NULL, cred, p);
5288 		if (error == 0)
5289 			*lypp = lyp;
5290 		else if (islocked != 0)
5291 			nfscl_rellayout(lyp, 1);
5292 	} else
5293 		*lypp = lyp;
5294 	return (error);
5295 }
5296 
5297 /*
5298  * Do a TCP connection plus exchange id and create session.
5299  * If successful, a "struct nfsclds" is linked into the list for the
5300  * mount point and a pointer to it is returned.
5301  */
5302 static int
5303 nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_in *sin,
5304     struct sockaddr_in6 *sin6, sa_family_t af, int vers, struct nfsclds **dspp,
5305     NFSPROC_T *p)
5306 {
5307 	struct sockaddr_in *msad, *sad;
5308 	struct sockaddr_in6 *msad6, *sad6;
5309 	struct nfsclclient *clp;
5310 	struct nfssockreq *nrp;
5311 	struct nfsclds *dsp, *tdsp;
5312 	int error;
5313 	enum nfsclds_state retv;
5314 	uint32_t sequenceid;
5315 
5316 	KASSERT(nmp->nm_sockreq.nr_cred != NULL,
5317 	    ("nfsrpc_fillsa: NULL nr_cred"));
5318 	NFSLOCKCLSTATE();
5319 	clp = nmp->nm_clp;
5320 	NFSUNLOCKCLSTATE();
5321 	if (clp == NULL)
5322 		return (EPERM);
5323 	if (af == AF_INET) {
5324 		NFSLOCKMNT(nmp);
5325 		/*
5326 		 * Check to see if we already have a session for this
5327 		 * address that is usable for a DS.
5328 		 * Note that the MDS's address is in a different place
5329 		 * than the sessions already acquired for DS's.
5330 		 */
5331 		msad = (struct sockaddr_in *)nmp->nm_sockreq.nr_nam;
5332 		tdsp = TAILQ_FIRST(&nmp->nm_sess);
5333 		while (tdsp != NULL) {
5334 			if (msad != NULL && msad->sin_family == AF_INET &&
5335 			    sin->sin_addr.s_addr == msad->sin_addr.s_addr &&
5336 			    sin->sin_port == msad->sin_port &&
5337 			    (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
5338 			    tdsp->nfsclds_sess.nfsess_defunct == 0) {
5339 				*dspp = tdsp;
5340 				NFSUNLOCKMNT(nmp);
5341 				NFSCL_DEBUG(4, "fnd same addr\n");
5342 				return (0);
5343 			}
5344 			tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
5345 			if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
5346 				msad = (struct sockaddr_in *)
5347 				    tdsp->nfsclds_sockp->nr_nam;
5348 			else
5349 				msad = NULL;
5350 		}
5351 		NFSUNLOCKMNT(nmp);
5352 
5353 		/* No IP address match, so look for new/trunked one. */
5354 		sad = malloc(sizeof(*sad), M_SONAME, M_WAITOK | M_ZERO);
5355 		sad->sin_len = sizeof(*sad);
5356 		sad->sin_family = AF_INET;
5357 		sad->sin_port = sin->sin_port;
5358 		sad->sin_addr.s_addr = sin->sin_addr.s_addr;
5359 		nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
5360 		nrp->nr_nam = (struct sockaddr *)sad;
5361 	} else if (af == AF_INET6) {
5362 		NFSLOCKMNT(nmp);
5363 		/*
5364 		 * Check to see if we already have a session for this
5365 		 * address that is usable for a DS.
5366 		 * Note that the MDS's address is in a different place
5367 		 * than the sessions already acquired for DS's.
5368 		 */
5369 		msad6 = (struct sockaddr_in6 *)nmp->nm_sockreq.nr_nam;
5370 		tdsp = TAILQ_FIRST(&nmp->nm_sess);
5371 		while (tdsp != NULL) {
5372 			if (msad6 != NULL && msad6->sin6_family == AF_INET6 &&
5373 			    IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
5374 			    &msad6->sin6_addr) &&
5375 			    sin6->sin6_port == msad6->sin6_port &&
5376 			    (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
5377 			    tdsp->nfsclds_sess.nfsess_defunct == 0) {
5378 				*dspp = tdsp;
5379 				NFSUNLOCKMNT(nmp);
5380 				return (0);
5381 			}
5382 			tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
5383 			if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
5384 				msad6 = (struct sockaddr_in6 *)
5385 				    tdsp->nfsclds_sockp->nr_nam;
5386 			else
5387 				msad6 = NULL;
5388 		}
5389 		NFSUNLOCKMNT(nmp);
5390 
5391 		/* No IP address match, so look for new/trunked one. */
5392 		sad6 = malloc(sizeof(*sad6), M_SONAME, M_WAITOK | M_ZERO);
5393 		sad6->sin6_len = sizeof(*sad6);
5394 		sad6->sin6_family = AF_INET6;
5395 		sad6->sin6_port = sin6->sin6_port;
5396 		NFSBCOPY(&sin6->sin6_addr, &sad6->sin6_addr,
5397 		    sizeof(struct in6_addr));
5398 		nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
5399 		nrp->nr_nam = (struct sockaddr *)sad6;
5400 	} else
5401 		return (EPERM);
5402 
5403 	nrp->nr_sotype = SOCK_STREAM;
5404 	mtx_init(&nrp->nr_mtx, "nfssock", NULL, MTX_DEF);
5405 	nrp->nr_prog = NFS_PROG;
5406 	nrp->nr_vers = vers;
5407 
5408 	/*
5409 	 * Use the credentials that were used for the mount, which are
5410 	 * in nmp->nm_sockreq.nr_cred for newnfs_connect() etc.
5411 	 * Ref. counting the credentials with crhold() is probably not
5412 	 * necessary, since nm_sockreq.nr_cred won't be crfree()'d until
5413 	 * unmount, but I did it anyhow.
5414 	 */
5415 	nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred);
5416 	error = newnfs_connect(nmp, nrp, NULL, p, 0);
5417 	NFSCL_DEBUG(3, "DS connect=%d\n", error);
5418 
5419 	dsp = NULL;
5420 	/* Now, do the exchangeid and create session. */
5421 	if (error == 0) {
5422 		if (vers == NFS_VER4) {
5423 			error = nfsrpc_exchangeid(nmp, clp, nrp,
5424 			    NFSV4EXCH_USEPNFSDS, &dsp, nrp->nr_cred, p);
5425 			NFSCL_DEBUG(3, "DS exchangeid=%d\n", error);
5426 			if (error != 0)
5427 				newnfs_disconnect(nrp);
5428 		} else {
5429 			dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS,
5430 			    M_WAITOK | M_ZERO);
5431 			dsp->nfsclds_flags |= NFSCLDS_DS;
5432 			dsp->nfsclds_expire = INT32_MAX; /* No renews needed. */
5433 			mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
5434 			mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
5435 			    NULL, MTX_DEF);
5436 		}
5437 	}
5438 	if (error == 0) {
5439 		dsp->nfsclds_sockp = nrp;
5440 		if (vers == NFS_VER4) {
5441 			NFSLOCKMNT(nmp);
5442 			retv = nfscl_getsameserver(nmp, dsp, &tdsp);
5443 			NFSCL_DEBUG(3, "getsame ret=%d\n", retv);
5444 			if (retv == NFSDSP_USETHISSESSION) {
5445 				NFSUNLOCKMNT(nmp);
5446 				/*
5447 				 * If there is already a session for this
5448 				 * server, use it.
5449 				 */
5450 				(void)newnfs_disconnect(nrp);
5451 				nfscl_freenfsclds(dsp);
5452 				*dspp = tdsp;
5453 				return (0);
5454 			}
5455 			if (retv == NFSDSP_SEQTHISSESSION)
5456 				sequenceid =
5457 				    tdsp->nfsclds_sess.nfsess_sequenceid;
5458 			else
5459 				sequenceid =
5460 				    dsp->nfsclds_sess.nfsess_sequenceid;
5461 			NFSUNLOCKMNT(nmp);
5462 			error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
5463 			    nrp, sequenceid, 0, nrp->nr_cred, p);
5464 			NFSCL_DEBUG(3, "DS createsess=%d\n", error);
5465 		}
5466 	} else {
5467 		NFSFREECRED(nrp->nr_cred);
5468 		NFSFREEMUTEX(&nrp->nr_mtx);
5469 		free(nrp->nr_nam, M_SONAME);
5470 		free(nrp, M_NFSSOCKREQ);
5471 	}
5472 	if (error == 0) {
5473 		NFSCL_DEBUG(3, "add DS session\n");
5474 		/*
5475 		 * Put it at the end of the list. That way the list
5476 		 * is ordered by when the entry was added. This matters
5477 		 * since the one done first is the one that should be
5478 		 * used for sequencid'ing any subsequent create sessions.
5479 		 */
5480 		NFSLOCKMNT(nmp);
5481 		TAILQ_INSERT_TAIL(&nmp->nm_sess, dsp, nfsclds_list);
5482 		NFSUNLOCKMNT(nmp);
5483 		*dspp = dsp;
5484 	} else if (dsp != NULL) {
5485 		newnfs_disconnect(nrp);
5486 		nfscl_freenfsclds(dsp);
5487 	}
5488 	return (error);
5489 }
5490 
5491 /*
5492  * Do the NFSv4.1 Reclaim Complete.
5493  */
5494 int
5495 nfsrpc_reclaimcomplete(struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
5496 {
5497 	uint32_t *tl;
5498 	struct nfsrv_descript nfsd;
5499 	struct nfsrv_descript *nd = &nfsd;
5500 	int error;
5501 
5502 	nfscl_reqstart(nd, NFSPROC_RECLAIMCOMPL, nmp, NULL, 0, NULL, NULL, 0,
5503 	    0);
5504 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
5505 	*tl = newnfs_false;
5506 	nd->nd_flag |= ND_USEGSSNAME;
5507 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5508 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5509 	if (error != 0)
5510 		return (error);
5511 	error = nd->nd_repstat;
5512 	mbuf_freem(nd->nd_mrep);
5513 	return (error);
5514 }
5515 
5516 /*
5517  * Initialize the slot tables for a session.
5518  */
5519 static void
5520 nfscl_initsessionslots(struct nfsclsession *sep)
5521 {
5522 	int i;
5523 
5524 	for (i = 0; i < NFSV4_CBSLOTS; i++) {
5525 		if (sep->nfsess_cbslots[i].nfssl_reply != NULL)
5526 			m_freem(sep->nfsess_cbslots[i].nfssl_reply);
5527 		NFSBZERO(&sep->nfsess_cbslots[i], sizeof(struct nfsslot));
5528 	}
5529 	for (i = 0; i < 64; i++)
5530 		sep->nfsess_slotseq[i] = 0;
5531 	sep->nfsess_slots = 0;
5532 }
5533 
5534 /*
5535  * Called to try and do an I/O operation via an NFSv4.1 Data Server (DS).
5536  */
5537 int
5538 nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
5539     uint32_t rwaccess, int docommit, struct ucred *cred, NFSPROC_T *p)
5540 {
5541 	struct nfsnode *np = VTONFS(vp);
5542 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
5543 	struct nfscllayout *layp;
5544 	struct nfscldevinfo *dip;
5545 	struct nfsclflayout *rflp;
5546 	struct mbuf *m;
5547 	nfsv4stateid_t stateid;
5548 	struct ucred *newcred;
5549 	uint64_t lastbyte, len, off, oresid, xfer;
5550 	int eof, error, firstmirror, i, iolaymode, mirrorcnt, recalled;
5551 	void *lckp;
5552 	uint8_t *dev;
5553 	void *iovbase;
5554 	size_t iovlen;
5555 	off_t offs;
5556 	ssize_t resid;
5557 
5558 	if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
5559 	    (np->n_flag & NNOLAYOUT) != 0)
5560 		return (EIO);
5561 	/* Now, get a reference cnt on the clientid for this mount. */
5562 	if (nfscl_getref(nmp) == 0)
5563 		return (EIO);
5564 
5565 	/* Find an appropriate stateid. */
5566 	newcred = NFSNEWCRED(cred);
5567 	error = nfscl_getstateid(vp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
5568 	    rwaccess, 1, newcred, p, &stateid, &lckp);
5569 	if (error != 0) {
5570 		NFSFREECRED(newcred);
5571 		nfscl_relref(nmp);
5572 		return (error);
5573 	}
5574 	/* Search for a layout for this file. */
5575 	off = uiop->uio_offset;
5576 	layp = nfscl_getlayout(nmp->nm_clp, np->n_fhp->nfh_fh,
5577 	    np->n_fhp->nfh_len, off, &rflp, &recalled);
5578 	if (layp == NULL || rflp == NULL) {
5579 		if (recalled != 0) {
5580 			NFSFREECRED(newcred);
5581 			nfscl_relref(nmp);
5582 			return (EIO);
5583 		}
5584 		if (layp != NULL) {
5585 			nfscl_rellayout(layp, (rflp == NULL) ? 1 : 0);
5586 			layp = NULL;
5587 		}
5588 		/* Try and get a Layout, if it is supported. */
5589 		if (rwaccess == NFSV4OPEN_ACCESSWRITE ||
5590 		    (np->n_flag & NWRITEOPENED) != 0)
5591 			iolaymode = NFSLAYOUTIOMODE_RW;
5592 		else
5593 			iolaymode = NFSLAYOUTIOMODE_READ;
5594 		error = nfsrpc_getlayout(nmp, vp, np->n_fhp, iolaymode,
5595 		    NULL, &stateid, off, &layp, newcred, p);
5596 		if (error != 0) {
5597 			NFSLOCKNODE(np);
5598 			np->n_flag |= NNOLAYOUT;
5599 			NFSUNLOCKNODE(np);
5600 			if (lckp != NULL)
5601 				nfscl_lockderef(lckp);
5602 			NFSFREECRED(newcred);
5603 			if (layp != NULL)
5604 				nfscl_rellayout(layp, 0);
5605 			nfscl_relref(nmp);
5606 			return (error);
5607 		}
5608 	}
5609 
5610 	/*
5611 	 * Loop around finding a layout that works for the first part of
5612 	 * this I/O operation, and then call the function that actually
5613 	 * does the RPC.
5614 	 */
5615 	eof = 0;
5616 	len = (uint64_t)uiop->uio_resid;
5617 	while (len > 0 && error == 0 && eof == 0) {
5618 		off = uiop->uio_offset;
5619 		error = nfscl_findlayoutforio(layp, off, rwaccess, &rflp);
5620 		if (error == 0) {
5621 			oresid = xfer = (uint64_t)uiop->uio_resid;
5622 			if (xfer > (rflp->nfsfl_end - rflp->nfsfl_off))
5623 				xfer = rflp->nfsfl_end - rflp->nfsfl_off;
5624 			/*
5625 			 * For Flex File layout with mirrored DSs, select one
5626 			 * of them at random for reads. For writes and commits,
5627 			 * do all mirrors.
5628 			 */
5629 			m = NULL;
5630 			firstmirror = 0;
5631 			mirrorcnt = 1;
5632 			if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0 &&
5633 			    (mirrorcnt = rflp->nfsfl_mirrorcnt) > 1) {
5634 				if (rwaccess == NFSV4OPEN_ACCESSREAD) {
5635 					firstmirror = arc4random() % mirrorcnt;
5636 					mirrorcnt = firstmirror + 1;
5637 				} else if (docommit == 0) {
5638 					/*
5639 					 * Save values, so uiop can be rolled
5640 					 * back upon a write error.
5641 					 */
5642 					offs = uiop->uio_offset;
5643 					resid = uiop->uio_resid;
5644 					iovbase = uiop->uio_iov->iov_base;
5645 					iovlen = uiop->uio_iov->iov_len;
5646 					m = nfsm_uiombuflist(uiop, len, NULL,
5647 					    NULL);
5648 				}
5649 			}
5650 			for (i = firstmirror; i < mirrorcnt && error == 0; i++){
5651 				if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0)
5652 					dev = rflp->nfsfl_ffm[i].dev;
5653 				else
5654 					dev = rflp->nfsfl_dev;
5655 				dip = nfscl_getdevinfo(nmp->nm_clp, dev,
5656 				    rflp->nfsfl_devp);
5657 				if (dip != NULL) {
5658 					if ((rflp->nfsfl_flags & NFSFL_FLEXFILE)
5659 					    != 0)
5660 						error = nfscl_dofflayoutio(vp,
5661 						    uiop, iomode, must_commit,
5662 						    &eof, &stateid, rwaccess,
5663 						    dip, layp, rflp, off, xfer,
5664 						    i, docommit, m, newcred,
5665 						    p);
5666 					else
5667 						error = nfscl_doflayoutio(vp,
5668 						    uiop, iomode, must_commit,
5669 						    &eof, &stateid, rwaccess,
5670 						    dip, layp, rflp, off, xfer,
5671 						    docommit, newcred, p);
5672 					nfscl_reldevinfo(dip);
5673 				} else
5674 					error = EIO;
5675 			}
5676 			if (m != NULL)
5677 				m_freem(m);
5678 			if (error == 0) {
5679 				if (mirrorcnt > 1 && rwaccess ==
5680 				    NFSV4OPEN_ACCESSWRITE && docommit == 0) {
5681 					NFSLOCKCLSTATE();
5682 					layp->nfsly_flags |= NFSLY_WRITTEN;
5683 					NFSUNLOCKCLSTATE();
5684 				}
5685 				lastbyte = off + xfer - 1;
5686 				NFSLOCKCLSTATE();
5687 				if (lastbyte > layp->nfsly_lastbyte)
5688 					layp->nfsly_lastbyte = lastbyte;
5689 				NFSUNLOCKCLSTATE();
5690 			} else if (error == NFSERR_OPENMODE &&
5691 			    rwaccess == NFSV4OPEN_ACCESSREAD) {
5692 				NFSLOCKMNT(nmp);
5693 				nmp->nm_state |= NFSSTA_OPENMODE;
5694 				NFSUNLOCKMNT(nmp);
5695 			} else
5696 				error = EIO;
5697 			if (error == 0)
5698 				len -= (oresid - (uint64_t)uiop->uio_resid);
5699 			else if (mirrorcnt > 1 && rwaccess ==
5700 			    NFSV4OPEN_ACCESSWRITE && docommit == 0) {
5701 				/*
5702 				 * In case the rpc gets retried, roll the
5703 				 * uio fields changed by nfsm_uiombuflist()
5704 				 * back.
5705 				 */
5706 				uiop->uio_offset = offs;
5707 				uiop->uio_resid = resid;
5708 				uiop->uio_iov->iov_base = iovbase;
5709 				uiop->uio_iov->iov_len = iovlen;
5710 			}
5711 		}
5712 	}
5713 	if (lckp != NULL)
5714 		nfscl_lockderef(lckp);
5715 	NFSFREECRED(newcred);
5716 	nfscl_rellayout(layp, 0);
5717 	nfscl_relref(nmp);
5718 	return (error);
5719 }
5720 
5721 /*
5722  * Make a copy of the mbuf chain and add an mbuf for null padding, as required.
5723  */
5724 static struct mbuf *
5725 nfsm_copym(struct mbuf *m, int off, int xfer)
5726 {
5727 	struct mbuf *m2, *m3, *m4;
5728 	uint32_t *tl;
5729 	int rem;
5730 
5731 	m2 = m_copym(m, off, xfer, M_WAITOK);
5732 	rem = NFSM_RNDUP(xfer) - xfer;
5733 	if (rem > 0) {
5734 		/*
5735 		 * The zero padding to a multiple of 4 bytes is required by
5736 		 * the XDR. So that the mbufs copied by reference aren't
5737 		 * modified, add an mbuf with the zero'd bytes to the list.
5738 		 * rem will be a maximum of 3, so one zero'd uint32_t is
5739 		 * sufficient.
5740 		 */
5741 		m3 = m2;
5742 		while (m3->m_next != NULL)
5743 			m3 = m3->m_next;
5744 		NFSMGET(m4);
5745 		tl = NFSMTOD(m4, uint32_t *);
5746 		*tl = 0;
5747 		mbuf_setlen(m4, rem);
5748 		mbuf_setnext(m3, m4);
5749 	}
5750 	return (m2);
5751 }
5752 
5753 /*
5754  * Find a file layout that will handle the first bytes of the requested
5755  * range and return the information from it needed to to the I/O operation.
5756  */
5757 int
5758 nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess,
5759     struct nfsclflayout **retflpp)
5760 {
5761 	struct nfsclflayout *flp, *nflp, *rflp;
5762 	uint32_t rw;
5763 
5764 	rflp = NULL;
5765 	rw = rwaccess;
5766 	/* For reading, do the Read list first and then the Write list. */
5767 	do {
5768 		if (rw == NFSV4OPEN_ACCESSREAD)
5769 			flp = LIST_FIRST(&lyp->nfsly_flayread);
5770 		else
5771 			flp = LIST_FIRST(&lyp->nfsly_flayrw);
5772 		while (flp != NULL) {
5773 			nflp = LIST_NEXT(flp, nfsfl_list);
5774 			if (flp->nfsfl_off > off)
5775 				break;
5776 			if (flp->nfsfl_end > off &&
5777 			    (rflp == NULL || rflp->nfsfl_end < flp->nfsfl_end))
5778 				rflp = flp;
5779 			flp = nflp;
5780 		}
5781 		if (rw == NFSV4OPEN_ACCESSREAD)
5782 			rw = NFSV4OPEN_ACCESSWRITE;
5783 		else
5784 			rw = 0;
5785 	} while (rw != 0);
5786 	if (rflp != NULL) {
5787 		/* This one covers the most bytes starting at off. */
5788 		*retflpp = rflp;
5789 		return (0);
5790 	}
5791 	return (EIO);
5792 }
5793 
5794 /*
5795  * Do I/O using an NFSv4.1 file layout.
5796  */
5797 static int
5798 nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
5799     int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
5800     struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
5801     uint64_t len, int docommit, struct ucred *cred, NFSPROC_T *p)
5802 {
5803 	uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer;
5804 	int commit_thru_mds, error, stripe_index, stripe_pos;
5805 	struct nfsnode *np;
5806 	struct nfsfh *fhp;
5807 	struct nfsclds **dspp;
5808 
5809 	np = VTONFS(vp);
5810 	rel_off = off - flp->nfsfl_patoff;
5811 	stripe_unit_size = (flp->nfsfl_util >> 6) & 0x3ffffff;
5812 	stripe_pos = (rel_off / stripe_unit_size + flp->nfsfl_stripe1) %
5813 	    dp->nfsdi_stripecnt;
5814 	transfer = stripe_unit_size - (rel_off % stripe_unit_size);
5815 	error = 0;
5816 
5817 	/* Loop around, doing I/O for each stripe unit. */
5818 	while (len > 0 && error == 0) {
5819 		stripe_index = nfsfldi_stripeindex(dp, stripe_pos);
5820 		dspp = nfsfldi_addr(dp, stripe_index);
5821 		if (len > transfer && docommit == 0)
5822 			xfer = transfer;
5823 		else
5824 			xfer = len;
5825 		if ((flp->nfsfl_util & NFSFLAYUTIL_DENSE) != 0) {
5826 			/* Dense layout. */
5827 			if (stripe_pos >= flp->nfsfl_fhcnt)
5828 				return (EIO);
5829 			fhp = flp->nfsfl_fh[stripe_pos];
5830 			io_off = (rel_off / (stripe_unit_size *
5831 			    dp->nfsdi_stripecnt)) * stripe_unit_size +
5832 			    rel_off % stripe_unit_size;
5833 		} else {
5834 			/* Sparse layout. */
5835 			if (flp->nfsfl_fhcnt > 1) {
5836 				if (stripe_index >= flp->nfsfl_fhcnt)
5837 					return (EIO);
5838 				fhp = flp->nfsfl_fh[stripe_index];
5839 			} else if (flp->nfsfl_fhcnt == 1)
5840 				fhp = flp->nfsfl_fh[0];
5841 			else
5842 				fhp = np->n_fhp;
5843 			io_off = off;
5844 		}
5845 		if ((flp->nfsfl_util & NFSFLAYUTIL_COMMIT_THRU_MDS) != 0) {
5846 			commit_thru_mds = 1;
5847 			if (docommit != 0)
5848 				error = EIO;
5849 		} else {
5850 			commit_thru_mds = 0;
5851 			mtx_lock(&np->n_mtx);
5852 			np->n_flag |= NDSCOMMIT;
5853 			mtx_unlock(&np->n_mtx);
5854 		}
5855 		if (docommit != 0) {
5856 			if (error == 0)
5857 				error = nfsrpc_commitds(vp, io_off, xfer,
5858 				    *dspp, fhp, 0, 0, cred, p);
5859 			if (error == 0) {
5860 				/*
5861 				 * Set both eof and uio_resid = 0 to end any
5862 				 * loops.
5863 				 */
5864 				*eofp = 1;
5865 				uiop->uio_resid = 0;
5866 			} else {
5867 				mtx_lock(&np->n_mtx);
5868 				np->n_flag &= ~NDSCOMMIT;
5869 				mtx_unlock(&np->n_mtx);
5870 			}
5871 		} else if (rwflag == NFSV4OPEN_ACCESSREAD)
5872 			error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
5873 			    io_off, xfer, fhp, 0, 0, 0, cred, p);
5874 		else {
5875 			error = nfsrpc_writeds(vp, uiop, iomode, must_commit,
5876 			    stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds,
5877 			    0, 0, 0, cred, p);
5878 			if (error == 0) {
5879 				NFSLOCKCLSTATE();
5880 				lyp->nfsly_flags |= NFSLY_WRITTEN;
5881 				NFSUNLOCKCLSTATE();
5882 			}
5883 		}
5884 		if (error == 0) {
5885 			transfer = stripe_unit_size;
5886 			stripe_pos = (stripe_pos + 1) % dp->nfsdi_stripecnt;
5887 			len -= xfer;
5888 			off += xfer;
5889 		}
5890 	}
5891 	return (error);
5892 }
5893 
5894 /*
5895  * Do I/O using an NFSv4.1 flex file layout.
5896  */
5897 static int
5898 nfscl_dofflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
5899     int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
5900     struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
5901     uint64_t len, int mirror, int docommit, struct mbuf *mp, struct ucred *cred,
5902     NFSPROC_T *p)
5903 {
5904 	uint64_t transfer, xfer;
5905 	int error, rel_off;
5906 	struct nfsnode *np;
5907 	struct nfsfh *fhp;
5908 	struct nfsclds **dspp;
5909 	struct ucred *tcred;
5910 	struct mbuf *m;
5911 
5912 	np = VTONFS(vp);
5913 	error = 0;
5914 	rel_off = 0;
5915 	NFSCL_DEBUG(4, "nfscl_dofflayoutio: off=%ju len=%ju\n", (uintmax_t)off,
5916 	    (uintmax_t)len);
5917 	/* Loop around, doing I/O for each stripe unit. */
5918 	while (len > 0 && error == 0) {
5919 		dspp = nfsfldi_addr(dp, 0);
5920 		fhp = flp->nfsfl_ffm[mirror].fh[dp->nfsdi_versindex];
5921 		stateidp = &flp->nfsfl_ffm[mirror].st;
5922 		NFSCL_DEBUG(4, "mirror=%d vind=%d fhlen=%d st.seqid=0x%x\n",
5923 		    mirror, dp->nfsdi_versindex, fhp->nfh_len, stateidp->seqid);
5924 		if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0) {
5925 			tcred = NFSNEWCRED(cred);
5926 			tcred->cr_uid = flp->nfsfl_ffm[mirror].user;
5927 			tcred->cr_groups[0] = flp->nfsfl_ffm[mirror].group;
5928 			tcred->cr_ngroups = 1;
5929 		} else
5930 			tcred = cred;
5931 		if (rwflag == NFSV4OPEN_ACCESSREAD)
5932 			transfer = dp->nfsdi_rsize;
5933 		else
5934 			transfer = dp->nfsdi_wsize;
5935 		mtx_lock(&np->n_mtx);
5936 		np->n_flag |= NDSCOMMIT;
5937 		mtx_unlock(&np->n_mtx);
5938 		if (len > transfer && docommit == 0)
5939 			xfer = transfer;
5940 		else
5941 			xfer = len;
5942 		if (docommit != 0) {
5943 			if (error == 0)
5944 				error = nfsrpc_commitds(vp, off, xfer, *dspp,
5945 				    fhp, dp->nfsdi_vers, dp->nfsdi_minorvers,
5946 				    tcred, p);
5947 			NFSCL_DEBUG(4, "aft nfsrpc_commitds=%d\n", error);
5948 			if (error == 0) {
5949 				/*
5950 				 * Set both eof and uio_resid = 0 to end any
5951 				 * loops.
5952 				 */
5953 				*eofp = 1;
5954 				uiop->uio_resid = 0;
5955 			} else {
5956 				mtx_lock(&np->n_mtx);
5957 				np->n_flag &= ~NDSCOMMIT;
5958 				mtx_unlock(&np->n_mtx);
5959 			}
5960 		} else if (rwflag == NFSV4OPEN_ACCESSREAD)
5961 			error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
5962 			    off, xfer, fhp, 1, dp->nfsdi_vers,
5963 			    dp->nfsdi_minorvers, tcred, p);
5964 		else {
5965 			if (flp->nfsfl_mirrorcnt == 1) {
5966 				error = nfsrpc_writeds(vp, uiop, iomode,
5967 				    must_commit, stateidp, *dspp, off, xfer,
5968 				    fhp, 0, 1, dp->nfsdi_vers,
5969 				    dp->nfsdi_minorvers, tcred, p);
5970 				if (error == 0) {
5971 					NFSLOCKCLSTATE();
5972 					lyp->nfsly_flags |= NFSLY_WRITTEN;
5973 					NFSUNLOCKCLSTATE();
5974 				}
5975 			} else {
5976 				m = nfsm_copym(mp, rel_off, xfer);
5977 				NFSCL_DEBUG(4, "mcopy reloff=%d xfer=%jd\n",
5978 				    rel_off, (uintmax_t)xfer);
5979 				error = nfsrpc_writedsmir(vp, iomode,
5980 				    must_commit, stateidp, *dspp, off, xfer,
5981 				    fhp, m, dp->nfsdi_vers, dp->nfsdi_minorvers,
5982 				    tcred, p);
5983 				NFSCL_DEBUG(4, "nfsrpc_writedsmir=%d\n", error);
5984 			}
5985 		}
5986 		NFSCL_DEBUG(4, "aft read/writeds=%d\n", error);
5987 		if (error == 0) {
5988 			len -= xfer;
5989 			off += xfer;
5990 			rel_off += xfer;
5991 		}
5992 		if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0)
5993 			NFSFREECRED(tcred);
5994 	}
5995 	NFSCL_DEBUG(4, "eo nfscl_dofflayoutio=%d\n", error);
5996 	return (error);
5997 }
5998 
5999 /*
6000  * The actual read RPC done to a DS.
6001  */
6002 static int
6003 nfsrpc_readds(vnode_t vp, struct uio *uiop, nfsv4stateid_t *stateidp, int *eofp,
6004     struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, int flex,
6005     int vers, int minorvers, struct ucred *cred, NFSPROC_T *p)
6006 {
6007 	uint32_t *tl;
6008 	int attrflag, error, retlen;
6009 	struct nfsrv_descript nfsd;
6010 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
6011 	struct nfsrv_descript *nd = &nfsd;
6012 	struct nfssockreq *nrp;
6013 	struct nfsvattr na;
6014 
6015 	nd->nd_mrep = NULL;
6016 	if (vers == 0 || vers == NFS_VER4) {
6017 		nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh,
6018 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6019 		vers = NFS_VER4;
6020 		NFSCL_DEBUG(4, "nfsrpc_readds: vers4 minvers=%d\n", minorvers);
6021 		if (flex != 0)
6022 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6023 		else
6024 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
6025 	} else {
6026 		nfscl_reqstart(nd, NFSPROC_READ, nmp, fhp->nfh_fh,
6027 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6028 		NFSCL_DEBUG(4, "nfsrpc_readds: vers3\n");
6029 	}
6030 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
6031 	txdr_hyper(io_off, tl);
6032 	*(tl + 2) = txdr_unsigned(len);
6033 	nrp = dsp->nfsclds_sockp;
6034 	NFSCL_DEBUG(4, "nfsrpc_readds: nrp=%p\n", nrp);
6035 	if (nrp == NULL)
6036 		/* If NULL, use the MDS socket. */
6037 		nrp = &nmp->nm_sockreq;
6038 	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6039 	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6040 	NFSCL_DEBUG(4, "nfsrpc_readds: stat=%d err=%d\n", nd->nd_repstat,
6041 	    error);
6042 	if (error != 0)
6043 		return (error);
6044 	if (vers == NFS_VER3) {
6045 		error = nfscl_postop_attr(nd, &na, &attrflag, NULL);
6046 		NFSCL_DEBUG(4, "nfsrpc_readds: postop=%d\n", error);
6047 		if (error != 0)
6048 			goto nfsmout;
6049 	}
6050 	if (nd->nd_repstat != 0) {
6051 		error = nd->nd_repstat;
6052 		goto nfsmout;
6053 	}
6054 	if (vers == NFS_VER3) {
6055 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
6056 		*eofp = fxdr_unsigned(int, *(tl + 1));
6057 	} else {
6058 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
6059 		*eofp = fxdr_unsigned(int, *tl);
6060 	}
6061 	NFSM_STRSIZ(retlen, len);
6062 	NFSCL_DEBUG(4, "nfsrpc_readds: retlen=%d eof=%d\n", retlen, *eofp);
6063 	error = nfsm_mbufuio(nd, uiop, retlen);
6064 nfsmout:
6065 	if (nd->nd_mrep != NULL)
6066 		mbuf_freem(nd->nd_mrep);
6067 	return (error);
6068 }
6069 
6070 /*
6071  * The actual write RPC done to a DS.
6072  */
6073 static int
6074 nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6075     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
6076     struct nfsfh *fhp, int commit_thru_mds, int flex, int vers, int minorvers,
6077     struct ucred *cred, NFSPROC_T *p)
6078 {
6079 	uint32_t *tl;
6080 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
6081 	int attrflag, error, rlen, commit, committed = NFSWRITE_FILESYNC;
6082 	int32_t backup;
6083 	struct nfsrv_descript nfsd;
6084 	struct nfsrv_descript *nd = &nfsd;
6085 	struct nfssockreq *nrp;
6086 	struct nfsvattr na;
6087 
6088 	KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
6089 	nd->nd_mrep = NULL;
6090 	if (vers == 0 || vers == NFS_VER4) {
6091 		nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
6092 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6093 		NFSCL_DEBUG(4, "nfsrpc_writeds: vers4 minvers=%d\n", minorvers);
6094 		vers = NFS_VER4;
6095 		if (flex != 0)
6096 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6097 		else
6098 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
6099 		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
6100 	} else {
6101 		nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
6102 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6103 		NFSCL_DEBUG(4, "nfsrpc_writeds: vers3\n");
6104 		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
6105 	}
6106 	txdr_hyper(io_off, tl);
6107 	tl += 2;
6108 	if (vers == NFS_VER3)
6109 		*tl++ = txdr_unsigned(len);
6110 	*tl++ = txdr_unsigned(*iomode);
6111 	*tl = txdr_unsigned(len);
6112 	nfsm_uiombuf(nd, uiop, len);
6113 	nrp = dsp->nfsclds_sockp;
6114 	if (nrp == NULL)
6115 		/* If NULL, use the MDS socket. */
6116 		nrp = &nmp->nm_sockreq;
6117 	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6118 	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6119 	NFSCL_DEBUG(4, "nfsrpc_writeds: err=%d stat=%d\n", error,
6120 	    nd->nd_repstat);
6121 	if (error != 0)
6122 		return (error);
6123 	if (nd->nd_repstat != 0) {
6124 		/*
6125 		 * In case the rpc gets retried, roll
6126 		 * the uio fileds changed by nfsm_uiombuf()
6127 		 * back.
6128 		 */
6129 		uiop->uio_offset -= len;
6130 		uio_uio_resid_add(uiop, len);
6131 		uio_iov_base_add(uiop, -len);
6132 		uio_iov_len_add(uiop, len);
6133 		error = nd->nd_repstat;
6134 	} else {
6135 		if (vers == NFS_VER3) {
6136 			error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
6137 			    NULL);
6138 			NFSCL_DEBUG(4, "nfsrpc_writeds: wcc_data=%d\n", error);
6139 			if (error != 0)
6140 				goto nfsmout;
6141 		}
6142 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
6143 		rlen = fxdr_unsigned(int, *tl++);
6144 		NFSCL_DEBUG(4, "nfsrpc_writeds: len=%d rlen=%d\n", len, rlen);
6145 		if (rlen == 0) {
6146 			error = NFSERR_IO;
6147 			goto nfsmout;
6148 		} else if (rlen < len) {
6149 			backup = len - rlen;
6150 			uio_iov_base_add(uiop, -(backup));
6151 			uio_iov_len_add(uiop, backup);
6152 			uiop->uio_offset -= backup;
6153 			uio_uio_resid_add(uiop, backup);
6154 			len = rlen;
6155 		}
6156 		commit = fxdr_unsigned(int, *tl++);
6157 
6158 		/*
6159 		 * Return the lowest commitment level
6160 		 * obtained by any of the RPCs.
6161 		 */
6162 		if (committed == NFSWRITE_FILESYNC)
6163 			committed = commit;
6164 		else if (committed == NFSWRITE_DATASYNC &&
6165 		    commit == NFSWRITE_UNSTABLE)
6166 			committed = commit;
6167 		if (commit_thru_mds != 0) {
6168 			NFSLOCKMNT(nmp);
6169 			if (!NFSHASWRITEVERF(nmp)) {
6170 				NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
6171 				NFSSETWRITEVERF(nmp);
6172 	    		} else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) {
6173 				*must_commit = 1;
6174 				NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
6175 			}
6176 			NFSUNLOCKMNT(nmp);
6177 		} else {
6178 			NFSLOCKDS(dsp);
6179 			if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
6180 				NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6181 				dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
6182 			} else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
6183 				*must_commit = 1;
6184 				NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6185 			}
6186 			NFSUNLOCKDS(dsp);
6187 		}
6188 	}
6189 nfsmout:
6190 	if (nd->nd_mrep != NULL)
6191 		mbuf_freem(nd->nd_mrep);
6192 	*iomode = committed;
6193 	if (nd->nd_repstat != 0 && error == 0)
6194 		error = nd->nd_repstat;
6195 	return (error);
6196 }
6197 
6198 /*
6199  * The actual write RPC done to a DS.
6200  * This variant is called from a separate kernel process for mirrors.
6201  * Any short write is considered an IO error.
6202  */
6203 static int
6204 nfsrpc_writedsmir(vnode_t vp, int *iomode, int *must_commit,
6205     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
6206     struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers,
6207     struct ucred *cred, NFSPROC_T *p)
6208 {
6209 	uint32_t *tl;
6210 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
6211 	int attrflag, error, commit, committed = NFSWRITE_FILESYNC, rlen;
6212 	struct nfsrv_descript nfsd;
6213 	struct nfsrv_descript *nd = &nfsd;
6214 	struct nfssockreq *nrp;
6215 	struct nfsvattr na;
6216 
6217 	nd->nd_mrep = NULL;
6218 	if (vers == 0 || vers == NFS_VER4) {
6219 		nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
6220 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6221 		vers = NFS_VER4;
6222 		NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers4 minvers=%d\n",
6223 		    minorvers);
6224 		nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6225 		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
6226 	} else {
6227 		nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
6228 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6229 		NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers3\n");
6230 		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
6231 	}
6232 	txdr_hyper(io_off, tl);
6233 	tl += 2;
6234 	if (vers == NFS_VER3)
6235 		*tl++ = txdr_unsigned(len);
6236 	*tl++ = txdr_unsigned(*iomode);
6237 	*tl = txdr_unsigned(len);
6238 	if (len > 0) {
6239 		/* Put data in mbuf chain. */
6240 		nd->nd_mb->m_next = m;
6241 		/* Set nd_mb and nd_bpos to end of data. */
6242 		while (m->m_next != NULL)
6243 			m = m->m_next;
6244 		nd->nd_mb = m;
6245 		nd->nd_bpos = mtod(m, char *) + m->m_len;
6246 		NFSCL_DEBUG(4, "nfsrpc_writedsmir: lastmb len=%d\n", m->m_len);
6247 	}
6248 	nrp = dsp->nfsclds_sockp;
6249 	if (nrp == NULL)
6250 		/* If NULL, use the MDS socket. */
6251 		nrp = &nmp->nm_sockreq;
6252 	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6253 	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6254 	NFSCL_DEBUG(4, "nfsrpc_writedsmir: err=%d stat=%d\n", error,
6255 	    nd->nd_repstat);
6256 	if (error != 0)
6257 		return (error);
6258 	if (nd->nd_repstat != 0)
6259 		error = nd->nd_repstat;
6260 	else {
6261 		if (vers == NFS_VER3) {
6262 			error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
6263 			    NULL);
6264 			NFSCL_DEBUG(4, "nfsrpc_writedsmir: wcc_data=%d\n",
6265 			    error);
6266 			if (error != 0)
6267 				goto nfsmout;
6268 		}
6269 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
6270 		rlen = fxdr_unsigned(int, *tl++);
6271 		NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n", len,
6272 		    rlen);
6273 		if (rlen != len) {
6274 			error = NFSERR_IO;
6275 			NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n",
6276 			    len, rlen);
6277 			goto nfsmout;
6278 		}
6279 		commit = fxdr_unsigned(int, *tl++);
6280 
6281 		/*
6282 		 * Return the lowest commitment level
6283 		 * obtained by any of the RPCs.
6284 		 */
6285 		if (committed == NFSWRITE_FILESYNC)
6286 			committed = commit;
6287 		else if (committed == NFSWRITE_DATASYNC &&
6288 		    commit == NFSWRITE_UNSTABLE)
6289 			committed = commit;
6290 		NFSLOCKDS(dsp);
6291 		if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
6292 			NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6293 			dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
6294 		} else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
6295 			*must_commit = 1;
6296 			NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6297 		}
6298 		NFSUNLOCKDS(dsp);
6299 	}
6300 nfsmout:
6301 	if (nd->nd_mrep != NULL)
6302 		mbuf_freem(nd->nd_mrep);
6303 	*iomode = committed;
6304 	if (nd->nd_repstat != 0 && error == 0)
6305 		error = nd->nd_repstat;
6306 	return (error);
6307 }
6308 
6309 /*
6310  * Free up the nfsclds structure.
6311  */
6312 void
6313 nfscl_freenfsclds(struct nfsclds *dsp)
6314 {
6315 	int i;
6316 
6317 	if (dsp == NULL)
6318 		return;
6319 	if (dsp->nfsclds_sockp != NULL) {
6320 		NFSFREECRED(dsp->nfsclds_sockp->nr_cred);
6321 		NFSFREEMUTEX(&dsp->nfsclds_sockp->nr_mtx);
6322 		free(dsp->nfsclds_sockp->nr_nam, M_SONAME);
6323 		free(dsp->nfsclds_sockp, M_NFSSOCKREQ);
6324 	}
6325 	NFSFREEMUTEX(&dsp->nfsclds_mtx);
6326 	NFSFREEMUTEX(&dsp->nfsclds_sess.nfsess_mtx);
6327 	for (i = 0; i < NFSV4_CBSLOTS; i++) {
6328 		if (dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply != NULL)
6329 			m_freem(
6330 			    dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply);
6331 	}
6332 	free(dsp, M_NFSCLDS);
6333 }
6334 
6335 static enum nfsclds_state
6336 nfscl_getsameserver(struct nfsmount *nmp, struct nfsclds *newdsp,
6337     struct nfsclds **retdspp)
6338 {
6339 	struct nfsclds *dsp, *cur_dsp;
6340 
6341 	/*
6342 	 * Search the list of nfsclds structures for one with the same
6343 	 * server.
6344 	 */
6345 	cur_dsp = NULL;
6346 	TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) {
6347 		if (dsp->nfsclds_servownlen == newdsp->nfsclds_servownlen &&
6348 		    dsp->nfsclds_servownlen != 0 &&
6349 		    !NFSBCMP(dsp->nfsclds_serverown, newdsp->nfsclds_serverown,
6350 		    dsp->nfsclds_servownlen) &&
6351 		    dsp->nfsclds_sess.nfsess_defunct == 0) {
6352 			NFSCL_DEBUG(4, "fnd same fdsp=%p dsp=%p flg=0x%x\n",
6353 			    TAILQ_FIRST(&nmp->nm_sess), dsp,
6354 			    dsp->nfsclds_flags);
6355 			/* Server major id matches. */
6356 			if ((dsp->nfsclds_flags & NFSCLDS_DS) != 0) {
6357 				*retdspp = dsp;
6358 				return (NFSDSP_USETHISSESSION);
6359 			}
6360 
6361 			/*
6362 			 * Note the first match, so it can be used for
6363 			 * sequence'ing new sessions.
6364 			 */
6365 			if (cur_dsp == NULL)
6366 				cur_dsp = dsp;
6367 		}
6368 	}
6369 	if (cur_dsp != NULL) {
6370 		*retdspp = cur_dsp;
6371 		return (NFSDSP_SEQTHISSESSION);
6372 	}
6373 	return (NFSDSP_NOTFOUND);
6374 }
6375 
6376 /*
6377  * NFS commit rpc to a NFSv4.1 DS.
6378  */
6379 static int
6380 nfsrpc_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
6381     struct nfsfh *fhp, int vers, int minorvers, struct ucred *cred,
6382     NFSPROC_T *p)
6383 {
6384 	uint32_t *tl;
6385 	struct nfsrv_descript nfsd, *nd = &nfsd;
6386 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
6387 	struct nfssockreq *nrp;
6388 	struct nfsvattr na;
6389 	int attrflag, error;
6390 
6391 	nd->nd_mrep = NULL;
6392 	if (vers == 0 || vers == NFS_VER4) {
6393 		nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh,
6394 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6395 		vers = NFS_VER4;
6396 	} else
6397 		nfscl_reqstart(nd, NFSPROC_COMMIT, nmp, fhp->nfh_fh,
6398 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6399 	NFSCL_DEBUG(4, "nfsrpc_commitds: vers=%d minvers=%d\n", vers,
6400 	    minorvers);
6401 	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
6402 	txdr_hyper(offset, tl);
6403 	tl += 2;
6404 	*tl = txdr_unsigned(cnt);
6405 	nrp = dsp->nfsclds_sockp;
6406 	if (nrp == NULL)
6407 		/* If NULL, use the MDS socket. */
6408 		nrp = &nmp->nm_sockreq;
6409 	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6410 	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6411 	NFSCL_DEBUG(4, "nfsrpc_commitds: err=%d stat=%d\n", error,
6412 	    nd->nd_repstat);
6413 	if (error != 0)
6414 		return (error);
6415 	if (nd->nd_repstat == 0) {
6416 		if (vers == NFS_VER3) {
6417 			error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
6418 			    NULL);
6419 			NFSCL_DEBUG(4, "nfsrpc_commitds: wccdata=%d\n", error);
6420 			if (error != 0)
6421 				goto nfsmout;
6422 		}
6423 		NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
6424 		NFSLOCKDS(dsp);
6425 		if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
6426 			NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6427 			error = NFSERR_STALEWRITEVERF;
6428 		}
6429 		NFSUNLOCKDS(dsp);
6430 	}
6431 nfsmout:
6432 	if (error == 0 && nd->nd_repstat != 0)
6433 		error = nd->nd_repstat;
6434 	mbuf_freem(nd->nd_mrep);
6435 	return (error);
6436 }
6437 
6438 /*
6439  * Set up the XDR arguments for the LayoutGet operation.
6440  */
6441 static void
6442 nfsrv_setuplayoutget(struct nfsrv_descript *nd, int iomode, uint64_t offset,
6443     uint64_t len, uint64_t minlen, nfsv4stateid_t *stateidp, int layouttype,
6444     int layoutlen, int usecurstateid)
6445 {
6446 	uint32_t *tl;
6447 
6448 	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
6449 	    NFSX_STATEID);
6450 	*tl++ = newnfs_false;		/* Don't signal availability. */
6451 	*tl++ = txdr_unsigned(layouttype);
6452 	*tl++ = txdr_unsigned(iomode);
6453 	txdr_hyper(offset, tl);
6454 	tl += 2;
6455 	txdr_hyper(len, tl);
6456 	tl += 2;
6457 	txdr_hyper(minlen, tl);
6458 	tl += 2;
6459 	if (usecurstateid != 0) {
6460 		/* Special stateid for Current stateid. */
6461 		*tl++ = txdr_unsigned(1);
6462 		*tl++ = 0;
6463 		*tl++ = 0;
6464 		*tl++ = 0;
6465 	} else {
6466 		*tl++ = txdr_unsigned(stateidp->seqid);
6467 		NFSCL_DEBUG(4, "layget seq=%d\n", (int)stateidp->seqid);
6468 		*tl++ = stateidp->other[0];
6469 		*tl++ = stateidp->other[1];
6470 		*tl++ = stateidp->other[2];
6471 	}
6472 	*tl = txdr_unsigned(layoutlen);
6473 }
6474 
6475 /*
6476  * Parse the reply for a successful LayoutGet operation.
6477  */
6478 static int
6479 nfsrv_parselayoutget(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
6480     int *retonclosep, struct nfsclflayouthead *flhp)
6481 {
6482 	uint32_t *tl;
6483 	struct nfsclflayout *flp, *prevflp, *tflp;
6484 	int cnt, error, fhcnt, gotiomode, i, iomode, j, k, l, laytype, nfhlen;
6485 	int m, mirrorcnt;
6486 	uint64_t retlen, off;
6487 	struct nfsfh *nfhp;
6488 	uint8_t *cp;
6489 	uid_t user;
6490 	gid_t grp;
6491 
6492 	NFSCL_DEBUG(4, "in nfsrv_parselayoutget\n");
6493 	error = 0;
6494 	flp = NULL;
6495 	gotiomode = -1;
6496 	NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_STATEID);
6497 	if (*tl++ != 0)
6498 		*retonclosep = 1;
6499 	else
6500 		*retonclosep = 0;
6501 	stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
6502 	NFSCL_DEBUG(4, "retoncls=%d stseq=%d\n", *retonclosep,
6503 	    (int)stateidp->seqid);
6504 	stateidp->other[0] = *tl++;
6505 	stateidp->other[1] = *tl++;
6506 	stateidp->other[2] = *tl++;
6507 	cnt = fxdr_unsigned(int, *tl);
6508 	NFSCL_DEBUG(4, "layg cnt=%d\n", cnt);
6509 	if (cnt <= 0 || cnt > 10000) {
6510 		/* Don't accept more than 10000 layouts in reply. */
6511 		error = NFSERR_BADXDR;
6512 		goto nfsmout;
6513 	}
6514 	for (i = 0; i < cnt; i++) {
6515 		/* Dissect to the layout type. */
6516 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER +
6517 		    3 * NFSX_UNSIGNED);
6518 		off = fxdr_hyper(tl); tl += 2;
6519 		retlen = fxdr_hyper(tl); tl += 2;
6520 		iomode = fxdr_unsigned(int, *tl++);
6521 		laytype = fxdr_unsigned(int, *tl);
6522 		NFSCL_DEBUG(4, "layt=%d off=%ju len=%ju iom=%d\n", laytype,
6523 		    (uintmax_t)off, (uintmax_t)retlen, iomode);
6524 		/* Ignore length of layout body for now. */
6525 		if (laytype == NFSLAYOUT_NFSV4_1_FILES) {
6526 			/* Parse the File layout up to fhcnt. */
6527 			NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED +
6528 			    NFSX_HYPER + NFSX_V4DEVICEID);
6529 			fhcnt = fxdr_unsigned(int, *(tl + 4 +
6530 			    NFSX_V4DEVICEID / NFSX_UNSIGNED));
6531 			NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
6532 			if (fhcnt < 0 || fhcnt > 100) {
6533 				/* Don't accept more than 100 file handles. */
6534 				error = NFSERR_BADXDR;
6535 				goto nfsmout;
6536 			}
6537 			if (fhcnt > 0)
6538 				flp = malloc(sizeof(*flp) + fhcnt *
6539 				    sizeof(struct nfsfh *), M_NFSFLAYOUT,
6540 				    M_WAITOK);
6541 			else
6542 				flp = malloc(sizeof(*flp), M_NFSFLAYOUT,
6543 				    M_WAITOK);
6544 			flp->nfsfl_flags = NFSFL_FILE;
6545 			flp->nfsfl_fhcnt = 0;
6546 			flp->nfsfl_devp = NULL;
6547 			flp->nfsfl_off = off;
6548 			if (flp->nfsfl_off + retlen < flp->nfsfl_off)
6549 				flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
6550 			else
6551 				flp->nfsfl_end = flp->nfsfl_off + retlen;
6552 			flp->nfsfl_iomode = iomode;
6553 			if (gotiomode == -1)
6554 				gotiomode = flp->nfsfl_iomode;
6555 			/* Ignore layout body length for now. */
6556 			NFSBCOPY(tl, flp->nfsfl_dev, NFSX_V4DEVICEID);
6557 			tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
6558 			flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++);
6559 			NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util);
6560 			flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++);
6561 			flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2;
6562 			NFSCL_DEBUG(4, "stripe1=%u poff=%ju\n",
6563 			    flp->nfsfl_stripe1, (uintmax_t)flp->nfsfl_patoff);
6564 			for (j = 0; j < fhcnt; j++) {
6565 				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
6566 				nfhlen = fxdr_unsigned(int, *tl);
6567 				if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) {
6568 					error = NFSERR_BADXDR;
6569 					goto nfsmout;
6570 				}
6571 				nfhp = malloc(sizeof(*nfhp) + nfhlen - 1,
6572 				    M_NFSFH, M_WAITOK);
6573 				flp->nfsfl_fh[j] = nfhp;
6574 				flp->nfsfl_fhcnt++;
6575 				nfhp->nfh_len = nfhlen;
6576 				NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen));
6577 				NFSBCOPY(cp, nfhp->nfh_fh, nfhlen);
6578 			}
6579 		} else if (laytype == NFSLAYOUT_FLEXFILE) {
6580 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED +
6581 			    NFSX_HYPER);
6582 			mirrorcnt = fxdr_unsigned(int, *(tl + 2));
6583 			NFSCL_DEBUG(4, "mirrorcnt=%d\n", mirrorcnt);
6584 			if (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS) {
6585 				error = NFSERR_BADXDR;
6586 				goto nfsmout;
6587 			}
6588 			flp = malloc(sizeof(*flp) + mirrorcnt *
6589 			    sizeof(struct nfsffm), M_NFSFLAYOUT, M_WAITOK);
6590 			flp->nfsfl_flags = NFSFL_FLEXFILE;
6591 			flp->nfsfl_mirrorcnt = mirrorcnt;
6592 			flp->nfsfl_devp = NULL;
6593 			flp->nfsfl_off = off;
6594 			if (flp->nfsfl_off + retlen < flp->nfsfl_off)
6595 				flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
6596 			else
6597 				flp->nfsfl_end = flp->nfsfl_off + retlen;
6598 			flp->nfsfl_iomode = iomode;
6599 			if (gotiomode == -1)
6600 				gotiomode = flp->nfsfl_iomode;
6601 			flp->nfsfl_stripeunit = fxdr_hyper(tl);
6602 			NFSCL_DEBUG(4, "stripeunit=%ju\n",
6603 			    (uintmax_t)flp->nfsfl_stripeunit);
6604 			for (j = 0; j < mirrorcnt; j++) {
6605 				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
6606 				k = fxdr_unsigned(int, *tl);
6607 				if (k < 1 || k > 128) {
6608 					error = NFSERR_BADXDR;
6609 					goto nfsmout;
6610 				}
6611 				NFSCL_DEBUG(4, "servercnt=%d\n", k);
6612 				for (l = 0; l < k; l++) {
6613 					NFSM_DISSECT(tl, uint32_t *,
6614 					    NFSX_V4DEVICEID + NFSX_STATEID +
6615 					    2 * NFSX_UNSIGNED);
6616 					if (l == 0) {
6617 						/* Just use the first server. */
6618 						NFSBCOPY(tl,
6619 						    flp->nfsfl_ffm[j].dev,
6620 						    NFSX_V4DEVICEID);
6621 						tl += (NFSX_V4DEVICEID /
6622 						    NFSX_UNSIGNED);
6623 						tl++;
6624 						flp->nfsfl_ffm[j].st.seqid =
6625 						    *tl++;
6626 						flp->nfsfl_ffm[j].st.other[0] =
6627 						    *tl++;
6628 						flp->nfsfl_ffm[j].st.other[1] =
6629 						    *tl++;
6630 						flp->nfsfl_ffm[j].st.other[2] =
6631 						    *tl++;
6632 						NFSCL_DEBUG(4, "st.seqid=%u "
6633 						 "st.o0=0x%x st.o1=0x%x "
6634 						 "st.o2=0x%x\n",
6635 						 flp->nfsfl_ffm[j].st.seqid,
6636 						 flp->nfsfl_ffm[j].st.other[0],
6637 						 flp->nfsfl_ffm[j].st.other[1],
6638 						 flp->nfsfl_ffm[j].st.other[2]);
6639 					} else
6640 						tl += ((NFSX_V4DEVICEID +
6641 						    NFSX_STATEID +
6642 						    NFSX_UNSIGNED) /
6643 						    NFSX_UNSIGNED);
6644 					fhcnt = fxdr_unsigned(int, *tl);
6645 					NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
6646 					if (fhcnt < 1 ||
6647 					    fhcnt > NFSDEV_MAXVERS) {
6648 						error = NFSERR_BADXDR;
6649 						goto nfsmout;
6650 					}
6651 					for (m = 0; m < fhcnt; m++) {
6652 						NFSM_DISSECT(tl, uint32_t *,
6653 						    NFSX_UNSIGNED);
6654 						nfhlen = fxdr_unsigned(int,
6655 						    *tl);
6656 						NFSCL_DEBUG(4, "nfhlen=%d\n",
6657 						    nfhlen);
6658 						if (nfhlen <= 0 || nfhlen >
6659 						    NFSX_V4FHMAX) {
6660 							error = NFSERR_BADXDR;
6661 							goto nfsmout;
6662 						}
6663 						NFSM_DISSECT(cp, uint8_t *,
6664 						    NFSM_RNDUP(nfhlen));
6665 						if (l == 0) {
6666 							flp->nfsfl_ffm[j].fhcnt
6667 							    = fhcnt;
6668 							nfhp = malloc(
6669 							    sizeof(*nfhp) +
6670 							    nfhlen - 1, M_NFSFH,
6671 							    M_WAITOK);
6672 							flp->nfsfl_ffm[j].fh[m]
6673 							    = nfhp;
6674 							nfhp->nfh_len = nfhlen;
6675 							NFSBCOPY(cp,
6676 							    nfhp->nfh_fh,
6677 							    nfhlen);
6678 							NFSCL_DEBUG(4,
6679 							    "got fh\n");
6680 						}
6681 					}
6682 					/* Now, get the ffsd_user/ffds_group. */
6683 					error = nfsrv_parseug(nd, 0, &user,
6684 					    &grp, curthread);
6685 					NFSCL_DEBUG(4, "after parseu=%d\n",
6686 					    error);
6687 					if (error == 0)
6688 						error = nfsrv_parseug(nd, 1,
6689 						    &user, &grp, curthread);
6690 					NFSCL_DEBUG(4, "aft parseg=%d\n",
6691 					    grp);
6692 					if (error != 0)
6693 						goto nfsmout;
6694 					NFSCL_DEBUG(4, "user=%d group=%d\n",
6695 					    user, grp);
6696 					if (l == 0) {
6697 						flp->nfsfl_ffm[j].user = user;
6698 						flp->nfsfl_ffm[j].group = grp;
6699 						NFSCL_DEBUG(4,
6700 						    "usr=%d grp=%d\n", user,
6701 						    grp);
6702 					}
6703 				}
6704 			}
6705 			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
6706 			flp->nfsfl_fflags = fxdr_unsigned(uint32_t, *tl++);
6707 			flp->nfsfl_statshint = fxdr_unsigned(uint32_t, *tl);
6708 			NFSCL_DEBUG(4, "fflags=0x%x statshint=%d\n",
6709 			    flp->nfsfl_fflags, flp->nfsfl_statshint);
6710 		} else {
6711 			error = NFSERR_BADXDR;
6712 			goto nfsmout;
6713 		}
6714 		if (flp->nfsfl_iomode == gotiomode) {
6715 			/* Keep the list in increasing offset order. */
6716 			tflp = LIST_FIRST(flhp);
6717 			prevflp = NULL;
6718 			while (tflp != NULL &&
6719 			    tflp->nfsfl_off < flp->nfsfl_off) {
6720 				prevflp = tflp;
6721 				tflp = LIST_NEXT(tflp, nfsfl_list);
6722 			}
6723 			if (prevflp == NULL)
6724 				LIST_INSERT_HEAD(flhp, flp, nfsfl_list);
6725 			else
6726 				LIST_INSERT_AFTER(prevflp, flp,
6727 				    nfsfl_list);
6728 			NFSCL_DEBUG(4, "flp inserted\n");
6729 		} else {
6730 			printf("nfscl_layoutget(): got wrong iomode\n");
6731 			nfscl_freeflayout(flp);
6732 		}
6733 		flp = NULL;
6734 	}
6735 nfsmout:
6736 	NFSCL_DEBUG(4, "eo nfsrv_parselayoutget=%d\n", error);
6737 	if (error != 0 && flp != NULL)
6738 		nfscl_freeflayout(flp);
6739 	return (error);
6740 }
6741 
6742 /*
6743  * Parse a user/group digit string.
6744  */
6745 static int
6746 nfsrv_parseug(struct nfsrv_descript *nd, int dogrp, uid_t *uidp, gid_t *gidp,
6747     NFSPROC_T *p)
6748 {
6749 	uint32_t *tl;
6750 	char *cp, *str, str0[NFSV4_SMALLSTR + 1];
6751 	uint32_t len = 0;
6752 	int error = 0;
6753 
6754 	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
6755 	len = fxdr_unsigned(uint32_t, *tl);
6756 	if (len > NFSV4_OPAQUELIMIT) {
6757 		error = NFSERR_BADXDR;
6758 		goto nfsmout;
6759 	}
6760 	NFSCL_DEBUG(4, "nfsrv_parseug: len=%d\n", len);
6761 	if (len == 0) {
6762 		if (dogrp != 0)
6763 			*gidp = GID_NOGROUP;
6764 		else
6765 			*uidp = UID_NOBODY;
6766 		return (0);
6767 	}
6768 	if (len > NFSV4_SMALLSTR)
6769 		str = malloc(len + 1, M_TEMP, M_WAITOK);
6770 	else
6771 		str = str0;
6772 	NFSM_DISSECT(cp, char *, NFSM_RNDUP(len));
6773 	NFSBCOPY(cp, str, len);
6774 	str[len] = '\0';
6775 	NFSCL_DEBUG(4, "nfsrv_parseug: str=%s\n", str);
6776 	if (dogrp != 0)
6777 		error = nfsv4_strtogid(nd, str, len, gidp, p);
6778 	else
6779 		error = nfsv4_strtouid(nd, str, len, uidp, p);
6780 nfsmout:
6781 	if (len > NFSV4_SMALLSTR)
6782 		free(str, M_TEMP);
6783 	NFSCL_DEBUG(4, "eo nfsrv_parseug=%d\n", error);
6784 	return (error);
6785 }
6786 
6787 /*
6788  * Similar to nfsrpc_getlayout(), except that it uses nfsrpc_openlayget(),
6789  * so that it does both an Open and a Layoutget.
6790  */
6791 static int
6792 nfsrpc_getopenlayout(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
6793     int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
6794     struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
6795     struct ucred *cred, NFSPROC_T *p)
6796 {
6797 	struct nfscllayout *lyp;
6798 	struct nfsclflayout *flp;
6799 	struct nfsclflayouthead flh;
6800 	int error, islocked, layoutlen, recalled, retonclose, usecurstateid;
6801 	int layouttype, laystat;
6802 	nfsv4stateid_t stateid;
6803 	struct nfsclsession *tsep;
6804 
6805 	error = 0;
6806 	if (NFSHASFLEXFILE(nmp))
6807 		layouttype = NFSLAYOUT_FLEXFILE;
6808 	else
6809 		layouttype = NFSLAYOUT_NFSV4_1_FILES;
6810 	/*
6811 	 * If lyp is returned non-NULL, there will be a refcnt (shared lock)
6812 	 * on it, iff flp != NULL or a lock (exclusive lock) on it iff
6813 	 * flp == NULL.
6814 	 */
6815 	lyp = nfscl_getlayout(nmp->nm_clp, newfhp, newfhlen, 0, &flp,
6816 	    &recalled);
6817 	NFSCL_DEBUG(4, "nfsrpc_getopenlayout nfscl_getlayout lyp=%p\n", lyp);
6818 	if (lyp == NULL)
6819 		islocked = 0;
6820 	else if (flp != NULL)
6821 		islocked = 1;
6822 	else
6823 		islocked = 2;
6824 	if ((lyp == NULL || flp == NULL) && recalled == 0) {
6825 		LIST_INIT(&flh);
6826 		tsep = nfsmnt_mdssession(nmp);
6827 		layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID +
6828 		    3 * NFSX_UNSIGNED);
6829 		if (lyp == NULL)
6830 			usecurstateid = 1;
6831 		else {
6832 			usecurstateid = 0;
6833 			stateid.seqid = lyp->nfsly_stateid.seqid;
6834 			stateid.other[0] = lyp->nfsly_stateid.other[0];
6835 			stateid.other[1] = lyp->nfsly_stateid.other[1];
6836 			stateid.other[2] = lyp->nfsly_stateid.other[2];
6837 		}
6838 		error = nfsrpc_openlayoutrpc(nmp, vp, nfhp, fhlen,
6839 		    newfhp, newfhlen, mode, op, name, namelen,
6840 		    dpp, &stateid, usecurstateid, layouttype, layoutlen,
6841 		    &retonclose, &flh, &laystat, cred, p);
6842 		NFSCL_DEBUG(4, "aft nfsrpc_openlayoutrpc laystat=%d err=%d\n",
6843 		    laystat, error);
6844 		laystat = nfsrpc_layoutgetres(nmp, vp, newfhp, newfhlen,
6845 		    &stateid, retonclose, NULL, &lyp, &flh, layouttype, laystat,
6846 		    &islocked, cred, p);
6847 	} else
6848 		error = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen,
6849 		    mode, op, name, namelen, dpp, 0, 0, cred, p, 0, 0);
6850 	if (islocked == 2)
6851 		nfscl_rellayout(lyp, 1);
6852 	else if (islocked == 1)
6853 		nfscl_rellayout(lyp, 0);
6854 	return (error);
6855 }
6856 
6857 /*
6858  * This function does an Open+LayoutGet for an NFSv4.1 mount with pNFS
6859  * enabled, only for the CLAIM_NULL case.  All other NFSv4 Opens are
6860  * handled by nfsrpc_openrpc().
6861  * For the case where op == NULL, dvp is the directory.  When op != NULL, it
6862  * can be NULL.
6863  */
6864 static int
6865 nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
6866     int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
6867     struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
6868     nfsv4stateid_t *stateidp, int usecurstateid, int layouttype,
6869     int layoutlen, int *retonclosep, struct nfsclflayouthead *flhp,
6870     int *laystatp, struct ucred *cred, NFSPROC_T *p)
6871 {
6872 	uint32_t *tl;
6873 	struct nfsrv_descript nfsd, *nd = &nfsd;
6874 	struct nfscldeleg *ndp = NULL;
6875 	struct nfsvattr nfsva;
6876 	struct nfsclsession *tsep;
6877 	uint32_t rflags, deleg;
6878 	nfsattrbit_t attrbits;
6879 	int error, ret, acesize, limitby, iomode;
6880 
6881 	*dpp = NULL;
6882 	*laystatp = ENXIO;
6883 	nfscl_reqstart(nd, NFSPROC_OPENLAYGET, nmp, nfhp, fhlen, NULL, NULL,
6884 	    0, 0);
6885 	NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED);
6886 	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
6887 	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
6888 	*tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
6889 	tsep = nfsmnt_mdssession(nmp);
6890 	*tl++ = tsep->nfsess_clientid.lval[0];
6891 	*tl = tsep->nfsess_clientid.lval[1];
6892 	nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
6893 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
6894 	*tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
6895 	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
6896 	nfsm_strtom(nd, name, namelen);
6897 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
6898 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
6899 	NFSZERO_ATTRBIT(&attrbits);
6900 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
6901 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
6902 	nfsrv_putattrbit(nd, &attrbits);
6903 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
6904 	*tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
6905 	if ((mode & NFSV4OPEN_ACCESSWRITE) != 0)
6906 		iomode = NFSLAYOUTIOMODE_RW;
6907 	else
6908 		iomode = NFSLAYOUTIOMODE_READ;
6909 	nfsrv_setuplayoutget(nd, iomode, 0, UINT64_MAX, 0, stateidp,
6910 	    layouttype, layoutlen, usecurstateid);
6911 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
6912 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
6913 	if (error != 0)
6914 		return (error);
6915 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
6916 	if (nd->nd_repstat != 0)
6917 		*laystatp = nd->nd_repstat;
6918 	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
6919 		/* ND_NOMOREDATA will be set if the Open operation failed. */
6920 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
6921 		    6 * NFSX_UNSIGNED);
6922 		op->nfso_stateid.seqid = *tl++;
6923 		op->nfso_stateid.other[0] = *tl++;
6924 		op->nfso_stateid.other[1] = *tl++;
6925 		op->nfso_stateid.other[2] = *tl;
6926 		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
6927 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
6928 		if (error != 0)
6929 			goto nfsmout;
6930 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
6931 		deleg = fxdr_unsigned(u_int32_t, *tl);
6932 		if (deleg == NFSV4OPEN_DELEGATEREAD ||
6933 		    deleg == NFSV4OPEN_DELEGATEWRITE) {
6934 			if (!(op->nfso_own->nfsow_clp->nfsc_flags &
6935 			      NFSCLFLAGS_FIRSTDELEG))
6936 				op->nfso_own->nfsow_clp->nfsc_flags |=
6937 				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
6938 			ndp = malloc(sizeof(struct nfscldeleg) + newfhlen,
6939 			    M_NFSCLDELEG, M_WAITOK);
6940 			LIST_INIT(&ndp->nfsdl_owner);
6941 			LIST_INIT(&ndp->nfsdl_lock);
6942 			ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
6943 			ndp->nfsdl_fhlen = newfhlen;
6944 			NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
6945 			newnfs_copyincred(cred, &ndp->nfsdl_cred);
6946 			nfscl_lockinit(&ndp->nfsdl_rwlock);
6947 			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
6948 			    NFSX_UNSIGNED);
6949 			ndp->nfsdl_stateid.seqid = *tl++;
6950 			ndp->nfsdl_stateid.other[0] = *tl++;
6951 			ndp->nfsdl_stateid.other[1] = *tl++;
6952 			ndp->nfsdl_stateid.other[2] = *tl++;
6953 			ret = fxdr_unsigned(int, *tl);
6954 			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
6955 				ndp->nfsdl_flags = NFSCLDL_WRITE;
6956 				/*
6957 				 * Indicates how much the file can grow.
6958 				 */
6959 				NFSM_DISSECT(tl, u_int32_t *,
6960 				    3 * NFSX_UNSIGNED);
6961 				limitby = fxdr_unsigned(int, *tl++);
6962 				switch (limitby) {
6963 				case NFSV4OPEN_LIMITSIZE:
6964 					ndp->nfsdl_sizelimit = fxdr_hyper(tl);
6965 					break;
6966 				case NFSV4OPEN_LIMITBLOCKS:
6967 					ndp->nfsdl_sizelimit =
6968 					    fxdr_unsigned(u_int64_t, *tl++);
6969 					ndp->nfsdl_sizelimit *=
6970 					    fxdr_unsigned(u_int64_t, *tl);
6971 					break;
6972 				default:
6973 					error = NFSERR_BADXDR;
6974 					goto nfsmout;
6975 				};
6976 			} else
6977 				ndp->nfsdl_flags = NFSCLDL_READ;
6978 			if (ret != 0)
6979 				ndp->nfsdl_flags |= NFSCLDL_RECALL;
6980 			error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret,
6981 			    &acesize, p);
6982 			if (error != 0)
6983 				goto nfsmout;
6984 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
6985 			error = NFSERR_BADXDR;
6986 			goto nfsmout;
6987 		}
6988 		if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) != 0 ||
6989 		    nfscl_assumeposixlocks)
6990 			op->nfso_posixlock = 1;
6991 		else
6992 			op->nfso_posixlock = 0;
6993 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
6994 		/* If the 2nd element == NFS_OK, the Getattr succeeded. */
6995 		if (*++tl == 0) {
6996 			error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
6997 			    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
6998 			    NULL, NULL, NULL, p, cred);
6999 			if (error != 0)
7000 				goto nfsmout;
7001 			if (ndp != NULL) {
7002 				ndp->nfsdl_change = nfsva.na_filerev;
7003 				ndp->nfsdl_modtime = nfsva.na_mtime;
7004 				ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
7005 				*dpp = ndp;
7006 				ndp = NULL;
7007 			}
7008 			/*
7009 			 * At this point, the Open has succeeded, so set
7010 			 * nd_repstat = NFS_OK.  If the Layoutget failed,
7011 			 * this function just won't return a layout.
7012 			 */
7013 			if (nd->nd_repstat == 0) {
7014 				NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7015 				*laystatp = fxdr_unsigned(int, *++tl);
7016 				if (*laystatp == 0) {
7017 					error = nfsrv_parselayoutget(nd,
7018 					    stateidp, retonclosep, flhp);
7019 					if (error != 0)
7020 						*laystatp = error;
7021 				}
7022 			} else
7023 				nd->nd_repstat = 0;	/* Return 0 for Open. */
7024 		}
7025 	}
7026 	if (nd->nd_repstat != 0 && error == 0)
7027 		error = nd->nd_repstat;
7028 nfsmout:
7029 	free(ndp, M_NFSCLDELEG);
7030 	mbuf_freem(nd->nd_mrep);
7031 	return (error);
7032 }
7033 
7034 /*
7035  * Similar nfsrpc_createv4(), but also does the LayoutGet operation.
7036  * Used only for mounts with pNFS enabled.
7037  */
7038 static int
7039 nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
7040     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
7041     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
7042     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
7043     int *dattrflagp, void *dstuff, int *unlockedp, nfsv4stateid_t *stateidp,
7044     int usecurstateid, int layouttype, int layoutlen, int *retonclosep,
7045     struct nfsclflayouthead *flhp, int *laystatp)
7046 {
7047 	uint32_t *tl;
7048 	int error = 0, deleg, newone, ret, acesize, limitby;
7049 	struct nfsrv_descript nfsd, *nd = &nfsd;
7050 	struct nfsclopen *op;
7051 	struct nfscldeleg *dp = NULL;
7052 	struct nfsnode *np;
7053 	struct nfsfh *nfhp;
7054 	struct nfsclsession *tsep;
7055 	nfsattrbit_t attrbits;
7056 	nfsv4stateid_t stateid;
7057 	uint32_t rflags;
7058 	struct nfsmount *nmp;
7059 
7060 	nmp = VFSTONFS(dvp->v_mount);
7061 	np = VTONFS(dvp);
7062 	*laystatp = ENXIO;
7063 	*unlockedp = 0;
7064 	*nfhpp = NULL;
7065 	*dpp = NULL;
7066 	*attrflagp = 0;
7067 	*dattrflagp = 0;
7068 	if (namelen > NFS_MAXNAMLEN)
7069 		return (ENAMETOOLONG);
7070 	NFSCL_REQSTART(nd, NFSPROC_CREATELAYGET, dvp);
7071 	/*
7072 	 * For V4, this is actually an Open op.
7073 	 */
7074 	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
7075 	*tl++ = txdr_unsigned(owp->nfsow_seqid);
7076 	*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
7077 	    NFSV4OPEN_ACCESSREAD);
7078 	*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
7079 	tsep = nfsmnt_mdssession(nmp);
7080 	*tl++ = tsep->nfsess_clientid.lval[0];
7081 	*tl = tsep->nfsess_clientid.lval[1];
7082 	nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
7083 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7084 	*tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
7085 	if ((fmode & O_EXCL) != 0) {
7086 		if (NFSHASSESSPERSIST(nmp)) {
7087 			/* Use GUARDED for persistent sessions. */
7088 			*tl = txdr_unsigned(NFSCREATE_GUARDED);
7089 			nfscl_fillsattr(nd, vap, dvp, 0, 0);
7090 		} else {
7091 			/* Otherwise, use EXCLUSIVE4_1. */
7092 			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
7093 			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
7094 			*tl++ = cverf.lval[0];
7095 			*tl = cverf.lval[1];
7096 			nfscl_fillsattr(nd, vap, dvp, 0, 0);
7097 		}
7098 	} else {
7099 		*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
7100 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
7101 	}
7102 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
7103 	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
7104 	nfsm_strtom(nd, name, namelen);
7105 	/* Get the new file's handle and attributes, plus save the FH. */
7106 	NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
7107 	*tl++ = txdr_unsigned(NFSV4OP_SAVEFH);
7108 	*tl++ = txdr_unsigned(NFSV4OP_GETFH);
7109 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
7110 	NFSGETATTR_ATTRBIT(&attrbits);
7111 	nfsrv_putattrbit(nd, &attrbits);
7112 	/* Get the directory's post-op attributes. */
7113 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
7114 	*tl = txdr_unsigned(NFSV4OP_PUTFH);
7115 	nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
7116 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
7117 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
7118 	nfsrv_putattrbit(nd, &attrbits);
7119 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7120 	*tl++ = txdr_unsigned(NFSV4OP_RESTOREFH);
7121 	*tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
7122 	nfsrv_setuplayoutget(nd, NFSLAYOUTIOMODE_RW, 0, UINT64_MAX, 0, stateidp,
7123 	    layouttype, layoutlen, usecurstateid);
7124 	error = nfscl_request(nd, dvp, p, cred, dstuff);
7125 	if (error != 0)
7126 		return (error);
7127 	NFSCL_DEBUG(4, "nfsrpc_createlayout stat=%d err=%d\n", nd->nd_repstat,
7128 	    error);
7129 	if (nd->nd_repstat != 0)
7130 		*laystatp = nd->nd_repstat;
7131 	NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
7132 	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
7133 		NFSCL_DEBUG(4, "nfsrpc_createlayout open succeeded\n");
7134 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
7135 		    6 * NFSX_UNSIGNED);
7136 		stateid.seqid = *tl++;
7137 		stateid.other[0] = *tl++;
7138 		stateid.other[1] = *tl++;
7139 		stateid.other[2] = *tl;
7140 		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
7141 		nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
7142 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
7143 		deleg = fxdr_unsigned(int, *tl);
7144 		if (deleg == NFSV4OPEN_DELEGATEREAD ||
7145 		    deleg == NFSV4OPEN_DELEGATEWRITE) {
7146 			if (!(owp->nfsow_clp->nfsc_flags &
7147 			      NFSCLFLAGS_FIRSTDELEG))
7148 				owp->nfsow_clp->nfsc_flags |=
7149 				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
7150 			dp = malloc(sizeof(struct nfscldeleg) + NFSX_V4FHMAX,
7151 			    M_NFSCLDELEG, M_WAITOK);
7152 			LIST_INIT(&dp->nfsdl_owner);
7153 			LIST_INIT(&dp->nfsdl_lock);
7154 			dp->nfsdl_clp = owp->nfsow_clp;
7155 			newnfs_copyincred(cred, &dp->nfsdl_cred);
7156 			nfscl_lockinit(&dp->nfsdl_rwlock);
7157 			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
7158 			    NFSX_UNSIGNED);
7159 			dp->nfsdl_stateid.seqid = *tl++;
7160 			dp->nfsdl_stateid.other[0] = *tl++;
7161 			dp->nfsdl_stateid.other[1] = *tl++;
7162 			dp->nfsdl_stateid.other[2] = *tl++;
7163 			ret = fxdr_unsigned(int, *tl);
7164 			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
7165 				dp->nfsdl_flags = NFSCLDL_WRITE;
7166 				/*
7167 				 * Indicates how much the file can grow.
7168 				 */
7169 				NFSM_DISSECT(tl, u_int32_t *,
7170 				    3 * NFSX_UNSIGNED);
7171 				limitby = fxdr_unsigned(int, *tl++);
7172 				switch (limitby) {
7173 				case NFSV4OPEN_LIMITSIZE:
7174 					dp->nfsdl_sizelimit = fxdr_hyper(tl);
7175 					break;
7176 				case NFSV4OPEN_LIMITBLOCKS:
7177 					dp->nfsdl_sizelimit =
7178 					    fxdr_unsigned(u_int64_t, *tl++);
7179 					dp->nfsdl_sizelimit *=
7180 					    fxdr_unsigned(u_int64_t, *tl);
7181 					break;
7182 				default:
7183 					error = NFSERR_BADXDR;
7184 					goto nfsmout;
7185 				};
7186 			} else {
7187 				dp->nfsdl_flags = NFSCLDL_READ;
7188 			}
7189 			if (ret != 0)
7190 				dp->nfsdl_flags |= NFSCLDL_RECALL;
7191 			error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret,
7192 			    &acesize, p);
7193 			if (error != 0)
7194 				goto nfsmout;
7195 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
7196 			error = NFSERR_BADXDR;
7197 			goto nfsmout;
7198 		}
7199 
7200 		/* Now, we should have the status for the SaveFH. */
7201 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7202 		if (*++tl == 0) {
7203 			NFSCL_DEBUG(4, "nfsrpc_createlayout SaveFH ok\n");
7204 			/*
7205 			 * Now, process the GetFH and Getattr for the newly
7206 			 * created file. nfscl_mtofh() will set
7207 			 * ND_NOMOREDATA if these weren't successful.
7208 			 */
7209 			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
7210 			NFSCL_DEBUG(4, "aft nfscl_mtofh err=%d\n", error);
7211 			if (error != 0)
7212 				goto nfsmout;
7213 		} else
7214 			nd->nd_flag |= ND_NOMOREDATA;
7215 		/* Now we have the PutFH and Getattr for the directory. */
7216 		if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
7217 			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7218 			if (*++tl != 0)
7219 				nd->nd_flag |= ND_NOMOREDATA;
7220 			else {
7221 				NFSM_DISSECT(tl, uint32_t *, 2 *
7222 				    NFSX_UNSIGNED);
7223 				if (*++tl != 0)
7224 					nd->nd_flag |= ND_NOMOREDATA;
7225 			}
7226 		}
7227 		if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
7228 			/* Load the directory attributes. */
7229 			error = nfsm_loadattr(nd, dnap);
7230 			NFSCL_DEBUG(4, "aft nfsm_loadattr err=%d\n", error);
7231 			if (error != 0)
7232 				goto nfsmout;
7233 			*dattrflagp = 1;
7234 			if (dp != NULL && *attrflagp != 0) {
7235 				dp->nfsdl_change = nnap->na_filerev;
7236 				dp->nfsdl_modtime = nnap->na_mtime;
7237 				dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
7238 			}
7239 			/*
7240 			 * We can now complete the Open state.
7241 			 */
7242 			nfhp = *nfhpp;
7243 			if (dp != NULL) {
7244 				dp->nfsdl_fhlen = nfhp->nfh_len;
7245 				NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh,
7246 				    nfhp->nfh_len);
7247 			}
7248 			/*
7249 			 * Get an Open structure that will be
7250 			 * attached to the OpenOwner, acquired already.
7251 			 */
7252 			error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len,
7253 			    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
7254 			    cred, p, NULL, &op, &newone, NULL, 0);
7255 			if (error != 0)
7256 				goto nfsmout;
7257 			op->nfso_stateid = stateid;
7258 			newnfs_copyincred(cred, &op->nfso_cred);
7259 
7260 			nfscl_openrelease(nmp, op, error, newone);
7261 			*unlockedp = 1;
7262 
7263 			/* Now, handle the RestoreFH and LayoutGet. */
7264 			if (nd->nd_repstat == 0) {
7265 				NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
7266 				*laystatp = fxdr_unsigned(int, *(tl + 3));
7267 				if (*laystatp == 0) {
7268 					error = nfsrv_parselayoutget(nd,
7269 					    stateidp, retonclosep, flhp);
7270 					if (error != 0)
7271 						*laystatp = error;
7272 				}
7273 				NFSCL_DEBUG(4, "aft nfsrv_parselayout err=%d\n",
7274 				    error);
7275 			} else
7276 				nd->nd_repstat = 0;
7277 		}
7278 	}
7279 	if (nd->nd_repstat != 0 && error == 0)
7280 		error = nd->nd_repstat;
7281 	if (error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION)
7282 		nfscl_initiate_recovery(owp->nfsow_clp);
7283 nfsmout:
7284 	NFSCL_DEBUG(4, "eo nfsrpc_createlayout err=%d\n", error);
7285 	if (error == 0)
7286 		*dpp = dp;
7287 	else
7288 		free(dp, M_NFSCLDELEG);
7289 	mbuf_freem(nd->nd_mrep);
7290 	return (error);
7291 }
7292 
7293 /*
7294  * Similar to nfsrpc_getopenlayout(), except that it used for the Create case.
7295  */
7296 static int
7297 nfsrpc_getcreatelayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
7298     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
7299     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
7300     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
7301     int *dattrflagp, void *dstuff, int *unlockedp)
7302 {
7303 	struct nfscllayout *lyp;
7304 	struct nfsclflayouthead flh;
7305 	struct nfsfh *nfhp;
7306 	struct nfsclsession *tsep;
7307 	struct nfsmount *nmp;
7308 	nfsv4stateid_t stateid;
7309 	int error, layoutlen, layouttype, retonclose, laystat;
7310 
7311 	error = 0;
7312 	nmp = VFSTONFS(dvp->v_mount);
7313 	if (NFSHASFLEXFILE(nmp))
7314 		layouttype = NFSLAYOUT_FLEXFILE;
7315 	else
7316 		layouttype = NFSLAYOUT_NFSV4_1_FILES;
7317 	LIST_INIT(&flh);
7318 	tsep = nfsmnt_mdssession(nmp);
7319 	layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID + 3 * NFSX_UNSIGNED);
7320 	error = nfsrpc_createlayout(dvp, name, namelen, vap, cverf, fmode,
7321 	    owp, dpp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
7322 	    dstuff, unlockedp, &stateid, 1, layouttype, layoutlen, &retonclose,
7323 	    &flh, &laystat);
7324 	NFSCL_DEBUG(4, "aft nfsrpc_createlayoutrpc laystat=%d err=%d\n",
7325 	    laystat, error);
7326 	lyp = NULL;
7327 	if (laystat == 0) {
7328 		nfhp = *nfhpp;
7329 		laystat = nfsrpc_layoutgetres(nmp, dvp, nfhp->nfh_fh,
7330 		    nfhp->nfh_len, &stateid, retonclose, NULL, &lyp, &flh,
7331 		    layouttype, laystat, NULL, cred, p);
7332 	} else
7333 		laystat = nfsrpc_layoutgetres(nmp, dvp, NULL, 0, &stateid,
7334 		    retonclose, NULL, &lyp, &flh, layouttype, laystat, NULL,
7335 		    cred, p);
7336 	if (laystat == 0)
7337 		nfscl_rellayout(lyp, 0);
7338 	return (error);
7339 }
7340 
7341 /*
7342  * Process the results of a layoutget() operation.
7343  */
7344 static int
7345 nfsrpc_layoutgetres(struct nfsmount *nmp, vnode_t vp, uint8_t *newfhp,
7346     int newfhlen, nfsv4stateid_t *stateidp, int retonclose, uint32_t *notifybit,
7347     struct nfscllayout **lypp, struct nfsclflayouthead *flhp, int layouttype,
7348     int laystat, int *islockedp, struct ucred *cred, NFSPROC_T *p)
7349 {
7350 	struct nfsclflayout *tflp;
7351 	struct nfscldevinfo *dip;
7352 	uint8_t *dev;
7353 
7354 	if (laystat == NFSERR_UNKNLAYOUTTYPE) {
7355 		NFSLOCKMNT(nmp);
7356 		if (!NFSHASFLEXFILE(nmp)) {
7357 			/* Switch to using Flex File Layout. */
7358 			nmp->nm_state |= NFSSTA_FLEXFILE;
7359 		} else if (layouttype == NFSLAYOUT_FLEXFILE) {
7360 			/* Disable pNFS. */
7361 			NFSCL_DEBUG(1, "disable PNFS\n");
7362 			nmp->nm_state &= ~(NFSSTA_PNFS | NFSSTA_FLEXFILE);
7363 		}
7364 		NFSUNLOCKMNT(nmp);
7365 	}
7366 	if (laystat == 0) {
7367 		NFSCL_DEBUG(4, "nfsrpc_layoutgetres at FOREACH\n");
7368 		LIST_FOREACH(tflp, flhp, nfsfl_list) {
7369 			laystat = nfscl_adddevinfo(nmp, NULL, tflp);
7370 			NFSCL_DEBUG(4, "aft adddev=%d\n", laystat);
7371 			if (laystat != 0) {
7372 				if (layouttype == NFSLAYOUT_FLEXFILE)
7373 					dev = tflp->nfsfl_ffm[0].dev;
7374 				else
7375 					dev = tflp->nfsfl_dev;
7376 				laystat = nfsrpc_getdeviceinfo(nmp, dev,
7377 				    layouttype, notifybit, &dip, cred, p);
7378 				NFSCL_DEBUG(4, "aft nfsrpc_gdi=%d\n",
7379 				    laystat);
7380 				if (laystat != 0)
7381 					break;
7382 				laystat = nfscl_adddevinfo(nmp, dip, tflp);
7383 				if (laystat != 0)
7384 					printf("getlayout: cannot add\n");
7385 			}
7386 		}
7387 	}
7388 	if (laystat == 0) {
7389 		/*
7390 		 * nfscl_layout() always returns with the nfsly_lock
7391 		 * set to a refcnt (shared lock).
7392 		 * Passing in dvp is sufficient, since it is only used to
7393 		 * get the fsid for the file system.
7394 		 */
7395 		laystat = nfscl_layout(nmp, vp, newfhp, newfhlen, stateidp,
7396 		    layouttype, retonclose, flhp, lypp, cred, p);
7397 		NFSCL_DEBUG(4, "nfsrpc_layoutgetres: aft nfscl_layout=%d\n",
7398 		    laystat);
7399 		if (laystat == 0 && islockedp != NULL)
7400 			*islockedp = 1;
7401 	}
7402 	return (laystat);
7403 }
7404 
7405