xref: /freebsd/sys/fs/nfsclient/nfs_clrpcops.c (revision 3416500aef140042c64bc149cb1ec6620483bc44)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 /*
38  * Rpc op calls, generally called from the vnode op calls or through the
39  * buffer cache, for NFS v2, 3 and 4.
40  * These do not normally make any changes to vnode arguments or use
41  * structures that might change between the VFS variants. The returned
42  * arguments are all at the end, after the NFSPROC_T *p one.
43  */
44 
45 #ifndef APPLEKEXT
46 #include "opt_inet6.h"
47 
48 #include <fs/nfs/nfsport.h>
49 #include <sys/sysctl.h>
50 
51 SYSCTL_DECL(_vfs_nfs);
52 
/*
 * Read/write integer tunable "ignore_eexist" under the _vfs_nfs sysctl node,
 * default 0. Per its description string it controls whether EEXIST replies
 * for mkdir/symlink are ignored.
 */
static int	nfsignore_eexist = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, ignore_eexist, CTLFLAG_RW,
    &nfsignore_eexist, 0, "NFS ignore EEXIST replies for mkdir/symlink");
56 
/*
 * Global variables
 *
 * The extern declarations are defined in other files; the remaining
 * variables are defined (and given their defaults) here.
 */
/* nfsrpc_open() only tries to get a layout with the Open when non-zero. */
extern int nfs_numnfscbd;
/* Set by nfsrpc_setclient() via NFSSETBOOTTIME() while tv_sec is still 0. */
extern struct timeval nfsboottime;
extern u_int32_t newnfs_false, newnfs_true;
extern nfstype nfsv34_type[9];
extern int nfsrv_useacl;
extern char nfsv4_callbackaddr[INET6_ADDRSTRLEN];
extern int nfscl_debuglevel;
NFSCLSTATEMUTEX;
int nfstest_outofseq = 0;
/*
 * When non-zero, nfsrpc_openrpc() marks every Open as using POSIX locks,
 * regardless of the NFSV4OPEN_LOCKTYPEPOSIX reply flag.
 */
int nfscl_assumeposixlocks = 1;
/* nfsrpc_open() only tries to get a layout with the Open when non-zero. */
int nfscl_enablecallb = 0;
/* Defaults to NFSV4_CBPORT. */
short nfsv4_cbport = NFSV4_CBPORT;
int nfstest_openallsetattr = 0;
73 #endif	/* !APPLEKEXT */
74 
/* Byte offset of d_name within struct dirent, i.e. the size of what precedes the name. */
#define	DIRHDSIZ	offsetof(struct dirent, d_name)
76 
77 /*
78  * nfscl_getsameserver() can return one of three values:
79  * NFSDSP_USETHISSESSION - Use this session for the DS.
80  * NFSDSP_SEQTHISSESSION - Use the nfsclds_sequence field of this dsp for new
81  *     session.
82  * NFSDSP_NOTFOUND - No matching server was found.
83  */
enum nfsclds_state {
	NFSDSP_USETHISSESSION = 0,	/* Use this session for the DS. */
	NFSDSP_SEQTHISSESSION = 1,	/* Use this dsp's nfsclds_sequence for a new session. */
	NFSDSP_NOTFOUND = 2,		/* No matching server was found. */
};
89 
90 static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *,
91     struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *);
92 static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *,
93     nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *, void *);
94 static int nfsrpc_writerpc(vnode_t , struct uio *, int *, int *,
95     struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *,
96     void *);
97 static int nfsrpc_createv23(vnode_t , char *, int, struct vattr *,
98     nfsquad_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *,
99     struct nfsvattr *, struct nfsfh **, int *, int *, void *);
100 static int nfsrpc_createv4(vnode_t , char *, int, struct vattr *,
101     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *,
102     NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *,
103     int *, void *, int *);
104 static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *,
105     struct nfscllockowner *, u_int64_t, u_int64_t,
106     u_int32_t, struct ucred *, NFSPROC_T *, int);
107 static int nfsrpc_setaclrpc(vnode_t, struct ucred *, NFSPROC_T *,
108     struct acl *, nfsv4stateid_t *, void *);
109 static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int,
110     uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **,
111     struct ucred *, NFSPROC_T *);
112 static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_storage *,
113     struct nfsclds **, NFSPROC_T *);
114 static void nfscl_initsessionslots(struct nfsclsession *);
115 static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *,
116     nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
117     struct nfsclflayout *, uint64_t, uint64_t, int, struct ucred *,
118     NFSPROC_T *);
119 static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *,
120     struct nfsclds *, uint64_t, int, struct nfsfh *, struct ucred *,
121     NFSPROC_T *);
122 static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *,
123     nfsv4stateid_t *, struct nfsclds *, uint64_t, int,
124     struct nfsfh *, int, struct ucred *, NFSPROC_T *);
125 static enum nfsclds_state nfscl_getsameserver(struct nfsmount *,
126     struct nfsclds *, struct nfsclds **);
127 static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *,
128     struct nfsfh *, struct ucred *, NFSPROC_T *);
129 static void nfsrv_setuplayoutget(struct nfsrv_descript *, int, uint64_t,
130     uint64_t, uint64_t, nfsv4stateid_t *, int, int);
131 static int nfsrv_parselayoutget(struct nfsrv_descript *, nfsv4stateid_t *,
132     int *, struct nfsclflayouthead *);
133 static int nfsrpc_getopenlayout(struct nfsmount *, vnode_t, u_int8_t *,
134     int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
135     struct nfscldeleg **, struct ucred *, NFSPROC_T *);
136 static int nfsrpc_getcreatelayout(vnode_t, char *, int, struct vattr *,
137     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
138     struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
139     struct nfsfh **, int *, int *, void *, int *);
140 static int nfsrpc_openlayoutrpc(struct nfsmount *, vnode_t, u_int8_t *,
141     int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
142     struct nfscldeleg **, nfsv4stateid_t *, int, int, int *,
143     struct nfsclflayouthead *, int *, struct ucred *, NFSPROC_T *);
144 static int nfsrpc_createlayout(vnode_t, char *, int, struct vattr *,
145     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
146     struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
147     struct nfsfh **, int *, int *, void *, int *, nfsv4stateid_t *,
148     int, int, int *, struct nfsclflayouthead *, int *);
149 static int nfsrpc_layoutgetres(struct nfsmount *, vnode_t, uint8_t *,
150     int, nfsv4stateid_t *, int, uint32_t *, struct nfscllayout **,
151     struct nfsclflayouthead *, int, int *, struct ucred *, NFSPROC_T *);
152 
153 /*
154  * nfs null call from vfs.
155  */
156 APPLESTATIC int
157 nfsrpc_null(vnode_t vp, struct ucred *cred, NFSPROC_T *p)
158 {
159 	int error;
160 	struct nfsrv_descript nfsd, *nd = &nfsd;
161 
162 	NFSCL_REQSTART(nd, NFSPROC_NULL, vp);
163 	error = nfscl_request(nd, vp, p, cred, NULL);
164 	if (nd->nd_repstat && !error)
165 		error = nd->nd_repstat;
166 	mbuf_freem(nd->nd_mrep);
167 	return (error);
168 }
169 
170 /*
171  * nfs access rpc op.
172  * For nfs version 3 and 4, use the access rpc to check accessibility. If file
173  * modes are changed on the server, accesses might still fail later.
174  */
175 APPLESTATIC int
176 nfsrpc_access(vnode_t vp, int acmode, struct ucred *cred,
177     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
178 {
179 	int error;
180 	u_int32_t mode, rmode;
181 
182 	if (acmode & VREAD)
183 		mode = NFSACCESS_READ;
184 	else
185 		mode = 0;
186 	if (vnode_vtype(vp) == VDIR) {
187 		if (acmode & VWRITE)
188 			mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND |
189 				 NFSACCESS_DELETE);
190 		if (acmode & VEXEC)
191 			mode |= NFSACCESS_LOOKUP;
192 	} else {
193 		if (acmode & VWRITE)
194 			mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
195 		if (acmode & VEXEC)
196 			mode |= NFSACCESS_EXECUTE;
197 	}
198 
199 	/*
200 	 * Now, just call nfsrpc_accessrpc() to do the actual RPC.
201 	 */
202 	error = nfsrpc_accessrpc(vp, mode, cred, p, nap, attrflagp, &rmode,
203 	    NULL);
204 
205 	/*
206 	 * The NFS V3 spec does not clarify whether or not
207 	 * the returned access bits can be a superset of
208 	 * the ones requested, so...
209 	 */
210 	if (!error && (rmode & mode) != mode)
211 		error = EACCES;
212 	return (error);
213 }
214 
215 /*
216  * The actual rpc, separated out for Darwin.
217  */
218 APPLESTATIC int
219 nfsrpc_accessrpc(vnode_t vp, u_int32_t mode, struct ucred *cred,
220     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, u_int32_t *rmodep,
221     void *stuff)
222 {
223 	u_int32_t *tl;
224 	u_int32_t supported, rmode;
225 	int error;
226 	struct nfsrv_descript nfsd, *nd = &nfsd;
227 	nfsattrbit_t attrbits;
228 
229 	*attrflagp = 0;
230 	supported = mode;
231 	NFSCL_REQSTART(nd, NFSPROC_ACCESS, vp);
232 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
233 	*tl = txdr_unsigned(mode);
234 	if (nd->nd_flag & ND_NFSV4) {
235 		/*
236 		 * And do a Getattr op.
237 		 */
238 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
239 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
240 		NFSGETATTR_ATTRBIT(&attrbits);
241 		(void) nfsrv_putattrbit(nd, &attrbits);
242 	}
243 	error = nfscl_request(nd, vp, p, cred, stuff);
244 	if (error)
245 		return (error);
246 	if (nd->nd_flag & ND_NFSV3) {
247 		error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
248 		if (error)
249 			goto nfsmout;
250 	}
251 	if (!nd->nd_repstat) {
252 		if (nd->nd_flag & ND_NFSV4) {
253 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
254 			supported = fxdr_unsigned(u_int32_t, *tl++);
255 		} else {
256 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
257 		}
258 		rmode = fxdr_unsigned(u_int32_t, *tl);
259 		if (nd->nd_flag & ND_NFSV4)
260 			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
261 
262 		/*
263 		 * It's not obvious what should be done about
264 		 * unsupported access modes. For now, be paranoid
265 		 * and clear the unsupported ones.
266 		 */
267 		rmode &= supported;
268 		*rmodep = rmode;
269 	} else
270 		error = nd->nd_repstat;
271 nfsmout:
272 	mbuf_freem(nd->nd_mrep);
273 	return (error);
274 }
275 
/*
 * nfs open rpc
 *
 * Get the NFSv4 Open state for a regular file (non-regular files return 0
 * right away). nfscl_open() finds or creates the local open structure and
 * tells us, via "ret", whether an Open must be done against the server
 * (NFSCLOPEN_DOOPEN) or only credentials need to be recorded
 * (NFSCLOPEN_SETCRED).
 *
 * amode - FREAD and/or FWRITE, mapped to NFSV4OPEN_ACCESSREAD/WRITE.
 *
 * The whole sequence is repeated for as long as the error is one of
 * NFSERR_GRACE, NFSERR_STALECLIENTID, NFSERR_STALEDONTRECOVER, NFSERR_DELAY
 * or NFSERR_BADSESSION (after an nfs_catnap()). For NFSERR_EXPIRED and
 * NFSERR_BADSTATEID it is repeated fewer than 4 times, and only while
 * nfscl_hasexpired() returns 0 and a client id revision is known; once
 * those retries are used up the error becomes EIO.
 *
 * Returns 0 on success, otherwise an error number.
 */
APPLESTATIC int
nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p)
{
	struct nfsclopen *op;
	struct nfscldeleg *dp;
	struct nfsfh *nfhp;
	struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
	u_int32_t mode, clidrev;
	int ret, newone, error, expireret = 0, retrycnt;

	/*
	 * For NFSv4, Open Ops are only done on Regular Files.
	 */
	if (vnode_vtype(vp) != VREG)
		return (0);
	mode = 0;
	if (amode & FREAD)
		mode |= NFSV4OPEN_ACCESSREAD;
	if (amode & FWRITE)
		mode |= NFSV4OPEN_ACCESSWRITE;
	nfhp = np->n_fhp;

	retrycnt = 0;
#ifdef notdef
{ char name[100]; int namel;
namel = (np->n_v4->n4_namelen < 100) ? np->n_v4->n4_namelen : 99;
bcopy(NFS4NODENAME(np->n_v4), name, namel);
name[namel] = '\0';
printf("rpcopen p=0x%x name=%s",p->p_pid,name);
if (nfhp->nfh_len > 0) printf(" fh=0x%x\n",nfhp->nfh_fh[12]);
else printf(" fhl=0\n");
}
#endif
	do {
	    dp = NULL;
	    error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 1,
		cred, p, NULL, &op, &newone, &ret, 1);
	    if (error) {
		return (error);
	    }
	    /*
	     * Remember the client id revision in effect for this attempt;
	     * 0 means there is no client structure and disables the
	     * expiry handling below.
	     */
	    if (nmp->nm_clp != NULL)
		clidrev = nmp->nm_clp->nfsc_clientidrev;
	    else
		clidrev = 0;
	    if (ret == NFSCLOPEN_DOOPEN) {
		if (np->n_v4 != NULL) {
			/*
			 * For the first attempt, try and get a layout, if
			 * pNFS is enabled for the mount.
			 */
			if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
			    nfs_numnfscbd == 0 ||
			    (np->n_flag & NNOLAYOUT) != 0 || retrycnt > 0)
				error = nfsrpc_openrpc(nmp, vp,
				    np->n_v4->n4_data,
				    np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
				    np->n_fhp->nfh_len, mode, op,
				    NFS4NODENAME(np->n_v4),
				    np->n_v4->n4_namelen,
				    &dp, 0, 0x0, cred, p, 0, 0);
			else
				error = nfsrpc_getopenlayout(nmp, vp,
				    np->n_v4->n4_data,
				    np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
				    np->n_fhp->nfh_len, mode, op,
				    NFS4NODENAME(np->n_v4),
				    np->n_v4->n4_namelen, &dp, cred, p);
			/* A non-NULL dp means a delegation came back with the Open. */
			if (dp != NULL) {
#ifdef APPLE
				OSBitAndAtomic((int32_t)~NDELEGMOD, (UInt32 *)&np->n_flag);
#else
				NFSLOCKNODE(np);
				np->n_flag &= ~NDELEGMOD;
				/*
				 * Invalidate the attribute cache, so that
				 * attributes that pre-date the issue of a
				 * delegation are not cached, since the
				 * cached attributes will remain valid while
				 * the delegation is held.
				 */
				NFSINVALATTRCACHE(np);
				NFSUNLOCKNODE(np);
#endif
				(void) nfscl_deleg(nmp->nm_mountp,
				    op->nfso_own->nfsow_clp,
				    nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp);
			}
		} else {
			/* No n_v4 data (parent file handle and name) to build the Open from. */
			error = EIO;
		}
		newnfs_copyincred(cred, &op->nfso_cred);
	    } else if (ret == NFSCLOPEN_SETCRED)
		/*
		 * This is a new local open on a delegation. It needs
		 * to have credentials so that an open can be done
		 * against the server during recovery.
		 */
		newnfs_copyincred(cred, &op->nfso_cred);

	    /*
	     * nfso_opencnt is the count of how many VOP_OPEN()s have
	     * been done on this Open successfully and a VOP_CLOSE()
	     * is expected for each of these.
	     * If error is non-zero, don't increment it, since the Open
	     * hasn't succeeded yet.
	     */
	    if (!error)
		op->nfso_opencnt++;
	    nfscl_openrelease(nmp, op, error, newone);
	    if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
		error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
		error == NFSERR_BADSESSION) {
		/* Errors that are retried without limit: nap, then go around again. */
		(void) nfs_catnap(PZERO, error, "nfs_open");
	    } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
		&& clidrev != 0) {
		/* Only these retries are counted against the limit of 4. */
		expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
		retrycnt++;
	    }
	} while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
	    error == NFSERR_BADSESSION ||
	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
	     expireret == 0 && clidrev != 0 && retrycnt < 4));
	/* Out of retries: report a generic I/O error. */
	if (error && retrycnt >= 4)
		error = EIO;
	return (error);
}
407 
/*
 * the actual open rpc
 *
 * Builds and sends an NFSv4 Open (NFSV4OPEN_NOCREATE) followed by a Getattr
 * of the change and modify time attributes, then parses the reply.
 *
 * nfhp/fhlen     - file handle the request is started with.
 * newfhp/newfhlen - file handle copied into any delegation returned and
 *                  used for the OpenConfirm.
 * mode           - access bits, with the deny bits at NFSLCK_SHIFT.
 * op             - open structure; its stateid and nfso_posixlock are
 *                  updated from the reply and its owner's seqid is used.
 * name/namelen   - component name, sent unless this is a reclaim.
 * dpp            - in: delegation to claim with NFSV4OPEN_CLAIMDELEGATECUR
 *                  (may point to NULL); out: newly allocated delegation
 *                  on success, else NULL.
 * reclaim        - non-zero to use NFSV4OPEN_CLAIMPREVIOUS with delegtype.
 * syscred        - non-zero sets ND_USEGSSNAME on the request.
 * recursed       - non-zero on the nested retry, which stops it from
 *                  retrying again.
 *
 * Returns 0 or an error; NFSERR_STALECLIENTID also starts recovery. Any
 * delegation allocated here is freed on error.
 */
APPLESTATIC int
nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen,
    u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op,
    u_int8_t *name, int namelen, struct nfscldeleg **dpp,
    int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p,
    int syscred, int recursed)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfscldeleg *dp, *ndp = NULL;
	struct nfsvattr nfsva;
	u_int32_t rflags, deleg;
	nfsattrbit_t attrbits;
	int error, ret, acesize, limitby;
	struct nfsclsession *tsep;

	/* Take the caller's delegation and clear the result slot up front. */
	dp = *dpp;
	*dpp = NULL;
	nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL, NULL);
	/* seqid, share access, share deny and the two clientid words. */
	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
	*tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
	tsep = nfsmnt_mdssession(nmp);
	*tl++ = tsep->nfsess_clientid.lval[0];
	*tl = tsep->nfsess_clientid.lval[1];
	(void) nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
	/* The claim type depends on reclaim and on whether a delegation is held. */
	if (reclaim) {
		*tl = txdr_unsigned(NFSV4OPEN_CLAIMPREVIOUS);
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(delegtype);
	} else {
		if (dp != NULL) {
			*tl = txdr_unsigned(NFSV4OPEN_CLAIMDELEGATECUR);
			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
			/* The stateid seqid is sent as 0 when NFSHASNFSV4N(). */
			if (NFSHASNFSV4N(nmp))
				*tl++ = 0;
			else
				*tl++ = dp->nfsdl_stateid.seqid;
			*tl++ = dp->nfsdl_stateid.other[0];
			*tl++ = dp->nfsdl_stateid.other[1];
			*tl = dp->nfsdl_stateid.other[2];
		} else {
			*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
		}
		(void) nfsm_strtom(nd, name, namelen);
	}
	/* Getattr for the change and modify time attributes. */
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_GETATTR);
	NFSZERO_ATTRBIT(&attrbits);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
	(void) nfsrv_putattrbit(nd, &attrbits);
	if (syscred)
		nd->nd_flag |= ND_USEGSSNAME;
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error)
		return (error);
	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
	if (!nd->nd_repstat) {
		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
		    6 * NFSX_UNSIGNED);
		op->nfso_stateid.seqid = *tl++;
		op->nfso_stateid.other[0] = *tl++;
		op->nfso_stateid.other[1] = *tl++;
		op->nfso_stateid.other[2] = *tl;
		/*
		 * tl still points at the last stateid word, so tl + 6 is the
		 * last of the six words that follow the stateid.
		 */
		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
		if (error)
			goto nfsmout;
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		deleg = fxdr_unsigned(u_int32_t, *tl);
		if (deleg == NFSV4OPEN_DELEGATEREAD ||
		    deleg == NFSV4OPEN_DELEGATEWRITE) {
			/* Note on the client that the server hands out delegations. */
			if (!(op->nfso_own->nfsow_clp->nfsc_flags &
			      NFSCLFLAGS_FIRSTDELEG))
				op->nfso_own->nfsow_clp->nfsc_flags |=
				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
			/* The file handle is stored right after the structure. */
			MALLOC(ndp, struct nfscldeleg *,
			    sizeof (struct nfscldeleg) + newfhlen,
			    M_NFSCLDELEG, M_WAITOK);
			LIST_INIT(&ndp->nfsdl_owner);
			LIST_INIT(&ndp->nfsdl_lock);
			ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
			ndp->nfsdl_fhlen = newfhlen;
			NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
			newnfs_copyincred(cred, &ndp->nfsdl_cred);
			nfscl_lockinit(&ndp->nfsdl_rwlock);
			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
			    NFSX_UNSIGNED);
			ndp->nfsdl_stateid.seqid = *tl++;
			ndp->nfsdl_stateid.other[0] = *tl++;
			ndp->nfsdl_stateid.other[1] = *tl++;
			ndp->nfsdl_stateid.other[2] = *tl++;
			/* Non-zero sets NFSCLDL_RECALL on the delegation below. */
			ret = fxdr_unsigned(int, *tl);
			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
				ndp->nfsdl_flags = NFSCLDL_WRITE;
				/*
				 * Indicates how much the file can grow.
				 */
				NFSM_DISSECT(tl, u_int32_t *,
				    3 * NFSX_UNSIGNED);
				limitby = fxdr_unsigned(int, *tl++);
				switch (limitby) {
				case NFSV4OPEN_LIMITSIZE:
					ndp->nfsdl_sizelimit = fxdr_hyper(tl);
					break;
				case NFSV4OPEN_LIMITBLOCKS:
					/* The limit is the product of the two words. */
					ndp->nfsdl_sizelimit =
					    fxdr_unsigned(u_int64_t, *tl++);
					ndp->nfsdl_sizelimit *=
					    fxdr_unsigned(u_int64_t, *tl);
					break;
				default:
					error = NFSERR_BADXDR;
					goto nfsmout;
				}
			} else {
				ndp->nfsdl_flags = NFSCLDL_READ;
			}
			if (ret)
				ndp->nfsdl_flags |= NFSCLDL_RECALL;
			error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret,
			    &acesize, p);
			if (error)
				goto nfsmout;
		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
			error = NFSERR_BADXDR;
			goto nfsmout;
		}
		/* Step over two words, then load the Getattr reply. */
		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
		    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
		    NULL, NULL, NULL, p, cred);
		if (error)
			goto nfsmout;
		if (ndp != NULL) {
			ndp->nfsdl_change = nfsva.na_filerev;
			ndp->nfsdl_modtime = nfsva.na_mtime;
			ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
		}
		/* Confirm the open if asked to, napping while told to delay. */
		if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM)) {
		    do {
			ret = nfsrpc_openconfirm(vp, newfhp, newfhlen, op,
			    cred, p);
			if (ret == NFSERR_DELAY)
			    (void) nfs_catnap(PZERO, ret, "nfs_open");
		    } while (ret == NFSERR_DELAY);
		    error = ret;
		}
		if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) ||
		    nfscl_assumeposixlocks)
		    op->nfso_posixlock = 1;
		else
		    op->nfso_posixlock = 0;

		/*
		 * If the server is handing out delegations, but we didn't
		 * get one because an OpenConfirm was required, try the
		 * Open again, to get a delegation. This is a harmless no-op,
		 * from a server's point of view.
		 */
		if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM) &&
		    (op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG)
		    && !error && dp == NULL && ndp == NULL && !recursed) {
		    do {
			ret = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp,
			    newfhlen, mode, op, name, namelen, &ndp, 0, 0x0,
			    cred, p, syscred, 1);
			if (ret == NFSERR_DELAY)
			    (void) nfs_catnap(PZERO, ret, "nfs_open2");
		    } while (ret == NFSERR_DELAY);
		    if (ret) {
			if (ndp != NULL) {
				FREE((caddr_t)ndp, M_NFSCLDELEG);
				ndp = NULL;
			}
			/*
			 * Only these failures of the second Open are
			 * reported; any other one leaves error at 0.
			 */
			if (ret == NFSERR_STALECLIENTID ||
			    ret == NFSERR_STALEDONTRECOVER ||
			    ret == NFSERR_BADSESSION)
				error = ret;
		    }
		}
	}
	if (nd->nd_repstat != 0 && error == 0)
		error = nd->nd_repstat;
	if (error == NFSERR_STALECLIENTID)
		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
nfsmout:
	/* Hand the delegation back only on success; otherwise free it. */
	if (!error)
		*dpp = ndp;
	else if (ndp != NULL)
		FREE((caddr_t)ndp, M_NFSCLDELEG);
	mbuf_freem(nd->nd_mrep);
	return (error);
}
610 
611 /*
612  * open downgrade rpc
613  */
614 APPLESTATIC int
615 nfsrpc_opendowngrade(vnode_t vp, u_int32_t mode, struct nfsclopen *op,
616     struct ucred *cred, NFSPROC_T *p)
617 {
618 	u_int32_t *tl;
619 	struct nfsrv_descript nfsd, *nd = &nfsd;
620 	int error;
621 
622 	NFSCL_REQSTART(nd, NFSPROC_OPENDOWNGRADE, vp);
623 	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED);
624 	if (NFSHASNFSV4N(VFSTONFS(vnode_mount(vp))))
625 		*tl++ = 0;
626 	else
627 		*tl++ = op->nfso_stateid.seqid;
628 	*tl++ = op->nfso_stateid.other[0];
629 	*tl++ = op->nfso_stateid.other[1];
630 	*tl++ = op->nfso_stateid.other[2];
631 	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
632 	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
633 	*tl = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
634 	error = nfscl_request(nd, vp, p, cred, NULL);
635 	if (error)
636 		return (error);
637 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
638 	if (!nd->nd_repstat) {
639 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
640 		op->nfso_stateid.seqid = *tl++;
641 		op->nfso_stateid.other[0] = *tl++;
642 		op->nfso_stateid.other[1] = *tl++;
643 		op->nfso_stateid.other[2] = *tl;
644 	}
645 	if (nd->nd_repstat && error == 0)
646 		error = nd->nd_repstat;
647 	if (error == NFSERR_STALESTATEID)
648 		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
649 nfsmout:
650 	mbuf_freem(nd->nd_mrep);
651 	return (error);
652 }
653 
654 /*
655  * V4 Close operation.
656  */
657 APPLESTATIC int
658 nfsrpc_close(vnode_t vp, int doclose, NFSPROC_T *p)
659 {
660 	struct nfsclclient *clp;
661 	int error;
662 
663 	if (vnode_vtype(vp) != VREG)
664 		return (0);
665 	if (doclose)
666 		error = nfscl_doclose(vp, &clp, p);
667 	else
668 		error = nfscl_getclose(vp, &clp);
669 	if (error)
670 		return (error);
671 
672 	nfscl_clientrelease(clp);
673 	return (0);
674 }
675 
/*
 * Close the open.
 *
 * Using a copy of the credentials saved in the open:
 *  1. unlock the byte range locks held by each lock owner of the open and
 *     free the local lock structures, then release the lock owner on the
 *     server;
 *  2. close the open via nfscl_tryclose(), while holding the open owner's
 *     lock exclusively;
 *  3. free the local lock owner and open structures.
 * Errors from the individual RPCs are not reported to the caller.
 */
APPLESTATIC void
nfsrpc_doclose(struct nfsmount *nmp, struct nfsclopen *op, NFSPROC_T *p)
{
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfscllockowner *lp, *nlp;
	struct nfscllock *lop, *nlop;
	struct ucred *tcred;
	u_int64_t off = 0, len = 0;
	u_int32_t type = NFSV4LOCKT_READ;
	int error, do_unlock, trycnt;

	tcred = newnfs_getcred();
	newnfs_copycred(&op->nfso_cred, tcred);
	/*
	 * (Theoretically this could be done in the same
	 *  compound as the close, but having multiple
	 *  sequenced Ops in the same compound might be
	 *  too scary for some servers.)
	 */
	/* With POSIX locks, one unlock covering the whole file is used. */
	if (op->nfso_posixlock) {
		off = 0;
		len = NFS64BITSSET;
		type = NFSV4LOCKT_READ;
	}

	/*
	 * Since this function is only called from VOP_INACTIVE(), no
	 * other thread will be manipulating this Open. As such, the
	 * lock lists are not being changed by other threads, so it should
	 * be safe to do this without locking.
	 */
	LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
		do_unlock = 1;
		LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
			/* Without POSIX locks each range is unlocked on its own. */
			if (op->nfso_posixlock == 0) {
				off = lop->nfslo_first;
				len = lop->nfslo_end - lop->nfslo_first;
				if (lop->nfslo_type == F_WRLCK)
					type = NFSV4LOCKT_WRITE;
				else
					type = NFSV4LOCKT_READ;
			}
			if (do_unlock) {
				/*
				 * Retry the unlock while the server replies
				 * NFSERR_GRACE or NFSERR_DELAY, but give up
				 * after a few tries.
				 */
				trycnt = 0;
				do {
					error = nfsrpc_locku(nd, nmp, lp, off,
					    len, type, tcred, p, 0);
					if ((nd->nd_repstat == NFSERR_GRACE ||
					    nd->nd_repstat == NFSERR_DELAY) &&
					    error == 0)
						(void) nfs_catnap(PZERO,
						    (int)nd->nd_repstat,
						    "nfs_close");
				} while ((nd->nd_repstat == NFSERR_GRACE ||
				    nd->nd_repstat == NFSERR_DELAY) &&
				    error == 0 && trycnt++ < 5);
				/* The whole file unlock is only done once per lock owner. */
				if (op->nfso_posixlock)
					do_unlock = 0;
			}
			nfscl_freelock(lop, 0);
		}
		/*
		 * Do a ReleaseLockOwner.
		 * The lock owner name nfsl_owner may be used by other opens for
		 * other files but the lock_owner4 name that nfsrpc_rellockown()
		 * puts on the wire has the file handle for this file appended
		 * to it, so it can be done now.
		 */
		(void)nfsrpc_rellockown(nmp, lp, lp->nfsl_open->nfso_fh,
		    lp->nfsl_open->nfso_fhlen, tcred, p);
	}

	/*
	 * There could be other Opens for different files on the same
	 * OpenOwner, so locking is required.
	 */
	NFSLOCKCLSTATE();
	nfscl_lockexcl(&op->nfso_own->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
	NFSUNLOCKCLSTATE();
	/* Keep trying the close for as long as the result is NFSERR_GRACE. */
	do {
		error = nfscl_tryclose(op, tcred, nmp, p);
		if (error == NFSERR_GRACE)
			(void) nfs_catnap(PZERO, error, "nfs_close");
	} while (error == NFSERR_GRACE);
	NFSLOCKCLSTATE();
	nfscl_lockunlock(&op->nfso_own->nfsow_rwlock);

	/* Free the local state while the client state lock is held. */
	LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp)
		nfscl_freelockowner(lp, 0);
	nfscl_freeopen(op, 0);
	NFSUNLOCKCLSTATE();
	NFSFREECRED(tcred);
}
772 
773 /*
774  * The actual Close RPC.
775  */
776 APPLESTATIC int
777 nfsrpc_closerpc(struct nfsrv_descript *nd, struct nfsmount *nmp,
778     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p,
779     int syscred)
780 {
781 	u_int32_t *tl;
782 	int error;
783 
784 	nfscl_reqstart(nd, NFSPROC_CLOSE, nmp, op->nfso_fh,
785 	    op->nfso_fhlen, NULL, NULL);
786 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
787 	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
788 	if (NFSHASNFSV4N(nmp))
789 		*tl++ = 0;
790 	else
791 		*tl++ = op->nfso_stateid.seqid;
792 	*tl++ = op->nfso_stateid.other[0];
793 	*tl++ = op->nfso_stateid.other[1];
794 	*tl = op->nfso_stateid.other[2];
795 	if (syscred)
796 		nd->nd_flag |= ND_USEGSSNAME;
797 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
798 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
799 	if (error)
800 		return (error);
801 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
802 	if (nd->nd_repstat == 0)
803 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
804 	error = nd->nd_repstat;
805 	if (error == NFSERR_STALESTATEID)
806 		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
807 nfsmout:
808 	mbuf_freem(nd->nd_mrep);
809 	return (error);
810 }
811 
812 /*
813  * V4 Open Confirm RPC.
814  */
815 APPLESTATIC int
816 nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen,
817     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p)
818 {
819 	u_int32_t *tl;
820 	struct nfsrv_descript nfsd, *nd = &nfsd;
821 	struct nfsmount *nmp;
822 	int error;
823 
824 	nmp = VFSTONFS(vnode_mount(vp));
825 	if (NFSHASNFSV4N(nmp))
826 		return (0);		/* No confirmation for NFSv4.1. */
827 	nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, nmp, nfhp, fhlen, NULL, NULL);
828 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
829 	*tl++ = op->nfso_stateid.seqid;
830 	*tl++ = op->nfso_stateid.other[0];
831 	*tl++ = op->nfso_stateid.other[1];
832 	*tl++ = op->nfso_stateid.other[2];
833 	*tl = txdr_unsigned(op->nfso_own->nfsow_seqid);
834 	error = nfscl_request(nd, vp, p, cred, NULL);
835 	if (error)
836 		return (error);
837 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
838 	if (!nd->nd_repstat) {
839 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
840 		op->nfso_stateid.seqid = *tl++;
841 		op->nfso_stateid.other[0] = *tl++;
842 		op->nfso_stateid.other[1] = *tl++;
843 		op->nfso_stateid.other[2] = *tl;
844 	}
845 	error = nd->nd_repstat;
846 	if (error == NFSERR_STALESTATEID)
847 		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
848 nfsmout:
849 	mbuf_freem(nd->nd_mrep);
850 	return (error);
851 }
852 
853 /*
854  * Do the setclientid and setclientid confirm RPCs. Called from nfs_statfs()
855  * when a mount has just occurred and when the server replies NFSERR_EXPIRED.
856  */
857 APPLESTATIC int
858 nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim,
859     struct ucred *cred, NFSPROC_T *p)
860 {
861 	u_int32_t *tl;
862 	struct nfsrv_descript nfsd;
863 	struct nfsrv_descript *nd = &nfsd;
864 	nfsattrbit_t attrbits;
865 	u_int8_t *cp = NULL, *cp2, addr[INET6_ADDRSTRLEN + 9];
866 	u_short port;
867 	int error, isinet6 = 0, callblen;
868 	nfsquad_t confirm;
869 	u_int32_t lease;
870 	static u_int32_t rev = 0;
871 	struct nfsclds *dsp;
872 	struct in6_addr a6;
873 	struct nfsclsession *tsep;
874 
875 	if (nfsboottime.tv_sec == 0)
876 		NFSSETBOOTTIME(nfsboottime);
877 	clp->nfsc_rev = rev++;
878 	if (NFSHASNFSV4N(nmp)) {
879 		/*
880 		 * Either there was no previous session or the
881 		 * previous session has failed, so...
882 		 * do an ExchangeID followed by the CreateSession.
883 		 */
884 		error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq,
885 		    NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp, cred, p);
886 		NFSCL_DEBUG(1, "aft exch=%d\n", error);
887 		if (error == 0)
888 			error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
889 			    &nmp->nm_sockreq,
890 			    dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p);
891 		if (error == 0) {
892 			NFSLOCKMNT(nmp);
893 			/*
894 			 * The old sessions cannot be safely free'd
895 			 * here, since they may still be used by
896 			 * in-progress RPCs.
897 			 */
898 			tsep = NULL;
899 			if (TAILQ_FIRST(&nmp->nm_sess) != NULL)
900 				tsep = NFSMNT_MDSSESSION(nmp);
901 			TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp,
902 			    nfsclds_list);
903 			/*
904 			 * Wake up RPCs waiting for a slot on the
905 			 * old session. These will then fail with
906 			 * NFSERR_BADSESSION and be retried with the
907 			 * new session by nfsv4_setsequence().
908 			 * Also wakeup() processes waiting for the
909 			 * new session.
910 			 */
911 			if (tsep != NULL)
912 				wakeup(&tsep->nfsess_slots);
913 			wakeup(&nmp->nm_sess);
914 			NFSUNLOCKMNT(nmp);
915 		} else
916 			nfscl_freenfsclds(dsp);
917 		NFSCL_DEBUG(1, "aft createsess=%d\n", error);
918 		if (error == 0 && reclaim == 0) {
919 			error = nfsrpc_reclaimcomplete(nmp, cred, p);
920 			NFSCL_DEBUG(1, "aft reclaimcomp=%d\n", error);
921 			if (error == NFSERR_COMPLETEALREADY ||
922 			    error == NFSERR_NOTSUPP)
923 				/* Ignore this error. */
924 				error = 0;
925 		}
926 		return (error);
927 	}
928 
929 	/*
930 	 * Allocate a single session structure for NFSv4.0, because some of
931 	 * the fields are used by NFSv4.0 although it doesn't do a session.
932 	 */
933 	dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO);
934 	mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
935 	mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF);
936 	NFSLOCKMNT(nmp);
937 	TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list);
938 	tsep = NFSMNT_MDSSESSION(nmp);
939 	NFSUNLOCKMNT(nmp);
940 
941 	nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL, NULL);
942 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
943 	*tl++ = txdr_unsigned(nfsboottime.tv_sec);
944 	*tl = txdr_unsigned(clp->nfsc_rev);
945 	(void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
946 
947 	/*
948 	 * set up the callback address
949 	 */
950 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
951 	*tl = txdr_unsigned(NFS_CALLBCKPROG);
952 	callblen = strlen(nfsv4_callbackaddr);
953 	if (callblen == 0)
954 		cp = nfscl_getmyip(nmp, &a6, &isinet6);
955 	if (nfscl_enablecallb && nfs_numnfscbd > 0 &&
956 	    (callblen > 0 || cp != NULL)) {
957 		port = htons(nfsv4_cbport);
958 		cp2 = (u_int8_t *)&port;
959 #ifdef INET6
960 		if ((callblen > 0 &&
961 		     strchr(nfsv4_callbackaddr, ':')) || isinet6) {
962 			char ip6buf[INET6_ADDRSTRLEN], *ip6add;
963 
964 			(void) nfsm_strtom(nd, "tcp6", 4);
965 			if (callblen == 0) {
966 				ip6_sprintf(ip6buf, (struct in6_addr *)cp);
967 				ip6add = ip6buf;
968 			} else {
969 				ip6add = nfsv4_callbackaddr;
970 			}
971 			snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d",
972 			    ip6add, cp2[0], cp2[1]);
973 		} else
974 #endif
975 		{
976 			(void) nfsm_strtom(nd, "tcp", 3);
977 			if (callblen == 0)
978 				snprintf(addr, INET6_ADDRSTRLEN + 9,
979 				    "%d.%d.%d.%d.%d.%d", cp[0], cp[1],
980 				    cp[2], cp[3], cp2[0], cp2[1]);
981 			else
982 				snprintf(addr, INET6_ADDRSTRLEN + 9,
983 				    "%s.%d.%d", nfsv4_callbackaddr,
984 				    cp2[0], cp2[1]);
985 		}
986 		(void) nfsm_strtom(nd, addr, strlen(addr));
987 	} else {
988 		(void) nfsm_strtom(nd, "tcp", 3);
989 		(void) nfsm_strtom(nd, "0.0.0.0.0.0", 11);
990 	}
991 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
992 	*tl = txdr_unsigned(clp->nfsc_cbident);
993 	nd->nd_flag |= ND_USEGSSNAME;
994 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
995 		NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
996 	if (error)
997 		return (error);
998 	if (nd->nd_repstat == 0) {
999 	    NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1000 	    tsep->nfsess_clientid.lval[0] = *tl++;
1001 	    tsep->nfsess_clientid.lval[1] = *tl++;
1002 	    confirm.lval[0] = *tl++;
1003 	    confirm.lval[1] = *tl;
1004 	    mbuf_freem(nd->nd_mrep);
1005 	    nd->nd_mrep = NULL;
1006 
1007 	    /*
1008 	     * and confirm it.
1009 	     */
1010 	    nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL,
1011 		NULL);
1012 	    NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1013 	    *tl++ = tsep->nfsess_clientid.lval[0];
1014 	    *tl++ = tsep->nfsess_clientid.lval[1];
1015 	    *tl++ = confirm.lval[0];
1016 	    *tl = confirm.lval[1];
1017 	    nd->nd_flag |= ND_USEGSSNAME;
1018 	    error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
1019 		cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1020 	    if (error)
1021 		return (error);
1022 	    mbuf_freem(nd->nd_mrep);
1023 	    nd->nd_mrep = NULL;
1024 	    if (nd->nd_repstat == 0) {
1025 		nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, nmp->nm_fh,
1026 		    nmp->nm_fhsize, NULL, NULL);
1027 		NFSZERO_ATTRBIT(&attrbits);
1028 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
1029 		(void) nfsrv_putattrbit(nd, &attrbits);
1030 		nd->nd_flag |= ND_USEGSSNAME;
1031 		error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
1032 		    cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1033 		if (error)
1034 		    return (error);
1035 		if (nd->nd_repstat == 0) {
1036 		    error = nfsv4_loadattr(nd, NULL, NULL, NULL, NULL, 0, NULL,
1037 			NULL, NULL, NULL, NULL, 0, NULL, &lease, NULL, p, cred);
1038 		    if (error)
1039 			goto nfsmout;
1040 		    clp->nfsc_renew = NFSCL_RENEW(lease);
1041 		    clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1042 		    clp->nfsc_clientidrev++;
1043 		    if (clp->nfsc_clientidrev == 0)
1044 			clp->nfsc_clientidrev++;
1045 		}
1046 	    }
1047 	}
1048 	error = nd->nd_repstat;
1049 nfsmout:
1050 	mbuf_freem(nd->nd_mrep);
1051 	return (error);
1052 }
1053 
1054 /*
1055  * nfs getattr call.
1056  */
1057 APPLESTATIC int
1058 nfsrpc_getattr(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
1059     struct nfsvattr *nap, void *stuff)
1060 {
1061 	struct nfsrv_descript nfsd, *nd = &nfsd;
1062 	int error;
1063 	nfsattrbit_t attrbits;
1064 
1065 	NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
1066 	if (nd->nd_flag & ND_NFSV4) {
1067 		NFSGETATTR_ATTRBIT(&attrbits);
1068 		(void) nfsrv_putattrbit(nd, &attrbits);
1069 	}
1070 	error = nfscl_request(nd, vp, p, cred, stuff);
1071 	if (error)
1072 		return (error);
1073 	if (!nd->nd_repstat)
1074 		error = nfsm_loadattr(nd, nap);
1075 	else
1076 		error = nd->nd_repstat;
1077 	mbuf_freem(nd->nd_mrep);
1078 	return (error);
1079 }
1080 
1081 /*
1082  * nfs getattr call with non-vnode arguemnts.
1083  */
1084 APPLESTATIC int
1085 nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred,
1086     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp,
1087     uint32_t *leasep)
1088 {
1089 	struct nfsrv_descript nfsd, *nd = &nfsd;
1090 	int error, vers = NFS_VER2;
1091 	nfsattrbit_t attrbits;
1092 
1093 	nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL, NULL);
1094 	if (nd->nd_flag & ND_NFSV4) {
1095 		vers = NFS_VER4;
1096 		NFSGETATTR_ATTRBIT(&attrbits);
1097 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
1098 		(void) nfsrv_putattrbit(nd, &attrbits);
1099 	} else if (nd->nd_flag & ND_NFSV3) {
1100 		vers = NFS_VER3;
1101 	}
1102 	if (syscred)
1103 		nd->nd_flag |= ND_USEGSSNAME;
1104 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
1105 	    NFS_PROG, vers, NULL, 1, xidp, NULL);
1106 	if (error)
1107 		return (error);
1108 	if (nd->nd_repstat == 0) {
1109 		if ((nd->nd_flag & ND_NFSV4) != 0)
1110 			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
1111 			    NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL,
1112 			    NULL, NULL);
1113 		else
1114 			error = nfsm_loadattr(nd, nap);
1115 	} else
1116 		error = nd->nd_repstat;
1117 	mbuf_freem(nd->nd_mrep);
1118 	return (error);
1119 }
1120 
1121 /*
1122  * Do an nfs setattr operation.
1123  */
1124 APPLESTATIC int
1125 nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp,
1126     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp,
1127     void *stuff)
1128 {
1129 	int error, expireret = 0, openerr, retrycnt;
1130 	u_int32_t clidrev = 0, mode;
1131 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
1132 	struct nfsfh *nfhp;
1133 	nfsv4stateid_t stateid;
1134 	void *lckp;
1135 
1136 	if (nmp->nm_clp != NULL)
1137 		clidrev = nmp->nm_clp->nfsc_clientidrev;
1138 	if (vap != NULL && NFSATTRISSET(u_quad_t, vap, va_size))
1139 		mode = NFSV4OPEN_ACCESSWRITE;
1140 	else
1141 		mode = NFSV4OPEN_ACCESSREAD;
1142 	retrycnt = 0;
1143 	do {
1144 		lckp = NULL;
1145 		openerr = 1;
1146 		if (NFSHASNFSV4(nmp)) {
1147 			nfhp = VTONFS(vp)->n_fhp;
1148 			error = nfscl_getstateid(vp, nfhp->nfh_fh,
1149 			    nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp);
1150 			if (error && vnode_vtype(vp) == VREG &&
1151 			    (mode == NFSV4OPEN_ACCESSWRITE ||
1152 			     nfstest_openallsetattr)) {
1153 				/*
1154 				 * No Open stateid, so try and open the file
1155 				 * now.
1156 				 */
1157 				if (mode == NFSV4OPEN_ACCESSWRITE)
1158 					openerr = nfsrpc_open(vp, FWRITE, cred,
1159 					    p);
1160 				else
1161 					openerr = nfsrpc_open(vp, FREAD, cred,
1162 					    p);
1163 				if (!openerr)
1164 					(void) nfscl_getstateid(vp,
1165 					    nfhp->nfh_fh, nfhp->nfh_len,
1166 					    mode, 0, cred, p, &stateid, &lckp);
1167 			}
1168 		}
1169 		if (vap != NULL)
1170 			error = nfsrpc_setattrrpc(vp, vap, &stateid, cred, p,
1171 			    rnap, attrflagp, stuff);
1172 		else
1173 			error = nfsrpc_setaclrpc(vp, cred, p, aclp, &stateid,
1174 			    stuff);
1175 		if (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD) {
1176 			NFSLOCKMNT(nmp);
1177 			nmp->nm_state |= NFSSTA_OPENMODE;
1178 			NFSUNLOCKMNT(nmp);
1179 		}
1180 		if (error == NFSERR_STALESTATEID)
1181 			nfscl_initiate_recovery(nmp->nm_clp);
1182 		if (lckp != NULL)
1183 			nfscl_lockderef(lckp);
1184 		if (!openerr)
1185 			(void) nfsrpc_close(vp, 0, p);
1186 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1187 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1188 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1189 			(void) nfs_catnap(PZERO, error, "nfs_setattr");
1190 		} else if ((error == NFSERR_EXPIRED ||
1191 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
1192 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1193 		}
1194 		retrycnt++;
1195 	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1196 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1197 	    error == NFSERR_BADSESSION ||
1198 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1199 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1200 	     expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1201 	    (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD &&
1202 	     retrycnt < 4));
1203 	if (error && retrycnt >= 4)
1204 		error = EIO;
1205 	return (error);
1206 }
1207 
1208 static int
1209 nfsrpc_setattrrpc(vnode_t vp, struct vattr *vap,
1210     nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
1211     struct nfsvattr *rnap, int *attrflagp, void *stuff)
1212 {
1213 	u_int32_t *tl;
1214 	struct nfsrv_descript nfsd, *nd = &nfsd;
1215 	int error;
1216 	nfsattrbit_t attrbits;
1217 
1218 	*attrflagp = 0;
1219 	NFSCL_REQSTART(nd, NFSPROC_SETATTR, vp);
1220 	if (nd->nd_flag & ND_NFSV4)
1221 		nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1222 	vap->va_type = vnode_vtype(vp);
1223 	nfscl_fillsattr(nd, vap, vp, NFSSATTR_FULL, 0);
1224 	if (nd->nd_flag & ND_NFSV3) {
1225 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1226 		*tl = newnfs_false;
1227 	} else if (nd->nd_flag & ND_NFSV4) {
1228 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1229 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1230 		NFSGETATTR_ATTRBIT(&attrbits);
1231 		(void) nfsrv_putattrbit(nd, &attrbits);
1232 	}
1233 	error = nfscl_request(nd, vp, p, cred, stuff);
1234 	if (error)
1235 		return (error);
1236 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
1237 		error = nfscl_wcc_data(nd, vp, rnap, attrflagp, NULL, stuff);
1238 	if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 && !error)
1239 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1240 	if (!(nd->nd_flag & ND_NFSV3) && !nd->nd_repstat && !error)
1241 		error = nfscl_postop_attr(nd, rnap, attrflagp, stuff);
1242 	mbuf_freem(nd->nd_mrep);
1243 	if (nd->nd_repstat && !error)
1244 		error = nd->nd_repstat;
1245 	return (error);
1246 }
1247 
1248 /*
1249  * nfs lookup rpc
1250  */
1251 APPLESTATIC int
1252 nfsrpc_lookup(vnode_t dvp, char *name, int len, struct ucred *cred,
1253     NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap,
1254     struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *stuff)
1255 {
1256 	u_int32_t *tl;
1257 	struct nfsrv_descript nfsd, *nd = &nfsd;
1258 	struct nfsmount *nmp;
1259 	struct nfsnode *np;
1260 	struct nfsfh *nfhp;
1261 	nfsattrbit_t attrbits;
1262 	int error = 0, lookupp = 0;
1263 
1264 	*attrflagp = 0;
1265 	*dattrflagp = 0;
1266 	if (vnode_vtype(dvp) != VDIR)
1267 		return (ENOTDIR);
1268 	nmp = VFSTONFS(vnode_mount(dvp));
1269 	if (len > NFS_MAXNAMLEN)
1270 		return (ENAMETOOLONG);
1271 	if (NFSHASNFSV4(nmp) && len == 1 &&
1272 		name[0] == '.') {
1273 		/*
1274 		 * Just return the current dir's fh.
1275 		 */
1276 		np = VTONFS(dvp);
1277 		MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) +
1278 			np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
1279 		nfhp->nfh_len = np->n_fhp->nfh_len;
1280 		NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
1281 		*nfhpp = nfhp;
1282 		return (0);
1283 	}
1284 	if (NFSHASNFSV4(nmp) && len == 2 &&
1285 		name[0] == '.' && name[1] == '.') {
1286 		lookupp = 1;
1287 		NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, dvp);
1288 	} else {
1289 		NFSCL_REQSTART(nd, NFSPROC_LOOKUP, dvp);
1290 		(void) nfsm_strtom(nd, name, len);
1291 	}
1292 	if (nd->nd_flag & ND_NFSV4) {
1293 		NFSGETATTR_ATTRBIT(&attrbits);
1294 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1295 		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
1296 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1297 		(void) nfsrv_putattrbit(nd, &attrbits);
1298 	}
1299 	error = nfscl_request(nd, dvp, p, cred, stuff);
1300 	if (error)
1301 		return (error);
1302 	if (nd->nd_repstat) {
1303 		/*
1304 		 * When an NFSv4 Lookupp returns ENOENT, it means that
1305 		 * the lookup is at the root of an fs, so return this dir.
1306 		 */
1307 		if (nd->nd_repstat == NFSERR_NOENT && lookupp) {
1308 		    np = VTONFS(dvp);
1309 		    MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) +
1310 			np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
1311 		    nfhp->nfh_len = np->n_fhp->nfh_len;
1312 		    NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
1313 		    *nfhpp = nfhp;
1314 		    mbuf_freem(nd->nd_mrep);
1315 		    return (0);
1316 		}
1317 		if (nd->nd_flag & ND_NFSV3)
1318 		    error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff);
1319 		else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
1320 		    ND_NFSV4) {
1321 			/* Load the directory attributes. */
1322 			error = nfsm_loadattr(nd, dnap);
1323 			if (error == 0)
1324 				*dattrflagp = 1;
1325 		}
1326 		goto nfsmout;
1327 	}
1328 	if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
1329 		/* Load the directory attributes. */
1330 		error = nfsm_loadattr(nd, dnap);
1331 		if (error != 0)
1332 			goto nfsmout;
1333 		*dattrflagp = 1;
1334 		/* Skip over the Lookup and GetFH operation status values. */
1335 		NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1336 	}
1337 	error = nfsm_getfh(nd, nfhpp);
1338 	if (error)
1339 		goto nfsmout;
1340 
1341 	error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1342 	if ((nd->nd_flag & ND_NFSV3) && !error)
1343 		error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff);
1344 nfsmout:
1345 	mbuf_freem(nd->nd_mrep);
1346 	if (!error && nd->nd_repstat)
1347 		error = nd->nd_repstat;
1348 	return (error);
1349 }
1350 
1351 /*
1352  * Do a readlink rpc.
1353  */
1354 APPLESTATIC int
1355 nfsrpc_readlink(vnode_t vp, struct uio *uiop, struct ucred *cred,
1356     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1357 {
1358 	u_int32_t *tl;
1359 	struct nfsrv_descript nfsd, *nd = &nfsd;
1360 	struct nfsnode *np = VTONFS(vp);
1361 	nfsattrbit_t attrbits;
1362 	int error, len, cangetattr = 1;
1363 
1364 	*attrflagp = 0;
1365 	NFSCL_REQSTART(nd, NFSPROC_READLINK, vp);
1366 	if (nd->nd_flag & ND_NFSV4) {
1367 		/*
1368 		 * And do a Getattr op.
1369 		 */
1370 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1371 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1372 		NFSGETATTR_ATTRBIT(&attrbits);
1373 		(void) nfsrv_putattrbit(nd, &attrbits);
1374 	}
1375 	error = nfscl_request(nd, vp, p, cred, stuff);
1376 	if (error)
1377 		return (error);
1378 	if (nd->nd_flag & ND_NFSV3)
1379 		error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1380 	if (!nd->nd_repstat && !error) {
1381 		NFSM_STRSIZ(len, NFS_MAXPATHLEN);
1382 		/*
1383 		 * This seems weird to me, but must have been added to
1384 		 * FreeBSD for some reason. The only thing I can think of
1385 		 * is that there was/is some server that replies with
1386 		 * more link data than it should?
1387 		 */
1388 		if (len == NFS_MAXPATHLEN) {
1389 			NFSLOCKNODE(np);
1390 			if (np->n_size > 0 && np->n_size < NFS_MAXPATHLEN) {
1391 				len = np->n_size;
1392 				cangetattr = 0;
1393 			}
1394 			NFSUNLOCKNODE(np);
1395 		}
1396 		error = nfsm_mbufuio(nd, uiop, len);
1397 		if ((nd->nd_flag & ND_NFSV4) && !error && cangetattr)
1398 			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1399 	}
1400 	if (nd->nd_repstat && !error)
1401 		error = nd->nd_repstat;
1402 nfsmout:
1403 	mbuf_freem(nd->nd_mrep);
1404 	return (error);
1405 }
1406 
1407 /*
1408  * Read operation.
1409  */
1410 APPLESTATIC int
1411 nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred,
1412     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1413 {
1414 	int error, expireret = 0, retrycnt;
1415 	u_int32_t clidrev = 0;
1416 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
1417 	struct nfsnode *np = VTONFS(vp);
1418 	struct ucred *newcred;
1419 	struct nfsfh *nfhp = NULL;
1420 	nfsv4stateid_t stateid;
1421 	void *lckp;
1422 
1423 	if (nmp->nm_clp != NULL)
1424 		clidrev = nmp->nm_clp->nfsc_clientidrev;
1425 	newcred = cred;
1426 	if (NFSHASNFSV4(nmp)) {
1427 		nfhp = np->n_fhp;
1428 		newcred = NFSNEWCRED(cred);
1429 	}
1430 	retrycnt = 0;
1431 	do {
1432 		lckp = NULL;
1433 		if (NFSHASNFSV4(nmp))
1434 			(void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1435 			    NFSV4OPEN_ACCESSREAD, 0, newcred, p, &stateid,
1436 			    &lckp);
1437 		error = nfsrpc_readrpc(vp, uiop, newcred, &stateid, p, nap,
1438 		    attrflagp, stuff);
1439 		if (error == NFSERR_OPENMODE) {
1440 			NFSLOCKMNT(nmp);
1441 			nmp->nm_state |= NFSSTA_OPENMODE;
1442 			NFSUNLOCKMNT(nmp);
1443 		}
1444 		if (error == NFSERR_STALESTATEID)
1445 			nfscl_initiate_recovery(nmp->nm_clp);
1446 		if (lckp != NULL)
1447 			nfscl_lockderef(lckp);
1448 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1449 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1450 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1451 			(void) nfs_catnap(PZERO, error, "nfs_read");
1452 		} else if ((error == NFSERR_EXPIRED ||
1453 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
1454 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1455 		}
1456 		retrycnt++;
1457 	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1458 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1459 	    error == NFSERR_BADSESSION ||
1460 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1461 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1462 	     expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1463 	    (error == NFSERR_OPENMODE && retrycnt < 4));
1464 	if (error && retrycnt >= 4)
1465 		error = EIO;
1466 	if (NFSHASNFSV4(nmp))
1467 		NFSFREECRED(newcred);
1468 	return (error);
1469 }
1470 
1471 /*
1472  * The actual read RPC.
1473  */
1474 static int
1475 nfsrpc_readrpc(vnode_t vp, struct uio *uiop, struct ucred *cred,
1476     nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap,
1477     int *attrflagp, void *stuff)
1478 {
1479 	u_int32_t *tl;
1480 	int error = 0, len, retlen, tsiz, eof = 0;
1481 	struct nfsrv_descript nfsd;
1482 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
1483 	struct nfsrv_descript *nd = &nfsd;
1484 	int rsize;
1485 	off_t tmp_off;
1486 
1487 	*attrflagp = 0;
1488 	tsiz = uio_uio_resid(uiop);
1489 	tmp_off = uiop->uio_offset + tsiz;
1490 	NFSLOCKMNT(nmp);
1491 	if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1492 		NFSUNLOCKMNT(nmp);
1493 		return (EFBIG);
1494 	}
1495 	rsize = nmp->nm_rsize;
1496 	NFSUNLOCKMNT(nmp);
1497 	nd->nd_mrep = NULL;
1498 	while (tsiz > 0) {
1499 		*attrflagp = 0;
1500 		len = (tsiz > rsize) ? rsize : tsiz;
1501 		NFSCL_REQSTART(nd, NFSPROC_READ, vp);
1502 		if (nd->nd_flag & ND_NFSV4)
1503 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1504 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED * 3);
1505 		if (nd->nd_flag & ND_NFSV2) {
1506 			*tl++ = txdr_unsigned(uiop->uio_offset);
1507 			*tl++ = txdr_unsigned(len);
1508 			*tl = 0;
1509 		} else {
1510 			txdr_hyper(uiop->uio_offset, tl);
1511 			*(tl + 2) = txdr_unsigned(len);
1512 		}
1513 		/*
1514 		 * Since I can't do a Getattr for NFSv4 for Write, there
1515 		 * doesn't seem any point in doing one here, either.
1516 		 * (See the comment in nfsrpc_writerpc() for more info.)
1517 		 */
1518 		error = nfscl_request(nd, vp, p, cred, stuff);
1519 		if (error)
1520 			return (error);
1521 		if (nd->nd_flag & ND_NFSV3) {
1522 			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1523 		} else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV2)) {
1524 			error = nfsm_loadattr(nd, nap);
1525 			if (!error)
1526 				*attrflagp = 1;
1527 		}
1528 		if (nd->nd_repstat || error) {
1529 			if (!error)
1530 				error = nd->nd_repstat;
1531 			goto nfsmout;
1532 		}
1533 		if (nd->nd_flag & ND_NFSV3) {
1534 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1535 			eof = fxdr_unsigned(int, *(tl + 1));
1536 		} else if (nd->nd_flag & ND_NFSV4) {
1537 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
1538 			eof = fxdr_unsigned(int, *tl);
1539 		}
1540 		NFSM_STRSIZ(retlen, len);
1541 		error = nfsm_mbufuio(nd, uiop, retlen);
1542 		if (error)
1543 			goto nfsmout;
1544 		mbuf_freem(nd->nd_mrep);
1545 		nd->nd_mrep = NULL;
1546 		tsiz -= retlen;
1547 		if (!(nd->nd_flag & ND_NFSV2)) {
1548 			if (eof || retlen == 0)
1549 				tsiz = 0;
1550 		} else if (retlen < len)
1551 			tsiz = 0;
1552 	}
1553 	return (0);
1554 nfsmout:
1555 	if (nd->nd_mrep != NULL)
1556 		mbuf_freem(nd->nd_mrep);
1557 	return (error);
1558 }
1559 
1560 /*
1561  * nfs write operation
1562  * When called_from_strategy != 0, it should return EIO for an error that
1563  * indicates recovery is in progress, so that the buffer will be left
1564  * dirty and be written back to the server later. If it loops around,
1565  * the recovery thread could get stuck waiting for the buffer and recovery
1566  * will then deadlock.
1567  */
1568 APPLESTATIC int
1569 nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
1570     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
1571     void *stuff, int called_from_strategy)
1572 {
1573 	int error, expireret = 0, retrycnt, nostateid;
1574 	u_int32_t clidrev = 0;
1575 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
1576 	struct nfsnode *np = VTONFS(vp);
1577 	struct ucred *newcred;
1578 	struct nfsfh *nfhp = NULL;
1579 	nfsv4stateid_t stateid;
1580 	void *lckp;
1581 
1582 	*must_commit = 0;
1583 	if (nmp->nm_clp != NULL)
1584 		clidrev = nmp->nm_clp->nfsc_clientidrev;
1585 	newcred = cred;
1586 	if (NFSHASNFSV4(nmp)) {
1587 		newcred = NFSNEWCRED(cred);
1588 		nfhp = np->n_fhp;
1589 	}
1590 	retrycnt = 0;
1591 	do {
1592 		lckp = NULL;
1593 		nostateid = 0;
1594 		if (NFSHASNFSV4(nmp)) {
1595 			(void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1596 			    NFSV4OPEN_ACCESSWRITE, 0, newcred, p, &stateid,
1597 			    &lckp);
1598 			if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
1599 			    stateid.other[2] == 0) {
1600 				nostateid = 1;
1601 				NFSCL_DEBUG(1, "stateid0 in write\n");
1602 			}
1603 		}
1604 
1605 		/*
1606 		 * If there is no stateid for NFSv4, it means this is an
1607 		 * extraneous write after close. Basically a poorly
1608 		 * implemented buffer cache. Just don't do the write.
1609 		 */
1610 		if (nostateid)
1611 			error = 0;
1612 		else
1613 			error = nfsrpc_writerpc(vp, uiop, iomode, must_commit,
1614 			    newcred, &stateid, p, nap, attrflagp, stuff);
1615 		if (error == NFSERR_STALESTATEID)
1616 			nfscl_initiate_recovery(nmp->nm_clp);
1617 		if (lckp != NULL)
1618 			nfscl_lockderef(lckp);
1619 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1620 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1621 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1622 			(void) nfs_catnap(PZERO, error, "nfs_write");
1623 		} else if ((error == NFSERR_EXPIRED ||
1624 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
1625 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1626 		}
1627 		retrycnt++;
1628 	} while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
1629 	    ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
1630 	      error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) ||
1631 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1632 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1633 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
1634 	if (error != 0 && (retrycnt >= 4 ||
1635 	    ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
1636 	      error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0)))
1637 		error = EIO;
1638 	if (NFSHASNFSV4(nmp))
1639 		NFSFREECRED(newcred);
1640 	return (error);
1641 }
1642 
1643 /*
1644  * The actual write RPC.
1645  */
1646 static int
1647 nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode,
1648     int *must_commit, struct ucred *cred, nfsv4stateid_t *stateidp,
1649     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1650 {
1651 	u_int32_t *tl;
1652 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
1653 	struct nfsnode *np = VTONFS(vp);
1654 	int error = 0, len, tsiz, rlen, commit, committed = NFSWRITE_FILESYNC;
1655 	int wccflag = 0, wsize;
1656 	int32_t backup;
1657 	struct nfsrv_descript nfsd;
1658 	struct nfsrv_descript *nd = &nfsd;
1659 	nfsattrbit_t attrbits;
1660 	off_t tmp_off;
1661 
1662 	KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
1663 	*attrflagp = 0;
1664 	tsiz = uio_uio_resid(uiop);
1665 	tmp_off = uiop->uio_offset + tsiz;
1666 	NFSLOCKMNT(nmp);
1667 	if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1668 		NFSUNLOCKMNT(nmp);
1669 		return (EFBIG);
1670 	}
1671 	wsize = nmp->nm_wsize;
1672 	NFSUNLOCKMNT(nmp);
1673 	nd->nd_mrep = NULL;	/* NFSv2 sometimes does a write with */
1674 	nd->nd_repstat = 0;	/* uio_resid == 0, so the while is not done */
1675 	while (tsiz > 0) {
1676 		*attrflagp = 0;
1677 		len = (tsiz > wsize) ? wsize : tsiz;
1678 		NFSCL_REQSTART(nd, NFSPROC_WRITE, vp);
1679 		if (nd->nd_flag & ND_NFSV4) {
1680 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1681 			NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+2*NFSX_UNSIGNED);
1682 			txdr_hyper(uiop->uio_offset, tl);
1683 			tl += 2;
1684 			*tl++ = txdr_unsigned(*iomode);
1685 			*tl = txdr_unsigned(len);
1686 		} else if (nd->nd_flag & ND_NFSV3) {
1687 			NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+3*NFSX_UNSIGNED);
1688 			txdr_hyper(uiop->uio_offset, tl);
1689 			tl += 2;
1690 			*tl++ = txdr_unsigned(len);
1691 			*tl++ = txdr_unsigned(*iomode);
1692 			*tl = txdr_unsigned(len);
1693 		} else {
1694 			u_int32_t x;
1695 
1696 			NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1697 			/*
1698 			 * Not sure why someone changed this, since the
1699 			 * RFC clearly states that "beginoffset" and
1700 			 * "totalcount" are ignored, but it wouldn't
1701 			 * surprise me if there's a busted server out there.
1702 			 */
1703 			/* Set both "begin" and "current" to non-garbage. */
1704 			x = txdr_unsigned((u_int32_t)uiop->uio_offset);
1705 			*tl++ = x;      /* "begin offset" */
1706 			*tl++ = x;      /* "current offset" */
1707 			x = txdr_unsigned(len);
1708 			*tl++ = x;      /* total to this offset */
1709 			*tl = x;        /* size of this write */
1710 
1711 		}
1712 		nfsm_uiombuf(nd, uiop, len);
1713 		/*
1714 		 * Although it is tempting to do a normal Getattr Op in the
1715 		 * NFSv4 compound, the result can be a nearly hung client
1716 		 * system if the Getattr asks for Owner and/or OwnerGroup.
1717 		 * It occurs when the client can't map either the Owner or
1718 		 * Owner_group name in the Getattr reply to a uid/gid. When
1719 		 * there is a cache miss, the kernel does an upcall to the
1720 		 * nfsuserd. Then, it can try and read the local /etc/passwd
1721 		 * or /etc/group file. It can then block in getnewbuf(),
1722 		 * waiting for dirty writes to be pushed to the NFS server.
1723 		 * The only reason this doesn't result in a complete
1724 		 * deadlock, is that the upcall times out and allows
1725 		 * the write to complete. However, progress is so slow
1726 		 * that it might just as well be deadlocked.
1727 		 * As such, we get the rest of the attributes, but not
1728 		 * Owner or Owner_group.
1729 		 * nb: nfscl_loadattrcache() needs to be told that these
1730 		 *     partial attributes from a write rpc are being
1731 		 *     passed in, via a argument flag.
1732 		 */
1733 		if (nd->nd_flag & ND_NFSV4) {
1734 			NFSWRITEGETATTR_ATTRBIT(&attrbits);
1735 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1736 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
1737 			(void) nfsrv_putattrbit(nd, &attrbits);
1738 		}
1739 		error = nfscl_request(nd, vp, p, cred, stuff);
1740 		if (error)
1741 			return (error);
1742 		if (nd->nd_repstat) {
1743 			/*
1744 			 * In case the rpc gets retried, roll
1745 			 * the uio fileds changed by nfsm_uiombuf()
1746 			 * back.
1747 			 */
1748 			uiop->uio_offset -= len;
1749 			uio_uio_resid_add(uiop, len);
1750 			uio_iov_base_add(uiop, -len);
1751 			uio_iov_len_add(uiop, len);
1752 		}
1753 		if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
1754 			error = nfscl_wcc_data(nd, vp, nap, attrflagp,
1755 			    &wccflag, stuff);
1756 			if (error)
1757 				goto nfsmout;
1758 		}
1759 		if (!nd->nd_repstat) {
1760 			if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
1761 				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED
1762 					+ NFSX_VERF);
1763 				rlen = fxdr_unsigned(int, *tl++);
1764 				if (rlen == 0) {
1765 					error = NFSERR_IO;
1766 					goto nfsmout;
1767 				} else if (rlen < len) {
1768 					backup = len - rlen;
1769 					uio_iov_base_add(uiop, -(backup));
1770 					uio_iov_len_add(uiop, backup);
1771 					uiop->uio_offset -= backup;
1772 					uio_uio_resid_add(uiop, backup);
1773 					len = rlen;
1774 				}
1775 				commit = fxdr_unsigned(int, *tl++);
1776 
1777 				/*
1778 				 * Return the lowest commitment level
1779 				 * obtained by any of the RPCs.
1780 				 */
1781 				if (committed == NFSWRITE_FILESYNC)
1782 					committed = commit;
1783 				else if (committed == NFSWRITE_DATASYNC &&
1784 					commit == NFSWRITE_UNSTABLE)
1785 					committed = commit;
1786 				NFSLOCKMNT(nmp);
1787 				if (!NFSHASWRITEVERF(nmp)) {
1788 					NFSBCOPY((caddr_t)tl,
1789 					    (caddr_t)&nmp->nm_verf[0],
1790 					    NFSX_VERF);
1791 					NFSSETWRITEVERF(nmp);
1792 	    			} else if (NFSBCMP(tl, nmp->nm_verf,
1793 				    NFSX_VERF)) {
1794 					*must_commit = 1;
1795 					NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
1796 				}
1797 				NFSUNLOCKMNT(nmp);
1798 			}
1799 			if (nd->nd_flag & ND_NFSV4)
1800 				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1801 			if (nd->nd_flag & (ND_NFSV2 | ND_NFSV4)) {
1802 				error = nfsm_loadattr(nd, nap);
1803 				if (!error)
1804 					*attrflagp = NFS_LATTR_NOSHRINK;
1805 			}
1806 		} else {
1807 			error = nd->nd_repstat;
1808 		}
1809 		if (error)
1810 			goto nfsmout;
1811 		NFSWRITERPC_SETTIME(wccflag, np, nap, (nd->nd_flag & ND_NFSV4));
1812 		mbuf_freem(nd->nd_mrep);
1813 		nd->nd_mrep = NULL;
1814 		tsiz -= len;
1815 	}
1816 nfsmout:
1817 	if (nd->nd_mrep != NULL)
1818 		mbuf_freem(nd->nd_mrep);
1819 	*iomode = committed;
1820 	if (nd->nd_repstat && !error)
1821 		error = nd->nd_repstat;
1822 	return (error);
1823 }
1824 
1825 /*
1826  * nfs mknod rpc
1827  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
1828  * mode set to specify the file type and the size field for rdev.
1829  */
1830 APPLESTATIC int
1831 nfsrpc_mknod(vnode_t dvp, char *name, int namelen, struct vattr *vap,
1832     u_int32_t rdev, enum vtype vtyp, struct ucred *cred, NFSPROC_T *p,
1833     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
1834     int *attrflagp, int *dattrflagp, void *dstuff)
1835 {
1836 	u_int32_t *tl;
1837 	int error = 0;
1838 	struct nfsrv_descript nfsd, *nd = &nfsd;
1839 	nfsattrbit_t attrbits;
1840 
1841 	*nfhpp = NULL;
1842 	*attrflagp = 0;
1843 	*dattrflagp = 0;
1844 	if (namelen > NFS_MAXNAMLEN)
1845 		return (ENAMETOOLONG);
1846 	NFSCL_REQSTART(nd, NFSPROC_MKNOD, dvp);
1847 	if (nd->nd_flag & ND_NFSV4) {
1848 		if (vtyp == VBLK || vtyp == VCHR) {
1849 			NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1850 			*tl++ = vtonfsv34_type(vtyp);
1851 			*tl++ = txdr_unsigned(NFSMAJOR(rdev));
1852 			*tl = txdr_unsigned(NFSMINOR(rdev));
1853 		} else {
1854 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1855 			*tl = vtonfsv34_type(vtyp);
1856 		}
1857 	}
1858 	(void) nfsm_strtom(nd, name, namelen);
1859 	if (nd->nd_flag & ND_NFSV3) {
1860 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1861 		*tl = vtonfsv34_type(vtyp);
1862 	}
1863 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
1864 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
1865 	if ((nd->nd_flag & ND_NFSV3) &&
1866 	    (vtyp == VCHR || vtyp == VBLK)) {
1867 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1868 		*tl++ = txdr_unsigned(NFSMAJOR(rdev));
1869 		*tl = txdr_unsigned(NFSMINOR(rdev));
1870 	}
1871 	if (nd->nd_flag & ND_NFSV4) {
1872 		NFSGETATTR_ATTRBIT(&attrbits);
1873 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1874 		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
1875 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1876 		(void) nfsrv_putattrbit(nd, &attrbits);
1877 	}
1878 	if (nd->nd_flag & ND_NFSV2)
1879 		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZERDEV, rdev);
1880 	error = nfscl_request(nd, dvp, p, cred, dstuff);
1881 	if (error)
1882 		return (error);
1883 	if (nd->nd_flag & ND_NFSV4)
1884 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
1885 	if (!nd->nd_repstat) {
1886 		if (nd->nd_flag & ND_NFSV4) {
1887 			NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
1888 			error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1889 			if (error)
1890 				goto nfsmout;
1891 		}
1892 		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
1893 		if (error)
1894 			goto nfsmout;
1895 	}
1896 	if (nd->nd_flag & ND_NFSV3)
1897 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
1898 	if (!error && nd->nd_repstat)
1899 		error = nd->nd_repstat;
1900 nfsmout:
1901 	mbuf_freem(nd->nd_mrep);
1902 	return (error);
1903 }
1904 
1905 /*
1906  * nfs file create call
1907  * Mostly just call the approriate routine. (I separated out v4, so that
1908  * error recovery wouldn't be as difficult.)
1909  */
1910 APPLESTATIC int
1911 nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap,
1912     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
1913     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
1914     int *attrflagp, int *dattrflagp, void *dstuff)
1915 {
1916 	int error = 0, newone, expireret = 0, retrycnt, unlocked;
1917 	struct nfsclowner *owp;
1918 	struct nfscldeleg *dp;
1919 	struct nfsmount *nmp = VFSTONFS(vnode_mount(dvp));
1920 	u_int32_t clidrev;
1921 
1922 	if (NFSHASNFSV4(nmp)) {
1923 	    retrycnt = 0;
1924 	    do {
1925 		dp = NULL;
1926 		error = nfscl_open(dvp, NULL, 0, (NFSV4OPEN_ACCESSWRITE |
1927 		    NFSV4OPEN_ACCESSREAD), 0, cred, p, &owp, NULL, &newone,
1928 		    NULL, 1);
1929 		if (error)
1930 			return (error);
1931 		if (nmp->nm_clp != NULL)
1932 			clidrev = nmp->nm_clp->nfsc_clientidrev;
1933 		else
1934 			clidrev = 0;
1935 		if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
1936 		    nfs_numnfscbd == 0 || retrycnt > 0)
1937 			error = nfsrpc_createv4(dvp, name, namelen, vap, cverf,
1938 			  fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
1939 			  attrflagp, dattrflagp, dstuff, &unlocked);
1940 		else
1941 			error = nfsrpc_getcreatelayout(dvp, name, namelen, vap,
1942 			  cverf, fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
1943 			  attrflagp, dattrflagp, dstuff, &unlocked);
1944 		/*
1945 		 * There is no need to invalidate cached attributes here,
1946 		 * since new post-delegation issue attributes are always
1947 		 * returned by nfsrpc_createv4() and these will update the
1948 		 * attribute cache.
1949 		 */
1950 		if (dp != NULL)
1951 			(void) nfscl_deleg(nmp->nm_mountp, owp->nfsow_clp,
1952 			    (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp);
1953 		nfscl_ownerrelease(nmp, owp, error, newone, unlocked);
1954 		if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
1955 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1956 		    error == NFSERR_BADSESSION) {
1957 			(void) nfs_catnap(PZERO, error, "nfs_open");
1958 		} else if ((error == NFSERR_EXPIRED ||
1959 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
1960 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1961 			retrycnt++;
1962 		}
1963 	    } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
1964 		error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1965 		error == NFSERR_BADSESSION ||
1966 		((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1967 		 expireret == 0 && clidrev != 0 && retrycnt < 4));
1968 	    if (error && retrycnt >= 4)
1969 		    error = EIO;
1970 	} else {
1971 		error = nfsrpc_createv23(dvp, name, namelen, vap, cverf,
1972 		    fmode, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
1973 		    dstuff);
1974 	}
1975 	return (error);
1976 }
1977 
1978 /*
1979  * The create rpc for v2 and 3.
1980  */
1981 static int
1982 nfsrpc_createv23(vnode_t dvp, char *name, int namelen, struct vattr *vap,
1983     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
1984     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
1985     int *attrflagp, int *dattrflagp, void *dstuff)
1986 {
1987 	u_int32_t *tl;
1988 	int error = 0;
1989 	struct nfsrv_descript nfsd, *nd = &nfsd;
1990 
1991 	*nfhpp = NULL;
1992 	*attrflagp = 0;
1993 	*dattrflagp = 0;
1994 	if (namelen > NFS_MAXNAMLEN)
1995 		return (ENAMETOOLONG);
1996 	NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp);
1997 	(void) nfsm_strtom(nd, name, namelen);
1998 	if (nd->nd_flag & ND_NFSV3) {
1999 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2000 		if (fmode & O_EXCL) {
2001 			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2002 			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2003 			*tl++ = cverf.lval[0];
2004 			*tl = cverf.lval[1];
2005 		} else {
2006 			*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2007 			nfscl_fillsattr(nd, vap, dvp, 0, 0);
2008 		}
2009 	} else {
2010 		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZE0, 0);
2011 	}
2012 	error = nfscl_request(nd, dvp, p, cred, dstuff);
2013 	if (error)
2014 		return (error);
2015 	if (nd->nd_repstat == 0) {
2016 		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2017 		if (error)
2018 			goto nfsmout;
2019 	}
2020 	if (nd->nd_flag & ND_NFSV3)
2021 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2022 	if (nd->nd_repstat != 0 && error == 0)
2023 		error = nd->nd_repstat;
2024 nfsmout:
2025 	mbuf_freem(nd->nd_mrep);
2026 	return (error);
2027 }
2028 
2029 static int
2030 nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2031     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
2032     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2033     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2034     int *dattrflagp, void *dstuff, int *unlockedp)
2035 {
2036 	u_int32_t *tl;
2037 	int error = 0, deleg, newone, ret, acesize, limitby;
2038 	struct nfsrv_descript nfsd, *nd = &nfsd;
2039 	struct nfsclopen *op;
2040 	struct nfscldeleg *dp = NULL;
2041 	struct nfsnode *np;
2042 	struct nfsfh *nfhp;
2043 	nfsattrbit_t attrbits;
2044 	nfsv4stateid_t stateid;
2045 	u_int32_t rflags;
2046 	struct nfsmount *nmp;
2047 	struct nfsclsession *tsep;
2048 
2049 	nmp = VFSTONFS(dvp->v_mount);
2050 	np = VTONFS(dvp);
2051 	*unlockedp = 0;
2052 	*nfhpp = NULL;
2053 	*dpp = NULL;
2054 	*attrflagp = 0;
2055 	*dattrflagp = 0;
2056 	if (namelen > NFS_MAXNAMLEN)
2057 		return (ENAMETOOLONG);
2058 	NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp);
2059 	/*
2060 	 * For V4, this is actually an Open op.
2061 	 */
2062 	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2063 	*tl++ = txdr_unsigned(owp->nfsow_seqid);
2064 	*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
2065 	    NFSV4OPEN_ACCESSREAD);
2066 	*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
2067 	tsep = nfsmnt_mdssession(nmp);
2068 	*tl++ = tsep->nfsess_clientid.lval[0];
2069 	*tl = tsep->nfsess_clientid.lval[1];
2070 	(void) nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
2071 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2072 	*tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
2073 	if (fmode & O_EXCL) {
2074 		if (NFSHASNFSV4N(nmp)) {
2075 			if (NFSHASSESSPERSIST(nmp)) {
2076 				/* Use GUARDED for persistent sessions. */
2077 				*tl = txdr_unsigned(NFSCREATE_GUARDED);
2078 				nfscl_fillsattr(nd, vap, dvp, 0, 0);
2079 			} else {
2080 				/* Otherwise, use EXCLUSIVE4_1. */
2081 				*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
2082 				NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2083 				*tl++ = cverf.lval[0];
2084 				*tl = cverf.lval[1];
2085 				nfscl_fillsattr(nd, vap, dvp, 0, 0);
2086 			}
2087 		} else {
2088 			/* NFSv4.0 */
2089 			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2090 			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2091 			*tl++ = cverf.lval[0];
2092 			*tl = cverf.lval[1];
2093 		}
2094 	} else {
2095 		*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2096 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
2097 	}
2098 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2099 	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
2100 	(void) nfsm_strtom(nd, name, namelen);
2101 	/* Get the new file's handle and attributes. */
2102 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2103 	*tl++ = txdr_unsigned(NFSV4OP_GETFH);
2104 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
2105 	NFSGETATTR_ATTRBIT(&attrbits);
2106 	(void) nfsrv_putattrbit(nd, &attrbits);
2107 	/* Get the directory's post-op attributes. */
2108 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2109 	*tl = txdr_unsigned(NFSV4OP_PUTFH);
2110 	(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
2111 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2112 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
2113 	(void) nfsrv_putattrbit(nd, &attrbits);
2114 	error = nfscl_request(nd, dvp, p, cred, dstuff);
2115 	if (error)
2116 		return (error);
2117 	NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
2118 	if (nd->nd_repstat == 0) {
2119 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2120 		    6 * NFSX_UNSIGNED);
2121 		stateid.seqid = *tl++;
2122 		stateid.other[0] = *tl++;
2123 		stateid.other[1] = *tl++;
2124 		stateid.other[2] = *tl;
2125 		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
2126 		(void) nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2127 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2128 		deleg = fxdr_unsigned(int, *tl);
2129 		if (deleg == NFSV4OPEN_DELEGATEREAD ||
2130 		    deleg == NFSV4OPEN_DELEGATEWRITE) {
2131 			if (!(owp->nfsow_clp->nfsc_flags &
2132 			      NFSCLFLAGS_FIRSTDELEG))
2133 				owp->nfsow_clp->nfsc_flags |=
2134 				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
2135 			MALLOC(dp, struct nfscldeleg *,
2136 			    sizeof (struct nfscldeleg) + NFSX_V4FHMAX,
2137 			    M_NFSCLDELEG, M_WAITOK);
2138 			LIST_INIT(&dp->nfsdl_owner);
2139 			LIST_INIT(&dp->nfsdl_lock);
2140 			dp->nfsdl_clp = owp->nfsow_clp;
2141 			newnfs_copyincred(cred, &dp->nfsdl_cred);
2142 			nfscl_lockinit(&dp->nfsdl_rwlock);
2143 			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2144 			    NFSX_UNSIGNED);
2145 			dp->nfsdl_stateid.seqid = *tl++;
2146 			dp->nfsdl_stateid.other[0] = *tl++;
2147 			dp->nfsdl_stateid.other[1] = *tl++;
2148 			dp->nfsdl_stateid.other[2] = *tl++;
2149 			ret = fxdr_unsigned(int, *tl);
2150 			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
2151 				dp->nfsdl_flags = NFSCLDL_WRITE;
2152 				/*
2153 				 * Indicates how much the file can grow.
2154 				 */
2155 				NFSM_DISSECT(tl, u_int32_t *,
2156 				    3 * NFSX_UNSIGNED);
2157 				limitby = fxdr_unsigned(int, *tl++);
2158 				switch (limitby) {
2159 				case NFSV4OPEN_LIMITSIZE:
2160 					dp->nfsdl_sizelimit = fxdr_hyper(tl);
2161 					break;
2162 				case NFSV4OPEN_LIMITBLOCKS:
2163 					dp->nfsdl_sizelimit =
2164 					    fxdr_unsigned(u_int64_t, *tl++);
2165 					dp->nfsdl_sizelimit *=
2166 					    fxdr_unsigned(u_int64_t, *tl);
2167 					break;
2168 				default:
2169 					error = NFSERR_BADXDR;
2170 					goto nfsmout;
2171 				}
2172 			} else {
2173 				dp->nfsdl_flags = NFSCLDL_READ;
2174 			}
2175 			if (ret)
2176 				dp->nfsdl_flags |= NFSCLDL_RECALL;
2177 			error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret,
2178 			    &acesize, p);
2179 			if (error)
2180 				goto nfsmout;
2181 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
2182 			error = NFSERR_BADXDR;
2183 			goto nfsmout;
2184 		}
2185 		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2186 		if (error)
2187 			goto nfsmout;
2188 		/* Get rid of the PutFH and Getattr status values. */
2189 		NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2190 		/* Load the directory attributes. */
2191 		error = nfsm_loadattr(nd, dnap);
2192 		if (error)
2193 			goto nfsmout;
2194 		*dattrflagp = 1;
2195 		if (dp != NULL && *attrflagp) {
2196 			dp->nfsdl_change = nnap->na_filerev;
2197 			dp->nfsdl_modtime = nnap->na_mtime;
2198 			dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
2199 		}
2200 		/*
2201 		 * We can now complete the Open state.
2202 		 */
2203 		nfhp = *nfhpp;
2204 		if (dp != NULL) {
2205 			dp->nfsdl_fhlen = nfhp->nfh_len;
2206 			NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh, nfhp->nfh_len);
2207 		}
2208 		/*
2209 		 * Get an Open structure that will be
2210 		 * attached to the OpenOwner, acquired already.
2211 		 */
2212 		error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len,
2213 		    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
2214 		    cred, p, NULL, &op, &newone, NULL, 0);
2215 		if (error)
2216 			goto nfsmout;
2217 		op->nfso_stateid = stateid;
2218 		newnfs_copyincred(cred, &op->nfso_cred);
2219 		if ((rflags & NFSV4OPEN_RESULTCONFIRM)) {
2220 		    do {
2221 			ret = nfsrpc_openconfirm(dvp, nfhp->nfh_fh,
2222 			    nfhp->nfh_len, op, cred, p);
2223 			if (ret == NFSERR_DELAY)
2224 			    (void) nfs_catnap(PZERO, ret, "nfs_create");
2225 		    } while (ret == NFSERR_DELAY);
2226 		    error = ret;
2227 		}
2228 
2229 		/*
2230 		 * If the server is handing out delegations, but we didn't
2231 		 * get one because an OpenConfirm was required, try the
2232 		 * Open again, to get a delegation. This is a harmless no-op,
2233 		 * from a server's point of view.
2234 		 */
2235 		if ((rflags & NFSV4OPEN_RESULTCONFIRM) &&
2236 		    (owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) &&
2237 		    !error && dp == NULL) {
2238 		    do {
2239 			ret = nfsrpc_openrpc(VFSTONFS(vnode_mount(dvp)), dvp,
2240 			    np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
2241 			    nfhp->nfh_fh, nfhp->nfh_len,
2242 			    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), op,
2243 			    name, namelen, &dp, 0, 0x0, cred, p, 0, 1);
2244 			if (ret == NFSERR_DELAY)
2245 			    (void) nfs_catnap(PZERO, ret, "nfs_crt2");
2246 		    } while (ret == NFSERR_DELAY);
2247 		    if (ret) {
2248 			if (dp != NULL) {
2249 				FREE((caddr_t)dp, M_NFSCLDELEG);
2250 				dp = NULL;
2251 			}
2252 			if (ret == NFSERR_STALECLIENTID ||
2253 			    ret == NFSERR_STALEDONTRECOVER ||
2254 			    ret == NFSERR_BADSESSION)
2255 				error = ret;
2256 		    }
2257 		}
2258 		nfscl_openrelease(nmp, op, error, newone);
2259 		*unlockedp = 1;
2260 	}
2261 	if (nd->nd_repstat != 0 && error == 0)
2262 		error = nd->nd_repstat;
2263 	if (error == NFSERR_STALECLIENTID)
2264 		nfscl_initiate_recovery(owp->nfsow_clp);
2265 nfsmout:
2266 	if (!error)
2267 		*dpp = dp;
2268 	else if (dp != NULL)
2269 		FREE((caddr_t)dp, M_NFSCLDELEG);
2270 	mbuf_freem(nd->nd_mrep);
2271 	return (error);
2272 }
2273 
2274 /*
2275  * Nfs remove rpc
2276  */
2277 APPLESTATIC int
2278 nfsrpc_remove(vnode_t dvp, char *name, int namelen, vnode_t vp,
2279     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp,
2280     void *dstuff)
2281 {
2282 	u_int32_t *tl;
2283 	struct nfsrv_descript nfsd, *nd = &nfsd;
2284 	struct nfsnode *np;
2285 	struct nfsmount *nmp;
2286 	nfsv4stateid_t dstateid;
2287 	int error, ret = 0, i;
2288 
2289 	*dattrflagp = 0;
2290 	if (namelen > NFS_MAXNAMLEN)
2291 		return (ENAMETOOLONG);
2292 	nmp = VFSTONFS(vnode_mount(dvp));
2293 tryagain:
2294 	if (NFSHASNFSV4(nmp) && ret == 0) {
2295 		ret = nfscl_removedeleg(vp, p, &dstateid);
2296 		if (ret == 1) {
2297 			NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp);
2298 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
2299 			    NFSX_UNSIGNED);
2300 			if (NFSHASNFSV4N(nmp))
2301 				*tl++ = 0;
2302 			else
2303 				*tl++ = dstateid.seqid;
2304 			*tl++ = dstateid.other[0];
2305 			*tl++ = dstateid.other[1];
2306 			*tl++ = dstateid.other[2];
2307 			*tl = txdr_unsigned(NFSV4OP_PUTFH);
2308 			np = VTONFS(dvp);
2309 			(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2310 			    np->n_fhp->nfh_len, 0);
2311 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2312 			*tl = txdr_unsigned(NFSV4OP_REMOVE);
2313 		}
2314 	} else {
2315 		ret = 0;
2316 	}
2317 	if (ret == 0)
2318 		NFSCL_REQSTART(nd, NFSPROC_REMOVE, dvp);
2319 	(void) nfsm_strtom(nd, name, namelen);
2320 	error = nfscl_request(nd, dvp, p, cred, dstuff);
2321 	if (error)
2322 		return (error);
2323 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2324 		/* For NFSv4, parse out any Delereturn replies. */
2325 		if (ret > 0 && nd->nd_repstat != 0 &&
2326 		    (nd->nd_flag & ND_NOMOREDATA)) {
2327 			/*
2328 			 * If the Delegreturn failed, try again without
2329 			 * it. The server will Recall, as required.
2330 			 */
2331 			mbuf_freem(nd->nd_mrep);
2332 			goto tryagain;
2333 		}
2334 		for (i = 0; i < (ret * 2); i++) {
2335 			if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2336 			    ND_NFSV4) {
2337 			    NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2338 			    if (*(tl + 1))
2339 				nd->nd_flag |= ND_NOMOREDATA;
2340 			}
2341 		}
2342 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2343 	}
2344 	if (nd->nd_repstat && !error)
2345 		error = nd->nd_repstat;
2346 nfsmout:
2347 	mbuf_freem(nd->nd_mrep);
2348 	return (error);
2349 }
2350 
2351 /*
2352  * Do an nfs rename rpc.
2353  */
2354 APPLESTATIC int
2355 nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen,
2356     vnode_t tdvp, vnode_t tvp, char *tnameptr, int tnamelen, struct ucred *cred,
2357     NFSPROC_T *p, struct nfsvattr *fnap, struct nfsvattr *tnap,
2358     int *fattrflagp, int *tattrflagp, void *fstuff, void *tstuff)
2359 {
2360 	u_int32_t *tl;
2361 	struct nfsrv_descript nfsd, *nd = &nfsd;
2362 	struct nfsmount *nmp;
2363 	struct nfsnode *np;
2364 	nfsattrbit_t attrbits;
2365 	nfsv4stateid_t fdstateid, tdstateid;
2366 	int error = 0, ret = 0, gottd = 0, gotfd = 0, i;
2367 
2368 	*fattrflagp = 0;
2369 	*tattrflagp = 0;
2370 	nmp = VFSTONFS(vnode_mount(fdvp));
2371 	if (fnamelen > NFS_MAXNAMLEN || tnamelen > NFS_MAXNAMLEN)
2372 		return (ENAMETOOLONG);
2373 tryagain:
2374 	if (NFSHASNFSV4(nmp) && ret == 0) {
2375 		ret = nfscl_renamedeleg(fvp, &fdstateid, &gotfd, tvp,
2376 		    &tdstateid, &gottd, p);
2377 		if (gotfd && gottd) {
2378 			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME2, fvp);
2379 		} else if (gotfd) {
2380 			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, fvp);
2381 		} else if (gottd) {
2382 			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, tvp);
2383 		}
2384 		if (gotfd) {
2385 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
2386 			if (NFSHASNFSV4N(nmp))
2387 				*tl++ = 0;
2388 			else
2389 				*tl++ = fdstateid.seqid;
2390 			*tl++ = fdstateid.other[0];
2391 			*tl++ = fdstateid.other[1];
2392 			*tl = fdstateid.other[2];
2393 			if (gottd) {
2394 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2395 				*tl = txdr_unsigned(NFSV4OP_PUTFH);
2396 				np = VTONFS(tvp);
2397 				(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2398 				    np->n_fhp->nfh_len, 0);
2399 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2400 				*tl = txdr_unsigned(NFSV4OP_DELEGRETURN);
2401 			}
2402 		}
2403 		if (gottd) {
2404 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
2405 			if (NFSHASNFSV4N(nmp))
2406 				*tl++ = 0;
2407 			else
2408 				*tl++ = tdstateid.seqid;
2409 			*tl++ = tdstateid.other[0];
2410 			*tl++ = tdstateid.other[1];
2411 			*tl = tdstateid.other[2];
2412 		}
2413 		if (ret > 0) {
2414 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2415 			*tl = txdr_unsigned(NFSV4OP_PUTFH);
2416 			np = VTONFS(fdvp);
2417 			(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2418 			    np->n_fhp->nfh_len, 0);
2419 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2420 			*tl = txdr_unsigned(NFSV4OP_SAVEFH);
2421 		}
2422 	} else {
2423 		ret = 0;
2424 	}
2425 	if (ret == 0)
2426 		NFSCL_REQSTART(nd, NFSPROC_RENAME, fdvp);
2427 	if (nd->nd_flag & ND_NFSV4) {
2428 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2429 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2430 		NFSWCCATTR_ATTRBIT(&attrbits);
2431 		(void) nfsrv_putattrbit(nd, &attrbits);
2432 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2433 		*tl = txdr_unsigned(NFSV4OP_PUTFH);
2434 		(void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh,
2435 		    VTONFS(tdvp)->n_fhp->nfh_len, 0);
2436 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2437 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2438 		(void) nfsrv_putattrbit(nd, &attrbits);
2439 		nd->nd_flag |= ND_V4WCCATTR;
2440 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2441 		*tl = txdr_unsigned(NFSV4OP_RENAME);
2442 	}
2443 	(void) nfsm_strtom(nd, fnameptr, fnamelen);
2444 	if (!(nd->nd_flag & ND_NFSV4))
2445 		(void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh,
2446 			VTONFS(tdvp)->n_fhp->nfh_len, 0);
2447 	(void) nfsm_strtom(nd, tnameptr, tnamelen);
2448 	error = nfscl_request(nd, fdvp, p, cred, fstuff);
2449 	if (error)
2450 		return (error);
2451 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2452 		/* For NFSv4, parse out any Delereturn replies. */
2453 		if (ret > 0 && nd->nd_repstat != 0 &&
2454 		    (nd->nd_flag & ND_NOMOREDATA)) {
2455 			/*
2456 			 * If the Delegreturn failed, try again without
2457 			 * it. The server will Recall, as required.
2458 			 */
2459 			mbuf_freem(nd->nd_mrep);
2460 			goto tryagain;
2461 		}
2462 		for (i = 0; i < (ret * 2); i++) {
2463 			if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2464 			    ND_NFSV4) {
2465 			    NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2466 			    if (*(tl + 1)) {
2467 				if (i == 0 && ret > 1) {
2468 				    /*
2469 				     * If the Delegreturn failed, try again
2470 				     * without it. The server will Recall, as
2471 				     * required.
2472 				     * If ret > 1, the first iteration of this
2473 				     * loop is the second DelegReturn result.
2474 				     */
2475 				    mbuf_freem(nd->nd_mrep);
2476 				    goto tryagain;
2477 				} else {
2478 				    nd->nd_flag |= ND_NOMOREDATA;
2479 				}
2480 			    }
2481 			}
2482 		}
2483 		/* Now, the first wcc attribute reply. */
2484 		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
2485 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2486 			if (*(tl + 1))
2487 				nd->nd_flag |= ND_NOMOREDATA;
2488 		}
2489 		error = nfscl_wcc_data(nd, fdvp, fnap, fattrflagp, NULL,
2490 		    fstuff);
2491 		/* and the second wcc attribute reply. */
2492 		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 &&
2493 		    !error) {
2494 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2495 			if (*(tl + 1))
2496 				nd->nd_flag |= ND_NOMOREDATA;
2497 		}
2498 		if (!error)
2499 			error = nfscl_wcc_data(nd, tdvp, tnap, tattrflagp,
2500 			    NULL, tstuff);
2501 	}
2502 	if (nd->nd_repstat && !error)
2503 		error = nd->nd_repstat;
2504 nfsmout:
2505 	mbuf_freem(nd->nd_mrep);
2506 	return (error);
2507 }
2508 
2509 /*
2510  * nfs hard link create rpc
2511  */
2512 APPLESTATIC int
2513 nfsrpc_link(vnode_t dvp, vnode_t vp, char *name, int namelen,
2514     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2515     struct nfsvattr *nap, int *attrflagp, int *dattrflagp, void *dstuff)
2516 {
2517 	u_int32_t *tl;
2518 	struct nfsrv_descript nfsd, *nd = &nfsd;
2519 	nfsattrbit_t attrbits;
2520 	int error = 0;
2521 
2522 	*attrflagp = 0;
2523 	*dattrflagp = 0;
2524 	if (namelen > NFS_MAXNAMLEN)
2525 		return (ENAMETOOLONG);
2526 	NFSCL_REQSTART(nd, NFSPROC_LINK, vp);
2527 	if (nd->nd_flag & ND_NFSV4) {
2528 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2529 		*tl = txdr_unsigned(NFSV4OP_PUTFH);
2530 	}
2531 	(void) nfsm_fhtom(nd, VTONFS(dvp)->n_fhp->nfh_fh,
2532 		VTONFS(dvp)->n_fhp->nfh_len, 0);
2533 	if (nd->nd_flag & ND_NFSV4) {
2534 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2535 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2536 		NFSWCCATTR_ATTRBIT(&attrbits);
2537 		(void) nfsrv_putattrbit(nd, &attrbits);
2538 		nd->nd_flag |= ND_V4WCCATTR;
2539 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2540 		*tl = txdr_unsigned(NFSV4OP_LINK);
2541 	}
2542 	(void) nfsm_strtom(nd, name, namelen);
2543 	error = nfscl_request(nd, vp, p, cred, dstuff);
2544 	if (error)
2545 		return (error);
2546 	if (nd->nd_flag & ND_NFSV3) {
2547 		error = nfscl_postop_attr(nd, nap, attrflagp, dstuff);
2548 		if (!error)
2549 			error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
2550 			    NULL, dstuff);
2551 	} else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
2552 		/*
2553 		 * First, parse out the PutFH and Getattr result.
2554 		 */
2555 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2556 		if (!(*(tl + 1)))
2557 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2558 		if (*(tl + 1))
2559 			nd->nd_flag |= ND_NOMOREDATA;
2560 		/*
2561 		 * Get the pre-op attributes.
2562 		 */
2563 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2564 	}
2565 	if (nd->nd_repstat && !error)
2566 		error = nd->nd_repstat;
2567 nfsmout:
2568 	mbuf_freem(nd->nd_mrep);
2569 	return (error);
2570 }
2571 
2572 /*
2573  * nfs symbolic link create rpc
2574  */
2575 APPLESTATIC int
2576 nfsrpc_symlink(vnode_t dvp, char *name, int namelen, char *target,
2577     struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2578     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2579     int *dattrflagp, void *dstuff)
2580 {
2581 	u_int32_t *tl;
2582 	struct nfsrv_descript nfsd, *nd = &nfsd;
2583 	struct nfsmount *nmp;
2584 	int slen, error = 0;
2585 
2586 	*nfhpp = NULL;
2587 	*attrflagp = 0;
2588 	*dattrflagp = 0;
2589 	nmp = VFSTONFS(vnode_mount(dvp));
2590 	slen = strlen(target);
2591 	if (slen > NFS_MAXPATHLEN || namelen > NFS_MAXNAMLEN)
2592 		return (ENAMETOOLONG);
2593 	NFSCL_REQSTART(nd, NFSPROC_SYMLINK, dvp);
2594 	if (nd->nd_flag & ND_NFSV4) {
2595 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2596 		*tl = txdr_unsigned(NFLNK);
2597 		(void) nfsm_strtom(nd, target, slen);
2598 	}
2599 	(void) nfsm_strtom(nd, name, namelen);
2600 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
2601 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
2602 	if (!(nd->nd_flag & ND_NFSV4))
2603 		(void) nfsm_strtom(nd, target, slen);
2604 	if (nd->nd_flag & ND_NFSV2)
2605 		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
2606 	error = nfscl_request(nd, dvp, p, cred, dstuff);
2607 	if (error)
2608 		return (error);
2609 	if (nd->nd_flag & ND_NFSV4)
2610 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2611 	if ((nd->nd_flag & ND_NFSV3) && !error) {
2612 		if (!nd->nd_repstat)
2613 			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2614 		if (!error)
2615 			error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
2616 			    NULL, dstuff);
2617 	}
2618 	if (nd->nd_repstat && !error)
2619 		error = nd->nd_repstat;
2620 	mbuf_freem(nd->nd_mrep);
2621 	/*
2622 	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
2623 	 * Only do this if vfs.nfs.ignore_eexist is set.
2624 	 * Never do this for NFSv4.1 or later minor versions, since sessions
2625 	 * should guarantee "exactly once" RPC semantics.
2626 	 */
2627 	if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
2628 	    nmp->nm_minorvers == 0))
2629 		error = 0;
2630 	return (error);
2631 }
2632 
2633 /*
2634  * nfs make dir rpc
2635  */
2636 APPLESTATIC int
2637 nfsrpc_mkdir(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2638     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2639     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2640     int *dattrflagp, void *dstuff)
2641 {
2642 	u_int32_t *tl;
2643 	struct nfsrv_descript nfsd, *nd = &nfsd;
2644 	nfsattrbit_t attrbits;
2645 	int error = 0;
2646 	struct nfsfh *fhp;
2647 	struct nfsmount *nmp;
2648 
2649 	*nfhpp = NULL;
2650 	*attrflagp = 0;
2651 	*dattrflagp = 0;
2652 	nmp = VFSTONFS(vnode_mount(dvp));
2653 	fhp = VTONFS(dvp)->n_fhp;
2654 	if (namelen > NFS_MAXNAMLEN)
2655 		return (ENAMETOOLONG);
2656 	NFSCL_REQSTART(nd, NFSPROC_MKDIR, dvp);
2657 	if (nd->nd_flag & ND_NFSV4) {
2658 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2659 		*tl = txdr_unsigned(NFDIR);
2660 	}
2661 	(void) nfsm_strtom(nd, name, namelen);
2662 	nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
2663 	if (nd->nd_flag & ND_NFSV4) {
2664 		NFSGETATTR_ATTRBIT(&attrbits);
2665 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2666 		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
2667 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2668 		(void) nfsrv_putattrbit(nd, &attrbits);
2669 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2670 		*tl = txdr_unsigned(NFSV4OP_PUTFH);
2671 		(void) nfsm_fhtom(nd, fhp->nfh_fh, fhp->nfh_len, 0);
2672 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2673 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2674 		(void) nfsrv_putattrbit(nd, &attrbits);
2675 	}
2676 	error = nfscl_request(nd, dvp, p, cred, dstuff);
2677 	if (error)
2678 		return (error);
2679 	if (nd->nd_flag & ND_NFSV4)
2680 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2681 	if (!nd->nd_repstat && !error) {
2682 		if (nd->nd_flag & ND_NFSV4) {
2683 			NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2684 			error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2685 		}
2686 		if (!error)
2687 			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2688 		if (error == 0 && (nd->nd_flag & ND_NFSV4) != 0) {
2689 			/* Get rid of the PutFH and Getattr status values. */
2690 			NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2691 			/* Load the directory attributes. */
2692 			error = nfsm_loadattr(nd, dnap);
2693 			if (error == 0)
2694 				*dattrflagp = 1;
2695 		}
2696 	}
2697 	if ((nd->nd_flag & ND_NFSV3) && !error)
2698 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2699 	if (nd->nd_repstat && !error)
2700 		error = nd->nd_repstat;
2701 nfsmout:
2702 	mbuf_freem(nd->nd_mrep);
2703 	/*
2704 	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
2705 	 * Only do this if vfs.nfs.ignore_eexist is set.
2706 	 * Never do this for NFSv4.1 or later minor versions, since sessions
2707 	 * should guarantee "exactly once" RPC semantics.
2708 	 */
2709 	if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
2710 	    nmp->nm_minorvers == 0))
2711 		error = 0;
2712 	return (error);
2713 }
2714 
2715 /*
2716  * nfs remove directory call
2717  */
2718 APPLESTATIC int
2719 nfsrpc_rmdir(vnode_t dvp, char *name, int namelen, struct ucred *cred,
2720     NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp, void *dstuff)
2721 {
2722 	struct nfsrv_descript nfsd, *nd = &nfsd;
2723 	int error = 0;
2724 
2725 	*dattrflagp = 0;
2726 	if (namelen > NFS_MAXNAMLEN)
2727 		return (ENAMETOOLONG);
2728 	NFSCL_REQSTART(nd, NFSPROC_RMDIR, dvp);
2729 	(void) nfsm_strtom(nd, name, namelen);
2730 	error = nfscl_request(nd, dvp, p, cred, dstuff);
2731 	if (error)
2732 		return (error);
2733 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
2734 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2735 	if (nd->nd_repstat && !error)
2736 		error = nd->nd_repstat;
2737 	mbuf_freem(nd->nd_mrep);
2738 	/*
2739 	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
2740 	 */
2741 	if (error == ENOENT)
2742 		error = 0;
2743 	return (error);
2744 }
2745 
2746 /*
2747  * Readdir rpc.
2748  * Always returns with either uio_resid unchanged, if you are at the
2749  * end of the directory, or uio_resid == 0, with all DIRBLKSIZ chunks
2750  * filled in.
2751  * I felt this would allow caching of directory blocks more easily
2752  * than returning a pertially filled block.
2753  * Directory offset cookies:
2754  * Oh my, what to do with them...
2755  * I can think of three ways to deal with them:
2756  * 1 - have the layer above these RPCs maintain a map between logical
2757  *     directory byte offsets and the NFS directory offset cookies
2758  * 2 - pass the opaque directory offset cookies up into userland
2759  *     and let the libc functions deal with them, via the system call
2760  * 3 - return them to userland in the "struct dirent", so future versions
2761  *     of libc can use them and do whatever is necessary to make things work
2762  *     above these rpc calls, in the meantime
2763  * For now, I do #3 by "hiding" the directory offset cookies after the
2764  * d_name field in struct dirent. This is space inside d_reclen that
2765  * will be ignored by anything that doesn't know about them.
2766  * The directory offset cookies are filled in as the last 8 bytes of
2767  * each directory entry, after d_name. Someday, the userland libc
2768  * functions may be able to use these. In the meantime, it satisfies
2769  * OpenBSD's requirements for cookies being returned.
2770  * If expects the directory offset cookie for the read to be in uio_offset
2771  * and returns the one for the next entry after this directory block in
2772  * there, as well.
2773  */
2774 APPLESTATIC int
2775 nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
2776     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
2777     int *eofp, void *stuff)
2778 {
2779 	int len, left;
2780 	struct dirent *dp = NULL;
2781 	u_int32_t *tl;
2782 	nfsquad_t cookie, ncookie;
2783 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
2784 	struct nfsnode *dnp = VTONFS(vp);
2785 	struct nfsvattr nfsva;
2786 	struct nfsrv_descript nfsd, *nd = &nfsd;
2787 	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
2788 	int reqsize, tryformoredirs = 1, readsize, eof = 0, gotmnton = 0;
2789 	u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
2790 	char *cp;
2791 	nfsattrbit_t attrbits, dattrbits;
2792 	u_int32_t rderr, *tl2 = NULL;
2793 	size_t tresid;
2794 
2795 	KASSERT(uiop->uio_iovcnt == 1 &&
2796 	    (uio_uio_resid(uiop) & (DIRBLKSIZ - 1)) == 0,
2797 	    ("nfs readdirrpc bad uio"));
2798 
2799 	/*
2800 	 * There is no point in reading a lot more than uio_resid, however
2801 	 * adding one additional DIRBLKSIZ makes sense. Since uio_resid
2802 	 * and nm_readdirsize are both exact multiples of DIRBLKSIZ, this
2803 	 * will never make readsize > nm_readdirsize.
2804 	 */
2805 	readsize = nmp->nm_readdirsize;
2806 	if (readsize > uio_uio_resid(uiop))
2807 		readsize = uio_uio_resid(uiop) + DIRBLKSIZ;
2808 
2809 	*attrflagp = 0;
2810 	if (eofp)
2811 		*eofp = 0;
2812 	tresid = uio_uio_resid(uiop);
2813 	cookie.lval[0] = cookiep->nfsuquad[0];
2814 	cookie.lval[1] = cookiep->nfsuquad[1];
2815 	nd->nd_mrep = NULL;
2816 
2817 	/*
2818 	 * For NFSv4, first create the "." and ".." entries.
2819 	 */
2820 	if (NFSHASNFSV4(nmp)) {
2821 		reqsize = 6 * NFSX_UNSIGNED;
2822 		NFSGETATTR_ATTRBIT(&dattrbits);
2823 		NFSZERO_ATTRBIT(&attrbits);
2824 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
2825 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE);
2826 		if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
2827 		    NFSATTRBIT_MOUNTEDONFILEID)) {
2828 			NFSSETBIT_ATTRBIT(&attrbits,
2829 			    NFSATTRBIT_MOUNTEDONFILEID);
2830 			gotmnton = 1;
2831 		} else {
2832 			/*
2833 			 * Must fake it. Use the fileno, except when the
2834 			 * fsid is != to that of the directory. For that
2835 			 * case, generate a fake fileno that is not the same.
2836 			 */
2837 			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
2838 			gotmnton = 0;
2839 		}
2840 
2841 		/*
2842 		 * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
2843 		 */
2844 		if (uiop->uio_offset == 0) {
2845 			NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp);
2846 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2847 			*tl++ = txdr_unsigned(NFSV4OP_GETFH);
2848 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
2849 			(void) nfsrv_putattrbit(nd, &attrbits);
2850 			error = nfscl_request(nd, vp, p, cred, stuff);
2851 			if (error)
2852 			    return (error);
2853 			dotfileid = 0;	/* Fake out the compiler. */
2854 			if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
2855 			    error = nfsm_loadattr(nd, &nfsva);
2856 			    if (error != 0)
2857 				goto nfsmout;
2858 			    dotfileid = nfsva.na_fileid;
2859 			}
2860 			if (nd->nd_repstat == 0) {
2861 			    NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2862 			    len = fxdr_unsigned(int, *(tl + 4));
2863 			    if (len > 0 && len <= NFSX_V4FHMAX)
2864 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
2865 			    else
2866 				error = EPERM;
2867 			    if (!error) {
2868 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
2869 				nfsva.na_mntonfileno = UINT64_MAX;
2870 				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
2871 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
2872 				    NULL, NULL, NULL, p, cred);
2873 				if (error) {
2874 				    dotdotfileid = dotfileid;
2875 				} else if (gotmnton) {
2876 				    if (nfsva.na_mntonfileno != UINT64_MAX)
2877 					dotdotfileid = nfsva.na_mntonfileno;
2878 				    else
2879 					dotdotfileid = nfsva.na_fileid;
2880 				} else if (nfsva.na_filesid[0] ==
2881 				    dnp->n_vattr.na_filesid[0] &&
2882 				    nfsva.na_filesid[1] ==
2883 				    dnp->n_vattr.na_filesid[1]) {
2884 				    dotdotfileid = nfsva.na_fileid;
2885 				} else {
2886 				    do {
2887 					fakefileno--;
2888 				    } while (fakefileno ==
2889 					nfsva.na_fileid);
2890 				    dotdotfileid = fakefileno;
2891 				}
2892 			    }
2893 			} else if (nd->nd_repstat == NFSERR_NOENT) {
2894 			    /*
2895 			     * Lookupp returns NFSERR_NOENT when we are
2896 			     * at the root, so just use the current dir.
2897 			     */
2898 			    nd->nd_repstat = 0;
2899 			    dotdotfileid = dotfileid;
2900 			} else {
2901 			    error = nd->nd_repstat;
2902 			}
2903 			mbuf_freem(nd->nd_mrep);
2904 			if (error)
2905 			    return (error);
2906 			nd->nd_mrep = NULL;
2907 			dp = (struct dirent *)uio_iov_base(uiop);
2908 			dp->d_off = 0;
2909 			dp->d_type = DT_DIR;
2910 			dp->d_fileno = dotfileid;
2911 			dp->d_namlen = 1;
2912 			*((uint64_t *)dp->d_name) = 0;	/* Zero pad it. */
2913 			dp->d_name[0] = '.';
2914 			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
2915 			/*
2916 			 * Just make these offset cookie 0.
2917 			 */
2918 			tl = (u_int32_t *)&dp->d_name[8];
2919 			*tl++ = 0;
2920 			*tl = 0;
2921 			blksiz += dp->d_reclen;
2922 			uio_uio_resid_add(uiop, -(dp->d_reclen));
2923 			uiop->uio_offset += dp->d_reclen;
2924 			uio_iov_base_add(uiop, dp->d_reclen);
2925 			uio_iov_len_add(uiop, -(dp->d_reclen));
2926 			dp = (struct dirent *)uio_iov_base(uiop);
2927 			dp->d_off = 0;
2928 			dp->d_type = DT_DIR;
2929 			dp->d_fileno = dotdotfileid;
2930 			dp->d_namlen = 2;
2931 			*((uint64_t *)dp->d_name) = 0;
2932 			dp->d_name[0] = '.';
2933 			dp->d_name[1] = '.';
2934 			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
2935 			/*
2936 			 * Just make these offset cookie 0.
2937 			 */
2938 			tl = (u_int32_t *)&dp->d_name[8];
2939 			*tl++ = 0;
2940 			*tl = 0;
2941 			blksiz += dp->d_reclen;
2942 			uio_uio_resid_add(uiop, -(dp->d_reclen));
2943 			uiop->uio_offset += dp->d_reclen;
2944 			uio_iov_base_add(uiop, dp->d_reclen);
2945 			uio_iov_len_add(uiop, -(dp->d_reclen));
2946 		}
2947 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR);
2948 	} else {
2949 		reqsize = 5 * NFSX_UNSIGNED;
2950 	}
2951 
2952 
2953 	/*
2954 	 * Loop around doing readdir rpc's of size readsize.
2955 	 * The stopping criteria is EOF or buffer full.
2956 	 */
2957 	while (more_dirs && bigenough) {
2958 		*attrflagp = 0;
2959 		NFSCL_REQSTART(nd, NFSPROC_READDIR, vp);
2960 		if (nd->nd_flag & ND_NFSV2) {
2961 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2962 			*tl++ = cookie.lval[1];
2963 			*tl = txdr_unsigned(readsize);
2964 		} else {
2965 			NFSM_BUILD(tl, u_int32_t *, reqsize);
2966 			*tl++ = cookie.lval[0];
2967 			*tl++ = cookie.lval[1];
2968 			if (cookie.qval == 0) {
2969 				*tl++ = 0;
2970 				*tl++ = 0;
2971 			} else {
2972 				NFSLOCKNODE(dnp);
2973 				*tl++ = dnp->n_cookieverf.nfsuquad[0];
2974 				*tl++ = dnp->n_cookieverf.nfsuquad[1];
2975 				NFSUNLOCKNODE(dnp);
2976 			}
2977 			if (nd->nd_flag & ND_NFSV4) {
2978 				*tl++ = txdr_unsigned(readsize);
2979 				*tl = txdr_unsigned(readsize);
2980 				(void) nfsrv_putattrbit(nd, &attrbits);
2981 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2982 				*tl = txdr_unsigned(NFSV4OP_GETATTR);
2983 				(void) nfsrv_putattrbit(nd, &dattrbits);
2984 			} else {
2985 				*tl = txdr_unsigned(readsize);
2986 			}
2987 		}
2988 		error = nfscl_request(nd, vp, p, cred, stuff);
2989 		if (error)
2990 			return (error);
2991 		if (!(nd->nd_flag & ND_NFSV2)) {
2992 			if (nd->nd_flag & ND_NFSV3)
2993 				error = nfscl_postop_attr(nd, nap, attrflagp,
2994 				    stuff);
2995 			if (!nd->nd_repstat && !error) {
2996 				NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
2997 				NFSLOCKNODE(dnp);
2998 				dnp->n_cookieverf.nfsuquad[0] = *tl++;
2999 				dnp->n_cookieverf.nfsuquad[1] = *tl;
3000 				NFSUNLOCKNODE(dnp);
3001 			}
3002 		}
3003 		if (nd->nd_repstat || error) {
3004 			if (!error)
3005 				error = nd->nd_repstat;
3006 			goto nfsmout;
3007 		}
3008 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3009 		more_dirs = fxdr_unsigned(int, *tl);
3010 		if (!more_dirs)
3011 			tryformoredirs = 0;
3012 
3013 		/* loop through the dir entries, doctoring them to 4bsd form */
3014 		while (more_dirs && bigenough) {
3015 			if (nd->nd_flag & ND_NFSV4) {
3016 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3017 				ncookie.lval[0] = *tl++;
3018 				ncookie.lval[1] = *tl++;
3019 				len = fxdr_unsigned(int, *tl);
3020 			} else if (nd->nd_flag & ND_NFSV3) {
3021 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3022 				nfsva.na_fileid = fxdr_hyper(tl);
3023 				tl += 2;
3024 				len = fxdr_unsigned(int, *tl);
3025 			} else {
3026 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3027 				nfsva.na_fileid = fxdr_unsigned(uint64_t,
3028 				    *tl++);
3029 				len = fxdr_unsigned(int, *tl);
3030 			}
3031 			if (len <= 0 || len > NFS_MAXNAMLEN) {
3032 				error = EBADRPC;
3033 				goto nfsmout;
3034 			}
3035 			tlen = roundup2(len, 8);
3036 			if (tlen == len)
3037 				tlen += 8;  /* To ensure null termination. */
3038 			left = DIRBLKSIZ - blksiz;
3039 			if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
3040 				dp->d_reclen += left;
3041 				uio_iov_base_add(uiop, left);
3042 				uio_iov_len_add(uiop, -(left));
3043 				uio_uio_resid_add(uiop, -(left));
3044 				uiop->uio_offset += left;
3045 				blksiz = 0;
3046 			}
3047 			if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
3048 			    uio_uio_resid(uiop))
3049 				bigenough = 0;
3050 			if (bigenough) {
3051 				dp = (struct dirent *)uio_iov_base(uiop);
3052 				dp->d_off = 0;
3053 				dp->d_namlen = len;
3054 				dp->d_reclen = _GENERIC_DIRLEN(len) +
3055 				    NFSX_HYPER;
3056 				dp->d_type = DT_UNKNOWN;
3057 				blksiz += dp->d_reclen;
3058 				if (blksiz == DIRBLKSIZ)
3059 					blksiz = 0;
3060 				uio_uio_resid_add(uiop, -(DIRHDSIZ));
3061 				uiop->uio_offset += DIRHDSIZ;
3062 				uio_iov_base_add(uiop, DIRHDSIZ);
3063 				uio_iov_len_add(uiop, -(DIRHDSIZ));
3064 				error = nfsm_mbufuio(nd, uiop, len);
3065 				if (error)
3066 					goto nfsmout;
3067 				cp = uio_iov_base(uiop);
3068 				tlen -= len;
3069 				*cp = '\0';	/* null terminate */
3070 				cp += tlen;	/* points to cookie storage */
3071 				tl2 = (u_int32_t *)cp;
3072 				uio_iov_base_add(uiop, (tlen + NFSX_HYPER));
3073 				uio_iov_len_add(uiop, -(tlen + NFSX_HYPER));
3074 				uio_uio_resid_add(uiop, -(tlen + NFSX_HYPER));
3075 				uiop->uio_offset += (tlen + NFSX_HYPER);
3076 			} else {
3077 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3078 				if (error)
3079 					goto nfsmout;
3080 			}
3081 			if (nd->nd_flag & ND_NFSV4) {
3082 				rderr = 0;
3083 				nfsva.na_mntonfileno = UINT64_MAX;
3084 				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3085 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3086 				    NULL, NULL, &rderr, p, cred);
3087 				if (error)
3088 					goto nfsmout;
3089 				NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3090 			} else if (nd->nd_flag & ND_NFSV3) {
3091 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3092 				ncookie.lval[0] = *tl++;
3093 				ncookie.lval[1] = *tl++;
3094 			} else {
3095 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3096 				ncookie.lval[0] = 0;
3097 				ncookie.lval[1] = *tl++;
3098 			}
3099 			if (bigenough) {
3100 			    if (nd->nd_flag & ND_NFSV4) {
3101 				if (rderr) {
3102 				    dp->d_fileno = 0;
3103 				} else {
3104 				    if (gotmnton) {
3105 					if (nfsva.na_mntonfileno != UINT64_MAX)
3106 					    dp->d_fileno = nfsva.na_mntonfileno;
3107 					else
3108 					    dp->d_fileno = nfsva.na_fileid;
3109 				    } else if (nfsva.na_filesid[0] ==
3110 					dnp->n_vattr.na_filesid[0] &&
3111 					nfsva.na_filesid[1] ==
3112 					dnp->n_vattr.na_filesid[1]) {
3113 					dp->d_fileno = nfsva.na_fileid;
3114 				    } else {
3115 					do {
3116 					    fakefileno--;
3117 					} while (fakefileno ==
3118 					    nfsva.na_fileid);
3119 					dp->d_fileno = fakefileno;
3120 				    }
3121 				    dp->d_type = vtonfs_dtype(nfsva.na_type);
3122 				}
3123 			    } else {
3124 				dp->d_fileno = nfsva.na_fileid;
3125 			    }
3126 			    *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
3127 				ncookie.lval[0];
3128 			    *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
3129 				ncookie.lval[1];
3130 			}
3131 			more_dirs = fxdr_unsigned(int, *tl);
3132 		}
3133 		/*
3134 		 * If at end of rpc data, get the eof boolean
3135 		 */
3136 		if (!more_dirs) {
3137 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3138 			eof = fxdr_unsigned(int, *tl);
3139 			if (tryformoredirs)
3140 				more_dirs = !eof;
3141 			if (nd->nd_flag & ND_NFSV4) {
3142 				error = nfscl_postop_attr(nd, nap, attrflagp,
3143 				    stuff);
3144 				if (error)
3145 					goto nfsmout;
3146 			}
3147 		}
3148 		mbuf_freem(nd->nd_mrep);
3149 		nd->nd_mrep = NULL;
3150 	}
3151 	/*
3152 	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
3153 	 * by increasing d_reclen for the last record.
3154 	 */
3155 	if (blksiz > 0) {
3156 		left = DIRBLKSIZ - blksiz;
3157 		dp->d_reclen += left;
3158 		uio_iov_base_add(uiop, left);
3159 		uio_iov_len_add(uiop, -(left));
3160 		uio_uio_resid_add(uiop, -(left));
3161 		uiop->uio_offset += left;
3162 	}
3163 
3164 	/*
3165 	 * If returning no data, assume end of file.
3166 	 * If not bigenough, return not end of file, since you aren't
3167 	 *    returning all the data
3168 	 * Otherwise, return the eof flag from the server.
3169 	 */
3170 	if (eofp) {
3171 		if (tresid == ((size_t)(uio_uio_resid(uiop))))
3172 			*eofp = 1;
3173 		else if (!bigenough)
3174 			*eofp = 0;
3175 		else
3176 			*eofp = eof;
3177 	}
3178 
3179 	/*
3180 	 * Add extra empty records to any remaining DIRBLKSIZ chunks.
3181 	 */
3182 	while (uio_uio_resid(uiop) > 0 && uio_uio_resid(uiop) != tresid) {
3183 		dp = (struct dirent *)uio_iov_base(uiop);
3184 		dp->d_type = DT_UNKNOWN;
3185 		dp->d_fileno = 0;
3186 		dp->d_namlen = 0;
3187 		dp->d_name[0] = '\0';
3188 		tl = (u_int32_t *)&dp->d_name[4];
3189 		*tl++ = cookie.lval[0];
3190 		*tl = cookie.lval[1];
3191 		dp->d_reclen = DIRBLKSIZ;
3192 		uio_iov_base_add(uiop, DIRBLKSIZ);
3193 		uio_iov_len_add(uiop, -(DIRBLKSIZ));
3194 		uio_uio_resid_add(uiop, -(DIRBLKSIZ));
3195 		uiop->uio_offset += DIRBLKSIZ;
3196 	}
3197 
3198 nfsmout:
3199 	if (nd->nd_mrep != NULL)
3200 		mbuf_freem(nd->nd_mrep);
3201 	return (error);
3202 }
3203 
3204 #ifndef APPLE
3205 /*
3206  * NFS V3 readdir plus RPC. Used in place of nfsrpc_readdir().
3207  * (Also used for NFS V4 when mount flag set.)
3208  * (ditto above w.r.t. multiple of DIRBLKSIZ, etc.)
3209  */
3210 APPLESTATIC int
3211 nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
3212     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
3213     int *eofp, void *stuff)
3214 {
3215 	int len, left;
3216 	struct dirent *dp = NULL;
3217 	u_int32_t *tl;
3218 	vnode_t newvp = NULLVP;
3219 	struct nfsrv_descript nfsd, *nd = &nfsd;
3220 	struct nameidata nami, *ndp = &nami;
3221 	struct componentname *cnp = &ndp->ni_cnd;
3222 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
3223 	struct nfsnode *dnp = VTONFS(vp), *np;
3224 	struct nfsvattr nfsva;
3225 	struct nfsfh *nfhp;
3226 	nfsquad_t cookie, ncookie;
3227 	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
3228 	int attrflag, tryformoredirs = 1, eof = 0, gotmnton = 0;
3229 	int isdotdot = 0, unlocknewvp = 0;
3230 	u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
3231 	u_int64_t fileno = 0;
3232 	char *cp;
3233 	nfsattrbit_t attrbits, dattrbits;
3234 	size_t tresid;
3235 	u_int32_t *tl2 = NULL, rderr;
3236 	struct timespec dctime;
3237 
3238 	KASSERT(uiop->uio_iovcnt == 1 &&
3239 	    (uio_uio_resid(uiop) & (DIRBLKSIZ - 1)) == 0,
3240 	    ("nfs readdirplusrpc bad uio"));
3241 	timespecclear(&dctime);
3242 	*attrflagp = 0;
3243 	if (eofp != NULL)
3244 		*eofp = 0;
3245 	ndp->ni_dvp = vp;
3246 	nd->nd_mrep = NULL;
3247 	cookie.lval[0] = cookiep->nfsuquad[0];
3248 	cookie.lval[1] = cookiep->nfsuquad[1];
3249 	tresid = uio_uio_resid(uiop);
3250 
3251 	/*
3252 	 * For NFSv4, first create the "." and ".." entries.
3253 	 */
3254 	if (NFSHASNFSV4(nmp)) {
3255 		NFSGETATTR_ATTRBIT(&dattrbits);
3256 		NFSZERO_ATTRBIT(&attrbits);
3257 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
3258 		if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3259 		    NFSATTRBIT_MOUNTEDONFILEID)) {
3260 			NFSSETBIT_ATTRBIT(&attrbits,
3261 			    NFSATTRBIT_MOUNTEDONFILEID);
3262 			gotmnton = 1;
3263 		} else {
3264 			/*
3265 			 * Must fake it. Use the fileno, except when the
3266 			 * fsid is != to that of the directory. For that
3267 			 * case, generate a fake fileno that is not the same.
3268 			 */
3269 			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
3270 			gotmnton = 0;
3271 		}
3272 
3273 		/*
3274 		 * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
3275 		 */
3276 		if (uiop->uio_offset == 0) {
3277 			NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp);
3278 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3279 			*tl++ = txdr_unsigned(NFSV4OP_GETFH);
3280 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
3281 			(void) nfsrv_putattrbit(nd, &attrbits);
3282 			error = nfscl_request(nd, vp, p, cred, stuff);
3283 			if (error)
3284 			    return (error);
3285 			dotfileid = 0;	/* Fake out the compiler. */
3286 			if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
3287 			    error = nfsm_loadattr(nd, &nfsva);
3288 			    if (error != 0)
3289 				goto nfsmout;
3290 			    dctime = nfsva.na_ctime;
3291 			    dotfileid = nfsva.na_fileid;
3292 			}
3293 			if (nd->nd_repstat == 0) {
3294 			    NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
3295 			    len = fxdr_unsigned(int, *(tl + 4));
3296 			    if (len > 0 && len <= NFSX_V4FHMAX)
3297 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3298 			    else
3299 				error = EPERM;
3300 			    if (!error) {
3301 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3302 				nfsva.na_mntonfileno = UINT64_MAX;
3303 				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3304 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3305 				    NULL, NULL, NULL, p, cred);
3306 				if (error) {
3307 				    dotdotfileid = dotfileid;
3308 				} else if (gotmnton) {
3309 				    if (nfsva.na_mntonfileno != UINT64_MAX)
3310 					dotdotfileid = nfsva.na_mntonfileno;
3311 				    else
3312 					dotdotfileid = nfsva.na_fileid;
3313 				} else if (nfsva.na_filesid[0] ==
3314 				    dnp->n_vattr.na_filesid[0] &&
3315 				    nfsva.na_filesid[1] ==
3316 				    dnp->n_vattr.na_filesid[1]) {
3317 				    dotdotfileid = nfsva.na_fileid;
3318 				} else {
3319 				    do {
3320 					fakefileno--;
3321 				    } while (fakefileno ==
3322 					nfsva.na_fileid);
3323 				    dotdotfileid = fakefileno;
3324 				}
3325 			    }
3326 			} else if (nd->nd_repstat == NFSERR_NOENT) {
3327 			    /*
3328 			     * Lookupp returns NFSERR_NOENT when we are
3329 			     * at the root, so just use the current dir.
3330 			     */
3331 			    nd->nd_repstat = 0;
3332 			    dotdotfileid = dotfileid;
3333 			} else {
3334 			    error = nd->nd_repstat;
3335 			}
3336 			mbuf_freem(nd->nd_mrep);
3337 			if (error)
3338 			    return (error);
3339 			nd->nd_mrep = NULL;
3340 			dp = (struct dirent *)uio_iov_base(uiop);
3341 			dp->d_off = 0;
3342 			dp->d_type = DT_DIR;
3343 			dp->d_fileno = dotfileid;
3344 			dp->d_namlen = 1;
3345 			*((uint64_t *)dp->d_name) = 0;	/* Zero pad it. */
3346 			dp->d_name[0] = '.';
3347 			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3348 			/*
3349 			 * Just make these offset cookie 0.
3350 			 */
3351 			tl = (u_int32_t *)&dp->d_name[8];
3352 			*tl++ = 0;
3353 			*tl = 0;
3354 			blksiz += dp->d_reclen;
3355 			uio_uio_resid_add(uiop, -(dp->d_reclen));
3356 			uiop->uio_offset += dp->d_reclen;
3357 			uio_iov_base_add(uiop, dp->d_reclen);
3358 			uio_iov_len_add(uiop, -(dp->d_reclen));
3359 			dp = (struct dirent *)uio_iov_base(uiop);
3360 			dp->d_off = 0;
3361 			dp->d_type = DT_DIR;
3362 			dp->d_fileno = dotdotfileid;
3363 			dp->d_namlen = 2;
3364 			*((uint64_t *)dp->d_name) = 0;
3365 			dp->d_name[0] = '.';
3366 			dp->d_name[1] = '.';
3367 			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3368 			/*
3369 			 * Just make these offset cookie 0.
3370 			 */
3371 			tl = (u_int32_t *)&dp->d_name[8];
3372 			*tl++ = 0;
3373 			*tl = 0;
3374 			blksiz += dp->d_reclen;
3375 			uio_uio_resid_add(uiop, -(dp->d_reclen));
3376 			uiop->uio_offset += dp->d_reclen;
3377 			uio_iov_base_add(uiop, dp->d_reclen);
3378 			uio_iov_len_add(uiop, -(dp->d_reclen));
3379 		}
3380 		NFSREADDIRPLUS_ATTRBIT(&attrbits);
3381 		if (gotmnton)
3382 			NFSSETBIT_ATTRBIT(&attrbits,
3383 			    NFSATTRBIT_MOUNTEDONFILEID);
3384 	}
3385 
3386 	/*
3387 	 * Loop around doing readdir rpc's of size nm_readdirsize.
3388 	 * The stopping criteria is EOF or buffer full.
3389 	 */
3390 	while (more_dirs && bigenough) {
3391 		*attrflagp = 0;
3392 		NFSCL_REQSTART(nd, NFSPROC_READDIRPLUS, vp);
3393  		NFSM_BUILD(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
3394 		*tl++ = cookie.lval[0];
3395 		*tl++ = cookie.lval[1];
3396 		if (cookie.qval == 0) {
3397 			*tl++ = 0;
3398 			*tl++ = 0;
3399 		} else {
3400 			NFSLOCKNODE(dnp);
3401 			*tl++ = dnp->n_cookieverf.nfsuquad[0];
3402 			*tl++ = dnp->n_cookieverf.nfsuquad[1];
3403 			NFSUNLOCKNODE(dnp);
3404 		}
3405 		*tl++ = txdr_unsigned(nmp->nm_readdirsize);
3406 		*tl = txdr_unsigned(nmp->nm_readdirsize);
3407 		if (nd->nd_flag & ND_NFSV4) {
3408 			(void) nfsrv_putattrbit(nd, &attrbits);
3409 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3410 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
3411 			(void) nfsrv_putattrbit(nd, &dattrbits);
3412 		}
3413 		error = nfscl_request(nd, vp, p, cred, stuff);
3414 		if (error)
3415 			return (error);
3416 		if (nd->nd_flag & ND_NFSV3)
3417 			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
3418 		if (nd->nd_repstat || error) {
3419 			if (!error)
3420 				error = nd->nd_repstat;
3421 			goto nfsmout;
3422 		}
3423 		if ((nd->nd_flag & ND_NFSV3) != 0 && *attrflagp != 0)
3424 			dctime = nap->na_ctime;
3425 		NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3426 		NFSLOCKNODE(dnp);
3427 		dnp->n_cookieverf.nfsuquad[0] = *tl++;
3428 		dnp->n_cookieverf.nfsuquad[1] = *tl++;
3429 		NFSUNLOCKNODE(dnp);
3430 		more_dirs = fxdr_unsigned(int, *tl);
3431 		if (!more_dirs)
3432 			tryformoredirs = 0;
3433 
3434 		/* loop through the dir entries, doctoring them to 4bsd form */
3435 		while (more_dirs && bigenough) {
3436 			NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3437 			if (nd->nd_flag & ND_NFSV4) {
3438 				ncookie.lval[0] = *tl++;
3439 				ncookie.lval[1] = *tl++;
3440 			} else {
3441 				fileno = fxdr_hyper(tl);
3442 				tl += 2;
3443 			}
3444 			len = fxdr_unsigned(int, *tl);
3445 			if (len <= 0 || len > NFS_MAXNAMLEN) {
3446 				error = EBADRPC;
3447 				goto nfsmout;
3448 			}
3449 			tlen = roundup2(len, 8);
3450 			if (tlen == len)
3451 				tlen += 8;  /* To ensure null termination. */
3452 			left = DIRBLKSIZ - blksiz;
3453 			if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
3454 				dp->d_reclen += left;
3455 				uio_iov_base_add(uiop, left);
3456 				uio_iov_len_add(uiop, -(left));
3457 				uio_uio_resid_add(uiop, -(left));
3458 				uiop->uio_offset += left;
3459 				blksiz = 0;
3460 			}
3461 			if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
3462 			    uio_uio_resid(uiop))
3463 				bigenough = 0;
3464 			if (bigenough) {
3465 				dp = (struct dirent *)uio_iov_base(uiop);
3466 				dp->d_off = 0;
3467 				dp->d_namlen = len;
3468 				dp->d_reclen = _GENERIC_DIRLEN(len) +
3469 				    NFSX_HYPER;
3470 				dp->d_type = DT_UNKNOWN;
3471 				blksiz += dp->d_reclen;
3472 				if (blksiz == DIRBLKSIZ)
3473 					blksiz = 0;
3474 				uio_uio_resid_add(uiop, -(DIRHDSIZ));
3475 				uiop->uio_offset += DIRHDSIZ;
3476 				uio_iov_base_add(uiop, DIRHDSIZ);
3477 				uio_iov_len_add(uiop, -(DIRHDSIZ));
3478 				cnp->cn_nameptr = uio_iov_base(uiop);
3479 				cnp->cn_namelen = len;
3480 				NFSCNHASHZERO(cnp);
3481 				error = nfsm_mbufuio(nd, uiop, len);
3482 				if (error)
3483 					goto nfsmout;
3484 				cp = uio_iov_base(uiop);
3485 				tlen -= len;
3486 				*cp = '\0';
3487 				cp += tlen;	/* points to cookie storage */
3488 				tl2 = (u_int32_t *)cp;
3489 				if (len == 2 && cnp->cn_nameptr[0] == '.' &&
3490 				    cnp->cn_nameptr[1] == '.')
3491 					isdotdot = 1;
3492 				else
3493 					isdotdot = 0;
3494 				uio_iov_base_add(uiop, (tlen + NFSX_HYPER));
3495 				uio_iov_len_add(uiop, -(tlen + NFSX_HYPER));
3496 				uio_uio_resid_add(uiop, -(tlen + NFSX_HYPER));
3497 				uiop->uio_offset += (tlen + NFSX_HYPER);
3498 			} else {
3499 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3500 				if (error)
3501 					goto nfsmout;
3502 			}
3503 			nfhp = NULL;
3504 			if (nd->nd_flag & ND_NFSV3) {
3505 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3506 				ncookie.lval[0] = *tl++;
3507 				ncookie.lval[1] = *tl++;
3508 				attrflag = fxdr_unsigned(int, *tl);
3509 				if (attrflag) {
3510 				  error = nfsm_loadattr(nd, &nfsva);
3511 				  if (error)
3512 					goto nfsmout;
3513 				}
3514 				NFSM_DISSECT(tl,u_int32_t *,NFSX_UNSIGNED);
3515 				if (*tl) {
3516 					error = nfsm_getfh(nd, &nfhp);
3517 					if (error)
3518 					    goto nfsmout;
3519 				}
3520 				if (!attrflag && nfhp != NULL) {
3521 					FREE((caddr_t)nfhp, M_NFSFH);
3522 					nfhp = NULL;
3523 				}
3524 			} else {
3525 				rderr = 0;
3526 				nfsva.na_mntonfileno = 0xffffffff;
3527 				error = nfsv4_loadattr(nd, NULL, &nfsva, &nfhp,
3528 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3529 				    NULL, NULL, &rderr, p, cred);
3530 				if (error)
3531 					goto nfsmout;
3532 			}
3533 
3534 			if (bigenough) {
3535 			    if (nd->nd_flag & ND_NFSV4) {
3536 				if (rderr) {
3537 				    dp->d_fileno = 0;
3538 				} else if (gotmnton) {
3539 				    if (nfsva.na_mntonfileno != 0xffffffff)
3540 					dp->d_fileno = nfsva.na_mntonfileno;
3541 				    else
3542 					dp->d_fileno = nfsva.na_fileid;
3543 				} else if (nfsva.na_filesid[0] ==
3544 				    dnp->n_vattr.na_filesid[0] &&
3545 				    nfsva.na_filesid[1] ==
3546 				    dnp->n_vattr.na_filesid[1]) {
3547 				    dp->d_fileno = nfsva.na_fileid;
3548 				} else {
3549 				    do {
3550 					fakefileno--;
3551 				    } while (fakefileno ==
3552 					nfsva.na_fileid);
3553 				    dp->d_fileno = fakefileno;
3554 				}
3555 			    } else {
3556 				dp->d_fileno = fileno;
3557 			    }
3558 			    *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
3559 				ncookie.lval[0];
3560 			    *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
3561 				ncookie.lval[1];
3562 
3563 			    if (nfhp != NULL) {
3564 				if (NFSRV_CMPFH(nfhp->nfh_fh, nfhp->nfh_len,
3565 				    dnp->n_fhp->nfh_fh, dnp->n_fhp->nfh_len)) {
3566 				    VREF(vp);
3567 				    newvp = vp;
3568 				    unlocknewvp = 0;
3569 				    FREE((caddr_t)nfhp, M_NFSFH);
3570 				    np = dnp;
3571 				} else if (isdotdot != 0) {
3572 				    /*
3573 				     * Skip doing a nfscl_nget() call for "..".
3574 				     * There's a race between acquiring the nfs
3575 				     * node here and lookups that look for the
3576 				     * directory being read (in the parent).
3577 				     * It would try to get a lock on ".." here,
3578 				     * owning the lock on the directory being
3579 				     * read. Lookup will hold the lock on ".."
3580 				     * and try to acquire the lock on the
3581 				     * directory being read.
3582 				     * If the directory is unlocked/relocked,
3583 				     * then there is a LOR with the buflock
3584 				     * vp is relocked.
3585 				     */
3586 				    free(nfhp, M_NFSFH);
3587 				} else {
3588 				    error = nfscl_nget(vnode_mount(vp), vp,
3589 				      nfhp, cnp, p, &np, NULL, LK_EXCLUSIVE);
3590 				    if (!error) {
3591 					newvp = NFSTOV(np);
3592 					unlocknewvp = 1;
3593 				    }
3594 				}
3595 				nfhp = NULL;
3596 				if (newvp != NULLVP) {
3597 				    error = nfscl_loadattrcache(&newvp,
3598 					&nfsva, NULL, NULL, 0, 0);
3599 				    if (error) {
3600 					if (unlocknewvp)
3601 					    vput(newvp);
3602 					else
3603 					    vrele(newvp);
3604 					goto nfsmout;
3605 				    }
3606 				    dp->d_type =
3607 					vtonfs_dtype(np->n_vattr.na_type);
3608 				    ndp->ni_vp = newvp;
3609 				    NFSCNHASH(cnp, HASHINIT);
3610 				    if (cnp->cn_namelen <= NCHNAMLEN &&
3611 					(newvp->v_type != VDIR ||
3612 					 dctime.tv_sec != 0)) {
3613 					cache_enter_time(ndp->ni_dvp,
3614 					    ndp->ni_vp, cnp,
3615 					    &nfsva.na_ctime,
3616 					    newvp->v_type != VDIR ? NULL :
3617 					    &dctime);
3618 				    }
3619 				    if (unlocknewvp)
3620 					vput(newvp);
3621 				    else
3622 					vrele(newvp);
3623 				    newvp = NULLVP;
3624 				}
3625 			    }
3626 			} else if (nfhp != NULL) {
3627 			    FREE((caddr_t)nfhp, M_NFSFH);
3628 			}
3629 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3630 			more_dirs = fxdr_unsigned(int, *tl);
3631 		}
3632 		/*
3633 		 * If at end of rpc data, get the eof boolean
3634 		 */
3635 		if (!more_dirs) {
3636 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3637 			eof = fxdr_unsigned(int, *tl);
3638 			if (tryformoredirs)
3639 				more_dirs = !eof;
3640 			if (nd->nd_flag & ND_NFSV4) {
3641 				error = nfscl_postop_attr(nd, nap, attrflagp,
3642 				    stuff);
3643 				if (error)
3644 					goto nfsmout;
3645 			}
3646 		}
3647 		mbuf_freem(nd->nd_mrep);
3648 		nd->nd_mrep = NULL;
3649 	}
3650 	/*
3651 	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
3652 	 * by increasing d_reclen for the last record.
3653 	 */
3654 	if (blksiz > 0) {
3655 		left = DIRBLKSIZ - blksiz;
3656 		dp->d_reclen += left;
3657 		uio_iov_base_add(uiop, left);
3658 		uio_iov_len_add(uiop, -(left));
3659 		uio_uio_resid_add(uiop, -(left));
3660 		uiop->uio_offset += left;
3661 	}
3662 
3663 	/*
3664 	 * If returning no data, assume end of file.
3665 	 * If not bigenough, return not end of file, since you aren't
3666 	 *    returning all the data
3667 	 * Otherwise, return the eof flag from the server.
3668 	 */
3669 	if (eofp != NULL) {
3670 		if (tresid == uio_uio_resid(uiop))
3671 			*eofp = 1;
3672 		else if (!bigenough)
3673 			*eofp = 0;
3674 		else
3675 			*eofp = eof;
3676 	}
3677 
3678 	/*
3679 	 * Add extra empty records to any remaining DIRBLKSIZ chunks.
3680 	 */
3681 	while (uio_uio_resid(uiop) > 0 && uio_uio_resid(uiop) != tresid) {
3682 		dp = (struct dirent *)uio_iov_base(uiop);
3683 		dp->d_type = DT_UNKNOWN;
3684 		dp->d_fileno = 0;
3685 		dp->d_namlen = 0;
3686 		dp->d_name[0] = '\0';
3687 		tl = (u_int32_t *)&dp->d_name[4];
3688 		*tl++ = cookie.lval[0];
3689 		*tl = cookie.lval[1];
3690 		dp->d_reclen = DIRBLKSIZ;
3691 		uio_iov_base_add(uiop, DIRBLKSIZ);
3692 		uio_iov_len_add(uiop, -(DIRBLKSIZ));
3693 		uio_uio_resid_add(uiop, -(DIRBLKSIZ));
3694 		uiop->uio_offset += DIRBLKSIZ;
3695 	}
3696 
3697 nfsmout:
3698 	if (nd->nd_mrep != NULL)
3699 		mbuf_freem(nd->nd_mrep);
3700 	return (error);
3701 }
3702 #endif	/* !APPLE */
3703 
3704 /*
3705  * Nfs commit rpc
3706  */
3707 APPLESTATIC int
3708 nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred,
3709     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
3710 {
3711 	u_int32_t *tl;
3712 	struct nfsrv_descript nfsd, *nd = &nfsd;
3713 	nfsattrbit_t attrbits;
3714 	int error;
3715 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
3716 
3717 	*attrflagp = 0;
3718 	NFSCL_REQSTART(nd, NFSPROC_COMMIT, vp);
3719 	NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3720 	txdr_hyper(offset, tl);
3721 	tl += 2;
3722 	*tl = txdr_unsigned(cnt);
3723 	if (nd->nd_flag & ND_NFSV4) {
3724 		/*
3725 		 * And do a Getattr op.
3726 		 */
3727 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3728 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
3729 		NFSGETATTR_ATTRBIT(&attrbits);
3730 		(void) nfsrv_putattrbit(nd, &attrbits);
3731 	}
3732 	error = nfscl_request(nd, vp, p, cred, stuff);
3733 	if (error)
3734 		return (error);
3735 	error = nfscl_wcc_data(nd, vp, nap, attrflagp, NULL, stuff);
3736 	if (!error && !nd->nd_repstat) {
3737 		NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
3738 		NFSLOCKMNT(nmp);
3739 		if (NFSBCMP(nmp->nm_verf, tl, NFSX_VERF)) {
3740 			NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
3741 			nd->nd_repstat = NFSERR_STALEWRITEVERF;
3742 		}
3743 		NFSUNLOCKMNT(nmp);
3744 		if (nd->nd_flag & ND_NFSV4)
3745 			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
3746 	}
3747 nfsmout:
3748 	if (!error && nd->nd_repstat)
3749 		error = nd->nd_repstat;
3750 	mbuf_freem(nd->nd_mrep);
3751 	return (error);
3752 }
3753 
3754 /*
3755  * NFS byte range lock rpc.
3756  * (Mostly just calls one of the three lower level RPC routines.)
3757  */
3758 APPLESTATIC int
3759 nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
3760     int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags)
3761 {
3762 	struct nfscllockowner *lp;
3763 	struct nfsclclient *clp;
3764 	struct nfsfh *nfhp;
3765 	struct nfsrv_descript nfsd, *nd = &nfsd;
3766 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
3767 	u_int64_t off, len;
3768 	off_t start, end;
3769 	u_int32_t clidrev = 0;
3770 	int error = 0, newone = 0, expireret = 0, retrycnt, donelocally;
3771 	int callcnt, dorpc;
3772 
3773 	/*
3774 	 * Convert the flock structure into a start and end and do POSIX
3775 	 * bounds checking.
3776 	 */
3777 	switch (fl->l_whence) {
3778 	case SEEK_SET:
3779 	case SEEK_CUR:
3780 		/*
3781 		 * Caller is responsible for adding any necessary offset
3782 		 * when SEEK_CUR is used.
3783 		 */
3784 		start = fl->l_start;
3785 		off = fl->l_start;
3786 		break;
3787 	case SEEK_END:
3788 		start = size + fl->l_start;
3789 		off = size + fl->l_start;
3790 		break;
3791 	default:
3792 		return (EINVAL);
3793 	}
3794 	if (start < 0)
3795 		return (EINVAL);
3796 	if (fl->l_len != 0) {
3797 		end = start + fl->l_len - 1;
3798 		if (end < start)
3799 			return (EINVAL);
3800 	}
3801 
3802 	len = fl->l_len;
3803 	if (len == 0)
3804 		len = NFS64BITSSET;
3805 	retrycnt = 0;
3806 	do {
3807 	    nd->nd_repstat = 0;
3808 	    if (op == F_GETLK) {
3809 		error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp);
3810 		if (error)
3811 			return (error);
3812 		error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags);
3813 		if (!error) {
3814 			clidrev = clp->nfsc_clientidrev;
3815 			error = nfsrpc_lockt(nd, vp, clp, off, len, fl, cred,
3816 			    p, id, flags);
3817 		} else if (error == -1) {
3818 			error = 0;
3819 		}
3820 		nfscl_clientrelease(clp);
3821 	    } else if (op == F_UNLCK && fl->l_type == F_UNLCK) {
3822 		/*
3823 		 * We must loop around for all lockowner cases.
3824 		 */
3825 		callcnt = 0;
3826 		error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp);
3827 		if (error)
3828 			return (error);
3829 		do {
3830 		    error = nfscl_relbytelock(vp, off, len, cred, p, callcnt,
3831 			clp, id, flags, &lp, &dorpc);
3832 		    /*
3833 		     * If it returns a NULL lp, we're done.
3834 		     */
3835 		    if (lp == NULL) {
3836 			if (callcnt == 0)
3837 			    nfscl_clientrelease(clp);
3838 			else
3839 			    nfscl_releasealllocks(clp, vp, p, id, flags);
3840 			return (error);
3841 		    }
3842 		    if (nmp->nm_clp != NULL)
3843 			clidrev = nmp->nm_clp->nfsc_clientidrev;
3844 		    else
3845 			clidrev = 0;
3846 		    /*
3847 		     * If the server doesn't support Posix lock semantics,
3848 		     * only allow locks on the entire file, since it won't
3849 		     * handle overlapping byte ranges.
3850 		     * There might still be a problem when a lock
3851 		     * upgrade/downgrade (read<->write) occurs, since the
3852 		     * server "might" expect an unlock first?
3853 		     */
3854 		    if (dorpc && (lp->nfsl_open->nfso_posixlock ||
3855 			(off == 0 && len == NFS64BITSSET))) {
3856 			/*
3857 			 * Since the lock records will go away, we must
3858 			 * wait for grace and delay here.
3859 			 */
3860 			do {
3861 			    error = nfsrpc_locku(nd, nmp, lp, off, len,
3862 				NFSV4LOCKT_READ, cred, p, 0);
3863 			    if ((nd->nd_repstat == NFSERR_GRACE ||
3864 				 nd->nd_repstat == NFSERR_DELAY) &&
3865 				error == 0)
3866 				(void) nfs_catnap(PZERO, (int)nd->nd_repstat,
3867 				    "nfs_advlock");
3868 			} while ((nd->nd_repstat == NFSERR_GRACE ||
3869 			    nd->nd_repstat == NFSERR_DELAY) && error == 0);
3870 		    }
3871 		    callcnt++;
3872 		} while (error == 0 && nd->nd_repstat == 0);
3873 		nfscl_releasealllocks(clp, vp, p, id, flags);
3874 	    } else if (op == F_SETLK) {
3875 		error = nfscl_getbytelock(vp, off, len, fl->l_type, cred, p,
3876 		    NULL, 0, id, flags, NULL, NULL, &lp, &newone, &donelocally);
3877 		if (error || donelocally) {
3878 			return (error);
3879 		}
3880 		if (nmp->nm_clp != NULL)
3881 			clidrev = nmp->nm_clp->nfsc_clientidrev;
3882 		else
3883 			clidrev = 0;
3884 		nfhp = VTONFS(vp)->n_fhp;
3885 		if (!lp->nfsl_open->nfso_posixlock &&
3886 		    (off != 0 || len != NFS64BITSSET)) {
3887 			error = EINVAL;
3888 		} else {
3889 			error = nfsrpc_lock(nd, nmp, vp, nfhp->nfh_fh,
3890 			    nfhp->nfh_len, lp, newone, reclaim, off,
3891 			    len, fl->l_type, cred, p, 0);
3892 		}
3893 		if (!error)
3894 			error = nd->nd_repstat;
3895 		nfscl_lockrelease(lp, error, newone);
3896 	    } else {
3897 		error = EINVAL;
3898 	    }
3899 	    if (!error)
3900 	        error = nd->nd_repstat;
3901 	    if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
3902 		error == NFSERR_STALEDONTRECOVER ||
3903 		error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
3904 		error == NFSERR_BADSESSION) {
3905 		(void) nfs_catnap(PZERO, error, "nfs_advlock");
3906 	    } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
3907 		&& clidrev != 0) {
3908 		expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
3909 		retrycnt++;
3910 	    }
3911 	} while (error == NFSERR_GRACE ||
3912 	    error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
3913 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALESTATEID ||
3914 	    error == NFSERR_BADSESSION ||
3915 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
3916 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
3917 	if (error && retrycnt >= 4)
3918 		error = EIO;
3919 	return (error);
3920 }
3921 
3922 /*
3923  * The lower level routine for the LockT case.
3924  */
3925 APPLESTATIC int
3926 nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp,
3927     struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl,
3928     struct ucred *cred, NFSPROC_T *p, void *id, int flags)
3929 {
3930 	u_int32_t *tl;
3931 	int error, type, size;
3932 	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
3933 	struct nfsnode *np;
3934 	struct nfsmount *nmp;
3935 	struct nfsclsession *tsep;
3936 
3937 	nmp = VFSTONFS(vp->v_mount);
3938 	NFSCL_REQSTART(nd, NFSPROC_LOCKT, vp);
3939 	NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
3940 	if (fl->l_type == F_RDLCK)
3941 		*tl++ = txdr_unsigned(NFSV4LOCKT_READ);
3942 	else
3943 		*tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
3944 	txdr_hyper(off, tl);
3945 	tl += 2;
3946 	txdr_hyper(len, tl);
3947 	tl += 2;
3948 	tsep = nfsmnt_mdssession(nmp);
3949 	*tl++ = tsep->nfsess_clientid.lval[0];
3950 	*tl = tsep->nfsess_clientid.lval[1];
3951 	nfscl_filllockowner(id, own, flags);
3952 	np = VTONFS(vp);
3953 	NFSBCOPY(np->n_fhp->nfh_fh, &own[NFSV4CL_LOCKNAMELEN],
3954 	    np->n_fhp->nfh_len);
3955 	(void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + np->n_fhp->nfh_len);
3956 	error = nfscl_request(nd, vp, p, cred, NULL);
3957 	if (error)
3958 		return (error);
3959 	if (nd->nd_repstat == 0) {
3960 		fl->l_type = F_UNLCK;
3961 	} else if (nd->nd_repstat == NFSERR_DENIED) {
3962 		nd->nd_repstat = 0;
3963 		fl->l_whence = SEEK_SET;
3964 		NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
3965 		fl->l_start = fxdr_hyper(tl);
3966 		tl += 2;
3967 		len = fxdr_hyper(tl);
3968 		tl += 2;
3969 		if (len == NFS64BITSSET)
3970 			fl->l_len = 0;
3971 		else
3972 			fl->l_len = len;
3973 		type = fxdr_unsigned(int, *tl++);
3974 		if (type == NFSV4LOCKT_WRITE)
3975 			fl->l_type = F_WRLCK;
3976 		else
3977 			fl->l_type = F_RDLCK;
3978 		/*
3979 		 * XXX For now, I have no idea what to do with the
3980 		 * conflicting lock_owner, so I'll just set the pid == 0
3981 		 * and skip over the lock_owner.
3982 		 */
3983 		fl->l_pid = (pid_t)0;
3984 		tl += 2;
3985 		size = fxdr_unsigned(int, *tl);
3986 		if (size < 0 || size > NFSV4_OPAQUELIMIT)
3987 			error = EBADRPC;
3988 		if (!error)
3989 			error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
3990 	} else if (nd->nd_repstat == NFSERR_STALECLIENTID)
3991 		nfscl_initiate_recovery(clp);
3992 nfsmout:
3993 	mbuf_freem(nd->nd_mrep);
3994 	return (error);
3995 }
3996 
3997 /*
3998  * Lower level function that performs the LockU RPC.
3999  */
4000 static int
4001 nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp,
4002     struct nfscllockowner *lp, u_int64_t off, u_int64_t len,
4003     u_int32_t type, struct ucred *cred, NFSPROC_T *p, int syscred)
4004 {
4005 	u_int32_t *tl;
4006 	int error;
4007 
4008 	nfscl_reqstart(nd, NFSPROC_LOCKU, nmp, lp->nfsl_open->nfso_fh,
4009 	    lp->nfsl_open->nfso_fhlen, NULL, NULL);
4010 	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED);
4011 	*tl++ = txdr_unsigned(type);
4012 	*tl = txdr_unsigned(lp->nfsl_seqid);
4013 	if (nfstest_outofseq &&
4014 	    (arc4random() % nfstest_outofseq) == 0)
4015 		*tl = txdr_unsigned(lp->nfsl_seqid + 1);
4016 	tl++;
4017 	if (NFSHASNFSV4N(nmp))
4018 		*tl++ = 0;
4019 	else
4020 		*tl++ = lp->nfsl_stateid.seqid;
4021 	*tl++ = lp->nfsl_stateid.other[0];
4022 	*tl++ = lp->nfsl_stateid.other[1];
4023 	*tl++ = lp->nfsl_stateid.other[2];
4024 	txdr_hyper(off, tl);
4025 	tl += 2;
4026 	txdr_hyper(len, tl);
4027 	if (syscred)
4028 		nd->nd_flag |= ND_USEGSSNAME;
4029 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4030 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4031 	NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
4032 	if (error)
4033 		return (error);
4034 	if (nd->nd_repstat == 0) {
4035 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
4036 		lp->nfsl_stateid.seqid = *tl++;
4037 		lp->nfsl_stateid.other[0] = *tl++;
4038 		lp->nfsl_stateid.other[1] = *tl++;
4039 		lp->nfsl_stateid.other[2] = *tl;
4040 	} else if (nd->nd_repstat == NFSERR_STALESTATEID)
4041 		nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
4042 nfsmout:
4043 	mbuf_freem(nd->nd_mrep);
4044 	return (error);
4045 }
4046 
4047 /*
4048  * The actual Lock RPC.
4049  */
4050 APPLESTATIC int
4051 nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp,
4052     u_int8_t *nfhp, int fhlen, struct nfscllockowner *lp, int newone,
4053     int reclaim, u_int64_t off, u_int64_t len, short type, struct ucred *cred,
4054     NFSPROC_T *p, int syscred)
4055 {
4056 	u_int32_t *tl;
4057 	int error, size;
4058 	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4059 	struct nfsclsession *tsep;
4060 
4061 	nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL, NULL);
4062 	NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
4063 	if (type == F_RDLCK)
4064 		*tl++ = txdr_unsigned(NFSV4LOCKT_READ);
4065 	else
4066 		*tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
4067 	*tl++ = txdr_unsigned(reclaim);
4068 	txdr_hyper(off, tl);
4069 	tl += 2;
4070 	txdr_hyper(len, tl);
4071 	tl += 2;
4072 	if (newone) {
4073 	    *tl = newnfs_true;
4074 	    NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
4075 		2 * NFSX_UNSIGNED + NFSX_HYPER);
4076 	    *tl++ = txdr_unsigned(lp->nfsl_open->nfso_own->nfsow_seqid);
4077 	    if (NFSHASNFSV4N(nmp))
4078 		*tl++ = 0;
4079 	    else
4080 		*tl++ = lp->nfsl_open->nfso_stateid.seqid;
4081 	    *tl++ = lp->nfsl_open->nfso_stateid.other[0];
4082 	    *tl++ = lp->nfsl_open->nfso_stateid.other[1];
4083 	    *tl++ = lp->nfsl_open->nfso_stateid.other[2];
4084 	    *tl++ = txdr_unsigned(lp->nfsl_seqid);
4085 	    tsep = nfsmnt_mdssession(nmp);
4086 	    *tl++ = tsep->nfsess_clientid.lval[0];
4087 	    *tl = tsep->nfsess_clientid.lval[1];
4088 	    NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4089 	    NFSBCOPY(nfhp, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4090 	    (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4091 	} else {
4092 	    *tl = newnfs_false;
4093 	    NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED);
4094 	    if (NFSHASNFSV4N(nmp))
4095 		*tl++ = 0;
4096 	    else
4097 		*tl++ = lp->nfsl_stateid.seqid;
4098 	    *tl++ = lp->nfsl_stateid.other[0];
4099 	    *tl++ = lp->nfsl_stateid.other[1];
4100 	    *tl++ = lp->nfsl_stateid.other[2];
4101 	    *tl = txdr_unsigned(lp->nfsl_seqid);
4102 	    if (nfstest_outofseq &&
4103 		(arc4random() % nfstest_outofseq) == 0)
4104 		    *tl = txdr_unsigned(lp->nfsl_seqid + 1);
4105 	}
4106 	if (syscred)
4107 		nd->nd_flag |= ND_USEGSSNAME;
4108 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
4109 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4110 	if (error)
4111 		return (error);
4112 	if (newone)
4113 	    NFSCL_INCRSEQID(lp->nfsl_open->nfso_own->nfsow_seqid, nd);
4114 	NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
4115 	if (nd->nd_repstat == 0) {
4116 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
4117 		lp->nfsl_stateid.seqid = *tl++;
4118 		lp->nfsl_stateid.other[0] = *tl++;
4119 		lp->nfsl_stateid.other[1] = *tl++;
4120 		lp->nfsl_stateid.other[2] = *tl;
4121 	} else if (nd->nd_repstat == NFSERR_DENIED) {
4122 		NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
4123 		size = fxdr_unsigned(int, *(tl + 7));
4124 		if (size < 0 || size > NFSV4_OPAQUELIMIT)
4125 			error = EBADRPC;
4126 		if (!error)
4127 			error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
4128 	} else if (nd->nd_repstat == NFSERR_STALESTATEID)
4129 		nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
4130 nfsmout:
4131 	mbuf_freem(nd->nd_mrep);
4132 	return (error);
4133 }
4134 
4135 /*
4136  * nfs statfs rpc
4137  * (always called with the vp for the mount point)
4138  */
4139 APPLESTATIC int
4140 nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp,
4141     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
4142     void *stuff)
4143 {
4144 	u_int32_t *tl = NULL;
4145 	struct nfsrv_descript nfsd, *nd = &nfsd;
4146 	struct nfsmount *nmp;
4147 	nfsattrbit_t attrbits;
4148 	int error;
4149 
4150 	*attrflagp = 0;
4151 	nmp = VFSTONFS(vnode_mount(vp));
4152 	if (NFSHASNFSV4(nmp)) {
4153 		/*
4154 		 * For V4, you actually do a getattr.
4155 		 */
4156 		NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
4157 		NFSSTATFS_GETATTRBIT(&attrbits);
4158 		(void) nfsrv_putattrbit(nd, &attrbits);
4159 		nd->nd_flag |= ND_USEGSSNAME;
4160 		error = nfscl_request(nd, vp, p, cred, stuff);
4161 		if (error)
4162 			return (error);
4163 		if (nd->nd_repstat == 0) {
4164 			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4165 			    NULL, NULL, sbp, fsp, NULL, 0, NULL, NULL, NULL, p,
4166 			    cred);
4167 			if (!error) {
4168 				nmp->nm_fsid[0] = nap->na_filesid[0];
4169 				nmp->nm_fsid[1] = nap->na_filesid[1];
4170 				NFSSETHASSETFSID(nmp);
4171 				*attrflagp = 1;
4172 			}
4173 		} else {
4174 			error = nd->nd_repstat;
4175 		}
4176 		if (error)
4177 			goto nfsmout;
4178 	} else {
4179 		NFSCL_REQSTART(nd, NFSPROC_FSSTAT, vp);
4180 		error = nfscl_request(nd, vp, p, cred, stuff);
4181 		if (error)
4182 			return (error);
4183 		if (nd->nd_flag & ND_NFSV3) {
4184 			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4185 			if (error)
4186 				goto nfsmout;
4187 		}
4188 		if (nd->nd_repstat) {
4189 			error = nd->nd_repstat;
4190 			goto nfsmout;
4191 		}
4192 		NFSM_DISSECT(tl, u_int32_t *,
4193 		    NFSX_STATFS(nd->nd_flag & ND_NFSV3));
4194 	}
4195 	if (NFSHASNFSV3(nmp)) {
4196 		sbp->sf_tbytes = fxdr_hyper(tl); tl += 2;
4197 		sbp->sf_fbytes = fxdr_hyper(tl); tl += 2;
4198 		sbp->sf_abytes = fxdr_hyper(tl); tl += 2;
4199 		sbp->sf_tfiles = fxdr_hyper(tl); tl += 2;
4200 		sbp->sf_ffiles = fxdr_hyper(tl); tl += 2;
4201 		sbp->sf_afiles = fxdr_hyper(tl); tl += 2;
4202 		sbp->sf_invarsec = fxdr_unsigned(u_int32_t, *tl);
4203 	} else if (NFSHASNFSV4(nmp) == 0) {
4204 		sbp->sf_tsize = fxdr_unsigned(u_int32_t, *tl++);
4205 		sbp->sf_bsize = fxdr_unsigned(u_int32_t, *tl++);
4206 		sbp->sf_blocks = fxdr_unsigned(u_int32_t, *tl++);
4207 		sbp->sf_bfree = fxdr_unsigned(u_int32_t, *tl++);
4208 		sbp->sf_bavail = fxdr_unsigned(u_int32_t, *tl);
4209 	}
4210 nfsmout:
4211 	mbuf_freem(nd->nd_mrep);
4212 	return (error);
4213 }
4214 
4215 /*
4216  * nfs pathconf rpc
4217  */
4218 APPLESTATIC int
4219 nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc,
4220     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
4221     void *stuff)
4222 {
4223 	struct nfsrv_descript nfsd, *nd = &nfsd;
4224 	struct nfsmount *nmp;
4225 	u_int32_t *tl;
4226 	nfsattrbit_t attrbits;
4227 	int error;
4228 
4229 	*attrflagp = 0;
4230 	nmp = VFSTONFS(vnode_mount(vp));
4231 	if (NFSHASNFSV4(nmp)) {
4232 		/*
4233 		 * For V4, you actually do a getattr.
4234 		 */
4235 		NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
4236 		NFSPATHCONF_GETATTRBIT(&attrbits);
4237 		(void) nfsrv_putattrbit(nd, &attrbits);
4238 		nd->nd_flag |= ND_USEGSSNAME;
4239 		error = nfscl_request(nd, vp, p, cred, stuff);
4240 		if (error)
4241 			return (error);
4242 		if (nd->nd_repstat == 0) {
4243 			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4244 			    pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p,
4245 			    cred);
4246 			if (!error)
4247 				*attrflagp = 1;
4248 		} else {
4249 			error = nd->nd_repstat;
4250 		}
4251 	} else {
4252 		NFSCL_REQSTART(nd, NFSPROC_PATHCONF, vp);
4253 		error = nfscl_request(nd, vp, p, cred, stuff);
4254 		if (error)
4255 			return (error);
4256 		error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4257 		if (nd->nd_repstat && !error)
4258 			error = nd->nd_repstat;
4259 		if (!error) {
4260 			NFSM_DISSECT(tl, u_int32_t *, NFSX_V3PATHCONF);
4261 			pc->pc_linkmax = fxdr_unsigned(u_int32_t, *tl++);
4262 			pc->pc_namemax = fxdr_unsigned(u_int32_t, *tl++);
4263 			pc->pc_notrunc = fxdr_unsigned(u_int32_t, *tl++);
4264 			pc->pc_chownrestricted =
4265 			    fxdr_unsigned(u_int32_t, *tl++);
4266 			pc->pc_caseinsensitive =
4267 			    fxdr_unsigned(u_int32_t, *tl++);
4268 			pc->pc_casepreserving = fxdr_unsigned(u_int32_t, *tl);
4269 		}
4270 	}
4271 nfsmout:
4272 	mbuf_freem(nd->nd_mrep);
4273 	return (error);
4274 }
4275 
4276 /*
4277  * nfs version 3 fsinfo rpc call
4278  */
4279 APPLESTATIC int
4280 nfsrpc_fsinfo(vnode_t vp, struct nfsfsinfo *fsp, struct ucred *cred,
4281     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
4282 {
4283 	u_int32_t *tl;
4284 	struct nfsrv_descript nfsd, *nd = &nfsd;
4285 	int error;
4286 
4287 	*attrflagp = 0;
4288 	NFSCL_REQSTART(nd, NFSPROC_FSINFO, vp);
4289 	error = nfscl_request(nd, vp, p, cred, stuff);
4290 	if (error)
4291 		return (error);
4292 	error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4293 	if (nd->nd_repstat && !error)
4294 		error = nd->nd_repstat;
4295 	if (!error) {
4296 		NFSM_DISSECT(tl, u_int32_t *, NFSX_V3FSINFO);
4297 		fsp->fs_rtmax = fxdr_unsigned(u_int32_t, *tl++);
4298 		fsp->fs_rtpref = fxdr_unsigned(u_int32_t, *tl++);
4299 		fsp->fs_rtmult = fxdr_unsigned(u_int32_t, *tl++);
4300 		fsp->fs_wtmax = fxdr_unsigned(u_int32_t, *tl++);
4301 		fsp->fs_wtpref = fxdr_unsigned(u_int32_t, *tl++);
4302 		fsp->fs_wtmult = fxdr_unsigned(u_int32_t, *tl++);
4303 		fsp->fs_dtpref = fxdr_unsigned(u_int32_t, *tl++);
4304 		fsp->fs_maxfilesize = fxdr_hyper(tl);
4305 		tl += 2;
4306 		fxdr_nfsv3time(tl, &fsp->fs_timedelta);
4307 		tl += 2;
4308 		fsp->fs_properties = fxdr_unsigned(u_int32_t, *tl);
4309 	}
4310 nfsmout:
4311 	mbuf_freem(nd->nd_mrep);
4312 	return (error);
4313 }
4314 
4315 /*
4316  * This function performs the Renew RPC.
4317  */
4318 APPLESTATIC int
4319 nfsrpc_renew(struct nfsclclient *clp, struct nfsclds *dsp, struct ucred *cred,
4320     NFSPROC_T *p)
4321 {
4322 	u_int32_t *tl;
4323 	struct nfsrv_descript nfsd;
4324 	struct nfsrv_descript *nd = &nfsd;
4325 	struct nfsmount *nmp;
4326 	int error;
4327 	struct nfssockreq *nrp;
4328 	struct nfsclsession *tsep;
4329 
4330 	nmp = clp->nfsc_nmp;
4331 	if (nmp == NULL)
4332 		return (0);
4333 	if (dsp == NULL)
4334 		nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, NULL);
4335 	else
4336 		nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL,
4337 		    &dsp->nfsclds_sess);
4338 	if (!NFSHASNFSV4N(nmp)) {
4339 		/* NFSv4.1 just uses a Sequence Op and not a Renew. */
4340 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4341 		tsep = nfsmnt_mdssession(nmp);
4342 		*tl++ = tsep->nfsess_clientid.lval[0];
4343 		*tl = tsep->nfsess_clientid.lval[1];
4344 	}
4345 	nrp = NULL;
4346 	if (dsp != NULL)
4347 		nrp = dsp->nfsclds_sockp;
4348 	if (nrp == NULL)
4349 		/* If NULL, use the MDS socket. */
4350 		nrp = &nmp->nm_sockreq;
4351 	nd->nd_flag |= ND_USEGSSNAME;
4352 	if (dsp == NULL)
4353 		error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4354 		    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4355 	else
4356 		error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4357 		    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
4358 	if (error)
4359 		return (error);
4360 	error = nd->nd_repstat;
4361 	mbuf_freem(nd->nd_mrep);
4362 	return (error);
4363 }
4364 
4365 /*
4366  * This function performs the Releaselockowner RPC.
4367  */
4368 APPLESTATIC int
4369 nfsrpc_rellockown(struct nfsmount *nmp, struct nfscllockowner *lp,
4370     uint8_t *fh, int fhlen, struct ucred *cred, NFSPROC_T *p)
4371 {
4372 	struct nfsrv_descript nfsd, *nd = &nfsd;
4373 	u_int32_t *tl;
4374 	int error;
4375 	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4376 	struct nfsclsession *tsep;
4377 
4378 	if (NFSHASNFSV4N(nmp)) {
4379 		/* For NFSv4.1, do a FreeStateID. */
4380 		nfscl_reqstart(nd, NFSPROC_FREESTATEID, nmp, NULL, 0, NULL,
4381 		    NULL);
4382 		nfsm_stateidtom(nd, &lp->nfsl_stateid, NFSSTATEID_PUTSTATEID);
4383 	} else {
4384 		nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL,
4385 		    NULL);
4386 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4387 		tsep = nfsmnt_mdssession(nmp);
4388 		*tl++ = tsep->nfsess_clientid.lval[0];
4389 		*tl = tsep->nfsess_clientid.lval[1];
4390 		NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4391 		NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4392 		(void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4393 	}
4394 	nd->nd_flag |= ND_USEGSSNAME;
4395 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4396 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4397 	if (error)
4398 		return (error);
4399 	error = nd->nd_repstat;
4400 	mbuf_freem(nd->nd_mrep);
4401 	return (error);
4402 }
4403 
4404 /*
4405  * This function performs the Compound to get the mount pt FH.
4406  */
4407 APPLESTATIC int
4408 nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred,
4409     NFSPROC_T *p)
4410 {
4411 	u_int32_t *tl;
4412 	struct nfsrv_descript nfsd;
4413 	struct nfsrv_descript *nd = &nfsd;
4414 	u_char *cp, *cp2;
4415 	int error, cnt, len, setnil;
4416 	u_int32_t *opcntp;
4417 
4418 	nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL);
4419 	cp = dirpath;
4420 	cnt = 0;
4421 	do {
4422 		setnil = 0;
4423 		while (*cp == '/')
4424 			cp++;
4425 		cp2 = cp;
4426 		while (*cp2 != '\0' && *cp2 != '/')
4427 			cp2++;
4428 		if (*cp2 == '/') {
4429 			setnil = 1;
4430 			*cp2 = '\0';
4431 		}
4432 		if (cp2 != cp) {
4433 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4434 			*tl = txdr_unsigned(NFSV4OP_LOOKUP);
4435 			nfsm_strtom(nd, cp, strlen(cp));
4436 			cnt++;
4437 		}
4438 		if (setnil)
4439 			*cp2++ = '/';
4440 		cp = cp2;
4441 	} while (*cp != '\0');
4442 	if (NFSHASNFSV4N(nmp))
4443 		/* Has a Sequence Op done by nfscl_reqstart(). */
4444 		*opcntp = txdr_unsigned(3 + cnt);
4445 	else
4446 		*opcntp = txdr_unsigned(2 + cnt);
4447 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4448 	*tl = txdr_unsigned(NFSV4OP_GETFH);
4449 	nd->nd_flag |= ND_USEGSSNAME;
4450 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4451 		NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4452 	if (error)
4453 		return (error);
4454 	if (nd->nd_repstat == 0) {
4455 		NFSM_DISSECT(tl, u_int32_t *, (3 + 2 * cnt) * NFSX_UNSIGNED);
4456 		tl += (2 + 2 * cnt);
4457 		if ((len = fxdr_unsigned(int, *tl)) <= 0 ||
4458 			len > NFSX_FHMAX) {
4459 			nd->nd_repstat = NFSERR_BADXDR;
4460 		} else {
4461 			nd->nd_repstat = nfsrv_mtostr(nd, nmp->nm_fh, len);
4462 			if (nd->nd_repstat == 0)
4463 				nmp->nm_fhsize = len;
4464 		}
4465 	}
4466 	error = nd->nd_repstat;
4467 nfsmout:
4468 	mbuf_freem(nd->nd_mrep);
4469 	return (error);
4470 }
4471 
4472 /*
4473  * This function performs the Delegreturn RPC.
4474  */
4475 APPLESTATIC int
4476 nfsrpc_delegreturn(struct nfscldeleg *dp, struct ucred *cred,
4477     struct nfsmount *nmp, NFSPROC_T *p, int syscred)
4478 {
4479 	u_int32_t *tl;
4480 	struct nfsrv_descript nfsd;
4481 	struct nfsrv_descript *nd = &nfsd;
4482 	int error;
4483 
4484 	nfscl_reqstart(nd, NFSPROC_DELEGRETURN, nmp, dp->nfsdl_fh,
4485 	    dp->nfsdl_fhlen, NULL, NULL);
4486 	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
4487 	if (NFSHASNFSV4N(nmp))
4488 		*tl++ = 0;
4489 	else
4490 		*tl++ = dp->nfsdl_stateid.seqid;
4491 	*tl++ = dp->nfsdl_stateid.other[0];
4492 	*tl++ = dp->nfsdl_stateid.other[1];
4493 	*tl = dp->nfsdl_stateid.other[2];
4494 	if (syscred)
4495 		nd->nd_flag |= ND_USEGSSNAME;
4496 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4497 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4498 	if (error)
4499 		return (error);
4500 	error = nd->nd_repstat;
4501 	mbuf_freem(nd->nd_mrep);
4502 	return (error);
4503 }
4504 
4505 /*
4506  * nfs getacl call.
4507  */
4508 APPLESTATIC int
4509 nfsrpc_getacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4510     struct acl *aclp, void *stuff)
4511 {
4512 	struct nfsrv_descript nfsd, *nd = &nfsd;
4513 	int error;
4514 	nfsattrbit_t attrbits;
4515 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
4516 
4517 	if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
4518 		return (EOPNOTSUPP);
4519 	NFSCL_REQSTART(nd, NFSPROC_GETACL, vp);
4520 	NFSZERO_ATTRBIT(&attrbits);
4521 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
4522 	(void) nfsrv_putattrbit(nd, &attrbits);
4523 	error = nfscl_request(nd, vp, p, cred, stuff);
4524 	if (error)
4525 		return (error);
4526 	if (!nd->nd_repstat)
4527 		error = nfsv4_loadattr(nd, vp, NULL, NULL, NULL, 0, NULL,
4528 		    NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, p, cred);
4529 	else
4530 		error = nd->nd_repstat;
4531 	mbuf_freem(nd->nd_mrep);
4532 	return (error);
4533 }
4534 
4535 /*
4536  * nfs setacl call.
4537  */
4538 APPLESTATIC int
4539 nfsrpc_setacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4540     struct acl *aclp, void *stuff)
4541 {
4542 	int error;
4543 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
4544 
4545 	if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
4546 		return (EOPNOTSUPP);
4547 	error = nfsrpc_setattr(vp, NULL, aclp, cred, p, NULL, NULL, stuff);
4548 	return (error);
4549 }
4550 
4551 /*
4552  * nfs setacl call.
4553  */
4554 static int
4555 nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4556     struct acl *aclp, nfsv4stateid_t *stateidp, void *stuff)
4557 {
4558 	struct nfsrv_descript nfsd, *nd = &nfsd;
4559 	int error;
4560 	nfsattrbit_t attrbits;
4561 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
4562 
4563 	if (!NFSHASNFSV4(nmp))
4564 		return (EOPNOTSUPP);
4565 	NFSCL_REQSTART(nd, NFSPROC_SETACL, vp);
4566 	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
4567 	NFSZERO_ATTRBIT(&attrbits);
4568 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
4569 	(void) nfsv4_fillattr(nd, vnode_mount(vp), vp, aclp, NULL, NULL, 0,
4570 	    &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0);
4571 	error = nfscl_request(nd, vp, p, cred, stuff);
4572 	if (error)
4573 		return (error);
4574 	/* Don't care about the pre/postop attributes */
4575 	mbuf_freem(nd->nd_mrep);
4576 	return (nd->nd_repstat);
4577 }
4578 
4579 /*
4580  * Do the NFSv4.1 Exchange ID.
4581  */
4582 int
4583 nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp,
4584     struct nfssockreq *nrp, uint32_t exchflags, struct nfsclds **dspp,
4585     struct ucred *cred, NFSPROC_T *p)
4586 {
4587 	uint32_t *tl, v41flags;
4588 	struct nfsrv_descript nfsd;
4589 	struct nfsrv_descript *nd = &nfsd;
4590 	struct nfsclds *dsp;
4591 	struct timespec verstime;
4592 	int error, len;
4593 
4594 	*dspp = NULL;
4595 	nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL);
4596 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
4597 	*tl++ = txdr_unsigned(nfsboottime.tv_sec);	/* Client owner */
4598 	*tl = txdr_unsigned(clp->nfsc_rev);
4599 	(void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
4600 
4601 	NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED);
4602 	*tl++ = txdr_unsigned(exchflags);
4603 	*tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE);
4604 
4605 	/* Set the implementation id4 */
4606 	*tl = txdr_unsigned(1);
4607 	(void) nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org"));
4608 	(void) nfsm_strtom(nd, version, strlen(version));
4609 	NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME);
4610 	verstime.tv_sec = 1293840000;		/* Jan 1, 2011 */
4611 	verstime.tv_nsec = 0;
4612 	txdr_nfsv4time(&verstime, tl);
4613 	nd->nd_flag |= ND_USEGSSNAME;
4614 	error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4615 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4616 	NFSCL_DEBUG(1, "exchangeid err=%d reps=%d\n", error,
4617 	    (int)nd->nd_repstat);
4618 	if (error != 0)
4619 		return (error);
4620 	if (nd->nd_repstat == 0) {
4621 		NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_UNSIGNED + NFSX_HYPER);
4622 		len = fxdr_unsigned(int, *(tl + 7));
4623 		if (len < 0 || len > NFSV4_OPAQUELIMIT) {
4624 			error = NFSERR_BADXDR;
4625 			goto nfsmout;
4626 		}
4627 		dsp = malloc(sizeof(struct nfsclds) + len + 1, M_NFSCLDS,
4628 		    M_WAITOK | M_ZERO);
4629 		dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
4630 		dsp->nfsclds_servownlen = len;
4631 		dsp->nfsclds_sess.nfsess_clientid.lval[0] = *tl++;
4632 		dsp->nfsclds_sess.nfsess_clientid.lval[1] = *tl++;
4633 		dsp->nfsclds_sess.nfsess_sequenceid =
4634 		    fxdr_unsigned(uint32_t, *tl++);
4635 		v41flags = fxdr_unsigned(uint32_t, *tl);
4636 		if ((v41flags & NFSV4EXCH_USEPNFSMDS) != 0 &&
4637 		    NFSHASPNFSOPT(nmp)) {
4638 			NFSCL_DEBUG(1, "set PNFS\n");
4639 			NFSLOCKMNT(nmp);
4640 			nmp->nm_state |= NFSSTA_PNFS;
4641 			NFSUNLOCKMNT(nmp);
4642 			dsp->nfsclds_flags |= NFSCLDS_MDS;
4643 		}
4644 		if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0)
4645 			dsp->nfsclds_flags |= NFSCLDS_DS;
4646 		if (len > 0)
4647 			nd->nd_repstat = nfsrv_mtostr(nd,
4648 			    dsp->nfsclds_serverown, len);
4649 		if (nd->nd_repstat == 0) {
4650 			mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
4651 			mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
4652 			    NULL, MTX_DEF);
4653 			nfscl_initsessionslots(&dsp->nfsclds_sess);
4654 			*dspp = dsp;
4655 		} else
4656 			free(dsp, M_NFSCLDS);
4657 	}
4658 	error = nd->nd_repstat;
4659 nfsmout:
4660 	mbuf_freem(nd->nd_mrep);
4661 	return (error);
4662 }
4663 
4664 /*
4665  * Do the NFSv4.1 Create Session.
4666  */
4667 int
4668 nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep,
4669     struct nfssockreq *nrp, uint32_t sequenceid, int mds, struct ucred *cred,
4670     NFSPROC_T *p)
4671 {
4672 	uint32_t crflags, maxval, *tl;
4673 	struct nfsrv_descript nfsd;
4674 	struct nfsrv_descript *nd = &nfsd;
4675 	int error, irdcnt;
4676 
4677 	nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL);
4678 	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
4679 	*tl++ = sep->nfsess_clientid.lval[0];
4680 	*tl++ = sep->nfsess_clientid.lval[1];
4681 	*tl++ = txdr_unsigned(sequenceid);
4682 	crflags = (NFSMNT_RDONLY(nmp->nm_mountp) ? 0 : NFSV4CRSESS_PERSIST);
4683 	if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0 && mds != 0)
4684 		crflags |= NFSV4CRSESS_CONNBACKCHAN;
4685 	*tl = txdr_unsigned(crflags);
4686 
4687 	/* Fill in fore channel attributes. */
4688 	NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4689 	*tl++ = 0;				/* Header pad size */
4690 	*tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);/* Max request size */
4691 	*tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);/* Max reply size */
4692 	*tl++ = txdr_unsigned(4096);		/* Max response size cached */
4693 	*tl++ = txdr_unsigned(20);		/* Max operations */
4694 	*tl++ = txdr_unsigned(64);		/* Max slots */
4695 	*tl = 0;				/* No rdma ird */
4696 
4697 	/* Fill in back channel attributes. */
4698 	NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4699 	*tl++ = 0;				/* Header pad size */
4700 	*tl++ = txdr_unsigned(10000);		/* Max request size */
4701 	*tl++ = txdr_unsigned(10000);		/* Max response size */
4702 	*tl++ = txdr_unsigned(4096);		/* Max response size cached */
4703 	*tl++ = txdr_unsigned(4);		/* Max operations */
4704 	*tl++ = txdr_unsigned(NFSV4_CBSLOTS);	/* Max slots */
4705 	*tl = 0;				/* No rdma ird */
4706 
4707 	NFSM_BUILD(tl, uint32_t *, 8 * NFSX_UNSIGNED);
4708 	*tl++ = txdr_unsigned(NFS_CALLBCKPROG);	/* Call back prog # */
4709 
4710 	/* Allow AUTH_SYS callbacks as uid, gid == 0. */
4711 	*tl++ = txdr_unsigned(1);		/* Auth_sys only */
4712 	*tl++ = txdr_unsigned(AUTH_SYS);	/* AUTH_SYS type */
4713 	*tl++ = txdr_unsigned(nfsboottime.tv_sec); /* time stamp */
4714 	*tl++ = 0;				/* Null machine name */
4715 	*tl++ = 0;				/* Uid == 0 */
4716 	*tl++ = 0;				/* Gid == 0 */
4717 	*tl = 0;				/* No additional gids */
4718 	nd->nd_flag |= ND_USEGSSNAME;
4719 	error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG,
4720 	    NFS_VER4, NULL, 1, NULL, NULL);
4721 	if (error != 0)
4722 		return (error);
4723 	if (nd->nd_repstat == 0) {
4724 		NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
4725 		    2 * NFSX_UNSIGNED);
4726 		bcopy(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID);
4727 		tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
4728 		sep->nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++);
4729 		crflags = fxdr_unsigned(uint32_t, *tl);
4730 		if ((crflags & NFSV4CRSESS_PERSIST) != 0 && mds != 0) {
4731 			NFSLOCKMNT(nmp);
4732 			nmp->nm_state |= NFSSTA_SESSPERSIST;
4733 			NFSUNLOCKMNT(nmp);
4734 		}
4735 
4736 		/* Get the fore channel slot count. */
4737 		NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4738 		tl++;			/* Skip the header pad size. */
4739 
4740 		/* Make sure nm_wsize is small enough. */
4741 		maxval = fxdr_unsigned(uint32_t, *tl++);
4742 		while (maxval < nmp->nm_wsize + NFS_MAXXDR) {
4743 			if (nmp->nm_wsize > 8096)
4744 				nmp->nm_wsize /= 2;
4745 			else
4746 				break;
4747 		}
4748 
4749 		/* Make sure nm_rsize is small enough. */
4750 		maxval = fxdr_unsigned(uint32_t, *tl++);
4751 		while (maxval < nmp->nm_rsize + NFS_MAXXDR) {
4752 			if (nmp->nm_rsize > 8096)
4753 				nmp->nm_rsize /= 2;
4754 			else
4755 				break;
4756 		}
4757 
4758 		sep->nfsess_maxcache = fxdr_unsigned(int, *tl++);
4759 		tl++;
4760 		sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++);
4761 		NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots);
4762 		irdcnt = fxdr_unsigned(int, *tl);
4763 		if (irdcnt > 0)
4764 			NFSM_DISSECT(tl, uint32_t *, irdcnt * NFSX_UNSIGNED);
4765 
4766 		/* and the back channel slot count. */
4767 		NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4768 		tl += 5;
4769 		sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl);
4770 		NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots);
4771 	}
4772 	error = nd->nd_repstat;
4773 nfsmout:
4774 	mbuf_freem(nd->nd_mrep);
4775 	return (error);
4776 }
4777 
4778 /*
4779  * Do the NFSv4.1 Destroy Session.
4780  */
4781 int
4782 nfsrpc_destroysession(struct nfsmount *nmp, struct nfsclclient *clp,
4783     struct ucred *cred, NFSPROC_T *p)
4784 {
4785 	uint32_t *tl;
4786 	struct nfsrv_descript nfsd;
4787 	struct nfsrv_descript *nd = &nfsd;
4788 	int error;
4789 	struct nfsclsession *tsep;
4790 
4791 	nfscl_reqstart(nd, NFSPROC_DESTROYSESSION, nmp, NULL, 0, NULL, NULL);
4792 	NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID);
4793 	tsep = nfsmnt_mdssession(nmp);
4794 	bcopy(tsep->nfsess_sessionid, tl, NFSX_V4SESSIONID);
4795 	nd->nd_flag |= ND_USEGSSNAME;
4796 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4797 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4798 	if (error != 0)
4799 		return (error);
4800 	error = nd->nd_repstat;
4801 	mbuf_freem(nd->nd_mrep);
4802 	return (error);
4803 }
4804 
4805 /*
4806  * Do the NFSv4.1 Destroy Client.
4807  */
4808 int
4809 nfsrpc_destroyclient(struct nfsmount *nmp, struct nfsclclient *clp,
4810     struct ucred *cred, NFSPROC_T *p)
4811 {
4812 	uint32_t *tl;
4813 	struct nfsrv_descript nfsd;
4814 	struct nfsrv_descript *nd = &nfsd;
4815 	int error;
4816 	struct nfsclsession *tsep;
4817 
4818 	nfscl_reqstart(nd, NFSPROC_DESTROYCLIENT, nmp, NULL, 0, NULL, NULL);
4819 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
4820 	tsep = nfsmnt_mdssession(nmp);
4821 	*tl++ = tsep->nfsess_clientid.lval[0];
4822 	*tl = tsep->nfsess_clientid.lval[1];
4823 	nd->nd_flag |= ND_USEGSSNAME;
4824 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4825 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4826 	if (error != 0)
4827 		return (error);
4828 	error = nd->nd_repstat;
4829 	mbuf_freem(nd->nd_mrep);
4830 	return (error);
4831 }
4832 
4833 /*
4834  * Do the NFSv4.1 LayoutGet.
4835  */
4836 int
4837 nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode,
4838     uint64_t offset, uint64_t len, uint64_t minlen, int layoutlen,
4839     nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp,
4840     struct ucred *cred, NFSPROC_T *p, void *stuff)
4841 {
4842 	struct nfsrv_descript nfsd, *nd = &nfsd;
4843 	int error;
4844 
4845 	nfscl_reqstart(nd, NFSPROC_LAYOUTGET, nmp, fhp, fhlen, NULL, NULL);
4846 	nfsrv_setuplayoutget(nd, iomode, offset, len, minlen, stateidp,
4847 	    layoutlen, 0);
4848 	nd->nd_flag |= ND_USEGSSNAME;
4849 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4850 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4851 	NFSCL_DEBUG(4, "layget err=%d st=%d\n", error, nd->nd_repstat);
4852 	if (error != 0)
4853 		return (error);
4854 	if (nd->nd_repstat == 0)
4855 		error = nfsrv_parselayoutget(nd, stateidp, retonclosep, flhp);
4856 	if (error == 0 && nd->nd_repstat != 0)
4857 		error = nd->nd_repstat;
4858 	mbuf_freem(nd->nd_mrep);
4859 	return (error);
4860 }
4861 
4862 /*
4863  * Do the NFSv4.1 Get Device Info.
4864  */
4865 int
4866 nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype,
4867     uint32_t *notifybitsp, struct nfscldevinfo **ndip, struct ucred *cred,
4868     NFSPROC_T *p)
4869 {
4870 	uint32_t cnt, *tl;
4871 	struct nfsrv_descript nfsd;
4872 	struct nfsrv_descript *nd = &nfsd;
4873 	struct sockaddr_storage ss;
4874 	struct nfsclds *dsp = NULL, **dspp;
4875 	struct nfscldevinfo *ndi;
4876 	int addrcnt, bitcnt, error, i, isudp, j, pos, safilled, stripecnt;
4877 	uint8_t stripeindex;
4878 
4879 	*ndip = NULL;
4880 	ndi = NULL;
4881 	nfscl_reqstart(nd, NFSPROC_GETDEVICEINFO, nmp, NULL, 0, NULL, NULL);
4882 	NFSM_BUILD(tl, uint32_t *, NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED);
4883 	NFSBCOPY(deviceid, tl, NFSX_V4DEVICEID);
4884 	tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
4885 	*tl++ = txdr_unsigned(layouttype);
4886 	*tl++ = txdr_unsigned(100000);
4887 	if (notifybitsp != NULL && *notifybitsp != 0) {
4888 		*tl = txdr_unsigned(1);		/* One word of bits. */
4889 		NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
4890 		*tl = txdr_unsigned(*notifybitsp);
4891 	} else
4892 		*tl = txdr_unsigned(0);
4893 	nd->nd_flag |= ND_USEGSSNAME;
4894 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4895 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4896 	if (error != 0)
4897 		return (error);
4898 	if (nd->nd_repstat == 0) {
4899 		NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED);
4900 		if (layouttype != fxdr_unsigned(int, *tl++))
4901 			printf("EEK! devinfo layout type not same!\n");
4902 		stripecnt = fxdr_unsigned(int, *++tl);
4903 		NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt);
4904 		if (stripecnt < 1 || stripecnt > 4096) {
4905 			printf("NFS devinfo stripecnt %d: out of range\n",
4906 			    stripecnt);
4907 			error = NFSERR_BADXDR;
4908 			goto nfsmout;
4909 		}
4910 		NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) * NFSX_UNSIGNED);
4911 		addrcnt = fxdr_unsigned(int, *(tl + stripecnt));
4912 		NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt);
4913 		if (addrcnt < 1 || addrcnt > 128) {
4914 			printf("NFS devinfo addrcnt %d: out of range\n",
4915 			    addrcnt);
4916 			error = NFSERR_BADXDR;
4917 			goto nfsmout;
4918 		}
4919 
4920 		/*
4921 		 * Now we know how many stripe indices and addresses, so
4922 		 * we can allocate the structure the correct size.
4923 		 */
4924 		i = (stripecnt * sizeof(uint8_t)) / sizeof(struct nfsclds *)
4925 		    + 1;
4926 		NFSCL_DEBUG(4, "stripeindices=%d\n", i);
4927 		ndi = malloc(sizeof(*ndi) + (addrcnt + i) *
4928 		    sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK | M_ZERO);
4929 		NFSBCOPY(deviceid, ndi->nfsdi_deviceid, NFSX_V4DEVICEID);
4930 		ndi->nfsdi_refcnt = 0;
4931 		ndi->nfsdi_stripecnt = stripecnt;
4932 		ndi->nfsdi_addrcnt = addrcnt;
4933 		/* Fill in the stripe indices. */
4934 		for (i = 0; i < stripecnt; i++) {
4935 			stripeindex = fxdr_unsigned(uint8_t, *tl++);
4936 			NFSCL_DEBUG(4, "stripeind=%d\n", stripeindex);
4937 			if (stripeindex >= addrcnt) {
4938 				printf("NFS devinfo stripeindex %d: too big\n",
4939 				    (int)stripeindex);
4940 				error = NFSERR_BADXDR;
4941 				goto nfsmout;
4942 			}
4943 			nfsfldi_setstripeindex(ndi, i, stripeindex);
4944 		}
4945 
4946 		/* Now, dissect the server address(es). */
4947 		safilled = 0;
4948 		for (i = 0; i < addrcnt; i++) {
4949 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
4950 			cnt = fxdr_unsigned(uint32_t, *tl);
4951 			if (cnt == 0) {
4952 				printf("NFS devinfo 0 len addrlist\n");
4953 				error = NFSERR_BADXDR;
4954 				goto nfsmout;
4955 			}
4956 			dspp = nfsfldi_addr(ndi, i);
4957 			pos = arc4random() % cnt;	/* Choose one. */
4958 			safilled = 0;
4959 			for (j = 0; j < cnt; j++) {
4960 				error = nfsv4_getipaddr(nd, &ss, &isudp);
4961 				if (error != 0 && error != EPERM) {
4962 					error = NFSERR_BADXDR;
4963 					goto nfsmout;
4964 				}
4965 				if (error == 0 && isudp == 0) {
4966 					/*
4967 					 * The algorithm is:
4968 					 * - use "pos" entry if it is of the
4969 					 *   same af_family or none of them
4970 					 *   is of the same af_family
4971 					 * else
4972 					 * - use the first one of the same
4973 					 *   af_family.
4974 					 */
4975 					if ((safilled == 0 && ss.ss_family ==
4976 					     nmp->nm_nam->sa_family) ||
4977 					    (j == pos &&
4978 					     (safilled == 0 || ss.ss_family ==
4979 					      nmp->nm_nam->sa_family)) ||
4980 					    (safilled == 1 && ss.ss_family ==
4981 					     nmp->nm_nam->sa_family)) {
4982 						error = nfsrpc_fillsa(nmp, &ss,
4983 						    &dsp, p);
4984 						if (error == 0) {
4985 							*dspp = dsp;
4986 							if (ss.ss_family ==
4987 							 nmp->nm_nam->sa_family)
4988 								safilled = 2;
4989 							else
4990 								safilled = 1;
4991 						}
4992 					}
4993 				}
4994 			}
4995 			if (safilled == 0)
4996 				break;
4997 		}
4998 
4999 		/* And the notify bits. */
5000 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5001 		if (safilled != 0) {
5002 			bitcnt = fxdr_unsigned(int, *tl);
5003 			if (bitcnt > 0) {
5004 				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5005 				if (notifybitsp != NULL)
5006 					*notifybitsp =
5007 					    fxdr_unsigned(uint32_t, *tl);
5008 			}
5009 			*ndip = ndi;
5010 		} else
5011 			error = EPERM;
5012 	}
5013 	if (nd->nd_repstat != 0)
5014 		error = nd->nd_repstat;
5015 nfsmout:
5016 	if (error != 0 && ndi != NULL)
5017 		nfscl_freedevinfo(ndi);
5018 	mbuf_freem(nd->nd_mrep);
5019 	return (error);
5020 }
5021 
5022 /*
5023  * Do the NFSv4.1 LayoutCommit.
5024  */
5025 int
5026 nfsrpc_layoutcommit(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5027     uint64_t off, uint64_t len, uint64_t lastbyte, nfsv4stateid_t *stateidp,
5028     int layouttype, int layoutupdatecnt, uint8_t *layp, struct ucred *cred,
5029     NFSPROC_T *p, void *stuff)
5030 {
5031 	uint32_t *tl;
5032 	struct nfsrv_descript nfsd, *nd = &nfsd;
5033 	int error, outcnt, i;
5034 	uint8_t *cp;
5035 
5036 	nfscl_reqstart(nd, NFSPROC_LAYOUTCOMMIT, nmp, fh, fhlen, NULL, NULL);
5037 	NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
5038 	    NFSX_STATEID);
5039 	txdr_hyper(off, tl);
5040 	tl += 2;
5041 	txdr_hyper(len, tl);
5042 	tl += 2;
5043 	if (reclaim != 0)
5044 		*tl++ = newnfs_true;
5045 	else
5046 		*tl++ = newnfs_false;
5047 	*tl++ = txdr_unsigned(stateidp->seqid);
5048 	*tl++ = stateidp->other[0];
5049 	*tl++ = stateidp->other[1];
5050 	*tl++ = stateidp->other[2];
5051 	*tl++ = newnfs_true;
5052 	if (lastbyte < off)
5053 		lastbyte = off;
5054 	else if (lastbyte >= (off + len))
5055 		lastbyte = off + len - 1;
5056 	txdr_hyper(lastbyte, tl);
5057 	tl += 2;
5058 	*tl++ = newnfs_false;
5059 	*tl++ = txdr_unsigned(layouttype);
5060 	*tl = txdr_unsigned(layoutupdatecnt);
5061 	if (layoutupdatecnt > 0) {
5062 		KASSERT(layouttype != NFSLAYOUT_NFSV4_1_FILES,
5063 		    ("Must be nil for Files Layout"));
5064 		outcnt = NFSM_RNDUP(layoutupdatecnt);
5065 		NFSM_BUILD(cp, uint8_t *, outcnt);
5066 		NFSBCOPY(layp, cp, layoutupdatecnt);
5067 		cp += layoutupdatecnt;
5068 		for (i = 0; i < (outcnt - layoutupdatecnt); i++)
5069 			*cp++ = 0x0;
5070 	}
5071 	nd->nd_flag |= ND_USEGSSNAME;
5072 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5073 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5074 	if (error != 0)
5075 		return (error);
5076 	error = nd->nd_repstat;
5077 	mbuf_freem(nd->nd_mrep);
5078 	return (error);
5079 }
5080 
5081 /*
5082  * Do the NFSv4.1 LayoutReturn.
5083  */
5084 int
5085 nfsrpc_layoutreturn(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5086     int layouttype, uint32_t iomode, int layoutreturn, uint64_t offset,
5087     uint64_t len, nfsv4stateid_t *stateidp, int layoutcnt, uint32_t *layp,
5088     struct ucred *cred, NFSPROC_T *p, void *stuff)
5089 {
5090 	uint32_t *tl;
5091 	struct nfsrv_descript nfsd, *nd = &nfsd;
5092 	int error, outcnt, i;
5093 	uint8_t *cp;
5094 
5095 	nfscl_reqstart(nd, NFSPROC_LAYOUTRETURN, nmp, fh, fhlen, NULL, NULL);
5096 	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
5097 	if (reclaim != 0)
5098 		*tl++ = newnfs_true;
5099 	else
5100 		*tl++ = newnfs_false;
5101 	*tl++ = txdr_unsigned(layouttype);
5102 	*tl++ = txdr_unsigned(iomode);
5103 	*tl = txdr_unsigned(layoutreturn);
5104 	if (layoutreturn == NFSLAYOUTRETURN_FILE) {
5105 		NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
5106 		    NFSX_UNSIGNED);
5107 		txdr_hyper(offset, tl);
5108 		tl += 2;
5109 		txdr_hyper(len, tl);
5110 		tl += 2;
5111 		NFSCL_DEBUG(4, "layoutret stseq=%d\n", (int)stateidp->seqid);
5112 		*tl++ = txdr_unsigned(stateidp->seqid);
5113 		*tl++ = stateidp->other[0];
5114 		*tl++ = stateidp->other[1];
5115 		*tl++ = stateidp->other[2];
5116 		*tl = txdr_unsigned(layoutcnt);
5117 		if (layoutcnt > 0) {
5118 			outcnt = NFSM_RNDUP(layoutcnt);
5119 			NFSM_BUILD(cp, uint8_t *, outcnt);
5120 			NFSBCOPY(layp, cp, layoutcnt);
5121 			cp += layoutcnt;
5122 			for (i = 0; i < (outcnt - layoutcnt); i++)
5123 				*cp++ = 0x0;
5124 		}
5125 	}
5126 	nd->nd_flag |= ND_USEGSSNAME;
5127 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5128 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5129 	if (error != 0)
5130 		return (error);
5131 	if (nd->nd_repstat == 0) {
5132 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5133 		if (*tl != 0) {
5134 			NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
5135 			stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
5136 			stateidp->other[0] = *tl++;
5137 			stateidp->other[1] = *tl++;
5138 			stateidp->other[2] = *tl;
5139 		}
5140 	} else
5141 		error = nd->nd_repstat;
5142 nfsmout:
5143 	mbuf_freem(nd->nd_mrep);
5144 	return (error);
5145 }
5146 
5147 /*
5148  * Acquire a layout and devinfo, if possible. The caller must have acquired
5149  * a reference count on the nfsclclient structure before calling this.
5150  * Return the layout in lypp with a reference count on it, if successful.
5151  */
5152 static int
5153 nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp,
5154     int iomode, uint32_t *notifybitsp, nfsv4stateid_t *stateidp, uint64_t off,
5155     struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p)
5156 {
5157 	struct nfscllayout *lyp;
5158 	struct nfsclflayout *flp;
5159 	struct nfsclflayouthead flh;
5160 	int error = 0, islocked, layoutlen, recalled, retonclose;
5161 	nfsv4stateid_t stateid;
5162 	struct nfsclsession *tsep;
5163 
5164 	*lypp = NULL;
5165 	/*
5166 	 * If lyp is returned non-NULL, there will be a refcnt (shared lock)
5167 	 * on it, iff flp != NULL or a lock (exclusive lock) on it iff
5168 	 * flp == NULL.
5169 	 */
5170 	lyp = nfscl_getlayout(nmp->nm_clp, nfhp->nfh_fh, nfhp->nfh_len,
5171 	    off, &flp, &recalled);
5172 	islocked = 0;
5173 	if (lyp == NULL || flp == NULL) {
5174 		if (recalled != 0)
5175 			return (EIO);
5176 		LIST_INIT(&flh);
5177 		tsep = nfsmnt_mdssession(nmp);
5178 		layoutlen = tsep->nfsess_maxcache -
5179 		    (NFSX_STATEID + 3 * NFSX_UNSIGNED);
5180 		if (lyp == NULL) {
5181 			stateid.seqid = 0;
5182 			stateid.other[0] = stateidp->other[0];
5183 			stateid.other[1] = stateidp->other[1];
5184 			stateid.other[2] = stateidp->other[2];
5185 			error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5186 			    nfhp->nfh_len, iomode, (uint64_t)0, UINT64_MAX,
5187 			    (uint64_t)0, layoutlen, &stateid, &retonclose,
5188 			    &flh, cred, p, NULL);
5189 		} else {
5190 			islocked = 1;
5191 			stateid.seqid = lyp->nfsly_stateid.seqid;
5192 			stateid.other[0] = lyp->nfsly_stateid.other[0];
5193 			stateid.other[1] = lyp->nfsly_stateid.other[1];
5194 			stateid.other[2] = lyp->nfsly_stateid.other[2];
5195 			error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5196 			    nfhp->nfh_len, iomode, off, UINT64_MAX,
5197 			    (uint64_t)0, layoutlen, &stateid, &retonclose,
5198 			    &flh, cred, p, NULL);
5199 		}
5200 		error = nfsrpc_layoutgetres(nmp, vp, nfhp->nfh_fh,
5201 		    nfhp->nfh_len, &stateid, retonclose, notifybitsp, &lyp,
5202 		    &flh, error, NULL, cred, p);
5203 		if (error == 0)
5204 			*lypp = lyp;
5205 		else if (islocked != 0)
5206 			nfscl_rellayout(lyp, 1);
5207 	} else
5208 		*lypp = lyp;
5209 	return (error);
5210 }
5211 
5212 /*
5213  * Do a TCP connection plus exchange id and create session.
5214  * If successful, a "struct nfsclds" is linked into the list for the
5215  * mount point and a pointer to it is returned.
5216  */
5217 static int
5218 nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_storage *ssp,
5219     struct nfsclds **dspp, NFSPROC_T *p)
5220 {
5221 	struct sockaddr_in *msad, *sad, *ssd;
5222 	struct sockaddr_in6 *msad6, *sad6, *ssd6;
5223 	struct nfsclclient *clp;
5224 	struct nfssockreq *nrp;
5225 	struct nfsclds *dsp, *tdsp;
5226 	int error;
5227 	enum nfsclds_state retv;
5228 	uint32_t sequenceid;
5229 
5230 	KASSERT(nmp->nm_sockreq.nr_cred != NULL,
5231 	    ("nfsrpc_fillsa: NULL nr_cred"));
5232 	NFSLOCKCLSTATE();
5233 	clp = nmp->nm_clp;
5234 	NFSUNLOCKCLSTATE();
5235 	if (clp == NULL)
5236 		return (EPERM);
5237 	if (ssp->ss_family == AF_INET) {
5238 		ssd = (struct sockaddr_in *)ssp;
5239 		NFSLOCKMNT(nmp);
5240 
5241 		/*
5242 		 * Check to see if we already have a session for this
5243 		 * address that is usable for a DS.
5244 		 * Note that the MDS's address is in a different place
5245 		 * than the sessions already acquired for DS's.
5246 		 */
5247 		msad = (struct sockaddr_in *)nmp->nm_sockreq.nr_nam;
5248 		tdsp = TAILQ_FIRST(&nmp->nm_sess);
5249 		while (tdsp != NULL) {
5250 			if (msad != NULL && msad->sin_family == AF_INET &&
5251 			    ssd->sin_addr.s_addr == msad->sin_addr.s_addr &&
5252 			    ssd->sin_port == msad->sin_port &&
5253 			    (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
5254 			    tdsp->nfsclds_sess.nfsess_defunct == 0) {
5255 				*dspp = tdsp;
5256 				NFSUNLOCKMNT(nmp);
5257 				NFSCL_DEBUG(4, "fnd same addr\n");
5258 				return (0);
5259 			}
5260 			tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
5261 			if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
5262 				msad = (struct sockaddr_in *)
5263 				    tdsp->nfsclds_sockp->nr_nam;
5264 			else
5265 				msad = NULL;
5266 		}
5267 		NFSUNLOCKMNT(nmp);
5268 
5269 		/* No IP address match, so look for new/trunked one. */
5270 		sad = malloc(sizeof(*sad), M_SONAME, M_WAITOK | M_ZERO);
5271 		sad->sin_len = sizeof(*sad);
5272 		sad->sin_family = AF_INET;
5273 		sad->sin_port = ssd->sin_port;
5274 		sad->sin_addr.s_addr = ssd->sin_addr.s_addr;
5275 		nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
5276 		nrp->nr_nam = (struct sockaddr *)sad;
5277 	} else if (ssp->ss_family == AF_INET6) {
5278 		ssd6 = (struct sockaddr_in6 *)ssp;
5279 		NFSLOCKMNT(nmp);
5280 
5281 		/*
5282 		 * Check to see if we already have a session for this
5283 		 * address that is usable for a DS.
5284 		 * Note that the MDS's address is in a different place
5285 		 * than the sessions already acquired for DS's.
5286 		 */
5287 		msad6 = (struct sockaddr_in6 *)nmp->nm_sockreq.nr_nam;
5288 		tdsp = TAILQ_FIRST(&nmp->nm_sess);
5289 		while (tdsp != NULL) {
5290 			if (msad6 != NULL && msad6->sin6_family == AF_INET6 &&
5291 			    IN6_ARE_ADDR_EQUAL(&ssd6->sin6_addr,
5292 			    &msad6->sin6_addr) &&
5293 			    ssd6->sin6_port == msad6->sin6_port &&
5294 			    (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
5295 			    tdsp->nfsclds_sess.nfsess_defunct == 0) {
5296 				*dspp = tdsp;
5297 				NFSUNLOCKMNT(nmp);
5298 				return (0);
5299 			}
5300 			tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
5301 			if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
5302 				msad6 = (struct sockaddr_in6 *)
5303 				    tdsp->nfsclds_sockp->nr_nam;
5304 			else
5305 				msad6 = NULL;
5306 		}
5307 		NFSUNLOCKMNT(nmp);
5308 
5309 		/* No IP address match, so look for new/trunked one. */
5310 		sad6 = malloc(sizeof(*sad6), M_SONAME, M_WAITOK | M_ZERO);
5311 		sad6->sin6_len = sizeof(*sad6);
5312 		sad6->sin6_family = AF_INET6;
5313 		sad6->sin6_port = ssd6->sin6_port;
5314 		NFSBCOPY(&ssd6->sin6_addr, &sad6->sin6_addr,
5315 		    sizeof(struct in6_addr));
5316 		nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
5317 		nrp->nr_nam = (struct sockaddr *)sad6;
5318 	} else
5319 		return (EPERM);
5320 
5321 	nrp->nr_sotype = SOCK_STREAM;
5322 	mtx_init(&nrp->nr_mtx, "nfssock", NULL, MTX_DEF);
5323 	nrp->nr_prog = NFS_PROG;
5324 	nrp->nr_vers = NFS_VER4;
5325 
5326 	/*
5327 	 * Use the credentials that were used for the mount, which are
5328 	 * in nmp->nm_sockreq.nr_cred for newnfs_connect() etc.
5329 	 * Ref. counting the credentials with crhold() is probably not
5330 	 * necessary, since nm_sockreq.nr_cred won't be crfree()'d until
5331 	 * unmount, but I did it anyhow.
5332 	 */
5333 	nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred);
5334 	error = newnfs_connect(nmp, nrp, NULL, p, 0);
5335 	NFSCL_DEBUG(3, "DS connect=%d\n", error);
5336 
5337 	/* Now, do the exchangeid and create session. */
5338 	if (error == 0) {
5339 		error = nfsrpc_exchangeid(nmp, clp, nrp, NFSV4EXCH_USEPNFSDS,
5340 		    &dsp, nrp->nr_cred, p);
5341 		NFSCL_DEBUG(3, "DS exchangeid=%d\n", error);
5342 		if (error != 0)
5343 			newnfs_disconnect(nrp);
5344 	}
5345 	if (error == 0) {
5346 		dsp->nfsclds_sockp = nrp;
5347 		NFSLOCKMNT(nmp);
5348 		retv = nfscl_getsameserver(nmp, dsp, &tdsp);
5349 		NFSCL_DEBUG(3, "getsame ret=%d\n", retv);
5350 		if (retv == NFSDSP_USETHISSESSION) {
5351 			NFSUNLOCKMNT(nmp);
5352 			/*
5353 			 * If there is already a session for this server,
5354 			 * use it.
5355 			 */
5356 			(void)newnfs_disconnect(nrp);
5357 			nfscl_freenfsclds(dsp);
5358 			*dspp = tdsp;
5359 			return (0);
5360 		}
5361 		if (retv == NFSDSP_SEQTHISSESSION)
5362 			sequenceid = tdsp->nfsclds_sess.nfsess_sequenceid;
5363 		else
5364 			sequenceid = dsp->nfsclds_sess.nfsess_sequenceid;
5365 		NFSUNLOCKMNT(nmp);
5366 		error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
5367 		    nrp, sequenceid, 0, nrp->nr_cred, p);
5368 		NFSCL_DEBUG(3, "DS createsess=%d\n", error);
5369 	} else {
5370 		NFSFREECRED(nrp->nr_cred);
5371 		NFSFREEMUTEX(&nrp->nr_mtx);
5372 		free(nrp->nr_nam, M_SONAME);
5373 		free(nrp, M_NFSSOCKREQ);
5374 	}
5375 	if (error == 0) {
5376 		NFSCL_DEBUG(3, "add DS session\n");
5377 		/*
5378 		 * Put it at the end of the list. That way the list
5379 		 * is ordered by when the entry was added. This matters
5380 		 * since the one done first is the one that should be
5381 		 * used for sequencid'ing any subsequent create sessions.
5382 		 */
5383 		NFSLOCKMNT(nmp);
5384 		TAILQ_INSERT_TAIL(&nmp->nm_sess, dsp, nfsclds_list);
5385 		NFSUNLOCKMNT(nmp);
5386 		*dspp = dsp;
5387 	} else if (dsp != NULL) {
5388 		newnfs_disconnect(nrp);
5389 		nfscl_freenfsclds(dsp);
5390 	}
5391 	return (error);
5392 }
5393 
5394 /*
5395  * Do the NFSv4.1 Reclaim Complete.
5396  */
5397 int
5398 nfsrpc_reclaimcomplete(struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
5399 {
5400 	uint32_t *tl;
5401 	struct nfsrv_descript nfsd;
5402 	struct nfsrv_descript *nd = &nfsd;
5403 	int error;
5404 
5405 	nfscl_reqstart(nd, NFSPROC_RECLAIMCOMPL, nmp, NULL, 0, NULL, NULL);
5406 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
5407 	*tl = newnfs_false;
5408 	nd->nd_flag |= ND_USEGSSNAME;
5409 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5410 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5411 	if (error != 0)
5412 		return (error);
5413 	error = nd->nd_repstat;
5414 	mbuf_freem(nd->nd_mrep);
5415 	return (error);
5416 }
5417 
5418 /*
5419  * Initialize the slot tables for a session.
5420  */
5421 static void
5422 nfscl_initsessionslots(struct nfsclsession *sep)
5423 {
5424 	int i;
5425 
5426 	for (i = 0; i < NFSV4_CBSLOTS; i++) {
5427 		if (sep->nfsess_cbslots[i].nfssl_reply != NULL)
5428 			m_freem(sep->nfsess_cbslots[i].nfssl_reply);
5429 		NFSBZERO(&sep->nfsess_cbslots[i], sizeof(struct nfsslot));
5430 	}
5431 	for (i = 0; i < 64; i++)
5432 		sep->nfsess_slotseq[i] = 0;
5433 	sep->nfsess_slots = 0;
5434 }
5435 
5436 /*
5437  * Called to try and do an I/O operation via an NFSv4.1 Data Server (DS).
5438  */
5439 int
5440 nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
5441     uint32_t rwaccess, int docommit, struct ucred *cred, NFSPROC_T *p)
5442 {
5443 	struct nfsnode *np = VTONFS(vp);
5444 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
5445 	struct nfscllayout *layp;
5446 	struct nfscldevinfo *dip;
5447 	struct nfsclflayout *rflp;
5448 	nfsv4stateid_t stateid;
5449 	struct ucred *newcred;
5450 	uint64_t lastbyte, len, off, oresid, xfer;
5451 	int eof, error, iolaymode, recalled;
5452 	void *lckp;
5453 
5454 	if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
5455 	    (np->n_flag & NNOLAYOUT) != 0)
5456 		return (EIO);
5457 	/* Now, get a reference cnt on the clientid for this mount. */
5458 	if (nfscl_getref(nmp) == 0)
5459 		return (EIO);
5460 
5461 	/* Find an appropriate stateid. */
5462 	newcred = NFSNEWCRED(cred);
5463 	error = nfscl_getstateid(vp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
5464 	    rwaccess, 1, newcred, p, &stateid, &lckp);
5465 	if (error != 0) {
5466 		NFSFREECRED(newcred);
5467 		nfscl_relref(nmp);
5468 		return (error);
5469 	}
5470 	/* Search for a layout for this file. */
5471 	off = uiop->uio_offset;
5472 	layp = nfscl_getlayout(nmp->nm_clp, np->n_fhp->nfh_fh,
5473 	    np->n_fhp->nfh_len, off, &rflp, &recalled);
5474 	if (layp == NULL || rflp == NULL) {
5475 		if (recalled != 0) {
5476 			NFSFREECRED(newcred);
5477 			nfscl_relref(nmp);
5478 			return (EIO);
5479 		}
5480 		if (layp != NULL) {
5481 			nfscl_rellayout(layp, (rflp == NULL) ? 1 : 0);
5482 			layp = NULL;
5483 		}
5484 		/* Try and get a Layout, if it is supported. */
5485 		if (rwaccess == NFSV4OPEN_ACCESSWRITE ||
5486 		    (np->n_flag & NWRITEOPENED) != 0)
5487 			iolaymode = NFSLAYOUTIOMODE_RW;
5488 		else
5489 			iolaymode = NFSLAYOUTIOMODE_READ;
5490 		error = nfsrpc_getlayout(nmp, vp, np->n_fhp, iolaymode,
5491 		    NULL, &stateid, off, &layp, newcred, p);
5492 		if (error != 0) {
5493 			NFSLOCKNODE(np);
5494 			np->n_flag |= NNOLAYOUT;
5495 			NFSUNLOCKNODE(np);
5496 			if (lckp != NULL)
5497 				nfscl_lockderef(lckp);
5498 			NFSFREECRED(newcred);
5499 			if (layp != NULL)
5500 				nfscl_rellayout(layp, 0);
5501 			nfscl_relref(nmp);
5502 			return (error);
5503 		}
5504 	}
5505 
5506 	/*
5507 	 * Loop around finding a layout that works for the first part of
5508 	 * this I/O operation, and then call the function that actually
5509 	 * does the RPC.
5510 	 */
5511 	eof = 0;
5512 	len = (uint64_t)uiop->uio_resid;
5513 	while (len > 0 && error == 0 && eof == 0) {
5514 		off = uiop->uio_offset;
5515 		error = nfscl_findlayoutforio(layp, off, rwaccess, &rflp);
5516 		if (error == 0) {
5517 			oresid = xfer = (uint64_t)uiop->uio_resid;
5518 			if (xfer > (rflp->nfsfl_end - rflp->nfsfl_off))
5519 				xfer = rflp->nfsfl_end - rflp->nfsfl_off;
5520 			dip = nfscl_getdevinfo(nmp->nm_clp, rflp->nfsfl_dev,
5521 			    rflp->nfsfl_devp);
5522 			if (dip != NULL) {
5523 				error = nfscl_doflayoutio(vp, uiop, iomode,
5524 				    must_commit, &eof, &stateid, rwaccess, dip,
5525 				    layp, rflp, off, xfer, docommit, newcred,
5526 				    p);
5527 				nfscl_reldevinfo(dip);
5528 				lastbyte = off + xfer - 1;
5529 				if (error == 0) {
5530 					NFSLOCKCLSTATE();
5531 					if (lastbyte > layp->nfsly_lastbyte)
5532 						layp->nfsly_lastbyte = lastbyte;
5533 					NFSUNLOCKCLSTATE();
5534 				} else if (error == NFSERR_OPENMODE &&
5535 				    rwaccess == NFSV4OPEN_ACCESSREAD) {
5536 					NFSLOCKMNT(nmp);
5537 					nmp->nm_state |= NFSSTA_OPENMODE;
5538 					NFSUNLOCKMNT(nmp);
5539 				}
5540 			} else
5541 				error = EIO;
5542 			if (error == 0)
5543 				len -= (oresid - (uint64_t)uiop->uio_resid);
5544 		}
5545 	}
5546 	if (lckp != NULL)
5547 		nfscl_lockderef(lckp);
5548 	NFSFREECRED(newcred);
5549 	nfscl_rellayout(layp, 0);
5550 	nfscl_relref(nmp);
5551 	return (error);
5552 }
5553 
5554 /*
5555  * Find a file layout that will handle the first bytes of the requested
5556  * range and return the information from it needed to to the I/O operation.
5557  */
5558 int
5559 nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess,
5560     struct nfsclflayout **retflpp)
5561 {
5562 	struct nfsclflayout *flp, *nflp, *rflp;
5563 	uint32_t rw;
5564 
5565 	rflp = NULL;
5566 	rw = rwaccess;
5567 	/* For reading, do the Read list first and then the Write list. */
5568 	do {
5569 		if (rw == NFSV4OPEN_ACCESSREAD)
5570 			flp = LIST_FIRST(&lyp->nfsly_flayread);
5571 		else
5572 			flp = LIST_FIRST(&lyp->nfsly_flayrw);
5573 		while (flp != NULL) {
5574 			nflp = LIST_NEXT(flp, nfsfl_list);
5575 			if (flp->nfsfl_off > off)
5576 				break;
5577 			if (flp->nfsfl_end > off &&
5578 			    (rflp == NULL || rflp->nfsfl_end < flp->nfsfl_end))
5579 				rflp = flp;
5580 			flp = nflp;
5581 		}
5582 		if (rw == NFSV4OPEN_ACCESSREAD)
5583 			rw = NFSV4OPEN_ACCESSWRITE;
5584 		else
5585 			rw = 0;
5586 	} while (rw != 0);
5587 	if (rflp != NULL) {
5588 		/* This one covers the most bytes starting at off. */
5589 		*retflpp = rflp;
5590 		return (0);
5591 	}
5592 	return (EIO);
5593 }
5594 
5595 /*
5596  * Do I/O using an NFSv4.1 file layout.
5597  */
5598 static int
5599 nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
5600     int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
5601     struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
5602     uint64_t len, int docommit, struct ucred *cred, NFSPROC_T *p)
5603 {
5604 	uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer;
5605 	int commit_thru_mds, error, stripe_index, stripe_pos;
5606 	struct nfsnode *np;
5607 	struct nfsfh *fhp;
5608 	struct nfsclds **dspp;
5609 
5610 	np = VTONFS(vp);
5611 	rel_off = off - flp->nfsfl_patoff;
5612 	stripe_unit_size = (flp->nfsfl_util >> 6) & 0x3ffffff;
5613 	stripe_pos = (rel_off / stripe_unit_size + flp->nfsfl_stripe1) %
5614 	    dp->nfsdi_stripecnt;
5615 	transfer = stripe_unit_size - (rel_off % stripe_unit_size);
5616 	error = 0;
5617 
5618 	/* Loop around, doing I/O for each stripe unit. */
5619 	while (len > 0 && error == 0) {
5620 		stripe_index = nfsfldi_stripeindex(dp, stripe_pos);
5621 		dspp = nfsfldi_addr(dp, stripe_index);
5622 		if (len > transfer && docommit == 0)
5623 			xfer = transfer;
5624 		else
5625 			xfer = len;
5626 		if ((flp->nfsfl_util & NFSFLAYUTIL_DENSE) != 0) {
5627 			/* Dense layout. */
5628 			if (stripe_pos >= flp->nfsfl_fhcnt)
5629 				return (EIO);
5630 			fhp = flp->nfsfl_fh[stripe_pos];
5631 			io_off = (rel_off / (stripe_unit_size *
5632 			    dp->nfsdi_stripecnt)) * stripe_unit_size +
5633 			    rel_off % stripe_unit_size;
5634 		} else {
5635 			/* Sparse layout. */
5636 			if (flp->nfsfl_fhcnt > 1) {
5637 				if (stripe_index >= flp->nfsfl_fhcnt)
5638 					return (EIO);
5639 				fhp = flp->nfsfl_fh[stripe_index];
5640 			} else if (flp->nfsfl_fhcnt == 1)
5641 				fhp = flp->nfsfl_fh[0];
5642 			else
5643 				fhp = np->n_fhp;
5644 			io_off = off;
5645 		}
5646 		if ((flp->nfsfl_util & NFSFLAYUTIL_COMMIT_THRU_MDS) != 0) {
5647 			commit_thru_mds = 1;
5648 			if (docommit != 0)
5649 				error = EIO;
5650 		} else {
5651 			commit_thru_mds = 0;
5652 			mtx_lock(&np->n_mtx);
5653 			np->n_flag |= NDSCOMMIT;
5654 			mtx_unlock(&np->n_mtx);
5655 		}
5656 		if (docommit != 0) {
5657 			if (error == 0)
5658 				error = nfsrpc_commitds(vp, io_off, xfer,
5659 				    *dspp, fhp, cred, p);
5660 			if (error == 0) {
5661 				/*
5662 				 * Set both eof and uio_resid = 0 to end any
5663 				 * loops.
5664 				 */
5665 				*eofp = 1;
5666 				uiop->uio_resid = 0;
5667 			} else {
5668 				mtx_lock(&np->n_mtx);
5669 				np->n_flag &= ~NDSCOMMIT;
5670 				mtx_unlock(&np->n_mtx);
5671 			}
5672 		} else if (rwflag == FREAD)
5673 			error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
5674 			    io_off, xfer, fhp, cred, p);
5675 		else {
5676 			error = nfsrpc_writeds(vp, uiop, iomode, must_commit,
5677 			    stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds,
5678 			    cred, p);
5679 			if (error == 0) {
5680 				NFSLOCKCLSTATE();
5681 				lyp->nfsly_flags |= NFSLY_WRITTEN;
5682 				NFSUNLOCKCLSTATE();
5683 			}
5684 		}
5685 		if (error == 0) {
5686 			transfer = stripe_unit_size;
5687 			stripe_pos = (stripe_pos + 1) % dp->nfsdi_stripecnt;
5688 			len -= xfer;
5689 			off += xfer;
5690 		}
5691 	}
5692 	return (error);
5693 }
5694 
5695 /*
5696  * The actual read RPC done to a DS.
5697  */
5698 static int
5699 nfsrpc_readds(vnode_t vp, struct uio *uiop, nfsv4stateid_t *stateidp, int *eofp,
5700     struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp,
5701     struct ucred *cred, NFSPROC_T *p)
5702 {
5703 	uint32_t *tl;
5704 	int error, retlen;
5705 	struct nfsrv_descript nfsd;
5706 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
5707 	struct nfsrv_descript *nd = &nfsd;
5708 	struct nfssockreq *nrp;
5709 
5710 	nd->nd_mrep = NULL;
5711 	nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh, fhp->nfh_len,
5712 	    NULL, &dsp->nfsclds_sess);
5713 	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
5714 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
5715 	txdr_hyper(io_off, tl);
5716 	*(tl + 2) = txdr_unsigned(len);
5717 	nrp = dsp->nfsclds_sockp;
5718 	if (nrp == NULL)
5719 		/* If NULL, use the MDS socket. */
5720 		nrp = &nmp->nm_sockreq;
5721 	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
5722 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
5723 	if (error != 0)
5724 		return (error);
5725 	if (nd->nd_repstat != 0) {
5726 		error = nd->nd_repstat;
5727 		goto nfsmout;
5728 	}
5729 	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5730 	*eofp = fxdr_unsigned(int, *tl);
5731 	NFSM_STRSIZ(retlen, len);
5732 	error = nfsm_mbufuio(nd, uiop, retlen);
5733 nfsmout:
5734 	if (nd->nd_mrep != NULL)
5735 		mbuf_freem(nd->nd_mrep);
5736 	return (error);
5737 }
5738 
5739 /*
5740  * The actual write RPC done to a DS.
5741  */
5742 static int
5743 nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
5744     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
5745     struct nfsfh *fhp, int commit_thru_mds, struct ucred *cred, NFSPROC_T *p)
5746 {
5747 	uint32_t *tl;
5748 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
5749 	int error, rlen, commit, committed = NFSWRITE_FILESYNC;
5750 	int32_t backup;
5751 	struct nfsrv_descript nfsd;
5752 	struct nfsrv_descript *nd = &nfsd;
5753 	struct nfssockreq *nrp;
5754 
5755 	KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
5756 	nd->nd_mrep = NULL;
5757 	nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh, fhp->nfh_len,
5758 	    NULL, &dsp->nfsclds_sess);
5759 	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
5760 	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
5761 	txdr_hyper(io_off, tl);
5762 	tl += 2;
5763 	*tl++ = txdr_unsigned(*iomode);
5764 	*tl = txdr_unsigned(len);
5765 	nfsm_uiombuf(nd, uiop, len);
5766 	nrp = dsp->nfsclds_sockp;
5767 	if (nrp == NULL)
5768 		/* If NULL, use the MDS socket. */
5769 		nrp = &nmp->nm_sockreq;
5770 	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
5771 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
5772 	if (error != 0)
5773 		return (error);
5774 	if (nd->nd_repstat != 0) {
5775 		/*
5776 		 * In case the rpc gets retried, roll
5777 		 * the uio fileds changed by nfsm_uiombuf()
5778 		 * back.
5779 		 */
5780 		uiop->uio_offset -= len;
5781 		uio_uio_resid_add(uiop, len);
5782 		uio_iov_base_add(uiop, -len);
5783 		uio_iov_len_add(uiop, len);
5784 		error = nd->nd_repstat;
5785 	} else {
5786 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
5787 		rlen = fxdr_unsigned(int, *tl++);
5788 		if (rlen == 0) {
5789 			error = NFSERR_IO;
5790 			goto nfsmout;
5791 		} else if (rlen < len) {
5792 			backup = len - rlen;
5793 			uio_iov_base_add(uiop, -(backup));
5794 			uio_iov_len_add(uiop, backup);
5795 			uiop->uio_offset -= backup;
5796 			uio_uio_resid_add(uiop, backup);
5797 			len = rlen;
5798 		}
5799 		commit = fxdr_unsigned(int, *tl++);
5800 
5801 		/*
5802 		 * Return the lowest commitment level
5803 		 * obtained by any of the RPCs.
5804 		 */
5805 		if (committed == NFSWRITE_FILESYNC)
5806 			committed = commit;
5807 		else if (committed == NFSWRITE_DATASYNC &&
5808 		    commit == NFSWRITE_UNSTABLE)
5809 			committed = commit;
5810 		if (commit_thru_mds != 0) {
5811 			NFSLOCKMNT(nmp);
5812 			if (!NFSHASWRITEVERF(nmp)) {
5813 				NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
5814 				NFSSETWRITEVERF(nmp);
5815 	    		} else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) {
5816 				*must_commit = 1;
5817 				NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
5818 			}
5819 			NFSUNLOCKMNT(nmp);
5820 		} else {
5821 			NFSLOCKDS(dsp);
5822 			if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
5823 				NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
5824 				dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
5825 			} else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
5826 				*must_commit = 1;
5827 				NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
5828 			}
5829 			NFSUNLOCKDS(dsp);
5830 		}
5831 	}
5832 nfsmout:
5833 	if (nd->nd_mrep != NULL)
5834 		mbuf_freem(nd->nd_mrep);
5835 	*iomode = committed;
5836 	if (nd->nd_repstat != 0 && error == 0)
5837 		error = nd->nd_repstat;
5838 	return (error);
5839 }
5840 
5841 /*
5842  * Free up the nfsclds structure.
5843  */
5844 void
5845 nfscl_freenfsclds(struct nfsclds *dsp)
5846 {
5847 	int i;
5848 
5849 	if (dsp == NULL)
5850 		return;
5851 	if (dsp->nfsclds_sockp != NULL) {
5852 		NFSFREECRED(dsp->nfsclds_sockp->nr_cred);
5853 		NFSFREEMUTEX(&dsp->nfsclds_sockp->nr_mtx);
5854 		free(dsp->nfsclds_sockp->nr_nam, M_SONAME);
5855 		free(dsp->nfsclds_sockp, M_NFSSOCKREQ);
5856 	}
5857 	NFSFREEMUTEX(&dsp->nfsclds_mtx);
5858 	NFSFREEMUTEX(&dsp->nfsclds_sess.nfsess_mtx);
5859 	for (i = 0; i < NFSV4_CBSLOTS; i++) {
5860 		if (dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply != NULL)
5861 			m_freem(
5862 			    dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply);
5863 	}
5864 	free(dsp, M_NFSCLDS);
5865 }
5866 
5867 static enum nfsclds_state
5868 nfscl_getsameserver(struct nfsmount *nmp, struct nfsclds *newdsp,
5869     struct nfsclds **retdspp)
5870 {
5871 	struct nfsclds *dsp, *cur_dsp;
5872 
5873 	/*
5874 	 * Search the list of nfsclds structures for one with the same
5875 	 * server.
5876 	 */
5877 	cur_dsp = NULL;
5878 	TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) {
5879 		if (dsp->nfsclds_servownlen == newdsp->nfsclds_servownlen &&
5880 		    dsp->nfsclds_servownlen != 0 &&
5881 		    !NFSBCMP(dsp->nfsclds_serverown, newdsp->nfsclds_serverown,
5882 		    dsp->nfsclds_servownlen) &&
5883 		    dsp->nfsclds_sess.nfsess_defunct == 0) {
5884 			NFSCL_DEBUG(4, "fnd same fdsp=%p dsp=%p flg=0x%x\n",
5885 			    TAILQ_FIRST(&nmp->nm_sess), dsp,
5886 			    dsp->nfsclds_flags);
5887 			/* Server major id matches. */
5888 			if ((dsp->nfsclds_flags & NFSCLDS_DS) != 0) {
5889 				*retdspp = dsp;
5890 				return (NFSDSP_USETHISSESSION);
5891 			}
5892 
5893 			/*
5894 			 * Note the first match, so it can be used for
5895 			 * sequence'ing new sessions.
5896 			 */
5897 			if (cur_dsp == NULL)
5898 				cur_dsp = dsp;
5899 		}
5900 	}
5901 	if (cur_dsp != NULL) {
5902 		*retdspp = cur_dsp;
5903 		return (NFSDSP_SEQTHISSESSION);
5904 	}
5905 	return (NFSDSP_NOTFOUND);
5906 }
5907 
5908 /*
5909  * NFS commit rpc to a NFSv4.1 DS.
5910  */
5911 static int
5912 nfsrpc_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
5913     struct nfsfh *fhp, struct ucred *cred, NFSPROC_T *p)
5914 {
5915 	uint32_t *tl;
5916 	struct nfsrv_descript nfsd, *nd = &nfsd;
5917 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
5918 	struct nfssockreq *nrp;
5919 	int error;
5920 
5921 	nd->nd_mrep = NULL;
5922 	nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh, fhp->nfh_len,
5923 	    NULL, &dsp->nfsclds_sess);
5924 	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
5925 	txdr_hyper(offset, tl);
5926 	tl += 2;
5927 	*tl = txdr_unsigned(cnt);
5928 	nrp = dsp->nfsclds_sockp;
5929 	if (nrp == NULL)
5930 		/* If NULL, use the MDS socket. */
5931 		nrp = &nmp->nm_sockreq;
5932 	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
5933 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
5934 	if (error != 0)
5935 		return (error);
5936 	if (nd->nd_repstat == 0) {
5937 		NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
5938 		NFSLOCKDS(dsp);
5939 		if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
5940 			NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
5941 			error = NFSERR_STALEWRITEVERF;
5942 		}
5943 		NFSUNLOCKDS(dsp);
5944 	}
5945 nfsmout:
5946 	if (error == 0 && nd->nd_repstat != 0)
5947 		error = nd->nd_repstat;
5948 	mbuf_freem(nd->nd_mrep);
5949 	return (error);
5950 }
5951 
5952 /*
5953  * Set up the XDR arguments for the LayoutGet operation.
5954  */
5955 static void
5956 nfsrv_setuplayoutget(struct nfsrv_descript *nd, int iomode, uint64_t offset,
5957     uint64_t len, uint64_t minlen, nfsv4stateid_t *stateidp, int layoutlen,
5958     int usecurstateid)
5959 {
5960 	uint32_t *tl;
5961 
5962 	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
5963 	    NFSX_STATEID);
5964 	*tl++ = newnfs_false;		/* Don't signal availability. */
5965 	*tl++ = txdr_unsigned(NFSLAYOUT_NFSV4_1_FILES);
5966 	*tl++ = txdr_unsigned(iomode);
5967 	txdr_hyper(offset, tl);
5968 	tl += 2;
5969 	txdr_hyper(len, tl);
5970 	tl += 2;
5971 	txdr_hyper(minlen, tl);
5972 	tl += 2;
5973 	if (usecurstateid != 0) {
5974 		/* Special stateid for Current stateid. */
5975 		*tl++ = txdr_unsigned(1);
5976 		*tl++ = 0;
5977 		*tl++ = 0;
5978 		*tl++ = 0;
5979 	} else {
5980 		*tl++ = txdr_unsigned(stateidp->seqid);
5981 		NFSCL_DEBUG(4, "layget seq=%d\n", (int)stateidp->seqid);
5982 		*tl++ = stateidp->other[0];
5983 		*tl++ = stateidp->other[1];
5984 		*tl++ = stateidp->other[2];
5985 	}
5986 	*tl = txdr_unsigned(layoutlen);
5987 }
5988 
/*
 * Parse the reply for a successful LayoutGet operation.
 *
 * nd		- reply being dissected
 * stateidp	- filled in with the stateid from the reply
 * retonclosep	- set to 1 if the first word of the reply is non-zero,
 *		  otherwise to 0
 * flhp		- list head that the file layouts from the reply are
 *		  inserted in, kept in increasing nfsfl_off order
 *
 * Only file layouts whose iomode matches that of the first file layout in
 * the reply are inserted; the others are freed after a message is printed.
 * Returns 0 upon success.  NFSERR_BADXDR is returned for a layout count
 * outside 1..10000, a file handle count outside 0..100, a layout type
 * other than NFSLAYOUT_NFSV4_1_FILES, a file handle count that does not
 * match the one read ahead, or a file handle length outside
 * 1..NFSX_V4FHMAX.  The NFSM_DISSECT() calls can also end the parse early
 * via the nfsmout label.
 * Upon an error, the file layout being built is freed here, but any that
 * have already been inserted in flhp are left on that list.
 */
static int
nfsrv_parselayoutget(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
    int *retonclosep, struct nfsclflayouthead *flhp)
{
	uint32_t *tl;
	struct nfsclflayout *flp, *prevflp, *tflp;
	int cnt, error, gotiomode, fhcnt, nfhlen, i, j;
	uint64_t retlen;
	struct nfsfh *nfhp;
	uint8_t *cp;

	error = 0;
	flp = NULL;
	/* -1 means that the iomode of the first file layout is not yet known. */
	gotiomode = -1;
	/* Return on close flag, the stateid and the count of layouts. */
	NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_STATEID);
	if (*tl++ != 0)
		*retonclosep = 1;
	else
		*retonclosep = 0;
	stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
	NFSCL_DEBUG(4, "retoncls=%d stseq=%d\n", *retonclosep,
	    (int)stateidp->seqid);
	stateidp->other[0] = *tl++;
	stateidp->other[1] = *tl++;
	stateidp->other[2] = *tl++;
	cnt = fxdr_unsigned(int, *tl);
	NFSCL_DEBUG(4, "layg cnt=%d\n", cnt);
	if (cnt <= 0 || cnt > 10000) {
		/* Don't accept more than 10000 layouts in reply. */
		error = NFSERR_BADXDR;
		goto nfsmout;
	}
	for (i = 0; i < cnt; i++) {
		/* Dissect all the way to the file handle cnt. */
		NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_HYPER +
		    6 * NFSX_UNSIGNED + NFSX_V4DEVICEID);
		/*
		 * Peek at the file handle count, which is the last word
		 * just dissected, so that flp can be sized for it.
		 */
		fhcnt = fxdr_unsigned(int, *(tl + 11 +
		    NFSX_V4DEVICEID / NFSX_UNSIGNED));
		NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
		if (fhcnt < 0 || fhcnt > 100) {
			/* Don't accept more than 100 file handles. */
			error = NFSERR_BADXDR;
			goto nfsmout;
		}
		/* The structure is grown by one pointer per extra handle. */
		if (fhcnt > 1)
			flp = malloc(sizeof(*flp) + (fhcnt - 1) *
			    sizeof(struct nfsfh *), M_NFSFLAYOUT, M_WAITOK);
		else
			flp = malloc(sizeof(*flp), M_NFSFLAYOUT, M_WAITOK);
		flp->nfsfl_flags = 0;
		/*
		 * nfsfl_fhcnt counts the handles stored so far, so that the
		 * nfscl_freeflayout() call at nfsmout sees a consistent
		 * structure if the parse fails part way through.
		 */
		flp->nfsfl_fhcnt = 0;
		flp->nfsfl_devp = NULL;
		flp->nfsfl_off = fxdr_hyper(tl); tl += 2;
		retlen = fxdr_hyper(tl); tl += 2;
		/*
		 * NOTE(review): when off + retlen wraps, the end is set to
		 * UINT64_MAX - off and not to UINT64_MAX; confirm that this
		 * is what is intended.
		 */
		if (flp->nfsfl_off + retlen < flp->nfsfl_off)
			flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
		else
			flp->nfsfl_end = flp->nfsfl_off + retlen;
		flp->nfsfl_iomode = fxdr_unsigned(int, *tl++);
		if (gotiomode == -1)
			gotiomode = flp->nfsfl_iomode;
		if (fxdr_unsigned(int, *tl++) != NFSLAYOUT_NFSV4_1_FILES) {
			printf("NFSv4.1: got non-files layout\n");
			error = NFSERR_BADXDR;
			goto nfsmout;
		}
		/*
		 * The ++tl skips one word without looking at it (presumably
		 * the length of the layout body; confirm against RFC 5661).
		 */
		NFSBCOPY(++tl, flp->nfsfl_dev, NFSX_V4DEVICEID);
		tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
		flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++);
		NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util);
		flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++);
		flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2;
		/* Cross check the pointer arithmetic against the peek above. */
		if (fxdr_unsigned(int, *tl) != fhcnt) {
			printf("EEK! bad fhcnt\n");
			error = NFSERR_BADXDR;
			goto nfsmout;
		}
		/* Each file handle is a length followed by the padded data. */
		for (j = 0; j < fhcnt; j++) {
			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
			nfhlen = fxdr_unsigned(int, *tl);
			if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) {
				error = NFSERR_BADXDR;
				goto nfsmout;
			}
			nfhp = malloc(sizeof(*nfhp) + nfhlen - 1, M_NFSFH,
			    M_WAITOK);
			/* Hook it in before the next dissect can fail. */
			flp->nfsfl_fh[j] = nfhp;
			flp->nfsfl_fhcnt++;
			nfhp->nfh_len = nfhlen;
			NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen));
			NFSBCOPY(cp, nfhp->nfh_fh, nfhlen);
		}
		if (flp->nfsfl_iomode == gotiomode) {
			/* Keep the list in increasing offset order. */
			tflp = LIST_FIRST(flhp);
			prevflp = NULL;
			while (tflp != NULL &&
			    tflp->nfsfl_off < flp->nfsfl_off) {
				prevflp = tflp;
				tflp = LIST_NEXT(tflp, nfsfl_list);
			}
			if (prevflp == NULL)
				LIST_INSERT_HEAD(flhp, flp, nfsfl_list);
			else
				LIST_INSERT_AFTER(prevflp, flp,
				    nfsfl_list);
		} else {
			printf("nfscl_layoutget(): got wrong iomode\n");
			nfscl_freeflayout(flp);
		}
		/* flp is on the list or freed, so nfsmout must not free it. */
		flp = NULL;
	}
nfsmout:
	if (error != 0 && flp != NULL)
		nfscl_freeflayout(flp);
	return (error);
}
6109 
6110 /*
6111  * Similar to nfsrpc_getlayout(), except that it uses nfsrpc_openlayget(),
6112  * so that it does both an Open and a Layoutget.
6113  */
6114 static int
6115 nfsrpc_getopenlayout(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
6116     int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
6117     struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
6118     struct ucred *cred, NFSPROC_T *p)
6119 {
6120 	struct nfscllayout *lyp;
6121 	struct nfsclflayout *flp;
6122 	struct nfsclflayouthead flh;
6123 	int error, islocked, layoutlen, recalled, retonclose, usecurstateid;
6124 	int laystat;
6125 	nfsv4stateid_t stateid;
6126 	struct nfsclsession *tsep;
6127 
6128 	error = 0;
6129 	/*
6130 	 * If lyp is returned non-NULL, there will be a refcnt (shared lock)
6131 	 * on it, iff flp != NULL or a lock (exclusive lock) on it iff
6132 	 * flp == NULL.
6133 	 */
6134 	lyp = nfscl_getlayout(nmp->nm_clp, newfhp, newfhlen, 0, &flp,
6135 	    &recalled);
6136 	NFSCL_DEBUG(4, "nfsrpc_getopenlayout nfscl_getlayout lyp=%p\n", lyp);
6137 	if (lyp == NULL)
6138 		islocked = 0;
6139 	else if (flp != NULL)
6140 		islocked = 1;
6141 	else
6142 		islocked = 2;
6143 	if ((lyp == NULL || flp == NULL) && recalled == 0) {
6144 		LIST_INIT(&flh);
6145 		tsep = nfsmnt_mdssession(nmp);
6146 		layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID +
6147 		    3 * NFSX_UNSIGNED);
6148 		if (lyp == NULL)
6149 			usecurstateid = 1;
6150 		else {
6151 			usecurstateid = 0;
6152 			stateid.seqid = lyp->nfsly_stateid.seqid;
6153 			stateid.other[0] = lyp->nfsly_stateid.other[0];
6154 			stateid.other[1] = lyp->nfsly_stateid.other[1];
6155 			stateid.other[2] = lyp->nfsly_stateid.other[2];
6156 		}
6157 		error = nfsrpc_openlayoutrpc(nmp, vp, nfhp, fhlen,
6158 		    newfhp, newfhlen, mode, op, name, namelen,
6159 		    dpp, &stateid, usecurstateid, layoutlen,
6160 		    &retonclose, &flh, &laystat, cred, p);
6161 		NFSCL_DEBUG(4, "aft nfsrpc_openlayoutrpc laystat=%d err=%d\n",
6162 		    laystat, error);
6163 		laystat = nfsrpc_layoutgetres(nmp, vp, newfhp, newfhlen,
6164 		    &stateid, retonclose, NULL, &lyp, &flh, laystat, &islocked,
6165 		    cred, p);
6166 	} else
6167 		error = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen,
6168 		    mode, op, name, namelen, dpp, 0, 0, cred, p, 0, 0);
6169 	if (islocked == 2)
6170 		nfscl_rellayout(lyp, 1);
6171 	else if (islocked == 1)
6172 		nfscl_rellayout(lyp, 0);
6173 	return (error);
6174 }
6175 
/*
 * This function does an Open+LayoutGet for an NFSv4.1 mount with pNFS
 * enabled, only for the CLAIM_NULL case.  All other NFSv4 Opens are
 * handled by nfsrpc_openrpc().
 *
 * The compound built here is the Open followed by a Getattr (Change and
 * TimeModify) and a LayoutGet for the whole file.
 *
 * op		- the open; its owner's seqid and name go in the request and
 *		  its stateid and nfso_posixlock are set from the reply
 * dpp		- set to NULL upon entry and to a new delegation structure
 *		  if the reply has a delegation and the Getattr succeeded
 * stateidp,
 * usecurstateid,
 * layoutlen	- passed to nfsrv_setuplayoutget(); stateidp is also filled
 *		  in by nfsrv_parselayoutget()
 * retonclosep,
 * flhp		- filled in by nfsrv_parselayoutget()
 * laystatp	- status of the LayoutGet, ENXIO if it was never reached
 *
 * Returns the status of the Open.  A failure of the LayoutGet operation
 * itself is only reported via *laystatp, although an error from parsing a
 * successful LayoutGet reply is returned as well.
 *
 * NOTE(review): this comment used to say "For the case where op == NULL,
 * dvp is the directory.  When op != NULL, it can be NULL.", but op is
 * dereferenced unconditionally and there is no dvp argument, so that
 * looks to be stale.
 */
static int
nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
    int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
    struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
    nfsv4stateid_t *stateidp, int usecurstateid,
    int layoutlen, int *retonclosep, struct nfsclflayouthead *flhp,
    int *laystatp, struct ucred *cred, NFSPROC_T *p)
{
	uint32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfscldeleg *ndp = NULL;
	struct nfsvattr nfsva;
	struct nfsclsession *tsep;
	uint32_t rflags, deleg;
	nfsattrbit_t attrbits;
	int error, ret, acesize, limitby, iomode;

	*dpp = NULL;
	/* ENXIO is what the caller sees if the LayoutGet is never reached. */
	*laystatp = ENXIO;
	nfscl_reqstart(nd, NFSPROC_OPENLAYGET, nmp, nfhp, fhlen, NULL, NULL);
	/* Open args: seqid, share access, share deny and the clientid. */
	NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
	*tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
	tsep = nfsmnt_mdssession(nmp);
	*tl++ = tsep->nfsess_clientid.lval[0];
	*tl = tsep->nfsess_clientid.lval[1];
	nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
	nfsm_strtom(nd, name, namelen);
	/* Getattr of Change and TimeModify, used for a delegation. */
	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_GETATTR);
	NFSZERO_ATTRBIT(&attrbits);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
	nfsrv_putattrbit(nd, &attrbits);
	/* LayoutGet for the whole file, RW if opened for writing. */
	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
	if ((mode & NFSV4OPEN_ACCESSWRITE) != 0)
		iomode = NFSLAYOUTIOMODE_RW;
	else
		iomode = NFSLAYOUTIOMODE_READ;
	nfsrv_setuplayoutget(nd, iomode, 0, UINT64_MAX, 0, stateidp,
	    layoutlen, usecurstateid);
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error != 0)
		return (error);
	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
	if (nd->nd_repstat != 0)
		*laystatp = nd->nd_repstat;
	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
		/* ND_NOMOREDATA will be set if the Open operation failed. */
		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
		    6 * NFSX_UNSIGNED);
		op->nfso_stateid.seqid = *tl++;
		op->nfso_stateid.other[0] = *tl++;
		op->nfso_stateid.other[1] = *tl++;
		op->nfso_stateid.other[2] = *tl;
		/*
		 * tl was not advanced past other[2], so tl + 6 is the last
		 * of the words dissected above, the five in between being
		 * skipped.
		 */
		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
		if (error != 0)
			goto nfsmout;
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		deleg = fxdr_unsigned(u_int32_t, *tl);
		if (deleg == NFSV4OPEN_DELEGATEREAD ||
		    deleg == NFSV4OPEN_DELEGATEWRITE) {
			if (!(op->nfso_own->nfsow_clp->nfsc_flags &
			      NFSCLFLAGS_FIRSTDELEG))
				op->nfso_own->nfsow_clp->nfsc_flags |=
				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
			/*
			 * Build the delegation in ndp.  It is only handed to
			 * the caller via *dpp once the Getattr reply has
			 * been parsed; until then nfsmout frees it.
			 */
			ndp = malloc(sizeof(struct nfscldeleg) + newfhlen,
			    M_NFSCLDELEG, M_WAITOK);
			LIST_INIT(&ndp->nfsdl_owner);
			LIST_INIT(&ndp->nfsdl_lock);
			ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
			ndp->nfsdl_fhlen = newfhlen;
			NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
			newnfs_copyincred(cred, &ndp->nfsdl_cred);
			nfscl_lockinit(&ndp->nfsdl_rwlock);
			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
			    NFSX_UNSIGNED);
			ndp->nfsdl_stateid.seqid = *tl++;
			ndp->nfsdl_stateid.other[0] = *tl++;
			ndp->nfsdl_stateid.other[1] = *tl++;
			ndp->nfsdl_stateid.other[2] = *tl++;
			/* Non-zero results in NFSCLDL_RECALL being set below. */
			ret = fxdr_unsigned(int, *tl);
			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
				ndp->nfsdl_flags = NFSCLDL_WRITE;
				/*
				 * Indicates how much the file can grow.
				 */
				NFSM_DISSECT(tl, u_int32_t *,
				    3 * NFSX_UNSIGNED);
				limitby = fxdr_unsigned(int, *tl++);
				switch (limitby) {
				case NFSV4OPEN_LIMITSIZE:
					ndp->nfsdl_sizelimit = fxdr_hyper(tl);
					break;
				case NFSV4OPEN_LIMITBLOCKS:
					/* Product of the two words. */
					ndp->nfsdl_sizelimit =
					    fxdr_unsigned(u_int64_t, *tl++);
					ndp->nfsdl_sizelimit *=
					    fxdr_unsigned(u_int64_t, *tl);
					break;
				default:
					error = NFSERR_BADXDR;
					goto nfsmout;
				};
			} else
				ndp->nfsdl_flags = NFSCLDL_READ;
			if (ret != 0)
				ndp->nfsdl_flags |= NFSCLDL_RECALL;
			error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret,
			    &acesize, p);
			if (error != 0)
				goto nfsmout;
		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
			error = NFSERR_BADXDR;
			goto nfsmout;
		}
		if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) != 0 ||
		    nfscl_assumeposixlocks)
			op->nfso_posixlock = 1;
		else
			op->nfso_posixlock = 0;
		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		/* If the 2nd element == NFS_OK, the Getattr succeeded. */
		if (*++tl == 0) {
			error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
			    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
			    NULL, NULL, NULL, p, cred);
			if (error != 0)
				goto nfsmout;
			if (ndp != NULL) {
				ndp->nfsdl_change = nfsva.na_filerev;
				ndp->nfsdl_modtime = nfsva.na_mtime;
				ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
				/* The caller owns it from here on. */
				*dpp = ndp;
				ndp = NULL;
			}
			/*
			 * At this point, the Open has succeeded, so set
			 * nd_repstat = NFS_OK.  If the Layoutget failed,
			 * this function just won't return a layout.
			 */
			if (nd->nd_repstat == 0) {
				/* The 2nd word is the LayoutGet status. */
				NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
				*laystatp = fxdr_unsigned(int, *++tl);
				if (*laystatp == 0) {
					error = nfsrv_parselayoutget(nd,
					    stateidp, retonclosep, flhp);
					if (error != 0)
						*laystatp = error;
				}
			} else
				nd->nd_repstat = 0;	/* Return 0 for Open. */
		}
	}
	if (nd->nd_repstat != 0 && error == 0)
		error = nd->nd_repstat;
nfsmout:
	/* ndp is only non-NULL here if it was not handed off via *dpp. */
	free(ndp, M_NFSCLDELEG);
	mbuf_freem(nd->nd_mrep);
	return (error);
}
6351 
6352 /*
6353  * Similar nfsrpc_createv4(), but also does the LayoutGet operation.
6354  * Used only for mounts with pNFS enabled.
6355  */
6356 static int
6357 nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
6358     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
6359     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
6360     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
6361     int *dattrflagp, void *dstuff, int *unlockedp, nfsv4stateid_t *stateidp,
6362     int usecurstateid, int layoutlen, int *retonclosep,
6363     struct nfsclflayouthead *flhp, int *laystatp)
6364 {
6365 	uint32_t *tl;
6366 	int error = 0, deleg, newone, ret, acesize, limitby;
6367 	struct nfsrv_descript nfsd, *nd = &nfsd;
6368 	struct nfsclopen *op;
6369 	struct nfscldeleg *dp = NULL;
6370 	struct nfsnode *np;
6371 	struct nfsfh *nfhp;
6372 	struct nfsclsession *tsep;
6373 	nfsattrbit_t attrbits;
6374 	nfsv4stateid_t stateid;
6375 	uint32_t rflags;
6376 	struct nfsmount *nmp;
6377 
6378 	nmp = VFSTONFS(dvp->v_mount);
6379 	np = VTONFS(dvp);
6380 	*laystatp = ENXIO;
6381 	*unlockedp = 0;
6382 	*nfhpp = NULL;
6383 	*dpp = NULL;
6384 	*attrflagp = 0;
6385 	*dattrflagp = 0;
6386 	if (namelen > NFS_MAXNAMLEN)
6387 		return (ENAMETOOLONG);
6388 	NFSCL_REQSTART(nd, NFSPROC_CREATELAYGET, dvp);
6389 	/*
6390 	 * For V4, this is actually an Open op.
6391 	 */
6392 	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
6393 	*tl++ = txdr_unsigned(owp->nfsow_seqid);
6394 	*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
6395 	    NFSV4OPEN_ACCESSREAD);
6396 	*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
6397 	tsep = nfsmnt_mdssession(nmp);
6398 	*tl++ = tsep->nfsess_clientid.lval[0];
6399 	*tl = tsep->nfsess_clientid.lval[1];
6400 	nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
6401 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
6402 	*tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
6403 	if ((fmode & O_EXCL) != 0) {
6404 		if (NFSHASSESSPERSIST(nmp)) {
6405 			/* Use GUARDED for persistent sessions. */
6406 			*tl = txdr_unsigned(NFSCREATE_GUARDED);
6407 			nfscl_fillsattr(nd, vap, dvp, 0, 0);
6408 		} else {
6409 			/* Otherwise, use EXCLUSIVE4_1. */
6410 			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
6411 			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
6412 			*tl++ = cverf.lval[0];
6413 			*tl = cverf.lval[1];
6414 			nfscl_fillsattr(nd, vap, dvp, 0, 0);
6415 		}
6416 	} else {
6417 		*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
6418 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
6419 	}
6420 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
6421 	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
6422 	nfsm_strtom(nd, name, namelen);
6423 	/* Get the new file's handle and attributes, plus save the FH. */
6424 	NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
6425 	*tl++ = txdr_unsigned(NFSV4OP_SAVEFH);
6426 	*tl++ = txdr_unsigned(NFSV4OP_GETFH);
6427 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
6428 	NFSGETATTR_ATTRBIT(&attrbits);
6429 	nfsrv_putattrbit(nd, &attrbits);
6430 	/* Get the directory's post-op attributes. */
6431 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
6432 	*tl = txdr_unsigned(NFSV4OP_PUTFH);
6433 	nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
6434 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
6435 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
6436 	nfsrv_putattrbit(nd, &attrbits);
6437 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
6438 	*tl++ = txdr_unsigned(NFSV4OP_RESTOREFH);
6439 	*tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
6440 	nfsrv_setuplayoutget(nd, NFSLAYOUTIOMODE_RW, 0, UINT64_MAX, 0, stateidp,
6441 	    layoutlen, usecurstateid);
6442 	error = nfscl_request(nd, dvp, p, cred, dstuff);
6443 	if (error != 0)
6444 		return (error);
6445 	NFSCL_DEBUG(4, "nfsrpc_createlayout stat=%d err=%d\n", nd->nd_repstat,
6446 	    error);
6447 	if (nd->nd_repstat != 0)
6448 		*laystatp = nd->nd_repstat;
6449 	NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
6450 	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
6451 		NFSCL_DEBUG(4, "nfsrpc_createlayout open succeeded\n");
6452 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
6453 		    6 * NFSX_UNSIGNED);
6454 		stateid.seqid = *tl++;
6455 		stateid.other[0] = *tl++;
6456 		stateid.other[1] = *tl++;
6457 		stateid.other[2] = *tl;
6458 		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
6459 		nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
6460 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
6461 		deleg = fxdr_unsigned(int, *tl);
6462 		if (deleg == NFSV4OPEN_DELEGATEREAD ||
6463 		    deleg == NFSV4OPEN_DELEGATEWRITE) {
6464 			if (!(owp->nfsow_clp->nfsc_flags &
6465 			      NFSCLFLAGS_FIRSTDELEG))
6466 				owp->nfsow_clp->nfsc_flags |=
6467 				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
6468 			dp = malloc(sizeof(struct nfscldeleg) + NFSX_V4FHMAX,
6469 			    M_NFSCLDELEG, M_WAITOK);
6470 			LIST_INIT(&dp->nfsdl_owner);
6471 			LIST_INIT(&dp->nfsdl_lock);
6472 			dp->nfsdl_clp = owp->nfsow_clp;
6473 			newnfs_copyincred(cred, &dp->nfsdl_cred);
6474 			nfscl_lockinit(&dp->nfsdl_rwlock);
6475 			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
6476 			    NFSX_UNSIGNED);
6477 			dp->nfsdl_stateid.seqid = *tl++;
6478 			dp->nfsdl_stateid.other[0] = *tl++;
6479 			dp->nfsdl_stateid.other[1] = *tl++;
6480 			dp->nfsdl_stateid.other[2] = *tl++;
6481 			ret = fxdr_unsigned(int, *tl);
6482 			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
6483 				dp->nfsdl_flags = NFSCLDL_WRITE;
6484 				/*
6485 				 * Indicates how much the file can grow.
6486 				 */
6487 				NFSM_DISSECT(tl, u_int32_t *,
6488 				    3 * NFSX_UNSIGNED);
6489 				limitby = fxdr_unsigned(int, *tl++);
6490 				switch (limitby) {
6491 				case NFSV4OPEN_LIMITSIZE:
6492 					dp->nfsdl_sizelimit = fxdr_hyper(tl);
6493 					break;
6494 				case NFSV4OPEN_LIMITBLOCKS:
6495 					dp->nfsdl_sizelimit =
6496 					    fxdr_unsigned(u_int64_t, *tl++);
6497 					dp->nfsdl_sizelimit *=
6498 					    fxdr_unsigned(u_int64_t, *tl);
6499 					break;
6500 				default:
6501 					error = NFSERR_BADXDR;
6502 					goto nfsmout;
6503 				};
6504 			} else {
6505 				dp->nfsdl_flags = NFSCLDL_READ;
6506 			}
6507 			if (ret != 0)
6508 				dp->nfsdl_flags |= NFSCLDL_RECALL;
6509 			error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret,
6510 			    &acesize, p);
6511 			if (error != 0)
6512 				goto nfsmout;
6513 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
6514 			error = NFSERR_BADXDR;
6515 			goto nfsmout;
6516 		}
6517 
6518 		/* Now, we should have the status for the SaveFH. */
6519 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
6520 		if (*++tl == 0) {
6521 			NFSCL_DEBUG(4, "nfsrpc_createlayout SaveFH ok\n");
6522 			/*
6523 			 * Now, process the GetFH and Getattr for the newly
6524 			 * created file. nfscl_mtofh() will set
6525 			 * ND_NOMOREDATA if these weren't successful.
6526 			 */
6527 			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
6528 			NFSCL_DEBUG(4, "aft nfscl_mtofh err=%d\n", error);
6529 			if (error != 0)
6530 				goto nfsmout;
6531 		} else
6532 			nd->nd_flag |= ND_NOMOREDATA;
6533 		/* Now we have the PutFH and Getattr for the directory. */
6534 		if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
6535 			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
6536 			if (*++tl != 0)
6537 				nd->nd_flag |= ND_NOMOREDATA;
6538 			else {
6539 				NFSM_DISSECT(tl, uint32_t *, 2 *
6540 				    NFSX_UNSIGNED);
6541 				if (*++tl != 0)
6542 					nd->nd_flag |= ND_NOMOREDATA;
6543 			}
6544 		}
6545 		if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
6546 			/* Load the directory attributes. */
6547 			error = nfsm_loadattr(nd, dnap);
6548 			NFSCL_DEBUG(4, "aft nfsm_loadattr err=%d\n", error);
6549 			if (error != 0)
6550 				goto nfsmout;
6551 			*dattrflagp = 1;
6552 			if (dp != NULL && *attrflagp != 0) {
6553 				dp->nfsdl_change = nnap->na_filerev;
6554 				dp->nfsdl_modtime = nnap->na_mtime;
6555 				dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
6556 			}
6557 			/*
6558 			 * We can now complete the Open state.
6559 			 */
6560 			nfhp = *nfhpp;
6561 			if (dp != NULL) {
6562 				dp->nfsdl_fhlen = nfhp->nfh_len;
6563 				NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh,
6564 				    nfhp->nfh_len);
6565 			}
6566 			/*
6567 			 * Get an Open structure that will be
6568 			 * attached to the OpenOwner, acquired already.
6569 			 */
6570 			error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len,
6571 			    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
6572 			    cred, p, NULL, &op, &newone, NULL, 0);
6573 			if (error != 0)
6574 				goto nfsmout;
6575 			op->nfso_stateid = stateid;
6576 			newnfs_copyincred(cred, &op->nfso_cred);
6577 
6578 			nfscl_openrelease(nmp, op, error, newone);
6579 			*unlockedp = 1;
6580 
6581 			/* Now, handle the RestoreFH and LayoutGet. */
6582 			if (nd->nd_repstat == 0) {
6583 				NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
6584 				*laystatp = fxdr_unsigned(int, *(tl + 3));
6585 				if (*laystatp == 0) {
6586 					error = nfsrv_parselayoutget(nd,
6587 					    stateidp, retonclosep, flhp);
6588 					if (error != 0)
6589 						*laystatp = error;
6590 				}
6591 				NFSCL_DEBUG(4, "aft nfsrv_parselayout err=%d\n",
6592 				    error);
6593 			} else
6594 				nd->nd_repstat = 0;
6595 		}
6596 	}
6597 	if (nd->nd_repstat != 0 && error == 0)
6598 		error = nd->nd_repstat;
6599 	if (error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION)
6600 		nfscl_initiate_recovery(owp->nfsow_clp);
6601 nfsmout:
6602 	NFSCL_DEBUG(4, "eo nfsrpc_createlayout err=%d\n", error);
6603 	if (error == 0)
6604 		*dpp = dp;
6605 	else
6606 		free(dp, M_NFSCLDELEG);
6607 	mbuf_freem(nd->nd_mrep);
6608 	return (error);
6609 }
6610 
6611 /*
6612  * Similar to nfsrpc_getopenlayout(), except that it used for the Create case.
6613  */
6614 static int
6615 nfsrpc_getcreatelayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
6616     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
6617     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
6618     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
6619     int *dattrflagp, void *dstuff, int *unlockedp)
6620 {
6621 	struct nfscllayout *lyp;
6622 	struct nfsclflayouthead flh;
6623 	struct nfsfh *nfhp;
6624 	struct nfsclsession *tsep;
6625 	struct nfsmount *nmp;
6626 	nfsv4stateid_t stateid;
6627 	int error, layoutlen, retonclose, laystat;
6628 
6629 	error = 0;
6630 	nmp = VFSTONFS(dvp->v_mount);
6631 	LIST_INIT(&flh);
6632 	tsep = nfsmnt_mdssession(nmp);
6633 	layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID + 3 * NFSX_UNSIGNED);
6634 	error = nfsrpc_createlayout(dvp, name, namelen, vap, cverf, fmode,
6635 	    owp, dpp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
6636 	    dstuff, unlockedp, &stateid, 1, layoutlen, &retonclose, &flh,
6637 	    &laystat);
6638 	NFSCL_DEBUG(4, "aft nfsrpc_createlayoutrpc laystat=%d err=%d\n",
6639 	    laystat, error);
6640 	lyp = NULL;
6641 	nfhp = *nfhpp;
6642 	laystat = nfsrpc_layoutgetres(nmp, dvp, nfhp->nfh_fh, nfhp->nfh_len,
6643 	    &stateid, retonclose, NULL, &lyp, &flh, laystat, NULL, cred, p);
6644 	if (laystat == 0)
6645 		nfscl_rellayout(lyp, 0);
6646 	return (error);
6647 }
6648 
6649 /*
6650  * Process the results of a layoutget() operation.
6651  */
6652 static int
6653 nfsrpc_layoutgetres(struct nfsmount *nmp, vnode_t vp, uint8_t *newfhp,
6654     int newfhlen, nfsv4stateid_t *stateidp, int retonclose, uint32_t *notifybit,
6655     struct nfscllayout **lypp, struct nfsclflayouthead *flhp,
6656     int laystat, int *islockedp, struct ucred *cred, NFSPROC_T *p)
6657 {
6658 	struct nfsclflayout *tflp;
6659 	struct nfscldevinfo *dip;
6660 
6661 	if (laystat == NFSERR_UNKNLAYOUTTYPE) {
6662 		/* Disable PNFS. */
6663 		NFSCL_DEBUG(1, "disable PNFS\n");
6664 		NFSLOCKMNT(nmp);
6665 		nmp->nm_state &= ~NFSSTA_PNFS;
6666 		NFSUNLOCKMNT(nmp);
6667 	}
6668 	if (laystat == 0) {
6669 		NFSCL_DEBUG(4, "nfsrpc_layoutgetres at FOREACH\n");
6670 		LIST_FOREACH(tflp, flhp, nfsfl_list) {
6671 			laystat = nfscl_adddevinfo(nmp, NULL, tflp);
6672 			NFSCL_DEBUG(4, "aft adddev=%d\n", laystat);
6673 			if (laystat != 0) {
6674 				laystat = nfsrpc_getdeviceinfo(nmp,
6675 				    tflp->nfsfl_dev, NFSLAYOUT_NFSV4_1_FILES,
6676 				    notifybit, &dip, cred, p);
6677 				NFSCL_DEBUG(4, "aft nfsrpc_gdi=%d\n",
6678 				    laystat);
6679 				if (laystat != 0)
6680 					break;
6681 				laystat = nfscl_adddevinfo(nmp, dip, tflp);
6682 				if (laystat != 0)
6683 					printf("getlayout: cannot add\n");
6684 			}
6685 		}
6686 	}
6687 	if (laystat == 0) {
6688 		/*
6689 		 * nfscl_layout() always returns with the nfsly_lock
6690 		 * set to a refcnt (shared lock).
6691 		 * Passing in dvp is sufficient, since it is only used to
6692 		 * get the fsid for the file system.
6693 		 */
6694 		laystat = nfscl_layout(nmp, vp, newfhp, newfhlen, stateidp,
6695 		    retonclose, flhp, lypp, cred, p);
6696 		NFSCL_DEBUG(4, "nfsrpc_layoutgetres: aft nfscl_layout=%d\n",
6697 		    laystat);
6698 		if (laystat == 0 && islockedp != NULL)
6699 			*islockedp = 1;
6700 	}
6701 	return (laystat);
6702 }
6703 
6704