xref: /freebsd/sys/fs/nfsclient/nfs_clrpcops.c (revision 627f1555f571b5328637dbfbe441ed89c84db20c)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Rick Macklem at The University of Guelph.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  */
35 
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38 
39 /*
40  * Rpc op calls, generally called from the vnode op calls or through the
41  * buffer cache, for NFS v2, 3 and 4.
42  * These do not normally make any changes to vnode arguments or use
43  * structures that might change between the VFS variants. The returned
44  * arguments are all at the end, after the NFSPROC_T *p one.
45  */
46 
47 #include "opt_inet6.h"
48 
49 #include <fs/nfs/nfsport.h>
50 #include <fs/nfsclient/nfs.h>
51 #include <sys/extattr.h>
52 #include <sys/sysctl.h>
53 #include <sys/taskqueue.h>
54 
55 SYSCTL_DECL(_vfs_nfs);
56 
/*
 * Tunables published read-write (CTLFLAG_RW) under the _vfs_nfs sysctl
 * node declared above.  Each static variable is the backing store for one
 * knob and its initializer is the default value.
 */
/* ignore_eexist: default 0 (off); see the description string below. */
57 static int	nfsignore_eexist = 0;
58 SYSCTL_INT(_vfs_nfs, OID_AUTO, ignore_eexist, CTLFLAG_RW,
59     &nfsignore_eexist, 0, "NFS ignore EEXIST replies for mkdir/symlink");
60 
/* dssameconn: default 0 (off); see the description string below. */
61 static int	nfscl_dssameconn = 0;
62 SYSCTL_INT(_vfs_nfs, OID_AUTO, dssameconn, CTLFLAG_RW,
63     &nfscl_dssameconn, 0, "Use same TCP connection to multiple DSs");
64 
/* maxcopyrange: 64-bit knob, default SSIZE_MAX. */
65 static uint64_t nfs_maxcopyrange = SSIZE_MAX;
66 SYSCTL_U64(_vfs_nfs, OID_AUTO, maxcopyrange, CTLFLAG_RW,
67     &nfs_maxcopyrange, 0, "Max size of a Copy so RPC times reasonable");
68 
69 /*
70  * Global variables
 *
 * The extern declarations name objects that are defined in other files.
 * The non-extern variables further down are defined here with external
 * linkage, so other files may refer to them.
71  */
72 extern struct nfsstatsv1 nfsstatsv1;
73 extern int nfs_numnfscbd;
74 extern struct timeval nfsboottime;
75 extern u_int32_t newnfs_false, newnfs_true;
76 extern nfstype nfsv34_type[9];
77 extern int nfsrv_useacl;
78 extern char nfsv4_callbackaddr[INET6_ADDRSTRLEN];
79 extern int nfscl_debuglevel;
80 extern int nfs_pnfsiothreads;
81 extern u_long sb_max_adj;
/*
 * NOTE(review): NFSCLSTATEMUTEX presumably declares the mutex used by
 * NFSLOCKCLSTATE()/NFSUNLOCKCLSTATE() -- confirm against its definition.
 */
82 NFSCLSTATEMUTEX;
83 int nfstest_outofseq = 0;
84 int nfscl_assumeposixlocks = 1;	/* non-zero: opens are marked posixlock */
85 int nfscl_enablecallb = 0;	/* zero: nfsrpc_open() skips the layout path */
86 short nfsv4_cbport = NFSV4_CBPORT;
87 int nfstest_openallsetattr = 0;
88 
/* Byte offset of d_name within struct dirent, i.e. the fixed-size header. */
89 #define	DIRHDSIZ	offsetof(struct dirent, d_name)
90 
/*
 * Result codes of nfscl_getsameserver():
 *   NFSDSP_USETHISSESSION (0) - Use this session for the DS.
 *   NFSDSP_SEQTHISSESSION (1) - Use the nfsclds_sequence field of this dsp
 *                               for a new session.
 *   NFSDSP_NOTFOUND       (2) - No matching server was found.
 * The enumerators use implicit numbering, which starts at zero and counts
 * up by one, giving the values noted above.
 */
enum nfsclds_state {
	NFSDSP_USETHISSESSION,
	NFSDSP_SEQTHISSESSION,
	NFSDSP_NOTFOUND,
};
103 
104 /*
105  * Do a write RPC on a DS data file, using this structure for the arguments,
106  * so that this function can be executed by a separate kernel process.
 *
 * One instance bundles the arguments for a single DS RPC.  Pointers to
 * this structure appear in the prototypes of nfsio_writedsmir(),
 * nfsio_commitds() and nfscl_dofflayoutio() below.
 *
 * NOTE(review): the code that fills in and consumes these fields is not
 * in this part of the file; the per-field notes are inferred from the
 * names and from the matching parameters of those prototypes -- confirm.
107  */
108 struct nfsclwritedsdorpc {
109 	int			done;		/* presumably set on completion */
110 	int			inprog;		/* presumably set while queued/running */
111 	struct task		tsk;		/* presumably the task given to nfs_pnfsio() */
112 	struct vnode		*vp;
113 	int			iomode;
114 	int			must_commit;
115 	nfsv4stateid_t		*stateidp;
116 	struct nfsclds		*dsp;
117 	uint64_t		off;
118 	int			len;
/* The advise field only exists when "notyet" is defined. */
119 #ifdef notyet
120 	int			advise;
121 #endif
122 	struct nfsfh		*fhp;
123 	struct mbuf		*m;
124 	int			vers;
125 	int			minorvers;
126 	struct ucred		*cred;
127 	NFSPROC_T		*p;
128 	int			err;		/* presumably the RPC's result code */
129 };
130 
/*
 * Forward declarations.
 *
 * Everything declared static here has file-local linkage; the definitions
 * are presumably further down in this file (not all are visible from this
 * part of it).  nfsio_adviseds() and nfsrpc_adviseds() are only declared
 * when "notyet" is defined.  nfs_pnfsio(), at the end, is the one
 * declaration with external linkage.
 */
131 static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *,
132     struct ucred *, NFSPROC_T *, struct nfsvattr *, int *);
133 static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *,
134     nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *);
135 static int nfsrpc_writerpc(vnode_t , struct uio *, int *, int *,
136     struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *,
137     int);
138 static int nfsrpc_deallocaterpc(vnode_t, off_t, off_t, nfsv4stateid_t *,
139     struct nfsvattr *, int *, struct ucred *, NFSPROC_T *);
140 static int nfsrpc_createv23(vnode_t , char *, int, struct vattr *,
141     nfsquad_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *,
142     struct nfsvattr *, struct nfsfh **, int *, int *);
143 static int nfsrpc_createv4(vnode_t , char *, int, struct vattr *,
144     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *,
145     NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *,
146     int *, int *);
147 static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *,
148     struct nfscllockowner *, u_int64_t, u_int64_t,
149     u_int32_t, struct ucred *, NFSPROC_T *, int);
150 static int nfsrpc_setaclrpc(vnode_t, struct ucred *, NFSPROC_T *,
151     struct acl *, nfsv4stateid_t *);
152 static int nfsrpc_layouterror(struct nfsmount *, uint8_t *, int, uint64_t,
153     uint64_t, nfsv4stateid_t *, struct ucred *, NFSPROC_T *, uint32_t,
154     uint32_t, char *);
155 static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int,
156     uint32_t, uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **,
157     struct ucred *, NFSPROC_T *);
158 static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_in *,
159     struct sockaddr_in6 *, sa_family_t, int, int, struct nfsclds **,
160     NFSPROC_T *);
161 static void nfscl_initsessionslots(struct nfsclsession *);
162 static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *,
163     nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
164     struct nfsclflayout *, uint64_t, uint64_t, int, struct ucred *,
165     NFSPROC_T *);
166 static int nfscl_dofflayoutio(vnode_t, struct uio *, int *, int *, int *,
167     nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
168     struct nfsclflayout *, uint64_t, uint64_t, int, int, struct mbuf *,
169     struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
170 static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *,
171     struct nfsclds *, uint64_t, int, struct nfsfh *, int, int, int,
172     struct ucred *, NFSPROC_T *);
173 static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *,
174     nfsv4stateid_t *, struct nfsclds *, uint64_t, int,
175     struct nfsfh *, int, int, int, int, struct ucred *, NFSPROC_T *);
176 static int nfsio_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
177     struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
178     struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
179 static int nfsrpc_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
180     struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
181     struct ucred *, NFSPROC_T *);
182 static enum nfsclds_state nfscl_getsameserver(struct nfsmount *,
183     struct nfsclds *, struct nfsclds **, uint32_t *);
184 static int nfsio_commitds(vnode_t, uint64_t, int, struct nfsclds *,
185     struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *,
186     NFSPROC_T *);
187 static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *,
188     struct nfsfh *, int, int, struct ucred *, NFSPROC_T *);
189 #ifdef notyet
190 static int nfsio_adviseds(vnode_t, uint64_t, int, int, struct nfsclds *,
191     struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *,
192     NFSPROC_T *);
193 static int nfsrpc_adviseds(vnode_t, uint64_t, int, int, struct nfsclds *,
194     struct nfsfh *, int, int, struct ucred *, NFSPROC_T *);
195 #endif
196 static int nfsrpc_allocaterpc(vnode_t, off_t, off_t, nfsv4stateid_t *,
197     struct nfsvattr *, int *, struct ucred *, NFSPROC_T *);
198 static void nfsrv_setuplayoutget(struct nfsrv_descript *, int, uint64_t,
199     uint64_t, uint64_t, nfsv4stateid_t *, int, int, int);
200 static int nfsrv_parseug(struct nfsrv_descript *, int, uid_t *, gid_t *,
201     NFSPROC_T *);
202 static int nfsrv_parselayoutget(struct nfsmount *, struct nfsrv_descript *,
203     nfsv4stateid_t *, int *, struct nfsclflayouthead *);
204 static int nfsrpc_getopenlayout(struct nfsmount *, vnode_t, u_int8_t *,
205     int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
206     struct nfscldeleg **, struct ucred *, NFSPROC_T *);
207 static int nfsrpc_getcreatelayout(vnode_t, char *, int, struct vattr *,
208     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
209     struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
210     struct nfsfh **, int *, int *, int *);
211 static int nfsrpc_openlayoutrpc(struct nfsmount *, vnode_t, u_int8_t *,
212     int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
213     struct nfscldeleg **, nfsv4stateid_t *, int, int, int, int *,
214     struct nfsclflayouthead *, int *, struct ucred *, NFSPROC_T *);
215 static int nfsrpc_createlayout(vnode_t, char *, int, struct vattr *,
216     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
217     struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
218     struct nfsfh **, int *, int *, int *, nfsv4stateid_t *,
219     int, int, int, int *, struct nfsclflayouthead *, int *);
220 static int nfsrpc_layoutget(struct nfsmount *, uint8_t *, int, int, uint64_t,
221     uint64_t, uint64_t, int, int, nfsv4stateid_t *, int *,
222     struct nfsclflayouthead *, struct ucred *, NFSPROC_T *);
223 static int nfsrpc_layoutgetres(struct nfsmount *, vnode_t, uint8_t *,
224     int, nfsv4stateid_t *, int, uint32_t *, struct nfscllayout **,
225     struct nfsclflayouthead *, int, int, int *, struct ucred *, NFSPROC_T *);
226 static int nfsrpc_copyrpc(vnode_t, off_t, vnode_t, off_t, size_t *,
227     nfsv4stateid_t *, nfsv4stateid_t *, struct nfsvattr *, int *,
228     struct nfsvattr *, int *, bool, int *, struct ucred *, NFSPROC_T *);
229 static int nfsrpc_seekrpc(vnode_t, off_t *, nfsv4stateid_t *, bool *,
230     int, struct nfsvattr *, int *, struct ucred *);
231 static struct mbuf *nfsm_split(struct mbuf *, uint64_t);
232 
233 int nfs_pnfsio(task_fn_t *, void *);
234 
235 /*
236  * nfs null call from vfs.
237  */
238 int
239 nfsrpc_null(vnode_t vp, struct ucred *cred, NFSPROC_T *p)
240 {
241 	int error;
242 	struct nfsrv_descript nfsd, *nd = &nfsd;
243 
244 	NFSCL_REQSTART(nd, NFSPROC_NULL, vp, NULL);
245 	error = nfscl_request(nd, vp, p, cred);
246 	if (nd->nd_repstat && !error)
247 		error = nd->nd_repstat;
248 	m_freem(nd->nd_mrep);
249 	return (error);
250 }
251 
252 /*
253  * nfs access rpc op.
254  * For nfs version 3 and 4, use the access rpc to check accessibility. If file
255  * modes are changed on the server, accesses might still fail later.
256  */
257 int
258 nfsrpc_access(vnode_t vp, int acmode, struct ucred *cred,
259     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
260 {
261 	int error;
262 	u_int32_t mode, rmode;
263 
264 	if (acmode & VREAD)
265 		mode = NFSACCESS_READ;
266 	else
267 		mode = 0;
268 	if (vp->v_type == VDIR) {
269 		if (acmode & VWRITE)
270 			mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND |
271 				 NFSACCESS_DELETE);
272 		if (acmode & VEXEC)
273 			mode |= NFSACCESS_LOOKUP;
274 	} else {
275 		if (acmode & VWRITE)
276 			mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
277 		if (acmode & VEXEC)
278 			mode |= NFSACCESS_EXECUTE;
279 	}
280 
281 	/*
282 	 * Now, just call nfsrpc_accessrpc() to do the actual RPC.
283 	 */
284 	error = nfsrpc_accessrpc(vp, mode, cred, p, nap, attrflagp, &rmode);
285 
286 	/*
287 	 * The NFS V3 spec does not clarify whether or not
288 	 * the returned access bits can be a superset of
289 	 * the ones requested, so...
290 	 */
291 	if (!error && (rmode & mode) != mode)
292 		error = EACCES;
293 	return (error);
294 }
295 
296 /*
297  * The actual rpc, separated out for Darwin.
298  */
299 int
300 nfsrpc_accessrpc(vnode_t vp, u_int32_t mode, struct ucred *cred,
301     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, u_int32_t *rmodep)
302 {
303 	u_int32_t *tl;
304 	u_int32_t supported, rmode;
305 	int error;
306 	struct nfsrv_descript nfsd, *nd = &nfsd;
307 	nfsattrbit_t attrbits;
308 
309 	*attrflagp = 0;
310 	supported = mode;
311 	NFSCL_REQSTART(nd, NFSPROC_ACCESS, vp, cred);
312 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
313 	*tl = txdr_unsigned(mode);
314 	if (nd->nd_flag & ND_NFSV4) {
315 		/*
316 		 * And do a Getattr op.
317 		 */
318 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
319 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
320 		NFSGETATTR_ATTRBIT(&attrbits);
321 		(void) nfsrv_putattrbit(nd, &attrbits);
322 	}
323 	error = nfscl_request(nd, vp, p, cred);
324 	if (error)
325 		return (error);
326 	if (nd->nd_flag & ND_NFSV3) {
327 		error = nfscl_postop_attr(nd, nap, attrflagp);
328 		if (error)
329 			goto nfsmout;
330 	}
331 	if (!nd->nd_repstat) {
332 		if (nd->nd_flag & ND_NFSV4) {
333 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
334 			supported = fxdr_unsigned(u_int32_t, *tl++);
335 		} else {
336 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
337 		}
338 		rmode = fxdr_unsigned(u_int32_t, *tl);
339 		if (nd->nd_flag & ND_NFSV4)
340 			error = nfscl_postop_attr(nd, nap, attrflagp);
341 
342 		/*
343 		 * It's not obvious what should be done about
344 		 * unsupported access modes. For now, be paranoid
345 		 * and clear the unsupported ones.
346 		 */
347 		rmode &= supported;
348 		*rmodep = rmode;
349 	} else
350 		error = nd->nd_repstat;
351 nfsmout:
352 	m_freem(nd->nd_mrep);
353 	return (error);
354 }
355 
356 /*
357  * nfs open rpc
 *
 * Acquire NFSv4 open state on vp for the access requested in amode
 * (FREAD and/or FWRITE).  Vnodes that are not VREG return 0 at once.
 *
 * Each pass of the loop calls nfscl_open() and, when that reports
 * NFSCLOPEN_DOOPEN, performs the Open on the wire -- through
 * nfsrpc_getopenlayout() on a first attempt when the pNFS conditions
 * below hold, otherwise through nfsrpc_openrpc().  A delegation handed
 * back in dp is passed on to nfscl_deleg().
 *
 * Retry policy, as coded:
 *  - NFSERR_GRACE, NFSERR_STALECLIENTID, NFSERR_STALEDONTRECOVER,
 *    NFSERR_DELAY and NFSERR_BADSESSION: nfs_catnap() and retry, with no
 *    retry limit.
 *  - NFSERR_EXPIRED and NFSERR_BADSTATEID (only when clidrev != 0): call
 *    nfscl_hasexpired() and retry while it returns 0 and retrycnt < 4.
 *
 * Returns 0 on success, the nfscl_open() error, the error from the Open,
 * or EIO (no n_v4 data on the node, or the bounded retries ran out).
358  */
359 int
360 nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p)
361 {
362 	struct nfsclopen *op;
363 	struct nfscldeleg *dp;
364 	struct nfsfh *nfhp;
365 	struct nfsnode *np = VTONFS(vp);
366 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
367 	u_int32_t mode, clidrev;
368 	int ret, newone, error, expireret = 0, retrycnt;
369 
370 	/*
371 	 * For NFSv4, Open Ops are only done on Regular Files.
372 	 */
373 	if (vp->v_type != VREG)
374 		return (0);
	/* Map the open flags onto the NFSv4 share-access bits. */
375 	mode = 0;
376 	if (amode & FREAD)
377 		mode |= NFSV4OPEN_ACCESSREAD;
378 	if (amode & FWRITE)
379 		mode |= NFSV4OPEN_ACCESSWRITE;
380 	nfhp = np->n_fhp;
381 
382 	retrycnt = 0;
383 #ifdef notdef
384 { char name[100]; int namel;
385 namel = (np->n_v4->n4_namelen < 100) ? np->n_v4->n4_namelen : 99;
386 bcopy(NFS4NODENAME(np->n_v4), name, namel);
387 name[namel] = '\0';
388 printf("rpcopen p=0x%x name=%s",p->p_pid,name);
389 if (nfhp->nfh_len > 0) printf(" fh=0x%x\n",nfhp->nfh_fh[12]);
390 else printf(" fhl=0\n");
391 }
392 #endif
393 	do {
394 	    dp = NULL;
395 	    error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 1,
396 		cred, p, NULL, &op, &newone, &ret, 1, true);
397 	    if (error) {
398 		return (error);
399 	    }
	    /*
	     * Remember the clientid revision seen now; it is handed to
	     * nfscl_hasexpired() if the Open fails with EXPIRED/BADSTATEID.
	     * Zero (no nm_clp) disables that recovery path.
	     */
400 	    if (nmp->nm_clp != NULL)
401 		clidrev = nmp->nm_clp->nfsc_clientidrev;
402 	    else
403 		clidrev = 0;
404 	    if (ret == NFSCLOPEN_DOOPEN) {
405 		if (np->n_v4 != NULL) {
406 			/*
407 			 * For the first attempt, try and get a layout, if
408 			 * pNFS is enabled for the mount.
409 			 */
			/*
			 * The plain Open is used when the mount is not pNFS,
			 * callbacks are disabled, no nfscbd is running, the
			 * node is flagged NNOLAYOUT, or this is a retry.
			 */
410 			if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
411 			    nfs_numnfscbd == 0 ||
412 			    (np->n_flag & NNOLAYOUT) != 0 || retrycnt > 0)
413 				error = nfsrpc_openrpc(nmp, vp,
414 				    np->n_v4->n4_data,
415 				    np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
416 				    np->n_fhp->nfh_len, mode, op,
417 				    NFS4NODENAME(np->n_v4),
418 				    np->n_v4->n4_namelen,
419 				    &dp, 0, 0x0, cred, p, 0, 0);
420 			else
421 				error = nfsrpc_getopenlayout(nmp, vp,
422 				    np->n_v4->n4_data,
423 				    np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
424 				    np->n_fhp->nfh_len, mode, op,
425 				    NFS4NODENAME(np->n_v4),
426 				    np->n_v4->n4_namelen, &dp, cred, p);
427 			if (dp != NULL) {
428 				NFSLOCKNODE(np);
429 				np->n_flag &= ~NDELEGMOD;
430 				/*
431 				 * Invalidate the attribute cache, so that
432 				 * attributes that pre-date the issue of a
433 				 * delegation are not cached, since the
434 				 * cached attributes will remain valid while
435 				 * the delegation is held.
436 				 */
437 				NFSINVALATTRCACHE(np);
438 				NFSUNLOCKNODE(np);
439 				(void) nfscl_deleg(nmp->nm_mountp,
440 				    op->nfso_own->nfsow_clp,
441 				    nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp);
442 			}
443 		} else {
			/* Without n_v4 data the Open cannot be built. */
444 			error = EIO;
445 		}
446 		newnfs_copyincred(cred, &op->nfso_cred);
447 	    } else if (ret == NFSCLOPEN_SETCRED)
448 		/*
449 		 * This is a new local open on a delegation. It needs
450 		 * to have credentials so that an open can be done
451 		 * against the server during recovery.
452 		 */
453 		newnfs_copyincred(cred, &op->nfso_cred);
454 
455 	    /*
456 	     * nfso_opencnt is the count of how many VOP_OPEN()s have
457 	     * been done on this Open successfully and a VOP_CLOSE()
458 	     * is expected for each of these.
459 	     * If error is non-zero, don't increment it, since the Open
460 	     * hasn't succeeded yet.
461 	     */
462 	    if (!error) {
463 		op->nfso_opencnt++;
		/* For these mounts, record the open on the node itself. */
464 		if (NFSHASNFSV4N(nmp) && NFSHASONEOPENOWN(nmp)) {
465 		    NFSLOCKNODE(np);
466 		    np->n_openstateid = op;
467 		    NFSUNLOCKNODE(np);
468 		}
469 	    }
470 	    nfscl_openrelease(nmp, op, error, newone);
471 	    if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
472 		error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
473 		error == NFSERR_BADSESSION) {
474 		(void) nfs_catnap(PZERO, error, "nfs_open");
475 	    } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
476 		&& clidrev != 0) {
477 		expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
478 		retrycnt++;
479 	    }
	/*
	 * The first five errors retry without limit; EXPIRED/BADSTATEID
	 * retry only while nfscl_hasexpired() returned 0 and fewer than
	 * four such retries have been made.
	 */
480 	} while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
481 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
482 	    error == NFSERR_BADSESSION ||
483 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
484 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
	/* Out of bounded retries: report a generic I/O error instead. */
485 	if (error && retrycnt >= 4)
486 		error = EIO;
487 	return (error);
488 }
489 
490 /*
491  * the actual open rpc
 *
 * Builds and sends one NFSv4 Open (no create) followed by a Getattr for
 * the change and modify-time attributes, then parses the reply.
 *
 * nfhp/fhlen      file handle the request is started with
 * newfhp/newfhlen file handle copied into a newly granted delegation and
 *                 passed to nfsrpc_openconfirm()
 * mode            share access bits, with the deny bits above NFSLCK_SHIFT
 * op              open structure; its owner's seqid is sent and its
 *                 stateid and posixlock flag are updated from the reply
 * name/namelen    component name, sent unless reclaim is set
 * dpp             in: delegation to claim with CLAIMDELEGATECUR, or NULL.
 *                 out: always reset to NULL on entry; on success it
 *                 receives the delegation allocated here, if any
 * reclaim         non-zero selects CLAIMPREVIOUS with delegtype
 * syscred         non-zero sets ND_USEGSSNAME on the request
 * recursed        non-zero on the nested retry, which stops it from
 *                 recursing again
 *
 * Returns 0 or an error.  On error a delegation allocated here is freed.
 * NOTE(review): NFSM_DISSECT presumably branches to nfsmout with error
 * set when the reply is too short -- confirm against the macro.
492  */
493 int
494 nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen,
495     u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op,
496     u_int8_t *name, int namelen, struct nfscldeleg **dpp,
497     int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p,
498     int syscred, int recursed)
499 {
500 	u_int32_t *tl;
501 	struct nfsrv_descript nfsd, *nd = &nfsd;
502 	struct nfscldeleg *dp, *ndp = NULL;
503 	struct nfsvattr nfsva;
504 	u_int32_t rflags, deleg;
505 	nfsattrbit_t attrbits;
506 	int error, ret, acesize, limitby;
507 	struct nfsclsession *tsep;
508 
	/* Take the caller's delegation and clear the out parameter. */
509 	dp = *dpp;
510 	*dpp = NULL;
511 	nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL, NULL, 0, 0,
512 	    cred);
	/* Open arguments: seqid, share access, share deny, clientid. */
513 	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
514 	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
515 	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
516 	*tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
517 	tsep = nfsmnt_mdssession(nmp);
518 	*tl++ = tsep->nfsess_clientid.lval[0];
519 	*tl = tsep->nfsess_clientid.lval[1];
520 	(void) nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
521 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
522 	*tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
	/* Claim type: previous (reclaim), delegate-current, or null. */
523 	if (reclaim) {
524 		*tl = txdr_unsigned(NFSV4OPEN_CLAIMPREVIOUS);
525 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
526 		*tl = txdr_unsigned(delegtype);
527 	} else {
528 		if (dp != NULL) {
529 			*tl = txdr_unsigned(NFSV4OPEN_CLAIMDELEGATECUR);
530 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
			/* NFSHASNFSV4N mounts send a zero stateid seqid. */
531 			if (NFSHASNFSV4N(nmp))
532 				*tl++ = 0;
533 			else
534 				*tl++ = dp->nfsdl_stateid.seqid;
535 			*tl++ = dp->nfsdl_stateid.other[0];
536 			*tl++ = dp->nfsdl_stateid.other[1];
537 			*tl = dp->nfsdl_stateid.other[2];
538 		} else {
539 			*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
540 		}
541 		(void) nfsm_strtom(nd, name, namelen);
542 	}
	/* Follow the Open with a Getattr for change and modify time. */
543 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
544 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
545 	NFSZERO_ATTRBIT(&attrbits);
546 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
547 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
548 	(void) nfsrv_putattrbit(nd, &attrbits);
549 	if (syscred)
550 		nd->nd_flag |= ND_USEGSSNAME;
551 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
552 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
553 	if (error)
554 		return (error);
555 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
556 	if (!nd->nd_repstat) {
557 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
558 		    6 * NFSX_UNSIGNED);
559 		op->nfso_stateid.seqid = *tl++;
560 		op->nfso_stateid.other[0] = *tl++;
561 		op->nfso_stateid.other[1] = *tl++;
562 		op->nfso_stateid.other[2] = *tl;
		/*
		 * tl still points at the last stateid word, so tl + 6 is
		 * the last of the six words dissected after the stateid.
		 */
563 		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
564 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
565 		if (error)
566 			goto nfsmout;
567 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
568 		deleg = fxdr_unsigned(u_int32_t, *tl);
569 		if (deleg == NFSV4OPEN_DELEGATEREAD ||
570 		    deleg == NFSV4OPEN_DELEGATEWRITE) {
			/* Note on the client that delegations are being issued. */
571 			if (!(op->nfso_own->nfsow_clp->nfsc_flags &
572 			      NFSCLFLAGS_FIRSTDELEG))
573 				op->nfso_own->nfsow_clp->nfsc_flags |=
574 				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
			/*
			 * The allocation is sized for the structure plus a
			 * copy of the file handle.  It is not zeroed, so only
			 * the fields assigned below hold defined values.
			 */
575 			ndp = malloc(
576 			    sizeof (struct nfscldeleg) + newfhlen,
577 			    M_NFSCLDELEG, M_WAITOK);
578 			LIST_INIT(&ndp->nfsdl_owner);
579 			LIST_INIT(&ndp->nfsdl_lock);
580 			ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
581 			ndp->nfsdl_fhlen = newfhlen;
582 			NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
583 			newnfs_copyincred(cred, &ndp->nfsdl_cred);
584 			nfscl_lockinit(&ndp->nfsdl_rwlock);
			/* Delegation stateid, then the recall flag word. */
585 			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
586 			    NFSX_UNSIGNED);
587 			ndp->nfsdl_stateid.seqid = *tl++;
588 			ndp->nfsdl_stateid.other[0] = *tl++;
589 			ndp->nfsdl_stateid.other[1] = *tl++;
590 			ndp->nfsdl_stateid.other[2] = *tl++;
591 			ret = fxdr_unsigned(int, *tl);
592 			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
593 				ndp->nfsdl_flags = NFSCLDL_WRITE;
594 				/*
595 				 * Indicates how much the file can grow.
596 				 */
597 				NFSM_DISSECT(tl, u_int32_t *,
598 				    3 * NFSX_UNSIGNED);
599 				limitby = fxdr_unsigned(int, *tl++);
600 				switch (limitby) {
601 				case NFSV4OPEN_LIMITSIZE:
602 					ndp->nfsdl_sizelimit = fxdr_hyper(tl);
603 					break;
604 				case NFSV4OPEN_LIMITBLOCKS:
					/* Limit is the product of the two words. */
605 					ndp->nfsdl_sizelimit =
606 					    fxdr_unsigned(u_int64_t, *tl++);
607 					ndp->nfsdl_sizelimit *=
608 					    fxdr_unsigned(u_int64_t, *tl);
609 					break;
610 				default:
611 					error = NFSERR_BADXDR;
612 					goto nfsmout;
613 				}
614 			} else {
615 				ndp->nfsdl_flags = NFSCLDL_READ;
616 			}
			/* ret still holds the recall word read above. */
617 			if (ret)
618 				ndp->nfsdl_flags |= NFSCLDL_RECALL;
619 			error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, false,
620 			    &ret, &acesize, p);
621 			if (error)
622 				goto nfsmout;
623 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
624 			error = NFSERR_BADXDR;
625 			goto nfsmout;
626 		}
		/* Step over two words; their values are not used here. */
627 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
628 		error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
629 		    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
630 		    NULL, NULL, NULL, p, cred);
631 		if (error)
632 			goto nfsmout;
		/* Stamp a new delegation with the attributes just loaded. */
633 		if (ndp != NULL) {
634 			ndp->nfsdl_change = nfsva.na_filerev;
635 			ndp->nfsdl_modtime = nfsva.na_mtime;
636 			ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
637 		}
		/* Confirm the open if asked to, retrying on NFSERR_DELAY. */
638 		if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM)) {
639 		    do {
640 			ret = nfsrpc_openconfirm(vp, newfhp, newfhlen, op,
641 			    cred, p);
642 			if (ret == NFSERR_DELAY)
643 			    (void) nfs_catnap(PZERO, ret, "nfs_open");
644 		    } while (ret == NFSERR_DELAY);
645 		    error = ret;
646 		}
647 		if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) ||
648 		    nfscl_assumeposixlocks)
649 		    op->nfso_posixlock = 1;
650 		else
651 		    op->nfso_posixlock = 0;
652 
653 		/*
654 		 * If the server is handing out delegations, but we didn't
655 		 * get one because an OpenConfirm was required, try the
656 		 * Open again, to get a delegation. This is a harmless no-op,
657 		 * from a server's point of view.
658 		 */
659 		if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM) &&
660 		    (op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG)
661 		    && !error && dp == NULL && ndp == NULL && !recursed) {
662 		    do {
663 			ret = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp,
664 			    newfhlen, mode, op, name, namelen, &ndp, 0, 0x0,
665 			    cred, p, syscred, 1);
666 			if (ret == NFSERR_DELAY)
667 			    (void) nfs_catnap(PZERO, ret, "nfs_open2");
668 		    } while (ret == NFSERR_DELAY);
		    /*
		     * A failed second Open is ignored, except for the three
		     * errors below, which are returned to the caller.
		     */
669 		    if (ret) {
670 			if (ndp != NULL) {
671 				free(ndp, M_NFSCLDELEG);
672 				ndp = NULL;
673 			}
674 			if (ret == NFSERR_STALECLIENTID ||
675 			    ret == NFSERR_STALEDONTRECOVER ||
676 			    ret == NFSERR_BADSESSION)
677 				error = ret;
678 		    }
679 		}
680 	}
681 	if (nd->nd_repstat != 0 && error == 0)
682 		error = nd->nd_repstat;
683 	if (error == NFSERR_STALECLIENTID)
684 		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
685 nfsmout:
	/* Hand the delegation to the caller only on success. */
686 	if (!error)
687 		*dpp = ndp;
688 	else if (ndp != NULL)
689 		free(ndp, M_NFSCLDELEG);
690 	m_freem(nd->nd_mrep);
691 	return (error);
692 }
693 
694 /*
695  * open downgrade rpc
696  */
697 int
698 nfsrpc_opendowngrade(vnode_t vp, u_int32_t mode, struct nfsclopen *op,
699     struct ucred *cred, NFSPROC_T *p)
700 {
701 	u_int32_t *tl;
702 	struct nfsrv_descript nfsd, *nd = &nfsd;
703 	int error;
704 
705 	NFSCL_REQSTART(nd, NFSPROC_OPENDOWNGRADE, vp, cred);
706 	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED);
707 	if (NFSHASNFSV4N(VFSTONFS(vp->v_mount)))
708 		*tl++ = 0;
709 	else
710 		*tl++ = op->nfso_stateid.seqid;
711 	*tl++ = op->nfso_stateid.other[0];
712 	*tl++ = op->nfso_stateid.other[1];
713 	*tl++ = op->nfso_stateid.other[2];
714 	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
715 	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
716 	*tl = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
717 	error = nfscl_request(nd, vp, p, cred);
718 	if (error)
719 		return (error);
720 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
721 	if (!nd->nd_repstat) {
722 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
723 		op->nfso_stateid.seqid = *tl++;
724 		op->nfso_stateid.other[0] = *tl++;
725 		op->nfso_stateid.other[1] = *tl++;
726 		op->nfso_stateid.other[2] = *tl;
727 	}
728 	if (nd->nd_repstat && error == 0)
729 		error = nd->nd_repstat;
730 	if (error == NFSERR_STALESTATEID)
731 		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
732 nfsmout:
733 	m_freem(nd->nd_mrep);
734 	return (error);
735 }
736 
737 /*
738  * V4 Close operation.
739  */
740 int
741 nfsrpc_close(vnode_t vp, int doclose, NFSPROC_T *p)
742 {
743 	struct nfsclclient *clp;
744 	int error;
745 
746 	if (vp->v_type != VREG)
747 		return (0);
748 	if (doclose)
749 		error = nfscl_doclose(vp, &clp, p);
750 	else {
751 		error = nfscl_getclose(vp, &clp);
752 		if (error == 0)
753 			nfscl_clientrelease(clp);
754 	}
755 	return (error);
756 }
757 
758 /*
759  * Close the open.
 *
 * Releases the byte-range locks held under op, releases each lock owner
 * on the server, then closes the open through nfscl_tryclose().
 *
 * nmp              mount the open belongs to
 * op               the open to close; its saved credentials (nfso_cred)
 *                  are used for every RPC issued here
 * p                passed through to the RPC helpers
 * loop_on_delayed  passed through to nfscl_tryclose()
 * freeop           when true, op is freed unless the close result is
 *                  NFSERR_DELAY
 *
 * Returns the final result of nfscl_tryclose().  Results of the unlock
 * and release-lock-owner RPCs are not returned to the caller.
760  */
761 int
762 nfsrpc_doclose(struct nfsmount *nmp, struct nfsclopen *op, NFSPROC_T *p,
763     bool loop_on_delayed, bool freeop)
764 {
765 	struct nfsrv_descript nfsd, *nd = &nfsd;
766 	struct nfscllockowner *lp, *nlp;
767 	struct nfscllock *lop, *nlop;
768 	struct ucred *tcred;
769 	u_int64_t off = 0, len = 0;
770 	u_int32_t type = NFSV4LOCKT_READ;
771 	int error, do_unlock, trycnt;
772 
	/* Work with a private copy of the credentials saved in the open. */
773 	tcred = newnfs_getcred();
774 	newnfs_copycred(&op->nfso_cred, tcred);
775 	/*
776 	 * (Theoretically this could be done in the same
777 	 *  compound as the close, but having multiple
778 	 *  sequenced Ops in the same compound might be
779 	 *  too scary for some servers.)
780 	 */
	/*
	 * For a posixlock open a single unlock covering offset 0 with
	 * length NFS64BITSSET is sent per lock owner (see do_unlock below).
	 */
781 	if (op->nfso_posixlock) {
782 		off = 0;
783 		len = NFS64BITSSET;
784 		type = NFSV4LOCKT_READ;
785 	}
786 
787 	/*
788 	 * Since this function is only called from VOP_INACTIVE(), no
789 	 * other thread will be manipulating this Open. As such, the
790 	 * lock lists are not being changed by other threads, so it should
791 	 * be safe to do this without locking.
792 	 */
793 	LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
794 		do_unlock = 1;
795 		LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
			/* Without posixlock, unlock each range separately. */
796 			if (op->nfso_posixlock == 0) {
797 				off = lop->nfslo_first;
798 				len = lop->nfslo_end - lop->nfslo_first;
799 				if (lop->nfslo_type == F_WRLCK)
800 					type = NFSV4LOCKT_WRITE;
801 				else
802 					type = NFSV4LOCKT_READ;
803 			}
804 			if (do_unlock) {
805 				trycnt = 0;
				/*
				 * Retry while the reply status is GRACE or
				 * DELAY and the RPC itself did not fail; the
				 * trycnt test bounds the number of retries.
				 */
806 				do {
807 					error = nfsrpc_locku(nd, nmp, lp, off,
808 					    len, type, tcred, p, 0);
809 					if ((nd->nd_repstat == NFSERR_GRACE ||
810 					    nd->nd_repstat == NFSERR_DELAY) &&
811 					    error == 0)
812 						(void) nfs_catnap(PZERO,
813 						    (int)nd->nd_repstat,
814 						    "nfs_close");
815 				} while ((nd->nd_repstat == NFSERR_GRACE ||
816 				    nd->nd_repstat == NFSERR_DELAY) &&
817 				    error == 0 && trycnt++ < 5);
				/* One whole-range unlock per owner is enough. */
818 				if (op->nfso_posixlock)
819 					do_unlock = 0;
820 			}
821 			nfscl_freelock(lop, 0);
822 		}
823 		/*
824 		 * Do a ReleaseLockOwner.
825 		 * The lock owner name nfsl_owner may be used by other opens for
826 		 * other files but the lock_owner4 name that nfsrpc_rellockown()
827 		 * puts on the wire has the file handle for this file appended
828 		 * to it, so it can be done now.
829 		 */
830 		(void)nfsrpc_rellockown(nmp, lp, lp->nfsl_open->nfso_fh,
831 		    lp->nfsl_open->nfso_fhlen, tcred, p);
832 	}
833 
834 	/*
835 	 * There could be other Opens for different files on the same
836 	 * OpenOwner, so locking is required.
837 	 */
	/*
	 * The open owner's rwlock is taken exclusively under the client
	 * state mutex; the mutex is dropped again before the close RPC.
	 */
838 	NFSLOCKCLSTATE();
839 	nfscl_lockexcl(&op->nfso_own->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
840 	NFSUNLOCKCLSTATE();
	/* Only NFSERR_GRACE is retried here. */
841 	do {
842 		error = nfscl_tryclose(op, tcred, nmp, p, loop_on_delayed);
843 		if (error == NFSERR_GRACE)
844 			(void) nfs_catnap(PZERO, error, "nfs_close");
845 	} while (error == NFSERR_GRACE);
	/* Retake the state mutex for the unlock and the list teardown. */
846 	NFSLOCKCLSTATE();
847 	nfscl_lockunlock(&op->nfso_own->nfsow_rwlock);
848 
849 	LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp)
850 		nfscl_freelockowner(lp, 0);
	/* After NFSERR_DELAY the open is kept even when freeop is set. */
851 	if (freeop && error != NFSERR_DELAY)
852 		nfscl_freeopen(op, 0, true);
853 	NFSUNLOCKCLSTATE();
854 	NFSFREECRED(tcred);
855 	return (error);
856 }
857 
858 /*
859  * The actual Close RPC.
860  */
861 int
862 nfsrpc_closerpc(struct nfsrv_descript *nd, struct nfsmount *nmp,
863     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p,
864     int syscred)
865 {
866 	u_int32_t *tl;
867 	int error;
868 
869 	nfscl_reqstart(nd, NFSPROC_CLOSE, nmp, op->nfso_fh,
870 	    op->nfso_fhlen, NULL, NULL, 0, 0, cred);
871 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
872 	if (NFSHASNFSV4N(nmp)) {
873 		*tl++ = 0;
874 		*tl++ = 0;
875 	} else {
876 		*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
877 		*tl++ = op->nfso_stateid.seqid;
878 	}
879 	*tl++ = op->nfso_stateid.other[0];
880 	*tl++ = op->nfso_stateid.other[1];
881 	*tl = op->nfso_stateid.other[2];
882 	if (syscred)
883 		nd->nd_flag |= ND_USEGSSNAME;
884 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
885 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
886 	if (error)
887 		return (error);
888 	if (!NFSHASNFSV4N(nmp))
889 		NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
890 	if (nd->nd_repstat == 0)
891 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
892 	error = nd->nd_repstat;
893 	if (!NFSHASNFSV4N(nmp) && error == NFSERR_STALESTATEID)
894 		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
895 nfsmout:
896 	m_freem(nd->nd_mrep);
897 	return (error);
898 }
899 
900 /*
901  * V4 Open Confirm RPC.
902  */
903 int
904 nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen,
905     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p)
906 {
907 	u_int32_t *tl;
908 	struct nfsrv_descript nfsd, *nd = &nfsd;
909 	struct nfsmount *nmp;
910 	int error;
911 
912 	nmp = VFSTONFS(vp->v_mount);
913 	if (NFSHASNFSV4N(nmp))
914 		return (0);		/* No confirmation for NFSv4.1. */
915 	nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, nmp, nfhp, fhlen, NULL, NULL,
916 	    0, 0, NULL);
917 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
918 	*tl++ = op->nfso_stateid.seqid;
919 	*tl++ = op->nfso_stateid.other[0];
920 	*tl++ = op->nfso_stateid.other[1];
921 	*tl++ = op->nfso_stateid.other[2];
922 	*tl = txdr_unsigned(op->nfso_own->nfsow_seqid);
923 	error = nfscl_request(nd, vp, p, cred);
924 	if (error)
925 		return (error);
926 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
927 	if (!nd->nd_repstat) {
928 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
929 		op->nfso_stateid.seqid = *tl++;
930 		op->nfso_stateid.other[0] = *tl++;
931 		op->nfso_stateid.other[1] = *tl++;
932 		op->nfso_stateid.other[2] = *tl;
933 	}
934 	error = nd->nd_repstat;
935 	if (error == NFSERR_STALESTATEID)
936 		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
937 nfsmout:
938 	m_freem(nd->nd_mrep);
939 	return (error);
940 }
941 
942 /*
943  * Do the setclientid and setclientid confirm RPCs. Called from nfs_statfs()
944  * when a mount has just occurred and when the server replies NFSERR_EXPIRED.
 *
 * NFSv4.1 and later (NFSHASNFSV4N() true):
 *   - When retokp is non-NULL and a session is already on nmp->nm_sess, a
 *     CreateSession using that session's ClientID and sequenceid is tried
 *     first.  If it succeeds, *retokp is set to true.  If it fails and
 *     *retokp was already true, NFSERR_IO is returned.
 *   - When there is no such session and *retokp is true, NFSERR_IO is
 *     returned.
 *   - Otherwise an ExchangeID followed by a CreateSession is done.
 *   - On success the new session is inserted at the head of nmp->nm_sess,
 *     any previous head session is marked defunct, waiters are woken up
 *     and, when reclaim is 0, a ReclaimComplete is sent.
 * NFSv4.0:
 *   - Returns NFSERR_IO when retokp is non-NULL and *retokp is true.
 *   - Otherwise a zeroed nfsclds is inserted at the head of nmp->nm_sess
 *     and the SetClientID / SetClientIDConfirm pair is done.
 *
 * Returns 0 on success, an error from the RPC layer, or the server's
 * reply status.
945  */
946 int
947 nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim,
948     bool *retokp, struct ucred *cred, NFSPROC_T *p)
949 {
950 	u_int32_t *tl;
951 	struct nfsrv_descript nfsd;
952 	struct nfsrv_descript *nd = &nfsd;
953 	u_int8_t *cp = NULL, *cp2, addr[INET6_ADDRSTRLEN + 9];
954 	u_short port;
955 	int error, isinet6 = 0, callblen;
956 	nfsquad_t confirm;
	/* Function-static counter; each new client id attempt gets rev++. */
957 	static u_int32_t rev = 0;
958 	struct nfsclds *dsp, *odsp;
959 	struct in6_addr a6;
960 	struct nfsclsession *tsep;
961 	struct rpc_reconupcall recon;
962 	struct nfscl_reconarg *rcp;
963 
	/* nfsboottime.tv_sec is sent in the SetClientID request below. */
964 	if (nfsboottime.tv_sec == 0)
965 		NFSSETBOOTTIME(nfsboottime);
966 	if (NFSHASNFSV4N(nmp)) {
		/*
		 * Start with a non-zero error so that the ExchangeID path
		 * below is taken unless the CreateSession for an extant
		 * session succeeds.
		 */
967 		error = NFSERR_BADSESSION;
968 		odsp = dsp = NULL;
		/* The old session is only looked up when retokp was given. */
969 		if (retokp != NULL) {
970 			NFSLOCKMNT(nmp);
971 			odsp = TAILQ_FIRST(&nmp->nm_sess);
972 			NFSUNLOCKMNT(nmp);
973 		}
974 		if (odsp != NULL) {
975 			/*
976 			 * When a session already exists, first try a
977 			 * CreateSession with the extant ClientID.
978 			 */
			/*
			 * The allocation has room for the server owner
			 * string plus one extra byte, which is what the
			 * memcpy() below copies.
			 */
979 			dsp = malloc(sizeof(struct nfsclds) +
980 			    odsp->nfsclds_servownlen + 1, M_NFSCLDS,
981 			    M_WAITOK | M_ZERO);
982 			dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
983 			dsp->nfsclds_servownlen = odsp->nfsclds_servownlen;
984 			dsp->nfsclds_sess.nfsess_clientid =
985 			    odsp->nfsclds_sess.nfsess_clientid;
986 			dsp->nfsclds_sess.nfsess_sequenceid =
987 			    odsp->nfsclds_sess.nfsess_sequenceid;
988 			dsp->nfsclds_flags = odsp->nfsclds_flags;
989 			if (dsp->nfsclds_servownlen > 0)
990 				memcpy(dsp->nfsclds_serverown,
991 				    odsp->nfsclds_serverown,
992 				    dsp->nfsclds_servownlen + 1);
993 			mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
994 			mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
995 			    NULL, MTX_DEF);
996 			nfscl_initsessionslots(&dsp->nfsclds_sess);
997 			error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
998 			    &nmp->nm_sockreq, NULL,
999 			    dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p);
1000 			NFSCL_DEBUG(1, "create session for extant "
1001 			    "ClientID=%d\n", error);
1002 			if (error != 0) {
1003 				nfscl_freenfsclds(dsp);
1004 				dsp = NULL;
1005 				/*
1006 				 * If *retokp is true, return any error other
1007 				 * than NFSERR_STALECLIENTID,
1008 				 * NFSERR_BADSESSION or NFSERR_STALEDONTRECOVER
1009 				 * so that nfscl_recover() will not loop.
1010 				 */
				/* retokp is non-NULL here, since odsp != NULL. */
1011 				if (*retokp)
1012 					return (NFSERR_IO);
1013 			} else
1014 				*retokp = true;
1015 		} else if (retokp != NULL && *retokp)
1016 			return (NFSERR_IO);
1017 		if (error != 0) {
1018 			/*
1019 			 * Either there was no previous session or the
1020 			 * CreateSession attempt failed, so...
1021 			 * do an ExchangeID followed by the CreateSession.
1022 			 */
1023 			clp->nfsc_rev = rev++;
1024 			error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq, 0,
1025 			    NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp,
1026 			    cred, p);
1027 			NFSCL_DEBUG(1, "aft exch=%d\n", error);
1028 			if (error == 0)
1029 				error = nfsrpc_createsession(nmp,
1030 				    &dsp->nfsclds_sess, &nmp->nm_sockreq, NULL,
1031 				    dsp->nfsclds_sess.nfsess_sequenceid, 1,
1032 				    cred, p);
1033 			NFSCL_DEBUG(1, "aft createsess=%d\n", error);
1034 		}
1035 		if (error == 0) {
1036 			/*
1037 			 * If the session supports a backchannel, set up
1038 			 * the BindConnectionToSession call in the krpc
1039 			 * so that it is done on a reconnection.
1040 			 */
1041 			if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0) {
				/*
				 * rcp is handed to the client handle via
				 * CLSET_RECONUPCALL and is not freed here.
				 * NOTE(review): presumably the krpc layer
				 * owns it from now on -- confirm.
				 */
1042 				rcp = mem_alloc(sizeof(*rcp));
1043 				rcp->minorvers = nmp->nm_minorvers;
1044 				memcpy(rcp->sessionid,
1045 				    dsp->nfsclds_sess.nfsess_sessionid,
1046 				    NFSX_V4SESSIONID);
1047 				recon.call = nfsrpc_bindconnsess;
1048 				recon.arg = rcp;
1049 				CLNT_CONTROL(nmp->nm_client, CLSET_RECONUPCALL,
1050 				    &recon);
1051 			}
1052 
1053 			NFSLOCKMNT(nmp);
1054 			/*
1055 			 * The old sessions cannot be safely free'd
1056 			 * here, since they may still be used by
1057 			 * in-progress RPCs.
1058 			 */
1059 			tsep = NULL;
1060 			if (TAILQ_FIRST(&nmp->nm_sess) != NULL) {
1061 				/*
1062 				 * Mark the old session defunct.  Needed
1063 				 * when called from nfscl_hasexpired().
1064 				 */
				/* Done before dsp becomes the new list head. */
1065 				tsep = NFSMNT_MDSSESSION(nmp);
1066 				tsep->nfsess_defunct = 1;
1067 			}
1068 			TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp,
1069 			    nfsclds_list);
1070 			/*
1071 			 * Wake up RPCs waiting for a slot on the
1072 			 * old session. These will then fail with
1073 			 * NFSERR_BADSESSION and be retried with the
1074 			 * new session by nfsv4_setsequence().
1075 			 * Also wakeup() processes waiting for the
1076 			 * new session.
1077 			 */
1078 			if (tsep != NULL)
1079 				wakeup(&tsep->nfsess_slots);
1080 			wakeup(&nmp->nm_sess);
1081 			NFSUNLOCKMNT(nmp);
1082 		} else if (dsp != NULL)
1083 			nfscl_freenfsclds(dsp);
		/* ReclaimComplete is only sent when nothing is being reclaimed. */
1084 		if (error == 0 && reclaim == 0) {
1085 			error = nfsrpc_reclaimcomplete(nmp, cred, p);
1086 			NFSCL_DEBUG(1, "aft reclaimcomp=%d\n", error);
1087 			if (error == NFSERR_COMPLETEALREADY ||
1088 			    error == NFSERR_NOTSUPP)
1089 				/* Ignore this error. */
1090 				error = 0;
1091 		}
1092 		return (error);
1093 	} else if (retokp != NULL && *retokp)
1094 		return (NFSERR_IO);
	/* NFSv4.0 from here on. */
1095 	clp->nfsc_rev = rev++;
1096 
1097 	/*
1098 	 * Allocate a single session structure for NFSv4.0, because some of
1099 	 * the fields are used by NFSv4.0 although it doesn't do a session.
1100 	 */
1101 	dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO);
1102 	mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
1103 	mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF);
1104 	NFSLOCKMNT(nmp);
1105 	TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list);
	/*
	 * NOTE(review): presumably tsep now refers to the session inside
	 * the dsp just inserted -- verify against NFSMNT_MDSSESSION().
	 * dsp stays on nm_sess on every return path below, including the
	 * error returns.
	 */
1106 	tsep = NFSMNT_MDSSESSION(nmp);
1107 	NFSUNLOCKMNT(nmp);
1108 
1109 	nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL, NULL, 0, 0,
1110 	    NULL);
	/* Two words (boot time seconds and nfsc_rev), then the client id. */
1111 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1112 	*tl++ = txdr_unsigned(nfsboottime.tv_sec);
1113 	*tl = txdr_unsigned(clp->nfsc_rev);
1114 	(void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
1115 
1116 	/*
1117 	 * set up the callback address
1118 	 */
1119 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1120 	*tl = txdr_unsigned(NFS_CALLBCKPROG);
	/*
	 * A configured nfsv4_callbackaddr takes precedence; only when it is
	 * empty is nfscl_getmyip() asked for an address.
	 */
1121 	callblen = strlen(nfsv4_callbackaddr);
1122 	if (callblen == 0)
1123 		cp = nfscl_getmyip(nmp, &a6, &isinet6);
1124 	if (nfscl_enablecallb && nfs_numnfscbd > 0 &&
1125 	    (callblen > 0 || cp != NULL)) {
		/*
		 * cp2[0] and cp2[1] are the two bytes of the callback port
		 * in network byte order; they are appended to the address
		 * string as ".<byte0>.<byte1>".
		 */
1126 		port = htons(nfsv4_cbport);
1127 		cp2 = (u_int8_t *)&port;
1128 #ifdef INET6
		/* A ':' in the configured address selects the tcp6 form. */
1129 		if ((callblen > 0 &&
1130 		     strchr(nfsv4_callbackaddr, ':')) || isinet6) {
1131 			char ip6buf[INET6_ADDRSTRLEN], *ip6add;
1132 
1133 			(void) nfsm_strtom(nd, "tcp6", 4);
1134 			if (callblen == 0) {
1135 				ip6_sprintf(ip6buf, (struct in6_addr *)cp);
1136 				ip6add = ip6buf;
1137 			} else {
1138 				ip6add = nfsv4_callbackaddr;
1139 			}
1140 			snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d",
1141 			    ip6add, cp2[0], cp2[1]);
1142 		} else
1143 #endif
1144 		{
1145 			(void) nfsm_strtom(nd, "tcp", 3);
			/* cp[0..3] are used as the four IPv4 address bytes. */
1146 			if (callblen == 0)
1147 				snprintf(addr, INET6_ADDRSTRLEN + 9,
1148 				    "%d.%d.%d.%d.%d.%d", cp[0], cp[1],
1149 				    cp[2], cp[3], cp2[0], cp2[1]);
1150 			else
1151 				snprintf(addr, INET6_ADDRSTRLEN + 9,
1152 				    "%s.%d.%d", nfsv4_callbackaddr,
1153 				    cp2[0], cp2[1]);
1154 		}
1155 		(void) nfsm_strtom(nd, addr, strlen(addr));
1156 	} else {
		/*
		 * Callbacks are disabled, no nfscbd is running or no address
		 * could be found: send an all-zero address.
		 */
1157 		(void) nfsm_strtom(nd, "tcp", 3);
1158 		(void) nfsm_strtom(nd, "0.0.0.0.0.0", 11);
1159 	}
1160 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1161 	*tl = txdr_unsigned(clp->nfsc_cbident);
1162 	nd->nd_flag |= ND_USEGSSNAME;
1163 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
1164 		NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1165 	if (error)
1166 		return (error);
1167 	if (nd->nd_repstat == 0) {
	    /* Reply: two words of client id, then two words of confirm. */
1168 	    NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1169 	    tsep->nfsess_clientid.lval[0] = *tl++;
1170 	    tsep->nfsess_clientid.lval[1] = *tl++;
1171 	    confirm.lval[0] = *tl++;
1172 	    confirm.lval[1] = *tl;
1173 	    m_freem(nd->nd_mrep);
1174 	    nd->nd_mrep = NULL;
1175 
1176 	    /*
1177 	     * and confirm it.
1178 	     */
	    /* The same nd is reused for the confirm request. */
1179 	    nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL,
1180 		NULL, 0, 0, NULL);
1181 	    NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1182 	    *tl++ = tsep->nfsess_clientid.lval[0];
1183 	    *tl++ = tsep->nfsess_clientid.lval[1];
1184 	    *tl++ = confirm.lval[0];
1185 	    *tl = confirm.lval[1];
1186 	    nd->nd_flag |= ND_USEGSSNAME;
1187 	    error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
1188 		cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1189 	    if (error)
1190 		return (error);
1191 	    m_freem(nd->nd_mrep);
1192 	    nd->nd_mrep = NULL;
1193 	}
	/*
	 * Since nd is reused, this is the reply status of whichever request
	 * was sent last.  nd_mrep is NULL at nfsmout when the confirm path
	 * ran to completion.
	 */
1194 	error = nd->nd_repstat;
1195 nfsmout:
1196 	m_freem(nd->nd_mrep);
1197 	return (error);
1198 }
1199 
1200 /*
1201  * nfs getattr call.
1202  */
1203 int
1204 nfsrpc_getattr(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
1205     struct nfsvattr *nap)
1206 {
1207 	struct nfsrv_descript nfsd, *nd = &nfsd;
1208 	int error;
1209 	nfsattrbit_t attrbits;
1210 
1211 	NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp, cred);
1212 	if (nd->nd_flag & ND_NFSV4) {
1213 		NFSGETATTR_ATTRBIT(&attrbits);
1214 		(void) nfsrv_putattrbit(nd, &attrbits);
1215 	}
1216 	error = nfscl_request(nd, vp, p, cred);
1217 	if (error)
1218 		return (error);
1219 	if (!nd->nd_repstat)
1220 		error = nfsm_loadattr(nd, nap);
1221 	else
1222 		error = nd->nd_repstat;
1223 	m_freem(nd->nd_mrep);
1224 	return (error);
1225 }
1226 
1227 /*
1228  * nfs getattr call with non-vnode arguments.
1229  */
1230 int
1231 nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred,
1232     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp,
1233     uint32_t *leasep)
1234 {
1235 	struct nfsrv_descript nfsd, *nd = &nfsd;
1236 	int error, vers = NFS_VER2;
1237 	nfsattrbit_t attrbits;
1238 
1239 	nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL, NULL, 0, 0,
1240 	    cred);
1241 	if (nd->nd_flag & ND_NFSV4) {
1242 		vers = NFS_VER4;
1243 		NFSGETATTR_ATTRBIT(&attrbits);
1244 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
1245 		(void) nfsrv_putattrbit(nd, &attrbits);
1246 	} else if (nd->nd_flag & ND_NFSV3) {
1247 		vers = NFS_VER3;
1248 	}
1249 	if (syscred)
1250 		nd->nd_flag |= ND_USEGSSNAME;
1251 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
1252 	    NFS_PROG, vers, NULL, 1, xidp, NULL);
1253 	if (error)
1254 		return (error);
1255 	if (nd->nd_repstat == 0) {
1256 		if ((nd->nd_flag & ND_NFSV4) != 0)
1257 			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
1258 			    NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL,
1259 			    NULL, NULL);
1260 		else
1261 			error = nfsm_loadattr(nd, nap);
1262 	} else
1263 		error = nd->nd_repstat;
1264 	m_freem(nd->nd_mrep);
1265 	return (error);
1266 }
1267 
1268 /*
1269  * Do an nfs setattr operation.
1270  */
1271 int
1272 nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp,
1273     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp)
1274 {
1275 	int error, expireret = 0, openerr, retrycnt;
1276 	u_int32_t clidrev = 0, mode;
1277 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1278 	struct nfsfh *nfhp;
1279 	nfsv4stateid_t stateid;
1280 	void *lckp;
1281 
1282 	if (nmp->nm_clp != NULL)
1283 		clidrev = nmp->nm_clp->nfsc_clientidrev;
1284 	if (vap != NULL && NFSATTRISSET(u_quad_t, vap, va_size))
1285 		mode = NFSV4OPEN_ACCESSWRITE;
1286 	else
1287 		mode = NFSV4OPEN_ACCESSREAD;
1288 	retrycnt = 0;
1289 	do {
1290 		lckp = NULL;
1291 		openerr = 1;
1292 		if (NFSHASNFSV4(nmp)) {
1293 			nfhp = VTONFS(vp)->n_fhp;
1294 			error = nfscl_getstateid(vp, nfhp->nfh_fh,
1295 			    nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp);
1296 			if (error && vp->v_type == VREG &&
1297 			    (mode == NFSV4OPEN_ACCESSWRITE ||
1298 			     nfstest_openallsetattr)) {
1299 				/*
1300 				 * No Open stateid, so try and open the file
1301 				 * now.
1302 				 */
1303 				if (mode == NFSV4OPEN_ACCESSWRITE)
1304 					openerr = nfsrpc_open(vp, FWRITE, cred,
1305 					    p);
1306 				else
1307 					openerr = nfsrpc_open(vp, FREAD, cred,
1308 					    p);
1309 				if (!openerr)
1310 					(void) nfscl_getstateid(vp,
1311 					    nfhp->nfh_fh, nfhp->nfh_len,
1312 					    mode, 0, cred, p, &stateid, &lckp);
1313 			}
1314 		}
1315 		if (vap != NULL)
1316 			error = nfsrpc_setattrrpc(vp, vap, &stateid, cred, p,
1317 			    rnap, attrflagp);
1318 		else
1319 			error = nfsrpc_setaclrpc(vp, cred, p, aclp, &stateid);
1320 		if (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD) {
1321 			NFSLOCKMNT(nmp);
1322 			nmp->nm_state |= NFSSTA_OPENMODE;
1323 			NFSUNLOCKMNT(nmp);
1324 		}
1325 		if (error == NFSERR_STALESTATEID)
1326 			nfscl_initiate_recovery(nmp->nm_clp);
1327 		if (lckp != NULL)
1328 			nfscl_lockderef(lckp);
1329 		if (!openerr)
1330 			(void) nfsrpc_close(vp, 0, p);
1331 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1332 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1333 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1334 			(void) nfs_catnap(PZERO, error, "nfs_setattr");
1335 		} else if ((error == NFSERR_EXPIRED ||
1336 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
1337 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1338 		}
1339 		retrycnt++;
1340 	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1341 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1342 	    error == NFSERR_BADSESSION ||
1343 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1344 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1345 	     expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1346 	    (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD &&
1347 	     retrycnt < 4));
1348 	if (error && retrycnt >= 4)
1349 		error = EIO;
1350 	return (error);
1351 }
1352 
1353 static int
1354 nfsrpc_setattrrpc(vnode_t vp, struct vattr *vap,
1355     nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
1356     struct nfsvattr *rnap, int *attrflagp)
1357 {
1358 	u_int32_t *tl;
1359 	struct nfsrv_descript nfsd, *nd = &nfsd;
1360 	int error;
1361 	nfsattrbit_t attrbits;
1362 
1363 	*attrflagp = 0;
1364 	NFSCL_REQSTART(nd, NFSPROC_SETATTR, vp, cred);
1365 	if (nd->nd_flag & ND_NFSV4)
1366 		nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1367 	vap->va_type = vp->v_type;
1368 	nfscl_fillsattr(nd, vap, vp, NFSSATTR_FULL, 0);
1369 	if (nd->nd_flag & ND_NFSV3) {
1370 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1371 		*tl = newnfs_false;
1372 	} else if (nd->nd_flag & ND_NFSV4) {
1373 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1374 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1375 		NFSGETATTR_ATTRBIT(&attrbits);
1376 		(void) nfsrv_putattrbit(nd, &attrbits);
1377 	}
1378 	error = nfscl_request(nd, vp, p, cred);
1379 	if (error)
1380 		return (error);
1381 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
1382 		error = nfscl_wcc_data(nd, vp, rnap, attrflagp, NULL, NULL);
1383 	if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 && !error)
1384 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1385 	if (!(nd->nd_flag & ND_NFSV3) && !nd->nd_repstat && !error)
1386 		error = nfscl_postop_attr(nd, rnap, attrflagp);
1387 	m_freem(nd->nd_mrep);
1388 	if (nd->nd_repstat && !error)
1389 		error = nd->nd_repstat;
1390 	return (error);
1391 }
1392 
1393 /*
1394  * nfs lookup rpc
 *
 * Looks up "name" (len bytes) in the directory dvp.
 *  - Returns ENOTDIR when dvp is not a directory and ENAMETOOLONG when
 *    len is larger than NFS_MAXNAMLEN, without doing an RPC.
 *  - For NFSv4, "." is answered locally with a copy of dvp's file handle
 *    and ".." is sent as NFSPROC_LOOKUPP (openmode is forced to 0).
 *  - When openmode is non-zero, NFSPROC_LOOKUPOPEN is used and, for NFSv4,
 *    a Verify of type VREG and an Open (claim by file handle) are appended
 *    to the request.
 *
 * *attrflagp and *dattrflagp are cleared on entry and are set when
 * attributes were loaded into nap and dnap respectively.
 * On success the file handle is returned through *nfhpp; the local "."
 * and Lookupp-at-root paths allocate it with malloc(..., M_NFSFH, ...).
 *
 * Returns 0, an error from the RPC layer, a reply parsing error or the
 * server's reply status.
1395  */
1396 int
1397 nfsrpc_lookup(vnode_t dvp, char *name, int len, struct ucred *cred,
1398     NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap,
1399     struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, uint32_t openmode)
1400 {
1401 	uint32_t deleg, rflags, *tl;
1402 	struct nfsrv_descript nfsd, *nd = &nfsd;
1403 	struct nfsmount *nmp;
1404 	struct nfsnode *np;
1405 	struct nfsfh *nfhp;
1406 	nfsattrbit_t attrbits;
1407 	int error = 0, lookupp = 0, newone, ret, retop;
1408 	uint8_t own[NFSV4CL_LOCKNAMELEN];
1409 	struct nfsclopen *op;
1410 	struct nfscldeleg *ndp;
1411 	nfsv4stateid_t stateid;
1412 
1413 	*attrflagp = 0;
1414 	*dattrflagp = 0;
1415 	if (dvp->v_type != VDIR)
1416 		return (ENOTDIR);
1417 	nmp = VFSTONFS(dvp->v_mount);
1418 	if (len > NFS_MAXNAMLEN)
1419 		return (ENAMETOOLONG);
1420 	if (NFSHASNFSV4(nmp) && len == 1 &&
1421 		name[0] == '.') {
1422 		/*
1423 		 * Just return the current dir's fh.
1424 		 */
		/* No RPC is done and neither attribute flag is set. */
1425 		np = VTONFS(dvp);
1426 		nfhp = malloc(sizeof (struct nfsfh) +
1427 			np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
1428 		nfhp->nfh_len = np->n_fhp->nfh_len;
1429 		NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
1430 		*nfhpp = nfhp;
1431 		return (0);
1432 	}
1433 	if (NFSHASNFSV4(nmp) && len == 2 &&
1434 		name[0] == '.' && name[1] == '.') {
		/* Lookupp takes no name and is never combined with an Open. */
1435 		lookupp = 1;
1436 		openmode = 0;
1437 		NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, dvp, cred);
1438 	} else if (openmode != 0) {
1439 		NFSCL_REQSTART(nd, NFSPROC_LOOKUPOPEN, dvp, cred);
1440 		nfsm_strtom(nd, name, len);
1441 	} else {
1442 		NFSCL_REQSTART(nd, NFSPROC_LOOKUP, dvp, cred);
1443 		(void) nfsm_strtom(nd, name, len);
1444 	}
1445 	if (nd->nd_flag & ND_NFSV4) {
		/* Append GetFH and Getattr for the object that was looked up. */
1446 		NFSGETATTR_ATTRBIT(&attrbits);
1447 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1448 		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
1449 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1450 		(void) nfsrv_putattrbit(nd, &attrbits);
1451 		if (openmode != 0) {
1452 			/* Test for a VREG file. */
1453 			NFSZERO_ATTRBIT(&attrbits);
1454 			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE);
1455 			NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
1456 			*tl = txdr_unsigned(NFSV4OP_VERIFY);
1457 			nfsrv_putattrbit(nd, &attrbits);
			/* Attribute data: a length word, then the type. */
1458 			NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
1459 			*tl++ = txdr_unsigned(NFSX_UNSIGNED);
1460 			*tl = vtonfsv34_type(VREG);
1461 
1462 			/* Attempt the Open for VREG. */
1463 			nfscl_filllockowner(NULL, own, F_POSIX);
1464 			NFSM_BUILD(tl, uint32_t *, 6 * NFSX_UNSIGNED);
1465 			*tl++ = txdr_unsigned(NFSV4OP_OPEN);
1466 			*tl++ = 0;		/* seqid, ignored. */
1467 			*tl++ = txdr_unsigned(openmode);
1468 			*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
1469 			*tl++ = 0;		/* ClientID, ignored. */
1470 			*tl = 0;
1471 			nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN);
1472 			NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
1473 			*tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
1474 			*tl = txdr_unsigned(NFSV4OPEN_CLAIMFH);
1475 		}
1476 	}
1477 	error = nfscl_request(nd, dvp, p, cred);
1478 	if (error)
1479 		return (error);
	/* From here on every exit through nfsmout frees ndp. */
1480 	ndp = NULL;
1481 	if (nd->nd_repstat) {
1482 		/*
1483 		 * When an NFSv4 Lookupp returns ENOENT, it means that
1484 		 * the lookup is at the root of an fs, so return this dir.
1485 		 */
1486 		if (nd->nd_repstat == NFSERR_NOENT && lookupp) {
1487 		    np = VTONFS(dvp);
1488 		    nfhp = malloc(sizeof (struct nfsfh) +
1489 			np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
1490 		    nfhp->nfh_len = np->n_fhp->nfh_len;
1491 		    NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
1492 		    *nfhpp = nfhp;
1493 		    m_freem(nd->nd_mrep);
1494 		    return (0);
1495 		}
1496 		if (nd->nd_flag & ND_NFSV3)
1497 		    error = nfscl_postop_attr(nd, dnap, dattrflagp);
1498 		else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
1499 		    ND_NFSV4) {
1500 			/* Load the directory attributes. */
1501 			error = nfsm_loadattr(nd, dnap);
1502 			if (error != 0)
1503 				goto nfsmout;
1504 			*dattrflagp = 1;
1505 		}
		/*
		 * The three steps below only apply to a LookupOpen reply
		 * that still has data left.  Each one consumes two words and
		 * tests the second one.
		 * NOTE(review): presumably <opcode, status> of the next
		 * operation result -- confirm.
		 * A non-zero word ends the parse; error is still 0 then, so
		 * nd_repstat is what gets returned.
		 */
1506 		/* Check Lookup operation reply status. */
1507 		if (openmode != 0 && (nd->nd_flag & ND_NOMOREDATA) == 0) {
1508 			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
1509 			if (*++tl != 0)
1510 				goto nfsmout;
1511 		}
1512 		/* Look for GetFH reply. */
1513 		if (openmode != 0 && (nd->nd_flag & ND_NOMOREDATA) == 0) {
1514 			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
1515 			if (*++tl != 0)
1516 				goto nfsmout;
1517 			error = nfsm_getfh(nd, nfhpp);
1518 			if (error)
1519 				goto nfsmout;
1520 		}
1521 		/* Look for Getattr reply. */
1522 		if (openmode != 0 && (nd->nd_flag & ND_NOMOREDATA) == 0) {
1523 			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
1524 			if (*++tl != 0)
1525 				goto nfsmout;
1526 			error = nfsm_loadattr(nd, nap);
1527 			if (error == 0) {
1528 				/*
1529 				 * We have now successfully completed the
1530 				 * lookup, so set nd_repstat to 0.
1531 				 */
1532 				nd->nd_repstat = 0;
1533 				*attrflagp = 1;
1534 			}
1535 		}
1536 		goto nfsmout;
1537 	}
1538 	if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
1539 		/* Load the directory attributes. */
1540 		error = nfsm_loadattr(nd, dnap);
1541 		if (error != 0)
1542 			goto nfsmout;
1543 		*dattrflagp = 1;
1544 		/* Skip over the Lookup and GetFH operation status values. */
1545 		NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1546 	}
1547 	error = nfsm_getfh(nd, nfhpp);
1548 	if (error)
1549 		goto nfsmout;
1550 
1551 	error = nfscl_postop_attr(nd, nap, attrflagp);
1552 	if (openmode != 0 && error == 0) {
		/*
		 * 14 words are dissected here: 4 are skipped, the next 4 are
		 * the open stateid, and "tl + 6" (taken while tl points at
		 * other[2]) is the last of the 14, used as the Open result
		 * flags.  NOTE(review): presumably the 5 words in between
		 * are the Open change info -- confirm.
		 */
1553 		NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID +
1554 		    10 * NFSX_UNSIGNED);
1555 		tl += 4;	/* Skip over Verify+Open status. */
1556 		stateid.seqid = *tl++;
1557 		stateid.other[0] = *tl++;
1558 		stateid.other[1] = *tl++;
1559 		stateid.other[2] = *tl;
1560 		rflags = fxdr_unsigned(uint32_t, *(tl + 6));
1561 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1562 		if (error != 0)
1563 			goto nfsmout;
1564 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
1565 		deleg = fxdr_unsigned(uint32_t, *tl);
1566 		if (deleg == NFSV4OPEN_DELEGATEREAD ||
1567 		    deleg == NFSV4OPEN_DELEGATEWRITE) {
1568 			/*
1569 			 * Just need to fill in the fields used by
1570 			 * nfscl_trydelegreturn().
1571 			 * Mark the mount point as acquiring
1572 			 * delegations, so NFSPROC_LOOKUPOPEN will
1573 			 * no longer be done.
1574 			 */
1575 			NFSLOCKMNT(nmp);
1576 			nmp->nm_privflag |= NFSMNTP_DELEGISSUED;
1577 			NFSUNLOCKMNT(nmp);
			/* Not zeroed; only the fields set below are valid. */
1578 			ndp = malloc(sizeof(struct nfscldeleg) +
1579 			    (*nfhpp)->nfh_len, M_NFSCLDELEG, M_WAITOK);
1580 			ndp->nfsdl_fhlen = (*nfhpp)->nfh_len;
1581 			NFSBCOPY((*nfhpp)->nfh_fh, ndp->nfsdl_fh,
1582 			    ndp->nfsdl_fhlen);
1583 			newnfs_copyincred(cred, &ndp->nfsdl_cred);
1584 			NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
1585 			ndp->nfsdl_stateid.seqid = *tl++;
1586 			ndp->nfsdl_stateid.other[0] = *tl++;
1587 			ndp->nfsdl_stateid.other[1] = *tl++;
1588 			ndp->nfsdl_stateid.other[2] = *tl++;
1589 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
1590 			error = NFSERR_BADXDR;
1591 			goto nfsmout;
1592 		}
		/*
		 * A failure of nfscl_open() is kept in "ret" only: error is
		 * still 0 here, so the lookup itself succeeds but the open
		 * is not recorded and a delegation is not returned.
		 */
1593 		ret = nfscl_open(dvp, (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len,
1594 		    openmode, 0, cred, p, NULL, &op, &newone, &retop, 1, true);
1595 		if (ret != 0)
1596 			goto nfsmout;
1597 		if (newone != 0) {
			/* New open structure: take the whole stateid. */
1598 			op->nfso_stateid.seqid = stateid.seqid;
1599 			op->nfso_stateid.other[0] = stateid.other[0];
1600 			op->nfso_stateid.other[1] = stateid.other[1];
1601 			op->nfso_stateid.other[2] = stateid.other[2];
1602 			op->nfso_mode = openmode;
1603 		} else {
			/* Existing open: only the seqid is updated. */
1604 			op->nfso_stateid.seqid = stateid.seqid;
1605 			if (retop == NFSCLOPEN_DOOPEN)
1606 				op->nfso_mode |= openmode;
1607 		}
1608 		if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) != 0 ||
1609 		    nfscl_assumeposixlocks)
1610 			op->nfso_posixlock = 1;
1611 		else
1612 			op->nfso_posixlock = 0;
1613 		nfscl_openrelease(nmp, op, 0, 0);
1614 		if (ndp != NULL) {
1615 			/*
1616 			 * Since we do not have the vnode, we
1617 			 * cannot invalidate cached attributes.
1618 			 * Just return the delegation.
1619 			 */
1620 			nfscl_trydelegreturn(ndp, cred, nmp, p);
1621 		}
1622 	}
1623 	if ((nd->nd_flag & ND_NFSV3) && !error)
1624 		error = nfscl_postop_attr(nd, dnap, dattrflagp);
1625 nfsmout:
1626 	m_freem(nd->nd_mrep);
	/* A parsing error takes precedence over the reply status. */
1627 	if (!error && nd->nd_repstat)
1628 		error = nd->nd_repstat;
	/* ndp is NULL unless a delegation was parsed above. */
1629 	free(ndp, M_NFSCLDELEG);
1630 	return (error);
1631 }
1632 
1633 /*
1634  * Do a readlink rpc.
1635  */
1636 int
1637 nfsrpc_readlink(vnode_t vp, struct uio *uiop, struct ucred *cred,
1638     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
1639 {
1640 	u_int32_t *tl;
1641 	struct nfsrv_descript nfsd, *nd = &nfsd;
1642 	struct nfsnode *np = VTONFS(vp);
1643 	nfsattrbit_t attrbits;
1644 	int error, len, cangetattr = 1;
1645 
1646 	*attrflagp = 0;
1647 	NFSCL_REQSTART(nd, NFSPROC_READLINK, vp, cred);
1648 	if (nd->nd_flag & ND_NFSV4) {
1649 		/*
1650 		 * And do a Getattr op.
1651 		 */
1652 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1653 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1654 		NFSGETATTR_ATTRBIT(&attrbits);
1655 		(void) nfsrv_putattrbit(nd, &attrbits);
1656 	}
1657 	error = nfscl_request(nd, vp, p, cred);
1658 	if (error)
1659 		return (error);
1660 	if (nd->nd_flag & ND_NFSV3)
1661 		error = nfscl_postop_attr(nd, nap, attrflagp);
1662 	if (!nd->nd_repstat && !error) {
1663 		NFSM_STRSIZ(len, NFS_MAXPATHLEN);
1664 		/*
1665 		 * This seems weird to me, but must have been added to
1666 		 * FreeBSD for some reason. The only thing I can think of
1667 		 * is that there was/is some server that replies with
1668 		 * more link data than it should?
1669 		 */
1670 		if (len == NFS_MAXPATHLEN) {
1671 			NFSLOCKNODE(np);
1672 			if (np->n_size > 0 && np->n_size < NFS_MAXPATHLEN) {
1673 				len = np->n_size;
1674 				cangetattr = 0;
1675 			}
1676 			NFSUNLOCKNODE(np);
1677 		}
1678 		error = nfsm_mbufuio(nd, uiop, len);
1679 		if ((nd->nd_flag & ND_NFSV4) && !error && cangetattr)
1680 			error = nfscl_postop_attr(nd, nap, attrflagp);
1681 	}
1682 	if (nd->nd_repstat && !error)
1683 		error = nd->nd_repstat;
1684 nfsmout:
1685 	m_freem(nd->nd_mrep);
1686 	return (error);
1687 }
1688 
1689 /*
1690  * Read operation.
1691  */
1692 int
1693 nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred,
1694     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
1695 {
1696 	int error, expireret = 0, retrycnt;
1697 	u_int32_t clidrev = 0;
1698 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1699 	struct nfsnode *np = VTONFS(vp);
1700 	struct ucred *newcred;
1701 	struct nfsfh *nfhp = NULL;
1702 	nfsv4stateid_t stateid;
1703 	void *lckp;
1704 
1705 	if (nmp->nm_clp != NULL)
1706 		clidrev = nmp->nm_clp->nfsc_clientidrev;
1707 	newcred = cred;
1708 	if (NFSHASNFSV4(nmp)) {
1709 		nfhp = np->n_fhp;
1710 		newcred = NFSNEWCRED(cred);
1711 	}
1712 	retrycnt = 0;
1713 	do {
1714 		lckp = NULL;
1715 		if (NFSHASNFSV4(nmp))
1716 			(void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1717 			    NFSV4OPEN_ACCESSREAD, 0, newcred, p, &stateid,
1718 			    &lckp);
1719 		error = nfsrpc_readrpc(vp, uiop, newcred, &stateid, p, nap,
1720 		    attrflagp);
1721 		if (error == NFSERR_OPENMODE) {
1722 			NFSLOCKMNT(nmp);
1723 			nmp->nm_state |= NFSSTA_OPENMODE;
1724 			NFSUNLOCKMNT(nmp);
1725 		}
1726 		if (error == NFSERR_STALESTATEID)
1727 			nfscl_initiate_recovery(nmp->nm_clp);
1728 		if (lckp != NULL)
1729 			nfscl_lockderef(lckp);
1730 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1731 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1732 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1733 			(void) nfs_catnap(PZERO, error, "nfs_read");
1734 		} else if ((error == NFSERR_EXPIRED ||
1735 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
1736 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1737 		}
1738 		retrycnt++;
1739 	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1740 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1741 	    error == NFSERR_BADSESSION ||
1742 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1743 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1744 	     expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1745 	    (error == NFSERR_OPENMODE && retrycnt < 4));
1746 	if (error && retrycnt >= 4)
1747 		error = EIO;
1748 	if (NFSHASNFSV4(nmp))
1749 		NFSFREECRED(newcred);
1750 	return (error);
1751 }
1752 
1753 /*
1754  * The actual read RPC.
1755  */
1756 static int
1757 nfsrpc_readrpc(vnode_t vp, struct uio *uiop, struct ucred *cred,
1758     nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap,
1759     int *attrflagp)
1760 {
1761 	u_int32_t *tl;
1762 	int error = 0, len, retlen, tsiz, eof = 0;
1763 	struct nfsrv_descript nfsd;
1764 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1765 	struct nfsrv_descript *nd = &nfsd;
1766 	int rsize;
1767 	off_t tmp_off;
1768 
1769 	*attrflagp = 0;
1770 	tsiz = uiop->uio_resid;
1771 	tmp_off = uiop->uio_offset + tsiz;
1772 	NFSLOCKMNT(nmp);
1773 	if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1774 		NFSUNLOCKMNT(nmp);
1775 		return (EFBIG);
1776 	}
1777 	rsize = nmp->nm_rsize;
1778 	NFSUNLOCKMNT(nmp);
1779 	nd->nd_mrep = NULL;
1780 	while (tsiz > 0) {
1781 		*attrflagp = 0;
1782 		len = (tsiz > rsize) ? rsize : tsiz;
1783 		NFSCL_REQSTART(nd, NFSPROC_READ, vp, cred);
1784 		if (nd->nd_flag & ND_NFSV4)
1785 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1786 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED * 3);
1787 		if (nd->nd_flag & ND_NFSV2) {
1788 			*tl++ = txdr_unsigned(uiop->uio_offset);
1789 			*tl++ = txdr_unsigned(len);
1790 			*tl = 0;
1791 		} else {
1792 			txdr_hyper(uiop->uio_offset, tl);
1793 			*(tl + 2) = txdr_unsigned(len);
1794 		}
1795 		/*
1796 		 * Since I can't do a Getattr for NFSv4 for Write, there
1797 		 * doesn't seem any point in doing one here, either.
1798 		 * (See the comment in nfsrpc_writerpc() for more info.)
1799 		 */
1800 		error = nfscl_request(nd, vp, p, cred);
1801 		if (error)
1802 			return (error);
1803 		if (nd->nd_flag & ND_NFSV3) {
1804 			error = nfscl_postop_attr(nd, nap, attrflagp);
1805 		} else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV2)) {
1806 			error = nfsm_loadattr(nd, nap);
1807 			if (!error)
1808 				*attrflagp = 1;
1809 		}
1810 		if (nd->nd_repstat || error) {
1811 			if (!error)
1812 				error = nd->nd_repstat;
1813 			goto nfsmout;
1814 		}
1815 		if (nd->nd_flag & ND_NFSV3) {
1816 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1817 			eof = fxdr_unsigned(int, *(tl + 1));
1818 		} else if (nd->nd_flag & ND_NFSV4) {
1819 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
1820 			eof = fxdr_unsigned(int, *tl);
1821 		}
1822 		NFSM_STRSIZ(retlen, len);
1823 		error = nfsm_mbufuio(nd, uiop, retlen);
1824 		if (error)
1825 			goto nfsmout;
1826 		m_freem(nd->nd_mrep);
1827 		nd->nd_mrep = NULL;
1828 		tsiz -= retlen;
1829 		if (!(nd->nd_flag & ND_NFSV2)) {
1830 			if (eof || retlen == 0)
1831 				tsiz = 0;
1832 		} else if (retlen < len)
1833 			tsiz = 0;
1834 	}
1835 	return (0);
1836 nfsmout:
1837 	if (nd->nd_mrep != NULL)
1838 		m_freem(nd->nd_mrep);
1839 	return (error);
1840 }
1841 
1842 /*
1843  * nfs write operation
1844  * When called_from_strategy != 0, it should return EIO for an error that
1845  * indicates recovery is in progress, so that the buffer will be left
1846  * dirty and be written back to the server later. If it loops around,
1847  * the recovery thread could get stuck waiting for the buffer and recovery
1848  * will then deadlock.
1849  */
1850 int
1851 nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
1852     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
1853     int called_from_strategy, int ioflag)
1854 {
1855 	int error, expireret = 0, retrycnt, nostateid;
1856 	u_int32_t clidrev = 0;
1857 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1858 	struct nfsnode *np = VTONFS(vp);
1859 	struct ucred *newcred;
1860 	struct nfsfh *nfhp = NULL;
1861 	nfsv4stateid_t stateid;
1862 	void *lckp;
1863 
1864 	KASSERT(*must_commit >= 0 && *must_commit <= 2,
1865 	    ("nfsrpc_write: must_commit out of range=%d", *must_commit));
1866 	if (nmp->nm_clp != NULL)
1867 		clidrev = nmp->nm_clp->nfsc_clientidrev;
1868 	newcred = cred;
1869 	if (NFSHASNFSV4(nmp)) {
1870 		newcred = NFSNEWCRED(cred);
1871 		nfhp = np->n_fhp;
1872 	}
1873 	retrycnt = 0;
1874 	do {
1875 		lckp = NULL;
1876 		nostateid = 0;
1877 		if (NFSHASNFSV4(nmp)) {
1878 			(void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1879 			    NFSV4OPEN_ACCESSWRITE, 0, newcred, p, &stateid,
1880 			    &lckp);
1881 			if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
1882 			    stateid.other[2] == 0) {
1883 				nostateid = 1;
1884 				NFSCL_DEBUG(1, "stateid0 in write\n");
1885 			}
1886 		}
1887 
1888 		/*
1889 		 * If there is no stateid for NFSv4, it means this is an
1890 		 * extraneous write after close. Basically a poorly
1891 		 * implemented buffer cache. Just don't do the write.
1892 		 */
1893 		if (nostateid)
1894 			error = 0;
1895 		else
1896 			error = nfsrpc_writerpc(vp, uiop, iomode, must_commit,
1897 			    newcred, &stateid, p, nap, attrflagp, ioflag);
1898 		if (error == NFSERR_STALESTATEID)
1899 			nfscl_initiate_recovery(nmp->nm_clp);
1900 		if (lckp != NULL)
1901 			nfscl_lockderef(lckp);
1902 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1903 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1904 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1905 			(void) nfs_catnap(PZERO, error, "nfs_write");
1906 		} else if ((error == NFSERR_EXPIRED ||
1907 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
1908 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1909 		}
1910 		retrycnt++;
1911 	} while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
1912 	    ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
1913 	      error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) ||
1914 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1915 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1916 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
1917 	if (error != 0 && (retrycnt >= 4 ||
1918 	    ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
1919 	      error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0)))
1920 		error = EIO;
1921 	if (NFSHASNFSV4(nmp))
1922 		NFSFREECRED(newcred);
1923 	return (error);
1924 }
1925 
1926 /*
1927  * The actual write RPC.
1928  */
1929 static int
1930 nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode,
1931     int *must_commit, struct ucred *cred, nfsv4stateid_t *stateidp,
1932     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, int ioflag)
1933 {
1934 	u_int32_t *tl;
1935 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1936 	struct nfsnode *np = VTONFS(vp);
1937 	int error = 0, len, rlen, commit, committed = NFSWRITE_FILESYNC;
1938 	int wccflag = 0;
1939 	int32_t backup;
1940 	struct nfsrv_descript *nd;
1941 	nfsattrbit_t attrbits;
1942 	uint64_t tmp_off;
1943 	ssize_t tsiz, wsize;
1944 	bool do_append;
1945 
1946 	KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
1947 	*attrflagp = 0;
1948 	tsiz = uiop->uio_resid;
1949 	tmp_off = uiop->uio_offset + tsiz;
1950 	NFSLOCKMNT(nmp);
1951 	if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1952 		NFSUNLOCKMNT(nmp);
1953 		return (EFBIG);
1954 	}
1955 	wsize = nmp->nm_wsize;
1956 	do_append = false;
1957 	if ((ioflag & IO_APPEND) != 0 && NFSHASNFSV4(nmp) && !NFSHASPNFS(nmp))
1958 		do_append = true;
1959 	NFSUNLOCKMNT(nmp);
1960 	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK);
1961 	nd->nd_mrep = NULL;	/* NFSv2 sometimes does a write with */
1962 	nd->nd_repstat = 0;	/* uio_resid == 0, so the while is not done */
1963 	while (tsiz > 0) {
1964 		*attrflagp = 0;
1965 		len = (tsiz > wsize) ? wsize : tsiz;
1966 		if (do_append)
1967 			NFSCL_REQSTART(nd, NFSPROC_APPENDWRITE, vp, cred);
1968 		else
1969 			NFSCL_REQSTART(nd, NFSPROC_WRITE, vp, cred);
1970 		if (nd->nd_flag & ND_NFSV4) {
1971 			if (do_append) {
1972 				NFSZERO_ATTRBIT(&attrbits);
1973 				NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
1974 				nfsrv_putattrbit(nd, &attrbits);
1975 				NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED +
1976 				    NFSX_HYPER);
1977 				*tl++ = txdr_unsigned(NFSX_HYPER);
1978 				txdr_hyper(uiop->uio_offset, tl); tl += 2;
1979 				*tl = txdr_unsigned(NFSV4OP_WRITE);
1980 			}
1981 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1982 			NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+2*NFSX_UNSIGNED);
1983 			txdr_hyper(uiop->uio_offset, tl);
1984 			tl += 2;
1985 			*tl++ = txdr_unsigned(*iomode);
1986 			*tl = txdr_unsigned(len);
1987 		} else if (nd->nd_flag & ND_NFSV3) {
1988 			NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+3*NFSX_UNSIGNED);
1989 			txdr_hyper(uiop->uio_offset, tl);
1990 			tl += 2;
1991 			*tl++ = txdr_unsigned(len);
1992 			*tl++ = txdr_unsigned(*iomode);
1993 			*tl = txdr_unsigned(len);
1994 		} else {
1995 			u_int32_t x;
1996 
1997 			NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1998 			/*
1999 			 * Not sure why someone changed this, since the
2000 			 * RFC clearly states that "beginoffset" and
2001 			 * "totalcount" are ignored, but it wouldn't
2002 			 * surprise me if there's a busted server out there.
2003 			 */
2004 			/* Set both "begin" and "current" to non-garbage. */
2005 			x = txdr_unsigned((u_int32_t)uiop->uio_offset);
2006 			*tl++ = x;      /* "begin offset" */
2007 			*tl++ = x;      /* "current offset" */
2008 			x = txdr_unsigned(len);
2009 			*tl++ = x;      /* total to this offset */
2010 			*tl = x;        /* size of this write */
2011 		}
2012 		nfsm_uiombuf(nd, uiop, len);
2013 		/*
2014 		 * Although it is tempting to do a normal Getattr Op in the
2015 		 * NFSv4 compound, the result can be a nearly hung client
2016 		 * system if the Getattr asks for Owner and/or OwnerGroup.
2017 		 * It occurs when the client can't map either the Owner or
2018 		 * Owner_group name in the Getattr reply to a uid/gid. When
2019 		 * there is a cache miss, the kernel does an upcall to the
2020 		 * nfsuserd. Then, it can try and read the local /etc/passwd
2021 		 * or /etc/group file. It can then block in getnewbuf(),
2022 		 * waiting for dirty writes to be pushed to the NFS server.
2023 		 * The only reason this doesn't result in a complete
2024 		 * deadlock, is that the upcall times out and allows
2025 		 * the write to complete. However, progress is so slow
2026 		 * that it might just as well be deadlocked.
2027 		 * As such, we get the rest of the attributes, but not
2028 		 * Owner or Owner_group.
2029 		 * nb: nfscl_loadattrcache() needs to be told that these
2030 		 *     partial attributes from a write rpc are being
2031 		 *     passed in, via a argument flag.
2032 		 */
2033 		if (nd->nd_flag & ND_NFSV4) {
2034 			NFSWRITEGETATTR_ATTRBIT(&attrbits);
2035 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2036 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
2037 			(void) nfsrv_putattrbit(nd, &attrbits);
2038 		}
2039 		error = nfscl_request(nd, vp, p, cred);
2040 		if (error) {
2041 			free(nd, M_TEMP);
2042 			return (error);
2043 		}
2044 		if (nd->nd_repstat) {
2045 			/*
2046 			 * In case the rpc gets retried, roll
2047 			 * the uio fields changed by nfsm_uiombuf()
2048 			 * back.
2049 			 */
2050 			uiop->uio_offset -= len;
2051 			uiop->uio_resid += len;
2052 			uiop->uio_iov->iov_base =
2053 			    (char *)uiop->uio_iov->iov_base - len;
2054 			uiop->uio_iov->iov_len += len;
2055 		}
2056 		if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2057 			error = nfscl_wcc_data(nd, vp, nap, attrflagp,
2058 			    &wccflag, &tmp_off);
2059 			if (error)
2060 				goto nfsmout;
2061 		}
2062 		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2063 		    (ND_NFSV4 | ND_NOMOREDATA) &&
2064 		    nd->nd_repstat == NFSERR_NOTSAME && do_append) {
2065 			/*
2066 			 * Verify of the file's size failed, so redo the
2067 			 * write using the file's size as returned in
2068 			 * the wcc attributes.
2069 			 */
2070 			if (tmp_off + tsiz <= nmp->nm_maxfilesize) {
2071 				do_append = false;
2072 				uiop->uio_offset = tmp_off;
2073 				m_freem(nd->nd_mrep);
2074 				nd->nd_mrep = NULL;
2075 				continue;
2076 			} else
2077 				nd->nd_repstat = EFBIG;
2078 		}
2079 		if (!nd->nd_repstat) {
2080 			if (do_append) {
2081 				/* Strip off the Write reply status. */
2082 				do_append = false;
2083 				NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
2084 			}
2085 			if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2086 				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED
2087 					+ NFSX_VERF);
2088 				rlen = fxdr_unsigned(int, *tl++);
2089 				if (rlen == 0) {
2090 					error = NFSERR_IO;
2091 					goto nfsmout;
2092 				} else if (rlen < len) {
2093 					backup = len - rlen;
2094 					uiop->uio_iov->iov_base =
2095 					    (char *)uiop->uio_iov->iov_base -
2096 					    backup;
2097 					uiop->uio_iov->iov_len += backup;
2098 					uiop->uio_offset -= backup;
2099 					uiop->uio_resid += backup;
2100 					len = rlen;
2101 				}
2102 				commit = fxdr_unsigned(int, *tl++);
2103 
2104 				/*
2105 				 * Return the lowest commitment level
2106 				 * obtained by any of the RPCs.
2107 				 */
2108 				if (committed == NFSWRITE_FILESYNC)
2109 					committed = commit;
2110 				else if (committed == NFSWRITE_DATASYNC &&
2111 					commit == NFSWRITE_UNSTABLE)
2112 					committed = commit;
2113 				NFSLOCKMNT(nmp);
2114 				if (!NFSHASWRITEVERF(nmp)) {
2115 					NFSBCOPY((caddr_t)tl,
2116 					    (caddr_t)&nmp->nm_verf[0],
2117 					    NFSX_VERF);
2118 					NFSSETWRITEVERF(nmp);
2119 	    			} else if (NFSBCMP(tl, nmp->nm_verf,
2120 				    NFSX_VERF) && *must_commit != 2) {
2121 					*must_commit = 1;
2122 					NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
2123 				}
2124 				NFSUNLOCKMNT(nmp);
2125 			}
2126 			if (nd->nd_flag & ND_NFSV4)
2127 				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2128 			if (nd->nd_flag & (ND_NFSV2 | ND_NFSV4)) {
2129 				error = nfsm_loadattr(nd, nap);
2130 				if (!error)
2131 					*attrflagp = NFS_LATTR_NOSHRINK;
2132 			}
2133 		} else {
2134 			error = nd->nd_repstat;
2135 		}
2136 		if (error)
2137 			goto nfsmout;
2138 		NFSWRITERPC_SETTIME(wccflag, np, nap, (nd->nd_flag & ND_NFSV4));
2139 		m_freem(nd->nd_mrep);
2140 		nd->nd_mrep = NULL;
2141 		tsiz -= len;
2142 	}
2143 nfsmout:
2144 	if (nd->nd_mrep != NULL)
2145 		m_freem(nd->nd_mrep);
2146 	*iomode = committed;
2147 	if (nd->nd_repstat && !error)
2148 		error = nd->nd_repstat;
2149 	free(nd, M_TEMP);
2150 	return (error);
2151 }
2152 
2153 /*
2154  * Do an nfs deallocate operation.
2155  */
2156 int
2157 nfsrpc_deallocate(vnode_t vp, off_t offs, off_t len, struct nfsvattr *nap,
2158     int *attrflagp, struct ucred *cred, NFSPROC_T *p)
2159 {
2160 	int error, expireret = 0, openerr, retrycnt;
2161 	uint32_t clidrev = 0;
2162 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2163 	struct nfsfh *nfhp;
2164 	nfsv4stateid_t stateid;
2165 	void *lckp;
2166 
2167 	if (nmp->nm_clp != NULL)
2168 		clidrev = nmp->nm_clp->nfsc_clientidrev;
2169 	retrycnt = 0;
2170 	do {
2171 		lckp = NULL;
2172 		openerr = 1;
2173 		nfhp = VTONFS(vp)->n_fhp;
2174 		error = nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
2175 		    NFSV4OPEN_ACCESSWRITE, 0, cred, p, &stateid, &lckp);
2176 		if (error != 0) {
2177 			/*
2178 			 * No Open stateid, so try and open the file
2179 			 * now.
2180 			 */
2181 			openerr = nfsrpc_open(vp, FWRITE, cred, p);
2182 			if (openerr == 0)
2183 				nfscl_getstateid(vp, nfhp->nfh_fh,
2184 				    nfhp->nfh_len, NFSV4OPEN_ACCESSWRITE, 0,
2185 				    cred, p, &stateid, &lckp);
2186 		}
2187 		error = nfsrpc_deallocaterpc(vp, offs, len, &stateid, nap,
2188 		    attrflagp, cred, p);
2189 		if (error == NFSERR_STALESTATEID)
2190 			nfscl_initiate_recovery(nmp->nm_clp);
2191 		if (lckp != NULL)
2192 			nfscl_lockderef(lckp);
2193 		if (openerr == 0)
2194 			nfsrpc_close(vp, 0, p);
2195 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
2196 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2197 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
2198 			(void) nfs_catnap(PZERO, error, "nfs_deallocate");
2199 		} else if ((error == NFSERR_EXPIRED ||
2200 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
2201 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
2202 		}
2203 		retrycnt++;
2204 	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
2205 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2206 	    error == NFSERR_BADSESSION ||
2207 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
2208 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
2209 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
2210 	if (error && retrycnt >= 4)
2211 		error = EIO;
2212 	return (error);
2213 }
2214 
2215 /*
2216  * The actual deallocate RPC.
2217  */
2218 static int
2219 nfsrpc_deallocaterpc(vnode_t vp, off_t offs, off_t len,
2220     nfsv4stateid_t *stateidp, struct nfsvattr *nap, int *attrflagp,
2221     struct ucred *cred, NFSPROC_T *p)
2222 {
2223 	uint32_t *tl;
2224 	struct nfsnode *np = VTONFS(vp);
2225 	int error, wccflag;
2226 	struct nfsrv_descript nfsd;
2227 	struct nfsrv_descript *nd = &nfsd;
2228 	nfsattrbit_t attrbits;
2229 
2230 	*attrflagp = 0;
2231 	NFSCL_REQSTART(nd, NFSPROC_DEALLOCATE, vp, cred);
2232 	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
2233 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER);
2234 	txdr_hyper(offs, tl);
2235 	tl += 2;
2236 	txdr_hyper(len, tl);
2237 	NFSWRITEGETATTR_ATTRBIT(&attrbits);
2238 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
2239 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
2240 	nfsrv_putattrbit(nd, &attrbits);
2241 	error = nfscl_request(nd, vp, p, cred);
2242 	if (error != 0)
2243 		return (error);
2244 	wccflag = 0;
2245 	error = nfscl_wcc_data(nd, vp, nap, attrflagp, &wccflag, NULL);
2246 	if (error != 0)
2247 		goto nfsmout;
2248 	if (nd->nd_repstat == 0) {
2249 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
2250 		error = nfsm_loadattr(nd, nap);
2251 		if (error != 0)
2252 			goto nfsmout;
2253 		*attrflagp = NFS_LATTR_NOSHRINK;
2254 	}
2255 	NFSWRITERPC_SETTIME(wccflag, np, nap, 1);
2256 nfsmout:
2257 	m_freem(nd->nd_mrep);
2258 	if (nd->nd_repstat != 0 && error == 0)
2259 		error = nd->nd_repstat;
2260 	return (error);
2261 }
2262 
2263 /*
2264  * nfs mknod rpc
2265  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
2266  * mode set to specify the file type and the size field for rdev.
2267  */
2268 int
2269 nfsrpc_mknod(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2270     u_int32_t rdev, enum vtype vtyp, struct ucred *cred, NFSPROC_T *p,
2271     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2272     int *attrflagp, int *dattrflagp)
2273 {
2274 	u_int32_t *tl;
2275 	int error = 0;
2276 	struct nfsrv_descript nfsd, *nd = &nfsd;
2277 	nfsattrbit_t attrbits;
2278 
2279 	*nfhpp = NULL;
2280 	*attrflagp = 0;
2281 	*dattrflagp = 0;
2282 	if (namelen > NFS_MAXNAMLEN)
2283 		return (ENAMETOOLONG);
2284 	NFSCL_REQSTART(nd, NFSPROC_MKNOD, dvp, cred);
2285 	if (nd->nd_flag & ND_NFSV4) {
2286 		if (vtyp == VBLK || vtyp == VCHR) {
2287 			NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2288 			*tl++ = vtonfsv34_type(vtyp);
2289 			*tl++ = txdr_unsigned(NFSMAJOR(rdev));
2290 			*tl = txdr_unsigned(NFSMINOR(rdev));
2291 		} else {
2292 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2293 			*tl = vtonfsv34_type(vtyp);
2294 		}
2295 	}
2296 	(void) nfsm_strtom(nd, name, namelen);
2297 	if (nd->nd_flag & ND_NFSV3) {
2298 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2299 		*tl = vtonfsv34_type(vtyp);
2300 	}
2301 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
2302 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
2303 	if ((nd->nd_flag & ND_NFSV3) &&
2304 	    (vtyp == VCHR || vtyp == VBLK)) {
2305 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2306 		*tl++ = txdr_unsigned(NFSMAJOR(rdev));
2307 		*tl = txdr_unsigned(NFSMINOR(rdev));
2308 	}
2309 	if (nd->nd_flag & ND_NFSV4) {
2310 		NFSGETATTR_ATTRBIT(&attrbits);
2311 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2312 		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
2313 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2314 		(void) nfsrv_putattrbit(nd, &attrbits);
2315 	}
2316 	if (nd->nd_flag & ND_NFSV2)
2317 		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZERDEV, rdev);
2318 	error = nfscl_request(nd, dvp, p, cred);
2319 	if (error)
2320 		return (error);
2321 	if (nd->nd_flag & ND_NFSV4)
2322 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
2323 	if (!nd->nd_repstat) {
2324 		if (nd->nd_flag & ND_NFSV4) {
2325 			NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2326 			error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2327 			if (error)
2328 				goto nfsmout;
2329 		}
2330 		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2331 		if (error)
2332 			goto nfsmout;
2333 	}
2334 	if (nd->nd_flag & ND_NFSV3)
2335 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
2336 	if (!error && nd->nd_repstat)
2337 		error = nd->nd_repstat;
2338 nfsmout:
2339 	m_freem(nd->nd_mrep);
2340 	return (error);
2341 }
2342 
2343 /*
2344  * nfs file create call
2345  * Mostly just call the approriate routine. (I separated out v4, so that
2346  * error recovery wouldn't be as difficult.)
2347  */
2348 int
2349 nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2350     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
2351     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2352     int *attrflagp, int *dattrflagp)
2353 {
2354 	int error = 0, newone, expireret = 0, retrycnt, unlocked;
2355 	struct nfsclowner *owp;
2356 	struct nfscldeleg *dp;
2357 	struct nfsmount *nmp = VFSTONFS(dvp->v_mount);
2358 	u_int32_t clidrev;
2359 
2360 	if (NFSHASNFSV4(nmp)) {
2361 	    retrycnt = 0;
2362 	    do {
2363 		dp = NULL;
2364 		error = nfscl_open(dvp, NULL, 0, (NFSV4OPEN_ACCESSWRITE |
2365 		    NFSV4OPEN_ACCESSREAD), 0, cred, p, &owp, NULL, &newone,
2366 		    NULL, 1, true);
2367 		if (error)
2368 			return (error);
2369 		if (nmp->nm_clp != NULL)
2370 			clidrev = nmp->nm_clp->nfsc_clientidrev;
2371 		else
2372 			clidrev = 0;
2373 		if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
2374 		    nfs_numnfscbd == 0 || retrycnt > 0)
2375 			error = nfsrpc_createv4(dvp, name, namelen, vap, cverf,
2376 			  fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
2377 			  attrflagp, dattrflagp, &unlocked);
2378 		else
2379 			error = nfsrpc_getcreatelayout(dvp, name, namelen, vap,
2380 			  cverf, fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
2381 			  attrflagp, dattrflagp, &unlocked);
2382 		/*
2383 		 * There is no need to invalidate cached attributes here,
2384 		 * since new post-delegation issue attributes are always
2385 		 * returned by nfsrpc_createv4() and these will update the
2386 		 * attribute cache.
2387 		 */
2388 		if (dp != NULL)
2389 			(void) nfscl_deleg(nmp->nm_mountp, owp->nfsow_clp,
2390 			    (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp);
2391 		nfscl_ownerrelease(nmp, owp, error, newone, unlocked);
2392 		if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
2393 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2394 		    error == NFSERR_BADSESSION) {
2395 			(void) nfs_catnap(PZERO, error, "nfs_open");
2396 		} else if ((error == NFSERR_EXPIRED ||
2397 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
2398 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
2399 			retrycnt++;
2400 		}
2401 	    } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
2402 		error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2403 		error == NFSERR_BADSESSION ||
2404 		((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
2405 		 expireret == 0 && clidrev != 0 && retrycnt < 4));
2406 	    if (error && retrycnt >= 4)
2407 		    error = EIO;
2408 	} else {
2409 		error = nfsrpc_createv23(dvp, name, namelen, vap, cverf,
2410 		    fmode, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp);
2411 	}
2412 	return (error);
2413 }
2414 
2415 /*
2416  * The create rpc for v2 and 3.
2417  */
2418 static int
2419 nfsrpc_createv23(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2420     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
2421     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2422     int *attrflagp, int *dattrflagp)
2423 {
2424 	u_int32_t *tl;
2425 	int error = 0;
2426 	struct nfsrv_descript nfsd, *nd = &nfsd;
2427 
2428 	*nfhpp = NULL;
2429 	*attrflagp = 0;
2430 	*dattrflagp = 0;
2431 	if (namelen > NFS_MAXNAMLEN)
2432 		return (ENAMETOOLONG);
2433 	NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp, cred);
2434 	(void) nfsm_strtom(nd, name, namelen);
2435 	if (nd->nd_flag & ND_NFSV3) {
2436 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2437 		if (fmode & O_EXCL) {
2438 			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2439 			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2440 			*tl++ = cverf.lval[0];
2441 			*tl = cverf.lval[1];
2442 		} else {
2443 			*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2444 			nfscl_fillsattr(nd, vap, dvp, 0, 0);
2445 		}
2446 	} else {
2447 		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZE0, 0);
2448 	}
2449 	error = nfscl_request(nd, dvp, p, cred);
2450 	if (error)
2451 		return (error);
2452 	if (nd->nd_repstat == 0) {
2453 		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2454 		if (error)
2455 			goto nfsmout;
2456 	}
2457 	if (nd->nd_flag & ND_NFSV3)
2458 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
2459 	if (nd->nd_repstat != 0 && error == 0)
2460 		error = nd->nd_repstat;
2461 nfsmout:
2462 	m_freem(nd->nd_mrep);
2463 	return (error);
2464 }
2465 
2466 static int
2467 nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2468     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
2469     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2470     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2471     int *dattrflagp, int *unlockedp)
2472 {
2473 	u_int32_t *tl;
2474 	int error = 0, deleg, newone, ret, acesize, limitby;
2475 	struct nfsrv_descript nfsd, *nd = &nfsd;
2476 	struct nfsclopen *op;
2477 	struct nfscldeleg *dp = NULL;
2478 	struct nfsnode *np;
2479 	struct nfsfh *nfhp;
2480 	nfsattrbit_t attrbits;
2481 	nfsv4stateid_t stateid;
2482 	u_int32_t rflags;
2483 	struct nfsmount *nmp;
2484 	struct nfsclsession *tsep;
2485 
2486 	nmp = VFSTONFS(dvp->v_mount);
2487 	np = VTONFS(dvp);
2488 	*unlockedp = 0;
2489 	*nfhpp = NULL;
2490 	*dpp = NULL;
2491 	*attrflagp = 0;
2492 	*dattrflagp = 0;
2493 	if (namelen > NFS_MAXNAMLEN)
2494 		return (ENAMETOOLONG);
2495 	NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp, cred);
2496 	/*
2497 	 * For V4, this is actually an Open op.
2498 	 */
2499 	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2500 	*tl++ = txdr_unsigned(owp->nfsow_seqid);
2501 	*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
2502 	    NFSV4OPEN_ACCESSREAD);
2503 	*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
2504 	tsep = nfsmnt_mdssession(nmp);
2505 	*tl++ = tsep->nfsess_clientid.lval[0];
2506 	*tl = tsep->nfsess_clientid.lval[1];
2507 	(void) nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
2508 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2509 	*tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
2510 	if (fmode & O_EXCL) {
2511 		if (NFSHASNFSV4N(nmp)) {
2512 			if (NFSHASSESSPERSIST(nmp)) {
2513 				/* Use GUARDED for persistent sessions. */
2514 				*tl = txdr_unsigned(NFSCREATE_GUARDED);
2515 				nfscl_fillsattr(nd, vap, dvp, 0, 0);
2516 			} else {
2517 				/* Otherwise, use EXCLUSIVE4_1. */
2518 				*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
2519 				NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2520 				*tl++ = cverf.lval[0];
2521 				*tl = cverf.lval[1];
2522 				nfscl_fillsattr(nd, vap, dvp, 0, 0);
2523 			}
2524 		} else {
2525 			/* NFSv4.0 */
2526 			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2527 			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2528 			*tl++ = cverf.lval[0];
2529 			*tl = cverf.lval[1];
2530 		}
2531 	} else {
2532 		*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2533 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
2534 	}
2535 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2536 	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
2537 	(void) nfsm_strtom(nd, name, namelen);
2538 	/* Get the new file's handle and attributes. */
2539 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2540 	*tl++ = txdr_unsigned(NFSV4OP_GETFH);
2541 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
2542 	NFSGETATTR_ATTRBIT(&attrbits);
2543 	(void) nfsrv_putattrbit(nd, &attrbits);
2544 	/* Get the directory's post-op attributes. */
2545 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2546 	*tl = txdr_unsigned(NFSV4OP_PUTFH);
2547 	(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
2548 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2549 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
2550 	(void) nfsrv_putattrbit(nd, &attrbits);
2551 	error = nfscl_request(nd, dvp, p, cred);
2552 	if (error)
2553 		return (error);
2554 	NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
2555 	if (nd->nd_repstat == 0) {
2556 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2557 		    6 * NFSX_UNSIGNED);
2558 		stateid.seqid = *tl++;
2559 		stateid.other[0] = *tl++;
2560 		stateid.other[1] = *tl++;
2561 		stateid.other[2] = *tl;
2562 		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
2563 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2564 		if (error)
2565 			goto nfsmout;
2566 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2567 		deleg = fxdr_unsigned(int, *tl);
2568 		if (deleg == NFSV4OPEN_DELEGATEREAD ||
2569 		    deleg == NFSV4OPEN_DELEGATEWRITE) {
2570 			if (!(owp->nfsow_clp->nfsc_flags &
2571 			      NFSCLFLAGS_FIRSTDELEG))
2572 				owp->nfsow_clp->nfsc_flags |=
2573 				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
2574 			dp = malloc(
2575 			    sizeof (struct nfscldeleg) + NFSX_V4FHMAX,
2576 			    M_NFSCLDELEG, M_WAITOK);
2577 			LIST_INIT(&dp->nfsdl_owner);
2578 			LIST_INIT(&dp->nfsdl_lock);
2579 			dp->nfsdl_clp = owp->nfsow_clp;
2580 			newnfs_copyincred(cred, &dp->nfsdl_cred);
2581 			nfscl_lockinit(&dp->nfsdl_rwlock);
2582 			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2583 			    NFSX_UNSIGNED);
2584 			dp->nfsdl_stateid.seqid = *tl++;
2585 			dp->nfsdl_stateid.other[0] = *tl++;
2586 			dp->nfsdl_stateid.other[1] = *tl++;
2587 			dp->nfsdl_stateid.other[2] = *tl++;
2588 			ret = fxdr_unsigned(int, *tl);
2589 			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
2590 				dp->nfsdl_flags = NFSCLDL_WRITE;
2591 				/*
2592 				 * Indicates how much the file can grow.
2593 				 */
2594 				NFSM_DISSECT(tl, u_int32_t *,
2595 				    3 * NFSX_UNSIGNED);
2596 				limitby = fxdr_unsigned(int, *tl++);
2597 				switch (limitby) {
2598 				case NFSV4OPEN_LIMITSIZE:
2599 					dp->nfsdl_sizelimit = fxdr_hyper(tl);
2600 					break;
2601 				case NFSV4OPEN_LIMITBLOCKS:
2602 					dp->nfsdl_sizelimit =
2603 					    fxdr_unsigned(u_int64_t, *tl++);
2604 					dp->nfsdl_sizelimit *=
2605 					    fxdr_unsigned(u_int64_t, *tl);
2606 					break;
2607 				default:
2608 					error = NFSERR_BADXDR;
2609 					goto nfsmout;
2610 				}
2611 			} else {
2612 				dp->nfsdl_flags = NFSCLDL_READ;
2613 			}
2614 			if (ret)
2615 				dp->nfsdl_flags |= NFSCLDL_RECALL;
2616 			error = nfsrv_dissectace(nd, &dp->nfsdl_ace, false,
2617 			    &ret, &acesize, p);
2618 			if (error)
2619 				goto nfsmout;
2620 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
2621 			error = NFSERR_BADXDR;
2622 			goto nfsmout;
2623 		}
2624 		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2625 		if (error)
2626 			goto nfsmout;
2627 		/* Get rid of the PutFH and Getattr status values. */
2628 		NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2629 		/* Load the directory attributes. */
2630 		error = nfsm_loadattr(nd, dnap);
2631 		if (error)
2632 			goto nfsmout;
2633 		*dattrflagp = 1;
2634 		if (dp != NULL && *attrflagp) {
2635 			dp->nfsdl_change = nnap->na_filerev;
2636 			dp->nfsdl_modtime = nnap->na_mtime;
2637 			dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
2638 		}
2639 		/*
2640 		 * We can now complete the Open state.
2641 		 */
2642 		nfhp = *nfhpp;
2643 		if (dp != NULL) {
2644 			dp->nfsdl_fhlen = nfhp->nfh_len;
2645 			NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh, nfhp->nfh_len);
2646 		}
2647 		/*
2648 		 * Get an Open structure that will be
2649 		 * attached to the OpenOwner, acquired already.
2650 		 */
2651 		error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len,
2652 		    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
2653 		    cred, p, NULL, &op, &newone, NULL, 0, false);
2654 		if (error)
2655 			goto nfsmout;
2656 		op->nfso_stateid = stateid;
2657 		newnfs_copyincred(cred, &op->nfso_cred);
2658 		if ((rflags & NFSV4OPEN_RESULTCONFIRM)) {
2659 		    do {
2660 			ret = nfsrpc_openconfirm(dvp, nfhp->nfh_fh,
2661 			    nfhp->nfh_len, op, cred, p);
2662 			if (ret == NFSERR_DELAY)
2663 			    (void) nfs_catnap(PZERO, ret, "nfs_create");
2664 		    } while (ret == NFSERR_DELAY);
2665 		    error = ret;
2666 		}
2667 
2668 		/*
2669 		 * If the server is handing out delegations, but we didn't
2670 		 * get one because an OpenConfirm was required, try the
2671 		 * Open again, to get a delegation. This is a harmless no-op,
2672 		 * from a server's point of view.
2673 		 */
2674 		if ((rflags & NFSV4OPEN_RESULTCONFIRM) &&
2675 		    (owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) &&
2676 		    !error && dp == NULL) {
2677 		    do {
2678 			ret = nfsrpc_openrpc(VFSTONFS(dvp->v_mount), dvp,
2679 			    np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
2680 			    nfhp->nfh_fh, nfhp->nfh_len,
2681 			    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), op,
2682 			    name, namelen, &dp, 0, 0x0, cred, p, 0, 1);
2683 			if (ret == NFSERR_DELAY)
2684 			    (void) nfs_catnap(PZERO, ret, "nfs_crt2");
2685 		    } while (ret == NFSERR_DELAY);
2686 		    if (ret) {
2687 			if (dp != NULL) {
2688 				free(dp, M_NFSCLDELEG);
2689 				dp = NULL;
2690 			}
2691 			if (ret == NFSERR_STALECLIENTID ||
2692 			    ret == NFSERR_STALEDONTRECOVER ||
2693 			    ret == NFSERR_BADSESSION)
2694 				error = ret;
2695 		    }
2696 		}
2697 		nfscl_openrelease(nmp, op, error, newone);
2698 		*unlockedp = 1;
2699 	}
2700 	if (nd->nd_repstat != 0 && error == 0)
2701 		error = nd->nd_repstat;
2702 	if (error == NFSERR_STALECLIENTID)
2703 		nfscl_initiate_recovery(owp->nfsow_clp);
2704 nfsmout:
2705 	if (!error)
2706 		*dpp = dp;
2707 	else if (dp != NULL)
2708 		free(dp, M_NFSCLDELEG);
2709 	m_freem(nd->nd_mrep);
2710 	return (error);
2711 }
2712 
2713 /*
2714  * Nfs remove rpc
2715  */
2716 int
2717 nfsrpc_remove(vnode_t dvp, char *name, int namelen, vnode_t vp,
2718     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp)
2719 {
2720 	u_int32_t *tl;
2721 	struct nfsrv_descript nfsd, *nd = &nfsd;
2722 	struct nfsnode *np;
2723 	struct nfsmount *nmp;
2724 	nfsv4stateid_t dstateid;
2725 	int error, ret = 0, i;
2726 
2727 	*dattrflagp = 0;
2728 	if (namelen > NFS_MAXNAMLEN)
2729 		return (ENAMETOOLONG);
2730 	nmp = VFSTONFS(dvp->v_mount);
2731 tryagain:
2732 	if (NFSHASNFSV4(nmp) && ret == 0) {
2733 		ret = nfscl_removedeleg(vp, p, &dstateid);
2734 		if (ret == 1) {
2735 			NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp, cred);
2736 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
2737 			    NFSX_UNSIGNED);
2738 			if (NFSHASNFSV4N(nmp))
2739 				*tl++ = 0;
2740 			else
2741 				*tl++ = dstateid.seqid;
2742 			*tl++ = dstateid.other[0];
2743 			*tl++ = dstateid.other[1];
2744 			*tl++ = dstateid.other[2];
2745 			*tl = txdr_unsigned(NFSV4OP_PUTFH);
2746 			np = VTONFS(dvp);
2747 			(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2748 			    np->n_fhp->nfh_len, 0);
2749 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2750 			*tl = txdr_unsigned(NFSV4OP_REMOVE);
2751 		}
2752 	} else {
2753 		ret = 0;
2754 	}
2755 	if (ret == 0)
2756 		NFSCL_REQSTART(nd, NFSPROC_REMOVE, dvp, cred);
2757 	(void) nfsm_strtom(nd, name, namelen);
2758 	error = nfscl_request(nd, dvp, p, cred);
2759 	if (error)
2760 		return (error);
2761 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2762 		/* For NFSv4, parse out any Delereturn replies. */
2763 		if (ret > 0 && nd->nd_repstat != 0 &&
2764 		    (nd->nd_flag & ND_NOMOREDATA)) {
2765 			/*
2766 			 * If the Delegreturn failed, try again without
2767 			 * it. The server will Recall, as required.
2768 			 */
2769 			m_freem(nd->nd_mrep);
2770 			goto tryagain;
2771 		}
2772 		for (i = 0; i < (ret * 2); i++) {
2773 			if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2774 			    ND_NFSV4) {
2775 			    NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2776 			    if (*(tl + 1))
2777 				nd->nd_flag |= ND_NOMOREDATA;
2778 			}
2779 		}
2780 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
2781 	}
2782 	if (nd->nd_repstat && !error)
2783 		error = nd->nd_repstat;
2784 nfsmout:
2785 	m_freem(nd->nd_mrep);
2786 	return (error);
2787 }
2788 
2789 /*
2790  * Do an nfs rename rpc.
2791  */
2792 int
2793 nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen,
2794     vnode_t tdvp, vnode_t tvp, char *tnameptr, int tnamelen, struct ucred *cred,
2795     NFSPROC_T *p, struct nfsvattr *fnap, struct nfsvattr *tnap,
2796     int *fattrflagp, int *tattrflagp)
2797 {
2798 	u_int32_t *tl;
2799 	struct nfsrv_descript nfsd, *nd = &nfsd;
2800 	struct nfsmount *nmp;
2801 	struct nfsnode *np;
2802 	nfsattrbit_t attrbits;
2803 	nfsv4stateid_t fdstateid, tdstateid;
2804 	int error = 0, ret = 0, gottd = 0, gotfd = 0, i;
2805 
2806 	*fattrflagp = 0;
2807 	*tattrflagp = 0;
2808 	nmp = VFSTONFS(fdvp->v_mount);
2809 	if (fnamelen > NFS_MAXNAMLEN || tnamelen > NFS_MAXNAMLEN)
2810 		return (ENAMETOOLONG);
2811 tryagain:
2812 	if (NFSHASNFSV4(nmp) && ret == 0) {
2813 		ret = nfscl_renamedeleg(fvp, &fdstateid, &gotfd, tvp,
2814 		    &tdstateid, &gottd, p);
2815 		if (gotfd && gottd) {
2816 			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME2, fvp, cred);
2817 		} else if (gotfd) {
2818 			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, fvp, cred);
2819 		} else if (gottd) {
2820 			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, tvp, cred);
2821 		}
2822 		if (gotfd) {
2823 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
2824 			if (NFSHASNFSV4N(nmp))
2825 				*tl++ = 0;
2826 			else
2827 				*tl++ = fdstateid.seqid;
2828 			*tl++ = fdstateid.other[0];
2829 			*tl++ = fdstateid.other[1];
2830 			*tl = fdstateid.other[2];
2831 			if (gottd) {
2832 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2833 				*tl = txdr_unsigned(NFSV4OP_PUTFH);
2834 				np = VTONFS(tvp);
2835 				(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2836 				    np->n_fhp->nfh_len, 0);
2837 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2838 				*tl = txdr_unsigned(NFSV4OP_DELEGRETURN);
2839 			}
2840 		}
2841 		if (gottd) {
2842 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
2843 			if (NFSHASNFSV4N(nmp))
2844 				*tl++ = 0;
2845 			else
2846 				*tl++ = tdstateid.seqid;
2847 			*tl++ = tdstateid.other[0];
2848 			*tl++ = tdstateid.other[1];
2849 			*tl = tdstateid.other[2];
2850 		}
2851 		if (ret > 0) {
2852 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2853 			*tl = txdr_unsigned(NFSV4OP_PUTFH);
2854 			np = VTONFS(fdvp);
2855 			(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2856 			    np->n_fhp->nfh_len, 0);
2857 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2858 			*tl = txdr_unsigned(NFSV4OP_SAVEFH);
2859 		}
2860 	} else {
2861 		ret = 0;
2862 	}
2863 	if (ret == 0)
2864 		NFSCL_REQSTART(nd, NFSPROC_RENAME, fdvp, cred);
2865 	if (nd->nd_flag & ND_NFSV4) {
2866 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2867 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2868 		NFSWCCATTR_ATTRBIT(&attrbits);
2869 		(void) nfsrv_putattrbit(nd, &attrbits);
2870 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2871 		*tl = txdr_unsigned(NFSV4OP_PUTFH);
2872 		(void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh,
2873 		    VTONFS(tdvp)->n_fhp->nfh_len, 0);
2874 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2875 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2876 		(void) nfsrv_putattrbit(nd, &attrbits);
2877 		nd->nd_flag |= ND_V4WCCATTR;
2878 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2879 		*tl = txdr_unsigned(NFSV4OP_RENAME);
2880 	}
2881 	(void) nfsm_strtom(nd, fnameptr, fnamelen);
2882 	if (!(nd->nd_flag & ND_NFSV4))
2883 		(void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh,
2884 			VTONFS(tdvp)->n_fhp->nfh_len, 0);
2885 	(void) nfsm_strtom(nd, tnameptr, tnamelen);
2886 	error = nfscl_request(nd, fdvp, p, cred);
2887 	if (error)
2888 		return (error);
2889 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2890 		/* For NFSv4, parse out any Delereturn replies. */
2891 		if (ret > 0 && nd->nd_repstat != 0 &&
2892 		    (nd->nd_flag & ND_NOMOREDATA)) {
2893 			/*
2894 			 * If the Delegreturn failed, try again without
2895 			 * it. The server will Recall, as required.
2896 			 */
2897 			m_freem(nd->nd_mrep);
2898 			goto tryagain;
2899 		}
2900 		for (i = 0; i < (ret * 2); i++) {
2901 			if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2902 			    ND_NFSV4) {
2903 			    NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2904 			    if (*(tl + 1)) {
2905 				if (i == 0 && ret > 1) {
2906 				    /*
2907 				     * If the Delegreturn failed, try again
2908 				     * without it. The server will Recall, as
2909 				     * required.
2910 				     * If ret > 1, the first iteration of this
2911 				     * loop is the second DelegReturn result.
2912 				     */
2913 				    m_freem(nd->nd_mrep);
2914 				    goto tryagain;
2915 				} else {
2916 				    nd->nd_flag |= ND_NOMOREDATA;
2917 				}
2918 			    }
2919 			}
2920 		}
2921 		/* Now, the first wcc attribute reply. */
2922 		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
2923 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2924 			if (*(tl + 1))
2925 				nd->nd_flag |= ND_NOMOREDATA;
2926 		}
2927 		error = nfscl_wcc_data(nd, fdvp, fnap, fattrflagp, NULL, NULL);
2928 		/* and the second wcc attribute reply. */
2929 		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 &&
2930 		    !error) {
2931 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2932 			if (*(tl + 1))
2933 				nd->nd_flag |= ND_NOMOREDATA;
2934 		}
2935 		if (!error)
2936 			error = nfscl_wcc_data(nd, tdvp, tnap, tattrflagp,
2937 			    NULL, NULL);
2938 	}
2939 	if (nd->nd_repstat && !error)
2940 		error = nd->nd_repstat;
2941 nfsmout:
2942 	m_freem(nd->nd_mrep);
2943 	return (error);
2944 }
2945 
2946 /*
2947  * nfs hard link create rpc
2948  */
2949 int
2950 nfsrpc_link(vnode_t dvp, vnode_t vp, char *name, int namelen,
2951     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2952     struct nfsvattr *nap, int *attrflagp, int *dattrflagp)
2953 {
2954 	u_int32_t *tl;
2955 	struct nfsrv_descript nfsd, *nd = &nfsd;
2956 	nfsattrbit_t attrbits;
2957 	int error = 0;
2958 
2959 	*attrflagp = 0;
2960 	*dattrflagp = 0;
2961 	if (namelen > NFS_MAXNAMLEN)
2962 		return (ENAMETOOLONG);
2963 	NFSCL_REQSTART(nd, NFSPROC_LINK, vp, cred);
2964 	if (nd->nd_flag & ND_NFSV4) {
2965 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2966 		*tl = txdr_unsigned(NFSV4OP_PUTFH);
2967 	}
2968 	(void) nfsm_fhtom(nd, VTONFS(dvp)->n_fhp->nfh_fh,
2969 		VTONFS(dvp)->n_fhp->nfh_len, 0);
2970 	if (nd->nd_flag & ND_NFSV4) {
2971 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2972 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2973 		NFSWCCATTR_ATTRBIT(&attrbits);
2974 		(void) nfsrv_putattrbit(nd, &attrbits);
2975 		nd->nd_flag |= ND_V4WCCATTR;
2976 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2977 		*tl = txdr_unsigned(NFSV4OP_LINK);
2978 	}
2979 	(void) nfsm_strtom(nd, name, namelen);
2980 	error = nfscl_request(nd, vp, p, cred);
2981 	if (error)
2982 		return (error);
2983 	if (nd->nd_flag & ND_NFSV3) {
2984 		error = nfscl_postop_attr(nd, nap, attrflagp);
2985 		if (!error)
2986 			error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
2987 			    NULL, NULL);
2988 	} else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
2989 		/*
2990 		 * First, parse out the PutFH and Getattr result.
2991 		 */
2992 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2993 		if (!(*(tl + 1)))
2994 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2995 		if (*(tl + 1))
2996 			nd->nd_flag |= ND_NOMOREDATA;
2997 		/*
2998 		 * Get the pre-op attributes.
2999 		 */
3000 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
3001 	}
3002 	if (nd->nd_repstat && !error)
3003 		error = nd->nd_repstat;
3004 nfsmout:
3005 	m_freem(nd->nd_mrep);
3006 	return (error);
3007 }
3008 
3009 /*
3010  * nfs symbolic link create rpc
3011  */
3012 int
3013 nfsrpc_symlink(vnode_t dvp, char *name, int namelen, const char *target,
3014     struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
3015     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
3016     int *dattrflagp)
3017 {
3018 	u_int32_t *tl;
3019 	struct nfsrv_descript nfsd, *nd = &nfsd;
3020 	struct nfsmount *nmp;
3021 	int slen, error = 0;
3022 
3023 	*nfhpp = NULL;
3024 	*attrflagp = 0;
3025 	*dattrflagp = 0;
3026 	nmp = VFSTONFS(dvp->v_mount);
3027 	slen = strlen(target);
3028 	if (slen > NFS_MAXPATHLEN || namelen > NFS_MAXNAMLEN)
3029 		return (ENAMETOOLONG);
3030 	NFSCL_REQSTART(nd, NFSPROC_SYMLINK, dvp, cred);
3031 	if (nd->nd_flag & ND_NFSV4) {
3032 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3033 		*tl = txdr_unsigned(NFLNK);
3034 		(void) nfsm_strtom(nd, target, slen);
3035 	}
3036 	(void) nfsm_strtom(nd, name, namelen);
3037 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
3038 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
3039 	if (!(nd->nd_flag & ND_NFSV4))
3040 		(void) nfsm_strtom(nd, target, slen);
3041 	if (nd->nd_flag & ND_NFSV2)
3042 		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
3043 	error = nfscl_request(nd, dvp, p, cred);
3044 	if (error)
3045 		return (error);
3046 	if (nd->nd_flag & ND_NFSV4)
3047 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
3048 	if ((nd->nd_flag & ND_NFSV3) && !error) {
3049 		if (!nd->nd_repstat)
3050 			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
3051 		if (!error)
3052 			error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
3053 			    NULL, NULL);
3054 	}
3055 	if (nd->nd_repstat && !error)
3056 		error = nd->nd_repstat;
3057 	m_freem(nd->nd_mrep);
3058 	/*
3059 	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
3060 	 * Only do this if vfs.nfs.ignore_eexist is set.
3061 	 * Never do this for NFSv4.1 or later minor versions, since sessions
3062 	 * should guarantee "exactly once" RPC semantics.
3063 	 */
3064 	if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
3065 	    nmp->nm_minorvers == 0))
3066 		error = 0;
3067 	return (error);
3068 }
3069 
3070 /*
3071  * nfs make dir rpc
3072  */
3073 int
3074 nfsrpc_mkdir(vnode_t dvp, char *name, int namelen, struct vattr *vap,
3075     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
3076     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
3077     int *dattrflagp)
3078 {
3079 	u_int32_t *tl;
3080 	struct nfsrv_descript nfsd, *nd = &nfsd;
3081 	nfsattrbit_t attrbits;
3082 	int error = 0;
3083 	struct nfsfh *fhp;
3084 	struct nfsmount *nmp;
3085 
3086 	*nfhpp = NULL;
3087 	*attrflagp = 0;
3088 	*dattrflagp = 0;
3089 	nmp = VFSTONFS(dvp->v_mount);
3090 	fhp = VTONFS(dvp)->n_fhp;
3091 	if (namelen > NFS_MAXNAMLEN)
3092 		return (ENAMETOOLONG);
3093 	NFSCL_REQSTART(nd, NFSPROC_MKDIR, dvp, cred);
3094 	if (nd->nd_flag & ND_NFSV4) {
3095 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3096 		*tl = txdr_unsigned(NFDIR);
3097 	}
3098 	(void) nfsm_strtom(nd, name, namelen);
3099 	nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
3100 	if (nd->nd_flag & ND_NFSV4) {
3101 		NFSGETATTR_ATTRBIT(&attrbits);
3102 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3103 		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
3104 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
3105 		(void) nfsrv_putattrbit(nd, &attrbits);
3106 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3107 		*tl = txdr_unsigned(NFSV4OP_PUTFH);
3108 		(void) nfsm_fhtom(nd, fhp->nfh_fh, fhp->nfh_len, 0);
3109 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3110 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
3111 		(void) nfsrv_putattrbit(nd, &attrbits);
3112 	}
3113 	error = nfscl_request(nd, dvp, p, cred);
3114 	if (error)
3115 		return (error);
3116 	if (nd->nd_flag & ND_NFSV4)
3117 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
3118 	if (!nd->nd_repstat && !error) {
3119 		if (nd->nd_flag & ND_NFSV4) {
3120 			NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
3121 			error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
3122 		}
3123 		if (!error)
3124 			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
3125 		if (error == 0 && (nd->nd_flag & ND_NFSV4) != 0) {
3126 			/* Get rid of the PutFH and Getattr status values. */
3127 			NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
3128 			/* Load the directory attributes. */
3129 			error = nfsm_loadattr(nd, dnap);
3130 			if (error == 0)
3131 				*dattrflagp = 1;
3132 		}
3133 	}
3134 	if ((nd->nd_flag & ND_NFSV3) && !error)
3135 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
3136 	if (nd->nd_repstat && !error)
3137 		error = nd->nd_repstat;
3138 nfsmout:
3139 	m_freem(nd->nd_mrep);
3140 	/*
3141 	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
3142 	 * Only do this if vfs.nfs.ignore_eexist is set.
3143 	 * Never do this for NFSv4.1 or later minor versions, since sessions
3144 	 * should guarantee "exactly once" RPC semantics.
3145 	 */
3146 	if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
3147 	    nmp->nm_minorvers == 0))
3148 		error = 0;
3149 	return (error);
3150 }
3151 
3152 /*
3153  * nfs remove directory call
3154  */
3155 int
3156 nfsrpc_rmdir(vnode_t dvp, char *name, int namelen, struct ucred *cred,
3157     NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp)
3158 {
3159 	struct nfsrv_descript nfsd, *nd = &nfsd;
3160 	int error = 0;
3161 
3162 	*dattrflagp = 0;
3163 	if (namelen > NFS_MAXNAMLEN)
3164 		return (ENAMETOOLONG);
3165 	NFSCL_REQSTART(nd, NFSPROC_RMDIR, dvp, cred);
3166 	(void) nfsm_strtom(nd, name, namelen);
3167 	error = nfscl_request(nd, dvp, p, cred);
3168 	if (error)
3169 		return (error);
3170 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
3171 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
3172 	if (nd->nd_repstat && !error)
3173 		error = nd->nd_repstat;
3174 	m_freem(nd->nd_mrep);
3175 	/*
3176 	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
3177 	 */
3178 	if (error == ENOENT)
3179 		error = 0;
3180 	return (error);
3181 }
3182 
3183 /*
3184  * Readdir rpc.
3185  * Always returns with either uio_resid unchanged, if you are at the
3186  * end of the directory, or uio_resid == 0, with all DIRBLKSIZ chunks
3187  * filled in.
3188  * I felt this would allow caching of directory blocks more easily
3189  * than returning a partially filled block.
3190  * Directory offset cookies:
3191  * Oh my, what to do with them...
3192  * I can think of three ways to deal with them:
3193  * 1 - have the layer above these RPCs maintain a map between logical
3194  *     directory byte offsets and the NFS directory offset cookies
3195  * 2 - pass the opaque directory offset cookies up into userland
3196  *     and let the libc functions deal with them, via the system call
3197  * 3 - return them to userland in the "struct dirent", so future versions
3198  *     of libc can use them and do whatever is necessary to make things work
3199  *     above these rpc calls, in the meantime
3200  * For now, I do #3 by "hiding" the directory offset cookies after the
3201  * d_name field in struct dirent. This is space inside d_reclen that
3202  * will be ignored by anything that doesn't know about them.
3203  * The directory offset cookies are filled in as the last 8 bytes of
3204  * each directory entry, after d_name. Someday, the userland libc
3205  * functions may be able to use these. In the meantime, it satisfies
3206  * OpenBSD's requirements for cookies being returned.
3207  * It expects the directory offset cookie for the read to be in uio_offset
3208  * and returns the one for the next entry after this directory block in
3209  * there, as well.
 * NOTE(review): the code below takes the starting cookie from *cookiep;
 * uio_offset is only compared against 0 and advanced by the number of
 * bytes produced. Confirm the sentence above is still accurate.
 *
 * vp is the directory vnode. uiop must describe a single iovec whose
 * uio_resid is a multiple of DIRBLKSIZ (asserted below).
 * cookiep supplies the cookie to start reading at and is updated with the
 * cookie of each entry copied into the buffer.
 * nap/attrflagp are handed to nfscl_postop_attr() to receive the
 * directory attributes carried by the reply (NFSv3 and NFSv4).
 * eofp may be NULL; otherwise it receives the end of directory flag.
 * Returns 0 or an error number (EBADRPC for an entry name length outside
 * 1..NFS_MAXNAMLEN).
3210  */
3211 int
3212 nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
3213     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
3214     int *eofp)
3215 {
3216 	int len, left;
3217 	struct dirent *dp = NULL;
3218 	u_int32_t *tl;
3219 	nfsquad_t cookie, ncookie;
3220 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3221 	struct nfsnode *dnp = VTONFS(vp);
3222 	struct nfsvattr nfsva;
3223 	struct nfsrv_descript nfsd, *nd = &nfsd;
3224 	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
3225 	int reqsize, tryformoredirs = 1, readsize, eof = 0, gotmnton = 0;
3226 	u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
3227 	char *cp;
3228 	nfsattrbit_t attrbits, dattrbits;
3229 	u_int32_t rderr, *tl2 = NULL;
3230 	size_t tresid;
3231 
3232 	KASSERT(uiop->uio_iovcnt == 1 &&
3233 	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
3234 	    ("nfs readdirrpc bad uio"));
3235 	ncookie.lval[0] = ncookie.lval[1] = 0;
3236 	/*
3237 	 * There is no point in reading a lot more than uio_resid, however
3238 	 * adding one additional DIRBLKSIZ makes sense. Since uio_resid
3239 	 * and nm_readdirsize are both exact multiples of DIRBLKSIZ, this
3240 	 * will never make readsize > nm_readdirsize.
3241 	 */
3242 	readsize = nmp->nm_readdirsize;
3243 	if (readsize > uiop->uio_resid)
3244 		readsize = uiop->uio_resid + DIRBLKSIZ;
3245 
3246 	*attrflagp = 0;
3247 	if (eofp)
3248 		*eofp = 0;
	/* Remember the initial resid so "nothing returned" can be detected. */
3249 	tresid = uiop->uio_resid;
3250 	cookie.lval[0] = cookiep->nfsuquad[0];
3251 	cookie.lval[1] = cookiep->nfsuquad[1];
	/* nd_mrep is kept NULL whenever no reply is held, see nfsmout. */
3252 	nd->nd_mrep = NULL;
3253 
3254 	/*
3255 	 * For NFSv4, first create the "." and ".." entries.
3256 	 */
3257 	if (NFSHASNFSV4(nmp)) {
3258 		reqsize = 6 * NFSX_UNSIGNED;
3259 		NFSGETATTR_ATTRBIT(&dattrbits);
3260 		NFSZERO_ATTRBIT(&attrbits);
3261 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
3262 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE);
3263 		if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3264 		    NFSATTRBIT_MOUNTEDONFILEID)) {
3265 			NFSSETBIT_ATTRBIT(&attrbits,
3266 			    NFSATTRBIT_MOUNTEDONFILEID);
3267 			gotmnton = 1;
3268 		} else {
3269 			/*
3270 			 * Must fake it. Use the fileno, except when the
3271 			 * fsid is != to that of the directory. For that
3272 			 * case, generate a fake fileno that is not the same.
3273 			 */
3274 			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
3275 			gotmnton = 0;
3276 		}
3277 
3278 		/*
3279 		 * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
3280 		 */
3281 		if (uiop->uio_offset == 0) {
			/* A Lookupp RPC supplies the fileids for "." and "..". */
3282 			NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp, cred);
3283 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3284 			*tl++ = txdr_unsigned(NFSV4OP_GETFH);
3285 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
3286 			(void) nfsrv_putattrbit(nd, &attrbits);
3287 			error = nfscl_request(nd, vp, p, cred);
3288 			if (error)
3289 			    return (error);
3290 			dotfileid = 0;	/* Fake out the compiler. */
3291 			if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
3292 			    error = nfsm_loadattr(nd, &nfsva);
3293 			    if (error != 0)
3294 				goto nfsmout;
3295 			    dotfileid = nfsva.na_fileid;
3296 			}
3297 			if (nd->nd_repstat == 0) {
3298 			    NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
			    /* presumably the GetFH file handle length -- TODO confirm */
3299 			    len = fxdr_unsigned(int, *(tl + 4));
3300 			    if (len > 0 && len <= NFSX_V4FHMAX)
3301 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3302 			    else
3303 				error = EPERM;
3304 			    if (!error) {
3305 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3306 				nfsva.na_mntonfileno = UINT64_MAX;
3307 				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3308 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3309 				    NULL, NULL, NULL, p, cred);
				/*
				 * Pick the fileid for "..", with the same
				 * rules as used for the entries below. If
				 * the attributes fail to load, fall back
				 * to the fileid of ".".
				 */
3310 				if (error) {
3311 				    dotdotfileid = dotfileid;
3312 				} else if (gotmnton) {
3313 				    if (nfsva.na_mntonfileno != UINT64_MAX)
3314 					dotdotfileid = nfsva.na_mntonfileno;
3315 				    else
3316 					dotdotfileid = nfsva.na_fileid;
3317 				} else if (nfsva.na_filesid[0] ==
3318 				    dnp->n_vattr.na_filesid[0] &&
3319 				    nfsva.na_filesid[1] ==
3320 				    dnp->n_vattr.na_filesid[1]) {
3321 				    dotdotfileid = nfsva.na_fileid;
3322 				} else {
3323 				    do {
3324 					fakefileno--;
3325 				    } while (fakefileno ==
3326 					nfsva.na_fileid);
3327 				    dotdotfileid = fakefileno;
3328 				}
3329 			    }
3330 			} else if (nd->nd_repstat == NFSERR_NOENT) {
3331 			    /*
3332 			     * Lookupp returns NFSERR_NOENT when we are
3333 			     * at the root, so just use the current dir.
3334 			     */
3335 			    nd->nd_repstat = 0;
3336 			    dotdotfileid = dotfileid;
3337 			} else {
3338 			    error = nd->nd_repstat;
3339 			}
3340 			m_freem(nd->nd_mrep);
3341 			if (error)
3342 			    return (error);
3343 			nd->nd_mrep = NULL;
			/* Hand craft the "." record at the start of the buffer. */
3344 			dp = (struct dirent *)uiop->uio_iov->iov_base;
3345 			dp->d_pad0 = dp->d_pad1 = 0;
3346 			dp->d_off = 0;
3347 			dp->d_type = DT_DIR;
3348 			dp->d_fileno = dotfileid;
3349 			dp->d_namlen = 1;
3350 			*((uint64_t *)dp->d_name) = 0;	/* Zero pad it. */
3351 			dp->d_name[0] = '.';
3352 			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3353 			/*
3354 			 * Just make these offset cookie 0.
3355 			 */
3356 			tl = (u_int32_t *)&dp->d_name[8];
3357 			*tl++ = 0;
3358 			*tl = 0;
3359 			blksiz += dp->d_reclen;
3360 			uiop->uio_resid -= dp->d_reclen;
3361 			uiop->uio_offset += dp->d_reclen;
3362 			uiop->uio_iov->iov_base =
3363 			    (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3364 			uiop->uio_iov->iov_len -= dp->d_reclen;
			/* Then the ".." record, right after it. */
3365 			dp = (struct dirent *)uiop->uio_iov->iov_base;
3366 			dp->d_pad0 = dp->d_pad1 = 0;
3367 			dp->d_off = 0;
3368 			dp->d_type = DT_DIR;
3369 			dp->d_fileno = dotdotfileid;
3370 			dp->d_namlen = 2;
3371 			*((uint64_t *)dp->d_name) = 0;
3372 			dp->d_name[0] = '.';
3373 			dp->d_name[1] = '.';
3374 			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3375 			/*
3376 			 * Just make these offset cookie 0.
3377 			 */
3378 			tl = (u_int32_t *)&dp->d_name[8];
3379 			*tl++ = 0;
3380 			*tl = 0;
3381 			blksiz += dp->d_reclen;
3382 			uiop->uio_resid -= dp->d_reclen;
3383 			uiop->uio_offset += dp->d_reclen;
3384 			uiop->uio_iov->iov_base =
3385 			    (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3386 			uiop->uio_iov->iov_len -= dp->d_reclen;
3387 		}
		/* Only the Readdir requests below also ask for rdattr_error. */
3388 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR);
3389 	} else {
3390 		reqsize = 5 * NFSX_UNSIGNED;
3391 	}
3392 
3393 	/*
3394 	 * Loop around doing readdir rpc's of size readsize.
3395 	 * The stopping criteria is EOF or buffer full.
3396 	 */
3397 	while (more_dirs && bigenough) {
3398 		*attrflagp = 0;
3399 		NFSCL_REQSTART(nd, NFSPROC_READDIR, vp, cred);
3400 		if (nd->nd_flag & ND_NFSV2) {
			/* NFSv2: one cookie word and the count. */
3401 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3402 			*tl++ = cookie.lval[1];
3403 			*tl = txdr_unsigned(readsize);
3404 		} else {
			/*
			 * NFSv3 and NFSv4: two cookie words, two cookie
			 * verifier words (zero when starting at cookie 0) and
			 * then one count word, or two for NFSv4, as sized by
			 * reqsize above.
			 */
3405 			NFSM_BUILD(tl, u_int32_t *, reqsize);
3406 			*tl++ = cookie.lval[0];
3407 			*tl++ = cookie.lval[1];
3408 			if (cookie.qval == 0) {
3409 				*tl++ = 0;
3410 				*tl++ = 0;
3411 			} else {
3412 				NFSLOCKNODE(dnp);
3413 				*tl++ = dnp->n_cookieverf.nfsuquad[0];
3414 				*tl++ = dnp->n_cookieverf.nfsuquad[1];
3415 				NFSUNLOCKNODE(dnp);
3416 			}
3417 			if (nd->nd_flag & ND_NFSV4) {
3418 				*tl++ = txdr_unsigned(readsize);
3419 				*tl = txdr_unsigned(readsize);
3420 				(void) nfsrv_putattrbit(nd, &attrbits);
3421 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3422 				*tl = txdr_unsigned(NFSV4OP_GETATTR);
3423 				(void) nfsrv_putattrbit(nd, &dattrbits);
3424 			} else {
3425 				*tl = txdr_unsigned(readsize);
3426 			}
3427 		}
3428 		error = nfscl_request(nd, vp, p, cred);
3429 		if (error)
3430 			return (error);
3431 		if (!(nd->nd_flag & ND_NFSV2)) {
3432 			if (nd->nd_flag & ND_NFSV3)
3433 				error = nfscl_postop_attr(nd, nap, attrflagp);
3434 			if (!nd->nd_repstat && !error) {
				/* Save the cookie verifier for the next request. */
3435 				NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
3436 				NFSLOCKNODE(dnp);
3437 				dnp->n_cookieverf.nfsuquad[0] = *tl++;
3438 				dnp->n_cookieverf.nfsuquad[1] = *tl;
3439 				NFSUNLOCKNODE(dnp);
3440 			}
3441 		}
3442 		if (nd->nd_repstat || error) {
3443 			if (!error)
3444 				error = nd->nd_repstat;
3445 			goto nfsmout;
3446 		}
3447 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3448 		more_dirs = fxdr_unsigned(int, *tl);
		/*
		 * A reply with no entries at all: do not let a clear eof
		 * flag cause another RPC (see the eof handling below).
		 */
3449 		if (!more_dirs)
3450 			tryformoredirs = 0;
3451 
3452 		/* loop through the dir entries, doctoring them to 4bsd form */
3453 		while (more_dirs && bigenough) {
3454 			if (nd->nd_flag & ND_NFSV4) {
3455 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3456 				ncookie.lval[0] = *tl++;
3457 				ncookie.lval[1] = *tl++;
3458 				len = fxdr_unsigned(int, *tl);
3459 			} else if (nd->nd_flag & ND_NFSV3) {
3460 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3461 				nfsva.na_fileid = fxdr_hyper(tl);
3462 				tl += 2;
3463 				len = fxdr_unsigned(int, *tl);
3464 			} else {
3465 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3466 				nfsva.na_fileid = fxdr_unsigned(uint64_t,
3467 				    *tl++);
3468 				len = fxdr_unsigned(int, *tl);
3469 			}
3470 			if (len <= 0 || len > NFS_MAXNAMLEN) {
3471 				error = EBADRPC;
3472 				goto nfsmout;
3473 			}
3474 			tlen = roundup2(len, 8);
3475 			if (tlen == len)
3476 				tlen += 8;  /* To ensure null termination. */
3477 			left = DIRBLKSIZ - blksiz;
			/*
			 * The entry does not fit in what is left of the
			 * current DIRBLKSIZ block: zero the remainder, add
			 * it to the previous record and start a new block.
			 */
3478 			if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
3479 				NFSBZERO(uiop->uio_iov->iov_base, left);
3480 				dp->d_reclen += left;
3481 				uiop->uio_iov->iov_base =
3482 				    (char *)uiop->uio_iov->iov_base + left;
3483 				uiop->uio_iov->iov_len -= left;
3484 				uiop->uio_resid -= left;
3485 				uiop->uio_offset += left;
3486 				blksiz = 0;
3487 			}
3488 			if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
3489 			    uiop->uio_resid)
3490 				bigenough = 0;
3491 			if (bigenough) {
				/*
				 * Fill in the record header, copy the name
				 * from the reply, zero pad it and leave
				 * tl2 pointing at the cookie storage that
				 * follows the padded name.
				 */
3492 				dp = (struct dirent *)uiop->uio_iov->iov_base;
3493 				dp->d_pad0 = dp->d_pad1 = 0;
3494 				dp->d_off = 0;
3495 				dp->d_namlen = len;
3496 				dp->d_reclen = _GENERIC_DIRLEN(len) +
3497 				    NFSX_HYPER;
3498 				dp->d_type = DT_UNKNOWN;
3499 				blksiz += dp->d_reclen;
3500 				if (blksiz == DIRBLKSIZ)
3501 					blksiz = 0;
3502 				uiop->uio_resid -= DIRHDSIZ;
3503 				uiop->uio_offset += DIRHDSIZ;
3504 				uiop->uio_iov->iov_base =
3505 				    (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
3506 				uiop->uio_iov->iov_len -= DIRHDSIZ;
3507 				error = nfsm_mbufuio(nd, uiop, len);
3508 				if (error)
3509 					goto nfsmout;
3510 				cp = uiop->uio_iov->iov_base;
3511 				tlen -= len;
3512 				NFSBZERO(cp, tlen);
3513 				cp += tlen;	/* points to cookie storage */
3514 				tl2 = (u_int32_t *)cp;
3515 				uiop->uio_iov->iov_base =
3516 				    (char *)uiop->uio_iov->iov_base + tlen +
3517 				    NFSX_HYPER;
3518 				uiop->uio_iov->iov_len -= tlen + NFSX_HYPER;
3519 				uiop->uio_resid -= tlen + NFSX_HYPER;
3520 				uiop->uio_offset += (tlen + NFSX_HYPER);
3521 			} else {
				/* No room for this entry: skip its name in the reply. */
3522 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3523 				if (error)
3524 					goto nfsmout;
3525 			}
			/*
			 * Parse the rest of the entry. In every case tl is
			 * left pointing at the word that is read into
			 * more_dirs at the bottom of this loop.
			 */
3526 			if (nd->nd_flag & ND_NFSV4) {
3527 				rderr = 0;
3528 				nfsva.na_mntonfileno = UINT64_MAX;
3529 				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3530 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3531 				    NULL, NULL, &rderr, p, cred);
3532 				if (error)
3533 					goto nfsmout;
3534 				NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3535 			} else if (nd->nd_flag & ND_NFSV3) {
3536 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3537 				ncookie.lval[0] = *tl++;
3538 				ncookie.lval[1] = *tl++;
3539 			} else {
3540 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3541 				ncookie.lval[0] = 0;
3542 				ncookie.lval[1] = *tl++;
3543 			}
3544 			if (bigenough) {
3545 			    if (nd->nd_flag & ND_NFSV4) {
3546 				if (rderr) {
3547 				    dp->d_fileno = 0;
3548 				} else {
3549 				    if (gotmnton) {
3550 					if (nfsva.na_mntonfileno != UINT64_MAX)
3551 					    dp->d_fileno = nfsva.na_mntonfileno;
3552 					else
3553 					    dp->d_fileno = nfsva.na_fileid;
3554 				    } else if (nfsva.na_filesid[0] ==
3555 					dnp->n_vattr.na_filesid[0] &&
3556 					nfsva.na_filesid[1] ==
3557 					dnp->n_vattr.na_filesid[1]) {
3558 					dp->d_fileno = nfsva.na_fileid;
3559 				    } else {
3560 					do {
3561 					    fakefileno--;
3562 					} while (fakefileno ==
3563 					    nfsva.na_fileid);
3564 					dp->d_fileno = fakefileno;
3565 				    }
3566 				    dp->d_type = vtonfs_dtype(nfsva.na_type);
3567 				}
3568 			    } else {
3569 				dp->d_fileno = nfsva.na_fileid;
3570 			    }
			    /*
			     * Store the entry's cookie after its name and
			     * make it the restart point, both for the next
			     * RPC (cookie) and for the caller (*cookiep).
			     */
3571 			    *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
3572 				ncookie.lval[0];
3573 			    *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
3574 				ncookie.lval[1];
3575 			}
3576 			more_dirs = fxdr_unsigned(int, *tl);
3577 		}
3578 		/*
3579 		 * If at end of rpc data, get the eof boolean
3580 		 */
3581 		if (!more_dirs) {
3582 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3583 			eof = fxdr_unsigned(int, *tl);
3584 			if (tryformoredirs)
3585 				more_dirs = !eof;
3586 			if (nd->nd_flag & ND_NFSV4) {
3587 				error = nfscl_postop_attr(nd, nap, attrflagp);
3588 				if (error)
3589 					goto nfsmout;
3590 			}
3591 		}
3592 		m_freem(nd->nd_mrep);
3593 		nd->nd_mrep = NULL;
3594 	}
3595 	/*
3596 	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
3597 	 * by increasing d_reclen for the last record.
3598 	 */
3599 	if (blksiz > 0) {
3600 		left = DIRBLKSIZ - blksiz;
3601 		NFSBZERO(uiop->uio_iov->iov_base, left);
3602 		dp->d_reclen += left;
3603 		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
3604 		    left;
3605 		uiop->uio_iov->iov_len -= left;
3606 		uiop->uio_resid -= left;
3607 		uiop->uio_offset += left;
3608 	}
3609 
3610 	/*
3611 	 * If returning no data, assume end of file.
3612 	 * If not bigenough, return not end of file, since you aren't
3613 	 *    returning all the data
3614 	 * Otherwise, return the eof flag from the server.
3615 	 */
3616 	if (eofp) {
3617 		if (tresid == ((size_t)(uiop->uio_resid)))
3618 			*eofp = 1;
3619 		else if (!bigenough)
3620 			*eofp = 0;
3621 		else
3622 			*eofp = eof;
3623 	}
3624 
3625 	/*
3626 	 * Add extra empty records to any remaining DIRBLKSIZ chunks.
3627 	 */
3628 	while (uiop->uio_resid > 0 && uiop->uio_resid != tresid) {
3629 		dp = (struct dirent *)uiop->uio_iov->iov_base;
3630 		NFSBZERO(dp, DIRBLKSIZ);
3631 		dp->d_type = DT_UNKNOWN;
		/*
		 * The record was just zeroed, so d_namlen is 0. The last
		 * cookie is stored starting at d_name[4].
		 */
3632 		tl = (u_int32_t *)&dp->d_name[4];
3633 		*tl++ = cookie.lval[0];
3634 		*tl = cookie.lval[1];
3635 		dp->d_reclen = DIRBLKSIZ;
3636 		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
3637 		    DIRBLKSIZ;
3638 		uiop->uio_iov->iov_len -= DIRBLKSIZ;
3639 		uiop->uio_resid -= DIRBLKSIZ;
3640 		uiop->uio_offset += DIRBLKSIZ;
3641 	}
3642 
	/*
	 * Reached by falling through on success and by the explicit
	 * "goto nfsmout" statements above (presumably also from inside
	 * NFSM_DISSECT, which is defined elsewhere -- TODO confirm).
	 * nd_mrep is only non-NULL while a reply is still held.
	 */
3643 nfsmout:
3644 	if (nd->nd_mrep != NULL)
3645 		m_freem(nd->nd_mrep);
3646 	return (error);
3647 }
3648 
3649 /*
3650  * NFS V3 readdir plus RPC. Used in place of nfsrpc_readdir().
3651  * (Also used for NFS V4 when mount flag set.)
3652  * (ditto above w.r.t. multiple of DIRBLKSIZ, etc.)
3653  */
3654 int
3655 nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
3656     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
3657     int *eofp)
3658 {
3659 	int len, left;
3660 	struct dirent *dp = NULL;
3661 	u_int32_t *tl;
3662 	vnode_t newvp = NULLVP;
3663 	struct nfsrv_descript nfsd, *nd = &nfsd;
3664 	struct nameidata nami, *ndp = &nami;
3665 	struct componentname *cnp = &ndp->ni_cnd;
3666 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3667 	struct nfsnode *dnp = VTONFS(vp), *np;
3668 	struct nfsvattr nfsva;
3669 	struct nfsfh *nfhp;
3670 	nfsquad_t cookie, ncookie;
3671 	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
3672 	int attrflag, tryformoredirs = 1, eof = 0, gotmnton = 0;
3673 	int isdotdot = 0, unlocknewvp = 0;
3674 	u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
3675 	u_int64_t fileno = 0;
3676 	char *cp;
3677 	nfsattrbit_t attrbits, dattrbits;
3678 	size_t tresid;
3679 	u_int32_t *tl2 = NULL, rderr;
3680 	struct timespec dctime, ts;
3681 	bool attr_ok;
3682 
3683 	KASSERT(uiop->uio_iovcnt == 1 &&
3684 	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
3685 	    ("nfs readdirplusrpc bad uio"));
3686 	ncookie.lval[0] = ncookie.lval[1] = 0;
3687 	timespecclear(&dctime);
3688 	*attrflagp = 0;
3689 	if (eofp != NULL)
3690 		*eofp = 0;
3691 	ndp->ni_dvp = vp;
3692 	nd->nd_mrep = NULL;
3693 	cookie.lval[0] = cookiep->nfsuquad[0];
3694 	cookie.lval[1] = cookiep->nfsuquad[1];
3695 	tresid = uiop->uio_resid;
3696 
3697 	/*
3698 	 * For NFSv4, first create the "." and ".." entries.
3699 	 */
3700 	if (NFSHASNFSV4(nmp)) {
3701 		NFSGETATTR_ATTRBIT(&dattrbits);
3702 		NFSZERO_ATTRBIT(&attrbits);
3703 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
3704 		if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3705 		    NFSATTRBIT_MOUNTEDONFILEID)) {
3706 			NFSSETBIT_ATTRBIT(&attrbits,
3707 			    NFSATTRBIT_MOUNTEDONFILEID);
3708 			gotmnton = 1;
3709 		} else {
3710 			/*
3711 			 * Must fake it. Use the fileno, except when the
3712 			 * fsid is != to that of the directory. For that
3713 			 * case, generate a fake fileno that is not the same.
3714 			 */
3715 			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
3716 			gotmnton = 0;
3717 		}
3718 
3719 		/*
3720 		 * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
3721 		 */
3722 		if (uiop->uio_offset == 0) {
3723 			NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp, cred);
3724 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3725 			*tl++ = txdr_unsigned(NFSV4OP_GETFH);
3726 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
3727 			(void) nfsrv_putattrbit(nd, &attrbits);
3728 			error = nfscl_request(nd, vp, p, cred);
3729 			if (error)
3730 			    return (error);
3731 			dotfileid = 0;	/* Fake out the compiler. */
3732 			if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
3733 			    error = nfsm_loadattr(nd, &nfsva);
3734 			    if (error != 0)
3735 				goto nfsmout;
3736 			    dctime = nfsva.na_ctime;
3737 			    dotfileid = nfsva.na_fileid;
3738 			}
3739 			if (nd->nd_repstat == 0) {
3740 			    NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
3741 			    len = fxdr_unsigned(int, *(tl + 4));
3742 			    if (len > 0 && len <= NFSX_V4FHMAX)
3743 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3744 			    else
3745 				error = EPERM;
3746 			    if (!error) {
3747 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3748 				nfsva.na_mntonfileno = UINT64_MAX;
3749 				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3750 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3751 				    NULL, NULL, NULL, p, cred);
3752 				if (error) {
3753 				    dotdotfileid = dotfileid;
3754 				} else if (gotmnton) {
3755 				    if (nfsva.na_mntonfileno != UINT64_MAX)
3756 					dotdotfileid = nfsva.na_mntonfileno;
3757 				    else
3758 					dotdotfileid = nfsva.na_fileid;
3759 				} else if (nfsva.na_filesid[0] ==
3760 				    dnp->n_vattr.na_filesid[0] &&
3761 				    nfsva.na_filesid[1] ==
3762 				    dnp->n_vattr.na_filesid[1]) {
3763 				    dotdotfileid = nfsva.na_fileid;
3764 				} else {
3765 				    do {
3766 					fakefileno--;
3767 				    } while (fakefileno ==
3768 					nfsva.na_fileid);
3769 				    dotdotfileid = fakefileno;
3770 				}
3771 			    }
3772 			} else if (nd->nd_repstat == NFSERR_NOENT) {
3773 			    /*
3774 			     * Lookupp returns NFSERR_NOENT when we are
3775 			     * at the root, so just use the current dir.
3776 			     */
3777 			    nd->nd_repstat = 0;
3778 			    dotdotfileid = dotfileid;
3779 			} else {
3780 			    error = nd->nd_repstat;
3781 			}
3782 			m_freem(nd->nd_mrep);
3783 			if (error)
3784 			    return (error);
3785 			nd->nd_mrep = NULL;
3786 			dp = (struct dirent *)uiop->uio_iov->iov_base;
3787 			dp->d_pad0 = dp->d_pad1 = 0;
3788 			dp->d_off = 0;
3789 			dp->d_type = DT_DIR;
3790 			dp->d_fileno = dotfileid;
3791 			dp->d_namlen = 1;
3792 			*((uint64_t *)dp->d_name) = 0;	/* Zero pad it. */
3793 			dp->d_name[0] = '.';
3794 			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3795 			/*
3796 			 * Just make these offset cookie 0.
3797 			 */
3798 			tl = (u_int32_t *)&dp->d_name[8];
3799 			*tl++ = 0;
3800 			*tl = 0;
3801 			blksiz += dp->d_reclen;
3802 			uiop->uio_resid -= dp->d_reclen;
3803 			uiop->uio_offset += dp->d_reclen;
3804 			uiop->uio_iov->iov_base =
3805 			    (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3806 			uiop->uio_iov->iov_len -= dp->d_reclen;
3807 			dp = (struct dirent *)uiop->uio_iov->iov_base;
3808 			dp->d_pad0 = dp->d_pad1 = 0;
3809 			dp->d_off = 0;
3810 			dp->d_type = DT_DIR;
3811 			dp->d_fileno = dotdotfileid;
3812 			dp->d_namlen = 2;
3813 			*((uint64_t *)dp->d_name) = 0;
3814 			dp->d_name[0] = '.';
3815 			dp->d_name[1] = '.';
3816 			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3817 			/*
3818 			 * Just make these offset cookie 0.
3819 			 */
3820 			tl = (u_int32_t *)&dp->d_name[8];
3821 			*tl++ = 0;
3822 			*tl = 0;
3823 			blksiz += dp->d_reclen;
3824 			uiop->uio_resid -= dp->d_reclen;
3825 			uiop->uio_offset += dp->d_reclen;
3826 			uiop->uio_iov->iov_base =
3827 			    (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3828 			uiop->uio_iov->iov_len -= dp->d_reclen;
3829 		}
3830 		NFSREADDIRPLUS_ATTRBIT(&attrbits);
3831 		if (gotmnton)
3832 			NFSSETBIT_ATTRBIT(&attrbits,
3833 			    NFSATTRBIT_MOUNTEDONFILEID);
3834 		if (!NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3835 		    NFSATTRBIT_TIMECREATE))
3836 			NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMECREATE);
3837 	}
3838 
3839 	/*
3840 	 * Loop around doing readdir rpc's of size nm_readdirsize.
3841 	 * The stopping criteria is EOF or buffer full.
3842 	 */
3843 	while (more_dirs && bigenough) {
3844 		*attrflagp = 0;
3845 		NFSCL_REQSTART(nd, NFSPROC_READDIRPLUS, vp, cred);
3846  		NFSM_BUILD(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
3847 		*tl++ = cookie.lval[0];
3848 		*tl++ = cookie.lval[1];
3849 		if (cookie.qval == 0) {
3850 			*tl++ = 0;
3851 			*tl++ = 0;
3852 		} else {
3853 			NFSLOCKNODE(dnp);
3854 			*tl++ = dnp->n_cookieverf.nfsuquad[0];
3855 			*tl++ = dnp->n_cookieverf.nfsuquad[1];
3856 			NFSUNLOCKNODE(dnp);
3857 		}
3858 		*tl++ = txdr_unsigned(nmp->nm_readdirsize);
3859 		*tl = txdr_unsigned(nmp->nm_readdirsize);
3860 		if (nd->nd_flag & ND_NFSV4) {
3861 			(void) nfsrv_putattrbit(nd, &attrbits);
3862 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3863 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
3864 			(void) nfsrv_putattrbit(nd, &dattrbits);
3865 		}
3866 		nanouptime(&ts);
3867 		error = nfscl_request(nd, vp, p, cred);
3868 		if (error)
3869 			return (error);
3870 		if (nd->nd_flag & ND_NFSV3)
3871 			error = nfscl_postop_attr(nd, nap, attrflagp);
3872 		if (nd->nd_repstat || error) {
3873 			if (!error)
3874 				error = nd->nd_repstat;
3875 			goto nfsmout;
3876 		}
3877 		if ((nd->nd_flag & ND_NFSV3) != 0 && *attrflagp != 0)
3878 			dctime = nap->na_ctime;
3879 		NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3880 		NFSLOCKNODE(dnp);
3881 		dnp->n_cookieverf.nfsuquad[0] = *tl++;
3882 		dnp->n_cookieverf.nfsuquad[1] = *tl++;
3883 		NFSUNLOCKNODE(dnp);
3884 		more_dirs = fxdr_unsigned(int, *tl);
3885 		if (!more_dirs)
3886 			tryformoredirs = 0;
3887 
3888 		/* loop through the dir entries, doctoring them to 4bsd form */
3889 		while (more_dirs && bigenough) {
3890 			NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3891 			if (nd->nd_flag & ND_NFSV4) {
3892 				ncookie.lval[0] = *tl++;
3893 				ncookie.lval[1] = *tl++;
3894 			} else {
3895 				fileno = fxdr_hyper(tl);
3896 				tl += 2;
3897 			}
3898 			len = fxdr_unsigned(int, *tl);
3899 			if (len <= 0 || len > NFS_MAXNAMLEN) {
3900 				error = EBADRPC;
3901 				goto nfsmout;
3902 			}
3903 			tlen = roundup2(len, 8);
3904 			if (tlen == len)
3905 				tlen += 8;  /* To ensure null termination. */
3906 			left = DIRBLKSIZ - blksiz;
3907 			if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
3908 				NFSBZERO(uiop->uio_iov->iov_base, left);
3909 				dp->d_reclen += left;
3910 				uiop->uio_iov->iov_base =
3911 				    (char *)uiop->uio_iov->iov_base + left;
3912 				uiop->uio_iov->iov_len -= left;
3913 				uiop->uio_resid -= left;
3914 				uiop->uio_offset += left;
3915 				blksiz = 0;
3916 			}
3917 			if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
3918 			    uiop->uio_resid)
3919 				bigenough = 0;
3920 			if (bigenough) {
3921 				dp = (struct dirent *)uiop->uio_iov->iov_base;
3922 				dp->d_pad0 = dp->d_pad1 = 0;
3923 				dp->d_off = 0;
3924 				dp->d_namlen = len;
3925 				dp->d_reclen = _GENERIC_DIRLEN(len) +
3926 				    NFSX_HYPER;
3927 				dp->d_type = DT_UNKNOWN;
3928 				blksiz += dp->d_reclen;
3929 				if (blksiz == DIRBLKSIZ)
3930 					blksiz = 0;
3931 				uiop->uio_resid -= DIRHDSIZ;
3932 				uiop->uio_offset += DIRHDSIZ;
3933 				uiop->uio_iov->iov_base =
3934 				    (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
3935 				uiop->uio_iov->iov_len -= DIRHDSIZ;
3936 				cnp->cn_nameptr = uiop->uio_iov->iov_base;
3937 				cnp->cn_namelen = len;
3938 				NFSCNHASHZERO(cnp);
3939 				error = nfsm_mbufuio(nd, uiop, len);
3940 				if (error)
3941 					goto nfsmout;
3942 				cp = uiop->uio_iov->iov_base;
3943 				tlen -= len;
3944 				NFSBZERO(cp, tlen);
3945 				cp += tlen;	/* points to cookie storage */
3946 				tl2 = (u_int32_t *)cp;
3947 				if (len == 2 && cnp->cn_nameptr[0] == '.' &&
3948 				    cnp->cn_nameptr[1] == '.')
3949 					isdotdot = 1;
3950 				else
3951 					isdotdot = 0;
3952 				uiop->uio_iov->iov_base =
3953 				    (char *)uiop->uio_iov->iov_base + tlen +
3954 				    NFSX_HYPER;
3955 				uiop->uio_iov->iov_len -= tlen + NFSX_HYPER;
3956 				uiop->uio_resid -= tlen + NFSX_HYPER;
3957 				uiop->uio_offset += (tlen + NFSX_HYPER);
3958 			} else {
3959 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3960 				if (error)
3961 					goto nfsmout;
3962 			}
3963 			nfhp = NULL;
3964 			if (nd->nd_flag & ND_NFSV3) {
3965 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3966 				ncookie.lval[0] = *tl++;
3967 				ncookie.lval[1] = *tl++;
3968 				attrflag = fxdr_unsigned(int, *tl);
3969 				if (attrflag) {
3970 				  error = nfsm_loadattr(nd, &nfsva);
3971 				  if (error)
3972 					goto nfsmout;
3973 				}
3974 				NFSM_DISSECT(tl,u_int32_t *,NFSX_UNSIGNED);
3975 				if (*tl) {
3976 					error = nfsm_getfh(nd, &nfhp);
3977 					if (error)
3978 					    goto nfsmout;
3979 				}
3980 				if (!attrflag && nfhp != NULL) {
3981 					free(nfhp, M_NFSFH);
3982 					nfhp = NULL;
3983 				}
3984 			} else {
3985 				rderr = 0;
3986 				nfsva.na_mntonfileno = 0xffffffff;
3987 				error = nfsv4_loadattr(nd, NULL, &nfsva, &nfhp,
3988 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3989 				    NULL, NULL, &rderr, p, cred);
3990 				if (error)
3991 					goto nfsmout;
3992 			}
3993 
3994 			if (bigenough) {
3995 			    if (nd->nd_flag & ND_NFSV4) {
3996 				if (rderr) {
3997 				    dp->d_fileno = 0;
3998 				} else if (gotmnton) {
3999 				    if (nfsva.na_mntonfileno != 0xffffffff)
4000 					dp->d_fileno = nfsva.na_mntonfileno;
4001 				    else
4002 					dp->d_fileno = nfsva.na_fileid;
4003 				} else if (nfsva.na_filesid[0] ==
4004 				    dnp->n_vattr.na_filesid[0] &&
4005 				    nfsva.na_filesid[1] ==
4006 				    dnp->n_vattr.na_filesid[1]) {
4007 				    dp->d_fileno = nfsva.na_fileid;
4008 				} else {
4009 				    do {
4010 					fakefileno--;
4011 				    } while (fakefileno ==
4012 					nfsva.na_fileid);
4013 				    dp->d_fileno = fakefileno;
4014 				}
4015 			    } else {
4016 				dp->d_fileno = fileno;
4017 			    }
4018 			    *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
4019 				ncookie.lval[0];
4020 			    *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
4021 				ncookie.lval[1];
4022 
4023 			    if (nfhp != NULL) {
4024 				attr_ok = true;
4025 				if (NFSRV_CMPFH(nfhp->nfh_fh, nfhp->nfh_len,
4026 				    dnp->n_fhp->nfh_fh, dnp->n_fhp->nfh_len)) {
4027 				    VREF(vp);
4028 				    newvp = vp;
4029 				    unlocknewvp = 0;
4030 				    free(nfhp, M_NFSFH);
4031 				    np = dnp;
4032 				} else if (isdotdot != 0) {
4033 				    /*
4034 				     * Skip doing a nfscl_nget() call for "..".
4035 				     * There's a race between acquiring the nfs
4036 				     * node here and lookups that look for the
4037 				     * directory being read (in the parent).
4038 				     * It would try to get a lock on ".." here,
4039 				     * owning the lock on the directory being
4040 				     * read. Lookup will hold the lock on ".."
4041 				     * and try to acquire the lock on the
4042 				     * directory being read.
4043 				     * If the directory is unlocked/relocked,
4044 				     * then there is a LOR with the buflock
4045 				     * vp is relocked.
4046 				     */
4047 				    free(nfhp, M_NFSFH);
4048 				} else {
4049 				    error = nfscl_nget(vp->v_mount, vp,
4050 				      nfhp, cnp, p, &np, LK_EXCLUSIVE);
4051 				    if (!error) {
4052 					newvp = NFSTOV(np);
4053 					unlocknewvp = 1;
4054 					/*
4055 					 * If n_localmodtime >= time before RPC,
4056 					 * then a file modification operation,
4057 					 * such as VOP_SETATTR() of size, has
4058 					 * occurred while the Lookup RPC and
4059 					 * acquisition of the vnode happened. As
4060 					 * such, the attributes might be stale,
4061 					 * with possibly an incorrect size.
4062 					 */
4063 					NFSLOCKNODE(np);
4064 					if (timespecisset(
4065 					    &np->n_localmodtime) &&
4066 					    timespeccmp(&np->n_localmodtime,
4067 					    &ts, >=)) {
4068 					    NFSCL_DEBUG(4, "nfsrpc_readdirplus:"
4069 						" localmod stale attributes\n");
4070 					    attr_ok = false;
4071 					}
4072 					NFSUNLOCKNODE(np);
4073 				    }
4074 				}
4075 				nfhp = NULL;
4076 				if (newvp != NULLVP) {
4077 				    if (attr_ok)
4078 					error = nfscl_loadattrcache(&newvp,
4079 					    &nfsva, NULL, 0, 0);
4080 				    if (error) {
4081 					if (unlocknewvp)
4082 					    vput(newvp);
4083 					else
4084 					    vrele(newvp);
4085 					goto nfsmout;
4086 				    }
4087 				    dp->d_type =
4088 					vtonfs_dtype(np->n_vattr.na_type);
4089 				    ndp->ni_vp = newvp;
4090 				    NFSCNHASH(cnp, HASHINIT);
4091 				    if (cnp->cn_namelen <= NCHNAMLEN &&
4092 					ndp->ni_dvp != ndp->ni_vp &&
4093 					(newvp->v_type != VDIR ||
4094 					 dctime.tv_sec != 0)) {
4095 					cache_enter_time_flags(ndp->ni_dvp,
4096 					    ndp->ni_vp, cnp,
4097 					    &nfsva.na_ctime,
4098 					    newvp->v_type != VDIR ? NULL :
4099 					    &dctime, VFS_CACHE_DROPOLD);
4100 				    }
4101 				    if (unlocknewvp)
4102 					vput(newvp);
4103 				    else
4104 					vrele(newvp);
4105 				    newvp = NULLVP;
4106 				}
4107 			    }
4108 			} else if (nfhp != NULL) {
4109 			    free(nfhp, M_NFSFH);
4110 			}
4111 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
4112 			more_dirs = fxdr_unsigned(int, *tl);
4113 		}
4114 		/*
4115 		 * If at end of rpc data, get the eof boolean
4116 		 */
4117 		if (!more_dirs) {
4118 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
4119 			eof = fxdr_unsigned(int, *tl);
4120 			if (tryformoredirs)
4121 				more_dirs = !eof;
4122 			if (nd->nd_flag & ND_NFSV4) {
4123 				error = nfscl_postop_attr(nd, nap, attrflagp);
4124 				if (error)
4125 					goto nfsmout;
4126 			}
4127 		}
4128 		m_freem(nd->nd_mrep);
4129 		nd->nd_mrep = NULL;
4130 	}
4131 	/*
4132 	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
4133 	 * by increasing d_reclen for the last record.
4134 	 */
4135 	if (blksiz > 0) {
4136 		left = DIRBLKSIZ - blksiz;
4137 		NFSBZERO(uiop->uio_iov->iov_base, left);
4138 		dp->d_reclen += left;
4139 		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
4140 		    left;
4141 		uiop->uio_iov->iov_len -= left;
4142 		uiop->uio_resid -= left;
4143 		uiop->uio_offset += left;
4144 	}
4145 
4146 	/*
4147 	 * If returning no data, assume end of file.
4148 	 * If not bigenough, return not end of file, since you aren't
4149 	 *    returning all the data
4150 	 * Otherwise, return the eof flag from the server.
4151 	 */
4152 	if (eofp != NULL) {
4153 		if (tresid == uiop->uio_resid)
4154 			*eofp = 1;
4155 		else if (!bigenough)
4156 			*eofp = 0;
4157 		else
4158 			*eofp = eof;
4159 	}
4160 
4161 	/*
4162 	 * Add extra empty records to any remaining DIRBLKSIZ chunks.
4163 	 */
4164 	while (uiop->uio_resid > 0 && uiop->uio_resid != tresid) {
4165 		dp = (struct dirent *)uiop->uio_iov->iov_base;
4166 		NFSBZERO(dp, DIRBLKSIZ);
4167 		dp->d_type = DT_UNKNOWN;
4168 		tl = (u_int32_t *)&dp->d_name[4];
4169 		*tl++ = cookie.lval[0];
4170 		*tl = cookie.lval[1];
4171 		dp->d_reclen = DIRBLKSIZ;
4172 		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
4173 		    DIRBLKSIZ;
4174 		uiop->uio_iov->iov_len -= DIRBLKSIZ;
4175 		uiop->uio_resid -= DIRBLKSIZ;
4176 		uiop->uio_offset += DIRBLKSIZ;
4177 	}
4178 
4179 nfsmout:
4180 	if (nd->nd_mrep != NULL)
4181 		m_freem(nd->nd_mrep);
4182 	return (error);
4183 }
4184 
4185 /*
4186  * Nfs commit rpc
4187  */
4188 int
4189 nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred,
4190     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
4191 {
4192 	u_int32_t *tl;
4193 	struct nfsrv_descript nfsd, *nd = &nfsd;
4194 	nfsattrbit_t attrbits;
4195 	int error;
4196 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4197 
4198 	*attrflagp = 0;
4199 	NFSCL_REQSTART(nd, NFSPROC_COMMIT, vp, cred);
4200 	NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
4201 	txdr_hyper(offset, tl);
4202 	tl += 2;
4203 	*tl = txdr_unsigned(cnt);
4204 	if (nd->nd_flag & ND_NFSV4) {
4205 		/*
4206 		 * And do a Getattr op.
4207 		 */
4208 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4209 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
4210 		NFSGETATTR_ATTRBIT(&attrbits);
4211 		(void) nfsrv_putattrbit(nd, &attrbits);
4212 	}
4213 	error = nfscl_request(nd, vp, p, cred);
4214 	if (error)
4215 		return (error);
4216 	error = nfscl_wcc_data(nd, vp, nap, attrflagp, NULL, NULL);
4217 	if (!error && !nd->nd_repstat) {
4218 		NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
4219 		NFSLOCKMNT(nmp);
4220 		if (NFSBCMP(nmp->nm_verf, tl, NFSX_VERF)) {
4221 			NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
4222 			nd->nd_repstat = NFSERR_STALEWRITEVERF;
4223 		}
4224 		NFSUNLOCKMNT(nmp);
4225 		if (nd->nd_flag & ND_NFSV4)
4226 			error = nfscl_postop_attr(nd, nap, attrflagp);
4227 	}
4228 nfsmout:
4229 	if (!error && nd->nd_repstat)
4230 		error = nd->nd_repstat;
4231 	m_freem(nd->nd_mrep);
4232 	return (error);
4233 }
4234 
4235 /*
4236  * NFS byte range lock rpc.
4237  * (Mostly just calls one of the three lower level RPC routines.)
4238  */
4239 int
4240 nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
4241     int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags)
4242 {
4243 	struct nfscllockowner *lp;
4244 	struct nfsclclient *clp;
4245 	struct nfsfh *nfhp;
4246 	struct nfsrv_descript nfsd, *nd = &nfsd;
4247 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4248 	u_int64_t off, len;
4249 	off_t start, end;
4250 	u_int32_t clidrev = 0;
4251 	int error = 0, newone = 0, expireret = 0, retrycnt, donelocally;
4252 	int callcnt, dorpc;
4253 
4254 	/*
4255 	 * Convert the flock structure into a start and end and do POSIX
4256 	 * bounds checking.
4257 	 */
4258 	switch (fl->l_whence) {
4259 	case SEEK_SET:
4260 	case SEEK_CUR:
4261 		/*
4262 		 * Caller is responsible for adding any necessary offset
4263 		 * when SEEK_CUR is used.
4264 		 */
4265 		start = fl->l_start;
4266 		off = fl->l_start;
4267 		break;
4268 	case SEEK_END:
4269 		start = size + fl->l_start;
4270 		off = size + fl->l_start;
4271 		break;
4272 	default:
4273 		return (EINVAL);
4274 	}
4275 	if (start < 0)
4276 		return (EINVAL);
4277 	if (fl->l_len != 0) {
4278 		end = start + fl->l_len - 1;
4279 		if (end < start)
4280 			return (EINVAL);
4281 	}
4282 
4283 	len = fl->l_len;
4284 	if (len == 0)
4285 		len = NFS64BITSSET;
4286 	retrycnt = 0;
4287 	do {
4288 	    nd->nd_repstat = 0;
4289 	    if (op == F_GETLK) {
4290 		error = nfscl_getcl(vp->v_mount, cred, p, false, true, &clp);
4291 		if (error)
4292 			return (error);
4293 		error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags);
4294 		if (!error) {
4295 			clidrev = clp->nfsc_clientidrev;
4296 			error = nfsrpc_lockt(nd, vp, clp, off, len, fl, cred,
4297 			    p, id, flags);
4298 		} else if (error == -1) {
4299 			error = 0;
4300 		}
4301 		nfscl_clientrelease(clp);
4302 	    } else if (op == F_UNLCK && fl->l_type == F_UNLCK) {
4303 		/*
4304 		 * We must loop around for all lockowner cases.
4305 		 */
4306 		callcnt = 0;
4307 		error = nfscl_getcl(vp->v_mount, cred, p, false, true, &clp);
4308 		if (error)
4309 			return (error);
4310 		do {
4311 		    error = nfscl_relbytelock(vp, off, len, cred, p, callcnt,
4312 			clp, id, flags, &lp, &dorpc);
4313 		    /*
4314 		     * If it returns a NULL lp, we're done.
4315 		     */
4316 		    if (lp == NULL) {
4317 			if (callcnt == 0)
4318 			    nfscl_clientrelease(clp);
4319 			else
4320 			    nfscl_releasealllocks(clp, vp, p, id, flags);
4321 			return (error);
4322 		    }
4323 		    if (nmp->nm_clp != NULL)
4324 			clidrev = nmp->nm_clp->nfsc_clientidrev;
4325 		    else
4326 			clidrev = 0;
4327 		    /*
4328 		     * If the server doesn't support Posix lock semantics,
4329 		     * only allow locks on the entire file, since it won't
4330 		     * handle overlapping byte ranges.
4331 		     * There might still be a problem when a lock
4332 		     * upgrade/downgrade (read<->write) occurs, since the
4333 		     * server "might" expect an unlock first?
4334 		     */
4335 		    if (dorpc && (lp->nfsl_open->nfso_posixlock ||
4336 			(off == 0 && len == NFS64BITSSET))) {
4337 			/*
4338 			 * Since the lock records will go away, we must
4339 			 * wait for grace and delay here.
4340 			 */
4341 			do {
4342 			    error = nfsrpc_locku(nd, nmp, lp, off, len,
4343 				NFSV4LOCKT_READ, cred, p, 0);
4344 			    if ((nd->nd_repstat == NFSERR_GRACE ||
4345 				 nd->nd_repstat == NFSERR_DELAY) &&
4346 				error == 0)
4347 				(void) nfs_catnap(PZERO, (int)nd->nd_repstat,
4348 				    "nfs_advlock");
4349 			} while ((nd->nd_repstat == NFSERR_GRACE ||
4350 			    nd->nd_repstat == NFSERR_DELAY) && error == 0);
4351 		    }
4352 		    callcnt++;
4353 		} while (error == 0 && nd->nd_repstat == 0);
4354 		nfscl_releasealllocks(clp, vp, p, id, flags);
4355 	    } else if (op == F_SETLK) {
4356 		error = nfscl_getbytelock(vp, off, len, fl->l_type, cred, p,
4357 		    NULL, 0, id, flags, NULL, NULL, &lp, &newone, &donelocally);
4358 		if (error || donelocally) {
4359 			return (error);
4360 		}
4361 		if (nmp->nm_clp != NULL)
4362 			clidrev = nmp->nm_clp->nfsc_clientidrev;
4363 		else
4364 			clidrev = 0;
4365 		nfhp = VTONFS(vp)->n_fhp;
4366 		if (!lp->nfsl_open->nfso_posixlock &&
4367 		    (off != 0 || len != NFS64BITSSET)) {
4368 			error = EINVAL;
4369 		} else {
4370 			error = nfsrpc_lock(nd, nmp, vp, nfhp->nfh_fh,
4371 			    nfhp->nfh_len, lp, newone, reclaim, off,
4372 			    len, fl->l_type, cred, p, 0);
4373 		}
4374 		if (!error)
4375 			error = nd->nd_repstat;
4376 		nfscl_lockrelease(lp, error, newone);
4377 	    } else {
4378 		error = EINVAL;
4379 	    }
4380 	    if (!error)
4381 	        error = nd->nd_repstat;
4382 	    if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
4383 		error == NFSERR_STALEDONTRECOVER ||
4384 		error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
4385 		error == NFSERR_BADSESSION) {
4386 		(void) nfs_catnap(PZERO, error, "nfs_advlock");
4387 	    } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
4388 		&& clidrev != 0) {
4389 		expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
4390 		retrycnt++;
4391 	    }
4392 	} while (error == NFSERR_GRACE ||
4393 	    error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
4394 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALESTATEID ||
4395 	    error == NFSERR_BADSESSION ||
4396 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
4397 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
4398 	if (error && retrycnt >= 4)
4399 		error = EIO;
4400 	return (error);
4401 }
4402 
4403 /*
4404  * The lower level routine for the LockT case.
4405  */
4406 int
4407 nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp,
4408     struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl,
4409     struct ucred *cred, NFSPROC_T *p, void *id, int flags)
4410 {
4411 	u_int32_t *tl;
4412 	int error, type, size;
4413 	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4414 	struct nfsnode *np;
4415 	struct nfsmount *nmp;
4416 	struct nfsclsession *tsep;
4417 
4418 	nmp = VFSTONFS(vp->v_mount);
4419 	NFSCL_REQSTART(nd, NFSPROC_LOCKT, vp, cred);
4420 	NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
4421 	if (fl->l_type == F_RDLCK)
4422 		*tl++ = txdr_unsigned(NFSV4LOCKT_READ);
4423 	else
4424 		*tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
4425 	txdr_hyper(off, tl);
4426 	tl += 2;
4427 	txdr_hyper(len, tl);
4428 	tl += 2;
4429 	tsep = nfsmnt_mdssession(nmp);
4430 	*tl++ = tsep->nfsess_clientid.lval[0];
4431 	*tl = tsep->nfsess_clientid.lval[1];
4432 	nfscl_filllockowner(id, own, flags);
4433 	np = VTONFS(vp);
4434 	NFSBCOPY(np->n_fhp->nfh_fh, &own[NFSV4CL_LOCKNAMELEN],
4435 	    np->n_fhp->nfh_len);
4436 	(void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + np->n_fhp->nfh_len);
4437 	error = nfscl_request(nd, vp, p, cred);
4438 	if (error)
4439 		return (error);
4440 	if (nd->nd_repstat == 0) {
4441 		fl->l_type = F_UNLCK;
4442 	} else if (nd->nd_repstat == NFSERR_DENIED) {
4443 		nd->nd_repstat = 0;
4444 		fl->l_whence = SEEK_SET;
4445 		NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
4446 		fl->l_start = fxdr_hyper(tl);
4447 		tl += 2;
4448 		len = fxdr_hyper(tl);
4449 		tl += 2;
4450 		if (len == NFS64BITSSET)
4451 			fl->l_len = 0;
4452 		else
4453 			fl->l_len = len;
4454 		type = fxdr_unsigned(int, *tl++);
4455 		if (type == NFSV4LOCKT_WRITE)
4456 			fl->l_type = F_WRLCK;
4457 		else
4458 			fl->l_type = F_RDLCK;
4459 		/*
4460 		 * XXX For now, I have no idea what to do with the
4461 		 * conflicting lock_owner, so I'll just set the pid == 0
4462 		 * and skip over the lock_owner.
4463 		 */
4464 		fl->l_pid = (pid_t)0;
4465 		tl += 2;
4466 		size = fxdr_unsigned(int, *tl);
4467 		if (size < 0 || size > NFSV4_OPAQUELIMIT)
4468 			error = EBADRPC;
4469 		if (!error)
4470 			error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
4471 	} else if (nd->nd_repstat == NFSERR_STALECLIENTID)
4472 		nfscl_initiate_recovery(clp);
4473 nfsmout:
4474 	m_freem(nd->nd_mrep);
4475 	return (error);
4476 }
4477 
4478 /*
4479  * Lower level function that performs the LockU RPC.
4480  */
4481 static int
4482 nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp,
4483     struct nfscllockowner *lp, u_int64_t off, u_int64_t len,
4484     u_int32_t type, struct ucred *cred, NFSPROC_T *p, int syscred)
4485 {
4486 	u_int32_t *tl;
4487 	int error;
4488 
4489 	nfscl_reqstart(nd, NFSPROC_LOCKU, nmp, lp->nfsl_open->nfso_fh,
4490 	    lp->nfsl_open->nfso_fhlen, NULL, NULL, 0, 0, cred);
4491 	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED);
4492 	*tl++ = txdr_unsigned(type);
4493 	*tl = txdr_unsigned(lp->nfsl_seqid);
4494 	if (nfstest_outofseq &&
4495 	    (arc4random() % nfstest_outofseq) == 0)
4496 		*tl = txdr_unsigned(lp->nfsl_seqid + 1);
4497 	tl++;
4498 	if (NFSHASNFSV4N(nmp))
4499 		*tl++ = 0;
4500 	else
4501 		*tl++ = lp->nfsl_stateid.seqid;
4502 	*tl++ = lp->nfsl_stateid.other[0];
4503 	*tl++ = lp->nfsl_stateid.other[1];
4504 	*tl++ = lp->nfsl_stateid.other[2];
4505 	txdr_hyper(off, tl);
4506 	tl += 2;
4507 	txdr_hyper(len, tl);
4508 	if (syscred)
4509 		nd->nd_flag |= ND_USEGSSNAME;
4510 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4511 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4512 	NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
4513 	if (error)
4514 		return (error);
4515 	if (nd->nd_repstat == 0) {
4516 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
4517 		lp->nfsl_stateid.seqid = *tl++;
4518 		lp->nfsl_stateid.other[0] = *tl++;
4519 		lp->nfsl_stateid.other[1] = *tl++;
4520 		lp->nfsl_stateid.other[2] = *tl;
4521 	} else if (nd->nd_repstat == NFSERR_STALESTATEID)
4522 		nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
4523 nfsmout:
4524 	m_freem(nd->nd_mrep);
4525 	return (error);
4526 }
4527 
4528 /*
4529  * The actual Lock RPC.
4530  */
4531 int
4532 nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp,
4533     u_int8_t *nfhp, int fhlen, struct nfscllockowner *lp, int newone,
4534     int reclaim, u_int64_t off, u_int64_t len, short type, struct ucred *cred,
4535     NFSPROC_T *p, int syscred)
4536 {
4537 	u_int32_t *tl;
4538 	int error, size;
4539 	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4540 	struct nfsclsession *tsep;
4541 
4542 	nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL, NULL, 0, 0,
4543 	    cred);
4544 	NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
4545 	if (type == F_RDLCK)
4546 		*tl++ = txdr_unsigned(NFSV4LOCKT_READ);
4547 	else
4548 		*tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
4549 	*tl++ = txdr_unsigned(reclaim);
4550 	txdr_hyper(off, tl);
4551 	tl += 2;
4552 	txdr_hyper(len, tl);
4553 	tl += 2;
4554 	if (newone) {
4555 	    *tl = newnfs_true;
4556 	    NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
4557 		2 * NFSX_UNSIGNED + NFSX_HYPER);
4558 	    *tl++ = txdr_unsigned(lp->nfsl_open->nfso_own->nfsow_seqid);
4559 	    if (NFSHASNFSV4N(nmp))
4560 		*tl++ = 0;
4561 	    else
4562 		*tl++ = lp->nfsl_open->nfso_stateid.seqid;
4563 	    *tl++ = lp->nfsl_open->nfso_stateid.other[0];
4564 	    *tl++ = lp->nfsl_open->nfso_stateid.other[1];
4565 	    *tl++ = lp->nfsl_open->nfso_stateid.other[2];
4566 	    *tl++ = txdr_unsigned(lp->nfsl_seqid);
4567 	    tsep = nfsmnt_mdssession(nmp);
4568 	    *tl++ = tsep->nfsess_clientid.lval[0];
4569 	    *tl = tsep->nfsess_clientid.lval[1];
4570 	    NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4571 	    NFSBCOPY(nfhp, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4572 	    (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4573 	} else {
4574 	    *tl = newnfs_false;
4575 	    NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED);
4576 	    if (NFSHASNFSV4N(nmp))
4577 		*tl++ = 0;
4578 	    else
4579 		*tl++ = lp->nfsl_stateid.seqid;
4580 	    *tl++ = lp->nfsl_stateid.other[0];
4581 	    *tl++ = lp->nfsl_stateid.other[1];
4582 	    *tl++ = lp->nfsl_stateid.other[2];
4583 	    *tl = txdr_unsigned(lp->nfsl_seqid);
4584 	    if (nfstest_outofseq &&
4585 		(arc4random() % nfstest_outofseq) == 0)
4586 		    *tl = txdr_unsigned(lp->nfsl_seqid + 1);
4587 	}
4588 	if (syscred)
4589 		nd->nd_flag |= ND_USEGSSNAME;
4590 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
4591 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4592 	if (error)
4593 		return (error);
4594 	if (newone)
4595 	    NFSCL_INCRSEQID(lp->nfsl_open->nfso_own->nfsow_seqid, nd);
4596 	NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
4597 	if (nd->nd_repstat == 0) {
4598 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
4599 		lp->nfsl_stateid.seqid = *tl++;
4600 		lp->nfsl_stateid.other[0] = *tl++;
4601 		lp->nfsl_stateid.other[1] = *tl++;
4602 		lp->nfsl_stateid.other[2] = *tl;
4603 	} else if (nd->nd_repstat == NFSERR_DENIED) {
4604 		NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
4605 		size = fxdr_unsigned(int, *(tl + 7));
4606 		if (size < 0 || size > NFSV4_OPAQUELIMIT)
4607 			error = EBADRPC;
4608 		if (!error)
4609 			error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
4610 	} else if (nd->nd_repstat == NFSERR_STALESTATEID)
4611 		nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
4612 nfsmout:
4613 	m_freem(nd->nd_mrep);
4614 	return (error);
4615 }
4616 
4617 /*
4618  * nfs statfs rpc
4619  * (always called with the vp for the mount point)
4620  */
4621 int
4622 nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp,
4623     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
4624 {
4625 	u_int32_t *tl = NULL;
4626 	struct nfsrv_descript nfsd, *nd = &nfsd;
4627 	struct nfsmount *nmp;
4628 	nfsattrbit_t attrbits;
4629 	int error;
4630 
4631 	*attrflagp = 0;
4632 	nmp = VFSTONFS(vp->v_mount);
4633 	if (NFSHASNFSV4(nmp)) {
4634 		/*
4635 		 * For V4, you actually do a getattr.
4636 		 */
4637 		NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp, cred);
4638 		NFSSTATFS_GETATTRBIT(&attrbits);
4639 		(void) nfsrv_putattrbit(nd, &attrbits);
4640 		nd->nd_flag |= ND_USEGSSNAME;
4641 		error = nfscl_request(nd, vp, p, cred);
4642 		if (error)
4643 			return (error);
4644 		if (nd->nd_repstat == 0) {
4645 			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4646 			    NULL, NULL, sbp, fsp, NULL, 0, NULL, NULL, NULL, p,
4647 			    cred);
4648 			if (!error) {
4649 				nmp->nm_fsid[0] = nap->na_filesid[0];
4650 				nmp->nm_fsid[1] = nap->na_filesid[1];
4651 				NFSSETHASSETFSID(nmp);
4652 				*attrflagp = 1;
4653 			}
4654 		} else {
4655 			error = nd->nd_repstat;
4656 		}
4657 		if (error)
4658 			goto nfsmout;
4659 	} else {
4660 		NFSCL_REQSTART(nd, NFSPROC_FSSTAT, vp, NULL);
4661 		error = nfscl_request(nd, vp, p, cred);
4662 		if (error)
4663 			return (error);
4664 		if (nd->nd_flag & ND_NFSV3) {
4665 			error = nfscl_postop_attr(nd, nap, attrflagp);
4666 			if (error)
4667 				goto nfsmout;
4668 		}
4669 		if (nd->nd_repstat) {
4670 			error = nd->nd_repstat;
4671 			goto nfsmout;
4672 		}
4673 		NFSM_DISSECT(tl, u_int32_t *,
4674 		    NFSX_STATFS(nd->nd_flag & ND_NFSV3));
4675 	}
4676 	if (NFSHASNFSV3(nmp)) {
4677 		sbp->sf_tbytes = fxdr_hyper(tl); tl += 2;
4678 		sbp->sf_fbytes = fxdr_hyper(tl); tl += 2;
4679 		sbp->sf_abytes = fxdr_hyper(tl); tl += 2;
4680 		sbp->sf_tfiles = fxdr_hyper(tl); tl += 2;
4681 		sbp->sf_ffiles = fxdr_hyper(tl); tl += 2;
4682 		sbp->sf_afiles = fxdr_hyper(tl); tl += 2;
4683 		sbp->sf_invarsec = fxdr_unsigned(u_int32_t, *tl);
4684 	} else if (NFSHASNFSV4(nmp) == 0) {
4685 		sbp->sf_tsize = fxdr_unsigned(u_int32_t, *tl++);
4686 		sbp->sf_bsize = fxdr_unsigned(u_int32_t, *tl++);
4687 		sbp->sf_blocks = fxdr_unsigned(u_int32_t, *tl++);
4688 		sbp->sf_bfree = fxdr_unsigned(u_int32_t, *tl++);
4689 		sbp->sf_bavail = fxdr_unsigned(u_int32_t, *tl);
4690 	}
4691 nfsmout:
4692 	m_freem(nd->nd_mrep);
4693 	return (error);
4694 }
4695 
4696 /*
4697  * nfs pathconf rpc
4698  */
4699 int
4700 nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc,
4701     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
4702 {
4703 	struct nfsrv_descript nfsd, *nd = &nfsd;
4704 	struct nfsmount *nmp;
4705 	u_int32_t *tl;
4706 	nfsattrbit_t attrbits;
4707 	int error;
4708 
4709 	*attrflagp = 0;
4710 	nmp = VFSTONFS(vp->v_mount);
4711 	if (NFSHASNFSV4(nmp)) {
4712 		/*
4713 		 * For V4, you actually do a getattr.
4714 		 */
4715 		NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp, cred);
4716 		NFSPATHCONF_GETATTRBIT(&attrbits);
4717 		(void) nfsrv_putattrbit(nd, &attrbits);
4718 		nd->nd_flag |= ND_USEGSSNAME;
4719 		error = nfscl_request(nd, vp, p, cred);
4720 		if (error)
4721 			return (error);
4722 		if (nd->nd_repstat == 0) {
4723 			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4724 			    pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p,
4725 			    cred);
4726 			if (!error)
4727 				*attrflagp = 1;
4728 		} else {
4729 			error = nd->nd_repstat;
4730 		}
4731 	} else {
4732 		NFSCL_REQSTART(nd, NFSPROC_PATHCONF, vp, NULL);
4733 		error = nfscl_request(nd, vp, p, cred);
4734 		if (error)
4735 			return (error);
4736 		error = nfscl_postop_attr(nd, nap, attrflagp);
4737 		if (nd->nd_repstat && !error)
4738 			error = nd->nd_repstat;
4739 		if (!error) {
4740 			NFSM_DISSECT(tl, u_int32_t *, NFSX_V3PATHCONF);
4741 			pc->pc_linkmax = fxdr_unsigned(u_int32_t, *tl++);
4742 			pc->pc_namemax = fxdr_unsigned(u_int32_t, *tl++);
4743 			pc->pc_notrunc = fxdr_unsigned(u_int32_t, *tl++);
4744 			pc->pc_chownrestricted =
4745 			    fxdr_unsigned(u_int32_t, *tl++);
4746 			pc->pc_caseinsensitive =
4747 			    fxdr_unsigned(u_int32_t, *tl++);
4748 			pc->pc_casepreserving = fxdr_unsigned(u_int32_t, *tl);
4749 		}
4750 	}
4751 nfsmout:
4752 	m_freem(nd->nd_mrep);
4753 	return (error);
4754 }
4755 
4756 /*
4757  * nfs version 3 fsinfo rpc call
4758  */
4759 int
4760 nfsrpc_fsinfo(vnode_t vp, struct nfsfsinfo *fsp, struct ucred *cred,
4761     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
4762 {
4763 	u_int32_t *tl;
4764 	struct nfsrv_descript nfsd, *nd = &nfsd;
4765 	int error;
4766 
4767 	*attrflagp = 0;
4768 	NFSCL_REQSTART(nd, NFSPROC_FSINFO, vp, cred);
4769 	error = nfscl_request(nd, vp, p, cred);
4770 	if (error)
4771 		return (error);
4772 	error = nfscl_postop_attr(nd, nap, attrflagp);
4773 	if (nd->nd_repstat && !error)
4774 		error = nd->nd_repstat;
4775 	if (!error) {
4776 		NFSM_DISSECT(tl, u_int32_t *, NFSX_V3FSINFO);
4777 		fsp->fs_rtmax = fxdr_unsigned(u_int32_t, *tl++);
4778 		fsp->fs_rtpref = fxdr_unsigned(u_int32_t, *tl++);
4779 		fsp->fs_rtmult = fxdr_unsigned(u_int32_t, *tl++);
4780 		fsp->fs_wtmax = fxdr_unsigned(u_int32_t, *tl++);
4781 		fsp->fs_wtpref = fxdr_unsigned(u_int32_t, *tl++);
4782 		fsp->fs_wtmult = fxdr_unsigned(u_int32_t, *tl++);
4783 		fsp->fs_dtpref = fxdr_unsigned(u_int32_t, *tl++);
4784 		fsp->fs_maxfilesize = fxdr_hyper(tl);
4785 		tl += 2;
4786 		fxdr_nfsv3time(tl, &fsp->fs_timedelta);
4787 		tl += 2;
4788 		fsp->fs_properties = fxdr_unsigned(u_int32_t, *tl);
4789 	}
4790 nfsmout:
4791 	m_freem(nd->nd_mrep);
4792 	return (error);
4793 }
4794 
4795 /*
4796  * This function performs the Renew RPC.
4797  */
4798 int
4799 nfsrpc_renew(struct nfsclclient *clp, struct nfsclds *dsp, struct ucred *cred,
4800     NFSPROC_T *p)
4801 {
4802 	u_int32_t *tl;
4803 	struct nfsrv_descript nfsd;
4804 	struct nfsrv_descript *nd = &nfsd;
4805 	struct nfsmount *nmp;
4806 	int error;
4807 	struct nfssockreq *nrp;
4808 	struct nfsclsession *tsep;
4809 
4810 	nmp = clp->nfsc_nmp;
4811 	if (nmp == NULL)
4812 		return (0);
4813 	if (dsp == NULL)
4814 		nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, NULL, 0,
4815 		    0, cred);
4816 	else
4817 		nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL,
4818 		    &dsp->nfsclds_sess, 0, 0, NULL);
4819 	if (!NFSHASNFSV4N(nmp)) {
4820 		/* NFSv4.1 just uses a Sequence Op and not a Renew. */
4821 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4822 		tsep = nfsmnt_mdssession(nmp);
4823 		*tl++ = tsep->nfsess_clientid.lval[0];
4824 		*tl = tsep->nfsess_clientid.lval[1];
4825 	}
4826 	nrp = NULL;
4827 	if (dsp != NULL)
4828 		nrp = dsp->nfsclds_sockp;
4829 	if (nrp == NULL)
4830 		/* If NULL, use the MDS socket. */
4831 		nrp = &nmp->nm_sockreq;
4832 	nd->nd_flag |= ND_USEGSSNAME;
4833 	if (dsp == NULL)
4834 		error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4835 		    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4836 	else {
4837 		error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4838 		    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
4839 		if (error == ENXIO)
4840 			nfscl_cancelreqs(dsp);
4841 	}
4842 	if (error)
4843 		return (error);
4844 	error = nd->nd_repstat;
4845 	m_freem(nd->nd_mrep);
4846 	return (error);
4847 }
4848 
4849 /*
4850  * This function performs the Releaselockowner RPC.
4851  */
4852 int
4853 nfsrpc_rellockown(struct nfsmount *nmp, struct nfscllockowner *lp,
4854     uint8_t *fh, int fhlen, struct ucred *cred, NFSPROC_T *p)
4855 {
4856 	struct nfsrv_descript nfsd, *nd = &nfsd;
4857 	u_int32_t *tl;
4858 	int error;
4859 	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4860 	struct nfsclsession *tsep;
4861 
4862 	if (NFSHASNFSV4N(nmp)) {
4863 		/* For NFSv4.1, do a FreeStateID. */
4864 		nfscl_reqstart(nd, NFSPROC_FREESTATEID, nmp, NULL, 0, NULL,
4865 		    NULL, 0, 0, cred);
4866 		nfsm_stateidtom(nd, &lp->nfsl_stateid, NFSSTATEID_PUTSTATEID);
4867 	} else {
4868 		nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL,
4869 		    NULL, 0, 0, NULL);
4870 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4871 		tsep = nfsmnt_mdssession(nmp);
4872 		*tl++ = tsep->nfsess_clientid.lval[0];
4873 		*tl = tsep->nfsess_clientid.lval[1];
4874 		NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4875 		NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4876 		(void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4877 	}
4878 	nd->nd_flag |= ND_USEGSSNAME;
4879 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4880 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4881 	if (error)
4882 		return (error);
4883 	error = nd->nd_repstat;
4884 	m_freem(nd->nd_mrep);
4885 	return (error);
4886 }
4887 
4888 /*
4889  * This function performs the Compound to get the mount pt FH.
4890  */
4891 int
4892 nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred,
4893     NFSPROC_T *p)
4894 {
4895 	u_int32_t *tl;
4896 	struct nfsrv_descript nfsd;
4897 	struct nfsrv_descript *nd = &nfsd;
4898 	u_char *cp, *cp2;
4899 	int error, cnt, len, setnil;
4900 	u_int32_t *opcntp;
4901 
4902 	nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL, 0,
4903 	    0, NULL);
4904 	cp = dirpath;
4905 	cnt = 0;
4906 	do {
4907 		setnil = 0;
4908 		while (*cp == '/')
4909 			cp++;
4910 		cp2 = cp;
4911 		while (*cp2 != '\0' && *cp2 != '/')
4912 			cp2++;
4913 		if (*cp2 == '/') {
4914 			setnil = 1;
4915 			*cp2 = '\0';
4916 		}
4917 		if (cp2 != cp) {
4918 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4919 			*tl = txdr_unsigned(NFSV4OP_LOOKUP);
4920 			nfsm_strtom(nd, cp, strlen(cp));
4921 			cnt++;
4922 		}
4923 		if (setnil)
4924 			*cp2++ = '/';
4925 		cp = cp2;
4926 	} while (*cp != '\0');
4927 	if (NFSHASNFSV4N(nmp))
4928 		/* Has a Sequence Op done by nfscl_reqstart(). */
4929 		*opcntp = txdr_unsigned(3 + cnt);
4930 	else
4931 		*opcntp = txdr_unsigned(2 + cnt);
4932 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4933 	*tl = txdr_unsigned(NFSV4OP_GETFH);
4934 	nd->nd_flag |= ND_USEGSSNAME;
4935 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4936 		NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4937 	if (error)
4938 		return (error);
4939 	if (nd->nd_repstat == 0) {
4940 		NFSM_DISSECT(tl, u_int32_t *, (3 + 2 * cnt) * NFSX_UNSIGNED);
4941 		tl += (2 + 2 * cnt);
4942 		if ((len = fxdr_unsigned(int, *tl)) <= 0 ||
4943 			len > NFSX_FHMAX) {
4944 			nd->nd_repstat = NFSERR_BADXDR;
4945 		} else {
4946 			nd->nd_repstat = nfsrv_mtostr(nd, nmp->nm_fh, len);
4947 			if (nd->nd_repstat == 0)
4948 				nmp->nm_fhsize = len;
4949 		}
4950 	}
4951 	error = nd->nd_repstat;
4952 nfsmout:
4953 	m_freem(nd->nd_mrep);
4954 	return (error);
4955 }
4956 
4957 /*
4958  * This function performs the Delegreturn RPC.
4959  */
4960 int
4961 nfsrpc_delegreturn(struct nfscldeleg *dp, struct ucred *cred,
4962     struct nfsmount *nmp, NFSPROC_T *p, int syscred)
4963 {
4964 	u_int32_t *tl;
4965 	struct nfsrv_descript nfsd;
4966 	struct nfsrv_descript *nd = &nfsd;
4967 	int error;
4968 
4969 	nfscl_reqstart(nd, NFSPROC_DELEGRETURN, nmp, dp->nfsdl_fh,
4970 	    dp->nfsdl_fhlen, NULL, NULL, 0, 0, cred);
4971 	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
4972 	if (NFSHASNFSV4N(nmp))
4973 		*tl++ = 0;
4974 	else
4975 		*tl++ = dp->nfsdl_stateid.seqid;
4976 	*tl++ = dp->nfsdl_stateid.other[0];
4977 	*tl++ = dp->nfsdl_stateid.other[1];
4978 	*tl = dp->nfsdl_stateid.other[2];
4979 	if (syscred)
4980 		nd->nd_flag |= ND_USEGSSNAME;
4981 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4982 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4983 	if (error)
4984 		return (error);
4985 	error = nd->nd_repstat;
4986 	m_freem(nd->nd_mrep);
4987 	return (error);
4988 }
4989 
4990 /*
4991  * nfs getacl call.
4992  */
4993 int
4994 nfsrpc_getacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp)
4995 {
4996 	struct nfsrv_descript nfsd, *nd = &nfsd;
4997 	int error;
4998 	nfsattrbit_t attrbits;
4999 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
5000 
5001 	if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
5002 		return (EOPNOTSUPP);
5003 	NFSCL_REQSTART(nd, NFSPROC_GETACL, vp, cred);
5004 	NFSZERO_ATTRBIT(&attrbits);
5005 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
5006 	(void) nfsrv_putattrbit(nd, &attrbits);
5007 	error = nfscl_request(nd, vp, p, cred);
5008 	if (error)
5009 		return (error);
5010 	if (!nd->nd_repstat)
5011 		error = nfsv4_loadattr(nd, vp, NULL, NULL, NULL, 0, NULL,
5012 		    NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, p, cred);
5013 	else
5014 		error = nd->nd_repstat;
5015 	m_freem(nd->nd_mrep);
5016 	return (error);
5017 }
5018 
5019 /*
5020  * nfs setacl call.
5021  */
5022 int
5023 nfsrpc_setacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp)
5024 {
5025 	int error;
5026 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
5027 
5028 	if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
5029 		return (EOPNOTSUPP);
5030 	error = nfsrpc_setattr(vp, NULL, aclp, cred, p, NULL, NULL);
5031 	return (error);
5032 }
5033 
5034 /*
5035  * nfs setacl call.
5036  */
5037 static int
5038 nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
5039     struct acl *aclp, nfsv4stateid_t *stateidp)
5040 {
5041 	struct nfsrv_descript nfsd, *nd = &nfsd;
5042 	int error;
5043 	nfsattrbit_t attrbits;
5044 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
5045 
5046 	if (!NFSHASNFSV4(nmp))
5047 		return (EOPNOTSUPP);
5048 	NFSCL_REQSTART(nd, NFSPROC_SETACL, vp, cred);
5049 	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
5050 	NFSZERO_ATTRBIT(&attrbits);
5051 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
5052 	(void) nfsv4_fillattr(nd, vp->v_mount, vp, aclp, NULL, NULL, 0,
5053 	    &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL);
5054 	error = nfscl_request(nd, vp, p, cred);
5055 	if (error)
5056 		return (error);
5057 	/* Don't care about the pre/postop attributes */
5058 	m_freem(nd->nd_mrep);
5059 	return (nd->nd_repstat);
5060 }
5061 
5062 /*
5063  * Do the NFSv4.1 Exchange ID.
5064  */
5065 int
5066 nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp,
5067     struct nfssockreq *nrp, int minorvers, uint32_t exchflags,
5068     struct nfsclds **dspp, struct ucred *cred, NFSPROC_T *p)
5069 {
5070 	uint32_t *tl, v41flags;
5071 	struct nfsrv_descript nfsd;
5072 	struct nfsrv_descript *nd = &nfsd;
5073 	struct nfsclds *dsp;
5074 	struct timespec verstime;
5075 	int error, len;
5076 
5077 	*dspp = NULL;
5078 	if (minorvers == 0)
5079 		minorvers = nmp->nm_minorvers;
5080 	nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL,
5081 	    NFS_VER4, minorvers, NULL);
5082 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5083 	*tl++ = txdr_unsigned(nfsboottime.tv_sec);	/* Client owner */
5084 	*tl = txdr_unsigned(clp->nfsc_rev);
5085 	(void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
5086 
5087 	NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED);
5088 	*tl++ = txdr_unsigned(exchflags);
5089 	*tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE);
5090 
5091 	/* Set the implementation id4 */
5092 	*tl = txdr_unsigned(1);
5093 	(void) nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org"));
5094 	(void) nfsm_strtom(nd, version, strlen(version));
5095 	NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME);
5096 	verstime.tv_sec = 1293840000;		/* Jan 1, 2011 */
5097 	verstime.tv_nsec = 0;
5098 	txdr_nfsv4time(&verstime, tl);
5099 	nd->nd_flag |= ND_USEGSSNAME;
5100 	error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
5101 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5102 	NFSCL_DEBUG(1, "exchangeid err=%d reps=%d\n", error,
5103 	    (int)nd->nd_repstat);
5104 	if (error != 0)
5105 		return (error);
5106 	if (nd->nd_repstat == 0) {
5107 		NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_UNSIGNED + NFSX_HYPER);
5108 		len = fxdr_unsigned(int, *(tl + 7));
5109 		if (len < 0 || len > NFSV4_OPAQUELIMIT) {
5110 			error = NFSERR_BADXDR;
5111 			goto nfsmout;
5112 		}
5113 		dsp = malloc(sizeof(struct nfsclds) + len + 1, M_NFSCLDS,
5114 		    M_WAITOK | M_ZERO);
5115 		dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
5116 		dsp->nfsclds_servownlen = len;
5117 		dsp->nfsclds_sess.nfsess_clientid.lval[0] = *tl++;
5118 		dsp->nfsclds_sess.nfsess_clientid.lval[1] = *tl++;
5119 		dsp->nfsclds_sess.nfsess_sequenceid =
5120 		    fxdr_unsigned(uint32_t, *tl++);
5121 		v41flags = fxdr_unsigned(uint32_t, *tl);
5122 		if ((v41flags & NFSV4EXCH_USEPNFSMDS) != 0 &&
5123 		    NFSHASPNFSOPT(nmp)) {
5124 			NFSCL_DEBUG(1, "set PNFS\n");
5125 			NFSLOCKMNT(nmp);
5126 			nmp->nm_state |= NFSSTA_PNFS;
5127 			NFSUNLOCKMNT(nmp);
5128 			dsp->nfsclds_flags |= NFSCLDS_MDS;
5129 		}
5130 		if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0)
5131 			dsp->nfsclds_flags |= NFSCLDS_DS;
5132 		if (minorvers == NFSV42_MINORVERSION)
5133 			dsp->nfsclds_flags |= NFSCLDS_MINORV2;
5134 		if (len > 0)
5135 			nd->nd_repstat = nfsrv_mtostr(nd,
5136 			    dsp->nfsclds_serverown, len);
5137 		if (nd->nd_repstat == 0) {
5138 			mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
5139 			mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
5140 			    NULL, MTX_DEF);
5141 			nfscl_initsessionslots(&dsp->nfsclds_sess);
5142 			*dspp = dsp;
5143 		} else
5144 			free(dsp, M_NFSCLDS);
5145 	}
5146 	error = nd->nd_repstat;
5147 nfsmout:
5148 	m_freem(nd->nd_mrep);
5149 	return (error);
5150 }
5151 
5152 /*
5153  * Do the NFSv4.1 Create Session.
5154  */
5155 int
5156 nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep,
5157     struct nfssockreq *nrp, struct nfsclds *dsp, uint32_t sequenceid, int mds,
5158     struct ucred *cred, NFSPROC_T *p)
5159 {
5160 	uint32_t crflags, maxval, *tl;
5161 	struct nfsrv_descript nfsd;
5162 	struct nfsrv_descript *nd = &nfsd;
5163 	int error, irdcnt, minorvers;
5164 
5165 	/* Make sure nm_rsize, nm_wsize is set. */
5166 	if (nmp->nm_rsize > NFS_MAXBSIZE || nmp->nm_rsize == 0)
5167 		nmp->nm_rsize = NFS_MAXBSIZE;
5168 	if (nmp->nm_wsize > NFS_MAXBSIZE || nmp->nm_wsize == 0)
5169 		nmp->nm_wsize = NFS_MAXBSIZE;
5170 	if (dsp == NULL)
5171 		minorvers = nmp->nm_minorvers;
5172 	else if ((dsp->nfsclds_flags & NFSCLDS_MINORV2) != 0)
5173 		minorvers = NFSV42_MINORVERSION;
5174 	else
5175 		minorvers = NFSV41_MINORVERSION;
5176 	nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL,
5177 	    NFS_VER4, minorvers, NULL);
5178 	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
5179 	*tl++ = sep->nfsess_clientid.lval[0];
5180 	*tl++ = sep->nfsess_clientid.lval[1];
5181 	*tl++ = txdr_unsigned(sequenceid);
5182 	crflags = (NFSMNT_RDONLY(nmp->nm_mountp) ? 0 : NFSV4CRSESS_PERSIST);
5183 	if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0 && mds != 0)
5184 		crflags |= NFSV4CRSESS_CONNBACKCHAN;
5185 	*tl = txdr_unsigned(crflags);
5186 
5187 	/* Fill in fore channel attributes. */
5188 	NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
5189 	*tl++ = 0;				/* Header pad size */
5190 	if ((nd->nd_flag & ND_NFSV42) != 0 && mds != 0 && sb_max_adj >=
5191 	    nmp->nm_wsize && sb_max_adj >= nmp->nm_rsize) {
5192 		/*
5193 		 * NFSv4.2 Extended Attribute operations may want to do
5194 		 * requests/replies that are larger than nm_rsize/nm_wsize.
5195 		 */
5196 		*tl++ = txdr_unsigned(sb_max_adj - NFS_MAXXDR);
5197 		*tl++ = txdr_unsigned(sb_max_adj - NFS_MAXXDR);
5198 	} else {
5199 		*tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);
5200 		*tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);
5201 	}
5202 	*tl++ = txdr_unsigned(4096);		/* Max response size cached */
5203 	*tl++ = txdr_unsigned(20);		/* Max operations */
5204 	*tl++ = txdr_unsigned(64);		/* Max slots */
5205 	*tl = 0;				/* No rdma ird */
5206 
5207 	/* Fill in back channel attributes. */
5208 	NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
5209 	*tl++ = 0;				/* Header pad size */
5210 	*tl++ = txdr_unsigned(10000);		/* Max request size */
5211 	*tl++ = txdr_unsigned(10000);		/* Max response size */
5212 	*tl++ = txdr_unsigned(4096);		/* Max response size cached */
5213 	*tl++ = txdr_unsigned(4);		/* Max operations */
5214 	*tl++ = txdr_unsigned(NFSV4_CBSLOTS);	/* Max slots */
5215 	*tl = 0;				/* No rdma ird */
5216 
5217 	NFSM_BUILD(tl, uint32_t *, 8 * NFSX_UNSIGNED);
5218 	*tl++ = txdr_unsigned(NFS_CALLBCKPROG);	/* Call back prog # */
5219 
5220 	/* Allow AUTH_SYS callbacks as uid, gid == 0. */
5221 	*tl++ = txdr_unsigned(1);		/* Auth_sys only */
5222 	*tl++ = txdr_unsigned(AUTH_SYS);	/* AUTH_SYS type */
5223 	*tl++ = txdr_unsigned(nfsboottime.tv_sec); /* time stamp */
5224 	*tl++ = 0;				/* Null machine name */
5225 	*tl++ = 0;				/* Uid == 0 */
5226 	*tl++ = 0;				/* Gid == 0 */
5227 	*tl = 0;				/* No additional gids */
5228 	nd->nd_flag |= ND_USEGSSNAME;
5229 	error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG,
5230 	    NFS_VER4, NULL, 1, NULL, NULL);
5231 	if (error != 0)
5232 		return (error);
5233 	if (nd->nd_repstat == 0) {
5234 		NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
5235 		    2 * NFSX_UNSIGNED);
5236 		bcopy(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID);
5237 		tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
5238 		sep->nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++);
5239 		crflags = fxdr_unsigned(uint32_t, *tl);
5240 		if ((crflags & NFSV4CRSESS_PERSIST) != 0 && mds != 0) {
5241 			NFSLOCKMNT(nmp);
5242 			nmp->nm_state |= NFSSTA_SESSPERSIST;
5243 			NFSUNLOCKMNT(nmp);
5244 		}
5245 
5246 		/* Get the fore channel slot count. */
5247 		NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
5248 		tl++;			/* Skip the header pad size. */
5249 
5250 		/* Make sure nm_wsize is small enough. */
5251 		maxval = fxdr_unsigned(uint32_t, *tl++);
5252 		while (maxval < nmp->nm_wsize + NFS_MAXXDR) {
5253 			if (nmp->nm_wsize > 8096)
5254 				nmp->nm_wsize /= 2;
5255 			else
5256 				break;
5257 		}
5258 		sep->nfsess_maxreq = maxval;
5259 
5260 		/* Make sure nm_rsize is small enough. */
5261 		maxval = fxdr_unsigned(uint32_t, *tl++);
5262 		while (maxval < nmp->nm_rsize + NFS_MAXXDR) {
5263 			if (nmp->nm_rsize > 8096)
5264 				nmp->nm_rsize /= 2;
5265 			else
5266 				break;
5267 		}
5268 		sep->nfsess_maxresp = maxval;
5269 
5270 		sep->nfsess_maxcache = fxdr_unsigned(int, *tl++);
5271 		tl++;
5272 		sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++);
5273 		NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots);
5274 		irdcnt = fxdr_unsigned(int, *tl);
5275 		if (irdcnt < 0 || irdcnt > 1) {
5276 			error = NFSERR_BADXDR;
5277 			goto nfsmout;
5278 		}
5279 		if (irdcnt > 0)
5280 			NFSM_DISSECT(tl, uint32_t *, irdcnt * NFSX_UNSIGNED);
5281 
5282 		/* and the back channel slot count. */
5283 		NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
5284 		tl += 5;
5285 		sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl);
5286 		NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots);
5287 	}
5288 	error = nd->nd_repstat;
5289 nfsmout:
5290 	m_freem(nd->nd_mrep);
5291 	return (error);
5292 }
5293 
5294 /*
5295  * Do the NFSv4.1 Destroy Client.
5296  */
5297 int
5298 nfsrpc_destroyclient(struct nfsmount *nmp, struct nfsclclient *clp,
5299     struct ucred *cred, NFSPROC_T *p)
5300 {
5301 	uint32_t *tl;
5302 	struct nfsrv_descript nfsd;
5303 	struct nfsrv_descript *nd = &nfsd;
5304 	int error;
5305 	struct nfsclsession *tsep;
5306 
5307 	nfscl_reqstart(nd, NFSPROC_DESTROYCLIENT, nmp, NULL, 0, NULL, NULL, 0,
5308 	    0, NULL);
5309 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5310 	tsep = nfsmnt_mdssession(nmp);
5311 	*tl++ = tsep->nfsess_clientid.lval[0];
5312 	*tl = tsep->nfsess_clientid.lval[1];
5313 	nd->nd_flag |= ND_USEGSSNAME;
5314 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5315 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5316 	if (error != 0)
5317 		return (error);
5318 	error = nd->nd_repstat;
5319 	m_freem(nd->nd_mrep);
5320 	return (error);
5321 }
5322 
5323 /*
5324  * Do the NFSv4.1 LayoutGet.
5325  */
5326 static int
5327 nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode,
5328     uint64_t offset, uint64_t len, uint64_t minlen, int layouttype,
5329     int layoutlen, nfsv4stateid_t *stateidp, int *retonclosep,
5330     struct nfsclflayouthead *flhp, struct ucred *cred, NFSPROC_T *p)
5331 {
5332 	struct nfsrv_descript nfsd, *nd = &nfsd;
5333 	int error;
5334 
5335 	nfscl_reqstart(nd, NFSPROC_LAYOUTGET, nmp, fhp, fhlen, NULL, NULL, 0,
5336 	    0, cred);
5337 	nfsrv_setuplayoutget(nd, iomode, offset, len, minlen, stateidp,
5338 	    layouttype, layoutlen, 0);
5339 	nd->nd_flag |= ND_USEGSSNAME;
5340 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5341 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5342 	NFSCL_DEBUG(4, "layget err=%d st=%d\n", error, nd->nd_repstat);
5343 	if (error != 0)
5344 		return (error);
5345 	if (nd->nd_repstat == 0)
5346 		error = nfsrv_parselayoutget(nmp, nd, stateidp, retonclosep,
5347 		    flhp);
5348 	if (error == 0 && nd->nd_repstat != 0)
5349 		error = nd->nd_repstat;
5350 	m_freem(nd->nd_mrep);
5351 	return (error);
5352 }
5353 
5354 /*
5355  * Do the NFSv4.1 Get Device Info.
5356  */
5357 int
5358 nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype,
5359     uint32_t *notifybitsp, struct nfscldevinfo **ndip, struct ucred *cred,
5360     NFSPROC_T *p)
5361 {
5362 	uint32_t cnt, *tl, vers, minorvers;
5363 	struct nfsrv_descript nfsd;
5364 	struct nfsrv_descript *nd = &nfsd;
5365 	struct sockaddr_in sin, ssin;
5366 	struct sockaddr_in6 sin6, ssin6;
5367 	struct nfsclds *dsp = NULL, **dspp, **gotdspp;
5368 	struct nfscldevinfo *ndi;
5369 	int addrcnt = 0, bitcnt, error, gotminor, gotvers, i, isudp, j;
5370 	int stripecnt;
5371 	uint8_t stripeindex;
5372 	sa_family_t af, safilled;
5373 
5374 	ssin.sin_port = 0;		/* To shut up compiler. */
5375 	ssin.sin_addr.s_addr = 0;	/* ditto */
5376 	*ndip = NULL;
5377 	ndi = NULL;
5378 	gotdspp = NULL;
5379 	nfscl_reqstart(nd, NFSPROC_GETDEVICEINFO, nmp, NULL, 0, NULL, NULL, 0,
5380 	    0, cred);
5381 	NFSM_BUILD(tl, uint32_t *, NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED);
5382 	NFSBCOPY(deviceid, tl, NFSX_V4DEVICEID);
5383 	tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
5384 	*tl++ = txdr_unsigned(layouttype);
5385 	*tl++ = txdr_unsigned(100000);
5386 	if (notifybitsp != NULL && *notifybitsp != 0) {
5387 		*tl = txdr_unsigned(1);		/* One word of bits. */
5388 		NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
5389 		*tl = txdr_unsigned(*notifybitsp);
5390 	} else
5391 		*tl = txdr_unsigned(0);
5392 	nd->nd_flag |= ND_USEGSSNAME;
5393 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5394 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5395 	if (error != 0)
5396 		return (error);
5397 	if (nd->nd_repstat == 0) {
5398 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5399 		if (layouttype != fxdr_unsigned(int, *tl))
5400 			printf("EEK! devinfo layout type not same!\n");
5401 		if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
5402 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5403 			stripecnt = fxdr_unsigned(int, *tl);
5404 			NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt);
5405 			if (stripecnt < 1 || stripecnt > 4096) {
5406 				printf("pNFS File layout devinfo stripecnt %d:"
5407 				    " out of range\n", stripecnt);
5408 				error = NFSERR_BADXDR;
5409 				goto nfsmout;
5410 			}
5411 			NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) *
5412 			    NFSX_UNSIGNED);
5413 			addrcnt = fxdr_unsigned(int, *(tl + stripecnt));
5414 			NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt);
5415 			if (addrcnt < 1 || addrcnt > 128) {
5416 				printf("NFS devinfo addrcnt %d: out of range\n",
5417 				    addrcnt);
5418 				error = NFSERR_BADXDR;
5419 				goto nfsmout;
5420 			}
5421 
5422 			/*
5423 			 * Now we know how many stripe indices and addresses, so
5424 			 * we can allocate the structure the correct size.
5425 			 */
5426 			i = (stripecnt * sizeof(uint8_t)) /
5427 			    sizeof(struct nfsclds *) + 1;
5428 			NFSCL_DEBUG(4, "stripeindices=%d\n", i);
5429 			ndi = malloc(sizeof(*ndi) + (addrcnt + i) *
5430 			    sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK |
5431 			    M_ZERO);
5432 			NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
5433 			    NFSX_V4DEVICEID);
5434 			ndi->nfsdi_refcnt = 0;
5435 			ndi->nfsdi_flags = NFSDI_FILELAYOUT;
5436 			ndi->nfsdi_stripecnt = stripecnt;
5437 			ndi->nfsdi_addrcnt = addrcnt;
5438 			/* Fill in the stripe indices. */
5439 			for (i = 0; i < stripecnt; i++) {
5440 				stripeindex = fxdr_unsigned(uint8_t, *tl++);
5441 				NFSCL_DEBUG(4, "stripeind=%d\n", stripeindex);
5442 				if (stripeindex >= addrcnt) {
5443 					printf("pNFS File Layout devinfo"
5444 					    " stripeindex %d: too big\n",
5445 					    (int)stripeindex);
5446 					error = NFSERR_BADXDR;
5447 					goto nfsmout;
5448 				}
5449 				nfsfldi_setstripeindex(ndi, i, stripeindex);
5450 			}
5451 		} else if (layouttype == NFSLAYOUT_FLEXFILE) {
5452 			/* For Flex File, we only get one address list. */
5453 			ndi = malloc(sizeof(*ndi) + sizeof(struct nfsclds *),
5454 			    M_NFSDEVINFO, M_WAITOK | M_ZERO);
5455 			NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
5456 			    NFSX_V4DEVICEID);
5457 			ndi->nfsdi_refcnt = 0;
5458 			ndi->nfsdi_flags = NFSDI_FLEXFILE;
5459 			addrcnt = ndi->nfsdi_addrcnt = 1;
5460 		}
5461 
5462 		/* Now, dissect the server address(es). */
5463 		safilled = AF_UNSPEC;
5464 		for (i = 0; i < addrcnt; i++) {
5465 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5466 			cnt = fxdr_unsigned(uint32_t, *tl);
5467 			if (cnt == 0) {
5468 				printf("NFS devinfo 0 len addrlist\n");
5469 				error = NFSERR_BADXDR;
5470 				goto nfsmout;
5471 			}
5472 			dspp = nfsfldi_addr(ndi, i);
5473 			safilled = AF_UNSPEC;
5474 			for (j = 0; j < cnt; j++) {
5475 				error = nfsv4_getipaddr(nd, &sin, &sin6, &af,
5476 				    &isudp);
5477 				if (error != 0 && error != EPERM) {
5478 					error = NFSERR_BADXDR;
5479 					goto nfsmout;
5480 				}
5481 				if (error == 0 && isudp == 0) {
5482 					/*
5483 					 * The priority is:
5484 					 * - Same address family.
5485 					 * Save the address and dspp, so that
5486 					 * the connection can be done after
5487 					 * parsing is complete.
5488 					 */
5489 					if (safilled == AF_UNSPEC ||
5490 					    (af == nmp->nm_nam->sa_family &&
5491 					     safilled != nmp->nm_nam->sa_family)
5492 					   ) {
5493 						if (af == AF_INET)
5494 							ssin = sin;
5495 						else
5496 							ssin6 = sin6;
5497 						safilled = af;
5498 						gotdspp = dspp;
5499 					}
5500 				}
5501 			}
5502 		}
5503 
5504 		gotvers = NFS_VER4;	/* Default NFSv4.1 for File Layout. */
5505 		gotminor = NFSV41_MINORVERSION;
5506 		/* For Flex File, we will take one of the versions to use. */
5507 		if (layouttype == NFSLAYOUT_FLEXFILE) {
5508 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5509 			j = fxdr_unsigned(int, *tl);
5510 			if (j < 1 || j > NFSDEV_MAXVERS) {
5511 				printf("pNFS: too many versions\n");
5512 				error = NFSERR_BADXDR;
5513 				goto nfsmout;
5514 			}
5515 			gotvers = 0;
5516 			gotminor = 0;
5517 			for (i = 0; i < j; i++) {
5518 				NFSM_DISSECT(tl, uint32_t *, 5 * NFSX_UNSIGNED);
5519 				vers = fxdr_unsigned(uint32_t, *tl++);
5520 				minorvers = fxdr_unsigned(uint32_t, *tl++);
5521 				if (vers == NFS_VER3)
5522 					minorvers = 0;
5523 				if ((vers == NFS_VER4 && ((minorvers ==
5524 				    NFSV41_MINORVERSION && gotminor == 0) ||
5525 				    minorvers == NFSV42_MINORVERSION)) ||
5526 				    (vers == NFS_VER3 && gotvers == 0)) {
5527 					gotvers = vers;
5528 					gotminor = minorvers;
5529 					/* We'll take this one. */
5530 					ndi->nfsdi_versindex = i;
5531 					ndi->nfsdi_vers = vers;
5532 					ndi->nfsdi_minorvers = minorvers;
5533 					ndi->nfsdi_rsize = fxdr_unsigned(
5534 					    uint32_t, *tl++);
5535 					ndi->nfsdi_wsize = fxdr_unsigned(
5536 					    uint32_t, *tl++);
5537 					if (*tl == newnfs_true)
5538 						ndi->nfsdi_flags |=
5539 						    NFSDI_TIGHTCOUPLED;
5540 					else
5541 						ndi->nfsdi_flags &=
5542 						    ~NFSDI_TIGHTCOUPLED;
5543 				}
5544 			}
5545 			if (gotvers == 0) {
5546 				printf("pNFS: no NFSv3, NFSv4.1 or NFSv4.2\n");
5547 				error = NFSERR_BADXDR;
5548 				goto nfsmout;
5549 			}
5550 		}
5551 
5552 		/* And the notify bits. */
5553 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5554 		bitcnt = fxdr_unsigned(int, *tl);
5555 		if (bitcnt > 0) {
5556 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5557 			if (notifybitsp != NULL)
5558 				*notifybitsp =
5559 				    fxdr_unsigned(uint32_t, *tl);
5560 		}
5561 		if (safilled != AF_UNSPEC) {
5562 			KASSERT(ndi != NULL, ("ndi is NULL"));
5563 			*ndip = ndi;
5564 		} else
5565 			error = EPERM;
5566 		if (error == 0) {
5567 			/*
5568 			 * Now we can do a TCP connection for the correct
5569 			 * NFS version and IP address.
5570 			 */
5571 			error = nfsrpc_fillsa(nmp, &ssin, &ssin6, safilled,
5572 			    gotvers, gotminor, &dsp, p);
5573 		}
5574 		if (error == 0) {
5575 			KASSERT(gotdspp != NULL, ("gotdspp is NULL"));
5576 			*gotdspp = dsp;
5577 		}
5578 	}
5579 	if (nd->nd_repstat != 0 && error == 0)
5580 		error = nd->nd_repstat;
5581 nfsmout:
5582 	if (error != 0 && ndi != NULL)
5583 		nfscl_freedevinfo(ndi);
5584 	m_freem(nd->nd_mrep);
5585 	return (error);
5586 }
5587 
5588 /*
5589  * Do the NFSv4.1 LayoutCommit.
5590  */
5591 int
5592 nfsrpc_layoutcommit(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5593     uint64_t off, uint64_t len, uint64_t lastbyte, nfsv4stateid_t *stateidp,
5594     int layouttype, struct ucred *cred, NFSPROC_T *p)
5595 {
5596 	uint32_t *tl;
5597 	struct nfsrv_descript nfsd, *nd = &nfsd;
5598 	int error;
5599 
5600 	nfscl_reqstart(nd, NFSPROC_LAYOUTCOMMIT, nmp, fh, fhlen, NULL, NULL,
5601 	    0, 0, cred);
5602 	NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
5603 	    NFSX_STATEID);
5604 	txdr_hyper(off, tl);
5605 	tl += 2;
5606 	txdr_hyper(len, tl);
5607 	tl += 2;
5608 	if (reclaim != 0)
5609 		*tl++ = newnfs_true;
5610 	else
5611 		*tl++ = newnfs_false;
5612 	*tl++ = txdr_unsigned(stateidp->seqid);
5613 	*tl++ = stateidp->other[0];
5614 	*tl++ = stateidp->other[1];
5615 	*tl++ = stateidp->other[2];
5616 	*tl++ = newnfs_true;
5617 	if (lastbyte < off)
5618 		lastbyte = off;
5619 	else if (lastbyte >= (off + len))
5620 		lastbyte = off + len - 1;
5621 	txdr_hyper(lastbyte, tl);
5622 	tl += 2;
5623 	*tl++ = newnfs_false;
5624 	*tl++ = txdr_unsigned(layouttype);
5625 	/* All supported layouts are 0 length. */
5626 	*tl = txdr_unsigned(0);
5627 	nd->nd_flag |= ND_USEGSSNAME;
5628 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5629 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5630 	if (error != 0)
5631 		return (error);
5632 	error = nd->nd_repstat;
5633 	m_freem(nd->nd_mrep);
5634 	return (error);
5635 }
5636 
5637 /*
5638  * Do the NFSv4.1 LayoutReturn.
5639  */
5640 int
5641 nfsrpc_layoutreturn(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5642     int layouttype, uint32_t iomode, int layoutreturn, uint64_t offset,
5643     uint64_t len, nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
5644     uint32_t stat, uint32_t op, char *devid)
5645 {
5646 	uint32_t *tl;
5647 	struct nfsrv_descript nfsd, *nd = &nfsd;
5648 	uint64_t tu64;
5649 	int error;
5650 
5651 	nfscl_reqstart(nd, NFSPROC_LAYOUTRETURN, nmp, fh, fhlen, NULL, NULL,
5652 	    0, 0, cred);
5653 	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
5654 	if (reclaim != 0)
5655 		*tl++ = newnfs_true;
5656 	else
5657 		*tl++ = newnfs_false;
5658 	*tl++ = txdr_unsigned(layouttype);
5659 	*tl++ = txdr_unsigned(iomode);
5660 	*tl = txdr_unsigned(layoutreturn);
5661 	if (layoutreturn == NFSLAYOUTRETURN_FILE) {
5662 		NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
5663 		    NFSX_UNSIGNED);
5664 		txdr_hyper(offset, tl);
5665 		tl += 2;
5666 		txdr_hyper(len, tl);
5667 		tl += 2;
5668 		NFSCL_DEBUG(4, "layoutret stseq=%d\n", (int)stateidp->seqid);
5669 		*tl++ = txdr_unsigned(stateidp->seqid);
5670 		*tl++ = stateidp->other[0];
5671 		*tl++ = stateidp->other[1];
5672 		*tl++ = stateidp->other[2];
5673 		if (layouttype == NFSLAYOUT_NFSV4_1_FILES)
5674 			*tl = txdr_unsigned(0);
5675 		else if (layouttype == NFSLAYOUT_FLEXFILE) {
5676 			if (stat != 0) {
5677 				*tl = txdr_unsigned(2 * NFSX_HYPER +
5678 				    NFSX_STATEID + NFSX_V4DEVICEID + 5 *
5679 				    NFSX_UNSIGNED);
5680 				NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER +
5681 				    NFSX_STATEID + NFSX_V4DEVICEID + 5 *
5682 				    NFSX_UNSIGNED);
5683 				*tl++ = txdr_unsigned(1);	/* One error. */
5684 				tu64 = 0;			/* Offset. */
5685 				txdr_hyper(tu64, tl); tl += 2;
5686 				tu64 = UINT64_MAX;		/* Length. */
5687 				txdr_hyper(tu64, tl); tl += 2;
5688 				NFSBCOPY(stateidp, tl, NFSX_STATEID);
5689 				tl += (NFSX_STATEID / NFSX_UNSIGNED);
5690 				*tl++ = txdr_unsigned(1);	/* One error. */
5691 				NFSBCOPY(devid, tl, NFSX_V4DEVICEID);
5692 				tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
5693 				*tl++ = txdr_unsigned(stat);
5694 				*tl++ = txdr_unsigned(op);
5695 			} else {
5696 				*tl = txdr_unsigned(2 * NFSX_UNSIGNED);
5697 				NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5698 				/* No ioerrs. */
5699 				*tl++ = 0;
5700 			}
5701 			*tl = 0;	/* No stats yet. */
5702 		}
5703 	}
5704 	nd->nd_flag |= ND_USEGSSNAME;
5705 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5706 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5707 	if (error != 0)
5708 		return (error);
5709 	if (nd->nd_repstat == 0) {
5710 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5711 		if (*tl != 0) {
5712 			NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
5713 			stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
5714 			stateidp->other[0] = *tl++;
5715 			stateidp->other[1] = *tl++;
5716 			stateidp->other[2] = *tl;
5717 		}
5718 	} else
5719 		error = nd->nd_repstat;
5720 nfsmout:
5721 	m_freem(nd->nd_mrep);
5722 	return (error);
5723 }
5724 
5725 /*
5726  * Do the NFSv4.2 LayoutError.
5727  */
5728 static int
5729 nfsrpc_layouterror(struct nfsmount *nmp, uint8_t *fh, int fhlen, uint64_t offset,
5730     uint64_t len, nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
5731     uint32_t stat, uint32_t op, char *devid)
5732 {
5733 	uint32_t *tl;
5734 	struct nfsrv_descript nfsd, *nd = &nfsd;
5735 	int error;
5736 
5737 	nfscl_reqstart(nd, NFSPROC_LAYOUTERROR, nmp, fh, fhlen, NULL, NULL,
5738 	    0, 0, cred);
5739 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
5740 	    NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED);
5741 	txdr_hyper(offset, tl); tl += 2;
5742 	txdr_hyper(len, tl); tl += 2;
5743 	*tl++ = txdr_unsigned(stateidp->seqid);
5744 	*tl++ = stateidp->other[0];
5745 	*tl++ = stateidp->other[1];
5746 	*tl++ = stateidp->other[2];
5747 	*tl++ = txdr_unsigned(1);
5748 	NFSBCOPY(devid, tl, NFSX_V4DEVICEID);
5749 	tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
5750 	*tl++ = txdr_unsigned(stat);
5751 	*tl = txdr_unsigned(op);
5752 	nd->nd_flag |= ND_USEGSSNAME;
5753 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5754 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5755 	if (error != 0)
5756 		return (error);
5757 	if (nd->nd_repstat != 0)
5758 		error = nd->nd_repstat;
5759 	m_freem(nd->nd_mrep);
5760 	return (error);
5761 }
5762 
5763 /*
5764  * Acquire a layout and devinfo, if possible. The caller must have acquired
5765  * a reference count on the nfsclclient structure before calling this.
5766  * Return the layout in lypp with a reference count on it, if successful.
5767  */
5768 static int
5769 nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp,
5770     int iomode, uint32_t rw, uint32_t *notifybitsp, nfsv4stateid_t *stateidp,
5771     uint64_t off, struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p)
5772 {
5773 	struct nfscllayout *lyp;
5774 	struct nfsclflayout *flp;
5775 	struct nfsclflayouthead flh;
5776 	int error = 0, islocked, layoutlen, layouttype, recalled, retonclose;
5777 	nfsv4stateid_t stateid;
5778 	struct nfsclsession *tsep;
5779 
5780 	*lypp = NULL;
5781 	if (NFSHASFLEXFILE(nmp))
5782 		layouttype = NFSLAYOUT_FLEXFILE;
5783 	else
5784 		layouttype = NFSLAYOUT_NFSV4_1_FILES;
5785 	/*
5786 	 * If lyp is returned non-NULL, there will be a refcnt (shared lock)
5787 	 * on it, iff flp != NULL or a lock (exclusive lock) on it iff
5788 	 * flp == NULL.
5789 	 */
5790 	lyp = nfscl_getlayout(nmp->nm_clp, nfhp->nfh_fh, nfhp->nfh_len,
5791 	    off, rw, &flp, &recalled);
5792 	islocked = 0;
5793 	if (lyp == NULL || flp == NULL) {
5794 		if (recalled != 0)
5795 			return (EIO);
5796 		LIST_INIT(&flh);
5797 		tsep = nfsmnt_mdssession(nmp);
5798 		layoutlen = tsep->nfsess_maxcache -
5799 		    (NFSX_STATEID + 3 * NFSX_UNSIGNED);
5800 		if (lyp == NULL) {
5801 			stateid.seqid = 0;
5802 			stateid.other[0] = stateidp->other[0];
5803 			stateid.other[1] = stateidp->other[1];
5804 			stateid.other[2] = stateidp->other[2];
5805 			error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5806 			    nfhp->nfh_len, iomode, (uint64_t)0, UINT64_MAX,
5807 			    (uint64_t)0, layouttype, layoutlen, &stateid,
5808 			    &retonclose, &flh, cred, p);
5809 		} else {
5810 			islocked = 1;
5811 			stateid.seqid = lyp->nfsly_stateid.seqid;
5812 			stateid.other[0] = lyp->nfsly_stateid.other[0];
5813 			stateid.other[1] = lyp->nfsly_stateid.other[1];
5814 			stateid.other[2] = lyp->nfsly_stateid.other[2];
5815 			error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5816 			    nfhp->nfh_len, iomode, off, UINT64_MAX,
5817 			    (uint64_t)0, layouttype, layoutlen, &stateid,
5818 			    &retonclose, &flh, cred, p);
5819 		}
5820 		error = nfsrpc_layoutgetres(nmp, vp, nfhp->nfh_fh,
5821 		    nfhp->nfh_len, &stateid, retonclose, notifybitsp, &lyp,
5822 		    &flh, layouttype, error, NULL, cred, p);
5823 		if (error == 0)
5824 			*lypp = lyp;
5825 		else if (islocked != 0)
5826 			nfscl_rellayout(lyp, 1);
5827 	} else
5828 		*lypp = lyp;
5829 	return (error);
5830 }
5831 
5832 /*
5833  * Do a TCP connection plus exchange id and create session.
5834  * If successful, a "struct nfsclds" is linked into the list for the
5835  * mount point and a pointer to it is returned.
5836  */
5837 static int
5838 nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_in *sin,
5839     struct sockaddr_in6 *sin6, sa_family_t af, int vers, int minorvers,
5840     struct nfsclds **dspp, NFSPROC_T *p)
5841 {
5842 	struct sockaddr_in *msad, *sad;
5843 	struct sockaddr_in6 *msad6, *sad6;
5844 	struct nfsclclient *clp;
5845 	struct nfssockreq *nrp;
5846 	struct nfsclds *dsp, *tdsp;
5847 	int error, firsttry;
5848 	enum nfsclds_state retv;
5849 	uint32_t sequenceid = 0;
5850 
5851 	KASSERT(nmp->nm_sockreq.nr_cred != NULL,
5852 	    ("nfsrpc_fillsa: NULL nr_cred"));
5853 	NFSLOCKCLSTATE();
5854 	clp = nmp->nm_clp;
5855 	NFSUNLOCKCLSTATE();
5856 	if (clp == NULL)
5857 		return (EPERM);
5858 	if (af == AF_INET) {
5859 		NFSLOCKMNT(nmp);
5860 		/*
5861 		 * Check to see if we already have a session for this
5862 		 * address that is usable for a DS.
5863 		 * Note that the MDS's address is in a different place
5864 		 * than the sessions already acquired for DS's.
5865 		 */
5866 		msad = (struct sockaddr_in *)nmp->nm_sockreq.nr_nam;
5867 		tdsp = TAILQ_FIRST(&nmp->nm_sess);
5868 		while (tdsp != NULL) {
5869 			if (msad != NULL && msad->sin_family == AF_INET &&
5870 			    sin->sin_addr.s_addr == msad->sin_addr.s_addr &&
5871 			    sin->sin_port == msad->sin_port &&
5872 			    (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
5873 			    tdsp->nfsclds_sess.nfsess_defunct == 0) {
5874 				*dspp = tdsp;
5875 				NFSUNLOCKMNT(nmp);
5876 				NFSCL_DEBUG(4, "fnd same addr\n");
5877 				return (0);
5878 			}
5879 			tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
5880 			if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
5881 				msad = (struct sockaddr_in *)
5882 				    tdsp->nfsclds_sockp->nr_nam;
5883 			else
5884 				msad = NULL;
5885 		}
5886 		NFSUNLOCKMNT(nmp);
5887 
5888 		/* No IP address match, so look for new/trunked one. */
5889 		sad = malloc(sizeof(*sad), M_SONAME, M_WAITOK | M_ZERO);
5890 		sad->sin_len = sizeof(*sad);
5891 		sad->sin_family = AF_INET;
5892 		sad->sin_port = sin->sin_port;
5893 		sad->sin_addr.s_addr = sin->sin_addr.s_addr;
5894 		nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
5895 		nrp->nr_nam = (struct sockaddr *)sad;
5896 	} else if (af == AF_INET6) {
5897 		NFSLOCKMNT(nmp);
5898 		/*
5899 		 * Check to see if we already have a session for this
5900 		 * address that is usable for a DS.
5901 		 * Note that the MDS's address is in a different place
5902 		 * than the sessions already acquired for DS's.
5903 		 */
5904 		msad6 = (struct sockaddr_in6 *)nmp->nm_sockreq.nr_nam;
5905 		tdsp = TAILQ_FIRST(&nmp->nm_sess);
5906 		while (tdsp != NULL) {
5907 			if (msad6 != NULL && msad6->sin6_family == AF_INET6 &&
5908 			    IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
5909 			    &msad6->sin6_addr) &&
5910 			    sin6->sin6_port == msad6->sin6_port &&
5911 			    (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
5912 			    tdsp->nfsclds_sess.nfsess_defunct == 0) {
5913 				*dspp = tdsp;
5914 				NFSUNLOCKMNT(nmp);
5915 				return (0);
5916 			}
5917 			tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
5918 			if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
5919 				msad6 = (struct sockaddr_in6 *)
5920 				    tdsp->nfsclds_sockp->nr_nam;
5921 			else
5922 				msad6 = NULL;
5923 		}
5924 		NFSUNLOCKMNT(nmp);
5925 
5926 		/* No IP address match, so look for new/trunked one. */
5927 		sad6 = malloc(sizeof(*sad6), M_SONAME, M_WAITOK | M_ZERO);
5928 		sad6->sin6_len = sizeof(*sad6);
5929 		sad6->sin6_family = AF_INET6;
5930 		sad6->sin6_port = sin6->sin6_port;
5931 		NFSBCOPY(&sin6->sin6_addr, &sad6->sin6_addr,
5932 		    sizeof(struct in6_addr));
5933 		nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
5934 		nrp->nr_nam = (struct sockaddr *)sad6;
5935 	} else
5936 		return (EPERM);
5937 
5938 	nrp->nr_sotype = SOCK_STREAM;
5939 	mtx_init(&nrp->nr_mtx, "nfssock", NULL, MTX_DEF);
5940 	nrp->nr_prog = NFS_PROG;
5941 	nrp->nr_vers = vers;
5942 
5943 	/*
5944 	 * Use the credentials that were used for the mount, which are
5945 	 * in nmp->nm_sockreq.nr_cred for newnfs_connect() etc.
5946 	 * Ref. counting the credentials with crhold() is probably not
5947 	 * necessary, since nm_sockreq.nr_cred won't be crfree()'d until
5948 	 * unmount, but I did it anyhow.
5949 	 */
5950 	nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred);
5951 	error = newnfs_connect(nmp, nrp, NULL, p, 0, false, &nrp->nr_client);
5952 	NFSCL_DEBUG(3, "DS connect=%d\n", error);
5953 
5954 	dsp = NULL;
5955 	/* Now, do the exchangeid and create session. */
5956 	if (error == 0) {
5957 		if (vers == NFS_VER4) {
5958 			firsttry = 0;
5959 			do {
5960 				error = nfsrpc_exchangeid(nmp, clp, nrp,
5961 				    minorvers, NFSV4EXCH_USEPNFSDS, &dsp,
5962 				    nrp->nr_cred, p);
5963 				NFSCL_DEBUG(3, "DS exchangeid=%d\n", error);
5964 				if (error == NFSERR_MINORVERMISMATCH)
5965 					minorvers = NFSV42_MINORVERSION;
5966 			} while (error == NFSERR_MINORVERMISMATCH &&
5967 			    firsttry++ == 0);
5968 			if (error != 0)
5969 				newnfs_disconnect(NULL, nrp);
5970 		} else {
5971 			dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS,
5972 			    M_WAITOK | M_ZERO);
5973 			dsp->nfsclds_flags |= NFSCLDS_DS;
5974 			dsp->nfsclds_expire = INT32_MAX; /* No renews needed. */
5975 			mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
5976 			mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
5977 			    NULL, MTX_DEF);
5978 		}
5979 	}
5980 	if (error == 0) {
5981 		dsp->nfsclds_sockp = nrp;
5982 		if (vers == NFS_VER4) {
5983 			NFSLOCKMNT(nmp);
5984 			retv = nfscl_getsameserver(nmp, dsp, &tdsp,
5985 			    &sequenceid);
5986 			NFSCL_DEBUG(3, "getsame ret=%d\n", retv);
5987 			if (retv == NFSDSP_USETHISSESSION &&
5988 			    nfscl_dssameconn != 0) {
5989 				NFSLOCKDS(tdsp);
5990 				tdsp->nfsclds_flags |= NFSCLDS_SAMECONN;
5991 				NFSUNLOCKDS(tdsp);
5992 				NFSUNLOCKMNT(nmp);
5993 				/*
5994 				 * If there is already a session for this
5995 				 * server, use it.
5996 				 */
5997 				newnfs_disconnect(NULL, nrp);
5998 				nfscl_freenfsclds(dsp);
5999 				*dspp = tdsp;
6000 				return (0);
6001 			}
6002 			if (retv == NFSDSP_NOTFOUND)
6003 				sequenceid =
6004 				    dsp->nfsclds_sess.nfsess_sequenceid;
6005 			NFSUNLOCKMNT(nmp);
6006 			error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
6007 			    nrp, dsp, sequenceid, 0, nrp->nr_cred, p);
6008 			NFSCL_DEBUG(3, "DS createsess=%d\n", error);
6009 		}
6010 	} else {
6011 		NFSFREECRED(nrp->nr_cred);
6012 		NFSFREEMUTEX(&nrp->nr_mtx);
6013 		free(nrp->nr_nam, M_SONAME);
6014 		free(nrp, M_NFSSOCKREQ);
6015 	}
6016 	if (error == 0) {
6017 		NFSCL_DEBUG(3, "add DS session\n");
6018 		/*
6019 		 * Put it at the end of the list. That way the list
6020 		 * is ordered by when the entry was added. This matters
6021 		 * since the one done first is the one that should be
6022 		 * used for sequencid'ing any subsequent create sessions.
6023 		 */
6024 		NFSLOCKMNT(nmp);
6025 		TAILQ_INSERT_TAIL(&nmp->nm_sess, dsp, nfsclds_list);
6026 		NFSUNLOCKMNT(nmp);
6027 		*dspp = dsp;
6028 	} else if (dsp != NULL) {
6029 		newnfs_disconnect(NULL, nrp);
6030 		nfscl_freenfsclds(dsp);
6031 	}
6032 	return (error);
6033 }
6034 
6035 /*
6036  * Do the NFSv4.1 Reclaim Complete.
6037  */
6038 int
6039 nfsrpc_reclaimcomplete(struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
6040 {
6041 	uint32_t *tl;
6042 	struct nfsrv_descript nfsd;
6043 	struct nfsrv_descript *nd = &nfsd;
6044 	int error;
6045 
6046 	nfscl_reqstart(nd, NFSPROC_RECLAIMCOMPL, nmp, NULL, 0, NULL, NULL, 0,
6047 	    0, cred);
6048 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
6049 	*tl = newnfs_false;
6050 	nd->nd_flag |= ND_USEGSSNAME;
6051 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
6052 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
6053 	if (error != 0)
6054 		return (error);
6055 	error = nd->nd_repstat;
6056 	m_freem(nd->nd_mrep);
6057 	return (error);
6058 }
6059 
6060 /*
6061  * Initialize the slot tables for a session.
6062  */
6063 static void
6064 nfscl_initsessionslots(struct nfsclsession *sep)
6065 {
6066 	int i;
6067 
6068 	for (i = 0; i < NFSV4_CBSLOTS; i++) {
6069 		if (sep->nfsess_cbslots[i].nfssl_reply != NULL)
6070 			m_freem(sep->nfsess_cbslots[i].nfssl_reply);
6071 		NFSBZERO(&sep->nfsess_cbslots[i], sizeof(struct nfsslot));
6072 	}
6073 	for (i = 0; i < 64; i++)
6074 		sep->nfsess_slotseq[i] = 0;
6075 	sep->nfsess_slots = 0;
6076 	sep->nfsess_badslots = 0;
6077 }
6078 
6079 /*
6080  * Called to try and do an I/O operation via an NFSv4.1 Data Server (DS).
6081  */
6082 int
6083 nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6084     uint32_t rwaccess, int docommit, struct ucred *cred, NFSPROC_T *p)
6085 {
6086 	struct nfsnode *np = VTONFS(vp);
6087 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6088 	struct nfscllayout *layp;
6089 	struct nfscldevinfo *dip;
6090 	struct nfsclflayout *rflp;
6091 	struct mbuf *m, *m2;
6092 	struct nfsclwritedsdorpc *drpc, *tdrpc;
6093 	nfsv4stateid_t stateid;
6094 	struct ucred *newcred;
6095 	uint64_t lastbyte, len, off, oresid, xfer;
6096 	int eof, error, firstmirror, i, iolaymode, mirrorcnt, recalled, timo;
6097 	void *lckp;
6098 	uint8_t *dev;
6099 	void *iovbase = NULL;
6100 	size_t iovlen = 0;
6101 	off_t offs = 0;
6102 	ssize_t resid = 0;
6103 	uint32_t op;
6104 
6105 	if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
6106 	    (np->n_flag & NNOLAYOUT) != 0)
6107 		return (EIO);
6108 	/* Now, get a reference cnt on the clientid for this mount. */
6109 	if (nfscl_getref(nmp) == 0)
6110 		return (EIO);
6111 
6112 	/* Find an appropriate stateid. */
6113 	newcred = NFSNEWCRED(cred);
6114 	error = nfscl_getstateid(vp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
6115 	    rwaccess, 1, newcred, p, &stateid, &lckp);
6116 	if (error != 0) {
6117 		NFSFREECRED(newcred);
6118 		nfscl_relref(nmp);
6119 		return (error);
6120 	}
6121 	/* Search for a layout for this file. */
6122 	off = uiop->uio_offset;
6123 	layp = nfscl_getlayout(nmp->nm_clp, np->n_fhp->nfh_fh,
6124 	    np->n_fhp->nfh_len, off, rwaccess, &rflp, &recalled);
6125 	if (layp == NULL || rflp == NULL) {
6126 		if (recalled != 0) {
6127 			NFSFREECRED(newcred);
6128 			if (lckp != NULL)
6129 				nfscl_lockderef(lckp);
6130 			nfscl_relref(nmp);
6131 			return (EIO);
6132 		}
6133 		if (layp != NULL) {
6134 			nfscl_rellayout(layp, (rflp == NULL) ? 1 : 0);
6135 			layp = NULL;
6136 		}
6137 		/* Try and get a Layout, if it is supported. */
6138 		if (rwaccess == NFSV4OPEN_ACCESSWRITE ||
6139 		    (np->n_flag & NWRITEOPENED) != 0)
6140 			iolaymode = NFSLAYOUTIOMODE_RW;
6141 		else
6142 			iolaymode = NFSLAYOUTIOMODE_READ;
6143 		error = nfsrpc_getlayout(nmp, vp, np->n_fhp, iolaymode,
6144 		    rwaccess, NULL, &stateid, off, &layp, newcred, p);
6145 		if (error != 0) {
6146 			NFSLOCKNODE(np);
6147 			np->n_flag |= NNOLAYOUT;
6148 			NFSUNLOCKNODE(np);
6149 			if (lckp != NULL)
6150 				nfscl_lockderef(lckp);
6151 			NFSFREECRED(newcred);
6152 			if (layp != NULL)
6153 				nfscl_rellayout(layp, 0);
6154 			nfscl_relref(nmp);
6155 			return (error);
6156 		}
6157 	}
6158 
6159 	/*
6160 	 * Loop around finding a layout that works for the first part of
6161 	 * this I/O operation, and then call the function that actually
6162 	 * does the RPC.
6163 	 */
6164 	eof = 0;
6165 	len = (uint64_t)uiop->uio_resid;
6166 	while (len > 0 && error == 0 && eof == 0) {
6167 		off = uiop->uio_offset;
6168 		error = nfscl_findlayoutforio(layp, off, rwaccess, &rflp);
6169 		if (error == 0) {
6170 			oresid = xfer = (uint64_t)uiop->uio_resid;
6171 			if (xfer > (rflp->nfsfl_end - rflp->nfsfl_off))
6172 				xfer = rflp->nfsfl_end - rflp->nfsfl_off;
6173 			/*
6174 			 * For Flex File layout with mirrored DSs, select one
6175 			 * of them at random for reads. For writes and commits,
6176 			 * do all mirrors.
6177 			 */
6178 			m = NULL;
6179 			tdrpc = drpc = NULL;
6180 			firstmirror = 0;
6181 			mirrorcnt = 1;
6182 			if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0 &&
6183 			    (mirrorcnt = rflp->nfsfl_mirrorcnt) > 1) {
6184 				if (rwaccess == NFSV4OPEN_ACCESSREAD) {
6185 					firstmirror = arc4random() % mirrorcnt;
6186 					mirrorcnt = firstmirror + 1;
6187 				} else {
6188 					if (docommit == 0) {
6189 						/*
6190 						 * Save values, so uiop can be
6191 						 * rolled back upon a write
6192 						 * error.
6193 						 */
6194 						offs = uiop->uio_offset;
6195 						resid = uiop->uio_resid;
6196 						iovbase =
6197 						    uiop->uio_iov->iov_base;
6198 						iovlen = uiop->uio_iov->iov_len;
6199 						m = nfsm_uiombuflist(uiop, len,
6200 						    0);
6201 					}
6202 					tdrpc = drpc = malloc(sizeof(*drpc) *
6203 					    (mirrorcnt - 1), M_TEMP, M_WAITOK |
6204 					    M_ZERO);
6205 				}
6206 			}
6207 			for (i = firstmirror; i < mirrorcnt && error == 0; i++){
6208 				m2 = NULL;
6209 				if (m != NULL && i < mirrorcnt - 1)
6210 					m2 = m_copym(m, 0, M_COPYALL, M_WAITOK);
6211 				else {
6212 					m2 = m;
6213 					m = NULL;
6214 				}
6215 				if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0) {
6216 					dev = rflp->nfsfl_ffm[i].dev;
6217 					dip = nfscl_getdevinfo(nmp->nm_clp, dev,
6218 					    rflp->nfsfl_ffm[i].devp);
6219 				} else {
6220 					dev = rflp->nfsfl_dev;
6221 					dip = nfscl_getdevinfo(nmp->nm_clp, dev,
6222 					    rflp->nfsfl_devp);
6223 				}
6224 				if (dip != NULL) {
6225 					if ((rflp->nfsfl_flags & NFSFL_FLEXFILE)
6226 					    != 0)
6227 						error = nfscl_dofflayoutio(vp,
6228 						    uiop, iomode, must_commit,
6229 						    &eof, &stateid, rwaccess,
6230 						    dip, layp, rflp, off, xfer,
6231 						    i, docommit, m2, tdrpc,
6232 						    newcred, p);
6233 					else
6234 						error = nfscl_doflayoutio(vp,
6235 						    uiop, iomode, must_commit,
6236 						    &eof, &stateid, rwaccess,
6237 						    dip, layp, rflp, off, xfer,
6238 						    docommit, newcred, p);
6239 					nfscl_reldevinfo(dip);
6240 				} else {
6241 					if (m2 != NULL)
6242 						m_freem(m2);
6243 					error = EIO;
6244 				}
6245 				tdrpc++;
6246 			}
6247 			if (m != NULL)
6248 				m_freem(m);
6249 			tdrpc = drpc;
6250 			timo = hz / 50;		/* Wait for 20msec. */
6251 			if (timo < 1)
6252 				timo = 1;
6253 			for (i = firstmirror; i < mirrorcnt - 1 &&
6254 			    tdrpc != NULL; i++, tdrpc++) {
6255 				/*
6256 				 * For the unused drpc entries, both inprog and
6257 				 * err == 0, so this loop won't break.
6258 				 */
6259 				while (tdrpc->inprog != 0 && tdrpc->done == 0)
6260 					tsleep(&tdrpc->tsk, PVFS, "clrpcio",
6261 					    timo);
6262 				if (error == 0 && tdrpc->err != 0)
6263 					error = tdrpc->err;
6264 				if (rwaccess != NFSV4OPEN_ACCESSREAD &&
6265 				    docommit == 0 && *must_commit == 0 &&
6266 				    tdrpc->must_commit == 1)
6267 					*must_commit = 1;
6268 			}
6269 			free(drpc, M_TEMP);
6270 			if (error == 0) {
6271 				if (mirrorcnt > 1 && rwaccess ==
6272 				    NFSV4OPEN_ACCESSWRITE && docommit == 0) {
6273 					NFSLOCKCLSTATE();
6274 					layp->nfsly_flags |= NFSLY_WRITTEN;
6275 					NFSUNLOCKCLSTATE();
6276 				}
6277 				lastbyte = off + xfer - 1;
6278 				NFSLOCKCLSTATE();
6279 				if (lastbyte > layp->nfsly_lastbyte)
6280 					layp->nfsly_lastbyte = lastbyte;
6281 				NFSUNLOCKCLSTATE();
6282 			} else if (error == NFSERR_OPENMODE &&
6283 			    rwaccess == NFSV4OPEN_ACCESSREAD) {
6284 				NFSLOCKMNT(nmp);
6285 				nmp->nm_state |= NFSSTA_OPENMODE;
6286 				NFSUNLOCKMNT(nmp);
6287 			} else if ((error == NFSERR_NOSPC ||
6288 			    error == NFSERR_IO || error == NFSERR_NXIO) &&
6289 			    nmp->nm_minorvers == NFSV42_MINORVERSION) {
6290 				if (docommit != 0)
6291 					op = NFSV4OP_COMMIT;
6292 				else if (rwaccess == NFSV4OPEN_ACCESSREAD)
6293 					op = NFSV4OP_READ;
6294 				else
6295 					op = NFSV4OP_WRITE;
6296 				nfsrpc_layouterror(nmp, np->n_fhp->nfh_fh,
6297 				    np->n_fhp->nfh_len, off, xfer,
6298 				    &layp->nfsly_stateid, newcred, p, error, op,
6299 				    dip->nfsdi_deviceid);
6300 				error = EIO;
6301 			} else
6302 				error = EIO;
6303 			if (error == 0)
6304 				len -= (oresid - (uint64_t)uiop->uio_resid);
6305 			else if (mirrorcnt > 1 && rwaccess ==
6306 			    NFSV4OPEN_ACCESSWRITE && docommit == 0) {
6307 				/*
6308 				 * In case the rpc gets retried, roll the
6309 				 * uio fields changed by nfsm_uiombuflist()
6310 				 * back.
6311 				 */
6312 				uiop->uio_offset = offs;
6313 				uiop->uio_resid = resid;
6314 				uiop->uio_iov->iov_base = iovbase;
6315 				uiop->uio_iov->iov_len = iovlen;
6316 			}
6317 		}
6318 	}
6319 	if (lckp != NULL)
6320 		nfscl_lockderef(lckp);
6321 	NFSFREECRED(newcred);
6322 	nfscl_rellayout(layp, 0);
6323 	nfscl_relref(nmp);
6324 	return (error);
6325 }
6326 
6327 /*
6328  * Find a file layout that will handle the first bytes of the requested
6329  * range and return the information from it needed to the I/O operation.
6330  */
6331 int
6332 nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess,
6333     struct nfsclflayout **retflpp)
6334 {
6335 	struct nfsclflayout *flp, *nflp, *rflp;
6336 	uint32_t rw;
6337 
6338 	rflp = NULL;
6339 	rw = rwaccess;
6340 	/* For reading, do the Read list first and then the Write list. */
6341 	do {
6342 		if (rw == NFSV4OPEN_ACCESSREAD)
6343 			flp = LIST_FIRST(&lyp->nfsly_flayread);
6344 		else
6345 			flp = LIST_FIRST(&lyp->nfsly_flayrw);
6346 		while (flp != NULL) {
6347 			nflp = LIST_NEXT(flp, nfsfl_list);
6348 			if (flp->nfsfl_off > off)
6349 				break;
6350 			if (flp->nfsfl_end > off &&
6351 			    (rflp == NULL || rflp->nfsfl_end < flp->nfsfl_end))
6352 				rflp = flp;
6353 			flp = nflp;
6354 		}
6355 		if (rw == NFSV4OPEN_ACCESSREAD)
6356 			rw = NFSV4OPEN_ACCESSWRITE;
6357 		else
6358 			rw = 0;
6359 	} while (rw != 0);
6360 	if (rflp != NULL) {
6361 		/* This one covers the most bytes starting at off. */
6362 		*retflpp = rflp;
6363 		return (0);
6364 	}
6365 	return (EIO);
6366 }
6367 
6368 /*
6369  * Do I/O using an NFSv4.1 or NFSv4.2 file layout.
6370  */
6371 static int
6372 nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6373     int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
6374     struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
6375     uint64_t len, int docommit, struct ucred *cred, NFSPROC_T *p)
6376 {
6377 	uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer;
6378 	int commit_thru_mds, error, stripe_index, stripe_pos, minorvers;
6379 	struct nfsnode *np;
6380 	struct nfsfh *fhp;
6381 	struct nfsclds **dspp;
6382 
6383 	np = VTONFS(vp);
6384 	rel_off = off - flp->nfsfl_patoff;
6385 	stripe_unit_size = flp->nfsfl_util & NFSFLAYUTIL_STRIPE_MASK;
6386 	stripe_pos = (rel_off / stripe_unit_size + flp->nfsfl_stripe1) %
6387 	    dp->nfsdi_stripecnt;
6388 	transfer = stripe_unit_size - (rel_off % stripe_unit_size);
6389 	error = 0;
6390 
6391 	/* Loop around, doing I/O for each stripe unit. */
6392 	while (len > 0 && error == 0) {
6393 		stripe_index = nfsfldi_stripeindex(dp, stripe_pos);
6394 		dspp = nfsfldi_addr(dp, stripe_index);
6395 		if (((*dspp)->nfsclds_flags & NFSCLDS_MINORV2) != 0)
6396 			minorvers = NFSV42_MINORVERSION;
6397 		else
6398 			minorvers = NFSV41_MINORVERSION;
6399 		if (len > transfer && docommit == 0)
6400 			xfer = transfer;
6401 		else
6402 			xfer = len;
6403 		if ((flp->nfsfl_util & NFSFLAYUTIL_DENSE) != 0) {
6404 			/* Dense layout. */
6405 			if (stripe_pos >= flp->nfsfl_fhcnt)
6406 				return (EIO);
6407 			fhp = flp->nfsfl_fh[stripe_pos];
6408 			io_off = (rel_off / (stripe_unit_size *
6409 			    dp->nfsdi_stripecnt)) * stripe_unit_size +
6410 			    rel_off % stripe_unit_size;
6411 		} else {
6412 			/* Sparse layout. */
6413 			if (flp->nfsfl_fhcnt > 1) {
6414 				if (stripe_index >= flp->nfsfl_fhcnt)
6415 					return (EIO);
6416 				fhp = flp->nfsfl_fh[stripe_index];
6417 			} else if (flp->nfsfl_fhcnt == 1)
6418 				fhp = flp->nfsfl_fh[0];
6419 			else
6420 				fhp = np->n_fhp;
6421 			io_off = off;
6422 		}
6423 		if ((flp->nfsfl_util & NFSFLAYUTIL_COMMIT_THRU_MDS) != 0) {
6424 			commit_thru_mds = 1;
6425 			if (docommit != 0)
6426 				error = EIO;
6427 		} else {
6428 			commit_thru_mds = 0;
6429 			NFSLOCKNODE(np);
6430 			np->n_flag |= NDSCOMMIT;
6431 			NFSUNLOCKNODE(np);
6432 		}
6433 		if (docommit != 0) {
6434 			if (error == 0)
6435 				error = nfsrpc_commitds(vp, io_off, xfer,
6436 				    *dspp, fhp, NFS_VER4, minorvers, cred, p);
6437 			if (error == 0) {
6438 				/*
6439 				 * Set both eof and uio_resid = 0 to end any
6440 				 * loops.
6441 				 */
6442 				*eofp = 1;
6443 				uiop->uio_resid = 0;
6444 			} else {
6445 				NFSLOCKNODE(np);
6446 				np->n_flag &= ~NDSCOMMIT;
6447 				NFSUNLOCKNODE(np);
6448 			}
6449 		} else if (rwflag == NFSV4OPEN_ACCESSREAD)
6450 			error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
6451 			    io_off, xfer, fhp, 0, NFS_VER4, minorvers, cred, p);
6452 		else {
6453 			error = nfsrpc_writeds(vp, uiop, iomode, must_commit,
6454 			    stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds,
6455 			    0, NFS_VER4, minorvers, cred, p);
6456 			if (error == 0) {
6457 				NFSLOCKCLSTATE();
6458 				lyp->nfsly_flags |= NFSLY_WRITTEN;
6459 				NFSUNLOCKCLSTATE();
6460 			}
6461 		}
6462 		if (error == 0) {
6463 			transfer = stripe_unit_size;
6464 			stripe_pos = (stripe_pos + 1) % dp->nfsdi_stripecnt;
6465 			len -= xfer;
6466 			off += xfer;
6467 		}
6468 	}
6469 	return (error);
6470 }
6471 
6472 /*
6473  * Do I/O using an NFSv4.1 flex file layout.
6474  */
6475 static int
6476 nfscl_dofflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6477     int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
6478     struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
6479     uint64_t len, int mirror, int docommit, struct mbuf *mp,
6480     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
6481 {
6482 	uint64_t xfer;
6483 	int error;
6484 	struct nfsnode *np;
6485 	struct nfsfh *fhp;
6486 	struct nfsclds **dspp;
6487 	struct ucred *tcred;
6488 	struct mbuf *m, *m2;
6489 	uint32_t copylen;
6490 
6491 	np = VTONFS(vp);
6492 	error = 0;
6493 	NFSCL_DEBUG(4, "nfscl_dofflayoutio: off=%ju len=%ju\n", (uintmax_t)off,
6494 	    (uintmax_t)len);
6495 	/* Loop around, doing I/O for each stripe unit. */
6496 	while (len > 0 && error == 0) {
6497 		dspp = nfsfldi_addr(dp, 0);
6498 		fhp = flp->nfsfl_ffm[mirror].fh[dp->nfsdi_versindex];
6499 		stateidp = &flp->nfsfl_ffm[mirror].st;
6500 		NFSCL_DEBUG(4, "mirror=%d vind=%d fhlen=%d st.seqid=0x%x\n",
6501 		    mirror, dp->nfsdi_versindex, fhp->nfh_len, stateidp->seqid);
6502 		if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0) {
6503 			tcred = NFSNEWCRED(cred);
6504 			tcred->cr_uid = flp->nfsfl_ffm[mirror].user;
6505 			tcred->cr_groups[0] = flp->nfsfl_ffm[mirror].group;
6506 			tcred->cr_ngroups = 1;
6507 		} else
6508 			tcred = cred;
6509 		if (rwflag == NFSV4OPEN_ACCESSREAD)
6510 			copylen = dp->nfsdi_rsize;
6511 		else {
6512 			copylen = dp->nfsdi_wsize;
6513 			if (len > copylen && mp != NULL) {
6514 				/*
6515 				 * When a mirrored configuration needs to do
6516 				 * multiple writes to each mirror, all writes
6517 				 * except the last one must be a multiple of
6518 				 * 4 bytes.  This is required so that the XDR
6519 				 * does not need padding.
6520 				 * If possible, clip the size to an exact
6521 				 * multiple of the mbuf length, so that the
6522 				 * split will be on an mbuf boundary.
6523 				 */
6524 				copylen &= 0xfffffffc;
6525 				if (copylen > mp->m_len)
6526 					copylen = copylen / mp->m_len *
6527 					    mp->m_len;
6528 			}
6529 		}
6530 		NFSLOCKNODE(np);
6531 		np->n_flag |= NDSCOMMIT;
6532 		NFSUNLOCKNODE(np);
6533 		if (len > copylen && docommit == 0)
6534 			xfer = copylen;
6535 		else
6536 			xfer = len;
6537 		if (docommit != 0) {
6538 			if (error == 0) {
6539 				/*
6540 				 * Do last mirrored DS commit with this thread.
6541 				 */
6542 				if (mirror < flp->nfsfl_mirrorcnt - 1)
6543 					error = nfsio_commitds(vp, off, xfer,
6544 					    *dspp, fhp, dp->nfsdi_vers,
6545 					    dp->nfsdi_minorvers, drpc, tcred,
6546 					    p);
6547 				else
6548 					error = nfsrpc_commitds(vp, off, xfer,
6549 					    *dspp, fhp, dp->nfsdi_vers,
6550 					    dp->nfsdi_minorvers, tcred, p);
6551 				NFSCL_DEBUG(4, "commitds=%d\n", error);
6552 				if (error != 0 && error != EACCES && error !=
6553 				    ESTALE) {
6554 					NFSCL_DEBUG(4,
6555 					    "DS layreterr for commit\n");
6556 					nfscl_dserr(NFSV4OP_COMMIT, error, dp,
6557 					    lyp, *dspp);
6558 				}
6559 			}
6560 			NFSCL_DEBUG(4, "aft nfsio_commitds=%d\n", error);
6561 			if (error == 0) {
6562 				/*
6563 				 * Set both eof and uio_resid = 0 to end any
6564 				 * loops.
6565 				 */
6566 				*eofp = 1;
6567 				uiop->uio_resid = 0;
6568 			} else {
6569 				NFSLOCKNODE(np);
6570 				np->n_flag &= ~NDSCOMMIT;
6571 				NFSUNLOCKNODE(np);
6572 			}
6573 		} else if (rwflag == NFSV4OPEN_ACCESSREAD) {
6574 			error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
6575 			    off, xfer, fhp, 1, dp->nfsdi_vers,
6576 			    dp->nfsdi_minorvers, tcred, p);
6577 			NFSCL_DEBUG(4, "readds=%d\n", error);
6578 			if (error != 0 && error != EACCES && error != ESTALE) {
6579 				NFSCL_DEBUG(4, "DS layreterr for read\n");
6580 				nfscl_dserr(NFSV4OP_READ, error, dp, lyp,
6581 				    *dspp);
6582 			}
6583 		} else {
6584 			if (flp->nfsfl_mirrorcnt == 1) {
6585 				error = nfsrpc_writeds(vp, uiop, iomode,
6586 				    must_commit, stateidp, *dspp, off, xfer,
6587 				    fhp, 0, 1, dp->nfsdi_vers,
6588 				    dp->nfsdi_minorvers, tcred, p);
6589 				if (error == 0) {
6590 					NFSLOCKCLSTATE();
6591 					lyp->nfsly_flags |= NFSLY_WRITTEN;
6592 					NFSUNLOCKCLSTATE();
6593 				}
6594 			} else {
6595 				m = mp;
6596 				if (xfer < len) {
6597 					/* The mbuf list must be split. */
6598 					m2 = nfsm_split(mp, xfer);
6599 					if (m2 != NULL)
6600 						mp = m2;
6601 					else {
6602 						m_freem(mp);
6603 						error = EIO;
6604 					}
6605 				}
6606 				NFSCL_DEBUG(4, "mcopy len=%jd xfer=%jd\n",
6607 				    (uintmax_t)len, (uintmax_t)xfer);
6608 				/*
6609 				 * Do last write to a mirrored DS with this
6610 				 * thread.
6611 				 */
6612 				if (error == 0) {
6613 					if (mirror < flp->nfsfl_mirrorcnt - 1)
6614 						error = nfsio_writedsmir(vp,
6615 						    iomode, must_commit,
6616 						    stateidp, *dspp, off,
6617 						    xfer, fhp, m,
6618 						    dp->nfsdi_vers,
6619 						    dp->nfsdi_minorvers, drpc,
6620 						    tcred, p);
6621 					else
6622 						error = nfsrpc_writedsmir(vp,
6623 						    iomode, must_commit,
6624 						    stateidp, *dspp, off,
6625 						    xfer, fhp, m,
6626 						    dp->nfsdi_vers,
6627 						    dp->nfsdi_minorvers, tcred,
6628 						    p);
6629 				}
6630 				NFSCL_DEBUG(4, "nfsio_writedsmir=%d\n", error);
6631 				if (error != 0 && error != EACCES && error !=
6632 				    ESTALE) {
6633 					NFSCL_DEBUG(4,
6634 					    "DS layreterr for write\n");
6635 					nfscl_dserr(NFSV4OP_WRITE, error, dp,
6636 					    lyp, *dspp);
6637 				}
6638 			}
6639 		}
6640 		NFSCL_DEBUG(4, "aft read/writeds=%d\n", error);
6641 		if (error == 0) {
6642 			len -= xfer;
6643 			off += xfer;
6644 		}
6645 		if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0)
6646 			NFSFREECRED(tcred);
6647 	}
6648 	NFSCL_DEBUG(4, "eo nfscl_dofflayoutio=%d\n", error);
6649 	return (error);
6650 }
6651 
6652 /*
6653  * The actual read RPC done to a DS.
6654  */
6655 static int
6656 nfsrpc_readds(vnode_t vp, struct uio *uiop, nfsv4stateid_t *stateidp, int *eofp,
6657     struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, int flex,
6658     int vers, int minorvers, struct ucred *cred, NFSPROC_T *p)
6659 {
6660 	uint32_t *tl;
6661 	int attrflag, error, retlen;
6662 	struct nfsrv_descript nfsd;
6663 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6664 	struct nfsrv_descript *nd = &nfsd;
6665 	struct nfssockreq *nrp;
6666 	struct nfsvattr na;
6667 
6668 	nd->nd_mrep = NULL;
6669 	if (vers == 0 || vers == NFS_VER4) {
6670 		nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh,
6671 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6672 		    NULL);
6673 		vers = NFS_VER4;
6674 		NFSCL_DEBUG(4, "nfsrpc_readds: vers4 minvers=%d\n", minorvers);
6675 		if (flex != 0)
6676 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6677 		else
6678 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
6679 	} else {
6680 		nfscl_reqstart(nd, NFSPROC_READ, nmp, fhp->nfh_fh,
6681 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6682 		    NULL);
6683 		NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_READ]);
6684 		NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_READDS]);
6685 		NFSCL_DEBUG(4, "nfsrpc_readds: vers3\n");
6686 	}
6687 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
6688 	txdr_hyper(io_off, tl);
6689 	*(tl + 2) = txdr_unsigned(len);
6690 	nrp = dsp->nfsclds_sockp;
6691 	NFSCL_DEBUG(4, "nfsrpc_readds: nrp=%p\n", nrp);
6692 	if (nrp == NULL)
6693 		/* If NULL, use the MDS socket. */
6694 		nrp = &nmp->nm_sockreq;
6695 	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6696 	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6697 	NFSCL_DEBUG(4, "nfsrpc_readds: stat=%d err=%d\n", nd->nd_repstat,
6698 	    error);
6699 	if (error != 0)
6700 		return (error);
6701 	if (vers == NFS_VER3) {
6702 		error = nfscl_postop_attr(nd, &na, &attrflag);
6703 		NFSCL_DEBUG(4, "nfsrpc_readds: postop=%d\n", error);
6704 		if (error != 0)
6705 			goto nfsmout;
6706 	}
6707 	if (nd->nd_repstat != 0) {
6708 		error = nd->nd_repstat;
6709 		goto nfsmout;
6710 	}
6711 	if (vers == NFS_VER3) {
6712 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
6713 		*eofp = fxdr_unsigned(int, *(tl + 1));
6714 	} else {
6715 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
6716 		*eofp = fxdr_unsigned(int, *tl);
6717 	}
6718 	NFSM_STRSIZ(retlen, len);
6719 	NFSCL_DEBUG(4, "nfsrpc_readds: retlen=%d eof=%d\n", retlen, *eofp);
6720 	error = nfsm_mbufuio(nd, uiop, retlen);
6721 nfsmout:
6722 	if (nd->nd_mrep != NULL)
6723 		m_freem(nd->nd_mrep);
6724 	return (error);
6725 }
6726 
6727 /*
6728  * The actual write RPC done to a DS.
6729  */
6730 static int
6731 nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6732     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
6733     struct nfsfh *fhp, int commit_thru_mds, int flex, int vers, int minorvers,
6734     struct ucred *cred, NFSPROC_T *p)
6735 {
6736 	uint32_t *tl;
6737 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6738 	int attrflag, error, rlen, commit, committed = NFSWRITE_FILESYNC;
6739 	int32_t backup;
6740 	struct nfsrv_descript nfsd;
6741 	struct nfsrv_descript *nd = &nfsd;
6742 	struct nfssockreq *nrp;
6743 	struct nfsvattr na;
6744 
6745 	KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
6746 	nd->nd_mrep = NULL;
6747 	if (vers == 0 || vers == NFS_VER4) {
6748 		nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
6749 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6750 		    NULL);
6751 		NFSCL_DEBUG(4, "nfsrpc_writeds: vers4 minvers=%d\n", minorvers);
6752 		vers = NFS_VER4;
6753 		if (flex != 0)
6754 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6755 		else
6756 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
6757 		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
6758 	} else {
6759 		nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
6760 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6761 		    NULL);
6762 		NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITE]);
6763 		NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITEDS]);
6764 		NFSCL_DEBUG(4, "nfsrpc_writeds: vers3\n");
6765 		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
6766 	}
6767 	txdr_hyper(io_off, tl);
6768 	tl += 2;
6769 	if (vers == NFS_VER3)
6770 		*tl++ = txdr_unsigned(len);
6771 	*tl++ = txdr_unsigned(*iomode);
6772 	*tl = txdr_unsigned(len);
6773 	nfsm_uiombuf(nd, uiop, len);
6774 	nrp = dsp->nfsclds_sockp;
6775 	if (nrp == NULL)
6776 		/* If NULL, use the MDS socket. */
6777 		nrp = &nmp->nm_sockreq;
6778 	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6779 	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6780 	NFSCL_DEBUG(4, "nfsrpc_writeds: err=%d stat=%d\n", error,
6781 	    nd->nd_repstat);
6782 	if (error != 0)
6783 		return (error);
6784 	if (nd->nd_repstat != 0) {
6785 		/*
6786 		 * In case the rpc gets retried, roll
6787 		 * the uio fields changed by nfsm_uiombuf()
6788 		 * back.
6789 		 */
6790 		uiop->uio_offset -= len;
6791 		uiop->uio_resid += len;
6792 		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base - len;
6793 		uiop->uio_iov->iov_len += len;
6794 		error = nd->nd_repstat;
6795 	} else {
6796 		if (vers == NFS_VER3) {
6797 			error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
6798 			    NULL);
6799 			NFSCL_DEBUG(4, "nfsrpc_writeds: wcc_data=%d\n", error);
6800 			if (error != 0)
6801 				goto nfsmout;
6802 		}
6803 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
6804 		rlen = fxdr_unsigned(int, *tl++);
6805 		NFSCL_DEBUG(4, "nfsrpc_writeds: len=%d rlen=%d\n", len, rlen);
6806 		if (rlen == 0) {
6807 			error = NFSERR_IO;
6808 			goto nfsmout;
6809 		} else if (rlen < len) {
6810 			backup = len - rlen;
6811 			uiop->uio_iov->iov_base =
6812 			    (char *)uiop->uio_iov->iov_base - backup;
6813 			uiop->uio_iov->iov_len += backup;
6814 			uiop->uio_offset -= backup;
6815 			uiop->uio_resid += backup;
6816 			len = rlen;
6817 		}
6818 		commit = fxdr_unsigned(int, *tl++);
6819 
6820 		/*
6821 		 * Return the lowest commitment level
6822 		 * obtained by any of the RPCs.
6823 		 */
6824 		if (committed == NFSWRITE_FILESYNC)
6825 			committed = commit;
6826 		else if (committed == NFSWRITE_DATASYNC &&
6827 		    commit == NFSWRITE_UNSTABLE)
6828 			committed = commit;
6829 		if (commit_thru_mds != 0) {
6830 			NFSLOCKMNT(nmp);
6831 			if (!NFSHASWRITEVERF(nmp)) {
6832 				NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
6833 				NFSSETWRITEVERF(nmp);
6834 			} else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF) &&
6835 			    *must_commit != 2) {
6836 				*must_commit = 1;
6837 				NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
6838 			}
6839 			NFSUNLOCKMNT(nmp);
6840 		} else {
6841 			NFSLOCKDS(dsp);
6842 			if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
6843 				NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6844 				dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
6845 			} else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF) &&
6846 			    *must_commit != 2) {
6847 				*must_commit = 1;
6848 				NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6849 			}
6850 			NFSUNLOCKDS(dsp);
6851 		}
6852 	}
6853 nfsmout:
6854 	if (nd->nd_mrep != NULL)
6855 		m_freem(nd->nd_mrep);
6856 	*iomode = committed;
6857 	if (nd->nd_repstat != 0 && error == 0)
6858 		error = nd->nd_repstat;
6859 	return (error);
6860 }
6861 
6862 /*
6863  * The actual write RPC done to a DS.
6864  * This variant is called from a separate kernel process for mirrors.
6865  * Any short write is considered an IO error.
6866  */
6867 static int
6868 nfsrpc_writedsmir(vnode_t vp, int *iomode, int *must_commit,
6869     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
6870     struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers,
6871     struct ucred *cred, NFSPROC_T *p)
6872 {
6873 	uint32_t *tl;
6874 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6875 	int attrflag, error, commit, committed = NFSWRITE_FILESYNC, rlen;
6876 	struct nfsrv_descript nfsd;
6877 	struct nfsrv_descript *nd = &nfsd;
6878 	struct nfssockreq *nrp;
6879 	struct nfsvattr na;
6880 
6881 	nd->nd_mrep = NULL;
6882 	if (vers == 0 || vers == NFS_VER4) {
6883 		nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
6884 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6885 		    NULL);
6886 		vers = NFS_VER4;
6887 		NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers4 minvers=%d\n",
6888 		    minorvers);
6889 		nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6890 		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
6891 	} else {
6892 		nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
6893 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6894 		    NULL);
6895 		NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITE]);
6896 		NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITEDS]);
6897 		NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers3\n");
6898 		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
6899 	}
6900 	txdr_hyper(io_off, tl);
6901 	tl += 2;
6902 	if (vers == NFS_VER3)
6903 		*tl++ = txdr_unsigned(len);
6904 	*tl++ = txdr_unsigned(*iomode);
6905 	*tl = txdr_unsigned(len);
6906 	if (len > 0) {
6907 		/* Put data in mbuf chain. */
6908 		nd->nd_mb->m_next = m;
6909 	}
6910 	nrp = dsp->nfsclds_sockp;
6911 	if (nrp == NULL)
6912 		/* If NULL, use the MDS socket. */
6913 		nrp = &nmp->nm_sockreq;
6914 	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6915 	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6916 	NFSCL_DEBUG(4, "nfsrpc_writedsmir: err=%d stat=%d\n", error,
6917 	    nd->nd_repstat);
6918 	if (error != 0)
6919 		return (error);
6920 	if (nd->nd_repstat != 0)
6921 		error = nd->nd_repstat;
6922 	else {
6923 		if (vers == NFS_VER3) {
6924 			error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
6925 			    NULL);
6926 			NFSCL_DEBUG(4, "nfsrpc_writedsmir: wcc_data=%d\n",
6927 			    error);
6928 			if (error != 0)
6929 				goto nfsmout;
6930 		}
6931 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
6932 		rlen = fxdr_unsigned(int, *tl++);
6933 		NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n", len,
6934 		    rlen);
6935 		if (rlen != len) {
6936 			error = NFSERR_IO;
6937 			NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n",
6938 			    len, rlen);
6939 			goto nfsmout;
6940 		}
6941 		commit = fxdr_unsigned(int, *tl++);
6942 
6943 		/*
6944 		 * Return the lowest commitment level
6945 		 * obtained by any of the RPCs.
6946 		 */
6947 		if (committed == NFSWRITE_FILESYNC)
6948 			committed = commit;
6949 		else if (committed == NFSWRITE_DATASYNC &&
6950 		    commit == NFSWRITE_UNSTABLE)
6951 			committed = commit;
6952 		NFSLOCKDS(dsp);
6953 		if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
6954 			NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6955 			dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
6956 		} else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF) &&
6957 		    *must_commit != 2) {
6958 			*must_commit = 1;
6959 			NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6960 		}
6961 		NFSUNLOCKDS(dsp);
6962 	}
6963 nfsmout:
6964 	if (nd->nd_mrep != NULL)
6965 		m_freem(nd->nd_mrep);
6966 	*iomode = committed;
6967 	if (nd->nd_repstat != 0 && error == 0)
6968 		error = nd->nd_repstat;
6969 	return (error);
6970 }
6971 
6972 /*
6973  * Start up the thread that will execute nfsrpc_writedsmir().
6974  */
6975 static void
6976 start_writedsmir(void *arg, int pending)
6977 {
6978 	struct nfsclwritedsdorpc *drpc;
6979 
6980 	drpc = (struct nfsclwritedsdorpc *)arg;
6981 	drpc->err = nfsrpc_writedsmir(drpc->vp, &drpc->iomode,
6982 	    &drpc->must_commit, drpc->stateidp, drpc->dsp, drpc->off, drpc->len,
6983 	    drpc->fhp, drpc->m, drpc->vers, drpc->minorvers, drpc->cred,
6984 	    drpc->p);
6985 	drpc->done = 1;
6986 	crfree(drpc->cred);
6987 	NFSCL_DEBUG(4, "start_writedsmir: err=%d\n", drpc->err);
6988 }
6989 
6990 /*
6991  * Set up the write DS mirror call for the pNFS I/O thread.
6992  */
6993 static int
6994 nfsio_writedsmir(vnode_t vp, int *iomode, int *must_commit,
6995     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t off, int len,
6996     struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers,
6997     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
6998 {
6999 	int error, ret;
7000 
7001 	error = 0;
7002 	drpc->done = 0;
7003 	drpc->vp = vp;
7004 	drpc->iomode = *iomode;
7005 	drpc->must_commit = *must_commit;
7006 	drpc->stateidp = stateidp;
7007 	drpc->dsp = dsp;
7008 	drpc->off = off;
7009 	drpc->len = len;
7010 	drpc->fhp = fhp;
7011 	drpc->m = m;
7012 	drpc->vers = vers;
7013 	drpc->minorvers = minorvers;
7014 	drpc->cred = crhold(cred);
7015 	drpc->p = p;
7016 	drpc->inprog = 0;
7017 	ret = EIO;
7018 	if (nfs_pnfsiothreads != 0) {
7019 		ret = nfs_pnfsio(start_writedsmir, drpc);
7020 		NFSCL_DEBUG(4, "nfsio_writedsmir: nfs_pnfsio=%d\n", ret);
7021 	}
7022 	if (ret != 0) {
7023 		error = nfsrpc_writedsmir(vp, iomode, &drpc->must_commit,
7024 		    stateidp, dsp, off, len, fhp, m, vers, minorvers, cred, p);
7025 		crfree(drpc->cred);
7026 	}
7027 	NFSCL_DEBUG(4, "nfsio_writedsmir: error=%d\n", error);
7028 	return (error);
7029 }
7030 
7031 /*
7032  * Free up the nfsclds structure.
7033  */
7034 void
7035 nfscl_freenfsclds(struct nfsclds *dsp)
7036 {
7037 	int i;
7038 
7039 	if (dsp == NULL)
7040 		return;
7041 	if (dsp->nfsclds_sockp != NULL) {
7042 		NFSFREECRED(dsp->nfsclds_sockp->nr_cred);
7043 		NFSFREEMUTEX(&dsp->nfsclds_sockp->nr_mtx);
7044 		free(dsp->nfsclds_sockp->nr_nam, M_SONAME);
7045 		free(dsp->nfsclds_sockp, M_NFSSOCKREQ);
7046 	}
7047 	NFSFREEMUTEX(&dsp->nfsclds_mtx);
7048 	NFSFREEMUTEX(&dsp->nfsclds_sess.nfsess_mtx);
7049 	for (i = 0; i < NFSV4_CBSLOTS; i++) {
7050 		if (dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply != NULL)
7051 			m_freem(
7052 			    dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply);
7053 	}
7054 	free(dsp, M_NFSCLDS);
7055 }
7056 
7057 static enum nfsclds_state
7058 nfscl_getsameserver(struct nfsmount *nmp, struct nfsclds *newdsp,
7059     struct nfsclds **retdspp, uint32_t *sequencep)
7060 {
7061 	struct nfsclds *dsp;
7062 	int fndseq;
7063 
7064 	/*
7065 	 * Search the list of nfsclds structures for one with the same
7066 	 * server.
7067 	 */
7068 	fndseq = 0;
7069 	TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) {
7070 		if (dsp->nfsclds_servownlen == newdsp->nfsclds_servownlen &&
7071 		    dsp->nfsclds_servownlen != 0 &&
7072 		    !NFSBCMP(dsp->nfsclds_serverown, newdsp->nfsclds_serverown,
7073 		    dsp->nfsclds_servownlen) &&
7074 		    dsp->nfsclds_sess.nfsess_defunct == 0) {
7075 			NFSCL_DEBUG(4, "fnd same fdsp=%p dsp=%p flg=0x%x\n",
7076 			    TAILQ_FIRST(&nmp->nm_sess), dsp,
7077 			    dsp->nfsclds_flags);
7078 			if (fndseq == 0) {
7079 				/* Get sequenceid# from first entry. */
7080 				*sequencep =
7081 				    dsp->nfsclds_sess.nfsess_sequenceid;
7082 				fndseq = 1;
7083 			}
7084 			/* Server major id matches. */
7085 			if ((dsp->nfsclds_flags & NFSCLDS_DS) != 0) {
7086 				*retdspp = dsp;
7087 				return (NFSDSP_USETHISSESSION);
7088 			}
7089 		}
7090 	}
7091 	if (fndseq != 0)
7092 		return (NFSDSP_SEQTHISSESSION);
7093 	return (NFSDSP_NOTFOUND);
7094 }
7095 
7096 /*
7097  * NFS commit rpc to a NFSv4.1 DS.
7098  */
7099 static int
7100 nfsrpc_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
7101     struct nfsfh *fhp, int vers, int minorvers, struct ucred *cred,
7102     NFSPROC_T *p)
7103 {
7104 	uint32_t *tl;
7105 	struct nfsrv_descript nfsd, *nd = &nfsd;
7106 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
7107 	struct nfssockreq *nrp;
7108 	struct nfsvattr na;
7109 	int attrflag, error;
7110 
7111 	nd->nd_mrep = NULL;
7112 	if (vers == 0 || vers == NFS_VER4) {
7113 		nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh,
7114 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
7115 		    NULL);
7116 		vers = NFS_VER4;
7117 	} else {
7118 		nfscl_reqstart(nd, NFSPROC_COMMIT, nmp, fhp->nfh_fh,
7119 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
7120 		    NULL);
7121 		NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_COMMIT]);
7122 		NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_COMMITDS]);
7123 	}
7124 	NFSCL_DEBUG(4, "nfsrpc_commitds: vers=%d minvers=%d\n", vers,
7125 	    minorvers);
7126 	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
7127 	txdr_hyper(offset, tl);
7128 	tl += 2;
7129 	*tl = txdr_unsigned(cnt);
7130 	nrp = dsp->nfsclds_sockp;
7131 	if (nrp == NULL)
7132 		/* If NULL, use the MDS socket. */
7133 		nrp = &nmp->nm_sockreq;
7134 	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
7135 	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
7136 	NFSCL_DEBUG(4, "nfsrpc_commitds: err=%d stat=%d\n", error,
7137 	    nd->nd_repstat);
7138 	if (error != 0)
7139 		return (error);
7140 	if (nd->nd_repstat == 0) {
7141 		if (vers == NFS_VER3) {
7142 			error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
7143 			    NULL);
7144 			NFSCL_DEBUG(4, "nfsrpc_commitds: wccdata=%d\n", error);
7145 			if (error != 0)
7146 				goto nfsmout;
7147 		}
7148 		NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
7149 		NFSLOCKDS(dsp);
7150 		if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
7151 			NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
7152 			error = NFSERR_STALEWRITEVERF;
7153 		}
7154 		NFSUNLOCKDS(dsp);
7155 	}
7156 nfsmout:
7157 	if (error == 0 && nd->nd_repstat != 0)
7158 		error = nd->nd_repstat;
7159 	m_freem(nd->nd_mrep);
7160 	return (error);
7161 }
7162 
7163 /*
7164  * Start up the thread that will execute nfsrpc_commitds().
7165  */
7166 static void
7167 start_commitds(void *arg, int pending)
7168 {
7169 	struct nfsclwritedsdorpc *drpc;
7170 
7171 	drpc = (struct nfsclwritedsdorpc *)arg;
7172 	drpc->err = nfsrpc_commitds(drpc->vp, drpc->off, drpc->len,
7173 	    drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers, drpc->cred,
7174 	    drpc->p);
7175 	drpc->done = 1;
7176 	crfree(drpc->cred);
7177 	NFSCL_DEBUG(4, "start_commitds: err=%d\n", drpc->err);
7178 }
7179 
7180 /*
7181  * Set up the commit DS mirror call for the pNFS I/O thread.
7182  */
7183 static int
7184 nfsio_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
7185     struct nfsfh *fhp, int vers, int minorvers,
7186     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
7187 {
7188 	int error, ret;
7189 
7190 	error = 0;
7191 	drpc->done = 0;
7192 	drpc->vp = vp;
7193 	drpc->off = offset;
7194 	drpc->len = cnt;
7195 	drpc->dsp = dsp;
7196 	drpc->fhp = fhp;
7197 	drpc->vers = vers;
7198 	drpc->minorvers = minorvers;
7199 	drpc->cred = crhold(cred);
7200 	drpc->p = p;
7201 	drpc->inprog = 0;
7202 	ret = EIO;
7203 	if (nfs_pnfsiothreads != 0) {
7204 		ret = nfs_pnfsio(start_commitds, drpc);
7205 		NFSCL_DEBUG(4, "nfsio_commitds: nfs_pnfsio=%d\n", ret);
7206 	}
7207 	if (ret != 0) {
7208 		error = nfsrpc_commitds(vp, offset, cnt, dsp, fhp, vers,
7209 		    minorvers, cred, p);
7210 		crfree(drpc->cred);
7211 	}
7212 	NFSCL_DEBUG(4, "nfsio_commitds: error=%d\n", error);
7213 	return (error);
7214 }
7215 
7216 /*
7217  * NFS Advise rpc
7218  */
7219 int
7220 nfsrpc_advise(vnode_t vp, off_t offset, uint64_t cnt, int advise,
7221     struct ucred *cred, NFSPROC_T *p)
7222 {
7223 	u_int32_t *tl;
7224 	struct nfsrv_descript nfsd, *nd = &nfsd;
7225 	nfsattrbit_t hints;
7226 	int error;
7227 
7228 	NFSZERO_ATTRBIT(&hints);
7229 	if (advise == POSIX_FADV_WILLNEED)
7230 		NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED);
7231 	else if (advise == POSIX_FADV_DONTNEED)
7232 		NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED);
7233 	else
7234 		return (0);
7235 	NFSCL_REQSTART(nd, NFSPROC_IOADVISE, vp, cred);
7236 	nfsm_stateidtom(nd, NULL, NFSSTATEID_PUTALLZERO);
7237 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER);
7238 	txdr_hyper(offset, tl);
7239 	tl += 2;
7240 	txdr_hyper(cnt, tl);
7241 	nfsrv_putattrbit(nd, &hints);
7242 	error = nfscl_request(nd, vp, p, cred);
7243 	if (error != 0)
7244 		return (error);
7245 	if (nd->nd_repstat != 0)
7246 		error = nd->nd_repstat;
7247 	m_freem(nd->nd_mrep);
7248 	return (error);
7249 }
7250 
7251 #ifdef notyet
7252 /*
7253  * NFS advise rpc to a NFSv4.2 DS.
7254  */
7255 static int
7256 nfsrpc_adviseds(vnode_t vp, uint64_t offset, int cnt, int advise,
7257     struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers,
7258     struct ucred *cred, NFSPROC_T *p)
7259 {
7260 	uint32_t *tl;
7261 	struct nfsrv_descript nfsd, *nd = &nfsd;
7262 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
7263 	struct nfssockreq *nrp;
7264 	nfsattrbit_t hints;
7265 	int error;
7266 
7267 	/* For NFS DSs prior to NFSv4.2, just return OK. */
7268 	if (vers == NFS_VER3 || minorversion < NFSV42_MINORVERSION)
7269 		return (0);
7270 	NFSZERO_ATTRBIT(&hints);
7271 	if (advise == POSIX_FADV_WILLNEED)
7272 		NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED);
7273 	else if (advise == POSIX_FADV_DONTNEED)
7274 		NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED);
7275 	else
7276 		return (0);
7277 	nd->nd_mrep = NULL;
7278 	nfscl_reqstart(nd, NFSPROC_IOADVISEDS, nmp, fhp->nfh_fh,
7279 	    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers, NULL);
7280 	vers = NFS_VER4;
7281 	NFSCL_DEBUG(4, "nfsrpc_adviseds: vers=%d minvers=%d\n", vers,
7282 	    minorvers);
7283 	nfsm_stateidtom(nd, NULL, NFSSTATEID_PUTALLZERO);
7284 	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
7285 	txdr_hyper(offset, tl);
7286 	tl += 2;
7287 	*tl = txdr_unsigned(cnt);
7288 	nfsrv_putattrbit(nd, &hints);
7289 	nrp = dsp->nfsclds_sockp;
7290 	if (nrp == NULL)
7291 		/* If NULL, use the MDS socket. */
7292 		nrp = &nmp->nm_sockreq;
7293 	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
7294 	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
7295 	NFSCL_DEBUG(4, "nfsrpc_adviseds: err=%d stat=%d\n", error,
7296 	    nd->nd_repstat);
7297 	if (error != 0)
7298 		return (error);
7299 	if (nd->nd_repstat != 0)
7300 		error = nd->nd_repstat;
7301 	m_freem(nd->nd_mrep);
7302 	return (error);
7303 }
7304 
7305 /*
7306  * Start up the thread that will execute nfsrpc_commitds().
7307  */
7308 static void
7309 start_adviseds(void *arg, int pending)
7310 {
7311 	struct nfsclwritedsdorpc *drpc;
7312 
7313 	drpc = (struct nfsclwritedsdorpc *)arg;
7314 	drpc->err = nfsrpc_adviseds(drpc->vp, drpc->off, drpc->len,
7315 	    drpc->advise, drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers,
7316 	    drpc->cred, drpc->p);
7317 	drpc->done = 1;
7318 	crfree(drpc->cred);
7319 	NFSCL_DEBUG(4, "start_adviseds: err=%d\n", drpc->err);
7320 }
7321 
7322 /*
7323  * Set up the advise DS mirror call for the pNFS I/O thread.
7324  */
7325 static int
7326 nfsio_adviseds(vnode_t vp, uint64_t offset, int cnt, int advise,
7327     struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers,
7328     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
7329 {
7330 	int error, ret;
7331 
7332 	error = 0;
7333 	drpc->done = 0;
7334 	drpc->vp = vp;
7335 	drpc->off = offset;
7336 	drpc->len = cnt;
7337 	drpc->advise = advise;
7338 	drpc->dsp = dsp;
7339 	drpc->fhp = fhp;
7340 	drpc->vers = vers;
7341 	drpc->minorvers = minorvers;
7342 	drpc->cred = crhold(cred);
7343 	drpc->p = p;
7344 	drpc->inprog = 0;
7345 	ret = EIO;
7346 	if (nfs_pnfsiothreads != 0) {
7347 		ret = nfs_pnfsio(start_adviseds, drpc);
7348 		NFSCL_DEBUG(4, "nfsio_adviseds: nfs_pnfsio=%d\n", ret);
7349 	}
7350 	if (ret != 0) {
7351 		error = nfsrpc_adviseds(vp, offset, cnt, advise, dsp, fhp, vers,
7352 		    minorvers, cred, p);
7353 		crfree(drpc->cred);
7354 	}
7355 	NFSCL_DEBUG(4, "nfsio_adviseds: error=%d\n", error);
7356 	return (error);
7357 }
7358 #endif	/* notyet */
7359 
7360 /*
7361  * Do the Allocate operation, retrying for recovery.
7362  */
7363 int
7364 nfsrpc_allocate(vnode_t vp, off_t off, off_t len, struct nfsvattr *nap,
7365     int *attrflagp, struct ucred *cred, NFSPROC_T *p)
7366 {
7367 	int error, expireret = 0, retrycnt, nostateid;
7368 	uint32_t clidrev = 0;
7369 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
7370 	struct nfsfh *nfhp = NULL;
7371 	nfsv4stateid_t stateid;
7372 	off_t tmp_off;
7373 	void *lckp;
7374 
7375 	if (len < 0)
7376 		return (EINVAL);
7377 	if (len == 0)
7378 		return (0);
7379 	tmp_off = off + len;
7380 	NFSLOCKMNT(nmp);
7381 	if (tmp_off > nmp->nm_maxfilesize || tmp_off < off) {
7382 		NFSUNLOCKMNT(nmp);
7383 		return (EFBIG);
7384 	}
7385 	if (nmp->nm_clp != NULL)
7386 		clidrev = nmp->nm_clp->nfsc_clientidrev;
7387 	NFSUNLOCKMNT(nmp);
7388 	nfhp = VTONFS(vp)->n_fhp;
7389 	retrycnt = 0;
7390 	do {
7391 		lckp = NULL;
7392 		nostateid = 0;
7393 		nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
7394 		    NFSV4OPEN_ACCESSWRITE, 0, cred, p, &stateid, &lckp);
7395 		if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
7396 		    stateid.other[2] == 0) {
7397 			nostateid = 1;
7398 			NFSCL_DEBUG(1, "stateid0 in allocate\n");
7399 		}
7400 
7401 		/*
7402 		 * Not finding a stateid should probably never happen,
7403 		 * but just return an error for this case.
7404 		 */
7405 		if (nostateid != 0)
7406 			error = EIO;
7407 		else
7408 			error = nfsrpc_allocaterpc(vp, off, len, &stateid,
7409 			    nap, attrflagp, cred, p);
7410 		if (error == NFSERR_STALESTATEID)
7411 			nfscl_initiate_recovery(nmp->nm_clp);
7412 		if (lckp != NULL)
7413 			nfscl_lockderef(lckp);
7414 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
7415 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
7416 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
7417 			(void) nfs_catnap(PZERO, error, "nfs_allocate");
7418 		} else if ((error == NFSERR_EXPIRED ||
7419 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
7420 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
7421 		}
7422 		retrycnt++;
7423 	} while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
7424 	    error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
7425 	    error == NFSERR_STALEDONTRECOVER ||
7426 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
7427 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
7428 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
7429 	if (error != 0 && retrycnt >= 4)
7430 		error = EIO;
7431 	return (error);
7432 }
7433 
7434 /*
7435  * The allocate RPC.
7436  */
7437 static int
7438 nfsrpc_allocaterpc(vnode_t vp, off_t off, off_t len, nfsv4stateid_t *stateidp,
7439     struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p)
7440 {
7441 	uint32_t *tl;
7442 	int error;
7443 	struct nfsrv_descript nfsd;
7444 	struct nfsrv_descript *nd = &nfsd;
7445 	nfsattrbit_t attrbits;
7446 
7447 	*attrflagp = 0;
7448 	NFSCL_REQSTART(nd, NFSPROC_ALLOCATE, vp, cred);
7449 	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
7450 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED);
7451 	txdr_hyper(off, tl); tl += 2;
7452 	txdr_hyper(len, tl); tl += 2;
7453 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
7454 	NFSGETATTR_ATTRBIT(&attrbits);
7455 	nfsrv_putattrbit(nd, &attrbits);
7456 	error = nfscl_request(nd, vp, p, cred);
7457 	if (error != 0)
7458 		return (error);
7459 	if (nd->nd_repstat == 0) {
7460 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7461 		error = nfsm_loadattr(nd, nap);
7462 		if (error == 0)
7463 			*attrflagp = NFS_LATTR_NOSHRINK;
7464 	} else
7465 		error = nd->nd_repstat;
7466 nfsmout:
7467 	m_freem(nd->nd_mrep);
7468 	return (error);
7469 }
7470 
7471 /*
7472  * Set up the XDR arguments for the LayoutGet operation.
7473  */
7474 static void
7475 nfsrv_setuplayoutget(struct nfsrv_descript *nd, int iomode, uint64_t offset,
7476     uint64_t len, uint64_t minlen, nfsv4stateid_t *stateidp, int layouttype,
7477     int layoutlen, int usecurstateid)
7478 {
7479 	uint32_t *tl;
7480 
7481 	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
7482 	    NFSX_STATEID);
7483 	*tl++ = newnfs_false;		/* Don't signal availability. */
7484 	*tl++ = txdr_unsigned(layouttype);
7485 	*tl++ = txdr_unsigned(iomode);
7486 	txdr_hyper(offset, tl);
7487 	tl += 2;
7488 	txdr_hyper(len, tl);
7489 	tl += 2;
7490 	txdr_hyper(minlen, tl);
7491 	tl += 2;
7492 	if (usecurstateid != 0) {
7493 		/* Special stateid for Current stateid. */
7494 		*tl++ = txdr_unsigned(1);
7495 		*tl++ = 0;
7496 		*tl++ = 0;
7497 		*tl++ = 0;
7498 	} else {
7499 		*tl++ = txdr_unsigned(stateidp->seqid);
7500 		NFSCL_DEBUG(4, "layget seq=%d\n", (int)stateidp->seqid);
7501 		*tl++ = stateidp->other[0];
7502 		*tl++ = stateidp->other[1];
7503 		*tl++ = stateidp->other[2];
7504 	}
7505 	*tl = txdr_unsigned(layoutlen);
7506 }
7507 
7508 /*
7509  * Parse the reply for a successful LayoutGet operation.
7510  */
7511 static int
7512 nfsrv_parselayoutget(struct nfsmount *nmp, struct nfsrv_descript *nd,
7513     nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp)
7514 {
7515 	uint32_t *tl;
7516 	struct nfsclflayout *flp, *prevflp, *tflp;
7517 	int cnt, error, fhcnt, gotiomode, i, iomode, j, k, l, laytype, nfhlen;
7518 	int m, mirrorcnt;
7519 	uint64_t retlen, off;
7520 	struct nfsfh *nfhp;
7521 	uint8_t *cp;
7522 	uid_t user;
7523 	gid_t grp;
7524 
7525 	NFSCL_DEBUG(4, "in nfsrv_parselayoutget\n");
7526 	error = 0;
7527 	flp = NULL;
7528 	gotiomode = -1;
7529 	NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_STATEID);
7530 	if (*tl++ != 0)
7531 		*retonclosep = 1;
7532 	else
7533 		*retonclosep = 0;
7534 	stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
7535 	NFSCL_DEBUG(4, "retoncls=%d stseq=%d\n", *retonclosep,
7536 	    (int)stateidp->seqid);
7537 	stateidp->other[0] = *tl++;
7538 	stateidp->other[1] = *tl++;
7539 	stateidp->other[2] = *tl++;
7540 	cnt = fxdr_unsigned(int, *tl);
7541 	NFSCL_DEBUG(4, "layg cnt=%d\n", cnt);
7542 	if (cnt <= 0 || cnt > 10000) {
7543 		/* Don't accept more than 10000 layouts in reply. */
7544 		error = NFSERR_BADXDR;
7545 		goto nfsmout;
7546 	}
7547 	for (i = 0; i < cnt; i++) {
7548 		/* Dissect to the layout type. */
7549 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER +
7550 		    3 * NFSX_UNSIGNED);
7551 		off = fxdr_hyper(tl); tl += 2;
7552 		retlen = fxdr_hyper(tl); tl += 2;
7553 		iomode = fxdr_unsigned(int, *tl++);
7554 		laytype = fxdr_unsigned(int, *tl);
7555 		NFSCL_DEBUG(4, "layt=%d off=%ju len=%ju iom=%d\n", laytype,
7556 		    (uintmax_t)off, (uintmax_t)retlen, iomode);
7557 		/* Ignore length of layout body for now. */
7558 		if (laytype == NFSLAYOUT_NFSV4_1_FILES) {
7559 			/* Parse the File layout up to fhcnt. */
7560 			NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED +
7561 			    NFSX_HYPER + NFSX_V4DEVICEID);
7562 			fhcnt = fxdr_unsigned(int, *(tl + 4 +
7563 			    NFSX_V4DEVICEID / NFSX_UNSIGNED));
7564 			NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
7565 			if (fhcnt < 0 || fhcnt > 100) {
7566 				/* Don't accept more than 100 file handles. */
7567 				error = NFSERR_BADXDR;
7568 				goto nfsmout;
7569 			}
7570 			if (fhcnt > 0)
7571 				flp = malloc(sizeof(*flp) + fhcnt *
7572 				    sizeof(struct nfsfh *), M_NFSFLAYOUT,
7573 				    M_WAITOK);
7574 			else
7575 				flp = malloc(sizeof(*flp), M_NFSFLAYOUT,
7576 				    M_WAITOK);
7577 			flp->nfsfl_flags = NFSFL_FILE;
7578 			flp->nfsfl_fhcnt = 0;
7579 			flp->nfsfl_devp = NULL;
7580 			flp->nfsfl_off = off;
7581 			if (flp->nfsfl_off + retlen < flp->nfsfl_off)
7582 				flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
7583 			else
7584 				flp->nfsfl_end = flp->nfsfl_off + retlen;
7585 			flp->nfsfl_iomode = iomode;
7586 			if (gotiomode == -1)
7587 				gotiomode = flp->nfsfl_iomode;
7588 			/* Ignore layout body length for now. */
7589 			NFSBCOPY(tl, flp->nfsfl_dev, NFSX_V4DEVICEID);
7590 			tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
7591 			flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++);
7592 			NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util);
7593 			mtx_lock(&nmp->nm_mtx);
7594 			if (nmp->nm_minorvers > 1 && (flp->nfsfl_util &
7595 			    NFSFLAYUTIL_IOADVISE_THRU_MDS) != 0)
7596 				nmp->nm_privflag |= NFSMNTP_IOADVISETHRUMDS;
7597 			mtx_unlock(&nmp->nm_mtx);
7598 			flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++);
7599 			flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2;
7600 			NFSCL_DEBUG(4, "stripe1=%u poff=%ju\n",
7601 			    flp->nfsfl_stripe1, (uintmax_t)flp->nfsfl_patoff);
7602 			for (j = 0; j < fhcnt; j++) {
7603 				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7604 				nfhlen = fxdr_unsigned(int, *tl);
7605 				if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) {
7606 					error = NFSERR_BADXDR;
7607 					goto nfsmout;
7608 				}
7609 				nfhp = malloc(sizeof(*nfhp) + nfhlen - 1,
7610 				    M_NFSFH, M_WAITOK);
7611 				flp->nfsfl_fh[j] = nfhp;
7612 				flp->nfsfl_fhcnt++;
7613 				nfhp->nfh_len = nfhlen;
7614 				NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen));
7615 				NFSBCOPY(cp, nfhp->nfh_fh, nfhlen);
7616 			}
7617 		} else if (laytype == NFSLAYOUT_FLEXFILE) {
7618 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED +
7619 			    NFSX_HYPER);
7620 			mirrorcnt = fxdr_unsigned(int, *(tl + 2));
7621 			NFSCL_DEBUG(4, "mirrorcnt=%d\n", mirrorcnt);
7622 			if (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS) {
7623 				error = NFSERR_BADXDR;
7624 				goto nfsmout;
7625 			}
7626 			flp = malloc(sizeof(*flp) + mirrorcnt *
7627 			    sizeof(struct nfsffm), M_NFSFLAYOUT, M_WAITOK);
7628 			flp->nfsfl_flags = NFSFL_FLEXFILE;
7629 			flp->nfsfl_mirrorcnt = mirrorcnt;
7630 			for (j = 0; j < mirrorcnt; j++)
7631 				flp->nfsfl_ffm[j].devp = NULL;
7632 			flp->nfsfl_off = off;
7633 			if (flp->nfsfl_off + retlen < flp->nfsfl_off)
7634 				flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
7635 			else
7636 				flp->nfsfl_end = flp->nfsfl_off + retlen;
7637 			flp->nfsfl_iomode = iomode;
7638 			if (gotiomode == -1)
7639 				gotiomode = flp->nfsfl_iomode;
7640 			flp->nfsfl_stripeunit = fxdr_hyper(tl);
7641 			NFSCL_DEBUG(4, "stripeunit=%ju\n",
7642 			    (uintmax_t)flp->nfsfl_stripeunit);
7643 			for (j = 0; j < mirrorcnt; j++) {
7644 				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7645 				k = fxdr_unsigned(int, *tl);
7646 				if (k < 1 || k > 128) {
7647 					error = NFSERR_BADXDR;
7648 					goto nfsmout;
7649 				}
7650 				NFSCL_DEBUG(4, "servercnt=%d\n", k);
7651 				for (l = 0; l < k; l++) {
7652 					NFSM_DISSECT(tl, uint32_t *,
7653 					    NFSX_V4DEVICEID + NFSX_STATEID +
7654 					    2 * NFSX_UNSIGNED);
7655 					if (l == 0) {
7656 						/* Just use the first server. */
7657 						NFSBCOPY(tl,
7658 						    flp->nfsfl_ffm[j].dev,
7659 						    NFSX_V4DEVICEID);
7660 						tl += (NFSX_V4DEVICEID /
7661 						    NFSX_UNSIGNED);
7662 						tl++;
7663 						flp->nfsfl_ffm[j].st.seqid =
7664 						    *tl++;
7665 						flp->nfsfl_ffm[j].st.other[0] =
7666 						    *tl++;
7667 						flp->nfsfl_ffm[j].st.other[1] =
7668 						    *tl++;
7669 						flp->nfsfl_ffm[j].st.other[2] =
7670 						    *tl++;
7671 						NFSCL_DEBUG(4, "st.seqid=%u "
7672 						 "st.o0=0x%x st.o1=0x%x "
7673 						 "st.o2=0x%x\n",
7674 						 flp->nfsfl_ffm[j].st.seqid,
7675 						 flp->nfsfl_ffm[j].st.other[0],
7676 						 flp->nfsfl_ffm[j].st.other[1],
7677 						 flp->nfsfl_ffm[j].st.other[2]);
7678 					} else
7679 						tl += ((NFSX_V4DEVICEID +
7680 						    NFSX_STATEID +
7681 						    NFSX_UNSIGNED) /
7682 						    NFSX_UNSIGNED);
7683 					fhcnt = fxdr_unsigned(int, *tl);
7684 					NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
7685 					if (fhcnt < 1 ||
7686 					    fhcnt > NFSDEV_MAXVERS) {
7687 						error = NFSERR_BADXDR;
7688 						goto nfsmout;
7689 					}
7690 					for (m = 0; m < fhcnt; m++) {
7691 						NFSM_DISSECT(tl, uint32_t *,
7692 						    NFSX_UNSIGNED);
7693 						nfhlen = fxdr_unsigned(int,
7694 						    *tl);
7695 						NFSCL_DEBUG(4, "nfhlen=%d\n",
7696 						    nfhlen);
7697 						if (nfhlen <= 0 || nfhlen >
7698 						    NFSX_V4FHMAX) {
7699 							error = NFSERR_BADXDR;
7700 							goto nfsmout;
7701 						}
7702 						NFSM_DISSECT(cp, uint8_t *,
7703 						    NFSM_RNDUP(nfhlen));
7704 						if (l == 0) {
7705 							flp->nfsfl_ffm[j].fhcnt
7706 							    = fhcnt;
7707 							nfhp = malloc(
7708 							    sizeof(*nfhp) +
7709 							    nfhlen - 1, M_NFSFH,
7710 							    M_WAITOK);
7711 							flp->nfsfl_ffm[j].fh[m]
7712 							    = nfhp;
7713 							nfhp->nfh_len = nfhlen;
7714 							NFSBCOPY(cp,
7715 							    nfhp->nfh_fh,
7716 							    nfhlen);
7717 							NFSCL_DEBUG(4,
7718 							    "got fh\n");
7719 						}
7720 					}
7721 					/* Now, get the ffsd_user/ffds_group. */
7722 					error = nfsrv_parseug(nd, 0, &user,
7723 					    &grp, curthread);
7724 					NFSCL_DEBUG(4, "after parseu=%d\n",
7725 					    error);
7726 					if (error == 0)
7727 						error = nfsrv_parseug(nd, 1,
7728 						    &user, &grp, curthread);
7729 					NFSCL_DEBUG(4, "aft parseg=%d\n",
7730 					    grp);
7731 					if (error != 0)
7732 						goto nfsmout;
7733 					NFSCL_DEBUG(4, "user=%d group=%d\n",
7734 					    user, grp);
7735 					if (l == 0) {
7736 						flp->nfsfl_ffm[j].user = user;
7737 						flp->nfsfl_ffm[j].group = grp;
7738 						NFSCL_DEBUG(4,
7739 						    "usr=%d grp=%d\n", user,
7740 						    grp);
7741 					}
7742 				}
7743 			}
7744 			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7745 			flp->nfsfl_fflags = fxdr_unsigned(uint32_t, *tl++);
7746 #ifdef notnow
7747 			/*
7748 			 * At this time, there is no flag.
7749 			 * NFSFLEXFLAG_IOADVISE_THRU_MDS might need to be
7750 			 * added, or it may never exist?
7751 			 */
7752 			mtx_lock(&nmp->nm_mtx);
7753 			if (nmp->nm_minorvers > 1 && (flp->nfsfl_fflags &
7754 			    NFSFLEXFLAG_IOADVISE_THRU_MDS) != 0)
7755 				nmp->nm_privflag |= NFSMNTP_IOADVISETHRUMDS;
7756 			mtx_unlock(&nmp->nm_mtx);
7757 #endif
7758 			flp->nfsfl_statshint = fxdr_unsigned(uint32_t, *tl);
7759 			NFSCL_DEBUG(4, "fflags=0x%x statshint=%d\n",
7760 			    flp->nfsfl_fflags, flp->nfsfl_statshint);
7761 		} else {
7762 			error = NFSERR_BADXDR;
7763 			goto nfsmout;
7764 		}
7765 		if (flp->nfsfl_iomode == gotiomode) {
7766 			/* Keep the list in increasing offset order. */
7767 			tflp = LIST_FIRST(flhp);
7768 			prevflp = NULL;
7769 			while (tflp != NULL &&
7770 			    tflp->nfsfl_off < flp->nfsfl_off) {
7771 				prevflp = tflp;
7772 				tflp = LIST_NEXT(tflp, nfsfl_list);
7773 			}
7774 			if (prevflp == NULL)
7775 				LIST_INSERT_HEAD(flhp, flp, nfsfl_list);
7776 			else
7777 				LIST_INSERT_AFTER(prevflp, flp,
7778 				    nfsfl_list);
7779 			NFSCL_DEBUG(4, "flp inserted\n");
7780 		} else {
7781 			printf("nfscl_layoutget(): got wrong iomode\n");
7782 			nfscl_freeflayout(flp);
7783 		}
7784 		flp = NULL;
7785 	}
7786 nfsmout:
7787 	NFSCL_DEBUG(4, "eo nfsrv_parselayoutget=%d\n", error);
7788 	if (error != 0 && flp != NULL)
7789 		nfscl_freeflayout(flp);
7790 	return (error);
7791 }
7792 
7793 /*
7794  * Parse a user/group digit string.
7795  */
7796 static int
7797 nfsrv_parseug(struct nfsrv_descript *nd, int dogrp, uid_t *uidp, gid_t *gidp,
7798     NFSPROC_T *p)
7799 {
7800 	uint32_t *tl;
7801 	char *cp, *str, str0[NFSV4_SMALLSTR + 1];
7802 	uint32_t len = 0;
7803 	int error = 0;
7804 
7805 	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7806 	len = fxdr_unsigned(uint32_t, *tl);
7807 	str = NULL;
7808 	if (len > NFSV4_OPAQUELIMIT) {
7809 		error = NFSERR_BADXDR;
7810 		goto nfsmout;
7811 	}
7812 	NFSCL_DEBUG(4, "nfsrv_parseug: len=%d\n", len);
7813 	if (len == 0) {
7814 		if (dogrp != 0)
7815 			*gidp = GID_NOGROUP;
7816 		else
7817 			*uidp = UID_NOBODY;
7818 		return (0);
7819 	}
7820 	if (len > NFSV4_SMALLSTR)
7821 		str = malloc(len + 1, M_TEMP, M_WAITOK);
7822 	else
7823 		str = str0;
7824 	NFSM_DISSECT(cp, char *, NFSM_RNDUP(len));
7825 	NFSBCOPY(cp, str, len);
7826 	str[len] = '\0';
7827 	NFSCL_DEBUG(4, "nfsrv_parseug: str=%s\n", str);
7828 	if (dogrp != 0)
7829 		error = nfsv4_strtogid(nd, str, len, gidp);
7830 	else
7831 		error = nfsv4_strtouid(nd, str, len, uidp);
7832 nfsmout:
7833 	if (len > NFSV4_SMALLSTR)
7834 		free(str, M_TEMP);
7835 	NFSCL_DEBUG(4, "eo nfsrv_parseug=%d\n", error);
7836 	return (error);
7837 }
7838 
7839 /*
7840  * Similar to nfsrpc_getlayout(), except that it uses nfsrpc_openlayget(),
7841  * so that it does both an Open and a Layoutget.
7842  */
7843 static int
7844 nfsrpc_getopenlayout(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
7845     int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
7846     struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
7847     struct ucred *cred, NFSPROC_T *p)
7848 {
7849 	struct nfscllayout *lyp;
7850 	struct nfsclflayout *flp;
7851 	struct nfsclflayouthead flh;
7852 	int error, islocked, layoutlen, recalled, retonclose, usecurstateid;
7853 	int layouttype, laystat;
7854 	nfsv4stateid_t stateid;
7855 	struct nfsclsession *tsep;
7856 
7857 	error = 0;
7858 	if (NFSHASFLEXFILE(nmp))
7859 		layouttype = NFSLAYOUT_FLEXFILE;
7860 	else
7861 		layouttype = NFSLAYOUT_NFSV4_1_FILES;
7862 	/*
7863 	 * If lyp is returned non-NULL, there will be a refcnt (shared lock)
7864 	 * on it, iff flp != NULL or a lock (exclusive lock) on it iff
7865 	 * flp == NULL.
7866 	 */
7867 	lyp = nfscl_getlayout(nmp->nm_clp, newfhp, newfhlen, 0, mode, &flp,
7868 	    &recalled);
7869 	NFSCL_DEBUG(4, "nfsrpc_getopenlayout nfscl_getlayout lyp=%p\n", lyp);
7870 	if (lyp == NULL)
7871 		islocked = 0;
7872 	else if (flp != NULL)
7873 		islocked = 1;
7874 	else
7875 		islocked = 2;
7876 	if ((lyp == NULL || flp == NULL) && recalled == 0) {
7877 		LIST_INIT(&flh);
7878 		tsep = nfsmnt_mdssession(nmp);
7879 		layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID +
7880 		    3 * NFSX_UNSIGNED);
7881 		if (lyp == NULL)
7882 			usecurstateid = 1;
7883 		else {
7884 			usecurstateid = 0;
7885 			stateid.seqid = lyp->nfsly_stateid.seqid;
7886 			stateid.other[0] = lyp->nfsly_stateid.other[0];
7887 			stateid.other[1] = lyp->nfsly_stateid.other[1];
7888 			stateid.other[2] = lyp->nfsly_stateid.other[2];
7889 		}
7890 		error = nfsrpc_openlayoutrpc(nmp, vp, nfhp, fhlen,
7891 		    newfhp, newfhlen, mode, op, name, namelen,
7892 		    dpp, &stateid, usecurstateid, layouttype, layoutlen,
7893 		    &retonclose, &flh, &laystat, cred, p);
7894 		NFSCL_DEBUG(4, "aft nfsrpc_openlayoutrpc laystat=%d err=%d\n",
7895 		    laystat, error);
7896 		laystat = nfsrpc_layoutgetres(nmp, vp, newfhp, newfhlen,
7897 		    &stateid, retonclose, NULL, &lyp, &flh, layouttype, laystat,
7898 		    &islocked, cred, p);
7899 	} else
7900 		error = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen,
7901 		    mode, op, name, namelen, dpp, 0, 0, cred, p, 0, 0);
7902 	if (islocked == 2)
7903 		nfscl_rellayout(lyp, 1);
7904 	else if (islocked == 1)
7905 		nfscl_rellayout(lyp, 0);
7906 	return (error);
7907 }
7908 
7909 /*
7910  * This function does an Open+LayoutGet for an NFSv4.1 mount with pNFS
7911  * enabled, only for the CLAIM_NULL case.  All other NFSv4 Opens are
7912  * handled by nfsrpc_openrpc().
7913  * For the case where op == NULL, dvp is the directory.  When op != NULL, it
7914  * can be NULL.
7915  */
7916 static int
7917 nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
7918     int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
7919     struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
7920     nfsv4stateid_t *stateidp, int usecurstateid, int layouttype,
7921     int layoutlen, int *retonclosep, struct nfsclflayouthead *flhp,
7922     int *laystatp, struct ucred *cred, NFSPROC_T *p)
7923 {
7924 	uint32_t *tl;
7925 	struct nfsrv_descript nfsd, *nd = &nfsd;
7926 	struct nfscldeleg *ndp = NULL;
7927 	struct nfsvattr nfsva;
7928 	struct nfsclsession *tsep;
7929 	uint32_t rflags, deleg;
7930 	nfsattrbit_t attrbits;
7931 	int error, ret, acesize, limitby, iomode;
7932 
7933 	*dpp = NULL;
7934 	*laystatp = ENXIO;
7935 	nfscl_reqstart(nd, NFSPROC_OPENLAYGET, nmp, nfhp, fhlen, NULL, NULL,
7936 	    0, 0, cred);
7937 	NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED);
7938 	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
7939 	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
7940 	*tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
7941 	tsep = nfsmnt_mdssession(nmp);
7942 	*tl++ = tsep->nfsess_clientid.lval[0];
7943 	*tl = tsep->nfsess_clientid.lval[1];
7944 	nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
7945 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7946 	*tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
7947 	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
7948 	nfsm_strtom(nd, name, namelen);
7949 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
7950 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
7951 	NFSZERO_ATTRBIT(&attrbits);
7952 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
7953 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
7954 	nfsrv_putattrbit(nd, &attrbits);
7955 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
7956 	*tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
7957 	if ((mode & NFSV4OPEN_ACCESSWRITE) != 0)
7958 		iomode = NFSLAYOUTIOMODE_RW;
7959 	else
7960 		iomode = NFSLAYOUTIOMODE_READ;
7961 	nfsrv_setuplayoutget(nd, iomode, 0, UINT64_MAX, 0, stateidp,
7962 	    layouttype, layoutlen, usecurstateid);
7963 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
7964 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
7965 	if (error != 0)
7966 		return (error);
7967 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
7968 	if (nd->nd_repstat != 0)
7969 		*laystatp = nd->nd_repstat;
7970 	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
7971 		/* ND_NOMOREDATA will be set if the Open operation failed. */
7972 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
7973 		    6 * NFSX_UNSIGNED);
7974 		op->nfso_stateid.seqid = *tl++;
7975 		op->nfso_stateid.other[0] = *tl++;
7976 		op->nfso_stateid.other[1] = *tl++;
7977 		op->nfso_stateid.other[2] = *tl;
7978 		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
7979 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
7980 		if (error != 0)
7981 			goto nfsmout;
7982 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
7983 		deleg = fxdr_unsigned(u_int32_t, *tl);
7984 		if (deleg == NFSV4OPEN_DELEGATEREAD ||
7985 		    deleg == NFSV4OPEN_DELEGATEWRITE) {
7986 			if (!(op->nfso_own->nfsow_clp->nfsc_flags &
7987 			      NFSCLFLAGS_FIRSTDELEG))
7988 				op->nfso_own->nfsow_clp->nfsc_flags |=
7989 				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
7990 			ndp = malloc(sizeof(struct nfscldeleg) + newfhlen,
7991 			    M_NFSCLDELEG, M_WAITOK);
7992 			LIST_INIT(&ndp->nfsdl_owner);
7993 			LIST_INIT(&ndp->nfsdl_lock);
7994 			ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
7995 			ndp->nfsdl_fhlen = newfhlen;
7996 			NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
7997 			newnfs_copyincred(cred, &ndp->nfsdl_cred);
7998 			nfscl_lockinit(&ndp->nfsdl_rwlock);
7999 			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
8000 			    NFSX_UNSIGNED);
8001 			ndp->nfsdl_stateid.seqid = *tl++;
8002 			ndp->nfsdl_stateid.other[0] = *tl++;
8003 			ndp->nfsdl_stateid.other[1] = *tl++;
8004 			ndp->nfsdl_stateid.other[2] = *tl++;
8005 			ret = fxdr_unsigned(int, *tl);
8006 			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
8007 				ndp->nfsdl_flags = NFSCLDL_WRITE;
8008 				/*
8009 				 * Indicates how much the file can grow.
8010 				 */
8011 				NFSM_DISSECT(tl, u_int32_t *,
8012 				    3 * NFSX_UNSIGNED);
8013 				limitby = fxdr_unsigned(int, *tl++);
8014 				switch (limitby) {
8015 				case NFSV4OPEN_LIMITSIZE:
8016 					ndp->nfsdl_sizelimit = fxdr_hyper(tl);
8017 					break;
8018 				case NFSV4OPEN_LIMITBLOCKS:
8019 					ndp->nfsdl_sizelimit =
8020 					    fxdr_unsigned(u_int64_t, *tl++);
8021 					ndp->nfsdl_sizelimit *=
8022 					    fxdr_unsigned(u_int64_t, *tl);
8023 					break;
8024 				default:
8025 					error = NFSERR_BADXDR;
8026 					goto nfsmout;
8027 				};
8028 			} else
8029 				ndp->nfsdl_flags = NFSCLDL_READ;
8030 			if (ret != 0)
8031 				ndp->nfsdl_flags |= NFSCLDL_RECALL;
8032 			error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, false,
8033 			    &ret, &acesize, p);
8034 			if (error != 0)
8035 				goto nfsmout;
8036 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
8037 			error = NFSERR_BADXDR;
8038 			goto nfsmout;
8039 		}
8040 		if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) != 0 ||
8041 		    nfscl_assumeposixlocks)
8042 			op->nfso_posixlock = 1;
8043 		else
8044 			op->nfso_posixlock = 0;
8045 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
8046 		/* If the 2nd element == NFS_OK, the Getattr succeeded. */
8047 		if (*++tl == 0) {
8048 			error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
8049 			    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
8050 			    NULL, NULL, NULL, p, cred);
8051 			if (error != 0)
8052 				goto nfsmout;
8053 			if (ndp != NULL) {
8054 				ndp->nfsdl_change = nfsva.na_filerev;
8055 				ndp->nfsdl_modtime = nfsva.na_mtime;
8056 				ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
8057 				*dpp = ndp;
8058 				ndp = NULL;
8059 			}
8060 			/*
8061 			 * At this point, the Open has succeeded, so set
8062 			 * nd_repstat = NFS_OK.  If the Layoutget failed,
8063 			 * this function just won't return a layout.
8064 			 */
8065 			if (nd->nd_repstat == 0) {
8066 				NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8067 				*laystatp = fxdr_unsigned(int, *++tl);
8068 				if (*laystatp == 0) {
8069 					error = nfsrv_parselayoutget(nmp, nd,
8070 					    stateidp, retonclosep, flhp);
8071 					if (error != 0)
8072 						*laystatp = error;
8073 				}
8074 			} else
8075 				nd->nd_repstat = 0;	/* Return 0 for Open. */
8076 		}
8077 	}
8078 	if (nd->nd_repstat != 0 && error == 0)
8079 		error = nd->nd_repstat;
8080 nfsmout:
8081 	free(ndp, M_NFSCLDELEG);
8082 	m_freem(nd->nd_mrep);
8083 	return (error);
8084 }
8085 
8086 /*
8087  * Similar nfsrpc_createv4(), but also does the LayoutGet operation.
8088  * Used only for mounts with pNFS enabled.
8089  */
8090 static int
8091 nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
8092     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
8093     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
8094     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
8095     int *dattrflagp, int *unlockedp, nfsv4stateid_t *stateidp,
8096     int usecurstateid, int layouttype, int layoutlen, int *retonclosep,
8097     struct nfsclflayouthead *flhp, int *laystatp)
8098 {
8099 	uint32_t *tl;
8100 	int error = 0, deleg, newone, ret, acesize, limitby;
8101 	struct nfsrv_descript nfsd, *nd = &nfsd;
8102 	struct nfsclopen *op;
8103 	struct nfscldeleg *dp = NULL;
8104 	struct nfsnode *np;
8105 	struct nfsfh *nfhp;
8106 	struct nfsclsession *tsep;
8107 	nfsattrbit_t attrbits;
8108 	nfsv4stateid_t stateid;
8109 	struct nfsmount *nmp;
8110 
8111 	nmp = VFSTONFS(dvp->v_mount);
8112 	np = VTONFS(dvp);
8113 	*laystatp = ENXIO;
8114 	*unlockedp = 0;
8115 	*nfhpp = NULL;
8116 	*dpp = NULL;
8117 	*attrflagp = 0;
8118 	*dattrflagp = 0;
8119 	if (namelen > NFS_MAXNAMLEN)
8120 		return (ENAMETOOLONG);
8121 	NFSCL_REQSTART(nd, NFSPROC_CREATELAYGET, dvp, cred);
8122 	/*
8123 	 * For V4, this is actually an Open op.
8124 	 */
8125 	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
8126 	*tl++ = txdr_unsigned(owp->nfsow_seqid);
8127 	*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
8128 	    NFSV4OPEN_ACCESSREAD);
8129 	*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
8130 	tsep = nfsmnt_mdssession(nmp);
8131 	*tl++ = tsep->nfsess_clientid.lval[0];
8132 	*tl = tsep->nfsess_clientid.lval[1];
8133 	nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
8134 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
8135 	*tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
8136 	if ((fmode & O_EXCL) != 0) {
8137 		if (NFSHASSESSPERSIST(nmp)) {
8138 			/* Use GUARDED for persistent sessions. */
8139 			*tl = txdr_unsigned(NFSCREATE_GUARDED);
8140 			nfscl_fillsattr(nd, vap, dvp, 0, 0);
8141 		} else {
8142 			/* Otherwise, use EXCLUSIVE4_1. */
8143 			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
8144 			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
8145 			*tl++ = cverf.lval[0];
8146 			*tl = cverf.lval[1];
8147 			nfscl_fillsattr(nd, vap, dvp, 0, 0);
8148 		}
8149 	} else {
8150 		*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
8151 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
8152 	}
8153 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
8154 	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
8155 	nfsm_strtom(nd, name, namelen);
8156 	/* Get the new file's handle and attributes, plus save the FH. */
8157 	NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
8158 	*tl++ = txdr_unsigned(NFSV4OP_SAVEFH);
8159 	*tl++ = txdr_unsigned(NFSV4OP_GETFH);
8160 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8161 	NFSGETATTR_ATTRBIT(&attrbits);
8162 	nfsrv_putattrbit(nd, &attrbits);
8163 	/* Get the directory's post-op attributes. */
8164 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
8165 	*tl = txdr_unsigned(NFSV4OP_PUTFH);
8166 	nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
8167 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
8168 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8169 	nfsrv_putattrbit(nd, &attrbits);
8170 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
8171 	*tl++ = txdr_unsigned(NFSV4OP_RESTOREFH);
8172 	*tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
8173 	nfsrv_setuplayoutget(nd, NFSLAYOUTIOMODE_RW, 0, UINT64_MAX, 0, stateidp,
8174 	    layouttype, layoutlen, usecurstateid);
8175 	error = nfscl_request(nd, dvp, p, cred);
8176 	if (error != 0)
8177 		return (error);
8178 	NFSCL_DEBUG(4, "nfsrpc_createlayout stat=%d err=%d\n", nd->nd_repstat,
8179 	    error);
8180 	if (nd->nd_repstat != 0)
8181 		*laystatp = nd->nd_repstat;
8182 	NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
8183 	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8184 		NFSCL_DEBUG(4, "nfsrpc_createlayout open succeeded\n");
8185 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
8186 		    6 * NFSX_UNSIGNED);
8187 		stateid.seqid = *tl++;
8188 		stateid.other[0] = *tl++;
8189 		stateid.other[1] = *tl++;
8190 		stateid.other[2] = *tl;
8191 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
8192 		if (error != 0)
8193 			goto nfsmout;
8194 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
8195 		deleg = fxdr_unsigned(int, *tl);
8196 		if (deleg == NFSV4OPEN_DELEGATEREAD ||
8197 		    deleg == NFSV4OPEN_DELEGATEWRITE) {
8198 			if (!(owp->nfsow_clp->nfsc_flags &
8199 			      NFSCLFLAGS_FIRSTDELEG))
8200 				owp->nfsow_clp->nfsc_flags |=
8201 				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
8202 			dp = malloc(sizeof(struct nfscldeleg) + NFSX_V4FHMAX,
8203 			    M_NFSCLDELEG, M_WAITOK);
8204 			LIST_INIT(&dp->nfsdl_owner);
8205 			LIST_INIT(&dp->nfsdl_lock);
8206 			dp->nfsdl_clp = owp->nfsow_clp;
8207 			newnfs_copyincred(cred, &dp->nfsdl_cred);
8208 			nfscl_lockinit(&dp->nfsdl_rwlock);
8209 			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
8210 			    NFSX_UNSIGNED);
8211 			dp->nfsdl_stateid.seqid = *tl++;
8212 			dp->nfsdl_stateid.other[0] = *tl++;
8213 			dp->nfsdl_stateid.other[1] = *tl++;
8214 			dp->nfsdl_stateid.other[2] = *tl++;
8215 			ret = fxdr_unsigned(int, *tl);
8216 			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
8217 				dp->nfsdl_flags = NFSCLDL_WRITE;
8218 				/*
8219 				 * Indicates how much the file can grow.
8220 				 */
8221 				NFSM_DISSECT(tl, u_int32_t *,
8222 				    3 * NFSX_UNSIGNED);
8223 				limitby = fxdr_unsigned(int, *tl++);
8224 				switch (limitby) {
8225 				case NFSV4OPEN_LIMITSIZE:
8226 					dp->nfsdl_sizelimit = fxdr_hyper(tl);
8227 					break;
8228 				case NFSV4OPEN_LIMITBLOCKS:
8229 					dp->nfsdl_sizelimit =
8230 					    fxdr_unsigned(u_int64_t, *tl++);
8231 					dp->nfsdl_sizelimit *=
8232 					    fxdr_unsigned(u_int64_t, *tl);
8233 					break;
8234 				default:
8235 					error = NFSERR_BADXDR;
8236 					goto nfsmout;
8237 				};
8238 			} else {
8239 				dp->nfsdl_flags = NFSCLDL_READ;
8240 			}
8241 			if (ret != 0)
8242 				dp->nfsdl_flags |= NFSCLDL_RECALL;
8243 			error = nfsrv_dissectace(nd, &dp->nfsdl_ace, false,
8244 			    &ret, &acesize, p);
8245 			if (error != 0)
8246 				goto nfsmout;
8247 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
8248 			error = NFSERR_BADXDR;
8249 			goto nfsmout;
8250 		}
8251 
8252 		/* Now, we should have the status for the SaveFH. */
8253 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8254 		if (*++tl == 0) {
8255 			NFSCL_DEBUG(4, "nfsrpc_createlayout SaveFH ok\n");
8256 			/*
8257 			 * Now, process the GetFH and Getattr for the newly
8258 			 * created file. nfscl_mtofh() will set
8259 			 * ND_NOMOREDATA if these weren't successful.
8260 			 */
8261 			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
8262 			NFSCL_DEBUG(4, "aft nfscl_mtofh err=%d\n", error);
8263 			if (error != 0)
8264 				goto nfsmout;
8265 		} else
8266 			nd->nd_flag |= ND_NOMOREDATA;
8267 		/* Now we have the PutFH and Getattr for the directory. */
8268 		if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8269 			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8270 			if (*++tl != 0)
8271 				nd->nd_flag |= ND_NOMOREDATA;
8272 			else {
8273 				NFSM_DISSECT(tl, uint32_t *, 2 *
8274 				    NFSX_UNSIGNED);
8275 				if (*++tl != 0)
8276 					nd->nd_flag |= ND_NOMOREDATA;
8277 			}
8278 		}
8279 		if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8280 			/* Load the directory attributes. */
8281 			error = nfsm_loadattr(nd, dnap);
8282 			NFSCL_DEBUG(4, "aft nfsm_loadattr err=%d\n", error);
8283 			if (error != 0)
8284 				goto nfsmout;
8285 			*dattrflagp = 1;
8286 			if (dp != NULL && *attrflagp != 0) {
8287 				dp->nfsdl_change = nnap->na_filerev;
8288 				dp->nfsdl_modtime = nnap->na_mtime;
8289 				dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
8290 			}
8291 			/*
8292 			 * We can now complete the Open state.
8293 			 */
8294 			nfhp = *nfhpp;
8295 			if (dp != NULL) {
8296 				dp->nfsdl_fhlen = nfhp->nfh_len;
8297 				NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh,
8298 				    nfhp->nfh_len);
8299 			}
8300 			/*
8301 			 * Get an Open structure that will be
8302 			 * attached to the OpenOwner, acquired already.
8303 			 */
8304 			error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len,
8305 			    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
8306 			    cred, p, NULL, &op, &newone, NULL, 0, false);
8307 			if (error != 0)
8308 				goto nfsmout;
8309 			op->nfso_stateid = stateid;
8310 			newnfs_copyincred(cred, &op->nfso_cred);
8311 
8312 			nfscl_openrelease(nmp, op, error, newone);
8313 			*unlockedp = 1;
8314 
8315 			/* Now, handle the RestoreFH and LayoutGet. */
8316 			if (nd->nd_repstat == 0) {
8317 				NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
8318 				*laystatp = fxdr_unsigned(int, *(tl + 3));
8319 				if (*laystatp == 0) {
8320 					error = nfsrv_parselayoutget(nmp, nd,
8321 					    stateidp, retonclosep, flhp);
8322 					if (error != 0)
8323 						*laystatp = error;
8324 				}
8325 				NFSCL_DEBUG(4, "aft nfsrv_parselayout err=%d\n",
8326 				    error);
8327 			} else
8328 				nd->nd_repstat = 0;
8329 		}
8330 	}
8331 	if (nd->nd_repstat != 0 && error == 0)
8332 		error = nd->nd_repstat;
8333 	if (error == NFSERR_STALECLIENTID)
8334 		nfscl_initiate_recovery(owp->nfsow_clp);
8335 nfsmout:
8336 	NFSCL_DEBUG(4, "eo nfsrpc_createlayout err=%d\n", error);
8337 	if (error == 0)
8338 		*dpp = dp;
8339 	else
8340 		free(dp, M_NFSCLDELEG);
8341 	m_freem(nd->nd_mrep);
8342 	return (error);
8343 }
8344 
8345 /*
8346  * Similar to nfsrpc_getopenlayout(), except that it used for the Create case.
8347  */
8348 static int
8349 nfsrpc_getcreatelayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
8350     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
8351     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
8352     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
8353     int *dattrflagp, int *unlockedp)
8354 {
8355 	struct nfscllayout *lyp;
8356 	struct nfsclflayouthead flh;
8357 	struct nfsfh *nfhp;
8358 	struct nfsclsession *tsep;
8359 	struct nfsmount *nmp;
8360 	nfsv4stateid_t stateid;
8361 	int error, layoutlen, layouttype, retonclose, laystat;
8362 
8363 	error = 0;
8364 	nmp = VFSTONFS(dvp->v_mount);
8365 	if (NFSHASFLEXFILE(nmp))
8366 		layouttype = NFSLAYOUT_FLEXFILE;
8367 	else
8368 		layouttype = NFSLAYOUT_NFSV4_1_FILES;
8369 	LIST_INIT(&flh);
8370 	tsep = nfsmnt_mdssession(nmp);
8371 	layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID + 3 * NFSX_UNSIGNED);
8372 	error = nfsrpc_createlayout(dvp, name, namelen, vap, cverf, fmode,
8373 	    owp, dpp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
8374 	    unlockedp, &stateid, 1, layouttype, layoutlen, &retonclose,
8375 	    &flh, &laystat);
8376 	NFSCL_DEBUG(4, "aft nfsrpc_createlayoutrpc laystat=%d err=%d\n",
8377 	    laystat, error);
8378 	lyp = NULL;
8379 	if (laystat == 0) {
8380 		nfhp = *nfhpp;
8381 		laystat = nfsrpc_layoutgetres(nmp, dvp, nfhp->nfh_fh,
8382 		    nfhp->nfh_len, &stateid, retonclose, NULL, &lyp, &flh,
8383 		    layouttype, laystat, NULL, cred, p);
8384 	} else
8385 		laystat = nfsrpc_layoutgetres(nmp, dvp, NULL, 0, &stateid,
8386 		    retonclose, NULL, &lyp, &flh, layouttype, laystat, NULL,
8387 		    cred, p);
8388 	if (laystat == 0)
8389 		nfscl_rellayout(lyp, 0);
8390 	return (error);
8391 }
8392 
8393 /*
8394  * Process the results of a layoutget() operation.
8395  */
8396 static int
8397 nfsrpc_layoutgetres(struct nfsmount *nmp, vnode_t vp, uint8_t *newfhp,
8398     int newfhlen, nfsv4stateid_t *stateidp, int retonclose, uint32_t *notifybit,
8399     struct nfscllayout **lypp, struct nfsclflayouthead *flhp, int layouttype,
8400     int laystat, int *islockedp, struct ucred *cred, NFSPROC_T *p)
8401 {
8402 	struct nfsclflayout *tflp;
8403 	struct nfscldevinfo *dip;
8404 	uint8_t *dev;
8405 	int i, mirrorcnt;
8406 
8407 	if (laystat == NFSERR_UNKNLAYOUTTYPE) {
8408 		NFSLOCKMNT(nmp);
8409 		if (!NFSHASFLEXFILE(nmp)) {
8410 			/* Switch to using Flex File Layout. */
8411 			nmp->nm_state |= NFSSTA_FLEXFILE;
8412 		} else if (layouttype == NFSLAYOUT_FLEXFILE) {
8413 			/* Disable pNFS. */
8414 			NFSCL_DEBUG(1, "disable PNFS\n");
8415 			nmp->nm_state &= ~(NFSSTA_PNFS | NFSSTA_FLEXFILE);
8416 		}
8417 		NFSUNLOCKMNT(nmp);
8418 	}
8419 	if (laystat == 0) {
8420 		NFSCL_DEBUG(4, "nfsrpc_layoutgetres at FOREACH\n");
8421 		LIST_FOREACH(tflp, flhp, nfsfl_list) {
8422 			if (layouttype == NFSLAYOUT_FLEXFILE)
8423 				mirrorcnt = tflp->nfsfl_mirrorcnt;
8424 			else
8425 				mirrorcnt = 1;
8426 			for (i = 0; i < mirrorcnt; i++) {
8427 				laystat = nfscl_adddevinfo(nmp, NULL, i, tflp);
8428 				NFSCL_DEBUG(4, "aft adddev=%d\n", laystat);
8429 				if (laystat != 0) {
8430 					if (layouttype == NFSLAYOUT_FLEXFILE)
8431 						dev = tflp->nfsfl_ffm[i].dev;
8432 					else
8433 						dev = tflp->nfsfl_dev;
8434 					laystat = nfsrpc_getdeviceinfo(nmp, dev,
8435 					    layouttype, notifybit, &dip, cred,
8436 					    p);
8437 					NFSCL_DEBUG(4, "aft nfsrpc_gdi=%d\n",
8438 					    laystat);
8439 					if (laystat != 0)
8440 						goto out;
8441 					laystat = nfscl_adddevinfo(nmp, dip, i,
8442 					    tflp);
8443 					if (laystat != 0)
8444 						printf("nfsrpc_layoutgetresout"
8445 						    ": cannot add\n");
8446 				}
8447 			}
8448 		}
8449 	}
8450 out:
8451 	if (laystat == 0) {
8452 		/*
8453 		 * nfscl_layout() always returns with the nfsly_lock
8454 		 * set to a refcnt (shared lock).
8455 		 * Passing in dvp is sufficient, since it is only used to
8456 		 * get the fsid for the file system.
8457 		 */
8458 		laystat = nfscl_layout(nmp, vp, newfhp, newfhlen, stateidp,
8459 		    layouttype, retonclose, flhp, lypp, cred, p);
8460 		NFSCL_DEBUG(4, "nfsrpc_layoutgetres: aft nfscl_layout=%d\n",
8461 		    laystat);
8462 		if (laystat == 0 && islockedp != NULL)
8463 			*islockedp = 1;
8464 	}
8465 	return (laystat);
8466 }
8467 
8468 /*
8469  * nfs copy_file_range operation.
8470  */
8471 int
8472 nfsrpc_copy_file_range(vnode_t invp, off_t *inoffp, vnode_t outvp,
8473     off_t *outoffp, size_t *lenp, unsigned int flags, int *inattrflagp,
8474     struct nfsvattr *innap, int *outattrflagp, struct nfsvattr *outnap,
8475     struct ucred *cred, bool consecutive, bool *must_commitp)
8476 {
8477 	int commit, error, expireret = 0, retrycnt;
8478 	u_int32_t clidrev = 0;
8479 	struct nfsmount *nmp = VFSTONFS(invp->v_mount);
8480 	struct nfsfh *innfhp = NULL, *outnfhp = NULL;
8481 	nfsv4stateid_t instateid, outstateid;
8482 	void *inlckp, *outlckp;
8483 
8484 	if (nmp->nm_clp != NULL)
8485 		clidrev = nmp->nm_clp->nfsc_clientidrev;
8486 	innfhp = VTONFS(invp)->n_fhp;
8487 	outnfhp = VTONFS(outvp)->n_fhp;
8488 	retrycnt = 0;
8489 	do {
8490 		/* Get both stateids. */
8491 		inlckp = NULL;
8492 		nfscl_getstateid(invp, innfhp->nfh_fh, innfhp->nfh_len,
8493 		    NFSV4OPEN_ACCESSREAD, 0, NULL, curthread, &instateid,
8494 		    &inlckp);
8495 		outlckp = NULL;
8496 		nfscl_getstateid(outvp, outnfhp->nfh_fh, outnfhp->nfh_len,
8497 		    NFSV4OPEN_ACCESSWRITE, 0, NULL, curthread, &outstateid,
8498 		    &outlckp);
8499 
8500 		error = nfsrpc_copyrpc(invp, *inoffp, outvp, *outoffp, lenp,
8501 		    &instateid, &outstateid, innap, inattrflagp, outnap,
8502 		    outattrflagp, consecutive, &commit, cred, curthread);
8503 		if (error == 0) {
8504 			if (commit != NFSWRITE_FILESYNC)
8505 				*must_commitp = true;
8506 			*inoffp += *lenp;
8507 			*outoffp += *lenp;
8508 		} else if (error == NFSERR_STALESTATEID)
8509 			nfscl_initiate_recovery(nmp->nm_clp);
8510 		if (inlckp != NULL)
8511 			nfscl_lockderef(inlckp);
8512 		if (outlckp != NULL)
8513 			nfscl_lockderef(outlckp);
8514 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8515 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8516 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
8517 			(void) nfs_catnap(PZERO, error, "nfs_cfr");
8518 		} else if ((error == NFSERR_EXPIRED ||
8519 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
8520 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev,
8521 			    curthread);
8522 		}
8523 		retrycnt++;
8524 	} while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
8525 	    error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
8526 	      error == NFSERR_STALEDONTRECOVER ||
8527 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
8528 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
8529 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
8530 	if (error != 0 && (retrycnt >= 4 ||
8531 	    error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
8532 	      error == NFSERR_STALEDONTRECOVER))
8533 		error = EIO;
8534 	return (error);
8535 }
8536 
/*
 * The copy RPC.
 *
 * Builds and sends one compound starting with NFSCL_REQSTART() for
 * NFSPROC_COPY on invp, followed by Getattr, PutFH of outvp, Copy and a
 * final Getattr, then parses the reply in the same order.
 *
 * *lenp is the requested byte count on entry (clamped to
 * nfs_maxcopyrange); it is zeroed up front and only set to the count
 * from the reply when the Copy succeeded.
 * *inattrflagp / *outattrflagp are set non-zero when innap / outnap
 * were loaded from the reply.
 * *commitp is set from the reply (NFSWRITE_UNSTABLE until then).
 * Returns 0, a local/XDR error, or the NFS status in nd_repstat.
 */
static int
nfsrpc_copyrpc(vnode_t invp, off_t inoff, vnode_t outvp, off_t outoff,
    size_t *lenp, nfsv4stateid_t *instateidp, nfsv4stateid_t *outstateidp,
    struct nfsvattr *innap, int *inattrflagp, struct nfsvattr *outnap,
    int *outattrflagp, bool consecutive, int *commitp, struct ucred *cred,
    NFSPROC_T *p)
{
	uint32_t *tl;
	int error;
	struct nfsrv_descript nfsd;
	struct nfsrv_descript *nd = &nfsd;
	struct nfsmount *nmp;
	nfsattrbit_t attrbits;
	uint64_t len;

	nmp = VFSTONFS(outvp->v_mount);
	*inattrflagp = *outattrflagp = 0;
	*commitp = NFSWRITE_UNSTABLE;
	/* Report "nothing copied" unless the reply says otherwise. */
	len = *lenp;
	*lenp = 0;
	if (len > nfs_maxcopyrange)
		len = nfs_maxcopyrange;
	NFSCL_REQSTART(nd, NFSPROC_COPY, invp, cred);
	/* Getattr for the input file. */
	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_GETATTR);
	NFSGETATTR_ATTRBIT(&attrbits);
	nfsrv_putattrbit(nd, &attrbits);
	/* PutFH of the output file, which the Copy then applies to. */
	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_PUTFH);
	nfsm_fhtom(nd, VTONFS(outvp)->n_fhp->nfh_fh,
	    VTONFS(outvp)->n_fhp->nfh_len, 0);
	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_COPY);
	nfsm_stateidtom(nd, instateidp, NFSSTATEID_PUTSTATEID);
	nfsm_stateidtom(nd, outstateidp, NFSSTATEID_PUTSTATEID);
	/*
	 * Three hypers (input offset, output offset, length) and four
	 * words: the consecutive flag, a "true" word, a zero word and the
	 * opcode of the trailing Getattr.
	 * NOTE(review): per RFC 7862 COPY4args the "true" word should be
	 * ca_synchronous and the zero word an empty ca_source_server
	 * list - confirm against the RFC.
	 */
	NFSM_BUILD(tl, uint32_t *, 3 * NFSX_HYPER + 4 * NFSX_UNSIGNED);
	txdr_hyper(inoff, tl); tl += 2;
	txdr_hyper(outoff, tl); tl += 2;
	txdr_hyper(len, tl); tl += 2;
	if (consecutive)
		*tl++ = newnfs_true;
	else
		*tl++ = newnfs_false;
	*tl++ = newnfs_true;
	*tl++ = 0;
	*tl = txdr_unsigned(NFSV4OP_GETATTR);
	NFSWRITEGETATTR_ATTRBIT(&attrbits);
	nfsrv_putattrbit(nd, &attrbits);
	error = nfscl_request(nd, invp, p, cred);
	if (error != 0)
		return (error);
	/*
	 * From here on, NFSM_DISSECT() may jump to nfsmout on a short
	 * reply.  ND_NOMOREDATA is set as soon as an operation's status
	 * is non-zero, which stops the per-operation parsing below.
	 */
	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
		/* Get the input file's attributes. */
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
		if (*(tl + 1) == 0) {
			error = nfsm_loadattr(nd, innap);
			if (error != 0)
				goto nfsmout;
			*inattrflagp = 1;
		} else
			nd->nd_flag |= ND_NOMOREDATA;
	}
	/* Skip over return stat for PutFH. */
	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
		if (*++tl != 0)
			nd->nd_flag |= ND_NOMOREDATA;
	}
	/* Skip over return stat for Copy. */
	if ((nd->nd_flag & ND_NOMOREDATA) == 0)
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
	if (nd->nd_repstat == 0) {
		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
		if (*tl != 0) {
			/* There should be no callback ids. */
			error = NFSERR_BADXDR;
			goto nfsmout;
		}
		/*
		 * Byte count (hyper), commit level, write verifier and
		 * two more words.
		 */
		NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED +
		    NFSX_VERF);
		len = fxdr_hyper(tl); tl += 2;
		*commitp = fxdr_unsigned(int, *tl++);
		/*
		 * Remember the write verifier in the mount.  If one is
		 * already saved and this one differs, save the new one and
		 * report NFSERR_STALEWRITEVERF.
		 */
		NFSLOCKMNT(nmp);
		if (!NFSHASWRITEVERF(nmp)) {
			NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
			NFSSETWRITEVERF(nmp);
	    	} else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) {
			NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
			nd->nd_repstat = NFSERR_STALEWRITEVERF;
		}
		NFSUNLOCKMNT(nmp);
		tl += (NFSX_VERF / NFSX_UNSIGNED);
		/*
		 * tl is just past the verifier; the pre-increment skips the
		 * first of the two remaining words and tests the second.
		 */
		if (nd->nd_repstat == 0 && *++tl != newnfs_true)
			/* Must be a synchronous copy. */
			nd->nd_repstat = NFSERR_NOTSUPP;
		/* Skip the op and status words of the final Getattr. */
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
		error = nfsm_loadattr(nd, outnap);
		if (error == 0)
			*outattrflagp = NFS_LATTR_NOSHRINK;
		/* Only report bytes copied when no error was flagged above. */
		if (nd->nd_repstat == 0)
			*lenp = len;
	} else if (nd->nd_repstat == NFSERR_OFFLOADNOREQS) {
		/*
		 * For the case where consecutive is not supported, but
		 * synchronous is supported, we can try consecutive == false
		 * by returning this error.  Otherwise, return NFSERR_NOTSUPP,
		 * since Copy cannot be done.
		 */
		if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
			if (!consecutive || *++tl == newnfs_false)
				nd->nd_repstat = NFSERR_NOTSUPP;
		} else
			nd->nd_repstat = NFSERR_BADXDR;
	}
	/* A local parse error takes precedence over the NFS status. */
	if (error == 0)
		error = nd->nd_repstat;
nfsmout:
	m_freem(nd->nd_mrep);
	return (error);
}
8661 
8662 /*
8663  * Seek operation.
8664  */
8665 int
8666 nfsrpc_seek(vnode_t vp, off_t *offp, bool *eofp, int content,
8667     struct ucred *cred, struct nfsvattr *nap, int *attrflagp)
8668 {
8669 	int error, expireret = 0, retrycnt;
8670 	u_int32_t clidrev = 0;
8671 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
8672 	struct nfsnode *np = VTONFS(vp);
8673 	struct nfsfh *nfhp = NULL;
8674 	nfsv4stateid_t stateid;
8675 	void *lckp;
8676 
8677 	if (nmp->nm_clp != NULL)
8678 		clidrev = nmp->nm_clp->nfsc_clientidrev;
8679 	nfhp = np->n_fhp;
8680 	retrycnt = 0;
8681 	do {
8682 		lckp = NULL;
8683 		nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
8684 		    NFSV4OPEN_ACCESSREAD, 0, cred, curthread, &stateid, &lckp);
8685 		error = nfsrpc_seekrpc(vp, offp, &stateid, eofp, content,
8686 		    nap, attrflagp, cred);
8687 		if (error == NFSERR_STALESTATEID)
8688 			nfscl_initiate_recovery(nmp->nm_clp);
8689 		if (lckp != NULL)
8690 			nfscl_lockderef(lckp);
8691 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8692 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8693 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
8694 			(void) nfs_catnap(PZERO, error, "nfs_seek");
8695 		} else if ((error == NFSERR_EXPIRED ||
8696 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
8697 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev,
8698 			    curthread);
8699 		}
8700 		retrycnt++;
8701 	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8702 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8703 	    error == NFSERR_BADSESSION ||
8704 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
8705 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
8706 	     expireret == 0 && clidrev != 0 && retrycnt < 4) ||
8707 	    (error == NFSERR_OPENMODE && retrycnt < 4));
8708 	if (error && retrycnt >= 4)
8709 		error = EIO;
8710 	return (error);
8711 }
8712 
8713 /*
8714  * The seek RPC.
8715  */
8716 static int
8717 nfsrpc_seekrpc(vnode_t vp, off_t *offp, nfsv4stateid_t *stateidp, bool *eofp,
8718     int content, struct nfsvattr *nap, int *attrflagp, struct ucred *cred)
8719 {
8720 	uint32_t *tl;
8721 	int error;
8722 	struct nfsrv_descript nfsd;
8723 	struct nfsrv_descript *nd = &nfsd;
8724 	nfsattrbit_t attrbits;
8725 
8726 	*attrflagp = 0;
8727 	NFSCL_REQSTART(nd, NFSPROC_SEEK, vp, cred);
8728 	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
8729 	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
8730 	txdr_hyper(*offp, tl); tl += 2;
8731 	*tl++ = txdr_unsigned(content);
8732 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8733 	NFSGETATTR_ATTRBIT(&attrbits);
8734 	nfsrv_putattrbit(nd, &attrbits);
8735 	error = nfscl_request(nd, vp, curthread, cred);
8736 	if (error != 0)
8737 		return (error);
8738 	if (nd->nd_repstat == 0) {
8739 		NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED + NFSX_HYPER);
8740 		if (*tl++ == newnfs_true)
8741 			*eofp = true;
8742 		else
8743 			*eofp = false;
8744 		*offp = fxdr_hyper(tl);
8745 		/* Just skip over Getattr op status. */
8746 		error = nfsm_loadattr(nd, nap);
8747 		if (error == 0)
8748 			*attrflagp = 1;
8749 	}
8750 	error = nd->nd_repstat;
8751 nfsmout:
8752 	m_freem(nd->nd_mrep);
8753 	return (error);
8754 }
8755 
8756 /*
8757  * The getextattr RPC.
8758  */
8759 int
8760 nfsrpc_getextattr(vnode_t vp, const char *name, struct uio *uiop, ssize_t *lenp,
8761     struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p)
8762 {
8763 	uint32_t *tl;
8764 	int error;
8765 	struct nfsrv_descript nfsd;
8766 	struct nfsrv_descript *nd = &nfsd;
8767 	nfsattrbit_t attrbits;
8768 	uint32_t len, len2;
8769 
8770 	*attrflagp = 0;
8771 	NFSCL_REQSTART(nd, NFSPROC_GETEXTATTR, vp, cred);
8772 	nfsm_strtom(nd, name, strlen(name));
8773 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8774 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8775 	NFSGETATTR_ATTRBIT(&attrbits);
8776 	nfsrv_putattrbit(nd, &attrbits);
8777 	error = nfscl_request(nd, vp, p, cred);
8778 	if (error != 0)
8779 		return (error);
8780 	if (nd->nd_repstat == 0) {
8781 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
8782 		len = fxdr_unsigned(uint32_t, *tl);
8783 		/* Sanity check lengths. */
8784 		if (uiop != NULL && len > 0 && len <= IOSIZE_MAX &&
8785 		    uiop->uio_resid <= UINT32_MAX) {
8786 			len2 = uiop->uio_resid;
8787 			if (len2 >= len)
8788 				error = nfsm_mbufuio(nd, uiop, len);
8789 			else {
8790 				error = nfsm_mbufuio(nd, uiop, len2);
8791 				if (error == 0) {
8792 					/*
8793 					 * nfsm_mbufuio() advances to a multiple
8794 					 * of 4, so round up len2 as well.  Then
8795 					 * we need to advance over the rest of
8796 					 * the data, rounding up the remaining
8797 					 * length.
8798 					 */
8799 					len2 = NFSM_RNDUP(len2);
8800 					len2 = NFSM_RNDUP(len - len2);
8801 					if (len2 > 0)
8802 						error = nfsm_advance(nd, len2,
8803 						    -1);
8804 				}
8805 			}
8806 		} else if (uiop == NULL && len > 0) {
8807 			/* Just wants the length and not the data. */
8808 			error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
8809 		} else if (len > 0)
8810 			error = ENOATTR;
8811 		if (error != 0)
8812 			goto nfsmout;
8813 		*lenp = len;
8814 		/* Just skip over Getattr op status. */
8815 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8816 		error = nfsm_loadattr(nd, nap);
8817 		if (error == 0)
8818 			*attrflagp = 1;
8819 	}
8820 	if (error == 0)
8821 		error = nd->nd_repstat;
8822 nfsmout:
8823 	m_freem(nd->nd_mrep);
8824 	return (error);
8825 }
8826 
8827 /*
8828  * The setextattr RPC.
8829  */
8830 int
8831 nfsrpc_setextattr(vnode_t vp, const char *name, struct uio *uiop,
8832     struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p)
8833 {
8834 	uint32_t *tl;
8835 	int error;
8836 	struct nfsrv_descript nfsd;
8837 	struct nfsrv_descript *nd = &nfsd;
8838 	nfsattrbit_t attrbits;
8839 
8840 	*attrflagp = 0;
8841 	NFSCL_REQSTART(nd, NFSPROC_SETEXTATTR, vp, cred);
8842 	if (uiop->uio_resid > nd->nd_maxreq) {
8843 		/* nd_maxreq is set by NFSCL_REQSTART(). */
8844 		m_freem(nd->nd_mreq);
8845 		return (EINVAL);
8846 	}
8847 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8848 	*tl = txdr_unsigned(NFSV4SXATTR_EITHER);
8849 	nfsm_strtom(nd, name, strlen(name));
8850 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8851 	*tl = txdr_unsigned(uiop->uio_resid);
8852 	nfsm_uiombuf(nd, uiop, uiop->uio_resid);
8853 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8854 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8855 	NFSGETATTR_ATTRBIT(&attrbits);
8856 	nfsrv_putattrbit(nd, &attrbits);
8857 	error = nfscl_request(nd, vp, p, cred);
8858 	if (error != 0)
8859 		return (error);
8860 	if (nd->nd_repstat == 0) {
8861 		/* Just skip over the reply and Getattr op status. */
8862 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + 3 *
8863 		    NFSX_UNSIGNED);
8864 		error = nfsm_loadattr(nd, nap);
8865 		if (error == 0)
8866 			*attrflagp = 1;
8867 	}
8868 	if (error == 0)
8869 		error = nd->nd_repstat;
8870 nfsmout:
8871 	m_freem(nd->nd_mrep);
8872 	return (error);
8873 }
8874 
8875 /*
8876  * The removeextattr RPC.
8877  */
8878 int
8879 nfsrpc_rmextattr(vnode_t vp, const char *name, struct nfsvattr *nap,
8880     int *attrflagp, struct ucred *cred, NFSPROC_T *p)
8881 {
8882 	uint32_t *tl;
8883 	int error;
8884 	struct nfsrv_descript nfsd;
8885 	struct nfsrv_descript *nd = &nfsd;
8886 	nfsattrbit_t attrbits;
8887 
8888 	*attrflagp = 0;
8889 	NFSCL_REQSTART(nd, NFSPROC_RMEXTATTR, vp, cred);
8890 	nfsm_strtom(nd, name, strlen(name));
8891 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8892 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8893 	NFSGETATTR_ATTRBIT(&attrbits);
8894 	nfsrv_putattrbit(nd, &attrbits);
8895 	error = nfscl_request(nd, vp, p, cred);
8896 	if (error != 0)
8897 		return (error);
8898 	if (nd->nd_repstat == 0) {
8899 		/* Just skip over the reply and Getattr op status. */
8900 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + 3 *
8901 		    NFSX_UNSIGNED);
8902 		error = nfsm_loadattr(nd, nap);
8903 		if (error == 0)
8904 			*attrflagp = 1;
8905 	}
8906 	if (error == 0)
8907 		error = nd->nd_repstat;
8908 nfsmout:
8909 	m_freem(nd->nd_mrep);
8910 	return (error);
8911 }
8912 
8913 /*
8914  * The listextattr RPC.
8915  */
8916 int
8917 nfsrpc_listextattr(vnode_t vp, uint64_t *cookiep, struct uio *uiop,
8918     size_t *lenp, bool *eofp, struct nfsvattr *nap, int *attrflagp,
8919     struct ucred *cred, NFSPROC_T *p)
8920 {
8921 	uint32_t *tl;
8922 	int cnt, error, i, len;
8923 	struct nfsrv_descript nfsd;
8924 	struct nfsrv_descript *nd = &nfsd;
8925 	nfsattrbit_t attrbits;
8926 	u_char c;
8927 
8928 	*attrflagp = 0;
8929 	NFSCL_REQSTART(nd, NFSPROC_LISTEXTATTR, vp, cred);
8930 	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
8931 	txdr_hyper(*cookiep, tl); tl += 2;
8932 	*tl++ = txdr_unsigned(*lenp);
8933 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8934 	NFSGETATTR_ATTRBIT(&attrbits);
8935 	nfsrv_putattrbit(nd, &attrbits);
8936 	error = nfscl_request(nd, vp, p, cred);
8937 	if (error != 0)
8938 		return (error);
8939 	*eofp = true;
8940 	*lenp = 0;
8941 	if (nd->nd_repstat == 0) {
8942 		NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
8943 		*cookiep = fxdr_hyper(tl); tl += 2;
8944 		cnt = fxdr_unsigned(int, *tl);
8945 		if (cnt < 0) {
8946 			error = EBADRPC;
8947 			goto nfsmout;
8948 		}
8949 		for (i = 0; i < cnt; i++) {
8950 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
8951 			len = fxdr_unsigned(int, *tl);
8952 			if (len <= 0 || len > EXTATTR_MAXNAMELEN) {
8953 				error = EBADRPC;
8954 				goto nfsmout;
8955 			}
8956 			if (uiop == NULL)
8957 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
8958 			else if (uiop->uio_resid >= len + 1) {
8959 				c = len;
8960 				error = uiomove(&c, sizeof(c), uiop);
8961 				if (error == 0)
8962 					error = nfsm_mbufuio(nd, uiop, len);
8963 			} else {
8964 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
8965 				*eofp = false;
8966 			}
8967 			if (error != 0)
8968 				goto nfsmout;
8969 			*lenp += (len + 1);
8970 		}
8971 		/* Get the eof and skip over the Getattr op status. */
8972 		NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED);
8973 		/*
8974 		 * *eofp is set false above, because it wasn't able to copy
8975 		 * all of the reply.
8976 		 */
8977 		if (*eofp && *tl == 0)
8978 			*eofp = false;
8979 		error = nfsm_loadattr(nd, nap);
8980 		if (error == 0)
8981 			*attrflagp = 1;
8982 	}
8983 	if (error == 0)
8984 		error = nd->nd_repstat;
8985 nfsmout:
8986 	m_freem(nd->nd_mrep);
8987 	return (error);
8988 }
8989 
/*
 * Split an mbuf list.  For non-M_EXTPG mbufs, just use m_split().
 *
 * mp is the head of the list and xfer the number of bytes that are to
 * stay in it.  Returns the list holding the data after the first xfer
 * bytes, or NULL if the list ends before xfer bytes were found.
 * Only the flags of the first mbuf (mp) are tested to choose between the
 * m_split() path and the M_EXTPG path.
 * Panics if the page lengths of the mbuf being split do not add up to
 * the split offset.
 */
static struct mbuf *
nfsm_split(struct mbuf *mp, uint64_t xfer)
{
	struct mbuf *m, *m2;
	vm_page_t pg;
	int i, j, left, pgno, plen, trim;
	char *cp, *cp2;

	if ((mp->m_flags & M_EXTPG) == 0) {
		m = m_split(mp, xfer, M_WAITOK);
		return (m);
	}

	/* Find the correct mbuf to split at. */
	/* xfer becomes the offset of the split within mbuf m. */
	for (m = mp; m != NULL && xfer > m->m_len; m = m->m_next)
		xfer -= m->m_len;
	if (m == NULL)
		return (NULL);

	/* If xfer == m->m_len, we can just split the mbuf list. */
	if (xfer == m->m_len) {
		m2 = m->m_next;
		m->m_next = NULL;
		return (m2);
	}

	/* Find the page to split at. */
	/*
	 * On leaving the loop via the break, pgno is the page holding the
	 * split point, plen the length of the data in that page and left
	 * the number of bytes of that page staying in m.
	 */
	pgno = 0;
	left = xfer;
	do {
		if (pgno == 0)
			plen = m_epg_pagelen(m, 0, m->m_epg_1st_off);
		else
			plen = m_epg_pagelen(m, pgno, 0);
		if (left <= plen)
			break;
		left -= plen;
		pgno++;
	} while (pgno < m->m_epg_npgs);
	if (pgno == m->m_epg_npgs)
		panic("nfsm_split: eroneous ext_pgs mbuf");

	/* m2 is the new M_EXTPG mbuf that receives the data after the split. */
	m2 = mb_alloc_ext_pgs(M_WAITOK, mb_free_mext_pgs);
	m2->m_epg_flags |= EPG_FLAG_ANON;

	/*
	 * If left < plen, allocate a new page for the new mbuf
	 * and copy the data after left in the page to this new
	 * page.
	 */
	if (left < plen) {
		pg = vm_page_alloc_noobj(VM_ALLOC_WAITOK | VM_ALLOC_NODUMP |
		    VM_ALLOC_WIRED);
		m2->m_epg_pa[0] = VM_PAGE_TO_PHYS(pg);
		m2->m_epg_npgs = 1;

		/* Copy the data after left to the new page. */
		trim = plen - left;
		cp = (char *)(void *)PHYS_TO_DMAP(m->m_epg_pa[pgno]);
		if (pgno == 0)
			cp += m->m_epg_1st_off;
		cp += left;
		cp2 = (char *)(void *)PHYS_TO_DMAP(m2->m_epg_pa[0]);
		/*
		 * If the split page is the last page of m, the copied data
		 * is all that m2 holds, so it goes at the start of the new
		 * page.  Otherwise it is placed at the end of the new page,
		 * with m_epg_1st_off set to match, and m2 takes over m's
		 * last page length since m's later pages are moved below.
		 */
		if (pgno == m->m_epg_npgs - 1)
			m2->m_epg_last_len = trim;
		else {
			cp2 += PAGE_SIZE - trim;
			m2->m_epg_1st_off = PAGE_SIZE - trim;
			m2->m_epg_last_len = m->m_epg_last_len;
		}
		memcpy(cp2, cp, trim);
		m2->m_len = trim;
	} else {
		/* The split is at the end of page pgno, so nothing to copy. */
		m2->m_len = 0;
		m2->m_epg_last_len = m->m_epg_last_len;
	}

	/* Move the pages beyond pgno to the new mbuf. */
	/* j starts after the copied page, if one was allocated above. */
	for (i = pgno + 1, j = m2->m_epg_npgs; i < m->m_epg_npgs; i++, j++) {
		m2->m_epg_pa[j] = m->m_epg_pa[i];
		/* Never moves page 0. */
		m2->m_len += m_epg_pagelen(m, i, 0);
	}
	/* Trim m so that it ends at the split point within page pgno. */
	m2->m_epg_npgs = j;
	m->m_epg_npgs = pgno + 1;
	m->m_epg_last_len = left;
	m->m_len = xfer;

	/* The rest of the original list now follows m2. */
	m2->m_next = m->m_next;
	m->m_next = NULL;
	return (m2);
}
9085 
9086 /*
9087  * Do the NFSv4.1 Bind Connection to Session.
9088  * Called from the reconnect layer of the krpc (sys/rpc/clnt_rc.c).
9089  */
9090 void
9091 nfsrpc_bindconnsess(CLIENT *cl, void *arg, struct ucred *cr)
9092 {
9093 	struct nfscl_reconarg *rcp = (struct nfscl_reconarg *)arg;
9094 	uint32_t res, *tl;
9095 	struct nfsrv_descript nfsd;
9096 	struct nfsrv_descript *nd = &nfsd;
9097 	struct rpc_callextra ext;
9098 	struct timeval utimeout;
9099 	enum clnt_stat stat;
9100 	int error;
9101 
9102 	nfscl_reqstart(nd, NFSPROC_BINDCONNTOSESS, NULL, NULL, 0, NULL, NULL,
9103 	    NFS_VER4, rcp->minorvers, NULL);
9104 	NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 2 * NFSX_UNSIGNED);
9105 	memcpy(tl, rcp->sessionid, NFSX_V4SESSIONID);
9106 	tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
9107 	*tl++ = txdr_unsigned(NFSCDFC4_FORE_OR_BOTH);
9108 	*tl = newnfs_false;
9109 
9110 	memset(&ext, 0, sizeof(ext));
9111 	utimeout.tv_sec = 30;
9112 	utimeout.tv_usec = 0;
9113 	ext.rc_auth = authunix_create(cr);
9114 	nd->nd_mrep = NULL;
9115 	stat = CLNT_CALL_MBUF(cl, &ext, NFSV4PROC_COMPOUND, nd->nd_mreq,
9116 	    &nd->nd_mrep, utimeout);
9117 	AUTH_DESTROY(ext.rc_auth);
9118 	if (stat != RPC_SUCCESS) {
9119 		printf("nfsrpc_bindconnsess: call failed stat=%d\n", stat);
9120 		return;
9121 	}
9122 	if (nd->nd_mrep == NULL) {
9123 		printf("nfsrpc_bindconnsess: no reply args\n");
9124 		return;
9125 	}
9126 	error = 0;
9127 	newnfs_realign(&nd->nd_mrep, M_WAITOK);
9128 	nd->nd_md = nd->nd_mrep;
9129 	nd->nd_dpos = mtod(nd->nd_md, char *);
9130 	NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
9131 	nd->nd_repstat = fxdr_unsigned(uint32_t, *tl++);
9132 	if (nd->nd_repstat == NFSERR_OK) {
9133 		res = fxdr_unsigned(uint32_t, *tl);
9134 		if (res > 0 && (error = nfsm_advance(nd, NFSM_RNDUP(res),
9135 		    -1)) != 0)
9136 			goto nfsmout;
9137 		NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
9138 		    4 * NFSX_UNSIGNED);
9139 		tl += 3;
9140 		if (!NFSBCMP(tl, rcp->sessionid, NFSX_V4SESSIONID)) {
9141 			tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
9142 			res = fxdr_unsigned(uint32_t, *tl);
9143 			if (res != NFSCDFS4_BOTH)
9144 				printf("nfsrpc_bindconnsess: did not "
9145 				    "return FS4_BOTH\n");
9146 		} else
9147 			printf("nfsrpc_bindconnsess: not same "
9148 			    "sessionid\n");
9149 	} else if (nd->nd_repstat != NFSERR_BADSESSION)
9150 		printf("nfsrpc_bindconnsess: returned %d\n", nd->nd_repstat);
9151 nfsmout:
9152 	if (error != 0)
9153 		printf("nfsrpc_bindconnsess: reply bad xdr\n");
9154 	m_freem(nd->nd_mrep);
9155 }
9156