xref: /freebsd/sys/fs/nfsclient/nfs_clrpcops.c (revision 8efba70d7914324890b1f8fe3079036eb2b5c3db)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Rick Macklem at The University of Guelph.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  */
35 
36 #include <sys/cdefs.h>
37 /*
38  * Rpc op calls, generally called from the vnode op calls or through the
39  * buffer cache, for NFS v2, 3 and 4.
40  * These do not normally make any changes to vnode arguments or use
41  * structures that might change between the VFS variants. The returned
42  * arguments are all at the end, after the NFSPROC_T *p one.
43  */
44 
45 #include "opt_inet6.h"
46 
47 #include <fs/nfs/nfsport.h>
48 #include <fs/nfsclient/nfs.h>
49 #include <sys/extattr.h>
50 #include <sys/sysctl.h>
51 #include <sys/taskqueue.h>
52 
/* Declare the vfs.nfs sysctl node that the knobs below attach to. */
SYSCTL_DECL(_vfs_nfs);

/*
 * vfs.nfs.ignore_eexist (read/write, default 0).
 * Per its description: ignore EEXIST replies for mkdir/symlink.
 */
static int	nfsignore_eexist = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, ignore_eexist, CTLFLAG_RW,
    &nfsignore_eexist, 0, "NFS ignore EEXIST replies for mkdir/symlink");

/*
 * vfs.nfs.dssameconn (read/write, default 0).
 * Per its description: use the same TCP connection to multiple DSs.
 */
static int	nfscl_dssameconn = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, dssameconn, CTLFLAG_RW,
    &nfscl_dssameconn, 0, "Use same TCP connection to multiple DSs");

/*
 * vfs.nfs.maxcopyrange (read/write, default SSIZE_MAX).
 * Per its description: upper bound on the size of a Copy, so that RPC
 * times stay reasonable.
 */
static uint64_t nfs_maxcopyrange = SSIZE_MAX;
SYSCTL_U64(_vfs_nfs, OID_AUTO, maxcopyrange, CTLFLAG_RW,
    &nfs_maxcopyrange, 0, "Max size of a Copy so RPC times reasonable");
66 
/*
 * Global variables
 *
 * The "extern" declarations name objects that are defined in other files.
 * The remaining variables are defined here, with the initial values shown.
 */
extern struct nfsstatsv1 nfsstatsv1;
extern int nfs_numnfscbd;
extern struct timeval nfsboottime;
extern u_int32_t newnfs_false, newnfs_true;
extern nfstype nfsv34_type[9];
extern int nfsrv_useacl;
extern char nfsv4_callbackaddr[INET6_ADDRSTRLEN];
extern int nfscl_debuglevel;
extern int nfs_pnfsiothreads;
extern u_long sb_max_adj;
/* NOTE(review): presumably declares the client state mutex — confirm. */
NFSCLSTATEMUTEX;
int nfstest_outofseq = 0;
/*
 * When non-zero, nfsrpc_openrpc() sets nfso_posixlock on every open, even
 * if the server's reply did not have NFSV4OPEN_LOCKTYPEPOSIX set.
 */
int nfscl_assumeposixlocks = 1;
/*
 * nfsrpc_open() only tries to acquire a layout along with the Open when
 * this is non-zero (and nfs_numnfscbd is non-zero as well).
 */
int nfscl_enablecallb = 0;
short nfsv4_cbport = NFSV4_CBPORT;
int nfstest_openallsetattr = 0;

/* Byte offset of d_name within struct dirent (size of the fields before it). */
#define	DIRHDSIZ	offsetof(struct dirent, d_name)
88 
/*
 * nfscl_getsameserver() can return one of three values:
 * NFSDSP_USETHISSESSION - Use this session for the DS.
 * NFSDSP_SEQTHISSESSION - Use the nfsclds_sequence field of this dsp for new
 *     session.
 * NFSDSP_NOTFOUND - No matching server was found.
 */
enum nfsclds_state {
	NFSDSP_USETHISSESSION = 0,	/* Use this session for the DS. */
	NFSDSP_SEQTHISSESSION = 1,	/* Use this dsp's nfsclds_sequence. */
	NFSDSP_NOTFOUND = 2,		/* No matching server was found. */
};
101 
/*
 * Do a write RPC on a DS data file, using this structure for the arguments,
 * so that this function can be executed by a separate kernel process.
 *
 * The members from vp through p line up, in type and order, with the
 * arguments of the nfsrpc_writedsmir() prototype in this file (vnode,
 * iomode, must_commit, stateid, DS, offset, length, file handle, mbuf
 * chain, version, minor version, credentials, thread).
 * NOTE(review): done, inprog and err look like completion/in-progress
 * bookkeeping and the RPC's result, and tsk the task handed to a
 * taskqueue — confirm against the code that fills in this structure.
 */
struct nfsclwritedsdorpc {
	int			done;
	int			inprog;
	struct task		tsk;
	struct vnode		*vp;
	int			iomode;
	int			must_commit;
	nfsv4stateid_t		*stateidp;
	struct nfsclds		*dsp;
	uint64_t		off;
	int			len;
#ifdef notyet
	/* Only present when the (not yet enabled) advise code is built. */
	int			advise;
#endif
	struct nfsfh		*fhp;
	struct mbuf		*m;
	int			vers;
	int			minorvers;
	struct ucred		*cred;
	NFSPROC_T		*p;
	int			err;
};
128 
/*
 * Forward declarations of the helper functions that are private to this
 * file (all declared static).
 */
static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *,
    struct ucred *, NFSPROC_T *, struct nfsvattr *, int *);
static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *,
    nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *);
static int nfsrpc_writerpc(vnode_t , struct uio *, int *, int *,
    struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *,
    int);
static int nfsrpc_deallocaterpc(vnode_t, off_t, off_t, nfsv4stateid_t *,
    struct nfsvattr *, int *, struct ucred *, NFSPROC_T *);
static int nfsrpc_createv23(vnode_t , char *, int, struct vattr *,
    nfsquad_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *,
    struct nfsvattr *, struct nfsfh **, int *, int *);
static int nfsrpc_createv4(vnode_t , char *, int, struct vattr *,
    nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *,
    NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *,
    int *, int *);
static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *,
    struct nfscllockowner *, u_int64_t, u_int64_t,
    u_int32_t, struct ucred *, NFSPROC_T *, int);
static int nfsrpc_setaclrpc(vnode_t, struct ucred *, NFSPROC_T *,
    struct acl *, nfsv4stateid_t *);
static int nfsrpc_layouterror(struct nfsmount *, uint8_t *, int, uint64_t,
    uint64_t, nfsv4stateid_t *, struct ucred *, NFSPROC_T *, uint32_t,
    uint32_t, char *);
static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int,
    uint32_t, uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **,
    struct ucred *, NFSPROC_T *);
static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_in *,
    struct sockaddr_in6 *, sa_family_t, int, int, struct nfsclds **,
    NFSPROC_T *);
static void nfscl_initsessionslots(struct nfsclsession *);
static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *,
    nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
    struct nfsclflayout *, uint64_t, uint64_t, int, struct ucred *,
    NFSPROC_T *);
static int nfscl_dofflayoutio(vnode_t, struct uio *, int *, int *, int *,
    nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
    struct nfsclflayout *, uint64_t, uint64_t, int, int, struct mbuf *,
    struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *,
    struct nfsclds *, uint64_t, int, struct nfsfh *, int, int, int,
    struct ucred *, NFSPROC_T *);
static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *,
    nfsv4stateid_t *, struct nfsclds *, uint64_t, int,
    struct nfsfh *, int, int, int, int, struct ucred *, NFSPROC_T *);
static int nfsio_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
    struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
    struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
static int nfsrpc_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
    struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
    struct ucred *, NFSPROC_T *);
static enum nfsclds_state nfscl_getsameserver(struct nfsmount *,
    struct nfsclds *, struct nfsclds **, uint32_t *);
static int nfsio_commitds(vnode_t, uint64_t, int, struct nfsclds *,
    struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *,
    NFSPROC_T *);
static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *,
    struct nfsfh *, int, int, struct ucred *, NFSPROC_T *);
#ifdef notyet
/* Advise helpers; only compiled when "notyet" is defined. */
static int nfsio_adviseds(vnode_t, uint64_t, int, int, struct nfsclds *,
    struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *,
    NFSPROC_T *);
static int nfsrpc_adviseds(vnode_t, uint64_t, int, int, struct nfsclds *,
    struct nfsfh *, int, int, struct ucred *, NFSPROC_T *);
#endif
static int nfsrpc_allocaterpc(vnode_t, off_t, off_t, nfsv4stateid_t *,
    struct nfsvattr *, int *, struct ucred *, NFSPROC_T *);
static void nfsrv_setuplayoutget(struct nfsrv_descript *, int, uint64_t,
    uint64_t, uint64_t, nfsv4stateid_t *, int, int, int);
static int nfsrv_parseug(struct nfsrv_descript *, int, uid_t *, gid_t *,
    NFSPROC_T *);
static int nfsrv_parselayoutget(struct nfsmount *, struct nfsrv_descript *,
    nfsv4stateid_t *, int *, struct nfsclflayouthead *);
static int nfsrpc_getopenlayout(struct nfsmount *, vnode_t, u_int8_t *,
    int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
    struct nfscldeleg **, struct ucred *, NFSPROC_T *);
static int nfsrpc_getcreatelayout(vnode_t, char *, int, struct vattr *,
    nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
    struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
    struct nfsfh **, int *, int *, int *);
static int nfsrpc_openlayoutrpc(struct nfsmount *, vnode_t, u_int8_t *,
    int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
    struct nfscldeleg **, nfsv4stateid_t *, int, int, int, int *,
    struct nfsclflayouthead *, int *, struct ucred *, NFSPROC_T *);
static int nfsrpc_createlayout(vnode_t, char *, int, struct vattr *,
    nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
    struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
    struct nfsfh **, int *, int *, int *, nfsv4stateid_t *,
    int, int, int, int *, struct nfsclflayouthead *, int *);
static int nfsrpc_layoutget(struct nfsmount *, uint8_t *, int, int, uint64_t,
    uint64_t, uint64_t, int, int, nfsv4stateid_t *, int *,
    struct nfsclflayouthead *, struct ucred *, NFSPROC_T *);
static int nfsrpc_layoutgetres(struct nfsmount *, vnode_t, uint8_t *,
    int, nfsv4stateid_t *, int, uint32_t *, struct nfscllayout **,
    struct nfsclflayouthead *, int, int, int *, struct ucred *, NFSPROC_T *);
static int nfsrpc_copyrpc(vnode_t, off_t, vnode_t, off_t, size_t *,
    nfsv4stateid_t *, nfsv4stateid_t *, struct nfsvattr *, int *,
    struct nfsvattr *, int *, bool, int *, struct ucred *, NFSPROC_T *);
static int nfsrpc_seekrpc(vnode_t, off_t *, nfsv4stateid_t *, bool *,
    int, struct nfsvattr *, int *, struct ucred *);
static struct mbuf *nfsm_split(struct mbuf *, uint64_t);
static void nfscl_statfs(struct vnode *, struct ucred *, NFSPROC_T *);

/* Not static: this prototype has external linkage. */
int nfs_pnfsio(task_fn_t *, void *);
233 
234 /*
235  * nfs null call from vfs.
236  */
237 int
238 nfsrpc_null(vnode_t vp, struct ucred *cred, NFSPROC_T *p)
239 {
240 	int error;
241 	struct nfsrv_descript nfsd, *nd = &nfsd;
242 
243 	NFSCL_REQSTART(nd, NFSPROC_NULL, vp, NULL);
244 	error = nfscl_request(nd, vp, p, cred);
245 	if (nd->nd_repstat && !error)
246 		error = nd->nd_repstat;
247 	m_freem(nd->nd_mrep);
248 	return (error);
249 }
250 
251 /*
252  * nfs access rpc op.
253  * For nfs version 3 and 4, use the access rpc to check accessibility. If file
254  * modes are changed on the server, accesses might still fail later.
255  */
256 int
257 nfsrpc_access(vnode_t vp, int acmode, struct ucred *cred,
258     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
259 {
260 	int error;
261 	u_int32_t mode, rmode;
262 
263 	if (acmode & VREAD)
264 		mode = NFSACCESS_READ;
265 	else
266 		mode = 0;
267 	if (vp->v_type == VDIR) {
268 		if (acmode & VWRITE)
269 			mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND |
270 				 NFSACCESS_DELETE);
271 		if (acmode & VEXEC)
272 			mode |= NFSACCESS_LOOKUP;
273 	} else {
274 		if (acmode & VWRITE)
275 			mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
276 		if (acmode & VEXEC)
277 			mode |= NFSACCESS_EXECUTE;
278 	}
279 
280 	/*
281 	 * Now, just call nfsrpc_accessrpc() to do the actual RPC.
282 	 */
283 	error = nfsrpc_accessrpc(vp, mode, cred, p, nap, attrflagp, &rmode);
284 
285 	/*
286 	 * The NFS V3 spec does not clarify whether or not
287 	 * the returned access bits can be a superset of
288 	 * the ones requested, so...
289 	 */
290 	if (!error && (rmode & mode) != mode)
291 		error = EACCES;
292 	return (error);
293 }
294 
295 /*
296  * The actual rpc, separated out for Darwin.
297  */
298 int
299 nfsrpc_accessrpc(vnode_t vp, u_int32_t mode, struct ucred *cred,
300     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, u_int32_t *rmodep)
301 {
302 	u_int32_t *tl;
303 	u_int32_t supported, rmode;
304 	int error;
305 	struct nfsrv_descript nfsd, *nd = &nfsd;
306 	nfsattrbit_t attrbits;
307 	struct nfsmount *nmp;
308 	struct nfsnode *np;
309 
310 	*attrflagp = 0;
311 	supported = mode;
312 	nmp = VFSTONFS(vp->v_mount);
313 	np = VTONFS(vp);
314 	if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0 &&
315 	    nmp->nm_fhsize == 0) {
316 		/* Attempt to get the actual root file handle. */
317 		error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp), cred, p);
318 		if (error != 0)
319 			return (EACCES);
320 		if (np->n_fhp->nfh_len == NFSX_FHMAX + 1)
321 			nfscl_statfs(vp, cred, p);
322 	}
323 	NFSCL_REQSTART(nd, NFSPROC_ACCESS, vp, cred);
324 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
325 	*tl = txdr_unsigned(mode);
326 	if (nd->nd_flag & ND_NFSV4) {
327 		/*
328 		 * And do a Getattr op.
329 		 */
330 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
331 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
332 		NFSGETATTR_ATTRBIT(&attrbits);
333 		(void) nfsrv_putattrbit(nd, &attrbits);
334 	}
335 	error = nfscl_request(nd, vp, p, cred);
336 	if (error)
337 		return (error);
338 	if (nd->nd_flag & ND_NFSV3) {
339 		error = nfscl_postop_attr(nd, nap, attrflagp);
340 		if (error)
341 			goto nfsmout;
342 	}
343 	if (!nd->nd_repstat) {
344 		if (nd->nd_flag & ND_NFSV4) {
345 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
346 			supported = fxdr_unsigned(u_int32_t, *tl++);
347 		} else {
348 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
349 		}
350 		rmode = fxdr_unsigned(u_int32_t, *tl);
351 		if (nd->nd_flag & ND_NFSV4)
352 			error = nfscl_postop_attr(nd, nap, attrflagp);
353 
354 		/*
355 		 * It's not obvious what should be done about
356 		 * unsupported access modes. For now, be paranoid
357 		 * and clear the unsupported ones.
358 		 */
359 		rmode &= supported;
360 		*rmodep = rmode;
361 	} else
362 		error = nd->nd_repstat;
363 nfsmout:
364 	m_freem(nd->nd_mrep);
365 	return (error);
366 }
367 
368 /*
369  * nfs open rpc
370  */
371 int
372 nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p)
373 {
374 	struct nfsclopen *op;
375 	struct nfscldeleg *dp;
376 	struct nfsfh *nfhp;
377 	struct nfsnode *np = VTONFS(vp);
378 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
379 	u_int32_t mode, clidrev;
380 	int ret, newone, error, expireret = 0, retrycnt;
381 
382 	/*
383 	 * For NFSv4, Open Ops are only done on Regular Files.
384 	 */
385 	if (vp->v_type != VREG)
386 		return (0);
387 	mode = 0;
388 	if (amode & FREAD)
389 		mode |= NFSV4OPEN_ACCESSREAD;
390 	if (amode & FWRITE)
391 		mode |= NFSV4OPEN_ACCESSWRITE;
392 	nfhp = np->n_fhp;
393 
394 	retrycnt = 0;
395 	do {
396 	    dp = NULL;
397 	    error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 1,
398 		cred, p, NULL, &op, &newone, &ret, 1, true);
399 	    if (error) {
400 		return (error);
401 	    }
402 	    if (nmp->nm_clp != NULL)
403 		clidrev = nmp->nm_clp->nfsc_clientidrev;
404 	    else
405 		clidrev = 0;
406 	    if (ret == NFSCLOPEN_DOOPEN) {
407 		if (np->n_v4 != NULL) {
408 			/*
409 			 * For the first attempt, try and get a layout, if
410 			 * pNFS is enabled for the mount.
411 			 */
412 			if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
413 			    nfs_numnfscbd == 0 ||
414 			    (np->n_flag & NNOLAYOUT) != 0 || retrycnt > 0)
415 				error = nfsrpc_openrpc(nmp, vp,
416 				    np->n_v4->n4_data,
417 				    np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
418 				    np->n_fhp->nfh_len, mode, op,
419 				    NFS4NODENAME(np->n_v4),
420 				    np->n_v4->n4_namelen,
421 				    &dp, 0, 0x0, cred, p, 0, 0);
422 			else
423 				error = nfsrpc_getopenlayout(nmp, vp,
424 				    np->n_v4->n4_data,
425 				    np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
426 				    np->n_fhp->nfh_len, mode, op,
427 				    NFS4NODENAME(np->n_v4),
428 				    np->n_v4->n4_namelen, &dp, cred, p);
429 			if (dp != NULL) {
430 				NFSLOCKNODE(np);
431 				np->n_flag &= ~NDELEGMOD;
432 				/*
433 				 * Invalidate the attribute cache, so that
434 				 * attributes that pre-date the issue of a
435 				 * delegation are not cached, since the
436 				 * cached attributes will remain valid while
437 				 * the delegation is held.
438 				 */
439 				NFSINVALATTRCACHE(np);
440 				NFSUNLOCKNODE(np);
441 				(void) nfscl_deleg(nmp->nm_mountp,
442 				    op->nfso_own->nfsow_clp,
443 				    nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp);
444 			}
445 		} else if (NFSHASNFSV4N(nmp)) {
446 			/*
447 			 * For the first attempt, try and get a layout, if
448 			 * pNFS is enabled for the mount.
449 			 */
450 			if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
451 			    nfs_numnfscbd == 0 ||
452 			    (np->n_flag & NNOLAYOUT) != 0 || retrycnt > 0)
453 				error = nfsrpc_openrpc(nmp, vp, nfhp->nfh_fh,
454 				    nfhp->nfh_len, nfhp->nfh_fh, nfhp->nfh_len,
455 				    mode, op, NULL, 0, &dp, 0, 0x0, cred, p, 0,
456 				    0);
457 			else
458 				error = nfsrpc_getopenlayout(nmp, vp,
459 				    nfhp->nfh_fh, nfhp->nfh_len, nfhp->nfh_fh,
460 				    nfhp->nfh_len, mode, op, NULL, 0, &dp,
461 				    cred, p);
462 			if (dp != NULL) {
463 				NFSLOCKNODE(np);
464 				np->n_flag &= ~NDELEGMOD;
465 				/*
466 				 * Invalidate the attribute cache, so that
467 				 * attributes that pre-date the issue of a
468 				 * delegation are not cached, since the
469 				 * cached attributes will remain valid while
470 				 * the delegation is held.
471 				 */
472 				NFSINVALATTRCACHE(np);
473 				NFSUNLOCKNODE(np);
474 				(void) nfscl_deleg(nmp->nm_mountp,
475 				    op->nfso_own->nfsow_clp,
476 				    nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp);
477 			}
478 		} else {
479 			error = EIO;
480 		}
481 		newnfs_copyincred(cred, &op->nfso_cred);
482 	    } else if (ret == NFSCLOPEN_SETCRED)
483 		/*
484 		 * This is a new local open on a delegation. It needs
485 		 * to have credentials so that an open can be done
486 		 * against the server during recovery.
487 		 */
488 		newnfs_copyincred(cred, &op->nfso_cred);
489 
490 	    /*
491 	     * nfso_opencnt is the count of how many VOP_OPEN()s have
492 	     * been done on this Open successfully and a VOP_CLOSE()
493 	     * is expected for each of these.
494 	     * If error is non-zero, don't increment it, since the Open
495 	     * hasn't succeeded yet.
496 	     */
497 	    if (!error) {
498 		op->nfso_opencnt++;
499 		if (NFSHASNFSV4N(nmp) && NFSHASONEOPENOWN(nmp)) {
500 		    NFSLOCKNODE(np);
501 		    np->n_openstateid = op;
502 		    NFSUNLOCKNODE(np);
503 		}
504 	    }
505 	    nfscl_openrelease(nmp, op, error, newone);
506 	    if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
507 		error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
508 		error == NFSERR_BADSESSION) {
509 		(void) nfs_catnap(PZERO, error, "nfs_open");
510 	    } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
511 		&& clidrev != 0) {
512 		expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
513 		retrycnt++;
514 	    }
515 	} while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
516 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
517 	    error == NFSERR_BADSESSION ||
518 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
519 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
520 	if (error && retrycnt >= 4)
521 		error = EIO;
522 	return (error);
523 }
524 
/*
 * the actual open rpc
 *
 * Builds and sends an NFSv4 Open (no create) followed by a Getattr of the
 * change and modify-time attributes, then parses the reply.
 *
 * nmp		mount the request is sent on
 * vp		vnode handed to newnfs_request() and nfsrpc_openconfirm()
 * nfhp/fhlen	file handle the request is started with (nfscl_reqstart())
 * newfhp/newfhlen
 *		file handle copied into a newly allocated delegation and
 *		passed to nfsrpc_openconfirm()
 * mode		share access bits, with the share deny bits above
 *		NFSLCK_SHIFT
 * op		the Open; its owner's seqid and owner name are sent, and its
 *		stateid and nfso_posixlock are filled in from the reply
 * name/namelen	component name, sent for the claim types that take one
 * dpp		in: delegation to claim, or NULL; out: a new delegation
 *		(malloc'd with M_NFSCLDELEG) on success when one was issued,
 *		otherwise NULL
 * reclaim	non-zero to do a CLAIM_PREVIOUS open for "delegtype"
 * syscred	non-zero sets ND_USEGSSNAME on the request
 * recursed	non-zero when called from itself, which suppresses the
 *		second "try again for a delegation" Open
 *
 * Returns 0 on success, else a transport error, an XDR parse error or the
 * reply status.
 */
int
nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen,
    u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op,
    u_int8_t *name, int namelen, struct nfscldeleg **dpp,
    int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p,
    int syscred, int recursed)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfscldeleg *dp, *ndp = NULL;
	struct nfsvattr nfsva;
	u_int32_t rflags, deleg;
	nfsattrbit_t attrbits;
	int error, ret, acesize, limitby;
	struct nfsclsession *tsep;

	/* Take the caller's delegation and clear the output argument. */
	dp = *dpp;
	*dpp = NULL;
	nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL, NULL, 0, 0,
	    cred);
	/* seqid, share access, share deny and the 2 word clientid. */
	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
	*tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
	tsep = nfsmnt_mdssession(nmp);
	*tl++ = tsep->nfsess_clientid.lval[0];
	*tl = tsep->nfsess_clientid.lval[1];
	(void) nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
	/* Open type (never create) followed by the claim type. */
	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
	if (reclaim) {
		*tl = txdr_unsigned(NFSV4OPEN_CLAIMPREVIOUS);
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(delegtype);
	} else {
		if (dp != NULL) {
			/* Claim the open against the delegation passed in. */
			if (NFSHASNFSV4N(nmp))
				*tl = txdr_unsigned(
				    NFSV4OPEN_CLAIMDELEGATECURFH);
			else
				*tl = txdr_unsigned(NFSV4OPEN_CLAIMDELEGATECUR);
			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
			/* A zero seqid is sent when NFSHASNFSV4N() is true. */
			if (NFSHASNFSV4N(nmp))
				*tl++ = 0;
			else
				*tl++ = dp->nfsdl_stateid.seqid;
			*tl++ = dp->nfsdl_stateid.other[0];
			*tl++ = dp->nfsdl_stateid.other[1];
			*tl = dp->nfsdl_stateid.other[2];
			if (!NFSHASNFSV4N(nmp))
				(void)nfsm_strtom(nd, name, namelen);
		} else if (NFSHASNFSV4N(nmp)) {
			*tl = txdr_unsigned(NFSV4OPEN_CLAIMFH);
		} else {
			*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
			(void)nfsm_strtom(nd, name, namelen);
		}
	}
	/* Follow the Open with a Getattr of the change and mtime attributes. */
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_GETATTR);
	NFSZERO_ATTRBIT(&attrbits);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
	(void) nfsrv_putattrbit(nd, &attrbits);
	if (syscred)
		nd->nd_flag |= ND_USEGSSNAME;
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error)
		return (error);
	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
	/*
	 * Parse the Open results on success, and also when a reclaim got
	 * NFSERR_DELAY but the reply still has more data in it.
	 */
	if (nd->nd_repstat == 0 || (nd->nd_repstat == NFSERR_DELAY &&
	    reclaim != 0 && (nd->nd_flag & ND_NOMOREDATA) == 0)) {
		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
		    6 * NFSX_UNSIGNED);
		op->nfso_stateid.seqid = *tl++;
		op->nfso_stateid.other[0] = *tl++;
		op->nfso_stateid.other[1] = *tl++;
		op->nfso_stateid.other[2] = *tl;
		/*
		 * tl is left on the last stateid word, so tl + 6 is the
		 * sixth word after the stateid: the result flags.
		 */
		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
		if (error)
			goto nfsmout;
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		deleg = fxdr_unsigned(u_int32_t, *tl);
		if (deleg == NFSV4OPEN_DELEGATEREAD ||
		    deleg == NFSV4OPEN_DELEGATEWRITE) {
			/* Note the first time the server issues a delegation. */
			if (!(op->nfso_own->nfsow_clp->nfsc_flags &
			      NFSCLFLAGS_FIRSTDELEG))
				op->nfso_own->nfsow_clp->nfsc_flags |=
				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
			/*
			 * Allocate the new delegation; it is freed at nfsmout
			 * if an error occurs, otherwise returned via dpp.
			 */
			ndp = malloc(
			    sizeof (struct nfscldeleg) + newfhlen,
			    M_NFSCLDELEG, M_WAITOK);
			LIST_INIT(&ndp->nfsdl_owner);
			LIST_INIT(&ndp->nfsdl_lock);
			ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
			ndp->nfsdl_fhlen = newfhlen;
			NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
			newnfs_copyincred(cred, &ndp->nfsdl_cred);
			nfscl_lockinit(&ndp->nfsdl_rwlock);
			/* Delegation stateid followed by the recall flag. */
			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
			    NFSX_UNSIGNED);
			ndp->nfsdl_stateid.seqid = *tl++;
			ndp->nfsdl_stateid.other[0] = *tl++;
			ndp->nfsdl_stateid.other[1] = *tl++;
			ndp->nfsdl_stateid.other[2] = *tl++;
			ret = fxdr_unsigned(int, *tl);
			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
				ndp->nfsdl_flags = NFSCLDL_WRITE;
				/*
				 * Indicates how much the file can grow.
				 */
				NFSM_DISSECT(tl, u_int32_t *,
				    3 * NFSX_UNSIGNED);
				limitby = fxdr_unsigned(int, *tl++);
				switch (limitby) {
				case NFSV4OPEN_LIMITSIZE:
					ndp->nfsdl_sizelimit = fxdr_hyper(tl);
					break;
				case NFSV4OPEN_LIMITBLOCKS:
					/* Limit is the product of the 2 words. */
					ndp->nfsdl_sizelimit =
					    fxdr_unsigned(u_int64_t, *tl++);
					ndp->nfsdl_sizelimit *=
					    fxdr_unsigned(u_int64_t, *tl);
					break;
				default:
					error = NFSERR_BADXDR;
					goto nfsmout;
				}
			} else {
				ndp->nfsdl_flags = NFSCLDL_READ;
			}
			/* A non-zero recall flag marks it for recall. */
			if (ret)
				ndp->nfsdl_flags |= NFSCLDL_RECALL;
			error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, false,
			    &ret, &acesize, p);
			if (error)
				goto nfsmout;
		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
			/* Any other delegation type is a malformed reply. */
			error = NFSERR_BADXDR;
			goto nfsmout;
		}
		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		/* If the 2nd element == NFS_OK, the Getattr succeeded. */
		if (*++tl == 0) {
			KASSERT(nd->nd_repstat == 0,
			    ("nfsrpc_openrpc: Getattr repstat"));
			error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
			    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
			    NULL, NULL, NULL, p, cred);
			if (error)
				goto nfsmout;
		}
		if (ndp != NULL) {
			/*
			 * Record the change attribute and modify time in the
			 * new delegation: from the old delegation for a
			 * reclaim, else from the Getattr reply.  Without
			 * either, mark the delegation for recall.
			 */
			if (reclaim != 0 && dp != NULL) {
				ndp->nfsdl_change = dp->nfsdl_change;
				ndp->nfsdl_modtime = dp->nfsdl_modtime;
				ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
			} else if (nd->nd_repstat == 0) {
				ndp->nfsdl_change = nfsva.na_filerev;
				ndp->nfsdl_modtime = nfsva.na_mtime;
				ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
			} else
				ndp->nfsdl_flags |= NFSCLDL_RECALL;
		}
		/* The Open results were parsed, so clear the reply status. */
		nd->nd_repstat = 0;
		if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM)) {
		    /* The server wants an OpenConfirm; retry it on DELAY. */
		    do {
			ret = nfsrpc_openconfirm(vp, newfhp, newfhlen, op,
			    cred, p);
			if (ret == NFSERR_DELAY)
			    (void) nfs_catnap(PZERO, ret, "nfs_open");
		    } while (ret == NFSERR_DELAY);
		    error = ret;
		}
		if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) ||
		    nfscl_assumeposixlocks)
		    op->nfso_posixlock = 1;
		else
		    op->nfso_posixlock = 0;

		/*
		 * If the server is handing out delegations, but we didn't
		 * get one because an OpenConfirm was required, try the
		 * Open again, to get a delegation. This is a harmless no-op,
		 * from a server's point of view.
		 */
		if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM) &&
		    (op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG)
		    && !error && dp == NULL && ndp == NULL && !recursed) {
		    do {
			/* recursed == 1 stops the nested call doing this. */
			ret = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp,
			    newfhlen, mode, op, name, namelen, &ndp, 0, 0x0,
			    cred, p, syscred, 1);
			if (ret == NFSERR_DELAY)
			    (void) nfs_catnap(PZERO, ret, "nfs_open2");
		    } while (ret == NFSERR_DELAY);
		    if (ret) {
			if (ndp != NULL) {
				free(ndp, M_NFSCLDELEG);
				ndp = NULL;
			}
			/*
			 * Only these failures of the second Open are passed
			 * back to the caller; others are ignored.
			 */
			if (ret == NFSERR_STALECLIENTID ||
			    ret == NFSERR_STALEDONTRECOVER ||
			    ret == NFSERR_BADSESSION)
				error = ret;
		    }
		}
	}
	if (nd->nd_repstat != 0 && error == 0)
		error = nd->nd_repstat;
	if (error == NFSERR_STALECLIENTID)
		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
nfsmout:
	/* Hand back the new delegation only on success; else free it. */
	if (!error)
		*dpp = ndp;
	else if (ndp != NULL)
		free(ndp, M_NFSCLDELEG);
	m_freem(nd->nd_mrep);
	return (error);
}
750 
751 /*
752  * open downgrade rpc
753  */
754 int
755 nfsrpc_opendowngrade(vnode_t vp, u_int32_t mode, struct nfsclopen *op,
756     struct ucred *cred, NFSPROC_T *p)
757 {
758 	u_int32_t *tl;
759 	struct nfsrv_descript nfsd, *nd = &nfsd;
760 	int error;
761 
762 	NFSCL_REQSTART(nd, NFSPROC_OPENDOWNGRADE, vp, cred);
763 	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED);
764 	if (NFSHASNFSV4N(VFSTONFS(vp->v_mount)))
765 		*tl++ = 0;
766 	else
767 		*tl++ = op->nfso_stateid.seqid;
768 	*tl++ = op->nfso_stateid.other[0];
769 	*tl++ = op->nfso_stateid.other[1];
770 	*tl++ = op->nfso_stateid.other[2];
771 	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
772 	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
773 	*tl = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
774 	error = nfscl_request(nd, vp, p, cred);
775 	if (error)
776 		return (error);
777 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
778 	if (!nd->nd_repstat) {
779 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
780 		op->nfso_stateid.seqid = *tl++;
781 		op->nfso_stateid.other[0] = *tl++;
782 		op->nfso_stateid.other[1] = *tl++;
783 		op->nfso_stateid.other[2] = *tl;
784 	}
785 	if (nd->nd_repstat && error == 0)
786 		error = nd->nd_repstat;
787 	if (error == NFSERR_STALESTATEID)
788 		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
789 nfsmout:
790 	m_freem(nd->nd_mrep);
791 	return (error);
792 }
793 
794 /*
795  * V4 Close operation.
796  */
797 int
798 nfsrpc_close(vnode_t vp, int doclose, NFSPROC_T *p)
799 {
800 	struct nfsclclient *clp;
801 	int error;
802 
803 	if (vp->v_type != VREG)
804 		return (0);
805 	if (doclose)
806 		error = nfscl_doclose(vp, &clp, p);
807 	else {
808 		error = nfscl_getclose(vp, &clp);
809 		if (error == 0)
810 			nfscl_clientrelease(clp);
811 	}
812 	return (error);
813 }
814 
815 /*
816  * Close the open.
817  */
818 int
819 nfsrpc_doclose(struct nfsmount *nmp, struct nfsclopen *op, NFSPROC_T *p,
820     bool loop_on_delayed, bool freeop)
821 {
822 	struct nfsrv_descript nfsd, *nd = &nfsd;
823 	struct nfscllockowner *lp, *nlp;
824 	struct nfscllock *lop, *nlop;
825 	struct ucred *tcred;
826 	u_int64_t off = 0, len = 0;
827 	u_int32_t type = NFSV4LOCKT_READ;
828 	int error, do_unlock, trycnt;
829 
830 	tcred = newnfs_getcred();
831 	newnfs_copycred(&op->nfso_cred, tcred);
832 	/*
833 	 * (Theoretically this could be done in the same
834 	 *  compound as the close, but having multiple
835 	 *  sequenced Ops in the same compound might be
836 	 *  too scary for some servers.)
837 	 */
838 	if (op->nfso_posixlock) {
839 		off = 0;
840 		len = NFS64BITSSET;
841 		type = NFSV4LOCKT_READ;
842 	}
843 
844 	/*
845 	 * Since this function is only called from VOP_INACTIVE(), no
846 	 * other thread will be manipulating this Open. As such, the
847 	 * lock lists are not being changed by other threads, so it should
848 	 * be safe to do this without locking.
849 	 */
850 	LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
851 		do_unlock = 1;
852 		LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
853 			if (op->nfso_posixlock == 0) {
854 				off = lop->nfslo_first;
855 				len = lop->nfslo_end - lop->nfslo_first;
856 				if (lop->nfslo_type == F_WRLCK)
857 					type = NFSV4LOCKT_WRITE;
858 				else
859 					type = NFSV4LOCKT_READ;
860 			}
861 			if (do_unlock) {
862 				trycnt = 0;
863 				do {
864 					error = nfsrpc_locku(nd, nmp, lp, off,
865 					    len, type, tcred, p, 0);
866 					if ((nd->nd_repstat == NFSERR_GRACE ||
867 					    nd->nd_repstat == NFSERR_DELAY) &&
868 					    error == 0)
869 						(void) nfs_catnap(PZERO,
870 						    (int)nd->nd_repstat,
871 						    "nfs_close");
872 				} while ((nd->nd_repstat == NFSERR_GRACE ||
873 				    nd->nd_repstat == NFSERR_DELAY) &&
874 				    error == 0 && trycnt++ < 5);
875 				if (op->nfso_posixlock)
876 					do_unlock = 0;
877 			}
878 			nfscl_freelock(lop, 0);
879 		}
880 		/*
881 		 * Do a ReleaseLockOwner.
882 		 * The lock owner name nfsl_owner may be used by other opens for
883 		 * other files but the lock_owner4 name that nfsrpc_rellockown()
884 		 * puts on the wire has the file handle for this file appended
885 		 * to it, so it can be done now.
886 		 */
887 		(void)nfsrpc_rellockown(nmp, lp, lp->nfsl_open->nfso_fh,
888 		    lp->nfsl_open->nfso_fhlen, tcred, p);
889 	}
890 
891 	/*
892 	 * There could be other Opens for different files on the same
893 	 * OpenOwner, so locking is required.
894 	 */
895 	NFSLOCKCLSTATE();
896 	nfscl_lockexcl(&op->nfso_own->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
897 	NFSUNLOCKCLSTATE();
898 	do {
899 		error = nfscl_tryclose(op, tcred, nmp, p, loop_on_delayed);
900 		if (error == NFSERR_GRACE)
901 			(void) nfs_catnap(PZERO, error, "nfs_close");
902 	} while (error == NFSERR_GRACE);
903 	NFSLOCKCLSTATE();
904 	nfscl_lockunlock(&op->nfso_own->nfsow_rwlock);
905 
906 	LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp)
907 		nfscl_freelockowner(lp, 0);
908 	if (freeop && error != NFSERR_DELAY)
909 		nfscl_freeopen(op, 0, true);
910 	NFSUNLOCKCLSTATE();
911 	NFSFREECRED(tcred);
912 	return (error);
913 }
914 
915 /*
916  * The actual Close RPC.
917  */
918 int
919 nfsrpc_closerpc(struct nfsrv_descript *nd, struct nfsmount *nmp,
920     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p,
921     int syscred)
922 {
923 	u_int32_t *tl;
924 	int error;
925 
926 	nfscl_reqstart(nd, NFSPROC_CLOSE, nmp, op->nfso_fh,
927 	    op->nfso_fhlen, NULL, NULL, 0, 0, cred);
928 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
929 	if (NFSHASNFSV4N(nmp)) {
930 		*tl++ = 0;
931 		*tl++ = 0;
932 	} else {
933 		*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
934 		*tl++ = op->nfso_stateid.seqid;
935 	}
936 	*tl++ = op->nfso_stateid.other[0];
937 	*tl++ = op->nfso_stateid.other[1];
938 	*tl = op->nfso_stateid.other[2];
939 	if (syscred)
940 		nd->nd_flag |= ND_USEGSSNAME;
941 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
942 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
943 	if (error)
944 		return (error);
945 	if (!NFSHASNFSV4N(nmp))
946 		NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
947 	if (nd->nd_repstat == 0)
948 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
949 	error = nd->nd_repstat;
950 	if (!NFSHASNFSV4N(nmp) && error == NFSERR_STALESTATEID)
951 		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
952 nfsmout:
953 	m_freem(nd->nd_mrep);
954 	return (error);
955 }
956 
957 /*
958  * V4 Open Confirm RPC.
959  */
960 int
961 nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen,
962     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p)
963 {
964 	u_int32_t *tl;
965 	struct nfsrv_descript nfsd, *nd = &nfsd;
966 	struct nfsmount *nmp;
967 	int error;
968 
969 	nmp = VFSTONFS(vp->v_mount);
970 	if (NFSHASNFSV4N(nmp))
971 		return (0);		/* No confirmation for NFSv4.1. */
972 	nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, nmp, nfhp, fhlen, NULL, NULL,
973 	    0, 0, NULL);
974 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
975 	*tl++ = op->nfso_stateid.seqid;
976 	*tl++ = op->nfso_stateid.other[0];
977 	*tl++ = op->nfso_stateid.other[1];
978 	*tl++ = op->nfso_stateid.other[2];
979 	*tl = txdr_unsigned(op->nfso_own->nfsow_seqid);
980 	error = nfscl_request(nd, vp, p, cred);
981 	if (error)
982 		return (error);
983 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
984 	if (!nd->nd_repstat) {
985 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
986 		op->nfso_stateid.seqid = *tl++;
987 		op->nfso_stateid.other[0] = *tl++;
988 		op->nfso_stateid.other[1] = *tl++;
989 		op->nfso_stateid.other[2] = *tl;
990 	}
991 	error = nd->nd_repstat;
992 	if (error == NFSERR_STALESTATEID)
993 		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
994 nfsmout:
995 	m_freem(nd->nd_mrep);
996 	return (error);
997 }
998 
999 /*
1000  * Do the setclientid and setclientid confirm RPCs. Called from nfs_statfs()
1001  * when a mount has just occurred and when the server replies NFSERR_EXPIRED.
1002  */
1003 int
1004 nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim,
1005     bool *retokp, struct ucred *cred, NFSPROC_T *p)
1006 {
1007 	u_int32_t *tl;
1008 	struct nfsrv_descript nfsd;
1009 	struct nfsrv_descript *nd = &nfsd;
1010 	u_int8_t *cp = NULL, *cp2, addr[INET6_ADDRSTRLEN + 9];
1011 	u_short port;
1012 	int error, isinet6 = 0, callblen;
1013 	nfsquad_t confirm;
1014 	static u_int32_t rev = 0;
1015 	struct nfsclds *dsp, *odsp;
1016 	struct in6_addr a6;
1017 	struct nfsclsession *tsep;
1018 	struct rpc_reconupcall recon;
1019 	struct nfscl_reconarg *rcp;
1020 
1021 	if (nfsboottime.tv_sec == 0)
1022 		NFSSETBOOTTIME(nfsboottime);
1023 	if (NFSHASNFSV4N(nmp)) {
1024 		error = NFSERR_BADSESSION;
1025 		odsp = dsp = NULL;
1026 		if (retokp != NULL) {
1027 			NFSLOCKMNT(nmp);
1028 			odsp = TAILQ_FIRST(&nmp->nm_sess);
1029 			NFSUNLOCKMNT(nmp);
1030 		}
1031 		if (odsp != NULL) {
1032 			/*
1033 			 * When a session already exists, first try a
1034 			 * CreateSession with the extant ClientID.
1035 			 */
1036 			dsp = malloc(sizeof(struct nfsclds) +
1037 			    odsp->nfsclds_servownlen + 1, M_NFSCLDS,
1038 			    M_WAITOK | M_ZERO);
1039 			dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
1040 			dsp->nfsclds_servownlen = odsp->nfsclds_servownlen;
1041 			dsp->nfsclds_sess.nfsess_clientid =
1042 			    odsp->nfsclds_sess.nfsess_clientid;
1043 			dsp->nfsclds_sess.nfsess_sequenceid =
1044 			    odsp->nfsclds_sess.nfsess_sequenceid + 1;
1045 			dsp->nfsclds_flags = odsp->nfsclds_flags;
1046 			if (dsp->nfsclds_servownlen > 0)
1047 				memcpy(dsp->nfsclds_serverown,
1048 				    odsp->nfsclds_serverown,
1049 				    dsp->nfsclds_servownlen + 1);
1050 			mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
1051 			mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
1052 			    NULL, MTX_DEF);
1053 			nfscl_initsessionslots(&dsp->nfsclds_sess);
1054 			error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
1055 			    &nmp->nm_sockreq, NULL,
1056 			    dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p);
1057 			NFSCL_DEBUG(1, "create session for extant "
1058 			    "ClientID=%d\n", error);
1059 			if (error != 0) {
1060 				nfscl_freenfsclds(dsp);
1061 				dsp = NULL;
1062 				/*
1063 				 * If *retokp is true, return any error other
1064 				 * than NFSERR_STALECLIENTID,
1065 				 * NFSERR_BADSESSION or NFSERR_STALEDONTRECOVER
1066 				 * so that nfscl_recover() will not loop.
1067 				 */
1068 				if (*retokp)
1069 					return (NFSERR_IO);
1070 			} else
1071 				*retokp = true;
1072 		} else if (retokp != NULL && *retokp)
1073 			return (NFSERR_IO);
1074 		if (error != 0) {
1075 			/*
1076 			 * Either there was no previous session or the
1077 			 * CreateSession attempt failed, so...
1078 			 * do an ExchangeID followed by the CreateSession.
1079 			 */
1080 			clp->nfsc_rev = rev++;
1081 			error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq, 0,
1082 			    NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp,
1083 			    cred, p);
1084 			NFSCL_DEBUG(1, "aft exch=%d\n", error);
1085 			if (error == 0)
1086 				error = nfsrpc_createsession(nmp,
1087 				    &dsp->nfsclds_sess, &nmp->nm_sockreq, NULL,
1088 				    dsp->nfsclds_sess.nfsess_sequenceid, 1,
1089 				    cred, p);
1090 			NFSCL_DEBUG(1, "aft createsess=%d\n", error);
1091 		}
1092 		if (error == 0) {
1093 			/*
1094 			 * If the session supports a backchannel, set up
1095 			 * the BindConnectionToSession call in the krpc
1096 			 * so that it is done on a reconnection.
1097 			 */
1098 			if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0) {
1099 				rcp = mem_alloc(sizeof(*rcp));
1100 				rcp->minorvers = nmp->nm_minorvers;
1101 				memcpy(rcp->sessionid,
1102 				    dsp->nfsclds_sess.nfsess_sessionid,
1103 				    NFSX_V4SESSIONID);
1104 				recon.call = nfsrpc_bindconnsess;
1105 				recon.arg = rcp;
1106 				CLNT_CONTROL(nmp->nm_client, CLSET_RECONUPCALL,
1107 				    &recon);
1108 			}
1109 
1110 			NFSLOCKMNT(nmp);
1111 			/*
1112 			 * The old sessions cannot be safely free'd
1113 			 * here, since they may still be used by
1114 			 * in-progress RPCs.
1115 			 */
1116 			tsep = NULL;
1117 			if (TAILQ_FIRST(&nmp->nm_sess) != NULL) {
1118 				/*
1119 				 * Mark the old session defunct.  Needed
1120 				 * when called from nfscl_hasexpired().
1121 				 */
1122 				tsep = NFSMNT_MDSSESSION(nmp);
1123 				tsep->nfsess_defunct = 1;
1124 			}
1125 			TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp,
1126 			    nfsclds_list);
1127 			/*
1128 			 * Wake up RPCs waiting for a slot on the
1129 			 * old session. These will then fail with
1130 			 * NFSERR_BADSESSION and be retried with the
1131 			 * new session by nfsv4_setsequence().
1132 			 * Also wakeup() processes waiting for the
1133 			 * new session.
1134 			 */
1135 			if (tsep != NULL)
1136 				wakeup(&tsep->nfsess_slots);
1137 			wakeup(&nmp->nm_sess);
1138 			NFSUNLOCKMNT(nmp);
1139 		} else if (dsp != NULL)
1140 			nfscl_freenfsclds(dsp);
1141 		if (error == 0 && reclaim == 0) {
1142 			error = nfsrpc_reclaimcomplete(nmp, cred, p);
1143 			NFSCL_DEBUG(1, "aft reclaimcomp=%d\n", error);
1144 			if (error == NFSERR_COMPLETEALREADY ||
1145 			    error == NFSERR_NOTSUPP)
1146 				/* Ignore this error. */
1147 				error = 0;
1148 		}
1149 		return (error);
1150 	} else if (retokp != NULL && *retokp)
1151 		return (NFSERR_IO);
1152 	clp->nfsc_rev = rev++;
1153 
1154 	/*
1155 	 * Allocate a single session structure for NFSv4.0, because some of
1156 	 * the fields are used by NFSv4.0 although it doesn't do a session.
1157 	 */
1158 	dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO);
1159 	mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
1160 	mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF);
1161 	NFSLOCKMNT(nmp);
1162 	TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list);
1163 	tsep = NFSMNT_MDSSESSION(nmp);
1164 	NFSUNLOCKMNT(nmp);
1165 
1166 	nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL, NULL, 0, 0,
1167 	    NULL);
1168 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1169 	*tl++ = txdr_unsigned(nfsboottime.tv_sec);
1170 	*tl = txdr_unsigned(clp->nfsc_rev);
1171 	(void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
1172 
1173 	/*
1174 	 * set up the callback address
1175 	 */
1176 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1177 	*tl = txdr_unsigned(NFS_CALLBCKPROG);
1178 	callblen = strlen(nfsv4_callbackaddr);
1179 	if (callblen == 0)
1180 		cp = nfscl_getmyip(nmp, &a6, &isinet6);
1181 	if (nfscl_enablecallb && nfs_numnfscbd > 0 &&
1182 	    (callblen > 0 || cp != NULL)) {
1183 		port = htons(nfsv4_cbport);
1184 		cp2 = (u_int8_t *)&port;
1185 #ifdef INET6
1186 		if ((callblen > 0 &&
1187 		     strchr(nfsv4_callbackaddr, ':')) || isinet6) {
1188 			char ip6buf[INET6_ADDRSTRLEN], *ip6add;
1189 
1190 			(void) nfsm_strtom(nd, "tcp6", 4);
1191 			if (callblen == 0) {
1192 				ip6_sprintf(ip6buf, (struct in6_addr *)cp);
1193 				ip6add = ip6buf;
1194 			} else {
1195 				ip6add = nfsv4_callbackaddr;
1196 			}
1197 			snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d",
1198 			    ip6add, cp2[0], cp2[1]);
1199 		} else
1200 #endif
1201 		{
1202 			(void) nfsm_strtom(nd, "tcp", 3);
1203 			if (callblen == 0)
1204 				snprintf(addr, INET6_ADDRSTRLEN + 9,
1205 				    "%d.%d.%d.%d.%d.%d", cp[0], cp[1],
1206 				    cp[2], cp[3], cp2[0], cp2[1]);
1207 			else
1208 				snprintf(addr, INET6_ADDRSTRLEN + 9,
1209 				    "%s.%d.%d", nfsv4_callbackaddr,
1210 				    cp2[0], cp2[1]);
1211 		}
1212 		(void) nfsm_strtom(nd, addr, strlen(addr));
1213 	} else {
1214 		(void) nfsm_strtom(nd, "tcp", 3);
1215 		(void) nfsm_strtom(nd, "0.0.0.0.0.0", 11);
1216 	}
1217 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1218 	*tl = txdr_unsigned(clp->nfsc_cbident);
1219 	nd->nd_flag |= ND_USEGSSNAME;
1220 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
1221 		NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1222 	if (error)
1223 		return (error);
1224 	if (nd->nd_repstat == 0) {
1225 	    NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1226 	    tsep->nfsess_clientid.lval[0] = *tl++;
1227 	    tsep->nfsess_clientid.lval[1] = *tl++;
1228 	    confirm.lval[0] = *tl++;
1229 	    confirm.lval[1] = *tl;
1230 	    m_freem(nd->nd_mrep);
1231 	    nd->nd_mrep = NULL;
1232 
1233 	    /*
1234 	     * and confirm it.
1235 	     */
1236 	    nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL,
1237 		NULL, 0, 0, NULL);
1238 	    NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1239 	    *tl++ = tsep->nfsess_clientid.lval[0];
1240 	    *tl++ = tsep->nfsess_clientid.lval[1];
1241 	    *tl++ = confirm.lval[0];
1242 	    *tl = confirm.lval[1];
1243 	    nd->nd_flag |= ND_USEGSSNAME;
1244 	    error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
1245 		cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1246 	    if (error)
1247 		return (error);
1248 	    m_freem(nd->nd_mrep);
1249 	    nd->nd_mrep = NULL;
1250 	}
1251 	error = nd->nd_repstat;
1252 nfsmout:
1253 	m_freem(nd->nd_mrep);
1254 	return (error);
1255 }
1256 
1257 /*
1258  * nfs getattr call.
1259  */
1260 int
1261 nfsrpc_getattr(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
1262     struct nfsvattr *nap)
1263 {
1264 	struct nfsrv_descript nfsd, *nd = &nfsd;
1265 	int error;
1266 	nfsattrbit_t attrbits;
1267 	struct nfsnode *np;
1268 	struct nfsmount *nmp;
1269 
1270 	nmp = VFSTONFS(vp->v_mount);
1271 	np = VTONFS(vp);
1272 	if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0 &&
1273 	    nmp->nm_fhsize == 0) {
1274 		/* Attempt to get the actual root file handle. */
1275 		error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp), cred, p);
1276 		if (error != 0)
1277 			return (EACCES);
1278 		if (np->n_fhp->nfh_len == NFSX_FHMAX + 1)
1279 			nfscl_statfs(vp, cred, p);
1280 	}
1281 	NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp, cred);
1282 	if (nd->nd_flag & ND_NFSV4) {
1283 		NFSGETATTR_ATTRBIT(&attrbits);
1284 		(void) nfsrv_putattrbit(nd, &attrbits);
1285 	}
1286 	error = nfscl_request(nd, vp, p, cred);
1287 	if (error)
1288 		return (error);
1289 	if (!nd->nd_repstat)
1290 		error = nfsm_loadattr(nd, nap);
1291 	else
1292 		error = nd->nd_repstat;
1293 	m_freem(nd->nd_mrep);
1294 	return (error);
1295 }
1296 
1297 /*
1298  * nfs getattr call with non-vnode arguments.
1299  */
1300 int
1301 nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred,
1302     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp,
1303     uint32_t *leasep)
1304 {
1305 	struct nfsrv_descript nfsd, *nd = &nfsd;
1306 	int error, vers = NFS_VER2;
1307 	nfsattrbit_t attrbits;
1308 
1309 	nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL, NULL, 0, 0,
1310 	    cred);
1311 	if (nd->nd_flag & ND_NFSV4) {
1312 		vers = NFS_VER4;
1313 		NFSGETATTR_ATTRBIT(&attrbits);
1314 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
1315 		(void) nfsrv_putattrbit(nd, &attrbits);
1316 	} else if (nd->nd_flag & ND_NFSV3) {
1317 		vers = NFS_VER3;
1318 	}
1319 	if (syscred)
1320 		nd->nd_flag |= ND_USEGSSNAME;
1321 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
1322 	    NFS_PROG, vers, NULL, 1, xidp, NULL);
1323 	if (error)
1324 		return (error);
1325 	if (nd->nd_repstat == 0) {
1326 		if ((nd->nd_flag & ND_NFSV4) != 0)
1327 			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
1328 			    NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL,
1329 			    NULL, NULL);
1330 		else
1331 			error = nfsm_loadattr(nd, nap);
1332 	} else
1333 		error = nd->nd_repstat;
1334 	m_freem(nd->nd_mrep);
1335 	return (error);
1336 }
1337 
1338 /*
1339  * Do an nfs setattr operation.
1340  */
1341 int
1342 nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp,
1343     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp)
1344 {
1345 	int error, expireret = 0, openerr, retrycnt;
1346 	u_int32_t clidrev = 0, mode;
1347 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1348 	struct nfsfh *nfhp;
1349 	nfsv4stateid_t stateid;
1350 	void *lckp;
1351 
1352 	if (nmp->nm_clp != NULL)
1353 		clidrev = nmp->nm_clp->nfsc_clientidrev;
1354 	if (vap != NULL && NFSATTRISSET(u_quad_t, vap, va_size))
1355 		mode = NFSV4OPEN_ACCESSWRITE;
1356 	else
1357 		mode = NFSV4OPEN_ACCESSREAD;
1358 	retrycnt = 0;
1359 	do {
1360 		lckp = NULL;
1361 		openerr = 1;
1362 		if (NFSHASNFSV4(nmp)) {
1363 			nfhp = VTONFS(vp)->n_fhp;
1364 			error = nfscl_getstateid(vp, nfhp->nfh_fh,
1365 			    nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp);
1366 			if (error && vp->v_type == VREG &&
1367 			    (mode == NFSV4OPEN_ACCESSWRITE ||
1368 			     nfstest_openallsetattr)) {
1369 				/*
1370 				 * No Open stateid, so try and open the file
1371 				 * now.
1372 				 */
1373 				if (mode == NFSV4OPEN_ACCESSWRITE)
1374 					openerr = nfsrpc_open(vp, FWRITE, cred,
1375 					    p);
1376 				else
1377 					openerr = nfsrpc_open(vp, FREAD, cred,
1378 					    p);
1379 				if (!openerr)
1380 					(void) nfscl_getstateid(vp,
1381 					    nfhp->nfh_fh, nfhp->nfh_len,
1382 					    mode, 0, cred, p, &stateid, &lckp);
1383 			}
1384 		}
1385 		if (vap != NULL)
1386 			error = nfsrpc_setattrrpc(vp, vap, &stateid, cred, p,
1387 			    rnap, attrflagp);
1388 		else
1389 			error = nfsrpc_setaclrpc(vp, cred, p, aclp, &stateid);
1390 		if (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD) {
1391 			NFSLOCKMNT(nmp);
1392 			nmp->nm_state |= NFSSTA_OPENMODE;
1393 			NFSUNLOCKMNT(nmp);
1394 		}
1395 		if (error == NFSERR_STALESTATEID)
1396 			nfscl_initiate_recovery(nmp->nm_clp);
1397 		if (lckp != NULL)
1398 			nfscl_lockderef(lckp);
1399 		if (!openerr)
1400 			(void) nfsrpc_close(vp, 0, p);
1401 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1402 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1403 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1404 			(void) nfs_catnap(PZERO, error, "nfs_setattr");
1405 		} else if ((error == NFSERR_EXPIRED ||
1406 		    ((!NFSHASINT(nmp) || !NFSHASNFSV4N(nmp)) &&
1407 		    error == NFSERR_BADSTATEID)) && clidrev != 0) {
1408 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1409 		} else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp) &&
1410 		    NFSHASNFSV4N(nmp)) {
1411 			error = EIO;
1412 		}
1413 		retrycnt++;
1414 	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1415 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1416 	    error == NFSERR_BADSESSION ||
1417 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1418 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1419 	     expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1420 	    (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD &&
1421 	     retrycnt < 4));
1422 	if (error && retrycnt >= 4)
1423 		error = EIO;
1424 	return (error);
1425 }
1426 
1427 static int
1428 nfsrpc_setattrrpc(vnode_t vp, struct vattr *vap,
1429     nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
1430     struct nfsvattr *rnap, int *attrflagp)
1431 {
1432 	u_int32_t *tl;
1433 	struct nfsrv_descript nfsd, *nd = &nfsd;
1434 	int error;
1435 	nfsattrbit_t attrbits;
1436 
1437 	*attrflagp = 0;
1438 	NFSCL_REQSTART(nd, NFSPROC_SETATTR, vp, cred);
1439 	if (nd->nd_flag & ND_NFSV4)
1440 		nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1441 	vap->va_type = vp->v_type;
1442 	nfscl_fillsattr(nd, vap, vp, NFSSATTR_FULL, 0);
1443 	if (nd->nd_flag & ND_NFSV3) {
1444 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1445 		*tl = newnfs_false;
1446 	} else if (nd->nd_flag & ND_NFSV4) {
1447 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1448 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1449 		NFSGETATTR_ATTRBIT(&attrbits);
1450 		(void) nfsrv_putattrbit(nd, &attrbits);
1451 	}
1452 	error = nfscl_request(nd, vp, p, cred);
1453 	if (error)
1454 		return (error);
1455 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
1456 		error = nfscl_wcc_data(nd, vp, rnap, attrflagp, NULL, NULL);
1457 	if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 && !error)
1458 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1459 	if (!(nd->nd_flag & ND_NFSV3) && !nd->nd_repstat && !error)
1460 		error = nfscl_postop_attr(nd, rnap, attrflagp);
1461 	m_freem(nd->nd_mrep);
1462 	if (nd->nd_repstat && !error)
1463 		error = nd->nd_repstat;
1464 	return (error);
1465 }
1466 
1467 /*
1468  * nfs lookup rpc
1469  */
1470 int
1471 nfsrpc_lookup(vnode_t dvp, char *name, int len, struct ucred *cred,
1472     NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap,
1473     struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, uint32_t openmode)
1474 {
1475 	uint32_t deleg, rflags, *tl;
1476 	struct nfsrv_descript nfsd, *nd = &nfsd;
1477 	struct nfsmount *nmp;
1478 	struct nfsnode *np;
1479 	struct nfsfh *nfhp;
1480 	nfsattrbit_t attrbits;
1481 	int error = 0, lookupp = 0, newone, ret, retop;
1482 	uint8_t own[NFSV4CL_LOCKNAMELEN];
1483 	struct nfsclopen *op;
1484 	struct nfscldeleg *ndp;
1485 	nfsv4stateid_t stateid;
1486 
1487 	*attrflagp = 0;
1488 	*dattrflagp = 0;
1489 	if (dvp->v_type != VDIR)
1490 		return (ENOTDIR);
1491 	nmp = VFSTONFS(dvp->v_mount);
1492 	if (len > NFS_MAXNAMLEN)
1493 		return (ENAMETOOLONG);
1494 	if (NFSHASNFSV4(nmp) && len == 1 &&
1495 		name[0] == '.') {
1496 		/*
1497 		 * Just return the current dir's fh.
1498 		 */
1499 		np = VTONFS(dvp);
1500 		nfhp = malloc(sizeof (struct nfsfh) +
1501 			np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
1502 		nfhp->nfh_len = np->n_fhp->nfh_len;
1503 		NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
1504 		*nfhpp = nfhp;
1505 		return (0);
1506 	}
1507 	if (NFSHASNFSV4(nmp) && len == 2 &&
1508 		name[0] == '.' && name[1] == '.') {
1509 		lookupp = 1;
1510 		openmode = 0;
1511 		NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, dvp, cred);
1512 	} else if (openmode != 0) {
1513 		NFSCL_REQSTART(nd, NFSPROC_LOOKUPOPEN, dvp, cred);
1514 		nfsm_strtom(nd, name, len);
1515 	} else {
1516 		NFSCL_REQSTART(nd, NFSPROC_LOOKUP, dvp, cred);
1517 		(void) nfsm_strtom(nd, name, len);
1518 	}
1519 	if (nd->nd_flag & ND_NFSV4) {
1520 		NFSGETATTR_ATTRBIT(&attrbits);
1521 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1522 		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
1523 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1524 		(void) nfsrv_putattrbit(nd, &attrbits);
1525 		if (openmode != 0) {
1526 			/* Test for a VREG file. */
1527 			NFSZERO_ATTRBIT(&attrbits);
1528 			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE);
1529 			NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
1530 			*tl = txdr_unsigned(NFSV4OP_VERIFY);
1531 			nfsrv_putattrbit(nd, &attrbits);
1532 			NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
1533 			*tl++ = txdr_unsigned(NFSX_UNSIGNED);
1534 			*tl = vtonfsv34_type(VREG);
1535 
1536 			/* Attempt the Open for VREG. */
1537 			nfscl_filllockowner(NULL, own, F_POSIX);
1538 			NFSM_BUILD(tl, uint32_t *, 6 * NFSX_UNSIGNED);
1539 			*tl++ = txdr_unsigned(NFSV4OP_OPEN);
1540 			*tl++ = 0;		/* seqid, ignored. */
1541 			*tl++ = txdr_unsigned(openmode);
1542 			*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
1543 			*tl++ = 0;		/* ClientID, ignored. */
1544 			*tl = 0;
1545 			nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN);
1546 			NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
1547 			*tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
1548 			*tl = txdr_unsigned(NFSV4OPEN_CLAIMFH);
1549 		}
1550 	}
1551 	error = nfscl_request(nd, dvp, p, cred);
1552 	if (error)
1553 		return (error);
1554 	ndp = NULL;
1555 	if (nd->nd_repstat) {
1556 		/*
1557 		 * When an NFSv4 Lookupp returns ENOENT, it means that
1558 		 * the lookup is at the root of an fs, so return this dir.
1559 		 */
1560 		if (nd->nd_repstat == NFSERR_NOENT && lookupp) {
1561 		    np = VTONFS(dvp);
1562 		    nfhp = malloc(sizeof (struct nfsfh) +
1563 			np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
1564 		    nfhp->nfh_len = np->n_fhp->nfh_len;
1565 		    NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
1566 		    *nfhpp = nfhp;
1567 		    m_freem(nd->nd_mrep);
1568 		    return (0);
1569 		}
1570 		if (nd->nd_flag & ND_NFSV3)
1571 		    error = nfscl_postop_attr(nd, dnap, dattrflagp);
1572 		else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
1573 		    ND_NFSV4) {
1574 			/* Load the directory attributes. */
1575 			error = nfsm_loadattr(nd, dnap);
1576 			if (error != 0)
1577 				goto nfsmout;
1578 			*dattrflagp = 1;
1579 		}
1580 		/* Check Lookup operation reply status. */
1581 		if (openmode != 0 && (nd->nd_flag & ND_NOMOREDATA) == 0) {
1582 			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
1583 			if (*++tl != 0)
1584 				goto nfsmout;
1585 		}
1586 		/* Look for GetFH reply. */
1587 		if (openmode != 0 && (nd->nd_flag & ND_NOMOREDATA) == 0) {
1588 			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
1589 			if (*++tl != 0)
1590 				goto nfsmout;
1591 			error = nfsm_getfh(nd, nfhpp);
1592 			if (error)
1593 				goto nfsmout;
1594 		}
1595 		/* Look for Getattr reply. */
1596 		if (openmode != 0 && (nd->nd_flag & ND_NOMOREDATA) == 0) {
1597 			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
1598 			if (*++tl != 0)
1599 				goto nfsmout;
1600 			error = nfsm_loadattr(nd, nap);
1601 			if (error == 0) {
1602 				/*
1603 				 * We have now successfully completed the
1604 				 * lookup, so set nd_repstat to 0.
1605 				 */
1606 				nd->nd_repstat = 0;
1607 				*attrflagp = 1;
1608 			}
1609 		}
1610 		goto nfsmout;
1611 	}
1612 	if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
1613 		/* Load the directory attributes. */
1614 		error = nfsm_loadattr(nd, dnap);
1615 		if (error != 0)
1616 			goto nfsmout;
1617 		*dattrflagp = 1;
1618 		/* Skip over the Lookup and GetFH operation status values. */
1619 		NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1620 	}
1621 	error = nfsm_getfh(nd, nfhpp);
1622 	if (error)
1623 		goto nfsmout;
1624 
1625 	error = nfscl_postop_attr(nd, nap, attrflagp);
1626 	if (openmode != 0 && error == 0) {
1627 		NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID +
1628 		    10 * NFSX_UNSIGNED);
1629 		tl += 4;	/* Skip over Verify+Open status. */
1630 		stateid.seqid = *tl++;
1631 		stateid.other[0] = *tl++;
1632 		stateid.other[1] = *tl++;
1633 		stateid.other[2] = *tl;
1634 		rflags = fxdr_unsigned(uint32_t, *(tl + 6));
1635 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1636 		if (error != 0)
1637 			goto nfsmout;
1638 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
1639 		deleg = fxdr_unsigned(uint32_t, *tl);
1640 		if (deleg == NFSV4OPEN_DELEGATEREAD ||
1641 		    deleg == NFSV4OPEN_DELEGATEWRITE) {
1642 			/*
1643 			 * Just need to fill in the fields used by
1644 			 * nfscl_trydelegreturn().
1645 			 * Mark the mount point as acquiring
1646 			 * delegations, so NFSPROC_LOOKUPOPEN will
1647 			 * no longer be done.
1648 			 */
1649 			NFSLOCKMNT(nmp);
1650 			nmp->nm_privflag |= NFSMNTP_DELEGISSUED;
1651 			NFSUNLOCKMNT(nmp);
1652 			ndp = malloc(sizeof(struct nfscldeleg) +
1653 			    (*nfhpp)->nfh_len, M_NFSCLDELEG, M_WAITOK);
1654 			ndp->nfsdl_fhlen = (*nfhpp)->nfh_len;
1655 			NFSBCOPY((*nfhpp)->nfh_fh, ndp->nfsdl_fh,
1656 			    ndp->nfsdl_fhlen);
1657 			newnfs_copyincred(cred, &ndp->nfsdl_cred);
1658 			NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
1659 			ndp->nfsdl_stateid.seqid = *tl++;
1660 			ndp->nfsdl_stateid.other[0] = *tl++;
1661 			ndp->nfsdl_stateid.other[1] = *tl++;
1662 			ndp->nfsdl_stateid.other[2] = *tl++;
1663 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
1664 			error = NFSERR_BADXDR;
1665 			goto nfsmout;
1666 		}
1667 		ret = nfscl_open(dvp, (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len,
1668 		    openmode, 0, cred, p, NULL, &op, &newone, &retop, 1, true);
1669 		if (ret != 0)
1670 			goto nfsmout;
1671 		if (newone != 0) {
1672 			op->nfso_stateid.seqid = stateid.seqid;
1673 			op->nfso_stateid.other[0] = stateid.other[0];
1674 			op->nfso_stateid.other[1] = stateid.other[1];
1675 			op->nfso_stateid.other[2] = stateid.other[2];
1676 			op->nfso_mode = openmode;
1677 		} else {
1678 			op->nfso_stateid.seqid = stateid.seqid;
1679 			if (retop == NFSCLOPEN_DOOPEN)
1680 				op->nfso_mode |= openmode;
1681 		}
1682 		if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) != 0 ||
1683 		    nfscl_assumeposixlocks)
1684 			op->nfso_posixlock = 1;
1685 		else
1686 			op->nfso_posixlock = 0;
1687 		nfscl_openrelease(nmp, op, 0, 0);
1688 		if (ndp != NULL) {
1689 			/*
1690 			 * Since we do not have the vnode, we
1691 			 * cannot invalidate cached attributes.
1692 			 * Just return the delegation.
1693 			 */
1694 			nfscl_trydelegreturn(ndp, cred, nmp, p);
1695 		}
1696 	}
1697 	if ((nd->nd_flag & ND_NFSV3) && !error)
1698 		error = nfscl_postop_attr(nd, dnap, dattrflagp);
1699 nfsmout:
1700 	m_freem(nd->nd_mrep);
1701 	if (!error && nd->nd_repstat)
1702 		error = nd->nd_repstat;
1703 	free(ndp, M_NFSCLDELEG);
1704 	return (error);
1705 }
1706 
1707 /*
1708  * Do a readlink rpc.
1709  */
1710 int
1711 nfsrpc_readlink(vnode_t vp, struct uio *uiop, struct ucred *cred,
1712     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
1713 {
1714 	u_int32_t *tl;
1715 	struct nfsrv_descript nfsd, *nd = &nfsd;
1716 	struct nfsnode *np = VTONFS(vp);
1717 	nfsattrbit_t attrbits;
1718 	int error, len, cangetattr = 1;
1719 
1720 	*attrflagp = 0;
1721 	NFSCL_REQSTART(nd, NFSPROC_READLINK, vp, cred);
1722 	if (nd->nd_flag & ND_NFSV4) {
1723 		/*
1724 		 * And do a Getattr op.
1725 		 */
1726 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1727 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1728 		NFSGETATTR_ATTRBIT(&attrbits);
1729 		(void) nfsrv_putattrbit(nd, &attrbits);
1730 	}
1731 	error = nfscl_request(nd, vp, p, cred);
1732 	if (error)
1733 		return (error);
1734 	if (nd->nd_flag & ND_NFSV3)
1735 		error = nfscl_postop_attr(nd, nap, attrflagp);
1736 	if (!nd->nd_repstat && !error) {
1737 		NFSM_STRSIZ(len, NFS_MAXPATHLEN);
1738 		/*
1739 		 * This seems weird to me, but must have been added to
1740 		 * FreeBSD for some reason. The only thing I can think of
1741 		 * is that there was/is some server that replies with
1742 		 * more link data than it should?
1743 		 */
1744 		if (len == NFS_MAXPATHLEN) {
1745 			NFSLOCKNODE(np);
1746 			if (np->n_size > 0 && np->n_size < NFS_MAXPATHLEN) {
1747 				len = np->n_size;
1748 				cangetattr = 0;
1749 			}
1750 			NFSUNLOCKNODE(np);
1751 		}
1752 		error = nfsm_mbufuio(nd, uiop, len);
1753 		if ((nd->nd_flag & ND_NFSV4) && !error && cangetattr)
1754 			error = nfscl_postop_attr(nd, nap, attrflagp);
1755 	}
1756 	if (nd->nd_repstat && !error)
1757 		error = nd->nd_repstat;
1758 nfsmout:
1759 	m_freem(nd->nd_mrep);
1760 	return (error);
1761 }
1762 
1763 /*
1764  * Read operation.
      *
      * Wrapper that calls nfsrpc_readrpc() in a loop, retrying while the
      * returned error is one of those tested in the loop condition below.
      * On an NFSv4 mount a credential from NFSNEWCRED() is used (and handed
      * to NFSFREECRED() before returning) and nfscl_getstateid() is called
      * for NFSV4OPEN_ACCESSREAD before every attempt.
      * Returns 0 or an error; any error still set once retrycnt has reached
      * 4 is converted to EIO.
1765  */
1766 int
1767 nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred,
1768     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
1769 {
1770 	int error, expireret = 0, retrycnt;
1771 	u_int32_t clidrev = 0;
1772 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1773 	struct nfsnode *np = VTONFS(vp);
1774 	struct ucred *newcred;
1775 	struct nfsfh *nfhp = NULL;
1776 	nfsv4stateid_t stateid;
1777 	void *lckp;
1778 
     	/*
     	 * clidrev is left 0 when the mount has no nm_clp; the
     	 * NFSERR_EXPIRED/NFSERR_BADSTATEID handling below is skipped then.
     	 */
1779 	if (nmp->nm_clp != NULL)
1780 		clidrev = nmp->nm_clp->nfsc_clientidrev;
1781 	newcred = cred;
1782 	if (NFSHASNFSV4(nmp)) {
1783 		nfhp = np->n_fhp;
1784 		newcred = NFSNEWCRED(cred);
1785 	}
1786 	retrycnt = 0;
1787 	do {
1788 		lckp = NULL;
     		/* The result of nfscl_getstateid() is deliberately ignored. */
1789 		if (NFSHASNFSV4(nmp))
1790 			(void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1791 			    NFSV4OPEN_ACCESSREAD, 0, newcred, p, &stateid,
1792 			    &lckp);
1793 		error = nfsrpc_readrpc(vp, uiop, newcred, &stateid, p, nap,
1794 		    attrflagp);
     		/* Record NFSSTA_OPENMODE in nm_state, under the mount lock. */
1795 		if (error == NFSERR_OPENMODE) {
1796 			NFSLOCKMNT(nmp);
1797 			nmp->nm_state |= NFSSTA_OPENMODE;
1798 			NFSUNLOCKMNT(nmp);
1799 		}
1800 		if (error == NFSERR_STALESTATEID)
1801 			nfscl_initiate_recovery(nmp->nm_clp);
     		/* Hand back lckp if nfscl_getstateid() filled it in. */
1802 		if (lckp != NULL)
1803 			nfscl_lockderef(lckp);
1804 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1805 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1806 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
     			/* These errors are retried after nfs_catnap(). */
1807 			(void) nfs_catnap(PZERO, error, "nfs_read");
1808 		} else if ((error == NFSERR_EXPIRED ||
1809 		    ((!NFSHASINT(nmp) || !NFSHASNFSV4N(nmp)) &&
1810 		    error == NFSERR_BADSTATEID)) && clidrev != 0) {
     			/*
     			 * A non-zero expireret stops the retry for
     			 * NFSERR_EXPIRED/NFSERR_BADSTATEID (see the loop
     			 * condition).
     			 */
1811 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1812 		} else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp) &&
1813 		    NFSHASNFSV4N(nmp)) {
     			/* Not retried: EIO is not in the loop condition. */
1814 			error = EIO;
1815 		}
1816 		retrycnt++;
1817 	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1818 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1819 	    error == NFSERR_BADSESSION ||
1820 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1821 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1822 	     expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1823 	    (error == NFSERR_OPENMODE && retrycnt < 4));
1824 	if (error && retrycnt >= 4)
1825 		error = EIO;
1826 	if (NFSHASNFSV4(nmp))
1827 		NFSFREECRED(newcred);
1828 	return (error);
1829 }
1830 
1831 /*
1832  * The actual read RPC.
      *
      * Issues one NFSPROC_READ request per chunk of at most nm_rsize bytes
      * and copies each reply's data into uiop with nfsm_mbufuio().  The loop
      * ends when uio_resid bytes have been handled, when an NFSv3/NFSv4
      * reply has eof set or carries no data, or when an NFSv2 reply is
      * shorter than requested.
      * *attrflagp is cleared before every request; the NFSv2 path sets it
      * to 1 after nfsm_loadattr() succeeds and the NFSv3 path passes it to
      * nfscl_postop_attr().  No Getattr is added for NFSv4.
      * Returns EFBIG when the end offset exceeds nm_maxfilesize or wraps,
      * otherwise 0 or the error from the request/reply handling.
      * NOTE(review): NFSM_DISSECT/NFSM_STRSIZ presumably jump to nfsmout on
      * a malformed reply - confirm against the macro definitions.
1833  */
1834 static int
1835 nfsrpc_readrpc(vnode_t vp, struct uio *uiop, struct ucred *cred,
1836     nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap,
1837     int *attrflagp)
1838 {
1839 	u_int32_t *tl;
1840 	int error = 0, len, retlen, tsiz, eof = 0;
1841 	struct nfsrv_descript nfsd;
1842 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1843 	struct nfsrv_descript *nd = &nfsd;
1844 	int rsize;
1845 	off_t tmp_off;
1846 
1847 	*attrflagp = 0;
1848 	tsiz = uiop->uio_resid;
1849 	tmp_off = uiop->uio_offset + tsiz;
     	/* nm_maxfilesize and nm_rsize are read with the mount locked. */
1850 	NFSLOCKMNT(nmp);
1851 	if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1852 		NFSUNLOCKMNT(nmp);
1853 		return (EFBIG);
1854 	}
1855 	rsize = nmp->nm_rsize;
1856 	NFSUNLOCKMNT(nmp);
     	/* So that nfsmout has nothing to free if no reply was received. */
1857 	nd->nd_mrep = NULL;
1858 	while (tsiz > 0) {
1859 		*attrflagp = 0;
1860 		len = (tsiz > rsize) ? rsize : tsiz;
1861 		NFSCL_REQSTART(nd, NFSPROC_READ, vp, cred);
1862 		if (nd->nd_flag & ND_NFSV4)
1863 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1864 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED * 3);
1865 		if (nd->nd_flag & ND_NFSV2) {
     			/* 32-bit offset, count, and a third word sent as 0. */
1866 			*tl++ = txdr_unsigned(uiop->uio_offset);
1867 			*tl++ = txdr_unsigned(len);
1868 			*tl = 0;
1869 		} else {
     			/* Offset as a hyper (two words), then the count. */
1870 			txdr_hyper(uiop->uio_offset, tl);
1871 			*(tl + 2) = txdr_unsigned(len);
1872 		}
1873 		/*
1874 		 * Since I can't do a Getattr for NFSv4 for Write, there
1875 		 * doesn't seem any point in doing one here, either.
1876 		 * (See the comment in nfsrpc_writerpc() for more info.)
1877 		 */
1878 		error = nfscl_request(nd, vp, p, cred);
1879 		if (error)
1880 			return (error);
1881 		if (nd->nd_flag & ND_NFSV3) {
1882 			error = nfscl_postop_attr(nd, nap, attrflagp);
1883 		} else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV2)) {
1884 			error = nfsm_loadattr(nd, nap);
1885 			if (!error)
1886 				*attrflagp = 1;
1887 		}
     		/* A parse error takes precedence over the reply status. */
1888 		if (nd->nd_repstat || error) {
1889 			if (!error)
1890 				error = nd->nd_repstat;
1891 			goto nfsmout;
1892 		}
1893 		if (nd->nd_flag & ND_NFSV3) {
     			/* Only the second of the two words (eof) is used. */
1894 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1895 			eof = fxdr_unsigned(int, *(tl + 1));
1896 		} else if (nd->nd_flag & ND_NFSV4) {
1897 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
1898 			eof = fxdr_unsigned(int, *tl);
1899 		}
1900 		NFSM_STRSIZ(retlen, len);
1901 		error = nfsm_mbufuio(nd, uiop, retlen);
1902 		if (error)
1903 			goto nfsmout;
1904 		m_freem(nd->nd_mrep);
1905 		nd->nd_mrep = NULL;
1906 		tsiz -= retlen;
     		/* Setting tsiz to 0 ends the loop early. */
1907 		if (!(nd->nd_flag & ND_NFSV2)) {
1908 			if (eof || retlen == 0)
1909 				tsiz = 0;
1910 		} else if (retlen < len)
1911 			tsiz = 0;
1912 	}
1913 	return (0);
1914 nfsmout:
1915 	if (nd->nd_mrep != NULL)
1916 		m_freem(nd->nd_mrep);
1917 	return (error);
1918 }
1919 
1920 /*
1921  * nfs write operation
1922  * When called_from_strategy != 0, it should return EIO for an error that
1923  * indicates recovery is in progress, so that the buffer will be left
1924  * dirty and be written back to the server later. If it loops around,
1925  * the recovery thread could get stuck waiting for the buffer and recovery
1926  * will then deadlock.
      *
      * Wrapper that calls nfsrpc_writerpc() in a loop, retrying while the
      * returned error is one of those tested in the loop condition below.
      * *must_commit must be 0, 1 or 2 on entry (asserted).  On an NFSv4
      * mount a credential from NFSNEWCRED() is used and nfscl_getstateid()
      * is called for NFSV4OPEN_ACCESSWRITE before every attempt; if the
      * resulting stateid has an all-zero "other" field the write RPC is
      * skipped and 0 is returned.
      * Returns 0 or an error; an error is converted to EIO once retrycnt
      * has reached 4, or when it is NFSERR_STALESTATEID, NFSERR_BADSESSION
      * or NFSERR_STALEDONTRECOVER and called_from_strategy != 0.
1927  */
1928 int
1929 nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
1930     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
1931     int called_from_strategy, int ioflag)
1932 {
1933 	int error, expireret = 0, retrycnt, nostateid;
1934 	u_int32_t clidrev = 0;
1935 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1936 	struct nfsnode *np = VTONFS(vp);
1937 	struct ucred *newcred;
1938 	struct nfsfh *nfhp = NULL;
1939 	nfsv4stateid_t stateid;
1940 	void *lckp;
1941 
1942 	KASSERT(*must_commit >= 0 && *must_commit <= 2,
1943 	    ("nfsrpc_write: must_commit out of range=%d", *must_commit));
     	/*
     	 * clidrev is left 0 when the mount has no nm_clp; the
     	 * NFSERR_EXPIRED/NFSERR_BADSTATEID handling below is skipped then.
     	 */
1944 	if (nmp->nm_clp != NULL)
1945 		clidrev = nmp->nm_clp->nfsc_clientidrev;
1946 	newcred = cred;
1947 	if (NFSHASNFSV4(nmp)) {
1948 		newcred = NFSNEWCRED(cred);
1949 		nfhp = np->n_fhp;
1950 	}
1951 	retrycnt = 0;
1952 	do {
1953 		lckp = NULL;
1954 		nostateid = 0;
1955 		if (NFSHASNFSV4(nmp)) {
     			/* The return value is ignored; stateid is tested instead. */
1956 			(void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1957 			    NFSV4OPEN_ACCESSWRITE, 0, newcred, p, &stateid,
1958 			    &lckp);
1959 			if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
1960 			    stateid.other[2] == 0) {
1961 				nostateid = 1;
1962 				NFSCL_DEBUG(1, "stateid0 in write\n");
1963 			}
1964 		}
1965 
1966 		/*
1967 		 * If there is no stateid for NFSv4, it means this is an
1968 		 * extraneous write after close. Basically a poorly
1969 		 * implemented buffer cache. Just don't do the write.
1970 		 */
1971 		if (nostateid)
1972 			error = 0;
1973 		else
1974 			error = nfsrpc_writerpc(vp, uiop, iomode, must_commit,
1975 			    newcred, &stateid, p, nap, attrflagp, ioflag);
1976 		if (error == NFSERR_STALESTATEID)
1977 			nfscl_initiate_recovery(nmp->nm_clp);
     		/* Hand back lckp if nfscl_getstateid() filled it in. */
1978 		if (lckp != NULL)
1979 			nfscl_lockderef(lckp);
1980 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1981 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1982 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
     			/*
     			 * nfs_catnap() is called for all of these, although
     			 * some of them are not retried when
     			 * called_from_strategy != 0 (see the loop condition).
     			 */
1983 			(void) nfs_catnap(PZERO, error, "nfs_write");
1984 		} else if ((error == NFSERR_EXPIRED ||
1985 		    ((!NFSHASINT(nmp) || !NFSHASNFSV4N(nmp)) &&
1986 		    error == NFSERR_BADSTATEID)) && clidrev != 0) {
     			/*
     			 * A non-zero expireret stops the retry for
     			 * NFSERR_EXPIRED/NFSERR_BADSTATEID.
     			 */
1987 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1988 		} else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp) &&
1989 		    NFSHASNFSV4N(nmp)) {
     			/* Not retried: EIO is not in the loop condition. */
1990 			error = EIO;
1991 		}
1992 		retrycnt++;
1993 	} while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
1994 	    ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
1995 	      error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) ||
1996 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1997 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1998 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
     	/*
     	 * Map exhausted retries, and the recovery-related errors on the
     	 * strategy path, to EIO.
     	 */
1999 	if (error != 0 && (retrycnt >= 4 ||
2000 	    ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
2001 	      error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0)))
2002 		error = EIO;
2003 	if (NFSHASNFSV4(nmp))
2004 		NFSFREECRED(newcred);
2005 	return (error);
2006 }
2007 
2008 /*
2009  * The actual write RPC.
      *
      * Sends the data described by uiop (exactly one iovec, asserted) in
      * chunks of at most nm_wsize bytes, one request per chunk.
      * When IO_APPEND is set in ioflag on an NFSv4 mount without pNFS, the
      * request is started as NFSPROC_APPENDWRITE with the size attribute
      * (set to uio_offset) placed ahead of the Write op.  If that request
      * fails with NFSERR_NOTSAME, the chunk is re-sent as a plain write at
      * the size returned through nfscl_wcc_data(), or EFBIG results if that
      * would exceed nm_maxfilesize.  do_append is cleared after the first
      * successful reply.
      * *iomode supplies the stable-how value sent to the server and, on
      * every exit through nfsmout, receives the lowest commitment level seen
      * in the replies (NFSWRITE_FILESYNC if no reply was processed).  The
      * early returns (EFBIG, nfsm_uiombuf() or nfscl_request() failure) leave
      * *iomode untouched.
      * *must_commit is set to 1 when a reply's write verifier differs from
      * the one saved in nm_verf, unless *must_commit is already 2.
      * NOTE(review): NFSM_DISSECT presumably jumps to nfsmout on a malformed
      * reply - confirm against the macro definition.
2010  */
2011 static int
2012 nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode,
2013     int *must_commit, struct ucred *cred, nfsv4stateid_t *stateidp,
2014     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, int ioflag)
2015 {
2016 	u_int32_t *tl;
2017 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2018 	struct nfsnode *np = VTONFS(vp);
2019 	int error = 0, len, rlen, commit, committed = NFSWRITE_FILESYNC;
2020 	int wccflag = 0;
2021 	int32_t backup;
2022 	struct nfsrv_descript *nd;
2023 	nfsattrbit_t attrbits;
2024 	uint64_t tmp_off;
2025 	ssize_t tsiz, wsize;
2026 	bool do_append;
2027 
2028 	KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
2029 	*attrflagp = 0;
2030 	tsiz = uiop->uio_resid;
2031 	tmp_off = uiop->uio_offset + tsiz;
     	/* nm_maxfilesize, nm_wsize and the flags are read with the mount locked. */
2032 	NFSLOCKMNT(nmp);
2033 	if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
2034 		NFSUNLOCKMNT(nmp);
2035 		return (EFBIG);
2036 	}
2037 	wsize = nmp->nm_wsize;
2038 	do_append = false;
2039 	if ((ioflag & IO_APPEND) != 0 && NFSHASNFSV4(nmp) && !NFSHASPNFS(nmp))
2040 		do_append = true;
2041 	NFSUNLOCKMNT(nmp);
     	/* The descriptor is heap allocated here and freed on every return path. */
2042 	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK);
2043 	nd->nd_mrep = NULL;	/* NFSv2 sometimes does a write with */
2044 	nd->nd_repstat = 0;	/* uio_resid == 0, so the while is not done */
2045 	while (tsiz > 0) {
2046 		*attrflagp = 0;
2047 		len = (tsiz > wsize) ? wsize : tsiz;
2048 		if (do_append)
2049 			NFSCL_REQSTART(nd, NFSPROC_APPENDWRITE, vp, cred);
2050 		else
2051 			NFSCL_REQSTART(nd, NFSPROC_WRITE, vp, cred);
2052 		if (nd->nd_flag & ND_NFSV4) {
2053 			if (do_append) {
     				/*
     				 * Size attribute bitmap, attribute length,
     				 * the size value (uio_offset), then the
     				 * Write op number.
     				 */
2054 				NFSZERO_ATTRBIT(&attrbits);
2055 				NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
2056 				nfsrv_putattrbit(nd, &attrbits);
2057 				NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED +
2058 				    NFSX_HYPER);
2059 				*tl++ = txdr_unsigned(NFSX_HYPER);
2060 				txdr_hyper(uiop->uio_offset, tl); tl += 2;
2061 				*tl = txdr_unsigned(NFSV4OP_WRITE);
2062 			}
     			/* NFSv4: stateid, offset, stable-how, length. */
2063 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
2064 			NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+2*NFSX_UNSIGNED);
2065 			txdr_hyper(uiop->uio_offset, tl);
2066 			tl += 2;
2067 			*tl++ = txdr_unsigned(*iomode);
2068 			*tl = txdr_unsigned(len);
2069 		} else if (nd->nd_flag & ND_NFSV3) {
     			/* NFSv3: offset, length, stable-how, length again. */
2070 			NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+3*NFSX_UNSIGNED);
2071 			txdr_hyper(uiop->uio_offset, tl);
2072 			tl += 2;
2073 			*tl++ = txdr_unsigned(len);
2074 			*tl++ = txdr_unsigned(*iomode);
2075 			*tl = txdr_unsigned(len);
2076 		} else {
2077 			u_int32_t x;
2078 
2079 			NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2080 			/*
2081 			 * Not sure why someone changed this, since the
2082 			 * RFC clearly states that "beginoffset" and
2083 			 * "totalcount" are ignored, but it wouldn't
2084 			 * surprise me if there's a busted server out there.
2085 			 */
2086 			/* Set both "begin" and "current" to non-garbage. */
2087 			x = txdr_unsigned((u_int32_t)uiop->uio_offset);
2088 			*tl++ = x;      /* "begin offset" */
2089 			*tl++ = x;      /* "current offset" */
2090 			x = txdr_unsigned(len);
2091 			*tl++ = x;      /* total to this offset */
2092 			*tl = x;        /* size of this write */
2093 		}
     		/* Append len bytes of data from uiop to the request. */
2094 		error = nfsm_uiombuf(nd, uiop, len);
2095 		if (error != 0) {
2096 			m_freem(nd->nd_mreq);
2097 			free(nd, M_TEMP);
2098 			return (error);
2099 		}
2100 		/*
2101 		 * Although it is tempting to do a normal Getattr Op in the
2102 		 * NFSv4 compound, the result can be a nearly hung client
2103 		 * system if the Getattr asks for Owner and/or OwnerGroup.
2104 		 * It occurs when the client can't map either the Owner or
2105 		 * Owner_group name in the Getattr reply to a uid/gid. When
2106 		 * there is a cache miss, the kernel does an upcall to the
2107 		 * nfsuserd. Then, it can try and read the local /etc/passwd
2108 		 * or /etc/group file. It can then block in getnewbuf(),
2109 		 * waiting for dirty writes to be pushed to the NFS server.
2110 		 * The only reason this doesn't result in a complete
2111 		 * deadlock, is that the upcall times out and allows
2112 		 * the write to complete. However, progress is so slow
2113 		 * that it might just as well be deadlocked.
2114 		 * As such, we get the rest of the attributes, but not
2115 		 * Owner or Owner_group.
2116 		 * nb: nfscl_loadattrcache() needs to be told that these
2117 		 *     partial attributes from a write rpc are being
2118 		 *     passed in, via an argument flag.
2119 		 */
2120 		if (nd->nd_flag & ND_NFSV4) {
2121 			NFSWRITEGETATTR_ATTRBIT(&attrbits);
2122 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2123 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
2124 			(void) nfsrv_putattrbit(nd, &attrbits);
2125 		}
2126 		error = nfscl_request(nd, vp, p, cred);
2127 		if (error) {
2128 			free(nd, M_TEMP);
2129 			return (error);
2130 		}
2131 		if (nd->nd_repstat) {
2132 			/*
2133 			 * In case the rpc gets retried, roll
2134 			 * the uio fields changed by nfsm_uiombuf()
2135 			 * back.
2136 			 */
2137 			uiop->uio_offset -= len;
2138 			uiop->uio_resid += len;
2139 			uiop->uio_iov->iov_base =
2140 			    (char *)uiop->uio_iov->iov_base - len;
2141 			uiop->uio_iov->iov_len += len;
2142 		}
2143 		if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
     			/* tmp_off is reused here to receive the size used below. */
2144 			error = nfscl_wcc_data(nd, vp, nap, attrflagp,
2145 			    &wccflag, &tmp_off);
2146 			if (error)
2147 				goto nfsmout;
2148 		}
2149 		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2150 		    (ND_NFSV4 | ND_NOMOREDATA) &&
2151 		    nd->nd_repstat == NFSERR_NOTSAME && do_append) {
2152 			/*
2153 			 * Verify of the file's size failed, so redo the
2154 			 * write using the file's size as returned in
2155 			 * the wcc attributes.
2156 			 */
2157 			if (tmp_off + tsiz <= nmp->nm_maxfilesize) {
     				/*
     				 * The uio was rolled back above (nd_repstat
     				 * was non-zero), so only the offset needs
     				 * changing before the chunk is sent again.
     				 */
2158 				do_append = false;
2159 				uiop->uio_offset = tmp_off;
2160 				m_freem(nd->nd_mrep);
2161 				nd->nd_mrep = NULL;
2162 				continue;
2163 			} else
2164 				nd->nd_repstat = EFBIG;
2165 		}
2166 		if (!nd->nd_repstat) {
2167 			if (do_append) {
2168 				/* Strip off the Write reply status. */
2169 				do_append = false;
2170 				NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
2171 			}
2172 			if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
     				/* Count written, commit level, write verifier. */
2173 				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED
2174 					+ NFSX_VERF);
2175 				rlen = fxdr_unsigned(int, *tl++);
2176 				if (rlen == 0) {
2177 					error = NFSERR_IO;
2178 					goto nfsmout;
2179 				} else if (rlen < len) {
     					/*
     					 * Short write: back the uio up by the
     					 * unwritten amount so that the next
     					 * iteration sends those bytes.
     					 */
2180 					backup = len - rlen;
2181 					uiop->uio_iov->iov_base =
2182 					    (char *)uiop->uio_iov->iov_base -
2183 					    backup;
2184 					uiop->uio_iov->iov_len += backup;
2185 					uiop->uio_offset -= backup;
2186 					uiop->uio_resid += backup;
2187 					len = rlen;
2188 				}
2189 				commit = fxdr_unsigned(int, *tl++);
2190 
2191 				/*
2192 				 * Return the lowest commitment level
2193 				 * obtained by any of the RPCs.
2194 				 */
2195 				if (committed == NFSWRITE_FILESYNC)
2196 					committed = commit;
2197 				else if (committed == NFSWRITE_DATASYNC &&
2198 					commit == NFSWRITE_UNSTABLE)
2199 					committed = commit;
     				/*
     				 * Save the first verifier seen for this
     				 * mount; a later mismatch replaces it and
     				 * sets *must_commit to 1 unless it is 2.
     				 */
2200 				NFSLOCKMNT(nmp);
2201 				if (!NFSHASWRITEVERF(nmp)) {
2202 					NFSBCOPY((caddr_t)tl,
2203 					    (caddr_t)&nmp->nm_verf[0],
2204 					    NFSX_VERF);
2205 					NFSSETWRITEVERF(nmp);
2206 	    			} else if (NFSBCMP(tl, nmp->nm_verf,
2207 				    NFSX_VERF) && *must_commit != 2) {
2208 					*must_commit = 1;
2209 					NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
2210 				}
2211 				NFSUNLOCKMNT(nmp);
2212 			}
     			/* NFSv4: skip two words before the attributes. */
2213 			if (nd->nd_flag & ND_NFSV4)
2214 				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2215 			if (nd->nd_flag & (ND_NFSV2 | ND_NFSV4)) {
2216 				error = nfsm_loadattr(nd, nap);
2217 				if (!error)
2218 					*attrflagp = NFS_LATTR_NOSHRINK;
2219 			}
2220 		} else {
2221 			error = nd->nd_repstat;
2222 		}
2223 		if (error)
2224 			goto nfsmout;
2225 		NFSWRITERPC_SETTIME(wccflag, np, nap, (nd->nd_flag & ND_NFSV4));
2226 		m_freem(nd->nd_mrep);
2227 		nd->nd_mrep = NULL;
2228 		tsiz -= len;
2229 	}
2230 nfsmout:
2231 	if (nd->nd_mrep != NULL)
2232 		m_freem(nd->nd_mrep);
2233 	*iomode = committed;
2234 	if (nd->nd_repstat && !error)
2235 		error = nd->nd_repstat;
2236 	free(nd, M_TEMP);
2237 	return (error);
2238 }
2239 
2240 /*
2241  * Do an nfs deallocate operation.
      *
      * Wrapper that calls nfsrpc_deallocaterpc() for the range offs/len in
      * a loop, retrying while the returned error is one of those tested in
      * the loop condition below.  A stateid is requested with
      * nfscl_getstateid() for NFSV4OPEN_ACCESSWRITE; if that fails, the file
      * is opened with nfsrpc_open(FWRITE) for the duration of the attempt
      * and closed again with nfsrpc_close() afterwards.
      * Returns 0 or an error; any error still set once retrycnt has reached
      * 4 is converted to EIO.
2242  */
2243 int
2244 nfsrpc_deallocate(vnode_t vp, off_t offs, off_t len, struct nfsvattr *nap,
2245     int *attrflagp, struct ucred *cred, NFSPROC_T *p)
2246 {
2247 	int error, expireret = 0, openerr, retrycnt;
2248 	uint32_t clidrev = 0;
2249 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2250 	struct nfsfh *nfhp;
2251 	nfsv4stateid_t stateid;
2252 	void *lckp;
2253 
     	/*
     	 * clidrev is left 0 when the mount has no nm_clp; the
     	 * NFSERR_EXPIRED/NFSERR_BADSTATEID handling below is skipped then.
     	 */
2254 	if (nmp->nm_clp != NULL)
2255 		clidrev = nmp->nm_clp->nfsc_clientidrev;
2256 	retrycnt = 0;
2257 	do {
2258 		lckp = NULL;
     		/* openerr == 0 means this attempt did its own open. */
2259 		openerr = 1;
2260 		nfhp = VTONFS(vp)->n_fhp;
2261 		error = nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
2262 		    NFSV4OPEN_ACCESSWRITE, 0, cred, p, &stateid, &lckp);
2263 		if (error != 0) {
2264 			/*
2265 			 * No Open stateid, so try and open the file
2266 			 * now.
2267 			 */
2268 			openerr = nfsrpc_open(vp, FWRITE, cred, p);
2269 			if (openerr == 0)
2270 				nfscl_getstateid(vp, nfhp->nfh_fh,
2271 				    nfhp->nfh_len, NFSV4OPEN_ACCESSWRITE, 0,
2272 				    cred, p, &stateid, &lckp);
2273 		}
     		/* The RPC is attempted even when no stateid could be obtained. */
2274 		error = nfsrpc_deallocaterpc(vp, offs, len, &stateid, nap,
2275 		    attrflagp, cred, p);
2276 		if (error == NFSERR_STALESTATEID)
2277 			nfscl_initiate_recovery(nmp->nm_clp);
     		/* Hand back lckp if nfscl_getstateid() filled it in. */
2278 		if (lckp != NULL)
2279 			nfscl_lockderef(lckp);
2280 		if (openerr == 0)
2281 			nfsrpc_close(vp, 0, p);
2282 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
2283 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2284 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
     			/* These errors are retried after nfs_catnap(). */
2285 			(void) nfs_catnap(PZERO, error, "nfs_deallocate");
2286 		} else if ((error == NFSERR_EXPIRED || (!NFSHASINT(nmp) &&
2287 		    error == NFSERR_BADSTATEID)) && clidrev != 0) {
     			/*
     			 * A non-zero expireret stops the retry for
     			 * NFSERR_EXPIRED/NFSERR_BADSTATEID.
     			 */
2288 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
2289 		} else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp)) {
     			/* Not retried: EIO is not in the loop condition. */
2290 			error = EIO;
2291 		}
2292 		retrycnt++;
2293 	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
2294 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2295 	    error == NFSERR_BADSESSION ||
2296 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
2297 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
2298 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
2299 	if (error && retrycnt >= 4)
2300 		error = EIO;
2301 	return (error);
2302 }
2303 
2304 /*
2305  * The actual deallocate RPC.
      *
      * Builds an NFSPROC_DEALLOCATE request carrying the stateid, the offset
      * and the length (both as hypers), followed by a Getattr op for the
      * NFSWRITEGETATTR_ATTRBIT() attribute set, and sends it.
      * On a successful reply the attributes are loaded into *nap and
      * *attrflagp is set to NFS_LATTR_NOSHRINK.
      * Returns 0, the error from sending/parsing, or nd_repstat from the
      * reply when no parse error occurred.
      * NOTE(review): NFSM_DISSECT presumably jumps to nfsmout on a malformed
      * reply - confirm against the macro definition.
2306  */
2307 static int
2308 nfsrpc_deallocaterpc(vnode_t vp, off_t offs, off_t len,
2309     nfsv4stateid_t *stateidp, struct nfsvattr *nap, int *attrflagp,
2310     struct ucred *cred, NFSPROC_T *p)
2311 {
2312 	uint32_t *tl;
2313 	struct nfsnode *np = VTONFS(vp);
2314 	int error, wccflag;
2315 	struct nfsrv_descript nfsd;
2316 	struct nfsrv_descript *nd = &nfsd;
2317 	nfsattrbit_t attrbits;
2318 
2319 	*attrflagp = 0;
2320 	NFSCL_REQSTART(nd, NFSPROC_DEALLOCATE, vp, cred);
2321 	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
     	/* Offset followed by length, two words each. */
2322 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER);
2323 	txdr_hyper(offs, tl);
2324 	tl += 2;
2325 	txdr_hyper(len, tl);
     	/* Append a Getattr op to the request. */
2326 	NFSWRITEGETATTR_ATTRBIT(&attrbits);
2327 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
2328 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
2329 	nfsrv_putattrbit(nd, &attrbits);
2330 	error = nfscl_request(nd, vp, p, cred);
2331 	if (error != 0)
2332 		return (error);
2333 	wccflag = 0;
2334 	error = nfscl_wcc_data(nd, vp, nap, attrflagp, &wccflag, NULL);
2335 	if (error != 0)
2336 		goto nfsmout;
2337 	if (nd->nd_repstat == 0) {
     		/* Skip two words, then load the attributes. */
2338 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
2339 		error = nfsm_loadattr(nd, nap);
2340 		if (error != 0)
2341 			goto nfsmout;
2342 		*attrflagp = NFS_LATTR_NOSHRINK;
2343 	}
2344 	NFSWRITERPC_SETTIME(wccflag, np, nap, 1);
2345 nfsmout:
2346 	m_freem(nd->nd_mrep);
     	/* A parse error takes precedence over the reply status. */
2347 	if (nd->nd_repstat != 0 && error == 0)
2348 		error = nd->nd_repstat;
2349 	return (error);
2350 }
2351 
2352 /*
2353  * nfs mknod rpc
2354  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
2355  * mode set to specify the file type and the size field for rdev.
      *
      * Creates the node "name" (namelen bytes) of type vtyp in directory dvp.
      * *nfhpp, *attrflagp and *dattrflagp are cleared on entry; on success
      * nfscl_mtofh() fills in *nfhpp, *nnap and *attrflagp for the new node,
      * and the directory's attributes are requested through
      * nfscl_wcc_data() for NFSv3 and NFSv4.
      * Returns ENAMETOOLONG when namelen exceeds NFS_MAXNAMLEN, otherwise 0,
      * the error from sending/parsing, or nd_repstat from the reply.
2356  */
2357 int
2358 nfsrpc_mknod(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2359     u_int32_t rdev, __enum_uint8(vtype) vtyp, struct ucred *cred, NFSPROC_T *p,
2360     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2361     int *attrflagp, int *dattrflagp)
2362 {
2363 	u_int32_t *tl;
2364 	int error = 0;
2365 	struct nfsrv_descript nfsd, *nd = &nfsd;
2366 	nfsattrbit_t attrbits;
2367 
2368 	*nfhpp = NULL;
2369 	*attrflagp = 0;
2370 	*dattrflagp = 0;
2371 	if (namelen > NFS_MAXNAMLEN)
2372 		return (ENAMETOOLONG);
2373 	NFSCL_REQSTART(nd, NFSPROC_MKNOD, dvp, cred);
     	/*
     	 * NFSv4 puts the type (plus major/minor for device nodes) ahead of
     	 * the name; NFSv3 puts the type after the name.
     	 */
2374 	if (nd->nd_flag & ND_NFSV4) {
2375 		if (vtyp == VBLK || vtyp == VCHR) {
2376 			NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2377 			*tl++ = vtonfsv34_type(vtyp);
2378 			*tl++ = txdr_unsigned(NFSMAJOR(rdev));
2379 			*tl = txdr_unsigned(NFSMINOR(rdev));
2380 		} else {
2381 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2382 			*tl = vtonfsv34_type(vtyp);
2383 		}
2384 	}
2385 	(void) nfsm_strtom(nd, name, namelen);
2386 	if (nd->nd_flag & ND_NFSV3) {
2387 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2388 		*tl = vtonfsv34_type(vtyp);
2389 	}
2390 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
2391 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
     	/* NFSv3 device nodes: major/minor follow the attributes. */
2392 	if ((nd->nd_flag & ND_NFSV3) &&
2393 	    (vtyp == VCHR || vtyp == VBLK)) {
2394 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2395 		*tl++ = txdr_unsigned(NFSMAJOR(rdev));
2396 		*tl = txdr_unsigned(NFSMINOR(rdev));
2397 	}
     	/* NFSv4: append Getfh and Getattr ops for the new node. */
2398 	if (nd->nd_flag & ND_NFSV4) {
2399 		NFSGETATTR_ATTRBIT(&attrbits);
2400 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2401 		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
2402 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2403 		(void) nfsrv_putattrbit(nd, &attrbits);
2404 	}
     	/* NFSv2: rdev is passed to nfscl_fillsattr() with NFSSATTR_SIZERDEV. */
2405 	if (nd->nd_flag & ND_NFSV2)
2406 		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZERDEV, rdev);
2407 	error = nfscl_request(nd, dvp, p, cred);
2408 	if (error)
2409 		return (error);
     	/*
     	 * NOTE(review): when nd_repstat is 0 on NFSv4, the value assigned
     	 * to error here is overwritten by nfsrv_getattrbits() below without
     	 * being tested - confirm that ignoring it is intended.
     	 */
2410 	if (nd->nd_flag & ND_NFSV4)
2411 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
2412 	if (!nd->nd_repstat) {
2413 		if (nd->nd_flag & ND_NFSV4) {
     			/* Skip five words, then the returned attribute bitmap. */
2414 			NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2415 			error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2416 			if (error)
2417 				goto nfsmout;
2418 		}
2419 		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2420 		if (error)
2421 			goto nfsmout;
2422 	}
2423 	if (nd->nd_flag & ND_NFSV3)
2424 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
     	/* A parse error takes precedence over the reply status. */
2425 	if (!error && nd->nd_repstat)
2426 		error = nd->nd_repstat;
2427 nfsmout:
2428 	m_freem(nd->nd_mrep);
2429 	return (error);
2430 }
2431 
2432 /*
2433  * nfs file create call
2434  * Mostly just call the appropriate routine. (I separated out v4, so that
2435  * error recovery wouldn't be as difficult.)
      *
      * NFSv2/NFSv3 mounts go straight to nfsrpc_createv23().  NFSv4 mounts
      * obtain an open owner with nfscl_open() and then call
      * nfsrpc_createv4(), or nfsrpc_getcreatelayout() on the first attempt
      * of a pNFS mount when nfscl_enablecallb and nfs_numnfscbd are both
      * non-zero.  The attempt is repeated while the returned error is one
      * of those tested in the loop condition below.
      * Returns 0 or an error; a failing nfscl_open() is returned at once,
      * and an error still set once retrycnt has reached 4 becomes EIO.
2436  */
2437 int
2438 nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2439     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
2440     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2441     int *attrflagp, int *dattrflagp)
2442 {
2443 	int error = 0, newone, expireret = 0, retrycnt, unlocked;
2444 	struct nfsclowner *owp;
2445 	struct nfscldeleg *dp;
2446 	struct nfsmount *nmp = VFSTONFS(dvp->v_mount);
2447 	u_int32_t clidrev;
2448 
2449 	if (NFSHASNFSV4(nmp)) {
2450 	    retrycnt = 0;
2451 	    do {
2452 		dp = NULL;
2453 		error = nfscl_open(dvp, NULL, 0, (NFSV4OPEN_ACCESSWRITE |
2454 		    NFSV4OPEN_ACCESSREAD), 0, cred, p, &owp, NULL, &newone,
2455 		    NULL, 1, true);
2456 		if (error)
2457 			return (error);
     		/* clidrev is refreshed on every pass; 0 disables expiry handling. */
2458 		if (nmp->nm_clp != NULL)
2459 			clidrev = nmp->nm_clp->nfsc_clientidrev;
2460 		else
2461 			clidrev = 0;
2462 		if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
2463 		    nfs_numnfscbd == 0 || retrycnt > 0)
2464 			error = nfsrpc_createv4(dvp, name, namelen, vap, cverf,
2465 			  fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
2466 			  attrflagp, dattrflagp, &unlocked);
2467 		else
2468 			error = nfsrpc_getcreatelayout(dvp, name, namelen, vap,
2469 			  cverf, fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
2470 			  attrflagp, dattrflagp, &unlocked);
2471 		/*
2472 		 * There is no need to invalidate cached attributes here,
2473 		 * since new post-delegation issue attributes are always
2474 		 * returned by nfsrpc_createv4() and these will update the
2475 		 * attribute cache.
2476 		 */
     		/* Pass a delegation returned through dp, if any, to nfscl_deleg(). */
2477 		if (dp != NULL)
2478 			(void) nfscl_deleg(nmp->nm_mountp, owp->nfsow_clp,
2479 			    (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp);
2480 		nfscl_ownerrelease(nmp, owp, error, newone, unlocked);
2481 		if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
2482 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2483 		    error == NFSERR_BADSESSION) {
     			/* These errors are retried after nfs_catnap(). */
2484 			(void) nfs_catnap(PZERO, error, "nfs_open");
2485 		} else if ((error == NFSERR_EXPIRED ||
2486 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
     			/* retrycnt is advanced only on this path. */
2487 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
2488 			retrycnt++;
2489 		}
2490 	    } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
2491 		error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2492 		error == NFSERR_BADSESSION ||
2493 		((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
2494 		 expireret == 0 && clidrev != 0 && retrycnt < 4));
2495 	    if (error && retrycnt >= 4)
2496 		    error = EIO;
2497 	} else {
2498 		error = nfsrpc_createv23(dvp, name, namelen, vap, cverf,
2499 		    fmode, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp);
2500 	}
2501 	return (error);
2502 }
2503 
2504 /*
2505  * The create rpc for v2 and 3.
      *
      * Builds and sends an NFSPROC_CREATE request for "name" (namelen bytes)
      * in directory dvp.  For NFSv3 the create mode is
      * NFSCREATE_EXCLUSIVE with the verifier cverf when O_EXCL is set in
      * fmode, otherwise NFSCREATE_UNCHECKED followed by the attributes from
      * vap.  For NFSv2 the attributes are added with NFSSATTR_SIZE0.
      * *nfhpp, *attrflagp and *dattrflagp are cleared on entry; on a
      * successful reply nfscl_mtofh() fills in the new file's handle and
      * attributes, and for NFSv3 nfscl_wcc_data() is called for dvp.
      * Returns ENAMETOOLONG when namelen exceeds NFS_MAXNAMLEN, otherwise 0,
      * the error from sending/parsing, or nd_repstat from the reply.
2506  */
2507 static int
2508 nfsrpc_createv23(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2509     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
2510     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2511     int *attrflagp, int *dattrflagp)
2512 {
2513 	u_int32_t *tl;
2514 	int error = 0;
2515 	struct nfsrv_descript nfsd, *nd = &nfsd;
2516 
2517 	*nfhpp = NULL;
2518 	*attrflagp = 0;
2519 	*dattrflagp = 0;
2520 	if (namelen > NFS_MAXNAMLEN)
2521 		return (ENAMETOOLONG);
2522 	NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp, cred);
2523 	(void) nfsm_strtom(nd, name, namelen);
2524 	if (nd->nd_flag & ND_NFSV3) {
2525 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2526 		if (fmode & O_EXCL) {
     			/* Exclusive create sends the verifier, no attributes. */
2527 			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2528 			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2529 			*tl++ = cverf.lval[0];
2530 			*tl = cverf.lval[1];
2531 		} else {
2532 			*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2533 			nfscl_fillsattr(nd, vap, dvp, 0, 0);
2534 		}
2535 	} else {
2536 		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZE0, 0);
2537 	}
2538 	error = nfscl_request(nd, dvp, p, cred);
2539 	if (error)
2540 		return (error);
2541 	if (nd->nd_repstat == 0) {
2542 		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2543 		if (error)
2544 			goto nfsmout;
2545 	}
2546 	if (nd->nd_flag & ND_NFSV3)
2547 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
     	/* A parse error takes precedence over the reply status. */
2548 	if (nd->nd_repstat != 0 && error == 0)
2549 		error = nd->nd_repstat;
2550 nfsmout:
2551 	m_freem(nd->nd_mrep);
2552 	return (error);
2553 }
2554 
2555 static int
2556 nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2557     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
2558     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2559     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2560     int *dattrflagp, int *unlockedp)
2561 {
2562 	u_int32_t *tl;
2563 	int error = 0, deleg, newone, ret, acesize, limitby;
2564 	struct nfsrv_descript nfsd, *nd = &nfsd;
2565 	struct nfsclopen *op;
2566 	struct nfscldeleg *dp = NULL;
2567 	struct nfsnode *np;
2568 	struct nfsfh *nfhp;
2569 	nfsattrbit_t attrbits;
2570 	nfsv4stateid_t stateid;
2571 	u_int32_t rflags;
2572 	struct nfsmount *nmp;
2573 	struct nfsclsession *tsep;
2574 
2575 	nmp = VFSTONFS(dvp->v_mount);
2576 	np = VTONFS(dvp);
2577 	*unlockedp = 0;
2578 	*nfhpp = NULL;
2579 	*dpp = NULL;
2580 	*attrflagp = 0;
2581 	*dattrflagp = 0;
2582 	if (namelen > NFS_MAXNAMLEN)
2583 		return (ENAMETOOLONG);
2584 	NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp, cred);
2585 	/*
2586 	 * For V4, this is actually an Open op.
2587 	 */
2588 	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2589 	*tl++ = txdr_unsigned(owp->nfsow_seqid);
2590 	*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
2591 	    NFSV4OPEN_ACCESSREAD);
2592 	*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
2593 	tsep = nfsmnt_mdssession(nmp);
2594 	*tl++ = tsep->nfsess_clientid.lval[0];
2595 	*tl = tsep->nfsess_clientid.lval[1];
2596 	(void) nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
2597 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2598 	*tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
2599 	if (fmode & O_EXCL) {
2600 		if (NFSHASNFSV4N(nmp)) {
2601 			if (NFSHASSESSPERSIST(nmp)) {
2602 				/* Use GUARDED for persistent sessions. */
2603 				*tl = txdr_unsigned(NFSCREATE_GUARDED);
2604 				nfscl_fillsattr(nd, vap, dvp, 0, 0);
2605 			} else {
2606 				/* Otherwise, use EXCLUSIVE4_1. */
2607 				*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
2608 				NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2609 				*tl++ = cverf.lval[0];
2610 				*tl = cverf.lval[1];
2611 				nfscl_fillsattr(nd, vap, dvp, 0, 0);
2612 			}
2613 		} else {
2614 			/* NFSv4.0 */
2615 			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2616 			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2617 			*tl++ = cverf.lval[0];
2618 			*tl = cverf.lval[1];
2619 		}
2620 	} else {
2621 		*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2622 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
2623 	}
2624 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2625 	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
2626 	(void) nfsm_strtom(nd, name, namelen);
2627 	/* Get the new file's handle and attributes. */
2628 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2629 	*tl++ = txdr_unsigned(NFSV4OP_GETFH);
2630 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
2631 	NFSGETATTR_ATTRBIT(&attrbits);
2632 	(void) nfsrv_putattrbit(nd, &attrbits);
2633 	/* Get the directory's post-op attributes. */
2634 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2635 	*tl = txdr_unsigned(NFSV4OP_PUTFH);
2636 	(void)nfsm_fhtom(nmp, nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
2637 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2638 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
2639 	(void) nfsrv_putattrbit(nd, &attrbits);
2640 	error = nfscl_request(nd, dvp, p, cred);
2641 	if (error)
2642 		return (error);
2643 	NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
2644 	if (nd->nd_repstat == 0) {
2645 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2646 		    6 * NFSX_UNSIGNED);
2647 		stateid.seqid = *tl++;
2648 		stateid.other[0] = *tl++;
2649 		stateid.other[1] = *tl++;
2650 		stateid.other[2] = *tl;
2651 		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
2652 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2653 		if (error)
2654 			goto nfsmout;
2655 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2656 		deleg = fxdr_unsigned(int, *tl);
2657 		if (deleg == NFSV4OPEN_DELEGATEREAD ||
2658 		    deleg == NFSV4OPEN_DELEGATEWRITE) {
2659 			if (!(owp->nfsow_clp->nfsc_flags &
2660 			      NFSCLFLAGS_FIRSTDELEG))
2661 				owp->nfsow_clp->nfsc_flags |=
2662 				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
2663 			dp = malloc(
2664 			    sizeof (struct nfscldeleg) + NFSX_V4FHMAX,
2665 			    M_NFSCLDELEG, M_WAITOK);
2666 			LIST_INIT(&dp->nfsdl_owner);
2667 			LIST_INIT(&dp->nfsdl_lock);
2668 			dp->nfsdl_clp = owp->nfsow_clp;
2669 			newnfs_copyincred(cred, &dp->nfsdl_cred);
2670 			nfscl_lockinit(&dp->nfsdl_rwlock);
2671 			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2672 			    NFSX_UNSIGNED);
2673 			dp->nfsdl_stateid.seqid = *tl++;
2674 			dp->nfsdl_stateid.other[0] = *tl++;
2675 			dp->nfsdl_stateid.other[1] = *tl++;
2676 			dp->nfsdl_stateid.other[2] = *tl++;
2677 			ret = fxdr_unsigned(int, *tl);
2678 			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
2679 				dp->nfsdl_flags = NFSCLDL_WRITE;
2680 				/*
2681 				 * Indicates how much the file can grow.
2682 				 */
2683 				NFSM_DISSECT(tl, u_int32_t *,
2684 				    3 * NFSX_UNSIGNED);
2685 				limitby = fxdr_unsigned(int, *tl++);
2686 				switch (limitby) {
2687 				case NFSV4OPEN_LIMITSIZE:
2688 					dp->nfsdl_sizelimit = fxdr_hyper(tl);
2689 					break;
2690 				case NFSV4OPEN_LIMITBLOCKS:
2691 					dp->nfsdl_sizelimit =
2692 					    fxdr_unsigned(u_int64_t, *tl++);
2693 					dp->nfsdl_sizelimit *=
2694 					    fxdr_unsigned(u_int64_t, *tl);
2695 					break;
2696 				default:
2697 					error = NFSERR_BADXDR;
2698 					goto nfsmout;
2699 				}
2700 			} else {
2701 				dp->nfsdl_flags = NFSCLDL_READ;
2702 			}
2703 			if (ret)
2704 				dp->nfsdl_flags |= NFSCLDL_RECALL;
2705 			error = nfsrv_dissectace(nd, &dp->nfsdl_ace, false,
2706 			    &ret, &acesize, p);
2707 			if (error)
2708 				goto nfsmout;
2709 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
2710 			error = NFSERR_BADXDR;
2711 			goto nfsmout;
2712 		}
2713 		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2714 		if (error)
2715 			goto nfsmout;
2716 		/* Get rid of the PutFH and Getattr status values. */
2717 		NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2718 		/* Load the directory attributes. */
2719 		error = nfsm_loadattr(nd, dnap);
2720 		if (error)
2721 			goto nfsmout;
2722 		*dattrflagp = 1;
2723 		if (dp != NULL && *attrflagp) {
2724 			dp->nfsdl_change = nnap->na_filerev;
2725 			dp->nfsdl_modtime = nnap->na_mtime;
2726 			dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
2727 		}
2728 		/*
2729 		 * We can now complete the Open state.
2730 		 */
2731 		nfhp = *nfhpp;
2732 		if (dp != NULL) {
2733 			dp->nfsdl_fhlen = nfhp->nfh_len;
2734 			NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh, nfhp->nfh_len);
2735 		}
2736 		/*
2737 		 * Get an Open structure that will be
2738 		 * attached to the OpenOwner, acquired already.
2739 		 */
2740 		error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len,
2741 		    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
2742 		    cred, p, NULL, &op, &newone, NULL, 0, false);
2743 		if (error)
2744 			goto nfsmout;
2745 		op->nfso_stateid = stateid;
2746 		newnfs_copyincred(cred, &op->nfso_cred);
2747 		if ((rflags & NFSV4OPEN_RESULTCONFIRM)) {
2748 		    do {
2749 			ret = nfsrpc_openconfirm(dvp, nfhp->nfh_fh,
2750 			    nfhp->nfh_len, op, cred, p);
2751 			if (ret == NFSERR_DELAY)
2752 			    (void) nfs_catnap(PZERO, ret, "nfs_create");
2753 		    } while (ret == NFSERR_DELAY);
2754 		    error = ret;
2755 		}
2756 
2757 		/*
2758 		 * If the server is handing out delegations, but we didn't
2759 		 * get one because an OpenConfirm was required, try the
2760 		 * Open again, to get a delegation. This is a harmless no-op,
2761 		 * from a server's point of view.
2762 		 */
2763 		if ((rflags & NFSV4OPEN_RESULTCONFIRM) &&
2764 		    (owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) &&
2765 		    !error && dp == NULL) {
2766 		    KASSERT(!NFSHASNFSV4N(nmp),
2767 			("nfsrpc_createv4: result confirm"));
2768 		    do {
2769 			ret = nfsrpc_openrpc(VFSTONFS(dvp->v_mount), dvp,
2770 			    np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
2771 			    nfhp->nfh_fh, nfhp->nfh_len,
2772 			    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), op,
2773 			    name, namelen, &dp, 0, 0x0, cred, p, 0, 1);
2774 			if (ret == NFSERR_DELAY)
2775 			    (void) nfs_catnap(PZERO, ret, "nfs_crt2");
2776 		    } while (ret == NFSERR_DELAY);
2777 		    if (ret) {
2778 			if (dp != NULL) {
2779 				free(dp, M_NFSCLDELEG);
2780 				dp = NULL;
2781 			}
2782 			if (ret == NFSERR_STALECLIENTID ||
2783 			    ret == NFSERR_STALEDONTRECOVER ||
2784 			    ret == NFSERR_BADSESSION)
2785 				error = ret;
2786 		    }
2787 		}
2788 		nfscl_openrelease(nmp, op, error, newone);
2789 		*unlockedp = 1;
2790 	}
2791 	if (nd->nd_repstat != 0 && error == 0)
2792 		error = nd->nd_repstat;
2793 	if (error == NFSERR_STALECLIENTID)
2794 		nfscl_initiate_recovery(owp->nfsow_clp);
2795 nfsmout:
2796 	if (!error)
2797 		*dpp = dp;
2798 	else if (dp != NULL)
2799 		free(dp, M_NFSCLDELEG);
2800 	m_freem(nd->nd_mrep);
2801 	return (error);
2802 }
2803 
/*
 * Nfs remove rpc
 *
 * Removes the entry "name" (namelen bytes) from the directory dvp.  vp is
 * the vnode of the object being removed; in this function it is only used
 * to look for a delegation (nfscl_removedeleg()) and, when one is found,
 * as the vnode the combined request is started against.
 *
 * *dattrflagp is cleared on entry.  For NFSv3 and NFSv4 replies, dnap and
 * dattrflagp are handed to nfscl_wcc_data() along with dvp.
 *
 * Returns ENAMETOOLONG when namelen exceeds NFS_MAXNAMLEN, the error from
 * nfscl_request() when the RPC could not be done (no reply to free in that
 * case), an error from parsing the reply, or otherwise the reply status
 * (nd_repstat).
 */
int
nfsrpc_remove(vnode_t dvp, char *name, int namelen, vnode_t vp,
    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfsnode *np;
	struct nfsmount *nmp;
	nfsv4stateid_t dstateid;
	int error, ret = 0, i;

	*dattrflagp = 0;
	if (namelen > NFS_MAXNAMLEN)
		return (ENAMETOOLONG);
	nmp = VFSTONFS(dvp->v_mount);
tryagain:
	/*
	 * ret is zero on the first pass.  When we come back here via
	 * "goto tryagain", ret is still non-zero, so the else clause is
	 * taken and a plain Remove is built instead.
	 */
	if (NFSHASNFSV4(nmp) && ret == 0) {
		ret = nfscl_removedeleg(vp, p, &dstateid);
		/*
		 * NOTE(review): only the values 0 and 1 result in a request
		 * being started below; presumably nfscl_removedeleg() never
		 * returns anything else -- confirm.
		 */
		if (ret == 1) {
			/*
			 * Build the combined request: the delegation stateid,
			 * then a PutFH of the directory and the Remove op.
			 */
			NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp, cred);
			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
			    NFSX_UNSIGNED);
			/* The stateid seqid is sent as 0 when NFSHASNFSV4N(). */
			if (NFSHASNFSV4N(nmp))
				*tl++ = 0;
			else
				*tl++ = dstateid.seqid;
			*tl++ = dstateid.other[0];
			*tl++ = dstateid.other[1];
			*tl++ = dstateid.other[2];
			*tl = txdr_unsigned(NFSV4OP_PUTFH);
			np = VTONFS(dvp);
			(void)nfsm_fhtom(nmp, nd, np->n_fhp->nfh_fh,
			    np->n_fhp->nfh_len, 0);
			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
			*tl = txdr_unsigned(NFSV4OP_REMOVE);
		}
	} else {
		ret = 0;
	}
	/* No delegation to return (or a retry), so do an ordinary Remove. */
	if (ret == 0)
		NFSCL_REQSTART(nd, NFSPROC_REMOVE, dvp, cred);
	/* Either form of the request ends with the name being removed. */
	(void) nfsm_strtom(nd, name, namelen);
	error = nfscl_request(nd, dvp, p, cred);
	if (error)
		return (error);
	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
		/* For NFSv4, parse out any Delegreturn replies. */
		if (ret > 0 && nd->nd_repstat != 0 &&
		    (nd->nd_flag & ND_NOMOREDATA)) {
			/*
			 * If the Delegreturn failed, try again without
			 * it. The server will Recall, as required.
			 */
			m_freem(nd->nd_mrep);
			goto tryagain;
		}
		/*
		 * Skip over ret * 2 leading results, two 32-bit words each
		 * (presumably the op number and its status).  A non-zero
		 * second word sets ND_NOMOREDATA, which stops any further
		 * dissecting in this loop.  NFSM_DISSECT() presumably
		 * branches to nfsmout on a short reply, since nothing else
		 * in this function jumps to that label.
		 */
		for (i = 0; i < (ret * 2); i++) {
			if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
			    ND_NFSV4) {
			    NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			    if (*(tl + 1))
				nd->nd_flag |= ND_NOMOREDATA;
			}
		}
		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
	}
	/* A parse error takes precedence over the server's reply status. */
	if (nd->nd_repstat && !error)
		error = nd->nd_repstat;
nfsmout:
	m_freem(nd->nd_mrep);
	return (error);
}
2879 
/*
 * Do an nfs rename rpc.
 *
 * Renames fnameptr (fnamelen bytes) in directory fdvp to tnameptr
 * (tnamelen bytes) in directory tdvp.  fvp and tvp are passed to
 * nfscl_renamedeleg() on NFSv4 mounts to find delegations to return in the
 * same request; they are otherwise only used as the vnode the combined
 * request is started against.
 *
 * *fattrflagp and *tattrflagp are cleared on entry.  For NFSv3 and NFSv4
 * replies, fnap/fattrflagp are handed to nfscl_wcc_data() for fdvp and
 * tnap/tattrflagp for tdvp.
 *
 * Returns ENAMETOOLONG when either name exceeds NFS_MAXNAMLEN, the error
 * from nfscl_request() when the RPC could not be done, an error from
 * parsing the reply, or otherwise the reply status (nd_repstat).
 */
int
nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen,
    vnode_t tdvp, vnode_t tvp, char *tnameptr, int tnamelen, struct ucred *cred,
    NFSPROC_T *p, struct nfsvattr *fnap, struct nfsvattr *tnap,
    int *fattrflagp, int *tattrflagp)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfsmount *nmp;
	struct nfsnode *np;
	nfsattrbit_t attrbits;
	nfsv4stateid_t fdstateid, tdstateid;
	int error = 0, ret = 0, gottd = 0, gotfd = 0, i;

	*fattrflagp = 0;
	*tattrflagp = 0;
	nmp = VFSTONFS(fdvp->v_mount);
	if (fnamelen > NFS_MAXNAMLEN || tnamelen > NFS_MAXNAMLEN)
		return (ENAMETOOLONG);
tryagain:
	/*
	 * ret is zero on the first pass.  When we come back here via
	 * "goto tryagain", ret is still non-zero, so the else clause is
	 * taken and a plain Rename is built instead.
	 */
	if (NFSHASNFSV4(nmp) && ret == 0) {
		ret = nfscl_renamedeleg(fvp, &fdstateid, &gotfd, tvp,
		    &tdstateid, &gottd, p);
		/*
		 * Start the request that matches the delegations found:
		 * both, only the "from" file's, or only the "to" file's.
		 */
		if (gotfd && gottd) {
			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME2, fvp, cred);
		} else if (gotfd) {
			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, fvp, cred);
		} else if (gottd) {
			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, tvp, cred);
		}
		if (gotfd) {
			/* Stateid of the "from" file's delegation. */
			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
			/* The stateid seqid is sent as 0 when NFSHASNFSV4N(). */
			if (NFSHASNFSV4N(nmp))
				*tl++ = 0;
			else
				*tl++ = fdstateid.seqid;
			*tl++ = fdstateid.other[0];
			*tl++ = fdstateid.other[1];
			*tl = fdstateid.other[2];
			if (gottd) {
				/*
				 * A second delegation follows, so add the
				 * PutFH of tvp and the Delegreturn op that
				 * its stateid (built below) belongs to.
				 */
				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
				*tl = txdr_unsigned(NFSV4OP_PUTFH);
				np = VTONFS(tvp);
				(void)nfsm_fhtom(nmp, nd, np->n_fhp->nfh_fh,
				    np->n_fhp->nfh_len, 0);
				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
				*tl = txdr_unsigned(NFSV4OP_DELEGRETURN);
			}
		}
		if (gottd) {
			/* Stateid of the "to" file's delegation. */
			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
			if (NFSHASNFSV4N(nmp))
				*tl++ = 0;
			else
				*tl++ = tdstateid.seqid;
			*tl++ = tdstateid.other[0];
			*tl++ = tdstateid.other[1];
			*tl = tdstateid.other[2];
		}
		if (ret > 0) {
			/*
			 * After the Delegreturn(s), switch to the "from"
			 * directory and save its file handle.
			 */
			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
			*tl = txdr_unsigned(NFSV4OP_PUTFH);
			np = VTONFS(fdvp);
			(void)nfsm_fhtom(nmp, nd, np->n_fhp->nfh_fh,
			    np->n_fhp->nfh_len, 0);
			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
			*tl = txdr_unsigned(NFSV4OP_SAVEFH);
		}
	} else {
		ret = 0;
	}
	/* No delegation to return (or a retry), so do an ordinary Rename. */
	if (ret == 0)
		NFSCL_REQSTART(nd, NFSPROC_RENAME, fdvp, cred);
	if (nd->nd_flag & ND_NFSV4) {
		/*
		 * Getattr for the "from" directory, then PutFH and Getattr
		 * for the "to" directory, followed by the Rename op itself.
		 */
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(NFSV4OP_GETATTR);
		NFSWCCATTR_ATTRBIT(&attrbits);
		(void) nfsrv_putattrbit(nd, &attrbits);
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(NFSV4OP_PUTFH);
		(void)nfsm_fhtom(nmp, nd, VTONFS(tdvp)->n_fhp->nfh_fh,
		    VTONFS(tdvp)->n_fhp->nfh_len, 0);
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(NFSV4OP_GETATTR);
		(void) nfsrv_putattrbit(nd, &attrbits);
		nd->nd_flag |= ND_V4WCCATTR;
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(NFSV4OP_RENAME);
	}
	/*
	 * The arguments: the old name, then (for anything other than NFSv4)
	 * the "to" directory's file handle, then the new name.
	 */
	(void) nfsm_strtom(nd, fnameptr, fnamelen);
	if (!(nd->nd_flag & ND_NFSV4))
		(void)nfsm_fhtom(nmp, nd, VTONFS(tdvp)->n_fhp->nfh_fh,
			VTONFS(tdvp)->n_fhp->nfh_len, 0);
	(void) nfsm_strtom(nd, tnameptr, tnamelen);
	error = nfscl_request(nd, fdvp, p, cred);
	if (error)
		return (error);
	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
		/* For NFSv4, parse out any Delegreturn replies. */
		if (ret > 0 && nd->nd_repstat != 0 &&
		    (nd->nd_flag & ND_NOMOREDATA)) {
			/*
			 * If the Delegreturn failed, try again without
			 * it. The server will Recall, as required.
			 */
			m_freem(nd->nd_mrep);
			goto tryagain;
		}
		/*
		 * Skip over ret * 2 leading results, two 32-bit words each
		 * (presumably the op number and its status).  NFSM_DISSECT()
		 * presumably branches to nfsmout on a short reply, since
		 * nothing else in this function jumps to that label.
		 */
		for (i = 0; i < (ret * 2); i++) {
			if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
			    ND_NFSV4) {
			    NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			    if (*(tl + 1)) {
				if (i == 1 && ret > 1) {
				    /*
				     * If the Delegreturn failed, try again
				     * without it. The server will Recall, as
				     * required.
				     * If ret > 1, the second iteration of this
				     * loop is the second DelegReturn result.
				     */
				    m_freem(nd->nd_mrep);
				    goto tryagain;
				} else {
				    /* Any other failure ends the parsing. */
				    nd->nd_flag |= ND_NOMOREDATA;
				}
			    }
			}
		}
		/* Now, the first wcc attribute reply. */
		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			if (*(tl + 1))
				nd->nd_flag |= ND_NOMOREDATA;
		}
		error = nfscl_wcc_data(nd, fdvp, fnap, fattrflagp, NULL, NULL);
		/* and the second wcc attribute reply. */
		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 &&
		    !error) {
			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			if (*(tl + 1))
				nd->nd_flag |= ND_NOMOREDATA;
		}
		if (!error)
			error = nfscl_wcc_data(nd, tdvp, tnap, tattrflagp,
			    NULL, NULL);
	}
	/* A parse error takes precedence over the server's reply status. */
	if (nd->nd_repstat && !error)
		error = nd->nd_repstat;
nfsmout:
	m_freem(nd->nd_mrep);
	return (error);
}
3036 
3037 /*
3038  * nfs hard link create rpc
3039  */
3040 int
3041 nfsrpc_link(vnode_t dvp, vnode_t vp, char *name, int namelen,
3042     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
3043     struct nfsvattr *nap, int *attrflagp, int *dattrflagp)
3044 {
3045 	u_int32_t *tl;
3046 	struct nfsrv_descript nfsd, *nd = &nfsd;
3047 	nfsattrbit_t attrbits;
3048 	int error = 0;
3049 
3050 	*attrflagp = 0;
3051 	*dattrflagp = 0;
3052 	if (namelen > NFS_MAXNAMLEN)
3053 		return (ENAMETOOLONG);
3054 	NFSCL_REQSTART(nd, NFSPROC_LINK, vp, cred);
3055 	if (nd->nd_flag & ND_NFSV4) {
3056 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3057 		*tl = txdr_unsigned(NFSV4OP_PUTFH);
3058 	}
3059 	(void)nfsm_fhtom(VFSTONFS(dvp->v_mount), nd, VTONFS(dvp)->n_fhp->nfh_fh,
3060 		VTONFS(dvp)->n_fhp->nfh_len, 0);
3061 	if (nd->nd_flag & ND_NFSV4) {
3062 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3063 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
3064 		NFSWCCATTR_ATTRBIT(&attrbits);
3065 		(void) nfsrv_putattrbit(nd, &attrbits);
3066 		nd->nd_flag |= ND_V4WCCATTR;
3067 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3068 		*tl = txdr_unsigned(NFSV4OP_LINK);
3069 	}
3070 	(void) nfsm_strtom(nd, name, namelen);
3071 	error = nfscl_request(nd, vp, p, cred);
3072 	if (error)
3073 		return (error);
3074 	if (nd->nd_flag & ND_NFSV3) {
3075 		error = nfscl_postop_attr(nd, nap, attrflagp);
3076 		if (!error)
3077 			error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
3078 			    NULL, NULL);
3079 	} else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
3080 		/*
3081 		 * First, parse out the PutFH and Getattr result.
3082 		 */
3083 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3084 		if (!(*(tl + 1)))
3085 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3086 		if (*(tl + 1))
3087 			nd->nd_flag |= ND_NOMOREDATA;
3088 		/*
3089 		 * Get the pre-op attributes.
3090 		 */
3091 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
3092 	}
3093 	if (nd->nd_repstat && !error)
3094 		error = nd->nd_repstat;
3095 nfsmout:
3096 	m_freem(nd->nd_mrep);
3097 	return (error);
3098 }
3099 
3100 /*
3101  * nfs symbolic link create rpc
3102  */
3103 int
3104 nfsrpc_symlink(vnode_t dvp, char *name, int namelen, const char *target,
3105     struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
3106     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
3107     int *dattrflagp)
3108 {
3109 	u_int32_t *tl;
3110 	struct nfsrv_descript nfsd, *nd = &nfsd;
3111 	struct nfsmount *nmp;
3112 	int slen, error = 0;
3113 
3114 	*nfhpp = NULL;
3115 	*attrflagp = 0;
3116 	*dattrflagp = 0;
3117 	nmp = VFSTONFS(dvp->v_mount);
3118 	slen = strlen(target);
3119 	if (slen > NFS_MAXPATHLEN || namelen > NFS_MAXNAMLEN)
3120 		return (ENAMETOOLONG);
3121 	NFSCL_REQSTART(nd, NFSPROC_SYMLINK, dvp, cred);
3122 	if (nd->nd_flag & ND_NFSV4) {
3123 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3124 		*tl = txdr_unsigned(NFLNK);
3125 		(void) nfsm_strtom(nd, target, slen);
3126 	}
3127 	(void) nfsm_strtom(nd, name, namelen);
3128 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
3129 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
3130 	if (!(nd->nd_flag & ND_NFSV4))
3131 		(void) nfsm_strtom(nd, target, slen);
3132 	if (nd->nd_flag & ND_NFSV2)
3133 		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
3134 	error = nfscl_request(nd, dvp, p, cred);
3135 	if (error)
3136 		return (error);
3137 	if (nd->nd_flag & ND_NFSV4)
3138 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
3139 	if ((nd->nd_flag & ND_NFSV3) && !error) {
3140 		if (!nd->nd_repstat)
3141 			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
3142 		if (!error)
3143 			error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
3144 			    NULL, NULL);
3145 	}
3146 	if (nd->nd_repstat && !error)
3147 		error = nd->nd_repstat;
3148 	m_freem(nd->nd_mrep);
3149 	/*
3150 	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
3151 	 * Only do this if vfs.nfs.ignore_eexist is set.
3152 	 * Never do this for NFSv4.1 or later minor versions, since sessions
3153 	 * should guarantee "exactly once" RPC semantics.
3154 	 */
3155 	if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
3156 	    nmp->nm_minorvers == 0))
3157 		error = 0;
3158 	return (error);
3159 }
3160 
3161 /*
3162  * nfs make dir rpc
3163  */
3164 int
3165 nfsrpc_mkdir(vnode_t dvp, char *name, int namelen, struct vattr *vap,
3166     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
3167     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
3168     int *dattrflagp)
3169 {
3170 	u_int32_t *tl;
3171 	struct nfsrv_descript nfsd, *nd = &nfsd;
3172 	nfsattrbit_t attrbits;
3173 	int error = 0;
3174 	struct nfsfh *fhp;
3175 	struct nfsmount *nmp;
3176 
3177 	*nfhpp = NULL;
3178 	*attrflagp = 0;
3179 	*dattrflagp = 0;
3180 	nmp = VFSTONFS(dvp->v_mount);
3181 	fhp = VTONFS(dvp)->n_fhp;
3182 	if (namelen > NFS_MAXNAMLEN)
3183 		return (ENAMETOOLONG);
3184 	NFSCL_REQSTART(nd, NFSPROC_MKDIR, dvp, cred);
3185 	if (nd->nd_flag & ND_NFSV4) {
3186 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3187 		*tl = txdr_unsigned(NFDIR);
3188 	}
3189 	(void) nfsm_strtom(nd, name, namelen);
3190 	nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
3191 	if (nd->nd_flag & ND_NFSV4) {
3192 		NFSGETATTR_ATTRBIT(&attrbits);
3193 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3194 		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
3195 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
3196 		(void) nfsrv_putattrbit(nd, &attrbits);
3197 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3198 		*tl = txdr_unsigned(NFSV4OP_PUTFH);
3199 		(void)nfsm_fhtom(nmp, nd, fhp->nfh_fh, fhp->nfh_len, 0);
3200 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3201 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
3202 		(void) nfsrv_putattrbit(nd, &attrbits);
3203 	}
3204 	error = nfscl_request(nd, dvp, p, cred);
3205 	if (error)
3206 		return (error);
3207 	if (nd->nd_flag & ND_NFSV4)
3208 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
3209 	if (!nd->nd_repstat && !error) {
3210 		if (nd->nd_flag & ND_NFSV4) {
3211 			NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
3212 			error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
3213 		}
3214 		if (!error)
3215 			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
3216 		if (error == 0 && (nd->nd_flag & ND_NFSV4) != 0) {
3217 			/* Get rid of the PutFH and Getattr status values. */
3218 			NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
3219 			/* Load the directory attributes. */
3220 			error = nfsm_loadattr(nd, dnap);
3221 			if (error == 0)
3222 				*dattrflagp = 1;
3223 		}
3224 	}
3225 	if ((nd->nd_flag & ND_NFSV3) && !error)
3226 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
3227 	if (nd->nd_repstat && !error)
3228 		error = nd->nd_repstat;
3229 nfsmout:
3230 	m_freem(nd->nd_mrep);
3231 	/*
3232 	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
3233 	 * Only do this if vfs.nfs.ignore_eexist is set.
3234 	 * Never do this for NFSv4.1 or later minor versions, since sessions
3235 	 * should guarantee "exactly once" RPC semantics.
3236 	 */
3237 	if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
3238 	    nmp->nm_minorvers == 0))
3239 		error = 0;
3240 	return (error);
3241 }
3242 
3243 /*
3244  * nfs remove directory call
3245  */
3246 int
3247 nfsrpc_rmdir(vnode_t dvp, char *name, int namelen, struct ucred *cred,
3248     NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp)
3249 {
3250 	struct nfsrv_descript nfsd, *nd = &nfsd;
3251 	int error = 0;
3252 
3253 	*dattrflagp = 0;
3254 	if (namelen > NFS_MAXNAMLEN)
3255 		return (ENAMETOOLONG);
3256 	NFSCL_REQSTART(nd, NFSPROC_RMDIR, dvp, cred);
3257 	(void) nfsm_strtom(nd, name, namelen);
3258 	error = nfscl_request(nd, dvp, p, cred);
3259 	if (error)
3260 		return (error);
3261 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
3262 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
3263 	if (nd->nd_repstat && !error)
3264 		error = nd->nd_repstat;
3265 	m_freem(nd->nd_mrep);
3266 	/*
3267 	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
3268 	 */
3269 	if (error == ENOENT)
3270 		error = 0;
3271 	return (error);
3272 }
3273 
3274 /*
3275  * Readdir rpc.
3276  * Always returns with either uio_resid unchanged, if you are at the
3277  * end of the directory, or uio_resid == 0, with all DIRBLKSIZ chunks
3278  * filled in.
3279  * I felt this would allow caching of directory blocks more easily
 * than returning a partially filled block.
3281  * Directory offset cookies:
3282  * Oh my, what to do with them...
3283  * I can think of three ways to deal with them:
3284  * 1 - have the layer above these RPCs maintain a map between logical
3285  *     directory byte offsets and the NFS directory offset cookies
3286  * 2 - pass the opaque directory offset cookies up into userland
3287  *     and let the libc functions deal with them, via the system call
3288  * 3 - return them to userland in the "struct dirent", so future versions
3289  *     of libc can use them and do whatever is necessary to make things work
3290  *     above these rpc calls, in the meantime
3291  * For now, I do #3 by "hiding" the directory offset cookies after the
3292  * d_name field in struct dirent. This is space inside d_reclen that
3293  * will be ignored by anything that doesn't know about them.
3294  * The directory offset cookies are filled in as the last 8 bytes of
3295  * each directory entry, after d_name. Someday, the userland libc
3296  * functions may be able to use these. In the meantime, it satisfies
3297  * OpenBSD's requirements for cookies being returned.
 * It expects the directory offset cookie for the read to be in uio_offset
3299  * and returns the one for the next entry after this directory block in
3300  * there, as well.
3301  */
3302 int
3303 nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
3304     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
3305     int *eofp)
3306 {
3307 	int len, left;
3308 	struct dirent *dp = NULL;
3309 	u_int32_t *tl;
3310 	nfsquad_t cookie, ncookie;
3311 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3312 	struct nfsnode *dnp = VTONFS(vp);
3313 	struct nfsvattr nfsva;
3314 	struct nfsrv_descript nfsd, *nd = &nfsd;
3315 	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
3316 	int reqsize, tryformoredirs = 1, readsize, eof = 0, gotmnton = 0;
3317 	u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
3318 	char *cp;
3319 	nfsattrbit_t attrbits, dattrbits;
3320 	u_int32_t rderr, *tl2 = NULL;
3321 	size_t tresid;
3322 
3323 	KASSERT(uiop->uio_iovcnt == 1 &&
3324 	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
3325 	    ("nfs readdirrpc bad uio"));
3326 	ncookie.lval[0] = ncookie.lval[1] = 0;
3327 	/*
3328 	 * There is no point in reading a lot more than uio_resid, however
3329 	 * adding one additional DIRBLKSIZ makes sense. Since uio_resid
3330 	 * and nm_readdirsize are both exact multiples of DIRBLKSIZ, this
3331 	 * will never make readsize > nm_readdirsize.
3332 	 */
3333 	readsize = nmp->nm_readdirsize;
3334 	if (readsize > uiop->uio_resid)
3335 		readsize = uiop->uio_resid + DIRBLKSIZ;
3336 
3337 	*attrflagp = 0;
3338 	if (eofp)
3339 		*eofp = 0;
3340 	tresid = uiop->uio_resid;
3341 	cookie.lval[0] = cookiep->nfsuquad[0];
3342 	cookie.lval[1] = cookiep->nfsuquad[1];
3343 	nd->nd_mrep = NULL;
3344 
3345 	/*
3346 	 * For NFSv4, first create the "." and ".." entries.
3347 	 */
3348 	if (NFSHASNFSV4(nmp)) {
3349 		reqsize = 6 * NFSX_UNSIGNED;
3350 		NFSGETATTR_ATTRBIT(&dattrbits);
3351 		NFSZERO_ATTRBIT(&attrbits);
3352 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
3353 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE);
3354 		if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3355 		    NFSATTRBIT_MOUNTEDONFILEID)) {
3356 			NFSSETBIT_ATTRBIT(&attrbits,
3357 			    NFSATTRBIT_MOUNTEDONFILEID);
3358 			gotmnton = 1;
3359 		} else {
3360 			/*
3361 			 * Must fake it. Use the fileno, except when the
3362 			 * fsid is != to that of the directory. For that
3363 			 * case, generate a fake fileno that is not the same.
3364 			 */
3365 			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
3366 			gotmnton = 0;
3367 		}
3368 
3369 		/*
3370 		 * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
3371 		 */
3372 		if (uiop->uio_offset == 0) {
3373 			NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp, cred);
3374 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3375 			*tl++ = txdr_unsigned(NFSV4OP_GETFH);
3376 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
3377 			(void) nfsrv_putattrbit(nd, &attrbits);
3378 			error = nfscl_request(nd, vp, p, cred);
3379 			if (error)
3380 			    return (error);
3381 			dotfileid = 0;	/* Fake out the compiler. */
3382 			if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
3383 			    error = nfsm_loadattr(nd, &nfsva);
3384 			    if (error != 0)
3385 				goto nfsmout;
3386 			    dotfileid = nfsva.na_fileid;
3387 			}
3388 			if (nd->nd_repstat == 0) {
3389 			    NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
3390 			    len = fxdr_unsigned(int, *(tl + 4));
3391 			    if (len > 0 && len <= NFSX_V4FHMAX)
3392 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3393 			    else
3394 				error = EPERM;
3395 			    if (!error) {
3396 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3397 				nfsva.na_mntonfileno = UINT64_MAX;
3398 				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3399 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3400 				    NULL, NULL, NULL, p, cred);
3401 				if (error) {
3402 				    dotdotfileid = dotfileid;
3403 				} else if (gotmnton) {
3404 				    if (nfsva.na_mntonfileno != UINT64_MAX)
3405 					dotdotfileid = nfsva.na_mntonfileno;
3406 				    else
3407 					dotdotfileid = nfsva.na_fileid;
3408 				} else if (nfsva.na_filesid[0] ==
3409 				    dnp->n_vattr.na_filesid[0] &&
3410 				    nfsva.na_filesid[1] ==
3411 				    dnp->n_vattr.na_filesid[1]) {
3412 				    dotdotfileid = nfsva.na_fileid;
3413 				} else {
3414 				    do {
3415 					fakefileno--;
3416 				    } while (fakefileno ==
3417 					nfsva.na_fileid);
3418 				    dotdotfileid = fakefileno;
3419 				}
3420 			    }
3421 			} else if (nd->nd_repstat == NFSERR_NOENT) {
3422 			    /*
3423 			     * Lookupp returns NFSERR_NOENT when we are
3424 			     * at the root, so just use the current dir.
3425 			     */
3426 			    nd->nd_repstat = 0;
3427 			    dotdotfileid = dotfileid;
3428 			} else {
3429 			    error = nd->nd_repstat;
3430 			}
3431 			m_freem(nd->nd_mrep);
3432 			if (error)
3433 			    return (error);
3434 			nd->nd_mrep = NULL;
3435 			dp = (struct dirent *)uiop->uio_iov->iov_base;
3436 			dp->d_pad0 = dp->d_pad1 = 0;
3437 			dp->d_off = 0;
3438 			dp->d_type = DT_DIR;
3439 			dp->d_fileno = dotfileid;
3440 			dp->d_namlen = 1;
3441 			*((uint64_t *)dp->d_name) = 0;	/* Zero pad it. */
3442 			dp->d_name[0] = '.';
3443 			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3444 			/*
3445 			 * Just make these offset cookie 0.
3446 			 */
3447 			tl = (u_int32_t *)&dp->d_name[8];
3448 			*tl++ = 0;
3449 			*tl = 0;
3450 			blksiz += dp->d_reclen;
3451 			uiop->uio_resid -= dp->d_reclen;
3452 			uiop->uio_offset += dp->d_reclen;
3453 			uiop->uio_iov->iov_base =
3454 			    (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3455 			uiop->uio_iov->iov_len -= dp->d_reclen;
3456 			dp = (struct dirent *)uiop->uio_iov->iov_base;
3457 			dp->d_pad0 = dp->d_pad1 = 0;
3458 			dp->d_off = 0;
3459 			dp->d_type = DT_DIR;
3460 			dp->d_fileno = dotdotfileid;
3461 			dp->d_namlen = 2;
3462 			*((uint64_t *)dp->d_name) = 0;
3463 			dp->d_name[0] = '.';
3464 			dp->d_name[1] = '.';
3465 			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3466 			/*
3467 			 * Just make these offset cookie 0.
3468 			 */
3469 			tl = (u_int32_t *)&dp->d_name[8];
3470 			*tl++ = 0;
3471 			*tl = 0;
3472 			blksiz += dp->d_reclen;
3473 			uiop->uio_resid -= dp->d_reclen;
3474 			uiop->uio_offset += dp->d_reclen;
3475 			uiop->uio_iov->iov_base =
3476 			    (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3477 			uiop->uio_iov->iov_len -= dp->d_reclen;
3478 		}
3479 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR);
3480 	} else {
3481 		reqsize = 5 * NFSX_UNSIGNED;
3482 	}
3483 
3484 	/*
3485 	 * Loop around doing readdir rpc's of size readsize.
3486 	 * The stopping criteria is EOF or buffer full.
3487 	 */
3488 	while (more_dirs && bigenough) {
3489 		*attrflagp = 0;
3490 		NFSCL_REQSTART(nd, NFSPROC_READDIR, vp, cred);
3491 		if (nd->nd_flag & ND_NFSV2) {
3492 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3493 			*tl++ = cookie.lval[1];
3494 			*tl = txdr_unsigned(readsize);
3495 		} else {
3496 			NFSM_BUILD(tl, u_int32_t *, reqsize);
3497 			*tl++ = cookie.lval[0];
3498 			*tl++ = cookie.lval[1];
3499 			if (cookie.qval == 0) {
3500 				*tl++ = 0;
3501 				*tl++ = 0;
3502 			} else {
3503 				NFSLOCKNODE(dnp);
3504 				*tl++ = dnp->n_cookieverf.nfsuquad[0];
3505 				*tl++ = dnp->n_cookieverf.nfsuquad[1];
3506 				NFSUNLOCKNODE(dnp);
3507 			}
3508 			if (nd->nd_flag & ND_NFSV4) {
3509 				*tl++ = txdr_unsigned(readsize);
3510 				*tl = txdr_unsigned(readsize);
3511 				(void) nfsrv_putattrbit(nd, &attrbits);
3512 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3513 				*tl = txdr_unsigned(NFSV4OP_GETATTR);
3514 				(void) nfsrv_putattrbit(nd, &dattrbits);
3515 			} else {
3516 				*tl = txdr_unsigned(readsize);
3517 			}
3518 		}
3519 		error = nfscl_request(nd, vp, p, cred);
3520 		if (error)
3521 			return (error);
3522 		if (!(nd->nd_flag & ND_NFSV2)) {
3523 			if (nd->nd_flag & ND_NFSV3)
3524 				error = nfscl_postop_attr(nd, nap, attrflagp);
3525 			if (!nd->nd_repstat && !error) {
3526 				NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
3527 				NFSLOCKNODE(dnp);
3528 				dnp->n_cookieverf.nfsuquad[0] = *tl++;
3529 				dnp->n_cookieverf.nfsuquad[1] = *tl;
3530 				NFSUNLOCKNODE(dnp);
3531 			}
3532 		}
3533 		if (nd->nd_repstat || error) {
3534 			if (!error)
3535 				error = nd->nd_repstat;
3536 			goto nfsmout;
3537 		}
3538 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3539 		more_dirs = fxdr_unsigned(int, *tl);
3540 		if (!more_dirs)
3541 			tryformoredirs = 0;
3542 
3543 		/* loop through the dir entries, doctoring them to 4bsd form */
3544 		while (more_dirs && bigenough) {
3545 			if (nd->nd_flag & ND_NFSV4) {
3546 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3547 				ncookie.lval[0] = *tl++;
3548 				ncookie.lval[1] = *tl++;
3549 				len = fxdr_unsigned(int, *tl);
3550 			} else if (nd->nd_flag & ND_NFSV3) {
3551 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3552 				nfsva.na_fileid = fxdr_hyper(tl);
3553 				tl += 2;
3554 				len = fxdr_unsigned(int, *tl);
3555 			} else {
3556 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3557 				nfsva.na_fileid = fxdr_unsigned(uint64_t,
3558 				    *tl++);
3559 				len = fxdr_unsigned(int, *tl);
3560 			}
3561 			if (len <= 0 || len > NFS_MAXNAMLEN) {
3562 				error = EBADRPC;
3563 				goto nfsmout;
3564 			}
3565 			tlen = roundup2(len, 8);
3566 			if (tlen == len)
3567 				tlen += 8;  /* To ensure null termination. */
3568 			left = DIRBLKSIZ - blksiz;
3569 			if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
3570 				NFSBZERO(uiop->uio_iov->iov_base, left);
3571 				dp->d_reclen += left;
3572 				uiop->uio_iov->iov_base =
3573 				    (char *)uiop->uio_iov->iov_base + left;
3574 				uiop->uio_iov->iov_len -= left;
3575 				uiop->uio_resid -= left;
3576 				uiop->uio_offset += left;
3577 				blksiz = 0;
3578 			}
3579 			if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
3580 			    uiop->uio_resid)
3581 				bigenough = 0;
3582 			if (bigenough) {
3583 				dp = (struct dirent *)uiop->uio_iov->iov_base;
3584 				dp->d_pad0 = dp->d_pad1 = 0;
3585 				dp->d_off = 0;
3586 				dp->d_namlen = len;
3587 				dp->d_reclen = _GENERIC_DIRLEN(len) +
3588 				    NFSX_HYPER;
3589 				dp->d_type = DT_UNKNOWN;
3590 				blksiz += dp->d_reclen;
3591 				if (blksiz == DIRBLKSIZ)
3592 					blksiz = 0;
3593 				uiop->uio_resid -= DIRHDSIZ;
3594 				uiop->uio_offset += DIRHDSIZ;
3595 				uiop->uio_iov->iov_base =
3596 				    (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
3597 				uiop->uio_iov->iov_len -= DIRHDSIZ;
3598 				error = nfsm_mbufuio(nd, uiop, len);
3599 				if (error)
3600 					goto nfsmout;
3601 				cp = uiop->uio_iov->iov_base;
3602 				tlen -= len;
3603 				NFSBZERO(cp, tlen);
3604 				cp += tlen;	/* points to cookie storage */
3605 				tl2 = (u_int32_t *)cp;
3606 				uiop->uio_iov->iov_base =
3607 				    (char *)uiop->uio_iov->iov_base + tlen +
3608 				    NFSX_HYPER;
3609 				uiop->uio_iov->iov_len -= tlen + NFSX_HYPER;
3610 				uiop->uio_resid -= tlen + NFSX_HYPER;
3611 				uiop->uio_offset += (tlen + NFSX_HYPER);
3612 			} else {
3613 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3614 				if (error)
3615 					goto nfsmout;
3616 			}
3617 			if (nd->nd_flag & ND_NFSV4) {
3618 				rderr = 0;
3619 				nfsva.na_mntonfileno = UINT64_MAX;
3620 				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3621 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3622 				    NULL, NULL, &rderr, p, cred);
3623 				if (error)
3624 					goto nfsmout;
3625 				NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3626 			} else if (nd->nd_flag & ND_NFSV3) {
3627 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3628 				ncookie.lval[0] = *tl++;
3629 				ncookie.lval[1] = *tl++;
3630 			} else {
3631 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3632 				ncookie.lval[0] = 0;
3633 				ncookie.lval[1] = *tl++;
3634 			}
3635 			if (bigenough) {
3636 			    if (nd->nd_flag & ND_NFSV4) {
3637 				if (rderr) {
3638 				    dp->d_fileno = 0;
3639 				} else {
3640 				    if (gotmnton) {
3641 					if (nfsva.na_mntonfileno != UINT64_MAX)
3642 					    dp->d_fileno = nfsva.na_mntonfileno;
3643 					else
3644 					    dp->d_fileno = nfsva.na_fileid;
3645 				    } else if (nfsva.na_filesid[0] ==
3646 					dnp->n_vattr.na_filesid[0] &&
3647 					nfsva.na_filesid[1] ==
3648 					dnp->n_vattr.na_filesid[1]) {
3649 					dp->d_fileno = nfsva.na_fileid;
3650 				    } else {
3651 					do {
3652 					    fakefileno--;
3653 					} while (fakefileno ==
3654 					    nfsva.na_fileid);
3655 					dp->d_fileno = fakefileno;
3656 				    }
3657 				    dp->d_type = vtonfs_dtype(nfsva.na_type);
3658 				}
3659 			    } else {
3660 				dp->d_fileno = nfsva.na_fileid;
3661 			    }
3662 			    *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
3663 				ncookie.lval[0];
3664 			    *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
3665 				ncookie.lval[1];
3666 			}
3667 			more_dirs = fxdr_unsigned(int, *tl);
3668 		}
3669 		/*
3670 		 * If at end of rpc data, get the eof boolean
3671 		 */
3672 		if (!more_dirs) {
3673 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3674 			eof = fxdr_unsigned(int, *tl);
3675 			if (tryformoredirs)
3676 				more_dirs = !eof;
3677 			if (nd->nd_flag & ND_NFSV4) {
3678 				error = nfscl_postop_attr(nd, nap, attrflagp);
3679 				if (error)
3680 					goto nfsmout;
3681 			}
3682 		}
3683 		m_freem(nd->nd_mrep);
3684 		nd->nd_mrep = NULL;
3685 	}
3686 	/*
3687 	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
3688 	 * by increasing d_reclen for the last record.
3689 	 */
3690 	if (blksiz > 0) {
3691 		left = DIRBLKSIZ - blksiz;
3692 		NFSBZERO(uiop->uio_iov->iov_base, left);
3693 		dp->d_reclen += left;
3694 		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
3695 		    left;
3696 		uiop->uio_iov->iov_len -= left;
3697 		uiop->uio_resid -= left;
3698 		uiop->uio_offset += left;
3699 	}
3700 
3701 	/*
3702 	 * If returning no data, assume end of file.
3703 	 * If not bigenough, return not end of file, since you aren't
3704 	 *    returning all the data
3705 	 * Otherwise, return the eof flag from the server.
3706 	 */
3707 	if (eofp) {
3708 		if (tresid == ((size_t)(uiop->uio_resid)))
3709 			*eofp = 1;
3710 		else if (!bigenough)
3711 			*eofp = 0;
3712 		else
3713 			*eofp = eof;
3714 	}
3715 
3716 	/*
3717 	 * Add extra empty records to any remaining DIRBLKSIZ chunks.
3718 	 */
3719 	while (uiop->uio_resid > 0 && uiop->uio_resid != tresid) {
3720 		dp = (struct dirent *)uiop->uio_iov->iov_base;
3721 		NFSBZERO(dp, DIRBLKSIZ);
3722 		dp->d_type = DT_UNKNOWN;
3723 		tl = (u_int32_t *)&dp->d_name[4];
3724 		*tl++ = cookie.lval[0];
3725 		*tl = cookie.lval[1];
3726 		dp->d_reclen = DIRBLKSIZ;
3727 		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
3728 		    DIRBLKSIZ;
3729 		uiop->uio_iov->iov_len -= DIRBLKSIZ;
3730 		uiop->uio_resid -= DIRBLKSIZ;
3731 		uiop->uio_offset += DIRBLKSIZ;
3732 	}
3733 
3734 nfsmout:
3735 	if (nd->nd_mrep != NULL)
3736 		m_freem(nd->nd_mrep);
3737 	return (error);
3738 }
3739 
3740 /*
3741  * NFS V3 readdir plus RPC. Used in place of nfsrpc_readdir().
3742  * (Also used for NFS V4 when mount flag set.)
3743  * (ditto above w.r.t. multiple of DIRBLKSIZ, etc.)
3744  */
3745 int
3746 nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
3747     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
3748     int *eofp)
3749 {
3750 	int len, left;
3751 	struct dirent *dp = NULL;
3752 	u_int32_t *tl;
3753 	vnode_t newvp = NULLVP;
3754 	struct nfsrv_descript nfsd, *nd = &nfsd;
3755 	struct nameidata nami, *ndp = &nami;
3756 	struct componentname *cnp = &ndp->ni_cnd;
3757 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3758 	struct nfsnode *dnp = VTONFS(vp), *np;
3759 	struct nfsvattr nfsva;
3760 	struct nfsfh *nfhp;
3761 	nfsquad_t cookie, ncookie;
3762 	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
3763 	int attrflag, tryformoredirs = 1, eof = 0, gotmnton = 0;
3764 	int isdotdot = 0, unlocknewvp = 0;
3765 	u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
3766 	u_int64_t fileno = 0;
3767 	char *cp;
3768 	nfsattrbit_t attrbits, dattrbits;
3769 	size_t tresid;
3770 	u_int32_t *tl2 = NULL, rderr;
3771 	struct timespec dctime, ts;
3772 	bool attr_ok;
3773 
3774 	KASSERT(uiop->uio_iovcnt == 1 &&
3775 	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
3776 	    ("nfs readdirplusrpc bad uio"));
3777 	ncookie.lval[0] = ncookie.lval[1] = 0;
3778 	timespecclear(&dctime);
3779 	*attrflagp = 0;
3780 	if (eofp != NULL)
3781 		*eofp = 0;
3782 	ndp->ni_dvp = vp;
3783 	nd->nd_mrep = NULL;
3784 	cookie.lval[0] = cookiep->nfsuquad[0];
3785 	cookie.lval[1] = cookiep->nfsuquad[1];
3786 	tresid = uiop->uio_resid;
3787 
3788 	/*
3789 	 * For NFSv4, first create the "." and ".." entries.
3790 	 */
3791 	if (NFSHASNFSV4(nmp)) {
3792 		NFSGETATTR_ATTRBIT(&dattrbits);
3793 		NFSZERO_ATTRBIT(&attrbits);
3794 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
3795 		if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3796 		    NFSATTRBIT_MOUNTEDONFILEID)) {
3797 			NFSSETBIT_ATTRBIT(&attrbits,
3798 			    NFSATTRBIT_MOUNTEDONFILEID);
3799 			gotmnton = 1;
3800 		} else {
3801 			/*
3802 			 * Must fake it. Use the fileno, except when the
3803 			 * fsid is != to that of the directory. For that
3804 			 * case, generate a fake fileno that is not the same.
3805 			 */
3806 			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
3807 			gotmnton = 0;
3808 		}
3809 
3810 		/*
3811 		 * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
3812 		 */
3813 		if (uiop->uio_offset == 0) {
3814 			NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp, cred);
3815 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3816 			*tl++ = txdr_unsigned(NFSV4OP_GETFH);
3817 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
3818 			(void) nfsrv_putattrbit(nd, &attrbits);
3819 			error = nfscl_request(nd, vp, p, cred);
3820 			if (error)
3821 			    return (error);
3822 			dotfileid = 0;	/* Fake out the compiler. */
3823 			if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
3824 			    error = nfsm_loadattr(nd, &nfsva);
3825 			    if (error != 0)
3826 				goto nfsmout;
3827 			    dctime = nfsva.na_ctime;
3828 			    dotfileid = nfsva.na_fileid;
3829 			}
3830 			if (nd->nd_repstat == 0) {
3831 			    NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
3832 			    len = fxdr_unsigned(int, *(tl + 4));
3833 			    if (len > 0 && len <= NFSX_V4FHMAX)
3834 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3835 			    else
3836 				error = EPERM;
3837 			    if (!error) {
3838 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3839 				nfsva.na_mntonfileno = UINT64_MAX;
3840 				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3841 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3842 				    NULL, NULL, NULL, p, cred);
3843 				if (error) {
3844 				    dotdotfileid = dotfileid;
3845 				} else if (gotmnton) {
3846 				    if (nfsva.na_mntonfileno != UINT64_MAX)
3847 					dotdotfileid = nfsva.na_mntonfileno;
3848 				    else
3849 					dotdotfileid = nfsva.na_fileid;
3850 				} else if (nfsva.na_filesid[0] ==
3851 				    dnp->n_vattr.na_filesid[0] &&
3852 				    nfsva.na_filesid[1] ==
3853 				    dnp->n_vattr.na_filesid[1]) {
3854 				    dotdotfileid = nfsva.na_fileid;
3855 				} else {
3856 				    do {
3857 					fakefileno--;
3858 				    } while (fakefileno ==
3859 					nfsva.na_fileid);
3860 				    dotdotfileid = fakefileno;
3861 				}
3862 			    }
3863 			} else if (nd->nd_repstat == NFSERR_NOENT) {
3864 			    /*
3865 			     * Lookupp returns NFSERR_NOENT when we are
3866 			     * at the root, so just use the current dir.
3867 			     */
3868 			    nd->nd_repstat = 0;
3869 			    dotdotfileid = dotfileid;
3870 			} else {
3871 			    error = nd->nd_repstat;
3872 			}
3873 			m_freem(nd->nd_mrep);
3874 			if (error)
3875 			    return (error);
3876 			nd->nd_mrep = NULL;
3877 			dp = (struct dirent *)uiop->uio_iov->iov_base;
3878 			dp->d_pad0 = dp->d_pad1 = 0;
3879 			dp->d_off = 0;
3880 			dp->d_type = DT_DIR;
3881 			dp->d_fileno = dotfileid;
3882 			dp->d_namlen = 1;
3883 			*((uint64_t *)dp->d_name) = 0;	/* Zero pad it. */
3884 			dp->d_name[0] = '.';
3885 			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3886 			/*
3887 			 * Just make these offset cookie 0.
3888 			 */
3889 			tl = (u_int32_t *)&dp->d_name[8];
3890 			*tl++ = 0;
3891 			*tl = 0;
3892 			blksiz += dp->d_reclen;
3893 			uiop->uio_resid -= dp->d_reclen;
3894 			uiop->uio_offset += dp->d_reclen;
3895 			uiop->uio_iov->iov_base =
3896 			    (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3897 			uiop->uio_iov->iov_len -= dp->d_reclen;
3898 			dp = (struct dirent *)uiop->uio_iov->iov_base;
3899 			dp->d_pad0 = dp->d_pad1 = 0;
3900 			dp->d_off = 0;
3901 			dp->d_type = DT_DIR;
3902 			dp->d_fileno = dotdotfileid;
3903 			dp->d_namlen = 2;
3904 			*((uint64_t *)dp->d_name) = 0;
3905 			dp->d_name[0] = '.';
3906 			dp->d_name[1] = '.';
3907 			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3908 			/*
3909 			 * Just make these offset cookie 0.
3910 			 */
3911 			tl = (u_int32_t *)&dp->d_name[8];
3912 			*tl++ = 0;
3913 			*tl = 0;
3914 			blksiz += dp->d_reclen;
3915 			uiop->uio_resid -= dp->d_reclen;
3916 			uiop->uio_offset += dp->d_reclen;
3917 			uiop->uio_iov->iov_base =
3918 			    (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3919 			uiop->uio_iov->iov_len -= dp->d_reclen;
3920 		}
3921 		NFSREADDIRPLUS_ATTRBIT(&attrbits);
3922 		if (gotmnton)
3923 			NFSSETBIT_ATTRBIT(&attrbits,
3924 			    NFSATTRBIT_MOUNTEDONFILEID);
3925 		if (!NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3926 		    NFSATTRBIT_TIMECREATE))
3927 			NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMECREATE);
3928 	}
3929 
3930 	/*
3931 	 * Loop around doing readdir rpc's of size nm_readdirsize.
3932 	 * The stopping criteria is EOF or buffer full.
3933 	 */
3934 	while (more_dirs && bigenough) {
3935 		*attrflagp = 0;
3936 		NFSCL_REQSTART(nd, NFSPROC_READDIRPLUS, vp, cred);
3937  		NFSM_BUILD(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
3938 		*tl++ = cookie.lval[0];
3939 		*tl++ = cookie.lval[1];
3940 		if (cookie.qval == 0) {
3941 			*tl++ = 0;
3942 			*tl++ = 0;
3943 		} else {
3944 			NFSLOCKNODE(dnp);
3945 			*tl++ = dnp->n_cookieverf.nfsuquad[0];
3946 			*tl++ = dnp->n_cookieverf.nfsuquad[1];
3947 			NFSUNLOCKNODE(dnp);
3948 		}
3949 		*tl++ = txdr_unsigned(nmp->nm_readdirsize);
3950 		*tl = txdr_unsigned(nmp->nm_readdirsize);
3951 		if (nd->nd_flag & ND_NFSV4) {
3952 			(void) nfsrv_putattrbit(nd, &attrbits);
3953 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3954 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
3955 			(void) nfsrv_putattrbit(nd, &dattrbits);
3956 		}
3957 		nanouptime(&ts);
3958 		error = nfscl_request(nd, vp, p, cred);
3959 		if (error)
3960 			return (error);
3961 		if (nd->nd_flag & ND_NFSV3)
3962 			error = nfscl_postop_attr(nd, nap, attrflagp);
3963 		if (nd->nd_repstat || error) {
3964 			if (!error)
3965 				error = nd->nd_repstat;
3966 			goto nfsmout;
3967 		}
3968 		if ((nd->nd_flag & ND_NFSV3) != 0 && *attrflagp != 0)
3969 			dctime = nap->na_ctime;
3970 		NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3971 		NFSLOCKNODE(dnp);
3972 		dnp->n_cookieverf.nfsuquad[0] = *tl++;
3973 		dnp->n_cookieverf.nfsuquad[1] = *tl++;
3974 		NFSUNLOCKNODE(dnp);
3975 		more_dirs = fxdr_unsigned(int, *tl);
3976 		if (!more_dirs)
3977 			tryformoredirs = 0;
3978 
3979 		/* loop through the dir entries, doctoring them to 4bsd form */
3980 		while (more_dirs && bigenough) {
3981 			NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3982 			if (nd->nd_flag & ND_NFSV4) {
3983 				ncookie.lval[0] = *tl++;
3984 				ncookie.lval[1] = *tl++;
3985 			} else {
3986 				fileno = fxdr_hyper(tl);
3987 				tl += 2;
3988 			}
3989 			len = fxdr_unsigned(int, *tl);
3990 			if (len <= 0 || len > NFS_MAXNAMLEN) {
3991 				error = EBADRPC;
3992 				goto nfsmout;
3993 			}
3994 			tlen = roundup2(len, 8);
3995 			if (tlen == len)
3996 				tlen += 8;  /* To ensure null termination. */
3997 			left = DIRBLKSIZ - blksiz;
3998 			if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
3999 				NFSBZERO(uiop->uio_iov->iov_base, left);
4000 				dp->d_reclen += left;
4001 				uiop->uio_iov->iov_base =
4002 				    (char *)uiop->uio_iov->iov_base + left;
4003 				uiop->uio_iov->iov_len -= left;
4004 				uiop->uio_resid -= left;
4005 				uiop->uio_offset += left;
4006 				blksiz = 0;
4007 			}
4008 			if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
4009 			    uiop->uio_resid)
4010 				bigenough = 0;
4011 			if (bigenough) {
4012 				dp = (struct dirent *)uiop->uio_iov->iov_base;
4013 				dp->d_pad0 = dp->d_pad1 = 0;
4014 				dp->d_off = 0;
4015 				dp->d_namlen = len;
4016 				dp->d_reclen = _GENERIC_DIRLEN(len) +
4017 				    NFSX_HYPER;
4018 				dp->d_type = DT_UNKNOWN;
4019 				blksiz += dp->d_reclen;
4020 				if (blksiz == DIRBLKSIZ)
4021 					blksiz = 0;
4022 				uiop->uio_resid -= DIRHDSIZ;
4023 				uiop->uio_offset += DIRHDSIZ;
4024 				uiop->uio_iov->iov_base =
4025 				    (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
4026 				uiop->uio_iov->iov_len -= DIRHDSIZ;
4027 				cnp->cn_nameptr = uiop->uio_iov->iov_base;
4028 				cnp->cn_namelen = len;
4029 				NFSCNHASHZERO(cnp);
4030 				error = nfsm_mbufuio(nd, uiop, len);
4031 				if (error)
4032 					goto nfsmout;
4033 				cp = uiop->uio_iov->iov_base;
4034 				tlen -= len;
4035 				NFSBZERO(cp, tlen);
4036 				cp += tlen;	/* points to cookie storage */
4037 				tl2 = (u_int32_t *)cp;
4038 				if (len == 2 && cnp->cn_nameptr[0] == '.' &&
4039 				    cnp->cn_nameptr[1] == '.')
4040 					isdotdot = 1;
4041 				else
4042 					isdotdot = 0;
4043 				uiop->uio_iov->iov_base =
4044 				    (char *)uiop->uio_iov->iov_base + tlen +
4045 				    NFSX_HYPER;
4046 				uiop->uio_iov->iov_len -= tlen + NFSX_HYPER;
4047 				uiop->uio_resid -= tlen + NFSX_HYPER;
4048 				uiop->uio_offset += (tlen + NFSX_HYPER);
4049 			} else {
4050 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
4051 				if (error)
4052 					goto nfsmout;
4053 			}
4054 			nfhp = NULL;
4055 			if (nd->nd_flag & ND_NFSV3) {
4056 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
4057 				ncookie.lval[0] = *tl++;
4058 				ncookie.lval[1] = *tl++;
4059 				attrflag = fxdr_unsigned(int, *tl);
4060 				if (attrflag) {
4061 				  error = nfsm_loadattr(nd, &nfsva);
4062 				  if (error)
4063 					goto nfsmout;
4064 				}
4065 				NFSM_DISSECT(tl,u_int32_t *,NFSX_UNSIGNED);
4066 				if (*tl) {
4067 					error = nfsm_getfh(nd, &nfhp);
4068 					if (error)
4069 					    goto nfsmout;
4070 				}
4071 				if (!attrflag && nfhp != NULL) {
4072 					free(nfhp, M_NFSFH);
4073 					nfhp = NULL;
4074 				}
4075 			} else {
4076 				rderr = 0;
4077 				nfsva.na_mntonfileno = 0xffffffff;
4078 				error = nfsv4_loadattr(nd, NULL, &nfsva, &nfhp,
4079 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
4080 				    NULL, NULL, &rderr, p, cred);
4081 				if (error)
4082 					goto nfsmout;
4083 			}
4084 
4085 			if (bigenough) {
4086 			    if (nd->nd_flag & ND_NFSV4) {
4087 				if (rderr) {
4088 				    dp->d_fileno = 0;
4089 				} else if (gotmnton) {
4090 				    if (nfsva.na_mntonfileno != 0xffffffff)
4091 					dp->d_fileno = nfsva.na_mntonfileno;
4092 				    else
4093 					dp->d_fileno = nfsva.na_fileid;
4094 				} else if (nfsva.na_filesid[0] ==
4095 				    dnp->n_vattr.na_filesid[0] &&
4096 				    nfsva.na_filesid[1] ==
4097 				    dnp->n_vattr.na_filesid[1]) {
4098 				    dp->d_fileno = nfsva.na_fileid;
4099 				} else {
4100 				    do {
4101 					fakefileno--;
4102 				    } while (fakefileno ==
4103 					nfsva.na_fileid);
4104 				    dp->d_fileno = fakefileno;
4105 				}
4106 			    } else {
4107 				dp->d_fileno = fileno;
4108 			    }
4109 			    *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
4110 				ncookie.lval[0];
4111 			    *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
4112 				ncookie.lval[1];
4113 
4114 			    if (nfhp != NULL) {
4115 				attr_ok = true;
4116 				if (NFSRV_CMPFH(nfhp->nfh_fh, nfhp->nfh_len,
4117 				    dnp->n_fhp->nfh_fh, dnp->n_fhp->nfh_len)) {
4118 				    VREF(vp);
4119 				    newvp = vp;
4120 				    unlocknewvp = 0;
4121 				    free(nfhp, M_NFSFH);
4122 				    np = dnp;
4123 				} else if (isdotdot != 0) {
4124 				    /*
4125 				     * Skip doing a nfscl_nget() call for "..".
4126 				     * There's a race between acquiring the nfs
4127 				     * node here and lookups that look for the
4128 				     * directory being read (in the parent).
4129 				     * It would try to get a lock on ".." here,
4130 				     * owning the lock on the directory being
4131 				     * read. Lookup will hold the lock on ".."
4132 				     * and try to acquire the lock on the
4133 				     * directory being read.
4134 				     * If the directory is unlocked/relocked,
4135 				     * then there is a LOR with the buflock
4136 				     * vp is relocked.
4137 				     */
4138 				    free(nfhp, M_NFSFH);
4139 				} else {
4140 				    error = nfscl_nget(vp->v_mount, vp,
4141 				      nfhp, cnp, p, &np, LK_EXCLUSIVE);
4142 				    if (!error) {
4143 					newvp = NFSTOV(np);
4144 					unlocknewvp = 1;
4145 					/*
4146 					 * If n_localmodtime >= time before RPC,
4147 					 * then a file modification operation,
4148 					 * such as VOP_SETATTR() of size, has
4149 					 * occurred while the Lookup RPC and
4150 					 * acquisition of the vnode happened. As
4151 					 * such, the attributes might be stale,
4152 					 * with possibly an incorrect size.
4153 					 */
4154 					NFSLOCKNODE(np);
4155 					if (timespecisset(
4156 					    &np->n_localmodtime) &&
4157 					    timespeccmp(&np->n_localmodtime,
4158 					    &ts, >=)) {
4159 					    NFSCL_DEBUG(4, "nfsrpc_readdirplus:"
4160 						" localmod stale attributes\n");
4161 					    attr_ok = false;
4162 					}
4163 					NFSUNLOCKNODE(np);
4164 				    }
4165 				}
4166 				nfhp = NULL;
4167 				if (newvp != NULLVP) {
4168 				    if (attr_ok)
4169 					error = nfscl_loadattrcache(&newvp,
4170 					    &nfsva, NULL, 0, 0);
4171 				    if (error) {
4172 					if (unlocknewvp)
4173 					    vput(newvp);
4174 					else
4175 					    vrele(newvp);
4176 					goto nfsmout;
4177 				    }
4178 				    dp->d_type =
4179 					vtonfs_dtype(np->n_vattr.na_type);
4180 				    ndp->ni_vp = newvp;
4181 				    NFSCNHASH(cnp, HASHINIT);
4182 				    if (cnp->cn_namelen <= NCHNAMLEN &&
4183 					ndp->ni_dvp != ndp->ni_vp &&
4184 					(newvp->v_type != VDIR ||
4185 					 dctime.tv_sec != 0)) {
4186 					cache_enter_time_flags(ndp->ni_dvp,
4187 					    ndp->ni_vp, cnp,
4188 					    &nfsva.na_ctime,
4189 					    newvp->v_type != VDIR ? NULL :
4190 					    &dctime, VFS_CACHE_DROPOLD);
4191 				    }
4192 				    if (unlocknewvp)
4193 					vput(newvp);
4194 				    else
4195 					vrele(newvp);
4196 				    newvp = NULLVP;
4197 				}
4198 			    }
4199 			} else if (nfhp != NULL) {
4200 			    free(nfhp, M_NFSFH);
4201 			}
4202 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
4203 			more_dirs = fxdr_unsigned(int, *tl);
4204 		}
4205 		/*
4206 		 * If at end of rpc data, get the eof boolean
4207 		 */
4208 		if (!more_dirs) {
4209 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
4210 			eof = fxdr_unsigned(int, *tl);
4211 			if (tryformoredirs)
4212 				more_dirs = !eof;
4213 			if (nd->nd_flag & ND_NFSV4) {
4214 				error = nfscl_postop_attr(nd, nap, attrflagp);
4215 				if (error)
4216 					goto nfsmout;
4217 			}
4218 		}
4219 		m_freem(nd->nd_mrep);
4220 		nd->nd_mrep = NULL;
4221 	}
4222 	/*
4223 	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
4224 	 * by increasing d_reclen for the last record.
4225 	 */
4226 	if (blksiz > 0) {
4227 		left = DIRBLKSIZ - blksiz;
4228 		NFSBZERO(uiop->uio_iov->iov_base, left);
4229 		dp->d_reclen += left;
4230 		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
4231 		    left;
4232 		uiop->uio_iov->iov_len -= left;
4233 		uiop->uio_resid -= left;
4234 		uiop->uio_offset += left;
4235 	}
4236 
4237 	/*
4238 	 * If returning no data, assume end of file.
4239 	 * If not bigenough, return not end of file, since you aren't
4240 	 *    returning all the data
4241 	 * Otherwise, return the eof flag from the server.
4242 	 */
4243 	if (eofp != NULL) {
4244 		if (tresid == uiop->uio_resid)
4245 			*eofp = 1;
4246 		else if (!bigenough)
4247 			*eofp = 0;
4248 		else
4249 			*eofp = eof;
4250 	}
4251 
4252 	/*
4253 	 * Add extra empty records to any remaining DIRBLKSIZ chunks.
4254 	 */
4255 	while (uiop->uio_resid > 0 && uiop->uio_resid != tresid) {
4256 		dp = (struct dirent *)uiop->uio_iov->iov_base;
4257 		NFSBZERO(dp, DIRBLKSIZ);
4258 		dp->d_type = DT_UNKNOWN;
4259 		tl = (u_int32_t *)&dp->d_name[4];
4260 		*tl++ = cookie.lval[0];
4261 		*tl = cookie.lval[1];
4262 		dp->d_reclen = DIRBLKSIZ;
4263 		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
4264 		    DIRBLKSIZ;
4265 		uiop->uio_iov->iov_len -= DIRBLKSIZ;
4266 		uiop->uio_resid -= DIRBLKSIZ;
4267 		uiop->uio_offset += DIRBLKSIZ;
4268 	}
4269 
4270 nfsmout:
4271 	if (nd->nd_mrep != NULL)
4272 		m_freem(nd->nd_mrep);
4273 	return (error);
4274 }
4275 
4276 /*
4277  * Nfs commit rpc
4278  */
4279 int
4280 nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred,
4281     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
4282 {
4283 	u_int32_t *tl;
4284 	struct nfsrv_descript nfsd, *nd = &nfsd;
4285 	nfsattrbit_t attrbits;
4286 	int error;
4287 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4288 
4289 	*attrflagp = 0;
4290 	NFSCL_REQSTART(nd, NFSPROC_COMMIT, vp, cred);
4291 	NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
4292 	txdr_hyper(offset, tl);
4293 	tl += 2;
4294 	*tl = txdr_unsigned(cnt);
4295 	if (nd->nd_flag & ND_NFSV4) {
4296 		/*
4297 		 * And do a Getattr op.
4298 		 */
4299 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4300 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
4301 		NFSGETATTR_ATTRBIT(&attrbits);
4302 		(void) nfsrv_putattrbit(nd, &attrbits);
4303 	}
4304 	error = nfscl_request(nd, vp, p, cred);
4305 	if (error)
4306 		return (error);
4307 	error = nfscl_wcc_data(nd, vp, nap, attrflagp, NULL, NULL);
4308 	if (!error && !nd->nd_repstat) {
4309 		NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
4310 		NFSLOCKMNT(nmp);
4311 		if (NFSBCMP(nmp->nm_verf, tl, NFSX_VERF)) {
4312 			NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
4313 			nd->nd_repstat = NFSERR_STALEWRITEVERF;
4314 		}
4315 		NFSUNLOCKMNT(nmp);
4316 		if (nd->nd_flag & ND_NFSV4)
4317 			error = nfscl_postop_attr(nd, nap, attrflagp);
4318 	}
4319 nfsmout:
4320 	if (!error && nd->nd_repstat)
4321 		error = nd->nd_repstat;
4322 	m_freem(nd->nd_mrep);
4323 	return (error);
4324 }
4325 
4326 /*
4327  * NFS byte range lock rpc.
4328  * (Mostly just calls one of the three lower level RPC routines.)
4329  */
4330 int
4331 nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
4332     int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags)
4333 {
4334 	struct nfscllockowner *lp;
4335 	struct nfsclclient *clp;
4336 	struct nfsfh *nfhp;
4337 	struct nfsrv_descript nfsd, *nd = &nfsd;
4338 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4339 	u_int64_t off, len;
4340 	off_t start, end;
4341 	u_int32_t clidrev = 0;
4342 	int error = 0, newone = 0, expireret = 0, retrycnt, donelocally;
4343 	int callcnt, dorpc;
4344 
4345 	/*
4346 	 * Convert the flock structure into a start and end and do POSIX
4347 	 * bounds checking.
4348 	 */
4349 	switch (fl->l_whence) {
4350 	case SEEK_SET:
4351 	case SEEK_CUR:
4352 		/*
4353 		 * Caller is responsible for adding any necessary offset
4354 		 * when SEEK_CUR is used.
4355 		 */
4356 		start = fl->l_start;
4357 		off = fl->l_start;
4358 		break;
4359 	case SEEK_END:
4360 		start = size + fl->l_start;
4361 		off = size + fl->l_start;
4362 		break;
4363 	default:
4364 		return (EINVAL);
4365 	}
4366 	if (start < 0)
4367 		return (EINVAL);
4368 	if (fl->l_len != 0) {
4369 		end = start + fl->l_len - 1;
4370 		if (end < start)
4371 			return (EINVAL);
4372 	}
4373 
4374 	len = fl->l_len;
4375 	if (len == 0)
4376 		len = NFS64BITSSET;
4377 	retrycnt = 0;
4378 	do {
4379 	    nd->nd_repstat = 0;
4380 	    if (op == F_GETLK) {
4381 		error = nfscl_getcl(vp->v_mount, cred, p, false, true, &clp);
4382 		if (error)
4383 			return (error);
4384 		error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags);
4385 		if (!error) {
4386 			clidrev = clp->nfsc_clientidrev;
4387 			error = nfsrpc_lockt(nd, vp, clp, off, len, fl, cred,
4388 			    p, id, flags);
4389 		} else if (error == -1) {
4390 			error = 0;
4391 		}
4392 		nfscl_clientrelease(clp);
4393 	    } else if (op == F_UNLCK && fl->l_type == F_UNLCK) {
4394 		/*
4395 		 * We must loop around for all lockowner cases.
4396 		 */
4397 		callcnt = 0;
4398 		error = nfscl_getcl(vp->v_mount, cred, p, false, true, &clp);
4399 		if (error)
4400 			return (error);
4401 		do {
4402 		    error = nfscl_relbytelock(vp, off, len, cred, p, callcnt,
4403 			clp, id, flags, &lp, &dorpc);
4404 		    /*
4405 		     * If it returns a NULL lp, we're done.
4406 		     */
4407 		    if (lp == NULL) {
4408 			if (callcnt == 0)
4409 			    nfscl_clientrelease(clp);
4410 			else
4411 			    nfscl_releasealllocks(clp, vp, p, id, flags);
4412 			return (error);
4413 		    }
4414 		    if (nmp->nm_clp != NULL)
4415 			clidrev = nmp->nm_clp->nfsc_clientidrev;
4416 		    else
4417 			clidrev = 0;
4418 		    /*
4419 		     * If the server doesn't support Posix lock semantics,
4420 		     * only allow locks on the entire file, since it won't
4421 		     * handle overlapping byte ranges.
4422 		     * There might still be a problem when a lock
4423 		     * upgrade/downgrade (read<->write) occurs, since the
4424 		     * server "might" expect an unlock first?
4425 		     */
4426 		    if (dorpc && (lp->nfsl_open->nfso_posixlock ||
4427 			(off == 0 && len == NFS64BITSSET))) {
4428 			/*
4429 			 * Since the lock records will go away, we must
4430 			 * wait for grace and delay here.
4431 			 */
4432 			do {
4433 			    error = nfsrpc_locku(nd, nmp, lp, off, len,
4434 				NFSV4LOCKT_READ, cred, p, 0);
4435 			    if ((nd->nd_repstat == NFSERR_GRACE ||
4436 				 nd->nd_repstat == NFSERR_DELAY) &&
4437 				error == 0)
4438 				(void) nfs_catnap(PZERO, (int)nd->nd_repstat,
4439 				    "nfs_advlock");
4440 			} while ((nd->nd_repstat == NFSERR_GRACE ||
4441 			    nd->nd_repstat == NFSERR_DELAY) && error == 0);
4442 		    }
4443 		    callcnt++;
4444 		} while (error == 0 && nd->nd_repstat == 0);
4445 		nfscl_releasealllocks(clp, vp, p, id, flags);
4446 	    } else if (op == F_SETLK) {
4447 		error = nfscl_getbytelock(vp, off, len, fl->l_type, cred, p,
4448 		    NULL, 0, id, flags, NULL, NULL, &lp, &newone, &donelocally);
4449 		if (error || donelocally) {
4450 			return (error);
4451 		}
4452 		if (nmp->nm_clp != NULL)
4453 			clidrev = nmp->nm_clp->nfsc_clientidrev;
4454 		else
4455 			clidrev = 0;
4456 		nfhp = VTONFS(vp)->n_fhp;
4457 		if (!lp->nfsl_open->nfso_posixlock &&
4458 		    (off != 0 || len != NFS64BITSSET)) {
4459 			error = EINVAL;
4460 		} else {
4461 			error = nfsrpc_lock(nd, nmp, vp, nfhp->nfh_fh,
4462 			    nfhp->nfh_len, lp, newone, reclaim, off,
4463 			    len, fl->l_type, cred, p, 0);
4464 		}
4465 		if (!error)
4466 			error = nd->nd_repstat;
4467 		nfscl_lockrelease(lp, error, newone);
4468 	    } else {
4469 		error = EINVAL;
4470 	    }
4471 	    if (!error)
4472 	        error = nd->nd_repstat;
4473 	    if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
4474 		error == NFSERR_STALEDONTRECOVER ||
4475 		error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
4476 		error == NFSERR_BADSESSION) {
4477 		(void) nfs_catnap(PZERO, error, "nfs_advlock");
4478 	    } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
4479 		&& clidrev != 0) {
4480 		expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
4481 		retrycnt++;
4482 	    }
4483 	} while (error == NFSERR_GRACE ||
4484 	    error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
4485 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALESTATEID ||
4486 	    error == NFSERR_BADSESSION ||
4487 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
4488 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
4489 	if (error && retrycnt >= 4)
4490 		error = EIO;
4491 	return (error);
4492 }
4493 
/*
 * The lower level routine for the LockT case.
 *
 * Builds and sends a LockT request for vp.  The request carries the lock
 * type derived from fl->l_type, the off and len values, the client ID of
 * the MDS session and a lock owner name made of the owner filled in by
 * nfscl_filllockowner() followed by the file handle of vp.
 *
 * Reply handling:
 * - nd_repstat == 0: fl->l_type is set to F_UNLCK.
 * - NFSERR_DENIED: nd_repstat is cleared and fl is filled in from the
 *   reply (l_whence = SEEK_SET, l_start, l_len, l_type, l_pid = 0).
 * - NFSERR_STALECLIENTID: recovery is initiated for clp.
 * Any other nd_repstat value is left in nd untouched.
 *
 * Returns 0, the error from nfscl_request(), EBADRPC when the length of
 * the conflicting lock owner is out of range, or the result of
 * nfsm_advance().
 */
int
nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp,
    struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl,
    struct ucred *cred, NFSPROC_T *p, void *id, int flags)
{
	u_int32_t *tl;
	int error, type, size;
	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
	struct nfsnode *np;
	struct nfsmount *nmp;
	struct nfsclsession *tsep;

	nmp = VFSTONFS(vp->v_mount);
	NFSCL_REQSTART(nd, NFSPROC_LOCKT, vp, cred);
	/* 7 words: lock type (1), offset (2), length (2), client ID (2). */
	NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
	if (fl->l_type == F_RDLCK)
		*tl++ = txdr_unsigned(NFSV4LOCKT_READ);
	else
		*tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
	txdr_hyper(off, tl);
	tl += 2;
	txdr_hyper(len, tl);
	tl += 2;
	tsep = nfsmnt_mdssession(nmp);
	*tl++ = tsep->nfsess_clientid.lval[0];
	*tl = tsep->nfsess_clientid.lval[1];
	/* The owner name is the lock owner followed by the file handle. */
	nfscl_filllockowner(id, own, flags);
	np = VTONFS(vp);
	NFSBCOPY(np->n_fhp->nfh_fh, &own[NFSV4CL_LOCKNAMELEN],
	    np->n_fhp->nfh_len);
	(void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + np->n_fhp->nfh_len);
	error = nfscl_request(nd, vp, p, cred);
	if (error)
		return (error);
	if (nd->nd_repstat == 0) {
		fl->l_type = F_UNLCK;
	} else if (nd->nd_repstat == NFSERR_DENIED) {
		nd->nd_repstat = 0;
		fl->l_whence = SEEK_SET;
		/*
		 * 8 words: offset (2), length (2), lock type (1), two words
		 * that are skipped below and the owner length (1).
		 */
		NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
		fl->l_start = fxdr_hyper(tl);
		tl += 2;
		/* The len argument is reused to hold the length in the reply. */
		len = fxdr_hyper(tl);
		tl += 2;
		/* A length of NFS64BITSSET is reported as l_len == 0. */
		if (len == NFS64BITSSET)
			fl->l_len = 0;
		else
			fl->l_len = len;
		type = fxdr_unsigned(int, *tl++);
		if (type == NFSV4LOCKT_WRITE)
			fl->l_type = F_WRLCK;
		else
			fl->l_type = F_RDLCK;
		/*
		 * XXX For now, I have no idea what to do with the
		 * conflicting lock_owner, so I'll just set the pid == 0
		 * and skip over the lock_owner.
		 */
		fl->l_pid = (pid_t)0;
		tl += 2;
		size = fxdr_unsigned(int, *tl);
		if (size < 0 || size > NFSV4_OPAQUELIMIT)
			error = EBADRPC;
		if (!error)
			error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
	} else if (nd->nd_repstat == NFSERR_STALECLIENTID)
		nfscl_initiate_recovery(clp);
	/*
	 * NOTE(review): no goto to this label is visible here; presumably
	 * NFSM_DISSECT branches to it on a short reply - confirm against the
	 * macro definition.
	 */
nfsmout:
	m_freem(nd->nd_mrep);
	return (error);
}
4568 
/*
 * Lower level function that performs the LockU RPC.
 *
 * The request is built for the file handle of the open that lp belongs to
 * and carries the lock type, the lock owner's seqid, its stateid and the
 * off and len values.  When syscred is non-zero, ND_USEGSSNAME is set on
 * the request.
 *
 * NFSCL_INCRSEQID() is applied to lp->nfsl_seqid after the request has been
 * done, before the RPC error is checked.  On nd_repstat == 0 the stateid in
 * the reply replaces lp->nfsl_stateid; on NFSERR_STALESTATEID recovery is
 * initiated for the client that owns the open.  nd_repstat is left in nd.
 *
 * Returns 0 or the error from newnfs_request().
 */
static int
nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp,
    struct nfscllockowner *lp, u_int64_t off, u_int64_t len,
    u_int32_t type, struct ucred *cred, NFSPROC_T *p, int syscred)
{
	u_int32_t *tl;
	int error;

	nfscl_reqstart(nd, NFSPROC_LOCKU, nmp, lp->nfsl_open->nfso_fh,
	    lp->nfsl_open->nfso_fhlen, NULL, NULL, 0, 0, cred);
	/* Lock type (1), seqid (1), stateid, offset (2) and length (2). */
	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(type);
	*tl = txdr_unsigned(lp->nfsl_seqid);
	/*
	 * When nfstest_outofseq is non-zero, randomly send seqid + 1 instead
	 * (on average once every nfstest_outofseq requests).
	 */
	if (nfstest_outofseq &&
	    (arc4random() % nfstest_outofseq) == 0)
		*tl = txdr_unsigned(lp->nfsl_seqid + 1);
	tl++;
	/* The stateid's seqid word is sent as 0 when NFSHASNFSV4N(nmp). */
	if (NFSHASNFSV4N(nmp))
		*tl++ = 0;
	else
		*tl++ = lp->nfsl_stateid.seqid;
	*tl++ = lp->nfsl_stateid.other[0];
	*tl++ = lp->nfsl_stateid.other[1];
	*tl++ = lp->nfsl_stateid.other[2];
	txdr_hyper(off, tl);
	tl += 2;
	txdr_hyper(len, tl);
	if (syscred)
		nd->nd_flag |= ND_USEGSSNAME;
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	/* Done before the error check, so it also runs when the RPC failed. */
	NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
	if (error)
		return (error);
	if (nd->nd_repstat == 0) {
		/* Save the updated lock stateid from the reply. */
		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
		lp->nfsl_stateid.seqid = *tl++;
		lp->nfsl_stateid.other[0] = *tl++;
		lp->nfsl_stateid.other[1] = *tl++;
		lp->nfsl_stateid.other[2] = *tl;
	} else if (nd->nd_repstat == NFSERR_STALESTATEID)
		nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
	/*
	 * NOTE(review): presumably the target of NFSM_DISSECT on a short
	 * reply - confirm against the macro definition.
	 */
nfsmout:
	m_freem(nd->nd_mrep);
	return (error);
}
4618 
/*
 * The actual Lock RPC.
 *
 * Marshals a Lock operation for the file handle nfhp/fhlen and sends it
 * with newnfs_request().  The common part holds the lock type (read when
 * type is F_RDLCK, write otherwise), the reclaim flag and the off and len
 * values.  It is followed by one of two variants:
 * - newone != 0: the open owner's seqid, the open stateid, the lock seqid,
 *   the MDS session's client ID and the lock owner name (lp->nfsl_owner
 *   followed by the file handle).
 * - newone == 0: the existing lock stateid and the lock seqid.
 * When syscred is non-zero, ND_USEGSSNAME is set on the request.
 *
 * After a request that did not fail at the RPC level, NFSCL_INCRSEQID() is
 * applied to the lock seqid and, for the newone case, to the open owner's
 * seqid.  On nd_repstat == 0 the stateid in the reply is stored in
 * lp->nfsl_stateid; on NFSERR_DENIED the rest of the denied reply is
 * skipped; on NFSERR_STALESTATEID recovery is initiated for the client that
 * owns the open.  nd_repstat is left in nd.
 *
 * Returns 0, the error from newnfs_request(), EBADRPC when the owner length
 * in a denied reply is out of range, or the result of nfsm_advance().
 */
int
nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp,
    u_int8_t *nfhp, int fhlen, struct nfscllockowner *lp, int newone,
    int reclaim, u_int64_t off, u_int64_t len, short type, struct ucred *cred,
    NFSPROC_T *p, int syscred)
{
	u_int32_t *tl;
	int error, size;
	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
	struct nfsclsession *tsep;

	nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL, NULL, 0, 0,
	    cred);
	/*
	 * 7 words: lock type (1), reclaim (1), offset (2), length (2) and
	 * the boolean that selects the variant built below (1).
	 */
	NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
	if (type == F_RDLCK)
		*tl++ = txdr_unsigned(NFSV4LOCKT_READ);
	else
		*tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
	*tl++ = txdr_unsigned(reclaim);
	txdr_hyper(off, tl);
	tl += 2;
	txdr_hyper(len, tl);
	tl += 2;
	if (newone) {
	    *tl = newnfs_true;
	    /* Open seqid, open stateid, lock seqid and client ID. */
	    NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
		2 * NFSX_UNSIGNED + NFSX_HYPER);
	    *tl++ = txdr_unsigned(lp->nfsl_open->nfso_own->nfsow_seqid);
	    /* The stateid's seqid word is sent as 0 when NFSHASNFSV4N(nmp). */
	    if (NFSHASNFSV4N(nmp))
		*tl++ = 0;
	    else
		*tl++ = lp->nfsl_open->nfso_stateid.seqid;
	    *tl++ = lp->nfsl_open->nfso_stateid.other[0];
	    *tl++ = lp->nfsl_open->nfso_stateid.other[1];
	    *tl++ = lp->nfsl_open->nfso_stateid.other[2];
	    *tl++ = txdr_unsigned(lp->nfsl_seqid);
	    tsep = nfsmnt_mdssession(nmp);
	    *tl++ = tsep->nfsess_clientid.lval[0];
	    *tl = tsep->nfsess_clientid.lval[1];
	    /* The owner name is the lock owner followed by the file handle. */
	    NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
	    NFSBCOPY(nfhp, &own[NFSV4CL_LOCKNAMELEN], fhlen);
	    (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
	} else {
	    *tl = newnfs_false;
	    /* Lock stateid followed by the lock seqid. */
	    NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED);
	    if (NFSHASNFSV4N(nmp))
		*tl++ = 0;
	    else
		*tl++ = lp->nfsl_stateid.seqid;
	    *tl++ = lp->nfsl_stateid.other[0];
	    *tl++ = lp->nfsl_stateid.other[1];
	    *tl++ = lp->nfsl_stateid.other[2];
	    *tl = txdr_unsigned(lp->nfsl_seqid);
	    /*
	     * When nfstest_outofseq is non-zero, randomly send seqid + 1
	     * instead (on average once every nfstest_outofseq requests).
	     */
	    if (nfstest_outofseq &&
		(arc4random() % nfstest_outofseq) == 0)
		    *tl = txdr_unsigned(lp->nfsl_seqid + 1);
	}
	if (syscred)
		nd->nd_flag |= ND_USEGSSNAME;
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	/* Unlike nfsrpc_locku(), no seqid is touched when the RPC failed. */
	if (error)
		return (error);
	if (newone)
	    NFSCL_INCRSEQID(lp->nfsl_open->nfso_own->nfsow_seqid, nd);
	NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
	if (nd->nd_repstat == 0) {
		/* Save the lock stateid from the reply. */
		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
		lp->nfsl_stateid.seqid = *tl++;
		lp->nfsl_stateid.other[0] = *tl++;
		lp->nfsl_stateid.other[1] = *tl++;
		lp->nfsl_stateid.other[2] = *tl;
	} else if (nd->nd_repstat == NFSERR_DENIED) {
		/*
		 * Only the eighth word (the owner length) of the denied
		 * reply is used, to skip over the owner that follows it.
		 */
		NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
		size = fxdr_unsigned(int, *(tl + 7));
		if (size < 0 || size > NFSV4_OPAQUELIMIT)
			error = EBADRPC;
		if (!error)
			error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
	} else if (nd->nd_repstat == NFSERR_STALESTATEID)
		nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
	/*
	 * NOTE(review): presumably the target of NFSM_DISSECT on a short
	 * reply - confirm against the macro definition.
	 */
nfsmout:
	m_freem(nd->nd_mrep);
	return (error);
}
4707 
/*
 * nfs statfs rpc
 * (always called with the vp for the mount point)
 *
 * For an NFSv4 mount a Getattr is done instead: the attribute set is
 * NFSROOTFS_GETATTRBIT when leasep is not NULL and NFSSTATFS_GETATTRBIT
 * otherwise, and the reply is loaded by nfsv4_loadattr() into nap, sbp, fsp
 * and leasep.  On success the file system ID from nap is saved in the mount
 * structure and *attrflagp is set to 1.
 *
 * For other mounts the FSSTAT RPC is done and its reply is decoded into sbp
 * here, using the NFSv3 or the older layout depending on the mount.
 *
 * Returns 0 or an error number / NFS reply status.
 */
int
nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp,
    uint32_t *leasep, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap,
    int *attrflagp)
{
	u_int32_t *tl = NULL;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfsmount *nmp;
	nfsattrbit_t attrbits;
	int error;

	*attrflagp = 0;
	nmp = VFSTONFS(vp->v_mount);
	if (NFSHASNFSV4(nmp)) {
		/*
		 * For V4, you actually do a getattr.
		 */
		NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp, cred);
		if (leasep != NULL)
			NFSROOTFS_GETATTRBIT(&attrbits);
		else
			NFSSTATFS_GETATTRBIT(&attrbits);
		(void) nfsrv_putattrbit(nd, &attrbits);
		nd->nd_flag |= ND_USEGSSNAME;
		error = nfscl_request(nd, vp, p, cred);
		if (error)
			return (error);
		if (nd->nd_repstat == 0) {
			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
			    NULL, NULL, sbp, fsp, NULL, 0, NULL, leasep, NULL,
			    p, cred);
			if (!error) {
				/* Remember the file system ID of the mount. */
				nmp->nm_fsid[0] = nap->na_filesid[0];
				nmp->nm_fsid[1] = nap->na_filesid[1];
				NFSSETHASSETFSID(nmp);
				*attrflagp = 1;
			}
		} else {
			error = nd->nd_repstat;
		}
		if (error)
			goto nfsmout;
	} else {
		NFSCL_REQSTART(nd, NFSPROC_FSSTAT, vp, NULL);
		error = nfscl_request(nd, vp, p, cred);
		if (error)
			return (error);
		/* Only an NFSv3 reply carries post-op attributes here. */
		if (nd->nd_flag & ND_NFSV3) {
			error = nfscl_postop_attr(nd, nap, attrflagp);
			if (error)
				goto nfsmout;
		}
		if (nd->nd_repstat) {
			error = nd->nd_repstat;
			goto nfsmout;
		}
		NFSM_DISSECT(tl, u_int32_t *,
		    NFSX_STATFS(nd->nd_flag & ND_NFSV3));
	}
	/*
	 * Decode the FSSTAT reply.  Neither branch below is taken for an
	 * NFSv4 mount that is not also flagged NFSv3, so tl (still NULL on
	 * the NFSv4 path) is not dereferenced in that case.
	 */
	if (NFSHASNFSV3(nmp)) {
		sbp->sf_tbytes = fxdr_hyper(tl); tl += 2;
		sbp->sf_fbytes = fxdr_hyper(tl); tl += 2;
		sbp->sf_abytes = fxdr_hyper(tl); tl += 2;
		sbp->sf_tfiles = fxdr_hyper(tl); tl += 2;
		sbp->sf_ffiles = fxdr_hyper(tl); tl += 2;
		sbp->sf_afiles = fxdr_hyper(tl); tl += 2;
		sbp->sf_invarsec = fxdr_unsigned(u_int32_t, *tl);
	} else if (NFSHASNFSV4(nmp) == 0) {
		sbp->sf_tsize = fxdr_unsigned(u_int32_t, *tl++);
		sbp->sf_bsize = fxdr_unsigned(u_int32_t, *tl++);
		sbp->sf_blocks = fxdr_unsigned(u_int32_t, *tl++);
		sbp->sf_bfree = fxdr_unsigned(u_int32_t, *tl++);
		sbp->sf_bavail = fxdr_unsigned(u_int32_t, *tl);
	}
nfsmout:
	m_freem(nd->nd_mrep);
	return (error);
}
4790 
4791 /*
4792  * nfs pathconf rpc
4793  */
4794 int
4795 nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc,
4796     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
4797 {
4798 	struct nfsrv_descript nfsd, *nd = &nfsd;
4799 	struct nfsmount *nmp;
4800 	u_int32_t *tl;
4801 	nfsattrbit_t attrbits;
4802 	int error;
4803 	struct nfsnode *np;
4804 
4805 	*attrflagp = 0;
4806 	nmp = VFSTONFS(vp->v_mount);
4807 	if (NFSHASNFSV4(nmp)) {
4808 		np = VTONFS(vp);
4809 		if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0 &&
4810 		    nmp->nm_fhsize == 0) {
4811 			/* Attempt to get the actual root file handle. */
4812 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
4813 			    cred, p);
4814 			if (error != 0)
4815 				return (EACCES);
4816 			if (np->n_fhp->nfh_len == NFSX_FHMAX + 1)
4817 				nfscl_statfs(vp, cred, p);
4818 		}
4819 		/*
4820 		 * For V4, you actually do a getattr.
4821 		 */
4822 		NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp, cred);
4823 		NFSPATHCONF_GETATTRBIT(&attrbits);
4824 		(void) nfsrv_putattrbit(nd, &attrbits);
4825 		nd->nd_flag |= ND_USEGSSNAME;
4826 		error = nfscl_request(nd, vp, p, cred);
4827 		if (error)
4828 			return (error);
4829 		if (nd->nd_repstat == 0) {
4830 			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4831 			    pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p,
4832 			    cred);
4833 			if (!error)
4834 				*attrflagp = 1;
4835 		} else {
4836 			error = nd->nd_repstat;
4837 		}
4838 	} else {
4839 		NFSCL_REQSTART(nd, NFSPROC_PATHCONF, vp, NULL);
4840 		error = nfscl_request(nd, vp, p, cred);
4841 		if (error)
4842 			return (error);
4843 		error = nfscl_postop_attr(nd, nap, attrflagp);
4844 		if (nd->nd_repstat && !error)
4845 			error = nd->nd_repstat;
4846 		if (!error) {
4847 			NFSM_DISSECT(tl, u_int32_t *, NFSX_V3PATHCONF);
4848 			pc->pc_linkmax = fxdr_unsigned(u_int32_t, *tl++);
4849 			pc->pc_namemax = fxdr_unsigned(u_int32_t, *tl++);
4850 			pc->pc_notrunc = fxdr_unsigned(u_int32_t, *tl++);
4851 			pc->pc_chownrestricted =
4852 			    fxdr_unsigned(u_int32_t, *tl++);
4853 			pc->pc_caseinsensitive =
4854 			    fxdr_unsigned(u_int32_t, *tl++);
4855 			pc->pc_casepreserving = fxdr_unsigned(u_int32_t, *tl);
4856 		}
4857 	}
4858 nfsmout:
4859 	m_freem(nd->nd_mrep);
4860 	return (error);
4861 }
4862 
4863 /*
4864  * nfs version 3 fsinfo rpc call
4865  */
4866 int
4867 nfsrpc_fsinfo(vnode_t vp, struct nfsfsinfo *fsp, struct ucred *cred,
4868     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
4869 {
4870 	u_int32_t *tl;
4871 	struct nfsrv_descript nfsd, *nd = &nfsd;
4872 	int error;
4873 
4874 	*attrflagp = 0;
4875 	NFSCL_REQSTART(nd, NFSPROC_FSINFO, vp, NULL);
4876 	error = nfscl_request(nd, vp, p, cred);
4877 	if (error)
4878 		return (error);
4879 	error = nfscl_postop_attr(nd, nap, attrflagp);
4880 	if (nd->nd_repstat && !error)
4881 		error = nd->nd_repstat;
4882 	if (!error) {
4883 		NFSM_DISSECT(tl, u_int32_t *, NFSX_V3FSINFO);
4884 		fsp->fs_rtmax = fxdr_unsigned(u_int32_t, *tl++);
4885 		fsp->fs_rtpref = fxdr_unsigned(u_int32_t, *tl++);
4886 		fsp->fs_rtmult = fxdr_unsigned(u_int32_t, *tl++);
4887 		fsp->fs_wtmax = fxdr_unsigned(u_int32_t, *tl++);
4888 		fsp->fs_wtpref = fxdr_unsigned(u_int32_t, *tl++);
4889 		fsp->fs_wtmult = fxdr_unsigned(u_int32_t, *tl++);
4890 		fsp->fs_dtpref = fxdr_unsigned(u_int32_t, *tl++);
4891 		fsp->fs_maxfilesize = fxdr_hyper(tl);
4892 		tl += 2;
4893 		fxdr_nfsv3time(tl, &fsp->fs_timedelta);
4894 		tl += 2;
4895 		fsp->fs_properties = fxdr_unsigned(u_int32_t, *tl);
4896 	}
4897 nfsmout:
4898 	m_freem(nd->nd_mrep);
4899 	return (error);
4900 }
4901 
4902 /*
4903  * This function performs the Renew RPC.
4904  */
4905 int
4906 nfsrpc_renew(struct nfsclclient *clp, struct nfsclds *dsp, struct ucred *cred,
4907     NFSPROC_T *p)
4908 {
4909 	u_int32_t *tl;
4910 	struct nfsrv_descript nfsd;
4911 	struct nfsrv_descript *nd = &nfsd;
4912 	struct nfsmount *nmp;
4913 	int error;
4914 	struct nfssockreq *nrp;
4915 	struct nfsclsession *tsep;
4916 
4917 	nmp = clp->nfsc_nmp;
4918 	if (nmp == NULL)
4919 		return (0);
4920 	if (dsp == NULL)
4921 		nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, NULL, 0,
4922 		    0, cred);
4923 	else
4924 		nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL,
4925 		    &dsp->nfsclds_sess, 0, 0, NULL);
4926 	if (!NFSHASNFSV4N(nmp)) {
4927 		/* NFSv4.1 just uses a Sequence Op and not a Renew. */
4928 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4929 		tsep = nfsmnt_mdssession(nmp);
4930 		*tl++ = tsep->nfsess_clientid.lval[0];
4931 		*tl = tsep->nfsess_clientid.lval[1];
4932 	}
4933 	nrp = NULL;
4934 	if (dsp != NULL)
4935 		nrp = dsp->nfsclds_sockp;
4936 	if (nrp == NULL)
4937 		/* If NULL, use the MDS socket. */
4938 		nrp = &nmp->nm_sockreq;
4939 	nd->nd_flag |= ND_USEGSSNAME;
4940 	if (dsp == NULL)
4941 		error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4942 		    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4943 	else {
4944 		error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4945 		    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
4946 		if (error == ENXIO)
4947 			nfscl_cancelreqs(dsp);
4948 	}
4949 	if (error)
4950 		return (error);
4951 	error = nd->nd_repstat;
4952 	m_freem(nd->nd_mrep);
4953 	return (error);
4954 }
4955 
4956 /*
4957  * This function performs the Releaselockowner RPC.
4958  */
4959 int
4960 nfsrpc_rellockown(struct nfsmount *nmp, struct nfscllockowner *lp,
4961     uint8_t *fh, int fhlen, struct ucred *cred, NFSPROC_T *p)
4962 {
4963 	struct nfsrv_descript nfsd, *nd = &nfsd;
4964 	u_int32_t *tl;
4965 	int error;
4966 	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4967 	struct nfsclsession *tsep;
4968 
4969 	if (NFSHASNFSV4N(nmp)) {
4970 		/* For NFSv4.1, do a FreeStateID. */
4971 		nfscl_reqstart(nd, NFSPROC_FREESTATEID, nmp, NULL, 0, NULL,
4972 		    NULL, 0, 0, cred);
4973 		nfsm_stateidtom(nd, &lp->nfsl_stateid, NFSSTATEID_PUTSTATEID);
4974 	} else {
4975 		nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL,
4976 		    NULL, 0, 0, NULL);
4977 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4978 		tsep = nfsmnt_mdssession(nmp);
4979 		*tl++ = tsep->nfsess_clientid.lval[0];
4980 		*tl = tsep->nfsess_clientid.lval[1];
4981 		NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4982 		NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4983 		(void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4984 	}
4985 	nd->nd_flag |= ND_USEGSSNAME;
4986 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4987 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4988 	if (error)
4989 		return (error);
4990 	error = nd->nd_repstat;
4991 	m_freem(nd->nd_mrep);
4992 	return (error);
4993 }
4994 
/*
 * This function performs the Compound to get the mount pt FH.
 *
 * The compound starts with the operation set up by nfscl_reqstart() for
 * NFSPROC_PUTROOTFH, is followed by one Lookup per non-empty component of
 * dirpath (components are separated by '/') and ends with a Getfh.
 *
 * dirpath is modified while it is being parsed: each '/' that ends a
 * component is temporarily replaced by '\0' and put back afterwards, so the
 * string is unchanged on return but must be writable.
 *
 * On success the file handle from the reply is copied into nmp->nm_fh,
 * with the mount locked, but only when nmp->nm_fhsize is still 0.
 *
 * Returns 0, the error from newnfs_request() or the reply status, which is
 * set to NFSERR_BADXDR here when the file handle length in the reply is
 * not in the range 1..NFSX_FHMAX.
 */
int
nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred,
    NFSPROC_T *p)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd;
	struct nfsrv_descript *nd = &nfsd;
	u_char *cp, *cp2, *fhp;
	int error, cnt, len, setnil;
	u_int32_t *opcntp;

	nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL, 0,
	    0, NULL);
	cp = dirpath;
	cnt = 0;		/* Number of Lookup operations added. */
	do {
		setnil = 0;
		/* Skip over any leading or repeated '/' characters. */
		while (*cp == '/')
			cp++;
		/* Find the end of this component. */
		cp2 = cp;
		while (*cp2 != '\0' && *cp2 != '/')
			cp2++;
		if (*cp2 == '/') {
			/* Terminate the component so strlen() works on it. */
			setnil = 1;
			*cp2 = '\0';
		}
		if (cp2 != cp) {
			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
			*tl = txdr_unsigned(NFSV4OP_LOOKUP);
			nfsm_strtom(nd, cp, strlen(cp));
			cnt++;
		}
		/* Put the '/' back and continue after it. */
		if (setnil)
			*cp2++ = '/';
		cp = cp2;
	} while (*cp != '\0');
	/* Fill in the operation count now that cnt is known. */
	if (NFSHASNFSV4N(nmp))
		/* Has a Sequence Op done by nfscl_reqstart(). */
		*opcntp = txdr_unsigned(3 + cnt);
	else
		*opcntp = txdr_unsigned(2 + cnt);
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_GETFH);
	nd->nd_flag |= ND_USEGSSNAME;
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
		NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error)
		return (error);
	if (nd->nd_repstat == 0) {
		/*
		 * Skip two words for each Lookup plus two more, which
		 * leaves tl at the length of the file handle.
		 */
		NFSM_DISSECT(tl, u_int32_t *, (3 + 2 * cnt) * NFSX_UNSIGNED);
		tl += (2 + 2 * cnt);
		if ((len = fxdr_unsigned(int, *tl)) <= 0 ||
			len > NFSX_FHMAX) {
			nd->nd_repstat = NFSERR_BADXDR;
		} else {
			fhp = malloc(len + 1, M_TEMP, M_WAITOK);
			nd->nd_repstat = nfsrv_mtostr(nd, fhp, len);
			if (nd->nd_repstat == 0) {
				/* Do not replace a file handle that is set. */
				NFSLOCKMNT(nmp);
				if (nmp->nm_fhsize == 0) {
					NFSBCOPY(fhp, nmp->nm_fh, len);
					nmp->nm_fhsize = len;
				}
				NFSUNLOCKMNT(nmp);
			}
			free(fhp, M_TEMP);
		}
	}
	error = nd->nd_repstat;
	/*
	 * NOTE(review): presumably the target of NFSM_DISSECT on a short
	 * reply, in which case the assignment above is bypassed - confirm
	 * against the macro definition.
	 */
nfsmout:
	m_freem(nd->nd_mrep);
	return (error);
}
5071 
5072 /*
5073  * This function performs the Delegreturn RPC.
5074  */
5075 int
5076 nfsrpc_delegreturn(struct nfscldeleg *dp, struct ucred *cred,
5077     struct nfsmount *nmp, NFSPROC_T *p, int syscred)
5078 {
5079 	u_int32_t *tl;
5080 	struct nfsrv_descript nfsd;
5081 	struct nfsrv_descript *nd = &nfsd;
5082 	int error;
5083 
5084 	nfscl_reqstart(nd, NFSPROC_DELEGRETURN, nmp, dp->nfsdl_fh,
5085 	    dp->nfsdl_fhlen, NULL, NULL, 0, 0, cred);
5086 	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
5087 	if (NFSHASNFSV4N(nmp))
5088 		*tl++ = 0;
5089 	else
5090 		*tl++ = dp->nfsdl_stateid.seqid;
5091 	*tl++ = dp->nfsdl_stateid.other[0];
5092 	*tl++ = dp->nfsdl_stateid.other[1];
5093 	*tl = dp->nfsdl_stateid.other[2];
5094 	if (syscred)
5095 		nd->nd_flag |= ND_USEGSSNAME;
5096 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5097 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5098 	if (error)
5099 		return (error);
5100 	error = nd->nd_repstat;
5101 	m_freem(nd->nd_mrep);
5102 	return (error);
5103 }
5104 
5105 /*
5106  * nfs getacl call.
5107  */
5108 int
5109 nfsrpc_getacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp)
5110 {
5111 	struct nfsrv_descript nfsd, *nd = &nfsd;
5112 	int error;
5113 	nfsattrbit_t attrbits;
5114 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
5115 
5116 	if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
5117 		return (EOPNOTSUPP);
5118 	NFSCL_REQSTART(nd, NFSPROC_GETACL, vp, cred);
5119 	NFSZERO_ATTRBIT(&attrbits);
5120 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
5121 	(void) nfsrv_putattrbit(nd, &attrbits);
5122 	error = nfscl_request(nd, vp, p, cred);
5123 	if (error)
5124 		return (error);
5125 	if (!nd->nd_repstat)
5126 		error = nfsv4_loadattr(nd, vp, NULL, NULL, NULL, 0, NULL,
5127 		    NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, p, cred);
5128 	else
5129 		error = nd->nd_repstat;
5130 	m_freem(nd->nd_mrep);
5131 	return (error);
5132 }
5133 
5134 /*
5135  * nfs setacl call.
5136  */
5137 int
5138 nfsrpc_setacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp)
5139 {
5140 	int error;
5141 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
5142 
5143 	if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
5144 		return (EOPNOTSUPP);
5145 	error = nfsrpc_setattr(vp, NULL, aclp, cred, p, NULL, NULL);
5146 	return (error);
5147 }
5148 
5149 /*
5150  * nfs setacl call.
5151  */
5152 static int
5153 nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
5154     struct acl *aclp, nfsv4stateid_t *stateidp)
5155 {
5156 	struct nfsrv_descript nfsd, *nd = &nfsd;
5157 	int error;
5158 	nfsattrbit_t attrbits;
5159 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
5160 
5161 	if (!NFSHASNFSV4(nmp))
5162 		return (EOPNOTSUPP);
5163 	NFSCL_REQSTART(nd, NFSPROC_SETACL, vp, cred);
5164 	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
5165 	NFSZERO_ATTRBIT(&attrbits);
5166 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
5167 	(void) nfsv4_fillattr(nd, vp->v_mount, vp, aclp, NULL, NULL, 0,
5168 	    &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL);
5169 	error = nfscl_request(nd, vp, p, cred);
5170 	if (error)
5171 		return (error);
5172 	/* Don't care about the pre/postop attributes */
5173 	m_freem(nd->nd_mrep);
5174 	return (nd->nd_repstat);
5175 }
5176 
/*
 * Do the NFSv4.1 Exchange ID.
 *
 * The request is sent on the socket nrp.  A minorvers of 0 means "use the
 * minor version of the mount".  The client owner consists of a verifier
 * (nfsboottime.tv_sec and clp->nfsc_rev) and the client's ID string; it is
 * followed by exchflags, NFSV4EXCH_SP4NONE and one implementation ID.
 *
 * On success a struct nfsclds is allocated with room for the server owner
 * string, filled in from the reply (client ID, sequence ID, flags and
 * server owner) and returned via *dspp, with its mutexes and session slots
 * initialized.  *dspp is NULL whenever no structure is returned.  As a side
 * effect NFSSTA_PNFS is set in nmp->nm_state when the server reports
 * NFSV4EXCH_USEPNFSMDS and NFSHASPNFSOPT(nmp) is true.
 *
 * Returns 0, the error from newnfs_request(), NFSERR_BADXDR for an out of
 * range server owner length, or the reply status.
 */
int
nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp,
    struct nfssockreq *nrp, int minorvers, uint32_t exchflags,
    struct nfsclds **dspp, struct ucred *cred, NFSPROC_T *p)
{
	uint32_t *tl, v41flags;
	struct nfsrv_descript nfsd;
	struct nfsrv_descript *nd = &nfsd;
	struct nfsclds *dsp;
	struct timespec verstime;
	int error, len;

	*dspp = NULL;
	if (minorvers == 0)
		minorvers = nmp->nm_minorvers;
	nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL,
	    NFS_VER4, minorvers, NULL);
	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(nfsboottime.tv_sec);	/* Client owner */
	*tl = txdr_unsigned(clp->nfsc_rev);
	(void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);

	NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(exchflags);
	*tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE);

	/* Set the implementation id4 */
	*tl = txdr_unsigned(1);
	(void) nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org"));
	(void) nfsm_strtom(nd, version, strlen(version));
	NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME);
	verstime.tv_sec = 1293840000;		/* Jan 1, 2011 */
	verstime.tv_nsec = 0;
	txdr_nfsv4time(&verstime, tl);
	nd->nd_flag |= ND_USEGSSNAME;
	error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	NFSCL_DEBUG(1, "exchangeid err=%d reps=%d\n", error,
	    (int)nd->nd_repstat);
	if (error != 0)
		return (error);
	if (nd->nd_repstat == 0) {
		/*
		 * Eight words are dissected.  The first four are the client
		 * ID (2), the sequence ID and the flags; the last one
		 * (tl + 7) is the length of the server owner string, which
		 * is checked before anything is allocated.
		 */
		NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_UNSIGNED + NFSX_HYPER);
		len = fxdr_unsigned(int, *(tl + 7));
		if (len < 0 || len > NFSV4_OPAQUELIMIT) {
			error = NFSERR_BADXDR;
			goto nfsmout;
		}
		/* The server owner string is stored after the structure. */
		dsp = malloc(sizeof(struct nfsclds) + len + 1, M_NFSCLDS,
		    M_WAITOK | M_ZERO);
		dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
		dsp->nfsclds_servownlen = len;
		dsp->nfsclds_sess.nfsess_clientid.lval[0] = *tl++;
		dsp->nfsclds_sess.nfsess_clientid.lval[1] = *tl++;
		dsp->nfsclds_sess.nfsess_sequenceid =
		    fxdr_unsigned(uint32_t, *tl++);
		v41flags = fxdr_unsigned(uint32_t, *tl);
		if ((v41flags & NFSV4EXCH_USEPNFSMDS) != 0 &&
		    NFSHASPNFSOPT(nmp)) {
			NFSCL_DEBUG(1, "set PNFS\n");
			NFSLOCKMNT(nmp);
			nmp->nm_state |= NFSSTA_PNFS;
			NFSUNLOCKMNT(nmp);
			dsp->nfsclds_flags |= NFSCLDS_MDS;
		}
		if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0)
			dsp->nfsclds_flags |= NFSCLDS_DS;
		if (minorvers == NFSV42_MINORVERSION)
			dsp->nfsclds_flags |= NFSCLDS_MINORV2;
		if (len > 0)
			nd->nd_repstat = nfsrv_mtostr(nd,
			    dsp->nfsclds_serverown, len);
		if (nd->nd_repstat == 0) {
			mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
			mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
			    NULL, MTX_DEF);
			nfscl_initsessionslots(&dsp->nfsclds_sess);
			*dspp = dsp;
		} else
			/* The server owner could not be copied. */
			free(dsp, M_NFSCLDS);
	}
	error = nd->nd_repstat;
nfsmout:
	m_freem(nd->nd_mrep);
	return (error);
}
5266 
/*
 * Do the NFSv4.1 Create Session.
 *
 * The request is sent on the socket nrp for the client ID held in sep,
 * using sequenceid.  The minor version is that of the mount when dsp is
 * NULL; otherwise it is 4.2 or 4.1 depending on NFSCLDS_MINORV2 in dsp.
 * A non-zero mds additionally enables the back channel request (when
 * callbacks are enabled and nfs_numnfscbd > 0), the larger NFSv4.2 fore
 * channel sizes and the recording of a persistent session in the mount.
 *
 * Side effects on nmp: nm_rsize and nm_wsize are set to NFS_MAXBSIZE when
 * they are 0 or larger than that, and may be halved to fit the maximum
 * request and response sizes in the reply.
 *
 * On success sep receives the session ID, sequence ID, the fore channel's
 * maximum request, response and cached response sizes and the fore and
 * back channel slot counts.
 *
 * Returns 0, the error from newnfs_request(), NFSERR_BADXDR for a bad rdma
 * ird count, or the reply status.
 */
int
nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep,
    struct nfssockreq *nrp, struct nfsclds *dsp, uint32_t sequenceid, int mds,
    struct ucred *cred, NFSPROC_T *p)
{
	uint32_t crflags, maxval, *tl;
	struct nfsrv_descript nfsd;
	struct nfsrv_descript *nd = &nfsd;
	int error, irdcnt, minorvers;

	/* Make sure nm_rsize, nm_wsize is set. */
	if (nmp->nm_rsize > NFS_MAXBSIZE || nmp->nm_rsize == 0)
		nmp->nm_rsize = NFS_MAXBSIZE;
	if (nmp->nm_wsize > NFS_MAXBSIZE || nmp->nm_wsize == 0)
		nmp->nm_wsize = NFS_MAXBSIZE;
	if (dsp == NULL)
		minorvers = nmp->nm_minorvers;
	else if ((dsp->nfsclds_flags & NFSCLDS_MINORV2) != 0)
		minorvers = NFSV42_MINORVERSION;
	else
		minorvers = NFSV41_MINORVERSION;
	nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL,
	    NFS_VER4, minorvers, NULL);
	/* Client ID (2 words), sequence ID and the create session flags. */
	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
	*tl++ = sep->nfsess_clientid.lval[0];
	*tl++ = sep->nfsess_clientid.lval[1];
	*tl++ = txdr_unsigned(sequenceid);
	/* A persistent session is not requested for a read-only mount. */
	crflags = (NFSMNT_RDONLY(nmp->nm_mountp) ? 0 : NFSV4CRSESS_PERSIST);
	if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0 && mds != 0)
		crflags |= NFSV4CRSESS_CONNBACKCHAN;
	*tl = txdr_unsigned(crflags);

	/* Fill in fore channel attributes. */
	NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
	*tl++ = 0;				/* Header pad size */
	if ((nd->nd_flag & ND_NFSV42) != 0 && mds != 0 && sb_max_adj >=
	    nmp->nm_wsize && sb_max_adj >= nmp->nm_rsize) {
		/*
		 * NFSv4.2 Extended Attribute operations may want to do
		 * requests/replies that are larger than nm_rsize/nm_wsize.
		 */
		*tl++ = txdr_unsigned(sb_max_adj - NFS_MAXXDR);
		*tl++ = txdr_unsigned(sb_max_adj - NFS_MAXXDR);
	} else {
		/* Max request size, then max response size. */
		*tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);
		*tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);
	}
	*tl++ = txdr_unsigned(4096);		/* Max response size cached */
	*tl++ = txdr_unsigned(20);		/* Max operations */
	*tl++ = txdr_unsigned(64);		/* Max slots */
	*tl = 0;				/* No rdma ird */

	/* Fill in back channel attributes. */
	NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
	*tl++ = 0;				/* Header pad size */
	*tl++ = txdr_unsigned(10000);		/* Max request size */
	*tl++ = txdr_unsigned(10000);		/* Max response size */
	*tl++ = txdr_unsigned(4096);		/* Max response size cached */
	*tl++ = txdr_unsigned(4);		/* Max operations */
	*tl++ = txdr_unsigned(NFSV4_CBSLOTS);	/* Max slots */
	*tl = 0;				/* No rdma ird */

	NFSM_BUILD(tl, uint32_t *, 8 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(NFS_CALLBCKPROG);	/* Call back prog # */

	/* Allow AUTH_SYS callbacks as uid, gid == 0. */
	*tl++ = txdr_unsigned(1);		/* Auth_sys only */
	*tl++ = txdr_unsigned(AUTH_SYS);	/* AUTH_SYS type */
	*tl++ = txdr_unsigned(nfsboottime.tv_sec); /* time stamp */
	*tl++ = 0;				/* Null machine name */
	*tl++ = 0;				/* Uid == 0 */
	*tl++ = 0;				/* Gid == 0 */
	*tl = 0;				/* No additional gids */
	nd->nd_flag |= ND_USEGSSNAME;
	error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG,
	    NFS_VER4, NULL, 1, NULL, NULL);
	if (error != 0)
		return (error);
	if (nd->nd_repstat == 0) {
		/* Session ID, sequence ID and the flags that were granted. */
		NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
		    2 * NFSX_UNSIGNED);
		bcopy(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID);
		tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
		sep->nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++);
		crflags = fxdr_unsigned(uint32_t, *tl);
		if ((crflags & NFSV4CRSESS_PERSIST) != 0 && mds != 0) {
			NFSLOCKMNT(nmp);
			nmp->nm_state |= NFSSTA_SESSPERSIST;
			NFSUNLOCKMNT(nmp);
		}

		/* Get the fore channel slot count. */
		NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
		tl++;			/* Skip the header pad size. */

		/*
		 * Make sure nm_wsize is small enough.
		 * nm_wsize is halved until it fits in the maximum request
		 * size or is no longer above the threshold below.
		 * NOTE(review): 8096 is not a power of two; possibly 8192
		 * was intended - confirm before changing.
		 */
		maxval = fxdr_unsigned(uint32_t, *tl++);
		while (maxval < nmp->nm_wsize + NFS_MAXXDR) {
			if (nmp->nm_wsize > 8096)
				nmp->nm_wsize /= 2;
			else
				break;
		}
		sep->nfsess_maxreq = maxval;

		/* Make sure nm_rsize is small enough. */
		maxval = fxdr_unsigned(uint32_t, *tl++);
		while (maxval < nmp->nm_rsize + NFS_MAXXDR) {
			if (nmp->nm_rsize > 8096)
				nmp->nm_rsize /= 2;
			else
				break;
		}
		sep->nfsess_maxresp = maxval;

		sep->nfsess_maxcache = fxdr_unsigned(int, *tl++);
		/* Skip the word between the cache size and the slot count. */
		tl++;
		sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++);
		NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots);
		/* At most one rdma ird word is accepted; it is skipped. */
		irdcnt = fxdr_unsigned(int, *tl);
		if (irdcnt < 0 || irdcnt > 1) {
			error = NFSERR_BADXDR;
			goto nfsmout;
		}
		if (irdcnt > 0)
			NFSM_DISSECT(tl, uint32_t *, irdcnt * NFSX_UNSIGNED);

		/* and the back channel slot count. */
		NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
		tl += 5;
		sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl);
		NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots);
	}
	error = nd->nd_repstat;
nfsmout:
	m_freem(nd->nd_mrep);
	return (error);
}
5408 
5409 /*
5410  * Do the NFSv4.1 Destroy Client.
5411  */
5412 int
5413 nfsrpc_destroyclient(struct nfsmount *nmp, struct nfsclclient *clp,
5414     struct ucred *cred, NFSPROC_T *p)
5415 {
5416 	uint32_t *tl;
5417 	struct nfsrv_descript nfsd;
5418 	struct nfsrv_descript *nd = &nfsd;
5419 	int error;
5420 	struct nfsclsession *tsep;
5421 
5422 	nfscl_reqstart(nd, NFSPROC_DESTROYCLIENT, nmp, NULL, 0, NULL, NULL, 0,
5423 	    0, NULL);
5424 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5425 	tsep = nfsmnt_mdssession(nmp);
5426 	*tl++ = tsep->nfsess_clientid.lval[0];
5427 	*tl = tsep->nfsess_clientid.lval[1];
5428 	nd->nd_flag |= ND_USEGSSNAME;
5429 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5430 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5431 	if (error != 0)
5432 		return (error);
5433 	error = nd->nd_repstat;
5434 	m_freem(nd->nd_mrep);
5435 	return (error);
5436 }
5437 
5438 /*
5439  * Do the NFSv4.1 LayoutGet.
5440  */
5441 static int
5442 nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode,
5443     uint64_t offset, uint64_t len, uint64_t minlen, int layouttype,
5444     int layoutlen, nfsv4stateid_t *stateidp, int *retonclosep,
5445     struct nfsclflayouthead *flhp, struct ucred *cred, NFSPROC_T *p)
5446 {
5447 	struct nfsrv_descript nfsd, *nd = &nfsd;
5448 	int error;
5449 
5450 	nfscl_reqstart(nd, NFSPROC_LAYOUTGET, nmp, fhp, fhlen, NULL, NULL, 0,
5451 	    0, cred);
5452 	nfsrv_setuplayoutget(nd, iomode, offset, len, minlen, stateidp,
5453 	    layouttype, layoutlen, 0);
5454 	nd->nd_flag |= ND_USEGSSNAME;
5455 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5456 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5457 	NFSCL_DEBUG(4, "layget err=%d st=%d\n", error, nd->nd_repstat);
5458 	if (error != 0)
5459 		return (error);
5460 	if (nd->nd_repstat == 0)
5461 		error = nfsrv_parselayoutget(nmp, nd, stateidp, retonclosep,
5462 		    flhp);
5463 	if (error == 0 && nd->nd_repstat != 0)
5464 		error = nd->nd_repstat;
5465 	m_freem(nd->nd_mrep);
5466 	return (error);
5467 }
5468 
5469 /*
5470  * Do the NFSv4.1 Get Device Info.
5471  */
5472 int
5473 nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype,
5474     uint32_t *notifybitsp, struct nfscldevinfo **ndip, struct ucred *cred,
5475     NFSPROC_T *p)
5476 {
5477 	uint32_t cnt, *tl, vers, minorvers;
5478 	struct nfsrv_descript nfsd;
5479 	struct nfsrv_descript *nd = &nfsd;
5480 	struct sockaddr_in sin, ssin;
5481 	struct sockaddr_in6 sin6, ssin6;
5482 	struct nfsclds *dsp = NULL, **dspp, **gotdspp;
5483 	struct nfscldevinfo *ndi;
5484 	int addrcnt = 0, bitcnt, error, gotminor, gotvers, i, isudp, j;
5485 	int stripecnt;
5486 	uint8_t stripeindex;
5487 	sa_family_t af, safilled;
5488 
5489 	ssin.sin_port = 0;		/* To shut up compiler. */
5490 	ssin.sin_addr.s_addr = 0;	/* ditto */
5491 	*ndip = NULL;
5492 	ndi = NULL;
5493 	gotdspp = NULL;
5494 	nfscl_reqstart(nd, NFSPROC_GETDEVICEINFO, nmp, NULL, 0, NULL, NULL, 0,
5495 	    0, cred);
5496 	NFSM_BUILD(tl, uint32_t *, NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED);
5497 	NFSBCOPY(deviceid, tl, NFSX_V4DEVICEID);
5498 	tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
5499 	*tl++ = txdr_unsigned(layouttype);
5500 	*tl++ = txdr_unsigned(100000);
5501 	if (notifybitsp != NULL && *notifybitsp != 0) {
5502 		*tl = txdr_unsigned(1);		/* One word of bits. */
5503 		NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
5504 		*tl = txdr_unsigned(*notifybitsp);
5505 	} else
5506 		*tl = txdr_unsigned(0);
5507 	nd->nd_flag |= ND_USEGSSNAME;
5508 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5509 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5510 	if (error != 0)
5511 		return (error);
5512 	if (nd->nd_repstat == 0) {
5513 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5514 		if (layouttype != fxdr_unsigned(int, *tl))
5515 			printf("EEK! devinfo layout type not same!\n");
5516 		if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
5517 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5518 			stripecnt = fxdr_unsigned(int, *tl);
5519 			NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt);
5520 			if (stripecnt < 1 || stripecnt > 4096) {
5521 				printf("pNFS File layout devinfo stripecnt %d:"
5522 				    " out of range\n", stripecnt);
5523 				error = NFSERR_BADXDR;
5524 				goto nfsmout;
5525 			}
5526 			NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) *
5527 			    NFSX_UNSIGNED);
5528 			addrcnt = fxdr_unsigned(int, *(tl + stripecnt));
5529 			NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt);
5530 			if (addrcnt < 1 || addrcnt > 128) {
5531 				printf("NFS devinfo addrcnt %d: out of range\n",
5532 				    addrcnt);
5533 				error = NFSERR_BADXDR;
5534 				goto nfsmout;
5535 			}
5536 
5537 			/*
5538 			 * Now we know how many stripe indices and addresses, so
5539 			 * we can allocate the structure the correct size.
5540 			 */
5541 			i = (stripecnt * sizeof(uint8_t)) /
5542 			    sizeof(struct nfsclds *) + 1;
5543 			NFSCL_DEBUG(4, "stripeindices=%d\n", i);
5544 			ndi = malloc(sizeof(*ndi) + (addrcnt + i) *
5545 			    sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK |
5546 			    M_ZERO);
5547 			NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
5548 			    NFSX_V4DEVICEID);
5549 			ndi->nfsdi_refcnt = 0;
5550 			ndi->nfsdi_flags = NFSDI_FILELAYOUT;
5551 			ndi->nfsdi_stripecnt = stripecnt;
5552 			ndi->nfsdi_addrcnt = addrcnt;
5553 			/* Fill in the stripe indices. */
5554 			for (i = 0; i < stripecnt; i++) {
5555 				stripeindex = fxdr_unsigned(uint8_t, *tl++);
5556 				NFSCL_DEBUG(4, "stripeind=%d\n", stripeindex);
5557 				if (stripeindex >= addrcnt) {
5558 					printf("pNFS File Layout devinfo"
5559 					    " stripeindex %d: too big\n",
5560 					    (int)stripeindex);
5561 					error = NFSERR_BADXDR;
5562 					goto nfsmout;
5563 				}
5564 				nfsfldi_setstripeindex(ndi, i, stripeindex);
5565 			}
5566 		} else if (layouttype == NFSLAYOUT_FLEXFILE) {
5567 			/* For Flex File, we only get one address list. */
5568 			ndi = malloc(sizeof(*ndi) + sizeof(struct nfsclds *),
5569 			    M_NFSDEVINFO, M_WAITOK | M_ZERO);
5570 			NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
5571 			    NFSX_V4DEVICEID);
5572 			ndi->nfsdi_refcnt = 0;
5573 			ndi->nfsdi_flags = NFSDI_FLEXFILE;
5574 			addrcnt = ndi->nfsdi_addrcnt = 1;
5575 		}
5576 
5577 		/* Now, dissect the server address(es). */
5578 		safilled = AF_UNSPEC;
5579 		for (i = 0; i < addrcnt; i++) {
5580 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5581 			cnt = fxdr_unsigned(uint32_t, *tl);
5582 			if (cnt == 0) {
5583 				printf("NFS devinfo 0 len addrlist\n");
5584 				error = NFSERR_BADXDR;
5585 				goto nfsmout;
5586 			}
5587 			dspp = nfsfldi_addr(ndi, i);
5588 			safilled = AF_UNSPEC;
5589 			for (j = 0; j < cnt; j++) {
5590 				error = nfsv4_getipaddr(nd, &sin, &sin6, &af,
5591 				    &isudp);
5592 				if (error != 0 && error != EPERM) {
5593 					error = NFSERR_BADXDR;
5594 					goto nfsmout;
5595 				}
5596 				if (error == 0 && isudp == 0) {
5597 					/*
5598 					 * The priority is:
5599 					 * - Same address family.
5600 					 * Save the address and dspp, so that
5601 					 * the connection can be done after
5602 					 * parsing is complete.
5603 					 */
5604 					if (safilled == AF_UNSPEC ||
5605 					    (af == nmp->nm_nam->sa_family &&
5606 					     safilled != nmp->nm_nam->sa_family)
5607 					   ) {
5608 						if (af == AF_INET)
5609 							ssin = sin;
5610 						else
5611 							ssin6 = sin6;
5612 						safilled = af;
5613 						gotdspp = dspp;
5614 					}
5615 				}
5616 			}
5617 		}
5618 
5619 		gotvers = NFS_VER4;	/* Default NFSv4.1 for File Layout. */
5620 		gotminor = NFSV41_MINORVERSION;
5621 		/* For Flex File, we will take one of the versions to use. */
5622 		if (layouttype == NFSLAYOUT_FLEXFILE) {
5623 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5624 			j = fxdr_unsigned(int, *tl);
5625 			if (j < 1 || j > NFSDEV_MAXVERS) {
5626 				printf("pNFS: too many versions\n");
5627 				error = NFSERR_BADXDR;
5628 				goto nfsmout;
5629 			}
5630 			gotvers = 0;
5631 			gotminor = 0;
5632 			for (i = 0; i < j; i++) {
5633 				NFSM_DISSECT(tl, uint32_t *, 5 * NFSX_UNSIGNED);
5634 				vers = fxdr_unsigned(uint32_t, *tl++);
5635 				minorvers = fxdr_unsigned(uint32_t, *tl++);
5636 				if (vers == NFS_VER3)
5637 					minorvers = 0;
5638 				if ((vers == NFS_VER4 && ((minorvers ==
5639 				    NFSV41_MINORVERSION && gotminor == 0) ||
5640 				    minorvers == NFSV42_MINORVERSION)) ||
5641 				    (vers == NFS_VER3 && gotvers == 0)) {
5642 					gotvers = vers;
5643 					gotminor = minorvers;
5644 					/* We'll take this one. */
5645 					ndi->nfsdi_versindex = i;
5646 					ndi->nfsdi_vers = vers;
5647 					ndi->nfsdi_minorvers = minorvers;
5648 					ndi->nfsdi_rsize = fxdr_unsigned(
5649 					    uint32_t, *tl++);
5650 					ndi->nfsdi_wsize = fxdr_unsigned(
5651 					    uint32_t, *tl++);
5652 					if (*tl == newnfs_true)
5653 						ndi->nfsdi_flags |=
5654 						    NFSDI_TIGHTCOUPLED;
5655 					else
5656 						ndi->nfsdi_flags &=
5657 						    ~NFSDI_TIGHTCOUPLED;
5658 				}
5659 			}
5660 			if (gotvers == 0) {
5661 				printf("pNFS: no NFSv3, NFSv4.1 or NFSv4.2\n");
5662 				error = NFSERR_BADXDR;
5663 				goto nfsmout;
5664 			}
5665 		}
5666 
5667 		/* And the notify bits. */
5668 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5669 		bitcnt = fxdr_unsigned(int, *tl);
5670 		if (bitcnt > 0) {
5671 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5672 			if (notifybitsp != NULL)
5673 				*notifybitsp =
5674 				    fxdr_unsigned(uint32_t, *tl);
5675 		}
5676 		if (safilled != AF_UNSPEC) {
5677 			KASSERT(ndi != NULL, ("ndi is NULL"));
5678 			*ndip = ndi;
5679 		} else
5680 			error = EPERM;
5681 		if (error == 0) {
5682 			/*
5683 			 * Now we can do a TCP connection for the correct
5684 			 * NFS version and IP address.
5685 			 */
5686 			error = nfsrpc_fillsa(nmp, &ssin, &ssin6, safilled,
5687 			    gotvers, gotminor, &dsp, p);
5688 		}
5689 		if (error == 0) {
5690 			KASSERT(gotdspp != NULL, ("gotdspp is NULL"));
5691 			*gotdspp = dsp;
5692 		}
5693 	}
5694 	if (nd->nd_repstat != 0 && error == 0)
5695 		error = nd->nd_repstat;
5696 nfsmout:
5697 	if (error != 0 && ndi != NULL)
5698 		nfscl_freedevinfo(ndi);
5699 	m_freem(nd->nd_mrep);
5700 	return (error);
5701 }
5702 
5703 /*
5704  * Do the NFSv4.1 LayoutCommit.
5705  */
5706 int
5707 nfsrpc_layoutcommit(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5708     uint64_t off, uint64_t len, uint64_t lastbyte, nfsv4stateid_t *stateidp,
5709     int layouttype, struct ucred *cred, NFSPROC_T *p)
5710 {
5711 	uint32_t *tl;
5712 	struct nfsrv_descript nfsd, *nd = &nfsd;
5713 	int error;
5714 
5715 	nfscl_reqstart(nd, NFSPROC_LAYOUTCOMMIT, nmp, fh, fhlen, NULL, NULL,
5716 	    0, 0, cred);
5717 	NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
5718 	    NFSX_STATEID);
5719 	txdr_hyper(off, tl);
5720 	tl += 2;
5721 	txdr_hyper(len, tl);
5722 	tl += 2;
5723 	if (reclaim != 0)
5724 		*tl++ = newnfs_true;
5725 	else
5726 		*tl++ = newnfs_false;
5727 	*tl++ = txdr_unsigned(stateidp->seqid);
5728 	*tl++ = stateidp->other[0];
5729 	*tl++ = stateidp->other[1];
5730 	*tl++ = stateidp->other[2];
5731 	*tl++ = newnfs_true;
5732 	if (lastbyte < off)
5733 		lastbyte = off;
5734 	else if (lastbyte >= (off + len))
5735 		lastbyte = off + len - 1;
5736 	txdr_hyper(lastbyte, tl);
5737 	tl += 2;
5738 	*tl++ = newnfs_false;
5739 	*tl++ = txdr_unsigned(layouttype);
5740 	/* All supported layouts are 0 length. */
5741 	*tl = txdr_unsigned(0);
5742 	nd->nd_flag |= ND_USEGSSNAME;
5743 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5744 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5745 	if (error != 0)
5746 		return (error);
5747 	error = nd->nd_repstat;
5748 	m_freem(nd->nd_mrep);
5749 	return (error);
5750 }
5751 
5752 /*
5753  * Do the NFSv4.1 LayoutReturn.
5754  */
5755 int
5756 nfsrpc_layoutreturn(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5757     int layouttype, uint32_t iomode, int layoutreturn, uint64_t offset,
5758     uint64_t len, nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
5759     uint32_t stat, uint32_t op, char *devid)
5760 {
5761 	uint32_t *tl;
5762 	struct nfsrv_descript nfsd, *nd = &nfsd;
5763 	uint64_t tu64;
5764 	int error;
5765 
5766 	nfscl_reqstart(nd, NFSPROC_LAYOUTRETURN, nmp, fh, fhlen, NULL, NULL,
5767 	    0, 0, cred);
5768 	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
5769 	if (reclaim != 0)
5770 		*tl++ = newnfs_true;
5771 	else
5772 		*tl++ = newnfs_false;
5773 	*tl++ = txdr_unsigned(layouttype);
5774 	*tl++ = txdr_unsigned(iomode);
5775 	*tl = txdr_unsigned(layoutreturn);
5776 	if (layoutreturn == NFSLAYOUTRETURN_FILE) {
5777 		NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
5778 		    NFSX_UNSIGNED);
5779 		txdr_hyper(offset, tl);
5780 		tl += 2;
5781 		txdr_hyper(len, tl);
5782 		tl += 2;
5783 		NFSCL_DEBUG(4, "layoutret stseq=%d\n", (int)stateidp->seqid);
5784 		*tl++ = txdr_unsigned(stateidp->seqid);
5785 		*tl++ = stateidp->other[0];
5786 		*tl++ = stateidp->other[1];
5787 		*tl++ = stateidp->other[2];
5788 		if (layouttype == NFSLAYOUT_NFSV4_1_FILES)
5789 			*tl = txdr_unsigned(0);
5790 		else if (layouttype == NFSLAYOUT_FLEXFILE) {
5791 			if (stat != 0) {
5792 				*tl = txdr_unsigned(2 * NFSX_HYPER +
5793 				    NFSX_STATEID + NFSX_V4DEVICEID + 5 *
5794 				    NFSX_UNSIGNED);
5795 				NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER +
5796 				    NFSX_STATEID + NFSX_V4DEVICEID + 5 *
5797 				    NFSX_UNSIGNED);
5798 				*tl++ = txdr_unsigned(1);	/* One error. */
5799 				tu64 = 0;			/* Offset. */
5800 				txdr_hyper(tu64, tl); tl += 2;
5801 				tu64 = UINT64_MAX;		/* Length. */
5802 				txdr_hyper(tu64, tl); tl += 2;
5803 				NFSBCOPY(stateidp, tl, NFSX_STATEID);
5804 				tl += (NFSX_STATEID / NFSX_UNSIGNED);
5805 				*tl++ = txdr_unsigned(1);	/* One error. */
5806 				NFSBCOPY(devid, tl, NFSX_V4DEVICEID);
5807 				tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
5808 				*tl++ = txdr_unsigned(stat);
5809 				*tl++ = txdr_unsigned(op);
5810 			} else {
5811 				*tl = txdr_unsigned(2 * NFSX_UNSIGNED);
5812 				NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5813 				/* No ioerrs. */
5814 				*tl++ = 0;
5815 			}
5816 			*tl = 0;	/* No stats yet. */
5817 		}
5818 	}
5819 	nd->nd_flag |= ND_USEGSSNAME;
5820 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5821 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5822 	if (error != 0)
5823 		return (error);
5824 	if (nd->nd_repstat == 0) {
5825 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5826 		if (*tl != 0) {
5827 			NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
5828 			stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
5829 			stateidp->other[0] = *tl++;
5830 			stateidp->other[1] = *tl++;
5831 			stateidp->other[2] = *tl;
5832 		}
5833 	} else
5834 		error = nd->nd_repstat;
5835 nfsmout:
5836 	m_freem(nd->nd_mrep);
5837 	return (error);
5838 }
5839 
5840 /*
5841  * Do the NFSv4.2 LayoutError.
5842  */
5843 static int
5844 nfsrpc_layouterror(struct nfsmount *nmp, uint8_t *fh, int fhlen, uint64_t offset,
5845     uint64_t len, nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
5846     uint32_t stat, uint32_t op, char *devid)
5847 {
5848 	uint32_t *tl;
5849 	struct nfsrv_descript nfsd, *nd = &nfsd;
5850 	int error;
5851 
5852 	nfscl_reqstart(nd, NFSPROC_LAYOUTERROR, nmp, fh, fhlen, NULL, NULL,
5853 	    0, 0, cred);
5854 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
5855 	    NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED);
5856 	txdr_hyper(offset, tl); tl += 2;
5857 	txdr_hyper(len, tl); tl += 2;
5858 	*tl++ = txdr_unsigned(stateidp->seqid);
5859 	*tl++ = stateidp->other[0];
5860 	*tl++ = stateidp->other[1];
5861 	*tl++ = stateidp->other[2];
5862 	*tl++ = txdr_unsigned(1);
5863 	NFSBCOPY(devid, tl, NFSX_V4DEVICEID);
5864 	tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
5865 	*tl++ = txdr_unsigned(stat);
5866 	*tl = txdr_unsigned(op);
5867 	nd->nd_flag |= ND_USEGSSNAME;
5868 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5869 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5870 	if (error != 0)
5871 		return (error);
5872 	if (nd->nd_repstat != 0)
5873 		error = nd->nd_repstat;
5874 	m_freem(nd->nd_mrep);
5875 	return (error);
5876 }
5877 
5878 /*
5879  * Acquire a layout and devinfo, if possible. The caller must have acquired
5880  * a reference count on the nfsclclient structure before calling this.
5881  * Return the layout in lypp with a reference count on it, if successful.
5882  */
5883 static int
5884 nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp,
5885     int iomode, uint32_t rw, uint32_t *notifybitsp, nfsv4stateid_t *stateidp,
5886     uint64_t off, struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p)
5887 {
5888 	struct nfscllayout *lyp;
5889 	struct nfsclflayout *flp;
5890 	struct nfsclflayouthead flh;
5891 	int error = 0, islocked, layoutlen, layouttype, recalled, retonclose;
5892 	nfsv4stateid_t stateid;
5893 	struct nfsclsession *tsep;
5894 
5895 	*lypp = NULL;
5896 	if (NFSHASFLEXFILE(nmp))
5897 		layouttype = NFSLAYOUT_FLEXFILE;
5898 	else
5899 		layouttype = NFSLAYOUT_NFSV4_1_FILES;
5900 	/*
5901 	 * If lyp is returned non-NULL, there will be a refcnt (shared lock)
5902 	 * on it, iff flp != NULL or a lock (exclusive lock) on it iff
5903 	 * flp == NULL.
5904 	 */
5905 	lyp = nfscl_getlayout(nmp->nm_clp, nfhp->nfh_fh, nfhp->nfh_len,
5906 	    off, rw, &flp, &recalled);
5907 	islocked = 0;
5908 	if (lyp == NULL || flp == NULL) {
5909 		if (recalled != 0)
5910 			return (EIO);
5911 		LIST_INIT(&flh);
5912 		tsep = nfsmnt_mdssession(nmp);
5913 		layoutlen = tsep->nfsess_maxcache -
5914 		    (NFSX_STATEID + 3 * NFSX_UNSIGNED);
5915 		if (lyp == NULL) {
5916 			stateid.seqid = 0;
5917 			stateid.other[0] = stateidp->other[0];
5918 			stateid.other[1] = stateidp->other[1];
5919 			stateid.other[2] = stateidp->other[2];
5920 			error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5921 			    nfhp->nfh_len, iomode, (uint64_t)0, UINT64_MAX,
5922 			    (uint64_t)0, layouttype, layoutlen, &stateid,
5923 			    &retonclose, &flh, cred, p);
5924 		} else {
5925 			islocked = 1;
5926 			stateid.seqid = lyp->nfsly_stateid.seqid;
5927 			stateid.other[0] = lyp->nfsly_stateid.other[0];
5928 			stateid.other[1] = lyp->nfsly_stateid.other[1];
5929 			stateid.other[2] = lyp->nfsly_stateid.other[2];
5930 			error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5931 			    nfhp->nfh_len, iomode, off, UINT64_MAX,
5932 			    (uint64_t)0, layouttype, layoutlen, &stateid,
5933 			    &retonclose, &flh, cred, p);
5934 		}
5935 		error = nfsrpc_layoutgetres(nmp, vp, nfhp->nfh_fh,
5936 		    nfhp->nfh_len, &stateid, retonclose, notifybitsp, &lyp,
5937 		    &flh, layouttype, error, NULL, cred, p);
5938 		if (error == 0)
5939 			*lypp = lyp;
5940 		else if (islocked != 0)
5941 			nfscl_rellayout(lyp, 1);
5942 	} else
5943 		*lypp = lyp;
5944 	return (error);
5945 }
5946 
5947 /*
5948  * Do a TCP connection plus exchange id and create session.
5949  * If successful, a "struct nfsclds" is linked into the list for the
5950  * mount point and a pointer to it is returned.
5951  */
5952 static int
5953 nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_in *sin,
5954     struct sockaddr_in6 *sin6, sa_family_t af, int vers, int minorvers,
5955     struct nfsclds **dspp, NFSPROC_T *p)
5956 {
5957 	struct sockaddr_in *msad, *sad;
5958 	struct sockaddr_in6 *msad6, *sad6;
5959 	struct nfsclclient *clp;
5960 	struct nfssockreq *nrp;
5961 	struct nfsclds *dsp, *tdsp;
5962 	int error, firsttry;
5963 	enum nfsclds_state retv;
5964 	uint32_t sequenceid = 0;
5965 
5966 	KASSERT(nmp->nm_sockreq.nr_cred != NULL,
5967 	    ("nfsrpc_fillsa: NULL nr_cred"));
5968 	NFSLOCKCLSTATE();
5969 	clp = nmp->nm_clp;
5970 	NFSUNLOCKCLSTATE();
5971 	if (clp == NULL)
5972 		return (EPERM);
5973 	if (af == AF_INET) {
5974 		NFSLOCKMNT(nmp);
5975 		/*
5976 		 * Check to see if we already have a session for this
5977 		 * address that is usable for a DS.
5978 		 * Note that the MDS's address is in a different place
5979 		 * than the sessions already acquired for DS's.
5980 		 */
5981 		msad = (struct sockaddr_in *)nmp->nm_sockreq.nr_nam;
5982 		tdsp = TAILQ_FIRST(&nmp->nm_sess);
5983 		while (tdsp != NULL) {
5984 			if (msad != NULL && msad->sin_family == AF_INET &&
5985 			    sin->sin_addr.s_addr == msad->sin_addr.s_addr &&
5986 			    sin->sin_port == msad->sin_port &&
5987 			    (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
5988 			    tdsp->nfsclds_sess.nfsess_defunct == 0) {
5989 				*dspp = tdsp;
5990 				NFSUNLOCKMNT(nmp);
5991 				NFSCL_DEBUG(4, "fnd same addr\n");
5992 				return (0);
5993 			}
5994 			tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
5995 			if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
5996 				msad = (struct sockaddr_in *)
5997 				    tdsp->nfsclds_sockp->nr_nam;
5998 			else
5999 				msad = NULL;
6000 		}
6001 		NFSUNLOCKMNT(nmp);
6002 
6003 		/* No IP address match, so look for new/trunked one. */
6004 		sad = malloc(sizeof(*sad), M_SONAME, M_WAITOK | M_ZERO);
6005 		sad->sin_len = sizeof(*sad);
6006 		sad->sin_family = AF_INET;
6007 		sad->sin_port = sin->sin_port;
6008 		sad->sin_addr.s_addr = sin->sin_addr.s_addr;
6009 		if (NFSHASPNFS(nmp) && NFSHASKERB(nmp)) {
6010 			/* For pNFS, a separate server principal is needed. */
6011 			nrp = malloc(sizeof(*nrp) + NI_MAXSERV + NI_MAXHOST,
6012 			    M_NFSSOCKREQ, M_WAITOK | M_ZERO);
6013 			/*
6014 			 * Use the latter part of nr_srvprinc as a temporary
6015 			 * buffer for the IP address.
6016 			 */
6017 			inet_ntoa_r(sad->sin_addr,
6018 			    &nrp->nr_srvprinc[NI_MAXSERV]);
6019 			NFSCL_DEBUG(1, "nfsrpc_fillsa: DS IP=%s\n",
6020 			    &nrp->nr_srvprinc[NI_MAXSERV]);
6021 			if (!rpc_gss_ip_to_srv_principal_call(
6022 			    &nrp->nr_srvprinc[NI_MAXSERV], "nfs",
6023 			    nrp->nr_srvprinc))
6024 				nrp->nr_srvprinc[0] = '\0';
6025 			NFSCL_DEBUG(1, "nfsrpc_fillsa: srv principal=%s\n",
6026 			    nrp->nr_srvprinc);
6027 		} else
6028 			nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ,
6029 			    M_WAITOK | M_ZERO);
6030 		nrp->nr_nam = (struct sockaddr *)sad;
6031 	} else if (af == AF_INET6) {
6032 		NFSLOCKMNT(nmp);
6033 		/*
6034 		 * Check to see if we already have a session for this
6035 		 * address that is usable for a DS.
6036 		 * Note that the MDS's address is in a different place
6037 		 * than the sessions already acquired for DS's.
6038 		 */
6039 		msad6 = (struct sockaddr_in6 *)nmp->nm_sockreq.nr_nam;
6040 		tdsp = TAILQ_FIRST(&nmp->nm_sess);
6041 		while (tdsp != NULL) {
6042 			if (msad6 != NULL && msad6->sin6_family == AF_INET6 &&
6043 			    IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
6044 			    &msad6->sin6_addr) &&
6045 			    sin6->sin6_port == msad6->sin6_port &&
6046 			    (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
6047 			    tdsp->nfsclds_sess.nfsess_defunct == 0) {
6048 				*dspp = tdsp;
6049 				NFSUNLOCKMNT(nmp);
6050 				return (0);
6051 			}
6052 			tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
6053 			if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
6054 				msad6 = (struct sockaddr_in6 *)
6055 				    tdsp->nfsclds_sockp->nr_nam;
6056 			else
6057 				msad6 = NULL;
6058 		}
6059 		NFSUNLOCKMNT(nmp);
6060 
6061 		/* No IP address match, so look for new/trunked one. */
6062 		sad6 = malloc(sizeof(*sad6), M_SONAME, M_WAITOK | M_ZERO);
6063 		sad6->sin6_len = sizeof(*sad6);
6064 		sad6->sin6_family = AF_INET6;
6065 		sad6->sin6_port = sin6->sin6_port;
6066 		NFSBCOPY(&sin6->sin6_addr, &sad6->sin6_addr,
6067 		    sizeof(struct in6_addr));
6068 		if (NFSHASPNFS(nmp) && NFSHASKERB(nmp)) {
6069 			/* For pNFS, a separate server principal is needed. */
6070 			nrp = malloc(sizeof(*nrp) + NI_MAXSERV + NI_MAXHOST,
6071 			    M_NFSSOCKREQ, M_WAITOK | M_ZERO);
6072 			/*
6073 			 * Use the latter part of nr_srvprinc as a temporary
6074 			 * buffer for the IP address.
6075 			 */
6076 			inet_ntop(AF_INET6, &sad6->sin6_addr,
6077 			    &nrp->nr_srvprinc[NI_MAXSERV], NI_MAXHOST);
6078 			NFSCL_DEBUG(1, "nfsrpc_fillsa: DS IP=%s\n",
6079 			    &nrp->nr_srvprinc[NI_MAXSERV]);
6080 			if (!rpc_gss_ip_to_srv_principal_call(
6081 			    &nrp->nr_srvprinc[NI_MAXSERV], "nfs",
6082 			    nrp->nr_srvprinc))
6083 				nrp->nr_srvprinc[0] = '\0';
6084 			NFSCL_DEBUG(1, "nfsrpc_fillsa: srv principal=%s\n",
6085 			    nrp->nr_srvprinc);
6086 		} else
6087 			nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ,
6088 			    M_WAITOK | M_ZERO);
6089 		nrp->nr_nam = (struct sockaddr *)sad6;
6090 	} else
6091 		return (EPERM);
6092 
6093 	nrp->nr_sotype = SOCK_STREAM;
6094 	mtx_init(&nrp->nr_mtx, "nfssock", NULL, MTX_DEF);
6095 	nrp->nr_prog = NFS_PROG;
6096 	nrp->nr_vers = vers;
6097 
6098 	/*
6099 	 * Use the credentials that were used for the mount, which are
6100 	 * in nmp->nm_sockreq.nr_cred for newnfs_connect() etc.
6101 	 * Ref. counting the credentials with crhold() is probably not
6102 	 * necessary, since nm_sockreq.nr_cred won't be crfree()'d until
6103 	 * unmount, but I did it anyhow.
6104 	 */
6105 	nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred);
6106 	error = newnfs_connect(nmp, nrp, NULL, p, 0, false, &nrp->nr_client);
6107 	NFSCL_DEBUG(3, "DS connect=%d\n", error);
6108 
6109 	dsp = NULL;
6110 	/* Now, do the exchangeid and create session. */
6111 	if (error == 0) {
6112 		if (vers == NFS_VER4) {
6113 			firsttry = 0;
6114 			do {
6115 				error = nfsrpc_exchangeid(nmp, clp, nrp,
6116 				    minorvers, NFSV4EXCH_USEPNFSDS, &dsp,
6117 				    nrp->nr_cred, p);
6118 				NFSCL_DEBUG(3, "DS exchangeid=%d\n", error);
6119 				if (error == NFSERR_MINORVERMISMATCH)
6120 					minorvers = NFSV42_MINORVERSION;
6121 			} while (error == NFSERR_MINORVERMISMATCH &&
6122 			    firsttry++ == 0);
6123 			if (error != 0)
6124 				newnfs_disconnect(NULL, nrp);
6125 		} else {
6126 			dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS,
6127 			    M_WAITOK | M_ZERO);
6128 			dsp->nfsclds_flags |= NFSCLDS_DS;
6129 			dsp->nfsclds_expire = INT32_MAX; /* No renews needed. */
6130 			mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
6131 			mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
6132 			    NULL, MTX_DEF);
6133 		}
6134 	}
6135 	if (error == 0) {
6136 		dsp->nfsclds_sockp = nrp;
6137 		if (vers == NFS_VER4) {
6138 			NFSLOCKMNT(nmp);
6139 			retv = nfscl_getsameserver(nmp, dsp, &tdsp,
6140 			    &sequenceid);
6141 			NFSCL_DEBUG(3, "getsame ret=%d\n", retv);
6142 			if (retv == NFSDSP_USETHISSESSION &&
6143 			    nfscl_dssameconn != 0) {
6144 				NFSLOCKDS(tdsp);
6145 				tdsp->nfsclds_flags |= NFSCLDS_SAMECONN;
6146 				NFSUNLOCKDS(tdsp);
6147 				NFSUNLOCKMNT(nmp);
6148 				/*
6149 				 * If there is already a session for this
6150 				 * server, use it.
6151 				 */
6152 				newnfs_disconnect(NULL, nrp);
6153 				nfscl_freenfsclds(dsp);
6154 				*dspp = tdsp;
6155 				return (0);
6156 			}
6157 			if (retv == NFSDSP_NOTFOUND)
6158 				sequenceid =
6159 				    dsp->nfsclds_sess.nfsess_sequenceid;
6160 			NFSUNLOCKMNT(nmp);
6161 			error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
6162 			    nrp, dsp, sequenceid, 0, nrp->nr_cred, p);
6163 			NFSCL_DEBUG(3, "DS createsess=%d\n", error);
6164 		}
6165 	} else {
6166 		NFSFREECRED(nrp->nr_cred);
6167 		NFSFREEMUTEX(&nrp->nr_mtx);
6168 		free(nrp->nr_nam, M_SONAME);
6169 		free(nrp, M_NFSSOCKREQ);
6170 	}
6171 	if (error == 0) {
6172 		NFSCL_DEBUG(3, "add DS session\n");
6173 		/*
6174 		 * Put it at the end of the list. That way the list
6175 		 * is ordered by when the entry was added. This matters
6176 		 * since the one done first is the one that should be
6177 		 * used for sequencid'ing any subsequent create sessions.
6178 		 */
6179 		NFSLOCKMNT(nmp);
6180 		TAILQ_INSERT_TAIL(&nmp->nm_sess, dsp, nfsclds_list);
6181 		NFSUNLOCKMNT(nmp);
6182 		*dspp = dsp;
6183 	} else if (dsp != NULL) {
6184 		newnfs_disconnect(NULL, nrp);
6185 		nfscl_freenfsclds(dsp);
6186 	}
6187 	return (error);
6188 }
6189 
6190 /*
6191  * Do the NFSv4.1 Reclaim Complete.
6192  */
6193 int
6194 nfsrpc_reclaimcomplete(struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
6195 {
6196 	uint32_t *tl;
6197 	struct nfsrv_descript nfsd;
6198 	struct nfsrv_descript *nd = &nfsd;
6199 	int error;
6200 
6201 	nfscl_reqstart(nd, NFSPROC_RECLAIMCOMPL, nmp, NULL, 0, NULL, NULL, 0,
6202 	    0, cred);
6203 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
6204 	*tl = newnfs_false;
6205 	nd->nd_flag |= ND_USEGSSNAME;
6206 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
6207 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
6208 	if (error != 0)
6209 		return (error);
6210 	error = nd->nd_repstat;
6211 	m_freem(nd->nd_mrep);
6212 	return (error);
6213 }
6214 
6215 /*
6216  * Initialize the slot tables for a session.
6217  */
6218 static void
6219 nfscl_initsessionslots(struct nfsclsession *sep)
6220 {
6221 	int i;
6222 
6223 	for (i = 0; i < NFSV4_CBSLOTS; i++) {
6224 		if (sep->nfsess_cbslots[i].nfssl_reply != NULL)
6225 			m_freem(sep->nfsess_cbslots[i].nfssl_reply);
6226 		NFSBZERO(&sep->nfsess_cbslots[i], sizeof(struct nfsslot));
6227 	}
6228 	for (i = 0; i < 64; i++)
6229 		sep->nfsess_slotseq[i] = 0;
6230 	sep->nfsess_slots = 0;
6231 	sep->nfsess_badslots = 0;
6232 }
6233 
6234 /*
6235  * Called to try and do an I/O operation via an NFSv4.1 Data Server (DS).
6236  */
6237 int
6238 nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6239     uint32_t rwaccess, int docommit, struct ucred *cred, NFSPROC_T *p)
6240 {
6241 	struct nfsnode *np = VTONFS(vp);
6242 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6243 	struct nfscllayout *layp;
6244 	struct nfscldevinfo *dip;
6245 	struct nfsclflayout *rflp;
6246 	struct mbuf *m, *m2;
6247 	struct nfsclwritedsdorpc *drpc, *tdrpc;
6248 	nfsv4stateid_t stateid;
6249 	struct ucred *newcred;
6250 	uint64_t lastbyte, len, off, oresid, xfer;
6251 	int eof, error, firstmirror, i, iolaymode, mirrorcnt, recalled, timo;
6252 	void *lckp;
6253 	uint8_t *dev;
6254 	void *iovbase = NULL;
6255 	size_t iovlen = 0;
6256 	off_t offs = 0;
6257 	ssize_t resid = 0;
6258 	uint32_t op;
6259 
6260 	if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
6261 	    (np->n_flag & NNOLAYOUT) != 0)
6262 		return (EIO);
6263 	/* Now, get a reference cnt on the clientid for this mount. */
6264 	if (nfscl_getref(nmp) == 0)
6265 		return (EIO);
6266 
6267 	/* Find an appropriate stateid. */
6268 	newcred = NFSNEWCRED(cred);
6269 	error = nfscl_getstateid(vp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
6270 	    rwaccess, 1, newcred, p, &stateid, &lckp);
6271 	if (error != 0) {
6272 		NFSFREECRED(newcred);
6273 		nfscl_relref(nmp);
6274 		return (error);
6275 	}
6276 	/* Search for a layout for this file. */
6277 	off = uiop->uio_offset;
6278 	layp = nfscl_getlayout(nmp->nm_clp, np->n_fhp->nfh_fh,
6279 	    np->n_fhp->nfh_len, off, rwaccess, &rflp, &recalled);
6280 	if (layp == NULL || rflp == NULL) {
6281 		if (recalled != 0) {
6282 			NFSFREECRED(newcred);
6283 			if (lckp != NULL)
6284 				nfscl_lockderef(lckp);
6285 			nfscl_relref(nmp);
6286 			return (EIO);
6287 		}
6288 		if (layp != NULL) {
6289 			nfscl_rellayout(layp, (rflp == NULL) ? 1 : 0);
6290 			layp = NULL;
6291 		}
6292 		/* Try and get a Layout, if it is supported. */
6293 		if (rwaccess == NFSV4OPEN_ACCESSWRITE ||
6294 		    (np->n_flag & NWRITEOPENED) != 0)
6295 			iolaymode = NFSLAYOUTIOMODE_RW;
6296 		else
6297 			iolaymode = NFSLAYOUTIOMODE_READ;
6298 		error = nfsrpc_getlayout(nmp, vp, np->n_fhp, iolaymode,
6299 		    rwaccess, NULL, &stateid, off, &layp, newcred, p);
6300 		if (error != 0) {
6301 			NFSLOCKNODE(np);
6302 			np->n_flag |= NNOLAYOUT;
6303 			NFSUNLOCKNODE(np);
6304 			if (lckp != NULL)
6305 				nfscl_lockderef(lckp);
6306 			NFSFREECRED(newcred);
6307 			if (layp != NULL)
6308 				nfscl_rellayout(layp, 0);
6309 			nfscl_relref(nmp);
6310 			return (error);
6311 		}
6312 	}
6313 
6314 	/*
6315 	 * Loop around finding a layout that works for the first part of
6316 	 * this I/O operation, and then call the function that actually
6317 	 * does the RPC.
6318 	 */
6319 	eof = 0;
6320 	len = (uint64_t)uiop->uio_resid;
6321 	while (len > 0 && error == 0 && eof == 0) {
6322 		off = uiop->uio_offset;
6323 		error = nfscl_findlayoutforio(layp, off, rwaccess, &rflp);
6324 		if (error == 0) {
6325 			oresid = xfer = (uint64_t)uiop->uio_resid;
6326 			if (xfer > (rflp->nfsfl_end - rflp->nfsfl_off))
6327 				xfer = rflp->nfsfl_end - rflp->nfsfl_off;
6328 			/*
6329 			 * For Flex File layout with mirrored DSs, select one
6330 			 * of them at random for reads. For writes and commits,
6331 			 * do all mirrors.
6332 			 */
6333 			m = NULL;
6334 			tdrpc = drpc = NULL;
6335 			firstmirror = 0;
6336 			mirrorcnt = 1;
6337 			if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0 &&
6338 			    (mirrorcnt = rflp->nfsfl_mirrorcnt) > 1) {
6339 				if (rwaccess == NFSV4OPEN_ACCESSREAD) {
6340 					firstmirror = arc4random() % mirrorcnt;
6341 					mirrorcnt = firstmirror + 1;
6342 				} else {
6343 					if (docommit == 0) {
6344 						/*
6345 						 * Save values, so uiop can be
6346 						 * rolled back upon a write
6347 						 * error.
6348 						 */
6349 						offs = uiop->uio_offset;
6350 						resid = uiop->uio_resid;
6351 						iovbase =
6352 						    uiop->uio_iov->iov_base;
6353 						iovlen = uiop->uio_iov->iov_len;
6354 						m = nfsm_uiombuflist(uiop, len,
6355 						    0);
6356 						if (m == NULL) {
6357 							error = EFAULT;
6358 							break;
6359 						}
6360 					}
6361 					tdrpc = drpc = malloc(sizeof(*drpc) *
6362 					    (mirrorcnt - 1), M_TEMP, M_WAITOK |
6363 					    M_ZERO);
6364 				}
6365 			}
6366 			for (i = firstmirror; i < mirrorcnt && error == 0; i++){
6367 				m2 = NULL;
6368 				if (m != NULL && i < mirrorcnt - 1)
6369 					m2 = m_copym(m, 0, M_COPYALL, M_WAITOK);
6370 				else {
6371 					m2 = m;
6372 					m = NULL;
6373 				}
6374 				if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0) {
6375 					dev = rflp->nfsfl_ffm[i].dev;
6376 					dip = nfscl_getdevinfo(nmp->nm_clp, dev,
6377 					    rflp->nfsfl_ffm[i].devp);
6378 				} else {
6379 					dev = rflp->nfsfl_dev;
6380 					dip = nfscl_getdevinfo(nmp->nm_clp, dev,
6381 					    rflp->nfsfl_devp);
6382 				}
6383 				if (dip != NULL) {
6384 					if ((rflp->nfsfl_flags & NFSFL_FLEXFILE)
6385 					    != 0)
6386 						error = nfscl_dofflayoutio(vp,
6387 						    uiop, iomode, must_commit,
6388 						    &eof, &stateid, rwaccess,
6389 						    dip, layp, rflp, off, xfer,
6390 						    i, docommit, m2, tdrpc,
6391 						    newcred, p);
6392 					else
6393 						error = nfscl_doflayoutio(vp,
6394 						    uiop, iomode, must_commit,
6395 						    &eof, &stateid, rwaccess,
6396 						    dip, layp, rflp, off, xfer,
6397 						    docommit, newcred, p);
6398 					nfscl_reldevinfo(dip);
6399 				} else {
6400 					if (m2 != NULL)
6401 						m_freem(m2);
6402 					error = EIO;
6403 				}
6404 				tdrpc++;
6405 			}
6406 			if (m != NULL)
6407 				m_freem(m);
6408 			tdrpc = drpc;
6409 			timo = hz / 50;		/* Wait for 20msec. */
6410 			if (timo < 1)
6411 				timo = 1;
6412 			for (i = firstmirror; i < mirrorcnt - 1 &&
6413 			    tdrpc != NULL; i++, tdrpc++) {
6414 				/*
6415 				 * For the unused drpc entries, both inprog and
6416 				 * err == 0, so this loop won't break.
6417 				 */
6418 				while (tdrpc->inprog != 0 && tdrpc->done == 0)
6419 					tsleep(&tdrpc->tsk, PVFS, "clrpcio",
6420 					    timo);
6421 				if (error == 0 && tdrpc->err != 0)
6422 					error = tdrpc->err;
6423 				if (rwaccess != NFSV4OPEN_ACCESSREAD &&
6424 				    docommit == 0 && *must_commit == 0 &&
6425 				    tdrpc->must_commit == 1)
6426 					*must_commit = 1;
6427 			}
6428 			free(drpc, M_TEMP);
6429 			if (error == 0) {
6430 				if (mirrorcnt > 1 && rwaccess ==
6431 				    NFSV4OPEN_ACCESSWRITE && docommit == 0) {
6432 					NFSLOCKCLSTATE();
6433 					layp->nfsly_flags |= NFSLY_WRITTEN;
6434 					NFSUNLOCKCLSTATE();
6435 				}
6436 				lastbyte = off + xfer - 1;
6437 				NFSLOCKCLSTATE();
6438 				if (lastbyte > layp->nfsly_lastbyte)
6439 					layp->nfsly_lastbyte = lastbyte;
6440 				NFSUNLOCKCLSTATE();
6441 			} else if (error == NFSERR_OPENMODE &&
6442 			    rwaccess == NFSV4OPEN_ACCESSREAD) {
6443 				NFSLOCKMNT(nmp);
6444 				nmp->nm_state |= NFSSTA_OPENMODE;
6445 				NFSUNLOCKMNT(nmp);
6446 			} else if ((error == NFSERR_NOSPC ||
6447 			    error == NFSERR_IO || error == NFSERR_NXIO) &&
6448 			    nmp->nm_minorvers == NFSV42_MINORVERSION) {
6449 				if (docommit != 0)
6450 					op = NFSV4OP_COMMIT;
6451 				else if (rwaccess == NFSV4OPEN_ACCESSREAD)
6452 					op = NFSV4OP_READ;
6453 				else
6454 					op = NFSV4OP_WRITE;
6455 				nfsrpc_layouterror(nmp, np->n_fhp->nfh_fh,
6456 				    np->n_fhp->nfh_len, off, xfer,
6457 				    &layp->nfsly_stateid, newcred, p, error, op,
6458 				    dip->nfsdi_deviceid);
6459 				error = EIO;
6460 			} else
6461 				error = EIO;
6462 			if (error == 0)
6463 				len -= (oresid - (uint64_t)uiop->uio_resid);
6464 			else if (mirrorcnt > 1 && rwaccess ==
6465 			    NFSV4OPEN_ACCESSWRITE && docommit == 0) {
6466 				/*
6467 				 * In case the rpc gets retried, roll the
6468 				 * uio fields changed by nfsm_uiombuflist()
6469 				 * back.
6470 				 */
6471 				uiop->uio_offset = offs;
6472 				uiop->uio_resid = resid;
6473 				uiop->uio_iov->iov_base = iovbase;
6474 				uiop->uio_iov->iov_len = iovlen;
6475 			}
6476 		}
6477 	}
6478 	if (lckp != NULL)
6479 		nfscl_lockderef(lckp);
6480 	NFSFREECRED(newcred);
6481 	nfscl_rellayout(layp, 0);
6482 	nfscl_relref(nmp);
6483 	return (error);
6484 }
6485 
6486 /*
6487  * Find a file layout that will handle the first bytes of the requested
6488  * range and return the information from it needed to the I/O operation.
6489  */
6490 int
6491 nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess,
6492     struct nfsclflayout **retflpp)
6493 {
6494 	struct nfsclflayout *flp, *nflp, *rflp;
6495 	uint32_t rw;
6496 
6497 	rflp = NULL;
6498 	rw = rwaccess;
6499 	/* For reading, do the Read list first and then the Write list. */
6500 	do {
6501 		if (rw == NFSV4OPEN_ACCESSREAD)
6502 			flp = LIST_FIRST(&lyp->nfsly_flayread);
6503 		else
6504 			flp = LIST_FIRST(&lyp->nfsly_flayrw);
6505 		while (flp != NULL) {
6506 			nflp = LIST_NEXT(flp, nfsfl_list);
6507 			if (flp->nfsfl_off > off)
6508 				break;
6509 			if (flp->nfsfl_end > off &&
6510 			    (rflp == NULL || rflp->nfsfl_end < flp->nfsfl_end))
6511 				rflp = flp;
6512 			flp = nflp;
6513 		}
6514 		if (rw == NFSV4OPEN_ACCESSREAD)
6515 			rw = NFSV4OPEN_ACCESSWRITE;
6516 		else
6517 			rw = 0;
6518 	} while (rw != 0);
6519 	if (rflp != NULL) {
6520 		/* This one covers the most bytes starting at off. */
6521 		*retflpp = rflp;
6522 		return (0);
6523 	}
6524 	return (EIO);
6525 }
6526 
6527 /*
6528  * Do I/O using an NFSv4.1 or NFSv4.2 file layout.
6529  */
6530 static int
6531 nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6532     int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
6533     struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
6534     uint64_t len, int docommit, struct ucred *cred, NFSPROC_T *p)
6535 {
6536 	uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer;
6537 	int commit_thru_mds, error, stripe_index, stripe_pos, minorvers;
6538 	struct nfsnode *np;
6539 	struct nfsfh *fhp;
6540 	struct nfsclds **dspp;
6541 
6542 	np = VTONFS(vp);
6543 	rel_off = off - flp->nfsfl_patoff;
6544 	stripe_unit_size = flp->nfsfl_util & NFSFLAYUTIL_STRIPE_MASK;
6545 	stripe_pos = (rel_off / stripe_unit_size + flp->nfsfl_stripe1) %
6546 	    dp->nfsdi_stripecnt;
6547 	transfer = stripe_unit_size - (rel_off % stripe_unit_size);
6548 	error = 0;
6549 
6550 	/* Loop around, doing I/O for each stripe unit. */
6551 	while (len > 0 && error == 0) {
6552 		stripe_index = nfsfldi_stripeindex(dp, stripe_pos);
6553 		dspp = nfsfldi_addr(dp, stripe_index);
6554 		if (((*dspp)->nfsclds_flags & NFSCLDS_MINORV2) != 0)
6555 			minorvers = NFSV42_MINORVERSION;
6556 		else
6557 			minorvers = NFSV41_MINORVERSION;
6558 		if (len > transfer && docommit == 0)
6559 			xfer = transfer;
6560 		else
6561 			xfer = len;
6562 		if ((flp->nfsfl_util & NFSFLAYUTIL_DENSE) != 0) {
6563 			/* Dense layout. */
6564 			if (stripe_pos >= flp->nfsfl_fhcnt)
6565 				return (EIO);
6566 			fhp = flp->nfsfl_fh[stripe_pos];
6567 			io_off = (rel_off / (stripe_unit_size *
6568 			    dp->nfsdi_stripecnt)) * stripe_unit_size +
6569 			    rel_off % stripe_unit_size;
6570 		} else {
6571 			/* Sparse layout. */
6572 			if (flp->nfsfl_fhcnt > 1) {
6573 				if (stripe_index >= flp->nfsfl_fhcnt)
6574 					return (EIO);
6575 				fhp = flp->nfsfl_fh[stripe_index];
6576 			} else if (flp->nfsfl_fhcnt == 1)
6577 				fhp = flp->nfsfl_fh[0];
6578 			else
6579 				fhp = np->n_fhp;
6580 			io_off = off;
6581 		}
6582 		if ((flp->nfsfl_util & NFSFLAYUTIL_COMMIT_THRU_MDS) != 0) {
6583 			commit_thru_mds = 1;
6584 			if (docommit != 0)
6585 				error = EIO;
6586 		} else {
6587 			commit_thru_mds = 0;
6588 			NFSLOCKNODE(np);
6589 			np->n_flag |= NDSCOMMIT;
6590 			NFSUNLOCKNODE(np);
6591 		}
6592 		if (docommit != 0) {
6593 			if (error == 0)
6594 				error = nfsrpc_commitds(vp, io_off, xfer,
6595 				    *dspp, fhp, NFS_VER4, minorvers, cred, p);
6596 			if (error == 0) {
6597 				/*
6598 				 * Set both eof and uio_resid = 0 to end any
6599 				 * loops.
6600 				 */
6601 				*eofp = 1;
6602 				uiop->uio_resid = 0;
6603 			} else {
6604 				NFSLOCKNODE(np);
6605 				np->n_flag &= ~NDSCOMMIT;
6606 				NFSUNLOCKNODE(np);
6607 			}
6608 		} else if (rwflag == NFSV4OPEN_ACCESSREAD)
6609 			error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
6610 			    io_off, xfer, fhp, 0, NFS_VER4, minorvers, cred, p);
6611 		else {
6612 			error = nfsrpc_writeds(vp, uiop, iomode, must_commit,
6613 			    stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds,
6614 			    0, NFS_VER4, minorvers, cred, p);
6615 			if (error == 0) {
6616 				NFSLOCKCLSTATE();
6617 				lyp->nfsly_flags |= NFSLY_WRITTEN;
6618 				NFSUNLOCKCLSTATE();
6619 			}
6620 		}
6621 		if (error == 0) {
6622 			transfer = stripe_unit_size;
6623 			stripe_pos = (stripe_pos + 1) % dp->nfsdi_stripecnt;
6624 			len -= xfer;
6625 			off += xfer;
6626 		}
6627 	}
6628 	return (error);
6629 }
6630 
6631 /*
6632  * Do I/O using an NFSv4.1 flex file layout.
6633  */
6634 static int
6635 nfscl_dofflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6636     int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
6637     struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
6638     uint64_t len, int mirror, int docommit, struct mbuf *mp,
6639     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
6640 {
6641 	uint64_t xfer;
6642 	int error;
6643 	struct nfsnode *np;
6644 	struct nfsfh *fhp;
6645 	struct nfsclds **dspp;
6646 	struct ucred *tcred;
6647 	struct mbuf *m, *m2;
6648 	uint32_t copylen;
6649 
6650 	np = VTONFS(vp);
6651 	error = 0;
6652 	NFSCL_DEBUG(4, "nfscl_dofflayoutio: off=%ju len=%ju\n", (uintmax_t)off,
6653 	    (uintmax_t)len);
6654 	/* Loop around, doing I/O for each stripe unit. */
6655 	while (len > 0 && error == 0) {
6656 		dspp = nfsfldi_addr(dp, 0);
6657 		fhp = flp->nfsfl_ffm[mirror].fh[dp->nfsdi_versindex];
6658 		stateidp = &flp->nfsfl_ffm[mirror].st;
6659 		NFSCL_DEBUG(4, "mirror=%d vind=%d fhlen=%d st.seqid=0x%x\n",
6660 		    mirror, dp->nfsdi_versindex, fhp->nfh_len, stateidp->seqid);
6661 		if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0) {
6662 			tcred = NFSNEWCRED(cred);
6663 			tcred->cr_uid = flp->nfsfl_ffm[mirror].user;
6664 			tcred->cr_groups[0] = flp->nfsfl_ffm[mirror].group;
6665 			tcred->cr_ngroups = 1;
6666 		} else
6667 			tcred = cred;
6668 		if (rwflag == NFSV4OPEN_ACCESSREAD)
6669 			copylen = dp->nfsdi_rsize;
6670 		else {
6671 			copylen = dp->nfsdi_wsize;
6672 			if (len > copylen && mp != NULL) {
6673 				/*
6674 				 * When a mirrored configuration needs to do
6675 				 * multiple writes to each mirror, all writes
6676 				 * except the last one must be a multiple of
6677 				 * 4 bytes.  This is required so that the XDR
6678 				 * does not need padding.
6679 				 * If possible, clip the size to an exact
6680 				 * multiple of the mbuf length, so that the
6681 				 * split will be on an mbuf boundary.
6682 				 */
6683 				copylen &= 0xfffffffc;
6684 				if (copylen > mp->m_len)
6685 					copylen = copylen / mp->m_len *
6686 					    mp->m_len;
6687 			}
6688 		}
6689 		NFSLOCKNODE(np);
6690 		np->n_flag |= NDSCOMMIT;
6691 		NFSUNLOCKNODE(np);
6692 		if (len > copylen && docommit == 0)
6693 			xfer = copylen;
6694 		else
6695 			xfer = len;
6696 		if (docommit != 0) {
6697 			if (error == 0) {
6698 				/*
6699 				 * Do last mirrored DS commit with this thread.
6700 				 */
6701 				if (mirror < flp->nfsfl_mirrorcnt - 1)
6702 					error = nfsio_commitds(vp, off, xfer,
6703 					    *dspp, fhp, dp->nfsdi_vers,
6704 					    dp->nfsdi_minorvers, drpc, tcred,
6705 					    p);
6706 				else
6707 					error = nfsrpc_commitds(vp, off, xfer,
6708 					    *dspp, fhp, dp->nfsdi_vers,
6709 					    dp->nfsdi_minorvers, tcred, p);
6710 				NFSCL_DEBUG(4, "commitds=%d\n", error);
6711 				if (error != 0 && error != EACCES && error !=
6712 				    ESTALE) {
6713 					NFSCL_DEBUG(4,
6714 					    "DS layreterr for commit\n");
6715 					nfscl_dserr(NFSV4OP_COMMIT, error, dp,
6716 					    lyp, *dspp);
6717 				}
6718 			}
6719 			NFSCL_DEBUG(4, "aft nfsio_commitds=%d\n", error);
6720 			if (error == 0) {
6721 				/*
6722 				 * Set both eof and uio_resid = 0 to end any
6723 				 * loops.
6724 				 */
6725 				*eofp = 1;
6726 				uiop->uio_resid = 0;
6727 			} else {
6728 				NFSLOCKNODE(np);
6729 				np->n_flag &= ~NDSCOMMIT;
6730 				NFSUNLOCKNODE(np);
6731 			}
6732 		} else if (rwflag == NFSV4OPEN_ACCESSREAD) {
6733 			error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
6734 			    off, xfer, fhp, 1, dp->nfsdi_vers,
6735 			    dp->nfsdi_minorvers, tcred, p);
6736 			NFSCL_DEBUG(4, "readds=%d\n", error);
6737 			if (error != 0 && error != EACCES && error != ESTALE) {
6738 				NFSCL_DEBUG(4, "DS layreterr for read\n");
6739 				nfscl_dserr(NFSV4OP_READ, error, dp, lyp,
6740 				    *dspp);
6741 			}
6742 		} else {
6743 			if (flp->nfsfl_mirrorcnt == 1) {
6744 				error = nfsrpc_writeds(vp, uiop, iomode,
6745 				    must_commit, stateidp, *dspp, off, xfer,
6746 				    fhp, 0, 1, dp->nfsdi_vers,
6747 				    dp->nfsdi_minorvers, tcred, p);
6748 				if (error == 0) {
6749 					NFSLOCKCLSTATE();
6750 					lyp->nfsly_flags |= NFSLY_WRITTEN;
6751 					NFSUNLOCKCLSTATE();
6752 				}
6753 			} else {
6754 				m = mp;
6755 				if (xfer < len) {
6756 					/* The mbuf list must be split. */
6757 					m2 = nfsm_split(mp, xfer);
6758 					if (m2 != NULL)
6759 						mp = m2;
6760 					else {
6761 						m_freem(mp);
6762 						error = EIO;
6763 					}
6764 				}
6765 				NFSCL_DEBUG(4, "mcopy len=%jd xfer=%jd\n",
6766 				    (uintmax_t)len, (uintmax_t)xfer);
6767 				/*
6768 				 * Do last write to a mirrored DS with this
6769 				 * thread.
6770 				 */
6771 				if (error == 0) {
6772 					if (mirror < flp->nfsfl_mirrorcnt - 1)
6773 						error = nfsio_writedsmir(vp,
6774 						    iomode, must_commit,
6775 						    stateidp, *dspp, off,
6776 						    xfer, fhp, m,
6777 						    dp->nfsdi_vers,
6778 						    dp->nfsdi_minorvers, drpc,
6779 						    tcred, p);
6780 					else
6781 						error = nfsrpc_writedsmir(vp,
6782 						    iomode, must_commit,
6783 						    stateidp, *dspp, off,
6784 						    xfer, fhp, m,
6785 						    dp->nfsdi_vers,
6786 						    dp->nfsdi_minorvers, tcred,
6787 						    p);
6788 				}
6789 				NFSCL_DEBUG(4, "nfsio_writedsmir=%d\n", error);
6790 				if (error != 0 && error != EACCES && error !=
6791 				    ESTALE) {
6792 					NFSCL_DEBUG(4,
6793 					    "DS layreterr for write\n");
6794 					nfscl_dserr(NFSV4OP_WRITE, error, dp,
6795 					    lyp, *dspp);
6796 				}
6797 			}
6798 		}
6799 		NFSCL_DEBUG(4, "aft read/writeds=%d\n", error);
6800 		if (error == 0) {
6801 			len -= xfer;
6802 			off += xfer;
6803 		}
6804 		if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0)
6805 			NFSFREECRED(tcred);
6806 	}
6807 	NFSCL_DEBUG(4, "eo nfscl_dofflayoutio=%d\n", error);
6808 	return (error);
6809 }
6810 
6811 /*
6812  * The actual read RPC done to a DS.
6813  */
6814 static int
6815 nfsrpc_readds(vnode_t vp, struct uio *uiop, nfsv4stateid_t *stateidp, int *eofp,
6816     struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, int flex,
6817     int vers, int minorvers, struct ucred *cred, NFSPROC_T *p)
6818 {
6819 	uint32_t *tl;
6820 	int attrflag, error, retlen;
6821 	struct nfsrv_descript nfsd;
6822 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6823 	struct nfsrv_descript *nd = &nfsd;
6824 	struct nfssockreq *nrp;
6825 	struct nfsvattr na;
6826 
6827 	nd->nd_mrep = NULL;
6828 	if (vers == 0 || vers == NFS_VER4) {
6829 		nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh,
6830 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6831 		    NULL);
6832 		vers = NFS_VER4;
6833 		NFSCL_DEBUG(4, "nfsrpc_readds: vers4 minvers=%d\n", minorvers);
6834 		if (flex != 0)
6835 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6836 		else
6837 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
6838 	} else {
6839 		nfscl_reqstart(nd, NFSPROC_READ, nmp, fhp->nfh_fh,
6840 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6841 		    NULL);
6842 		NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_READ]);
6843 		NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_READDS]);
6844 		NFSCL_DEBUG(4, "nfsrpc_readds: vers3\n");
6845 	}
6846 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
6847 	txdr_hyper(io_off, tl);
6848 	*(tl + 2) = txdr_unsigned(len);
6849 	nrp = dsp->nfsclds_sockp;
6850 	NFSCL_DEBUG(4, "nfsrpc_readds: nrp=%p\n", nrp);
6851 	if (nrp == NULL)
6852 		/* If NULL, use the MDS socket. */
6853 		nrp = &nmp->nm_sockreq;
6854 	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6855 	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6856 	NFSCL_DEBUG(4, "nfsrpc_readds: stat=%d err=%d\n", nd->nd_repstat,
6857 	    error);
6858 	if (error != 0)
6859 		return (error);
6860 	if (vers == NFS_VER3) {
6861 		error = nfscl_postop_attr(nd, &na, &attrflag);
6862 		NFSCL_DEBUG(4, "nfsrpc_readds: postop=%d\n", error);
6863 		if (error != 0)
6864 			goto nfsmout;
6865 	}
6866 	if (nd->nd_repstat != 0) {
6867 		error = nd->nd_repstat;
6868 		goto nfsmout;
6869 	}
6870 	if (vers == NFS_VER3) {
6871 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
6872 		*eofp = fxdr_unsigned(int, *(tl + 1));
6873 	} else {
6874 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
6875 		*eofp = fxdr_unsigned(int, *tl);
6876 	}
6877 	NFSM_STRSIZ(retlen, len);
6878 	NFSCL_DEBUG(4, "nfsrpc_readds: retlen=%d eof=%d\n", retlen, *eofp);
6879 	error = nfsm_mbufuio(nd, uiop, retlen);
6880 nfsmout:
6881 	if (nd->nd_mrep != NULL)
6882 		m_freem(nd->nd_mrep);
6883 	return (error);
6884 }
6885 
6886 /*
6887  * The actual write RPC done to a DS.
6888  */
6889 static int
6890 nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6891     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
6892     struct nfsfh *fhp, int commit_thru_mds, int flex, int vers, int minorvers,
6893     struct ucred *cred, NFSPROC_T *p)
6894 {
6895 	uint32_t *tl;
6896 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6897 	int attrflag, error, rlen, commit, committed = NFSWRITE_FILESYNC;
6898 	int32_t backup;
6899 	struct nfsrv_descript nfsd;
6900 	struct nfsrv_descript *nd = &nfsd;
6901 	struct nfssockreq *nrp;
6902 	struct nfsvattr na;
6903 
6904 	KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
6905 	nd->nd_mrep = NULL;
6906 	if (vers == 0 || vers == NFS_VER4) {
6907 		nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
6908 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6909 		    NULL);
6910 		NFSCL_DEBUG(4, "nfsrpc_writeds: vers4 minvers=%d\n", minorvers);
6911 		vers = NFS_VER4;
6912 		if (flex != 0)
6913 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6914 		else
6915 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
6916 		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
6917 	} else {
6918 		nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
6919 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6920 		    NULL);
6921 		NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITE]);
6922 		NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITEDS]);
6923 		NFSCL_DEBUG(4, "nfsrpc_writeds: vers3\n");
6924 		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
6925 	}
6926 	txdr_hyper(io_off, tl);
6927 	tl += 2;
6928 	if (vers == NFS_VER3)
6929 		*tl++ = txdr_unsigned(len);
6930 	*tl++ = txdr_unsigned(*iomode);
6931 	*tl = txdr_unsigned(len);
6932 	error = nfsm_uiombuf(nd, uiop, len);
6933 	if (error != 0) {
6934 		m_freem(nd->nd_mreq);
6935 		return (error);
6936 	}
6937 	nrp = dsp->nfsclds_sockp;
6938 	if (nrp == NULL)
6939 		/* If NULL, use the MDS socket. */
6940 		nrp = &nmp->nm_sockreq;
6941 	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6942 	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6943 	NFSCL_DEBUG(4, "nfsrpc_writeds: err=%d stat=%d\n", error,
6944 	    nd->nd_repstat);
6945 	if (error != 0)
6946 		return (error);
6947 	if (nd->nd_repstat != 0) {
6948 		/*
6949 		 * In case the rpc gets retried, roll
6950 		 * the uio fields changed by nfsm_uiombuf()
6951 		 * back.
6952 		 */
6953 		uiop->uio_offset -= len;
6954 		uiop->uio_resid += len;
6955 		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base - len;
6956 		uiop->uio_iov->iov_len += len;
6957 		error = nd->nd_repstat;
6958 	} else {
6959 		if (vers == NFS_VER3) {
6960 			error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
6961 			    NULL);
6962 			NFSCL_DEBUG(4, "nfsrpc_writeds: wcc_data=%d\n", error);
6963 			if (error != 0)
6964 				goto nfsmout;
6965 		}
6966 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
6967 		rlen = fxdr_unsigned(int, *tl++);
6968 		NFSCL_DEBUG(4, "nfsrpc_writeds: len=%d rlen=%d\n", len, rlen);
6969 		if (rlen == 0) {
6970 			error = NFSERR_IO;
6971 			goto nfsmout;
6972 		} else if (rlen < len) {
6973 			backup = len - rlen;
6974 			uiop->uio_iov->iov_base =
6975 			    (char *)uiop->uio_iov->iov_base - backup;
6976 			uiop->uio_iov->iov_len += backup;
6977 			uiop->uio_offset -= backup;
6978 			uiop->uio_resid += backup;
6979 			len = rlen;
6980 		}
6981 		commit = fxdr_unsigned(int, *tl++);
6982 
6983 		/*
6984 		 * Return the lowest commitment level
6985 		 * obtained by any of the RPCs.
6986 		 */
6987 		if (committed == NFSWRITE_FILESYNC)
6988 			committed = commit;
6989 		else if (committed == NFSWRITE_DATASYNC &&
6990 		    commit == NFSWRITE_UNSTABLE)
6991 			committed = commit;
6992 		if (commit_thru_mds != 0) {
6993 			NFSLOCKMNT(nmp);
6994 			if (!NFSHASWRITEVERF(nmp)) {
6995 				NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
6996 				NFSSETWRITEVERF(nmp);
6997 			} else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF) &&
6998 			    *must_commit != 2) {
6999 				*must_commit = 1;
7000 				NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
7001 			}
7002 			NFSUNLOCKMNT(nmp);
7003 		} else {
7004 			NFSLOCKDS(dsp);
7005 			if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
7006 				NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
7007 				dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
7008 			} else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF) &&
7009 			    *must_commit != 2) {
7010 				*must_commit = 1;
7011 				NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
7012 			}
7013 			NFSUNLOCKDS(dsp);
7014 		}
7015 	}
7016 nfsmout:
7017 	if (nd->nd_mrep != NULL)
7018 		m_freem(nd->nd_mrep);
7019 	*iomode = committed;
7020 	if (nd->nd_repstat != 0 && error == 0)
7021 		error = nd->nd_repstat;
7022 	return (error);
7023 }
7024 
7025 /*
7026  * The actual write RPC done to a DS.
7027  * This variant is called from a separate kernel process for mirrors.
7028  * Any short write is considered an IO error.
7029  */
7030 static int
7031 nfsrpc_writedsmir(vnode_t vp, int *iomode, int *must_commit,
7032     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
7033     struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers,
7034     struct ucred *cred, NFSPROC_T *p)
7035 {
7036 	uint32_t *tl;
7037 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
7038 	int attrflag, error, commit, committed = NFSWRITE_FILESYNC, rlen;
7039 	struct nfsrv_descript nfsd;
7040 	struct nfsrv_descript *nd = &nfsd;
7041 	struct nfssockreq *nrp;
7042 	struct nfsvattr na;
7043 
7044 	nd->nd_mrep = NULL;
7045 	if (vers == 0 || vers == NFS_VER4) {
7046 		nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
7047 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
7048 		    NULL);
7049 		vers = NFS_VER4;
7050 		NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers4 minvers=%d\n",
7051 		    minorvers);
7052 		nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
7053 		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
7054 	} else {
7055 		nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
7056 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
7057 		    NULL);
7058 		NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITE]);
7059 		NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITEDS]);
7060 		NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers3\n");
7061 		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
7062 	}
7063 	txdr_hyper(io_off, tl);
7064 	tl += 2;
7065 	if (vers == NFS_VER3)
7066 		*tl++ = txdr_unsigned(len);
7067 	*tl++ = txdr_unsigned(*iomode);
7068 	*tl = txdr_unsigned(len);
7069 	if (len > 0) {
7070 		/* Put data in mbuf chain. */
7071 		nd->nd_mb->m_next = m;
7072 	}
7073 	nrp = dsp->nfsclds_sockp;
7074 	if (nrp == NULL)
7075 		/* If NULL, use the MDS socket. */
7076 		nrp = &nmp->nm_sockreq;
7077 	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
7078 	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
7079 	NFSCL_DEBUG(4, "nfsrpc_writedsmir: err=%d stat=%d\n", error,
7080 	    nd->nd_repstat);
7081 	if (error != 0)
7082 		return (error);
7083 	if (nd->nd_repstat != 0)
7084 		error = nd->nd_repstat;
7085 	else {
7086 		if (vers == NFS_VER3) {
7087 			error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
7088 			    NULL);
7089 			NFSCL_DEBUG(4, "nfsrpc_writedsmir: wcc_data=%d\n",
7090 			    error);
7091 			if (error != 0)
7092 				goto nfsmout;
7093 		}
7094 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
7095 		rlen = fxdr_unsigned(int, *tl++);
7096 		NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n", len,
7097 		    rlen);
7098 		if (rlen != len) {
7099 			error = NFSERR_IO;
7100 			NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n",
7101 			    len, rlen);
7102 			goto nfsmout;
7103 		}
7104 		commit = fxdr_unsigned(int, *tl++);
7105 
7106 		/*
7107 		 * Return the lowest commitment level
7108 		 * obtained by any of the RPCs.
7109 		 */
7110 		if (committed == NFSWRITE_FILESYNC)
7111 			committed = commit;
7112 		else if (committed == NFSWRITE_DATASYNC &&
7113 		    commit == NFSWRITE_UNSTABLE)
7114 			committed = commit;
7115 		NFSLOCKDS(dsp);
7116 		if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
7117 			NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
7118 			dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
7119 		} else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF) &&
7120 		    *must_commit != 2) {
7121 			*must_commit = 1;
7122 			NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
7123 		}
7124 		NFSUNLOCKDS(dsp);
7125 	}
7126 nfsmout:
7127 	if (nd->nd_mrep != NULL)
7128 		m_freem(nd->nd_mrep);
7129 	*iomode = committed;
7130 	if (nd->nd_repstat != 0 && error == 0)
7131 		error = nd->nd_repstat;
7132 	return (error);
7133 }
7134 
7135 /*
7136  * Start up the thread that will execute nfsrpc_writedsmir().
7137  */
7138 static void
7139 start_writedsmir(void *arg, int pending)
7140 {
7141 	struct nfsclwritedsdorpc *drpc;
7142 
7143 	drpc = (struct nfsclwritedsdorpc *)arg;
7144 	drpc->err = nfsrpc_writedsmir(drpc->vp, &drpc->iomode,
7145 	    &drpc->must_commit, drpc->stateidp, drpc->dsp, drpc->off, drpc->len,
7146 	    drpc->fhp, drpc->m, drpc->vers, drpc->minorvers, drpc->cred,
7147 	    drpc->p);
7148 	drpc->done = 1;
7149 	crfree(drpc->cred);
7150 	NFSCL_DEBUG(4, "start_writedsmir: err=%d\n", drpc->err);
7151 }
7152 
7153 /*
7154  * Set up the write DS mirror call for the pNFS I/O thread.
7155  */
7156 static int
7157 nfsio_writedsmir(vnode_t vp, int *iomode, int *must_commit,
7158     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t off, int len,
7159     struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers,
7160     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
7161 {
7162 	int error, ret;
7163 
7164 	error = 0;
7165 	drpc->done = 0;
7166 	drpc->vp = vp;
7167 	drpc->iomode = *iomode;
7168 	drpc->must_commit = *must_commit;
7169 	drpc->stateidp = stateidp;
7170 	drpc->dsp = dsp;
7171 	drpc->off = off;
7172 	drpc->len = len;
7173 	drpc->fhp = fhp;
7174 	drpc->m = m;
7175 	drpc->vers = vers;
7176 	drpc->minorvers = minorvers;
7177 	drpc->cred = crhold(cred);
7178 	drpc->p = p;
7179 	drpc->inprog = 0;
7180 	ret = EIO;
7181 	if (nfs_pnfsiothreads != 0) {
7182 		ret = nfs_pnfsio(start_writedsmir, drpc);
7183 		NFSCL_DEBUG(4, "nfsio_writedsmir: nfs_pnfsio=%d\n", ret);
7184 	}
7185 	if (ret != 0) {
7186 		error = nfsrpc_writedsmir(vp, iomode, &drpc->must_commit,
7187 		    stateidp, dsp, off, len, fhp, m, vers, minorvers, cred, p);
7188 		crfree(drpc->cred);
7189 	}
7190 	NFSCL_DEBUG(4, "nfsio_writedsmir: error=%d\n", error);
7191 	return (error);
7192 }
7193 
7194 /*
7195  * Free up the nfsclds structure.
7196  */
7197 void
7198 nfscl_freenfsclds(struct nfsclds *dsp)
7199 {
7200 	int i;
7201 
7202 	if (dsp == NULL)
7203 		return;
7204 	if (dsp->nfsclds_sockp != NULL) {
7205 		NFSFREECRED(dsp->nfsclds_sockp->nr_cred);
7206 		NFSFREEMUTEX(&dsp->nfsclds_sockp->nr_mtx);
7207 		free(dsp->nfsclds_sockp->nr_nam, M_SONAME);
7208 		free(dsp->nfsclds_sockp, M_NFSSOCKREQ);
7209 	}
7210 	NFSFREEMUTEX(&dsp->nfsclds_mtx);
7211 	NFSFREEMUTEX(&dsp->nfsclds_sess.nfsess_mtx);
7212 	for (i = 0; i < NFSV4_CBSLOTS; i++) {
7213 		if (dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply != NULL)
7214 			m_freem(
7215 			    dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply);
7216 	}
7217 	free(dsp, M_NFSCLDS);
7218 }
7219 
7220 static enum nfsclds_state
7221 nfscl_getsameserver(struct nfsmount *nmp, struct nfsclds *newdsp,
7222     struct nfsclds **retdspp, uint32_t *sequencep)
7223 {
7224 	struct nfsclds *dsp;
7225 	int fndseq;
7226 
7227 	/*
7228 	 * Search the list of nfsclds structures for one with the same
7229 	 * server.
7230 	 */
7231 	fndseq = 0;
7232 	TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) {
7233 		if (dsp->nfsclds_servownlen == newdsp->nfsclds_servownlen &&
7234 		    dsp->nfsclds_servownlen != 0 &&
7235 		    !NFSBCMP(dsp->nfsclds_serverown, newdsp->nfsclds_serverown,
7236 		    dsp->nfsclds_servownlen) &&
7237 		    dsp->nfsclds_sess.nfsess_defunct == 0) {
7238 			NFSCL_DEBUG(4, "fnd same fdsp=%p dsp=%p flg=0x%x\n",
7239 			    TAILQ_FIRST(&nmp->nm_sess), dsp,
7240 			    dsp->nfsclds_flags);
7241 			if (fndseq == 0) {
7242 				/* Get sequenceid# from first entry. */
7243 				*sequencep =
7244 				    dsp->nfsclds_sess.nfsess_sequenceid;
7245 				fndseq = 1;
7246 			}
7247 			/* Server major id matches. */
7248 			if ((dsp->nfsclds_flags & NFSCLDS_DS) != 0) {
7249 				*retdspp = dsp;
7250 				return (NFSDSP_USETHISSESSION);
7251 			}
7252 		}
7253 	}
7254 	if (fndseq != 0)
7255 		return (NFSDSP_SEQTHISSESSION);
7256 	return (NFSDSP_NOTFOUND);
7257 }
7258 
7259 /*
7260  * NFS commit rpc to a NFSv4.1 DS.
7261  */
7262 static int
7263 nfsrpc_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
7264     struct nfsfh *fhp, int vers, int minorvers, struct ucred *cred,
7265     NFSPROC_T *p)
7266 {
7267 	uint32_t *tl;
7268 	struct nfsrv_descript nfsd, *nd = &nfsd;
7269 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
7270 	struct nfssockreq *nrp;
7271 	struct nfsvattr na;
7272 	int attrflag, error;
7273 
7274 	nd->nd_mrep = NULL;
7275 	if (vers == 0 || vers == NFS_VER4) {
7276 		nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh,
7277 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
7278 		    NULL);
7279 		vers = NFS_VER4;
7280 	} else {
7281 		nfscl_reqstart(nd, NFSPROC_COMMIT, nmp, fhp->nfh_fh,
7282 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
7283 		    NULL);
7284 		NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_COMMIT]);
7285 		NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_COMMITDS]);
7286 	}
7287 	NFSCL_DEBUG(4, "nfsrpc_commitds: vers=%d minvers=%d\n", vers,
7288 	    minorvers);
7289 	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
7290 	txdr_hyper(offset, tl);
7291 	tl += 2;
7292 	*tl = txdr_unsigned(cnt);
7293 	nrp = dsp->nfsclds_sockp;
7294 	if (nrp == NULL)
7295 		/* If NULL, use the MDS socket. */
7296 		nrp = &nmp->nm_sockreq;
7297 	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
7298 	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
7299 	NFSCL_DEBUG(4, "nfsrpc_commitds: err=%d stat=%d\n", error,
7300 	    nd->nd_repstat);
7301 	if (error != 0)
7302 		return (error);
7303 	if (nd->nd_repstat == 0) {
7304 		if (vers == NFS_VER3) {
7305 			error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
7306 			    NULL);
7307 			NFSCL_DEBUG(4, "nfsrpc_commitds: wccdata=%d\n", error);
7308 			if (error != 0)
7309 				goto nfsmout;
7310 		}
7311 		NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
7312 		NFSLOCKDS(dsp);
7313 		if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
7314 			NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
7315 			error = NFSERR_STALEWRITEVERF;
7316 		}
7317 		NFSUNLOCKDS(dsp);
7318 	}
7319 nfsmout:
7320 	if (error == 0 && nd->nd_repstat != 0)
7321 		error = nd->nd_repstat;
7322 	m_freem(nd->nd_mrep);
7323 	return (error);
7324 }
7325 
7326 /*
7327  * Start up the thread that will execute nfsrpc_commitds().
7328  */
7329 static void
7330 start_commitds(void *arg, int pending)
7331 {
7332 	struct nfsclwritedsdorpc *drpc;
7333 
7334 	drpc = (struct nfsclwritedsdorpc *)arg;
7335 	drpc->err = nfsrpc_commitds(drpc->vp, drpc->off, drpc->len,
7336 	    drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers, drpc->cred,
7337 	    drpc->p);
7338 	drpc->done = 1;
7339 	crfree(drpc->cred);
7340 	NFSCL_DEBUG(4, "start_commitds: err=%d\n", drpc->err);
7341 }
7342 
7343 /*
7344  * Set up the commit DS mirror call for the pNFS I/O thread.
7345  */
7346 static int
7347 nfsio_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
7348     struct nfsfh *fhp, int vers, int minorvers,
7349     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
7350 {
7351 	int error, ret;
7352 
7353 	error = 0;
7354 	drpc->done = 0;
7355 	drpc->vp = vp;
7356 	drpc->off = offset;
7357 	drpc->len = cnt;
7358 	drpc->dsp = dsp;
7359 	drpc->fhp = fhp;
7360 	drpc->vers = vers;
7361 	drpc->minorvers = minorvers;
7362 	drpc->cred = crhold(cred);
7363 	drpc->p = p;
7364 	drpc->inprog = 0;
7365 	ret = EIO;
7366 	if (nfs_pnfsiothreads != 0) {
7367 		ret = nfs_pnfsio(start_commitds, drpc);
7368 		NFSCL_DEBUG(4, "nfsio_commitds: nfs_pnfsio=%d\n", ret);
7369 	}
7370 	if (ret != 0) {
7371 		error = nfsrpc_commitds(vp, offset, cnt, dsp, fhp, vers,
7372 		    minorvers, cred, p);
7373 		crfree(drpc->cred);
7374 	}
7375 	NFSCL_DEBUG(4, "nfsio_commitds: error=%d\n", error);
7376 	return (error);
7377 }
7378 
7379 /*
7380  * NFS Advise rpc
7381  */
7382 int
7383 nfsrpc_advise(vnode_t vp, off_t offset, uint64_t cnt, int advise,
7384     struct ucred *cred, NFSPROC_T *p)
7385 {
7386 	u_int32_t *tl;
7387 	struct nfsrv_descript nfsd, *nd = &nfsd;
7388 	nfsattrbit_t hints;
7389 	int error;
7390 
7391 	NFSZERO_ATTRBIT(&hints);
7392 	if (advise == POSIX_FADV_WILLNEED)
7393 		NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED);
7394 	else if (advise == POSIX_FADV_DONTNEED)
7395 		NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED);
7396 	else
7397 		return (0);
7398 	NFSCL_REQSTART(nd, NFSPROC_IOADVISE, vp, cred);
7399 	nfsm_stateidtom(nd, NULL, NFSSTATEID_PUTALLZERO);
7400 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER);
7401 	txdr_hyper(offset, tl);
7402 	tl += 2;
7403 	txdr_hyper(cnt, tl);
7404 	nfsrv_putattrbit(nd, &hints);
7405 	error = nfscl_request(nd, vp, p, cred);
7406 	if (error != 0)
7407 		return (error);
7408 	if (nd->nd_repstat != 0)
7409 		error = nd->nd_repstat;
7410 	m_freem(nd->nd_mrep);
7411 	return (error);
7412 }
7413 
7414 #ifdef notyet
7415 /*
7416  * NFS advise rpc to a NFSv4.2 DS.
7417  */
7418 static int
7419 nfsrpc_adviseds(vnode_t vp, uint64_t offset, int cnt, int advise,
7420     struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers,
7421     struct ucred *cred, NFSPROC_T *p)
7422 {
7423 	uint32_t *tl;
7424 	struct nfsrv_descript nfsd, *nd = &nfsd;
7425 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
7426 	struct nfssockreq *nrp;
7427 	nfsattrbit_t hints;
7428 	int error;
7429 
7430 	/* For NFS DSs prior to NFSv4.2, just return OK. */
7431 	if (vers == NFS_VER3 || minorversion < NFSV42_MINORVERSION)
7432 		return (0);
7433 	NFSZERO_ATTRBIT(&hints);
7434 	if (advise == POSIX_FADV_WILLNEED)
7435 		NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED);
7436 	else if (advise == POSIX_FADV_DONTNEED)
7437 		NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED);
7438 	else
7439 		return (0);
7440 	nd->nd_mrep = NULL;
7441 	nfscl_reqstart(nd, NFSPROC_IOADVISEDS, nmp, fhp->nfh_fh,
7442 	    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers, NULL);
7443 	vers = NFS_VER4;
7444 	NFSCL_DEBUG(4, "nfsrpc_adviseds: vers=%d minvers=%d\n", vers,
7445 	    minorvers);
7446 	nfsm_stateidtom(nd, NULL, NFSSTATEID_PUTALLZERO);
7447 	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
7448 	txdr_hyper(offset, tl);
7449 	tl += 2;
7450 	*tl = txdr_unsigned(cnt);
7451 	nfsrv_putattrbit(nd, &hints);
7452 	nrp = dsp->nfsclds_sockp;
7453 	if (nrp == NULL)
7454 		/* If NULL, use the MDS socket. */
7455 		nrp = &nmp->nm_sockreq;
7456 	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
7457 	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
7458 	NFSCL_DEBUG(4, "nfsrpc_adviseds: err=%d stat=%d\n", error,
7459 	    nd->nd_repstat);
7460 	if (error != 0)
7461 		return (error);
7462 	if (nd->nd_repstat != 0)
7463 		error = nd->nd_repstat;
7464 	m_freem(nd->nd_mrep);
7465 	return (error);
7466 }
7467 
7468 /*
7469  * Start up the thread that will execute nfsrpc_commitds().
7470  */
7471 static void
7472 start_adviseds(void *arg, int pending)
7473 {
7474 	struct nfsclwritedsdorpc *drpc;
7475 
7476 	drpc = (struct nfsclwritedsdorpc *)arg;
7477 	drpc->err = nfsrpc_adviseds(drpc->vp, drpc->off, drpc->len,
7478 	    drpc->advise, drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers,
7479 	    drpc->cred, drpc->p);
7480 	drpc->done = 1;
7481 	crfree(drpc->cred);
7482 	NFSCL_DEBUG(4, "start_adviseds: err=%d\n", drpc->err);
7483 }
7484 
7485 /*
7486  * Set up the advise DS mirror call for the pNFS I/O thread.
7487  */
7488 static int
7489 nfsio_adviseds(vnode_t vp, uint64_t offset, int cnt, int advise,
7490     struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers,
7491     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
7492 {
7493 	int error, ret;
7494 
7495 	error = 0;
7496 	drpc->done = 0;
7497 	drpc->vp = vp;
7498 	drpc->off = offset;
7499 	drpc->len = cnt;
7500 	drpc->advise = advise;
7501 	drpc->dsp = dsp;
7502 	drpc->fhp = fhp;
7503 	drpc->vers = vers;
7504 	drpc->minorvers = minorvers;
7505 	drpc->cred = crhold(cred);
7506 	drpc->p = p;
7507 	drpc->inprog = 0;
7508 	ret = EIO;
7509 	if (nfs_pnfsiothreads != 0) {
7510 		ret = nfs_pnfsio(start_adviseds, drpc);
7511 		NFSCL_DEBUG(4, "nfsio_adviseds: nfs_pnfsio=%d\n", ret);
7512 	}
7513 	if (ret != 0) {
7514 		error = nfsrpc_adviseds(vp, offset, cnt, advise, dsp, fhp, vers,
7515 		    minorvers, cred, p);
7516 		crfree(drpc->cred);
7517 	}
7518 	NFSCL_DEBUG(4, "nfsio_adviseds: error=%d\n", error);
7519 	return (error);
7520 }
7521 #endif	/* notyet */
7522 
7523 /*
7524  * Do the Allocate operation, retrying for recovery.
7525  */
7526 int
7527 nfsrpc_allocate(vnode_t vp, off_t off, off_t len, struct nfsvattr *nap,
7528     int *attrflagp, struct ucred *cred, NFSPROC_T *p)
7529 {
7530 	int error, expireret = 0, retrycnt, nostateid;
7531 	uint32_t clidrev = 0;
7532 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
7533 	struct nfsfh *nfhp = NULL;
7534 	nfsv4stateid_t stateid;
7535 	off_t tmp_off;
7536 	void *lckp;
7537 
7538 	if (len < 0)
7539 		return (EINVAL);
7540 	if (len == 0)
7541 		return (0);
7542 	tmp_off = off + len;
7543 	NFSLOCKMNT(nmp);
7544 	if (tmp_off > nmp->nm_maxfilesize || tmp_off < off) {
7545 		NFSUNLOCKMNT(nmp);
7546 		return (EFBIG);
7547 	}
7548 	if (nmp->nm_clp != NULL)
7549 		clidrev = nmp->nm_clp->nfsc_clientidrev;
7550 	NFSUNLOCKMNT(nmp);
7551 	nfhp = VTONFS(vp)->n_fhp;
7552 	retrycnt = 0;
7553 	do {
7554 		lckp = NULL;
7555 		nostateid = 0;
7556 		nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
7557 		    NFSV4OPEN_ACCESSWRITE, 0, cred, p, &stateid, &lckp);
7558 		if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
7559 		    stateid.other[2] == 0) {
7560 			nostateid = 1;
7561 			NFSCL_DEBUG(1, "stateid0 in allocate\n");
7562 		}
7563 
7564 		/*
7565 		 * Not finding a stateid should probably never happen,
7566 		 * but just return an error for this case.
7567 		 */
7568 		if (nostateid != 0)
7569 			error = EIO;
7570 		else
7571 			error = nfsrpc_allocaterpc(vp, off, len, &stateid,
7572 			    nap, attrflagp, cred, p);
7573 		if (error == NFSERR_STALESTATEID)
7574 			nfscl_initiate_recovery(nmp->nm_clp);
7575 		if (lckp != NULL)
7576 			nfscl_lockderef(lckp);
7577 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
7578 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
7579 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
7580 			(void) nfs_catnap(PZERO, error, "nfs_allocate");
7581 		} else if ((error == NFSERR_EXPIRED || (!NFSHASINT(nmp) &&
7582 		    error == NFSERR_BADSTATEID)) && clidrev != 0) {
7583 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
7584 		} else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp)) {
7585 			error = EIO;
7586 		}
7587 		retrycnt++;
7588 	} while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
7589 	    error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
7590 	    error == NFSERR_STALEDONTRECOVER ||
7591 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
7592 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
7593 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
7594 	if (error != 0 && retrycnt >= 4)
7595 		error = EIO;
7596 	return (error);
7597 }
7598 
7599 /*
7600  * The allocate RPC.
7601  */
7602 static int
7603 nfsrpc_allocaterpc(vnode_t vp, off_t off, off_t len, nfsv4stateid_t *stateidp,
7604     struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p)
7605 {
7606 	uint32_t *tl;
7607 	int error;
7608 	struct nfsrv_descript nfsd;
7609 	struct nfsrv_descript *nd = &nfsd;
7610 	nfsattrbit_t attrbits;
7611 
7612 	*attrflagp = 0;
7613 	NFSCL_REQSTART(nd, NFSPROC_ALLOCATE, vp, cred);
7614 	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
7615 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED);
7616 	txdr_hyper(off, tl); tl += 2;
7617 	txdr_hyper(len, tl); tl += 2;
7618 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
7619 	NFSGETATTR_ATTRBIT(&attrbits);
7620 	nfsrv_putattrbit(nd, &attrbits);
7621 	error = nfscl_request(nd, vp, p, cred);
7622 	if (error != 0)
7623 		return (error);
7624 	if (nd->nd_repstat == 0) {
7625 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7626 		error = nfsm_loadattr(nd, nap);
7627 		if (error == 0)
7628 			*attrflagp = NFS_LATTR_NOSHRINK;
7629 	} else
7630 		error = nd->nd_repstat;
7631 nfsmout:
7632 	m_freem(nd->nd_mrep);
7633 	return (error);
7634 }
7635 
7636 /*
7637  * Set up the XDR arguments for the LayoutGet operation.
7638  */
7639 static void
7640 nfsrv_setuplayoutget(struct nfsrv_descript *nd, int iomode, uint64_t offset,
7641     uint64_t len, uint64_t minlen, nfsv4stateid_t *stateidp, int layouttype,
7642     int layoutlen, int usecurstateid)
7643 {
7644 	uint32_t *tl;
7645 
7646 	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
7647 	    NFSX_STATEID);
7648 	*tl++ = newnfs_false;		/* Don't signal availability. */
7649 	*tl++ = txdr_unsigned(layouttype);
7650 	*tl++ = txdr_unsigned(iomode);
7651 	txdr_hyper(offset, tl);
7652 	tl += 2;
7653 	txdr_hyper(len, tl);
7654 	tl += 2;
7655 	txdr_hyper(minlen, tl);
7656 	tl += 2;
7657 	if (usecurstateid != 0) {
7658 		/* Special stateid for Current stateid. */
7659 		*tl++ = txdr_unsigned(1);
7660 		*tl++ = 0;
7661 		*tl++ = 0;
7662 		*tl++ = 0;
7663 	} else {
7664 		*tl++ = txdr_unsigned(stateidp->seqid);
7665 		NFSCL_DEBUG(4, "layget seq=%d\n", (int)stateidp->seqid);
7666 		*tl++ = stateidp->other[0];
7667 		*tl++ = stateidp->other[1];
7668 		*tl++ = stateidp->other[2];
7669 	}
7670 	*tl = txdr_unsigned(layoutlen);
7671 }
7672 
7673 /*
7674  * Parse the reply for a successful LayoutGet operation.
7675  */
7676 static int
7677 nfsrv_parselayoutget(struct nfsmount *nmp, struct nfsrv_descript *nd,
7678     nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp)
7679 {
7680 	uint32_t *tl;
7681 	struct nfsclflayout *flp, *prevflp, *tflp;
7682 	int cnt, error, fhcnt, gotiomode, i, iomode, j, k, l, laytype, nfhlen;
7683 	int m, mirrorcnt;
7684 	uint64_t retlen, off;
7685 	struct nfsfh *nfhp;
7686 	uint8_t *cp;
7687 	uid_t user;
7688 	gid_t grp;
7689 
7690 	NFSCL_DEBUG(4, "in nfsrv_parselayoutget\n");
7691 	error = 0;
7692 	flp = NULL;
7693 	gotiomode = -1;
7694 	NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_STATEID);
7695 	if (*tl++ != 0)
7696 		*retonclosep = 1;
7697 	else
7698 		*retonclosep = 0;
7699 	stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
7700 	NFSCL_DEBUG(4, "retoncls=%d stseq=%d\n", *retonclosep,
7701 	    (int)stateidp->seqid);
7702 	stateidp->other[0] = *tl++;
7703 	stateidp->other[1] = *tl++;
7704 	stateidp->other[2] = *tl++;
7705 	cnt = fxdr_unsigned(int, *tl);
7706 	NFSCL_DEBUG(4, "layg cnt=%d\n", cnt);
7707 	if (cnt <= 0 || cnt > 10000) {
7708 		/* Don't accept more than 10000 layouts in reply. */
7709 		error = NFSERR_BADXDR;
7710 		goto nfsmout;
7711 	}
7712 	for (i = 0; i < cnt; i++) {
7713 		/* Dissect to the layout type. */
7714 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER +
7715 		    3 * NFSX_UNSIGNED);
7716 		off = fxdr_hyper(tl); tl += 2;
7717 		retlen = fxdr_hyper(tl); tl += 2;
7718 		iomode = fxdr_unsigned(int, *tl++);
7719 		laytype = fxdr_unsigned(int, *tl);
7720 		NFSCL_DEBUG(4, "layt=%d off=%ju len=%ju iom=%d\n", laytype,
7721 		    (uintmax_t)off, (uintmax_t)retlen, iomode);
7722 		/* Ignore length of layout body for now. */
7723 		if (laytype == NFSLAYOUT_NFSV4_1_FILES) {
7724 			/* Parse the File layout up to fhcnt. */
7725 			NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED +
7726 			    NFSX_HYPER + NFSX_V4DEVICEID);
7727 			fhcnt = fxdr_unsigned(int, *(tl + 4 +
7728 			    NFSX_V4DEVICEID / NFSX_UNSIGNED));
7729 			NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
7730 			if (fhcnt < 0 || fhcnt > 100) {
7731 				/* Don't accept more than 100 file handles. */
7732 				error = NFSERR_BADXDR;
7733 				goto nfsmout;
7734 			}
7735 			if (fhcnt > 0)
7736 				flp = malloc(sizeof(*flp) + fhcnt *
7737 				    sizeof(struct nfsfh *), M_NFSFLAYOUT,
7738 				    M_WAITOK);
7739 			else
7740 				flp = malloc(sizeof(*flp), M_NFSFLAYOUT,
7741 				    M_WAITOK);
7742 			flp->nfsfl_flags = NFSFL_FILE;
7743 			flp->nfsfl_fhcnt = 0;
7744 			flp->nfsfl_devp = NULL;
7745 			flp->nfsfl_off = off;
7746 			if (flp->nfsfl_off + retlen < flp->nfsfl_off)
7747 				flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
7748 			else
7749 				flp->nfsfl_end = flp->nfsfl_off + retlen;
7750 			flp->nfsfl_iomode = iomode;
7751 			if (gotiomode == -1)
7752 				gotiomode = flp->nfsfl_iomode;
7753 			/* Ignore layout body length for now. */
7754 			NFSBCOPY(tl, flp->nfsfl_dev, NFSX_V4DEVICEID);
7755 			tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
7756 			flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++);
7757 			NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util);
7758 			mtx_lock(&nmp->nm_mtx);
7759 			if (nmp->nm_minorvers > 1 && (flp->nfsfl_util &
7760 			    NFSFLAYUTIL_IOADVISE_THRU_MDS) != 0)
7761 				nmp->nm_privflag |= NFSMNTP_IOADVISETHRUMDS;
7762 			mtx_unlock(&nmp->nm_mtx);
7763 			flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++);
7764 			flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2;
7765 			NFSCL_DEBUG(4, "stripe1=%u poff=%ju\n",
7766 			    flp->nfsfl_stripe1, (uintmax_t)flp->nfsfl_patoff);
7767 			for (j = 0; j < fhcnt; j++) {
7768 				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7769 				nfhlen = fxdr_unsigned(int, *tl);
7770 				if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) {
7771 					error = NFSERR_BADXDR;
7772 					goto nfsmout;
7773 				}
7774 				nfhp = malloc(sizeof(*nfhp) + nfhlen - 1,
7775 				    M_NFSFH, M_WAITOK);
7776 				flp->nfsfl_fh[j] = nfhp;
7777 				flp->nfsfl_fhcnt++;
7778 				nfhp->nfh_len = nfhlen;
7779 				NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen));
7780 				NFSBCOPY(cp, nfhp->nfh_fh, nfhlen);
7781 			}
7782 		} else if (laytype == NFSLAYOUT_FLEXFILE) {
7783 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED +
7784 			    NFSX_HYPER);
7785 			mirrorcnt = fxdr_unsigned(int, *(tl + 2));
7786 			NFSCL_DEBUG(4, "mirrorcnt=%d\n", mirrorcnt);
7787 			if (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS) {
7788 				error = NFSERR_BADXDR;
7789 				goto nfsmout;
7790 			}
7791 			flp = malloc(sizeof(*flp) + mirrorcnt *
7792 			    sizeof(struct nfsffm), M_NFSFLAYOUT, M_WAITOK);
7793 			flp->nfsfl_flags = NFSFL_FLEXFILE;
7794 			flp->nfsfl_mirrorcnt = mirrorcnt;
7795 			for (j = 0; j < mirrorcnt; j++)
7796 				flp->nfsfl_ffm[j].devp = NULL;
7797 			flp->nfsfl_off = off;
7798 			if (flp->nfsfl_off + retlen < flp->nfsfl_off)
7799 				flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
7800 			else
7801 				flp->nfsfl_end = flp->nfsfl_off + retlen;
7802 			flp->nfsfl_iomode = iomode;
7803 			if (gotiomode == -1)
7804 				gotiomode = flp->nfsfl_iomode;
7805 			flp->nfsfl_stripeunit = fxdr_hyper(tl);
7806 			NFSCL_DEBUG(4, "stripeunit=%ju\n",
7807 			    (uintmax_t)flp->nfsfl_stripeunit);
7808 			for (j = 0; j < mirrorcnt; j++) {
7809 				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7810 				k = fxdr_unsigned(int, *tl);
7811 				if (k < 1 || k > 128) {
7812 					error = NFSERR_BADXDR;
7813 					goto nfsmout;
7814 				}
7815 				NFSCL_DEBUG(4, "servercnt=%d\n", k);
7816 				for (l = 0; l < k; l++) {
7817 					NFSM_DISSECT(tl, uint32_t *,
7818 					    NFSX_V4DEVICEID + NFSX_STATEID +
7819 					    2 * NFSX_UNSIGNED);
7820 					if (l == 0) {
7821 						/* Just use the first server. */
7822 						NFSBCOPY(tl,
7823 						    flp->nfsfl_ffm[j].dev,
7824 						    NFSX_V4DEVICEID);
7825 						tl += (NFSX_V4DEVICEID /
7826 						    NFSX_UNSIGNED);
7827 						tl++;
7828 						flp->nfsfl_ffm[j].st.seqid =
7829 						    *tl++;
7830 						flp->nfsfl_ffm[j].st.other[0] =
7831 						    *tl++;
7832 						flp->nfsfl_ffm[j].st.other[1] =
7833 						    *tl++;
7834 						flp->nfsfl_ffm[j].st.other[2] =
7835 						    *tl++;
7836 						NFSCL_DEBUG(4, "st.seqid=%u "
7837 						 "st.o0=0x%x st.o1=0x%x "
7838 						 "st.o2=0x%x\n",
7839 						 flp->nfsfl_ffm[j].st.seqid,
7840 						 flp->nfsfl_ffm[j].st.other[0],
7841 						 flp->nfsfl_ffm[j].st.other[1],
7842 						 flp->nfsfl_ffm[j].st.other[2]);
7843 					} else
7844 						tl += ((NFSX_V4DEVICEID +
7845 						    NFSX_STATEID +
7846 						    NFSX_UNSIGNED) /
7847 						    NFSX_UNSIGNED);
7848 					fhcnt = fxdr_unsigned(int, *tl);
7849 					NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
7850 					if (fhcnt < 1 ||
7851 					    fhcnt > NFSDEV_MAXVERS) {
7852 						error = NFSERR_BADXDR;
7853 						goto nfsmout;
7854 					}
7855 					for (m = 0; m < fhcnt; m++) {
7856 						NFSM_DISSECT(tl, uint32_t *,
7857 						    NFSX_UNSIGNED);
7858 						nfhlen = fxdr_unsigned(int,
7859 						    *tl);
7860 						NFSCL_DEBUG(4, "nfhlen=%d\n",
7861 						    nfhlen);
7862 						if (nfhlen <= 0 || nfhlen >
7863 						    NFSX_V4FHMAX) {
7864 							error = NFSERR_BADXDR;
7865 							goto nfsmout;
7866 						}
7867 						NFSM_DISSECT(cp, uint8_t *,
7868 						    NFSM_RNDUP(nfhlen));
7869 						if (l == 0) {
7870 							flp->nfsfl_ffm[j].fhcnt
7871 							    = fhcnt;
7872 							nfhp = malloc(
7873 							    sizeof(*nfhp) +
7874 							    nfhlen - 1, M_NFSFH,
7875 							    M_WAITOK);
7876 							flp->nfsfl_ffm[j].fh[m]
7877 							    = nfhp;
7878 							nfhp->nfh_len = nfhlen;
7879 							NFSBCOPY(cp,
7880 							    nfhp->nfh_fh,
7881 							    nfhlen);
7882 							NFSCL_DEBUG(4,
7883 							    "got fh\n");
7884 						}
7885 					}
7886 					/* Now, get the ffsd_user/ffds_group. */
7887 					error = nfsrv_parseug(nd, 0, &user,
7888 					    &grp, curthread);
7889 					NFSCL_DEBUG(4, "after parseu=%d\n",
7890 					    error);
7891 					if (error == 0)
7892 						error = nfsrv_parseug(nd, 1,
7893 						    &user, &grp, curthread);
7894 					NFSCL_DEBUG(4, "aft parseg=%d\n",
7895 					    grp);
7896 					if (error != 0)
7897 						goto nfsmout;
7898 					NFSCL_DEBUG(4, "user=%d group=%d\n",
7899 					    user, grp);
7900 					if (l == 0) {
7901 						flp->nfsfl_ffm[j].user = user;
7902 						flp->nfsfl_ffm[j].group = grp;
7903 						NFSCL_DEBUG(4,
7904 						    "usr=%d grp=%d\n", user,
7905 						    grp);
7906 					}
7907 				}
7908 			}
7909 			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7910 			flp->nfsfl_fflags = fxdr_unsigned(uint32_t, *tl++);
7911 #ifdef notnow
7912 			/*
7913 			 * At this time, there is no flag.
7914 			 * NFSFLEXFLAG_IOADVISE_THRU_MDS might need to be
7915 			 * added, or it may never exist?
7916 			 */
7917 			mtx_lock(&nmp->nm_mtx);
7918 			if (nmp->nm_minorvers > 1 && (flp->nfsfl_fflags &
7919 			    NFSFLEXFLAG_IOADVISE_THRU_MDS) != 0)
7920 				nmp->nm_privflag |= NFSMNTP_IOADVISETHRUMDS;
7921 			mtx_unlock(&nmp->nm_mtx);
7922 #endif
7923 			flp->nfsfl_statshint = fxdr_unsigned(uint32_t, *tl);
7924 			NFSCL_DEBUG(4, "fflags=0x%x statshint=%d\n",
7925 			    flp->nfsfl_fflags, flp->nfsfl_statshint);
7926 		} else {
7927 			error = NFSERR_BADXDR;
7928 			goto nfsmout;
7929 		}
7930 		if (flp->nfsfl_iomode == gotiomode) {
7931 			/* Keep the list in increasing offset order. */
7932 			tflp = LIST_FIRST(flhp);
7933 			prevflp = NULL;
7934 			while (tflp != NULL &&
7935 			    tflp->nfsfl_off < flp->nfsfl_off) {
7936 				prevflp = tflp;
7937 				tflp = LIST_NEXT(tflp, nfsfl_list);
7938 			}
7939 			if (prevflp == NULL)
7940 				LIST_INSERT_HEAD(flhp, flp, nfsfl_list);
7941 			else
7942 				LIST_INSERT_AFTER(prevflp, flp,
7943 				    nfsfl_list);
7944 			NFSCL_DEBUG(4, "flp inserted\n");
7945 		} else {
7946 			printf("nfscl_layoutget(): got wrong iomode\n");
7947 			nfscl_freeflayout(flp);
7948 		}
7949 		flp = NULL;
7950 	}
7951 nfsmout:
7952 	NFSCL_DEBUG(4, "eo nfsrv_parselayoutget=%d\n", error);
7953 	if (error != 0 && flp != NULL)
7954 		nfscl_freeflayout(flp);
7955 	return (error);
7956 }
7957 
7958 /*
7959  * Parse a user/group digit string.
7960  */
7961 static int
7962 nfsrv_parseug(struct nfsrv_descript *nd, int dogrp, uid_t *uidp, gid_t *gidp,
7963     NFSPROC_T *p)
7964 {
7965 	uint32_t *tl;
7966 	char *cp, *str, str0[NFSV4_SMALLSTR + 1];
7967 	uint32_t len = 0;
7968 	int error = 0;
7969 
7970 	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7971 	len = fxdr_unsigned(uint32_t, *tl);
7972 	str = NULL;
7973 	if (len > NFSV4_OPAQUELIMIT) {
7974 		error = NFSERR_BADXDR;
7975 		goto nfsmout;
7976 	}
7977 	NFSCL_DEBUG(4, "nfsrv_parseug: len=%d\n", len);
7978 	if (len == 0) {
7979 		if (dogrp != 0)
7980 			*gidp = GID_NOGROUP;
7981 		else
7982 			*uidp = UID_NOBODY;
7983 		return (0);
7984 	}
7985 	if (len > NFSV4_SMALLSTR)
7986 		str = malloc(len + 1, M_TEMP, M_WAITOK);
7987 	else
7988 		str = str0;
7989 	NFSM_DISSECT(cp, char *, NFSM_RNDUP(len));
7990 	NFSBCOPY(cp, str, len);
7991 	str[len] = '\0';
7992 	NFSCL_DEBUG(4, "nfsrv_parseug: str=%s\n", str);
7993 	if (dogrp != 0)
7994 		error = nfsv4_strtogid(nd, str, len, gidp);
7995 	else
7996 		error = nfsv4_strtouid(nd, str, len, uidp);
7997 nfsmout:
7998 	if (len > NFSV4_SMALLSTR)
7999 		free(str, M_TEMP);
8000 	NFSCL_DEBUG(4, "eo nfsrv_parseug=%d\n", error);
8001 	return (error);
8002 }
8003 
8004 /*
8005  * Similar to nfsrpc_getlayout(), except that it uses nfsrpc_openlayget(),
8006  * so that it does both an Open and a Layoutget.
8007  */
8008 static int
8009 nfsrpc_getopenlayout(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
8010     int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
8011     struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
8012     struct ucred *cred, NFSPROC_T *p)
8013 {
8014 	struct nfscllayout *lyp;
8015 	struct nfsclflayout *flp;
8016 	struct nfsclflayouthead flh;
8017 	int error, islocked, layoutlen, recalled, retonclose, usecurstateid;
8018 	int layouttype, laystat;
8019 	nfsv4stateid_t stateid;
8020 	struct nfsclsession *tsep;
8021 
8022 	error = 0;
8023 	if (NFSHASFLEXFILE(nmp))
8024 		layouttype = NFSLAYOUT_FLEXFILE;
8025 	else
8026 		layouttype = NFSLAYOUT_NFSV4_1_FILES;
8027 	/*
8028 	 * If lyp is returned non-NULL, there will be a refcnt (shared lock)
8029 	 * on it, iff flp != NULL or a lock (exclusive lock) on it iff
8030 	 * flp == NULL.
8031 	 */
8032 	lyp = nfscl_getlayout(nmp->nm_clp, newfhp, newfhlen, 0, mode, &flp,
8033 	    &recalled);
8034 	NFSCL_DEBUG(4, "nfsrpc_getopenlayout nfscl_getlayout lyp=%p\n", lyp);
8035 	if (lyp == NULL)
8036 		islocked = 0;
8037 	else if (flp != NULL)
8038 		islocked = 1;
8039 	else
8040 		islocked = 2;
8041 	if ((lyp == NULL || flp == NULL) && recalled == 0) {
8042 		LIST_INIT(&flh);
8043 		tsep = nfsmnt_mdssession(nmp);
8044 		layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID +
8045 		    3 * NFSX_UNSIGNED);
8046 		if (lyp == NULL)
8047 			usecurstateid = 1;
8048 		else {
8049 			usecurstateid = 0;
8050 			stateid.seqid = lyp->nfsly_stateid.seqid;
8051 			stateid.other[0] = lyp->nfsly_stateid.other[0];
8052 			stateid.other[1] = lyp->nfsly_stateid.other[1];
8053 			stateid.other[2] = lyp->nfsly_stateid.other[2];
8054 		}
8055 		error = nfsrpc_openlayoutrpc(nmp, vp, nfhp, fhlen,
8056 		    newfhp, newfhlen, mode, op, name, namelen,
8057 		    dpp, &stateid, usecurstateid, layouttype, layoutlen,
8058 		    &retonclose, &flh, &laystat, cred, p);
8059 		NFSCL_DEBUG(4, "aft nfsrpc_openlayoutrpc laystat=%d err=%d\n",
8060 		    laystat, error);
8061 		laystat = nfsrpc_layoutgetres(nmp, vp, newfhp, newfhlen,
8062 		    &stateid, retonclose, NULL, &lyp, &flh, layouttype, laystat,
8063 		    &islocked, cred, p);
8064 	} else
8065 		error = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen,
8066 		    mode, op, name, namelen, dpp, 0, 0, cred, p, 0, 0);
8067 	if (islocked == 2)
8068 		nfscl_rellayout(lyp, 1);
8069 	else if (islocked == 1)
8070 		nfscl_rellayout(lyp, 0);
8071 	return (error);
8072 }
8073 
8074 /*
8075  * This function does an Open+LayoutGet for an NFSv4.1 mount with pNFS
8076  * enabled, only for the CLAIM_NULL case.  All other NFSv4 Opens are
8077  * handled by nfsrpc_openrpc().
8078  * For the case where op == NULL, dvp is the directory.  When op != NULL, it
8079  * can be NULL.
8080  */
8081 static int
8082 nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
8083     int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
8084     struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
8085     nfsv4stateid_t *stateidp, int usecurstateid, int layouttype,
8086     int layoutlen, int *retonclosep, struct nfsclflayouthead *flhp,
8087     int *laystatp, struct ucred *cred, NFSPROC_T *p)
8088 {
8089 	uint32_t *tl;
8090 	struct nfsrv_descript nfsd, *nd = &nfsd;
8091 	struct nfscldeleg *ndp = NULL;
8092 	struct nfsvattr nfsva;
8093 	struct nfsclsession *tsep;
8094 	uint32_t rflags, deleg;
8095 	nfsattrbit_t attrbits;
8096 	int error, ret, acesize, limitby, iomode;
8097 
8098 	*dpp = NULL;
8099 	*laystatp = ENXIO;
8100 	nfscl_reqstart(nd, NFSPROC_OPENLAYGET, nmp, nfhp, fhlen, NULL, NULL,
8101 	    0, 0, cred);
8102 	NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED);
8103 	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
8104 	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
8105 	*tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
8106 	tsep = nfsmnt_mdssession(nmp);
8107 	*tl++ = tsep->nfsess_clientid.lval[0];
8108 	*tl = tsep->nfsess_clientid.lval[1];
8109 	nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
8110 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8111 	*tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
8112 	if (NFSHASNFSV4N(nmp)) {
8113 		*tl = txdr_unsigned(NFSV4OPEN_CLAIMFH);
8114 	} else {
8115 		*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
8116 		nfsm_strtom(nd, name, namelen);
8117 	}
8118 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8119 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8120 	NFSZERO_ATTRBIT(&attrbits);
8121 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
8122 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
8123 	nfsrv_putattrbit(nd, &attrbits);
8124 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8125 	*tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
8126 	if ((mode & NFSV4OPEN_ACCESSWRITE) != 0)
8127 		iomode = NFSLAYOUTIOMODE_RW;
8128 	else
8129 		iomode = NFSLAYOUTIOMODE_READ;
8130 	nfsrv_setuplayoutget(nd, iomode, 0, UINT64_MAX, 0, stateidp,
8131 	    layouttype, layoutlen, usecurstateid);
8132 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
8133 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
8134 	if (error != 0)
8135 		return (error);
8136 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
8137 	if (nd->nd_repstat != 0)
8138 		*laystatp = nd->nd_repstat;
8139 	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8140 		/* ND_NOMOREDATA will be set if the Open operation failed. */
8141 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
8142 		    6 * NFSX_UNSIGNED);
8143 		op->nfso_stateid.seqid = *tl++;
8144 		op->nfso_stateid.other[0] = *tl++;
8145 		op->nfso_stateid.other[1] = *tl++;
8146 		op->nfso_stateid.other[2] = *tl;
8147 		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
8148 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
8149 		if (error != 0)
8150 			goto nfsmout;
8151 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
8152 		deleg = fxdr_unsigned(u_int32_t, *tl);
8153 		if (deleg == NFSV4OPEN_DELEGATEREAD ||
8154 		    deleg == NFSV4OPEN_DELEGATEWRITE) {
8155 			if (!(op->nfso_own->nfsow_clp->nfsc_flags &
8156 			      NFSCLFLAGS_FIRSTDELEG))
8157 				op->nfso_own->nfsow_clp->nfsc_flags |=
8158 				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
8159 			ndp = malloc(sizeof(struct nfscldeleg) + newfhlen,
8160 			    M_NFSCLDELEG, M_WAITOK);
8161 			LIST_INIT(&ndp->nfsdl_owner);
8162 			LIST_INIT(&ndp->nfsdl_lock);
8163 			ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
8164 			ndp->nfsdl_fhlen = newfhlen;
8165 			NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
8166 			newnfs_copyincred(cred, &ndp->nfsdl_cred);
8167 			nfscl_lockinit(&ndp->nfsdl_rwlock);
8168 			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
8169 			    NFSX_UNSIGNED);
8170 			ndp->nfsdl_stateid.seqid = *tl++;
8171 			ndp->nfsdl_stateid.other[0] = *tl++;
8172 			ndp->nfsdl_stateid.other[1] = *tl++;
8173 			ndp->nfsdl_stateid.other[2] = *tl++;
8174 			ret = fxdr_unsigned(int, *tl);
8175 			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
8176 				ndp->nfsdl_flags = NFSCLDL_WRITE;
8177 				/*
8178 				 * Indicates how much the file can grow.
8179 				 */
8180 				NFSM_DISSECT(tl, u_int32_t *,
8181 				    3 * NFSX_UNSIGNED);
8182 				limitby = fxdr_unsigned(int, *tl++);
8183 				switch (limitby) {
8184 				case NFSV4OPEN_LIMITSIZE:
8185 					ndp->nfsdl_sizelimit = fxdr_hyper(tl);
8186 					break;
8187 				case NFSV4OPEN_LIMITBLOCKS:
8188 					ndp->nfsdl_sizelimit =
8189 					    fxdr_unsigned(u_int64_t, *tl++);
8190 					ndp->nfsdl_sizelimit *=
8191 					    fxdr_unsigned(u_int64_t, *tl);
8192 					break;
8193 				default:
8194 					error = NFSERR_BADXDR;
8195 					goto nfsmout;
8196 				};
8197 			} else
8198 				ndp->nfsdl_flags = NFSCLDL_READ;
8199 			if (ret != 0)
8200 				ndp->nfsdl_flags |= NFSCLDL_RECALL;
8201 			error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, false,
8202 			    &ret, &acesize, p);
8203 			if (error != 0)
8204 				goto nfsmout;
8205 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
8206 			error = NFSERR_BADXDR;
8207 			goto nfsmout;
8208 		}
8209 		if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) != 0 ||
8210 		    nfscl_assumeposixlocks)
8211 			op->nfso_posixlock = 1;
8212 		else
8213 			op->nfso_posixlock = 0;
8214 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
8215 		/* If the 2nd element == NFS_OK, the Getattr succeeded. */
8216 		if (*++tl == 0) {
8217 			error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
8218 			    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
8219 			    NULL, NULL, NULL, p, cred);
8220 			if (error != 0)
8221 				goto nfsmout;
8222 			if (ndp != NULL) {
8223 				ndp->nfsdl_change = nfsva.na_filerev;
8224 				ndp->nfsdl_modtime = nfsva.na_mtime;
8225 				ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
8226 				*dpp = ndp;
8227 				ndp = NULL;
8228 			}
8229 			/*
8230 			 * At this point, the Open has succeeded, so set
8231 			 * nd_repstat = NFS_OK.  If the Layoutget failed,
8232 			 * this function just won't return a layout.
8233 			 */
8234 			if (nd->nd_repstat == 0) {
8235 				NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8236 				*laystatp = fxdr_unsigned(int, *++tl);
8237 				if (*laystatp == 0) {
8238 					error = nfsrv_parselayoutget(nmp, nd,
8239 					    stateidp, retonclosep, flhp);
8240 					if (error != 0)
8241 						*laystatp = error;
8242 				}
8243 			} else
8244 				nd->nd_repstat = 0;	/* Return 0 for Open. */
8245 		}
8246 	}
8247 	if (nd->nd_repstat != 0 && error == 0)
8248 		error = nd->nd_repstat;
8249 nfsmout:
8250 	free(ndp, M_NFSCLDELEG);
8251 	m_freem(nd->nd_mrep);
8252 	return (error);
8253 }
8254 
8255 /*
8256  * Similar nfsrpc_createv4(), but also does the LayoutGet operation.
8257  * Used only for mounts with pNFS enabled.
8258  */
8259 static int
8260 nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
8261     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
8262     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
8263     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
8264     int *dattrflagp, int *unlockedp, nfsv4stateid_t *stateidp,
8265     int usecurstateid, int layouttype, int layoutlen, int *retonclosep,
8266     struct nfsclflayouthead *flhp, int *laystatp)
8267 {
8268 	uint32_t *tl;
8269 	int error = 0, deleg, newone, ret, acesize, limitby;
8270 	struct nfsrv_descript nfsd, *nd = &nfsd;
8271 	struct nfsclopen *op;
8272 	struct nfscldeleg *dp = NULL;
8273 	struct nfsnode *np;
8274 	struct nfsfh *nfhp;
8275 	struct nfsclsession *tsep;
8276 	nfsattrbit_t attrbits;
8277 	nfsv4stateid_t stateid;
8278 	struct nfsmount *nmp;
8279 
8280 	nmp = VFSTONFS(dvp->v_mount);
8281 	np = VTONFS(dvp);
8282 	*laystatp = ENXIO;
8283 	*unlockedp = 0;
8284 	*nfhpp = NULL;
8285 	*dpp = NULL;
8286 	*attrflagp = 0;
8287 	*dattrflagp = 0;
8288 	if (namelen > NFS_MAXNAMLEN)
8289 		return (ENAMETOOLONG);
8290 	NFSCL_REQSTART(nd, NFSPROC_CREATELAYGET, dvp, cred);
8291 	/*
8292 	 * For V4, this is actually an Open op.
8293 	 */
8294 	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
8295 	*tl++ = txdr_unsigned(owp->nfsow_seqid);
8296 	*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
8297 	    NFSV4OPEN_ACCESSREAD);
8298 	*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
8299 	tsep = nfsmnt_mdssession(nmp);
8300 	*tl++ = tsep->nfsess_clientid.lval[0];
8301 	*tl = tsep->nfsess_clientid.lval[1];
8302 	nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
8303 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
8304 	*tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
8305 	if ((fmode & O_EXCL) != 0) {
8306 		if (NFSHASSESSPERSIST(nmp)) {
8307 			/* Use GUARDED for persistent sessions. */
8308 			*tl = txdr_unsigned(NFSCREATE_GUARDED);
8309 			nfscl_fillsattr(nd, vap, dvp, 0, 0);
8310 		} else {
8311 			/* Otherwise, use EXCLUSIVE4_1. */
8312 			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
8313 			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
8314 			*tl++ = cverf.lval[0];
8315 			*tl = cverf.lval[1];
8316 			nfscl_fillsattr(nd, vap, dvp, 0, 0);
8317 		}
8318 	} else {
8319 		*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
8320 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
8321 	}
8322 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
8323 	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
8324 	nfsm_strtom(nd, name, namelen);
8325 	/* Get the new file's handle and attributes, plus save the FH. */
8326 	NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
8327 	*tl++ = txdr_unsigned(NFSV4OP_SAVEFH);
8328 	*tl++ = txdr_unsigned(NFSV4OP_GETFH);
8329 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8330 	NFSGETATTR_ATTRBIT(&attrbits);
8331 	nfsrv_putattrbit(nd, &attrbits);
8332 	/* Get the directory's post-op attributes. */
8333 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
8334 	*tl = txdr_unsigned(NFSV4OP_PUTFH);
8335 	(void)nfsm_fhtom(nmp, nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
8336 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
8337 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8338 	nfsrv_putattrbit(nd, &attrbits);
8339 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
8340 	*tl++ = txdr_unsigned(NFSV4OP_RESTOREFH);
8341 	*tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
8342 	nfsrv_setuplayoutget(nd, NFSLAYOUTIOMODE_RW, 0, UINT64_MAX, 0, stateidp,
8343 	    layouttype, layoutlen, usecurstateid);
8344 	error = nfscl_request(nd, dvp, p, cred);
8345 	if (error != 0)
8346 		return (error);
8347 	NFSCL_DEBUG(4, "nfsrpc_createlayout stat=%d err=%d\n", nd->nd_repstat,
8348 	    error);
8349 	if (nd->nd_repstat != 0)
8350 		*laystatp = nd->nd_repstat;
8351 	NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
8352 	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8353 		NFSCL_DEBUG(4, "nfsrpc_createlayout open succeeded\n");
8354 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
8355 		    6 * NFSX_UNSIGNED);
8356 		stateid.seqid = *tl++;
8357 		stateid.other[0] = *tl++;
8358 		stateid.other[1] = *tl++;
8359 		stateid.other[2] = *tl;
8360 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
8361 		if (error != 0)
8362 			goto nfsmout;
8363 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
8364 		deleg = fxdr_unsigned(int, *tl);
8365 		if (deleg == NFSV4OPEN_DELEGATEREAD ||
8366 		    deleg == NFSV4OPEN_DELEGATEWRITE) {
8367 			if (!(owp->nfsow_clp->nfsc_flags &
8368 			      NFSCLFLAGS_FIRSTDELEG))
8369 				owp->nfsow_clp->nfsc_flags |=
8370 				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
8371 			dp = malloc(sizeof(struct nfscldeleg) + NFSX_V4FHMAX,
8372 			    M_NFSCLDELEG, M_WAITOK);
8373 			LIST_INIT(&dp->nfsdl_owner);
8374 			LIST_INIT(&dp->nfsdl_lock);
8375 			dp->nfsdl_clp = owp->nfsow_clp;
8376 			newnfs_copyincred(cred, &dp->nfsdl_cred);
8377 			nfscl_lockinit(&dp->nfsdl_rwlock);
8378 			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
8379 			    NFSX_UNSIGNED);
8380 			dp->nfsdl_stateid.seqid = *tl++;
8381 			dp->nfsdl_stateid.other[0] = *tl++;
8382 			dp->nfsdl_stateid.other[1] = *tl++;
8383 			dp->nfsdl_stateid.other[2] = *tl++;
8384 			ret = fxdr_unsigned(int, *tl);
8385 			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
8386 				dp->nfsdl_flags = NFSCLDL_WRITE;
8387 				/*
8388 				 * Indicates how much the file can grow.
8389 				 */
8390 				NFSM_DISSECT(tl, u_int32_t *,
8391 				    3 * NFSX_UNSIGNED);
8392 				limitby = fxdr_unsigned(int, *tl++);
8393 				switch (limitby) {
8394 				case NFSV4OPEN_LIMITSIZE:
8395 					dp->nfsdl_sizelimit = fxdr_hyper(tl);
8396 					break;
8397 				case NFSV4OPEN_LIMITBLOCKS:
8398 					dp->nfsdl_sizelimit =
8399 					    fxdr_unsigned(u_int64_t, *tl++);
8400 					dp->nfsdl_sizelimit *=
8401 					    fxdr_unsigned(u_int64_t, *tl);
8402 					break;
8403 				default:
8404 					error = NFSERR_BADXDR;
8405 					goto nfsmout;
8406 				};
8407 			} else {
8408 				dp->nfsdl_flags = NFSCLDL_READ;
8409 			}
8410 			if (ret != 0)
8411 				dp->nfsdl_flags |= NFSCLDL_RECALL;
8412 			error = nfsrv_dissectace(nd, &dp->nfsdl_ace, false,
8413 			    &ret, &acesize, p);
8414 			if (error != 0)
8415 				goto nfsmout;
8416 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
8417 			error = NFSERR_BADXDR;
8418 			goto nfsmout;
8419 		}
8420 
8421 		/* Now, we should have the status for the SaveFH. */
8422 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8423 		if (*++tl == 0) {
8424 			NFSCL_DEBUG(4, "nfsrpc_createlayout SaveFH ok\n");
8425 			/*
8426 			 * Now, process the GetFH and Getattr for the newly
8427 			 * created file. nfscl_mtofh() will set
8428 			 * ND_NOMOREDATA if these weren't successful.
8429 			 */
8430 			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
8431 			NFSCL_DEBUG(4, "aft nfscl_mtofh err=%d\n", error);
8432 			if (error != 0)
8433 				goto nfsmout;
8434 		} else
8435 			nd->nd_flag |= ND_NOMOREDATA;
8436 		/* Now we have the PutFH and Getattr for the directory. */
8437 		if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8438 			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8439 			if (*++tl != 0)
8440 				nd->nd_flag |= ND_NOMOREDATA;
8441 			else {
8442 				NFSM_DISSECT(tl, uint32_t *, 2 *
8443 				    NFSX_UNSIGNED);
8444 				if (*++tl != 0)
8445 					nd->nd_flag |= ND_NOMOREDATA;
8446 			}
8447 		}
8448 		if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8449 			/* Load the directory attributes. */
8450 			error = nfsm_loadattr(nd, dnap);
8451 			NFSCL_DEBUG(4, "aft nfsm_loadattr err=%d\n", error);
8452 			if (error != 0)
8453 				goto nfsmout;
8454 			*dattrflagp = 1;
8455 			if (dp != NULL && *attrflagp != 0) {
8456 				dp->nfsdl_change = nnap->na_filerev;
8457 				dp->nfsdl_modtime = nnap->na_mtime;
8458 				dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
8459 			}
8460 			/*
8461 			 * We can now complete the Open state.
8462 			 */
8463 			nfhp = *nfhpp;
8464 			if (dp != NULL) {
8465 				dp->nfsdl_fhlen = nfhp->nfh_len;
8466 				NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh,
8467 				    nfhp->nfh_len);
8468 			}
8469 			/*
8470 			 * Get an Open structure that will be
8471 			 * attached to the OpenOwner, acquired already.
8472 			 */
8473 			error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len,
8474 			    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
8475 			    cred, p, NULL, &op, &newone, NULL, 0, false);
8476 			if (error != 0)
8477 				goto nfsmout;
8478 			op->nfso_stateid = stateid;
8479 			newnfs_copyincred(cred, &op->nfso_cred);
8480 
8481 			nfscl_openrelease(nmp, op, error, newone);
8482 			*unlockedp = 1;
8483 
8484 			/* Now, handle the RestoreFH and LayoutGet. */
8485 			if (nd->nd_repstat == 0) {
8486 				NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
8487 				*laystatp = fxdr_unsigned(int, *(tl + 3));
8488 				if (*laystatp == 0) {
8489 					error = nfsrv_parselayoutget(nmp, nd,
8490 					    stateidp, retonclosep, flhp);
8491 					if (error != 0)
8492 						*laystatp = error;
8493 				}
8494 				NFSCL_DEBUG(4, "aft nfsrv_parselayout err=%d\n",
8495 				    error);
8496 			} else
8497 				nd->nd_repstat = 0;
8498 		}
8499 	}
8500 	if (nd->nd_repstat != 0 && error == 0)
8501 		error = nd->nd_repstat;
8502 	if (error == NFSERR_STALECLIENTID)
8503 		nfscl_initiate_recovery(owp->nfsow_clp);
8504 nfsmout:
8505 	NFSCL_DEBUG(4, "eo nfsrpc_createlayout err=%d\n", error);
8506 	if (error == 0)
8507 		*dpp = dp;
8508 	else
8509 		free(dp, M_NFSCLDELEG);
8510 	m_freem(nd->nd_mrep);
8511 	return (error);
8512 }
8513 
8514 /*
8515  * Similar to nfsrpc_getopenlayout(), except that it used for the Create case.
8516  */
8517 static int
8518 nfsrpc_getcreatelayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
8519     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
8520     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
8521     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
8522     int *dattrflagp, int *unlockedp)
8523 {
8524 	struct nfscllayout *lyp;
8525 	struct nfsclflayouthead flh;
8526 	struct nfsfh *nfhp;
8527 	struct nfsclsession *tsep;
8528 	struct nfsmount *nmp;
8529 	nfsv4stateid_t stateid;
8530 	int error, layoutlen, layouttype, retonclose, laystat;
8531 
8532 	error = 0;
8533 	nmp = VFSTONFS(dvp->v_mount);
8534 	if (NFSHASFLEXFILE(nmp))
8535 		layouttype = NFSLAYOUT_FLEXFILE;
8536 	else
8537 		layouttype = NFSLAYOUT_NFSV4_1_FILES;
8538 	LIST_INIT(&flh);
8539 	tsep = nfsmnt_mdssession(nmp);
8540 	layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID + 3 * NFSX_UNSIGNED);
8541 	error = nfsrpc_createlayout(dvp, name, namelen, vap, cverf, fmode,
8542 	    owp, dpp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
8543 	    unlockedp, &stateid, 1, layouttype, layoutlen, &retonclose,
8544 	    &flh, &laystat);
8545 	NFSCL_DEBUG(4, "aft nfsrpc_createlayoutrpc laystat=%d err=%d\n",
8546 	    laystat, error);
8547 	lyp = NULL;
8548 	if (laystat == 0) {
8549 		nfhp = *nfhpp;
8550 		laystat = nfsrpc_layoutgetres(nmp, dvp, nfhp->nfh_fh,
8551 		    nfhp->nfh_len, &stateid, retonclose, NULL, &lyp, &flh,
8552 		    layouttype, laystat, NULL, cred, p);
8553 	} else
8554 		laystat = nfsrpc_layoutgetres(nmp, dvp, NULL, 0, &stateid,
8555 		    retonclose, NULL, &lyp, &flh, layouttype, laystat, NULL,
8556 		    cred, p);
8557 	if (laystat == 0)
8558 		nfscl_rellayout(lyp, 0);
8559 	return (error);
8560 }
8561 
8562 /*
8563  * Process the results of a layoutget() operation.
8564  */
8565 static int
8566 nfsrpc_layoutgetres(struct nfsmount *nmp, vnode_t vp, uint8_t *newfhp,
8567     int newfhlen, nfsv4stateid_t *stateidp, int retonclose, uint32_t *notifybit,
8568     struct nfscllayout **lypp, struct nfsclflayouthead *flhp, int layouttype,
8569     int laystat, int *islockedp, struct ucred *cred, NFSPROC_T *p)
8570 {
8571 	struct nfsclflayout *tflp;
8572 	struct nfscldevinfo *dip;
8573 	uint8_t *dev;
8574 	int i, mirrorcnt;
8575 
8576 	if (laystat == NFSERR_UNKNLAYOUTTYPE) {
8577 		NFSLOCKMNT(nmp);
8578 		if (!NFSHASFLEXFILE(nmp)) {
8579 			/* Switch to using Flex File Layout. */
8580 			nmp->nm_state |= NFSSTA_FLEXFILE;
8581 		} else if (layouttype == NFSLAYOUT_FLEXFILE) {
8582 			/* Disable pNFS. */
8583 			NFSCL_DEBUG(1, "disable PNFS\n");
8584 			nmp->nm_state &= ~(NFSSTA_PNFS | NFSSTA_FLEXFILE);
8585 		}
8586 		NFSUNLOCKMNT(nmp);
8587 	}
8588 	if (laystat == 0) {
8589 		NFSCL_DEBUG(4, "nfsrpc_layoutgetres at FOREACH\n");
8590 		LIST_FOREACH(tflp, flhp, nfsfl_list) {
8591 			if (layouttype == NFSLAYOUT_FLEXFILE)
8592 				mirrorcnt = tflp->nfsfl_mirrorcnt;
8593 			else
8594 				mirrorcnt = 1;
8595 			for (i = 0; i < mirrorcnt; i++) {
8596 				laystat = nfscl_adddevinfo(nmp, NULL, i, tflp);
8597 				NFSCL_DEBUG(4, "aft adddev=%d\n", laystat);
8598 				if (laystat != 0) {
8599 					if (layouttype == NFSLAYOUT_FLEXFILE)
8600 						dev = tflp->nfsfl_ffm[i].dev;
8601 					else
8602 						dev = tflp->nfsfl_dev;
8603 					laystat = nfsrpc_getdeviceinfo(nmp, dev,
8604 					    layouttype, notifybit, &dip, cred,
8605 					    p);
8606 					NFSCL_DEBUG(4, "aft nfsrpc_gdi=%d\n",
8607 					    laystat);
8608 					if (laystat != 0)
8609 						goto out;
8610 					laystat = nfscl_adddevinfo(nmp, dip, i,
8611 					    tflp);
8612 					if (laystat != 0)
8613 						printf("nfsrpc_layoutgetresout"
8614 						    ": cannot add\n");
8615 				}
8616 			}
8617 		}
8618 	}
8619 out:
8620 	if (laystat == 0) {
8621 		/*
8622 		 * nfscl_layout() always returns with the nfsly_lock
8623 		 * set to a refcnt (shared lock).
8624 		 * Passing in dvp is sufficient, since it is only used to
8625 		 * get the fsid for the file system.
8626 		 */
8627 		laystat = nfscl_layout(nmp, vp, newfhp, newfhlen, stateidp,
8628 		    layouttype, retonclose, flhp, lypp, cred, p);
8629 		NFSCL_DEBUG(4, "nfsrpc_layoutgetres: aft nfscl_layout=%d\n",
8630 		    laystat);
8631 		if (laystat == 0 && islockedp != NULL)
8632 			*islockedp = 1;
8633 	}
8634 	return (laystat);
8635 }
8636 
8637 /*
8638  * nfs copy_file_range operation.
8639  */
8640 int
8641 nfsrpc_copy_file_range(vnode_t invp, off_t *inoffp, vnode_t outvp,
8642     off_t *outoffp, size_t *lenp, unsigned int flags, int *inattrflagp,
8643     struct nfsvattr *innap, int *outattrflagp, struct nfsvattr *outnap,
8644     struct ucred *cred, bool consecutive, bool *must_commitp)
8645 {
8646 	int commit, error, expireret = 0, retrycnt;
8647 	u_int32_t clidrev = 0;
8648 	struct nfsmount *nmp = VFSTONFS(invp->v_mount);
8649 	struct nfsfh *innfhp = NULL, *outnfhp = NULL;
8650 	nfsv4stateid_t instateid, outstateid;
8651 	void *inlckp, *outlckp;
8652 
8653 	if (nmp->nm_clp != NULL)
8654 		clidrev = nmp->nm_clp->nfsc_clientidrev;
8655 	innfhp = VTONFS(invp)->n_fhp;
8656 	outnfhp = VTONFS(outvp)->n_fhp;
8657 	retrycnt = 0;
8658 	do {
8659 		/* Get both stateids. */
8660 		inlckp = NULL;
8661 		nfscl_getstateid(invp, innfhp->nfh_fh, innfhp->nfh_len,
8662 		    NFSV4OPEN_ACCESSREAD, 0, NULL, curthread, &instateid,
8663 		    &inlckp);
8664 		outlckp = NULL;
8665 		nfscl_getstateid(outvp, outnfhp->nfh_fh, outnfhp->nfh_len,
8666 		    NFSV4OPEN_ACCESSWRITE, 0, NULL, curthread, &outstateid,
8667 		    &outlckp);
8668 
8669 		error = nfsrpc_copyrpc(invp, *inoffp, outvp, *outoffp, lenp,
8670 		    &instateid, &outstateid, innap, inattrflagp, outnap,
8671 		    outattrflagp, consecutive, &commit, cred, curthread);
8672 		if (error == 0) {
8673 			if (commit != NFSWRITE_FILESYNC)
8674 				*must_commitp = true;
8675 			*inoffp += *lenp;
8676 			*outoffp += *lenp;
8677 		} else if (error == NFSERR_STALESTATEID)
8678 			nfscl_initiate_recovery(nmp->nm_clp);
8679 		if (inlckp != NULL)
8680 			nfscl_lockderef(inlckp);
8681 		if (outlckp != NULL)
8682 			nfscl_lockderef(outlckp);
8683 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8684 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8685 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
8686 			(void) nfs_catnap(PZERO, error, "nfs_cfr");
8687 		} else if ((error == NFSERR_EXPIRED || (!NFSHASINT(nmp) &&
8688 		    error == NFSERR_BADSTATEID)) && clidrev != 0) {
8689 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev,
8690 			    curthread);
8691 		} else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp)) {
8692 			error = EIO;
8693 		}
8694 		retrycnt++;
8695 	} while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
8696 	    error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
8697 	      error == NFSERR_STALEDONTRECOVER ||
8698 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
8699 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
8700 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
8701 	if (error != 0 && (retrycnt >= 4 ||
8702 	    error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
8703 	      error == NFSERR_STALEDONTRECOVER))
8704 		error = EIO;
8705 	return (error);
8706 }
8707 
8708 /*
8709  * The copy RPC.
8710  */
8711 static int
8712 nfsrpc_copyrpc(vnode_t invp, off_t inoff, vnode_t outvp, off_t outoff,
8713     size_t *lenp, nfsv4stateid_t *instateidp, nfsv4stateid_t *outstateidp,
8714     struct nfsvattr *innap, int *inattrflagp, struct nfsvattr *outnap,
8715     int *outattrflagp, bool consecutive, int *commitp, struct ucred *cred,
8716     NFSPROC_T *p)
8717 {
8718 	uint32_t *tl, *opcntp;
8719 	int error;
8720 	struct nfsrv_descript nfsd;
8721 	struct nfsrv_descript *nd = &nfsd;
8722 	struct nfsmount *nmp;
8723 	nfsattrbit_t attrbits;
8724 	struct vattr va;
8725 	uint64_t len;
8726 
8727 	nmp = VFSTONFS(invp->v_mount);
8728 	*inattrflagp = *outattrflagp = 0;
8729 	*commitp = NFSWRITE_UNSTABLE;
8730 	len = *lenp;
8731 	*lenp = 0;
8732 	if (len > nfs_maxcopyrange)
8733 		len = nfs_maxcopyrange;
8734 	nfscl_reqstart(nd, NFSPROC_COPY, nmp, VTONFS(invp)->n_fhp->nfh_fh,
8735 	    VTONFS(invp)->n_fhp->nfh_len, &opcntp, NULL, 0, 0, cred);
8736 	/*
8737 	 * First do a Setattr of atime to the server's clock
8738 	 * time.  The FreeBSD "collective" was of the opinion
8739 	 * that setting atime was necessary for this syscall.
8740 	 * Do the Setattr before the Copy, so that it can be
8741 	 * handled well if the server replies NFSERR_DELAY to
8742 	 * the Setattr operation.
8743 	 */
8744 	if ((nmp->nm_mountp->mnt_flag & MNT_NOATIME) == 0) {
8745 		NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8746 		*tl = txdr_unsigned(NFSV4OP_SETATTR);
8747 		nfsm_stateidtom(nd, instateidp, NFSSTATEID_PUTSTATEID);
8748 		VATTR_NULL(&va);
8749 		va.va_atime.tv_sec = va.va_atime.tv_nsec = 0;
8750 		va.va_vaflags = VA_UTIMES_NULL;
8751 		nfscl_fillsattr(nd, &va, invp, 0, 0);
8752 		/* Bump opcnt from 7 to 8. */
8753 		*opcntp = txdr_unsigned(8);
8754 	}
8755 
8756 	/* Now Getattr the invp attributes. */
8757 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8758 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8759 	NFSGETATTR_ATTRBIT(&attrbits);
8760 	nfsrv_putattrbit(nd, &attrbits);
8761 
8762 	/* Set outvp. */
8763 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8764 	*tl = txdr_unsigned(NFSV4OP_PUTFH);
8765 	(void)nfsm_fhtom(nmp, nd, VTONFS(outvp)->n_fhp->nfh_fh,
8766 	    VTONFS(outvp)->n_fhp->nfh_len, 0);
8767 
8768 	/* Do the Copy. */
8769 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8770 	*tl = txdr_unsigned(NFSV4OP_COPY);
8771 	nfsm_stateidtom(nd, instateidp, NFSSTATEID_PUTSTATEID);
8772 	nfsm_stateidtom(nd, outstateidp, NFSSTATEID_PUTSTATEID);
8773 	NFSM_BUILD(tl, uint32_t *, 3 * NFSX_HYPER + 4 * NFSX_UNSIGNED);
8774 	txdr_hyper(inoff, tl); tl += 2;
8775 	txdr_hyper(outoff, tl); tl += 2;
8776 	txdr_hyper(len, tl); tl += 2;
8777 	if (consecutive)
8778 		*tl++ = newnfs_true;
8779 	else
8780 		*tl++ = newnfs_false;
8781 	*tl++ = newnfs_true;
8782 	*tl++ = 0;
8783 
8784 	/* Get the outvp attributes. */
8785 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8786 	NFSWRITEGETATTR_ATTRBIT(&attrbits);
8787 	nfsrv_putattrbit(nd, &attrbits);
8788 
8789 	error = nfscl_request(nd, invp, p, cred);
8790 	if (error != 0)
8791 		return (error);
8792 	/* Skip over the Setattr reply. */
8793 	if ((nd->nd_flag & ND_NOMOREDATA) == 0 &&
8794 	    (nmp->nm_mountp->mnt_flag & MNT_NOATIME) == 0) {
8795 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8796 		if (*(tl + 1) == 0) {
8797 			error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
8798 			if (error != 0)
8799 				goto nfsmout;
8800 		} else
8801 			nd->nd_flag |= ND_NOMOREDATA;
8802 	}
8803 	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8804 		/* Get the input file's attributes. */
8805 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8806 		if (*(tl + 1) == 0) {
8807 			error = nfsm_loadattr(nd, innap);
8808 			if (error != 0)
8809 				goto nfsmout;
8810 			*inattrflagp = 1;
8811 		} else
8812 			nd->nd_flag |= ND_NOMOREDATA;
8813 	}
8814 	/* Skip over return stat for PutFH. */
8815 	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8816 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8817 		if (*++tl != 0)
8818 			nd->nd_flag |= ND_NOMOREDATA;
8819 	}
8820 	/* Skip over return stat for Copy. */
8821 	if ((nd->nd_flag & ND_NOMOREDATA) == 0)
8822 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8823 	if (nd->nd_repstat == 0) {
8824 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
8825 		if (*tl != 0) {
8826 			/* There should be no callback ids. */
8827 			error = NFSERR_BADXDR;
8828 			goto nfsmout;
8829 		}
8830 		NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED +
8831 		    NFSX_VERF);
8832 		len = fxdr_hyper(tl); tl += 2;
8833 		*commitp = fxdr_unsigned(int, *tl++);
8834 		NFSLOCKMNT(nmp);
8835 		if (!NFSHASWRITEVERF(nmp)) {
8836 			NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
8837 			NFSSETWRITEVERF(nmp);
8838 	    	} else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) {
8839 			NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
8840 			nd->nd_repstat = NFSERR_STALEWRITEVERF;
8841 		}
8842 		NFSUNLOCKMNT(nmp);
8843 		tl += (NFSX_VERF / NFSX_UNSIGNED);
8844 		if (nd->nd_repstat == 0 && *++tl != newnfs_true)
8845 			/* Must be a synchronous copy. */
8846 			nd->nd_repstat = NFSERR_NOTSUPP;
8847 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8848 		error = nfsm_loadattr(nd, outnap);
8849 		if (error == 0)
8850 			*outattrflagp = NFS_LATTR_NOSHRINK;
8851 		if (nd->nd_repstat == 0)
8852 			*lenp = len;
8853 	} else if (nd->nd_repstat == NFSERR_OFFLOADNOREQS) {
8854 		/*
8855 		 * For the case where consecutive is not supported, but
8856 		 * synchronous is supported, we can try consecutive == false
8857 		 * by returning this error.  Otherwise, return NFSERR_NOTSUPP,
8858 		 * since Copy cannot be done.
8859 		 */
8860 		if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8861 			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8862 			if (!consecutive || *++tl == newnfs_false)
8863 				nd->nd_repstat = NFSERR_NOTSUPP;
8864 		} else
8865 			nd->nd_repstat = NFSERR_BADXDR;
8866 	}
8867 	if (error == 0)
8868 		error = nd->nd_repstat;
8869 nfsmout:
8870 	m_freem(nd->nd_mrep);
8871 	return (error);
8872 }
8873 
8874 /*
8875  * Seek operation.
8876  */
8877 int
8878 nfsrpc_seek(vnode_t vp, off_t *offp, bool *eofp, int content,
8879     struct ucred *cred, struct nfsvattr *nap, int *attrflagp)
8880 {
8881 	int error, expireret = 0, retrycnt;
8882 	u_int32_t clidrev = 0;
8883 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
8884 	struct nfsnode *np = VTONFS(vp);
8885 	struct nfsfh *nfhp = NULL;
8886 	nfsv4stateid_t stateid;
8887 	void *lckp;
8888 
8889 	if (nmp->nm_clp != NULL)
8890 		clidrev = nmp->nm_clp->nfsc_clientidrev;
8891 	nfhp = np->n_fhp;
8892 	retrycnt = 0;
8893 	do {
8894 		lckp = NULL;
8895 		nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
8896 		    NFSV4OPEN_ACCESSREAD, 0, cred, curthread, &stateid, &lckp);
8897 		error = nfsrpc_seekrpc(vp, offp, &stateid, eofp, content,
8898 		    nap, attrflagp, cred);
8899 		if (error == NFSERR_STALESTATEID)
8900 			nfscl_initiate_recovery(nmp->nm_clp);
8901 		if (lckp != NULL)
8902 			nfscl_lockderef(lckp);
8903 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8904 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8905 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
8906 			(void) nfs_catnap(PZERO, error, "nfs_seek");
8907 		} else if ((error == NFSERR_EXPIRED || (!NFSHASINT(nmp) &&
8908 		    error == NFSERR_BADSTATEID)) && clidrev != 0) {
8909 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev,
8910 			    curthread);
8911 		} else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp)) {
8912 			error = EIO;
8913 		}
8914 		retrycnt++;
8915 	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8916 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8917 	    error == NFSERR_BADSESSION ||
8918 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
8919 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
8920 	     expireret == 0 && clidrev != 0 && retrycnt < 4) ||
8921 	    (error == NFSERR_OPENMODE && retrycnt < 4));
8922 	if (error && retrycnt >= 4)
8923 		error = EIO;
8924 	return (error);
8925 }
8926 
8927 /*
8928  * The seek RPC.
8929  */
8930 static int
8931 nfsrpc_seekrpc(vnode_t vp, off_t *offp, nfsv4stateid_t *stateidp, bool *eofp,
8932     int content, struct nfsvattr *nap, int *attrflagp, struct ucred *cred)
8933 {
8934 	uint32_t *tl;
8935 	int error;
8936 	struct nfsrv_descript nfsd;
8937 	struct nfsrv_descript *nd = &nfsd;
8938 	nfsattrbit_t attrbits;
8939 
8940 	*attrflagp = 0;
8941 	NFSCL_REQSTART(nd, NFSPROC_SEEK, vp, cred);
8942 	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
8943 	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
8944 	txdr_hyper(*offp, tl); tl += 2;
8945 	*tl++ = txdr_unsigned(content);
8946 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8947 	NFSGETATTR_ATTRBIT(&attrbits);
8948 	nfsrv_putattrbit(nd, &attrbits);
8949 	error = nfscl_request(nd, vp, curthread, cred);
8950 	if (error != 0)
8951 		return (error);
8952 	if (nd->nd_repstat == 0) {
8953 		NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED + NFSX_HYPER);
8954 		if (*tl++ == newnfs_true)
8955 			*eofp = true;
8956 		else
8957 			*eofp = false;
8958 		*offp = fxdr_hyper(tl);
8959 		/* Just skip over Getattr op status. */
8960 		error = nfsm_loadattr(nd, nap);
8961 		if (error == 0)
8962 			*attrflagp = 1;
8963 	}
8964 	error = nd->nd_repstat;
8965 nfsmout:
8966 	m_freem(nd->nd_mrep);
8967 	return (error);
8968 }
8969 
8970 /*
8971  * The getextattr RPC.
8972  */
8973 int
8974 nfsrpc_getextattr(vnode_t vp, const char *name, struct uio *uiop, ssize_t *lenp,
8975     struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p)
8976 {
8977 	uint32_t *tl;
8978 	int error;
8979 	struct nfsrv_descript nfsd;
8980 	struct nfsrv_descript *nd = &nfsd;
8981 	nfsattrbit_t attrbits;
8982 	uint32_t len, len2;
8983 
8984 	*attrflagp = 0;
8985 	NFSCL_REQSTART(nd, NFSPROC_GETEXTATTR, vp, cred);
8986 	nfsm_strtom(nd, name, strlen(name));
8987 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8988 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8989 	NFSGETATTR_ATTRBIT(&attrbits);
8990 	nfsrv_putattrbit(nd, &attrbits);
8991 	error = nfscl_request(nd, vp, p, cred);
8992 	if (error != 0)
8993 		return (error);
8994 	if (nd->nd_repstat == 0) {
8995 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
8996 		len = fxdr_unsigned(uint32_t, *tl);
8997 		/* Sanity check lengths. */
8998 		if (uiop != NULL && len > 0 && len <= IOSIZE_MAX &&
8999 		    uiop->uio_resid <= UINT32_MAX) {
9000 			len2 = uiop->uio_resid;
9001 			if (len2 >= len)
9002 				error = nfsm_mbufuio(nd, uiop, len);
9003 			else {
9004 				error = nfsm_mbufuio(nd, uiop, len2);
9005 				if (error == 0) {
9006 					/*
9007 					 * nfsm_mbufuio() advances to a multiple
9008 					 * of 4, so round up len2 as well.  Then
9009 					 * we need to advance over the rest of
9010 					 * the data, rounding up the remaining
9011 					 * length.
9012 					 */
9013 					len2 = NFSM_RNDUP(len2);
9014 					len2 = NFSM_RNDUP(len - len2);
9015 					if (len2 > 0)
9016 						error = nfsm_advance(nd, len2,
9017 						    -1);
9018 				}
9019 			}
9020 		} else if (uiop == NULL && len > 0) {
9021 			/* Just wants the length and not the data. */
9022 			error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
9023 		} else if (len > 0)
9024 			error = ENOATTR;
9025 		if (error != 0)
9026 			goto nfsmout;
9027 		*lenp = len;
9028 		/* Just skip over Getattr op status. */
9029 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
9030 		error = nfsm_loadattr(nd, nap);
9031 		if (error == 0)
9032 			*attrflagp = 1;
9033 	}
9034 	if (error == 0)
9035 		error = nd->nd_repstat;
9036 nfsmout:
9037 	m_freem(nd->nd_mrep);
9038 	return (error);
9039 }
9040 
9041 /*
9042  * The setextattr RPC.
9043  */
9044 int
9045 nfsrpc_setextattr(vnode_t vp, const char *name, struct uio *uiop,
9046     struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p)
9047 {
9048 	uint32_t *tl;
9049 	int error;
9050 	struct nfsrv_descript nfsd;
9051 	struct nfsrv_descript *nd = &nfsd;
9052 	nfsattrbit_t attrbits;
9053 
9054 	*attrflagp = 0;
9055 	NFSCL_REQSTART(nd, NFSPROC_SETEXTATTR, vp, cred);
9056 	if (uiop->uio_resid > nd->nd_maxreq) {
9057 		/* nd_maxreq is set by NFSCL_REQSTART(). */
9058 		m_freem(nd->nd_mreq);
9059 		return (EINVAL);
9060 	}
9061 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
9062 	*tl = txdr_unsigned(NFSV4SXATTR_EITHER);
9063 	nfsm_strtom(nd, name, strlen(name));
9064 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
9065 	*tl = txdr_unsigned(uiop->uio_resid);
9066 	error = nfsm_uiombuf(nd, uiop, uiop->uio_resid);
9067 	if (error != 0) {
9068 		m_freem(nd->nd_mreq);
9069 		return (error);
9070 	}
9071 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
9072 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
9073 	NFSGETATTR_ATTRBIT(&attrbits);
9074 	nfsrv_putattrbit(nd, &attrbits);
9075 	error = nfscl_request(nd, vp, p, cred);
9076 	if (error != 0)
9077 		return (error);
9078 	if (nd->nd_repstat == 0) {
9079 		/* Just skip over the reply and Getattr op status. */
9080 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + 3 *
9081 		    NFSX_UNSIGNED);
9082 		error = nfsm_loadattr(nd, nap);
9083 		if (error == 0)
9084 			*attrflagp = 1;
9085 	}
9086 	if (error == 0)
9087 		error = nd->nd_repstat;
9088 nfsmout:
9089 	m_freem(nd->nd_mrep);
9090 	return (error);
9091 }
9092 
9093 /*
9094  * The removeextattr RPC.
9095  */
9096 int
9097 nfsrpc_rmextattr(vnode_t vp, const char *name, struct nfsvattr *nap,
9098     int *attrflagp, struct ucred *cred, NFSPROC_T *p)
9099 {
9100 	uint32_t *tl;
9101 	int error;
9102 	struct nfsrv_descript nfsd;
9103 	struct nfsrv_descript *nd = &nfsd;
9104 	nfsattrbit_t attrbits;
9105 
9106 	*attrflagp = 0;
9107 	NFSCL_REQSTART(nd, NFSPROC_RMEXTATTR, vp, cred);
9108 	nfsm_strtom(nd, name, strlen(name));
9109 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
9110 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
9111 	NFSGETATTR_ATTRBIT(&attrbits);
9112 	nfsrv_putattrbit(nd, &attrbits);
9113 	error = nfscl_request(nd, vp, p, cred);
9114 	if (error != 0)
9115 		return (error);
9116 	if (nd->nd_repstat == 0) {
9117 		/* Just skip over the reply and Getattr op status. */
9118 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + 3 *
9119 		    NFSX_UNSIGNED);
9120 		error = nfsm_loadattr(nd, nap);
9121 		if (error == 0)
9122 			*attrflagp = 1;
9123 	}
9124 	if (error == 0)
9125 		error = nd->nd_repstat;
9126 nfsmout:
9127 	m_freem(nd->nd_mrep);
9128 	return (error);
9129 }
9130 
9131 /*
9132  * The listextattr RPC.
9133  */
9134 int
9135 nfsrpc_listextattr(vnode_t vp, uint64_t *cookiep, struct uio *uiop,
9136     size_t *lenp, bool *eofp, struct nfsvattr *nap, int *attrflagp,
9137     struct ucred *cred, NFSPROC_T *p)
9138 {
9139 	uint32_t *tl;
9140 	int cnt, error, i, len;
9141 	struct nfsrv_descript nfsd;
9142 	struct nfsrv_descript *nd = &nfsd;
9143 	nfsattrbit_t attrbits;
9144 	u_char c;
9145 
9146 	*attrflagp = 0;
9147 	NFSCL_REQSTART(nd, NFSPROC_LISTEXTATTR, vp, cred);
9148 	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
9149 	txdr_hyper(*cookiep, tl); tl += 2;
9150 	*tl++ = txdr_unsigned(*lenp);
9151 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
9152 	NFSGETATTR_ATTRBIT(&attrbits);
9153 	nfsrv_putattrbit(nd, &attrbits);
9154 	error = nfscl_request(nd, vp, p, cred);
9155 	if (error != 0)
9156 		return (error);
9157 	*eofp = true;
9158 	*lenp = 0;
9159 	if (nd->nd_repstat == 0) {
9160 		NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
9161 		*cookiep = fxdr_hyper(tl); tl += 2;
9162 		cnt = fxdr_unsigned(int, *tl);
9163 		if (cnt < 0) {
9164 			error = EBADRPC;
9165 			goto nfsmout;
9166 		}
9167 		for (i = 0; i < cnt; i++) {
9168 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
9169 			len = fxdr_unsigned(int, *tl);
9170 			if (len <= 0 || len > EXTATTR_MAXNAMELEN) {
9171 				error = EBADRPC;
9172 				goto nfsmout;
9173 			}
9174 			if (uiop == NULL)
9175 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
9176 			else if (uiop->uio_resid >= len + 1) {
9177 				c = len;
9178 				error = uiomove(&c, sizeof(c), uiop);
9179 				if (error == 0)
9180 					error = nfsm_mbufuio(nd, uiop, len);
9181 			} else {
9182 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
9183 				*eofp = false;
9184 			}
9185 			if (error != 0)
9186 				goto nfsmout;
9187 			*lenp += (len + 1);
9188 		}
9189 		/* Get the eof and skip over the Getattr op status. */
9190 		NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED);
9191 		/*
9192 		 * *eofp is set false above, because it wasn't able to copy
9193 		 * all of the reply.
9194 		 */
9195 		if (*eofp && *tl == 0)
9196 			*eofp = false;
9197 		error = nfsm_loadattr(nd, nap);
9198 		if (error == 0)
9199 			*attrflagp = 1;
9200 	}
9201 	if (error == 0)
9202 		error = nd->nd_repstat;
9203 nfsmout:
9204 	m_freem(nd->nd_mrep);
9205 	return (error);
9206 }
9207 
9208 /*
9209  * Split an mbuf list.  For non-M_EXTPG mbufs, just use m_split().
9210  */
9211 static struct mbuf *
9212 nfsm_split(struct mbuf *mp, uint64_t xfer)
9213 {
9214 	struct mbuf *m, *m2;
9215 	vm_page_t pg;
9216 	int i, j, left, pgno, plen, trim;
9217 	char *cp, *cp2;
9218 
9219 	if ((mp->m_flags & M_EXTPG) == 0) {
9220 		m = m_split(mp, xfer, M_WAITOK);
9221 		return (m);
9222 	}
9223 
9224 	/* Find the correct mbuf to split at. */
9225 	for (m = mp; m != NULL && xfer > m->m_len; m = m->m_next)
9226 		xfer -= m->m_len;
9227 	if (m == NULL)
9228 		return (NULL);
9229 
9230 	/* If xfer == m->m_len, we can just split the mbuf list. */
9231 	if (xfer == m->m_len) {
9232 		m2 = m->m_next;
9233 		m->m_next = NULL;
9234 		return (m2);
9235 	}
9236 
9237 	/* Find the page to split at. */
9238 	pgno = 0;
9239 	left = xfer;
9240 	do {
9241 		if (pgno == 0)
9242 			plen = m_epg_pagelen(m, 0, m->m_epg_1st_off);
9243 		else
9244 			plen = m_epg_pagelen(m, pgno, 0);
9245 		if (left <= plen)
9246 			break;
9247 		left -= plen;
9248 		pgno++;
9249 	} while (pgno < m->m_epg_npgs);
9250 	if (pgno == m->m_epg_npgs)
9251 		panic("nfsm_split: eroneous ext_pgs mbuf");
9252 
9253 	m2 = mb_alloc_ext_pgs(M_WAITOK, mb_free_mext_pgs);
9254 	m2->m_epg_flags |= EPG_FLAG_ANON;
9255 
9256 	/*
9257 	 * If left < plen, allocate a new page for the new mbuf
9258 	 * and copy the data after left in the page to this new
9259 	 * page.
9260 	 */
9261 	if (left < plen) {
9262 		pg = vm_page_alloc_noobj(VM_ALLOC_WAITOK | VM_ALLOC_NODUMP |
9263 		    VM_ALLOC_WIRED);
9264 		m2->m_epg_pa[0] = VM_PAGE_TO_PHYS(pg);
9265 		m2->m_epg_npgs = 1;
9266 
9267 		/* Copy the data after left to the new page. */
9268 		trim = plen - left;
9269 		cp = (char *)(void *)PHYS_TO_DMAP(m->m_epg_pa[pgno]);
9270 		if (pgno == 0)
9271 			cp += m->m_epg_1st_off;
9272 		cp += left;
9273 		cp2 = (char *)(void *)PHYS_TO_DMAP(m2->m_epg_pa[0]);
9274 		if (pgno == m->m_epg_npgs - 1)
9275 			m2->m_epg_last_len = trim;
9276 		else {
9277 			cp2 += PAGE_SIZE - trim;
9278 			m2->m_epg_1st_off = PAGE_SIZE - trim;
9279 			m2->m_epg_last_len = m->m_epg_last_len;
9280 		}
9281 		memcpy(cp2, cp, trim);
9282 		m2->m_len = trim;
9283 	} else {
9284 		m2->m_len = 0;
9285 		m2->m_epg_last_len = m->m_epg_last_len;
9286 	}
9287 
9288 	/* Move the pages beyond pgno to the new mbuf. */
9289 	for (i = pgno + 1, j = m2->m_epg_npgs; i < m->m_epg_npgs; i++, j++) {
9290 		m2->m_epg_pa[j] = m->m_epg_pa[i];
9291 		/* Never moves page 0. */
9292 		m2->m_len += m_epg_pagelen(m, i, 0);
9293 	}
9294 	m2->m_epg_npgs = j;
9295 	m->m_epg_npgs = pgno + 1;
9296 	m->m_epg_last_len = left;
9297 	m->m_len = xfer;
9298 
9299 	m2->m_next = m->m_next;
9300 	m->m_next = NULL;
9301 	return (m2);
9302 }
9303 
9304 /*
9305  * Do the NFSv4.1 Bind Connection to Session.
9306  * Called from the reconnect layer of the krpc (sys/rpc/clnt_rc.c).
9307  */
9308 void
9309 nfsrpc_bindconnsess(CLIENT *cl, void *arg, struct ucred *cr)
9310 {
9311 	struct nfscl_reconarg *rcp = (struct nfscl_reconarg *)arg;
9312 	uint32_t res, *tl;
9313 	struct nfsrv_descript nfsd;
9314 	struct nfsrv_descript *nd = &nfsd;
9315 	struct rpc_callextra ext;
9316 	struct timeval utimeout;
9317 	enum clnt_stat stat;
9318 	int error;
9319 
9320 	nfscl_reqstart(nd, NFSPROC_BINDCONNTOSESS, NULL, NULL, 0, NULL, NULL,
9321 	    NFS_VER4, rcp->minorvers, NULL);
9322 	NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 2 * NFSX_UNSIGNED);
9323 	memcpy(tl, rcp->sessionid, NFSX_V4SESSIONID);
9324 	tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
9325 	*tl++ = txdr_unsigned(NFSCDFC4_FORE_OR_BOTH);
9326 	*tl = newnfs_false;
9327 
9328 	memset(&ext, 0, sizeof(ext));
9329 	utimeout.tv_sec = 30;
9330 	utimeout.tv_usec = 0;
9331 	ext.rc_auth = authunix_create(cr);
9332 	nd->nd_mrep = NULL;
9333 	stat = CLNT_CALL_MBUF(cl, &ext, NFSV4PROC_COMPOUND, nd->nd_mreq,
9334 	    &nd->nd_mrep, utimeout);
9335 	AUTH_DESTROY(ext.rc_auth);
9336 	if (stat != RPC_SUCCESS) {
9337 		printf("nfsrpc_bindconnsess: call failed stat=%d\n", stat);
9338 		return;
9339 	}
9340 	if (nd->nd_mrep == NULL) {
9341 		printf("nfsrpc_bindconnsess: no reply args\n");
9342 		return;
9343 	}
9344 	error = 0;
9345 	newnfs_realign(&nd->nd_mrep, M_WAITOK);
9346 	nd->nd_md = nd->nd_mrep;
9347 	nd->nd_dpos = mtod(nd->nd_md, char *);
9348 	NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
9349 	nd->nd_repstat = fxdr_unsigned(uint32_t, *tl++);
9350 	if (nd->nd_repstat == NFSERR_OK) {
9351 		res = fxdr_unsigned(uint32_t, *tl);
9352 		if (res > 0 && (error = nfsm_advance(nd, NFSM_RNDUP(res),
9353 		    -1)) != 0)
9354 			goto nfsmout;
9355 		NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
9356 		    4 * NFSX_UNSIGNED);
9357 		tl += 3;
9358 		if (!NFSBCMP(tl, rcp->sessionid, NFSX_V4SESSIONID)) {
9359 			tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
9360 			res = fxdr_unsigned(uint32_t, *tl);
9361 			if (res != NFSCDFS4_BOTH)
9362 				printf("nfsrpc_bindconnsess: did not "
9363 				    "return FS4_BOTH\n");
9364 		} else
9365 			printf("nfsrpc_bindconnsess: not same "
9366 			    "sessionid\n");
9367 	} else if (nd->nd_repstat != NFSERR_BADSESSION)
9368 		printf("nfsrpc_bindconnsess: returned %d\n", nd->nd_repstat);
9369 nfsmout:
9370 	if (error != 0)
9371 		printf("nfsrpc_bindconnsess: reply bad xdr\n");
9372 	m_freem(nd->nd_mrep);
9373 }
9374 
9375 /*
9376  * Do roughly what nfs_statfs() does for NFSv4, but when called with a shared
9377  * locked vnode.
9378  */
9379 static void
9380 nfscl_statfs(struct vnode *vp, struct ucred *cred, NFSPROC_T *td)
9381 {
9382 	struct nfsvattr nfsva;
9383 	struct nfsfsinfo fs;
9384 	struct nfsstatfs sb;
9385 	struct mount *mp;
9386 	struct nfsmount *nmp;
9387 	uint32_t lease;
9388 	int attrflag, error;
9389 
9390 	mp = vp->v_mount;
9391 	nmp = VFSTONFS(mp);
9392 	error = nfsrpc_statfs(vp, &sb, &fs, &lease, cred, td, &nfsva,
9393 	    &attrflag);
9394 	if (attrflag != 0)
9395 		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
9396 	if (error == 0) {
9397 		NFSLOCKCLSTATE();
9398 		if (nmp->nm_clp != NULL)
9399 			nmp->nm_clp->nfsc_renew = NFSCL_RENEW(lease);
9400 		NFSUNLOCKCLSTATE();
9401 		mtx_lock(&nmp->nm_mtx);
9402 		nfscl_loadfsinfo(nmp, &fs);
9403 		nfscl_loadsbinfo(nmp, &sb, &mp->mnt_stat);
9404 		mp->mnt_stat.f_iosize = newnfs_iosize(nmp);
9405 		mtx_unlock(&nmp->nm_mtx);
9406 	}
9407 }
9408