xref: /freebsd/sys/fs/nfsclient/nfs_clstate.c (revision 33b8c039a960bcff3471baf5929558c4d1500727)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2009 Rick Macklem, University of Guelph
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 /*
34  * These functions implement the client side state handling for NFSv4.
35  * NFSv4 state handling:
36  * - A lockowner is used to determine lock contention, so it
37  *   corresponds directly to a Posix pid. (1 to 1 mapping)
38  * - The correct granularity of an OpenOwner is not nearly so
39  *   obvious. An OpenOwner does the following:
40  *   - provides a serial sequencing of Open/Close/Lock-with-new-lockowner
41  *   - is used to check for Open/Share contention (not applicable to
42  *     this client, since all Opens are Deny_None)
 43  *   As such, I considered both extremes.
44  *   1 OpenOwner per ClientID - Simple to manage, but fully serializes
45  *   all Open, Close and Lock (with a new lockowner) Ops.
46  *   1 OpenOwner for each Open - This one results in an OpenConfirm for
47  *   every Open, for most servers.
 48  *   So, I chose to use the same mapping as I did for LockOwners.
49  *   The main concern here is that you can end up with multiple Opens
50  *   for the same File Handle, but on different OpenOwners (opens
51  *   inherited from parents, grandparents...) and you do not know
52  *   which of these the vnodeop close applies to. This is handled by
53  *   delaying the Close Op(s) until all of the Opens have been closed.
54  *   (It is not yet obvious if this is the correct granularity.)
55  * - How the code handles serialization:
56  *   - For the ClientId, it uses an exclusive lock while getting its
57  *     SetClientId and during recovery. Otherwise, it uses a shared
58  *     lock via a reference count.
59  *   - For the rest of the data structures, it uses an SMP mutex
60  *     (once the nfs client is SMP safe) and doesn't sleep while
61  *     manipulating the linked lists.
62  *   - The serialization of Open/Close/Lock/LockU falls out in the
63  *     "wash", since OpenOwners and LockOwners are both mapped from
64  *     Posix pid. In other words, there is only one Posix pid using
65  *     any given owner, so that owner is serialized. (If you change
66  *     the granularity of the OpenOwner, then code must be added to
67  *     serialize Ops on the OpenOwner.)
68  * - When to get rid of OpenOwners and LockOwners.
69  *   - The function nfscl_cleanup_common() is executed after a process exits.
70  *     It goes through the client list looking for all Open and Lock Owners.
71  *     When one is found, it is marked "defunct" or in the case of
72  *     an OpenOwner without any Opens, freed.
73  *     The renew thread scans for defunct Owners and gets rid of them,
74  *     if it can. The LockOwners will also be deleted when the
75  *     associated Open is closed.
76  *   - If the LockU or Close Op(s) fail during close in a way
77  *     that could be recovered upon retry, they are relinked to the
78  *     ClientId's defunct open list and retried by the renew thread
79  *     until they succeed or an unmount/recovery occurs.
80  *     (Since we are done with them, they do not need to be recovered.)
81  */
82 
83 #include <fs/nfs/nfsport.h>
84 
85 /*
86  * Global variables
87  */
88 extern struct nfsstatsv1 nfsstatsv1;
89 extern struct nfsreqhead nfsd_reqq;
90 extern u_int32_t newnfs_false, newnfs_true;
91 extern int nfscl_debuglevel;
92 extern int nfscl_enablecallb;
93 extern int nfs_numnfscbd;
94 NFSREQSPINLOCK;
95 NFSCLSTATEMUTEX;
96 int nfscl_inited = 0;
97 struct nfsclhead nfsclhead;	/* Head of clientid list */
98 int nfscl_deleghighwater = NFSCLDELEGHIGHWATER;
99 int nfscl_layouthighwater = NFSCLLAYOUTHIGHWATER;
100 
101 static int nfscl_delegcnt = 0;
102 static int nfscl_layoutcnt = 0;
103 static int nfscl_getopen(struct nfsclownerhead *, struct nfsclopenhash *,
104     u_int8_t *, int, u_int8_t *, u_int8_t *, u_int32_t,
105     struct nfscllockowner **, struct nfsclopen **);
106 static bool nfscl_checkown(struct nfsclowner *, struct nfsclopen *, uint8_t *,
107     uint8_t *, struct nfscllockowner **, struct nfsclopen **,
108     struct nfsclopen **);
109 static void nfscl_clrelease(struct nfsclclient *);
110 static void nfscl_cleanclient(struct nfsclclient *);
111 static void nfscl_expireclient(struct nfsclclient *, struct nfsmount *,
112     struct ucred *, NFSPROC_T *);
113 static int nfscl_expireopen(struct nfsclclient *, struct nfsclopen *,
114     struct nfsmount *, struct ucred *, NFSPROC_T *);
115 static void nfscl_recover(struct nfsclclient *, bool *, struct ucred *,
116     NFSPROC_T *);
117 static void nfscl_insertlock(struct nfscllockowner *, struct nfscllock *,
118     struct nfscllock *, int);
119 static int nfscl_updatelock(struct nfscllockowner *, struct nfscllock **,
120     struct nfscllock **, int);
121 static void nfscl_delegreturnall(struct nfsclclient *, NFSPROC_T *);
122 static u_int32_t nfscl_nextcbident(void);
123 static mount_t nfscl_getmnt(int, uint8_t *, u_int32_t, struct nfsclclient **);
124 static struct nfsclclient *nfscl_getclnt(u_int32_t);
125 static struct nfsclclient *nfscl_getclntsess(uint8_t *);
126 static struct nfscldeleg *nfscl_finddeleg(struct nfsclclient *, u_int8_t *,
127     int);
128 static void nfscl_retoncloselayout(vnode_t, struct nfsclclient *, uint8_t *,
129     int, struct nfsclrecalllayout **);
130 static void nfscl_reldevinfo_locked(struct nfscldevinfo *);
131 static struct nfscllayout *nfscl_findlayout(struct nfsclclient *, u_int8_t *,
132     int);
133 static struct nfscldevinfo *nfscl_finddevinfo(struct nfsclclient *, uint8_t *);
134 static int nfscl_checkconflict(struct nfscllockownerhead *, struct nfscllock *,
135     u_int8_t *, struct nfscllock **);
136 static void nfscl_freealllocks(struct nfscllockownerhead *, int);
137 static int nfscl_localconflict(struct nfsclclient *, u_int8_t *, int,
138     struct nfscllock *, u_int8_t *, struct nfscldeleg *, struct nfscllock **);
139 static void nfscl_newopen(struct nfsclclient *, struct nfscldeleg *,
140     struct nfsclowner **, struct nfsclowner **, struct nfsclopen **,
141     struct nfsclopen **, u_int8_t *, u_int8_t *, int, struct ucred *, int *);
142 static int nfscl_moveopen(vnode_t , struct nfsclclient *,
143     struct nfsmount *, struct nfsclopen *, struct nfsclowner *,
144     struct nfscldeleg *, struct ucred *, NFSPROC_T *);
145 static void nfscl_totalrecall(struct nfsclclient *);
146 static int nfscl_relock(vnode_t , struct nfsclclient *, struct nfsmount *,
147     struct nfscllockowner *, struct nfscllock *, struct ucred *, NFSPROC_T *);
148 static int nfscl_tryopen(struct nfsmount *, vnode_t , u_int8_t *, int,
149     u_int8_t *, int, u_int32_t, struct nfsclopen *, u_int8_t *, int,
150     struct nfscldeleg **, int, u_int32_t, struct ucred *, NFSPROC_T *);
151 static int nfscl_trylock(struct nfsmount *, vnode_t , u_int8_t *,
152     int, struct nfscllockowner *, int, int, u_int64_t, u_int64_t, short,
153     struct ucred *, NFSPROC_T *);
154 static int nfsrpc_reopen(struct nfsmount *, u_int8_t *, int, u_int32_t,
155     struct nfsclopen *, struct nfscldeleg **, struct ucred *, NFSPROC_T *);
156 static void nfscl_freedeleg(struct nfscldeleghead *, struct nfscldeleg *,
157     bool);
158 static int nfscl_errmap(struct nfsrv_descript *, u_int32_t);
159 static void nfscl_cleanup_common(struct nfsclclient *, u_int8_t *);
160 static int nfscl_recalldeleg(struct nfsclclient *, struct nfsmount *,
161     struct nfscldeleg *, vnode_t, struct ucred *, NFSPROC_T *, int,
162     vnode_t *);
163 static void nfscl_freeopenowner(struct nfsclowner *, int);
164 static void nfscl_cleandeleg(struct nfscldeleg *);
165 static int nfscl_trydelegreturn(struct nfscldeleg *, struct ucred *,
166     struct nfsmount *, NFSPROC_T *);
167 static void nfscl_emptylockowner(struct nfscllockowner *,
168     struct nfscllockownerfhhead *);
169 static void nfscl_mergeflayouts(struct nfsclflayouthead *,
170     struct nfsclflayouthead *);
171 static int nfscl_layoutrecall(int, struct nfscllayout *, uint32_t, uint64_t,
172     uint64_t, uint32_t, uint32_t, uint32_t, char *, struct nfsclrecalllayout *);
173 static int nfscl_seq(uint32_t, uint32_t);
174 static void nfscl_layoutreturn(struct nfsmount *, struct nfscllayout *,
175     struct ucred *, NFSPROC_T *);
176 static void nfscl_dolayoutcommit(struct nfsmount *, struct nfscllayout *,
177     struct ucred *, NFSPROC_T *);
178 
/*
 * Tables of the errors that each callback operation is permitted to
 * return, terminated by 0.  nfscl_errmap() scans the table for the
 * operation to decide whether a given NFS error may be replied.
 */
179 static short nfscberr_null[] = {
180 	0,
181 	0,
182 };
183 
184 static short nfscberr_getattr[] = {
185 	NFSERR_RESOURCE,
186 	NFSERR_BADHANDLE,
187 	NFSERR_BADXDR,
188 	NFSERR_RESOURCE,
189 	NFSERR_SERVERFAULT,
190 	0,
191 };
192 
193 static short nfscberr_recall[] = {
194 	NFSERR_RESOURCE,
195 	NFSERR_BADHANDLE,
196 	NFSERR_BADSTATEID,
197 	NFSERR_BADXDR,
198 	NFSERR_RESOURCE,
199 	NFSERR_SERVERFAULT,
200 	0,
201 };
202 
/* Map of callback operation number to its allowed-error table. */
203 static short *nfscl_cberrmap[] = {
204 	nfscberr_null,
205 	nfscberr_null,
206 	nfscberr_null,
207 	nfscberr_getattr,
208 	nfscberr_recall
209 };
210 
/* Address family of the callback connection for this clientid. */
211 #define	NETFAMILY(clp) \
212 		(((clp)->nfsc_flags & NFSCLFLAGS_AFINET6) ? AF_INET6 : AF_INET)
213 
214 /*
215  * Called for an open operation.
216  * If the nfhp argument is NULL, just get an openowner.
 *
 * vp, nfhp, fhlen - vnode plus file handle of the file being opened
 * amode - NFSV4OPEN_ACCESSREAD and/or NFSV4OPEN_ACCESSWRITE bits
 * usedeleg - non-zero if a delegation held for the file may be used
 * owpp, opp - return the openowner and open found or created
 * newonep - set to 1 iff a new openowner and/or open was created
 * retp - set to NFSCLOPEN_OK, NFSCLOPEN_DOOPEN or NFSCLOPEN_SETCRED
 * lockit - non-zero if the openowner should be left locked on return
 *          (shared for the one-openowner case, exclusive otherwise)
 * Returns 0 on success or an errno value from nfscl_getcl().
217  */
218 int
219 nfscl_open(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t amode, int usedeleg,
220     struct ucred *cred, NFSPROC_T *p, struct nfsclowner **owpp,
221     struct nfsclopen **opp, int *newonep, int *retp, int lockit)
222 {
223 	struct nfsclclient *clp;
224 	struct nfsclowner *owp, *nowp;
225 	struct nfsclopen *op = NULL, *nop = NULL;
226 	struct nfscldeleg *dp;
227 	struct nfsclownerhead *ohp;
228 	u_int8_t own[NFSV4CL_LOCKNAMELEN];
229 	int ret;
230 
231 	if (newonep != NULL)
232 		*newonep = 0;
233 	if (opp != NULL)
234 		*opp = NULL;
235 	if (owpp != NULL)
236 		*owpp = NULL;
237 
238 	/*
239 	 * Might need one or both of these, so MALLOC them now, to
240 	 * avoid a tsleep() in MALLOC later.
241 	 */
242 	nowp = malloc(sizeof (struct nfsclowner),
243 	    M_NFSCLOWNER, M_WAITOK);
244 	if (nfhp != NULL) {
	    /* The variable-length file handle is appended to the open. */
245 	    nop = malloc(sizeof (struct nfsclopen) +
246 		fhlen - 1, M_NFSCLOPEN, M_WAITOK);
247 	    nop->nfso_hash.le_prev = NULL;
248 	}
249 	ret = nfscl_getcl(vp->v_mount, cred, p, false, &clp);
250 	if (ret != 0) {
		/* No clientid, so free the preallocated structures. */
251 		free(nowp, M_NFSCLOWNER);
252 		if (nop != NULL)
253 			free(nop, M_NFSCLOPEN);
254 		return (ret);
255 	}
256 
257 	/*
258 	 * Get the Open iff it already exists.
259 	 * If none found, add the new one or return error, depending upon
260 	 * "create".
261 	 */
262 	NFSLOCKCLSTATE();
263 	dp = NULL;
264 	/* First check the delegation list */
265 	if (nfhp != NULL && usedeleg) {
266 		LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) {
267 			if (dp->nfsdl_fhlen == fhlen &&
268 			    !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) {
				/*
				 * A Read delegation only suffices when write
				 * access is not being requested.
				 */
269 				if (!(amode & NFSV4OPEN_ACCESSWRITE) ||
270 				    (dp->nfsdl_flags & NFSCLDL_WRITE))
271 					break;
272 				dp = NULL;
273 				break;
274 			}
275 		}
276 	}
277 
278 	/* For NFSv4.1/4.2 and this option, use a single open_owner. */
279 	if (NFSHASONEOPENOWN(VFSTONFS(vp->v_mount)))
280 		nfscl_filllockowner(NULL, own, F_POSIX);
281 	else
282 		nfscl_filllockowner(p->td_proc, own, F_POSIX);
	/* Local opens on a delegation keep their owners on the delegation. */
283 	if (dp != NULL)
284 		ohp = &dp->nfsdl_owner;
285 	else
286 		ohp = &clp->nfsc_owner;
287 	/* Now, search for an openowner */
288 	LIST_FOREACH(owp, ohp, nfsow_list) {
289 		if (!NFSBCMP(owp->nfsow_owner, own, NFSV4CL_LOCKNAMELEN))
290 			break;
291 	}
292 
293 	/*
294 	 * Create a new open, as required.
295 	 */
296 	nfscl_newopen(clp, dp, &owp, &nowp, &op, &nop, own, nfhp, fhlen,
297 	    cred, newonep);
298 
299 	/*
300 	 * Now, check the mode on the open and return the appropriate
301 	 * value.
302 	 */
303 	if (retp != NULL) {
304 		if (nfhp != NULL && dp != NULL && nop == NULL)
305 			/* new local open on delegation */
306 			*retp = NFSCLOPEN_SETCRED;
307 		else
308 			*retp = NFSCLOPEN_OK;
309 	}
	/*
	 * If access bits beyond those already held are being requested,
	 * an Open RPC is needed (unless covered by a delegation).
	 */
310 	if (op != NULL && (amode & ~(op->nfso_mode))) {
311 		op->nfso_mode |= amode;
312 		if (retp != NULL && dp == NULL)
313 			*retp = NFSCLOPEN_DOOPEN;
314 	}
315 
316 	/*
317 	 * Serialize modifications to the open owner for multiple threads
318 	 * within the same process using a read/write sleep lock.
319 	 * For NFSv4.1 and a single OpenOwner, allow concurrent open operations
320 	 * by acquiring a shared lock.  The close operations still use an
321 	 * exclusive lock for this case.
322 	 */
323 	if (lockit != 0) {
324 		if (NFSHASONEOPENOWN(VFSTONFS(vp->v_mount))) {
325 			/*
326 			 * Get a shared lock on the OpenOwner, but first
327 			 * wait for any pending exclusive lock, so that the
328 			 * exclusive locker gets priority.
329 			 */
330 			nfsv4_lock(&owp->nfsow_rwlock, 0, NULL,
331 			    NFSCLSTATEMUTEXPTR, NULL);
332 			nfsv4_getref(&owp->nfsow_rwlock, NULL,
333 			    NFSCLSTATEMUTEXPTR, NULL);
334 		} else
335 			nfscl_lockexcl(&owp->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
336 	}
337 	NFSUNLOCKCLSTATE();
	/* Free whichever preallocated structures were not consumed. */
338 	if (nowp != NULL)
339 		free(nowp, M_NFSCLOWNER);
340 	if (nop != NULL)
341 		free(nop, M_NFSCLOPEN);
342 	if (owpp != NULL)
343 		*owpp = owp;
344 	if (opp != NULL)
345 		*opp = op;
346 	return (0);
347 }
348 
349 /*
350  * Create a new open, as required.
 * If *owpp is NULL and a preallocated owner (*nowpp) was supplied, the
 * owner is initialized, linked in (on the delegation's list when dp !=
 * NULL, otherwise on the clientid's list) and *nowpp is cleared to show
 * it was consumed.  Likewise, when fhp != NULL and no matching open is
 * found under the owner, the preallocated open (*nopp) is consumed.
 * *newonep (when non-NULL) is set to 1 if anything new was linked in.
 * Called with the NFS client state mutex held.
351  */
352 static void
353 nfscl_newopen(struct nfsclclient *clp, struct nfscldeleg *dp,
354     struct nfsclowner **owpp, struct nfsclowner **nowpp, struct nfsclopen **opp,
355     struct nfsclopen **nopp, u_int8_t *own, u_int8_t *fhp, int fhlen,
356     struct ucred *cred, int *newonep)
357 {
358 	struct nfsclowner *owp = *owpp, *nowp;
359 	struct nfsclopen *op, *nop;
360 
361 	if (nowpp != NULL)
362 		nowp = *nowpp;
363 	else
364 		nowp = NULL;
365 	if (nopp != NULL)
366 		nop = *nopp;
367 	else
368 		nop = NULL;
369 	if (owp == NULL && nowp != NULL) {
		/* No openowner found, so use the preallocated one. */
370 		NFSBCOPY(own, nowp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
371 		LIST_INIT(&nowp->nfsow_open);
372 		nowp->nfsow_clp = clp;
373 		nowp->nfsow_seqid = 0;
374 		nowp->nfsow_defunct = 0;
375 		nfscl_lockinit(&nowp->nfsow_rwlock);
376 		if (dp != NULL) {
377 			nfsstatsv1.cllocalopenowners++;
378 			LIST_INSERT_HEAD(&dp->nfsdl_owner, nowp, nfsow_list);
379 		} else {
380 			nfsstatsv1.clopenowners++;
381 			LIST_INSERT_HEAD(&clp->nfsc_owner, nowp, nfsow_list);
382 		}
		/* Mark the preallocated owner as consumed. */
383 		owp = *owpp = nowp;
384 		*nowpp = NULL;
385 		if (newonep != NULL)
386 			*newonep = 1;
387 	}
388 
389 	 /* If an fhp has been specified, create an Open as well. */
390 	if (fhp != NULL) {
391 		/* and look for the correct open, based upon FH */
392 		LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
393 			if (op->nfso_fhlen == fhlen &&
394 			    !NFSBCMP(op->nfso_fh, fhp, fhlen))
395 				break;
396 		}
397 		if (op == NULL && nop != NULL) {
			/* None found, so use the preallocated open. */
398 			nop->nfso_own = owp;
399 			nop->nfso_mode = 0;
400 			nop->nfso_opencnt = 0;
401 			nop->nfso_posixlock = 1;
402 			nop->nfso_fhlen = fhlen;
403 			NFSBCOPY(fhp, nop->nfso_fh, fhlen);
404 			LIST_INIT(&nop->nfso_lock);
405 			nop->nfso_stateid.seqid = 0;
406 			nop->nfso_stateid.other[0] = 0;
407 			nop->nfso_stateid.other[1] = 0;
408 			nop->nfso_stateid.other[2] = 0;
409 			KASSERT(cred != NULL, ("%s: cred NULL\n", __func__));
410 			newnfs_copyincred(cred, &nop->nfso_cred);
411 			if (dp != NULL) {
				/*
				 * A local open on a delegation; move the
				 * delegation to the head of the LRU list and
				 * refresh its timestamp.
				 */
412 				TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
413 				TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp,
414 				    nfsdl_list);
415 				dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
416 				nfsstatsv1.cllocalopens++;
417 			} else {
418 				LIST_INSERT_HEAD(NFSCLOPENHASH(clp, fhp, fhlen),
419 				    nop, nfso_hash);
420 				nfsstatsv1.clopens++;
421 			}
422 			LIST_INSERT_HEAD(&owp->nfsow_open, nop, nfso_list);
423 			*opp = nop;
424 			*nopp = NULL;
425 			if (newonep != NULL)
426 				*newonep = 1;
427 		} else {
428 			*opp = op;
429 		}
430 	}
431 }
432 
433 /*
434  * Called to find/add a delegation to a client.
 * *dpp is a newly received delegation (or NULL).  If an equivalent
 * delegation is already held, the new one is freed; otherwise it is
 * linked into the clientid's delegation list and hash table and *dpp
 * is cleared to indicate it was consumed.
 * Returns 0, or NFSERR_BADSTATEID when no delegation exists or was given.
435  */
436 int
437 nfscl_deleg(mount_t mp, struct nfsclclient *clp, u_int8_t *nfhp,
438     int fhlen, struct ucred *cred, NFSPROC_T *p, struct nfscldeleg **dpp)
439 {
440 	struct nfscldeleg *dp = *dpp, *tdp;
441 	struct nfsmount *nmp;
442 
443 	KASSERT(mp != NULL, ("nfscl_deleg: mp NULL"));
444 	nmp = VFSTONFS(mp);
445 	/*
446 	 * First, if we have received a Read delegation for a file on a
447 	 * read/write file system, just return it, because they aren't
448 	 * useful, imho.
449 	 */
450 	if (dp != NULL && !NFSMNT_RDONLY(mp) &&
451 	    (dp->nfsdl_flags & NFSCLDL_READ)) {
452 		nfscl_trydelegreturn(dp, cred, nmp, p);
453 		free(dp, M_NFSCLDELEG);
454 		*dpp = NULL;
455 		return (0);
456 	}
457 
458 	/*
459 	 * Since a delegation might be added to the mount,
460 	 * set NFSMNTP_DELEGISSUED now.  If a delegation already
461 	 * exists, setting this flag is harmless.
462 	 */
463 	NFSLOCKMNT(nmp);
464 	nmp->nm_privflag |= NFSMNTP_DELEGISSUED;
465 	NFSUNLOCKMNT(nmp);
466 
467 	/* Look for the correct deleg, based upon FH */
468 	NFSLOCKCLSTATE();
469 	tdp = nfscl_finddeleg(clp, nfhp, fhlen);
470 	if (tdp == NULL) {
471 		if (dp == NULL) {
472 			NFSUNLOCKCLSTATE();
473 			return (NFSERR_BADSTATEID);
474 		}
		/* Link the new delegation in and mark it consumed. */
475 		*dpp = NULL;
476 		TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list);
477 		LIST_INSERT_HEAD(NFSCLDELEGHASH(clp, nfhp, fhlen), dp,
478 		    nfsdl_hash);
479 		dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
480 		nfsstatsv1.cldelegates++;
481 		nfscl_delegcnt++;
482 	} else {
483 		/*
484 		 * Delegation already exists, what do we do if a new one??
485 		 */
486 		if (dp != NULL) {
487 			printf("Deleg already exists!\n");
488 			free(dp, M_NFSCLDELEG);
489 			*dpp = NULL;
490 		} else {
491 			*dpp = tdp;
492 		}
493 	}
494 	NFSUNLOCKCLSTATE();
495 	return (0);
496 }
497 
498 /*
499  * Find a delegation for this file handle. Return NULL upon failure.
 * Searches only the hash chain for (fhp, fhlen).  Called with the NFS
 * client state mutex held.
500  */
501 static struct nfscldeleg *
502 nfscl_finddeleg(struct nfsclclient *clp, u_int8_t *fhp, int fhlen)
503 {
504 	struct nfscldeleg *dp;
505 
506 	LIST_FOREACH(dp, NFSCLDELEGHASH(clp, fhp, fhlen), nfsdl_hash) {
507 	    if (dp->nfsdl_fhlen == fhlen &&
508 		!NFSBCMP(dp->nfsdl_fh, fhp, fhlen))
509 		break;
510 	}
511 	return (dp);
512 }
513 
514 /*
515  * Get a stateid for an I/O operation. First, look for an open and iff
516  * found, return either a lockowner stateid or the open stateid.
517  * If no Open is found, just return error and the special stateid of all zeros.
 *
 * mode - NFSV4OPEN_ACCESSREAD or NFSV4OPEN_ACCESSWRITE for the I/O
 * fords - non-zero when the I/O is being done to a pNFS Data Server,
 *         in which case neither the special all-zeros stateid nor a
 *         lock stateid may be used
 * *lckpp - set to the delegation's rwlock (with its use count bumped)
 *          when a delegation stateid is returned; the caller must
 *          release it.  Otherwise set to NULL.
 * Returns 0, EISDIR for a non-regular file, EACCES when no clientid
 * exists or ENOENT when no usable open is found.
518  */
519 int
520 nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode,
521     int fords, struct ucred *cred, NFSPROC_T *p, nfsv4stateid_t *stateidp,
522     void **lckpp)
523 {
524 	struct nfsclclient *clp;
525 	struct nfsclopen *op = NULL, *top;
526 	struct nfsclopenhash *oph;
527 	struct nfscllockowner *lp;
528 	struct nfscldeleg *dp;
529 	struct nfsnode *np;
530 	struct nfsmount *nmp;
531 	u_int8_t own[NFSV4CL_LOCKNAMELEN];
532 	int error;
533 	bool done;
534 
535 	*lckpp = NULL;
536 	/*
537 	 * Initially, just set the special stateid of all zeros.
538 	 * (Don't do this for a DS, since the special stateid can't be used.)
539 	 */
540 	if (fords == 0) {
541 		stateidp->seqid = 0;
542 		stateidp->other[0] = 0;
543 		stateidp->other[1] = 0;
544 		stateidp->other[2] = 0;
545 	}
546 	if (vnode_vtype(vp) != VREG)
547 		return (EISDIR);
548 	np = VTONFS(vp);
549 	nmp = VFSTONFS(vp->v_mount);
550 	NFSLOCKCLSTATE();
551 	clp = nfscl_findcl(nmp);
552 	if (clp == NULL) {
553 		NFSUNLOCKCLSTATE();
554 		return (EACCES);
555 	}
556 
557 	/*
558 	 * Wait for recovery to complete.
559 	 */
560 	while ((clp->nfsc_flags & NFSCLFLAGS_RECVRINPROG))
561 		(void) nfsmsleep(&clp->nfsc_flags, NFSCLSTATEMUTEXPTR,
562 		    PZERO, "nfsrecvr", NULL);
563 
564 	/*
565 	 * First, look for a delegation.
566 	 */
567 	LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) {
568 		if (dp->nfsdl_fhlen == fhlen &&
569 		    !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) {
			/* A Read delegation only suffices for reading. */
570 			if (!(mode & NFSV4OPEN_ACCESSWRITE) ||
571 			    (dp->nfsdl_flags & NFSCLDL_WRITE)) {
572 				stateidp->seqid = dp->nfsdl_stateid.seqid;
573 				stateidp->other[0] = dp->nfsdl_stateid.other[0];
574 				stateidp->other[1] = dp->nfsdl_stateid.other[1];
575 				stateidp->other[2] = dp->nfsdl_stateid.other[2];
576 				if (!(np->n_flag & NDELEGRECALL)) {
					/*
					 * LRU order the delegation list,
					 * refresh the timestamp and hold a
					 * reference on the delegation's
					 * rwlock for the caller to release.
					 */
577 					TAILQ_REMOVE(&clp->nfsc_deleg, dp,
578 					    nfsdl_list);
579 					TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp,
580 					    nfsdl_list);
581 					dp->nfsdl_timestamp = NFSD_MONOSEC +
582 					    120;
583 					dp->nfsdl_rwlock.nfslock_usecnt++;
584 					*lckpp = (void *)&dp->nfsdl_rwlock;
585 				}
586 				NFSUNLOCKCLSTATE();
587 				return (0);
588 			}
589 			break;
590 		}
591 	}
592 
593 	if (p != NULL) {
594 		/*
595 		 * If p != NULL, we want to search the parentage tree
596 		 * for a matching OpenOwner and use that.
597 		 */
598 		if (NFSHASONEOPENOWN(VFSTONFS(vp->v_mount)))
599 			nfscl_filllockowner(NULL, own, F_POSIX);
600 		else
601 			nfscl_filllockowner(p->td_proc, own, F_POSIX);
602 		lp = NULL;
603 		error = nfscl_getopen(NULL, clp->nfsc_openhash, nfhp, fhlen,
604 		    own, own, mode, &lp, &op);
605 		if (error == 0 && lp != NULL && fords == 0) {
606 			/* Don't return a lock stateid for a DS. */
607 			stateidp->seqid =
608 			    lp->nfsl_stateid.seqid;
609 			stateidp->other[0] =
610 			    lp->nfsl_stateid.other[0];
611 			stateidp->other[1] =
612 			    lp->nfsl_stateid.other[1];
613 			stateidp->other[2] =
614 			    lp->nfsl_stateid.other[2];
615 			NFSUNLOCKCLSTATE();
616 			return (0);
617 		}
618 	}
619 	if (op == NULL) {
620 		/* If not found, just look for any OpenOwner that will work. */
621 		top = NULL;
622 		done = false;
623 		oph = NFSCLOPENHASH(clp, nfhp, fhlen);
624 		LIST_FOREACH(op, oph, nfso_hash) {
625 			if (op->nfso_fhlen == fhlen &&
626 			    !NFSBCMP(op->nfso_fh, nfhp, fhlen)) {
				/*
				 * Remember a write-mode open, in case a
				 * read must fall back on it below.
				 */
627 				if (top == NULL && (op->nfso_mode &
628 				    NFSV4OPEN_ACCESSWRITE) != 0 &&
629 				    (mode & NFSV4OPEN_ACCESSREAD) != 0)
630 					top = op;
631 				if ((mode & op->nfso_mode) == mode) {
632 					/* LRU order the hash list. */
633 					LIST_REMOVE(op, nfso_hash);
634 					LIST_INSERT_HEAD(oph, op, nfso_hash);
635 					done = true;
636 					break;
637 				}
638 			}
639 		}
640 		if (!done) {
641 			NFSCL_DEBUG(2, "openmode top=%p\n", top);
642 			if (top == NULL || NFSHASOPENMODE(nmp)) {
643 				NFSUNLOCKCLSTATE();
644 				return (ENOENT);
645 			} else
646 				op = top;
647 		}
648 		/*
649 		 * For read aheads or write behinds, use the open cred.
650 		 * A read ahead or write behind is indicated by p == NULL.
651 		 */
652 		if (p == NULL)
653 			newnfs_copycred(&op->nfso_cred, cred);
654 	}
655 
656 	/*
657 	 * No lock stateid, so return the open stateid.
658 	 */
659 	stateidp->seqid = op->nfso_stateid.seqid;
660 	stateidp->other[0] = op->nfso_stateid.other[0];
661 	stateidp->other[1] = op->nfso_stateid.other[1];
662 	stateidp->other[2] = op->nfso_stateid.other[2];
663 	NFSUNLOCKCLSTATE();
664 	return (0);
665 }
666 
667 /*
668  * Search for a matching file, mode and, optionally, lockowner.
 * Exactly one of ohp (an openowner list, for local opens on a
 * delegation) and ohashp (the clientid's open hash table) must be
 * non-NULL.  When lpp != NULL, a matching lockowner is also searched
 * for and returned via *lpp.  On success *opp is set to the best
 * matching open (see the comment below for the preference order).
 * Returns 0 on success or EBADF when no open matches.
 * Called with the NFS client state mutex held.
669  */
670 static int
671 nfscl_getopen(struct nfsclownerhead *ohp, struct nfsclopenhash *ohashp,
672     u_int8_t *nfhp, int fhlen, u_int8_t *openown, u_int8_t *lockown,
673     u_int32_t mode, struct nfscllockowner **lpp, struct nfsclopen **opp)
674 {
675 	struct nfsclowner *owp;
676 	struct nfsclopen *op, *rop, *rop2;
677 	struct nfsclopenhash *oph;
678 	bool keep_looping;
679 
680 	KASSERT(ohp == NULL || ohashp == NULL, ("nfscl_getopen: "
681 	    "only one of ohp and ohashp can be set"));
682 	if (lpp != NULL)
683 		*lpp = NULL;
684 	/*
685 	 * rop will be set to the open to be returned. There are three
686 	 * variants of this, all for an open of the correct file:
687 	 * 1 - A match of lockown.
688 	 * 2 - A match of the openown, when no lockown match exists.
689 	 * 3 - A match for any open, if no openown or lockown match exists.
690 	 * Looking for #2 over #3 probably isn't necessary, but since
691 	 * RFC3530 is vague w.r.t. the relationship between openowners and
692 	 * lockowners, I think this is the safer way to go.
693 	 */
694 	rop = NULL;
695 	rop2 = NULL;
696 	keep_looping = true;
697 	/* Search the client list */
698 	if (ohashp == NULL) {
699 		/* Search the local opens on the delegation. */
700 		LIST_FOREACH(owp, ohp, nfsow_list) {
701 			/* and look for the correct open */
702 			LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
703 				if (op->nfso_fhlen == fhlen &&
704 				    !NFSBCMP(op->nfso_fh, nfhp, fhlen)
705 				    && (op->nfso_mode & mode) == mode)
706 					keep_looping = nfscl_checkown(owp, op, openown,
707 					    lockown, lpp, &rop, &rop2);
708 				if (!keep_looping)
709 					break;
710 			}
711 			if (!keep_looping)
712 				break;
713 		}
714 	} else {
715 		/* Search for matching opens on the hash list. */
716 		oph = &ohashp[NFSCLOPENHASHFUNC(nfhp, fhlen)];
717 		LIST_FOREACH(op, oph, nfso_hash) {
718 			if (op->nfso_fhlen == fhlen &&
719 			    !NFSBCMP(op->nfso_fh, nfhp, fhlen)
720 			    && (op->nfso_mode & mode) == mode)
721 				keep_looping = nfscl_checkown(op->nfso_own, op,
722 				    openown, lockown, lpp, &rop, &rop2);
723 			if (!keep_looping) {
724 				/* LRU order the hash list. */
725 				LIST_REMOVE(op, nfso_hash);
726 				LIST_INSERT_HEAD(oph, op, nfso_hash);
727 				break;
728 			}
729 		}
730 	}
	/* Fall back on variant #3 when neither #1 nor #2 was found. */
731 	if (rop == NULL)
732 		rop = rop2;
733 	if (rop == NULL)
734 		return (EBADF);
735 	*opp = rop;
736 	return (0);
737 }
738 
739 /* Check for an owner match. */
/*
 * Helper for nfscl_getopen().  Given an open (op) under owner (owp) for
 * the correct file and mode, record the best match so far:
 * - a lockowner match sets *lpp and *ropp and ends the search;
 * - an openowner match sets *ropp (ends the search only when no
 *   lockowner is being looked for, i.e. lpp == NULL);
 * - any open is remembered in *ropp2 as the last-resort fallback.
 * Returns false when the search may stop, true to keep looping.
 */
740 static bool
741 nfscl_checkown(struct nfsclowner *owp, struct nfsclopen *op, uint8_t *openown,
742     uint8_t *lockown, struct nfscllockowner **lpp, struct nfsclopen **ropp,
743     struct nfsclopen **ropp2)
744 {
745 	struct nfscllockowner *lp;
746 	bool keep_looping;
747 
748 	keep_looping = true;
749 	if (lpp != NULL) {
750 		/* Now look for a matching lockowner. */
751 		LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
752 			if (!NFSBCMP(lp->nfsl_owner, lockown,
753 			    NFSV4CL_LOCKNAMELEN)) {
754 				*lpp = lp;
755 				*ropp = op;
756 				return (false);
757 			}
758 		}
759 	}
760 	if (*ropp == NULL && !NFSBCMP(owp->nfsow_owner, openown,
761 	    NFSV4CL_LOCKNAMELEN)) {
762 		*ropp = op;
763 		if (lpp == NULL)
764 			keep_looping = false;
765 	}
766 	if (*ropp2 == NULL)
767 		*ropp2 = op;
768 	return (keep_looping);
769 }
770 
771 /*
772  * Release use of an open owner. Called when open operations are done
773  * with the open owner.
 * When "unlocked" is 0, the openowner's sleep lock is also dropped:
 * a shared reference for the single-openowner case, the exclusive
 * lock otherwise (matching how nfscl_open() acquired it).
774  */
775 void
776 nfscl_ownerrelease(struct nfsmount *nmp, struct nfsclowner *owp,
777     __unused int error, __unused int candelete, int unlocked)
778 {
779 
780 	if (owp == NULL)
781 		return;
782 	NFSLOCKCLSTATE();
783 	if (unlocked == 0) {
784 		if (NFSHASONEOPENOWN(nmp))
785 			nfsv4_relref(&owp->nfsow_rwlock);
786 		else
787 			nfscl_lockunlock(&owp->nfsow_rwlock);
788 	}
	/* Drop the shared reference on the clientid. */
789 	nfscl_clrelease(owp->nfsow_clp);
790 	NFSUNLOCKCLSTATE();
791 }
792 
793 /*
794  * Release use of an open structure under an open owner.
 * Drops the openowner lock/reference and the clientid reference.  If
 * the operation failed (error != 0), deletion is permitted (candelete)
 * and the open is unused (opencnt == 0), the open is freed as well.
795  */
796 void
797 nfscl_openrelease(struct nfsmount *nmp, struct nfsclopen *op, int error,
798     int candelete)
799 {
800 	struct nfsclclient *clp;
801 	struct nfsclowner *owp;
802 
803 	if (op == NULL)
804 		return;
805 	NFSLOCKCLSTATE();
806 	owp = op->nfso_own;
807 	if (NFSHASONEOPENOWN(nmp))
808 		nfsv4_relref(&owp->nfsow_rwlock);
809 	else
810 		nfscl_lockunlock(&owp->nfsow_rwlock);
811 	clp = owp->nfsow_clp;
	/* Grab clp before nfscl_freeopen() can free op's owner linkage. */
812 	if (error && candelete && op->nfso_opencnt == 0)
813 		nfscl_freeopen(op, 0);
814 	nfscl_clrelease(clp);
815 	NFSUNLOCKCLSTATE();
816 }
817 
818 /*
819  * Called to get a clientid structure. It will optionally lock the
820  * client data structures to do the SetClientId/SetClientId_confirm,
821  * but will release that lock and return the clientid with a reference
822  * count on it.
823  * If the "cred" argument is NULL, a new clientid should not be created.
824  * If the "p" argument is NULL, a SetClientID/SetClientIDConfirm cannot
825  * be done.
826  * It always clpp with a reference count on it, unless returning an error.
827  */
828 int
829 nfscl_getcl(struct mount *mp, struct ucred *cred, NFSPROC_T *p,
830     bool tryminvers, struct nfsclclient **clpp)
831 {
832 	struct nfsclclient *clp;
833 	struct nfsclclient *newclp = NULL;
834 	struct nfsmount *nmp;
835 	char uuid[HOSTUUIDLEN];
836 	int igotlock = 0, error, trystalecnt, clidinusedelay, i;
837 	u_int16_t idlen = 0;
838 
839 	nmp = VFSTONFS(mp);
840 	if (cred != NULL) {
841 		getcredhostuuid(cred, uuid, sizeof uuid);
842 		idlen = strlen(uuid);
843 		if (idlen > 0)
844 			idlen += sizeof (u_int64_t);
845 		else
846 			idlen += sizeof (u_int64_t) + 16; /* 16 random bytes */
847 		newclp = malloc(
848 		    sizeof (struct nfsclclient) + idlen - 1, M_NFSCLCLIENT,
849 		    M_WAITOK | M_ZERO);
850 	}
851 	NFSLOCKCLSTATE();
852 	/*
853 	 * If a forced dismount is already in progress, don't
854 	 * allocate a new clientid and get out now. For the case where
855 	 * clp != NULL, this is a harmless optimization.
856 	 */
857 	if (NFSCL_FORCEDISM(mp)) {
858 		NFSUNLOCKCLSTATE();
859 		if (newclp != NULL)
860 			free(newclp, M_NFSCLCLIENT);
861 		return (EBADF);
862 	}
863 	clp = nmp->nm_clp;
864 	if (clp == NULL) {
865 		if (newclp == NULL) {
866 			NFSUNLOCKCLSTATE();
867 			return (EACCES);
868 		}
869 		clp = newclp;
870 		clp->nfsc_idlen = idlen;
871 		LIST_INIT(&clp->nfsc_owner);
872 		TAILQ_INIT(&clp->nfsc_deleg);
873 		TAILQ_INIT(&clp->nfsc_layout);
874 		LIST_INIT(&clp->nfsc_devinfo);
875 		for (i = 0; i < NFSCLDELEGHASHSIZE; i++)
876 			LIST_INIT(&clp->nfsc_deleghash[i]);
877 		for (i = 0; i < NFSCLOPENHASHSIZE; i++)
878 			LIST_INIT(&clp->nfsc_openhash[i]);
879 		for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++)
880 			LIST_INIT(&clp->nfsc_layouthash[i]);
881 		clp->nfsc_flags = NFSCLFLAGS_INITED;
882 		clp->nfsc_clientidrev = 1;
883 		clp->nfsc_cbident = nfscl_nextcbident();
884 		nfscl_fillclid(nmp->nm_clval, uuid, clp->nfsc_id,
885 		    clp->nfsc_idlen);
886 		LIST_INSERT_HEAD(&nfsclhead, clp, nfsc_list);
887 		nmp->nm_clp = clp;
888 		clp->nfsc_nmp = nmp;
889 	} else {
890 		if (newclp != NULL)
891 			free(newclp, M_NFSCLCLIENT);
892 	}
893 	while ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0 && !igotlock &&
894 	    !NFSCL_FORCEDISM(mp))
895 		igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
896 		    NFSCLSTATEMUTEXPTR, mp);
897 	if (igotlock == 0) {
898 		/*
899 		 * Call nfsv4_lock() with "iwantlock == 0" so that it will
900 		 * wait for a pending exclusive lock request.  This gives the
901 		 * exclusive lock request priority over this shared lock
902 		 * request.
903 		 * An exclusive lock on nfsc_lock is used mainly for server
904 		 * crash recoveries.
905 		 */
906 		nfsv4_lock(&clp->nfsc_lock, 0, NULL, NFSCLSTATEMUTEXPTR, mp);
907 		nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
908 	}
909 	if (igotlock == 0 && NFSCL_FORCEDISM(mp)) {
910 		/*
911 		 * Both nfsv4_lock() and nfsv4_getref() know to check
912 		 * for NFSCL_FORCEDISM() and return without sleeping to
913 		 * wait for the exclusive lock to be released, since it
914 		 * might be held by nfscl_umount() and we need to get out
915 		 * now for that case and not wait until nfscl_umount()
916 		 * releases it.
917 		 */
918 		NFSUNLOCKCLSTATE();
919 		return (EBADF);
920 	}
921 	NFSUNLOCKCLSTATE();
922 
923 	/*
924 	 * If it needs a clientid, do the setclientid now.
925 	 */
926 	if ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0) {
927 		if (!igotlock)
928 			panic("nfscl_clget");
929 		if (p == NULL || cred == NULL) {
930 			NFSLOCKCLSTATE();
931 			nfsv4_unlock(&clp->nfsc_lock, 0);
932 			NFSUNLOCKCLSTATE();
933 			return (EACCES);
934 		}
935 		/*
936 		 * If RFC3530 Sec. 14.2.33 is taken literally,
937 		 * NFSERR_CLIDINUSE will be returned persistently for the
938 		 * case where a new mount of the same file system is using
939 		 * a different principal. In practice, NFSERR_CLIDINUSE is
940 		 * only returned when there is outstanding unexpired state
941 		 * on the clientid. As such, try for twice the lease
942 		 * interval, if we know what that is. Otherwise, make a
943 		 * wild ass guess.
944 		 * The case of returning NFSERR_STALECLIENTID is far less
945 		 * likely, but might occur if there is a significant delay
946 		 * between doing the SetClientID and SetClientIDConfirm Ops,
947 		 * such that the server throws away the clientid before
948 		 * receiving the SetClientIDConfirm.
949 		 */
950 		if (clp->nfsc_renew > 0)
951 			clidinusedelay = NFSCL_LEASE(clp->nfsc_renew) * 2;
952 		else
953 			clidinusedelay = 120;
954 		trystalecnt = 3;
955 		do {
956 			error = nfsrpc_setclient(nmp, clp, 0, NULL, cred, p);
957 			if (error == NFSERR_STALECLIENTID ||
958 			    error == NFSERR_STALEDONTRECOVER ||
959 			    error == NFSERR_BADSESSION ||
960 			    error == NFSERR_CLIDINUSE) {
961 				(void) nfs_catnap(PZERO, error, "nfs_setcl");
962 			} else if (error == NFSERR_MINORVERMISMATCH &&
963 			    tryminvers) {
964 				if (nmp->nm_minorvers > 0)
965 					nmp->nm_minorvers--;
966 				else
967 					tryminvers = false;
968 			}
969 		} while (((error == NFSERR_STALECLIENTID ||
970 		     error == NFSERR_BADSESSION ||
971 		     error == NFSERR_STALEDONTRECOVER) && --trystalecnt > 0) ||
972 		    (error == NFSERR_CLIDINUSE && --clidinusedelay > 0) ||
973 		    (error == NFSERR_MINORVERMISMATCH && tryminvers));
974 		if (error) {
975 			NFSLOCKCLSTATE();
976 			nfsv4_unlock(&clp->nfsc_lock, 0);
977 			NFSUNLOCKCLSTATE();
978 			return (error);
979 		}
980 		clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID;
981 	}
982 	if (igotlock) {
983 		NFSLOCKCLSTATE();
984 		nfsv4_unlock(&clp->nfsc_lock, 1);
985 		NFSUNLOCKCLSTATE();
986 	}
987 
988 	*clpp = clp;
989 	return (0);
990 }
991 
992 /*
993  * Get a reference to a clientid and return it, if valid.
994  */
995 struct nfsclclient *
996 nfscl_findcl(struct nfsmount *nmp)
997 {
998 	struct nfsclclient *clp;
999 
1000 	clp = nmp->nm_clp;
1001 	if (clp == NULL || !(clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID))
1002 		return (NULL);
1003 	return (clp);
1004 }
1005 
1006 /*
1007  * Release the clientid structure. It may be locked or reference counted.
1008  */
1009 static void
1010 nfscl_clrelease(struct nfsclclient *clp)
1011 {
1012 
1013 	if (clp->nfsc_lock.nfslock_lock & NFSV4LOCK_LOCK)
1014 		nfsv4_unlock(&clp->nfsc_lock, 0);
1015 	else
1016 		nfsv4_relref(&clp->nfsc_lock);
1017 }
1018 
1019 /*
1020  * External call for nfscl_clrelease.
1021  */
1022 void
1023 nfscl_clientrelease(struct nfsclclient *clp)
1024 {
1025 
1026 	NFSLOCKCLSTATE();
1027 	if (clp->nfsc_lock.nfslock_lock & NFSV4LOCK_LOCK)
1028 		nfsv4_unlock(&clp->nfsc_lock, 0);
1029 	else
1030 		nfsv4_relref(&clp->nfsc_lock);
1031 	NFSUNLOCKCLSTATE();
1032 }
1033 
1034 /*
1035  * Called when wanting to lock a byte region.
1036  */
1037 int
1038 nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
1039     short type, struct ucred *cred, NFSPROC_T *p, struct nfsclclient *rclp,
1040     int recovery, void *id, int flags, u_int8_t *rownp, u_int8_t *ropenownp,
1041     struct nfscllockowner **lpp, int *newonep, int *donelocallyp)
1042 {
1043 	struct nfscllockowner *lp;
1044 	struct nfsclopen *op;
1045 	struct nfsclclient *clp;
1046 	struct nfscllockowner *nlp;
1047 	struct nfscllock *nlop, *otherlop;
1048 	struct nfscldeleg *dp = NULL, *ldp = NULL;
1049 	struct nfscllockownerhead *lhp = NULL;
1050 	struct nfsnode *np;
1051 	u_int8_t own[NFSV4CL_LOCKNAMELEN], *ownp, openown[NFSV4CL_LOCKNAMELEN];
1052 	u_int8_t *openownp;
1053 	int error = 0, ret, donelocally = 0;
1054 	u_int32_t mode;
1055 
1056 	/* For Lock Ops, the open mode doesn't matter, so use 0 to match any. */
1057 	mode = 0;
1058 	np = VTONFS(vp);
1059 	*lpp = NULL;
1060 	lp = NULL;
1061 	*newonep = 0;
1062 	*donelocallyp = 0;
1063 
1064 	/*
1065 	 * Might need these, so MALLOC them now, to
1066 	 * avoid a tsleep() in MALLOC later.
1067 	 */
1068 	nlp = malloc(
1069 	    sizeof (struct nfscllockowner), M_NFSCLLOCKOWNER, M_WAITOK);
1070 	otherlop = malloc(
1071 	    sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1072 	nlop = malloc(
1073 	    sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1074 	nlop->nfslo_type = type;
1075 	nlop->nfslo_first = off;
1076 	if (len == NFS64BITSSET) {
1077 		nlop->nfslo_end = NFS64BITSSET;
1078 	} else {
1079 		nlop->nfslo_end = off + len;
1080 		if (nlop->nfslo_end <= nlop->nfslo_first)
1081 			error = NFSERR_INVAL;
1082 	}
1083 
1084 	if (!error) {
1085 		if (recovery)
1086 			clp = rclp;
1087 		else
1088 			error = nfscl_getcl(vp->v_mount, cred, p, false, &clp);
1089 	}
1090 	if (error) {
1091 		free(nlp, M_NFSCLLOCKOWNER);
1092 		free(otherlop, M_NFSCLLOCK);
1093 		free(nlop, M_NFSCLLOCK);
1094 		return (error);
1095 	}
1096 
1097 	op = NULL;
1098 	if (recovery) {
1099 		ownp = rownp;
1100 		openownp = ropenownp;
1101 	} else {
1102 		nfscl_filllockowner(id, own, flags);
1103 		ownp = own;
1104 		if (NFSHASONEOPENOWN(VFSTONFS(vp->v_mount)))
1105 			nfscl_filllockowner(NULL, openown, F_POSIX);
1106 		else
1107 			nfscl_filllockowner(p->td_proc, openown, F_POSIX);
1108 		openownp = openown;
1109 	}
1110 	if (!recovery) {
1111 		NFSLOCKCLSTATE();
1112 		/*
1113 		 * First, search for a delegation. If one exists for this file,
1114 		 * the lock can be done locally against it, so long as there
1115 		 * isn't a local lock conflict.
1116 		 */
1117 		ldp = dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
1118 		    np->n_fhp->nfh_len);
1119 		/* Just sanity check for correct type of delegation */
1120 		if (dp != NULL && ((dp->nfsdl_flags &
1121 		    (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) != 0 ||
1122 		     (type == F_WRLCK &&
1123 		      (dp->nfsdl_flags & NFSCLDL_WRITE) == 0)))
1124 			dp = NULL;
1125 	}
1126 	if (dp != NULL) {
1127 		/* Now, find an open and maybe a lockowner. */
1128 		ret = nfscl_getopen(&dp->nfsdl_owner, NULL, np->n_fhp->nfh_fh,
1129 		    np->n_fhp->nfh_len, openownp, ownp, mode, NULL, &op);
1130 		if (ret)
1131 			ret = nfscl_getopen(NULL, clp->nfsc_openhash,
1132 			    np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp,
1133 			    ownp, mode, NULL, &op);
1134 		if (!ret) {
1135 			lhp = &dp->nfsdl_lock;
1136 			TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
1137 			TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list);
1138 			dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
1139 			donelocally = 1;
1140 		} else {
1141 			dp = NULL;
1142 		}
1143 	}
1144 	if (!donelocally) {
1145 		/*
1146 		 * Get the related Open and maybe lockowner.
1147 		 */
1148 		error = nfscl_getopen(NULL, clp->nfsc_openhash,
1149 		    np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp,
1150 		    ownp, mode, &lp, &op);
1151 		if (!error)
1152 			lhp = &op->nfso_lock;
1153 	}
1154 	if (!error && !recovery)
1155 		error = nfscl_localconflict(clp, np->n_fhp->nfh_fh,
1156 		    np->n_fhp->nfh_len, nlop, ownp, ldp, NULL);
1157 	if (error) {
1158 		if (!recovery) {
1159 			nfscl_clrelease(clp);
1160 			NFSUNLOCKCLSTATE();
1161 		}
1162 		free(nlp, M_NFSCLLOCKOWNER);
1163 		free(otherlop, M_NFSCLLOCK);
1164 		free(nlop, M_NFSCLLOCK);
1165 		return (error);
1166 	}
1167 
1168 	/*
1169 	 * Ok, see if a lockowner exists and create one, as required.
1170 	 */
1171 	if (lp == NULL)
1172 		LIST_FOREACH(lp, lhp, nfsl_list) {
1173 			if (!NFSBCMP(lp->nfsl_owner, ownp, NFSV4CL_LOCKNAMELEN))
1174 				break;
1175 		}
1176 	if (lp == NULL) {
1177 		NFSBCOPY(ownp, nlp->nfsl_owner, NFSV4CL_LOCKNAMELEN);
1178 		if (recovery)
1179 			NFSBCOPY(ropenownp, nlp->nfsl_openowner,
1180 			    NFSV4CL_LOCKNAMELEN);
1181 		else
1182 			NFSBCOPY(op->nfso_own->nfsow_owner, nlp->nfsl_openowner,
1183 			    NFSV4CL_LOCKNAMELEN);
1184 		nlp->nfsl_seqid = 0;
1185 		nlp->nfsl_lockflags = flags;
1186 		nlp->nfsl_inprog = NULL;
1187 		nfscl_lockinit(&nlp->nfsl_rwlock);
1188 		LIST_INIT(&nlp->nfsl_lock);
1189 		if (donelocally) {
1190 			nlp->nfsl_open = NULL;
1191 			nfsstatsv1.cllocallockowners++;
1192 		} else {
1193 			nlp->nfsl_open = op;
1194 			nfsstatsv1.cllockowners++;
1195 		}
1196 		LIST_INSERT_HEAD(lhp, nlp, nfsl_list);
1197 		lp = nlp;
1198 		nlp = NULL;
1199 		*newonep = 1;
1200 	}
1201 
1202 	/*
1203 	 * Now, update the byte ranges for locks.
1204 	 */
1205 	ret = nfscl_updatelock(lp, &nlop, &otherlop, donelocally);
1206 	if (!ret)
1207 		donelocally = 1;
1208 	if (donelocally) {
1209 		*donelocallyp = 1;
1210 		if (!recovery)
1211 			nfscl_clrelease(clp);
1212 	} else {
1213 		/*
1214 		 * Serial modifications on the lock owner for multiple threads
1215 		 * for the same process using a read/write lock.
1216 		 */
1217 		if (!recovery)
1218 			nfscl_lockexcl(&lp->nfsl_rwlock, NFSCLSTATEMUTEXPTR);
1219 	}
1220 	if (!recovery)
1221 		NFSUNLOCKCLSTATE();
1222 
1223 	if (nlp)
1224 		free(nlp, M_NFSCLLOCKOWNER);
1225 	if (nlop)
1226 		free(nlop, M_NFSCLLOCK);
1227 	if (otherlop)
1228 		free(otherlop, M_NFSCLLOCK);
1229 
1230 	*lpp = lp;
1231 	return (0);
1232 }
1233 
1234 /*
1235  * Called to unlock a byte range, for LockU.
1236  */
1237 int
1238 nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
1239     __unused struct ucred *cred, NFSPROC_T *p, int callcnt,
1240     struct nfsclclient *clp, void *id, int flags,
1241     struct nfscllockowner **lpp, int *dorpcp)
1242 {
1243 	struct nfscllockowner *lp;
1244 	struct nfsclopen *op;
1245 	struct nfscllock *nlop, *other_lop = NULL;
1246 	struct nfscldeleg *dp;
1247 	struct nfsnode *np;
1248 	u_int8_t own[NFSV4CL_LOCKNAMELEN];
1249 	int ret = 0, fnd;
1250 
1251 	np = VTONFS(vp);
1252 	*lpp = NULL;
1253 	*dorpcp = 0;
1254 
1255 	/*
1256 	 * Might need these, so MALLOC them now, to
1257 	 * avoid a tsleep() in MALLOC later.
1258 	 */
1259 	nlop = malloc(
1260 	    sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1261 	nlop->nfslo_type = F_UNLCK;
1262 	nlop->nfslo_first = off;
1263 	if (len == NFS64BITSSET) {
1264 		nlop->nfslo_end = NFS64BITSSET;
1265 	} else {
1266 		nlop->nfslo_end = off + len;
1267 		if (nlop->nfslo_end <= nlop->nfslo_first) {
1268 			free(nlop, M_NFSCLLOCK);
1269 			return (NFSERR_INVAL);
1270 		}
1271 	}
1272 	if (callcnt == 0) {
1273 		other_lop = malloc(
1274 		    sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1275 		*other_lop = *nlop;
1276 	}
1277 	nfscl_filllockowner(id, own, flags);
1278 	dp = NULL;
1279 	NFSLOCKCLSTATE();
1280 	if (callcnt == 0)
1281 		dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
1282 		    np->n_fhp->nfh_len);
1283 
1284 	/*
1285 	 * First, unlock any local regions on a delegation.
1286 	 */
1287 	if (dp != NULL) {
1288 		/* Look for this lockowner. */
1289 		LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
1290 			if (!NFSBCMP(lp->nfsl_owner, own,
1291 			    NFSV4CL_LOCKNAMELEN))
1292 				break;
1293 		}
1294 		if (lp != NULL)
1295 			/* Use other_lop, so nlop is still available */
1296 			(void)nfscl_updatelock(lp, &other_lop, NULL, 1);
1297 	}
1298 
1299 	/*
1300 	 * Now, find a matching open/lockowner that hasn't already been done,
1301 	 * as marked by nfsl_inprog.
1302 	 */
1303 	lp = NULL;
1304 	fnd = 0;
1305 	LIST_FOREACH(op, NFSCLOPENHASH(clp, np->n_fhp->nfh_fh,
1306 	    np->n_fhp->nfh_len), nfso_hash) {
1307 		if (op->nfso_fhlen == np->n_fhp->nfh_len &&
1308 		    !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) {
1309 			LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1310 				if (lp->nfsl_inprog == NULL &&
1311 				    !NFSBCMP(lp->nfsl_owner, own,
1312 				     NFSV4CL_LOCKNAMELEN)) {
1313 					fnd = 1;
1314 					break;
1315 				}
1316 			}
1317 		}
1318 		if (fnd)
1319 			break;
1320 	}
1321 
1322 	if (lp != NULL) {
1323 		ret = nfscl_updatelock(lp, &nlop, NULL, 0);
1324 		if (ret)
1325 			*dorpcp = 1;
1326 		/*
1327 		 * Serial modifications on the lock owner for multiple
1328 		 * threads for the same process using a read/write lock.
1329 		 */
1330 		lp->nfsl_inprog = p;
1331 		nfscl_lockexcl(&lp->nfsl_rwlock, NFSCLSTATEMUTEXPTR);
1332 		*lpp = lp;
1333 	}
1334 	NFSUNLOCKCLSTATE();
1335 	if (nlop)
1336 		free(nlop, M_NFSCLLOCK);
1337 	if (other_lop)
1338 		free(other_lop, M_NFSCLLOCK);
1339 	return (0);
1340 }
1341 
1342 /*
1343  * Release all lockowners marked in progess for this process and file.
1344  */
1345 void
1346 nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p,
1347     void *id, int flags)
1348 {
1349 	struct nfsclopen *op;
1350 	struct nfscllockowner *lp;
1351 	struct nfsnode *np;
1352 	u_int8_t own[NFSV4CL_LOCKNAMELEN];
1353 
1354 	np = VTONFS(vp);
1355 	nfscl_filllockowner(id, own, flags);
1356 	NFSLOCKCLSTATE();
1357 	LIST_FOREACH(op, NFSCLOPENHASH(clp, np->n_fhp->nfh_fh,
1358 	    np->n_fhp->nfh_len), nfso_hash) {
1359 		if (op->nfso_fhlen == np->n_fhp->nfh_len &&
1360 		    !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) {
1361 			LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1362 				if (lp->nfsl_inprog == p &&
1363 				    !NFSBCMP(lp->nfsl_owner, own,
1364 				    NFSV4CL_LOCKNAMELEN)) {
1365 					lp->nfsl_inprog = NULL;
1366 					nfscl_lockunlock(&lp->nfsl_rwlock);
1367 				}
1368 			}
1369 		}
1370 	}
1371 	nfscl_clrelease(clp);
1372 	NFSUNLOCKCLSTATE();
1373 }
1374 
1375 /*
1376  * Called to find out if any bytes within the byte range specified are
1377  * write locked by the calling process. Used to determine if flushing
1378  * is required before a LockU.
1379  * If in doubt, return 1, so the flush will occur.
1380  */
1381 int
1382 nfscl_checkwritelocked(vnode_t vp, struct flock *fl,
1383     struct ucred *cred, NFSPROC_T *p, void *id, int flags)
1384 {
1385 	struct nfscllockowner *lp;
1386 	struct nfsclopen *op;
1387 	struct nfsclclient *clp;
1388 	struct nfscllock *lop;
1389 	struct nfscldeleg *dp;
1390 	struct nfsnode *np;
1391 	u_int64_t off, end;
1392 	u_int8_t own[NFSV4CL_LOCKNAMELEN];
1393 	int error = 0;
1394 
1395 	np = VTONFS(vp);
1396 	switch (fl->l_whence) {
1397 	case SEEK_SET:
1398 	case SEEK_CUR:
1399 		/*
1400 		 * Caller is responsible for adding any necessary offset
1401 		 * when SEEK_CUR is used.
1402 		 */
1403 		off = fl->l_start;
1404 		break;
1405 	case SEEK_END:
1406 		off = np->n_size + fl->l_start;
1407 		break;
1408 	default:
1409 		return (1);
1410 	}
1411 	if (fl->l_len != 0) {
1412 		end = off + fl->l_len;
1413 		if (end < off)
1414 			return (1);
1415 	} else {
1416 		end = NFS64BITSSET;
1417 	}
1418 
1419 	error = nfscl_getcl(vp->v_mount, cred, p, false, &clp);
1420 	if (error)
1421 		return (1);
1422 	nfscl_filllockowner(id, own, flags);
1423 	NFSLOCKCLSTATE();
1424 
1425 	/*
1426 	 * First check the delegation locks.
1427 	 */
1428 	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
1429 	if (dp != NULL) {
1430 		LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
1431 			if (!NFSBCMP(lp->nfsl_owner, own,
1432 			    NFSV4CL_LOCKNAMELEN))
1433 				break;
1434 		}
1435 		if (lp != NULL) {
1436 			LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
1437 				if (lop->nfslo_first >= end)
1438 					break;
1439 				if (lop->nfslo_end <= off)
1440 					continue;
1441 				if (lop->nfslo_type == F_WRLCK) {
1442 					nfscl_clrelease(clp);
1443 					NFSUNLOCKCLSTATE();
1444 					return (1);
1445 				}
1446 			}
1447 		}
1448 	}
1449 
1450 	/*
1451 	 * Now, check state against the server.
1452 	 */
1453 	LIST_FOREACH(op, NFSCLOPENHASH(clp, np->n_fhp->nfh_fh,
1454 	    np->n_fhp->nfh_len), nfso_hash) {
1455 		if (op->nfso_fhlen == np->n_fhp->nfh_len &&
1456 		    !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) {
1457 			LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1458 				if (!NFSBCMP(lp->nfsl_owner, own,
1459 				    NFSV4CL_LOCKNAMELEN))
1460 					break;
1461 			}
1462 			if (lp != NULL) {
1463 				LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
1464 					if (lop->nfslo_first >= end)
1465 						break;
1466 					if (lop->nfslo_end <= off)
1467 						continue;
1468 					if (lop->nfslo_type == F_WRLCK) {
1469 						nfscl_clrelease(clp);
1470 						NFSUNLOCKCLSTATE();
1471 						return (1);
1472 					}
1473 				}
1474 			}
1475 		}
1476 	}
1477 	nfscl_clrelease(clp);
1478 	NFSUNLOCKCLSTATE();
1479 	return (0);
1480 }
1481 
1482 /*
1483  * Release a byte range lock owner structure.
1484  */
1485 void
1486 nfscl_lockrelease(struct nfscllockowner *lp, int error, int candelete)
1487 {
1488 	struct nfsclclient *clp;
1489 
1490 	if (lp == NULL)
1491 		return;
1492 	NFSLOCKCLSTATE();
1493 	clp = lp->nfsl_open->nfso_own->nfsow_clp;
1494 	if (error != 0 && candelete &&
1495 	    (lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED) == 0)
1496 		nfscl_freelockowner(lp, 0);
1497 	else
1498 		nfscl_lockunlock(&lp->nfsl_rwlock);
1499 	nfscl_clrelease(clp);
1500 	NFSUNLOCKCLSTATE();
1501 }
1502 
1503 /*
1504  * Free up an open structure and any associated byte range lock structures.
1505  */
1506 void
1507 nfscl_freeopen(struct nfsclopen *op, int local)
1508 {
1509 
1510 	LIST_REMOVE(op, nfso_list);
1511 	if (op->nfso_hash.le_prev != NULL)
1512 		LIST_REMOVE(op, nfso_hash);
1513 	nfscl_freealllocks(&op->nfso_lock, local);
1514 	free(op, M_NFSCLOPEN);
1515 	if (local)
1516 		nfsstatsv1.cllocalopens--;
1517 	else
1518 		nfsstatsv1.clopens--;
1519 }
1520 
1521 /*
1522  * Free up all lock owners and associated locks.
1523  */
1524 static void
1525 nfscl_freealllocks(struct nfscllockownerhead *lhp, int local)
1526 {
1527 	struct nfscllockowner *lp, *nlp;
1528 
1529 	LIST_FOREACH_SAFE(lp, lhp, nfsl_list, nlp) {
1530 		if ((lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED))
1531 			panic("nfscllckw");
1532 		nfscl_freelockowner(lp, local);
1533 	}
1534 }
1535 
1536 /*
1537  * Called for an Open when NFSERR_EXPIRED is received from the server.
1538  * If there are no byte range locks nor a Share Deny lost, try to do a
1539  * fresh Open. Otherwise, free the open.
1540  */
1541 static int
1542 nfscl_expireopen(struct nfsclclient *clp, struct nfsclopen *op,
1543     struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
1544 {
1545 	struct nfscllockowner *lp;
1546 	struct nfscldeleg *dp;
1547 	int mustdelete = 0, error;
1548 
1549 	/*
1550 	 * Look for any byte range lock(s).
1551 	 */
1552 	LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1553 		if (!LIST_EMPTY(&lp->nfsl_lock)) {
1554 			mustdelete = 1;
1555 			break;
1556 		}
1557 	}
1558 
1559 	/*
1560 	 * If no byte range lock(s) nor a Share deny, try to re-open.
1561 	 */
1562 	if (!mustdelete && (op->nfso_mode & NFSLCK_DENYBITS) == 0) {
1563 		newnfs_copycred(&op->nfso_cred, cred);
1564 		dp = NULL;
1565 		error = nfsrpc_reopen(nmp, op->nfso_fh,
1566 		    op->nfso_fhlen, op->nfso_mode, op, &dp, cred, p);
1567 		if (error) {
1568 			mustdelete = 1;
1569 			if (dp != NULL) {
1570 				free(dp, M_NFSCLDELEG);
1571 				dp = NULL;
1572 			}
1573 		}
1574 		if (dp != NULL)
1575 			nfscl_deleg(nmp->nm_mountp, clp, op->nfso_fh,
1576 			    op->nfso_fhlen, cred, p, &dp);
1577 	}
1578 
1579 	/*
1580 	 * If a byte range lock or Share deny or couldn't re-open, free it.
1581 	 */
1582 	if (mustdelete)
1583 		nfscl_freeopen(op, 0);
1584 	return (mustdelete);
1585 }
1586 
1587 /*
1588  * Free up an open owner structure.
1589  */
1590 static void
1591 nfscl_freeopenowner(struct nfsclowner *owp, int local)
1592 {
1593 
1594 	LIST_REMOVE(owp, nfsow_list);
1595 	free(owp, M_NFSCLOWNER);
1596 	if (local)
1597 		nfsstatsv1.cllocalopenowners--;
1598 	else
1599 		nfsstatsv1.clopenowners--;
1600 }
1601 
1602 /*
1603  * Free up a byte range lock owner structure.
1604  */
1605 void
1606 nfscl_freelockowner(struct nfscllockowner *lp, int local)
1607 {
1608 	struct nfscllock *lop, *nlop;
1609 
1610 	LIST_REMOVE(lp, nfsl_list);
1611 	LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
1612 		nfscl_freelock(lop, local);
1613 	}
1614 	free(lp, M_NFSCLLOCKOWNER);
1615 	if (local)
1616 		nfsstatsv1.cllocallockowners--;
1617 	else
1618 		nfsstatsv1.cllockowners--;
1619 }
1620 
1621 /*
1622  * Free up a byte range lock structure.
1623  */
1624 void
1625 nfscl_freelock(struct nfscllock *lop, int local)
1626 {
1627 
1628 	LIST_REMOVE(lop, nfslo_list);
1629 	free(lop, M_NFSCLLOCK);
1630 	if (local)
1631 		nfsstatsv1.cllocallocks--;
1632 	else
1633 		nfsstatsv1.cllocks--;
1634 }
1635 
1636 /*
1637  * Clean out the state related to a delegation.
1638  */
1639 static void
1640 nfscl_cleandeleg(struct nfscldeleg *dp)
1641 {
1642 	struct nfsclowner *owp, *nowp;
1643 	struct nfsclopen *op;
1644 
1645 	LIST_FOREACH_SAFE(owp, &dp->nfsdl_owner, nfsow_list, nowp) {
1646 		op = LIST_FIRST(&owp->nfsow_open);
1647 		if (op != NULL) {
1648 			if (LIST_NEXT(op, nfso_list) != NULL)
1649 				panic("nfscleandel");
1650 			nfscl_freeopen(op, 1);
1651 		}
1652 		nfscl_freeopenowner(owp, 1);
1653 	}
1654 	nfscl_freealllocks(&dp->nfsdl_lock, 1);
1655 }
1656 
1657 /*
1658  * Free a delegation.
1659  */
1660 static void
1661 nfscl_freedeleg(struct nfscldeleghead *hdp, struct nfscldeleg *dp, bool freeit)
1662 {
1663 
1664 	TAILQ_REMOVE(hdp, dp, nfsdl_list);
1665 	LIST_REMOVE(dp, nfsdl_hash);
1666 	if (freeit)
1667 		free(dp, M_NFSCLDELEG);
1668 	nfsstatsv1.cldelegates--;
1669 	nfscl_delegcnt--;
1670 }
1671 
1672 /*
1673  * Free up all state related to this client structure.
1674  */
1675 static void
1676 nfscl_cleanclient(struct nfsclclient *clp)
1677 {
1678 	struct nfsclowner *owp, *nowp;
1679 	struct nfsclopen *op, *nop;
1680 	struct nfscllayout *lyp, *nlyp;
1681 	struct nfscldevinfo *dip, *ndip;
1682 
1683 	TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp)
1684 		nfscl_freelayout(lyp);
1685 
1686 	LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip)
1687 		nfscl_freedevinfo(dip);
1688 
1689 	/* Now, all the OpenOwners, etc. */
1690 	LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
1691 		LIST_FOREACH_SAFE(op, &owp->nfsow_open, nfso_list, nop) {
1692 			nfscl_freeopen(op, 0);
1693 		}
1694 		nfscl_freeopenowner(owp, 0);
1695 	}
1696 }
1697 
1698 /*
1699  * Called when an NFSERR_EXPIRED is received from the server.
1700  */
1701 static void
1702 nfscl_expireclient(struct nfsclclient *clp, struct nfsmount *nmp,
1703     struct ucred *cred, NFSPROC_T *p)
1704 {
1705 	struct nfsclowner *owp, *nowp, *towp;
1706 	struct nfsclopen *op, *nop, *top;
1707 	struct nfscldeleg *dp, *ndp;
1708 	int ret, printed = 0;
1709 
1710 	/*
1711 	 * First, merge locally issued Opens into the list for the server.
1712 	 */
1713 	dp = TAILQ_FIRST(&clp->nfsc_deleg);
1714 	while (dp != NULL) {
1715 	    ndp = TAILQ_NEXT(dp, nfsdl_list);
1716 	    owp = LIST_FIRST(&dp->nfsdl_owner);
1717 	    while (owp != NULL) {
1718 		nowp = LIST_NEXT(owp, nfsow_list);
1719 		op = LIST_FIRST(&owp->nfsow_open);
1720 		if (op != NULL) {
1721 		    if (LIST_NEXT(op, nfso_list) != NULL)
1722 			panic("nfsclexp");
1723 		    LIST_FOREACH(towp, &clp->nfsc_owner, nfsow_list) {
1724 			if (!NFSBCMP(towp->nfsow_owner, owp->nfsow_owner,
1725 			    NFSV4CL_LOCKNAMELEN))
1726 			    break;
1727 		    }
1728 		    if (towp != NULL) {
1729 			/* Merge opens in */
1730 			LIST_FOREACH(top, &towp->nfsow_open, nfso_list) {
1731 			    if (top->nfso_fhlen == op->nfso_fhlen &&
1732 				!NFSBCMP(top->nfso_fh, op->nfso_fh,
1733 				 op->nfso_fhlen)) {
1734 				top->nfso_mode |= op->nfso_mode;
1735 				top->nfso_opencnt += op->nfso_opencnt;
1736 				break;
1737 			    }
1738 			}
1739 			if (top == NULL) {
1740 			    /* Just add the open to the owner list */
1741 			    LIST_REMOVE(op, nfso_list);
1742 			    op->nfso_own = towp;
1743 			    LIST_INSERT_HEAD(&towp->nfsow_open, op, nfso_list);
1744 			    LIST_INSERT_HEAD(NFSCLOPENHASH(clp, op->nfso_fh,
1745 				op->nfso_fhlen), op, nfso_hash);
1746 			    nfsstatsv1.cllocalopens--;
1747 			    nfsstatsv1.clopens++;
1748 			}
1749 		    } else {
1750 			/* Just add the openowner to the client list */
1751 			LIST_REMOVE(owp, nfsow_list);
1752 			owp->nfsow_clp = clp;
1753 			LIST_INSERT_HEAD(&clp->nfsc_owner, owp, nfsow_list);
1754 			LIST_INSERT_HEAD(NFSCLOPENHASH(clp, op->nfso_fh,
1755 			    op->nfso_fhlen), op, nfso_hash);
1756 			nfsstatsv1.cllocalopenowners--;
1757 			nfsstatsv1.clopenowners++;
1758 			nfsstatsv1.cllocalopens--;
1759 			nfsstatsv1.clopens++;
1760 		    }
1761 		}
1762 		owp = nowp;
1763 	    }
1764 	    if (!printed && !LIST_EMPTY(&dp->nfsdl_lock)) {
1765 		printed = 1;
1766 		printf("nfsv4 expired locks lost\n");
1767 	    }
1768 	    nfscl_cleandeleg(dp);
1769 	    nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
1770 	    dp = ndp;
1771 	}
1772 	if (!TAILQ_EMPTY(&clp->nfsc_deleg))
1773 	    panic("nfsclexp");
1774 
1775 	/*
1776 	 * Now, try and reopen against the server.
1777 	 */
1778 	LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
1779 		owp->nfsow_seqid = 0;
1780 		LIST_FOREACH_SAFE(op, &owp->nfsow_open, nfso_list, nop) {
1781 			ret = nfscl_expireopen(clp, op, nmp, cred, p);
1782 			if (ret && !printed) {
1783 				printed = 1;
1784 				printf("nfsv4 expired locks lost\n");
1785 			}
1786 		}
1787 		if (LIST_EMPTY(&owp->nfsow_open))
1788 			nfscl_freeopenowner(owp, 0);
1789 	}
1790 }
1791 
1792 /*
1793  * This function must be called after the process represented by "own" has
1794  * exited. Must be called with CLSTATE lock held.
1795  */
1796 static void
1797 nfscl_cleanup_common(struct nfsclclient *clp, u_int8_t *own)
1798 {
1799 	struct nfsclowner *owp, *nowp;
1800 	struct nfscllockowner *lp, *nlp;
1801 	struct nfscldeleg *dp;
1802 
1803 	/* First, get rid of local locks on delegations. */
1804 	TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
1805 		LIST_FOREACH_SAFE(lp, &dp->nfsdl_lock, nfsl_list, nlp) {
1806 		    if (!NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) {
1807 			if ((lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED))
1808 			    panic("nfscllckw");
1809 			nfscl_freelockowner(lp, 1);
1810 		    }
1811 		}
1812 	}
1813 	owp = LIST_FIRST(&clp->nfsc_owner);
1814 	while (owp != NULL) {
1815 		nowp = LIST_NEXT(owp, nfsow_list);
1816 		if (!NFSBCMP(owp->nfsow_owner, own,
1817 		    NFSV4CL_LOCKNAMELEN)) {
1818 			/*
1819 			 * If there are children that haven't closed the
1820 			 * file descriptors yet, the opens will still be
1821 			 * here. For that case, let the renew thread clear
1822 			 * out the OpenOwner later.
1823 			 */
1824 			if (LIST_EMPTY(&owp->nfsow_open))
1825 				nfscl_freeopenowner(owp, 0);
1826 			else
1827 				owp->nfsow_defunct = 1;
1828 		}
1829 		owp = nowp;
1830 	}
1831 }
1832 
1833 /*
1834  * Find open/lock owners for processes that have exited.
1835  */
1836 static void
1837 nfscl_cleanupkext(struct nfsclclient *clp, struct nfscllockownerfhhead *lhp)
1838 {
1839 	struct nfsclowner *owp, *nowp;
1840 	struct nfsclopen *op;
1841 	struct nfscllockowner *lp, *nlp;
1842 	struct nfscldeleg *dp;
1843 
1844 	/*
1845 	 * All the pidhash locks must be acquired, since they are sx locks
1846 	 * and must be acquired before the mutexes.  The pid(s) that will
1847 	 * be used aren't known yet, so all the locks need to be acquired.
1848 	 * Fortunately, this function is only performed once/sec.
1849 	 */
1850 	pidhash_slockall();
1851 	NFSLOCKCLSTATE();
1852 	LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
1853 		LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
1854 			LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp) {
1855 				if (LIST_EMPTY(&lp->nfsl_lock))
1856 					nfscl_emptylockowner(lp, lhp);
1857 			}
1858 		}
1859 		if (nfscl_procdoesntexist(owp->nfsow_owner))
1860 			nfscl_cleanup_common(clp, owp->nfsow_owner);
1861 	}
1862 
1863 	/*
1864 	 * For the single open_owner case, these lock owners need to be
1865 	 * checked to see if they still exist separately.
1866 	 * This is because nfscl_procdoesntexist() never returns true for
1867 	 * the single open_owner so that the above doesn't ever call
1868 	 * nfscl_cleanup_common().
1869 	 */
1870 	TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
1871 		LIST_FOREACH_SAFE(lp, &dp->nfsdl_lock, nfsl_list, nlp) {
1872 			if (nfscl_procdoesntexist(lp->nfsl_owner))
1873 				nfscl_cleanup_common(clp, lp->nfsl_owner);
1874 		}
1875 	}
1876 	NFSUNLOCKCLSTATE();
1877 	pidhash_sunlockall();
1878 }
1879 
1880 /*
1881  * Take the empty lock owner and move it to the local lhp list if the
1882  * associated process no longer exists.
1883  */
1884 static void
1885 nfscl_emptylockowner(struct nfscllockowner *lp,
1886     struct nfscllockownerfhhead *lhp)
1887 {
1888 	struct nfscllockownerfh *lfhp, *mylfhp;
1889 	struct nfscllockowner *nlp;
1890 	int fnd_it;
1891 
1892 	/* If not a Posix lock owner, just return. */
1893 	if ((lp->nfsl_lockflags & F_POSIX) == 0)
1894 		return;
1895 
1896 	fnd_it = 0;
1897 	mylfhp = NULL;
1898 	/*
1899 	 * First, search to see if this lock owner is already in the list.
1900 	 * If it is, then the associated process no longer exists.
1901 	 */
1902 	SLIST_FOREACH(lfhp, lhp, nfslfh_list) {
1903 		if (lfhp->nfslfh_len == lp->nfsl_open->nfso_fhlen &&
1904 		    !NFSBCMP(lfhp->nfslfh_fh, lp->nfsl_open->nfso_fh,
1905 		    lfhp->nfslfh_len))
1906 			mylfhp = lfhp;
1907 		LIST_FOREACH(nlp, &lfhp->nfslfh_lock, nfsl_list)
1908 			if (!NFSBCMP(nlp->nfsl_owner, lp->nfsl_owner,
1909 			    NFSV4CL_LOCKNAMELEN))
1910 				fnd_it = 1;
1911 	}
1912 	/* If not found, check if process still exists. */
1913 	if (fnd_it == 0 && nfscl_procdoesntexist(lp->nfsl_owner) == 0)
1914 		return;
1915 
1916 	/* Move the lock owner over to the local list. */
1917 	if (mylfhp == NULL) {
1918 		mylfhp = malloc(sizeof(struct nfscllockownerfh), M_TEMP,
1919 		    M_NOWAIT);
1920 		if (mylfhp == NULL)
1921 			return;
1922 		mylfhp->nfslfh_len = lp->nfsl_open->nfso_fhlen;
1923 		NFSBCOPY(lp->nfsl_open->nfso_fh, mylfhp->nfslfh_fh,
1924 		    mylfhp->nfslfh_len);
1925 		LIST_INIT(&mylfhp->nfslfh_lock);
1926 		SLIST_INSERT_HEAD(lhp, mylfhp, nfslfh_list);
1927 	}
1928 	LIST_REMOVE(lp, nfsl_list);
1929 	LIST_INSERT_HEAD(&mylfhp->nfslfh_lock, lp, nfsl_list);
1930 }
1931 
/*
 * Dummy variable incremented under the NFSCLSTATE mutex in nfscl_umount()
 * so that the lock/unlock pair there cannot be optimized away.
 */
static int	fake_global;	/* Used to force visibility of MNTK_UNMOUNTF */
1933 /*
1934  * Called from nfs umount to free up the clientid.
1935  */
1936 void
1937 nfscl_umount(struct nfsmount *nmp, NFSPROC_T *p)
1938 {
1939 	struct nfsclclient *clp;
1940 	struct ucred *cred;
1941 	int igotlock;
1942 
1943 	/*
1944 	 * For the case that matters, this is the thread that set
1945 	 * MNTK_UNMOUNTF, so it will see it set. The code that follows is
1946 	 * done to ensure that any thread executing nfscl_getcl() after
1947 	 * this time, will see MNTK_UNMOUNTF set. nfscl_getcl() uses the
1948 	 * mutex for NFSLOCKCLSTATE(), so it is "m" for the following
1949 	 * explanation, courtesy of Alan Cox.
1950 	 * What follows is a snippet from Alan Cox's email at:
1951 	 * https://docs.FreeBSD.org/cgi/mid.cgi?BANLkTikR3d65zPHo9==08ZfJ2vmqZucEvw
1952 	 *
1953 	 * 1. Set MNTK_UNMOUNTF
1954 	 * 2. Acquire a standard FreeBSD mutex "m".
1955 	 * 3. Update some data structures.
1956 	 * 4. Release mutex "m".
1957 	 *
1958 	 * Then, other threads that acquire "m" after step 4 has occurred will
1959 	 * see MNTK_UNMOUNTF as set.  But, other threads that beat thread X to
1960 	 * step 2 may or may not see MNTK_UNMOUNTF as set.
1961 	 */
1962 	NFSLOCKCLSTATE();
1963 	if ((nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1964 		fake_global++;
1965 		NFSUNLOCKCLSTATE();
1966 		NFSLOCKCLSTATE();
1967 	}
1968 
1969 	clp = nmp->nm_clp;
1970 	if (clp != NULL) {
1971 		if ((clp->nfsc_flags & NFSCLFLAGS_INITED) == 0)
1972 			panic("nfscl umount");
1973 
1974 		/*
1975 		 * First, handshake with the nfscl renew thread, to terminate
1976 		 * it.
1977 		 */
1978 		clp->nfsc_flags |= NFSCLFLAGS_UMOUNT;
1979 		while (clp->nfsc_flags & NFSCLFLAGS_HASTHREAD)
1980 			(void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT,
1981 			    "nfsclumnt", hz);
1982 
1983 		/*
1984 		 * Now, get the exclusive lock on the client state, so
1985 		 * that no uses of the state are still in progress.
1986 		 */
1987 		do {
1988 			igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
1989 			    NFSCLSTATEMUTEXPTR, NULL);
1990 		} while (!igotlock);
1991 		NFSUNLOCKCLSTATE();
1992 
1993 		/*
1994 		 * Free up all the state. It will expire on the server, but
1995 		 * maybe we should do a SetClientId/SetClientIdConfirm so
1996 		 * the server throws it away?
1997 		 */
1998 		LIST_REMOVE(clp, nfsc_list);
1999 		nfscl_delegreturnall(clp, p);
2000 		cred = newnfs_getcred();
2001 		if (NFSHASNFSV4N(nmp)) {
2002 			(void)nfsrpc_destroysession(nmp, clp, cred, p);
2003 			(void)nfsrpc_destroyclient(nmp, clp, cred, p);
2004 		} else
2005 			(void)nfsrpc_setclient(nmp, clp, 0, NULL, cred, p);
2006 		nfscl_cleanclient(clp);
2007 		nmp->nm_clp = NULL;
2008 		NFSFREECRED(cred);
2009 		free(clp, M_NFSCLCLIENT);
2010 	} else
2011 		NFSUNLOCKCLSTATE();
2012 }
2013 
2014 /*
2015  * This function is called when a server replies with NFSERR_STALECLIENTID
2016  * NFSERR_STALESTATEID or NFSERR_BADSESSION. It traverses the clientid lists,
2017  * doing Opens and Locks with reclaim. If these fail, it deletes the
2018  * corresponding state.
2019  */
2020 static void
2021 nfscl_recover(struct nfsclclient *clp, bool *retokp, struct ucred *cred,
2022     NFSPROC_T *p)
2023 {
2024 	struct nfsclowner *owp, *nowp;
2025 	struct nfsclopen *op, *nop;
2026 	struct nfscllockowner *lp, *nlp;
2027 	struct nfscllock *lop, *nlop;
2028 	struct nfscldeleg *dp, *ndp, *tdp;
2029 	struct nfsmount *nmp;
2030 	struct ucred *tcred;
2031 	struct nfsclopenhead extra_open;
2032 	struct nfscldeleghead extra_deleg;
2033 	struct nfsreq *rep;
2034 	u_int64_t len;
2035 	u_int32_t delegtype = NFSV4OPEN_DELEGATEWRITE, mode;
2036 	int i, igotlock = 0, error, trycnt, firstlock;
2037 	struct nfscllayout *lyp, *nlyp;
2038 	bool recovered_one;
2039 
2040 	/*
2041 	 * First, lock the client structure, so everyone else will
2042 	 * block when trying to use state.
2043 	 */
2044 	NFSLOCKCLSTATE();
2045 	clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG;
2046 	do {
2047 		igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
2048 		    NFSCLSTATEMUTEXPTR, NULL);
2049 	} while (!igotlock);
2050 	NFSUNLOCKCLSTATE();
2051 
2052 	nmp = clp->nfsc_nmp;
2053 	if (nmp == NULL)
2054 		panic("nfscl recover");
2055 
2056 	/*
2057 	 * For now, just get rid of all layouts. There may be a need
2058 	 * to do LayoutCommit Ops with reclaim == true later.
2059 	 */
2060 	TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp)
2061 		nfscl_freelayout(lyp);
2062 	TAILQ_INIT(&clp->nfsc_layout);
2063 	for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++)
2064 		LIST_INIT(&clp->nfsc_layouthash[i]);
2065 
2066 	trycnt = 5;
2067 	tcred = NULL;
2068 	do {
2069 		error = nfsrpc_setclient(nmp, clp, 1, retokp, cred, p);
2070 	} while ((error == NFSERR_STALECLIENTID ||
2071 	     error == NFSERR_BADSESSION ||
2072 	     error == NFSERR_STALEDONTRECOVER) && --trycnt > 0);
2073 	if (error) {
2074 		NFSLOCKCLSTATE();
2075 		clp->nfsc_flags &= ~(NFSCLFLAGS_RECOVER |
2076 		    NFSCLFLAGS_RECVRINPROG);
2077 		wakeup(&clp->nfsc_flags);
2078 		nfsv4_unlock(&clp->nfsc_lock, 0);
2079 		NFSUNLOCKCLSTATE();
2080 		return;
2081 	}
2082 	clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID;
2083 	clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2084 
2085 	/*
2086 	 * Mark requests already queued on the server, so that they don't
2087 	 * initiate another recovery cycle. Any requests already in the
2088 	 * queue that handle state information will have the old stale
2089 	 * clientid/stateid and will get a NFSERR_STALESTATEID,
2090 	 * NFSERR_STALECLIENTID or NFSERR_BADSESSION reply from the server.
2091 	 * This will be translated to NFSERR_STALEDONTRECOVER when
2092 	 * R_DONTRECOVER is set.
2093 	 */
2094 	NFSLOCKREQ();
2095 	TAILQ_FOREACH(rep, &nfsd_reqq, r_chain) {
2096 		if (rep->r_nmp == nmp)
2097 			rep->r_flags |= R_DONTRECOVER;
2098 	}
2099 	NFSUNLOCKREQ();
2100 
2101 	/*
2102 	 * If nfsrpc_setclient() returns *retokp == true,
2103 	 * no more recovery is needed.
2104 	 */
2105 	if (*retokp)
2106 		goto out;
2107 
2108 	/*
2109 	 * Now, mark all delegations "need reclaim".
2110 	 */
2111 	TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list)
2112 		dp->nfsdl_flags |= NFSCLDL_NEEDRECLAIM;
2113 
2114 	TAILQ_INIT(&extra_deleg);
2115 	LIST_INIT(&extra_open);
2116 	/*
2117 	 * Now traverse the state lists, doing Open and Lock Reclaims.
2118 	 */
2119 	tcred = newnfs_getcred();
2120 	recovered_one = false;
2121 	owp = LIST_FIRST(&clp->nfsc_owner);
2122 	while (owp != NULL) {
2123 	    nowp = LIST_NEXT(owp, nfsow_list);
2124 	    owp->nfsow_seqid = 0;
2125 	    op = LIST_FIRST(&owp->nfsow_open);
2126 	    while (op != NULL) {
2127 		nop = LIST_NEXT(op, nfso_list);
2128 		if (error != NFSERR_NOGRACE && error != NFSERR_BADSESSION) {
2129 		    /* Search for a delegation to reclaim with the open */
2130 		    TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
2131 			if (!(dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM))
2132 			    continue;
2133 			if ((dp->nfsdl_flags & NFSCLDL_WRITE)) {
2134 			    mode = NFSV4OPEN_ACCESSWRITE;
2135 			    delegtype = NFSV4OPEN_DELEGATEWRITE;
2136 			} else {
2137 			    mode = NFSV4OPEN_ACCESSREAD;
2138 			    delegtype = NFSV4OPEN_DELEGATEREAD;
2139 			}
2140 			if ((op->nfso_mode & mode) == mode &&
2141 			    op->nfso_fhlen == dp->nfsdl_fhlen &&
2142 			    !NFSBCMP(op->nfso_fh, dp->nfsdl_fh, op->nfso_fhlen))
2143 			    break;
2144 		    }
2145 		    ndp = dp;
2146 		    if (dp == NULL)
2147 			delegtype = NFSV4OPEN_DELEGATENONE;
2148 		    newnfs_copycred(&op->nfso_cred, tcred);
2149 		    error = nfscl_tryopen(nmp, NULL, op->nfso_fh,
2150 			op->nfso_fhlen, op->nfso_fh, op->nfso_fhlen,
2151 			op->nfso_mode, op, NULL, 0, &ndp, 1, delegtype,
2152 			tcred, p);
2153 		    if (!error) {
2154 			recovered_one = true;
2155 			/* Handle any replied delegation */
2156 			if (ndp != NULL && ((ndp->nfsdl_flags & NFSCLDL_WRITE)
2157 			    || NFSMNT_RDONLY(nmp->nm_mountp))) {
2158 			    if ((ndp->nfsdl_flags & NFSCLDL_WRITE))
2159 				mode = NFSV4OPEN_ACCESSWRITE;
2160 			    else
2161 				mode = NFSV4OPEN_ACCESSREAD;
2162 			    TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
2163 				if (!(dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM))
2164 				    continue;
2165 				if ((op->nfso_mode & mode) == mode &&
2166 				    op->nfso_fhlen == dp->nfsdl_fhlen &&
2167 				    !NFSBCMP(op->nfso_fh, dp->nfsdl_fh,
2168 				    op->nfso_fhlen)) {
2169 				    dp->nfsdl_stateid = ndp->nfsdl_stateid;
2170 				    dp->nfsdl_sizelimit = ndp->nfsdl_sizelimit;
2171 				    dp->nfsdl_ace = ndp->nfsdl_ace;
2172 				    dp->nfsdl_change = ndp->nfsdl_change;
2173 				    dp->nfsdl_flags &= ~NFSCLDL_NEEDRECLAIM;
2174 				    if ((ndp->nfsdl_flags & NFSCLDL_RECALL))
2175 					dp->nfsdl_flags |= NFSCLDL_RECALL;
2176 				    free(ndp, M_NFSCLDELEG);
2177 				    ndp = NULL;
2178 				    break;
2179 				}
2180 			    }
2181 			}
2182 			if (ndp != NULL)
2183 			    TAILQ_INSERT_HEAD(&extra_deleg, ndp, nfsdl_list);
2184 
2185 			/* and reclaim all byte range locks */
2186 			lp = LIST_FIRST(&op->nfso_lock);
2187 			while (lp != NULL) {
2188 			    nlp = LIST_NEXT(lp, nfsl_list);
2189 			    lp->nfsl_seqid = 0;
2190 			    firstlock = 1;
2191 			    lop = LIST_FIRST(&lp->nfsl_lock);
2192 			    while (lop != NULL) {
2193 				nlop = LIST_NEXT(lop, nfslo_list);
2194 				if (lop->nfslo_end == NFS64BITSSET)
2195 				    len = NFS64BITSSET;
2196 				else
2197 				    len = lop->nfslo_end - lop->nfslo_first;
2198 				error = nfscl_trylock(nmp, NULL,
2199 				    op->nfso_fh, op->nfso_fhlen, lp,
2200 				    firstlock, 1, lop->nfslo_first, len,
2201 				    lop->nfslo_type, tcred, p);
2202 				if (error != 0)
2203 				    nfscl_freelock(lop, 0);
2204 				else
2205 				    firstlock = 0;
2206 				lop = nlop;
2207 			    }
2208 			    /* If no locks, but a lockowner, just delete it. */
2209 			    if (LIST_EMPTY(&lp->nfsl_lock))
2210 				nfscl_freelockowner(lp, 0);
2211 			    lp = nlp;
2212 			}
2213 		    } else if (error == NFSERR_NOGRACE && !recovered_one &&
2214 			NFSHASNFSV4N(nmp)) {
2215 			/*
2216 			 * For NFSv4.1/4.2, the NFSERR_EXPIRED case will
2217 			 * actually end up here, since the client will do
2218 			 * a recovery for NFSERR_BADSESSION, but will get
2219 			 * an NFSERR_NOGRACE reply for the first "reclaim"
2220 			 * attempt.
2221 			 * So, call nfscl_expireclient() to recover the
2222 			 * opens as best we can and then do a reclaim
2223 			 * complete and return.
2224 			 */
2225 			nfsrpc_reclaimcomplete(nmp, cred, p);
2226 			nfscl_expireclient(clp, nmp, tcred, p);
2227 			goto out;
2228 		    }
2229 		}
2230 		if (error != 0 && error != NFSERR_BADSESSION)
2231 		    nfscl_freeopen(op, 0);
2232 		op = nop;
2233 	    }
2234 	    owp = nowp;
2235 	}
2236 
2237 	/*
2238 	 * Now, try and get any delegations not yet reclaimed by cobbling
2239 	 * to-gether an appropriate open.
2240 	 */
2241 	nowp = NULL;
2242 	dp = TAILQ_FIRST(&clp->nfsc_deleg);
2243 	while (dp != NULL) {
2244 	    ndp = TAILQ_NEXT(dp, nfsdl_list);
2245 	    if ((dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM)) {
2246 		if (nowp == NULL) {
2247 		    nowp = malloc(
2248 			sizeof (struct nfsclowner), M_NFSCLOWNER, M_WAITOK);
2249 		    /*
2250 		     * Name must be as long an largest possible
2251 		     * NFSV4CL_LOCKNAMELEN. 12 for now.
2252 		     */
2253 		    NFSBCOPY("RECLAIMDELEG", nowp->nfsow_owner,
2254 			NFSV4CL_LOCKNAMELEN);
2255 		    LIST_INIT(&nowp->nfsow_open);
2256 		    nowp->nfsow_clp = clp;
2257 		    nowp->nfsow_seqid = 0;
2258 		    nowp->nfsow_defunct = 0;
2259 		    nfscl_lockinit(&nowp->nfsow_rwlock);
2260 		}
2261 		nop = NULL;
2262 		if (error != NFSERR_NOGRACE && error != NFSERR_BADSESSION) {
2263 		    nop = malloc(sizeof (struct nfsclopen) +
2264 			dp->nfsdl_fhlen - 1, M_NFSCLOPEN, M_WAITOK);
2265 		    nop->nfso_own = nowp;
2266 		    if ((dp->nfsdl_flags & NFSCLDL_WRITE)) {
2267 			nop->nfso_mode = NFSV4OPEN_ACCESSWRITE;
2268 			delegtype = NFSV4OPEN_DELEGATEWRITE;
2269 		    } else {
2270 			nop->nfso_mode = NFSV4OPEN_ACCESSREAD;
2271 			delegtype = NFSV4OPEN_DELEGATEREAD;
2272 		    }
2273 		    nop->nfso_opencnt = 0;
2274 		    nop->nfso_posixlock = 1;
2275 		    nop->nfso_fhlen = dp->nfsdl_fhlen;
2276 		    NFSBCOPY(dp->nfsdl_fh, nop->nfso_fh, dp->nfsdl_fhlen);
2277 		    LIST_INIT(&nop->nfso_lock);
2278 		    nop->nfso_stateid.seqid = 0;
2279 		    nop->nfso_stateid.other[0] = 0;
2280 		    nop->nfso_stateid.other[1] = 0;
2281 		    nop->nfso_stateid.other[2] = 0;
2282 		    newnfs_copycred(&dp->nfsdl_cred, tcred);
2283 		    newnfs_copyincred(tcred, &nop->nfso_cred);
2284 		    tdp = NULL;
2285 		    error = nfscl_tryopen(nmp, NULL, nop->nfso_fh,
2286 			nop->nfso_fhlen, nop->nfso_fh, nop->nfso_fhlen,
2287 			nop->nfso_mode, nop, NULL, 0, &tdp, 1,
2288 			delegtype, tcred, p);
2289 		    if (tdp != NULL) {
2290 			if ((tdp->nfsdl_flags & NFSCLDL_WRITE))
2291 			    mode = NFSV4OPEN_ACCESSWRITE;
2292 			else
2293 			    mode = NFSV4OPEN_ACCESSREAD;
2294 			if ((nop->nfso_mode & mode) == mode &&
2295 			    nop->nfso_fhlen == tdp->nfsdl_fhlen &&
2296 			    !NFSBCMP(nop->nfso_fh, tdp->nfsdl_fh,
2297 			    nop->nfso_fhlen)) {
2298 			    dp->nfsdl_stateid = tdp->nfsdl_stateid;
2299 			    dp->nfsdl_sizelimit = tdp->nfsdl_sizelimit;
2300 			    dp->nfsdl_ace = tdp->nfsdl_ace;
2301 			    dp->nfsdl_change = tdp->nfsdl_change;
2302 			    dp->nfsdl_flags &= ~NFSCLDL_NEEDRECLAIM;
2303 			    if ((tdp->nfsdl_flags & NFSCLDL_RECALL))
2304 				dp->nfsdl_flags |= NFSCLDL_RECALL;
2305 			    free(tdp, M_NFSCLDELEG);
2306 			} else {
2307 			    TAILQ_INSERT_HEAD(&extra_deleg, tdp, nfsdl_list);
2308 			}
2309 		    }
2310 		}
2311 		if (error) {
2312 		    if (nop != NULL)
2313 			free(nop, M_NFSCLOPEN);
2314 		    if (error == NFSERR_NOGRACE && !recovered_one &&
2315 			NFSHASNFSV4N(nmp)) {
2316 			/*
2317 			 * For NFSv4.1/4.2, the NFSERR_EXPIRED case will
2318 			 * actually end up here, since the client will do
2319 			 * a recovery for NFSERR_BADSESSION, but will get
2320 			 * an NFSERR_NOGRACE reply for the first "reclaim"
2321 			 * attempt.
2322 			 * So, call nfscl_expireclient() to recover the
2323 			 * opens as best we can and then do a reclaim
2324 			 * complete and return.
2325 			 */
2326 			nfsrpc_reclaimcomplete(nmp, cred, p);
2327 			nfscl_expireclient(clp, nmp, tcred, p);
2328 			free(nowp, M_NFSCLOWNER);
2329 			goto out;
2330 		    }
2331 		    /*
2332 		     * Couldn't reclaim it, so throw the state
2333 		     * away. Ouch!!
2334 		     */
2335 		    nfscl_cleandeleg(dp);
2336 		    nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
2337 		} else {
2338 		    recovered_one = true;
2339 		    LIST_INSERT_HEAD(&extra_open, nop, nfso_list);
2340 		}
2341 	    }
2342 	    dp = ndp;
2343 	}
2344 
2345 	/*
2346 	 * Now, get rid of extra Opens and Delegations.
2347 	 */
2348 	LIST_FOREACH_SAFE(op, &extra_open, nfso_list, nop) {
2349 		do {
2350 			newnfs_copycred(&op->nfso_cred, tcred);
2351 			error = nfscl_tryclose(op, tcred, nmp, p);
2352 			if (error == NFSERR_GRACE)
2353 				(void) nfs_catnap(PZERO, error, "nfsexcls");
2354 		} while (error == NFSERR_GRACE);
2355 		LIST_REMOVE(op, nfso_list);
2356 		free(op, M_NFSCLOPEN);
2357 	}
2358 	if (nowp != NULL)
2359 		free(nowp, M_NFSCLOWNER);
2360 
2361 	TAILQ_FOREACH_SAFE(dp, &extra_deleg, nfsdl_list, ndp) {
2362 		do {
2363 			newnfs_copycred(&dp->nfsdl_cred, tcred);
2364 			error = nfscl_trydelegreturn(dp, tcred, nmp, p);
2365 			if (error == NFSERR_GRACE)
2366 				(void) nfs_catnap(PZERO, error, "nfsexdlg");
2367 		} while (error == NFSERR_GRACE);
2368 		TAILQ_REMOVE(&extra_deleg, dp, nfsdl_list);
2369 		free(dp, M_NFSCLDELEG);
2370 	}
2371 
2372 	/* For NFSv4.1 or later, do a RECLAIM_COMPLETE. */
2373 	if (NFSHASNFSV4N(nmp))
2374 		(void)nfsrpc_reclaimcomplete(nmp, cred, p);
2375 
2376 out:
2377 	NFSLOCKCLSTATE();
2378 	clp->nfsc_flags &= ~NFSCLFLAGS_RECVRINPROG;
2379 	wakeup(&clp->nfsc_flags);
2380 	nfsv4_unlock(&clp->nfsc_lock, 0);
2381 	NFSUNLOCKCLSTATE();
2382 	if (tcred != NULL)
2383 		NFSFREECRED(tcred);
2384 }
2385 
2386 /*
2387  * This function is called when a server replies with NFSERR_EXPIRED.
2388  * It deletes all state for the client and does a fresh SetClientId/confirm.
2389  * XXX Someday it should post a signal to the process(es) that hold the
2390  * state, so they know that lock state has been lost.
2391  */
2392 int
2393 nfscl_hasexpired(struct nfsclclient *clp, u_int32_t clidrev, NFSPROC_T *p)
2394 {
2395 	struct nfsmount *nmp;
2396 	struct ucred *cred;
2397 	int igotlock = 0, error, trycnt;
2398 
2399 	/*
2400 	 * If the clientid has gone away or a new SetClientid has already
2401 	 * been done, just return ok.
2402 	 */
2403 	if (clp == NULL || clidrev != clp->nfsc_clientidrev)
2404 		return (0);
2405 
2406 	/*
2407 	 * First, lock the client structure, so everyone else will
2408 	 * block when trying to use state. Also, use NFSCLFLAGS_EXPIREIT so
2409 	 * that only one thread does the work.
2410 	 */
2411 	NFSLOCKCLSTATE();
2412 	clp->nfsc_flags |= NFSCLFLAGS_EXPIREIT;
2413 	do {
2414 		igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
2415 		    NFSCLSTATEMUTEXPTR, NULL);
2416 	} while (!igotlock && (clp->nfsc_flags & NFSCLFLAGS_EXPIREIT));
2417 	if ((clp->nfsc_flags & NFSCLFLAGS_EXPIREIT) == 0) {
2418 		if (igotlock)
2419 			nfsv4_unlock(&clp->nfsc_lock, 0);
2420 		NFSUNLOCKCLSTATE();
2421 		return (0);
2422 	}
2423 	clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG;
2424 	NFSUNLOCKCLSTATE();
2425 
2426 	nmp = clp->nfsc_nmp;
2427 	if (nmp == NULL)
2428 		panic("nfscl expired");
2429 	cred = newnfs_getcred();
2430 	trycnt = 5;
2431 	do {
2432 		error = nfsrpc_setclient(nmp, clp, 0, NULL, cred, p);
2433 	} while ((error == NFSERR_STALECLIENTID ||
2434 	     error == NFSERR_BADSESSION ||
2435 	     error == NFSERR_STALEDONTRECOVER) && --trycnt > 0);
2436 	if (error) {
2437 		NFSLOCKCLSTATE();
2438 		clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2439 	} else {
2440 		/*
2441 		 * Expire the state for the client.
2442 		 */
2443 		nfscl_expireclient(clp, nmp, cred, p);
2444 		NFSLOCKCLSTATE();
2445 		clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID;
2446 		clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2447 	}
2448 	clp->nfsc_flags &= ~(NFSCLFLAGS_EXPIREIT | NFSCLFLAGS_RECVRINPROG);
2449 	wakeup(&clp->nfsc_flags);
2450 	nfsv4_unlock(&clp->nfsc_lock, 0);
2451 	NFSUNLOCKCLSTATE();
2452 	NFSFREECRED(cred);
2453 	return (error);
2454 }
2455 
2456 /*
2457  * This function inserts a lock in the list after insert_lop.
2458  */
2459 static void
2460 nfscl_insertlock(struct nfscllockowner *lp, struct nfscllock *new_lop,
2461     struct nfscllock *insert_lop, int local)
2462 {
2463 
2464 	if ((struct nfscllockowner *)insert_lop == lp)
2465 		LIST_INSERT_HEAD(&lp->nfsl_lock, new_lop, nfslo_list);
2466 	else
2467 		LIST_INSERT_AFTER(insert_lop, new_lop, nfslo_list);
2468 	if (local)
2469 		nfsstatsv1.cllocallocks++;
2470 	else
2471 		nfsstatsv1.cllocks++;
2472 }
2473 
2474 /*
2475  * This function updates the locking for a lock owner and given file. It
2476  * maintains a list of lock ranges ordered on increasing file offset that
2477  * are NFSCLLOCK_READ or NFSCLLOCK_WRITE and non-overlapping (aka POSIX style).
2478  * It always adds new_lop to the list and sometimes uses the one pointed
2479  * at by other_lopp.
2480  * Returns 1 if the locks were modified, 0 otherwise.
2481  */
2482 static int
2483 nfscl_updatelock(struct nfscllockowner *lp, struct nfscllock **new_lopp,
2484     struct nfscllock **other_lopp, int local)
2485 {
2486 	struct nfscllock *new_lop = *new_lopp;
2487 	struct nfscllock *lop, *tlop, *ilop;
2488 	struct nfscllock *other_lop;
2489 	int unlock = 0, modified = 0;
2490 	u_int64_t tmp;
2491 
2492 	/*
2493 	 * Work down the list until the lock is merged.
2494 	 */
2495 	if (new_lop->nfslo_type == F_UNLCK)
2496 		unlock = 1;
2497 	ilop = (struct nfscllock *)lp;
2498 	lop = LIST_FIRST(&lp->nfsl_lock);
2499 	while (lop != NULL) {
2500 	    /*
2501 	     * Only check locks for this file that aren't before the start of
2502 	     * new lock's range.
2503 	     */
2504 	    if (lop->nfslo_end >= new_lop->nfslo_first) {
2505 		if (new_lop->nfslo_end < lop->nfslo_first) {
2506 		    /*
2507 		     * If the new lock ends before the start of the
2508 		     * current lock's range, no merge, just insert
2509 		     * the new lock.
2510 		     */
2511 		    break;
2512 		}
2513 		if (new_lop->nfslo_type == lop->nfslo_type ||
2514 		    (new_lop->nfslo_first <= lop->nfslo_first &&
2515 		     new_lop->nfslo_end >= lop->nfslo_end)) {
2516 		    /*
2517 		     * This lock can be absorbed by the new lock/unlock.
2518 		     * This happens when it covers the entire range
2519 		     * of the old lock or is contiguous
2520 		     * with the old lock and is of the same type or an
2521 		     * unlock.
2522 		     */
2523 		    if (new_lop->nfslo_type != lop->nfslo_type ||
2524 			new_lop->nfslo_first != lop->nfslo_first ||
2525 			new_lop->nfslo_end != lop->nfslo_end)
2526 			modified = 1;
2527 		    if (lop->nfslo_first < new_lop->nfslo_first)
2528 			new_lop->nfslo_first = lop->nfslo_first;
2529 		    if (lop->nfslo_end > new_lop->nfslo_end)
2530 			new_lop->nfslo_end = lop->nfslo_end;
2531 		    tlop = lop;
2532 		    lop = LIST_NEXT(lop, nfslo_list);
2533 		    nfscl_freelock(tlop, local);
2534 		    continue;
2535 		}
2536 
2537 		/*
2538 		 * All these cases are for contiguous locks that are not the
2539 		 * same type, so they can't be merged.
2540 		 */
2541 		if (new_lop->nfslo_first <= lop->nfslo_first) {
2542 		    /*
2543 		     * This case is where the new lock overlaps with the
2544 		     * first part of the old lock. Move the start of the
2545 		     * old lock to just past the end of the new lock. The
2546 		     * new lock will be inserted in front of the old, since
2547 		     * ilop hasn't been updated. (We are done now.)
2548 		     */
2549 		    if (lop->nfslo_first != new_lop->nfslo_end) {
2550 			lop->nfslo_first = new_lop->nfslo_end;
2551 			modified = 1;
2552 		    }
2553 		    break;
2554 		}
2555 		if (new_lop->nfslo_end >= lop->nfslo_end) {
2556 		    /*
2557 		     * This case is where the new lock overlaps with the
2558 		     * end of the old lock's range. Move the old lock's
2559 		     * end to just before the new lock's first and insert
2560 		     * the new lock after the old lock.
2561 		     * Might not be done yet, since the new lock could
2562 		     * overlap further locks with higher ranges.
2563 		     */
2564 		    if (lop->nfslo_end != new_lop->nfslo_first) {
2565 			lop->nfslo_end = new_lop->nfslo_first;
2566 			modified = 1;
2567 		    }
2568 		    ilop = lop;
2569 		    lop = LIST_NEXT(lop, nfslo_list);
2570 		    continue;
2571 		}
2572 		/*
2573 		 * The final case is where the new lock's range is in the
2574 		 * middle of the current lock's and splits the current lock
2575 		 * up. Use *other_lopp to handle the second part of the
2576 		 * split old lock range. (We are done now.)
2577 		 * For unlock, we use new_lop as other_lop and tmp, since
2578 		 * other_lop and new_lop are the same for this case.
2579 		 * We noted the unlock case above, so we don't need
2580 		 * new_lop->nfslo_type any longer.
2581 		 */
2582 		tmp = new_lop->nfslo_first;
2583 		if (unlock) {
2584 		    other_lop = new_lop;
2585 		    *new_lopp = NULL;
2586 		} else {
2587 		    other_lop = *other_lopp;
2588 		    *other_lopp = NULL;
2589 		}
2590 		other_lop->nfslo_first = new_lop->nfslo_end;
2591 		other_lop->nfslo_end = lop->nfslo_end;
2592 		other_lop->nfslo_type = lop->nfslo_type;
2593 		lop->nfslo_end = tmp;
2594 		nfscl_insertlock(lp, other_lop, lop, local);
2595 		ilop = lop;
2596 		modified = 1;
2597 		break;
2598 	    }
2599 	    ilop = lop;
2600 	    lop = LIST_NEXT(lop, nfslo_list);
2601 	    if (lop == NULL)
2602 		break;
2603 	}
2604 
2605 	/*
2606 	 * Insert the new lock in the list at the appropriate place.
2607 	 */
2608 	if (!unlock) {
2609 		nfscl_insertlock(lp, new_lop, ilop, local);
2610 		*new_lopp = NULL;
2611 		modified = 1;
2612 	}
2613 	return (modified);
2614 }
2615 
2616 /*
2617  * This function must be run as a kernel thread.
2618  * It does Renew Ops and recovery, when required.
2619  */
2620 void
2621 nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p)
2622 {
2623 	struct nfsclowner *owp, *nowp;
2624 	struct nfsclopen *op;
2625 	struct nfscllockowner *lp, *nlp;
2626 	struct nfscldeleghead dh;
2627 	struct nfscldeleg *dp, *ndp;
2628 	struct ucred *cred;
2629 	u_int32_t clidrev;
2630 	int error, cbpathdown, islept, igotlock, ret, clearok;
2631 	uint32_t recover_done_time = 0;
2632 	time_t mytime;
2633 	static time_t prevsec = 0;
2634 	struct nfscllockownerfh *lfhp, *nlfhp;
2635 	struct nfscllockownerfhhead lfh;
2636 	struct nfscllayout *lyp, *nlyp;
2637 	struct nfscldevinfo *dip, *ndip;
2638 	struct nfscllayouthead rlh;
2639 	struct nfsclrecalllayout *recallp;
2640 	struct nfsclds *dsp;
2641 	bool retok;
2642 	struct mount *mp;
2643 	vnode_t vp;
2644 
2645 	cred = newnfs_getcred();
2646 	NFSLOCKCLSTATE();
2647 	clp->nfsc_flags |= NFSCLFLAGS_HASTHREAD;
2648 	mp = clp->nfsc_nmp->nm_mountp;
2649 	NFSUNLOCKCLSTATE();
2650 	for(;;) {
2651 		newnfs_setroot(cred);
2652 		cbpathdown = 0;
2653 		if (clp->nfsc_flags & NFSCLFLAGS_RECOVER) {
2654 			/*
2655 			 * Only allow one full recover within 1/2 of the lease
2656 			 * duration (nfsc_renew).
2657 			 * retok is value/result.  If passed in set to true,
2658 			 * it indicates only a CreateSession operation should
2659 			 * be attempted.
2660 			 * If it is returned true, it indicates that the
2661 			 * recovery only required a CreateSession.
2662 			 */
2663 			retok = true;
2664 			if (recover_done_time < NFSD_MONOSEC) {
2665 				recover_done_time = NFSD_MONOSEC +
2666 				    clp->nfsc_renew;
2667 				retok = false;
2668 			}
2669 			NFSCL_DEBUG(1, "Doing recovery, only "
2670 			    "createsession=%d\n", retok);
2671 			nfscl_recover(clp, &retok, cred, p);
2672 		}
2673 		if (clp->nfsc_expire <= NFSD_MONOSEC &&
2674 		    (clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID)) {
2675 			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
2676 			clidrev = clp->nfsc_clientidrev;
2677 			error = nfsrpc_renew(clp, NULL, cred, p);
2678 			if (error == NFSERR_CBPATHDOWN)
2679 			    cbpathdown = 1;
2680 			else if (error == NFSERR_STALECLIENTID ||
2681 			    error == NFSERR_BADSESSION) {
2682 			    NFSLOCKCLSTATE();
2683 			    clp->nfsc_flags |= NFSCLFLAGS_RECOVER;
2684 			    NFSUNLOCKCLSTATE();
2685 			} else if (error == NFSERR_EXPIRED)
2686 			    (void) nfscl_hasexpired(clp, clidrev, p);
2687 		}
2688 
2689 checkdsrenew:
2690 		if (NFSHASNFSV4N(clp->nfsc_nmp)) {
2691 			/* Do renews for any DS sessions. */
2692 			NFSLOCKMNT(clp->nfsc_nmp);
2693 			/* Skip first entry, since the MDS is handled above. */
2694 			dsp = TAILQ_FIRST(&clp->nfsc_nmp->nm_sess);
2695 			if (dsp != NULL)
2696 				dsp = TAILQ_NEXT(dsp, nfsclds_list);
2697 			while (dsp != NULL) {
2698 				if (dsp->nfsclds_expire <= NFSD_MONOSEC &&
2699 				    dsp->nfsclds_sess.nfsess_defunct == 0) {
2700 					dsp->nfsclds_expire = NFSD_MONOSEC +
2701 					    clp->nfsc_renew;
2702 					NFSUNLOCKMNT(clp->nfsc_nmp);
2703 					(void)nfsrpc_renew(clp, dsp, cred, p);
2704 					goto checkdsrenew;
2705 				}
2706 				dsp = TAILQ_NEXT(dsp, nfsclds_list);
2707 			}
2708 			NFSUNLOCKMNT(clp->nfsc_nmp);
2709 		}
2710 
2711 		TAILQ_INIT(&dh);
2712 		NFSLOCKCLSTATE();
2713 		if (cbpathdown)
2714 			/* It's a Total Recall! */
2715 			nfscl_totalrecall(clp);
2716 
2717 		/*
2718 		 * Now, handle defunct owners.
2719 		 */
2720 		LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
2721 			if (LIST_EMPTY(&owp->nfsow_open)) {
2722 				if (owp->nfsow_defunct != 0)
2723 					nfscl_freeopenowner(owp, 0);
2724 			}
2725 		}
2726 
2727 		/*
2728 		 * Do the recall on any delegations. To avoid trouble, always
2729 		 * come back up here after having slept.
2730 		 */
2731 		igotlock = 0;
2732 tryagain:
2733 		dp = TAILQ_FIRST(&clp->nfsc_deleg);
2734 		while (dp != NULL) {
2735 			ndp = TAILQ_NEXT(dp, nfsdl_list);
2736 			if ((dp->nfsdl_flags & NFSCLDL_RECALL)) {
2737 				/*
2738 				 * Wait for outstanding I/O ops to be done.
2739 				 */
2740 				if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
2741 				    if (igotlock) {
2742 					nfsv4_unlock(&clp->nfsc_lock, 0);
2743 					igotlock = 0;
2744 				    }
2745 				    dp->nfsdl_rwlock.nfslock_lock |=
2746 					NFSV4LOCK_WANTED;
2747 				    msleep(&dp->nfsdl_rwlock,
2748 					NFSCLSTATEMUTEXPTR, PVFS, "nfscld",
2749 					5 * hz);
2750 				    if (NFSCL_FORCEDISM(mp))
2751 					goto terminate;
2752 				    goto tryagain;
2753 				}
2754 				while (!igotlock) {
2755 				    igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
2756 					&islept, NFSCLSTATEMUTEXPTR, mp);
2757 				    if (igotlock == 0 && NFSCL_FORCEDISM(mp))
2758 					goto terminate;
2759 				    if (islept)
2760 					goto tryagain;
2761 				}
2762 				NFSUNLOCKCLSTATE();
2763 				newnfs_copycred(&dp->nfsdl_cred, cred);
2764 				ret = nfscl_recalldeleg(clp, clp->nfsc_nmp, dp,
2765 				    NULL, cred, p, 1, &vp);
2766 				if (!ret) {
2767 				    nfscl_cleandeleg(dp);
2768 				    TAILQ_REMOVE(&clp->nfsc_deleg, dp,
2769 					nfsdl_list);
2770 				    LIST_REMOVE(dp, nfsdl_hash);
2771 				    TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list);
2772 				    nfscl_delegcnt--;
2773 				    nfsstatsv1.cldelegates--;
2774 				}
2775 				NFSLOCKCLSTATE();
2776 				/*
2777 				 * The nfsc_lock must be released before doing
2778 				 * vrele(), since it might call nfs_inactive().
2779 				 * For the unlikely case where the vnode failed
2780 				 * to be acquired by nfscl_recalldeleg(), a
2781 				 * VOP_RECLAIM() should be in progress and it
2782 				 * will return the delegation.
2783 				 */
2784 				nfsv4_unlock(&clp->nfsc_lock, 0);
2785 				igotlock = 0;
2786 				if (vp != NULL) {
2787 					NFSUNLOCKCLSTATE();
2788 					vrele(vp);
2789 					NFSLOCKCLSTATE();
2790 				}
2791 				goto tryagain;
2792 			}
2793 			dp = ndp;
2794 		}
2795 
2796 		/*
2797 		 * Clear out old delegations, if we are above the high water
2798 		 * mark. Only clear out ones with no state related to them.
2799 		 * The tailq list is in LRU order.
2800 		 */
2801 		dp = TAILQ_LAST(&clp->nfsc_deleg, nfscldeleghead);
2802 		while (nfscl_delegcnt > nfscl_deleghighwater && dp != NULL) {
2803 		    ndp = TAILQ_PREV(dp, nfscldeleghead, nfsdl_list);
2804 		    if (dp->nfsdl_rwlock.nfslock_usecnt == 0 &&
2805 			dp->nfsdl_rwlock.nfslock_lock == 0 &&
2806 			dp->nfsdl_timestamp < NFSD_MONOSEC &&
2807 			(dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_ZAPPED |
2808 			  NFSCLDL_NEEDRECLAIM | NFSCLDL_DELEGRET)) == 0) {
2809 			clearok = 1;
2810 			LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
2811 			    op = LIST_FIRST(&owp->nfsow_open);
2812 			    if (op != NULL) {
2813 				clearok = 0;
2814 				break;
2815 			    }
2816 			}
2817 			if (clearok) {
2818 			    LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
2819 				if (!LIST_EMPTY(&lp->nfsl_lock)) {
2820 				    clearok = 0;
2821 				    break;
2822 				}
2823 			    }
2824 			}
2825 			if (clearok) {
2826 			    TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
2827 			    LIST_REMOVE(dp, nfsdl_hash);
2828 			    TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list);
2829 			    nfscl_delegcnt--;
2830 			    nfsstatsv1.cldelegates--;
2831 			}
2832 		    }
2833 		    dp = ndp;
2834 		}
2835 		if (igotlock)
2836 			nfsv4_unlock(&clp->nfsc_lock, 0);
2837 
2838 		/*
2839 		 * Do the recall on any layouts. To avoid trouble, always
2840 		 * come back up here after having slept.
2841 		 */
2842 		TAILQ_INIT(&rlh);
2843 tryagain2:
2844 		TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp) {
2845 			if ((lyp->nfsly_flags & NFSLY_RECALL) != 0) {
2846 				/*
2847 				 * Wait for outstanding I/O ops to be done.
2848 				 */
2849 				if (lyp->nfsly_lock.nfslock_usecnt > 0 ||
2850 				    (lyp->nfsly_lock.nfslock_lock &
2851 				     NFSV4LOCK_LOCK) != 0) {
2852 					lyp->nfsly_lock.nfslock_lock |=
2853 					    NFSV4LOCK_WANTED;
2854 					msleep(&lyp->nfsly_lock.nfslock_lock,
2855 					    NFSCLSTATEMUTEXPTR, PVFS, "nfslyp",
2856 					    5 * hz);
2857 					if (NFSCL_FORCEDISM(mp))
2858 					    goto terminate;
2859 					goto tryagain2;
2860 				}
2861 				/* Move the layout to the recall list. */
2862 				TAILQ_REMOVE(&clp->nfsc_layout, lyp,
2863 				    nfsly_list);
2864 				LIST_REMOVE(lyp, nfsly_hash);
2865 				TAILQ_INSERT_HEAD(&rlh, lyp, nfsly_list);
2866 
2867 				/* Handle any layout commits. */
2868 				if (!NFSHASNOLAYOUTCOMMIT(clp->nfsc_nmp) &&
2869 				    (lyp->nfsly_flags & NFSLY_WRITTEN) != 0) {
2870 					lyp->nfsly_flags &= ~NFSLY_WRITTEN;
2871 					NFSUNLOCKCLSTATE();
2872 					NFSCL_DEBUG(3, "do layoutcommit\n");
2873 					nfscl_dolayoutcommit(clp->nfsc_nmp, lyp,
2874 					    cred, p);
2875 					NFSLOCKCLSTATE();
2876 					goto tryagain2;
2877 				}
2878 			}
2879 		}
2880 
2881 		/* Now, look for stale layouts. */
2882 		lyp = TAILQ_LAST(&clp->nfsc_layout, nfscllayouthead);
2883 		while (lyp != NULL) {
2884 			nlyp = TAILQ_PREV(lyp, nfscllayouthead, nfsly_list);
2885 			if (lyp->nfsly_timestamp < NFSD_MONOSEC &&
2886 			    (lyp->nfsly_flags & NFSLY_RECALL) == 0 &&
2887 			    lyp->nfsly_lock.nfslock_usecnt == 0 &&
2888 			    lyp->nfsly_lock.nfslock_lock == 0) {
2889 				NFSCL_DEBUG(4, "ret stale lay=%d\n",
2890 				    nfscl_layoutcnt);
2891 				recallp = malloc(sizeof(*recallp),
2892 				    M_NFSLAYRECALL, M_NOWAIT);
2893 				if (recallp == NULL)
2894 					break;
2895 				(void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE,
2896 				    lyp, NFSLAYOUTIOMODE_ANY, 0, UINT64_MAX,
2897 				    lyp->nfsly_stateid.seqid, 0, 0, NULL,
2898 				    recallp);
2899 			}
2900 			lyp = nlyp;
2901 		}
2902 
2903 		/*
2904 		 * Free up any unreferenced device info structures.
2905 		 */
2906 		LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip) {
2907 			if (dip->nfsdi_layoutrefs == 0 &&
2908 			    dip->nfsdi_refcnt == 0) {
2909 				NFSCL_DEBUG(4, "freeing devinfo\n");
2910 				LIST_REMOVE(dip, nfsdi_list);
2911 				nfscl_freedevinfo(dip);
2912 			}
2913 		}
2914 		NFSUNLOCKCLSTATE();
2915 
2916 		/* Do layout return(s), as required. */
2917 		TAILQ_FOREACH_SAFE(lyp, &rlh, nfsly_list, nlyp) {
2918 			TAILQ_REMOVE(&rlh, lyp, nfsly_list);
2919 			NFSCL_DEBUG(4, "ret layout\n");
2920 			nfscl_layoutreturn(clp->nfsc_nmp, lyp, cred, p);
2921 			nfscl_freelayout(lyp);
2922 		}
2923 
2924 		/*
2925 		 * Delegreturn any delegations cleaned out or recalled.
2926 		 */
2927 		TAILQ_FOREACH_SAFE(dp, &dh, nfsdl_list, ndp) {
2928 			newnfs_copycred(&dp->nfsdl_cred, cred);
2929 			(void) nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
2930 			TAILQ_REMOVE(&dh, dp, nfsdl_list);
2931 			free(dp, M_NFSCLDELEG);
2932 		}
2933 
2934 		SLIST_INIT(&lfh);
2935 		/*
2936 		 * Call nfscl_cleanupkext() once per second to check for
2937 		 * open/lock owners where the process has exited.
2938 		 */
2939 		mytime = NFSD_MONOSEC;
2940 		if (prevsec != mytime) {
2941 			prevsec = mytime;
2942 			nfscl_cleanupkext(clp, &lfh);
2943 		}
2944 
2945 		/*
2946 		 * Do a ReleaseLockOwner for all lock owners where the
2947 		 * associated process no longer exists, as found by
2948 		 * nfscl_cleanupkext().
2949 		 */
2950 		newnfs_setroot(cred);
2951 		SLIST_FOREACH_SAFE(lfhp, &lfh, nfslfh_list, nlfhp) {
2952 			LIST_FOREACH_SAFE(lp, &lfhp->nfslfh_lock, nfsl_list,
2953 			    nlp) {
2954 				(void)nfsrpc_rellockown(clp->nfsc_nmp, lp,
2955 				    lfhp->nfslfh_fh, lfhp->nfslfh_len, cred,
2956 				    p);
2957 				nfscl_freelockowner(lp, 0);
2958 			}
2959 			free(lfhp, M_TEMP);
2960 		}
2961 		SLIST_INIT(&lfh);
2962 
2963 		NFSLOCKCLSTATE();
2964 		if ((clp->nfsc_flags & NFSCLFLAGS_RECOVER) == 0)
2965 			(void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT, "nfscl",
2966 			    hz);
2967 terminate:
2968 		if (clp->nfsc_flags & NFSCLFLAGS_UMOUNT) {
2969 			clp->nfsc_flags &= ~NFSCLFLAGS_HASTHREAD;
2970 			NFSUNLOCKCLSTATE();
2971 			NFSFREECRED(cred);
2972 			wakeup((caddr_t)clp);
2973 			return;
2974 		}
2975 		NFSUNLOCKCLSTATE();
2976 	}
2977 }
2978 
2979 /*
2980  * Initiate state recovery. Called when NFSERR_STALECLIENTID,
2981  * NFSERR_STALESTATEID or NFSERR_BADSESSION is received.
2982  */
2983 void
2984 nfscl_initiate_recovery(struct nfsclclient *clp)
2985 {
2986 
2987 	if (clp == NULL)
2988 		return;
2989 	NFSLOCKCLSTATE();
2990 	clp->nfsc_flags |= NFSCLFLAGS_RECOVER;
2991 	NFSUNLOCKCLSTATE();
2992 	wakeup((caddr_t)clp);
2993 }
2994 
2995 /*
2996  * Dump out the state stuff for debugging.
2997  */
2998 void
2999 nfscl_dumpstate(struct nfsmount *nmp, int openowner, int opens,
3000     int lockowner, int locks)
3001 {
3002 	struct nfsclclient *clp;
3003 	struct nfsclowner *owp;
3004 	struct nfsclopen *op;
3005 	struct nfscllockowner *lp;
3006 	struct nfscllock *lop;
3007 	struct nfscldeleg *dp;
3008 
3009 	clp = nmp->nm_clp;
3010 	if (clp == NULL) {
3011 		printf("nfscl dumpstate NULL clp\n");
3012 		return;
3013 	}
3014 	NFSLOCKCLSTATE();
3015 	TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
3016 	  LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
3017 	    if (openowner && !LIST_EMPTY(&owp->nfsow_open))
3018 		printf("owner=0x%x 0x%x 0x%x 0x%x seqid=%d\n",
3019 		    owp->nfsow_owner[0], owp->nfsow_owner[1],
3020 		    owp->nfsow_owner[2], owp->nfsow_owner[3],
3021 		    owp->nfsow_seqid);
3022 	    LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3023 		if (opens)
3024 		    printf("open st=0x%x 0x%x 0x%x cnt=%d fh12=0x%x\n",
3025 			op->nfso_stateid.other[0], op->nfso_stateid.other[1],
3026 			op->nfso_stateid.other[2], op->nfso_opencnt,
3027 			op->nfso_fh[12]);
3028 		LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
3029 		    if (lockowner)
3030 			printf("lckown=0x%x 0x%x 0x%x 0x%x seqid=%d st=0x%x 0x%x 0x%x\n",
3031 			    lp->nfsl_owner[0], lp->nfsl_owner[1],
3032 			    lp->nfsl_owner[2], lp->nfsl_owner[3],
3033 			    lp->nfsl_seqid,
3034 			    lp->nfsl_stateid.other[0], lp->nfsl_stateid.other[1],
3035 			    lp->nfsl_stateid.other[2]);
3036 		    LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
3037 			if (locks)
3038 #ifdef __FreeBSD__
3039 			    printf("lck typ=%d fst=%ju end=%ju\n",
3040 				lop->nfslo_type, (intmax_t)lop->nfslo_first,
3041 				(intmax_t)lop->nfslo_end);
3042 #else
3043 			    printf("lck typ=%d fst=%qd end=%qd\n",
3044 				lop->nfslo_type, lop->nfslo_first,
3045 				lop->nfslo_end);
3046 #endif
3047 		    }
3048 		}
3049 	    }
3050 	  }
3051 	}
3052 	LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3053 	    if (openowner && !LIST_EMPTY(&owp->nfsow_open))
3054 		printf("owner=0x%x 0x%x 0x%x 0x%x seqid=%d\n",
3055 		    owp->nfsow_owner[0], owp->nfsow_owner[1],
3056 		    owp->nfsow_owner[2], owp->nfsow_owner[3],
3057 		    owp->nfsow_seqid);
3058 	    LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3059 		if (opens)
3060 		    printf("open st=0x%x 0x%x 0x%x cnt=%d fh12=0x%x\n",
3061 			op->nfso_stateid.other[0], op->nfso_stateid.other[1],
3062 			op->nfso_stateid.other[2], op->nfso_opencnt,
3063 			op->nfso_fh[12]);
3064 		LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
3065 		    if (lockowner)
3066 			printf("lckown=0x%x 0x%x 0x%x 0x%x seqid=%d st=0x%x 0x%x 0x%x\n",
3067 			    lp->nfsl_owner[0], lp->nfsl_owner[1],
3068 			    lp->nfsl_owner[2], lp->nfsl_owner[3],
3069 			    lp->nfsl_seqid,
3070 			    lp->nfsl_stateid.other[0], lp->nfsl_stateid.other[1],
3071 			    lp->nfsl_stateid.other[2]);
3072 		    LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
3073 			if (locks)
3074 #ifdef __FreeBSD__
3075 			    printf("lck typ=%d fst=%ju end=%ju\n",
3076 				lop->nfslo_type, (intmax_t)lop->nfslo_first,
3077 				(intmax_t)lop->nfslo_end);
3078 #else
3079 			    printf("lck typ=%d fst=%qd end=%qd\n",
3080 				lop->nfslo_type, lop->nfslo_first,
3081 				lop->nfslo_end);
3082 #endif
3083 		    }
3084 		}
3085 	    }
3086 	}
3087 	NFSUNLOCKCLSTATE();
3088 }
3089 
3090 /*
3091  * Check for duplicate open owners and opens.
3092  * (Only used as a diagnostic aid.)
3093  */
3094 void
3095 nfscl_dupopen(vnode_t vp, int dupopens)
3096 {
3097 	struct nfsclclient *clp;
3098 	struct nfsclowner *owp, *owp2;
3099 	struct nfsclopen *op, *op2;
3100 	struct nfsfh *nfhp;
3101 
3102 	clp = VFSTONFS(vp->v_mount)->nm_clp;
3103 	if (clp == NULL) {
3104 		printf("nfscl dupopen NULL clp\n");
3105 		return;
3106 	}
3107 	nfhp = VTONFS(vp)->n_fhp;
3108 	NFSLOCKCLSTATE();
3109 
3110 	/*
3111 	 * First, search for duplicate owners.
3112 	 * These should never happen!
3113 	 */
3114 	LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
3115 	    LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3116 		if (owp != owp2 &&
3117 		    !NFSBCMP(owp->nfsow_owner, owp2->nfsow_owner,
3118 		    NFSV4CL_LOCKNAMELEN)) {
3119 			NFSUNLOCKCLSTATE();
3120 			printf("DUP OWNER\n");
3121 			nfscl_dumpstate(VFSTONFS(vp->v_mount), 1, 1, 0, 0);
3122 			return;
3123 		}
3124 	    }
3125 	}
3126 
3127 	/*
3128 	 * Now, search for duplicate stateids.
3129 	 * These shouldn't happen, either.
3130 	 */
3131 	LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
3132 	    LIST_FOREACH(op2, &owp2->nfsow_open, nfso_list) {
3133 		LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3134 		    LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3135 			if (op != op2 &&
3136 			    (op->nfso_stateid.other[0] != 0 ||
3137 			     op->nfso_stateid.other[1] != 0 ||
3138 			     op->nfso_stateid.other[2] != 0) &&
3139 			    op->nfso_stateid.other[0] == op2->nfso_stateid.other[0] &&
3140 			    op->nfso_stateid.other[1] == op2->nfso_stateid.other[1] &&
3141 			    op->nfso_stateid.other[2] == op2->nfso_stateid.other[2]) {
3142 			    NFSUNLOCKCLSTATE();
3143 			    printf("DUP STATEID\n");
3144 			    nfscl_dumpstate(VFSTONFS(vp->v_mount), 1, 1, 0, 0);
3145 			    return;
3146 			}
3147 		    }
3148 		}
3149 	    }
3150 	}
3151 
3152 	/*
3153 	 * Now search for duplicate opens.
3154 	 * Duplicate opens for the same owner
3155 	 * should never occur. Other duplicates are
3156 	 * possible and are checked for if "dupopens"
3157 	 * is true.
3158 	 */
3159 	LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
3160 	    LIST_FOREACH(op2, &owp2->nfsow_open, nfso_list) {
3161 		if (nfhp->nfh_len == op2->nfso_fhlen &&
3162 		    !NFSBCMP(nfhp->nfh_fh, op2->nfso_fh, nfhp->nfh_len)) {
3163 		    LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3164 			LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3165 			    if (op != op2 && nfhp->nfh_len == op->nfso_fhlen &&
3166 				!NFSBCMP(nfhp->nfh_fh, op->nfso_fh, nfhp->nfh_len) &&
3167 				(!NFSBCMP(op->nfso_own->nfsow_owner,
3168 				 op2->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN) ||
3169 				 dupopens)) {
3170 				if (!NFSBCMP(op->nfso_own->nfsow_owner,
3171 				    op2->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN)) {
3172 				    NFSUNLOCKCLSTATE();
3173 				    printf("BADDUP OPEN\n");
3174 				} else {
3175 				    NFSUNLOCKCLSTATE();
3176 				    printf("DUP OPEN\n");
3177 				}
3178 				nfscl_dumpstate(VFSTONFS(vp->v_mount), 1, 1, 0,
3179 				    0);
3180 				return;
3181 			    }
3182 			}
3183 		    }
3184 		}
3185 	    }
3186 	}
3187 	NFSUNLOCKCLSTATE();
3188 }
3189 
3190 /*
3191  * During close, find an open that needs to be dereferenced and
3192  * dereference it. If there are no more opens for this file,
3193  * log a message to that effect.
3194  * Opens aren't actually Close'd until VOP_INACTIVE() is performed
3195  * on the file's vnode.
3196  * This is the safe way, since it is difficult to identify
3197  * which open the close is for and I/O can be performed after the
3198  * close(2) system call when a file is mmap'd.
3199  * If it returns 0 for success, there will be a referenced
3200  * clp returned via clpp.
3201  */
3202 int
3203 nfscl_getclose(vnode_t vp, struct nfsclclient **clpp)
3204 {
3205 	struct nfsclclient *clp;
3206 	struct nfsclowner *owp;
3207 	struct nfsclopen *op;
3208 	struct nfscldeleg *dp;
3209 	struct nfsfh *nfhp;
3210 	int error, notdecr;
3211 
3212 	error = nfscl_getcl(vp->v_mount, NULL, NULL, false, &clp);
3213 	if (error)
3214 		return (error);
3215 	*clpp = clp;
3216 
3217 	nfhp = VTONFS(vp)->n_fhp;
3218 	notdecr = 1;
3219 	NFSLOCKCLSTATE();
3220 	/*
3221 	 * First, look for one under a delegation that was locally issued
3222 	 * and just decrement the opencnt for it. Since all my Opens against
3223 	 * the server are DENY_NONE, I don't see a problem with hanging
3224 	 * onto them. (It is much easier to use one of the extant Opens
3225 	 * that I already have on the server when a Delegation is recalled
3226 	 * than to do fresh Opens.) Someday, I might need to rethink this, but.
3227 	 */
3228 	dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len);
3229 	if (dp != NULL) {
3230 		LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
3231 			op = LIST_FIRST(&owp->nfsow_open);
3232 			if (op != NULL) {
3233 				/*
3234 				 * Since a delegation is for a file, there
3235 				 * should never be more than one open for
3236 				 * each openowner.
3237 				 */
3238 				if (LIST_NEXT(op, nfso_list) != NULL)
3239 					panic("nfscdeleg opens");
3240 				if (notdecr && op->nfso_opencnt > 0) {
3241 					notdecr = 0;
3242 					op->nfso_opencnt--;
3243 					break;
3244 				}
3245 			}
3246 		}
3247 	}
3248 
3249 	/* Now process the opens against the server. */
3250 	LIST_FOREACH(op, NFSCLOPENHASH(clp, nfhp->nfh_fh, nfhp->nfh_len),
3251 	    nfso_hash) {
3252 		if (op->nfso_fhlen == nfhp->nfh_len &&
3253 		    !NFSBCMP(op->nfso_fh, nfhp->nfh_fh,
3254 		    nfhp->nfh_len)) {
3255 			/* Found an open, decrement cnt if possible */
3256 			if (notdecr && op->nfso_opencnt > 0) {
3257 				notdecr = 0;
3258 				op->nfso_opencnt--;
3259 			}
3260 			/*
3261 			 * There are more opens, so just return.
3262 			 */
3263 			if (op->nfso_opencnt > 0) {
3264 				NFSUNLOCKCLSTATE();
3265 				return (0);
3266 			}
3267 		}
3268 	}
3269 	NFSUNLOCKCLSTATE();
3270 	if (notdecr)
3271 		printf("nfscl: never fnd open\n");
3272 	return (0);
3273 }
3274 
/*
 * Close all opens for the file handle of vp: free the local Open
 * structures under any delegation, return layouts marked "return on
 * close" and do the Close RPC(s) against the server.
 * On success (return of 0), a referenced clp is returned via clpp.
 */
int
nfscl_doclose(vnode_t vp, struct nfsclclient **clpp, NFSPROC_T *p)
{
	struct nfsclclient *clp;
	struct nfsclowner *owp, *nowp;
	struct nfsclopen *op;
	struct nfscldeleg *dp;
	struct nfsfh *nfhp;
	struct nfsclrecalllayout *recallp;
	int error;

	error = nfscl_getcl(vp->v_mount, NULL, NULL, false, &clp);
	if (error)
		return (error);
	*clpp = clp;

	nfhp = VTONFS(vp)->n_fhp;
	/* Pre-allocate, so nfscl_retoncloselayout() never needs to sleep. */
	recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, M_WAITOK);
	NFSLOCKCLSTATE();
	/*
	 * First get rid of the local Open structures, which should be no
	 * longer in use.
	 */
	dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len);
	if (dp != NULL) {
		LIST_FOREACH_SAFE(owp, &dp->nfsdl_owner, nfsow_list, nowp) {
			op = LIST_FIRST(&owp->nfsow_open);
			if (op != NULL) {
				KASSERT((op->nfso_opencnt == 0),
				    ("nfscl: bad open cnt on deleg"));
				nfscl_freeopen(op, 1);
			}
			nfscl_freeopenowner(owp, 1);
		}
	}

	/* Return any layouts marked return on close. */
	nfscl_retoncloselayout(vp, clp, nfhp->nfh_fh, nfhp->nfh_len, &recallp);

	/* Now process the opens against the server. */
lookformore:
	LIST_FOREACH(op, NFSCLOPENHASH(clp, nfhp->nfh_fh, nfhp->nfh_len),
	    nfso_hash) {
		if (op->nfso_fhlen == nfhp->nfh_len &&
		    !NFSBCMP(op->nfso_fh, nfhp->nfh_fh,
		    nfhp->nfh_len)) {
			/* Found an open, close it. */
#ifdef DIAGNOSTIC
			KASSERT((op->nfso_opencnt == 0),
			    ("nfscl: bad open cnt on server (%d)",
			     op->nfso_opencnt));
#endif
			/*
			 * The mutex is dropped around the Close RPC, so the
			 * hash list can change.  Restart the scan afterwards.
			 */
			NFSUNLOCKCLSTATE();
			nfsrpc_doclose(VFSTONFS(vp->v_mount), op, p);
			NFSLOCKCLSTATE();
			goto lookformore;
		}
	}
	NFSUNLOCKCLSTATE();
	/*
	 * recallp has been set NULL by nfscl_retoncloselayout() if it was
	 * used by the function, but calling free() with a NULL pointer is ok.
	 */
	free(recallp, M_NFSLAYRECALL);
	return (0);
}
3341 
3342 /*
3343  * Return all delegations on this client.
3344  * (Must be called with client sleep lock.)
3345  */
3346 static void
3347 nfscl_delegreturnall(struct nfsclclient *clp, NFSPROC_T *p)
3348 {
3349 	struct nfscldeleg *dp, *ndp;
3350 	struct ucred *cred;
3351 
3352 	cred = newnfs_getcred();
3353 	TAILQ_FOREACH_SAFE(dp, &clp->nfsc_deleg, nfsdl_list, ndp) {
3354 		nfscl_cleandeleg(dp);
3355 		(void) nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
3356 		nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
3357 	}
3358 	NFSFREECRED(cred);
3359 }
3360 
3361 /*
3362  * Return any delegation for this vp.
3363  */
3364 void
3365 nfscl_delegreturnvp(vnode_t vp, NFSPROC_T *p)
3366 {
3367 	struct nfsclclient *clp;
3368 	struct nfscldeleg *dp;
3369 	struct ucred *cred;
3370 	struct nfsnode *np;
3371 	struct nfsmount *nmp;
3372 
3373 	nmp = VFSTONFS(vp->v_mount);
3374 	NFSLOCKMNT(nmp);
3375 	if ((nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0) {
3376 		NFSUNLOCKMNT(nmp);
3377 		return;
3378 	}
3379 	NFSUNLOCKMNT(nmp);
3380 	np = VTONFS(vp);
3381 	cred = newnfs_getcred();
3382 	dp = NULL;
3383 	NFSLOCKCLSTATE();
3384 	clp = nmp->nm_clp;
3385 	if (clp != NULL)
3386 		dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
3387 		    np->n_fhp->nfh_len);
3388 	if (dp != NULL) {
3389 		nfscl_cleandeleg(dp);
3390 		nfscl_freedeleg(&clp->nfsc_deleg, dp, false);
3391 		NFSUNLOCKCLSTATE();
3392 		newnfs_copycred(&dp->nfsdl_cred, cred);
3393 		nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
3394 		free(dp, M_NFSCLDELEG);
3395 	} else
3396 		NFSUNLOCKCLSTATE();
3397 	NFSFREECRED(cred);
3398 }
3399 
3400 /*
3401  * Do a callback RPC.
3402  */
3403 void
3404 nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p)
3405 {
3406 	int clist, gotseq_ok, i, j, k, op, rcalls;
3407 	u_int32_t *tl;
3408 	struct nfsclclient *clp;
3409 	struct nfscldeleg *dp = NULL;
3410 	int numops, taglen = -1, error = 0, trunc __unused;
3411 	u_int32_t minorvers = 0, retops = 0, *retopsp = NULL, *repp, cbident;
3412 	u_char tag[NFSV4_SMALLSTR + 1], *tagstr;
3413 	vnode_t vp = NULL;
3414 	struct nfsnode *np;
3415 	struct vattr va;
3416 	struct nfsfh *nfhp;
3417 	mount_t mp;
3418 	nfsattrbit_t attrbits, rattrbits;
3419 	nfsv4stateid_t stateid;
3420 	uint32_t seqid, slotid = 0, highslot, cachethis __unused;
3421 	uint8_t sessionid[NFSX_V4SESSIONID];
3422 	struct mbuf *rep;
3423 	struct nfscllayout *lyp;
3424 	uint64_t filesid[2], len, off;
3425 	int changed, gotone, laytype, recalltype;
3426 	uint32_t iomode;
3427 	struct nfsclrecalllayout *recallp = NULL;
3428 	struct nfsclsession *tsep;
3429 
3430 	gotseq_ok = 0;
3431 	nfsrvd_rephead(nd);
3432 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3433 	taglen = fxdr_unsigned(int, *tl);
3434 	if (taglen < 0) {
3435 		error = EBADRPC;
3436 		goto nfsmout;
3437 	}
3438 	if (taglen <= NFSV4_SMALLSTR)
3439 		tagstr = tag;
3440 	else
3441 		tagstr = malloc(taglen + 1, M_TEMP, M_WAITOK);
3442 	error = nfsrv_mtostr(nd, tagstr, taglen);
3443 	if (error) {
3444 		if (taglen > NFSV4_SMALLSTR)
3445 			free(tagstr, M_TEMP);
3446 		taglen = -1;
3447 		goto nfsmout;
3448 	}
3449 	(void) nfsm_strtom(nd, tag, taglen);
3450 	if (taglen > NFSV4_SMALLSTR) {
3451 		free(tagstr, M_TEMP);
3452 	}
3453 	NFSM_BUILD(retopsp, u_int32_t *, NFSX_UNSIGNED);
3454 	NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3455 	minorvers = fxdr_unsigned(u_int32_t, *tl++);
3456 	if (minorvers != NFSV4_MINORVERSION &&
3457 	    minorvers != NFSV41_MINORVERSION &&
3458 	    minorvers != NFSV42_MINORVERSION)
3459 		nd->nd_repstat = NFSERR_MINORVERMISMATCH;
3460 	cbident = fxdr_unsigned(u_int32_t, *tl++);
3461 	if (nd->nd_repstat)
3462 		numops = 0;
3463 	else
3464 		numops = fxdr_unsigned(int, *tl);
3465 	/*
3466 	 * Loop around doing the sub ops.
3467 	 */
3468 	for (i = 0; i < numops; i++) {
3469 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3470 		NFSM_BUILD(repp, u_int32_t *, 2 * NFSX_UNSIGNED);
3471 		*repp++ = *tl;
3472 		op = fxdr_unsigned(int, *tl);
3473 		if (op < NFSV4OP_CBGETATTR ||
3474 		   (op > NFSV4OP_CBRECALL && minorvers == NFSV4_MINORVERSION) ||
3475 		   (op > NFSV4OP_CBNOTIFYDEVID &&
3476 		    minorvers == NFSV41_MINORVERSION) ||
3477 		   (op > NFSV4OP_CBOFFLOAD &&
3478 		    minorvers == NFSV42_MINORVERSION)) {
3479 		    nd->nd_repstat = NFSERR_OPILLEGAL;
3480 		    *repp = nfscl_errmap(nd, minorvers);
3481 		    retops++;
3482 		    break;
3483 		}
3484 		nd->nd_procnum = op;
3485 		if (op < NFSV42_CBNOPS)
3486 			nfsstatsv1.cbrpccnt[nd->nd_procnum]++;
3487 		switch (op) {
3488 		case NFSV4OP_CBGETATTR:
3489 			NFSCL_DEBUG(4, "cbgetattr\n");
3490 			mp = NULL;
3491 			vp = NULL;
3492 			error = nfsm_getfh(nd, &nfhp);
3493 			if (!error)
3494 				error = nfsrv_getattrbits(nd, &attrbits,
3495 				    NULL, NULL);
3496 			if (error == 0 && i == 0 &&
3497 			    minorvers != NFSV4_MINORVERSION)
3498 				error = NFSERR_OPNOTINSESS;
3499 			if (!error) {
3500 				mp = nfscl_getmnt(minorvers, sessionid, cbident,
3501 				    &clp);
3502 				if (mp == NULL)
3503 					error = NFSERR_SERVERFAULT;
3504 			}
3505 			if (!error) {
3506 				error = nfscl_ngetreopen(mp, nfhp->nfh_fh,
3507 				    nfhp->nfh_len, p, &np);
3508 				if (!error)
3509 					vp = NFSTOV(np);
3510 			}
3511 			if (!error) {
3512 				NFSZERO_ATTRBIT(&rattrbits);
3513 				NFSLOCKCLSTATE();
3514 				dp = nfscl_finddeleg(clp, nfhp->nfh_fh,
3515 				    nfhp->nfh_len);
3516 				if (dp != NULL) {
3517 					if (NFSISSET_ATTRBIT(&attrbits,
3518 					    NFSATTRBIT_SIZE)) {
3519 						if (vp != NULL)
3520 							va.va_size = np->n_size;
3521 						else
3522 							va.va_size =
3523 							    dp->nfsdl_size;
3524 						NFSSETBIT_ATTRBIT(&rattrbits,
3525 						    NFSATTRBIT_SIZE);
3526 					}
3527 					if (NFSISSET_ATTRBIT(&attrbits,
3528 					    NFSATTRBIT_CHANGE)) {
3529 						va.va_filerev =
3530 						    dp->nfsdl_change;
3531 						if (vp == NULL ||
3532 						    (np->n_flag & NDELEGMOD))
3533 							va.va_filerev++;
3534 						NFSSETBIT_ATTRBIT(&rattrbits,
3535 						    NFSATTRBIT_CHANGE);
3536 					}
3537 				} else
3538 					error = NFSERR_SERVERFAULT;
3539 				NFSUNLOCKCLSTATE();
3540 			}
3541 			if (vp != NULL)
3542 				vrele(vp);
3543 			if (mp != NULL)
3544 				vfs_unbusy(mp);
3545 			if (nfhp != NULL)
3546 				free(nfhp, M_NFSFH);
3547 			if (!error)
3548 				(void) nfsv4_fillattr(nd, NULL, NULL, NULL, &va,
3549 				    NULL, 0, &rattrbits, NULL, p, 0, 0, 0, 0,
3550 				    (uint64_t)0, NULL);
3551 			break;
3552 		case NFSV4OP_CBRECALL:
3553 			NFSCL_DEBUG(4, "cbrecall\n");
3554 			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
3555 			    NFSX_UNSIGNED);
3556 			stateid.seqid = *tl++;
3557 			NFSBCOPY((caddr_t)tl, (caddr_t)stateid.other,
3558 			    NFSX_STATEIDOTHER);
3559 			tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
3560 			trunc = fxdr_unsigned(int, *tl);
3561 			error = nfsm_getfh(nd, &nfhp);
3562 			if (error == 0 && i == 0 &&
3563 			    minorvers != NFSV4_MINORVERSION)
3564 				error = NFSERR_OPNOTINSESS;
3565 			if (!error) {
3566 				NFSLOCKCLSTATE();
3567 				if (minorvers == NFSV4_MINORVERSION)
3568 					clp = nfscl_getclnt(cbident);
3569 				else
3570 					clp = nfscl_getclntsess(sessionid);
3571 				if (clp != NULL) {
3572 					dp = nfscl_finddeleg(clp, nfhp->nfh_fh,
3573 					    nfhp->nfh_len);
3574 					if (dp != NULL && (dp->nfsdl_flags &
3575 					    NFSCLDL_DELEGRET) == 0) {
3576 						dp->nfsdl_flags |=
3577 						    NFSCLDL_RECALL;
3578 						wakeup((caddr_t)clp);
3579 					}
3580 				} else {
3581 					error = NFSERR_SERVERFAULT;
3582 				}
3583 				NFSUNLOCKCLSTATE();
3584 			}
3585 			if (nfhp != NULL)
3586 				free(nfhp, M_NFSFH);
3587 			break;
3588 		case NFSV4OP_CBLAYOUTRECALL:
3589 			NFSCL_DEBUG(4, "cblayrec\n");
3590 			nfhp = NULL;
3591 			NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
3592 			laytype = fxdr_unsigned(int, *tl++);
3593 			iomode = fxdr_unsigned(uint32_t, *tl++);
3594 			if (newnfs_true == *tl++)
3595 				changed = 1;
3596 			else
3597 				changed = 0;
3598 			recalltype = fxdr_unsigned(int, *tl);
3599 			NFSCL_DEBUG(4, "layt=%d iom=%d ch=%d rectyp=%d\n",
3600 			    laytype, iomode, changed, recalltype);
3601 			recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL,
3602 			    M_WAITOK);
3603 			if (laytype != NFSLAYOUT_NFSV4_1_FILES &&
3604 			    laytype != NFSLAYOUT_FLEXFILE)
3605 				error = NFSERR_NOMATCHLAYOUT;
3606 			else if (recalltype == NFSLAYOUTRETURN_FILE) {
3607 				error = nfsm_getfh(nd, &nfhp);
3608 				NFSCL_DEBUG(4, "retfile getfh=%d\n", error);
3609 				if (error != 0)
3610 					goto nfsmout;
3611 				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_HYPER +
3612 				    NFSX_STATEID);
3613 				off = fxdr_hyper(tl); tl += 2;
3614 				len = fxdr_hyper(tl); tl += 2;
3615 				stateid.seqid = fxdr_unsigned(uint32_t, *tl++);
3616 				NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER);
3617 				if (minorvers == NFSV4_MINORVERSION)
3618 					error = NFSERR_NOTSUPP;
3619 				else if (i == 0)
3620 					error = NFSERR_OPNOTINSESS;
3621 				NFSCL_DEBUG(4, "off=%ju len=%ju sq=%u err=%d\n",
3622 				    (uintmax_t)off, (uintmax_t)len,
3623 				    stateid.seqid, error);
3624 				if (error == 0) {
3625 					NFSLOCKCLSTATE();
3626 					clp = nfscl_getclntsess(sessionid);
3627 					NFSCL_DEBUG(4, "cbly clp=%p\n", clp);
3628 					if (clp != NULL) {
3629 						lyp = nfscl_findlayout(clp,
3630 						    nfhp->nfh_fh,
3631 						    nfhp->nfh_len);
3632 						NFSCL_DEBUG(4, "cblyp=%p\n",
3633 						    lyp);
3634 						if (lyp != NULL &&
3635 						    (lyp->nfsly_flags &
3636 						     (NFSLY_FILES |
3637 						      NFSLY_FLEXFILE)) != 0 &&
3638 						    !NFSBCMP(stateid.other,
3639 						    lyp->nfsly_stateid.other,
3640 						    NFSX_STATEIDOTHER)) {
3641 							error =
3642 							    nfscl_layoutrecall(
3643 							    recalltype,
3644 							    lyp, iomode, off,
3645 							    len, stateid.seqid,
3646 							    0, 0, NULL,
3647 							    recallp);
3648 							if (error == 0 &&
3649 							    stateid.seqid >
3650 							    lyp->nfsly_stateid.seqid)
3651 								lyp->nfsly_stateid.seqid =
3652 								    stateid.seqid;
3653 							recallp = NULL;
3654 							wakeup(clp);
3655 							NFSCL_DEBUG(4,
3656 							    "aft layrcal=%d "
3657 							    "layseqid=%d\n",
3658 							    error,
3659 							    lyp->nfsly_stateid.seqid);
3660 						} else
3661 							error =
3662 							  NFSERR_NOMATCHLAYOUT;
3663 					} else
3664 						error = NFSERR_NOMATCHLAYOUT;
3665 					NFSUNLOCKCLSTATE();
3666 				}
3667 				free(nfhp, M_NFSFH);
3668 			} else if (recalltype == NFSLAYOUTRETURN_FSID) {
3669 				NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER);
3670 				filesid[0] = fxdr_hyper(tl); tl += 2;
3671 				filesid[1] = fxdr_hyper(tl); tl += 2;
3672 				gotone = 0;
3673 				NFSLOCKCLSTATE();
3674 				clp = nfscl_getclntsess(sessionid);
3675 				if (clp != NULL) {
3676 					TAILQ_FOREACH(lyp, &clp->nfsc_layout,
3677 					    nfsly_list) {
3678 						if (lyp->nfsly_filesid[0] ==
3679 						    filesid[0] &&
3680 						    lyp->nfsly_filesid[1] ==
3681 						    filesid[1]) {
3682 							error =
3683 							    nfscl_layoutrecall(
3684 							    recalltype,
3685 							    lyp, iomode, 0,
3686 							    UINT64_MAX,
3687 							    lyp->nfsly_stateid.seqid,
3688 							    0, 0, NULL,
3689 							    recallp);
3690 							recallp = NULL;
3691 							gotone = 1;
3692 						}
3693 					}
3694 					if (gotone != 0)
3695 						wakeup(clp);
3696 					else
3697 						error = NFSERR_NOMATCHLAYOUT;
3698 				} else
3699 					error = NFSERR_NOMATCHLAYOUT;
3700 				NFSUNLOCKCLSTATE();
3701 			} else if (recalltype == NFSLAYOUTRETURN_ALL) {
3702 				gotone = 0;
3703 				NFSLOCKCLSTATE();
3704 				clp = nfscl_getclntsess(sessionid);
3705 				if (clp != NULL) {
3706 					TAILQ_FOREACH(lyp, &clp->nfsc_layout,
3707 					    nfsly_list) {
3708 						error = nfscl_layoutrecall(
3709 						    recalltype, lyp, iomode, 0,
3710 						    UINT64_MAX,
3711 						    lyp->nfsly_stateid.seqid,
3712 						    0, 0, NULL, recallp);
3713 						recallp = NULL;
3714 						gotone = 1;
3715 					}
3716 					if (gotone != 0)
3717 						wakeup(clp);
3718 					else
3719 						error = NFSERR_NOMATCHLAYOUT;
3720 				} else
3721 					error = NFSERR_NOMATCHLAYOUT;
3722 				NFSUNLOCKCLSTATE();
3723 			} else
3724 				error = NFSERR_NOMATCHLAYOUT;
3725 			if (recallp != NULL) {
3726 				free(recallp, M_NFSLAYRECALL);
3727 				recallp = NULL;
3728 			}
3729 			break;
3730 		case NFSV4OP_CBSEQUENCE:
3731 			NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
3732 			    5 * NFSX_UNSIGNED);
3733 			bcopy(tl, sessionid, NFSX_V4SESSIONID);
3734 			tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
3735 			seqid = fxdr_unsigned(uint32_t, *tl++);
3736 			slotid = fxdr_unsigned(uint32_t, *tl++);
3737 			highslot = fxdr_unsigned(uint32_t, *tl++);
3738 			cachethis = *tl++;
3739 			/* Throw away the referring call stuff. */
3740 			clist = fxdr_unsigned(int, *tl);
3741 			for (j = 0; j < clist; j++) {
3742 				NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
3743 				    NFSX_UNSIGNED);
3744 				tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
3745 				rcalls = fxdr_unsigned(int, *tl);
3746 				for (k = 0; k < rcalls; k++) {
3747 					NFSM_DISSECT(tl, uint32_t *,
3748 					    2 * NFSX_UNSIGNED);
3749 				}
3750 			}
3751 			NFSLOCKCLSTATE();
3752 			if (i == 0) {
3753 				clp = nfscl_getclntsess(sessionid);
3754 				if (clp == NULL)
3755 					error = NFSERR_SERVERFAULT;
3756 			} else
3757 				error = NFSERR_SEQUENCEPOS;
3758 			if (error == 0) {
3759 				tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3760 				error = nfsv4_seqsession(seqid, slotid,
3761 				    highslot, tsep->nfsess_cbslots, &rep,
3762 				    tsep->nfsess_backslots);
3763 			}
3764 			NFSUNLOCKCLSTATE();
3765 			if (error == 0 || error == NFSERR_REPLYFROMCACHE) {
3766 				gotseq_ok = 1;
3767 				if (rep != NULL) {
3768 					/*
3769 					 * Handle a reply for a retried
3770 					 * callback.  The reply will be
3771 					 * re-inserted in the session cache
3772 					 * by the nfsv4_seqsess_cacherep() call
3773 					 * after out:
3774 					 */
3775 					KASSERT(error == NFSERR_REPLYFROMCACHE,
3776 					    ("cbsequence: non-NULL rep"));
3777 					NFSCL_DEBUG(4, "Got cbretry\n");
3778 					m_freem(nd->nd_mreq);
3779 					nd->nd_mreq = rep;
3780 					rep = NULL;
3781 					goto out;
3782 				}
3783 				NFSM_BUILD(tl, uint32_t *,
3784 				    NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED);
3785 				bcopy(sessionid, tl, NFSX_V4SESSIONID);
3786 				tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
3787 				*tl++ = txdr_unsigned(seqid);
3788 				*tl++ = txdr_unsigned(slotid);
3789 				*tl++ = txdr_unsigned(NFSV4_CBSLOTS - 1);
3790 				*tl = txdr_unsigned(NFSV4_CBSLOTS - 1);
3791 			}
3792 			break;
3793 		default:
3794 			if (i == 0 && minorvers != NFSV4_MINORVERSION)
3795 				error = NFSERR_OPNOTINSESS;
3796 			else {
3797 				NFSCL_DEBUG(1, "unsupp callback %d\n", op);
3798 				error = NFSERR_NOTSUPP;
3799 			}
3800 			break;
3801 		}
3802 		if (error) {
3803 			if (error == EBADRPC || error == NFSERR_BADXDR) {
3804 				nd->nd_repstat = NFSERR_BADXDR;
3805 			} else {
3806 				nd->nd_repstat = error;
3807 			}
3808 			error = 0;
3809 		}
3810 		retops++;
3811 		if (nd->nd_repstat) {
3812 			*repp = nfscl_errmap(nd, minorvers);
3813 			break;
3814 		} else
3815 			*repp = 0;	/* NFS4_OK */
3816 	}
3817 nfsmout:
3818 	if (recallp != NULL)
3819 		free(recallp, M_NFSLAYRECALL);
3820 	if (error) {
3821 		if (error == EBADRPC || error == NFSERR_BADXDR)
3822 			nd->nd_repstat = NFSERR_BADXDR;
3823 		else
3824 			printf("nfsv4 comperr1=%d\n", error);
3825 	}
3826 	if (taglen == -1) {
3827 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3828 		*tl++ = 0;
3829 		*tl = 0;
3830 	} else {
3831 		*retopsp = txdr_unsigned(retops);
3832 	}
3833 	*nd->nd_errp = nfscl_errmap(nd, minorvers);
3834 out:
3835 	if (gotseq_ok != 0) {
3836 		rep = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK);
3837 		NFSLOCKCLSTATE();
3838 		clp = nfscl_getclntsess(sessionid);
3839 		if (clp != NULL) {
3840 			tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3841 			nfsv4_seqsess_cacherep(slotid, tsep->nfsess_cbslots,
3842 			    NFSERR_OK, &rep);
3843 			NFSUNLOCKCLSTATE();
3844 		} else {
3845 			NFSUNLOCKCLSTATE();
3846 			m_freem(rep);
3847 		}
3848 	}
3849 }
3850 
3851 /*
3852  * Generate the next cbident value. Basically just increment a static value
3853  * and then check that it isn't already in the list, if it has wrapped around.
3854  */
3855 static u_int32_t
3856 nfscl_nextcbident(void)
3857 {
3858 	struct nfsclclient *clp;
3859 	int matched;
3860 	static u_int32_t nextcbident = 0;
3861 	static int haswrapped = 0;
3862 
3863 	nextcbident++;
3864 	if (nextcbident == 0)
3865 		haswrapped = 1;
3866 	if (haswrapped) {
3867 		/*
3868 		 * Search the clientid list for one already using this cbident.
3869 		 */
3870 		do {
3871 			matched = 0;
3872 			NFSLOCKCLSTATE();
3873 			LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
3874 				if (clp->nfsc_cbident == nextcbident) {
3875 					matched = 1;
3876 					break;
3877 				}
3878 			}
3879 			NFSUNLOCKCLSTATE();
3880 			if (matched == 1)
3881 				nextcbident++;
3882 		} while (matched);
3883 	}
3884 	return (nextcbident);
3885 }
3886 
3887 /*
3888  * Get the mount point related to a given cbident or session and busy it.
3889  */
3890 static mount_t
3891 nfscl_getmnt(int minorvers, uint8_t *sessionid, u_int32_t cbident,
3892     struct nfsclclient **clpp)
3893 {
3894 	struct nfsclclient *clp;
3895 	mount_t mp;
3896 	int error;
3897 	struct nfsclsession *tsep;
3898 
3899 	*clpp = NULL;
3900 	NFSLOCKCLSTATE();
3901 	LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
3902 		tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3903 		if (minorvers == NFSV4_MINORVERSION) {
3904 			if (clp->nfsc_cbident == cbident)
3905 				break;
3906 		} else if (!NFSBCMP(tsep->nfsess_sessionid, sessionid,
3907 		    NFSX_V4SESSIONID))
3908 			break;
3909 	}
3910 	if (clp == NULL) {
3911 		NFSUNLOCKCLSTATE();
3912 		return (NULL);
3913 	}
3914 	mp = clp->nfsc_nmp->nm_mountp;
3915 	vfs_ref(mp);
3916 	NFSUNLOCKCLSTATE();
3917 	error = vfs_busy(mp, 0);
3918 	vfs_rel(mp);
3919 	if (error != 0)
3920 		return (NULL);
3921 	*clpp = clp;
3922 	return (mp);
3923 }
3924 
3925 /*
3926  * Get the clientid pointer related to a given cbident.
3927  */
3928 static struct nfsclclient *
3929 nfscl_getclnt(u_int32_t cbident)
3930 {
3931 	struct nfsclclient *clp;
3932 
3933 	LIST_FOREACH(clp, &nfsclhead, nfsc_list)
3934 		if (clp->nfsc_cbident == cbident)
3935 			break;
3936 	return (clp);
3937 }
3938 
3939 /*
3940  * Get the clientid pointer related to a given sessionid.
3941  */
3942 static struct nfsclclient *
3943 nfscl_getclntsess(uint8_t *sessionid)
3944 {
3945 	struct nfsclclient *clp;
3946 	struct nfsclsession *tsep;
3947 
3948 	LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
3949 		tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3950 		if (!NFSBCMP(tsep->nfsess_sessionid, sessionid,
3951 		    NFSX_V4SESSIONID))
3952 			break;
3953 	}
3954 	return (clp);
3955 }
3956 
3957 /*
3958  * Search for a lock conflict locally on the client. A conflict occurs if
3959  * - not same owner and overlapping byte range and at least one of them is
3960  *   a write lock or this is an unlock.
3961  */
3962 static int
3963 nfscl_localconflict(struct nfsclclient *clp, u_int8_t *fhp, int fhlen,
3964     struct nfscllock *nlop, u_int8_t *own, struct nfscldeleg *dp,
3965     struct nfscllock **lopp)
3966 {
3967 	struct nfsclopen *op;
3968 	int ret;
3969 
3970 	if (dp != NULL) {
3971 		ret = nfscl_checkconflict(&dp->nfsdl_lock, nlop, own, lopp);
3972 		if (ret)
3973 			return (ret);
3974 	}
3975 	LIST_FOREACH(op, NFSCLOPENHASH(clp, fhp, fhlen), nfso_hash) {
3976 		if (op->nfso_fhlen == fhlen &&
3977 		    !NFSBCMP(op->nfso_fh, fhp, fhlen)) {
3978 			ret = nfscl_checkconflict(&op->nfso_lock, nlop,
3979 			    own, lopp);
3980 			if (ret)
3981 				return (ret);
3982 		}
3983 	}
3984 	return (0);
3985 }
3986 
3987 static int
3988 nfscl_checkconflict(struct nfscllockownerhead *lhp, struct nfscllock *nlop,
3989     u_int8_t *own, struct nfscllock **lopp)
3990 {
3991 	struct nfscllockowner *lp;
3992 	struct nfscllock *lop;
3993 
3994 	LIST_FOREACH(lp, lhp, nfsl_list) {
3995 		if (NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) {
3996 			LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
3997 				if (lop->nfslo_first >= nlop->nfslo_end)
3998 					break;
3999 				if (lop->nfslo_end <= nlop->nfslo_first)
4000 					continue;
4001 				if (lop->nfslo_type == F_WRLCK ||
4002 				    nlop->nfslo_type == F_WRLCK ||
4003 				    nlop->nfslo_type == F_UNLCK) {
4004 					if (lopp != NULL)
4005 						*lopp = lop;
4006 					return (NFSERR_DENIED);
4007 				}
4008 			}
4009 		}
4010 	}
4011 	return (0);
4012 }
4013 
4014 /*
4015  * Check for a local conflicting lock.
4016  */
4017 int
4018 nfscl_lockt(vnode_t vp, struct nfsclclient *clp, u_int64_t off,
4019     u_int64_t len, struct flock *fl, NFSPROC_T *p, void *id, int flags)
4020 {
4021 	struct nfscllock *lop, nlck;
4022 	struct nfscldeleg *dp;
4023 	struct nfsnode *np;
4024 	u_int8_t own[NFSV4CL_LOCKNAMELEN];
4025 	int error;
4026 
4027 	nlck.nfslo_type = fl->l_type;
4028 	nlck.nfslo_first = off;
4029 	if (len == NFS64BITSSET) {
4030 		nlck.nfslo_end = NFS64BITSSET;
4031 	} else {
4032 		nlck.nfslo_end = off + len;
4033 		if (nlck.nfslo_end <= nlck.nfslo_first)
4034 			return (NFSERR_INVAL);
4035 	}
4036 	np = VTONFS(vp);
4037 	nfscl_filllockowner(id, own, flags);
4038 	NFSLOCKCLSTATE();
4039 	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4040 	error = nfscl_localconflict(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
4041 	    &nlck, own, dp, &lop);
4042 	if (error != 0) {
4043 		fl->l_whence = SEEK_SET;
4044 		fl->l_start = lop->nfslo_first;
4045 		if (lop->nfslo_end == NFS64BITSSET)
4046 			fl->l_len = 0;
4047 		else
4048 			fl->l_len = lop->nfslo_end - lop->nfslo_first;
4049 		fl->l_pid = (pid_t)0;
4050 		fl->l_type = lop->nfslo_type;
4051 		error = -1;			/* no RPC required */
4052 	} else if (dp != NULL && ((dp->nfsdl_flags & NFSCLDL_WRITE) ||
4053 	    fl->l_type == F_RDLCK)) {
4054 		/*
4055 		 * The delegation ensures that there isn't a conflicting
4056 		 * lock on the server, so return -1 to indicate an RPC
4057 		 * isn't required.
4058 		 */
4059 		fl->l_type = F_UNLCK;
4060 		error = -1;
4061 	}
4062 	NFSUNLOCKCLSTATE();
4063 	return (error);
4064 }
4065 
4066 /*
4067  * Handle Recall of a delegation.
4068  * The clp must be exclusive locked when this is called.
4069  */
4070 static int
4071 nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp,
4072     struct nfscldeleg *dp, vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4073     int called_from_renewthread, vnode_t *vpp)
4074 {
4075 	struct nfsclowner *owp, *lowp, *nowp;
4076 	struct nfsclopen *op, *lop;
4077 	struct nfscllockowner *lp;
4078 	struct nfscllock *lckp;
4079 	struct nfsnode *np;
4080 	int error = 0, ret;
4081 
4082 	if (vp == NULL) {
4083 		KASSERT(vpp != NULL, ("nfscl_recalldeleg: vpp NULL"));
4084 		*vpp = NULL;
4085 		/*
4086 		 * First, get a vnode for the file. This is needed to do RPCs.
4087 		 */
4088 		ret = nfscl_ngetreopen(nmp->nm_mountp, dp->nfsdl_fh,
4089 		    dp->nfsdl_fhlen, p, &np);
4090 		if (ret) {
4091 			/*
4092 			 * File isn't open, so nothing to move over to the
4093 			 * server.
4094 			 */
4095 			return (0);
4096 		}
4097 		vp = NFSTOV(np);
4098 		*vpp = vp;
4099 	} else {
4100 		np = VTONFS(vp);
4101 	}
4102 	dp->nfsdl_flags &= ~NFSCLDL_MODTIMESET;
4103 
4104 	/*
4105 	 * Ok, if it's a write delegation, flush data to the server, so
4106 	 * that close/open consistency is retained.
4107 	 */
4108 	ret = 0;
4109 	NFSLOCKNODE(np);
4110 	if ((dp->nfsdl_flags & NFSCLDL_WRITE) && (np->n_flag & NMODIFIED)) {
4111 		np->n_flag |= NDELEGRECALL;
4112 		NFSUNLOCKNODE(np);
4113 		ret = ncl_flush(vp, MNT_WAIT, p, 1, called_from_renewthread);
4114 		NFSLOCKNODE(np);
4115 		np->n_flag &= ~NDELEGRECALL;
4116 	}
4117 	NFSINVALATTRCACHE(np);
4118 	NFSUNLOCKNODE(np);
4119 	if (ret == EIO && called_from_renewthread != 0) {
4120 		/*
4121 		 * If the flush failed with EIO for the renew thread,
4122 		 * return now, so that the dirty buffer will be flushed
4123 		 * later.
4124 		 */
4125 		return (ret);
4126 	}
4127 
4128 	/*
4129 	 * Now, for each openowner with opens issued locally, move them
4130 	 * over to state against the server.
4131 	 */
4132 	LIST_FOREACH(lowp, &dp->nfsdl_owner, nfsow_list) {
4133 		lop = LIST_FIRST(&lowp->nfsow_open);
4134 		if (lop != NULL) {
4135 			if (LIST_NEXT(lop, nfso_list) != NULL)
4136 				panic("nfsdlg mult opens");
4137 			/*
4138 			 * Look for the same openowner against the server.
4139 			 */
4140 			LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
4141 				if (!NFSBCMP(lowp->nfsow_owner,
4142 				    owp->nfsow_owner, NFSV4CL_LOCKNAMELEN)) {
4143 					newnfs_copycred(&dp->nfsdl_cred, cred);
4144 					ret = nfscl_moveopen(vp, clp, nmp, lop,
4145 					    owp, dp, cred, p);
4146 					if (ret == NFSERR_STALECLIENTID ||
4147 					    ret == NFSERR_STALEDONTRECOVER ||
4148 					    ret == NFSERR_BADSESSION)
4149 						return (ret);
4150 					if (ret) {
4151 						nfscl_freeopen(lop, 1);
4152 						if (!error)
4153 							error = ret;
4154 					}
4155 					break;
4156 				}
4157 			}
4158 
4159 			/*
4160 			 * If no openowner found, create one and get an open
4161 			 * for it.
4162 			 */
4163 			if (owp == NULL) {
4164 				nowp = malloc(
4165 				    sizeof (struct nfsclowner), M_NFSCLOWNER,
4166 				    M_WAITOK);
4167 				nfscl_newopen(clp, NULL, &owp, &nowp, &op,
4168 				    NULL, lowp->nfsow_owner, dp->nfsdl_fh,
4169 				    dp->nfsdl_fhlen, NULL, NULL);
4170 				newnfs_copycred(&dp->nfsdl_cred, cred);
4171 				ret = nfscl_moveopen(vp, clp, nmp, lop,
4172 				    owp, dp, cred, p);
4173 				if (ret) {
4174 					nfscl_freeopenowner(owp, 0);
4175 					if (ret == NFSERR_STALECLIENTID ||
4176 					    ret == NFSERR_STALEDONTRECOVER ||
4177 					    ret == NFSERR_BADSESSION)
4178 						return (ret);
4179 					if (ret) {
4180 						nfscl_freeopen(lop, 1);
4181 						if (!error)
4182 							error = ret;
4183 					}
4184 				}
4185 			}
4186 		}
4187 	}
4188 
4189 	/*
4190 	 * Now, get byte range locks for any locks done locally.
4191 	 */
4192 	LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4193 		LIST_FOREACH(lckp, &lp->nfsl_lock, nfslo_list) {
4194 			newnfs_copycred(&dp->nfsdl_cred, cred);
4195 			ret = nfscl_relock(vp, clp, nmp, lp, lckp, cred, p);
4196 			if (ret == NFSERR_STALESTATEID ||
4197 			    ret == NFSERR_STALEDONTRECOVER ||
4198 			    ret == NFSERR_STALECLIENTID ||
4199 			    ret == NFSERR_BADSESSION)
4200 				return (ret);
4201 			if (ret && !error)
4202 				error = ret;
4203 		}
4204 	}
4205 	return (error);
4206 }
4207 
4208 /*
4209  * Move a locally issued open over to an owner on the state list.
4210  * SIDE EFFECT: If it needs to sleep (do an rpc), it unlocks clstate and
4211  * returns with it unlocked.
4212  */
4213 static int
4214 nfscl_moveopen(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp,
4215     struct nfsclopen *lop, struct nfsclowner *owp, struct nfscldeleg *dp,
4216     struct ucred *cred, NFSPROC_T *p)
4217 {
4218 	struct nfsclopen *op, *nop;
4219 	struct nfscldeleg *ndp;
4220 	struct nfsnode *np;
4221 	int error = 0, newone;
4222 
4223 	/*
4224 	 * First, look for an appropriate open, If found, just increment the
4225 	 * opencnt in it.
4226 	 */
4227 	LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
4228 		if ((op->nfso_mode & lop->nfso_mode) == lop->nfso_mode &&
4229 		    op->nfso_fhlen == lop->nfso_fhlen &&
4230 		    !NFSBCMP(op->nfso_fh, lop->nfso_fh, op->nfso_fhlen)) {
4231 			op->nfso_opencnt += lop->nfso_opencnt;
4232 			nfscl_freeopen(lop, 1);
4233 			return (0);
4234 		}
4235 	}
4236 
4237 	/* No appropriate open, so we have to do one against the server. */
4238 	np = VTONFS(vp);
4239 	nop = malloc(sizeof (struct nfsclopen) +
4240 	    lop->nfso_fhlen - 1, M_NFSCLOPEN, M_WAITOK);
4241 	nop->nfso_hash.le_prev = NULL;
4242 	newone = 0;
4243 	nfscl_newopen(clp, NULL, &owp, NULL, &op, &nop, owp->nfsow_owner,
4244 	    lop->nfso_fh, lop->nfso_fhlen, cred, &newone);
4245 	ndp = dp;
4246 	error = nfscl_tryopen(nmp, vp, np->n_v4->n4_data, np->n_v4->n4_fhlen,
4247 	    lop->nfso_fh, lop->nfso_fhlen, lop->nfso_mode, op,
4248 	    NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, &ndp, 0, 0, cred, p);
4249 	if (error) {
4250 		if (newone)
4251 			nfscl_freeopen(op, 0);
4252 	} else {
4253 		op->nfso_mode |= lop->nfso_mode;
4254 		op->nfso_opencnt += lop->nfso_opencnt;
4255 		nfscl_freeopen(lop, 1);
4256 	}
4257 	if (nop != NULL)
4258 		free(nop, M_NFSCLOPEN);
4259 	if (ndp != NULL) {
4260 		/*
4261 		 * What should I do with the returned delegation, since the
4262 		 * delegation is being recalled? For now, just printf and
4263 		 * through it away.
4264 		 */
4265 		printf("Moveopen returned deleg\n");
4266 		free(ndp, M_NFSCLDELEG);
4267 	}
4268 	return (error);
4269 }
4270 
4271 /*
4272  * Recall all delegations on this client.
4273  */
4274 static void
4275 nfscl_totalrecall(struct nfsclclient *clp)
4276 {
4277 	struct nfscldeleg *dp;
4278 
4279 	TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
4280 		if ((dp->nfsdl_flags & NFSCLDL_DELEGRET) == 0)
4281 			dp->nfsdl_flags |= NFSCLDL_RECALL;
4282 	}
4283 }
4284 
4285 /*
4286  * Relock byte ranges. Called for delegation recall and state expiry.
4287  */
4288 static int
4289 nfscl_relock(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp,
4290     struct nfscllockowner *lp, struct nfscllock *lop, struct ucred *cred,
4291     NFSPROC_T *p)
4292 {
4293 	struct nfscllockowner *nlp;
4294 	struct nfsfh *nfhp;
4295 	u_int64_t off, len;
4296 	int error, newone, donelocally;
4297 
4298 	off = lop->nfslo_first;
4299 	len = lop->nfslo_end - lop->nfslo_first;
4300 	error = nfscl_getbytelock(vp, off, len, lop->nfslo_type, cred, p,
4301 	    clp, 1, NULL, lp->nfsl_lockflags, lp->nfsl_owner,
4302 	    lp->nfsl_openowner, &nlp, &newone, &donelocally);
4303 	if (error || donelocally)
4304 		return (error);
4305 	nfhp = VTONFS(vp)->n_fhp;
4306 	error = nfscl_trylock(nmp, vp, nfhp->nfh_fh,
4307 	    nfhp->nfh_len, nlp, newone, 0, off,
4308 	    len, lop->nfslo_type, cred, p);
4309 	if (error)
4310 		nfscl_freelockowner(nlp, 0);
4311 	return (error);
4312 }
4313 
4314 /*
4315  * Called to re-open a file. Basically get a vnode for the file handle
4316  * and then call nfsrpc_openrpc() to do the rest.
4317  */
4318 static int
4319 nfsrpc_reopen(struct nfsmount *nmp, u_int8_t *fhp, int fhlen,
4320     u_int32_t mode, struct nfsclopen *op, struct nfscldeleg **dpp,
4321     struct ucred *cred, NFSPROC_T *p)
4322 {
4323 	struct nfsnode *np;
4324 	vnode_t vp;
4325 	int error;
4326 
4327 	error = nfscl_ngetreopen(nmp->nm_mountp, fhp, fhlen, p, &np);
4328 	if (error)
4329 		return (error);
4330 	vp = NFSTOV(np);
4331 	if (np->n_v4 != NULL) {
4332 		error = nfscl_tryopen(nmp, vp, np->n_v4->n4_data,
4333 		    np->n_v4->n4_fhlen, fhp, fhlen, mode, op,
4334 		    NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, dpp, 0, 0,
4335 		    cred, p);
4336 	} else {
4337 		error = EINVAL;
4338 	}
4339 	vrele(vp);
4340 	return (error);
4341 }
4342 
4343 /*
4344  * Try an open against the server. Just call nfsrpc_openrpc(), retrying while
4345  * NFSERR_DELAY. Also, try system credentials, if the passed in credentials
4346  * fail.
4347  */
4348 static int
4349 nfscl_tryopen(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen,
4350     u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op,
4351     u_int8_t *name, int namelen, struct nfscldeleg **ndpp,
4352     int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p)
4353 {
4354 	int error;
4355 
4356 	do {
4357 		error = nfsrpc_openrpc(nmp, vp, fhp, fhlen, newfhp, newfhlen,
4358 		    mode, op, name, namelen, ndpp, reclaim, delegtype, cred, p,
4359 		    0, 0);
4360 		if (error == NFSERR_DELAY)
4361 			(void) nfs_catnap(PZERO, error, "nfstryop");
4362 	} while (error == NFSERR_DELAY);
4363 	if (error == EAUTH || error == EACCES) {
4364 		/* Try again using system credentials */
4365 		newnfs_setroot(cred);
4366 		do {
4367 		    error = nfsrpc_openrpc(nmp, vp, fhp, fhlen, newfhp,
4368 			newfhlen, mode, op, name, namelen, ndpp, reclaim,
4369 			delegtype, cred, p, 1, 0);
4370 		    if (error == NFSERR_DELAY)
4371 			(void) nfs_catnap(PZERO, error, "nfstryop");
4372 		} while (error == NFSERR_DELAY);
4373 	}
4374 	return (error);
4375 }
4376 
4377 /*
4378  * Try a byte range lock. Just loop on nfsrpc_lock() while it returns
4379  * NFSERR_DELAY. Also, retry with system credentials, if the provided
4380  * cred don't work.
4381  */
4382 static int
4383 nfscl_trylock(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp,
4384     int fhlen, struct nfscllockowner *nlp, int newone, int reclaim,
4385     u_int64_t off, u_int64_t len, short type, struct ucred *cred, NFSPROC_T *p)
4386 {
4387 	struct nfsrv_descript nfsd, *nd = &nfsd;
4388 	int error;
4389 
4390 	do {
4391 		error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp, newone,
4392 		    reclaim, off, len, type, cred, p, 0);
4393 		if (!error && nd->nd_repstat == NFSERR_DELAY)
4394 			(void) nfs_catnap(PZERO, (int)nd->nd_repstat,
4395 			    "nfstrylck");
4396 	} while (!error && nd->nd_repstat == NFSERR_DELAY);
4397 	if (!error)
4398 		error = nd->nd_repstat;
4399 	if (error == EAUTH || error == EACCES) {
4400 		/* Try again using root credentials */
4401 		newnfs_setroot(cred);
4402 		do {
4403 			error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp,
4404 			    newone, reclaim, off, len, type, cred, p, 1);
4405 			if (!error && nd->nd_repstat == NFSERR_DELAY)
4406 				(void) nfs_catnap(PZERO, (int)nd->nd_repstat,
4407 				    "nfstrylck");
4408 		} while (!error && nd->nd_repstat == NFSERR_DELAY);
4409 		if (!error)
4410 			error = nd->nd_repstat;
4411 	}
4412 	return (error);
4413 }
4414 
4415 /*
4416  * Try a delegreturn against the server. Just call nfsrpc_delegreturn(),
4417  * retrying while NFSERR_DELAY. Also, try system credentials, if the passed in
4418  * credentials fail.
4419  */
4420 static int
4421 nfscl_trydelegreturn(struct nfscldeleg *dp, struct ucred *cred,
4422     struct nfsmount *nmp, NFSPROC_T *p)
4423 {
4424 	int error;
4425 
4426 	do {
4427 		error = nfsrpc_delegreturn(dp, cred, nmp, p, 0);
4428 		if (error == NFSERR_DELAY)
4429 			(void) nfs_catnap(PZERO, error, "nfstrydp");
4430 	} while (error == NFSERR_DELAY);
4431 	if (error == EAUTH || error == EACCES) {
4432 		/* Try again using system credentials */
4433 		newnfs_setroot(cred);
4434 		do {
4435 			error = nfsrpc_delegreturn(dp, cred, nmp, p, 1);
4436 			if (error == NFSERR_DELAY)
4437 				(void) nfs_catnap(PZERO, error, "nfstrydp");
4438 		} while (error == NFSERR_DELAY);
4439 	}
4440 	return (error);
4441 }
4442 
4443 /*
4444  * Try a close against the server. Just call nfsrpc_closerpc(),
4445  * retrying while NFSERR_DELAY. Also, try system credentials, if the passed in
4446  * credentials fail.
4447  */
4448 int
4449 nfscl_tryclose(struct nfsclopen *op, struct ucred *cred,
4450     struct nfsmount *nmp, NFSPROC_T *p)
4451 {
4452 	struct nfsrv_descript nfsd, *nd = &nfsd;
4453 	int error;
4454 
4455 	do {
4456 		error = nfsrpc_closerpc(nd, nmp, op, cred, p, 0);
4457 		if (error == NFSERR_DELAY)
4458 			(void) nfs_catnap(PZERO, error, "nfstrycl");
4459 	} while (error == NFSERR_DELAY);
4460 	if (error == EAUTH || error == EACCES) {
4461 		/* Try again using system credentials */
4462 		newnfs_setroot(cred);
4463 		do {
4464 			error = nfsrpc_closerpc(nd, nmp, op, cred, p, 1);
4465 			if (error == NFSERR_DELAY)
4466 				(void) nfs_catnap(PZERO, error, "nfstrycl");
4467 		} while (error == NFSERR_DELAY);
4468 	}
4469 	return (error);
4470 }
4471 
4472 /*
4473  * Decide if a delegation on a file permits close without flushing writes
4474  * to the server. This might be a big performance win in some environments.
4475  * (Not useful until the client does caching on local stable storage.)
4476  */
4477 int
4478 nfscl_mustflush(vnode_t vp)
4479 {
4480 	struct nfsclclient *clp;
4481 	struct nfscldeleg *dp;
4482 	struct nfsnode *np;
4483 	struct nfsmount *nmp;
4484 
4485 	np = VTONFS(vp);
4486 	nmp = VFSTONFS(vp->v_mount);
4487 	if (!NFSHASNFSV4(nmp))
4488 		return (1);
4489 	NFSLOCKCLSTATE();
4490 	clp = nfscl_findcl(nmp);
4491 	if (clp == NULL) {
4492 		NFSUNLOCKCLSTATE();
4493 		return (1);
4494 	}
4495 	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4496 	if (dp != NULL && (dp->nfsdl_flags &
4497 	    (NFSCLDL_WRITE | NFSCLDL_RECALL | NFSCLDL_DELEGRET)) ==
4498 	     NFSCLDL_WRITE &&
4499 	    (dp->nfsdl_sizelimit >= np->n_size ||
4500 	     !NFSHASSTRICT3530(nmp))) {
4501 		NFSUNLOCKCLSTATE();
4502 		return (0);
4503 	}
4504 	NFSUNLOCKCLSTATE();
4505 	return (1);
4506 }
4507 
4508 /*
4509  * See if a (write) delegation exists for this file.
4510  */
4511 int
4512 nfscl_nodeleg(vnode_t vp, int writedeleg)
4513 {
4514 	struct nfsclclient *clp;
4515 	struct nfscldeleg *dp;
4516 	struct nfsnode *np;
4517 	struct nfsmount *nmp;
4518 
4519 	np = VTONFS(vp);
4520 	nmp = VFSTONFS(vp->v_mount);
4521 	if (!NFSHASNFSV4(nmp))
4522 		return (1);
4523 	NFSLOCKMNT(nmp);
4524 	if ((nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0) {
4525 		NFSUNLOCKMNT(nmp);
4526 		return (1);
4527 	}
4528 	NFSUNLOCKMNT(nmp);
4529 	NFSLOCKCLSTATE();
4530 	clp = nfscl_findcl(nmp);
4531 	if (clp == NULL) {
4532 		NFSUNLOCKCLSTATE();
4533 		return (1);
4534 	}
4535 	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4536 	if (dp != NULL &&
4537 	    (dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) == 0 &&
4538 	    (writedeleg == 0 || (dp->nfsdl_flags & NFSCLDL_WRITE) ==
4539 	     NFSCLDL_WRITE)) {
4540 		NFSUNLOCKCLSTATE();
4541 		return (0);
4542 	}
4543 	NFSUNLOCKCLSTATE();
4544 	return (1);
4545 }
4546 
4547 /*
4548  * Look for an associated delegation that should be DelegReturned.
4549  */
4550 int
4551 nfscl_removedeleg(vnode_t vp, NFSPROC_T *p, nfsv4stateid_t *stp)
4552 {
4553 	struct nfsclclient *clp;
4554 	struct nfscldeleg *dp;
4555 	struct nfsclowner *owp;
4556 	struct nfscllockowner *lp;
4557 	struct nfsmount *nmp;
4558 	struct ucred *cred;
4559 	struct nfsnode *np;
4560 	int igotlock = 0, triedrecall = 0, needsrecall, retcnt = 0, islept;
4561 
4562 	nmp = VFSTONFS(vp->v_mount);
4563 	np = VTONFS(vp);
4564 	NFSLOCKCLSTATE();
4565 	/*
4566 	 * Loop around waiting for:
4567 	 * - outstanding I/O operations on delegations to complete
4568 	 * - for a delegation on vp that has state, lock the client and
4569 	 *   do a recall
4570 	 * - return delegation with no state
4571 	 */
4572 	while (1) {
4573 		clp = nfscl_findcl(nmp);
4574 		if (clp == NULL) {
4575 			NFSUNLOCKCLSTATE();
4576 			return (retcnt);
4577 		}
4578 		dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
4579 		    np->n_fhp->nfh_len);
4580 		if (dp != NULL) {
4581 		    /*
4582 		     * Wait for outstanding I/O ops to be done.
4583 		     */
4584 		    if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
4585 			if (igotlock) {
4586 			    nfsv4_unlock(&clp->nfsc_lock, 0);
4587 			    igotlock = 0;
4588 			}
4589 			dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED;
4590 			(void) nfsmsleep(&dp->nfsdl_rwlock,
4591 			    NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL);
4592 			continue;
4593 		    }
4594 		    needsrecall = 0;
4595 		    LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
4596 			if (!LIST_EMPTY(&owp->nfsow_open)) {
4597 			    needsrecall = 1;
4598 			    break;
4599 			}
4600 		    }
4601 		    if (!needsrecall) {
4602 			LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4603 			    if (!LIST_EMPTY(&lp->nfsl_lock)) {
4604 				needsrecall = 1;
4605 				break;
4606 			    }
4607 			}
4608 		    }
4609 		    if (needsrecall && !triedrecall) {
4610 			dp->nfsdl_flags |= NFSCLDL_DELEGRET;
4611 			islept = 0;
4612 			while (!igotlock) {
4613 			    igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
4614 				&islept, NFSCLSTATEMUTEXPTR, NULL);
4615 			    if (islept)
4616 				break;
4617 			}
4618 			if (islept)
4619 			    continue;
4620 			NFSUNLOCKCLSTATE();
4621 			cred = newnfs_getcred();
4622 			newnfs_copycred(&dp->nfsdl_cred, cred);
4623 			nfscl_recalldeleg(clp, nmp, dp, vp, cred, p, 0, NULL);
4624 			NFSFREECRED(cred);
4625 			triedrecall = 1;
4626 			NFSLOCKCLSTATE();
4627 			nfsv4_unlock(&clp->nfsc_lock, 0);
4628 			igotlock = 0;
4629 			continue;
4630 		    }
4631 		    *stp = dp->nfsdl_stateid;
4632 		    retcnt = 1;
4633 		    nfscl_cleandeleg(dp);
4634 		    nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
4635 		}
4636 		if (igotlock)
4637 		    nfsv4_unlock(&clp->nfsc_lock, 0);
4638 		NFSUNLOCKCLSTATE();
4639 		return (retcnt);
4640 	}
4641 }
4642 
4643 /*
4644  * Look for associated delegation(s) that should be DelegReturned.
4645  */
4646 int
4647 nfscl_renamedeleg(vnode_t fvp, nfsv4stateid_t *fstp, int *gotfdp, vnode_t tvp,
4648     nfsv4stateid_t *tstp, int *gottdp, NFSPROC_T *p)
4649 {
4650 	struct nfsclclient *clp;
4651 	struct nfscldeleg *dp;
4652 	struct nfsclowner *owp;
4653 	struct nfscllockowner *lp;
4654 	struct nfsmount *nmp;
4655 	struct ucred *cred;
4656 	struct nfsnode *np;
4657 	int igotlock = 0, triedrecall = 0, needsrecall, retcnt = 0, islept;
4658 
4659 	nmp = VFSTONFS(fvp->v_mount);
4660 	*gotfdp = 0;
4661 	*gottdp = 0;
4662 	NFSLOCKCLSTATE();
4663 	/*
4664 	 * Loop around waiting for:
4665 	 * - outstanding I/O operations on delegations to complete
4666 	 * - for a delegation on fvp that has state, lock the client and
4667 	 *   do a recall
4668 	 * - return delegation(s) with no state.
4669 	 */
4670 	while (1) {
4671 		clp = nfscl_findcl(nmp);
4672 		if (clp == NULL) {
4673 			NFSUNLOCKCLSTATE();
4674 			return (retcnt);
4675 		}
4676 		np = VTONFS(fvp);
4677 		dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
4678 		    np->n_fhp->nfh_len);
4679 		if (dp != NULL && *gotfdp == 0) {
4680 		    /*
4681 		     * Wait for outstanding I/O ops to be done.
4682 		     */
4683 		    if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
4684 			if (igotlock) {
4685 			    nfsv4_unlock(&clp->nfsc_lock, 0);
4686 			    igotlock = 0;
4687 			}
4688 			dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED;
4689 			(void) nfsmsleep(&dp->nfsdl_rwlock,
4690 			    NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL);
4691 			continue;
4692 		    }
4693 		    needsrecall = 0;
4694 		    LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
4695 			if (!LIST_EMPTY(&owp->nfsow_open)) {
4696 			    needsrecall = 1;
4697 			    break;
4698 			}
4699 		    }
4700 		    if (!needsrecall) {
4701 			LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4702 			    if (!LIST_EMPTY(&lp->nfsl_lock)) {
4703 				needsrecall = 1;
4704 				break;
4705 			    }
4706 			}
4707 		    }
4708 		    if (needsrecall && !triedrecall) {
4709 			dp->nfsdl_flags |= NFSCLDL_DELEGRET;
4710 			islept = 0;
4711 			while (!igotlock) {
4712 			    igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
4713 				&islept, NFSCLSTATEMUTEXPTR, NULL);
4714 			    if (islept)
4715 				break;
4716 			}
4717 			if (islept)
4718 			    continue;
4719 			NFSUNLOCKCLSTATE();
4720 			cred = newnfs_getcred();
4721 			newnfs_copycred(&dp->nfsdl_cred, cred);
4722 			nfscl_recalldeleg(clp, nmp, dp, fvp, cred, p, 0, NULL);
4723 			NFSFREECRED(cred);
4724 			triedrecall = 1;
4725 			NFSLOCKCLSTATE();
4726 			nfsv4_unlock(&clp->nfsc_lock, 0);
4727 			igotlock = 0;
4728 			continue;
4729 		    }
4730 		    *fstp = dp->nfsdl_stateid;
4731 		    retcnt++;
4732 		    *gotfdp = 1;
4733 		    nfscl_cleandeleg(dp);
4734 		    nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
4735 		}
4736 		if (igotlock) {
4737 		    nfsv4_unlock(&clp->nfsc_lock, 0);
4738 		    igotlock = 0;
4739 		}
4740 		if (tvp != NULL) {
4741 		    np = VTONFS(tvp);
4742 		    dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
4743 			np->n_fhp->nfh_len);
4744 		    if (dp != NULL && *gottdp == 0) {
4745 			/*
4746 			 * Wait for outstanding I/O ops to be done.
4747 			 */
4748 			if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
4749 			    dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED;
4750 			    (void) nfsmsleep(&dp->nfsdl_rwlock,
4751 				NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL);
4752 			    continue;
4753 			}
4754 			LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
4755 			    if (!LIST_EMPTY(&owp->nfsow_open)) {
4756 				NFSUNLOCKCLSTATE();
4757 				return (retcnt);
4758 			    }
4759 			}
4760 			LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4761 			    if (!LIST_EMPTY(&lp->nfsl_lock)) {
4762 				NFSUNLOCKCLSTATE();
4763 				return (retcnt);
4764 			    }
4765 			}
4766 			*tstp = dp->nfsdl_stateid;
4767 			retcnt++;
4768 			*gottdp = 1;
4769 			nfscl_cleandeleg(dp);
4770 			nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
4771 		    }
4772 		}
4773 		NFSUNLOCKCLSTATE();
4774 		return (retcnt);
4775 	}
4776 }
4777 
4778 /*
4779  * Get a reference on the clientid associated with the mount point.
4780  * Return 1 if success, 0 otherwise.
4781  */
4782 int
4783 nfscl_getref(struct nfsmount *nmp)
4784 {
4785 	struct nfsclclient *clp;
4786 
4787 	NFSLOCKCLSTATE();
4788 	clp = nfscl_findcl(nmp);
4789 	if (clp == NULL) {
4790 		NFSUNLOCKCLSTATE();
4791 		return (0);
4792 	}
4793 	nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, NULL);
4794 	NFSUNLOCKCLSTATE();
4795 	return (1);
4796 }
4797 
4798 /*
4799  * Release a reference on a clientid acquired with the above call.
4800  */
4801 void
4802 nfscl_relref(struct nfsmount *nmp)
4803 {
4804 	struct nfsclclient *clp;
4805 
4806 	NFSLOCKCLSTATE();
4807 	clp = nfscl_findcl(nmp);
4808 	if (clp == NULL) {
4809 		NFSUNLOCKCLSTATE();
4810 		return;
4811 	}
4812 	nfsv4_relref(&clp->nfsc_lock);
4813 	NFSUNLOCKCLSTATE();
4814 }
4815 
4816 /*
4817  * Save the size attribute in the delegation, since the nfsnode
4818  * is going away.
4819  */
4820 void
4821 nfscl_reclaimnode(vnode_t vp)
4822 {
4823 	struct nfsclclient *clp;
4824 	struct nfscldeleg *dp;
4825 	struct nfsnode *np = VTONFS(vp);
4826 	struct nfsmount *nmp;
4827 
4828 	nmp = VFSTONFS(vp->v_mount);
4829 	if (!NFSHASNFSV4(nmp))
4830 		return;
4831 	NFSLOCKCLSTATE();
4832 	clp = nfscl_findcl(nmp);
4833 	if (clp == NULL) {
4834 		NFSUNLOCKCLSTATE();
4835 		return;
4836 	}
4837 	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4838 	if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE))
4839 		dp->nfsdl_size = np->n_size;
4840 	NFSUNLOCKCLSTATE();
4841 }
4842 
4843 /*
4844  * Get the saved size attribute in the delegation, since it is a
4845  * newly allocated nfsnode.
4846  */
4847 void
4848 nfscl_newnode(vnode_t vp)
4849 {
4850 	struct nfsclclient *clp;
4851 	struct nfscldeleg *dp;
4852 	struct nfsnode *np = VTONFS(vp);
4853 	struct nfsmount *nmp;
4854 
4855 	nmp = VFSTONFS(vp->v_mount);
4856 	if (!NFSHASNFSV4(nmp))
4857 		return;
4858 	NFSLOCKCLSTATE();
4859 	clp = nfscl_findcl(nmp);
4860 	if (clp == NULL) {
4861 		NFSUNLOCKCLSTATE();
4862 		return;
4863 	}
4864 	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4865 	if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE))
4866 		np->n_size = dp->nfsdl_size;
4867 	NFSUNLOCKCLSTATE();
4868 }
4869 
4870 /*
4871  * If there is a valid write delegation for this file, set the modtime
4872  * to the local clock time.
4873  */
4874 void
4875 nfscl_delegmodtime(vnode_t vp)
4876 {
4877 	struct nfsclclient *clp;
4878 	struct nfscldeleg *dp;
4879 	struct nfsnode *np = VTONFS(vp);
4880 	struct nfsmount *nmp;
4881 
4882 	nmp = VFSTONFS(vp->v_mount);
4883 	if (!NFSHASNFSV4(nmp))
4884 		return;
4885 	NFSLOCKMNT(nmp);
4886 	if ((nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0) {
4887 		NFSUNLOCKMNT(nmp);
4888 		return;
4889 	}
4890 	NFSUNLOCKMNT(nmp);
4891 	NFSLOCKCLSTATE();
4892 	clp = nfscl_findcl(nmp);
4893 	if (clp == NULL) {
4894 		NFSUNLOCKCLSTATE();
4895 		return;
4896 	}
4897 	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4898 	if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE)) {
4899 		nanotime(&dp->nfsdl_modtime);
4900 		dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
4901 	}
4902 	NFSUNLOCKCLSTATE();
4903 }
4904 
4905 /*
4906  * If there is a valid write delegation for this file with a modtime set,
4907  * put that modtime in mtime.
4908  */
4909 void
4910 nfscl_deleggetmodtime(vnode_t vp, struct timespec *mtime)
4911 {
4912 	struct nfsclclient *clp;
4913 	struct nfscldeleg *dp;
4914 	struct nfsnode *np = VTONFS(vp);
4915 	struct nfsmount *nmp;
4916 
4917 	nmp = VFSTONFS(vp->v_mount);
4918 	if (!NFSHASNFSV4(nmp))
4919 		return;
4920 	NFSLOCKMNT(nmp);
4921 	if ((nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0) {
4922 		NFSUNLOCKMNT(nmp);
4923 		return;
4924 	}
4925 	NFSUNLOCKMNT(nmp);
4926 	NFSLOCKCLSTATE();
4927 	clp = nfscl_findcl(nmp);
4928 	if (clp == NULL) {
4929 		NFSUNLOCKCLSTATE();
4930 		return;
4931 	}
4932 	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4933 	if (dp != NULL &&
4934 	    (dp->nfsdl_flags & (NFSCLDL_WRITE | NFSCLDL_MODTIMESET)) ==
4935 	    (NFSCLDL_WRITE | NFSCLDL_MODTIMESET))
4936 		*mtime = dp->nfsdl_modtime;
4937 	NFSUNLOCKCLSTATE();
4938 }
4939 
/*
 * Map a callback reply status (nd_repstat) onto an error value that is
 * valid for the operation and minor version, returned in XDR byte order.
 * Statuses not in the per-operation table are replaced by the table's
 * default (first) entry.
 */
static int
nfscl_errmap(struct nfsrv_descript *nd, u_int32_t minorvers)
{
	short *defaulterrp, *errp;

	if (!nd->nd_repstat)
		return (0);
	/* NOOP replies pass the low 16 bits of the status straight through. */
	if (nd->nd_procnum == NFSPROC_NOOP)
		return (txdr_unsigned(nd->nd_repstat & 0xffff));
	if (nd->nd_repstat == EBADRPC)
		return (txdr_unsigned(NFSERR_BADXDR));
	if (nd->nd_repstat == NFSERR_MINORVERMISMATCH ||
	    nd->nd_repstat == NFSERR_OPILLEGAL)
		return (txdr_unsigned(nd->nd_repstat));
	if (nd->nd_repstat >= NFSERR_BADIOMODE && nd->nd_repstat < 20000 &&
	    minorvers > NFSV4_MINORVERSION) {
		/* NFSv4.n error. */
		return (txdr_unsigned(nd->nd_repstat));
	}
	if (nd->nd_procnum < NFSV4OP_CBNOPS)
		errp = defaulterrp = nfscl_cberrmap[nd->nd_procnum];
	else
		return (txdr_unsigned(nd->nd_repstat));
	/* Entry 0 is the default; scan the remainder for an exact match. */
	while (*++errp)
		if (*errp == (short)nd->nd_repstat)
			return (txdr_unsigned(nd->nd_repstat));
	/* Not a valid error for this operation, so use the default. */
	return (txdr_unsigned(*defaulterrp));
}
4968 
4969 /*
4970  * Called to find/add a layout to a client.
4971  * This function returns the layout with a refcnt (shared lock) upon
4972  * success (returns 0) or with no lock/refcnt on the layout when an
4973  * error is returned.
4974  * If a layout is passed in via lypp, it is locked (exclusively locked).
4975  */
int
nfscl_layout(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen,
    nfsv4stateid_t *stateidp, int layouttype, int retonclose,
    struct nfsclflayouthead *fhlp, struct nfscllayout **lypp,
    struct ucred *cred, NFSPROC_T *p)
{
	struct nfsclclient *clp;
	struct nfscllayout *lyp, *tlyp;
	struct nfsclflayout *flp;
	struct nfsnode *np = VTONFS(vp);
	mount_t mp;
	int layout_passed_in;

	mp = nmp->nm_mountp;
	layout_passed_in = 1;
	tlyp = NULL;
	lyp = *lypp;
	if (lyp == NULL) {
		/*
		 * No layout was passed in, so pre-allocate one (with room
		 * for the file handle) before acquiring the state lock,
		 * since M_WAITOK may sleep.
		 */
		layout_passed_in = 0;
		tlyp = malloc(sizeof(*tlyp) + fhlen - 1, M_NFSLAYOUT,
		    M_WAITOK | M_ZERO);
	}

	NFSLOCKCLSTATE();
	clp = nmp->nm_clp;
	if (clp == NULL) {
		/* No clientid; release any lock held on the passed-in layout. */
		if (layout_passed_in != 0)
			nfsv4_unlock(&lyp->nfsly_lock, 0);
		NFSUNLOCKCLSTATE();
		if (tlyp != NULL)
			free(tlyp, M_NFSLAYOUT);
		return (EPERM);
	}
	if (lyp == NULL) {
		/*
		 * Although no lyp was passed in, another thread might have
		 * allocated one. If one is found, just increment its ref
		 * count and return it.
		 */
		lyp = nfscl_findlayout(clp, fhp, fhlen);
		if (lyp == NULL) {
			/* Use the pre-allocated layout; initialize it. */
			lyp = tlyp;
			tlyp = NULL;
			lyp->nfsly_stateid.seqid = stateidp->seqid;
			lyp->nfsly_stateid.other[0] = stateidp->other[0];
			lyp->nfsly_stateid.other[1] = stateidp->other[1];
			lyp->nfsly_stateid.other[2] = stateidp->other[2];
			lyp->nfsly_lastbyte = 0;
			LIST_INIT(&lyp->nfsly_flayread);
			LIST_INIT(&lyp->nfsly_flayrw);
			LIST_INIT(&lyp->nfsly_recall);
			lyp->nfsly_filesid[0] = np->n_vattr.na_filesid[0];
			lyp->nfsly_filesid[1] = np->n_vattr.na_filesid[1];
			lyp->nfsly_clp = clp;
			if (layouttype == NFSLAYOUT_FLEXFILE)
				lyp->nfsly_flags = NFSLY_FLEXFILE;
			else
				lyp->nfsly_flags = NFSLY_FILES;
			if (retonclose != 0)
				lyp->nfsly_flags |= NFSLY_RETONCLOSE;
			lyp->nfsly_fhlen = fhlen;
			NFSBCOPY(fhp, lyp->nfsly_fh, fhlen);
			/* Insert at the head of both the LRU list and hash. */
			TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
			LIST_INSERT_HEAD(NFSCLLAYOUTHASH(clp, fhp, fhlen), lyp,
			    nfsly_hash);
			lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
			nfscl_layoutcnt++;
		} else {
			/* Found one; update it and move it to the LRU head. */
			if (retonclose != 0)
				lyp->nfsly_flags |= NFSLY_RETONCLOSE;
			if (stateidp->seqid > lyp->nfsly_stateid.seqid)
				lyp->nfsly_stateid.seqid = stateidp->seqid;
			TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list);
			TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
			lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
		}
		/* Acquire the shared lock (refcnt) returned to the caller. */
		nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
		if (NFSCL_FORCEDISM(mp)) {
			/* Forced dismount in progress; fail the call. */
			NFSUNLOCKCLSTATE();
			if (tlyp != NULL)
				free(tlyp, M_NFSLAYOUT);
			return (EPERM);
		}
		*lypp = lyp;
	} else if (stateidp->seqid > lyp->nfsly_stateid.seqid)
		lyp->nfsly_stateid.seqid = stateidp->seqid;

	/* Merge the new list of File Layouts into the list. */
	flp = LIST_FIRST(fhlp);
	if (flp != NULL) {
		if (flp->nfsfl_iomode == NFSLAYOUTIOMODE_READ)
			nfscl_mergeflayouts(&lyp->nfsly_flayread, fhlp);
		else
			nfscl_mergeflayouts(&lyp->nfsly_flayrw, fhlp);
	}
	if (layout_passed_in != 0)
		nfsv4_unlock(&lyp->nfsly_lock, 1);
	NFSUNLOCKCLSTATE();
	if (tlyp != NULL)
		free(tlyp, M_NFSLAYOUT);
	return (0);
}
5078 
5079 /*
5080  * Search for a layout by MDS file handle.
5081  * If one is found, it is returned with a refcnt (shared lock) iff
5082  * retflpp returned non-NULL and locked (exclusive locked) iff retflpp is
5083  * returned NULL.
5084  */
struct nfscllayout *
nfscl_getlayout(struct nfsclclient *clp, uint8_t *fhp, int fhlen,
    uint64_t off, struct nfsclflayout **retflpp, int *recalledp)
{
	struct nfscllayout *lyp;
	mount_t mp;
	int error, igotlock;

	mp = clp->nfsc_nmp->nm_mountp;
	*recalledp = 0;
	*retflpp = NULL;
	NFSLOCKCLSTATE();
	lyp = nfscl_findlayout(clp, fhp, fhlen);
	if (lyp != NULL) {
		if ((lyp->nfsly_flags & NFSLY_RECALL) == 0) {
			/* Move to the head of the LRU list and renew it. */
			TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list);
			TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
			lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
			error = nfscl_findlayoutforio(lyp, off,
			    NFSV4OPEN_ACCESSREAD, retflpp);
			if (error == 0)
				/* Got a file layout; shared lock (refcnt). */
				nfsv4_getref(&lyp->nfsly_lock, NULL,
				    NFSCLSTATEMUTEXPTR, mp);
			else {
				/*
				 * No usable file layout for this offset, so
				 * acquire the exclusive lock instead.
				 */
				do {
					igotlock = nfsv4_lock(&lyp->nfsly_lock,
					    1, NULL, NFSCLSTATEMUTEXPTR, mp);
				} while (igotlock == 0 && !NFSCL_FORCEDISM(mp));
				*retflpp = NULL;
			}
			if (NFSCL_FORCEDISM(mp)) {
				/* Forced dismount; report as recalled. */
				lyp = NULL;
				*recalledp = 1;
			}
		} else {
			/* Layout is being recalled; it cannot be used. */
			lyp = NULL;
			*recalledp = 1;
		}
	}
	NFSUNLOCKCLSTATE();
	return (lyp);
}
5127 
5128 /*
5129  * Search for a layout by MDS file handle. If one is found, mark in to be
5130  * recalled, if it already marked "return on close".
5131  */
static void
nfscl_retoncloselayout(vnode_t vp, struct nfsclclient *clp, uint8_t *fhp,
    int fhlen, struct nfsclrecalllayout **recallpp)
{
	struct nfscllayout *lyp;
	uint32_t iomode;

	/* Layouts only apply to regular files with the callback daemon up. */
	if (vp->v_type != VREG || !NFSHASPNFS(VFSTONFS(vp->v_mount)) ||
	    nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
	    (VTONFS(vp)->n_flag & NNOLAYOUT) != 0)
		return;
	lyp = nfscl_findlayout(clp, fhp, fhlen);
	if (lyp != NULL && (lyp->nfsly_flags & (NFSLY_RETONCLOSE |
	    NFSLY_RECALL)) == NFSLY_RETONCLOSE) {
		/* Recall whichever iomodes have file layouts present. */
		iomode = 0;
		if (!LIST_EMPTY(&lyp->nfsly_flayread))
			iomode |= NFSLAYOUTIOMODE_READ;
		if (!LIST_EMPTY(&lyp->nfsly_flayrw))
			iomode |= NFSLAYOUTIOMODE_RW;
		/* The recall consumes *recallpp, so NULL it for the caller. */
		(void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE, lyp, iomode,
		    0, UINT64_MAX, lyp->nfsly_stateid.seqid, 0, 0, NULL,
		    *recallpp);
		NFSCL_DEBUG(4, "retoncls recall iomode=%d\n", iomode);
		*recallpp = NULL;
	}
}
5158 
5159 /*
5160  * Mark the layout to be recalled and with an error.
5161  * Also, disable the dsp from further use.
5162  */
5163 void
5164 nfscl_dserr(uint32_t op, uint32_t stat, struct nfscldevinfo *dp,
5165     struct nfscllayout *lyp, struct nfsclds *dsp)
5166 {
5167 	struct nfsclrecalllayout *recallp;
5168 	uint32_t iomode;
5169 
5170 	printf("DS being disabled, error=%d\n", stat);
5171 	/* Set up the return of the layout. */
5172 	recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, M_WAITOK);
5173 	iomode = 0;
5174 	NFSLOCKCLSTATE();
5175 	if ((lyp->nfsly_flags & NFSLY_RECALL) == 0) {
5176 		if (!LIST_EMPTY(&lyp->nfsly_flayread))
5177 			iomode |= NFSLAYOUTIOMODE_READ;
5178 		if (!LIST_EMPTY(&lyp->nfsly_flayrw))
5179 			iomode |= NFSLAYOUTIOMODE_RW;
5180 		(void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE, lyp, iomode,
5181 		    0, UINT64_MAX, lyp->nfsly_stateid.seqid, stat, op,
5182 		    dp->nfsdi_deviceid, recallp);
5183 		NFSUNLOCKCLSTATE();
5184 		NFSCL_DEBUG(4, "nfscl_dserr recall iomode=%d\n", iomode);
5185 	} else {
5186 		NFSUNLOCKCLSTATE();
5187 		free(recallp, M_NFSLAYRECALL);
5188 	}
5189 
5190 	/* And shut the TCP connection down. */
5191 	nfscl_cancelreqs(dsp);
5192 }
5193 
5194 /*
5195  * Cancel all RPCs for this "dsp" by closing the connection.
5196  * Also, mark the session as defunct.
5197  * If NFSCLDS_SAMECONN is set, the connection is shared with other DSs and
5198  * cannot be shut down.
5199  */
void
nfscl_cancelreqs(struct nfsclds *dsp)
{
	struct __rpc_client *cl;
	static int non_event;

	NFSLOCKDS(dsp);
	if ((dsp->nfsclds_flags & (NFSCLDS_CLOSED | NFSCLDS_SAMECONN)) == 0 &&
	    dsp->nfsclds_sockp != NULL &&
	    dsp->nfsclds_sockp->nr_client != NULL) {
		dsp->nfsclds_flags |= NFSCLDS_CLOSED;
		cl = dsp->nfsclds_sockp->nr_client;
		/* Mark the session defunct so that it will not be reused. */
		dsp->nfsclds_sess.nfsess_defunct = 1;
		/* Drop the DS mutex before closing the connection. */
		NFSUNLOCKDS(dsp);
		CLNT_CLOSE(cl);
		/*
		 * This 1sec sleep is done to reduce the number of reconnect
		 * attempts made on the DS while it has failed.
		 */
		tsleep(&non_event, PVFS, "ndscls", hz);
		return;
	}
	NFSUNLOCKDS(dsp);
}
5224 
5225 /*
5226  * Dereference a layout.
5227  */
5228 void
5229 nfscl_rellayout(struct nfscllayout *lyp, int exclocked)
5230 {
5231 
5232 	NFSLOCKCLSTATE();
5233 	if (exclocked != 0)
5234 		nfsv4_unlock(&lyp->nfsly_lock, 0);
5235 	else
5236 		nfsv4_relref(&lyp->nfsly_lock);
5237 	NFSUNLOCKCLSTATE();
5238 }
5239 
5240 /*
5241  * Search for a devinfo by deviceid. If one is found, return it after
5242  * acquiring a reference count on it.
5243  */
5244 struct nfscldevinfo *
5245 nfscl_getdevinfo(struct nfsclclient *clp, uint8_t *deviceid,
5246     struct nfscldevinfo *dip)
5247 {
5248 
5249 	NFSLOCKCLSTATE();
5250 	if (dip == NULL)
5251 		dip = nfscl_finddevinfo(clp, deviceid);
5252 	if (dip != NULL)
5253 		dip->nfsdi_refcnt++;
5254 	NFSUNLOCKCLSTATE();
5255 	return (dip);
5256 }
5257 
5258 /*
5259  * Dereference a devinfo structure.
5260  */
5261 static void
5262 nfscl_reldevinfo_locked(struct nfscldevinfo *dip)
5263 {
5264 
5265 	dip->nfsdi_refcnt--;
5266 	if (dip->nfsdi_refcnt == 0)
5267 		wakeup(&dip->nfsdi_refcnt);
5268 }
5269 
5270 /*
5271  * Dereference a devinfo structure.
5272  */
void
nfscl_reldevinfo(struct nfscldevinfo *dip)
{

	/* Locked wrapper around nfscl_reldevinfo_locked(). */
	NFSLOCKCLSTATE();
	nfscl_reldevinfo_locked(dip);
	NFSUNLOCKCLSTATE();
}
5281 
5282 /*
5283  * Find a layout for this file handle. Return NULL upon failure.
5284  */
5285 static struct nfscllayout *
5286 nfscl_findlayout(struct nfsclclient *clp, u_int8_t *fhp, int fhlen)
5287 {
5288 	struct nfscllayout *lyp;
5289 
5290 	LIST_FOREACH(lyp, NFSCLLAYOUTHASH(clp, fhp, fhlen), nfsly_hash)
5291 		if (lyp->nfsly_fhlen == fhlen &&
5292 		    !NFSBCMP(lyp->nfsly_fh, fhp, fhlen))
5293 			break;
5294 	return (lyp);
5295 }
5296 
5297 /*
5298  * Find a devinfo for this deviceid. Return NULL upon failure.
5299  */
5300 static struct nfscldevinfo *
5301 nfscl_finddevinfo(struct nfsclclient *clp, uint8_t *deviceid)
5302 {
5303 	struct nfscldevinfo *dip;
5304 
5305 	LIST_FOREACH(dip, &clp->nfsc_devinfo, nfsdi_list)
5306 		if (NFSBCMP(dip->nfsdi_deviceid, deviceid, NFSX_V4DEVICEID)
5307 		    == 0)
5308 			break;
5309 	return (dip);
5310 }
5311 
5312 /*
5313  * Merge the new file layout list into the main one, maintaining it in
5314  * increasing offset order.
5315  */
static void
nfscl_mergeflayouts(struct nfsclflayouthead *fhlp,
    struct nfsclflayouthead *newfhlp)
{
	struct nfsclflayout *flp, *nflp, *prevflp, *tflp;

	flp = LIST_FIRST(fhlp);
	prevflp = NULL;
	LIST_FOREACH_SAFE(nflp, newfhlp, nfsfl_list, tflp) {
		/* Advance to the insertion point for this new entry. */
		while (flp != NULL && flp->nfsfl_off < nflp->nfsfl_off) {
			prevflp = flp;
			flp = LIST_NEXT(flp, nfsfl_list);
		}
		/* Insert after prevflp, or at the head when nothing precedes. */
		if (prevflp == NULL)
			LIST_INSERT_HEAD(fhlp, nflp, nfsfl_list);
		else
			LIST_INSERT_AFTER(prevflp, nflp, nfsfl_list);
		/* Later entries in newfhlp sort at or after this one. */
		prevflp = nflp;
	}
}
5336 
5337 /*
5338  * Add this nfscldevinfo to the client, if it doesn't already exist.
5339  * This function consumes the structure pointed at by dip, if not NULL.
5340  */
int
nfscl_adddevinfo(struct nfsmount *nmp, struct nfscldevinfo *dip, int ind,
    struct nfsclflayout *flp)
{
	struct nfsclclient *clp;
	struct nfscldevinfo *tdip;
	uint8_t *dev;

	NFSLOCKCLSTATE();
	clp = nmp->nm_clp;
	if (clp == NULL) {
		/* No clientid, so the devinfo cannot be added. */
		NFSUNLOCKCLSTATE();
		if (dip != NULL)
			free(dip, M_NFSDEVINFO);
		return (ENODEV);
	}
	/* The deviceid lives in a different field for File vs Flex File. */
	if ((flp->nfsfl_flags & NFSFL_FILE) != 0)
		dev = flp->nfsfl_dev;
	else
		dev = flp->nfsfl_ffm[ind].dev;
	tdip = nfscl_finddevinfo(clp, dev);
	if (tdip != NULL) {
		/*
		 * Already have this devinfo; link the file layout to it and
		 * discard the caller's copy.
		 */
		tdip->nfsdi_layoutrefs++;
		if ((flp->nfsfl_flags & NFSFL_FILE) != 0)
			flp->nfsfl_devp = tdip;
		else
			flp->nfsfl_ffm[ind].devp = tdip;
		nfscl_reldevinfo_locked(tdip);
		NFSUNLOCKCLSTATE();
		if (dip != NULL)
			free(dip, M_NFSDEVINFO);
		return (0);
	}
	if (dip != NULL) {
		/* Insert the new devinfo and link the file layout to it. */
		LIST_INSERT_HEAD(&clp->nfsc_devinfo, dip, nfsdi_list);
		dip->nfsdi_layoutrefs = 1;
		if ((flp->nfsfl_flags & NFSFL_FILE) != 0)
			flp->nfsfl_devp = dip;
		else
			flp->nfsfl_ffm[ind].devp = dip;
	}
	NFSUNLOCKCLSTATE();
	if (dip == NULL)
		return (ENODEV);
	return (0);
}
5387 
5388 /*
5389  * Free up a layout structure and associated file layout structure(s).
5390  */
5391 void
5392 nfscl_freelayout(struct nfscllayout *layp)
5393 {
5394 	struct nfsclflayout *flp, *nflp;
5395 	struct nfsclrecalllayout *rp, *nrp;
5396 
5397 	LIST_FOREACH_SAFE(flp, &layp->nfsly_flayread, nfsfl_list, nflp) {
5398 		LIST_REMOVE(flp, nfsfl_list);
5399 		nfscl_freeflayout(flp);
5400 	}
5401 	LIST_FOREACH_SAFE(flp, &layp->nfsly_flayrw, nfsfl_list, nflp) {
5402 		LIST_REMOVE(flp, nfsfl_list);
5403 		nfscl_freeflayout(flp);
5404 	}
5405 	LIST_FOREACH_SAFE(rp, &layp->nfsly_recall, nfsrecly_list, nrp) {
5406 		LIST_REMOVE(rp, nfsrecly_list);
5407 		free(rp, M_NFSLAYRECALL);
5408 	}
5409 	nfscl_layoutcnt--;
5410 	free(layp, M_NFSLAYOUT);
5411 }
5412 
5413 /*
5414  * Free up a file layout structure.
5415  */
void
nfscl_freeflayout(struct nfsclflayout *flp)
{
	int i, j;

	if ((flp->nfsfl_flags & NFSFL_FILE) != 0) {
		/* Free the DS file handles of a File layout. */
		for (i = 0; i < flp->nfsfl_fhcnt; i++)
			free(flp->nfsfl_fh[i], M_NFSFH);
		/* Drop this layout's reference on the devinfo. */
		if (flp->nfsfl_devp != NULL)
			flp->nfsfl_devp->nfsdi_layoutrefs--;
	}
	if ((flp->nfsfl_flags & NFSFL_FLEXFILE) != 0)
		/* Do the same for each mirror of a Flex File layout. */
		for (i = 0; i < flp->nfsfl_mirrorcnt; i++) {
			for (j = 0; j < flp->nfsfl_ffm[i].fhcnt; j++)
				free(flp->nfsfl_ffm[i].fh[j], M_NFSFH);
			if (flp->nfsfl_ffm[i].devp != NULL)
				flp->nfsfl_ffm[i].devp->nfsdi_layoutrefs--;
		}
	free(flp, M_NFSFLAYOUT);
}
5436 
5437 /*
5438  * Free up a file layout devinfo structure.
5439  */
void
nfscl_freedevinfo(struct nfscldevinfo *dip)
{

	/* NOTE(review): caller is responsible for ensuring no refs remain. */
	free(dip, M_NFSDEVINFO);
}
5446 
5447 /*
5448  * Mark any layouts that match as recalled.
5449  */
static int
nfscl_layoutrecall(int recalltype, struct nfscllayout *lyp, uint32_t iomode,
    uint64_t off, uint64_t len, uint32_t stateseqid, uint32_t stat, uint32_t op,
    char *devid, struct nfsclrecalllayout *recallp)
{
	struct nfsclrecalllayout *rp, *orp;

	/* Fill in the recall entry; recallp is consumed by this function. */
	recallp->nfsrecly_recalltype = recalltype;
	recallp->nfsrecly_iomode = iomode;
	recallp->nfsrecly_stateseqid = stateseqid;
	recallp->nfsrecly_off = off;
	recallp->nfsrecly_len = len;
	recallp->nfsrecly_stat = stat;
	recallp->nfsrecly_op = op;
	if (devid != NULL)
		NFSBCOPY(devid, recallp->nfsrecly_devid, NFSX_V4DEVICEID);
	/*
	 * Order the list as file returns first, followed by fsid and any
	 * returns, both in increasing stateseqid order.
	 * Note that the seqids wrap around, so 1 is after 0xffffffff.
	 * (I'm not sure this is correct because I find RFC5661 confusing
	 *  on this, but hopefully it will work ok.)
	 */
	orp = NULL;
	LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) {
		orp = rp;
		if ((recalltype == NFSLAYOUTRETURN_FILE &&
		     (rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE ||
		      nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) ||
		    (recalltype != NFSLAYOUTRETURN_FILE &&
		     rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE &&
		     nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) {
			LIST_INSERT_BEFORE(rp, recallp, nfsrecly_list);
			break;
		}

		/*
		 * Put any error return on all the file returns that will
		 * precede this one.
		 */
		if (rp->nfsrecly_recalltype == NFSLAYOUTRETURN_FILE &&
		   stat != 0 && rp->nfsrecly_stat == 0) {
			rp->nfsrecly_stat = stat;
			rp->nfsrecly_op = op;
			if (devid != NULL)
				NFSBCOPY(devid, rp->nfsrecly_devid,
				    NFSX_V4DEVICEID);
		}
	}
	if (rp == NULL) {
		/* No insertion point found; append (or start the list). */
		if (orp == NULL)
			LIST_INSERT_HEAD(&lyp->nfsly_recall, recallp,
			    nfsrecly_list);
		else
			LIST_INSERT_AFTER(orp, recallp, nfsrecly_list);
	}
	/* Mark the layout recalled and wake anyone sleeping on the clientid. */
	lyp->nfsly_flags |= NFSLY_RECALL;
	wakeup(lyp->nfsly_clp);
	return (0);
}
5510 
5511 /*
5512  * Compare the two seqids for ordering. The trick is that the seqids can
5513  * wrap around from 0xffffffff->0, so check for the cases where one
5514  * has wrapped around.
5515  * Return 1 if seqid1 comes before seqid2, 0 otherwise.
5516  */
/*
 * Compare two 32 bit seqids with wraparound (serial number) semantics.
 * Return 1 if seqid1 comes before (or equals) seqid2, 0 otherwise.
 */
static int
nfscl_seq(uint32_t seqid1, uint32_t seqid2)
{
	int before;

	if (seqid2 > seqid1)
		/* Before, unless seqid2 wrapped around past seqid1. */
		before = (seqid2 - seqid1 < 0x7fffffff);
	else if (seqid1 > seqid2)
		/* After, unless seqid1 wrapped around past seqid2. */
		before = (seqid1 - seqid2 >= 0x7fffffff);
	else
		before = 1;
	return (before);
}
5531 
5532 /*
5533  * Do a layout return for each of the recalls.
5534  */
static void
nfscl_layoutreturn(struct nfsmount *nmp, struct nfscllayout *lyp,
    struct ucred *cred, NFSPROC_T *p)
{
	struct nfsclrecalllayout *rp;
	nfsv4stateid_t stateid;
	int layouttype;

	/* Work on a local copy of the layout stateid. */
	NFSBCOPY(lyp->nfsly_stateid.other, stateid.other, NFSX_STATEIDOTHER);
	stateid.seqid = lyp->nfsly_stateid.seqid;
	if ((lyp->nfsly_flags & NFSLY_FILES) != 0)
		layouttype = NFSLAYOUT_NFSV4_1_FILES;
	else
		layouttype = NFSLAYOUT_FLEXFILE;
	/* Do a LayoutReturn RPC for each recall; RPC errors are ignored. */
	LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) {
		(void)nfsrpc_layoutreturn(nmp, lyp->nfsly_fh,
		    lyp->nfsly_fhlen, 0, layouttype,
		    rp->nfsrecly_iomode, rp->nfsrecly_recalltype,
		    rp->nfsrecly_off, rp->nfsrecly_len,
		    &stateid, cred, p, rp->nfsrecly_stat, rp->nfsrecly_op,
		    rp->nfsrecly_devid);
	}
}
5558 
5559 /*
5560  * Do the layout commit for a file layout.
5561  */
static void
nfscl_dolayoutcommit(struct nfsmount *nmp, struct nfscllayout *lyp,
    struct ucred *cred, NFSPROC_T *p)
{
	struct nfsclflayout *flp;
	uint64_t len;
	int error, layouttype;

	if ((lyp->nfsly_flags & NFSLY_FILES) != 0)
		layouttype = NFSLAYOUT_NFSV4_1_FILES;
	else
		layouttype = NFSLAYOUT_FLEXFILE;
	/* Only the write (rw) file layouts need to be committed. */
	LIST_FOREACH(flp, &lyp->nfsly_flayrw, nfsfl_list) {
		if (layouttype == NFSLAYOUT_FLEXFILE &&
		    (flp->nfsfl_fflags & NFSFLEXFLAG_NO_LAYOUTCOMMIT) != 0) {
			NFSCL_DEBUG(4, "Flex file: no layoutcommit\n");
			/* If not supported, don't bother doing it. */
			NFSLOCKMNT(nmp);
			nmp->nm_state |= NFSSTA_NOLAYOUTCOMMIT;
			NFSUNLOCKMNT(nmp);
			break;
		} else if (flp->nfsfl_off <= lyp->nfsly_lastbyte) {
			/* Only commit segments at/below nfsly_lastbyte. */
			len = flp->nfsfl_end - flp->nfsfl_off;
			error = nfsrpc_layoutcommit(nmp, lyp->nfsly_fh,
			    lyp->nfsly_fhlen, 0, flp->nfsfl_off, len,
			    lyp->nfsly_lastbyte, &lyp->nfsly_stateid,
			    layouttype, cred, p, NULL);
			NFSCL_DEBUG(4, "layoutcommit err=%d\n", error);
			if (error == NFSERR_NOTSUPP) {
				/* If not supported, don't bother doing it. */
				NFSLOCKMNT(nmp);
				nmp->nm_state |= NFSSTA_NOLAYOUTCOMMIT;
				NFSUNLOCKMNT(nmp);
				break;
			}
		}
	}
}
5600 
5601 /*
5602  * Commit all layouts for a file (vnode).
5603  */
int
nfscl_layoutcommit(vnode_t vp, NFSPROC_T *p)
{
	struct nfsclclient *clp;
	struct nfscllayout *lyp;
	struct nfsnode *np = VTONFS(vp);
	mount_t mp;
	struct nfsmount *nmp;

	mp = vp->v_mount;
	nmp = VFSTONFS(mp);
	/* Nothing to do when LayoutCommit is known to be unsupported. */
	if (NFSHASNOLAYOUTCOMMIT(nmp))
		return (0);
	NFSLOCKCLSTATE();
	clp = nmp->nm_clp;
	if (clp == NULL) {
		NFSUNLOCKCLSTATE();
		return (EPERM);
	}
	lyp = nfscl_findlayout(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
	if (lyp == NULL) {
		NFSUNLOCKCLSTATE();
		return (EPERM);
	}
	/* Hold a reference so the layout cannot go away during the RPCs. */
	nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
	if (NFSCL_FORCEDISM(mp)) {
		NFSUNLOCKCLSTATE();
		return (EPERM);
	}
tryagain:
	/*
	 * Clear NFSLY_WRITTEN before doing the commit.  If it gets set
	 * again while the state lock is released for the RPCs, loop and
	 * commit once more.
	 */
	if ((lyp->nfsly_flags & NFSLY_WRITTEN) != 0) {
		lyp->nfsly_flags &= ~NFSLY_WRITTEN;
		NFSUNLOCKCLSTATE();
		NFSCL_DEBUG(4, "do layoutcommit2\n");
		nfscl_dolayoutcommit(clp->nfsc_nmp, lyp, NFSPROCCRED(p), p);
		NFSLOCKCLSTATE();
		goto tryagain;
	}
	nfsv4_relref(&lyp->nfsly_lock);
	NFSUNLOCKCLSTATE();
	return (0);
}
5646