xref: /freebsd/sys/fs/nfsclient/nfs_clstate.c (revision adfe4271248cd5ff6f17e6604da354b1c2f0026c)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2009 Rick Macklem, University of Guelph
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 /*
34  * These functions implement the client side state handling for NFSv4.
35  * NFSv4 state handling:
36  * - A lockowner is used to determine lock contention, so it
37  *   corresponds directly to a Posix pid. (1 to 1 mapping)
38  * - The correct granularity of an OpenOwner is not nearly so
39  *   obvious. An OpenOwner does the following:
40  *   - provides a serial sequencing of Open/Close/Lock-with-new-lockowner
41  *   - is used to check for Open/Share contention (not applicable to
42  *     this client, since all Opens are Deny_None)
 *   As such, I considered both extremes.
44  *   1 OpenOwner per ClientID - Simple to manage, but fully serializes
45  *   all Open, Close and Lock (with a new lockowner) Ops.
46  *   1 OpenOwner for each Open - This one results in an OpenConfirm for
47  *   every Open, for most servers.
 *   So, I chose to use the same mapping as I did for LockOwners.
49  *   The main concern here is that you can end up with multiple Opens
50  *   for the same File Handle, but on different OpenOwners (opens
51  *   inherited from parents, grandparents...) and you do not know
52  *   which of these the vnodeop close applies to. This is handled by
53  *   delaying the Close Op(s) until all of the Opens have been closed.
54  *   (It is not yet obvious if this is the correct granularity.)
55  * - How the code handles serialization:
56  *   - For the ClientId, it uses an exclusive lock while getting its
57  *     SetClientId and during recovery. Otherwise, it uses a shared
58  *     lock via a reference count.
59  *   - For the rest of the data structures, it uses an SMP mutex
60  *     (once the nfs client is SMP safe) and doesn't sleep while
61  *     manipulating the linked lists.
62  *   - The serialization of Open/Close/Lock/LockU falls out in the
63  *     "wash", since OpenOwners and LockOwners are both mapped from
64  *     Posix pid. In other words, there is only one Posix pid using
65  *     any given owner, so that owner is serialized. (If you change
66  *     the granularity of the OpenOwner, then code must be added to
67  *     serialize Ops on the OpenOwner.)
68  * - When to get rid of OpenOwners and LockOwners.
69  *   - The function nfscl_cleanup_common() is executed after a process exits.
70  *     It goes through the client list looking for all Open and Lock Owners.
71  *     When one is found, it is marked "defunct" or in the case of
72  *     an OpenOwner without any Opens, freed.
73  *     The renew thread scans for defunct Owners and gets rid of them,
74  *     if it can. The LockOwners will also be deleted when the
75  *     associated Open is closed.
76  *   - If the LockU or Close Op(s) fail during close in a way
77  *     that could be recovered upon retry, they are relinked to the
78  *     ClientId's defunct open list and retried by the renew thread
79  *     until they succeed or an unmount/recovery occurs.
80  *     (Since we are done with them, they do not need to be recovered.)
81  */
82 
83 #include <fs/nfs/nfsport.h>
84 
/*
 * Global variables
 *
 * The extern declarations refer to objects defined elsewhere; the
 * remaining objects are defined here.
 */
extern struct nfsstatsv1 nfsstatsv1;
extern struct nfsreqhead nfsd_reqq;
extern u_int32_t newnfs_false, newnfs_true;
extern int nfscl_debuglevel;
extern int nfscl_enablecallb;
extern int nfs_numnfscbd;
/*
 * NOTE(review): NFSREQSPINLOCK and NFSCLSTATEMUTEX presumably expand to
 * the declarations of the locks used by this file (the latter being the
 * one taken by NFSLOCKCLSTATE()/NFSUNLOCKCLSTATE()) - confirm against
 * the macro definitions.
 */
NFSREQSPINLOCK;
NFSCLSTATEMUTEX;
int nfscl_inited = 0;
struct nfsclhead nfsclhead;	/* Head of clientid list */
/* Initial values come from the compile time high water constants. */
int nfscl_deleghighwater = NFSCLDELEGHIGHWATER;
int nfscl_layouthighwater = NFSCLLAYOUTHIGHWATER;

/* Incremented by nfscl_deleg() each time a delegation is linked in. */
static int nfscl_delegcnt = 0;
/* NOTE(review): presumably the layout analogue of nfscl_delegcnt. */
static int nfscl_layoutcnt = 0;
/*
 * Forward declarations of the functions that are local to this file.
 */
static int nfscl_getopen(struct nfsclownerhead *, struct nfsclopenhash *,
    u_int8_t *, int, u_int8_t *, u_int8_t *, u_int32_t,
    struct nfscllockowner **, struct nfsclopen **);
static bool nfscl_checkown(struct nfsclowner *, struct nfsclopen *, uint8_t *,
    uint8_t *, struct nfscllockowner **, struct nfsclopen **,
    struct nfsclopen **);
static void nfscl_clrelease(struct nfsclclient *);
static void nfscl_cleanclient(struct nfsclclient *);
static void nfscl_expireclient(struct nfsclclient *, struct nfsmount *,
    struct ucred *, NFSPROC_T *);
static int nfscl_expireopen(struct nfsclclient *, struct nfsclopen *,
    struct nfsmount *, struct ucred *, NFSPROC_T *);
static void nfscl_recover(struct nfsclclient *, bool *, struct ucred *,
    NFSPROC_T *);
static void nfscl_insertlock(struct nfscllockowner *, struct nfscllock *,
    struct nfscllock *, int);
static int nfscl_updatelock(struct nfscllockowner *, struct nfscllock **,
    struct nfscllock **, int);
static void nfscl_delegreturnall(struct nfsclclient *, NFSPROC_T *);
static u_int32_t nfscl_nextcbident(void);
static mount_t nfscl_getmnt(int, uint8_t *, u_int32_t, struct nfsclclient **);
static struct nfsclclient *nfscl_getclnt(u_int32_t);
static struct nfsclclient *nfscl_getclntsess(uint8_t *);
static struct nfscldeleg *nfscl_finddeleg(struct nfsclclient *, u_int8_t *,
    int);
static void nfscl_retoncloselayout(vnode_t, struct nfsclclient *, uint8_t *,
    int, struct nfsclrecalllayout **);
static void nfscl_reldevinfo_locked(struct nfscldevinfo *);
static struct nfscllayout *nfscl_findlayout(struct nfsclclient *, u_int8_t *,
    int);
static struct nfscldevinfo *nfscl_finddevinfo(struct nfsclclient *, uint8_t *);
static int nfscl_checkconflict(struct nfscllockownerhead *, struct nfscllock *,
    u_int8_t *, struct nfscllock **);
static void nfscl_freealllocks(struct nfscllockownerhead *, int);
static int nfscl_localconflict(struct nfsclclient *, u_int8_t *, int,
    struct nfscllock *, u_int8_t *, struct nfscldeleg *, struct nfscllock **);
static void nfscl_newopen(struct nfsclclient *, struct nfscldeleg *,
    struct nfsclowner **, struct nfsclowner **, struct nfsclopen **,
    struct nfsclopen **, u_int8_t *, u_int8_t *, int, struct ucred *, int *);
static int nfscl_moveopen(vnode_t , struct nfsclclient *,
    struct nfsmount *, struct nfsclopen *, struct nfsclowner *,
    struct nfscldeleg *, struct ucred *, NFSPROC_T *);
static void nfscl_totalrecall(struct nfsclclient *);
static int nfscl_relock(vnode_t , struct nfsclclient *, struct nfsmount *,
    struct nfscllockowner *, struct nfscllock *, struct ucred *, NFSPROC_T *);
static int nfscl_tryopen(struct nfsmount *, vnode_t , u_int8_t *, int,
    u_int8_t *, int, u_int32_t, struct nfsclopen *, u_int8_t *, int,
    struct nfscldeleg **, int, u_int32_t, struct ucred *, NFSPROC_T *);
static int nfscl_trylock(struct nfsmount *, vnode_t , u_int8_t *,
    int, struct nfscllockowner *, int, int, u_int64_t, u_int64_t, short,
    struct ucred *, NFSPROC_T *);
static int nfsrpc_reopen(struct nfsmount *, u_int8_t *, int, u_int32_t,
    struct nfsclopen *, struct nfscldeleg **, struct ucred *, NFSPROC_T *);
static void nfscl_freedeleg(struct nfscldeleghead *, struct nfscldeleg *,
    bool);
static int nfscl_errmap(struct nfsrv_descript *, u_int32_t);
static void nfscl_cleanup_common(struct nfsclclient *, u_int8_t *);
static int nfscl_recalldeleg(struct nfsclclient *, struct nfsmount *,
    struct nfscldeleg *, vnode_t, struct ucred *, NFSPROC_T *, int,
    vnode_t *);
static void nfscl_freeopenowner(struct nfsclowner *, int);
static void nfscl_cleandeleg(struct nfscldeleg *);
static int nfscl_trydelegreturn(struct nfscldeleg *, struct ucred *,
    struct nfsmount *, NFSPROC_T *);
static void nfscl_emptylockowner(struct nfscllockowner *,
    struct nfscllockownerfhhead *);
static void nfscl_mergeflayouts(struct nfsclflayouthead *,
    struct nfsclflayouthead *);
static int nfscl_layoutrecall(int, struct nfscllayout *, uint32_t, uint64_t,
    uint64_t, uint32_t, uint32_t, uint32_t, char *, struct nfsclrecalllayout *);
static int nfscl_seq(uint32_t, uint32_t);
static void nfscl_layoutreturn(struct nfsmount *, struct nfscllayout *,
    struct ucred *, NFSPROC_T *);
static void nfscl_dolayoutcommit(struct nfsmount *, struct nfscllayout *,
    struct ucred *, NFSPROC_T *);
178 
/*
 * Error tables.  Each table is a list of NFS error numbers terminated
 * by a 0 entry.
 * NOTE(review): presumably the first entry of each table is the default
 * error and the entries after it are the errors that may be passed
 * through unchanged - confirm against nfscl_errmap().
 */
static short nfscberr_null[] = {
	0,
	0,
};

static short nfscberr_getattr[] = {
	NFSERR_RESOURCE,
	NFSERR_BADHANDLE,
	NFSERR_BADXDR,
	NFSERR_RESOURCE,
	NFSERR_SERVERFAULT,
	0,
};

static short nfscberr_recall[] = {
	NFSERR_RESOURCE,
	NFSERR_BADHANDLE,
	NFSERR_BADSTATEID,
	NFSERR_BADXDR,
	NFSERR_RESOURCE,
	NFSERR_SERVERFAULT,
	0,
};

/*
 * Table of the above error lists.
 * NOTE(review): presumably indexed by the callback operation number, with
 * the first three slots sharing the empty list - confirm against
 * nfscl_errmap().
 */
static short *nfscl_cberrmap[] = {
	nfscberr_null,
	nfscberr_null,
	nfscberr_null,
	nfscberr_getattr,
	nfscberr_recall
};
210 
/*
 * Yields AF_INET6 when NFSCLFLAGS_AFINET6 is set in the client's
 * nfsc_flags, otherwise AF_INET.
 */
#define	NETFAMILY(clp)							\
	((((clp)->nfsc_flags & NFSCLFLAGS_AFINET6) != 0) ?		\
	    AF_INET6 : AF_INET)
213 
214 /*
215  * Called for an open operation.
216  * If the nfhp argument is NULL, just get an openowner.
217  */
218 int
219 nfscl_open(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t amode, int usedeleg,
220     struct ucred *cred, NFSPROC_T *p, struct nfsclowner **owpp,
221     struct nfsclopen **opp, int *newonep, int *retp, int lockit)
222 {
223 	struct nfsclclient *clp;
224 	struct nfsclowner *owp, *nowp;
225 	struct nfsclopen *op = NULL, *nop = NULL;
226 	struct nfscldeleg *dp;
227 	struct nfsclownerhead *ohp;
228 	u_int8_t own[NFSV4CL_LOCKNAMELEN];
229 	int ret;
230 
231 	if (newonep != NULL)
232 		*newonep = 0;
233 	if (opp != NULL)
234 		*opp = NULL;
235 	if (owpp != NULL)
236 		*owpp = NULL;
237 
238 	/*
239 	 * Might need one or both of these, so MALLOC them now, to
240 	 * avoid a tsleep() in MALLOC later.
241 	 */
242 	nowp = malloc(sizeof (struct nfsclowner),
243 	    M_NFSCLOWNER, M_WAITOK);
244 	if (nfhp != NULL) {
245 	    nop = malloc(sizeof (struct nfsclopen) +
246 		fhlen - 1, M_NFSCLOPEN, M_WAITOK);
247 	    nop->nfso_hash.le_prev = NULL;
248 	}
249 	ret = nfscl_getcl(vp->v_mount, cred, p, 1, &clp);
250 	if (ret != 0) {
251 		free(nowp, M_NFSCLOWNER);
252 		if (nop != NULL)
253 			free(nop, M_NFSCLOPEN);
254 		return (ret);
255 	}
256 
257 	/*
258 	 * Get the Open iff it already exists.
259 	 * If none found, add the new one or return error, depending upon
260 	 * "create".
261 	 */
262 	NFSLOCKCLSTATE();
263 	dp = NULL;
264 	/* First check the delegation list */
265 	if (nfhp != NULL && usedeleg) {
266 		LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) {
267 			if (dp->nfsdl_fhlen == fhlen &&
268 			    !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) {
269 				if (!(amode & NFSV4OPEN_ACCESSWRITE) ||
270 				    (dp->nfsdl_flags & NFSCLDL_WRITE))
271 					break;
272 				dp = NULL;
273 				break;
274 			}
275 		}
276 	}
277 
278 	/* For NFSv4.1/4.2 and this option, use a single open_owner. */
279 	if (NFSHASONEOPENOWN(VFSTONFS(vp->v_mount)))
280 		nfscl_filllockowner(NULL, own, F_POSIX);
281 	else
282 		nfscl_filllockowner(p->td_proc, own, F_POSIX);
283 	if (dp != NULL)
284 		ohp = &dp->nfsdl_owner;
285 	else
286 		ohp = &clp->nfsc_owner;
287 	/* Now, search for an openowner */
288 	LIST_FOREACH(owp, ohp, nfsow_list) {
289 		if (!NFSBCMP(owp->nfsow_owner, own, NFSV4CL_LOCKNAMELEN))
290 			break;
291 	}
292 
293 	/*
294 	 * Create a new open, as required.
295 	 */
296 	nfscl_newopen(clp, dp, &owp, &nowp, &op, &nop, own, nfhp, fhlen,
297 	    cred, newonep);
298 
299 	/*
300 	 * Now, check the mode on the open and return the appropriate
301 	 * value.
302 	 */
303 	if (retp != NULL) {
304 		if (nfhp != NULL && dp != NULL && nop == NULL)
305 			/* new local open on delegation */
306 			*retp = NFSCLOPEN_SETCRED;
307 		else
308 			*retp = NFSCLOPEN_OK;
309 	}
310 	if (op != NULL && (amode & ~(op->nfso_mode))) {
311 		op->nfso_mode |= amode;
312 		if (retp != NULL && dp == NULL)
313 			*retp = NFSCLOPEN_DOOPEN;
314 	}
315 
316 	/*
317 	 * Serialize modifications to the open owner for multiple threads
318 	 * within the same process using a read/write sleep lock.
319 	 * For NFSv4.1 and a single OpenOwner, allow concurrent open operations
320 	 * by acquiring a shared lock.  The close operations still use an
321 	 * exclusive lock for this case.
322 	 */
323 	if (lockit != 0) {
324 		if (NFSHASONEOPENOWN(VFSTONFS(vp->v_mount))) {
325 			/*
326 			 * Get a shared lock on the OpenOwner, but first
327 			 * wait for any pending exclusive lock, so that the
328 			 * exclusive locker gets priority.
329 			 */
330 			nfsv4_lock(&owp->nfsow_rwlock, 0, NULL,
331 			    NFSCLSTATEMUTEXPTR, NULL);
332 			nfsv4_getref(&owp->nfsow_rwlock, NULL,
333 			    NFSCLSTATEMUTEXPTR, NULL);
334 		} else
335 			nfscl_lockexcl(&owp->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
336 	}
337 	NFSUNLOCKCLSTATE();
338 	if (nowp != NULL)
339 		free(nowp, M_NFSCLOWNER);
340 	if (nop != NULL)
341 		free(nop, M_NFSCLOPEN);
342 	if (owpp != NULL)
343 		*owpp = owp;
344 	if (opp != NULL)
345 		*opp = op;
346 	return (0);
347 }
348 
349 /*
350  * Create a new open, as required.
351  */
352 static void
353 nfscl_newopen(struct nfsclclient *clp, struct nfscldeleg *dp,
354     struct nfsclowner **owpp, struct nfsclowner **nowpp, struct nfsclopen **opp,
355     struct nfsclopen **nopp, u_int8_t *own, u_int8_t *fhp, int fhlen,
356     struct ucred *cred, int *newonep)
357 {
358 	struct nfsclowner *owp = *owpp, *nowp;
359 	struct nfsclopen *op, *nop;
360 
361 	if (nowpp != NULL)
362 		nowp = *nowpp;
363 	else
364 		nowp = NULL;
365 	if (nopp != NULL)
366 		nop = *nopp;
367 	else
368 		nop = NULL;
369 	if (owp == NULL && nowp != NULL) {
370 		NFSBCOPY(own, nowp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
371 		LIST_INIT(&nowp->nfsow_open);
372 		nowp->nfsow_clp = clp;
373 		nowp->nfsow_seqid = 0;
374 		nowp->nfsow_defunct = 0;
375 		nfscl_lockinit(&nowp->nfsow_rwlock);
376 		if (dp != NULL) {
377 			nfsstatsv1.cllocalopenowners++;
378 			LIST_INSERT_HEAD(&dp->nfsdl_owner, nowp, nfsow_list);
379 		} else {
380 			nfsstatsv1.clopenowners++;
381 			LIST_INSERT_HEAD(&clp->nfsc_owner, nowp, nfsow_list);
382 		}
383 		owp = *owpp = nowp;
384 		*nowpp = NULL;
385 		if (newonep != NULL)
386 			*newonep = 1;
387 	}
388 
389 	 /* If an fhp has been specified, create an Open as well. */
390 	if (fhp != NULL) {
391 		/* and look for the correct open, based upon FH */
392 		LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
393 			if (op->nfso_fhlen == fhlen &&
394 			    !NFSBCMP(op->nfso_fh, fhp, fhlen))
395 				break;
396 		}
397 		if (op == NULL && nop != NULL) {
398 			nop->nfso_own = owp;
399 			nop->nfso_mode = 0;
400 			nop->nfso_opencnt = 0;
401 			nop->nfso_posixlock = 1;
402 			nop->nfso_fhlen = fhlen;
403 			NFSBCOPY(fhp, nop->nfso_fh, fhlen);
404 			LIST_INIT(&nop->nfso_lock);
405 			nop->nfso_stateid.seqid = 0;
406 			nop->nfso_stateid.other[0] = 0;
407 			nop->nfso_stateid.other[1] = 0;
408 			nop->nfso_stateid.other[2] = 0;
409 			KASSERT(cred != NULL, ("%s: cred NULL\n", __func__));
410 			newnfs_copyincred(cred, &nop->nfso_cred);
411 			if (dp != NULL) {
412 				TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
413 				TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp,
414 				    nfsdl_list);
415 				dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
416 				nfsstatsv1.cllocalopens++;
417 			} else {
418 				LIST_INSERT_HEAD(NFSCLOPENHASH(clp, fhp, fhlen),
419 				    nop, nfso_hash);
420 				nfsstatsv1.clopens++;
421 			}
422 			LIST_INSERT_HEAD(&owp->nfsow_open, nop, nfso_list);
423 			*opp = nop;
424 			*nopp = NULL;
425 			if (newonep != NULL)
426 				*newonep = 1;
427 		} else {
428 			*opp = op;
429 		}
430 	}
431 }
432 
433 /*
434  * Called to find/add a delegation to a client.
435  */
436 int
437 nfscl_deleg(mount_t mp, struct nfsclclient *clp, u_int8_t *nfhp,
438     int fhlen, struct ucred *cred, NFSPROC_T *p, struct nfscldeleg **dpp)
439 {
440 	struct nfscldeleg *dp = *dpp, *tdp;
441 
442 	/*
443 	 * First, if we have received a Read delegation for a file on a
444 	 * read/write file system, just return it, because they aren't
445 	 * useful, imho.
446 	 */
447 	if (mp != NULL && dp != NULL && !NFSMNT_RDONLY(mp) &&
448 	    (dp->nfsdl_flags & NFSCLDL_READ)) {
449 		(void) nfscl_trydelegreturn(dp, cred, VFSTONFS(mp), p);
450 		free(dp, M_NFSCLDELEG);
451 		*dpp = NULL;
452 		return (0);
453 	}
454 
455 	/* Look for the correct deleg, based upon FH */
456 	NFSLOCKCLSTATE();
457 	tdp = nfscl_finddeleg(clp, nfhp, fhlen);
458 	if (tdp == NULL) {
459 		if (dp == NULL) {
460 			NFSUNLOCKCLSTATE();
461 			return (NFSERR_BADSTATEID);
462 		}
463 		*dpp = NULL;
464 		TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list);
465 		LIST_INSERT_HEAD(NFSCLDELEGHASH(clp, nfhp, fhlen), dp,
466 		    nfsdl_hash);
467 		dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
468 		nfsstatsv1.cldelegates++;
469 		nfscl_delegcnt++;
470 	} else {
471 		/*
472 		 * Delegation already exists, what do we do if a new one??
473 		 */
474 		if (dp != NULL) {
475 			printf("Deleg already exists!\n");
476 			free(dp, M_NFSCLDELEG);
477 			*dpp = NULL;
478 		} else {
479 			*dpp = tdp;
480 		}
481 	}
482 	NFSUNLOCKCLSTATE();
483 	return (0);
484 }
485 
486 /*
487  * Find a delegation for this file handle. Return NULL upon failure.
488  */
489 static struct nfscldeleg *
490 nfscl_finddeleg(struct nfsclclient *clp, u_int8_t *fhp, int fhlen)
491 {
492 	struct nfscldeleg *dp;
493 
494 	LIST_FOREACH(dp, NFSCLDELEGHASH(clp, fhp, fhlen), nfsdl_hash) {
495 	    if (dp->nfsdl_fhlen == fhlen &&
496 		!NFSBCMP(dp->nfsdl_fh, fhp, fhlen))
497 		break;
498 	}
499 	return (dp);
500 }
501 
502 /*
503  * Get a stateid for an I/O operation. First, look for an open and iff
504  * found, return either a lockowner stateid or the open stateid.
505  * If no Open is found, just return error and the special stateid of all zeros.
506  */
507 int
508 nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode,
509     int fords, struct ucred *cred, NFSPROC_T *p, nfsv4stateid_t *stateidp,
510     void **lckpp)
511 {
512 	struct nfsclclient *clp;
513 	struct nfsclopen *op = NULL, *top;
514 	struct nfsclopenhash *oph;
515 	struct nfscllockowner *lp;
516 	struct nfscldeleg *dp;
517 	struct nfsnode *np;
518 	struct nfsmount *nmp;
519 	u_int8_t own[NFSV4CL_LOCKNAMELEN];
520 	int error;
521 	bool done;
522 
523 	*lckpp = NULL;
524 	/*
525 	 * Initially, just set the special stateid of all zeros.
526 	 * (Don't do this for a DS, since the special stateid can't be used.)
527 	 */
528 	if (fords == 0) {
529 		stateidp->seqid = 0;
530 		stateidp->other[0] = 0;
531 		stateidp->other[1] = 0;
532 		stateidp->other[2] = 0;
533 	}
534 	if (vnode_vtype(vp) != VREG)
535 		return (EISDIR);
536 	np = VTONFS(vp);
537 	nmp = VFSTONFS(vp->v_mount);
538 	NFSLOCKCLSTATE();
539 	clp = nfscl_findcl(nmp);
540 	if (clp == NULL) {
541 		NFSUNLOCKCLSTATE();
542 		return (EACCES);
543 	}
544 
545 	/*
546 	 * Wait for recovery to complete.
547 	 */
548 	while ((clp->nfsc_flags & NFSCLFLAGS_RECVRINPROG))
549 		(void) nfsmsleep(&clp->nfsc_flags, NFSCLSTATEMUTEXPTR,
550 		    PZERO, "nfsrecvr", NULL);
551 
552 	/*
553 	 * First, look for a delegation.
554 	 */
555 	LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) {
556 		if (dp->nfsdl_fhlen == fhlen &&
557 		    !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) {
558 			if (!(mode & NFSV4OPEN_ACCESSWRITE) ||
559 			    (dp->nfsdl_flags & NFSCLDL_WRITE)) {
560 				stateidp->seqid = dp->nfsdl_stateid.seqid;
561 				stateidp->other[0] = dp->nfsdl_stateid.other[0];
562 				stateidp->other[1] = dp->nfsdl_stateid.other[1];
563 				stateidp->other[2] = dp->nfsdl_stateid.other[2];
564 				if (!(np->n_flag & NDELEGRECALL)) {
565 					TAILQ_REMOVE(&clp->nfsc_deleg, dp,
566 					    nfsdl_list);
567 					TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp,
568 					    nfsdl_list);
569 					dp->nfsdl_timestamp = NFSD_MONOSEC +
570 					    120;
571 					dp->nfsdl_rwlock.nfslock_usecnt++;
572 					*lckpp = (void *)&dp->nfsdl_rwlock;
573 				}
574 				NFSUNLOCKCLSTATE();
575 				return (0);
576 			}
577 			break;
578 		}
579 	}
580 
581 	if (p != NULL) {
582 		/*
583 		 * If p != NULL, we want to search the parentage tree
584 		 * for a matching OpenOwner and use that.
585 		 */
586 		if (NFSHASONEOPENOWN(VFSTONFS(vp->v_mount)))
587 			nfscl_filllockowner(NULL, own, F_POSIX);
588 		else
589 			nfscl_filllockowner(p->td_proc, own, F_POSIX);
590 		lp = NULL;
591 		error = nfscl_getopen(NULL, clp->nfsc_openhash, nfhp, fhlen,
592 		    own, own, mode, &lp, &op);
593 		if (error == 0 && lp != NULL && fords == 0) {
594 			/* Don't return a lock stateid for a DS. */
595 			stateidp->seqid =
596 			    lp->nfsl_stateid.seqid;
597 			stateidp->other[0] =
598 			    lp->nfsl_stateid.other[0];
599 			stateidp->other[1] =
600 			    lp->nfsl_stateid.other[1];
601 			stateidp->other[2] =
602 			    lp->nfsl_stateid.other[2];
603 			NFSUNLOCKCLSTATE();
604 			return (0);
605 		}
606 	}
607 	if (op == NULL) {
608 		/* If not found, just look for any OpenOwner that will work. */
609 		top = NULL;
610 		done = false;
611 		oph = NFSCLOPENHASH(clp, nfhp, fhlen);
612 		LIST_FOREACH(op, oph, nfso_hash) {
613 			if (op->nfso_fhlen == fhlen &&
614 			    !NFSBCMP(op->nfso_fh, nfhp, fhlen)) {
615 				if (top == NULL && (op->nfso_mode &
616 				    NFSV4OPEN_ACCESSWRITE) != 0 &&
617 				    (mode & NFSV4OPEN_ACCESSREAD) != 0)
618 					top = op;
619 				if ((mode & op->nfso_mode) == mode) {
620 					/* LRU order the hash list. */
621 					LIST_REMOVE(op, nfso_hash);
622 					LIST_INSERT_HEAD(oph, op, nfso_hash);
623 					done = true;
624 					break;
625 				}
626 			}
627 		}
628 		if (!done) {
629 			NFSCL_DEBUG(2, "openmode top=%p\n", top);
630 			if (top == NULL || NFSHASOPENMODE(nmp)) {
631 				NFSUNLOCKCLSTATE();
632 				return (ENOENT);
633 			} else
634 				op = top;
635 		}
636 		/*
637 		 * For read aheads or write behinds, use the open cred.
638 		 * A read ahead or write behind is indicated by p == NULL.
639 		 */
640 		if (p == NULL)
641 			newnfs_copycred(&op->nfso_cred, cred);
642 	}
643 
644 	/*
645 	 * No lock stateid, so return the open stateid.
646 	 */
647 	stateidp->seqid = op->nfso_stateid.seqid;
648 	stateidp->other[0] = op->nfso_stateid.other[0];
649 	stateidp->other[1] = op->nfso_stateid.other[1];
650 	stateidp->other[2] = op->nfso_stateid.other[2];
651 	NFSUNLOCKCLSTATE();
652 	return (0);
653 }
654 
655 /*
656  * Search for a matching file, mode and, optionally, lockowner.
657  */
658 static int
659 nfscl_getopen(struct nfsclownerhead *ohp, struct nfsclopenhash *ohashp,
660     u_int8_t *nfhp, int fhlen, u_int8_t *openown, u_int8_t *lockown,
661     u_int32_t mode, struct nfscllockowner **lpp, struct nfsclopen **opp)
662 {
663 	struct nfsclowner *owp;
664 	struct nfsclopen *op, *rop, *rop2;
665 	struct nfsclopenhash *oph;
666 	bool keep_looping;
667 
668 	KASSERT(ohp == NULL || ohashp == NULL, ("nfscl_getopen: "
669 	    "only one of ohp and ohashp can be set"));
670 	if (lpp != NULL)
671 		*lpp = NULL;
672 	/*
673 	 * rop will be set to the open to be returned. There are three
674 	 * variants of this, all for an open of the correct file:
675 	 * 1 - A match of lockown.
676 	 * 2 - A match of the openown, when no lockown match exists.
677 	 * 3 - A match for any open, if no openown or lockown match exists.
678 	 * Looking for #2 over #3 probably isn't necessary, but since
679 	 * RFC3530 is vague w.r.t. the relationship between openowners and
680 	 * lockowners, I think this is the safer way to go.
681 	 */
682 	rop = NULL;
683 	rop2 = NULL;
684 	keep_looping = true;
685 	/* Search the client list */
686 	if (ohashp == NULL) {
687 		/* Search the local opens on the delegation. */
688 		LIST_FOREACH(owp, ohp, nfsow_list) {
689 			/* and look for the correct open */
690 			LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
691 				if (op->nfso_fhlen == fhlen &&
692 				    !NFSBCMP(op->nfso_fh, nfhp, fhlen)
693 				    && (op->nfso_mode & mode) == mode)
694 					keep_looping = nfscl_checkown(owp, op, openown,
695 					    lockown, lpp, &rop, &rop2);
696 				if (!keep_looping)
697 					break;
698 			}
699 			if (!keep_looping)
700 				break;
701 		}
702 	} else {
703 		/* Search for matching opens on the hash list. */
704 		oph = &ohashp[NFSCLOPENHASHFUNC(nfhp, fhlen)];
705 		LIST_FOREACH(op, oph, nfso_hash) {
706 			if (op->nfso_fhlen == fhlen &&
707 			    !NFSBCMP(op->nfso_fh, nfhp, fhlen)
708 			    && (op->nfso_mode & mode) == mode)
709 				keep_looping = nfscl_checkown(op->nfso_own, op,
710 				    openown, lockown, lpp, &rop, &rop2);
711 			if (!keep_looping) {
712 				/* LRU order the hash list. */
713 				LIST_REMOVE(op, nfso_hash);
714 				LIST_INSERT_HEAD(oph, op, nfso_hash);
715 				break;
716 			}
717 		}
718 	}
719 	if (rop == NULL)
720 		rop = rop2;
721 	if (rop == NULL)
722 		return (EBADF);
723 	*opp = rop;
724 	return (0);
725 }
726 
727 /* Check for an owner match. */
728 static bool
729 nfscl_checkown(struct nfsclowner *owp, struct nfsclopen *op, uint8_t *openown,
730     uint8_t *lockown, struct nfscllockowner **lpp, struct nfsclopen **ropp,
731     struct nfsclopen **ropp2)
732 {
733 	struct nfscllockowner *lp;
734 	bool keep_looping;
735 
736 	keep_looping = true;
737 	if (lpp != NULL) {
738 		/* Now look for a matching lockowner. */
739 		LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
740 			if (!NFSBCMP(lp->nfsl_owner, lockown,
741 			    NFSV4CL_LOCKNAMELEN)) {
742 				*lpp = lp;
743 				*ropp = op;
744 				return (false);
745 			}
746 		}
747 	}
748 	if (*ropp == NULL && !NFSBCMP(owp->nfsow_owner, openown,
749 	    NFSV4CL_LOCKNAMELEN)) {
750 		*ropp = op;
751 		if (lpp == NULL)
752 			keep_looping = false;
753 	}
754 	if (*ropp2 == NULL)
755 		*ropp2 = op;
756 	return (keep_looping);
757 }
758 
759 /*
760  * Release use of an open owner. Called when open operations are done
761  * with the open owner.
762  */
763 void
764 nfscl_ownerrelease(struct nfsmount *nmp, struct nfsclowner *owp,
765     __unused int error, __unused int candelete, int unlocked)
766 {
767 
768 	if (owp == NULL)
769 		return;
770 	NFSLOCKCLSTATE();
771 	if (unlocked == 0) {
772 		if (NFSHASONEOPENOWN(nmp))
773 			nfsv4_relref(&owp->nfsow_rwlock);
774 		else
775 			nfscl_lockunlock(&owp->nfsow_rwlock);
776 	}
777 	nfscl_clrelease(owp->nfsow_clp);
778 	NFSUNLOCKCLSTATE();
779 }
780 
781 /*
782  * Release use of an open structure under an open owner.
783  */
784 void
785 nfscl_openrelease(struct nfsmount *nmp, struct nfsclopen *op, int error,
786     int candelete)
787 {
788 	struct nfsclclient *clp;
789 	struct nfsclowner *owp;
790 
791 	if (op == NULL)
792 		return;
793 	NFSLOCKCLSTATE();
794 	owp = op->nfso_own;
795 	if (NFSHASONEOPENOWN(nmp))
796 		nfsv4_relref(&owp->nfsow_rwlock);
797 	else
798 		nfscl_lockunlock(&owp->nfsow_rwlock);
799 	clp = owp->nfsow_clp;
800 	if (error && candelete && op->nfso_opencnt == 0)
801 		nfscl_freeopen(op, 0);
802 	nfscl_clrelease(clp);
803 	NFSUNLOCKCLSTATE();
804 }
805 
806 /*
807  * Called to get a clientid structure. It will optionally lock the
808  * client data structures to do the SetClientId/SetClientId_confirm,
809  * but will release that lock and return the clientid with a reference
810  * count on it.
811  * If the "cred" argument is NULL, a new clientid should not be created.
812  * If the "p" argument is NULL, a SetClientID/SetClientIDConfirm cannot
813  * be done.
814  * The start_renewthread argument tells nfscl_getcl() to start a renew
815  * thread if this creates a new clp.
 * It always returns clpp with a reference count on it, unless returning an
 * error.
817  */
818 int
819 nfscl_getcl(struct mount *mp, struct ucred *cred, NFSPROC_T *p,
820     int start_renewthread, struct nfsclclient **clpp)
821 {
822 	struct nfsclclient *clp;
823 	struct nfsclclient *newclp = NULL;
824 	struct nfsmount *nmp;
825 	char uuid[HOSTUUIDLEN];
826 	int igotlock = 0, error, trystalecnt, clidinusedelay, i;
827 	u_int16_t idlen = 0;
828 
829 	nmp = VFSTONFS(mp);
830 	if (cred != NULL) {
831 		getcredhostuuid(cred, uuid, sizeof uuid);
832 		idlen = strlen(uuid);
833 		if (idlen > 0)
834 			idlen += sizeof (u_int64_t);
835 		else
836 			idlen += sizeof (u_int64_t) + 16; /* 16 random bytes */
837 		newclp = malloc(
838 		    sizeof (struct nfsclclient) + idlen - 1, M_NFSCLCLIENT,
839 		    M_WAITOK | M_ZERO);
840 	}
841 	NFSLOCKCLSTATE();
842 	/*
843 	 * If a forced dismount is already in progress, don't
844 	 * allocate a new clientid and get out now. For the case where
845 	 * clp != NULL, this is a harmless optimization.
846 	 */
847 	if (NFSCL_FORCEDISM(mp)) {
848 		NFSUNLOCKCLSTATE();
849 		if (newclp != NULL)
850 			free(newclp, M_NFSCLCLIENT);
851 		return (EBADF);
852 	}
853 	clp = nmp->nm_clp;
854 	if (clp == NULL) {
855 		if (newclp == NULL) {
856 			NFSUNLOCKCLSTATE();
857 			return (EACCES);
858 		}
859 		clp = newclp;
860 		clp->nfsc_idlen = idlen;
861 		LIST_INIT(&clp->nfsc_owner);
862 		TAILQ_INIT(&clp->nfsc_deleg);
863 		TAILQ_INIT(&clp->nfsc_layout);
864 		LIST_INIT(&clp->nfsc_devinfo);
865 		for (i = 0; i < NFSCLDELEGHASHSIZE; i++)
866 			LIST_INIT(&clp->nfsc_deleghash[i]);
867 		for (i = 0; i < NFSCLOPENHASHSIZE; i++)
868 			LIST_INIT(&clp->nfsc_openhash[i]);
869 		for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++)
870 			LIST_INIT(&clp->nfsc_layouthash[i]);
871 		clp->nfsc_flags = NFSCLFLAGS_INITED;
872 		clp->nfsc_clientidrev = 1;
873 		clp->nfsc_cbident = nfscl_nextcbident();
874 		nfscl_fillclid(nmp->nm_clval, uuid, clp->nfsc_id,
875 		    clp->nfsc_idlen);
876 		LIST_INSERT_HEAD(&nfsclhead, clp, nfsc_list);
877 		nmp->nm_clp = clp;
878 		clp->nfsc_nmp = nmp;
879 		NFSUNLOCKCLSTATE();
880 		if (start_renewthread != 0)
881 			nfscl_start_renewthread(clp);
882 	} else {
883 		NFSUNLOCKCLSTATE();
884 		if (newclp != NULL)
885 			free(newclp, M_NFSCLCLIENT);
886 	}
887 	NFSLOCKCLSTATE();
888 	while ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0 && !igotlock &&
889 	    !NFSCL_FORCEDISM(mp))
890 		igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
891 		    NFSCLSTATEMUTEXPTR, mp);
892 	if (igotlock == 0) {
893 		/*
894 		 * Call nfsv4_lock() with "iwantlock == 0" so that it will
895 		 * wait for a pending exclusive lock request.  This gives the
896 		 * exclusive lock request priority over this shared lock
897 		 * request.
898 		 * An exclusive lock on nfsc_lock is used mainly for server
899 		 * crash recoveries.
900 		 */
901 		nfsv4_lock(&clp->nfsc_lock, 0, NULL, NFSCLSTATEMUTEXPTR, mp);
902 		nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
903 	}
904 	if (igotlock == 0 && NFSCL_FORCEDISM(mp)) {
905 		/*
906 		 * Both nfsv4_lock() and nfsv4_getref() know to check
907 		 * for NFSCL_FORCEDISM() and return without sleeping to
908 		 * wait for the exclusive lock to be released, since it
909 		 * might be held by nfscl_umount() and we need to get out
910 		 * now for that case and not wait until nfscl_umount()
911 		 * releases it.
912 		 */
913 		NFSUNLOCKCLSTATE();
914 		return (EBADF);
915 	}
916 	NFSUNLOCKCLSTATE();
917 
918 	/*
919 	 * If it needs a clientid, do the setclientid now.
920 	 */
921 	if ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0) {
922 		if (!igotlock)
923 			panic("nfscl_clget");
924 		if (p == NULL || cred == NULL) {
925 			NFSLOCKCLSTATE();
926 			nfsv4_unlock(&clp->nfsc_lock, 0);
927 			NFSUNLOCKCLSTATE();
928 			return (EACCES);
929 		}
930 		/*
931 		 * If RFC3530 Sec. 14.2.33 is taken literally,
932 		 * NFSERR_CLIDINUSE will be returned persistently for the
933 		 * case where a new mount of the same file system is using
934 		 * a different principal. In practice, NFSERR_CLIDINUSE is
935 		 * only returned when there is outstanding unexpired state
936 		 * on the clientid. As such, try for twice the lease
937 		 * interval, if we know what that is. Otherwise, make a
938 		 * wild ass guess.
939 		 * The case of returning NFSERR_STALECLIENTID is far less
940 		 * likely, but might occur if there is a significant delay
941 		 * between doing the SetClientID and SetClientIDConfirm Ops,
942 		 * such that the server throws away the clientid before
943 		 * receiving the SetClientIDConfirm.
944 		 */
945 		if (clp->nfsc_renew > 0)
946 			clidinusedelay = NFSCL_LEASE(clp->nfsc_renew) * 2;
947 		else
948 			clidinusedelay = 120;
949 		trystalecnt = 3;
950 		do {
951 			error = nfsrpc_setclient(nmp, clp, 0, NULL, cred, p);
952 			if (error == NFSERR_STALECLIENTID ||
953 			    error == NFSERR_STALEDONTRECOVER ||
954 			    error == NFSERR_BADSESSION ||
955 			    error == NFSERR_CLIDINUSE) {
956 				(void) nfs_catnap(PZERO, error, "nfs_setcl");
957 			}
958 		} while (((error == NFSERR_STALECLIENTID ||
959 		     error == NFSERR_BADSESSION ||
960 		     error == NFSERR_STALEDONTRECOVER) && --trystalecnt > 0) ||
961 		    (error == NFSERR_CLIDINUSE && --clidinusedelay > 0));
962 		if (error) {
963 			NFSLOCKCLSTATE();
964 			nfsv4_unlock(&clp->nfsc_lock, 0);
965 			NFSUNLOCKCLSTATE();
966 			return (error);
967 		}
968 		clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID;
969 	}
970 	if (igotlock) {
971 		NFSLOCKCLSTATE();
972 		nfsv4_unlock(&clp->nfsc_lock, 1);
973 		NFSUNLOCKCLSTATE();
974 	}
975 
976 	*clpp = clp;
977 	return (0);
978 }
979 
980 /*
981  * Get a reference to a clientid and return it, if valid.
982  */
983 struct nfsclclient *
984 nfscl_findcl(struct nfsmount *nmp)
985 {
986 	struct nfsclclient *clp;
987 
988 	clp = nmp->nm_clp;
989 	if (clp == NULL || !(clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID))
990 		return (NULL);
991 	return (clp);
992 }
993 
994 /*
995  * Release the clientid structure. It may be locked or reference counted.
996  */
997 static void
998 nfscl_clrelease(struct nfsclclient *clp)
999 {
1000 
1001 	if (clp->nfsc_lock.nfslock_lock & NFSV4LOCK_LOCK)
1002 		nfsv4_unlock(&clp->nfsc_lock, 0);
1003 	else
1004 		nfsv4_relref(&clp->nfsc_lock);
1005 }
1006 
/*
 * External entry point for nfscl_clrelease(): performs the same release
 * of the clientid structure, but acquires and drops the CLSTATE mutex
 * around it so that callers need not hold it.
 */
void
nfscl_clientrelease(struct nfsclclient *clp)
{

	NFSLOCKCLSTATE();
	nfscl_clrelease(clp);
	NFSUNLOCKCLSTATE();
}
1021 
/*
 * Called when wanting to lock a byte region.
 *
 * Builds a lock record for the range starting at "off" of length "len"
 * (len == NFS64BITSSET means "to the end") and attaches it to a lock
 * owner, creating the lock owner when none matches.
 *
 * When "recovery" is non-zero, the clientid is taken from "rclp", the
 * lock owner and open owner names come from "rownp"/"ropenownp", the
 * delegation search and local conflict check are skipped, and this
 * function never acquires or releases the CLSTATE mutex itself.
 * When "recovery" is zero, the clientid is obtained via nfscl_getcl()
 * and the owner names are generated from "id"/"flags" and "p".
 *
 * Outputs:
 *   *lpp          - the lock owner that now holds the range (NULL on error)
 *   *newonep      - set to 1 when a new lock owner was created
 *   *donelocallyp - set to 1 when the lock was handled locally
 *
 * Returns 0 on success, NFSERR_INVAL for an empty or wrapping range, or
 * the error returned by nfscl_getcl(), nfscl_getopen() or
 * nfscl_localconflict().
 */
int
nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
    short type, struct ucred *cred, NFSPROC_T *p, struct nfsclclient *rclp,
    int recovery, void *id, int flags, u_int8_t *rownp, u_int8_t *ropenownp,
    struct nfscllockowner **lpp, int *newonep, int *donelocallyp)
{
	struct nfscllockowner *lp;
	struct nfsclopen *op;
	struct nfsclclient *clp;
	struct nfscllockowner *nlp;
	struct nfscllock *nlop, *otherlop;
	struct nfscldeleg *dp = NULL, *ldp = NULL;
	struct nfscllockownerhead *lhp = NULL;
	struct nfsnode *np;
	u_int8_t own[NFSV4CL_LOCKNAMELEN], *ownp, openown[NFSV4CL_LOCKNAMELEN];
	u_int8_t *openownp;
	int error = 0, ret, donelocally = 0;
	u_int32_t mode;

	/* For Lock Ops, the open mode doesn't matter, so use 0 to match any. */
	mode = 0;
	np = VTONFS(vp);
	*lpp = NULL;
	lp = NULL;
	*newonep = 0;
	*donelocallyp = 0;

	/*
	 * Might need these, so MALLOC them now, to
	 * avoid a tsleep() in MALLOC later.
	 */
	nlp = malloc(
	    sizeof (struct nfscllockowner), M_NFSCLLOCKOWNER, M_WAITOK);
	otherlop = malloc(
	    sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
	nlop = malloc(
	    sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
	nlop->nfslo_type = type;
	nlop->nfslo_first = off;
	if (len == NFS64BITSSET) {
		nlop->nfslo_end = NFS64BITSSET;
	} else {
		nlop->nfslo_end = off + len;
		/* Catches both len == 0 and off + len wrapping around. */
		if (nlop->nfslo_end <= nlop->nfslo_first)
			error = NFSERR_INVAL;
	}

	if (!error) {
		if (recovery)
			clp = rclp;
		else
			error = nfscl_getcl(vp->v_mount, cred, p, 1, &clp);
	}
	if (error) {
		/* No hold on clp was obtained, so only the allocations go. */
		free(nlp, M_NFSCLLOCKOWNER);
		free(otherlop, M_NFSCLLOCK);
		free(nlop, M_NFSCLLOCK);
		return (error);
	}

	op = NULL;
	if (recovery) {
		ownp = rownp;
		openownp = ropenownp;
	} else {
		nfscl_filllockowner(id, own, flags);
		ownp = own;
		if (NFSHASONEOPENOWN(VFSTONFS(vp->v_mount)))
			nfscl_filllockowner(NULL, openown, F_POSIX);
		else
			nfscl_filllockowner(p->td_proc, openown, F_POSIX);
		openownp = openown;
	}
	if (!recovery) {
		NFSLOCKCLSTATE();
		/*
		 * First, search for a delegation. If one exists for this file,
		 * the lock can be done locally against it, so long as there
		 * isn't a local lock conflict.
		 */
		/*
		 * ldp keeps the delegation that was found for the later
		 * nfscl_localconflict() call, even when dp is cleared below.
		 */
		ldp = dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
		    np->n_fhp->nfh_len);
		/* Just sanity check for correct type of delegation */
		if (dp != NULL && ((dp->nfsdl_flags &
		    (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) != 0 ||
		     (type == F_WRLCK &&
		      (dp->nfsdl_flags & NFSCLDL_WRITE) == 0)))
			dp = NULL;
	}
	if (dp != NULL) {
		/* Now, find an open and maybe a lockowner. */
		/* Try the delegation's owner list first, then the open hash. */
		ret = nfscl_getopen(&dp->nfsdl_owner, NULL, np->n_fhp->nfh_fh,
		    np->n_fhp->nfh_len, openownp, ownp, mode, NULL, &op);
		if (ret)
			ret = nfscl_getopen(NULL, clp->nfsc_openhash,
			    np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp,
			    ownp, mode, NULL, &op);
		if (!ret) {
			lhp = &dp->nfsdl_lock;
			/* Move the delegation to the head of the list. */
			TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
			TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list);
			dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
			donelocally = 1;
		} else {
			dp = NULL;
		}
	}
	if (!donelocally) {
		/*
		 * Get the related Open and maybe lockowner.
		 */
		error = nfscl_getopen(NULL, clp->nfsc_openhash,
		    np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp,
		    ownp, mode, &lp, &op);
		if (!error)
			lhp = &op->nfso_lock;
	}
	if (!error && !recovery)
		error = nfscl_localconflict(clp, np->n_fhp->nfh_fh,
		    np->n_fhp->nfh_len, nlop, ownp, ldp, NULL);
	if (error) {
		/*
		 * For the non-recovery case the CLSTATE mutex is held here
		 * and the hold on clp from nfscl_getcl() must be dropped.
		 */
		if (!recovery) {
			nfscl_clrelease(clp);
			NFSUNLOCKCLSTATE();
		}
		free(nlp, M_NFSCLLOCKOWNER);
		free(otherlop, M_NFSCLLOCK);
		free(nlop, M_NFSCLLOCK);
		return (error);
	}

	/*
	 * Ok, see if a lockowner exists and create one, as required.
	 */
	if (lp == NULL)
		LIST_FOREACH(lp, lhp, nfsl_list) {
			if (!NFSBCMP(lp->nfsl_owner, ownp, NFSV4CL_LOCKNAMELEN))
				break;
		}
	if (lp == NULL) {
		/* No match: initialize the preallocated owner and link it in. */
		NFSBCOPY(ownp, nlp->nfsl_owner, NFSV4CL_LOCKNAMELEN);
		if (recovery)
			NFSBCOPY(ropenownp, nlp->nfsl_openowner,
			    NFSV4CL_LOCKNAMELEN);
		else
			NFSBCOPY(op->nfso_own->nfsow_owner, nlp->nfsl_openowner,
			    NFSV4CL_LOCKNAMELEN);
		nlp->nfsl_seqid = 0;
		nlp->nfsl_lockflags = flags;
		nlp->nfsl_inprog = NULL;
		nfscl_lockinit(&nlp->nfsl_rwlock);
		LIST_INIT(&nlp->nfsl_lock);
		if (donelocally) {
			/* Local lock owners are not tied to an open. */
			nlp->nfsl_open = NULL;
			nfsstatsv1.cllocallockowners++;
		} else {
			nlp->nfsl_open = op;
			nfsstatsv1.cllockowners++;
		}
		LIST_INSERT_HEAD(lhp, nlp, nfsl_list);
		lp = nlp;
		/* Ownership moved to the list; don't free it below. */
		nlp = NULL;
		*newonep = 1;
	}

	/*
	 * Now, update the byte ranges for locks.
	 */
	/*
	 * nlop and otherlop are passed by reference; presumably
	 * nfscl_updatelock() NULLs whichever it consumes, since whatever
	 * is still non-NULL is freed at the end -- TODO confirm.
	 * A zero return is treated as "nothing more to do at the server".
	 */
	ret = nfscl_updatelock(lp, &nlop, &otherlop, donelocally);
	if (!ret)
		donelocally = 1;
	if (donelocally) {
		*donelocallyp = 1;
		if (!recovery)
			nfscl_clrelease(clp);
	} else {
		/*
		 * Serial modifications on the lock owner for multiple threads
		 * for the same process using a read/write lock.
		 */
		/* Note: the hold on clp is not released on this path. */
		if (!recovery)
			nfscl_lockexcl(&lp->nfsl_rwlock, NFSCLSTATEMUTEXPTR);
	}
	if (!recovery)
		NFSUNLOCKCLSTATE();

	if (nlp)
		free(nlp, M_NFSCLLOCKOWNER);
	if (nlop)
		free(nlop, M_NFSCLLOCK);
	if (otherlop)
		free(otherlop, M_NFSCLLOCK);

	*lpp = lp;
	return (0);
}
1221 
1222 /*
1223  * Called to unlock a byte range, for LockU.
1224  */
1225 int
1226 nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
1227     __unused struct ucred *cred, NFSPROC_T *p, int callcnt,
1228     struct nfsclclient *clp, void *id, int flags,
1229     struct nfscllockowner **lpp, int *dorpcp)
1230 {
1231 	struct nfscllockowner *lp;
1232 	struct nfsclopen *op;
1233 	struct nfscllock *nlop, *other_lop = NULL;
1234 	struct nfscldeleg *dp;
1235 	struct nfsnode *np;
1236 	u_int8_t own[NFSV4CL_LOCKNAMELEN];
1237 	int ret = 0, fnd;
1238 
1239 	np = VTONFS(vp);
1240 	*lpp = NULL;
1241 	*dorpcp = 0;
1242 
1243 	/*
1244 	 * Might need these, so MALLOC them now, to
1245 	 * avoid a tsleep() in MALLOC later.
1246 	 */
1247 	nlop = malloc(
1248 	    sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1249 	nlop->nfslo_type = F_UNLCK;
1250 	nlop->nfslo_first = off;
1251 	if (len == NFS64BITSSET) {
1252 		nlop->nfslo_end = NFS64BITSSET;
1253 	} else {
1254 		nlop->nfslo_end = off + len;
1255 		if (nlop->nfslo_end <= nlop->nfslo_first) {
1256 			free(nlop, M_NFSCLLOCK);
1257 			return (NFSERR_INVAL);
1258 		}
1259 	}
1260 	if (callcnt == 0) {
1261 		other_lop = malloc(
1262 		    sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1263 		*other_lop = *nlop;
1264 	}
1265 	nfscl_filllockowner(id, own, flags);
1266 	dp = NULL;
1267 	NFSLOCKCLSTATE();
1268 	if (callcnt == 0)
1269 		dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
1270 		    np->n_fhp->nfh_len);
1271 
1272 	/*
1273 	 * First, unlock any local regions on a delegation.
1274 	 */
1275 	if (dp != NULL) {
1276 		/* Look for this lockowner. */
1277 		LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
1278 			if (!NFSBCMP(lp->nfsl_owner, own,
1279 			    NFSV4CL_LOCKNAMELEN))
1280 				break;
1281 		}
1282 		if (lp != NULL)
1283 			/* Use other_lop, so nlop is still available */
1284 			(void)nfscl_updatelock(lp, &other_lop, NULL, 1);
1285 	}
1286 
1287 	/*
1288 	 * Now, find a matching open/lockowner that hasn't already been done,
1289 	 * as marked by nfsl_inprog.
1290 	 */
1291 	lp = NULL;
1292 	fnd = 0;
1293 	LIST_FOREACH(op, NFSCLOPENHASH(clp, np->n_fhp->nfh_fh,
1294 	    np->n_fhp->nfh_len), nfso_hash) {
1295 		if (op->nfso_fhlen == np->n_fhp->nfh_len &&
1296 		    !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) {
1297 			LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1298 				if (lp->nfsl_inprog == NULL &&
1299 				    !NFSBCMP(lp->nfsl_owner, own,
1300 				     NFSV4CL_LOCKNAMELEN)) {
1301 					fnd = 1;
1302 					break;
1303 				}
1304 			}
1305 		}
1306 		if (fnd)
1307 			break;
1308 	}
1309 
1310 	if (lp != NULL) {
1311 		ret = nfscl_updatelock(lp, &nlop, NULL, 0);
1312 		if (ret)
1313 			*dorpcp = 1;
1314 		/*
1315 		 * Serial modifications on the lock owner for multiple
1316 		 * threads for the same process using a read/write lock.
1317 		 */
1318 		lp->nfsl_inprog = p;
1319 		nfscl_lockexcl(&lp->nfsl_rwlock, NFSCLSTATEMUTEXPTR);
1320 		*lpp = lp;
1321 	}
1322 	NFSUNLOCKCLSTATE();
1323 	if (nlop)
1324 		free(nlop, M_NFSCLLOCK);
1325 	if (other_lop)
1326 		free(other_lop, M_NFSCLLOCK);
1327 	return (0);
1328 }
1329 
1330 /*
1331  * Release all lockowners marked in progess for this process and file.
1332  */
1333 void
1334 nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p,
1335     void *id, int flags)
1336 {
1337 	struct nfsclopen *op;
1338 	struct nfscllockowner *lp;
1339 	struct nfsnode *np;
1340 	u_int8_t own[NFSV4CL_LOCKNAMELEN];
1341 
1342 	np = VTONFS(vp);
1343 	nfscl_filllockowner(id, own, flags);
1344 	NFSLOCKCLSTATE();
1345 	LIST_FOREACH(op, NFSCLOPENHASH(clp, np->n_fhp->nfh_fh,
1346 	    np->n_fhp->nfh_len), nfso_hash) {
1347 		if (op->nfso_fhlen == np->n_fhp->nfh_len &&
1348 		    !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) {
1349 			LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1350 				if (lp->nfsl_inprog == p &&
1351 				    !NFSBCMP(lp->nfsl_owner, own,
1352 				    NFSV4CL_LOCKNAMELEN)) {
1353 					lp->nfsl_inprog = NULL;
1354 					nfscl_lockunlock(&lp->nfsl_rwlock);
1355 				}
1356 			}
1357 		}
1358 	}
1359 	nfscl_clrelease(clp);
1360 	NFSUNLOCKCLSTATE();
1361 }
1362 
1363 /*
1364  * Called to find out if any bytes within the byte range specified are
1365  * write locked by the calling process. Used to determine if flushing
1366  * is required before a LockU.
1367  * If in doubt, return 1, so the flush will occur.
1368  */
1369 int
1370 nfscl_checkwritelocked(vnode_t vp, struct flock *fl,
1371     struct ucred *cred, NFSPROC_T *p, void *id, int flags)
1372 {
1373 	struct nfscllockowner *lp;
1374 	struct nfsclopen *op;
1375 	struct nfsclclient *clp;
1376 	struct nfscllock *lop;
1377 	struct nfscldeleg *dp;
1378 	struct nfsnode *np;
1379 	u_int64_t off, end;
1380 	u_int8_t own[NFSV4CL_LOCKNAMELEN];
1381 	int error = 0;
1382 
1383 	np = VTONFS(vp);
1384 	switch (fl->l_whence) {
1385 	case SEEK_SET:
1386 	case SEEK_CUR:
1387 		/*
1388 		 * Caller is responsible for adding any necessary offset
1389 		 * when SEEK_CUR is used.
1390 		 */
1391 		off = fl->l_start;
1392 		break;
1393 	case SEEK_END:
1394 		off = np->n_size + fl->l_start;
1395 		break;
1396 	default:
1397 		return (1);
1398 	}
1399 	if (fl->l_len != 0) {
1400 		end = off + fl->l_len;
1401 		if (end < off)
1402 			return (1);
1403 	} else {
1404 		end = NFS64BITSSET;
1405 	}
1406 
1407 	error = nfscl_getcl(vp->v_mount, cred, p, 1, &clp);
1408 	if (error)
1409 		return (1);
1410 	nfscl_filllockowner(id, own, flags);
1411 	NFSLOCKCLSTATE();
1412 
1413 	/*
1414 	 * First check the delegation locks.
1415 	 */
1416 	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
1417 	if (dp != NULL) {
1418 		LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
1419 			if (!NFSBCMP(lp->nfsl_owner, own,
1420 			    NFSV4CL_LOCKNAMELEN))
1421 				break;
1422 		}
1423 		if (lp != NULL) {
1424 			LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
1425 				if (lop->nfslo_first >= end)
1426 					break;
1427 				if (lop->nfslo_end <= off)
1428 					continue;
1429 				if (lop->nfslo_type == F_WRLCK) {
1430 					nfscl_clrelease(clp);
1431 					NFSUNLOCKCLSTATE();
1432 					return (1);
1433 				}
1434 			}
1435 		}
1436 	}
1437 
1438 	/*
1439 	 * Now, check state against the server.
1440 	 */
1441 	LIST_FOREACH(op, NFSCLOPENHASH(clp, np->n_fhp->nfh_fh,
1442 	    np->n_fhp->nfh_len), nfso_hash) {
1443 		if (op->nfso_fhlen == np->n_fhp->nfh_len &&
1444 		    !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) {
1445 			LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1446 				if (!NFSBCMP(lp->nfsl_owner, own,
1447 				    NFSV4CL_LOCKNAMELEN))
1448 					break;
1449 			}
1450 			if (lp != NULL) {
1451 				LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
1452 					if (lop->nfslo_first >= end)
1453 						break;
1454 					if (lop->nfslo_end <= off)
1455 						continue;
1456 					if (lop->nfslo_type == F_WRLCK) {
1457 						nfscl_clrelease(clp);
1458 						NFSUNLOCKCLSTATE();
1459 						return (1);
1460 					}
1461 				}
1462 			}
1463 		}
1464 	}
1465 	nfscl_clrelease(clp);
1466 	NFSUNLOCKCLSTATE();
1467 	return (0);
1468 }
1469 
1470 /*
1471  * Release a byte range lock owner structure.
1472  */
1473 void
1474 nfscl_lockrelease(struct nfscllockowner *lp, int error, int candelete)
1475 {
1476 	struct nfsclclient *clp;
1477 
1478 	if (lp == NULL)
1479 		return;
1480 	NFSLOCKCLSTATE();
1481 	clp = lp->nfsl_open->nfso_own->nfsow_clp;
1482 	if (error != 0 && candelete &&
1483 	    (lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED) == 0)
1484 		nfscl_freelockowner(lp, 0);
1485 	else
1486 		nfscl_lockunlock(&lp->nfsl_rwlock);
1487 	nfscl_clrelease(clp);
1488 	NFSUNLOCKCLSTATE();
1489 }
1490 
1491 /*
1492  * Free up an open structure and any associated byte range lock structures.
1493  */
1494 void
1495 nfscl_freeopen(struct nfsclopen *op, int local)
1496 {
1497 
1498 	LIST_REMOVE(op, nfso_list);
1499 	if (op->nfso_hash.le_prev != NULL)
1500 		LIST_REMOVE(op, nfso_hash);
1501 	nfscl_freealllocks(&op->nfso_lock, local);
1502 	free(op, M_NFSCLOPEN);
1503 	if (local)
1504 		nfsstatsv1.cllocalopens--;
1505 	else
1506 		nfsstatsv1.clopens--;
1507 }
1508 
1509 /*
1510  * Free up all lock owners and associated locks.
1511  */
1512 static void
1513 nfscl_freealllocks(struct nfscllockownerhead *lhp, int local)
1514 {
1515 	struct nfscllockowner *lp, *nlp;
1516 
1517 	LIST_FOREACH_SAFE(lp, lhp, nfsl_list, nlp) {
1518 		if ((lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED))
1519 			panic("nfscllckw");
1520 		nfscl_freelockowner(lp, local);
1521 	}
1522 }
1523 
1524 /*
1525  * Called for an Open when NFSERR_EXPIRED is received from the server.
1526  * If there are no byte range locks nor a Share Deny lost, try to do a
1527  * fresh Open. Otherwise, free the open.
1528  */
1529 static int
1530 nfscl_expireopen(struct nfsclclient *clp, struct nfsclopen *op,
1531     struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
1532 {
1533 	struct nfscllockowner *lp;
1534 	struct nfscldeleg *dp;
1535 	int mustdelete = 0, error;
1536 
1537 	/*
1538 	 * Look for any byte range lock(s).
1539 	 */
1540 	LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1541 		if (!LIST_EMPTY(&lp->nfsl_lock)) {
1542 			mustdelete = 1;
1543 			break;
1544 		}
1545 	}
1546 
1547 	/*
1548 	 * If no byte range lock(s) nor a Share deny, try to re-open.
1549 	 */
1550 	if (!mustdelete && (op->nfso_mode & NFSLCK_DENYBITS) == 0) {
1551 		newnfs_copycred(&op->nfso_cred, cred);
1552 		dp = NULL;
1553 		error = nfsrpc_reopen(nmp, op->nfso_fh,
1554 		    op->nfso_fhlen, op->nfso_mode, op, &dp, cred, p);
1555 		if (error) {
1556 			mustdelete = 1;
1557 			if (dp != NULL) {
1558 				free(dp, M_NFSCLDELEG);
1559 				dp = NULL;
1560 			}
1561 		}
1562 		if (dp != NULL)
1563 			nfscl_deleg(nmp->nm_mountp, clp, op->nfso_fh,
1564 			    op->nfso_fhlen, cred, p, &dp);
1565 	}
1566 
1567 	/*
1568 	 * If a byte range lock or Share deny or couldn't re-open, free it.
1569 	 */
1570 	if (mustdelete)
1571 		nfscl_freeopen(op, 0);
1572 	return (mustdelete);
1573 }
1574 
1575 /*
1576  * Free up an open owner structure.
1577  */
1578 static void
1579 nfscl_freeopenowner(struct nfsclowner *owp, int local)
1580 {
1581 
1582 	LIST_REMOVE(owp, nfsow_list);
1583 	free(owp, M_NFSCLOWNER);
1584 	if (local)
1585 		nfsstatsv1.cllocalopenowners--;
1586 	else
1587 		nfsstatsv1.clopenowners--;
1588 }
1589 
1590 /*
1591  * Free up a byte range lock owner structure.
1592  */
1593 void
1594 nfscl_freelockowner(struct nfscllockowner *lp, int local)
1595 {
1596 	struct nfscllock *lop, *nlop;
1597 
1598 	LIST_REMOVE(lp, nfsl_list);
1599 	LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
1600 		nfscl_freelock(lop, local);
1601 	}
1602 	free(lp, M_NFSCLLOCKOWNER);
1603 	if (local)
1604 		nfsstatsv1.cllocallockowners--;
1605 	else
1606 		nfsstatsv1.cllockowners--;
1607 }
1608 
1609 /*
1610  * Free up a byte range lock structure.
1611  */
1612 void
1613 nfscl_freelock(struct nfscllock *lop, int local)
1614 {
1615 
1616 	LIST_REMOVE(lop, nfslo_list);
1617 	free(lop, M_NFSCLLOCK);
1618 	if (local)
1619 		nfsstatsv1.cllocallocks--;
1620 	else
1621 		nfsstatsv1.cllocks--;
1622 }
1623 
1624 /*
1625  * Clean out the state related to a delegation.
1626  */
1627 static void
1628 nfscl_cleandeleg(struct nfscldeleg *dp)
1629 {
1630 	struct nfsclowner *owp, *nowp;
1631 	struct nfsclopen *op;
1632 
1633 	LIST_FOREACH_SAFE(owp, &dp->nfsdl_owner, nfsow_list, nowp) {
1634 		op = LIST_FIRST(&owp->nfsow_open);
1635 		if (op != NULL) {
1636 			if (LIST_NEXT(op, nfso_list) != NULL)
1637 				panic("nfscleandel");
1638 			nfscl_freeopen(op, 1);
1639 		}
1640 		nfscl_freeopenowner(owp, 1);
1641 	}
1642 	nfscl_freealllocks(&dp->nfsdl_lock, 1);
1643 }
1644 
1645 /*
1646  * Free a delegation.
1647  */
1648 static void
1649 nfscl_freedeleg(struct nfscldeleghead *hdp, struct nfscldeleg *dp, bool freeit)
1650 {
1651 
1652 	TAILQ_REMOVE(hdp, dp, nfsdl_list);
1653 	LIST_REMOVE(dp, nfsdl_hash);
1654 	if (freeit)
1655 		free(dp, M_NFSCLDELEG);
1656 	nfsstatsv1.cldelegates--;
1657 	nfscl_delegcnt--;
1658 }
1659 
1660 /*
1661  * Free up all state related to this client structure.
1662  */
1663 static void
1664 nfscl_cleanclient(struct nfsclclient *clp)
1665 {
1666 	struct nfsclowner *owp, *nowp;
1667 	struct nfsclopen *op, *nop;
1668 	struct nfscllayout *lyp, *nlyp;
1669 	struct nfscldevinfo *dip, *ndip;
1670 
1671 	TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp)
1672 		nfscl_freelayout(lyp);
1673 
1674 	LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip)
1675 		nfscl_freedevinfo(dip);
1676 
1677 	/* Now, all the OpenOwners, etc. */
1678 	LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
1679 		LIST_FOREACH_SAFE(op, &owp->nfsow_open, nfso_list, nop) {
1680 			nfscl_freeopen(op, 0);
1681 		}
1682 		nfscl_freeopenowner(owp, 0);
1683 	}
1684 }
1685 
/*
 * Called when an NFSERR_EXPIRED is received from the server.
 *
 * Works in two phases:
 * 1. For every delegation, the locally issued opens are folded into the
 *    client's own open owner/open lists, after which the delegation's
 *    remaining local state is cleaned and the delegation is freed.
 * 2. Every open on the client's list is handed to nfscl_expireopen(),
 *    which either re-opens it or frees it; open owners left without any
 *    open are freed.
 * "nfsv4 expired locks lost" is printed at most once per call, when a
 * delegation had local lock owners or nfscl_expireopen() freed an open.
 */
static void
nfscl_expireclient(struct nfsclclient *clp, struct nfsmount *nmp,
    struct ucred *cred, NFSPROC_T *p)
{
	struct nfsclowner *owp, *nowp, *towp;
	struct nfsclopen *op, *nop, *top;
	struct nfscldeleg *dp, *ndp;
	int ret, printed = 0;

	/*
	 * First, merge locally issued Opens into the list for the server.
	 */
	dp = TAILQ_FIRST(&clp->nfsc_deleg);
	while (dp != NULL) {
	    /* dp is freed at the bottom of the loop, so save its successor. */
	    ndp = TAILQ_NEXT(dp, nfsdl_list);
	    owp = LIST_FIRST(&dp->nfsdl_owner);
	    while (owp != NULL) {
		/* owp may be moved to another list below; save its successor. */
		nowp = LIST_NEXT(owp, nfsow_list);
		op = LIST_FIRST(&owp->nfsow_open);
		if (op != NULL) {
		    /* A local open owner is expected to have a single open. */
		    if (LIST_NEXT(op, nfso_list) != NULL)
			panic("nfsclexp");
		    /* Look for an open owner of the same name on the client. */
		    LIST_FOREACH(towp, &clp->nfsc_owner, nfsow_list) {
			if (!NFSBCMP(towp->nfsow_owner, owp->nfsow_owner,
			    NFSV4CL_LOCKNAMELEN))
			    break;
		    }
		    if (towp != NULL) {
			/* Merge opens in */
			LIST_FOREACH(top, &towp->nfsow_open, nfso_list) {
			    if (top->nfso_fhlen == op->nfso_fhlen &&
				!NFSBCMP(top->nfso_fh, op->nfso_fh,
				 op->nfso_fhlen)) {
				/*
				 * Same file already open: fold mode and
				 * count in. The local op stays on owp and
				 * is freed by nfscl_cleandeleg() below.
				 */
				top->nfso_mode |= op->nfso_mode;
				top->nfso_opencnt += op->nfso_opencnt;
				break;
			    }
			}
			if (top == NULL) {
			    /* Just add the open to the owner list */
			    LIST_REMOVE(op, nfso_list);
			    op->nfso_own = towp;
			    LIST_INSERT_HEAD(&towp->nfsow_open, op, nfso_list);
			    LIST_INSERT_HEAD(NFSCLOPENHASH(clp, op->nfso_fh,
				op->nfso_fhlen), op, nfso_hash);
			    /* The open is no longer a local one. */
			    nfsstatsv1.cllocalopens--;
			    nfsstatsv1.clopens++;
			}
		    } else {
			/* Just add the openowner to the client list */
			LIST_REMOVE(owp, nfsow_list);
			owp->nfsow_clp = clp;
			LIST_INSERT_HEAD(&clp->nfsc_owner, owp, nfsow_list);
			LIST_INSERT_HEAD(NFSCLOPENHASH(clp, op->nfso_fh,
			    op->nfso_fhlen), op, nfso_hash);
			/* Both the owner and its open stop being local. */
			nfsstatsv1.cllocalopenowners--;
			nfsstatsv1.clopenowners++;
			nfsstatsv1.cllocalopens--;
			nfsstatsv1.clopens++;
		    }
		}
		owp = nowp;
	    }
	    /* Local lock owners on the delegation are about to be freed. */
	    if (!printed && !LIST_EMPTY(&dp->nfsdl_lock)) {
		printed = 1;
		printf("nfsv4 expired locks lost\n");
	    }
	    nfscl_cleandeleg(dp);
	    nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
	    dp = ndp;
	}
	/* Every delegation must have been removed by the loop above. */
	if (!TAILQ_EMPTY(&clp->nfsc_deleg))
	    panic("nfsclexp");

	/*
	 * Now, try and reopen against the server.
	 */
	LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
		owp->nfsow_seqid = 0;
		LIST_FOREACH_SAFE(op, &owp->nfsow_open, nfso_list, nop) {
			/* A non-zero return means the open was freed. */
			ret = nfscl_expireopen(clp, op, nmp, cred, p);
			if (ret && !printed) {
				printed = 1;
				printf("nfsv4 expired locks lost\n");
			}
		}
		if (LIST_EMPTY(&owp->nfsow_open))
			nfscl_freeopenowner(owp, 0);
	}
}
1779 
1780 /*
1781  * This function must be called after the process represented by "own" has
1782  * exited. Must be called with CLSTATE lock held.
1783  */
1784 static void
1785 nfscl_cleanup_common(struct nfsclclient *clp, u_int8_t *own)
1786 {
1787 	struct nfsclowner *owp, *nowp;
1788 	struct nfscllockowner *lp, *nlp;
1789 	struct nfscldeleg *dp;
1790 
1791 	/* First, get rid of local locks on delegations. */
1792 	TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
1793 		LIST_FOREACH_SAFE(lp, &dp->nfsdl_lock, nfsl_list, nlp) {
1794 		    if (!NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) {
1795 			if ((lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED))
1796 			    panic("nfscllckw");
1797 			nfscl_freelockowner(lp, 1);
1798 		    }
1799 		}
1800 	}
1801 	owp = LIST_FIRST(&clp->nfsc_owner);
1802 	while (owp != NULL) {
1803 		nowp = LIST_NEXT(owp, nfsow_list);
1804 		if (!NFSBCMP(owp->nfsow_owner, own,
1805 		    NFSV4CL_LOCKNAMELEN)) {
1806 			/*
1807 			 * If there are children that haven't closed the
1808 			 * file descriptors yet, the opens will still be
1809 			 * here. For that case, let the renew thread clear
1810 			 * out the OpenOwner later.
1811 			 */
1812 			if (LIST_EMPTY(&owp->nfsow_open))
1813 				nfscl_freeopenowner(owp, 0);
1814 			else
1815 				owp->nfsow_defunct = 1;
1816 		}
1817 		owp = nowp;
1818 	}
1819 }
1820 
1821 /*
1822  * Find open/lock owners for processes that have exited.
1823  */
1824 static void
1825 nfscl_cleanupkext(struct nfsclclient *clp, struct nfscllockownerfhhead *lhp)
1826 {
1827 	struct nfsclowner *owp, *nowp;
1828 	struct nfsclopen *op;
1829 	struct nfscllockowner *lp, *nlp;
1830 	struct nfscldeleg *dp;
1831 
1832 	/*
1833 	 * All the pidhash locks must be acquired, since they are sx locks
1834 	 * and must be acquired before the mutexes.  The pid(s) that will
1835 	 * be used aren't known yet, so all the locks need to be acquired.
1836 	 * Fortunately, this function is only performed once/sec.
1837 	 */
1838 	pidhash_slockall();
1839 	NFSLOCKCLSTATE();
1840 	LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
1841 		LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
1842 			LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp) {
1843 				if (LIST_EMPTY(&lp->nfsl_lock))
1844 					nfscl_emptylockowner(lp, lhp);
1845 			}
1846 		}
1847 		if (nfscl_procdoesntexist(owp->nfsow_owner))
1848 			nfscl_cleanup_common(clp, owp->nfsow_owner);
1849 	}
1850 
1851 	/*
1852 	 * For the single open_owner case, these lock owners need to be
1853 	 * checked to see if they still exist separately.
1854 	 * This is because nfscl_procdoesntexist() never returns true for
1855 	 * the single open_owner so that the above doesn't ever call
1856 	 * nfscl_cleanup_common().
1857 	 */
1858 	TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
1859 		LIST_FOREACH_SAFE(lp, &dp->nfsdl_lock, nfsl_list, nlp) {
1860 			if (nfscl_procdoesntexist(lp->nfsl_owner))
1861 				nfscl_cleanup_common(clp, lp->nfsl_owner);
1862 		}
1863 	}
1864 	NFSUNLOCKCLSTATE();
1865 	pidhash_sunlockall();
1866 }
1867 
1868 /*
1869  * Take the empty lock owner and move it to the local lhp list if the
1870  * associated process no longer exists.
1871  */
1872 static void
1873 nfscl_emptylockowner(struct nfscllockowner *lp,
1874     struct nfscllockownerfhhead *lhp)
1875 {
1876 	struct nfscllockownerfh *lfhp, *mylfhp;
1877 	struct nfscllockowner *nlp;
1878 	int fnd_it;
1879 
1880 	/* If not a Posix lock owner, just return. */
1881 	if ((lp->nfsl_lockflags & F_POSIX) == 0)
1882 		return;
1883 
1884 	fnd_it = 0;
1885 	mylfhp = NULL;
1886 	/*
1887 	 * First, search to see if this lock owner is already in the list.
1888 	 * If it is, then the associated process no longer exists.
1889 	 */
1890 	SLIST_FOREACH(lfhp, lhp, nfslfh_list) {
1891 		if (lfhp->nfslfh_len == lp->nfsl_open->nfso_fhlen &&
1892 		    !NFSBCMP(lfhp->nfslfh_fh, lp->nfsl_open->nfso_fh,
1893 		    lfhp->nfslfh_len))
1894 			mylfhp = lfhp;
1895 		LIST_FOREACH(nlp, &lfhp->nfslfh_lock, nfsl_list)
1896 			if (!NFSBCMP(nlp->nfsl_owner, lp->nfsl_owner,
1897 			    NFSV4CL_LOCKNAMELEN))
1898 				fnd_it = 1;
1899 	}
1900 	/* If not found, check if process still exists. */
1901 	if (fnd_it == 0 && nfscl_procdoesntexist(lp->nfsl_owner) == 0)
1902 		return;
1903 
1904 	/* Move the lock owner over to the local list. */
1905 	if (mylfhp == NULL) {
1906 		mylfhp = malloc(sizeof(struct nfscllockownerfh), M_TEMP,
1907 		    M_NOWAIT);
1908 		if (mylfhp == NULL)
1909 			return;
1910 		mylfhp->nfslfh_len = lp->nfsl_open->nfso_fhlen;
1911 		NFSBCOPY(lp->nfsl_open->nfso_fh, mylfhp->nfslfh_fh,
1912 		    mylfhp->nfslfh_len);
1913 		LIST_INIT(&mylfhp->nfslfh_lock);
1914 		SLIST_INSERT_HEAD(lhp, mylfhp, nfslfh_list);
1915 	}
1916 	LIST_REMOVE(lp, nfsl_list);
1917 	LIST_INSERT_HEAD(&mylfhp->nfslfh_lock, lp, nfsl_list);
1918 }
1919 
static int	fake_global;	/* Used to force visibility of MNTK_UNMOUNTF */
/*
 * Called from nfs umount to free up the clientid.
 *
 * When the mount has a client structure, this waits for the renew thread
 * to terminate, acquires the exclusive lock on the client state, returns
 * delegations, tells the server to discard the clientid/session and then
 * frees all of the client state along with the client structure itself.
 * When there is no client structure, the function only drops the CLSTATE
 * mutex again.
 */
void
nfscl_umount(struct nfsmount *nmp, NFSPROC_T *p)
{
	struct nfsclclient *clp;
	struct ucred *cred;
	int igotlock;

	/*
	 * For the case that matters, this is the thread that set
	 * MNTK_UNMOUNTF, so it will see it set. The code that follows is
	 * done to ensure that any thread executing nfscl_getcl() after
	 * this time, will see MNTK_UNMOUNTF set. nfscl_getcl() uses the
	 * mutex for NFSLOCKCLSTATE(), so it is "m" for the following
	 * explanation, courtesy of Alan Cox.
	 * What follows is a snippet from Alan Cox's email at:
	 * https://docs.FreeBSD.org/cgi/mid.cgi?BANLkTikR3d65zPHo9==08ZfJ2vmqZucEvw
	 *
	 * 1. Set MNTK_UNMOUNTF
	 * 2. Acquire a standard FreeBSD mutex "m".
	 * 3. Update some data structures.
	 * 4. Release mutex "m".
	 *
	 * Then, other threads that acquire "m" after step 4 has occurred will
	 * see MNTK_UNMOUNTF as set.  But, other threads that beat thread X to
	 * step 2 may or may not see MNTK_UNMOUNTF as set.
	 */
	NFSLOCKCLSTATE();
	if ((nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
		/*
		 * Steps 3 and 4 from above: the increment is the "update"
		 * and the mutex is released once and then re-acquired.
		 */
		fake_global++;
		NFSUNLOCKCLSTATE();
		NFSLOCKCLSTATE();
	}

	clp = nmp->nm_clp;
	if (clp != NULL) {
		if ((clp->nfsc_flags & NFSCLFLAGS_INITED) == 0)
			panic("nfscl umount");

		/*
		 * First, handshake with the nfscl renew thread, to terminate
		 * it.
		 */
		clp->nfsc_flags |= NFSCLFLAGS_UMOUNT;
		/* Re-check at least once per "hz" ticks until the flag clears. */
		while (clp->nfsc_flags & NFSCLFLAGS_HASTHREAD)
			(void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT,
			    "nfsclumnt", hz);

		/*
		 * Now, get the exclusive lock on the client state, so
		 * that no uses of the state are still in progress.
		 */
		do {
			igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
			    NFSCLSTATEMUTEXPTR, NULL);
		} while (!igotlock);
		NFSUNLOCKCLSTATE();

		/*
		 * Free up all the state. It will expire on the server, but
		 * maybe we should do a SetClientId/SetClientIdConfirm so
		 * the server throws it away?
		 */
		/*
		 * NOTE(review): this unlinks clp from its nfsc_list after the
		 * CLSTATE mutex has been dropped; confirm that no other
		 * thread can be walking or modifying that list at this point.
		 */
		LIST_REMOVE(clp, nfsc_list);
		nfscl_delegreturnall(clp, p);
		cred = newnfs_getcred();
		if (NFSHASNFSV4N(nmp)) {
			(void)nfsrpc_destroysession(nmp, clp, cred, p);
			(void)nfsrpc_destroyclient(nmp, clp, cred, p);
		} else
			(void)nfsrpc_setclient(nmp, clp, 0, NULL, cred, p);
		nfscl_cleanclient(clp);
		nmp->nm_clp = NULL;
		NFSFREECRED(cred);
		/* The exclusive lock acquired above goes away with clp. */
		free(clp, M_NFSCLCLIENT);
	} else
		NFSUNLOCKCLSTATE();
}
2001 
2002 /*
2003  * This function is called when a server replies with NFSERR_STALECLIENTID
2004  * NFSERR_STALESTATEID or NFSERR_BADSESSION. It traverses the clientid lists,
2005  * doing Opens and Locks with reclaim. If these fail, it deletes the
2006  * corresponding state.
2007  */
2008 static void
2009 nfscl_recover(struct nfsclclient *clp, bool *retokp, struct ucred *cred,
2010     NFSPROC_T *p)
2011 {
2012 	struct nfsclowner *owp, *nowp;
2013 	struct nfsclopen *op, *nop;
2014 	struct nfscllockowner *lp, *nlp;
2015 	struct nfscllock *lop, *nlop;
2016 	struct nfscldeleg *dp, *ndp, *tdp;
2017 	struct nfsmount *nmp;
2018 	struct ucred *tcred;
2019 	struct nfsclopenhead extra_open;
2020 	struct nfscldeleghead extra_deleg;
2021 	struct nfsreq *rep;
2022 	u_int64_t len;
2023 	u_int32_t delegtype = NFSV4OPEN_DELEGATEWRITE, mode;
2024 	int i, igotlock = 0, error, trycnt, firstlock;
2025 	struct nfscllayout *lyp, *nlyp;
2026 	bool recovered_one;
2027 
2028 	/*
2029 	 * First, lock the client structure, so everyone else will
2030 	 * block when trying to use state.
2031 	 */
2032 	NFSLOCKCLSTATE();
2033 	clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG;
2034 	do {
2035 		igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
2036 		    NFSCLSTATEMUTEXPTR, NULL);
2037 	} while (!igotlock);
2038 	NFSUNLOCKCLSTATE();
2039 
2040 	nmp = clp->nfsc_nmp;
2041 	if (nmp == NULL)
2042 		panic("nfscl recover");
2043 
2044 	/*
2045 	 * For now, just get rid of all layouts. There may be a need
2046 	 * to do LayoutCommit Ops with reclaim == true later.
2047 	 */
2048 	TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp)
2049 		nfscl_freelayout(lyp);
2050 	TAILQ_INIT(&clp->nfsc_layout);
2051 	for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++)
2052 		LIST_INIT(&clp->nfsc_layouthash[i]);
2053 
2054 	trycnt = 5;
2055 	tcred = NULL;
2056 	do {
2057 		error = nfsrpc_setclient(nmp, clp, 1, retokp, cred, p);
2058 	} while ((error == NFSERR_STALECLIENTID ||
2059 	     error == NFSERR_BADSESSION ||
2060 	     error == NFSERR_STALEDONTRECOVER) && --trycnt > 0);
2061 	if (error) {
2062 		NFSLOCKCLSTATE();
2063 		clp->nfsc_flags &= ~(NFSCLFLAGS_RECOVER |
2064 		    NFSCLFLAGS_RECVRINPROG);
2065 		wakeup(&clp->nfsc_flags);
2066 		nfsv4_unlock(&clp->nfsc_lock, 0);
2067 		NFSUNLOCKCLSTATE();
2068 		return;
2069 	}
2070 	clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID;
2071 	clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2072 
2073 	/*
2074 	 * Mark requests already queued on the server, so that they don't
2075 	 * initiate another recovery cycle. Any requests already in the
2076 	 * queue that handle state information will have the old stale
2077 	 * clientid/stateid and will get a NFSERR_STALESTATEID,
2078 	 * NFSERR_STALECLIENTID or NFSERR_BADSESSION reply from the server.
2079 	 * This will be translated to NFSERR_STALEDONTRECOVER when
2080 	 * R_DONTRECOVER is set.
2081 	 */
2082 	NFSLOCKREQ();
2083 	TAILQ_FOREACH(rep, &nfsd_reqq, r_chain) {
2084 		if (rep->r_nmp == nmp)
2085 			rep->r_flags |= R_DONTRECOVER;
2086 	}
2087 	NFSUNLOCKREQ();
2088 
2089 	/*
2090 	 * If nfsrpc_setclient() returns *retokp == true,
2091 	 * no more recovery is needed.
2092 	 */
2093 	if (*retokp)
2094 		goto out;
2095 
2096 	/*
2097 	 * Now, mark all delegations "need reclaim".
2098 	 */
2099 	TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list)
2100 		dp->nfsdl_flags |= NFSCLDL_NEEDRECLAIM;
2101 
2102 	TAILQ_INIT(&extra_deleg);
2103 	LIST_INIT(&extra_open);
2104 	/*
2105 	 * Now traverse the state lists, doing Open and Lock Reclaims.
2106 	 */
2107 	tcred = newnfs_getcred();
2108 	recovered_one = false;
2109 	owp = LIST_FIRST(&clp->nfsc_owner);
2110 	while (owp != NULL) {
2111 	    nowp = LIST_NEXT(owp, nfsow_list);
2112 	    owp->nfsow_seqid = 0;
2113 	    op = LIST_FIRST(&owp->nfsow_open);
2114 	    while (op != NULL) {
2115 		nop = LIST_NEXT(op, nfso_list);
2116 		if (error != NFSERR_NOGRACE && error != NFSERR_BADSESSION) {
2117 		    /* Search for a delegation to reclaim with the open */
2118 		    TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
2119 			if (!(dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM))
2120 			    continue;
2121 			if ((dp->nfsdl_flags & NFSCLDL_WRITE)) {
2122 			    mode = NFSV4OPEN_ACCESSWRITE;
2123 			    delegtype = NFSV4OPEN_DELEGATEWRITE;
2124 			} else {
2125 			    mode = NFSV4OPEN_ACCESSREAD;
2126 			    delegtype = NFSV4OPEN_DELEGATEREAD;
2127 			}
2128 			if ((op->nfso_mode & mode) == mode &&
2129 			    op->nfso_fhlen == dp->nfsdl_fhlen &&
2130 			    !NFSBCMP(op->nfso_fh, dp->nfsdl_fh, op->nfso_fhlen))
2131 			    break;
2132 		    }
2133 		    ndp = dp;
2134 		    if (dp == NULL)
2135 			delegtype = NFSV4OPEN_DELEGATENONE;
2136 		    newnfs_copycred(&op->nfso_cred, tcred);
2137 		    error = nfscl_tryopen(nmp, NULL, op->nfso_fh,
2138 			op->nfso_fhlen, op->nfso_fh, op->nfso_fhlen,
2139 			op->nfso_mode, op, NULL, 0, &ndp, 1, delegtype,
2140 			tcred, p);
2141 		    if (!error) {
2142 			recovered_one = true;
2143 			/* Handle any replied delegation */
2144 			if (ndp != NULL && ((ndp->nfsdl_flags & NFSCLDL_WRITE)
2145 			    || NFSMNT_RDONLY(nmp->nm_mountp))) {
2146 			    if ((ndp->nfsdl_flags & NFSCLDL_WRITE))
2147 				mode = NFSV4OPEN_ACCESSWRITE;
2148 			    else
2149 				mode = NFSV4OPEN_ACCESSREAD;
2150 			    TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
2151 				if (!(dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM))
2152 				    continue;
2153 				if ((op->nfso_mode & mode) == mode &&
2154 				    op->nfso_fhlen == dp->nfsdl_fhlen &&
2155 				    !NFSBCMP(op->nfso_fh, dp->nfsdl_fh,
2156 				    op->nfso_fhlen)) {
2157 				    dp->nfsdl_stateid = ndp->nfsdl_stateid;
2158 				    dp->nfsdl_sizelimit = ndp->nfsdl_sizelimit;
2159 				    dp->nfsdl_ace = ndp->nfsdl_ace;
2160 				    dp->nfsdl_change = ndp->nfsdl_change;
2161 				    dp->nfsdl_flags &= ~NFSCLDL_NEEDRECLAIM;
2162 				    if ((ndp->nfsdl_flags & NFSCLDL_RECALL))
2163 					dp->nfsdl_flags |= NFSCLDL_RECALL;
2164 				    free(ndp, M_NFSCLDELEG);
2165 				    ndp = NULL;
2166 				    break;
2167 				}
2168 			    }
2169 			}
2170 			if (ndp != NULL)
2171 			    TAILQ_INSERT_HEAD(&extra_deleg, ndp, nfsdl_list);
2172 
2173 			/* and reclaim all byte range locks */
2174 			lp = LIST_FIRST(&op->nfso_lock);
2175 			while (lp != NULL) {
2176 			    nlp = LIST_NEXT(lp, nfsl_list);
2177 			    lp->nfsl_seqid = 0;
2178 			    firstlock = 1;
2179 			    lop = LIST_FIRST(&lp->nfsl_lock);
2180 			    while (lop != NULL) {
2181 				nlop = LIST_NEXT(lop, nfslo_list);
2182 				if (lop->nfslo_end == NFS64BITSSET)
2183 				    len = NFS64BITSSET;
2184 				else
2185 				    len = lop->nfslo_end - lop->nfslo_first;
2186 				error = nfscl_trylock(nmp, NULL,
2187 				    op->nfso_fh, op->nfso_fhlen, lp,
2188 				    firstlock, 1, lop->nfslo_first, len,
2189 				    lop->nfslo_type, tcred, p);
2190 				if (error != 0)
2191 				    nfscl_freelock(lop, 0);
2192 				else
2193 				    firstlock = 0;
2194 				lop = nlop;
2195 			    }
2196 			    /* If no locks, but a lockowner, just delete it. */
2197 			    if (LIST_EMPTY(&lp->nfsl_lock))
2198 				nfscl_freelockowner(lp, 0);
2199 			    lp = nlp;
2200 			}
2201 		    } else if (error == NFSERR_NOGRACE && !recovered_one &&
2202 			NFSHASNFSV4N(nmp)) {
2203 			/*
2204 			 * For NFSv4.1/4.2, the NFSERR_EXPIRED case will
2205 			 * actually end up here, since the client will do
2206 			 * a recovery for NFSERR_BADSESSION, but will get
2207 			 * an NFSERR_NOGRACE reply for the first "reclaim"
2208 			 * attempt.
2209 			 * So, call nfscl_expireclient() to recover the
2210 			 * opens as best we can and then do a reclaim
2211 			 * complete and return.
2212 			 */
2213 			nfsrpc_reclaimcomplete(nmp, cred, p);
2214 			nfscl_expireclient(clp, nmp, tcred, p);
2215 			goto out;
2216 		    }
2217 		}
2218 		if (error != 0 && error != NFSERR_BADSESSION)
2219 		    nfscl_freeopen(op, 0);
2220 		op = nop;
2221 	    }
2222 	    owp = nowp;
2223 	}
2224 
2225 	/*
2226 	 * Now, try and get any delegations not yet reclaimed by cobbling
2227 	 * to-gether an appropriate open.
2228 	 */
2229 	nowp = NULL;
2230 	dp = TAILQ_FIRST(&clp->nfsc_deleg);
2231 	while (dp != NULL) {
2232 	    ndp = TAILQ_NEXT(dp, nfsdl_list);
2233 	    if ((dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM)) {
2234 		if (nowp == NULL) {
2235 		    nowp = malloc(
2236 			sizeof (struct nfsclowner), M_NFSCLOWNER, M_WAITOK);
2237 		    /*
2238 		     * Name must be as long an largest possible
2239 		     * NFSV4CL_LOCKNAMELEN. 12 for now.
2240 		     */
2241 		    NFSBCOPY("RECLAIMDELEG", nowp->nfsow_owner,
2242 			NFSV4CL_LOCKNAMELEN);
2243 		    LIST_INIT(&nowp->nfsow_open);
2244 		    nowp->nfsow_clp = clp;
2245 		    nowp->nfsow_seqid = 0;
2246 		    nowp->nfsow_defunct = 0;
2247 		    nfscl_lockinit(&nowp->nfsow_rwlock);
2248 		}
2249 		nop = NULL;
2250 		if (error != NFSERR_NOGRACE && error != NFSERR_BADSESSION) {
2251 		    nop = malloc(sizeof (struct nfsclopen) +
2252 			dp->nfsdl_fhlen - 1, M_NFSCLOPEN, M_WAITOK);
2253 		    nop->nfso_own = nowp;
2254 		    if ((dp->nfsdl_flags & NFSCLDL_WRITE)) {
2255 			nop->nfso_mode = NFSV4OPEN_ACCESSWRITE;
2256 			delegtype = NFSV4OPEN_DELEGATEWRITE;
2257 		    } else {
2258 			nop->nfso_mode = NFSV4OPEN_ACCESSREAD;
2259 			delegtype = NFSV4OPEN_DELEGATEREAD;
2260 		    }
2261 		    nop->nfso_opencnt = 0;
2262 		    nop->nfso_posixlock = 1;
2263 		    nop->nfso_fhlen = dp->nfsdl_fhlen;
2264 		    NFSBCOPY(dp->nfsdl_fh, nop->nfso_fh, dp->nfsdl_fhlen);
2265 		    LIST_INIT(&nop->nfso_lock);
2266 		    nop->nfso_stateid.seqid = 0;
2267 		    nop->nfso_stateid.other[0] = 0;
2268 		    nop->nfso_stateid.other[1] = 0;
2269 		    nop->nfso_stateid.other[2] = 0;
2270 		    newnfs_copycred(&dp->nfsdl_cred, tcred);
2271 		    newnfs_copyincred(tcred, &nop->nfso_cred);
2272 		    tdp = NULL;
2273 		    error = nfscl_tryopen(nmp, NULL, nop->nfso_fh,
2274 			nop->nfso_fhlen, nop->nfso_fh, nop->nfso_fhlen,
2275 			nop->nfso_mode, nop, NULL, 0, &tdp, 1,
2276 			delegtype, tcred, p);
2277 		    if (tdp != NULL) {
2278 			if ((tdp->nfsdl_flags & NFSCLDL_WRITE))
2279 			    mode = NFSV4OPEN_ACCESSWRITE;
2280 			else
2281 			    mode = NFSV4OPEN_ACCESSREAD;
2282 			if ((nop->nfso_mode & mode) == mode &&
2283 			    nop->nfso_fhlen == tdp->nfsdl_fhlen &&
2284 			    !NFSBCMP(nop->nfso_fh, tdp->nfsdl_fh,
2285 			    nop->nfso_fhlen)) {
2286 			    dp->nfsdl_stateid = tdp->nfsdl_stateid;
2287 			    dp->nfsdl_sizelimit = tdp->nfsdl_sizelimit;
2288 			    dp->nfsdl_ace = tdp->nfsdl_ace;
2289 			    dp->nfsdl_change = tdp->nfsdl_change;
2290 			    dp->nfsdl_flags &= ~NFSCLDL_NEEDRECLAIM;
2291 			    if ((tdp->nfsdl_flags & NFSCLDL_RECALL))
2292 				dp->nfsdl_flags |= NFSCLDL_RECALL;
2293 			    free(tdp, M_NFSCLDELEG);
2294 			} else {
2295 			    TAILQ_INSERT_HEAD(&extra_deleg, tdp, nfsdl_list);
2296 			}
2297 		    }
2298 		}
2299 		if (error) {
2300 		    if (nop != NULL)
2301 			free(nop, M_NFSCLOPEN);
2302 		    if (error == NFSERR_NOGRACE && !recovered_one &&
2303 			NFSHASNFSV4N(nmp)) {
2304 			/*
2305 			 * For NFSv4.1/4.2, the NFSERR_EXPIRED case will
2306 			 * actually end up here, since the client will do
2307 			 * a recovery for NFSERR_BADSESSION, but will get
2308 			 * an NFSERR_NOGRACE reply for the first "reclaim"
2309 			 * attempt.
2310 			 * So, call nfscl_expireclient() to recover the
2311 			 * opens as best we can and then do a reclaim
2312 			 * complete and return.
2313 			 */
2314 			nfsrpc_reclaimcomplete(nmp, cred, p);
2315 			nfscl_expireclient(clp, nmp, tcred, p);
2316 			free(nowp, M_NFSCLOWNER);
2317 			goto out;
2318 		    }
2319 		    /*
2320 		     * Couldn't reclaim it, so throw the state
2321 		     * away. Ouch!!
2322 		     */
2323 		    nfscl_cleandeleg(dp);
2324 		    nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
2325 		} else {
2326 		    recovered_one = true;
2327 		    LIST_INSERT_HEAD(&extra_open, nop, nfso_list);
2328 		}
2329 	    }
2330 	    dp = ndp;
2331 	}
2332 
2333 	/*
2334 	 * Now, get rid of extra Opens and Delegations.
2335 	 */
2336 	LIST_FOREACH_SAFE(op, &extra_open, nfso_list, nop) {
2337 		do {
2338 			newnfs_copycred(&op->nfso_cred, tcred);
2339 			error = nfscl_tryclose(op, tcred, nmp, p);
2340 			if (error == NFSERR_GRACE)
2341 				(void) nfs_catnap(PZERO, error, "nfsexcls");
2342 		} while (error == NFSERR_GRACE);
2343 		LIST_REMOVE(op, nfso_list);
2344 		free(op, M_NFSCLOPEN);
2345 	}
2346 	if (nowp != NULL)
2347 		free(nowp, M_NFSCLOWNER);
2348 
2349 	TAILQ_FOREACH_SAFE(dp, &extra_deleg, nfsdl_list, ndp) {
2350 		do {
2351 			newnfs_copycred(&dp->nfsdl_cred, tcred);
2352 			error = nfscl_trydelegreturn(dp, tcred, nmp, p);
2353 			if (error == NFSERR_GRACE)
2354 				(void) nfs_catnap(PZERO, error, "nfsexdlg");
2355 		} while (error == NFSERR_GRACE);
2356 		TAILQ_REMOVE(&extra_deleg, dp, nfsdl_list);
2357 		free(dp, M_NFSCLDELEG);
2358 	}
2359 
2360 	/* For NFSv4.1 or later, do a RECLAIM_COMPLETE. */
2361 	if (NFSHASNFSV4N(nmp))
2362 		(void)nfsrpc_reclaimcomplete(nmp, cred, p);
2363 
2364 out:
2365 	NFSLOCKCLSTATE();
2366 	clp->nfsc_flags &= ~NFSCLFLAGS_RECVRINPROG;
2367 	wakeup(&clp->nfsc_flags);
2368 	nfsv4_unlock(&clp->nfsc_lock, 0);
2369 	NFSUNLOCKCLSTATE();
2370 	if (tcred != NULL)
2371 		NFSFREECRED(tcred);
2372 }
2373 
2374 /*
2375  * This function is called when a server replies with NFSERR_EXPIRED.
2376  * It deletes all state for the client and does a fresh SetClientId/confirm.
2377  * XXX Someday it should post a signal to the process(es) that hold the
2378  * state, so they know that lock state has been lost.
2379  */
2380 int
2381 nfscl_hasexpired(struct nfsclclient *clp, u_int32_t clidrev, NFSPROC_T *p)
2382 {
2383 	struct nfsmount *nmp;
2384 	struct ucred *cred;
2385 	int igotlock = 0, error, trycnt;
2386 
2387 	/*
2388 	 * If the clientid has gone away or a new SetClientid has already
2389 	 * been done, just return ok.
2390 	 */
2391 	if (clp == NULL || clidrev != clp->nfsc_clientidrev)
2392 		return (0);
2393 
2394 	/*
2395 	 * First, lock the client structure, so everyone else will
2396 	 * block when trying to use state. Also, use NFSCLFLAGS_EXPIREIT so
2397 	 * that only one thread does the work.
2398 	 */
2399 	NFSLOCKCLSTATE();
2400 	clp->nfsc_flags |= NFSCLFLAGS_EXPIREIT;
2401 	do {
2402 		igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
2403 		    NFSCLSTATEMUTEXPTR, NULL);
2404 	} while (!igotlock && (clp->nfsc_flags & NFSCLFLAGS_EXPIREIT));
2405 	if ((clp->nfsc_flags & NFSCLFLAGS_EXPIREIT) == 0) {
2406 		if (igotlock)
2407 			nfsv4_unlock(&clp->nfsc_lock, 0);
2408 		NFSUNLOCKCLSTATE();
2409 		return (0);
2410 	}
2411 	clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG;
2412 	NFSUNLOCKCLSTATE();
2413 
2414 	nmp = clp->nfsc_nmp;
2415 	if (nmp == NULL)
2416 		panic("nfscl expired");
2417 	cred = newnfs_getcred();
2418 	trycnt = 5;
2419 	do {
2420 		error = nfsrpc_setclient(nmp, clp, 0, NULL, cred, p);
2421 	} while ((error == NFSERR_STALECLIENTID ||
2422 	     error == NFSERR_BADSESSION ||
2423 	     error == NFSERR_STALEDONTRECOVER) && --trycnt > 0);
2424 	if (error) {
2425 		NFSLOCKCLSTATE();
2426 		clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2427 	} else {
2428 		/*
2429 		 * Expire the state for the client.
2430 		 */
2431 		nfscl_expireclient(clp, nmp, cred, p);
2432 		NFSLOCKCLSTATE();
2433 		clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID;
2434 		clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2435 	}
2436 	clp->nfsc_flags &= ~(NFSCLFLAGS_EXPIREIT | NFSCLFLAGS_RECVRINPROG);
2437 	wakeup(&clp->nfsc_flags);
2438 	nfsv4_unlock(&clp->nfsc_lock, 0);
2439 	NFSUNLOCKCLSTATE();
2440 	NFSFREECRED(cred);
2441 	return (error);
2442 }
2443 
2444 /*
2445  * This function inserts a lock in the list after insert_lop.
2446  */
2447 static void
2448 nfscl_insertlock(struct nfscllockowner *lp, struct nfscllock *new_lop,
2449     struct nfscllock *insert_lop, int local)
2450 {
2451 
2452 	if ((struct nfscllockowner *)insert_lop == lp)
2453 		LIST_INSERT_HEAD(&lp->nfsl_lock, new_lop, nfslo_list);
2454 	else
2455 		LIST_INSERT_AFTER(insert_lop, new_lop, nfslo_list);
2456 	if (local)
2457 		nfsstatsv1.cllocallocks++;
2458 	else
2459 		nfsstatsv1.cllocks++;
2460 }
2461 
2462 /*
2463  * This function updates the locking for a lock owner and given file. It
2464  * maintains a list of lock ranges ordered on increasing file offset that
2465  * are NFSCLLOCK_READ or NFSCLLOCK_WRITE and non-overlapping (aka POSIX style).
2466  * It always adds new_lop to the list and sometimes uses the one pointed
2467  * at by other_lopp.
2468  * Returns 1 if the locks were modified, 0 otherwise.
2469  */
2470 static int
2471 nfscl_updatelock(struct nfscllockowner *lp, struct nfscllock **new_lopp,
2472     struct nfscllock **other_lopp, int local)
2473 {
2474 	struct nfscllock *new_lop = *new_lopp;
2475 	struct nfscllock *lop, *tlop, *ilop;
2476 	struct nfscllock *other_lop;
2477 	int unlock = 0, modified = 0;
2478 	u_int64_t tmp;
2479 
2480 	/*
2481 	 * Work down the list until the lock is merged.
2482 	 */
2483 	if (new_lop->nfslo_type == F_UNLCK)
2484 		unlock = 1;
2485 	ilop = (struct nfscllock *)lp;
2486 	lop = LIST_FIRST(&lp->nfsl_lock);
2487 	while (lop != NULL) {
2488 	    /*
2489 	     * Only check locks for this file that aren't before the start of
2490 	     * new lock's range.
2491 	     */
2492 	    if (lop->nfslo_end >= new_lop->nfslo_first) {
2493 		if (new_lop->nfslo_end < lop->nfslo_first) {
2494 		    /*
2495 		     * If the new lock ends before the start of the
2496 		     * current lock's range, no merge, just insert
2497 		     * the new lock.
2498 		     */
2499 		    break;
2500 		}
2501 		if (new_lop->nfslo_type == lop->nfslo_type ||
2502 		    (new_lop->nfslo_first <= lop->nfslo_first &&
2503 		     new_lop->nfslo_end >= lop->nfslo_end)) {
2504 		    /*
2505 		     * This lock can be absorbed by the new lock/unlock.
2506 		     * This happens when it covers the entire range
2507 		     * of the old lock or is contiguous
2508 		     * with the old lock and is of the same type or an
2509 		     * unlock.
2510 		     */
2511 		    if (new_lop->nfslo_type != lop->nfslo_type ||
2512 			new_lop->nfslo_first != lop->nfslo_first ||
2513 			new_lop->nfslo_end != lop->nfslo_end)
2514 			modified = 1;
2515 		    if (lop->nfslo_first < new_lop->nfslo_first)
2516 			new_lop->nfslo_first = lop->nfslo_first;
2517 		    if (lop->nfslo_end > new_lop->nfslo_end)
2518 			new_lop->nfslo_end = lop->nfslo_end;
2519 		    tlop = lop;
2520 		    lop = LIST_NEXT(lop, nfslo_list);
2521 		    nfscl_freelock(tlop, local);
2522 		    continue;
2523 		}
2524 
2525 		/*
2526 		 * All these cases are for contiguous locks that are not the
2527 		 * same type, so they can't be merged.
2528 		 */
2529 		if (new_lop->nfslo_first <= lop->nfslo_first) {
2530 		    /*
2531 		     * This case is where the new lock overlaps with the
2532 		     * first part of the old lock. Move the start of the
2533 		     * old lock to just past the end of the new lock. The
2534 		     * new lock will be inserted in front of the old, since
2535 		     * ilop hasn't been updated. (We are done now.)
2536 		     */
2537 		    if (lop->nfslo_first != new_lop->nfslo_end) {
2538 			lop->nfslo_first = new_lop->nfslo_end;
2539 			modified = 1;
2540 		    }
2541 		    break;
2542 		}
2543 		if (new_lop->nfslo_end >= lop->nfslo_end) {
2544 		    /*
2545 		     * This case is where the new lock overlaps with the
2546 		     * end of the old lock's range. Move the old lock's
2547 		     * end to just before the new lock's first and insert
2548 		     * the new lock after the old lock.
2549 		     * Might not be done yet, since the new lock could
2550 		     * overlap further locks with higher ranges.
2551 		     */
2552 		    if (lop->nfslo_end != new_lop->nfslo_first) {
2553 			lop->nfslo_end = new_lop->nfslo_first;
2554 			modified = 1;
2555 		    }
2556 		    ilop = lop;
2557 		    lop = LIST_NEXT(lop, nfslo_list);
2558 		    continue;
2559 		}
2560 		/*
2561 		 * The final case is where the new lock's range is in the
2562 		 * middle of the current lock's and splits the current lock
2563 		 * up. Use *other_lopp to handle the second part of the
2564 		 * split old lock range. (We are done now.)
2565 		 * For unlock, we use new_lop as other_lop and tmp, since
2566 		 * other_lop and new_lop are the same for this case.
2567 		 * We noted the unlock case above, so we don't need
2568 		 * new_lop->nfslo_type any longer.
2569 		 */
2570 		tmp = new_lop->nfslo_first;
2571 		if (unlock) {
2572 		    other_lop = new_lop;
2573 		    *new_lopp = NULL;
2574 		} else {
2575 		    other_lop = *other_lopp;
2576 		    *other_lopp = NULL;
2577 		}
2578 		other_lop->nfslo_first = new_lop->nfslo_end;
2579 		other_lop->nfslo_end = lop->nfslo_end;
2580 		other_lop->nfslo_type = lop->nfslo_type;
2581 		lop->nfslo_end = tmp;
2582 		nfscl_insertlock(lp, other_lop, lop, local);
2583 		ilop = lop;
2584 		modified = 1;
2585 		break;
2586 	    }
2587 	    ilop = lop;
2588 	    lop = LIST_NEXT(lop, nfslo_list);
2589 	    if (lop == NULL)
2590 		break;
2591 	}
2592 
2593 	/*
2594 	 * Insert the new lock in the list at the appropriate place.
2595 	 */
2596 	if (!unlock) {
2597 		nfscl_insertlock(lp, new_lop, ilop, local);
2598 		*new_lopp = NULL;
2599 		modified = 1;
2600 	}
2601 	return (modified);
2602 }
2603 
2604 /*
2605  * This function must be run as a kernel thread.
2606  * It does Renew Ops and recovery, when required.
2607  */
2608 void
2609 nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p)
2610 {
2611 	struct nfsclowner *owp, *nowp;
2612 	struct nfsclopen *op;
2613 	struct nfscllockowner *lp, *nlp;
2614 	struct nfscldeleghead dh;
2615 	struct nfscldeleg *dp, *ndp;
2616 	struct ucred *cred;
2617 	u_int32_t clidrev;
2618 	int error, cbpathdown, islept, igotlock, ret, clearok;
2619 	uint32_t recover_done_time = 0;
2620 	time_t mytime;
2621 	static time_t prevsec = 0;
2622 	struct nfscllockownerfh *lfhp, *nlfhp;
2623 	struct nfscllockownerfhhead lfh;
2624 	struct nfscllayout *lyp, *nlyp;
2625 	struct nfscldevinfo *dip, *ndip;
2626 	struct nfscllayouthead rlh;
2627 	struct nfsclrecalllayout *recallp;
2628 	struct nfsclds *dsp;
2629 	bool retok;
2630 	struct mount *mp;
2631 	vnode_t vp;
2632 
2633 	cred = newnfs_getcred();
2634 	NFSLOCKCLSTATE();
2635 	clp->nfsc_flags |= NFSCLFLAGS_HASTHREAD;
2636 	mp = clp->nfsc_nmp->nm_mountp;
2637 	NFSUNLOCKCLSTATE();
2638 	for(;;) {
2639 		newnfs_setroot(cred);
2640 		cbpathdown = 0;
2641 		if (clp->nfsc_flags & NFSCLFLAGS_RECOVER) {
2642 			/*
2643 			 * Only allow one full recover within 1/2 of the lease
2644 			 * duration (nfsc_renew).
2645 			 * retok is value/result.  If passed in set to true,
2646 			 * it indicates only a CreateSession operation should
2647 			 * be attempted.
2648 			 * If it is returned true, it indicates that the
2649 			 * recovery only required a CreateSession.
2650 			 */
2651 			retok = true;
2652 			if (recover_done_time < NFSD_MONOSEC) {
2653 				recover_done_time = NFSD_MONOSEC +
2654 				    clp->nfsc_renew;
2655 				retok = false;
2656 			}
2657 			NFSCL_DEBUG(1, "Doing recovery, only "
2658 			    "createsession=%d\n", retok);
2659 			nfscl_recover(clp, &retok, cred, p);
2660 		}
2661 		if (clp->nfsc_expire <= NFSD_MONOSEC &&
2662 		    (clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID)) {
2663 			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
2664 			clidrev = clp->nfsc_clientidrev;
2665 			error = nfsrpc_renew(clp, NULL, cred, p);
2666 			if (error == NFSERR_CBPATHDOWN)
2667 			    cbpathdown = 1;
2668 			else if (error == NFSERR_STALECLIENTID ||
2669 			    error == NFSERR_BADSESSION) {
2670 			    NFSLOCKCLSTATE();
2671 			    clp->nfsc_flags |= NFSCLFLAGS_RECOVER;
2672 			    NFSUNLOCKCLSTATE();
2673 			} else if (error == NFSERR_EXPIRED)
2674 			    (void) nfscl_hasexpired(clp, clidrev, p);
2675 		}
2676 
2677 checkdsrenew:
2678 		if (NFSHASNFSV4N(clp->nfsc_nmp)) {
2679 			/* Do renews for any DS sessions. */
2680 			NFSLOCKMNT(clp->nfsc_nmp);
2681 			/* Skip first entry, since the MDS is handled above. */
2682 			dsp = TAILQ_FIRST(&clp->nfsc_nmp->nm_sess);
2683 			if (dsp != NULL)
2684 				dsp = TAILQ_NEXT(dsp, nfsclds_list);
2685 			while (dsp != NULL) {
2686 				if (dsp->nfsclds_expire <= NFSD_MONOSEC &&
2687 				    dsp->nfsclds_sess.nfsess_defunct == 0) {
2688 					dsp->nfsclds_expire = NFSD_MONOSEC +
2689 					    clp->nfsc_renew;
2690 					NFSUNLOCKMNT(clp->nfsc_nmp);
2691 					(void)nfsrpc_renew(clp, dsp, cred, p);
2692 					goto checkdsrenew;
2693 				}
2694 				dsp = TAILQ_NEXT(dsp, nfsclds_list);
2695 			}
2696 			NFSUNLOCKMNT(clp->nfsc_nmp);
2697 		}
2698 
2699 		TAILQ_INIT(&dh);
2700 		NFSLOCKCLSTATE();
2701 		if (cbpathdown)
2702 			/* It's a Total Recall! */
2703 			nfscl_totalrecall(clp);
2704 
2705 		/*
2706 		 * Now, handle defunct owners.
2707 		 */
2708 		LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
2709 			if (LIST_EMPTY(&owp->nfsow_open)) {
2710 				if (owp->nfsow_defunct != 0)
2711 					nfscl_freeopenowner(owp, 0);
2712 			}
2713 		}
2714 
2715 		/*
2716 		 * Do the recall on any delegations. To avoid trouble, always
2717 		 * come back up here after having slept.
2718 		 */
2719 		igotlock = 0;
2720 tryagain:
2721 		dp = TAILQ_FIRST(&clp->nfsc_deleg);
2722 		while (dp != NULL) {
2723 			ndp = TAILQ_NEXT(dp, nfsdl_list);
2724 			if ((dp->nfsdl_flags & NFSCLDL_RECALL)) {
2725 				/*
2726 				 * Wait for outstanding I/O ops to be done.
2727 				 */
2728 				if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
2729 				    if (igotlock) {
2730 					nfsv4_unlock(&clp->nfsc_lock, 0);
2731 					igotlock = 0;
2732 				    }
2733 				    dp->nfsdl_rwlock.nfslock_lock |=
2734 					NFSV4LOCK_WANTED;
2735 				    msleep(&dp->nfsdl_rwlock,
2736 					NFSCLSTATEMUTEXPTR, PVFS, "nfscld",
2737 					5 * hz);
2738 				    if (NFSCL_FORCEDISM(mp))
2739 					goto terminate;
2740 				    goto tryagain;
2741 				}
2742 				while (!igotlock) {
2743 				    igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
2744 					&islept, NFSCLSTATEMUTEXPTR, mp);
2745 				    if (igotlock == 0 && NFSCL_FORCEDISM(mp))
2746 					goto terminate;
2747 				    if (islept)
2748 					goto tryagain;
2749 				}
2750 				NFSUNLOCKCLSTATE();
2751 				newnfs_copycred(&dp->nfsdl_cred, cred);
2752 				ret = nfscl_recalldeleg(clp, clp->nfsc_nmp, dp,
2753 				    NULL, cred, p, 1, &vp);
2754 				if (!ret) {
2755 				    nfscl_cleandeleg(dp);
2756 				    TAILQ_REMOVE(&clp->nfsc_deleg, dp,
2757 					nfsdl_list);
2758 				    LIST_REMOVE(dp, nfsdl_hash);
2759 				    TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list);
2760 				    nfscl_delegcnt--;
2761 				    nfsstatsv1.cldelegates--;
2762 				}
2763 				NFSLOCKCLSTATE();
2764 				/*
2765 				 * The nfsc_lock must be released before doing
2766 				 * vrele(), since it might call nfs_inactive().
2767 				 * For the unlikely case where the vnode failed
2768 				 * to be acquired by nfscl_recalldeleg(), a
2769 				 * VOP_RECLAIM() should be in progress and it
2770 				 * will return the delegation.
2771 				 */
2772 				nfsv4_unlock(&clp->nfsc_lock, 0);
2773 				igotlock = 0;
2774 				if (vp != NULL) {
2775 					NFSUNLOCKCLSTATE();
2776 					vrele(vp);
2777 					NFSLOCKCLSTATE();
2778 				}
2779 				goto tryagain;
2780 			}
2781 			dp = ndp;
2782 		}
2783 
2784 		/*
2785 		 * Clear out old delegations, if we are above the high water
2786 		 * mark. Only clear out ones with no state related to them.
2787 		 * The tailq list is in LRU order.
2788 		 */
2789 		dp = TAILQ_LAST(&clp->nfsc_deleg, nfscldeleghead);
2790 		while (nfscl_delegcnt > nfscl_deleghighwater && dp != NULL) {
2791 		    ndp = TAILQ_PREV(dp, nfscldeleghead, nfsdl_list);
2792 		    if (dp->nfsdl_rwlock.nfslock_usecnt == 0 &&
2793 			dp->nfsdl_rwlock.nfslock_lock == 0 &&
2794 			dp->nfsdl_timestamp < NFSD_MONOSEC &&
2795 			(dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_ZAPPED |
2796 			  NFSCLDL_NEEDRECLAIM | NFSCLDL_DELEGRET)) == 0) {
2797 			clearok = 1;
2798 			LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
2799 			    op = LIST_FIRST(&owp->nfsow_open);
2800 			    if (op != NULL) {
2801 				clearok = 0;
2802 				break;
2803 			    }
2804 			}
2805 			if (clearok) {
2806 			    LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
2807 				if (!LIST_EMPTY(&lp->nfsl_lock)) {
2808 				    clearok = 0;
2809 				    break;
2810 				}
2811 			    }
2812 			}
2813 			if (clearok) {
2814 			    TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
2815 			    LIST_REMOVE(dp, nfsdl_hash);
2816 			    TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list);
2817 			    nfscl_delegcnt--;
2818 			    nfsstatsv1.cldelegates--;
2819 			}
2820 		    }
2821 		    dp = ndp;
2822 		}
2823 		if (igotlock)
2824 			nfsv4_unlock(&clp->nfsc_lock, 0);
2825 
2826 		/*
2827 		 * Do the recall on any layouts. To avoid trouble, always
2828 		 * come back up here after having slept.
2829 		 */
2830 		TAILQ_INIT(&rlh);
2831 tryagain2:
2832 		TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp) {
2833 			if ((lyp->nfsly_flags & NFSLY_RECALL) != 0) {
2834 				/*
2835 				 * Wait for outstanding I/O ops to be done.
2836 				 */
2837 				if (lyp->nfsly_lock.nfslock_usecnt > 0 ||
2838 				    (lyp->nfsly_lock.nfslock_lock &
2839 				     NFSV4LOCK_LOCK) != 0) {
2840 					lyp->nfsly_lock.nfslock_lock |=
2841 					    NFSV4LOCK_WANTED;
2842 					msleep(&lyp->nfsly_lock.nfslock_lock,
2843 					    NFSCLSTATEMUTEXPTR, PVFS, "nfslyp",
2844 					    5 * hz);
2845 					if (NFSCL_FORCEDISM(mp))
2846 					    goto terminate;
2847 					goto tryagain2;
2848 				}
2849 				/* Move the layout to the recall list. */
2850 				TAILQ_REMOVE(&clp->nfsc_layout, lyp,
2851 				    nfsly_list);
2852 				LIST_REMOVE(lyp, nfsly_hash);
2853 				TAILQ_INSERT_HEAD(&rlh, lyp, nfsly_list);
2854 
2855 				/* Handle any layout commits. */
2856 				if (!NFSHASNOLAYOUTCOMMIT(clp->nfsc_nmp) &&
2857 				    (lyp->nfsly_flags & NFSLY_WRITTEN) != 0) {
2858 					lyp->nfsly_flags &= ~NFSLY_WRITTEN;
2859 					NFSUNLOCKCLSTATE();
2860 					NFSCL_DEBUG(3, "do layoutcommit\n");
2861 					nfscl_dolayoutcommit(clp->nfsc_nmp, lyp,
2862 					    cred, p);
2863 					NFSLOCKCLSTATE();
2864 					goto tryagain2;
2865 				}
2866 			}
2867 		}
2868 
2869 		/* Now, look for stale layouts. */
2870 		lyp = TAILQ_LAST(&clp->nfsc_layout, nfscllayouthead);
2871 		while (lyp != NULL) {
2872 			nlyp = TAILQ_PREV(lyp, nfscllayouthead, nfsly_list);
2873 			if (lyp->nfsly_timestamp < NFSD_MONOSEC &&
2874 			    (lyp->nfsly_flags & NFSLY_RECALL) == 0 &&
2875 			    lyp->nfsly_lock.nfslock_usecnt == 0 &&
2876 			    lyp->nfsly_lock.nfslock_lock == 0) {
2877 				NFSCL_DEBUG(4, "ret stale lay=%d\n",
2878 				    nfscl_layoutcnt);
2879 				recallp = malloc(sizeof(*recallp),
2880 				    M_NFSLAYRECALL, M_NOWAIT);
2881 				if (recallp == NULL)
2882 					break;
2883 				(void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE,
2884 				    lyp, NFSLAYOUTIOMODE_ANY, 0, UINT64_MAX,
2885 				    lyp->nfsly_stateid.seqid, 0, 0, NULL,
2886 				    recallp);
2887 			}
2888 			lyp = nlyp;
2889 		}
2890 
2891 		/*
2892 		 * Free up any unreferenced device info structures.
2893 		 */
2894 		LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip) {
2895 			if (dip->nfsdi_layoutrefs == 0 &&
2896 			    dip->nfsdi_refcnt == 0) {
2897 				NFSCL_DEBUG(4, "freeing devinfo\n");
2898 				LIST_REMOVE(dip, nfsdi_list);
2899 				nfscl_freedevinfo(dip);
2900 			}
2901 		}
2902 		NFSUNLOCKCLSTATE();
2903 
2904 		/* Do layout return(s), as required. */
2905 		TAILQ_FOREACH_SAFE(lyp, &rlh, nfsly_list, nlyp) {
2906 			TAILQ_REMOVE(&rlh, lyp, nfsly_list);
2907 			NFSCL_DEBUG(4, "ret layout\n");
2908 			nfscl_layoutreturn(clp->nfsc_nmp, lyp, cred, p);
2909 			nfscl_freelayout(lyp);
2910 		}
2911 
2912 		/*
2913 		 * Delegreturn any delegations cleaned out or recalled.
2914 		 */
2915 		TAILQ_FOREACH_SAFE(dp, &dh, nfsdl_list, ndp) {
2916 			newnfs_copycred(&dp->nfsdl_cred, cred);
2917 			(void) nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
2918 			TAILQ_REMOVE(&dh, dp, nfsdl_list);
2919 			free(dp, M_NFSCLDELEG);
2920 		}
2921 
2922 		SLIST_INIT(&lfh);
2923 		/*
2924 		 * Call nfscl_cleanupkext() once per second to check for
2925 		 * open/lock owners where the process has exited.
2926 		 */
2927 		mytime = NFSD_MONOSEC;
2928 		if (prevsec != mytime) {
2929 			prevsec = mytime;
2930 			nfscl_cleanupkext(clp, &lfh);
2931 		}
2932 
2933 		/*
2934 		 * Do a ReleaseLockOwner for all lock owners where the
2935 		 * associated process no longer exists, as found by
2936 		 * nfscl_cleanupkext().
2937 		 */
2938 		newnfs_setroot(cred);
2939 		SLIST_FOREACH_SAFE(lfhp, &lfh, nfslfh_list, nlfhp) {
2940 			LIST_FOREACH_SAFE(lp, &lfhp->nfslfh_lock, nfsl_list,
2941 			    nlp) {
2942 				(void)nfsrpc_rellockown(clp->nfsc_nmp, lp,
2943 				    lfhp->nfslfh_fh, lfhp->nfslfh_len, cred,
2944 				    p);
2945 				nfscl_freelockowner(lp, 0);
2946 			}
2947 			free(lfhp, M_TEMP);
2948 		}
2949 		SLIST_INIT(&lfh);
2950 
2951 		NFSLOCKCLSTATE();
2952 		if ((clp->nfsc_flags & NFSCLFLAGS_RECOVER) == 0)
2953 			(void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT, "nfscl",
2954 			    hz);
2955 terminate:
2956 		if (clp->nfsc_flags & NFSCLFLAGS_UMOUNT) {
2957 			clp->nfsc_flags &= ~NFSCLFLAGS_HASTHREAD;
2958 			NFSUNLOCKCLSTATE();
2959 			NFSFREECRED(cred);
2960 			wakeup((caddr_t)clp);
2961 			return;
2962 		}
2963 		NFSUNLOCKCLSTATE();
2964 	}
2965 }
2966 
2967 /*
2968  * Initiate state recovery. Called when NFSERR_STALECLIENTID,
2969  * NFSERR_STALESTATEID or NFSERR_BADSESSION is received.
2970  */
2971 void
2972 nfscl_initiate_recovery(struct nfsclclient *clp)
2973 {
2974 
2975 	if (clp == NULL)
2976 		return;
2977 	NFSLOCKCLSTATE();
2978 	clp->nfsc_flags |= NFSCLFLAGS_RECOVER;
2979 	NFSUNLOCKCLSTATE();
2980 	wakeup((caddr_t)clp);
2981 }
2982 
2983 /*
2984  * Dump out the state stuff for debugging.
2985  */
2986 void
2987 nfscl_dumpstate(struct nfsmount *nmp, int openowner, int opens,
2988     int lockowner, int locks)
2989 {
2990 	struct nfsclclient *clp;
2991 	struct nfsclowner *owp;
2992 	struct nfsclopen *op;
2993 	struct nfscllockowner *lp;
2994 	struct nfscllock *lop;
2995 	struct nfscldeleg *dp;
2996 
2997 	clp = nmp->nm_clp;
2998 	if (clp == NULL) {
2999 		printf("nfscl dumpstate NULL clp\n");
3000 		return;
3001 	}
3002 	NFSLOCKCLSTATE();
3003 	TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
3004 	  LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
3005 	    if (openowner && !LIST_EMPTY(&owp->nfsow_open))
3006 		printf("owner=0x%x 0x%x 0x%x 0x%x seqid=%d\n",
3007 		    owp->nfsow_owner[0], owp->nfsow_owner[1],
3008 		    owp->nfsow_owner[2], owp->nfsow_owner[3],
3009 		    owp->nfsow_seqid);
3010 	    LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3011 		if (opens)
3012 		    printf("open st=0x%x 0x%x 0x%x cnt=%d fh12=0x%x\n",
3013 			op->nfso_stateid.other[0], op->nfso_stateid.other[1],
3014 			op->nfso_stateid.other[2], op->nfso_opencnt,
3015 			op->nfso_fh[12]);
3016 		LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
3017 		    if (lockowner)
3018 			printf("lckown=0x%x 0x%x 0x%x 0x%x seqid=%d st=0x%x 0x%x 0x%x\n",
3019 			    lp->nfsl_owner[0], lp->nfsl_owner[1],
3020 			    lp->nfsl_owner[2], lp->nfsl_owner[3],
3021 			    lp->nfsl_seqid,
3022 			    lp->nfsl_stateid.other[0], lp->nfsl_stateid.other[1],
3023 			    lp->nfsl_stateid.other[2]);
3024 		    LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
3025 			if (locks)
3026 #ifdef __FreeBSD__
3027 			    printf("lck typ=%d fst=%ju end=%ju\n",
3028 				lop->nfslo_type, (intmax_t)lop->nfslo_first,
3029 				(intmax_t)lop->nfslo_end);
3030 #else
3031 			    printf("lck typ=%d fst=%qd end=%qd\n",
3032 				lop->nfslo_type, lop->nfslo_first,
3033 				lop->nfslo_end);
3034 #endif
3035 		    }
3036 		}
3037 	    }
3038 	  }
3039 	}
3040 	LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3041 	    if (openowner && !LIST_EMPTY(&owp->nfsow_open))
3042 		printf("owner=0x%x 0x%x 0x%x 0x%x seqid=%d\n",
3043 		    owp->nfsow_owner[0], owp->nfsow_owner[1],
3044 		    owp->nfsow_owner[2], owp->nfsow_owner[3],
3045 		    owp->nfsow_seqid);
3046 	    LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3047 		if (opens)
3048 		    printf("open st=0x%x 0x%x 0x%x cnt=%d fh12=0x%x\n",
3049 			op->nfso_stateid.other[0], op->nfso_stateid.other[1],
3050 			op->nfso_stateid.other[2], op->nfso_opencnt,
3051 			op->nfso_fh[12]);
3052 		LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
3053 		    if (lockowner)
3054 			printf("lckown=0x%x 0x%x 0x%x 0x%x seqid=%d st=0x%x 0x%x 0x%x\n",
3055 			    lp->nfsl_owner[0], lp->nfsl_owner[1],
3056 			    lp->nfsl_owner[2], lp->nfsl_owner[3],
3057 			    lp->nfsl_seqid,
3058 			    lp->nfsl_stateid.other[0], lp->nfsl_stateid.other[1],
3059 			    lp->nfsl_stateid.other[2]);
3060 		    LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
3061 			if (locks)
3062 #ifdef __FreeBSD__
3063 			    printf("lck typ=%d fst=%ju end=%ju\n",
3064 				lop->nfslo_type, (intmax_t)lop->nfslo_first,
3065 				(intmax_t)lop->nfslo_end);
3066 #else
3067 			    printf("lck typ=%d fst=%qd end=%qd\n",
3068 				lop->nfslo_type, lop->nfslo_first,
3069 				lop->nfslo_end);
3070 #endif
3071 		    }
3072 		}
3073 	    }
3074 	}
3075 	NFSUNLOCKCLSTATE();
3076 }
3077 
3078 /*
3079  * Check for duplicate open owners and opens.
3080  * (Only used as a diagnostic aid.)
3081  */
3082 void
3083 nfscl_dupopen(vnode_t vp, int dupopens)
3084 {
3085 	struct nfsclclient *clp;
3086 	struct nfsclowner *owp, *owp2;
3087 	struct nfsclopen *op, *op2;
3088 	struct nfsfh *nfhp;
3089 
3090 	clp = VFSTONFS(vp->v_mount)->nm_clp;
3091 	if (clp == NULL) {
3092 		printf("nfscl dupopen NULL clp\n");
3093 		return;
3094 	}
3095 	nfhp = VTONFS(vp)->n_fhp;
3096 	NFSLOCKCLSTATE();
3097 
3098 	/*
3099 	 * First, search for duplicate owners.
3100 	 * These should never happen!
3101 	 */
3102 	LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
3103 	    LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3104 		if (owp != owp2 &&
3105 		    !NFSBCMP(owp->nfsow_owner, owp2->nfsow_owner,
3106 		    NFSV4CL_LOCKNAMELEN)) {
3107 			NFSUNLOCKCLSTATE();
3108 			printf("DUP OWNER\n");
3109 			nfscl_dumpstate(VFSTONFS(vp->v_mount), 1, 1, 0, 0);
3110 			return;
3111 		}
3112 	    }
3113 	}
3114 
3115 	/*
3116 	 * Now, search for duplicate stateids.
3117 	 * These shouldn't happen, either.
3118 	 */
3119 	LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
3120 	    LIST_FOREACH(op2, &owp2->nfsow_open, nfso_list) {
3121 		LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3122 		    LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3123 			if (op != op2 &&
3124 			    (op->nfso_stateid.other[0] != 0 ||
3125 			     op->nfso_stateid.other[1] != 0 ||
3126 			     op->nfso_stateid.other[2] != 0) &&
3127 			    op->nfso_stateid.other[0] == op2->nfso_stateid.other[0] &&
3128 			    op->nfso_stateid.other[1] == op2->nfso_stateid.other[1] &&
3129 			    op->nfso_stateid.other[2] == op2->nfso_stateid.other[2]) {
3130 			    NFSUNLOCKCLSTATE();
3131 			    printf("DUP STATEID\n");
3132 			    nfscl_dumpstate(VFSTONFS(vp->v_mount), 1, 1, 0, 0);
3133 			    return;
3134 			}
3135 		    }
3136 		}
3137 	    }
3138 	}
3139 
3140 	/*
3141 	 * Now search for duplicate opens.
3142 	 * Duplicate opens for the same owner
3143 	 * should never occur. Other duplicates are
3144 	 * possible and are checked for if "dupopens"
3145 	 * is true.
3146 	 */
3147 	LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
3148 	    LIST_FOREACH(op2, &owp2->nfsow_open, nfso_list) {
3149 		if (nfhp->nfh_len == op2->nfso_fhlen &&
3150 		    !NFSBCMP(nfhp->nfh_fh, op2->nfso_fh, nfhp->nfh_len)) {
3151 		    LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3152 			LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3153 			    if (op != op2 && nfhp->nfh_len == op->nfso_fhlen &&
3154 				!NFSBCMP(nfhp->nfh_fh, op->nfso_fh, nfhp->nfh_len) &&
3155 				(!NFSBCMP(op->nfso_own->nfsow_owner,
3156 				 op2->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN) ||
3157 				 dupopens)) {
3158 				if (!NFSBCMP(op->nfso_own->nfsow_owner,
3159 				    op2->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN)) {
3160 				    NFSUNLOCKCLSTATE();
3161 				    printf("BADDUP OPEN\n");
3162 				} else {
3163 				    NFSUNLOCKCLSTATE();
3164 				    printf("DUP OPEN\n");
3165 				}
3166 				nfscl_dumpstate(VFSTONFS(vp->v_mount), 1, 1, 0,
3167 				    0);
3168 				return;
3169 			    }
3170 			}
3171 		    }
3172 		}
3173 	    }
3174 	}
3175 	NFSUNLOCKCLSTATE();
3176 }
3177 
3178 /*
3179  * During close, find an open that needs to be dereferenced and
3180  * dereference it. If there are no more opens for this file,
3181  * log a message to that effect.
3182  * Opens aren't actually Close'd until VOP_INACTIVE() is performed
3183  * on the file's vnode.
3184  * This is the safe way, since it is difficult to identify
3185  * which open the close is for and I/O can be performed after the
3186  * close(2) system call when a file is mmap'd.
3187  * If it returns 0 for success, there will be a referenced
3188  * clp returned via clpp.
3189  */
3190 int
3191 nfscl_getclose(vnode_t vp, struct nfsclclient **clpp)
3192 {
3193 	struct nfsclclient *clp;
3194 	struct nfsclowner *owp;
3195 	struct nfsclopen *op;
3196 	struct nfscldeleg *dp;
3197 	struct nfsfh *nfhp;
3198 	int error, notdecr;
3199 
3200 	error = nfscl_getcl(vp->v_mount, NULL, NULL, 1, &clp);
3201 	if (error)
3202 		return (error);
3203 	*clpp = clp;
3204 
3205 	nfhp = VTONFS(vp)->n_fhp;
3206 	notdecr = 1;
3207 	NFSLOCKCLSTATE();
3208 	/*
3209 	 * First, look for one under a delegation that was locally issued
3210 	 * and just decrement the opencnt for it. Since all my Opens against
3211 	 * the server are DENY_NONE, I don't see a problem with hanging
3212 	 * onto them. (It is much easier to use one of the extant Opens
3213 	 * that I already have on the server when a Delegation is recalled
3214 	 * than to do fresh Opens.) Someday, I might need to rethink this, but.
3215 	 */
3216 	dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len);
3217 	if (dp != NULL) {
3218 		LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
3219 			op = LIST_FIRST(&owp->nfsow_open);
3220 			if (op != NULL) {
3221 				/*
3222 				 * Since a delegation is for a file, there
3223 				 * should never be more than one open for
3224 				 * each openowner.
3225 				 */
3226 				if (LIST_NEXT(op, nfso_list) != NULL)
3227 					panic("nfscdeleg opens");
3228 				if (notdecr && op->nfso_opencnt > 0) {
3229 					notdecr = 0;
3230 					op->nfso_opencnt--;
3231 					break;
3232 				}
3233 			}
3234 		}
3235 	}
3236 
3237 	/* Now process the opens against the server. */
3238 	LIST_FOREACH(op, NFSCLOPENHASH(clp, nfhp->nfh_fh, nfhp->nfh_len),
3239 	    nfso_hash) {
3240 		if (op->nfso_fhlen == nfhp->nfh_len &&
3241 		    !NFSBCMP(op->nfso_fh, nfhp->nfh_fh,
3242 		    nfhp->nfh_len)) {
3243 			/* Found an open, decrement cnt if possible */
3244 			if (notdecr && op->nfso_opencnt > 0) {
3245 				notdecr = 0;
3246 				op->nfso_opencnt--;
3247 			}
3248 			/*
3249 			 * There are more opens, so just return.
3250 			 */
3251 			if (op->nfso_opencnt > 0) {
3252 				NFSUNLOCKCLSTATE();
3253 				return (0);
3254 			}
3255 		}
3256 	}
3257 	NFSUNLOCKCLSTATE();
3258 	if (notdecr)
3259 		printf("nfscl: never fnd open\n");
3260 	return (0);
3261 }
3262 
3263 int
3264 nfscl_doclose(vnode_t vp, struct nfsclclient **clpp, NFSPROC_T *p)
3265 {
3266 	struct nfsclclient *clp;
3267 	struct nfsclowner *owp, *nowp;
3268 	struct nfsclopen *op;
3269 	struct nfscldeleg *dp;
3270 	struct nfsfh *nfhp;
3271 	struct nfsclrecalllayout *recallp;
3272 	int error;
3273 
3274 	error = nfscl_getcl(vp->v_mount, NULL, NULL, 1, &clp);
3275 	if (error)
3276 		return (error);
3277 	*clpp = clp;
3278 
3279 	nfhp = VTONFS(vp)->n_fhp;
3280 	recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, M_WAITOK);
3281 	NFSLOCKCLSTATE();
3282 	/*
3283 	 * First get rid of the local Open structures, which should be no
3284 	 * longer in use.
3285 	 */
3286 	dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len);
3287 	if (dp != NULL) {
3288 		LIST_FOREACH_SAFE(owp, &dp->nfsdl_owner, nfsow_list, nowp) {
3289 			op = LIST_FIRST(&owp->nfsow_open);
3290 			if (op != NULL) {
3291 				KASSERT((op->nfso_opencnt == 0),
3292 				    ("nfscl: bad open cnt on deleg"));
3293 				nfscl_freeopen(op, 1);
3294 			}
3295 			nfscl_freeopenowner(owp, 1);
3296 		}
3297 	}
3298 
3299 	/* Return any layouts marked return on close. */
3300 	nfscl_retoncloselayout(vp, clp, nfhp->nfh_fh, nfhp->nfh_len, &recallp);
3301 
3302 	/* Now process the opens against the server. */
3303 lookformore:
3304 	LIST_FOREACH(op, NFSCLOPENHASH(clp, nfhp->nfh_fh, nfhp->nfh_len),
3305 	    nfso_hash) {
3306 		if (op->nfso_fhlen == nfhp->nfh_len &&
3307 		    !NFSBCMP(op->nfso_fh, nfhp->nfh_fh,
3308 		    nfhp->nfh_len)) {
3309 			/* Found an open, close it. */
3310 #ifdef DIAGNOSTIC
3311 			KASSERT((op->nfso_opencnt == 0),
3312 			    ("nfscl: bad open cnt on server (%d)",
3313 			     op->nfso_opencnt));
3314 #endif
3315 			NFSUNLOCKCLSTATE();
3316 			nfsrpc_doclose(VFSTONFS(vp->v_mount), op, p);
3317 			NFSLOCKCLSTATE();
3318 			goto lookformore;
3319 		}
3320 	}
3321 	NFSUNLOCKCLSTATE();
3322 	/*
3323 	 * recallp has been set NULL by nfscl_retoncloselayout() if it was
3324 	 * used by the function, but calling free() with a NULL pointer is ok.
3325 	 */
3326 	free(recallp, M_NFSLAYRECALL);
3327 	return (0);
3328 }
3329 
3330 /*
3331  * Return all delegations on this client.
3332  * (Must be called with client sleep lock.)
3333  */
3334 static void
3335 nfscl_delegreturnall(struct nfsclclient *clp, NFSPROC_T *p)
3336 {
3337 	struct nfscldeleg *dp, *ndp;
3338 	struct ucred *cred;
3339 
3340 	cred = newnfs_getcred();
3341 	TAILQ_FOREACH_SAFE(dp, &clp->nfsc_deleg, nfsdl_list, ndp) {
3342 		nfscl_cleandeleg(dp);
3343 		(void) nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
3344 		nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
3345 	}
3346 	NFSFREECRED(cred);
3347 }
3348 
3349 /*
3350  * Return any delegation for this vp.
3351  */
3352 void
3353 nfscl_delegreturnvp(vnode_t vp, NFSPROC_T *p)
3354 {
3355 	struct nfsclclient *clp;
3356 	struct nfscldeleg *dp;
3357 	struct ucred *cred;
3358 	struct nfsnode *np;
3359 
3360 	np = VTONFS(vp);
3361 	cred = newnfs_getcred();
3362 	dp = NULL;
3363 	NFSLOCKCLSTATE();
3364 	clp = VFSTONFS(vp->v_mount)->nm_clp;
3365 	if (clp != NULL)
3366 		dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
3367 		    np->n_fhp->nfh_len);
3368 	if (dp != NULL) {
3369 		nfscl_cleandeleg(dp);
3370 		nfscl_freedeleg(&clp->nfsc_deleg, dp, false);
3371 		NFSUNLOCKCLSTATE();
3372 		newnfs_copycred(&dp->nfsdl_cred, cred);
3373 		nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
3374 		free(dp, M_NFSCLDELEG);
3375 	} else
3376 		NFSUNLOCKCLSTATE();
3377 	NFSFREECRED(cred);
3378 }
3379 
3380 /*
3381  * Do a callback RPC.
3382  */
3383 void
3384 nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p)
3385 {
3386 	int clist, gotseq_ok, i, j, k, op, rcalls;
3387 	u_int32_t *tl;
3388 	struct nfsclclient *clp;
3389 	struct nfscldeleg *dp = NULL;
3390 	int numops, taglen = -1, error = 0, trunc __unused;
3391 	u_int32_t minorvers = 0, retops = 0, *retopsp = NULL, *repp, cbident;
3392 	u_char tag[NFSV4_SMALLSTR + 1], *tagstr;
3393 	vnode_t vp = NULL;
3394 	struct nfsnode *np;
3395 	struct vattr va;
3396 	struct nfsfh *nfhp;
3397 	mount_t mp;
3398 	nfsattrbit_t attrbits, rattrbits;
3399 	nfsv4stateid_t stateid;
3400 	uint32_t seqid, slotid = 0, highslot, cachethis __unused;
3401 	uint8_t sessionid[NFSX_V4SESSIONID];
3402 	struct mbuf *rep;
3403 	struct nfscllayout *lyp;
3404 	uint64_t filesid[2], len, off;
3405 	int changed, gotone, laytype, recalltype;
3406 	uint32_t iomode;
3407 	struct nfsclrecalllayout *recallp = NULL;
3408 	struct nfsclsession *tsep;
3409 
3410 	gotseq_ok = 0;
3411 	nfsrvd_rephead(nd);
3412 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3413 	taglen = fxdr_unsigned(int, *tl);
3414 	if (taglen < 0) {
3415 		error = EBADRPC;
3416 		goto nfsmout;
3417 	}
3418 	if (taglen <= NFSV4_SMALLSTR)
3419 		tagstr = tag;
3420 	else
3421 		tagstr = malloc(taglen + 1, M_TEMP, M_WAITOK);
3422 	error = nfsrv_mtostr(nd, tagstr, taglen);
3423 	if (error) {
3424 		if (taglen > NFSV4_SMALLSTR)
3425 			free(tagstr, M_TEMP);
3426 		taglen = -1;
3427 		goto nfsmout;
3428 	}
3429 	(void) nfsm_strtom(nd, tag, taglen);
3430 	if (taglen > NFSV4_SMALLSTR) {
3431 		free(tagstr, M_TEMP);
3432 	}
3433 	NFSM_BUILD(retopsp, u_int32_t *, NFSX_UNSIGNED);
3434 	NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3435 	minorvers = fxdr_unsigned(u_int32_t, *tl++);
3436 	if (minorvers != NFSV4_MINORVERSION &&
3437 	    minorvers != NFSV41_MINORVERSION &&
3438 	    minorvers != NFSV42_MINORVERSION)
3439 		nd->nd_repstat = NFSERR_MINORVERMISMATCH;
3440 	cbident = fxdr_unsigned(u_int32_t, *tl++);
3441 	if (nd->nd_repstat)
3442 		numops = 0;
3443 	else
3444 		numops = fxdr_unsigned(int, *tl);
3445 	/*
3446 	 * Loop around doing the sub ops.
3447 	 */
3448 	for (i = 0; i < numops; i++) {
3449 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3450 		NFSM_BUILD(repp, u_int32_t *, 2 * NFSX_UNSIGNED);
3451 		*repp++ = *tl;
3452 		op = fxdr_unsigned(int, *tl);
3453 		if (op < NFSV4OP_CBGETATTR ||
3454 		   (op > NFSV4OP_CBRECALL && minorvers == NFSV4_MINORVERSION) ||
3455 		   (op > NFSV4OP_CBNOTIFYDEVID &&
3456 		    minorvers == NFSV41_MINORVERSION) ||
3457 		   (op > NFSV4OP_CBOFFLOAD &&
3458 		    minorvers == NFSV42_MINORVERSION)) {
3459 		    nd->nd_repstat = NFSERR_OPILLEGAL;
3460 		    *repp = nfscl_errmap(nd, minorvers);
3461 		    retops++;
3462 		    break;
3463 		}
3464 		nd->nd_procnum = op;
3465 		if (op < NFSV42_CBNOPS)
3466 			nfsstatsv1.cbrpccnt[nd->nd_procnum]++;
3467 		switch (op) {
3468 		case NFSV4OP_CBGETATTR:
3469 			NFSCL_DEBUG(4, "cbgetattr\n");
3470 			mp = NULL;
3471 			vp = NULL;
3472 			error = nfsm_getfh(nd, &nfhp);
3473 			if (!error)
3474 				error = nfsrv_getattrbits(nd, &attrbits,
3475 				    NULL, NULL);
3476 			if (error == 0 && i == 0 &&
3477 			    minorvers != NFSV4_MINORVERSION)
3478 				error = NFSERR_OPNOTINSESS;
3479 			if (!error) {
3480 				mp = nfscl_getmnt(minorvers, sessionid, cbident,
3481 				    &clp);
3482 				if (mp == NULL)
3483 					error = NFSERR_SERVERFAULT;
3484 			}
3485 			if (!error) {
3486 				error = nfscl_ngetreopen(mp, nfhp->nfh_fh,
3487 				    nfhp->nfh_len, p, &np);
3488 				if (!error)
3489 					vp = NFSTOV(np);
3490 			}
3491 			if (!error) {
3492 				NFSZERO_ATTRBIT(&rattrbits);
3493 				NFSLOCKCLSTATE();
3494 				dp = nfscl_finddeleg(clp, nfhp->nfh_fh,
3495 				    nfhp->nfh_len);
3496 				if (dp != NULL) {
3497 					if (NFSISSET_ATTRBIT(&attrbits,
3498 					    NFSATTRBIT_SIZE)) {
3499 						if (vp != NULL)
3500 							va.va_size = np->n_size;
3501 						else
3502 							va.va_size =
3503 							    dp->nfsdl_size;
3504 						NFSSETBIT_ATTRBIT(&rattrbits,
3505 						    NFSATTRBIT_SIZE);
3506 					}
3507 					if (NFSISSET_ATTRBIT(&attrbits,
3508 					    NFSATTRBIT_CHANGE)) {
3509 						va.va_filerev =
3510 						    dp->nfsdl_change;
3511 						if (vp == NULL ||
3512 						    (np->n_flag & NDELEGMOD))
3513 							va.va_filerev++;
3514 						NFSSETBIT_ATTRBIT(&rattrbits,
3515 						    NFSATTRBIT_CHANGE);
3516 					}
3517 				} else
3518 					error = NFSERR_SERVERFAULT;
3519 				NFSUNLOCKCLSTATE();
3520 			}
3521 			if (vp != NULL)
3522 				vrele(vp);
3523 			if (mp != NULL)
3524 				vfs_unbusy(mp);
3525 			if (nfhp != NULL)
3526 				free(nfhp, M_NFSFH);
3527 			if (!error)
3528 				(void) nfsv4_fillattr(nd, NULL, NULL, NULL, &va,
3529 				    NULL, 0, &rattrbits, NULL, p, 0, 0, 0, 0,
3530 				    (uint64_t)0, NULL);
3531 			break;
3532 		case NFSV4OP_CBRECALL:
3533 			NFSCL_DEBUG(4, "cbrecall\n");
3534 			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
3535 			    NFSX_UNSIGNED);
3536 			stateid.seqid = *tl++;
3537 			NFSBCOPY((caddr_t)tl, (caddr_t)stateid.other,
3538 			    NFSX_STATEIDOTHER);
3539 			tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
3540 			trunc = fxdr_unsigned(int, *tl);
3541 			error = nfsm_getfh(nd, &nfhp);
3542 			if (error == 0 && i == 0 &&
3543 			    minorvers != NFSV4_MINORVERSION)
3544 				error = NFSERR_OPNOTINSESS;
3545 			if (!error) {
3546 				NFSLOCKCLSTATE();
3547 				if (minorvers == NFSV4_MINORVERSION)
3548 					clp = nfscl_getclnt(cbident);
3549 				else
3550 					clp = nfscl_getclntsess(sessionid);
3551 				if (clp != NULL) {
3552 					dp = nfscl_finddeleg(clp, nfhp->nfh_fh,
3553 					    nfhp->nfh_len);
3554 					if (dp != NULL && (dp->nfsdl_flags &
3555 					    NFSCLDL_DELEGRET) == 0) {
3556 						dp->nfsdl_flags |=
3557 						    NFSCLDL_RECALL;
3558 						wakeup((caddr_t)clp);
3559 					}
3560 				} else {
3561 					error = NFSERR_SERVERFAULT;
3562 				}
3563 				NFSUNLOCKCLSTATE();
3564 			}
3565 			if (nfhp != NULL)
3566 				free(nfhp, M_NFSFH);
3567 			break;
3568 		case NFSV4OP_CBLAYOUTRECALL:
3569 			NFSCL_DEBUG(4, "cblayrec\n");
3570 			nfhp = NULL;
3571 			NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
3572 			laytype = fxdr_unsigned(int, *tl++);
3573 			iomode = fxdr_unsigned(uint32_t, *tl++);
3574 			if (newnfs_true == *tl++)
3575 				changed = 1;
3576 			else
3577 				changed = 0;
3578 			recalltype = fxdr_unsigned(int, *tl);
3579 			NFSCL_DEBUG(4, "layt=%d iom=%d ch=%d rectyp=%d\n",
3580 			    laytype, iomode, changed, recalltype);
3581 			recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL,
3582 			    M_WAITOK);
3583 			if (laytype != NFSLAYOUT_NFSV4_1_FILES &&
3584 			    laytype != NFSLAYOUT_FLEXFILE)
3585 				error = NFSERR_NOMATCHLAYOUT;
3586 			else if (recalltype == NFSLAYOUTRETURN_FILE) {
3587 				error = nfsm_getfh(nd, &nfhp);
3588 				NFSCL_DEBUG(4, "retfile getfh=%d\n", error);
3589 				if (error != 0)
3590 					goto nfsmout;
3591 				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_HYPER +
3592 				    NFSX_STATEID);
3593 				off = fxdr_hyper(tl); tl += 2;
3594 				len = fxdr_hyper(tl); tl += 2;
3595 				stateid.seqid = fxdr_unsigned(uint32_t, *tl++);
3596 				NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER);
3597 				if (minorvers == NFSV4_MINORVERSION)
3598 					error = NFSERR_NOTSUPP;
3599 				else if (i == 0)
3600 					error = NFSERR_OPNOTINSESS;
3601 				NFSCL_DEBUG(4, "off=%ju len=%ju sq=%u err=%d\n",
3602 				    (uintmax_t)off, (uintmax_t)len,
3603 				    stateid.seqid, error);
3604 				if (error == 0) {
3605 					NFSLOCKCLSTATE();
3606 					clp = nfscl_getclntsess(sessionid);
3607 					NFSCL_DEBUG(4, "cbly clp=%p\n", clp);
3608 					if (clp != NULL) {
3609 						lyp = nfscl_findlayout(clp,
3610 						    nfhp->nfh_fh,
3611 						    nfhp->nfh_len);
3612 						NFSCL_DEBUG(4, "cblyp=%p\n",
3613 						    lyp);
3614 						if (lyp != NULL &&
3615 						    (lyp->nfsly_flags &
3616 						     (NFSLY_FILES |
3617 						      NFSLY_FLEXFILE)) != 0 &&
3618 						    !NFSBCMP(stateid.other,
3619 						    lyp->nfsly_stateid.other,
3620 						    NFSX_STATEIDOTHER)) {
3621 							error =
3622 							    nfscl_layoutrecall(
3623 							    recalltype,
3624 							    lyp, iomode, off,
3625 							    len, stateid.seqid,
3626 							    0, 0, NULL,
3627 							    recallp);
3628 							if (error == 0 &&
3629 							    stateid.seqid >
3630 							    lyp->nfsly_stateid.seqid)
3631 								lyp->nfsly_stateid.seqid =
3632 								    stateid.seqid;
3633 							recallp = NULL;
3634 							wakeup(clp);
3635 							NFSCL_DEBUG(4,
3636 							    "aft layrcal=%d "
3637 							    "layseqid=%d\n",
3638 							    error,
3639 							    lyp->nfsly_stateid.seqid);
3640 						} else
3641 							error =
3642 							  NFSERR_NOMATCHLAYOUT;
3643 					} else
3644 						error = NFSERR_NOMATCHLAYOUT;
3645 					NFSUNLOCKCLSTATE();
3646 				}
3647 				free(nfhp, M_NFSFH);
3648 			} else if (recalltype == NFSLAYOUTRETURN_FSID) {
3649 				NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER);
3650 				filesid[0] = fxdr_hyper(tl); tl += 2;
3651 				filesid[1] = fxdr_hyper(tl); tl += 2;
3652 				gotone = 0;
3653 				NFSLOCKCLSTATE();
3654 				clp = nfscl_getclntsess(sessionid);
3655 				if (clp != NULL) {
3656 					TAILQ_FOREACH(lyp, &clp->nfsc_layout,
3657 					    nfsly_list) {
3658 						if (lyp->nfsly_filesid[0] ==
3659 						    filesid[0] &&
3660 						    lyp->nfsly_filesid[1] ==
3661 						    filesid[1]) {
3662 							error =
3663 							    nfscl_layoutrecall(
3664 							    recalltype,
3665 							    lyp, iomode, 0,
3666 							    UINT64_MAX,
3667 							    lyp->nfsly_stateid.seqid,
3668 							    0, 0, NULL,
3669 							    recallp);
3670 							recallp = NULL;
3671 							gotone = 1;
3672 						}
3673 					}
3674 					if (gotone != 0)
3675 						wakeup(clp);
3676 					else
3677 						error = NFSERR_NOMATCHLAYOUT;
3678 				} else
3679 					error = NFSERR_NOMATCHLAYOUT;
3680 				NFSUNLOCKCLSTATE();
3681 			} else if (recalltype == NFSLAYOUTRETURN_ALL) {
3682 				gotone = 0;
3683 				NFSLOCKCLSTATE();
3684 				clp = nfscl_getclntsess(sessionid);
3685 				if (clp != NULL) {
3686 					TAILQ_FOREACH(lyp, &clp->nfsc_layout,
3687 					    nfsly_list) {
3688 						error = nfscl_layoutrecall(
3689 						    recalltype, lyp, iomode, 0,
3690 						    UINT64_MAX,
3691 						    lyp->nfsly_stateid.seqid,
3692 						    0, 0, NULL, recallp);
3693 						recallp = NULL;
3694 						gotone = 1;
3695 					}
3696 					if (gotone != 0)
3697 						wakeup(clp);
3698 					else
3699 						error = NFSERR_NOMATCHLAYOUT;
3700 				} else
3701 					error = NFSERR_NOMATCHLAYOUT;
3702 				NFSUNLOCKCLSTATE();
3703 			} else
3704 				error = NFSERR_NOMATCHLAYOUT;
3705 			if (recallp != NULL) {
3706 				free(recallp, M_NFSLAYRECALL);
3707 				recallp = NULL;
3708 			}
3709 			break;
3710 		case NFSV4OP_CBSEQUENCE:
3711 			NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
3712 			    5 * NFSX_UNSIGNED);
3713 			bcopy(tl, sessionid, NFSX_V4SESSIONID);
3714 			tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
3715 			seqid = fxdr_unsigned(uint32_t, *tl++);
3716 			slotid = fxdr_unsigned(uint32_t, *tl++);
3717 			highslot = fxdr_unsigned(uint32_t, *tl++);
3718 			cachethis = *tl++;
3719 			/* Throw away the referring call stuff. */
3720 			clist = fxdr_unsigned(int, *tl);
3721 			for (j = 0; j < clist; j++) {
3722 				NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
3723 				    NFSX_UNSIGNED);
3724 				tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
3725 				rcalls = fxdr_unsigned(int, *tl);
3726 				for (k = 0; k < rcalls; k++) {
3727 					NFSM_DISSECT(tl, uint32_t *,
3728 					    2 * NFSX_UNSIGNED);
3729 				}
3730 			}
3731 			NFSLOCKCLSTATE();
3732 			if (i == 0) {
3733 				clp = nfscl_getclntsess(sessionid);
3734 				if (clp == NULL)
3735 					error = NFSERR_SERVERFAULT;
3736 			} else
3737 				error = NFSERR_SEQUENCEPOS;
3738 			if (error == 0) {
3739 				tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3740 				error = nfsv4_seqsession(seqid, slotid,
3741 				    highslot, tsep->nfsess_cbslots, &rep,
3742 				    tsep->nfsess_backslots);
3743 			}
3744 			NFSUNLOCKCLSTATE();
3745 			if (error == 0 || error == NFSERR_REPLYFROMCACHE) {
3746 				gotseq_ok = 1;
3747 				if (rep != NULL) {
3748 					/*
3749 					 * Handle a reply for a retried
3750 					 * callback.  The reply will be
3751 					 * re-inserted in the session cache
3752 					 * by the nfsv4_seqsess_cacherep() call
3753 					 * after out:
3754 					 */
3755 					KASSERT(error == NFSERR_REPLYFROMCACHE,
3756 					    ("cbsequence: non-NULL rep"));
3757 					NFSCL_DEBUG(4, "Got cbretry\n");
3758 					m_freem(nd->nd_mreq);
3759 					nd->nd_mreq = rep;
3760 					rep = NULL;
3761 					goto out;
3762 				}
3763 				NFSM_BUILD(tl, uint32_t *,
3764 				    NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED);
3765 				bcopy(sessionid, tl, NFSX_V4SESSIONID);
3766 				tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
3767 				*tl++ = txdr_unsigned(seqid);
3768 				*tl++ = txdr_unsigned(slotid);
3769 				*tl++ = txdr_unsigned(NFSV4_CBSLOTS - 1);
3770 				*tl = txdr_unsigned(NFSV4_CBSLOTS - 1);
3771 			}
3772 			break;
3773 		default:
3774 			if (i == 0 && minorvers != NFSV4_MINORVERSION)
3775 				error = NFSERR_OPNOTINSESS;
3776 			else {
3777 				NFSCL_DEBUG(1, "unsupp callback %d\n", op);
3778 				error = NFSERR_NOTSUPP;
3779 			}
3780 			break;
3781 		}
3782 		if (error) {
3783 			if (error == EBADRPC || error == NFSERR_BADXDR) {
3784 				nd->nd_repstat = NFSERR_BADXDR;
3785 			} else {
3786 				nd->nd_repstat = error;
3787 			}
3788 			error = 0;
3789 		}
3790 		retops++;
3791 		if (nd->nd_repstat) {
3792 			*repp = nfscl_errmap(nd, minorvers);
3793 			break;
3794 		} else
3795 			*repp = 0;	/* NFS4_OK */
3796 	}
3797 nfsmout:
3798 	if (recallp != NULL)
3799 		free(recallp, M_NFSLAYRECALL);
3800 	if (error) {
3801 		if (error == EBADRPC || error == NFSERR_BADXDR)
3802 			nd->nd_repstat = NFSERR_BADXDR;
3803 		else
3804 			printf("nfsv4 comperr1=%d\n", error);
3805 	}
3806 	if (taglen == -1) {
3807 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3808 		*tl++ = 0;
3809 		*tl = 0;
3810 	} else {
3811 		*retopsp = txdr_unsigned(retops);
3812 	}
3813 	*nd->nd_errp = nfscl_errmap(nd, minorvers);
3814 out:
3815 	if (gotseq_ok != 0) {
3816 		rep = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK);
3817 		NFSLOCKCLSTATE();
3818 		clp = nfscl_getclntsess(sessionid);
3819 		if (clp != NULL) {
3820 			tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3821 			nfsv4_seqsess_cacherep(slotid, tsep->nfsess_cbslots,
3822 			    NFSERR_OK, &rep);
3823 			NFSUNLOCKCLSTATE();
3824 		} else {
3825 			NFSUNLOCKCLSTATE();
3826 			m_freem(rep);
3827 		}
3828 	}
3829 }
3830 
3831 /*
3832  * Generate the next cbident value. Basically just increment a static value
3833  * and then check that it isn't already in the list, if it has wrapped around.
3834  */
3835 static u_int32_t
3836 nfscl_nextcbident(void)
3837 {
3838 	struct nfsclclient *clp;
3839 	int matched;
3840 	static u_int32_t nextcbident = 0;
3841 	static int haswrapped = 0;
3842 
3843 	nextcbident++;
3844 	if (nextcbident == 0)
3845 		haswrapped = 1;
3846 	if (haswrapped) {
3847 		/*
3848 		 * Search the clientid list for one already using this cbident.
3849 		 */
3850 		do {
3851 			matched = 0;
3852 			NFSLOCKCLSTATE();
3853 			LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
3854 				if (clp->nfsc_cbident == nextcbident) {
3855 					matched = 1;
3856 					break;
3857 				}
3858 			}
3859 			NFSUNLOCKCLSTATE();
3860 			if (matched == 1)
3861 				nextcbident++;
3862 		} while (matched);
3863 	}
3864 	return (nextcbident);
3865 }
3866 
3867 /*
3868  * Get the mount point related to a given cbident or session and busy it.
3869  */
3870 static mount_t
3871 nfscl_getmnt(int minorvers, uint8_t *sessionid, u_int32_t cbident,
3872     struct nfsclclient **clpp)
3873 {
3874 	struct nfsclclient *clp;
3875 	mount_t mp;
3876 	int error;
3877 	struct nfsclsession *tsep;
3878 
3879 	*clpp = NULL;
3880 	NFSLOCKCLSTATE();
3881 	LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
3882 		tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3883 		if (minorvers == NFSV4_MINORVERSION) {
3884 			if (clp->nfsc_cbident == cbident)
3885 				break;
3886 		} else if (!NFSBCMP(tsep->nfsess_sessionid, sessionid,
3887 		    NFSX_V4SESSIONID))
3888 			break;
3889 	}
3890 	if (clp == NULL) {
3891 		NFSUNLOCKCLSTATE();
3892 		return (NULL);
3893 	}
3894 	mp = clp->nfsc_nmp->nm_mountp;
3895 	vfs_ref(mp);
3896 	NFSUNLOCKCLSTATE();
3897 	error = vfs_busy(mp, 0);
3898 	vfs_rel(mp);
3899 	if (error != 0)
3900 		return (NULL);
3901 	*clpp = clp;
3902 	return (mp);
3903 }
3904 
3905 /*
3906  * Get the clientid pointer related to a given cbident.
3907  */
3908 static struct nfsclclient *
3909 nfscl_getclnt(u_int32_t cbident)
3910 {
3911 	struct nfsclclient *clp;
3912 
3913 	LIST_FOREACH(clp, &nfsclhead, nfsc_list)
3914 		if (clp->nfsc_cbident == cbident)
3915 			break;
3916 	return (clp);
3917 }
3918 
3919 /*
3920  * Get the clientid pointer related to a given sessionid.
3921  */
3922 static struct nfsclclient *
3923 nfscl_getclntsess(uint8_t *sessionid)
3924 {
3925 	struct nfsclclient *clp;
3926 	struct nfsclsession *tsep;
3927 
3928 	LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
3929 		tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3930 		if (!NFSBCMP(tsep->nfsess_sessionid, sessionid,
3931 		    NFSX_V4SESSIONID))
3932 			break;
3933 	}
3934 	return (clp);
3935 }
3936 
3937 /*
3938  * Search for a lock conflict locally on the client. A conflict occurs if
3939  * - not same owner and overlapping byte range and at least one of them is
3940  *   a write lock or this is an unlock.
3941  */
3942 static int
3943 nfscl_localconflict(struct nfsclclient *clp, u_int8_t *fhp, int fhlen,
3944     struct nfscllock *nlop, u_int8_t *own, struct nfscldeleg *dp,
3945     struct nfscllock **lopp)
3946 {
3947 	struct nfsclopen *op;
3948 	int ret;
3949 
3950 	if (dp != NULL) {
3951 		ret = nfscl_checkconflict(&dp->nfsdl_lock, nlop, own, lopp);
3952 		if (ret)
3953 			return (ret);
3954 	}
3955 	LIST_FOREACH(op, NFSCLOPENHASH(clp, fhp, fhlen), nfso_hash) {
3956 		if (op->nfso_fhlen == fhlen &&
3957 		    !NFSBCMP(op->nfso_fh, fhp, fhlen)) {
3958 			ret = nfscl_checkconflict(&op->nfso_lock, nlop,
3959 			    own, lopp);
3960 			if (ret)
3961 				return (ret);
3962 		}
3963 	}
3964 	return (0);
3965 }
3966 
3967 static int
3968 nfscl_checkconflict(struct nfscllockownerhead *lhp, struct nfscllock *nlop,
3969     u_int8_t *own, struct nfscllock **lopp)
3970 {
3971 	struct nfscllockowner *lp;
3972 	struct nfscllock *lop;
3973 
3974 	LIST_FOREACH(lp, lhp, nfsl_list) {
3975 		if (NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) {
3976 			LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
3977 				if (lop->nfslo_first >= nlop->nfslo_end)
3978 					break;
3979 				if (lop->nfslo_end <= nlop->nfslo_first)
3980 					continue;
3981 				if (lop->nfslo_type == F_WRLCK ||
3982 				    nlop->nfslo_type == F_WRLCK ||
3983 				    nlop->nfslo_type == F_UNLCK) {
3984 					if (lopp != NULL)
3985 						*lopp = lop;
3986 					return (NFSERR_DENIED);
3987 				}
3988 			}
3989 		}
3990 	}
3991 	return (0);
3992 }
3993 
3994 /*
3995  * Check for a local conflicting lock.
3996  */
3997 int
3998 nfscl_lockt(vnode_t vp, struct nfsclclient *clp, u_int64_t off,
3999     u_int64_t len, struct flock *fl, NFSPROC_T *p, void *id, int flags)
4000 {
4001 	struct nfscllock *lop, nlck;
4002 	struct nfscldeleg *dp;
4003 	struct nfsnode *np;
4004 	u_int8_t own[NFSV4CL_LOCKNAMELEN];
4005 	int error;
4006 
4007 	nlck.nfslo_type = fl->l_type;
4008 	nlck.nfslo_first = off;
4009 	if (len == NFS64BITSSET) {
4010 		nlck.nfslo_end = NFS64BITSSET;
4011 	} else {
4012 		nlck.nfslo_end = off + len;
4013 		if (nlck.nfslo_end <= nlck.nfslo_first)
4014 			return (NFSERR_INVAL);
4015 	}
4016 	np = VTONFS(vp);
4017 	nfscl_filllockowner(id, own, flags);
4018 	NFSLOCKCLSTATE();
4019 	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4020 	error = nfscl_localconflict(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
4021 	    &nlck, own, dp, &lop);
4022 	if (error != 0) {
4023 		fl->l_whence = SEEK_SET;
4024 		fl->l_start = lop->nfslo_first;
4025 		if (lop->nfslo_end == NFS64BITSSET)
4026 			fl->l_len = 0;
4027 		else
4028 			fl->l_len = lop->nfslo_end - lop->nfslo_first;
4029 		fl->l_pid = (pid_t)0;
4030 		fl->l_type = lop->nfslo_type;
4031 		error = -1;			/* no RPC required */
4032 	} else if (dp != NULL && ((dp->nfsdl_flags & NFSCLDL_WRITE) ||
4033 	    fl->l_type == F_RDLCK)) {
4034 		/*
4035 		 * The delegation ensures that there isn't a conflicting
4036 		 * lock on the server, so return -1 to indicate an RPC
4037 		 * isn't required.
4038 		 */
4039 		fl->l_type = F_UNLCK;
4040 		error = -1;
4041 	}
4042 	NFSUNLOCKCLSTATE();
4043 	return (error);
4044 }
4045 
4046 /*
4047  * Handle Recall of a delegation.
4048  * The clp must be exclusive locked when this is called.
4049  */
4050 static int
4051 nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp,
4052     struct nfscldeleg *dp, vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4053     int called_from_renewthread, vnode_t *vpp)
4054 {
4055 	struct nfsclowner *owp, *lowp, *nowp;
4056 	struct nfsclopen *op, *lop;
4057 	struct nfscllockowner *lp;
4058 	struct nfscllock *lckp;
4059 	struct nfsnode *np;
4060 	int error = 0, ret;
4061 
4062 	if (vp == NULL) {
4063 		KASSERT(vpp != NULL, ("nfscl_recalldeleg: vpp NULL"));
4064 		*vpp = NULL;
4065 		/*
4066 		 * First, get a vnode for the file. This is needed to do RPCs.
4067 		 */
4068 		ret = nfscl_ngetreopen(nmp->nm_mountp, dp->nfsdl_fh,
4069 		    dp->nfsdl_fhlen, p, &np);
4070 		if (ret) {
4071 			/*
4072 			 * File isn't open, so nothing to move over to the
4073 			 * server.
4074 			 */
4075 			return (0);
4076 		}
4077 		vp = NFSTOV(np);
4078 		*vpp = vp;
4079 	} else {
4080 		np = VTONFS(vp);
4081 	}
4082 	dp->nfsdl_flags &= ~NFSCLDL_MODTIMESET;
4083 
4084 	/*
4085 	 * Ok, if it's a write delegation, flush data to the server, so
4086 	 * that close/open consistency is retained.
4087 	 */
4088 	ret = 0;
4089 	NFSLOCKNODE(np);
4090 	if ((dp->nfsdl_flags & NFSCLDL_WRITE) && (np->n_flag & NMODIFIED)) {
4091 		np->n_flag |= NDELEGRECALL;
4092 		NFSUNLOCKNODE(np);
4093 		ret = ncl_flush(vp, MNT_WAIT, p, 1, called_from_renewthread);
4094 		NFSLOCKNODE(np);
4095 		np->n_flag &= ~NDELEGRECALL;
4096 	}
4097 	NFSINVALATTRCACHE(np);
4098 	NFSUNLOCKNODE(np);
4099 	if (ret == EIO && called_from_renewthread != 0) {
4100 		/*
4101 		 * If the flush failed with EIO for the renew thread,
4102 		 * return now, so that the dirty buffer will be flushed
4103 		 * later.
4104 		 */
4105 		return (ret);
4106 	}
4107 
4108 	/*
4109 	 * Now, for each openowner with opens issued locally, move them
4110 	 * over to state against the server.
4111 	 */
4112 	LIST_FOREACH(lowp, &dp->nfsdl_owner, nfsow_list) {
4113 		lop = LIST_FIRST(&lowp->nfsow_open);
4114 		if (lop != NULL) {
4115 			if (LIST_NEXT(lop, nfso_list) != NULL)
4116 				panic("nfsdlg mult opens");
4117 			/*
4118 			 * Look for the same openowner against the server.
4119 			 */
4120 			LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
4121 				if (!NFSBCMP(lowp->nfsow_owner,
4122 				    owp->nfsow_owner, NFSV4CL_LOCKNAMELEN)) {
4123 					newnfs_copycred(&dp->nfsdl_cred, cred);
4124 					ret = nfscl_moveopen(vp, clp, nmp, lop,
4125 					    owp, dp, cred, p);
4126 					if (ret == NFSERR_STALECLIENTID ||
4127 					    ret == NFSERR_STALEDONTRECOVER ||
4128 					    ret == NFSERR_BADSESSION)
4129 						return (ret);
4130 					if (ret) {
4131 						nfscl_freeopen(lop, 1);
4132 						if (!error)
4133 							error = ret;
4134 					}
4135 					break;
4136 				}
4137 			}
4138 
4139 			/*
4140 			 * If no openowner found, create one and get an open
4141 			 * for it.
4142 			 */
4143 			if (owp == NULL) {
4144 				nowp = malloc(
4145 				    sizeof (struct nfsclowner), M_NFSCLOWNER,
4146 				    M_WAITOK);
4147 				nfscl_newopen(clp, NULL, &owp, &nowp, &op,
4148 				    NULL, lowp->nfsow_owner, dp->nfsdl_fh,
4149 				    dp->nfsdl_fhlen, NULL, NULL);
4150 				newnfs_copycred(&dp->nfsdl_cred, cred);
4151 				ret = nfscl_moveopen(vp, clp, nmp, lop,
4152 				    owp, dp, cred, p);
4153 				if (ret) {
4154 					nfscl_freeopenowner(owp, 0);
4155 					if (ret == NFSERR_STALECLIENTID ||
4156 					    ret == NFSERR_STALEDONTRECOVER ||
4157 					    ret == NFSERR_BADSESSION)
4158 						return (ret);
4159 					if (ret) {
4160 						nfscl_freeopen(lop, 1);
4161 						if (!error)
4162 							error = ret;
4163 					}
4164 				}
4165 			}
4166 		}
4167 	}
4168 
4169 	/*
4170 	 * Now, get byte range locks for any locks done locally.
4171 	 */
4172 	LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4173 		LIST_FOREACH(lckp, &lp->nfsl_lock, nfslo_list) {
4174 			newnfs_copycred(&dp->nfsdl_cred, cred);
4175 			ret = nfscl_relock(vp, clp, nmp, lp, lckp, cred, p);
4176 			if (ret == NFSERR_STALESTATEID ||
4177 			    ret == NFSERR_STALEDONTRECOVER ||
4178 			    ret == NFSERR_STALECLIENTID ||
4179 			    ret == NFSERR_BADSESSION)
4180 				return (ret);
4181 			if (ret && !error)
4182 				error = ret;
4183 		}
4184 	}
4185 	return (error);
4186 }
4187 
4188 /*
4189  * Move a locally issued open over to an owner on the state list.
4190  * SIDE EFFECT: If it needs to sleep (do an rpc), it unlocks clstate and
4191  * returns with it unlocked.
4192  */
4193 static int
4194 nfscl_moveopen(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp,
4195     struct nfsclopen *lop, struct nfsclowner *owp, struct nfscldeleg *dp,
4196     struct ucred *cred, NFSPROC_T *p)
4197 {
4198 	struct nfsclopen *op, *nop;
4199 	struct nfscldeleg *ndp;
4200 	struct nfsnode *np;
4201 	int error = 0, newone;
4202 
4203 	/*
4204 	 * First, look for an appropriate open, If found, just increment the
4205 	 * opencnt in it.
4206 	 */
4207 	LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
4208 		if ((op->nfso_mode & lop->nfso_mode) == lop->nfso_mode &&
4209 		    op->nfso_fhlen == lop->nfso_fhlen &&
4210 		    !NFSBCMP(op->nfso_fh, lop->nfso_fh, op->nfso_fhlen)) {
4211 			op->nfso_opencnt += lop->nfso_opencnt;
4212 			nfscl_freeopen(lop, 1);
4213 			return (0);
4214 		}
4215 	}
4216 
4217 	/* No appropriate open, so we have to do one against the server. */
4218 	np = VTONFS(vp);
4219 	nop = malloc(sizeof (struct nfsclopen) +
4220 	    lop->nfso_fhlen - 1, M_NFSCLOPEN, M_WAITOK);
4221 	nop->nfso_hash.le_prev = NULL;
4222 	newone = 0;
4223 	nfscl_newopen(clp, NULL, &owp, NULL, &op, &nop, owp->nfsow_owner,
4224 	    lop->nfso_fh, lop->nfso_fhlen, cred, &newone);
4225 	ndp = dp;
4226 	error = nfscl_tryopen(nmp, vp, np->n_v4->n4_data, np->n_v4->n4_fhlen,
4227 	    lop->nfso_fh, lop->nfso_fhlen, lop->nfso_mode, op,
4228 	    NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, &ndp, 0, 0, cred, p);
4229 	if (error) {
4230 		if (newone)
4231 			nfscl_freeopen(op, 0);
4232 	} else {
4233 		op->nfso_mode |= lop->nfso_mode;
4234 		op->nfso_opencnt += lop->nfso_opencnt;
4235 		nfscl_freeopen(lop, 1);
4236 	}
4237 	if (nop != NULL)
4238 		free(nop, M_NFSCLOPEN);
4239 	if (ndp != NULL) {
4240 		/*
4241 		 * What should I do with the returned delegation, since the
4242 		 * delegation is being recalled? For now, just printf and
4243 		 * through it away.
4244 		 */
4245 		printf("Moveopen returned deleg\n");
4246 		free(ndp, M_NFSCLDELEG);
4247 	}
4248 	return (error);
4249 }
4250 
4251 /*
4252  * Recall all delegations on this client.
4253  */
4254 static void
4255 nfscl_totalrecall(struct nfsclclient *clp)
4256 {
4257 	struct nfscldeleg *dp;
4258 
4259 	TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
4260 		if ((dp->nfsdl_flags & NFSCLDL_DELEGRET) == 0)
4261 			dp->nfsdl_flags |= NFSCLDL_RECALL;
4262 	}
4263 }
4264 
4265 /*
4266  * Relock byte ranges. Called for delegation recall and state expiry.
4267  */
4268 static int
4269 nfscl_relock(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp,
4270     struct nfscllockowner *lp, struct nfscllock *lop, struct ucred *cred,
4271     NFSPROC_T *p)
4272 {
4273 	struct nfscllockowner *nlp;
4274 	struct nfsfh *nfhp;
4275 	u_int64_t off, len;
4276 	int error, newone, donelocally;
4277 
4278 	off = lop->nfslo_first;
4279 	len = lop->nfslo_end - lop->nfslo_first;
4280 	error = nfscl_getbytelock(vp, off, len, lop->nfslo_type, cred, p,
4281 	    clp, 1, NULL, lp->nfsl_lockflags, lp->nfsl_owner,
4282 	    lp->nfsl_openowner, &nlp, &newone, &donelocally);
4283 	if (error || donelocally)
4284 		return (error);
4285 	nfhp = VTONFS(vp)->n_fhp;
4286 	error = nfscl_trylock(nmp, vp, nfhp->nfh_fh,
4287 	    nfhp->nfh_len, nlp, newone, 0, off,
4288 	    len, lop->nfslo_type, cred, p);
4289 	if (error)
4290 		nfscl_freelockowner(nlp, 0);
4291 	return (error);
4292 }
4293 
4294 /*
4295  * Called to re-open a file. Basically get a vnode for the file handle
4296  * and then call nfsrpc_openrpc() to do the rest.
4297  */
4298 static int
4299 nfsrpc_reopen(struct nfsmount *nmp, u_int8_t *fhp, int fhlen,
4300     u_int32_t mode, struct nfsclopen *op, struct nfscldeleg **dpp,
4301     struct ucred *cred, NFSPROC_T *p)
4302 {
4303 	struct nfsnode *np;
4304 	vnode_t vp;
4305 	int error;
4306 
4307 	error = nfscl_ngetreopen(nmp->nm_mountp, fhp, fhlen, p, &np);
4308 	if (error)
4309 		return (error);
4310 	vp = NFSTOV(np);
4311 	if (np->n_v4 != NULL) {
4312 		error = nfscl_tryopen(nmp, vp, np->n_v4->n4_data,
4313 		    np->n_v4->n4_fhlen, fhp, fhlen, mode, op,
4314 		    NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, dpp, 0, 0,
4315 		    cred, p);
4316 	} else {
4317 		error = EINVAL;
4318 	}
4319 	vrele(vp);
4320 	return (error);
4321 }
4322 
4323 /*
4324  * Try an open against the server. Just call nfsrpc_openrpc(), retrying while
4325  * NFSERR_DELAY. Also, try system credentials, if the passed in credentials
4326  * fail.
4327  */
4328 static int
4329 nfscl_tryopen(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen,
4330     u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op,
4331     u_int8_t *name, int namelen, struct nfscldeleg **ndpp,
4332     int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p)
4333 {
4334 	int error;
4335 
4336 	do {
4337 		error = nfsrpc_openrpc(nmp, vp, fhp, fhlen, newfhp, newfhlen,
4338 		    mode, op, name, namelen, ndpp, reclaim, delegtype, cred, p,
4339 		    0, 0);
4340 		if (error == NFSERR_DELAY)
4341 			(void) nfs_catnap(PZERO, error, "nfstryop");
4342 	} while (error == NFSERR_DELAY);
4343 	if (error == EAUTH || error == EACCES) {
4344 		/* Try again using system credentials */
4345 		newnfs_setroot(cred);
4346 		do {
4347 		    error = nfsrpc_openrpc(nmp, vp, fhp, fhlen, newfhp,
4348 			newfhlen, mode, op, name, namelen, ndpp, reclaim,
4349 			delegtype, cred, p, 1, 0);
4350 		    if (error == NFSERR_DELAY)
4351 			(void) nfs_catnap(PZERO, error, "nfstryop");
4352 		} while (error == NFSERR_DELAY);
4353 	}
4354 	return (error);
4355 }
4356 
4357 /*
4358  * Try a byte range lock. Just loop on nfsrpc_lock() while it returns
4359  * NFSERR_DELAY. Also, retry with system credentials, if the provided
4360  * cred don't work.
4361  */
4362 static int
4363 nfscl_trylock(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp,
4364     int fhlen, struct nfscllockowner *nlp, int newone, int reclaim,
4365     u_int64_t off, u_int64_t len, short type, struct ucred *cred, NFSPROC_T *p)
4366 {
4367 	struct nfsrv_descript nfsd, *nd = &nfsd;
4368 	int error;
4369 
4370 	do {
4371 		error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp, newone,
4372 		    reclaim, off, len, type, cred, p, 0);
4373 		if (!error && nd->nd_repstat == NFSERR_DELAY)
4374 			(void) nfs_catnap(PZERO, (int)nd->nd_repstat,
4375 			    "nfstrylck");
4376 	} while (!error && nd->nd_repstat == NFSERR_DELAY);
4377 	if (!error)
4378 		error = nd->nd_repstat;
4379 	if (error == EAUTH || error == EACCES) {
4380 		/* Try again using root credentials */
4381 		newnfs_setroot(cred);
4382 		do {
4383 			error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp,
4384 			    newone, reclaim, off, len, type, cred, p, 1);
4385 			if (!error && nd->nd_repstat == NFSERR_DELAY)
4386 				(void) nfs_catnap(PZERO, (int)nd->nd_repstat,
4387 				    "nfstrylck");
4388 		} while (!error && nd->nd_repstat == NFSERR_DELAY);
4389 		if (!error)
4390 			error = nd->nd_repstat;
4391 	}
4392 	return (error);
4393 }
4394 
4395 /*
4396  * Try a delegreturn against the server. Just call nfsrpc_delegreturn(),
4397  * retrying while NFSERR_DELAY. Also, try system credentials, if the passed in
4398  * credentials fail.
4399  */
4400 static int
4401 nfscl_trydelegreturn(struct nfscldeleg *dp, struct ucred *cred,
4402     struct nfsmount *nmp, NFSPROC_T *p)
4403 {
4404 	int error;
4405 
4406 	do {
4407 		error = nfsrpc_delegreturn(dp, cred, nmp, p, 0);
4408 		if (error == NFSERR_DELAY)
4409 			(void) nfs_catnap(PZERO, error, "nfstrydp");
4410 	} while (error == NFSERR_DELAY);
4411 	if (error == EAUTH || error == EACCES) {
4412 		/* Try again using system credentials */
4413 		newnfs_setroot(cred);
4414 		do {
4415 			error = nfsrpc_delegreturn(dp, cred, nmp, p, 1);
4416 			if (error == NFSERR_DELAY)
4417 				(void) nfs_catnap(PZERO, error, "nfstrydp");
4418 		} while (error == NFSERR_DELAY);
4419 	}
4420 	return (error);
4421 }
4422 
4423 /*
4424  * Try a close against the server. Just call nfsrpc_closerpc(),
4425  * retrying while NFSERR_DELAY. Also, try system credentials, if the passed in
4426  * credentials fail.
4427  */
4428 int
4429 nfscl_tryclose(struct nfsclopen *op, struct ucred *cred,
4430     struct nfsmount *nmp, NFSPROC_T *p)
4431 {
4432 	struct nfsrv_descript nfsd, *nd = &nfsd;
4433 	int error;
4434 
4435 	do {
4436 		error = nfsrpc_closerpc(nd, nmp, op, cred, p, 0);
4437 		if (error == NFSERR_DELAY)
4438 			(void) nfs_catnap(PZERO, error, "nfstrycl");
4439 	} while (error == NFSERR_DELAY);
4440 	if (error == EAUTH || error == EACCES) {
4441 		/* Try again using system credentials */
4442 		newnfs_setroot(cred);
4443 		do {
4444 			error = nfsrpc_closerpc(nd, nmp, op, cred, p, 1);
4445 			if (error == NFSERR_DELAY)
4446 				(void) nfs_catnap(PZERO, error, "nfstrycl");
4447 		} while (error == NFSERR_DELAY);
4448 	}
4449 	return (error);
4450 }
4451 
4452 /*
4453  * Decide if a delegation on a file permits close without flushing writes
4454  * to the server. This might be a big performance win in some environments.
4455  * (Not useful until the client does caching on local stable storage.)
4456  */
4457 int
4458 nfscl_mustflush(vnode_t vp)
4459 {
4460 	struct nfsclclient *clp;
4461 	struct nfscldeleg *dp;
4462 	struct nfsnode *np;
4463 	struct nfsmount *nmp;
4464 
4465 	np = VTONFS(vp);
4466 	nmp = VFSTONFS(vp->v_mount);
4467 	if (!NFSHASNFSV4(nmp))
4468 		return (1);
4469 	NFSLOCKCLSTATE();
4470 	clp = nfscl_findcl(nmp);
4471 	if (clp == NULL) {
4472 		NFSUNLOCKCLSTATE();
4473 		return (1);
4474 	}
4475 	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4476 	if (dp != NULL && (dp->nfsdl_flags &
4477 	    (NFSCLDL_WRITE | NFSCLDL_RECALL | NFSCLDL_DELEGRET)) ==
4478 	     NFSCLDL_WRITE &&
4479 	    (dp->nfsdl_sizelimit >= np->n_size ||
4480 	     !NFSHASSTRICT3530(nmp))) {
4481 		NFSUNLOCKCLSTATE();
4482 		return (0);
4483 	}
4484 	NFSUNLOCKCLSTATE();
4485 	return (1);
4486 }
4487 
4488 /*
4489  * See if a (write) delegation exists for this file.
4490  */
4491 int
4492 nfscl_nodeleg(vnode_t vp, int writedeleg)
4493 {
4494 	struct nfsclclient *clp;
4495 	struct nfscldeleg *dp;
4496 	struct nfsnode *np;
4497 	struct nfsmount *nmp;
4498 
4499 	np = VTONFS(vp);
4500 	nmp = VFSTONFS(vp->v_mount);
4501 	if (!NFSHASNFSV4(nmp))
4502 		return (1);
4503 	NFSLOCKCLSTATE();
4504 	clp = nfscl_findcl(nmp);
4505 	if (clp == NULL) {
4506 		NFSUNLOCKCLSTATE();
4507 		return (1);
4508 	}
4509 	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4510 	if (dp != NULL &&
4511 	    (dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) == 0 &&
4512 	    (writedeleg == 0 || (dp->nfsdl_flags & NFSCLDL_WRITE) ==
4513 	     NFSCLDL_WRITE)) {
4514 		NFSUNLOCKCLSTATE();
4515 		return (0);
4516 	}
4517 	NFSUNLOCKCLSTATE();
4518 	return (1);
4519 }
4520 
4521 /*
4522  * Look for an associated delegation that should be DelegReturned.
4523  */
4524 int
4525 nfscl_removedeleg(vnode_t vp, NFSPROC_T *p, nfsv4stateid_t *stp)
4526 {
4527 	struct nfsclclient *clp;
4528 	struct nfscldeleg *dp;
4529 	struct nfsclowner *owp;
4530 	struct nfscllockowner *lp;
4531 	struct nfsmount *nmp;
4532 	struct ucred *cred;
4533 	struct nfsnode *np;
4534 	int igotlock = 0, triedrecall = 0, needsrecall, retcnt = 0, islept;
4535 
4536 	nmp = VFSTONFS(vp->v_mount);
4537 	np = VTONFS(vp);
4538 	NFSLOCKCLSTATE();
4539 	/*
4540 	 * Loop around waiting for:
4541 	 * - outstanding I/O operations on delegations to complete
4542 	 * - for a delegation on vp that has state, lock the client and
4543 	 *   do a recall
4544 	 * - return delegation with no state
4545 	 */
4546 	while (1) {
4547 		clp = nfscl_findcl(nmp);
4548 		if (clp == NULL) {
4549 			NFSUNLOCKCLSTATE();
4550 			return (retcnt);
4551 		}
4552 		dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
4553 		    np->n_fhp->nfh_len);
4554 		if (dp != NULL) {
4555 		    /*
4556 		     * Wait for outstanding I/O ops to be done.
4557 		     */
4558 		    if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
4559 			if (igotlock) {
4560 			    nfsv4_unlock(&clp->nfsc_lock, 0);
4561 			    igotlock = 0;
4562 			}
4563 			dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED;
4564 			(void) nfsmsleep(&dp->nfsdl_rwlock,
4565 			    NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL);
4566 			continue;
4567 		    }
4568 		    needsrecall = 0;
4569 		    LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
4570 			if (!LIST_EMPTY(&owp->nfsow_open)) {
4571 			    needsrecall = 1;
4572 			    break;
4573 			}
4574 		    }
4575 		    if (!needsrecall) {
4576 			LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4577 			    if (!LIST_EMPTY(&lp->nfsl_lock)) {
4578 				needsrecall = 1;
4579 				break;
4580 			    }
4581 			}
4582 		    }
4583 		    if (needsrecall && !triedrecall) {
4584 			dp->nfsdl_flags |= NFSCLDL_DELEGRET;
4585 			islept = 0;
4586 			while (!igotlock) {
4587 			    igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
4588 				&islept, NFSCLSTATEMUTEXPTR, NULL);
4589 			    if (islept)
4590 				break;
4591 			}
4592 			if (islept)
4593 			    continue;
4594 			NFSUNLOCKCLSTATE();
4595 			cred = newnfs_getcred();
4596 			newnfs_copycred(&dp->nfsdl_cred, cred);
4597 			nfscl_recalldeleg(clp, nmp, dp, vp, cred, p, 0, NULL);
4598 			NFSFREECRED(cred);
4599 			triedrecall = 1;
4600 			NFSLOCKCLSTATE();
4601 			nfsv4_unlock(&clp->nfsc_lock, 0);
4602 			igotlock = 0;
4603 			continue;
4604 		    }
4605 		    *stp = dp->nfsdl_stateid;
4606 		    retcnt = 1;
4607 		    nfscl_cleandeleg(dp);
4608 		    nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
4609 		}
4610 		if (igotlock)
4611 		    nfsv4_unlock(&clp->nfsc_lock, 0);
4612 		NFSUNLOCKCLSTATE();
4613 		return (retcnt);
4614 	}
4615 }
4616 
4617 /*
4618  * Look for associated delegation(s) that should be DelegReturned.
4619  */
4620 int
4621 nfscl_renamedeleg(vnode_t fvp, nfsv4stateid_t *fstp, int *gotfdp, vnode_t tvp,
4622     nfsv4stateid_t *tstp, int *gottdp, NFSPROC_T *p)
4623 {
4624 	struct nfsclclient *clp;
4625 	struct nfscldeleg *dp;
4626 	struct nfsclowner *owp;
4627 	struct nfscllockowner *lp;
4628 	struct nfsmount *nmp;
4629 	struct ucred *cred;
4630 	struct nfsnode *np;
4631 	int igotlock = 0, triedrecall = 0, needsrecall, retcnt = 0, islept;
4632 
4633 	nmp = VFSTONFS(fvp->v_mount);
4634 	*gotfdp = 0;
4635 	*gottdp = 0;
4636 	NFSLOCKCLSTATE();
4637 	/*
4638 	 * Loop around waiting for:
4639 	 * - outstanding I/O operations on delegations to complete
4640 	 * - for a delegation on fvp that has state, lock the client and
4641 	 *   do a recall
4642 	 * - return delegation(s) with no state.
4643 	 */
4644 	while (1) {
4645 		clp = nfscl_findcl(nmp);
4646 		if (clp == NULL) {
4647 			NFSUNLOCKCLSTATE();
4648 			return (retcnt);
4649 		}
4650 		np = VTONFS(fvp);
4651 		dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
4652 		    np->n_fhp->nfh_len);
4653 		if (dp != NULL && *gotfdp == 0) {
4654 		    /*
4655 		     * Wait for outstanding I/O ops to be done.
4656 		     */
4657 		    if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
4658 			if (igotlock) {
4659 			    nfsv4_unlock(&clp->nfsc_lock, 0);
4660 			    igotlock = 0;
4661 			}
4662 			dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED;
4663 			(void) nfsmsleep(&dp->nfsdl_rwlock,
4664 			    NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL);
4665 			continue;
4666 		    }
4667 		    needsrecall = 0;
4668 		    LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
4669 			if (!LIST_EMPTY(&owp->nfsow_open)) {
4670 			    needsrecall = 1;
4671 			    break;
4672 			}
4673 		    }
4674 		    if (!needsrecall) {
4675 			LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4676 			    if (!LIST_EMPTY(&lp->nfsl_lock)) {
4677 				needsrecall = 1;
4678 				break;
4679 			    }
4680 			}
4681 		    }
4682 		    if (needsrecall && !triedrecall) {
4683 			dp->nfsdl_flags |= NFSCLDL_DELEGRET;
4684 			islept = 0;
4685 			while (!igotlock) {
4686 			    igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
4687 				&islept, NFSCLSTATEMUTEXPTR, NULL);
4688 			    if (islept)
4689 				break;
4690 			}
4691 			if (islept)
4692 			    continue;
4693 			NFSUNLOCKCLSTATE();
4694 			cred = newnfs_getcred();
4695 			newnfs_copycred(&dp->nfsdl_cred, cred);
4696 			nfscl_recalldeleg(clp, nmp, dp, fvp, cred, p, 0, NULL);
4697 			NFSFREECRED(cred);
4698 			triedrecall = 1;
4699 			NFSLOCKCLSTATE();
4700 			nfsv4_unlock(&clp->nfsc_lock, 0);
4701 			igotlock = 0;
4702 			continue;
4703 		    }
4704 		    *fstp = dp->nfsdl_stateid;
4705 		    retcnt++;
4706 		    *gotfdp = 1;
4707 		    nfscl_cleandeleg(dp);
4708 		    nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
4709 		}
4710 		if (igotlock) {
4711 		    nfsv4_unlock(&clp->nfsc_lock, 0);
4712 		    igotlock = 0;
4713 		}
4714 		if (tvp != NULL) {
4715 		    np = VTONFS(tvp);
4716 		    dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
4717 			np->n_fhp->nfh_len);
4718 		    if (dp != NULL && *gottdp == 0) {
4719 			/*
4720 			 * Wait for outstanding I/O ops to be done.
4721 			 */
4722 			if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
4723 			    dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED;
4724 			    (void) nfsmsleep(&dp->nfsdl_rwlock,
4725 				NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL);
4726 			    continue;
4727 			}
4728 			LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
4729 			    if (!LIST_EMPTY(&owp->nfsow_open)) {
4730 				NFSUNLOCKCLSTATE();
4731 				return (retcnt);
4732 			    }
4733 			}
4734 			LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4735 			    if (!LIST_EMPTY(&lp->nfsl_lock)) {
4736 				NFSUNLOCKCLSTATE();
4737 				return (retcnt);
4738 			    }
4739 			}
4740 			*tstp = dp->nfsdl_stateid;
4741 			retcnt++;
4742 			*gottdp = 1;
4743 			nfscl_cleandeleg(dp);
4744 			nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
4745 		    }
4746 		}
4747 		NFSUNLOCKCLSTATE();
4748 		return (retcnt);
4749 	}
4750 }
4751 
4752 /*
4753  * Get a reference on the clientid associated with the mount point.
4754  * Return 1 if success, 0 otherwise.
4755  */
4756 int
4757 nfscl_getref(struct nfsmount *nmp)
4758 {
4759 	struct nfsclclient *clp;
4760 
4761 	NFSLOCKCLSTATE();
4762 	clp = nfscl_findcl(nmp);
4763 	if (clp == NULL) {
4764 		NFSUNLOCKCLSTATE();
4765 		return (0);
4766 	}
4767 	nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, NULL);
4768 	NFSUNLOCKCLSTATE();
4769 	return (1);
4770 }
4771 
4772 /*
4773  * Release a reference on a clientid acquired with the above call.
4774  */
4775 void
4776 nfscl_relref(struct nfsmount *nmp)
4777 {
4778 	struct nfsclclient *clp;
4779 
4780 	NFSLOCKCLSTATE();
4781 	clp = nfscl_findcl(nmp);
4782 	if (clp == NULL) {
4783 		NFSUNLOCKCLSTATE();
4784 		return;
4785 	}
4786 	nfsv4_relref(&clp->nfsc_lock);
4787 	NFSUNLOCKCLSTATE();
4788 }
4789 
4790 /*
4791  * Save the size attribute in the delegation, since the nfsnode
4792  * is going away.
4793  */
4794 void
4795 nfscl_reclaimnode(vnode_t vp)
4796 {
4797 	struct nfsclclient *clp;
4798 	struct nfscldeleg *dp;
4799 	struct nfsnode *np = VTONFS(vp);
4800 	struct nfsmount *nmp;
4801 
4802 	nmp = VFSTONFS(vp->v_mount);
4803 	if (!NFSHASNFSV4(nmp))
4804 		return;
4805 	NFSLOCKCLSTATE();
4806 	clp = nfscl_findcl(nmp);
4807 	if (clp == NULL) {
4808 		NFSUNLOCKCLSTATE();
4809 		return;
4810 	}
4811 	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4812 	if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE))
4813 		dp->nfsdl_size = np->n_size;
4814 	NFSUNLOCKCLSTATE();
4815 }
4816 
4817 /*
4818  * Get the saved size attribute in the delegation, since it is a
4819  * newly allocated nfsnode.
4820  */
4821 void
4822 nfscl_newnode(vnode_t vp)
4823 {
4824 	struct nfsclclient *clp;
4825 	struct nfscldeleg *dp;
4826 	struct nfsnode *np = VTONFS(vp);
4827 	struct nfsmount *nmp;
4828 
4829 	nmp = VFSTONFS(vp->v_mount);
4830 	if (!NFSHASNFSV4(nmp))
4831 		return;
4832 	NFSLOCKCLSTATE();
4833 	clp = nfscl_findcl(nmp);
4834 	if (clp == NULL) {
4835 		NFSUNLOCKCLSTATE();
4836 		return;
4837 	}
4838 	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4839 	if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE))
4840 		np->n_size = dp->nfsdl_size;
4841 	NFSUNLOCKCLSTATE();
4842 }
4843 
4844 /*
4845  * If there is a valid write delegation for this file, set the modtime
4846  * to the local clock time.
4847  */
4848 void
4849 nfscl_delegmodtime(vnode_t vp)
4850 {
4851 	struct nfsclclient *clp;
4852 	struct nfscldeleg *dp;
4853 	struct nfsnode *np = VTONFS(vp);
4854 	struct nfsmount *nmp;
4855 
4856 	nmp = VFSTONFS(vp->v_mount);
4857 	if (!NFSHASNFSV4(nmp))
4858 		return;
4859 	NFSLOCKCLSTATE();
4860 	clp = nfscl_findcl(nmp);
4861 	if (clp == NULL) {
4862 		NFSUNLOCKCLSTATE();
4863 		return;
4864 	}
4865 	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4866 	if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE)) {
4867 		nanotime(&dp->nfsdl_modtime);
4868 		dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
4869 	}
4870 	NFSUNLOCKCLSTATE();
4871 }
4872 
4873 /*
4874  * If there is a valid write delegation for this file with a modtime set,
4875  * put that modtime in mtime.
4876  */
4877 void
4878 nfscl_deleggetmodtime(vnode_t vp, struct timespec *mtime)
4879 {
4880 	struct nfsclclient *clp;
4881 	struct nfscldeleg *dp;
4882 	struct nfsnode *np = VTONFS(vp);
4883 	struct nfsmount *nmp;
4884 
4885 	nmp = VFSTONFS(vp->v_mount);
4886 	if (!NFSHASNFSV4(nmp))
4887 		return;
4888 	NFSLOCKCLSTATE();
4889 	clp = nfscl_findcl(nmp);
4890 	if (clp == NULL) {
4891 		NFSUNLOCKCLSTATE();
4892 		return;
4893 	}
4894 	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4895 	if (dp != NULL &&
4896 	    (dp->nfsdl_flags & (NFSCLDL_WRITE | NFSCLDL_MODTIMESET)) ==
4897 	    (NFSCLDL_WRITE | NFSCLDL_MODTIMESET))
4898 		*mtime = dp->nfsdl_modtime;
4899 	NFSUNLOCKCLSTATE();
4900 }
4901 
4902 static int
4903 nfscl_errmap(struct nfsrv_descript *nd, u_int32_t minorvers)
4904 {
4905 	short *defaulterrp, *errp;
4906 
4907 	if (!nd->nd_repstat)
4908 		return (0);
4909 	if (nd->nd_procnum == NFSPROC_NOOP)
4910 		return (txdr_unsigned(nd->nd_repstat & 0xffff));
4911 	if (nd->nd_repstat == EBADRPC)
4912 		return (txdr_unsigned(NFSERR_BADXDR));
4913 	if (nd->nd_repstat == NFSERR_MINORVERMISMATCH ||
4914 	    nd->nd_repstat == NFSERR_OPILLEGAL)
4915 		return (txdr_unsigned(nd->nd_repstat));
4916 	if (nd->nd_repstat >= NFSERR_BADIOMODE && nd->nd_repstat < 20000 &&
4917 	    minorvers > NFSV4_MINORVERSION) {
4918 		/* NFSv4.n error. */
4919 		return (txdr_unsigned(nd->nd_repstat));
4920 	}
4921 	if (nd->nd_procnum < NFSV4OP_CBNOPS)
4922 		errp = defaulterrp = nfscl_cberrmap[nd->nd_procnum];
4923 	else
4924 		return (txdr_unsigned(nd->nd_repstat));
4925 	while (*++errp)
4926 		if (*errp == (short)nd->nd_repstat)
4927 			return (txdr_unsigned(nd->nd_repstat));
4928 	return (txdr_unsigned(*defaulterrp));
4929 }
4930 
/*
 * Called to find/add a layout to a client.
 * This function returns the layout with a refcnt (shared lock) upon
 * success (returns 0) or with no lock/refcnt on the layout when an
 * error is returned.
 * If a layout is passed in via lypp, it is locked (exclusively locked).
 *
 * Arguments, as used below:
 * - nmp: the mount; nmp->nm_clp supplies the client structure.
 * - vp: the file's vnode; only its cached na_filesid is copied into a
 *   newly created layout.
 * - fhp/fhlen: the file handle used to look up and hash the layout.
 * - stateidp: layout stateid; copied in full into a new layout, otherwise
 *   only its seqid is taken and only when it is larger than the stored one.
 * - layouttype: NFSLAYOUT_FLEXFILE selects NFSLY_FLEXFILE, any other value
 *   selects NFSLY_FILES (new layouts only).
 * - retonclose: when non-zero, NFSLY_RETONCLOSE is set on the layout.
 * - fhlp: list of new file layouts to merge into the layout.
 * - lypp: in: an already locked layout or NULL; out: set to the layout
 *   found/created when NULL was passed in.
 * - cred, p: not referenced in this function.
 *
 * Returns 0 on success, or EPERM when the mount has no client structure
 * or a forced dismount is detected after acquiring the reference.
 */
int
nfscl_layout(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen,
    nfsv4stateid_t *stateidp, int layouttype, int retonclose,
    struct nfsclflayouthead *fhlp, struct nfscllayout **lypp,
    struct ucred *cred, NFSPROC_T *p)
{
	struct nfsclclient *clp;
	struct nfscllayout *lyp, *tlyp;
	struct nfsclflayout *flp;
	struct nfsnode *np = VTONFS(vp);
	mount_t mp;
	int layout_passed_in;

	mp = nmp->nm_mountp;
	layout_passed_in = 1;
	tlyp = NULL;
	lyp = *lypp;
	if (lyp == NULL) {
		/*
		 * No layout was passed in, so pre-allocate a candidate before
		 * the state lock is taken (presumably because an M_WAITOK
		 * allocation may sleep).  The "+ fhlen - 1" sizing presumably
		 * reflects a one-byte nfsly_fh[] tail -- TODO confirm against
		 * the structure definition.
		 */
		layout_passed_in = 0;
		tlyp = malloc(sizeof(*tlyp) + fhlen - 1, M_NFSLAYOUT,
		    M_WAITOK | M_ZERO);
	}

	NFSLOCKCLSTATE();
	clp = nmp->nm_clp;
	if (clp == NULL) {
		/* No client: drop the caller's lock and the spare allocation. */
		if (layout_passed_in != 0)
			nfsv4_unlock(&lyp->nfsly_lock, 0);
		NFSUNLOCKCLSTATE();
		if (tlyp != NULL)
			free(tlyp, M_NFSLAYOUT);
		return (EPERM);
	}
	if (lyp == NULL) {
		/*
		 * Although no lyp was passed in, another thread might have
		 * allocated one. If one is found, just increment its ref
		 * count and return it.
		 */
		lyp = nfscl_findlayout(clp, fhp, fhlen);
		if (lyp == NULL) {
			/*
			 * Not found: initialize the pre-allocated structure and
			 * link it at the head of the client's layout list and
			 * into the file handle hash chain.  tlyp is cleared so
			 * the exit paths do not free the now-linked layout.
			 */
			lyp = tlyp;
			tlyp = NULL;
			lyp->nfsly_stateid.seqid = stateidp->seqid;
			lyp->nfsly_stateid.other[0] = stateidp->other[0];
			lyp->nfsly_stateid.other[1] = stateidp->other[1];
			lyp->nfsly_stateid.other[2] = stateidp->other[2];
			lyp->nfsly_lastbyte = 0;
			LIST_INIT(&lyp->nfsly_flayread);
			LIST_INIT(&lyp->nfsly_flayrw);
			LIST_INIT(&lyp->nfsly_recall);
			lyp->nfsly_filesid[0] = np->n_vattr.na_filesid[0];
			lyp->nfsly_filesid[1] = np->n_vattr.na_filesid[1];
			lyp->nfsly_clp = clp;
			if (layouttype == NFSLAYOUT_FLEXFILE)
				lyp->nfsly_flags = NFSLY_FLEXFILE;
			else
				lyp->nfsly_flags = NFSLY_FILES;
			if (retonclose != 0)
				lyp->nfsly_flags |= NFSLY_RETONCLOSE;
			lyp->nfsly_fhlen = fhlen;
			NFSBCOPY(fhp, lyp->nfsly_fh, fhlen);
			TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
			LIST_INSERT_HEAD(NFSCLLAYOUTHASH(clp, fhp, fhlen), lyp,
			    nfsly_hash);
			lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
			nfscl_layoutcnt++;
		} else {
			/*
			 * Found an existing layout: update it, move it to the
			 * head of the client's list and refresh its timestamp.
			 * The pre-allocated tlyp is freed on the way out.
			 */
			if (retonclose != 0)
				lyp->nfsly_flags |= NFSLY_RETONCLOSE;
			if (stateidp->seqid > lyp->nfsly_stateid.seqid)
				lyp->nfsly_stateid.seqid = stateidp->seqid;
			TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list);
			TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
			lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
		}
		/* Acquire the reference that is handed back to the caller. */
		nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
		if (NFSCL_FORCEDISM(mp)) {
			NFSUNLOCKCLSTATE();
			if (tlyp != NULL)
				free(tlyp, M_NFSLAYOUT);
			return (EPERM);
		}
		*lypp = lyp;
	} else if (stateidp->seqid > lyp->nfsly_stateid.seqid)
		lyp->nfsly_stateid.seqid = stateidp->seqid;

	/* Merge the new list of File Layouts into the list. */
	/* The iomode of the first new entry selects the read or rw list. */
	flp = LIST_FIRST(fhlp);
	if (flp != NULL) {
		if (flp->nfsfl_iomode == NFSLAYOUTIOMODE_READ)
			nfscl_mergeflayouts(&lyp->nfsly_flayread, fhlp);
		else
			nfscl_mergeflayouts(&lyp->nfsly_flayrw, fhlp);
	}
	/*
	 * NOTE(review): per the header comment the caller gets the layout back
	 * with a refcnt (shared lock); the second argument of 1 presumably
	 * converts the passed-in exclusive lock to that -- confirm against
	 * nfsv4_unlock().
	 */
	if (layout_passed_in != 0)
		nfsv4_unlock(&lyp->nfsly_lock, 1);
	NFSUNLOCKCLSTATE();
	if (tlyp != NULL)
		free(tlyp, M_NFSLAYOUT);
	return (0);
}
5040 
5041 /*
5042  * Search for a layout by MDS file handle.
5043  * If one is found, it is returned with a refcnt (shared lock) iff
5044  * retflpp returned non-NULL and locked (exclusive locked) iff retflpp is
5045  * returned NULL.
5046  */
5047 struct nfscllayout *
5048 nfscl_getlayout(struct nfsclclient *clp, uint8_t *fhp, int fhlen,
5049     uint64_t off, struct nfsclflayout **retflpp, int *recalledp)
5050 {
5051 	struct nfscllayout *lyp;
5052 	mount_t mp;
5053 	int error, igotlock;
5054 
5055 	mp = clp->nfsc_nmp->nm_mountp;
5056 	*recalledp = 0;
5057 	*retflpp = NULL;
5058 	NFSLOCKCLSTATE();
5059 	lyp = nfscl_findlayout(clp, fhp, fhlen);
5060 	if (lyp != NULL) {
5061 		if ((lyp->nfsly_flags & NFSLY_RECALL) == 0) {
5062 			TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list);
5063 			TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
5064 			lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
5065 			error = nfscl_findlayoutforio(lyp, off,
5066 			    NFSV4OPEN_ACCESSREAD, retflpp);
5067 			if (error == 0)
5068 				nfsv4_getref(&lyp->nfsly_lock, NULL,
5069 				    NFSCLSTATEMUTEXPTR, mp);
5070 			else {
5071 				do {
5072 					igotlock = nfsv4_lock(&lyp->nfsly_lock,
5073 					    1, NULL, NFSCLSTATEMUTEXPTR, mp);
5074 				} while (igotlock == 0 && !NFSCL_FORCEDISM(mp));
5075 				*retflpp = NULL;
5076 			}
5077 			if (NFSCL_FORCEDISM(mp)) {
5078 				lyp = NULL;
5079 				*recalledp = 1;
5080 			}
5081 		} else {
5082 			lyp = NULL;
5083 			*recalledp = 1;
5084 		}
5085 	}
5086 	NFSUNLOCKCLSTATE();
5087 	return (lyp);
5088 }
5089 
5090 /*
5091  * Search for a layout by MDS file handle. If one is found, mark in to be
5092  * recalled, if it already marked "return on close".
5093  */
5094 static void
5095 nfscl_retoncloselayout(vnode_t vp, struct nfsclclient *clp, uint8_t *fhp,
5096     int fhlen, struct nfsclrecalllayout **recallpp)
5097 {
5098 	struct nfscllayout *lyp;
5099 	uint32_t iomode;
5100 
5101 	if (vp->v_type != VREG || !NFSHASPNFS(VFSTONFS(vp->v_mount)) ||
5102 	    nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
5103 	    (VTONFS(vp)->n_flag & NNOLAYOUT) != 0)
5104 		return;
5105 	lyp = nfscl_findlayout(clp, fhp, fhlen);
5106 	if (lyp != NULL && (lyp->nfsly_flags & (NFSLY_RETONCLOSE |
5107 	    NFSLY_RECALL)) == NFSLY_RETONCLOSE) {
5108 		iomode = 0;
5109 		if (!LIST_EMPTY(&lyp->nfsly_flayread))
5110 			iomode |= NFSLAYOUTIOMODE_READ;
5111 		if (!LIST_EMPTY(&lyp->nfsly_flayrw))
5112 			iomode |= NFSLAYOUTIOMODE_RW;
5113 		(void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE, lyp, iomode,
5114 		    0, UINT64_MAX, lyp->nfsly_stateid.seqid, 0, 0, NULL,
5115 		    *recallpp);
5116 		NFSCL_DEBUG(4, "retoncls recall iomode=%d\n", iomode);
5117 		*recallpp = NULL;
5118 	}
5119 }
5120 
5121 /*
5122  * Mark the layout to be recalled and with an error.
5123  * Also, disable the dsp from further use.
5124  */
5125 void
5126 nfscl_dserr(uint32_t op, uint32_t stat, struct nfscldevinfo *dp,
5127     struct nfscllayout *lyp, struct nfsclds *dsp)
5128 {
5129 	struct nfsclrecalllayout *recallp;
5130 	uint32_t iomode;
5131 
5132 	printf("DS being disabled, error=%d\n", stat);
5133 	/* Set up the return of the layout. */
5134 	recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, M_WAITOK);
5135 	iomode = 0;
5136 	NFSLOCKCLSTATE();
5137 	if ((lyp->nfsly_flags & NFSLY_RECALL) == 0) {
5138 		if (!LIST_EMPTY(&lyp->nfsly_flayread))
5139 			iomode |= NFSLAYOUTIOMODE_READ;
5140 		if (!LIST_EMPTY(&lyp->nfsly_flayrw))
5141 			iomode |= NFSLAYOUTIOMODE_RW;
5142 		(void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE, lyp, iomode,
5143 		    0, UINT64_MAX, lyp->nfsly_stateid.seqid, stat, op,
5144 		    dp->nfsdi_deviceid, recallp);
5145 		NFSUNLOCKCLSTATE();
5146 		NFSCL_DEBUG(4, "nfscl_dserr recall iomode=%d\n", iomode);
5147 	} else {
5148 		NFSUNLOCKCLSTATE();
5149 		free(recallp, M_NFSLAYRECALL);
5150 	}
5151 
5152 	/* And shut the TCP connection down. */
5153 	nfscl_cancelreqs(dsp);
5154 }
5155 
5156 /*
5157  * Cancel all RPCs for this "dsp" by closing the connection.
5158  * Also, mark the session as defunct.
5159  * If NFSCLDS_SAMECONN is set, the connection is shared with other DSs and
5160  * cannot be shut down.
5161  */
5162 void
5163 nfscl_cancelreqs(struct nfsclds *dsp)
5164 {
5165 	struct __rpc_client *cl;
5166 	static int non_event;
5167 
5168 	NFSLOCKDS(dsp);
5169 	if ((dsp->nfsclds_flags & (NFSCLDS_CLOSED | NFSCLDS_SAMECONN)) == 0 &&
5170 	    dsp->nfsclds_sockp != NULL &&
5171 	    dsp->nfsclds_sockp->nr_client != NULL) {
5172 		dsp->nfsclds_flags |= NFSCLDS_CLOSED;
5173 		cl = dsp->nfsclds_sockp->nr_client;
5174 		dsp->nfsclds_sess.nfsess_defunct = 1;
5175 		NFSUNLOCKDS(dsp);
5176 		CLNT_CLOSE(cl);
5177 		/*
5178 		 * This 1sec sleep is done to reduce the number of reconnect
5179 		 * attempts made on the DS while it has failed.
5180 		 */
5181 		tsleep(&non_event, PVFS, "ndscls", hz);
5182 		return;
5183 	}
5184 	NFSUNLOCKDS(dsp);
5185 }
5186 
5187 /*
5188  * Dereference a layout.
5189  */
5190 void
5191 nfscl_rellayout(struct nfscllayout *lyp, int exclocked)
5192 {
5193 
5194 	NFSLOCKCLSTATE();
5195 	if (exclocked != 0)
5196 		nfsv4_unlock(&lyp->nfsly_lock, 0);
5197 	else
5198 		nfsv4_relref(&lyp->nfsly_lock);
5199 	NFSUNLOCKCLSTATE();
5200 }
5201 
5202 /*
5203  * Search for a devinfo by deviceid. If one is found, return it after
5204  * acquiring a reference count on it.
5205  */
5206 struct nfscldevinfo *
5207 nfscl_getdevinfo(struct nfsclclient *clp, uint8_t *deviceid,
5208     struct nfscldevinfo *dip)
5209 {
5210 
5211 	NFSLOCKCLSTATE();
5212 	if (dip == NULL)
5213 		dip = nfscl_finddevinfo(clp, deviceid);
5214 	if (dip != NULL)
5215 		dip->nfsdi_refcnt++;
5216 	NFSUNLOCKCLSTATE();
5217 	return (dip);
5218 }
5219 
5220 /*
5221  * Dereference a devinfo structure.
5222  */
5223 static void
5224 nfscl_reldevinfo_locked(struct nfscldevinfo *dip)
5225 {
5226 
5227 	dip->nfsdi_refcnt--;
5228 	if (dip->nfsdi_refcnt == 0)
5229 		wakeup(&dip->nfsdi_refcnt);
5230 }
5231 
/*
 * Drop a reference on a devinfo structure.  This is the variant that
 * acquires the client state lock itself around the release.
 */
void
nfscl_reldevinfo(struct nfscldevinfo *dip)
{

	NFSLOCKCLSTATE();
	nfscl_reldevinfo_locked(dip);
	NFSUNLOCKCLSTATE();
}
5243 
5244 /*
5245  * Find a layout for this file handle. Return NULL upon failure.
5246  */
5247 static struct nfscllayout *
5248 nfscl_findlayout(struct nfsclclient *clp, u_int8_t *fhp, int fhlen)
5249 {
5250 	struct nfscllayout *lyp;
5251 
5252 	LIST_FOREACH(lyp, NFSCLLAYOUTHASH(clp, fhp, fhlen), nfsly_hash)
5253 		if (lyp->nfsly_fhlen == fhlen &&
5254 		    !NFSBCMP(lyp->nfsly_fh, fhp, fhlen))
5255 			break;
5256 	return (lyp);
5257 }
5258 
5259 /*
5260  * Find a devinfo for this deviceid. Return NULL upon failure.
5261  */
5262 static struct nfscldevinfo *
5263 nfscl_finddevinfo(struct nfsclclient *clp, uint8_t *deviceid)
5264 {
5265 	struct nfscldevinfo *dip;
5266 
5267 	LIST_FOREACH(dip, &clp->nfsc_devinfo, nfsdi_list)
5268 		if (NFSBCMP(dip->nfsdi_deviceid, deviceid, NFSX_V4DEVICEID)
5269 		    == 0)
5270 			break;
5271 	return (dip);
5272 }
5273 
5274 /*
5275  * Merge the new file layout list into the main one, maintaining it in
5276  * increasing offset order.
5277  */
5278 static void
5279 nfscl_mergeflayouts(struct nfsclflayouthead *fhlp,
5280     struct nfsclflayouthead *newfhlp)
5281 {
5282 	struct nfsclflayout *flp, *nflp, *prevflp, *tflp;
5283 
5284 	flp = LIST_FIRST(fhlp);
5285 	prevflp = NULL;
5286 	LIST_FOREACH_SAFE(nflp, newfhlp, nfsfl_list, tflp) {
5287 		while (flp != NULL && flp->nfsfl_off < nflp->nfsfl_off) {
5288 			prevflp = flp;
5289 			flp = LIST_NEXT(flp, nfsfl_list);
5290 		}
5291 		if (prevflp == NULL)
5292 			LIST_INSERT_HEAD(fhlp, nflp, nfsfl_list);
5293 		else
5294 			LIST_INSERT_AFTER(prevflp, nflp, nfsfl_list);
5295 		prevflp = nflp;
5296 	}
5297 }
5298 
5299 /*
5300  * Add this nfscldevinfo to the client, if it doesn't already exist.
5301  * This function consumes the structure pointed at by dip, if not NULL.
5302  */
5303 int
5304 nfscl_adddevinfo(struct nfsmount *nmp, struct nfscldevinfo *dip, int ind,
5305     struct nfsclflayout *flp)
5306 {
5307 	struct nfsclclient *clp;
5308 	struct nfscldevinfo *tdip;
5309 	uint8_t *dev;
5310 
5311 	NFSLOCKCLSTATE();
5312 	clp = nmp->nm_clp;
5313 	if (clp == NULL) {
5314 		NFSUNLOCKCLSTATE();
5315 		if (dip != NULL)
5316 			free(dip, M_NFSDEVINFO);
5317 		return (ENODEV);
5318 	}
5319 	if ((flp->nfsfl_flags & NFSFL_FILE) != 0)
5320 		dev = flp->nfsfl_dev;
5321 	else
5322 		dev = flp->nfsfl_ffm[ind].dev;
5323 	tdip = nfscl_finddevinfo(clp, dev);
5324 	if (tdip != NULL) {
5325 		tdip->nfsdi_layoutrefs++;
5326 		if ((flp->nfsfl_flags & NFSFL_FILE) != 0)
5327 			flp->nfsfl_devp = tdip;
5328 		else
5329 			flp->nfsfl_ffm[ind].devp = tdip;
5330 		nfscl_reldevinfo_locked(tdip);
5331 		NFSUNLOCKCLSTATE();
5332 		if (dip != NULL)
5333 			free(dip, M_NFSDEVINFO);
5334 		return (0);
5335 	}
5336 	if (dip != NULL) {
5337 		LIST_INSERT_HEAD(&clp->nfsc_devinfo, dip, nfsdi_list);
5338 		dip->nfsdi_layoutrefs = 1;
5339 		if ((flp->nfsfl_flags & NFSFL_FILE) != 0)
5340 			flp->nfsfl_devp = dip;
5341 		else
5342 			flp->nfsfl_ffm[ind].devp = dip;
5343 	}
5344 	NFSUNLOCKCLSTATE();
5345 	if (dip == NULL)
5346 		return (ENODEV);
5347 	return (0);
5348 }
5349 
5350 /*
5351  * Free up a layout structure and associated file layout structure(s).
5352  */
5353 void
5354 nfscl_freelayout(struct nfscllayout *layp)
5355 {
5356 	struct nfsclflayout *flp, *nflp;
5357 	struct nfsclrecalllayout *rp, *nrp;
5358 
5359 	LIST_FOREACH_SAFE(flp, &layp->nfsly_flayread, nfsfl_list, nflp) {
5360 		LIST_REMOVE(flp, nfsfl_list);
5361 		nfscl_freeflayout(flp);
5362 	}
5363 	LIST_FOREACH_SAFE(flp, &layp->nfsly_flayrw, nfsfl_list, nflp) {
5364 		LIST_REMOVE(flp, nfsfl_list);
5365 		nfscl_freeflayout(flp);
5366 	}
5367 	LIST_FOREACH_SAFE(rp, &layp->nfsly_recall, nfsrecly_list, nrp) {
5368 		LIST_REMOVE(rp, nfsrecly_list);
5369 		free(rp, M_NFSLAYRECALL);
5370 	}
5371 	nfscl_layoutcnt--;
5372 	free(layp, M_NFSLAYOUT);
5373 }
5374 
5375 /*
5376  * Free up a file layout structure.
5377  */
5378 void
5379 nfscl_freeflayout(struct nfsclflayout *flp)
5380 {
5381 	int i, j;
5382 
5383 	if ((flp->nfsfl_flags & NFSFL_FILE) != 0) {
5384 		for (i = 0; i < flp->nfsfl_fhcnt; i++)
5385 			free(flp->nfsfl_fh[i], M_NFSFH);
5386 		if (flp->nfsfl_devp != NULL)
5387 			flp->nfsfl_devp->nfsdi_layoutrefs--;
5388 	}
5389 	if ((flp->nfsfl_flags & NFSFL_FLEXFILE) != 0)
5390 		for (i = 0; i < flp->nfsfl_mirrorcnt; i++) {
5391 			for (j = 0; j < flp->nfsfl_ffm[i].fhcnt; j++)
5392 				free(flp->nfsfl_ffm[i].fh[j], M_NFSFH);
5393 			if (flp->nfsfl_ffm[i].devp != NULL)
5394 				flp->nfsfl_ffm[i].devp->nfsdi_layoutrefs--;
5395 		}
5396 	free(flp, M_NFSFLAYOUT);
5397 }
5398 
5399 /*
5400  * Free up a file layout devinfo structure.
5401  */
5402 void
5403 nfscl_freedevinfo(struct nfscldevinfo *dip)
5404 {
5405 
5406 	free(dip, M_NFSDEVINFO);
5407 }
5408 
/*
 * Mark any layouts that match as recalled.
 *
 * Fills in the caller supplied recall structure (recallp) from the other
 * arguments, links it into lyp's recall list in the order described below,
 * sets NFSLY_RECALL on the layout and does a wakeup() on the layout's
 * client structure.
 * - recalltype: kind of return (NFSLAYOUTRETURN_FILE or other).
 * - iomode, off, len, stateseqid: stored in the recall structure as is.
 * - stat, op, devid: error information; devid is only copied when not
 *   NULL, so nfsrecly_devid is left as the caller provided it otherwise.
 * - recallp: ends up on lyp->nfsly_recall and is not freed here.
 * Always returns 0.
 */
static int
nfscl_layoutrecall(int recalltype, struct nfscllayout *lyp, uint32_t iomode,
    uint64_t off, uint64_t len, uint32_t stateseqid, uint32_t stat, uint32_t op,
    char *devid, struct nfsclrecalllayout *recallp)
{
	struct nfsclrecalllayout *rp, *orp;

	recallp->nfsrecly_recalltype = recalltype;
	recallp->nfsrecly_iomode = iomode;
	recallp->nfsrecly_stateseqid = stateseqid;
	recallp->nfsrecly_off = off;
	recallp->nfsrecly_len = len;
	recallp->nfsrecly_stat = stat;
	recallp->nfsrecly_op = op;
	if (devid != NULL)
		NFSBCOPY(devid, recallp->nfsrecly_devid, NFSX_V4DEVICEID);
	/*
	 * Order the list as file returns first, followed by fsid and any
	 * returns, both in increasing stateseqid order.
	 * Note that the seqids wrap around, so 1 is after 0xffffffff.
	 * (I'm not sure this is correct because I find RFC5661 confusing
	 *  on this, but hopefully it will work ok.)
	 */
	orp = NULL;
	LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) {
		/* orp tracks the last entry visited, for the append below. */
		orp = rp;
		/*
		 * Insert in front of rp when:
		 * - the new entry is a file return and rp is either not a
		 *   file return or nfscl_seq() orders the new seqid before
		 *   (or equal to) rp's, or
		 * - neither is a file return and nfscl_seq() orders the new
		 *   seqid before (or equal to) rp's.
		 */
		if ((recalltype == NFSLAYOUTRETURN_FILE &&
		     (rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE ||
		      nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) ||
		    (recalltype != NFSLAYOUTRETURN_FILE &&
		     rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE &&
		     nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) {
			LIST_INSERT_BEFORE(rp, recallp, nfsrecly_list);
			break;
		}

		/*
		 * Put any error return on all the file returns that will
		 * precede this one.
		 */
		if (rp->nfsrecly_recalltype == NFSLAYOUTRETURN_FILE &&
		   stat != 0 && rp->nfsrecly_stat == 0) {
			rp->nfsrecly_stat = stat;
			rp->nfsrecly_op = op;
			if (devid != NULL)
				NFSBCOPY(devid, rp->nfsrecly_devid,
				    NFSX_V4DEVICEID);
		}
	}
	/*
	 * rp is NULL only when the loop ran off the end without inserting:
	 * the list was empty (orp == NULL) or the new entry goes after the
	 * last entry (orp).
	 */
	if (rp == NULL) {
		if (orp == NULL)
			LIST_INSERT_HEAD(&lyp->nfsly_recall, recallp,
			    nfsrecly_list);
		else
			LIST_INSERT_AFTER(orp, recallp, nfsrecly_list);
	}
	lyp->nfsly_flags |= NFSLY_RECALL;
	/* NOTE(review): presumably wakes the thread that does the returns. */
	wakeup(lyp->nfsly_clp);
	return (0);
}
5472 
/*
 * Order two seqids, allowing for wrap around from 0xffffffff->0.
 * When the two values are at least 0x7fffffff apart, the numerically
 * larger one is taken to come first (the smaller one has wrapped);
 * otherwise plain numeric order applies.
 * Return 1 if seqid1 comes before or is equal to seqid2, 0 otherwise.
 */
static int
nfscl_seq(uint32_t seqid1, uint32_t seqid2)
{
	const uint32_t wrapgap = 0x7fffffff;

	if (seqid1 < seqid2) {
		/* A large gap means that seqid2 has wrapped around. */
		return ((seqid2 - seqid1) >= wrapgap ? 0 : 1);
	}
	if (seqid1 > seqid2) {
		/* A large gap means that seqid1 has wrapped around. */
		return ((seqid1 - seqid2) >= wrapgap ? 1 : 0);
	}
	/* Equal seqids count as "before". */
	return (1);
}
5493 
5494 /*
5495  * Do a layout return for each of the recalls.
5496  */
5497 static void
5498 nfscl_layoutreturn(struct nfsmount *nmp, struct nfscllayout *lyp,
5499     struct ucred *cred, NFSPROC_T *p)
5500 {
5501 	struct nfsclrecalllayout *rp;
5502 	nfsv4stateid_t stateid;
5503 	int layouttype;
5504 
5505 	NFSBCOPY(lyp->nfsly_stateid.other, stateid.other, NFSX_STATEIDOTHER);
5506 	stateid.seqid = lyp->nfsly_stateid.seqid;
5507 	if ((lyp->nfsly_flags & NFSLY_FILES) != 0)
5508 		layouttype = NFSLAYOUT_NFSV4_1_FILES;
5509 	else
5510 		layouttype = NFSLAYOUT_FLEXFILE;
5511 	LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) {
5512 		(void)nfsrpc_layoutreturn(nmp, lyp->nfsly_fh,
5513 		    lyp->nfsly_fhlen, 0, layouttype,
5514 		    rp->nfsrecly_iomode, rp->nfsrecly_recalltype,
5515 		    rp->nfsrecly_off, rp->nfsrecly_len,
5516 		    &stateid, cred, p, rp->nfsrecly_stat, rp->nfsrecly_op,
5517 		    rp->nfsrecly_devid);
5518 	}
5519 }
5520 
5521 /*
5522  * Do the layout commit for a file layout.
5523  */
5524 static void
5525 nfscl_dolayoutcommit(struct nfsmount *nmp, struct nfscllayout *lyp,
5526     struct ucred *cred, NFSPROC_T *p)
5527 {
5528 	struct nfsclflayout *flp;
5529 	uint64_t len;
5530 	int error, layouttype;
5531 
5532 	if ((lyp->nfsly_flags & NFSLY_FILES) != 0)
5533 		layouttype = NFSLAYOUT_NFSV4_1_FILES;
5534 	else
5535 		layouttype = NFSLAYOUT_FLEXFILE;
5536 	LIST_FOREACH(flp, &lyp->nfsly_flayrw, nfsfl_list) {
5537 		if (layouttype == NFSLAYOUT_FLEXFILE &&
5538 		    (flp->nfsfl_fflags & NFSFLEXFLAG_NO_LAYOUTCOMMIT) != 0) {
5539 			NFSCL_DEBUG(4, "Flex file: no layoutcommit\n");
5540 			/* If not supported, don't bother doing it. */
5541 			NFSLOCKMNT(nmp);
5542 			nmp->nm_state |= NFSSTA_NOLAYOUTCOMMIT;
5543 			NFSUNLOCKMNT(nmp);
5544 			break;
5545 		} else if (flp->nfsfl_off <= lyp->nfsly_lastbyte) {
5546 			len = flp->nfsfl_end - flp->nfsfl_off;
5547 			error = nfsrpc_layoutcommit(nmp, lyp->nfsly_fh,
5548 			    lyp->nfsly_fhlen, 0, flp->nfsfl_off, len,
5549 			    lyp->nfsly_lastbyte, &lyp->nfsly_stateid,
5550 			    layouttype, cred, p, NULL);
5551 			NFSCL_DEBUG(4, "layoutcommit err=%d\n", error);
5552 			if (error == NFSERR_NOTSUPP) {
5553 				/* If not supported, don't bother doing it. */
5554 				NFSLOCKMNT(nmp);
5555 				nmp->nm_state |= NFSSTA_NOLAYOUTCOMMIT;
5556 				NFSUNLOCKMNT(nmp);
5557 				break;
5558 			}
5559 		}
5560 	}
5561 }
5562 
5563 /*
5564  * Commit all layouts for a file (vnode).
5565  */
5566 int
5567 nfscl_layoutcommit(vnode_t vp, NFSPROC_T *p)
5568 {
5569 	struct nfsclclient *clp;
5570 	struct nfscllayout *lyp;
5571 	struct nfsnode *np = VTONFS(vp);
5572 	mount_t mp;
5573 	struct nfsmount *nmp;
5574 
5575 	mp = vp->v_mount;
5576 	nmp = VFSTONFS(mp);
5577 	if (NFSHASNOLAYOUTCOMMIT(nmp))
5578 		return (0);
5579 	NFSLOCKCLSTATE();
5580 	clp = nmp->nm_clp;
5581 	if (clp == NULL) {
5582 		NFSUNLOCKCLSTATE();
5583 		return (EPERM);
5584 	}
5585 	lyp = nfscl_findlayout(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
5586 	if (lyp == NULL) {
5587 		NFSUNLOCKCLSTATE();
5588 		return (EPERM);
5589 	}
5590 	nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
5591 	if (NFSCL_FORCEDISM(mp)) {
5592 		NFSUNLOCKCLSTATE();
5593 		return (EPERM);
5594 	}
5595 tryagain:
5596 	if ((lyp->nfsly_flags & NFSLY_WRITTEN) != 0) {
5597 		lyp->nfsly_flags &= ~NFSLY_WRITTEN;
5598 		NFSUNLOCKCLSTATE();
5599 		NFSCL_DEBUG(4, "do layoutcommit2\n");
5600 		nfscl_dolayoutcommit(clp->nfsc_nmp, lyp, NFSPROCCRED(p), p);
5601 		NFSLOCKCLSTATE();
5602 		goto tryagain;
5603 	}
5604 	nfsv4_relref(&lyp->nfsly_lock);
5605 	NFSUNLOCKCLSTATE();
5606 	return (0);
5607 }
5608