xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs4_client_state.c (revision f2211ffec9a7ac3c1efc6de9347072f816f10a60)
1  /*
2   * CDDL HEADER START
3   *
4   * The contents of this file are subject to the terms of the
5   * Common Development and Distribution License (the "License").
6   * You may not use this file except in compliance with the License.
7   *
8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9   * or http://www.opensolaris.org/os/licensing.
10   * See the License for the specific language governing permissions
11   * and limitations under the License.
12   *
13   * When distributing Covered Code, include this CDDL HEADER in each
14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15   * If applicable, add the following below this CDDL HEADER, with the
16   * fields enclosed by brackets "[]" replaced with your own identifying
17   * information: Portions Copyright [yyyy] [name of copyright owner]
18   *
19   * CDDL HEADER END
20   */
21  /*
22   * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23   * Use is subject to license terms.
24   */
25  
26  /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27  /* All Rights Reserved */
28  
29  
30  #include <nfs/nfs4_clnt.h>
31  #include <nfs/rnode4.h>
32  #include <sys/systm.h>
33  #include <sys/cmn_err.h>
34  #include <sys/atomic.h>
35  
36  static void	nfs4_free_open_owner(nfs4_open_owner_t *, mntinfo4_t *);
37  static nfs4_open_owner_t *find_freed_open_owner(cred_t *,
38  				nfs4_oo_hash_bucket_t *, mntinfo4_t *);
39  static open_delegation_type4 get_dtype(rnode4_t *);
40  
41  #ifdef DEBUG
42  int nfs4_client_foo_debug = 0x0;
43  int nfs4_client_open_dg = 0x0;
44  /*
45   * If this is non-zero, the lockowner and openowner seqid sync primitives
46   * will intermittently return errors.
47   */
48  static int seqid_sync_faults = 0;
49  #endif
50  
51  stateid4 clnt_special0 = {
52  	0,
53  	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
54  };
55  
56  stateid4 clnt_special1 = {
57  	0xffffffff,
58  	{
59  		(char)0xff, (char)0xff, (char)0xff, (char)0xff,
60  		(char)0xff, (char)0xff, (char)0xff, (char)0xff,
61  		(char)0xff, (char)0xff, (char)0xff, (char)0xff
62  	}
63  };
64  
65  /* finds hash bucket and locks it */
66  static nfs4_oo_hash_bucket_t *
lock_bucket(cred_t * cr,mntinfo4_t * mi)67  lock_bucket(cred_t *cr, mntinfo4_t *mi)
68  {
69  	nfs4_oo_hash_bucket_t *bucketp;
70  	uint32_t hash_key;
71  
72  	hash_key = (uint32_t)(crgetuid(cr) + crgetruid(cr))
73  	    % NFS4_NUM_OO_BUCKETS;
74  	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, "lock_bucket: "
75  	    "hash_key %d for cred %p", hash_key, (void*)cr));
76  
77  	ASSERT(hash_key >= 0 && hash_key < NFS4_NUM_OO_BUCKETS);
78  	ASSERT(mi != NULL);
79  	ASSERT(mutex_owned(&mi->mi_lock));
80  
81  	bucketp = &(mi->mi_oo_list[hash_key]);
82  	mutex_enter(&bucketp->b_lock);
83  	return (bucketp);
84  }
85  
86  /* unlocks hash bucket pointed by bucket_ptr */
87  static void
unlock_bucket(nfs4_oo_hash_bucket_t * bucketp)88  unlock_bucket(nfs4_oo_hash_bucket_t *bucketp)
89  {
90  	mutex_exit(&bucketp->b_lock);
91  }
92  
93  /*
94   * Removes the lock owner from the rnode's lock_owners list and frees the
95   * corresponding reference.
96   */
97  void
nfs4_rnode_remove_lock_owner(rnode4_t * rp,nfs4_lock_owner_t * lop)98  nfs4_rnode_remove_lock_owner(rnode4_t *rp, nfs4_lock_owner_t *lop)
99  {
100  	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
101  	    "nfs4_rnode_remove_lock_owner"));
102  
103  	mutex_enter(&rp->r_statev4_lock);
104  
105  	if (lop->lo_next_rnode == NULL) {
106  		/* already removed from list */
107  		mutex_exit(&rp->r_statev4_lock);
108  		return;
109  	}
110  
111  	ASSERT(lop->lo_prev_rnode != NULL);
112  
113  	lop->lo_prev_rnode->lo_next_rnode = lop->lo_next_rnode;
114  	lop->lo_next_rnode->lo_prev_rnode = lop->lo_prev_rnode;
115  
116  	lop->lo_next_rnode = lop->lo_prev_rnode = NULL;
117  
118  	mutex_exit(&rp->r_statev4_lock);
119  
120  	/*
121  	 * This would be an appropriate place for
122  	 * RELEASE_LOCKOWNER.  For now, this is overkill
123  	 * because in the common case, close is going to
124  	 * release any lockowners anyway.
125  	 */
126  	lock_owner_rele(lop);
127  }
128  
129  /*
130   * Remove all lock owners from the rnode's lock_owners list.  Frees up
131   * their references from the list.
132   */
133  
134  void
nfs4_flush_lock_owners(rnode4_t * rp)135  nfs4_flush_lock_owners(rnode4_t *rp)
136  {
137  	nfs4_lock_owner_t *lop;
138  
139  	mutex_enter(&rp->r_statev4_lock);
140  	while (rp->r_lo_head.lo_next_rnode != &rp->r_lo_head) {
141  		lop = rp->r_lo_head.lo_next_rnode;
142  		lop->lo_prev_rnode->lo_next_rnode = lop->lo_next_rnode;
143  		lop->lo_next_rnode->lo_prev_rnode = lop->lo_prev_rnode;
144  		lop->lo_next_rnode = lop->lo_prev_rnode = NULL;
145  		lock_owner_rele(lop);
146  	}
147  	mutex_exit(&rp->r_statev4_lock);
148  }
149  
150  void
nfs4_clear_open_streams(rnode4_t * rp)151  nfs4_clear_open_streams(rnode4_t *rp)
152  {
153  	nfs4_open_stream_t *osp;
154  
155  	mutex_enter(&rp->r_os_lock);
156  	while ((osp = list_head(&rp->r_open_streams)) != NULL) {
157  		open_owner_rele(osp->os_open_owner);
158  		list_remove(&rp->r_open_streams, osp);
159  		mutex_destroy(&osp->os_sync_lock);
160  		osp->os_open_owner = NULL;
161  		kmem_free(osp, sizeof (*osp));
162  	}
163  	mutex_exit(&rp->r_os_lock);
164  }
165  
166  void
open_owner_hold(nfs4_open_owner_t * oop)167  open_owner_hold(nfs4_open_owner_t *oop)
168  {
169  	mutex_enter(&oop->oo_lock);
170  	oop->oo_ref_count++;
171  	mutex_exit(&oop->oo_lock);
172  }
173  
174  /*
175   * Frees the open owner if the ref count hits zero.
176   */
177  void
open_owner_rele(nfs4_open_owner_t * oop)178  open_owner_rele(nfs4_open_owner_t *oop)
179  {
180  	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
181  	    "open_owner_rele"));
182  
183  	mutex_enter(&oop->oo_lock);
184  	oop->oo_ref_count--;
185  	if (oop->oo_ref_count == 0) {
186  		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
187  		    "open_owner_rele: freeing open owner"));
188  		oop->oo_valid = 0;
189  		mutex_exit(&oop->oo_lock);
190  		/*
191  		 * Ok, we don't destroy the open owner, nor do we put it on
192  		 * the mntinfo4's free list just yet.  We are lazy about it
193  		 * and let callers to find_open_owner() do that to keep locking
194  		 * simple.
195  		 */
196  	} else {
197  		mutex_exit(&oop->oo_lock);
198  	}
199  }
200  
201  void
open_stream_hold(nfs4_open_stream_t * osp)202  open_stream_hold(nfs4_open_stream_t *osp)
203  {
204  	mutex_enter(&osp->os_sync_lock);
205  	osp->os_ref_count++;
206  	mutex_exit(&osp->os_sync_lock);
207  }
208  
209  /*
210   * Frees the open stream and removes it from the rnode4's open streams list if
211   * the ref count drops to zero.
212   */
213  void
open_stream_rele(nfs4_open_stream_t * osp,rnode4_t * rp)214  open_stream_rele(nfs4_open_stream_t *osp, rnode4_t *rp)
215  {
216  	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
217  	    "open_stream_rele"));
218  
219  	ASSERT(!mutex_owned(&rp->r_os_lock));
220  
221  	mutex_enter(&osp->os_sync_lock);
222  	ASSERT(osp->os_ref_count > 0);
223  	osp->os_ref_count--;
224  	if (osp->os_ref_count == 0) {
225  		nfs4_open_owner_t *tmp_oop;
226  
227  		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
228  		    "open_stream_rele: freeing open stream"));
229  		osp->os_valid = 0;
230  		tmp_oop = osp->os_open_owner;
231  		mutex_exit(&osp->os_sync_lock);
232  
233  		/* now see if we need to destroy the open owner */
234  		open_owner_rele(tmp_oop);
235  
236  		mutex_enter(&rp->r_os_lock);
237  		list_remove(&rp->r_open_streams, osp);
238  		mutex_exit(&rp->r_os_lock);
239  
240  		/* free up osp */
241  		mutex_destroy(&osp->os_sync_lock);
242  		osp->os_open_owner = NULL;
243  		kmem_free(osp, sizeof (*osp));
244  	} else {
245  		mutex_exit(&osp->os_sync_lock);
246  	}
247  }
248  
249  void
lock_owner_hold(nfs4_lock_owner_t * lop)250  lock_owner_hold(nfs4_lock_owner_t *lop)
251  {
252  	mutex_enter(&lop->lo_lock);
253  	lop->lo_ref_count++;
254  	mutex_exit(&lop->lo_lock);
255  }
256  
257  /*
258   * Frees the lock owner if the ref count hits zero and
259   * the structure no longer has no locks.
260   */
261  void
lock_owner_rele(nfs4_lock_owner_t * lop)262  lock_owner_rele(nfs4_lock_owner_t *lop)
263  {
264  	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
265  	    "lock_owner_rele"));
266  
267  	mutex_enter(&lop->lo_lock);
268  	lop->lo_ref_count--;
269  	if (lop->lo_ref_count == 0) {
270  		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
271  		    "lock_owner_rele: freeing lock owner: "
272  		    "%x", lop->lo_pid));
273  		lop->lo_valid = 0;
274  		/*
275  		 * If there are no references, the lock_owner should
276  		 * already be off the rnode's list.
277  		 */
278  		ASSERT(lop->lo_next_rnode == NULL);
279  		ASSERT(lop->lo_prev_rnode == NULL);
280  		ASSERT(!(lop->lo_flags & NFS4_LOCK_SEQID_INUSE));
281  		ASSERT(lop->lo_seqid_holder == NULL);
282  		mutex_exit(&lop->lo_lock);
283  
284  		/* free up lop */
285  		cv_destroy(&lop->lo_cv_seqid_sync);
286  		mutex_destroy(&lop->lo_lock);
287  		kmem_free(lop, sizeof (*lop));
288  	} else {
289  		mutex_exit(&lop->lo_lock);
290  	}
291  }
292  
293  /*
294   * This increments the open owner ref count if found.
295   * The argument 'just_created' determines whether we are looking for open
296   * owners with the 'oo_just_created' flag set or not.
297   */
298  nfs4_open_owner_t *
find_open_owner_nolock(cred_t * cr,int just_created,mntinfo4_t * mi)299  find_open_owner_nolock(cred_t *cr, int just_created, mntinfo4_t *mi)
300  {
301  	nfs4_open_owner_t	*oop = NULL, *next_oop;
302  	nfs4_oo_hash_bucket_t	*bucketp;
303  
304  	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
305  	    "find_open_owner: cred %p, just_created %d",
306  	    (void*)cr, just_created));
307  
308  	ASSERT(mi != NULL);
309  	ASSERT(mutex_owned(&mi->mi_lock));
310  
311  	bucketp = lock_bucket(cr, mi);
312  
313  	/* got hash bucket, search through open owners */
314  	for (oop = list_head(&bucketp->b_oo_hash_list); oop != NULL; ) {
315  		mutex_enter(&oop->oo_lock);
316  		if (!crcmp(oop->oo_cred, cr) &&
317  		    (oop->oo_just_created == just_created ||
318  		    just_created == NFS4_JUST_CREATED)) {
319  			/* match */
320  			if (oop->oo_valid == 0) {
321  				/* reactivate the open owner */
322  				oop->oo_valid = 1;
323  				ASSERT(oop->oo_ref_count == 0);
324  			}
325  			oop->oo_ref_count++;
326  			mutex_exit(&oop->oo_lock);
327  			unlock_bucket(bucketp);
328  			return (oop);
329  		}
330  		next_oop = list_next(&bucketp->b_oo_hash_list, oop);
331  		if (oop->oo_valid == 0) {
332  			list_remove(&bucketp->b_oo_hash_list, oop);
333  
334  			/*
335  			 * Now we go ahead and put this open owner
336  			 * on the freed list.  This is our lazy method.
337  			 */
338  			nfs4_free_open_owner(oop, mi);
339  		}
340  
341  		mutex_exit(&oop->oo_lock);
342  		oop = next_oop;
343  	}
344  
345  	/* search through recently freed open owners */
346  	oop = find_freed_open_owner(cr, bucketp, mi);
347  
348  	unlock_bucket(bucketp);
349  
350  	return (oop);
351  }
352  
353  nfs4_open_owner_t *
find_open_owner(cred_t * cr,int just_created,mntinfo4_t * mi)354  find_open_owner(cred_t *cr, int just_created, mntinfo4_t *mi)
355  {
356  	nfs4_open_owner_t *oop;
357  
358  	mutex_enter(&mi->mi_lock);
359  	oop = find_open_owner_nolock(cr, just_created, mi);
360  	mutex_exit(&mi->mi_lock);
361  
362  	return (oop);
363  }
364  
365  /*
366   * This increments osp's ref count if found.
367   * Returns with 'os_sync_lock' held.
368   */
369  nfs4_open_stream_t *
find_open_stream(nfs4_open_owner_t * oop,rnode4_t * rp)370  find_open_stream(nfs4_open_owner_t *oop, rnode4_t *rp)
371  {
372  	nfs4_open_stream_t	*osp;
373  
374  	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
375  	    "find_open_stream"));
376  
377  	mutex_enter(&rp->r_os_lock);
378  	/* Now, no one can add or delete to rp's open streams list */
379  	for (osp = list_head(&rp->r_open_streams); osp != NULL;
380  	    osp = list_next(&rp->r_open_streams, osp)) {
381  		mutex_enter(&osp->os_sync_lock);
382  		if (osp->os_open_owner == oop && osp->os_valid != 0) {
383  			/* match */
384  			NFS4_DEBUG(nfs4_client_state_debug,
385  			    (CE_NOTE, "find_open_stream "
386  			    "got a match"));
387  
388  			osp->os_ref_count++;
389  			mutex_exit(&rp->r_os_lock);
390  			return (osp);
391  		}
392  		mutex_exit(&osp->os_sync_lock);
393  	}
394  
395  	mutex_exit(&rp->r_os_lock);
396  	return (NULL);
397  }
398  
399  /*
400   * Find the lock owner for the given file and process ID.  If "which" is
401   * LOWN_VALID_STATEID, require that the lock owner contain a valid stateid
402   * from the server.
403   *
404   * This increments the lock owner's ref count if found.  Returns NULL if
405   * there was no match.
406   */
407  nfs4_lock_owner_t *
find_lock_owner(rnode4_t * rp,pid_t pid,lown_which_t which)408  find_lock_owner(rnode4_t *rp, pid_t pid, lown_which_t which)
409  {
410  	nfs4_lock_owner_t	*lop, *next_lop;
411  
412  	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
413  	    "find_lock_owner: pid %x, which %d", pid, which));
414  
415  	ASSERT(which == LOWN_ANY || which == LOWN_VALID_STATEID);
416  
417  	/* search by pid */
418  	mutex_enter(&rp->r_statev4_lock);
419  
420  	lop = rp->r_lo_head.lo_next_rnode;
421  	while (lop != &rp->r_lo_head) {
422  		mutex_enter(&lop->lo_lock);
423  		if (lop->lo_pid == pid && lop->lo_valid != 0 &&
424  		    !(lop->lo_flags & NFS4_BAD_SEQID_LOCK)) {
425  			if (which == LOWN_ANY ||
426  			    lop->lo_just_created != NFS4_JUST_CREATED) {
427  				/* Found a matching lock owner */
428  				NFS4_DEBUG(nfs4_client_state_debug,
429  				    (CE_NOTE, "find_lock_owner: "
430  				    "got a match"));
431  
432  				lop->lo_ref_count++;
433  				mutex_exit(&lop->lo_lock);
434  				mutex_exit(&rp->r_statev4_lock);
435  				return (lop);
436  			}
437  		}
438  		next_lop = lop->lo_next_rnode;
439  		mutex_exit(&lop->lo_lock);
440  		lop = next_lop;
441  	}
442  
443  	mutex_exit(&rp->r_statev4_lock);
444  	return (NULL);
445  }
446  
447  /*
448   * This returns the delegation stateid as 'sid'. Returns 1 if a successful
449   * delegation stateid was found, otherwise returns 0.
450   */
451  
452  static int
nfs4_get_deleg_stateid(rnode4_t * rp,nfs_opnum4 op,stateid4 * sid)453  nfs4_get_deleg_stateid(rnode4_t *rp, nfs_opnum4 op, stateid4 *sid)
454  {
455  	ASSERT(!mutex_owned(&rp->r_statev4_lock));
456  
457  	mutex_enter(&rp->r_statev4_lock);
458  	if (((rp->r_deleg_type == OPEN_DELEGATE_WRITE && op == OP_WRITE) ||
459  	    (rp->r_deleg_type != OPEN_DELEGATE_NONE && op != OP_WRITE)) &&
460  	    !rp->r_deleg_return_pending) {
461  
462  		*sid = rp->r_deleg_stateid;
463  		mutex_exit(&rp->r_statev4_lock);
464  		return (1);
465  	}
466  	mutex_exit(&rp->r_statev4_lock);
467  	return (0);
468  }
469  
470  /*
471   * This returns the lock stateid as 'sid'. Returns 1 if a successful lock
472   * stateid was found, otherwise returns 0.
473   */
474  static int
nfs4_get_lock_stateid(rnode4_t * rp,pid_t pid,stateid4 * sid)475  nfs4_get_lock_stateid(rnode4_t *rp, pid_t pid, stateid4 *sid)
476  {
477  	nfs4_lock_owner_t *lop;
478  
479  	lop = find_lock_owner(rp, pid, LOWN_VALID_STATEID);
480  
481  	if (lop) {
482  		/*
483  		 * Found a matching lock owner, so use a lock
484  		 * stateid rather than an open stateid.
485  		 */
486  		mutex_enter(&lop->lo_lock);
487  		*sid = lop->lock_stateid;
488  		mutex_exit(&lop->lo_lock);
489  		lock_owner_rele(lop);
490  		return (1);
491  	}
492  
493  	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
494  	    "nfs4_get_lock_stateid: no lop"));
495  	return (0);
496  }
497  
498  /*
499   * This returns the open stateid as 'sid'. Returns 1 if a successful open
500   * stateid was found, otherwise returns 0.
501   *
502   * Once the stateid is returned to the caller, it is no longer protected;
503   * so the caller must be prepared to handle OLD/BAD_STATEID where
504   * appropiate.
505   */
506  static int
nfs4_get_open_stateid(rnode4_t * rp,cred_t * cr,mntinfo4_t * mi,stateid4 * sid)507  nfs4_get_open_stateid(rnode4_t *rp, cred_t *cr, mntinfo4_t *mi, stateid4 *sid)
508  {
509  	nfs4_open_owner_t *oop;
510  	nfs4_open_stream_t *osp;
511  
512  	ASSERT(mi != NULL);
513  
514  	oop = find_open_owner(cr, NFS4_PERM_CREATED, mi);
515  	if (!oop) {
516  		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
517  		    "nfs4_get_open_stateid: no oop"));
518  		return (0);
519  	}
520  
521  	osp = find_open_stream(oop, rp);
522  	open_owner_rele(oop);
523  	if (!osp) {
524  		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
525  		    "nfs4_get_open_stateid: no osp"));
526  		return (0);
527  	}
528  
529  	if (osp->os_failed_reopen) {
530  		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
531  		    "nfs4_get_open_stateid: osp %p failed reopen",
532  		    (void *)osp));
533  		mutex_exit(&osp->os_sync_lock);
534  		open_stream_rele(osp, rp);
535  		return (0);
536  	}
537  	*sid = osp->open_stateid;
538  	mutex_exit(&osp->os_sync_lock);
539  	open_stream_rele(osp, rp);
540  	return (1);
541  }
542  
543  /*
544   * Returns the delegation stateid if this 'op' is OP_WRITE and the
545   * delegation we hold is a write delegation, OR this 'op' is not
546   * OP_WRITE and we have a delegation held (read or write), otherwise
547   * returns the lock stateid if there is a lock owner, otherwise
548   * returns the open stateid if there is a open stream, otherwise
549   * returns special stateid <seqid = 0, other = 0>.
550   *
551   * Used for WRITE operations.
552   */
553  stateid4
nfs4_get_w_stateid(cred_t * cr,rnode4_t * rp,pid_t pid,mntinfo4_t * mi,nfs_opnum4 op,nfs4_stateid_types_t * sid_tp)554  nfs4_get_w_stateid(cred_t *cr, rnode4_t *rp, pid_t pid, mntinfo4_t *mi,
555      nfs_opnum4 op, nfs4_stateid_types_t *sid_tp)
556  {
557  	stateid4 sid;
558  
559  	if (nfs4_get_deleg_stateid(rp, op, &sid)) {
560  		if (!stateid4_cmp(&sid, &sid_tp->d_sid)) {
561  			sid_tp->cur_sid_type = DEL_SID;
562  			return (sid);
563  		}
564  	}
565  	if (nfs4_get_lock_stateid(rp, pid, &sid)) {
566  		if (!stateid4_cmp(&sid, &sid_tp->l_sid)) {
567  			sid_tp->cur_sid_type = LOCK_SID;
568  			return (sid);
569  		}
570  	}
571  	if (nfs4_get_open_stateid(rp, cr, mi, &sid)) {
572  		if (!stateid4_cmp(&sid, &sid_tp->o_sid)) {
573  			sid_tp->cur_sid_type = OPEN_SID;
574  			return (sid);
575  		}
576  	}
577  	bzero(&sid, sizeof (stateid4));
578  	sid_tp->cur_sid_type = SPEC_SID;
579  	return (sid);
580  }
581  
582  /*
583   * Returns the delegation stateid if this 'op' is OP_WRITE and the
584   * delegation we hold is a write delegation, OR this 'op' is not
585   * OP_WRITE and we have a delegation held (read or write), otherwise
586   * returns the lock stateid if there is a lock owner, otherwise
587   * returns the open stateid if there is a open stream, otherwise
588   * returns special stateid <seqid = 0, other = 0>.
589   *
590   * This also updates which stateid we are using in 'sid_tp', skips
591   * previously attempted stateids, and skips checking higher priority
592   * stateids than the current level as dictated by 'sid_tp->cur_sid_type'
593   * for async reads.
594   *
595   * Used for READ and SETATTR operations.
596   */
597  stateid4
nfs4_get_stateid(cred_t * cr,rnode4_t * rp,pid_t pid,mntinfo4_t * mi,nfs_opnum4 op,nfs4_stateid_types_t * sid_tp,bool_t async_read)598  nfs4_get_stateid(cred_t *cr, rnode4_t *rp, pid_t pid, mntinfo4_t *mi,
599      nfs_opnum4 op, nfs4_stateid_types_t *sid_tp, bool_t async_read)
600  {
601  	stateid4 sid;
602  
603  	/*
604  	 * For asynchronous READs, do not attempt to retry from the start of
605  	 * the stateid priority list, just continue from where you last left
606  	 * off.
607  	 */
608  	if (async_read) {
609  		switch (sid_tp->cur_sid_type) {
610  		case NO_SID:
611  			break;
612  		case DEL_SID:
613  			goto lock_stateid;
614  		case LOCK_SID:
615  			goto open_stateid;
616  		case OPEN_SID:
617  			goto special_stateid;
618  		case SPEC_SID:
619  		default:
620  			cmn_err(CE_PANIC, "nfs4_get_stateid: illegal current "
621  			    "stateid type %d", sid_tp->cur_sid_type);
622  		}
623  	}
624  
625  	if (nfs4_get_deleg_stateid(rp, op, &sid)) {
626  		if (!stateid4_cmp(&sid, &sid_tp->d_sid)) {
627  			sid_tp->cur_sid_type = DEL_SID;
628  			return (sid);
629  		}
630  	}
631  lock_stateid:
632  	if (nfs4_get_lock_stateid(rp, pid, &sid)) {
633  		if (!stateid4_cmp(&sid, &sid_tp->l_sid)) {
634  			sid_tp->cur_sid_type = LOCK_SID;
635  			return (sid);
636  		}
637  	}
638  open_stateid:
639  	if (nfs4_get_open_stateid(rp, cr, mi, &sid)) {
640  		if (!stateid4_cmp(&sid, &sid_tp->o_sid)) {
641  			sid_tp->cur_sid_type = OPEN_SID;
642  			return (sid);
643  		}
644  	}
645  special_stateid:
646  	bzero(&sid, sizeof (stateid4));
647  	sid_tp->cur_sid_type = SPEC_SID;
648  	return	(sid);
649  }
650  
651  void
nfs4_set_lock_stateid(nfs4_lock_owner_t * lop,stateid4 stateid)652  nfs4_set_lock_stateid(nfs4_lock_owner_t *lop, stateid4 stateid)
653  {
654  	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
655  	    "nfs4_set_lock_stateid"));
656  
657  	ASSERT(lop);
658  	ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);
659  
660  	mutex_enter(&lop->lo_lock);
661  	lop->lock_stateid = stateid;
662  	mutex_exit(&lop->lo_lock);
663  }
664  
665  /*
666   * Sequence number used when a new open owner is needed.
667   * This is used so as to not confuse the server.  Since a open owner
668   * is based off of cred, a cred could be re-used quickly, and the server
669   * may not release all state for a cred.
670   */
671  static uint64_t open_owner_seq_num = 0;
672  
673  uint64_t
nfs4_get_new_oo_name(void)674  nfs4_get_new_oo_name(void)
675  {
676  	return (atomic_inc_64_nv(&open_owner_seq_num));
677  }
678  
679  /*
680   * Create a new open owner and add it to the open owner hash table.
681   */
682  nfs4_open_owner_t *
create_open_owner(cred_t * cr,mntinfo4_t * mi)683  create_open_owner(cred_t *cr, mntinfo4_t *mi)
684  {
685  	nfs4_open_owner_t	*oop;
686  	nfs4_oo_hash_bucket_t	*bucketp;
687  
688  	oop = kmem_alloc(sizeof (nfs4_open_owner_t), KM_SLEEP);
689  	/*
690  	 * Make sure the cred doesn't go away when we put this open owner
691  	 * on the free list, as well as make crcmp() a valid check.
692  	 */
693  	crhold(cr);
694  	oop->oo_cred = cr;
695  	mutex_init(&oop->oo_lock, NULL, MUTEX_DEFAULT, NULL);
696  	oop->oo_ref_count = 1;
697  	oop->oo_valid = 1;
698  	oop->oo_just_created = NFS4_JUST_CREATED;
699  	oop->oo_seqid = 0;
700  	oop->oo_seqid_inuse = 0;
701  	oop->oo_last_good_seqid = 0;
702  	oop->oo_last_good_op = TAG_NONE;
703  	oop->oo_cred_otw = NULL;
704  	cv_init(&oop->oo_cv_seqid_sync, NULL, CV_DEFAULT, NULL);
705  
706  	/*
707  	 * A Solaris open_owner is <oo_seq_num>
708  	 */
709  	oop->oo_name = nfs4_get_new_oo_name();
710  
711  	/* now add the struct into the cred hash table */
712  	ASSERT(mutex_owned(&mi->mi_lock));
713  	bucketp = lock_bucket(cr, mi);
714  	list_insert_head(&bucketp->b_oo_hash_list, oop);
715  	unlock_bucket(bucketp);
716  
717  	return (oop);
718  }
719  
720  /*
721   * Create a new open stream and it to the rnode's list.
722   * Increments the ref count on oop.
723   * Returns with 'os_sync_lock' held.
724   */
725  nfs4_open_stream_t *
create_open_stream(nfs4_open_owner_t * oop,rnode4_t * rp)726  create_open_stream(nfs4_open_owner_t *oop, rnode4_t *rp)
727  {
728  	nfs4_open_stream_t	*osp;
729  
730  #ifdef DEBUG
731  	mutex_enter(&oop->oo_lock);
732  	ASSERT(oop->oo_seqid_inuse);
733  	mutex_exit(&oop->oo_lock);
734  #endif
735  
736  	osp = kmem_alloc(sizeof (nfs4_open_stream_t), KM_SLEEP);
737  	osp->os_open_ref_count = 1;
738  	osp->os_mapcnt = 0;
739  	osp->os_ref_count = 2;
740  	osp->os_valid = 1;
741  	osp->os_open_owner = oop;
742  	osp->os_orig_oo_name = oop->oo_name;
743  	bzero(&osp->open_stateid, sizeof (stateid4));
744  	osp->os_share_acc_read = 0;
745  	osp->os_share_acc_write = 0;
746  	osp->os_mmap_read = 0;
747  	osp->os_mmap_write = 0;
748  	osp->os_share_deny_none = 0;
749  	osp->os_share_deny_read = 0;
750  	osp->os_share_deny_write = 0;
751  	osp->os_delegation = 0;
752  	osp->os_dc_openacc = 0;
753  	osp->os_final_close = 0;
754  	osp->os_pending_close = 0;
755  	osp->os_failed_reopen = 0;
756  	osp->os_force_close = 0;
757  	mutex_init(&osp->os_sync_lock, NULL, MUTEX_DEFAULT, NULL);
758  
759  	/* open owner gets a reference */
760  	open_owner_hold(oop);
761  
762  	/* now add the open stream to rp */
763  	mutex_enter(&rp->r_os_lock);
764  	mutex_enter(&osp->os_sync_lock);
765  	list_insert_head(&rp->r_open_streams, osp);
766  	mutex_exit(&rp->r_os_lock);
767  
768  	return (osp);
769  }
770  
771  /*
772   * Returns an open stream with 'os_sync_lock' held.
773   * If the open stream is found (rather than created), its
774   * 'os_open_ref_count' is bumped.
775   *
776   * There is no race with two threads entering this function
777   * and creating two open streams for the same <oop, rp> pair.
778   * This is because the open seqid sync must be acquired, thus
779   * only allowing one thread in at a time.
780   */
781  nfs4_open_stream_t *
find_or_create_open_stream(nfs4_open_owner_t * oop,rnode4_t * rp,int * created_osp)782  find_or_create_open_stream(nfs4_open_owner_t *oop, rnode4_t *rp,
783      int *created_osp)
784  {
785  	nfs4_open_stream_t *osp;
786  
787  #ifdef DEBUG
788  	mutex_enter(&oop->oo_lock);
789  	ASSERT(oop->oo_seqid_inuse);
790  	mutex_exit(&oop->oo_lock);
791  #endif
792  
793  	osp = find_open_stream(oop, rp);
794  	if (!osp) {
795  		osp = create_open_stream(oop, rp);
796  		if (osp)
797  			*created_osp = 1;
798  	} else {
799  		*created_osp = 0;
800  		osp->os_open_ref_count++;
801  	}
802  
803  	return (osp);
804  }
805  
806  static uint64_t lock_owner_seq_num = 0;
807  
808  /*
809   * Create a new lock owner and add it to the rnode's list.
810   * Assumes the rnode's r_statev4_lock is held.
811   * The created lock owner has a reference count of 2: one for the list and
812   * one for the caller to use.  Returns the lock owner locked down.
813   */
814  nfs4_lock_owner_t *
create_lock_owner(rnode4_t * rp,pid_t pid)815  create_lock_owner(rnode4_t *rp, pid_t pid)
816  {
817  	nfs4_lock_owner_t	*lop;
818  
819  	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
820  	    "create_lock_owner: pid %x", pid));
821  
822  	ASSERT(mutex_owned(&rp->r_statev4_lock));
823  
824  	lop = kmem_alloc(sizeof (nfs4_lock_owner_t), KM_SLEEP);
825  	lop->lo_ref_count = 2;
826  	lop->lo_valid = 1;
827  	bzero(&lop->lock_stateid, sizeof (stateid4));
828  	lop->lo_pid = pid;
829  	lop->lock_seqid = 0;
830  	lop->lo_pending_rqsts = 0;
831  	lop->lo_just_created = NFS4_JUST_CREATED;
832  	lop->lo_flags = 0;
833  	lop->lo_seqid_holder = NULL;
834  
835  	/*
836  	 * A Solaris lock_owner is <seq_num><pid>
837  	 */
838  	lop->lock_owner_name.ln_seq_num =
839  	    atomic_inc_64_nv(&lock_owner_seq_num);
840  	lop->lock_owner_name.ln_pid = pid;
841  
842  	cv_init(&lop->lo_cv_seqid_sync, NULL, CV_DEFAULT, NULL);
843  	mutex_init(&lop->lo_lock, NULL, MUTEX_DEFAULT, NULL);
844  
845  	mutex_enter(&lop->lo_lock);
846  
847  	/* now add the lock owner to rp */
848  	lop->lo_prev_rnode = &rp->r_lo_head;
849  	lop->lo_next_rnode = rp->r_lo_head.lo_next_rnode;
850  	rp->r_lo_head.lo_next_rnode->lo_prev_rnode = lop;
851  	rp->r_lo_head.lo_next_rnode = lop;
852  
853  	return (lop);
854  
855  }
856  
857  /*
858   * This sets the lock seqid of a lock owner.
859   */
860  void
nfs4_set_lock_seqid(seqid4 seqid,nfs4_lock_owner_t * lop)861  nfs4_set_lock_seqid(seqid4 seqid, nfs4_lock_owner_t *lop)
862  {
863  	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
864  	    "nfs4_set_lock_seqid"));
865  
866  	ASSERT(lop != NULL);
867  	ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);
868  
869  	lop->lock_seqid = seqid;
870  }
871  
872  static void
nfs4_set_new_lock_owner_args(lock_owner4 * owner,pid_t pid)873  nfs4_set_new_lock_owner_args(lock_owner4 *owner, pid_t pid)
874  {
875  	nfs4_lo_name_t *cast_namep;
876  
877  	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
878  	    "nfs4_set_new_lock_owner_args"));
879  
880  	owner->owner_len = sizeof (*cast_namep);
881  	owner->owner_val = kmem_alloc(owner->owner_len, KM_SLEEP);
882  	/*
883  	 * A Solaris lock_owner is <seq_num><pid>
884  	 */
885  	cast_namep = (nfs4_lo_name_t *)owner->owner_val;
886  	cast_namep->ln_seq_num = atomic_inc_64_nv(&lock_owner_seq_num);
887  	cast_namep->ln_pid = pid;
888  }
889  
890  /*
891   * Fill in the lock owner args.
892   */
893  void
nfs4_setlockowner_args(lock_owner4 * owner,rnode4_t * rp,pid_t pid)894  nfs4_setlockowner_args(lock_owner4 *owner, rnode4_t *rp, pid_t pid)
895  {
896  	nfs4_lock_owner_t *lop;
897  
898  	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
899  	    "nfs4_setlockowner_args"));
900  
901  	/* This increments lop's ref count */
902  	lop = find_lock_owner(rp, pid, LOWN_VALID_STATEID);
903  
904  	if (!lop)
905  		goto make_up_args;
906  
907  	mutex_enter(&lop->lo_lock);
908  	owner->owner_len = sizeof (lop->lock_owner_name);
909  	owner->owner_val = kmem_alloc(owner->owner_len, KM_SLEEP);
910  	bcopy(&lop->lock_owner_name, owner->owner_val,
911  	    owner->owner_len);
912  	mutex_exit(&lop->lo_lock);
913  	lock_owner_rele(lop);
914  	return;
915  
916  make_up_args:
917  	nfs4_set_new_lock_owner_args(owner, pid);
918  }
919  
920  /*
921   * This ends our use of the open owner's open seqid by setting
922   * the appropiate flags and issuing a cv_signal to wake up another
923   * thread waiting to use the open seqid.
924   */
925  
926  void
nfs4_end_open_seqid_sync(nfs4_open_owner_t * oop)927  nfs4_end_open_seqid_sync(nfs4_open_owner_t *oop)
928  {
929  	mutex_enter(&oop->oo_lock);
930  	ASSERT(oop->oo_seqid_inuse);
931  	oop->oo_seqid_inuse = 0;
932  	cv_signal(&oop->oo_cv_seqid_sync);
933  	mutex_exit(&oop->oo_lock);
934  }
935  
936  /*
937   * This starts our use of the open owner's open seqid by setting
938   * the oo_seqid_inuse to true.  We will wait (forever) with a
939   * cv_wait() until we are woken up.
940   *
941   * Return values:
942   * 0		no problems
943   * EAGAIN	caller should retry (like a recovery retry)
944   */
945  int
nfs4_start_open_seqid_sync(nfs4_open_owner_t * oop,mntinfo4_t * mi)946  nfs4_start_open_seqid_sync(nfs4_open_owner_t *oop, mntinfo4_t *mi)
947  {
948  	int error = 0;
949  #ifdef DEBUG
950  	static int ops = 0;		/* fault injection */
951  #endif
952  
953  #ifdef DEBUG
954  	if (seqid_sync_faults && curthread != mi->mi_recovthread &&
955  	    ++ops % 5 == 0)
956  		return (EAGAIN);
957  #endif
958  
959  	mutex_enter(&mi->mi_lock);
960  	if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
961  	    curthread != mi->mi_recovthread)
962  		error = EAGAIN;
963  	mutex_exit(&mi->mi_lock);
964  	if (error != 0)
965  		goto done;
966  
967  	mutex_enter(&oop->oo_lock);
968  
969  	while (oop->oo_seqid_inuse) {
970  		NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
971  		    "nfs4_start_open_seqid_sync waiting on cv"));
972  
973  		cv_wait(&oop->oo_cv_seqid_sync, &oop->oo_lock);
974  	}
975  
976  	oop->oo_seqid_inuse = 1;
977  
978  	mutex_exit(&oop->oo_lock);
979  
980  	mutex_enter(&mi->mi_lock);
981  	if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
982  	    curthread != mi->mi_recovthread)
983  		error = EAGAIN;
984  	mutex_exit(&mi->mi_lock);
985  
986  	if (error == EAGAIN)
987  		nfs4_end_open_seqid_sync(oop);
988  
989  	NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
990  	    "nfs4_start_open_seqid_sync: error=%d", error));
991  
992  done:
993  	return (error);
994  }
995  
#if defined(DEBUG)
/*
 * Debug-only two-element array.  NOTE(review): presumably counters
 * related to skipping OPEN over-the-wire; its users are not visible in
 * this part of the file, so confirm before relying on that.
 */
int bypass_otw[2];
#endif	/* DEBUG */
999  
1000  /*
1001   * Checks to see if the OPEN OTW is necessary that is, if it's already
1002   * been opened with the same access and deny bits we are now asking for.
1003   * Note, this assumes that *vp is a rnode.
1004   */
1005  int
nfs4_is_otw_open_necessary(nfs4_open_owner_t * oop,int flag,vnode_t * vp,int just_been_created,int * errorp,int acc,nfs4_recov_state_t * rsp)1006  nfs4_is_otw_open_necessary(nfs4_open_owner_t *oop, int flag, vnode_t *vp,
1007      int just_been_created, int *errorp, int acc, nfs4_recov_state_t *rsp)
1008  {
1009  	rnode4_t *rp;
1010  	nfs4_open_stream_t *osp;
1011  	open_delegation_type4 dt;
1012  
1013  	rp = VTOR4(vp);
1014  
1015  	/*
1016  	 * Grab the delegation type.  This function is protected against
1017  	 * the delegation being returned by virtue of start_op (called
1018  	 * by nfs4open_otw) taking the r_deleg_recall_lock in read mode,
1019  	 * delegreturn requires this lock in write mode to proceed.
1020  	 */
1021  	ASSERT(nfs_rw_lock_held(&rp->r_deleg_recall_lock, RW_READER));
1022  	dt = get_dtype(rp);
1023  
1024  	/* returns with 'os_sync_lock' held */
1025  	osp = find_open_stream(oop, rp);
1026  
1027  	if (osp) {
1028  		uint32_t	do_otw = 0;
1029  
1030  		if (osp->os_failed_reopen) {
1031  			NFS4_DEBUG(nfs4_open_stream_debug, (CE_NOTE,
1032  			    "nfs4_is_otw_open_necessary: os_failed_reopen "
1033  			    "set on osp %p, cr %p, rp %s", (void *)osp,
1034  			    (void *)osp->os_open_owner->oo_cred,
1035  			    rnode4info(rp)));
1036  			do_otw = 1;
1037  		}
1038  
1039  		/*
1040  		 * check access/deny bits
1041  		 */
1042  		if (!do_otw && (flag & FREAD))
1043  			if (osp->os_share_acc_read == 0 &&
1044  			    dt == OPEN_DELEGATE_NONE)
1045  				do_otw = 1;
1046  
1047  		if (!do_otw && (flag & FWRITE))
1048  			if (osp->os_share_acc_write == 0 &&
1049  			    dt != OPEN_DELEGATE_WRITE)
1050  				do_otw = 1;
1051  
1052  		if (!do_otw) {
1053  			NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
1054  			    "nfs4_is_otw_open_necessary: can skip this "
1055  			    "open OTW"));
1056  			if (!just_been_created) {
1057  				osp->os_open_ref_count++;
1058  				if (flag & FREAD)
1059  					osp->os_share_acc_read++;
1060  				if (flag & FWRITE)
1061  					osp->os_share_acc_write++;
1062  				osp->os_share_deny_none++;
1063  			}
1064  
1065  			/*
1066  			 * Need to reset this bitfield for the possible case
1067  			 * where we were going to OTW CLOSE the file, got a
1068  			 * non-recoverable error, and before we could retry
1069  			 * the CLOSE, OPENed the file again.
1070  			 */
1071  			ASSERT(osp->os_open_owner->oo_seqid_inuse);
1072  			osp->os_final_close = 0;
1073  			osp->os_force_close = 0;
1074  
1075  			mutex_exit(&osp->os_sync_lock);
1076  			open_stream_rele(osp, rp);
1077  
1078  #ifdef	DEBUG
1079  			bypass_otw[0]++;
1080  #endif
1081  
1082  			*errorp = 0;
1083  			return (0);
1084  		}
1085  		mutex_exit(&osp->os_sync_lock);
1086  		open_stream_rele(osp, rp);
1087  
1088  	} else if (dt != OPEN_DELEGATE_NONE) {
1089  		/*
1090  		 * Even if there isn't an open_stream yet, we may still be
1091  		 * able to bypass the otw open if the client owns a delegation.
1092  		 *
1093  		 * If you are asking for for WRITE, but I only have
1094  		 * a read delegation, then you still have to go otw.
1095  		 */
1096  
1097  		if (flag & FWRITE && dt == OPEN_DELEGATE_READ)
1098  			return (1);
1099  
1100  		/*
1101  		 * TODO - evaluate the nfsace4
1102  		 */
1103  
1104  		/*
1105  		 * Check the access flags to make sure the caller
1106  		 * had permission.
1107  		 */
1108  		if (flag & FREAD && !(acc & VREAD))
1109  			return (1);
1110  
1111  		if (flag & FWRITE && !(acc & VWRITE))
1112  			return (1);
1113  
1114  		/*
1115  		 * create_open_stream will add a reference to oop,
1116  		 * this will prevent the open_owner_rele done in
1117  		 * nfs4open_otw from destroying the open_owner.
1118  		 */
1119  
1120  		/* returns with 'os_sync_lock' held */
1121  		osp = create_open_stream(oop, rp);
1122  		if (osp == NULL)
1123  			return (1);
1124  
1125  		osp->open_stateid = rp->r_deleg_stateid;
1126  		osp->os_delegation = 1;
1127  
1128  		if (flag & FREAD)
1129  			osp->os_share_acc_read++;
1130  		if (flag & FWRITE)
1131  			osp->os_share_acc_write++;
1132  
1133  		osp->os_share_deny_none++;
1134  		mutex_exit(&osp->os_sync_lock);
1135  
1136  		open_stream_rele(osp, rp);
1137  
1138  		mutex_enter(&oop->oo_lock);
1139  		oop->oo_just_created = NFS4_PERM_CREATED;
1140  		mutex_exit(&oop->oo_lock);
1141  
1142  		ASSERT(rsp != NULL);
1143  		if (rsp->rs_sp != NULL) {
1144  			mutex_enter(&rsp->rs_sp->s_lock);
1145  			nfs4_inc_state_ref_count_nolock(rsp->rs_sp,
1146  			    VTOMI4(vp));
1147  			mutex_exit(&rsp->rs_sp->s_lock);
1148  		}
1149  #ifdef	DEBUG
1150  		bypass_otw[1]++;
1151  #endif
1152  
1153  		*errorp = 0;
1154  		return (0);
1155  	}
1156  
1157  	return (1);
1158  }
1159  
1160  static open_delegation_type4
get_dtype(rnode4_t * rp)1161  get_dtype(rnode4_t *rp)
1162  {
1163  	open_delegation_type4 dt;
1164  
1165  	mutex_enter(&rp->r_statev4_lock);
1166  	ASSERT(!rp->r_deleg_return_inprog);
1167  	if (rp->r_deleg_return_pending)
1168  		dt = OPEN_DELEGATE_NONE;
1169  	else
1170  		dt = rp->r_deleg_type;
1171  	mutex_exit(&rp->r_statev4_lock);
1172  
1173  	return (dt);
1174  }
1175  
1176  /*
1177   * Fill in *locker with the lock state arguments for a LOCK call.  If
1178   * lop->lo_just_created == NFS4_JUST_CREATED, oop and osp must be non-NULL.
1179   * Caller must already hold the necessary seqid sync lock(s).
1180   */
1181  
1182  void
nfs4_setup_lock_args(nfs4_lock_owner_t * lop,nfs4_open_owner_t * oop,nfs4_open_stream_t * osp,clientid4 clientid,locker4 * locker)1183  nfs4_setup_lock_args(nfs4_lock_owner_t *lop, nfs4_open_owner_t *oop,
1184      nfs4_open_stream_t *osp, clientid4 clientid, locker4 *locker)
1185  {
1186  	ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);
1187  	if (lop->lo_just_created == NFS4_JUST_CREATED) {
1188  		/* this is a new lock request */
1189  		open_to_lock_owner4 *nown;
1190  
1191  		ASSERT(oop != NULL);
1192  		ASSERT(osp != NULL);
1193  
1194  		locker->new_lock_owner = TRUE;
1195  		nown = &locker->locker4_u.open_owner;
1196  		nown->open_seqid = nfs4_get_open_seqid(oop) + 1;
1197  		mutex_enter(&osp->os_sync_lock);
1198  		nown->open_stateid = osp->open_stateid;
1199  		mutex_exit(&osp->os_sync_lock);
1200  		nown->lock_seqid = lop->lock_seqid; /* initial, so no +1 */
1201  
1202  		nown->lock_owner.clientid = clientid;
1203  		nown->lock_owner.owner_len = sizeof (lop->lock_owner_name);
1204  		nown->lock_owner.owner_val =
1205  		    kmem_alloc(nown->lock_owner.owner_len, KM_SLEEP);
1206  		bcopy(&lop->lock_owner_name, nown->lock_owner.owner_val,
1207  		    nown->lock_owner.owner_len);
1208  	} else {
1209  		exist_lock_owner4 *eown;
1210  		/* have an existing lock owner */
1211  
1212  		locker->new_lock_owner = FALSE;
1213  		eown = &locker->locker4_u.lock_owner;
1214  		mutex_enter(&lop->lo_lock);
1215  		eown->lock_stateid = lop->lock_stateid;
1216  		mutex_exit(&lop->lo_lock);
1217  		eown->lock_seqid = lop->lock_seqid + 1;
1218  	}
1219  }
1220  
1221  /*
1222   * This starts our use of the lock owner's lock seqid by setting
1223   * the lo_flags to NFS4_LOCK_SEQID_INUSE.  We will wait (forever)
1224   * with a cv_wait() until we are woken up.
1225   *
1226   * Return values:
1227   * 0		no problems
1228   * EAGAIN	caller should retry (like a recovery retry)
1229   */
1230  int
nfs4_start_lock_seqid_sync(nfs4_lock_owner_t * lop,mntinfo4_t * mi)1231  nfs4_start_lock_seqid_sync(nfs4_lock_owner_t *lop, mntinfo4_t *mi)
1232  {
1233  	int error = 0;
1234  #ifdef DEBUG
1235  	static int ops = 0;		/* fault injection */
1236  #endif
1237  
1238  #ifdef DEBUG
1239  	if (seqid_sync_faults && curthread != mi->mi_recovthread &&
1240  	    ++ops % 7 == 0)
1241  		return (EAGAIN);
1242  #endif
1243  
1244  	mutex_enter(&mi->mi_lock);
1245  	if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
1246  	    curthread != mi->mi_recovthread)
1247  		error = EAGAIN;
1248  	mutex_exit(&mi->mi_lock);
1249  	if (error != 0)
1250  		goto done;
1251  
1252  	mutex_enter(&lop->lo_lock);
1253  
1254  	ASSERT(lop->lo_seqid_holder != curthread);
1255  	while (lop->lo_flags & NFS4_LOCK_SEQID_INUSE) {
1256  		NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
1257  		    "nfs4_start_lock_seqid_sync: waiting on cv"));
1258  
1259  		cv_wait(&lop->lo_cv_seqid_sync, &lop->lo_lock);
1260  	}
1261  	NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE, "nfs4_start_lock_seqid_sync: "
1262  	    "NFS4_LOCK_SEQID_INUSE"));
1263  
1264  	lop->lo_flags |= NFS4_LOCK_SEQID_INUSE;
1265  	lop->lo_seqid_holder = curthread;
1266  	mutex_exit(&lop->lo_lock);
1267  
1268  	mutex_enter(&mi->mi_lock);
1269  	if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
1270  	    curthread != mi->mi_recovthread)
1271  		error = EAGAIN;
1272  	mutex_exit(&mi->mi_lock);
1273  
1274  	if (error == EAGAIN)
1275  		nfs4_end_lock_seqid_sync(lop);
1276  
1277  	NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
1278  	    "nfs4_start_lock_seqid_sync: error=%d", error));
1279  
1280  done:
1281  	return (error);
1282  }
1283  
1284  /*
1285   * This ends our use of the lock owner's lock seqid by setting
1286   * the appropiate flags and issuing a cv_signal to wake up another
1287   * thread waiting to use the lock seqid.
1288   */
1289  void
nfs4_end_lock_seqid_sync(nfs4_lock_owner_t * lop)1290  nfs4_end_lock_seqid_sync(nfs4_lock_owner_t *lop)
1291  {
1292  	mutex_enter(&lop->lo_lock);
1293  	ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);
1294  	ASSERT(lop->lo_seqid_holder == curthread);
1295  	lop->lo_flags &= ~NFS4_LOCK_SEQID_INUSE;
1296  	lop->lo_seqid_holder = NULL;
1297  	cv_signal(&lop->lo_cv_seqid_sync);
1298  	mutex_exit(&lop->lo_lock);
1299  }
1300  
1301  /*
1302   * Returns a reference to a lock owner via lopp, which has its lock seqid
1303   * synchronization started.
1304   * If the lock owner is in the 'just_created' state, then we return its open
1305   * owner and open stream and start the open seqid synchronization.
1306   *
1307   * Return value:
1308   * NFS4_OK		no problems
1309   * NFS4ERR_DELAY	there is lost state to recover; caller should retry
1310   * NFS4ERR_IO		no open stream
1311   */
1312  nfsstat4
nfs4_find_or_create_lock_owner(pid_t pid,rnode4_t * rp,cred_t * cr,nfs4_open_owner_t ** oopp,nfs4_open_stream_t ** ospp,nfs4_lock_owner_t ** lopp)1313  nfs4_find_or_create_lock_owner(pid_t pid, rnode4_t *rp, cred_t *cr,
1314      nfs4_open_owner_t **oopp, nfs4_open_stream_t **ospp,
1315      nfs4_lock_owner_t **lopp)
1316  {
1317  	nfs4_lock_owner_t *lop, *next_lop;
1318  	mntinfo4_t *mi;
1319  	int error = 0;
1320  	nfsstat4 stat;
1321  
1322  	mi = VTOMI4(RTOV4(rp));
1323  
1324  	mutex_enter(&rp->r_statev4_lock);
1325  
1326  	lop = rp->r_lo_head.lo_next_rnode;
1327  	while (lop != &rp->r_lo_head) {
1328  		mutex_enter(&lop->lo_lock);
1329  		if (lop->lo_pid == pid && lop->lo_valid != 0) {
1330  			/* Found a matching lock owner */
1331  			NFS4_DEBUG(nfs4_client_state_debug,
1332  			    (CE_NOTE, "nfs4_find_or_create_lock_owner: "
1333  			    "got a match"));
1334  			lop->lo_ref_count++;
1335  			break;
1336  		}
1337  		next_lop = lop->lo_next_rnode;
1338  		mutex_exit(&lop->lo_lock);
1339  		lop = next_lop;
1340  	}
1341  
1342  	if (lop == &rp->r_lo_head) {
1343  		/* create temporary lock owner */
1344  		lop = create_lock_owner(rp, pid);
1345  	}
1346  	mutex_exit(&rp->r_statev4_lock);
1347  
1348  	/* Have a locked down lock owner struct now */
1349  	if (lop->lo_just_created != NFS4_JUST_CREATED) {
1350  		/* This is an existing lock owner */
1351  		*oopp = NULL;
1352  		*ospp = NULL;
1353  	} else {
1354  		/* Lock owner doesn't exist yet */
1355  
1356  		/* First grab open owner seqid synchronization */
1357  		mutex_exit(&lop->lo_lock);
1358  		*oopp = find_open_owner(cr, NFS4_PERM_CREATED, mi);
1359  		if (*oopp == NULL)
1360  			goto kill_new_lop;
1361  		error = nfs4_start_open_seqid_sync(*oopp, mi);
1362  		if (error == EAGAIN) {
1363  			stat = NFS4ERR_DELAY;
1364  			goto failed;
1365  		}
1366  		*ospp = find_open_stream(*oopp, rp);
1367  		if (*ospp == NULL) {
1368  			nfs4_end_open_seqid_sync(*oopp);
1369  			goto kill_new_lop;
1370  		}
1371  		if ((*ospp)->os_failed_reopen) {
1372  			mutex_exit(&(*ospp)->os_sync_lock);
1373  			NFS4_DEBUG((nfs4_open_stream_debug ||
1374  			    nfs4_client_lock_debug), (CE_NOTE,
1375  			    "nfs4_find_or_create_lock_owner: os_failed_reopen;"
1376  			    "osp %p, cr %p, rp %s", (void *)(*ospp),
1377  			    (void *)cr, rnode4info(rp)));
1378  			nfs4_end_open_seqid_sync(*oopp);
1379  			stat = NFS4ERR_IO;
1380  			goto failed;
1381  		}
1382  		mutex_exit(&(*ospp)->os_sync_lock);
1383  
1384  		/*
1385  		 * Now see if the lock owner has become permanent while we
1386  		 * had released our lock.
1387  		 */
1388  		mutex_enter(&lop->lo_lock);
1389  		if (lop->lo_just_created != NFS4_JUST_CREATED) {
1390  			nfs4_end_open_seqid_sync(*oopp);
1391  			open_stream_rele(*ospp, rp);
1392  			open_owner_rele(*oopp);
1393  			*oopp = NULL;
1394  			*ospp = NULL;
1395  		}
1396  	}
1397  	mutex_exit(&lop->lo_lock);
1398  
1399  	error = nfs4_start_lock_seqid_sync(lop, mi);
1400  	if (error == EAGAIN) {
1401  		if (*oopp != NULL)
1402  			nfs4_end_open_seqid_sync(*oopp);
1403  		stat = NFS4ERR_DELAY;
1404  		goto failed;
1405  	}
1406  	ASSERT(error == 0);
1407  
1408  	*lopp = lop;
1409  	return (NFS4_OK);
1410  
1411  kill_new_lop:
1412  	/*
1413  	 * A previous CLOSE was attempted but got EINTR, but the application
1414  	 * continued to use the unspecified state file descriptor.  But now the
1415  	 * open stream is gone (which could also destroy the open owner), hence
1416  	 * we can no longer continue.  The calling function should return EIO
1417  	 * to the application.
1418  	 */
1419  	NFS4_DEBUG(nfs4_lost_rqst_debug || nfs4_client_lock_debug,
1420  	    (CE_NOTE, "nfs4_find_or_create_lock_owner: destroy newly created "
1421  	    "lop %p, oop %p, osp %p", (void *)lop, (void *)(*oopp),
1422  	    (void *)(*ospp)));
1423  
1424  	nfs4_rnode_remove_lock_owner(rp, lop);
1425  	stat = NFS4ERR_IO;
1426  
1427  failed:
1428  	lock_owner_rele(lop);
1429  	if (*oopp) {
1430  		open_owner_rele(*oopp);
1431  		*oopp = NULL;
1432  	}
1433  	if (*ospp) {
1434  		open_stream_rele(*ospp, rp);
1435  		*ospp = NULL;
1436  	}
1437  	return (stat);
1438  }
1439  
1440  /*
1441   * This function grabs a recently freed open owner off of the freed open
1442   * owner list if there is a match on the cred 'cr'.  It returns NULL if no
1443   * such match is found.  It will set the 'oo_ref_count' and 'oo_valid' back
1444   * to both 1 (sane values) in the case a match is found.
1445   */
1446  static nfs4_open_owner_t *
find_freed_open_owner(cred_t * cr,nfs4_oo_hash_bucket_t * bucketp,mntinfo4_t * mi)1447  find_freed_open_owner(cred_t *cr, nfs4_oo_hash_bucket_t *bucketp,
1448      mntinfo4_t *mi)
1449  {
1450  	nfs4_open_owner_t		*foop;
1451  
1452  	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
1453  	    "find_freed_open_owner: cred %p", (void*)cr));
1454  
1455  	ASSERT(mutex_owned(&mi->mi_lock));
1456  	ASSERT(mutex_owned(&bucketp->b_lock));
1457  
1458  	/* got hash bucket, search through freed open owners */
1459  	for (foop = list_head(&mi->mi_foo_list); foop != NULL;
1460  	    foop = list_next(&mi->mi_foo_list, foop)) {
1461  		if (!crcmp(foop->oo_cred, cr)) {
1462  			NFS4_DEBUG(nfs4_client_foo_debug, (CE_NOTE,
1463  			    "find_freed_open_owner: got a match open owner "
1464  			    "%p", (void *)foop));
1465  			foop->oo_ref_count = 1;
1466  			foop->oo_valid = 1;
1467  			list_remove(&mi->mi_foo_list, foop);
1468  			mi->mi_foo_num--;
1469  
1470  			/* now add the struct into the cred hash table */
1471  			list_insert_head(&bucketp->b_oo_hash_list, foop);
1472  			return (foop);
1473  		}
1474  	}
1475  
1476  	return (NULL);
1477  }
1478  
1479  /*
1480   * Insert the newly freed 'oop' into the mi's freed oop list,
1481   * always at the head of the list.  If we've already reached
1482   * our maximum allowed number of freed open owners (mi_foo_max),
1483   * then remove the LRU open owner on the list (namely the tail).
1484   */
1485  static void
nfs4_free_open_owner(nfs4_open_owner_t * oop,mntinfo4_t * mi)1486  nfs4_free_open_owner(nfs4_open_owner_t *oop, mntinfo4_t *mi)
1487  {
1488  	nfs4_open_owner_t *lru_foop;
1489  
1490  	if (mi->mi_foo_num < mi->mi_foo_max) {
1491  		NFS4_DEBUG(nfs4_client_foo_debug, (CE_NOTE,
1492  		    "nfs4_free_open_owner: num free %d, max free %d, "
1493  		    "insert open owner %p for mntinfo4 %p",
1494  		    mi->mi_foo_num, mi->mi_foo_max, (void *)oop,
1495  		    (void *)mi));
1496  		list_insert_head(&mi->mi_foo_list, oop);
1497  		mi->mi_foo_num++;
1498  		return;
1499  	}
1500  
1501  	/* need to replace a freed open owner */
1502  
1503  	lru_foop = list_tail(&mi->mi_foo_list);
1504  
1505  	NFS4_DEBUG(nfs4_client_foo_debug, (CE_NOTE,
1506  	    "nfs4_free_open_owner: destroy %p, insert %p",
1507  	    (void *)lru_foop, (void *)oop));
1508  
1509  	list_remove(&mi->mi_foo_list, lru_foop);
1510  	nfs4_destroy_open_owner(lru_foop);
1511  
1512  	/* head always has latest freed oop */
1513  	list_insert_head(&mi->mi_foo_list, oop);
1514  }
1515  
1516  void
nfs4_destroy_open_owner(nfs4_open_owner_t * oop)1517  nfs4_destroy_open_owner(nfs4_open_owner_t *oop)
1518  {
1519  	ASSERT(oop != NULL);
1520  
1521  	crfree(oop->oo_cred);
1522  	if (oop->oo_cred_otw)
1523  		crfree(oop->oo_cred_otw);
1524  	mutex_destroy(&oop->oo_lock);
1525  	cv_destroy(&oop->oo_cv_seqid_sync);
1526  	kmem_free(oop, sizeof (*oop));
1527  }
1528  
1529  seqid4
nfs4_get_open_seqid(nfs4_open_owner_t * oop)1530  nfs4_get_open_seqid(nfs4_open_owner_t *oop)
1531  {
1532  	ASSERT(oop->oo_seqid_inuse);
1533  	return (oop->oo_seqid);
1534  }
1535  
1536  /*
1537   * This set's the open seqid for a <open owner/ mntinfo4> pair.
1538   */
1539  void
nfs4_set_open_seqid(seqid4 seqid,nfs4_open_owner_t * oop,nfs4_tag_type_t tag_type)1540  nfs4_set_open_seqid(seqid4 seqid, nfs4_open_owner_t *oop,
1541      nfs4_tag_type_t tag_type)
1542  {
1543  	ASSERT(oop->oo_seqid_inuse);
1544  	oop->oo_seqid = seqid;
1545  	oop->oo_last_good_seqid = seqid;
1546  	oop->oo_last_good_op = tag_type;
1547  }
1548  
1549  /*
1550   * This bumps the current open seqid for the open owner 'oop'.
1551   */
1552  void
nfs4_get_and_set_next_open_seqid(nfs4_open_owner_t * oop,nfs4_tag_type_t tag_type)1553  nfs4_get_and_set_next_open_seqid(nfs4_open_owner_t *oop,
1554      nfs4_tag_type_t tag_type)
1555  {
1556  	ASSERT(oop->oo_seqid_inuse);
1557  	oop->oo_seqid++;
1558  	oop->oo_last_good_seqid = oop->oo_seqid;
1559  	oop->oo_last_good_op = tag_type;
1560  }
1561  
1562  /*
1563   * If no open owner was provided, this function takes the cred to find an
1564   * open owner within the given mntinfo4_t.  Either way we return the
1565   * open owner's OTW credential if it exists; otherwise returns the
1566   * supplied 'cr'.
1567   *
1568   * A hold is put on the returned credential, and it is up to the caller
1569   * to free the cred.
1570   */
1571  cred_t *
nfs4_get_otw_cred(cred_t * cr,mntinfo4_t * mi,nfs4_open_owner_t * provided_oop)1572  nfs4_get_otw_cred(cred_t *cr, mntinfo4_t *mi, nfs4_open_owner_t *provided_oop)
1573  {
1574  	cred_t *ret_cr;
1575  	nfs4_open_owner_t *oop = provided_oop;
1576  
1577  	if (oop == NULL)
1578  		oop = find_open_owner(cr, NFS4_PERM_CREATED, mi);
1579  	if (oop != NULL) {
1580  		mutex_enter(&oop->oo_lock);
1581  		if (oop->oo_cred_otw)
1582  			ret_cr = oop->oo_cred_otw;
1583  		else
1584  			ret_cr = cr;
1585  		crhold(ret_cr);
1586  		mutex_exit(&oop->oo_lock);
1587  		if (provided_oop == NULL)
1588  			open_owner_rele(oop);
1589  	} else {
1590  		ret_cr = cr;
1591  		crhold(ret_cr);
1592  	}
1593  	return (ret_cr);
1594  }
1595  
1596  /*
1597   * Retrieves the next open stream in the rnode's list if an open stream
1598   * is provided; otherwise gets the first open stream in the list.
1599   * The open owner for that open stream is then retrieved, and if its
1600   * oo_cred_otw exists then it is returned; otherwise the provided 'cr'
1601   * is returned.  *osp is set to the 'found' open stream.
1602   *
1603   * Note: we don't set *osp to the open stream retrieved via the
1604   * optimized check since that won't necessarily be at the beginning
1605   * of the rnode list, and if that osp doesn't work we'd like to
1606   * check _all_ open streams (starting from the beginning of the
1607   * rnode list).
1608   */
1609  cred_t *
nfs4_get_otw_cred_by_osp(rnode4_t * rp,cred_t * cr,nfs4_open_stream_t ** osp,bool_t * first_time,bool_t * last_time)1610  nfs4_get_otw_cred_by_osp(rnode4_t *rp, cred_t *cr,
1611      nfs4_open_stream_t **osp, bool_t *first_time, bool_t *last_time)
1612  {
1613  	nfs4_open_stream_t *next_osp = NULL;
1614  	cred_t *ret_cr;
1615  
1616  	ASSERT(cr != NULL);
1617  	/*
1618  	 * As an optimization, try to find the open owner
1619  	 * for the cred provided since that's most likely
1620  	 * to work.
1621  	 */
1622  	if (*first_time) {
1623  		nfs4_open_owner_t *oop;
1624  
1625  		oop = find_open_owner(cr, NFS4_PERM_CREATED, VTOMI4(RTOV4(rp)));
1626  		if (oop) {
1627  			next_osp = find_open_stream(oop, rp);
1628  			if (next_osp)
1629  				mutex_exit(&next_osp->os_sync_lock);
1630  			open_owner_rele(oop);
1631  		}
1632  	}
1633  	if (next_osp == NULL) {
1634  		int delay_rele = 0;
1635  		*first_time = FALSE;
1636  
1637  		/* return the next open stream for this rnode */
1638  		mutex_enter(&rp->r_os_lock);
1639  		/* Now, no one can add or delete to rp's open streams list */
1640  
1641  		if (*osp) {
1642  			next_osp = list_next(&rp->r_open_streams, *osp);
1643  			/*
1644  			 * Delay the rele of *osp until after we drop
1645  			 * r_os_lock to not deadlock with oo_lock
1646  			 * via an open_stream_rele()->open_owner_rele().
1647  			 */
1648  			delay_rele = 1;
1649  		} else {
1650  			next_osp = list_head(&rp->r_open_streams);
1651  		}
1652  		if (next_osp) {
1653  			nfs4_open_stream_t *tmp_osp;
1654  
1655  			/* find the next valid open stream */
1656  			mutex_enter(&next_osp->os_sync_lock);
1657  			while (next_osp && !next_osp->os_valid) {
1658  				tmp_osp =
1659  				    list_next(&rp->r_open_streams, next_osp);
1660  				mutex_exit(&next_osp->os_sync_lock);
1661  				next_osp = tmp_osp;
1662  				if (next_osp)
1663  					mutex_enter(&next_osp->os_sync_lock);
1664  			}
1665  			if (next_osp) {
1666  				next_osp->os_ref_count++;
1667  				mutex_exit(&next_osp->os_sync_lock);
1668  			}
1669  		}
1670  		mutex_exit(&rp->r_os_lock);
1671  		if (delay_rele)
1672  			open_stream_rele(*osp, rp);
1673  	}
1674  
1675  	if (next_osp) {
1676  		nfs4_open_owner_t *oop;
1677  
1678  		oop = next_osp->os_open_owner;
1679  		mutex_enter(&oop->oo_lock);
1680  		if (oop->oo_cred_otw)
1681  			ret_cr = oop->oo_cred_otw;
1682  		else
1683  			ret_cr = cr;
1684  		crhold(ret_cr);
1685  		mutex_exit(&oop->oo_lock);
1686  		if (*first_time) {
1687  			open_stream_rele(next_osp, rp);
1688  			*osp = NULL;
1689  		} else
1690  			*osp = next_osp;
1691  	} else {
1692  		/* just return the cred provided to us */
1693  		*last_time = TRUE;
1694  		*osp = NULL;
1695  		ret_cr = cr;
1696  		crhold(ret_cr);
1697  	}
1698  
1699  	*first_time = FALSE;
1700  	return (ret_cr);
1701  }
1702  
1703  void
nfs4_init_stateid_types(nfs4_stateid_types_t * sid_tp)1704  nfs4_init_stateid_types(nfs4_stateid_types_t *sid_tp)
1705  {
1706  	bzero(&sid_tp->d_sid, sizeof (stateid4));
1707  	bzero(&sid_tp->l_sid, sizeof (stateid4));
1708  	bzero(&sid_tp->o_sid, sizeof (stateid4));
1709  	sid_tp->cur_sid_type = NO_SID;
1710  }
1711  
1712  void
nfs4_save_stateid(stateid4 * s1,nfs4_stateid_types_t * sid_tp)1713  nfs4_save_stateid(stateid4 *s1, nfs4_stateid_types_t *sid_tp)
1714  {
1715  	NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE,
1716  	    "nfs4_save_stateid: saved %s stateid",
1717  	    sid_tp->cur_sid_type == DEL_SID ? "delegation" :
1718  	    sid_tp->cur_sid_type == LOCK_SID ? "lock" :
1719  	    sid_tp->cur_sid_type == OPEN_SID ? "open" : "special"));
1720  
1721  	switch (sid_tp->cur_sid_type) {
1722  	case DEL_SID:
1723  		sid_tp->d_sid = *s1;
1724  		break;
1725  	case LOCK_SID:
1726  		sid_tp->l_sid = *s1;
1727  		break;
1728  	case OPEN_SID:
1729  		sid_tp->o_sid = *s1;
1730  		break;
1731  	case SPEC_SID:
1732  	default:
1733  		cmn_err(CE_PANIC, "nfs4_save_stateid: illegal "
1734  		    "stateid type %d", sid_tp->cur_sid_type);
1735  	}
1736  }
1737  
1738  /*
1739   * We got NFS4ERR_BAD_SEQID.  Setup some arguments to pass to recovery.
1740   * Caller is responsible for freeing.
1741   */
1742  nfs4_bseqid_entry_t *
nfs4_create_bseqid_entry(nfs4_open_owner_t * oop,nfs4_lock_owner_t * lop,vnode_t * vp,pid_t pid,nfs4_tag_type_t tag,seqid4 seqid)1743  nfs4_create_bseqid_entry(nfs4_open_owner_t *oop, nfs4_lock_owner_t *lop,
1744      vnode_t *vp, pid_t pid, nfs4_tag_type_t tag, seqid4 seqid)
1745  {
1746  	nfs4_bseqid_entry_t	*bsep;
1747  
1748  	bsep = kmem_alloc(sizeof (*bsep), KM_SLEEP);
1749  	bsep->bs_oop = oop;
1750  	bsep->bs_lop = lop;
1751  	bsep->bs_vp = vp;
1752  	bsep->bs_pid = pid;
1753  	bsep->bs_tag = tag;
1754  	bsep->bs_seqid = seqid;
1755  
1756  	return (bsep);
1757  }
1758  
1759  void
nfs4open_dg_save_lost_rqst(int error,nfs4_lost_rqst_t * lost_rqstp,nfs4_open_owner_t * oop,nfs4_open_stream_t * osp,cred_t * cr,vnode_t * vp,int access_close,int deny_close)1760  nfs4open_dg_save_lost_rqst(int error, nfs4_lost_rqst_t *lost_rqstp,
1761      nfs4_open_owner_t *oop, nfs4_open_stream_t *osp, cred_t *cr,
1762      vnode_t *vp, int access_close, int deny_close)
1763  {
1764  	lost_rqstp->lr_putfirst = FALSE;
1765  
1766  	ASSERT(vp != NULL);
1767  	if (error == ETIMEDOUT || error == EINTR ||
1768  	    NFS4_FRC_UNMT_ERR(error, vp->v_vfsp)) {
1769  		NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
1770  		    "nfs4open_dg_save_lost_rqst: error %d", error));
1771  
1772  		lost_rqstp->lr_op = OP_OPEN_DOWNGRADE;
1773  		/*
1774  		 * The vp is held and rele'd via the recovery code.
1775  		 * See nfs4_save_lost_rqst.
1776  		 */
1777  		lost_rqstp->lr_vp = vp;
1778  		lost_rqstp->lr_dvp = NULL;
1779  		lost_rqstp->lr_oop = oop;
1780  		lost_rqstp->lr_osp = osp;
1781  		lost_rqstp->lr_lop = NULL;
1782  		lost_rqstp->lr_cr = cr;
1783  		lost_rqstp->lr_flk = NULL;
1784  		lost_rqstp->lr_dg_acc = access_close;
1785  		lost_rqstp->lr_dg_deny = deny_close;
1786  		lost_rqstp->lr_putfirst = FALSE;
1787  	} else {
1788  		lost_rqstp->lr_op = 0;
1789  	}
1790  }
1791  
1792  /*
1793   * Change the access and deny bits of an OPEN.
1794   * If recovery is needed, *recov_credpp is set to the cred used OTW,
1795   * a hold is placed on it, and *recov_seqidp is set to the seqid used OTW.
1796   */
1797  void
nfs4_open_downgrade(int access_close,int deny_close,nfs4_open_owner_t * oop,nfs4_open_stream_t * osp,vnode_t * vp,cred_t * cr,nfs4_lost_rqst_t * lrp,nfs4_error_t * ep,cred_t ** recov_credpp,seqid4 * recov_seqidp)1798  nfs4_open_downgrade(int access_close, int deny_close, nfs4_open_owner_t *oop,
1799      nfs4_open_stream_t *osp, vnode_t *vp, cred_t *cr, nfs4_lost_rqst_t *lrp,
1800      nfs4_error_t *ep, cred_t **recov_credpp, seqid4 *recov_seqidp)
1801  {
1802  	mntinfo4_t		*mi;
1803  	int			downgrade_acc, downgrade_deny;
1804  	int			new_acc, new_deny;
1805  	COMPOUND4args_clnt	args;
1806  	COMPOUND4res_clnt	res;
1807  	OPEN_DOWNGRADE4res	*odg_res;
1808  	nfs_argop4		argop[3];
1809  	nfs_resop4		*resop;
1810  	rnode4_t		*rp;
1811  	bool_t			needrecov = FALSE;
1812  	int			doqueue = 1;
1813  	seqid4			seqid = 0;
1814  	cred_t			*cred_otw;
1815  	hrtime_t		t;
1816  
1817  	ASSERT(mutex_owned(&osp->os_sync_lock));
1818  #if DEBUG
1819  	mutex_enter(&oop->oo_lock);
1820  	ASSERT(oop->oo_seqid_inuse);
1821  	mutex_exit(&oop->oo_lock);
1822  #endif
1823  
1824  
1825  	if (access_close == 0 && deny_close == 0) {
1826  		nfs4_error_zinit(ep);
1827  		return;
1828  	}
1829  
1830  	cred_otw = nfs4_get_otw_cred(cr, VTOMI4(vp), oop);
1831  
1832  cred_retry:
1833  	nfs4_error_zinit(ep);
1834  	downgrade_acc = 0;
1835  	downgrade_deny = 0;
1836  	mi = VTOMI4(vp);
1837  	rp = VTOR4(vp);
1838  
1839  	/*
1840  	 * Check to see if the open stream got closed before we go OTW,
1841  	 * now that we have acquired the 'os_sync_lock'.
1842  	 */
1843  	if (!osp->os_valid) {
1844  		NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
1845  		    " open stream has already been closed, return success"));
1846  		/* error has already been set */
1847  		goto no_args_out;
1848  	}
1849  
1850  	/* If the file failed recovery, just quit. */
1851  	mutex_enter(&rp->r_statelock);
1852  	if (rp->r_flags & R4RECOVERR) {
1853  		mutex_exit(&rp->r_statelock);
1854  		ep->error = EIO;
1855  		goto no_args_out;
1856  	}
1857  	mutex_exit(&rp->r_statelock);
1858  
1859  	seqid = nfs4_get_open_seqid(oop) + 1;
1860  
1861  	NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
1862  	    "access_close %d, acc_read %"PRIu64" acc_write %"PRIu64"",
1863  	    access_close, osp->os_share_acc_read, osp->os_share_acc_write));
1864  
1865  	/* If we're closing the last READ, need to downgrade */
1866  	if ((access_close & FREAD) && (osp->os_share_acc_read == 1))
1867  		downgrade_acc |= OPEN4_SHARE_ACCESS_READ;
1868  
1869  	/* if we're closing the last WRITE, need to downgrade */
1870  	if ((access_close & FWRITE) && (osp->os_share_acc_write == 1))
1871  		downgrade_acc |= OPEN4_SHARE_ACCESS_WRITE;
1872  
1873  	downgrade_deny = OPEN4_SHARE_DENY_NONE;
1874  
1875  	new_acc = 0;
1876  	new_deny = 0;
1877  
1878  	/* set our new access and deny share bits */
1879  	if ((osp->os_share_acc_read > 0) &&
1880  	    !(downgrade_acc & OPEN4_SHARE_ACCESS_READ))
1881  		new_acc |= OPEN4_SHARE_ACCESS_READ;
1882  	if ((osp->os_share_acc_write > 0) &&
1883  	    !(downgrade_acc & OPEN4_SHARE_ACCESS_WRITE))
1884  		new_acc |= OPEN4_SHARE_ACCESS_WRITE;
1885  
1886  	new_deny = OPEN4_SHARE_DENY_NONE;
1887  
1888  	NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
1889  	    "downgrade acc 0x%x deny 0x%x", downgrade_acc, downgrade_deny));
1890  	NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
1891  	    "new acc 0x%x deny 0x%x", new_acc, new_deny));
1892  
1893  	/*
1894  	 * Check to see if we aren't actually doing any downgrade or
1895  	 * if this is the last 'close' but the file is still mmapped.
1896  	 * Skip this if this a lost request resend so we don't decrement
1897  	 * the osp's share counts more than once.
1898  	 */
1899  	if (!lrp &&
1900  	    ((downgrade_acc == 0 && downgrade_deny == 0) ||
1901  	    (new_acc == 0 && new_deny == 0))) {
1902  		/*
1903  		 * No downgrade to do, but still need to
1904  		 * update osp's os_share_* counts.
1905  		 */
1906  		NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE,
1907  		    "nfs4_open_downgrade: just lower the osp's count by %s",
1908  		    (access_close & FREAD) && (access_close & FWRITE) ?
1909  		    "read and write" : (access_close & FREAD) ? "read" :
1910  		    (access_close & FWRITE) ? "write" : "bogus"));
1911  		if (access_close & FREAD)
1912  			osp->os_share_acc_read--;
1913  		if (access_close & FWRITE)
1914  			osp->os_share_acc_write--;
1915  		osp->os_share_deny_none--;
1916  		nfs4_error_zinit(ep);
1917  
1918  		goto no_args_out;
1919  	}
1920  
1921  	if (osp->os_orig_oo_name != oop->oo_name) {
1922  		ep->error = EIO;
1923  		goto no_args_out;
1924  	}
1925  
1926  	/* setup the COMPOUND args */
1927  	if (lrp)
1928  		args.ctag = TAG_OPEN_DG_LOST;
1929  	else
1930  		args.ctag = TAG_OPEN_DG;
1931  
1932  	args.array_len = 3;
1933  	args.array = argop;
1934  
1935  	/* putfh */
1936  	argop[0].argop = OP_CPUTFH;
1937  	argop[0].nfs_argop4_u.opcputfh.sfh = rp->r_fh;
1938  
1939  	argop[1].argop = OP_GETATTR;
1940  	argop[1].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK;
1941  	argop[1].nfs_argop4_u.opgetattr.mi = mi;
1942  
1943  	ASSERT(mutex_owned(&osp->os_sync_lock));
1944  	ASSERT(osp->os_delegation == FALSE);
1945  
1946  	/* open downgrade */
1947  	argop[2].argop = OP_OPEN_DOWNGRADE;
1948  	argop[2].nfs_argop4_u.opopen_downgrade.open_stateid = osp->open_stateid;
1949  	argop[2].nfs_argop4_u.opopen_downgrade.share_access = new_acc;
1950  	argop[2].nfs_argop4_u.opopen_downgrade.share_deny = new_deny;
1951  	argop[2].nfs_argop4_u.opopen_downgrade.seqid = seqid;
1952  
1953  	t = gethrtime();
1954  
1955  	rfs4call(mi, &args, &res, cred_otw, &doqueue, 0, ep);
1956  
1957  	if (ep->error == 0 && nfs4_need_to_bump_seqid(&res))
1958  		nfs4_set_open_seqid(seqid, oop, args.ctag);
1959  
1960  	if ((ep->error == EACCES ||
1961  	    (ep->error == 0 && res.status == NFS4ERR_ACCESS)) &&
1962  	    cred_otw != cr) {
1963  		crfree(cred_otw);
1964  		cred_otw = cr;
1965  		crhold(cred_otw);
1966  		if (!ep->error)
1967  			xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1968  		goto cred_retry;
1969  	}
1970  
1971  	needrecov = nfs4_needs_recovery(ep, TRUE, mi->mi_vfsp);
1972  
1973  	if (needrecov && recov_credpp) {
1974  		*recov_credpp = cred_otw;
1975  		crhold(*recov_credpp);
1976  		if (recov_seqidp)
1977  			*recov_seqidp = seqid;
1978  	}
1979  
1980  	if (!ep->error && !res.status) {
1981  		/* get the open downgrade results */
1982  		resop = &res.array[2];
1983  		odg_res = &resop->nfs_resop4_u.opopen_downgrade;
1984  
1985  		osp->open_stateid = odg_res->open_stateid;
1986  
1987  		/* set the open streams new access/deny bits */
1988  		if (access_close & FREAD)
1989  			osp->os_share_acc_read--;
1990  		if (access_close & FWRITE)
1991  			osp->os_share_acc_write--;
1992  		osp->os_share_deny_none--;
1993  		osp->os_dc_openacc = new_acc;
1994  
1995  		nfs4_attr_cache(vp,
1996  		    &res.array[1].nfs_resop4_u.opgetattr.ga_res,
1997  		    t, cred_otw, TRUE, NULL);
1998  	}
1999  
2000  	if (!ep->error)
2001  		xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
2002  
2003  no_args_out:
2004  	crfree(cred_otw);
2005  }
2006  
2007  /*
2008   * If an OPEN request gets ETIMEDOUT or EINTR (that includes bailing out
2009   * because the filesystem was forcibly unmounted) then we don't know if we
2010   * potentially left state dangling on the server, therefore the recovery
2011   * framework makes this call to resend the OPEN request and then undo it.
2012   */
2013  void
nfs4_resend_open_otw(vnode_t ** vpp,nfs4_lost_rqst_t * resend_rqstp,nfs4_error_t * ep)2014  nfs4_resend_open_otw(vnode_t **vpp, nfs4_lost_rqst_t *resend_rqstp,
2015      nfs4_error_t *ep)
2016  {
2017  	COMPOUND4args_clnt	args;
2018  	COMPOUND4res_clnt	res;
2019  	nfs_argop4		argop[4];
2020  	GETFH4res		*gf_res = NULL;
2021  	OPEN4cargs		*open_args;
2022  	OPEN4res		*op_res;
2023  	char			*destcfp;
2024  	int			destclen;
2025  	nfs4_ga_res_t		*garp;
2026  	vnode_t			*dvp = NULL, *vp = NULL;
2027  	rnode4_t		*rp = NULL, *drp = NULL;
2028  	cred_t			*cr = NULL;
2029  	seqid4			seqid;
2030  	nfs4_open_owner_t	*oop = NULL;
2031  	nfs4_open_stream_t	*osp = NULL;
2032  	component4		*srcfp;
2033  	open_claim_type4	claim;
2034  	mntinfo4_t		*mi;
2035  	int			doqueue = 1;
2036  	bool_t			retry_open = FALSE;
2037  	int			created_osp = 0;
2038  	hrtime_t		t;
2039  	char			*failed_msg = "";
2040  	int			fh_different;
2041  	int			reopen = 0;
2042  
2043  	nfs4_error_zinit(ep);
2044  
2045  	cr = resend_rqstp->lr_cr;
2046  	dvp = resend_rqstp->lr_dvp;
2047  
2048  	vp = *vpp;
2049  	if (vp) {
2050  		ASSERT(nfs4_consistent_type(vp));
2051  		rp = VTOR4(vp);
2052  	}
2053  
2054  	if (rp) {
2055  		/* If the file failed recovery, just quit. */
2056  		mutex_enter(&rp->r_statelock);
2057  		if (rp->r_flags & R4RECOVERR) {
2058  			mutex_exit(&rp->r_statelock);
2059  			ep->error = EIO;
2060  			return;
2061  		}
2062  		mutex_exit(&rp->r_statelock);
2063  	}
2064  
2065  	if (dvp) {
2066  		drp = VTOR4(dvp);
2067  		/* If the parent directory failed recovery, just quit. */
2068  		mutex_enter(&drp->r_statelock);
2069  		if (drp->r_flags & R4RECOVERR) {
2070  			mutex_exit(&drp->r_statelock);
2071  			ep->error = EIO;
2072  			return;
2073  		}
2074  		mutex_exit(&drp->r_statelock);
2075  	} else
2076  		reopen = 1;	/* NULL dvp means this is a reopen */
2077  
2078  	claim = resend_rqstp->lr_oclaim;
2079  	ASSERT(claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR);
2080  
2081  	args.ctag = TAG_OPEN_LOST;
2082  	args.array_len = 4;
2083  	args.array = argop;
2084  
2085  	argop[0].argop = OP_CPUTFH;
2086  	if (reopen) {
2087  		ASSERT(vp != NULL);
2088  
2089  		mi = VTOMI4(vp);
2090  		/*
2091  		 * if this is a file mount then
2092  		 * use the mntinfo parentfh
2093  		 */
2094  		argop[0].nfs_argop4_u.opcputfh.sfh =
2095  		    (vp->v_flag & VROOT) ? mi->mi_srvparentfh :
2096  		    VTOSV(vp)->sv_dfh;
2097  		args.ctag = TAG_REOPEN_LOST;
2098  	} else {
2099  		argop[0].nfs_argop4_u.opcputfh.sfh = VTOR4(dvp)->r_fh;
2100  		mi = VTOMI4(dvp);
2101  	}
2102  
2103  	argop[1].argop = OP_COPEN;
2104  	open_args = &argop[1].nfs_argop4_u.opcopen;
2105  	open_args->claim = claim;
2106  
2107  	/*
2108  	 * If we sent over a OPEN with CREATE then the only
2109  	 * thing we care about is to not leave dangling state
2110  	 * on the server, not whether the file we potentially
2111  	 * created remains on the server.  So even though the
2112  	 * lost open request specified a CREATE, we only wish
2113  	 * to do a non-CREATE OPEN.
2114  	 */
2115  	open_args->opentype = OPEN4_NOCREATE;
2116  
2117  	srcfp = &resend_rqstp->lr_ofile;
2118  	destclen = srcfp->utf8string_len;
2119  	destcfp = kmem_alloc(destclen + 1, KM_SLEEP);
2120  	bcopy(srcfp->utf8string_val, destcfp, destclen);
2121  	destcfp[destclen] = '\0';
2122  	if (claim == CLAIM_DELEGATE_CUR) {
2123  		open_args->open_claim4_u.delegate_cur_info.delegate_stateid =
2124  		    resend_rqstp->lr_ostateid;
2125  		open_args->open_claim4_u.delegate_cur_info.cfile = destcfp;
2126  	} else {
2127  		open_args->open_claim4_u.cfile = destcfp;
2128  	}
2129  
2130  	open_args->share_access = resend_rqstp->lr_oacc;
2131  	open_args->share_deny = resend_rqstp->lr_odeny;
2132  	oop = resend_rqstp->lr_oop;
2133  	ASSERT(oop != NULL);
2134  
2135  	open_args->owner.clientid = mi2clientid(mi);
2136  	/* this length never changes */
2137  	open_args->owner.owner_len = sizeof (oop->oo_name);
2138  	open_args->owner.owner_val =
2139  	    kmem_alloc(open_args->owner.owner_len, KM_SLEEP);
2140  
2141  	ep->error = nfs4_start_open_seqid_sync(oop, mi);
2142  	ASSERT(ep->error == 0);		/* recov thread always succeeds */
2143  	/*
2144  	 * We can get away with not saving the seqid upon detection
2145  	 * of a lost request, and now just use the open owner's current
2146  	 * seqid since we only allow one op OTW per seqid and lost
2147  	 * requests are saved FIFO.
2148  	 */
2149  	seqid = nfs4_get_open_seqid(oop) + 1;
2150  	open_args->seqid = seqid;
2151  
2152  	bcopy(&oop->oo_name, open_args->owner.owner_val,
2153  	    open_args->owner.owner_len);
2154  
2155  	/* getfh */
2156  	argop[2].argop = OP_GETFH;
2157  
2158  	/* Construct the getattr part of the compound */
2159  	argop[3].argop = OP_GETATTR;
2160  	argop[3].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK;
2161  	argop[3].nfs_argop4_u.opgetattr.mi = mi;
2162  
2163  	res.array = NULL;
2164  
2165  	t = gethrtime();
2166  
2167  	rfs4call(mi, &args, &res, cr, &doqueue, 0, ep);
2168  
2169  	if (ep->error == 0 && nfs4_need_to_bump_seqid(&res))
2170  		nfs4_set_open_seqid(seqid, oop, args.ctag);
2171  
2172  	NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
2173  	    "nfs4_resend_open_otw: error %d stat %d", ep->error, res.status));
2174  
2175  	if (ep->error || res.status)
2176  		goto err_out;
2177  
2178  	op_res = &res.array[1].nfs_resop4_u.opopen;
2179  	gf_res = &res.array[2].nfs_resop4_u.opgetfh;
2180  	garp = &res.array[3].nfs_resop4_u.opgetattr.ga_res;
2181  
2182  	if (!vp) {
2183  		int rnode_err = 0;
2184  		nfs4_sharedfh_t *sfh;
2185  
2186  		/*
2187  		 * If we can't decode all the attributes they are not usable,
2188  		 * just make the vnode.
2189  		 */
2190  
2191  		sfh = sfh4_get(&gf_res->object, VTOMI4(dvp));
2192  		*vpp = makenfs4node(sfh, garp, dvp->v_vfsp, t, cr, dvp,
2193  		    fn_get(VTOSV(dvp)->sv_name,
2194  		    open_args->open_claim4_u.cfile, sfh));
2195  		sfh4_rele(&sfh);
2196  		NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
2197  		    "nfs4_resend_open_otw: made vp %p for file %s",
2198  		    (void *)(*vpp), open_args->open_claim4_u.cfile));
2199  
2200  		if (ep->error)
2201  			PURGE_ATTRCACHE4(*vpp);
2202  
2203  		/*
2204  		 * For the newly created *vpp case, make sure the rnode
2205  		 * isn't bad before using it.
2206  		 */
2207  		mutex_enter(&(VTOR4(*vpp))->r_statelock);
2208  		if (VTOR4(*vpp)->r_flags & R4RECOVERR)
2209  			rnode_err = EIO;
2210  		mutex_exit(&(VTOR4(*vpp))->r_statelock);
2211  
2212  		if (rnode_err) {
2213  			NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
2214  			    "nfs4_resend_open_otw: rp %p is bad",
2215  			    (void *)VTOR4(*vpp)));
2216  			ep->error = rnode_err;
2217  			goto err_out;
2218  		}
2219  
2220  		vp = *vpp;
2221  		rp = VTOR4(vp);
2222  	}
2223  
2224  	if (reopen) {
2225  		/*
2226  		 * Check if the path we reopened really is the same
2227  		 * file. We could end up in a situation were the file
2228  		 * was removed and a new file created with the same name.
2229  		 */
2230  		(void) nfs_rw_enter_sig(&mi->mi_fh_lock, RW_READER, 0);
2231  		fh_different =
2232  		    (nfs4cmpfh(&rp->r_fh->sfh_fh, &gf_res->object) != 0);
2233  		if (fh_different) {
2234  			if (mi->mi_fh_expire_type == FH4_PERSISTENT ||
2235  			    mi->mi_fh_expire_type & FH4_NOEXPIRE_WITH_OPEN) {
2236  				/* Oops, we don't have the same file */
2237  				if (mi->mi_fh_expire_type == FH4_PERSISTENT)
2238  					failed_msg =
2239  					    "Couldn't reopen: Persistant "
2240  					    "file handle changed";
2241  				else
2242  					failed_msg =
2243  					    "Couldn't reopen: Volatile "
2244  					    "(no expire on open) file handle "
2245  					    "changed";
2246  
2247  				nfs4_end_open_seqid_sync(oop);
2248  				kmem_free(destcfp, destclen + 1);
2249  				nfs4args_copen_free(open_args);
2250  				xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
2251  				nfs_rw_exit(&mi->mi_fh_lock);
2252  				nfs4_fail_recov(vp, failed_msg, ep->error,
2253  				    ep->stat);
2254  				return;
2255  			} else {
2256  				/*
2257  				 * We have volatile file handles that don't
2258  				 * compare.  If the fids are the same then we
2259  				 * assume that the file handle expired but the
2260  				 * renode still refers to the same file object.
2261  				 *
2262  				 * First check that we have fids or not.
2263  				 * If we don't we have a dumb server so we will
2264  				 * just assume every thing is ok for now.
2265  				 */
2266  				if (!ep->error &&
2267  				    garp->n4g_va.va_mask & AT_NODEID &&
2268  				    rp->r_attr.va_mask & AT_NODEID &&
2269  				    rp->r_attr.va_nodeid !=
2270  				    garp->n4g_va.va_nodeid) {
2271  					/*
2272  					 * We have fids, but they don't
2273  					 * compare. So kill the file.
2274  					 */
2275  					failed_msg =
2276  					    "Couldn't reopen: file handle "
2277  					    "changed due to mismatched fids";
2278  					nfs4_end_open_seqid_sync(oop);
2279  					kmem_free(destcfp, destclen + 1);
2280  					nfs4args_copen_free(open_args);
2281  					xdr_free(xdr_COMPOUND4res_clnt,
2282  					    (caddr_t)&res);
2283  					nfs_rw_exit(&mi->mi_fh_lock);
2284  					nfs4_fail_recov(vp, failed_msg,
2285  					    ep->error, ep->stat);
2286  					return;
2287  				} else {
2288  					/*
2289  					 * We have volatile file handles that
2290  					 * refers to the same file (at least
2291  					 * they have the same fid) or we don't
2292  					 * have fids so we can't tell. :(. We'll
2293  					 * be a kind and accepting client so
2294  					 * we'll update the rnode's file
2295  					 * handle with the otw handle.
2296  					 *
2297  					 * We need to drop mi->mi_fh_lock since
2298  					 * sh4_update acquires it. Since there
2299  					 * is only one recovery thread there is
2300  					 * no race.
2301  					 */
2302  					nfs_rw_exit(&mi->mi_fh_lock);
2303  					sfh4_update(rp->r_fh, &gf_res->object);
2304  				}
2305  			}
2306  		} else {
2307  			nfs_rw_exit(&mi->mi_fh_lock);
2308  		}
2309  	}
2310  
2311  	ASSERT(nfs4_consistent_type(vp));
2312  
2313  	if (op_res->rflags & OPEN4_RESULT_CONFIRM)
2314  		nfs4open_confirm(vp, &seqid, &op_res->stateid, cr, TRUE,
2315  		    &retry_open, oop, TRUE, ep, NULL);
2316  	if (ep->error || ep->stat) {
2317  		nfs4_end_open_seqid_sync(oop);
2318  		kmem_free(destcfp, destclen + 1);
2319  		nfs4args_copen_free(open_args);
2320  		if (!ep->error)
2321  			xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
2322  		return;
2323  	}
2324  
2325  	if (reopen) {
2326  		/*
2327  		 * Doing a reopen here so the osp should already exist.
2328  		 * If not, something changed or went very wrong.
2329  		 *
2330  		 * returns with 'os_sync_lock' held
2331  		 */
2332  		osp = find_open_stream(oop, rp);
2333  		if (!osp) {
2334  			NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
2335  			    "nfs4_resend_open_otw: couldn't find osp"));
2336  			ep->error = EINVAL;
2337  			goto err_out;
2338  		}
2339  		osp->os_open_ref_count++;
2340  	} else {
2341  		mutex_enter(&oop->oo_lock);
2342  		oop->oo_just_created = NFS4_PERM_CREATED;
2343  		mutex_exit(&oop->oo_lock);
2344  
2345  		/* returns with 'os_sync_lock' held */
2346  		osp = find_or_create_open_stream(oop, rp, &created_osp);
2347  		if (!osp) {
2348  			NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
2349  			    "nfs4_resend_open_otw: couldn't create osp"));
2350  			ep->error = EINVAL;
2351  			goto err_out;
2352  		}
2353  	}
2354  
2355  	osp->open_stateid = op_res->stateid;
2356  	osp->os_delegation = FALSE;
2357  	/*
2358  	 * Need to reset this bitfield for the possible case where we were
2359  	 * going to OTW CLOSE the file, got a non-recoverable error, and before
2360  	 * we could retry the CLOSE, OPENed the file again.
2361  	 */
2362  	ASSERT(osp->os_open_owner->oo_seqid_inuse);
2363  	osp->os_final_close = 0;
2364  	osp->os_force_close = 0;
2365  
2366  	if (!reopen) {
2367  		if (open_args->share_access & OPEN4_SHARE_ACCESS_READ)
2368  			osp->os_share_acc_read++;
2369  		if (open_args->share_access & OPEN4_SHARE_ACCESS_WRITE)
2370  			osp->os_share_acc_write++;
2371  		osp->os_share_deny_none++;
2372  	}
2373  
2374  	mutex_exit(&osp->os_sync_lock);
2375  	if (created_osp)
2376  		nfs4_inc_state_ref_count(mi);
2377  	open_stream_rele(osp, rp);
2378  
2379  	nfs4_end_open_seqid_sync(oop);
2380  
2381  	/* accept delegation, if any */
2382  	nfs4_delegation_accept(rp, claim, op_res, garp, cr);
2383  
2384  	kmem_free(destcfp, destclen + 1);
2385  	nfs4args_copen_free(open_args);
2386  
2387  	if (claim == CLAIM_DELEGATE_CUR)
2388  		nfs4_attr_cache(vp, garp, t, cr, TRUE, NULL);
2389  	else
2390  		PURGE_ATTRCACHE4(vp);
2391  
2392  	xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
2393  
2394  	ASSERT(nfs4_consistent_type(vp));
2395  
2396  	return;
2397  
2398  err_out:
2399  	nfs4_end_open_seqid_sync(oop);
2400  	kmem_free(destcfp, destclen + 1);
2401  	nfs4args_copen_free(open_args);
2402  	if (!ep->error)
2403  		xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
2404  }
2405