1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28
29
30 #include <nfs/nfs4_clnt.h>
31 #include <nfs/rnode4.h>
32 #include <sys/systm.h>
33 #include <sys/cmn_err.h>
34 #include <sys/atomic.h>
35
36 static void nfs4_free_open_owner(nfs4_open_owner_t *, mntinfo4_t *);
37 static nfs4_open_owner_t *find_freed_open_owner(cred_t *,
38 nfs4_oo_hash_bucket_t *, mntinfo4_t *);
39 static open_delegation_type4 get_dtype(rnode4_t *);
40
#ifdef DEBUG
/* DEBUG-only variables; both start out as zero. */
int nfs4_client_foo_debug = 0;
int nfs4_client_open_dg = 0;

/*
 * Fault injection switch.  While this is non-zero, the lockowner and
 * openowner seqid sync primitives intermittently return errors.
 */
static int seqid_sync_faults = 0;
#endif
50
51 stateid4 clnt_special0 = {
52 0,
53 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
54 };
55
56 stateid4 clnt_special1 = {
57 0xffffffff,
58 {
59 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
60 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
61 (char)0xff, (char)0xff, (char)0xff, (char)0xff
62 }
63 };
64
65 /* finds hash bucket and locks it */
66 static nfs4_oo_hash_bucket_t *
lock_bucket(cred_t * cr,mntinfo4_t * mi)67 lock_bucket(cred_t *cr, mntinfo4_t *mi)
68 {
69 nfs4_oo_hash_bucket_t *bucketp;
70 uint32_t hash_key;
71
72 hash_key = (uint32_t)(crgetuid(cr) + crgetruid(cr))
73 % NFS4_NUM_OO_BUCKETS;
74 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, "lock_bucket: "
75 "hash_key %d for cred %p", hash_key, (void*)cr));
76
77 ASSERT(hash_key >= 0 && hash_key < NFS4_NUM_OO_BUCKETS);
78 ASSERT(mi != NULL);
79 ASSERT(mutex_owned(&mi->mi_lock));
80
81 bucketp = &(mi->mi_oo_list[hash_key]);
82 mutex_enter(&bucketp->b_lock);
83 return (bucketp);
84 }
85
86 /* unlocks hash bucket pointed by bucket_ptr */
87 static void
unlock_bucket(nfs4_oo_hash_bucket_t * bucketp)88 unlock_bucket(nfs4_oo_hash_bucket_t *bucketp)
89 {
90 mutex_exit(&bucketp->b_lock);
91 }
92
93 /*
94 * Removes the lock owner from the rnode's lock_owners list and frees the
95 * corresponding reference.
96 */
97 void
nfs4_rnode_remove_lock_owner(rnode4_t * rp,nfs4_lock_owner_t * lop)98 nfs4_rnode_remove_lock_owner(rnode4_t *rp, nfs4_lock_owner_t *lop)
99 {
100 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
101 "nfs4_rnode_remove_lock_owner"));
102
103 mutex_enter(&rp->r_statev4_lock);
104
105 if (lop->lo_next_rnode == NULL) {
106 /* already removed from list */
107 mutex_exit(&rp->r_statev4_lock);
108 return;
109 }
110
111 ASSERT(lop->lo_prev_rnode != NULL);
112
113 lop->lo_prev_rnode->lo_next_rnode = lop->lo_next_rnode;
114 lop->lo_next_rnode->lo_prev_rnode = lop->lo_prev_rnode;
115
116 lop->lo_next_rnode = lop->lo_prev_rnode = NULL;
117
118 mutex_exit(&rp->r_statev4_lock);
119
120 /*
121 * This would be an appropriate place for
122 * RELEASE_LOCKOWNER. For now, this is overkill
123 * because in the common case, close is going to
124 * release any lockowners anyway.
125 */
126 lock_owner_rele(lop);
127 }
128
129 /*
130 * Remove all lock owners from the rnode's lock_owners list. Frees up
131 * their references from the list.
132 */
133
134 void
nfs4_flush_lock_owners(rnode4_t * rp)135 nfs4_flush_lock_owners(rnode4_t *rp)
136 {
137 nfs4_lock_owner_t *lop;
138
139 mutex_enter(&rp->r_statev4_lock);
140 while (rp->r_lo_head.lo_next_rnode != &rp->r_lo_head) {
141 lop = rp->r_lo_head.lo_next_rnode;
142 lop->lo_prev_rnode->lo_next_rnode = lop->lo_next_rnode;
143 lop->lo_next_rnode->lo_prev_rnode = lop->lo_prev_rnode;
144 lop->lo_next_rnode = lop->lo_prev_rnode = NULL;
145 lock_owner_rele(lop);
146 }
147 mutex_exit(&rp->r_statev4_lock);
148 }
149
150 void
nfs4_clear_open_streams(rnode4_t * rp)151 nfs4_clear_open_streams(rnode4_t *rp)
152 {
153 nfs4_open_stream_t *osp;
154
155 mutex_enter(&rp->r_os_lock);
156 while ((osp = list_head(&rp->r_open_streams)) != NULL) {
157 open_owner_rele(osp->os_open_owner);
158 list_remove(&rp->r_open_streams, osp);
159 mutex_destroy(&osp->os_sync_lock);
160 osp->os_open_owner = NULL;
161 kmem_free(osp, sizeof (*osp));
162 }
163 mutex_exit(&rp->r_os_lock);
164 }
165
166 void
open_owner_hold(nfs4_open_owner_t * oop)167 open_owner_hold(nfs4_open_owner_t *oop)
168 {
169 mutex_enter(&oop->oo_lock);
170 oop->oo_ref_count++;
171 mutex_exit(&oop->oo_lock);
172 }
173
174 /*
175 * Frees the open owner if the ref count hits zero.
176 */
177 void
open_owner_rele(nfs4_open_owner_t * oop)178 open_owner_rele(nfs4_open_owner_t *oop)
179 {
180 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
181 "open_owner_rele"));
182
183 mutex_enter(&oop->oo_lock);
184 oop->oo_ref_count--;
185 if (oop->oo_ref_count == 0) {
186 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
187 "open_owner_rele: freeing open owner"));
188 oop->oo_valid = 0;
189 mutex_exit(&oop->oo_lock);
190 /*
191 * Ok, we don't destroy the open owner, nor do we put it on
192 * the mntinfo4's free list just yet. We are lazy about it
193 * and let callers to find_open_owner() do that to keep locking
194 * simple.
195 */
196 } else {
197 mutex_exit(&oop->oo_lock);
198 }
199 }
200
201 void
open_stream_hold(nfs4_open_stream_t * osp)202 open_stream_hold(nfs4_open_stream_t *osp)
203 {
204 mutex_enter(&osp->os_sync_lock);
205 osp->os_ref_count++;
206 mutex_exit(&osp->os_sync_lock);
207 }
208
209 /*
210 * Frees the open stream and removes it from the rnode4's open streams list if
211 * the ref count drops to zero.
212 */
213 void
open_stream_rele(nfs4_open_stream_t * osp,rnode4_t * rp)214 open_stream_rele(nfs4_open_stream_t *osp, rnode4_t *rp)
215 {
216 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
217 "open_stream_rele"));
218
219 ASSERT(!mutex_owned(&rp->r_os_lock));
220
221 mutex_enter(&osp->os_sync_lock);
222 ASSERT(osp->os_ref_count > 0);
223 osp->os_ref_count--;
224 if (osp->os_ref_count == 0) {
225 nfs4_open_owner_t *tmp_oop;
226
227 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
228 "open_stream_rele: freeing open stream"));
229 osp->os_valid = 0;
230 tmp_oop = osp->os_open_owner;
231 mutex_exit(&osp->os_sync_lock);
232
233 /* now see if we need to destroy the open owner */
234 open_owner_rele(tmp_oop);
235
236 mutex_enter(&rp->r_os_lock);
237 list_remove(&rp->r_open_streams, osp);
238 mutex_exit(&rp->r_os_lock);
239
240 /* free up osp */
241 mutex_destroy(&osp->os_sync_lock);
242 osp->os_open_owner = NULL;
243 kmem_free(osp, sizeof (*osp));
244 } else {
245 mutex_exit(&osp->os_sync_lock);
246 }
247 }
248
249 void
lock_owner_hold(nfs4_lock_owner_t * lop)250 lock_owner_hold(nfs4_lock_owner_t *lop)
251 {
252 mutex_enter(&lop->lo_lock);
253 lop->lo_ref_count++;
254 mutex_exit(&lop->lo_lock);
255 }
256
257 /*
258 * Frees the lock owner if the ref count hits zero and
259 * the structure no longer has no locks.
260 */
261 void
lock_owner_rele(nfs4_lock_owner_t * lop)262 lock_owner_rele(nfs4_lock_owner_t *lop)
263 {
264 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
265 "lock_owner_rele"));
266
267 mutex_enter(&lop->lo_lock);
268 lop->lo_ref_count--;
269 if (lop->lo_ref_count == 0) {
270 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
271 "lock_owner_rele: freeing lock owner: "
272 "%x", lop->lo_pid));
273 lop->lo_valid = 0;
274 /*
275 * If there are no references, the lock_owner should
276 * already be off the rnode's list.
277 */
278 ASSERT(lop->lo_next_rnode == NULL);
279 ASSERT(lop->lo_prev_rnode == NULL);
280 ASSERT(!(lop->lo_flags & NFS4_LOCK_SEQID_INUSE));
281 ASSERT(lop->lo_seqid_holder == NULL);
282 mutex_exit(&lop->lo_lock);
283
284 /* free up lop */
285 cv_destroy(&lop->lo_cv_seqid_sync);
286 mutex_destroy(&lop->lo_lock);
287 kmem_free(lop, sizeof (*lop));
288 } else {
289 mutex_exit(&lop->lo_lock);
290 }
291 }
292
293 /*
294 * This increments the open owner ref count if found.
295 * The argument 'just_created' determines whether we are looking for open
296 * owners with the 'oo_just_created' flag set or not.
297 */
298 nfs4_open_owner_t *
find_open_owner_nolock(cred_t * cr,int just_created,mntinfo4_t * mi)299 find_open_owner_nolock(cred_t *cr, int just_created, mntinfo4_t *mi)
300 {
301 nfs4_open_owner_t *oop = NULL, *next_oop;
302 nfs4_oo_hash_bucket_t *bucketp;
303
304 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
305 "find_open_owner: cred %p, just_created %d",
306 (void*)cr, just_created));
307
308 ASSERT(mi != NULL);
309 ASSERT(mutex_owned(&mi->mi_lock));
310
311 bucketp = lock_bucket(cr, mi);
312
313 /* got hash bucket, search through open owners */
314 for (oop = list_head(&bucketp->b_oo_hash_list); oop != NULL; ) {
315 mutex_enter(&oop->oo_lock);
316 if (!crcmp(oop->oo_cred, cr) &&
317 (oop->oo_just_created == just_created ||
318 just_created == NFS4_JUST_CREATED)) {
319 /* match */
320 if (oop->oo_valid == 0) {
321 /* reactivate the open owner */
322 oop->oo_valid = 1;
323 ASSERT(oop->oo_ref_count == 0);
324 }
325 oop->oo_ref_count++;
326 mutex_exit(&oop->oo_lock);
327 unlock_bucket(bucketp);
328 return (oop);
329 }
330 next_oop = list_next(&bucketp->b_oo_hash_list, oop);
331 if (oop->oo_valid == 0) {
332 list_remove(&bucketp->b_oo_hash_list, oop);
333
334 /*
335 * Now we go ahead and put this open owner
336 * on the freed list. This is our lazy method.
337 */
338 nfs4_free_open_owner(oop, mi);
339 }
340
341 mutex_exit(&oop->oo_lock);
342 oop = next_oop;
343 }
344
345 /* search through recently freed open owners */
346 oop = find_freed_open_owner(cr, bucketp, mi);
347
348 unlock_bucket(bucketp);
349
350 return (oop);
351 }
352
353 nfs4_open_owner_t *
find_open_owner(cred_t * cr,int just_created,mntinfo4_t * mi)354 find_open_owner(cred_t *cr, int just_created, mntinfo4_t *mi)
355 {
356 nfs4_open_owner_t *oop;
357
358 mutex_enter(&mi->mi_lock);
359 oop = find_open_owner_nolock(cr, just_created, mi);
360 mutex_exit(&mi->mi_lock);
361
362 return (oop);
363 }
364
365 /*
366 * This increments osp's ref count if found.
367 * Returns with 'os_sync_lock' held.
368 */
369 nfs4_open_stream_t *
find_open_stream(nfs4_open_owner_t * oop,rnode4_t * rp)370 find_open_stream(nfs4_open_owner_t *oop, rnode4_t *rp)
371 {
372 nfs4_open_stream_t *osp;
373
374 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
375 "find_open_stream"));
376
377 mutex_enter(&rp->r_os_lock);
378 /* Now, no one can add or delete to rp's open streams list */
379 for (osp = list_head(&rp->r_open_streams); osp != NULL;
380 osp = list_next(&rp->r_open_streams, osp)) {
381 mutex_enter(&osp->os_sync_lock);
382 if (osp->os_open_owner == oop && osp->os_valid != 0) {
383 /* match */
384 NFS4_DEBUG(nfs4_client_state_debug,
385 (CE_NOTE, "find_open_stream "
386 "got a match"));
387
388 osp->os_ref_count++;
389 mutex_exit(&rp->r_os_lock);
390 return (osp);
391 }
392 mutex_exit(&osp->os_sync_lock);
393 }
394
395 mutex_exit(&rp->r_os_lock);
396 return (NULL);
397 }
398
399 /*
400 * Find the lock owner for the given file and process ID. If "which" is
401 * LOWN_VALID_STATEID, require that the lock owner contain a valid stateid
402 * from the server.
403 *
404 * This increments the lock owner's ref count if found. Returns NULL if
405 * there was no match.
406 */
407 nfs4_lock_owner_t *
find_lock_owner(rnode4_t * rp,pid_t pid,lown_which_t which)408 find_lock_owner(rnode4_t *rp, pid_t pid, lown_which_t which)
409 {
410 nfs4_lock_owner_t *lop, *next_lop;
411
412 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
413 "find_lock_owner: pid %x, which %d", pid, which));
414
415 ASSERT(which == LOWN_ANY || which == LOWN_VALID_STATEID);
416
417 /* search by pid */
418 mutex_enter(&rp->r_statev4_lock);
419
420 lop = rp->r_lo_head.lo_next_rnode;
421 while (lop != &rp->r_lo_head) {
422 mutex_enter(&lop->lo_lock);
423 if (lop->lo_pid == pid && lop->lo_valid != 0 &&
424 !(lop->lo_flags & NFS4_BAD_SEQID_LOCK)) {
425 if (which == LOWN_ANY ||
426 lop->lo_just_created != NFS4_JUST_CREATED) {
427 /* Found a matching lock owner */
428 NFS4_DEBUG(nfs4_client_state_debug,
429 (CE_NOTE, "find_lock_owner: "
430 "got a match"));
431
432 lop->lo_ref_count++;
433 mutex_exit(&lop->lo_lock);
434 mutex_exit(&rp->r_statev4_lock);
435 return (lop);
436 }
437 }
438 next_lop = lop->lo_next_rnode;
439 mutex_exit(&lop->lo_lock);
440 lop = next_lop;
441 }
442
443 mutex_exit(&rp->r_statev4_lock);
444 return (NULL);
445 }
446
447 /*
448 * This returns the delegation stateid as 'sid'. Returns 1 if a successful
449 * delegation stateid was found, otherwise returns 0.
450 */
451
452 static int
nfs4_get_deleg_stateid(rnode4_t * rp,nfs_opnum4 op,stateid4 * sid)453 nfs4_get_deleg_stateid(rnode4_t *rp, nfs_opnum4 op, stateid4 *sid)
454 {
455 ASSERT(!mutex_owned(&rp->r_statev4_lock));
456
457 mutex_enter(&rp->r_statev4_lock);
458 if (((rp->r_deleg_type == OPEN_DELEGATE_WRITE && op == OP_WRITE) ||
459 (rp->r_deleg_type != OPEN_DELEGATE_NONE && op != OP_WRITE)) &&
460 !rp->r_deleg_return_pending) {
461
462 *sid = rp->r_deleg_stateid;
463 mutex_exit(&rp->r_statev4_lock);
464 return (1);
465 }
466 mutex_exit(&rp->r_statev4_lock);
467 return (0);
468 }
469
470 /*
471 * This returns the lock stateid as 'sid'. Returns 1 if a successful lock
472 * stateid was found, otherwise returns 0.
473 */
474 static int
nfs4_get_lock_stateid(rnode4_t * rp,pid_t pid,stateid4 * sid)475 nfs4_get_lock_stateid(rnode4_t *rp, pid_t pid, stateid4 *sid)
476 {
477 nfs4_lock_owner_t *lop;
478
479 lop = find_lock_owner(rp, pid, LOWN_VALID_STATEID);
480
481 if (lop) {
482 /*
483 * Found a matching lock owner, so use a lock
484 * stateid rather than an open stateid.
485 */
486 mutex_enter(&lop->lo_lock);
487 *sid = lop->lock_stateid;
488 mutex_exit(&lop->lo_lock);
489 lock_owner_rele(lop);
490 return (1);
491 }
492
493 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
494 "nfs4_get_lock_stateid: no lop"));
495 return (0);
496 }
497
498 /*
499 * This returns the open stateid as 'sid'. Returns 1 if a successful open
500 * stateid was found, otherwise returns 0.
501 *
502 * Once the stateid is returned to the caller, it is no longer protected;
503 * so the caller must be prepared to handle OLD/BAD_STATEID where
504 * appropiate.
505 */
506 static int
nfs4_get_open_stateid(rnode4_t * rp,cred_t * cr,mntinfo4_t * mi,stateid4 * sid)507 nfs4_get_open_stateid(rnode4_t *rp, cred_t *cr, mntinfo4_t *mi, stateid4 *sid)
508 {
509 nfs4_open_owner_t *oop;
510 nfs4_open_stream_t *osp;
511
512 ASSERT(mi != NULL);
513
514 oop = find_open_owner(cr, NFS4_PERM_CREATED, mi);
515 if (!oop) {
516 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
517 "nfs4_get_open_stateid: no oop"));
518 return (0);
519 }
520
521 osp = find_open_stream(oop, rp);
522 open_owner_rele(oop);
523 if (!osp) {
524 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
525 "nfs4_get_open_stateid: no osp"));
526 return (0);
527 }
528
529 if (osp->os_failed_reopen) {
530 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
531 "nfs4_get_open_stateid: osp %p failed reopen",
532 (void *)osp));
533 mutex_exit(&osp->os_sync_lock);
534 open_stream_rele(osp, rp);
535 return (0);
536 }
537 *sid = osp->open_stateid;
538 mutex_exit(&osp->os_sync_lock);
539 open_stream_rele(osp, rp);
540 return (1);
541 }
542
543 /*
544 * Returns the delegation stateid if this 'op' is OP_WRITE and the
545 * delegation we hold is a write delegation, OR this 'op' is not
546 * OP_WRITE and we have a delegation held (read or write), otherwise
547 * returns the lock stateid if there is a lock owner, otherwise
548 * returns the open stateid if there is a open stream, otherwise
549 * returns special stateid <seqid = 0, other = 0>.
550 *
551 * Used for WRITE operations.
552 */
553 stateid4
nfs4_get_w_stateid(cred_t * cr,rnode4_t * rp,pid_t pid,mntinfo4_t * mi,nfs_opnum4 op,nfs4_stateid_types_t * sid_tp)554 nfs4_get_w_stateid(cred_t *cr, rnode4_t *rp, pid_t pid, mntinfo4_t *mi,
555 nfs_opnum4 op, nfs4_stateid_types_t *sid_tp)
556 {
557 stateid4 sid;
558
559 if (nfs4_get_deleg_stateid(rp, op, &sid)) {
560 if (!stateid4_cmp(&sid, &sid_tp->d_sid)) {
561 sid_tp->cur_sid_type = DEL_SID;
562 return (sid);
563 }
564 }
565 if (nfs4_get_lock_stateid(rp, pid, &sid)) {
566 if (!stateid4_cmp(&sid, &sid_tp->l_sid)) {
567 sid_tp->cur_sid_type = LOCK_SID;
568 return (sid);
569 }
570 }
571 if (nfs4_get_open_stateid(rp, cr, mi, &sid)) {
572 if (!stateid4_cmp(&sid, &sid_tp->o_sid)) {
573 sid_tp->cur_sid_type = OPEN_SID;
574 return (sid);
575 }
576 }
577 bzero(&sid, sizeof (stateid4));
578 sid_tp->cur_sid_type = SPEC_SID;
579 return (sid);
580 }
581
582 /*
583 * Returns the delegation stateid if this 'op' is OP_WRITE and the
584 * delegation we hold is a write delegation, OR this 'op' is not
585 * OP_WRITE and we have a delegation held (read or write), otherwise
586 * returns the lock stateid if there is a lock owner, otherwise
587 * returns the open stateid if there is a open stream, otherwise
588 * returns special stateid <seqid = 0, other = 0>.
589 *
590 * This also updates which stateid we are using in 'sid_tp', skips
591 * previously attempted stateids, and skips checking higher priority
592 * stateids than the current level as dictated by 'sid_tp->cur_sid_type'
593 * for async reads.
594 *
595 * Used for READ and SETATTR operations.
596 */
597 stateid4
nfs4_get_stateid(cred_t * cr,rnode4_t * rp,pid_t pid,mntinfo4_t * mi,nfs_opnum4 op,nfs4_stateid_types_t * sid_tp,bool_t async_read)598 nfs4_get_stateid(cred_t *cr, rnode4_t *rp, pid_t pid, mntinfo4_t *mi,
599 nfs_opnum4 op, nfs4_stateid_types_t *sid_tp, bool_t async_read)
600 {
601 stateid4 sid;
602
603 /*
604 * For asynchronous READs, do not attempt to retry from the start of
605 * the stateid priority list, just continue from where you last left
606 * off.
607 */
608 if (async_read) {
609 switch (sid_tp->cur_sid_type) {
610 case NO_SID:
611 break;
612 case DEL_SID:
613 goto lock_stateid;
614 case LOCK_SID:
615 goto open_stateid;
616 case OPEN_SID:
617 goto special_stateid;
618 case SPEC_SID:
619 default:
620 cmn_err(CE_PANIC, "nfs4_get_stateid: illegal current "
621 "stateid type %d", sid_tp->cur_sid_type);
622 }
623 }
624
625 if (nfs4_get_deleg_stateid(rp, op, &sid)) {
626 if (!stateid4_cmp(&sid, &sid_tp->d_sid)) {
627 sid_tp->cur_sid_type = DEL_SID;
628 return (sid);
629 }
630 }
631 lock_stateid:
632 if (nfs4_get_lock_stateid(rp, pid, &sid)) {
633 if (!stateid4_cmp(&sid, &sid_tp->l_sid)) {
634 sid_tp->cur_sid_type = LOCK_SID;
635 return (sid);
636 }
637 }
638 open_stateid:
639 if (nfs4_get_open_stateid(rp, cr, mi, &sid)) {
640 if (!stateid4_cmp(&sid, &sid_tp->o_sid)) {
641 sid_tp->cur_sid_type = OPEN_SID;
642 return (sid);
643 }
644 }
645 special_stateid:
646 bzero(&sid, sizeof (stateid4));
647 sid_tp->cur_sid_type = SPEC_SID;
648 return (sid);
649 }
650
651 void
nfs4_set_lock_stateid(nfs4_lock_owner_t * lop,stateid4 stateid)652 nfs4_set_lock_stateid(nfs4_lock_owner_t *lop, stateid4 stateid)
653 {
654 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
655 "nfs4_set_lock_stateid"));
656
657 ASSERT(lop);
658 ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);
659
660 mutex_enter(&lop->lo_lock);
661 lop->lock_stateid = stateid;
662 mutex_exit(&lop->lo_lock);
663 }
664
665 /*
666 * Sequence number used when a new open owner is needed.
667 * This is used so as to not confuse the server. Since a open owner
668 * is based off of cred, a cred could be re-used quickly, and the server
669 * may not release all state for a cred.
670 */
671 static uint64_t open_owner_seq_num = 0;
672
673 uint64_t
nfs4_get_new_oo_name(void)674 nfs4_get_new_oo_name(void)
675 {
676 return (atomic_inc_64_nv(&open_owner_seq_num));
677 }
678
679 /*
680 * Create a new open owner and add it to the open owner hash table.
681 */
682 nfs4_open_owner_t *
create_open_owner(cred_t * cr,mntinfo4_t * mi)683 create_open_owner(cred_t *cr, mntinfo4_t *mi)
684 {
685 nfs4_open_owner_t *oop;
686 nfs4_oo_hash_bucket_t *bucketp;
687
688 oop = kmem_alloc(sizeof (nfs4_open_owner_t), KM_SLEEP);
689 /*
690 * Make sure the cred doesn't go away when we put this open owner
691 * on the free list, as well as make crcmp() a valid check.
692 */
693 crhold(cr);
694 oop->oo_cred = cr;
695 mutex_init(&oop->oo_lock, NULL, MUTEX_DEFAULT, NULL);
696 oop->oo_ref_count = 1;
697 oop->oo_valid = 1;
698 oop->oo_just_created = NFS4_JUST_CREATED;
699 oop->oo_seqid = 0;
700 oop->oo_seqid_inuse = 0;
701 oop->oo_last_good_seqid = 0;
702 oop->oo_last_good_op = TAG_NONE;
703 oop->oo_cred_otw = NULL;
704 cv_init(&oop->oo_cv_seqid_sync, NULL, CV_DEFAULT, NULL);
705
706 /*
707 * A Solaris open_owner is <oo_seq_num>
708 */
709 oop->oo_name = nfs4_get_new_oo_name();
710
711 /* now add the struct into the cred hash table */
712 ASSERT(mutex_owned(&mi->mi_lock));
713 bucketp = lock_bucket(cr, mi);
714 list_insert_head(&bucketp->b_oo_hash_list, oop);
715 unlock_bucket(bucketp);
716
717 return (oop);
718 }
719
720 /*
721 * Create a new open stream and it to the rnode's list.
722 * Increments the ref count on oop.
723 * Returns with 'os_sync_lock' held.
724 */
725 nfs4_open_stream_t *
create_open_stream(nfs4_open_owner_t * oop,rnode4_t * rp)726 create_open_stream(nfs4_open_owner_t *oop, rnode4_t *rp)
727 {
728 nfs4_open_stream_t *osp;
729
730 #ifdef DEBUG
731 mutex_enter(&oop->oo_lock);
732 ASSERT(oop->oo_seqid_inuse);
733 mutex_exit(&oop->oo_lock);
734 #endif
735
736 osp = kmem_alloc(sizeof (nfs4_open_stream_t), KM_SLEEP);
737 osp->os_open_ref_count = 1;
738 osp->os_mapcnt = 0;
739 osp->os_ref_count = 2;
740 osp->os_valid = 1;
741 osp->os_open_owner = oop;
742 osp->os_orig_oo_name = oop->oo_name;
743 bzero(&osp->open_stateid, sizeof (stateid4));
744 osp->os_share_acc_read = 0;
745 osp->os_share_acc_write = 0;
746 osp->os_mmap_read = 0;
747 osp->os_mmap_write = 0;
748 osp->os_share_deny_none = 0;
749 osp->os_share_deny_read = 0;
750 osp->os_share_deny_write = 0;
751 osp->os_delegation = 0;
752 osp->os_dc_openacc = 0;
753 osp->os_final_close = 0;
754 osp->os_pending_close = 0;
755 osp->os_failed_reopen = 0;
756 osp->os_force_close = 0;
757 mutex_init(&osp->os_sync_lock, NULL, MUTEX_DEFAULT, NULL);
758
759 /* open owner gets a reference */
760 open_owner_hold(oop);
761
762 /* now add the open stream to rp */
763 mutex_enter(&rp->r_os_lock);
764 mutex_enter(&osp->os_sync_lock);
765 list_insert_head(&rp->r_open_streams, osp);
766 mutex_exit(&rp->r_os_lock);
767
768 return (osp);
769 }
770
771 /*
772 * Returns an open stream with 'os_sync_lock' held.
773 * If the open stream is found (rather than created), its
774 * 'os_open_ref_count' is bumped.
775 *
776 * There is no race with two threads entering this function
777 * and creating two open streams for the same <oop, rp> pair.
778 * This is because the open seqid sync must be acquired, thus
779 * only allowing one thread in at a time.
780 */
781 nfs4_open_stream_t *
find_or_create_open_stream(nfs4_open_owner_t * oop,rnode4_t * rp,int * created_osp)782 find_or_create_open_stream(nfs4_open_owner_t *oop, rnode4_t *rp,
783 int *created_osp)
784 {
785 nfs4_open_stream_t *osp;
786
787 #ifdef DEBUG
788 mutex_enter(&oop->oo_lock);
789 ASSERT(oop->oo_seqid_inuse);
790 mutex_exit(&oop->oo_lock);
791 #endif
792
793 osp = find_open_stream(oop, rp);
794 if (!osp) {
795 osp = create_open_stream(oop, rp);
796 if (osp)
797 *created_osp = 1;
798 } else {
799 *created_osp = 0;
800 osp->os_open_ref_count++;
801 }
802
803 return (osp);
804 }
805
806 static uint64_t lock_owner_seq_num = 0;
807
808 /*
809 * Create a new lock owner and add it to the rnode's list.
810 * Assumes the rnode's r_statev4_lock is held.
811 * The created lock owner has a reference count of 2: one for the list and
812 * one for the caller to use. Returns the lock owner locked down.
813 */
814 nfs4_lock_owner_t *
create_lock_owner(rnode4_t * rp,pid_t pid)815 create_lock_owner(rnode4_t *rp, pid_t pid)
816 {
817 nfs4_lock_owner_t *lop;
818
819 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
820 "create_lock_owner: pid %x", pid));
821
822 ASSERT(mutex_owned(&rp->r_statev4_lock));
823
824 lop = kmem_alloc(sizeof (nfs4_lock_owner_t), KM_SLEEP);
825 lop->lo_ref_count = 2;
826 lop->lo_valid = 1;
827 bzero(&lop->lock_stateid, sizeof (stateid4));
828 lop->lo_pid = pid;
829 lop->lock_seqid = 0;
830 lop->lo_pending_rqsts = 0;
831 lop->lo_just_created = NFS4_JUST_CREATED;
832 lop->lo_flags = 0;
833 lop->lo_seqid_holder = NULL;
834
835 /*
836 * A Solaris lock_owner is <seq_num><pid>
837 */
838 lop->lock_owner_name.ln_seq_num =
839 atomic_inc_64_nv(&lock_owner_seq_num);
840 lop->lock_owner_name.ln_pid = pid;
841
842 cv_init(&lop->lo_cv_seqid_sync, NULL, CV_DEFAULT, NULL);
843 mutex_init(&lop->lo_lock, NULL, MUTEX_DEFAULT, NULL);
844
845 mutex_enter(&lop->lo_lock);
846
847 /* now add the lock owner to rp */
848 lop->lo_prev_rnode = &rp->r_lo_head;
849 lop->lo_next_rnode = rp->r_lo_head.lo_next_rnode;
850 rp->r_lo_head.lo_next_rnode->lo_prev_rnode = lop;
851 rp->r_lo_head.lo_next_rnode = lop;
852
853 return (lop);
854
855 }
856
857 /*
858 * This sets the lock seqid of a lock owner.
859 */
860 void
nfs4_set_lock_seqid(seqid4 seqid,nfs4_lock_owner_t * lop)861 nfs4_set_lock_seqid(seqid4 seqid, nfs4_lock_owner_t *lop)
862 {
863 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
864 "nfs4_set_lock_seqid"));
865
866 ASSERT(lop != NULL);
867 ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);
868
869 lop->lock_seqid = seqid;
870 }
871
872 static void
nfs4_set_new_lock_owner_args(lock_owner4 * owner,pid_t pid)873 nfs4_set_new_lock_owner_args(lock_owner4 *owner, pid_t pid)
874 {
875 nfs4_lo_name_t *cast_namep;
876
877 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
878 "nfs4_set_new_lock_owner_args"));
879
880 owner->owner_len = sizeof (*cast_namep);
881 owner->owner_val = kmem_alloc(owner->owner_len, KM_SLEEP);
882 /*
883 * A Solaris lock_owner is <seq_num><pid>
884 */
885 cast_namep = (nfs4_lo_name_t *)owner->owner_val;
886 cast_namep->ln_seq_num = atomic_inc_64_nv(&lock_owner_seq_num);
887 cast_namep->ln_pid = pid;
888 }
889
890 /*
891 * Fill in the lock owner args.
892 */
893 void
nfs4_setlockowner_args(lock_owner4 * owner,rnode4_t * rp,pid_t pid)894 nfs4_setlockowner_args(lock_owner4 *owner, rnode4_t *rp, pid_t pid)
895 {
896 nfs4_lock_owner_t *lop;
897
898 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
899 "nfs4_setlockowner_args"));
900
901 /* This increments lop's ref count */
902 lop = find_lock_owner(rp, pid, LOWN_VALID_STATEID);
903
904 if (!lop)
905 goto make_up_args;
906
907 mutex_enter(&lop->lo_lock);
908 owner->owner_len = sizeof (lop->lock_owner_name);
909 owner->owner_val = kmem_alloc(owner->owner_len, KM_SLEEP);
910 bcopy(&lop->lock_owner_name, owner->owner_val,
911 owner->owner_len);
912 mutex_exit(&lop->lo_lock);
913 lock_owner_rele(lop);
914 return;
915
916 make_up_args:
917 nfs4_set_new_lock_owner_args(owner, pid);
918 }
919
920 /*
921 * This ends our use of the open owner's open seqid by setting
922 * the appropiate flags and issuing a cv_signal to wake up another
923 * thread waiting to use the open seqid.
924 */
925
926 void
nfs4_end_open_seqid_sync(nfs4_open_owner_t * oop)927 nfs4_end_open_seqid_sync(nfs4_open_owner_t *oop)
928 {
929 mutex_enter(&oop->oo_lock);
930 ASSERT(oop->oo_seqid_inuse);
931 oop->oo_seqid_inuse = 0;
932 cv_signal(&oop->oo_cv_seqid_sync);
933 mutex_exit(&oop->oo_lock);
934 }
935
936 /*
937 * This starts our use of the open owner's open seqid by setting
938 * the oo_seqid_inuse to true. We will wait (forever) with a
939 * cv_wait() until we are woken up.
940 *
941 * Return values:
942 * 0 no problems
943 * EAGAIN caller should retry (like a recovery retry)
944 */
945 int
nfs4_start_open_seqid_sync(nfs4_open_owner_t * oop,mntinfo4_t * mi)946 nfs4_start_open_seqid_sync(nfs4_open_owner_t *oop, mntinfo4_t *mi)
947 {
948 int error = 0;
949 #ifdef DEBUG
950 static int ops = 0; /* fault injection */
951 #endif
952
953 #ifdef DEBUG
954 if (seqid_sync_faults && curthread != mi->mi_recovthread &&
955 ++ops % 5 == 0)
956 return (EAGAIN);
957 #endif
958
959 mutex_enter(&mi->mi_lock);
960 if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
961 curthread != mi->mi_recovthread)
962 error = EAGAIN;
963 mutex_exit(&mi->mi_lock);
964 if (error != 0)
965 goto done;
966
967 mutex_enter(&oop->oo_lock);
968
969 while (oop->oo_seqid_inuse) {
970 NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
971 "nfs4_start_open_seqid_sync waiting on cv"));
972
973 cv_wait(&oop->oo_cv_seqid_sync, &oop->oo_lock);
974 }
975
976 oop->oo_seqid_inuse = 1;
977
978 mutex_exit(&oop->oo_lock);
979
980 mutex_enter(&mi->mi_lock);
981 if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
982 curthread != mi->mi_recovthread)
983 error = EAGAIN;
984 mutex_exit(&mi->mi_lock);
985
986 if (error == EAGAIN)
987 nfs4_end_open_seqid_sync(oop);
988
989 NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
990 "nfs4_start_open_seqid_sync: error=%d", error));
991
992 done:
993 return (error);
994 }
995
996 #ifdef DEBUG
997 int bypass_otw[2];
998 #endif
999
1000 /*
1001 * Checks to see if the OPEN OTW is necessary that is, if it's already
1002 * been opened with the same access and deny bits we are now asking for.
1003 * Note, this assumes that *vpp is a rnode.
1004 */
1005 int
nfs4_is_otw_open_necessary(nfs4_open_owner_t * oop,int flag,vnode_t * vp,int just_been_created,int * errorp,int acc,nfs4_recov_state_t * rsp)1006 nfs4_is_otw_open_necessary(nfs4_open_owner_t *oop, int flag, vnode_t *vp,
1007 int just_been_created, int *errorp, int acc, nfs4_recov_state_t *rsp)
1008 {
1009 rnode4_t *rp;
1010 nfs4_open_stream_t *osp;
1011 open_delegation_type4 dt;
1012
1013 rp = VTOR4(vp);
1014
1015 /*
1016 * Grab the delegation type. This function is protected against
1017 * the delegation being returned by virtue of start_op (called
1018 * by nfs4open_otw) taking the r_deleg_recall_lock in read mode,
1019 * delegreturn requires this lock in write mode to proceed.
1020 */
1021 ASSERT(nfs_rw_lock_held(&rp->r_deleg_recall_lock, RW_READER));
1022 dt = get_dtype(rp);
1023
1024 /* returns with 'os_sync_lock' held */
1025 osp = find_open_stream(oop, rp);
1026
1027 if (osp) {
1028 uint32_t do_otw = 0;
1029
1030 if (osp->os_failed_reopen) {
1031 NFS4_DEBUG(nfs4_open_stream_debug, (CE_NOTE,
1032 "nfs4_is_otw_open_necessary: os_failed_reopen "
1033 "set on osp %p, cr %p, rp %s", (void *)osp,
1034 (void *)osp->os_open_owner->oo_cred,
1035 rnode4info(rp)));
1036 do_otw = 1;
1037 }
1038
1039 /*
1040 * check access/deny bits
1041 */
1042 if (!do_otw && (flag & FREAD))
1043 if (osp->os_share_acc_read == 0 &&
1044 dt == OPEN_DELEGATE_NONE)
1045 do_otw = 1;
1046
1047 if (!do_otw && (flag & FWRITE))
1048 if (osp->os_share_acc_write == 0 &&
1049 dt != OPEN_DELEGATE_WRITE)
1050 do_otw = 1;
1051
1052 if (!do_otw) {
1053 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
1054 "nfs4_is_otw_open_necessary: can skip this "
1055 "open OTW"));
1056 if (!just_been_created) {
1057 osp->os_open_ref_count++;
1058 if (flag & FREAD)
1059 osp->os_share_acc_read++;
1060 if (flag & FWRITE)
1061 osp->os_share_acc_write++;
1062 osp->os_share_deny_none++;
1063 }
1064
1065 /*
1066 * Need to reset this bitfield for the possible case
1067 * where we were going to OTW CLOSE the file, got a
1068 * non-recoverable error, and before we could retry
1069 * the CLOSE, OPENed the file again.
1070 */
1071 ASSERT(osp->os_open_owner->oo_seqid_inuse);
1072 osp->os_final_close = 0;
1073 osp->os_force_close = 0;
1074
1075 mutex_exit(&osp->os_sync_lock);
1076 open_stream_rele(osp, rp);
1077
1078 #ifdef DEBUG
1079 bypass_otw[0]++;
1080 #endif
1081
1082 *errorp = 0;
1083 return (0);
1084 }
1085 mutex_exit(&osp->os_sync_lock);
1086 open_stream_rele(osp, rp);
1087
1088 } else if (dt != OPEN_DELEGATE_NONE) {
1089 /*
1090 * Even if there isn't an open_stream yet, we may still be
1091 * able to bypass the otw open if the client owns a delegation.
1092 *
1093 * If you are asking for for WRITE, but I only have
1094 * a read delegation, then you still have to go otw.
1095 */
1096
1097 if (flag & FWRITE && dt == OPEN_DELEGATE_READ)
1098 return (1);
1099
1100 /*
1101 * TODO - evaluate the nfsace4
1102 */
1103
1104 /*
1105 * Check the access flags to make sure the caller
1106 * had permission.
1107 */
1108 if (flag & FREAD && !(acc & VREAD))
1109 return (1);
1110
1111 if (flag & FWRITE && !(acc & VWRITE))
1112 return (1);
1113
1114 /*
1115 * create_open_stream will add a reference to oop,
1116 * this will prevent the open_owner_rele done in
1117 * nfs4open_otw from destroying the open_owner.
1118 */
1119
1120 /* returns with 'os_sync_lock' held */
1121 osp = create_open_stream(oop, rp);
1122 if (osp == NULL)
1123 return (1);
1124
1125 osp->open_stateid = rp->r_deleg_stateid;
1126 osp->os_delegation = 1;
1127
1128 if (flag & FREAD)
1129 osp->os_share_acc_read++;
1130 if (flag & FWRITE)
1131 osp->os_share_acc_write++;
1132
1133 osp->os_share_deny_none++;
1134 mutex_exit(&osp->os_sync_lock);
1135
1136 open_stream_rele(osp, rp);
1137
1138 mutex_enter(&oop->oo_lock);
1139 oop->oo_just_created = NFS4_PERM_CREATED;
1140 mutex_exit(&oop->oo_lock);
1141
1142 ASSERT(rsp != NULL);
1143 if (rsp->rs_sp != NULL) {
1144 mutex_enter(&rsp->rs_sp->s_lock);
1145 nfs4_inc_state_ref_count_nolock(rsp->rs_sp,
1146 VTOMI4(vp));
1147 mutex_exit(&rsp->rs_sp->s_lock);
1148 }
1149 #ifdef DEBUG
1150 bypass_otw[1]++;
1151 #endif
1152
1153 *errorp = 0;
1154 return (0);
1155 }
1156
1157 return (1);
1158 }
1159
1160 static open_delegation_type4
get_dtype(rnode4_t * rp)1161 get_dtype(rnode4_t *rp)
1162 {
1163 open_delegation_type4 dt;
1164
1165 mutex_enter(&rp->r_statev4_lock);
1166 ASSERT(!rp->r_deleg_return_inprog);
1167 if (rp->r_deleg_return_pending)
1168 dt = OPEN_DELEGATE_NONE;
1169 else
1170 dt = rp->r_deleg_type;
1171 mutex_exit(&rp->r_statev4_lock);
1172
1173 return (dt);
1174 }
1175
1176 /*
1177 * Fill in *locker with the lock state arguments for a LOCK call. If
1178 * lop->lo_just_created == NFS4_JUST_CREATED, oop and osp must be non-NULL.
1179 * Caller must already hold the necessary seqid sync lock(s).
1180 */
1181
1182 void
nfs4_setup_lock_args(nfs4_lock_owner_t * lop,nfs4_open_owner_t * oop,nfs4_open_stream_t * osp,clientid4 clientid,locker4 * locker)1183 nfs4_setup_lock_args(nfs4_lock_owner_t *lop, nfs4_open_owner_t *oop,
1184 nfs4_open_stream_t *osp, clientid4 clientid, locker4 *locker)
1185 {
1186 ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);
1187 if (lop->lo_just_created == NFS4_JUST_CREATED) {
1188 /* this is a new lock request */
1189 open_to_lock_owner4 *nown;
1190
1191 ASSERT(oop != NULL);
1192 ASSERT(osp != NULL);
1193
1194 locker->new_lock_owner = TRUE;
1195 nown = &locker->locker4_u.open_owner;
1196 nown->open_seqid = nfs4_get_open_seqid(oop) + 1;
1197 mutex_enter(&osp->os_sync_lock);
1198 nown->open_stateid = osp->open_stateid;
1199 mutex_exit(&osp->os_sync_lock);
1200 nown->lock_seqid = lop->lock_seqid; /* initial, so no +1 */
1201
1202 nown->lock_owner.clientid = clientid;
1203 nown->lock_owner.owner_len = sizeof (lop->lock_owner_name);
1204 nown->lock_owner.owner_val =
1205 kmem_alloc(nown->lock_owner.owner_len, KM_SLEEP);
1206 bcopy(&lop->lock_owner_name, nown->lock_owner.owner_val,
1207 nown->lock_owner.owner_len);
1208 } else {
1209 exist_lock_owner4 *eown;
1210 /* have an existing lock owner */
1211
1212 locker->new_lock_owner = FALSE;
1213 eown = &locker->locker4_u.lock_owner;
1214 mutex_enter(&lop->lo_lock);
1215 eown->lock_stateid = lop->lock_stateid;
1216 mutex_exit(&lop->lo_lock);
1217 eown->lock_seqid = lop->lock_seqid + 1;
1218 }
1219 }
1220
1221 /*
1222 * This starts our use of the lock owner's lock seqid by setting
1223 * the lo_flags to NFS4_LOCK_SEQID_INUSE. We will wait (forever)
1224 * with a cv_wait() until we are woken up.
1225 *
1226 * Return values:
1227 * 0 no problems
1228 * EAGAIN caller should retry (like a recovery retry)
1229 */
1230 int
nfs4_start_lock_seqid_sync(nfs4_lock_owner_t * lop,mntinfo4_t * mi)1231 nfs4_start_lock_seqid_sync(nfs4_lock_owner_t *lop, mntinfo4_t *mi)
1232 {
1233 int error = 0;
1234 #ifdef DEBUG
1235 static int ops = 0; /* fault injection */
1236 #endif
1237
1238 #ifdef DEBUG
1239 if (seqid_sync_faults && curthread != mi->mi_recovthread &&
1240 ++ops % 7 == 0)
1241 return (EAGAIN);
1242 #endif
1243
1244 mutex_enter(&mi->mi_lock);
1245 if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
1246 curthread != mi->mi_recovthread)
1247 error = EAGAIN;
1248 mutex_exit(&mi->mi_lock);
1249 if (error != 0)
1250 goto done;
1251
1252 mutex_enter(&lop->lo_lock);
1253
1254 ASSERT(lop->lo_seqid_holder != curthread);
1255 while (lop->lo_flags & NFS4_LOCK_SEQID_INUSE) {
1256 NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
1257 "nfs4_start_lock_seqid_sync: waiting on cv"));
1258
1259 cv_wait(&lop->lo_cv_seqid_sync, &lop->lo_lock);
1260 }
1261 NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE, "nfs4_start_lock_seqid_sync: "
1262 "NFS4_LOCK_SEQID_INUSE"));
1263
1264 lop->lo_flags |= NFS4_LOCK_SEQID_INUSE;
1265 lop->lo_seqid_holder = curthread;
1266 mutex_exit(&lop->lo_lock);
1267
1268 mutex_enter(&mi->mi_lock);
1269 if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
1270 curthread != mi->mi_recovthread)
1271 error = EAGAIN;
1272 mutex_exit(&mi->mi_lock);
1273
1274 if (error == EAGAIN)
1275 nfs4_end_lock_seqid_sync(lop);
1276
1277 NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
1278 "nfs4_start_lock_seqid_sync: error=%d", error));
1279
1280 done:
1281 return (error);
1282 }
1283
1284 /*
1285 * This ends our use of the lock owner's lock seqid by setting
1286 * the appropiate flags and issuing a cv_signal to wake up another
1287 * thread waiting to use the lock seqid.
1288 */
1289 void
nfs4_end_lock_seqid_sync(nfs4_lock_owner_t * lop)1290 nfs4_end_lock_seqid_sync(nfs4_lock_owner_t *lop)
1291 {
1292 mutex_enter(&lop->lo_lock);
1293 ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);
1294 ASSERT(lop->lo_seqid_holder == curthread);
1295 lop->lo_flags &= ~NFS4_LOCK_SEQID_INUSE;
1296 lop->lo_seqid_holder = NULL;
1297 cv_signal(&lop->lo_cv_seqid_sync);
1298 mutex_exit(&lop->lo_lock);
1299 }
1300
1301 /*
1302 * Returns a reference to a lock owner via lopp, which has its lock seqid
1303 * synchronization started.
1304 * If the lock owner is in the 'just_created' state, then we return its open
1305 * owner and open stream and start the open seqid synchronization.
1306 *
1307 * Return value:
1308 * NFS4_OK no problems
1309 * NFS4ERR_DELAY there is lost state to recover; caller should retry
1310 * NFS4ERR_IO no open stream
1311 */
1312 nfsstat4
nfs4_find_or_create_lock_owner(pid_t pid,rnode4_t * rp,cred_t * cr,nfs4_open_owner_t ** oopp,nfs4_open_stream_t ** ospp,nfs4_lock_owner_t ** lopp)1313 nfs4_find_or_create_lock_owner(pid_t pid, rnode4_t *rp, cred_t *cr,
1314 nfs4_open_owner_t **oopp, nfs4_open_stream_t **ospp,
1315 nfs4_lock_owner_t **lopp)
1316 {
1317 nfs4_lock_owner_t *lop, *next_lop;
1318 mntinfo4_t *mi;
1319 int error = 0;
1320 nfsstat4 stat;
1321
1322 mi = VTOMI4(RTOV4(rp));
1323
1324 mutex_enter(&rp->r_statev4_lock);
1325
1326 lop = rp->r_lo_head.lo_next_rnode;
1327 while (lop != &rp->r_lo_head) {
1328 mutex_enter(&lop->lo_lock);
1329 if (lop->lo_pid == pid && lop->lo_valid != 0) {
1330 /* Found a matching lock owner */
1331 NFS4_DEBUG(nfs4_client_state_debug,
1332 (CE_NOTE, "nfs4_find_or_create_lock_owner: "
1333 "got a match"));
1334 lop->lo_ref_count++;
1335 break;
1336 }
1337 next_lop = lop->lo_next_rnode;
1338 mutex_exit(&lop->lo_lock);
1339 lop = next_lop;
1340 }
1341
1342 if (lop == &rp->r_lo_head) {
1343 /* create temporary lock owner */
1344 lop = create_lock_owner(rp, pid);
1345 }
1346 mutex_exit(&rp->r_statev4_lock);
1347
1348 /* Have a locked down lock owner struct now */
1349 if (lop->lo_just_created != NFS4_JUST_CREATED) {
1350 /* This is an existing lock owner */
1351 *oopp = NULL;
1352 *ospp = NULL;
1353 } else {
1354 /* Lock owner doesn't exist yet */
1355
1356 /* First grab open owner seqid synchronization */
1357 mutex_exit(&lop->lo_lock);
1358 *oopp = find_open_owner(cr, NFS4_PERM_CREATED, mi);
1359 if (*oopp == NULL)
1360 goto kill_new_lop;
1361 error = nfs4_start_open_seqid_sync(*oopp, mi);
1362 if (error == EAGAIN) {
1363 stat = NFS4ERR_DELAY;
1364 goto failed;
1365 }
1366 *ospp = find_open_stream(*oopp, rp);
1367 if (*ospp == NULL) {
1368 nfs4_end_open_seqid_sync(*oopp);
1369 goto kill_new_lop;
1370 }
1371 if ((*ospp)->os_failed_reopen) {
1372 mutex_exit(&(*ospp)->os_sync_lock);
1373 NFS4_DEBUG((nfs4_open_stream_debug ||
1374 nfs4_client_lock_debug), (CE_NOTE,
1375 "nfs4_find_or_create_lock_owner: os_failed_reopen;"
1376 "osp %p, cr %p, rp %s", (void *)(*ospp),
1377 (void *)cr, rnode4info(rp)));
1378 nfs4_end_open_seqid_sync(*oopp);
1379 stat = NFS4ERR_IO;
1380 goto failed;
1381 }
1382 mutex_exit(&(*ospp)->os_sync_lock);
1383
1384 /*
1385 * Now see if the lock owner has become permanent while we
1386 * had released our lock.
1387 */
1388 mutex_enter(&lop->lo_lock);
1389 if (lop->lo_just_created != NFS4_JUST_CREATED) {
1390 nfs4_end_open_seqid_sync(*oopp);
1391 open_stream_rele(*ospp, rp);
1392 open_owner_rele(*oopp);
1393 *oopp = NULL;
1394 *ospp = NULL;
1395 }
1396 }
1397 mutex_exit(&lop->lo_lock);
1398
1399 error = nfs4_start_lock_seqid_sync(lop, mi);
1400 if (error == EAGAIN) {
1401 if (*oopp != NULL)
1402 nfs4_end_open_seqid_sync(*oopp);
1403 stat = NFS4ERR_DELAY;
1404 goto failed;
1405 }
1406 ASSERT(error == 0);
1407
1408 *lopp = lop;
1409 return (NFS4_OK);
1410
1411 kill_new_lop:
1412 /*
1413 * A previous CLOSE was attempted but got EINTR, but the application
1414 * continued to use the unspecified state file descriptor. But now the
1415 * open stream is gone (which could also destroy the open owner), hence
1416 * we can no longer continue. The calling function should return EIO
1417 * to the application.
1418 */
1419 NFS4_DEBUG(nfs4_lost_rqst_debug || nfs4_client_lock_debug,
1420 (CE_NOTE, "nfs4_find_or_create_lock_owner: destroy newly created "
1421 "lop %p, oop %p, osp %p", (void *)lop, (void *)(*oopp),
1422 (void *)(*ospp)));
1423
1424 nfs4_rnode_remove_lock_owner(rp, lop);
1425 stat = NFS4ERR_IO;
1426
1427 failed:
1428 lock_owner_rele(lop);
1429 if (*oopp) {
1430 open_owner_rele(*oopp);
1431 *oopp = NULL;
1432 }
1433 if (*ospp) {
1434 open_stream_rele(*ospp, rp);
1435 *ospp = NULL;
1436 }
1437 return (stat);
1438 }
1439
1440 /*
1441 * This function grabs a recently freed open owner off of the freed open
1442 * owner list if there is a match on the cred 'cr'. It returns NULL if no
1443 * such match is found. It will set the 'oo_ref_count' and 'oo_valid' back
1444 * to both 1 (sane values) in the case a match is found.
1445 */
1446 static nfs4_open_owner_t *
find_freed_open_owner(cred_t * cr,nfs4_oo_hash_bucket_t * bucketp,mntinfo4_t * mi)1447 find_freed_open_owner(cred_t *cr, nfs4_oo_hash_bucket_t *bucketp,
1448 mntinfo4_t *mi)
1449 {
1450 nfs4_open_owner_t *foop;
1451
1452 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
1453 "find_freed_open_owner: cred %p", (void*)cr));
1454
1455 ASSERT(mutex_owned(&mi->mi_lock));
1456 ASSERT(mutex_owned(&bucketp->b_lock));
1457
1458 /* got hash bucket, search through freed open owners */
1459 for (foop = list_head(&mi->mi_foo_list); foop != NULL;
1460 foop = list_next(&mi->mi_foo_list, foop)) {
1461 if (!crcmp(foop->oo_cred, cr)) {
1462 NFS4_DEBUG(nfs4_client_foo_debug, (CE_NOTE,
1463 "find_freed_open_owner: got a match open owner "
1464 "%p", (void *)foop));
1465 foop->oo_ref_count = 1;
1466 foop->oo_valid = 1;
1467 list_remove(&mi->mi_foo_list, foop);
1468 mi->mi_foo_num--;
1469
1470 /* now add the struct into the cred hash table */
1471 list_insert_head(&bucketp->b_oo_hash_list, foop);
1472 return (foop);
1473 }
1474 }
1475
1476 return (NULL);
1477 }
1478
1479 /*
1480 * Insert the newly freed 'oop' into the mi's freed oop list,
1481 * always at the head of the list. If we've already reached
1482 * our maximum allowed number of freed open owners (mi_foo_max),
1483 * then remove the LRU open owner on the list (namely the tail).
1484 */
1485 static void
nfs4_free_open_owner(nfs4_open_owner_t * oop,mntinfo4_t * mi)1486 nfs4_free_open_owner(nfs4_open_owner_t *oop, mntinfo4_t *mi)
1487 {
1488 nfs4_open_owner_t *lru_foop;
1489
1490 if (mi->mi_foo_num < mi->mi_foo_max) {
1491 NFS4_DEBUG(nfs4_client_foo_debug, (CE_NOTE,
1492 "nfs4_free_open_owner: num free %d, max free %d, "
1493 "insert open owner %p for mntinfo4 %p",
1494 mi->mi_foo_num, mi->mi_foo_max, (void *)oop,
1495 (void *)mi));
1496 list_insert_head(&mi->mi_foo_list, oop);
1497 mi->mi_foo_num++;
1498 return;
1499 }
1500
1501 /* need to replace a freed open owner */
1502
1503 lru_foop = list_tail(&mi->mi_foo_list);
1504
1505 NFS4_DEBUG(nfs4_client_foo_debug, (CE_NOTE,
1506 "nfs4_free_open_owner: destroy %p, insert %p",
1507 (void *)lru_foop, (void *)oop));
1508
1509 list_remove(&mi->mi_foo_list, lru_foop);
1510 nfs4_destroy_open_owner(lru_foop);
1511
1512 /* head always has latest freed oop */
1513 list_insert_head(&mi->mi_foo_list, oop);
1514 }
1515
1516 void
nfs4_destroy_open_owner(nfs4_open_owner_t * oop)1517 nfs4_destroy_open_owner(nfs4_open_owner_t *oop)
1518 {
1519 ASSERT(oop != NULL);
1520
1521 crfree(oop->oo_cred);
1522 if (oop->oo_cred_otw)
1523 crfree(oop->oo_cred_otw);
1524 mutex_destroy(&oop->oo_lock);
1525 cv_destroy(&oop->oo_cv_seqid_sync);
1526 kmem_free(oop, sizeof (*oop));
1527 }
1528
1529 seqid4
nfs4_get_open_seqid(nfs4_open_owner_t * oop)1530 nfs4_get_open_seqid(nfs4_open_owner_t *oop)
1531 {
1532 ASSERT(oop->oo_seqid_inuse);
1533 return (oop->oo_seqid);
1534 }
1535
1536 /*
1537 * This set's the open seqid for a <open owner/ mntinfo4> pair.
1538 */
1539 void
nfs4_set_open_seqid(seqid4 seqid,nfs4_open_owner_t * oop,nfs4_tag_type_t tag_type)1540 nfs4_set_open_seqid(seqid4 seqid, nfs4_open_owner_t *oop,
1541 nfs4_tag_type_t tag_type)
1542 {
1543 ASSERT(oop->oo_seqid_inuse);
1544 oop->oo_seqid = seqid;
1545 oop->oo_last_good_seqid = seqid;
1546 oop->oo_last_good_op = tag_type;
1547 }
1548
1549 /*
1550 * This bumps the current open seqid for the open owner 'oop'.
1551 */
1552 void
nfs4_get_and_set_next_open_seqid(nfs4_open_owner_t * oop,nfs4_tag_type_t tag_type)1553 nfs4_get_and_set_next_open_seqid(nfs4_open_owner_t *oop,
1554 nfs4_tag_type_t tag_type)
1555 {
1556 ASSERT(oop->oo_seqid_inuse);
1557 oop->oo_seqid++;
1558 oop->oo_last_good_seqid = oop->oo_seqid;
1559 oop->oo_last_good_op = tag_type;
1560 }
1561
1562 /*
1563 * If no open owner was provided, this function takes the cred to find an
1564 * open owner within the given mntinfo4_t. Either way we return the
1565 * open owner's OTW credential if it exists; otherwise returns the
1566 * supplied 'cr'.
1567 *
1568 * A hold is put on the returned credential, and it is up to the caller
1569 * to free the cred.
1570 */
1571 cred_t *
nfs4_get_otw_cred(cred_t * cr,mntinfo4_t * mi,nfs4_open_owner_t * provided_oop)1572 nfs4_get_otw_cred(cred_t *cr, mntinfo4_t *mi, nfs4_open_owner_t *provided_oop)
1573 {
1574 cred_t *ret_cr;
1575 nfs4_open_owner_t *oop = provided_oop;
1576
1577 if (oop == NULL)
1578 oop = find_open_owner(cr, NFS4_PERM_CREATED, mi);
1579 if (oop != NULL) {
1580 mutex_enter(&oop->oo_lock);
1581 if (oop->oo_cred_otw)
1582 ret_cr = oop->oo_cred_otw;
1583 else
1584 ret_cr = cr;
1585 crhold(ret_cr);
1586 mutex_exit(&oop->oo_lock);
1587 if (provided_oop == NULL)
1588 open_owner_rele(oop);
1589 } else {
1590 ret_cr = cr;
1591 crhold(ret_cr);
1592 }
1593 return (ret_cr);
1594 }
1595
1596 /*
1597 * Retrieves the next open stream in the rnode's list if an open stream
1598 * is provided; otherwise gets the first open stream in the list.
1599 * The open owner for that open stream is then retrieved, and if its
1600 * oo_cred_otw exists then it is returned; otherwise the provided 'cr'
1601 * is returned. *osp is set to the 'found' open stream.
1602 *
1603 * Note: we don't set *osp to the open stream retrieved via the
1604 * optimized check since that won't necessarily be at the beginning
1605 * of the rnode list, and if that osp doesn't work we'd like to
1606 * check _all_ open streams (starting from the beginning of the
1607 * rnode list).
1608 */
1609 cred_t *
nfs4_get_otw_cred_by_osp(rnode4_t * rp,cred_t * cr,nfs4_open_stream_t ** osp,bool_t * first_time,bool_t * last_time)1610 nfs4_get_otw_cred_by_osp(rnode4_t *rp, cred_t *cr,
1611 nfs4_open_stream_t **osp, bool_t *first_time, bool_t *last_time)
1612 {
1613 nfs4_open_stream_t *next_osp = NULL;
1614 cred_t *ret_cr;
1615
1616 ASSERT(cr != NULL);
1617 /*
1618 * As an optimization, try to find the open owner
1619 * for the cred provided since that's most likely
1620 * to work.
1621 */
1622 if (*first_time) {
1623 nfs4_open_owner_t *oop;
1624
1625 oop = find_open_owner(cr, NFS4_PERM_CREATED, VTOMI4(RTOV4(rp)));
1626 if (oop) {
1627 next_osp = find_open_stream(oop, rp);
1628 if (next_osp)
1629 mutex_exit(&next_osp->os_sync_lock);
1630 open_owner_rele(oop);
1631 }
1632 }
1633 if (next_osp == NULL) {
1634 int delay_rele = 0;
1635 *first_time = FALSE;
1636
1637 /* return the next open stream for this rnode */
1638 mutex_enter(&rp->r_os_lock);
1639 /* Now, no one can add or delete to rp's open streams list */
1640
1641 if (*osp) {
1642 next_osp = list_next(&rp->r_open_streams, *osp);
1643 /*
1644 * Delay the rele of *osp until after we drop
1645 * r_os_lock to not deadlock with oo_lock
1646 * via an open_stream_rele()->open_owner_rele().
1647 */
1648 delay_rele = 1;
1649 } else {
1650 next_osp = list_head(&rp->r_open_streams);
1651 }
1652 if (next_osp) {
1653 nfs4_open_stream_t *tmp_osp;
1654
1655 /* find the next valid open stream */
1656 mutex_enter(&next_osp->os_sync_lock);
1657 while (next_osp && !next_osp->os_valid) {
1658 tmp_osp =
1659 list_next(&rp->r_open_streams, next_osp);
1660 mutex_exit(&next_osp->os_sync_lock);
1661 next_osp = tmp_osp;
1662 if (next_osp)
1663 mutex_enter(&next_osp->os_sync_lock);
1664 }
1665 if (next_osp) {
1666 next_osp->os_ref_count++;
1667 mutex_exit(&next_osp->os_sync_lock);
1668 }
1669 }
1670 mutex_exit(&rp->r_os_lock);
1671 if (delay_rele)
1672 open_stream_rele(*osp, rp);
1673 }
1674
1675 if (next_osp) {
1676 nfs4_open_owner_t *oop;
1677
1678 oop = next_osp->os_open_owner;
1679 mutex_enter(&oop->oo_lock);
1680 if (oop->oo_cred_otw)
1681 ret_cr = oop->oo_cred_otw;
1682 else
1683 ret_cr = cr;
1684 crhold(ret_cr);
1685 mutex_exit(&oop->oo_lock);
1686 if (*first_time) {
1687 open_stream_rele(next_osp, rp);
1688 *osp = NULL;
1689 } else
1690 *osp = next_osp;
1691 } else {
1692 /* just return the cred provided to us */
1693 *last_time = TRUE;
1694 *osp = NULL;
1695 ret_cr = cr;
1696 crhold(ret_cr);
1697 }
1698
1699 *first_time = FALSE;
1700 return (ret_cr);
1701 }
1702
1703 void
nfs4_init_stateid_types(nfs4_stateid_types_t * sid_tp)1704 nfs4_init_stateid_types(nfs4_stateid_types_t *sid_tp)
1705 {
1706 bzero(&sid_tp->d_sid, sizeof (stateid4));
1707 bzero(&sid_tp->l_sid, sizeof (stateid4));
1708 bzero(&sid_tp->o_sid, sizeof (stateid4));
1709 sid_tp->cur_sid_type = NO_SID;
1710 }
1711
1712 void
nfs4_save_stateid(stateid4 * s1,nfs4_stateid_types_t * sid_tp)1713 nfs4_save_stateid(stateid4 *s1, nfs4_stateid_types_t *sid_tp)
1714 {
1715 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE,
1716 "nfs4_save_stateid: saved %s stateid",
1717 sid_tp->cur_sid_type == DEL_SID ? "delegation" :
1718 sid_tp->cur_sid_type == LOCK_SID ? "lock" :
1719 sid_tp->cur_sid_type == OPEN_SID ? "open" : "special"));
1720
1721 switch (sid_tp->cur_sid_type) {
1722 case DEL_SID:
1723 sid_tp->d_sid = *s1;
1724 break;
1725 case LOCK_SID:
1726 sid_tp->l_sid = *s1;
1727 break;
1728 case OPEN_SID:
1729 sid_tp->o_sid = *s1;
1730 break;
1731 case SPEC_SID:
1732 default:
1733 cmn_err(CE_PANIC, "nfs4_save_stateid: illegal "
1734 "stateid type %d", sid_tp->cur_sid_type);
1735 }
1736 }
1737
1738 /*
1739 * We got NFS4ERR_BAD_SEQID. Setup some arguments to pass to recovery.
1740 * Caller is responsible for freeing.
1741 */
1742 nfs4_bseqid_entry_t *
nfs4_create_bseqid_entry(nfs4_open_owner_t * oop,nfs4_lock_owner_t * lop,vnode_t * vp,pid_t pid,nfs4_tag_type_t tag,seqid4 seqid)1743 nfs4_create_bseqid_entry(nfs4_open_owner_t *oop, nfs4_lock_owner_t *lop,
1744 vnode_t *vp, pid_t pid, nfs4_tag_type_t tag, seqid4 seqid)
1745 {
1746 nfs4_bseqid_entry_t *bsep;
1747
1748 bsep = kmem_alloc(sizeof (*bsep), KM_SLEEP);
1749 bsep->bs_oop = oop;
1750 bsep->bs_lop = lop;
1751 bsep->bs_vp = vp;
1752 bsep->bs_pid = pid;
1753 bsep->bs_tag = tag;
1754 bsep->bs_seqid = seqid;
1755
1756 return (bsep);
1757 }
1758
1759 void
nfs4open_dg_save_lost_rqst(int error,nfs4_lost_rqst_t * lost_rqstp,nfs4_open_owner_t * oop,nfs4_open_stream_t * osp,cred_t * cr,vnode_t * vp,int access_close,int deny_close)1760 nfs4open_dg_save_lost_rqst(int error, nfs4_lost_rqst_t *lost_rqstp,
1761 nfs4_open_owner_t *oop, nfs4_open_stream_t *osp, cred_t *cr,
1762 vnode_t *vp, int access_close, int deny_close)
1763 {
1764 lost_rqstp->lr_putfirst = FALSE;
1765
1766 ASSERT(vp != NULL);
1767 if (error == ETIMEDOUT || error == EINTR ||
1768 NFS4_FRC_UNMT_ERR(error, vp->v_vfsp)) {
1769 NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
1770 "nfs4open_dg_save_lost_rqst: error %d", error));
1771
1772 lost_rqstp->lr_op = OP_OPEN_DOWNGRADE;
1773 /*
1774 * The vp is held and rele'd via the recovery code.
1775 * See nfs4_save_lost_rqst.
1776 */
1777 lost_rqstp->lr_vp = vp;
1778 lost_rqstp->lr_dvp = NULL;
1779 lost_rqstp->lr_oop = oop;
1780 lost_rqstp->lr_osp = osp;
1781 lost_rqstp->lr_lop = NULL;
1782 lost_rqstp->lr_cr = cr;
1783 lost_rqstp->lr_flk = NULL;
1784 lost_rqstp->lr_dg_acc = access_close;
1785 lost_rqstp->lr_dg_deny = deny_close;
1786 lost_rqstp->lr_putfirst = FALSE;
1787 } else {
1788 lost_rqstp->lr_op = 0;
1789 }
1790 }
1791
1792 /*
1793 * Change the access and deny bits of an OPEN.
1794 * If recovery is needed, *recov_credpp is set to the cred used OTW,
1795 * a hold is placed on it, and *recov_seqidp is set to the seqid used OTW.
1796 */
1797 void
nfs4_open_downgrade(int access_close,int deny_close,nfs4_open_owner_t * oop,nfs4_open_stream_t * osp,vnode_t * vp,cred_t * cr,nfs4_lost_rqst_t * lrp,nfs4_error_t * ep,cred_t ** recov_credpp,seqid4 * recov_seqidp)1798 nfs4_open_downgrade(int access_close, int deny_close, nfs4_open_owner_t *oop,
1799 nfs4_open_stream_t *osp, vnode_t *vp, cred_t *cr, nfs4_lost_rqst_t *lrp,
1800 nfs4_error_t *ep, cred_t **recov_credpp, seqid4 *recov_seqidp)
1801 {
1802 mntinfo4_t *mi;
1803 int downgrade_acc, downgrade_deny;
1804 int new_acc, new_deny;
1805 COMPOUND4args_clnt args;
1806 COMPOUND4res_clnt res;
1807 OPEN_DOWNGRADE4res *odg_res;
1808 nfs_argop4 argop[3];
1809 nfs_resop4 *resop;
1810 rnode4_t *rp;
1811 bool_t needrecov = FALSE;
1812 int doqueue = 1;
1813 seqid4 seqid = 0;
1814 cred_t *cred_otw;
1815 hrtime_t t;
1816
1817 ASSERT(mutex_owned(&osp->os_sync_lock));
1818 #if DEBUG
1819 mutex_enter(&oop->oo_lock);
1820 ASSERT(oop->oo_seqid_inuse);
1821 mutex_exit(&oop->oo_lock);
1822 #endif
1823
1824
1825 if (access_close == 0 && deny_close == 0) {
1826 nfs4_error_zinit(ep);
1827 return;
1828 }
1829
1830 cred_otw = nfs4_get_otw_cred(cr, VTOMI4(vp), oop);
1831
1832 cred_retry:
1833 nfs4_error_zinit(ep);
1834 downgrade_acc = 0;
1835 downgrade_deny = 0;
1836 mi = VTOMI4(vp);
1837 rp = VTOR4(vp);
1838
1839 /*
1840 * Check to see if the open stream got closed before we go OTW,
1841 * now that we have acquired the 'os_sync_lock'.
1842 */
1843 if (!osp->os_valid) {
1844 NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
1845 " open stream has already been closed, return success"));
1846 /* error has already been set */
1847 goto no_args_out;
1848 }
1849
1850 /* If the file failed recovery, just quit. */
1851 mutex_enter(&rp->r_statelock);
1852 if (rp->r_flags & R4RECOVERR) {
1853 mutex_exit(&rp->r_statelock);
1854 ep->error = EIO;
1855 goto no_args_out;
1856 }
1857 mutex_exit(&rp->r_statelock);
1858
1859 seqid = nfs4_get_open_seqid(oop) + 1;
1860
1861 NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
1862 "access_close %d, acc_read %"PRIu64" acc_write %"PRIu64"",
1863 access_close, osp->os_share_acc_read, osp->os_share_acc_write));
1864
1865 /* If we're closing the last READ, need to downgrade */
1866 if ((access_close & FREAD) && (osp->os_share_acc_read == 1))
1867 downgrade_acc |= OPEN4_SHARE_ACCESS_READ;
1868
1869 /* if we're closing the last WRITE, need to downgrade */
1870 if ((access_close & FWRITE) && (osp->os_share_acc_write == 1))
1871 downgrade_acc |= OPEN4_SHARE_ACCESS_WRITE;
1872
1873 downgrade_deny = OPEN4_SHARE_DENY_NONE;
1874
1875 new_acc = 0;
1876 new_deny = 0;
1877
1878 /* set our new access and deny share bits */
1879 if ((osp->os_share_acc_read > 0) &&
1880 !(downgrade_acc & OPEN4_SHARE_ACCESS_READ))
1881 new_acc |= OPEN4_SHARE_ACCESS_READ;
1882 if ((osp->os_share_acc_write > 0) &&
1883 !(downgrade_acc & OPEN4_SHARE_ACCESS_WRITE))
1884 new_acc |= OPEN4_SHARE_ACCESS_WRITE;
1885
1886 new_deny = OPEN4_SHARE_DENY_NONE;
1887
1888 NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
1889 "downgrade acc 0x%x deny 0x%x", downgrade_acc, downgrade_deny));
1890 NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
1891 "new acc 0x%x deny 0x%x", new_acc, new_deny));
1892
1893 /*
1894 * Check to see if we aren't actually doing any downgrade or
1895 * if this is the last 'close' but the file is still mmapped.
1896 * Skip this if this a lost request resend so we don't decrement
1897 * the osp's share counts more than once.
1898 */
1899 if (!lrp &&
1900 ((downgrade_acc == 0 && downgrade_deny == 0) ||
1901 (new_acc == 0 && new_deny == 0))) {
1902 /*
1903 * No downgrade to do, but still need to
1904 * update osp's os_share_* counts.
1905 */
1906 NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE,
1907 "nfs4_open_downgrade: just lower the osp's count by %s",
1908 (access_close & FREAD) && (access_close & FWRITE) ?
1909 "read and write" : (access_close & FREAD) ? "read" :
1910 (access_close & FWRITE) ? "write" : "bogus"));
1911 if (access_close & FREAD)
1912 osp->os_share_acc_read--;
1913 if (access_close & FWRITE)
1914 osp->os_share_acc_write--;
1915 osp->os_share_deny_none--;
1916 nfs4_error_zinit(ep);
1917
1918 goto no_args_out;
1919 }
1920
1921 if (osp->os_orig_oo_name != oop->oo_name) {
1922 ep->error = EIO;
1923 goto no_args_out;
1924 }
1925
1926 /* setup the COMPOUND args */
1927 if (lrp)
1928 args.ctag = TAG_OPEN_DG_LOST;
1929 else
1930 args.ctag = TAG_OPEN_DG;
1931
1932 args.array_len = 3;
1933 args.array = argop;
1934
1935 /* putfh */
1936 argop[0].argop = OP_CPUTFH;
1937 argop[0].nfs_argop4_u.opcputfh.sfh = rp->r_fh;
1938
1939 argop[1].argop = OP_GETATTR;
1940 argop[1].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK;
1941 argop[1].nfs_argop4_u.opgetattr.mi = mi;
1942
1943 ASSERT(mutex_owned(&osp->os_sync_lock));
1944 ASSERT(osp->os_delegation == FALSE);
1945
1946 /* open downgrade */
1947 argop[2].argop = OP_OPEN_DOWNGRADE;
1948 argop[2].nfs_argop4_u.opopen_downgrade.open_stateid = osp->open_stateid;
1949 argop[2].nfs_argop4_u.opopen_downgrade.share_access = new_acc;
1950 argop[2].nfs_argop4_u.opopen_downgrade.share_deny = new_deny;
1951 argop[2].nfs_argop4_u.opopen_downgrade.seqid = seqid;
1952
1953 t = gethrtime();
1954
1955 rfs4call(mi, &args, &res, cred_otw, &doqueue, 0, ep);
1956
1957 if (ep->error == 0 && nfs4_need_to_bump_seqid(&res))
1958 nfs4_set_open_seqid(seqid, oop, args.ctag);
1959
1960 if ((ep->error == EACCES ||
1961 (ep->error == 0 && res.status == NFS4ERR_ACCESS)) &&
1962 cred_otw != cr) {
1963 crfree(cred_otw);
1964 cred_otw = cr;
1965 crhold(cred_otw);
1966 if (!ep->error)
1967 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1968 goto cred_retry;
1969 }
1970
1971 needrecov = nfs4_needs_recovery(ep, TRUE, mi->mi_vfsp);
1972
1973 if (needrecov && recov_credpp) {
1974 *recov_credpp = cred_otw;
1975 crhold(*recov_credpp);
1976 if (recov_seqidp)
1977 *recov_seqidp = seqid;
1978 }
1979
1980 if (!ep->error && !res.status) {
1981 /* get the open downgrade results */
1982 resop = &res.array[2];
1983 odg_res = &resop->nfs_resop4_u.opopen_downgrade;
1984
1985 osp->open_stateid = odg_res->open_stateid;
1986
1987 /* set the open streams new access/deny bits */
1988 if (access_close & FREAD)
1989 osp->os_share_acc_read--;
1990 if (access_close & FWRITE)
1991 osp->os_share_acc_write--;
1992 osp->os_share_deny_none--;
1993 osp->os_dc_openacc = new_acc;
1994
1995 nfs4_attr_cache(vp,
1996 &res.array[1].nfs_resop4_u.opgetattr.ga_res,
1997 t, cred_otw, TRUE, NULL);
1998 }
1999
2000 if (!ep->error)
2001 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
2002
2003 no_args_out:
2004 crfree(cred_otw);
2005 }
2006
2007 /*
2008 * If an OPEN request gets ETIMEDOUT or EINTR (that includes bailing out
2009 * because the filesystem was forcibly unmounted) then we don't know if we
2010 * potentially left state dangling on the server, therefore the recovery
2011 * framework makes this call to resend the OPEN request and then undo it.
2012 */
2013 void
nfs4_resend_open_otw(vnode_t ** vpp,nfs4_lost_rqst_t * resend_rqstp,nfs4_error_t * ep)2014 nfs4_resend_open_otw(vnode_t **vpp, nfs4_lost_rqst_t *resend_rqstp,
2015 nfs4_error_t *ep)
2016 {
2017 COMPOUND4args_clnt args;
2018 COMPOUND4res_clnt res;
2019 nfs_argop4 argop[4];
2020 GETFH4res *gf_res = NULL;
2021 OPEN4cargs *open_args;
2022 OPEN4res *op_res;
2023 char *destcfp;
2024 int destclen;
2025 nfs4_ga_res_t *garp;
2026 vnode_t *dvp = NULL, *vp = NULL;
2027 rnode4_t *rp = NULL, *drp = NULL;
2028 cred_t *cr = NULL;
2029 seqid4 seqid;
2030 nfs4_open_owner_t *oop = NULL;
2031 nfs4_open_stream_t *osp = NULL;
2032 component4 *srcfp;
2033 open_claim_type4 claim;
2034 mntinfo4_t *mi;
2035 int doqueue = 1;
2036 bool_t retry_open = FALSE;
2037 int created_osp = 0;
2038 hrtime_t t;
2039 char *failed_msg = "";
2040 int fh_different;
2041 int reopen = 0;
2042
2043 nfs4_error_zinit(ep);
2044
2045 cr = resend_rqstp->lr_cr;
2046 dvp = resend_rqstp->lr_dvp;
2047
2048 vp = *vpp;
2049 if (vp) {
2050 ASSERT(nfs4_consistent_type(vp));
2051 rp = VTOR4(vp);
2052 }
2053
2054 if (rp) {
2055 /* If the file failed recovery, just quit. */
2056 mutex_enter(&rp->r_statelock);
2057 if (rp->r_flags & R4RECOVERR) {
2058 mutex_exit(&rp->r_statelock);
2059 ep->error = EIO;
2060 return;
2061 }
2062 mutex_exit(&rp->r_statelock);
2063 }
2064
2065 if (dvp) {
2066 drp = VTOR4(dvp);
2067 /* If the parent directory failed recovery, just quit. */
2068 mutex_enter(&drp->r_statelock);
2069 if (drp->r_flags & R4RECOVERR) {
2070 mutex_exit(&drp->r_statelock);
2071 ep->error = EIO;
2072 return;
2073 }
2074 mutex_exit(&drp->r_statelock);
2075 } else
2076 reopen = 1; /* NULL dvp means this is a reopen */
2077
2078 claim = resend_rqstp->lr_oclaim;
2079 ASSERT(claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR);
2080
2081 args.ctag = TAG_OPEN_LOST;
2082 args.array_len = 4;
2083 args.array = argop;
2084
2085 argop[0].argop = OP_CPUTFH;
2086 if (reopen) {
2087 ASSERT(vp != NULL);
2088
2089 mi = VTOMI4(vp);
2090 /*
2091 * if this is a file mount then
2092 * use the mntinfo parentfh
2093 */
2094 argop[0].nfs_argop4_u.opcputfh.sfh =
2095 (vp->v_flag & VROOT) ? mi->mi_srvparentfh :
2096 VTOSV(vp)->sv_dfh;
2097 args.ctag = TAG_REOPEN_LOST;
2098 } else {
2099 argop[0].nfs_argop4_u.opcputfh.sfh = VTOR4(dvp)->r_fh;
2100 mi = VTOMI4(dvp);
2101 }
2102
2103 argop[1].argop = OP_COPEN;
2104 open_args = &argop[1].nfs_argop4_u.opcopen;
2105 open_args->claim = claim;
2106
2107 /*
2108 * If we sent over a OPEN with CREATE then the only
2109 * thing we care about is to not leave dangling state
2110 * on the server, not whether the file we potentially
2111 * created remains on the server. So even though the
2112 * lost open request specified a CREATE, we only wish
2113 * to do a non-CREATE OPEN.
2114 */
2115 open_args->opentype = OPEN4_NOCREATE;
2116
2117 srcfp = &resend_rqstp->lr_ofile;
2118 destclen = srcfp->utf8string_len;
2119 destcfp = kmem_alloc(destclen + 1, KM_SLEEP);
2120 bcopy(srcfp->utf8string_val, destcfp, destclen);
2121 destcfp[destclen] = '\0';
2122 if (claim == CLAIM_DELEGATE_CUR) {
2123 open_args->open_claim4_u.delegate_cur_info.delegate_stateid =
2124 resend_rqstp->lr_ostateid;
2125 open_args->open_claim4_u.delegate_cur_info.cfile = destcfp;
2126 } else {
2127 open_args->open_claim4_u.cfile = destcfp;
2128 }
2129
2130 open_args->share_access = resend_rqstp->lr_oacc;
2131 open_args->share_deny = resend_rqstp->lr_odeny;
2132 oop = resend_rqstp->lr_oop;
2133 ASSERT(oop != NULL);
2134
2135 open_args->owner.clientid = mi2clientid(mi);
2136 /* this length never changes */
2137 open_args->owner.owner_len = sizeof (oop->oo_name);
2138 open_args->owner.owner_val =
2139 kmem_alloc(open_args->owner.owner_len, KM_SLEEP);
2140
2141 ep->error = nfs4_start_open_seqid_sync(oop, mi);
2142 ASSERT(ep->error == 0); /* recov thread always succeeds */
2143 /*
2144 * We can get away with not saving the seqid upon detection
2145 * of a lost request, and now just use the open owner's current
2146 * seqid since we only allow one op OTW per seqid and lost
2147 * requests are saved FIFO.
2148 */
2149 seqid = nfs4_get_open_seqid(oop) + 1;
2150 open_args->seqid = seqid;
2151
2152 bcopy(&oop->oo_name, open_args->owner.owner_val,
2153 open_args->owner.owner_len);
2154
2155 /* getfh */
2156 argop[2].argop = OP_GETFH;
2157
2158 /* Construct the getattr part of the compound */
2159 argop[3].argop = OP_GETATTR;
2160 argop[3].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK;
2161 argop[3].nfs_argop4_u.opgetattr.mi = mi;
2162
2163 res.array = NULL;
2164
2165 t = gethrtime();
2166
2167 rfs4call(mi, &args, &res, cr, &doqueue, 0, ep);
2168
2169 if (ep->error == 0 && nfs4_need_to_bump_seqid(&res))
2170 nfs4_set_open_seqid(seqid, oop, args.ctag);
2171
2172 NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
2173 "nfs4_resend_open_otw: error %d stat %d", ep->error, res.status));
2174
2175 if (ep->error || res.status)
2176 goto err_out;
2177
2178 op_res = &res.array[1].nfs_resop4_u.opopen;
2179 gf_res = &res.array[2].nfs_resop4_u.opgetfh;
2180 garp = &res.array[3].nfs_resop4_u.opgetattr.ga_res;
2181
2182 if (!vp) {
2183 int rnode_err = 0;
2184 nfs4_sharedfh_t *sfh;
2185
2186 /*
2187 * If we can't decode all the attributes they are not usable,
2188 * just make the vnode.
2189 */
2190
2191 sfh = sfh4_get(&gf_res->object, VTOMI4(dvp));
2192 *vpp = makenfs4node(sfh, garp, dvp->v_vfsp, t, cr, dvp,
2193 fn_get(VTOSV(dvp)->sv_name,
2194 open_args->open_claim4_u.cfile, sfh));
2195 sfh4_rele(&sfh);
2196 NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
2197 "nfs4_resend_open_otw: made vp %p for file %s",
2198 (void *)(*vpp), open_args->open_claim4_u.cfile));
2199
2200 if (ep->error)
2201 PURGE_ATTRCACHE4(*vpp);
2202
2203 /*
2204 * For the newly created *vpp case, make sure the rnode
2205 * isn't bad before using it.
2206 */
2207 mutex_enter(&(VTOR4(*vpp))->r_statelock);
2208 if (VTOR4(*vpp)->r_flags & R4RECOVERR)
2209 rnode_err = EIO;
2210 mutex_exit(&(VTOR4(*vpp))->r_statelock);
2211
2212 if (rnode_err) {
2213 NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
2214 "nfs4_resend_open_otw: rp %p is bad",
2215 (void *)VTOR4(*vpp)));
2216 ep->error = rnode_err;
2217 goto err_out;
2218 }
2219
2220 vp = *vpp;
2221 rp = VTOR4(vp);
2222 }
2223
2224 if (reopen) {
2225 /*
2226 * Check if the path we reopened really is the same
2227 * file. We could end up in a situation where the file
2228 * was removed and a new file created with the same name.
2229 */
2230 (void) nfs_rw_enter_sig(&mi->mi_fh_lock, RW_READER, 0);
2231 fh_different =
2232 (nfs4cmpfh(&rp->r_fh->sfh_fh, &gf_res->object) != 0);
2233 if (fh_different) {
2234 if (mi->mi_fh_expire_type == FH4_PERSISTENT ||
2235 mi->mi_fh_expire_type & FH4_NOEXPIRE_WITH_OPEN) {
2236 /* Oops, we don't have the same file */
2237 if (mi->mi_fh_expire_type == FH4_PERSISTENT)
2238 failed_msg =
2239 "Couldn't reopen: Persistant "
2240 "file handle changed";
2241 else
2242 failed_msg =
2243 "Couldn't reopen: Volatile "
2244 "(no expire on open) file handle "
2245 "changed";
2246
2247 nfs4_end_open_seqid_sync(oop);
2248 kmem_free(destcfp, destclen + 1);
2249 nfs4args_copen_free(open_args);
2250 (void) xdr_free(xdr_COMPOUND4res_clnt,
2251 (caddr_t)&res);
2252 nfs_rw_exit(&mi->mi_fh_lock);
2253 nfs4_fail_recov(vp, failed_msg, ep->error,
2254 ep->stat);
2255 return;
2256 } else {
2257 /*
2258 * We have volatile file handles that don't
2259 * compare. If the fids are the same then we
2260 * assume that the file handle expired but the
2261 * rnode still refers to the same file object.
2262 *
2263 * First check that we have fids or not.
2264 * If we don't we have a dumb server so we will
2265 * just assume everything is ok for now.
2266 */
2267 if (!ep->error &&
2268 garp->n4g_va.va_mask & AT_NODEID &&
2269 rp->r_attr.va_mask & AT_NODEID &&
2270 rp->r_attr.va_nodeid !=
2271 garp->n4g_va.va_nodeid) {
2272 /*
2273 * We have fids, but they don't
2274 * compare. So kill the file.
2275 */
2276 failed_msg =
2277 "Couldn't reopen: file handle "
2278 "changed due to mismatched fids";
2279 nfs4_end_open_seqid_sync(oop);
2280 kmem_free(destcfp, destclen + 1);
2281 nfs4args_copen_free(open_args);
2282 (void) xdr_free(xdr_COMPOUND4res_clnt,
2283 (caddr_t)&res);
2284 nfs_rw_exit(&mi->mi_fh_lock);
2285 nfs4_fail_recov(vp, failed_msg,
2286 ep->error, ep->stat);
2287 return;
2288 } else {
2289 /*
2290 * We have volatile file handles that
2291 * refer to the same file (at least
2292 * they have the same fid) or we don't
2293 * have fids so we can't tell. :(. We'll
2294 * be a kind and accepting client so
2295 * we'll update the rnode's file
2296 * handle with the otw handle.
2297 *
2298 * We need to drop mi->mi_fh_lock since
2299 * sfh4_update acquires it. Since there
2300 * is only one recovery thread there is
2301 * no race.
2302 */
2303 nfs_rw_exit(&mi->mi_fh_lock);
2304 sfh4_update(rp->r_fh, &gf_res->object);
2305 }
2306 }
2307 } else {
2308 nfs_rw_exit(&mi->mi_fh_lock);
2309 }
2310 }
2311
2312 ASSERT(nfs4_consistent_type(vp));
2313
2314 if (op_res->rflags & OPEN4_RESULT_CONFIRM)
2315 nfs4open_confirm(vp, &seqid, &op_res->stateid, cr, TRUE,
2316 &retry_open, oop, TRUE, ep, NULL);
2317 if (ep->error || ep->stat) {
2318 nfs4_end_open_seqid_sync(oop);
2319 kmem_free(destcfp, destclen + 1);
2320 nfs4args_copen_free(open_args);
2321 if (!ep->error)
2322 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
2323 return;
2324 }
2325
2326 if (reopen) {
2327 /*
2328 * Doing a reopen here so the osp should already exist.
2329 * If not, something changed or went very wrong.
2330 *
2331 * returns with 'os_sync_lock' held
2332 */
2333 osp = find_open_stream(oop, rp);
2334 if (!osp) {
2335 NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
2336 "nfs4_resend_open_otw: couldn't find osp"));
2337 ep->error = EINVAL;
2338 goto err_out;
2339 }
2340 osp->os_open_ref_count++;
2341 } else {
2342 mutex_enter(&oop->oo_lock);
2343 oop->oo_just_created = NFS4_PERM_CREATED;
2344 mutex_exit(&oop->oo_lock);
2345
2346 /* returns with 'os_sync_lock' held */
2347 osp = find_or_create_open_stream(oop, rp, &created_osp);
2348 if (!osp) {
2349 NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
2350 "nfs4_resend_open_otw: couldn't create osp"));
2351 ep->error = EINVAL;
2352 goto err_out;
2353 }
2354 }
2355
2356 osp->open_stateid = op_res->stateid;
2357 osp->os_delegation = FALSE;
2358 /*
2359 * Need to reset this bitfield for the possible case where we were
2360 * going to OTW CLOSE the file, got a non-recoverable error, and before
2361 * we could retry the CLOSE, OPENed the file again.
2362 */
2363 ASSERT(osp->os_open_owner->oo_seqid_inuse);
2364 osp->os_final_close = 0;
2365 osp->os_force_close = 0;
2366
2367 if (!reopen) {
2368 if (open_args->share_access & OPEN4_SHARE_ACCESS_READ)
2369 osp->os_share_acc_read++;
2370 if (open_args->share_access & OPEN4_SHARE_ACCESS_WRITE)
2371 osp->os_share_acc_write++;
2372 osp->os_share_deny_none++;
2373 }
2374
2375 mutex_exit(&osp->os_sync_lock);
2376 if (created_osp)
2377 nfs4_inc_state_ref_count(mi);
2378 open_stream_rele(osp, rp);
2379
2380 nfs4_end_open_seqid_sync(oop);
2381
2382 /* accept delegation, if any */
2383 nfs4_delegation_accept(rp, claim, op_res, garp, cr);
2384
2385 kmem_free(destcfp, destclen + 1);
2386 nfs4args_copen_free(open_args);
2387
2388 if (claim == CLAIM_DELEGATE_CUR)
2389 nfs4_attr_cache(vp, garp, t, cr, TRUE, NULL);
2390 else
2391 PURGE_ATTRCACHE4(vp);
2392
2393 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
2394
2395 ASSERT(nfs4_consistent_type(vp));
2396
2397 return;
2398
2399 err_out:
2400 nfs4_end_open_seqid_sync(oop);
2401 kmem_free(destcfp, destclen + 1);
2402 nfs4args_copen_free(open_args);
2403 if (!ep->error)
2404 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
2405 }
2406