1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28
29 #include <sys/param.h>
30 #include <sys/types.h>
31 #include <sys/systm.h>
32 #include <sys/cred.h>
33 #include <sys/vfs.h>
34 #include <sys/vnode.h>
35 #include <sys/pathname.h>
36 #include <sys/sysmacros.h>
37 #include <sys/kmem.h>
38 #include <sys/kstat.h>
39 #include <sys/mkdev.h>
40 #include <sys/mount.h>
41 #include <sys/statvfs.h>
42 #include <sys/errno.h>
43 #include <sys/debug.h>
44 #include <sys/cmn_err.h>
45 #include <sys/utsname.h>
46 #include <sys/bootconf.h>
47 #include <sys/modctl.h>
48 #include <sys/acl.h>
49 #include <sys/flock.h>
50 #include <sys/kstr.h>
51 #include <sys/stropts.h>
52 #include <sys/strsubr.h>
53 #include <sys/atomic.h>
54 #include <sys/disp.h>
55 #include <sys/policy.h>
56 #include <sys/list.h>
57 #include <sys/zone.h>
58
59 #include <rpc/types.h>
60 #include <rpc/auth.h>
61 #include <rpc/rpcsec_gss.h>
62 #include <rpc/clnt.h>
63 #include <rpc/xdr.h>
64
65 #include <nfs/nfs.h>
66 #include <nfs/nfs_clnt.h>
67 #include <nfs/mount.h>
68 #include <nfs/nfs_acl.h>
69
70 #include <fs/fs_subr.h>
71
72 #include <nfs/nfs4.h>
73 #include <nfs/rnode4.h>
74 #include <nfs/nfs4_clnt.h>
75 #include <nfs/nfssys.h>
76
77 #ifdef DEBUG
78 /*
79 * These are "special" state IDs and file handles that
80 * match any delegation state ID or file handle. This
81 * is for testing purposes only.
82 */
83
84 stateid4 nfs4_deleg_any = { 0x7FFFFFF0 };
85 char nfs4_deleg_fh[] = "\0377\0376\0375\0374";
86 nfs_fh4 nfs4_deleg_anyfh = { sizeof (nfs4_deleg_fh)-1, nfs4_deleg_fh };
87 nfsstat4 cb4_getattr_fail = NFS4_OK;
88 nfsstat4 cb4_recall_fail = NFS4_OK;
89
90 int nfs4_callback_debug;
91 int nfs4_recall_debug;
92 int nfs4_drat_debug;
93
94 #endif
95
96 #define CB_NOTE(x) NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE, x))
97 #define CB_WARN(x) NFS4_DEBUG(nfs4_callback_debug, (CE_WARN, x))
98 #define CB_WARN1(x, y) NFS4_DEBUG(nfs4_callback_debug, (CE_WARN, x, y))
99
100 enum nfs4_delegreturn_policy nfs4_delegreturn_policy = INACTIVE;
101
102 static zone_key_t nfs4_callback_zone_key;
103
104 /*
105 * NFS4_MAPSIZE is the number of bytes we are willing to consume
106 * for the block allocation map when the server grants an NFS_LIMIT_BLOCK
107 * style delegation.
108 */
109
110 #define NFS4_MAPSIZE 8192
111 #define NFS4_MAPWORDS (NFS4_MAPSIZE/sizeof (uint_t))
112 #define NbPW (NBBY*sizeof (uint_t))
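/*
 * For illustration: with a 4-byte uint_t, NFS4_MAPWORDS works out to
 * 8192 / 4 == 2048 map words of NBBY * 4 == 32 bits each (NbPW),
 * i.e. up to 65536 block-limit bits per delegation.
 */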
113
114 static int nfs4_num_prognums = 1024;
115 static SVC_CALLOUT_TABLE nfs4_cb_sct;
116
117 struct nfs4_dnode {
118 list_node_t linkage;
119 rnode4_t *rnodep;
120 int flags; /* Flags for nfs4delegreturn_impl() */
121 };
122
123 static const struct nfs4_callback_stats nfs4_callback_stats_tmpl = {
124 { "delegations", KSTAT_DATA_UINT64 },
125 { "cb_getattr", KSTAT_DATA_UINT64 },
126 { "cb_recall", KSTAT_DATA_UINT64 },
127 { "cb_null", KSTAT_DATA_UINT64 },
128 { "cb_dispatch", KSTAT_DATA_UINT64 },
129 { "delegaccept_r", KSTAT_DATA_UINT64 },
130 { "delegaccept_rw", KSTAT_DATA_UINT64 },
131 { "delegreturn", KSTAT_DATA_UINT64 },
132 { "callbacks", KSTAT_DATA_UINT64 },
133 { "claim_cur", KSTAT_DATA_UINT64 },
134 { "claim_cur_ok", KSTAT_DATA_UINT64 },
135 { "recall_trunc", KSTAT_DATA_UINT64 },
136 { "recall_failed", KSTAT_DATA_UINT64 },
137 { "return_limit_write", KSTAT_DATA_UINT64 },
138 { "return_limit_addmap", KSTAT_DATA_UINT64 },
139 { "deleg_recover", KSTAT_DATA_UINT64 },
140 { "cb_illegal", KSTAT_DATA_UINT64 }
141 };
142
143 struct nfs4_cb_port {
144 list_node_t linkage; /* linkage into per-zone port list */
145 char netid[KNC_STRSIZE];
146 char uaddr[KNC_STRSIZE];
147 char protofmly[KNC_STRSIZE];
148 char proto[KNC_STRSIZE];
149 };
150
151 static int cb_getattr_bytes;
152
153 struct cb_recall_pass {
154 rnode4_t *rp;
155 int flags; /* Flags for nfs4delegreturn_impl() */
156 bool_t truncate;
157 };
158
159 static nfs4_open_stream_t *get_next_deleg_stream(rnode4_t *, int);
160 static void nfs4delegreturn_thread(struct cb_recall_pass *);
161 static int deleg_reopen(vnode_t *, bool_t *, struct nfs4_callback_globals *,
162 int);
163 static void nfs4_dlistadd(rnode4_t *, struct nfs4_callback_globals *, int);
164 static void nfs4_dlistclean_impl(struct nfs4_callback_globals *, int);
165 static int nfs4delegreturn_impl(rnode4_t *, int,
166 struct nfs4_callback_globals *);
167 static void nfs4delegreturn_cleanup_impl(rnode4_t *, nfs4_server_t *,
168 struct nfs4_callback_globals *);
169 static void nfs4_recall_sync_wait(nfs4_server_t *);
170
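/*
 * cb_getattr - handle an OP_CB_GETATTR callback. The server queries
 * the delegation holder for a file's current attributes. Locate the
 * rnode with the matching file handle on this server's delegation
 * list and encode the requested attributes into the reply; only
 * FATTR4_CHANGE and FATTR4_SIZE are supported.
 */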
171 static void
172 cb_getattr(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
173 struct compound_state *cs, struct nfs4_callback_globals *ncg)
174 {
175 CB_GETATTR4args *args = &argop->nfs_cb_argop4_u.opcbgetattr;
176 CB_GETATTR4res *resp = &resop->nfs_cb_resop4_u.opcbgetattr;
177 rnode4_t *rp;
178 vnode_t *vp;
179 bool_t found = FALSE;
180 struct nfs4_server *sp;
181 struct fattr4 *fap;
182 rpc_inline_t *fdata;
183 long mapcnt;
184 fattr4_change change;
185 fattr4_size size;
186 uint_t rflag;
187
188 ncg->nfs4_callback_stats.cb_getattr.value.ui64++;
189
190 #ifdef DEBUG
191 /*
192 * error injection hook: set the cb4_getattr_fail global to
193 * the NFS4 protocol error to be returned
194 */
195 if (cb4_getattr_fail != NFS4_OK) {
196 *cs->statusp = resp->status = cb4_getattr_fail;
197 return;
198 }
199 #endif
200
201 resp->obj_attributes.attrmask = 0;
202
203 mutex_enter(&ncg->nfs4_cb_lock);
204 sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
205 mutex_exit(&ncg->nfs4_cb_lock);
206
207 if (nfs4_server_vlock(sp, 0) == FALSE) {
208
209 CB_WARN("cb_getattr: cannot find server\n");
210
211 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
212 return;
213 }
214
215 /*
216 * In cb_compound, callback_ident was validated against rq_prog,
217 * but we couldn't verify that it was set to the value we provided
218 * at setclientid time (because we didn't have the server struct yet).
219 * Now we have the server struct, but don't have callback_ident
220 * handy. So, validate server struct program number against req
221 * RPC's prog number. At this point, we know the RPC prog num
222 * is valid (else we wouldn't be here); however, we don't know
223 * that it was the prog number we supplied to this server at
224 * setclientid time. If the prog numbers aren't equivalent, then
225 * log the problem and fail the request because either cbserv
226 * and/or cbclient are confused. This will probably never happen.
227 */
228 if (sp->s_program != req->rq_prog) {
229 #ifdef DEBUG
230 zcmn_err(getzoneid(), CE_WARN,
231 "cb_getattr: wrong server program number srv=%d req=%d\n",
232 sp->s_program, req->rq_prog);
233 #else
234 zcmn_err(getzoneid(), CE_WARN,
235 "cb_getattr: wrong server program number\n");
236 #endif
237 mutex_exit(&sp->s_lock);
238 nfs4_server_rele(sp);
239 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
240 return;
241 }
242
243 /*
244 * Search the delegation list for a matching file handle;
245 * mutex on sp prevents the list from changing.
246 */
247
248 rp = list_head(&sp->s_deleg_list);
249 for (; rp != NULL; rp = list_next(&sp->s_deleg_list, rp)) {
250 nfs4_fhandle_t fhandle;
251
252 sfh4_copyval(rp->r_fh, &fhandle);
253
254 if ((fhandle.fh_len == args->fh.nfs_fh4_len &&
255 bcmp(fhandle.fh_buf, args->fh.nfs_fh4_val,
256 fhandle.fh_len) == 0)) {
257
258 found = TRUE;
259 break;
260 }
261 #ifdef DEBUG
262 if (nfs4_deleg_anyfh.nfs_fh4_len == args->fh.nfs_fh4_len &&
263 bcmp(nfs4_deleg_anyfh.nfs_fh4_val, args->fh.nfs_fh4_val,
264 args->fh.nfs_fh4_len) == 0) {
265
266 found = TRUE;
267 break;
268 }
269 #endif
270 }
271
272 /*
273 * VN_HOLD the vnode before releasing s_lock to guarantee
274 * we have a valid vnode reference.
275 */
276 if (found == TRUE) {
277 vp = RTOV4(rp);
278 VN_HOLD(vp);
279 }
280
281 mutex_exit(&sp->s_lock);
282 nfs4_server_rele(sp);
283
284 if (found == FALSE) {
285
286 CB_WARN("cb_getattr: bad fhandle\n");
287
288 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
289 return;
290 }
291
292 /*
293 * Figure out which attributes the server wants. We only
294 * offer FATTR4_CHANGE & FATTR4_SIZE; ignore the rest.
295 */
296 fdata = kmem_alloc(cb_getattr_bytes, KM_SLEEP);
297
298 /*
299 * Don't actually need to create XDR to encode these
300 * simple data structures.
301 * xdrmem_create(&xdr, fdata, cb_getattr_bytes, XDR_ENCODE);
302 */
303 fap = &resp->obj_attributes;
304
305 fap->attrmask = 0;
306 /* attrlist4_len starts at 0 and increases as attrs are processed */
307 fap->attrlist4 = (char *)fdata;
308 fap->attrlist4_len = 0;
309
310 /* don't supply attrs if request was zero */
311 if (args->attr_request != 0) {
312 if (args->attr_request & FATTR4_CHANGE_MASK) {
313 /*
314 * If the file is mmapped, then increment the change
315 * attribute and return it. This will guarantee that
316 * the server will perceive that the file has changed
317 * if there is any chance that the client application
318 * has changed it. Otherwise, just return the change
319 * attribute as it has been updated by nfs4write_deleg.
320 */
321
322 mutex_enter(&rp->r_statelock);
323 mapcnt = rp->r_mapcnt;
324 rflag = rp->r_flags;
325 mutex_exit(&rp->r_statelock);
326
327 mutex_enter(&rp->r_statev4_lock);
328 /*
329 * If object mapped, then always return new change.
330 * Otherwise, return change if object has dirty
331 * pages. If object doesn't have any dirty pages,
332 * then all changes have been pushed to server, so
333 * reset change to grant change.
334 */
335 if (mapcnt)
336 rp->r_deleg_change++;
337 else if (! (rflag & R4DIRTY))
338 rp->r_deleg_change = rp->r_deleg_change_grant;
339 change = rp->r_deleg_change;
340 mutex_exit(&rp->r_statev4_lock);
341
342 /*
343 * Use the inline XDR code directly; we know that we are
344 * writing to a memory buffer and it has enough
345 * space, so it cannot fail.
346 */
347 IXDR_PUT_U_HYPER(fdata, change);
348 fap->attrlist4_len += 2 * BYTES_PER_XDR_UNIT;
349 fap->attrmask |= FATTR4_CHANGE_MASK;
350 }
351
352 if (args->attr_request & FATTR4_SIZE_MASK) {
353 /*
354 * Use an atomic add of 0 to fetch a consistent view
355 * of r_size; this avoids having to take rw_lock
356 * which could cause a deadlock.
357 */
358 size = atomic_add_64_nv((uint64_t *)&rp->r_size, 0);
359
360 /*
361 * Use the inline XDR code directly; we know that we are
362 * writing to a memory buffer and it has enough
363 * space, so it cannot fail.
364 */
365 IXDR_PUT_U_HYPER(fdata, size);
366 fap->attrlist4_len += 2 * BYTES_PER_XDR_UNIT;
367 fap->attrmask |= FATTR4_SIZE_MASK;
368 }
369 }
370
371 VN_RELE(vp);
372
373 *cs->statusp = resp->status = NFS4_OK;
374 }
375
376 static void
377 cb_getattr_free(nfs_cb_resop4 *resop)
378 {
379 if (resop->nfs_cb_resop4_u.opcbgetattr.obj_attributes.attrlist4)
380 kmem_free(resop->nfs_cb_resop4_u.opcbgetattr.
381 obj_attributes.attrlist4, cb_getattr_bytes);
382 }
383
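/*
 * cb_recall - handle an OP_CB_RECALL callback. Locate the delegation
 * that matches both the recalled stateid and the file handle, then
 * hand the rnode to an async thread to drive the actual DELEGRETURN.
 */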
384 static void
385 cb_recall(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
386 struct compound_state *cs, struct nfs4_callback_globals *ncg)
387 {
388 CB_RECALL4args * args = &argop->nfs_cb_argop4_u.opcbrecall;
389 CB_RECALL4res *resp = &resop->nfs_cb_resop4_u.opcbrecall;
390 rnode4_t *rp;
391 vnode_t *vp;
392 struct nfs4_server *sp;
393 bool_t found = FALSE;
394 bool_t retried = FALSE;
395
396 ncg->nfs4_callback_stats.cb_recall.value.ui64++;
397
398 ASSERT(req->rq_prog >= NFS4_CALLBACK);
399 ASSERT(req->rq_prog < NFS4_CALLBACK+nfs4_num_prognums);
400
401 #ifdef DEBUG
402 /*
403 * error injection hook: set the cb4_recall_fail global to
404 * the NFS4 protocol error to be returned
405 */
406 if (cb4_recall_fail != NFS4_OK) {
407 *cs->statusp = resp->status = cb4_recall_fail;
408 return;
409 }
410 #endif
411
412 mutex_enter(&ncg->nfs4_cb_lock);
413 sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
414 mutex_exit(&ncg->nfs4_cb_lock);
415
416 if (nfs4_server_vlock(sp, 0) == FALSE) {
417
418 CB_WARN("cb_recall: cannot find server\n");
419
420 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
421 return;
422 }
423
424 retry:
425 /*
426 * Search the delegation list for a matching file handle
427 * AND stateid; mutex on sp prevents the list from changing.
428 */
429
430 rp = list_head(&sp->s_deleg_list);
431 for (; rp != NULL; rp = list_next(&sp->s_deleg_list, rp)) {
432 mutex_enter(&rp->r_statev4_lock);
433
434 /* check both state id and file handle! */
435
436 if ((bcmp(&rp->r_deleg_stateid, &args->stateid,
437 sizeof (stateid4)) == 0)) {
438 nfs4_fhandle_t fhandle;
439
440 sfh4_copyval(rp->r_fh, &fhandle);
441 if ((fhandle.fh_len == args->fh.nfs_fh4_len &&
442 bcmp(fhandle.fh_buf, args->fh.nfs_fh4_val,
443 fhandle.fh_len) == 0)) {
444
445 found = TRUE;
446 break;
447 } else {
448 #ifdef DEBUG
449 CB_WARN("cb_recall: stateid OK, bad fh");
450 #endif
451 }
452 }
453 #ifdef DEBUG
454 if (bcmp(&args->stateid, &nfs4_deleg_any,
455 sizeof (stateid4)) == 0) {
456
457 found = TRUE;
458 break;
459 }
460 #endif
461 mutex_exit(&rp->r_statev4_lock);
462 }
463
464 /*
465 * VN_HOLD the vnode before releasing s_lock to guarantee
466 * we have a valid vnode reference. The async thread will
467 * release the hold when it's done.
468 */
469 if (found == TRUE) {
470 mutex_exit(&rp->r_statev4_lock);
471 vp = RTOV4(rp);
472 VN_HOLD(vp);
473 }
474 mutex_exit(&sp->s_lock);
475
476 if (found == FALSE && retried == FALSE) {
477 nfs4_recall_sync_wait(sp);
478 mutex_enter(&sp->s_lock);
479 retried = TRUE;
480 goto retry;
481 }
482
483 nfs4_server_rele(sp);
484
485 if (found == FALSE) {
486
487 CB_WARN("cb_recall: bad stateid\n");
488
489 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
490 return;
491 }
492
493 /* Fire up a thread to do the delegreturn */
494 nfs4delegreturn_async(rp, NFS4_DR_RECALL|NFS4_DR_REOPEN,
495 args->truncate);
496
497 *cs->statusp = resp->status = 0;
498 }
499
500 /* ARGSUSED */
501 static void
502 cb_recall_free(nfs_cb_resop4 *resop)
503 {
504 /* nothing to do here, cb_recall doesn't kmem_alloc */
505 }
506
507 /*
508 * This function handles the CB_NULL proc call from an NFSv4 Server.
509 *
510 * We take note that the server has sent a CB_NULL for later processing
511 * in the recovery logic. It is noted so we may pause slightly after the
512 * setclientid and before reopening files. The pause is to allow the
513 * NFSv4 Server time to receive the CB_NULL reply and adjust any of
514 * its internal structures such that it has the opportunity to grant
515 * delegations to reopened files.
516 *
517 */
518
519 /* ARGSUSED */
520 static void
521 cb_null(CB_COMPOUND4args *args, CB_COMPOUND4res *resp, struct svc_req *req,
522 struct nfs4_callback_globals *ncg)
523 {
524 struct nfs4_server *sp;
525
526 ncg->nfs4_callback_stats.cb_null.value.ui64++;
527
528 ASSERT(req->rq_prog >= NFS4_CALLBACK);
529 ASSERT(req->rq_prog < NFS4_CALLBACK+nfs4_num_prognums);
530
531 mutex_enter(&ncg->nfs4_cb_lock);
532 sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
533 mutex_exit(&ncg->nfs4_cb_lock);
534
535 if (nfs4_server_vlock(sp, 0) != FALSE) {
536 sp->s_flags |= N4S_CB_PINGED;
537 cv_broadcast(&sp->wait_cb_null);
538 mutex_exit(&sp->s_lock);
539 nfs4_server_rele(sp);
540 }
541 }
542
543 /*
544 * cb_illegal args: void
545 * res : status (NFS4ERR_OP_CB_ILLEGAL)
546 */
547 /* ARGSUSED */
548 static void
549 cb_illegal(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
550 struct compound_state *cs, struct nfs4_callback_globals *ncg)
551 {
552 CB_ILLEGAL4res *resp = &resop->nfs_cb_resop4_u.opcbillegal;
553
554 ncg->nfs4_callback_stats.cb_illegal.value.ui64++;
555 resop->resop = OP_CB_ILLEGAL;
556 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
557 }
558
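/*
 * cb_compound - process a CB_COMPOUND request: echo the request tag
 * back in the reply, verify the minor version, then execute each op
 * in the argument array (CB_GETATTR, CB_RECALL, or CB_ILLEGAL for
 * anything else), trimming the results array at the first failure.
 */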
559 static void
560 cb_compound(CB_COMPOUND4args *args, CB_COMPOUND4res *resp, struct svc_req *req,
561 struct nfs4_callback_globals *ncg)
562 {
563 uint_t i;
564 struct compound_state cs;
565 nfs_cb_argop4 *argop;
566 nfs_cb_resop4 *resop, *new_res;
567 uint_t op;
568
569 bzero(&cs, sizeof (cs));
570 cs.statusp = &resp->status;
571 cs.cont = TRUE;
572
573 /*
574 * Form a reply tag by copying over the request tag.
575 */
576 resp->tag.utf8string_len = args->tag.utf8string_len;
577 resp->tag.utf8string_val = kmem_alloc(resp->tag.utf8string_len,
578 KM_SLEEP);
579 bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
580 args->tag.utf8string_len);
581
582 /*
583 * XXX for now, minorversion should be zero
584 */
585 if (args->minorversion != CB4_MINORVERSION) {
586 resp->array_len = 0;
587 resp->array = NULL;
588 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
589 return;
590 }
591
592 #ifdef DEBUG
593 /*
594 * Verify callback_ident. It doesn't really matter if it's wrong
595 * because we don't really use callback_ident -- we use the prog number
596 * of the RPC request instead. In this case, just print a DEBUG
597 * console message to reveal brokenness of cbclient (at bkoff/cthon).
598 */
599 if (args->callback_ident != req->rq_prog)
600 zcmn_err(getzoneid(), CE_WARN,
601 "cb_compound: cb_client using wrong "
602 "callback_ident(%d), should be %d",
603 args->callback_ident, req->rq_prog);
604 #endif
605
606 resp->array_len = args->array_len;
607 resp->array = kmem_zalloc(args->array_len * sizeof (nfs_cb_resop4),
608 KM_SLEEP);
609
610 for (i = 0; i < args->array_len && cs.cont; i++) {
611
612 argop = &args->array[i];
613 resop = &resp->array[i];
614 resop->resop = argop->argop;
615 op = (uint_t)resop->resop;
616
617 switch (op) {
618
619 case OP_CB_GETATTR:
620
621 cb_getattr(argop, resop, req, &cs, ncg);
622 break;
623
624 case OP_CB_RECALL:
625
626 cb_recall(argop, resop, req, &cs, ncg);
627 break;
628
629 case OP_CB_ILLEGAL:
630
631 /* fall through */
632
633 default:
634 /*
635 * Handle OP_CB_ILLEGAL and any undefined opcode.
636 * Currently, the XDR code will return BADXDR
637 * if cb op doesn't decode to legal value, so
638 * it really only handles OP_CB_ILLEGAL.
639 */
640 op = OP_CB_ILLEGAL;
641 cb_illegal(argop, resop, req, &cs, ncg);
642 }
643
644 if (*cs.statusp != NFS4_OK)
645 cs.cont = FALSE;
646
647 /*
648 * If not at last op, and if we are to stop, then
649 * compact the results array.
650 */
651 if ((i + 1) < args->array_len && !cs.cont) {
652
653 new_res = kmem_alloc(
654 (i+1) * sizeof (nfs_cb_resop4), KM_SLEEP);
655 bcopy(resp->array,
656 new_res, (i+1) * sizeof (nfs_cb_resop4));
657 kmem_free(resp->array,
658 args->array_len * sizeof (nfs_cb_resop4));
659
660 resp->array_len = i + 1;
661 resp->array = new_res;
662 }
663 }
664
665 }
666
667 static void
668 cb_compound_free(CB_COMPOUND4res *resp)
669 {
670 uint_t i, op;
671 nfs_cb_resop4 *resop;
672
673 if (resp->tag.utf8string_val) {
674 UTF8STRING_FREE(resp->tag)
675 }
676
677 for (i = 0; i < resp->array_len; i++) {
678
679 resop = &resp->array[i];
680 op = (uint_t)resop->resop;
681
682 switch (op) {
683
684 case OP_CB_GETATTR:
685
686 cb_getattr_free(resop);
687 break;
688
689 case OP_CB_RECALL:
690
691 cb_recall_free(resop);
692 break;
693
694 default:
695 break;
696 }
697 }
698
699 if (resp->array != NULL) {
700 kmem_free(resp->array,
701 resp->array_len * sizeof (nfs_cb_resop4));
702 }
703 }
704
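/*
 * cb_dispatch - kRPC dispatch routine for the callback service: map
 * the RPC procedure (CB_NULL or CB_COMPOUND) to its XDR and worker
 * routines, decode the arguments, invoke the op and send the reply.
 */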
705 static void
706 cb_dispatch(struct svc_req *req, SVCXPRT *xprt)
707 {
708 CB_COMPOUND4args args;
709 CB_COMPOUND4res res;
710 struct nfs4_callback_globals *ncg;
711
712 bool_t (*xdr_args)(), (*xdr_res)();
713 void (*proc)(CB_COMPOUND4args *, CB_COMPOUND4res *, struct svc_req *,
714 struct nfs4_callback_globals *);
715 void (*freeproc)(CB_COMPOUND4res *);
716
717 ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
718 ASSERT(ncg != NULL);
719
720 ncg->nfs4_callback_stats.cb_dispatch.value.ui64++;
721
722 switch (req->rq_proc) {
723 case CB_NULL:
724 xdr_args = xdr_void;
725 xdr_res = xdr_void;
726 proc = cb_null;
727 freeproc = NULL;
728 break;
729
730 case CB_COMPOUND:
731 xdr_args = xdr_CB_COMPOUND4args_clnt;
732 xdr_res = xdr_CB_COMPOUND4res;
733 proc = cb_compound;
734 freeproc = cb_compound_free;
735 break;
736
737 default:
738 CB_WARN("cb_dispatch: no proc\n");
739 svcerr_noproc(xprt);
740 return;
741 }
742
743 args.tag.utf8string_val = NULL;
744 args.array = NULL;
745
746 if (!SVC_GETARGS(xprt, xdr_args, (caddr_t)&args)) {
747
748 CB_WARN("cb_dispatch: cannot getargs\n");
749 svcerr_decode(xprt);
750 return;
751 }
752
753 (*proc)(&args, &res, req, ncg);
754
755 if (svc_sendreply(xprt, xdr_res, (caddr_t)&res) == FALSE) {
756
757 CB_WARN("cb_dispatch: bad sendreply\n");
758 svcerr_systemerr(xprt);
759 }
760
761 if (freeproc)
762 (*freeproc)(&res);
763
764 if (!SVC_FREEARGS(xprt, xdr_args, (caddr_t)&args)) {
765
766 CB_WARN("cb_dispatch: bad freeargs\n");
767 }
768 }
769
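/*
 * nfs4_getnextprogram - allocate the next unused callback program
 * number, searching circularly from nfs4_program_hint. Returns zero
 * if all nfs4_num_prognums slots are in use.
 */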
770 static rpcprog_t
771 nfs4_getnextprogram(struct nfs4_callback_globals *ncg)
772 {
773 int i, j;
774
775 j = ncg->nfs4_program_hint;
776 for (i = 0; i < nfs4_num_prognums; i++, j++) {
777
778 if (j >= nfs4_num_prognums)
779 j = 0;
780
781 if (ncg->nfs4prog2server[j] == NULL) {
782 ncg->nfs4_program_hint = j+1;
783 return (j+NFS4_CALLBACK);
784 }
785 }
786
787 return (0);
788 }
789
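/*
 * nfs4callback_destroy - release the callback program number held
 * by this nfs4_server_t so that it may be reused.
 */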
790 void
791 nfs4callback_destroy(nfs4_server_t *np)
792 {
793 struct nfs4_callback_globals *ncg;
794 int i;
795
796 if (np->s_program == 0)
797 return;
798
799 ncg = np->zone_globals;
800 i = np->s_program - NFS4_CALLBACK;
801
802 mutex_enter(&ncg->nfs4_cb_lock);
803
804 ASSERT(ncg->nfs4prog2server[i] == np);
805
806 ncg->nfs4prog2server[i] = NULL;
807
808 if (i < ncg->nfs4_program_hint)
809 ncg->nfs4_program_hint = i;
810
811 mutex_exit(&ncg->nfs4_cb_lock);
812 }
813
814 /*
815 * nfs4_setport - This function saves a netid and universal address for
816 * the callback program. These values will be used during setclientid.
817 */
818 static void
819 nfs4_setport(char *netid, char *uaddr, char *protofmly, char *proto,
820 struct nfs4_callback_globals *ncg)
821 {
822 struct nfs4_cb_port *p;
823 bool_t found = FALSE;
824
825 ASSERT(MUTEX_HELD(&ncg->nfs4_cb_lock));
826
827 p = list_head(&ncg->nfs4_cb_ports);
828 for (; p != NULL; p = list_next(&ncg->nfs4_cb_ports, p)) {
829 if (strcmp(p->netid, netid) == 0) {
830 found = TRUE;
831 break;
832 }
833 }
834 if (found == TRUE)
835 (void) strcpy(p->uaddr, uaddr);
836 else {
837 p = kmem_alloc(sizeof (*p), KM_SLEEP);
838
839 (void) strcpy(p->uaddr, uaddr);
840 (void) strcpy(p->netid, netid);
841 (void) strcpy(p->protofmly, protofmly);
842 (void) strcpy(p->proto, proto);
843 list_insert_head(&ncg->nfs4_cb_ports, p);
844 }
845 }
846
847 /*
848 * nfs4_cb_args - This function is used to construct the callback
849 * portion of the arguments needed for setclientid.
850 */
851
852 void
853 nfs4_cb_args(nfs4_server_t *np, struct knetconfig *knc, SETCLIENTID4args *args)
854 {
855 struct nfs4_cb_port *p;
856 bool_t found = FALSE;
857 rpcprog_t pgm;
858 struct nfs4_callback_globals *ncg = np->zone_globals;
859
860 /*
861 * This server structure may already have a program number
862 * assigned to it. This happens when the client has to
863 * re-issue SETCLIENTID; release the old program number so a fresh one is assigned below.
864 */
865 if (np->s_program >= NFS4_CALLBACK &&
866 np->s_program < NFS4_CALLBACK + nfs4_num_prognums)
867 nfs4callback_destroy(np);
868
869 mutex_enter(&ncg->nfs4_cb_lock);
870
871 p = list_head(&ncg->nfs4_cb_ports);
872 for (; p != NULL; p = list_next(&ncg->nfs4_cb_ports, p)) {
873 if (strcmp(p->protofmly, knc->knc_protofmly) == 0 &&
874 strcmp(p->proto, knc->knc_proto) == 0) {
875 found = TRUE;
876 break;
877 }
878 }
879
880 if (found == FALSE) {
881
882 NFS4_DEBUG(nfs4_callback_debug,
883 (CE_WARN, "nfs4_cb_args: could not find netid for %s/%s\n",
884 knc->knc_protofmly, knc->knc_proto));
885
886 args->callback.cb_program = 0;
887 args->callback.cb_location.r_netid = NULL;
888 args->callback.cb_location.r_addr = NULL;
889 args->callback_ident = 0;
890 mutex_exit(&ncg->nfs4_cb_lock);
891 return;
892 }
893
894 if ((pgm = nfs4_getnextprogram(ncg)) == 0) {
895 CB_WARN("nfs4_cb_args: out of program numbers\n");
896
897 args->callback.cb_program = 0;
898 args->callback.cb_location.r_netid = NULL;
899 args->callback.cb_location.r_addr = NULL;
900 args->callback_ident = 0;
901 mutex_exit(&ncg->nfs4_cb_lock);
902 return;
903 }
904
905 ncg->nfs4prog2server[pgm-NFS4_CALLBACK] = np;
906 args->callback.cb_program = pgm;
907 args->callback.cb_location.r_netid = p->netid;
908 args->callback.cb_location.r_addr = p->uaddr;
909 args->callback_ident = pgm;
910
911 np->s_program = pgm;
912
913 mutex_exit(&ncg->nfs4_cb_lock);
914 }
915
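/*
 * nfs4_dquery - handle the NFS4_DQUERY subcommand of nfs4_svc: copy
 * the delegation type (r_deleg_type) held for the given open NFSv4
 * file out to the user-supplied address.
 */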
916 static int
917 nfs4_dquery(struct nfs4_svc_args *arg, model_t model)
918 {
919 file_t *fp;
920 vnode_t *vp;
921 rnode4_t *rp;
922 int error;
923 STRUCT_HANDLE(nfs4_svc_args, uap);
924
925 STRUCT_SET_HANDLE(uap, model, arg);
926
927 if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
928 return (EBADF);
929
930 vp = fp->f_vnode;
931
932 if (vp == NULL || vp->v_type != VREG ||
933 !vn_matchops(vp, nfs4_vnodeops)) {
934 releasef(STRUCT_FGET(uap, fd));
935 return (EBADF);
936 }
937
938 rp = VTOR4(vp);
939
940 /*
941 * I can't convince myself that we need locking here. The
942 * rnode cannot disappear and the value returned is instantly
943 * stale anyway, so why bother?
944 */
945
946 error = suword32(STRUCT_FGETP(uap, netid), rp->r_deleg_type);
947 releasef(STRUCT_FGET(uap, fd));
948 return (error);
949 }
950
951
952 /*
953 * NFS4 client system call. This service does the
954 * necessary initialization for the callback program.
955 * This is fashioned after the server side interaction
956 * between nfsd and the kernel. On the client, the
957 * mount command forks and the child process does the
958 * necessary interaction with the kernel.
959 *
960 * uap->fd is the fd of an open transport provider
961 */
962 int
963 nfs4_svc(struct nfs4_svc_args *arg, model_t model)
964 {
965 file_t *fp;
966 int error;
967 int readsize;
968 char buf[KNC_STRSIZE], uaddr[KNC_STRSIZE];
969 char protofmly[KNC_STRSIZE], proto[KNC_STRSIZE];
970 size_t len;
971 STRUCT_HANDLE(nfs4_svc_args, uap);
972 struct netbuf addrmask;
973 int cmd;
974 SVCMASTERXPRT *cb_xprt;
975 struct nfs4_callback_globals *ncg;
976
977 #ifdef lint
978 model = model; /* STRUCT macros don't always refer to it */
979 #endif
980
981 STRUCT_SET_HANDLE(uap, model, arg);
982
983 if (STRUCT_FGET(uap, cmd) == NFS4_DQUERY)
984 return (nfs4_dquery(arg, model));
985
986 if (secpolicy_nfs(CRED()) != 0)
987 return (EPERM);
988
989 if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
990 return (EBADF);
991
992 /*
993 * Set read buffer size to rsize
994 * and add room for RPC headers.
995 */
996 readsize = nfs3tsize() + (RPC_MAXDATASIZE - NFS_MAXDATA);
997 if (readsize < RPC_MAXDATASIZE)
998 readsize = RPC_MAXDATASIZE;
999
1000 error = copyinstr((const char *)STRUCT_FGETP(uap, netid), buf,
1001 KNC_STRSIZE, &len);
1002 if (error) {
1003 releasef(STRUCT_FGET(uap, fd));
1004 return (error);
1005 }
1006
1007 cmd = STRUCT_FGET(uap, cmd);
1008
1009 if (cmd & NFS4_KRPC_START) {
1010 addrmask.len = STRUCT_FGET(uap, addrmask.len);
1011 addrmask.maxlen = STRUCT_FGET(uap, addrmask.maxlen);
1012 addrmask.buf = kmem_alloc(addrmask.maxlen, KM_SLEEP);
1013 error = copyin(STRUCT_FGETP(uap, addrmask.buf), addrmask.buf,
1014 addrmask.len);
1015 if (error) {
1016 releasef(STRUCT_FGET(uap, fd));
1017 kmem_free(addrmask.buf, addrmask.maxlen);
1018 return (error);
1019 }
1020 } else {
1021 addrmask.buf = NULL;
1022 }
1023
1024 error = copyinstr((const char *)STRUCT_FGETP(uap, addr), uaddr,
1025 sizeof (uaddr), &len);
1026 if (error) {
1027 releasef(STRUCT_FGET(uap, fd));
1028 if (addrmask.buf)
1029 kmem_free(addrmask.buf, addrmask.maxlen);
1030 return (error);
1031 }
1032
1033 error = copyinstr((const char *)STRUCT_FGETP(uap, protofmly), protofmly,
1034 sizeof (protofmly), &len);
1035 if (error) {
1036 releasef(STRUCT_FGET(uap, fd));
1037 if (addrmask.buf)
1038 kmem_free(addrmask.buf, addrmask.maxlen);
1039 return (error);
1040 }
1041
1042 error = copyinstr((const char *)STRUCT_FGETP(uap, proto), proto,
1043 sizeof (proto), &len);
1044 if (error) {
1045 releasef(STRUCT_FGET(uap, fd));
1046 if (addrmask.buf)
1047 kmem_free(addrmask.buf, addrmask.maxlen);
1048 return (error);
1049 }
1050
1051 ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
1052 ASSERT(ncg != NULL);
1053
1054 mutex_enter(&ncg->nfs4_cb_lock);
1055 if (cmd & NFS4_SETPORT)
1056 nfs4_setport(buf, uaddr, protofmly, proto, ncg);
1057
1058 if (cmd & NFS4_KRPC_START) {
1059 error = svc_tli_kcreate(fp, readsize, buf, &addrmask, &cb_xprt,
1060 &nfs4_cb_sct, NULL, NFS_CB_SVCPOOL_ID, FALSE);
1061 if (error) {
1062 CB_WARN1("nfs4_svc: svc_tli_kcreate failed %d\n",
1063 error);
1064 kmem_free(addrmask.buf, addrmask.maxlen);
1065 }
1066 }
1067
1068 mutex_exit(&ncg->nfs4_cb_lock);
1069 releasef(STRUCT_FGET(uap, fd));
1070 return (error);
1071 }
1072
1073 struct nfs4_callback_globals *
1074 nfs4_get_callback_globals(void)
1075 {
1076 return (zone_getspecific(nfs4_callback_zone_key, nfs_zone()));
1077 }
1078
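/*
 * nfs4_callback_init_zone - allocate and initialize the per-zone
 * callback globals: the prognum-to-server table, the delegation
 * return list, the callback port list and the per-zone kstats.
 */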
1079 static void *
1080 nfs4_callback_init_zone(zoneid_t zoneid)
1081 {
1082 kstat_t *nfs4_callback_kstat;
1083 struct nfs4_callback_globals *ncg;
1084
1085 ncg = kmem_zalloc(sizeof (*ncg), KM_SLEEP);
1086
1087 ncg->nfs4prog2server = kmem_zalloc(nfs4_num_prognums *
1088 sizeof (struct nfs4_server *), KM_SLEEP);
1089
1090 /* initialize the dlist */
1091 mutex_init(&ncg->nfs4_dlist_lock, NULL, MUTEX_DEFAULT, NULL);
1092 list_create(&ncg->nfs4_dlist, sizeof (struct nfs4_dnode),
1093 offsetof(struct nfs4_dnode, linkage));
1094
1095 /* initialize cb_port list */
1096 mutex_init(&ncg->nfs4_cb_lock, NULL, MUTEX_DEFAULT, NULL);
1097 list_create(&ncg->nfs4_cb_ports, sizeof (struct nfs4_cb_port),
1098 offsetof(struct nfs4_cb_port, linkage));
1099
1100 /* get our own copy of the kstats */
1101 bcopy(&nfs4_callback_stats_tmpl, &ncg->nfs4_callback_stats,
1102 sizeof (nfs4_callback_stats_tmpl));
1103 /* register "nfs:0:nfs4_callback_stats" for this zone */
1104 if ((nfs4_callback_kstat =
1105 kstat_create_zone("nfs", 0, "nfs4_callback_stats", "misc",
1106 KSTAT_TYPE_NAMED,
1107 sizeof (ncg->nfs4_callback_stats) / sizeof (kstat_named_t),
1108 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE,
1109 zoneid)) != NULL) {
1110 nfs4_callback_kstat->ks_data = &ncg->nfs4_callback_stats;
1111 kstat_install(nfs4_callback_kstat);
1112 }
1113 return (ncg);
1114 }
1115
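/*
 * nfs4_discard_delegations - walk every registered callback program's
 * nfs4_server_t and discard all delegations on its s_deleg_list;
 * used when the zone is shutting down.
 */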
1116 static void
1117 nfs4_discard_delegations(struct nfs4_callback_globals *ncg)
1118 {
1119 nfs4_server_t *sp;
1120 int i, num_removed;
1121
1122 /*
1123 * It's OK here to just run through the registered "programs", as
1124 * servers without programs won't have any delegations to handle.
1125 */
1126 for (i = 0; i < nfs4_num_prognums; i++) {
1127 rnode4_t *rp;
1128
1129 mutex_enter(&ncg->nfs4_cb_lock);
1130 sp = ncg->nfs4prog2server[i];
1131 mutex_exit(&ncg->nfs4_cb_lock);
1132
1133 if (nfs4_server_vlock(sp, 1) == FALSE)
1134 continue;
1135 num_removed = 0;
1136 while ((rp = list_head(&sp->s_deleg_list)) != NULL) {
1137 mutex_enter(&rp->r_statev4_lock);
1138 if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
1139 /*
1140 * We need to take matters into our own hands,
1141 * as nfs4delegreturn_cleanup_impl() won't
1142 * remove this from the list.
1143 */
1144 list_remove(&sp->s_deleg_list, rp);
1145 mutex_exit(&rp->r_statev4_lock);
1146 nfs4_dec_state_ref_count_nolock(sp,
1147 VTOMI4(RTOV4(rp)));
1148 num_removed++;
1149 continue;
1150 }
1151 mutex_exit(&rp->r_statev4_lock);
1152 VN_HOLD(RTOV4(rp));
1153 mutex_exit(&sp->s_lock);
1154 /*
1155 * The following will remove the node from the list.
1156 */
1157 nfs4delegreturn_cleanup_impl(rp, sp, ncg);
1158 VN_RELE(RTOV4(rp));
1159 mutex_enter(&sp->s_lock);
1160 }
1161 mutex_exit(&sp->s_lock);
1162 /* each removed list node reles a reference */
1163 while (num_removed-- > 0)
1164 nfs4_server_rele(sp);
1165 /* remove our reference for nfs4_server_vlock */
1166 nfs4_server_rele(sp);
1167 }
1168 }
1169
1170 /* ARGSUSED */
1171 static void
1172 nfs4_callback_shutdown_zone(zoneid_t zoneid, void *data)
1173 {
1174 struct nfs4_callback_globals *ncg = data;
1175
1176 /*
1177 * Clean pending delegation return list.
1178 */
1179 nfs4_dlistclean_impl(ncg, NFS4_DR_DISCARD);
1180
1181 /*
1182 * Discard all delegations.
1183 */
1184 nfs4_discard_delegations(ncg);
1185 }
1186
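/*
 * nfs4_callback_fini_zone - tear down the per-zone callback state:
 * delete the kstats, discard any remaining delegations, release this
 * zone's nfs4_server_ts and free the port list and globals.
 */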
1187 static void
1188 nfs4_callback_fini_zone(zoneid_t zoneid, void *data)
1189 {
1190 struct nfs4_callback_globals *ncg = data;
1191 struct nfs4_cb_port *p;
1192 nfs4_server_t *sp, *next;
1193 nfs4_server_t freelist;
1194 int i;
1195
1196 kstat_delete_byname_zone("nfs", 0, "nfs4_callback_stats", zoneid);
1197
1198 /*
1199 * Discard all delegations that may have crept in since we did the
1200 * _shutdown.
1201 */
1202 nfs4_discard_delegations(ncg);
1203 /*
1204 * We're completely done with this zone and all associated
1205 * nfs4_server_t's. Any remaining nfs4_server_ts should only have one
1206 * more reference outstanding -- the reference we didn't release in
1207 * nfs4_renew_lease_thread().
1208 *
1209 * Here we need to run through the global nfs4_server_lst as we need to
1210 * deal with nfs4_server_ts without programs, as they also have threads
1211 * created for them, and so have outstanding references that we need to
1212 * release.
1213 */
1214 freelist.forw = &freelist;
1215 freelist.back = &freelist;
1216 mutex_enter(&nfs4_server_lst_lock);
1217 sp = nfs4_server_lst.forw;
1218 while (sp != &nfs4_server_lst) {
1219 next = sp->forw;
1220 if (sp->zoneid == zoneid) {
1221 remque(sp);
1222 insque(sp, &freelist);
1223 }
1224 sp = next;
1225 }
1226 mutex_exit(&nfs4_server_lst_lock);
1227
1228 sp = freelist.forw;
1229 while (sp != &freelist) {
1230 next = sp->forw;
1231 nfs4_server_rele(sp); /* free the list's reference */
1232 sp = next;
1233 }
1234
1235 #ifdef DEBUG
1236 for (i = 0; i < nfs4_num_prognums; i++) {
1237 ASSERT(ncg->nfs4prog2server[i] == NULL);
1238 }
1239 #endif
1240 kmem_free(ncg->nfs4prog2server, nfs4_num_prognums *
1241 sizeof (struct nfs4_server *));
1242
1243 mutex_enter(&ncg->nfs4_cb_lock);
1244 while ((p = list_head(&ncg->nfs4_cb_ports)) != NULL) {
1245 list_remove(&ncg->nfs4_cb_ports, p);
1246 kmem_free(p, sizeof (*p));
1247 }
1248 list_destroy(&ncg->nfs4_cb_ports);
1249 mutex_destroy(&ncg->nfs4_cb_lock);
1250 list_destroy(&ncg->nfs4_dlist);
1251 mutex_destroy(&ncg->nfs4_dlist_lock);
1252 kmem_free(ncg, sizeof (*ncg));
1253 }
1254
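/*
 * nfs4_callback_init - global initialization: build the SVC callout
 * table covering all callback program numbers, compute the size of
 * the cb_getattr reply buffer and register the per-zone key.
 */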
1255 void
1256 nfs4_callback_init(void)
1257 {
1258 int i;
1259 SVC_CALLOUT *nfs4_cb_sc;
1260
1261 /* initialize the callback table */
1262 nfs4_cb_sc = kmem_alloc(nfs4_num_prognums *
1263 sizeof (SVC_CALLOUT), KM_SLEEP);
1264
1265 for (i = 0; i < nfs4_num_prognums; i++) {
1266 nfs4_cb_sc[i].sc_prog = NFS4_CALLBACK+i;
1267 nfs4_cb_sc[i].sc_versmin = NFS_CB;
1268 nfs4_cb_sc[i].sc_versmax = NFS_CB;
1269 nfs4_cb_sc[i].sc_dispatch = cb_dispatch;
1270 }
1271
1272 nfs4_cb_sct.sct_size = nfs4_num_prognums;
1273 nfs4_cb_sct.sct_free = FALSE;
1274 nfs4_cb_sct.sct_sc = nfs4_cb_sc;
1275
1276 /*
1277 * Compute max bytes required for dynamically allocated parts
1278 * of cb_getattr reply. Only size and change are supported now.
1279 * If CB_GETATTR is changed to reply with additional attrs,
1280 * additional sizes must be added below.
1281 *
1282 * fattr4_change + fattr4_size == uint64_t + uint64_t
1283 */
1284 cb_getattr_bytes = 2 * BYTES_PER_XDR_UNIT + 2 * BYTES_PER_XDR_UNIT;
1285
1286 zone_key_create(&nfs4_callback_zone_key, nfs4_callback_init_zone,
1287 nfs4_callback_shutdown_zone, nfs4_callback_fini_zone);
1288 }
1289
1290 void
1291 nfs4_callback_fini(void)
1292 {
1293 }
1294
1295 /*
1296 * NB: This function can be called from the *wrong* zone (ie, the zone that
1297 * 'rp' belongs to and the caller's zone may not be the same). This can happen
1298 * if the zone is going away and we get called from nfs4_async_inactive(). In
1299 * this case the globals will be NULL and we won't update the counters, which
1300 * doesn't matter as the zone is going away anyhow.
1301 */
1302 static void
1303 nfs4delegreturn_cleanup_impl(rnode4_t *rp, nfs4_server_t *np,
1304 struct nfs4_callback_globals *ncg)
1305 {
1306 mntinfo4_t *mi = VTOMI4(RTOV4(rp));
1307 boolean_t need_rele = B_FALSE;
1308
1309 /*
1310 * Caller must be holding mi_recovlock in read mode
1311 * to call here. This is provided by start_op.
1312 * Delegation management requires grabbing s_lock
1313 * first and then r_statev4_lock.
1314 */
1315
1316 if (np == NULL) {
1317 np = find_nfs4_server_all(mi, 1);
1318 if (np == NULL)
1319 return;
1320 need_rele = B_TRUE;
1321 } else {
1322 mutex_enter(&np->s_lock);
1323 }
1324
1325 mutex_enter(&rp->r_statev4_lock);
1326
1327 if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
1328 mutex_exit(&rp->r_statev4_lock);
1329 mutex_exit(&np->s_lock);
1330 if (need_rele)
1331 nfs4_server_rele(np);
1332 return;
1333 }
1334
1335 /*
1336 * Free the cred originally held when
1337 * the delegation was granted. Caller must
1338 * hold this cred if it wants to use it after
1339 * this call.
1340 */
1341 crfree(rp->r_deleg_cred);
1342 rp->r_deleg_cred = NULL;
1343 rp->r_deleg_type = OPEN_DELEGATE_NONE;
1344 rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
1345 rp->r_deleg_needs_recall = FALSE;
1346 rp->r_deleg_return_pending = FALSE;
1347
1348 /*
1349 * Remove the rnode from the server's list and
1350 * update the ref counts.
1351 */
1352 list_remove(&np->s_deleg_list, rp);
1353 mutex_exit(&rp->r_statev4_lock);
1354 nfs4_dec_state_ref_count_nolock(np, mi);
1355 mutex_exit(&np->s_lock);
1356 /* removed list node removes a reference */
1357 nfs4_server_rele(np);
1358 if (need_rele)
1359 nfs4_server_rele(np);
1360 if (ncg != NULL)
1361 ncg->nfs4_callback_stats.delegations.value.ui64--;
1362 }
1363
1364 void
1365 nfs4delegreturn_cleanup(rnode4_t *rp, nfs4_server_t *np)
1366 {
1367 struct nfs4_callback_globals *ncg;
1368
1369 if (np != NULL) {
1370 ncg = np->zone_globals;
1371 } else if (nfs_zone() == VTOMI4(RTOV4(rp))->mi_zone) {
1372 ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
1373 ASSERT(ncg != NULL);
1374 } else {
1375 /*
1376 * Request coming from the wrong zone.
1377 */
1378 ASSERT(getzoneid() == GLOBAL_ZONEID);
1379 ncg = NULL;
1380 }
1381
1382 nfs4delegreturn_cleanup_impl(rp, np, ncg);
1383 }
1384
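/*
 * nfs4delegreturn_save_lost_rqst - if the error indicates an
 * interrupted or timed-out request (or a forced unmount), fill in
 * a lost-request record so the recovery framework can re-drive the
 * DELEGRETURN; otherwise mark the record unused (lr_op == 0).
 */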
1385 static void
1386 nfs4delegreturn_save_lost_rqst(int error, nfs4_lost_rqst_t *lost_rqstp,
1387 cred_t *cr, vnode_t *vp)
1388 {
1389 if (error != ETIMEDOUT && error != EINTR &&
1390 !NFS4_FRC_UNMT_ERR(error, vp->v_vfsp)) {
1391 lost_rqstp->lr_op = 0;
1392 return;
1393 }
1394
1395 NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
1396 "nfs4close_save_lost_rqst: error %d", error));
1397
1398 lost_rqstp->lr_op = OP_DELEGRETURN;
1399 /*
1400 * The vp is held and rele'd via the recovery code.
1401 * See nfs4_save_lost_rqst.
1402 */
1403 lost_rqstp->lr_vp = vp;
1404 lost_rqstp->lr_dvp = NULL;
1405 lost_rqstp->lr_oop = NULL;
1406 lost_rqstp->lr_osp = NULL;
1407 lost_rqstp->lr_lop = NULL;
1408 lost_rqstp->lr_cr = cr;
1409 lost_rqstp->lr_flk = NULL;
1410 lost_rqstp->lr_putfirst = FALSE;
1411 }
1412
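/*
 * nfs4delegreturn_otw - perform the over-the-wire DELEGRETURN
 * compound: PUTFH, GETATTR, DELEGRETURN. On success the GETATTR
 * result is used to refresh the attribute cache, since we are
 * giving up the delegation that let us maintain the change
 * attribute and size locally.
 */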
1413 static void
1414 nfs4delegreturn_otw(rnode4_t *rp, cred_t *cr, nfs4_error_t *ep)
1415 {
1416 COMPOUND4args_clnt args;
1417 COMPOUND4res_clnt res;
1418 nfs_argop4 argops[3];
1419 nfs4_ga_res_t *garp = NULL;
1420 hrtime_t t;
1421 int numops;
1422 int doqueue = 1;
1423
1424 args.ctag = TAG_DELEGRETURN;
1425
1426 numops = 3; /* PUTFH, GETATTR, DELEGRETURN */
1427
1428 args.array = argops;
1429 args.array_len = numops;
1430
1431 argops[0].argop = OP_CPUTFH;
1432 argops[0].nfs_argop4_u.opcputfh.sfh = rp->r_fh;
1433
1434 argops[1].argop = OP_GETATTR;
1435 argops[1].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK;
1436 argops[1].nfs_argop4_u.opgetattr.mi = VTOMI4(RTOV4(rp));
1437
1438 argops[2].argop = OP_DELEGRETURN;
1439 argops[2].nfs_argop4_u.opdelegreturn.deleg_stateid =
1440 rp->r_deleg_stateid;
1441
1442 t = gethrtime();
1443 rfs4call(VTOMI4(RTOV4(rp)), &args, &res, cr, &doqueue, 0, ep);
1444
1445 if (ep->error)
1446 return;
1447
1448 if (res.status == NFS4_OK) {
1449 garp = &res.array[1].nfs_resop4_u.opgetattr.ga_res;
1450 nfs4_attr_cache(RTOV4(rp), garp, t, cr, TRUE, NULL);
1451
1452 }
1453 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1454 }
1455
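/*
 * nfs4_do_delegreturn - return a delegation under the start_fop/
 * end_op protocol, looping until the return completes, is handed
 * off to the recovery framework, or fails permanently.
 */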
1456 int
1457 nfs4_do_delegreturn(rnode4_t *rp, int flags, cred_t *cr,
1458 struct nfs4_callback_globals *ncg)
1459 {
1460 vnode_t *vp = RTOV4(rp);
1461 mntinfo4_t *mi = VTOMI4(vp);
1462 nfs4_lost_rqst_t lost_rqst;
1463 nfs4_recov_state_t recov_state;
1464 bool_t needrecov = FALSE, recovonly, done = FALSE;
1465 nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
1466
1467 ncg->nfs4_callback_stats.delegreturn.value.ui64++;
1468
1469 while (!done) {
1470 e.error = nfs4_start_fop(mi, vp, NULL, OH_DELEGRETURN,
1471 &recov_state, &recovonly);
1472
1473 if (e.error) {
1474 if (flags & NFS4_DR_FORCE) {
1475 (void) nfs_rw_enter_sig(&mi->mi_recovlock,
1476 RW_READER, 0);
1477 nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
1478 nfs_rw_exit(&mi->mi_recovlock);
1479 }
1480 break;
1481 }
1482
1483 /*
1484 * Check to see if the delegation has already been
1485 * returned by the recovery thread. The state of
1486 * the delegation cannot change at this point due
1487 * to start_fop and the r_deleg_recall_lock.
1488 */
1489 if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
1490 e.error = 0;
1491 nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
1492 break;
1493 }
1494
1495 if (recovonly) {
1496 /*
1497 * Delegation will be returned via the
1498 * recovery framework. Build a lost request
1499 * structure, start recovery and get out.
1500 */
1501 nfs4_error_init(&e, EINTR);
1502 nfs4delegreturn_save_lost_rqst(e.error, &lost_rqst,
1503 cr, vp);
1504 (void) nfs4_start_recovery(&e, mi, vp,
1505 NULL, &rp->r_deleg_stateid,
1506 lost_rqst.lr_op == OP_DELEGRETURN ?
1507 &lost_rqst : NULL, OP_DELEGRETURN, NULL,
1508 NULL, NULL);
1509 nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
1510 break;
1511 }
1512
1513 nfs4delegreturn_otw(rp, cr, &e);
1514
1515 /*
1516 * Ignore some errors on delegreturn; no point in marking
1517 * the file dead on a state destroying operation.
1518 */
1519 if (e.error == 0 && (nfs4_recov_marks_dead(e.stat) ||
1520 e.stat == NFS4ERR_BADHANDLE ||
1521 e.stat == NFS4ERR_STALE ||
1522 (e.stat == NFS4ERR_STALE_STATEID &&
1523 !(rp->r_flags & R4HASHED))))
1524 needrecov = FALSE;
1525 else
1526 needrecov = nfs4_needs_recovery(&e, TRUE, vp->v_vfsp);
1527
1528 if (needrecov) {
1529 nfs4delegreturn_save_lost_rqst(e.error, &lost_rqst,
1530 cr, vp);
1531 (void) nfs4_start_recovery(&e, mi, vp,
1532 NULL, &rp->r_deleg_stateid,
1533 lost_rqst.lr_op == OP_DELEGRETURN ?
1534 &lost_rqst : NULL, OP_DELEGRETURN, NULL,
1535 NULL, NULL);
1536 } else {
1537 nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
1538 done = TRUE;
1539 }
1540
1541 nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
1542 }
1543 return (e.error);
1544 }
1545
1546 /*
1547 * nfs4_resend_delegreturn - used to drive the delegreturn
1548 * operation via the recovery thread.
1549 */
1550 void
1551 nfs4_resend_delegreturn(nfs4_lost_rqst_t *lorp, nfs4_error_t *ep,
1552 nfs4_server_t *np)
1553 {
1554 rnode4_t *rp = VTOR4(lorp->lr_vp);
1555
1556 /* If the file failed recovery, just quit. */
1557 mutex_enter(&rp->r_statelock);
1558 if (rp->r_flags & R4RECOVERR) {
1559 ep->error = EIO;
1560 }
1561 mutex_exit(&rp->r_statelock);
1562
1563 if (!ep->error)
1564 nfs4delegreturn_otw(rp, lorp->lr_cr, ep);
1565
1566 /*
1567 * If recovery is now needed, then return the error
1568 * and status and let the recovery thread handle it,
1569 * including re-driving another delegreturn. Otherwise,
1570 * just give up and clean up the delegation.
1571 */
1572 if (nfs4_needs_recovery(ep, TRUE, lorp->lr_vp->v_vfsp))
1573 return;
1574
1575 if (rp->r_deleg_type != OPEN_DELEGATE_NONE)
1576 nfs4delegreturn_cleanup(rp, np);
1577
1578 nfs4_error_zinit(ep);
1579 }
1580
1581 /*
1582 * nfs4delegreturn - general function to return a delegation.
1583 *
1584 * NFS4_DR_FORCE - return the delegation even if start_op fails
1585 * NFS4_DR_PUSH - push modified data back to the server via VOP_PUTPAGE
1586 * NFS4_DR_DISCARD - discard the delegation w/o delegreturn
1587 * NFS4_DR_DID_OP - calling function already did nfs4_start_op
1588 * NFS4_DR_RECALL - delegreturn initiated via CB_RECALL
1589 * NFS4_DR_REOPEN - do file reopens, if applicable
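 * (e.g., cb_recall initiates a delegreturn with NFS4_DR_RECALL|NFS4_DR_REOPEN)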
1590 */
1591 static int
1592 nfs4delegreturn_impl(rnode4_t *rp, int flags, struct nfs4_callback_globals *ncg)
1593 {
1594 int error = 0;
1595 cred_t *cr = NULL;
1596 vnode_t *vp;
1597 bool_t needrecov = FALSE;
1598 bool_t rw_entered = FALSE;
1599 bool_t do_reopen;
1600
1601 vp = RTOV4(rp);
1602
1603 /*
1604 * If NFS4_DR_DISCARD is set by itself, take a short-cut and
1605 * discard without doing an otw DELEGRETURN. This may only be used
1606 * by the recovery thread because it bypasses the synchronization
1607 * with r_deleg_recall_lock and mi->mi_recovlock.
1608 */
1609 if (flags == NFS4_DR_DISCARD) {
1610 nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
1611 return (0);
1612 }
1613
1614 if (flags & NFS4_DR_DID_OP) {
1615 /*
1616 * Caller had already done start_op, which means the
1617 * r_deleg_recall_lock is already held in READ mode
1618 * so we cannot take it in write mode. Return the
1619 * delegation asynchronously.
1620 *
1621 * Remove the NFS4_DR_DID_OP flag so we don't
1622 * get stuck looping through here.
1623 */
1624 VN_HOLD(vp);
1625 nfs4delegreturn_async(rp, (flags & ~NFS4_DR_DID_OP), FALSE);
1626 return (0);
1627 }
1628
1629 /*
1630 * Verify we still have a delegation and crhold the credential.
1631 */
1632 mutex_enter(&rp->r_statev4_lock);
1633 if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
1634 mutex_exit(&rp->r_statev4_lock);
1635 goto out;
1636 }
1637 cr = rp->r_deleg_cred;
1638 ASSERT(cr != NULL);
1639 crhold(cr);
1640 mutex_exit(&rp->r_statev4_lock);
1641
1642 /*
1643 * Push the modified data back to the server synchronously
1644 * before doing DELEGRETURN.
1645 */
1646 if (flags & NFS4_DR_PUSH)
1647 (void) VOP_PUTPAGE(vp, 0, 0, 0, cr, NULL);
1648
1649 /*
1650 * Take r_deleg_recall_lock in WRITE mode, this will prevent
1651 * nfs4_is_otw_open_necessary from trying to use the delegation
1652 * while the DELEGRETURN is in progress.
1653 */
1654 (void) nfs_rw_enter_sig(&rp->r_deleg_recall_lock, RW_WRITER, FALSE);
1655
1656 rw_entered = TRUE;
1657
1658 if (rp->r_deleg_type == OPEN_DELEGATE_NONE)
1659 goto out;
1660
1661 if (flags & NFS4_DR_REOPEN) {
1662 /*
1663 * If R4RECOVERRP is already set, then skip re-opening
1664 * the delegation open streams and go straight to doing
1665 * delegreturn. (XXX if the file has failed recovery, then the
1666 * delegreturn attempt is likely to be futile.)
1667 */
1668 mutex_enter(&rp->r_statelock);
1669 do_reopen = !(rp->r_flags & R4RECOVERRP);
1670 mutex_exit(&rp->r_statelock);
1671
1672 if (do_reopen) {
1673 error = deleg_reopen(vp, &needrecov, ncg, flags);
1674 if (error != 0) {
1675 if ((flags & (NFS4_DR_FORCE | NFS4_DR_RECALL))
1676 == 0)
1677 goto out;
1678 } else if (needrecov) {
1679 if ((flags & NFS4_DR_FORCE) == 0)
1680 goto out;
1681 }
1682 }
1683 }
1684
1685 if (flags & NFS4_DR_DISCARD) {
1686 mntinfo4_t *mi = VTOMI4(RTOV4(rp));
1687
1688 mutex_enter(&rp->r_statelock);
1689 /*
1690 * deleg_return_pending is cleared inside of delegation_accept
1691 * when a delegation is accepted. If this flag has been
1692 * cleared, then a new delegation has overwritten the one we
1693 * were about to throw away.
1694 */
1695 if (!rp->r_deleg_return_pending) {
1696 mutex_exit(&rp->r_statelock);
1697 goto out;
1698 }
1699 mutex_exit(&rp->r_statelock);
1700 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, FALSE);
1701 nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
1702 nfs_rw_exit(&mi->mi_recovlock);
1703 } else {
1704 error = nfs4_do_delegreturn(rp, flags, cr, ncg);
1705 }
1706
1707 out:
1708 if (cr)
1709 crfree(cr);
1710 if (rw_entered)
1711 nfs_rw_exit(&rp->r_deleg_recall_lock);
1712 return (error);
1713 }
1714
1715 int
1716 nfs4delegreturn(rnode4_t *rp, int flags)
1717 {
1718 struct nfs4_callback_globals *ncg;
1719
1720 ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
1721 ASSERT(ncg != NULL);
1722
1723 return (nfs4delegreturn_impl(rp, flags, ncg));
1724 }
1725
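/*
 * nfs4delegreturn_async - return a delegation from a separate thread
 * so the caller (e.g., the callback service) is not blocked by the
 * over-the-wire DELEGRETURN.
 */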
1726 void
1727 nfs4delegreturn_async(rnode4_t *rp, int flags, bool_t trunc)
1728 {
1729 struct cb_recall_pass *pp;
1730
1731 pp = kmem_alloc(sizeof (struct cb_recall_pass), KM_SLEEP);
1732 pp->rp = rp;
1733 pp->flags = flags;
1734 pp->truncate = trunc;
1735
1736 /*
1737 * Fire up a thread to do the actual delegreturn
1738 * Caller must guarantee that the rnode doesn't
1739 * vanish (by calling VN_HOLD).
1740 */
1741
1742 (void) zthread_create(NULL, 0, nfs4delegreturn_thread, pp, 0,
1743 minclsyspri);
1744 }
1745
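/*
 * delegreturn_all_thread - async worker for nfs4_delegreturn_all:
 * look up the nfs4_server_t by callback program number and return
 * every delegation on its s_deleg_list.
 */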
1746 static void
1747 delegreturn_all_thread(rpcprog_t *pp)
1748 {
1749 nfs4_server_t *np;
1750 bool_t found = FALSE;
1751 rpcprog_t prog;
1752 rnode4_t *rp;
1753 vnode_t *vp;
1754 zoneid_t zoneid = getzoneid();
1755 struct nfs4_callback_globals *ncg;
1756
1757 NFS4_DEBUG(nfs4_drat_debug,
1758 (CE_NOTE, "delereturn_all_thread: prog %d\n", *pp));
1759
1760 prog = *pp;
1761 kmem_free(pp, sizeof (*pp));
1762 pp = NULL;
1763
1764 mutex_enter(&nfs4_server_lst_lock);
1765 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) {
1766 if (np->zoneid == zoneid && np->s_program == prog) {
1767 mutex_enter(&np->s_lock);
1768 found = TRUE;
1769 break;
1770 }
1771 }
1772 mutex_exit(&nfs4_server_lst_lock);
1773
1774 /*
1775 * It's possible that the nfs4_server which was using this
1776 * program number has vanished since this thread is async.
1777 * If so, just return. Your work here is finished, my friend.
1778 */
1779 if (!found)
1780 goto out;
1781
1782 ncg = np->zone_globals;
1783 while ((rp = list_head(&np->s_deleg_list)) != NULL) {
1784 vp = RTOV4(rp);
1785 VN_HOLD(vp);
1786 mutex_exit(&np->s_lock);
1787 (void) nfs4delegreturn_impl(rp, NFS4_DR_PUSH|NFS4_DR_REOPEN,
1788 ncg);
1789 VN_RELE(vp);
1790
1791 /* retake the s_lock for next trip through the loop */
1792 mutex_enter(&np->s_lock);
1793 }
1794 mutex_exit(&np->s_lock);
1795 out:
1796 NFS4_DEBUG(nfs4_drat_debug,
1797 (CE_NOTE, "delereturn_all_thread: complete\n"));
1798 zthread_exit();
1799 }
1800
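/*
 * nfs4_delegreturn_all - return all delegations held from the given
 * server. The work is done asynchronously by delegreturn_all_thread,
 * keyed by the server's callback program number.
 */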
1801 void
1802 nfs4_delegreturn_all(nfs4_server_t *sp)
1803 {
1804 rpcprog_t pro, *pp;
1805
1806 mutex_enter(&sp->s_lock);
1807
1808 /* Check to see if the delegation list is empty */
1809
1810 if (list_head(&sp->s_deleg_list) == NULL) {
1811 mutex_exit(&sp->s_lock);
1812 return;
1813 }
1814 /*
1815 * Grab the program number; the async thread will use this
1816 * to find the nfs4_server.
1817 */
1818 pro = sp->s_program;
1819 mutex_exit(&sp->s_lock);
1820 pp = kmem_alloc(sizeof (rpcprog_t), KM_SLEEP);
1821 *pp = pro;
1822 (void) zthread_create(NULL, 0, delegreturn_all_thread, pp, 0,
1823 minclsyspri);
1824 }
1825
1826
1827 /*
1828 * Discard any delegations
1829 *
1830 * Iterate over the servers s_deleg_list and
1831 * for matching mount-point rnodes discard
1832 * the delegation.
1833 */
1834 void
1835 nfs4_deleg_discard(mntinfo4_t *mi, nfs4_server_t *sp)
1836 {
1837 rnode4_t *rp, *next;
1838 mntinfo4_t *r_mi;
1839 struct nfs4_callback_globals *ncg;
1840
1841 ASSERT(mutex_owned(&sp->s_lock));
1842 ncg = sp->zone_globals;
1843
1844 for (rp = list_head(&sp->s_deleg_list); rp != NULL; rp = next) {
1845 r_mi = VTOMI4(RTOV4(rp));
1846 next = list_next(&sp->s_deleg_list, rp);
1847
1848 if (r_mi != mi) {
1849 /*
1850 * Skip if this rnode is not on the
1851 * same mount-point
1852 */
1853 continue;
1854 }
1855
1856 ASSERT(rp->r_deleg_type == OPEN_DELEGATE_READ);
1857
1858 #ifdef DEBUG
1859 if (nfs4_client_recov_debug) {
1860 zprintf(getzoneid(),
1861 "nfs4_deleg_discard: matched rnode %p "
1862 "-- discarding delegation\n", (void *)rp);
1863 }
1864 #endif
1865 mutex_enter(&rp->r_statev4_lock);
1866 /*
1867 * Free the cred originally held when the delegation
1868 * was granted. Also need to decrement the refcnt
1869 * on this server for each delegation we discard
1870 */
1871 if (rp->r_deleg_cred)
1872 crfree(rp->r_deleg_cred);
1873 rp->r_deleg_cred = NULL;
1874 rp->r_deleg_type = OPEN_DELEGATE_NONE;
1875 rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
1876 rp->r_deleg_needs_recall = FALSE;
1877 ASSERT(sp->s_refcnt > 1);
1878 sp->s_refcnt--;
1879 list_remove(&sp->s_deleg_list, rp);
1880 mutex_exit(&rp->r_statev4_lock);
1881 nfs4_dec_state_ref_count_nolock(sp, mi);
1882 ncg->nfs4_callback_stats.delegations.value.ui64--;
1883 }
1884 }
1885
1886 /*
1887 * Reopen any open streams that were covered by the given file's
1888 * delegation.
1889 * Returns zero or an errno value. If there was no error, *recovp
1890 * indicates whether recovery was initiated.
1891 */
1892
1893 static int
1894 deleg_reopen(vnode_t *vp, bool_t *recovp, struct nfs4_callback_globals *ncg,
1895 int flags)
1896 {
1897 nfs4_open_stream_t *osp;
1898 nfs4_recov_state_t recov_state;
1899 bool_t needrecov = FALSE;
1900 mntinfo4_t *mi;
1901 rnode4_t *rp;
1902 nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
1903 int claimnull;
1904
1905 mi = VTOMI4(vp);
1906 rp = VTOR4(vp);
1907
1908 recov_state.rs_flags = 0;
1909 recov_state.rs_num_retry_despite_err = 0;
1910
1911 retry:
1912 if ((e.error = nfs4_start_op(mi, vp, NULL, &recov_state)) != 0) {
1913 return (e.error);
1914 }
1915
1916 /*
1917 * If we mean to discard the delegation, it must be BAD, so don't
1918 * use it when doing the reopen or it will fail too.
1919 */
1920 claimnull = (flags & NFS4_DR_DISCARD);
1921 /*
1922 * Loop through the open streams for this rnode to find
1923 * all of the ones created using the delegation state ID.
1924 * Each of these needs to be re-opened.
1925 */
1926
	while ((osp = get_next_deleg_stream(rp, claimnull)) != NULL) {

		if (claimnull) {
			nfs4_reopen(vp, osp, &e, CLAIM_NULL, FALSE, FALSE);
		} else {
			ncg->nfs4_callback_stats.claim_cur.value.ui64++;

			nfs4_reopen(vp, osp, &e, CLAIM_DELEGATE_CUR, FALSE,
			    FALSE);
			if (e.error == 0 && e.stat == NFS4_OK)
				ncg->nfs4_callback_stats.
				    claim_cur_ok.value.ui64++;
		}

		if (e.error == EAGAIN) {
			open_stream_rele(osp, rp);
			nfs4_end_op(mi, vp, NULL, &recov_state, TRUE);
			goto retry;
		}

		/*
		 * if error is EINTR, ETIMEDOUT, or NFS4_FRC_UNMT_ERR, then
		 * recovery has already been started inside of nfs4_reopen.
		 */
		if (e.error == EINTR || e.error == ETIMEDOUT ||
		    NFS4_FRC_UNMT_ERR(e.error, vp->v_vfsp)) {
			open_stream_rele(osp, rp);
			break;
		}

		needrecov = nfs4_needs_recovery(&e, TRUE, vp->v_vfsp);

		if (e.error != 0 && !needrecov) {
			/*
			 * Recovery is not possible, but don't give up yet;
			 * we'd still like to do delegreturn after
			 * reopening as many streams as possible.
			 * Continue processing the open streams.
			 */

			ncg->nfs4_callback_stats.recall_failed.value.ui64++;

		} else if (needrecov) {
			/*
			 * Start recovery and bail out.  The recovery
			 * thread will take it from here.
			 */
			(void) nfs4_start_recovery(&e, mi, vp, NULL, NULL,
			    NULL, OP_OPEN, NULL, NULL, NULL);
			open_stream_rele(osp, rp);
			*recovp = TRUE;
			break;
		}

		open_stream_rele(osp, rp);
	}

	nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);

	return (e.error);
}

/*
 * get_next_deleg_stream - returns the next open stream which
 * represents a delegation for this rnode.  In order to assure
 * forward progress, the caller must guarantee that each open
 * stream returned is changed so that a future call won't return
 * it again.
 *
 * There are several ways for the open stream to change.  If the open
 * stream is !os_delegation, then we aren't interested in it.  Also, if
 * either os_failed_reopen or !os_valid, then don't return the osp.
 *
 * If claimnull is false (doing reopen CLAIM_DELEGATE_CUR) then return
 * the osp if it is an os_delegation open stream.  Also, if the rnode still
 * has r_deleg_return_pending, then return the os_delegation osp.  Lastly,
 * if the rnode's r_deleg_stateid is different from the osp's open_stateid,
 * then return the osp.
 *
 * We have already taken the 'r_deleg_recall_lock' as WRITER, which
 * prevents new OPENs from going OTW (as start_fop takes this
 * lock in READ mode); thus, no new open streams can be created
 * (which inherently means no new delegation open streams are
 * being created).
 */

static nfs4_open_stream_t *
get_next_deleg_stream(rnode4_t *rp, int claimnull)
{
	nfs4_open_stream_t *osp;

	ASSERT(nfs_rw_lock_held(&rp->r_deleg_recall_lock, RW_WRITER));

	/*
	 * Search through the list of open streams looking for
	 * one that was created while holding the delegation.
	 */
	mutex_enter(&rp->r_os_lock);
	for (osp = list_head(&rp->r_open_streams); osp != NULL;
	    osp = list_next(&rp->r_open_streams, osp)) {
		mutex_enter(&osp->os_sync_lock);
		if (!osp->os_delegation || osp->os_failed_reopen ||
		    !osp->os_valid) {
			mutex_exit(&osp->os_sync_lock);
			continue;
		}
		if (!claimnull || rp->r_deleg_return_pending ||
		    !stateid4_cmp(&osp->open_stateid, &rp->r_deleg_stateid)) {
			osp->os_ref_count++;
			mutex_exit(&osp->os_sync_lock);
			mutex_exit(&rp->r_os_lock);
			return (osp);
		}
		mutex_exit(&osp->os_sync_lock);
	}
	mutex_exit(&rp->r_os_lock);

	return (NULL);
}
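
/*
 * Usage sketch (illustrative): forward progress requires that each
 * stream returned by get_next_deleg_stream() be changed before the
 * next call, so the canonical consumer is a drain loop like the one
 * in deleg_reopen() above:
 *
 *	while ((osp = get_next_deleg_stream(rp, claimnull)) != NULL) {
 *		nfs4_reopen(vp, osp, &e, CLAIM_NULL, FALSE, FALSE);
 *		open_stream_rele(osp, rp);
 *	}
 */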

static void
nfs4delegreturn_thread(struct cb_recall_pass *args)
{
	rnode4_t *rp;
	vnode_t *vp;
	cred_t *cr;
	int dtype, error, flags;
	bool_t rdirty, rip;
	kmutex_t cpr_lock;
	callb_cpr_t cpr_info;
	struct nfs4_callback_globals *ncg;

	ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
	ASSERT(ncg != NULL);

	mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);

	CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr,
	    "nfsv4delegRtn");

	rp = args->rp;
	vp = RTOV4(rp);

	mutex_enter(&rp->r_statev4_lock);
	if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
		mutex_exit(&rp->r_statev4_lock);
		goto out;
	}
	mutex_exit(&rp->r_statev4_lock);

	/*
	 * Take the read-write lock in read mode to prevent other
	 * threads from modifying the data during the recall.  This
	 * doesn't affect mmappers.
	 */
	(void) nfs_rw_enter_sig(&rp->r_rwlock, RW_READER, FALSE);

	/* Proceed with delegreturn */

	mutex_enter(&rp->r_statev4_lock);
	if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
		mutex_exit(&rp->r_statev4_lock);
		nfs_rw_exit(&rp->r_rwlock);
		goto out;
	}
	dtype = rp->r_deleg_type;
	cr = rp->r_deleg_cred;
	ASSERT(cr != NULL);
	crhold(cr);
	mutex_exit(&rp->r_statev4_lock);

	flags = args->flags;

	/*
	 * If the file is being truncated at the server, then throw
	 * away all of the pages; it doesn't matter what flavor of
	 * delegation we have.
	 */

	if (args->truncate) {
		ncg->nfs4_callback_stats.recall_trunc.value.ui64++;
		nfs4_invalidate_pages(vp, 0, cr);
	} else if (dtype == OPEN_DELEGATE_WRITE) {

		mutex_enter(&rp->r_statelock);
		rdirty = rp->r_flags & R4DIRTY;
		mutex_exit(&rp->r_statelock);

		if (rdirty) {
			error = VOP_PUTPAGE(vp, 0, 0, 0, cr, NULL);

			if (error)
				CB_WARN1("nfs4delegreturn_thread:"
				    " VOP_PUTPAGE: %d\n", error);
		}
		/* turn off NFS4_DR_PUSH because we just did that above. */
		flags &= ~NFS4_DR_PUSH;
	}

	mutex_enter(&rp->r_statelock);
	rip = rp->r_flags & R4RECOVERRP;
	mutex_exit(&rp->r_statelock);

	/* If a failed recovery is indicated, discard the pages */

	if (rip) {

		error = VOP_PUTPAGE(vp, 0, 0, B_INVAL, cr, NULL);

		if (error)
			CB_WARN1("nfs4delegreturn_thread: VOP_PUTPAGE: %d\n",
			    error);
	}

	/*
	 * Pass the flags to nfs4delegreturn_impl, but be sure not to pass
	 * NFS4_DR_DID_OP, which just calls nfs4delegreturn_async again.
	 */
	flags &= ~NFS4_DR_DID_OP;

	(void) nfs4delegreturn_impl(rp, flags, ncg);

	nfs_rw_exit(&rp->r_rwlock);
	crfree(cr);
out:
	kmem_free(args, sizeof (struct cb_recall_pass));
	VN_RELE(vp);
	mutex_enter(&cpr_lock);
	CALLB_CPR_EXIT(&cpr_info);
	mutex_destroy(&cpr_lock);
	zthread_exit();
}

/*
 * This function assumes that its caller is either doing recovery
 * (and therefore cannot call nfs4_start_op) or has already called
 * nfs4_start_op().
 */
void
nfs4_delegation_accept(rnode4_t *rp, open_claim_type4 claim, OPEN4res *res,
    nfs4_ga_res_t *garp, cred_t *cr)
{
	open_read_delegation4 *orp;
	open_write_delegation4 *owp;
	nfs4_server_t *np;
	bool_t already = FALSE;
	bool_t recall = FALSE;
	bool_t valid_garp = TRUE;
	bool_t delegation_granted = FALSE;
	bool_t dr_needed = FALSE;
	bool_t recov;
	int dr_flags = 0;
	long mapcnt;
	uint_t rflag;
	mntinfo4_t *mi;
	struct nfs4_callback_globals *ncg;
	open_delegation_type4 odt;

	ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
	ASSERT(ncg != NULL);

	mi = VTOMI4(RTOV4(rp));

	/*
	 * Accept a delegation granted to the client via an OPEN.
	 * Set the delegation fields in the rnode and insert the
	 * rnode onto the list anchored in the nfs4_server_t.  The
	 * proper locking order requires the nfs4_server_t first,
	 * even though it may not be needed in all cases.
	 *
	 * NB: find_nfs4_server returns with s_lock held.
	 */

	if ((np = find_nfs4_server(mi)) == NULL)
		return;

	/* grab the statelock too, for examining r_mapcnt */
	mutex_enter(&rp->r_statelock);
	mutex_enter(&rp->r_statev4_lock);

	if (rp->r_deleg_type == OPEN_DELEGATE_READ ||
	    rp->r_deleg_type == OPEN_DELEGATE_WRITE)
		already = TRUE;

	odt = res->delegation.delegation_type;

	if (odt == OPEN_DELEGATE_READ) {

		rp->r_deleg_type = res->delegation.delegation_type;
		orp = &res->delegation.open_delegation4_u.read;
		rp->r_deleg_stateid = orp->stateid;
		rp->r_deleg_perms = orp->permissions;
		if (claim == CLAIM_PREVIOUS)
			if ((recall = orp->recall) != 0)
				dr_needed = TRUE;

		delegation_granted = TRUE;

		ncg->nfs4_callback_stats.delegations.value.ui64++;
		ncg->nfs4_callback_stats.delegaccept_r.value.ui64++;

	} else if (odt == OPEN_DELEGATE_WRITE) {

		rp->r_deleg_type = res->delegation.delegation_type;
		owp = &res->delegation.open_delegation4_u.write;
		rp->r_deleg_stateid = owp->stateid;
		rp->r_deleg_perms = owp->permissions;
		rp->r_deleg_limit = owp->space_limit;
		if (claim == CLAIM_PREVIOUS)
			if ((recall = owp->recall) != 0)
				dr_needed = TRUE;

		delegation_granted = TRUE;

		if (garp == NULL || !garp->n4g_change_valid) {
			valid_garp = FALSE;
			rp->r_deleg_change = 0;
			rp->r_deleg_change_grant = 0;
		} else {
			rp->r_deleg_change = garp->n4g_change;
			rp->r_deleg_change_grant = garp->n4g_change;
		}
		mapcnt = rp->r_mapcnt;
		rflag = rp->r_flags;

		/*
		 * Update the delegation change attribute if
		 * there are mappers or the file is dirty.  This
		 * might be the case during recovery after server
		 * reboot.
		 */
		if (mapcnt > 0 || rflag & R4DIRTY)
			rp->r_deleg_change++;

		NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE,
		    "nfs4_delegation_accept: r_deleg_change: 0x%x\n",
		    (int)(rp->r_deleg_change >> 32)));
		NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE,
		    "nfs4_delegation_accept: r_deleg_change_grant: 0x%x\n",
		    (int)(rp->r_deleg_change_grant >> 32)));

		ncg->nfs4_callback_stats.delegations.value.ui64++;
		ncg->nfs4_callback_stats.delegaccept_rw.value.ui64++;
	} else if (already) {
		/*
		 * No delegation granted.  If the rnode currently
		 * has one, then consider it tainted and return it.
		 */
		dr_needed = TRUE;
	}

	if (delegation_granted) {
		/* Add the rnode to the list. */
		if (!already) {
			crhold(cr);
			rp->r_deleg_cred = cr;

			ASSERT(mutex_owned(&np->s_lock));
			list_insert_head(&np->s_deleg_list, rp);
			/* added list node gets a reference */
			np->s_refcnt++;
			nfs4_inc_state_ref_count_nolock(np, mi);
		}
		rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
	}

	/*
	 * We've now safely accepted the delegation, if any.  Drop the
	 * locks and figure out what post-processing is needed.  We'd
	 * like to retain r_statev4_lock, but nfs4_server_rele takes
	 * s_lock which would be a lock ordering violation.
	 */
	mutex_exit(&rp->r_statev4_lock);
	mutex_exit(&rp->r_statelock);
	mutex_exit(&np->s_lock);
	nfs4_server_rele(np);

	/*
	 * Check to see if we are in recovery.  Remember that
	 * this function is protected by start_op, so a recovery
	 * cannot begin until we are out of here.
	 */
	mutex_enter(&mi->mi_lock);
	recov = mi->mi_recovflags & MI4_RECOV_ACTIV;
	mutex_exit(&mi->mi_lock);

	mutex_enter(&rp->r_statev4_lock);

	if (nfs4_delegreturn_policy == IMMEDIATE || !valid_garp)
		dr_needed = TRUE;

	if (dr_needed && rp->r_deleg_return_pending == FALSE) {
		if (recov) {
			/*
			 * We cannot call delegreturn from inside
			 * of recovery or VOP_PUTPAGE will hang
			 * due to nfs4_start_fop call in
			 * nfs4write.  Use dlistadd to add the
			 * rnode to the list of rnodes needing
			 * cleaning.  We do not need to do reopen
			 * here because recov_openfiles will do it.
			 * In the non-recall case, just discard the
			 * delegation as it is no longer valid.
			 */
			if (recall)
				dr_flags = NFS4_DR_PUSH;
			else
				dr_flags = NFS4_DR_PUSH|NFS4_DR_DISCARD;

			nfs4_dlistadd(rp, ncg, dr_flags);
			dr_flags = 0;
		} else {
			/*
			 * Push the modified data back to the server,
			 * reopen any delegation open streams, and return
			 * the delegation.  Drop the statev4_lock first!
			 */
			dr_flags = NFS4_DR_PUSH|NFS4_DR_DID_OP|NFS4_DR_REOPEN;
		}
	}
	mutex_exit(&rp->r_statev4_lock);
	if (dr_flags)
		(void) nfs4delegreturn_impl(rp, dr_flags, ncg);
}

/*
 * nfs4delegabandon - Abandon the delegation on an rnode4.  This code
 * is called when the client receives EXPIRED, BAD_STATEID, OLD_STATEID
 * or BADSEQID and the recovery code is unable to recover.  Push any
 * dirty data back to the server and return the delegation (if any).
 */

void
nfs4delegabandon(rnode4_t *rp)
{
	vnode_t *vp;
	struct cb_recall_pass *pp;
	open_delegation_type4 dt;

	mutex_enter(&rp->r_statev4_lock);
	dt = rp->r_deleg_type;
	mutex_exit(&rp->r_statev4_lock);

	if (dt == OPEN_DELEGATE_NONE)
		return;

	vp = RTOV4(rp);
	VN_HOLD(vp);

	pp = kmem_alloc(sizeof (struct cb_recall_pass), KM_SLEEP);
	pp->rp = rp;
	/*
	 * Recovery on the file has failed and we want to return
	 * the delegation.  We don't want to reopen files and
	 * nfs4delegreturn_thread() figures out what to do about
	 * the data.  The only thing to do is attempt to return
	 * the delegation.
	 */
	pp->flags = 0;
	pp->truncate = FALSE;

	/*
	 * Fire up a thread to do the delegreturn; this is
	 * necessary because we could be inside a GETPAGE or
	 * PUTPAGE and we cannot do another one.
	 */

	(void) zthread_create(NULL, 0, nfs4delegreturn_thread, pp, 0,
	    minclsyspri);
}

static int
wait_for_recall1(vnode_t *vp, nfs4_op_hint_t op, nfs4_recov_state_t *rsp,
    int flg)
{
	rnode4_t *rp;
	int error = 0;

#ifdef lint
	op = op;
#endif

	if (vp && vp->v_type == VREG) {
		rp = VTOR4(vp);

		/*
		 * Take r_deleg_recall_lock in read mode to synchronize
		 * with delegreturn.
		 */
		error = nfs_rw_enter_sig(&rp->r_deleg_recall_lock,
		    RW_READER, INTR4(vp));

		if (error == 0)
			rsp->rs_flags |= flg;

	}
	return (error);
}

void
nfs4_end_op_recall(vnode_t *vp1, vnode_t *vp2, nfs4_recov_state_t *rsp)
{
	NFS4_DEBUG(nfs4_recall_debug,
	    (CE_NOTE, "nfs4_end_op_recall: 0x%p, 0x%p\n",
	    (void *)vp1, (void *)vp2));

	if (vp2 && rsp->rs_flags & NFS4_RS_RECALL_HELD2)
		nfs_rw_exit(&VTOR4(vp2)->r_deleg_recall_lock);
	if (vp1 && rsp->rs_flags & NFS4_RS_RECALL_HELD1)
		nfs_rw_exit(&VTOR4(vp1)->r_deleg_recall_lock);
}

int
wait_for_recall(vnode_t *vp1, vnode_t *vp2, nfs4_op_hint_t op,
    nfs4_recov_state_t *rsp)
{
	int error;

	NFS4_DEBUG(nfs4_recall_debug,
	    (CE_NOTE, "wait_for_recall: 0x%p, 0x%p\n",
	    (void *)vp1, (void *)vp2));

	rsp->rs_flags &= ~(NFS4_RS_RECALL_HELD1|NFS4_RS_RECALL_HELD2);

	if ((error = wait_for_recall1(vp1, op, rsp, NFS4_RS_RECALL_HELD1)) != 0)
		return (error);

	if ((error = wait_for_recall1(vp2, op, rsp, NFS4_RS_RECALL_HELD2))
	    != 0) {
		if (rsp->rs_flags & NFS4_RS_RECALL_HELD1) {
			nfs_rw_exit(&VTOR4(vp1)->r_deleg_recall_lock);
			rsp->rs_flags &= ~NFS4_RS_RECALL_HELD1;
		}

		return (error);
	}

	return (0);
}
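
/*
 * Usage sketch (illustrative, error handling elided): wait_for_recall()
 * is paired with nfs4_end_op_recall(), which releases whichever
 * r_deleg_recall_lock holds were recorded in rsp->rs_flags; vp1, vp2,
 * op and recov_state are assumed to come from the caller's context:
 *
 *	if (wait_for_recall(vp1, vp2, op, &recov_state) == 0) {
 *		... perform the over-the-wire operation ...
 *		nfs4_end_op_recall(vp1, vp2, &recov_state);
 *	}
 */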

/*
 * nfs4_dlistadd - Add this rnode to a list of rnodes to be
 * DELEGRETURN'd at the end of recovery.
 */

static void
nfs4_dlistadd(rnode4_t *rp, struct nfs4_callback_globals *ncg, int flags)
{
	struct nfs4_dnode *dp;

	ASSERT(mutex_owned(&rp->r_statev4_lock));
	/*
	 * Mark the delegation as having a return pending.
	 * This will prevent the use of the delegation stateID
	 * by read, write, setattr and open.
	 */
	rp->r_deleg_return_pending = TRUE;
	dp = kmem_alloc(sizeof (*dp), KM_SLEEP);
	VN_HOLD(RTOV4(rp));
	dp->rnodep = rp;
	dp->flags = flags;
	mutex_enter(&ncg->nfs4_dlist_lock);
	list_insert_head(&ncg->nfs4_dlist, dp);
#ifdef DEBUG
	ncg->nfs4_dlistadd_c++;
#endif
	mutex_exit(&ncg->nfs4_dlist_lock);
}
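
/*
 * Usage sketch (illustrative): nfs4_dlistadd() asserts that the
 * rnode's r_statev4_lock is held, as in nfs4_delegation_accept()
 * above:
 *
 *	mutex_enter(&rp->r_statev4_lock);
 *	nfs4_dlistadd(rp, ncg, NFS4_DR_PUSH);
 *	mutex_exit(&rp->r_statev4_lock);
 */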

/*
 * nfs4_dlistclean_impl - Do DELEGRETURN for each rnode on the list
 * of files awaiting cleaning.  If the override_flags are non-zero
 * then use them rather than the flags that were set when the rnode
 * was added to the dlist.
 */
static void
nfs4_dlistclean_impl(struct nfs4_callback_globals *ncg, int override_flags)
{
	rnode4_t *rp;
	struct nfs4_dnode *dp;
	int flags;

	ASSERT(override_flags == 0 || override_flags == NFS4_DR_DISCARD);

	mutex_enter(&ncg->nfs4_dlist_lock);
	while ((dp = list_head(&ncg->nfs4_dlist)) != NULL) {
#ifdef DEBUG
		ncg->nfs4_dlistclean_c++;
#endif
		list_remove(&ncg->nfs4_dlist, dp);
		mutex_exit(&ncg->nfs4_dlist_lock);
		rp = dp->rnodep;
		flags = (override_flags != 0) ? override_flags : dp->flags;
		kmem_free(dp, sizeof (*dp));
		(void) nfs4delegreturn_impl(rp, flags, ncg);
		VN_RELE(RTOV4(rp));
		mutex_enter(&ncg->nfs4_dlist_lock);
	}
	mutex_exit(&ncg->nfs4_dlist_lock);
}

void
nfs4_dlistclean(void)
{
	struct nfs4_callback_globals *ncg;

	ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
	ASSERT(ncg != NULL);

	nfs4_dlistclean_impl(ncg, 0);
}

nfs4_rcsync_t *
nfs4_recall_sync_start(mntinfo4_t *mi)
{
	nfs4_server_t *sp = mi->mi_srv;
	nfs4_rcsync_t *rcp = kmem_zalloc(sizeof (*rcp), KM_SLEEP);

	if (nfs4_server_vlock(sp, 0) == FALSE) {
		rcp->rs_flags = RS_SERVER_GONE;
		return (rcp);
	}
	rcp->rs_mi = mi;

	mutex_enter(&sp->s_rcsync_lock);
	rcp->rs_seq = sp->s_rcsync_seq++;
	list_insert_tail(&sp->s_rcsync_list, rcp);
	mutex_exit(&sp->s_rcsync_lock);

	mutex_exit(&sp->s_lock);
	nfs4_server_rele(sp);

	return (rcp);
}

void
nfs4_recall_sync_end(mntinfo4_t *mi, nfs4_rcsync_t *rcp)
{
	nfs4_server_t *sp = mi->mi_srv;

	if (nfs4_server_vlock(sp, 1) == FALSE) {
		ASSERT((rcp->rs_flags & RS_SERVER_GONE) != 0);
		kmem_free(rcp, sizeof (*rcp));
		return;
	}

	mutex_enter(&sp->s_rcsync_lock);
	if ((rcp->rs_flags & RS_SERVER_GONE) == 0) {
		list_remove(&sp->s_rcsync_list, rcp);
		cv_broadcast(&sp->s_rcsync_cv);
	}
	mutex_exit(&sp->s_rcsync_lock);
	mutex_exit(&sp->s_lock);
	nfs4_server_rele(sp);
	kmem_free(rcp, sizeof (*rcp));
}
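
/*
 * Usage sketch (illustrative): nfs4_recall_sync_start() and
 * nfs4_recall_sync_end() bracket recall-sensitive work so that
 * nfs4_recall_sync_wait() below can wait for every bracket opened
 * before it sampled s_rcsync_seq:
 *
 *	nfs4_rcsync_t *rcp = nfs4_recall_sync_start(mi);
 *	... recall-sensitive work ...
 *	nfs4_recall_sync_end(mi, rcp);
 */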

static void
nfs4_recall_sync_wait(nfs4_server_t *sp)
{
	uint64_t seq;

	mutex_enter(&sp->s_rcsync_lock);
	seq = sp->s_rcsync_seq;

	while (!list_is_empty(&sp->s_rcsync_list)) {
		nfs4_rcsync_t *rcp = list_head(&sp->s_rcsync_list);

		if (rcp->rs_seq >= seq)
			break;

		cv_wait(&sp->s_rcsync_cv, &sp->s_rcsync_lock);
	}

	mutex_exit(&sp->s_rcsync_lock);
}
