1 /*
2 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
3 * Authors: Doug Rabson <dfr@rabson.org>
4 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 /*
29 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
30 * Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
31 * Copyright 2014 Joyent, Inc. All rights reserved.
32 */
33
34 /*
35 * NFS Lock Manager service functions (nlm_do_...)
36 * Called from nlm_rpc_svc.c wrappers.
37 *
38 * Source code derived from FreeBSD nlm_prot_impl.c
39 */
40
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/thread.h>
44 #include <sys/fcntl.h>
45 #include <sys/flock.h>
46 #include <sys/mount.h>
47 #include <sys/priv.h>
48 #include <sys/proc.h>
49 #include <sys/share.h>
50 #include <sys/socket.h>
51 #include <sys/syscall.h>
52 #include <sys/syslog.h>
53 #include <sys/systm.h>
54 #include <sys/taskq.h>
55 #include <sys/unistd.h>
56 #include <sys/vnode.h>
57 #include <sys/vfs.h>
58 #include <sys/queue.h>
59 #include <sys/sdt.h>
60 #include <netinet/in.h>
61
62 #include <rpc/rpc.h>
63 #include <rpc/xdr.h>
64 #include <rpc/pmap_prot.h>
65 #include <rpc/pmap_clnt.h>
66 #include <rpc/rpcb_prot.h>
67
68 #include <rpcsvc/nlm_prot.h>
69 #include <rpcsvc/sm_inter.h>
70
71 #include <nfs/nfs.h>
72 #include <nfs/nfs_clnt.h>
73 #include <nfs/export.h>
74 #include <nfs/rnode.h>
75
76 #include "nlm_impl.h"
77
78 #define NLM_IN_GRACE(g) (ddi_get_lbolt() < (g)->grace_threshold)
79
/*
 * State shared between nlm_block() and nlm_block_callback(), the
 * flk callback invoked around the blocking F_SETLKW request.
 */
struct nlm_block_cb_data {
	struct nlm_host		*hostp;	/* client host that owns the lock */
	struct nlm_vhold	*nvp;	/* vnode hold for the locked file */
	struct flock64		*flp;	/* the sleeping lock request */
	bool_t			registered; /* slreq still on nh_pending? */
};
86
87 /*
88 * Invoke an asyncronous RPC callbeck
89 * (used when NLM server needs to reply to MSG NLM procedure).
90 */
91 #define NLM_INVOKE_CALLBACK(descr, rpcp, resp, callb) \
92 do { \
93 enum clnt_stat _stat; \
94 \
95 _stat = (*(callb))(resp, NULL, (rpcp)->nr_handle); \
96 if (_stat != RPC_SUCCESS && _stat != RPC_TIMEDOUT) { \
97 struct rpc_err _err; \
98 \
99 CLNT_GETERR((rpcp)->nr_handle, &_err); \
100 NLM_ERR("NLM: %s callback failed: " \
101 "stat %d, err %d\n", descr, _stat, \
102 _err.re_errno); \
103 } \
104 \
105 _NOTE(CONSTCOND) } while (0)
106
107 static void nlm_block(
108 nlm4_lockargs *lockargs,
109 struct nlm_host *host,
110 struct nlm_vhold *nvp,
111 struct flock64 *fl,
112 nlm_granted_cb grant_cb,
113 rpcvers_t);
114
115 static vnode_t *nlm_fh_to_vp(struct netobj *);
116 static struct nlm_vhold *nlm_fh_to_vhold(struct nlm_host *, struct netobj *);
117 static void nlm_init_shrlock(struct shrlock *, nlm4_share *, struct nlm_host *);
118 static callb_cpr_t *nlm_block_callback(flk_cb_when_t, void *);
119 static int nlm_vop_frlock(vnode_t *, int, flock64_t *, int, offset_t,
120 struct flk_callback *, cred_t *, caller_context_t *);
121
122 /*
123 * Convert a lock from network to local form, and
124 * check for valid range (no overflow).
125 */
126 static int
nlm_init_flock(struct flock64 * fl,struct nlm4_lock * nl,struct nlm_host * host,rpcvers_t vers,short type)127 nlm_init_flock(struct flock64 *fl, struct nlm4_lock *nl,
128 struct nlm_host *host, rpcvers_t vers, short type)
129 {
130 uint64_t off, len;
131
132 bzero(fl, sizeof (*fl));
133 off = nl->l_offset;
134 len = nl->l_len;
135
136 if (vers < NLM4_VERS) {
137 if (off > MAX_UOFF32 || len > MAX_UOFF32)
138 return (EINVAL);
139 if (off + len > MAX_UOFF32 + 1)
140 return (EINVAL);
141 } else {
142 /*
143 * Check range for 64-bit client (no overflow).
144 * Again allow len == ~0 to mean lock to EOF.
145 */
146 if (len == MAX_U_OFFSET_T)
147 len = 0;
148 if (len != 0 && off + (len - 1) < off)
149 return (EINVAL);
150 }
151
152 fl->l_type = type;
153 fl->l_whence = SEEK_SET;
154 fl->l_start = off;
155 fl->l_len = len;
156 fl->l_sysid = host->nh_sysid;
157 fl->l_pid = nl->svid;
158 /* l_pad */
159
160 return (0);
161 }
162
163 /*
164 * Convert an fhandle into a vnode.
165 * Uses the file id (fh_len + fh_data) in the fhandle to get the vnode.
166 * WARNING: users of this routine must do a VN_RELE on the vnode when they
167 * are done with it.
168 * This is just like nfs_fhtovp() but without the exportinfo argument.
169 */
170 static vnode_t *
lm_fhtovp(fhandle3_t * fh)171 lm_fhtovp(fhandle3_t *fh)
172 {
173 vfs_t *vfsp;
174 vnode_t *vp;
175 int error;
176
177 vfsp = getvfs(&fh->_fh3_fsid);
178 if (vfsp == NULL)
179 return (NULL);
180
181 /* LINTED E_BAD_PTR_CAST_ALIGN */
182 error = VFS_VGET(vfsp, &vp, (fid_t *)&(fh->_fh3_len));
183 VFS_RELE(vfsp);
184 if (error || vp == NULL)
185 return (NULL);
186
187 return (vp);
188 }
189
190 /*
191 * Gets vnode from client's filehandle
192 * NOTE: Holds vnode, it _must_ be explicitly
193 * released by VN_RELE().
194 */
195 static vnode_t *
nlm_fh_to_vp(struct netobj * fh)196 nlm_fh_to_vp(struct netobj *fh)
197 {
198 fhandle3_t *fhp;
199
200 /*
201 * Get a vnode pointer for the given NFS file handle.
202 * Note that it could be an NFSv2 or NFSv3 handle,
203 * which means the size might vary. (don't copy)
204 */
205 if (fh->n_len < sizeof (fhandle_t))
206 return (NULL);
207
208 /* We know this is aligned (kmem_alloc) */
209 /* LINTED E_BAD_PTR_CAST_ALIGN */
210 fhp = (fhandle3_t *)fh->n_bytes;
211
212 /*
213 * See the comment for NFS_FH3MAXDATA in uts/common/nfs/nfs.h for
214 * converting fhandles. Check the NFSv3 file handle size. The lockmgr
215 * is not used for NFS v4.
216 */
217 if (fhp->_fh3_len > NFS_FH3MAXDATA || fhp->_fh3_len == 0)
218 return (NULL);
219
220 return (lm_fhtovp(fhp));
221 }
222
223 /*
224 * Get vhold from client's filehandle, but in contrast to
225 * The function tries to check some access rights as well.
226 *
227 * NOTE: vhold object _must_ be explicitly released by
228 * nlm_vhold_release().
229 */
230 static struct nlm_vhold *
nlm_fh_to_vhold(struct nlm_host * hostp,struct netobj * fh)231 nlm_fh_to_vhold(struct nlm_host *hostp, struct netobj *fh)
232 {
233 vnode_t *vp;
234 struct nlm_vhold *nvp;
235
236 vp = nlm_fh_to_vp(fh);
237 if (vp == NULL)
238 return (NULL);
239
240
241 nvp = nlm_vhold_get(hostp, vp);
242
243 /*
244 * Both nlm_fh_to_vp() and nlm_vhold_get()
245 * do VN_HOLD(), so we need to drop one
246 * reference on vnode.
247 */
248 VN_RELE(vp);
249 return (nvp);
250 }
251
252 /* ******************************************************************* */
253
254 /*
255 * NLM implementation details, called from the RPC svc code.
256 */
257
258 /*
259 * Call-back from NFS statd, used to notify that one of our
260 * hosts had a status change. The host can be either an
261 * NFS client, NFS server or both.
262 * According to NSM protocol description, the state is a
263 * number that is increases monotonically each time the
264 * state of host changes. An even number indicates that
265 * the host is down, while an odd number indicates that
266 * the host is up.
267 *
268 * Here we ignore this even/odd difference of status number
269 * reported by the NSM, we launch notification handlers
270 * every time the state is changed. The reason we why do so
271 * is that client and server can talk to each other using
272 * connectionless transport and it's easy to lose packet
273 * containing NSM notification with status number update.
274 *
275 * In nlm_host_monitor(), we put the sysid in the private data
276 * that statd carries in this callback, so we can easliy find
277 * the host this call applies to.
278 */
279 /* ARGSUSED */
280 void
nlm_do_notify1(nlm_sm_status * argp,void * res,struct svc_req * sr)281 nlm_do_notify1(nlm_sm_status *argp, void *res, struct svc_req *sr)
282 {
283 struct nlm_globals *g;
284 struct nlm_host *host;
285 uint16_t sysid;
286
287 g = zone_getspecific(nlm_zone_key, curzone);
288 bcopy(&argp->priv, &sysid, sizeof (sysid));
289
290 DTRACE_PROBE2(nsm__notify, uint16_t, sysid,
291 int, argp->state);
292
293 host = nlm_host_find_by_sysid(g, (sysid_t)sysid);
294 if (host == NULL)
295 return;
296
297 nlm_host_notify_server(host, argp->state);
298 nlm_host_notify_client(host, argp->state);
299 nlm_host_release(g, host);
300 }
301
302 /*
303 * Another available call-back for NFS statd.
304 * Not currently used.
305 */
/* ARGSUSED */
void
nlm_do_notify2(nlm_sm_status *argp, void *res, struct svc_req *sr)
{
	/* This statd callback is never registered; see comment above. */
	ASSERT(0);
}
312
313
314 /*
315 * NLM_TEST, NLM_TEST_MSG,
316 * NLM4_TEST, NLM4_TEST_MSG,
317 * Client inquiry about locks, non-blocking.
318 *
319 * Arg cb is NULL for NLM_TEST, NLM4_TEST, and
320 * non-NULL for NLM_TEST_MSG, NLM4_TEST_MSG
321 * The MSG forms use the cb to send the reply,
322 * and don't return a reply for this call.
323 */
324 void
nlm_do_test(nlm4_testargs * argp,nlm4_testres * resp,struct svc_req * sr,nlm_testres_cb cb)325 nlm_do_test(nlm4_testargs *argp, nlm4_testres *resp,
326 struct svc_req *sr, nlm_testres_cb cb)
327 {
328 struct nlm_globals *g;
329 struct nlm_host *host;
330 struct nlm4_holder *lh;
331 struct nlm_owner_handle *oh;
332 nlm_rpc_t *rpcp = NULL;
333 vnode_t *vp = NULL;
334 struct netbuf *addr;
335 char *netid;
336 char *name;
337 int error;
338 struct flock64 fl;
339
340 nlm_copy_netobj(&resp->cookie, &argp->cookie);
341
342 name = argp->alock.caller_name;
343 netid = svc_getnetid(sr->rq_xprt);
344 addr = svc_getrpccaller(sr->rq_xprt);
345
346 g = zone_getspecific(nlm_zone_key, curzone);
347 host = nlm_host_findcreate(g, name, netid, addr);
348 if (host == NULL) {
349 resp->stat.stat = nlm4_denied_nolocks;
350 return;
351 }
352 if (cb != NULL) {
353 error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
354 if (error != 0) {
355 resp->stat.stat = nlm4_denied_nolocks;
356 goto out;
357 }
358 }
359
360 vp = nlm_fh_to_vp(&argp->alock.fh);
361 if (vp == NULL) {
362 resp->stat.stat = nlm4_stale_fh;
363 goto out;
364 }
365
366 if (NLM_IN_GRACE(g)) {
367 resp->stat.stat = nlm4_denied_grace_period;
368 goto out;
369 }
370
371 /* Convert to local form. */
372 error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers,
373 (argp->exclusive) ? F_WRLCK : F_RDLCK);
374 if (error) {
375 resp->stat.stat = nlm4_failed;
376 goto out;
377 }
378
379 /* BSD: VOP_ADVLOCK(nv->nv_vp, NULL, F_GETLK, &fl, F_REMOTE); */
380 error = nlm_vop_frlock(vp, F_GETLK, &fl,
381 F_REMOTELOCK | FREAD | FWRITE,
382 (u_offset_t)0, NULL, CRED(), NULL);
383 if (error) {
384 resp->stat.stat = nlm4_failed;
385 goto out;
386 }
387
388 if (fl.l_type == F_UNLCK) {
389 resp->stat.stat = nlm4_granted;
390 goto out;
391 }
392 resp->stat.stat = nlm4_denied;
393
394 /*
395 * This lock "test" fails due to a conflicting lock.
396 *
397 * If this is a v1 client, make sure the conflicting
398 * lock range we report can be expressed with 32-bit
399 * offsets. The lock range requested was expressed
400 * as 32-bit offset and length, so at least part of
401 * the conflicting lock should lie below MAX_UOFF32.
402 * If the conflicting lock extends past that, we'll
403 * trim the range to end at MAX_UOFF32 so this lock
404 * can be represented in a 32-bit response. Check
405 * the start also (paranoid, but a low cost check).
406 */
407 if (sr->rq_vers < NLM4_VERS) {
408 uint64 maxlen;
409 if (fl.l_start > MAX_UOFF32)
410 fl.l_start = MAX_UOFF32;
411 maxlen = MAX_UOFF32 + 1 - fl.l_start;
412 if (fl.l_len > maxlen)
413 fl.l_len = maxlen;
414 }
415
416 /*
417 * Build the nlm4_holder result structure.
418 *
419 * Note that lh->oh is freed via xdr_free,
420 * xdr_nlm4_holder, xdr_netobj, xdr_bytes.
421 */
422 oh = kmem_zalloc(sizeof (*oh), KM_SLEEP);
423 oh->oh_sysid = (sysid_t)fl.l_sysid;
424 lh = &resp->stat.nlm4_testrply_u.holder;
425 lh->exclusive = (fl.l_type == F_WRLCK);
426 lh->svid = fl.l_pid;
427 lh->oh.n_len = sizeof (*oh);
428 lh->oh.n_bytes = (void *)oh;
429 lh->l_offset = fl.l_start;
430 lh->l_len = fl.l_len;
431
432 out:
433 /*
434 * If we have a callback function, use that to
435 * deliver the response via another RPC call.
436 */
437 if (cb != NULL && rpcp != NULL)
438 NLM_INVOKE_CALLBACK("test", rpcp, resp, cb);
439
440 if (vp != NULL)
441 VN_RELE(vp);
442 if (rpcp != NULL)
443 nlm_host_rele_rpc(host, rpcp);
444
445 nlm_host_release(g, host);
446 }
447
448 /*
449 * NLM_LOCK, NLM_LOCK_MSG, NLM_NM_LOCK
450 * NLM4_LOCK, NLM4_LOCK_MSG, NLM4_NM_LOCK
451 *
452 * Client request to set a lock, possibly blocking.
453 *
454 * If the lock needs to block, we return status blocked to
455 * this RPC call, and then later call back the client with
456 * a "granted" callback. Tricky aspects of this include:
457 * sending a reply before this function returns, and then
458 * borrowing this thread from the RPC service pool for the
459 * wait on the lock and doing the later granted callback.
460 *
461 * We also have to keep a list of locks (pending + granted)
462 * both to handle retransmitted requests, and to keep the
463 * vnodes for those locks active.
464 *
465 * Callback arguments:
466 * reply_cb Used to send a normal RPC reply just as if
467 * we had filled in a response for our caller.
468 * Needed because we do work after the reply.
469 * res_cb Used for the MSG calls, where there's no
470 * regular RPC response.
471 * grant_cb Used to CALL the client informing them of a
472 * granted lock after a "blocked" reply.
473 */
474 void
nlm_do_lock(nlm4_lockargs * argp,nlm4_res * resp,struct svc_req * sr,nlm_reply_cb reply_cb,nlm_res_cb res_cb,nlm_granted_cb grant_cb)475 nlm_do_lock(nlm4_lockargs *argp, nlm4_res *resp, struct svc_req *sr,
476 nlm_reply_cb reply_cb, nlm_res_cb res_cb, nlm_granted_cb grant_cb)
477 {
478 struct nlm_globals *g;
479 struct flock64 fl;
480 struct nlm_host *host = NULL;
481 struct netbuf *addr;
482 struct nlm_vhold *nvp = NULL;
483 nlm_rpc_t *rpcp = NULL;
484 char *netid;
485 char *name;
486 int error, flags;
487 bool_t do_blocking = FALSE;
488 bool_t do_mon_req = FALSE;
489 enum nlm4_stats status;
490
491 nlm_copy_netobj(&resp->cookie, &argp->cookie);
492
493 name = argp->alock.caller_name;
494 netid = svc_getnetid(sr->rq_xprt);
495 addr = svc_getrpccaller(sr->rq_xprt);
496
497 g = zone_getspecific(nlm_zone_key, curzone);
498 host = nlm_host_findcreate(g, name, netid, addr);
499 if (host == NULL) {
500 DTRACE_PROBE4(no__host, struct nlm_globals *, g,
501 char *, name, char *, netid, struct netbuf *, addr);
502 status = nlm4_denied_nolocks;
503 goto doreply;
504 }
505
506 DTRACE_PROBE3(start, struct nlm_globals *, g,
507 struct nlm_host *, host, nlm4_lockargs *, argp);
508
509 /*
510 * If this is a MSG call (NLM_LOCK_MSG, NLM4_LOCK_MSG)
511 * we'll have res_cb != NULL, and we know we'll need an
512 * RPC client handle _now_ so we can send the response.
513 * If we can't get an rpc handle (rpcp) then we have
514 * no way to respond, and the client will time out.
515 */
516 if (res_cb != NULL) {
517 error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
518 if (error != 0) {
519 ASSERT(rpcp == NULL);
520 status = nlm4_denied_nolocks;
521 goto out;
522 }
523 }
524
525 /*
526 * During the "grace period", only allow reclaim.
527 */
528 if (argp->reclaim == 0 && NLM_IN_GRACE(g)) {
529 status = nlm4_denied_grace_period;
530 goto doreply;
531 }
532
533 /*
534 * Check whether we missed host shutdown event
535 */
536 if (nlm_host_get_state(host) != argp->state)
537 nlm_host_notify_server(host, argp->state);
538
539 /*
540 * Get a hold on the vnode for a lock operation.
541 * Only lock() and share() need vhold objects.
542 */
543 nvp = nlm_fh_to_vhold(host, &argp->alock.fh);
544 if (nvp == NULL) {
545 status = nlm4_stale_fh;
546 goto doreply;
547 }
548
549 /* Convert to local form. */
550 error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers,
551 (argp->exclusive) ? F_WRLCK : F_RDLCK);
552 if (error) {
553 status = nlm4_failed;
554 goto doreply;
555 }
556
557 /*
558 * Try to lock non-blocking first. If we succeed
559 * getting the lock, we can reply with the granted
560 * status directly and avoid the complications of
561 * making the "granted" RPC callback later.
562 *
563 * This also let's us find out now about some
564 * possible errors like EROFS, etc.
565 */
566 flags = F_REMOTELOCK | FREAD | FWRITE;
567 error = nlm_vop_frlock(nvp->nv_vp, F_SETLK, &fl, flags,
568 (u_offset_t)0, NULL, CRED(), NULL);
569
570 DTRACE_PROBE3(setlk__res, struct flock64 *, &fl,
571 int, flags, int, error);
572
573 switch (error) {
574 case 0:
575 /* Got it without waiting! */
576 status = nlm4_granted;
577 do_mon_req = TRUE;
578 break;
579
580 /* EINPROGRESS too? */
581 case EAGAIN:
582 /* We did not get the lock. Should we block? */
583 if (argp->block == FALSE || grant_cb == NULL) {
584 status = nlm4_denied;
585 break;
586 }
587 /*
588 * Should block. Try to reserve this thread
589 * so we can use it to wait for the lock and
590 * later send the granted message. If this
591 * reservation fails, say "no resources".
592 */
593 if (!svc_reserve_thread(sr->rq_xprt)) {
594 status = nlm4_denied_nolocks;
595 break;
596 }
597 /*
598 * OK, can detach this thread, so this call
599 * will block below (after we reply).
600 * The "blocked" reply tells the client to
601 * expect a "granted" call-back later.
602 */
603 status = nlm4_blocked;
604 do_blocking = TRUE;
605 do_mon_req = TRUE;
606 break;
607
608 case ENOLCK:
609 /* Failed for lack of resources. */
610 status = nlm4_denied_nolocks;
611 break;
612
613 case EROFS:
614 /* read-only file system */
615 status = nlm4_rofs;
616 break;
617
618 case EFBIG:
619 /* file too big */
620 status = nlm4_fbig;
621 break;
622
623 case EDEADLK:
624 /* dead lock condition */
625 status = nlm4_deadlck;
626 break;
627
628 default:
629 status = nlm4_denied;
630 break;
631 }
632
633 doreply:
634 resp->stat.stat = status;
635
636 /*
637 * We get one of two function pointers; one for a
638 * normal RPC reply, and another for doing an RPC
639 * "callback" _res reply for a _msg function.
640 * Use either of those to send the reply now.
641 *
642 * If sending this reply fails, just leave the
643 * lock in the list for retransmitted requests.
644 * Cleanup is via unlock or host rele (statmon).
645 */
646 if (reply_cb != NULL) {
647 /* i.e. nlm_lock_1_reply */
648 if (!(*reply_cb)(sr->rq_xprt, resp))
649 svcerr_systemerr(sr->rq_xprt);
650 }
651 if (res_cb != NULL && rpcp != NULL)
652 NLM_INVOKE_CALLBACK("lock", rpcp, resp, res_cb);
653
654 /*
655 * The reply has been sent to the client.
656 * Start monitoring this client (maybe).
657 *
658 * Note that the non-monitored (NM) calls pass grant_cb=NULL
659 * indicating that the client doesn't support RPC callbacks.
660 * No monitoring for these (lame) clients.
661 */
662 if (do_mon_req && grant_cb != NULL)
663 nlm_host_monitor(g, host, argp->state);
664
665 if (do_blocking) {
666 /*
667 * We need to block on this lock, and when that
668 * completes, do the granted RPC call. Note that
669 * we "reserved" this thread above, so we can now
670 * "detach" it from the RPC SVC pool, allowing it
671 * to block indefinitely if needed.
672 */
673 ASSERT(grant_cb != NULL);
674 (void) svc_detach_thread(sr->rq_xprt);
675 nlm_block(argp, host, nvp, &fl, grant_cb, sr->rq_vers);
676 }
677
678 out:
679 DTRACE_PROBE3(lock__end, struct nlm_globals *, g,
680 struct nlm_host *, host, nlm4_res *, resp);
681
682 if (rpcp != NULL)
683 nlm_host_rele_rpc(host, rpcp);
684
685 nlm_vhold_release(host, nvp);
686 nlm_host_release(g, host);
687 }
688
689 /*
690 * Helper for nlm_do_lock(), partly for observability,
691 * (we'll see a call blocked in this function) and
692 * because nlm_do_lock() was getting quite long.
693 */
694 static void
nlm_block(nlm4_lockargs * lockargs,struct nlm_host * host,struct nlm_vhold * nvp,struct flock64 * flp,nlm_granted_cb grant_cb,rpcvers_t vers)695 nlm_block(nlm4_lockargs *lockargs,
696 struct nlm_host *host,
697 struct nlm_vhold *nvp,
698 struct flock64 *flp,
699 nlm_granted_cb grant_cb,
700 rpcvers_t vers)
701 {
702 nlm4_testargs args;
703 nlm4_res res;
704 int error;
705 flk_callback_t flk_cb;
706 struct nlm_block_cb_data cb_data;
707 nlm_rpc_t *rpcp = NULL;
708 enum clnt_stat status;
709
710 /*
711 * Keep a list of blocked locks on nh_pending, and use it
712 * to cancel these threads in nlm_destroy_client_pending.
713 *
714 * Check to see if this lock is already in the list. If so,
715 * some earlier call is already blocked getting this lock,
716 * so there's nothing more this call needs to do.
717 */
718 error = nlm_slreq_register(host, nvp, flp);
719 if (error != 0) {
720 /*
721 * Sleeping lock request with given fl is already
722 * registered by someone else. This means that
723 * some other thread is handling the request, let
724 * it do its work.
725 */
726 ASSERT(error == EEXIST);
727 return;
728 }
729
730 /*
731 * Make sure we can get an RPC client handle we can use to
732 * deliver the "granted" callback if/when we get the lock.
733 * If we can't, there's no point blocking to get the lock
734 * for them because they'll never find out about it.
735 */
736 error = nlm_host_get_rpc(host, vers, &rpcp);
737 if (error != 0) {
738 (void) nlm_slreq_unregister(host, nvp, flp);
739 return;
740 }
741
742 cb_data.hostp = host;
743 cb_data.nvp = nvp;
744 cb_data.flp = flp;
745 cb_data.registered = TRUE;
746 flk_init_callback(&flk_cb, nlm_block_callback, &cb_data);
747
748 /* BSD: VOP_ADVLOCK(vp, NULL, F_SETLK, fl, F_REMOTE); */
749 error = nlm_vop_frlock(nvp->nv_vp, F_SETLKW, flp,
750 F_REMOTELOCK | FREAD | FWRITE,
751 (u_offset_t)0, &flk_cb, CRED(), NULL);
752
753 /*
754 * If the nlm_block_callback didn't already do it...
755 */
756 if (cb_data.registered)
757 (void) nlm_slreq_unregister(host, nvp, flp);
758
759 if (error != 0) {
760 /*
761 * We failed getting the lock, but have no way to
762 * tell the client about that. Let 'em time out.
763 */
764 return;
765 }
766 /*
767 * ... else we got the lock on behalf of this client.
768 *
769 * We MUST either tell the client about this lock
770 * (via the "granted" callback RPC) or unlock.
771 *
772 * Do the "granted" call-back to the client.
773 */
774 bzero(&args, sizeof (args));
775 args.cookie = lockargs->cookie;
776 args.exclusive = lockargs->exclusive;
777 args.alock = lockargs->alock;
778 bzero(&res, sizeof (res));
779
780 /*
781 * Not using the NLM_INVOKE_CALLBACK() macro because
782 * we need to take actions on errors.
783 */
784 status = (*grant_cb)(&args, &res, (rpcp)->nr_handle);
785 if (status != RPC_SUCCESS) {
786 struct rpc_err err;
787
788 CLNT_GETERR((rpcp)->nr_handle, &err);
789 NLM_ERR("NLM: %s callback failed: "
790 "stat %d, err %d\n", "grant", status,
791 err.re_errno);
792 res.stat.stat = nlm4_failed;
793 }
794 if (res.stat.stat != nlm4_granted) {
795 /*
796 * Failed to deliver the granted callback, so
797 * the client doesn't know about this lock.
798 * Unlock the lock. The client will time out.
799 */
800 (void) nlm_vop_frlock(nvp->nv_vp, F_UNLCK, flp,
801 F_REMOTELOCK | FREAD | FWRITE,
802 (u_offset_t)0, NULL, CRED(), NULL);
803 }
804 xdr_free((xdrproc_t)xdr_nlm4_res, (void *)&res);
805
806 nlm_host_rele_rpc(host, rpcp);
807 }
808
809 /*
810 * The function that is used as flk callback when NLM server
811 * sets new sleeping lock. The function unregisters NLM
812 * sleeping lock request (nlm_slreq) associated with the
813 * sleeping lock _before_ lock becomes active. It prevents
814 * potential race condition between nlm_block() and
815 * nlm_do_cancel().
816 */
817 static callb_cpr_t *
nlm_block_callback(flk_cb_when_t when,void * data)818 nlm_block_callback(flk_cb_when_t when, void *data)
819 {
820 struct nlm_block_cb_data *cb_data;
821
822 cb_data = (struct nlm_block_cb_data *)data;
823 if (when == FLK_AFTER_SLEEP) {
824 (void) nlm_slreq_unregister(cb_data->hostp,
825 cb_data->nvp, cb_data->flp);
826 cb_data->registered = FALSE;
827 }
828
829 return (0);
830 }
831
832 /*
833 * NLM_CANCEL, NLM_CANCEL_MSG,
834 * NLM4_CANCEL, NLM4_CANCEL_MSG,
835 * Client gives up waiting for a blocking lock.
836 */
837 void
nlm_do_cancel(nlm4_cancargs * argp,nlm4_res * resp,struct svc_req * sr,nlm_res_cb cb)838 nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *resp,
839 struct svc_req *sr, nlm_res_cb cb)
840 {
841 struct nlm_globals *g;
842 struct nlm_host *host;
843 struct netbuf *addr;
844 struct nlm_vhold *nvp = NULL;
845 nlm_rpc_t *rpcp = NULL;
846 char *netid;
847 char *name;
848 int error;
849 struct flock64 fl;
850
851 nlm_copy_netobj(&resp->cookie, &argp->cookie);
852 netid = svc_getnetid(sr->rq_xprt);
853 addr = svc_getrpccaller(sr->rq_xprt);
854 name = argp->alock.caller_name;
855
856 g = zone_getspecific(nlm_zone_key, curzone);
857 host = nlm_host_findcreate(g, name, netid, addr);
858 if (host == NULL) {
859 resp->stat.stat = nlm4_denied_nolocks;
860 return;
861 }
862 if (cb != NULL) {
863 error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
864 if (error != 0) {
865 resp->stat.stat = nlm4_denied_nolocks;
866 goto out;
867 }
868 }
869
870 DTRACE_PROBE3(start, struct nlm_globals *, g,
871 struct nlm_host *, host, nlm4_cancargs *, argp);
872
873 if (NLM_IN_GRACE(g)) {
874 resp->stat.stat = nlm4_denied_grace_period;
875 goto out;
876 }
877
878 nvp = nlm_fh_to_vhold(host, &argp->alock.fh);
879 if (nvp == NULL) {
880 resp->stat.stat = nlm4_stale_fh;
881 goto out;
882 }
883
884 /* Convert to local form. */
885 error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers,
886 (argp->exclusive) ? F_WRLCK : F_RDLCK);
887 if (error) {
888 resp->stat.stat = nlm4_failed;
889 goto out;
890 }
891
892 error = nlm_slreq_unregister(host, nvp, &fl);
893 if (error != 0) {
894 /*
895 * There's no sleeping lock request corresponding
896 * to the lock. Then requested sleeping lock
897 * doesn't exist.
898 */
899 resp->stat.stat = nlm4_denied;
900 goto out;
901 }
902
903 fl.l_type = F_UNLCK;
904 error = nlm_vop_frlock(nvp->nv_vp, F_SETLK, &fl,
905 F_REMOTELOCK | FREAD | FWRITE,
906 (u_offset_t)0, NULL, CRED(), NULL);
907
908 resp->stat.stat = (error == 0) ?
909 nlm4_granted : nlm4_denied;
910
911 out:
912 /*
913 * If we have a callback function, use that to
914 * deliver the response via another RPC call.
915 */
916 if (cb != NULL && rpcp != NULL)
917 NLM_INVOKE_CALLBACK("cancel", rpcp, resp, cb);
918
919 DTRACE_PROBE3(cancel__end, struct nlm_globals *, g,
920 struct nlm_host *, host, nlm4_res *, resp);
921
922 if (rpcp != NULL)
923 nlm_host_rele_rpc(host, rpcp);
924
925 nlm_vhold_release(host, nvp);
926 nlm_host_release(g, host);
927 }
928
929 /*
930 * NLM_UNLOCK, NLM_UNLOCK_MSG,
931 * NLM4_UNLOCK, NLM4_UNLOCK_MSG,
932 * Client removes one of their locks.
933 */
934 void
nlm_do_unlock(nlm4_unlockargs * argp,nlm4_res * resp,struct svc_req * sr,nlm_res_cb cb)935 nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *resp,
936 struct svc_req *sr, nlm_res_cb cb)
937 {
938 struct nlm_globals *g;
939 struct nlm_host *host;
940 struct netbuf *addr;
941 nlm_rpc_t *rpcp = NULL;
942 vnode_t *vp = NULL;
943 char *netid;
944 char *name;
945 int error;
946 struct flock64 fl;
947
948 nlm_copy_netobj(&resp->cookie, &argp->cookie);
949
950 netid = svc_getnetid(sr->rq_xprt);
951 addr = svc_getrpccaller(sr->rq_xprt);
952 name = argp->alock.caller_name;
953
954 /*
955 * NLM_UNLOCK operation doesn't have an error code
956 * denoting that operation failed, so we always
957 * return nlm4_granted except when the server is
958 * in a grace period.
959 */
960 resp->stat.stat = nlm4_granted;
961
962 g = zone_getspecific(nlm_zone_key, curzone);
963 host = nlm_host_findcreate(g, name, netid, addr);
964 if (host == NULL)
965 return;
966
967 if (cb != NULL) {
968 error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
969 if (error != 0)
970 goto out;
971 }
972
973 DTRACE_PROBE3(start, struct nlm_globals *, g,
974 struct nlm_host *, host, nlm4_unlockargs *, argp);
975
976 if (NLM_IN_GRACE(g)) {
977 resp->stat.stat = nlm4_denied_grace_period;
978 goto out;
979 }
980
981 vp = nlm_fh_to_vp(&argp->alock.fh);
982 if (vp == NULL)
983 goto out;
984
985 /* Convert to local form. */
986 error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers, F_UNLCK);
987 if (error)
988 goto out;
989
990 /* BSD: VOP_ADVLOCK(nv->nv_vp, NULL, F_UNLCK, &fl, F_REMOTE); */
991 error = nlm_vop_frlock(vp, F_SETLK, &fl,
992 F_REMOTELOCK | FREAD | FWRITE,
993 (u_offset_t)0, NULL, CRED(), NULL);
994
995 DTRACE_PROBE1(unlock__res, int, error);
996 out:
997 /*
998 * If we have a callback function, use that to
999 * deliver the response via another RPC call.
1000 */
1001 if (cb != NULL && rpcp != NULL)
1002 NLM_INVOKE_CALLBACK("unlock", rpcp, resp, cb);
1003
1004 DTRACE_PROBE3(unlock__end, struct nlm_globals *, g,
1005 struct nlm_host *, host, nlm4_res *, resp);
1006
1007 if (vp != NULL)
1008 VN_RELE(vp);
1009 if (rpcp != NULL)
1010 nlm_host_rele_rpc(host, rpcp);
1011
1012 nlm_host_release(g, host);
1013 }
1014
1015 /*
1016 * NLM_GRANTED, NLM_GRANTED_MSG,
1017 * NLM4_GRANTED, NLM4_GRANTED_MSG,
1018 *
1019 * This service routine is special. It's the only one that's
1020 * really part of our NLM _client_ support, used by _servers_
1021 * to "call back" when a blocking lock from this NLM client
1022 * is granted by the server. In this case, we _know_ there is
1023 * already an nlm_host allocated and held by the client code.
1024 * We want to find that nlm_host here.
1025 *
1026 * Over in nlm_call_lock(), the client encoded the sysid for this
1027 * server in the "owner handle" netbuf sent with our lock request.
1028 * We can now use that to find the nlm_host object we used there.
1029 * (NB: The owner handle is opaque to the server.)
1030 */
1031 void
nlm_do_granted(nlm4_testargs * argp,nlm4_res * resp,struct svc_req * sr,nlm_res_cb cb)1032 nlm_do_granted(nlm4_testargs *argp, nlm4_res *resp,
1033 struct svc_req *sr, nlm_res_cb cb)
1034 {
1035 struct nlm_globals *g;
1036 struct nlm_owner_handle *oh;
1037 struct nlm_host *host;
1038 nlm_rpc_t *rpcp = NULL;
1039 int error;
1040
1041 nlm_copy_netobj(&resp->cookie, &argp->cookie);
1042 resp->stat.stat = nlm4_denied;
1043
1044 g = zone_getspecific(nlm_zone_key, curzone);
1045 oh = (void *) argp->alock.oh.n_bytes;
1046 if (oh == NULL)
1047 return;
1048
1049 host = nlm_host_find_by_sysid(g, oh->oh_sysid);
1050 if (host == NULL)
1051 return;
1052
1053 if (cb != NULL) {
1054 error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
1055 if (error != 0)
1056 goto out;
1057 }
1058
1059 if (NLM_IN_GRACE(g)) {
1060 resp->stat.stat = nlm4_denied_grace_period;
1061 goto out;
1062 }
1063
1064 error = nlm_slock_grant(g, host, &argp->alock);
1065 if (error == 0)
1066 resp->stat.stat = nlm4_granted;
1067
1068 out:
1069 /*
1070 * If we have a callback function, use that to
1071 * deliver the response via another RPC call.
1072 */
1073 if (cb != NULL && rpcp != NULL)
1074 NLM_INVOKE_CALLBACK("do_granted", rpcp, resp, cb);
1075
1076 if (rpcp != NULL)
1077 nlm_host_rele_rpc(host, rpcp);
1078
1079 nlm_host_release(g, host);
1080 }
1081
1082 /*
1083 * NLM_FREE_ALL, NLM4_FREE_ALL
1084 *
1085 * Destroy all lock state for the calling client.
1086 */
void
nlm_do_free_all(nlm4_notify *argp, void *res, struct svc_req *sr)
{
	struct nlm_globals *g;
	struct nlm_host_list host_list;
	struct nlm_host *hostp;

	TAILQ_INIT(&host_list);
	g = zone_getspecific(nlm_zone_key, curzone);

	/* Serialize calls to clean locks. */
	mutex_enter(&g->clean_lock);

	/*
	 * Find all hosts that have the given node name and put them on a
	 * local list.  The same client name can match more than one
	 * nlm_host entry, hence the full tree walk.
	 */
	mutex_enter(&g->lock);
	for (hostp = avl_first(&g->nlm_hosts_tree); hostp != NULL;
	    hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp)) {
		if (strcasecmp(hostp->nh_name, argp->name) == 0) {
			/*
			 * If needed take the host out of the idle list since
			 * we are taking a reference.  (nh_link is reused
			 * below to thread the host onto our local list, so
			 * it must be off the idle list first.)
			 */
			if (hostp->nh_flags & NLM_NH_INIDLE) {
				TAILQ_REMOVE(&g->nlm_idle_hosts, hostp,
				    nh_link);
				hostp->nh_flags &= ~NLM_NH_INIDLE;
			}
			hostp->nh_refs++;

			TAILQ_INSERT_TAIL(&host_list, hostp, nh_link);
		}
	}
	mutex_exit(&g->lock);

	/* Free locks for all hosts on the local list. */
	while (!TAILQ_EMPTY(&host_list)) {
		hostp = TAILQ_FIRST(&host_list);
		TAILQ_REMOVE(&host_list, hostp, nh_link);

		/*
		 * Note that this does not do client-side cleanup.
		 * We want to do that ONLY if statd tells us the
		 * server has restarted.
		 */
		nlm_host_notify_server(hostp, argp->state);
		nlm_host_release(g, hostp);
	}

	mutex_exit(&g->clean_lock);

	/* res and sr are unused; this call sends no reply body. */
	(void) res;
	(void) sr;
}
1143
1144 static void
nlm_init_shrlock(struct shrlock * shr,nlm4_share * nshare,struct nlm_host * host)1145 nlm_init_shrlock(struct shrlock *shr,
1146 nlm4_share *nshare, struct nlm_host *host)
1147 {
1148
1149 switch (nshare->access) {
1150 default:
1151 case fsa_NONE:
1152 shr->s_access = 0;
1153 break;
1154 case fsa_R:
1155 shr->s_access = F_RDACC;
1156 break;
1157 case fsa_W:
1158 shr->s_access = F_WRACC;
1159 break;
1160 case fsa_RW:
1161 shr->s_access = F_RWACC;
1162 break;
1163 }
1164
1165 switch (nshare->mode) {
1166 default:
1167 case fsm_DN:
1168 shr->s_deny = F_NODNY;
1169 break;
1170 case fsm_DR:
1171 shr->s_deny = F_RDDNY;
1172 break;
1173 case fsm_DW:
1174 shr->s_deny = F_WRDNY;
1175 break;
1176 case fsm_DRW:
1177 shr->s_deny = F_RWDNY;
1178 break;
1179 }
1180
1181 shr->s_sysid = host->nh_sysid;
1182 shr->s_pid = 0;
1183 shr->s_own_len = nshare->oh.n_len;
1184 shr->s_owner = nshare->oh.n_bytes;
1185 }
1186
1187 /*
1188 * NLM_SHARE, NLM4_SHARE
1189 *
1190 * Request a DOS-style share reservation
1191 */
void
nlm_do_share(nlm4_shareargs *argp, nlm4_shareres *resp, struct svc_req *sr)
{
	struct nlm_globals *g;
	struct nlm_host *host;
	struct netbuf *addr;
	struct nlm_vhold *nvp = NULL;
	char *netid;
	char *name;
	int error;
	struct shrlock shr;

	nlm_copy_netobj(&resp->cookie, &argp->cookie);

	name = argp->share.caller_name;
	netid = svc_getnetid(sr->rq_xprt);
	addr = svc_getrpccaller(sr->rq_xprt);

	/* Find or create a host entry for this caller. */
	g = zone_getspecific(nlm_zone_key, curzone);
	host = nlm_host_findcreate(g, name, netid, addr);
	if (host == NULL) {
		resp->stat = nlm4_denied_nolocks;
		return;
	}

	DTRACE_PROBE3(share__start, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_shareargs *, argp);

	/*
	 * During the grace period, only requests marked as
	 * reclaims of pre-restart state are allowed through.
	 */
	if (argp->reclaim == 0 && NLM_IN_GRACE(g)) {
		resp->stat = nlm4_denied_grace_period;
		goto out;
	}

	/*
	 * Get holded vnode when on lock operation.
	 * Only lock() and share() need vhold objects.
	 */
	nvp = nlm_fh_to_vhold(host, &argp->share.fh);
	if (nvp == NULL) {
		resp->stat = nlm4_stale_fh;
		goto out;
	}

	/* Convert to local form. */
	nlm_init_shrlock(&shr, &argp->share, host);
	error = VOP_SHRLOCK(nvp->nv_vp, F_SHARE, &shr,
	    FREAD | FWRITE, CRED(), NULL);

	if (error == 0) {
		resp->stat = nlm4_granted;
		/*
		 * The host now holds state; start monitoring it
		 * (presumably via statd — see nlm_host_monitor).
		 */
		nlm_host_monitor(g, host, 0);
	} else {
		resp->stat = nlm4_denied;
	}

out:
	DTRACE_PROBE3(share__end, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_shareres *, resp);

	nlm_vhold_release(host, nvp);
	nlm_host_release(g, host);
}
1254
1255 /*
1256 * NLM_UNSHARE, NLM4_UNSHARE
1257 *
1258 * Release a DOS-style share reservation
1259 */
void
nlm_do_unshare(nlm4_shareargs *argp, nlm4_shareres *resp, struct svc_req *sr)
{
	struct nlm_globals *g;
	struct nlm_host *host;
	struct netbuf *addr;
	vnode_t *vp = NULL;
	char *netid;
	int error;
	struct shrlock shr;

	nlm_copy_netobj(&resp->cookie, &argp->cookie);

	netid = svc_getnetid(sr->rq_xprt);
	addr = svc_getrpccaller(sr->rq_xprt);

	/*
	 * Unlike share(), only look up an existing host entry;
	 * a client with no host entry can hold no reservations.
	 */
	g = zone_getspecific(nlm_zone_key, curzone);
	host = nlm_host_find(g, netid, addr);
	if (host == NULL) {
		resp->stat = nlm4_denied_nolocks;
		return;
	}

	DTRACE_PROBE3(unshare__start, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_shareargs *, argp);

	if (NLM_IN_GRACE(g)) {
		resp->stat = nlm4_denied_grace_period;
		goto out;
	}

	/* Unshare needs only a plain vnode hold, not a vhold object. */
	vp = nlm_fh_to_vp(&argp->share.fh);
	if (vp == NULL) {
		resp->stat = nlm4_stale_fh;
		goto out;
	}

	/* Convert to local form. */
	nlm_init_shrlock(&shr, &argp->share, host);
	error = VOP_SHRLOCK(vp, F_UNSHARE, &shr,
	    FREAD | FWRITE, CRED(), NULL);

	/*
	 * The VOP result is deliberately ignored: unshare is
	 * always reported as granted to the caller.
	 */
	(void) error;
	resp->stat = nlm4_granted;

out:
	DTRACE_PROBE3(unshare__end, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_shareres *, resp);

	if (vp != NULL)
		VN_RELE(vp);

	nlm_host_release(g, host);
}
1314
1315 /*
1316 * NLM wrapper to VOP_FRLOCK that checks the validity of the lock before
1317 * invoking the vnode operation.
1318 */
1319 static int
nlm_vop_frlock(vnode_t * vp,int cmd,flock64_t * bfp,int flag,offset_t offset,struct flk_callback * flk_cbp,cred_t * cr,caller_context_t * ct)1320 nlm_vop_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset,
1321 struct flk_callback *flk_cbp, cred_t *cr, caller_context_t *ct)
1322 {
1323 if (bfp->l_len != 0 && bfp->l_start + (bfp->l_len - 1) < bfp->l_start) {
1324 return (EOVERFLOW);
1325 }
1326
1327 return (VOP_FRLOCK(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
1328 }
1329