1 /*
2 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
3 * Authors: Doug Rabson <dfr@rabson.org>
4 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 /*
29 * Copyright (c) 2012 by Delphix. All rights reserved.
30 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
31 * Copyright 2014 Joyent, Inc. All rights reserved.
32 */
33
34 /*
35 * NFS Lock Manager service functions (nlm_do_...)
36 * Called from nlm_rpc_svc.c wrappers.
37 *
38 * Source code derived from FreeBSD nlm_prot_impl.c
39 */
40
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/thread.h>
44 #include <sys/fcntl.h>
45 #include <sys/flock.h>
46 #include <sys/mount.h>
47 #include <sys/priv.h>
48 #include <sys/proc.h>
49 #include <sys/share.h>
50 #include <sys/socket.h>
51 #include <sys/syscall.h>
52 #include <sys/syslog.h>
53 #include <sys/systm.h>
54 #include <sys/taskq.h>
55 #include <sys/unistd.h>
56 #include <sys/vnode.h>
57 #include <sys/vfs.h>
58 #include <sys/queue.h>
59 #include <sys/sdt.h>
60 #include <netinet/in.h>
61
62 #include <rpc/rpc.h>
63 #include <rpc/xdr.h>
64 #include <rpc/pmap_prot.h>
65 #include <rpc/pmap_clnt.h>
66 #include <rpc/rpcb_prot.h>
67
68 #include <rpcsvc/nlm_prot.h>
69 #include <rpcsvc/sm_inter.h>
70
71 #include <nfs/nfs.h>
72 #include <nfs/nfs_clnt.h>
73 #include <nfs/export.h>
74 #include <nfs/rnode.h>
75
76 #include "nlm_impl.h"
77
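/*
 * True while this zone's lock manager is still in its grace period,
 * i.e. lbolt has not yet reached the recorded grace_threshold.
 */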
78 #define NLM_IN_GRACE(g) (ddi_get_lbolt() < (g)->grace_threshold)
79
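/*
 * Context passed from nlm_block() to nlm_block_callback()
 * via flk_init_callback().
 */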
80 struct nlm_block_cb_data {
81 struct nlm_host *hostp;
82 struct nlm_vhold *nvp;
83 struct flock64 *flp;
84 };
85
86 /*
87 * Invoke an asynchronous RPC callback
88 * (used when NLM server needs to reply to MSG NLM procedure).
89 */
90 #define NLM_INVOKE_CALLBACK(descr, rpcp, resp, callb) \
91 do { \
92 enum clnt_stat _stat; \
93 \
94 _stat = (*(callb))(resp, NULL, (rpcp)->nr_handle); \
95 if (_stat != RPC_SUCCESS && _stat != RPC_TIMEDOUT) { \
96 struct rpc_err _err; \
97 \
98 CLNT_GETERR((rpcp)->nr_handle, &_err); \
99 NLM_ERR("NLM: %s callback failed: " \
100 "stat %d, err %d\n", descr, _stat, \
101 _err.re_errno); \
102 } \
103 \
104 _NOTE(CONSTCOND) } while (0)
105
106 static void nlm_block(
107 nlm4_lockargs *lockargs,
108 struct nlm_host *host,
109 struct nlm_vhold *nvp,
110 nlm_rpc_t *rpcp,
111 struct flock64 *fl,
112 nlm_testargs_cb grant_cb);
113
114 static vnode_t *nlm_fh_to_vp(struct netobj *);
115 static struct nlm_vhold *nlm_fh_to_vhold(struct nlm_host *, struct netobj *);
116 static void nlm_init_shrlock(struct shrlock *, nlm4_share *, struct nlm_host *);
117 static callb_cpr_t *nlm_block_callback(flk_cb_when_t, void *);
118 static int nlm_vop_frlock(vnode_t *, int, flock64_t *, int, offset_t,
119 struct flk_callback *, cred_t *, caller_context_t *);
120
121 /*
122 * Convert a lock from network to local form, and
123 * check for valid range (no overflow).
124 */
125 static int
126 nlm_init_flock(struct flock64 *fl, struct nlm4_lock *nl,
127 struct nlm_host *host, rpcvers_t vers, short type)
128 {
129 uint64_t off, len;
130
131 bzero(fl, sizeof (*fl));
132 off = nl->l_offset;
133 len = nl->l_len;
134
135 if (vers < NLM4_VERS) {
136 if (off > MAX_UOFF32 || len > MAX_UOFF32)
137 return (EINVAL);
138 if (off + len > MAX_UOFF32 + 1)
139 return (EINVAL);
140 } else {
141 /*
142 * Check range for 64-bit client (no overflow).
143 * Again allow len == ~0 to mean lock to EOF.
144 */
145 if (len == MAX_U_OFFSET_T)
146 len = 0;
147 if (len != 0 && off + (len - 1) < off)
148 return (EINVAL);
149 }
150
151 fl->l_type = type;
152 fl->l_whence = SEEK_SET;
153 fl->l_start = off;
154 fl->l_len = len;
155 fl->l_sysid = host->nh_sysid;
156 fl->l_pid = nl->svid;
157 /* l_pad */
158
159 return (0);
160 }
161
162 /*
163 * Convert an fhandle into a vnode.
164 * Uses the file id (fh_len + fh_data) in the fhandle to get the vnode.
165 * WARNING: users of this routine must do a VN_RELE on the vnode when they
166 * are done with it.
167 * This is just like nfs_fhtovp() but without the exportinfo argument.
168 */
169 static vnode_t *
170 lm_fhtovp(fhandle3_t *fh)
171 {
172 vfs_t *vfsp;
173 vnode_t *vp;
174 int error;
175
176 vfsp = getvfs(&fh->_fh3_fsid);
177 if (vfsp == NULL)
178 return (NULL);
179
180 /* LINTED E_BAD_PTR_CAST_ALIGN */
181 error = VFS_VGET(vfsp, &vp, (fid_t *)&(fh->_fh3_len));
182 VFS_RELE(vfsp);
183 if (error || vp == NULL)
184 return (NULL);
185
186 return (vp);
187 }
188
189 /*
190 * Get a vnode from the client's file handle.
191 * NOTE: The vnode is held; it _must_ be explicitly
192 * released by VN_RELE().
193 */
194 static vnode_t *
195 nlm_fh_to_vp(struct netobj *fh)
196 {
197 fhandle3_t *fhp;
198
199 /*
200 * Get a vnode pointer for the given NFS file handle.
201 * Note that it could be an NFSv2 or NFSv3 handle,
202 * which means the size might vary. (don't copy)
203 */
204 if (fh->n_len < sizeof (fhandle_t))
205 return (NULL);
206
207 /* We know this is aligned (kmem_alloc) */
208 /* LINTED E_BAD_PTR_CAST_ALIGN */
209 fhp = (fhandle3_t *)fh->n_bytes;
210
211 /*
212 * See the comment for NFS_FH3MAXDATA in uts/common/nfs/nfs.h for
213 * converting fhandles. Check the NFSv3 file handle size. The lockmgr
214 * is not used for NFS v4.
215 */
216 if (fhp->_fh3_len > NFS_FH3MAXDATA || fhp->_fh3_len == 0)
217 return (NULL);
218
219 return (lm_fhtovp(fhp));
220 }
221
222 /*
223 * Get a vhold from the client's file handle.  In contrast to
224 * nlm_fh_to_vp(), this keeps the vnode associated with the given host.
225 *
226 * NOTE: vhold object _must_ be explicitly released by
227 * nlm_vhold_release().
228 */
229 static struct nlm_vhold *
230 nlm_fh_to_vhold(struct nlm_host *hostp, struct netobj *fh)
231 {
232 vnode_t *vp;
233 struct nlm_vhold *nvp;
234
235 vp = nlm_fh_to_vp(fh);
236 if (vp == NULL)
237 return (NULL);
238
239
240 nvp = nlm_vhold_get(hostp, vp);
241
242 /*
243 * Both nlm_fh_to_vp() and nlm_vhold_get()
244 * do VN_HOLD(), so we need to drop one
245 * reference on vnode.
246 */
247 VN_RELE(vp);
248 return (nvp);
249 }
250
251 /* ******************************************************************* */
252
253 /*
254 * NLM implementation details, called from the RPC svc code.
255 */
256
257 /*
258 * Call-back from NFS statd, used to notify that one of our
259 * hosts had a status change. The host can be either an
260 * NFS client, NFS server or both.
261 * According to the NSM protocol description, the state is a
262 * number that increases monotonically each time the
263 * state of the host changes. An even number indicates that
264 * the host is down, while an odd number indicates that
265 * the host is up.
266 *
267 * Here we ignore this even/odd difference of status number
268 * reported by the NSM; we launch notification handlers
269 * every time the state changes. The reason why we do so
270 * is that the client and server can talk to each other over a
271 * connectionless transport, where it's easy to lose a packet
272 * containing an NSM notification with a status number update.
273 *
274 * In nlm_host_monitor(), we put the sysid in the private data
275 * that statd carries in this callback, so we can easily find
276 * the host this call applies to.
277 */
278 /* ARGSUSED */
279 void
280 nlm_do_notify1(nlm_sm_status *argp, void *res, struct svc_req *sr)
281 {
282 struct nlm_globals *g;
283 struct nlm_host *host;
284 uint16_t sysid;
285
286 g = zone_getspecific(nlm_zone_key, curzone);
287 bcopy(&argp->priv, &sysid, sizeof (sysid));
288
289 DTRACE_PROBE2(nsm__notify, uint16_t, sysid,
290 int, argp->state);
291
292 host = nlm_host_find_by_sysid(g, (sysid_t)sysid);
293 if (host == NULL)
294 return;
295
296 nlm_host_notify_server(host, argp->state);
297 nlm_host_notify_client(host, argp->state);
298 nlm_host_release(g, host);
299 }
300
301 /*
302 * Another available call-back for NFS statd.
303 * Not currently used.
304 */
305 /* ARGSUSED */
306 void
307 nlm_do_notify2(nlm_sm_status *argp, void *res, struct svc_req *sr)
308 {
309 ASSERT(0);
310 }
311
312
313 /*
314 * NLM_TEST, NLM_TEST_MSG,
315 * NLM4_TEST, NLM4_TEST_MSG,
316 * Client inquiry about locks, non-blocking.
317 */
318 void
319 nlm_do_test(nlm4_testargs *argp, nlm4_testres *resp,
320 struct svc_req *sr, nlm_testres_cb cb)
321 {
322 struct nlm_globals *g;
323 struct nlm_host *host;
324 struct nlm4_holder *lh;
325 struct nlm_owner_handle *oh;
326 nlm_rpc_t *rpcp = NULL;
327 vnode_t *vp = NULL;
328 struct netbuf *addr;
329 char *netid;
330 char *name;
331 int error;
332 struct flock64 fl;
333
334 nlm_copy_netobj(&resp->cookie, &argp->cookie);
335
336 name = argp->alock.caller_name;
337 netid = svc_getnetid(sr->rq_xprt);
338 addr = svc_getrpccaller(sr->rq_xprt);
339
340 g = zone_getspecific(nlm_zone_key, curzone);
341 host = nlm_host_findcreate(g, name, netid, addr);
342 if (host == NULL) {
343 resp->stat.stat = nlm4_denied_nolocks;
344 return;
345 }
346 if (cb != NULL) {
347 error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
348 if (error != 0) {
349 resp->stat.stat = nlm4_denied_nolocks;
350 goto out;
351 }
352 }
353
354 vp = nlm_fh_to_vp(&argp->alock.fh);
355 if (vp == NULL) {
356 resp->stat.stat = nlm4_stale_fh;
357 goto out;
358 }
359
360 if (NLM_IN_GRACE(g)) {
361 resp->stat.stat = nlm4_denied_grace_period;
362 goto out;
363 }
364
365 /* Convert to local form. */
366 error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers,
367 (argp->exclusive) ? F_WRLCK : F_RDLCK);
368 if (error) {
369 resp->stat.stat = nlm4_failed;
370 goto out;
371 }
372
373 /* BSD: VOP_ADVLOCK(nv->nv_vp, NULL, F_GETLK, &fl, F_REMOTE); */
374 error = nlm_vop_frlock(vp, F_GETLK, &fl,
375 F_REMOTELOCK | FREAD | FWRITE,
376 (u_offset_t)0, NULL, CRED(), NULL);
377 if (error) {
378 resp->stat.stat = nlm4_failed;
379 goto out;
380 }
381
382 if (fl.l_type == F_UNLCK) {
383 resp->stat.stat = nlm4_granted;
384 goto out;
385 }
386 resp->stat.stat = nlm4_denied;
387
388 /*
389 * This lock "test" fails due to a conflicting lock.
390 *
391 * If this is a v1 client, make sure the conflicting
392 * lock range we report can be expressed with 32-bit
393 * offsets. The lock range requested was expressed
394 * as 32-bit offset and length, so at least part of
395 * the conflicting lock should lie below MAX_UOFF32.
396 * If the conflicting lock extends past that, we'll
397 * trim the range to end at MAX_UOFF32 so this lock
398 * can be represented in a 32-bit response. Check
399 * the start also (paranoid, but a low cost check).
400 */
401 if (sr->rq_vers < NLM4_VERS) {
402 uint64 maxlen;
403 if (fl.l_start > MAX_UOFF32)
404 fl.l_start = MAX_UOFF32;
405 maxlen = MAX_UOFF32 + 1 - fl.l_start;
406 if (fl.l_len > maxlen)
407 fl.l_len = maxlen;
408 }
409
410 /*
411 * Build the nlm4_holder result structure.
412 *
413 * Note that lh->oh is freed via xdr_free,
414 * xdr_nlm4_holder, xdr_netobj, xdr_bytes.
415 */
416 oh = kmem_zalloc(sizeof (*oh), KM_SLEEP);
417 oh->oh_sysid = (sysid_t)fl.l_sysid;
418 lh = &resp->stat.nlm4_testrply_u.holder;
419 lh->exclusive = (fl.l_type == F_WRLCK);
420 lh->svid = fl.l_pid;
421 lh->oh.n_len = sizeof (*oh);
422 lh->oh.n_bytes = (void *)oh;
423 lh->l_offset = fl.l_start;
424 lh->l_len = fl.l_len;
425
426 out:
427 /*
428 * If we have a callback function, use that to
429 * deliver the response via another RPC call.
430 */
431 if (cb != NULL && rpcp != NULL)
432 NLM_INVOKE_CALLBACK("test", rpcp, resp, cb);
433
434 if (vp != NULL)
435 VN_RELE(vp);
436 if (rpcp != NULL)
437 nlm_host_rele_rpc(host, rpcp);
438
439 nlm_host_release(g, host);
440 }
441
442 /*
443 * NLM_LOCK, NLM_LOCK_MSG, NLM_NM_LOCK
444 * NLM4_LOCK, NLM4_LOCK_MSG, NLM4_NM_LOCK
445 *
446 * Client request to set a lock, possibly blocking.
447 *
448 * If the lock needs to block, we return status blocked to
449 * this RPC call, and then later call back the client with
450 * a "granted" callback. Tricky aspects of this include:
451 * sending a reply before this function returns, and then
452 * borrowing this thread from the RPC service pool for the
453 * wait on the lock and doing the later granted callback.
454 *
455 * We also have to keep a list of locks (pending + granted)
456 * both to handle retransmitted requests, and to keep the
457 * vnodes for those locks active.
458 */
459 void
460 nlm_do_lock(nlm4_lockargs *argp, nlm4_res *resp, struct svc_req *sr,
461 nlm_reply_cb reply_cb, nlm_res_cb res_cb, nlm_testargs_cb grant_cb)
462 {
463 struct nlm_globals *g;
464 struct flock64 fl;
465 struct nlm_host *host = NULL;
466 struct netbuf *addr;
467 struct nlm_vhold *nvp = NULL;
468 nlm_rpc_t *rpcp = NULL;
469 char *netid;
470 char *name;
471 int error, flags;
472 bool_t do_blocking = FALSE;
473 bool_t do_mon_req = FALSE;
474 enum nlm4_stats status;
475
476 nlm_copy_netobj(&resp->cookie, &argp->cookie);
477
478 name = argp->alock.caller_name;
479 netid = svc_getnetid(sr->rq_xprt);
480 addr = svc_getrpccaller(sr->rq_xprt);
481
482 g = zone_getspecific(nlm_zone_key, curzone);
483 host = nlm_host_findcreate(g, name, netid, addr);
484 if (host == NULL) {
485 DTRACE_PROBE4(no__host, struct nlm_globals *, g,
486 char *, name, char *, netid, struct netbuf *, addr);
487 status = nlm4_denied_nolocks;
488 goto doreply;
489 }
490
491 DTRACE_PROBE3(start, struct nlm_globals *, g,
492 struct nlm_host *, host, nlm4_lockargs *, argp);
493
494 /*
495 * If we may need to do a _msg_ call needing an RPC
496 * callback, get the RPC client handle now,
497 * so we know if we can bind to the NLM service on
498 * this client.
499 *
500 * Note: host object carries transport type.
501 * One client using multiple transports gets
502 * separate sysids for each of its transports.
503 */
504 if (res_cb != NULL || (grant_cb != NULL && argp->block == TRUE)) {
505 error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
506 if (error != 0) {
507 status = nlm4_denied_nolocks;
508 goto doreply;
509 }
510 }
511
512 /*
513 * During the "grace period", only allow reclaim.
514 */
515 if (argp->reclaim == 0 && NLM_IN_GRACE(g)) {
516 status = nlm4_denied_grace_period;
517 goto doreply;
518 }
519
520 /*
521 * Check whether we missed a host shutdown event
522 */
523 if (nlm_host_get_state(host) != argp->state)
524 nlm_host_notify_server(host, argp->state);
525
526 /*
527 * Get a hold on the vnode for a lock operation.
528 * Only lock() and share() need vhold objects.
529 */
530 nvp = nlm_fh_to_vhold(host, &argp->alock.fh);
531 if (nvp == NULL) {
532 status = nlm4_stale_fh;
533 goto doreply;
534 }
535
536 /* Convert to local form. */
537 error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers,
538 (argp->exclusive) ? F_WRLCK : F_RDLCK);
539 if (error) {
540 status = nlm4_failed;
541 goto doreply;
542 }
543
544 /*
545 * Try to lock non-blocking first. If we succeed
546 * getting the lock, we can reply with the granted
547 * status directly and avoid the complications of
548 * making the "granted" RPC callback later.
549 *
550 * This also lets us find out now about some
551 * possible errors like EROFS, etc.
552 */
553 flags = F_REMOTELOCK | FREAD | FWRITE;
554 error = nlm_vop_frlock(nvp->nv_vp, F_SETLK, &fl, flags,
555 (u_offset_t)0, NULL, CRED(), NULL);
556
557 DTRACE_PROBE3(setlk__res, struct flock64 *, &fl,
558 int, flags, int, error);
559
560 switch (error) {
561 case 0:
562 /* Got it without waiting! */
563 status = nlm4_granted;
564 do_mon_req = TRUE;
565 break;
566
567 /* EINPROGRESS too? */
568 case EAGAIN:
569 /* We did not get the lock. Should we block? */
570 if (argp->block == FALSE || grant_cb == NULL) {
571 status = nlm4_denied;
572 break;
573 }
574 /*
575 * Should block. Try to reserve this thread
576 * so we can use it to wait for the lock and
577 * later send the granted message. If this
578 * reservation fails, say "no resources".
579 */
580 if (!svc_reserve_thread(sr->rq_xprt)) {
581 status = nlm4_denied_nolocks;
582 break;
583 }
584 /*
585 * OK, can detach this thread, so this call
586 * will block below (after we reply).
587 */
588 status = nlm4_blocked;
589 do_blocking = TRUE;
590 do_mon_req = TRUE;
591 break;
592
593 case ENOLCK:
594 /* Failed for lack of resources. */
595 status = nlm4_denied_nolocks;
596 break;
597
598 case EROFS:
599 /* read-only file system */
600 status = nlm4_rofs;
601 break;
602
603 case EFBIG:
604 /* file too big */
605 status = nlm4_fbig;
606 break;
607
608 case EDEADLK:
609 /* dead lock condition */
610 status = nlm4_deadlck;
611 break;
612
613 default:
614 status = nlm4_denied;
615 break;
616 }
617
618 doreply:
619 resp->stat.stat = status;
620
621 /*
622 * We get one of two function pointers; one for a
623 * normal RPC reply, and another for doing an RPC
624 * "callback" _res reply for a _msg function.
625 * Use either of those to send the reply now.
626 *
627 * If sending this reply fails, just leave the
628 * lock in the list for retransmitted requests.
629 * Cleanup is via unlock or host rele (statmon).
630 */
631 if (reply_cb != NULL) {
632 /* i.e. nlm_lock_1_reply */
633 if (!(*reply_cb)(sr->rq_xprt, resp))
634 svcerr_systemerr(sr->rq_xprt);
635 }
636 if (res_cb != NULL && rpcp != NULL)
637 NLM_INVOKE_CALLBACK("lock", rpcp, resp, res_cb);
638
639 /*
640 * The reply has been sent to the client.
641 * Start monitoring this client (maybe).
642 *
643 * Note that the non-monitored (NM) calls pass grant_cb=NULL
644 * indicating that the client doesn't support RPC callbacks.
645 * No monitoring for these (lame) clients.
646 */
647 if (do_mon_req && grant_cb != NULL)
648 nlm_host_monitor(g, host, argp->state);
649
650 if (do_blocking) {
651 /*
652 * We need to block on this lock, and when that
653 * completes, do the granted RPC call. Note that
654 * we "reserved" this thread above, so we can now
655 * "detach" it from the RPC SVC pool, allowing it
656 * to block indefinitely if needed.
657 */
658 ASSERT(rpcp != NULL);
659 (void) svc_detach_thread(sr->rq_xprt);
660 nlm_block(argp, host, nvp, rpcp, &fl, grant_cb);
661 }
662
663 DTRACE_PROBE3(lock__end, struct nlm_globals *, g,
664 struct nlm_host *, host, nlm4_res *, resp);
665
666 if (rpcp != NULL)
667 nlm_host_rele_rpc(host, rpcp);
668
669 nlm_vhold_release(host, nvp);
670 nlm_host_release(g, host);
671 }
672
673 /*
674 * Helper for nlm_do_lock(), partly for observability,
675 * (we'll see a call blocked in this function) and
676 * because nlm_do_lock() was getting quite long.
677 */
678 static void
679 nlm_block(nlm4_lockargs *lockargs,
680 struct nlm_host *host,
681 struct nlm_vhold *nvp,
682 nlm_rpc_t *rpcp,
683 struct flock64 *flp,
684 nlm_testargs_cb grant_cb)
685 {
686 nlm4_testargs args;
687 int error;
688 flk_callback_t flk_cb;
689 struct nlm_block_cb_data cb_data;
690
691 /*
692 * Keep a list of blocked locks on nh_pending, and use it
693 * to cancel these threads in nlm_destroy_client_pending.
694 *
695 * Check to see if this lock is already in the list
696 * and if not, add an entry for it. Allocate first,
697 * then if we don't insert, free the new one.
698 * Caller already has vp held.
699 */
700
701 error = nlm_slreq_register(host, nvp, flp);
702 if (error != 0) {
703 /*
704 * A sleeping lock request with the given fl is already
705 * registered by someone else. This means that
706 * some other thread is handling the request, so let
707 * it do its work.
708 */
709 ASSERT(error == EEXIST);
710 return;
711 }
712
713 cb_data.hostp = host;
714 cb_data.nvp = nvp;
715 cb_data.flp = flp;
716 flk_init_callback(&flk_cb, nlm_block_callback, &cb_data);
717
718 /* BSD: VOP_ADVLOCK(vp, NULL, F_SETLK, fl, F_REMOTE); */
719 error = nlm_vop_frlock(nvp->nv_vp, F_SETLKW, flp,
720 F_REMOTELOCK | FREAD | FWRITE,
721 (u_offset_t)0, &flk_cb, CRED(), NULL);
722
723 if (error != 0) {
724 /*
725 * We failed getting the lock, but have no way to
726 * tell the client about that. Let 'em time out.
727 */
728 (void) nlm_slreq_unregister(host, nvp, flp);
729 return;
730 }
731
732 /*
733 * Do the "granted" call-back to the client.
734 */
735 args.cookie = lockargs->cookie;
736 args.exclusive = lockargs->exclusive;
737 args.alock = lockargs->alock;
738
739 NLM_INVOKE_CALLBACK("grant", rpcp, &args, grant_cb);
740 }
741
742 /*
743 * The flk callback used when the NLM server sets a new
744 * sleeping lock. It unregisters the NLM sleeping lock request
745 * (nlm_slreq) associated with the sleeping lock _before_ the
746 * lock becomes active. This prevents a potential race
747 * condition between nlm_block() and
748 * nlm_do_cancel().
749 */
750 static callb_cpr_t *
751 nlm_block_callback(flk_cb_when_t when, void *data)
752 {
753 struct nlm_block_cb_data *cb_data;
754
755 cb_data = (struct nlm_block_cb_data *)data;
756 if (when == FLK_AFTER_SLEEP) {
757 (void) nlm_slreq_unregister(cb_data->hostp,
758 cb_data->nvp, cb_data->flp);
759 }
760
761 return (0);
762 }
763
764 /*
765 * NLM_CANCEL, NLM_CANCEL_MSG,
766 * NLM4_CANCEL, NLM4_CANCEL_MSG,
767 * Client gives up waiting for a blocking lock.
768 */
769 void
770 nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *resp,
771 struct svc_req *sr, nlm_res_cb cb)
772 {
773 struct nlm_globals *g;
774 struct nlm_host *host;
775 struct netbuf *addr;
776 struct nlm_vhold *nvp = NULL;
777 nlm_rpc_t *rpcp = NULL;
778 char *netid;
779 char *name;
780 int error;
781 struct flock64 fl;
782
783 nlm_copy_netobj(&resp->cookie, &argp->cookie);
784 netid = svc_getnetid(sr->rq_xprt);
785 addr = svc_getrpccaller(sr->rq_xprt);
786 name = argp->alock.caller_name;
787
788 g = zone_getspecific(nlm_zone_key, curzone);
789 host = nlm_host_findcreate(g, name, netid, addr);
790 if (host == NULL) {
791 resp->stat.stat = nlm4_denied_nolocks;
792 return;
793 }
794 if (cb != NULL) {
795 error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
796 if (error != 0) {
797 resp->stat.stat = nlm4_denied_nolocks;
798 goto out;
799 }
800 }
801
802 DTRACE_PROBE3(start, struct nlm_globals *, g,
803 struct nlm_host *, host, nlm4_cancargs *, argp);
804
805 if (NLM_IN_GRACE(g)) {
806 resp->stat.stat = nlm4_denied_grace_period;
807 goto out;
808 }
809
810 nvp = nlm_fh_to_vhold(host, &argp->alock.fh);
811 if (nvp == NULL) {
812 resp->stat.stat = nlm4_stale_fh;
813 goto out;
814 }
815
816 /* Convert to local form. */
817 error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers,
818 (argp->exclusive) ? F_WRLCK : F_RDLCK);
819 if (error) {
820 resp->stat.stat = nlm4_failed;
821 goto out;
822 }
823
824 error = nlm_slreq_unregister(host, nvp, &fl);
825 if (error != 0) {
826 /*
827 * There's no sleeping lock request corresponding
828 * to the lock, i.e. the requested sleeping lock
829 * doesn't exist.
830 */
831 resp->stat.stat = nlm4_denied;
832 goto out;
833 }
834
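	/*
	 * Convert the request into an unlock of the same range and
	 * hand it to the local lock manager.
	 */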
835 fl.l_type = F_UNLCK;
836 error = nlm_vop_frlock(nvp->nv_vp, F_SETLK, &fl,
837 F_REMOTELOCK | FREAD | FWRITE,
838 (u_offset_t)0, NULL, CRED(), NULL);
839
840 resp->stat.stat = (error == 0) ?
841 nlm4_granted : nlm4_denied;
842
843 out:
844 /*
845 * If we have a callback function, use that to
846 * deliver the response via another RPC call.
847 */
848 if (cb != NULL && rpcp != NULL)
849 NLM_INVOKE_CALLBACK("cancel", rpcp, resp, cb);
850
851 DTRACE_PROBE3(cancel__end, struct nlm_globals *, g,
852 struct nlm_host *, host, nlm4_res *, resp);
853
854 if (rpcp != NULL)
855 nlm_host_rele_rpc(host, rpcp);
856
857 nlm_vhold_release(host, nvp);
858 nlm_host_release(g, host);
859 }
860
861 /*
862 * NLM_UNLOCK, NLM_UNLOCK_MSG,
863 * NLM4_UNLOCK, NLM4_UNLOCK_MSG,
864 * Client removes one of their locks.
865 */
866 void
867 nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *resp,
868 struct svc_req *sr, nlm_res_cb cb)
869 {
870 struct nlm_globals *g;
871 struct nlm_host *host;
872 struct netbuf *addr;
873 nlm_rpc_t *rpcp = NULL;
874 vnode_t *vp = NULL;
875 char *netid;
876 char *name;
877 int error;
878 struct flock64 fl;
879
880 nlm_copy_netobj(&resp->cookie, &argp->cookie);
881
882 netid = svc_getnetid(sr->rq_xprt);
883 addr = svc_getrpccaller(sr->rq_xprt);
884 name = argp->alock.caller_name;
885
886 /*
887 * NLM_UNLOCK operation doesn't have an error code
888 * denoting that the operation failed, so we always
889 * return nlm4_granted except when the server is
890 * in a grace period.
891 */
892 resp->stat.stat = nlm4_granted;
893
894 g = zone_getspecific(nlm_zone_key, curzone);
895 host = nlm_host_findcreate(g, name, netid, addr);
896 if (host == NULL)
897 return;
898
899 if (cb != NULL) {
900 error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
901 if (error != 0)
902 goto out;
903 }
904
905 DTRACE_PROBE3(start, struct nlm_globals *, g,
906 struct nlm_host *, host, nlm4_unlockargs *, argp);
907
908 if (NLM_IN_GRACE(g)) {
909 resp->stat.stat = nlm4_denied_grace_period;
910 goto out;
911 }
912
913 vp = nlm_fh_to_vp(&argp->alock.fh);
914 if (vp == NULL)
915 goto out;
916
917 /* Convert to local form. */
918 error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers, F_UNLCK);
919 if (error)
920 goto out;
921
922 /* BSD: VOP_ADVLOCK(nv->nv_vp, NULL, F_UNLCK, &fl, F_REMOTE); */
923 error = nlm_vop_frlock(vp, F_SETLK, &fl,
924 F_REMOTELOCK | FREAD | FWRITE,
925 (u_offset_t)0, NULL, CRED(), NULL);
926
927 DTRACE_PROBE1(unlock__res, int, error);
928 out:
929 /*
930 * If we have a callback function, use that to
931 * deliver the response via another RPC call.
932 */
933 if (cb != NULL && rpcp != NULL)
934 NLM_INVOKE_CALLBACK("unlock", rpcp, resp, cb);
935
936 DTRACE_PROBE3(unlock__end, struct nlm_globals *, g,
937 struct nlm_host *, host, nlm4_res *, resp);
938
939 if (vp != NULL)
940 VN_RELE(vp);
941 if (rpcp != NULL)
942 nlm_host_rele_rpc(host, rpcp);
943
944 nlm_host_release(g, host);
945 }
946
947 /*
948 * NLM_GRANTED, NLM_GRANTED_MSG,
949 * NLM4_GRANTED, NLM4_GRANTED_MSG,
950 *
951 * This service routine is special. It's the only one that's
952 * really part of our NLM _client_ support, used by _servers_
953 * to "call back" when a blocking lock from this NLM client
954 * is granted by the server. In this case, we _know_ there is
955 * already an nlm_host allocated and held by the client code.
956 * We want to find that nlm_host here.
957 *
958 * Over in nlm_call_lock(), the client encoded the sysid for this
959 * server in the "owner handle" netbuf sent with our lock request.
960 * We can now use that to find the nlm_host object we used there.
961 * (NB: The owner handle is opaque to the server.)
962 */
963 void
964 nlm_do_granted(nlm4_testargs *argp, nlm4_res *resp,
965 struct svc_req *sr, nlm_res_cb cb)
966 {
967 struct nlm_globals *g;
968 struct nlm_owner_handle *oh;
969 struct nlm_host *host;
970 nlm_rpc_t *rpcp = NULL;
971 int error;
972
973 nlm_copy_netobj(&resp->cookie, &argp->cookie);
974 resp->stat.stat = nlm4_denied;
975
976 g = zone_getspecific(nlm_zone_key, curzone);
977 oh = (void *) argp->alock.oh.n_bytes;
978 if (oh == NULL)
979 return;
980
981 host = nlm_host_find_by_sysid(g, oh->oh_sysid);
982 if (host == NULL)
983 return;
984
985 if (cb != NULL) {
986 error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
987 if (error != 0)
988 goto out;
989 }
990
991 if (NLM_IN_GRACE(g)) {
992 resp->stat.stat = nlm4_denied_grace_period;
993 goto out;
994 }
995
996 error = nlm_slock_grant(g, host, &argp->alock);
997 if (error == 0)
998 resp->stat.stat = nlm4_granted;
999
1000 out:
1001 /*
1002 * If we have a callback function, use that to
1003 * deliver the response via another RPC call.
1004 */
1005 if (cb != NULL && rpcp != NULL)
1006 NLM_INVOKE_CALLBACK("do_granted", rpcp, resp, cb);
1007
1008 if (rpcp != NULL)
1009 nlm_host_rele_rpc(host, rpcp);
1010
1011 nlm_host_release(g, host);
1012 }
1013
1014 /*
1015 * NLM_FREE_ALL, NLM4_FREE_ALL
1016 *
1017 * Destroy all lock state for the calling client.
1018 */
1019 void
1020 nlm_do_free_all(nlm4_notify *argp, void *res, struct svc_req *sr)
1021 {
1022 struct nlm_globals *g;
1023 struct nlm_host_list host_list;
1024 struct nlm_host *hostp;
1025
1026 TAILQ_INIT(&host_list);
1027 g = zone_getspecific(nlm_zone_key, curzone);
1028
1029 /* Serialize calls to clean locks. */
1030 mutex_enter(&g->clean_lock);
1031
1032 /*
1033 * Find all hosts that have the given node name and put them on a
1034 * local list.
1035 */
1036 mutex_enter(&g->lock);
1037 for (hostp = avl_first(&g->nlm_hosts_tree); hostp != NULL;
1038 hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp)) {
1039 if (strcasecmp(hostp->nh_name, argp->name) == 0) {
1040 /*
1041 * If needed take the host out of the idle list since
1042 * we are taking a reference.
1043 */
1044 if (hostp->nh_flags & NLM_NH_INIDLE) {
1045 TAILQ_REMOVE(&g->nlm_idle_hosts, hostp,
1046 nh_link);
1047 hostp->nh_flags &= ~NLM_NH_INIDLE;
1048 }
1049 hostp->nh_refs++;
1050
1051 TAILQ_INSERT_TAIL(&host_list, hostp, nh_link);
1052 }
1053 }
1054 mutex_exit(&g->lock);
1055
1056 /* Free locks for all hosts on the local list. */
1057 while (!TAILQ_EMPTY(&host_list)) {
1058 hostp = TAILQ_FIRST(&host_list);
1059 TAILQ_REMOVE(&host_list, hostp, nh_link);
1060
1061 /*
1062 * Note that this does not do client-side cleanup.
1063 * We want to do that ONLY if statd tells us the
1064 * server has restarted.
1065 */
1066 nlm_host_notify_server(hostp, argp->state);
1067 nlm_host_release(g, hostp);
1068 }
1069
1070 mutex_exit(&g->clean_lock);
1071
1072 (void) res;
1073 (void) sr;
1074 }
1075
1076 static void
1077 nlm_init_shrlock(struct shrlock *shr,
1078 nlm4_share *nshare, struct nlm_host *host)
1079 {
1080
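	/* Map the NLM share access and deny modes to the local share codes. */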
1081 switch (nshare->access) {
1082 default:
1083 case fsa_NONE:
1084 shr->s_access = 0;
1085 break;
1086 case fsa_R:
1087 shr->s_access = F_RDACC;
1088 break;
1089 case fsa_W:
1090 shr->s_access = F_WRACC;
1091 break;
1092 case fsa_RW:
1093 shr->s_access = F_RWACC;
1094 break;
1095 }
1096
1097 switch (nshare->mode) {
1098 default:
1099 case fsm_DN:
1100 shr->s_deny = F_NODNY;
1101 break;
1102 case fsm_DR:
1103 shr->s_deny = F_RDDNY;
1104 break;
1105 case fsm_DW:
1106 shr->s_deny = F_WRDNY;
1107 break;
1108 case fsm_DRW:
1109 shr->s_deny = F_RWDNY;
1110 break;
1111 }
1112
1113 shr->s_sysid = host->nh_sysid;
1114 shr->s_pid = 0;
1115 shr->s_own_len = nshare->oh.n_len;
1116 shr->s_owner = nshare->oh.n_bytes;
1117 }
1118
1119 /*
1120 * NLM_SHARE, NLM4_SHARE
1121 *
1122 * Request a DOS-style share reservation
1123 */
1124 void
1125 nlm_do_share(nlm4_shareargs *argp, nlm4_shareres *resp, struct svc_req *sr)
1126 {
1127 struct nlm_globals *g;
1128 struct nlm_host *host;
1129 struct netbuf *addr;
1130 struct nlm_vhold *nvp = NULL;
1131 char *netid;
1132 char *name;
1133 int error;
1134 struct shrlock shr;
1135
1136 nlm_copy_netobj(&resp->cookie, &argp->cookie);
1137
1138 name = argp->share.caller_name;
1139 netid = svc_getnetid(sr->rq_xprt);
1140 addr = svc_getrpccaller(sr->rq_xprt);
1141
1142 g = zone_getspecific(nlm_zone_key, curzone);
1143 host = nlm_host_findcreate(g, name, netid, addr);
1144 if (host == NULL) {
1145 resp->stat = nlm4_denied_nolocks;
1146 return;
1147 }
1148
1149 DTRACE_PROBE3(share__start, struct nlm_globals *, g,
1150 struct nlm_host *, host, nlm4_shareargs *, argp);
1151
1152 if (argp->reclaim == 0 && NLM_IN_GRACE(g)) {
1153 resp->stat = nlm4_denied_grace_period;
1154 goto out;
1155 }
1156
1157 /*
1158 * Get a held vnode for the lock operation.
1159 * Only lock() and share() need vhold objects.
1160 */
1161 nvp = nlm_fh_to_vhold(host, &argp->share.fh);
1162 if (nvp == NULL) {
1163 resp->stat = nlm4_stale_fh;
1164 goto out;
1165 }
1166
1167 /* Convert to local form. */
1168 nlm_init_shrlock(&shr, &argp->share, host);
1169 error = VOP_SHRLOCK(nvp->nv_vp, F_SHARE, &shr,
1170 FREAD | FWRITE, CRED(), NULL);
1171
1172 if (error == 0) {
1173 resp->stat = nlm4_granted;
1174 nlm_host_monitor(g, host, 0);
1175 } else {
1176 resp->stat = nlm4_denied;
1177 }
1178
1179 out:
1180 DTRACE_PROBE3(share__end, struct nlm_globals *, g,
1181 struct nlm_host *, host, nlm4_shareres *, resp);
1182
1183 nlm_vhold_release(host, nvp);
1184 nlm_host_release(g, host);
1185 }
1186
1187 /*
1188 * NLM_UNSHARE, NLM4_UNSHARE
1189 *
1190 * Release a DOS-style share reservation
1191 */
1192 void
1193 nlm_do_unshare(nlm4_shareargs *argp, nlm4_shareres *resp, struct svc_req *sr)
1194 {
1195 struct nlm_globals *g;
1196 struct nlm_host *host;
1197 struct netbuf *addr;
1198 vnode_t *vp = NULL;
1199 char *netid;
1200 int error;
1201 struct shrlock shr;
1202
1203 nlm_copy_netobj(&resp->cookie, &argp->cookie);
1204
1205 netid = svc_getnetid(sr->rq_xprt);
1206 addr = svc_getrpccaller(sr->rq_xprt);
1207
1208 g = zone_getspecific(nlm_zone_key, curzone);
1209 host = nlm_host_find(g, netid, addr);
1210 if (host == NULL) {
1211 resp->stat = nlm4_denied_nolocks;
1212 return;
1213 }
1214
1215 DTRACE_PROBE3(unshare__start, struct nlm_globals *, g,
1216 struct nlm_host *, host, nlm4_shareargs *, argp);
1217
1218 if (NLM_IN_GRACE(g)) {
1219 resp->stat = nlm4_denied_grace_period;
1220 goto out;
1221 }
1222
1223 vp = nlm_fh_to_vp(&argp->share.fh);
1224 if (vp == NULL) {
1225 resp->stat = nlm4_stale_fh;
1226 goto out;
1227 }
1228
1229 /* Convert to local form. */
1230 nlm_init_shrlock(&shr, &argp->share, host);
1231 error = VOP_SHRLOCK(vp, F_UNSHARE, &shr,
1232 FREAD | FWRITE, CRED(), NULL);
1233
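	/*
	 * This implementation ignores the VOP_SHRLOCK result and
	 * always reports success to the client.
	 */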
1234 (void) error;
1235 resp->stat = nlm4_granted;
1236
1237 out:
1238 DTRACE_PROBE3(unshare__end, struct nlm_globals *, g,
1239 struct nlm_host *, host, nlm4_shareres *, resp);
1240
1241 if (vp != NULL)
1242 VN_RELE(vp);
1243
1244 nlm_host_release(g, host);
1245 }
1246
1247 /*
1248 * NLM wrapper to VOP_FRLOCK that checks the validity of the lock before
1249 * invoking the vnode operation.
1250 */
1251 static int
1252 nlm_vop_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset,
1253 struct flk_callback *flk_cbp, cred_t *cr, caller_context_t *ct)
1254 {
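	/* Reject lock ranges whose end would overflow the offset type. */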
1255 if (bfp->l_len != 0 && bfp->l_start + (bfp->l_len - 1) < bfp->l_start) {
1256 return (EOVERFLOW);
1257 }
1258
1259 return (VOP_FRLOCK(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
1260 }
1261