/*
 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
 * Authors: Doug Rabson <dfr@rabson.org>
 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
 * Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
 * Copyright 2014 Joyent, Inc. All rights reserved.
 */

/*
 * NFS Lock Manager service functions (nlm_do_...)
 * Called from nlm_rpc_svc.c wrappers.
 *
 * Source code derived from FreeBSD nlm_prot_impl.c
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/thread.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/mount.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/share.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/taskq.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/vfs.h>
#include <sys/queue.h>
#include <sys/sdt.h>
#include <netinet/in.h>

#include <rpc/rpc.h>
#include <rpc/xdr.h>
#include <rpc/pmap_prot.h>
#include <rpc/pmap_clnt.h>
#include <rpc/rpcb_prot.h>

#include <rpcsvc/nlm_prot.h>
#include <rpcsvc/sm_inter.h>

#include <nfs/nfs.h>
#include <nfs/nfs_clnt.h>
#include <nfs/export.h>
#include <nfs/rnode.h>

#include "nlm_impl.h"

#define	NLM_IN_GRACE(g) (ddi_get_lbolt() < (g)->grace_threshold)

struct nlm_block_cb_data {
	struct nlm_host		*hostp;
	struct nlm_vhold	*nvp;
	struct flock64		*flp;
	bool_t			registered;
};

/*
 * Invoke an asynchronous RPC callback
 * (used when the NLM server needs to reply to a MSG NLM procedure).
 */
#define	NLM_INVOKE_CALLBACK(descr, rpcp, resp, callb)			\
	do {								\
		enum clnt_stat _stat;					\
									\
		_stat = (*(callb))(resp, NULL, (rpcp)->nr_handle);	\
		if (_stat != RPC_SUCCESS && _stat != RPC_TIMEDOUT) {	\
			struct rpc_err _err;				\
									\
			CLNT_GETERR((rpcp)->nr_handle, &_err);		\
			NLM_ERR("NLM: %s callback failed: "		\
			    "stat %d, err %d\n", descr, _stat,		\
			    _err.re_errno);				\
		}							\
									\
	_NOTE(CONSTCOND) } while (0)

static void nlm_block(
	nlm4_lockargs *lockargs,
	struct nlm_host *host,
	struct nlm_vhold *nvp,
	struct flock64 *fl,
	nlm_granted_cb grant_cb,
	rpcvers_t);

static vnode_t *nlm_fh_to_vp(struct netobj *);
static struct nlm_vhold *nlm_fh_to_vhold(struct nlm_host *, struct netobj *);
static void nlm_init_shrlock(struct shrlock *, nlm4_share *, struct nlm_host *);
static callb_cpr_t *nlm_block_callback(flk_cb_when_t, void *);
static int nlm_vop_frlock(vnode_t *, int, flock64_t *, int, offset_t,
    struct flk_callback *, cred_t *, caller_context_t *);

/*
 * Convert a lock from network to local form, and
 * check for valid range (no overflow).
 */
static int
nlm_init_flock(struct flock64 *fl, struct nlm4_lock *nl,
    struct nlm_host *host, rpcvers_t vers, short type)
{
	uint64_t off, len;

	bzero(fl, sizeof (*fl));
	off = nl->l_offset;
	len = nl->l_len;

	if (vers < NLM4_VERS) {
		if (off > MAX_UOFF32 || len > MAX_UOFF32)
			return (EINVAL);
		if (off + len > MAX_UOFF32 + 1)
			return (EINVAL);
	} else {
		/*
		 * Check range for 64-bit client (no overflow).
		 * Again allow len == ~0 to mean lock to EOF.
		 */
		if (len == MAX_U_OFFSET_T)
			len = 0;
		if (len != 0 && off + (len - 1) < off)
			return (EINVAL);
	}

	fl->l_type = type;
	fl->l_whence = SEEK_SET;
	fl->l_start = off;
	fl->l_len = len;
	fl->l_sysid = host->nh_sysid;
	fl->l_pid = nl->svid;
	/* l_pad */

	return (0);
}
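
/*
 * Illustrative sketch (not compiled into the module): a worked example
 * of the pre-NLM4 range check above.  Older clients express offset and
 * length as 32-bit values, so a request whose end would wrap past
 * 2^32 - 1 must be rejected.  The helper name and values below are
 * hypothetical, for illustration only.
 */
#if 0
static int
nlm_range_check_example(void)
{
	uint64_t off = MAX_UOFF32;	/* 0xFFFFFFFF, largest 32-bit offset */
	uint64_t len = 2;		/* range would end at 0x100000000 */

	/* Same checks nlm_init_flock() applies for vers < NLM4_VERS. */
	if (off > MAX_UOFF32 || len > MAX_UOFF32)
		return (EINVAL);
	if (off + len > MAX_UOFF32 + 1)	/* 0x100000001 > 0x100000000 */
		return (EINVAL);	/* rejected here */
	return (0);
}
#endif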

/*
 * Convert an fhandle into a vnode.
 * Uses the file id (fh_len + fh_data) in the fhandle to get the vnode.
 * WARNING: users of this routine must do a VN_RELE on the vnode when they
 * are done with it.
 * This is just like nfs_fhtovp() but without the exportinfo argument.
 */
static vnode_t *
lm_fhtovp(fhandle3_t *fh)
{
	vfs_t *vfsp;
	vnode_t *vp;
	int error;

	vfsp = getvfs(&fh->_fh3_fsid);
	if (vfsp == NULL)
		return (NULL);

	/* LINTED E_BAD_PTR_CAST_ALIGN */
	error = VFS_VGET(vfsp, &vp, (fid_t *)&(fh->_fh3_len));
	VFS_RELE(vfsp);
	if (error || vp == NULL)
		return (NULL);

	return (vp);
}

/*
 * Gets vnode from client's filehandle
 * NOTE: Holds vnode, it _must_ be explicitly
 * released by VN_RELE().
 */
static vnode_t *
nlm_fh_to_vp(struct netobj *fh)
{
	fhandle3_t *fhp;

	/*
	 * Get a vnode pointer for the given NFS file handle.
	 * Note that it could be an NFSv2 or NFSv3 handle,
	 * which means the size might vary.  (don't copy)
	 */
	if (fh->n_len < sizeof (fhandle_t))
		return (NULL);

	/* We know this is aligned (kmem_alloc) */
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	fhp = (fhandle3_t *)fh->n_bytes;

	/*
	 * See the comment for NFS_FH3MAXDATA in uts/common/nfs/nfs.h for
	 * converting fhandles.  Check the NFSv3 file handle size.  The
	 * lockmgr is not used for NFS v4.
	 */
	if (fhp->_fh3_len > NFS_FH3MAXDATA || fhp->_fh3_len == 0)
		return (NULL);

	return (lm_fhtovp(fhp));
}

/*
 * Gets a vhold object for the client's filehandle.  In contrast to
 * nlm_fh_to_vp(), the hold is tracked per-host, which keeps the
 * vnode active while the host has locks on it.
 *
 * NOTE: vhold object _must_ be explicitly released by
 * nlm_vhold_release().
 */
static struct nlm_vhold *
nlm_fh_to_vhold(struct nlm_host *hostp, struct netobj *fh)
{
	vnode_t *vp;
	struct nlm_vhold *nvp;

	vp = nlm_fh_to_vp(fh);
	if (vp == NULL)
		return (NULL);

	nvp = nlm_vhold_get(hostp, vp);

	/*
	 * Both nlm_fh_to_vp() and nlm_vhold_get()
	 * do VN_HOLD(), so we need to drop one
	 * reference on vnode.
	 */
	VN_RELE(vp);
	return (nvp);
}

/* ******************************************************************* */

/*
 * NLM implementation details, called from the RPC svc code.
 */

/*
 * Call-back from NFS statd, used to notify that one of our
 * hosts had a status change.  The host can be either an
 * NFS client, NFS server or both.
 * According to the NSM protocol description, the state is a
 * number that increases monotonically each time the state of
 * the host changes.  An even number indicates that the host
 * is down, while an odd number indicates that the host is up.
 *
 * Here we ignore the even/odd difference of the status number
 * reported by the NSM; we launch notification handlers every
 * time the state changes.  The reason why we do so is that the
 * client and server can talk to each other using a
 * connectionless transport, so it's easy to lose a packet
 * containing an NSM notification with a status number update.
 *
 * In nlm_host_monitor(), we put the sysid in the private data
 * that statd carries in this callback, so we can easily find
 * the host this call applies to.
 */
/* ARGSUSED */
void
nlm_do_notify1(nlm_sm_status *argp, void *res, struct svc_req *sr)
{
	struct nlm_globals *g;
	struct nlm_host *host;
	uint16_t sysid;

	g = zone_getspecific(nlm_zone_key, curzone);
	bcopy(&argp->priv, &sysid, sizeof (sysid));

	DTRACE_PROBE2(nsm__notify, uint16_t, sysid,
	    int, argp->state);

	host = nlm_host_find_by_sysid(g, (sysid_t)sysid);
	if (host == NULL)
		return;

	nlm_host_notify_server(host, argp->state);
	nlm_host_notify_client(host, argp->state);
	nlm_host_release(g, host);
}

/*
 * Another available call-back for NFS statd.
 * Not currently used.
 */
/* ARGSUSED */
void
nlm_do_notify2(nlm_sm_status *argp, void *res, struct svc_req *sr)
{
	ASSERT(0);
}


/*
 * NLM_TEST, NLM_TEST_MSG,
 * NLM4_TEST, NLM4_TEST_MSG,
 * Client inquiry about locks, non-blocking.
 *
 * Arg cb is NULL for NLM_TEST, NLM4_TEST, and
 * non-NULL for NLM_TEST_MSG, NLM4_TEST_MSG.
 * The MSG forms use the cb to send the reply,
 * and don't return a reply for this call.
 */
void
nlm_do_test(nlm4_testargs *argp, nlm4_testres *resp,
    struct svc_req *sr, nlm_testres_cb cb)
{
	struct nlm_globals *g;
	struct nlm_host *host;
	struct nlm4_holder *lh;
	struct nlm_owner_handle *oh;
	nlm_rpc_t *rpcp = NULL;
	vnode_t *vp = NULL;
	struct netbuf *addr;
	char *netid;
	char *name;
	int error;
	struct flock64 fl;

	nlm_copy_netobj(&resp->cookie, &argp->cookie);

	name = argp->alock.caller_name;
	netid = svc_getnetid(sr->rq_xprt);
	addr = svc_getrpccaller(sr->rq_xprt);

	g = zone_getspecific(nlm_zone_key, curzone);
	host = nlm_host_findcreate(g, name, netid, addr);
	if (host == NULL) {
		resp->stat.stat = nlm4_denied_nolocks;
		return;
	}
	if (cb != NULL) {
		error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
		if (error != 0) {
			resp->stat.stat = nlm4_denied_nolocks;
			goto out;
		}
	}

	vp = nlm_fh_to_vp(&argp->alock.fh);
	if (vp == NULL) {
		resp->stat.stat = nlm4_stale_fh;
		goto out;
	}

	if (NLM_IN_GRACE(g)) {
		resp->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	/* Convert to local form. */
	error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers,
	    (argp->exclusive) ? F_WRLCK : F_RDLCK);
	if (error) {
		resp->stat.stat = nlm4_failed;
		goto out;
	}

	/* BSD: VOP_ADVLOCK(nv->nv_vp, NULL, F_GETLK, &fl, F_REMOTE); */
	error = nlm_vop_frlock(vp, F_GETLK, &fl,
	    F_REMOTELOCK | FREAD | FWRITE,
	    (u_offset_t)0, NULL, CRED(), NULL);
	if (error) {
		resp->stat.stat = nlm4_failed;
		goto out;
	}

	if (fl.l_type == F_UNLCK) {
		resp->stat.stat = nlm4_granted;
		goto out;
	}
	resp->stat.stat = nlm4_denied;

	/*
	 * This lock "test" fails due to a conflicting lock.
	 *
	 * If this is a v1 client, make sure the conflicting
	 * lock range we report can be expressed with 32-bit
	 * offsets.  The lock range requested was expressed
	 * as 32-bit offset and length, so at least part of
	 * the conflicting lock should lie below MAX_UOFF32.
	 * If the conflicting lock extends past that, we'll
	 * trim the range to end at MAX_UOFF32 so this lock
	 * can be represented in a 32-bit response.  Check
	 * the start also (paranoid, but a low cost check).
	 */
	if (sr->rq_vers < NLM4_VERS) {
		uint64 maxlen;

		if (fl.l_start > MAX_UOFF32)
			fl.l_start = MAX_UOFF32;
		maxlen = MAX_UOFF32 + 1 - fl.l_start;
		if (fl.l_len > maxlen)
			fl.l_len = maxlen;
	}

	/*
	 * Build the nlm4_holder result structure.
	 *
	 * Note that lh->oh is freed via xdr_free,
	 * xdr_nlm4_holder, xdr_netobj, xdr_bytes.
	 */
	oh = kmem_zalloc(sizeof (*oh), KM_SLEEP);
	oh->oh_sysid = (sysid_t)fl.l_sysid;
	lh = &resp->stat.nlm4_testrply_u.holder;
	lh->exclusive = (fl.l_type == F_WRLCK);
	lh->svid = fl.l_pid;
	lh->oh.n_len = sizeof (*oh);
	lh->oh.n_bytes = (void *)oh;
	lh->l_offset = fl.l_start;
	lh->l_len = fl.l_len;

out:
	/*
	 * If we have a callback function, use that to
	 * deliver the response via another RPC call.
	 */
	if (cb != NULL && rpcp != NULL)
		NLM_INVOKE_CALLBACK("test", rpcp, resp, cb);

	if (vp != NULL)
		VN_RELE(vp);
	if (rpcp != NULL)
		nlm_host_rele_rpc(host, rpcp);

	nlm_host_release(g, host);
}
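
/*
 * Illustrative sketch (not compiled into the module): a concrete case
 * of the 32-bit trimming above.  Suppose a pre-NLM4 client tests a
 * range and the conflicting lock found by F_GETLK starts at 0x10 and
 * has an explicit length running far past the 32-bit limit.  The
 * numbers are hypothetical.
 */
#if 0
	fl.l_start = 0x10;
	fl.l_len = 0x200000000ULL;		/* extends well past 2^32-1 */

	if (sr->rq_vers < NLM4_VERS) {
		uint64 maxlen;

		if (fl.l_start > MAX_UOFF32)	/* 0x10 is fine as-is */
			fl.l_start = MAX_UOFF32;
		maxlen = MAX_UOFF32 + 1 - fl.l_start;	/* 0xFFFFFFF0 */
		if (fl.l_len > maxlen)
			fl.l_len = maxlen;	/* range now ends at MAX_UOFF32 */
	}
#endif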

/*
 * NLM_LOCK, NLM_LOCK_MSG, NLM_NM_LOCK
 * NLM4_LOCK, NLM4_LOCK_MSG, NLM4_NM_LOCK
 *
 * Client request to set a lock, possibly blocking.
 *
 * If the lock needs to block, we return status blocked to
 * this RPC call, and then later call back the client with
 * a "granted" callback.  Tricky aspects of this include:
 * sending a reply before this function returns, and then
 * borrowing this thread from the RPC service pool for the
 * wait on the lock and doing the later granted callback.
 *
 * We also have to keep a list of locks (pending + granted)
 * both to handle retransmitted requests, and to keep the
 * vnodes for those locks active.
 *
 * Callback arguments:
 *	reply_cb	Used to send a normal RPC reply just as if
 *			we had filled in a response for our caller.
 *			Needed because we do work after the reply.
 *	res_cb		Used for the MSG calls, where there's no
 *			regular RPC response.
 *	grant_cb	Used to CALL the client informing them of a
 *			granted lock after a "blocked" reply.
 */
void
nlm_do_lock(nlm4_lockargs *argp, nlm4_res *resp, struct svc_req *sr,
    nlm_reply_cb reply_cb, nlm_res_cb res_cb, nlm_granted_cb grant_cb)
{
	struct nlm_globals *g;
	struct flock64 fl;
	struct nlm_host *host = NULL;
	struct netbuf *addr;
	struct nlm_vhold *nvp = NULL;
	nlm_rpc_t *rpcp = NULL;
	char *netid;
	char *name;
	int error, flags;
	bool_t do_blocking = FALSE;
	bool_t do_mon_req = FALSE;
	enum nlm4_stats status;

	nlm_copy_netobj(&resp->cookie, &argp->cookie);

	name = argp->alock.caller_name;
	netid = svc_getnetid(sr->rq_xprt);
	addr = svc_getrpccaller(sr->rq_xprt);

	g = zone_getspecific(nlm_zone_key, curzone);
	host = nlm_host_findcreate(g, name, netid, addr);
	if (host == NULL) {
		DTRACE_PROBE4(no__host, struct nlm_globals *, g,
		    char *, name, char *, netid, struct netbuf *, addr);
		status = nlm4_denied_nolocks;
		goto doreply;
	}

	DTRACE_PROBE3(start, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_lockargs *, argp);

	/*
	 * If this is a MSG call (NLM_LOCK_MSG, NLM4_LOCK_MSG)
	 * we'll have res_cb != NULL, and we know we'll need an
	 * RPC client handle _now_ so we can send the response.
	 * If we can't get an rpc handle (rpcp) then we have
	 * no way to respond, and the client will time out.
	 */
	if (res_cb != NULL) {
		error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
		if (error != 0) {
			ASSERT(rpcp == NULL);
			status = nlm4_denied_nolocks;
			goto out;
		}
	}

	/*
	 * During the "grace period", only allow reclaim.
	 */
	if (argp->reclaim == 0 && NLM_IN_GRACE(g)) {
		status = nlm4_denied_grace_period;
		goto doreply;
	}

	/*
	 * Check whether we missed host shutdown event
	 */
	if (nlm_host_get_state(host) != argp->state)
		nlm_host_notify_server(host, argp->state);

	/*
	 * Get a hold on the vnode for a lock operation.
	 * Only lock() and share() need vhold objects.
	 */
	nvp = nlm_fh_to_vhold(host, &argp->alock.fh);
	if (nvp == NULL) {
		status = nlm4_stale_fh;
		goto doreply;
	}

	/* Convert to local form. */
	error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers,
	    (argp->exclusive) ? F_WRLCK : F_RDLCK);
	if (error) {
		status = nlm4_failed;
		goto doreply;
	}

	/*
	 * Try to lock non-blocking first.  If we succeed
	 * getting the lock, we can reply with the granted
	 * status directly and avoid the complications of
	 * making the "granted" RPC callback later.
	 *
	 * This also lets us find out now about some
	 * possible errors like EROFS, etc.
	 */
	flags = F_REMOTELOCK | FREAD | FWRITE;
	error = nlm_vop_frlock(nvp->nv_vp, F_SETLK, &fl, flags,
	    (u_offset_t)0, NULL, CRED(), NULL);

	DTRACE_PROBE3(setlk__res, struct flock64 *, &fl,
	    int, flags, int, error);

	switch (error) {
	case 0:
		/* Got it without waiting! */
		status = nlm4_granted;
		do_mon_req = TRUE;
		break;

	/* EINPROGRESS too? */
	case EAGAIN:
		/* We did not get the lock. Should we block? */
		if (argp->block == FALSE || grant_cb == NULL) {
			status = nlm4_denied;
			break;
		}
		/*
		 * Should block.  Try to reserve this thread
		 * so we can use it to wait for the lock and
		 * later send the granted message.  If this
		 * reservation fails, say "no resources".
		 */
		if (!svc_reserve_thread(sr->rq_xprt)) {
			status = nlm4_denied_nolocks;
			break;
		}
		/*
		 * OK, can detach this thread, so this call
		 * will block below (after we reply).
		 * The "blocked" reply tells the client to
		 * expect a "granted" call-back later.
		 */
		status = nlm4_blocked;
		do_blocking = TRUE;
		do_mon_req = TRUE;
		break;

	case ENOLCK:
		/* Failed for lack of resources. */
		status = nlm4_denied_nolocks;
		break;

	case EROFS:
		/* read-only file system */
		status = nlm4_rofs;
		break;

	case EFBIG:
		/* file too big */
		status = nlm4_fbig;
		break;

	case EDEADLK:
		/* dead lock condition */
		status = nlm4_deadlck;
		break;

	default:
		status = nlm4_denied;
		break;
	}

doreply:
	resp->stat.stat = status;

	/*
	 * We get one of two function pointers; one for a
	 * normal RPC reply, and another for doing an RPC
	 * "callback" _res reply for a _msg function.
	 * Use either of those to send the reply now.
	 *
	 * If sending this reply fails, just leave the
	 * lock in the list for retransmitted requests.
	 * Cleanup is via unlock or host rele (statmon).
	 */
	if (reply_cb != NULL) {
		/* i.e. nlm_lock_1_reply */
		if (!(*reply_cb)(sr->rq_xprt, resp))
			svcerr_systemerr(sr->rq_xprt);
	}
	if (res_cb != NULL && rpcp != NULL)
		NLM_INVOKE_CALLBACK("lock", rpcp, resp, res_cb);

	/*
	 * The reply has been sent to the client.
	 * Start monitoring this client (maybe).
	 *
	 * Note that the non-monitored (NM) calls pass grant_cb=NULL
	 * indicating that the client doesn't support RPC callbacks.
	 * No monitoring for these (lame) clients.
	 */
	if (do_mon_req && grant_cb != NULL)
		nlm_host_monitor(g, host, argp->state);

	if (do_blocking) {
		/*
		 * We need to block on this lock, and when that
		 * completes, do the granted RPC call.  Note that
		 * we "reserved" this thread above, so we can now
		 * "detach" it from the RPC SVC pool, allowing it
		 * to block indefinitely if needed.
		 */
		ASSERT(grant_cb != NULL);
		(void) svc_detach_thread(sr->rq_xprt);
		nlm_block(argp, host, nvp, &fl, grant_cb, sr->rq_vers);
	}

out:
	DTRACE_PROBE3(lock__end, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_res *, resp);

	if (rpcp != NULL)
		nlm_host_rele_rpc(host, rpcp);

	nlm_vhold_release(host, nvp);
	nlm_host_release(g, host);
}
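
/*
 * Illustrative sketch (not compiled into the module): how the
 * nlm_rpc_svc.c wrappers are expected to combine the three callbacks
 * when they call nlm_do_lock(), based on the comment block above.
 * The example_* callback names are hypothetical placeholders, not the
 * real wrapper symbols.
 */
#if 0
	/* Plain LOCK: normal RPC reply, "granted" callback allowed. */
	nlm_do_lock(argp, resp, sr, example_reply_cb, NULL, example_grant_cb);

	/* LOCK_MSG: no regular reply; the result goes out via res_cb. */
	nlm_do_lock(argp, resp, sr, NULL, example_res_cb, example_grant_msg_cb);

	/* NM_LOCK: client can't take callbacks, so grant_cb is NULL. */
	nlm_do_lock(argp, resp, sr, example_reply_cb, NULL, NULL);
#endif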

/*
 * Helper for nlm_do_lock(), partly for observability,
 * (we'll see a call blocked in this function) and
 * because nlm_do_lock() was getting quite long.
 */
static void
nlm_block(nlm4_lockargs *lockargs,
	struct nlm_host *host,
	struct nlm_vhold *nvp,
	struct flock64 *flp,
	nlm_granted_cb grant_cb,
	rpcvers_t vers)
{
	nlm4_testargs args;
	nlm4_res res;
	int error;
	flk_callback_t flk_cb;
	struct nlm_block_cb_data cb_data;
	nlm_rpc_t *rpcp = NULL;
	enum clnt_stat status;

	/*
	 * Keep a list of blocked locks on nh_pending, and use it
	 * to cancel these threads in nlm_destroy_client_pending.
	 *
	 * Check to see if this lock is already in the list.  If so,
	 * some earlier call is already blocked getting this lock,
	 * so there's nothing more this call needs to do.
	 */
	error = nlm_slreq_register(host, nvp, flp);
	if (error != 0) {
		/*
		 * Sleeping lock request with given fl is already
		 * registered by someone else.  This means that
		 * some other thread is handling the request, let
		 * it do its work.
		 */
		ASSERT(error == EEXIST);
		return;
	}

	/*
	 * Make sure we can get an RPC client handle we can use to
	 * deliver the "granted" callback if/when we get the lock.
	 * If we can't, there's no point blocking to get the lock
	 * for them because they'll never find out about it.
	 */
	error = nlm_host_get_rpc(host, vers, &rpcp);
	if (error != 0) {
		(void) nlm_slreq_unregister(host, nvp, flp);
		return;
	}

	cb_data.hostp = host;
	cb_data.nvp = nvp;
	cb_data.flp = flp;
	cb_data.registered = TRUE;
	flk_init_callback(&flk_cb, nlm_block_callback, &cb_data);

	/* BSD: VOP_ADVLOCK(vp, NULL, F_SETLK, fl, F_REMOTE); */
	error = nlm_vop_frlock(nvp->nv_vp, F_SETLKW, flp,
	    F_REMOTELOCK | FREAD | FWRITE,
	    (u_offset_t)0, &flk_cb, CRED(), NULL);

	/*
	 * If the nlm_block_callback didn't already do it...
	 */
	if (cb_data.registered)
		(void) nlm_slreq_unregister(host, nvp, flp);

	if (error != 0) {
		/*
		 * We failed getting the lock, but have no way to
		 * tell the client about that.  Let 'em time out.
		 */
		return;
	}
	/*
	 * ... else we got the lock on behalf of this client.
	 *
	 * We MUST either tell the client about this lock
	 * (via the "granted" callback RPC) or unlock.
	 *
	 * Do the "granted" call-back to the client.
	 */
	bzero(&args, sizeof (args));
	args.cookie = lockargs->cookie;
	args.exclusive = lockargs->exclusive;
	args.alock = lockargs->alock;
	bzero(&res, sizeof (res));

	/*
	 * Not using the NLM_INVOKE_CALLBACK() macro because
	 * we need to take actions on errors.
	 */
	status = (*grant_cb)(&args, &res, (rpcp)->nr_handle);
	if (status != RPC_SUCCESS) {
		struct rpc_err err;

		CLNT_GETERR((rpcp)->nr_handle, &err);
		NLM_ERR("NLM: %s callback failed: "
		    "stat %d, err %d\n", "grant", status,
		    err.re_errno);
		res.stat.stat = nlm4_failed;
	}
	if (res.stat.stat != nlm4_granted) {
		/*
		 * Failed to deliver the granted callback, so
		 * the client doesn't know about this lock.
		 * Unlock the lock.  The client will time out.
		 */
		(void) nlm_vop_frlock(nvp->nv_vp, F_UNLCK, flp,
		    F_REMOTELOCK | FREAD | FWRITE,
		    (u_offset_t)0, NULL, CRED(), NULL);
	}
	xdr_free((xdrproc_t)xdr_nlm4_res, (void *)&res);

	nlm_host_rele_rpc(host, rpcp);
}

/*
 * The function that is used as flk callback when NLM server
 * sets new sleeping lock.
 * The function unregisters the NLM sleeping lock request
 * (nlm_slreq) associated with the sleeping lock _before_ the
 * lock becomes active.  It prevents a potential race condition
 * between nlm_block() and nlm_do_cancel().
 */
static callb_cpr_t *
nlm_block_callback(flk_cb_when_t when, void *data)
{
	struct nlm_block_cb_data *cb_data;

	cb_data = (struct nlm_block_cb_data *)data;
	if (when == FLK_AFTER_SLEEP) {
		(void) nlm_slreq_unregister(cb_data->hostp,
		    cb_data->nvp, cb_data->flp);
		cb_data->registered = FALSE;
	}

	return (0);
}

/*
 * NLM_CANCEL, NLM_CANCEL_MSG,
 * NLM4_CANCEL, NLM4_CANCEL_MSG,
 * Client gives up waiting for a blocking lock.
 */
void
nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *resp,
    struct svc_req *sr, nlm_res_cb cb)
{
	struct nlm_globals *g;
	struct nlm_host *host;
	struct netbuf *addr;
	struct nlm_vhold *nvp = NULL;
	nlm_rpc_t *rpcp = NULL;
	char *netid;
	char *name;
	int error;
	struct flock64 fl;

	nlm_copy_netobj(&resp->cookie, &argp->cookie);
	netid = svc_getnetid(sr->rq_xprt);
	addr = svc_getrpccaller(sr->rq_xprt);
	name = argp->alock.caller_name;

	g = zone_getspecific(nlm_zone_key, curzone);
	host = nlm_host_findcreate(g, name, netid, addr);
	if (host == NULL) {
		resp->stat.stat = nlm4_denied_nolocks;
		return;
	}
	if (cb != NULL) {
		error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
		if (error != 0) {
			resp->stat.stat = nlm4_denied_nolocks;
			goto out;
		}
	}

	DTRACE_PROBE3(start, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_cancargs *, argp);

	if (NLM_IN_GRACE(g)) {
		resp->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	nvp = nlm_fh_to_vhold(host, &argp->alock.fh);
	if (nvp == NULL) {
		resp->stat.stat = nlm4_stale_fh;
		goto out;
	}

	/* Convert to local form. */
	error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers,
	    (argp->exclusive) ? F_WRLCK : F_RDLCK);
	if (error) {
		resp->stat.stat = nlm4_failed;
		goto out;
	}

	error = nlm_slreq_unregister(host, nvp, &fl);
	if (error != 0) {
		/*
		 * There's no sleeping lock request corresponding
		 * to the lock, i.e. the requested sleeping lock
		 * doesn't exist.
		 */
		resp->stat.stat = nlm4_denied;
		goto out;
	}

	fl.l_type = F_UNLCK;
	error = nlm_vop_frlock(nvp->nv_vp, F_SETLK, &fl,
	    F_REMOTELOCK | FREAD | FWRITE,
	    (u_offset_t)0, NULL, CRED(), NULL);

	resp->stat.stat = (error == 0) ?
	    nlm4_granted : nlm4_denied;

out:
	/*
	 * If we have a callback function, use that to
	 * deliver the response via another RPC call.
	 */
	if (cb != NULL && rpcp != NULL)
		NLM_INVOKE_CALLBACK("cancel", rpcp, resp, cb);

	DTRACE_PROBE3(cancel__end, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_res *, resp);

	if (rpcp != NULL)
		nlm_host_rele_rpc(host, rpcp);

	nlm_vhold_release(host, nvp);
	nlm_host_release(g, host);
}

/*
 * NLM_UNLOCK, NLM_UNLOCK_MSG,
 * NLM4_UNLOCK, NLM4_UNLOCK_MSG,
 * Client removes one of their locks.
 */
void
nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *resp,
    struct svc_req *sr, nlm_res_cb cb)
{
	struct nlm_globals *g;
	struct nlm_host *host;
	struct netbuf *addr;
	nlm_rpc_t *rpcp = NULL;
	vnode_t *vp = NULL;
	char *netid;
	char *name;
	int error;
	struct flock64 fl;

	nlm_copy_netobj(&resp->cookie, &argp->cookie);

	netid = svc_getnetid(sr->rq_xprt);
	addr = svc_getrpccaller(sr->rq_xprt);
	name = argp->alock.caller_name;

	/*
	 * The NLM_UNLOCK operation doesn't have an error code
	 * denoting that the operation failed, so we always
	 * return nlm4_granted except when the server is in a
	 * grace period.
	 */
	resp->stat.stat = nlm4_granted;

	g = zone_getspecific(nlm_zone_key, curzone);
	host = nlm_host_findcreate(g, name, netid, addr);
	if (host == NULL)
		return;

	if (cb != NULL) {
		error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
		if (error != 0)
			goto out;
	}

	DTRACE_PROBE3(start, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_unlockargs *, argp);

	if (NLM_IN_GRACE(g)) {
		resp->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	vp = nlm_fh_to_vp(&argp->alock.fh);
	if (vp == NULL)
		goto out;

	/* Convert to local form. */
	error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers, F_UNLCK);
	if (error)
		goto out;

	/* BSD: VOP_ADVLOCK(nv->nv_vp, NULL, F_UNLCK, &fl, F_REMOTE); */
	error = nlm_vop_frlock(vp, F_SETLK, &fl,
	    F_REMOTELOCK | FREAD | FWRITE,
	    (u_offset_t)0, NULL, CRED(), NULL);

	DTRACE_PROBE1(unlock__res, int, error);
out:
	/*
	 * If we have a callback function, use that to
	 * deliver the response via another RPC call.
	 */
	if (cb != NULL && rpcp != NULL)
		NLM_INVOKE_CALLBACK("unlock", rpcp, resp, cb);

	DTRACE_PROBE3(unlock__end, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_res *, resp);

	if (vp != NULL)
		VN_RELE(vp);
	if (rpcp != NULL)
		nlm_host_rele_rpc(host, rpcp);

	nlm_host_release(g, host);
}

/*
 * NLM_GRANTED, NLM_GRANTED_MSG,
 * NLM4_GRANTED, NLM4_GRANTED_MSG,
 *
 * This service routine is special.  It's the only one that's
 * really part of our NLM _client_ support, used by _servers_
 * to "call back" when a blocking lock from this NLM client
 * is granted by the server.  In this case, we _know_ there is
 * already an nlm_host allocated and held by the client code.
 * We want to find that nlm_host here.
 *
 * Over in nlm_call_lock(), the client encoded the sysid for this
 * server in the "owner handle" netbuf sent with our lock request.
 * We can now use that to find the nlm_host object we used there.
 * (NB: The owner handle is opaque to the server.)
 */
void
nlm_do_granted(nlm4_testargs *argp, nlm4_res *resp,
    struct svc_req *sr, nlm_res_cb cb)
{
	struct nlm_globals *g;
	struct nlm_owner_handle *oh;
	struct nlm_host *host;
	nlm_rpc_t *rpcp = NULL;
	int error;

	nlm_copy_netobj(&resp->cookie, &argp->cookie);
	resp->stat.stat = nlm4_denied;

	g = zone_getspecific(nlm_zone_key, curzone);
	oh = (void *) argp->alock.oh.n_bytes;
	if (oh == NULL)
		return;

	host = nlm_host_find_by_sysid(g, oh->oh_sysid);
	if (host == NULL)
		return;

	if (cb != NULL) {
		error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
		if (error != 0)
			goto out;
	}

	if (NLM_IN_GRACE(g)) {
		resp->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	error = nlm_slock_grant(g, host, &argp->alock);
	if (error == 0)
		resp->stat.stat = nlm4_granted;

out:
	/*
	 * If we have a callback function, use that to
	 * deliver the response via another RPC call.
	 */
	if (cb != NULL && rpcp != NULL)
		NLM_INVOKE_CALLBACK("do_granted", rpcp, resp, cb);

	if (rpcp != NULL)
		nlm_host_rele_rpc(host, rpcp);

	nlm_host_release(g, host);
}
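
/*
 * Illustrative sketch (not compiled into the module): the owner-handle
 * round trip described in the comment above.  On the client side,
 * nlm_call_lock() is said to stash the sysid in the opaque "oh" netobj
 * of the lock; nlm_do_granted() then recovers it to find the nlm_host.
 * The encode fragment is an assumption for illustration only, not the
 * actual client code.
 */
#if 0
	/* Client side (conceptually, in nlm_call_lock()): */
	struct nlm_owner_handle oh;
	struct nlm_owner_handle *ohp;

	oh.oh_sysid = host->nh_sysid;
	args.alock.oh.n_len = sizeof (oh);
	args.alock.oh.n_bytes = (void *)&oh;

	/* Server calls back; nlm_do_granted() recovers the host: */
	ohp = (struct nlm_owner_handle *)argp->alock.oh.n_bytes;
	host = nlm_host_find_by_sysid(g, ohp->oh_sysid);
#endif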

/*
 * NLM_FREE_ALL, NLM4_FREE_ALL
 *
 * Destroy all lock state for the calling client.
 */
void
nlm_do_free_all(nlm4_notify *argp, void *res, struct svc_req *sr)
{
	struct nlm_globals *g;
	struct nlm_host_list host_list;
	struct nlm_host *hostp;

	TAILQ_INIT(&host_list);
	g = zone_getspecific(nlm_zone_key, curzone);

	/* Serialize calls to clean locks. */
	mutex_enter(&g->clean_lock);

	/*
	 * Find all hosts that have the given node name and put them on a
	 * local list.
	 */
	mutex_enter(&g->lock);
	for (hostp = avl_first(&g->nlm_hosts_tree); hostp != NULL;
	    hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp)) {
		if (strcasecmp(hostp->nh_name, argp->name) == 0) {
			/*
			 * If needed take the host out of the idle list since
			 * we are taking a reference.
			 */
			if (hostp->nh_flags & NLM_NH_INIDLE) {
				TAILQ_REMOVE(&g->nlm_idle_hosts, hostp,
				    nh_link);
				hostp->nh_flags &= ~NLM_NH_INIDLE;
			}
			hostp->nh_refs++;

			TAILQ_INSERT_TAIL(&host_list, hostp, nh_link);
		}
	}
	mutex_exit(&g->lock);

	/* Free locks for all hosts on the local list. */
	while (!TAILQ_EMPTY(&host_list)) {
		hostp = TAILQ_FIRST(&host_list);
		TAILQ_REMOVE(&host_list, hostp, nh_link);

		/*
		 * Note that this does not do client-side cleanup.
		 * We want to do that ONLY if statd tells us the
		 * server has restarted.
		 */
		nlm_host_notify_server(hostp, argp->state);
		nlm_host_release(g, hostp);
	}

	mutex_exit(&g->clean_lock);

	(void) res;
	(void) sr;
}

static void
nlm_init_shrlock(struct shrlock *shr,
    nlm4_share *nshare, struct nlm_host *host)
{

	switch (nshare->access) {
	default:
	case fsa_NONE:
		shr->s_access = 0;
		break;
	case fsa_R:
		shr->s_access = F_RDACC;
		break;
	case fsa_W:
		shr->s_access = F_WRACC;
		break;
	case fsa_RW:
		shr->s_access = F_RWACC;
		break;
	}

	switch (nshare->mode) {
	default:
	case fsm_DN:
		shr->s_deny = F_NODNY;
		break;
	case fsm_DR:
		shr->s_deny = F_RDDNY;
		break;
	case fsm_DW:
		shr->s_deny = F_WRDNY;
		break;
	case fsm_DRW:
		shr->s_deny = F_RWDNY;
		break;
	}

	shr->s_sysid = host->nh_sysid;
	shr->s_pid = 0;
	shr->s_own_len = nshare->oh.n_len;
	shr->s_owner = nshare->oh.n_bytes;
}
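
/*
 * Illustrative sketch (not compiled into the module): an example of
 * the access/deny mapping performed by nlm_init_shrlock() above.  A
 * client asking for read-write access while denying other writers
 * (fsa_RW, fsm_DW) ends up with the local shrlock shown.  The variable
 * names are hypothetical.
 */
#if 0
	nlm4_share nshare;
	struct shrlock shr;

	nshare.access = fsa_RW;		/* read + write access */
	nshare.mode = fsm_DW;		/* deny writes by others */
	nlm_init_shrlock(&shr, &nshare, host);
	/* Now shr.s_access == F_RWACC and shr.s_deny == F_WRDNY. */
#endif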

/*
 * NLM_SHARE, NLM4_SHARE
 *
 * Request a DOS-style share reservation
 */
void
nlm_do_share(nlm4_shareargs *argp, nlm4_shareres *resp, struct svc_req *sr)
{
	struct nlm_globals *g;
	struct nlm_host *host;
	struct netbuf *addr;
	struct nlm_vhold *nvp = NULL;
	char *netid;
	char *name;
	int error;
	struct shrlock shr;

	nlm_copy_netobj(&resp->cookie, &argp->cookie);

	name = argp->share.caller_name;
	netid = svc_getnetid(sr->rq_xprt);
	addr = svc_getrpccaller(sr->rq_xprt);

	g = zone_getspecific(nlm_zone_key, curzone);
	host = nlm_host_findcreate(g, name, netid, addr);
	if (host == NULL) {
		resp->stat = nlm4_denied_nolocks;
		return;
	}

	DTRACE_PROBE3(share__start, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_shareargs *, argp);

	if (argp->reclaim == 0 && NLM_IN_GRACE(g)) {
		resp->stat = nlm4_denied_grace_period;
		goto out;
	}

	/*
	 * Get a hold on the vnode for the share operation.
	 * Only lock() and share() need vhold objects.
	 */
	nvp = nlm_fh_to_vhold(host, &argp->share.fh);
	if (nvp == NULL) {
		resp->stat = nlm4_stale_fh;
		goto out;
	}

	/* Convert to local form. */
	nlm_init_shrlock(&shr, &argp->share, host);
	error = VOP_SHRLOCK(nvp->nv_vp, F_SHARE, &shr,
	    FREAD | FWRITE, CRED(), NULL);

	if (error == 0) {
		resp->stat = nlm4_granted;
		nlm_host_monitor(g, host, 0);
	} else {
		resp->stat = nlm4_denied;
	}

out:
	DTRACE_PROBE3(share__end, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_shareres *, resp);

	nlm_vhold_release(host, nvp);
	nlm_host_release(g, host);
}

/*
 * NLM_UNSHARE, NLM4_UNSHARE
 *
 * Release a DOS-style share reservation
 */
void
nlm_do_unshare(nlm4_shareargs *argp, nlm4_shareres *resp, struct svc_req *sr)
{
	struct nlm_globals *g;
	struct nlm_host *host;
	struct netbuf *addr;
	vnode_t *vp = NULL;
	char *netid;
	int error;
	struct shrlock shr;

	nlm_copy_netobj(&resp->cookie, &argp->cookie);

	netid = svc_getnetid(sr->rq_xprt);
	addr = svc_getrpccaller(sr->rq_xprt);

	g = zone_getspecific(nlm_zone_key, curzone);
	host = nlm_host_find(g, netid, addr);
	if (host == NULL) {
		resp->stat = nlm4_denied_nolocks;
		return;
	}

	DTRACE_PROBE3(unshare__start, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_shareargs *, argp);

	if (NLM_IN_GRACE(g)) {
		resp->stat = nlm4_denied_grace_period;
		goto out;
	}

	vp = nlm_fh_to_vp(&argp->share.fh);
	if (vp == NULL) {
		resp->stat = nlm4_stale_fh;
		goto out;
	}

	/* Convert to local form. */
	nlm_init_shrlock(&shr, &argp->share, host);
	error = VOP_SHRLOCK(vp, F_UNSHARE, &shr,
	    FREAD | FWRITE, CRED(), NULL);

	(void) error;
	resp->stat = nlm4_granted;

out:
	DTRACE_PROBE3(unshare__end, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_shareres *, resp);

	if (vp != NULL)
		VN_RELE(vp);

	nlm_host_release(g, host);
}

/*
 * NLM wrapper to VOP_FRLOCK that checks the validity of the lock before
 * invoking the vnode operation.
 */
static int
nlm_vop_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset,
    struct flk_callback *flk_cbp, cred_t *cr, caller_context_t *ct)
{
	if (bfp->l_len != 0 && bfp->l_start + (bfp->l_len - 1) < bfp->l_start) {
		return (EOVERFLOW);
	}

	return (VOP_FRLOCK(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
}