1 /* 2 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ 3 * Authors: Doug Rabson <dfr@rabson.org> 4 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 /* 29 * Copyright (c) 2012 by Delphix. All rights reserved. 30 * Copyright 2013 Nexenta Systems, Inc. All rights reserved. 31 * Copyright 2014 Joyent, Inc. All rights reserved. 32 */ 33 34 /* 35 * NFS Lock Manager service functions (nlm_do_...) 36 * Called from nlm_rpc_svc.c wrappers. 37 * 38 * Source code derived from FreeBSD nlm_prot_impl.c 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/thread.h> 44 #include <sys/fcntl.h> 45 #include <sys/flock.h> 46 #include <sys/mount.h> 47 #include <sys/priv.h> 48 #include <sys/proc.h> 49 #include <sys/share.h> 50 #include <sys/socket.h> 51 #include <sys/syscall.h> 52 #include <sys/syslog.h> 53 #include <sys/systm.h> 54 #include <sys/taskq.h> 55 #include <sys/unistd.h> 56 #include <sys/vnode.h> 57 #include <sys/vfs.h> 58 #include <sys/queue.h> 59 #include <sys/sdt.h> 60 #include <netinet/in.h> 61 62 #include <rpc/rpc.h> 63 #include <rpc/xdr.h> 64 #include <rpc/pmap_prot.h> 65 #include <rpc/pmap_clnt.h> 66 #include <rpc/rpcb_prot.h> 67 68 #include <rpcsvc/nlm_prot.h> 69 #include <rpcsvc/sm_inter.h> 70 71 #include <nfs/nfs.h> 72 #include <nfs/nfs_clnt.h> 73 #include <nfs/export.h> 74 #include <nfs/rnode.h> 75 76 #include "nlm_impl.h" 77 78 #define NLM_IN_GRACE(g) (ddi_get_lbolt() < (g)->grace_threshold) 79 80 struct nlm_block_cb_data { 81 struct nlm_host *hostp; 82 struct nlm_vhold *nvp; 83 struct flock64 *flp; 84 }; 85 86 /* 87 * Invoke an asyncronous RPC callbeck 88 * (used when NLM server needs to reply to MSG NLM procedure). 89 */ 90 #define NLM_INVOKE_CALLBACK(descr, rpcp, resp, callb) \ 91 do { \ 92 enum clnt_stat _stat; \ 93 \ 94 _stat = (*(callb))(resp, NULL, (rpcp)->nr_handle); \ 95 if (_stat != RPC_SUCCESS && _stat != RPC_TIMEDOUT) { \ 96 struct rpc_err _err; \ 97 \ 98 CLNT_GETERR((rpcp)->nr_handle, &_err); \ 99 NLM_ERR("NLM: %s callback failed: " \ 100 "stat %d, err %d\n", descr, _stat, \ 101 _err.re_errno); \ 102 } \ 103 \ 104 _NOTE(CONSTCOND) } while (0) 105 106 static void nlm_block( 107 nlm4_lockargs *lockargs, 108 struct nlm_host *host, 109 struct nlm_vhold *nvp, 110 nlm_rpc_t *rpcp, 111 struct flock64 *fl, 112 nlm_testargs_cb grant_cb); 113 114 static vnode_t *nlm_fh_to_vp(struct netobj *); 115 static struct nlm_vhold *nlm_fh_to_vhold(struct nlm_host *, struct netobj *); 116 static void nlm_init_shrlock(struct shrlock *, nlm4_share *, struct nlm_host *); 117 static callb_cpr_t *nlm_block_callback(flk_cb_when_t, void *); 118 static int nlm_vop_frlock(vnode_t *, int, flock64_t *, int, offset_t, 119 struct flk_callback *, cred_t *, caller_context_t *); 120 121 /* 122 * Convert a lock from network to local form, and 123 * check for valid range (no overflow). 124 */ 125 static int 126 nlm_init_flock(struct flock64 *fl, struct nlm4_lock *nl, 127 struct nlm_host *host, rpcvers_t vers, short type) 128 { 129 uint64_t off, len; 130 131 bzero(fl, sizeof (*fl)); 132 off = nl->l_offset; 133 len = nl->l_len; 134 135 if (vers < NLM4_VERS) { 136 if (off > MAX_UOFF32 || len > MAX_UOFF32) 137 return (EINVAL); 138 if (off + len > MAX_UOFF32 + 1) 139 return (EINVAL); 140 } else { 141 /* 142 * Check range for 64-bit client (no overflow). 143 * Again allow len == ~0 to mean lock to EOF. 144 */ 145 if (len == MAX_U_OFFSET_T) 146 len = 0; 147 if (len != 0 && off + (len - 1) < off) 148 return (EINVAL); 149 } 150 151 fl->l_type = type; 152 fl->l_whence = SEEK_SET; 153 fl->l_start = off; 154 fl->l_len = len; 155 fl->l_sysid = host->nh_sysid; 156 fl->l_pid = nl->svid; 157 /* l_pad */ 158 159 return (0); 160 } 161 162 /* 163 * Convert an fhandle into a vnode. 164 * Uses the file id (fh_len + fh_data) in the fhandle to get the vnode. 165 * WARNING: users of this routine must do a VN_RELE on the vnode when they 166 * are done with it. 167 * This is just like nfs_fhtovp() but without the exportinfo argument. 168 */ 169 static vnode_t * 170 lm_fhtovp(fhandle3_t *fh) 171 { 172 vfs_t *vfsp; 173 vnode_t *vp; 174 int error; 175 176 vfsp = getvfs(&fh->_fh3_fsid); 177 if (vfsp == NULL) 178 return (NULL); 179 180 /* LINTED E_BAD_PTR_CAST_ALIGN */ 181 error = VFS_VGET(vfsp, &vp, (fid_t *)&(fh->_fh3_len)); 182 VFS_RELE(vfsp); 183 if (error || vp == NULL) 184 return (NULL); 185 186 return (vp); 187 } 188 189 /* 190 * Gets vnode from client's filehandle 191 * NOTE: Holds vnode, it _must_ be explicitly 192 * released by VN_RELE(). 193 */ 194 static vnode_t * 195 nlm_fh_to_vp(struct netobj *fh) 196 { 197 fhandle3_t *fhp; 198 199 /* 200 * Get a vnode pointer for the given NFS file handle. 201 * Note that it could be an NFSv2 or NFSv3 handle, 202 * which means the size might vary. (don't copy) 203 */ 204 if (fh->n_len < sizeof (fhandle_t)) 205 return (NULL); 206 207 /* We know this is aligned (kmem_alloc) */ 208 /* LINTED E_BAD_PTR_CAST_ALIGN */ 209 fhp = (fhandle3_t *)fh->n_bytes; 210 211 /* 212 * See the comment for NFS_FH3MAXDATA in uts/common/nfs/nfs.h for 213 * converting fhandles. Check the NFSv3 file handle size. The lockmgr 214 * is not used for NFS v4. 215 */ 216 if (fhp->_fh3_len > NFS_FH3MAXDATA || fhp->_fh3_len == 0) 217 return (NULL); 218 219 return (lm_fhtovp(fhp)); 220 } 221 222 /* 223 * Get vhold from client's filehandle, but in contrast to 224 * The function tries to check some access rights as well. 225 * 226 * NOTE: vhold object _must_ be explicitly released by 227 * nlm_vhold_release(). 228 */ 229 static struct nlm_vhold * 230 nlm_fh_to_vhold(struct nlm_host *hostp, struct netobj *fh) 231 { 232 vnode_t *vp; 233 struct nlm_vhold *nvp; 234 235 vp = nlm_fh_to_vp(fh); 236 if (vp == NULL) 237 return (NULL); 238 239 240 nvp = nlm_vhold_get(hostp, vp); 241 242 /* 243 * Both nlm_fh_to_vp() and nlm_vhold_get() 244 * do VN_HOLD(), so we need to drop one 245 * reference on vnode. 246 */ 247 VN_RELE(vp); 248 return (nvp); 249 } 250 251 /* ******************************************************************* */ 252 253 /* 254 * NLM implementation details, called from the RPC svc code. 255 */ 256 257 /* 258 * Call-back from NFS statd, used to notify that one of our 259 * hosts had a status change. The host can be either an 260 * NFS client, NFS server or both. 261 * According to NSM protocol description, the state is a 262 * number that is increases monotonically each time the 263 * state of host changes. An even number indicates that 264 * the host is down, while an odd number indicates that 265 * the host is up. 266 * 267 * Here we ignore this even/odd difference of status number 268 * reported by the NSM, we launch notification handlers 269 * every time the state is changed. The reason we why do so 270 * is that client and server can talk to each other using 271 * connectionless transport and it's easy to lose packet 272 * containing NSM notification with status number update. 273 * 274 * In nlm_host_monitor(), we put the sysid in the private data 275 * that statd carries in this callback, so we can easliy find 276 * the host this call applies to. 277 */ 278 /* ARGSUSED */ 279 void 280 nlm_do_notify1(nlm_sm_status *argp, void *res, struct svc_req *sr) 281 { 282 struct nlm_globals *g; 283 struct nlm_host *host; 284 uint16_t sysid; 285 286 g = zone_getspecific(nlm_zone_key, curzone); 287 bcopy(&argp->priv, &sysid, sizeof (sysid)); 288 289 DTRACE_PROBE2(nsm__notify, uint16_t, sysid, 290 int, argp->state); 291 292 host = nlm_host_find_by_sysid(g, (sysid_t)sysid); 293 if (host == NULL) 294 return; 295 296 nlm_host_notify_server(host, argp->state); 297 nlm_host_notify_client(host, argp->state); 298 nlm_host_release(g, host); 299 } 300 301 /* 302 * Another available call-back for NFS statd. 303 * Not currently used. 304 */ 305 /* ARGSUSED */ 306 void 307 nlm_do_notify2(nlm_sm_status *argp, void *res, struct svc_req *sr) 308 { 309 ASSERT(0); 310 } 311 312 313 /* 314 * NLM_TEST, NLM_TEST_MSG, 315 * NLM4_TEST, NLM4_TEST_MSG, 316 * Client inquiry about locks, non-blocking. 317 */ 318 void 319 nlm_do_test(nlm4_testargs *argp, nlm4_testres *resp, 320 struct svc_req *sr, nlm_testres_cb cb) 321 { 322 struct nlm_globals *g; 323 struct nlm_host *host; 324 struct nlm4_holder *lh; 325 struct nlm_owner_handle *oh; 326 nlm_rpc_t *rpcp = NULL; 327 vnode_t *vp = NULL; 328 struct netbuf *addr; 329 char *netid; 330 char *name; 331 int error; 332 struct flock64 fl; 333 334 nlm_copy_netobj(&resp->cookie, &argp->cookie); 335 336 name = argp->alock.caller_name; 337 netid = svc_getnetid(sr->rq_xprt); 338 addr = svc_getrpccaller(sr->rq_xprt); 339 340 g = zone_getspecific(nlm_zone_key, curzone); 341 host = nlm_host_findcreate(g, name, netid, addr); 342 if (host == NULL) { 343 resp->stat.stat = nlm4_denied_nolocks; 344 return; 345 } 346 if (cb != NULL) { 347 error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp); 348 if (error != 0) { 349 resp->stat.stat = nlm4_denied_nolocks; 350 goto out; 351 } 352 } 353 354 vp = nlm_fh_to_vp(&argp->alock.fh); 355 if (vp == NULL) { 356 resp->stat.stat = nlm4_stale_fh; 357 goto out; 358 } 359 360 if (NLM_IN_GRACE(g)) { 361 resp->stat.stat = nlm4_denied_grace_period; 362 goto out; 363 } 364 365 /* Convert to local form. */ 366 error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers, 367 (argp->exclusive) ? F_WRLCK : F_RDLCK); 368 if (error) { 369 resp->stat.stat = nlm4_failed; 370 goto out; 371 } 372 373 /* BSD: VOP_ADVLOCK(nv->nv_vp, NULL, F_GETLK, &fl, F_REMOTE); */ 374 error = nlm_vop_frlock(vp, F_GETLK, &fl, 375 F_REMOTELOCK | FREAD | FWRITE, 376 (u_offset_t)0, NULL, CRED(), NULL); 377 if (error) { 378 resp->stat.stat = nlm4_failed; 379 goto out; 380 } 381 382 if (fl.l_type == F_UNLCK) { 383 resp->stat.stat = nlm4_granted; 384 goto out; 385 } 386 resp->stat.stat = nlm4_denied; 387 388 /* 389 * This lock "test" fails due to a conflicting lock. 390 * 391 * If this is a v1 client, make sure the conflicting 392 * lock range we report can be expressed with 32-bit 393 * offsets. The lock range requested was expressed 394 * as 32-bit offset and length, so at least part of 395 * the conflicting lock should lie below MAX_UOFF32. 396 * If the conflicting lock extends past that, we'll 397 * trim the range to end at MAX_UOFF32 so this lock 398 * can be represented in a 32-bit response. Check 399 * the start also (paranoid, but a low cost check). 400 */ 401 if (sr->rq_vers < NLM4_VERS) { 402 uint64 maxlen; 403 if (fl.l_start > MAX_UOFF32) 404 fl.l_start = MAX_UOFF32; 405 maxlen = MAX_UOFF32 + 1 - fl.l_start; 406 if (fl.l_len > maxlen) 407 fl.l_len = maxlen; 408 } 409 410 /* 411 * Build the nlm4_holder result structure. 412 * 413 * Note that lh->oh is freed via xdr_free, 414 * xdr_nlm4_holder, xdr_netobj, xdr_bytes. 415 */ 416 oh = kmem_zalloc(sizeof (*oh), KM_SLEEP); 417 oh->oh_sysid = (sysid_t)fl.l_sysid; 418 lh = &resp->stat.nlm4_testrply_u.holder; 419 lh->exclusive = (fl.l_type == F_WRLCK); 420 lh->svid = fl.l_pid; 421 lh->oh.n_len = sizeof (*oh); 422 lh->oh.n_bytes = (void *)oh; 423 lh->l_offset = fl.l_start; 424 lh->l_len = fl.l_len; 425 426 out: 427 /* 428 * If we have a callback function, use that to 429 * deliver the response via another RPC call. 430 */ 431 if (cb != NULL && rpcp != NULL) 432 NLM_INVOKE_CALLBACK("test", rpcp, resp, cb); 433 434 if (vp != NULL) 435 VN_RELE(vp); 436 if (rpcp != NULL) 437 nlm_host_rele_rpc(host, rpcp); 438 439 nlm_host_release(g, host); 440 } 441 442 /* 443 * NLM_LOCK, NLM_LOCK_MSG, NLM_NM_LOCK 444 * NLM4_LOCK, NLM4_LOCK_MSG, NLM4_NM_LOCK 445 * 446 * Client request to set a lock, possibly blocking. 447 * 448 * If the lock needs to block, we return status blocked to 449 * this RPC call, and then later call back the client with 450 * a "granted" callback. Tricky aspects of this include: 451 * sending a reply before this function returns, and then 452 * borrowing this thread from the RPC service pool for the 453 * wait on the lock and doing the later granted callback. 454 * 455 * We also have to keep a list of locks (pending + granted) 456 * both to handle retransmitted requests, and to keep the 457 * vnodes for those locks active. 458 */ 459 void 460 nlm_do_lock(nlm4_lockargs *argp, nlm4_res *resp, struct svc_req *sr, 461 nlm_reply_cb reply_cb, nlm_res_cb res_cb, nlm_testargs_cb grant_cb) 462 { 463 struct nlm_globals *g; 464 struct flock64 fl; 465 struct nlm_host *host = NULL; 466 struct netbuf *addr; 467 struct nlm_vhold *nvp = NULL; 468 nlm_rpc_t *rpcp = NULL; 469 char *netid; 470 char *name; 471 int error, flags; 472 bool_t do_blocking = FALSE; 473 bool_t do_mon_req = FALSE; 474 enum nlm4_stats status; 475 476 nlm_copy_netobj(&resp->cookie, &argp->cookie); 477 478 name = argp->alock.caller_name; 479 netid = svc_getnetid(sr->rq_xprt); 480 addr = svc_getrpccaller(sr->rq_xprt); 481 482 g = zone_getspecific(nlm_zone_key, curzone); 483 host = nlm_host_findcreate(g, name, netid, addr); 484 if (host == NULL) { 485 DTRACE_PROBE4(no__host, struct nlm_globals *, g, 486 char *, name, char *, netid, struct netbuf *, addr); 487 status = nlm4_denied_nolocks; 488 goto doreply; 489 } 490 491 DTRACE_PROBE3(start, struct nlm_globals *, g, 492 struct nlm_host *, host, nlm4_lockargs *, argp); 493 494 /* 495 * If we may need to do _msg_ call needing an RPC 496 * callback, get the RPC client handle now, 497 * so we know if we can bind to the NLM service on 498 * this client. 499 * 500 * Note: host object carries transport type. 501 * One client using multiple transports gets 502 * separate sysids for each of its transports. 503 */ 504 if (res_cb != NULL || (grant_cb != NULL && argp->block == TRUE)) { 505 error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp); 506 if (error != 0) { 507 status = nlm4_denied_nolocks; 508 goto doreply; 509 } 510 } 511 512 /* 513 * During the "grace period", only allow reclaim. 514 */ 515 if (argp->reclaim == 0 && NLM_IN_GRACE(g)) { 516 status = nlm4_denied_grace_period; 517 goto doreply; 518 } 519 520 /* 521 * Check whether we missed host shutdown event 522 */ 523 if (nlm_host_get_state(host) != argp->state) 524 nlm_host_notify_server(host, argp->state); 525 526 /* 527 * Get a hold on the vnode for a lock operation. 528 * Only lock() and share() need vhold objects. 529 */ 530 nvp = nlm_fh_to_vhold(host, &argp->alock.fh); 531 if (nvp == NULL) { 532 status = nlm4_stale_fh; 533 goto doreply; 534 } 535 536 /* Convert to local form. */ 537 error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers, 538 (argp->exclusive) ? F_WRLCK : F_RDLCK); 539 if (error) { 540 status = nlm4_failed; 541 goto doreply; 542 } 543 544 /* 545 * Try to lock non-blocking first. If we succeed 546 * getting the lock, we can reply with the granted 547 * status directly and avoid the complications of 548 * making the "granted" RPC callback later. 549 * 550 * This also let's us find out now about some 551 * possible errors like EROFS, etc. 552 */ 553 flags = F_REMOTELOCK | FREAD | FWRITE; 554 error = nlm_vop_frlock(nvp->nv_vp, F_SETLK, &fl, flags, 555 (u_offset_t)0, NULL, CRED(), NULL); 556 557 DTRACE_PROBE3(setlk__res, struct flock64 *, &fl, 558 int, flags, int, error); 559 560 switch (error) { 561 case 0: 562 /* Got it without waiting! */ 563 status = nlm4_granted; 564 do_mon_req = TRUE; 565 break; 566 567 /* EINPROGRESS too? */ 568 case EAGAIN: 569 /* We did not get the lock. Should we block? */ 570 if (argp->block == FALSE || grant_cb == NULL) { 571 status = nlm4_denied; 572 break; 573 } 574 /* 575 * Should block. Try to reserve this thread 576 * so we can use it to wait for the lock and 577 * later send the granted message. If this 578 * reservation fails, say "no resources". 579 */ 580 if (!svc_reserve_thread(sr->rq_xprt)) { 581 status = nlm4_denied_nolocks; 582 break; 583 } 584 /* 585 * OK, can detach this thread, so this call 586 * will block below (after we reply). 587 */ 588 status = nlm4_blocked; 589 do_blocking = TRUE; 590 do_mon_req = TRUE; 591 break; 592 593 case ENOLCK: 594 /* Failed for lack of resources. */ 595 status = nlm4_denied_nolocks; 596 break; 597 598 case EROFS: 599 /* read-only file system */ 600 status = nlm4_rofs; 601 break; 602 603 case EFBIG: 604 /* file too big */ 605 status = nlm4_fbig; 606 break; 607 608 case EDEADLK: 609 /* dead lock condition */ 610 status = nlm4_deadlck; 611 break; 612 613 default: 614 status = nlm4_denied; 615 break; 616 } 617 618 doreply: 619 resp->stat.stat = status; 620 621 /* 622 * We get one of two function pointers; one for a 623 * normal RPC reply, and another for doing an RPC 624 * "callback" _res reply for a _msg function. 625 * Use either of those to send the reply now. 626 * 627 * If sending this reply fails, just leave the 628 * lock in the list for retransmitted requests. 629 * Cleanup is via unlock or host rele (statmon). 630 */ 631 if (reply_cb != NULL) { 632 /* i.e. nlm_lock_1_reply */ 633 if (!(*reply_cb)(sr->rq_xprt, resp)) 634 svcerr_systemerr(sr->rq_xprt); 635 } 636 if (res_cb != NULL && rpcp != NULL) 637 NLM_INVOKE_CALLBACK("lock", rpcp, resp, res_cb); 638 639 /* 640 * The reply has been sent to the client. 641 * Start monitoring this client (maybe). 642 * 643 * Note that the non-monitored (NM) calls pass grant_cb=NULL 644 * indicating that the client doesn't support RPC callbacks. 645 * No monitoring for these (lame) clients. 646 */ 647 if (do_mon_req && grant_cb != NULL) 648 nlm_host_monitor(g, host, argp->state); 649 650 if (do_blocking) { 651 /* 652 * We need to block on this lock, and when that 653 * completes, do the granted RPC call. Note that 654 * we "reserved" this thread above, so we can now 655 * "detach" it from the RPC SVC pool, allowing it 656 * to block indefinitely if needed. 657 */ 658 ASSERT(rpcp != NULL); 659 (void) svc_detach_thread(sr->rq_xprt); 660 nlm_block(argp, host, nvp, rpcp, &fl, grant_cb); 661 } 662 663 DTRACE_PROBE3(lock__end, struct nlm_globals *, g, 664 struct nlm_host *, host, nlm4_res *, resp); 665 666 if (rpcp != NULL) 667 nlm_host_rele_rpc(host, rpcp); 668 669 nlm_vhold_release(host, nvp); 670 nlm_host_release(g, host); 671 } 672 673 /* 674 * Helper for nlm_do_lock(), partly for observability, 675 * (we'll see a call blocked in this function) and 676 * because nlm_do_lock() was getting quite long. 677 */ 678 static void 679 nlm_block(nlm4_lockargs *lockargs, 680 struct nlm_host *host, 681 struct nlm_vhold *nvp, 682 nlm_rpc_t *rpcp, 683 struct flock64 *flp, 684 nlm_testargs_cb grant_cb) 685 { 686 nlm4_testargs args; 687 int error; 688 flk_callback_t flk_cb; 689 struct nlm_block_cb_data cb_data; 690 691 /* 692 * Keep a list of blocked locks on nh_pending, and use it 693 * to cancel these threads in nlm_destroy_client_pending. 694 * 695 * Check to see if this lock is already in the list 696 * and if not, add an entry for it. Allocate first, 697 * then if we don't insert, free the new one. 698 * Caller already has vp held. 699 */ 700 701 error = nlm_slreq_register(host, nvp, flp); 702 if (error != 0) { 703 /* 704 * Sleeping lock request with given fl is already 705 * registered by someone else. This means that 706 * some other thread is handling the request, let 707 * him to do its work. 708 */ 709 ASSERT(error == EEXIST); 710 return; 711 } 712 713 cb_data.hostp = host; 714 cb_data.nvp = nvp; 715 cb_data.flp = flp; 716 flk_init_callback(&flk_cb, nlm_block_callback, &cb_data); 717 718 /* BSD: VOP_ADVLOCK(vp, NULL, F_SETLK, fl, F_REMOTE); */ 719 error = nlm_vop_frlock(nvp->nv_vp, F_SETLKW, flp, 720 F_REMOTELOCK | FREAD | FWRITE, 721 (u_offset_t)0, &flk_cb, CRED(), NULL); 722 723 if (error != 0) { 724 /* 725 * We failed getting the lock, but have no way to 726 * tell the client about that. Let 'em time out. 727 */ 728 (void) nlm_slreq_unregister(host, nvp, flp); 729 return; 730 } 731 732 /* 733 * Do the "granted" call-back to the client. 734 */ 735 args.cookie = lockargs->cookie; 736 args.exclusive = lockargs->exclusive; 737 args.alock = lockargs->alock; 738 739 NLM_INVOKE_CALLBACK("grant", rpcp, &args, grant_cb); 740 } 741 742 /* 743 * The function that is used as flk callback when NLM server 744 * sets new sleeping lock. The function unregisters NLM 745 * sleeping lock request (nlm_slreq) associated with the 746 * sleeping lock _before_ lock becomes active. It prevents 747 * potential race condition between nlm_block() and 748 * nlm_do_cancel(). 749 */ 750 static callb_cpr_t * 751 nlm_block_callback(flk_cb_when_t when, void *data) 752 { 753 struct nlm_block_cb_data *cb_data; 754 755 cb_data = (struct nlm_block_cb_data *)data; 756 if (when == FLK_AFTER_SLEEP) { 757 (void) nlm_slreq_unregister(cb_data->hostp, 758 cb_data->nvp, cb_data->flp); 759 } 760 761 return (0); 762 } 763 764 /* 765 * NLM_CANCEL, NLM_CANCEL_MSG, 766 * NLM4_CANCEL, NLM4_CANCEL_MSG, 767 * Client gives up waiting for a blocking lock. 768 */ 769 void 770 nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *resp, 771 struct svc_req *sr, nlm_res_cb cb) 772 { 773 struct nlm_globals *g; 774 struct nlm_host *host; 775 struct netbuf *addr; 776 struct nlm_vhold *nvp = NULL; 777 nlm_rpc_t *rpcp = NULL; 778 char *netid; 779 char *name; 780 int error; 781 struct flock64 fl; 782 783 nlm_copy_netobj(&resp->cookie, &argp->cookie); 784 netid = svc_getnetid(sr->rq_xprt); 785 addr = svc_getrpccaller(sr->rq_xprt); 786 name = argp->alock.caller_name; 787 788 g = zone_getspecific(nlm_zone_key, curzone); 789 host = nlm_host_findcreate(g, name, netid, addr); 790 if (host == NULL) { 791 resp->stat.stat = nlm4_denied_nolocks; 792 return; 793 } 794 if (cb != NULL) { 795 error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp); 796 if (error != 0) { 797 resp->stat.stat = nlm4_denied_nolocks; 798 return; 799 } 800 } 801 802 DTRACE_PROBE3(start, struct nlm_globals *, g, 803 struct nlm_host *, host, nlm4_cancargs *, argp); 804 805 if (NLM_IN_GRACE(g)) { 806 resp->stat.stat = nlm4_denied_grace_period; 807 goto out; 808 } 809 810 nvp = nlm_fh_to_vhold(host, &argp->alock.fh); 811 if (nvp == NULL) { 812 resp->stat.stat = nlm4_stale_fh; 813 goto out; 814 } 815 816 /* Convert to local form. */ 817 error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers, 818 (argp->exclusive) ? F_WRLCK : F_RDLCK); 819 if (error) { 820 resp->stat.stat = nlm4_failed; 821 goto out; 822 } 823 824 error = nlm_slreq_unregister(host, nvp, &fl); 825 if (error != 0) { 826 /* 827 * There's no sleeping lock request corresponding 828 * to the lock. Then requested sleeping lock 829 * doesn't exist. 830 */ 831 resp->stat.stat = nlm4_denied; 832 goto out; 833 } 834 835 fl.l_type = F_UNLCK; 836 error = nlm_vop_frlock(nvp->nv_vp, F_SETLK, &fl, 837 F_REMOTELOCK | FREAD | FWRITE, 838 (u_offset_t)0, NULL, CRED(), NULL); 839 840 resp->stat.stat = (error == 0) ? 841 nlm4_granted : nlm4_denied; 842 843 out: 844 /* 845 * If we have a callback function, use that to 846 * deliver the response via another RPC call. 847 */ 848 if (cb != NULL && rpcp != NULL) 849 NLM_INVOKE_CALLBACK("cancel", rpcp, resp, cb); 850 851 DTRACE_PROBE3(cancel__end, struct nlm_globals *, g, 852 struct nlm_host *, host, nlm4_res *, resp); 853 854 if (rpcp != NULL) 855 nlm_host_rele_rpc(host, rpcp); 856 857 nlm_vhold_release(host, nvp); 858 nlm_host_release(g, host); 859 } 860 861 /* 862 * NLM_UNLOCK, NLM_UNLOCK_MSG, 863 * NLM4_UNLOCK, NLM4_UNLOCK_MSG, 864 * Client removes one of their locks. 865 */ 866 void 867 nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *resp, 868 struct svc_req *sr, nlm_res_cb cb) 869 { 870 struct nlm_globals *g; 871 struct nlm_host *host; 872 struct netbuf *addr; 873 nlm_rpc_t *rpcp = NULL; 874 vnode_t *vp = NULL; 875 char *netid; 876 char *name; 877 int error; 878 struct flock64 fl; 879 880 nlm_copy_netobj(&resp->cookie, &argp->cookie); 881 882 netid = svc_getnetid(sr->rq_xprt); 883 addr = svc_getrpccaller(sr->rq_xprt); 884 name = argp->alock.caller_name; 885 886 /* 887 * NLM_UNLOCK operation doesn't have an error code 888 * denoting that operation failed, so we always 889 * return nlm4_granted except when the server is 890 * in a grace period. 891 */ 892 resp->stat.stat = nlm4_granted; 893 894 g = zone_getspecific(nlm_zone_key, curzone); 895 host = nlm_host_findcreate(g, name, netid, addr); 896 if (host == NULL) 897 return; 898 899 if (cb != NULL) { 900 error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp); 901 if (error != 0) 902 goto out; 903 } 904 905 DTRACE_PROBE3(start, struct nlm_globals *, g, 906 struct nlm_host *, host, nlm4_unlockargs *, argp); 907 908 if (NLM_IN_GRACE(g)) { 909 resp->stat.stat = nlm4_denied_grace_period; 910 goto out; 911 } 912 913 vp = nlm_fh_to_vp(&argp->alock.fh); 914 if (vp == NULL) 915 goto out; 916 917 /* Convert to local form. */ 918 error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers, F_UNLCK); 919 if (error) 920 goto out; 921 922 /* BSD: VOP_ADVLOCK(nv->nv_vp, NULL, F_UNLCK, &fl, F_REMOTE); */ 923 error = nlm_vop_frlock(vp, F_SETLK, &fl, 924 F_REMOTELOCK | FREAD | FWRITE, 925 (u_offset_t)0, NULL, CRED(), NULL); 926 927 DTRACE_PROBE1(unlock__res, int, error); 928 out: 929 /* 930 * If we have a callback function, use that to 931 * deliver the response via another RPC call. 932 */ 933 if (cb != NULL && rpcp != NULL) 934 NLM_INVOKE_CALLBACK("unlock", rpcp, resp, cb); 935 936 DTRACE_PROBE3(unlock__end, struct nlm_globals *, g, 937 struct nlm_host *, host, nlm4_res *, resp); 938 939 if (vp != NULL) 940 VN_RELE(vp); 941 if (rpcp != NULL) 942 nlm_host_rele_rpc(host, rpcp); 943 944 nlm_host_release(g, host); 945 } 946 947 /* 948 * NLM_GRANTED, NLM_GRANTED_MSG, 949 * NLM4_GRANTED, NLM4_GRANTED_MSG, 950 * 951 * This service routine is special. It's the only one that's 952 * really part of our NLM _client_ support, used by _servers_ 953 * to "call back" when a blocking lock from this NLM client 954 * is granted by the server. In this case, we _know_ there is 955 * already an nlm_host allocated and held by the client code. 956 * We want to find that nlm_host here. 957 * 958 * Over in nlm_call_lock(), the client encoded the sysid for this 959 * server in the "owner handle" netbuf sent with our lock request. 960 * We can now use that to find the nlm_host object we used there. 961 * (NB: The owner handle is opaque to the server.) 962 */ 963 void 964 nlm_do_granted(nlm4_testargs *argp, nlm4_res *resp, 965 struct svc_req *sr, nlm_res_cb cb) 966 { 967 struct nlm_globals *g; 968 struct nlm_owner_handle *oh; 969 struct nlm_host *host; 970 nlm_rpc_t *rpcp = NULL; 971 int error; 972 973 nlm_copy_netobj(&resp->cookie, &argp->cookie); 974 resp->stat.stat = nlm4_denied; 975 976 g = zone_getspecific(nlm_zone_key, curzone); 977 oh = (void *) argp->alock.oh.n_bytes; 978 if (oh == NULL) 979 return; 980 981 host = nlm_host_find_by_sysid(g, oh->oh_sysid); 982 if (host == NULL) 983 return; 984 985 if (cb != NULL) { 986 error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp); 987 if (error != 0) 988 goto out; 989 } 990 991 if (NLM_IN_GRACE(g)) { 992 resp->stat.stat = nlm4_denied_grace_period; 993 goto out; 994 } 995 996 error = nlm_slock_grant(g, host, &argp->alock); 997 if (error == 0) 998 resp->stat.stat = nlm4_granted; 999 1000 out: 1001 /* 1002 * If we have a callback function, use that to 1003 * deliver the response via another RPC call. 1004 */ 1005 if (cb != NULL && rpcp != NULL) 1006 NLM_INVOKE_CALLBACK("do_granted", rpcp, resp, cb); 1007 1008 if (rpcp != NULL) 1009 nlm_host_rele_rpc(host, rpcp); 1010 1011 nlm_host_release(g, host); 1012 } 1013 1014 /* 1015 * NLM_FREE_ALL, NLM4_FREE_ALL 1016 * 1017 * Destroy all lock state for the calling client. 1018 */ 1019 void 1020 nlm_do_free_all(nlm4_notify *argp, void *res, struct svc_req *sr) 1021 { 1022 struct nlm_globals *g; 1023 struct nlm_host_list host_list; 1024 struct nlm_host *hostp; 1025 1026 TAILQ_INIT(&host_list); 1027 g = zone_getspecific(nlm_zone_key, curzone); 1028 1029 /* Serialize calls to clean locks. */ 1030 mutex_enter(&g->clean_lock); 1031 1032 /* 1033 * Find all hosts that have the given node name and put them on a 1034 * local list. 1035 */ 1036 mutex_enter(&g->lock); 1037 for (hostp = avl_first(&g->nlm_hosts_tree); hostp != NULL; 1038 hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp)) { 1039 if (strcasecmp(hostp->nh_name, argp->name) == 0) { 1040 /* 1041 * If needed take the host out of the idle list since 1042 * we are taking a reference. 1043 */ 1044 if (hostp->nh_flags & NLM_NH_INIDLE) { 1045 TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, 1046 nh_link); 1047 hostp->nh_flags &= ~NLM_NH_INIDLE; 1048 } 1049 hostp->nh_refs++; 1050 1051 TAILQ_INSERT_TAIL(&host_list, hostp, nh_link); 1052 } 1053 } 1054 mutex_exit(&g->lock); 1055 1056 /* Free locks for all hosts on the local list. */ 1057 while (!TAILQ_EMPTY(&host_list)) { 1058 hostp = TAILQ_FIRST(&host_list); 1059 TAILQ_REMOVE(&host_list, hostp, nh_link); 1060 1061 /* 1062 * Note that this does not do client-side cleanup. 1063 * We want to do that ONLY if statd tells us the 1064 * server has restarted. 1065 */ 1066 nlm_host_notify_server(hostp, argp->state); 1067 nlm_host_release(g, hostp); 1068 } 1069 1070 mutex_exit(&g->clean_lock); 1071 1072 (void) res; 1073 (void) sr; 1074 } 1075 1076 static void 1077 nlm_init_shrlock(struct shrlock *shr, 1078 nlm4_share *nshare, struct nlm_host *host) 1079 { 1080 1081 switch (nshare->access) { 1082 default: 1083 case fsa_NONE: 1084 shr->s_access = 0; 1085 break; 1086 case fsa_R: 1087 shr->s_access = F_RDACC; 1088 break; 1089 case fsa_W: 1090 shr->s_access = F_WRACC; 1091 break; 1092 case fsa_RW: 1093 shr->s_access = F_RWACC; 1094 break; 1095 } 1096 1097 switch (nshare->mode) { 1098 default: 1099 case fsm_DN: 1100 shr->s_deny = F_NODNY; 1101 break; 1102 case fsm_DR: 1103 shr->s_deny = F_RDDNY; 1104 break; 1105 case fsm_DW: 1106 shr->s_deny = F_WRDNY; 1107 break; 1108 case fsm_DRW: 1109 shr->s_deny = F_RWDNY; 1110 break; 1111 } 1112 1113 shr->s_sysid = host->nh_sysid; 1114 shr->s_pid = 0; 1115 shr->s_own_len = nshare->oh.n_len; 1116 shr->s_owner = nshare->oh.n_bytes; 1117 } 1118 1119 /* 1120 * NLM_SHARE, NLM4_SHARE 1121 * 1122 * Request a DOS-style share reservation 1123 */ 1124 void 1125 nlm_do_share(nlm4_shareargs *argp, nlm4_shareres *resp, struct svc_req *sr) 1126 { 1127 struct nlm_globals *g; 1128 struct nlm_host *host; 1129 struct netbuf *addr; 1130 struct nlm_vhold *nvp = NULL; 1131 char *netid; 1132 char *name; 1133 int error; 1134 struct shrlock shr; 1135 1136 nlm_copy_netobj(&resp->cookie, &argp->cookie); 1137 1138 name = argp->share.caller_name; 1139 netid = svc_getnetid(sr->rq_xprt); 1140 addr = svc_getrpccaller(sr->rq_xprt); 1141 1142 g = zone_getspecific(nlm_zone_key, curzone); 1143 host = nlm_host_findcreate(g, name, netid, addr); 1144 if (host == NULL) { 1145 resp->stat = nlm4_denied_nolocks; 1146 return; 1147 } 1148 1149 DTRACE_PROBE3(share__start, struct nlm_globals *, g, 1150 struct nlm_host *, host, nlm4_shareargs *, argp); 1151 1152 if (argp->reclaim == 0 && NLM_IN_GRACE(g)) { 1153 resp->stat = nlm4_denied_grace_period; 1154 goto out; 1155 } 1156 1157 /* 1158 * Get holded vnode when on lock operation. 1159 * Only lock() and share() need vhold objects. 1160 */ 1161 nvp = nlm_fh_to_vhold(host, &argp->share.fh); 1162 if (nvp == NULL) { 1163 resp->stat = nlm4_stale_fh; 1164 goto out; 1165 } 1166 1167 /* Convert to local form. */ 1168 nlm_init_shrlock(&shr, &argp->share, host); 1169 error = VOP_SHRLOCK(nvp->nv_vp, F_SHARE, &shr, 1170 FREAD | FWRITE, CRED(), NULL); 1171 1172 if (error == 0) { 1173 resp->stat = nlm4_granted; 1174 nlm_host_monitor(g, host, 0); 1175 } else { 1176 resp->stat = nlm4_denied; 1177 } 1178 1179 out: 1180 DTRACE_PROBE3(share__end, struct nlm_globals *, g, 1181 struct nlm_host *, host, nlm4_shareres *, resp); 1182 1183 nlm_vhold_release(host, nvp); 1184 nlm_host_release(g, host); 1185 } 1186 1187 /* 1188 * NLM_UNSHARE, NLM4_UNSHARE 1189 * 1190 * Release a DOS-style share reservation 1191 */ 1192 void 1193 nlm_do_unshare(nlm4_shareargs *argp, nlm4_shareres *resp, struct svc_req *sr) 1194 { 1195 struct nlm_globals *g; 1196 struct nlm_host *host; 1197 struct netbuf *addr; 1198 vnode_t *vp = NULL; 1199 char *netid; 1200 int error; 1201 struct shrlock shr; 1202 1203 nlm_copy_netobj(&resp->cookie, &argp->cookie); 1204 1205 netid = svc_getnetid(sr->rq_xprt); 1206 addr = svc_getrpccaller(sr->rq_xprt); 1207 1208 g = zone_getspecific(nlm_zone_key, curzone); 1209 host = nlm_host_find(g, netid, addr); 1210 if (host == NULL) { 1211 resp->stat = nlm4_denied_nolocks; 1212 return; 1213 } 1214 1215 DTRACE_PROBE3(unshare__start, struct nlm_globals *, g, 1216 struct nlm_host *, host, nlm4_shareargs *, argp); 1217 1218 if (NLM_IN_GRACE(g)) { 1219 resp->stat = nlm4_denied_grace_period; 1220 goto out; 1221 } 1222 1223 vp = nlm_fh_to_vp(&argp->share.fh); 1224 if (vp == NULL) { 1225 resp->stat = nlm4_stale_fh; 1226 goto out; 1227 } 1228 1229 /* Convert to local form. */ 1230 nlm_init_shrlock(&shr, &argp->share, host); 1231 error = VOP_SHRLOCK(vp, F_UNSHARE, &shr, 1232 FREAD | FWRITE, CRED(), NULL); 1233 1234 (void) error; 1235 resp->stat = nlm4_granted; 1236 1237 out: 1238 DTRACE_PROBE3(unshare__end, struct nlm_globals *, g, 1239 struct nlm_host *, host, nlm4_shareres *, resp); 1240 1241 if (vp != NULL) 1242 VN_RELE(vp); 1243 1244 nlm_host_release(g, host); 1245 } 1246 1247 /* 1248 * NLM wrapper to VOP_FRLOCK that checks the validity of the lock before 1249 * invoking the vnode operation. 1250 */ 1251 static int 1252 nlm_vop_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset, 1253 struct flk_callback *flk_cbp, cred_t *cr, caller_context_t *ct) 1254 { 1255 if (bfp->l_len != 0 && bfp->l_start + (bfp->l_len - 1) < bfp->l_start) { 1256 return (EOVERFLOW); 1257 } 1258 1259 return (VOP_FRLOCK(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); 1260 } 1261