1 /* 2 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ 3 * Authors: Doug Rabson <dfr@rabson.org> 4 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 /* 29 * Copyright 2013 Nexenta Systems, Inc. All rights reserved. 30 * Copyright (c) 2012 by Delphix. All rights reserved. 31 */ 32 33 /* 34 * NFS LockManager, start/stop, support functions, etc. 35 * Most of the interesting code is here. 36 * 37 * Source code derived from FreeBSD nlm_prot_impl.c 38 */ 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/thread.h> 43 #include <sys/fcntl.h> 44 #include <sys/flock.h> 45 #include <sys/mount.h> 46 #include <sys/priv.h> 47 #include <sys/proc.h> 48 #include <sys/share.h> 49 #include <sys/socket.h> 50 #include <sys/syscall.h> 51 #include <sys/syslog.h> 52 #include <sys/systm.h> 53 #include <sys/class.h> 54 #include <sys/unistd.h> 55 #include <sys/vnode.h> 56 #include <sys/vfs.h> 57 #include <sys/queue.h> 58 #include <sys/bitmap.h> 59 #include <sys/sdt.h> 60 #include <netinet/in.h> 61 62 #include <rpc/rpc.h> 63 #include <rpc/xdr.h> 64 #include <rpc/pmap_prot.h> 65 #include <rpc/pmap_clnt.h> 66 #include <rpc/rpcb_prot.h> 67 68 #include <rpcsvc/nlm_prot.h> 69 #include <rpcsvc/sm_inter.h> 70 #include <rpcsvc/nsm_addr.h> 71 72 #include <nfs/nfs.h> 73 #include <nfs/nfs_clnt.h> 74 #include <nfs/export.h> 75 #include <nfs/rnode.h> 76 #include <nfs/lm.h> 77 78 #include "nlm_impl.h" 79 80 struct nlm_knc { 81 struct knetconfig n_knc; 82 const char *n_netid; 83 }; 84 85 /* 86 * Number of attempts NLM tries to obtain RPC binding 87 * of local statd. 88 */ 89 #define NLM_NSM_RPCBIND_RETRIES 10 90 91 /* 92 * Timeout (in seconds) NLM waits before making another 93 * attempt to obtain RPC binding of local statd. 94 */ 95 #define NLM_NSM_RPCBIND_TIMEOUT 5 96 97 /* 98 * Total number of sysids in NLM sysid bitmap 99 */ 100 #define NLM_BMAP_NITEMS (LM_SYSID_MAX + 1) 101 102 /* 103 * Number of ulong_t words in bitmap that is used 104 * for allocation of sysid numbers. 105 */ 106 #define NLM_BMAP_WORDS (NLM_BMAP_NITEMS / BT_NBIPUL) 107 108 /* 109 * Given an integer x, the macro returns 110 * -1 if x is negative, 111 * 0 if x is zero 112 * 1 if x is positive 113 */ 114 #define SIGN(x) (((x) > 0) - ((x) < 0)) 115 116 #define ARRSIZE(arr) (sizeof (arr) / sizeof ((arr)[0])) 117 #define NLM_KNCS ARRSIZE(nlm_netconfigs) 118 119 krwlock_t lm_lck; 120 121 /* 122 * Zero timeout for asynchronous NLM RPC operations 123 */ 124 static const struct timeval nlm_rpctv_zero = { 0, 0 }; 125 126 /* 127 * List of all Zone globals nlm_globals instences 128 * linked together. 129 */ 130 static struct nlm_globals_list nlm_zones_list; /* (g) */ 131 132 /* 133 * NLM kmem caches 134 */ 135 static struct kmem_cache *nlm_hosts_cache = NULL; 136 static struct kmem_cache *nlm_vhold_cache = NULL; 137 138 /* 139 * A bitmap for allocation of new sysids. 140 * Sysid is a unique number between LM_SYSID 141 * and LM_SYSID_MAX. Sysid represents unique remote 142 * host that does file locks on the given host. 143 */ 144 static ulong_t nlm_sysid_bmap[NLM_BMAP_WORDS]; /* (g) */ 145 static int nlm_sysid_nidx; /* (g) */ 146 147 /* 148 * RPC service registration for all transports 149 */ 150 static SVC_CALLOUT nlm_svcs[] = { 151 { NLM_PROG, 4, 4, nlm_prog_4 }, /* NLM4_VERS */ 152 { NLM_PROG, 1, 3, nlm_prog_3 } /* NLM_VERS - NLM_VERSX */ 153 }; 154 155 static SVC_CALLOUT_TABLE nlm_sct = { 156 ARRSIZE(nlm_svcs), 157 FALSE, 158 nlm_svcs 159 }; 160 161 /* 162 * Static table of all netid/knetconfig network 163 * lock manager can work with. nlm_netconfigs table 164 * is used when we need to get valid knetconfig by 165 * netid and vice versa. 166 * 167 * Knetconfigs are activated either by the call from 168 * user-space lockd daemon (server side) or by taking 169 * knetconfig from NFS mountinfo (client side) 170 */ 171 static struct nlm_knc nlm_netconfigs[] = { /* (g) */ 172 /* UDP */ 173 { 174 { NC_TPI_CLTS, NC_INET, NC_UDP, NODEV }, 175 "udp", 176 }, 177 /* TCP */ 178 { 179 { NC_TPI_COTS_ORD, NC_INET, NC_TCP, NODEV }, 180 "tcp", 181 }, 182 /* UDP over IPv6 */ 183 { 184 { NC_TPI_CLTS, NC_INET6, NC_UDP, NODEV }, 185 "udp6", 186 }, 187 /* TCP over IPv6 */ 188 { 189 { NC_TPI_COTS_ORD, NC_INET6, NC_TCP, NODEV }, 190 "tcp6", 191 }, 192 /* ticlts (loopback over UDP) */ 193 { 194 { NC_TPI_CLTS, NC_LOOPBACK, NC_NOPROTO, NODEV }, 195 "ticlts", 196 }, 197 /* ticotsord (loopback over TCP) */ 198 { 199 { NC_TPI_COTS_ORD, NC_LOOPBACK, NC_NOPROTO, NODEV }, 200 "ticotsord", 201 }, 202 }; 203 204 /* 205 * NLM misc. function 206 */ 207 static void nlm_copy_netbuf(struct netbuf *, struct netbuf *); 208 static int nlm_netbuf_addrs_cmp(struct netbuf *, struct netbuf *); 209 static void nlm_kmem_reclaim(void *); 210 static void nlm_pool_shutdown(void); 211 static void nlm_suspend_zone(struct nlm_globals *); 212 static void nlm_resume_zone(struct nlm_globals *); 213 static void nlm_nsm_clnt_init(CLIENT *, struct nlm_nsm *); 214 static void nlm_netbuf_to_netobj(struct netbuf *, int *, netobj *); 215 216 /* 217 * NLM thread functions 218 */ 219 static void nlm_gc(struct nlm_globals *); 220 static void nlm_reclaimer(struct nlm_host *); 221 222 /* 223 * NLM NSM functions 224 */ 225 static int nlm_init_local_knc(struct knetconfig *); 226 static int nlm_nsm_init_local(struct nlm_nsm *); 227 static int nlm_nsm_init(struct nlm_nsm *, struct knetconfig *, struct netbuf *); 228 static void nlm_nsm_fini(struct nlm_nsm *); 229 static enum clnt_stat nlm_nsm_simu_crash(struct nlm_nsm *); 230 static enum clnt_stat nlm_nsm_stat(struct nlm_nsm *, int32_t *); 231 static enum clnt_stat nlm_nsm_mon(struct nlm_nsm *, char *, uint16_t); 232 static enum clnt_stat nlm_nsm_unmon(struct nlm_nsm *, char *); 233 234 /* 235 * NLM host functions 236 */ 237 static int nlm_host_ctor(void *, void *, int); 238 static void nlm_host_dtor(void *, void *); 239 static void nlm_host_destroy(struct nlm_host *); 240 static struct nlm_host *nlm_host_create(char *, const char *, 241 struct knetconfig *, struct netbuf *); 242 static struct nlm_host *nlm_host_find_locked(struct nlm_globals *, 243 const char *, struct netbuf *, avl_index_t *); 244 static void nlm_host_unregister(struct nlm_globals *, struct nlm_host *); 245 static void nlm_host_gc_vholds(struct nlm_host *); 246 static bool_t nlm_host_has_srv_locks(struct nlm_host *); 247 static bool_t nlm_host_has_cli_locks(struct nlm_host *); 248 static bool_t nlm_host_has_locks(struct nlm_host *); 249 250 /* 251 * NLM vhold functions 252 */ 253 static int nlm_vhold_ctor(void *, void *, int); 254 static void nlm_vhold_dtor(void *, void *); 255 static void nlm_vhold_destroy(struct nlm_host *, 256 struct nlm_vhold *); 257 static bool_t nlm_vhold_busy(struct nlm_host *, struct nlm_vhold *); 258 static void nlm_vhold_clean(struct nlm_vhold *, int); 259 260 /* 261 * NLM client/server sleeping locks/share reservation functions 262 */ 263 struct nlm_slreq *nlm_slreq_find_locked(struct nlm_host *, 264 struct nlm_vhold *, struct flock64 *); 265 static struct nlm_shres *nlm_shres_create_item(struct shrlock *, vnode_t *); 266 static void nlm_shres_destroy_item(struct nlm_shres *); 267 static bool_t nlm_shres_equal(struct shrlock *, struct shrlock *); 268 269 /* 270 * NLM initialization functions. 271 */ 272 void 273 nlm_init(void) 274 { 275 nlm_hosts_cache = kmem_cache_create("nlm_host_cache", 276 sizeof (struct nlm_host), 0, nlm_host_ctor, nlm_host_dtor, 277 nlm_kmem_reclaim, NULL, NULL, 0); 278 279 nlm_vhold_cache = kmem_cache_create("nlm_vhold_cache", 280 sizeof (struct nlm_vhold), 0, nlm_vhold_ctor, nlm_vhold_dtor, 281 NULL, NULL, NULL, 0); 282 283 nlm_rpc_init(); 284 TAILQ_INIT(&nlm_zones_list); 285 286 /* initialize sysids bitmap */ 287 bzero(nlm_sysid_bmap, sizeof (nlm_sysid_bmap)); 288 nlm_sysid_nidx = 1; 289 290 /* 291 * Reserv the sysid #0, because it's associated 292 * with local locks only. Don't let to allocate 293 * it for remote locks. 294 */ 295 BT_SET(nlm_sysid_bmap, 0); 296 } 297 298 void 299 nlm_globals_register(struct nlm_globals *g) 300 { 301 rw_enter(&lm_lck, RW_WRITER); 302 TAILQ_INSERT_TAIL(&nlm_zones_list, g, nlm_link); 303 rw_exit(&lm_lck); 304 } 305 306 void 307 nlm_globals_unregister(struct nlm_globals *g) 308 { 309 rw_enter(&lm_lck, RW_WRITER); 310 TAILQ_REMOVE(&nlm_zones_list, g, nlm_link); 311 rw_exit(&lm_lck); 312 } 313 314 /* ARGSUSED */ 315 static void 316 nlm_kmem_reclaim(void *cdrarg) 317 { 318 struct nlm_globals *g; 319 320 rw_enter(&lm_lck, RW_READER); 321 TAILQ_FOREACH(g, &nlm_zones_list, nlm_link) 322 cv_broadcast(&g->nlm_gc_sched_cv); 323 324 rw_exit(&lm_lck); 325 } 326 327 /* 328 * NLM garbage collector thread (GC). 329 * 330 * NLM GC periodically checks whether there're any host objects 331 * that can be cleaned up. It also releases stale vnodes that 332 * live on the server side (under protection of vhold objects). 333 * 334 * NLM host objects are cleaned up from GC thread because 335 * operations helping us to determine whether given host has 336 * any locks can be quite expensive and it's not good to call 337 * them every time the very last reference to the host is dropped. 338 * Thus we use "lazy" approach for hosts cleanup. 339 * 340 * The work of GC is to release stale vnodes on the server side 341 * and destroy hosts that haven't any locks and any activity for 342 * some time (i.e. idle hosts). 343 */ 344 static void 345 nlm_gc(struct nlm_globals *g) 346 { 347 struct nlm_host *hostp; 348 clock_t now, idle_period; 349 350 idle_period = SEC_TO_TICK(g->cn_idle_tmo); 351 mutex_enter(&g->lock); 352 for (;;) { 353 /* 354 * GC thread can be explicitly scheduled from 355 * memory reclamation function. 356 */ 357 (void) cv_timedwait(&g->nlm_gc_sched_cv, &g->lock, 358 ddi_get_lbolt() + idle_period); 359 360 /* 361 * NLM is shutting down, time to die. 362 */ 363 if (g->run_status == NLM_ST_STOPPING) 364 break; 365 366 now = ddi_get_lbolt(); 367 DTRACE_PROBE2(gc__start, struct nlm_globals *, g, 368 clock_t, now); 369 370 /* 371 * Handle all hosts that are unused at the moment 372 * until we meet one with idle timeout in future. 373 */ 374 while ((hostp = TAILQ_FIRST(&g->nlm_idle_hosts)) != NULL) { 375 bool_t has_locks = FALSE; 376 377 if (hostp->nh_idle_timeout > now) 378 break; 379 380 /* 381 * Drop global lock while doing expensive work 382 * on this host. We'll re-check any conditions 383 * that might change after retaking the global 384 * lock. 385 */ 386 mutex_exit(&g->lock); 387 mutex_enter(&hostp->nh_lock); 388 389 /* 390 * nlm_globals lock was dropped earlier because 391 * garbage collecting of vholds and checking whether 392 * host has any locks/shares are expensive operations. 393 */ 394 nlm_host_gc_vholds(hostp); 395 has_locks = nlm_host_has_locks(hostp); 396 397 mutex_exit(&hostp->nh_lock); 398 mutex_enter(&g->lock); 399 400 /* 401 * While we were doing expensive operations outside of 402 * nlm_globals critical section, somebody could 403 * take the host, add lock/share to one of its vnodes 404 * and release the host back. If so, host's idle timeout 405 * is renewed and our information about locks on the 406 * given host is outdated. 407 */ 408 if (hostp->nh_idle_timeout > now) 409 continue; 410 411 /* 412 * If either host has locks or somebody has began to 413 * use it while we were outside the nlm_globals critical 414 * section. In both cases we have to renew host's 415 * timeout and put it to the end of LRU list. 416 */ 417 if (has_locks || hostp->nh_refs > 0) { 418 TAILQ_REMOVE(&g->nlm_idle_hosts, 419 hostp, nh_link); 420 hostp->nh_idle_timeout = now + idle_period; 421 TAILQ_INSERT_TAIL(&g->nlm_idle_hosts, 422 hostp, nh_link); 423 continue; 424 } 425 426 /* 427 * We're here if all the following conditions hold: 428 * 1) Host hasn't any locks or share reservations 429 * 2) Host is unused 430 * 3) Host wasn't touched by anyone at least for 431 * g->cn_idle_tmo seconds. 432 * 433 * So, now we can destroy it. 434 */ 435 nlm_host_unregister(g, hostp); 436 mutex_exit(&g->lock); 437 438 nlm_host_unmonitor(g, hostp); 439 nlm_host_destroy(hostp); 440 mutex_enter(&g->lock); 441 if (g->run_status == NLM_ST_STOPPING) 442 break; 443 444 } 445 446 DTRACE_PROBE(gc__end); 447 } 448 449 DTRACE_PROBE1(gc__exit, struct nlm_globals *, g); 450 451 /* Let others know that GC has died */ 452 g->nlm_gc_thread = NULL; 453 mutex_exit(&g->lock); 454 455 cv_broadcast(&g->nlm_gc_finish_cv); 456 zthread_exit(); 457 } 458 459 /* 460 * Thread reclaim locks/shares acquired by the client side 461 * on the given server represented by hostp. 462 */ 463 static void 464 nlm_reclaimer(struct nlm_host *hostp) 465 { 466 struct nlm_globals *g; 467 468 mutex_enter(&hostp->nh_lock); 469 hostp->nh_reclaimer = curthread; 470 mutex_exit(&hostp->nh_lock); 471 472 g = zone_getspecific(nlm_zone_key, curzone); 473 nlm_reclaim_client(g, hostp); 474 475 mutex_enter(&hostp->nh_lock); 476 hostp->nh_flags &= ~NLM_NH_RECLAIM; 477 hostp->nh_reclaimer = NULL; 478 cv_broadcast(&hostp->nh_recl_cv); 479 mutex_exit(&hostp->nh_lock); 480 481 /* 482 * Host was explicitly referenced before 483 * nlm_reclaim() was called, release it 484 * here. 485 */ 486 nlm_host_release(g, hostp); 487 zthread_exit(); 488 } 489 490 /* 491 * Copy a struct netobj. (see xdr.h) 492 */ 493 void 494 nlm_copy_netobj(struct netobj *dst, struct netobj *src) 495 { 496 dst->n_len = src->n_len; 497 dst->n_bytes = kmem_alloc(src->n_len, KM_SLEEP); 498 bcopy(src->n_bytes, dst->n_bytes, src->n_len); 499 } 500 501 /* 502 * An NLM specificw replacement for clnt_call(). 503 * nlm_clnt_call() is used by all RPC functions generated 504 * from nlm_prot.x specification. The function is aware 505 * about some pitfalls of NLM RPC procedures and has a logic 506 * that handles them properly. 507 */ 508 enum clnt_stat 509 nlm_clnt_call(CLIENT *clnt, rpcproc_t procnum, xdrproc_t xdr_args, 510 caddr_t argsp, xdrproc_t xdr_result, caddr_t resultp, struct timeval wait) 511 { 512 k_sigset_t oldmask; 513 enum clnt_stat stat; 514 bool_t sig_blocked = FALSE; 515 516 /* 517 * If NLM RPC procnum is one of the NLM _RES procedures 518 * that are used to reply to asynchronous NLM RPC 519 * (MSG calls), explicitly set RPC timeout to zero. 520 * Client doesn't send a reply to RES procedures, so 521 * we don't need to wait anything. 522 * 523 * NOTE: we ignore NLM4_*_RES procnums because they are 524 * equal to NLM_*_RES numbers. 525 */ 526 if (procnum >= NLM_TEST_RES && procnum <= NLM_GRANTED_RES) 527 wait = nlm_rpctv_zero; 528 529 /* 530 * We need to block signals in case of NLM_CANCEL RPC 531 * in order to prevent interruption of network RPC 532 * calls. 533 */ 534 if (procnum == NLM_CANCEL) { 535 k_sigset_t newmask; 536 537 sigfillset(&newmask); 538 sigreplace(&newmask, &oldmask); 539 sig_blocked = TRUE; 540 } 541 542 stat = clnt_call(clnt, procnum, xdr_args, 543 argsp, xdr_result, resultp, wait); 544 545 /* 546 * Restore signal mask back if signals were blocked 547 */ 548 if (sig_blocked) 549 sigreplace(&oldmask, (k_sigset_t *)NULL); 550 551 return (stat); 552 } 553 554 /* 555 * Suspend NLM client/server in the given zone. 556 * 557 * During suspend operation we mark those hosts 558 * that have any locks with NLM_NH_SUSPEND flags, 559 * so that they can be checked later, when resume 560 * operation occurs. 561 */ 562 static void 563 nlm_suspend_zone(struct nlm_globals *g) 564 { 565 struct nlm_host *hostp; 566 struct nlm_host_list all_hosts; 567 568 /* 569 * Note that while we're doing suspend, GC thread is active 570 * and it can destroy some hosts while we're walking through 571 * the hosts tree. To prevent that and make suspend logic 572 * a bit more simple we put all hosts to local "all_hosts" 573 * list and increment reference counter of each host. 574 * This guaranties that no hosts will be released while 575 * we're doing suspend. 576 * NOTE: reference of each host must be dropped during 577 * resume operation. 578 */ 579 TAILQ_INIT(&all_hosts); 580 mutex_enter(&g->lock); 581 for (hostp = avl_first(&g->nlm_hosts_tree); hostp != NULL; 582 hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp)) { 583 /* 584 * If host is idle, remove it from idle list and 585 * clear idle flag. That is done to prevent GC 586 * from touching this host. 587 */ 588 if (hostp->nh_flags & NLM_NH_INIDLE) { 589 TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link); 590 hostp->nh_flags &= ~NLM_NH_INIDLE; 591 } 592 593 hostp->nh_refs++; 594 TAILQ_INSERT_TAIL(&all_hosts, hostp, nh_link); 595 } 596 597 /* 598 * Now we can walk through all hosts on the system 599 * with zone globals lock released. The fact the 600 * we have taken a reference to each host guaranties 601 * that no hosts can be destroyed during that process. 602 */ 603 mutex_exit(&g->lock); 604 while ((hostp = TAILQ_FIRST(&all_hosts)) != NULL) { 605 mutex_enter(&hostp->nh_lock); 606 if (nlm_host_has_locks(hostp)) 607 hostp->nh_flags |= NLM_NH_SUSPEND; 608 609 mutex_exit(&hostp->nh_lock); 610 TAILQ_REMOVE(&all_hosts, hostp, nh_link); 611 } 612 } 613 614 /* 615 * Resume NLM hosts for the given zone. 616 * 617 * nlm_resume_zone() is called after hosts were suspended 618 * (see nlm_suspend_zone) and its main purpose to check 619 * whether remote locks owned by hosts are still in consistent 620 * state. If they aren't, resume function tries to reclaim 621 * reclaim locks (for client side hosts) and clean locks (for 622 * server side hosts). 623 */ 624 static void 625 nlm_resume_zone(struct nlm_globals *g) 626 { 627 struct nlm_host *hostp, *h_next; 628 629 mutex_enter(&g->lock); 630 hostp = avl_first(&g->nlm_hosts_tree); 631 632 /* 633 * In nlm_suspend_zone() the reference counter of each 634 * host was incremented, so we can safely iterate through 635 * all hosts without worrying that any host we touch will 636 * be removed at the moment. 637 */ 638 while (hostp != NULL) { 639 struct nlm_nsm nsm; 640 enum clnt_stat stat; 641 int32_t sm_state; 642 int error; 643 bool_t resume_failed = FALSE; 644 645 h_next = AVL_NEXT(&g->nlm_hosts_tree, hostp); 646 mutex_exit(&g->lock); 647 648 DTRACE_PROBE1(resume__host, struct nlm_host *, hostp); 649 650 /* 651 * Suspend operation marked that the host doesn't 652 * have any locks. Skip it. 653 */ 654 if (!(hostp->nh_flags & NLM_NH_SUSPEND)) 655 goto cycle_end; 656 657 error = nlm_nsm_init(&nsm, &hostp->nh_knc, &hostp->nh_addr); 658 if (error != 0) { 659 NLM_ERR("Resume: Failed to contact to NSM of host %s " 660 "[error=%d]\n", hostp->nh_name, error); 661 resume_failed = TRUE; 662 goto cycle_end; 663 } 664 665 stat = nlm_nsm_stat(&nsm, &sm_state); 666 if (stat != RPC_SUCCESS) { 667 NLM_ERR("Resume: Failed to call SM_STAT operation for " 668 "host %s [stat=%d]\n", hostp->nh_name, stat); 669 resume_failed = TRUE; 670 nlm_nsm_fini(&nsm); 671 goto cycle_end; 672 } 673 674 if (sm_state != hostp->nh_state) { 675 /* 676 * Current SM state of the host isn't equal 677 * to the one host had when it was suspended. 678 * Probably it was rebooted. Try to reclaim 679 * locks if the host has any on its client side. 680 * Also try to clean up its server side locks 681 * (if the host has any). 682 */ 683 nlm_host_notify_client(hostp, sm_state); 684 nlm_host_notify_server(hostp, sm_state); 685 } 686 687 nlm_nsm_fini(&nsm); 688 689 cycle_end: 690 if (resume_failed) { 691 /* 692 * Resume failed for the given host. 693 * Just clean up all resources it owns. 694 */ 695 nlm_host_notify_server(hostp, 0); 696 nlm_client_cancel_all(g, hostp); 697 } 698 699 hostp->nh_flags &= ~NLM_NH_SUSPEND; 700 nlm_host_release(g, hostp); 701 hostp = h_next; 702 mutex_enter(&g->lock); 703 } 704 705 mutex_exit(&g->lock); 706 } 707 708 /* 709 * NLM functions responsible for operations on NSM handle. 710 */ 711 712 /* 713 * Initialize knetconfig that is used for communication 714 * with local statd via loopback interface. 715 */ 716 static int 717 nlm_init_local_knc(struct knetconfig *knc) 718 { 719 int error; 720 vnode_t *vp; 721 722 bzero(knc, sizeof (*knc)); 723 error = lookupname("/dev/tcp", UIO_SYSSPACE, 724 FOLLOW, NULLVPP, &vp); 725 if (error != 0) 726 return (error); 727 728 knc->knc_semantics = NC_TPI_COTS; 729 knc->knc_protofmly = NC_INET; 730 knc->knc_proto = NC_TCP; 731 knc->knc_rdev = vp->v_rdev; 732 VN_RELE(vp); 733 734 735 return (0); 736 } 737 738 /* 739 * Initialize NSM handle that will be used to talk 740 * to local statd via loopback interface. 741 */ 742 static int 743 nlm_nsm_init_local(struct nlm_nsm *nsm) 744 { 745 int error; 746 struct knetconfig knc; 747 struct sockaddr_in sin; 748 struct netbuf nb; 749 750 error = nlm_init_local_knc(&knc); 751 if (error != 0) 752 return (error); 753 754 bzero(&sin, sizeof (sin)); 755 sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); 756 sin.sin_family = AF_INET; 757 758 nb.buf = (char *)&sin; 759 nb.len = nb.maxlen = sizeof (sin); 760 761 return (nlm_nsm_init(nsm, &knc, &nb)); 762 } 763 764 /* 765 * Initialize NSM handle used for talking to statd 766 */ 767 static int 768 nlm_nsm_init(struct nlm_nsm *nsm, struct knetconfig *knc, struct netbuf *nb) 769 { 770 enum clnt_stat stat; 771 int error, retries; 772 773 bzero(nsm, sizeof (*nsm)); 774 nsm->ns_knc = *knc; 775 nlm_copy_netbuf(&nsm->ns_addr, nb); 776 777 /* 778 * Try several times to get the port of statd service, 779 * If rpcbind_getaddr returns RPC_PROGNOTREGISTERED, 780 * retry an attempt, but wait for NLM_NSM_RPCBIND_TIMEOUT 781 * seconds berofore. 782 */ 783 for (retries = 0; retries < NLM_NSM_RPCBIND_RETRIES; retries++) { 784 stat = rpcbind_getaddr(&nsm->ns_knc, SM_PROG, 785 SM_VERS, &nsm->ns_addr); 786 if (stat != RPC_SUCCESS) { 787 if (stat == RPC_PROGNOTREGISTERED) { 788 delay(SEC_TO_TICK(NLM_NSM_RPCBIND_TIMEOUT)); 789 continue; 790 } 791 } 792 793 break; 794 } 795 796 if (stat != RPC_SUCCESS) { 797 DTRACE_PROBE2(rpcbind__error, enum clnt_stat, stat, 798 int, retries); 799 error = ENOENT; 800 goto error; 801 } 802 803 /* 804 * Create an RPC handle that'll be used for communication with local 805 * statd using the status monitor protocol. 806 */ 807 error = clnt_tli_kcreate(&nsm->ns_knc, &nsm->ns_addr, SM_PROG, SM_VERS, 808 0, NLM_RPC_RETRIES, kcred, &nsm->ns_handle); 809 if (error != 0) 810 goto error; 811 812 /* 813 * Create an RPC handle that'll be used for communication with the 814 * local statd using the address registration protocol. 815 */ 816 error = clnt_tli_kcreate(&nsm->ns_knc, &nsm->ns_addr, NSM_ADDR_PROGRAM, 817 NSM_ADDR_V1, 0, NLM_RPC_RETRIES, kcred, &nsm->ns_addr_handle); 818 if (error != 0) 819 goto error; 820 821 sema_init(&nsm->ns_sem, 1, NULL, SEMA_DEFAULT, NULL); 822 return (0); 823 824 error: 825 kmem_free(nsm->ns_addr.buf, nsm->ns_addr.maxlen); 826 if (nsm->ns_handle) 827 CLNT_DESTROY(nsm->ns_handle); 828 829 return (error); 830 } 831 832 static void 833 nlm_nsm_fini(struct nlm_nsm *nsm) 834 { 835 kmem_free(nsm->ns_addr.buf, nsm->ns_addr.maxlen); 836 CLNT_DESTROY(nsm->ns_addr_handle); 837 nsm->ns_addr_handle = NULL; 838 CLNT_DESTROY(nsm->ns_handle); 839 nsm->ns_handle = NULL; 840 sema_destroy(&nsm->ns_sem); 841 } 842 843 static enum clnt_stat 844 nlm_nsm_simu_crash(struct nlm_nsm *nsm) 845 { 846 enum clnt_stat stat; 847 848 sema_p(&nsm->ns_sem); 849 nlm_nsm_clnt_init(nsm->ns_handle, nsm); 850 stat = sm_simu_crash_1(NULL, NULL, nsm->ns_handle); 851 sema_v(&nsm->ns_sem); 852 853 return (stat); 854 } 855 856 static enum clnt_stat 857 nlm_nsm_stat(struct nlm_nsm *nsm, int32_t *out_stat) 858 { 859 struct sm_name args; 860 struct sm_stat_res res; 861 enum clnt_stat stat; 862 863 args.mon_name = uts_nodename(); 864 bzero(&res, sizeof (res)); 865 866 sema_p(&nsm->ns_sem); 867 nlm_nsm_clnt_init(nsm->ns_handle, nsm); 868 stat = sm_stat_1(&args, &res, nsm->ns_handle); 869 sema_v(&nsm->ns_sem); 870 871 if (stat == RPC_SUCCESS) 872 *out_stat = res.state; 873 874 return (stat); 875 } 876 877 static enum clnt_stat 878 nlm_nsm_mon(struct nlm_nsm *nsm, char *hostname, uint16_t priv) 879 { 880 struct mon args; 881 struct sm_stat_res res; 882 enum clnt_stat stat; 883 884 bzero(&args, sizeof (args)); 885 bzero(&res, sizeof (res)); 886 887 args.mon_id.mon_name = hostname; 888 args.mon_id.my_id.my_name = uts_nodename(); 889 args.mon_id.my_id.my_prog = NLM_PROG; 890 args.mon_id.my_id.my_vers = NLM_SM; 891 args.mon_id.my_id.my_proc = NLM_SM_NOTIFY1; 892 bcopy(&priv, args.priv, sizeof (priv)); 893 894 sema_p(&nsm->ns_sem); 895 nlm_nsm_clnt_init(nsm->ns_handle, nsm); 896 stat = sm_mon_1(&args, &res, nsm->ns_handle); 897 sema_v(&nsm->ns_sem); 898 899 return (stat); 900 } 901 902 static enum clnt_stat 903 nlm_nsm_unmon(struct nlm_nsm *nsm, char *hostname) 904 { 905 struct mon_id args; 906 struct sm_stat res; 907 enum clnt_stat stat; 908 909 bzero(&args, sizeof (args)); 910 bzero(&res, sizeof (res)); 911 912 args.mon_name = hostname; 913 args.my_id.my_name = uts_nodename(); 914 args.my_id.my_prog = NLM_PROG; 915 args.my_id.my_vers = NLM_SM; 916 args.my_id.my_proc = NLM_SM_NOTIFY1; 917 918 sema_p(&nsm->ns_sem); 919 nlm_nsm_clnt_init(nsm->ns_handle, nsm); 920 stat = sm_unmon_1(&args, &res, nsm->ns_handle); 921 sema_v(&nsm->ns_sem); 922 923 return (stat); 924 } 925 926 static enum clnt_stat 927 nlm_nsmaddr_reg(struct nlm_nsm *nsm, char *name, int family, netobj *address) 928 { 929 struct reg1args args = { 0 }; 930 struct reg1res res = { 0 }; 931 enum clnt_stat stat; 932 933 args.family = family; 934 args.name = name; 935 args.address = *address; 936 937 sema_p(&nsm->ns_sem); 938 nlm_nsm_clnt_init(nsm->ns_addr_handle, nsm); 939 stat = nsmaddrproc1_reg_1(&args, &res, nsm->ns_addr_handle); 940 sema_v(&nsm->ns_sem); 941 942 return (stat); 943 } 944 945 /* 946 * Get NLM vhold object corresponding to vnode "vp". 947 * If no such object was found, create a new one. 948 * 949 * The purpose of this function is to associate vhold 950 * object with given vnode, so that: 951 * 1) vnode is hold (VN_HOLD) while vhold object is alive. 952 * 2) host has a track of all vnodes it touched by lock 953 * or share operations. These vnodes are accessible 954 * via collection of vhold objects. 955 */ 956 struct nlm_vhold * 957 nlm_vhold_get(struct nlm_host *hostp, vnode_t *vp) 958 { 959 struct nlm_vhold *nvp, *new_nvp = NULL; 960 961 mutex_enter(&hostp->nh_lock); 962 nvp = nlm_vhold_find_locked(hostp, vp); 963 if (nvp != NULL) 964 goto out; 965 966 /* nlm_vhold wasn't found, then create a new one */ 967 mutex_exit(&hostp->nh_lock); 968 new_nvp = kmem_cache_alloc(nlm_vhold_cache, KM_SLEEP); 969 970 /* 971 * Check if another thread has already 972 * created the same nlm_vhold. 973 */ 974 mutex_enter(&hostp->nh_lock); 975 nvp = nlm_vhold_find_locked(hostp, vp); 976 if (nvp == NULL) { 977 nvp = new_nvp; 978 new_nvp = NULL; 979 980 TAILQ_INIT(&nvp->nv_slreqs); 981 nvp->nv_vp = vp; 982 nvp->nv_refcnt = 1; 983 VN_HOLD(nvp->nv_vp); 984 985 VERIFY(mod_hash_insert(hostp->nh_vholds_by_vp, 986 (mod_hash_key_t)vp, (mod_hash_val_t)nvp) == 0); 987 TAILQ_INSERT_TAIL(&hostp->nh_vholds_list, nvp, nv_link); 988 } 989 990 out: 991 mutex_exit(&hostp->nh_lock); 992 if (new_nvp != NULL) 993 kmem_cache_free(nlm_vhold_cache, new_nvp); 994 995 return (nvp); 996 } 997 998 /* 999 * Drop a reference to vhold object nvp. 1000 */ 1001 void 1002 nlm_vhold_release(struct nlm_host *hostp, struct nlm_vhold *nvp) 1003 { 1004 if (nvp == NULL) 1005 return; 1006 1007 mutex_enter(&hostp->nh_lock); 1008 ASSERT(nvp->nv_refcnt > 0); 1009 nvp->nv_refcnt--; 1010 mutex_exit(&hostp->nh_lock); 1011 } 1012 1013 /* 1014 * Clean all locks and share reservations on the 1015 * given vhold object that were acquired by the 1016 * given sysid 1017 */ 1018 static void 1019 nlm_vhold_clean(struct nlm_vhold *nvp, int sysid) 1020 { 1021 cleanlocks(nvp->nv_vp, IGN_PID, sysid); 1022 cleanshares_by_sysid(nvp->nv_vp, sysid); 1023 } 1024 1025 static void 1026 nlm_vhold_destroy(struct nlm_host *hostp, struct nlm_vhold *nvp) 1027 { 1028 ASSERT(MUTEX_HELD(&hostp->nh_lock)); 1029 1030 VERIFY(mod_hash_remove(hostp->nh_vholds_by_vp, 1031 (mod_hash_key_t)nvp->nv_vp, 1032 (mod_hash_val_t)&nvp) == 0); 1033 1034 TAILQ_REMOVE(&hostp->nh_vholds_list, nvp, nv_link); 1035 VN_RELE(nvp->nv_vp); 1036 nvp->nv_vp = NULL; 1037 1038 kmem_cache_free(nlm_vhold_cache, nvp); 1039 } 1040 1041 /* 1042 * Return TRUE if the given vhold is busy. 1043 * Vhold object is considered to be "busy" when 1044 * all the following conditions hold: 1045 * 1) No one uses it at the moment; 1046 * 2) It hasn't any locks; 1047 * 3) It hasn't any share reservations; 1048 */ 1049 static bool_t 1050 nlm_vhold_busy(struct nlm_host *hostp, struct nlm_vhold *nvp) 1051 { 1052 vnode_t *vp; 1053 int sysid; 1054 1055 ASSERT(MUTEX_HELD(&hostp->nh_lock)); 1056 1057 if (nvp->nv_refcnt > 0) 1058 return (TRUE); 1059 1060 vp = nvp->nv_vp; 1061 sysid = hostp->nh_sysid; 1062 if (flk_has_remote_locks_for_sysid(vp, sysid) || 1063 shr_has_remote_shares(vp, sysid)) 1064 return (TRUE); 1065 1066 return (FALSE); 1067 } 1068 1069 /* ARGSUSED */ 1070 static int 1071 nlm_vhold_ctor(void *datap, void *cdrarg, int kmflags) 1072 { 1073 struct nlm_vhold *nvp = (struct nlm_vhold *)datap; 1074 1075 bzero(nvp, sizeof (*nvp)); 1076 return (0); 1077 } 1078 1079 /* ARGSUSED */ 1080 static void 1081 nlm_vhold_dtor(void *datap, void *cdrarg) 1082 { 1083 struct nlm_vhold *nvp = (struct nlm_vhold *)datap; 1084 1085 ASSERT(nvp->nv_refcnt == 0); 1086 ASSERT(TAILQ_EMPTY(&nvp->nv_slreqs)); 1087 ASSERT(nvp->nv_vp == NULL); 1088 } 1089 1090 struct nlm_vhold * 1091 nlm_vhold_find_locked(struct nlm_host *hostp, const vnode_t *vp) 1092 { 1093 struct nlm_vhold *nvp = NULL; 1094 1095 ASSERT(MUTEX_HELD(&hostp->nh_lock)); 1096 (void) mod_hash_find(hostp->nh_vholds_by_vp, 1097 (mod_hash_key_t)vp, 1098 (mod_hash_val_t)&nvp); 1099 1100 if (nvp != NULL) 1101 nvp->nv_refcnt++; 1102 1103 return (nvp); 1104 } 1105 1106 /* 1107 * NLM host functions 1108 */ 1109 static void 1110 nlm_copy_netbuf(struct netbuf *dst, struct netbuf *src) 1111 { 1112 ASSERT(src->len <= src->maxlen); 1113 1114 dst->maxlen = src->maxlen; 1115 dst->len = src->len; 1116 dst->buf = kmem_zalloc(src->maxlen, KM_SLEEP); 1117 bcopy(src->buf, dst->buf, src->len); 1118 } 1119 1120 /* ARGSUSED */ 1121 static int 1122 nlm_host_ctor(void *datap, void *cdrarg, int kmflags) 1123 { 1124 struct nlm_host *hostp = (struct nlm_host *)datap; 1125 1126 bzero(hostp, sizeof (*hostp)); 1127 return (0); 1128 } 1129 1130 /* ARGSUSED */ 1131 static void 1132 nlm_host_dtor(void *datap, void *cdrarg) 1133 { 1134 struct nlm_host *hostp = (struct nlm_host *)datap; 1135 ASSERT(hostp->nh_refs == 0); 1136 } 1137 1138 static void 1139 nlm_host_unregister(struct nlm_globals *g, struct nlm_host *hostp) 1140 { 1141 ASSERT(hostp->nh_refs == 0); 1142 1143 avl_remove(&g->nlm_hosts_tree, hostp); 1144 VERIFY(mod_hash_remove(g->nlm_hosts_hash, 1145 (mod_hash_key_t)(uintptr_t)hostp->nh_sysid, 1146 (mod_hash_val_t)&hostp) == 0); 1147 TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link); 1148 hostp->nh_flags &= ~NLM_NH_INIDLE; 1149 } 1150 1151 /* 1152 * Free resources used by a host. This is called after the reference 1153 * count has reached zero so it doesn't need to worry about locks. 1154 */ 1155 static void 1156 nlm_host_destroy(struct nlm_host *hostp) 1157 { 1158 ASSERT(hostp->nh_name != NULL); 1159 ASSERT(hostp->nh_netid != NULL); 1160 ASSERT(TAILQ_EMPTY(&hostp->nh_vholds_list)); 1161 1162 strfree(hostp->nh_name); 1163 strfree(hostp->nh_netid); 1164 kmem_free(hostp->nh_addr.buf, hostp->nh_addr.maxlen); 1165 1166 if (hostp->nh_sysid != LM_NOSYSID) 1167 nlm_sysid_free(hostp->nh_sysid); 1168 1169 nlm_rpc_cache_destroy(hostp); 1170 1171 ASSERT(TAILQ_EMPTY(&hostp->nh_vholds_list)); 1172 mod_hash_destroy_ptrhash(hostp->nh_vholds_by_vp); 1173 1174 mutex_destroy(&hostp->nh_lock); 1175 cv_destroy(&hostp->nh_rpcb_cv); 1176 cv_destroy(&hostp->nh_recl_cv); 1177 1178 kmem_cache_free(nlm_hosts_cache, hostp); 1179 } 1180 1181 /* 1182 * Cleanup SERVER-side state after a client restarts, 1183 * or becomes unresponsive, or whatever. 1184 * 1185 * We unlock any active locks owned by the host. 1186 * When rpc.lockd is shutting down, 1187 * this function is called with newstate set to zero 1188 * which allows us to cancel any pending async locks 1189 * and clear the locking state. 1190 * 1191 * When "state" is 0, we don't update host's state, 1192 * but cleanup all remote locks on the host. 1193 * It's useful to call this function for resources 1194 * cleanup. 1195 */ 1196 void 1197 nlm_host_notify_server(struct nlm_host *hostp, int32_t state) 1198 { 1199 struct nlm_vhold *nvp; 1200 struct nlm_slreq *slr; 1201 struct nlm_slreq_list slreqs2free; 1202 1203 TAILQ_INIT(&slreqs2free); 1204 mutex_enter(&hostp->nh_lock); 1205 if (state != 0) 1206 hostp->nh_state = state; 1207 1208 TAILQ_FOREACH(nvp, &hostp->nh_vholds_list, nv_link) { 1209 1210 /* cleanup sleeping requests at first */ 1211 while ((slr = TAILQ_FIRST(&nvp->nv_slreqs)) != NULL) { 1212 TAILQ_REMOVE(&nvp->nv_slreqs, slr, nsr_link); 1213 1214 /* 1215 * Instead of freeing cancelled sleeping request 1216 * here, we add it to the linked list created 1217 * on the stack in order to do all frees outside 1218 * the critical section. 1219 */ 1220 TAILQ_INSERT_TAIL(&slreqs2free, slr, nsr_link); 1221 } 1222 1223 nvp->nv_refcnt++; 1224 mutex_exit(&hostp->nh_lock); 1225 1226 nlm_vhold_clean(nvp, hostp->nh_sysid); 1227 1228 mutex_enter(&hostp->nh_lock); 1229 nvp->nv_refcnt--; 1230 } 1231 1232 mutex_exit(&hostp->nh_lock); 1233 while ((slr = TAILQ_FIRST(&slreqs2free)) != NULL) { 1234 TAILQ_REMOVE(&slreqs2free, slr, nsr_link); 1235 kmem_free(slr, sizeof (*slr)); 1236 } 1237 } 1238 1239 /* 1240 * Cleanup CLIENT-side state after a server restarts, 1241 * or becomes unresponsive, or whatever. 1242 * 1243 * This is called by the local NFS statd when we receive a 1244 * host state change notification. (also nlm_svc_stopping) 1245 * 1246 * Deal with a server restart. If we are stopping the 1247 * NLM service, we'll have newstate == 0, and will just 1248 * cancel all our client-side lock requests. Otherwise, 1249 * start the "recovery" process to reclaim any locks 1250 * we hold on this server. 1251 */ 1252 void 1253 nlm_host_notify_client(struct nlm_host *hostp, int32_t state) 1254 { 1255 mutex_enter(&hostp->nh_lock); 1256 hostp->nh_state = state; 1257 if (hostp->nh_flags & NLM_NH_RECLAIM) { 1258 /* 1259 * Either host's state is up to date or 1260 * host is already in recovery. 1261 */ 1262 mutex_exit(&hostp->nh_lock); 1263 return; 1264 } 1265 1266 hostp->nh_flags |= NLM_NH_RECLAIM; 1267 1268 /* 1269 * Host will be released by the recovery thread, 1270 * thus we need to increment refcount. 1271 */ 1272 hostp->nh_refs++; 1273 mutex_exit(&hostp->nh_lock); 1274 1275 (void) zthread_create(NULL, 0, nlm_reclaimer, 1276 hostp, 0, minclsyspri); 1277 } 1278 1279 /* 1280 * The function is called when NLM client detects that 1281 * server has entered in grace period and client needs 1282 * to wait until reclamation process (if any) does 1283 * its job. 1284 */ 1285 int 1286 nlm_host_wait_grace(struct nlm_host *hostp) 1287 { 1288 struct nlm_globals *g; 1289 int error = 0; 1290 1291 g = zone_getspecific(nlm_zone_key, curzone); 1292 mutex_enter(&hostp->nh_lock); 1293 1294 do { 1295 int rc; 1296 1297 rc = cv_timedwait_sig(&hostp->nh_recl_cv, 1298 &hostp->nh_lock, ddi_get_lbolt() + 1299 SEC_TO_TICK(g->retrans_tmo)); 1300 1301 if (rc == 0) { 1302 error = EINTR; 1303 break; 1304 } 1305 } while (hostp->nh_flags & NLM_NH_RECLAIM); 1306 1307 mutex_exit(&hostp->nh_lock); 1308 return (error); 1309 } 1310 1311 /* 1312 * Create a new NLM host. 1313 * 1314 * NOTE: The in-kernel RPC (kRPC) subsystem uses TLI/XTI, 1315 * which needs both a knetconfig and an address when creating 1316 * endpoints. Thus host object stores both knetconfig and 1317 * netid. 1318 */ 1319 static struct nlm_host * 1320 nlm_host_create(char *name, const char *netid, 1321 struct knetconfig *knc, struct netbuf *naddr) 1322 { 1323 struct nlm_host *host; 1324 1325 host = kmem_cache_alloc(nlm_hosts_cache, KM_SLEEP); 1326 1327 mutex_init(&host->nh_lock, NULL, MUTEX_DEFAULT, NULL); 1328 cv_init(&host->nh_rpcb_cv, NULL, CV_DEFAULT, NULL); 1329 cv_init(&host->nh_recl_cv, NULL, CV_DEFAULT, NULL); 1330 1331 host->nh_sysid = LM_NOSYSID; 1332 host->nh_refs = 1; 1333 host->nh_name = strdup(name); 1334 host->nh_netid = strdup(netid); 1335 host->nh_knc = *knc; 1336 nlm_copy_netbuf(&host->nh_addr, naddr); 1337 1338 host->nh_state = 0; 1339 host->nh_rpcb_state = NRPCB_NEED_UPDATE; 1340 host->nh_flags = 0; 1341 1342 host->nh_vholds_by_vp = mod_hash_create_ptrhash("nlm vholds hash", 1343 32, mod_hash_null_valdtor, sizeof (vnode_t)); 1344 1345 TAILQ_INIT(&host->nh_vholds_list); 1346 TAILQ_INIT(&host->nh_rpchc); 1347 1348 return (host); 1349 } 1350 1351 /* 1352 * Cancel all client side sleeping locks owned by given host. 1353 */ 1354 void 1355 nlm_host_cancel_slocks(struct nlm_globals *g, struct nlm_host *hostp) 1356 { 1357 struct nlm_slock *nslp; 1358 1359 mutex_enter(&g->lock); 1360 TAILQ_FOREACH(nslp, &g->nlm_slocks, nsl_link) { 1361 if (nslp->nsl_host == hostp) { 1362 nslp->nsl_state = NLM_SL_CANCELLED; 1363 cv_broadcast(&nslp->nsl_cond); 1364 } 1365 } 1366 1367 mutex_exit(&g->lock); 1368 } 1369 1370 /* 1371 * Garbage collect stale vhold objects. 1372 * 1373 * In other words check whether vnodes that are 1374 * held by vhold objects still have any locks 1375 * or shares or still in use. If they aren't, 1376 * just destroy them. 1377 */ 1378 static void 1379 nlm_host_gc_vholds(struct nlm_host *hostp) 1380 { 1381 struct nlm_vhold *nvp; 1382 1383 ASSERT(MUTEX_HELD(&hostp->nh_lock)); 1384 1385 nvp = TAILQ_FIRST(&hostp->nh_vholds_list); 1386 while (nvp != NULL) { 1387 struct nlm_vhold *nvp_tmp; 1388 1389 if (nlm_vhold_busy(hostp, nvp)) { 1390 nvp = TAILQ_NEXT(nvp, nv_link); 1391 continue; 1392 } 1393 1394 nvp_tmp = TAILQ_NEXT(nvp, nv_link); 1395 nlm_vhold_destroy(hostp, nvp); 1396 nvp = nvp_tmp; 1397 } 1398 } 1399 1400 /* 1401 * Check whether the given host has any 1402 * server side locks or share reservations. 1403 */ 1404 static bool_t 1405 nlm_host_has_srv_locks(struct nlm_host *hostp) 1406 { 1407 /* 1408 * It's cheap and simple: if server has 1409 * any locks/shares there must be vhold 1410 * object storing the affected vnode. 1411 * 1412 * NOTE: We don't need to check sleeping 1413 * locks on the server side, because if 1414 * server side sleeping lock is alive, 1415 * there must be a vhold object corresponding 1416 * to target vnode. 1417 */ 1418 ASSERT(MUTEX_HELD(&hostp->nh_lock)); 1419 if (!TAILQ_EMPTY(&hostp->nh_vholds_list)) 1420 return (TRUE); 1421 1422 return (FALSE); 1423 } 1424 1425 /* 1426 * Check whether the given host has any client side 1427 * locks or share reservations. 1428 */ 1429 static bool_t 1430 nlm_host_has_cli_locks(struct nlm_host *hostp) 1431 { 1432 ASSERT(MUTEX_HELD(&hostp->nh_lock)); 1433 1434 /* 1435 * XXX: It's not the way I'd like to do the check, 1436 * because flk_sysid_has_locks() can be very 1437 * expensive by design. Unfortunatelly it iterates 1438 * through all locks on the system, doesn't matter 1439 * were they made on remote system via NLM or 1440 * on local system via reclock. To understand the 1441 * problem, consider that there're dozens of thousands 1442 * of locks that are made on some ZFS dataset. And there's 1443 * another dataset shared by NFS where NLM client had locks 1444 * some time ago, but doesn't have them now. 1445 * In this case flk_sysid_has_locks() will iterate 1446 * thrught dozens of thousands locks until it returns us 1447 * FALSE. 1448 * Oh, I hope that in shiny future somebody will make 1449 * local lock manager (os/flock.c) better, so that 1450 * it'd be more friedly to remote locks and 1451 * flk_sysid_has_locks() wouldn't be so expensive. 1452 */ 1453 if (flk_sysid_has_locks(hostp->nh_sysid | 1454 LM_SYSID_CLIENT, FLK_QUERY_ACTIVE)) 1455 return (TRUE); 1456 1457 /* 1458 * Check whether host has any share reservations 1459 * registered on the client side. 1460 */ 1461 if (hostp->nh_shrlist != NULL) 1462 return (TRUE); 1463 1464 return (FALSE); 1465 } 1466 1467 /* 1468 * Determine whether the given host owns any 1469 * locks or share reservations. 1470 */ 1471 static bool_t 1472 nlm_host_has_locks(struct nlm_host *hostp) 1473 { 1474 if (nlm_host_has_srv_locks(hostp)) 1475 return (TRUE); 1476 1477 return (nlm_host_has_cli_locks(hostp)); 1478 } 1479 1480 /* 1481 * This function compares only addresses of two netbufs 1482 * that belong to NC_TCP[6] or NC_UDP[6] protofamily. 1483 * Port part of netbuf is ignored. 1484 * 1485 * Return values: 1486 * -1: nb1's address is "smaller" than nb2's 1487 * 0: addresses are equal 1488 * 1: nb1's address is "greater" than nb2's 1489 */ 1490 static int 1491 nlm_netbuf_addrs_cmp(struct netbuf *nb1, struct netbuf *nb2) 1492 { 1493 union nlm_addr { 1494 struct sockaddr sa; 1495 struct sockaddr_in sin; 1496 struct sockaddr_in6 sin6; 1497 } *na1, *na2; 1498 int res; 1499 1500 /* LINTED E_BAD_PTR_CAST_ALIGN */ 1501 na1 = (union nlm_addr *)nb1->buf; 1502 /* LINTED E_BAD_PTR_CAST_ALIGN */ 1503 na2 = (union nlm_addr *)nb2->buf; 1504 1505 if (na1->sa.sa_family < na2->sa.sa_family) 1506 return (-1); 1507 if (na1->sa.sa_family > na2->sa.sa_family) 1508 return (1); 1509 1510 switch (na1->sa.sa_family) { 1511 case AF_INET: 1512 res = memcmp(&na1->sin.sin_addr, &na2->sin.sin_addr, 1513 sizeof (na1->sin.sin_addr)); 1514 break; 1515 case AF_INET6: 1516 res = memcmp(&na1->sin6.sin6_addr, &na2->sin6.sin6_addr, 1517 sizeof (na1->sin6.sin6_addr)); 1518 break; 1519 default: 1520 VERIFY(0); 1521 return (0); 1522 } 1523 1524 return (SIGN(res)); 1525 } 1526 1527 /* 1528 * Compare two nlm hosts. 1529 * Return values: 1530 * -1: host1 is "smaller" than host2 1531 * 0: host1 is equal to host2 1532 * 1: host1 is "greater" than host2 1533 */ 1534 int 1535 nlm_host_cmp(const void *p1, const void *p2) 1536 { 1537 struct nlm_host *h1 = (struct nlm_host *)p1; 1538 struct nlm_host *h2 = (struct nlm_host *)p2; 1539 int res; 1540 1541 res = strcmp(h1->nh_netid, h2->nh_netid); 1542 if (res != 0) 1543 return (SIGN(res)); 1544 1545 res = nlm_netbuf_addrs_cmp(&h1->nh_addr, &h2->nh_addr); 1546 return (res); 1547 } 1548 1549 /* 1550 * Find the host specified by... (see below) 1551 * If found, increment the ref count. 1552 */ 1553 static struct nlm_host * 1554 nlm_host_find_locked(struct nlm_globals *g, const char *netid, 1555 struct netbuf *naddr, avl_index_t *wherep) 1556 { 1557 struct nlm_host *hostp, key; 1558 avl_index_t pos; 1559 1560 ASSERT(MUTEX_HELD(&g->lock)); 1561 1562 key.nh_netid = (char *)netid; 1563 key.nh_addr.buf = naddr->buf; 1564 key.nh_addr.len = naddr->len; 1565 key.nh_addr.maxlen = naddr->maxlen; 1566 1567 hostp = avl_find(&g->nlm_hosts_tree, &key, &pos); 1568 1569 if (hostp != NULL) { 1570 /* 1571 * Host is inuse now. Remove it from idle 1572 * hosts list if needed. 1573 */ 1574 if (hostp->nh_flags & NLM_NH_INIDLE) { 1575 TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link); 1576 hostp->nh_flags &= ~NLM_NH_INIDLE; 1577 } 1578 1579 hostp->nh_refs++; 1580 } 1581 if (wherep != NULL) 1582 *wherep = pos; 1583 1584 return (hostp); 1585 } 1586 1587 /* 1588 * Find NLM host for the given name and address. 1589 */ 1590 struct nlm_host * 1591 nlm_host_find(struct nlm_globals *g, const char *netid, 1592 struct netbuf *addr) 1593 { 1594 struct nlm_host *hostp = NULL; 1595 1596 mutex_enter(&g->lock); 1597 if (g->run_status != NLM_ST_UP) 1598 goto out; 1599 1600 hostp = nlm_host_find_locked(g, netid, addr, NULL); 1601 1602 out: 1603 mutex_exit(&g->lock); 1604 return (hostp); 1605 } 1606 1607 1608 /* 1609 * Find or create an NLM host for the given name and address. 1610 * 1611 * The remote host is determined by all of: name, netidd, address. 1612 * Note that the netid is whatever nlm_svc_add_ep() gave to 1613 * svc_tli_kcreate() for the service binding. If any of these 1614 * are different, allocate a new host (new sysid). 1615 */ 1616 struct nlm_host * 1617 nlm_host_findcreate(struct nlm_globals *g, char *name, 1618 const char *netid, struct netbuf *addr) 1619 { 1620 int err; 1621 struct nlm_host *host, *newhost = NULL; 1622 struct knetconfig knc; 1623 avl_index_t where; 1624 1625 mutex_enter(&g->lock); 1626 if (g->run_status != NLM_ST_UP) { 1627 mutex_exit(&g->lock); 1628 return (NULL); 1629 } 1630 1631 host = nlm_host_find_locked(g, netid, addr, NULL); 1632 mutex_exit(&g->lock); 1633 if (host != NULL) 1634 return (host); 1635 1636 err = nlm_knc_from_netid(netid, &knc); 1637 if (err != 0) 1638 return (NULL); 1639 /* 1640 * Do allocations (etc.) outside of mutex, 1641 * and then check again before inserting. 1642 */ 1643 newhost = nlm_host_create(name, netid, &knc, addr); 1644 newhost->nh_sysid = nlm_sysid_alloc(); 1645 if (newhost->nh_sysid == LM_NOSYSID) 1646 goto out; 1647 1648 mutex_enter(&g->lock); 1649 host = nlm_host_find_locked(g, netid, addr, &where); 1650 if (host == NULL) { 1651 host = newhost; 1652 newhost = NULL; 1653 1654 /* 1655 * Insert host to the hosts AVL tree that is 1656 * used to lookup by <netid, address> pair. 1657 */ 1658 avl_insert(&g->nlm_hosts_tree, host, where); 1659 1660 /* 1661 * Insert host ot the hosts hash table that is 1662 * used to lookup host by sysid. 1663 */ 1664 VERIFY(mod_hash_insert(g->nlm_hosts_hash, 1665 (mod_hash_key_t)(uintptr_t)host->nh_sysid, 1666 (mod_hash_val_t)host) == 0); 1667 } 1668 1669 mutex_exit(&g->lock); 1670 1671 out: 1672 if (newhost != NULL) 1673 nlm_host_destroy(newhost); 1674 1675 return (host); 1676 } 1677 1678 /* 1679 * Find the NLM host that matches the value of 'sysid'. 1680 * If found, return it with a new ref, 1681 * else return NULL. 1682 */ 1683 struct nlm_host * 1684 nlm_host_find_by_sysid(struct nlm_globals *g, sysid_t sysid) 1685 { 1686 struct nlm_host *hostp = NULL; 1687 1688 mutex_enter(&g->lock); 1689 if (g->run_status != NLM_ST_UP) 1690 goto out; 1691 1692 (void) mod_hash_find(g->nlm_hosts_hash, 1693 (mod_hash_key_t)(uintptr_t)sysid, 1694 (mod_hash_val_t)&hostp); 1695 1696 if (hostp == NULL) 1697 goto out; 1698 1699 /* 1700 * Host is inuse now. Remove it 1701 * from idle hosts list if needed. 1702 */ 1703 if (hostp->nh_flags & NLM_NH_INIDLE) { 1704 TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link); 1705 hostp->nh_flags &= ~NLM_NH_INIDLE; 1706 } 1707 1708 hostp->nh_refs++; 1709 1710 out: 1711 mutex_exit(&g->lock); 1712 return (hostp); 1713 } 1714 1715 /* 1716 * Release the given host. 1717 * I.e. drop a reference that was taken earlier by one of 1718 * the following functions: nlm_host_findcreate(), nlm_host_find(), 1719 * nlm_host_find_by_sysid(). 1720 * 1721 * When the very last reference is dropped, host is moved to 1722 * so-called "idle state". All hosts that are in idle state 1723 * have an idle timeout. If timeout is expired, GC thread 1724 * checks whether hosts have any locks and if they heven't 1725 * any, it removes them. 1726 * NOTE: only unused hosts can be in idle state. 1727 */ 1728 void 1729 nlm_host_release(struct nlm_globals *g, struct nlm_host *hostp) 1730 { 1731 if (hostp == NULL) 1732 return; 1733 1734 mutex_enter(&g->lock); 1735 ASSERT(hostp->nh_refs > 0); 1736 1737 hostp->nh_refs--; 1738 if (hostp->nh_refs != 0) { 1739 mutex_exit(&g->lock); 1740 return; 1741 } 1742 1743 /* 1744 * The very last reference to the host was dropped, 1745 * thus host is unused now. Set its idle timeout 1746 * and move it to the idle hosts LRU list. 1747 */ 1748 hostp->nh_idle_timeout = ddi_get_lbolt() + 1749 SEC_TO_TICK(g->cn_idle_tmo); 1750 1751 ASSERT((hostp->nh_flags & NLM_NH_INIDLE) == 0); 1752 TAILQ_INSERT_TAIL(&g->nlm_idle_hosts, hostp, nh_link); 1753 hostp->nh_flags |= NLM_NH_INIDLE; 1754 mutex_exit(&g->lock); 1755 } 1756 1757 /* 1758 * Unregister this NLM host (NFS client) with the local statd 1759 * due to idleness (no locks held for a while). 1760 */ 1761 void 1762 nlm_host_unmonitor(struct nlm_globals *g, struct nlm_host *host) 1763 { 1764 enum clnt_stat stat; 1765 1766 VERIFY(host->nh_refs == 0); 1767 if (!(host->nh_flags & NLM_NH_MONITORED)) 1768 return; 1769 1770 host->nh_flags &= ~NLM_NH_MONITORED; 1771 stat = nlm_nsm_unmon(&g->nlm_nsm, host->nh_name); 1772 if (stat != RPC_SUCCESS) { 1773 NLM_WARN("NLM: Failed to contact statd, stat=%d\n", stat); 1774 return; 1775 } 1776 } 1777 1778 /* 1779 * Ask the local NFS statd to begin monitoring this host. 1780 * It will call us back when that host restarts, using the 1781 * prog,vers,proc specified below, i.e. NLM_SM_NOTIFY1, 1782 * which is handled in nlm_do_notify1(). 1783 */ 1784 void 1785 nlm_host_monitor(struct nlm_globals *g, struct nlm_host *host, int state) 1786 { 1787 int family; 1788 netobj obj; 1789 enum clnt_stat stat; 1790 1791 if (state != 0 && host->nh_state == 0) { 1792 /* 1793 * This is the first time we have seen an NSM state 1794 * Value for this host. We record it here to help 1795 * detect host reboots. 1796 */ 1797 host->nh_state = state; 1798 } 1799 1800 mutex_enter(&host->nh_lock); 1801 if (host->nh_flags & NLM_NH_MONITORED) { 1802 mutex_exit(&host->nh_lock); 1803 return; 1804 } 1805 1806 host->nh_flags |= NLM_NH_MONITORED; 1807 mutex_exit(&host->nh_lock); 1808 1809 /* 1810 * Before we begin monitoring the host register the network address 1811 * associated with this hostname. 1812 */ 1813 nlm_netbuf_to_netobj(&host->nh_addr, &family, &obj); 1814 stat = nlm_nsmaddr_reg(&g->nlm_nsm, host->nh_name, family, &obj); 1815 if (stat != RPC_SUCCESS) { 1816 NLM_WARN("Failed to register address, stat=%d\n", stat); 1817 mutex_enter(&g->lock); 1818 host->nh_flags &= ~NLM_NH_MONITORED; 1819 mutex_exit(&g->lock); 1820 1821 return; 1822 } 1823 1824 /* 1825 * Tell statd how to call us with status updates for 1826 * this host. Updates arrive via nlm_do_notify1(). 1827 * 1828 * We put our assigned system ID value in the priv field to 1829 * make it simpler to find the host if we are notified of a 1830 * host restart. 1831 */ 1832 stat = nlm_nsm_mon(&g->nlm_nsm, host->nh_name, host->nh_sysid); 1833 if (stat != RPC_SUCCESS) { 1834 NLM_WARN("Failed to contact local NSM, stat=%d\n", stat); 1835 mutex_enter(&g->lock); 1836 host->nh_flags &= ~NLM_NH_MONITORED; 1837 mutex_exit(&g->lock); 1838 1839 return; 1840 } 1841 } 1842 1843 int 1844 nlm_host_get_state(struct nlm_host *hostp) 1845 { 1846 1847 return (hostp->nh_state); 1848 } 1849 1850 /* 1851 * NLM client/server sleeping locks 1852 */ 1853 1854 /* 1855 * Register client side sleeping lock. 1856 * 1857 * Our client code calls this to keep information 1858 * about sleeping lock somewhere. When it receives 1859 * grant callback from server or when it just 1860 * needs to remove all sleeping locks from vnode, 1861 * it uses this information for remove/apply lock 1862 * properly. 1863 */ 1864 struct nlm_slock * 1865 nlm_slock_register( 1866 struct nlm_globals *g, 1867 struct nlm_host *host, 1868 struct nlm4_lock *lock, 1869 struct vnode *vp) 1870 { 1871 struct nlm_slock *nslp; 1872 1873 nslp = kmem_zalloc(sizeof (*nslp), KM_SLEEP); 1874 cv_init(&nslp->nsl_cond, NULL, CV_DEFAULT, NULL); 1875 nslp->nsl_lock = *lock; 1876 nlm_copy_netobj(&nslp->nsl_fh, &nslp->nsl_lock.fh); 1877 nslp->nsl_state = NLM_SL_BLOCKED; 1878 nslp->nsl_host = host; 1879 nslp->nsl_vp = vp; 1880 1881 mutex_enter(&g->lock); 1882 TAILQ_INSERT_TAIL(&g->nlm_slocks, nslp, nsl_link); 1883 mutex_exit(&g->lock); 1884 1885 return (nslp); 1886 } 1887 1888 /* 1889 * Remove this lock from the wait list and destroy it. 1890 */ 1891 void 1892 nlm_slock_unregister(struct nlm_globals *g, struct nlm_slock *nslp) 1893 { 1894 mutex_enter(&g->lock); 1895 TAILQ_REMOVE(&g->nlm_slocks, nslp, nsl_link); 1896 mutex_exit(&g->lock); 1897 1898 kmem_free(nslp->nsl_fh.n_bytes, nslp->nsl_fh.n_len); 1899 cv_destroy(&nslp->nsl_cond); 1900 kmem_free(nslp, sizeof (*nslp)); 1901 } 1902 1903 /* 1904 * Wait for a granted callback or cancellation event 1905 * for a sleeping lock. 1906 * 1907 * If a signal interrupted the wait or if the lock 1908 * was cancelled, return EINTR - the caller must arrange to send 1909 * a cancellation to the server. 1910 * 1911 * If timeout occurred, return ETIMEDOUT - the caller must 1912 * resend the lock request to the server. 1913 * 1914 * On success return 0. 1915 */ 1916 int 1917 nlm_slock_wait(struct nlm_globals *g, 1918 struct nlm_slock *nslp, uint_t timeo_secs) 1919 { 1920 clock_t timeo_ticks; 1921 int cv_res, error; 1922 1923 /* 1924 * If the granted message arrived before we got here, 1925 * nw->nw_state will be GRANTED - in that case, don't sleep. 1926 */ 1927 cv_res = 1; 1928 timeo_ticks = ddi_get_lbolt() + SEC_TO_TICK(timeo_secs); 1929 1930 mutex_enter(&g->lock); 1931 if (nslp->nsl_state == NLM_SL_BLOCKED) { 1932 cv_res = cv_timedwait_sig(&nslp->nsl_cond, 1933 &g->lock, timeo_ticks); 1934 } 1935 1936 /* 1937 * No matter why we wake up, if the lock was 1938 * cancelled, let the function caller to know 1939 * about it by returning EINTR. 1940 */ 1941 if (nslp->nsl_state == NLM_SL_CANCELLED) { 1942 error = EINTR; 1943 goto out; 1944 } 1945 1946 if (cv_res <= 0) { 1947 /* We was woken up either by timeout or interrupt */ 1948 error = (cv_res < 0) ? ETIMEDOUT : EINTR; 1949 1950 /* 1951 * The granted message may arrive after the 1952 * interrupt/timeout but before we manage to lock the 1953 * mutex. Detect this by examining nslp. 1954 */ 1955 if (nslp->nsl_state == NLM_SL_GRANTED) 1956 error = 0; 1957 } else { /* awaken via cv_signal or didn't block */ 1958 error = 0; 1959 VERIFY(nslp->nsl_state == NLM_SL_GRANTED); 1960 } 1961 1962 out: 1963 mutex_exit(&g->lock); 1964 return (error); 1965 } 1966 1967 /* 1968 * Mark client side sleeping lock as granted 1969 * and wake up a process blocked on the lock. 1970 * Called from server side NLM_GRANT handler. 1971 * 1972 * If sleeping lock is found return 0, otherwise 1973 * return ENOENT. 1974 */ 1975 int 1976 nlm_slock_grant(struct nlm_globals *g, 1977 struct nlm_host *hostp, struct nlm4_lock *alock) 1978 { 1979 struct nlm_slock *nslp; 1980 int error = ENOENT; 1981 1982 mutex_enter(&g->lock); 1983 TAILQ_FOREACH(nslp, &g->nlm_slocks, nsl_link) { 1984 if ((nslp->nsl_state != NLM_SL_BLOCKED) || 1985 (nslp->nsl_host != hostp)) 1986 continue; 1987 1988 if (alock->svid == nslp->nsl_lock.svid && 1989 alock->l_offset == nslp->nsl_lock.l_offset && 1990 alock->l_len == nslp->nsl_lock.l_len && 1991 alock->fh.n_len == nslp->nsl_lock.fh.n_len && 1992 bcmp(alock->fh.n_bytes, nslp->nsl_lock.fh.n_bytes, 1993 nslp->nsl_lock.fh.n_len) == 0) { 1994 nslp->nsl_state = NLM_SL_GRANTED; 1995 cv_broadcast(&nslp->nsl_cond); 1996 error = 0; 1997 break; 1998 } 1999 } 2000 2001 mutex_exit(&g->lock); 2002 return (error); 2003 } 2004 2005 /* 2006 * Register sleeping lock request corresponding to 2007 * flp on the given vhold object. 2008 * On success function returns 0, otherwise (if 2009 * lock request with the same flp is already 2010 * registered) function returns EEXIST. 2011 */ 2012 int 2013 nlm_slreq_register(struct nlm_host *hostp, struct nlm_vhold *nvp, 2014 struct flock64 *flp) 2015 { 2016 struct nlm_slreq *slr, *new_slr = NULL; 2017 int ret = EEXIST; 2018 2019 mutex_enter(&hostp->nh_lock); 2020 slr = nlm_slreq_find_locked(hostp, nvp, flp); 2021 if (slr != NULL) 2022 goto out; 2023 2024 mutex_exit(&hostp->nh_lock); 2025 new_slr = kmem_zalloc(sizeof (*slr), KM_SLEEP); 2026 bcopy(flp, &new_slr->nsr_fl, sizeof (*flp)); 2027 2028 mutex_enter(&hostp->nh_lock); 2029 slr = nlm_slreq_find_locked(hostp, nvp, flp); 2030 if (slr == NULL) { 2031 slr = new_slr; 2032 new_slr = NULL; 2033 ret = 0; 2034 2035 TAILQ_INSERT_TAIL(&nvp->nv_slreqs, slr, nsr_link); 2036 } 2037 2038 out: 2039 mutex_exit(&hostp->nh_lock); 2040 if (new_slr != NULL) 2041 kmem_free(new_slr, sizeof (*new_slr)); 2042 2043 return (ret); 2044 } 2045 2046 /* 2047 * Unregister sleeping lock request corresponding 2048 * to flp from the given vhold object. 2049 * On success function returns 0, otherwise (if 2050 * lock request corresponding to flp isn't found 2051 * on the given vhold) function returns ENOENT. 2052 */ 2053 int 2054 nlm_slreq_unregister(struct nlm_host *hostp, struct nlm_vhold *nvp, 2055 struct flock64 *flp) 2056 { 2057 struct nlm_slreq *slr; 2058 2059 mutex_enter(&hostp->nh_lock); 2060 slr = nlm_slreq_find_locked(hostp, nvp, flp); 2061 if (slr == NULL) { 2062 mutex_exit(&hostp->nh_lock); 2063 return (ENOENT); 2064 } 2065 2066 TAILQ_REMOVE(&nvp->nv_slreqs, slr, nsr_link); 2067 mutex_exit(&hostp->nh_lock); 2068 2069 kmem_free(slr, sizeof (*slr)); 2070 return (0); 2071 } 2072 2073 /* 2074 * Find sleeping lock request on the given vhold object by flp. 2075 */ 2076 struct nlm_slreq * 2077 nlm_slreq_find_locked(struct nlm_host *hostp, struct nlm_vhold *nvp, 2078 struct flock64 *flp) 2079 { 2080 struct nlm_slreq *slr = NULL; 2081 2082 ASSERT(MUTEX_HELD(&hostp->nh_lock)); 2083 TAILQ_FOREACH(slr, &nvp->nv_slreqs, nsr_link) { 2084 if (slr->nsr_fl.l_start == flp->l_start && 2085 slr->nsr_fl.l_len == flp->l_len && 2086 slr->nsr_fl.l_pid == flp->l_pid && 2087 slr->nsr_fl.l_type == flp->l_type) 2088 break; 2089 } 2090 2091 return (slr); 2092 } 2093 2094 /* 2095 * NLM tracks active share reservations made on the client side. 2096 * It needs to have a track of share reservations for two purposes 2097 * 1) to determine if nlm_host is busy (if it has active locks and/or 2098 * share reservations, it is) 2099 * 2) to recover active share reservations when NLM server reports 2100 * that it has rebooted. 2101 * 2102 * Unfortunately Illumos local share reservations manager (see os/share.c) 2103 * doesn't have an ability to lookup all reservations on the system 2104 * by sysid (like local lock manager) or get all reservations by sysid. 2105 * It tracks reservations per vnode and is able to get/looup them 2106 * on particular vnode. It's not what NLM needs. Thus it has that ugly 2107 * share reservations tracking scheme. 2108 */ 2109 2110 void 2111 nlm_shres_track(struct nlm_host *hostp, vnode_t *vp, struct shrlock *shrp) 2112 { 2113 struct nlm_shres *nsp, *nsp_new; 2114 2115 /* 2116 * NFS code must fill the s_owner, so that 2117 * s_own_len is never 0. 2118 */ 2119 ASSERT(shrp->s_own_len > 0); 2120 nsp_new = nlm_shres_create_item(shrp, vp); 2121 2122 mutex_enter(&hostp->nh_lock); 2123 for (nsp = hostp->nh_shrlist; nsp != NULL; nsp = nsp->ns_next) 2124 if (nsp->ns_vp == vp && nlm_shres_equal(shrp, nsp->ns_shr)) 2125 break; 2126 2127 if (nsp != NULL) { 2128 /* 2129 * Found a duplicate. Do nothing. 2130 */ 2131 2132 goto out; 2133 } 2134 2135 nsp = nsp_new; 2136 nsp_new = NULL; 2137 nsp->ns_next = hostp->nh_shrlist; 2138 hostp->nh_shrlist = nsp; 2139 2140 out: 2141 mutex_exit(&hostp->nh_lock); 2142 if (nsp_new != NULL) 2143 nlm_shres_destroy_item(nsp_new); 2144 } 2145 2146 void 2147 nlm_shres_untrack(struct nlm_host *hostp, vnode_t *vp, struct shrlock *shrp) 2148 { 2149 struct nlm_shres *nsp, *nsp_prev = NULL; 2150 2151 mutex_enter(&hostp->nh_lock); 2152 nsp = hostp->nh_shrlist; 2153 while (nsp != NULL) { 2154 if (nsp->ns_vp == vp && nlm_shres_equal(shrp, nsp->ns_shr)) { 2155 struct nlm_shres *nsp_del; 2156 2157 nsp_del = nsp; 2158 nsp = nsp->ns_next; 2159 if (nsp_prev != NULL) 2160 nsp_prev->ns_next = nsp; 2161 else 2162 hostp->nh_shrlist = nsp; 2163 2164 nlm_shres_destroy_item(nsp_del); 2165 continue; 2166 } 2167 2168 nsp_prev = nsp; 2169 nsp = nsp->ns_next; 2170 } 2171 2172 mutex_exit(&hostp->nh_lock); 2173 } 2174 2175 /* 2176 * Get a _copy_ of the list of all active share reservations 2177 * made by the given host. 2178 * NOTE: the list function returns _must_ be released using 2179 * nlm_free_shrlist(). 2180 */ 2181 struct nlm_shres * 2182 nlm_get_active_shres(struct nlm_host *hostp) 2183 { 2184 struct nlm_shres *nsp, *nslist = NULL; 2185 2186 mutex_enter(&hostp->nh_lock); 2187 for (nsp = hostp->nh_shrlist; nsp != NULL; nsp = nsp->ns_next) { 2188 struct nlm_shres *nsp_new; 2189 2190 nsp_new = nlm_shres_create_item(nsp->ns_shr, nsp->ns_vp); 2191 nsp_new->ns_next = nslist; 2192 nslist = nsp_new; 2193 } 2194 2195 mutex_exit(&hostp->nh_lock); 2196 return (nslist); 2197 } 2198 2199 /* 2200 * Free memory allocated for the active share reservations 2201 * list created by nlm_get_active_shres() function. 2202 */ 2203 void 2204 nlm_free_shrlist(struct nlm_shres *nslist) 2205 { 2206 struct nlm_shres *nsp; 2207 2208 while (nslist != NULL) { 2209 nsp = nslist; 2210 nslist = nslist->ns_next; 2211 2212 nlm_shres_destroy_item(nsp); 2213 } 2214 } 2215 2216 static bool_t 2217 nlm_shres_equal(struct shrlock *shrp1, struct shrlock *shrp2) 2218 { 2219 if (shrp1->s_sysid == shrp2->s_sysid && 2220 shrp1->s_pid == shrp2->s_pid && 2221 shrp1->s_own_len == shrp2->s_own_len && 2222 bcmp(shrp1->s_owner, shrp2->s_owner, 2223 shrp1->s_own_len) == 0) 2224 return (TRUE); 2225 2226 return (FALSE); 2227 } 2228 2229 static struct nlm_shres * 2230 nlm_shres_create_item(struct shrlock *shrp, vnode_t *vp) 2231 { 2232 struct nlm_shres *nsp; 2233 2234 nsp = kmem_alloc(sizeof (*nsp), KM_SLEEP); 2235 nsp->ns_shr = kmem_alloc(sizeof (*shrp), KM_SLEEP); 2236 bcopy(shrp, nsp->ns_shr, sizeof (*shrp)); 2237 nsp->ns_shr->s_owner = kmem_alloc(shrp->s_own_len, KM_SLEEP); 2238 bcopy(shrp->s_owner, nsp->ns_shr->s_owner, shrp->s_own_len); 2239 nsp->ns_vp = vp; 2240 2241 return (nsp); 2242 } 2243 2244 static void 2245 nlm_shres_destroy_item(struct nlm_shres *nsp) 2246 { 2247 kmem_free(nsp->ns_shr->s_owner, 2248 nsp->ns_shr->s_own_len); 2249 kmem_free(nsp->ns_shr, sizeof (struct shrlock)); 2250 kmem_free(nsp, sizeof (*nsp)); 2251 } 2252 2253 /* 2254 * Called by klmmod.c when lockd adds a network endpoint 2255 * on which we should begin RPC services. 2256 */ 2257 int 2258 nlm_svc_add_ep(struct file *fp, const char *netid, struct knetconfig *knc) 2259 { 2260 SVCMASTERXPRT *xprt = NULL; 2261 int error; 2262 2263 error = svc_tli_kcreate(fp, 0, (char *)netid, NULL, &xprt, 2264 &nlm_sct, NULL, NLM_SVCPOOL_ID, FALSE); 2265 if (error != 0) 2266 return (error); 2267 2268 (void) nlm_knc_to_netid(knc); 2269 return (0); 2270 } 2271 2272 /* 2273 * Start NLM service. 2274 */ 2275 int 2276 nlm_svc_starting(struct nlm_globals *g, struct file *fp, 2277 const char *netid, struct knetconfig *knc) 2278 { 2279 int error; 2280 enum clnt_stat stat; 2281 2282 VERIFY(g->run_status == NLM_ST_STARTING); 2283 VERIFY(g->nlm_gc_thread == NULL); 2284 2285 error = nlm_nsm_init_local(&g->nlm_nsm); 2286 if (error != 0) { 2287 NLM_ERR("Failed to initialize NSM handler " 2288 "(error=%d)\n", error); 2289 g->run_status = NLM_ST_DOWN; 2290 return (error); 2291 } 2292 2293 error = EIO; 2294 2295 /* 2296 * Create an NLM garbage collector thread that will 2297 * clean up stale vholds and hosts objects. 2298 */ 2299 g->nlm_gc_thread = zthread_create(NULL, 0, nlm_gc, 2300 g, 0, minclsyspri); 2301 2302 /* 2303 * Send SIMU_CRASH to local statd to report that 2304 * NLM started, so that statd can report other hosts 2305 * about NLM state change. 2306 */ 2307 2308 stat = nlm_nsm_simu_crash(&g->nlm_nsm); 2309 if (stat != RPC_SUCCESS) { 2310 NLM_ERR("Failed to connect to local statd " 2311 "(rpcerr=%d)\n", stat); 2312 goto shutdown_lm; 2313 } 2314 2315 stat = nlm_nsm_stat(&g->nlm_nsm, &g->nsm_state); 2316 if (stat != RPC_SUCCESS) { 2317 NLM_ERR("Failed to get the status of local statd " 2318 "(rpcerr=%d)\n", stat); 2319 goto shutdown_lm; 2320 } 2321 2322 g->grace_threshold = ddi_get_lbolt() + 2323 SEC_TO_TICK(g->grace_period); 2324 2325 /* Register endpoint used for communications with local NLM */ 2326 error = nlm_svc_add_ep(fp, netid, knc); 2327 if (error != 0) 2328 goto shutdown_lm; 2329 2330 (void) svc_pool_control(NLM_SVCPOOL_ID, 2331 SVCPSET_SHUTDOWN_PROC, (void *)nlm_pool_shutdown); 2332 g->run_status = NLM_ST_UP; 2333 return (0); 2334 2335 shutdown_lm: 2336 mutex_enter(&g->lock); 2337 g->run_status = NLM_ST_STOPPING; 2338 mutex_exit(&g->lock); 2339 2340 nlm_svc_stopping(g); 2341 return (error); 2342 } 2343 2344 /* 2345 * Called when the server pool is destroyed, so that 2346 * all transports are closed and no any server threads 2347 * exist. 2348 * 2349 * Just call lm_shutdown() to shut NLM down properly. 2350 */ 2351 static void 2352 nlm_pool_shutdown(void) 2353 { 2354 (void) lm_shutdown(); 2355 } 2356 2357 /* 2358 * Stop NLM service, cleanup all resources 2359 * NLM owns at the moment. 2360 * 2361 * NOTE: NFS code can call NLM while it's 2362 * stopping or even if it's shut down. Any attempt 2363 * to lock file either on client or on the server 2364 * will fail if NLM isn't in NLM_ST_UP state. 2365 */ 2366 void 2367 nlm_svc_stopping(struct nlm_globals *g) 2368 { 2369 mutex_enter(&g->lock); 2370 ASSERT(g->run_status == NLM_ST_STOPPING); 2371 2372 /* 2373 * Ask NLM GC thread to exit and wait until it dies. 2374 */ 2375 cv_signal(&g->nlm_gc_sched_cv); 2376 while (g->nlm_gc_thread != NULL) 2377 cv_wait(&g->nlm_gc_finish_cv, &g->lock); 2378 2379 mutex_exit(&g->lock); 2380 2381 /* 2382 * Cleanup locks owned by NLM hosts. 2383 * NOTE: New hosts won't be created while 2384 * NLM is stopping. 2385 */ 2386 while (!avl_is_empty(&g->nlm_hosts_tree)) { 2387 struct nlm_host *hostp; 2388 int busy_hosts = 0; 2389 2390 /* 2391 * Iterate through all NLM hosts in the system 2392 * and drop the locks they own by force. 2393 */ 2394 hostp = avl_first(&g->nlm_hosts_tree); 2395 while (hostp != NULL) { 2396 /* Cleanup all client and server side locks */ 2397 nlm_client_cancel_all(g, hostp); 2398 nlm_host_notify_server(hostp, 0); 2399 2400 mutex_enter(&hostp->nh_lock); 2401 nlm_host_gc_vholds(hostp); 2402 if (hostp->nh_refs > 0 || nlm_host_has_locks(hostp)) { 2403 /* 2404 * Oh, it seems the host is still busy, let 2405 * it some time to release and go to the 2406 * next one. 2407 */ 2408 2409 mutex_exit(&hostp->nh_lock); 2410 hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp); 2411 busy_hosts++; 2412 continue; 2413 } 2414 2415 mutex_exit(&hostp->nh_lock); 2416 hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp); 2417 } 2418 2419 /* 2420 * All hosts go to nlm_idle_hosts list after 2421 * all locks they own are cleaned up and last refereces 2422 * were dropped. Just destroy all hosts in nlm_idle_hosts 2423 * list, they can not be removed from there while we're 2424 * in stopping state. 2425 */ 2426 while ((hostp = TAILQ_FIRST(&g->nlm_idle_hosts)) != NULL) { 2427 nlm_host_unregister(g, hostp); 2428 nlm_host_destroy(hostp); 2429 } 2430 2431 if (busy_hosts > 0) { 2432 /* 2433 * There're some hosts that weren't cleaned 2434 * up. Probably they're in resource cleanup 2435 * process. Give them some time to do drop 2436 * references. 2437 */ 2438 delay(MSEC_TO_TICK(500)); 2439 } 2440 } 2441 2442 ASSERT(TAILQ_EMPTY(&g->nlm_slocks)); 2443 2444 nlm_nsm_fini(&g->nlm_nsm); 2445 g->lockd_pid = 0; 2446 g->run_status = NLM_ST_DOWN; 2447 } 2448 2449 /* 2450 * Returns TRUE if the given vnode has 2451 * any active or sleeping locks. 2452 */ 2453 int 2454 nlm_vp_active(const vnode_t *vp) 2455 { 2456 struct nlm_globals *g; 2457 struct nlm_host *hostp; 2458 struct nlm_vhold *nvp; 2459 int active = 0; 2460 2461 g = zone_getspecific(nlm_zone_key, curzone); 2462 2463 /* 2464 * Server side NLM has locks on the given vnode 2465 * if there exist a vhold object that holds 2466 * the given vnode "vp" in one of NLM hosts. 2467 */ 2468 mutex_enter(&g->lock); 2469 hostp = avl_first(&g->nlm_hosts_tree); 2470 while (hostp != NULL) { 2471 mutex_enter(&hostp->nh_lock); 2472 nvp = nlm_vhold_find_locked(hostp, vp); 2473 mutex_exit(&hostp->nh_lock); 2474 if (nvp != NULL) { 2475 active = 1; 2476 break; 2477 } 2478 2479 hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp); 2480 } 2481 2482 mutex_exit(&g->lock); 2483 return (active); 2484 } 2485 2486 /* 2487 * Called right before NFS export is going to 2488 * dissapear. The function finds all vnodes 2489 * belonging to the given export and cleans 2490 * all remote locks and share reservations 2491 * on them. 2492 */ 2493 void 2494 nlm_unexport(struct exportinfo *exi) 2495 { 2496 struct nlm_globals *g; 2497 struct nlm_host *hostp; 2498 2499 g = zone_getspecific(nlm_zone_key, curzone); 2500 2501 mutex_enter(&g->lock); 2502 hostp = avl_first(&g->nlm_hosts_tree); 2503 while (hostp != NULL) { 2504 struct nlm_vhold *nvp; 2505 2506 mutex_enter(&hostp->nh_lock); 2507 TAILQ_FOREACH(nvp, &hostp->nh_vholds_list, nv_link) { 2508 vnode_t *vp; 2509 2510 nvp->nv_refcnt++; 2511 mutex_exit(&hostp->nh_lock); 2512 2513 vp = nvp->nv_vp; 2514 2515 if (!EQFSID(&exi->exi_fsid, &vp->v_vfsp->vfs_fsid)) 2516 goto next_iter; 2517 2518 /* 2519 * Ok, it we found out that vnode vp is under 2520 * control by the exportinfo exi, now we need 2521 * to drop all locks from this vnode, let's 2522 * do it. 2523 */ 2524 nlm_vhold_clean(nvp, hostp->nh_sysid); 2525 2526 next_iter: 2527 mutex_enter(&hostp->nh_lock); 2528 nvp->nv_refcnt--; 2529 } 2530 2531 mutex_exit(&hostp->nh_lock); 2532 hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp); 2533 } 2534 2535 mutex_exit(&g->lock); 2536 } 2537 2538 /* 2539 * Allocate new unique sysid. 2540 * In case of failure (no available sysids) 2541 * return LM_NOSYSID. 2542 */ 2543 sysid_t 2544 nlm_sysid_alloc(void) 2545 { 2546 sysid_t ret_sysid = LM_NOSYSID; 2547 2548 rw_enter(&lm_lck, RW_WRITER); 2549 if (nlm_sysid_nidx > LM_SYSID_MAX) 2550 nlm_sysid_nidx = LM_SYSID; 2551 2552 if (!BT_TEST(nlm_sysid_bmap, nlm_sysid_nidx)) { 2553 BT_SET(nlm_sysid_bmap, nlm_sysid_nidx); 2554 ret_sysid = nlm_sysid_nidx++; 2555 } else { 2556 index_t id; 2557 2558 id = bt_availbit(nlm_sysid_bmap, NLM_BMAP_NITEMS); 2559 if (id > 0) { 2560 nlm_sysid_nidx = id + 1; 2561 ret_sysid = id; 2562 BT_SET(nlm_sysid_bmap, id); 2563 } 2564 } 2565 2566 rw_exit(&lm_lck); 2567 return (ret_sysid); 2568 } 2569 2570 void 2571 nlm_sysid_free(sysid_t sysid) 2572 { 2573 ASSERT(sysid >= LM_SYSID && sysid <= LM_SYSID_MAX); 2574 2575 rw_enter(&lm_lck, RW_WRITER); 2576 ASSERT(BT_TEST(nlm_sysid_bmap, sysid)); 2577 BT_CLEAR(nlm_sysid_bmap, sysid); 2578 rw_exit(&lm_lck); 2579 } 2580 2581 /* 2582 * Return true if the request came from a local caller. 2583 * By necessity, this "knows" the netid names invented 2584 * in lm_svc() and nlm_netid_from_knetconfig(). 2585 */ 2586 bool_t 2587 nlm_caller_is_local(SVCXPRT *transp) 2588 { 2589 char *netid; 2590 struct netbuf *rtaddr; 2591 2592 netid = svc_getnetid(transp); 2593 rtaddr = svc_getrpccaller(transp); 2594 2595 if (netid == NULL) 2596 return (FALSE); 2597 2598 if (strcmp(netid, "ticlts") == 0 || 2599 strcmp(netid, "ticotsord") == 0) 2600 return (TRUE); 2601 2602 if (strcmp(netid, "tcp") == 0 || strcmp(netid, "udp") == 0) { 2603 struct sockaddr_in *sin = (void *)rtaddr->buf; 2604 if (sin->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) 2605 return (TRUE); 2606 } 2607 if (strcmp(netid, "tcp6") == 0 || strcmp(netid, "udp6") == 0) { 2608 struct sockaddr_in6 *sin6 = (void *)rtaddr->buf; 2609 if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr)) 2610 return (TRUE); 2611 } 2612 2613 return (FALSE); /* unknown transport */ 2614 } 2615 2616 /* 2617 * Get netid string correspondig to the given knetconfig. 2618 * If not done already, save knc->knc_rdev in our table. 2619 */ 2620 const char * 2621 nlm_knc_to_netid(struct knetconfig *knc) 2622 { 2623 int i; 2624 dev_t rdev; 2625 struct nlm_knc *nc; 2626 const char *netid = NULL; 2627 2628 rw_enter(&lm_lck, RW_READER); 2629 for (i = 0; i < NLM_KNCS; i++) { 2630 nc = &nlm_netconfigs[i]; 2631 2632 if (nc->n_knc.knc_semantics == knc->knc_semantics && 2633 strcmp(nc->n_knc.knc_protofmly, 2634 knc->knc_protofmly) == 0) { 2635 netid = nc->n_netid; 2636 rdev = nc->n_knc.knc_rdev; 2637 break; 2638 } 2639 } 2640 rw_exit(&lm_lck); 2641 2642 if (netid != NULL && rdev == NODEV) { 2643 rw_enter(&lm_lck, RW_WRITER); 2644 if (nc->n_knc.knc_rdev == NODEV) 2645 nc->n_knc.knc_rdev = knc->knc_rdev; 2646 rw_exit(&lm_lck); 2647 } 2648 2649 return (netid); 2650 } 2651 2652 /* 2653 * Get a knetconfig corresponding to the given netid. 2654 * If there's no knetconfig for this netid, ENOENT 2655 * is returned. 2656 */ 2657 int 2658 nlm_knc_from_netid(const char *netid, struct knetconfig *knc) 2659 { 2660 int i, ret; 2661 2662 ret = ENOENT; 2663 for (i = 0; i < NLM_KNCS; i++) { 2664 struct nlm_knc *nknc; 2665 2666 nknc = &nlm_netconfigs[i]; 2667 if (strcmp(netid, nknc->n_netid) == 0 && 2668 nknc->n_knc.knc_rdev != NODEV) { 2669 *knc = nknc->n_knc; 2670 ret = 0; 2671 break; 2672 } 2673 } 2674 2675 return (ret); 2676 } 2677 2678 void 2679 nlm_cprsuspend(void) 2680 { 2681 struct nlm_globals *g; 2682 2683 rw_enter(&lm_lck, RW_READER); 2684 TAILQ_FOREACH(g, &nlm_zones_list, nlm_link) 2685 nlm_suspend_zone(g); 2686 2687 rw_exit(&lm_lck); 2688 } 2689 2690 void 2691 nlm_cprresume(void) 2692 { 2693 struct nlm_globals *g; 2694 2695 rw_enter(&lm_lck, RW_READER); 2696 TAILQ_FOREACH(g, &nlm_zones_list, nlm_link) 2697 nlm_resume_zone(g); 2698 2699 rw_exit(&lm_lck); 2700 } 2701 2702 static void 2703 nlm_nsm_clnt_init(CLIENT *clnt, struct nlm_nsm *nsm) 2704 { 2705 (void) clnt_tli_kinit(clnt, &nsm->ns_knc, &nsm->ns_addr, 0, 2706 NLM_RPC_RETRIES, kcred); 2707 } 2708 2709 static void 2710 nlm_netbuf_to_netobj(struct netbuf *addr, int *family, netobj *obj) 2711 { 2712 /* LINTED pointer alignment */ 2713 struct sockaddr *sa = (struct sockaddr *)addr->buf; 2714 2715 *family = sa->sa_family; 2716 2717 switch (sa->sa_family) { 2718 case AF_INET: { 2719 /* LINTED pointer alignment */ 2720 struct sockaddr_in *sin = (struct sockaddr_in *)sa; 2721 2722 obj->n_len = sizeof (sin->sin_addr); 2723 obj->n_bytes = (char *)&sin->sin_addr; 2724 break; 2725 } 2726 2727 case AF_INET6: { 2728 /* LINTED pointer alignment */ 2729 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; 2730 2731 obj->n_len = sizeof (sin6->sin6_addr); 2732 obj->n_bytes = (char *)&sin6->sin6_addr; 2733 break; 2734 } 2735 2736 default: 2737 VERIFY(0); 2738 break; 2739 } 2740 } 2741