1 /*- 2 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ 3 * Authors: Doug Rabson <dfr@rabson.org> 4 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
 */

#include "opt_inet6.h"

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/fail.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lockf.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#if __FreeBSD_version >= 700000
#include <sys/priv.h>
#endif
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <sys/unistd.h>
#include <sys/vnode.h>

#include <nfs/nfsproto.h>
#include <nfs/nfs_lock.h>

#include <nlm/nlm_prot.h>
#include <nlm/sm_inter.h>
#include <nlm/nlm.h>
#include <rpc/rpc_com.h>
#include <rpc/rpcb_prot.h>

MALLOC_DEFINE(M_NLM, "NLM", "Network Lock Manager");

/*
 * If a host is inactive (and holds no locks) for this amount of
 * seconds, we consider it idle and stop tracking it.
 */
#define NLM_IDLE_TIMEOUT	30

/*
 * We check the host list for idle every few seconds.
 */
#define NLM_IDLE_PERIOD		5

/*
 * We only look for GRANTED_RES messages for a little while.
 */
#define NLM_EXPIRE_TIMEOUT	10

/*
 * Support for sysctl vfs.nlm.sysid
 */
static SYSCTL_NODE(_vfs, OID_AUTO, nlm, CTLFLAG_RW, NULL,
    "Network Lock Manager");
static SYSCTL_NODE(_vfs_nlm, OID_AUTO, sysid, CTLFLAG_RW, NULL, "");

/*
 * Syscall hooks.  On pre-7.0 kernels a sysent must be filled in by
 * hand; newer kernels generate it with MAKE_SYSENT.
 */
static int nlm_syscall_offset = SYS_nlm_syscall;
static struct sysent nlm_syscall_prev_sysent;
#if __FreeBSD_version < 700000
static struct sysent nlm_syscall_sysent = {
	(sizeof(struct nlm_syscall_args) / sizeof(register_t)) | SYF_MPSAFE,
	(sy_call_t *) nlm_syscall
};
#else
MAKE_SYSENT(nlm_syscall);
#endif
static bool_t nlm_syscall_registered = FALSE;

/*
 * Debug level passed in from userland. We also support a sysctl hook
 * so that it can be changed on a live system.
 */
static int nlm_debug_level;
SYSCTL_INT(_debug, OID_AUTO, nlm_debug, CTLFLAG_RW, &nlm_debug_level, 0, "");

#define NLM_DEBUG(_level, args...)			\
	do {						\
		if (nlm_debug_level >= (_level))	\
			log(LOG_DEBUG, args);		\
	} while(0)
#define NLM_ERR(args...)			\
	do {					\
		log(LOG_ERR, args);		\
	} while(0)

/*
 * Grace period handling. The value of nlm_grace_threshold is the
 * value of time_uptime after which we are serving requests normally.
 */
static time_t nlm_grace_threshold;

/*
 * We check for idle hosts if time_uptime is greater than
 * nlm_next_idle_check.
 */
static time_t nlm_next_idle_check;

/*
 * A flag to indicate the server is already running.
 */
static int nlm_is_running;

/*
 * A socket to use for RPC - shared by all IPv4 RPC clients.
 */
static struct socket *nlm_socket;

#ifdef INET6

/*
 * A socket to use for RPC - shared by all IPv6 RPC clients.
 */
static struct socket *nlm_socket6;

#endif

/*
 * An RPC client handle that can be used to communicate with the local
 * NSM.
 */
static CLIENT *nlm_nsm;

/*
 * An AUTH handle for the server's creds.
 */
static AUTH *nlm_auth;

/*
 * A zero timeval for sending async RPC messages.
 */
struct timeval nlm_zero_tv = { 0, 0 };

/*
 * The local NSM state number
 */
int nlm_nsm_state;


/*
 * A lock to protect the host list and waiting lock list.
 */
static struct mtx nlm_global_lock;

/*
 * Locks:
 * (l)		locked by nh_lock
 * (s)		only accessed via server RPC which is single threaded
 * (g)		locked by nlm_global_lock
 * (c)		const until freeing
 * (a)		modified using atomic ops
 */

/*
 * A pending client-side lock request, stored on the nlm_waiting_locks
 * list.
 */
struct nlm_waiting_lock {
	TAILQ_ENTRY(nlm_waiting_lock) nw_link; /* (g) */
	bool_t		nw_waiting;	       /* (g) */
	nlm4_lock	nw_lock;	       /* (c) */
	union nfsfh	nw_fh;		       /* (c) */
	struct vnode	*nw_vp;		       /* (c) */
};
TAILQ_HEAD(nlm_waiting_lock_list, nlm_waiting_lock);

struct nlm_waiting_lock_list nlm_waiting_locks; /* (g) */

/*
 * A pending server-side asynchronous lock request, stored on the
 * nh_pending list of the NLM host.
 */
struct nlm_async_lock {
	TAILQ_ENTRY(nlm_async_lock) af_link; /* (l) host's list of locks */
	struct task	af_task;	/* (c) async callback details */
	void		*af_cookie;	/* (l) lock manager cancel token */
	struct vnode	*af_vp;		/* (l) vnode to lock */
	struct flock	af_fl;		/* (c) lock details */
	struct nlm_host *af_host;	/* (c) host which is locking */
	CLIENT		*af_rpc;	/* (c) rpc client to send message */
	nlm4_testargs	af_granted;	/* (c) notification details */
	time_t		af_expiretime;	/* (c) notification time */
};
TAILQ_HEAD(nlm_async_lock_list, nlm_async_lock);

/*
 * NLM host.
 */
enum nlm_host_state {
	NLM_UNMONITORED,
	NLM_MONITORED,
	NLM_MONITOR_FAILED,
	NLM_RECOVERING
};

struct nlm_rpc {
	CLIENT		*nr_client;	/* (l) RPC client handle */
	time_t		nr_create_time;	/* (l) when client was created */
};

struct nlm_host {
	struct mtx	nh_lock;
	volatile u_int	nh_refs;	/* (a) reference count */
	TAILQ_ENTRY(nlm_host) nh_link;	/* (g) global list of hosts */
	char		nh_caller_name[MAXNAMELEN]; /* (c) printable name of host */
	uint32_t	nh_sysid;	/* (c) our allocated system ID */
	char		nh_sysid_string[10]; /* (c) string rep. of sysid */
	struct sockaddr_storage	nh_addr; /* (s) remote address of host */
	struct nlm_rpc	nh_srvrpc;	/* (l) RPC for server replies */
	struct nlm_rpc	nh_clntrpc;	/* (l) RPC for client requests */
	rpcvers_t	nh_vers;	/* (s) NLM version of host */
	int		nh_state;	/* (s) last seen NSM state of host */
	enum nlm_host_state nh_monstate; /* (l) local NSM monitoring state */
	time_t		nh_idle_timeout; /* (s) Time at which host is idle */
	struct sysctl_ctx_list nh_sysctl; /* (c) vfs.nlm.sysid nodes */
	uint32_t	nh_grantcookie;	/* (l) grant cookie counter */
	struct nlm_async_lock_list nh_pending; /* (l) pending async locks */
	struct nlm_async_lock_list nh_granted; /* (l) granted locks */
	struct nlm_async_lock_list nh_finished; /* (l) finished async locks */
};
TAILQ_HEAD(nlm_host_list, nlm_host);

static struct nlm_host_list nlm_hosts; /* (g) */
static uint32_t nlm_next_sysid = 1;    /* (g) */

static void	nlm_host_unmonitor(struct nlm_host *);

/*
 * The opaque cookie sent with NLM_GRANTED messages: identifies both
 * the host (sysid) and the individual grant so replies can be matched
 * back to the originating async lock.
 */
struct nlm_grantcookie {
	uint32_t	ng_sysid;
	uint32_t	ng_cookie;
};

/* Extract the sysid half of a grant cookie carried in a netobj. */
static inline uint32_t
ng_sysid(struct netobj *src)
{

	return ((struct nlm_grantcookie *)src->n_bytes)->ng_sysid;
}

/* Extract the per-grant counter half of a grant cookie. */
static inline uint32_t
ng_cookie(struct netobj *src)
{

	return ((struct nlm_grantcookie *)src->n_bytes)->ng_cookie;
}

/**********************************************************************/

/*
 * Initialise NLM globals.  Runs at SYSINIT time; registers the
 * nlm_syscall entry point used by rpc.lockd.
 */
static void
nlm_init(void *dummy)
{
	int error;

	mtx_init(&nlm_global_lock, "nlm_global_lock", NULL, MTX_DEF);
	TAILQ_INIT(&nlm_waiting_locks);
	TAILQ_INIT(&nlm_hosts);

	error = syscall_register(&nlm_syscall_offset, &nlm_syscall_sysent,
	    &nlm_syscall_prev_sysent, SY_THR_STATIC_KLD);
	if (error)
		NLM_ERR("Can't register NLM syscall\n");
	else
		nlm_syscall_registered = TRUE;
}
SYSINIT(nlm_init, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_init, NULL);

/* Undo nlm_init's syscall registration on module unload. */
static void
nlm_uninit(void *dummy)
{

	if (nlm_syscall_registered)
		syscall_deregister(&nlm_syscall_offset,
		    &nlm_syscall_prev_sysent);
}
SYSUNINIT(nlm_uninit, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_uninit, NULL);

/*
 * Create a netobj from an arbitrary source.  The netobj owns a
 * malloc'ed copy of the data; the caller frees it via xdr_free or
 * free(dst->n_bytes, type).
 */
void
nlm_make_netobj(struct netobj *dst, caddr_t src, size_t srcsize,
    struct malloc_type *type)
{

	dst->n_len = srcsize;
	dst->n_bytes = malloc(srcsize, type, M_WAITOK);
	memcpy(dst->n_bytes, src, srcsize);
}

/*
 * Copy a struct netobj.  The copy is deep: the destination gets its
 * own buffer.
 */
void
nlm_copy_netobj(struct netobj *dst, struct netobj *src,
    struct malloc_type *type)
{

	nlm_make_netobj(dst, src->n_bytes, src->n_len, type);
}


/*
 * Create an RPC client handle for the given (address,prog,vers)
 * triple using UDP.
 */
static CLIENT *
nlm_get_rpc(struct sockaddr *sa, rpcprog_t prog, rpcvers_t vers)
{
	char *wchan = "nlmrcv";
	const char* protofmly;
	struct sockaddr_storage ss;
	struct socket *so;
	CLIENT *rpcb;
	struct timeval timo;
	RPCB parms;
	char *uaddr;
	enum clnt_stat stat = RPC_SUCCESS;
	int rpcvers = RPCBVERS4;
	bool_t do_tcp = FALSE;
	bool_t tryagain = FALSE;
	struct portmap mapping;
	u_short port = 0;

	/*
	 * First we need to contact the remote RPCBIND service to find
	 * the right port.
	 */
	memcpy(&ss, sa, sa->sa_len);
	switch (ss.ss_family) {
	case AF_INET:
		/* Port 111 is the well-known rpcbind/portmap port. */
		((struct sockaddr_in *)&ss)->sin_port = htons(111);
		protofmly = "inet";
		so = nlm_socket;
		break;

#ifdef INET6
	case AF_INET6:
		((struct sockaddr_in6 *)&ss)->sin6_port = htons(111);
		protofmly = "inet6";
		so = nlm_socket6;
		break;
#endif

	default:
		/*
		 * Unsupported address family - fail.
		 */
		return (NULL);
	}

	rpcb = clnt_dg_create(so, (struct sockaddr *)&ss,
	    RPCBPROG, rpcvers, 0, 0);
	if (!rpcb)
		return (NULL);

try_tcp:
	parms.r_prog = prog;
	parms.r_vers = vers;
	if (do_tcp)
		parms.r_netid = "tcp";
	else
		parms.r_netid = "udp";
	parms.r_addr = "";
	parms.r_owner = "";

	/*
	 * Use the default timeout.
	 */
	timo.tv_sec = 25;
	timo.tv_usec = 0;
again:
	/*
	 * Protocol downgrade loop: start with RPCBIND v4, fall back
	 * to v3 and finally to the original portmap protocol.
	 */
	switch (rpcvers) {
	case RPCBVERS4:
	case RPCBVERS:
		/*
		 * Try RPCBIND 4 then 3.
		 */
		uaddr = NULL;
		stat = CLNT_CALL(rpcb, (rpcprog_t) RPCBPROC_GETADDR,
		    (xdrproc_t) xdr_rpcb, &parms,
		    (xdrproc_t) xdr_wrapstring, &uaddr, timo);
		if (stat == RPC_SUCCESS) {
			/*
			 * We have a reply from the remote RPCBIND - turn it
			 * into an appropriate address and make a new client
			 * that can talk to the remote NLM.
			 *
			 * XXX fixup IPv6 scope ID.
			 */
			struct netbuf *a;
			a = __rpc_uaddr2taddr_af(ss.ss_family, uaddr);
			if (!a) {
				/* Unparseable uaddr - try an older version. */
				tryagain = TRUE;
			} else {
				tryagain = FALSE;
				memcpy(&ss, a->buf, a->len);
				free(a->buf, M_RPC);
				free(a, M_RPC);
				xdr_free((xdrproc_t) xdr_wrapstring, &uaddr);
			}
		}
		if (tryagain || stat == RPC_PROGVERSMISMATCH) {
			if (rpcvers == RPCBVERS4)
				rpcvers = RPCBVERS;
			else if (rpcvers == RPCBVERS)
				rpcvers = PMAPVERS;
			CLNT_CONTROL(rpcb, CLSET_VERS, &rpcvers);
			goto again;
		}
		break;
	case PMAPVERS:
		/*
		 * Try portmap.
		 */
		mapping.pm_prog = parms.r_prog;
		mapping.pm_vers = parms.r_vers;
		mapping.pm_prot = do_tcp ? IPPROTO_TCP : IPPROTO_UDP;
		mapping.pm_port = 0;

		stat = CLNT_CALL(rpcb, (rpcprog_t) PMAPPROC_GETPORT,
		    (xdrproc_t) xdr_portmap, &mapping,
		    (xdrproc_t) xdr_u_short, &port, timo);

		if (stat == RPC_SUCCESS) {
			switch (ss.ss_family) {
			case AF_INET:
				((struct sockaddr_in *)&ss)->sin_port =
					htons(port);
				break;

#ifdef INET6
			case AF_INET6:
				((struct sockaddr_in6 *)&ss)->sin6_port =
					htons(port);
				break;
#endif
			}
		}
		break;
	default:
		panic("invalid rpcvers %d", rpcvers);
	}
	/*
	 * We may have a positive response from the portmapper, but the NLM
	 * service was not found. Make sure we received a valid port.
	 */
	switch (ss.ss_family) {
	case AF_INET:
		port = ((struct sockaddr_in *)&ss)->sin_port;
		break;
#ifdef INET6
	case AF_INET6:
		port = ((struct sockaddr_in6 *)&ss)->sin6_port;
		break;
#endif
	}
	if (stat != RPC_SUCCESS || !port) {
		/*
		 * If we were able to talk to rpcbind or portmap, but the udp
		 * variant wasn't available, ask about tcp.
		 *
		 * XXX - We could also check for a TCP portmapper, but
		 * if the host is running a portmapper at all, we should be able
		 * to hail it over UDP.
		 */
		if (stat == RPC_SUCCESS && !do_tcp) {
			do_tcp = TRUE;
			goto try_tcp;
		}

		/* Otherwise, bad news. */
		NLM_ERR("NLM: failed to contact remote rpcbind, "
		    "stat = %d, port = %d\n", (int) stat, port);
		CLNT_DESTROY(rpcb);
		return (NULL);
	}

	if (do_tcp) {
		/*
		 * Destroy the UDP client we used to speak to rpcbind and
		 * recreate as a TCP client.
		 */
		struct netconfig *nconf = NULL;

		CLNT_DESTROY(rpcb);

		switch (ss.ss_family) {
		case AF_INET:
			nconf = getnetconfigent("tcp");
			break;
#ifdef INET6
		case AF_INET6:
			nconf = getnetconfigent("tcp6");
			break;
#endif
		}

		/*
		 * NOTE(review): clnt_reconnect_create's result is used
		 * unchecked here - confirm it cannot return NULL for a
		 * valid nconf in this kernel version.
		 */
		rpcb = clnt_reconnect_create(nconf, (struct sockaddr *)&ss,
		    prog, vers, 0, 0);
		CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan);
		rpcb->cl_auth = nlm_auth;

	} else {
		/*
		 * Re-use the client we used to speak to rpcbind.
		 */
		CLNT_CONTROL(rpcb, CLSET_SVC_ADDR, &ss);
		CLNT_CONTROL(rpcb, CLSET_PROG, &prog);
		CLNT_CONTROL(rpcb, CLSET_VERS, &vers);
		CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan);
		rpcb->cl_auth = nlm_auth;
	}

	return (rpcb);
}

/*
 * This async callback fires after an async lock request has been
 * granted. We notify the host which initiated the request.
 */
static void
nlm_lock_callback(void *arg, int pending)
{
	struct nlm_async_lock *af = (struct nlm_async_lock *) arg;
	struct rpc_callextra ext;

	NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) granted,"
	    " cookie %d:%d\n", af, af->af_host->nh_caller_name,
	    af->af_host->nh_sysid, ng_sysid(&af->af_granted.cookie),
	    ng_cookie(&af->af_granted.cookie));

	/*
	 * Send the results back to the host.
	 *
	 * Note: there is a possible race here with nlm_host_notify
	 * destroying the RPC client. To avoid problems, the first
	 * thing nlm_host_notify does is to cancel pending async lock
	 * requests.
	 */
	memset(&ext, 0, sizeof(ext));
	ext.rc_auth = nlm_auth;
	if (af->af_host->nh_vers == NLM_VERS4) {
		nlm4_granted_msg_4(&af->af_granted,
		    NULL, af->af_rpc, &ext, nlm_zero_tv);
	} else {
		/*
		 * Back-convert to legacy protocol
		 */
		nlm_testargs granted;
		granted.cookie = af->af_granted.cookie;
		granted.exclusive = af->af_granted.exclusive;
		granted.alock.caller_name =
			af->af_granted.alock.caller_name;
		granted.alock.fh = af->af_granted.alock.fh;
		granted.alock.oh = af->af_granted.alock.oh;
		granted.alock.svid = af->af_granted.alock.svid;
		granted.alock.l_offset =
			af->af_granted.alock.l_offset;
		granted.alock.l_len =
			af->af_granted.alock.l_len;

		nlm_granted_msg_1(&granted,
		    NULL, af->af_rpc, &ext, nlm_zero_tv);
	}

	/*
	 * Move this entry to the nh_granted list.
	 */
	af->af_expiretime = time_uptime + NLM_EXPIRE_TIMEOUT;
	mtx_lock(&af->af_host->nh_lock);
	TAILQ_REMOVE(&af->af_host->nh_pending, af, af_link);
	TAILQ_INSERT_TAIL(&af->af_host->nh_granted, af, af_link);
	mtx_unlock(&af->af_host->nh_lock);
}

/*
 * Free an async lock request. The request must have been removed from
 * any list.
 */
static void
nlm_free_async_lock(struct nlm_async_lock *af)
{
	/*
	 * Free an async lock.
	 */
	if (af->af_rpc)
		CLNT_RELEASE(af->af_rpc);
	xdr_free((xdrproc_t) xdr_nlm4_testargs, &af->af_granted);
	if (af->af_vp)
		vrele(af->af_vp);
	free(af, M_NLM);
}

/*
 * Cancel our async request - this must be called with
 * af->nh_host->nh_lock held. This is slightly complicated by a
 * potential race with our own callback. If we fail to cancel the
 * lock, it must already have been granted - we make sure our async
 * task has completed by calling taskqueue_drain in this case.
 */
static int
nlm_cancel_async_lock(struct nlm_async_lock *af)
{
	struct nlm_host *host = af->af_host;
	int error;

	mtx_assert(&host->nh_lock, MA_OWNED);

	/* Drop nh_lock across the VOP call; it may sleep. */
	mtx_unlock(&host->nh_lock);

	error = VOP_ADVLOCKASYNC(af->af_vp, NULL, F_CANCEL, &af->af_fl,
	    F_REMOTE, NULL, &af->af_cookie);

	if (error) {
		/*
		 * We failed to cancel - make sure our callback has
		 * completed before we continue.
		 */
		taskqueue_drain(taskqueue_thread, &af->af_task);
	}

	mtx_lock(&host->nh_lock);

	if (!error) {
		NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) "
		    "cancelled\n", af, host->nh_caller_name, host->nh_sysid);

		/*
		 * Remove from the nh_pending list and free now that
		 * we are safe from the callback.
		 */
		TAILQ_REMOVE(&host->nh_pending, af, af_link);
		mtx_unlock(&host->nh_lock);
		nlm_free_async_lock(af);
		mtx_lock(&host->nh_lock);
	}

	/* Returns with nh_lock held, matching the entry condition. */
	return (error);
}

/*
 * Reap async locks whose GRANTED_RES wait has timed out and free any
 * locks on the host's finished list.  Takes and releases nh_lock; the
 * lock is dropped around each nlm_free_async_lock call since freeing
 * may sleep (vrele, CLNT_RELEASE).
 */
static void
nlm_check_expired_locks(struct nlm_host *host)
{
	struct nlm_async_lock *af;
	time_t uptime = time_uptime;

	mtx_lock(&host->nh_lock);
	while ((af = TAILQ_FIRST(&host->nh_granted)) != NULL
	    && uptime >= af->af_expiretime) {
		NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) expired,"
		    " cookie %d:%d\n", af, af->af_host->nh_caller_name,
		    af->af_host->nh_sysid, ng_sysid(&af->af_granted.cookie),
		    ng_cookie(&af->af_granted.cookie));
		TAILQ_REMOVE(&host->nh_granted, af, af_link);
		mtx_unlock(&host->nh_lock);
		nlm_free_async_lock(af);
		mtx_lock(&host->nh_lock);
	}
	while ((af = TAILQ_FIRST(&host->nh_finished)) != NULL) {
		TAILQ_REMOVE(&host->nh_finished, af, af_link);
		mtx_unlock(&host->nh_lock);
		nlm_free_async_lock(af);
		mtx_lock(&host->nh_lock);
	}
	mtx_unlock(&host->nh_lock);
}

/*
 * Free resources used by a host.
 * This is called after the reference
 * count has reached zero so it doesn't need to worry about locks.
 */
static void
nlm_host_destroy(struct nlm_host *host)
{

	mtx_lock(&nlm_global_lock);
	TAILQ_REMOVE(&nlm_hosts, host, nh_link);
	mtx_unlock(&nlm_global_lock);

	if (host->nh_srvrpc.nr_client)
		CLNT_RELEASE(host->nh_srvrpc.nr_client);
	if (host->nh_clntrpc.nr_client)
		CLNT_RELEASE(host->nh_clntrpc.nr_client);
	mtx_destroy(&host->nh_lock);
	sysctl_ctx_free(&host->nh_sysctl);
	free(host, M_NLM);
}

/*
 * Thread start callback for client lock recovery.  Drops the
 * reference taken by nlm_host_notify when recovery is done.
 */
static void
nlm_client_recovery_start(void *arg)
{
	struct nlm_host *host = (struct nlm_host *) arg;

	NLM_DEBUG(1, "NLM: client lock recovery for %s started\n",
	    host->nh_caller_name);

	nlm_client_recovery(host);

	NLM_DEBUG(1, "NLM: client lock recovery for %s completed\n",
	    host->nh_caller_name);

	host->nh_monstate = NLM_MONITORED;
	nlm_host_release(host);

	kthread_exit();
}

/*
 * This is called when we receive a host state change notification. We
 * unlock any active locks owned by the host. When rpc.lockd is
 * shutting down, this function is called with newstate set to zero
 * which allows us to cancel any pending async locks and clear the
 * locking state.
 */
static void
nlm_host_notify(struct nlm_host *host, int newstate)
{
	struct nlm_async_lock *af;

	if (newstate) {
		NLM_DEBUG(1, "NLM: host %s (sysid %d) rebooted, new "
		    "state is %d\n", host->nh_caller_name,
		    host->nh_sysid, newstate);
	}

	/*
	 * Cancel any pending async locks for this host.
	 */
	mtx_lock(&host->nh_lock);
	while ((af = TAILQ_FIRST(&host->nh_pending)) != NULL) {
		/*
		 * nlm_cancel_async_lock will remove the entry from
		 * nh_pending and free it.
		 */
		nlm_cancel_async_lock(af);
	}
	mtx_unlock(&host->nh_lock);
	nlm_check_expired_locks(host);

	/*
	 * The host just rebooted - trash its locks.
	 */
	lf_clearremotesys(host->nh_sysid);
	host->nh_state = newstate;

	/*
	 * If we have any remote locks for this host (i.e. it
	 * represents a remote NFS server that our local NFS client
	 * has locks for), start a recovery thread.
	 */
	if (newstate != 0
	    && host->nh_monstate != NLM_RECOVERING
	    && lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid) > 0) {
		struct thread *td;
		host->nh_monstate = NLM_RECOVERING;
		/* Reference is released by nlm_client_recovery_start. */
		refcount_acquire(&host->nh_refs);
		kthread_add(nlm_client_recovery_start, host, curproc, &td, 0, 0,
		    "NFS lock recovery for %s", host->nh_caller_name);
	}
}

/*
 * Sysctl handler to count the number of locks for a sysid.
 */
static int
nlm_host_lock_count_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct nlm_host *host;
	int count;

	host = oidp->oid_arg1;
	count = lf_countlocks(host->nh_sysid);
	return sysctl_handle_int(oidp, &count, 0, req);
}

/*
 * Sysctl handler to count the number of client locks for a sysid.
 */
static int
nlm_host_client_lock_count_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct nlm_host *host;
	int count;

	host = oidp->oid_arg1;
	count = lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid);
	return sysctl_handle_int(oidp, &count, 0, req);
}

/*
 * Create a new NLM host.
 * Called with nlm_global_lock held; the lock is dropped and
 * re-acquired around the sysctl node creation below, so the caller
 * must be prepared for other threads to have run in between.
 */
static struct nlm_host *
nlm_create_host(const char* caller_name)
{
	struct nlm_host *host;
	struct sysctl_oid *oid;

	mtx_assert(&nlm_global_lock, MA_OWNED);

	NLM_DEBUG(1, "NLM: new host %s (sysid %d)\n",
	    caller_name, nlm_next_sysid);
	/* M_NOWAIT because we hold nlm_global_lock here. */
	host = malloc(sizeof(struct nlm_host), M_NLM, M_NOWAIT|M_ZERO);
	if (!host)
		return (NULL);
	mtx_init(&host->nh_lock, "nh_lock", NULL, MTX_DEF);
	host->nh_refs = 1;
	strlcpy(host->nh_caller_name, caller_name, MAXNAMELEN);
	host->nh_sysid = nlm_next_sysid++;
	snprintf(host->nh_sysid_string, sizeof(host->nh_sysid_string),
	    "%d", host->nh_sysid);
	host->nh_vers = 0;
	host->nh_state = 0;
	host->nh_monstate = NLM_UNMONITORED;
	host->nh_grantcookie = 1;
	TAILQ_INIT(&host->nh_pending);
	TAILQ_INIT(&host->nh_granted);
	TAILQ_INIT(&host->nh_finished);
	TAILQ_INSERT_TAIL(&nlm_hosts, host, nh_link);

	mtx_unlock(&nlm_global_lock);

	/* Publish per-host statistics under vfs.nlm.sysid.<sysid>. */
	sysctl_ctx_init(&host->nh_sysctl);
	oid = SYSCTL_ADD_NODE(&host->nh_sysctl,
	    SYSCTL_STATIC_CHILDREN(_vfs_nlm_sysid),
	    OID_AUTO, host->nh_sysid_string, CTLFLAG_RD, NULL, "");
	SYSCTL_ADD_STRING(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "hostname", CTLFLAG_RD, host->nh_caller_name, 0, "");
	SYSCTL_ADD_UINT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "version", CTLFLAG_RD, &host->nh_vers, 0, "");
	SYSCTL_ADD_UINT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "monitored", CTLFLAG_RD, &host->nh_monstate, 0, "");
	SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "lock_count", CTLTYPE_INT | CTLFLAG_RD, host, 0,
	    nlm_host_lock_count_sysctl, "I", "");
	SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "client_lock_count", CTLTYPE_INT | CTLFLAG_RD, host, 0,
	    nlm_host_client_lock_count_sysctl, "I", "");

	mtx_lock(&nlm_global_lock);

	return (host);
}

/*
 * Acquire the next sysid for remote locks
not handled by the NLM. 889 */ 890 uint32_t 891 nlm_acquire_next_sysid(void) 892 { 893 uint32_t next_sysid; 894 895 mtx_lock(&nlm_global_lock); 896 next_sysid = nlm_next_sysid++; 897 mtx_unlock(&nlm_global_lock); 898 return (next_sysid); 899 } 900 901 /* 902 * Return non-zero if the address parts of the two sockaddrs are the 903 * same. 904 */ 905 static int 906 nlm_compare_addr(const struct sockaddr *a, const struct sockaddr *b) 907 { 908 const struct sockaddr_in *a4, *b4; 909 #ifdef INET6 910 const struct sockaddr_in6 *a6, *b6; 911 #endif 912 913 if (a->sa_family != b->sa_family) 914 return (FALSE); 915 916 switch (a->sa_family) { 917 case AF_INET: 918 a4 = (const struct sockaddr_in *) a; 919 b4 = (const struct sockaddr_in *) b; 920 return !memcmp(&a4->sin_addr, &b4->sin_addr, 921 sizeof(a4->sin_addr)); 922 #ifdef INET6 923 case AF_INET6: 924 a6 = (const struct sockaddr_in6 *) a; 925 b6 = (const struct sockaddr_in6 *) b; 926 return !memcmp(&a6->sin6_addr, &b6->sin6_addr, 927 sizeof(a6->sin6_addr)); 928 #endif 929 } 930 931 return (0); 932 } 933 934 /* 935 * Check for idle hosts and stop monitoring them. We could also free 936 * the host structure here, possibly after a larger timeout but that 937 * would require some care to avoid races with 938 * e.g. nlm_host_lock_count_sysctl. 
 */
static void
nlm_check_idle(void)
{
	struct nlm_host *host;

	mtx_assert(&nlm_global_lock, MA_OWNED);

	/* Rate-limit the scan to once per NLM_IDLE_PERIOD seconds. */
	if (time_uptime <= nlm_next_idle_check)
		return;

	nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD;

	TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
		if (host->nh_monstate == NLM_MONITORED
		    && time_uptime > host->nh_idle_timeout) {
			/* Drop the global lock; lf_countlocks may sleep. */
			mtx_unlock(&nlm_global_lock);
			if (lf_countlocks(host->nh_sysid) > 0
			    || lf_countlocks(NLM_SYSID_CLIENT
				+ host->nh_sysid)) {
				host->nh_idle_timeout =
					time_uptime + NLM_IDLE_TIMEOUT;
				mtx_lock(&nlm_global_lock);
				continue;
			}
			nlm_host_unmonitor(host);
			mtx_lock(&nlm_global_lock);
		}
	}
}

/*
 * Search for an existing NLM host that matches the given name
 * (typically the caller_name element of an nlm4_lock). If none is
 * found, create a new host. If 'addr' is non-NULL, record the remote
 * address of the host so that we can call it back for async
 * responses. If 'vers' is greater than zero then record the NLM
 * program version to use to communicate with this client.
 *
 * The returned host is referenced; the caller must drop the
 * reference with nlm_host_release.
 */
struct nlm_host *
nlm_find_host_by_name(const char *name, const struct sockaddr *addr,
    rpcvers_t vers)
{
	struct nlm_host *host;

	mtx_lock(&nlm_global_lock);

	/*
	 * The remote host is determined by caller_name.
	 */
	TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
		if (!strcmp(host->nh_caller_name, name))
			break;
	}

	if (!host) {
		host = nlm_create_host(name);
		if (!host) {
			mtx_unlock(&nlm_global_lock);
			return (NULL);
		}
	}
	refcount_acquire(&host->nh_refs);

	host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT;

	/*
	 * If we have an address for the host, record it so that we
	 * can send async replies etc.
	 */
	if (addr) {

		KASSERT(addr->sa_len < sizeof(struct sockaddr_storage),
		    ("Strange remote transport address length"));

		/*
		 * If we have seen an address before and we currently
		 * have an RPC client handle, make sure the address is
		 * the same, otherwise discard the client handle.
		 */
		if (host->nh_addr.ss_len && host->nh_srvrpc.nr_client) {
			if (!nlm_compare_addr(
				(struct sockaddr *) &host->nh_addr,
				addr)
			    || host->nh_vers != vers) {
				CLIENT *client;
				mtx_lock(&host->nh_lock);
				client = host->nh_srvrpc.nr_client;
				host->nh_srvrpc.nr_client = NULL;
				mtx_unlock(&host->nh_lock);
				if (client) {
					CLNT_RELEASE(client);
				}
			}
		}
		memcpy(&host->nh_addr, addr, addr->sa_len);
		host->nh_vers = vers;
	}

	nlm_check_idle();

	mtx_unlock(&nlm_global_lock);

	return (host);
}

/*
 * Search for an existing NLM host that matches the given remote
 * address. If none is found, create a new host with the requested
 * address and remember 'vers' as the NLM protocol version to use for
 * that host.
 */
struct nlm_host *
nlm_find_host_by_addr(const struct sockaddr *addr, int vers)
{
	/*
	 * Fake up a name using inet_ntop. This buffer is
	 * large enough for an IPv6 address.
	 */
	char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"];
	struct nlm_host *host;

	switch (addr->sa_family) {
	case AF_INET:
		inet_ntop(AF_INET,
		    &((const struct sockaddr_in *) addr)->sin_addr,
		    tmp, sizeof tmp);
		break;
#ifdef INET6
	case AF_INET6:
		inet_ntop(AF_INET6,
		    &((const struct sockaddr_in6 *) addr)->sin6_addr,
		    tmp, sizeof tmp);
		break;
#endif
	default:
		strlcpy(tmp, "<unknown>", sizeof(tmp));
	}


	mtx_lock(&nlm_global_lock);

	/*
	 * The remote host is determined by caller_name.
 */
	TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
		if (nlm_compare_addr(addr,
		    (const struct sockaddr *) &host->nh_addr))
			break;
	}

	if (!host) {
		host = nlm_create_host(tmp);
		if (!host) {
			mtx_unlock(&nlm_global_lock);
			return (NULL);
		}
		memcpy(&host->nh_addr, addr, addr->sa_len);
		host->nh_vers = vers;
	}
	refcount_acquire(&host->nh_refs);

	host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT;

	nlm_check_idle();

	mtx_unlock(&nlm_global_lock);

	return (host);
}

/*
 * Find the NLM host that matches the value of 'sysid'. If none
 * exists, return NULL.  On success the returned host is referenced;
 * the caller must drop the reference with nlm_host_release.
 */
static struct nlm_host *
nlm_find_host_by_sysid(int sysid)
{
	struct nlm_host *host;

	TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
		if (host->nh_sysid == sysid) {
			refcount_acquire(&host->nh_refs);
			return (host);
		}
	}

	return (NULL);
}

/*
 * Drop a reference on the host; the last reference destroys it.
 */
void nlm_host_release(struct nlm_host *host)
{
	if (refcount_release(&host->nh_refs)) {
		/*
		 * Free the host
		 */
		nlm_host_destroy(host);
	}
}

/*
 * Unregister this NLM host with the local NSM due to idleness.
 */
static void
nlm_host_unmonitor(struct nlm_host *host)
{
	mon_id smmonid;
	sm_stat_res smstat;
	struct timeval timo;
	enum clnt_stat stat;

	NLM_DEBUG(1, "NLM: unmonitoring %s (sysid %d)\n",
	    host->nh_caller_name, host->nh_sysid);

	/*
	 * We put our assigned system ID value in the priv field to
	 * make it simpler to find the host if we are notified of a
	 * host restart.
1157 */ 1158 smmonid.mon_name = host->nh_caller_name; 1159 smmonid.my_id.my_name = "localhost"; 1160 smmonid.my_id.my_prog = NLM_PROG; 1161 smmonid.my_id.my_vers = NLM_SM; 1162 smmonid.my_id.my_proc = NLM_SM_NOTIFY; 1163 1164 timo.tv_sec = 25; 1165 timo.tv_usec = 0; 1166 stat = CLNT_CALL(nlm_nsm, SM_UNMON, 1167 (xdrproc_t) xdr_mon, &smmonid, 1168 (xdrproc_t) xdr_sm_stat, &smstat, timo); 1169 1170 if (stat != RPC_SUCCESS) { 1171 NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat); 1172 return; 1173 } 1174 if (smstat.res_stat == stat_fail) { 1175 NLM_ERR("Local NSM refuses to unmonitor %s\n", 1176 host->nh_caller_name); 1177 return; 1178 } 1179 1180 host->nh_monstate = NLM_UNMONITORED; 1181 } 1182 1183 /* 1184 * Register this NLM host with the local NSM so that we can be 1185 * notified if it reboots. 1186 */ 1187 void 1188 nlm_host_monitor(struct nlm_host *host, int state) 1189 { 1190 mon smmon; 1191 sm_stat_res smstat; 1192 struct timeval timo; 1193 enum clnt_stat stat; 1194 1195 if (state && !host->nh_state) { 1196 /* 1197 * This is the first time we have seen an NSM state 1198 * value for this host. We record it here to help 1199 * detect host reboots. 1200 */ 1201 host->nh_state = state; 1202 NLM_DEBUG(1, "NLM: host %s (sysid %d) has NSM state %d\n", 1203 host->nh_caller_name, host->nh_sysid, state); 1204 } 1205 1206 mtx_lock(&host->nh_lock); 1207 if (host->nh_monstate != NLM_UNMONITORED) { 1208 mtx_unlock(&host->nh_lock); 1209 return; 1210 } 1211 host->nh_monstate = NLM_MONITORED; 1212 mtx_unlock(&host->nh_lock); 1213 1214 NLM_DEBUG(1, "NLM: monitoring %s (sysid %d)\n", 1215 host->nh_caller_name, host->nh_sysid); 1216 1217 /* 1218 * We put our assigned system ID value in the priv field to 1219 * make it simpler to find the host if we are notified of a 1220 * host restart. 
1221 */ 1222 smmon.mon_id.mon_name = host->nh_caller_name; 1223 smmon.mon_id.my_id.my_name = "localhost"; 1224 smmon.mon_id.my_id.my_prog = NLM_PROG; 1225 smmon.mon_id.my_id.my_vers = NLM_SM; 1226 smmon.mon_id.my_id.my_proc = NLM_SM_NOTIFY; 1227 memcpy(smmon.priv, &host->nh_sysid, sizeof(host->nh_sysid)); 1228 1229 timo.tv_sec = 25; 1230 timo.tv_usec = 0; 1231 stat = CLNT_CALL(nlm_nsm, SM_MON, 1232 (xdrproc_t) xdr_mon, &smmon, 1233 (xdrproc_t) xdr_sm_stat, &smstat, timo); 1234 1235 if (stat != RPC_SUCCESS) { 1236 NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat); 1237 return; 1238 } 1239 if (smstat.res_stat == stat_fail) { 1240 NLM_ERR("Local NSM refuses to monitor %s\n", 1241 host->nh_caller_name); 1242 mtx_lock(&host->nh_lock); 1243 host->nh_monstate = NLM_MONITOR_FAILED; 1244 mtx_unlock(&host->nh_lock); 1245 return; 1246 } 1247 1248 host->nh_monstate = NLM_MONITORED; 1249 } 1250 1251 /* 1252 * Return an RPC client handle that can be used to talk to the NLM 1253 * running on the given host. 1254 */ 1255 CLIENT * 1256 nlm_host_get_rpc(struct nlm_host *host, bool_t isserver) 1257 { 1258 struct nlm_rpc *rpc; 1259 CLIENT *client; 1260 1261 mtx_lock(&host->nh_lock); 1262 1263 if (isserver) 1264 rpc = &host->nh_srvrpc; 1265 else 1266 rpc = &host->nh_clntrpc; 1267 1268 /* 1269 * We can't hold onto RPC handles for too long - the async 1270 * call/reply protocol used by some NLM clients makes it hard 1271 * to tell when they change port numbers (e.g. after a 1272 * reboot). Note that if a client reboots while it isn't 1273 * holding any locks, it won't bother to notify us. We 1274 * expire the RPC handles after two minutes. 
1275 */ 1276 if (rpc->nr_client && time_uptime > rpc->nr_create_time + 2*60) { 1277 client = rpc->nr_client; 1278 rpc->nr_client = NULL; 1279 mtx_unlock(&host->nh_lock); 1280 CLNT_RELEASE(client); 1281 mtx_lock(&host->nh_lock); 1282 } 1283 1284 if (!rpc->nr_client) { 1285 mtx_unlock(&host->nh_lock); 1286 client = nlm_get_rpc((struct sockaddr *)&host->nh_addr, 1287 NLM_PROG, host->nh_vers); 1288 mtx_lock(&host->nh_lock); 1289 1290 if (client) { 1291 if (rpc->nr_client) { 1292 mtx_unlock(&host->nh_lock); 1293 CLNT_DESTROY(client); 1294 mtx_lock(&host->nh_lock); 1295 } else { 1296 rpc->nr_client = client; 1297 rpc->nr_create_time = time_uptime; 1298 } 1299 } 1300 } 1301 1302 client = rpc->nr_client; 1303 if (client) 1304 CLNT_ACQUIRE(client); 1305 mtx_unlock(&host->nh_lock); 1306 1307 return (client); 1308 1309 } 1310 1311 int nlm_host_get_sysid(struct nlm_host *host) 1312 { 1313 1314 return (host->nh_sysid); 1315 } 1316 1317 int 1318 nlm_host_get_state(struct nlm_host *host) 1319 { 1320 1321 return (host->nh_state); 1322 } 1323 1324 void * 1325 nlm_register_wait_lock(struct nlm4_lock *lock, struct vnode *vp) 1326 { 1327 struct nlm_waiting_lock *nw; 1328 1329 nw = malloc(sizeof(struct nlm_waiting_lock), M_NLM, M_WAITOK); 1330 nw->nw_lock = *lock; 1331 memcpy(&nw->nw_fh.fh_bytes, nw->nw_lock.fh.n_bytes, 1332 nw->nw_lock.fh.n_len); 1333 nw->nw_lock.fh.n_bytes = nw->nw_fh.fh_bytes; 1334 nw->nw_waiting = TRUE; 1335 nw->nw_vp = vp; 1336 mtx_lock(&nlm_global_lock); 1337 TAILQ_INSERT_TAIL(&nlm_waiting_locks, nw, nw_link); 1338 mtx_unlock(&nlm_global_lock); 1339 1340 return nw; 1341 } 1342 1343 void 1344 nlm_deregister_wait_lock(void *handle) 1345 { 1346 struct nlm_waiting_lock *nw = handle; 1347 1348 mtx_lock(&nlm_global_lock); 1349 TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link); 1350 mtx_unlock(&nlm_global_lock); 1351 1352 free(nw, M_NLM); 1353 } 1354 1355 int 1356 nlm_wait_lock(void *handle, int timo) 1357 { 1358 struct nlm_waiting_lock *nw = handle; 1359 int error; 1360 
1361 /* 1362 * If the granted message arrived before we got here, 1363 * nw->nw_waiting will be FALSE - in that case, don't sleep. 1364 */ 1365 mtx_lock(&nlm_global_lock); 1366 error = 0; 1367 if (nw->nw_waiting) 1368 error = msleep(nw, &nlm_global_lock, PCATCH, "nlmlock", timo); 1369 TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link); 1370 if (error) { 1371 /* 1372 * The granted message may arrive after the 1373 * interrupt/timeout but before we manage to lock the 1374 * mutex. Detect this by examining nw_lock. 1375 */ 1376 if (!nw->nw_waiting) 1377 error = 0; 1378 } else { 1379 /* 1380 * If nlm_cancel_wait is called, then error will be 1381 * zero but nw_waiting will still be TRUE. We 1382 * translate this into EINTR. 1383 */ 1384 if (nw->nw_waiting) 1385 error = EINTR; 1386 } 1387 mtx_unlock(&nlm_global_lock); 1388 1389 free(nw, M_NLM); 1390 1391 return (error); 1392 } 1393 1394 void 1395 nlm_cancel_wait(struct vnode *vp) 1396 { 1397 struct nlm_waiting_lock *nw; 1398 1399 mtx_lock(&nlm_global_lock); 1400 TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) { 1401 if (nw->nw_vp == vp) { 1402 wakeup(nw); 1403 } 1404 } 1405 mtx_unlock(&nlm_global_lock); 1406 } 1407 1408 1409 /**********************************************************************/ 1410 1411 /* 1412 * Syscall interface with userland. 
1413 */ 1414 1415 extern void nlm_prog_0(struct svc_req *rqstp, SVCXPRT *transp); 1416 extern void nlm_prog_1(struct svc_req *rqstp, SVCXPRT *transp); 1417 extern void nlm_prog_3(struct svc_req *rqstp, SVCXPRT *transp); 1418 extern void nlm_prog_4(struct svc_req *rqstp, SVCXPRT *transp); 1419 1420 static int 1421 nlm_register_services(SVCPOOL *pool, int addr_count, char **addrs) 1422 { 1423 static rpcvers_t versions[] = { 1424 NLM_SM, NLM_VERS, NLM_VERSX, NLM_VERS4 1425 }; 1426 static void (*dispatchers[])(struct svc_req *, SVCXPRT *) = { 1427 nlm_prog_0, nlm_prog_1, nlm_prog_3, nlm_prog_4 1428 }; 1429 static const int version_count = sizeof(versions) / sizeof(versions[0]); 1430 1431 SVCXPRT **xprts; 1432 char netid[16]; 1433 char uaddr[128]; 1434 struct netconfig *nconf; 1435 int i, j, error; 1436 1437 if (!addr_count) { 1438 NLM_ERR("NLM: no service addresses given - can't start server"); 1439 return (EINVAL); 1440 } 1441 1442 xprts = malloc(addr_count * sizeof(SVCXPRT *), M_NLM, M_WAITOK|M_ZERO); 1443 for (i = 0; i < version_count; i++) { 1444 for (j = 0; j < addr_count; j++) { 1445 /* 1446 * Create transports for the first version and 1447 * then just register everything else to the 1448 * same transports. 
1449 */ 1450 if (i == 0) { 1451 char *up; 1452 1453 error = copyin(&addrs[2*j], &up, 1454 sizeof(char*)); 1455 if (error) 1456 goto out; 1457 error = copyinstr(up, netid, sizeof(netid), 1458 NULL); 1459 if (error) 1460 goto out; 1461 error = copyin(&addrs[2*j+1], &up, 1462 sizeof(char*)); 1463 if (error) 1464 goto out; 1465 error = copyinstr(up, uaddr, sizeof(uaddr), 1466 NULL); 1467 if (error) 1468 goto out; 1469 nconf = getnetconfigent(netid); 1470 if (!nconf) { 1471 NLM_ERR("Can't lookup netid %s\n", 1472 netid); 1473 error = EINVAL; 1474 goto out; 1475 } 1476 xprts[j] = svc_tp_create(pool, dispatchers[i], 1477 NLM_PROG, versions[i], uaddr, nconf); 1478 if (!xprts[j]) { 1479 NLM_ERR("NLM: unable to create " 1480 "(NLM_PROG, %d).\n", versions[i]); 1481 error = EINVAL; 1482 goto out; 1483 } 1484 freenetconfigent(nconf); 1485 } else { 1486 nconf = getnetconfigent(xprts[j]->xp_netid); 1487 rpcb_unset(NLM_PROG, versions[i], nconf); 1488 if (!svc_reg(xprts[j], NLM_PROG, versions[i], 1489 dispatchers[i], nconf)) { 1490 NLM_ERR("NLM: can't register " 1491 "(NLM_PROG, %d)\n", versions[i]); 1492 error = EINVAL; 1493 goto out; 1494 } 1495 } 1496 } 1497 } 1498 error = 0; 1499 out: 1500 for (j = 0; j < addr_count; j++) { 1501 if (xprts[j]) 1502 SVC_RELEASE(xprts[j]); 1503 } 1504 free(xprts, M_NLM); 1505 return (error); 1506 } 1507 1508 /* 1509 * Main server entry point. Contacts the local NSM to get its current 1510 * state and send SM_UNMON_ALL. Registers the NLM services and then 1511 * services requests. Does not return until the server is interrupted 1512 * by a signal. 
 */
static int
nlm_server_main(int addr_count, char **addrs)
{
	struct thread *td = curthread;
	int error;
	SVCPOOL *pool = NULL;
	struct sockopt opt;
	int portlow;
#ifdef INET6
	struct sockaddr_in6 sin6;
#endif
	struct sockaddr_in sin;
	my_id id;
	sm_stat smstat;
	struct timeval timo;
	enum clnt_stat stat;
	struct nlm_host *host, *nhost;
	struct nlm_waiting_lock *nw;
	vop_advlock_t *old_nfs_advlock;
	vop_reclaim_t *old_nfs_reclaim;

	if (nlm_is_running != 0) {
		NLM_ERR("NLM: can't start server - "
		    "it appears to be running already\n");
		return (EPERM);
	}

	/*
	 * Create the UDP sockets used for outgoing RPC, bound to
	 * reserved ("low") ports as many NLM peers require. These are
	 * created once and reused across server restarts.
	 */
	if (nlm_socket == NULL) {
		memset(&opt, 0, sizeof(opt));

		error = socreate(AF_INET, &nlm_socket, SOCK_DGRAM, 0,
		    td->td_ucred, td);
		if (error) {
			NLM_ERR("NLM: can't create IPv4 socket - error %d\n",
			    error);
			return (error);
		}
		opt.sopt_dir = SOPT_SET;
		opt.sopt_level = IPPROTO_IP;
		opt.sopt_name = IP_PORTRANGE;
		portlow = IP_PORTRANGE_LOW;
		opt.sopt_val = &portlow;
		opt.sopt_valsize = sizeof(portlow);
		sosetopt(nlm_socket, &opt);

#ifdef INET6
		nlm_socket6 = NULL;
		error = socreate(AF_INET6, &nlm_socket6, SOCK_DGRAM, 0,
		    td->td_ucred, td);
		if (error) {
			NLM_ERR("NLM: can't create IPv6 socket - error %d\n",
			    error);
			soclose(nlm_socket);
			nlm_socket = NULL;
			return (error);
		}
		opt.sopt_dir = SOPT_SET;
		opt.sopt_level = IPPROTO_IPV6;
		opt.sopt_name = IPV6_PORTRANGE;
		portlow = IPV6_PORTRANGE_LOW;
		opt.sopt_val = &portlow;
		opt.sopt_valsize = sizeof(portlow);
		sosetopt(nlm_socket6, &opt);
#endif
	}

	nlm_auth = authunix_create(curthread->td_ucred);

	/*
	 * Contact the local NSM (statd) over loopback, preferring
	 * IPv6 and falling back to IPv4.
	 */
#ifdef INET6
	memset(&sin6, 0, sizeof(sin6));
	sin6.sin6_len = sizeof(sin6);
	sin6.sin6_family = AF_INET6;
	sin6.sin6_addr = in6addr_loopback;
	nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin6, SM_PROG, SM_VERS);
	if (!nlm_nsm) {
#endif
		memset(&sin, 0, sizeof(sin));
		sin.sin_len = sizeof(sin);
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
		nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin, SM_PROG,
		    SM_VERS);
#ifdef INET6
	}
#endif

	if (!nlm_nsm) {
		NLM_ERR("Can't start NLM - unable to contact NSM\n");
		error = EINVAL;
		goto out;
	}

	pool = svcpool_create("NLM", NULL);

	error = nlm_register_services(pool, addr_count, addrs);
	if (error)
		goto out;

	/*
	 * Ask the NSM to forget all monitored hosts - we are starting
	 * from a clean slate - and learn its current state number.
	 */
	memset(&id, 0, sizeof(id));
	id.my_name = "NFS NLM";

	timo.tv_sec = 25;
	timo.tv_usec = 0;
	stat = CLNT_CALL(nlm_nsm, SM_UNMON_ALL,
	    (xdrproc_t) xdr_my_id, &id,
	    (xdrproc_t) xdr_sm_stat, &smstat, timo);

	if (stat != RPC_SUCCESS) {
		struct rpc_err err;

		CLNT_GETERR(nlm_nsm, &err);
		NLM_ERR("NLM: unexpected error contacting NSM, "
		    "stat=%d, errno=%d\n", stat, err.re_errno);
		error = EINVAL;
		goto out;
	}
	nlm_is_running = 1;

	NLM_DEBUG(1, "NLM: local NSM state is %d\n", smstat.state);
	nlm_nsm_state = smstat.state;

	/*
	 * Route NFS client advlock/reclaim requests through the NLM
	 * while the server runs; restore the old hooks on exit.
	 */
	old_nfs_advlock = nfs_advlock_p;
	nfs_advlock_p = nlm_advlock;
	old_nfs_reclaim = nfs_reclaim_p;
	nfs_reclaim_p = nlm_reclaim;

	/* Service RPC requests; blocks until the process is signalled. */
	svc_run(pool);
	error = 0;

	nfs_advlock_p = old_nfs_advlock;
	nfs_reclaim_p = old_nfs_reclaim;

out:
	nlm_is_running = 0;
	if (pool)
		svcpool_destroy(pool);

	/*
	 * We are finished communicating with the NSM.
	 */
	if (nlm_nsm) {
		CLNT_RELEASE(nlm_nsm);
		nlm_nsm = NULL;
	}

	/*
	 * Trash all the existing state so that if the server
	 * restarts, it gets a clean slate. This is complicated by the
	 * possibility that there may be other threads trying to make
	 * client locking requests.
	 *
	 * First we fake a client reboot notification which will
	 * cancel any pending async locks and purge remote lock state
	 * from the local lock manager. We release the reference from
	 * nlm_hosts to the host (which may remove it from the list
	 * and free it). After this phase, the only entries in the
	 * nlm_host list should be from other threads performing
	 * client lock requests.
	 */
	mtx_lock(&nlm_global_lock);
	TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) {
		wakeup(nw);
	}
	TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, nhost) {
		/*
		 * nlm_host_notify/release may sleep, so drop the
		 * global lock around them; FOREACH_SAFE keeps the
		 * iteration valid if the host is removed.
		 */
		mtx_unlock(&nlm_global_lock);
		nlm_host_notify(host, 0);
		nlm_host_release(host);
		mtx_lock(&nlm_global_lock);
	}
	mtx_unlock(&nlm_global_lock);

	AUTH_DESTROY(nlm_auth);

	return (error);
}

/*
 * nlm_syscall() entry point: privileged syscall used by rpc.lockd to
 * configure and run the kernel NLM server. Does not return until the
 * server stops.
 */
int
sys_nlm_syscall(struct thread *td, struct nlm_syscall_args *uap)
{
	int error;

#if __FreeBSD_version >= 700000
	error = priv_check(td, PRIV_NFS_LOCKD);
#else
	error = suser(td);
#endif
	if (error)
		return (error);

	nlm_debug_level = uap->debug_level;
	nlm_grace_threshold = time_uptime + uap->grace_period;
	nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD;

	return nlm_server_main(uap->addr_count, uap->addrs);
}

/**********************************************************************/

/*
 * NLM implementation details, called from the RPC stubs.
 */


/*
 * Called when the local NSM relays an SM_NOTIFY for a rebooted host.
 * The sysid we stashed in the priv field (see nlm_host_monitor) tells
 * us which host to purge.
 */
void
nlm_sm_notify(struct nlm_sm_status *argp)
{
	uint32_t sysid;
	struct nlm_host *host;

	NLM_DEBUG(3, "nlm_sm_notify(): mon_name = %s\n", argp->mon_name);
	memcpy(&sysid, &argp->priv, sizeof(sysid));
	host = nlm_find_host_by_sysid(sysid);
	if (host) {
		nlm_host_notify(host, argp->state);
		nlm_host_release(host);
	}
}

/*
 * Extract a local fhandle_t from an NLM netobj file handle.
 * NOTE(review): assumes p->n_bytes holds at least sizeof(fhandle_t)
 * bytes - confirm the RPC decode layer guarantees this.
 */
static void
nlm_convert_to_fhandle_t(fhandle_t *fhp, struct netobj *p)
{
	memcpy(fhp, p->n_bytes, sizeof(fhandle_t));
}

/*
 * Tracks the mount point and vnode resolved from a file handle for
 * one NLM operation; released by nlm_release_vfs_state().
 */
struct vfs_state {
	struct mount	*vs_mp;		/* referenced mount point */
	struct vnode	*vs_vp;		/* resolved vnode */
	int		vs_vnlocked;	/* TRUE if vs_vp is still locked */
};

/*
 * Resolve 'fhp' to a mount point and vnode and check that the
 * requesting host may access it with 'accmode' (zero means no access
 * check - used for unlock/cancel). On success vs->vs_vp is returned
 * unlocked but referenced. Returns zero or an errno value; partial
 * state is still recorded in *vs and must be released by the caller.
 */
static int
nlm_get_vfs_state(struct nlm_host *host, struct svc_req *rqstp,
    fhandle_t *fhp, struct vfs_state *vs, accmode_t accmode)
{
	int error, exflags;
	struct ucred *cred = NULL, *credanon = NULL;

	memset(vs, 0, sizeof(*vs));

	vs->vs_mp = vfs_getvfs(&fhp->fh_fsid);
	if (!vs->vs_mp) {
		return (ESTALE);
	}

	/* accmode == 0 means don't check, since it is an unlock. */
	if (accmode != 0) {
		error = VFS_CHECKEXP(vs->vs_mp,
		    (struct sockaddr *)&host->nh_addr, &exflags, &credanon,
		    NULL, NULL);
		if (error)
			goto out;

		if (exflags & MNT_EXRDONLY ||
		    (vs->vs_mp->mnt_flag & MNT_RDONLY)) {
			error = EROFS;
			goto out;
		}
	}

	error = VFS_FHTOVP(vs->vs_mp, &fhp->fh_fid, LK_EXCLUSIVE, &vs->vs_vp);
	if (error)
		goto out;
	vs->vs_vnlocked = TRUE;

	if (accmode != 0) {
		if (!svc_getcred(rqstp, &cred, NULL)) {
			error = EINVAL;
			goto out;
		}
		/* Map root (and anon-exported mounts) to the anon cred. */
		if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) {
			crfree(cred);
			cred = credanon;
			credanon = NULL;
		}

		/*
		 * Check cred.
		 */
		error = VOP_ACCESS(vs->vs_vp, accmode, cred, curthread);
		/*
		 * If this failed and accmode != VWRITE, try again with
		 * VWRITE to maintain backwards compatibility with the
		 * old code that always used VWRITE.
		 */
		if (error != 0 && accmode != VWRITE)
			error = VOP_ACCESS(vs->vs_vp, VWRITE, cred, curthread);
		if (error)
			goto out;
	}

#if __FreeBSD_version < 800011
	VOP_UNLOCK(vs->vs_vp, 0, curthread);
#else
	VOP_UNLOCK(vs->vs_vp, 0);
#endif
	vs->vs_vnlocked = FALSE;

out:
	if (cred)
		crfree(cred);
	if (credanon)
		crfree(credanon);

	return (error);
}

/*
 * Release whatever vnode/mount references nlm_get_vfs_state left in
 * *vs; safe to call on partially-initialized state.
 */
static void
nlm_release_vfs_state(struct vfs_state *vs)
{

	if (vs->vs_vp) {
		if (vs->vs_vnlocked)
			vput(vs->vs_vp);
		else
			vrele(vs->vs_vp);
	}
	if (vs->vs_mp)
		vfs_rel(vs->vs_mp);
}

/*
 * Map an errno from nlm_get_vfs_state to the closest NLM status code.
 */
static nlm4_stats
nlm_convert_error(int error)
{

	if (error == ESTALE)
		return nlm4_stale_fh;
	else if (error == EROFS)
		return nlm4_rofs;
	else
		return nlm4_failed;
}

/*
 * Implementation of the NLM_TEST service procedure: report whether
 * the described lock could be granted, and if not, describe one
 * conflicting holder in the reply. Always returns zero; the NLM
 * status is carried in result->stat. If rpcp is non-NULL, it is set
 * to an RPC handle for sending an async reply to the caller.
 */
int
nlm_do_test(nlm4_testargs *argp, nlm4_testres *result, struct svc_req *rqstp,
    CLIENT **rpcp)
{
	fhandle_t fh;
	struct vfs_state vs;
	struct nlm_host *host, *bhost;
	int error, sysid;
	struct flock fl;
	accmode_t accmode;

	memset(result, 0, sizeof(*result));
	memset(&vs, 0, sizeof(vs));

	host = nlm_find_host_by_name(argp->alock.caller_name,
	    svc_getrpccaller(rqstp), rqstp->rq_vers);
	if (!host) {
		result->stat.stat = nlm4_denied_nolocks;
		return (ENOMEM);
	}

	NLM_DEBUG(3, "nlm_do_test(): caller_name = %s (sysid = %d)\n",
	    host->nh_caller_name, host->nh_sysid);

	nlm_check_expired_locks(host);
	sysid = host->nh_sysid;

	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);

	if (time_uptime < nlm_grace_threshold) {
		result->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	accmode = argp->exclusive ? VWRITE : VREAD;
	error = nlm_get_vfs_state(host, rqstp, &fh, &vs, accmode);
	if (error) {
		result->stat.stat = nlm_convert_error(error);
		goto out;
	}

	fl.l_start = argp->alock.l_offset;
	fl.l_len = argp->alock.l_len;
	fl.l_pid = argp->alock.svid;
	fl.l_sysid = sysid;
	fl.l_whence = SEEK_SET;
	if (argp->exclusive)
		fl.l_type = F_WRLCK;
	else
		fl.l_type = F_RDLCK;
	error = VOP_ADVLOCK(vs.vs_vp, NULL, F_GETLK, &fl, F_REMOTE);
	if (error) {
		result->stat.stat = nlm4_failed;
		goto out;
	}

	/* F_GETLK leaves F_UNLCK in l_type when no conflict exists. */
	if (fl.l_type == F_UNLCK) {
		result->stat.stat = nlm4_granted;
	} else {
		result->stat.stat = nlm4_denied;
		result->stat.nlm4_testrply_u.holder.exclusive =
			(fl.l_type == F_WRLCK);
		result->stat.nlm4_testrply_u.holder.svid = fl.l_pid;
		bhost = nlm_find_host_by_sysid(fl.l_sysid);
		if (bhost) {
			/*
			 * We don't have any useful way of recording
			 * the value of oh used in the original lock
			 * request. Ideally, the test reply would have
			 * a space for the owning host's name allowing
			 * our caller's NLM to keep track.
			 *
			 * As far as I can see, Solaris uses an eight
			 * byte structure for oh which contains a four
			 * byte pid encoded in local byte order and
			 * the first four bytes of the host
			 * name. Linux uses a variable length string
			 * 'pid@hostname' in ascii but doesn't even
			 * return that in test replies.
			 *
			 * For the moment, return nothing in oh
			 * (already zero'ed above).
			 */
			nlm_host_release(bhost);
		}
		result->stat.nlm4_testrply_u.holder.l_offset = fl.l_start;
		result->stat.nlm4_testrply_u.holder.l_len = fl.l_len;
	}

out:
	nlm_release_vfs_state(&vs);
	if (rpcp)
		*rpcp = nlm_host_get_rpc(host, TRUE);
	nlm_host_release(host);
	return (0);
}

/*
 * Implementation of the NLM_LOCK service procedure. Non-blocking
 * requests are applied directly with VOP_ADVLOCK; blocking requests
 * use VOP_ADVLOCKASYNC with a tracking structure so that an
 * NLM_GRANTED callback can be sent later. Always returns zero; the
 * NLM status is carried in result->stat. If rpcp is non-NULL, it is
 * set to an RPC handle for sending an async reply to the caller.
 */
int
nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp,
    bool_t monitor, CLIENT **rpcp)
{
	fhandle_t fh;
	struct vfs_state vs;
	struct nlm_host *host;
	int error, sysid;
	struct flock fl;
	accmode_t accmode;

	memset(result, 0, sizeof(*result));
	memset(&vs, 0, sizeof(vs));

	host = nlm_find_host_by_name(argp->alock.caller_name,
	    svc_getrpccaller(rqstp), rqstp->rq_vers);
	if (!host) {
		result->stat.stat = nlm4_denied_nolocks;
		return (ENOMEM);
	}

	NLM_DEBUG(3, "nlm_do_lock(): caller_name = %s (sysid = %d)\n",
	    host->nh_caller_name, host->nh_sysid);

	if (monitor && host->nh_state && argp->state
	    && host->nh_state != argp->state) {
		/*
		 * The host rebooted without telling us. Trash its
		 * locks.
		 */
		nlm_host_notify(host, argp->state);
	}

	nlm_check_expired_locks(host);
	sysid = host->nh_sysid;

	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);

	/* Only reclaim requests are allowed during the grace period. */
	if (time_uptime < nlm_grace_threshold && !argp->reclaim) {
		result->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	accmode = argp->exclusive ? VWRITE : VREAD;
	error = nlm_get_vfs_state(host, rqstp, &fh, &vs, accmode);
	if (error) {
		result->stat.stat = nlm_convert_error(error);
		goto out;
	}

	fl.l_start = argp->alock.l_offset;
	fl.l_len = argp->alock.l_len;
	fl.l_pid = argp->alock.svid;
	fl.l_sysid = sysid;
	fl.l_whence = SEEK_SET;
	if (argp->exclusive)
		fl.l_type = F_WRLCK;
	else
		fl.l_type = F_RDLCK;
	if (argp->block) {
		struct nlm_async_lock *af;
		CLIENT *client;
		struct nlm_grantcookie cookie;

		/*
		 * First, make sure we can contact the host's NLM.
		 */
		client = nlm_host_get_rpc(host, TRUE);
		if (!client) {
			result->stat.stat = nlm4_failed;
			goto out;
		}

		/*
		 * First we need to check and see if there is an
		 * existing blocked lock that matches. This could be a
		 * badly behaved client or an RPC re-send. If we find
		 * one, just return nlm4_blocked.
		 */
		mtx_lock(&host->nh_lock);
		TAILQ_FOREACH(af, &host->nh_pending, af_link) {
			if (af->af_fl.l_start == fl.l_start
			    && af->af_fl.l_len == fl.l_len
			    && af->af_fl.l_pid == fl.l_pid
			    && af->af_fl.l_type == fl.l_type) {
				break;
			}
		}
		if (!af) {
			cookie.ng_sysid = host->nh_sysid;
			cookie.ng_cookie = host->nh_grantcookie++;
		}
		mtx_unlock(&host->nh_lock);
		if (af) {
			CLNT_RELEASE(client);
			result->stat.stat = nlm4_blocked;
			goto out;
		}

		af = malloc(sizeof(struct nlm_async_lock), M_NLM,
		    M_WAITOK|M_ZERO);
		TASK_INIT(&af->af_task, 0, nlm_lock_callback, af);
		af->af_vp = vs.vs_vp;
		af->af_fl = fl;
		af->af_host = host;
		af->af_rpc = client;
		/*
		 * We use M_RPC here so that we can xdr_free the thing
		 * later.
		 */
		nlm_make_netobj(&af->af_granted.cookie,
		    (caddr_t)&cookie, sizeof(cookie), M_RPC);
		af->af_granted.exclusive = argp->exclusive;
		af->af_granted.alock.caller_name =
			strdup(argp->alock.caller_name, M_RPC);
		nlm_copy_netobj(&af->af_granted.alock.fh,
		    &argp->alock.fh, M_RPC);
		nlm_copy_netobj(&af->af_granted.alock.oh,
		    &argp->alock.oh, M_RPC);
		af->af_granted.alock.svid = argp->alock.svid;
		af->af_granted.alock.l_offset = argp->alock.l_offset;
		af->af_granted.alock.l_len = argp->alock.l_len;

		/*
		 * Put the entry on the pending list before calling
		 * VOP_ADVLOCKASYNC. We do this in case the lock
		 * request was blocked (returning EINPROGRESS) but
		 * then granted before we manage to run again. The
		 * client may receive the granted message before we
		 * send our blocked reply but thats their problem.
		 */
		mtx_lock(&host->nh_lock);
		TAILQ_INSERT_TAIL(&host->nh_pending, af, af_link);
		mtx_unlock(&host->nh_lock);

		error = VOP_ADVLOCKASYNC(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE,
		    &af->af_task, &af->af_cookie);

		/*
		 * If the lock completed synchronously, just free the
		 * tracking structure now.
		 */
		if (error != EINPROGRESS) {
			CLNT_RELEASE(af->af_rpc);
			mtx_lock(&host->nh_lock);
			TAILQ_REMOVE(&host->nh_pending, af, af_link);
			mtx_unlock(&host->nh_lock);
			xdr_free((xdrproc_t) xdr_nlm4_testargs,
			    &af->af_granted);
			free(af, M_NLM);
		} else {
			NLM_DEBUG(2, "NLM: pending async lock %p for %s "
			    "(sysid %d)\n", af, host->nh_caller_name, sysid);
			/*
			 * Don't vrele the vnode just yet - this must
			 * wait until either the async callback
			 * happens or the lock is cancelled.
			 */
			vs.vs_vp = NULL;
		}
	} else {
		error = VOP_ADVLOCK(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE);
	}

	if (error) {
		if (error == EINPROGRESS) {
			result->stat.stat = nlm4_blocked;
		} else if (error == EDEADLK) {
			result->stat.stat = nlm4_deadlck;
		} else if (error == EAGAIN) {
			result->stat.stat = nlm4_denied;
		} else {
			result->stat.stat = nlm4_failed;
		}
	} else {
		/* Ask the NSM to watch the client before granting. */
		if (monitor)
			nlm_host_monitor(host, argp->state);
		result->stat.stat = nlm4_granted;
	}

out:
	nlm_release_vfs_state(&vs);
	if (rpcp)
		*rpcp = nlm_host_get_rpc(host, TRUE);
	nlm_host_release(host);
	return (0);
}

/*
 * Implementation of the NLM_CANCEL service procedure: find and cancel
 * the matching pending blocked lock, if any. Always returns zero; the
 * NLM status is carried in result->stat. If rpcp is non-NULL, it is
 * set to an RPC handle for sending an async reply to the caller.
 */
int
nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *result, struct svc_req *rqstp,
    CLIENT **rpcp)
{
	fhandle_t fh;
	struct vfs_state vs;
	struct nlm_host *host;
	int error, sysid;
	struct flock fl;
	struct nlm_async_lock *af;

	memset(result, 0, sizeof(*result));
	memset(&vs, 0, sizeof(vs));

	host = nlm_find_host_by_name(argp->alock.caller_name,
	    svc_getrpccaller(rqstp), rqstp->rq_vers);
	if (!host) {
		result->stat.stat = nlm4_denied_nolocks;
		return (ENOMEM);
	}

	NLM_DEBUG(3, "nlm_do_cancel(): caller_name = %s (sysid = %d)\n",
	    host->nh_caller_name, host->nh_sysid);

	nlm_check_expired_locks(host);
	sysid = host->nh_sysid;

	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);

	if (time_uptime < nlm_grace_threshold) {
		result->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	/* accmode 0: cancel needs no access check. */
	error = nlm_get_vfs_state(host, rqstp, &fh, &vs, (accmode_t)0);
	if (error) {
		result->stat.stat = nlm_convert_error(error);
		goto out;
	}

	fl.l_start = argp->alock.l_offset;
	fl.l_len = argp->alock.l_len;
	fl.l_pid = argp->alock.svid;
	fl.l_sysid = sysid;
	fl.l_whence = SEEK_SET;
	if (argp->exclusive)
		fl.l_type = F_WRLCK;
	else
		fl.l_type = F_RDLCK;

	/*
	 * First we need to try and find the async lock request - if
	 * there isn't one, we give up and return nlm4_denied.
	 */
	mtx_lock(&host->nh_lock);

	TAILQ_FOREACH(af, &host->nh_pending, af_link) {
		if (af->af_fl.l_start == fl.l_start
		    && af->af_fl.l_len == fl.l_len
		    && af->af_fl.l_pid == fl.l_pid
		    && af->af_fl.l_type == fl.l_type) {
			break;
		}
	}

	if (!af) {
		mtx_unlock(&host->nh_lock);
		result->stat.stat = nlm4_denied;
		goto out;
	}

	error = nlm_cancel_async_lock(af);

	if (error) {
		result->stat.stat = nlm4_denied;
	} else {
		result->stat.stat = nlm4_granted;
	}

	mtx_unlock(&host->nh_lock);

out:
	nlm_release_vfs_state(&vs);
	if (rpcp)
		*rpcp = nlm_host_get_rpc(host, TRUE);
	nlm_host_release(host);
	return (0);
}

/*
 * Implementation of the NLM_UNLOCK service procedure. Always returns
 * zero; the NLM status is carried in result->stat. If rpcp is
 * non-NULL, it is set to an RPC handle for sending an async reply to
 * the caller.
 */
int
nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *result, struct svc_req *rqstp,
    CLIENT **rpcp)
{
	fhandle_t fh;
	struct vfs_state vs;
	struct nlm_host *host;
	int error, sysid;
	struct flock fl;

	memset(result, 0, sizeof(*result));
	memset(&vs, 0, sizeof(vs));

	host = nlm_find_host_by_name(argp->alock.caller_name,
	    svc_getrpccaller(rqstp), rqstp->rq_vers);
	if (!host) {
		result->stat.stat = nlm4_denied_nolocks;
		return (ENOMEM);
	}

	NLM_DEBUG(3, "nlm_do_unlock(): caller_name = %s (sysid = %d)\n",
	    host->nh_caller_name, host->nh_sysid);

	nlm_check_expired_locks(host);
	sysid = host->nh_sysid;

	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);

	if (time_uptime < nlm_grace_threshold) {
		result->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	/* accmode 0: unlock needs no access check. */
	error = nlm_get_vfs_state(host, rqstp, &fh, &vs, (accmode_t)0);
	if (error) {
		result->stat.stat = nlm_convert_error(error);
		goto out;
	}

	fl.l_start = argp->alock.l_offset;
	fl.l_len = argp->alock.l_len;
	fl.l_pid = argp->alock.svid;
	fl.l_sysid = sysid;
	fl.l_whence = SEEK_SET;
	fl.l_type = F_UNLCK;
	error = VOP_ADVLOCK(vs.vs_vp, NULL, F_UNLCK, &fl, F_REMOTE);

	/*
	 * Ignore the error - there is no result code for failure,
	 * only for grace period.
	 */
	result->stat.stat = nlm4_granted;

out:
	nlm_release_vfs_state(&vs);
	if (rpcp)
		*rpcp = nlm_host_get_rpc(host, TRUE);
	nlm_host_release(host);
	return (0);
}

/*
 * Implementation of the NLM_GRANTED service procedure (we are the
 * client side here): a remote server tells us one of our blocked lock
 * requests has been granted. Match it against the waiting-lock list
 * and wake the sleeping thread. Always returns zero; the NLM status
 * is carried in result->stat.
 */
int
nlm_do_granted(nlm4_testargs *argp, nlm4_res *result, struct svc_req *rqstp,

    CLIENT **rpcp)
{
	struct nlm_host *host;
	struct nlm_waiting_lock *nw;

	memset(result, 0, sizeof(*result));

	host = nlm_find_host_by_addr(svc_getrpccaller(rqstp), rqstp->rq_vers);
	if (!host) {
		result->stat.stat = nlm4_denied_nolocks;
		return (ENOMEM);
	}

	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);
	result->stat.stat = nlm4_denied;
	KFAIL_POINT_CODE(DEBUG_FP, nlm_deny_grant, goto out);

	mtx_lock(&nlm_global_lock);
	TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) {
		if (!nw->nw_waiting)
			continue;
		if (argp->alock.svid == nw->nw_lock.svid
		    && argp->alock.l_offset == nw->nw_lock.l_offset
		    && argp->alock.l_len == nw->nw_lock.l_len
		    && argp->alock.fh.n_len == nw->nw_lock.fh.n_len
		    && !memcmp(argp->alock.fh.n_bytes, nw->nw_lock.fh.n_bytes,
			nw->nw_lock.fh.n_len)) {
			/*
			 * Clearing nw_waiting before the wakeup lets
			 * nlm_wait_lock distinguish grant from cancel.
			 */
			nw->nw_waiting = FALSE;
			wakeup(nw);
			result->stat.stat = nlm4_granted;
			break;
		}
	}
	mtx_unlock(&nlm_global_lock);

out:
	if (rpcp)
		*rpcp = nlm_host_get_rpc(host, TRUE);
	nlm_host_release(host);
	return (0);
}

void
nlm_do_granted_res(nlm4_res *argp, struct svc_req *rqstp)
{
	struct nlm_host *host = NULL;
	struct nlm_async_lock *af = NULL;
	int error;

	/*
	 * Handle the client's reply (GRANTED_RES) to an asynchronous
	 * grant we sent earlier.  The cookie we attached to the grant
	 * encodes the host's sysid and a per-grant sequence number, so
	 * anything of the wrong size is bogus.
	 */
	if (argp->cookie.n_len != sizeof(struct nlm_grantcookie)) {
		NLM_DEBUG(1, "NLM: bogus grant cookie");
		goto out;
	}

	host = nlm_find_host_by_sysid(ng_sysid(&argp->cookie));
	if (!host) {
		NLM_DEBUG(1, "NLM: Unknown host rejected our grant");
		goto out;
	}

	/* Look up and unlink the pending grant under the host lock. */
	mtx_lock(&host->nh_lock);
	TAILQ_FOREACH(af, &host->nh_granted, af_link)
	    if (ng_cookie(&argp->cookie) ==
		ng_cookie(&af->af_granted.cookie))
		    break;
	if (af)
		TAILQ_REMOVE(&host->nh_granted, af, af_link);
	mtx_unlock(&host->nh_lock);

	if (!af) {
		NLM_DEBUG(1, "NLM: host %s (sysid %d) replied to our grant "
		    "with unrecognized cookie %d:%d", host->nh_caller_name,
		    host->nh_sysid, ng_sysid(&argp->cookie),
		    ng_cookie(&argp->cookie));
		goto out;
	}

	if (argp->stat.stat != nlm4_granted) {
		/*
		 * The client rejected the grant, so undo the local lock
		 * we took on its behalf.
		 */
		af->af_fl.l_type = F_UNLCK;
		error = VOP_ADVLOCK(af->af_vp, NULL, F_UNLCK, &af->af_fl, F_REMOTE);
		if (error) {
			NLM_DEBUG(1, "NLM: host %s (sysid %d) rejected our grant "
			    "and we failed to unlock (%d)", host->nh_caller_name,
			    host->nh_sysid, error);
			goto out;
		}

		NLM_DEBUG(5, "NLM: async lock %p rejected by host %s (sysid %d)",
		    af, host->nh_caller_name, host->nh_sysid);
	} else {
		NLM_DEBUG(5, "NLM: async lock %p accepted by host %s (sysid %d)",
		    af, host->nh_caller_name, host->nh_sysid);
	}

out:
	/* Free the async-lock record (if found) and drop our host ref. */
	if (af)
		nlm_free_async_lock(af);
	if (host)
		nlm_host_release(host);
}

/*
 * Service NLM_FREE_ALL: the named client host has restarted, so notify
 * every tracked host record with that caller name to release its state.
 * NOTE(review): nlm_hosts is walked here without nlm_global_lock -
 * presumably the service dispatch serializes this; confirm before
 * relying on it.
 */
void
nlm_do_free_all(nlm4_notify *argp)
{
	struct nlm_host *host, *thost;

	TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, thost) {
		if (!strcmp(host->nh_caller_name, argp->name))
			nlm_host_notify(host, argp->state);
	}
}

/*
 *
Kernel module glue 2402 */ 2403 static int 2404 nfslockd_modevent(module_t mod, int type, void *data) 2405 { 2406 2407 switch (type) { 2408 case MOD_LOAD: 2409 return (0); 2410 case MOD_UNLOAD: 2411 /* The NLM module cannot be safely unloaded. */ 2412 /* FALLTHROUGH */ 2413 default: 2414 return (EOPNOTSUPP); 2415 } 2416 } 2417 static moduledata_t nfslockd_mod = { 2418 "nfslockd", 2419 nfslockd_modevent, 2420 NULL, 2421 }; 2422 DECLARE_MODULE(nfslockd, nfslockd_mod, SI_SUB_VFS, SI_ORDER_ANY); 2423 2424 /* So that loader and kldload(2) can find us, wherever we are.. */ 2425 MODULE_DEPEND(nfslockd, krpc, 1, 1, 1); 2426 MODULE_DEPEND(nfslockd, nfslock, 1, 1, 1); 2427 MODULE_VERSION(nfslockd, 1); 2428