1 /*- 2 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ 3 * Authors: Doug Rabson <dfr@rabson.org> 4 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include "opt_inet6.h" 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/fail.h> 35 #include <sys/fcntl.h> 36 #include <sys/kernel.h> 37 #include <sys/kthread.h> 38 #include <sys/lockf.h> 39 #include <sys/malloc.h> 40 #include <sys/mount.h> 41 #if __FreeBSD_version >= 700000 42 #include <sys/priv.h> 43 #endif 44 #include <sys/proc.h> 45 #include <sys/socket.h> 46 #include <sys/socketvar.h> 47 #include <sys/syscall.h> 48 #include <sys/sysctl.h> 49 #include <sys/sysent.h> 50 #include <sys/syslog.h> 51 #include <sys/sysproto.h> 52 #include <sys/systm.h> 53 #include <sys/taskqueue.h> 54 #include <sys/unistd.h> 55 #include <sys/vnode.h> 56 57 #include <nfs/nfsproto.h> 58 #include <nfs/nfs_lock.h> 59 60 #include <nlm/nlm_prot.h> 61 #include <nlm/sm_inter.h> 62 #include <nlm/nlm.h> 63 #include <rpc/rpc_com.h> 64 #include <rpc/rpcb_prot.h> 65 66 MALLOC_DEFINE(M_NLM, "NLM", "Network Lock Manager"); 67 68 /* 69 * If a host is inactive (and holds no locks) for this amount of 70 * seconds, we consider it idle and stop tracking it. 71 */ 72 #define NLM_IDLE_TIMEOUT 30 73 74 /* 75 * We check the host list for idle every few seconds. 76 */ 77 #define NLM_IDLE_PERIOD 5 78 79 /* 80 * We only look for GRANTED_RES messages for a little while. 81 */ 82 #define NLM_EXPIRE_TIMEOUT 10 83 84 /* 85 * Support for sysctl vfs.nlm.sysid 86 */ 87 static SYSCTL_NODE(_vfs, OID_AUTO, nlm, CTLFLAG_RW, NULL, 88 "Network Lock Manager"); 89 static SYSCTL_NODE(_vfs_nlm, OID_AUTO, sysid, CTLFLAG_RW, NULL, ""); 90 91 /* 92 * Syscall hooks 93 */ 94 static int nlm_syscall_offset = SYS_nlm_syscall; 95 static struct sysent nlm_syscall_prev_sysent; 96 #if __FreeBSD_version < 700000 97 static struct sysent nlm_syscall_sysent = { 98 (sizeof(struct nlm_syscall_args) / sizeof(register_t)) | SYF_MPSAFE, 99 (sy_call_t *) nlm_syscall 100 }; 101 #else 102 MAKE_SYSENT(nlm_syscall); 103 #endif 104 static bool_t nlm_syscall_registered = FALSE; 105 106 /* 107 * Debug level passed in from userland. We also support a sysctl hook 108 * so that it can be changed on a live system. 109 */ 110 static int nlm_debug_level; 111 SYSCTL_INT(_debug, OID_AUTO, nlm_debug, CTLFLAG_RW, &nlm_debug_level, 0, ""); 112 113 #define NLM_DEBUG(_level, args...) \ 114 do { \ 115 if (nlm_debug_level >= (_level)) \ 116 log(LOG_DEBUG, args); \ 117 } while(0) 118 #define NLM_ERR(args...) \ 119 do { \ 120 log(LOG_ERR, args); \ 121 } while(0) 122 123 /* 124 * Grace period handling. The value of nlm_grace_threshold is the 125 * value of time_uptime after which we are serving requests normally. 126 */ 127 static time_t nlm_grace_threshold; 128 129 /* 130 * We check for idle hosts if time_uptime is greater than 131 * nlm_next_idle_check, 132 */ 133 static time_t nlm_next_idle_check; 134 135 /* 136 * A flag to indicate the server is already running. 137 */ 138 static int nlm_is_running; 139 140 /* 141 * A socket to use for RPC - shared by all IPv4 RPC clients. 142 */ 143 static struct socket *nlm_socket; 144 145 #ifdef INET6 146 147 /* 148 * A socket to use for RPC - shared by all IPv6 RPC clients. 149 */ 150 static struct socket *nlm_socket6; 151 152 #endif 153 154 /* 155 * An RPC client handle that can be used to communicate with the local 156 * NSM. 157 */ 158 static CLIENT *nlm_nsm; 159 160 /* 161 * An AUTH handle for the server's creds. 162 */ 163 static AUTH *nlm_auth; 164 165 /* 166 * A zero timeval for sending async RPC messages. 167 */ 168 struct timeval nlm_zero_tv = { 0, 0 }; 169 170 /* 171 * The local NSM state number 172 */ 173 int nlm_nsm_state; 174 175 176 /* 177 * A lock to protect the host list and waiting lock list. 178 */ 179 static struct mtx nlm_global_lock; 180 181 /* 182 * Locks: 183 * (l) locked by nh_lock 184 * (s) only accessed via server RPC which is single threaded 185 * (g) locked by nlm_global_lock 186 * (c) const until freeing 187 * (a) modified using atomic ops 188 */ 189 190 /* 191 * A pending client-side lock request, stored on the nlm_waiting_locks 192 * list. 193 */ 194 struct nlm_waiting_lock { 195 TAILQ_ENTRY(nlm_waiting_lock) nw_link; /* (g) */ 196 bool_t nw_waiting; /* (g) */ 197 nlm4_lock nw_lock; /* (c) */ 198 union nfsfh nw_fh; /* (c) */ 199 struct vnode *nw_vp; /* (c) */ 200 }; 201 TAILQ_HEAD(nlm_waiting_lock_list, nlm_waiting_lock); 202 203 struct nlm_waiting_lock_list nlm_waiting_locks; /* (g) */ 204 205 /* 206 * A pending server-side asynchronous lock request, stored on the 207 * nh_pending list of the NLM host. 208 */ 209 struct nlm_async_lock { 210 TAILQ_ENTRY(nlm_async_lock) af_link; /* (l) host's list of locks */ 211 struct task af_task; /* (c) async callback details */ 212 void *af_cookie; /* (l) lock manager cancel token */ 213 struct vnode *af_vp; /* (l) vnode to lock */ 214 struct flock af_fl; /* (c) lock details */ 215 struct nlm_host *af_host; /* (c) host which is locking */ 216 CLIENT *af_rpc; /* (c) rpc client to send message */ 217 nlm4_testargs af_granted; /* (c) notification details */ 218 time_t af_expiretime; /* (c) notification time */ 219 }; 220 TAILQ_HEAD(nlm_async_lock_list, nlm_async_lock); 221 222 /* 223 * NLM host. 224 */ 225 enum nlm_host_state { 226 NLM_UNMONITORED, 227 NLM_MONITORED, 228 NLM_MONITOR_FAILED, 229 NLM_RECOVERING 230 }; 231 232 struct nlm_rpc { 233 CLIENT *nr_client; /* (l) RPC client handle */ 234 time_t nr_create_time; /* (l) when client was created */ 235 }; 236 237 struct nlm_host { 238 struct mtx nh_lock; 239 volatile u_int nh_refs; /* (a) reference count */ 240 TAILQ_ENTRY(nlm_host) nh_link; /* (g) global list of hosts */ 241 char nh_caller_name[MAXNAMELEN]; /* (c) printable name of host */ 242 uint32_t nh_sysid; /* (c) our allocaed system ID */ 243 char nh_sysid_string[10]; /* (c) string rep. of sysid */ 244 struct sockaddr_storage nh_addr; /* (s) remote address of host */ 245 struct nlm_rpc nh_srvrpc; /* (l) RPC for server replies */ 246 struct nlm_rpc nh_clntrpc; /* (l) RPC for client requests */ 247 rpcvers_t nh_vers; /* (s) NLM version of host */ 248 int nh_state; /* (s) last seen NSM state of host */ 249 enum nlm_host_state nh_monstate; /* (l) local NSM monitoring state */ 250 time_t nh_idle_timeout; /* (s) Time at which host is idle */ 251 struct sysctl_ctx_list nh_sysctl; /* (c) vfs.nlm.sysid nodes */ 252 uint32_t nh_grantcookie; /* (l) grant cookie counter */ 253 struct nlm_async_lock_list nh_pending; /* (l) pending async locks */ 254 struct nlm_async_lock_list nh_granted; /* (l) granted locks */ 255 struct nlm_async_lock_list nh_finished; /* (l) finished async locks */ 256 }; 257 TAILQ_HEAD(nlm_host_list, nlm_host); 258 259 static struct nlm_host_list nlm_hosts; /* (g) */ 260 static uint32_t nlm_next_sysid = 1; /* (g) */ 261 262 static void nlm_host_unmonitor(struct nlm_host *); 263 264 struct nlm_grantcookie { 265 uint32_t ng_sysid; 266 uint32_t ng_cookie; 267 }; 268 269 static inline uint32_t 270 ng_sysid(struct netobj *src) 271 { 272 273 return ((struct nlm_grantcookie *)src->n_bytes)->ng_sysid; 274 } 275 276 static inline uint32_t 277 ng_cookie(struct netobj *src) 278 { 279 280 return ((struct nlm_grantcookie *)src->n_bytes)->ng_cookie; 281 } 282 283 /**********************************************************************/ 284 285 /* 286 * Initialise NLM globals. 287 */ 288 static void 289 nlm_init(void *dummy) 290 { 291 int error; 292 293 mtx_init(&nlm_global_lock, "nlm_global_lock", NULL, MTX_DEF); 294 TAILQ_INIT(&nlm_waiting_locks); 295 TAILQ_INIT(&nlm_hosts); 296 297 error = syscall_register(&nlm_syscall_offset, &nlm_syscall_sysent, 298 &nlm_syscall_prev_sysent, SY_THR_STATIC_KLD); 299 if (error) 300 NLM_ERR("Can't register NLM syscall\n"); 301 else 302 nlm_syscall_registered = TRUE; 303 } 304 SYSINIT(nlm_init, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_init, NULL); 305 306 static void 307 nlm_uninit(void *dummy) 308 { 309 310 if (nlm_syscall_registered) 311 syscall_deregister(&nlm_syscall_offset, 312 &nlm_syscall_prev_sysent); 313 } 314 SYSUNINIT(nlm_uninit, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_uninit, NULL); 315 316 /* 317 * Create a netobj from an arbitrary source. 318 */ 319 void 320 nlm_make_netobj(struct netobj *dst, caddr_t src, size_t srcsize, 321 struct malloc_type *type) 322 { 323 324 dst->n_len = srcsize; 325 dst->n_bytes = malloc(srcsize, type, M_WAITOK); 326 memcpy(dst->n_bytes, src, srcsize); 327 } 328 329 /* 330 * Copy a struct netobj. 331 */ 332 void 333 nlm_copy_netobj(struct netobj *dst, struct netobj *src, 334 struct malloc_type *type) 335 { 336 337 nlm_make_netobj(dst, src->n_bytes, src->n_len, type); 338 } 339 340 341 /* 342 * Create an RPC client handle for the given (address,prog,vers) 343 * triple using UDP. 344 */ 345 static CLIENT * 346 nlm_get_rpc(struct sockaddr *sa, rpcprog_t prog, rpcvers_t vers) 347 { 348 char *wchan = "nlmrcv"; 349 const char* protofmly; 350 struct sockaddr_storage ss; 351 struct socket *so; 352 CLIENT *rpcb; 353 struct timeval timo; 354 RPCB parms; 355 char *uaddr; 356 enum clnt_stat stat = RPC_SUCCESS; 357 int rpcvers = RPCBVERS4; 358 bool_t do_tcp = FALSE; 359 bool_t tryagain = FALSE; 360 struct portmap mapping; 361 u_short port = 0; 362 363 /* 364 * First we need to contact the remote RPCBIND service to find 365 * the right port. 366 */ 367 memcpy(&ss, sa, sa->sa_len); 368 switch (ss.ss_family) { 369 case AF_INET: 370 ((struct sockaddr_in *)&ss)->sin_port = htons(111); 371 protofmly = "inet"; 372 so = nlm_socket; 373 break; 374 375 #ifdef INET6 376 case AF_INET6: 377 ((struct sockaddr_in6 *)&ss)->sin6_port = htons(111); 378 protofmly = "inet6"; 379 so = nlm_socket6; 380 break; 381 #endif 382 383 default: 384 /* 385 * Unsupported address family - fail. 386 */ 387 return (NULL); 388 } 389 390 rpcb = clnt_dg_create(so, (struct sockaddr *)&ss, 391 RPCBPROG, rpcvers, 0, 0); 392 if (!rpcb) 393 return (NULL); 394 395 try_tcp: 396 parms.r_prog = prog; 397 parms.r_vers = vers; 398 if (do_tcp) 399 parms.r_netid = "tcp"; 400 else 401 parms.r_netid = "udp"; 402 parms.r_addr = ""; 403 parms.r_owner = ""; 404 405 /* 406 * Use the default timeout. 407 */ 408 timo.tv_sec = 25; 409 timo.tv_usec = 0; 410 again: 411 switch (rpcvers) { 412 case RPCBVERS4: 413 case RPCBVERS: 414 /* 415 * Try RPCBIND 4 then 3. 416 */ 417 uaddr = NULL; 418 stat = CLNT_CALL(rpcb, (rpcprog_t) RPCBPROC_GETADDR, 419 (xdrproc_t) xdr_rpcb, &parms, 420 (xdrproc_t) xdr_wrapstring, &uaddr, timo); 421 if (stat == RPC_SUCCESS) { 422 /* 423 * We have a reply from the remote RPCBIND - turn it 424 * into an appropriate address and make a new client 425 * that can talk to the remote NLM. 426 * 427 * XXX fixup IPv6 scope ID. 428 */ 429 struct netbuf *a; 430 a = __rpc_uaddr2taddr_af(ss.ss_family, uaddr); 431 if (!a) { 432 tryagain = TRUE; 433 } else { 434 tryagain = FALSE; 435 memcpy(&ss, a->buf, a->len); 436 free(a->buf, M_RPC); 437 free(a, M_RPC); 438 xdr_free((xdrproc_t) xdr_wrapstring, &uaddr); 439 } 440 } 441 if (tryagain || stat == RPC_PROGVERSMISMATCH) { 442 if (rpcvers == RPCBVERS4) 443 rpcvers = RPCBVERS; 444 else if (rpcvers == RPCBVERS) 445 rpcvers = PMAPVERS; 446 CLNT_CONTROL(rpcb, CLSET_VERS, &rpcvers); 447 goto again; 448 } 449 break; 450 case PMAPVERS: 451 /* 452 * Try portmap. 453 */ 454 mapping.pm_prog = parms.r_prog; 455 mapping.pm_vers = parms.r_vers; 456 mapping.pm_prot = do_tcp ? IPPROTO_TCP : IPPROTO_UDP; 457 mapping.pm_port = 0; 458 459 stat = CLNT_CALL(rpcb, (rpcprog_t) PMAPPROC_GETPORT, 460 (xdrproc_t) xdr_portmap, &mapping, 461 (xdrproc_t) xdr_u_short, &port, timo); 462 463 if (stat == RPC_SUCCESS) { 464 switch (ss.ss_family) { 465 case AF_INET: 466 ((struct sockaddr_in *)&ss)->sin_port = 467 htons(port); 468 break; 469 470 #ifdef INET6 471 case AF_INET6: 472 ((struct sockaddr_in6 *)&ss)->sin6_port = 473 htons(port); 474 break; 475 #endif 476 } 477 } 478 break; 479 default: 480 panic("invalid rpcvers %d", rpcvers); 481 } 482 /* 483 * We may have a positive response from the portmapper, but the NLM 484 * service was not found. Make sure we received a valid port. 485 */ 486 switch (ss.ss_family) { 487 case AF_INET: 488 port = ((struct sockaddr_in *)&ss)->sin_port; 489 break; 490 #ifdef INET6 491 case AF_INET6: 492 port = ((struct sockaddr_in6 *)&ss)->sin6_port; 493 break; 494 #endif 495 } 496 if (stat != RPC_SUCCESS || !port) { 497 /* 498 * If we were able to talk to rpcbind or portmap, but the udp 499 * variant wasn't available, ask about tcp. 500 * 501 * XXX - We could also check for a TCP portmapper, but 502 * if the host is running a portmapper at all, we should be able 503 * to hail it over UDP. 504 */ 505 if (stat == RPC_SUCCESS && !do_tcp) { 506 do_tcp = TRUE; 507 goto try_tcp; 508 } 509 510 /* Otherwise, bad news. */ 511 NLM_ERR("NLM: failed to contact remote rpcbind, " 512 "stat = %d, port = %d\n", (int) stat, port); 513 CLNT_DESTROY(rpcb); 514 return (NULL); 515 } 516 517 if (do_tcp) { 518 /* 519 * Destroy the UDP client we used to speak to rpcbind and 520 * recreate as a TCP client. 521 */ 522 struct netconfig *nconf = NULL; 523 524 CLNT_DESTROY(rpcb); 525 526 switch (ss.ss_family) { 527 case AF_INET: 528 nconf = getnetconfigent("tcp"); 529 break; 530 #ifdef INET6 531 case AF_INET6: 532 nconf = getnetconfigent("tcp6"); 533 break; 534 #endif 535 } 536 537 rpcb = clnt_reconnect_create(nconf, (struct sockaddr *)&ss, 538 prog, vers, 0, 0); 539 CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan); 540 rpcb->cl_auth = nlm_auth; 541 542 } else { 543 /* 544 * Re-use the client we used to speak to rpcbind. 545 */ 546 CLNT_CONTROL(rpcb, CLSET_SVC_ADDR, &ss); 547 CLNT_CONTROL(rpcb, CLSET_PROG, &prog); 548 CLNT_CONTROL(rpcb, CLSET_VERS, &vers); 549 CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan); 550 rpcb->cl_auth = nlm_auth; 551 } 552 553 return (rpcb); 554 } 555 556 /* 557 * This async callback after when an async lock request has been 558 * granted. We notify the host which initiated the request. 559 */ 560 static void 561 nlm_lock_callback(void *arg, int pending) 562 { 563 struct nlm_async_lock *af = (struct nlm_async_lock *) arg; 564 struct rpc_callextra ext; 565 566 NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) granted," 567 " cookie %d:%d\n", af, af->af_host->nh_caller_name, 568 af->af_host->nh_sysid, ng_sysid(&af->af_granted.cookie), 569 ng_cookie(&af->af_granted.cookie)); 570 571 /* 572 * Send the results back to the host. 573 * 574 * Note: there is a possible race here with nlm_host_notify 575 * destroying the RPC client. To avoid problems, the first 576 * thing nlm_host_notify does is to cancel pending async lock 577 * requests. 578 */ 579 memset(&ext, 0, sizeof(ext)); 580 ext.rc_auth = nlm_auth; 581 if (af->af_host->nh_vers == NLM_VERS4) { 582 nlm4_granted_msg_4(&af->af_granted, 583 NULL, af->af_rpc, &ext, nlm_zero_tv); 584 } else { 585 /* 586 * Back-convert to legacy protocol 587 */ 588 nlm_testargs granted; 589 granted.cookie = af->af_granted.cookie; 590 granted.exclusive = af->af_granted.exclusive; 591 granted.alock.caller_name = 592 af->af_granted.alock.caller_name; 593 granted.alock.fh = af->af_granted.alock.fh; 594 granted.alock.oh = af->af_granted.alock.oh; 595 granted.alock.svid = af->af_granted.alock.svid; 596 granted.alock.l_offset = 597 af->af_granted.alock.l_offset; 598 granted.alock.l_len = 599 af->af_granted.alock.l_len; 600 601 nlm_granted_msg_1(&granted, 602 NULL, af->af_rpc, &ext, nlm_zero_tv); 603 } 604 605 /* 606 * Move this entry to the nh_granted list. 607 */ 608 af->af_expiretime = time_uptime + NLM_EXPIRE_TIMEOUT; 609 mtx_lock(&af->af_host->nh_lock); 610 TAILQ_REMOVE(&af->af_host->nh_pending, af, af_link); 611 TAILQ_INSERT_TAIL(&af->af_host->nh_granted, af, af_link); 612 mtx_unlock(&af->af_host->nh_lock); 613 } 614 615 /* 616 * Free an async lock request. The request must have been removed from 617 * any list. 618 */ 619 static void 620 nlm_free_async_lock(struct nlm_async_lock *af) 621 { 622 /* 623 * Free an async lock. 624 */ 625 if (af->af_rpc) 626 CLNT_RELEASE(af->af_rpc); 627 xdr_free((xdrproc_t) xdr_nlm4_testargs, &af->af_granted); 628 if (af->af_vp) 629 vrele(af->af_vp); 630 free(af, M_NLM); 631 } 632 633 /* 634 * Cancel our async request - this must be called with 635 * af->nh_host->nh_lock held. This is slightly complicated by a 636 * potential race with our own callback. If we fail to cancel the 637 * lock, it must already have been granted - we make sure our async 638 * task has completed by calling taskqueue_drain in this case. 639 */ 640 static int 641 nlm_cancel_async_lock(struct nlm_async_lock *af) 642 { 643 struct nlm_host *host = af->af_host; 644 int error; 645 646 mtx_assert(&host->nh_lock, MA_OWNED); 647 648 mtx_unlock(&host->nh_lock); 649 650 error = VOP_ADVLOCKASYNC(af->af_vp, NULL, F_CANCEL, &af->af_fl, 651 F_REMOTE, NULL, &af->af_cookie); 652 653 if (error) { 654 /* 655 * We failed to cancel - make sure our callback has 656 * completed before we continue. 657 */ 658 taskqueue_drain(taskqueue_thread, &af->af_task); 659 } 660 661 mtx_lock(&host->nh_lock); 662 663 if (!error) { 664 NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) " 665 "cancelled\n", af, host->nh_caller_name, host->nh_sysid); 666 667 /* 668 * Remove from the nh_pending list and free now that 669 * we are safe from the callback. 670 */ 671 TAILQ_REMOVE(&host->nh_pending, af, af_link); 672 mtx_unlock(&host->nh_lock); 673 nlm_free_async_lock(af); 674 mtx_lock(&host->nh_lock); 675 } 676 677 return (error); 678 } 679 680 static void 681 nlm_check_expired_locks(struct nlm_host *host) 682 { 683 struct nlm_async_lock *af; 684 time_t uptime = time_uptime; 685 686 mtx_lock(&host->nh_lock); 687 while ((af = TAILQ_FIRST(&host->nh_granted)) != NULL 688 && uptime >= af->af_expiretime) { 689 NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) expired," 690 " cookie %d:%d\n", af, af->af_host->nh_caller_name, 691 af->af_host->nh_sysid, ng_sysid(&af->af_granted.cookie), 692 ng_cookie(&af->af_granted.cookie)); 693 TAILQ_REMOVE(&host->nh_granted, af, af_link); 694 mtx_unlock(&host->nh_lock); 695 nlm_free_async_lock(af); 696 mtx_lock(&host->nh_lock); 697 } 698 while ((af = TAILQ_FIRST(&host->nh_finished)) != NULL) { 699 TAILQ_REMOVE(&host->nh_finished, af, af_link); 700 mtx_unlock(&host->nh_lock); 701 nlm_free_async_lock(af); 702 mtx_lock(&host->nh_lock); 703 } 704 mtx_unlock(&host->nh_lock); 705 } 706 707 /* 708 * Free resources used by a host. This is called after the reference 709 * count has reached zero so it doesn't need to worry about locks. 710 */ 711 static void 712 nlm_host_destroy(struct nlm_host *host) 713 { 714 715 mtx_lock(&nlm_global_lock); 716 TAILQ_REMOVE(&nlm_hosts, host, nh_link); 717 mtx_unlock(&nlm_global_lock); 718 719 if (host->nh_srvrpc.nr_client) 720 CLNT_RELEASE(host->nh_srvrpc.nr_client); 721 if (host->nh_clntrpc.nr_client) 722 CLNT_RELEASE(host->nh_clntrpc.nr_client); 723 mtx_destroy(&host->nh_lock); 724 sysctl_ctx_free(&host->nh_sysctl); 725 free(host, M_NLM); 726 } 727 728 /* 729 * Thread start callback for client lock recovery 730 */ 731 static void 732 nlm_client_recovery_start(void *arg) 733 { 734 struct nlm_host *host = (struct nlm_host *) arg; 735 736 NLM_DEBUG(1, "NLM: client lock recovery for %s started\n", 737 host->nh_caller_name); 738 739 nlm_client_recovery(host); 740 741 NLM_DEBUG(1, "NLM: client lock recovery for %s completed\n", 742 host->nh_caller_name); 743 744 host->nh_monstate = NLM_MONITORED; 745 nlm_host_release(host); 746 747 kthread_exit(); 748 } 749 750 /* 751 * This is called when we receive a host state change notification. We 752 * unlock any active locks owned by the host. When rpc.lockd is 753 * shutting down, this function is called with newstate set to zero 754 * which allows us to cancel any pending async locks and clear the 755 * locking state. 756 */ 757 static void 758 nlm_host_notify(struct nlm_host *host, int newstate) 759 { 760 struct nlm_async_lock *af; 761 762 if (newstate) { 763 NLM_DEBUG(1, "NLM: host %s (sysid %d) rebooted, new " 764 "state is %d\n", host->nh_caller_name, 765 host->nh_sysid, newstate); 766 } 767 768 /* 769 * Cancel any pending async locks for this host. 770 */ 771 mtx_lock(&host->nh_lock); 772 while ((af = TAILQ_FIRST(&host->nh_pending)) != NULL) { 773 /* 774 * nlm_cancel_async_lock will remove the entry from 775 * nh_pending and free it. 776 */ 777 nlm_cancel_async_lock(af); 778 } 779 mtx_unlock(&host->nh_lock); 780 nlm_check_expired_locks(host); 781 782 /* 783 * The host just rebooted - trash its locks. 784 */ 785 lf_clearremotesys(host->nh_sysid); 786 host->nh_state = newstate; 787 788 /* 789 * If we have any remote locks for this host (i.e. it 790 * represents a remote NFS server that our local NFS client 791 * has locks for), start a recovery thread. 792 */ 793 if (newstate != 0 794 && host->nh_monstate != NLM_RECOVERING 795 && lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid) > 0) { 796 struct thread *td; 797 host->nh_monstate = NLM_RECOVERING; 798 refcount_acquire(&host->nh_refs); 799 kthread_add(nlm_client_recovery_start, host, curproc, &td, 0, 0, 800 "NFS lock recovery for %s", host->nh_caller_name); 801 } 802 } 803 804 /* 805 * Sysctl handler to count the number of locks for a sysid. 806 */ 807 static int 808 nlm_host_lock_count_sysctl(SYSCTL_HANDLER_ARGS) 809 { 810 struct nlm_host *host; 811 int count; 812 813 host = oidp->oid_arg1; 814 count = lf_countlocks(host->nh_sysid); 815 return sysctl_handle_int(oidp, &count, 0, req); 816 } 817 818 /* 819 * Sysctl handler to count the number of client locks for a sysid. 820 */ 821 static int 822 nlm_host_client_lock_count_sysctl(SYSCTL_HANDLER_ARGS) 823 { 824 struct nlm_host *host; 825 int count; 826 827 host = oidp->oid_arg1; 828 count = lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid); 829 return sysctl_handle_int(oidp, &count, 0, req); 830 } 831 832 /* 833 * Create a new NLM host. 834 */ 835 static struct nlm_host * 836 nlm_create_host(const char* caller_name) 837 { 838 struct nlm_host *host; 839 struct sysctl_oid *oid; 840 841 mtx_assert(&nlm_global_lock, MA_OWNED); 842 843 NLM_DEBUG(1, "NLM: new host %s (sysid %d)\n", 844 caller_name, nlm_next_sysid); 845 host = malloc(sizeof(struct nlm_host), M_NLM, M_NOWAIT|M_ZERO); 846 if (!host) 847 return (NULL); 848 mtx_init(&host->nh_lock, "nh_lock", NULL, MTX_DEF); 849 host->nh_refs = 1; 850 strlcpy(host->nh_caller_name, caller_name, MAXNAMELEN); 851 host->nh_sysid = nlm_next_sysid++; 852 snprintf(host->nh_sysid_string, sizeof(host->nh_sysid_string), 853 "%d", host->nh_sysid); 854 host->nh_vers = 0; 855 host->nh_state = 0; 856 host->nh_monstate = NLM_UNMONITORED; 857 host->nh_grantcookie = 1; 858 TAILQ_INIT(&host->nh_pending); 859 TAILQ_INIT(&host->nh_granted); 860 TAILQ_INIT(&host->nh_finished); 861 TAILQ_INSERT_TAIL(&nlm_hosts, host, nh_link); 862 863 mtx_unlock(&nlm_global_lock); 864 865 sysctl_ctx_init(&host->nh_sysctl); 866 oid = SYSCTL_ADD_NODE(&host->nh_sysctl, 867 SYSCTL_STATIC_CHILDREN(_vfs_nlm_sysid), 868 OID_AUTO, host->nh_sysid_string, CTLFLAG_RD, NULL, ""); 869 SYSCTL_ADD_STRING(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 870 "hostname", CTLFLAG_RD, host->nh_caller_name, 0, ""); 871 SYSCTL_ADD_UINT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 872 "version", CTLFLAG_RD, &host->nh_vers, 0, ""); 873 SYSCTL_ADD_UINT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 874 "monitored", CTLFLAG_RD, &host->nh_monstate, 0, ""); 875 SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 876 "lock_count", CTLTYPE_INT | CTLFLAG_RD, host, 0, 877 nlm_host_lock_count_sysctl, "I", ""); 878 SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 879 "client_lock_count", CTLTYPE_INT | CTLFLAG_RD, host, 0, 880 nlm_host_client_lock_count_sysctl, "I", ""); 881 882 mtx_lock(&nlm_global_lock); 883 884 return (host); 885 } 886 887 /* 888 * Acquire the next sysid for remote locks not handled by the NLM. 889 */ 890 uint32_t 891 nlm_acquire_next_sysid(void) 892 { 893 uint32_t next_sysid; 894 895 mtx_lock(&nlm_global_lock); 896 next_sysid = nlm_next_sysid++; 897 mtx_unlock(&nlm_global_lock); 898 return (next_sysid); 899 } 900 901 /* 902 * Return non-zero if the address parts of the two sockaddrs are the 903 * same. 904 */ 905 static int 906 nlm_compare_addr(const struct sockaddr *a, const struct sockaddr *b) 907 { 908 const struct sockaddr_in *a4, *b4; 909 #ifdef INET6 910 const struct sockaddr_in6 *a6, *b6; 911 #endif 912 913 if (a->sa_family != b->sa_family) 914 return (FALSE); 915 916 switch (a->sa_family) { 917 case AF_INET: 918 a4 = (const struct sockaddr_in *) a; 919 b4 = (const struct sockaddr_in *) b; 920 return !memcmp(&a4->sin_addr, &b4->sin_addr, 921 sizeof(a4->sin_addr)); 922 #ifdef INET6 923 case AF_INET6: 924 a6 = (const struct sockaddr_in6 *) a; 925 b6 = (const struct sockaddr_in6 *) b; 926 return !memcmp(&a6->sin6_addr, &b6->sin6_addr, 927 sizeof(a6->sin6_addr)); 928 #endif 929 } 930 931 return (0); 932 } 933 934 /* 935 * Check for idle hosts and stop monitoring them. We could also free 936 * the host structure here, possibly after a larger timeout but that 937 * would require some care to avoid races with 938 * e.g. nlm_host_lock_count_sysctl. 939 */ 940 static void 941 nlm_check_idle(void) 942 { 943 struct nlm_host *host; 944 945 mtx_assert(&nlm_global_lock, MA_OWNED); 946 947 if (time_uptime <= nlm_next_idle_check) 948 return; 949 950 nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD; 951 952 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 953 if (host->nh_monstate == NLM_MONITORED 954 && time_uptime > host->nh_idle_timeout) { 955 mtx_unlock(&nlm_global_lock); 956 if (lf_countlocks(host->nh_sysid) > 0 957 || lf_countlocks(NLM_SYSID_CLIENT 958 + host->nh_sysid)) { 959 host->nh_idle_timeout = 960 time_uptime + NLM_IDLE_TIMEOUT; 961 mtx_lock(&nlm_global_lock); 962 continue; 963 } 964 nlm_host_unmonitor(host); 965 mtx_lock(&nlm_global_lock); 966 } 967 } 968 } 969 970 /* 971 * Search for an existing NLM host that matches the given name 972 * (typically the caller_name element of an nlm4_lock). If none is 973 * found, create a new host. If 'addr' is non-NULL, record the remote 974 * address of the host so that we can call it back for async 975 * responses. If 'vers' is greater than zero then record the NLM 976 * program version to use to communicate with this client. 977 */ 978 struct nlm_host * 979 nlm_find_host_by_name(const char *name, const struct sockaddr *addr, 980 rpcvers_t vers) 981 { 982 struct nlm_host *host; 983 984 mtx_lock(&nlm_global_lock); 985 986 /* 987 * The remote host is determined by caller_name. 988 */ 989 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 990 if (!strcmp(host->nh_caller_name, name)) 991 break; 992 } 993 994 if (!host) { 995 host = nlm_create_host(name); 996 if (!host) { 997 mtx_unlock(&nlm_global_lock); 998 return (NULL); 999 } 1000 } 1001 refcount_acquire(&host->nh_refs); 1002 1003 host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT; 1004 1005 /* 1006 * If we have an address for the host, record it so that we 1007 * can send async replies etc. 1008 */ 1009 if (addr) { 1010 1011 KASSERT(addr->sa_len < sizeof(struct sockaddr_storage), 1012 ("Strange remote transport address length")); 1013 1014 /* 1015 * If we have seen an address before and we currently 1016 * have an RPC client handle, make sure the address is 1017 * the same, otherwise discard the client handle. 1018 */ 1019 if (host->nh_addr.ss_len && host->nh_srvrpc.nr_client) { 1020 if (!nlm_compare_addr( 1021 (struct sockaddr *) &host->nh_addr, 1022 addr) 1023 || host->nh_vers != vers) { 1024 CLIENT *client; 1025 mtx_lock(&host->nh_lock); 1026 client = host->nh_srvrpc.nr_client; 1027 host->nh_srvrpc.nr_client = NULL; 1028 mtx_unlock(&host->nh_lock); 1029 if (client) { 1030 CLNT_RELEASE(client); 1031 } 1032 } 1033 } 1034 memcpy(&host->nh_addr, addr, addr->sa_len); 1035 host->nh_vers = vers; 1036 } 1037 1038 nlm_check_idle(); 1039 1040 mtx_unlock(&nlm_global_lock); 1041 1042 return (host); 1043 } 1044 1045 /* 1046 * Search for an existing NLM host that matches the given remote 1047 * address. If none is found, create a new host with the requested 1048 * address and remember 'vers' as the NLM protocol version to use for 1049 * that host. 1050 */ 1051 struct nlm_host * 1052 nlm_find_host_by_addr(const struct sockaddr *addr, int vers) 1053 { 1054 /* 1055 * Fake up a name using inet_ntop. This buffer is 1056 * large enough for an IPv6 address. 1057 */ 1058 char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"]; 1059 struct nlm_host *host; 1060 1061 switch (addr->sa_family) { 1062 case AF_INET: 1063 inet_ntop(AF_INET, 1064 &((const struct sockaddr_in *) addr)->sin_addr, 1065 tmp, sizeof tmp); 1066 break; 1067 #ifdef INET6 1068 case AF_INET6: 1069 inet_ntop(AF_INET6, 1070 &((const struct sockaddr_in6 *) addr)->sin6_addr, 1071 tmp, sizeof tmp); 1072 break; 1073 #endif 1074 default: 1075 strlcpy(tmp, "<unknown>", sizeof(tmp)); 1076 } 1077 1078 1079 mtx_lock(&nlm_global_lock); 1080 1081 /* 1082 * The remote host is determined by caller_name. 1083 */ 1084 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 1085 if (nlm_compare_addr(addr, 1086 (const struct sockaddr *) &host->nh_addr)) 1087 break; 1088 } 1089 1090 if (!host) { 1091 host = nlm_create_host(tmp); 1092 if (!host) { 1093 mtx_unlock(&nlm_global_lock); 1094 return (NULL); 1095 } 1096 memcpy(&host->nh_addr, addr, addr->sa_len); 1097 host->nh_vers = vers; 1098 } 1099 refcount_acquire(&host->nh_refs); 1100 1101 host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT; 1102 1103 nlm_check_idle(); 1104 1105 mtx_unlock(&nlm_global_lock); 1106 1107 return (host); 1108 } 1109 1110 /* 1111 * Find the NLM host that matches the value of 'sysid'. If none 1112 * exists, return NULL. 1113 */ 1114 static struct nlm_host * 1115 nlm_find_host_by_sysid(int sysid) 1116 { 1117 struct nlm_host *host; 1118 1119 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 1120 if (host->nh_sysid == sysid) { 1121 refcount_acquire(&host->nh_refs); 1122 return (host); 1123 } 1124 } 1125 1126 return (NULL); 1127 } 1128 1129 void nlm_host_release(struct nlm_host *host) 1130 { 1131 if (refcount_release(&host->nh_refs)) { 1132 /* 1133 * Free the host 1134 */ 1135 nlm_host_destroy(host); 1136 } 1137 } 1138 1139 /* 1140 * Unregister this NLM host with the local NSM due to idleness. 1141 */ 1142 static void 1143 nlm_host_unmonitor(struct nlm_host *host) 1144 { 1145 mon_id smmonid; 1146 sm_stat_res smstat; 1147 struct timeval timo; 1148 enum clnt_stat stat; 1149 1150 NLM_DEBUG(1, "NLM: unmonitoring %s (sysid %d)\n", 1151 host->nh_caller_name, host->nh_sysid); 1152 1153 /* 1154 * We put our assigned system ID value in the priv field to 1155 * make it simpler to find the host if we are notified of a 1156 * host restart. 1157 */ 1158 smmonid.mon_name = host->nh_caller_name; 1159 smmonid.my_id.my_name = "localhost"; 1160 smmonid.my_id.my_prog = NLM_PROG; 1161 smmonid.my_id.my_vers = NLM_SM; 1162 smmonid.my_id.my_proc = NLM_SM_NOTIFY; 1163 1164 timo.tv_sec = 25; 1165 timo.tv_usec = 0; 1166 stat = CLNT_CALL(nlm_nsm, SM_UNMON, 1167 (xdrproc_t) xdr_mon, &smmonid, 1168 (xdrproc_t) xdr_sm_stat, &smstat, timo); 1169 1170 if (stat != RPC_SUCCESS) { 1171 NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat); 1172 return; 1173 } 1174 if (smstat.res_stat == stat_fail) { 1175 NLM_ERR("Local NSM refuses to unmonitor %s\n", 1176 host->nh_caller_name); 1177 return; 1178 } 1179 1180 host->nh_monstate = NLM_UNMONITORED; 1181 } 1182 1183 /* 1184 * Register this NLM host with the local NSM so that we can be 1185 * notified if it reboots. 1186 */ 1187 void 1188 nlm_host_monitor(struct nlm_host *host, int state) 1189 { 1190 mon smmon; 1191 sm_stat_res smstat; 1192 struct timeval timo; 1193 enum clnt_stat stat; 1194 1195 if (state && !host->nh_state) { 1196 /* 1197 * This is the first time we have seen an NSM state 1198 * value for this host. We record it here to help 1199 * detect host reboots. 1200 */ 1201 host->nh_state = state; 1202 NLM_DEBUG(1, "NLM: host %s (sysid %d) has NSM state %d\n", 1203 host->nh_caller_name, host->nh_sysid, state); 1204 } 1205 1206 mtx_lock(&host->nh_lock); 1207 if (host->nh_monstate != NLM_UNMONITORED) { 1208 mtx_unlock(&host->nh_lock); 1209 return; 1210 } 1211 host->nh_monstate = NLM_MONITORED; 1212 mtx_unlock(&host->nh_lock); 1213 1214 NLM_DEBUG(1, "NLM: monitoring %s (sysid %d)\n", 1215 host->nh_caller_name, host->nh_sysid); 1216 1217 /* 1218 * We put our assigned system ID value in the priv field to 1219 * make it simpler to find the host if we are notified of a 1220 * host restart. 1221 */ 1222 smmon.mon_id.mon_name = host->nh_caller_name; 1223 smmon.mon_id.my_id.my_name = "localhost"; 1224 smmon.mon_id.my_id.my_prog = NLM_PROG; 1225 smmon.mon_id.my_id.my_vers = NLM_SM; 1226 smmon.mon_id.my_id.my_proc = NLM_SM_NOTIFY; 1227 memcpy(smmon.priv, &host->nh_sysid, sizeof(host->nh_sysid)); 1228 1229 timo.tv_sec = 25; 1230 timo.tv_usec = 0; 1231 stat = CLNT_CALL(nlm_nsm, SM_MON, 1232 (xdrproc_t) xdr_mon, &smmon, 1233 (xdrproc_t) xdr_sm_stat, &smstat, timo); 1234 1235 if (stat != RPC_SUCCESS) { 1236 NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat); 1237 return; 1238 } 1239 if (smstat.res_stat == stat_fail) { 1240 NLM_ERR("Local NSM refuses to monitor %s\n", 1241 host->nh_caller_name); 1242 mtx_lock(&host->nh_lock); 1243 host->nh_monstate = NLM_MONITOR_FAILED; 1244 mtx_unlock(&host->nh_lock); 1245 return; 1246 } 1247 1248 host->nh_monstate = NLM_MONITORED; 1249 } 1250 1251 /* 1252 * Return an RPC client handle that can be used to talk to the NLM 1253 * running on the given host. 1254 */ 1255 CLIENT * 1256 nlm_host_get_rpc(struct nlm_host *host, bool_t isserver) 1257 { 1258 struct nlm_rpc *rpc; 1259 CLIENT *client; 1260 1261 mtx_lock(&host->nh_lock); 1262 1263 if (isserver) 1264 rpc = &host->nh_srvrpc; 1265 else 1266 rpc = &host->nh_clntrpc; 1267 1268 /* 1269 * We can't hold onto RPC handles for too long - the async 1270 * call/reply protocol used by some NLM clients makes it hard 1271 * to tell when they change port numbers (e.g. after a 1272 * reboot). Note that if a client reboots while it isn't 1273 * holding any locks, it won't bother to notify us. We 1274 * expire the RPC handles after two minutes. 1275 */ 1276 if (rpc->nr_client && time_uptime > rpc->nr_create_time + 2*60) { 1277 client = rpc->nr_client; 1278 rpc->nr_client = NULL; 1279 mtx_unlock(&host->nh_lock); 1280 CLNT_RELEASE(client); 1281 mtx_lock(&host->nh_lock); 1282 } 1283 1284 if (!rpc->nr_client) { 1285 mtx_unlock(&host->nh_lock); 1286 client = nlm_get_rpc((struct sockaddr *)&host->nh_addr, 1287 NLM_PROG, host->nh_vers); 1288 mtx_lock(&host->nh_lock); 1289 1290 if (client) { 1291 if (rpc->nr_client) { 1292 mtx_unlock(&host->nh_lock); 1293 CLNT_DESTROY(client); 1294 mtx_lock(&host->nh_lock); 1295 } else { 1296 rpc->nr_client = client; 1297 rpc->nr_create_time = time_uptime; 1298 } 1299 } 1300 } 1301 1302 client = rpc->nr_client; 1303 if (client) 1304 CLNT_ACQUIRE(client); 1305 mtx_unlock(&host->nh_lock); 1306 1307 return (client); 1308 1309 } 1310 1311 int nlm_host_get_sysid(struct nlm_host *host) 1312 { 1313 1314 return (host->nh_sysid); 1315 } 1316 1317 int 1318 nlm_host_get_state(struct nlm_host *host) 1319 { 1320 1321 return (host->nh_state); 1322 } 1323 1324 void * 1325 nlm_register_wait_lock(struct nlm4_lock *lock, struct vnode *vp) 1326 { 1327 struct nlm_waiting_lock *nw; 1328 1329 nw = malloc(sizeof(struct nlm_waiting_lock), M_NLM, M_WAITOK); 1330 nw->nw_lock = *lock; 1331 memcpy(&nw->nw_fh.fh_bytes, nw->nw_lock.fh.n_bytes, 1332 nw->nw_lock.fh.n_len); 1333 nw->nw_lock.fh.n_bytes = nw->nw_fh.fh_bytes; 1334 nw->nw_waiting = TRUE; 1335 nw->nw_vp = vp; 1336 mtx_lock(&nlm_global_lock); 1337 TAILQ_INSERT_TAIL(&nlm_waiting_locks, nw, nw_link); 1338 mtx_unlock(&nlm_global_lock); 1339 1340 return nw; 1341 } 1342 1343 void 1344 nlm_deregister_wait_lock(void *handle) 1345 { 1346 struct nlm_waiting_lock *nw = handle; 1347 1348 mtx_lock(&nlm_global_lock); 1349 TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link); 1350 mtx_unlock(&nlm_global_lock); 1351 1352 free(nw, M_NLM); 1353 } 1354 1355 int 1356 nlm_wait_lock(void *handle, int timo) 1357 { 1358 struct nlm_waiting_lock *nw = handle; 1359 int error, stops_deferred; 1360 1361 /* 1362 * If the granted message arrived before we got here, 1363 * nw->nw_waiting will be FALSE - in that case, don't sleep. 1364 */ 1365 mtx_lock(&nlm_global_lock); 1366 error = 0; 1367 if (nw->nw_waiting) { 1368 stops_deferred = sigdeferstop(SIGDEFERSTOP_ERESTART); 1369 error = msleep(nw, &nlm_global_lock, PCATCH, "nlmlock", timo); 1370 sigallowstop(stops_deferred); 1371 } 1372 TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link); 1373 if (error) { 1374 /* 1375 * The granted message may arrive after the 1376 * interrupt/timeout but before we manage to lock the 1377 * mutex. Detect this by examining nw_lock. 1378 */ 1379 if (!nw->nw_waiting) 1380 error = 0; 1381 } else { 1382 /* 1383 * If nlm_cancel_wait is called, then error will be 1384 * zero but nw_waiting will still be TRUE. We 1385 * translate this into EINTR. 1386 */ 1387 if (nw->nw_waiting) 1388 error = EINTR; 1389 } 1390 mtx_unlock(&nlm_global_lock); 1391 1392 free(nw, M_NLM); 1393 1394 return (error); 1395 } 1396 1397 void 1398 nlm_cancel_wait(struct vnode *vp) 1399 { 1400 struct nlm_waiting_lock *nw; 1401 1402 mtx_lock(&nlm_global_lock); 1403 TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) { 1404 if (nw->nw_vp == vp) { 1405 wakeup(nw); 1406 } 1407 } 1408 mtx_unlock(&nlm_global_lock); 1409 } 1410 1411 1412 /**********************************************************************/ 1413 1414 /* 1415 * Syscall interface with userland. 1416 */ 1417 1418 extern void nlm_prog_0(struct svc_req *rqstp, SVCXPRT *transp); 1419 extern void nlm_prog_1(struct svc_req *rqstp, SVCXPRT *transp); 1420 extern void nlm_prog_3(struct svc_req *rqstp, SVCXPRT *transp); 1421 extern void nlm_prog_4(struct svc_req *rqstp, SVCXPRT *transp); 1422 1423 static int 1424 nlm_register_services(SVCPOOL *pool, int addr_count, char **addrs) 1425 { 1426 static rpcvers_t versions[] = { 1427 NLM_SM, NLM_VERS, NLM_VERSX, NLM_VERS4 1428 }; 1429 static void (*dispatchers[])(struct svc_req *, SVCXPRT *) = { 1430 nlm_prog_0, nlm_prog_1, nlm_prog_3, nlm_prog_4 1431 }; 1432 1433 SVCXPRT **xprts; 1434 char netid[16]; 1435 char uaddr[128]; 1436 struct netconfig *nconf; 1437 int i, j, error; 1438 1439 if (!addr_count) { 1440 NLM_ERR("NLM: no service addresses given - can't start server"); 1441 return (EINVAL); 1442 } 1443 1444 if (addr_count < 0 || addr_count > 256 ) { 1445 NLM_ERR("NLM: too many service addresses (%d) given, " 1446 "max 256 - can't start server\n", addr_count); 1447 return (EINVAL); 1448 } 1449 1450 xprts = malloc(addr_count * sizeof(SVCXPRT *), M_NLM, M_WAITOK|M_ZERO); 1451 for (i = 0; i < nitems(versions); i++) { 1452 for (j = 0; j < addr_count; j++) { 1453 /* 1454 * Create transports for the first version and 1455 * then just register everything else to the 1456 * same transports. 1457 */ 1458 if (i == 0) { 1459 char *up; 1460 1461 error = copyin(&addrs[2*j], &up, 1462 sizeof(char*)); 1463 if (error) 1464 goto out; 1465 error = copyinstr(up, netid, sizeof(netid), 1466 NULL); 1467 if (error) 1468 goto out; 1469 error = copyin(&addrs[2*j+1], &up, 1470 sizeof(char*)); 1471 if (error) 1472 goto out; 1473 error = copyinstr(up, uaddr, sizeof(uaddr), 1474 NULL); 1475 if (error) 1476 goto out; 1477 nconf = getnetconfigent(netid); 1478 if (!nconf) { 1479 NLM_ERR("Can't lookup netid %s\n", 1480 netid); 1481 error = EINVAL; 1482 goto out; 1483 } 1484 xprts[j] = svc_tp_create(pool, dispatchers[i], 1485 NLM_PROG, versions[i], uaddr, nconf); 1486 if (!xprts[j]) { 1487 NLM_ERR("NLM: unable to create " 1488 "(NLM_PROG, %d).\n", versions[i]); 1489 error = EINVAL; 1490 goto out; 1491 } 1492 freenetconfigent(nconf); 1493 } else { 1494 nconf = getnetconfigent(xprts[j]->xp_netid); 1495 rpcb_unset(NLM_PROG, versions[i], nconf); 1496 if (!svc_reg(xprts[j], NLM_PROG, versions[i], 1497 dispatchers[i], nconf)) { 1498 NLM_ERR("NLM: can't register " 1499 "(NLM_PROG, %d)\n", versions[i]); 1500 error = EINVAL; 1501 goto out; 1502 } 1503 } 1504 } 1505 } 1506 error = 0; 1507 out: 1508 for (j = 0; j < addr_count; j++) { 1509 if (xprts[j]) 1510 SVC_RELEASE(xprts[j]); 1511 } 1512 free(xprts, M_NLM); 1513 return (error); 1514 } 1515 1516 /* 1517 * Main server entry point. Contacts the local NSM to get its current 1518 * state and send SM_UNMON_ALL. Registers the NLM services and then 1519 * services requests. Does not return until the server is interrupted 1520 * by a signal. 1521 */ 1522 static int 1523 nlm_server_main(int addr_count, char **addrs) 1524 { 1525 struct thread *td = curthread; 1526 int error; 1527 SVCPOOL *pool = NULL; 1528 struct sockopt opt; 1529 int portlow; 1530 #ifdef INET6 1531 struct sockaddr_in6 sin6; 1532 #endif 1533 struct sockaddr_in sin; 1534 my_id id; 1535 sm_stat smstat; 1536 struct timeval timo; 1537 enum clnt_stat stat; 1538 struct nlm_host *host, *nhost; 1539 struct nlm_waiting_lock *nw; 1540 vop_advlock_t *old_nfs_advlock; 1541 vop_reclaim_t *old_nfs_reclaim; 1542 1543 if (nlm_is_running != 0) { 1544 NLM_ERR("NLM: can't start server - " 1545 "it appears to be running already\n"); 1546 return (EPERM); 1547 } 1548 1549 if (nlm_socket == NULL) { 1550 memset(&opt, 0, sizeof(opt)); 1551 1552 error = socreate(AF_INET, &nlm_socket, SOCK_DGRAM, 0, 1553 td->td_ucred, td); 1554 if (error) { 1555 NLM_ERR("NLM: can't create IPv4 socket - error %d\n", 1556 error); 1557 return (error); 1558 } 1559 opt.sopt_dir = SOPT_SET; 1560 opt.sopt_level = IPPROTO_IP; 1561 opt.sopt_name = IP_PORTRANGE; 1562 portlow = IP_PORTRANGE_LOW; 1563 opt.sopt_val = &portlow; 1564 opt.sopt_valsize = sizeof(portlow); 1565 sosetopt(nlm_socket, &opt); 1566 1567 #ifdef INET6 1568 nlm_socket6 = NULL; 1569 error = socreate(AF_INET6, &nlm_socket6, SOCK_DGRAM, 0, 1570 td->td_ucred, td); 1571 if (error) { 1572 NLM_ERR("NLM: can't create IPv6 socket - error %d\n", 1573 error); 1574 soclose(nlm_socket); 1575 nlm_socket = NULL; 1576 return (error); 1577 } 1578 opt.sopt_dir = SOPT_SET; 1579 opt.sopt_level = IPPROTO_IPV6; 1580 opt.sopt_name = IPV6_PORTRANGE; 1581 portlow = IPV6_PORTRANGE_LOW; 1582 opt.sopt_val = &portlow; 1583 opt.sopt_valsize = sizeof(portlow); 1584 sosetopt(nlm_socket6, &opt); 1585 #endif 1586 } 1587 1588 nlm_auth = authunix_create(curthread->td_ucred); 1589 1590 #ifdef INET6 1591 memset(&sin6, 0, sizeof(sin6)); 1592 sin6.sin6_len = sizeof(sin6); 1593 sin6.sin6_family = AF_INET6; 1594 sin6.sin6_addr = in6addr_loopback; 1595 nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin6, SM_PROG, SM_VERS); 1596 if (!nlm_nsm) { 1597 #endif 1598 memset(&sin, 0, sizeof(sin)); 1599 sin.sin_len = sizeof(sin); 1600 sin.sin_family = AF_INET; 1601 sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); 1602 nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin, SM_PROG, 1603 SM_VERS); 1604 #ifdef INET6 1605 } 1606 #endif 1607 1608 if (!nlm_nsm) { 1609 NLM_ERR("Can't start NLM - unable to contact NSM\n"); 1610 error = EINVAL; 1611 goto out; 1612 } 1613 1614 pool = svcpool_create("NLM", NULL); 1615 1616 error = nlm_register_services(pool, addr_count, addrs); 1617 if (error) 1618 goto out; 1619 1620 memset(&id, 0, sizeof(id)); 1621 id.my_name = "NFS NLM"; 1622 1623 timo.tv_sec = 25; 1624 timo.tv_usec = 0; 1625 stat = CLNT_CALL(nlm_nsm, SM_UNMON_ALL, 1626 (xdrproc_t) xdr_my_id, &id, 1627 (xdrproc_t) xdr_sm_stat, &smstat, timo); 1628 1629 if (stat != RPC_SUCCESS) { 1630 struct rpc_err err; 1631 1632 CLNT_GETERR(nlm_nsm, &err); 1633 NLM_ERR("NLM: unexpected error contacting NSM, " 1634 "stat=%d, errno=%d\n", stat, err.re_errno); 1635 error = EINVAL; 1636 goto out; 1637 } 1638 nlm_is_running = 1; 1639 1640 NLM_DEBUG(1, "NLM: local NSM state is %d\n", smstat.state); 1641 nlm_nsm_state = smstat.state; 1642 1643 old_nfs_advlock = nfs_advlock_p; 1644 nfs_advlock_p = nlm_advlock; 1645 old_nfs_reclaim = nfs_reclaim_p; 1646 nfs_reclaim_p = nlm_reclaim; 1647 1648 svc_run(pool); 1649 error = 0; 1650 1651 nfs_advlock_p = old_nfs_advlock; 1652 nfs_reclaim_p = old_nfs_reclaim; 1653 1654 out: 1655 nlm_is_running = 0; 1656 if (pool) 1657 svcpool_destroy(pool); 1658 1659 /* 1660 * We are finished communicating with the NSM. 1661 */ 1662 if (nlm_nsm) { 1663 CLNT_RELEASE(nlm_nsm); 1664 nlm_nsm = NULL; 1665 } 1666 1667 /* 1668 * Trash all the existing state so that if the server 1669 * restarts, it gets a clean slate. This is complicated by the 1670 * possibility that there may be other threads trying to make 1671 * client locking requests. 1672 * 1673 * First we fake a client reboot notification which will 1674 * cancel any pending async locks and purge remote lock state 1675 * from the local lock manager. We release the reference from 1676 * nlm_hosts to the host (which may remove it from the list 1677 * and free it). After this phase, the only entries in the 1678 * nlm_host list should be from other threads performing 1679 * client lock requests. 1680 */ 1681 mtx_lock(&nlm_global_lock); 1682 TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) { 1683 wakeup(nw); 1684 } 1685 TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, nhost) { 1686 mtx_unlock(&nlm_global_lock); 1687 nlm_host_notify(host, 0); 1688 nlm_host_release(host); 1689 mtx_lock(&nlm_global_lock); 1690 } 1691 mtx_unlock(&nlm_global_lock); 1692 1693 AUTH_DESTROY(nlm_auth); 1694 1695 return (error); 1696 } 1697 1698 int 1699 sys_nlm_syscall(struct thread *td, struct nlm_syscall_args *uap) 1700 { 1701 int error; 1702 1703 #if __FreeBSD_version >= 700000 1704 error = priv_check(td, PRIV_NFS_LOCKD); 1705 #else 1706 error = suser(td); 1707 #endif 1708 if (error) 1709 return (error); 1710 1711 nlm_debug_level = uap->debug_level; 1712 nlm_grace_threshold = time_uptime + uap->grace_period; 1713 nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD; 1714 1715 return nlm_server_main(uap->addr_count, uap->addrs); 1716 } 1717 1718 /**********************************************************************/ 1719 1720 /* 1721 * NLM implementation details, called from the RPC stubs. 1722 */ 1723 1724 1725 void 1726 nlm_sm_notify(struct nlm_sm_status *argp) 1727 { 1728 uint32_t sysid; 1729 struct nlm_host *host; 1730 1731 NLM_DEBUG(3, "nlm_sm_notify(): mon_name = %s\n", argp->mon_name); 1732 memcpy(&sysid, &argp->priv, sizeof(sysid)); 1733 host = nlm_find_host_by_sysid(sysid); 1734 if (host) { 1735 nlm_host_notify(host, argp->state); 1736 nlm_host_release(host); 1737 } 1738 } 1739 1740 static void 1741 nlm_convert_to_fhandle_t(fhandle_t *fhp, struct netobj *p) 1742 { 1743 memcpy(fhp, p->n_bytes, sizeof(fhandle_t)); 1744 } 1745 1746 struct vfs_state { 1747 struct mount *vs_mp; 1748 struct vnode *vs_vp; 1749 int vs_vnlocked; 1750 }; 1751 1752 static int 1753 nlm_get_vfs_state(struct nlm_host *host, struct svc_req *rqstp, 1754 fhandle_t *fhp, struct vfs_state *vs, accmode_t accmode) 1755 { 1756 int error, exflags; 1757 struct ucred *cred = NULL, *credanon = NULL; 1758 1759 memset(vs, 0, sizeof(*vs)); 1760 1761 vs->vs_mp = vfs_getvfs(&fhp->fh_fsid); 1762 if (!vs->vs_mp) { 1763 return (ESTALE); 1764 } 1765 1766 /* accmode == 0 means don't check, since it is an unlock. */ 1767 if (accmode != 0) { 1768 error = VFS_CHECKEXP(vs->vs_mp, 1769 (struct sockaddr *)&host->nh_addr, &exflags, &credanon, 1770 NULL, NULL); 1771 if (error) 1772 goto out; 1773 1774 if (exflags & MNT_EXRDONLY || 1775 (vs->vs_mp->mnt_flag & MNT_RDONLY)) { 1776 error = EROFS; 1777 goto out; 1778 } 1779 } 1780 1781 error = VFS_FHTOVP(vs->vs_mp, &fhp->fh_fid, LK_EXCLUSIVE, &vs->vs_vp); 1782 if (error) 1783 goto out; 1784 vs->vs_vnlocked = TRUE; 1785 1786 if (accmode != 0) { 1787 if (!svc_getcred(rqstp, &cred, NULL)) { 1788 error = EINVAL; 1789 goto out; 1790 } 1791 if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) { 1792 crfree(cred); 1793 cred = credanon; 1794 credanon = NULL; 1795 } 1796 1797 /* 1798 * Check cred. 1799 */ 1800 error = VOP_ACCESS(vs->vs_vp, accmode, cred, curthread); 1801 /* 1802 * If this failed and accmode != VWRITE, try again with 1803 * VWRITE to maintain backwards compatibility with the 1804 * old code that always used VWRITE. 1805 */ 1806 if (error != 0 && accmode != VWRITE) 1807 error = VOP_ACCESS(vs->vs_vp, VWRITE, cred, curthread); 1808 if (error) 1809 goto out; 1810 } 1811 1812 #if __FreeBSD_version < 800011 1813 VOP_UNLOCK(vs->vs_vp, 0, curthread); 1814 #else 1815 VOP_UNLOCK(vs->vs_vp, 0); 1816 #endif 1817 vs->vs_vnlocked = FALSE; 1818 1819 out: 1820 if (cred) 1821 crfree(cred); 1822 if (credanon) 1823 crfree(credanon); 1824 1825 return (error); 1826 } 1827 1828 static void 1829 nlm_release_vfs_state(struct vfs_state *vs) 1830 { 1831 1832 if (vs->vs_vp) { 1833 if (vs->vs_vnlocked) 1834 vput(vs->vs_vp); 1835 else 1836 vrele(vs->vs_vp); 1837 } 1838 if (vs->vs_mp) 1839 vfs_rel(vs->vs_mp); 1840 } 1841 1842 static nlm4_stats 1843 nlm_convert_error(int error) 1844 { 1845 1846 if (error == ESTALE) 1847 return nlm4_stale_fh; 1848 else if (error == EROFS) 1849 return nlm4_rofs; 1850 else 1851 return nlm4_failed; 1852 } 1853 1854 int 1855 nlm_do_test(nlm4_testargs *argp, nlm4_testres *result, struct svc_req *rqstp, 1856 CLIENT **rpcp) 1857 { 1858 fhandle_t fh; 1859 struct vfs_state vs; 1860 struct nlm_host *host, *bhost; 1861 int error, sysid; 1862 struct flock fl; 1863 accmode_t accmode; 1864 1865 memset(result, 0, sizeof(*result)); 1866 memset(&vs, 0, sizeof(vs)); 1867 1868 host = nlm_find_host_by_name(argp->alock.caller_name, 1869 svc_getrpccaller(rqstp), rqstp->rq_vers); 1870 if (!host) { 1871 result->stat.stat = nlm4_denied_nolocks; 1872 return (ENOMEM); 1873 } 1874 1875 NLM_DEBUG(3, "nlm_do_test(): caller_name = %s (sysid = %d)\n", 1876 host->nh_caller_name, host->nh_sysid); 1877 1878 nlm_check_expired_locks(host); 1879 sysid = host->nh_sysid; 1880 1881 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 1882 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 1883 1884 if (time_uptime < nlm_grace_threshold) { 1885 result->stat.stat = nlm4_denied_grace_period; 1886 goto out; 1887 } 1888 1889 accmode = argp->exclusive ? VWRITE : VREAD; 1890 error = nlm_get_vfs_state(host, rqstp, &fh, &vs, accmode); 1891 if (error) { 1892 result->stat.stat = nlm_convert_error(error); 1893 goto out; 1894 } 1895 1896 fl.l_start = argp->alock.l_offset; 1897 fl.l_len = argp->alock.l_len; 1898 fl.l_pid = argp->alock.svid; 1899 fl.l_sysid = sysid; 1900 fl.l_whence = SEEK_SET; 1901 if (argp->exclusive) 1902 fl.l_type = F_WRLCK; 1903 else 1904 fl.l_type = F_RDLCK; 1905 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_GETLK, &fl, F_REMOTE); 1906 if (error) { 1907 result->stat.stat = nlm4_failed; 1908 goto out; 1909 } 1910 1911 if (fl.l_type == F_UNLCK) { 1912 result->stat.stat = nlm4_granted; 1913 } else { 1914 result->stat.stat = nlm4_denied; 1915 result->stat.nlm4_testrply_u.holder.exclusive = 1916 (fl.l_type == F_WRLCK); 1917 result->stat.nlm4_testrply_u.holder.svid = fl.l_pid; 1918 bhost = nlm_find_host_by_sysid(fl.l_sysid); 1919 if (bhost) { 1920 /* 1921 * We don't have any useful way of recording 1922 * the value of oh used in the original lock 1923 * request. Ideally, the test reply would have 1924 * a space for the owning host's name allowing 1925 * our caller's NLM to keep track. 1926 * 1927 * As far as I can see, Solaris uses an eight 1928 * byte structure for oh which contains a four 1929 * byte pid encoded in local byte order and 1930 * the first four bytes of the host 1931 * name. Linux uses a variable length string 1932 * 'pid@hostname' in ascii but doesn't even 1933 * return that in test replies. 1934 * 1935 * For the moment, return nothing in oh 1936 * (already zero'ed above). 1937 */ 1938 nlm_host_release(bhost); 1939 } 1940 result->stat.nlm4_testrply_u.holder.l_offset = fl.l_start; 1941 result->stat.nlm4_testrply_u.holder.l_len = fl.l_len; 1942 } 1943 1944 out: 1945 nlm_release_vfs_state(&vs); 1946 if (rpcp) 1947 *rpcp = nlm_host_get_rpc(host, TRUE); 1948 nlm_host_release(host); 1949 return (0); 1950 } 1951 1952 int 1953 nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp, 1954 bool_t monitor, CLIENT **rpcp) 1955 { 1956 fhandle_t fh; 1957 struct vfs_state vs; 1958 struct nlm_host *host; 1959 int error, sysid; 1960 struct flock fl; 1961 accmode_t accmode; 1962 1963 memset(result, 0, sizeof(*result)); 1964 memset(&vs, 0, sizeof(vs)); 1965 1966 host = nlm_find_host_by_name(argp->alock.caller_name, 1967 svc_getrpccaller(rqstp), rqstp->rq_vers); 1968 if (!host) { 1969 result->stat.stat = nlm4_denied_nolocks; 1970 return (ENOMEM); 1971 } 1972 1973 NLM_DEBUG(3, "nlm_do_lock(): caller_name = %s (sysid = %d)\n", 1974 host->nh_caller_name, host->nh_sysid); 1975 1976 if (monitor && host->nh_state && argp->state 1977 && host->nh_state != argp->state) { 1978 /* 1979 * The host rebooted without telling us. Trash its 1980 * locks. 1981 */ 1982 nlm_host_notify(host, argp->state); 1983 } 1984 1985 nlm_check_expired_locks(host); 1986 sysid = host->nh_sysid; 1987 1988 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 1989 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 1990 1991 if (time_uptime < nlm_grace_threshold && !argp->reclaim) { 1992 result->stat.stat = nlm4_denied_grace_period; 1993 goto out; 1994 } 1995 1996 accmode = argp->exclusive ? VWRITE : VREAD; 1997 error = nlm_get_vfs_state(host, rqstp, &fh, &vs, accmode); 1998 if (error) { 1999 result->stat.stat = nlm_convert_error(error); 2000 goto out; 2001 } 2002 2003 fl.l_start = argp->alock.l_offset; 2004 fl.l_len = argp->alock.l_len; 2005 fl.l_pid = argp->alock.svid; 2006 fl.l_sysid = sysid; 2007 fl.l_whence = SEEK_SET; 2008 if (argp->exclusive) 2009 fl.l_type = F_WRLCK; 2010 else 2011 fl.l_type = F_RDLCK; 2012 if (argp->block) { 2013 struct nlm_async_lock *af; 2014 CLIENT *client; 2015 struct nlm_grantcookie cookie; 2016 2017 /* 2018 * First, make sure we can contact the host's NLM. 2019 */ 2020 client = nlm_host_get_rpc(host, TRUE); 2021 if (!client) { 2022 result->stat.stat = nlm4_failed; 2023 goto out; 2024 } 2025 2026 /* 2027 * First we need to check and see if there is an 2028 * existing blocked lock that matches. This could be a 2029 * badly behaved client or an RPC re-send. If we find 2030 * one, just return nlm4_blocked. 2031 */ 2032 mtx_lock(&host->nh_lock); 2033 TAILQ_FOREACH(af, &host->nh_pending, af_link) { 2034 if (af->af_fl.l_start == fl.l_start 2035 && af->af_fl.l_len == fl.l_len 2036 && af->af_fl.l_pid == fl.l_pid 2037 && af->af_fl.l_type == fl.l_type) { 2038 break; 2039 } 2040 } 2041 if (!af) { 2042 cookie.ng_sysid = host->nh_sysid; 2043 cookie.ng_cookie = host->nh_grantcookie++; 2044 } 2045 mtx_unlock(&host->nh_lock); 2046 if (af) { 2047 CLNT_RELEASE(client); 2048 result->stat.stat = nlm4_blocked; 2049 goto out; 2050 } 2051 2052 af = malloc(sizeof(struct nlm_async_lock), M_NLM, 2053 M_WAITOK|M_ZERO); 2054 TASK_INIT(&af->af_task, 0, nlm_lock_callback, af); 2055 af->af_vp = vs.vs_vp; 2056 af->af_fl = fl; 2057 af->af_host = host; 2058 af->af_rpc = client; 2059 /* 2060 * We use M_RPC here so that we can xdr_free the thing 2061 * later. 2062 */ 2063 nlm_make_netobj(&af->af_granted.cookie, 2064 (caddr_t)&cookie, sizeof(cookie), M_RPC); 2065 af->af_granted.exclusive = argp->exclusive; 2066 af->af_granted.alock.caller_name = 2067 strdup(argp->alock.caller_name, M_RPC); 2068 nlm_copy_netobj(&af->af_granted.alock.fh, 2069 &argp->alock.fh, M_RPC); 2070 nlm_copy_netobj(&af->af_granted.alock.oh, 2071 &argp->alock.oh, M_RPC); 2072 af->af_granted.alock.svid = argp->alock.svid; 2073 af->af_granted.alock.l_offset = argp->alock.l_offset; 2074 af->af_granted.alock.l_len = argp->alock.l_len; 2075 2076 /* 2077 * Put the entry on the pending list before calling 2078 * VOP_ADVLOCKASYNC. We do this in case the lock 2079 * request was blocked (returning EINPROGRESS) but 2080 * then granted before we manage to run again. The 2081 * client may receive the granted message before we 2082 * send our blocked reply but thats their problem. 2083 */ 2084 mtx_lock(&host->nh_lock); 2085 TAILQ_INSERT_TAIL(&host->nh_pending, af, af_link); 2086 mtx_unlock(&host->nh_lock); 2087 2088 error = VOP_ADVLOCKASYNC(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE, 2089 &af->af_task, &af->af_cookie); 2090 2091 /* 2092 * If the lock completed synchronously, just free the 2093 * tracking structure now. 2094 */ 2095 if (error != EINPROGRESS) { 2096 CLNT_RELEASE(af->af_rpc); 2097 mtx_lock(&host->nh_lock); 2098 TAILQ_REMOVE(&host->nh_pending, af, af_link); 2099 mtx_unlock(&host->nh_lock); 2100 xdr_free((xdrproc_t) xdr_nlm4_testargs, 2101 &af->af_granted); 2102 free(af, M_NLM); 2103 } else { 2104 NLM_DEBUG(2, "NLM: pending async lock %p for %s " 2105 "(sysid %d)\n", af, host->nh_caller_name, sysid); 2106 /* 2107 * Don't vrele the vnode just yet - this must 2108 * wait until either the async callback 2109 * happens or the lock is cancelled. 2110 */ 2111 vs.vs_vp = NULL; 2112 } 2113 } else { 2114 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE); 2115 } 2116 2117 if (error) { 2118 if (error == EINPROGRESS) { 2119 result->stat.stat = nlm4_blocked; 2120 } else if (error == EDEADLK) { 2121 result->stat.stat = nlm4_deadlck; 2122 } else if (error == EAGAIN) { 2123 result->stat.stat = nlm4_denied; 2124 } else { 2125 result->stat.stat = nlm4_failed; 2126 } 2127 } else { 2128 if (monitor) 2129 nlm_host_monitor(host, argp->state); 2130 result->stat.stat = nlm4_granted; 2131 } 2132 2133 out: 2134 nlm_release_vfs_state(&vs); 2135 if (rpcp) 2136 *rpcp = nlm_host_get_rpc(host, TRUE); 2137 nlm_host_release(host); 2138 return (0); 2139 } 2140 2141 int 2142 nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *result, struct svc_req *rqstp, 2143 CLIENT **rpcp) 2144 { 2145 fhandle_t fh; 2146 struct vfs_state vs; 2147 struct nlm_host *host; 2148 int error, sysid; 2149 struct flock fl; 2150 struct nlm_async_lock *af; 2151 2152 memset(result, 0, sizeof(*result)); 2153 memset(&vs, 0, sizeof(vs)); 2154 2155 host = nlm_find_host_by_name(argp->alock.caller_name, 2156 svc_getrpccaller(rqstp), rqstp->rq_vers); 2157 if (!host) { 2158 result->stat.stat = nlm4_denied_nolocks; 2159 return (ENOMEM); 2160 } 2161 2162 NLM_DEBUG(3, "nlm_do_cancel(): caller_name = %s (sysid = %d)\n", 2163 host->nh_caller_name, host->nh_sysid); 2164 2165 nlm_check_expired_locks(host); 2166 sysid = host->nh_sysid; 2167 2168 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 2169 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 2170 2171 if (time_uptime < nlm_grace_threshold) { 2172 result->stat.stat = nlm4_denied_grace_period; 2173 goto out; 2174 } 2175 2176 error = nlm_get_vfs_state(host, rqstp, &fh, &vs, (accmode_t)0); 2177 if (error) { 2178 result->stat.stat = nlm_convert_error(error); 2179 goto out; 2180 } 2181 2182 fl.l_start = argp->alock.l_offset; 2183 fl.l_len = argp->alock.l_len; 2184 fl.l_pid = argp->alock.svid; 2185 fl.l_sysid = sysid; 2186 fl.l_whence = SEEK_SET; 2187 if (argp->exclusive) 2188 fl.l_type = F_WRLCK; 2189 else 2190 fl.l_type = F_RDLCK; 2191 2192 /* 2193 * First we need to try and find the async lock request - if 2194 * there isn't one, we give up and return nlm4_denied. 2195 */ 2196 mtx_lock(&host->nh_lock); 2197 2198 TAILQ_FOREACH(af, &host->nh_pending, af_link) { 2199 if (af->af_fl.l_start == fl.l_start 2200 && af->af_fl.l_len == fl.l_len 2201 && af->af_fl.l_pid == fl.l_pid 2202 && af->af_fl.l_type == fl.l_type) { 2203 break; 2204 } 2205 } 2206 2207 if (!af) { 2208 mtx_unlock(&host->nh_lock); 2209 result->stat.stat = nlm4_denied; 2210 goto out; 2211 } 2212 2213 error = nlm_cancel_async_lock(af); 2214 2215 if (error) { 2216 result->stat.stat = nlm4_denied; 2217 } else { 2218 result->stat.stat = nlm4_granted; 2219 } 2220 2221 mtx_unlock(&host->nh_lock); 2222 2223 out: 2224 nlm_release_vfs_state(&vs); 2225 if (rpcp) 2226 *rpcp = nlm_host_get_rpc(host, TRUE); 2227 nlm_host_release(host); 2228 return (0); 2229 } 2230 2231 int 2232 nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *result, struct svc_req *rqstp, 2233 CLIENT **rpcp) 2234 { 2235 fhandle_t fh; 2236 struct vfs_state vs; 2237 struct nlm_host *host; 2238 int error, sysid; 2239 struct flock fl; 2240 2241 memset(result, 0, sizeof(*result)); 2242 memset(&vs, 0, sizeof(vs)); 2243 2244 host = nlm_find_host_by_name(argp->alock.caller_name, 2245 svc_getrpccaller(rqstp), rqstp->rq_vers); 2246 if (!host) { 2247 result->stat.stat = nlm4_denied_nolocks; 2248 return (ENOMEM); 2249 } 2250 2251 NLM_DEBUG(3, "nlm_do_unlock(): caller_name = %s (sysid = %d)\n", 2252 host->nh_caller_name, host->nh_sysid); 2253 2254 nlm_check_expired_locks(host); 2255 sysid = host->nh_sysid; 2256 2257 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 2258 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 2259 2260 if (time_uptime < nlm_grace_threshold) { 2261 result->stat.stat = nlm4_denied_grace_period; 2262 goto out; 2263 } 2264 2265 error = nlm_get_vfs_state(host, rqstp, &fh, &vs, (accmode_t)0); 2266 if (error) { 2267 result->stat.stat = nlm_convert_error(error); 2268 goto out; 2269 } 2270 2271 fl.l_start = argp->alock.l_offset; 2272 fl.l_len = argp->alock.l_len; 2273 fl.l_pid = argp->alock.svid; 2274 fl.l_sysid = sysid; 2275 fl.l_whence = SEEK_SET; 2276 fl.l_type = F_UNLCK; 2277 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_UNLCK, &fl, F_REMOTE); 2278 2279 /* 2280 * Ignore the error - there is no result code for failure, 2281 * only for grace period. 2282 */ 2283 result->stat.stat = nlm4_granted; 2284 2285 out: 2286 nlm_release_vfs_state(&vs); 2287 if (rpcp) 2288 *rpcp = nlm_host_get_rpc(host, TRUE); 2289 nlm_host_release(host); 2290 return (0); 2291 } 2292 2293 int 2294 nlm_do_granted(nlm4_testargs *argp, nlm4_res *result, struct svc_req *rqstp, 2295 2296 CLIENT **rpcp) 2297 { 2298 struct nlm_host *host; 2299 struct nlm_waiting_lock *nw; 2300 2301 memset(result, 0, sizeof(*result)); 2302 2303 host = nlm_find_host_by_addr(svc_getrpccaller(rqstp), rqstp->rq_vers); 2304 if (!host) { 2305 result->stat.stat = nlm4_denied_nolocks; 2306 return (ENOMEM); 2307 } 2308 2309 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 2310 result->stat.stat = nlm4_denied; 2311 KFAIL_POINT_CODE(DEBUG_FP, nlm_deny_grant, goto out); 2312 2313 mtx_lock(&nlm_global_lock); 2314 TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) { 2315 if (!nw->nw_waiting) 2316 continue; 2317 if (argp->alock.svid == nw->nw_lock.svid 2318 && argp->alock.l_offset == nw->nw_lock.l_offset 2319 && argp->alock.l_len == nw->nw_lock.l_len 2320 && argp->alock.fh.n_len == nw->nw_lock.fh.n_len 2321 && !memcmp(argp->alock.fh.n_bytes, nw->nw_lock.fh.n_bytes, 2322 nw->nw_lock.fh.n_len)) { 2323 nw->nw_waiting = FALSE; 2324 wakeup(nw); 2325 result->stat.stat = nlm4_granted; 2326 break; 2327 } 2328 } 2329 mtx_unlock(&nlm_global_lock); 2330 2331 out: 2332 if (rpcp) 2333 *rpcp = nlm_host_get_rpc(host, TRUE); 2334 nlm_host_release(host); 2335 return (0); 2336 } 2337 2338 void 2339 nlm_do_granted_res(nlm4_res *argp, struct svc_req *rqstp) 2340 { 2341 struct nlm_host *host = NULL; 2342 struct nlm_async_lock *af = NULL; 2343 int error; 2344 2345 if (argp->cookie.n_len != sizeof(struct nlm_grantcookie)) { 2346 NLM_DEBUG(1, "NLM: bogus grant cookie"); 2347 goto out; 2348 } 2349 2350 host = nlm_find_host_by_sysid(ng_sysid(&argp->cookie)); 2351 if (!host) { 2352 NLM_DEBUG(1, "NLM: Unknown host rejected our grant"); 2353 goto out; 2354 } 2355 2356 mtx_lock(&host->nh_lock); 2357 TAILQ_FOREACH(af, &host->nh_granted, af_link) 2358 if (ng_cookie(&argp->cookie) == 2359 ng_cookie(&af->af_granted.cookie)) 2360 break; 2361 if (af) 2362 TAILQ_REMOVE(&host->nh_granted, af, af_link); 2363 mtx_unlock(&host->nh_lock); 2364 2365 if (!af) { 2366 NLM_DEBUG(1, "NLM: host %s (sysid %d) replied to our grant " 2367 "with unrecognized cookie %d:%d", host->nh_caller_name, 2368 host->nh_sysid, ng_sysid(&argp->cookie), 2369 ng_cookie(&argp->cookie)); 2370 goto out; 2371 } 2372 2373 if (argp->stat.stat != nlm4_granted) { 2374 af->af_fl.l_type = F_UNLCK; 2375 error = VOP_ADVLOCK(af->af_vp, NULL, F_UNLCK, &af->af_fl, F_REMOTE); 2376 if (error) { 2377 NLM_DEBUG(1, "NLM: host %s (sysid %d) rejected our grant " 2378 "and we failed to unlock (%d)", host->nh_caller_name, 2379 host->nh_sysid, error); 2380 goto out; 2381 } 2382 2383 NLM_DEBUG(5, "NLM: async lock %p rejected by host %s (sysid %d)", 2384 af, host->nh_caller_name, host->nh_sysid); 2385 } else { 2386 NLM_DEBUG(5, "NLM: async lock %p accepted by host %s (sysid %d)", 2387 af, host->nh_caller_name, host->nh_sysid); 2388 } 2389 2390 out: 2391 if (af) 2392 nlm_free_async_lock(af); 2393 if (host) 2394 nlm_host_release(host); 2395 } 2396 2397 void 2398 nlm_do_free_all(nlm4_notify *argp) 2399 { 2400 struct nlm_host *host, *thost; 2401 2402 TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, thost) { 2403 if (!strcmp(host->nh_caller_name, argp->name)) 2404 nlm_host_notify(host, argp->state); 2405 } 2406 } 2407 2408 /* 2409 * Kernel module glue 2410 */ 2411 static int 2412 nfslockd_modevent(module_t mod, int type, void *data) 2413 { 2414 2415 switch (type) { 2416 case MOD_LOAD: 2417 return (0); 2418 case MOD_UNLOAD: 2419 /* The NLM module cannot be safely unloaded. */ 2420 /* FALLTHROUGH */ 2421 default: 2422 return (EOPNOTSUPP); 2423 } 2424 } 2425 static moduledata_t nfslockd_mod = { 2426 "nfslockd", 2427 nfslockd_modevent, 2428 NULL, 2429 }; 2430 DECLARE_MODULE(nfslockd, nfslockd_mod, SI_SUB_VFS, SI_ORDER_ANY); 2431 2432 /* So that loader and kldload(2) can find us, wherever we are.. */ 2433 MODULE_DEPEND(nfslockd, krpc, 1, 1, 1); 2434 MODULE_DEPEND(nfslockd, nfslock, 1, 1, 1); 2435 MODULE_VERSION(nfslockd, 1); 2436