1 /*- 2 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ 3 * Authors: Doug Rabson <dfr@rabson.org> 4 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
26 */ 27 28 #include "opt_inet6.h" 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/fail.h> 35 #include <sys/fcntl.h> 36 #include <sys/kernel.h> 37 #include <sys/kthread.h> 38 #include <sys/lockf.h> 39 #include <sys/malloc.h> 40 #include <sys/mount.h> 41 #if __FreeBSD_version >= 700000 42 #include <sys/priv.h> 43 #endif 44 #include <sys/proc.h> 45 #include <sys/socket.h> 46 #include <sys/socketvar.h> 47 #include <sys/syscall.h> 48 #include <sys/sysctl.h> 49 #include <sys/sysent.h> 50 #include <sys/syslog.h> 51 #include <sys/sysproto.h> 52 #include <sys/systm.h> 53 #include <sys/taskqueue.h> 54 #include <sys/unistd.h> 55 #include <sys/vnode.h> 56 57 #include <nfs/nfsproto.h> 58 #include <nfs/nfs_lock.h> 59 60 #include <nlm/nlm_prot.h> 61 #include <nlm/sm_inter.h> 62 #include <nlm/nlm.h> 63 #include <rpc/rpc_com.h> 64 #include <rpc/rpcb_prot.h> 65 66 MALLOC_DEFINE(M_NLM, "NLM", "Network Lock Manager"); 67 68 /* 69 * If a host is inactive (and holds no locks) for this amount of 70 * seconds, we consider it idle and stop tracking it. 71 */ 72 #define NLM_IDLE_TIMEOUT 30 73 74 /* 75 * We check the host list for idle every few seconds. 76 */ 77 #define NLM_IDLE_PERIOD 5 78 79 /* 80 * We only look for GRANTED_RES messages for a little while. 
/*
 * We only look for GRANTED_RES messages for a little while.  This
 * value is added to time_uptime to compute the expiry time of a
 * granted async lock notification.
 */
#define NLM_EXPIRE_TIMEOUT	10

/*
 * Support for sysctl vfs.nlm.sysid.  Each host registers a child node
 * named after its sysid below vfs.nlm.sysid.
 */
static SYSCTL_NODE(_vfs, OID_AUTO, nlm, CTLFLAG_RW, NULL,
    "Network Lock Manager");
static SYSCTL_NODE(_vfs_nlm, OID_AUTO, sysid, CTLFLAG_RW, NULL, "");

/*
 * Syscall hooks.  The sysent is installed by nlm_init() and removed by
 * nlm_uninit(); nlm_syscall_registered records whether registration
 * succeeded so that we only deregister what we registered.
 */
static int nlm_syscall_offset = SYS_nlm_syscall;
static struct sysent nlm_syscall_prev_sysent;
#if __FreeBSD_version < 700000
static struct sysent nlm_syscall_sysent = {
	(sizeof(struct nlm_syscall_args) / sizeof(register_t)) | SYF_MPSAFE,
	(sy_call_t *) nlm_syscall
};
#else
MAKE_SYSENT(nlm_syscall);
#endif
static bool_t nlm_syscall_registered = FALSE;

/*
 * Debug level passed in from userland. We also support a sysctl hook
 * (debug.nlm_debug) so that it can be changed on a live system.
 */
static int nlm_debug_level;
SYSCTL_INT(_debug, OID_AUTO, nlm_debug, CTLFLAG_RW, &nlm_debug_level, 0, "");

/*
 * NLM_DEBUG logs at LOG_DEBUG, but only when nlm_debug_level is at
 * least _level.  NLM_ERR always logs at LOG_ERR.  Both take a
 * printf-style format followed by its arguments.
 */
#define NLM_DEBUG(_level, args...)			\
	do {						\
		if (nlm_debug_level >= (_level))	\
			log(LOG_DEBUG, args);		\
	} while(0)
#define NLM_ERR(args...)				\
	do {						\
		log(LOG_ERR, args);			\
	} while(0)

/*
 * Grace period handling. The value of nlm_grace_threshold is the
 * value of time_uptime after which we are serving requests normally.
 */
static time_t nlm_grace_threshold;

/*
 * We check for idle hosts if time_uptime is greater than
 * nlm_next_idle_check.  nlm_check_idle() moves it forward by
 * NLM_IDLE_PERIOD each time a scan is started.
 */
static time_t nlm_next_idle_check;

/*
 * A flag to indicate the server is already running.
 */
static int nlm_is_running;

/*
 * A socket to use for RPC - shared by all IPv4 RPC clients.
 */
static struct socket *nlm_socket;

#ifdef INET6

/*
 * A socket to use for RPC - shared by all IPv6 RPC clients.
 */
static struct socket *nlm_socket6;

#endif

/*
 * An RPC client handle that can be used to communicate with the local
 * NSM.
/*
 * An RPC client handle that can be used to communicate with the local
 * NSM.  It is the handle used for the SM_MON and SM_UNMON calls.
 */
static CLIENT *nlm_nsm;

/*
 * An AUTH handle for the server's creds.  It is attached to every
 * client handle and call-extra structure we create.
 */
static AUTH *nlm_auth;

/*
 * A zero timeval for sending async RPC messages.
 */
struct timeval nlm_zero_tv = { 0, 0 };

/*
 * The local NSM state number
 */
int nlm_nsm_state;


/*
 * A lock to protect the host list and waiting lock list.
 */
static struct mtx nlm_global_lock;

/*
 * Locks:
 * (l)		locked by nh_lock
 * (s)		only accessed via server RPC which is single threaded
 * (g)		locked by nlm_global_lock
 * (c)		const until freeing
 * (a)		modified using atomic ops
 */

/*
 * A pending client-side lock request, stored on the nlm_waiting_locks
 * list.
 */
struct nlm_waiting_lock {
	TAILQ_ENTRY(nlm_waiting_lock) nw_link; /* (g) */
	bool_t		nw_waiting;	       /* (g) */
	nlm4_lock	nw_lock;	       /* (c) */
	union nfsfh	nw_fh;		       /* (c) */
	struct vnode	*nw_vp;		       /* (c) */
};
TAILQ_HEAD(nlm_waiting_lock_list, nlm_waiting_lock);

struct nlm_waiting_lock_list nlm_waiting_locks; /* (g) */

/*
 * A pending server-side asynchronous lock request, stored on the
 * nh_pending list of the NLM host.  Once the grant notification has
 * been sent the request moves to the host's nh_granted list, where it
 * stays until af_expiretime has passed.
 */
struct nlm_async_lock {
	TAILQ_ENTRY(nlm_async_lock) af_link; /* (l) host's list of locks */
	struct task	af_task;	/* (c) async callback details */
	void		*af_cookie;	/* (l) lock manager cancel token */
	struct vnode	*af_vp;		/* (l) vnode to lock */
	struct flock	af_fl;		/* (c) lock details */
	struct nlm_host *af_host;	/* (c) host which is locking */
	CLIENT		*af_rpc;	/* (c) rpc client to send message */
	nlm4_testargs	af_granted;	/* (c) notification details */
	time_t		af_expiretime;	/* (c) notification time */
};
TAILQ_HEAD(nlm_async_lock_list, nlm_async_lock);

/*
 * NLM host.
224 */ 225 enum nlm_host_state { 226 NLM_UNMONITORED, 227 NLM_MONITORED, 228 NLM_MONITOR_FAILED, 229 NLM_RECOVERING 230 }; 231 232 struct nlm_rpc { 233 CLIENT *nr_client; /* (l) RPC client handle */ 234 time_t nr_create_time; /* (l) when client was created */ 235 }; 236 237 struct nlm_host { 238 struct mtx nh_lock; 239 volatile u_int nh_refs; /* (a) reference count */ 240 TAILQ_ENTRY(nlm_host) nh_link; /* (g) global list of hosts */ 241 char nh_caller_name[MAXNAMELEN]; /* (c) printable name of host */ 242 uint32_t nh_sysid; /* (c) our allocaed system ID */ 243 char nh_sysid_string[10]; /* (c) string rep. of sysid */ 244 struct sockaddr_storage nh_addr; /* (s) remote address of host */ 245 struct nlm_rpc nh_srvrpc; /* (l) RPC for server replies */ 246 struct nlm_rpc nh_clntrpc; /* (l) RPC for client requests */ 247 rpcvers_t nh_vers; /* (s) NLM version of host */ 248 int nh_state; /* (s) last seen NSM state of host */ 249 enum nlm_host_state nh_monstate; /* (l) local NSM monitoring state */ 250 time_t nh_idle_timeout; /* (s) Time at which host is idle */ 251 struct sysctl_ctx_list nh_sysctl; /* (c) vfs.nlm.sysid nodes */ 252 uint32_t nh_grantcookie; /* (l) grant cookie counter */ 253 struct nlm_async_lock_list nh_pending; /* (l) pending async locks */ 254 struct nlm_async_lock_list nh_granted; /* (l) granted locks */ 255 struct nlm_async_lock_list nh_finished; /* (l) finished async locks */ 256 }; 257 TAILQ_HEAD(nlm_host_list, nlm_host); 258 259 static struct nlm_host_list nlm_hosts; /* (g) */ 260 static uint32_t nlm_next_sysid = 1; /* (g) */ 261 262 static void nlm_host_unmonitor(struct nlm_host *); 263 264 struct nlm_grantcookie { 265 uint32_t ng_sysid; 266 uint32_t ng_cookie; 267 }; 268 269 static inline uint32_t 270 ng_sysid(struct netobj *src) 271 { 272 273 return ((struct nlm_grantcookie *)src->n_bytes)->ng_sysid; 274 } 275 276 static inline uint32_t 277 ng_cookie(struct netobj *src) 278 { 279 280 return ((struct nlm_grantcookie *)src->n_bytes)->ng_cookie; 281 
} 282 283 /**********************************************************************/ 284 285 /* 286 * Initialise NLM globals. 287 */ 288 static void 289 nlm_init(void *dummy) 290 { 291 int error; 292 293 mtx_init(&nlm_global_lock, "nlm_global_lock", NULL, MTX_DEF); 294 TAILQ_INIT(&nlm_waiting_locks); 295 TAILQ_INIT(&nlm_hosts); 296 297 error = syscall_register(&nlm_syscall_offset, &nlm_syscall_sysent, 298 &nlm_syscall_prev_sysent, SY_THR_STATIC_KLD); 299 if (error) 300 NLM_ERR("Can't register NLM syscall\n"); 301 else 302 nlm_syscall_registered = TRUE; 303 } 304 SYSINIT(nlm_init, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_init, NULL); 305 306 static void 307 nlm_uninit(void *dummy) 308 { 309 310 if (nlm_syscall_registered) 311 syscall_deregister(&nlm_syscall_offset, 312 &nlm_syscall_prev_sysent); 313 } 314 SYSUNINIT(nlm_uninit, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_uninit, NULL); 315 316 /* 317 * Create a netobj from an arbitrary source. 318 */ 319 void 320 nlm_make_netobj(struct netobj *dst, caddr_t src, size_t srcsize, 321 struct malloc_type *type) 322 { 323 324 dst->n_len = srcsize; 325 dst->n_bytes = malloc(srcsize, type, M_WAITOK); 326 memcpy(dst->n_bytes, src, srcsize); 327 } 328 329 /* 330 * Copy a struct netobj. 331 */ 332 void 333 nlm_copy_netobj(struct netobj *dst, struct netobj *src, 334 struct malloc_type *type) 335 { 336 337 nlm_make_netobj(dst, src->n_bytes, src->n_len, type); 338 } 339 340 341 /* 342 * Create an RPC client handle for the given (address,prog,vers) 343 * triple using UDP. 
344 */ 345 static CLIENT * 346 nlm_get_rpc(struct sockaddr *sa, rpcprog_t prog, rpcvers_t vers) 347 { 348 char *wchan = "nlmrcv"; 349 const char* protofmly; 350 struct sockaddr_storage ss; 351 struct socket *so; 352 CLIENT *rpcb; 353 struct timeval timo; 354 RPCB parms; 355 char *uaddr; 356 enum clnt_stat stat = RPC_SUCCESS; 357 int rpcvers = RPCBVERS4; 358 bool_t do_tcp = FALSE; 359 bool_t tryagain = FALSE; 360 struct portmap mapping; 361 u_short port = 0; 362 363 /* 364 * First we need to contact the remote RPCBIND service to find 365 * the right port. 366 */ 367 memcpy(&ss, sa, sa->sa_len); 368 switch (ss.ss_family) { 369 case AF_INET: 370 ((struct sockaddr_in *)&ss)->sin_port = htons(111); 371 protofmly = "inet"; 372 so = nlm_socket; 373 break; 374 375 #ifdef INET6 376 case AF_INET6: 377 ((struct sockaddr_in6 *)&ss)->sin6_port = htons(111); 378 protofmly = "inet6"; 379 so = nlm_socket6; 380 break; 381 #endif 382 383 default: 384 /* 385 * Unsupported address family - fail. 386 */ 387 return (NULL); 388 } 389 390 rpcb = clnt_dg_create(so, (struct sockaddr *)&ss, 391 RPCBPROG, rpcvers, 0, 0); 392 if (!rpcb) 393 return (NULL); 394 395 try_tcp: 396 parms.r_prog = prog; 397 parms.r_vers = vers; 398 if (do_tcp) 399 parms.r_netid = "tcp"; 400 else 401 parms.r_netid = "udp"; 402 parms.r_addr = ""; 403 parms.r_owner = ""; 404 405 /* 406 * Use the default timeout. 407 */ 408 timo.tv_sec = 25; 409 timo.tv_usec = 0; 410 again: 411 switch (rpcvers) { 412 case RPCBVERS4: 413 case RPCBVERS: 414 /* 415 * Try RPCBIND 4 then 3. 416 */ 417 uaddr = NULL; 418 stat = CLNT_CALL(rpcb, (rpcprog_t) RPCBPROC_GETADDR, 419 (xdrproc_t) xdr_rpcb, &parms, 420 (xdrproc_t) xdr_wrapstring, &uaddr, timo); 421 if (stat == RPC_SUCCESS) { 422 /* 423 * We have a reply from the remote RPCBIND - turn it 424 * into an appropriate address and make a new client 425 * that can talk to the remote NLM. 426 * 427 * XXX fixup IPv6 scope ID. 
428 */ 429 struct netbuf *a; 430 a = __rpc_uaddr2taddr_af(ss.ss_family, uaddr); 431 if (!a) { 432 tryagain = TRUE; 433 } else { 434 tryagain = FALSE; 435 memcpy(&ss, a->buf, a->len); 436 free(a->buf, M_RPC); 437 free(a, M_RPC); 438 xdr_free((xdrproc_t) xdr_wrapstring, &uaddr); 439 } 440 } 441 if (tryagain || stat == RPC_PROGVERSMISMATCH) { 442 if (rpcvers == RPCBVERS4) 443 rpcvers = RPCBVERS; 444 else if (rpcvers == RPCBVERS) 445 rpcvers = PMAPVERS; 446 CLNT_CONTROL(rpcb, CLSET_VERS, &rpcvers); 447 goto again; 448 } 449 break; 450 case PMAPVERS: 451 /* 452 * Try portmap. 453 */ 454 mapping.pm_prog = parms.r_prog; 455 mapping.pm_vers = parms.r_vers; 456 mapping.pm_prot = do_tcp ? IPPROTO_TCP : IPPROTO_UDP; 457 mapping.pm_port = 0; 458 459 stat = CLNT_CALL(rpcb, (rpcprog_t) PMAPPROC_GETPORT, 460 (xdrproc_t) xdr_portmap, &mapping, 461 (xdrproc_t) xdr_u_short, &port, timo); 462 463 if (stat == RPC_SUCCESS) { 464 switch (ss.ss_family) { 465 case AF_INET: 466 ((struct sockaddr_in *)&ss)->sin_port = 467 htons(port); 468 break; 469 470 #ifdef INET6 471 case AF_INET6: 472 ((struct sockaddr_in6 *)&ss)->sin6_port = 473 htons(port); 474 break; 475 #endif 476 } 477 } 478 break; 479 default: 480 panic("invalid rpcvers %d", rpcvers); 481 } 482 /* 483 * We may have a positive response from the portmapper, but the NLM 484 * service was not found. Make sure we received a valid port. 485 */ 486 switch (ss.ss_family) { 487 case AF_INET: 488 port = ((struct sockaddr_in *)&ss)->sin_port; 489 break; 490 #ifdef INET6 491 case AF_INET6: 492 port = ((struct sockaddr_in6 *)&ss)->sin6_port; 493 break; 494 #endif 495 } 496 if (stat != RPC_SUCCESS || !port) { 497 /* 498 * If we were able to talk to rpcbind or portmap, but the udp 499 * variant wasn't available, ask about tcp. 500 * 501 * XXX - We could also check for a TCP portmapper, but 502 * if the host is running a portmapper at all, we should be able 503 * to hail it over UDP. 
504 */ 505 if (stat == RPC_SUCCESS && !do_tcp) { 506 do_tcp = TRUE; 507 goto try_tcp; 508 } 509 510 /* Otherwise, bad news. */ 511 NLM_ERR("NLM: failed to contact remote rpcbind, " 512 "stat = %d, port = %d\n", (int) stat, port); 513 CLNT_DESTROY(rpcb); 514 return (NULL); 515 } 516 517 if (do_tcp) { 518 /* 519 * Destroy the UDP client we used to speak to rpcbind and 520 * recreate as a TCP client. 521 */ 522 struct netconfig *nconf = NULL; 523 524 CLNT_DESTROY(rpcb); 525 526 switch (ss.ss_family) { 527 case AF_INET: 528 nconf = getnetconfigent("tcp"); 529 break; 530 #ifdef INET6 531 case AF_INET6: 532 nconf = getnetconfigent("tcp6"); 533 break; 534 #endif 535 } 536 537 rpcb = clnt_reconnect_create(nconf, (struct sockaddr *)&ss, 538 prog, vers, 0, 0); 539 CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan); 540 rpcb->cl_auth = nlm_auth; 541 542 } else { 543 /* 544 * Re-use the client we used to speak to rpcbind. 545 */ 546 CLNT_CONTROL(rpcb, CLSET_SVC_ADDR, &ss); 547 CLNT_CONTROL(rpcb, CLSET_PROG, &prog); 548 CLNT_CONTROL(rpcb, CLSET_VERS, &vers); 549 CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan); 550 rpcb->cl_auth = nlm_auth; 551 } 552 553 return (rpcb); 554 } 555 556 /* 557 * This async callback after when an async lock request has been 558 * granted. We notify the host which initiated the request. 559 */ 560 static void 561 nlm_lock_callback(void *arg, int pending) 562 { 563 struct nlm_async_lock *af = (struct nlm_async_lock *) arg; 564 struct rpc_callextra ext; 565 566 NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) granted," 567 " cookie %d:%d\n", af, af->af_host->nh_caller_name, 568 af->af_host->nh_sysid, ng_sysid(&af->af_granted.cookie), 569 ng_cookie(&af->af_granted.cookie)); 570 571 /* 572 * Send the results back to the host. 573 * 574 * Note: there is a possible race here with nlm_host_notify 575 * destroying the RPC client. To avoid problems, the first 576 * thing nlm_host_notify does is to cancel pending async lock 577 * requests. 
578 */ 579 memset(&ext, 0, sizeof(ext)); 580 ext.rc_auth = nlm_auth; 581 if (af->af_host->nh_vers == NLM_VERS4) { 582 nlm4_granted_msg_4(&af->af_granted, 583 NULL, af->af_rpc, &ext, nlm_zero_tv); 584 } else { 585 /* 586 * Back-convert to legacy protocol 587 */ 588 nlm_testargs granted; 589 granted.cookie = af->af_granted.cookie; 590 granted.exclusive = af->af_granted.exclusive; 591 granted.alock.caller_name = 592 af->af_granted.alock.caller_name; 593 granted.alock.fh = af->af_granted.alock.fh; 594 granted.alock.oh = af->af_granted.alock.oh; 595 granted.alock.svid = af->af_granted.alock.svid; 596 granted.alock.l_offset = 597 af->af_granted.alock.l_offset; 598 granted.alock.l_len = 599 af->af_granted.alock.l_len; 600 601 nlm_granted_msg_1(&granted, 602 NULL, af->af_rpc, &ext, nlm_zero_tv); 603 } 604 605 /* 606 * Move this entry to the nh_granted list. 607 */ 608 af->af_expiretime = time_uptime + NLM_EXPIRE_TIMEOUT; 609 mtx_lock(&af->af_host->nh_lock); 610 TAILQ_REMOVE(&af->af_host->nh_pending, af, af_link); 611 TAILQ_INSERT_TAIL(&af->af_host->nh_granted, af, af_link); 612 mtx_unlock(&af->af_host->nh_lock); 613 } 614 615 /* 616 * Free an async lock request. The request must have been removed from 617 * any list. 618 */ 619 static void 620 nlm_free_async_lock(struct nlm_async_lock *af) 621 { 622 /* 623 * Free an async lock. 624 */ 625 if (af->af_rpc) 626 CLNT_RELEASE(af->af_rpc); 627 xdr_free((xdrproc_t) xdr_nlm4_testargs, &af->af_granted); 628 if (af->af_vp) 629 vrele(af->af_vp); 630 free(af, M_NLM); 631 } 632 633 /* 634 * Cancel our async request - this must be called with 635 * af->nh_host->nh_lock held. This is slightly complicated by a 636 * potential race with our own callback. If we fail to cancel the 637 * lock, it must already have been granted - we make sure our async 638 * task has completed by calling taskqueue_drain in this case. 
639 */ 640 static int 641 nlm_cancel_async_lock(struct nlm_async_lock *af) 642 { 643 struct nlm_host *host = af->af_host; 644 int error; 645 646 mtx_assert(&host->nh_lock, MA_OWNED); 647 648 mtx_unlock(&host->nh_lock); 649 650 error = VOP_ADVLOCKASYNC(af->af_vp, NULL, F_CANCEL, &af->af_fl, 651 F_REMOTE, NULL, &af->af_cookie); 652 653 if (error) { 654 /* 655 * We failed to cancel - make sure our callback has 656 * completed before we continue. 657 */ 658 taskqueue_drain(taskqueue_thread, &af->af_task); 659 } 660 661 mtx_lock(&host->nh_lock); 662 663 if (!error) { 664 NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) " 665 "cancelled\n", af, host->nh_caller_name, host->nh_sysid); 666 667 /* 668 * Remove from the nh_pending list and free now that 669 * we are safe from the callback. 670 */ 671 TAILQ_REMOVE(&host->nh_pending, af, af_link); 672 mtx_unlock(&host->nh_lock); 673 nlm_free_async_lock(af); 674 mtx_lock(&host->nh_lock); 675 } 676 677 return (error); 678 } 679 680 static void 681 nlm_check_expired_locks(struct nlm_host *host) 682 { 683 struct nlm_async_lock *af; 684 time_t uptime = time_uptime; 685 686 mtx_lock(&host->nh_lock); 687 while ((af = TAILQ_FIRST(&host->nh_granted)) != NULL 688 && uptime >= af->af_expiretime) { 689 NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) expired," 690 " cookie %d:%d\n", af, af->af_host->nh_caller_name, 691 af->af_host->nh_sysid, ng_sysid(&af->af_granted.cookie), 692 ng_cookie(&af->af_granted.cookie)); 693 TAILQ_REMOVE(&host->nh_granted, af, af_link); 694 mtx_unlock(&host->nh_lock); 695 nlm_free_async_lock(af); 696 mtx_lock(&host->nh_lock); 697 } 698 while ((af = TAILQ_FIRST(&host->nh_finished)) != NULL) { 699 TAILQ_REMOVE(&host->nh_finished, af, af_link); 700 mtx_unlock(&host->nh_lock); 701 nlm_free_async_lock(af); 702 mtx_lock(&host->nh_lock); 703 } 704 mtx_unlock(&host->nh_lock); 705 } 706 707 /* 708 * Free resources used by a host. 
This is called after the reference 709 * count has reached zero so it doesn't need to worry about locks. 710 */ 711 static void 712 nlm_host_destroy(struct nlm_host *host) 713 { 714 715 mtx_lock(&nlm_global_lock); 716 TAILQ_REMOVE(&nlm_hosts, host, nh_link); 717 mtx_unlock(&nlm_global_lock); 718 719 if (host->nh_srvrpc.nr_client) 720 CLNT_RELEASE(host->nh_srvrpc.nr_client); 721 if (host->nh_clntrpc.nr_client) 722 CLNT_RELEASE(host->nh_clntrpc.nr_client); 723 mtx_destroy(&host->nh_lock); 724 sysctl_ctx_free(&host->nh_sysctl); 725 free(host, M_NLM); 726 } 727 728 /* 729 * Thread start callback for client lock recovery 730 */ 731 static void 732 nlm_client_recovery_start(void *arg) 733 { 734 struct nlm_host *host = (struct nlm_host *) arg; 735 736 NLM_DEBUG(1, "NLM: client lock recovery for %s started\n", 737 host->nh_caller_name); 738 739 nlm_client_recovery(host); 740 741 NLM_DEBUG(1, "NLM: client lock recovery for %s completed\n", 742 host->nh_caller_name); 743 744 host->nh_monstate = NLM_MONITORED; 745 nlm_host_release(host); 746 747 kthread_exit(); 748 } 749 750 /* 751 * This is called when we receive a host state change notification. We 752 * unlock any active locks owned by the host. When rpc.lockd is 753 * shutting down, this function is called with newstate set to zero 754 * which allows us to cancel any pending async locks and clear the 755 * locking state. 756 */ 757 static void 758 nlm_host_notify(struct nlm_host *host, int newstate) 759 { 760 struct nlm_async_lock *af; 761 762 if (newstate) { 763 NLM_DEBUG(1, "NLM: host %s (sysid %d) rebooted, new " 764 "state is %d\n", host->nh_caller_name, 765 host->nh_sysid, newstate); 766 } 767 768 /* 769 * Cancel any pending async locks for this host. 770 */ 771 mtx_lock(&host->nh_lock); 772 while ((af = TAILQ_FIRST(&host->nh_pending)) != NULL) { 773 /* 774 * nlm_cancel_async_lock will remove the entry from 775 * nh_pending and free it. 
776 */ 777 nlm_cancel_async_lock(af); 778 } 779 mtx_unlock(&host->nh_lock); 780 nlm_check_expired_locks(host); 781 782 /* 783 * The host just rebooted - trash its locks. 784 */ 785 lf_clearremotesys(host->nh_sysid); 786 host->nh_state = newstate; 787 788 /* 789 * If we have any remote locks for this host (i.e. it 790 * represents a remote NFS server that our local NFS client 791 * has locks for), start a recovery thread. 792 */ 793 if (newstate != 0 794 && host->nh_monstate != NLM_RECOVERING 795 && lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid) > 0) { 796 struct thread *td; 797 host->nh_monstate = NLM_RECOVERING; 798 refcount_acquire(&host->nh_refs); 799 kthread_add(nlm_client_recovery_start, host, curproc, &td, 0, 0, 800 "NFS lock recovery for %s", host->nh_caller_name); 801 } 802 } 803 804 /* 805 * Sysctl handler to count the number of locks for a sysid. 806 */ 807 static int 808 nlm_host_lock_count_sysctl(SYSCTL_HANDLER_ARGS) 809 { 810 struct nlm_host *host; 811 int count; 812 813 host = oidp->oid_arg1; 814 count = lf_countlocks(host->nh_sysid); 815 return sysctl_handle_int(oidp, &count, 0, req); 816 } 817 818 /* 819 * Sysctl handler to count the number of client locks for a sysid. 820 */ 821 static int 822 nlm_host_client_lock_count_sysctl(SYSCTL_HANDLER_ARGS) 823 { 824 struct nlm_host *host; 825 int count; 826 827 host = oidp->oid_arg1; 828 count = lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid); 829 return sysctl_handle_int(oidp, &count, 0, req); 830 } 831 832 /* 833 * Create a new NLM host. 
834 */ 835 static struct nlm_host * 836 nlm_create_host(const char* caller_name) 837 { 838 struct nlm_host *host; 839 struct sysctl_oid *oid; 840 841 mtx_assert(&nlm_global_lock, MA_OWNED); 842 843 NLM_DEBUG(1, "NLM: new host %s (sysid %d)\n", 844 caller_name, nlm_next_sysid); 845 host = malloc(sizeof(struct nlm_host), M_NLM, M_NOWAIT|M_ZERO); 846 if (!host) 847 return (NULL); 848 mtx_init(&host->nh_lock, "nh_lock", NULL, MTX_DEF); 849 host->nh_refs = 1; 850 strlcpy(host->nh_caller_name, caller_name, MAXNAMELEN); 851 host->nh_sysid = nlm_next_sysid++; 852 snprintf(host->nh_sysid_string, sizeof(host->nh_sysid_string), 853 "%d", host->nh_sysid); 854 host->nh_vers = 0; 855 host->nh_state = 0; 856 host->nh_monstate = NLM_UNMONITORED; 857 host->nh_grantcookie = 1; 858 TAILQ_INIT(&host->nh_pending); 859 TAILQ_INIT(&host->nh_granted); 860 TAILQ_INIT(&host->nh_finished); 861 TAILQ_INSERT_TAIL(&nlm_hosts, host, nh_link); 862 863 mtx_unlock(&nlm_global_lock); 864 865 sysctl_ctx_init(&host->nh_sysctl); 866 oid = SYSCTL_ADD_NODE(&host->nh_sysctl, 867 SYSCTL_STATIC_CHILDREN(_vfs_nlm_sysid), 868 OID_AUTO, host->nh_sysid_string, CTLFLAG_RD, NULL, ""); 869 SYSCTL_ADD_STRING(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 870 "hostname", CTLFLAG_RD, host->nh_caller_name, 0, ""); 871 SYSCTL_ADD_UINT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 872 "version", CTLFLAG_RD, &host->nh_vers, 0, ""); 873 SYSCTL_ADD_UINT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 874 "monitored", CTLFLAG_RD, &host->nh_monstate, 0, ""); 875 SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 876 "lock_count", CTLTYPE_INT | CTLFLAG_RD, host, 0, 877 nlm_host_lock_count_sysctl, "I", ""); 878 SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 879 "client_lock_count", CTLTYPE_INT | CTLFLAG_RD, host, 0, 880 nlm_host_client_lock_count_sysctl, "I", ""); 881 882 mtx_lock(&nlm_global_lock); 883 884 return (host); 885 } 886 887 /* 888 * Acquire the next sysid for remote locks 
not handled by the NLM. 889 */ 890 uint32_t 891 nlm_acquire_next_sysid(void) 892 { 893 uint32_t next_sysid; 894 895 mtx_lock(&nlm_global_lock); 896 next_sysid = nlm_next_sysid++; 897 mtx_unlock(&nlm_global_lock); 898 return (next_sysid); 899 } 900 901 /* 902 * Return non-zero if the address parts of the two sockaddrs are the 903 * same. 904 */ 905 static int 906 nlm_compare_addr(const struct sockaddr *a, const struct sockaddr *b) 907 { 908 const struct sockaddr_in *a4, *b4; 909 #ifdef INET6 910 const struct sockaddr_in6 *a6, *b6; 911 #endif 912 913 if (a->sa_family != b->sa_family) 914 return (FALSE); 915 916 switch (a->sa_family) { 917 case AF_INET: 918 a4 = (const struct sockaddr_in *) a; 919 b4 = (const struct sockaddr_in *) b; 920 return !memcmp(&a4->sin_addr, &b4->sin_addr, 921 sizeof(a4->sin_addr)); 922 #ifdef INET6 923 case AF_INET6: 924 a6 = (const struct sockaddr_in6 *) a; 925 b6 = (const struct sockaddr_in6 *) b; 926 return !memcmp(&a6->sin6_addr, &b6->sin6_addr, 927 sizeof(a6->sin6_addr)); 928 #endif 929 } 930 931 return (0); 932 } 933 934 /* 935 * Check for idle hosts and stop monitoring them. We could also free 936 * the host structure here, possibly after a larger timeout but that 937 * would require some care to avoid races with 938 * e.g. nlm_host_lock_count_sysctl. 
939 */ 940 static void 941 nlm_check_idle(void) 942 { 943 struct nlm_host *host; 944 945 mtx_assert(&nlm_global_lock, MA_OWNED); 946 947 if (time_uptime <= nlm_next_idle_check) 948 return; 949 950 nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD; 951 952 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 953 if (host->nh_monstate == NLM_MONITORED 954 && time_uptime > host->nh_idle_timeout) { 955 mtx_unlock(&nlm_global_lock); 956 if (lf_countlocks(host->nh_sysid) > 0 957 || lf_countlocks(NLM_SYSID_CLIENT 958 + host->nh_sysid)) { 959 host->nh_idle_timeout = 960 time_uptime + NLM_IDLE_TIMEOUT; 961 mtx_lock(&nlm_global_lock); 962 continue; 963 } 964 nlm_host_unmonitor(host); 965 mtx_lock(&nlm_global_lock); 966 } 967 } 968 } 969 970 /* 971 * Search for an existing NLM host that matches the given name 972 * (typically the caller_name element of an nlm4_lock). If none is 973 * found, create a new host. If 'addr' is non-NULL, record the remote 974 * address of the host so that we can call it back for async 975 * responses. If 'vers' is greater than zero then record the NLM 976 * program version to use to communicate with this client. 977 */ 978 struct nlm_host * 979 nlm_find_host_by_name(const char *name, const struct sockaddr *addr, 980 rpcvers_t vers) 981 { 982 struct nlm_host *host; 983 984 mtx_lock(&nlm_global_lock); 985 986 /* 987 * The remote host is determined by caller_name. 988 */ 989 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 990 if (!strcmp(host->nh_caller_name, name)) 991 break; 992 } 993 994 if (!host) { 995 host = nlm_create_host(name); 996 if (!host) { 997 mtx_unlock(&nlm_global_lock); 998 return (NULL); 999 } 1000 } 1001 refcount_acquire(&host->nh_refs); 1002 1003 host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT; 1004 1005 /* 1006 * If we have an address for the host, record it so that we 1007 * can send async replies etc. 
1008 */ 1009 if (addr) { 1010 1011 KASSERT(addr->sa_len < sizeof(struct sockaddr_storage), 1012 ("Strange remote transport address length")); 1013 1014 /* 1015 * If we have seen an address before and we currently 1016 * have an RPC client handle, make sure the address is 1017 * the same, otherwise discard the client handle. 1018 */ 1019 if (host->nh_addr.ss_len && host->nh_srvrpc.nr_client) { 1020 if (!nlm_compare_addr( 1021 (struct sockaddr *) &host->nh_addr, 1022 addr) 1023 || host->nh_vers != vers) { 1024 CLIENT *client; 1025 mtx_lock(&host->nh_lock); 1026 client = host->nh_srvrpc.nr_client; 1027 host->nh_srvrpc.nr_client = NULL; 1028 mtx_unlock(&host->nh_lock); 1029 if (client) { 1030 CLNT_RELEASE(client); 1031 } 1032 } 1033 } 1034 memcpy(&host->nh_addr, addr, addr->sa_len); 1035 host->nh_vers = vers; 1036 } 1037 1038 nlm_check_idle(); 1039 1040 mtx_unlock(&nlm_global_lock); 1041 1042 return (host); 1043 } 1044 1045 /* 1046 * Search for an existing NLM host that matches the given remote 1047 * address. If none is found, create a new host with the requested 1048 * address and remember 'vers' as the NLM protocol version to use for 1049 * that host. 1050 */ 1051 struct nlm_host * 1052 nlm_find_host_by_addr(const struct sockaddr *addr, int vers) 1053 { 1054 /* 1055 * Fake up a name using inet_ntop. This buffer is 1056 * large enough for an IPv6 address. 1057 */ 1058 char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"]; 1059 struct nlm_host *host; 1060 1061 switch (addr->sa_family) { 1062 case AF_INET: 1063 inet_ntop(AF_INET, 1064 &((const struct sockaddr_in *) addr)->sin_addr, 1065 tmp, sizeof tmp); 1066 break; 1067 #ifdef INET6 1068 case AF_INET6: 1069 inet_ntop(AF_INET6, 1070 &((const struct sockaddr_in6 *) addr)->sin6_addr, 1071 tmp, sizeof tmp); 1072 break; 1073 #endif 1074 default: 1075 strlcpy(tmp, "<unknown>", sizeof(tmp)); 1076 } 1077 1078 1079 mtx_lock(&nlm_global_lock); 1080 1081 /* 1082 * The remote host is determined by caller_name. 
1083 */ 1084 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 1085 if (nlm_compare_addr(addr, 1086 (const struct sockaddr *) &host->nh_addr)) 1087 break; 1088 } 1089 1090 if (!host) { 1091 host = nlm_create_host(tmp); 1092 if (!host) { 1093 mtx_unlock(&nlm_global_lock); 1094 return (NULL); 1095 } 1096 memcpy(&host->nh_addr, addr, addr->sa_len); 1097 host->nh_vers = vers; 1098 } 1099 refcount_acquire(&host->nh_refs); 1100 1101 host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT; 1102 1103 nlm_check_idle(); 1104 1105 mtx_unlock(&nlm_global_lock); 1106 1107 return (host); 1108 } 1109 1110 /* 1111 * Find the NLM host that matches the value of 'sysid'. If none 1112 * exists, return NULL. 1113 */ 1114 static struct nlm_host * 1115 nlm_find_host_by_sysid(int sysid) 1116 { 1117 struct nlm_host *host; 1118 1119 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 1120 if (host->nh_sysid == sysid) { 1121 refcount_acquire(&host->nh_refs); 1122 return (host); 1123 } 1124 } 1125 1126 return (NULL); 1127 } 1128 1129 void nlm_host_release(struct nlm_host *host) 1130 { 1131 if (refcount_release(&host->nh_refs)) { 1132 /* 1133 * Free the host 1134 */ 1135 nlm_host_destroy(host); 1136 } 1137 } 1138 1139 /* 1140 * Unregister this NLM host with the local NSM due to idleness. 1141 */ 1142 static void 1143 nlm_host_unmonitor(struct nlm_host *host) 1144 { 1145 mon_id smmonid; 1146 sm_stat_res smstat; 1147 struct timeval timo; 1148 enum clnt_stat stat; 1149 1150 NLM_DEBUG(1, "NLM: unmonitoring %s (sysid %d)\n", 1151 host->nh_caller_name, host->nh_sysid); 1152 1153 /* 1154 * We put our assigned system ID value in the priv field to 1155 * make it simpler to find the host if we are notified of a 1156 * host restart. 
1157 */ 1158 smmonid.mon_name = host->nh_caller_name; 1159 smmonid.my_id.my_name = "localhost"; 1160 smmonid.my_id.my_prog = NLM_PROG; 1161 smmonid.my_id.my_vers = NLM_SM; 1162 smmonid.my_id.my_proc = NLM_SM_NOTIFY; 1163 1164 timo.tv_sec = 25; 1165 timo.tv_usec = 0; 1166 stat = CLNT_CALL(nlm_nsm, SM_UNMON, 1167 (xdrproc_t) xdr_mon, &smmonid, 1168 (xdrproc_t) xdr_sm_stat, &smstat, timo); 1169 1170 if (stat != RPC_SUCCESS) { 1171 NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat); 1172 return; 1173 } 1174 if (smstat.res_stat == stat_fail) { 1175 NLM_ERR("Local NSM refuses to unmonitor %s\n", 1176 host->nh_caller_name); 1177 return; 1178 } 1179 1180 host->nh_monstate = NLM_UNMONITORED; 1181 } 1182 1183 /* 1184 * Register this NLM host with the local NSM so that we can be 1185 * notified if it reboots. 1186 */ 1187 void 1188 nlm_host_monitor(struct nlm_host *host, int state) 1189 { 1190 mon smmon; 1191 sm_stat_res smstat; 1192 struct timeval timo; 1193 enum clnt_stat stat; 1194 1195 if (state && !host->nh_state) { 1196 /* 1197 * This is the first time we have seen an NSM state 1198 * value for this host. We record it here to help 1199 * detect host reboots. 1200 */ 1201 host->nh_state = state; 1202 NLM_DEBUG(1, "NLM: host %s (sysid %d) has NSM state %d\n", 1203 host->nh_caller_name, host->nh_sysid, state); 1204 } 1205 1206 mtx_lock(&host->nh_lock); 1207 if (host->nh_monstate != NLM_UNMONITORED) { 1208 mtx_unlock(&host->nh_lock); 1209 return; 1210 } 1211 host->nh_monstate = NLM_MONITORED; 1212 mtx_unlock(&host->nh_lock); 1213 1214 NLM_DEBUG(1, "NLM: monitoring %s (sysid %d)\n", 1215 host->nh_caller_name, host->nh_sysid); 1216 1217 /* 1218 * We put our assigned system ID value in the priv field to 1219 * make it simpler to find the host if we are notified of a 1220 * host restart. 
1221 */ 1222 smmon.mon_id.mon_name = host->nh_caller_name; 1223 smmon.mon_id.my_id.my_name = "localhost"; 1224 smmon.mon_id.my_id.my_prog = NLM_PROG; 1225 smmon.mon_id.my_id.my_vers = NLM_SM; 1226 smmon.mon_id.my_id.my_proc = NLM_SM_NOTIFY; 1227 memcpy(smmon.priv, &host->nh_sysid, sizeof(host->nh_sysid)); 1228 1229 timo.tv_sec = 25; 1230 timo.tv_usec = 0; 1231 stat = CLNT_CALL(nlm_nsm, SM_MON, 1232 (xdrproc_t) xdr_mon, &smmon, 1233 (xdrproc_t) xdr_sm_stat, &smstat, timo); 1234 1235 if (stat != RPC_SUCCESS) { 1236 NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat); 1237 return; 1238 } 1239 if (smstat.res_stat == stat_fail) { 1240 NLM_ERR("Local NSM refuses to monitor %s\n", 1241 host->nh_caller_name); 1242 mtx_lock(&host->nh_lock); 1243 host->nh_monstate = NLM_MONITOR_FAILED; 1244 mtx_unlock(&host->nh_lock); 1245 return; 1246 } 1247 1248 host->nh_monstate = NLM_MONITORED; 1249 } 1250 1251 /* 1252 * Return an RPC client handle that can be used to talk to the NLM 1253 * running on the given host. 1254 */ 1255 CLIENT * 1256 nlm_host_get_rpc(struct nlm_host *host, bool_t isserver) 1257 { 1258 struct nlm_rpc *rpc; 1259 CLIENT *client; 1260 1261 mtx_lock(&host->nh_lock); 1262 1263 if (isserver) 1264 rpc = &host->nh_srvrpc; 1265 else 1266 rpc = &host->nh_clntrpc; 1267 1268 /* 1269 * We can't hold onto RPC handles for too long - the async 1270 * call/reply protocol used by some NLM clients makes it hard 1271 * to tell when they change port numbers (e.g. after a 1272 * reboot). Note that if a client reboots while it isn't 1273 * holding any locks, it won't bother to notify us. We 1274 * expire the RPC handles after two minutes. 
1275 */ 1276 if (rpc->nr_client && time_uptime > rpc->nr_create_time + 2*60) { 1277 client = rpc->nr_client; 1278 rpc->nr_client = NULL; 1279 mtx_unlock(&host->nh_lock); 1280 CLNT_RELEASE(client); 1281 mtx_lock(&host->nh_lock); 1282 } 1283 1284 if (!rpc->nr_client) { 1285 mtx_unlock(&host->nh_lock); 1286 client = nlm_get_rpc((struct sockaddr *)&host->nh_addr, 1287 NLM_PROG, host->nh_vers); 1288 mtx_lock(&host->nh_lock); 1289 1290 if (client) { 1291 if (rpc->nr_client) { 1292 mtx_unlock(&host->nh_lock); 1293 CLNT_DESTROY(client); 1294 mtx_lock(&host->nh_lock); 1295 } else { 1296 rpc->nr_client = client; 1297 rpc->nr_create_time = time_uptime; 1298 } 1299 } 1300 } 1301 1302 client = rpc->nr_client; 1303 if (client) 1304 CLNT_ACQUIRE(client); 1305 mtx_unlock(&host->nh_lock); 1306 1307 return (client); 1308 1309 } 1310 1311 int nlm_host_get_sysid(struct nlm_host *host) 1312 { 1313 1314 return (host->nh_sysid); 1315 } 1316 1317 int 1318 nlm_host_get_state(struct nlm_host *host) 1319 { 1320 1321 return (host->nh_state); 1322 } 1323 1324 void * 1325 nlm_register_wait_lock(struct nlm4_lock *lock, struct vnode *vp) 1326 { 1327 struct nlm_waiting_lock *nw; 1328 1329 nw = malloc(sizeof(struct nlm_waiting_lock), M_NLM, M_WAITOK); 1330 nw->nw_lock = *lock; 1331 memcpy(&nw->nw_fh.fh_bytes, nw->nw_lock.fh.n_bytes, 1332 nw->nw_lock.fh.n_len); 1333 nw->nw_lock.fh.n_bytes = nw->nw_fh.fh_bytes; 1334 nw->nw_waiting = TRUE; 1335 nw->nw_vp = vp; 1336 mtx_lock(&nlm_global_lock); 1337 TAILQ_INSERT_TAIL(&nlm_waiting_locks, nw, nw_link); 1338 mtx_unlock(&nlm_global_lock); 1339 1340 return nw; 1341 } 1342 1343 void 1344 nlm_deregister_wait_lock(void *handle) 1345 { 1346 struct nlm_waiting_lock *nw = handle; 1347 1348 mtx_lock(&nlm_global_lock); 1349 TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link); 1350 mtx_unlock(&nlm_global_lock); 1351 1352 free(nw, M_NLM); 1353 } 1354 1355 int 1356 nlm_wait_lock(void *handle, int timo) 1357 { 1358 struct nlm_waiting_lock *nw = handle; 1359 int error; 1360 
1361 /* 1362 * If the granted message arrived before we got here, 1363 * nw->nw_waiting will be FALSE - in that case, don't sleep. 1364 */ 1365 mtx_lock(&nlm_global_lock); 1366 error = 0; 1367 if (nw->nw_waiting) 1368 error = msleep(nw, &nlm_global_lock, PCATCH, "nlmlock", timo); 1369 TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link); 1370 if (error) { 1371 /* 1372 * The granted message may arrive after the 1373 * interrupt/timeout but before we manage to lock the 1374 * mutex. Detect this by examining nw_lock. 1375 */ 1376 if (!nw->nw_waiting) 1377 error = 0; 1378 } else { 1379 /* 1380 * If nlm_cancel_wait is called, then error will be 1381 * zero but nw_waiting will still be TRUE. We 1382 * translate this into EINTR. 1383 */ 1384 if (nw->nw_waiting) 1385 error = EINTR; 1386 } 1387 mtx_unlock(&nlm_global_lock); 1388 1389 free(nw, M_NLM); 1390 1391 return (error); 1392 } 1393 1394 void 1395 nlm_cancel_wait(struct vnode *vp) 1396 { 1397 struct nlm_waiting_lock *nw; 1398 1399 mtx_lock(&nlm_global_lock); 1400 TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) { 1401 if (nw->nw_vp == vp) { 1402 wakeup(nw); 1403 } 1404 } 1405 mtx_unlock(&nlm_global_lock); 1406 } 1407 1408 1409 /**********************************************************************/ 1410 1411 /* 1412 * Syscall interface with userland. 
1413 */ 1414 1415 extern void nlm_prog_0(struct svc_req *rqstp, SVCXPRT *transp); 1416 extern void nlm_prog_1(struct svc_req *rqstp, SVCXPRT *transp); 1417 extern void nlm_prog_3(struct svc_req *rqstp, SVCXPRT *transp); 1418 extern void nlm_prog_4(struct svc_req *rqstp, SVCXPRT *transp); 1419 1420 static int 1421 nlm_register_services(SVCPOOL *pool, int addr_count, char **addrs) 1422 { 1423 static rpcvers_t versions[] = { 1424 NLM_SM, NLM_VERS, NLM_VERSX, NLM_VERS4 1425 }; 1426 static void (*dispatchers[])(struct svc_req *, SVCXPRT *) = { 1427 nlm_prog_0, nlm_prog_1, nlm_prog_3, nlm_prog_4 1428 }; 1429 1430 SVCXPRT **xprts; 1431 char netid[16]; 1432 char uaddr[128]; 1433 struct netconfig *nconf; 1434 int i, j, error; 1435 1436 if (!addr_count) { 1437 NLM_ERR("NLM: no service addresses given - can't start server"); 1438 return (EINVAL); 1439 } 1440 1441 if (addr_count < 0 || addr_count > 256 ) { 1442 NLM_ERR("NLM: too many service addresses (%d) given, " 1443 "max 256 - can't start server\n", addr_count); 1444 return (EINVAL); 1445 } 1446 1447 xprts = malloc(addr_count * sizeof(SVCXPRT *), M_NLM, M_WAITOK|M_ZERO); 1448 for (i = 0; i < nitems(versions); i++) { 1449 for (j = 0; j < addr_count; j++) { 1450 /* 1451 * Create transports for the first version and 1452 * then just register everything else to the 1453 * same transports. 
1454 */ 1455 if (i == 0) { 1456 char *up; 1457 1458 error = copyin(&addrs[2*j], &up, 1459 sizeof(char*)); 1460 if (error) 1461 goto out; 1462 error = copyinstr(up, netid, sizeof(netid), 1463 NULL); 1464 if (error) 1465 goto out; 1466 error = copyin(&addrs[2*j+1], &up, 1467 sizeof(char*)); 1468 if (error) 1469 goto out; 1470 error = copyinstr(up, uaddr, sizeof(uaddr), 1471 NULL); 1472 if (error) 1473 goto out; 1474 nconf = getnetconfigent(netid); 1475 if (!nconf) { 1476 NLM_ERR("Can't lookup netid %s\n", 1477 netid); 1478 error = EINVAL; 1479 goto out; 1480 } 1481 xprts[j] = svc_tp_create(pool, dispatchers[i], 1482 NLM_PROG, versions[i], uaddr, nconf); 1483 if (!xprts[j]) { 1484 NLM_ERR("NLM: unable to create " 1485 "(NLM_PROG, %d).\n", versions[i]); 1486 error = EINVAL; 1487 goto out; 1488 } 1489 freenetconfigent(nconf); 1490 } else { 1491 nconf = getnetconfigent(xprts[j]->xp_netid); 1492 rpcb_unset(NLM_PROG, versions[i], nconf); 1493 if (!svc_reg(xprts[j], NLM_PROG, versions[i], 1494 dispatchers[i], nconf)) { 1495 NLM_ERR("NLM: can't register " 1496 "(NLM_PROG, %d)\n", versions[i]); 1497 error = EINVAL; 1498 goto out; 1499 } 1500 } 1501 } 1502 } 1503 error = 0; 1504 out: 1505 for (j = 0; j < addr_count; j++) { 1506 if (xprts[j]) 1507 SVC_RELEASE(xprts[j]); 1508 } 1509 free(xprts, M_NLM); 1510 return (error); 1511 } 1512 1513 /* 1514 * Main server entry point. Contacts the local NSM to get its current 1515 * state and send SM_UNMON_ALL. Registers the NLM services and then 1516 * services requests. Does not return until the server is interrupted 1517 * by a signal. 
/*
 * Main server entry point. Contacts the local NSM to get its current
 * state and send SM_UNMON_ALL. Registers the NLM services and then
 * services requests. Does not return until the server is interrupted
 * by a signal.
 *
 * 'addr_count' and 'addrs' are handed unchanged to
 * nlm_register_services().  Returns EPERM if the server already
 * appears to be running, a socket creation error, EINVAL if the NSM
 * cannot be contacted, the nlm_register_services() error, or 0 once
 * svc_run() has returned.
 */
static int
nlm_server_main(int addr_count, char **addrs)
{
	struct thread *td = curthread;
	int error;
	SVCPOOL *pool = NULL;
	struct sockopt opt;
	int portlow;
#ifdef INET6
	struct sockaddr_in6 sin6;
#endif
	struct sockaddr_in sin;
	my_id id;
	sm_stat smstat;
	struct timeval timo;
	enum clnt_stat stat;
	struct nlm_host *host, *nhost;
	struct nlm_waiting_lock *nw;
	vop_advlock_t *old_nfs_advlock;
	vop_reclaim_t *old_nfs_reclaim;

	if (nlm_is_running != 0) {
		NLM_ERR("NLM: can't start server - "
		    "it appears to be running already\n");
		return (EPERM);
	}

	/*
	 * The sockets are created only if nlm_socket is not already
	 * set.  Each one is asked for the low port range; the result
	 * of sosetopt() is not checked.
	 */
	if (nlm_socket == NULL) {
		memset(&opt, 0, sizeof(opt));

		error = socreate(AF_INET, &nlm_socket, SOCK_DGRAM, 0,
		    td->td_ucred, td);
		if (error) {
			NLM_ERR("NLM: can't create IPv4 socket - error %d\n",
			    error);
			return (error);
		}
		opt.sopt_dir = SOPT_SET;
		opt.sopt_level = IPPROTO_IP;
		opt.sopt_name = IP_PORTRANGE;
		portlow = IP_PORTRANGE_LOW;
		opt.sopt_val = &portlow;
		opt.sopt_valsize = sizeof(portlow);
		sosetopt(nlm_socket, &opt);

#ifdef INET6
		nlm_socket6 = NULL;
		error = socreate(AF_INET6, &nlm_socket6, SOCK_DGRAM, 0,
		    td->td_ucred, td);
		if (error) {
			NLM_ERR("NLM: can't create IPv6 socket - error %d\n",
			    error);
			/* Undo the IPv4 socket so a later start begins afresh. */
			soclose(nlm_socket);
			nlm_socket = NULL;
			return (error);
		}
		opt.sopt_dir = SOPT_SET;
		opt.sopt_level = IPPROTO_IPV6;
		opt.sopt_name = IPV6_PORTRANGE;
		portlow = IPV6_PORTRANGE_LOW;
		opt.sopt_val = &portlow;
		opt.sopt_valsize = sizeof(portlow);
		sosetopt(nlm_socket6, &opt);
#endif
	}

	nlm_auth = authunix_create(curthread->td_ucred);

	/*
	 * Contact the local NSM on the loopback address: IPv6 first
	 * when INET6 is configured, falling back to IPv4.
	 */
#ifdef INET6
	memset(&sin6, 0, sizeof(sin6));
	sin6.sin6_len = sizeof(sin6);
	sin6.sin6_family = AF_INET6;
	sin6.sin6_addr = in6addr_loopback;
	nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin6, SM_PROG, SM_VERS);
	if (!nlm_nsm) {
#endif
		memset(&sin, 0, sizeof(sin));
		sin.sin_len = sizeof(sin);
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
		nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin, SM_PROG,
		    SM_VERS);
#ifdef INET6
	}
#endif

	if (!nlm_nsm) {
		NLM_ERR("Can't start NLM - unable to contact NSM\n");
		error = EINVAL;
		goto out;
	}

	pool = svcpool_create("NLM", NULL);

	error = nlm_register_services(pool, addr_count, addrs);
	if (error)
		goto out;

	memset(&id, 0, sizeof(id));
	id.my_name = "NFS NLM";

	/*
	 * Send SM_UNMON_ALL to the NSM; the reply carries its current
	 * state number.
	 */
	timo.tv_sec = 25;
	timo.tv_usec = 0;
	stat = CLNT_CALL(nlm_nsm, SM_UNMON_ALL,
	    (xdrproc_t) xdr_my_id, &id,
	    (xdrproc_t) xdr_sm_stat, &smstat, timo);

	if (stat != RPC_SUCCESS) {
		struct rpc_err err;

		CLNT_GETERR(nlm_nsm, &err);
		NLM_ERR("NLM: unexpected error contacting NSM, "
		    "stat=%d, errno=%d\n", stat, err.re_errno);
		error = EINVAL;
		goto out;
	}
	nlm_is_running = 1;

	NLM_DEBUG(1, "NLM: local NSM state is %d\n", smstat.state);
	nlm_nsm_state = smstat.state;

	/*
	 * Install our advlock and reclaim hooks for the duration of
	 * svc_run() and put the previous ones back afterwards.
	 */
	old_nfs_advlock = nfs_advlock_p;
	nfs_advlock_p = nlm_advlock;
	old_nfs_reclaim = nfs_reclaim_p;
	nfs_reclaim_p = nlm_reclaim;

	svc_run(pool);
	error = 0;

	nfs_advlock_p = old_nfs_advlock;
	nfs_reclaim_p = old_nfs_reclaim;

out:
	nlm_is_running = 0;
	if (pool)
		svcpool_destroy(pool);

	/*
	 * We are finished communicating with the NSM.
	 */
	if (nlm_nsm) {
		CLNT_RELEASE(nlm_nsm);
		nlm_nsm = NULL;
	}

	/*
	 * Trash all the existing state so that if the server
	 * restarts, it gets a clean slate. This is complicated by the
	 * possibility that there may be other threads trying to make
	 * client locking requests.
	 *
	 * First we fake a client reboot notification which will
	 * cancel any pending async locks and purge remote lock state
	 * from the local lock manager. We release the reference from
	 * nlm_hosts to the host (which may remove it from the list
	 * and free it). After this phase, the only entries in the
	 * nlm_host list should be from other threads performing
	 * client lock requests.
	 */
	mtx_lock(&nlm_global_lock);
	TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) {
		wakeup(nw);
	}
	TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, nhost) {
		/* The global lock is dropped around the per-host calls. */
		mtx_unlock(&nlm_global_lock);
		nlm_host_notify(host, 0);
		nlm_host_release(host);
		mtx_lock(&nlm_global_lock);
	}
	mtx_unlock(&nlm_global_lock);

	AUTH_DESTROY(nlm_auth);

	return (error);
}

/*
 * nlm_syscall(2) entry point.  Checks that the caller is privileged,
 * takes the debug level and grace period from 'uap', and runs the
 * server via nlm_server_main().  Returns the privilege check error
 * or whatever nlm_server_main() returns.
 */
int
sys_nlm_syscall(struct thread *td, struct nlm_syscall_args *uap)
{
	int error;

#if __FreeBSD_version >= 700000
	error = priv_check(td, PRIV_NFS_LOCKD);
#else
	error = suser(td);
#endif
	if (error)
		return (error);

	/* Both deadlines are expressed relative to the current uptime. */
	nlm_debug_level = uap->debug_level;
	nlm_grace_threshold = time_uptime + uap->grace_period;
	nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD;

	return nlm_server_main(uap->addr_count, uap->addrs);
}
1719 */ 1720 1721 1722 void 1723 nlm_sm_notify(struct nlm_sm_status *argp) 1724 { 1725 uint32_t sysid; 1726 struct nlm_host *host; 1727 1728 NLM_DEBUG(3, "nlm_sm_notify(): mon_name = %s\n", argp->mon_name); 1729 memcpy(&sysid, &argp->priv, sizeof(sysid)); 1730 host = nlm_find_host_by_sysid(sysid); 1731 if (host) { 1732 nlm_host_notify(host, argp->state); 1733 nlm_host_release(host); 1734 } 1735 } 1736 1737 static void 1738 nlm_convert_to_fhandle_t(fhandle_t *fhp, struct netobj *p) 1739 { 1740 memcpy(fhp, p->n_bytes, sizeof(fhandle_t)); 1741 } 1742 1743 struct vfs_state { 1744 struct mount *vs_mp; 1745 struct vnode *vs_vp; 1746 int vs_vnlocked; 1747 }; 1748 1749 static int 1750 nlm_get_vfs_state(struct nlm_host *host, struct svc_req *rqstp, 1751 fhandle_t *fhp, struct vfs_state *vs, accmode_t accmode) 1752 { 1753 int error, exflags; 1754 struct ucred *cred = NULL, *credanon = NULL; 1755 1756 memset(vs, 0, sizeof(*vs)); 1757 1758 vs->vs_mp = vfs_getvfs(&fhp->fh_fsid); 1759 if (!vs->vs_mp) { 1760 return (ESTALE); 1761 } 1762 1763 /* accmode == 0 means don't check, since it is an unlock. */ 1764 if (accmode != 0) { 1765 error = VFS_CHECKEXP(vs->vs_mp, 1766 (struct sockaddr *)&host->nh_addr, &exflags, &credanon, 1767 NULL, NULL); 1768 if (error) 1769 goto out; 1770 1771 if (exflags & MNT_EXRDONLY || 1772 (vs->vs_mp->mnt_flag & MNT_RDONLY)) { 1773 error = EROFS; 1774 goto out; 1775 } 1776 } 1777 1778 error = VFS_FHTOVP(vs->vs_mp, &fhp->fh_fid, LK_EXCLUSIVE, &vs->vs_vp); 1779 if (error) 1780 goto out; 1781 vs->vs_vnlocked = TRUE; 1782 1783 if (accmode != 0) { 1784 if (!svc_getcred(rqstp, &cred, NULL)) { 1785 error = EINVAL; 1786 goto out; 1787 } 1788 if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) { 1789 crfree(cred); 1790 cred = credanon; 1791 credanon = NULL; 1792 } 1793 1794 /* 1795 * Check cred. 
1796 */ 1797 error = VOP_ACCESS(vs->vs_vp, accmode, cred, curthread); 1798 /* 1799 * If this failed and accmode != VWRITE, try again with 1800 * VWRITE to maintain backwards compatibility with the 1801 * old code that always used VWRITE. 1802 */ 1803 if (error != 0 && accmode != VWRITE) 1804 error = VOP_ACCESS(vs->vs_vp, VWRITE, cred, curthread); 1805 if (error) 1806 goto out; 1807 } 1808 1809 #if __FreeBSD_version < 800011 1810 VOP_UNLOCK(vs->vs_vp, 0, curthread); 1811 #else 1812 VOP_UNLOCK(vs->vs_vp, 0); 1813 #endif 1814 vs->vs_vnlocked = FALSE; 1815 1816 out: 1817 if (cred) 1818 crfree(cred); 1819 if (credanon) 1820 crfree(credanon); 1821 1822 return (error); 1823 } 1824 1825 static void 1826 nlm_release_vfs_state(struct vfs_state *vs) 1827 { 1828 1829 if (vs->vs_vp) { 1830 if (vs->vs_vnlocked) 1831 vput(vs->vs_vp); 1832 else 1833 vrele(vs->vs_vp); 1834 } 1835 if (vs->vs_mp) 1836 vfs_rel(vs->vs_mp); 1837 } 1838 1839 static nlm4_stats 1840 nlm_convert_error(int error) 1841 { 1842 1843 if (error == ESTALE) 1844 return nlm4_stale_fh; 1845 else if (error == EROFS) 1846 return nlm4_rofs; 1847 else 1848 return nlm4_failed; 1849 } 1850 1851 int 1852 nlm_do_test(nlm4_testargs *argp, nlm4_testres *result, struct svc_req *rqstp, 1853 CLIENT **rpcp) 1854 { 1855 fhandle_t fh; 1856 struct vfs_state vs; 1857 struct nlm_host *host, *bhost; 1858 int error, sysid; 1859 struct flock fl; 1860 accmode_t accmode; 1861 1862 memset(result, 0, sizeof(*result)); 1863 memset(&vs, 0, sizeof(vs)); 1864 1865 host = nlm_find_host_by_name(argp->alock.caller_name, 1866 svc_getrpccaller(rqstp), rqstp->rq_vers); 1867 if (!host) { 1868 result->stat.stat = nlm4_denied_nolocks; 1869 return (ENOMEM); 1870 } 1871 1872 NLM_DEBUG(3, "nlm_do_test(): caller_name = %s (sysid = %d)\n", 1873 host->nh_caller_name, host->nh_sysid); 1874 1875 nlm_check_expired_locks(host); 1876 sysid = host->nh_sysid; 1877 1878 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 1879 nlm_copy_netobj(&result->cookie, &argp->cookie, 
M_RPC); 1880 1881 if (time_uptime < nlm_grace_threshold) { 1882 result->stat.stat = nlm4_denied_grace_period; 1883 goto out; 1884 } 1885 1886 accmode = argp->exclusive ? VWRITE : VREAD; 1887 error = nlm_get_vfs_state(host, rqstp, &fh, &vs, accmode); 1888 if (error) { 1889 result->stat.stat = nlm_convert_error(error); 1890 goto out; 1891 } 1892 1893 fl.l_start = argp->alock.l_offset; 1894 fl.l_len = argp->alock.l_len; 1895 fl.l_pid = argp->alock.svid; 1896 fl.l_sysid = sysid; 1897 fl.l_whence = SEEK_SET; 1898 if (argp->exclusive) 1899 fl.l_type = F_WRLCK; 1900 else 1901 fl.l_type = F_RDLCK; 1902 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_GETLK, &fl, F_REMOTE); 1903 if (error) { 1904 result->stat.stat = nlm4_failed; 1905 goto out; 1906 } 1907 1908 if (fl.l_type == F_UNLCK) { 1909 result->stat.stat = nlm4_granted; 1910 } else { 1911 result->stat.stat = nlm4_denied; 1912 result->stat.nlm4_testrply_u.holder.exclusive = 1913 (fl.l_type == F_WRLCK); 1914 result->stat.nlm4_testrply_u.holder.svid = fl.l_pid; 1915 bhost = nlm_find_host_by_sysid(fl.l_sysid); 1916 if (bhost) { 1917 /* 1918 * We don't have any useful way of recording 1919 * the value of oh used in the original lock 1920 * request. Ideally, the test reply would have 1921 * a space for the owning host's name allowing 1922 * our caller's NLM to keep track. 1923 * 1924 * As far as I can see, Solaris uses an eight 1925 * byte structure for oh which contains a four 1926 * byte pid encoded in local byte order and 1927 * the first four bytes of the host 1928 * name. Linux uses a variable length string 1929 * 'pid@hostname' in ascii but doesn't even 1930 * return that in test replies. 1931 * 1932 * For the moment, return nothing in oh 1933 * (already zero'ed above). 
1934 */ 1935 nlm_host_release(bhost); 1936 } 1937 result->stat.nlm4_testrply_u.holder.l_offset = fl.l_start; 1938 result->stat.nlm4_testrply_u.holder.l_len = fl.l_len; 1939 } 1940 1941 out: 1942 nlm_release_vfs_state(&vs); 1943 if (rpcp) 1944 *rpcp = nlm_host_get_rpc(host, TRUE); 1945 nlm_host_release(host); 1946 return (0); 1947 } 1948 1949 int 1950 nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp, 1951 bool_t monitor, CLIENT **rpcp) 1952 { 1953 fhandle_t fh; 1954 struct vfs_state vs; 1955 struct nlm_host *host; 1956 int error, sysid; 1957 struct flock fl; 1958 accmode_t accmode; 1959 1960 memset(result, 0, sizeof(*result)); 1961 memset(&vs, 0, sizeof(vs)); 1962 1963 host = nlm_find_host_by_name(argp->alock.caller_name, 1964 svc_getrpccaller(rqstp), rqstp->rq_vers); 1965 if (!host) { 1966 result->stat.stat = nlm4_denied_nolocks; 1967 return (ENOMEM); 1968 } 1969 1970 NLM_DEBUG(3, "nlm_do_lock(): caller_name = %s (sysid = %d)\n", 1971 host->nh_caller_name, host->nh_sysid); 1972 1973 if (monitor && host->nh_state && argp->state 1974 && host->nh_state != argp->state) { 1975 /* 1976 * The host rebooted without telling us. Trash its 1977 * locks. 1978 */ 1979 nlm_host_notify(host, argp->state); 1980 } 1981 1982 nlm_check_expired_locks(host); 1983 sysid = host->nh_sysid; 1984 1985 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 1986 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 1987 1988 if (time_uptime < nlm_grace_threshold && !argp->reclaim) { 1989 result->stat.stat = nlm4_denied_grace_period; 1990 goto out; 1991 } 1992 1993 accmode = argp->exclusive ? 
VWRITE : VREAD; 1994 error = nlm_get_vfs_state(host, rqstp, &fh, &vs, accmode); 1995 if (error) { 1996 result->stat.stat = nlm_convert_error(error); 1997 goto out; 1998 } 1999 2000 fl.l_start = argp->alock.l_offset; 2001 fl.l_len = argp->alock.l_len; 2002 fl.l_pid = argp->alock.svid; 2003 fl.l_sysid = sysid; 2004 fl.l_whence = SEEK_SET; 2005 if (argp->exclusive) 2006 fl.l_type = F_WRLCK; 2007 else 2008 fl.l_type = F_RDLCK; 2009 if (argp->block) { 2010 struct nlm_async_lock *af; 2011 CLIENT *client; 2012 struct nlm_grantcookie cookie; 2013 2014 /* 2015 * First, make sure we can contact the host's NLM. 2016 */ 2017 client = nlm_host_get_rpc(host, TRUE); 2018 if (!client) { 2019 result->stat.stat = nlm4_failed; 2020 goto out; 2021 } 2022 2023 /* 2024 * First we need to check and see if there is an 2025 * existing blocked lock that matches. This could be a 2026 * badly behaved client or an RPC re-send. If we find 2027 * one, just return nlm4_blocked. 2028 */ 2029 mtx_lock(&host->nh_lock); 2030 TAILQ_FOREACH(af, &host->nh_pending, af_link) { 2031 if (af->af_fl.l_start == fl.l_start 2032 && af->af_fl.l_len == fl.l_len 2033 && af->af_fl.l_pid == fl.l_pid 2034 && af->af_fl.l_type == fl.l_type) { 2035 break; 2036 } 2037 } 2038 if (!af) { 2039 cookie.ng_sysid = host->nh_sysid; 2040 cookie.ng_cookie = host->nh_grantcookie++; 2041 } 2042 mtx_unlock(&host->nh_lock); 2043 if (af) { 2044 CLNT_RELEASE(client); 2045 result->stat.stat = nlm4_blocked; 2046 goto out; 2047 } 2048 2049 af = malloc(sizeof(struct nlm_async_lock), M_NLM, 2050 M_WAITOK|M_ZERO); 2051 TASK_INIT(&af->af_task, 0, nlm_lock_callback, af); 2052 af->af_vp = vs.vs_vp; 2053 af->af_fl = fl; 2054 af->af_host = host; 2055 af->af_rpc = client; 2056 /* 2057 * We use M_RPC here so that we can xdr_free the thing 2058 * later. 
2059 */ 2060 nlm_make_netobj(&af->af_granted.cookie, 2061 (caddr_t)&cookie, sizeof(cookie), M_RPC); 2062 af->af_granted.exclusive = argp->exclusive; 2063 af->af_granted.alock.caller_name = 2064 strdup(argp->alock.caller_name, M_RPC); 2065 nlm_copy_netobj(&af->af_granted.alock.fh, 2066 &argp->alock.fh, M_RPC); 2067 nlm_copy_netobj(&af->af_granted.alock.oh, 2068 &argp->alock.oh, M_RPC); 2069 af->af_granted.alock.svid = argp->alock.svid; 2070 af->af_granted.alock.l_offset = argp->alock.l_offset; 2071 af->af_granted.alock.l_len = argp->alock.l_len; 2072 2073 /* 2074 * Put the entry on the pending list before calling 2075 * VOP_ADVLOCKASYNC. We do this in case the lock 2076 * request was blocked (returning EINPROGRESS) but 2077 * then granted before we manage to run again. The 2078 * client may receive the granted message before we 2079 * send our blocked reply but thats their problem. 2080 */ 2081 mtx_lock(&host->nh_lock); 2082 TAILQ_INSERT_TAIL(&host->nh_pending, af, af_link); 2083 mtx_unlock(&host->nh_lock); 2084 2085 error = VOP_ADVLOCKASYNC(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE, 2086 &af->af_task, &af->af_cookie); 2087 2088 /* 2089 * If the lock completed synchronously, just free the 2090 * tracking structure now. 2091 */ 2092 if (error != EINPROGRESS) { 2093 CLNT_RELEASE(af->af_rpc); 2094 mtx_lock(&host->nh_lock); 2095 TAILQ_REMOVE(&host->nh_pending, af, af_link); 2096 mtx_unlock(&host->nh_lock); 2097 xdr_free((xdrproc_t) xdr_nlm4_testargs, 2098 &af->af_granted); 2099 free(af, M_NLM); 2100 } else { 2101 NLM_DEBUG(2, "NLM: pending async lock %p for %s " 2102 "(sysid %d)\n", af, host->nh_caller_name, sysid); 2103 /* 2104 * Don't vrele the vnode just yet - this must 2105 * wait until either the async callback 2106 * happens or the lock is cancelled. 
2107 */ 2108 vs.vs_vp = NULL; 2109 } 2110 } else { 2111 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE); 2112 } 2113 2114 if (error) { 2115 if (error == EINPROGRESS) { 2116 result->stat.stat = nlm4_blocked; 2117 } else if (error == EDEADLK) { 2118 result->stat.stat = nlm4_deadlck; 2119 } else if (error == EAGAIN) { 2120 result->stat.stat = nlm4_denied; 2121 } else { 2122 result->stat.stat = nlm4_failed; 2123 } 2124 } else { 2125 if (monitor) 2126 nlm_host_monitor(host, argp->state); 2127 result->stat.stat = nlm4_granted; 2128 } 2129 2130 out: 2131 nlm_release_vfs_state(&vs); 2132 if (rpcp) 2133 *rpcp = nlm_host_get_rpc(host, TRUE); 2134 nlm_host_release(host); 2135 return (0); 2136 } 2137 2138 int 2139 nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *result, struct svc_req *rqstp, 2140 CLIENT **rpcp) 2141 { 2142 fhandle_t fh; 2143 struct vfs_state vs; 2144 struct nlm_host *host; 2145 int error, sysid; 2146 struct flock fl; 2147 struct nlm_async_lock *af; 2148 2149 memset(result, 0, sizeof(*result)); 2150 memset(&vs, 0, sizeof(vs)); 2151 2152 host = nlm_find_host_by_name(argp->alock.caller_name, 2153 svc_getrpccaller(rqstp), rqstp->rq_vers); 2154 if (!host) { 2155 result->stat.stat = nlm4_denied_nolocks; 2156 return (ENOMEM); 2157 } 2158 2159 NLM_DEBUG(3, "nlm_do_cancel(): caller_name = %s (sysid = %d)\n", 2160 host->nh_caller_name, host->nh_sysid); 2161 2162 nlm_check_expired_locks(host); 2163 sysid = host->nh_sysid; 2164 2165 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 2166 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 2167 2168 if (time_uptime < nlm_grace_threshold) { 2169 result->stat.stat = nlm4_denied_grace_period; 2170 goto out; 2171 } 2172 2173 error = nlm_get_vfs_state(host, rqstp, &fh, &vs, (accmode_t)0); 2174 if (error) { 2175 result->stat.stat = nlm_convert_error(error); 2176 goto out; 2177 } 2178 2179 fl.l_start = argp->alock.l_offset; 2180 fl.l_len = argp->alock.l_len; 2181 fl.l_pid = argp->alock.svid; 2182 fl.l_sysid = sysid; 
2183 fl.l_whence = SEEK_SET; 2184 if (argp->exclusive) 2185 fl.l_type = F_WRLCK; 2186 else 2187 fl.l_type = F_RDLCK; 2188 2189 /* 2190 * First we need to try and find the async lock request - if 2191 * there isn't one, we give up and return nlm4_denied. 2192 */ 2193 mtx_lock(&host->nh_lock); 2194 2195 TAILQ_FOREACH(af, &host->nh_pending, af_link) { 2196 if (af->af_fl.l_start == fl.l_start 2197 && af->af_fl.l_len == fl.l_len 2198 && af->af_fl.l_pid == fl.l_pid 2199 && af->af_fl.l_type == fl.l_type) { 2200 break; 2201 } 2202 } 2203 2204 if (!af) { 2205 mtx_unlock(&host->nh_lock); 2206 result->stat.stat = nlm4_denied; 2207 goto out; 2208 } 2209 2210 error = nlm_cancel_async_lock(af); 2211 2212 if (error) { 2213 result->stat.stat = nlm4_denied; 2214 } else { 2215 result->stat.stat = nlm4_granted; 2216 } 2217 2218 mtx_unlock(&host->nh_lock); 2219 2220 out: 2221 nlm_release_vfs_state(&vs); 2222 if (rpcp) 2223 *rpcp = nlm_host_get_rpc(host, TRUE); 2224 nlm_host_release(host); 2225 return (0); 2226 } 2227 2228 int 2229 nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *result, struct svc_req *rqstp, 2230 CLIENT **rpcp) 2231 { 2232 fhandle_t fh; 2233 struct vfs_state vs; 2234 struct nlm_host *host; 2235 int error, sysid; 2236 struct flock fl; 2237 2238 memset(result, 0, sizeof(*result)); 2239 memset(&vs, 0, sizeof(vs)); 2240 2241 host = nlm_find_host_by_name(argp->alock.caller_name, 2242 svc_getrpccaller(rqstp), rqstp->rq_vers); 2243 if (!host) { 2244 result->stat.stat = nlm4_denied_nolocks; 2245 return (ENOMEM); 2246 } 2247 2248 NLM_DEBUG(3, "nlm_do_unlock(): caller_name = %s (sysid = %d)\n", 2249 host->nh_caller_name, host->nh_sysid); 2250 2251 nlm_check_expired_locks(host); 2252 sysid = host->nh_sysid; 2253 2254 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 2255 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 2256 2257 if (time_uptime < nlm_grace_threshold) { 2258 result->stat.stat = nlm4_denied_grace_period; 2259 goto out; 2260 } 2261 2262 error = 
nlm_get_vfs_state(host, rqstp, &fh, &vs, (accmode_t)0); 2263 if (error) { 2264 result->stat.stat = nlm_convert_error(error); 2265 goto out; 2266 } 2267 2268 fl.l_start = argp->alock.l_offset; 2269 fl.l_len = argp->alock.l_len; 2270 fl.l_pid = argp->alock.svid; 2271 fl.l_sysid = sysid; 2272 fl.l_whence = SEEK_SET; 2273 fl.l_type = F_UNLCK; 2274 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_UNLCK, &fl, F_REMOTE); 2275 2276 /* 2277 * Ignore the error - there is no result code for failure, 2278 * only for grace period. 2279 */ 2280 result->stat.stat = nlm4_granted; 2281 2282 out: 2283 nlm_release_vfs_state(&vs); 2284 if (rpcp) 2285 *rpcp = nlm_host_get_rpc(host, TRUE); 2286 nlm_host_release(host); 2287 return (0); 2288 } 2289 2290 int 2291 nlm_do_granted(nlm4_testargs *argp, nlm4_res *result, struct svc_req *rqstp, 2292 2293 CLIENT **rpcp) 2294 { 2295 struct nlm_host *host; 2296 struct nlm_waiting_lock *nw; 2297 2298 memset(result, 0, sizeof(*result)); 2299 2300 host = nlm_find_host_by_addr(svc_getrpccaller(rqstp), rqstp->rq_vers); 2301 if (!host) { 2302 result->stat.stat = nlm4_denied_nolocks; 2303 return (ENOMEM); 2304 } 2305 2306 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 2307 result->stat.stat = nlm4_denied; 2308 KFAIL_POINT_CODE(DEBUG_FP, nlm_deny_grant, goto out); 2309 2310 mtx_lock(&nlm_global_lock); 2311 TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) { 2312 if (!nw->nw_waiting) 2313 continue; 2314 if (argp->alock.svid == nw->nw_lock.svid 2315 && argp->alock.l_offset == nw->nw_lock.l_offset 2316 && argp->alock.l_len == nw->nw_lock.l_len 2317 && argp->alock.fh.n_len == nw->nw_lock.fh.n_len 2318 && !memcmp(argp->alock.fh.n_bytes, nw->nw_lock.fh.n_bytes, 2319 nw->nw_lock.fh.n_len)) { 2320 nw->nw_waiting = FALSE; 2321 wakeup(nw); 2322 result->stat.stat = nlm4_granted; 2323 break; 2324 } 2325 } 2326 mtx_unlock(&nlm_global_lock); 2327 2328 out: 2329 if (rpcp) 2330 *rpcp = nlm_host_get_rpc(host, TRUE); 2331 nlm_host_release(host); 2332 return (0); 2333 } 2334 2335 
void 2336 nlm_do_granted_res(nlm4_res *argp, struct svc_req *rqstp) 2337 { 2338 struct nlm_host *host = NULL; 2339 struct nlm_async_lock *af = NULL; 2340 int error; 2341 2342 if (argp->cookie.n_len != sizeof(struct nlm_grantcookie)) { 2343 NLM_DEBUG(1, "NLM: bogus grant cookie"); 2344 goto out; 2345 } 2346 2347 host = nlm_find_host_by_sysid(ng_sysid(&argp->cookie)); 2348 if (!host) { 2349 NLM_DEBUG(1, "NLM: Unknown host rejected our grant"); 2350 goto out; 2351 } 2352 2353 mtx_lock(&host->nh_lock); 2354 TAILQ_FOREACH(af, &host->nh_granted, af_link) 2355 if (ng_cookie(&argp->cookie) == 2356 ng_cookie(&af->af_granted.cookie)) 2357 break; 2358 if (af) 2359 TAILQ_REMOVE(&host->nh_granted, af, af_link); 2360 mtx_unlock(&host->nh_lock); 2361 2362 if (!af) { 2363 NLM_DEBUG(1, "NLM: host %s (sysid %d) replied to our grant " 2364 "with unrecognized cookie %d:%d", host->nh_caller_name, 2365 host->nh_sysid, ng_sysid(&argp->cookie), 2366 ng_cookie(&argp->cookie)); 2367 goto out; 2368 } 2369 2370 if (argp->stat.stat != nlm4_granted) { 2371 af->af_fl.l_type = F_UNLCK; 2372 error = VOP_ADVLOCK(af->af_vp, NULL, F_UNLCK, &af->af_fl, F_REMOTE); 2373 if (error) { 2374 NLM_DEBUG(1, "NLM: host %s (sysid %d) rejected our grant " 2375 "and we failed to unlock (%d)", host->nh_caller_name, 2376 host->nh_sysid, error); 2377 goto out; 2378 } 2379 2380 NLM_DEBUG(5, "NLM: async lock %p rejected by host %s (sysid %d)", 2381 af, host->nh_caller_name, host->nh_sysid); 2382 } else { 2383 NLM_DEBUG(5, "NLM: async lock %p accepted by host %s (sysid %d)", 2384 af, host->nh_caller_name, host->nh_sysid); 2385 } 2386 2387 out: 2388 if (af) 2389 nlm_free_async_lock(af); 2390 if (host) 2391 nlm_host_release(host); 2392 } 2393 2394 void 2395 nlm_do_free_all(nlm4_notify *argp) 2396 { 2397 struct nlm_host *host, *thost; 2398 2399 TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, thost) { 2400 if (!strcmp(host->nh_caller_name, argp->name)) 2401 nlm_host_notify(host, argp->state); 2402 } 2403 } 2404 2405 /* 2406 * 
Kernel module glue 2407 */ 2408 static int 2409 nfslockd_modevent(module_t mod, int type, void *data) 2410 { 2411 2412 switch (type) { 2413 case MOD_LOAD: 2414 return (0); 2415 case MOD_UNLOAD: 2416 /* The NLM module cannot be safely unloaded. */ 2417 /* FALLTHROUGH */ 2418 default: 2419 return (EOPNOTSUPP); 2420 } 2421 } 2422 static moduledata_t nfslockd_mod = { 2423 "nfslockd", 2424 nfslockd_modevent, 2425 NULL, 2426 }; 2427 DECLARE_MODULE(nfslockd, nfslockd_mod, SI_SUB_VFS, SI_ORDER_ANY); 2428 2429 /* So that loader and kldload(2) can find us, wherever we are.. */ 2430 MODULE_DEPEND(nfslockd, krpc, 1, 1, 1); 2431 MODULE_DEPEND(nfslockd, nfslock, 1, 1, 1); 2432 MODULE_VERSION(nfslockd, 1); 2433