1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ 5 * Authors: Doug Rabson <dfr@rabson.org> 6 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
28 */ 29 30 #include "opt_inet6.h" 31 32 #include <sys/param.h> 33 #include <sys/fail.h> 34 #include <sys/fcntl.h> 35 #include <sys/kernel.h> 36 #include <sys/kthread.h> 37 #include <sys/lockf.h> 38 #include <sys/malloc.h> 39 #include <sys/mount.h> 40 #include <sys/priv.h> 41 #include <sys/proc.h> 42 #include <sys/socket.h> 43 #include <sys/socketvar.h> 44 #include <sys/syscall.h> 45 #include <sys/sysctl.h> 46 #include <sys/sysent.h> 47 #include <sys/syslog.h> 48 #include <sys/sysproto.h> 49 #include <sys/systm.h> 50 #include <sys/taskqueue.h> 51 #include <sys/unistd.h> 52 #include <sys/vnode.h> 53 54 #include <nfs/nfsproto.h> 55 #include <nfs/nfs_lock.h> 56 57 #include <nlm/nlm_prot.h> 58 #include <nlm/sm_inter.h> 59 #include <nlm/nlm.h> 60 #include <rpc/rpc_com.h> 61 #include <rpc/rpcb_prot.h> 62 63 MALLOC_DEFINE(M_NLM, "NLM", "Network Lock Manager"); 64 65 /* 66 * If a host is inactive (and holds no locks) for this amount of 67 * seconds, we consider it idle and stop tracking it. 68 */ 69 #define NLM_IDLE_TIMEOUT 30 70 71 /* 72 * We check the host list for idle every few seconds. 73 */ 74 #define NLM_IDLE_PERIOD 5 75 76 /* 77 * We only look for GRANTED_RES messages for a little while. 78 */ 79 #define NLM_EXPIRE_TIMEOUT 10 80 81 /* 82 * Support for sysctl vfs.nlm.sysid 83 */ 84 static SYSCTL_NODE(_vfs, OID_AUTO, nlm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 85 "Network Lock Manager"); 86 static SYSCTL_NODE(_vfs_nlm, OID_AUTO, sysid, 87 CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 88 ""); 89 90 /* 91 * Syscall hooks 92 */ 93 static struct syscall_helper_data nlm_syscalls[] = { 94 SYSCALL_INIT_HELPER(nlm_syscall), 95 SYSCALL_INIT_LAST 96 }; 97 98 /* 99 * Debug level passed in from userland. We also support a sysctl hook 100 * so that it can be changed on a live system. 101 */ 102 static int nlm_debug_level; 103 SYSCTL_INT(_debug, OID_AUTO, nlm_debug, CTLFLAG_RW, &nlm_debug_level, 0, ""); 104 105 #define NLM_DEBUG(_level, args...) 
\ 106 do { \ 107 if (nlm_debug_level >= (_level)) \ 108 log(LOG_DEBUG, args); \ 109 } while(0) 110 #define NLM_ERR(args...) \ 111 do { \ 112 log(LOG_ERR, args); \ 113 } while(0) 114 115 /* 116 * Grace period handling. The value of nlm_grace_threshold is the 117 * value of time_uptime after which we are serving requests normally. 118 */ 119 static time_t nlm_grace_threshold; 120 121 /* 122 * We check for idle hosts if time_uptime is greater than 123 * nlm_next_idle_check, 124 */ 125 static time_t nlm_next_idle_check; 126 127 /* 128 * A flag to indicate the server is already running. 129 */ 130 static int nlm_is_running; 131 132 /* 133 * A socket to use for RPC - shared by all IPv4 RPC clients. 134 */ 135 static struct socket *nlm_socket; 136 137 #ifdef INET6 138 139 /* 140 * A socket to use for RPC - shared by all IPv6 RPC clients. 141 */ 142 static struct socket *nlm_socket6; 143 144 #endif 145 146 /* 147 * An RPC client handle that can be used to communicate with the local 148 * NSM. 149 */ 150 static CLIENT *nlm_nsm; 151 152 /* 153 * An AUTH handle for the server's creds. 154 */ 155 static AUTH *nlm_auth; 156 157 /* 158 * A zero timeval for sending async RPC messages. 159 */ 160 struct timeval nlm_zero_tv = { 0, 0 }; 161 162 /* 163 * The local NSM state number 164 */ 165 int nlm_nsm_state; 166 167 /* 168 * A lock to protect the host list and waiting lock list. 169 */ 170 static struct mtx nlm_global_lock; 171 172 /* 173 * Locks: 174 * (l) locked by nh_lock 175 * (s) only accessed via server RPC which is single threaded 176 * (g) locked by nlm_global_lock 177 * (c) const until freeing 178 * (a) modified using atomic ops 179 */ 180 181 /* 182 * A pending client-side lock request, stored on the nlm_waiting_locks 183 * list. 
184 */ 185 struct nlm_waiting_lock { 186 TAILQ_ENTRY(nlm_waiting_lock) nw_link; /* (g) */ 187 bool_t nw_waiting; /* (g) */ 188 nlm4_lock nw_lock; /* (c) */ 189 union nfsfh nw_fh; /* (c) */ 190 struct vnode *nw_vp; /* (c) */ 191 }; 192 TAILQ_HEAD(nlm_waiting_lock_list, nlm_waiting_lock); 193 194 struct nlm_waiting_lock_list nlm_waiting_locks; /* (g) */ 195 196 /* 197 * A pending server-side asynchronous lock request, stored on the 198 * nh_pending list of the NLM host. 199 */ 200 struct nlm_async_lock { 201 TAILQ_ENTRY(nlm_async_lock) af_link; /* (l) host's list of locks */ 202 struct task af_task; /* (c) async callback details */ 203 void *af_cookie; /* (l) lock manager cancel token */ 204 struct vnode *af_vp; /* (l) vnode to lock */ 205 struct flock af_fl; /* (c) lock details */ 206 struct nlm_host *af_host; /* (c) host which is locking */ 207 CLIENT *af_rpc; /* (c) rpc client to send message */ 208 nlm4_testargs af_granted; /* (c) notification details */ 209 time_t af_expiretime; /* (c) notification time */ 210 }; 211 TAILQ_HEAD(nlm_async_lock_list, nlm_async_lock); 212 213 /* 214 * NLM host. 215 */ 216 enum nlm_host_state { 217 NLM_UNMONITORED, 218 NLM_MONITORED, 219 NLM_MONITOR_FAILED, 220 NLM_RECOVERING 221 }; 222 223 struct nlm_rpc { 224 CLIENT *nr_client; /* (l) RPC client handle */ 225 time_t nr_create_time; /* (l) when client was created */ 226 }; 227 228 struct nlm_host { 229 struct mtx nh_lock; 230 volatile u_int nh_refs; /* (a) reference count */ 231 TAILQ_ENTRY(nlm_host) nh_link; /* (g) global list of hosts */ 232 char nh_caller_name[MAXNAMELEN]; /* (c) printable name of host */ 233 uint32_t nh_sysid; /* (c) our allocaed system ID */ 234 char nh_sysid_string[10]; /* (c) string rep. 
of sysid */ 235 struct sockaddr_storage nh_addr; /* (s) remote address of host */ 236 struct nlm_rpc nh_srvrpc; /* (l) RPC for server replies */ 237 struct nlm_rpc nh_clntrpc; /* (l) RPC for client requests */ 238 rpcvers_t nh_vers; /* (s) NLM version of host */ 239 int nh_state; /* (s) last seen NSM state of host */ 240 enum nlm_host_state nh_monstate; /* (l) local NSM monitoring state */ 241 time_t nh_idle_timeout; /* (s) Time at which host is idle */ 242 struct sysctl_ctx_list nh_sysctl; /* (c) vfs.nlm.sysid nodes */ 243 uint32_t nh_grantcookie; /* (l) grant cookie counter */ 244 struct nlm_async_lock_list nh_pending; /* (l) pending async locks */ 245 struct nlm_async_lock_list nh_granted; /* (l) granted locks */ 246 struct nlm_async_lock_list nh_finished; /* (l) finished async locks */ 247 }; 248 TAILQ_HEAD(nlm_host_list, nlm_host); 249 250 static struct nlm_host_list nlm_hosts; /* (g) */ 251 static uint32_t nlm_next_sysid = 1; /* (g) */ 252 253 static void nlm_host_unmonitor(struct nlm_host *); 254 255 struct nlm_grantcookie { 256 uint32_t ng_sysid; 257 uint32_t ng_cookie; 258 }; 259 260 static inline uint32_t 261 ng_sysid(struct netobj *src) 262 { 263 264 return ((struct nlm_grantcookie *)src->n_bytes)->ng_sysid; 265 } 266 267 static inline uint32_t 268 ng_cookie(struct netobj *src) 269 { 270 271 return ((struct nlm_grantcookie *)src->n_bytes)->ng_cookie; 272 } 273 274 /**********************************************************************/ 275 276 /* 277 * Initialise NLM globals. 
278 */ 279 static int 280 nlm_init(void) 281 { 282 int error; 283 284 mtx_init(&nlm_global_lock, "nlm_global_lock", NULL, MTX_DEF); 285 TAILQ_INIT(&nlm_waiting_locks); 286 TAILQ_INIT(&nlm_hosts); 287 288 error = syscall_helper_register(nlm_syscalls, SY_THR_STATIC_KLD); 289 if (error != 0) 290 NLM_ERR("Can't register NLM syscall\n"); 291 return (error); 292 } 293 294 static void 295 nlm_uninit(void) 296 { 297 298 syscall_helper_unregister(nlm_syscalls); 299 } 300 301 /* 302 * Create a netobj from an arbitrary source. 303 */ 304 void 305 nlm_make_netobj(struct netobj *dst, caddr_t src, size_t srcsize, 306 struct malloc_type *type) 307 { 308 309 dst->n_len = srcsize; 310 dst->n_bytes = malloc(srcsize, type, M_WAITOK); 311 memcpy(dst->n_bytes, src, srcsize); 312 } 313 314 /* 315 * Copy a struct netobj. 316 */ 317 void 318 nlm_copy_netobj(struct netobj *dst, struct netobj *src, 319 struct malloc_type *type) 320 { 321 322 nlm_make_netobj(dst, src->n_bytes, src->n_len, type); 323 } 324 325 /* 326 * Create an RPC client handle for the given (address,prog,vers) 327 * triple using UDP. 328 */ 329 static CLIENT * 330 nlm_get_rpc(struct sockaddr *sa, rpcprog_t prog, rpcvers_t vers) 331 { 332 char *wchan = "nlmrcv"; 333 struct sockaddr_storage ss; 334 struct socket *so; 335 CLIENT *rpcb; 336 struct timeval timo; 337 RPCB parms; 338 char *uaddr; 339 enum clnt_stat stat = RPC_SUCCESS; 340 int rpcvers = RPCBVERS4; 341 bool_t do_tcp = FALSE; 342 bool_t tryagain = FALSE; 343 struct portmap mapping; 344 u_short port = 0; 345 struct sockaddr_in *sin4; 346 char namebuf[INET_ADDRSTRLEN]; 347 #ifdef INET6 348 struct sockaddr_in6 *sin6; 349 char namebuf6[INET6_ADDRSTRLEN]; 350 #endif 351 352 /* 353 * First we need to contact the remote RPCBIND service to find 354 * the right port. 
355 */ 356 memcpy(&ss, sa, sa->sa_len); 357 switch (ss.ss_family) { 358 case AF_INET: 359 ((struct sockaddr_in *)&ss)->sin_port = htons(111); 360 so = nlm_socket; 361 break; 362 #ifdef INET6 363 case AF_INET6: 364 ((struct sockaddr_in6 *)&ss)->sin6_port = htons(111); 365 so = nlm_socket6; 366 break; 367 #endif 368 369 default: 370 /* 371 * Unsupported address family - fail. 372 */ 373 return (NULL); 374 } 375 376 rpcb = clnt_dg_create(so, (struct sockaddr *)&ss, 377 RPCBPROG, rpcvers, 0, 0); 378 if (!rpcb) 379 return (NULL); 380 381 try_tcp: 382 parms.r_prog = prog; 383 parms.r_vers = vers; 384 if (do_tcp) 385 parms.r_netid = "tcp"; 386 else 387 parms.r_netid = "udp"; 388 parms.r_addr = ""; 389 parms.r_owner = ""; 390 391 /* 392 * Use the default timeout. 393 */ 394 timo.tv_sec = 25; 395 timo.tv_usec = 0; 396 again: 397 switch (rpcvers) { 398 case RPCBVERS4: 399 case RPCBVERS: 400 /* 401 * Try RPCBIND 4 then 3. 402 */ 403 uaddr = NULL; 404 stat = CLNT_CALL(rpcb, (rpcprog_t) RPCBPROC_GETADDR, 405 (xdrproc_t) xdr_rpcb, &parms, 406 (xdrproc_t) xdr_wrapstring, &uaddr, timo); 407 if (stat == RPC_SUCCESS) { 408 /* 409 * We have a reply from the remote RPCBIND - turn it 410 * into an appropriate address and make a new client 411 * that can talk to the remote NLM. 412 * 413 * XXX fixup IPv6 scope ID. 414 */ 415 struct netbuf *a; 416 a = __rpc_uaddr2taddr_af(ss.ss_family, uaddr); 417 if (!a) { 418 tryagain = TRUE; 419 } else { 420 tryagain = FALSE; 421 memcpy(&ss, a->buf, a->len); 422 free(a->buf, M_RPC); 423 free(a, M_RPC); 424 xdr_free((xdrproc_t) xdr_wrapstring, &uaddr); 425 } 426 } 427 if (tryagain || stat == RPC_PROGVERSMISMATCH) { 428 if (rpcvers == RPCBVERS4) 429 rpcvers = RPCBVERS; 430 else if (rpcvers == RPCBVERS) 431 rpcvers = PMAPVERS; 432 CLNT_CONTROL(rpcb, CLSET_VERS, &rpcvers); 433 goto again; 434 } 435 break; 436 case PMAPVERS: 437 /* 438 * Try portmap. 
439 */ 440 mapping.pm_prog = parms.r_prog; 441 mapping.pm_vers = parms.r_vers; 442 mapping.pm_prot = do_tcp ? IPPROTO_TCP : IPPROTO_UDP; 443 mapping.pm_port = 0; 444 445 stat = CLNT_CALL(rpcb, (rpcprog_t) PMAPPROC_GETPORT, 446 (xdrproc_t) xdr_portmap, &mapping, 447 (xdrproc_t) xdr_u_short, &port, timo); 448 449 if (stat == RPC_SUCCESS) { 450 switch (ss.ss_family) { 451 case AF_INET: 452 ((struct sockaddr_in *)&ss)->sin_port = 453 htons(port); 454 break; 455 456 #ifdef INET6 457 case AF_INET6: 458 ((struct sockaddr_in6 *)&ss)->sin6_port = 459 htons(port); 460 break; 461 #endif 462 } 463 } 464 break; 465 default: 466 panic("invalid rpcvers %d", rpcvers); 467 } 468 /* 469 * We may have a positive response from the portmapper, but the NLM 470 * service was not found. Make sure we received a valid port. 471 */ 472 switch (ss.ss_family) { 473 case AF_INET: 474 port = ((struct sockaddr_in *)&ss)->sin_port; 475 break; 476 #ifdef INET6 477 case AF_INET6: 478 port = ((struct sockaddr_in6 *)&ss)->sin6_port; 479 break; 480 #endif 481 } 482 if (stat != RPC_SUCCESS || !port) { 483 /* 484 * If we were able to talk to rpcbind or portmap, but the udp 485 * variant wasn't available, ask about tcp. 486 * 487 * XXX - We could also check for a TCP portmapper, but 488 * if the host is running a portmapper at all, we should be able 489 * to hail it over UDP. 490 */ 491 if (stat == RPC_SUCCESS && !do_tcp) { 492 do_tcp = TRUE; 493 goto try_tcp; 494 } 495 496 /* Otherwise, bad news. 
*/ 497 switch (ss.ss_family) { 498 case AF_INET: 499 sin4 = (struct sockaddr_in *)&ss; 500 inet_ntop(ss.ss_family, &sin4->sin_addr, 501 namebuf, sizeof namebuf); 502 NLM_ERR("NLM: failed to contact remote rpcbind, " 503 "stat = %d, host = %s, port = %d\n", 504 (int) stat, namebuf, htons(port)); 505 break; 506 #ifdef INET6 507 case AF_INET6: 508 sin6 = (struct sockaddr_in6 *)&ss; 509 inet_ntop(ss.ss_family, &sin6->sin6_addr, 510 namebuf6, sizeof namebuf6); 511 NLM_ERR("NLM: failed to contact remote rpcbind, " 512 "stat = %d, host = %s, port = %d\n", 513 (int) stat, namebuf6, htons(port)); 514 break; 515 #endif 516 } 517 CLNT_DESTROY(rpcb); 518 return (NULL); 519 } 520 521 if (do_tcp) { 522 /* 523 * Destroy the UDP client we used to speak to rpcbind and 524 * recreate as a TCP client. 525 */ 526 struct netconfig *nconf = NULL; 527 528 CLNT_DESTROY(rpcb); 529 530 switch (ss.ss_family) { 531 case AF_INET: 532 nconf = getnetconfigent("tcp"); 533 break; 534 #ifdef INET6 535 case AF_INET6: 536 nconf = getnetconfigent("tcp6"); 537 break; 538 #endif 539 } 540 541 rpcb = clnt_reconnect_create(nconf, (struct sockaddr *)&ss, 542 prog, vers, 0, 0); 543 CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan); 544 rpcb->cl_auth = nlm_auth; 545 546 } else { 547 /* 548 * Re-use the client we used to speak to rpcbind. 549 */ 550 CLNT_CONTROL(rpcb, CLSET_SVC_ADDR, &ss); 551 CLNT_CONTROL(rpcb, CLSET_PROG, &prog); 552 CLNT_CONTROL(rpcb, CLSET_VERS, &vers); 553 CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan); 554 rpcb->cl_auth = nlm_auth; 555 } 556 557 return (rpcb); 558 } 559 560 /* 561 * This async callback after when an async lock request has been 562 * granted. We notify the host which initiated the request. 
563 */ 564 static void 565 nlm_lock_callback(void *arg, int pending) 566 { 567 struct nlm_async_lock *af = (struct nlm_async_lock *) arg; 568 struct rpc_callextra ext; 569 570 NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) granted," 571 " cookie %d:%d\n", af, af->af_host->nh_caller_name, 572 af->af_host->nh_sysid, ng_sysid(&af->af_granted.cookie), 573 ng_cookie(&af->af_granted.cookie)); 574 575 /* 576 * Send the results back to the host. 577 * 578 * Note: there is a possible race here with nlm_host_notify 579 * destroying the RPC client. To avoid problems, the first 580 * thing nlm_host_notify does is to cancel pending async lock 581 * requests. 582 */ 583 memset(&ext, 0, sizeof(ext)); 584 ext.rc_auth = nlm_auth; 585 if (af->af_host->nh_vers == NLM_VERS4) { 586 nlm4_granted_msg_4(&af->af_granted, 587 NULL, af->af_rpc, &ext, nlm_zero_tv); 588 } else { 589 /* 590 * Back-convert to legacy protocol 591 */ 592 nlm_testargs granted; 593 granted.cookie = af->af_granted.cookie; 594 granted.exclusive = af->af_granted.exclusive; 595 granted.alock.caller_name = 596 af->af_granted.alock.caller_name; 597 granted.alock.fh = af->af_granted.alock.fh; 598 granted.alock.oh = af->af_granted.alock.oh; 599 granted.alock.svid = af->af_granted.alock.svid; 600 granted.alock.l_offset = 601 af->af_granted.alock.l_offset; 602 granted.alock.l_len = 603 af->af_granted.alock.l_len; 604 605 nlm_granted_msg_1(&granted, 606 NULL, af->af_rpc, &ext, nlm_zero_tv); 607 } 608 609 /* 610 * Move this entry to the nh_granted list. 611 */ 612 af->af_expiretime = time_uptime + NLM_EXPIRE_TIMEOUT; 613 mtx_lock(&af->af_host->nh_lock); 614 TAILQ_REMOVE(&af->af_host->nh_pending, af, af_link); 615 TAILQ_INSERT_TAIL(&af->af_host->nh_granted, af, af_link); 616 mtx_unlock(&af->af_host->nh_lock); 617 } 618 619 /* 620 * Free an async lock request. The request must have been removed from 621 * any list. 622 */ 623 static void 624 nlm_free_async_lock(struct nlm_async_lock *af) 625 { 626 /* 627 * Free an async lock. 
628 */ 629 if (af->af_rpc) 630 CLNT_RELEASE(af->af_rpc); 631 xdr_free((xdrproc_t) xdr_nlm4_testargs, &af->af_granted); 632 if (af->af_vp) 633 vrele(af->af_vp); 634 free(af, M_NLM); 635 } 636 637 /* 638 * Cancel our async request - this must be called with 639 * af->nh_host->nh_lock held. This is slightly complicated by a 640 * potential race with our own callback. If we fail to cancel the 641 * lock, it must already have been granted - we make sure our async 642 * task has completed by calling taskqueue_drain in this case. 643 */ 644 static int 645 nlm_cancel_async_lock(struct nlm_async_lock *af) 646 { 647 struct nlm_host *host = af->af_host; 648 int error; 649 650 mtx_assert(&host->nh_lock, MA_OWNED); 651 652 mtx_unlock(&host->nh_lock); 653 654 error = VOP_ADVLOCKASYNC(af->af_vp, NULL, F_CANCEL, &af->af_fl, 655 F_REMOTE, NULL, &af->af_cookie); 656 657 if (error) { 658 /* 659 * We failed to cancel - make sure our callback has 660 * completed before we continue. 661 */ 662 taskqueue_drain(taskqueue_thread, &af->af_task); 663 } 664 665 mtx_lock(&host->nh_lock); 666 667 if (!error) { 668 NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) " 669 "cancelled\n", af, host->nh_caller_name, host->nh_sysid); 670 671 /* 672 * Remove from the nh_pending list and free now that 673 * we are safe from the callback. 
674 */ 675 TAILQ_REMOVE(&host->nh_pending, af, af_link); 676 mtx_unlock(&host->nh_lock); 677 nlm_free_async_lock(af); 678 mtx_lock(&host->nh_lock); 679 } 680 681 return (error); 682 } 683 684 static void 685 nlm_check_expired_locks(struct nlm_host *host) 686 { 687 struct nlm_async_lock *af; 688 time_t uptime = time_uptime; 689 690 mtx_lock(&host->nh_lock); 691 while ((af = TAILQ_FIRST(&host->nh_granted)) != NULL 692 && uptime >= af->af_expiretime) { 693 NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) expired," 694 " cookie %d:%d\n", af, af->af_host->nh_caller_name, 695 af->af_host->nh_sysid, ng_sysid(&af->af_granted.cookie), 696 ng_cookie(&af->af_granted.cookie)); 697 TAILQ_REMOVE(&host->nh_granted, af, af_link); 698 mtx_unlock(&host->nh_lock); 699 nlm_free_async_lock(af); 700 mtx_lock(&host->nh_lock); 701 } 702 while ((af = TAILQ_FIRST(&host->nh_finished)) != NULL) { 703 TAILQ_REMOVE(&host->nh_finished, af, af_link); 704 mtx_unlock(&host->nh_lock); 705 nlm_free_async_lock(af); 706 mtx_lock(&host->nh_lock); 707 } 708 mtx_unlock(&host->nh_lock); 709 } 710 711 /* 712 * Free resources used by a host. This is called after the reference 713 * count has reached zero so it doesn't need to worry about locks. 
714 */ 715 static void 716 nlm_host_destroy(struct nlm_host *host) 717 { 718 719 mtx_lock(&nlm_global_lock); 720 TAILQ_REMOVE(&nlm_hosts, host, nh_link); 721 mtx_unlock(&nlm_global_lock); 722 723 if (host->nh_srvrpc.nr_client) 724 CLNT_RELEASE(host->nh_srvrpc.nr_client); 725 if (host->nh_clntrpc.nr_client) 726 CLNT_RELEASE(host->nh_clntrpc.nr_client); 727 mtx_destroy(&host->nh_lock); 728 sysctl_ctx_free(&host->nh_sysctl); 729 free(host, M_NLM); 730 } 731 732 /* 733 * Thread start callback for client lock recovery 734 */ 735 static void 736 nlm_client_recovery_start(void *arg) 737 { 738 struct nlm_host *host = (struct nlm_host *) arg; 739 740 NLM_DEBUG(1, "NLM: client lock recovery for %s started\n", 741 host->nh_caller_name); 742 743 nlm_client_recovery(host); 744 745 NLM_DEBUG(1, "NLM: client lock recovery for %s completed\n", 746 host->nh_caller_name); 747 748 host->nh_monstate = NLM_MONITORED; 749 nlm_host_release(host); 750 751 kthread_exit(); 752 } 753 754 /* 755 * This is called when we receive a host state change notification. We 756 * unlock any active locks owned by the host. When rpc.lockd is 757 * shutting down, this function is called with newstate set to zero 758 * which allows us to cancel any pending async locks and clear the 759 * locking state. 760 */ 761 static void 762 nlm_host_notify(struct nlm_host *host, int newstate) 763 { 764 struct nlm_async_lock *af; 765 766 if (newstate) { 767 NLM_DEBUG(1, "NLM: host %s (sysid %d) rebooted, new " 768 "state is %d\n", host->nh_caller_name, 769 host->nh_sysid, newstate); 770 } 771 772 /* 773 * Cancel any pending async locks for this host. 774 */ 775 mtx_lock(&host->nh_lock); 776 while ((af = TAILQ_FIRST(&host->nh_pending)) != NULL) { 777 /* 778 * nlm_cancel_async_lock will remove the entry from 779 * nh_pending and free it. 780 */ 781 nlm_cancel_async_lock(af); 782 } 783 mtx_unlock(&host->nh_lock); 784 nlm_check_expired_locks(host); 785 786 /* 787 * The host just rebooted - trash its locks. 
788 */ 789 lf_clearremotesys(host->nh_sysid); 790 host->nh_state = newstate; 791 792 /* 793 * If we have any remote locks for this host (i.e. it 794 * represents a remote NFS server that our local NFS client 795 * has locks for), start a recovery thread. 796 */ 797 if (newstate != 0 798 && host->nh_monstate != NLM_RECOVERING 799 && lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid) > 0) { 800 struct thread *td; 801 host->nh_monstate = NLM_RECOVERING; 802 refcount_acquire(&host->nh_refs); 803 kthread_add(nlm_client_recovery_start, host, curproc, &td, 0, 0, 804 "NFS lock recovery for %s", host->nh_caller_name); 805 } 806 } 807 808 /* 809 * Sysctl handler to count the number of locks for a sysid. 810 */ 811 static int 812 nlm_host_lock_count_sysctl(SYSCTL_HANDLER_ARGS) 813 { 814 struct nlm_host *host; 815 int count; 816 817 host = oidp->oid_arg1; 818 count = lf_countlocks(host->nh_sysid); 819 return sysctl_handle_int(oidp, &count, 0, req); 820 } 821 822 /* 823 * Sysctl handler to count the number of client locks for a sysid. 824 */ 825 static int 826 nlm_host_client_lock_count_sysctl(SYSCTL_HANDLER_ARGS) 827 { 828 struct nlm_host *host; 829 int count; 830 831 host = oidp->oid_arg1; 832 count = lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid); 833 return sysctl_handle_int(oidp, &count, 0, req); 834 } 835 836 /* 837 * Create a new NLM host. 
838 */ 839 static struct nlm_host * 840 nlm_create_host(const char* caller_name) 841 { 842 struct nlm_host *host; 843 struct sysctl_oid *oid; 844 845 mtx_assert(&nlm_global_lock, MA_OWNED); 846 847 NLM_DEBUG(1, "NLM: new host %s (sysid %d)\n", 848 caller_name, nlm_next_sysid); 849 host = malloc(sizeof(struct nlm_host), M_NLM, M_NOWAIT|M_ZERO); 850 if (!host) 851 return (NULL); 852 mtx_init(&host->nh_lock, "nh_lock", NULL, MTX_DEF); 853 refcount_init(&host->nh_refs, 1); 854 strlcpy(host->nh_caller_name, caller_name, MAXNAMELEN); 855 host->nh_sysid = nlm_next_sysid++; 856 snprintf(host->nh_sysid_string, sizeof(host->nh_sysid_string), 857 "%d", host->nh_sysid); 858 host->nh_vers = 0; 859 host->nh_state = 0; 860 host->nh_monstate = NLM_UNMONITORED; 861 host->nh_grantcookie = 1; 862 TAILQ_INIT(&host->nh_pending); 863 TAILQ_INIT(&host->nh_granted); 864 TAILQ_INIT(&host->nh_finished); 865 TAILQ_INSERT_TAIL(&nlm_hosts, host, nh_link); 866 867 mtx_unlock(&nlm_global_lock); 868 869 sysctl_ctx_init(&host->nh_sysctl); 870 oid = SYSCTL_ADD_NODE(&host->nh_sysctl, 871 SYSCTL_STATIC_CHILDREN(_vfs_nlm_sysid), 872 OID_AUTO, host->nh_sysid_string, CTLFLAG_RD | CTLFLAG_MPSAFE, 873 NULL, ""); 874 SYSCTL_ADD_STRING(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 875 "hostname", CTLFLAG_RD, host->nh_caller_name, 0, ""); 876 SYSCTL_ADD_UINT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 877 "version", CTLFLAG_RD, &host->nh_vers, 0, ""); 878 SYSCTL_ADD_UINT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 879 "monitored", CTLFLAG_RD, &host->nh_monstate, 0, ""); 880 SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 881 "lock_count", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, host, 882 0, nlm_host_lock_count_sysctl, "I", ""); 883 SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 884 "client_lock_count", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 885 host, 0, nlm_host_client_lock_count_sysctl, "I", ""); 886 887 mtx_lock(&nlm_global_lock); 888 889 return 
(host); 890 } 891 892 /* 893 * Acquire the next sysid for remote locks not handled by the NLM. 894 */ 895 uint32_t 896 nlm_acquire_next_sysid(void) 897 { 898 uint32_t next_sysid; 899 900 mtx_lock(&nlm_global_lock); 901 next_sysid = nlm_next_sysid++; 902 mtx_unlock(&nlm_global_lock); 903 return (next_sysid); 904 } 905 906 /* 907 * Return non-zero if the address parts of the two sockaddrs are the 908 * same. 909 */ 910 static int 911 nlm_compare_addr(const struct sockaddr *a, const struct sockaddr *b) 912 { 913 const struct sockaddr_in *a4, *b4; 914 #ifdef INET6 915 const struct sockaddr_in6 *a6, *b6; 916 #endif 917 918 if (a->sa_family != b->sa_family) 919 return (FALSE); 920 921 switch (a->sa_family) { 922 case AF_INET: 923 a4 = (const struct sockaddr_in *) a; 924 b4 = (const struct sockaddr_in *) b; 925 return !memcmp(&a4->sin_addr, &b4->sin_addr, 926 sizeof(a4->sin_addr)); 927 #ifdef INET6 928 case AF_INET6: 929 a6 = (const struct sockaddr_in6 *) a; 930 b6 = (const struct sockaddr_in6 *) b; 931 return !memcmp(&a6->sin6_addr, &b6->sin6_addr, 932 sizeof(a6->sin6_addr)); 933 #endif 934 } 935 936 return (0); 937 } 938 939 /* 940 * Check for idle hosts and stop monitoring them. We could also free 941 * the host structure here, possibly after a larger timeout but that 942 * would require some care to avoid races with 943 * e.g. nlm_host_lock_count_sysctl. 
944 */ 945 static void 946 nlm_check_idle(void) 947 { 948 struct nlm_host *host; 949 950 mtx_assert(&nlm_global_lock, MA_OWNED); 951 952 if (time_uptime <= nlm_next_idle_check) 953 return; 954 955 nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD; 956 957 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 958 if (host->nh_monstate == NLM_MONITORED 959 && time_uptime > host->nh_idle_timeout) { 960 mtx_unlock(&nlm_global_lock); 961 if (lf_countlocks(host->nh_sysid) > 0 962 || lf_countlocks(NLM_SYSID_CLIENT 963 + host->nh_sysid)) { 964 host->nh_idle_timeout = 965 time_uptime + NLM_IDLE_TIMEOUT; 966 mtx_lock(&nlm_global_lock); 967 continue; 968 } 969 nlm_host_unmonitor(host); 970 mtx_lock(&nlm_global_lock); 971 } 972 } 973 } 974 975 /* 976 * Search for an existing NLM host that matches the given name 977 * (typically the caller_name element of an nlm4_lock). If none is 978 * found, create a new host. If 'addr' is non-NULL, record the remote 979 * address of the host so that we can call it back for async 980 * responses. If 'vers' is greater than zero then record the NLM 981 * program version to use to communicate with this client. 982 */ 983 struct nlm_host * 984 nlm_find_host_by_name(const char *name, const struct sockaddr *addr, 985 rpcvers_t vers) 986 { 987 struct nlm_host *host; 988 989 mtx_lock(&nlm_global_lock); 990 991 /* 992 * The remote host is determined by caller_name. 993 */ 994 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 995 if (!strcmp(host->nh_caller_name, name)) 996 break; 997 } 998 999 if (!host) { 1000 host = nlm_create_host(name); 1001 if (!host) { 1002 mtx_unlock(&nlm_global_lock); 1003 return (NULL); 1004 } 1005 } 1006 refcount_acquire(&host->nh_refs); 1007 1008 host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT; 1009 1010 /* 1011 * If we have an address for the host, record it so that we 1012 * can send async replies etc. 
1013 */ 1014 if (addr) { 1015 1016 KASSERT(addr->sa_len < sizeof(struct sockaddr_storage), 1017 ("Strange remote transport address length")); 1018 1019 /* 1020 * If we have seen an address before and we currently 1021 * have an RPC client handle, make sure the address is 1022 * the same, otherwise discard the client handle. 1023 */ 1024 if (host->nh_addr.ss_len && host->nh_srvrpc.nr_client) { 1025 if (!nlm_compare_addr( 1026 (struct sockaddr *) &host->nh_addr, 1027 addr) 1028 || host->nh_vers != vers) { 1029 CLIENT *client; 1030 mtx_lock(&host->nh_lock); 1031 client = host->nh_srvrpc.nr_client; 1032 host->nh_srvrpc.nr_client = NULL; 1033 mtx_unlock(&host->nh_lock); 1034 if (client) { 1035 CLNT_RELEASE(client); 1036 } 1037 } 1038 } 1039 memcpy(&host->nh_addr, addr, addr->sa_len); 1040 host->nh_vers = vers; 1041 } 1042 1043 nlm_check_idle(); 1044 1045 mtx_unlock(&nlm_global_lock); 1046 1047 return (host); 1048 } 1049 1050 /* 1051 * Search for an existing NLM host that matches the given remote 1052 * address. If none is found, create a new host with the requested 1053 * address and remember 'vers' as the NLM protocol version to use for 1054 * that host. 1055 */ 1056 struct nlm_host * 1057 nlm_find_host_by_addr(const struct sockaddr *addr, int vers) 1058 { 1059 /* 1060 * Fake up a name using inet_ntop. This buffer is 1061 * large enough for an IPv6 address. 1062 */ 1063 char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"]; 1064 struct nlm_host *host; 1065 1066 switch (addr->sa_family) { 1067 case AF_INET: 1068 inet_ntop(AF_INET, 1069 &((const struct sockaddr_in *) addr)->sin_addr, 1070 tmp, sizeof tmp); 1071 break; 1072 #ifdef INET6 1073 case AF_INET6: 1074 inet_ntop(AF_INET6, 1075 &((const struct sockaddr_in6 *) addr)->sin6_addr, 1076 tmp, sizeof tmp); 1077 break; 1078 #endif 1079 default: 1080 strlcpy(tmp, "<unknown>", sizeof(tmp)); 1081 } 1082 1083 mtx_lock(&nlm_global_lock); 1084 1085 /* 1086 * The remote host is determined by caller_name. 
1087 */ 1088 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 1089 if (nlm_compare_addr(addr, 1090 (const struct sockaddr *) &host->nh_addr)) 1091 break; 1092 } 1093 1094 if (!host) { 1095 host = nlm_create_host(tmp); 1096 if (!host) { 1097 mtx_unlock(&nlm_global_lock); 1098 return (NULL); 1099 } 1100 memcpy(&host->nh_addr, addr, addr->sa_len); 1101 host->nh_vers = vers; 1102 } 1103 refcount_acquire(&host->nh_refs); 1104 1105 host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT; 1106 1107 nlm_check_idle(); 1108 1109 mtx_unlock(&nlm_global_lock); 1110 1111 return (host); 1112 } 1113 1114 /* 1115 * Find the NLM host that matches the value of 'sysid'. If none 1116 * exists, return NULL. 1117 */ 1118 static struct nlm_host * 1119 nlm_find_host_by_sysid(int sysid) 1120 { 1121 struct nlm_host *host; 1122 1123 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 1124 if (host->nh_sysid == sysid) { 1125 refcount_acquire(&host->nh_refs); 1126 return (host); 1127 } 1128 } 1129 1130 return (NULL); 1131 } 1132 1133 void nlm_host_release(struct nlm_host *host) 1134 { 1135 if (refcount_release(&host->nh_refs)) { 1136 /* 1137 * Free the host 1138 */ 1139 nlm_host_destroy(host); 1140 } 1141 } 1142 1143 /* 1144 * Unregister this NLM host with the local NSM due to idleness. 1145 */ 1146 static void 1147 nlm_host_unmonitor(struct nlm_host *host) 1148 { 1149 mon_id smmonid; 1150 sm_stat_res smstat; 1151 struct timeval timo; 1152 enum clnt_stat stat; 1153 1154 NLM_DEBUG(1, "NLM: unmonitoring %s (sysid %d)\n", 1155 host->nh_caller_name, host->nh_sysid); 1156 1157 /* 1158 * We put our assigned system ID value in the priv field to 1159 * make it simpler to find the host if we are notified of a 1160 * host restart. 
1161 */ 1162 smmonid.mon_name = host->nh_caller_name; 1163 smmonid.my_id.my_name = "localhost"; 1164 smmonid.my_id.my_prog = NLM_PROG; 1165 smmonid.my_id.my_vers = NLM_SM; 1166 smmonid.my_id.my_proc = NLM_SM_NOTIFY; 1167 1168 timo.tv_sec = 25; 1169 timo.tv_usec = 0; 1170 stat = CLNT_CALL(nlm_nsm, SM_UNMON, 1171 (xdrproc_t) xdr_mon, &smmonid, 1172 (xdrproc_t) xdr_sm_stat, &smstat, timo); 1173 1174 if (stat != RPC_SUCCESS) { 1175 NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat); 1176 return; 1177 } 1178 if (smstat.res_stat == stat_fail) { 1179 NLM_ERR("Local NSM refuses to unmonitor %s\n", 1180 host->nh_caller_name); 1181 return; 1182 } 1183 1184 host->nh_monstate = NLM_UNMONITORED; 1185 } 1186 1187 /* 1188 * Register this NLM host with the local NSM so that we can be 1189 * notified if it reboots. 1190 */ 1191 void 1192 nlm_host_monitor(struct nlm_host *host, int state) 1193 { 1194 mon smmon; 1195 sm_stat_res smstat; 1196 struct timeval timo; 1197 enum clnt_stat stat; 1198 1199 if (state && !host->nh_state) { 1200 /* 1201 * This is the first time we have seen an NSM state 1202 * value for this host. We record it here to help 1203 * detect host reboots. 1204 */ 1205 host->nh_state = state; 1206 NLM_DEBUG(1, "NLM: host %s (sysid %d) has NSM state %d\n", 1207 host->nh_caller_name, host->nh_sysid, state); 1208 } 1209 1210 mtx_lock(&host->nh_lock); 1211 if (host->nh_monstate != NLM_UNMONITORED) { 1212 mtx_unlock(&host->nh_lock); 1213 return; 1214 } 1215 host->nh_monstate = NLM_MONITORED; 1216 mtx_unlock(&host->nh_lock); 1217 1218 NLM_DEBUG(1, "NLM: monitoring %s (sysid %d)\n", 1219 host->nh_caller_name, host->nh_sysid); 1220 1221 /* 1222 * We put our assigned system ID value in the priv field to 1223 * make it simpler to find the host if we are notified of a 1224 * host restart. 
1225 */ 1226 smmon.mon_id.mon_name = host->nh_caller_name; 1227 smmon.mon_id.my_id.my_name = "localhost"; 1228 smmon.mon_id.my_id.my_prog = NLM_PROG; 1229 smmon.mon_id.my_id.my_vers = NLM_SM; 1230 smmon.mon_id.my_id.my_proc = NLM_SM_NOTIFY; 1231 memcpy(smmon.priv, &host->nh_sysid, sizeof(host->nh_sysid)); 1232 1233 timo.tv_sec = 25; 1234 timo.tv_usec = 0; 1235 stat = CLNT_CALL(nlm_nsm, SM_MON, 1236 (xdrproc_t) xdr_mon, &smmon, 1237 (xdrproc_t) xdr_sm_stat, &smstat, timo); 1238 1239 if (stat != RPC_SUCCESS) { 1240 NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat); 1241 return; 1242 } 1243 if (smstat.res_stat == stat_fail) { 1244 NLM_ERR("Local NSM refuses to monitor %s\n", 1245 host->nh_caller_name); 1246 mtx_lock(&host->nh_lock); 1247 host->nh_monstate = NLM_MONITOR_FAILED; 1248 mtx_unlock(&host->nh_lock); 1249 return; 1250 } 1251 1252 host->nh_monstate = NLM_MONITORED; 1253 } 1254 1255 /* 1256 * Return an RPC client handle that can be used to talk to the NLM 1257 * running on the given host. 1258 */ 1259 CLIENT * 1260 nlm_host_get_rpc(struct nlm_host *host, bool_t isserver) 1261 { 1262 struct nlm_rpc *rpc; 1263 CLIENT *client; 1264 1265 mtx_lock(&host->nh_lock); 1266 1267 if (isserver) 1268 rpc = &host->nh_srvrpc; 1269 else 1270 rpc = &host->nh_clntrpc; 1271 1272 /* 1273 * We can't hold onto RPC handles for too long - the async 1274 * call/reply protocol used by some NLM clients makes it hard 1275 * to tell when they change port numbers (e.g. after a 1276 * reboot). Note that if a client reboots while it isn't 1277 * holding any locks, it won't bother to notify us. We 1278 * expire the RPC handles after two minutes. 
1279 */ 1280 if (rpc->nr_client && time_uptime > rpc->nr_create_time + 2*60) { 1281 client = rpc->nr_client; 1282 rpc->nr_client = NULL; 1283 mtx_unlock(&host->nh_lock); 1284 CLNT_RELEASE(client); 1285 mtx_lock(&host->nh_lock); 1286 } 1287 1288 if (!rpc->nr_client) { 1289 mtx_unlock(&host->nh_lock); 1290 client = nlm_get_rpc((struct sockaddr *)&host->nh_addr, 1291 NLM_PROG, host->nh_vers); 1292 mtx_lock(&host->nh_lock); 1293 1294 if (client) { 1295 if (rpc->nr_client) { 1296 mtx_unlock(&host->nh_lock); 1297 CLNT_DESTROY(client); 1298 mtx_lock(&host->nh_lock); 1299 } else { 1300 rpc->nr_client = client; 1301 rpc->nr_create_time = time_uptime; 1302 } 1303 } 1304 } 1305 1306 client = rpc->nr_client; 1307 if (client) 1308 CLNT_ACQUIRE(client); 1309 mtx_unlock(&host->nh_lock); 1310 1311 return (client); 1312 1313 } 1314 1315 int nlm_host_get_sysid(struct nlm_host *host) 1316 { 1317 1318 return (host->nh_sysid); 1319 } 1320 1321 int 1322 nlm_host_get_state(struct nlm_host *host) 1323 { 1324 1325 return (host->nh_state); 1326 } 1327 1328 void * 1329 nlm_register_wait_lock(struct nlm4_lock *lock, struct vnode *vp) 1330 { 1331 struct nlm_waiting_lock *nw; 1332 1333 nw = malloc(sizeof(struct nlm_waiting_lock), M_NLM, M_WAITOK); 1334 nw->nw_lock = *lock; 1335 memcpy(&nw->nw_fh.fh_bytes, nw->nw_lock.fh.n_bytes, 1336 nw->nw_lock.fh.n_len); 1337 nw->nw_lock.fh.n_bytes = nw->nw_fh.fh_bytes; 1338 nw->nw_waiting = TRUE; 1339 nw->nw_vp = vp; 1340 mtx_lock(&nlm_global_lock); 1341 TAILQ_INSERT_TAIL(&nlm_waiting_locks, nw, nw_link); 1342 mtx_unlock(&nlm_global_lock); 1343 1344 return nw; 1345 } 1346 1347 void 1348 nlm_deregister_wait_lock(void *handle) 1349 { 1350 struct nlm_waiting_lock *nw = handle; 1351 1352 mtx_lock(&nlm_global_lock); 1353 TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link); 1354 mtx_unlock(&nlm_global_lock); 1355 1356 free(nw, M_NLM); 1357 } 1358 1359 int 1360 nlm_wait_lock(void *handle, int timo) 1361 { 1362 struct nlm_waiting_lock *nw = handle; 1363 int error, 
stops_deferred; 1364 1365 /* 1366 * If the granted message arrived before we got here, 1367 * nw->nw_waiting will be FALSE - in that case, don't sleep. 1368 */ 1369 mtx_lock(&nlm_global_lock); 1370 error = 0; 1371 if (nw->nw_waiting) { 1372 stops_deferred = sigdeferstop(SIGDEFERSTOP_ERESTART); 1373 error = msleep(nw, &nlm_global_lock, PCATCH, "nlmlock", timo); 1374 sigallowstop(stops_deferred); 1375 } 1376 TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link); 1377 if (error) { 1378 /* 1379 * The granted message may arrive after the 1380 * interrupt/timeout but before we manage to lock the 1381 * mutex. Detect this by examining nw_lock. 1382 */ 1383 if (!nw->nw_waiting) 1384 error = 0; 1385 } else { 1386 /* 1387 * If nlm_cancel_wait is called, then error will be 1388 * zero but nw_waiting will still be TRUE. We 1389 * translate this into EINTR. 1390 */ 1391 if (nw->nw_waiting) 1392 error = EINTR; 1393 } 1394 mtx_unlock(&nlm_global_lock); 1395 1396 free(nw, M_NLM); 1397 1398 return (error); 1399 } 1400 1401 void 1402 nlm_cancel_wait(struct vnode *vp) 1403 { 1404 struct nlm_waiting_lock *nw; 1405 1406 mtx_lock(&nlm_global_lock); 1407 TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) { 1408 if (nw->nw_vp == vp) { 1409 wakeup(nw); 1410 } 1411 } 1412 mtx_unlock(&nlm_global_lock); 1413 } 1414 1415 /**********************************************************************/ 1416 1417 /* 1418 * Syscall interface with userland. 
1419 */ 1420 1421 extern void nlm_prog_0(struct svc_req *rqstp, SVCXPRT *transp); 1422 extern void nlm_prog_1(struct svc_req *rqstp, SVCXPRT *transp); 1423 extern void nlm_prog_3(struct svc_req *rqstp, SVCXPRT *transp); 1424 extern void nlm_prog_4(struct svc_req *rqstp, SVCXPRT *transp); 1425 1426 static int 1427 nlm_register_services(SVCPOOL *pool, int addr_count, char **addrs) 1428 { 1429 static rpcvers_t versions[] = { 1430 NLM_SM, NLM_VERS, NLM_VERSX, NLM_VERS4 1431 }; 1432 static void (*dispatchers[])(struct svc_req *, SVCXPRT *) = { 1433 nlm_prog_0, nlm_prog_1, nlm_prog_3, nlm_prog_4 1434 }; 1435 1436 SVCXPRT **xprts; 1437 char netid[16]; 1438 char uaddr[128]; 1439 struct netconfig *nconf; 1440 int i, j, error; 1441 1442 if (!addr_count) { 1443 NLM_ERR("NLM: no service addresses given - can't start server"); 1444 return (EINVAL); 1445 } 1446 1447 if (addr_count < 0 || addr_count > 256 ) { 1448 NLM_ERR("NLM: too many service addresses (%d) given, " 1449 "max 256 - can't start server\n", addr_count); 1450 return (EINVAL); 1451 } 1452 1453 xprts = malloc(addr_count * sizeof(SVCXPRT *), M_NLM, M_WAITOK|M_ZERO); 1454 for (i = 0; i < nitems(versions); i++) { 1455 for (j = 0; j < addr_count; j++) { 1456 /* 1457 * Create transports for the first version and 1458 * then just register everything else to the 1459 * same transports. 
1460 */ 1461 if (i == 0) { 1462 char *up; 1463 1464 error = copyin(&addrs[2*j], &up, 1465 sizeof(char*)); 1466 if (error) 1467 goto out; 1468 error = copyinstr(up, netid, sizeof(netid), 1469 NULL); 1470 if (error) 1471 goto out; 1472 error = copyin(&addrs[2*j+1], &up, 1473 sizeof(char*)); 1474 if (error) 1475 goto out; 1476 error = copyinstr(up, uaddr, sizeof(uaddr), 1477 NULL); 1478 if (error) 1479 goto out; 1480 nconf = getnetconfigent(netid); 1481 if (!nconf) { 1482 NLM_ERR("Can't lookup netid %s\n", 1483 netid); 1484 error = EINVAL; 1485 goto out; 1486 } 1487 xprts[j] = svc_tp_create(pool, dispatchers[i], 1488 NLM_PROG, versions[i], uaddr, nconf); 1489 if (!xprts[j]) { 1490 NLM_ERR("NLM: unable to create " 1491 "(NLM_PROG, %d).\n", versions[i]); 1492 error = EINVAL; 1493 goto out; 1494 } 1495 freenetconfigent(nconf); 1496 } else { 1497 nconf = getnetconfigent(xprts[j]->xp_netid); 1498 rpcb_unset(NLM_PROG, versions[i], nconf); 1499 if (!svc_reg(xprts[j], NLM_PROG, versions[i], 1500 dispatchers[i], nconf)) { 1501 NLM_ERR("NLM: can't register " 1502 "(NLM_PROG, %d)\n", versions[i]); 1503 error = EINVAL; 1504 goto out; 1505 } 1506 } 1507 } 1508 } 1509 error = 0; 1510 out: 1511 for (j = 0; j < addr_count; j++) { 1512 if (xprts[j]) 1513 SVC_RELEASE(xprts[j]); 1514 } 1515 free(xprts, M_NLM); 1516 return (error); 1517 } 1518 1519 /* 1520 * Main server entry point. Contacts the local NSM to get its current 1521 * state and send SM_UNMON_ALL. Registers the NLM services and then 1522 * services requests. Does not return until the server is interrupted 1523 * by a signal. 
*/
static int
nlm_server_main(int addr_count, char **addrs)
{
	struct thread *td = curthread;
	int error;
	SVCPOOL *pool = NULL;
	struct sockopt opt;
	int portlow;
#ifdef INET6
	struct sockaddr_in6 sin6;
#endif
	struct sockaddr_in sin;
	my_id id;
	sm_stat smstat;
	struct timeval timo;
	enum clnt_stat stat;
	struct nlm_host *host, *nhost;
	struct nlm_waiting_lock *nw;
	vop_advlock_t *old_nfs_advlock;
	vop_reclaim_t *old_nfs_reclaim;

	/* Refuse to start while another instance is marked as running. */
	if (nlm_is_running != 0) {
		NLM_ERR("NLM: can't start server - "
		    "it appears to be running already\n");
		return (EPERM);
	}

	/*
	 * Only create the sockets if nlm_socket has not been set up
	 * yet. Each socket is asked to use the low port range; the
	 * result of sosetopt() is not checked.
	 */
	if (nlm_socket == NULL) {
		memset(&opt, 0, sizeof(opt));

		error = socreate(AF_INET, &nlm_socket, SOCK_DGRAM, 0,
		    td->td_ucred, td);
		if (error) {
			NLM_ERR("NLM: can't create IPv4 socket - error %d\n",
			    error);
			return (error);
		}
		opt.sopt_dir = SOPT_SET;
		opt.sopt_level = IPPROTO_IP;
		opt.sopt_name = IP_PORTRANGE;
		portlow = IP_PORTRANGE_LOW;
		opt.sopt_val = &portlow;
		opt.sopt_valsize = sizeof(portlow);
		sosetopt(nlm_socket, &opt);

#ifdef INET6
		nlm_socket6 = NULL;
		error = socreate(AF_INET6, &nlm_socket6, SOCK_DGRAM, 0,
		    td->td_ucred, td);
		if (error) {
			NLM_ERR("NLM: can't create IPv6 socket - error %d\n",
			    error);
			/* Undo the IPv4 socket so a later start retries both. */
			soclose(nlm_socket);
			nlm_socket = NULL;
			return (error);
		}
		opt.sopt_dir = SOPT_SET;
		opt.sopt_level = IPPROTO_IPV6;
		opt.sopt_name = IPV6_PORTRANGE;
		portlow = IPV6_PORTRANGE_LOW;
		opt.sopt_val = &portlow;
		opt.sopt_valsize = sizeof(portlow);
		sosetopt(nlm_socket6, &opt);
#endif
	}

	nlm_auth = authunix_create(curthread->td_ucred);

	/*
	 * Get a client handle for the local NSM: try the IPv6 loopback
	 * address first (when INET6 is configured) and fall back to
	 * the IPv4 loopback address.
	 */
#ifdef INET6
	memset(&sin6, 0, sizeof(sin6));
	sin6.sin6_len = sizeof(sin6);
	sin6.sin6_family = AF_INET6;
	sin6.sin6_addr = in6addr_loopback;
	nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin6, SM_PROG, SM_VERS);
	if (!nlm_nsm) {
#endif
		memset(&sin, 0, sizeof(sin));
		sin.sin_len = sizeof(sin);
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
		nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin, SM_PROG,
		    SM_VERS);
#ifdef INET6
	}
#endif

	if (!nlm_nsm) {
		NLM_ERR("Can't start NLM - unable to contact NSM\n");
		error = EINVAL;
		goto out;
	}

	pool = svcpool_create("NLM", NULL);

	error = nlm_register_services(pool, addr_count, addrs);
	if (error)
		goto out;

	memset(&id, 0, sizeof(id));
	id.my_name = "NFS NLM";

	/* Ask the NSM to drop everything it was monitoring for us. */
	timo.tv_sec = 25;
	timo.tv_usec = 0;
	stat = CLNT_CALL(nlm_nsm, SM_UNMON_ALL,
	    (xdrproc_t) xdr_my_id, &id,
	    (xdrproc_t) xdr_sm_stat, &smstat, timo);

	if (stat != RPC_SUCCESS) {
		struct rpc_err err;

		CLNT_GETERR(nlm_nsm, &err);
		NLM_ERR("NLM: unexpected error contacting NSM, "
		    "stat=%d, errno=%d\n", stat, err.re_errno);
		error = EINVAL;
		goto out;
	}
	nlm_is_running = 1;

	NLM_DEBUG(1, "NLM: local NSM state is %d\n", smstat.state);
	nlm_nsm_state = smstat.state;

	/*
	 * Install the NLM advlock/reclaim hooks for the duration of
	 * svc_run() and put the previous values back afterwards.
	 */
	old_nfs_advlock = nfs_advlock_p;
	nfs_advlock_p = nlm_advlock;
	old_nfs_reclaim = nfs_reclaim_p;
	nfs_reclaim_p = nlm_reclaim;

	svc_run(pool);
	error = 0;

	nfs_advlock_p = old_nfs_advlock;
	nfs_reclaim_p = old_nfs_reclaim;

out:
	/* Common teardown for both the normal and the error exits. */
	nlm_is_running = 0;
	if (pool)
		svcpool_destroy(pool);

	/*
	 * We are finished communicating with the NSM.
	 */
	if (nlm_nsm) {
		CLNT_RELEASE(nlm_nsm);
		nlm_nsm = NULL;
	}

	/*
	 * Trash all the existing state so that if the server
	 * restarts, it gets a clean slate. This is complicated by the
	 * possibility that there may be other threads trying to make
	 * client locking requests.
	 *
	 * First we fake a client reboot notification which will
	 * cancel any pending async locks and purge remote lock state
	 * from the local lock manager. We release the reference from
	 * nlm_hosts to the host (which may remove it from the list
	 * and free it). After this phase, the only entries in the
	 * nlm_host list should be from other threads performing
	 * client lock requests.
	 */
	mtx_lock(&nlm_global_lock);
	TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) {
		wakeup(nw);
	}
	TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, nhost) {
		/* The global lock is dropped around the per-host work. */
		mtx_unlock(&nlm_global_lock);
		nlm_host_notify(host, 0);
		nlm_host_release(host);
		mtx_lock(&nlm_global_lock);
	}
	mtx_unlock(&nlm_global_lock);

	AUTH_DESTROY(nlm_auth);

	return (error);
}

/*
 * nlm_syscall(2) entry point.
 *
 * Requires the PRIV_NFS_LOCKD privilege. Records the debug level
 * passed in by userland, derives the end of the grace period and the
 * time of the next idle check from time_uptime, and then runs the
 * server via nlm_server_main(), returning its result.
 */
int
sys_nlm_syscall(struct thread *td, struct nlm_syscall_args *uap)
{
	int error;

	error = priv_check(td, PRIV_NFS_LOCKD);
	if (error)
		return (error);

	nlm_debug_level = uap->debug_level;
	nlm_grace_threshold = time_uptime + uap->grace_period;
	nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD;

	return nlm_server_main(uap->addr_count, uap->addrs);
}

/**********************************************************************/

/*
 * NLM implementation details, called from the RPC stubs.
1721 */ 1722 1723 void 1724 nlm_sm_notify(struct nlm_sm_status *argp) 1725 { 1726 uint32_t sysid; 1727 struct nlm_host *host; 1728 1729 NLM_DEBUG(3, "nlm_sm_notify(): mon_name = %s\n", argp->mon_name); 1730 memcpy(&sysid, &argp->priv, sizeof(sysid)); 1731 host = nlm_find_host_by_sysid(sysid); 1732 if (host) { 1733 nlm_host_notify(host, argp->state); 1734 nlm_host_release(host); 1735 } 1736 } 1737 1738 static void 1739 nlm_convert_to_fhandle_t(fhandle_t *fhp, struct netobj *p) 1740 { 1741 memcpy(fhp, p->n_bytes, sizeof(fhandle_t)); 1742 } 1743 1744 struct vfs_state { 1745 struct mount *vs_mp; 1746 struct vnode *vs_vp; 1747 int vs_vnlocked; 1748 }; 1749 1750 static int 1751 nlm_get_vfs_state(struct nlm_host *host, struct svc_req *rqstp, 1752 fhandle_t *fhp, struct vfs_state *vs, accmode_t accmode) 1753 { 1754 int error; 1755 uint64_t exflags; 1756 struct ucred *cred = NULL, *credanon = NULL; 1757 1758 memset(vs, 0, sizeof(*vs)); 1759 1760 vs->vs_mp = vfs_getvfs(&fhp->fh_fsid); 1761 if (!vs->vs_mp) { 1762 return (ESTALE); 1763 } 1764 1765 /* accmode == 0 means don't check, since it is an unlock. */ 1766 if (accmode != 0) { 1767 error = VFS_CHECKEXP(vs->vs_mp, 1768 (struct sockaddr *)&host->nh_addr, &exflags, &credanon, 1769 NULL, NULL); 1770 if (error) 1771 goto out; 1772 1773 if (exflags & MNT_EXRDONLY || 1774 (vs->vs_mp->mnt_flag & MNT_RDONLY)) { 1775 error = EROFS; 1776 goto out; 1777 } 1778 } 1779 1780 error = VFS_FHTOVP(vs->vs_mp, &fhp->fh_fid, LK_EXCLUSIVE, &vs->vs_vp); 1781 if (error) 1782 goto out; 1783 vs->vs_vnlocked = TRUE; 1784 1785 if (accmode != 0) { 1786 if (!svc_getcred(rqstp, &cred, NULL)) { 1787 error = EINVAL; 1788 goto out; 1789 } 1790 if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) { 1791 crfree(cred); 1792 cred = credanon; 1793 credanon = NULL; 1794 } 1795 1796 /* 1797 * Check cred. 
1798 */ 1799 error = VOP_ACCESS(vs->vs_vp, accmode, cred, curthread); 1800 /* 1801 * If this failed and accmode != VWRITE, try again with 1802 * VWRITE to maintain backwards compatibility with the 1803 * old code that always used VWRITE. 1804 */ 1805 if (error != 0 && accmode != VWRITE) 1806 error = VOP_ACCESS(vs->vs_vp, VWRITE, cred, curthread); 1807 if (error) 1808 goto out; 1809 } 1810 1811 VOP_UNLOCK(vs->vs_vp); 1812 vs->vs_vnlocked = FALSE; 1813 1814 out: 1815 if (cred) 1816 crfree(cred); 1817 if (credanon) 1818 crfree(credanon); 1819 1820 return (error); 1821 } 1822 1823 static void 1824 nlm_release_vfs_state(struct vfs_state *vs) 1825 { 1826 1827 if (vs->vs_vp) { 1828 if (vs->vs_vnlocked) 1829 vput(vs->vs_vp); 1830 else 1831 vrele(vs->vs_vp); 1832 } 1833 if (vs->vs_mp) 1834 vfs_rel(vs->vs_mp); 1835 } 1836 1837 static nlm4_stats 1838 nlm_convert_error(int error) 1839 { 1840 1841 if (error == ESTALE) 1842 return nlm4_stale_fh; 1843 else if (error == EROFS) 1844 return nlm4_rofs; 1845 else 1846 return nlm4_failed; 1847 } 1848 1849 int 1850 nlm_do_test(nlm4_testargs *argp, nlm4_testres *result, struct svc_req *rqstp, 1851 CLIENT **rpcp) 1852 { 1853 fhandle_t fh; 1854 struct vfs_state vs; 1855 struct nlm_host *host, *bhost; 1856 int error, sysid; 1857 struct flock fl; 1858 accmode_t accmode; 1859 1860 memset(result, 0, sizeof(*result)); 1861 memset(&vs, 0, sizeof(vs)); 1862 1863 host = nlm_find_host_by_name(argp->alock.caller_name, 1864 svc_getrpccaller(rqstp), rqstp->rq_vers); 1865 if (!host) { 1866 result->stat.stat = nlm4_denied_nolocks; 1867 return (ENOMEM); 1868 } 1869 1870 NLM_DEBUG(3, "nlm_do_test(): caller_name = %s (sysid = %d)\n", 1871 host->nh_caller_name, host->nh_sysid); 1872 1873 nlm_check_expired_locks(host); 1874 sysid = host->nh_sysid; 1875 1876 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 1877 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 1878 1879 if (time_uptime < nlm_grace_threshold) { 1880 result->stat.stat = 
nlm4_denied_grace_period; 1881 goto out; 1882 } 1883 1884 accmode = argp->exclusive ? VWRITE : VREAD; 1885 error = nlm_get_vfs_state(host, rqstp, &fh, &vs, accmode); 1886 if (error) { 1887 result->stat.stat = nlm_convert_error(error); 1888 goto out; 1889 } 1890 1891 fl.l_start = argp->alock.l_offset; 1892 fl.l_len = argp->alock.l_len; 1893 fl.l_pid = argp->alock.svid; 1894 fl.l_sysid = sysid; 1895 fl.l_whence = SEEK_SET; 1896 if (argp->exclusive) 1897 fl.l_type = F_WRLCK; 1898 else 1899 fl.l_type = F_RDLCK; 1900 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_GETLK, &fl, F_REMOTE); 1901 if (error) { 1902 result->stat.stat = nlm4_failed; 1903 goto out; 1904 } 1905 1906 if (fl.l_type == F_UNLCK) { 1907 result->stat.stat = nlm4_granted; 1908 } else { 1909 result->stat.stat = nlm4_denied; 1910 result->stat.nlm4_testrply_u.holder.exclusive = 1911 (fl.l_type == F_WRLCK); 1912 result->stat.nlm4_testrply_u.holder.svid = fl.l_pid; 1913 bhost = nlm_find_host_by_sysid(fl.l_sysid); 1914 if (bhost) { 1915 /* 1916 * We don't have any useful way of recording 1917 * the value of oh used in the original lock 1918 * request. Ideally, the test reply would have 1919 * a space for the owning host's name allowing 1920 * our caller's NLM to keep track. 1921 * 1922 * As far as I can see, Solaris uses an eight 1923 * byte structure for oh which contains a four 1924 * byte pid encoded in local byte order and 1925 * the first four bytes of the host 1926 * name. Linux uses a variable length string 1927 * 'pid@hostname' in ascii but doesn't even 1928 * return that in test replies. 1929 * 1930 * For the moment, return nothing in oh 1931 * (already zero'ed above). 
1932 */ 1933 nlm_host_release(bhost); 1934 } 1935 result->stat.nlm4_testrply_u.holder.l_offset = fl.l_start; 1936 result->stat.nlm4_testrply_u.holder.l_len = fl.l_len; 1937 } 1938 1939 out: 1940 nlm_release_vfs_state(&vs); 1941 if (rpcp) 1942 *rpcp = nlm_host_get_rpc(host, TRUE); 1943 nlm_host_release(host); 1944 return (0); 1945 } 1946 1947 int 1948 nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp, 1949 bool_t monitor, CLIENT **rpcp) 1950 { 1951 fhandle_t fh; 1952 struct vfs_state vs; 1953 struct nlm_host *host; 1954 int error, sysid; 1955 struct flock fl; 1956 accmode_t accmode; 1957 1958 memset(result, 0, sizeof(*result)); 1959 memset(&vs, 0, sizeof(vs)); 1960 1961 host = nlm_find_host_by_name(argp->alock.caller_name, 1962 svc_getrpccaller(rqstp), rqstp->rq_vers); 1963 if (!host) { 1964 result->stat.stat = nlm4_denied_nolocks; 1965 return (ENOMEM); 1966 } 1967 1968 NLM_DEBUG(3, "nlm_do_lock(): caller_name = %s (sysid = %d)\n", 1969 host->nh_caller_name, host->nh_sysid); 1970 1971 if (monitor && host->nh_state && argp->state 1972 && host->nh_state != argp->state) { 1973 /* 1974 * The host rebooted without telling us. Trash its 1975 * locks. 1976 */ 1977 nlm_host_notify(host, argp->state); 1978 } 1979 1980 nlm_check_expired_locks(host); 1981 sysid = host->nh_sysid; 1982 1983 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 1984 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 1985 1986 if (time_uptime < nlm_grace_threshold && !argp->reclaim) { 1987 result->stat.stat = nlm4_denied_grace_period; 1988 goto out; 1989 } 1990 1991 accmode = argp->exclusive ? 
VWRITE : VREAD; 1992 error = nlm_get_vfs_state(host, rqstp, &fh, &vs, accmode); 1993 if (error) { 1994 result->stat.stat = nlm_convert_error(error); 1995 goto out; 1996 } 1997 1998 fl.l_start = argp->alock.l_offset; 1999 fl.l_len = argp->alock.l_len; 2000 fl.l_pid = argp->alock.svid; 2001 fl.l_sysid = sysid; 2002 fl.l_whence = SEEK_SET; 2003 if (argp->exclusive) 2004 fl.l_type = F_WRLCK; 2005 else 2006 fl.l_type = F_RDLCK; 2007 if (argp->block) { 2008 struct nlm_async_lock *af; 2009 CLIENT *client; 2010 struct nlm_grantcookie cookie; 2011 2012 /* 2013 * First, make sure we can contact the host's NLM. 2014 */ 2015 client = nlm_host_get_rpc(host, TRUE); 2016 if (!client) { 2017 result->stat.stat = nlm4_failed; 2018 goto out; 2019 } 2020 2021 /* 2022 * First we need to check and see if there is an 2023 * existing blocked lock that matches. This could be a 2024 * badly behaved client or an RPC re-send. If we find 2025 * one, just return nlm4_blocked. 2026 */ 2027 mtx_lock(&host->nh_lock); 2028 TAILQ_FOREACH(af, &host->nh_pending, af_link) { 2029 if (af->af_fl.l_start == fl.l_start 2030 && af->af_fl.l_len == fl.l_len 2031 && af->af_fl.l_pid == fl.l_pid 2032 && af->af_fl.l_type == fl.l_type) { 2033 break; 2034 } 2035 } 2036 if (!af) { 2037 cookie.ng_sysid = host->nh_sysid; 2038 cookie.ng_cookie = host->nh_grantcookie++; 2039 } 2040 mtx_unlock(&host->nh_lock); 2041 if (af) { 2042 CLNT_RELEASE(client); 2043 result->stat.stat = nlm4_blocked; 2044 goto out; 2045 } 2046 2047 af = malloc(sizeof(struct nlm_async_lock), M_NLM, 2048 M_WAITOK|M_ZERO); 2049 TASK_INIT(&af->af_task, 0, nlm_lock_callback, af); 2050 af->af_vp = vs.vs_vp; 2051 af->af_fl = fl; 2052 af->af_host = host; 2053 af->af_rpc = client; 2054 /* 2055 * We use M_RPC here so that we can xdr_free the thing 2056 * later. 
2057 */ 2058 nlm_make_netobj(&af->af_granted.cookie, 2059 (caddr_t)&cookie, sizeof(cookie), M_RPC); 2060 af->af_granted.exclusive = argp->exclusive; 2061 af->af_granted.alock.caller_name = 2062 strdup(argp->alock.caller_name, M_RPC); 2063 nlm_copy_netobj(&af->af_granted.alock.fh, 2064 &argp->alock.fh, M_RPC); 2065 nlm_copy_netobj(&af->af_granted.alock.oh, 2066 &argp->alock.oh, M_RPC); 2067 af->af_granted.alock.svid = argp->alock.svid; 2068 af->af_granted.alock.l_offset = argp->alock.l_offset; 2069 af->af_granted.alock.l_len = argp->alock.l_len; 2070 2071 /* 2072 * Put the entry on the pending list before calling 2073 * VOP_ADVLOCKASYNC. We do this in case the lock 2074 * request was blocked (returning EINPROGRESS) but 2075 * then granted before we manage to run again. The 2076 * client may receive the granted message before we 2077 * send our blocked reply but thats their problem. 2078 */ 2079 mtx_lock(&host->nh_lock); 2080 TAILQ_INSERT_TAIL(&host->nh_pending, af, af_link); 2081 mtx_unlock(&host->nh_lock); 2082 2083 error = VOP_ADVLOCKASYNC(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE, 2084 &af->af_task, &af->af_cookie); 2085 2086 /* 2087 * If the lock completed synchronously, just free the 2088 * tracking structure now. 2089 */ 2090 if (error != EINPROGRESS) { 2091 CLNT_RELEASE(af->af_rpc); 2092 mtx_lock(&host->nh_lock); 2093 TAILQ_REMOVE(&host->nh_pending, af, af_link); 2094 mtx_unlock(&host->nh_lock); 2095 xdr_free((xdrproc_t) xdr_nlm4_testargs, 2096 &af->af_granted); 2097 free(af, M_NLM); 2098 } else { 2099 NLM_DEBUG(2, "NLM: pending async lock %p for %s " 2100 "(sysid %d)\n", af, host->nh_caller_name, sysid); 2101 /* 2102 * Don't vrele the vnode just yet - this must 2103 * wait until either the async callback 2104 * happens or the lock is cancelled. 
2105 */ 2106 vs.vs_vp = NULL; 2107 } 2108 } else { 2109 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE); 2110 } 2111 2112 if (error) { 2113 if (error == EINPROGRESS) { 2114 result->stat.stat = nlm4_blocked; 2115 } else if (error == EDEADLK) { 2116 result->stat.stat = nlm4_deadlck; 2117 } else if (error == EAGAIN) { 2118 result->stat.stat = nlm4_denied; 2119 } else { 2120 result->stat.stat = nlm4_failed; 2121 } 2122 } else { 2123 if (monitor) 2124 nlm_host_monitor(host, argp->state); 2125 result->stat.stat = nlm4_granted; 2126 } 2127 2128 out: 2129 nlm_release_vfs_state(&vs); 2130 if (rpcp) 2131 *rpcp = nlm_host_get_rpc(host, TRUE); 2132 nlm_host_release(host); 2133 return (0); 2134 } 2135 2136 int 2137 nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *result, struct svc_req *rqstp, 2138 CLIENT **rpcp) 2139 { 2140 fhandle_t fh; 2141 struct vfs_state vs; 2142 struct nlm_host *host; 2143 int error, sysid; 2144 struct flock fl; 2145 struct nlm_async_lock *af; 2146 2147 memset(result, 0, sizeof(*result)); 2148 memset(&vs, 0, sizeof(vs)); 2149 2150 host = nlm_find_host_by_name(argp->alock.caller_name, 2151 svc_getrpccaller(rqstp), rqstp->rq_vers); 2152 if (!host) { 2153 result->stat.stat = nlm4_denied_nolocks; 2154 return (ENOMEM); 2155 } 2156 2157 NLM_DEBUG(3, "nlm_do_cancel(): caller_name = %s (sysid = %d)\n", 2158 host->nh_caller_name, host->nh_sysid); 2159 2160 nlm_check_expired_locks(host); 2161 sysid = host->nh_sysid; 2162 2163 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 2164 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 2165 2166 if (time_uptime < nlm_grace_threshold) { 2167 result->stat.stat = nlm4_denied_grace_period; 2168 goto out; 2169 } 2170 2171 error = nlm_get_vfs_state(host, rqstp, &fh, &vs, (accmode_t)0); 2172 if (error) { 2173 result->stat.stat = nlm_convert_error(error); 2174 goto out; 2175 } 2176 2177 fl.l_start = argp->alock.l_offset; 2178 fl.l_len = argp->alock.l_len; 2179 fl.l_pid = argp->alock.svid; 2180 fl.l_sysid = sysid; 
2181 fl.l_whence = SEEK_SET; 2182 if (argp->exclusive) 2183 fl.l_type = F_WRLCK; 2184 else 2185 fl.l_type = F_RDLCK; 2186 2187 /* 2188 * First we need to try and find the async lock request - if 2189 * there isn't one, we give up and return nlm4_denied. 2190 */ 2191 mtx_lock(&host->nh_lock); 2192 2193 TAILQ_FOREACH(af, &host->nh_pending, af_link) { 2194 if (af->af_fl.l_start == fl.l_start 2195 && af->af_fl.l_len == fl.l_len 2196 && af->af_fl.l_pid == fl.l_pid 2197 && af->af_fl.l_type == fl.l_type) { 2198 break; 2199 } 2200 } 2201 2202 if (!af) { 2203 mtx_unlock(&host->nh_lock); 2204 result->stat.stat = nlm4_denied; 2205 goto out; 2206 } 2207 2208 error = nlm_cancel_async_lock(af); 2209 2210 if (error) { 2211 result->stat.stat = nlm4_denied; 2212 } else { 2213 result->stat.stat = nlm4_granted; 2214 } 2215 2216 mtx_unlock(&host->nh_lock); 2217 2218 out: 2219 nlm_release_vfs_state(&vs); 2220 if (rpcp) 2221 *rpcp = nlm_host_get_rpc(host, TRUE); 2222 nlm_host_release(host); 2223 return (0); 2224 } 2225 2226 int 2227 nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *result, struct svc_req *rqstp, 2228 CLIENT **rpcp) 2229 { 2230 fhandle_t fh; 2231 struct vfs_state vs; 2232 struct nlm_host *host; 2233 int error, sysid; 2234 struct flock fl; 2235 2236 memset(result, 0, sizeof(*result)); 2237 memset(&vs, 0, sizeof(vs)); 2238 2239 host = nlm_find_host_by_name(argp->alock.caller_name, 2240 svc_getrpccaller(rqstp), rqstp->rq_vers); 2241 if (!host) { 2242 result->stat.stat = nlm4_denied_nolocks; 2243 return (ENOMEM); 2244 } 2245 2246 NLM_DEBUG(3, "nlm_do_unlock(): caller_name = %s (sysid = %d)\n", 2247 host->nh_caller_name, host->nh_sysid); 2248 2249 nlm_check_expired_locks(host); 2250 sysid = host->nh_sysid; 2251 2252 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 2253 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 2254 2255 if (time_uptime < nlm_grace_threshold) { 2256 result->stat.stat = nlm4_denied_grace_period; 2257 goto out; 2258 } 2259 2260 error = 
nlm_get_vfs_state(host, rqstp, &fh, &vs, (accmode_t)0); 2261 if (error) { 2262 result->stat.stat = nlm_convert_error(error); 2263 goto out; 2264 } 2265 2266 fl.l_start = argp->alock.l_offset; 2267 fl.l_len = argp->alock.l_len; 2268 fl.l_pid = argp->alock.svid; 2269 fl.l_sysid = sysid; 2270 fl.l_whence = SEEK_SET; 2271 fl.l_type = F_UNLCK; 2272 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_UNLCK, &fl, F_REMOTE); 2273 2274 /* 2275 * Ignore the error - there is no result code for failure, 2276 * only for grace period. 2277 */ 2278 result->stat.stat = nlm4_granted; 2279 2280 out: 2281 nlm_release_vfs_state(&vs); 2282 if (rpcp) 2283 *rpcp = nlm_host_get_rpc(host, TRUE); 2284 nlm_host_release(host); 2285 return (0); 2286 } 2287 2288 int 2289 nlm_do_granted(nlm4_testargs *argp, nlm4_res *result, struct svc_req *rqstp, 2290 2291 CLIENT **rpcp) 2292 { 2293 struct nlm_host *host; 2294 struct nlm_waiting_lock *nw; 2295 2296 memset(result, 0, sizeof(*result)); 2297 2298 host = nlm_find_host_by_addr(svc_getrpccaller(rqstp), rqstp->rq_vers); 2299 if (!host) { 2300 result->stat.stat = nlm4_denied_nolocks; 2301 return (ENOMEM); 2302 } 2303 2304 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 2305 result->stat.stat = nlm4_denied; 2306 KFAIL_POINT_CODE(DEBUG_FP, nlm_deny_grant, goto out); 2307 2308 mtx_lock(&nlm_global_lock); 2309 TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) { 2310 if (!nw->nw_waiting) 2311 continue; 2312 if (argp->alock.svid == nw->nw_lock.svid 2313 && argp->alock.l_offset == nw->nw_lock.l_offset 2314 && argp->alock.l_len == nw->nw_lock.l_len 2315 && argp->alock.fh.n_len == nw->nw_lock.fh.n_len 2316 && !memcmp(argp->alock.fh.n_bytes, nw->nw_lock.fh.n_bytes, 2317 nw->nw_lock.fh.n_len)) { 2318 nw->nw_waiting = FALSE; 2319 wakeup(nw); 2320 result->stat.stat = nlm4_granted; 2321 break; 2322 } 2323 } 2324 mtx_unlock(&nlm_global_lock); 2325 2326 out: 2327 if (rpcp) 2328 *rpcp = nlm_host_get_rpc(host, TRUE); 2329 nlm_host_release(host); 2330 return (0); 2331 } 2332 2333 
void 2334 nlm_do_granted_res(nlm4_res *argp, struct svc_req *rqstp) 2335 { 2336 struct nlm_host *host = NULL; 2337 struct nlm_async_lock *af = NULL; 2338 int error; 2339 2340 if (argp->cookie.n_len != sizeof(struct nlm_grantcookie)) { 2341 NLM_DEBUG(1, "NLM: bogus grant cookie"); 2342 goto out; 2343 } 2344 2345 host = nlm_find_host_by_sysid(ng_sysid(&argp->cookie)); 2346 if (!host) { 2347 NLM_DEBUG(1, "NLM: Unknown host rejected our grant"); 2348 goto out; 2349 } 2350 2351 mtx_lock(&host->nh_lock); 2352 TAILQ_FOREACH(af, &host->nh_granted, af_link) 2353 if (ng_cookie(&argp->cookie) == 2354 ng_cookie(&af->af_granted.cookie)) 2355 break; 2356 if (af) 2357 TAILQ_REMOVE(&host->nh_granted, af, af_link); 2358 mtx_unlock(&host->nh_lock); 2359 2360 if (!af) { 2361 NLM_DEBUG(1, "NLM: host %s (sysid %d) replied to our grant " 2362 "with unrecognized cookie %d:%d", host->nh_caller_name, 2363 host->nh_sysid, ng_sysid(&argp->cookie), 2364 ng_cookie(&argp->cookie)); 2365 goto out; 2366 } 2367 2368 if (argp->stat.stat != nlm4_granted) { 2369 af->af_fl.l_type = F_UNLCK; 2370 error = VOP_ADVLOCK(af->af_vp, NULL, F_UNLCK, &af->af_fl, F_REMOTE); 2371 if (error) { 2372 NLM_DEBUG(1, "NLM: host %s (sysid %d) rejected our grant " 2373 "and we failed to unlock (%d)", host->nh_caller_name, 2374 host->nh_sysid, error); 2375 goto out; 2376 } 2377 2378 NLM_DEBUG(5, "NLM: async lock %p rejected by host %s (sysid %d)", 2379 af, host->nh_caller_name, host->nh_sysid); 2380 } else { 2381 NLM_DEBUG(5, "NLM: async lock %p accepted by host %s (sysid %d)", 2382 af, host->nh_caller_name, host->nh_sysid); 2383 } 2384 2385 out: 2386 if (af) 2387 nlm_free_async_lock(af); 2388 if (host) 2389 nlm_host_release(host); 2390 } 2391 2392 void 2393 nlm_do_free_all(nlm4_notify *argp) 2394 { 2395 struct nlm_host *host, *thost; 2396 2397 TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, thost) { 2398 if (!strcmp(host->nh_caller_name, argp->name)) 2399 nlm_host_notify(host, argp->state); 2400 } 2401 } 2402 2403 /* 2404 * 
Kernel module glue 2405 */ 2406 static int 2407 nfslockd_modevent(module_t mod, int type, void *data) 2408 { 2409 2410 switch (type) { 2411 case MOD_LOAD: 2412 return (nlm_init()); 2413 2414 case MOD_UNLOAD: 2415 nlm_uninit(); 2416 /* The NLM module cannot be safely unloaded. */ 2417 /* FALLTHROUGH */ 2418 default: 2419 return (EOPNOTSUPP); 2420 } 2421 } 2422 static moduledata_t nfslockd_mod = { 2423 "nfslockd", 2424 nfslockd_modevent, 2425 NULL, 2426 }; 2427 DECLARE_MODULE(nfslockd, nfslockd_mod, SI_SUB_VFS, SI_ORDER_ANY); 2428 2429 /* So that loader and kldload(2) can find us, wherever we are.. */ 2430 MODULE_DEPEND(nfslockd, xdr, 1, 1, 1); 2431 MODULE_DEPEND(nfslockd, krpc, 1, 1, 1); 2432 MODULE_DEPEND(nfslockd, nfscommon, 1, 1, 1); 2433 MODULE_VERSION(nfslockd, 1); 2434