1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ 5 * Authors: Doug Rabson <dfr@rabson.org> 6 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
 */

#include "opt_inet6.h"

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/fail.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lockf.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <sys/unistd.h>
#include <sys/vnode.h>

#include <nfs/nfsproto.h>
#include <nfs/nfs_lock.h>

#include <nlm/nlm_prot.h>
#include <nlm/sm_inter.h>
#include <nlm/nlm.h>
#include <rpc/rpc_com.h>
#include <rpc/rpcb_prot.h>

/* Malloc type used for the allocations made by this file. */
MALLOC_DEFINE(M_NLM, "NLM", "Network Lock Manager");

/*
 * If a host is inactive (and holds no locks) for this many seconds,
 * we consider it idle and stop tracking it.
 */
#define NLM_IDLE_TIMEOUT	30

/*
 * We check the host list for idle hosts every few seconds.
 */
#define NLM_IDLE_PERIOD		5

/*
 * We only look for GRANTED_RES messages for a little while (seconds).
 */
#define NLM_EXPIRE_TIMEOUT	10

/*
 * Support for sysctl vfs.nlm.sysid: per-host nodes are attached
 * underneath _vfs_nlm_sysid when a host is created.
 */
static SYSCTL_NODE(_vfs, OID_AUTO, nlm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
    "Network Lock Manager");
static SYSCTL_NODE(_vfs_nlm, OID_AUTO, sysid,
    CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
    "");

/*
 * Syscall hooks
 */
static struct syscall_helper_data nlm_syscalls[] = {
	SYSCALL_INIT_HELPER(nlm_syscall),
	SYSCALL_INIT_LAST
};

/*
 * Debug level passed in from userland. We also support a sysctl hook
 * (debug.nlm_debug) so that it can be changed on a live system.
 */
static int nlm_debug_level;
SYSCTL_INT(_debug, OID_AUTO, nlm_debug, CTLFLAG_RW, &nlm_debug_level, 0, "");

/*
 * NLM_DEBUG logs at LOG_DEBUG only when nlm_debug_level is at least
 * _level; NLM_ERR always logs at LOG_ERR.  Both take a printf-style
 * format followed by its arguments.
 */
#define NLM_DEBUG(_level, args...)			\
	do {						\
		if (nlm_debug_level >= (_level))	\
			log(LOG_DEBUG, args);		\
	} while(0)
#define NLM_ERR(args...)			\
	do {					\
		log(LOG_ERR, args);		\
	} while(0)

/*
 * Grace period handling. The value of nlm_grace_threshold is the
 * value of time_uptime after which we are serving requests normally.
 */
static time_t nlm_grace_threshold;

/*
 * We check for idle hosts if time_uptime is greater than
 * nlm_next_idle_check.
 */
static time_t nlm_next_idle_check;

/*
 * A flag to indicate the server is already running.
 */
static int nlm_is_running;

/*
 * A socket to use for RPC - shared by all IPv4 RPC clients.
 */
static struct socket *nlm_socket;

#ifdef INET6

/*
 * A socket to use for RPC - shared by all IPv6 RPC clients.
 */
static struct socket *nlm_socket6;

#endif

/*
 * An RPC client handle that can be used to communicate with the local
 * NSM.
 */
static CLIENT *nlm_nsm;

/*
 * An AUTH handle for the server's creds.
 */
static AUTH *nlm_auth;

/*
 * A zero timeval for sending async RPC messages.
 */
struct timeval nlm_zero_tv = { 0, 0 };

/*
 * The local NSM state number
 */
int nlm_nsm_state;

/*
 * A lock to protect the host list and waiting lock list.
 */
static struct mtx nlm_global_lock;

/*
 * Locks (key for the annotations on structure members below):
 * (l)		locked by nh_lock
 * (s)		only accessed via server RPC which is single threaded
 * (g)		locked by nlm_global_lock
 * (c)		const until freeing
 * (a)		modified using atomic ops
 */

/*
 * A pending client-side lock request, stored on the nlm_waiting_locks
 * list.
185 */ 186 struct nlm_waiting_lock { 187 TAILQ_ENTRY(nlm_waiting_lock) nw_link; /* (g) */ 188 bool_t nw_waiting; /* (g) */ 189 nlm4_lock nw_lock; /* (c) */ 190 union nfsfh nw_fh; /* (c) */ 191 struct vnode *nw_vp; /* (c) */ 192 }; 193 TAILQ_HEAD(nlm_waiting_lock_list, nlm_waiting_lock); 194 195 struct nlm_waiting_lock_list nlm_waiting_locks; /* (g) */ 196 197 /* 198 * A pending server-side asynchronous lock request, stored on the 199 * nh_pending list of the NLM host. 200 */ 201 struct nlm_async_lock { 202 TAILQ_ENTRY(nlm_async_lock) af_link; /* (l) host's list of locks */ 203 struct task af_task; /* (c) async callback details */ 204 void *af_cookie; /* (l) lock manager cancel token */ 205 struct vnode *af_vp; /* (l) vnode to lock */ 206 struct flock af_fl; /* (c) lock details */ 207 struct nlm_host *af_host; /* (c) host which is locking */ 208 CLIENT *af_rpc; /* (c) rpc client to send message */ 209 nlm4_testargs af_granted; /* (c) notification details */ 210 time_t af_expiretime; /* (c) notification time */ 211 }; 212 TAILQ_HEAD(nlm_async_lock_list, nlm_async_lock); 213 214 /* 215 * NLM host. 216 */ 217 enum nlm_host_state { 218 NLM_UNMONITORED, 219 NLM_MONITORED, 220 NLM_MONITOR_FAILED, 221 NLM_RECOVERING 222 }; 223 224 struct nlm_rpc { 225 CLIENT *nr_client; /* (l) RPC client handle */ 226 time_t nr_create_time; /* (l) when client was created */ 227 }; 228 229 struct nlm_host { 230 struct mtx nh_lock; 231 volatile u_int nh_refs; /* (a) reference count */ 232 TAILQ_ENTRY(nlm_host) nh_link; /* (g) global list of hosts */ 233 char nh_caller_name[MAXNAMELEN]; /* (c) printable name of host */ 234 uint32_t nh_sysid; /* (c) our allocaed system ID */ 235 char nh_sysid_string[10]; /* (c) string rep. 
of sysid */ 236 struct sockaddr_storage nh_addr; /* (s) remote address of host */ 237 struct nlm_rpc nh_srvrpc; /* (l) RPC for server replies */ 238 struct nlm_rpc nh_clntrpc; /* (l) RPC for client requests */ 239 rpcvers_t nh_vers; /* (s) NLM version of host */ 240 int nh_state; /* (s) last seen NSM state of host */ 241 enum nlm_host_state nh_monstate; /* (l) local NSM monitoring state */ 242 time_t nh_idle_timeout; /* (s) Time at which host is idle */ 243 struct sysctl_ctx_list nh_sysctl; /* (c) vfs.nlm.sysid nodes */ 244 uint32_t nh_grantcookie; /* (l) grant cookie counter */ 245 struct nlm_async_lock_list nh_pending; /* (l) pending async locks */ 246 struct nlm_async_lock_list nh_granted; /* (l) granted locks */ 247 struct nlm_async_lock_list nh_finished; /* (l) finished async locks */ 248 }; 249 TAILQ_HEAD(nlm_host_list, nlm_host); 250 251 static struct nlm_host_list nlm_hosts; /* (g) */ 252 static uint32_t nlm_next_sysid = 1; /* (g) */ 253 254 static void nlm_host_unmonitor(struct nlm_host *); 255 256 struct nlm_grantcookie { 257 uint32_t ng_sysid; 258 uint32_t ng_cookie; 259 }; 260 261 static inline uint32_t 262 ng_sysid(struct netobj *src) 263 { 264 265 return ((struct nlm_grantcookie *)src->n_bytes)->ng_sysid; 266 } 267 268 static inline uint32_t 269 ng_cookie(struct netobj *src) 270 { 271 272 return ((struct nlm_grantcookie *)src->n_bytes)->ng_cookie; 273 } 274 275 /**********************************************************************/ 276 277 /* 278 * Initialise NLM globals. 
279 */ 280 static int 281 nlm_init(void) 282 { 283 int error; 284 285 mtx_init(&nlm_global_lock, "nlm_global_lock", NULL, MTX_DEF); 286 TAILQ_INIT(&nlm_waiting_locks); 287 TAILQ_INIT(&nlm_hosts); 288 289 error = syscall_helper_register(nlm_syscalls, SY_THR_STATIC_KLD); 290 if (error != 0) 291 NLM_ERR("Can't register NLM syscall\n"); 292 return (error); 293 } 294 295 static void 296 nlm_uninit(void) 297 { 298 299 syscall_helper_unregister(nlm_syscalls); 300 } 301 302 /* 303 * Create a netobj from an arbitrary source. 304 */ 305 void 306 nlm_make_netobj(struct netobj *dst, caddr_t src, size_t srcsize, 307 struct malloc_type *type) 308 { 309 310 dst->n_len = srcsize; 311 dst->n_bytes = malloc(srcsize, type, M_WAITOK); 312 memcpy(dst->n_bytes, src, srcsize); 313 } 314 315 /* 316 * Copy a struct netobj. 317 */ 318 void 319 nlm_copy_netobj(struct netobj *dst, struct netobj *src, 320 struct malloc_type *type) 321 { 322 323 nlm_make_netobj(dst, src->n_bytes, src->n_len, type); 324 } 325 326 /* 327 * Create an RPC client handle for the given (address,prog,vers) 328 * triple using UDP. 329 */ 330 static CLIENT * 331 nlm_get_rpc(struct sockaddr *sa, rpcprog_t prog, rpcvers_t vers) 332 { 333 char *wchan = "nlmrcv"; 334 struct sockaddr_storage ss; 335 struct socket *so; 336 CLIENT *rpcb; 337 struct timeval timo; 338 RPCB parms; 339 char *uaddr; 340 enum clnt_stat stat = RPC_SUCCESS; 341 int rpcvers = RPCBVERS4; 342 bool_t do_tcp = FALSE; 343 bool_t tryagain = FALSE; 344 struct portmap mapping; 345 u_short port = 0; 346 struct sockaddr_in *sin4; 347 char namebuf[INET_ADDRSTRLEN]; 348 #ifdef INET6 349 struct sockaddr_in6 *sin6; 350 char namebuf6[INET6_ADDRSTRLEN]; 351 #endif 352 353 /* 354 * First we need to contact the remote RPCBIND service to find 355 * the right port. 
356 */ 357 memcpy(&ss, sa, sa->sa_len); 358 switch (ss.ss_family) { 359 case AF_INET: 360 ((struct sockaddr_in *)&ss)->sin_port = htons(111); 361 so = nlm_socket; 362 break; 363 #ifdef INET6 364 case AF_INET6: 365 ((struct sockaddr_in6 *)&ss)->sin6_port = htons(111); 366 so = nlm_socket6; 367 break; 368 #endif 369 370 default: 371 /* 372 * Unsupported address family - fail. 373 */ 374 return (NULL); 375 } 376 377 rpcb = clnt_dg_create(so, (struct sockaddr *)&ss, 378 RPCBPROG, rpcvers, 0, 0); 379 if (!rpcb) 380 return (NULL); 381 382 try_tcp: 383 parms.r_prog = prog; 384 parms.r_vers = vers; 385 if (do_tcp) 386 parms.r_netid = "tcp"; 387 else 388 parms.r_netid = "udp"; 389 parms.r_addr = ""; 390 parms.r_owner = ""; 391 392 /* 393 * Use the default timeout. 394 */ 395 timo.tv_sec = 25; 396 timo.tv_usec = 0; 397 again: 398 switch (rpcvers) { 399 case RPCBVERS4: 400 case RPCBVERS: 401 /* 402 * Try RPCBIND 4 then 3. 403 */ 404 uaddr = NULL; 405 stat = CLNT_CALL(rpcb, (rpcprog_t) RPCBPROC_GETADDR, 406 (xdrproc_t) xdr_rpcb, &parms, 407 (xdrproc_t) xdr_wrapstring, &uaddr, timo); 408 if (stat == RPC_SUCCESS) { 409 /* 410 * We have a reply from the remote RPCBIND - turn it 411 * into an appropriate address and make a new client 412 * that can talk to the remote NLM. 413 * 414 * XXX fixup IPv6 scope ID. 415 */ 416 struct netbuf *a; 417 a = __rpc_uaddr2taddr_af(ss.ss_family, uaddr); 418 if (!a) { 419 tryagain = TRUE; 420 } else { 421 tryagain = FALSE; 422 memcpy(&ss, a->buf, a->len); 423 free(a->buf, M_RPC); 424 free(a, M_RPC); 425 xdr_free((xdrproc_t) xdr_wrapstring, &uaddr); 426 } 427 } 428 if (tryagain || stat == RPC_PROGVERSMISMATCH) { 429 if (rpcvers == RPCBVERS4) 430 rpcvers = RPCBVERS; 431 else if (rpcvers == RPCBVERS) 432 rpcvers = PMAPVERS; 433 CLNT_CONTROL(rpcb, CLSET_VERS, &rpcvers); 434 goto again; 435 } 436 break; 437 case PMAPVERS: 438 /* 439 * Try portmap. 
440 */ 441 mapping.pm_prog = parms.r_prog; 442 mapping.pm_vers = parms.r_vers; 443 mapping.pm_prot = do_tcp ? IPPROTO_TCP : IPPROTO_UDP; 444 mapping.pm_port = 0; 445 446 stat = CLNT_CALL(rpcb, (rpcprog_t) PMAPPROC_GETPORT, 447 (xdrproc_t) xdr_portmap, &mapping, 448 (xdrproc_t) xdr_u_short, &port, timo); 449 450 if (stat == RPC_SUCCESS) { 451 switch (ss.ss_family) { 452 case AF_INET: 453 ((struct sockaddr_in *)&ss)->sin_port = 454 htons(port); 455 break; 456 457 #ifdef INET6 458 case AF_INET6: 459 ((struct sockaddr_in6 *)&ss)->sin6_port = 460 htons(port); 461 break; 462 #endif 463 } 464 } 465 break; 466 default: 467 panic("invalid rpcvers %d", rpcvers); 468 } 469 /* 470 * We may have a positive response from the portmapper, but the NLM 471 * service was not found. Make sure we received a valid port. 472 */ 473 switch (ss.ss_family) { 474 case AF_INET: 475 port = ((struct sockaddr_in *)&ss)->sin_port; 476 break; 477 #ifdef INET6 478 case AF_INET6: 479 port = ((struct sockaddr_in6 *)&ss)->sin6_port; 480 break; 481 #endif 482 } 483 if (stat != RPC_SUCCESS || !port) { 484 /* 485 * If we were able to talk to rpcbind or portmap, but the udp 486 * variant wasn't available, ask about tcp. 487 * 488 * XXX - We could also check for a TCP portmapper, but 489 * if the host is running a portmapper at all, we should be able 490 * to hail it over UDP. 491 */ 492 if (stat == RPC_SUCCESS && !do_tcp) { 493 do_tcp = TRUE; 494 goto try_tcp; 495 } 496 497 /* Otherwise, bad news. 
*/ 498 switch (ss.ss_family) { 499 case AF_INET: 500 sin4 = (struct sockaddr_in *)&ss; 501 inet_ntop(ss.ss_family, &sin4->sin_addr, 502 namebuf, sizeof namebuf); 503 NLM_ERR("NLM: failed to contact remote rpcbind, " 504 "stat = %d, host = %s, port = %d\n", 505 (int) stat, namebuf, htons(port)); 506 break; 507 #ifdef INET6 508 case AF_INET6: 509 sin6 = (struct sockaddr_in6 *)&ss; 510 inet_ntop(ss.ss_family, &sin6->sin6_addr, 511 namebuf6, sizeof namebuf6); 512 NLM_ERR("NLM: failed to contact remote rpcbind, " 513 "stat = %d, host = %s, port = %d\n", 514 (int) stat, namebuf6, htons(port)); 515 break; 516 #endif 517 } 518 CLNT_DESTROY(rpcb); 519 return (NULL); 520 } 521 522 if (do_tcp) { 523 /* 524 * Destroy the UDP client we used to speak to rpcbind and 525 * recreate as a TCP client. 526 */ 527 struct netconfig *nconf = NULL; 528 529 CLNT_DESTROY(rpcb); 530 531 switch (ss.ss_family) { 532 case AF_INET: 533 nconf = getnetconfigent("tcp"); 534 break; 535 #ifdef INET6 536 case AF_INET6: 537 nconf = getnetconfigent("tcp6"); 538 break; 539 #endif 540 } 541 542 rpcb = clnt_reconnect_create(nconf, (struct sockaddr *)&ss, 543 prog, vers, 0, 0); 544 CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan); 545 rpcb->cl_auth = nlm_auth; 546 547 } else { 548 /* 549 * Re-use the client we used to speak to rpcbind. 550 */ 551 CLNT_CONTROL(rpcb, CLSET_SVC_ADDR, &ss); 552 CLNT_CONTROL(rpcb, CLSET_PROG, &prog); 553 CLNT_CONTROL(rpcb, CLSET_VERS, &vers); 554 CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan); 555 rpcb->cl_auth = nlm_auth; 556 } 557 558 return (rpcb); 559 } 560 561 /* 562 * This async callback after when an async lock request has been 563 * granted. We notify the host which initiated the request. 
564 */ 565 static void 566 nlm_lock_callback(void *arg, int pending) 567 { 568 struct nlm_async_lock *af = (struct nlm_async_lock *) arg; 569 struct rpc_callextra ext; 570 571 NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) granted," 572 " cookie %d:%d\n", af, af->af_host->nh_caller_name, 573 af->af_host->nh_sysid, ng_sysid(&af->af_granted.cookie), 574 ng_cookie(&af->af_granted.cookie)); 575 576 /* 577 * Send the results back to the host. 578 * 579 * Note: there is a possible race here with nlm_host_notify 580 * destroying the RPC client. To avoid problems, the first 581 * thing nlm_host_notify does is to cancel pending async lock 582 * requests. 583 */ 584 memset(&ext, 0, sizeof(ext)); 585 ext.rc_auth = nlm_auth; 586 if (af->af_host->nh_vers == NLM_VERS4) { 587 nlm4_granted_msg_4(&af->af_granted, 588 NULL, af->af_rpc, &ext, nlm_zero_tv); 589 } else { 590 /* 591 * Back-convert to legacy protocol 592 */ 593 nlm_testargs granted; 594 granted.cookie = af->af_granted.cookie; 595 granted.exclusive = af->af_granted.exclusive; 596 granted.alock.caller_name = 597 af->af_granted.alock.caller_name; 598 granted.alock.fh = af->af_granted.alock.fh; 599 granted.alock.oh = af->af_granted.alock.oh; 600 granted.alock.svid = af->af_granted.alock.svid; 601 granted.alock.l_offset = 602 af->af_granted.alock.l_offset; 603 granted.alock.l_len = 604 af->af_granted.alock.l_len; 605 606 nlm_granted_msg_1(&granted, 607 NULL, af->af_rpc, &ext, nlm_zero_tv); 608 } 609 610 /* 611 * Move this entry to the nh_granted list. 612 */ 613 af->af_expiretime = time_uptime + NLM_EXPIRE_TIMEOUT; 614 mtx_lock(&af->af_host->nh_lock); 615 TAILQ_REMOVE(&af->af_host->nh_pending, af, af_link); 616 TAILQ_INSERT_TAIL(&af->af_host->nh_granted, af, af_link); 617 mtx_unlock(&af->af_host->nh_lock); 618 } 619 620 /* 621 * Free an async lock request. The request must have been removed from 622 * any list. 623 */ 624 static void 625 nlm_free_async_lock(struct nlm_async_lock *af) 626 { 627 /* 628 * Free an async lock. 
629 */ 630 if (af->af_rpc) 631 CLNT_RELEASE(af->af_rpc); 632 xdr_free((xdrproc_t) xdr_nlm4_testargs, &af->af_granted); 633 if (af->af_vp) 634 vrele(af->af_vp); 635 free(af, M_NLM); 636 } 637 638 /* 639 * Cancel our async request - this must be called with 640 * af->nh_host->nh_lock held. This is slightly complicated by a 641 * potential race with our own callback. If we fail to cancel the 642 * lock, it must already have been granted - we make sure our async 643 * task has completed by calling taskqueue_drain in this case. 644 */ 645 static int 646 nlm_cancel_async_lock(struct nlm_async_lock *af) 647 { 648 struct nlm_host *host = af->af_host; 649 int error; 650 651 mtx_assert(&host->nh_lock, MA_OWNED); 652 653 mtx_unlock(&host->nh_lock); 654 655 error = VOP_ADVLOCKASYNC(af->af_vp, NULL, F_CANCEL, &af->af_fl, 656 F_REMOTE, NULL, &af->af_cookie); 657 658 if (error) { 659 /* 660 * We failed to cancel - make sure our callback has 661 * completed before we continue. 662 */ 663 taskqueue_drain(taskqueue_thread, &af->af_task); 664 } 665 666 mtx_lock(&host->nh_lock); 667 668 if (!error) { 669 NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) " 670 "cancelled\n", af, host->nh_caller_name, host->nh_sysid); 671 672 /* 673 * Remove from the nh_pending list and free now that 674 * we are safe from the callback. 
675 */ 676 TAILQ_REMOVE(&host->nh_pending, af, af_link); 677 mtx_unlock(&host->nh_lock); 678 nlm_free_async_lock(af); 679 mtx_lock(&host->nh_lock); 680 } 681 682 return (error); 683 } 684 685 static void 686 nlm_check_expired_locks(struct nlm_host *host) 687 { 688 struct nlm_async_lock *af; 689 time_t uptime = time_uptime; 690 691 mtx_lock(&host->nh_lock); 692 while ((af = TAILQ_FIRST(&host->nh_granted)) != NULL 693 && uptime >= af->af_expiretime) { 694 NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) expired," 695 " cookie %d:%d\n", af, af->af_host->nh_caller_name, 696 af->af_host->nh_sysid, ng_sysid(&af->af_granted.cookie), 697 ng_cookie(&af->af_granted.cookie)); 698 TAILQ_REMOVE(&host->nh_granted, af, af_link); 699 mtx_unlock(&host->nh_lock); 700 nlm_free_async_lock(af); 701 mtx_lock(&host->nh_lock); 702 } 703 while ((af = TAILQ_FIRST(&host->nh_finished)) != NULL) { 704 TAILQ_REMOVE(&host->nh_finished, af, af_link); 705 mtx_unlock(&host->nh_lock); 706 nlm_free_async_lock(af); 707 mtx_lock(&host->nh_lock); 708 } 709 mtx_unlock(&host->nh_lock); 710 } 711 712 /* 713 * Free resources used by a host. This is called after the reference 714 * count has reached zero so it doesn't need to worry about locks. 
715 */ 716 static void 717 nlm_host_destroy(struct nlm_host *host) 718 { 719 720 mtx_lock(&nlm_global_lock); 721 TAILQ_REMOVE(&nlm_hosts, host, nh_link); 722 mtx_unlock(&nlm_global_lock); 723 724 if (host->nh_srvrpc.nr_client) 725 CLNT_RELEASE(host->nh_srvrpc.nr_client); 726 if (host->nh_clntrpc.nr_client) 727 CLNT_RELEASE(host->nh_clntrpc.nr_client); 728 mtx_destroy(&host->nh_lock); 729 sysctl_ctx_free(&host->nh_sysctl); 730 free(host, M_NLM); 731 } 732 733 /* 734 * Thread start callback for client lock recovery 735 */ 736 static void 737 nlm_client_recovery_start(void *arg) 738 { 739 struct nlm_host *host = (struct nlm_host *) arg; 740 741 NLM_DEBUG(1, "NLM: client lock recovery for %s started\n", 742 host->nh_caller_name); 743 744 nlm_client_recovery(host); 745 746 NLM_DEBUG(1, "NLM: client lock recovery for %s completed\n", 747 host->nh_caller_name); 748 749 host->nh_monstate = NLM_MONITORED; 750 nlm_host_release(host); 751 752 kthread_exit(); 753 } 754 755 /* 756 * This is called when we receive a host state change notification. We 757 * unlock any active locks owned by the host. When rpc.lockd is 758 * shutting down, this function is called with newstate set to zero 759 * which allows us to cancel any pending async locks and clear the 760 * locking state. 761 */ 762 static void 763 nlm_host_notify(struct nlm_host *host, int newstate) 764 { 765 struct nlm_async_lock *af; 766 767 if (newstate) { 768 NLM_DEBUG(1, "NLM: host %s (sysid %d) rebooted, new " 769 "state is %d\n", host->nh_caller_name, 770 host->nh_sysid, newstate); 771 } 772 773 /* 774 * Cancel any pending async locks for this host. 775 */ 776 mtx_lock(&host->nh_lock); 777 while ((af = TAILQ_FIRST(&host->nh_pending)) != NULL) { 778 /* 779 * nlm_cancel_async_lock will remove the entry from 780 * nh_pending and free it. 781 */ 782 nlm_cancel_async_lock(af); 783 } 784 mtx_unlock(&host->nh_lock); 785 nlm_check_expired_locks(host); 786 787 /* 788 * The host just rebooted - trash its locks. 
789 */ 790 lf_clearremotesys(host->nh_sysid); 791 host->nh_state = newstate; 792 793 /* 794 * If we have any remote locks for this host (i.e. it 795 * represents a remote NFS server that our local NFS client 796 * has locks for), start a recovery thread. 797 */ 798 if (newstate != 0 799 && host->nh_monstate != NLM_RECOVERING 800 && lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid) > 0) { 801 struct thread *td; 802 host->nh_monstate = NLM_RECOVERING; 803 refcount_acquire(&host->nh_refs); 804 kthread_add(nlm_client_recovery_start, host, curproc, &td, 0, 0, 805 "NFS lock recovery for %s", host->nh_caller_name); 806 } 807 } 808 809 /* 810 * Sysctl handler to count the number of locks for a sysid. 811 */ 812 static int 813 nlm_host_lock_count_sysctl(SYSCTL_HANDLER_ARGS) 814 { 815 struct nlm_host *host; 816 int count; 817 818 host = oidp->oid_arg1; 819 count = lf_countlocks(host->nh_sysid); 820 return sysctl_handle_int(oidp, &count, 0, req); 821 } 822 823 /* 824 * Sysctl handler to count the number of client locks for a sysid. 825 */ 826 static int 827 nlm_host_client_lock_count_sysctl(SYSCTL_HANDLER_ARGS) 828 { 829 struct nlm_host *host; 830 int count; 831 832 host = oidp->oid_arg1; 833 count = lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid); 834 return sysctl_handle_int(oidp, &count, 0, req); 835 } 836 837 /* 838 * Create a new NLM host. 
839 */ 840 static struct nlm_host * 841 nlm_create_host(const char* caller_name) 842 { 843 struct nlm_host *host; 844 struct sysctl_oid *oid; 845 846 mtx_assert(&nlm_global_lock, MA_OWNED); 847 848 NLM_DEBUG(1, "NLM: new host %s (sysid %d)\n", 849 caller_name, nlm_next_sysid); 850 host = malloc(sizeof(struct nlm_host), M_NLM, M_NOWAIT|M_ZERO); 851 if (!host) 852 return (NULL); 853 mtx_init(&host->nh_lock, "nh_lock", NULL, MTX_DEF); 854 refcount_init(&host->nh_refs, 1); 855 strlcpy(host->nh_caller_name, caller_name, MAXNAMELEN); 856 host->nh_sysid = nlm_next_sysid++; 857 snprintf(host->nh_sysid_string, sizeof(host->nh_sysid_string), 858 "%d", host->nh_sysid); 859 host->nh_vers = 0; 860 host->nh_state = 0; 861 host->nh_monstate = NLM_UNMONITORED; 862 host->nh_grantcookie = 1; 863 TAILQ_INIT(&host->nh_pending); 864 TAILQ_INIT(&host->nh_granted); 865 TAILQ_INIT(&host->nh_finished); 866 TAILQ_INSERT_TAIL(&nlm_hosts, host, nh_link); 867 868 mtx_unlock(&nlm_global_lock); 869 870 sysctl_ctx_init(&host->nh_sysctl); 871 oid = SYSCTL_ADD_NODE(&host->nh_sysctl, 872 SYSCTL_STATIC_CHILDREN(_vfs_nlm_sysid), 873 OID_AUTO, host->nh_sysid_string, CTLFLAG_RD | CTLFLAG_MPSAFE, 874 NULL, ""); 875 SYSCTL_ADD_STRING(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 876 "hostname", CTLFLAG_RD, host->nh_caller_name, 0, ""); 877 SYSCTL_ADD_UINT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 878 "version", CTLFLAG_RD, &host->nh_vers, 0, ""); 879 SYSCTL_ADD_UINT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 880 "monitored", CTLFLAG_RD, &host->nh_monstate, 0, ""); 881 SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 882 "lock_count", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, host, 883 0, nlm_host_lock_count_sysctl, "I", ""); 884 SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 885 "client_lock_count", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 886 host, 0, nlm_host_client_lock_count_sysctl, "I", ""); 887 888 mtx_lock(&nlm_global_lock); 889 890 return 
(host); 891 } 892 893 /* 894 * Acquire the next sysid for remote locks not handled by the NLM. 895 */ 896 uint32_t 897 nlm_acquire_next_sysid(void) 898 { 899 uint32_t next_sysid; 900 901 mtx_lock(&nlm_global_lock); 902 next_sysid = nlm_next_sysid++; 903 mtx_unlock(&nlm_global_lock); 904 return (next_sysid); 905 } 906 907 /* 908 * Return non-zero if the address parts of the two sockaddrs are the 909 * same. 910 */ 911 static int 912 nlm_compare_addr(const struct sockaddr *a, const struct sockaddr *b) 913 { 914 const struct sockaddr_in *a4, *b4; 915 #ifdef INET6 916 const struct sockaddr_in6 *a6, *b6; 917 #endif 918 919 if (a->sa_family != b->sa_family) 920 return (FALSE); 921 922 switch (a->sa_family) { 923 case AF_INET: 924 a4 = (const struct sockaddr_in *) a; 925 b4 = (const struct sockaddr_in *) b; 926 return !memcmp(&a4->sin_addr, &b4->sin_addr, 927 sizeof(a4->sin_addr)); 928 #ifdef INET6 929 case AF_INET6: 930 a6 = (const struct sockaddr_in6 *) a; 931 b6 = (const struct sockaddr_in6 *) b; 932 return !memcmp(&a6->sin6_addr, &b6->sin6_addr, 933 sizeof(a6->sin6_addr)); 934 #endif 935 } 936 937 return (0); 938 } 939 940 /* 941 * Check for idle hosts and stop monitoring them. We could also free 942 * the host structure here, possibly after a larger timeout but that 943 * would require some care to avoid races with 944 * e.g. nlm_host_lock_count_sysctl. 
945 */ 946 static void 947 nlm_check_idle(void) 948 { 949 struct nlm_host *host; 950 951 mtx_assert(&nlm_global_lock, MA_OWNED); 952 953 if (time_uptime <= nlm_next_idle_check) 954 return; 955 956 nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD; 957 958 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 959 if (host->nh_monstate == NLM_MONITORED 960 && time_uptime > host->nh_idle_timeout) { 961 mtx_unlock(&nlm_global_lock); 962 if (lf_countlocks(host->nh_sysid) > 0 963 || lf_countlocks(NLM_SYSID_CLIENT 964 + host->nh_sysid)) { 965 host->nh_idle_timeout = 966 time_uptime + NLM_IDLE_TIMEOUT; 967 mtx_lock(&nlm_global_lock); 968 continue; 969 } 970 nlm_host_unmonitor(host); 971 mtx_lock(&nlm_global_lock); 972 } 973 } 974 } 975 976 /* 977 * Search for an existing NLM host that matches the given name 978 * (typically the caller_name element of an nlm4_lock). If none is 979 * found, create a new host. If 'addr' is non-NULL, record the remote 980 * address of the host so that we can call it back for async 981 * responses. If 'vers' is greater than zero then record the NLM 982 * program version to use to communicate with this client. 983 */ 984 struct nlm_host * 985 nlm_find_host_by_name(const char *name, const struct sockaddr *addr, 986 rpcvers_t vers) 987 { 988 struct nlm_host *host; 989 990 mtx_lock(&nlm_global_lock); 991 992 /* 993 * The remote host is determined by caller_name. 994 */ 995 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 996 if (!strcmp(host->nh_caller_name, name)) 997 break; 998 } 999 1000 if (!host) { 1001 host = nlm_create_host(name); 1002 if (!host) { 1003 mtx_unlock(&nlm_global_lock); 1004 return (NULL); 1005 } 1006 } 1007 refcount_acquire(&host->nh_refs); 1008 1009 host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT; 1010 1011 /* 1012 * If we have an address for the host, record it so that we 1013 * can send async replies etc. 
1014 */ 1015 if (addr) { 1016 1017 KASSERT(addr->sa_len < sizeof(struct sockaddr_storage), 1018 ("Strange remote transport address length")); 1019 1020 /* 1021 * If we have seen an address before and we currently 1022 * have an RPC client handle, make sure the address is 1023 * the same, otherwise discard the client handle. 1024 */ 1025 if (host->nh_addr.ss_len && host->nh_srvrpc.nr_client) { 1026 if (!nlm_compare_addr( 1027 (struct sockaddr *) &host->nh_addr, 1028 addr) 1029 || host->nh_vers != vers) { 1030 CLIENT *client; 1031 mtx_lock(&host->nh_lock); 1032 client = host->nh_srvrpc.nr_client; 1033 host->nh_srvrpc.nr_client = NULL; 1034 mtx_unlock(&host->nh_lock); 1035 if (client) { 1036 CLNT_RELEASE(client); 1037 } 1038 } 1039 } 1040 memcpy(&host->nh_addr, addr, addr->sa_len); 1041 host->nh_vers = vers; 1042 } 1043 1044 nlm_check_idle(); 1045 1046 mtx_unlock(&nlm_global_lock); 1047 1048 return (host); 1049 } 1050 1051 /* 1052 * Search for an existing NLM host that matches the given remote 1053 * address. If none is found, create a new host with the requested 1054 * address and remember 'vers' as the NLM protocol version to use for 1055 * that host. 1056 */ 1057 struct nlm_host * 1058 nlm_find_host_by_addr(const struct sockaddr *addr, int vers) 1059 { 1060 /* 1061 * Fake up a name using inet_ntop. This buffer is 1062 * large enough for an IPv6 address. 1063 */ 1064 char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"]; 1065 struct nlm_host *host; 1066 1067 switch (addr->sa_family) { 1068 case AF_INET: 1069 inet_ntop(AF_INET, 1070 &((const struct sockaddr_in *) addr)->sin_addr, 1071 tmp, sizeof tmp); 1072 break; 1073 #ifdef INET6 1074 case AF_INET6: 1075 inet_ntop(AF_INET6, 1076 &((const struct sockaddr_in6 *) addr)->sin6_addr, 1077 tmp, sizeof tmp); 1078 break; 1079 #endif 1080 default: 1081 strlcpy(tmp, "<unknown>", sizeof(tmp)); 1082 } 1083 1084 mtx_lock(&nlm_global_lock); 1085 1086 /* 1087 * The remote host is determined by caller_name. 
1088 */ 1089 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 1090 if (nlm_compare_addr(addr, 1091 (const struct sockaddr *) &host->nh_addr)) 1092 break; 1093 } 1094 1095 if (!host) { 1096 host = nlm_create_host(tmp); 1097 if (!host) { 1098 mtx_unlock(&nlm_global_lock); 1099 return (NULL); 1100 } 1101 memcpy(&host->nh_addr, addr, addr->sa_len); 1102 host->nh_vers = vers; 1103 } 1104 refcount_acquire(&host->nh_refs); 1105 1106 host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT; 1107 1108 nlm_check_idle(); 1109 1110 mtx_unlock(&nlm_global_lock); 1111 1112 return (host); 1113 } 1114 1115 /* 1116 * Find the NLM host that matches the value of 'sysid'. If none 1117 * exists, return NULL. 1118 */ 1119 static struct nlm_host * 1120 nlm_find_host_by_sysid(int sysid) 1121 { 1122 struct nlm_host *host; 1123 1124 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 1125 if (host->nh_sysid == sysid) { 1126 refcount_acquire(&host->nh_refs); 1127 return (host); 1128 } 1129 } 1130 1131 return (NULL); 1132 } 1133 1134 void nlm_host_release(struct nlm_host *host) 1135 { 1136 if (refcount_release(&host->nh_refs)) { 1137 /* 1138 * Free the host 1139 */ 1140 nlm_host_destroy(host); 1141 } 1142 } 1143 1144 /* 1145 * Unregister this NLM host with the local NSM due to idleness. 1146 */ 1147 static void 1148 nlm_host_unmonitor(struct nlm_host *host) 1149 { 1150 mon_id smmonid; 1151 sm_stat_res smstat; 1152 struct timeval timo; 1153 enum clnt_stat stat; 1154 1155 NLM_DEBUG(1, "NLM: unmonitoring %s (sysid %d)\n", 1156 host->nh_caller_name, host->nh_sysid); 1157 1158 /* 1159 * We put our assigned system ID value in the priv field to 1160 * make it simpler to find the host if we are notified of a 1161 * host restart. 
1162 */ 1163 smmonid.mon_name = host->nh_caller_name; 1164 smmonid.my_id.my_name = "localhost"; 1165 smmonid.my_id.my_prog = NLM_PROG; 1166 smmonid.my_id.my_vers = NLM_SM; 1167 smmonid.my_id.my_proc = NLM_SM_NOTIFY; 1168 1169 timo.tv_sec = 25; 1170 timo.tv_usec = 0; 1171 stat = CLNT_CALL(nlm_nsm, SM_UNMON, 1172 (xdrproc_t) xdr_mon, &smmonid, 1173 (xdrproc_t) xdr_sm_stat, &smstat, timo); 1174 1175 if (stat != RPC_SUCCESS) { 1176 NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat); 1177 return; 1178 } 1179 if (smstat.res_stat == stat_fail) { 1180 NLM_ERR("Local NSM refuses to unmonitor %s\n", 1181 host->nh_caller_name); 1182 return; 1183 } 1184 1185 host->nh_monstate = NLM_UNMONITORED; 1186 } 1187 1188 /* 1189 * Register this NLM host with the local NSM so that we can be 1190 * notified if it reboots. 1191 */ 1192 void 1193 nlm_host_monitor(struct nlm_host *host, int state) 1194 { 1195 mon smmon; 1196 sm_stat_res smstat; 1197 struct timeval timo; 1198 enum clnt_stat stat; 1199 1200 if (state && !host->nh_state) { 1201 /* 1202 * This is the first time we have seen an NSM state 1203 * value for this host. We record it here to help 1204 * detect host reboots. 1205 */ 1206 host->nh_state = state; 1207 NLM_DEBUG(1, "NLM: host %s (sysid %d) has NSM state %d\n", 1208 host->nh_caller_name, host->nh_sysid, state); 1209 } 1210 1211 mtx_lock(&host->nh_lock); 1212 if (host->nh_monstate != NLM_UNMONITORED) { 1213 mtx_unlock(&host->nh_lock); 1214 return; 1215 } 1216 host->nh_monstate = NLM_MONITORED; 1217 mtx_unlock(&host->nh_lock); 1218 1219 NLM_DEBUG(1, "NLM: monitoring %s (sysid %d)\n", 1220 host->nh_caller_name, host->nh_sysid); 1221 1222 /* 1223 * We put our assigned system ID value in the priv field to 1224 * make it simpler to find the host if we are notified of a 1225 * host restart. 
1226 */ 1227 smmon.mon_id.mon_name = host->nh_caller_name; 1228 smmon.mon_id.my_id.my_name = "localhost"; 1229 smmon.mon_id.my_id.my_prog = NLM_PROG; 1230 smmon.mon_id.my_id.my_vers = NLM_SM; 1231 smmon.mon_id.my_id.my_proc = NLM_SM_NOTIFY; 1232 memcpy(smmon.priv, &host->nh_sysid, sizeof(host->nh_sysid)); 1233 1234 timo.tv_sec = 25; 1235 timo.tv_usec = 0; 1236 stat = CLNT_CALL(nlm_nsm, SM_MON, 1237 (xdrproc_t) xdr_mon, &smmon, 1238 (xdrproc_t) xdr_sm_stat, &smstat, timo); 1239 1240 if (stat != RPC_SUCCESS) { 1241 NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat); 1242 return; 1243 } 1244 if (smstat.res_stat == stat_fail) { 1245 NLM_ERR("Local NSM refuses to monitor %s\n", 1246 host->nh_caller_name); 1247 mtx_lock(&host->nh_lock); 1248 host->nh_monstate = NLM_MONITOR_FAILED; 1249 mtx_unlock(&host->nh_lock); 1250 return; 1251 } 1252 1253 host->nh_monstate = NLM_MONITORED; 1254 } 1255 1256 /* 1257 * Return an RPC client handle that can be used to talk to the NLM 1258 * running on the given host. 1259 */ 1260 CLIENT * 1261 nlm_host_get_rpc(struct nlm_host *host, bool_t isserver) 1262 { 1263 struct nlm_rpc *rpc; 1264 CLIENT *client; 1265 1266 mtx_lock(&host->nh_lock); 1267 1268 if (isserver) 1269 rpc = &host->nh_srvrpc; 1270 else 1271 rpc = &host->nh_clntrpc; 1272 1273 /* 1274 * We can't hold onto RPC handles for too long - the async 1275 * call/reply protocol used by some NLM clients makes it hard 1276 * to tell when they change port numbers (e.g. after a 1277 * reboot). Note that if a client reboots while it isn't 1278 * holding any locks, it won't bother to notify us. We 1279 * expire the RPC handles after two minutes. 
1280 */ 1281 if (rpc->nr_client && time_uptime > rpc->nr_create_time + 2*60) { 1282 client = rpc->nr_client; 1283 rpc->nr_client = NULL; 1284 mtx_unlock(&host->nh_lock); 1285 CLNT_RELEASE(client); 1286 mtx_lock(&host->nh_lock); 1287 } 1288 1289 if (!rpc->nr_client) { 1290 mtx_unlock(&host->nh_lock); 1291 client = nlm_get_rpc((struct sockaddr *)&host->nh_addr, 1292 NLM_PROG, host->nh_vers); 1293 mtx_lock(&host->nh_lock); 1294 1295 if (client) { 1296 if (rpc->nr_client) { 1297 mtx_unlock(&host->nh_lock); 1298 CLNT_DESTROY(client); 1299 mtx_lock(&host->nh_lock); 1300 } else { 1301 rpc->nr_client = client; 1302 rpc->nr_create_time = time_uptime; 1303 } 1304 } 1305 } 1306 1307 client = rpc->nr_client; 1308 if (client) 1309 CLNT_ACQUIRE(client); 1310 mtx_unlock(&host->nh_lock); 1311 1312 return (client); 1313 1314 } 1315 1316 int nlm_host_get_sysid(struct nlm_host *host) 1317 { 1318 1319 return (host->nh_sysid); 1320 } 1321 1322 int 1323 nlm_host_get_state(struct nlm_host *host) 1324 { 1325 1326 return (host->nh_state); 1327 } 1328 1329 void * 1330 nlm_register_wait_lock(struct nlm4_lock *lock, struct vnode *vp) 1331 { 1332 struct nlm_waiting_lock *nw; 1333 1334 nw = malloc(sizeof(struct nlm_waiting_lock), M_NLM, M_WAITOK); 1335 nw->nw_lock = *lock; 1336 memcpy(&nw->nw_fh.fh_bytes, nw->nw_lock.fh.n_bytes, 1337 nw->nw_lock.fh.n_len); 1338 nw->nw_lock.fh.n_bytes = nw->nw_fh.fh_bytes; 1339 nw->nw_waiting = TRUE; 1340 nw->nw_vp = vp; 1341 mtx_lock(&nlm_global_lock); 1342 TAILQ_INSERT_TAIL(&nlm_waiting_locks, nw, nw_link); 1343 mtx_unlock(&nlm_global_lock); 1344 1345 return nw; 1346 } 1347 1348 void 1349 nlm_deregister_wait_lock(void *handle) 1350 { 1351 struct nlm_waiting_lock *nw = handle; 1352 1353 mtx_lock(&nlm_global_lock); 1354 TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link); 1355 mtx_unlock(&nlm_global_lock); 1356 1357 free(nw, M_NLM); 1358 } 1359 1360 int 1361 nlm_wait_lock(void *handle, int timo) 1362 { 1363 struct nlm_waiting_lock *nw = handle; 1364 int error, 
stops_deferred; 1365 1366 /* 1367 * If the granted message arrived before we got here, 1368 * nw->nw_waiting will be FALSE - in that case, don't sleep. 1369 */ 1370 mtx_lock(&nlm_global_lock); 1371 error = 0; 1372 if (nw->nw_waiting) { 1373 stops_deferred = sigdeferstop(SIGDEFERSTOP_ERESTART); 1374 error = msleep(nw, &nlm_global_lock, PCATCH, "nlmlock", timo); 1375 sigallowstop(stops_deferred); 1376 } 1377 TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link); 1378 if (error) { 1379 /* 1380 * The granted message may arrive after the 1381 * interrupt/timeout but before we manage to lock the 1382 * mutex. Detect this by examining nw_lock. 1383 */ 1384 if (!nw->nw_waiting) 1385 error = 0; 1386 } else { 1387 /* 1388 * If nlm_cancel_wait is called, then error will be 1389 * zero but nw_waiting will still be TRUE. We 1390 * translate this into EINTR. 1391 */ 1392 if (nw->nw_waiting) 1393 error = EINTR; 1394 } 1395 mtx_unlock(&nlm_global_lock); 1396 1397 free(nw, M_NLM); 1398 1399 return (error); 1400 } 1401 1402 void 1403 nlm_cancel_wait(struct vnode *vp) 1404 { 1405 struct nlm_waiting_lock *nw; 1406 1407 mtx_lock(&nlm_global_lock); 1408 TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) { 1409 if (nw->nw_vp == vp) { 1410 wakeup(nw); 1411 } 1412 } 1413 mtx_unlock(&nlm_global_lock); 1414 } 1415 1416 /**********************************************************************/ 1417 1418 /* 1419 * Syscall interface with userland. 
1420 */ 1421 1422 extern void nlm_prog_0(struct svc_req *rqstp, SVCXPRT *transp); 1423 extern void nlm_prog_1(struct svc_req *rqstp, SVCXPRT *transp); 1424 extern void nlm_prog_3(struct svc_req *rqstp, SVCXPRT *transp); 1425 extern void nlm_prog_4(struct svc_req *rqstp, SVCXPRT *transp); 1426 1427 static int 1428 nlm_register_services(SVCPOOL *pool, int addr_count, char **addrs) 1429 { 1430 static rpcvers_t versions[] = { 1431 NLM_SM, NLM_VERS, NLM_VERSX, NLM_VERS4 1432 }; 1433 static void (*dispatchers[])(struct svc_req *, SVCXPRT *) = { 1434 nlm_prog_0, nlm_prog_1, nlm_prog_3, nlm_prog_4 1435 }; 1436 1437 SVCXPRT **xprts; 1438 char netid[16]; 1439 char uaddr[128]; 1440 struct netconfig *nconf; 1441 int i, j, error; 1442 1443 if (!addr_count) { 1444 NLM_ERR("NLM: no service addresses given - can't start server"); 1445 return (EINVAL); 1446 } 1447 1448 if (addr_count < 0 || addr_count > 256 ) { 1449 NLM_ERR("NLM: too many service addresses (%d) given, " 1450 "max 256 - can't start server\n", addr_count); 1451 return (EINVAL); 1452 } 1453 1454 xprts = malloc(addr_count * sizeof(SVCXPRT *), M_NLM, M_WAITOK|M_ZERO); 1455 for (i = 0; i < nitems(versions); i++) { 1456 for (j = 0; j < addr_count; j++) { 1457 /* 1458 * Create transports for the first version and 1459 * then just register everything else to the 1460 * same transports. 
1461 */ 1462 if (i == 0) { 1463 char *up; 1464 1465 error = copyin(&addrs[2*j], &up, 1466 sizeof(char*)); 1467 if (error) 1468 goto out; 1469 error = copyinstr(up, netid, sizeof(netid), 1470 NULL); 1471 if (error) 1472 goto out; 1473 error = copyin(&addrs[2*j+1], &up, 1474 sizeof(char*)); 1475 if (error) 1476 goto out; 1477 error = copyinstr(up, uaddr, sizeof(uaddr), 1478 NULL); 1479 if (error) 1480 goto out; 1481 nconf = getnetconfigent(netid); 1482 if (!nconf) { 1483 NLM_ERR("Can't lookup netid %s\n", 1484 netid); 1485 error = EINVAL; 1486 goto out; 1487 } 1488 xprts[j] = svc_tp_create(pool, dispatchers[i], 1489 NLM_PROG, versions[i], uaddr, nconf); 1490 if (!xprts[j]) { 1491 NLM_ERR("NLM: unable to create " 1492 "(NLM_PROG, %d).\n", versions[i]); 1493 error = EINVAL; 1494 goto out; 1495 } 1496 freenetconfigent(nconf); 1497 } else { 1498 nconf = getnetconfigent(xprts[j]->xp_netid); 1499 rpcb_unset(NLM_PROG, versions[i], nconf); 1500 if (!svc_reg(xprts[j], NLM_PROG, versions[i], 1501 dispatchers[i], nconf)) { 1502 NLM_ERR("NLM: can't register " 1503 "(NLM_PROG, %d)\n", versions[i]); 1504 error = EINVAL; 1505 goto out; 1506 } 1507 } 1508 } 1509 } 1510 error = 0; 1511 out: 1512 for (j = 0; j < addr_count; j++) { 1513 if (xprts[j]) 1514 SVC_RELEASE(xprts[j]); 1515 } 1516 free(xprts, M_NLM); 1517 return (error); 1518 } 1519 1520 /* 1521 * Main server entry point. Contacts the local NSM to get its current 1522 * state and send SM_UNMON_ALL. Registers the NLM services and then 1523 * services requests. Does not return until the server is interrupted 1524 * by a signal. 
*/
static int
nlm_server_main(int addr_count, char **addrs)
{
	struct thread *td = curthread;
	int error;
	SVCPOOL *pool = NULL;
	struct sockopt opt;
	int portlow;
#ifdef INET6
	struct sockaddr_in6 sin6;
#endif
	struct sockaddr_in sin;
	my_id id;
	sm_stat smstat;
	struct timeval timo;
	enum clnt_stat stat;
	struct nlm_host *host, *nhost;
	struct nlm_waiting_lock *nw;
	vop_advlock_t *old_nfs_advlock;
	vop_reclaim_t *old_nfs_reclaim;

	/* Only one instance of the server may run at a time. */
	if (nlm_is_running != 0) {
		NLM_ERR("NLM: can't start server - "
		    "it appears to be running already\n");
		return (EPERM);
	}

	/*
	 * Create the sockets on the first start only; they are not
	 * closed on the way out of this function, so a later restart
	 * finds nlm_socket already set and skips this step.
	 */
	if (nlm_socket == NULL) {
		memset(&opt, 0, sizeof(opt));

		error = socreate(AF_INET, &nlm_socket, SOCK_DGRAM, 0,
		    td->td_ucred, td);
		if (error) {
			NLM_ERR("NLM: can't create IPv4 socket - error %d\n",
			    error);
			return (error);
		}
		/*
		 * Request the low port range for this socket. The
		 * result of sosetopt() is not checked.
		 */
		opt.sopt_dir = SOPT_SET;
		opt.sopt_level = IPPROTO_IP;
		opt.sopt_name = IP_PORTRANGE;
		portlow = IP_PORTRANGE_LOW;
		opt.sopt_val = &portlow;
		opt.sopt_valsize = sizeof(portlow);
		sosetopt(nlm_socket, &opt);

#ifdef INET6
		nlm_socket6 = NULL;
		error = socreate(AF_INET6, &nlm_socket6, SOCK_DGRAM, 0,
		    td->td_ucred, td);
		if (error) {
			NLM_ERR("NLM: can't create IPv6 socket - error %d\n",
			    error);
			/* Undo the IPv4 socket so a retry starts clean. */
			soclose(nlm_socket);
			nlm_socket = NULL;
			return (error);
		}
		opt.sopt_dir = SOPT_SET;
		opt.sopt_level = IPPROTO_IPV6;
		opt.sopt_name = IPV6_PORTRANGE;
		portlow = IPV6_PORTRANGE_LOW;
		opt.sopt_val = &portlow;
		opt.sopt_valsize = sizeof(portlow);
		sosetopt(nlm_socket6, &opt);
#endif
	}

	nlm_auth = authunix_create(curthread->td_ucred);

	/*
	 * Get a client handle for the local NSM: try the IPv6 loopback
	 * address first (when INET6 is configured) and fall back to
	 * the IPv4 loopback address.
	 */
#ifdef INET6
	memset(&sin6, 0, sizeof(sin6));
	sin6.sin6_len = sizeof(sin6);
	sin6.sin6_family = AF_INET6;
	sin6.sin6_addr = in6addr_loopback;
	nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin6, SM_PROG, SM_VERS);
	if (!nlm_nsm) {
#endif
		memset(&sin, 0, sizeof(sin));
		sin.sin_len = sizeof(sin);
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
		nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin, SM_PROG,
		    SM_VERS);
#ifdef INET6
	}
#endif

	if (!nlm_nsm) {
		NLM_ERR("Can't start NLM - unable to contact NSM\n");
		error = EINVAL;
		goto out;
	}

	pool = svcpool_create("NLM", NULL);

	error = nlm_register_services(pool, addr_count, addrs);
	if (error)
		goto out;

	/* Send SM_UNMON_ALL; the reply carries the NSM's state number. */
	memset(&id, 0, sizeof(id));
	id.my_name = "NFS NLM";

	timo.tv_sec = 25;
	timo.tv_usec = 0;
	stat = CLNT_CALL(nlm_nsm, SM_UNMON_ALL,
	    (xdrproc_t) xdr_my_id, &id,
	    (xdrproc_t) xdr_sm_stat, &smstat, timo);

	if (stat != RPC_SUCCESS) {
		struct rpc_err err;

		CLNT_GETERR(nlm_nsm, &err);
		NLM_ERR("NLM: unexpected error contacting NSM, "
		    "stat=%d, errno=%d\n", stat, err.re_errno);
		error = EINVAL;
		goto out;
	}
	nlm_is_running = 1;

	NLM_DEBUG(1, "NLM: local NSM state is %d\n", smstat.state);
	nlm_nsm_state = smstat.state;

	/*
	 * Install the NLM advlock/reclaim hooks while the server
	 * runs, remembering the previous values so they can be put
	 * back once svc_run() returns.
	 */
	old_nfs_advlock = nfs_advlock_p;
	nfs_advlock_p = nlm_advlock;
	old_nfs_reclaim = nfs_reclaim_p;
	nfs_reclaim_p = nlm_reclaim;

	svc_run(pool);
	error = 0;

	nfs_advlock_p = old_nfs_advlock;
	nfs_reclaim_p = old_nfs_reclaim;

out:
	/* Common teardown for both the normal and the error exits. */
	nlm_is_running = 0;
	if (pool)
		svcpool_destroy(pool);

	/*
	 * We are finished communicating with the NSM.
	 */
	if (nlm_nsm) {
		CLNT_RELEASE(nlm_nsm);
		nlm_nsm = NULL;
	}

	/*
	 * Trash all the existing state so that if the server
	 * restarts, it gets a clean slate. This is complicated by the
	 * possibility that there may be other threads trying to make
	 * client locking requests.
	 *
	 * First we fake a client reboot notification which will
	 * cancel any pending async locks and purge remote lock state
	 * from the local lock manager. We release the reference from
	 * nlm_hosts to the host (which may remove it from the list
	 * and free it). After this phase, the only entries in the
	 * nlm_host list should be from other threads performing
	 * client lock requests.
	 */
	mtx_lock(&nlm_global_lock);
	TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) {
		wakeup(nw);
	}
	TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, nhost) {
		/*
		 * The global lock is dropped around the notify and
		 * release calls and retaken before the iteration
		 * continues.
		 */
		mtx_unlock(&nlm_global_lock);
		nlm_host_notify(host, 0);
		nlm_host_release(host);
		mtx_lock(&nlm_global_lock);
	}
	mtx_unlock(&nlm_global_lock);

	AUTH_DESTROY(nlm_auth);

	return (error);
}

/*
 * nlm_syscall(2) entry point. After a PRIV_NFS_LOCKD privilege
 * check it records the debug level from userland, arms the grace
 * period and the next idle-host check relative to time_uptime, and
 * then runs the server; the return value is that of
 * nlm_server_main().
 */
int
sys_nlm_syscall(struct thread *td, struct nlm_syscall_args *uap)
{
	int error;

	error = priv_check(td, PRIV_NFS_LOCKD);
	if (error)
		return (error);

	nlm_debug_level = uap->debug_level;
	nlm_grace_threshold = time_uptime + uap->grace_period;
	nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD;

	return nlm_server_main(uap->addr_count, uap->addrs);
}

/**********************************************************************/

/*
 * NLM implementation details, called from the RPC stubs.
1722 */ 1723 1724 void 1725 nlm_sm_notify(struct nlm_sm_status *argp) 1726 { 1727 uint32_t sysid; 1728 struct nlm_host *host; 1729 1730 NLM_DEBUG(3, "nlm_sm_notify(): mon_name = %s\n", argp->mon_name); 1731 memcpy(&sysid, &argp->priv, sizeof(sysid)); 1732 host = nlm_find_host_by_sysid(sysid); 1733 if (host) { 1734 nlm_host_notify(host, argp->state); 1735 nlm_host_release(host); 1736 } 1737 } 1738 1739 static void 1740 nlm_convert_to_fhandle_t(fhandle_t *fhp, struct netobj *p) 1741 { 1742 memcpy(fhp, p->n_bytes, sizeof(fhandle_t)); 1743 } 1744 1745 struct vfs_state { 1746 struct mount *vs_mp; 1747 struct vnode *vs_vp; 1748 int vs_vnlocked; 1749 }; 1750 1751 static int 1752 nlm_get_vfs_state(struct nlm_host *host, struct svc_req *rqstp, 1753 fhandle_t *fhp, struct vfs_state *vs, accmode_t accmode) 1754 { 1755 int error; 1756 uint64_t exflags; 1757 struct ucred *cred = NULL, *credanon = NULL; 1758 1759 memset(vs, 0, sizeof(*vs)); 1760 1761 vs->vs_mp = vfs_getvfs(&fhp->fh_fsid); 1762 if (!vs->vs_mp) { 1763 return (ESTALE); 1764 } 1765 1766 /* accmode == 0 means don't check, since it is an unlock. */ 1767 if (accmode != 0) { 1768 error = VFS_CHECKEXP(vs->vs_mp, 1769 (struct sockaddr *)&host->nh_addr, &exflags, &credanon, 1770 NULL, NULL); 1771 if (error) 1772 goto out; 1773 1774 if (exflags & MNT_EXRDONLY || 1775 (vs->vs_mp->mnt_flag & MNT_RDONLY)) { 1776 error = EROFS; 1777 goto out; 1778 } 1779 } 1780 1781 error = VFS_FHTOVP(vs->vs_mp, &fhp->fh_fid, LK_EXCLUSIVE, &vs->vs_vp); 1782 if (error) 1783 goto out; 1784 vs->vs_vnlocked = TRUE; 1785 1786 if (accmode != 0) { 1787 if (!svc_getcred(rqstp, &cred, NULL)) { 1788 error = EINVAL; 1789 goto out; 1790 } 1791 if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) { 1792 crfree(cred); 1793 cred = credanon; 1794 credanon = NULL; 1795 } 1796 1797 /* 1798 * Check cred. 
1799 */ 1800 error = VOP_ACCESS(vs->vs_vp, accmode, cred, curthread); 1801 /* 1802 * If this failed and accmode != VWRITE, try again with 1803 * VWRITE to maintain backwards compatibility with the 1804 * old code that always used VWRITE. 1805 */ 1806 if (error != 0 && accmode != VWRITE) 1807 error = VOP_ACCESS(vs->vs_vp, VWRITE, cred, curthread); 1808 if (error) 1809 goto out; 1810 } 1811 1812 VOP_UNLOCK(vs->vs_vp); 1813 vs->vs_vnlocked = FALSE; 1814 1815 out: 1816 if (cred) 1817 crfree(cred); 1818 if (credanon) 1819 crfree(credanon); 1820 1821 return (error); 1822 } 1823 1824 static void 1825 nlm_release_vfs_state(struct vfs_state *vs) 1826 { 1827 1828 if (vs->vs_vp) { 1829 if (vs->vs_vnlocked) 1830 vput(vs->vs_vp); 1831 else 1832 vrele(vs->vs_vp); 1833 } 1834 if (vs->vs_mp) 1835 vfs_rel(vs->vs_mp); 1836 } 1837 1838 static nlm4_stats 1839 nlm_convert_error(int error) 1840 { 1841 1842 if (error == ESTALE) 1843 return nlm4_stale_fh; 1844 else if (error == EROFS) 1845 return nlm4_rofs; 1846 else 1847 return nlm4_failed; 1848 } 1849 1850 int 1851 nlm_do_test(nlm4_testargs *argp, nlm4_testres *result, struct svc_req *rqstp, 1852 CLIENT **rpcp) 1853 { 1854 fhandle_t fh; 1855 struct vfs_state vs; 1856 struct nlm_host *host, *bhost; 1857 int error, sysid; 1858 struct flock fl; 1859 accmode_t accmode; 1860 1861 memset(result, 0, sizeof(*result)); 1862 memset(&vs, 0, sizeof(vs)); 1863 1864 host = nlm_find_host_by_name(argp->alock.caller_name, 1865 svc_getrpccaller(rqstp), rqstp->rq_vers); 1866 if (!host) { 1867 result->stat.stat = nlm4_denied_nolocks; 1868 return (ENOMEM); 1869 } 1870 1871 NLM_DEBUG(3, "nlm_do_test(): caller_name = %s (sysid = %d)\n", 1872 host->nh_caller_name, host->nh_sysid); 1873 1874 nlm_check_expired_locks(host); 1875 sysid = host->nh_sysid; 1876 1877 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 1878 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 1879 1880 if (time_uptime < nlm_grace_threshold) { 1881 result->stat.stat = 
nlm4_denied_grace_period; 1882 goto out; 1883 } 1884 1885 accmode = argp->exclusive ? VWRITE : VREAD; 1886 error = nlm_get_vfs_state(host, rqstp, &fh, &vs, accmode); 1887 if (error) { 1888 result->stat.stat = nlm_convert_error(error); 1889 goto out; 1890 } 1891 1892 fl.l_start = argp->alock.l_offset; 1893 fl.l_len = argp->alock.l_len; 1894 fl.l_pid = argp->alock.svid; 1895 fl.l_sysid = sysid; 1896 fl.l_whence = SEEK_SET; 1897 if (argp->exclusive) 1898 fl.l_type = F_WRLCK; 1899 else 1900 fl.l_type = F_RDLCK; 1901 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_GETLK, &fl, F_REMOTE); 1902 if (error) { 1903 result->stat.stat = nlm4_failed; 1904 goto out; 1905 } 1906 1907 if (fl.l_type == F_UNLCK) { 1908 result->stat.stat = nlm4_granted; 1909 } else { 1910 result->stat.stat = nlm4_denied; 1911 result->stat.nlm4_testrply_u.holder.exclusive = 1912 (fl.l_type == F_WRLCK); 1913 result->stat.nlm4_testrply_u.holder.svid = fl.l_pid; 1914 bhost = nlm_find_host_by_sysid(fl.l_sysid); 1915 if (bhost) { 1916 /* 1917 * We don't have any useful way of recording 1918 * the value of oh used in the original lock 1919 * request. Ideally, the test reply would have 1920 * a space for the owning host's name allowing 1921 * our caller's NLM to keep track. 1922 * 1923 * As far as I can see, Solaris uses an eight 1924 * byte structure for oh which contains a four 1925 * byte pid encoded in local byte order and 1926 * the first four bytes of the host 1927 * name. Linux uses a variable length string 1928 * 'pid@hostname' in ascii but doesn't even 1929 * return that in test replies. 1930 * 1931 * For the moment, return nothing in oh 1932 * (already zero'ed above). 
1933 */ 1934 nlm_host_release(bhost); 1935 } 1936 result->stat.nlm4_testrply_u.holder.l_offset = fl.l_start; 1937 result->stat.nlm4_testrply_u.holder.l_len = fl.l_len; 1938 } 1939 1940 out: 1941 nlm_release_vfs_state(&vs); 1942 if (rpcp) 1943 *rpcp = nlm_host_get_rpc(host, TRUE); 1944 nlm_host_release(host); 1945 return (0); 1946 } 1947 1948 int 1949 nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp, 1950 bool_t monitor, CLIENT **rpcp) 1951 { 1952 fhandle_t fh; 1953 struct vfs_state vs; 1954 struct nlm_host *host; 1955 int error, sysid; 1956 struct flock fl; 1957 accmode_t accmode; 1958 1959 memset(result, 0, sizeof(*result)); 1960 memset(&vs, 0, sizeof(vs)); 1961 1962 host = nlm_find_host_by_name(argp->alock.caller_name, 1963 svc_getrpccaller(rqstp), rqstp->rq_vers); 1964 if (!host) { 1965 result->stat.stat = nlm4_denied_nolocks; 1966 return (ENOMEM); 1967 } 1968 1969 NLM_DEBUG(3, "nlm_do_lock(): caller_name = %s (sysid = %d)\n", 1970 host->nh_caller_name, host->nh_sysid); 1971 1972 if (monitor && host->nh_state && argp->state 1973 && host->nh_state != argp->state) { 1974 /* 1975 * The host rebooted without telling us. Trash its 1976 * locks. 1977 */ 1978 nlm_host_notify(host, argp->state); 1979 } 1980 1981 nlm_check_expired_locks(host); 1982 sysid = host->nh_sysid; 1983 1984 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 1985 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 1986 1987 if (time_uptime < nlm_grace_threshold && !argp->reclaim) { 1988 result->stat.stat = nlm4_denied_grace_period; 1989 goto out; 1990 } 1991 1992 accmode = argp->exclusive ? 
VWRITE : VREAD; 1993 error = nlm_get_vfs_state(host, rqstp, &fh, &vs, accmode); 1994 if (error) { 1995 result->stat.stat = nlm_convert_error(error); 1996 goto out; 1997 } 1998 1999 fl.l_start = argp->alock.l_offset; 2000 fl.l_len = argp->alock.l_len; 2001 fl.l_pid = argp->alock.svid; 2002 fl.l_sysid = sysid; 2003 fl.l_whence = SEEK_SET; 2004 if (argp->exclusive) 2005 fl.l_type = F_WRLCK; 2006 else 2007 fl.l_type = F_RDLCK; 2008 if (argp->block) { 2009 struct nlm_async_lock *af; 2010 CLIENT *client; 2011 struct nlm_grantcookie cookie; 2012 2013 /* 2014 * First, make sure we can contact the host's NLM. 2015 */ 2016 client = nlm_host_get_rpc(host, TRUE); 2017 if (!client) { 2018 result->stat.stat = nlm4_failed; 2019 goto out; 2020 } 2021 2022 /* 2023 * First we need to check and see if there is an 2024 * existing blocked lock that matches. This could be a 2025 * badly behaved client or an RPC re-send. If we find 2026 * one, just return nlm4_blocked. 2027 */ 2028 mtx_lock(&host->nh_lock); 2029 TAILQ_FOREACH(af, &host->nh_pending, af_link) { 2030 if (af->af_fl.l_start == fl.l_start 2031 && af->af_fl.l_len == fl.l_len 2032 && af->af_fl.l_pid == fl.l_pid 2033 && af->af_fl.l_type == fl.l_type) { 2034 break; 2035 } 2036 } 2037 if (!af) { 2038 cookie.ng_sysid = host->nh_sysid; 2039 cookie.ng_cookie = host->nh_grantcookie++; 2040 } 2041 mtx_unlock(&host->nh_lock); 2042 if (af) { 2043 CLNT_RELEASE(client); 2044 result->stat.stat = nlm4_blocked; 2045 goto out; 2046 } 2047 2048 af = malloc(sizeof(struct nlm_async_lock), M_NLM, 2049 M_WAITOK|M_ZERO); 2050 TASK_INIT(&af->af_task, 0, nlm_lock_callback, af); 2051 af->af_vp = vs.vs_vp; 2052 af->af_fl = fl; 2053 af->af_host = host; 2054 af->af_rpc = client; 2055 /* 2056 * We use M_RPC here so that we can xdr_free the thing 2057 * later. 
2058 */ 2059 nlm_make_netobj(&af->af_granted.cookie, 2060 (caddr_t)&cookie, sizeof(cookie), M_RPC); 2061 af->af_granted.exclusive = argp->exclusive; 2062 af->af_granted.alock.caller_name = 2063 strdup(argp->alock.caller_name, M_RPC); 2064 nlm_copy_netobj(&af->af_granted.alock.fh, 2065 &argp->alock.fh, M_RPC); 2066 nlm_copy_netobj(&af->af_granted.alock.oh, 2067 &argp->alock.oh, M_RPC); 2068 af->af_granted.alock.svid = argp->alock.svid; 2069 af->af_granted.alock.l_offset = argp->alock.l_offset; 2070 af->af_granted.alock.l_len = argp->alock.l_len; 2071 2072 /* 2073 * Put the entry on the pending list before calling 2074 * VOP_ADVLOCKASYNC. We do this in case the lock 2075 * request was blocked (returning EINPROGRESS) but 2076 * then granted before we manage to run again. The 2077 * client may receive the granted message before we 2078 * send our blocked reply but thats their problem. 2079 */ 2080 mtx_lock(&host->nh_lock); 2081 TAILQ_INSERT_TAIL(&host->nh_pending, af, af_link); 2082 mtx_unlock(&host->nh_lock); 2083 2084 error = VOP_ADVLOCKASYNC(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE, 2085 &af->af_task, &af->af_cookie); 2086 2087 /* 2088 * If the lock completed synchronously, just free the 2089 * tracking structure now. 2090 */ 2091 if (error != EINPROGRESS) { 2092 CLNT_RELEASE(af->af_rpc); 2093 mtx_lock(&host->nh_lock); 2094 TAILQ_REMOVE(&host->nh_pending, af, af_link); 2095 mtx_unlock(&host->nh_lock); 2096 xdr_free((xdrproc_t) xdr_nlm4_testargs, 2097 &af->af_granted); 2098 free(af, M_NLM); 2099 } else { 2100 NLM_DEBUG(2, "NLM: pending async lock %p for %s " 2101 "(sysid %d)\n", af, host->nh_caller_name, sysid); 2102 /* 2103 * Don't vrele the vnode just yet - this must 2104 * wait until either the async callback 2105 * happens or the lock is cancelled. 
2106 */ 2107 vs.vs_vp = NULL; 2108 } 2109 } else { 2110 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE); 2111 } 2112 2113 if (error) { 2114 if (error == EINPROGRESS) { 2115 result->stat.stat = nlm4_blocked; 2116 } else if (error == EDEADLK) { 2117 result->stat.stat = nlm4_deadlck; 2118 } else if (error == EAGAIN) { 2119 result->stat.stat = nlm4_denied; 2120 } else { 2121 result->stat.stat = nlm4_failed; 2122 } 2123 } else { 2124 if (monitor) 2125 nlm_host_monitor(host, argp->state); 2126 result->stat.stat = nlm4_granted; 2127 } 2128 2129 out: 2130 nlm_release_vfs_state(&vs); 2131 if (rpcp) 2132 *rpcp = nlm_host_get_rpc(host, TRUE); 2133 nlm_host_release(host); 2134 return (0); 2135 } 2136 2137 int 2138 nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *result, struct svc_req *rqstp, 2139 CLIENT **rpcp) 2140 { 2141 fhandle_t fh; 2142 struct vfs_state vs; 2143 struct nlm_host *host; 2144 int error, sysid; 2145 struct flock fl; 2146 struct nlm_async_lock *af; 2147 2148 memset(result, 0, sizeof(*result)); 2149 memset(&vs, 0, sizeof(vs)); 2150 2151 host = nlm_find_host_by_name(argp->alock.caller_name, 2152 svc_getrpccaller(rqstp), rqstp->rq_vers); 2153 if (!host) { 2154 result->stat.stat = nlm4_denied_nolocks; 2155 return (ENOMEM); 2156 } 2157 2158 NLM_DEBUG(3, "nlm_do_cancel(): caller_name = %s (sysid = %d)\n", 2159 host->nh_caller_name, host->nh_sysid); 2160 2161 nlm_check_expired_locks(host); 2162 sysid = host->nh_sysid; 2163 2164 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 2165 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 2166 2167 if (time_uptime < nlm_grace_threshold) { 2168 result->stat.stat = nlm4_denied_grace_period; 2169 goto out; 2170 } 2171 2172 error = nlm_get_vfs_state(host, rqstp, &fh, &vs, (accmode_t)0); 2173 if (error) { 2174 result->stat.stat = nlm_convert_error(error); 2175 goto out; 2176 } 2177 2178 fl.l_start = argp->alock.l_offset; 2179 fl.l_len = argp->alock.l_len; 2180 fl.l_pid = argp->alock.svid; 2181 fl.l_sysid = sysid; 
2182 fl.l_whence = SEEK_SET; 2183 if (argp->exclusive) 2184 fl.l_type = F_WRLCK; 2185 else 2186 fl.l_type = F_RDLCK; 2187 2188 /* 2189 * First we need to try and find the async lock request - if 2190 * there isn't one, we give up and return nlm4_denied. 2191 */ 2192 mtx_lock(&host->nh_lock); 2193 2194 TAILQ_FOREACH(af, &host->nh_pending, af_link) { 2195 if (af->af_fl.l_start == fl.l_start 2196 && af->af_fl.l_len == fl.l_len 2197 && af->af_fl.l_pid == fl.l_pid 2198 && af->af_fl.l_type == fl.l_type) { 2199 break; 2200 } 2201 } 2202 2203 if (!af) { 2204 mtx_unlock(&host->nh_lock); 2205 result->stat.stat = nlm4_denied; 2206 goto out; 2207 } 2208 2209 error = nlm_cancel_async_lock(af); 2210 2211 if (error) { 2212 result->stat.stat = nlm4_denied; 2213 } else { 2214 result->stat.stat = nlm4_granted; 2215 } 2216 2217 mtx_unlock(&host->nh_lock); 2218 2219 out: 2220 nlm_release_vfs_state(&vs); 2221 if (rpcp) 2222 *rpcp = nlm_host_get_rpc(host, TRUE); 2223 nlm_host_release(host); 2224 return (0); 2225 } 2226 2227 int 2228 nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *result, struct svc_req *rqstp, 2229 CLIENT **rpcp) 2230 { 2231 fhandle_t fh; 2232 struct vfs_state vs; 2233 struct nlm_host *host; 2234 int error, sysid; 2235 struct flock fl; 2236 2237 memset(result, 0, sizeof(*result)); 2238 memset(&vs, 0, sizeof(vs)); 2239 2240 host = nlm_find_host_by_name(argp->alock.caller_name, 2241 svc_getrpccaller(rqstp), rqstp->rq_vers); 2242 if (!host) { 2243 result->stat.stat = nlm4_denied_nolocks; 2244 return (ENOMEM); 2245 } 2246 2247 NLM_DEBUG(3, "nlm_do_unlock(): caller_name = %s (sysid = %d)\n", 2248 host->nh_caller_name, host->nh_sysid); 2249 2250 nlm_check_expired_locks(host); 2251 sysid = host->nh_sysid; 2252 2253 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 2254 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 2255 2256 if (time_uptime < nlm_grace_threshold) { 2257 result->stat.stat = nlm4_denied_grace_period; 2258 goto out; 2259 } 2260 2261 error = 
nlm_get_vfs_state(host, rqstp, &fh, &vs, (accmode_t)0); 2262 if (error) { 2263 result->stat.stat = nlm_convert_error(error); 2264 goto out; 2265 } 2266 2267 fl.l_start = argp->alock.l_offset; 2268 fl.l_len = argp->alock.l_len; 2269 fl.l_pid = argp->alock.svid; 2270 fl.l_sysid = sysid; 2271 fl.l_whence = SEEK_SET; 2272 fl.l_type = F_UNLCK; 2273 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_UNLCK, &fl, F_REMOTE); 2274 2275 /* 2276 * Ignore the error - there is no result code for failure, 2277 * only for grace period. 2278 */ 2279 result->stat.stat = nlm4_granted; 2280 2281 out: 2282 nlm_release_vfs_state(&vs); 2283 if (rpcp) 2284 *rpcp = nlm_host_get_rpc(host, TRUE); 2285 nlm_host_release(host); 2286 return (0); 2287 } 2288 2289 int 2290 nlm_do_granted(nlm4_testargs *argp, nlm4_res *result, struct svc_req *rqstp, 2291 2292 CLIENT **rpcp) 2293 { 2294 struct nlm_host *host; 2295 struct nlm_waiting_lock *nw; 2296 2297 memset(result, 0, sizeof(*result)); 2298 2299 host = nlm_find_host_by_addr(svc_getrpccaller(rqstp), rqstp->rq_vers); 2300 if (!host) { 2301 result->stat.stat = nlm4_denied_nolocks; 2302 return (ENOMEM); 2303 } 2304 2305 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 2306 result->stat.stat = nlm4_denied; 2307 KFAIL_POINT_CODE(DEBUG_FP, nlm_deny_grant, goto out); 2308 2309 mtx_lock(&nlm_global_lock); 2310 TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) { 2311 if (!nw->nw_waiting) 2312 continue; 2313 if (argp->alock.svid == nw->nw_lock.svid 2314 && argp->alock.l_offset == nw->nw_lock.l_offset 2315 && argp->alock.l_len == nw->nw_lock.l_len 2316 && argp->alock.fh.n_len == nw->nw_lock.fh.n_len 2317 && !memcmp(argp->alock.fh.n_bytes, nw->nw_lock.fh.n_bytes, 2318 nw->nw_lock.fh.n_len)) { 2319 nw->nw_waiting = FALSE; 2320 wakeup(nw); 2321 result->stat.stat = nlm4_granted; 2322 break; 2323 } 2324 } 2325 mtx_unlock(&nlm_global_lock); 2326 2327 out: 2328 if (rpcp) 2329 *rpcp = nlm_host_get_rpc(host, TRUE); 2330 nlm_host_release(host); 2331 return (0); 2332 } 2333 2334 
void 2335 nlm_do_granted_res(nlm4_res *argp, struct svc_req *rqstp) 2336 { 2337 struct nlm_host *host = NULL; 2338 struct nlm_async_lock *af = NULL; 2339 int error; 2340 2341 if (argp->cookie.n_len != sizeof(struct nlm_grantcookie)) { 2342 NLM_DEBUG(1, "NLM: bogus grant cookie"); 2343 goto out; 2344 } 2345 2346 host = nlm_find_host_by_sysid(ng_sysid(&argp->cookie)); 2347 if (!host) { 2348 NLM_DEBUG(1, "NLM: Unknown host rejected our grant"); 2349 goto out; 2350 } 2351 2352 mtx_lock(&host->nh_lock); 2353 TAILQ_FOREACH(af, &host->nh_granted, af_link) 2354 if (ng_cookie(&argp->cookie) == 2355 ng_cookie(&af->af_granted.cookie)) 2356 break; 2357 if (af) 2358 TAILQ_REMOVE(&host->nh_granted, af, af_link); 2359 mtx_unlock(&host->nh_lock); 2360 2361 if (!af) { 2362 NLM_DEBUG(1, "NLM: host %s (sysid %d) replied to our grant " 2363 "with unrecognized cookie %d:%d", host->nh_caller_name, 2364 host->nh_sysid, ng_sysid(&argp->cookie), 2365 ng_cookie(&argp->cookie)); 2366 goto out; 2367 } 2368 2369 if (argp->stat.stat != nlm4_granted) { 2370 af->af_fl.l_type = F_UNLCK; 2371 error = VOP_ADVLOCK(af->af_vp, NULL, F_UNLCK, &af->af_fl, F_REMOTE); 2372 if (error) { 2373 NLM_DEBUG(1, "NLM: host %s (sysid %d) rejected our grant " 2374 "and we failed to unlock (%d)", host->nh_caller_name, 2375 host->nh_sysid, error); 2376 goto out; 2377 } 2378 2379 NLM_DEBUG(5, "NLM: async lock %p rejected by host %s (sysid %d)", 2380 af, host->nh_caller_name, host->nh_sysid); 2381 } else { 2382 NLM_DEBUG(5, "NLM: async lock %p accepted by host %s (sysid %d)", 2383 af, host->nh_caller_name, host->nh_sysid); 2384 } 2385 2386 out: 2387 if (af) 2388 nlm_free_async_lock(af); 2389 if (host) 2390 nlm_host_release(host); 2391 } 2392 2393 void 2394 nlm_do_free_all(nlm4_notify *argp) 2395 { 2396 struct nlm_host *host, *thost; 2397 2398 TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, thost) { 2399 if (!strcmp(host->nh_caller_name, argp->name)) 2400 nlm_host_notify(host, argp->state); 2401 } 2402 } 2403 2404 /* 2405 * 
Kernel module glue 2406 */ 2407 static int 2408 nfslockd_modevent(module_t mod, int type, void *data) 2409 { 2410 2411 switch (type) { 2412 case MOD_LOAD: 2413 return (nlm_init()); 2414 2415 case MOD_UNLOAD: 2416 nlm_uninit(); 2417 /* The NLM module cannot be safely unloaded. */ 2418 /* FALLTHROUGH */ 2419 default: 2420 return (EOPNOTSUPP); 2421 } 2422 } 2423 static moduledata_t nfslockd_mod = { 2424 "nfslockd", 2425 nfslockd_modevent, 2426 NULL, 2427 }; 2428 DECLARE_MODULE(nfslockd, nfslockd_mod, SI_SUB_VFS, SI_ORDER_ANY); 2429 2430 /* So that loader and kldload(2) can find us, wherever we are.. */ 2431 MODULE_DEPEND(nfslockd, xdr, 1, 1, 1); 2432 MODULE_DEPEND(nfslockd, krpc, 1, 1, 1); 2433 MODULE_DEPEND(nfslockd, nfscommon, 1, 1, 1); 2434 MODULE_VERSION(nfslockd, 1); 2435