1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ 5 * Authors: Doug Rabson <dfr@rabson.org> 6 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
 */

#include "opt_inet6.h"

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/fail.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lockf.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <sys/unistd.h>
#include <sys/vnode.h>

#include <nfs/nfsproto.h>
#include <nfs/nfs_lock.h>

#include <nlm/nlm_prot.h>
#include <nlm/sm_inter.h>
#include <nlm/nlm.h>
#include <rpc/rpc_com.h>
#include <rpc/rpcb_prot.h>

/* Malloc type used for every allocation made by this file. */
MALLOC_DEFINE(M_NLM, "NLM", "Network Lock Manager");

/*
 * If a host is inactive (and holds no locks) for this amount of
 * seconds, we consider it idle and stop tracking it.
 */
#define NLM_IDLE_TIMEOUT	30

/*
 * We check the host list for idle every few seconds.
 */
#define NLM_IDLE_PERIOD		5

/*
 * We only look for GRANTED_RES messages for a little while.  The value
 * is added to time_uptime to compute an async lock's expiry time.
 */
#define NLM_EXPIRE_TIMEOUT	10

/*
 * Support for sysctl vfs.nlm.sysid.  Per-host child nodes are attached
 * below vfs.nlm.sysid when a host is created.
 */
static SYSCTL_NODE(_vfs, OID_AUTO, nlm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
    "Network Lock Manager");
static SYSCTL_NODE(_vfs_nlm, OID_AUTO, sysid,
    CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
    "");

/*
 * Syscall hooks
 */
static struct syscall_helper_data nlm_syscalls[] = {
	SYSCALL_INIT_HELPER(nlm_syscall),
	SYSCALL_INIT_LAST
};

/*
 * Debug level passed in from userland. We also support a sysctl hook
 * (debug.nlm_debug) so that it can be changed on a live system.
 */
static int nlm_debug_level;
SYSCTL_INT(_debug, OID_AUTO, nlm_debug, CTLFLAG_RW, &nlm_debug_level, 0, "");

/*
 * Log at LOG_DEBUG, but only when the current debug level is at least
 * _level.
 */
#define NLM_DEBUG(_level, args...)			\
	do {						\
		if (nlm_debug_level >= (_level))	\
			log(LOG_DEBUG, args);		\
	} while(0)
/* Log unconditionally at LOG_ERR. */
#define NLM_ERR(args...)			\
	do {					\
		log(LOG_ERR, args);		\
	} while(0)

/*
 * Grace period handling. The value of nlm_grace_threshold is the
 * value of time_uptime after which we are serving requests normally.
 */
static time_t nlm_grace_threshold;

/*
 * We check for idle hosts if time_uptime is greater than
 * nlm_next_idle_check.
 */
static time_t nlm_next_idle_check;

/*
 * A flag to indicate the server is already running.
 */
static int nlm_is_running;

/*
 * A socket to use for RPC - shared by all IPv4 RPC clients.
 */
static struct socket *nlm_socket;

#ifdef INET6

/*
 * A socket to use for RPC - shared by all IPv6 RPC clients.
 */
static struct socket *nlm_socket6;

#endif

/*
 * An RPC client handle that can be used to communicate with the local
 * NSM.
 */
static CLIENT *nlm_nsm;

/*
 * An AUTH handle for the server's creds.
 */
static AUTH *nlm_auth;

/*
 * A zero timeval for sending async RPC messages.
 */
struct timeval nlm_zero_tv = { 0, 0 };

/*
 * The local NSM state number
 */
int nlm_nsm_state;


/*
 * A lock to protect the host list and waiting lock list.
 */
static struct mtx nlm_global_lock;

/*
 * Locks (legend for the per-field annotations used below):
 * (l)		locked by nh_lock
 * (s)		only accessed via server RPC which is single threaded
 * (g)		locked by nlm_global_lock
 * (c)		const until freeing
 * (a)		modified using atomic ops
 */

/*
 * A pending client-side lock request, stored on the nlm_waiting_locks
 * list.
188 */ 189 struct nlm_waiting_lock { 190 TAILQ_ENTRY(nlm_waiting_lock) nw_link; /* (g) */ 191 bool_t nw_waiting; /* (g) */ 192 nlm4_lock nw_lock; /* (c) */ 193 union nfsfh nw_fh; /* (c) */ 194 struct vnode *nw_vp; /* (c) */ 195 }; 196 TAILQ_HEAD(nlm_waiting_lock_list, nlm_waiting_lock); 197 198 struct nlm_waiting_lock_list nlm_waiting_locks; /* (g) */ 199 200 /* 201 * A pending server-side asynchronous lock request, stored on the 202 * nh_pending list of the NLM host. 203 */ 204 struct nlm_async_lock { 205 TAILQ_ENTRY(nlm_async_lock) af_link; /* (l) host's list of locks */ 206 struct task af_task; /* (c) async callback details */ 207 void *af_cookie; /* (l) lock manager cancel token */ 208 struct vnode *af_vp; /* (l) vnode to lock */ 209 struct flock af_fl; /* (c) lock details */ 210 struct nlm_host *af_host; /* (c) host which is locking */ 211 CLIENT *af_rpc; /* (c) rpc client to send message */ 212 nlm4_testargs af_granted; /* (c) notification details */ 213 time_t af_expiretime; /* (c) notification time */ 214 }; 215 TAILQ_HEAD(nlm_async_lock_list, nlm_async_lock); 216 217 /* 218 * NLM host. 219 */ 220 enum nlm_host_state { 221 NLM_UNMONITORED, 222 NLM_MONITORED, 223 NLM_MONITOR_FAILED, 224 NLM_RECOVERING 225 }; 226 227 struct nlm_rpc { 228 CLIENT *nr_client; /* (l) RPC client handle */ 229 time_t nr_create_time; /* (l) when client was created */ 230 }; 231 232 struct nlm_host { 233 struct mtx nh_lock; 234 volatile u_int nh_refs; /* (a) reference count */ 235 TAILQ_ENTRY(nlm_host) nh_link; /* (g) global list of hosts */ 236 char nh_caller_name[MAXNAMELEN]; /* (c) printable name of host */ 237 uint32_t nh_sysid; /* (c) our allocaed system ID */ 238 char nh_sysid_string[10]; /* (c) string rep. 
of sysid */ 239 struct sockaddr_storage nh_addr; /* (s) remote address of host */ 240 struct nlm_rpc nh_srvrpc; /* (l) RPC for server replies */ 241 struct nlm_rpc nh_clntrpc; /* (l) RPC for client requests */ 242 rpcvers_t nh_vers; /* (s) NLM version of host */ 243 int nh_state; /* (s) last seen NSM state of host */ 244 enum nlm_host_state nh_monstate; /* (l) local NSM monitoring state */ 245 time_t nh_idle_timeout; /* (s) Time at which host is idle */ 246 struct sysctl_ctx_list nh_sysctl; /* (c) vfs.nlm.sysid nodes */ 247 uint32_t nh_grantcookie; /* (l) grant cookie counter */ 248 struct nlm_async_lock_list nh_pending; /* (l) pending async locks */ 249 struct nlm_async_lock_list nh_granted; /* (l) granted locks */ 250 struct nlm_async_lock_list nh_finished; /* (l) finished async locks */ 251 }; 252 TAILQ_HEAD(nlm_host_list, nlm_host); 253 254 static struct nlm_host_list nlm_hosts; /* (g) */ 255 static uint32_t nlm_next_sysid = 1; /* (g) */ 256 257 static void nlm_host_unmonitor(struct nlm_host *); 258 259 struct nlm_grantcookie { 260 uint32_t ng_sysid; 261 uint32_t ng_cookie; 262 }; 263 264 static inline uint32_t 265 ng_sysid(struct netobj *src) 266 { 267 268 return ((struct nlm_grantcookie *)src->n_bytes)->ng_sysid; 269 } 270 271 static inline uint32_t 272 ng_cookie(struct netobj *src) 273 { 274 275 return ((struct nlm_grantcookie *)src->n_bytes)->ng_cookie; 276 } 277 278 /**********************************************************************/ 279 280 /* 281 * Initialise NLM globals. 
282 */ 283 static int 284 nlm_init(void) 285 { 286 int error; 287 288 mtx_init(&nlm_global_lock, "nlm_global_lock", NULL, MTX_DEF); 289 TAILQ_INIT(&nlm_waiting_locks); 290 TAILQ_INIT(&nlm_hosts); 291 292 error = syscall_helper_register(nlm_syscalls, SY_THR_STATIC_KLD); 293 if (error != 0) 294 NLM_ERR("Can't register NLM syscall\n"); 295 return (error); 296 } 297 298 static void 299 nlm_uninit(void) 300 { 301 302 syscall_helper_unregister(nlm_syscalls); 303 } 304 305 /* 306 * Create a netobj from an arbitrary source. 307 */ 308 void 309 nlm_make_netobj(struct netobj *dst, caddr_t src, size_t srcsize, 310 struct malloc_type *type) 311 { 312 313 dst->n_len = srcsize; 314 dst->n_bytes = malloc(srcsize, type, M_WAITOK); 315 memcpy(dst->n_bytes, src, srcsize); 316 } 317 318 /* 319 * Copy a struct netobj. 320 */ 321 void 322 nlm_copy_netobj(struct netobj *dst, struct netobj *src, 323 struct malloc_type *type) 324 { 325 326 nlm_make_netobj(dst, src->n_bytes, src->n_len, type); 327 } 328 329 330 /* 331 * Create an RPC client handle for the given (address,prog,vers) 332 * triple using UDP. 333 */ 334 static CLIENT * 335 nlm_get_rpc(struct sockaddr *sa, rpcprog_t prog, rpcvers_t vers) 336 { 337 char *wchan = "nlmrcv"; 338 struct sockaddr_storage ss; 339 struct socket *so; 340 CLIENT *rpcb; 341 struct timeval timo; 342 RPCB parms; 343 char *uaddr; 344 enum clnt_stat stat = RPC_SUCCESS; 345 int rpcvers = RPCBVERS4; 346 bool_t do_tcp = FALSE; 347 bool_t tryagain = FALSE; 348 struct portmap mapping; 349 u_short port = 0; 350 351 /* 352 * First we need to contact the remote RPCBIND service to find 353 * the right port. 
354 */ 355 memcpy(&ss, sa, sa->sa_len); 356 switch (ss.ss_family) { 357 case AF_INET: 358 ((struct sockaddr_in *)&ss)->sin_port = htons(111); 359 so = nlm_socket; 360 break; 361 #ifdef INET6 362 case AF_INET6: 363 ((struct sockaddr_in6 *)&ss)->sin6_port = htons(111); 364 so = nlm_socket6; 365 break; 366 #endif 367 368 default: 369 /* 370 * Unsupported address family - fail. 371 */ 372 return (NULL); 373 } 374 375 rpcb = clnt_dg_create(so, (struct sockaddr *)&ss, 376 RPCBPROG, rpcvers, 0, 0); 377 if (!rpcb) 378 return (NULL); 379 380 try_tcp: 381 parms.r_prog = prog; 382 parms.r_vers = vers; 383 if (do_tcp) 384 parms.r_netid = "tcp"; 385 else 386 parms.r_netid = "udp"; 387 parms.r_addr = ""; 388 parms.r_owner = ""; 389 390 /* 391 * Use the default timeout. 392 */ 393 timo.tv_sec = 25; 394 timo.tv_usec = 0; 395 again: 396 switch (rpcvers) { 397 case RPCBVERS4: 398 case RPCBVERS: 399 /* 400 * Try RPCBIND 4 then 3. 401 */ 402 uaddr = NULL; 403 stat = CLNT_CALL(rpcb, (rpcprog_t) RPCBPROC_GETADDR, 404 (xdrproc_t) xdr_rpcb, &parms, 405 (xdrproc_t) xdr_wrapstring, &uaddr, timo); 406 if (stat == RPC_SUCCESS) { 407 /* 408 * We have a reply from the remote RPCBIND - turn it 409 * into an appropriate address and make a new client 410 * that can talk to the remote NLM. 411 * 412 * XXX fixup IPv6 scope ID. 413 */ 414 struct netbuf *a; 415 a = __rpc_uaddr2taddr_af(ss.ss_family, uaddr); 416 if (!a) { 417 tryagain = TRUE; 418 } else { 419 tryagain = FALSE; 420 memcpy(&ss, a->buf, a->len); 421 free(a->buf, M_RPC); 422 free(a, M_RPC); 423 xdr_free((xdrproc_t) xdr_wrapstring, &uaddr); 424 } 425 } 426 if (tryagain || stat == RPC_PROGVERSMISMATCH) { 427 if (rpcvers == RPCBVERS4) 428 rpcvers = RPCBVERS; 429 else if (rpcvers == RPCBVERS) 430 rpcvers = PMAPVERS; 431 CLNT_CONTROL(rpcb, CLSET_VERS, &rpcvers); 432 goto again; 433 } 434 break; 435 case PMAPVERS: 436 /* 437 * Try portmap. 
438 */ 439 mapping.pm_prog = parms.r_prog; 440 mapping.pm_vers = parms.r_vers; 441 mapping.pm_prot = do_tcp ? IPPROTO_TCP : IPPROTO_UDP; 442 mapping.pm_port = 0; 443 444 stat = CLNT_CALL(rpcb, (rpcprog_t) PMAPPROC_GETPORT, 445 (xdrproc_t) xdr_portmap, &mapping, 446 (xdrproc_t) xdr_u_short, &port, timo); 447 448 if (stat == RPC_SUCCESS) { 449 switch (ss.ss_family) { 450 case AF_INET: 451 ((struct sockaddr_in *)&ss)->sin_port = 452 htons(port); 453 break; 454 455 #ifdef INET6 456 case AF_INET6: 457 ((struct sockaddr_in6 *)&ss)->sin6_port = 458 htons(port); 459 break; 460 #endif 461 } 462 } 463 break; 464 default: 465 panic("invalid rpcvers %d", rpcvers); 466 } 467 /* 468 * We may have a positive response from the portmapper, but the NLM 469 * service was not found. Make sure we received a valid port. 470 */ 471 switch (ss.ss_family) { 472 case AF_INET: 473 port = ((struct sockaddr_in *)&ss)->sin_port; 474 break; 475 #ifdef INET6 476 case AF_INET6: 477 port = ((struct sockaddr_in6 *)&ss)->sin6_port; 478 break; 479 #endif 480 } 481 if (stat != RPC_SUCCESS || !port) { 482 /* 483 * If we were able to talk to rpcbind or portmap, but the udp 484 * variant wasn't available, ask about tcp. 485 * 486 * XXX - We could also check for a TCP portmapper, but 487 * if the host is running a portmapper at all, we should be able 488 * to hail it over UDP. 489 */ 490 if (stat == RPC_SUCCESS && !do_tcp) { 491 do_tcp = TRUE; 492 goto try_tcp; 493 } 494 495 /* Otherwise, bad news. */ 496 NLM_ERR("NLM: failed to contact remote rpcbind, " 497 "stat = %d, port = %d\n", (int) stat, port); 498 CLNT_DESTROY(rpcb); 499 return (NULL); 500 } 501 502 if (do_tcp) { 503 /* 504 * Destroy the UDP client we used to speak to rpcbind and 505 * recreate as a TCP client. 
506 */ 507 struct netconfig *nconf = NULL; 508 509 CLNT_DESTROY(rpcb); 510 511 switch (ss.ss_family) { 512 case AF_INET: 513 nconf = getnetconfigent("tcp"); 514 break; 515 #ifdef INET6 516 case AF_INET6: 517 nconf = getnetconfigent("tcp6"); 518 break; 519 #endif 520 } 521 522 rpcb = clnt_reconnect_create(nconf, (struct sockaddr *)&ss, 523 prog, vers, 0, 0); 524 CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan); 525 rpcb->cl_auth = nlm_auth; 526 527 } else { 528 /* 529 * Re-use the client we used to speak to rpcbind. 530 */ 531 CLNT_CONTROL(rpcb, CLSET_SVC_ADDR, &ss); 532 CLNT_CONTROL(rpcb, CLSET_PROG, &prog); 533 CLNT_CONTROL(rpcb, CLSET_VERS, &vers); 534 CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan); 535 rpcb->cl_auth = nlm_auth; 536 } 537 538 return (rpcb); 539 } 540 541 /* 542 * This async callback after when an async lock request has been 543 * granted. We notify the host which initiated the request. 544 */ 545 static void 546 nlm_lock_callback(void *arg, int pending) 547 { 548 struct nlm_async_lock *af = (struct nlm_async_lock *) arg; 549 struct rpc_callextra ext; 550 551 NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) granted," 552 " cookie %d:%d\n", af, af->af_host->nh_caller_name, 553 af->af_host->nh_sysid, ng_sysid(&af->af_granted.cookie), 554 ng_cookie(&af->af_granted.cookie)); 555 556 /* 557 * Send the results back to the host. 558 * 559 * Note: there is a possible race here with nlm_host_notify 560 * destroying the RPC client. To avoid problems, the first 561 * thing nlm_host_notify does is to cancel pending async lock 562 * requests. 
563 */ 564 memset(&ext, 0, sizeof(ext)); 565 ext.rc_auth = nlm_auth; 566 if (af->af_host->nh_vers == NLM_VERS4) { 567 nlm4_granted_msg_4(&af->af_granted, 568 NULL, af->af_rpc, &ext, nlm_zero_tv); 569 } else { 570 /* 571 * Back-convert to legacy protocol 572 */ 573 nlm_testargs granted; 574 granted.cookie = af->af_granted.cookie; 575 granted.exclusive = af->af_granted.exclusive; 576 granted.alock.caller_name = 577 af->af_granted.alock.caller_name; 578 granted.alock.fh = af->af_granted.alock.fh; 579 granted.alock.oh = af->af_granted.alock.oh; 580 granted.alock.svid = af->af_granted.alock.svid; 581 granted.alock.l_offset = 582 af->af_granted.alock.l_offset; 583 granted.alock.l_len = 584 af->af_granted.alock.l_len; 585 586 nlm_granted_msg_1(&granted, 587 NULL, af->af_rpc, &ext, nlm_zero_tv); 588 } 589 590 /* 591 * Move this entry to the nh_granted list. 592 */ 593 af->af_expiretime = time_uptime + NLM_EXPIRE_TIMEOUT; 594 mtx_lock(&af->af_host->nh_lock); 595 TAILQ_REMOVE(&af->af_host->nh_pending, af, af_link); 596 TAILQ_INSERT_TAIL(&af->af_host->nh_granted, af, af_link); 597 mtx_unlock(&af->af_host->nh_lock); 598 } 599 600 /* 601 * Free an async lock request. The request must have been removed from 602 * any list. 603 */ 604 static void 605 nlm_free_async_lock(struct nlm_async_lock *af) 606 { 607 /* 608 * Free an async lock. 609 */ 610 if (af->af_rpc) 611 CLNT_RELEASE(af->af_rpc); 612 xdr_free((xdrproc_t) xdr_nlm4_testargs, &af->af_granted); 613 if (af->af_vp) 614 vrele(af->af_vp); 615 free(af, M_NLM); 616 } 617 618 /* 619 * Cancel our async request - this must be called with 620 * af->nh_host->nh_lock held. This is slightly complicated by a 621 * potential race with our own callback. If we fail to cancel the 622 * lock, it must already have been granted - we make sure our async 623 * task has completed by calling taskqueue_drain in this case. 
624 */ 625 static int 626 nlm_cancel_async_lock(struct nlm_async_lock *af) 627 { 628 struct nlm_host *host = af->af_host; 629 int error; 630 631 mtx_assert(&host->nh_lock, MA_OWNED); 632 633 mtx_unlock(&host->nh_lock); 634 635 error = VOP_ADVLOCKASYNC(af->af_vp, NULL, F_CANCEL, &af->af_fl, 636 F_REMOTE, NULL, &af->af_cookie); 637 638 if (error) { 639 /* 640 * We failed to cancel - make sure our callback has 641 * completed before we continue. 642 */ 643 taskqueue_drain(taskqueue_thread, &af->af_task); 644 } 645 646 mtx_lock(&host->nh_lock); 647 648 if (!error) { 649 NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) " 650 "cancelled\n", af, host->nh_caller_name, host->nh_sysid); 651 652 /* 653 * Remove from the nh_pending list and free now that 654 * we are safe from the callback. 655 */ 656 TAILQ_REMOVE(&host->nh_pending, af, af_link); 657 mtx_unlock(&host->nh_lock); 658 nlm_free_async_lock(af); 659 mtx_lock(&host->nh_lock); 660 } 661 662 return (error); 663 } 664 665 static void 666 nlm_check_expired_locks(struct nlm_host *host) 667 { 668 struct nlm_async_lock *af; 669 time_t uptime = time_uptime; 670 671 mtx_lock(&host->nh_lock); 672 while ((af = TAILQ_FIRST(&host->nh_granted)) != NULL 673 && uptime >= af->af_expiretime) { 674 NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) expired," 675 " cookie %d:%d\n", af, af->af_host->nh_caller_name, 676 af->af_host->nh_sysid, ng_sysid(&af->af_granted.cookie), 677 ng_cookie(&af->af_granted.cookie)); 678 TAILQ_REMOVE(&host->nh_granted, af, af_link); 679 mtx_unlock(&host->nh_lock); 680 nlm_free_async_lock(af); 681 mtx_lock(&host->nh_lock); 682 } 683 while ((af = TAILQ_FIRST(&host->nh_finished)) != NULL) { 684 TAILQ_REMOVE(&host->nh_finished, af, af_link); 685 mtx_unlock(&host->nh_lock); 686 nlm_free_async_lock(af); 687 mtx_lock(&host->nh_lock); 688 } 689 mtx_unlock(&host->nh_lock); 690 } 691 692 /* 693 * Free resources used by a host. 
This is called after the reference 694 * count has reached zero so it doesn't need to worry about locks. 695 */ 696 static void 697 nlm_host_destroy(struct nlm_host *host) 698 { 699 700 mtx_lock(&nlm_global_lock); 701 TAILQ_REMOVE(&nlm_hosts, host, nh_link); 702 mtx_unlock(&nlm_global_lock); 703 704 if (host->nh_srvrpc.nr_client) 705 CLNT_RELEASE(host->nh_srvrpc.nr_client); 706 if (host->nh_clntrpc.nr_client) 707 CLNT_RELEASE(host->nh_clntrpc.nr_client); 708 mtx_destroy(&host->nh_lock); 709 sysctl_ctx_free(&host->nh_sysctl); 710 free(host, M_NLM); 711 } 712 713 /* 714 * Thread start callback for client lock recovery 715 */ 716 static void 717 nlm_client_recovery_start(void *arg) 718 { 719 struct nlm_host *host = (struct nlm_host *) arg; 720 721 NLM_DEBUG(1, "NLM: client lock recovery for %s started\n", 722 host->nh_caller_name); 723 724 nlm_client_recovery(host); 725 726 NLM_DEBUG(1, "NLM: client lock recovery for %s completed\n", 727 host->nh_caller_name); 728 729 host->nh_monstate = NLM_MONITORED; 730 nlm_host_release(host); 731 732 kthread_exit(); 733 } 734 735 /* 736 * This is called when we receive a host state change notification. We 737 * unlock any active locks owned by the host. When rpc.lockd is 738 * shutting down, this function is called with newstate set to zero 739 * which allows us to cancel any pending async locks and clear the 740 * locking state. 741 */ 742 static void 743 nlm_host_notify(struct nlm_host *host, int newstate) 744 { 745 struct nlm_async_lock *af; 746 747 if (newstate) { 748 NLM_DEBUG(1, "NLM: host %s (sysid %d) rebooted, new " 749 "state is %d\n", host->nh_caller_name, 750 host->nh_sysid, newstate); 751 } 752 753 /* 754 * Cancel any pending async locks for this host. 755 */ 756 mtx_lock(&host->nh_lock); 757 while ((af = TAILQ_FIRST(&host->nh_pending)) != NULL) { 758 /* 759 * nlm_cancel_async_lock will remove the entry from 760 * nh_pending and free it. 
761 */ 762 nlm_cancel_async_lock(af); 763 } 764 mtx_unlock(&host->nh_lock); 765 nlm_check_expired_locks(host); 766 767 /* 768 * The host just rebooted - trash its locks. 769 */ 770 lf_clearremotesys(host->nh_sysid); 771 host->nh_state = newstate; 772 773 /* 774 * If we have any remote locks for this host (i.e. it 775 * represents a remote NFS server that our local NFS client 776 * has locks for), start a recovery thread. 777 */ 778 if (newstate != 0 779 && host->nh_monstate != NLM_RECOVERING 780 && lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid) > 0) { 781 struct thread *td; 782 host->nh_monstate = NLM_RECOVERING; 783 refcount_acquire(&host->nh_refs); 784 kthread_add(nlm_client_recovery_start, host, curproc, &td, 0, 0, 785 "NFS lock recovery for %s", host->nh_caller_name); 786 } 787 } 788 789 /* 790 * Sysctl handler to count the number of locks for a sysid. 791 */ 792 static int 793 nlm_host_lock_count_sysctl(SYSCTL_HANDLER_ARGS) 794 { 795 struct nlm_host *host; 796 int count; 797 798 host = oidp->oid_arg1; 799 count = lf_countlocks(host->nh_sysid); 800 return sysctl_handle_int(oidp, &count, 0, req); 801 } 802 803 /* 804 * Sysctl handler to count the number of client locks for a sysid. 805 */ 806 static int 807 nlm_host_client_lock_count_sysctl(SYSCTL_HANDLER_ARGS) 808 { 809 struct nlm_host *host; 810 int count; 811 812 host = oidp->oid_arg1; 813 count = lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid); 814 return sysctl_handle_int(oidp, &count, 0, req); 815 } 816 817 /* 818 * Create a new NLM host. 
819 */ 820 static struct nlm_host * 821 nlm_create_host(const char* caller_name) 822 { 823 struct nlm_host *host; 824 struct sysctl_oid *oid; 825 826 mtx_assert(&nlm_global_lock, MA_OWNED); 827 828 NLM_DEBUG(1, "NLM: new host %s (sysid %d)\n", 829 caller_name, nlm_next_sysid); 830 host = malloc(sizeof(struct nlm_host), M_NLM, M_NOWAIT|M_ZERO); 831 if (!host) 832 return (NULL); 833 mtx_init(&host->nh_lock, "nh_lock", NULL, MTX_DEF); 834 host->nh_refs = 1; 835 strlcpy(host->nh_caller_name, caller_name, MAXNAMELEN); 836 host->nh_sysid = nlm_next_sysid++; 837 snprintf(host->nh_sysid_string, sizeof(host->nh_sysid_string), 838 "%d", host->nh_sysid); 839 host->nh_vers = 0; 840 host->nh_state = 0; 841 host->nh_monstate = NLM_UNMONITORED; 842 host->nh_grantcookie = 1; 843 TAILQ_INIT(&host->nh_pending); 844 TAILQ_INIT(&host->nh_granted); 845 TAILQ_INIT(&host->nh_finished); 846 TAILQ_INSERT_TAIL(&nlm_hosts, host, nh_link); 847 848 mtx_unlock(&nlm_global_lock); 849 850 sysctl_ctx_init(&host->nh_sysctl); 851 oid = SYSCTL_ADD_NODE(&host->nh_sysctl, 852 SYSCTL_STATIC_CHILDREN(_vfs_nlm_sysid), 853 OID_AUTO, host->nh_sysid_string, CTLFLAG_RD | CTLFLAG_MPSAFE, 854 NULL, ""); 855 SYSCTL_ADD_STRING(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 856 "hostname", CTLFLAG_RD, host->nh_caller_name, 0, ""); 857 SYSCTL_ADD_UINT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 858 "version", CTLFLAG_RD, &host->nh_vers, 0, ""); 859 SYSCTL_ADD_UINT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 860 "monitored", CTLFLAG_RD, &host->nh_monstate, 0, ""); 861 SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 862 "lock_count", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, host, 863 0, nlm_host_lock_count_sysctl, "I", ""); 864 SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 865 "client_lock_count", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 866 host, 0, nlm_host_client_lock_count_sysctl, "I", ""); 867 868 mtx_lock(&nlm_global_lock); 869 870 return (host); 871 } 
872 873 /* 874 * Acquire the next sysid for remote locks not handled by the NLM. 875 */ 876 uint32_t 877 nlm_acquire_next_sysid(void) 878 { 879 uint32_t next_sysid; 880 881 mtx_lock(&nlm_global_lock); 882 next_sysid = nlm_next_sysid++; 883 mtx_unlock(&nlm_global_lock); 884 return (next_sysid); 885 } 886 887 /* 888 * Return non-zero if the address parts of the two sockaddrs are the 889 * same. 890 */ 891 static int 892 nlm_compare_addr(const struct sockaddr *a, const struct sockaddr *b) 893 { 894 const struct sockaddr_in *a4, *b4; 895 #ifdef INET6 896 const struct sockaddr_in6 *a6, *b6; 897 #endif 898 899 if (a->sa_family != b->sa_family) 900 return (FALSE); 901 902 switch (a->sa_family) { 903 case AF_INET: 904 a4 = (const struct sockaddr_in *) a; 905 b4 = (const struct sockaddr_in *) b; 906 return !memcmp(&a4->sin_addr, &b4->sin_addr, 907 sizeof(a4->sin_addr)); 908 #ifdef INET6 909 case AF_INET6: 910 a6 = (const struct sockaddr_in6 *) a; 911 b6 = (const struct sockaddr_in6 *) b; 912 return !memcmp(&a6->sin6_addr, &b6->sin6_addr, 913 sizeof(a6->sin6_addr)); 914 #endif 915 } 916 917 return (0); 918 } 919 920 /* 921 * Check for idle hosts and stop monitoring them. We could also free 922 * the host structure here, possibly after a larger timeout but that 923 * would require some care to avoid races with 924 * e.g. nlm_host_lock_count_sysctl. 
925 */ 926 static void 927 nlm_check_idle(void) 928 { 929 struct nlm_host *host; 930 931 mtx_assert(&nlm_global_lock, MA_OWNED); 932 933 if (time_uptime <= nlm_next_idle_check) 934 return; 935 936 nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD; 937 938 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 939 if (host->nh_monstate == NLM_MONITORED 940 && time_uptime > host->nh_idle_timeout) { 941 mtx_unlock(&nlm_global_lock); 942 if (lf_countlocks(host->nh_sysid) > 0 943 || lf_countlocks(NLM_SYSID_CLIENT 944 + host->nh_sysid)) { 945 host->nh_idle_timeout = 946 time_uptime + NLM_IDLE_TIMEOUT; 947 mtx_lock(&nlm_global_lock); 948 continue; 949 } 950 nlm_host_unmonitor(host); 951 mtx_lock(&nlm_global_lock); 952 } 953 } 954 } 955 956 /* 957 * Search for an existing NLM host that matches the given name 958 * (typically the caller_name element of an nlm4_lock). If none is 959 * found, create a new host. If 'addr' is non-NULL, record the remote 960 * address of the host so that we can call it back for async 961 * responses. If 'vers' is greater than zero then record the NLM 962 * program version to use to communicate with this client. 963 */ 964 struct nlm_host * 965 nlm_find_host_by_name(const char *name, const struct sockaddr *addr, 966 rpcvers_t vers) 967 { 968 struct nlm_host *host; 969 970 mtx_lock(&nlm_global_lock); 971 972 /* 973 * The remote host is determined by caller_name. 974 */ 975 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 976 if (!strcmp(host->nh_caller_name, name)) 977 break; 978 } 979 980 if (!host) { 981 host = nlm_create_host(name); 982 if (!host) { 983 mtx_unlock(&nlm_global_lock); 984 return (NULL); 985 } 986 } 987 refcount_acquire(&host->nh_refs); 988 989 host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT; 990 991 /* 992 * If we have an address for the host, record it so that we 993 * can send async replies etc. 
994 */ 995 if (addr) { 996 997 KASSERT(addr->sa_len < sizeof(struct sockaddr_storage), 998 ("Strange remote transport address length")); 999 1000 /* 1001 * If we have seen an address before and we currently 1002 * have an RPC client handle, make sure the address is 1003 * the same, otherwise discard the client handle. 1004 */ 1005 if (host->nh_addr.ss_len && host->nh_srvrpc.nr_client) { 1006 if (!nlm_compare_addr( 1007 (struct sockaddr *) &host->nh_addr, 1008 addr) 1009 || host->nh_vers != vers) { 1010 CLIENT *client; 1011 mtx_lock(&host->nh_lock); 1012 client = host->nh_srvrpc.nr_client; 1013 host->nh_srvrpc.nr_client = NULL; 1014 mtx_unlock(&host->nh_lock); 1015 if (client) { 1016 CLNT_RELEASE(client); 1017 } 1018 } 1019 } 1020 memcpy(&host->nh_addr, addr, addr->sa_len); 1021 host->nh_vers = vers; 1022 } 1023 1024 nlm_check_idle(); 1025 1026 mtx_unlock(&nlm_global_lock); 1027 1028 return (host); 1029 } 1030 1031 /* 1032 * Search for an existing NLM host that matches the given remote 1033 * address. If none is found, create a new host with the requested 1034 * address and remember 'vers' as the NLM protocol version to use for 1035 * that host. 1036 */ 1037 struct nlm_host * 1038 nlm_find_host_by_addr(const struct sockaddr *addr, int vers) 1039 { 1040 /* 1041 * Fake up a name using inet_ntop. This buffer is 1042 * large enough for an IPv6 address. 1043 */ 1044 char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"]; 1045 struct nlm_host *host; 1046 1047 switch (addr->sa_family) { 1048 case AF_INET: 1049 inet_ntop(AF_INET, 1050 &((const struct sockaddr_in *) addr)->sin_addr, 1051 tmp, sizeof tmp); 1052 break; 1053 #ifdef INET6 1054 case AF_INET6: 1055 inet_ntop(AF_INET6, 1056 &((const struct sockaddr_in6 *) addr)->sin6_addr, 1057 tmp, sizeof tmp); 1058 break; 1059 #endif 1060 default: 1061 strlcpy(tmp, "<unknown>", sizeof(tmp)); 1062 } 1063 1064 1065 mtx_lock(&nlm_global_lock); 1066 1067 /* 1068 * The remote host is determined by caller_name. 
1069 */ 1070 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 1071 if (nlm_compare_addr(addr, 1072 (const struct sockaddr *) &host->nh_addr)) 1073 break; 1074 } 1075 1076 if (!host) { 1077 host = nlm_create_host(tmp); 1078 if (!host) { 1079 mtx_unlock(&nlm_global_lock); 1080 return (NULL); 1081 } 1082 memcpy(&host->nh_addr, addr, addr->sa_len); 1083 host->nh_vers = vers; 1084 } 1085 refcount_acquire(&host->nh_refs); 1086 1087 host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT; 1088 1089 nlm_check_idle(); 1090 1091 mtx_unlock(&nlm_global_lock); 1092 1093 return (host); 1094 } 1095 1096 /* 1097 * Find the NLM host that matches the value of 'sysid'. If none 1098 * exists, return NULL. 1099 */ 1100 static struct nlm_host * 1101 nlm_find_host_by_sysid(int sysid) 1102 { 1103 struct nlm_host *host; 1104 1105 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 1106 if (host->nh_sysid == sysid) { 1107 refcount_acquire(&host->nh_refs); 1108 return (host); 1109 } 1110 } 1111 1112 return (NULL); 1113 } 1114 1115 void nlm_host_release(struct nlm_host *host) 1116 { 1117 if (refcount_release(&host->nh_refs)) { 1118 /* 1119 * Free the host 1120 */ 1121 nlm_host_destroy(host); 1122 } 1123 } 1124 1125 /* 1126 * Unregister this NLM host with the local NSM due to idleness. 1127 */ 1128 static void 1129 nlm_host_unmonitor(struct nlm_host *host) 1130 { 1131 mon_id smmonid; 1132 sm_stat_res smstat; 1133 struct timeval timo; 1134 enum clnt_stat stat; 1135 1136 NLM_DEBUG(1, "NLM: unmonitoring %s (sysid %d)\n", 1137 host->nh_caller_name, host->nh_sysid); 1138 1139 /* 1140 * We put our assigned system ID value in the priv field to 1141 * make it simpler to find the host if we are notified of a 1142 * host restart. 
1143 */ 1144 smmonid.mon_name = host->nh_caller_name; 1145 smmonid.my_id.my_name = "localhost"; 1146 smmonid.my_id.my_prog = NLM_PROG; 1147 smmonid.my_id.my_vers = NLM_SM; 1148 smmonid.my_id.my_proc = NLM_SM_NOTIFY; 1149 1150 timo.tv_sec = 25; 1151 timo.tv_usec = 0; 1152 stat = CLNT_CALL(nlm_nsm, SM_UNMON, 1153 (xdrproc_t) xdr_mon, &smmonid, 1154 (xdrproc_t) xdr_sm_stat, &smstat, timo); 1155 1156 if (stat != RPC_SUCCESS) { 1157 NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat); 1158 return; 1159 } 1160 if (smstat.res_stat == stat_fail) { 1161 NLM_ERR("Local NSM refuses to unmonitor %s\n", 1162 host->nh_caller_name); 1163 return; 1164 } 1165 1166 host->nh_monstate = NLM_UNMONITORED; 1167 } 1168 1169 /* 1170 * Register this NLM host with the local NSM so that we can be 1171 * notified if it reboots. 1172 */ 1173 void 1174 nlm_host_monitor(struct nlm_host *host, int state) 1175 { 1176 mon smmon; 1177 sm_stat_res smstat; 1178 struct timeval timo; 1179 enum clnt_stat stat; 1180 1181 if (state && !host->nh_state) { 1182 /* 1183 * This is the first time we have seen an NSM state 1184 * value for this host. We record it here to help 1185 * detect host reboots. 1186 */ 1187 host->nh_state = state; 1188 NLM_DEBUG(1, "NLM: host %s (sysid %d) has NSM state %d\n", 1189 host->nh_caller_name, host->nh_sysid, state); 1190 } 1191 1192 mtx_lock(&host->nh_lock); 1193 if (host->nh_monstate != NLM_UNMONITORED) { 1194 mtx_unlock(&host->nh_lock); 1195 return; 1196 } 1197 host->nh_monstate = NLM_MONITORED; 1198 mtx_unlock(&host->nh_lock); 1199 1200 NLM_DEBUG(1, "NLM: monitoring %s (sysid %d)\n", 1201 host->nh_caller_name, host->nh_sysid); 1202 1203 /* 1204 * We put our assigned system ID value in the priv field to 1205 * make it simpler to find the host if we are notified of a 1206 * host restart. 
1207 */ 1208 smmon.mon_id.mon_name = host->nh_caller_name; 1209 smmon.mon_id.my_id.my_name = "localhost"; 1210 smmon.mon_id.my_id.my_prog = NLM_PROG; 1211 smmon.mon_id.my_id.my_vers = NLM_SM; 1212 smmon.mon_id.my_id.my_proc = NLM_SM_NOTIFY; 1213 memcpy(smmon.priv, &host->nh_sysid, sizeof(host->nh_sysid)); 1214 1215 timo.tv_sec = 25; 1216 timo.tv_usec = 0; 1217 stat = CLNT_CALL(nlm_nsm, SM_MON, 1218 (xdrproc_t) xdr_mon, &smmon, 1219 (xdrproc_t) xdr_sm_stat, &smstat, timo); 1220 1221 if (stat != RPC_SUCCESS) { 1222 NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat); 1223 return; 1224 } 1225 if (smstat.res_stat == stat_fail) { 1226 NLM_ERR("Local NSM refuses to monitor %s\n", 1227 host->nh_caller_name); 1228 mtx_lock(&host->nh_lock); 1229 host->nh_monstate = NLM_MONITOR_FAILED; 1230 mtx_unlock(&host->nh_lock); 1231 return; 1232 } 1233 1234 host->nh_monstate = NLM_MONITORED; 1235 } 1236 1237 /* 1238 * Return an RPC client handle that can be used to talk to the NLM 1239 * running on the given host. 1240 */ 1241 CLIENT * 1242 nlm_host_get_rpc(struct nlm_host *host, bool_t isserver) 1243 { 1244 struct nlm_rpc *rpc; 1245 CLIENT *client; 1246 1247 mtx_lock(&host->nh_lock); 1248 1249 if (isserver) 1250 rpc = &host->nh_srvrpc; 1251 else 1252 rpc = &host->nh_clntrpc; 1253 1254 /* 1255 * We can't hold onto RPC handles for too long - the async 1256 * call/reply protocol used by some NLM clients makes it hard 1257 * to tell when they change port numbers (e.g. after a 1258 * reboot). Note that if a client reboots while it isn't 1259 * holding any locks, it won't bother to notify us. We 1260 * expire the RPC handles after two minutes. 
1261 */ 1262 if (rpc->nr_client && time_uptime > rpc->nr_create_time + 2*60) { 1263 client = rpc->nr_client; 1264 rpc->nr_client = NULL; 1265 mtx_unlock(&host->nh_lock); 1266 CLNT_RELEASE(client); 1267 mtx_lock(&host->nh_lock); 1268 } 1269 1270 if (!rpc->nr_client) { 1271 mtx_unlock(&host->nh_lock); 1272 client = nlm_get_rpc((struct sockaddr *)&host->nh_addr, 1273 NLM_PROG, host->nh_vers); 1274 mtx_lock(&host->nh_lock); 1275 1276 if (client) { 1277 if (rpc->nr_client) { 1278 mtx_unlock(&host->nh_lock); 1279 CLNT_DESTROY(client); 1280 mtx_lock(&host->nh_lock); 1281 } else { 1282 rpc->nr_client = client; 1283 rpc->nr_create_time = time_uptime; 1284 } 1285 } 1286 } 1287 1288 client = rpc->nr_client; 1289 if (client) 1290 CLNT_ACQUIRE(client); 1291 mtx_unlock(&host->nh_lock); 1292 1293 return (client); 1294 1295 } 1296 1297 int nlm_host_get_sysid(struct nlm_host *host) 1298 { 1299 1300 return (host->nh_sysid); 1301 } 1302 1303 int 1304 nlm_host_get_state(struct nlm_host *host) 1305 { 1306 1307 return (host->nh_state); 1308 } 1309 1310 void * 1311 nlm_register_wait_lock(struct nlm4_lock *lock, struct vnode *vp) 1312 { 1313 struct nlm_waiting_lock *nw; 1314 1315 nw = malloc(sizeof(struct nlm_waiting_lock), M_NLM, M_WAITOK); 1316 nw->nw_lock = *lock; 1317 memcpy(&nw->nw_fh.fh_bytes, nw->nw_lock.fh.n_bytes, 1318 nw->nw_lock.fh.n_len); 1319 nw->nw_lock.fh.n_bytes = nw->nw_fh.fh_bytes; 1320 nw->nw_waiting = TRUE; 1321 nw->nw_vp = vp; 1322 mtx_lock(&nlm_global_lock); 1323 TAILQ_INSERT_TAIL(&nlm_waiting_locks, nw, nw_link); 1324 mtx_unlock(&nlm_global_lock); 1325 1326 return nw; 1327 } 1328 1329 void 1330 nlm_deregister_wait_lock(void *handle) 1331 { 1332 struct nlm_waiting_lock *nw = handle; 1333 1334 mtx_lock(&nlm_global_lock); 1335 TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link); 1336 mtx_unlock(&nlm_global_lock); 1337 1338 free(nw, M_NLM); 1339 } 1340 1341 int 1342 nlm_wait_lock(void *handle, int timo) 1343 { 1344 struct nlm_waiting_lock *nw = handle; 1345 int error, 
stops_deferred; 1346 1347 /* 1348 * If the granted message arrived before we got here, 1349 * nw->nw_waiting will be FALSE - in that case, don't sleep. 1350 */ 1351 mtx_lock(&nlm_global_lock); 1352 error = 0; 1353 if (nw->nw_waiting) { 1354 stops_deferred = sigdeferstop(SIGDEFERSTOP_ERESTART); 1355 error = msleep(nw, &nlm_global_lock, PCATCH, "nlmlock", timo); 1356 sigallowstop(stops_deferred); 1357 } 1358 TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link); 1359 if (error) { 1360 /* 1361 * The granted message may arrive after the 1362 * interrupt/timeout but before we manage to lock the 1363 * mutex. Detect this by examining nw_lock. 1364 */ 1365 if (!nw->nw_waiting) 1366 error = 0; 1367 } else { 1368 /* 1369 * If nlm_cancel_wait is called, then error will be 1370 * zero but nw_waiting will still be TRUE. We 1371 * translate this into EINTR. 1372 */ 1373 if (nw->nw_waiting) 1374 error = EINTR; 1375 } 1376 mtx_unlock(&nlm_global_lock); 1377 1378 free(nw, M_NLM); 1379 1380 return (error); 1381 } 1382 1383 void 1384 nlm_cancel_wait(struct vnode *vp) 1385 { 1386 struct nlm_waiting_lock *nw; 1387 1388 mtx_lock(&nlm_global_lock); 1389 TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) { 1390 if (nw->nw_vp == vp) { 1391 wakeup(nw); 1392 } 1393 } 1394 mtx_unlock(&nlm_global_lock); 1395 } 1396 1397 1398 /**********************************************************************/ 1399 1400 /* 1401 * Syscall interface with userland. 
1402 */ 1403 1404 extern void nlm_prog_0(struct svc_req *rqstp, SVCXPRT *transp); 1405 extern void nlm_prog_1(struct svc_req *rqstp, SVCXPRT *transp); 1406 extern void nlm_prog_3(struct svc_req *rqstp, SVCXPRT *transp); 1407 extern void nlm_prog_4(struct svc_req *rqstp, SVCXPRT *transp); 1408 1409 static int 1410 nlm_register_services(SVCPOOL *pool, int addr_count, char **addrs) 1411 { 1412 static rpcvers_t versions[] = { 1413 NLM_SM, NLM_VERS, NLM_VERSX, NLM_VERS4 1414 }; 1415 static void (*dispatchers[])(struct svc_req *, SVCXPRT *) = { 1416 nlm_prog_0, nlm_prog_1, nlm_prog_3, nlm_prog_4 1417 }; 1418 1419 SVCXPRT **xprts; 1420 char netid[16]; 1421 char uaddr[128]; 1422 struct netconfig *nconf; 1423 int i, j, error; 1424 1425 if (!addr_count) { 1426 NLM_ERR("NLM: no service addresses given - can't start server"); 1427 return (EINVAL); 1428 } 1429 1430 if (addr_count < 0 || addr_count > 256 ) { 1431 NLM_ERR("NLM: too many service addresses (%d) given, " 1432 "max 256 - can't start server\n", addr_count); 1433 return (EINVAL); 1434 } 1435 1436 xprts = malloc(addr_count * sizeof(SVCXPRT *), M_NLM, M_WAITOK|M_ZERO); 1437 for (i = 0; i < nitems(versions); i++) { 1438 for (j = 0; j < addr_count; j++) { 1439 /* 1440 * Create transports for the first version and 1441 * then just register everything else to the 1442 * same transports. 
1443 */ 1444 if (i == 0) { 1445 char *up; 1446 1447 error = copyin(&addrs[2*j], &up, 1448 sizeof(char*)); 1449 if (error) 1450 goto out; 1451 error = copyinstr(up, netid, sizeof(netid), 1452 NULL); 1453 if (error) 1454 goto out; 1455 error = copyin(&addrs[2*j+1], &up, 1456 sizeof(char*)); 1457 if (error) 1458 goto out; 1459 error = copyinstr(up, uaddr, sizeof(uaddr), 1460 NULL); 1461 if (error) 1462 goto out; 1463 nconf = getnetconfigent(netid); 1464 if (!nconf) { 1465 NLM_ERR("Can't lookup netid %s\n", 1466 netid); 1467 error = EINVAL; 1468 goto out; 1469 } 1470 xprts[j] = svc_tp_create(pool, dispatchers[i], 1471 NLM_PROG, versions[i], uaddr, nconf); 1472 if (!xprts[j]) { 1473 NLM_ERR("NLM: unable to create " 1474 "(NLM_PROG, %d).\n", versions[i]); 1475 error = EINVAL; 1476 goto out; 1477 } 1478 freenetconfigent(nconf); 1479 } else { 1480 nconf = getnetconfigent(xprts[j]->xp_netid); 1481 rpcb_unset(NLM_PROG, versions[i], nconf); 1482 if (!svc_reg(xprts[j], NLM_PROG, versions[i], 1483 dispatchers[i], nconf)) { 1484 NLM_ERR("NLM: can't register " 1485 "(NLM_PROG, %d)\n", versions[i]); 1486 error = EINVAL; 1487 goto out; 1488 } 1489 } 1490 } 1491 } 1492 error = 0; 1493 out: 1494 for (j = 0; j < addr_count; j++) { 1495 if (xprts[j]) 1496 SVC_RELEASE(xprts[j]); 1497 } 1498 free(xprts, M_NLM); 1499 return (error); 1500 } 1501 1502 /* 1503 * Main server entry point. Contacts the local NSM to get its current 1504 * state and send SM_UNMON_ALL. Registers the NLM services and then 1505 * services requests. Does not return until the server is interrupted 1506 * by a signal. 
1507 */ 1508 static int 1509 nlm_server_main(int addr_count, char **addrs) 1510 { 1511 struct thread *td = curthread; 1512 int error; 1513 SVCPOOL *pool = NULL; 1514 struct sockopt opt; 1515 int portlow; 1516 #ifdef INET6 1517 struct sockaddr_in6 sin6; 1518 #endif 1519 struct sockaddr_in sin; 1520 my_id id; 1521 sm_stat smstat; 1522 struct timeval timo; 1523 enum clnt_stat stat; 1524 struct nlm_host *host, *nhost; 1525 struct nlm_waiting_lock *nw; 1526 vop_advlock_t *old_nfs_advlock; 1527 vop_reclaim_t *old_nfs_reclaim; 1528 1529 if (nlm_is_running != 0) { 1530 NLM_ERR("NLM: can't start server - " 1531 "it appears to be running already\n"); 1532 return (EPERM); 1533 } 1534 1535 if (nlm_socket == NULL) { 1536 memset(&opt, 0, sizeof(opt)); 1537 1538 error = socreate(AF_INET, &nlm_socket, SOCK_DGRAM, 0, 1539 td->td_ucred, td); 1540 if (error) { 1541 NLM_ERR("NLM: can't create IPv4 socket - error %d\n", 1542 error); 1543 return (error); 1544 } 1545 opt.sopt_dir = SOPT_SET; 1546 opt.sopt_level = IPPROTO_IP; 1547 opt.sopt_name = IP_PORTRANGE; 1548 portlow = IP_PORTRANGE_LOW; 1549 opt.sopt_val = &portlow; 1550 opt.sopt_valsize = sizeof(portlow); 1551 sosetopt(nlm_socket, &opt); 1552 1553 #ifdef INET6 1554 nlm_socket6 = NULL; 1555 error = socreate(AF_INET6, &nlm_socket6, SOCK_DGRAM, 0, 1556 td->td_ucred, td); 1557 if (error) { 1558 NLM_ERR("NLM: can't create IPv6 socket - error %d\n", 1559 error); 1560 soclose(nlm_socket); 1561 nlm_socket = NULL; 1562 return (error); 1563 } 1564 opt.sopt_dir = SOPT_SET; 1565 opt.sopt_level = IPPROTO_IPV6; 1566 opt.sopt_name = IPV6_PORTRANGE; 1567 portlow = IPV6_PORTRANGE_LOW; 1568 opt.sopt_val = &portlow; 1569 opt.sopt_valsize = sizeof(portlow); 1570 sosetopt(nlm_socket6, &opt); 1571 #endif 1572 } 1573 1574 nlm_auth = authunix_create(curthread->td_ucred); 1575 1576 #ifdef INET6 1577 memset(&sin6, 0, sizeof(sin6)); 1578 sin6.sin6_len = sizeof(sin6); 1579 sin6.sin6_family = AF_INET6; 1580 sin6.sin6_addr = in6addr_loopback; 1581 nlm_nsm = 
nlm_get_rpc((struct sockaddr *) &sin6, SM_PROG, SM_VERS); 1582 if (!nlm_nsm) { 1583 #endif 1584 memset(&sin, 0, sizeof(sin)); 1585 sin.sin_len = sizeof(sin); 1586 sin.sin_family = AF_INET; 1587 sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); 1588 nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin, SM_PROG, 1589 SM_VERS); 1590 #ifdef INET6 1591 } 1592 #endif 1593 1594 if (!nlm_nsm) { 1595 NLM_ERR("Can't start NLM - unable to contact NSM\n"); 1596 error = EINVAL; 1597 goto out; 1598 } 1599 1600 pool = svcpool_create("NLM", NULL); 1601 1602 error = nlm_register_services(pool, addr_count, addrs); 1603 if (error) 1604 goto out; 1605 1606 memset(&id, 0, sizeof(id)); 1607 id.my_name = "NFS NLM"; 1608 1609 timo.tv_sec = 25; 1610 timo.tv_usec = 0; 1611 stat = CLNT_CALL(nlm_nsm, SM_UNMON_ALL, 1612 (xdrproc_t) xdr_my_id, &id, 1613 (xdrproc_t) xdr_sm_stat, &smstat, timo); 1614 1615 if (stat != RPC_SUCCESS) { 1616 struct rpc_err err; 1617 1618 CLNT_GETERR(nlm_nsm, &err); 1619 NLM_ERR("NLM: unexpected error contacting NSM, " 1620 "stat=%d, errno=%d\n", stat, err.re_errno); 1621 error = EINVAL; 1622 goto out; 1623 } 1624 nlm_is_running = 1; 1625 1626 NLM_DEBUG(1, "NLM: local NSM state is %d\n", smstat.state); 1627 nlm_nsm_state = smstat.state; 1628 1629 old_nfs_advlock = nfs_advlock_p; 1630 nfs_advlock_p = nlm_advlock; 1631 old_nfs_reclaim = nfs_reclaim_p; 1632 nfs_reclaim_p = nlm_reclaim; 1633 1634 svc_run(pool); 1635 error = 0; 1636 1637 nfs_advlock_p = old_nfs_advlock; 1638 nfs_reclaim_p = old_nfs_reclaim; 1639 1640 out: 1641 nlm_is_running = 0; 1642 if (pool) 1643 svcpool_destroy(pool); 1644 1645 /* 1646 * We are finished communicating with the NSM. 1647 */ 1648 if (nlm_nsm) { 1649 CLNT_RELEASE(nlm_nsm); 1650 nlm_nsm = NULL; 1651 } 1652 1653 /* 1654 * Trash all the existing state so that if the server 1655 * restarts, it gets a clean slate. This is complicated by the 1656 * possibility that there may be other threads trying to make 1657 * client locking requests. 
1658 * 1659 * First we fake a client reboot notification which will 1660 * cancel any pending async locks and purge remote lock state 1661 * from the local lock manager. We release the reference from 1662 * nlm_hosts to the host (which may remove it from the list 1663 * and free it). After this phase, the only entries in the 1664 * nlm_host list should be from other threads performing 1665 * client lock requests. 1666 */ 1667 mtx_lock(&nlm_global_lock); 1668 TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) { 1669 wakeup(nw); 1670 } 1671 TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, nhost) { 1672 mtx_unlock(&nlm_global_lock); 1673 nlm_host_notify(host, 0); 1674 nlm_host_release(host); 1675 mtx_lock(&nlm_global_lock); 1676 } 1677 mtx_unlock(&nlm_global_lock); 1678 1679 AUTH_DESTROY(nlm_auth); 1680 1681 return (error); 1682 } 1683 1684 int 1685 sys_nlm_syscall(struct thread *td, struct nlm_syscall_args *uap) 1686 { 1687 int error; 1688 1689 error = priv_check(td, PRIV_NFS_LOCKD); 1690 if (error) 1691 return (error); 1692 1693 nlm_debug_level = uap->debug_level; 1694 nlm_grace_threshold = time_uptime + uap->grace_period; 1695 nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD; 1696 1697 return nlm_server_main(uap->addr_count, uap->addrs); 1698 } 1699 1700 /**********************************************************************/ 1701 1702 /* 1703 * NLM implementation details, called from the RPC stubs. 
1704 */ 1705 1706 1707 void 1708 nlm_sm_notify(struct nlm_sm_status *argp) 1709 { 1710 uint32_t sysid; 1711 struct nlm_host *host; 1712 1713 NLM_DEBUG(3, "nlm_sm_notify(): mon_name = %s\n", argp->mon_name); 1714 memcpy(&sysid, &argp->priv, sizeof(sysid)); 1715 host = nlm_find_host_by_sysid(sysid); 1716 if (host) { 1717 nlm_host_notify(host, argp->state); 1718 nlm_host_release(host); 1719 } 1720 } 1721 1722 static void 1723 nlm_convert_to_fhandle_t(fhandle_t *fhp, struct netobj *p) 1724 { 1725 memcpy(fhp, p->n_bytes, sizeof(fhandle_t)); 1726 } 1727 1728 struct vfs_state { 1729 struct mount *vs_mp; 1730 struct vnode *vs_vp; 1731 int vs_vnlocked; 1732 }; 1733 1734 static int 1735 nlm_get_vfs_state(struct nlm_host *host, struct svc_req *rqstp, 1736 fhandle_t *fhp, struct vfs_state *vs, accmode_t accmode) 1737 { 1738 int error; 1739 uint64_t exflags; 1740 struct ucred *cred = NULL, *credanon = NULL; 1741 1742 memset(vs, 0, sizeof(*vs)); 1743 1744 vs->vs_mp = vfs_getvfs(&fhp->fh_fsid); 1745 if (!vs->vs_mp) { 1746 return (ESTALE); 1747 } 1748 1749 /* accmode == 0 means don't check, since it is an unlock. */ 1750 if (accmode != 0) { 1751 error = VFS_CHECKEXP(vs->vs_mp, 1752 (struct sockaddr *)&host->nh_addr, &exflags, &credanon, 1753 NULL, NULL); 1754 if (error) 1755 goto out; 1756 1757 if (exflags & MNT_EXRDONLY || 1758 (vs->vs_mp->mnt_flag & MNT_RDONLY)) { 1759 error = EROFS; 1760 goto out; 1761 } 1762 } 1763 1764 error = VFS_FHTOVP(vs->vs_mp, &fhp->fh_fid, LK_EXCLUSIVE, &vs->vs_vp); 1765 if (error) 1766 goto out; 1767 vs->vs_vnlocked = TRUE; 1768 1769 if (accmode != 0) { 1770 if (!svc_getcred(rqstp, &cred, NULL)) { 1771 error = EINVAL; 1772 goto out; 1773 } 1774 if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) { 1775 crfree(cred); 1776 cred = credanon; 1777 credanon = NULL; 1778 } 1779 1780 /* 1781 * Check cred. 
1782 */ 1783 error = VOP_ACCESS(vs->vs_vp, accmode, cred, curthread); 1784 /* 1785 * If this failed and accmode != VWRITE, try again with 1786 * VWRITE to maintain backwards compatibility with the 1787 * old code that always used VWRITE. 1788 */ 1789 if (error != 0 && accmode != VWRITE) 1790 error = VOP_ACCESS(vs->vs_vp, VWRITE, cred, curthread); 1791 if (error) 1792 goto out; 1793 } 1794 1795 VOP_UNLOCK(vs->vs_vp); 1796 vs->vs_vnlocked = FALSE; 1797 1798 out: 1799 if (cred) 1800 crfree(cred); 1801 if (credanon) 1802 crfree(credanon); 1803 1804 return (error); 1805 } 1806 1807 static void 1808 nlm_release_vfs_state(struct vfs_state *vs) 1809 { 1810 1811 if (vs->vs_vp) { 1812 if (vs->vs_vnlocked) 1813 vput(vs->vs_vp); 1814 else 1815 vrele(vs->vs_vp); 1816 } 1817 if (vs->vs_mp) 1818 vfs_rel(vs->vs_mp); 1819 } 1820 1821 static nlm4_stats 1822 nlm_convert_error(int error) 1823 { 1824 1825 if (error == ESTALE) 1826 return nlm4_stale_fh; 1827 else if (error == EROFS) 1828 return nlm4_rofs; 1829 else 1830 return nlm4_failed; 1831 } 1832 1833 int 1834 nlm_do_test(nlm4_testargs *argp, nlm4_testres *result, struct svc_req *rqstp, 1835 CLIENT **rpcp) 1836 { 1837 fhandle_t fh; 1838 struct vfs_state vs; 1839 struct nlm_host *host, *bhost; 1840 int error, sysid; 1841 struct flock fl; 1842 accmode_t accmode; 1843 1844 memset(result, 0, sizeof(*result)); 1845 memset(&vs, 0, sizeof(vs)); 1846 1847 host = nlm_find_host_by_name(argp->alock.caller_name, 1848 svc_getrpccaller(rqstp), rqstp->rq_vers); 1849 if (!host) { 1850 result->stat.stat = nlm4_denied_nolocks; 1851 return (ENOMEM); 1852 } 1853 1854 NLM_DEBUG(3, "nlm_do_test(): caller_name = %s (sysid = %d)\n", 1855 host->nh_caller_name, host->nh_sysid); 1856 1857 nlm_check_expired_locks(host); 1858 sysid = host->nh_sysid; 1859 1860 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 1861 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 1862 1863 if (time_uptime < nlm_grace_threshold) { 1864 result->stat.stat = 
nlm4_denied_grace_period; 1865 goto out; 1866 } 1867 1868 accmode = argp->exclusive ? VWRITE : VREAD; 1869 error = nlm_get_vfs_state(host, rqstp, &fh, &vs, accmode); 1870 if (error) { 1871 result->stat.stat = nlm_convert_error(error); 1872 goto out; 1873 } 1874 1875 fl.l_start = argp->alock.l_offset; 1876 fl.l_len = argp->alock.l_len; 1877 fl.l_pid = argp->alock.svid; 1878 fl.l_sysid = sysid; 1879 fl.l_whence = SEEK_SET; 1880 if (argp->exclusive) 1881 fl.l_type = F_WRLCK; 1882 else 1883 fl.l_type = F_RDLCK; 1884 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_GETLK, &fl, F_REMOTE); 1885 if (error) { 1886 result->stat.stat = nlm4_failed; 1887 goto out; 1888 } 1889 1890 if (fl.l_type == F_UNLCK) { 1891 result->stat.stat = nlm4_granted; 1892 } else { 1893 result->stat.stat = nlm4_denied; 1894 result->stat.nlm4_testrply_u.holder.exclusive = 1895 (fl.l_type == F_WRLCK); 1896 result->stat.nlm4_testrply_u.holder.svid = fl.l_pid; 1897 bhost = nlm_find_host_by_sysid(fl.l_sysid); 1898 if (bhost) { 1899 /* 1900 * We don't have any useful way of recording 1901 * the value of oh used in the original lock 1902 * request. Ideally, the test reply would have 1903 * a space for the owning host's name allowing 1904 * our caller's NLM to keep track. 1905 * 1906 * As far as I can see, Solaris uses an eight 1907 * byte structure for oh which contains a four 1908 * byte pid encoded in local byte order and 1909 * the first four bytes of the host 1910 * name. Linux uses a variable length string 1911 * 'pid@hostname' in ascii but doesn't even 1912 * return that in test replies. 1913 * 1914 * For the moment, return nothing in oh 1915 * (already zero'ed above). 
1916 */ 1917 nlm_host_release(bhost); 1918 } 1919 result->stat.nlm4_testrply_u.holder.l_offset = fl.l_start; 1920 result->stat.nlm4_testrply_u.holder.l_len = fl.l_len; 1921 } 1922 1923 out: 1924 nlm_release_vfs_state(&vs); 1925 if (rpcp) 1926 *rpcp = nlm_host_get_rpc(host, TRUE); 1927 nlm_host_release(host); 1928 return (0); 1929 } 1930 1931 int 1932 nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp, 1933 bool_t monitor, CLIENT **rpcp) 1934 { 1935 fhandle_t fh; 1936 struct vfs_state vs; 1937 struct nlm_host *host; 1938 int error, sysid; 1939 struct flock fl; 1940 accmode_t accmode; 1941 1942 memset(result, 0, sizeof(*result)); 1943 memset(&vs, 0, sizeof(vs)); 1944 1945 host = nlm_find_host_by_name(argp->alock.caller_name, 1946 svc_getrpccaller(rqstp), rqstp->rq_vers); 1947 if (!host) { 1948 result->stat.stat = nlm4_denied_nolocks; 1949 return (ENOMEM); 1950 } 1951 1952 NLM_DEBUG(3, "nlm_do_lock(): caller_name = %s (sysid = %d)\n", 1953 host->nh_caller_name, host->nh_sysid); 1954 1955 if (monitor && host->nh_state && argp->state 1956 && host->nh_state != argp->state) { 1957 /* 1958 * The host rebooted without telling us. Trash its 1959 * locks. 1960 */ 1961 nlm_host_notify(host, argp->state); 1962 } 1963 1964 nlm_check_expired_locks(host); 1965 sysid = host->nh_sysid; 1966 1967 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 1968 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 1969 1970 if (time_uptime < nlm_grace_threshold && !argp->reclaim) { 1971 result->stat.stat = nlm4_denied_grace_period; 1972 goto out; 1973 } 1974 1975 accmode = argp->exclusive ? 
VWRITE : VREAD; 1976 error = nlm_get_vfs_state(host, rqstp, &fh, &vs, accmode); 1977 if (error) { 1978 result->stat.stat = nlm_convert_error(error); 1979 goto out; 1980 } 1981 1982 fl.l_start = argp->alock.l_offset; 1983 fl.l_len = argp->alock.l_len; 1984 fl.l_pid = argp->alock.svid; 1985 fl.l_sysid = sysid; 1986 fl.l_whence = SEEK_SET; 1987 if (argp->exclusive) 1988 fl.l_type = F_WRLCK; 1989 else 1990 fl.l_type = F_RDLCK; 1991 if (argp->block) { 1992 struct nlm_async_lock *af; 1993 CLIENT *client; 1994 struct nlm_grantcookie cookie; 1995 1996 /* 1997 * First, make sure we can contact the host's NLM. 1998 */ 1999 client = nlm_host_get_rpc(host, TRUE); 2000 if (!client) { 2001 result->stat.stat = nlm4_failed; 2002 goto out; 2003 } 2004 2005 /* 2006 * First we need to check and see if there is an 2007 * existing blocked lock that matches. This could be a 2008 * badly behaved client or an RPC re-send. If we find 2009 * one, just return nlm4_blocked. 2010 */ 2011 mtx_lock(&host->nh_lock); 2012 TAILQ_FOREACH(af, &host->nh_pending, af_link) { 2013 if (af->af_fl.l_start == fl.l_start 2014 && af->af_fl.l_len == fl.l_len 2015 && af->af_fl.l_pid == fl.l_pid 2016 && af->af_fl.l_type == fl.l_type) { 2017 break; 2018 } 2019 } 2020 if (!af) { 2021 cookie.ng_sysid = host->nh_sysid; 2022 cookie.ng_cookie = host->nh_grantcookie++; 2023 } 2024 mtx_unlock(&host->nh_lock); 2025 if (af) { 2026 CLNT_RELEASE(client); 2027 result->stat.stat = nlm4_blocked; 2028 goto out; 2029 } 2030 2031 af = malloc(sizeof(struct nlm_async_lock), M_NLM, 2032 M_WAITOK|M_ZERO); 2033 TASK_INIT(&af->af_task, 0, nlm_lock_callback, af); 2034 af->af_vp = vs.vs_vp; 2035 af->af_fl = fl; 2036 af->af_host = host; 2037 af->af_rpc = client; 2038 /* 2039 * We use M_RPC here so that we can xdr_free the thing 2040 * later. 
2041 */ 2042 nlm_make_netobj(&af->af_granted.cookie, 2043 (caddr_t)&cookie, sizeof(cookie), M_RPC); 2044 af->af_granted.exclusive = argp->exclusive; 2045 af->af_granted.alock.caller_name = 2046 strdup(argp->alock.caller_name, M_RPC); 2047 nlm_copy_netobj(&af->af_granted.alock.fh, 2048 &argp->alock.fh, M_RPC); 2049 nlm_copy_netobj(&af->af_granted.alock.oh, 2050 &argp->alock.oh, M_RPC); 2051 af->af_granted.alock.svid = argp->alock.svid; 2052 af->af_granted.alock.l_offset = argp->alock.l_offset; 2053 af->af_granted.alock.l_len = argp->alock.l_len; 2054 2055 /* 2056 * Put the entry on the pending list before calling 2057 * VOP_ADVLOCKASYNC. We do this in case the lock 2058 * request was blocked (returning EINPROGRESS) but 2059 * then granted before we manage to run again. The 2060 * client may receive the granted message before we 2061 * send our blocked reply but thats their problem. 2062 */ 2063 mtx_lock(&host->nh_lock); 2064 TAILQ_INSERT_TAIL(&host->nh_pending, af, af_link); 2065 mtx_unlock(&host->nh_lock); 2066 2067 error = VOP_ADVLOCKASYNC(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE, 2068 &af->af_task, &af->af_cookie); 2069 2070 /* 2071 * If the lock completed synchronously, just free the 2072 * tracking structure now. 2073 */ 2074 if (error != EINPROGRESS) { 2075 CLNT_RELEASE(af->af_rpc); 2076 mtx_lock(&host->nh_lock); 2077 TAILQ_REMOVE(&host->nh_pending, af, af_link); 2078 mtx_unlock(&host->nh_lock); 2079 xdr_free((xdrproc_t) xdr_nlm4_testargs, 2080 &af->af_granted); 2081 free(af, M_NLM); 2082 } else { 2083 NLM_DEBUG(2, "NLM: pending async lock %p for %s " 2084 "(sysid %d)\n", af, host->nh_caller_name, sysid); 2085 /* 2086 * Don't vrele the vnode just yet - this must 2087 * wait until either the async callback 2088 * happens or the lock is cancelled. 
2089 */ 2090 vs.vs_vp = NULL; 2091 } 2092 } else { 2093 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE); 2094 } 2095 2096 if (error) { 2097 if (error == EINPROGRESS) { 2098 result->stat.stat = nlm4_blocked; 2099 } else if (error == EDEADLK) { 2100 result->stat.stat = nlm4_deadlck; 2101 } else if (error == EAGAIN) { 2102 result->stat.stat = nlm4_denied; 2103 } else { 2104 result->stat.stat = nlm4_failed; 2105 } 2106 } else { 2107 if (monitor) 2108 nlm_host_monitor(host, argp->state); 2109 result->stat.stat = nlm4_granted; 2110 } 2111 2112 out: 2113 nlm_release_vfs_state(&vs); 2114 if (rpcp) 2115 *rpcp = nlm_host_get_rpc(host, TRUE); 2116 nlm_host_release(host); 2117 return (0); 2118 } 2119 2120 int 2121 nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *result, struct svc_req *rqstp, 2122 CLIENT **rpcp) 2123 { 2124 fhandle_t fh; 2125 struct vfs_state vs; 2126 struct nlm_host *host; 2127 int error, sysid; 2128 struct flock fl; 2129 struct nlm_async_lock *af; 2130 2131 memset(result, 0, sizeof(*result)); 2132 memset(&vs, 0, sizeof(vs)); 2133 2134 host = nlm_find_host_by_name(argp->alock.caller_name, 2135 svc_getrpccaller(rqstp), rqstp->rq_vers); 2136 if (!host) { 2137 result->stat.stat = nlm4_denied_nolocks; 2138 return (ENOMEM); 2139 } 2140 2141 NLM_DEBUG(3, "nlm_do_cancel(): caller_name = %s (sysid = %d)\n", 2142 host->nh_caller_name, host->nh_sysid); 2143 2144 nlm_check_expired_locks(host); 2145 sysid = host->nh_sysid; 2146 2147 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 2148 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 2149 2150 if (time_uptime < nlm_grace_threshold) { 2151 result->stat.stat = nlm4_denied_grace_period; 2152 goto out; 2153 } 2154 2155 error = nlm_get_vfs_state(host, rqstp, &fh, &vs, (accmode_t)0); 2156 if (error) { 2157 result->stat.stat = nlm_convert_error(error); 2158 goto out; 2159 } 2160 2161 fl.l_start = argp->alock.l_offset; 2162 fl.l_len = argp->alock.l_len; 2163 fl.l_pid = argp->alock.svid; 2164 fl.l_sysid = sysid; 
2165 fl.l_whence = SEEK_SET; 2166 if (argp->exclusive) 2167 fl.l_type = F_WRLCK; 2168 else 2169 fl.l_type = F_RDLCK; 2170 2171 /* 2172 * First we need to try and find the async lock request - if 2173 * there isn't one, we give up and return nlm4_denied. 2174 */ 2175 mtx_lock(&host->nh_lock); 2176 2177 TAILQ_FOREACH(af, &host->nh_pending, af_link) { 2178 if (af->af_fl.l_start == fl.l_start 2179 && af->af_fl.l_len == fl.l_len 2180 && af->af_fl.l_pid == fl.l_pid 2181 && af->af_fl.l_type == fl.l_type) { 2182 break; 2183 } 2184 } 2185 2186 if (!af) { 2187 mtx_unlock(&host->nh_lock); 2188 result->stat.stat = nlm4_denied; 2189 goto out; 2190 } 2191 2192 error = nlm_cancel_async_lock(af); 2193 2194 if (error) { 2195 result->stat.stat = nlm4_denied; 2196 } else { 2197 result->stat.stat = nlm4_granted; 2198 } 2199 2200 mtx_unlock(&host->nh_lock); 2201 2202 out: 2203 nlm_release_vfs_state(&vs); 2204 if (rpcp) 2205 *rpcp = nlm_host_get_rpc(host, TRUE); 2206 nlm_host_release(host); 2207 return (0); 2208 } 2209 2210 int 2211 nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *result, struct svc_req *rqstp, 2212 CLIENT **rpcp) 2213 { 2214 fhandle_t fh; 2215 struct vfs_state vs; 2216 struct nlm_host *host; 2217 int error, sysid; 2218 struct flock fl; 2219 2220 memset(result, 0, sizeof(*result)); 2221 memset(&vs, 0, sizeof(vs)); 2222 2223 host = nlm_find_host_by_name(argp->alock.caller_name, 2224 svc_getrpccaller(rqstp), rqstp->rq_vers); 2225 if (!host) { 2226 result->stat.stat = nlm4_denied_nolocks; 2227 return (ENOMEM); 2228 } 2229 2230 NLM_DEBUG(3, "nlm_do_unlock(): caller_name = %s (sysid = %d)\n", 2231 host->nh_caller_name, host->nh_sysid); 2232 2233 nlm_check_expired_locks(host); 2234 sysid = host->nh_sysid; 2235 2236 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 2237 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 2238 2239 if (time_uptime < nlm_grace_threshold) { 2240 result->stat.stat = nlm4_denied_grace_period; 2241 goto out; 2242 } 2243 2244 error = 
nlm_get_vfs_state(host, rqstp, &fh, &vs, (accmode_t)0); 2245 if (error) { 2246 result->stat.stat = nlm_convert_error(error); 2247 goto out; 2248 } 2249 2250 fl.l_start = argp->alock.l_offset; 2251 fl.l_len = argp->alock.l_len; 2252 fl.l_pid = argp->alock.svid; 2253 fl.l_sysid = sysid; 2254 fl.l_whence = SEEK_SET; 2255 fl.l_type = F_UNLCK; 2256 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_UNLCK, &fl, F_REMOTE); 2257 2258 /* 2259 * Ignore the error - there is no result code for failure, 2260 * only for grace period. 2261 */ 2262 result->stat.stat = nlm4_granted; 2263 2264 out: 2265 nlm_release_vfs_state(&vs); 2266 if (rpcp) 2267 *rpcp = nlm_host_get_rpc(host, TRUE); 2268 nlm_host_release(host); 2269 return (0); 2270 } 2271 2272 int 2273 nlm_do_granted(nlm4_testargs *argp, nlm4_res *result, struct svc_req *rqstp, 2274 2275 CLIENT **rpcp) 2276 { 2277 struct nlm_host *host; 2278 struct nlm_waiting_lock *nw; 2279 2280 memset(result, 0, sizeof(*result)); 2281 2282 host = nlm_find_host_by_addr(svc_getrpccaller(rqstp), rqstp->rq_vers); 2283 if (!host) { 2284 result->stat.stat = nlm4_denied_nolocks; 2285 return (ENOMEM); 2286 } 2287 2288 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 2289 result->stat.stat = nlm4_denied; 2290 KFAIL_POINT_CODE(DEBUG_FP, nlm_deny_grant, goto out); 2291 2292 mtx_lock(&nlm_global_lock); 2293 TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) { 2294 if (!nw->nw_waiting) 2295 continue; 2296 if (argp->alock.svid == nw->nw_lock.svid 2297 && argp->alock.l_offset == nw->nw_lock.l_offset 2298 && argp->alock.l_len == nw->nw_lock.l_len 2299 && argp->alock.fh.n_len == nw->nw_lock.fh.n_len 2300 && !memcmp(argp->alock.fh.n_bytes, nw->nw_lock.fh.n_bytes, 2301 nw->nw_lock.fh.n_len)) { 2302 nw->nw_waiting = FALSE; 2303 wakeup(nw); 2304 result->stat.stat = nlm4_granted; 2305 break; 2306 } 2307 } 2308 mtx_unlock(&nlm_global_lock); 2309 2310 out: 2311 if (rpcp) 2312 *rpcp = nlm_host_get_rpc(host, TRUE); 2313 nlm_host_release(host); 2314 return (0); 2315 } 2316 2317 
void 2318 nlm_do_granted_res(nlm4_res *argp, struct svc_req *rqstp) 2319 { 2320 struct nlm_host *host = NULL; 2321 struct nlm_async_lock *af = NULL; 2322 int error; 2323 2324 if (argp->cookie.n_len != sizeof(struct nlm_grantcookie)) { 2325 NLM_DEBUG(1, "NLM: bogus grant cookie"); 2326 goto out; 2327 } 2328 2329 host = nlm_find_host_by_sysid(ng_sysid(&argp->cookie)); 2330 if (!host) { 2331 NLM_DEBUG(1, "NLM: Unknown host rejected our grant"); 2332 goto out; 2333 } 2334 2335 mtx_lock(&host->nh_lock); 2336 TAILQ_FOREACH(af, &host->nh_granted, af_link) 2337 if (ng_cookie(&argp->cookie) == 2338 ng_cookie(&af->af_granted.cookie)) 2339 break; 2340 if (af) 2341 TAILQ_REMOVE(&host->nh_granted, af, af_link); 2342 mtx_unlock(&host->nh_lock); 2343 2344 if (!af) { 2345 NLM_DEBUG(1, "NLM: host %s (sysid %d) replied to our grant " 2346 "with unrecognized cookie %d:%d", host->nh_caller_name, 2347 host->nh_sysid, ng_sysid(&argp->cookie), 2348 ng_cookie(&argp->cookie)); 2349 goto out; 2350 } 2351 2352 if (argp->stat.stat != nlm4_granted) { 2353 af->af_fl.l_type = F_UNLCK; 2354 error = VOP_ADVLOCK(af->af_vp, NULL, F_UNLCK, &af->af_fl, F_REMOTE); 2355 if (error) { 2356 NLM_DEBUG(1, "NLM: host %s (sysid %d) rejected our grant " 2357 "and we failed to unlock (%d)", host->nh_caller_name, 2358 host->nh_sysid, error); 2359 goto out; 2360 } 2361 2362 NLM_DEBUG(5, "NLM: async lock %p rejected by host %s (sysid %d)", 2363 af, host->nh_caller_name, host->nh_sysid); 2364 } else { 2365 NLM_DEBUG(5, "NLM: async lock %p accepted by host %s (sysid %d)", 2366 af, host->nh_caller_name, host->nh_sysid); 2367 } 2368 2369 out: 2370 if (af) 2371 nlm_free_async_lock(af); 2372 if (host) 2373 nlm_host_release(host); 2374 } 2375 2376 void 2377 nlm_do_free_all(nlm4_notify *argp) 2378 { 2379 struct nlm_host *host, *thost; 2380 2381 TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, thost) { 2382 if (!strcmp(host->nh_caller_name, argp->name)) 2383 nlm_host_notify(host, argp->state); 2384 } 2385 } 2386 2387 /* 2388 * 
Kernel module glue 2389 */ 2390 static int 2391 nfslockd_modevent(module_t mod, int type, void *data) 2392 { 2393 2394 switch (type) { 2395 case MOD_LOAD: 2396 return (nlm_init()); 2397 2398 case MOD_UNLOAD: 2399 nlm_uninit(); 2400 /* The NLM module cannot be safely unloaded. */ 2401 /* FALLTHROUGH */ 2402 default: 2403 return (EOPNOTSUPP); 2404 } 2405 } 2406 static moduledata_t nfslockd_mod = { 2407 "nfslockd", 2408 nfslockd_modevent, 2409 NULL, 2410 }; 2411 DECLARE_MODULE(nfslockd, nfslockd_mod, SI_SUB_VFS, SI_ORDER_ANY); 2412 2413 /* So that loader and kldload(2) can find us, wherever we are.. */ 2414 MODULE_DEPEND(nfslockd, xdr, 1, 1, 1); 2415 MODULE_DEPEND(nfslockd, krpc, 1, 1, 1); 2416 MODULE_DEPEND(nfslockd, nfscommon, 1, 1, 1); 2417 MODULE_VERSION(nfslockd, 1); 2418