1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ 5 * Authors: Doug Rabson <dfr@rabson.org> 6 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
 */

/* Pulls in the INET6 option so the IPv6 paths below are compiled in. */
#include "opt_inet6.h"

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/fail.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lockf.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#if __FreeBSD_version >= 700000
#include <sys/priv.h>
#endif
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <sys/unistd.h>
#include <sys/vnode.h>

#include <nfs/nfsproto.h>
#include <nfs/nfs_lock.h>

#include <nlm/nlm_prot.h>
#include <nlm/sm_inter.h>
#include <nlm/nlm.h>
#include <rpc/rpc_com.h>
#include <rpc/rpcb_prot.h>

MALLOC_DEFINE(M_NLM, "NLM", "Network Lock Manager");

/*
 * If a host is inactive (and holds no locks) for this amount of
 * seconds, we consider it idle and stop tracking it.
 */
#define NLM_IDLE_TIMEOUT	30

/*
 * We check the host list for idle every few seconds.
 */
#define NLM_IDLE_PERIOD		5

/*
 * We only look for GRANTED_RES messages for a little while.
 */
#define NLM_EXPIRE_TIMEOUT	10

/*
 * Support for sysctl vfs.nlm.sysid: a per-host subtree is attached
 * under vfs.nlm.sysid.<sysid> by nlm_create_host().
 */
static SYSCTL_NODE(_vfs, OID_AUTO, nlm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
    "Network Lock Manager");
static SYSCTL_NODE(_vfs_nlm, OID_AUTO, sysid,
    CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
    "");

/*
 * Syscall hooks: registers the nlm_syscall entry point used by
 * rpc.lockd to hand its sockets to the kernel.
 */
static struct syscall_helper_data nlm_syscalls[] = {
	SYSCALL_INIT_HELPER(nlm_syscall),
	SYSCALL_INIT_LAST
};

/*
 * Debug level passed in from userland. We also support a sysctl hook
 * so that it can be changed on a live system.
 */
static int nlm_debug_level;
SYSCTL_INT(_debug, OID_AUTO, nlm_debug, CTLFLAG_RW, &nlm_debug_level, 0, "");

/* Log to LOG_DEBUG when the configured debug level is at least _level. */
#define NLM_DEBUG(_level, args...)			\
	do {						\
		if (nlm_debug_level >= (_level))	\
			log(LOG_DEBUG, args);		\
	} while(0)
/* Unconditional error logging. */
#define NLM_ERR(args...)			\
	do {					\
		log(LOG_ERR, args);		\
	} while(0)

/*
 * Grace period handling. The value of nlm_grace_threshold is the
 * value of time_uptime after which we are serving requests normally.
 */
static time_t nlm_grace_threshold;

/*
 * We check for idle hosts if time_uptime is greater than
 * nlm_next_idle_check,
 */
static time_t nlm_next_idle_check;

/*
 * A flag to indicate the server is already running.
 */
static int nlm_is_running;

/*
 * A socket to use for RPC - shared by all IPv4 RPC clients.
 */
static struct socket *nlm_socket;

#ifdef INET6

/*
 * A socket to use for RPC - shared by all IPv6 RPC clients.
 */
static struct socket *nlm_socket6;

#endif

/*
 * An RPC client handle that can be used to communicate with the local
 * NSM.
 */
static CLIENT *nlm_nsm;

/*
 * An AUTH handle for the server's creds.
 */
static AUTH *nlm_auth;

/*
 * A zero timeval for sending async RPC messages (fire-and-forget).
 */
struct timeval nlm_zero_tv = { 0, 0 };

/*
 * The local NSM state number
 */
int nlm_nsm_state;


/*
 * A lock to protect the host list and waiting lock list.
 */
static struct mtx nlm_global_lock;

/*
 * Locks:
 * (l)		locked by nh_lock
 * (s)		only accessed via server RPC which is single threaded
 * (g)		locked by nlm_global_lock
 * (c)		const until freeing
 * (a)		modified using atomic ops
 */

/*
 * A pending client-side lock request, stored on the nlm_waiting_locks
 * list.
 */
struct nlm_waiting_lock {
	TAILQ_ENTRY(nlm_waiting_lock) nw_link;	/* (g) */
	bool_t		nw_waiting;		/* (g) */
	nlm4_lock	nw_lock;		/* (c) */
	union nfsfh	nw_fh;			/* (c) */
	struct vnode	*nw_vp;			/* (c) */
};
TAILQ_HEAD(nlm_waiting_lock_list, nlm_waiting_lock);

struct nlm_waiting_lock_list nlm_waiting_locks;	/* (g) */

/*
 * A pending server-side asynchronous lock request, stored on the
 * nh_pending list of the NLM host.
 */
struct nlm_async_lock {
	TAILQ_ENTRY(nlm_async_lock) af_link;	/* (l) host's list of locks */
	struct task	af_task;	/* (c) async callback details */
	void		*af_cookie;	/* (l) lock manager cancel token */
	struct vnode	*af_vp;		/* (l) vnode to lock */
	struct flock	af_fl;		/* (c) lock details */
	struct nlm_host	*af_host;	/* (c) host which is locking */
	CLIENT		*af_rpc;	/* (c) rpc client to send message */
	nlm4_testargs	af_granted;	/* (c) notification details */
	time_t		af_expiretime;	/* (c) notification time */
};
TAILQ_HEAD(nlm_async_lock_list, nlm_async_lock);

/*
 * NLM host.
 */
enum nlm_host_state {
	NLM_UNMONITORED,	/* not yet registered with local NSM */
	NLM_MONITORED,		/* local NSM monitors this host for us */
	NLM_MONITOR_FAILED,	/* local NSM refused to monitor */
	NLM_RECOVERING		/* client lock recovery in progress */
};

/* An RPC client handle plus its creation time, so it can be expired. */
struct nlm_rpc {
	CLIENT		*nr_client;	/* (l) RPC client handle */
	time_t		nr_create_time;	/* (l) when client was created */
};

struct nlm_host {
	struct mtx	nh_lock;
	volatile u_int	nh_refs;	/* (a) reference count */
	TAILQ_ENTRY(nlm_host) nh_link;	/* (g) global list of hosts */
	char		nh_caller_name[MAXNAMELEN]; /* (c) printable name of host */
	uint32_t	nh_sysid;	/* (c) our allocated system ID */
	char		nh_sysid_string[10]; /* (c) string rep. of sysid */
	struct sockaddr_storage	nh_addr; /* (s) remote address of host */
	struct nlm_rpc	nh_srvrpc;	/* (l) RPC for server replies */
	struct nlm_rpc	nh_clntrpc;	/* (l) RPC for client requests */
	rpcvers_t	nh_vers;	/* (s) NLM version of host */
	int		nh_state;	/* (s) last seen NSM state of host */
	enum nlm_host_state nh_monstate; /* (l) local NSM monitoring state */
	time_t		nh_idle_timeout; /* (s) Time at which host is idle */
	struct sysctl_ctx_list nh_sysctl; /* (c) vfs.nlm.sysid nodes */
	uint32_t	nh_grantcookie;	/* (l) grant cookie counter */
	struct nlm_async_lock_list nh_pending;	/* (l) pending async locks */
	struct nlm_async_lock_list nh_granted;	/* (l) granted locks */
	struct nlm_async_lock_list nh_finished;	/* (l) finished async locks */
};
TAILQ_HEAD(nlm_host_list, nlm_host);

static struct nlm_host_list nlm_hosts;	/* (g) */
static uint32_t nlm_next_sysid = 1;	/* (g) */

static void	nlm_host_unmonitor(struct nlm_host *);

/*
 * The (sysid, per-host counter) pair packed into the opaque cookie of
 * an NLM GRANTED message, so the reply can be matched back up.
 */
struct nlm_grantcookie {
	uint32_t	ng_sysid;
	uint32_t	ng_cookie;
};

/* Extract the sysid half of a grant cookie carried in a netobj. */
static inline uint32_t
ng_sysid(struct netobj *src)
{

	return ((struct nlm_grantcookie *)src->n_bytes)->ng_sysid;
}

/* Extract the counter half of a grant cookie carried in a netobj. */
static inline uint32_t
ng_cookie(struct netobj *src)
{

	return ((struct nlm_grantcookie *)src->n_bytes)->ng_cookie;
}

/**********************************************************************/

/*
 * Initialise NLM globals.
 */
static int
nlm_init(void)
{
	int error;

	mtx_init(&nlm_global_lock, "nlm_global_lock", NULL, MTX_DEF);
	TAILQ_INIT(&nlm_waiting_locks);
	TAILQ_INIT(&nlm_hosts);

	error = syscall_helper_register(nlm_syscalls, SY_THR_STATIC_KLD);
	if (error != 0)
		NLM_ERR("Can't register NLM syscall\n");
	return (error);
}

/* Tear down what nlm_init() registered (the syscall hook). */
static void
nlm_uninit(void)
{

	syscall_helper_unregister(nlm_syscalls);
}

/*
 * Create a netobj from an arbitrary source. Allocates a copy of the
 * src bytes; caller owns dst->n_bytes afterwards (freed with 'type').
 */
void
nlm_make_netobj(struct netobj *dst, caddr_t src, size_t srcsize,
    struct malloc_type *type)
{

	dst->n_len = srcsize;
	dst->n_bytes = malloc(srcsize, type, M_WAITOK);
	memcpy(dst->n_bytes, src, srcsize);
}

/*
 * Copy a struct netobj.
 */
void
nlm_copy_netobj(struct netobj *dst, struct netobj *src,
    struct malloc_type *type)
{

	nlm_make_netobj(dst, src->n_bytes, src->n_len, type);
}


/*
 * Create an RPC client handle for the given (address,prog,vers)
 * triple using UDP.
 *
 * Queries the remote rpcbind/portmapper for the service's UDP port,
 * falling back from rpcbind v4 to v3 to the old portmap protocol,
 * and finally retrying over TCP if the UDP registration is absent.
 * Returns NULL if the address family is unsupported or the remote
 * binder cannot be contacted.
 */
static CLIENT *
nlm_get_rpc(struct sockaddr *sa, rpcprog_t prog, rpcvers_t vers)
{
	char *wchan = "nlmrcv";
	struct sockaddr_storage ss;
	struct socket *so;
	CLIENT *rpcb;
	struct timeval timo;
	RPCB parms;
	char *uaddr;
	enum clnt_stat stat = RPC_SUCCESS;
	int rpcvers = RPCBVERS4;
	bool_t do_tcp = FALSE;
	bool_t tryagain = FALSE;
	struct portmap mapping;
	u_short port = 0;

	/*
	 * First we need to contact the remote RPCBIND service to find
	 * the right port. Port 111 is the well-known rpcbind port.
	 */
	memcpy(&ss, sa, sa->sa_len);
	switch (ss.ss_family) {
	case AF_INET:
		((struct sockaddr_in *)&ss)->sin_port = htons(111);
		so = nlm_socket;
		break;
#ifdef INET6
	case AF_INET6:
		((struct sockaddr_in6 *)&ss)->sin6_port = htons(111);
		so = nlm_socket6;
		break;
#endif

	default:
		/*
		 * Unsupported address family - fail.
		 */
		return (NULL);
	}

	rpcb = clnt_dg_create(so, (struct sockaddr *)&ss,
	    RPCBPROG, rpcvers, 0, 0);
	if (!rpcb)
		return (NULL);

try_tcp:
	parms.r_prog = prog;
	parms.r_vers = vers;
	if (do_tcp)
		parms.r_netid = "tcp";
	else
		parms.r_netid = "udp";
	parms.r_addr = "";
	parms.r_owner = "";

	/*
	 * Use the default timeout.
	 */
	timo.tv_sec = 25;
	timo.tv_usec = 0;
again:
	switch (rpcvers) {
	case RPCBVERS4:
	case RPCBVERS:
		/*
		 * Try RPCBIND 4 then 3.
		 */
		uaddr = NULL;
		stat = CLNT_CALL(rpcb, (rpcprog_t) RPCBPROC_GETADDR,
		    (xdrproc_t) xdr_rpcb, &parms,
		    (xdrproc_t) xdr_wrapstring, &uaddr, timo);
		if (stat == RPC_SUCCESS) {
			/*
			 * We have a reply from the remote RPCBIND - turn it
			 * into an appropriate address and make a new client
			 * that can talk to the remote NLM.
			 *
			 * XXX fixup IPv6 scope ID.
			 */
			struct netbuf *a;
			a = __rpc_uaddr2taddr_af(ss.ss_family, uaddr);
			if (!a) {
				/* Malformed universal address; drop a version. */
				tryagain = TRUE;
			} else {
				tryagain = FALSE;
				memcpy(&ss, a->buf, a->len);
				free(a->buf, M_RPC);
				free(a, M_RPC);
				xdr_free((xdrproc_t) xdr_wrapstring, &uaddr);
			}
		}
		if (tryagain || stat == RPC_PROGVERSMISMATCH) {
			/* Fall back: RPCBIND v4 -> v3 -> old portmap. */
			if (rpcvers == RPCBVERS4)
				rpcvers = RPCBVERS;
			else if (rpcvers == RPCBVERS)
				rpcvers = PMAPVERS;
			CLNT_CONTROL(rpcb, CLSET_VERS, &rpcvers);
			goto again;
		}
		break;
	case PMAPVERS:
		/*
		 * Try portmap.
		 */
		mapping.pm_prog = parms.r_prog;
		mapping.pm_vers = parms.r_vers;
		mapping.pm_prot = do_tcp ? IPPROTO_TCP : IPPROTO_UDP;
		mapping.pm_port = 0;

		stat = CLNT_CALL(rpcb, (rpcprog_t) PMAPPROC_GETPORT,
		    (xdrproc_t) xdr_portmap, &mapping,
		    (xdrproc_t) xdr_u_short, &port, timo);

		if (stat == RPC_SUCCESS) {
			switch (ss.ss_family) {
			case AF_INET:
				((struct sockaddr_in *)&ss)->sin_port =
					htons(port);
				break;

#ifdef INET6
			case AF_INET6:
				((struct sockaddr_in6 *)&ss)->sin6_port =
					htons(port);
				break;
#endif
			}
		}
		break;
	default:
		panic("invalid rpcvers %d", rpcvers);
	}
	/*
	 * We may have a positive response from the portmapper, but the NLM
	 * service was not found. Make sure we received a valid port.
	 */
	switch (ss.ss_family) {
	case AF_INET:
		port = ((struct sockaddr_in *)&ss)->sin_port;
		break;
#ifdef INET6
	case AF_INET6:
		port = ((struct sockaddr_in6 *)&ss)->sin6_port;
		break;
#endif
	}
	if (stat != RPC_SUCCESS || !port) {
		/*
		 * If we were able to talk to rpcbind or portmap, but the udp
		 * variant wasn't available, ask about tcp.
		 *
		 * XXX - We could also check for a TCP portmapper, but
		 * if the host is running a portmapper at all, we should be able
		 * to hail it over UDP.
		 */
		if (stat == RPC_SUCCESS && !do_tcp) {
			do_tcp = TRUE;
			goto try_tcp;
		}

		/* Otherwise, bad news. */
		NLM_ERR("NLM: failed to contact remote rpcbind, "
		    "stat = %d, port = %d\n", (int) stat, port);
		CLNT_DESTROY(rpcb);
		return (NULL);
	}

	if (do_tcp) {
		/*
		 * Destroy the UDP client we used to speak to rpcbind and
		 * recreate as a TCP client.
		 */
		struct netconfig *nconf = NULL;

		CLNT_DESTROY(rpcb);

		switch (ss.ss_family) {
		case AF_INET:
			nconf = getnetconfigent("tcp");
			break;
#ifdef INET6
		case AF_INET6:
			nconf = getnetconfigent("tcp6");
			break;
#endif
		}

		rpcb = clnt_reconnect_create(nconf, (struct sockaddr *)&ss,
		    prog, vers, 0, 0);
		CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan);
		rpcb->cl_auth = nlm_auth;

	} else {
		/*
		 * Re-use the client we used to speak to rpcbind.
		 */
		CLNT_CONTROL(rpcb, CLSET_SVC_ADDR, &ss);
		CLNT_CONTROL(rpcb, CLSET_PROG, &prog);
		CLNT_CONTROL(rpcb, CLSET_VERS, &vers);
		CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan);
		rpcb->cl_auth = nlm_auth;
	}

	return (rpcb);
}

/*
 * This callback runs after an async lock request has been granted by
 * the local lock manager. We notify the host which initiated the
 * request, then move the entry to the host's nh_granted list to await
 * the GRANTED_RES reply.
 */
static void
nlm_lock_callback(void *arg, int pending)
{
	struct nlm_async_lock *af = (struct nlm_async_lock *) arg;
	struct rpc_callextra ext;

	NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) granted,"
	    " cookie %d:%d\n", af, af->af_host->nh_caller_name,
	    af->af_host->nh_sysid, ng_sysid(&af->af_granted.cookie),
	    ng_cookie(&af->af_granted.cookie));

	/*
	 * Send the results back to the host.
	 *
	 * Note: there is a possible race here with nlm_host_notify
	 * destroying the RPC client. To avoid problems, the first
	 * thing nlm_host_notify does is to cancel pending async lock
	 * requests.
	 */
	memset(&ext, 0, sizeof(ext));
	ext.rc_auth = nlm_auth;
	if (af->af_host->nh_vers == NLM_VERS4) {
		nlm4_granted_msg_4(&af->af_granted,
		    NULL, af->af_rpc, &ext, nlm_zero_tv);
	} else {
		/*
		 * Back-convert to legacy protocol
		 */
		nlm_testargs granted;
		granted.cookie = af->af_granted.cookie;
		granted.exclusive = af->af_granted.exclusive;
		granted.alock.caller_name =
			af->af_granted.alock.caller_name;
		granted.alock.fh = af->af_granted.alock.fh;
		granted.alock.oh = af->af_granted.alock.oh;
		granted.alock.svid = af->af_granted.alock.svid;
		granted.alock.l_offset =
			af->af_granted.alock.l_offset;
		granted.alock.l_len =
			af->af_granted.alock.l_len;

		nlm_granted_msg_1(&granted,
		    NULL, af->af_rpc, &ext, nlm_zero_tv);
	}

	/*
	 * Move this entry to the nh_granted list.
	 */
	af->af_expiretime = time_uptime + NLM_EXPIRE_TIMEOUT;
	mtx_lock(&af->af_host->nh_lock);
	TAILQ_REMOVE(&af->af_host->nh_pending, af, af_link);
	TAILQ_INSERT_TAIL(&af->af_host->nh_granted, af, af_link);
	mtx_unlock(&af->af_host->nh_lock);
}

/*
 * Free an async lock request. The request must have been removed from
 * any list.
 */
static void
nlm_free_async_lock(struct nlm_async_lock *af)
{
	/*
	 * Free an async lock.
	 */
	if (af->af_rpc)
		CLNT_RELEASE(af->af_rpc);
	xdr_free((xdrproc_t) xdr_nlm4_testargs, &af->af_granted);
	if (af->af_vp)
		vrele(af->af_vp);
	free(af, M_NLM);
}

/*
 * Cancel our async request - this must be called with
 * af->nh_host->nh_lock held. This is slightly complicated by a
 * potential race with our own callback. If we fail to cancel the
 * lock, it must already have been granted - we make sure our async
 * task has completed by calling taskqueue_drain in this case.
 *
 * Returns 0 when the request was cancelled (and freed); non-zero when
 * cancellation failed because the lock was already granted. The lock
 * is dropped and reacquired around the VOP and the free, so the
 * caller must not assume list stability across this call.
 */
static int
nlm_cancel_async_lock(struct nlm_async_lock *af)
{
	struct nlm_host *host = af->af_host;
	int error;

	mtx_assert(&host->nh_lock, MA_OWNED);

	mtx_unlock(&host->nh_lock);

	error = VOP_ADVLOCKASYNC(af->af_vp, NULL, F_CANCEL, &af->af_fl,
	    F_REMOTE, NULL, &af->af_cookie);

	if (error) {
		/*
		 * We failed to cancel - make sure our callback has
		 * completed before we continue.
		 */
		taskqueue_drain(taskqueue_thread, &af->af_task);
	}

	mtx_lock(&host->nh_lock);

	if (!error) {
		NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) "
		    "cancelled\n", af, host->nh_caller_name, host->nh_sysid);

		/*
		 * Remove from the nh_pending list and free now that
		 * we are safe from the callback.
		 */
		TAILQ_REMOVE(&host->nh_pending, af, af_link);
		mtx_unlock(&host->nh_lock);
		nlm_free_async_lock(af);
		mtx_lock(&host->nh_lock);
	}

	return (error);
}

/*
 * Reap granted entries whose GRANTED_RES wait has expired, and free
 * everything on the finished list. The host lock is dropped around
 * each free because nlm_free_async_lock() may sleep (vrele).
 */
static void
nlm_check_expired_locks(struct nlm_host *host)
{
	struct nlm_async_lock *af;
	time_t uptime = time_uptime;

	mtx_lock(&host->nh_lock);
	while ((af = TAILQ_FIRST(&host->nh_granted)) != NULL
	    && uptime >= af->af_expiretime) {
		NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) expired,"
		    " cookie %d:%d\n", af, af->af_host->nh_caller_name,
		    af->af_host->nh_sysid, ng_sysid(&af->af_granted.cookie),
		    ng_cookie(&af->af_granted.cookie));
		TAILQ_REMOVE(&host->nh_granted, af, af_link);
		mtx_unlock(&host->nh_lock);
		nlm_free_async_lock(af);
		mtx_lock(&host->nh_lock);
	}
	while ((af = TAILQ_FIRST(&host->nh_finished)) != NULL) {
		TAILQ_REMOVE(&host->nh_finished, af, af_link);
		mtx_unlock(&host->nh_lock);
		nlm_free_async_lock(af);
		mtx_lock(&host->nh_lock);
	}
	mtx_unlock(&host->nh_lock);
}

/*
 * Free resources used by a host. This is called after the reference
 * count has reached zero so it doesn't need to worry about locks.
 */
static void
nlm_host_destroy(struct nlm_host *host)
{

	mtx_lock(&nlm_global_lock);
	TAILQ_REMOVE(&nlm_hosts, host, nh_link);
	mtx_unlock(&nlm_global_lock);

	if (host->nh_srvrpc.nr_client)
		CLNT_RELEASE(host->nh_srvrpc.nr_client);
	if (host->nh_clntrpc.nr_client)
		CLNT_RELEASE(host->nh_clntrpc.nr_client);
	mtx_destroy(&host->nh_lock);
	sysctl_ctx_free(&host->nh_sysctl);
	free(host, M_NLM);
}

/*
 * Thread start callback for client lock recovery. Drops the reference
 * acquired by nlm_host_notify() when the thread was created.
 */
static void
nlm_client_recovery_start(void *arg)
{
	struct nlm_host *host = (struct nlm_host *) arg;

	NLM_DEBUG(1, "NLM: client lock recovery for %s started\n",
	    host->nh_caller_name);

	nlm_client_recovery(host);

	NLM_DEBUG(1, "NLM: client lock recovery for %s completed\n",
	    host->nh_caller_name);

	host->nh_monstate = NLM_MONITORED;
	nlm_host_release(host);

	kthread_exit();
}

/*
 * This is called when we receive a host state change notification. We
 * unlock any active locks owned by the host. When rpc.lockd is
 * shutting down, this function is called with newstate set to zero
 * which allows us to cancel any pending async locks and clear the
 * locking state.
 */
static void
nlm_host_notify(struct nlm_host *host, int newstate)
{
	struct nlm_async_lock *af;

	if (newstate) {
		NLM_DEBUG(1, "NLM: host %s (sysid %d) rebooted, new "
		    "state is %d\n", host->nh_caller_name,
		    host->nh_sysid, newstate);
	}

	/*
	 * Cancel any pending async locks for this host.
	 */
	mtx_lock(&host->nh_lock);
	while ((af = TAILQ_FIRST(&host->nh_pending)) != NULL) {
		/*
		 * nlm_cancel_async_lock will remove the entry from
		 * nh_pending and free it.
		 */
		nlm_cancel_async_lock(af);
	}
	mtx_unlock(&host->nh_lock);
	nlm_check_expired_locks(host);

	/*
	 * The host just rebooted - trash its locks.
	 */
	lf_clearremotesys(host->nh_sysid);
	host->nh_state = newstate;

	/*
	 * If we have any remote locks for this host (i.e. it
	 * represents a remote NFS server that our local NFS client
	 * has locks for), start a recovery thread.
	 */
	if (newstate != 0
	    && host->nh_monstate != NLM_RECOVERING
	    && lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid) > 0) {
		struct thread *td;
		host->nh_monstate = NLM_RECOVERING;
		/* Reference handed to the recovery thread; released there. */
		refcount_acquire(&host->nh_refs);
		kthread_add(nlm_client_recovery_start, host, curproc, &td, 0, 0,
		    "NFS lock recovery for %s", host->nh_caller_name);
	}
}

/*
 * Sysctl handler to count the number of locks for a sysid.
 */
static int
nlm_host_lock_count_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct nlm_host *host;
	int count;

	host = oidp->oid_arg1;
	count = lf_countlocks(host->nh_sysid);
	return sysctl_handle_int(oidp, &count, 0, req);
}

/*
 * Sysctl handler to count the number of client locks for a sysid.
 */
static int
nlm_host_client_lock_count_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct nlm_host *host;
	int count;

	host = oidp->oid_arg1;
	count = lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid);
	return sysctl_handle_int(oidp, &count, 0, req);
}

/*
 * Create a new NLM host. Called with nlm_global_lock held; the lock
 * is dropped while the sysctl subtree is built (sysctl code may
 * sleep) and reacquired before returning. Returns NULL if the
 * M_NOWAIT allocation fails. The new host starts with one reference.
 */
static struct nlm_host *
nlm_create_host(const char* caller_name)
{
	struct nlm_host *host;
	struct sysctl_oid *oid;

	mtx_assert(&nlm_global_lock, MA_OWNED);

	NLM_DEBUG(1, "NLM: new host %s (sysid %d)\n",
	    caller_name, nlm_next_sysid);
	/* M_NOWAIT because the global lock is held. */
	host = malloc(sizeof(struct nlm_host), M_NLM, M_NOWAIT|M_ZERO);
	if (!host)
		return (NULL);
	mtx_init(&host->nh_lock, "nh_lock", NULL, MTX_DEF);
	host->nh_refs = 1;
	strlcpy(host->nh_caller_name, caller_name, MAXNAMELEN);
	host->nh_sysid = nlm_next_sysid++;
	snprintf(host->nh_sysid_string, sizeof(host->nh_sysid_string),
	    "%d", host->nh_sysid);
	host->nh_vers = 0;
	host->nh_state = 0;
	host->nh_monstate = NLM_UNMONITORED;
	host->nh_grantcookie = 1;
	TAILQ_INIT(&host->nh_pending);
	TAILQ_INIT(&host->nh_granted);
	TAILQ_INIT(&host->nh_finished);
	TAILQ_INSERT_TAIL(&nlm_hosts, host, nh_link);

	mtx_unlock(&nlm_global_lock);

	sysctl_ctx_init(&host->nh_sysctl);
	oid = SYSCTL_ADD_NODE(&host->nh_sysctl,
	    SYSCTL_STATIC_CHILDREN(_vfs_nlm_sysid),
	    OID_AUTO, host->nh_sysid_string, CTLFLAG_RD | CTLFLAG_MPSAFE,
	    NULL, "");
	SYSCTL_ADD_STRING(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "hostname", CTLFLAG_RD, host->nh_caller_name, 0, "");
	SYSCTL_ADD_UINT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "version", CTLFLAG_RD, &host->nh_vers, 0, "");
	SYSCTL_ADD_UINT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "monitored", CTLFLAG_RD, &host->nh_monstate, 0, "");
	SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "lock_count", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, host,
	    0, nlm_host_lock_count_sysctl, "I", "");
	SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "client_lock_count", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    host, 0, nlm_host_client_lock_count_sysctl, "I", "");

	mtx_lock(&nlm_global_lock);

	return (host);
}

/*
 * Acquire the next sysid for remote locks not handled by the NLM.
 */
uint32_t
nlm_acquire_next_sysid(void)
{
	uint32_t next_sysid;

	mtx_lock(&nlm_global_lock);
	next_sysid = nlm_next_sysid++;
	mtx_unlock(&nlm_global_lock);
	return (next_sysid);
}

/*
 * Return non-zero if the address parts of the two sockaddrs are the
 * same. Ports are deliberately ignored.
 */
static int
nlm_compare_addr(const struct sockaddr *a, const struct sockaddr *b)
{
	const struct sockaddr_in *a4, *b4;
#ifdef INET6
	const struct sockaddr_in6 *a6, *b6;
#endif

	if (a->sa_family != b->sa_family)
		return (FALSE);

	switch (a->sa_family) {
	case AF_INET:
		a4 = (const struct sockaddr_in *) a;
		b4 = (const struct sockaddr_in *) b;
		return !memcmp(&a4->sin_addr, &b4->sin_addr,
		    sizeof(a4->sin_addr));
#ifdef INET6
	case AF_INET6:
		a6 = (const struct sockaddr_in6 *) a;
		b6 = (const struct sockaddr_in6 *) b;
		return !memcmp(&a6->sin6_addr, &b6->sin6_addr,
		    sizeof(a6->sin6_addr));
#endif
	}

	return (0);
}

/*
 * Check for idle hosts and stop monitoring them. We could also free
 * the host structure here, possibly after a larger timeout but that
 * would require some care to avoid races with
 * e.g. nlm_host_lock_count_sysctl.
 */
static void
nlm_check_idle(void)
{
	struct nlm_host *host;

	mtx_assert(&nlm_global_lock, MA_OWNED);

	if (time_uptime <= nlm_next_idle_check)
		return;

	nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD;

	TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
		if (host->nh_monstate == NLM_MONITORED
		    && time_uptime > host->nh_idle_timeout) {
			/* Drop the global lock: lf_countlocks may sleep. */
			mtx_unlock(&nlm_global_lock);
			/*
			 * NOTE(review): '+' here vs '|' elsewhere for
			 * NLM_SYSID_CLIENT; presumably equivalent if sysid
			 * never overlaps the flag bit - confirm.
			 */
			if (lf_countlocks(host->nh_sysid) > 0
			    || lf_countlocks(NLM_SYSID_CLIENT
				+ host->nh_sysid)) {
				host->nh_idle_timeout =
					time_uptime + NLM_IDLE_TIMEOUT;
				mtx_lock(&nlm_global_lock);
				continue;
			}
			nlm_host_unmonitor(host);
			mtx_lock(&nlm_global_lock);
		}
	}
}

/*
 * Search for an existing NLM host that matches the given name
 * (typically the caller_name element of an nlm4_lock). If none is
 * found, create a new host. If 'addr' is non-NULL, record the remote
 * address of the host so that we can call it back for async
 * responses. If 'vers' is greater than zero then record the NLM
 * program version to use to communicate with this client.
 */
struct nlm_host *
nlm_find_host_by_name(const char *name, const struct sockaddr *addr,
    rpcvers_t vers)
{
	struct nlm_host *host;

	mtx_lock(&nlm_global_lock);

	/*
	 * The remote host is determined by caller_name.
	 */
	TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
		if (!strcmp(host->nh_caller_name, name))
			break;
	}

	if (!host) {
		host = nlm_create_host(name);
		if (!host) {
			mtx_unlock(&nlm_global_lock);
			return (NULL);
		}
	}
	/* Caller owns this reference; release with nlm_host_release(). */
	refcount_acquire(&host->nh_refs);

	host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT;

	/*
	 * If we have an address for the host, record it so that we
	 * can send async replies etc.
	 */
	if (addr) {

		KASSERT(addr->sa_len < sizeof(struct sockaddr_storage),
		    ("Strange remote transport address length"));

		/*
		 * If we have seen an address before and we currently
		 * have an RPC client handle, make sure the address is
		 * the same, otherwise discard the client handle.
		 */
		if (host->nh_addr.ss_len && host->nh_srvrpc.nr_client) {
			if (!nlm_compare_addr(
				(struct sockaddr *) &host->nh_addr,
				addr)
			    || host->nh_vers != vers) {
				CLIENT *client;
				mtx_lock(&host->nh_lock);
				client = host->nh_srvrpc.nr_client;
				host->nh_srvrpc.nr_client = NULL;
				mtx_unlock(&host->nh_lock);
				if (client) {
					CLNT_RELEASE(client);
				}
			}
		}
		memcpy(&host->nh_addr, addr, addr->sa_len);
		host->nh_vers = vers;
	}

	nlm_check_idle();

	mtx_unlock(&nlm_global_lock);

	return (host);
}

/*
 * Search for an existing NLM host that matches the given remote
 * address. If none is found, create a new host with the requested
 * address and remember 'vers' as the NLM protocol version to use for
 * that host.
 */
struct nlm_host *
nlm_find_host_by_addr(const struct sockaddr *addr, int vers)
{
	/*
	 * Fake up a name using inet_ntop. This buffer is
	 * large enough for an IPv6 address.
	 */
	char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"];
	struct nlm_host *host;

	switch (addr->sa_family) {
	case AF_INET:
		inet_ntop(AF_INET,
		    &((const struct sockaddr_in *) addr)->sin_addr,
		    tmp, sizeof tmp);
		break;
#ifdef INET6
	case AF_INET6:
		inet_ntop(AF_INET6,
		    &((const struct sockaddr_in6 *) addr)->sin6_addr,
		    tmp, sizeof tmp);
		break;
#endif
	default:
		strlcpy(tmp, "<unknown>", sizeof(tmp));
	}


	mtx_lock(&nlm_global_lock);

	/*
	 * The remote host is determined by caller_name.
	 */
	TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
		if (nlm_compare_addr(addr,
			(const struct sockaddr *) &host->nh_addr))
			break;
	}

	if (!host) {
		host = nlm_create_host(tmp);
		if (!host) {
			mtx_unlock(&nlm_global_lock);
			return (NULL);
		}
		memcpy(&host->nh_addr, addr, addr->sa_len);
		host->nh_vers = vers;
	}
	/* Caller owns this reference; release with nlm_host_release(). */
	refcount_acquire(&host->nh_refs);

	host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT;

	nlm_check_idle();

	mtx_unlock(&nlm_global_lock);

	return (host);
}

/*
 * Find the NLM host that matches the value of 'sysid'. If none
 * exists, return NULL. On success the host's reference count is
 * bumped for the caller.
 */
static struct nlm_host *
nlm_find_host_by_sysid(int sysid)
{
	struct nlm_host *host;

	TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
		if (host->nh_sysid == sysid) {
			refcount_acquire(&host->nh_refs);
			return (host);
		}
	}

	return (NULL);
}

/* Drop a host reference; destroys the host when the last one goes. */
void nlm_host_release(struct nlm_host *host)
{
	if (refcount_release(&host->nh_refs)) {
		/*
		 * Free the host
		 */
		nlm_host_destroy(host);
	}
}

/*
 * Unregister this NLM host with the local NSM due to idleness.
 */
static void
nlm_host_unmonitor(struct nlm_host *host)
{
	mon_id smmonid;
	sm_stat_res smstat;
	struct timeval timo;
	enum clnt_stat stat;

	NLM_DEBUG(1, "NLM: unmonitoring %s (sysid %d)\n",
	    host->nh_caller_name, host->nh_sysid);

	/*
	 * We put our assigned system ID value in the priv field to
	 * make it simpler to find the host if we are notified of a
	 * host restart.
	 */
	smmonid.mon_name = host->nh_caller_name;
	smmonid.my_id.my_name = "localhost";
	smmonid.my_id.my_prog = NLM_PROG;
	smmonid.my_id.my_vers = NLM_SM;
	smmonid.my_id.my_proc = NLM_SM_NOTIFY;

	timo.tv_sec = 25;
	timo.tv_usec = 0;
	stat = CLNT_CALL(nlm_nsm, SM_UNMON,
	    (xdrproc_t) xdr_mon, &smmonid,
	    (xdrproc_t) xdr_sm_stat, &smstat, timo);

	if (stat != RPC_SUCCESS) {
		NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat);
		return;
	}
	if (smstat.res_stat == stat_fail) {
		NLM_ERR("Local NSM refuses to unmonitor %s\n",
		    host->nh_caller_name);
		return;
	}

	host->nh_monstate = NLM_UNMONITORED;
}

/*
 * Register this NLM host with the local NSM so that we can be
 * notified if it reboots.
 */
void
nlm_host_monitor(struct nlm_host *host, int state)
{
	mon smmon;
	sm_stat_res smstat;
	struct timeval timo;
	enum clnt_stat stat;

	if (state && !host->nh_state) {
		/*
		 * This is the first time we have seen an NSM state
		 * value for this host. We record it here to help
		 * detect host reboots.
		 */
		host->nh_state = state;
		NLM_DEBUG(1, "NLM: host %s (sysid %d) has NSM state %d\n",
		    host->nh_caller_name, host->nh_sysid, state);
	}

	mtx_lock(&host->nh_lock);
	if (host->nh_monstate != NLM_UNMONITORED) {
		mtx_unlock(&host->nh_lock);
		return;
	}
	/* Claim the monitor slot before dropping the lock (see below). */
	host->nh_monstate = NLM_MONITORED;
	mtx_unlock(&host->nh_lock);

	NLM_DEBUG(1, "NLM: monitoring %s (sysid %d)\n",
	    host->nh_caller_name, host->nh_sysid);

	/*
	 * We put our assigned system ID value in the priv field to
	 * make it simpler to find the host if we are notified of a
	 * host restart.
	 */
	smmon.mon_id.mon_name = host->nh_caller_name;
	smmon.mon_id.my_id.my_name = "localhost";
	smmon.mon_id.my_id.my_prog = NLM_PROG;
	smmon.mon_id.my_id.my_vers = NLM_SM;
	smmon.mon_id.my_id.my_proc = NLM_SM_NOTIFY;
	memcpy(smmon.priv, &host->nh_sysid, sizeof(host->nh_sysid));

	timo.tv_sec = 25;
	timo.tv_usec = 0;
	stat = CLNT_CALL(nlm_nsm, SM_MON,
	    (xdrproc_t) xdr_mon, &smmon,
	    (xdrproc_t) xdr_sm_stat, &smstat, timo);

	if (stat != RPC_SUCCESS) {
		NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat);
		return;
	}
	if (smstat.res_stat == stat_fail) {
		NLM_ERR("Local NSM refuses to monitor %s\n",
		    host->nh_caller_name);
		mtx_lock(&host->nh_lock);
		host->nh_monstate = NLM_MONITOR_FAILED;
		mtx_unlock(&host->nh_lock);
		return;
	}

	host->nh_monstate = NLM_MONITORED;
}

/*
 * Return an RPC client handle that can be used to talk to the NLM
 * running on the given host. The returned handle is referenced for
 * the caller (CLNT_ACQUIRE); may return NULL if no handle could be
 * created.
 */
CLIENT *
nlm_host_get_rpc(struct nlm_host *host, bool_t isserver)
{
	struct nlm_rpc *rpc;
	CLIENT *client;

	mtx_lock(&host->nh_lock);

	if (isserver)
		rpc = &host->nh_srvrpc;
	else
		rpc = &host->nh_clntrpc;

	/*
	 * We can't hold onto RPC handles for too long - the async
	 * call/reply protocol used by some NLM clients makes it hard
	 * to tell when they change port numbers (e.g. after a
	 * reboot). Note that if a client reboots while it isn't
	 * holding any locks, it won't bother to notify us. We
	 * expire the RPC handles after two minutes.
	 */
	if (rpc->nr_client && time_uptime > rpc->nr_create_time + 2*60) {
		client = rpc->nr_client;
		rpc->nr_client = NULL;
		mtx_unlock(&host->nh_lock);
		CLNT_RELEASE(client);
		mtx_lock(&host->nh_lock);
	}

	if (!rpc->nr_client) {
		/* Drop the lock: nlm_get_rpc performs blocking RPC. */
		mtx_unlock(&host->nh_lock);
		client = nlm_get_rpc((struct sockaddr *)&host->nh_addr,
		    NLM_PROG, host->nh_vers);
		mtx_lock(&host->nh_lock);

		if (client) {
			if (rpc->nr_client) {
				/* Someone else raced us to it; discard ours. */
				mtx_unlock(&host->nh_lock);
				CLNT_DESTROY(client);
				mtx_lock(&host->nh_lock);
			} else {
				rpc->nr_client = client;
				rpc->nr_create_time = time_uptime;
			}
		}
	}

	client = rpc->nr_client;
	if (client)
		CLNT_ACQUIRE(client);
	mtx_unlock(&host->nh_lock);

	return (client);

}

/* Accessor: the sysid assigned to this host. */
int nlm_host_get_sysid(struct nlm_host *host)
{

	return (host->nh_sysid);
}

/* Accessor: the last NSM state number seen for this host. */
int
nlm_host_get_state(struct nlm_host *host)
{

	return (host->nh_state);
}

/*
 * Record a client-side lock wait on the global waiting-locks list.
 * The file handle bytes are copied into the entry so the nlm4_lock's
 * fh netobj stays valid independently of the caller's buffer.
 * Returns an opaque handle for nlm_deregister_wait_lock/nlm_wait_lock.
 */
void *
nlm_register_wait_lock(struct nlm4_lock *lock, struct vnode *vp)
{
	struct nlm_waiting_lock *nw;

	nw = malloc(sizeof(struct nlm_waiting_lock), M_NLM, M_WAITOK);
	nw->nw_lock = *lock;
	memcpy(&nw->nw_fh.fh_bytes, nw->nw_lock.fh.n_bytes,
	    nw->nw_lock.fh.n_len);
	nw->nw_lock.fh.n_bytes = nw->nw_fh.fh_bytes;
	nw->nw_waiting = TRUE;
	nw->nw_vp = vp;
	mtx_lock(&nlm_global_lock);
	TAILQ_INSERT_TAIL(&nlm_waiting_locks, nw, nw_link);
	mtx_unlock(&nlm_global_lock);

	return nw;
}

/* Remove and free a waiting-lock entry created by nlm_register_wait_lock. */
void
nlm_deregister_wait_lock(void *handle)
{
	struct nlm_waiting_lock *nw = handle;

	mtx_lock(&nlm_global_lock);
	TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link);
	mtx_unlock(&nlm_global_lock);

	free(nw, M_NLM);
}

int
nlm_wait_lock(void *handle, int timo)
{
	struct nlm_waiting_lock *nw = handle;
	int error,
stops_deferred;

	/*
	 * If the granted message arrived before we got here,
	 * nw->nw_waiting will be FALSE - in that case, don't sleep.
	 */
	mtx_lock(&nlm_global_lock);
	error = 0;
	if (nw->nw_waiting) {
		/* Defer stop signals around the interruptible sleep. */
		stops_deferred = sigdeferstop(SIGDEFERSTOP_ERESTART);
		error = msleep(nw, &nlm_global_lock, PCATCH, "nlmlock", timo);
		sigallowstop(stops_deferred);
	}
	TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link);
	if (error) {
		/*
		 * The granted message may arrive after the
		 * interrupt/timeout but before we manage to lock the
		 * mutex. Detect this by examining nw_lock.
		 */
		if (!nw->nw_waiting)
			error = 0;
	} else {
		/*
		 * If nlm_cancel_wait is called, then error will be
		 * zero but nw_waiting will still be TRUE. We
		 * translate this into EINTR.
		 */
		if (nw->nw_waiting)
			error = EINTR;
	}
	mtx_unlock(&nlm_global_lock);

	free(nw, M_NLM);

	return (error);
}

/*
 * Wake every thread sleeping in nlm_wait_lock() on a lock for vnode
 * 'vp'.  nw_waiting is deliberately left TRUE so that the sleeper
 * reports EINTR (see nlm_wait_lock()).
 */
void
nlm_cancel_wait(struct vnode *vp)
{
	struct nlm_waiting_lock *nw;

	mtx_lock(&nlm_global_lock);
	TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) {
		if (nw->nw_vp == vp) {
			wakeup(nw);
		}
	}
	mtx_unlock(&nlm_global_lock);
}


/**********************************************************************/

/*
 * Syscall interface with userland.
 */

/* RPC dispatch routines generated for the four NLM protocol versions. */
extern void nlm_prog_0(struct svc_req *rqstp, SVCXPRT *transp);
extern void nlm_prog_1(struct svc_req *rqstp, SVCXPRT *transp);
extern void nlm_prog_3(struct svc_req *rqstp, SVCXPRT *transp);
extern void nlm_prog_4(struct svc_req *rqstp, SVCXPRT *transp);

/*
 * Create RPC transports for each of the 'addr_count' (netid, uaddr)
 * string pairs in the userland array 'addrs', and register all four
 * NLM protocol versions on those transports.  Returns 0 or an errno.
 */
static int
nlm_register_services(SVCPOOL *pool, int addr_count, char **addrs)
{
	static rpcvers_t versions[] = {
		NLM_SM, NLM_VERS, NLM_VERSX, NLM_VERS4
	};
	static void (*dispatchers[])(struct svc_req *, SVCXPRT *) = {
		nlm_prog_0, nlm_prog_1, nlm_prog_3, nlm_prog_4
	};

	SVCXPRT **xprts;
	char netid[16];
	char uaddr[128];
	struct netconfig *nconf;
	int i, j, error;

	if (!addr_count) {
		NLM_ERR("NLM: no service addresses given - can't start server");
		return (EINVAL);
	}

	/* Sanity-bound the untrusted count before sizing the malloc. */
	if (addr_count < 0 || addr_count > 256 ) {
		NLM_ERR("NLM: too many service addresses (%d) given, "
		    "max 256 - can't start server\n", addr_count);
		return (EINVAL);
	}

	xprts = malloc(addr_count * sizeof(SVCXPRT *), M_NLM, M_WAITOK|M_ZERO);
	for (i = 0; i < nitems(versions); i++) {
		for (j = 0; j < addr_count; j++) {
			/*
			 * Create transports for the first version and
			 * then just register everything else to the
			 * same transports.
			 */
			if (i == 0) {
				char *up;

				/*
				 * 'addrs' is a userland array of
				 * (netid, uaddr) string-pointer pairs;
				 * copy both strings in.
				 */
				error = copyin(&addrs[2*j], &up,
				    sizeof(char*));
				if (error)
					goto out;
				error = copyinstr(up, netid, sizeof(netid),
				    NULL);
				if (error)
					goto out;
				error = copyin(&addrs[2*j+1], &up,
				    sizeof(char*));
				if (error)
					goto out;
				error = copyinstr(up, uaddr, sizeof(uaddr),
				    NULL);
				if (error)
					goto out;
				nconf = getnetconfigent(netid);
				if (!nconf) {
					NLM_ERR("Can't lookup netid %s\n",
					    netid);
					error = EINVAL;
					goto out;
				}
				xprts[j] = svc_tp_create(pool, dispatchers[i],
				    NLM_PROG, versions[i], uaddr, nconf);
				if (!xprts[j]) {
					NLM_ERR("NLM: unable to create "
					    "(NLM_PROG, %d).\n", versions[i]);
					error = EINVAL;
					goto out;
				}
				freenetconfigent(nconf);
			} else {
				nconf = getnetconfigent(xprts[j]->xp_netid);
				rpcb_unset(NLM_PROG, versions[i], nconf);
				if (!svc_reg(xprts[j], NLM_PROG, versions[i],
				    dispatchers[i], nconf)) {
					NLM_ERR("NLM: can't register "
					    "(NLM_PROG, %d)\n", versions[i]);
					error = EINVAL;
					goto out;
				}
			}
		}
	}
	error = 0;
out:
	/* Drop our transport references; the pool keeps its own. */
	for (j = 0; j < addr_count; j++) {
		if (xprts[j])
			SVC_RELEASE(xprts[j]);
	}
	free(xprts, M_NLM);
	return (error);
}

/*
 * Main server entry point. Contacts the local NSM to get its current
 * state and send SM_UNMON_ALL. Registers the NLM services and then
 * services requests. Does not return until the server is interrupted
 * by a signal.
 */
static int
nlm_server_main(int addr_count, char **addrs)
{
	struct thread *td = curthread;
	int error;
	SVCPOOL *pool = NULL;
	struct sockopt opt;
	int portlow;
#ifdef INET6
	struct sockaddr_in6 sin6;
#endif
	struct sockaddr_in sin;
	my_id id;
	sm_stat smstat;
	struct timeval timo;
	enum clnt_stat stat;
	struct nlm_host *host, *nhost;
	struct nlm_waiting_lock *nw;
	vop_advlock_t *old_nfs_advlock;
	vop_reclaim_t *old_nfs_reclaim;

	if (nlm_is_running != 0) {
		NLM_ERR("NLM: can't start server - "
		    "it appears to be running already\n");
		return (EPERM);
	}

	if (nlm_socket == NULL) {
		memset(&opt, 0, sizeof(opt));

		/*
		 * Create the datagram sockets used for outgoing RPC,
		 * requesting a reserved (low) local port number.
		 */
		error = socreate(AF_INET, &nlm_socket, SOCK_DGRAM, 0,
		    td->td_ucred, td);
		if (error) {
			NLM_ERR("NLM: can't create IPv4 socket - error %d\n",
			    error);
			return (error);
		}
		opt.sopt_dir = SOPT_SET;
		opt.sopt_level = IPPROTO_IP;
		opt.sopt_name = IP_PORTRANGE;
		portlow = IP_PORTRANGE_LOW;
		opt.sopt_val = &portlow;
		opt.sopt_valsize = sizeof(portlow);
		sosetopt(nlm_socket, &opt);

#ifdef INET6
		nlm_socket6 = NULL;
		error = socreate(AF_INET6, &nlm_socket6, SOCK_DGRAM, 0,
		    td->td_ucred, td);
		if (error) {
			NLM_ERR("NLM: can't create IPv6 socket - error %d\n",
			    error);
			soclose(nlm_socket);
			nlm_socket = NULL;
			return (error);
		}
		opt.sopt_dir = SOPT_SET;
		opt.sopt_level = IPPROTO_IPV6;
		opt.sopt_name = IPV6_PORTRANGE;
		portlow = IPV6_PORTRANGE_LOW;
		opt.sopt_val = &portlow;
		opt.sopt_valsize = sizeof(portlow);
		sosetopt(nlm_socket6, &opt);
#endif
	}

	nlm_auth = authunix_create(curthread->td_ucred);

	/* Contact the local NSM over loopback, preferring IPv6. */
#ifdef INET6
	memset(&sin6, 0, sizeof(sin6));
	sin6.sin6_len = sizeof(sin6);
	sin6.sin6_family = AF_INET6;
	sin6.sin6_addr = in6addr_loopback;
	nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin6, SM_PROG, SM_VERS);
	if (!nlm_nsm) {
#endif
		memset(&sin, 0, sizeof(sin));
		sin.sin_len = sizeof(sin);
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
		nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin, SM_PROG,
		    SM_VERS);
#ifdef INET6
	}
#endif

	if (!nlm_nsm) {
		NLM_ERR("Can't start NLM - unable to contact NSM\n");
		error = EINVAL;
		goto out;
	}

	pool = svcpool_create("NLM", NULL);

	error = nlm_register_services(pool, addr_count, addrs);
	if (error)
		goto out;

	/* Ask the NSM to forget any hosts monitored on our behalf. */
	memset(&id, 0, sizeof(id));
	id.my_name = "NFS NLM";

	timo.tv_sec = 25;
	timo.tv_usec = 0;
	stat = CLNT_CALL(nlm_nsm, SM_UNMON_ALL,
	    (xdrproc_t) xdr_my_id, &id,
	    (xdrproc_t) xdr_sm_stat, &smstat, timo);

	if (stat != RPC_SUCCESS) {
		struct rpc_err err;

		CLNT_GETERR(nlm_nsm, &err);
		NLM_ERR("NLM: unexpected error contacting NSM, "
		    "stat=%d, errno=%d\n", stat, err.re_errno);
		error = EINVAL;
		goto out;
	}
	nlm_is_running = 1;

	NLM_DEBUG(1, "NLM: local NSM state is %d\n", smstat.state);
	nlm_nsm_state = smstat.state;

	/* Hook the NFS client's advlock/reclaim entry points. */
	old_nfs_advlock = nfs_advlock_p;
	nfs_advlock_p = nlm_advlock;
	old_nfs_reclaim = nfs_reclaim_p;
	nfs_reclaim_p = nlm_reclaim;

	/* Service RPC requests until the pool is interrupted. */
	svc_run(pool);
	error = 0;

	nfs_advlock_p = old_nfs_advlock;
	nfs_reclaim_p = old_nfs_reclaim;

out:
	nlm_is_running = 0;
	if (pool)
		svcpool_destroy(pool);

	/*
	 * We are finished communicating with the NSM.
	 */
	if (nlm_nsm) {
		CLNT_RELEASE(nlm_nsm);
		nlm_nsm = NULL;
	}

	/*
	 * Trash all the existing state so that if the server
	 * restarts, it gets a clean slate. This is complicated by the
	 * possibility that there may be other threads trying to make
	 * client locking requests.
	 *
	 * First we fake a client reboot notification which will
	 * cancel any pending async locks and purge remote lock state
	 * from the local lock manager. We release the reference from
	 * nlm_hosts to the host (which may remove it from the list
	 * and free it). After this phase, the only entries in the
	 * nlm_host list should be from other threads performing
	 * client lock requests.
	 */
	mtx_lock(&nlm_global_lock);
	TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) {
		wakeup(nw);
	}
	TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, nhost) {
		/* nlm_host_notify/release may sleep; drop the lock. */
		mtx_unlock(&nlm_global_lock);
		nlm_host_notify(host, 0);
		nlm_host_release(host);
		mtx_lock(&nlm_global_lock);
	}
	mtx_unlock(&nlm_global_lock);

	AUTH_DESTROY(nlm_auth);

	return (error);
}

/*
 * nlm_syscall(2) entry point: record the debug level and grace
 * period from userland, then run the server loop.  Requires the
 * PRIV_NFS_LOCKD privilege (superuser on pre-7.0 systems).
 */
int
sys_nlm_syscall(struct thread *td, struct nlm_syscall_args *uap)
{
	int error;

#if __FreeBSD_version >= 700000
	error = priv_check(td, PRIV_NFS_LOCKD);
#else
	error = suser(td);
#endif
	if (error)
		return (error);

	nlm_debug_level = uap->debug_level;
	nlm_grace_threshold = time_uptime + uap->grace_period;
	nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD;

	return nlm_server_main(uap->addr_count, uap->addrs);
}

/**********************************************************************/

/*
 * NLM implementation details, called from the RPC stubs.
 */


/*
 * Called when the local NSM notifies us that a monitored host has
 * changed state (e.g. rebooted).  The sysid we stored in the
 * notification's priv area (see nlm_host_monitor()) lets us find the
 * host again.
 */
void
nlm_sm_notify(struct nlm_sm_status *argp)
{
	uint32_t sysid;
	struct nlm_host *host;

	NLM_DEBUG(3, "nlm_sm_notify(): mon_name = %s\n", argp->mon_name);
	memcpy(&sysid, &argp->priv, sizeof(sysid));
	host = nlm_find_host_by_sysid(sysid);
	if (host) {
		nlm_host_notify(host, argp->state);
		nlm_host_release(host);
	}
}

/* Copy an NLM netobj file handle into the kernel's fhandle_t form. */
static void
nlm_convert_to_fhandle_t(fhandle_t *fhp, struct netobj *p)
{
	memcpy(fhp, p->n_bytes, sizeof(fhandle_t));
}

/*
 * Mount point and vnode resolved from an NLM file handle, tracked so
 * they can be released consistently via nlm_release_vfs_state().
 */
struct vfs_state {
	struct mount *vs_mp;
	struct vnode *vs_vp;
	int	vs_vnlocked;	/* non-zero while vs_vp is still locked */
};

/*
 * Resolve file handle 'fhp' presented by 'host' to a mount point and
 * vnode, checking export permissions for the caller.  'accmode' is
 * the access (VREAD/VWRITE) to verify, or 0 to skip the check (used
 * for unlock/cancel).  On success the vnode is returned unlocked.
 * Returns 0 or an errno (e.g. ESTALE, EROFS); the caller must always
 * invoke nlm_release_vfs_state() afterwards.
 */
static int
nlm_get_vfs_state(struct nlm_host *host, struct svc_req *rqstp,
    fhandle_t *fhp, struct vfs_state *vs, accmode_t accmode)
{
	int error, exflags;
	struct ucred *cred = NULL, *credanon = NULL;

	memset(vs, 0, sizeof(*vs));

	vs->vs_mp = vfs_getvfs(&fhp->fh_fsid);
	if (!vs->vs_mp) {
		return (ESTALE);
	}

	/* accmode == 0 means don't check, since it is an unlock. */
	if (accmode != 0) {
		error = VFS_CHECKEXP(vs->vs_mp,
		    (struct sockaddr *)&host->nh_addr, &exflags, &credanon,
		    NULL, NULL);
		if (error)
			goto out;

		if (exflags & MNT_EXRDONLY ||
		    (vs->vs_mp->mnt_flag & MNT_RDONLY)) {
			error = EROFS;
			goto out;
		}
	}

	error = VFS_FHTOVP(vs->vs_mp, &fhp->fh_fid, LK_EXCLUSIVE, &vs->vs_vp);
	if (error)
		goto out;
	vs->vs_vnlocked = TRUE;

	if (accmode != 0) {
		if (!svc_getcred(rqstp, &cred, NULL)) {
			error = EINVAL;
			goto out;
		}
		if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) {
			/*
			 * Use the export's anonymous credential for
			 * root callers or anon-mapped exports.
			 */
			crfree(cred);
			cred = credanon;
			credanon = NULL;
		}

		/*
		 * Check cred.
		 */
		error = VOP_ACCESS(vs->vs_vp, accmode, cred, curthread);
		/*
		 * If this failed and accmode != VWRITE, try again with
		 * VWRITE to maintain backwards compatibility with the
		 * old code that always used VWRITE.
		 */
		if (error != 0 && accmode != VWRITE)
			error = VOP_ACCESS(vs->vs_vp, VWRITE, cred, curthread);
		if (error)
			goto out;
	}

#if __FreeBSD_version < 800011
	VOP_UNLOCK(vs->vs_vp, 0, curthread);
#else
	VOP_UNLOCK(vs->vs_vp);
#endif
	vs->vs_vnlocked = FALSE;

out:
	if (cred)
		crfree(cred);
	if (credanon)
		crfree(credanon);

	return (error);
}

/*
 * Release whatever nlm_get_vfs_state() acquired, in any state of
 * partial completion.
 */
static void
nlm_release_vfs_state(struct vfs_state *vs)
{

	if (vs->vs_vp) {
		if (vs->vs_vnlocked)
			vput(vs->vs_vp);
		else
			vrele(vs->vs_vp);
	}
	if (vs->vs_mp)
		vfs_rel(vs->vs_mp);
}

/*
 * Map a local errno from the VFS layer to the closest NLM status
 * code.
 */
static nlm4_stats
nlm_convert_error(int error)
{

	if (error == ESTALE)
		return nlm4_stale_fh;
	else if (error == EROFS)
		return nlm4_rofs;
	else
		return nlm4_failed;
}

/*
 * Service an NLM_TEST request: report whether the described lock
 * could be applied and, if not, describe the conflicting holder.
 * Always returns 0 to the RPC layer; the NLM status goes back in
 * 'result'.  If 'rpcp' is non-NULL it receives an RPC handle for
 * replying to the caller.
 */
int
nlm_do_test(nlm4_testargs *argp, nlm4_testres *result, struct svc_req *rqstp,
    CLIENT **rpcp)
{
	fhandle_t fh;
	struct vfs_state vs;
	struct nlm_host *host, *bhost;
	int error, sysid;
	struct flock fl;
	accmode_t accmode;

	memset(result, 0, sizeof(*result));
	memset(&vs, 0, sizeof(vs));

	host = nlm_find_host_by_name(argp->alock.caller_name,
	    svc_getrpccaller(rqstp), rqstp->rq_vers);
	if (!host) {
		result->stat.stat = nlm4_denied_nolocks;
		return (ENOMEM);
	}

	NLM_DEBUG(3, "nlm_do_test(): caller_name = %s (sysid = %d)\n",
	    host->nh_caller_name, host->nh_sysid);

	nlm_check_expired_locks(host);
	sysid = host->nh_sysid;

	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);

	/* Everything is denied during the recovery grace period. */
	if (time_uptime < nlm_grace_threshold) {
		result->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	accmode = argp->exclusive ? VWRITE : VREAD;
	error = nlm_get_vfs_state(host, rqstp, &fh, &vs, accmode);
	if (error) {
		result->stat.stat = nlm_convert_error(error);
		goto out;
	}

	/* Translate the NLM lock into a struct flock for VOP_ADVLOCK. */
	fl.l_start = argp->alock.l_offset;
	fl.l_len = argp->alock.l_len;
	fl.l_pid = argp->alock.svid;
	fl.l_sysid = sysid;
	fl.l_whence = SEEK_SET;
	if (argp->exclusive)
		fl.l_type = F_WRLCK;
	else
		fl.l_type = F_RDLCK;
	error = VOP_ADVLOCK(vs.vs_vp, NULL, F_GETLK, &fl, F_REMOTE);
	if (error) {
		result->stat.stat = nlm4_failed;
		goto out;
	}

	if (fl.l_type == F_UNLCK) {
		/* No conflicting lock - the test succeeds. */
		result->stat.stat = nlm4_granted;
	} else {
		result->stat.stat = nlm4_denied;
		result->stat.nlm4_testrply_u.holder.exclusive =
			(fl.l_type == F_WRLCK);
		result->stat.nlm4_testrply_u.holder.svid = fl.l_pid;
		bhost = nlm_find_host_by_sysid(fl.l_sysid);
		if (bhost) {
			/*
			 * We don't have any useful way of recording
			 * the value of oh used in the original lock
			 * request. Ideally, the test reply would have
			 * a space for the owning host's name allowing
			 * our caller's NLM to keep track.
			 *
			 * As far as I can see, Solaris uses an eight
			 * byte structure for oh which contains a four
			 * byte pid encoded in local byte order and
			 * the first four bytes of the host
			 * name. Linux uses a variable length string
			 * 'pid@hostname' in ascii but doesn't even
			 * return that in test replies.
			 *
			 * For the moment, return nothing in oh
			 * (already zero'ed above).
			 */
			nlm_host_release(bhost);
		}
		result->stat.nlm4_testrply_u.holder.l_offset = fl.l_start;
		result->stat.nlm4_testrply_u.holder.l_len = fl.l_len;
	}

out:
	nlm_release_vfs_state(&vs);
	if (rpcp)
		*rpcp = nlm_host_get_rpc(host, TRUE);
	nlm_host_release(host);
	return (0);
}

/*
 * Service an NLM_LOCK request.  If 'argp->block' is set and the lock
 * is contended, queue an async lock and reply nlm4_blocked; the
 * granted callback is sent later.  If 'monitor' is set, register the
 * caller with the local NSM.  Always returns 0 to the RPC layer; the
 * NLM status goes back in 'result'.
 */
int
nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp,
    bool_t monitor, CLIENT **rpcp)
{
	fhandle_t fh;
	struct vfs_state vs;
	struct nlm_host *host;
	int error, sysid;
	struct flock fl;
	accmode_t accmode;

	memset(result, 0, sizeof(*result));
	memset(&vs, 0, sizeof(vs));

	host = nlm_find_host_by_name(argp->alock.caller_name,
	    svc_getrpccaller(rqstp), rqstp->rq_vers);
	if (!host) {
		result->stat.stat = nlm4_denied_nolocks;
		return (ENOMEM);
	}

	NLM_DEBUG(3, "nlm_do_lock(): caller_name = %s (sysid = %d)\n",
	    host->nh_caller_name, host->nh_sysid);

	if (monitor && host->nh_state && argp->state
	    && host->nh_state != argp->state) {
		/*
		 * The host rebooted without telling us. Trash its
		 * locks.
		 */
		nlm_host_notify(host, argp->state);
	}

	nlm_check_expired_locks(host);
	sysid = host->nh_sysid;

	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);

	/* During the grace period only reclaim requests are allowed. */
	if (time_uptime < nlm_grace_threshold && !argp->reclaim) {
		result->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	accmode = argp->exclusive ?
VWRITE : VREAD;
	error = nlm_get_vfs_state(host, rqstp, &fh, &vs, accmode);
	if (error) {
		result->stat.stat = nlm_convert_error(error);
		goto out;
	}

	/* Translate the NLM lock into a struct flock for VOP_ADVLOCK. */
	fl.l_start = argp->alock.l_offset;
	fl.l_len = argp->alock.l_len;
	fl.l_pid = argp->alock.svid;
	fl.l_sysid = sysid;
	fl.l_whence = SEEK_SET;
	if (argp->exclusive)
		fl.l_type = F_WRLCK;
	else
		fl.l_type = F_RDLCK;
	if (argp->block) {
		struct nlm_async_lock *af;
		CLIENT *client;
		struct nlm_grantcookie cookie;

		/*
		 * First, make sure we can contact the host's NLM.
		 */
		client = nlm_host_get_rpc(host, TRUE);
		if (!client) {
			result->stat.stat = nlm4_failed;
			goto out;
		}

		/*
		 * First we need to check and see if there is an
		 * existing blocked lock that matches. This could be a
		 * badly behaved client or an RPC re-send. If we find
		 * one, just return nlm4_blocked.
		 */
		mtx_lock(&host->nh_lock);
		TAILQ_FOREACH(af, &host->nh_pending, af_link) {
			if (af->af_fl.l_start == fl.l_start
			    && af->af_fl.l_len == fl.l_len
			    && af->af_fl.l_pid == fl.l_pid
			    && af->af_fl.l_type == fl.l_type) {
				break;
			}
		}
		if (!af) {
			/* Reserve a grant cookie while nh_lock is held. */
			cookie.ng_sysid = host->nh_sysid;
			cookie.ng_cookie = host->nh_grantcookie++;
		}
		mtx_unlock(&host->nh_lock);
		if (af) {
			CLNT_RELEASE(client);
			result->stat.stat = nlm4_blocked;
			goto out;
		}

		af = malloc(sizeof(struct nlm_async_lock), M_NLM,
		    M_WAITOK|M_ZERO);
		TASK_INIT(&af->af_task, 0, nlm_lock_callback, af);
		af->af_vp = vs.vs_vp;
		af->af_fl = fl;
		af->af_host = host;
		af->af_rpc = client;
		/*
		 * We use M_RPC here so that we can xdr_free the thing
		 * later.  Pre-build the granted message arguments now
		 * while the request data is still available.
		 */
		nlm_make_netobj(&af->af_granted.cookie,
		    (caddr_t)&cookie, sizeof(cookie), M_RPC);
		af->af_granted.exclusive = argp->exclusive;
		af->af_granted.alock.caller_name =
			strdup(argp->alock.caller_name, M_RPC);
		nlm_copy_netobj(&af->af_granted.alock.fh,
		    &argp->alock.fh, M_RPC);
		nlm_copy_netobj(&af->af_granted.alock.oh,
		    &argp->alock.oh, M_RPC);
		af->af_granted.alock.svid = argp->alock.svid;
		af->af_granted.alock.l_offset = argp->alock.l_offset;
		af->af_granted.alock.l_len = argp->alock.l_len;

		/*
		 * Put the entry on the pending list before calling
		 * VOP_ADVLOCKASYNC. We do this in case the lock
		 * request was blocked (returning EINPROGRESS) but
		 * then granted before we manage to run again. The
		 * client may receive the granted message before we
		 * send our blocked reply but thats their problem.
		 */
		mtx_lock(&host->nh_lock);
		TAILQ_INSERT_TAIL(&host->nh_pending, af, af_link);
		mtx_unlock(&host->nh_lock);

		error = VOP_ADVLOCKASYNC(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE,
		    &af->af_task, &af->af_cookie);

		/*
		 * If the lock completed synchronously, just free the
		 * tracking structure now.
		 */
		if (error != EINPROGRESS) {
			CLNT_RELEASE(af->af_rpc);
			mtx_lock(&host->nh_lock);
			TAILQ_REMOVE(&host->nh_pending, af, af_link);
			mtx_unlock(&host->nh_lock);
			xdr_free((xdrproc_t) xdr_nlm4_testargs,
			    &af->af_granted);
			free(af, M_NLM);
		} else {
			NLM_DEBUG(2, "NLM: pending async lock %p for %s "
			    "(sysid %d)\n", af, host->nh_caller_name, sysid);
			/*
			 * Don't vrele the vnode just yet - this must
			 * wait until either the async callback
			 * happens or the lock is cancelled.
			 */
			vs.vs_vp = NULL;
		}
	} else {
		error = VOP_ADVLOCK(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE);
	}

	/* Map the advlock result onto an NLM status code. */
	if (error) {
		if (error == EINPROGRESS) {
			result->stat.stat = nlm4_blocked;
		} else if (error == EDEADLK) {
			result->stat.stat = nlm4_deadlck;
		} else if (error == EAGAIN) {
			result->stat.stat = nlm4_denied;
		} else {
			result->stat.stat = nlm4_failed;
		}
	} else {
		if (monitor)
			nlm_host_monitor(host, argp->state);
		result->stat.stat = nlm4_granted;
	}

out:
	nlm_release_vfs_state(&vs);
	if (rpcp)
		*rpcp = nlm_host_get_rpc(host, TRUE);
	nlm_host_release(host);
	return (0);
}

/*
 * Service an NLM_CANCEL request: find the matching pending async
 * lock for this host and cancel it.  Always returns 0 to the RPC
 * layer; the NLM status goes back in 'result'.
 */
int
nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *result, struct svc_req *rqstp,
    CLIENT **rpcp)
{
	fhandle_t fh;
	struct vfs_state vs;
	struct nlm_host *host;
	int error, sysid;
	struct flock fl;
	struct nlm_async_lock *af;

	memset(result, 0, sizeof(*result));
	memset(&vs, 0, sizeof(vs));

	host = nlm_find_host_by_name(argp->alock.caller_name,
	    svc_getrpccaller(rqstp), rqstp->rq_vers);
	if (!host) {
		result->stat.stat = nlm4_denied_nolocks;
		return (ENOMEM);
	}

	NLM_DEBUG(3, "nlm_do_cancel(): caller_name = %s (sysid = %d)\n",
	    host->nh_caller_name, host->nh_sysid);

	nlm_check_expired_locks(host);
	sysid = host->nh_sysid;

	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);

	if (time_uptime < nlm_grace_threshold) {
		result->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	/* accmode 0: no access check is needed to cancel. */
	error = nlm_get_vfs_state(host, rqstp, &fh, &vs, (accmode_t)0);
	if (error) {
		result->stat.stat = nlm_convert_error(error);
		goto out;
	}

	fl.l_start = argp->alock.l_offset;
	fl.l_len = argp->alock.l_len;
	fl.l_pid = argp->alock.svid;
	fl.l_sysid = sysid;
	fl.l_whence = SEEK_SET;
	if (argp->exclusive)
		fl.l_type = F_WRLCK;
	else
		fl.l_type = F_RDLCK;

	/*
	 * First we need to try and find the async lock request - if
	 * there isn't one, we give up and return nlm4_denied.
	 */
	mtx_lock(&host->nh_lock);

	TAILQ_FOREACH(af, &host->nh_pending, af_link) {
		if (af->af_fl.l_start == fl.l_start
		    && af->af_fl.l_len == fl.l_len
		    && af->af_fl.l_pid == fl.l_pid
		    && af->af_fl.l_type == fl.l_type) {
			break;
		}
	}

	if (!af) {
		mtx_unlock(&host->nh_lock);
		result->stat.stat = nlm4_denied;
		goto out;
	}

	error = nlm_cancel_async_lock(af);

	if (error) {
		result->stat.stat = nlm4_denied;
	} else {
		result->stat.stat = nlm4_granted;
	}

	mtx_unlock(&host->nh_lock);

out:
	nlm_release_vfs_state(&vs);
	if (rpcp)
		*rpcp = nlm_host_get_rpc(host, TRUE);
	nlm_host_release(host);
	return (0);
}

/*
 * Service an NLM_UNLOCK request.  Always returns 0 to the RPC layer;
 * the NLM status (nlm4_granted except during the grace period) goes
 * back in 'result'.
 */
int
nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *result, struct svc_req *rqstp,
    CLIENT **rpcp)
{
	fhandle_t fh;
	struct vfs_state vs;
	struct nlm_host *host;
	int error, sysid;
	struct flock fl;

	memset(result, 0, sizeof(*result));
	memset(&vs, 0, sizeof(vs));

	host = nlm_find_host_by_name(argp->alock.caller_name,
	    svc_getrpccaller(rqstp), rqstp->rq_vers);
	if (!host) {
		result->stat.stat = nlm4_denied_nolocks;
		return (ENOMEM);
	}

	NLM_DEBUG(3, "nlm_do_unlock(): caller_name = %s (sysid = %d)\n",
	    host->nh_caller_name, host->nh_sysid);

	nlm_check_expired_locks(host);
	sysid = host->nh_sysid;

	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);

	if (time_uptime < nlm_grace_threshold) {
		result->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	/* accmode 0: no access check is needed to unlock. */
	error =
nlm_get_vfs_state(host, rqstp, &fh, &vs, (accmode_t)0); 2254 if (error) { 2255 result->stat.stat = nlm_convert_error(error); 2256 goto out; 2257 } 2258 2259 fl.l_start = argp->alock.l_offset; 2260 fl.l_len = argp->alock.l_len; 2261 fl.l_pid = argp->alock.svid; 2262 fl.l_sysid = sysid; 2263 fl.l_whence = SEEK_SET; 2264 fl.l_type = F_UNLCK; 2265 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_UNLCK, &fl, F_REMOTE); 2266 2267 /* 2268 * Ignore the error - there is no result code for failure, 2269 * only for grace period. 2270 */ 2271 result->stat.stat = nlm4_granted; 2272 2273 out: 2274 nlm_release_vfs_state(&vs); 2275 if (rpcp) 2276 *rpcp = nlm_host_get_rpc(host, TRUE); 2277 nlm_host_release(host); 2278 return (0); 2279 } 2280 2281 int 2282 nlm_do_granted(nlm4_testargs *argp, nlm4_res *result, struct svc_req *rqstp, 2283 2284 CLIENT **rpcp) 2285 { 2286 struct nlm_host *host; 2287 struct nlm_waiting_lock *nw; 2288 2289 memset(result, 0, sizeof(*result)); 2290 2291 host = nlm_find_host_by_addr(svc_getrpccaller(rqstp), rqstp->rq_vers); 2292 if (!host) { 2293 result->stat.stat = nlm4_denied_nolocks; 2294 return (ENOMEM); 2295 } 2296 2297 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 2298 result->stat.stat = nlm4_denied; 2299 KFAIL_POINT_CODE(DEBUG_FP, nlm_deny_grant, goto out); 2300 2301 mtx_lock(&nlm_global_lock); 2302 TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) { 2303 if (!nw->nw_waiting) 2304 continue; 2305 if (argp->alock.svid == nw->nw_lock.svid 2306 && argp->alock.l_offset == nw->nw_lock.l_offset 2307 && argp->alock.l_len == nw->nw_lock.l_len 2308 && argp->alock.fh.n_len == nw->nw_lock.fh.n_len 2309 && !memcmp(argp->alock.fh.n_bytes, nw->nw_lock.fh.n_bytes, 2310 nw->nw_lock.fh.n_len)) { 2311 nw->nw_waiting = FALSE; 2312 wakeup(nw); 2313 result->stat.stat = nlm4_granted; 2314 break; 2315 } 2316 } 2317 mtx_unlock(&nlm_global_lock); 2318 2319 out: 2320 if (rpcp) 2321 *rpcp = nlm_host_get_rpc(host, TRUE); 2322 nlm_host_release(host); 2323 return (0); 2324 } 2325 2326 
/*
 * Handle an NLM4_GRANTED_RES message: the client's reply to a GRANTED
 * callback we sent earlier for an asynchronously granted lock.  Look up
 * the pending async lock via the cookie we embedded in the callback and,
 * if the client rejected the grant, undo the local advisory lock.
 */
void
nlm_do_granted_res(nlm4_res *argp, struct svc_req *rqstp)
{
	struct nlm_host *host = NULL;
	struct nlm_async_lock *af = NULL;
	int error;

	/* The cookie must be one of ours, i.e. an nlm_grantcookie. */
	if (argp->cookie.n_len != sizeof(struct nlm_grantcookie)) {
		NLM_DEBUG(1, "NLM: bogus grant cookie");
		goto out;
	}

	host = nlm_find_host_by_sysid(ng_sysid(&argp->cookie));
	if (!host) {
		NLM_DEBUG(1, "NLM: Unknown host rejected our grant");
		goto out;
	}

	/* Find the matching entry on the host's granted list and detach it. */
	mtx_lock(&host->nh_lock);
	TAILQ_FOREACH(af, &host->nh_granted, af_link)
	    if (ng_cookie(&argp->cookie) ==
		ng_cookie(&af->af_granted.cookie))
		    break;
	if (af)
		TAILQ_REMOVE(&host->nh_granted, af, af_link);
	mtx_unlock(&host->nh_lock);

	if (!af) {
		NLM_DEBUG(1, "NLM: host %s (sysid %d) replied to our grant "
		    "with unrecognized cookie %d:%d", host->nh_caller_name,
		    host->nh_sysid, ng_sysid(&argp->cookie),
		    ng_cookie(&argp->cookie));
		goto out;
	}

	if (argp->stat.stat != nlm4_granted) {
		/*
		 * The client rejected the grant, so release the lock we
		 * took on its behalf.
		 */
		af->af_fl.l_type = F_UNLCK;
		error = VOP_ADVLOCK(af->af_vp, NULL, F_UNLCK, &af->af_fl, F_REMOTE);
		if (error) {
			NLM_DEBUG(1, "NLM: host %s (sysid %d) rejected our grant "
			    "and we failed to unlock (%d)", host->nh_caller_name,
			    host->nh_sysid, error);
			goto out;
		}

		NLM_DEBUG(5, "NLM: async lock %p rejected by host %s (sysid %d)",
		    af, host->nh_caller_name, host->nh_sysid);
	} else {
		NLM_DEBUG(5, "NLM: async lock %p accepted by host %s (sysid %d)",
		    af, host->nh_caller_name, host->nh_sysid);
	}

out:
	if (af)
		nlm_free_async_lock(af);
	if (host)
		nlm_host_release(host);
}

/*
 * Handle an NLM_FREE_ALL message from a client: notify every host
 * record whose caller name matches, dropping all locks held on that
 * client's behalf.
 */
void
nlm_do_free_all(nlm4_notify *argp)
{
	struct nlm_host *host, *thost;

	TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, thost) {
		if (!strcmp(host->nh_caller_name, argp->name))
			nlm_host_notify(host, argp->state);
	}
}

/*
 *
Kernel module glue 2398 */ 2399 static int 2400 nfslockd_modevent(module_t mod, int type, void *data) 2401 { 2402 2403 switch (type) { 2404 case MOD_LOAD: 2405 return (nlm_init()); 2406 2407 case MOD_UNLOAD: 2408 nlm_uninit(); 2409 /* The NLM module cannot be safely unloaded. */ 2410 /* FALLTHROUGH */ 2411 default: 2412 return (EOPNOTSUPP); 2413 } 2414 } 2415 static moduledata_t nfslockd_mod = { 2416 "nfslockd", 2417 nfslockd_modevent, 2418 NULL, 2419 }; 2420 DECLARE_MODULE(nfslockd, nfslockd_mod, SI_SUB_VFS, SI_ORDER_ANY); 2421 2422 /* So that loader and kldload(2) can find us, wherever we are.. */ 2423 MODULE_DEPEND(nfslockd, krpc, 1, 1, 1); 2424 MODULE_DEPEND(nfslockd, nfslock, 1, 1, 1); 2425 MODULE_VERSION(nfslockd, 1); 2426