1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ 5 * Authors: Doug Rabson <dfr@rabson.org> 6 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include "opt_inet6.h" 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include <sys/param.h> 36 #include <sys/fail.h> 37 #include <sys/fcntl.h> 38 #include <sys/kernel.h> 39 #include <sys/kthread.h> 40 #include <sys/lockf.h> 41 #include <sys/malloc.h> 42 #include <sys/mount.h> 43 #include <sys/priv.h> 44 #include <sys/proc.h> 45 #include <sys/socket.h> 46 #include <sys/socketvar.h> 47 #include <sys/syscall.h> 48 #include <sys/sysctl.h> 49 #include <sys/sysent.h> 50 #include <sys/syslog.h> 51 #include <sys/sysproto.h> 52 #include <sys/systm.h> 53 #include <sys/taskqueue.h> 54 #include <sys/unistd.h> 55 #include <sys/vnode.h> 56 57 #include <nfs/nfsproto.h> 58 #include <nfs/nfs_lock.h> 59 60 #include <nlm/nlm_prot.h> 61 #include <nlm/sm_inter.h> 62 #include <nlm/nlm.h> 63 #include <rpc/rpc_com.h> 64 #include <rpc/rpcb_prot.h> 65 66 MALLOC_DEFINE(M_NLM, "NLM", "Network Lock Manager"); 67 68 /* 69 * If a host is inactive (and holds no locks) for this amount of 70 * seconds, we consider it idle and stop tracking it. 71 */ 72 #define NLM_IDLE_TIMEOUT 30 73 74 /* 75 * We check the host list for idle every few seconds. 76 */ 77 #define NLM_IDLE_PERIOD 5 78 79 /* 80 * We only look for GRANTED_RES messages for a little while. 81 */ 82 #define NLM_EXPIRE_TIMEOUT 10 83 84 /* 85 * Support for sysctl vfs.nlm.sysid 86 */ 87 static SYSCTL_NODE(_vfs, OID_AUTO, nlm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 88 "Network Lock Manager"); 89 static SYSCTL_NODE(_vfs_nlm, OID_AUTO, sysid, 90 CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 91 ""); 92 93 /* 94 * Syscall hooks 95 */ 96 static struct syscall_helper_data nlm_syscalls[] = { 97 SYSCALL_INIT_HELPER(nlm_syscall), 98 SYSCALL_INIT_LAST 99 }; 100 101 /* 102 * Debug level passed in from userland. We also support a sysctl hook 103 * so that it can be changed on a live system. 104 */ 105 static int nlm_debug_level; 106 SYSCTL_INT(_debug, OID_AUTO, nlm_debug, CTLFLAG_RW, &nlm_debug_level, 0, ""); 107 108 #define NLM_DEBUG(_level, args...) \ 109 do { \ 110 if (nlm_debug_level >= (_level)) \ 111 log(LOG_DEBUG, args); \ 112 } while(0) 113 #define NLM_ERR(args...) \ 114 do { \ 115 log(LOG_ERR, args); \ 116 } while(0) 117 118 /* 119 * Grace period handling. The value of nlm_grace_threshold is the 120 * value of time_uptime after which we are serving requests normally. 121 */ 122 static time_t nlm_grace_threshold; 123 124 /* 125 * We check for idle hosts if time_uptime is greater than 126 * nlm_next_idle_check, 127 */ 128 static time_t nlm_next_idle_check; 129 130 /* 131 * A flag to indicate the server is already running. 132 */ 133 static int nlm_is_running; 134 135 /* 136 * A socket to use for RPC - shared by all IPv4 RPC clients. 137 */ 138 static struct socket *nlm_socket; 139 140 #ifdef INET6 141 142 /* 143 * A socket to use for RPC - shared by all IPv6 RPC clients. 144 */ 145 static struct socket *nlm_socket6; 146 147 #endif 148 149 /* 150 * An RPC client handle that can be used to communicate with the local 151 * NSM. 152 */ 153 static CLIENT *nlm_nsm; 154 155 /* 156 * An AUTH handle for the server's creds. 157 */ 158 static AUTH *nlm_auth; 159 160 /* 161 * A zero timeval for sending async RPC messages. 162 */ 163 struct timeval nlm_zero_tv = { 0, 0 }; 164 165 /* 166 * The local NSM state number 167 */ 168 int nlm_nsm_state; 169 170 /* 171 * A lock to protect the host list and waiting lock list. 172 */ 173 static struct mtx nlm_global_lock; 174 175 /* 176 * Locks: 177 * (l) locked by nh_lock 178 * (s) only accessed via server RPC which is single threaded 179 * (g) locked by nlm_global_lock 180 * (c) const until freeing 181 * (a) modified using atomic ops 182 */ 183 184 /* 185 * A pending client-side lock request, stored on the nlm_waiting_locks 186 * list. 187 */ 188 struct nlm_waiting_lock { 189 TAILQ_ENTRY(nlm_waiting_lock) nw_link; /* (g) */ 190 bool_t nw_waiting; /* (g) */ 191 nlm4_lock nw_lock; /* (c) */ 192 union nfsfh nw_fh; /* (c) */ 193 struct vnode *nw_vp; /* (c) */ 194 }; 195 TAILQ_HEAD(nlm_waiting_lock_list, nlm_waiting_lock); 196 197 struct nlm_waiting_lock_list nlm_waiting_locks; /* (g) */ 198 199 /* 200 * A pending server-side asynchronous lock request, stored on the 201 * nh_pending list of the NLM host. 202 */ 203 struct nlm_async_lock { 204 TAILQ_ENTRY(nlm_async_lock) af_link; /* (l) host's list of locks */ 205 struct task af_task; /* (c) async callback details */ 206 void *af_cookie; /* (l) lock manager cancel token */ 207 struct vnode *af_vp; /* (l) vnode to lock */ 208 struct flock af_fl; /* (c) lock details */ 209 struct nlm_host *af_host; /* (c) host which is locking */ 210 CLIENT *af_rpc; /* (c) rpc client to send message */ 211 nlm4_testargs af_granted; /* (c) notification details */ 212 time_t af_expiretime; /* (c) notification time */ 213 }; 214 TAILQ_HEAD(nlm_async_lock_list, nlm_async_lock); 215 216 /* 217 * NLM host. 218 */ 219 enum nlm_host_state { 220 NLM_UNMONITORED, 221 NLM_MONITORED, 222 NLM_MONITOR_FAILED, 223 NLM_RECOVERING 224 }; 225 226 struct nlm_rpc { 227 CLIENT *nr_client; /* (l) RPC client handle */ 228 time_t nr_create_time; /* (l) when client was created */ 229 }; 230 231 struct nlm_host { 232 struct mtx nh_lock; 233 volatile u_int nh_refs; /* (a) reference count */ 234 TAILQ_ENTRY(nlm_host) nh_link; /* (g) global list of hosts */ 235 char nh_caller_name[MAXNAMELEN]; /* (c) printable name of host */ 236 uint32_t nh_sysid; /* (c) our allocaed system ID */ 237 char nh_sysid_string[10]; /* (c) string rep. of sysid */ 238 struct sockaddr_storage nh_addr; /* (s) remote address of host */ 239 struct nlm_rpc nh_srvrpc; /* (l) RPC for server replies */ 240 struct nlm_rpc nh_clntrpc; /* (l) RPC for client requests */ 241 rpcvers_t nh_vers; /* (s) NLM version of host */ 242 int nh_state; /* (s) last seen NSM state of host */ 243 enum nlm_host_state nh_monstate; /* (l) local NSM monitoring state */ 244 time_t nh_idle_timeout; /* (s) Time at which host is idle */ 245 struct sysctl_ctx_list nh_sysctl; /* (c) vfs.nlm.sysid nodes */ 246 uint32_t nh_grantcookie; /* (l) grant cookie counter */ 247 struct nlm_async_lock_list nh_pending; /* (l) pending async locks */ 248 struct nlm_async_lock_list nh_granted; /* (l) granted locks */ 249 struct nlm_async_lock_list nh_finished; /* (l) finished async locks */ 250 }; 251 TAILQ_HEAD(nlm_host_list, nlm_host); 252 253 static struct nlm_host_list nlm_hosts; /* (g) */ 254 static uint32_t nlm_next_sysid = 1; /* (g) */ 255 256 static void nlm_host_unmonitor(struct nlm_host *); 257 258 struct nlm_grantcookie { 259 uint32_t ng_sysid; 260 uint32_t ng_cookie; 261 }; 262 263 static inline uint32_t 264 ng_sysid(struct netobj *src) 265 { 266 267 return ((struct nlm_grantcookie *)src->n_bytes)->ng_sysid; 268 } 269 270 static inline uint32_t 271 ng_cookie(struct netobj *src) 272 { 273 274 return ((struct nlm_grantcookie *)src->n_bytes)->ng_cookie; 275 } 276 277 /**********************************************************************/ 278 279 /* 280 * Initialise NLM globals. 281 */ 282 static int 283 nlm_init(void) 284 { 285 int error; 286 287 mtx_init(&nlm_global_lock, "nlm_global_lock", NULL, MTX_DEF); 288 TAILQ_INIT(&nlm_waiting_locks); 289 TAILQ_INIT(&nlm_hosts); 290 291 error = syscall_helper_register(nlm_syscalls, SY_THR_STATIC_KLD); 292 if (error != 0) 293 NLM_ERR("Can't register NLM syscall\n"); 294 return (error); 295 } 296 297 static void 298 nlm_uninit(void) 299 { 300 301 syscall_helper_unregister(nlm_syscalls); 302 } 303 304 /* 305 * Create a netobj from an arbitrary source. 306 */ 307 void 308 nlm_make_netobj(struct netobj *dst, caddr_t src, size_t srcsize, 309 struct malloc_type *type) 310 { 311 312 dst->n_len = srcsize; 313 dst->n_bytes = malloc(srcsize, type, M_WAITOK); 314 memcpy(dst->n_bytes, src, srcsize); 315 } 316 317 /* 318 * Copy a struct netobj. 319 */ 320 void 321 nlm_copy_netobj(struct netobj *dst, struct netobj *src, 322 struct malloc_type *type) 323 { 324 325 nlm_make_netobj(dst, src->n_bytes, src->n_len, type); 326 } 327 328 /* 329 * Create an RPC client handle for the given (address,prog,vers) 330 * triple using UDP. 331 */ 332 static CLIENT * 333 nlm_get_rpc(struct sockaddr *sa, rpcprog_t prog, rpcvers_t vers) 334 { 335 char *wchan = "nlmrcv"; 336 struct sockaddr_storage ss; 337 struct socket *so; 338 CLIENT *rpcb; 339 struct timeval timo; 340 RPCB parms; 341 char *uaddr; 342 enum clnt_stat stat = RPC_SUCCESS; 343 int rpcvers = RPCBVERS4; 344 bool_t do_tcp = FALSE; 345 bool_t tryagain = FALSE; 346 struct portmap mapping; 347 u_short port = 0; 348 349 /* 350 * First we need to contact the remote RPCBIND service to find 351 * the right port. 352 */ 353 memcpy(&ss, sa, sa->sa_len); 354 switch (ss.ss_family) { 355 case AF_INET: 356 ((struct sockaddr_in *)&ss)->sin_port = htons(111); 357 so = nlm_socket; 358 break; 359 #ifdef INET6 360 case AF_INET6: 361 ((struct sockaddr_in6 *)&ss)->sin6_port = htons(111); 362 so = nlm_socket6; 363 break; 364 #endif 365 366 default: 367 /* 368 * Unsupported address family - fail. 369 */ 370 return (NULL); 371 } 372 373 rpcb = clnt_dg_create(so, (struct sockaddr *)&ss, 374 RPCBPROG, rpcvers, 0, 0); 375 if (!rpcb) 376 return (NULL); 377 378 try_tcp: 379 parms.r_prog = prog; 380 parms.r_vers = vers; 381 if (do_tcp) 382 parms.r_netid = "tcp"; 383 else 384 parms.r_netid = "udp"; 385 parms.r_addr = ""; 386 parms.r_owner = ""; 387 388 /* 389 * Use the default timeout. 390 */ 391 timo.tv_sec = 25; 392 timo.tv_usec = 0; 393 again: 394 switch (rpcvers) { 395 case RPCBVERS4: 396 case RPCBVERS: 397 /* 398 * Try RPCBIND 4 then 3. 399 */ 400 uaddr = NULL; 401 stat = CLNT_CALL(rpcb, (rpcprog_t) RPCBPROC_GETADDR, 402 (xdrproc_t) xdr_rpcb, &parms, 403 (xdrproc_t) xdr_wrapstring, &uaddr, timo); 404 if (stat == RPC_SUCCESS) { 405 /* 406 * We have a reply from the remote RPCBIND - turn it 407 * into an appropriate address and make a new client 408 * that can talk to the remote NLM. 409 * 410 * XXX fixup IPv6 scope ID. 411 */ 412 struct netbuf *a; 413 a = __rpc_uaddr2taddr_af(ss.ss_family, uaddr); 414 if (!a) { 415 tryagain = TRUE; 416 } else { 417 tryagain = FALSE; 418 memcpy(&ss, a->buf, a->len); 419 free(a->buf, M_RPC); 420 free(a, M_RPC); 421 xdr_free((xdrproc_t) xdr_wrapstring, &uaddr); 422 } 423 } 424 if (tryagain || stat == RPC_PROGVERSMISMATCH) { 425 if (rpcvers == RPCBVERS4) 426 rpcvers = RPCBVERS; 427 else if (rpcvers == RPCBVERS) 428 rpcvers = PMAPVERS; 429 CLNT_CONTROL(rpcb, CLSET_VERS, &rpcvers); 430 goto again; 431 } 432 break; 433 case PMAPVERS: 434 /* 435 * Try portmap. 436 */ 437 mapping.pm_prog = parms.r_prog; 438 mapping.pm_vers = parms.r_vers; 439 mapping.pm_prot = do_tcp ? IPPROTO_TCP : IPPROTO_UDP; 440 mapping.pm_port = 0; 441 442 stat = CLNT_CALL(rpcb, (rpcprog_t) PMAPPROC_GETPORT, 443 (xdrproc_t) xdr_portmap, &mapping, 444 (xdrproc_t) xdr_u_short, &port, timo); 445 446 if (stat == RPC_SUCCESS) { 447 switch (ss.ss_family) { 448 case AF_INET: 449 ((struct sockaddr_in *)&ss)->sin_port = 450 htons(port); 451 break; 452 453 #ifdef INET6 454 case AF_INET6: 455 ((struct sockaddr_in6 *)&ss)->sin6_port = 456 htons(port); 457 break; 458 #endif 459 } 460 } 461 break; 462 default: 463 panic("invalid rpcvers %d", rpcvers); 464 } 465 /* 466 * We may have a positive response from the portmapper, but the NLM 467 * service was not found. Make sure we received a valid port. 468 */ 469 switch (ss.ss_family) { 470 case AF_INET: 471 port = ((struct sockaddr_in *)&ss)->sin_port; 472 break; 473 #ifdef INET6 474 case AF_INET6: 475 port = ((struct sockaddr_in6 *)&ss)->sin6_port; 476 break; 477 #endif 478 } 479 if (stat != RPC_SUCCESS || !port) { 480 /* 481 * If we were able to talk to rpcbind or portmap, but the udp 482 * variant wasn't available, ask about tcp. 483 * 484 * XXX - We could also check for a TCP portmapper, but 485 * if the host is running a portmapper at all, we should be able 486 * to hail it over UDP. 487 */ 488 if (stat == RPC_SUCCESS && !do_tcp) { 489 do_tcp = TRUE; 490 goto try_tcp; 491 } 492 493 /* Otherwise, bad news. */ 494 NLM_ERR("NLM: failed to contact remote rpcbind, " 495 "stat = %d, port = %d\n", (int) stat, port); 496 CLNT_DESTROY(rpcb); 497 return (NULL); 498 } 499 500 if (do_tcp) { 501 /* 502 * Destroy the UDP client we used to speak to rpcbind and 503 * recreate as a TCP client. 504 */ 505 struct netconfig *nconf = NULL; 506 507 CLNT_DESTROY(rpcb); 508 509 switch (ss.ss_family) { 510 case AF_INET: 511 nconf = getnetconfigent("tcp"); 512 break; 513 #ifdef INET6 514 case AF_INET6: 515 nconf = getnetconfigent("tcp6"); 516 break; 517 #endif 518 } 519 520 rpcb = clnt_reconnect_create(nconf, (struct sockaddr *)&ss, 521 prog, vers, 0, 0); 522 CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan); 523 rpcb->cl_auth = nlm_auth; 524 525 } else { 526 /* 527 * Re-use the client we used to speak to rpcbind. 528 */ 529 CLNT_CONTROL(rpcb, CLSET_SVC_ADDR, &ss); 530 CLNT_CONTROL(rpcb, CLSET_PROG, &prog); 531 CLNT_CONTROL(rpcb, CLSET_VERS, &vers); 532 CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan); 533 rpcb->cl_auth = nlm_auth; 534 } 535 536 return (rpcb); 537 } 538 539 /* 540 * This async callback after when an async lock request has been 541 * granted. We notify the host which initiated the request. 542 */ 543 static void 544 nlm_lock_callback(void *arg, int pending) 545 { 546 struct nlm_async_lock *af = (struct nlm_async_lock *) arg; 547 struct rpc_callextra ext; 548 549 NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) granted," 550 " cookie %d:%d\n", af, af->af_host->nh_caller_name, 551 af->af_host->nh_sysid, ng_sysid(&af->af_granted.cookie), 552 ng_cookie(&af->af_granted.cookie)); 553 554 /* 555 * Send the results back to the host. 556 * 557 * Note: there is a possible race here with nlm_host_notify 558 * destroying the RPC client. To avoid problems, the first 559 * thing nlm_host_notify does is to cancel pending async lock 560 * requests. 561 */ 562 memset(&ext, 0, sizeof(ext)); 563 ext.rc_auth = nlm_auth; 564 if (af->af_host->nh_vers == NLM_VERS4) { 565 nlm4_granted_msg_4(&af->af_granted, 566 NULL, af->af_rpc, &ext, nlm_zero_tv); 567 } else { 568 /* 569 * Back-convert to legacy protocol 570 */ 571 nlm_testargs granted; 572 granted.cookie = af->af_granted.cookie; 573 granted.exclusive = af->af_granted.exclusive; 574 granted.alock.caller_name = 575 af->af_granted.alock.caller_name; 576 granted.alock.fh = af->af_granted.alock.fh; 577 granted.alock.oh = af->af_granted.alock.oh; 578 granted.alock.svid = af->af_granted.alock.svid; 579 granted.alock.l_offset = 580 af->af_granted.alock.l_offset; 581 granted.alock.l_len = 582 af->af_granted.alock.l_len; 583 584 nlm_granted_msg_1(&granted, 585 NULL, af->af_rpc, &ext, nlm_zero_tv); 586 } 587 588 /* 589 * Move this entry to the nh_granted list. 590 */ 591 af->af_expiretime = time_uptime + NLM_EXPIRE_TIMEOUT; 592 mtx_lock(&af->af_host->nh_lock); 593 TAILQ_REMOVE(&af->af_host->nh_pending, af, af_link); 594 TAILQ_INSERT_TAIL(&af->af_host->nh_granted, af, af_link); 595 mtx_unlock(&af->af_host->nh_lock); 596 } 597 598 /* 599 * Free an async lock request. The request must have been removed from 600 * any list. 601 */ 602 static void 603 nlm_free_async_lock(struct nlm_async_lock *af) 604 { 605 /* 606 * Free an async lock. 607 */ 608 if (af->af_rpc) 609 CLNT_RELEASE(af->af_rpc); 610 xdr_free((xdrproc_t) xdr_nlm4_testargs, &af->af_granted); 611 if (af->af_vp) 612 vrele(af->af_vp); 613 free(af, M_NLM); 614 } 615 616 /* 617 * Cancel our async request - this must be called with 618 * af->nh_host->nh_lock held. This is slightly complicated by a 619 * potential race with our own callback. If we fail to cancel the 620 * lock, it must already have been granted - we make sure our async 621 * task has completed by calling taskqueue_drain in this case. 622 */ 623 static int 624 nlm_cancel_async_lock(struct nlm_async_lock *af) 625 { 626 struct nlm_host *host = af->af_host; 627 int error; 628 629 mtx_assert(&host->nh_lock, MA_OWNED); 630 631 mtx_unlock(&host->nh_lock); 632 633 error = VOP_ADVLOCKASYNC(af->af_vp, NULL, F_CANCEL, &af->af_fl, 634 F_REMOTE, NULL, &af->af_cookie); 635 636 if (error) { 637 /* 638 * We failed to cancel - make sure our callback has 639 * completed before we continue. 640 */ 641 taskqueue_drain(taskqueue_thread, &af->af_task); 642 } 643 644 mtx_lock(&host->nh_lock); 645 646 if (!error) { 647 NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) " 648 "cancelled\n", af, host->nh_caller_name, host->nh_sysid); 649 650 /* 651 * Remove from the nh_pending list and free now that 652 * we are safe from the callback. 653 */ 654 TAILQ_REMOVE(&host->nh_pending, af, af_link); 655 mtx_unlock(&host->nh_lock); 656 nlm_free_async_lock(af); 657 mtx_lock(&host->nh_lock); 658 } 659 660 return (error); 661 } 662 663 static void 664 nlm_check_expired_locks(struct nlm_host *host) 665 { 666 struct nlm_async_lock *af; 667 time_t uptime = time_uptime; 668 669 mtx_lock(&host->nh_lock); 670 while ((af = TAILQ_FIRST(&host->nh_granted)) != NULL 671 && uptime >= af->af_expiretime) { 672 NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) expired," 673 " cookie %d:%d\n", af, af->af_host->nh_caller_name, 674 af->af_host->nh_sysid, ng_sysid(&af->af_granted.cookie), 675 ng_cookie(&af->af_granted.cookie)); 676 TAILQ_REMOVE(&host->nh_granted, af, af_link); 677 mtx_unlock(&host->nh_lock); 678 nlm_free_async_lock(af); 679 mtx_lock(&host->nh_lock); 680 } 681 while ((af = TAILQ_FIRST(&host->nh_finished)) != NULL) { 682 TAILQ_REMOVE(&host->nh_finished, af, af_link); 683 mtx_unlock(&host->nh_lock); 684 nlm_free_async_lock(af); 685 mtx_lock(&host->nh_lock); 686 } 687 mtx_unlock(&host->nh_lock); 688 } 689 690 /* 691 * Free resources used by a host. This is called after the reference 692 * count has reached zero so it doesn't need to worry about locks. 693 */ 694 static void 695 nlm_host_destroy(struct nlm_host *host) 696 { 697 698 mtx_lock(&nlm_global_lock); 699 TAILQ_REMOVE(&nlm_hosts, host, nh_link); 700 mtx_unlock(&nlm_global_lock); 701 702 if (host->nh_srvrpc.nr_client) 703 CLNT_RELEASE(host->nh_srvrpc.nr_client); 704 if (host->nh_clntrpc.nr_client) 705 CLNT_RELEASE(host->nh_clntrpc.nr_client); 706 mtx_destroy(&host->nh_lock); 707 sysctl_ctx_free(&host->nh_sysctl); 708 free(host, M_NLM); 709 } 710 711 /* 712 * Thread start callback for client lock recovery 713 */ 714 static void 715 nlm_client_recovery_start(void *arg) 716 { 717 struct nlm_host *host = (struct nlm_host *) arg; 718 719 NLM_DEBUG(1, "NLM: client lock recovery for %s started\n", 720 host->nh_caller_name); 721 722 nlm_client_recovery(host); 723 724 NLM_DEBUG(1, "NLM: client lock recovery for %s completed\n", 725 host->nh_caller_name); 726 727 host->nh_monstate = NLM_MONITORED; 728 nlm_host_release(host); 729 730 kthread_exit(); 731 } 732 733 /* 734 * This is called when we receive a host state change notification. We 735 * unlock any active locks owned by the host. When rpc.lockd is 736 * shutting down, this function is called with newstate set to zero 737 * which allows us to cancel any pending async locks and clear the 738 * locking state. 739 */ 740 static void 741 nlm_host_notify(struct nlm_host *host, int newstate) 742 { 743 struct nlm_async_lock *af; 744 745 if (newstate) { 746 NLM_DEBUG(1, "NLM: host %s (sysid %d) rebooted, new " 747 "state is %d\n", host->nh_caller_name, 748 host->nh_sysid, newstate); 749 } 750 751 /* 752 * Cancel any pending async locks for this host. 753 */ 754 mtx_lock(&host->nh_lock); 755 while ((af = TAILQ_FIRST(&host->nh_pending)) != NULL) { 756 /* 757 * nlm_cancel_async_lock will remove the entry from 758 * nh_pending and free it. 759 */ 760 nlm_cancel_async_lock(af); 761 } 762 mtx_unlock(&host->nh_lock); 763 nlm_check_expired_locks(host); 764 765 /* 766 * The host just rebooted - trash its locks. 767 */ 768 lf_clearremotesys(host->nh_sysid); 769 host->nh_state = newstate; 770 771 /* 772 * If we have any remote locks for this host (i.e. it 773 * represents a remote NFS server that our local NFS client 774 * has locks for), start a recovery thread. 775 */ 776 if (newstate != 0 777 && host->nh_monstate != NLM_RECOVERING 778 && lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid) > 0) { 779 struct thread *td; 780 host->nh_monstate = NLM_RECOVERING; 781 refcount_acquire(&host->nh_refs); 782 kthread_add(nlm_client_recovery_start, host, curproc, &td, 0, 0, 783 "NFS lock recovery for %s", host->nh_caller_name); 784 } 785 } 786 787 /* 788 * Sysctl handler to count the number of locks for a sysid. 789 */ 790 static int 791 nlm_host_lock_count_sysctl(SYSCTL_HANDLER_ARGS) 792 { 793 struct nlm_host *host; 794 int count; 795 796 host = oidp->oid_arg1; 797 count = lf_countlocks(host->nh_sysid); 798 return sysctl_handle_int(oidp, &count, 0, req); 799 } 800 801 /* 802 * Sysctl handler to count the number of client locks for a sysid. 803 */ 804 static int 805 nlm_host_client_lock_count_sysctl(SYSCTL_HANDLER_ARGS) 806 { 807 struct nlm_host *host; 808 int count; 809 810 host = oidp->oid_arg1; 811 count = lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid); 812 return sysctl_handle_int(oidp, &count, 0, req); 813 } 814 815 /* 816 * Create a new NLM host. 817 */ 818 static struct nlm_host * 819 nlm_create_host(const char* caller_name) 820 { 821 struct nlm_host *host; 822 struct sysctl_oid *oid; 823 824 mtx_assert(&nlm_global_lock, MA_OWNED); 825 826 NLM_DEBUG(1, "NLM: new host %s (sysid %d)\n", 827 caller_name, nlm_next_sysid); 828 host = malloc(sizeof(struct nlm_host), M_NLM, M_NOWAIT|M_ZERO); 829 if (!host) 830 return (NULL); 831 mtx_init(&host->nh_lock, "nh_lock", NULL, MTX_DEF); 832 host->nh_refs = 1; 833 strlcpy(host->nh_caller_name, caller_name, MAXNAMELEN); 834 host->nh_sysid = nlm_next_sysid++; 835 snprintf(host->nh_sysid_string, sizeof(host->nh_sysid_string), 836 "%d", host->nh_sysid); 837 host->nh_vers = 0; 838 host->nh_state = 0; 839 host->nh_monstate = NLM_UNMONITORED; 840 host->nh_grantcookie = 1; 841 TAILQ_INIT(&host->nh_pending); 842 TAILQ_INIT(&host->nh_granted); 843 TAILQ_INIT(&host->nh_finished); 844 TAILQ_INSERT_TAIL(&nlm_hosts, host, nh_link); 845 846 mtx_unlock(&nlm_global_lock); 847 848 sysctl_ctx_init(&host->nh_sysctl); 849 oid = SYSCTL_ADD_NODE(&host->nh_sysctl, 850 SYSCTL_STATIC_CHILDREN(_vfs_nlm_sysid), 851 OID_AUTO, host->nh_sysid_string, CTLFLAG_RD | CTLFLAG_MPSAFE, 852 NULL, ""); 853 SYSCTL_ADD_STRING(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 854 "hostname", CTLFLAG_RD, host->nh_caller_name, 0, ""); 855 SYSCTL_ADD_UINT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 856 "version", CTLFLAG_RD, &host->nh_vers, 0, ""); 857 SYSCTL_ADD_UINT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 858 "monitored", CTLFLAG_RD, &host->nh_monstate, 0, ""); 859 SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 860 "lock_count", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, host, 861 0, nlm_host_lock_count_sysctl, "I", ""); 862 SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 863 "client_lock_count", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 864 host, 0, nlm_host_client_lock_count_sysctl, "I", ""); 865 866 mtx_lock(&nlm_global_lock); 867 868 return (host); 869 } 870 871 /* 872 * Acquire the next sysid for remote locks not handled by the NLM. 873 */ 874 uint32_t 875 nlm_acquire_next_sysid(void) 876 { 877 uint32_t next_sysid; 878 879 mtx_lock(&nlm_global_lock); 880 next_sysid = nlm_next_sysid++; 881 mtx_unlock(&nlm_global_lock); 882 return (next_sysid); 883 } 884 885 /* 886 * Return non-zero if the address parts of the two sockaddrs are the 887 * same. 888 */ 889 static int 890 nlm_compare_addr(const struct sockaddr *a, const struct sockaddr *b) 891 { 892 const struct sockaddr_in *a4, *b4; 893 #ifdef INET6 894 const struct sockaddr_in6 *a6, *b6; 895 #endif 896 897 if (a->sa_family != b->sa_family) 898 return (FALSE); 899 900 switch (a->sa_family) { 901 case AF_INET: 902 a4 = (const struct sockaddr_in *) a; 903 b4 = (const struct sockaddr_in *) b; 904 return !memcmp(&a4->sin_addr, &b4->sin_addr, 905 sizeof(a4->sin_addr)); 906 #ifdef INET6 907 case AF_INET6: 908 a6 = (const struct sockaddr_in6 *) a; 909 b6 = (const struct sockaddr_in6 *) b; 910 return !memcmp(&a6->sin6_addr, &b6->sin6_addr, 911 sizeof(a6->sin6_addr)); 912 #endif 913 } 914 915 return (0); 916 } 917 918 /* 919 * Check for idle hosts and stop monitoring them. We could also free 920 * the host structure here, possibly after a larger timeout but that 921 * would require some care to avoid races with 922 * e.g. nlm_host_lock_count_sysctl. 923 */ 924 static void 925 nlm_check_idle(void) 926 { 927 struct nlm_host *host; 928 929 mtx_assert(&nlm_global_lock, MA_OWNED); 930 931 if (time_uptime <= nlm_next_idle_check) 932 return; 933 934 nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD; 935 936 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 937 if (host->nh_monstate == NLM_MONITORED 938 && time_uptime > host->nh_idle_timeout) { 939 mtx_unlock(&nlm_global_lock); 940 if (lf_countlocks(host->nh_sysid) > 0 941 || lf_countlocks(NLM_SYSID_CLIENT 942 + host->nh_sysid)) { 943 host->nh_idle_timeout = 944 time_uptime + NLM_IDLE_TIMEOUT; 945 mtx_lock(&nlm_global_lock); 946 continue; 947 } 948 nlm_host_unmonitor(host); 949 mtx_lock(&nlm_global_lock); 950 } 951 } 952 } 953 954 /* 955 * Search for an existing NLM host that matches the given name 956 * (typically the caller_name element of an nlm4_lock). If none is 957 * found, create a new host. If 'addr' is non-NULL, record the remote 958 * address of the host so that we can call it back for async 959 * responses. If 'vers' is greater than zero then record the NLM 960 * program version to use to communicate with this client. 961 */ 962 struct nlm_host * 963 nlm_find_host_by_name(const char *name, const struct sockaddr *addr, 964 rpcvers_t vers) 965 { 966 struct nlm_host *host; 967 968 mtx_lock(&nlm_global_lock); 969 970 /* 971 * The remote host is determined by caller_name. 972 */ 973 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 974 if (!strcmp(host->nh_caller_name, name)) 975 break; 976 } 977 978 if (!host) { 979 host = nlm_create_host(name); 980 if (!host) { 981 mtx_unlock(&nlm_global_lock); 982 return (NULL); 983 } 984 } 985 refcount_acquire(&host->nh_refs); 986 987 host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT; 988 989 /* 990 * If we have an address for the host, record it so that we 991 * can send async replies etc. 992 */ 993 if (addr) { 994 995 KASSERT(addr->sa_len < sizeof(struct sockaddr_storage), 996 ("Strange remote transport address length")); 997 998 /* 999 * If we have seen an address before and we currently 1000 * have an RPC client handle, make sure the address is 1001 * the same, otherwise discard the client handle. 1002 */ 1003 if (host->nh_addr.ss_len && host->nh_srvrpc.nr_client) { 1004 if (!nlm_compare_addr( 1005 (struct sockaddr *) &host->nh_addr, 1006 addr) 1007 || host->nh_vers != vers) { 1008 CLIENT *client; 1009 mtx_lock(&host->nh_lock); 1010 client = host->nh_srvrpc.nr_client; 1011 host->nh_srvrpc.nr_client = NULL; 1012 mtx_unlock(&host->nh_lock); 1013 if (client) { 1014 CLNT_RELEASE(client); 1015 } 1016 } 1017 } 1018 memcpy(&host->nh_addr, addr, addr->sa_len); 1019 host->nh_vers = vers; 1020 } 1021 1022 nlm_check_idle(); 1023 1024 mtx_unlock(&nlm_global_lock); 1025 1026 return (host); 1027 } 1028 1029 /* 1030 * Search for an existing NLM host that matches the given remote 1031 * address. If none is found, create a new host with the requested 1032 * address and remember 'vers' as the NLM protocol version to use for 1033 * that host. 1034 */ 1035 struct nlm_host * 1036 nlm_find_host_by_addr(const struct sockaddr *addr, int vers) 1037 { 1038 /* 1039 * Fake up a name using inet_ntop. This buffer is 1040 * large enough for an IPv6 address. 1041 */ 1042 char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"]; 1043 struct nlm_host *host; 1044 1045 switch (addr->sa_family) { 1046 case AF_INET: 1047 inet_ntop(AF_INET, 1048 &((const struct sockaddr_in *) addr)->sin_addr, 1049 tmp, sizeof tmp); 1050 break; 1051 #ifdef INET6 1052 case AF_INET6: 1053 inet_ntop(AF_INET6, 1054 &((const struct sockaddr_in6 *) addr)->sin6_addr, 1055 tmp, sizeof tmp); 1056 break; 1057 #endif 1058 default: 1059 strlcpy(tmp, "<unknown>", sizeof(tmp)); 1060 } 1061 1062 mtx_lock(&nlm_global_lock); 1063 1064 /* 1065 * The remote host is determined by caller_name. 1066 */ 1067 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 1068 if (nlm_compare_addr(addr, 1069 (const struct sockaddr *) &host->nh_addr)) 1070 break; 1071 } 1072 1073 if (!host) { 1074 host = nlm_create_host(tmp); 1075 if (!host) { 1076 mtx_unlock(&nlm_global_lock); 1077 return (NULL); 1078 } 1079 memcpy(&host->nh_addr, addr, addr->sa_len); 1080 host->nh_vers = vers; 1081 } 1082 refcount_acquire(&host->nh_refs); 1083 1084 host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT; 1085 1086 nlm_check_idle(); 1087 1088 mtx_unlock(&nlm_global_lock); 1089 1090 return (host); 1091 } 1092 1093 /* 1094 * Find the NLM host that matches the value of 'sysid'. If none 1095 * exists, return NULL. 1096 */ 1097 static struct nlm_host * 1098 nlm_find_host_by_sysid(int sysid) 1099 { 1100 struct nlm_host *host; 1101 1102 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 1103 if (host->nh_sysid == sysid) { 1104 refcount_acquire(&host->nh_refs); 1105 return (host); 1106 } 1107 } 1108 1109 return (NULL); 1110 } 1111 1112 void nlm_host_release(struct nlm_host *host) 1113 { 1114 if (refcount_release(&host->nh_refs)) { 1115 /* 1116 * Free the host 1117 */ 1118 nlm_host_destroy(host); 1119 } 1120 } 1121 1122 /* 1123 * Unregister this NLM host with the local NSM due to idleness. 1124 */ 1125 static void 1126 nlm_host_unmonitor(struct nlm_host *host) 1127 { 1128 mon_id smmonid; 1129 sm_stat_res smstat; 1130 struct timeval timo; 1131 enum clnt_stat stat; 1132 1133 NLM_DEBUG(1, "NLM: unmonitoring %s (sysid %d)\n", 1134 host->nh_caller_name, host->nh_sysid); 1135 1136 /* 1137 * We put our assigned system ID value in the priv field to 1138 * make it simpler to find the host if we are notified of a 1139 * host restart. 1140 */ 1141 smmonid.mon_name = host->nh_caller_name; 1142 smmonid.my_id.my_name = "localhost"; 1143 smmonid.my_id.my_prog = NLM_PROG; 1144 smmonid.my_id.my_vers = NLM_SM; 1145 smmonid.my_id.my_proc = NLM_SM_NOTIFY; 1146 1147 timo.tv_sec = 25; 1148 timo.tv_usec = 0; 1149 stat = CLNT_CALL(nlm_nsm, SM_UNMON, 1150 (xdrproc_t) xdr_mon, &smmonid, 1151 (xdrproc_t) xdr_sm_stat, &smstat, timo); 1152 1153 if (stat != RPC_SUCCESS) { 1154 NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat); 1155 return; 1156 } 1157 if (smstat.res_stat == stat_fail) { 1158 NLM_ERR("Local NSM refuses to unmonitor %s\n", 1159 host->nh_caller_name); 1160 return; 1161 } 1162 1163 host->nh_monstate = NLM_UNMONITORED; 1164 } 1165 1166 /* 1167 * Register this NLM host with the local NSM so that we can be 1168 * notified if it reboots. 1169 */ 1170 void 1171 nlm_host_monitor(struct nlm_host *host, int state) 1172 { 1173 mon smmon; 1174 sm_stat_res smstat; 1175 struct timeval timo; 1176 enum clnt_stat stat; 1177 1178 if (state && !host->nh_state) { 1179 /* 1180 * This is the first time we have seen an NSM state 1181 * value for this host. We record it here to help 1182 * detect host reboots. 1183 */ 1184 host->nh_state = state; 1185 NLM_DEBUG(1, "NLM: host %s (sysid %d) has NSM state %d\n", 1186 host->nh_caller_name, host->nh_sysid, state); 1187 } 1188 1189 mtx_lock(&host->nh_lock); 1190 if (host->nh_monstate != NLM_UNMONITORED) { 1191 mtx_unlock(&host->nh_lock); 1192 return; 1193 } 1194 host->nh_monstate = NLM_MONITORED; 1195 mtx_unlock(&host->nh_lock); 1196 1197 NLM_DEBUG(1, "NLM: monitoring %s (sysid %d)\n", 1198 host->nh_caller_name, host->nh_sysid); 1199 1200 /* 1201 * We put our assigned system ID value in the priv field to 1202 * make it simpler to find the host if we are notified of a 1203 * host restart. 1204 */ 1205 smmon.mon_id.mon_name = host->nh_caller_name; 1206 smmon.mon_id.my_id.my_name = "localhost"; 1207 smmon.mon_id.my_id.my_prog = NLM_PROG; 1208 smmon.mon_id.my_id.my_vers = NLM_SM; 1209 smmon.mon_id.my_id.my_proc = NLM_SM_NOTIFY; 1210 memcpy(smmon.priv, &host->nh_sysid, sizeof(host->nh_sysid)); 1211 1212 timo.tv_sec = 25; 1213 timo.tv_usec = 0; 1214 stat = CLNT_CALL(nlm_nsm, SM_MON, 1215 (xdrproc_t) xdr_mon, &smmon, 1216 (xdrproc_t) xdr_sm_stat, &smstat, timo); 1217 1218 if (stat != RPC_SUCCESS) { 1219 NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat); 1220 return; 1221 } 1222 if (smstat.res_stat == stat_fail) { 1223 NLM_ERR("Local NSM refuses to monitor %s\n", 1224 host->nh_caller_name); 1225 mtx_lock(&host->nh_lock); 1226 host->nh_monstate = NLM_MONITOR_FAILED; 1227 mtx_unlock(&host->nh_lock); 1228 return; 1229 } 1230 1231 host->nh_monstate = NLM_MONITORED; 1232 } 1233 1234 /* 1235 * Return an RPC client handle that can be used to talk to the NLM 1236 * running on the given host. 1237 */ 1238 CLIENT * 1239 nlm_host_get_rpc(struct nlm_host *host, bool_t isserver) 1240 { 1241 struct nlm_rpc *rpc; 1242 CLIENT *client; 1243 1244 mtx_lock(&host->nh_lock); 1245 1246 if (isserver) 1247 rpc = &host->nh_srvrpc; 1248 else 1249 rpc = &host->nh_clntrpc; 1250 1251 /* 1252 * We can't hold onto RPC handles for too long - the async 1253 * call/reply protocol used by some NLM clients makes it hard 1254 * to tell when they change port numbers (e.g. after a 1255 * reboot). Note that if a client reboots while it isn't 1256 * holding any locks, it won't bother to notify us. We 1257 * expire the RPC handles after two minutes. 1258 */ 1259 if (rpc->nr_client && time_uptime > rpc->nr_create_time + 2*60) { 1260 client = rpc->nr_client; 1261 rpc->nr_client = NULL; 1262 mtx_unlock(&host->nh_lock); 1263 CLNT_RELEASE(client); 1264 mtx_lock(&host->nh_lock); 1265 } 1266 1267 if (!rpc->nr_client) { 1268 mtx_unlock(&host->nh_lock); 1269 client = nlm_get_rpc((struct sockaddr *)&host->nh_addr, 1270 NLM_PROG, host->nh_vers); 1271 mtx_lock(&host->nh_lock); 1272 1273 if (client) { 1274 if (rpc->nr_client) { 1275 mtx_unlock(&host->nh_lock); 1276 CLNT_DESTROY(client); 1277 mtx_lock(&host->nh_lock); 1278 } else { 1279 rpc->nr_client = client; 1280 rpc->nr_create_time = time_uptime; 1281 } 1282 } 1283 } 1284 1285 client = rpc->nr_client; 1286 if (client) 1287 CLNT_ACQUIRE(client); 1288 mtx_unlock(&host->nh_lock); 1289 1290 return (client); 1291 1292 } 1293 1294 int nlm_host_get_sysid(struct nlm_host *host) 1295 { 1296 1297 return (host->nh_sysid); 1298 } 1299 1300 int 1301 nlm_host_get_state(struct nlm_host *host) 1302 { 1303 1304 return (host->nh_state); 1305 } 1306 1307 void * 1308 nlm_register_wait_lock(struct nlm4_lock *lock, struct vnode *vp) 1309 { 1310 struct nlm_waiting_lock *nw; 1311 1312 nw = malloc(sizeof(struct nlm_waiting_lock), M_NLM, M_WAITOK); 1313 nw->nw_lock = *lock; 1314 memcpy(&nw->nw_fh.fh_bytes, nw->nw_lock.fh.n_bytes, 1315 nw->nw_lock.fh.n_len); 1316 nw->nw_lock.fh.n_bytes = nw->nw_fh.fh_bytes; 1317 nw->nw_waiting = TRUE; 1318 nw->nw_vp = vp; 1319 mtx_lock(&nlm_global_lock); 1320 TAILQ_INSERT_TAIL(&nlm_waiting_locks, nw, nw_link); 1321 mtx_unlock(&nlm_global_lock); 1322 1323 return nw; 1324 } 1325 1326 void 1327 nlm_deregister_wait_lock(void *handle) 1328 { 1329 struct nlm_waiting_lock *nw = handle; 1330 1331 mtx_lock(&nlm_global_lock); 1332 TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link); 1333 mtx_unlock(&nlm_global_lock); 1334 1335 free(nw, M_NLM); 1336 } 1337 1338 int 1339 nlm_wait_lock(void *handle, int timo) 1340 { 1341 struct nlm_waiting_lock *nw = handle; 1342 int error, stops_deferred; 1343 1344 /* 1345 * If the granted message arrived before we got here, 1346 * nw->nw_waiting will be FALSE - in that case, don't sleep. 1347 */ 1348 mtx_lock(&nlm_global_lock); 1349 error = 0; 1350 if (nw->nw_waiting) { 1351 stops_deferred = sigdeferstop(SIGDEFERSTOP_ERESTART); 1352 error = msleep(nw, &nlm_global_lock, PCATCH, "nlmlock", timo); 1353 sigallowstop(stops_deferred); 1354 } 1355 TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link); 1356 if (error) { 1357 /* 1358 * The granted message may arrive after the 1359 * interrupt/timeout but before we manage to lock the 1360 * mutex. Detect this by examining nw_lock. 1361 */ 1362 if (!nw->nw_waiting) 1363 error = 0; 1364 } else { 1365 /* 1366 * If nlm_cancel_wait is called, then error will be 1367 * zero but nw_waiting will still be TRUE. We 1368 * translate this into EINTR. 1369 */ 1370 if (nw->nw_waiting) 1371 error = EINTR; 1372 } 1373 mtx_unlock(&nlm_global_lock); 1374 1375 free(nw, M_NLM); 1376 1377 return (error); 1378 } 1379 1380 void 1381 nlm_cancel_wait(struct vnode *vp) 1382 { 1383 struct nlm_waiting_lock *nw; 1384 1385 mtx_lock(&nlm_global_lock); 1386 TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) { 1387 if (nw->nw_vp == vp) { 1388 wakeup(nw); 1389 } 1390 } 1391 mtx_unlock(&nlm_global_lock); 1392 } 1393 1394 /**********************************************************************/ 1395 1396 /* 1397 * Syscall interface with userland. 1398 */ 1399 1400 extern void nlm_prog_0(struct svc_req *rqstp, SVCXPRT *transp); 1401 extern void nlm_prog_1(struct svc_req *rqstp, SVCXPRT *transp); 1402 extern void nlm_prog_3(struct svc_req *rqstp, SVCXPRT *transp); 1403 extern void nlm_prog_4(struct svc_req *rqstp, SVCXPRT *transp); 1404 1405 static int 1406 nlm_register_services(SVCPOOL *pool, int addr_count, char **addrs) 1407 { 1408 static rpcvers_t versions[] = { 1409 NLM_SM, NLM_VERS, NLM_VERSX, NLM_VERS4 1410 }; 1411 static void (*dispatchers[])(struct svc_req *, SVCXPRT *) = { 1412 nlm_prog_0, nlm_prog_1, nlm_prog_3, nlm_prog_4 1413 }; 1414 1415 SVCXPRT **xprts; 1416 char netid[16]; 1417 char uaddr[128]; 1418 struct netconfig *nconf; 1419 int i, j, error; 1420 1421 if (!addr_count) { 1422 NLM_ERR("NLM: no service addresses given - can't start server"); 1423 return (EINVAL); 1424 } 1425 1426 if (addr_count < 0 || addr_count > 256 ) { 1427 NLM_ERR("NLM: too many service addresses (%d) given, " 1428 "max 256 - can't start server\n", addr_count); 1429 return (EINVAL); 1430 } 1431 1432 xprts = malloc(addr_count * sizeof(SVCXPRT *), M_NLM, M_WAITOK|M_ZERO); 1433 for (i = 0; i < nitems(versions); i++) { 1434 for (j = 0; j < addr_count; j++) { 1435 /* 1436 * Create transports for the first version and 1437 * then just register everything else to the 1438 * same transports. 1439 */ 1440 if (i == 0) { 1441 char *up; 1442 1443 error = copyin(&addrs[2*j], &up, 1444 sizeof(char*)); 1445 if (error) 1446 goto out; 1447 error = copyinstr(up, netid, sizeof(netid), 1448 NULL); 1449 if (error) 1450 goto out; 1451 error = copyin(&addrs[2*j+1], &up, 1452 sizeof(char*)); 1453 if (error) 1454 goto out; 1455 error = copyinstr(up, uaddr, sizeof(uaddr), 1456 NULL); 1457 if (error) 1458 goto out; 1459 nconf = getnetconfigent(netid); 1460 if (!nconf) { 1461 NLM_ERR("Can't lookup netid %s\n", 1462 netid); 1463 error = EINVAL; 1464 goto out; 1465 } 1466 xprts[j] = svc_tp_create(pool, dispatchers[i], 1467 NLM_PROG, versions[i], uaddr, nconf); 1468 if (!xprts[j]) { 1469 NLM_ERR("NLM: unable to create " 1470 "(NLM_PROG, %d).\n", versions[i]); 1471 error = EINVAL; 1472 goto out; 1473 } 1474 freenetconfigent(nconf); 1475 } else { 1476 nconf = getnetconfigent(xprts[j]->xp_netid); 1477 rpcb_unset(NLM_PROG, versions[i], nconf); 1478 if (!svc_reg(xprts[j], NLM_PROG, versions[i], 1479 dispatchers[i], nconf)) { 1480 NLM_ERR("NLM: can't register " 1481 "(NLM_PROG, %d)\n", versions[i]); 1482 error = EINVAL; 1483 goto out; 1484 } 1485 } 1486 } 1487 } 1488 error = 0; 1489 out: 1490 for (j = 0; j < addr_count; j++) { 1491 if (xprts[j]) 1492 SVC_RELEASE(xprts[j]); 1493 } 1494 free(xprts, M_NLM); 1495 return (error); 1496 } 1497 1498 /* 1499 * Main server entry point. Contacts the local NSM to get its current 1500 * state and send SM_UNMON_ALL. Registers the NLM services and then 1501 * services requests. Does not return until the server is interrupted 1502 * by a signal. 1503 */ 1504 static int 1505 nlm_server_main(int addr_count, char **addrs) 1506 { 1507 struct thread *td = curthread; 1508 int error; 1509 SVCPOOL *pool = NULL; 1510 struct sockopt opt; 1511 int portlow; 1512 #ifdef INET6 1513 struct sockaddr_in6 sin6; 1514 #endif 1515 struct sockaddr_in sin; 1516 my_id id; 1517 sm_stat smstat; 1518 struct timeval timo; 1519 enum clnt_stat stat; 1520 struct nlm_host *host, *nhost; 1521 struct nlm_waiting_lock *nw; 1522 vop_advlock_t *old_nfs_advlock; 1523 vop_reclaim_t *old_nfs_reclaim; 1524 1525 if (nlm_is_running != 0) { 1526 NLM_ERR("NLM: can't start server - " 1527 "it appears to be running already\n"); 1528 return (EPERM); 1529 } 1530 1531 if (nlm_socket == NULL) { 1532 memset(&opt, 0, sizeof(opt)); 1533 1534 error = socreate(AF_INET, &nlm_socket, SOCK_DGRAM, 0, 1535 td->td_ucred, td); 1536 if (error) { 1537 NLM_ERR("NLM: can't create IPv4 socket - error %d\n", 1538 error); 1539 return (error); 1540 } 1541 opt.sopt_dir = SOPT_SET; 1542 opt.sopt_level = IPPROTO_IP; 1543 opt.sopt_name = IP_PORTRANGE; 1544 portlow = IP_PORTRANGE_LOW; 1545 opt.sopt_val = &portlow; 1546 opt.sopt_valsize = sizeof(portlow); 1547 sosetopt(nlm_socket, &opt); 1548 1549 #ifdef INET6 1550 nlm_socket6 = NULL; 1551 error = socreate(AF_INET6, &nlm_socket6, SOCK_DGRAM, 0, 1552 td->td_ucred, td); 1553 if (error) { 1554 NLM_ERR("NLM: can't create IPv6 socket - error %d\n", 1555 error); 1556 soclose(nlm_socket); 1557 nlm_socket = NULL; 1558 return (error); 1559 } 1560 opt.sopt_dir = SOPT_SET; 1561 opt.sopt_level = IPPROTO_IPV6; 1562 opt.sopt_name = IPV6_PORTRANGE; 1563 portlow = IPV6_PORTRANGE_LOW; 1564 opt.sopt_val = &portlow; 1565 opt.sopt_valsize = sizeof(portlow); 1566 sosetopt(nlm_socket6, &opt); 1567 #endif 1568 } 1569 1570 nlm_auth = authunix_create(curthread->td_ucred); 1571 1572 #ifdef INET6 1573 memset(&sin6, 0, sizeof(sin6)); 1574 sin6.sin6_len = sizeof(sin6); 1575 sin6.sin6_family = AF_INET6; 1576 sin6.sin6_addr = in6addr_loopback; 1577 nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin6, SM_PROG, SM_VERS); 1578 if (!nlm_nsm) { 1579 #endif 1580 memset(&sin, 0, sizeof(sin)); 1581 sin.sin_len = sizeof(sin); 1582 sin.sin_family = AF_INET; 1583 sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); 1584 nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin, SM_PROG, 1585 SM_VERS); 1586 #ifdef INET6 1587 } 1588 #endif 1589 1590 if (!nlm_nsm) { 1591 NLM_ERR("Can't start NLM - unable to contact NSM\n"); 1592 error = EINVAL; 1593 goto out; 1594 } 1595 1596 pool = svcpool_create("NLM", NULL); 1597 1598 error = nlm_register_services(pool, addr_count, addrs); 1599 if (error) 1600 goto out; 1601 1602 memset(&id, 0, sizeof(id)); 1603 id.my_name = "NFS NLM"; 1604 1605 timo.tv_sec = 25; 1606 timo.tv_usec = 0; 1607 stat = CLNT_CALL(nlm_nsm, SM_UNMON_ALL, 1608 (xdrproc_t) xdr_my_id, &id, 1609 (xdrproc_t) xdr_sm_stat, &smstat, timo); 1610 1611 if (stat != RPC_SUCCESS) { 1612 struct rpc_err err; 1613 1614 CLNT_GETERR(nlm_nsm, &err); 1615 NLM_ERR("NLM: unexpected error contacting NSM, " 1616 "stat=%d, errno=%d\n", stat, err.re_errno); 1617 error = EINVAL; 1618 goto out; 1619 } 1620 nlm_is_running = 1; 1621 1622 NLM_DEBUG(1, "NLM: local NSM state is %d\n", smstat.state); 1623 nlm_nsm_state = smstat.state; 1624 1625 old_nfs_advlock = nfs_advlock_p; 1626 nfs_advlock_p = nlm_advlock; 1627 old_nfs_reclaim = nfs_reclaim_p; 1628 nfs_reclaim_p = nlm_reclaim; 1629 1630 svc_run(pool); 1631 error = 0; 1632 1633 nfs_advlock_p = old_nfs_advlock; 1634 nfs_reclaim_p = old_nfs_reclaim; 1635 1636 out: 1637 nlm_is_running = 0; 1638 if (pool) 1639 svcpool_destroy(pool); 1640 1641 /* 1642 * We are finished communicating with the NSM. 1643 */ 1644 if (nlm_nsm) { 1645 CLNT_RELEASE(nlm_nsm); 1646 nlm_nsm = NULL; 1647 } 1648 1649 /* 1650 * Trash all the existing state so that if the server 1651 * restarts, it gets a clean slate. This is complicated by the 1652 * possibility that there may be other threads trying to make 1653 * client locking requests. 1654 * 1655 * First we fake a client reboot notification which will 1656 * cancel any pending async locks and purge remote lock state 1657 * from the local lock manager. We release the reference from 1658 * nlm_hosts to the host (which may remove it from the list 1659 * and free it). After this phase, the only entries in the 1660 * nlm_host list should be from other threads performing 1661 * client lock requests. 1662 */ 1663 mtx_lock(&nlm_global_lock); 1664 TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) { 1665 wakeup(nw); 1666 } 1667 TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, nhost) { 1668 mtx_unlock(&nlm_global_lock); 1669 nlm_host_notify(host, 0); 1670 nlm_host_release(host); 1671 mtx_lock(&nlm_global_lock); 1672 } 1673 mtx_unlock(&nlm_global_lock); 1674 1675 AUTH_DESTROY(nlm_auth); 1676 1677 return (error); 1678 } 1679 1680 int 1681 sys_nlm_syscall(struct thread *td, struct nlm_syscall_args *uap) 1682 { 1683 int error; 1684 1685 error = priv_check(td, PRIV_NFS_LOCKD); 1686 if (error) 1687 return (error); 1688 1689 nlm_debug_level = uap->debug_level; 1690 nlm_grace_threshold = time_uptime + uap->grace_period; 1691 nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD; 1692 1693 return nlm_server_main(uap->addr_count, uap->addrs); 1694 } 1695 1696 /**********************************************************************/ 1697 1698 /* 1699 * NLM implementation details, called from the RPC stubs. 1700 */ 1701 1702 void 1703 nlm_sm_notify(struct nlm_sm_status *argp) 1704 { 1705 uint32_t sysid; 1706 struct nlm_host *host; 1707 1708 NLM_DEBUG(3, "nlm_sm_notify(): mon_name = %s\n", argp->mon_name); 1709 memcpy(&sysid, &argp->priv, sizeof(sysid)); 1710 host = nlm_find_host_by_sysid(sysid); 1711 if (host) { 1712 nlm_host_notify(host, argp->state); 1713 nlm_host_release(host); 1714 } 1715 } 1716 1717 static void 1718 nlm_convert_to_fhandle_t(fhandle_t *fhp, struct netobj *p) 1719 { 1720 memcpy(fhp, p->n_bytes, sizeof(fhandle_t)); 1721 } 1722 1723 struct vfs_state { 1724 struct mount *vs_mp; 1725 struct vnode *vs_vp; 1726 int vs_vnlocked; 1727 }; 1728 1729 static int 1730 nlm_get_vfs_state(struct nlm_host *host, struct svc_req *rqstp, 1731 fhandle_t *fhp, struct vfs_state *vs, accmode_t accmode) 1732 { 1733 int error; 1734 uint64_t exflags; 1735 struct ucred *cred = NULL, *credanon = NULL; 1736 1737 memset(vs, 0, sizeof(*vs)); 1738 1739 vs->vs_mp = vfs_getvfs(&fhp->fh_fsid); 1740 if (!vs->vs_mp) { 1741 return (ESTALE); 1742 } 1743 1744 /* accmode == 0 means don't check, since it is an unlock. */ 1745 if (accmode != 0) { 1746 error = VFS_CHECKEXP(vs->vs_mp, 1747 (struct sockaddr *)&host->nh_addr, &exflags, &credanon, 1748 NULL, NULL); 1749 if (error) 1750 goto out; 1751 1752 if (exflags & MNT_EXRDONLY || 1753 (vs->vs_mp->mnt_flag & MNT_RDONLY)) { 1754 error = EROFS; 1755 goto out; 1756 } 1757 } 1758 1759 error = VFS_FHTOVP(vs->vs_mp, &fhp->fh_fid, LK_EXCLUSIVE, &vs->vs_vp); 1760 if (error) 1761 goto out; 1762 vs->vs_vnlocked = TRUE; 1763 1764 if (accmode != 0) { 1765 if (!svc_getcred(rqstp, &cred, NULL)) { 1766 error = EINVAL; 1767 goto out; 1768 } 1769 if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) { 1770 crfree(cred); 1771 cred = credanon; 1772 credanon = NULL; 1773 } 1774 1775 /* 1776 * Check cred. 1777 */ 1778 error = VOP_ACCESS(vs->vs_vp, accmode, cred, curthread); 1779 /* 1780 * If this failed and accmode != VWRITE, try again with 1781 * VWRITE to maintain backwards compatibility with the 1782 * old code that always used VWRITE. 1783 */ 1784 if (error != 0 && accmode != VWRITE) 1785 error = VOP_ACCESS(vs->vs_vp, VWRITE, cred, curthread); 1786 if (error) 1787 goto out; 1788 } 1789 1790 VOP_UNLOCK(vs->vs_vp); 1791 vs->vs_vnlocked = FALSE; 1792 1793 out: 1794 if (cred) 1795 crfree(cred); 1796 if (credanon) 1797 crfree(credanon); 1798 1799 return (error); 1800 } 1801 1802 static void 1803 nlm_release_vfs_state(struct vfs_state *vs) 1804 { 1805 1806 if (vs->vs_vp) { 1807 if (vs->vs_vnlocked) 1808 vput(vs->vs_vp); 1809 else 1810 vrele(vs->vs_vp); 1811 } 1812 if (vs->vs_mp) 1813 vfs_rel(vs->vs_mp); 1814 } 1815 1816 static nlm4_stats 1817 nlm_convert_error(int error) 1818 { 1819 1820 if (error == ESTALE) 1821 return nlm4_stale_fh; 1822 else if (error == EROFS) 1823 return nlm4_rofs; 1824 else 1825 return nlm4_failed; 1826 } 1827 1828 int 1829 nlm_do_test(nlm4_testargs *argp, nlm4_testres *result, struct svc_req *rqstp, 1830 CLIENT **rpcp) 1831 { 1832 fhandle_t fh; 1833 struct vfs_state vs; 1834 struct nlm_host *host, *bhost; 1835 int error, sysid; 1836 struct flock fl; 1837 accmode_t accmode; 1838 1839 memset(result, 0, sizeof(*result)); 1840 memset(&vs, 0, sizeof(vs)); 1841 1842 host = nlm_find_host_by_name(argp->alock.caller_name, 1843 svc_getrpccaller(rqstp), rqstp->rq_vers); 1844 if (!host) { 1845 result->stat.stat = nlm4_denied_nolocks; 1846 return (ENOMEM); 1847 } 1848 1849 NLM_DEBUG(3, "nlm_do_test(): caller_name = %s (sysid = %d)\n", 1850 host->nh_caller_name, host->nh_sysid); 1851 1852 nlm_check_expired_locks(host); 1853 sysid = host->nh_sysid; 1854 1855 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 1856 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 1857 1858 if (time_uptime < nlm_grace_threshold) { 1859 result->stat.stat = nlm4_denied_grace_period; 1860 goto out; 1861 } 1862 1863 accmode = argp->exclusive ? VWRITE : VREAD; 1864 error = nlm_get_vfs_state(host, rqstp, &fh, &vs, accmode); 1865 if (error) { 1866 result->stat.stat = nlm_convert_error(error); 1867 goto out; 1868 } 1869 1870 fl.l_start = argp->alock.l_offset; 1871 fl.l_len = argp->alock.l_len; 1872 fl.l_pid = argp->alock.svid; 1873 fl.l_sysid = sysid; 1874 fl.l_whence = SEEK_SET; 1875 if (argp->exclusive) 1876 fl.l_type = F_WRLCK; 1877 else 1878 fl.l_type = F_RDLCK; 1879 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_GETLK, &fl, F_REMOTE); 1880 if (error) { 1881 result->stat.stat = nlm4_failed; 1882 goto out; 1883 } 1884 1885 if (fl.l_type == F_UNLCK) { 1886 result->stat.stat = nlm4_granted; 1887 } else { 1888 result->stat.stat = nlm4_denied; 1889 result->stat.nlm4_testrply_u.holder.exclusive = 1890 (fl.l_type == F_WRLCK); 1891 result->stat.nlm4_testrply_u.holder.svid = fl.l_pid; 1892 bhost = nlm_find_host_by_sysid(fl.l_sysid); 1893 if (bhost) { 1894 /* 1895 * We don't have any useful way of recording 1896 * the value of oh used in the original lock 1897 * request. Ideally, the test reply would have 1898 * a space for the owning host's name allowing 1899 * our caller's NLM to keep track. 1900 * 1901 * As far as I can see, Solaris uses an eight 1902 * byte structure for oh which contains a four 1903 * byte pid encoded in local byte order and 1904 * the first four bytes of the host 1905 * name. Linux uses a variable length string 1906 * 'pid@hostname' in ascii but doesn't even 1907 * return that in test replies. 1908 * 1909 * For the moment, return nothing in oh 1910 * (already zero'ed above). 1911 */ 1912 nlm_host_release(bhost); 1913 } 1914 result->stat.nlm4_testrply_u.holder.l_offset = fl.l_start; 1915 result->stat.nlm4_testrply_u.holder.l_len = fl.l_len; 1916 } 1917 1918 out: 1919 nlm_release_vfs_state(&vs); 1920 if (rpcp) 1921 *rpcp = nlm_host_get_rpc(host, TRUE); 1922 nlm_host_release(host); 1923 return (0); 1924 } 1925 1926 int 1927 nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp, 1928 bool_t monitor, CLIENT **rpcp) 1929 { 1930 fhandle_t fh; 1931 struct vfs_state vs; 1932 struct nlm_host *host; 1933 int error, sysid; 1934 struct flock fl; 1935 accmode_t accmode; 1936 1937 memset(result, 0, sizeof(*result)); 1938 memset(&vs, 0, sizeof(vs)); 1939 1940 host = nlm_find_host_by_name(argp->alock.caller_name, 1941 svc_getrpccaller(rqstp), rqstp->rq_vers); 1942 if (!host) { 1943 result->stat.stat = nlm4_denied_nolocks; 1944 return (ENOMEM); 1945 } 1946 1947 NLM_DEBUG(3, "nlm_do_lock(): caller_name = %s (sysid = %d)\n", 1948 host->nh_caller_name, host->nh_sysid); 1949 1950 if (monitor && host->nh_state && argp->state 1951 && host->nh_state != argp->state) { 1952 /* 1953 * The host rebooted without telling us. Trash its 1954 * locks. 1955 */ 1956 nlm_host_notify(host, argp->state); 1957 } 1958 1959 nlm_check_expired_locks(host); 1960 sysid = host->nh_sysid; 1961 1962 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 1963 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 1964 1965 if (time_uptime < nlm_grace_threshold && !argp->reclaim) { 1966 result->stat.stat = nlm4_denied_grace_period; 1967 goto out; 1968 } 1969 1970 accmode = argp->exclusive ? VWRITE : VREAD; 1971 error = nlm_get_vfs_state(host, rqstp, &fh, &vs, accmode); 1972 if (error) { 1973 result->stat.stat = nlm_convert_error(error); 1974 goto out; 1975 } 1976 1977 fl.l_start = argp->alock.l_offset; 1978 fl.l_len = argp->alock.l_len; 1979 fl.l_pid = argp->alock.svid; 1980 fl.l_sysid = sysid; 1981 fl.l_whence = SEEK_SET; 1982 if (argp->exclusive) 1983 fl.l_type = F_WRLCK; 1984 else 1985 fl.l_type = F_RDLCK; 1986 if (argp->block) { 1987 struct nlm_async_lock *af; 1988 CLIENT *client; 1989 struct nlm_grantcookie cookie; 1990 1991 /* 1992 * First, make sure we can contact the host's NLM. 1993 */ 1994 client = nlm_host_get_rpc(host, TRUE); 1995 if (!client) { 1996 result->stat.stat = nlm4_failed; 1997 goto out; 1998 } 1999 2000 /* 2001 * First we need to check and see if there is an 2002 * existing blocked lock that matches. This could be a 2003 * badly behaved client or an RPC re-send. If we find 2004 * one, just return nlm4_blocked. 2005 */ 2006 mtx_lock(&host->nh_lock); 2007 TAILQ_FOREACH(af, &host->nh_pending, af_link) { 2008 if (af->af_fl.l_start == fl.l_start 2009 && af->af_fl.l_len == fl.l_len 2010 && af->af_fl.l_pid == fl.l_pid 2011 && af->af_fl.l_type == fl.l_type) { 2012 break; 2013 } 2014 } 2015 if (!af) { 2016 cookie.ng_sysid = host->nh_sysid; 2017 cookie.ng_cookie = host->nh_grantcookie++; 2018 } 2019 mtx_unlock(&host->nh_lock); 2020 if (af) { 2021 CLNT_RELEASE(client); 2022 result->stat.stat = nlm4_blocked; 2023 goto out; 2024 } 2025 2026 af = malloc(sizeof(struct nlm_async_lock), M_NLM, 2027 M_WAITOK|M_ZERO); 2028 TASK_INIT(&af->af_task, 0, nlm_lock_callback, af); 2029 af->af_vp = vs.vs_vp; 2030 af->af_fl = fl; 2031 af->af_host = host; 2032 af->af_rpc = client; 2033 /* 2034 * We use M_RPC here so that we can xdr_free the thing 2035 * later. 2036 */ 2037 nlm_make_netobj(&af->af_granted.cookie, 2038 (caddr_t)&cookie, sizeof(cookie), M_RPC); 2039 af->af_granted.exclusive = argp->exclusive; 2040 af->af_granted.alock.caller_name = 2041 strdup(argp->alock.caller_name, M_RPC); 2042 nlm_copy_netobj(&af->af_granted.alock.fh, 2043 &argp->alock.fh, M_RPC); 2044 nlm_copy_netobj(&af->af_granted.alock.oh, 2045 &argp->alock.oh, M_RPC); 2046 af->af_granted.alock.svid = argp->alock.svid; 2047 af->af_granted.alock.l_offset = argp->alock.l_offset; 2048 af->af_granted.alock.l_len = argp->alock.l_len; 2049 2050 /* 2051 * Put the entry on the pending list before calling 2052 * VOP_ADVLOCKASYNC. We do this in case the lock 2053 * request was blocked (returning EINPROGRESS) but 2054 * then granted before we manage to run again. The 2055 * client may receive the granted message before we 2056 * send our blocked reply but thats their problem. 2057 */ 2058 mtx_lock(&host->nh_lock); 2059 TAILQ_INSERT_TAIL(&host->nh_pending, af, af_link); 2060 mtx_unlock(&host->nh_lock); 2061 2062 error = VOP_ADVLOCKASYNC(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE, 2063 &af->af_task, &af->af_cookie); 2064 2065 /* 2066 * If the lock completed synchronously, just free the 2067 * tracking structure now. 2068 */ 2069 if (error != EINPROGRESS) { 2070 CLNT_RELEASE(af->af_rpc); 2071 mtx_lock(&host->nh_lock); 2072 TAILQ_REMOVE(&host->nh_pending, af, af_link); 2073 mtx_unlock(&host->nh_lock); 2074 xdr_free((xdrproc_t) xdr_nlm4_testargs, 2075 &af->af_granted); 2076 free(af, M_NLM); 2077 } else { 2078 NLM_DEBUG(2, "NLM: pending async lock %p for %s " 2079 "(sysid %d)\n", af, host->nh_caller_name, sysid); 2080 /* 2081 * Don't vrele the vnode just yet - this must 2082 * wait until either the async callback 2083 * happens or the lock is cancelled. 2084 */ 2085 vs.vs_vp = NULL; 2086 } 2087 } else { 2088 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE); 2089 } 2090 2091 if (error) { 2092 if (error == EINPROGRESS) { 2093 result->stat.stat = nlm4_blocked; 2094 } else if (error == EDEADLK) { 2095 result->stat.stat = nlm4_deadlck; 2096 } else if (error == EAGAIN) { 2097 result->stat.stat = nlm4_denied; 2098 } else { 2099 result->stat.stat = nlm4_failed; 2100 } 2101 } else { 2102 if (monitor) 2103 nlm_host_monitor(host, argp->state); 2104 result->stat.stat = nlm4_granted; 2105 } 2106 2107 out: 2108 nlm_release_vfs_state(&vs); 2109 if (rpcp) 2110 *rpcp = nlm_host_get_rpc(host, TRUE); 2111 nlm_host_release(host); 2112 return (0); 2113 } 2114 2115 int 2116 nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *result, struct svc_req *rqstp, 2117 CLIENT **rpcp) 2118 { 2119 fhandle_t fh; 2120 struct vfs_state vs; 2121 struct nlm_host *host; 2122 int error, sysid; 2123 struct flock fl; 2124 struct nlm_async_lock *af; 2125 2126 memset(result, 0, sizeof(*result)); 2127 memset(&vs, 0, sizeof(vs)); 2128 2129 host = nlm_find_host_by_name(argp->alock.caller_name, 2130 svc_getrpccaller(rqstp), rqstp->rq_vers); 2131 if (!host) { 2132 result->stat.stat = nlm4_denied_nolocks; 2133 return (ENOMEM); 2134 } 2135 2136 NLM_DEBUG(3, "nlm_do_cancel(): caller_name = %s (sysid = %d)\n", 2137 host->nh_caller_name, host->nh_sysid); 2138 2139 nlm_check_expired_locks(host); 2140 sysid = host->nh_sysid; 2141 2142 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 2143 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 2144 2145 if (time_uptime < nlm_grace_threshold) { 2146 result->stat.stat = nlm4_denied_grace_period; 2147 goto out; 2148 } 2149 2150 error = nlm_get_vfs_state(host, rqstp, &fh, &vs, (accmode_t)0); 2151 if (error) { 2152 result->stat.stat = nlm_convert_error(error); 2153 goto out; 2154 } 2155 2156 fl.l_start = argp->alock.l_offset; 2157 fl.l_len = argp->alock.l_len; 2158 fl.l_pid = argp->alock.svid; 2159 fl.l_sysid = sysid; 2160 fl.l_whence = SEEK_SET; 2161 if (argp->exclusive) 2162 fl.l_type = F_WRLCK; 2163 else 2164 fl.l_type = F_RDLCK; 2165 2166 /* 2167 * First we need to try and find the async lock request - if 2168 * there isn't one, we give up and return nlm4_denied. 2169 */ 2170 mtx_lock(&host->nh_lock); 2171 2172 TAILQ_FOREACH(af, &host->nh_pending, af_link) { 2173 if (af->af_fl.l_start == fl.l_start 2174 && af->af_fl.l_len == fl.l_len 2175 && af->af_fl.l_pid == fl.l_pid 2176 && af->af_fl.l_type == fl.l_type) { 2177 break; 2178 } 2179 } 2180 2181 if (!af) { 2182 mtx_unlock(&host->nh_lock); 2183 result->stat.stat = nlm4_denied; 2184 goto out; 2185 } 2186 2187 error = nlm_cancel_async_lock(af); 2188 2189 if (error) { 2190 result->stat.stat = nlm4_denied; 2191 } else { 2192 result->stat.stat = nlm4_granted; 2193 } 2194 2195 mtx_unlock(&host->nh_lock); 2196 2197 out: 2198 nlm_release_vfs_state(&vs); 2199 if (rpcp) 2200 *rpcp = nlm_host_get_rpc(host, TRUE); 2201 nlm_host_release(host); 2202 return (0); 2203 } 2204 2205 int 2206 nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *result, struct svc_req *rqstp, 2207 CLIENT **rpcp) 2208 { 2209 fhandle_t fh; 2210 struct vfs_state vs; 2211 struct nlm_host *host; 2212 int error, sysid; 2213 struct flock fl; 2214 2215 memset(result, 0, sizeof(*result)); 2216 memset(&vs, 0, sizeof(vs)); 2217 2218 host = nlm_find_host_by_name(argp->alock.caller_name, 2219 svc_getrpccaller(rqstp), rqstp->rq_vers); 2220 if (!host) { 2221 result->stat.stat = nlm4_denied_nolocks; 2222 return (ENOMEM); 2223 } 2224 2225 NLM_DEBUG(3, "nlm_do_unlock(): caller_name = %s (sysid = %d)\n", 2226 host->nh_caller_name, host->nh_sysid); 2227 2228 nlm_check_expired_locks(host); 2229 sysid = host->nh_sysid; 2230 2231 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 2232 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 2233 2234 if (time_uptime < nlm_grace_threshold) { 2235 result->stat.stat = nlm4_denied_grace_period; 2236 goto out; 2237 } 2238 2239 error = nlm_get_vfs_state(host, rqstp, &fh, &vs, (accmode_t)0); 2240 if (error) { 2241 result->stat.stat = nlm_convert_error(error); 2242 goto out; 2243 } 2244 2245 fl.l_start = argp->alock.l_offset; 2246 fl.l_len = argp->alock.l_len; 2247 fl.l_pid = argp->alock.svid; 2248 fl.l_sysid = sysid; 2249 fl.l_whence = SEEK_SET; 2250 fl.l_type = F_UNLCK; 2251 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_UNLCK, &fl, F_REMOTE); 2252 2253 /* 2254 * Ignore the error - there is no result code for failure, 2255 * only for grace period. 2256 */ 2257 result->stat.stat = nlm4_granted; 2258 2259 out: 2260 nlm_release_vfs_state(&vs); 2261 if (rpcp) 2262 *rpcp = nlm_host_get_rpc(host, TRUE); 2263 nlm_host_release(host); 2264 return (0); 2265 } 2266 2267 int 2268 nlm_do_granted(nlm4_testargs *argp, nlm4_res *result, struct svc_req *rqstp, 2269 2270 CLIENT **rpcp) 2271 { 2272 struct nlm_host *host; 2273 struct nlm_waiting_lock *nw; 2274 2275 memset(result, 0, sizeof(*result)); 2276 2277 host = nlm_find_host_by_addr(svc_getrpccaller(rqstp), rqstp->rq_vers); 2278 if (!host) { 2279 result->stat.stat = nlm4_denied_nolocks; 2280 return (ENOMEM); 2281 } 2282 2283 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 2284 result->stat.stat = nlm4_denied; 2285 KFAIL_POINT_CODE(DEBUG_FP, nlm_deny_grant, goto out); 2286 2287 mtx_lock(&nlm_global_lock); 2288 TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) { 2289 if (!nw->nw_waiting) 2290 continue; 2291 if (argp->alock.svid == nw->nw_lock.svid 2292 && argp->alock.l_offset == nw->nw_lock.l_offset 2293 && argp->alock.l_len == nw->nw_lock.l_len 2294 && argp->alock.fh.n_len == nw->nw_lock.fh.n_len 2295 && !memcmp(argp->alock.fh.n_bytes, nw->nw_lock.fh.n_bytes, 2296 nw->nw_lock.fh.n_len)) { 2297 nw->nw_waiting = FALSE; 2298 wakeup(nw); 2299 result->stat.stat = nlm4_granted; 2300 break; 2301 } 2302 } 2303 mtx_unlock(&nlm_global_lock); 2304 2305 out: 2306 if (rpcp) 2307 *rpcp = nlm_host_get_rpc(host, TRUE); 2308 nlm_host_release(host); 2309 return (0); 2310 } 2311 2312 void 2313 nlm_do_granted_res(nlm4_res *argp, struct svc_req *rqstp) 2314 { 2315 struct nlm_host *host = NULL; 2316 struct nlm_async_lock *af = NULL; 2317 int error; 2318 2319 if (argp->cookie.n_len != sizeof(struct nlm_grantcookie)) { 2320 NLM_DEBUG(1, "NLM: bogus grant cookie"); 2321 goto out; 2322 } 2323 2324 host = nlm_find_host_by_sysid(ng_sysid(&argp->cookie)); 2325 if (!host) { 2326 NLM_DEBUG(1, "NLM: Unknown host rejected our grant"); 2327 goto out; 2328 } 2329 2330 mtx_lock(&host->nh_lock); 2331 TAILQ_FOREACH(af, &host->nh_granted, af_link) 2332 if (ng_cookie(&argp->cookie) == 2333 ng_cookie(&af->af_granted.cookie)) 2334 break; 2335 if (af) 2336 TAILQ_REMOVE(&host->nh_granted, af, af_link); 2337 mtx_unlock(&host->nh_lock); 2338 2339 if (!af) { 2340 NLM_DEBUG(1, "NLM: host %s (sysid %d) replied to our grant " 2341 "with unrecognized cookie %d:%d", host->nh_caller_name, 2342 host->nh_sysid, ng_sysid(&argp->cookie), 2343 ng_cookie(&argp->cookie)); 2344 goto out; 2345 } 2346 2347 if (argp->stat.stat != nlm4_granted) { 2348 af->af_fl.l_type = F_UNLCK; 2349 error = VOP_ADVLOCK(af->af_vp, NULL, F_UNLCK, &af->af_fl, F_REMOTE); 2350 if (error) { 2351 NLM_DEBUG(1, "NLM: host %s (sysid %d) rejected our grant " 2352 "and we failed to unlock (%d)", host->nh_caller_name, 2353 host->nh_sysid, error); 2354 goto out; 2355 } 2356 2357 NLM_DEBUG(5, "NLM: async lock %p rejected by host %s (sysid %d)", 2358 af, host->nh_caller_name, host->nh_sysid); 2359 } else { 2360 NLM_DEBUG(5, "NLM: async lock %p accepted by host %s (sysid %d)", 2361 af, host->nh_caller_name, host->nh_sysid); 2362 } 2363 2364 out: 2365 if (af) 2366 nlm_free_async_lock(af); 2367 if (host) 2368 nlm_host_release(host); 2369 } 2370 2371 void 2372 nlm_do_free_all(nlm4_notify *argp) 2373 { 2374 struct nlm_host *host, *thost; 2375 2376 TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, thost) { 2377 if (!strcmp(host->nh_caller_name, argp->name)) 2378 nlm_host_notify(host, argp->state); 2379 } 2380 } 2381 2382 /* 2383 * Kernel module glue 2384 */ 2385 static int 2386 nfslockd_modevent(module_t mod, int type, void *data) 2387 { 2388 2389 switch (type) { 2390 case MOD_LOAD: 2391 return (nlm_init()); 2392 2393 case MOD_UNLOAD: 2394 nlm_uninit(); 2395 /* The NLM module cannot be safely unloaded. */ 2396 /* FALLTHROUGH */ 2397 default: 2398 return (EOPNOTSUPP); 2399 } 2400 } 2401 static moduledata_t nfslockd_mod = { 2402 "nfslockd", 2403 nfslockd_modevent, 2404 NULL, 2405 }; 2406 DECLARE_MODULE(nfslockd, nfslockd_mod, SI_SUB_VFS, SI_ORDER_ANY); 2407 2408 /* So that loader and kldload(2) can find us, wherever we are.. */ 2409 MODULE_DEPEND(nfslockd, xdr, 1, 1, 1); 2410 MODULE_DEPEND(nfslockd, krpc, 1, 1, 1); 2411 MODULE_DEPEND(nfslockd, nfscommon, 1, 1, 1); 2412 MODULE_VERSION(nfslockd, 1); 2413