1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ 5 * Authors: Doug Rabson <dfr@rabson.org> 6 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
 */

#include "opt_inet6.h"

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/fail.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lockf.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <sys/unistd.h>
#include <sys/vnode.h>

#include <nfs/nfsproto.h>
#include <nfs/nfs_lock.h>

#include <nlm/nlm_prot.h>
#include <nlm/sm_inter.h>
#include <nlm/nlm.h>
#include <rpc/rpc_com.h>
#include <rpc/rpcb_prot.h>

MALLOC_DEFINE(M_NLM, "NLM", "Network Lock Manager");

/*
 * If a host is inactive (and holds no locks) for this amount of
 * seconds, we consider it idle and stop tracking it.
 */
#define NLM_IDLE_TIMEOUT	30

/*
 * We check the host list for idle every few seconds.
 */
#define NLM_IDLE_PERIOD		5

/*
 * We only look for GRANTED_RES messages for a little while.
 */
#define NLM_EXPIRE_TIMEOUT	10

/*
 * Support for sysctl vfs.nlm.sysid
 */
static SYSCTL_NODE(_vfs, OID_AUTO, nlm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
    "Network Lock Manager");
static SYSCTL_NODE(_vfs_nlm, OID_AUTO, sysid,
    CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
    "");

/*
 * Syscall hooks
 */
static struct syscall_helper_data nlm_syscalls[] = {
	SYSCALL_INIT_HELPER(nlm_syscall),
	SYSCALL_INIT_LAST
};

/*
 * Debug level passed in from userland. We also support a sysctl hook
 * so that it can be changed on a live system.
 */
static int nlm_debug_level;
SYSCTL_INT(_debug, OID_AUTO, nlm_debug, CTLFLAG_RW, &nlm_debug_level, 0, "");

/*
 * Variadic logging macros: NLM_DEBUG logs at LOG_DEBUG when the
 * requested level is at or below nlm_debug_level; NLM_ERR always
 * logs at LOG_ERR.
 */
#define NLM_DEBUG(_level, args...)			\
	do {						\
		if (nlm_debug_level >= (_level))	\
			log(LOG_DEBUG, args);		\
	} while(0)
#define NLM_ERR(args...)			\
	do {					\
		log(LOG_ERR, args);		\
	} while(0)

/*
 * Grace period handling. The value of nlm_grace_threshold is the
 * value of time_uptime after which we are serving requests normally.
 */
static time_t nlm_grace_threshold;

/*
 * We check for idle hosts if time_uptime is greater than
 * nlm_next_idle_check,
 */
static time_t nlm_next_idle_check;

/*
 * A flag to indicate the server is already running.
 */
static int nlm_is_running;

/*
 * A socket to use for RPC - shared by all IPv4 RPC clients.
 */
static struct socket *nlm_socket;

#ifdef INET6

/*
 * A socket to use for RPC - shared by all IPv6 RPC clients.
 */
static struct socket *nlm_socket6;

#endif

/*
 * An RPC client handle that can be used to communicate with the local
 * NSM.
 */
static CLIENT *nlm_nsm;

/*
 * An AUTH handle for the server's creds.
 */
static AUTH *nlm_auth;

/*
 * A zero timeval for sending async RPC messages.
 */
struct timeval nlm_zero_tv = { 0, 0 };

/*
 * The local NSM state number
 */
int nlm_nsm_state;

/*
 * A lock to protect the host list and waiting lock list.
 */
static struct mtx nlm_global_lock;

/*
 * Locks:
 * (l)		locked by nh_lock
 * (s)		only accessed via server RPC which is single threaded
 * (g)		locked by nlm_global_lock
 * (c)		const until freeing
 * (a)		modified using atomic ops
 */

/*
 * A pending client-side lock request, stored on the nlm_waiting_locks
 * list.
 */
struct nlm_waiting_lock {
	TAILQ_ENTRY(nlm_waiting_lock) nw_link; /* (g) */
	bool_t		nw_waiting;	       /* (g) */
	nlm4_lock	nw_lock;	       /* (c) */
	union nfsfh	nw_fh;		       /* (c) private copy of the fh */
	struct vnode	*nw_vp;		       /* (c) */
};
TAILQ_HEAD(nlm_waiting_lock_list, nlm_waiting_lock);

struct nlm_waiting_lock_list nlm_waiting_locks; /* (g) */

/*
 * A pending server-side asynchronous lock request, stored on the
 * nh_pending list of the NLM host.
 */
struct nlm_async_lock {
	TAILQ_ENTRY(nlm_async_lock) af_link; /* (l) host's list of locks */
	struct task	af_task;	/* (c) async callback details */
	void		*af_cookie;	/* (l) lock manager cancel token */
	struct vnode	*af_vp;		/* (l) vnode to lock */
	struct flock	af_fl;		/* (c) lock details */
	struct nlm_host	*af_host;	/* (c) host which is locking */
	CLIENT		*af_rpc;	/* (c) rpc client to send message */
	nlm4_testargs	af_granted;	/* (c) notification details */
	time_t		af_expiretime;	/* (c) notification time */
};
TAILQ_HEAD(nlm_async_lock_list, nlm_async_lock);

/*
 * NLM host. Monitoring state of a remote host with respect to the
 * local NSM (statd).
 */
enum nlm_host_state {
	NLM_UNMONITORED,
	NLM_MONITORED,
	NLM_MONITOR_FAILED,
	NLM_RECOVERING
};

/*
 * An RPC client handle plus its creation time so that stale handles
 * can be expired (see nlm_host_get_rpc).
 */
struct nlm_rpc {
	CLIENT		*nr_client;	/* (l) RPC client handle */
	time_t		nr_create_time;	/* (l) when client was created */
};

struct nlm_host {
	struct mtx	nh_lock;
	volatile u_int	nh_refs;	/* (a) reference count */
	TAILQ_ENTRY(nlm_host) nh_link;	/* (g) global list of hosts */
	char		nh_caller_name[MAXNAMELEN]; /* (c) printable name of host */
	uint32_t	nh_sysid;	/* (c) our allocated system ID */
	char		nh_sysid_string[10]; /* (c) string rep. of sysid */
	struct sockaddr_storage	nh_addr; /* (s) remote address of host */
	struct nlm_rpc	nh_srvrpc;	/* (l) RPC for server replies */
	struct nlm_rpc	nh_clntrpc;	/* (l) RPC for client requests */
	rpcvers_t	nh_vers;	/* (s) NLM version of host */
	int		nh_state;	/* (s) last seen NSM state of host */
	enum nlm_host_state nh_monstate; /* (l) local NSM monitoring state */
	time_t		nh_idle_timeout; /* (s) Time at which host is idle */
	struct sysctl_ctx_list nh_sysctl; /* (c) vfs.nlm.sysid nodes */
	uint32_t	nh_grantcookie;	/* (l) grant cookie counter */
	struct nlm_async_lock_list nh_pending; /* (l) pending async locks */
	struct nlm_async_lock_list nh_granted; /* (l) granted locks */
	struct nlm_async_lock_list nh_finished; /* (l) finished async locks */
};
TAILQ_HEAD(nlm_host_list, nlm_host);

static struct nlm_host_list nlm_hosts; /* (g) */
static uint32_t nlm_next_sysid = 1;    /* (g) */

static void	nlm_host_unmonitor(struct nlm_host *);

/*
 * The cookie sent with an async GRANTED message, identifying both the
 * host (by sysid) and the individual grant on that host.
 */
struct nlm_grantcookie {
	uint32_t	ng_sysid;
	uint32_t	ng_cookie;
};

/* Extract the sysid half of a grant cookie from a wire netobj. */
static inline uint32_t
ng_sysid(struct netobj *src)
{

	return ((struct nlm_grantcookie *)src->n_bytes)->ng_sysid;
}

/* Extract the per-host cookie half of a grant cookie from a wire netobj. */
static inline uint32_t
ng_cookie(struct netobj *src)
{

	return ((struct nlm_grantcookie *)src->n_bytes)->ng_cookie;
}

/**********************************************************************/

/*
 * Initialise NLM globals.
 */
static int
nlm_init(void)
{
	int error;

	mtx_init(&nlm_global_lock, "nlm_global_lock", NULL, MTX_DEF);
	TAILQ_INIT(&nlm_waiting_locks);
	TAILQ_INIT(&nlm_hosts);

	error = syscall_helper_register(nlm_syscalls, SY_THR_STATIC_KLD);
	if (error != 0)
		NLM_ERR("Can't register NLM syscall\n");
	return (error);
}

/*
 * Undo nlm_init: deregister the nlm_syscall hook.
 */
static void
nlm_uninit(void)
{

	syscall_helper_unregister(nlm_syscalls);
}

/*
 * Create a netobj from an arbitrary source. The n_bytes buffer is
 * allocated with M_WAITOK from 'type'; the caller owns it.
 */
void
nlm_make_netobj(struct netobj *dst, caddr_t src, size_t srcsize,
    struct malloc_type *type)
{

	dst->n_len = srcsize;
	dst->n_bytes = malloc(srcsize, type, M_WAITOK);
	memcpy(dst->n_bytes, src, srcsize);
}

/*
 * Copy a struct netobj (deep copy of the byte buffer).
 */
void
nlm_copy_netobj(struct netobj *dst, struct netobj *src,
    struct malloc_type *type)
{

	nlm_make_netobj(dst, src->n_bytes, src->n_len, type);
}

/*
 * Create an RPC client handle for the given (address,prog,vers)
 * triple using UDP. Falls back through rpcbind v4/v3 and then the old
 * portmap protocol to discover the remote port, and retries over TCP
 * if the UDP service is not registered. Returns NULL on failure.
 */
static CLIENT *
nlm_get_rpc(struct sockaddr *sa, rpcprog_t prog, rpcvers_t vers)
{
	char *wchan = "nlmrcv";
	struct sockaddr_storage ss;
	struct socket *so;
	CLIENT *rpcb;
	struct timeval timo;
	RPCB parms;
	char *uaddr;
	enum clnt_stat stat = RPC_SUCCESS;
	int rpcvers = RPCBVERS4;
	bool_t do_tcp = FALSE;
	bool_t tryagain = FALSE;
	struct portmap mapping;
	u_short port = 0;

	/*
	 * First we need to contact the remote RPCBIND service to find
	 * the right port.
	 */
	memcpy(&ss, sa, sa->sa_len);
	switch (ss.ss_family) {
	case AF_INET:
		/* Port 111 is the well-known rpcbind/portmap port. */
		((struct sockaddr_in *)&ss)->sin_port = htons(111);
		so = nlm_socket;
		break;
#ifdef INET6
	case AF_INET6:
		((struct sockaddr_in6 *)&ss)->sin6_port = htons(111);
		so = nlm_socket6;
		break;
#endif

	default:
		/*
		 * Unsupported address family - fail.
		 */
		return (NULL);
	}

	rpcb = clnt_dg_create(so, (struct sockaddr *)&ss,
	    RPCBPROG, rpcvers, 0, 0);
	if (!rpcb)
		return (NULL);

try_tcp:
	parms.r_prog = prog;
	parms.r_vers = vers;
	if (do_tcp)
		parms.r_netid = "tcp";
	else
		parms.r_netid = "udp";
	parms.r_addr = "";
	parms.r_owner = "";

	/*
	 * Use the default timeout.
	 */
	timo.tv_sec = 25;
	timo.tv_usec = 0;
again:
	switch (rpcvers) {
	case RPCBVERS4:
	case RPCBVERS:
		/*
		 * Try RPCBIND 4 then 3.
		 */
		uaddr = NULL;
		stat = CLNT_CALL(rpcb, (rpcprog_t) RPCBPROC_GETADDR,
		    (xdrproc_t) xdr_rpcb, &parms,
		    (xdrproc_t) xdr_wrapstring, &uaddr, timo);
		if (stat == RPC_SUCCESS) {
			/*
			 * We have a reply from the remote RPCBIND - turn it
			 * into an appropriate address and make a new client
			 * that can talk to the remote NLM.
			 *
			 * XXX fixup IPv6 scope ID.
			 */
			struct netbuf *a;
			a = __rpc_uaddr2taddr_af(ss.ss_family, uaddr);
			if (!a) {
				/* Could not parse the uaddr - drop a version. */
				tryagain = TRUE;
			} else {
				tryagain = FALSE;
				memcpy(&ss, a->buf, a->len);
				free(a->buf, M_RPC);
				free(a, M_RPC);
				xdr_free((xdrproc_t) xdr_wrapstring, &uaddr);
			}
		}
		if (tryagain || stat == RPC_PROGVERSMISMATCH) {
			/* Step down: RPCBVERS4 -> RPCBVERS -> PMAPVERS. */
			if (rpcvers == RPCBVERS4)
				rpcvers = RPCBVERS;
			else if (rpcvers == RPCBVERS)
				rpcvers = PMAPVERS;
			CLNT_CONTROL(rpcb, CLSET_VERS, &rpcvers);
			goto again;
		}
		break;
	case PMAPVERS:
		/*
		 * Try portmap.
		 */
		mapping.pm_prog = parms.r_prog;
		mapping.pm_vers = parms.r_vers;
		mapping.pm_prot = do_tcp ? IPPROTO_TCP : IPPROTO_UDP;
		mapping.pm_port = 0;

		stat = CLNT_CALL(rpcb, (rpcprog_t) PMAPPROC_GETPORT,
		    (xdrproc_t) xdr_portmap, &mapping,
		    (xdrproc_t) xdr_u_short, &port, timo);

		if (stat == RPC_SUCCESS) {
			switch (ss.ss_family) {
			case AF_INET:
				((struct sockaddr_in *)&ss)->sin_port =
					htons(port);
				break;

#ifdef INET6
			case AF_INET6:
				((struct sockaddr_in6 *)&ss)->sin6_port =
					htons(port);
				break;
#endif
			}
		}
		break;
	default:
		panic("invalid rpcvers %d", rpcvers);
	}
	/*
	 * We may have a positive response from the portmapper, but the NLM
	 * service was not found. Make sure we received a valid port.
	 */
	switch (ss.ss_family) {
	case AF_INET:
		port = ((struct sockaddr_in *)&ss)->sin_port;
		break;
#ifdef INET6
	case AF_INET6:
		port = ((struct sockaddr_in6 *)&ss)->sin6_port;
		break;
#endif
	}
	if (stat != RPC_SUCCESS || !port) {
		/*
		 * If we were able to talk to rpcbind or portmap, but the udp
		 * variant wasn't available, ask about tcp.
		 *
		 * XXX - We could also check for a TCP portmapper, but
		 * if the host is running a portmapper at all, we should be able
		 * to hail it over UDP.
		 */
		if (stat == RPC_SUCCESS && !do_tcp) {
			do_tcp = TRUE;
			goto try_tcp;
		}

		/* Otherwise, bad news. */
		NLM_ERR("NLM: failed to contact remote rpcbind, "
		    "stat = %d, port = %d\n", (int) stat, port);
		CLNT_DESTROY(rpcb);
		return (NULL);
	}

	if (do_tcp) {
		/*
		 * Destroy the UDP client we used to speak to rpcbind and
		 * recreate as a TCP client.
		 */
		struct netconfig *nconf = NULL;

		CLNT_DESTROY(rpcb);

		switch (ss.ss_family) {
		case AF_INET:
			nconf = getnetconfigent("tcp");
			break;
#ifdef INET6
		case AF_INET6:
			nconf = getnetconfigent("tcp6");
			break;
#endif
		}

		/*
		 * NOTE(review): the return value of clnt_reconnect_create
		 * is not checked for NULL before the CLNT_CONTROL and
		 * cl_auth accesses below - confirm whether it can fail
		 * here and whether a NULL check is needed.
		 */
		rpcb = clnt_reconnect_create(nconf, (struct sockaddr *)&ss,
		    prog, vers, 0, 0);
		CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan);
		rpcb->cl_auth = nlm_auth;

	} else {
		/*
		 * Re-use the client we used to speak to rpcbind.
		 */
		CLNT_CONTROL(rpcb, CLSET_SVC_ADDR, &ss);
		CLNT_CONTROL(rpcb, CLSET_PROG, &prog);
		CLNT_CONTROL(rpcb, CLSET_VERS, &vers);
		CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan);
		rpcb->cl_auth = nlm_auth;
	}

	return (rpcb);
}

/*
 * This callback runs (via the af_task taskqueue entry) after an async
 * lock request has been granted. We notify the host which initiated
 * the request.
 */
static void
nlm_lock_callback(void *arg, int pending)
{
	struct nlm_async_lock *af = (struct nlm_async_lock *) arg;
	struct rpc_callextra ext;

	NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) granted,"
	    " cookie %d:%d\n", af, af->af_host->nh_caller_name,
	    af->af_host->nh_sysid, ng_sysid(&af->af_granted.cookie),
	    ng_cookie(&af->af_granted.cookie));

	/*
	 * Send the results back to the host.
	 *
	 * Note: there is a possible race here with nlm_host_notify
	 * destroying the RPC client. To avoid problems, the first
	 * thing nlm_host_notify does is to cancel pending async lock
	 * requests.
	 */
	memset(&ext, 0, sizeof(ext));
	ext.rc_auth = nlm_auth;
	if (af->af_host->nh_vers == NLM_VERS4) {
		nlm4_granted_msg_4(&af->af_granted,
		    NULL, af->af_rpc, &ext, nlm_zero_tv);
	} else {
		/*
		 * Back-convert to legacy protocol
		 */
		nlm_testargs granted;
		granted.cookie = af->af_granted.cookie;
		granted.exclusive = af->af_granted.exclusive;
		granted.alock.caller_name =
			af->af_granted.alock.caller_name;
		granted.alock.fh = af->af_granted.alock.fh;
		granted.alock.oh = af->af_granted.alock.oh;
		granted.alock.svid = af->af_granted.alock.svid;
		granted.alock.l_offset =
			af->af_granted.alock.l_offset;
		granted.alock.l_len =
			af->af_granted.alock.l_len;

		nlm_granted_msg_1(&granted,
		    NULL, af->af_rpc, &ext, nlm_zero_tv);
	}

	/*
	 * Move this entry to the nh_granted list.
	 */
	af->af_expiretime = time_uptime + NLM_EXPIRE_TIMEOUT;
	mtx_lock(&af->af_host->nh_lock);
	TAILQ_REMOVE(&af->af_host->nh_pending, af, af_link);
	TAILQ_INSERT_TAIL(&af->af_host->nh_granted, af, af_link);
	mtx_unlock(&af->af_host->nh_lock);
}

/*
 * Free an async lock request. The request must have been removed from
 * any list.
 */
static void
nlm_free_async_lock(struct nlm_async_lock *af)
{
	/*
	 * Free an async lock: drop the RPC client reference, the XDR
	 * allocations in af_granted and the vnode reference.
	 */
	if (af->af_rpc)
		CLNT_RELEASE(af->af_rpc);
	xdr_free((xdrproc_t) xdr_nlm4_testargs, &af->af_granted);
	if (af->af_vp)
		vrele(af->af_vp);
	free(af, M_NLM);
}

/*
 * Cancel our async request - this must be called with
 * af->nh_host->nh_lock held. This is slightly complicated by a
 * potential race with our own callback. If we fail to cancel the
 * lock, it must already have been granted - we make sure our async
 * task has completed by calling taskqueue_drain in this case.
 */
static int
nlm_cancel_async_lock(struct nlm_async_lock *af)
{
	struct nlm_host *host = af->af_host;
	int error;

	mtx_assert(&host->nh_lock, MA_OWNED);

	/* Drop the host lock around the VOP call to avoid lock-order issues. */
	mtx_unlock(&host->nh_lock);

	error = VOP_ADVLOCKASYNC(af->af_vp, NULL, F_CANCEL, &af->af_fl,
	    F_REMOTE, NULL, &af->af_cookie);

	if (error) {
		/*
		 * We failed to cancel - make sure our callback has
		 * completed before we continue.
		 */
		taskqueue_drain(taskqueue_thread, &af->af_task);
	}

	mtx_lock(&host->nh_lock);

	if (!error) {
		NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) "
		    "cancelled\n", af, host->nh_caller_name, host->nh_sysid);

		/*
		 * Remove from the nh_pending list and free now that
		 * we are safe from the callback.
		 */
		TAILQ_REMOVE(&host->nh_pending, af, af_link);
		mtx_unlock(&host->nh_lock);
		nlm_free_async_lock(af);
		mtx_lock(&host->nh_lock);
	}

	return (error);
}

/*
 * Free granted async lock records whose GRANTED_RES wait has expired
 * (af_expiretime has passed) and everything on the finished list. The
 * host lock is dropped around each nlm_free_async_lock call.
 */
static void
nlm_check_expired_locks(struct nlm_host *host)
{
	struct nlm_async_lock *af;
	time_t uptime = time_uptime;

	mtx_lock(&host->nh_lock);
	while ((af = TAILQ_FIRST(&host->nh_granted)) != NULL
	    && uptime >= af->af_expiretime) {
		NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) expired,"
		    " cookie %d:%d\n", af, af->af_host->nh_caller_name,
		    af->af_host->nh_sysid, ng_sysid(&af->af_granted.cookie),
		    ng_cookie(&af->af_granted.cookie));
		TAILQ_REMOVE(&host->nh_granted, af, af_link);
		mtx_unlock(&host->nh_lock);
		nlm_free_async_lock(af);
		mtx_lock(&host->nh_lock);
	}
	while ((af = TAILQ_FIRST(&host->nh_finished)) != NULL) {
		TAILQ_REMOVE(&host->nh_finished, af, af_link);
		mtx_unlock(&host->nh_lock);
		nlm_free_async_lock(af);
		mtx_lock(&host->nh_lock);
	}
	mtx_unlock(&host->nh_lock);
}

/*
 * Free resources used by a host.
This is called after the reference
 * count has reached zero so it doesn't need to worry about locks.
 */
static void
nlm_host_destroy(struct nlm_host *host)
{

	mtx_lock(&nlm_global_lock);
	TAILQ_REMOVE(&nlm_hosts, host, nh_link);
	mtx_unlock(&nlm_global_lock);

	if (host->nh_srvrpc.nr_client)
		CLNT_RELEASE(host->nh_srvrpc.nr_client);
	if (host->nh_clntrpc.nr_client)
		CLNT_RELEASE(host->nh_clntrpc.nr_client);
	mtx_destroy(&host->nh_lock);
	sysctl_ctx_free(&host->nh_sysctl);
	free(host, M_NLM);
}

/*
 * Thread start callback for client lock recovery. Releases the host
 * reference that nlm_host_notify acquired before creating this thread.
 */
static void
nlm_client_recovery_start(void *arg)
{
	struct nlm_host *host = (struct nlm_host *) arg;

	NLM_DEBUG(1, "NLM: client lock recovery for %s started\n",
	    host->nh_caller_name);

	nlm_client_recovery(host);

	NLM_DEBUG(1, "NLM: client lock recovery for %s completed\n",
	    host->nh_caller_name);

	host->nh_monstate = NLM_MONITORED;
	nlm_host_release(host);

	kthread_exit();
}

/*
 * This is called when we receive a host state change notification. We
 * unlock any active locks owned by the host. When rpc.lockd is
 * shutting down, this function is called with newstate set to zero
 * which allows us to cancel any pending async locks and clear the
 * locking state.
 */
static void
nlm_host_notify(struct nlm_host *host, int newstate)
{
	struct nlm_async_lock *af;

	if (newstate) {
		NLM_DEBUG(1, "NLM: host %s (sysid %d) rebooted, new "
		    "state is %d\n", host->nh_caller_name,
		    host->nh_sysid, newstate);
	}

	/*
	 * Cancel any pending async locks for this host.
	 */
	mtx_lock(&host->nh_lock);
	while ((af = TAILQ_FIRST(&host->nh_pending)) != NULL) {
		/*
		 * nlm_cancel_async_lock will remove the entry from
		 * nh_pending and free it.
		 */
		nlm_cancel_async_lock(af);
	}
	mtx_unlock(&host->nh_lock);
	nlm_check_expired_locks(host);

	/*
	 * The host just rebooted - trash its locks.
	 */
	lf_clearremotesys(host->nh_sysid);
	host->nh_state = newstate;

	/*
	 * If we have any remote locks for this host (i.e. it
	 * represents a remote NFS server that our local NFS client
	 * has locks for), start a recovery thread. The extra host
	 * reference taken here is dropped by nlm_client_recovery_start.
	 */
	if (newstate != 0
	    && host->nh_monstate != NLM_RECOVERING
	    && lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid) > 0) {
		struct thread *td;
		host->nh_monstate = NLM_RECOVERING;
		refcount_acquire(&host->nh_refs);
		kthread_add(nlm_client_recovery_start, host, curproc, &td, 0, 0,
		    "NFS lock recovery for %s", host->nh_caller_name);
	}
}

/*
 * Sysctl handler to count the number of locks for a sysid.
 */
static int
nlm_host_lock_count_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct nlm_host *host;
	int count;

	host = oidp->oid_arg1;
	count = lf_countlocks(host->nh_sysid);
	return sysctl_handle_int(oidp, &count, 0, req);
}

/*
 * Sysctl handler to count the number of client locks for a sysid.
 */
static int
nlm_host_client_lock_count_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct nlm_host *host;
	int count;

	host = oidp->oid_arg1;
	count = lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid);
	return sysctl_handle_int(oidp, &count, 0, req);
}

/*
 * Create a new NLM host.
 */
static struct nlm_host *
nlm_create_host(const char* caller_name)
{
	struct nlm_host *host;
	struct sysctl_oid *oid;

	/* Caller must hold the global lock; we drop and retake it below. */
	mtx_assert(&nlm_global_lock, MA_OWNED);

	NLM_DEBUG(1, "NLM: new host %s (sysid %d)\n",
	    caller_name, nlm_next_sysid);
	/* M_NOWAIT because we hold nlm_global_lock; may return NULL. */
	host = malloc(sizeof(struct nlm_host), M_NLM, M_NOWAIT|M_ZERO);
	if (!host)
		return (NULL);
	mtx_init(&host->nh_lock, "nh_lock", NULL, MTX_DEF);
	refcount_init(&host->nh_refs, 1);
	strlcpy(host->nh_caller_name, caller_name, MAXNAMELEN);
	host->nh_sysid = nlm_next_sysid++;
	snprintf(host->nh_sysid_string, sizeof(host->nh_sysid_string),
	    "%d", host->nh_sysid);
	host->nh_vers = 0;
	host->nh_state = 0;
	host->nh_monstate = NLM_UNMONITORED;
	host->nh_grantcookie = 1;
	TAILQ_INIT(&host->nh_pending);
	TAILQ_INIT(&host->nh_granted);
	TAILQ_INIT(&host->nh_finished);
	TAILQ_INSERT_TAIL(&nlm_hosts, host, nh_link);

	/* Drop the global lock around the sysctl tree setup. */
	mtx_unlock(&nlm_global_lock);

	sysctl_ctx_init(&host->nh_sysctl);
	oid = SYSCTL_ADD_NODE(&host->nh_sysctl,
	    SYSCTL_STATIC_CHILDREN(_vfs_nlm_sysid),
	    OID_AUTO, host->nh_sysid_string, CTLFLAG_RD | CTLFLAG_MPSAFE,
	    NULL, "");
	SYSCTL_ADD_STRING(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "hostname", CTLFLAG_RD, host->nh_caller_name, 0, "");
	SYSCTL_ADD_UINT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "version", CTLFLAG_RD, &host->nh_vers, 0, "");
	SYSCTL_ADD_UINT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "monitored", CTLFLAG_RD, &host->nh_monstate, 0, "");
	SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "lock_count", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, host,
	    0, nlm_host_lock_count_sysctl, "I", "");
	SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "client_lock_count", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    host, 0, nlm_host_client_lock_count_sysctl, "I", "");

	mtx_lock(&nlm_global_lock);

	return (host);
}

/*
 * Acquire the next sysid for remote locks not handled by the NLM.
 */
uint32_t
nlm_acquire_next_sysid(void)
{
	uint32_t next_sysid;

	mtx_lock(&nlm_global_lock);
	next_sysid = nlm_next_sysid++;
	mtx_unlock(&nlm_global_lock);
	return (next_sysid);
}

/*
 * Return non-zero if the address parts of the two sockaddrs are the
 * same (ports and other fields are ignored).
 */
static int
nlm_compare_addr(const struct sockaddr *a, const struct sockaddr *b)
{
	const struct sockaddr_in *a4, *b4;
#ifdef INET6
	const struct sockaddr_in6 *a6, *b6;
#endif

	if (a->sa_family != b->sa_family)
		return (FALSE);

	switch (a->sa_family) {
	case AF_INET:
		a4 = (const struct sockaddr_in *) a;
		b4 = (const struct sockaddr_in *) b;
		return !memcmp(&a4->sin_addr, &b4->sin_addr,
		    sizeof(a4->sin_addr));
#ifdef INET6
	case AF_INET6:
		a6 = (const struct sockaddr_in6 *) a;
		b6 = (const struct sockaddr_in6 *) b;
		return !memcmp(&a6->sin6_addr, &b6->sin6_addr,
		    sizeof(a6->sin6_addr));
#endif
	}

	return (0);
}

/*
 * Check for idle hosts and stop monitoring them. We could also free
 * the host structure here, possibly after a larger timeout but that
 * would require some care to avoid races with
 * e.g. nlm_host_lock_count_sysctl.
921 */ 922 static void 923 nlm_check_idle(void) 924 { 925 struct nlm_host *host; 926 927 mtx_assert(&nlm_global_lock, MA_OWNED); 928 929 if (time_uptime <= nlm_next_idle_check) 930 return; 931 932 nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD; 933 934 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 935 if (host->nh_monstate == NLM_MONITORED 936 && time_uptime > host->nh_idle_timeout) { 937 mtx_unlock(&nlm_global_lock); 938 if (lf_countlocks(host->nh_sysid) > 0 939 || lf_countlocks(NLM_SYSID_CLIENT 940 + host->nh_sysid)) { 941 host->nh_idle_timeout = 942 time_uptime + NLM_IDLE_TIMEOUT; 943 mtx_lock(&nlm_global_lock); 944 continue; 945 } 946 nlm_host_unmonitor(host); 947 mtx_lock(&nlm_global_lock); 948 } 949 } 950 } 951 952 /* 953 * Search for an existing NLM host that matches the given name 954 * (typically the caller_name element of an nlm4_lock). If none is 955 * found, create a new host. If 'addr' is non-NULL, record the remote 956 * address of the host so that we can call it back for async 957 * responses. If 'vers' is greater than zero then record the NLM 958 * program version to use to communicate with this client. 959 */ 960 struct nlm_host * 961 nlm_find_host_by_name(const char *name, const struct sockaddr *addr, 962 rpcvers_t vers) 963 { 964 struct nlm_host *host; 965 966 mtx_lock(&nlm_global_lock); 967 968 /* 969 * The remote host is determined by caller_name. 970 */ 971 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 972 if (!strcmp(host->nh_caller_name, name)) 973 break; 974 } 975 976 if (!host) { 977 host = nlm_create_host(name); 978 if (!host) { 979 mtx_unlock(&nlm_global_lock); 980 return (NULL); 981 } 982 } 983 refcount_acquire(&host->nh_refs); 984 985 host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT; 986 987 /* 988 * If we have an address for the host, record it so that we 989 * can send async replies etc. 
990 */ 991 if (addr) { 992 993 KASSERT(addr->sa_len < sizeof(struct sockaddr_storage), 994 ("Strange remote transport address length")); 995 996 /* 997 * If we have seen an address before and we currently 998 * have an RPC client handle, make sure the address is 999 * the same, otherwise discard the client handle. 1000 */ 1001 if (host->nh_addr.ss_len && host->nh_srvrpc.nr_client) { 1002 if (!nlm_compare_addr( 1003 (struct sockaddr *) &host->nh_addr, 1004 addr) 1005 || host->nh_vers != vers) { 1006 CLIENT *client; 1007 mtx_lock(&host->nh_lock); 1008 client = host->nh_srvrpc.nr_client; 1009 host->nh_srvrpc.nr_client = NULL; 1010 mtx_unlock(&host->nh_lock); 1011 if (client) { 1012 CLNT_RELEASE(client); 1013 } 1014 } 1015 } 1016 memcpy(&host->nh_addr, addr, addr->sa_len); 1017 host->nh_vers = vers; 1018 } 1019 1020 nlm_check_idle(); 1021 1022 mtx_unlock(&nlm_global_lock); 1023 1024 return (host); 1025 } 1026 1027 /* 1028 * Search for an existing NLM host that matches the given remote 1029 * address. If none is found, create a new host with the requested 1030 * address and remember 'vers' as the NLM protocol version to use for 1031 * that host. 1032 */ 1033 struct nlm_host * 1034 nlm_find_host_by_addr(const struct sockaddr *addr, int vers) 1035 { 1036 /* 1037 * Fake up a name using inet_ntop. This buffer is 1038 * large enough for an IPv6 address. 1039 */ 1040 char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"]; 1041 struct nlm_host *host; 1042 1043 switch (addr->sa_family) { 1044 case AF_INET: 1045 inet_ntop(AF_INET, 1046 &((const struct sockaddr_in *) addr)->sin_addr, 1047 tmp, sizeof tmp); 1048 break; 1049 #ifdef INET6 1050 case AF_INET6: 1051 inet_ntop(AF_INET6, 1052 &((const struct sockaddr_in6 *) addr)->sin6_addr, 1053 tmp, sizeof tmp); 1054 break; 1055 #endif 1056 default: 1057 strlcpy(tmp, "<unknown>", sizeof(tmp)); 1058 } 1059 1060 mtx_lock(&nlm_global_lock); 1061 1062 /* 1063 * The remote host is determined by caller_name. 
1064 */ 1065 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 1066 if (nlm_compare_addr(addr, 1067 (const struct sockaddr *) &host->nh_addr)) 1068 break; 1069 } 1070 1071 if (!host) { 1072 host = nlm_create_host(tmp); 1073 if (!host) { 1074 mtx_unlock(&nlm_global_lock); 1075 return (NULL); 1076 } 1077 memcpy(&host->nh_addr, addr, addr->sa_len); 1078 host->nh_vers = vers; 1079 } 1080 refcount_acquire(&host->nh_refs); 1081 1082 host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT; 1083 1084 nlm_check_idle(); 1085 1086 mtx_unlock(&nlm_global_lock); 1087 1088 return (host); 1089 } 1090 1091 /* 1092 * Find the NLM host that matches the value of 'sysid'. If none 1093 * exists, return NULL. 1094 */ 1095 static struct nlm_host * 1096 nlm_find_host_by_sysid(int sysid) 1097 { 1098 struct nlm_host *host; 1099 1100 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 1101 if (host->nh_sysid == sysid) { 1102 refcount_acquire(&host->nh_refs); 1103 return (host); 1104 } 1105 } 1106 1107 return (NULL); 1108 } 1109 1110 void nlm_host_release(struct nlm_host *host) 1111 { 1112 if (refcount_release(&host->nh_refs)) { 1113 /* 1114 * Free the host 1115 */ 1116 nlm_host_destroy(host); 1117 } 1118 } 1119 1120 /* 1121 * Unregister this NLM host with the local NSM due to idleness. 1122 */ 1123 static void 1124 nlm_host_unmonitor(struct nlm_host *host) 1125 { 1126 mon_id smmonid; 1127 sm_stat_res smstat; 1128 struct timeval timo; 1129 enum clnt_stat stat; 1130 1131 NLM_DEBUG(1, "NLM: unmonitoring %s (sysid %d)\n", 1132 host->nh_caller_name, host->nh_sysid); 1133 1134 /* 1135 * We put our assigned system ID value in the priv field to 1136 * make it simpler to find the host if we are notified of a 1137 * host restart. 
1138 */ 1139 smmonid.mon_name = host->nh_caller_name; 1140 smmonid.my_id.my_name = "localhost"; 1141 smmonid.my_id.my_prog = NLM_PROG; 1142 smmonid.my_id.my_vers = NLM_SM; 1143 smmonid.my_id.my_proc = NLM_SM_NOTIFY; 1144 1145 timo.tv_sec = 25; 1146 timo.tv_usec = 0; 1147 stat = CLNT_CALL(nlm_nsm, SM_UNMON, 1148 (xdrproc_t) xdr_mon, &smmonid, 1149 (xdrproc_t) xdr_sm_stat, &smstat, timo); 1150 1151 if (stat != RPC_SUCCESS) { 1152 NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat); 1153 return; 1154 } 1155 if (smstat.res_stat == stat_fail) { 1156 NLM_ERR("Local NSM refuses to unmonitor %s\n", 1157 host->nh_caller_name); 1158 return; 1159 } 1160 1161 host->nh_monstate = NLM_UNMONITORED; 1162 } 1163 1164 /* 1165 * Register this NLM host with the local NSM so that we can be 1166 * notified if it reboots. 1167 */ 1168 void 1169 nlm_host_monitor(struct nlm_host *host, int state) 1170 { 1171 mon smmon; 1172 sm_stat_res smstat; 1173 struct timeval timo; 1174 enum clnt_stat stat; 1175 1176 if (state && !host->nh_state) { 1177 /* 1178 * This is the first time we have seen an NSM state 1179 * value for this host. We record it here to help 1180 * detect host reboots. 1181 */ 1182 host->nh_state = state; 1183 NLM_DEBUG(1, "NLM: host %s (sysid %d) has NSM state %d\n", 1184 host->nh_caller_name, host->nh_sysid, state); 1185 } 1186 1187 mtx_lock(&host->nh_lock); 1188 if (host->nh_monstate != NLM_UNMONITORED) { 1189 mtx_unlock(&host->nh_lock); 1190 return; 1191 } 1192 host->nh_monstate = NLM_MONITORED; 1193 mtx_unlock(&host->nh_lock); 1194 1195 NLM_DEBUG(1, "NLM: monitoring %s (sysid %d)\n", 1196 host->nh_caller_name, host->nh_sysid); 1197 1198 /* 1199 * We put our assigned system ID value in the priv field to 1200 * make it simpler to find the host if we are notified of a 1201 * host restart. 
1202 */ 1203 smmon.mon_id.mon_name = host->nh_caller_name; 1204 smmon.mon_id.my_id.my_name = "localhost"; 1205 smmon.mon_id.my_id.my_prog = NLM_PROG; 1206 smmon.mon_id.my_id.my_vers = NLM_SM; 1207 smmon.mon_id.my_id.my_proc = NLM_SM_NOTIFY; 1208 memcpy(smmon.priv, &host->nh_sysid, sizeof(host->nh_sysid)); 1209 1210 timo.tv_sec = 25; 1211 timo.tv_usec = 0; 1212 stat = CLNT_CALL(nlm_nsm, SM_MON, 1213 (xdrproc_t) xdr_mon, &smmon, 1214 (xdrproc_t) xdr_sm_stat, &smstat, timo); 1215 1216 if (stat != RPC_SUCCESS) { 1217 NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat); 1218 return; 1219 } 1220 if (smstat.res_stat == stat_fail) { 1221 NLM_ERR("Local NSM refuses to monitor %s\n", 1222 host->nh_caller_name); 1223 mtx_lock(&host->nh_lock); 1224 host->nh_monstate = NLM_MONITOR_FAILED; 1225 mtx_unlock(&host->nh_lock); 1226 return; 1227 } 1228 1229 host->nh_monstate = NLM_MONITORED; 1230 } 1231 1232 /* 1233 * Return an RPC client handle that can be used to talk to the NLM 1234 * running on the given host. 1235 */ 1236 CLIENT * 1237 nlm_host_get_rpc(struct nlm_host *host, bool_t isserver) 1238 { 1239 struct nlm_rpc *rpc; 1240 CLIENT *client; 1241 1242 mtx_lock(&host->nh_lock); 1243 1244 if (isserver) 1245 rpc = &host->nh_srvrpc; 1246 else 1247 rpc = &host->nh_clntrpc; 1248 1249 /* 1250 * We can't hold onto RPC handles for too long - the async 1251 * call/reply protocol used by some NLM clients makes it hard 1252 * to tell when they change port numbers (e.g. after a 1253 * reboot). Note that if a client reboots while it isn't 1254 * holding any locks, it won't bother to notify us. We 1255 * expire the RPC handles after two minutes. 
1256 */ 1257 if (rpc->nr_client && time_uptime > rpc->nr_create_time + 2*60) { 1258 client = rpc->nr_client; 1259 rpc->nr_client = NULL; 1260 mtx_unlock(&host->nh_lock); 1261 CLNT_RELEASE(client); 1262 mtx_lock(&host->nh_lock); 1263 } 1264 1265 if (!rpc->nr_client) { 1266 mtx_unlock(&host->nh_lock); 1267 client = nlm_get_rpc((struct sockaddr *)&host->nh_addr, 1268 NLM_PROG, host->nh_vers); 1269 mtx_lock(&host->nh_lock); 1270 1271 if (client) { 1272 if (rpc->nr_client) { 1273 mtx_unlock(&host->nh_lock); 1274 CLNT_DESTROY(client); 1275 mtx_lock(&host->nh_lock); 1276 } else { 1277 rpc->nr_client = client; 1278 rpc->nr_create_time = time_uptime; 1279 } 1280 } 1281 } 1282 1283 client = rpc->nr_client; 1284 if (client) 1285 CLNT_ACQUIRE(client); 1286 mtx_unlock(&host->nh_lock); 1287 1288 return (client); 1289 1290 } 1291 1292 int nlm_host_get_sysid(struct nlm_host *host) 1293 { 1294 1295 return (host->nh_sysid); 1296 } 1297 1298 int 1299 nlm_host_get_state(struct nlm_host *host) 1300 { 1301 1302 return (host->nh_state); 1303 } 1304 1305 void * 1306 nlm_register_wait_lock(struct nlm4_lock *lock, struct vnode *vp) 1307 { 1308 struct nlm_waiting_lock *nw; 1309 1310 nw = malloc(sizeof(struct nlm_waiting_lock), M_NLM, M_WAITOK); 1311 nw->nw_lock = *lock; 1312 memcpy(&nw->nw_fh.fh_bytes, nw->nw_lock.fh.n_bytes, 1313 nw->nw_lock.fh.n_len); 1314 nw->nw_lock.fh.n_bytes = nw->nw_fh.fh_bytes; 1315 nw->nw_waiting = TRUE; 1316 nw->nw_vp = vp; 1317 mtx_lock(&nlm_global_lock); 1318 TAILQ_INSERT_TAIL(&nlm_waiting_locks, nw, nw_link); 1319 mtx_unlock(&nlm_global_lock); 1320 1321 return nw; 1322 } 1323 1324 void 1325 nlm_deregister_wait_lock(void *handle) 1326 { 1327 struct nlm_waiting_lock *nw = handle; 1328 1329 mtx_lock(&nlm_global_lock); 1330 TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link); 1331 mtx_unlock(&nlm_global_lock); 1332 1333 free(nw, M_NLM); 1334 } 1335 1336 int 1337 nlm_wait_lock(void *handle, int timo) 1338 { 1339 struct nlm_waiting_lock *nw = handle; 1340 int error, 
stops_deferred; 1341 1342 /* 1343 * If the granted message arrived before we got here, 1344 * nw->nw_waiting will be FALSE - in that case, don't sleep. 1345 */ 1346 mtx_lock(&nlm_global_lock); 1347 error = 0; 1348 if (nw->nw_waiting) { 1349 stops_deferred = sigdeferstop(SIGDEFERSTOP_ERESTART); 1350 error = msleep(nw, &nlm_global_lock, PCATCH, "nlmlock", timo); 1351 sigallowstop(stops_deferred); 1352 } 1353 TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link); 1354 if (error) { 1355 /* 1356 * The granted message may arrive after the 1357 * interrupt/timeout but before we manage to lock the 1358 * mutex. Detect this by examining nw_lock. 1359 */ 1360 if (!nw->nw_waiting) 1361 error = 0; 1362 } else { 1363 /* 1364 * If nlm_cancel_wait is called, then error will be 1365 * zero but nw_waiting will still be TRUE. We 1366 * translate this into EINTR. 1367 */ 1368 if (nw->nw_waiting) 1369 error = EINTR; 1370 } 1371 mtx_unlock(&nlm_global_lock); 1372 1373 free(nw, M_NLM); 1374 1375 return (error); 1376 } 1377 1378 void 1379 nlm_cancel_wait(struct vnode *vp) 1380 { 1381 struct nlm_waiting_lock *nw; 1382 1383 mtx_lock(&nlm_global_lock); 1384 TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) { 1385 if (nw->nw_vp == vp) { 1386 wakeup(nw); 1387 } 1388 } 1389 mtx_unlock(&nlm_global_lock); 1390 } 1391 1392 /**********************************************************************/ 1393 1394 /* 1395 * Syscall interface with userland. 
1396 */ 1397 1398 extern void nlm_prog_0(struct svc_req *rqstp, SVCXPRT *transp); 1399 extern void nlm_prog_1(struct svc_req *rqstp, SVCXPRT *transp); 1400 extern void nlm_prog_3(struct svc_req *rqstp, SVCXPRT *transp); 1401 extern void nlm_prog_4(struct svc_req *rqstp, SVCXPRT *transp); 1402 1403 static int 1404 nlm_register_services(SVCPOOL *pool, int addr_count, char **addrs) 1405 { 1406 static rpcvers_t versions[] = { 1407 NLM_SM, NLM_VERS, NLM_VERSX, NLM_VERS4 1408 }; 1409 static void (*dispatchers[])(struct svc_req *, SVCXPRT *) = { 1410 nlm_prog_0, nlm_prog_1, nlm_prog_3, nlm_prog_4 1411 }; 1412 1413 SVCXPRT **xprts; 1414 char netid[16]; 1415 char uaddr[128]; 1416 struct netconfig *nconf; 1417 int i, j, error; 1418 1419 if (!addr_count) { 1420 NLM_ERR("NLM: no service addresses given - can't start server"); 1421 return (EINVAL); 1422 } 1423 1424 if (addr_count < 0 || addr_count > 256 ) { 1425 NLM_ERR("NLM: too many service addresses (%d) given, " 1426 "max 256 - can't start server\n", addr_count); 1427 return (EINVAL); 1428 } 1429 1430 xprts = malloc(addr_count * sizeof(SVCXPRT *), M_NLM, M_WAITOK|M_ZERO); 1431 for (i = 0; i < nitems(versions); i++) { 1432 for (j = 0; j < addr_count; j++) { 1433 /* 1434 * Create transports for the first version and 1435 * then just register everything else to the 1436 * same transports. 
1437 */ 1438 if (i == 0) { 1439 char *up; 1440 1441 error = copyin(&addrs[2*j], &up, 1442 sizeof(char*)); 1443 if (error) 1444 goto out; 1445 error = copyinstr(up, netid, sizeof(netid), 1446 NULL); 1447 if (error) 1448 goto out; 1449 error = copyin(&addrs[2*j+1], &up, 1450 sizeof(char*)); 1451 if (error) 1452 goto out; 1453 error = copyinstr(up, uaddr, sizeof(uaddr), 1454 NULL); 1455 if (error) 1456 goto out; 1457 nconf = getnetconfigent(netid); 1458 if (!nconf) { 1459 NLM_ERR("Can't lookup netid %s\n", 1460 netid); 1461 error = EINVAL; 1462 goto out; 1463 } 1464 xprts[j] = svc_tp_create(pool, dispatchers[i], 1465 NLM_PROG, versions[i], uaddr, nconf); 1466 if (!xprts[j]) { 1467 NLM_ERR("NLM: unable to create " 1468 "(NLM_PROG, %d).\n", versions[i]); 1469 error = EINVAL; 1470 goto out; 1471 } 1472 freenetconfigent(nconf); 1473 } else { 1474 nconf = getnetconfigent(xprts[j]->xp_netid); 1475 rpcb_unset(NLM_PROG, versions[i], nconf); 1476 if (!svc_reg(xprts[j], NLM_PROG, versions[i], 1477 dispatchers[i], nconf)) { 1478 NLM_ERR("NLM: can't register " 1479 "(NLM_PROG, %d)\n", versions[i]); 1480 error = EINVAL; 1481 goto out; 1482 } 1483 } 1484 } 1485 } 1486 error = 0; 1487 out: 1488 for (j = 0; j < addr_count; j++) { 1489 if (xprts[j]) 1490 SVC_RELEASE(xprts[j]); 1491 } 1492 free(xprts, M_NLM); 1493 return (error); 1494 } 1495 1496 /* 1497 * Main server entry point. Contacts the local NSM to get its current 1498 * state and send SM_UNMON_ALL. Registers the NLM services and then 1499 * services requests. Does not return until the server is interrupted 1500 * by a signal. 
 */
static int
nlm_server_main(int addr_count, char **addrs)
{
	struct thread *td = curthread;
	int error;
	SVCPOOL *pool = NULL;
	struct sockopt opt;
	int portlow;
#ifdef INET6
	struct sockaddr_in6 sin6;
#endif
	struct sockaddr_in sin;
	my_id id;
	sm_stat smstat;
	struct timeval timo;
	enum clnt_stat stat;
	struct nlm_host *host, *nhost;
	struct nlm_waiting_lock *nw;
	vop_advlock_t *old_nfs_advlock;
	vop_reclaim_t *old_nfs_reclaim;

	/* Refuse to start a second instance. */
	if (nlm_is_running != 0) {
		NLM_ERR("NLM: can't start server - "
		    "it appears to be running already\n");
		return (EPERM);
	}

	/*
	 * Create the UDP sockets used for outgoing RPC, bound to
	 * reserved (low) ports. These persist across server restarts.
	 */
	if (nlm_socket == NULL) {
		memset(&opt, 0, sizeof(opt));

		error = socreate(AF_INET, &nlm_socket, SOCK_DGRAM, 0,
		    td->td_ucred, td);
		if (error) {
			NLM_ERR("NLM: can't create IPv4 socket - error %d\n",
			    error);
			return (error);
		}
		opt.sopt_dir = SOPT_SET;
		opt.sopt_level = IPPROTO_IP;
		opt.sopt_name = IP_PORTRANGE;
		portlow = IP_PORTRANGE_LOW;
		opt.sopt_val = &portlow;
		opt.sopt_valsize = sizeof(portlow);
		sosetopt(nlm_socket, &opt);

#ifdef INET6
		nlm_socket6 = NULL;
		error = socreate(AF_INET6, &nlm_socket6, SOCK_DGRAM, 0,
		    td->td_ucred, td);
		if (error) {
			NLM_ERR("NLM: can't create IPv6 socket - error %d\n",
			    error);
			soclose(nlm_socket);
			nlm_socket = NULL;
			return (error);
		}
		opt.sopt_dir = SOPT_SET;
		opt.sopt_level = IPPROTO_IPV6;
		opt.sopt_name = IPV6_PORTRANGE;
		portlow = IPV6_PORTRANGE_LOW;
		opt.sopt_val = &portlow;
		opt.sopt_valsize = sizeof(portlow);
		sosetopt(nlm_socket6, &opt);
#endif
	}

	nlm_auth = authunix_create(curthread->td_ucred);

	/*
	 * Contact the local NSM (rpc.statd) on loopback, preferring
	 * IPv6 and falling back to IPv4.
	 */
#ifdef INET6
	memset(&sin6, 0, sizeof(sin6));
	sin6.sin6_len = sizeof(sin6);
	sin6.sin6_family = AF_INET6;
	sin6.sin6_addr = in6addr_loopback;
	nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin6, SM_PROG, SM_VERS);
	if (!nlm_nsm) {
#endif
		memset(&sin, 0, sizeof(sin));
		sin.sin_len = sizeof(sin);
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
		nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin, SM_PROG,
		    SM_VERS);
#ifdef INET6
	}
#endif

	if (!nlm_nsm) {
		NLM_ERR("Can't start NLM - unable to contact NSM\n");
		error = EINVAL;
		goto out;
	}

	pool = svcpool_create("NLM", NULL);

	error = nlm_register_services(pool, addr_count, addrs);
	if (error)
		goto out;

	/*
	 * Ask the NSM to forget all previous monitor registrations -
	 * we are starting with a clean slate.
	 */
	memset(&id, 0, sizeof(id));
	id.my_name = "NFS NLM";

	timo.tv_sec = 25;
	timo.tv_usec = 0;
	stat = CLNT_CALL(nlm_nsm, SM_UNMON_ALL,
	    (xdrproc_t) xdr_my_id, &id,
	    (xdrproc_t) xdr_sm_stat, &smstat, timo);

	if (stat != RPC_SUCCESS) {
		struct rpc_err err;

		CLNT_GETERR(nlm_nsm, &err);
		NLM_ERR("NLM: unexpected error contacting NSM, "
		    "stat=%d, errno=%d\n", stat, err.re_errno);
		error = EINVAL;
		goto out;
	}
	nlm_is_running = 1;

	NLM_DEBUG(1, "NLM: local NSM state is %d\n", smstat.state);
	nlm_nsm_state = smstat.state;

	/*
	 * Hook the NFS client's advlock/reclaim entry points so that
	 * client-side lock requests are routed through the NLM while
	 * the server runs; restored after svc_run() returns.
	 */
	old_nfs_advlock = nfs_advlock_p;
	nfs_advlock_p = nlm_advlock;
	old_nfs_reclaim = nfs_reclaim_p;
	nfs_reclaim_p = nlm_reclaim;

	/* Serve requests until interrupted by a signal. */
	svc_run(pool);
	error = 0;

	nfs_advlock_p = old_nfs_advlock;
	nfs_reclaim_p = old_nfs_reclaim;

out:
	nlm_is_running = 0;
	if (pool)
		svcpool_destroy(pool);

	/*
	 * We are finished communicating with the NSM.
	 */
	if (nlm_nsm) {
		CLNT_RELEASE(nlm_nsm);
		nlm_nsm = NULL;
	}

	/*
	 * Trash all the existing state so that if the server
	 * restarts, it gets a clean slate. This is complicated by the
	 * possibility that there may be other threads trying to make
	 * client locking requests.
	 *
	 * First we fake a client reboot notification which will
	 * cancel any pending async locks and purge remote lock state
	 * from the local lock manager. We release the reference from
	 * nlm_hosts to the host (which may remove it from the list
	 * and free it). After this phase, the only entries in the
	 * nlm_host list should be from other threads performing
	 * client lock requests.
	 */
	mtx_lock(&nlm_global_lock);
	TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) {
		wakeup(nw);
	}
	TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, nhost) {
		/* nlm_host_notify may sleep; drop the list lock around it. */
		mtx_unlock(&nlm_global_lock);
		nlm_host_notify(host, 0);
		nlm_host_release(host);
		mtx_lock(&nlm_global_lock);
	}
	mtx_unlock(&nlm_global_lock);

	AUTH_DESTROY(nlm_auth);

	return (error);
}

/*
 * The nlm_syscall() entry point: validates the caller's privilege,
 * records the debug level and grace period passed from userland, and
 * runs the server loop in this thread until interrupted.
 */
int
sys_nlm_syscall(struct thread *td, struct nlm_syscall_args *uap)
{
	int error;

	error = priv_check(td, PRIV_NFS_LOCKD);
	if (error)
		return (error);

	nlm_debug_level = uap->debug_level;
	nlm_grace_threshold = time_uptime + uap->grace_period;
	nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD;

	return nlm_server_main(uap->addr_count, uap->addrs);
}

/**********************************************************************/

/*
 * NLM implementation details, called from the RPC stubs.
 */

/*
 * Handle an SM_NOTIFY callback from the local NSM: a monitored host
 * has rebooted. The sysid we stashed in the priv field at SM_MON time
 * identifies which host.
 */
void
nlm_sm_notify(struct nlm_sm_status *argp)
{
	uint32_t sysid;
	struct nlm_host *host;

	NLM_DEBUG(3, "nlm_sm_notify(): mon_name = %s\n", argp->mon_name);
	memcpy(&sysid, &argp->priv, sizeof(sysid));
	host = nlm_find_host_by_sysid(sysid);
	if (host) {
		nlm_host_notify(host, argp->state);
		nlm_host_release(host);
	}
}

/*
 * Extract a local fhandle_t from the variable-length file handle sent
 * by the client.
 */
static void
nlm_convert_to_fhandle_t(fhandle_t *fhp, struct netobj *p)
{
	memcpy(fhp, p->n_bytes, sizeof(fhandle_t));
}

/*
 * Per-request bundle of VFS references: the mount, the vnode and
 * whether we still hold the vnode lock. Released with
 * nlm_release_vfs_state().
 */
struct vfs_state {
	struct mount *vs_mp;
	struct vnode *vs_vp;
	int vs_vnlocked;
};

/*
 * Translate the client's file handle into a referenced (and, on
 * success, unlocked) vnode, checking the export and access
 * permissions along the way. accmode == 0 skips the export/access
 * checks (used for unlock/cancel). Returns 0 or an errno value;
 * the caller must call nlm_release_vfs_state() in either case.
 */
static int
nlm_get_vfs_state(struct nlm_host *host, struct svc_req *rqstp,
    fhandle_t *fhp, struct vfs_state *vs, accmode_t accmode)
{
	int error;
	uint64_t exflags;
	struct ucred *cred = NULL, *credanon = NULL;

	memset(vs, 0, sizeof(*vs));

	vs->vs_mp = vfs_getvfs(&fhp->fh_fsid);
	if (!vs->vs_mp) {
		return (ESTALE);
	}

	/* accmode == 0 means don't check, since it is an unlock. */
	if (accmode != 0) {
		error = VFS_CHECKEXP(vs->vs_mp,
		    (struct sockaddr *)&host->nh_addr, &exflags, &credanon,
		    NULL, NULL);
		if (error)
			goto out;

		if (exflags & MNT_EXRDONLY ||
		    (vs->vs_mp->mnt_flag & MNT_RDONLY)) {
			error = EROFS;
			goto out;
		}
	}

	error = VFS_FHTOVP(vs->vs_mp, &fhp->fh_fid, LK_EXCLUSIVE, &vs->vs_vp);
	if (error)
		goto out;
	vs->vs_vnlocked = TRUE;

	if (accmode != 0) {
		if (!svc_getcred(rqstp, &cred, NULL)) {
			error = EINVAL;
			goto out;
		}
		/* Map root (or always, for EXPORTANON) to the anon cred. */
		if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) {
			crfree(cred);
			cred = credanon;
			credanon = NULL;
		}

		/*
		 * Check cred.
		 */
		error = VOP_ACCESS(vs->vs_vp, accmode, cred, curthread);
		/*
		 * If this failed and accmode != VWRITE, try again with
		 * VWRITE to maintain backwards compatibility with the
		 * old code that always used VWRITE.
		 */
		if (error != 0 && accmode != VWRITE)
			error = VOP_ACCESS(vs->vs_vp, VWRITE, cred, curthread);
		if (error)
			goto out;
	}

	VOP_UNLOCK(vs->vs_vp);
	vs->vs_vnlocked = FALSE;

out:
	if (cred)
		crfree(cred);
	if (credanon)
		crfree(credanon);

	return (error);
}

/*
 * Drop whatever references nlm_get_vfs_state acquired (safe to call
 * even after a partial failure).
 */
static void
nlm_release_vfs_state(struct vfs_state *vs)
{

	if (vs->vs_vp) {
		if (vs->vs_vnlocked)
			vput(vs->vs_vp);
		else
			vrele(vs->vs_vp);
	}
	if (vs->vs_mp)
		vfs_rel(vs->vs_mp);
}

/*
 * Map an errno from nlm_get_vfs_state to the closest NLM status code.
 */
static nlm4_stats
nlm_convert_error(int error)
{

	if (error == ESTALE)
		return nlm4_stale_fh;
	else if (error == EROFS)
		return nlm4_rofs;
	else
		return nlm4_failed;
}

/*
 * Service an NLM_TEST request: report whether the described lock
 * could be granted and, if not, who holds the conflicting lock.
 * Always returns 0 with the NLM status in 'result' (ENOMEM only when
 * no host record could be created).
 */
int
nlm_do_test(nlm4_testargs *argp, nlm4_testres *result, struct svc_req *rqstp,
    CLIENT **rpcp)
{
	fhandle_t fh;
	struct vfs_state vs;
	struct nlm_host *host, *bhost;
	int error, sysid;
	struct flock fl;
	accmode_t accmode;

	memset(result, 0, sizeof(*result));
	memset(&vs, 0, sizeof(vs));

	host = nlm_find_host_by_name(argp->alock.caller_name,
	    svc_getrpccaller(rqstp), rqstp->rq_vers);
	if (!host) {
		result->stat.stat = nlm4_denied_nolocks;
		return (ENOMEM);
	}

	NLM_DEBUG(3, "nlm_do_test(): caller_name = %s (sysid = %d)\n",
	    host->nh_caller_name, host->nh_sysid);

	nlm_check_expired_locks(host);
	sysid = host->nh_sysid;

	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);

	if (time_uptime < nlm_grace_threshold) {
		result->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	accmode = argp->exclusive ? VWRITE : VREAD;
	error = nlm_get_vfs_state(host, rqstp, &fh, &vs, accmode);
	if (error) {
		result->stat.stat = nlm_convert_error(error);
		goto out;
	}

	fl.l_start = argp->alock.l_offset;
	fl.l_len = argp->alock.l_len;
	fl.l_pid = argp->alock.svid;
	fl.l_sysid = sysid;
	fl.l_whence = SEEK_SET;
	if (argp->exclusive)
		fl.l_type = F_WRLCK;
	else
		fl.l_type = F_RDLCK;
	error = VOP_ADVLOCK(vs.vs_vp, NULL, F_GETLK, &fl, F_REMOTE);
	if (error) {
		result->stat.stat = nlm4_failed;
		goto out;
	}

	if (fl.l_type == F_UNLCK) {
		result->stat.stat = nlm4_granted;
	} else {
		result->stat.stat = nlm4_denied;
		result->stat.nlm4_testrply_u.holder.exclusive =
		    (fl.l_type == F_WRLCK);
		result->stat.nlm4_testrply_u.holder.svid = fl.l_pid;
		bhost = nlm_find_host_by_sysid(fl.l_sysid);
		if (bhost) {
			/*
			 * We don't have any useful way of recording
			 * the value of oh used in the original lock
			 * request. Ideally, the test reply would have
			 * a space for the owning host's name allowing
			 * our caller's NLM to keep track.
			 *
			 * As far as I can see, Solaris uses an eight
			 * byte structure for oh which contains a four
			 * byte pid encoded in local byte order and
			 * the first four bytes of the host
			 * name. Linux uses a variable length string
			 * 'pid@hostname' in ascii but doesn't even
			 * return that in test replies.
			 *
			 * For the moment, return nothing in oh
			 * (already zero'ed above).
1909 */ 1910 nlm_host_release(bhost); 1911 } 1912 result->stat.nlm4_testrply_u.holder.l_offset = fl.l_start; 1913 result->stat.nlm4_testrply_u.holder.l_len = fl.l_len; 1914 } 1915 1916 out: 1917 nlm_release_vfs_state(&vs); 1918 if (rpcp) 1919 *rpcp = nlm_host_get_rpc(host, TRUE); 1920 nlm_host_release(host); 1921 return (0); 1922 } 1923 1924 int 1925 nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp, 1926 bool_t monitor, CLIENT **rpcp) 1927 { 1928 fhandle_t fh; 1929 struct vfs_state vs; 1930 struct nlm_host *host; 1931 int error, sysid; 1932 struct flock fl; 1933 accmode_t accmode; 1934 1935 memset(result, 0, sizeof(*result)); 1936 memset(&vs, 0, sizeof(vs)); 1937 1938 host = nlm_find_host_by_name(argp->alock.caller_name, 1939 svc_getrpccaller(rqstp), rqstp->rq_vers); 1940 if (!host) { 1941 result->stat.stat = nlm4_denied_nolocks; 1942 return (ENOMEM); 1943 } 1944 1945 NLM_DEBUG(3, "nlm_do_lock(): caller_name = %s (sysid = %d)\n", 1946 host->nh_caller_name, host->nh_sysid); 1947 1948 if (monitor && host->nh_state && argp->state 1949 && host->nh_state != argp->state) { 1950 /* 1951 * The host rebooted without telling us. Trash its 1952 * locks. 1953 */ 1954 nlm_host_notify(host, argp->state); 1955 } 1956 1957 nlm_check_expired_locks(host); 1958 sysid = host->nh_sysid; 1959 1960 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 1961 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 1962 1963 if (time_uptime < nlm_grace_threshold && !argp->reclaim) { 1964 result->stat.stat = nlm4_denied_grace_period; 1965 goto out; 1966 } 1967 1968 accmode = argp->exclusive ? 
VWRITE : VREAD; 1969 error = nlm_get_vfs_state(host, rqstp, &fh, &vs, accmode); 1970 if (error) { 1971 result->stat.stat = nlm_convert_error(error); 1972 goto out; 1973 } 1974 1975 fl.l_start = argp->alock.l_offset; 1976 fl.l_len = argp->alock.l_len; 1977 fl.l_pid = argp->alock.svid; 1978 fl.l_sysid = sysid; 1979 fl.l_whence = SEEK_SET; 1980 if (argp->exclusive) 1981 fl.l_type = F_WRLCK; 1982 else 1983 fl.l_type = F_RDLCK; 1984 if (argp->block) { 1985 struct nlm_async_lock *af; 1986 CLIENT *client; 1987 struct nlm_grantcookie cookie; 1988 1989 /* 1990 * First, make sure we can contact the host's NLM. 1991 */ 1992 client = nlm_host_get_rpc(host, TRUE); 1993 if (!client) { 1994 result->stat.stat = nlm4_failed; 1995 goto out; 1996 } 1997 1998 /* 1999 * First we need to check and see if there is an 2000 * existing blocked lock that matches. This could be a 2001 * badly behaved client or an RPC re-send. If we find 2002 * one, just return nlm4_blocked. 2003 */ 2004 mtx_lock(&host->nh_lock); 2005 TAILQ_FOREACH(af, &host->nh_pending, af_link) { 2006 if (af->af_fl.l_start == fl.l_start 2007 && af->af_fl.l_len == fl.l_len 2008 && af->af_fl.l_pid == fl.l_pid 2009 && af->af_fl.l_type == fl.l_type) { 2010 break; 2011 } 2012 } 2013 if (!af) { 2014 cookie.ng_sysid = host->nh_sysid; 2015 cookie.ng_cookie = host->nh_grantcookie++; 2016 } 2017 mtx_unlock(&host->nh_lock); 2018 if (af) { 2019 CLNT_RELEASE(client); 2020 result->stat.stat = nlm4_blocked; 2021 goto out; 2022 } 2023 2024 af = malloc(sizeof(struct nlm_async_lock), M_NLM, 2025 M_WAITOK|M_ZERO); 2026 TASK_INIT(&af->af_task, 0, nlm_lock_callback, af); 2027 af->af_vp = vs.vs_vp; 2028 af->af_fl = fl; 2029 af->af_host = host; 2030 af->af_rpc = client; 2031 /* 2032 * We use M_RPC here so that we can xdr_free the thing 2033 * later. 
2034 */ 2035 nlm_make_netobj(&af->af_granted.cookie, 2036 (caddr_t)&cookie, sizeof(cookie), M_RPC); 2037 af->af_granted.exclusive = argp->exclusive; 2038 af->af_granted.alock.caller_name = 2039 strdup(argp->alock.caller_name, M_RPC); 2040 nlm_copy_netobj(&af->af_granted.alock.fh, 2041 &argp->alock.fh, M_RPC); 2042 nlm_copy_netobj(&af->af_granted.alock.oh, 2043 &argp->alock.oh, M_RPC); 2044 af->af_granted.alock.svid = argp->alock.svid; 2045 af->af_granted.alock.l_offset = argp->alock.l_offset; 2046 af->af_granted.alock.l_len = argp->alock.l_len; 2047 2048 /* 2049 * Put the entry on the pending list before calling 2050 * VOP_ADVLOCKASYNC. We do this in case the lock 2051 * request was blocked (returning EINPROGRESS) but 2052 * then granted before we manage to run again. The 2053 * client may receive the granted message before we 2054 * send our blocked reply but thats their problem. 2055 */ 2056 mtx_lock(&host->nh_lock); 2057 TAILQ_INSERT_TAIL(&host->nh_pending, af, af_link); 2058 mtx_unlock(&host->nh_lock); 2059 2060 error = VOP_ADVLOCKASYNC(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE, 2061 &af->af_task, &af->af_cookie); 2062 2063 /* 2064 * If the lock completed synchronously, just free the 2065 * tracking structure now. 2066 */ 2067 if (error != EINPROGRESS) { 2068 CLNT_RELEASE(af->af_rpc); 2069 mtx_lock(&host->nh_lock); 2070 TAILQ_REMOVE(&host->nh_pending, af, af_link); 2071 mtx_unlock(&host->nh_lock); 2072 xdr_free((xdrproc_t) xdr_nlm4_testargs, 2073 &af->af_granted); 2074 free(af, M_NLM); 2075 } else { 2076 NLM_DEBUG(2, "NLM: pending async lock %p for %s " 2077 "(sysid %d)\n", af, host->nh_caller_name, sysid); 2078 /* 2079 * Don't vrele the vnode just yet - this must 2080 * wait until either the async callback 2081 * happens or the lock is cancelled. 
2082 */ 2083 vs.vs_vp = NULL; 2084 } 2085 } else { 2086 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE); 2087 } 2088 2089 if (error) { 2090 if (error == EINPROGRESS) { 2091 result->stat.stat = nlm4_blocked; 2092 } else if (error == EDEADLK) { 2093 result->stat.stat = nlm4_deadlck; 2094 } else if (error == EAGAIN) { 2095 result->stat.stat = nlm4_denied; 2096 } else { 2097 result->stat.stat = nlm4_failed; 2098 } 2099 } else { 2100 if (monitor) 2101 nlm_host_monitor(host, argp->state); 2102 result->stat.stat = nlm4_granted; 2103 } 2104 2105 out: 2106 nlm_release_vfs_state(&vs); 2107 if (rpcp) 2108 *rpcp = nlm_host_get_rpc(host, TRUE); 2109 nlm_host_release(host); 2110 return (0); 2111 } 2112 2113 int 2114 nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *result, struct svc_req *rqstp, 2115 CLIENT **rpcp) 2116 { 2117 fhandle_t fh; 2118 struct vfs_state vs; 2119 struct nlm_host *host; 2120 int error, sysid; 2121 struct flock fl; 2122 struct nlm_async_lock *af; 2123 2124 memset(result, 0, sizeof(*result)); 2125 memset(&vs, 0, sizeof(vs)); 2126 2127 host = nlm_find_host_by_name(argp->alock.caller_name, 2128 svc_getrpccaller(rqstp), rqstp->rq_vers); 2129 if (!host) { 2130 result->stat.stat = nlm4_denied_nolocks; 2131 return (ENOMEM); 2132 } 2133 2134 NLM_DEBUG(3, "nlm_do_cancel(): caller_name = %s (sysid = %d)\n", 2135 host->nh_caller_name, host->nh_sysid); 2136 2137 nlm_check_expired_locks(host); 2138 sysid = host->nh_sysid; 2139 2140 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 2141 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 2142 2143 if (time_uptime < nlm_grace_threshold) { 2144 result->stat.stat = nlm4_denied_grace_period; 2145 goto out; 2146 } 2147 2148 error = nlm_get_vfs_state(host, rqstp, &fh, &vs, (accmode_t)0); 2149 if (error) { 2150 result->stat.stat = nlm_convert_error(error); 2151 goto out; 2152 } 2153 2154 fl.l_start = argp->alock.l_offset; 2155 fl.l_len = argp->alock.l_len; 2156 fl.l_pid = argp->alock.svid; 2157 fl.l_sysid = sysid; 
2158 fl.l_whence = SEEK_SET; 2159 if (argp->exclusive) 2160 fl.l_type = F_WRLCK; 2161 else 2162 fl.l_type = F_RDLCK; 2163 2164 /* 2165 * First we need to try and find the async lock request - if 2166 * there isn't one, we give up and return nlm4_denied. 2167 */ 2168 mtx_lock(&host->nh_lock); 2169 2170 TAILQ_FOREACH(af, &host->nh_pending, af_link) { 2171 if (af->af_fl.l_start == fl.l_start 2172 && af->af_fl.l_len == fl.l_len 2173 && af->af_fl.l_pid == fl.l_pid 2174 && af->af_fl.l_type == fl.l_type) { 2175 break; 2176 } 2177 } 2178 2179 if (!af) { 2180 mtx_unlock(&host->nh_lock); 2181 result->stat.stat = nlm4_denied; 2182 goto out; 2183 } 2184 2185 error = nlm_cancel_async_lock(af); 2186 2187 if (error) { 2188 result->stat.stat = nlm4_denied; 2189 } else { 2190 result->stat.stat = nlm4_granted; 2191 } 2192 2193 mtx_unlock(&host->nh_lock); 2194 2195 out: 2196 nlm_release_vfs_state(&vs); 2197 if (rpcp) 2198 *rpcp = nlm_host_get_rpc(host, TRUE); 2199 nlm_host_release(host); 2200 return (0); 2201 } 2202 2203 int 2204 nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *result, struct svc_req *rqstp, 2205 CLIENT **rpcp) 2206 { 2207 fhandle_t fh; 2208 struct vfs_state vs; 2209 struct nlm_host *host; 2210 int error, sysid; 2211 struct flock fl; 2212 2213 memset(result, 0, sizeof(*result)); 2214 memset(&vs, 0, sizeof(vs)); 2215 2216 host = nlm_find_host_by_name(argp->alock.caller_name, 2217 svc_getrpccaller(rqstp), rqstp->rq_vers); 2218 if (!host) { 2219 result->stat.stat = nlm4_denied_nolocks; 2220 return (ENOMEM); 2221 } 2222 2223 NLM_DEBUG(3, "nlm_do_unlock(): caller_name = %s (sysid = %d)\n", 2224 host->nh_caller_name, host->nh_sysid); 2225 2226 nlm_check_expired_locks(host); 2227 sysid = host->nh_sysid; 2228 2229 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 2230 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 2231 2232 if (time_uptime < nlm_grace_threshold) { 2233 result->stat.stat = nlm4_denied_grace_period; 2234 goto out; 2235 } 2236 2237 error = 
nlm_get_vfs_state(host, rqstp, &fh, &vs, (accmode_t)0); 2238 if (error) { 2239 result->stat.stat = nlm_convert_error(error); 2240 goto out; 2241 } 2242 2243 fl.l_start = argp->alock.l_offset; 2244 fl.l_len = argp->alock.l_len; 2245 fl.l_pid = argp->alock.svid; 2246 fl.l_sysid = sysid; 2247 fl.l_whence = SEEK_SET; 2248 fl.l_type = F_UNLCK; 2249 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_UNLCK, &fl, F_REMOTE); 2250 2251 /* 2252 * Ignore the error - there is no result code for failure, 2253 * only for grace period. 2254 */ 2255 result->stat.stat = nlm4_granted; 2256 2257 out: 2258 nlm_release_vfs_state(&vs); 2259 if (rpcp) 2260 *rpcp = nlm_host_get_rpc(host, TRUE); 2261 nlm_host_release(host); 2262 return (0); 2263 } 2264 2265 int 2266 nlm_do_granted(nlm4_testargs *argp, nlm4_res *result, struct svc_req *rqstp, 2267 2268 CLIENT **rpcp) 2269 { 2270 struct nlm_host *host; 2271 struct nlm_waiting_lock *nw; 2272 2273 memset(result, 0, sizeof(*result)); 2274 2275 host = nlm_find_host_by_addr(svc_getrpccaller(rqstp), rqstp->rq_vers); 2276 if (!host) { 2277 result->stat.stat = nlm4_denied_nolocks; 2278 return (ENOMEM); 2279 } 2280 2281 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 2282 result->stat.stat = nlm4_denied; 2283 KFAIL_POINT_CODE(DEBUG_FP, nlm_deny_grant, goto out); 2284 2285 mtx_lock(&nlm_global_lock); 2286 TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) { 2287 if (!nw->nw_waiting) 2288 continue; 2289 if (argp->alock.svid == nw->nw_lock.svid 2290 && argp->alock.l_offset == nw->nw_lock.l_offset 2291 && argp->alock.l_len == nw->nw_lock.l_len 2292 && argp->alock.fh.n_len == nw->nw_lock.fh.n_len 2293 && !memcmp(argp->alock.fh.n_bytes, nw->nw_lock.fh.n_bytes, 2294 nw->nw_lock.fh.n_len)) { 2295 nw->nw_waiting = FALSE; 2296 wakeup(nw); 2297 result->stat.stat = nlm4_granted; 2298 break; 2299 } 2300 } 2301 mtx_unlock(&nlm_global_lock); 2302 2303 out: 2304 if (rpcp) 2305 *rpcp = nlm_host_get_rpc(host, TRUE); 2306 nlm_host_release(host); 2307 return (0); 2308 } 2309 2310 
void
nlm_do_granted_res(nlm4_res *argp, struct svc_req *rqstp)
{
	struct nlm_host *host = NULL;
	struct nlm_async_lock *af = NULL;
	int error;

	/*
	 * Handle an NLM_GRANTED_RES message: the client's reply to a
	 * grant callback we sent earlier.  The cookie we generated
	 * (struct nlm_grantcookie) identifies both the host and the
	 * pending async lock.
	 */
	if (argp->cookie.n_len != sizeof(struct nlm_grantcookie)) {
		NLM_DEBUG(1, "NLM: bogus grant cookie");
		goto out;
	}

	host = nlm_find_host_by_sysid(ng_sysid(&argp->cookie));
	if (!host) {
		NLM_DEBUG(1, "NLM: Unknown host rejected our grant");
		goto out;
	}

	/*
	 * Find the async lock on the host's granted list by cookie and
	 * unlink it under nh_lock; the actual teardown happens after
	 * the mutex is dropped.
	 */
	mtx_lock(&host->nh_lock);
	TAILQ_FOREACH(af, &host->nh_granted, af_link)
	    if (ng_cookie(&argp->cookie) ==
		ng_cookie(&af->af_granted.cookie))
			break;
	if (af)
		TAILQ_REMOVE(&host->nh_granted, af, af_link);
	mtx_unlock(&host->nh_lock);

	if (!af) {
		NLM_DEBUG(1, "NLM: host %s (sysid %d) replied to our grant "
		    "with unrecognized cookie %d:%d", host->nh_caller_name,
		    host->nh_sysid, ng_sysid(&argp->cookie),
		    ng_cookie(&argp->cookie));
		goto out;
	}

	if (argp->stat.stat != nlm4_granted) {
		/*
		 * The client rejected the grant, so release the local
		 * advisory lock we had taken on its behalf.
		 */
		af->af_fl.l_type = F_UNLCK;
		error = VOP_ADVLOCK(af->af_vp, NULL, F_UNLCK, &af->af_fl, F_REMOTE);
		if (error) {
			NLM_DEBUG(1, "NLM: host %s (sysid %d) rejected our grant "
			    "and we failed to unlock (%d)", host->nh_caller_name,
			    host->nh_sysid, error);
			goto out;
		}

		NLM_DEBUG(5, "NLM: async lock %p rejected by host %s (sysid %d)",
		    af, host->nh_caller_name, host->nh_sysid);
	} else {
		NLM_DEBUG(5, "NLM: async lock %p accepted by host %s (sysid %d)",
		    af, host->nh_caller_name, host->nh_sysid);
	}

out:
	/* af was unlinked above, so it is safe to free it here. */
	if (af)
		nlm_free_async_lock(af);
	if (host)
		nlm_host_release(host);
}

/*
 * Handle NLM_FREE_ALL: a client rebooted (or otherwise wants all of
 * its locks dropped), so notify every host record matching its
 * caller name.  nlm_host_notify() releases that host's locks.
 */
void
nlm_do_free_all(nlm4_notify *argp)
{
	struct nlm_host *host, *thost;

	/* SAFE variant: nlm_host_notify() may unlink the current entry. */
	TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, thost) {
		if (!strcmp(host->nh_caller_name, argp->name))
			nlm_host_notify(host, argp->state);
	}
}

/*
Kernel module glue 2382 */ 2383 static int 2384 nfslockd_modevent(module_t mod, int type, void *data) 2385 { 2386 2387 switch (type) { 2388 case MOD_LOAD: 2389 return (nlm_init()); 2390 2391 case MOD_UNLOAD: 2392 nlm_uninit(); 2393 /* The NLM module cannot be safely unloaded. */ 2394 /* FALLTHROUGH */ 2395 default: 2396 return (EOPNOTSUPP); 2397 } 2398 } 2399 static moduledata_t nfslockd_mod = { 2400 "nfslockd", 2401 nfslockd_modevent, 2402 NULL, 2403 }; 2404 DECLARE_MODULE(nfslockd, nfslockd_mod, SI_SUB_VFS, SI_ORDER_ANY); 2405 2406 /* So that loader and kldload(2) can find us, wherever we are.. */ 2407 MODULE_DEPEND(nfslockd, xdr, 1, 1, 1); 2408 MODULE_DEPEND(nfslockd, krpc, 1, 1, 1); 2409 MODULE_DEPEND(nfslockd, nfscommon, 1, 1, 1); 2410 MODULE_VERSION(nfslockd, 1); 2411