1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ 5 * Authors: Doug Rabson <dfr@rabson.org> 6 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
 */

#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/fail.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lockf.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/jail.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <sys/unistd.h>
#include <sys/vnode.h>

#include <nfs/nfsproto.h>
#include <nfs/nfs_lock.h>

#include <nlm/nlm_prot.h>
#include <nlm/sm_inter.h>
#include <nlm/nlm.h>
#include <rpc/rpc_com.h>
#include <rpc/rpcb_prot.h>

MALLOC_DEFINE(M_NLM, "NLM", "Network Lock Manager");

/*
 * If a host is inactive (and holds no locks) for this amount of
 * seconds, we consider it idle and stop tracking it.
 */
#define NLM_IDLE_TIMEOUT	30

/*
 * We check the host list for idle every few seconds.
 */
#define NLM_IDLE_PERIOD		5

/*
 * We only look for GRANTED_RES messages for a little while.
 */
#define NLM_EXPIRE_TIMEOUT	10

/*
 * Support for sysctl vfs.nlm.sysid
 */
static SYSCTL_NODE(_vfs, OID_AUTO, nlm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
    "Network Lock Manager");
static SYSCTL_NODE(_vfs_nlm, OID_AUTO, sysid,
    CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
    "");

/*
 * Syscall hooks
 */
static struct syscall_helper_data nlm_syscalls[] = {
	SYSCALL_INIT_HELPER(nlm_syscall),
	SYSCALL_INIT_LAST
};

/*
 * Debug level passed in from userland. We also support a sysctl hook
 * so that it can be changed on a live system.
 */
static int nlm_debug_level;
SYSCTL_INT(_debug, OID_AUTO, nlm_debug, CTLFLAG_RW, &nlm_debug_level, 0, "");

/* Log at LOG_DEBUG when the configured debug level is at least _level. */
#define NLM_DEBUG(_level, args...)			\
	do {						\
		if (nlm_debug_level >= (_level))	\
			log(LOG_DEBUG, args);		\
	} while(0)
/* Unconditional error logging to the kernel message buffer. */
#define NLM_ERR(args...)				\
	do {						\
		log(LOG_ERR, args);			\
	} while(0)

/*
 * Grace period handling. The value of nlm_grace_threshold is the
 * value of time_uptime after which we are serving requests normally.
 */
static time_t nlm_grace_threshold;

/*
 * We check for idle hosts if time_uptime is greater than
 * nlm_next_idle_check,
 */
static time_t nlm_next_idle_check;

/*
 * A flag to indicate the server is already running.
 */
static int nlm_is_running;

/*
 * A socket to use for RPC - shared by all IPv4 RPC clients.
 */
static struct socket *nlm_socket;

#ifdef INET6

/*
 * A socket to use for RPC - shared by all IPv6 RPC clients.
 */
static struct socket *nlm_socket6;

#endif

/*
 * An RPC client handle that can be used to communicate with the local
 * NSM.
 */
static CLIENT *nlm_nsm;

/*
 * An AUTH handle for the server's creds.
 */
static AUTH *nlm_auth;

/*
 * A zero timeval for sending async RPC messages.
 */
struct timeval nlm_zero_tv = { 0, 0 };

/*
 * The local NSM state number
 */
int nlm_nsm_state;

/*
 * A lock to protect the host list and waiting lock list.
 */
static struct mtx nlm_global_lock;

/*
 * Locks:
 * (l)		locked by nh_lock
 * (s)		only accessed via server RPC which is single threaded
 * (g)		locked by nlm_global_lock
 * (c)		const until freeing
 * (a)		modified using atomic ops
 */

/*
 * A pending client-side lock request, stored on the nlm_waiting_locks
 * list.
 */
struct nlm_waiting_lock {
	TAILQ_ENTRY(nlm_waiting_lock) nw_link;	/* (g) */
	bool_t		nw_waiting;		/* (g) */
	nlm4_lock	nw_lock;		/* (c) */
	union nfsfh	nw_fh;			/* (c) */
	struct vnode	*nw_vp;			/* (c) */
};
TAILQ_HEAD(nlm_waiting_lock_list, nlm_waiting_lock);

struct nlm_waiting_lock_list nlm_waiting_locks;	/* (g) */

/*
 * A pending server-side asynchronous lock request, stored on the
 * nh_pending list of the NLM host.
 */
struct nlm_async_lock {
	TAILQ_ENTRY(nlm_async_lock) af_link;	/* (l) host's list of locks */
	struct task	af_task;	/* (c) async callback details */
	void		*af_cookie;	/* (l) lock manager cancel token */
	struct vnode	*af_vp;		/* (l) vnode to lock */
	struct flock	af_fl;		/* (c) lock details */
	struct nlm_host	*af_host;	/* (c) host which is locking */
	CLIENT		*af_rpc;	/* (c) rpc client to send message */
	nlm4_testargs	af_granted;	/* (c) notification details */
	time_t		af_expiretime;	/* (c) notification time */
};
TAILQ_HEAD(nlm_async_lock_list, nlm_async_lock);

/*
 * NLM host.
 */
enum nlm_host_state {
	NLM_UNMONITORED,
	NLM_MONITORED,
	NLM_MONITOR_FAILED,
	NLM_RECOVERING
};

/*
 * A cached RPC client handle together with its creation time; the
 * creation time lets callers decide when a handle is too stale to
 * keep re-using.
 */
struct nlm_rpc {
	CLIENT		*nr_client;	/* (l) RPC client handle */
	time_t		nr_create_time;	/* (l) when client was created */
};

struct nlm_host {
	struct mtx	nh_lock;
	volatile u_int	nh_refs;	/* (a) reference count */
	TAILQ_ENTRY(nlm_host) nh_link;	/* (g) global list of hosts */
	char		nh_caller_name[MAXNAMELEN]; /* (c) printable name of host */
	uint32_t	nh_sysid;	/* (c) our allocated system ID */
	char		nh_sysid_string[10]; /* (c) string rep. of sysid */
	struct sockaddr_storage	nh_addr; /* (s) remote address of host */
	struct nlm_rpc	nh_srvrpc;	/* (l) RPC for server replies */
	struct nlm_rpc	nh_clntrpc;	/* (l) RPC for client requests */
	rpcvers_t	nh_vers;	/* (s) NLM version of host */
	int		nh_state;	/* (s) last seen NSM state of host */
	enum nlm_host_state nh_monstate; /* (l) local NSM monitoring state */
	time_t		nh_idle_timeout; /* (s) Time at which host is idle */
	struct sysctl_ctx_list nh_sysctl; /* (c) vfs.nlm.sysid nodes */
	uint32_t	nh_grantcookie;	/* (l) grant cookie counter */
	struct nlm_async_lock_list nh_pending; /* (l) pending async locks */
	struct nlm_async_lock_list nh_granted; /* (l) granted locks */
	struct nlm_async_lock_list nh_finished; /* (l) finished async locks */
};
TAILQ_HEAD(nlm_host_list, nlm_host);

static struct nlm_host_list nlm_hosts;	/* (g) */
static uint32_t nlm_next_sysid = 1;	/* (g) */

static void	nlm_host_unmonitor(struct nlm_host *);

/*
 * Cookie carried in asynchronous GRANTED messages: identifies both
 * the host (by sysid) and the individual grant (by the host's
 * nh_grantcookie counter).
 */
struct nlm_grantcookie {
	uint32_t	ng_sysid;
	uint32_t	ng_cookie;
};

/* Extract the sysid half of a grant cookie carried in a netobj. */
static inline uint32_t
ng_sysid(struct netobj *src)
{

	return ((struct nlm_grantcookie *)src->n_bytes)->ng_sysid;
}

/* Extract the per-host counter half of a grant cookie. */
static inline uint32_t
ng_cookie(struct netobj *src)
{

	return ((struct nlm_grantcookie *)src->n_bytes)->ng_cookie;
}

/**********************************************************************/

/*
 * Initialise NLM globals.
279 */ 280 static int 281 nlm_init(void) 282 { 283 int error; 284 285 mtx_init(&nlm_global_lock, "nlm_global_lock", NULL, MTX_DEF); 286 TAILQ_INIT(&nlm_waiting_locks); 287 TAILQ_INIT(&nlm_hosts); 288 289 error = syscall_helper_register(nlm_syscalls, SY_THR_STATIC_KLD); 290 if (error != 0) 291 NLM_ERR("Can't register NLM syscall\n"); 292 return (error); 293 } 294 295 static void 296 nlm_uninit(void) 297 { 298 299 syscall_helper_unregister(nlm_syscalls); 300 } 301 302 /* 303 * Create a netobj from an arbitrary source. 304 */ 305 void 306 nlm_make_netobj(struct netobj *dst, caddr_t src, size_t srcsize, 307 struct malloc_type *type) 308 { 309 310 dst->n_len = srcsize; 311 dst->n_bytes = malloc(srcsize, type, M_WAITOK); 312 memcpy(dst->n_bytes, src, srcsize); 313 } 314 315 /* 316 * Copy a struct netobj. 317 */ 318 void 319 nlm_copy_netobj(struct netobj *dst, struct netobj *src, 320 struct malloc_type *type) 321 { 322 323 nlm_make_netobj(dst, src->n_bytes, src->n_len, type); 324 } 325 326 /* 327 * Create an RPC client handle for the given (address,prog,vers) 328 * triple using UDP. 329 */ 330 static CLIENT * 331 nlm_get_rpc(struct sockaddr *sa, rpcprog_t prog, rpcvers_t vers) 332 { 333 char *wchan = "nlmrcv"; 334 struct sockaddr_storage ss; 335 struct socket *so; 336 CLIENT *rpcb; 337 struct timeval timo; 338 RPCB parms; 339 char *uaddr; 340 enum clnt_stat stat = RPC_SUCCESS; 341 int rpcvers = RPCBVERS4; 342 bool_t do_tcp = FALSE; 343 bool_t tryagain = FALSE; 344 struct portmap mapping; 345 u_short port = 0; 346 struct sockaddr_in *sin4; 347 char namebuf[INET_ADDRSTRLEN]; 348 #ifdef INET6 349 struct sockaddr_in6 *sin6; 350 char namebuf6[INET6_ADDRSTRLEN]; 351 #endif 352 353 /* 354 * First we need to contact the remote RPCBIND service to find 355 * the right port. 
356 */ 357 memcpy(&ss, sa, sa->sa_len); 358 switch (ss.ss_family) { 359 case AF_INET: 360 ((struct sockaddr_in *)&ss)->sin_port = htons(111); 361 so = nlm_socket; 362 break; 363 #ifdef INET6 364 case AF_INET6: 365 ((struct sockaddr_in6 *)&ss)->sin6_port = htons(111); 366 so = nlm_socket6; 367 break; 368 #endif 369 370 default: 371 /* 372 * Unsupported address family - fail. 373 */ 374 return (NULL); 375 } 376 377 rpcb = clnt_dg_create(so, (struct sockaddr *)&ss, 378 RPCBPROG, rpcvers, 0, 0); 379 if (!rpcb) 380 return (NULL); 381 382 try_tcp: 383 parms.r_prog = prog; 384 parms.r_vers = vers; 385 if (do_tcp) 386 parms.r_netid = "tcp"; 387 else 388 parms.r_netid = "udp"; 389 parms.r_addr = ""; 390 parms.r_owner = ""; 391 392 /* 393 * Use the default timeout. 394 */ 395 timo.tv_sec = 25; 396 timo.tv_usec = 0; 397 again: 398 switch (rpcvers) { 399 case RPCBVERS4: 400 case RPCBVERS: 401 /* 402 * Try RPCBIND 4 then 3. 403 */ 404 uaddr = NULL; 405 stat = CLNT_CALL(rpcb, (rpcprog_t) RPCBPROC_GETADDR, 406 (xdrproc_t) xdr_rpcb, &parms, 407 (xdrproc_t) xdr_wrapstring, &uaddr, timo); 408 if (stat == RPC_SUCCESS) { 409 /* 410 * We have a reply from the remote RPCBIND - turn it 411 * into an appropriate address and make a new client 412 * that can talk to the remote NLM. 413 * 414 * XXX fixup IPv6 scope ID. 415 */ 416 struct netbuf *a; 417 a = __rpc_uaddr2taddr_af(ss.ss_family, uaddr); 418 if (!a) { 419 tryagain = TRUE; 420 } else { 421 tryagain = FALSE; 422 memcpy(&ss, a->buf, a->len); 423 free(a->buf, M_RPC); 424 free(a, M_RPC); 425 xdr_free((xdrproc_t) xdr_wrapstring, &uaddr); 426 } 427 } 428 if (tryagain || stat == RPC_PROGVERSMISMATCH) { 429 if (rpcvers == RPCBVERS4) 430 rpcvers = RPCBVERS; 431 else if (rpcvers == RPCBVERS) 432 rpcvers = PMAPVERS; 433 CLNT_CONTROL(rpcb, CLSET_VERS, &rpcvers); 434 goto again; 435 } 436 break; 437 case PMAPVERS: 438 /* 439 * Try portmap. 
440 */ 441 mapping.pm_prog = parms.r_prog; 442 mapping.pm_vers = parms.r_vers; 443 mapping.pm_prot = do_tcp ? IPPROTO_TCP : IPPROTO_UDP; 444 mapping.pm_port = 0; 445 446 stat = CLNT_CALL(rpcb, (rpcprog_t) PMAPPROC_GETPORT, 447 (xdrproc_t) xdr_portmap, &mapping, 448 (xdrproc_t) xdr_u_short, &port, timo); 449 450 if (stat == RPC_SUCCESS) { 451 switch (ss.ss_family) { 452 case AF_INET: 453 ((struct sockaddr_in *)&ss)->sin_port = 454 htons(port); 455 break; 456 457 #ifdef INET6 458 case AF_INET6: 459 ((struct sockaddr_in6 *)&ss)->sin6_port = 460 htons(port); 461 break; 462 #endif 463 } 464 } 465 break; 466 default: 467 panic("invalid rpcvers %d", rpcvers); 468 } 469 /* 470 * We may have a positive response from the portmapper, but the NLM 471 * service was not found. Make sure we received a valid port. 472 */ 473 switch (ss.ss_family) { 474 case AF_INET: 475 port = ((struct sockaddr_in *)&ss)->sin_port; 476 break; 477 #ifdef INET6 478 case AF_INET6: 479 port = ((struct sockaddr_in6 *)&ss)->sin6_port; 480 break; 481 #endif 482 } 483 if (stat != RPC_SUCCESS || !port) { 484 /* 485 * If we were able to talk to rpcbind or portmap, but the udp 486 * variant wasn't available, ask about tcp. 487 * 488 * XXX - We could also check for a TCP portmapper, but 489 * if the host is running a portmapper at all, we should be able 490 * to hail it over UDP. 491 */ 492 if (stat == RPC_SUCCESS && !do_tcp) { 493 do_tcp = TRUE; 494 goto try_tcp; 495 } 496 497 /* Otherwise, bad news. 
*/ 498 switch (ss.ss_family) { 499 case AF_INET: 500 sin4 = (struct sockaddr_in *)&ss; 501 inet_ntop(ss.ss_family, &sin4->sin_addr, 502 namebuf, sizeof namebuf); 503 NLM_ERR("NLM: failed to contact remote rpcbind, " 504 "stat = %d, host = %s, port = %d\n", 505 (int) stat, namebuf, htons(port)); 506 break; 507 #ifdef INET6 508 case AF_INET6: 509 sin6 = (struct sockaddr_in6 *)&ss; 510 inet_ntop(ss.ss_family, &sin6->sin6_addr, 511 namebuf6, sizeof namebuf6); 512 NLM_ERR("NLM: failed to contact remote rpcbind, " 513 "stat = %d, host = %s, port = %d\n", 514 (int) stat, namebuf6, htons(port)); 515 break; 516 #endif 517 } 518 CLNT_DESTROY(rpcb); 519 return (NULL); 520 } 521 522 if (do_tcp) { 523 /* 524 * Destroy the UDP client we used to speak to rpcbind and 525 * recreate as a TCP client. 526 */ 527 struct netconfig *nconf = NULL; 528 529 CLNT_DESTROY(rpcb); 530 531 switch (ss.ss_family) { 532 case AF_INET: 533 nconf = getnetconfigent("tcp"); 534 break; 535 #ifdef INET6 536 case AF_INET6: 537 nconf = getnetconfigent("tcp6"); 538 break; 539 #endif 540 } 541 542 rpcb = clnt_reconnect_create(nconf, (struct sockaddr *)&ss, 543 prog, vers, 0, 0); 544 CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan); 545 rpcb->cl_auth = nlm_auth; 546 547 } else { 548 /* 549 * Re-use the client we used to speak to rpcbind. 550 */ 551 CLNT_CONTROL(rpcb, CLSET_SVC_ADDR, &ss); 552 CLNT_CONTROL(rpcb, CLSET_PROG, &prog); 553 CLNT_CONTROL(rpcb, CLSET_VERS, &vers); 554 CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan); 555 rpcb->cl_auth = nlm_auth; 556 } 557 558 return (rpcb); 559 } 560 561 /* 562 * This async callback after when an async lock request has been 563 * granted. We notify the host which initiated the request. 
 */
static void
nlm_lock_callback(void *arg, int pending)
{
	struct nlm_async_lock *af = (struct nlm_async_lock *) arg;
	struct rpc_callextra ext;

	NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) granted,"
	    " cookie %d:%d\n", af, af->af_host->nh_caller_name,
	    af->af_host->nh_sysid, ng_sysid(&af->af_granted.cookie),
	    ng_cookie(&af->af_granted.cookie));

	/*
	 * Send the results back to the host.
	 *
	 * Note: there is a possible race here with nlm_host_notify
	 * destroying the RPC client. To avoid problems, the first
	 * thing nlm_host_notify does is to cancel pending async lock
	 * requests.
	 */
	memset(&ext, 0, sizeof(ext));
	ext.rc_auth = nlm_auth;
	if (af->af_host->nh_vers == NLM_VERS4) {
		nlm4_granted_msg_4(&af->af_granted,
		    NULL, af->af_rpc, &ext, nlm_zero_tv);
	} else {
		/*
		 * Back-convert to legacy protocol
		 */
		nlm_testargs granted;
		granted.cookie = af->af_granted.cookie;
		granted.exclusive = af->af_granted.exclusive;
		granted.alock.caller_name =
			af->af_granted.alock.caller_name;
		granted.alock.fh = af->af_granted.alock.fh;
		granted.alock.oh = af->af_granted.alock.oh;
		granted.alock.svid = af->af_granted.alock.svid;
		granted.alock.l_offset =
			af->af_granted.alock.l_offset;
		granted.alock.l_len =
			af->af_granted.alock.l_len;

		nlm_granted_msg_1(&granted,
		    NULL, af->af_rpc, &ext, nlm_zero_tv);
	}

	/*
	 * Move this entry to the nh_granted list.
	 */
	af->af_expiretime = time_uptime + NLM_EXPIRE_TIMEOUT;
	mtx_lock(&af->af_host->nh_lock);
	TAILQ_REMOVE(&af->af_host->nh_pending, af, af_link);
	TAILQ_INSERT_TAIL(&af->af_host->nh_granted, af, af_link);
	mtx_unlock(&af->af_host->nh_lock);
}

/*
 * Free an async lock request. The request must have been removed from
 * any list.
 */
static void
nlm_free_async_lock(struct nlm_async_lock *af)
{
	/*
	 * Free an async lock.
	 */
	if (af->af_rpc)
		CLNT_RELEASE(af->af_rpc);
	xdr_free((xdrproc_t) xdr_nlm4_testargs, &af->af_granted);
	if (af->af_vp)
		vrele(af->af_vp);
	free(af, M_NLM);
}

/*
 * Cancel our async request - this must be called with
 * af->nh_host->nh_lock held. This is slightly complicated by a
 * potential race with our own callback. If we fail to cancel the
 * lock, it must already have been granted - we make sure our async
 * task has completed by calling taskqueue_drain in this case.
 */
static int
nlm_cancel_async_lock(struct nlm_async_lock *af)
{
	struct nlm_host *host = af->af_host;
	int error;

	mtx_assert(&host->nh_lock, MA_OWNED);

	/*
	 * Drop nh_lock around the VOP call; it may sleep and the
	 * callback also needs to take nh_lock.
	 */
	mtx_unlock(&host->nh_lock);

	error = VOP_ADVLOCKASYNC(af->af_vp, NULL, F_CANCEL, &af->af_fl,
	    F_REMOTE, NULL, &af->af_cookie);

	if (error) {
		/*
		 * We failed to cancel - make sure our callback has
		 * completed before we continue.
		 */
		taskqueue_drain(taskqueue_thread, &af->af_task);
	}

	mtx_lock(&host->nh_lock);

	if (!error) {
		NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) "
		    "cancelled\n", af, host->nh_caller_name, host->nh_sysid);

		/*
		 * Remove from the nh_pending list and free now that
		 * we are safe from the callback.
		 */
		TAILQ_REMOVE(&host->nh_pending, af, af_link);
		mtx_unlock(&host->nh_lock);
		nlm_free_async_lock(af);
		mtx_lock(&host->nh_lock);
	}

	/* Returns 0 if the entry was cancelled and freed. */
	return (error);
}

/*
 * Reap expired entries on the host's granted list and everything on
 * its finished list. nh_lock is dropped around each free because
 * nlm_free_async_lock may sleep (vrele, CLNT_RELEASE).
 */
static void
nlm_check_expired_locks(struct nlm_host *host)
{
	struct nlm_async_lock *af;
	time_t uptime = time_uptime;

	mtx_lock(&host->nh_lock);
	while ((af = TAILQ_FIRST(&host->nh_granted)) != NULL
	    && uptime >= af->af_expiretime) {
		NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) expired,"
		    " cookie %d:%d\n", af, af->af_host->nh_caller_name,
		    af->af_host->nh_sysid, ng_sysid(&af->af_granted.cookie),
		    ng_cookie(&af->af_granted.cookie));
		TAILQ_REMOVE(&host->nh_granted, af, af_link);
		mtx_unlock(&host->nh_lock);
		nlm_free_async_lock(af);
		mtx_lock(&host->nh_lock);
	}
	while ((af = TAILQ_FIRST(&host->nh_finished)) != NULL) {
		TAILQ_REMOVE(&host->nh_finished, af, af_link);
		mtx_unlock(&host->nh_lock);
		nlm_free_async_lock(af);
		mtx_lock(&host->nh_lock);
	}
	mtx_unlock(&host->nh_lock);
}

/*
 * Free resources used by a host. This is called after the reference
 * count has reached zero so it doesn't need to worry about locks.
 */
static void
nlm_host_destroy(struct nlm_host *host)
{

	/* Unlink from the global host list before tearing down. */
	mtx_lock(&nlm_global_lock);
	TAILQ_REMOVE(&nlm_hosts, host, nh_link);
	mtx_unlock(&nlm_global_lock);

	if (host->nh_srvrpc.nr_client)
		CLNT_RELEASE(host->nh_srvrpc.nr_client);
	if (host->nh_clntrpc.nr_client)
		CLNT_RELEASE(host->nh_clntrpc.nr_client);
	mtx_destroy(&host->nh_lock);
	sysctl_ctx_free(&host->nh_sysctl);
	free(host, M_NLM);
}

/*
 * Thread start callback for client lock recovery
 */
static void
nlm_client_recovery_start(void *arg)
{
	struct nlm_host *host = (struct nlm_host *) arg;

	NLM_DEBUG(1, "NLM: client lock recovery for %s started\n",
	    host->nh_caller_name);

	nlm_client_recovery(host);

	NLM_DEBUG(1, "NLM: client lock recovery for %s completed\n",
	    host->nh_caller_name);

	host->nh_monstate = NLM_MONITORED;
	/* Drop the reference taken by nlm_host_notify for this thread. */
	nlm_host_release(host);

	kthread_exit();
}

/*
 * This is called when we receive a host state change notification. We
 * unlock any active locks owned by the host. When rpc.lockd is
 * shutting down, this function is called with newstate set to zero
 * which allows us to cancel any pending async locks and clear the
 * locking state.
 */
static void
nlm_host_notify(struct nlm_host *host, int newstate)
{
	struct nlm_async_lock *af;

	if (newstate) {
		NLM_DEBUG(1, "NLM: host %s (sysid %d) rebooted, new "
		    "state is %d\n", host->nh_caller_name,
		    host->nh_sysid, newstate);
	}

	/*
	 * Cancel any pending async locks for this host.
	 */
	mtx_lock(&host->nh_lock);
	while ((af = TAILQ_FIRST(&host->nh_pending)) != NULL) {
		/*
		 * nlm_cancel_async_lock will remove the entry from
		 * nh_pending and free it.
		 */
		nlm_cancel_async_lock(af);
	}
	mtx_unlock(&host->nh_lock);
	nlm_check_expired_locks(host);

	/*
	 * The host just rebooted - trash its locks.
	 */
	lf_clearremotesys(host->nh_sysid);
	host->nh_state = newstate;

	/*
	 * If we have any remote locks for this host (i.e. it
	 * represents a remote NFS server that our local NFS client
	 * has locks for), start a recovery thread.
	 */
	if (newstate != 0
	    && host->nh_monstate != NLM_RECOVERING
	    && lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid) > 0) {
		struct thread *td;
		host->nh_monstate = NLM_RECOVERING;
		/* Reference is released by nlm_client_recovery_start. */
		refcount_acquire(&host->nh_refs);
		kthread_add(nlm_client_recovery_start, host, curproc, &td, 0, 0,
		    "NFS lock recovery for %s", host->nh_caller_name);
	}
}

/*
 * Sysctl handler to count the number of locks for a sysid.
 */
static int
nlm_host_lock_count_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct nlm_host *host;
	int count;

	host = oidp->oid_arg1;
	count = lf_countlocks(host->nh_sysid);
	return sysctl_handle_int(oidp, &count, 0, req);
}

/*
 * Sysctl handler to count the number of client locks for a sysid.
 */
static int
nlm_host_client_lock_count_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct nlm_host *host;
	int count;

	host = oidp->oid_arg1;
	count = lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid);
	return sysctl_handle_int(oidp, &count, 0, req);
}

/*
 * Create a new NLM host.
 */
static struct nlm_host *
nlm_create_host(const char* caller_name)
{
	struct nlm_host *host;
	struct sysctl_oid *oid;

	mtx_assert(&nlm_global_lock, MA_OWNED);

	NLM_DEBUG(1, "NLM: new host %s (sysid %d)\n",
	    caller_name, nlm_next_sysid);
	/* M_NOWAIT: we hold nlm_global_lock and must not sleep here. */
	host = malloc(sizeof(struct nlm_host), M_NLM, M_NOWAIT|M_ZERO);
	if (!host)
		return (NULL);
	mtx_init(&host->nh_lock, "nh_lock", NULL, MTX_DEF);
	refcount_init(&host->nh_refs, 1);
	strlcpy(host->nh_caller_name, caller_name, MAXNAMELEN);
	host->nh_sysid = nlm_next_sysid++;
	snprintf(host->nh_sysid_string, sizeof(host->nh_sysid_string),
	    "%d", host->nh_sysid);
	host->nh_vers = 0;
	host->nh_state = 0;
	host->nh_monstate = NLM_UNMONITORED;
	host->nh_grantcookie = 1;
	TAILQ_INIT(&host->nh_pending);
	TAILQ_INIT(&host->nh_granted);
	TAILQ_INIT(&host->nh_finished);
	TAILQ_INSERT_TAIL(&nlm_hosts, host, nh_link);

	/*
	 * Drop the global lock around the sysctl registration, which
	 * may sleep; the lock is retaken before returning so the
	 * caller's locking assumptions still hold.
	 */
	mtx_unlock(&nlm_global_lock);

	sysctl_ctx_init(&host->nh_sysctl);
	oid = SYSCTL_ADD_NODE(&host->nh_sysctl,
	    SYSCTL_STATIC_CHILDREN(_vfs_nlm_sysid),
	    OID_AUTO, host->nh_sysid_string, CTLFLAG_RD | CTLFLAG_MPSAFE,
	    NULL, "");
	SYSCTL_ADD_STRING(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "hostname", CTLFLAG_RD, host->nh_caller_name, 0, "");
	SYSCTL_ADD_UINT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "version", CTLFLAG_RD, &host->nh_vers, 0, "");
	SYSCTL_ADD_UINT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "monitored", CTLFLAG_RD, &host->nh_monstate, 0, "");
	SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "lock_count", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, host,
	    0, nlm_host_lock_count_sysctl, "I", "");
	SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "client_lock_count", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    host, 0, nlm_host_client_lock_count_sysctl, "I", "");

	mtx_lock(&nlm_global_lock);

	return (host);
}

/*
 * Acquire the next sysid for remote locks not handled by the NLM.
 */
uint32_t
nlm_acquire_next_sysid(void)
{
	uint32_t next_sysid;

	mtx_lock(&nlm_global_lock);
	next_sysid = nlm_next_sysid++;
	mtx_unlock(&nlm_global_lock);
	return (next_sysid);
}

/*
 * Return non-zero if the address parts of the two sockaddrs are the
 * same (ports are deliberately ignored).
 */
static int
nlm_compare_addr(const struct sockaddr *a, const struct sockaddr *b)
{
	const struct sockaddr_in *a4, *b4;
#ifdef INET6
	const struct sockaddr_in6 *a6, *b6;
#endif

	if (a->sa_family != b->sa_family)
		return (FALSE);

	switch (a->sa_family) {
	case AF_INET:
		a4 = (const struct sockaddr_in *) a;
		b4 = (const struct sockaddr_in *) b;
		return !memcmp(&a4->sin_addr, &b4->sin_addr,
		    sizeof(a4->sin_addr));
#ifdef INET6
	case AF_INET6:
		a6 = (const struct sockaddr_in6 *) a;
		b6 = (const struct sockaddr_in6 *) b;
		return !memcmp(&a6->sin6_addr, &b6->sin6_addr,
		    sizeof(a6->sin6_addr));
#endif
	}

	/* Unknown address family - treat as not equal. */
	return (0);
}

/*
 * Check for idle hosts and stop monitoring them. We could also free
 * the host structure here, possibly after a larger timeout but that
 * would require some care to avoid races with
 * e.g. nlm_host_lock_count_sysctl.
945 */ 946 static void 947 nlm_check_idle(void) 948 { 949 struct nlm_host *host; 950 951 mtx_assert(&nlm_global_lock, MA_OWNED); 952 953 if (time_uptime <= nlm_next_idle_check) 954 return; 955 956 nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD; 957 958 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 959 if (host->nh_monstate == NLM_MONITORED 960 && time_uptime > host->nh_idle_timeout) { 961 mtx_unlock(&nlm_global_lock); 962 if (lf_countlocks(host->nh_sysid) > 0 963 || lf_countlocks(NLM_SYSID_CLIENT 964 + host->nh_sysid)) { 965 host->nh_idle_timeout = 966 time_uptime + NLM_IDLE_TIMEOUT; 967 mtx_lock(&nlm_global_lock); 968 continue; 969 } 970 nlm_host_unmonitor(host); 971 mtx_lock(&nlm_global_lock); 972 } 973 } 974 } 975 976 /* 977 * Search for an existing NLM host that matches the given name 978 * (typically the caller_name element of an nlm4_lock). If none is 979 * found, create a new host. If 'addr' is non-NULL, record the remote 980 * address of the host so that we can call it back for async 981 * responses. If 'vers' is greater than zero then record the NLM 982 * program version to use to communicate with this client. 983 */ 984 struct nlm_host * 985 nlm_find_host_by_name(const char *name, const struct sockaddr *addr, 986 rpcvers_t vers) 987 { 988 struct nlm_host *host; 989 990 mtx_lock(&nlm_global_lock); 991 992 /* 993 * The remote host is determined by caller_name. 994 */ 995 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 996 if (!strcmp(host->nh_caller_name, name)) 997 break; 998 } 999 1000 if (!host) { 1001 host = nlm_create_host(name); 1002 if (!host) { 1003 mtx_unlock(&nlm_global_lock); 1004 return (NULL); 1005 } 1006 } 1007 refcount_acquire(&host->nh_refs); 1008 1009 host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT; 1010 1011 /* 1012 * If we have an address for the host, record it so that we 1013 * can send async replies etc. 
1014 */ 1015 if (addr) { 1016 1017 KASSERT(addr->sa_len < sizeof(struct sockaddr_storage), 1018 ("Strange remote transport address length")); 1019 1020 /* 1021 * If we have seen an address before and we currently 1022 * have an RPC client handle, make sure the address is 1023 * the same, otherwise discard the client handle. 1024 */ 1025 if (host->nh_addr.ss_len && host->nh_srvrpc.nr_client) { 1026 if (!nlm_compare_addr( 1027 (struct sockaddr *) &host->nh_addr, 1028 addr) 1029 || host->nh_vers != vers) { 1030 CLIENT *client; 1031 mtx_lock(&host->nh_lock); 1032 client = host->nh_srvrpc.nr_client; 1033 host->nh_srvrpc.nr_client = NULL; 1034 mtx_unlock(&host->nh_lock); 1035 if (client) { 1036 CLNT_RELEASE(client); 1037 } 1038 } 1039 } 1040 memcpy(&host->nh_addr, addr, addr->sa_len); 1041 host->nh_vers = vers; 1042 } 1043 1044 nlm_check_idle(); 1045 1046 mtx_unlock(&nlm_global_lock); 1047 1048 return (host); 1049 } 1050 1051 /* 1052 * Search for an existing NLM host that matches the given remote 1053 * address. If none is found, create a new host with the requested 1054 * address and remember 'vers' as the NLM protocol version to use for 1055 * that host. 1056 */ 1057 struct nlm_host * 1058 nlm_find_host_by_addr(const struct sockaddr *addr, int vers) 1059 { 1060 /* 1061 * Fake up a name using inet_ntop. This buffer is 1062 * large enough for an IPv6 address. 1063 */ 1064 char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"]; 1065 struct nlm_host *host; 1066 1067 switch (addr->sa_family) { 1068 case AF_INET: 1069 inet_ntop(AF_INET, 1070 &((const struct sockaddr_in *) addr)->sin_addr, 1071 tmp, sizeof tmp); 1072 break; 1073 #ifdef INET6 1074 case AF_INET6: 1075 inet_ntop(AF_INET6, 1076 &((const struct sockaddr_in6 *) addr)->sin6_addr, 1077 tmp, sizeof tmp); 1078 break; 1079 #endif 1080 default: 1081 strlcpy(tmp, "<unknown>", sizeof(tmp)); 1082 } 1083 1084 mtx_lock(&nlm_global_lock); 1085 1086 /* 1087 * The remote host is determined by caller_name. 
1088 */ 1089 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 1090 if (nlm_compare_addr(addr, 1091 (const struct sockaddr *) &host->nh_addr)) 1092 break; 1093 } 1094 1095 if (!host) { 1096 host = nlm_create_host(tmp); 1097 if (!host) { 1098 mtx_unlock(&nlm_global_lock); 1099 return (NULL); 1100 } 1101 memcpy(&host->nh_addr, addr, addr->sa_len); 1102 host->nh_vers = vers; 1103 } 1104 refcount_acquire(&host->nh_refs); 1105 1106 host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT; 1107 1108 nlm_check_idle(); 1109 1110 mtx_unlock(&nlm_global_lock); 1111 1112 return (host); 1113 } 1114 1115 /* 1116 * Find the NLM host that matches the value of 'sysid'. If none 1117 * exists, return NULL. 1118 */ 1119 static struct nlm_host * 1120 nlm_find_host_by_sysid(int sysid) 1121 { 1122 struct nlm_host *host; 1123 1124 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 1125 if (host->nh_sysid == sysid) { 1126 refcount_acquire(&host->nh_refs); 1127 return (host); 1128 } 1129 } 1130 1131 return (NULL); 1132 } 1133 1134 void nlm_host_release(struct nlm_host *host) 1135 { 1136 if (refcount_release(&host->nh_refs)) { 1137 /* 1138 * Free the host 1139 */ 1140 nlm_host_destroy(host); 1141 } 1142 } 1143 1144 /* 1145 * Unregister this NLM host with the local NSM due to idleness. 1146 */ 1147 static void 1148 nlm_host_unmonitor(struct nlm_host *host) 1149 { 1150 mon_id smmonid; 1151 sm_stat_res smstat; 1152 struct timeval timo; 1153 enum clnt_stat stat; 1154 1155 NLM_DEBUG(1, "NLM: unmonitoring %s (sysid %d)\n", 1156 host->nh_caller_name, host->nh_sysid); 1157 1158 /* 1159 * We put our assigned system ID value in the priv field to 1160 * make it simpler to find the host if we are notified of a 1161 * host restart. 
1162 */ 1163 smmonid.mon_name = host->nh_caller_name; 1164 smmonid.my_id.my_name = "localhost"; 1165 smmonid.my_id.my_prog = NLM_PROG; 1166 smmonid.my_id.my_vers = NLM_SM; 1167 smmonid.my_id.my_proc = NLM_SM_NOTIFY; 1168 1169 timo.tv_sec = 25; 1170 timo.tv_usec = 0; 1171 stat = CLNT_CALL(nlm_nsm, SM_UNMON, 1172 (xdrproc_t) xdr_mon, &smmonid, 1173 (xdrproc_t) xdr_sm_stat, &smstat, timo); 1174 1175 if (stat != RPC_SUCCESS) { 1176 NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat); 1177 return; 1178 } 1179 if (smstat.res_stat == stat_fail) { 1180 NLM_ERR("Local NSM refuses to unmonitor %s\n", 1181 host->nh_caller_name); 1182 return; 1183 } 1184 1185 host->nh_monstate = NLM_UNMONITORED; 1186 } 1187 1188 /* 1189 * Register this NLM host with the local NSM so that we can be 1190 * notified if it reboots. 1191 */ 1192 void 1193 nlm_host_monitor(struct nlm_host *host, int state) 1194 { 1195 mon smmon; 1196 sm_stat_res smstat; 1197 struct timeval timo; 1198 enum clnt_stat stat; 1199 1200 if (state && !host->nh_state) { 1201 /* 1202 * This is the first time we have seen an NSM state 1203 * value for this host. We record it here to help 1204 * detect host reboots. 1205 */ 1206 host->nh_state = state; 1207 NLM_DEBUG(1, "NLM: host %s (sysid %d) has NSM state %d\n", 1208 host->nh_caller_name, host->nh_sysid, state); 1209 } 1210 1211 mtx_lock(&host->nh_lock); 1212 if (host->nh_monstate != NLM_UNMONITORED) { 1213 mtx_unlock(&host->nh_lock); 1214 return; 1215 } 1216 host->nh_monstate = NLM_MONITORED; 1217 mtx_unlock(&host->nh_lock); 1218 1219 NLM_DEBUG(1, "NLM: monitoring %s (sysid %d)\n", 1220 host->nh_caller_name, host->nh_sysid); 1221 1222 /* 1223 * We put our assigned system ID value in the priv field to 1224 * make it simpler to find the host if we are notified of a 1225 * host restart. 
	 */
	smmon.mon_id.mon_name = host->nh_caller_name;
	smmon.mon_id.my_id.my_name = "localhost";
	smmon.mon_id.my_id.my_prog = NLM_PROG;
	smmon.mon_id.my_id.my_vers = NLM_SM;
	smmon.mon_id.my_id.my_proc = NLM_SM_NOTIFY;
	memcpy(smmon.priv, &host->nh_sysid, sizeof(host->nh_sysid));

	timo.tv_sec = 25;
	timo.tv_usec = 0;
	stat = CLNT_CALL(nlm_nsm, SM_MON,
	    (xdrproc_t) xdr_mon, &smmon,
	    (xdrproc_t) xdr_sm_stat, &smstat, timo);

	if (stat != RPC_SUCCESS) {
		NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat);
		return;
	}
	if (smstat.res_stat == stat_fail) {
		NLM_ERR("Local NSM refuses to monitor %s\n",
		    host->nh_caller_name);
		mtx_lock(&host->nh_lock);
		host->nh_monstate = NLM_MONITOR_FAILED;
		mtx_unlock(&host->nh_lock);
		return;
	}

	host->nh_monstate = NLM_MONITORED;
}

/*
 * Return an RPC client handle that can be used to talk to the NLM
 * running on the given host.  The handle is returned with an extra
 * reference (CLNT_ACQUIRE); the caller must CLNT_RELEASE it.
 */
CLIENT *
nlm_host_get_rpc(struct nlm_host *host, bool_t isserver)
{
	struct nlm_rpc *rpc;
	CLIENT *client;

	mtx_lock(&host->nh_lock);

	/* Server-side and client-side traffic use separate handles. */
	if (isserver)
		rpc = &host->nh_srvrpc;
	else
		rpc = &host->nh_clntrpc;

	/*
	 * We can't hold onto RPC handles for too long - the async
	 * call/reply protocol used by some NLM clients makes it hard
	 * to tell when they change port numbers (e.g. after a
	 * reboot). Note that if a client reboots while it isn't
	 * holding any locks, it won't bother to notify us. We
	 * expire the RPC handles after two minutes.
	 */
	if (rpc->nr_client && time_uptime > rpc->nr_create_time + 2*60) {
		client = rpc->nr_client;
		rpc->nr_client = NULL;
		/* Drop the lock while releasing - CLNT_RELEASE may block. */
		mtx_unlock(&host->nh_lock);
		CLNT_RELEASE(client);
		mtx_lock(&host->nh_lock);
	}

	if (!rpc->nr_client) {
		mtx_unlock(&host->nh_lock);
		client = nlm_get_rpc((struct sockaddr *)&host->nh_addr,
		    NLM_PROG, host->nh_vers);
		mtx_lock(&host->nh_lock);

		if (client) {
			if (rpc->nr_client) {
				/*
				 * Another thread won the race to
				 * install a handle while we slept;
				 * discard ours.
				 */
				mtx_unlock(&host->nh_lock);
				CLNT_DESTROY(client);
				mtx_lock(&host->nh_lock);
			} else {
				rpc->nr_client = client;
				rpc->nr_create_time = time_uptime;
			}
		}
	}

	client = rpc->nr_client;
	if (client)
		CLNT_ACQUIRE(client);
	mtx_unlock(&host->nh_lock);

	return (client);

}

/* Return the local sysid assigned to this remote host. */
int nlm_host_get_sysid(struct nlm_host *host)
{

	return (host->nh_sysid);
}

/* Return the last NSM state value we recorded for this host. */
int
nlm_host_get_state(struct nlm_host *host)
{

	return (host->nh_state);
}

/*
 * Record that we are waiting for a remote lock grant for 'lock' on
 * vnode 'vp'.  Returns an opaque handle which must later be passed to
 * either nlm_wait_lock() or nlm_deregister_wait_lock().
 */
void *
nlm_register_wait_lock(struct nlm4_lock *lock, struct vnode *vp)
{
	struct nlm_waiting_lock *nw;

	nw = malloc(sizeof(struct nlm_waiting_lock), M_NLM, M_WAITOK);
	nw->nw_lock = *lock;
	/*
	 * Copy the file handle into our own storage so the entry does
	 * not point into caller-owned memory.
	 */
	memcpy(&nw->nw_fh.fh_bytes, nw->nw_lock.fh.n_bytes,
	    nw->nw_lock.fh.n_len);
	nw->nw_lock.fh.n_bytes = nw->nw_fh.fh_bytes;
	nw->nw_waiting = TRUE;
	nw->nw_vp = vp;
	mtx_lock(&nlm_global_lock);
	TAILQ_INSERT_TAIL(&nlm_waiting_locks, nw, nw_link);
	mtx_unlock(&nlm_global_lock);

	return nw;
}

/*
 * Remove and free a wait entry created by nlm_register_wait_lock()
 * without waiting on it.
 */
void
nlm_deregister_wait_lock(void *handle)
{
	struct nlm_waiting_lock *nw = handle;

	mtx_lock(&nlm_global_lock);
	TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link);
	mtx_unlock(&nlm_global_lock);

	free(nw, M_NLM);
}

/*
 * Sleep until the granted message for 'handle' arrives, the timeout
 * 'timo' expires or the sleep is interrupted.  Consumes the handle.
 * Returns zero if the lock was granted, EINTR if the wait was
 * cancelled, or the msleep error otherwise.
 */
int
nlm_wait_lock(void *handle, int timo)
{
	struct nlm_waiting_lock *nw = handle;
	int error, stops_deferred;

	/*
	 * If the granted message arrived before we got here,
	 * nw->nw_waiting will be FALSE - in that case, don't sleep.
	 */
	mtx_lock(&nlm_global_lock);
	error = 0;
	if (nw->nw_waiting) {
		stops_deferred = sigdeferstop(SIGDEFERSTOP_ERESTART);
		error = msleep(nw, &nlm_global_lock, PCATCH, "nlmlock", timo);
		sigallowstop(stops_deferred);
	}
	TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link);
	if (error) {
		/*
		 * The granted message may arrive after the
		 * interrupt/timeout but before we manage to lock the
		 * mutex. Detect this by examining nw_waiting.
		 */
		if (!nw->nw_waiting)
			error = 0;
	} else {
		/*
		 * If nlm_cancel_wait is called, then error will be
		 * zero but nw_waiting will still be TRUE. We
		 * translate this into EINTR.
		 */
		if (nw->nw_waiting)
			error = EINTR;
	}
	mtx_unlock(&nlm_global_lock);

	free(nw, M_NLM);

	return (error);
}

/*
 * Wake up every thread waiting on a pending lock for vnode 'vp';
 * the wakeup leaves nw_waiting TRUE so the waiters see EINTR.
 */
void
nlm_cancel_wait(struct vnode *vp)
{
	struct nlm_waiting_lock *nw;

	mtx_lock(&nlm_global_lock);
	TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) {
		if (nw->nw_vp == vp) {
			wakeup(nw);
		}
	}
	mtx_unlock(&nlm_global_lock);
}

/**********************************************************************/

/*
 * Syscall interface with userland.
1420 */ 1421 1422 extern void nlm_prog_0(struct svc_req *rqstp, SVCXPRT *transp); 1423 extern void nlm_prog_1(struct svc_req *rqstp, SVCXPRT *transp); 1424 extern void nlm_prog_3(struct svc_req *rqstp, SVCXPRT *transp); 1425 extern void nlm_prog_4(struct svc_req *rqstp, SVCXPRT *transp); 1426 1427 static int 1428 nlm_register_services(SVCPOOL *pool, int addr_count, char **addrs) 1429 { 1430 static rpcvers_t versions[] = { 1431 NLM_SM, NLM_VERS, NLM_VERSX, NLM_VERS4 1432 }; 1433 static void (*dispatchers[])(struct svc_req *, SVCXPRT *) = { 1434 nlm_prog_0, nlm_prog_1, nlm_prog_3, nlm_prog_4 1435 }; 1436 1437 SVCXPRT **xprts; 1438 char netid[16]; 1439 char uaddr[128]; 1440 struct netconfig *nconf; 1441 int i, j, error; 1442 1443 if (!addr_count) { 1444 NLM_ERR("NLM: no service addresses given - can't start server"); 1445 return (EINVAL); 1446 } 1447 1448 if (addr_count < 0 || addr_count > 256 ) { 1449 NLM_ERR("NLM: too many service addresses (%d) given, " 1450 "max 256 - can't start server\n", addr_count); 1451 return (EINVAL); 1452 } 1453 1454 xprts = malloc(addr_count * sizeof(SVCXPRT *), M_NLM, M_WAITOK|M_ZERO); 1455 for (i = 0; i < nitems(versions); i++) { 1456 for (j = 0; j < addr_count; j++) { 1457 /* 1458 * Create transports for the first version and 1459 * then just register everything else to the 1460 * same transports. 
1461 */ 1462 if (i == 0) { 1463 char *up; 1464 1465 error = copyin(&addrs[2*j], &up, 1466 sizeof(char*)); 1467 if (error) 1468 goto out; 1469 error = copyinstr(up, netid, sizeof(netid), 1470 NULL); 1471 if (error) 1472 goto out; 1473 error = copyin(&addrs[2*j+1], &up, 1474 sizeof(char*)); 1475 if (error) 1476 goto out; 1477 error = copyinstr(up, uaddr, sizeof(uaddr), 1478 NULL); 1479 if (error) 1480 goto out; 1481 nconf = getnetconfigent(netid); 1482 if (!nconf) { 1483 NLM_ERR("Can't lookup netid %s\n", 1484 netid); 1485 error = EINVAL; 1486 goto out; 1487 } 1488 xprts[j] = svc_tp_create(pool, dispatchers[i], 1489 NLM_PROG, versions[i], uaddr, nconf); 1490 if (!xprts[j]) { 1491 NLM_ERR("NLM: unable to create " 1492 "(NLM_PROG, %d).\n", versions[i]); 1493 error = EINVAL; 1494 goto out; 1495 } 1496 freenetconfigent(nconf); 1497 } else { 1498 nconf = getnetconfigent(xprts[j]->xp_netid); 1499 rpcb_unset(NLM_PROG, versions[i], nconf); 1500 if (!svc_reg(xprts[j], NLM_PROG, versions[i], 1501 dispatchers[i], nconf)) { 1502 NLM_ERR("NLM: can't register " 1503 "(NLM_PROG, %d)\n", versions[i]); 1504 error = EINVAL; 1505 goto out; 1506 } 1507 } 1508 } 1509 } 1510 error = 0; 1511 out: 1512 for (j = 0; j < addr_count; j++) { 1513 if (xprts[j]) 1514 SVC_RELEASE(xprts[j]); 1515 } 1516 free(xprts, M_NLM); 1517 return (error); 1518 } 1519 1520 /* 1521 * Main server entry point. Contacts the local NSM to get its current 1522 * state and send SM_UNMON_ALL. Registers the NLM services and then 1523 * services requests. Does not return until the server is interrupted 1524 * by a signal. 
 */
static int
nlm_server_main(int addr_count, char **addrs)
{
	struct thread *td = curthread;
	int error;
	SVCPOOL *pool = NULL;
	struct sockopt opt;
	int portlow;
#ifdef INET6
	struct sockaddr_in6 sin6;
#endif
	struct sockaddr_in sin;
	my_id id;
	sm_stat smstat;
	struct timeval timo;
	enum clnt_stat stat;
	struct nlm_host *host, *nhost;
	struct nlm_waiting_lock *nw;
	vop_advlock_t *old_nfs_advlock;
	vop_reclaim_t *old_nfs_reclaim;

	if (nlm_is_running != 0) {
		NLM_ERR("NLM: can't start server - "
		    "it appears to be running already\n");
		return (EPERM);
	}

	if (nlm_socket == NULL) {
		memset(&opt, 0, sizeof(opt));

		/*
		 * Create the UDP sockets used for outgoing RPC,
		 * bound to the privileged (low) port range.
		 */
		error = socreate(AF_INET, &nlm_socket, SOCK_DGRAM, 0,
		    td->td_ucred, td);
		if (error) {
			NLM_ERR("NLM: can't create IPv4 socket - error %d\n",
			    error);
			return (error);
		}
		opt.sopt_dir = SOPT_SET;
		opt.sopt_level = IPPROTO_IP;
		opt.sopt_name = IP_PORTRANGE;
		portlow = IP_PORTRANGE_LOW;
		opt.sopt_val = &portlow;
		opt.sopt_valsize = sizeof(portlow);
		sosetopt(nlm_socket, &opt);

#ifdef INET6
		nlm_socket6 = NULL;
		error = socreate(AF_INET6, &nlm_socket6, SOCK_DGRAM, 0,
		    td->td_ucred, td);
		if (error) {
			NLM_ERR("NLM: can't create IPv6 socket - error %d\n",
			    error);
			soclose(nlm_socket);
			nlm_socket = NULL;
			return (error);
		}
		opt.sopt_dir = SOPT_SET;
		opt.sopt_level = IPPROTO_IPV6;
		opt.sopt_name = IPV6_PORTRANGE;
		portlow = IPV6_PORTRANGE_LOW;
		opt.sopt_val = &portlow;
		opt.sopt_valsize = sizeof(portlow);
		sosetopt(nlm_socket6, &opt);
#endif
	}

	nlm_auth = authunix_create(curthread->td_ucred);

	/*
	 * Contact the local NSM (statd) over loopback, preferring
	 * IPv6 and falling back to IPv4.
	 */
#ifdef INET6
	memset(&sin6, 0, sizeof(sin6));
	sin6.sin6_len = sizeof(sin6);
	sin6.sin6_family = AF_INET6;
	sin6.sin6_addr = in6addr_loopback;
	nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin6, SM_PROG, SM_VERS);
	if (!nlm_nsm) {
#endif
		memset(&sin, 0, sizeof(sin));
		sin.sin_len = sizeof(sin);
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
		nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin, SM_PROG,
		    SM_VERS);
#ifdef INET6
	}
#endif

	if (!nlm_nsm) {
		NLM_ERR("Can't start NLM - unable to contact NSM\n");
		error = EINVAL;
		goto out;
	}

	pool = svcpool_create("NLM", NULL);

	error = nlm_register_services(pool, addr_count, addrs);
	if (error)
		goto out;

	/*
	 * Clear any stale monitoring state left over from a previous
	 * incarnation of the server.
	 */
	memset(&id, 0, sizeof(id));
	id.my_name = "NFS NLM";

	timo.tv_sec = 25;
	timo.tv_usec = 0;
	stat = CLNT_CALL(nlm_nsm, SM_UNMON_ALL,
	    (xdrproc_t) xdr_my_id, &id,
	    (xdrproc_t) xdr_sm_stat, &smstat, timo);

	if (stat != RPC_SUCCESS) {
		struct rpc_err err;

		CLNT_GETERR(nlm_nsm, &err);
		NLM_ERR("NLM: unexpected error contacting NSM, "
		    "stat=%d, errno=%d\n", stat, err.re_errno);
		error = EINVAL;
		goto out;
	}
	nlm_is_running = 1;

	NLM_DEBUG(1, "NLM: local NSM state is %d\n", smstat.state);
	nlm_nsm_state = smstat.state;

	/*
	 * Hook the NFS client's advisory locking entry points so
	 * that its lock requests are routed through the NLM while
	 * the server runs.
	 */
	old_nfs_advlock = nfs_advlock_p;
	nfs_advlock_p = nlm_advlock;
	old_nfs_reclaim = nfs_reclaim_p;
	nfs_reclaim_p = nlm_reclaim;

	/* Serve requests; returns when the server is interrupted. */
	svc_run(pool);
	error = 0;

	nfs_advlock_p = old_nfs_advlock;
	nfs_reclaim_p = old_nfs_reclaim;

out:
	nlm_is_running = 0;
	if (pool)
		svcpool_destroy(pool);

	/*
	 * We are finished communicating with the NSM.
	 */
	if (nlm_nsm) {
		CLNT_RELEASE(nlm_nsm);
		nlm_nsm = NULL;
	}

	/*
	 * Trash all the existing state so that if the server
	 * restarts, it gets a clean slate. This is complicated by the
	 * possibility that there may be other threads trying to make
	 * client locking requests.
	 *
	 * First we fake a client reboot notification which will
	 * cancel any pending async locks and purge remote lock state
	 * from the local lock manager. We release the reference from
	 * nlm_hosts to the host (which may remove it from the list
	 * and free it). After this phase, the only entries in the
	 * nlm_host list should be from other threads performing
	 * client lock requests.
	 */
	mtx_lock(&nlm_global_lock);
	TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) {
		wakeup(nw);
	}
	TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, nhost) {
		mtx_unlock(&nlm_global_lock);
		nlm_host_notify(host, 0);
		nlm_host_release(host);
		mtx_lock(&nlm_global_lock);
	}
	mtx_unlock(&nlm_global_lock);

	AUTH_DESTROY(nlm_auth);

	return (error);
}

/*
 * The nlm_syscall system call: privileged entry point used by
 * rpc.lockd to configure and run the kernel NLM server.  Does not
 * return until the server shuts down.
 */
int
sys_nlm_syscall(struct thread *td, struct nlm_syscall_args *uap)
{
	int error;

	error = priv_check(td, PRIV_NFS_LOCKD);
	if (error)
		return (error);

	nlm_debug_level = uap->debug_level;
	nlm_grace_threshold = time_uptime + uap->grace_period;
	nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD;

	CURVNET_SET(TD_TO_VNET(td));
	error = nlm_server_main(uap->addr_count, uap->addrs);
	CURVNET_RESTORE();

	return (error);
}

/**********************************************************************/

/*
 * NLM implementation details, called from the RPC stubs.
 */

/*
 * Handle an SM_NOTIFY callback from the local statd: a monitored
 * peer has changed NSM state (rebooted).  The host's sysid was
 * stashed in the priv field at SM_MON time.
 */
void
nlm_sm_notify(struct nlm_sm_status *argp)
{
	uint32_t sysid;
	struct nlm_host *host;

	NLM_DEBUG(3, "nlm_sm_notify(): mon_name = %s\n", argp->mon_name);
	memcpy(&sysid, &argp->priv, sizeof(sysid));
	host = nlm_find_host_by_sysid(sysid);
	if (host) {
		nlm_host_notify(host, argp->state);
		nlm_host_release(host);
	}
}

/*
 * Convert an on-the-wire netobj file handle to the local fhandle_t.
 * NOTE(review): copies sizeof(fhandle_t) bytes regardless of
 * p->n_len - presumably callers guarantee the netobj is large
 * enough; confirm against the RPC argument validation.
 */
static void
nlm_convert_to_fhandle_t(fhandle_t *fhp, struct netobj *p)
{
	memcpy(fhp, p->n_bytes, sizeof(fhandle_t));
}

/*
 * Per-request filesystem references resolved from a file handle.
 */
struct vfs_state {
	struct mount *vs_mp;	/* referenced mount point, or NULL */
	struct vnode *vs_vp;	/* resolved vnode, or NULL */
	int vs_vnlocked;	/* TRUE while vs_vp's lock is held */
};

/*
 * Resolve the file handle 'fhp' from host 'host' into a mount point
 * and vnode in '*vs', verifying the export and (when accmode != 0)
 * the caller's access rights.  Returns zero on success; on success
 * the vnode is returned unlocked but referenced.  The caller must
 * always call nlm_release_vfs_state() afterwards.
 */
static int
nlm_get_vfs_state(struct nlm_host *host, struct svc_req *rqstp,
    fhandle_t *fhp, struct vfs_state *vs, accmode_t accmode)
{
	int error;
	uint64_t exflags;
	struct ucred *cred = NULL, *credanon = NULL;

	memset(vs, 0, sizeof(*vs));

	vs->vs_mp = vfs_getvfs(&fhp->fh_fsid);
	if (!vs->vs_mp) {
		return (ESTALE);
	}

	/* accmode == 0 means don't check, since it is an unlock. */
	if (accmode != 0) {
		error = VFS_CHECKEXP(vs->vs_mp,
		    (struct sockaddr *)&host->nh_addr, &exflags, &credanon,
		    NULL, NULL);
		if (error)
			goto out;

		if (exflags & MNT_EXRDONLY ||
		    (vs->vs_mp->mnt_flag & MNT_RDONLY)) {
			error = EROFS;
			goto out;
		}
	}

	error = VFS_FHTOVP(vs->vs_mp, &fhp->fh_fid, LK_EXCLUSIVE, &vs->vs_vp);
	if (error)
		goto out;
	vs->vs_vnlocked = TRUE;

	if (accmode != 0) {
		if (!svc_getcred(rqstp, &cred, NULL)) {
			error = EINVAL;
			goto out;
		}
		/* Map root (and anon-exported mounts) to the
		 * export's anonymous credentials. */
		if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) {
			crfree(cred);
			cred = credanon;
			credanon = NULL;
		}

		/*
		 * Check cred.
		 */
		error = VOP_ACCESS(vs->vs_vp, accmode, cred, curthread);
		/*
		 * If this failed and accmode != VWRITE, try again with
		 * VWRITE to maintain backwards compatibility with the
		 * old code that always used VWRITE.
		 */
		if (error != 0 && accmode != VWRITE)
			error = VOP_ACCESS(vs->vs_vp, VWRITE, cred, curthread);
		if (error)
			goto out;
	}

	VOP_UNLOCK(vs->vs_vp);
	vs->vs_vnlocked = FALSE;

out:
	if (cred)
		crfree(cred);
	if (credanon)
		crfree(credanon);

	return (error);
}

/*
 * Drop whatever references nlm_get_vfs_state() acquired, regardless
 * of how far it got.
 */
static void
nlm_release_vfs_state(struct vfs_state *vs)
{

	if (vs->vs_vp) {
		if (vs->vs_vnlocked)
			vput(vs->vs_vp);
		else
			vrele(vs->vs_vp);
	}
	if (vs->vs_mp)
		vfs_rel(vs->vs_mp);
}

/*
 * Map a local errno value to the closest NLM protocol status code.
 */
static nlm4_stats
nlm_convert_error(int error)
{

	if (error == ESTALE)
		return nlm4_stale_fh;
	else if (error == EROFS)
		return nlm4_rofs;
	else
		return nlm4_failed;
}

/*
 * Service an NLM_TEST request: report whether the described lock
 * could be granted and, if not, who holds the conflicting lock.
 */
int
nlm_do_test(nlm4_testargs *argp, nlm4_testres *result, struct svc_req *rqstp,
    CLIENT **rpcp)
{
	fhandle_t fh;
	struct vfs_state vs;
	struct nlm_host *host, *bhost;
	int error, sysid;
	struct flock fl;
	accmode_t accmode;

	memset(result, 0, sizeof(*result));
	memset(&vs, 0, sizeof(vs));

	host = nlm_find_host_by_name(argp->alock.caller_name,
	    svc_getrpccaller(rqstp), rqstp->rq_vers);
	if (!host) {
		result->stat.stat = nlm4_denied_nolocks;
		return (ENOMEM);
	}

	NLM_DEBUG(3, "nlm_do_test(): caller_name = %s (sysid = %d)\n",
	    host->nh_caller_name, host->nh_sysid);

	nlm_check_expired_locks(host);
	sysid = host->nh_sysid;

	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);

	if (time_uptime < nlm_grace_threshold) {
		result->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	accmode = argp->exclusive ? VWRITE : VREAD;
	error = nlm_get_vfs_state(host, rqstp, &fh, &vs, accmode);
	if (error) {
		result->stat.stat = nlm_convert_error(error);
		goto out;
	}

	fl.l_start = argp->alock.l_offset;
	fl.l_len = argp->alock.l_len;
	fl.l_pid = argp->alock.svid;
	fl.l_sysid = sysid;
	fl.l_whence = SEEK_SET;
	if (argp->exclusive)
		fl.l_type = F_WRLCK;
	else
		fl.l_type = F_RDLCK;
	error = VOP_ADVLOCK(vs.vs_vp, NULL, F_GETLK, &fl, F_REMOTE);
	if (error) {
		result->stat.stat = nlm4_failed;
		goto out;
	}

	if (fl.l_type == F_UNLCK) {
		result->stat.stat = nlm4_granted;
	} else {
		result->stat.stat = nlm4_denied;
		result->stat.nlm4_testrply_u.holder.exclusive =
		    (fl.l_type == F_WRLCK);
		result->stat.nlm4_testrply_u.holder.svid = fl.l_pid;
		bhost = nlm_find_host_by_sysid(fl.l_sysid);
		if (bhost) {
			/*
			 * We don't have any useful way of recording
			 * the value of oh used in the original lock
			 * request. Ideally, the test reply would have
			 * a space for the owning host's name allowing
			 * our caller's NLM to keep track.
			 *
			 * As far as I can see, Solaris uses an eight
			 * byte structure for oh which contains a four
			 * byte pid encoded in local byte order and
			 * the first four bytes of the host
			 * name. Linux uses a variable length string
			 * 'pid@hostname' in ascii but doesn't even
			 * return that in test replies.
			 *
			 * For the moment, return nothing in oh
			 * (already zero'ed above).
			 */
			nlm_host_release(bhost);
		}
		result->stat.nlm4_testrply_u.holder.l_offset = fl.l_start;
		result->stat.nlm4_testrply_u.holder.l_len = fl.l_len;
	}

out:
	nlm_release_vfs_state(&vs);
	if (rpcp)
		*rpcp = nlm_host_get_rpc(host, TRUE);
	nlm_host_release(host);
	return (0);
}

/*
 * Service an NLM_LOCK request.  For blocking requests (argp->block)
 * that cannot be granted immediately we queue an async lock and reply
 * nlm4_blocked; the grant is delivered later via the NLM_GRANTED
 * callback.  'monitor' selects whether the peer should be registered
 * with the local NSM on success.
 */
int
nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp,
    bool_t monitor, CLIENT **rpcp)
{
	fhandle_t fh;
	struct vfs_state vs;
	struct nlm_host *host;
	int error, sysid;
	struct flock fl;
	accmode_t accmode;

	memset(result, 0, sizeof(*result));
	memset(&vs, 0, sizeof(vs));

	host = nlm_find_host_by_name(argp->alock.caller_name,
	    svc_getrpccaller(rqstp), rqstp->rq_vers);
	if (!host) {
		result->stat.stat = nlm4_denied_nolocks;
		return (ENOMEM);
	}

	NLM_DEBUG(3, "nlm_do_lock(): caller_name = %s (sysid = %d)\n",
	    host->nh_caller_name, host->nh_sysid);

	if (monitor && host->nh_state && argp->state
	    && host->nh_state != argp->state) {
		/*
		 * The host rebooted without telling us. Trash its
		 * locks.
		 */
		nlm_host_notify(host, argp->state);
	}

	nlm_check_expired_locks(host);
	sysid = host->nh_sysid;

	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);

	/* Reclaim requests are allowed during the grace period. */
	if (time_uptime < nlm_grace_threshold && !argp->reclaim) {
		result->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	accmode = argp->exclusive ? VWRITE : VREAD;
	error = nlm_get_vfs_state(host, rqstp, &fh, &vs, accmode);
	if (error) {
		result->stat.stat = nlm_convert_error(error);
		goto out;
	}

	fl.l_start = argp->alock.l_offset;
	fl.l_len = argp->alock.l_len;
	fl.l_pid = argp->alock.svid;
	fl.l_sysid = sysid;
	fl.l_whence = SEEK_SET;
	if (argp->exclusive)
		fl.l_type = F_WRLCK;
	else
		fl.l_type = F_RDLCK;
	if (argp->block) {
		struct nlm_async_lock *af;
		CLIENT *client;
		struct nlm_grantcookie cookie;

		/*
		 * First, make sure we can contact the host's NLM.
		 */
		client = nlm_host_get_rpc(host, TRUE);
		if (!client) {
			result->stat.stat = nlm4_failed;
			goto out;
		}

		/*
		 * First we need to check and see if there is an
		 * existing blocked lock that matches. This could be a
		 * badly behaved client or an RPC re-send. If we find
		 * one, just return nlm4_blocked.
		 */
		mtx_lock(&host->nh_lock);
		TAILQ_FOREACH(af, &host->nh_pending, af_link) {
			if (af->af_fl.l_start == fl.l_start
			    && af->af_fl.l_len == fl.l_len
			    && af->af_fl.l_pid == fl.l_pid
			    && af->af_fl.l_type == fl.l_type) {
				break;
			}
		}
		if (!af) {
			/* Allocate a fresh grant cookie while we
			 * still hold the host lock. */
			cookie.ng_sysid = host->nh_sysid;
			cookie.ng_cookie = host->nh_grantcookie++;
		}
		mtx_unlock(&host->nh_lock);
		if (af) {
			CLNT_RELEASE(client);
			result->stat.stat = nlm4_blocked;
			goto out;
		}

		af = malloc(sizeof(struct nlm_async_lock), M_NLM,
		    M_WAITOK|M_ZERO);
		TASK_INIT(&af->af_task, 0, nlm_lock_callback, af);
		af->af_vp = vs.vs_vp;
		af->af_fl = fl;
		af->af_host = host;
		af->af_rpc = client;
		/*
		 * We use M_RPC here so that we can xdr_free the thing
		 * later.
		 */
		nlm_make_netobj(&af->af_granted.cookie,
		    (caddr_t)&cookie, sizeof(cookie), M_RPC);
		af->af_granted.exclusive = argp->exclusive;
		af->af_granted.alock.caller_name =
		    strdup(argp->alock.caller_name, M_RPC);
		nlm_copy_netobj(&af->af_granted.alock.fh,
		    &argp->alock.fh, M_RPC);
		nlm_copy_netobj(&af->af_granted.alock.oh,
		    &argp->alock.oh, M_RPC);
		af->af_granted.alock.svid = argp->alock.svid;
		af->af_granted.alock.l_offset = argp->alock.l_offset;
		af->af_granted.alock.l_len = argp->alock.l_len;

		/*
		 * Put the entry on the pending list before calling
		 * VOP_ADVLOCKASYNC. We do this in case the lock
		 * request was blocked (returning EINPROGRESS) but
		 * then granted before we manage to run again. The
		 * client may receive the granted message before we
		 * send our blocked reply but thats their problem.
		 */
		mtx_lock(&host->nh_lock);
		TAILQ_INSERT_TAIL(&host->nh_pending, af, af_link);
		mtx_unlock(&host->nh_lock);

		error = VOP_ADVLOCKASYNC(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE,
		    &af->af_task, &af->af_cookie);

		/*
		 * If the lock completed synchronously, just free the
		 * tracking structure now.
		 */
		if (error != EINPROGRESS) {
			CLNT_RELEASE(af->af_rpc);
			mtx_lock(&host->nh_lock);
			TAILQ_REMOVE(&host->nh_pending, af, af_link);
			mtx_unlock(&host->nh_lock);
			xdr_free((xdrproc_t) xdr_nlm4_testargs,
			    &af->af_granted);
			free(af, M_NLM);
		} else {
			NLM_DEBUG(2, "NLM: pending async lock %p for %s "
			    "(sysid %d)\n", af, host->nh_caller_name, sysid);
			/*
			 * Don't vrele the vnode just yet - this must
			 * wait until either the async callback
			 * happens or the lock is cancelled.
			 */
			vs.vs_vp = NULL;
		}
	} else {
		error = VOP_ADVLOCK(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE);
	}

	/* Translate the local locking result to NLM status codes. */
	if (error) {
		if (error == EINPROGRESS) {
			result->stat.stat = nlm4_blocked;
		} else if (error == EDEADLK) {
			result->stat.stat = nlm4_deadlck;
		} else if (error == EAGAIN) {
			result->stat.stat = nlm4_denied;
		} else {
			result->stat.stat = nlm4_failed;
		}
	} else {
		if (monitor)
			nlm_host_monitor(host, argp->state);
		result->stat.stat = nlm4_granted;
	}

out:
	nlm_release_vfs_state(&vs);
	if (rpcp)
		*rpcp = nlm_host_get_rpc(host, TRUE);
	nlm_host_release(host);
	return (0);
}

/*
 * Service an NLM_CANCEL request: abort a previously queued blocking
 * lock request, if we can still find it on the host's pending list.
 */
int
nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *result, struct svc_req *rqstp,
    CLIENT **rpcp)
{
	fhandle_t fh;
	struct vfs_state vs;
	struct nlm_host *host;
	int error, sysid;
	struct flock fl;
	struct nlm_async_lock *af;

	memset(result, 0, sizeof(*result));
	memset(&vs, 0, sizeof(vs));

	host = nlm_find_host_by_name(argp->alock.caller_name,
	    svc_getrpccaller(rqstp), rqstp->rq_vers);
	if (!host) {
		result->stat.stat = nlm4_denied_nolocks;
		return (ENOMEM);
	}

	NLM_DEBUG(3, "nlm_do_cancel(): caller_name = %s (sysid = %d)\n",
	    host->nh_caller_name, host->nh_sysid);

	nlm_check_expired_locks(host);
	sysid = host->nh_sysid;

	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);

	if (time_uptime < nlm_grace_threshold) {
		result->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	error = nlm_get_vfs_state(host, rqstp, &fh, &vs, (accmode_t)0);
	if (error) {
		result->stat.stat = nlm_convert_error(error);
		goto out;
	}

	fl.l_start = argp->alock.l_offset;
	fl.l_len = argp->alock.l_len;
	fl.l_pid = argp->alock.svid;
	fl.l_sysid = sysid;
	fl.l_whence = SEEK_SET;
	if (argp->exclusive)
		fl.l_type = F_WRLCK;
	else
		fl.l_type = F_RDLCK;

	/*
	 * First we need to try and find the async lock request - if
	 * there isn't one, we give up and return nlm4_denied.
	 */
	mtx_lock(&host->nh_lock);

	TAILQ_FOREACH(af, &host->nh_pending, af_link) {
		if (af->af_fl.l_start == fl.l_start
		    && af->af_fl.l_len == fl.l_len
		    && af->af_fl.l_pid == fl.l_pid
		    && af->af_fl.l_type == fl.l_type) {
			break;
		}
	}

	if (!af) {
		mtx_unlock(&host->nh_lock);
		result->stat.stat = nlm4_denied;
		goto out;
	}

	error = nlm_cancel_async_lock(af);

	if (error) {
		result->stat.stat = nlm4_denied;
	} else {
		result->stat.stat = nlm4_granted;
	}

	mtx_unlock(&host->nh_lock);

out:
	nlm_release_vfs_state(&vs);
	if (rpcp)
		*rpcp = nlm_host_get_rpc(host, TRUE);
	nlm_host_release(host);
	return (0);
}

/*
 * Service an NLM_UNLOCK request.
 */
int
nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *result, struct svc_req *rqstp,
    CLIENT **rpcp)
{
	fhandle_t fh;
	struct vfs_state vs;
	struct nlm_host *host;
	int error, sysid;
	struct flock fl;

	memset(result, 0, sizeof(*result));
	memset(&vs, 0, sizeof(vs));

	host = nlm_find_host_by_name(argp->alock.caller_name,
	    svc_getrpccaller(rqstp), rqstp->rq_vers);
	if (!host) {
		result->stat.stat = nlm4_denied_nolocks;
		return (ENOMEM);
	}

	NLM_DEBUG(3, "nlm_do_unlock(): caller_name = %s (sysid = %d)\n",
	    host->nh_caller_name, host->nh_sysid);

	nlm_check_expired_locks(host);
	sysid = host->nh_sysid;

	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);

	if (time_uptime < nlm_grace_threshold) {
		result->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	/* accmode 0: unlock requests skip the access check. */
	error = nlm_get_vfs_state(host, rqstp, &fh, &vs, (accmode_t)0);
	if (error) {
		result->stat.stat = nlm_convert_error(error);
		goto out;
	}

	fl.l_start = argp->alock.l_offset;
	fl.l_len = argp->alock.l_len;
	fl.l_pid = argp->alock.svid;
	fl.l_sysid = sysid;
	fl.l_whence = SEEK_SET;
	fl.l_type = F_UNLCK;
	error = VOP_ADVLOCK(vs.vs_vp, NULL, F_UNLCK, &fl, F_REMOTE);

	/*
	 * Ignore the error - there is no result code for failure,
	 * only for grace period.
	 */
	result->stat.stat = nlm4_granted;

out:
	nlm_release_vfs_state(&vs);
	if (rpcp)
		*rpcp = nlm_host_get_rpc(host, TRUE);
	nlm_host_release(host);
	return (0);
}

/*
 * Service an NLM_GRANTED callback from a remote server: one of our
 * own blocked client-side lock requests has been granted.  Wake the
 * matching waiter, if any.
 */
int
nlm_do_granted(nlm4_testargs *argp, nlm4_res *result, struct svc_req *rqstp,
    CLIENT **rpcp)
{
	struct nlm_host *host;
	struct nlm_waiting_lock *nw;

	memset(result, 0, sizeof(*result));

	host = nlm_find_host_by_addr(svc_getrpccaller(rqstp), rqstp->rq_vers);
	if (!host) {
		result->stat.stat = nlm4_denied_nolocks;
		return (ENOMEM);
	}

	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);
	result->stat.stat = nlm4_denied;
	KFAIL_POINT_CODE(DEBUG_FP, nlm_deny_grant, goto out);

	mtx_lock(&nlm_global_lock);
	TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) {
		if (!nw->nw_waiting)
			continue;
		if (argp->alock.svid == nw->nw_lock.svid
		    && argp->alock.l_offset == nw->nw_lock.l_offset
		    && argp->alock.l_len == nw->nw_lock.l_len
		    && argp->alock.fh.n_len == nw->nw_lock.fh.n_len
		    && !memcmp(argp->alock.fh.n_bytes, nw->nw_lock.fh.n_bytes,
			nw->nw_lock.fh.n_len)) {
			nw->nw_waiting = FALSE;
			wakeup(nw);
			result->stat.stat = nlm4_granted;
			break;
		}
	}
	mtx_unlock(&nlm_global_lock);

out:
	if (rpcp)
		*rpcp = nlm_host_get_rpc(host, TRUE);
	nlm_host_release(host);
	return (0);
}

void
nlm_do_granted_res(nlm4_res *argp, struct svc_req *rqstp)
{
	struct nlm_host *host = NULL;
	struct nlm_async_lock *af = NULL;
	int error;

	/*
	 * Handle a client's reply (NLM_GRANTED_RES) to an asynchronous
	 * grant we sent earlier.  The cookie we put in the grant encodes
	 * the client's sysid plus a per-grant counter; validate its size
	 * before trusting its contents.
	 */
	if (argp->cookie.n_len != sizeof(struct nlm_grantcookie)) {
		NLM_DEBUG(1, "NLM: bogus grant cookie");
		goto out;
	}

	host = nlm_find_host_by_sysid(ng_sysid(&argp->cookie));
	if (!host) {
		NLM_DEBUG(1, "NLM: Unknown host rejected our grant");
		goto out;
	}

	/*
	 * Find the pending async lock whose grant cookie matches, and
	 * unlink it from the host's granted list while holding nh_lock.
	 */
	mtx_lock(&host->nh_lock);
	TAILQ_FOREACH(af, &host->nh_granted, af_link)
	    if (ng_cookie(&argp->cookie) ==
		ng_cookie(&af->af_granted.cookie))
		    break;
	if (af)
		TAILQ_REMOVE(&host->nh_granted, af, af_link);
	mtx_unlock(&host->nh_lock);

	if (!af) {
		NLM_DEBUG(1, "NLM: host %s (sysid %d) replied to our grant "
		    "with unrecognized cookie %d:%d", host->nh_caller_name,
		    host->nh_sysid, ng_sysid(&argp->cookie),
		    ng_cookie(&argp->cookie));
		goto out;
	}

	if (argp->stat.stat != nlm4_granted) {
		/*
		 * The client rejected the grant, so drop the advisory
		 * lock we were holding on its behalf.
		 */
		af->af_fl.l_type = F_UNLCK;
		error = VOP_ADVLOCK(af->af_vp, NULL, F_UNLCK, &af->af_fl, F_REMOTE);
		if (error) {
			NLM_DEBUG(1, "NLM: host %s (sysid %d) rejected our grant "
			    "and we failed to unlock (%d)", host->nh_caller_name,
			    host->nh_sysid, error);
			goto out;
		}

		NLM_DEBUG(5, "NLM: async lock %p rejected by host %s (sysid %d)",
		    af, host->nh_caller_name, host->nh_sysid);
	} else {
		NLM_DEBUG(5, "NLM: async lock %p accepted by host %s (sysid %d)",
		    af, host->nh_caller_name, host->nh_sysid);
	}

 out:
	/* af is non-NULL only if it was removed from nh_granted above. */
	if (af)
		nlm_free_async_lock(af);
	if (host)
		nlm_host_release(host);
}

/*
 * Service NLM_FREE_ALL: a client (identified by caller name) wants all
 * of its locks released, e.g. after a reboot.  Notify every matching
 * host entry of the client's new state.
 *
 * NOTE(review): nlm_hosts is walked here without taking
 * nlm_global_lock; nlm_host_notify() may remove entries, hence the
 * _SAFE iterator - confirm the locking expectations of the callers.
 */
void
nlm_do_free_all(nlm4_notify *argp)
{
	struct nlm_host *host, *thost;

	TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, thost) {
		if (!strcmp(host->nh_caller_name, argp->name))
			nlm_host_notify(host, argp->state);
	}
}

/*
 * Kernel module glue
 */

/*
 * Module event handler: bring up the NLM on load.  Unloading is not
 * supported - on MOD_UNLOAD, nlm_uninit() is invoked but the handler
 * still falls through to report EOPNOTSUPP, because the module cannot
 * be safely removed once its services are registered.
 */
static int
nfslockd_modevent(module_t mod, int type, void *data)
{

	switch (type) {
	case MOD_LOAD:
		return (nlm_init());

	case MOD_UNLOAD:
		nlm_uninit();
		/* The NLM module cannot be safely unloaded. */
		/* FALLTHROUGH */
	default:
		return (EOPNOTSUPP);
	}
}

/* Module registration: attach during VFS subsystem initialization. */
static moduledata_t nfslockd_mod = {
	"nfslockd",
	nfslockd_modevent,
	NULL,
};
DECLARE_MODULE(nfslockd, nfslockd_mod, SI_SUB_VFS, SI_ORDER_ANY);

/* So that loader and kldload(2) can find us, wherever we are.. */
MODULE_DEPEND(nfslockd, xdr, 1, 1, 1);
MODULE_DEPEND(nfslockd, krpc, 1, 1, 1);
MODULE_DEPEND(nfslockd, nfscommon, 1, 1, 1);
MODULE_VERSION(nfslockd, 1);