1 /*- 2 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ 3 * Authors: Doug Rabson <dfr@rabson.org> 4 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include "opt_inet6.h" 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/fcntl.h> 35 #include <sys/kernel.h> 36 #include <sys/lockf.h> 37 #include <sys/malloc.h> 38 #include <sys/mount.h> 39 #if __FreeBSD_version >= 700000 40 #include <sys/priv.h> 41 #endif 42 #include <sys/proc.h> 43 #include <sys/socket.h> 44 #include <sys/socketvar.h> 45 #include <sys/syscall.h> 46 #include <sys/sysctl.h> 47 #include <sys/sysent.h> 48 #include <sys/sysproto.h> 49 #include <sys/systm.h> 50 #include <sys/taskqueue.h> 51 #include <sys/unistd.h> 52 #include <sys/vnode.h> 53 54 #include <nlm/nlm_prot.h> 55 #include <nlm/sm_inter.h> 56 #include <nlm/nlm.h> 57 #include <rpc/rpc_com.h> 58 #include <rpc/rpcb_prot.h> 59 60 MALLOC_DEFINE(M_NLM, "NLM", "Network Lock Manager"); 61 62 /* 63 * If a host is inactive (and holds no locks) for this amount of 64 * seconds, we consider it idle and stop tracking it. 65 */ 66 #define NLM_IDLE_TIMEOUT 30 67 68 /* 69 * We check the host list for idle every few seconds. 70 */ 71 #define NLM_IDLE_PERIOD 5 72 73 /* 74 * Support for sysctl vfs.nlm.sysid 75 */ 76 SYSCTL_NODE(_vfs, OID_AUTO, nlm, CTLFLAG_RW, NULL, "Network Lock Manager"); 77 SYSCTL_NODE(_vfs_nlm, OID_AUTO, sysid, CTLFLAG_RW, NULL, ""); 78 79 /* 80 * Syscall hooks 81 */ 82 static int nlm_syscall_offset = SYS_nlm_syscall; 83 static struct sysent nlm_syscall_prev_sysent; 84 #if __FreeBSD_version < 700000 85 static struct sysent nlm_syscall_sysent = { 86 (sizeof(struct nlm_syscall_args) / sizeof(register_t)) | SYF_MPSAFE, 87 (sy_call_t *) nlm_syscall 88 }; 89 #else 90 MAKE_SYSENT(nlm_syscall); 91 #endif 92 static bool_t nlm_syscall_registered = FALSE; 93 94 /* 95 * Debug level passed in from userland. We also support a sysctl hook 96 * so that it can be changed on a live system. 97 */ 98 static int nlm_debug_level; 99 SYSCTL_INT(_debug, OID_AUTO, nlm_debug, CTLFLAG_RW, &nlm_debug_level, 0, ""); 100 101 /* 102 * Grace period handling. The value of nlm_grace_threshold is the 103 * value of time_uptime after which we are serving requests normally. 104 */ 105 static time_t nlm_grace_threshold; 106 107 /* 108 * We check for idle hosts if time_uptime is greater than 109 * nlm_next_idle_check, 110 */ 111 static time_t nlm_next_idle_check; 112 113 /* 114 * A socket to use for RPC - shared by all IPv4 RPC clients. 115 */ 116 static struct socket *nlm_socket; 117 118 #ifdef INET6 119 120 /* 121 * A socket to use for RPC - shared by all IPv6 RPC clients. 122 */ 123 static struct socket *nlm_socket6; 124 125 #endif 126 127 /* 128 * An RPC client handle that can be used to communicate with the local 129 * NSM. 130 */ 131 static CLIENT *nlm_nsm; 132 133 /* 134 * An RPC client handle that can be used to communicate with the 135 * userland part of lockd. 136 */ 137 static CLIENT *nlm_lockd; 138 139 /* 140 * Locks: 141 * (l) locked by nh_lock 142 * (s) only accessed via server RPC which is single threaded 143 * (c) const until freeing 144 */ 145 146 /* 147 * A pending asynchronous lock request, stored on the nh_pending list 148 * of the NLM host. 149 */ 150 struct nlm_async_lock { 151 TAILQ_ENTRY(nlm_async_lock) af_link; /* (l) host's list of locks */ 152 struct task af_task; /* (c) async callback details */ 153 void *af_cookie; /* (l) lock manager cancel token */ 154 struct vnode *af_vp; /* (l) vnode to lock */ 155 struct flock af_fl; /* (c) lock details */ 156 struct nlm_host *af_host; /* (c) host which is locking */ 157 nlm4_testargs af_granted; /* (c) notification details */ 158 }; 159 TAILQ_HEAD(nlm_async_lock_list, nlm_async_lock); 160 161 /* 162 * NLM host. 163 */ 164 enum nlm_host_state { 165 NLM_UNMONITORED, 166 NLM_MONITORED, 167 NLM_MONITOR_FAILED 168 }; 169 struct nlm_host { 170 struct mtx nh_lock; 171 TAILQ_ENTRY(nlm_host) nh_link; /* (s) global list of hosts */ 172 char *nh_caller_name; /* (c) printable name of host */ 173 uint32_t nh_sysid; /* (c) our allocaed system ID */ 174 char nh_sysid_string[10]; /* (c) string rep. of sysid */ 175 struct sockaddr_storage nh_addr; /* (s) remote address of host */ 176 CLIENT *nh_rpc; /* (s) RPC handle to send to host */ 177 rpcvers_t nh_vers; /* (s) NLM version of host */ 178 int nh_state; /* (s) last seen NSM state of host */ 179 enum nlm_host_state nh_monstate; /* (s) local NSM monitoring state */ 180 time_t nh_idle_timeout; /* (s) Time at which host is idle */ 181 struct sysctl_ctx_list nh_sysctl; /* (c) vfs.nlm.sysid nodes */ 182 struct nlm_async_lock_list nh_pending; /* (l) pending async locks */ 183 struct nlm_async_lock_list nh_finished; /* (l) finished async locks */ 184 }; 185 TAILQ_HEAD(nlm_host_list, nlm_host); 186 187 static struct nlm_host_list nlm_hosts; 188 static uint32_t nlm_next_sysid = 1; 189 190 static void nlm_host_unmonitor(struct nlm_host *); 191 192 /**********************************************************************/ 193 194 /* 195 * Initialise NLM globals. 196 */ 197 static void 198 nlm_init(void *dummy) 199 { 200 int error; 201 202 TAILQ_INIT(&nlm_hosts); 203 204 error = syscall_register(&nlm_syscall_offset, &nlm_syscall_sysent, 205 &nlm_syscall_prev_sysent); 206 if (error) 207 printf("Can't register NLM syscall\n"); 208 else 209 nlm_syscall_registered = TRUE; 210 } 211 SYSINIT(nlm_init, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_init, NULL); 212 213 static void 214 nlm_uninit(void *dummy) 215 { 216 217 if (nlm_syscall_registered) 218 syscall_deregister(&nlm_syscall_offset, 219 &nlm_syscall_prev_sysent); 220 } 221 SYSUNINIT(nlm_uninit, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_uninit, NULL); 222 223 /* 224 * Copy a struct netobj. 225 */ 226 void 227 nlm_copy_netobj(struct netobj *dst, struct netobj *src, 228 struct malloc_type *type) 229 { 230 231 dst->n_len = src->n_len; 232 dst->n_bytes = malloc(src->n_len, type, M_WAITOK); 233 memcpy(dst->n_bytes, src->n_bytes, src->n_len); 234 } 235 236 /* 237 * Create an RPC client handle for the given (address,prog,vers) 238 * triple using UDP. 239 */ 240 static CLIENT * 241 nlm_get_rpc(struct sockaddr *sa, rpcprog_t prog, rpcvers_t vers) 242 { 243 const char *wchan = "nlmrcv"; 244 const char* protofmly; 245 struct sockaddr_storage ss; 246 struct socket *so; 247 CLIENT *rpcb; 248 struct timeval timo; 249 RPCB parms; 250 char *uaddr; 251 enum clnt_stat stat; 252 int rpcvers; 253 254 /* 255 * First we need to contact the remote RPCBIND service to find 256 * the right port. 257 */ 258 memcpy(&ss, sa, sa->sa_len); 259 switch (ss.ss_family) { 260 case AF_INET: 261 ((struct sockaddr_in *)&ss)->sin_port = htons(111); 262 protofmly = "inet"; 263 so = nlm_socket; 264 break; 265 266 #ifdef INET6 267 case AF_INET6: 268 ((struct sockaddr_in6 *)&ss)->sin6_port = htons(111); 269 protofmly = "inet6"; 270 so = nlm_socket6; 271 break; 272 #endif 273 274 default: 275 /* 276 * Unsupported address family - fail. 277 */ 278 return (NULL); 279 } 280 281 rpcb = clnt_dg_create(so, (struct sockaddr *)&ss, 282 RPCBPROG, RPCBVERS4, 0, 0); 283 if (!rpcb) 284 return (NULL); 285 286 parms.r_prog = prog; 287 parms.r_vers = vers; 288 parms.r_netid = "udp"; 289 parms.r_addr = ""; 290 parms.r_owner = ""; 291 292 /* 293 * Use the default timeout. 294 */ 295 timo.tv_sec = 25; 296 timo.tv_usec = 0; 297 again: 298 uaddr = NULL; 299 stat = CLNT_CALL(rpcb, (rpcprog_t) RPCBPROC_GETADDR, 300 (xdrproc_t) xdr_rpcb, &parms, 301 (xdrproc_t) xdr_wrapstring, &uaddr, timo); 302 if (stat == RPC_PROGVERSMISMATCH) { 303 /* 304 * Try RPCBIND version 3 if we haven't already. 305 * 306 * XXX fall back to portmap? 307 */ 308 CLNT_CONTROL(rpcb, CLGET_VERS, &rpcvers); 309 if (rpcvers == RPCBVERS4) { 310 rpcvers = RPCBVERS; 311 CLNT_CONTROL(rpcb, CLSET_VERS, &rpcvers); 312 goto again; 313 } 314 } 315 316 if (stat == RPC_SUCCESS) { 317 /* 318 * We have a reply from the remote RPCBIND - turn it into an 319 * appropriate address and make a new client that can talk to 320 * the remote NLM. 321 * 322 * XXX fixup IPv6 scope ID. 323 */ 324 struct netbuf *a; 325 a = __rpc_uaddr2taddr_af(ss.ss_family, uaddr); 326 if (!a) { 327 CLNT_DESTROY(rpcb); 328 return (NULL); 329 } 330 memcpy(&ss, a->buf, a->len); 331 free(a->buf, M_RPC); 332 free(a, M_RPC); 333 xdr_free((xdrproc_t) xdr_wrapstring, &uaddr); 334 } else if (stat == RPC_PROGVERSMISMATCH) { 335 /* 336 * Try portmap. 337 */ 338 struct pmap mapping; 339 u_short port; 340 341 rpcvers = PMAPVERS; 342 CLNT_CONTROL(rpcb, CLSET_VERS, &rpcvers); 343 344 mapping.pm_prog = parms.r_prog; 345 mapping.pm_vers = parms.r_vers; 346 mapping.pm_prot = IPPROTO_UDP; 347 mapping.pm_port = 0; 348 349 stat = CLNT_CALL(rpcb, (rpcprog_t) PMAPPROC_GETPORT, 350 (xdrproc_t) xdr_pmap, &mapping, 351 (xdrproc_t) xdr_u_short, &port, timo); 352 353 if (stat == RPC_SUCCESS) { 354 switch (ss.ss_family) { 355 case AF_INET: 356 ((struct sockaddr_in *)&ss)->sin_port = 357 htons(port); 358 break; 359 360 #ifdef INET6 361 case AF_INET6: 362 ((struct sockaddr_in6 *)&ss)->sin6_port = 363 htons(port); 364 break; 365 #endif 366 } 367 } 368 } 369 if (stat != RPC_SUCCESS) { 370 printf("NLM: failed to contact remote rpcbind, stat = %d\n", 371 (int) stat); 372 CLNT_DESTROY(rpcb); 373 return (NULL); 374 } 375 376 /* 377 * Re-use the client we used to speak to rpcbind. 378 */ 379 CLNT_CONTROL(rpcb, CLSET_SVC_ADDR, &ss); 380 CLNT_CONTROL(rpcb, CLSET_PROG, &prog); 381 CLNT_CONTROL(rpcb, CLSET_VERS, &vers); 382 CLNT_CONTROL(rpcb, CLSET_WAITCHAN, &wchan); 383 rpcb->cl_auth = authunix_create(curthread->td_ucred); 384 385 return (rpcb); 386 } 387 388 /* 389 * This async callback after when an async lock request has been 390 * granted. We notify the host which initiated the request. 391 */ 392 static void 393 nlm_lock_callback(void *arg, int pending) 394 { 395 struct nlm_async_lock *af = (struct nlm_async_lock *) arg; 396 397 if (nlm_debug_level >= 2) 398 printf("NLM: async lock %p for %s (sysid %d) granted\n", 399 af, af->af_host->nh_caller_name, 400 af->af_host->nh_sysid); 401 402 /* 403 * Send the results back to the host. 404 * 405 * Note: there is a possible race here with nlm_host_notify 406 * destroying the RPC client. To avoid problems, the first 407 * thing nlm_host_notify does is to cancel pending async lock 408 * requests. 409 */ 410 if (af->af_host->nh_vers == NLM_VERS4) { 411 nlm4_granted_msg_4(&af->af_granted, 412 NULL, af->af_host->nh_rpc); 413 } else { 414 /* 415 * Back-convert to legacy protocol 416 */ 417 nlm_testargs granted; 418 granted.cookie = af->af_granted.cookie; 419 granted.exclusive = af->af_granted.exclusive; 420 granted.alock.caller_name = 421 af->af_granted.alock.caller_name; 422 granted.alock.fh = af->af_granted.alock.fh; 423 granted.alock.oh = af->af_granted.alock.oh; 424 granted.alock.svid = af->af_granted.alock.svid; 425 granted.alock.l_offset = 426 af->af_granted.alock.l_offset; 427 granted.alock.l_len = 428 af->af_granted.alock.l_len; 429 430 nlm_granted_msg_1(&granted, 431 NULL, af->af_host->nh_rpc); 432 } 433 434 /* 435 * Move this entry to the nh_finished list. Someone else will 436 * free it later - its too hard to do it here safely without 437 * racing with cancel. 438 * 439 * XXX possibly we should have a third "granted sent but not 440 * ack'ed" list so that we can re-send the granted message. 441 */ 442 mtx_lock(&af->af_host->nh_lock); 443 TAILQ_REMOVE(&af->af_host->nh_pending, af, af_link); 444 TAILQ_INSERT_TAIL(&af->af_host->nh_finished, af, af_link); 445 mtx_unlock(&af->af_host->nh_lock); 446 } 447 448 /* 449 * Free an async lock request. The request must have been removed from 450 * any list. 451 */ 452 static void 453 nlm_free_async_lock(struct nlm_async_lock *af) 454 { 455 /* 456 * Free an async lock. 457 */ 458 xdr_free((xdrproc_t) xdr_nlm4_testargs, &af->af_granted); 459 if (af->af_vp) 460 vrele(af->af_vp); 461 free(af, M_NLM); 462 } 463 464 /* 465 * Cancel our async request - this must be called with 466 * af->nh_host->nh_lock held. This is slightly complicated by a 467 * potential race with our own callback. If we fail to cancel the 468 * lock, it must already have been granted - we make sure our async 469 * task has completed by calling taskqueue_drain in this case. 470 */ 471 static int 472 nlm_cancel_async_lock(struct nlm_async_lock *af) 473 { 474 struct nlm_host *host = af->af_host; 475 int error; 476 477 mtx_assert(&host->nh_lock, MA_OWNED); 478 479 mtx_unlock(&host->nh_lock); 480 481 error = VOP_ADVLOCKASYNC(af->af_vp, NULL, F_CANCEL, &af->af_fl, 482 F_REMOTE, NULL, &af->af_cookie); 483 484 if (error) { 485 /* 486 * We failed to cancel - make sure our callback has 487 * completed before we continue. 488 */ 489 taskqueue_drain(taskqueue_thread, &af->af_task); 490 } 491 492 mtx_lock(&host->nh_lock); 493 494 if (!error) { 495 if (nlm_debug_level >= 2) 496 printf("NLM: async lock %p for %s (sysid %d) " 497 "cancelled\n", 498 af, host->nh_caller_name, host->nh_sysid); 499 500 /* 501 * Remove from the nh_pending list and free now that 502 * we are safe from the callback. 503 */ 504 TAILQ_REMOVE(&host->nh_pending, af, af_link); 505 mtx_unlock(&host->nh_lock); 506 nlm_free_async_lock(af); 507 mtx_lock(&host->nh_lock); 508 } 509 510 return (error); 511 } 512 513 static void 514 nlm_free_finished_locks(struct nlm_host *host) 515 { 516 struct nlm_async_lock *af; 517 518 mtx_lock(&host->nh_lock); 519 while ((af = TAILQ_FIRST(&host->nh_finished)) != NULL) { 520 TAILQ_REMOVE(&host->nh_finished, af, af_link); 521 mtx_unlock(&host->nh_lock); 522 nlm_free_async_lock(af); 523 mtx_lock(&host->nh_lock); 524 } 525 mtx_unlock(&host->nh_lock); 526 } 527 528 /* 529 * This is called when we receive a host state change 530 * notification. We unlock any active locks owned by the host. 531 */ 532 static void 533 nlm_host_notify(struct nlm_host *host, int newstate, bool_t destroy) 534 { 535 struct nlm_async_lock *af; 536 537 if (newstate) { 538 if (nlm_debug_level >= 1) 539 printf("NLM: host %s (sysid %d) rebooted, new " 540 "state is %d\n", 541 host->nh_caller_name, host->nh_sysid, newstate); 542 } 543 544 /* 545 * Cancel any pending async locks for this host. 546 */ 547 mtx_lock(&host->nh_lock); 548 while ((af = TAILQ_FIRST(&host->nh_pending)) != NULL) { 549 /* 550 * nlm_cancel_async_lock will remove the entry from 551 * nh_pending and free it. 552 */ 553 nlm_cancel_async_lock(af); 554 } 555 mtx_unlock(&host->nh_lock); 556 nlm_free_finished_locks(host); 557 558 /* 559 * The host just rebooted - trash its locks and forget any 560 * RPC client handle that we may have for it. 561 */ 562 lf_clearremotesys(host->nh_sysid); 563 if (host->nh_rpc) { 564 AUTH_DESTROY(host->nh_rpc->cl_auth); 565 CLNT_DESTROY(host->nh_rpc); 566 host->nh_rpc = NULL; 567 } 568 host->nh_state = newstate; 569 570 /* 571 * Destroy the host if the caller believes that it won't be 572 * used again. This is safe enough - if we see the same name 573 * again, we will just create a new host. 574 */ 575 if (destroy) { 576 TAILQ_REMOVE(&nlm_hosts, host, nh_link); 577 mtx_destroy(&host->nh_lock); 578 sysctl_ctx_free(&host->nh_sysctl); 579 free(host->nh_caller_name, M_NLM); 580 free(host, M_NLM); 581 } 582 } 583 584 /* 585 * Sysctl handler to count the number of locks for a sysid. 586 */ 587 static int 588 nlm_host_lock_count_sysctl(SYSCTL_HANDLER_ARGS) 589 { 590 struct nlm_host *host; 591 int count; 592 593 host = oidp->oid_arg1; 594 count = lf_countlocks(host->nh_sysid); 595 return sysctl_handle_int(oidp, &count, 0, req); 596 } 597 598 /* 599 * Create a new NLM host. 600 */ 601 static struct nlm_host * 602 nlm_create_host(const char* caller_name) 603 { 604 struct nlm_host *host; 605 struct sysctl_oid *oid; 606 607 if (nlm_debug_level >= 1) 608 printf("NLM: new host %s (sysid %d)\n", 609 caller_name, nlm_next_sysid); 610 host = malloc(sizeof(struct nlm_host), M_NLM, M_WAITOK|M_ZERO); 611 mtx_init(&host->nh_lock, "nh_lock", NULL, MTX_DEF); 612 host->nh_caller_name = strdup(caller_name, M_NLM); 613 host->nh_sysid = nlm_next_sysid++; 614 snprintf(host->nh_sysid_string, sizeof(host->nh_sysid_string), 615 "%d", host->nh_sysid); 616 host->nh_rpc = NULL; 617 host->nh_vers = 0; 618 host->nh_state = 0; 619 host->nh_monstate = NLM_UNMONITORED; 620 TAILQ_INIT(&host->nh_pending); 621 TAILQ_INIT(&host->nh_finished); 622 TAILQ_INSERT_TAIL(&nlm_hosts, host, nh_link); 623 624 sysctl_ctx_init(&host->nh_sysctl); 625 oid = SYSCTL_ADD_NODE(&host->nh_sysctl, 626 SYSCTL_STATIC_CHILDREN(_vfs_nlm_sysid), 627 OID_AUTO, host->nh_sysid_string, CTLFLAG_RD, NULL, ""); 628 SYSCTL_ADD_STRING(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 629 "hostname", CTLFLAG_RD, host->nh_caller_name, 0, ""); 630 SYSCTL_ADD_INT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 631 "version", CTLFLAG_RD, &host->nh_vers, 0, ""); 632 SYSCTL_ADD_INT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 633 "monitored", CTLFLAG_RD, &host->nh_monstate, 0, ""); 634 SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 635 "lock_count", CTLTYPE_INT | CTLFLAG_RD, host, 0, 636 nlm_host_lock_count_sysctl, "I", ""); 637 638 return (host); 639 } 640 641 /* 642 * Return non-zero if the address parts of the two sockaddrs are the 643 * same. 644 */ 645 static int 646 nlm_compare_addr(const struct sockaddr *a, const struct sockaddr *b) 647 { 648 const struct sockaddr_in *a4, *b4; 649 #ifdef INET6 650 const struct sockaddr_in6 *a6, *b6; 651 #endif 652 653 if (a->sa_family != b->sa_family) 654 return (FALSE); 655 656 switch (a->sa_family) { 657 case AF_INET: 658 a4 = (const struct sockaddr_in *) a; 659 b4 = (const struct sockaddr_in *) b; 660 return !memcmp(&a4->sin_addr, &b4->sin_addr, 661 sizeof(a4->sin_addr)); 662 #ifdef INET6 663 case AF_INET6: 664 a6 = (const struct sockaddr_in6 *) a; 665 b6 = (const struct sockaddr_in6 *) b; 666 return !memcmp(&a6->sin6_addr, &b6->sin6_addr, 667 sizeof(a6->sin6_addr)); 668 #endif 669 } 670 671 return (0); 672 } 673 674 /* 675 * Check for idle hosts and stop monitoring them. We could also free 676 * the host structure here, possibly after a larger timeout but that 677 * would require some care to avoid races with 678 * e.g. nlm_host_lock_count_sysctl. 679 */ 680 static void 681 nlm_check_idle(void) 682 { 683 struct nlm_host *host; 684 685 if (time_uptime <= nlm_next_idle_check) 686 return; 687 688 nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD; 689 690 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 691 if (host->nh_monstate == NLM_MONITORED 692 && time_uptime > host->nh_idle_timeout) { 693 if (lf_countlocks(host->nh_sysid) > 0) { 694 host->nh_idle_timeout = 695 time_uptime + NLM_IDLE_TIMEOUT; 696 continue; 697 } 698 nlm_host_unmonitor(host); 699 } 700 } 701 } 702 703 /* 704 * Search for an existing NLM host that matches the given name 705 * (typically the caller_name element of an nlm4_lock). If none is 706 * found, create a new host. If 'rqstp' is non-NULL, record the remote 707 * address of the host so that we can call it back for async 708 * responses. 709 */ 710 struct nlm_host * 711 nlm_find_host_by_name(const char *name, struct svc_req *rqstp) 712 { 713 struct nlm_host *host; 714 715 nlm_check_idle(); 716 717 /* 718 * The remote host is determined by caller_name. 719 */ 720 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 721 if (!strcmp(host->nh_caller_name, name)) 722 break; 723 } 724 725 if (!host) 726 host = nlm_create_host(name); 727 host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT; 728 729 /* 730 * If we have an RPC request, record the remote address so 731 * that can send async replies etc. 732 */ 733 if (rqstp) { 734 struct netbuf *addr = &rqstp->rq_xprt->xp_rtaddr; 735 736 KASSERT(addr->len < sizeof(struct sockaddr_storage), 737 ("Strange remote transport address length")); 738 739 /* 740 * If we have seen an address before and we currently 741 * have an RPC client handle, make sure the address is 742 * the same, otherwise discard the client handle. 743 */ 744 if (host->nh_addr.ss_len && host->nh_rpc) { 745 if (!nlm_compare_addr( 746 (struct sockaddr *) &host->nh_addr, 747 (struct sockaddr *) addr->buf) 748 || host->nh_vers != rqstp->rq_vers) { 749 AUTH_DESTROY(host->nh_rpc->cl_auth); 750 CLNT_DESTROY(host->nh_rpc); 751 host->nh_rpc = NULL; 752 } 753 } 754 memcpy(&host->nh_addr, addr->buf, addr->len); 755 host->nh_vers = rqstp->rq_vers; 756 } 757 758 return (host); 759 } 760 761 /* 762 * Search for an existing NLM host that matches the given remote 763 * address. If none is found, create a new host with the requested 764 * address and remember 'vers' as the NLM protocol version to use for 765 * that host. 766 */ 767 struct nlm_host * 768 nlm_find_host_by_addr(const struct sockaddr *addr, int vers) 769 { 770 struct nlm_host *host; 771 772 nlm_check_idle(); 773 774 /* 775 * The remote host is determined by caller_name. 776 */ 777 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 778 if (nlm_compare_addr(addr, 779 (const struct sockaddr *) &host->nh_addr)) 780 break; 781 } 782 783 if (!host) { 784 /* 785 * Fake up a name using inet_ntop. This buffer is 786 * large enough for an IPv6 address. 787 */ 788 char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"]; 789 switch (addr->sa_family) { 790 case AF_INET: 791 __rpc_inet_ntop(AF_INET, 792 &((const struct sockaddr_in *) addr)->sin_addr, 793 tmp, sizeof tmp); 794 break; 795 #ifdef INET6 796 case AF_INET6: 797 __rpc_inet_ntop(AF_INET6, 798 &((const struct sockaddr_in6 *) addr)->sin6_addr, 799 tmp, sizeof tmp); 800 break; 801 #endif 802 default: 803 strcmp(tmp, "<unknown>"); 804 } 805 host = nlm_create_host(tmp); 806 memcpy(&host->nh_addr, addr, addr->sa_len); 807 host->nh_vers = vers; 808 } 809 host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT; 810 811 return (host); 812 } 813 814 /* 815 * Find the NLM host that matches the value of 'sysid'. If none 816 * exists, return NULL. 817 */ 818 static struct nlm_host * 819 nlm_find_host_by_sysid(int sysid) 820 { 821 struct nlm_host *host; 822 823 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 824 if (host->nh_sysid == sysid) 825 return (host); 826 } 827 828 return (NULL); 829 } 830 831 /* 832 * Unregister this NLM host with the local NSM due to idleness. 833 */ 834 static void 835 nlm_host_unmonitor(struct nlm_host *host) 836 { 837 mon_id smmonid; 838 sm_stat_res smstat; 839 struct timeval timo; 840 enum clnt_stat stat; 841 842 if (nlm_debug_level >= 1) 843 printf("NLM: unmonitoring %s (sysid %d)\n", 844 host->nh_caller_name, host->nh_sysid); 845 846 /* 847 * We put our assigned system ID value in the priv field to 848 * make it simpler to find the host if we are notified of a 849 * host restart. 850 */ 851 smmonid.mon_name = host->nh_caller_name; 852 smmonid.my_id.my_name = "localhost"; 853 smmonid.my_id.my_prog = NLM_PROG; 854 smmonid.my_id.my_vers = NLM_SM; 855 smmonid.my_id.my_proc = NLM_SM_NOTIFY; 856 857 timo.tv_sec = 25; 858 timo.tv_usec = 0; 859 stat = CLNT_CALL(nlm_nsm, SM_UNMON, 860 (xdrproc_t) xdr_mon, &smmonid, 861 (xdrproc_t) xdr_sm_stat, &smstat, timo); 862 863 if (stat != RPC_SUCCESS) { 864 printf("Failed to contact local NSM - rpc error %d\n", stat); 865 return; 866 } 867 if (smstat.res_stat == stat_fail) { 868 printf("Local NSM refuses to unmonitor %s\n", 869 host->nh_caller_name); 870 return; 871 } 872 873 host->nh_monstate = NLM_UNMONITORED; 874 } 875 876 /* 877 * Register this NLM host with the local NSM so that we can be 878 * notified if it reboots. 879 */ 880 static void 881 nlm_host_monitor(struct nlm_host *host, int state) 882 { 883 mon smmon; 884 sm_stat_res smstat; 885 struct timeval timo; 886 enum clnt_stat stat; 887 888 if (host->nh_state && state && host->nh_state != state) { 889 /* 890 * The host rebooted without telling us. Trash its 891 * locks. 892 */ 893 nlm_host_notify(host, state, FALSE); 894 } 895 896 if (state && !host->nh_state) { 897 /* 898 * This is the first time we have seen an NSM state 899 * value for this host. We record it here to help 900 * detect host reboots. 901 */ 902 host->nh_state = state; 903 if (nlm_debug_level >= 1) 904 printf("NLM: host %s (sysid %d) has NSM state %d\n", 905 host->nh_caller_name, host->nh_sysid, state); 906 } 907 908 if (host->nh_monstate != NLM_UNMONITORED) 909 return; 910 911 if (nlm_debug_level >= 1) 912 printf("NLM: monitoring %s (sysid %d)\n", 913 host->nh_caller_name, host->nh_sysid); 914 915 /* 916 * We put our assigned system ID value in the priv field to 917 * make it simpler to find the host if we are notified of a 918 * host restart. 919 */ 920 smmon.mon_id.mon_name = host->nh_caller_name; 921 smmon.mon_id.my_id.my_name = "localhost"; 922 smmon.mon_id.my_id.my_prog = NLM_PROG; 923 smmon.mon_id.my_id.my_vers = NLM_SM; 924 smmon.mon_id.my_id.my_proc = NLM_SM_NOTIFY; 925 memcpy(smmon.priv, &host->nh_sysid, sizeof(host->nh_sysid)); 926 927 timo.tv_sec = 25; 928 timo.tv_usec = 0; 929 stat = CLNT_CALL(nlm_nsm, SM_MON, 930 (xdrproc_t) xdr_mon, &smmon, 931 (xdrproc_t) xdr_sm_stat, &smstat, timo); 932 933 if (stat != RPC_SUCCESS) { 934 printf("Failed to contact local NSM - rpc error %d\n", stat); 935 return; 936 } 937 if (smstat.res_stat == stat_fail) { 938 printf("Local NSM refuses to monitor %s\n", 939 host->nh_caller_name); 940 host->nh_monstate = NLM_MONITOR_FAILED; 941 return; 942 } 943 944 host->nh_monstate = NLM_MONITORED; 945 } 946 947 /* 948 * Return an RPC client handle that can be used to talk to the NLM 949 * running on the given host. 950 */ 951 CLIENT * 952 nlm_host_get_rpc(struct nlm_host *host) 953 { 954 struct timeval zero; 955 956 if (host->nh_rpc) 957 return (host->nh_rpc); 958 959 /* 960 * Set the send timeout to zero - we only use this rpc handle 961 * for sending async replies which have no return value. 962 */ 963 host->nh_rpc = nlm_get_rpc((struct sockaddr *)&host->nh_addr, 964 NLM_PROG, host->nh_vers); 965 966 if (host->nh_rpc) { 967 zero.tv_sec = 0; 968 zero.tv_usec = 0; 969 CLNT_CONTROL(host->nh_rpc, CLSET_TIMEOUT, &zero); 970 971 /* 972 * Monitor the host - if it reboots, the address of 973 * its NSM might change so we must discard our RPC 974 * handle. 975 */ 976 nlm_host_monitor(host, 0); 977 } 978 979 return (host->nh_rpc); 980 } 981 982 /**********************************************************************/ 983 984 /* 985 * Syscall interface with userland. 986 */ 987 988 extern void nlm_prog_0(struct svc_req *rqstp, SVCXPRT *transp); 989 extern void nlm_prog_1(struct svc_req *rqstp, SVCXPRT *transp); 990 extern void nlm_prog_3(struct svc_req *rqstp, SVCXPRT *transp); 991 extern void nlm_prog_4(struct svc_req *rqstp, SVCXPRT *transp); 992 993 static int 994 nlm_register_services(SVCPOOL *pool, int addr_count, char **addrs) 995 { 996 static rpcvers_t versions[] = { 997 NLM_SM, NLM_VERS, NLM_VERSX, NLM_VERS4 998 }; 999 static void (*dispatchers[])(struct svc_req *, SVCXPRT *) = { 1000 nlm_prog_0, nlm_prog_1, nlm_prog_3, nlm_prog_4 1001 }; 1002 static const int version_count = sizeof(versions) / sizeof(versions[0]); 1003 1004 SVCXPRT **xprts; 1005 char netid[16]; 1006 char uaddr[128]; 1007 struct netconfig *nconf; 1008 int i, j, error; 1009 1010 if (!addr_count) { 1011 printf("NLM: no service addresses given - can't start server"); 1012 return (EINVAL); 1013 } 1014 1015 xprts = malloc(addr_count * sizeof(SVCXPRT *), M_NLM, M_WAITOK); 1016 for (i = 0; i < version_count; i++) { 1017 for (j = 0; j < addr_count; j++) { 1018 /* 1019 * Create transports for the first version and 1020 * then just register everything else to the 1021 * same transports. 1022 */ 1023 if (i == 0) { 1024 char *up; 1025 1026 error = copyin(&addrs[2*j], &up, 1027 sizeof(char*)); 1028 if (error) 1029 goto out; 1030 error = copyinstr(up, netid, sizeof(netid), 1031 NULL); 1032 if (error) 1033 goto out; 1034 error = copyin(&addrs[2*j+1], &up, 1035 sizeof(char*)); 1036 if (error) 1037 goto out; 1038 error = copyinstr(up, uaddr, sizeof(uaddr), 1039 NULL); 1040 if (error) 1041 goto out; 1042 nconf = getnetconfigent(netid); 1043 if (!nconf) { 1044 printf("Can't lookup netid %s\n", 1045 netid); 1046 error = EINVAL; 1047 goto out; 1048 } 1049 xprts[j] = svc_tp_create(pool, dispatchers[i], 1050 NLM_PROG, versions[i], uaddr, nconf); 1051 if (!xprts[j]) { 1052 printf("NLM: unable to create " 1053 "(NLM_PROG, %d).\n", versions[i]); 1054 error = EINVAL; 1055 goto out; 1056 } 1057 freenetconfigent(nconf); 1058 } else { 1059 nconf = getnetconfigent(xprts[j]->xp_netid); 1060 rpcb_unset(NLM_PROG, versions[i], nconf); 1061 if (!svc_reg(xprts[j], NLM_PROG, versions[i], 1062 dispatchers[i], nconf)) { 1063 printf("NLM: can't register " 1064 "(NLM_PROG, %d)\n", versions[i]); 1065 error = EINVAL; 1066 goto out; 1067 } 1068 } 1069 } 1070 } 1071 error = 0; 1072 out: 1073 free(xprts, M_NLM); 1074 return (error); 1075 } 1076 1077 /* 1078 * Main server entry point. Contacts the local NSM to get its current 1079 * state and send SM_UNMON_ALL. Registers the NLM services and then 1080 * services requests. Does not return until the server is interrupted 1081 * by a signal. 1082 */ 1083 static int 1084 nlm_server_main(int addr_count, char **addrs) 1085 { 1086 struct thread *td = curthread; 1087 int error; 1088 SVCPOOL *pool = NULL; 1089 struct sockopt opt; 1090 int portlow; 1091 #ifdef INET6 1092 struct sockaddr_in6 sin6; 1093 #endif 1094 struct sockaddr_in sin; 1095 my_id id; 1096 sm_stat smstat; 1097 struct timeval timo; 1098 enum clnt_stat stat; 1099 struct nlm_host *host; 1100 1101 if (nlm_socket) { 1102 printf("NLM: can't start server - it appears to be running already\n"); 1103 return (EPERM); 1104 } 1105 1106 memset(&opt, 0, sizeof(opt)); 1107 1108 nlm_socket = NULL; 1109 error = socreate(AF_INET, &nlm_socket, SOCK_DGRAM, 0, 1110 td->td_ucred, td); 1111 if (error) { 1112 printf("NLM: can't create IPv4 socket - error %d\n", error); 1113 return (error); 1114 } 1115 opt.sopt_dir = SOPT_SET; 1116 opt.sopt_level = IPPROTO_IP; 1117 opt.sopt_name = IP_PORTRANGE; 1118 portlow = IP_PORTRANGE_LOW; 1119 opt.sopt_val = &portlow; 1120 opt.sopt_valsize = sizeof(portlow); 1121 sosetopt(nlm_socket, &opt); 1122 1123 #ifdef INET6 1124 nlm_socket6 = NULL; 1125 error = socreate(AF_INET6, &nlm_socket6, SOCK_DGRAM, 0, 1126 td->td_ucred, td); 1127 if (error) { 1128 printf("NLM: can't create IPv6 socket - error %d\n", error); 1129 return (error); 1130 } 1131 opt.sopt_dir = SOPT_SET; 1132 opt.sopt_level = IPPROTO_IPV6; 1133 opt.sopt_name = IPV6_PORTRANGE; 1134 portlow = IPV6_PORTRANGE_LOW; 1135 opt.sopt_val = &portlow; 1136 opt.sopt_valsize = sizeof(portlow); 1137 sosetopt(nlm_socket6, &opt); 1138 #endif 1139 1140 #ifdef INET6 1141 memset(&sin6, 0, sizeof(sin6)); 1142 sin6.sin6_len = sizeof(sin6); 1143 sin6.sin6_family = AF_INET6; 1144 sin6.sin6_addr = in6addr_loopback; 1145 nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin6, SM_PROG, SM_VERS); 1146 if (!nlm_nsm) { 1147 #endif 1148 memset(&sin, 0, sizeof(sin)); 1149 sin.sin_len = sizeof(sin); 1150 sin.sin_family = AF_INET; 1151 sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); 1152 nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin, SM_PROG, 1153 SM_VERS); 1154 #ifdef INET6 1155 } 1156 #endif 1157 1158 if (!nlm_nsm) { 1159 printf("Can't start NLM - unable to contact NSM\n"); 1160 error = EINVAL; 1161 goto out; 1162 } 1163 1164 pool = svcpool_create(); 1165 1166 error = nlm_register_services(pool, addr_count, addrs); 1167 if (error) 1168 goto out; 1169 1170 memset(&id, 0, sizeof(id)); 1171 id.my_name = "NFS NLM"; 1172 1173 timo.tv_sec = 25; 1174 timo.tv_usec = 0; 1175 stat = CLNT_CALL(nlm_nsm, SM_UNMON_ALL, 1176 (xdrproc_t) xdr_my_id, &id, 1177 (xdrproc_t) xdr_sm_stat, &smstat, timo); 1178 1179 if (stat != RPC_SUCCESS) { 1180 struct rpc_err err; 1181 1182 CLNT_GETERR(nlm_nsm, &err); 1183 printf("NLM: unexpected error contacting NSM, stat=%d, errno=%d\n", 1184 stat, err.re_errno); 1185 error = EINVAL; 1186 goto out; 1187 } 1188 1189 if (nlm_debug_level >= 1) 1190 printf("NLM: local NSM state is %d\n", smstat.state); 1191 1192 svc_run(pool); 1193 error = 0; 1194 1195 out: 1196 if (pool) 1197 svcpool_destroy(pool); 1198 1199 /* 1200 * Trash all the existing state so that if the server 1201 * restarts, it gets a clean slate. 1202 */ 1203 while ((host = TAILQ_FIRST(&nlm_hosts)) != NULL) { 1204 nlm_host_notify(host, 0, TRUE); 1205 } 1206 if (nlm_nsm) { 1207 AUTH_DESTROY(nlm_nsm->cl_auth); 1208 CLNT_DESTROY(nlm_nsm); 1209 nlm_nsm = NULL; 1210 } 1211 if (nlm_lockd) { 1212 AUTH_DESTROY(nlm_lockd->cl_auth); 1213 CLNT_DESTROY(nlm_lockd); 1214 nlm_lockd = NULL; 1215 } 1216 1217 soclose(nlm_socket); 1218 nlm_socket = NULL; 1219 #ifdef INET6 1220 soclose(nlm_socket6); 1221 nlm_socket6 = NULL; 1222 #endif 1223 1224 return (error); 1225 } 1226 1227 int 1228 nlm_syscall(struct thread *td, struct nlm_syscall_args *uap) 1229 { 1230 int error; 1231 1232 #if __FreeBSD_version >= 700000 1233 error = priv_check(td, PRIV_NFS_LOCKD); 1234 #else 1235 error = suser(td); 1236 #endif 1237 if (error) 1238 return (error); 1239 1240 nlm_debug_level = uap->debug_level; 1241 nlm_grace_threshold = time_uptime + uap->grace_period; 1242 nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD; 1243 1244 return nlm_server_main(uap->addr_count, uap->addrs); 1245 } 1246 1247 /**********************************************************************/ 1248 1249 /* 1250 * NLM implementation details, called from the RPC stubs. 1251 */ 1252 1253 1254 void 1255 nlm_sm_notify(struct nlm_sm_status *argp) 1256 { 1257 uint32_t sysid; 1258 struct nlm_host *host; 1259 1260 if (nlm_debug_level >= 3) 1261 printf("nlm_sm_notify(): mon_name = %s\n", argp->mon_name); 1262 memcpy(&sysid, &argp->priv, sizeof(sysid)); 1263 host = nlm_find_host_by_sysid(sysid); 1264 if (host) 1265 nlm_host_notify(host, argp->state, FALSE); 1266 } 1267 1268 static void 1269 nlm_convert_to_fhandle_t(fhandle_t *fhp, struct netobj *p) 1270 { 1271 memcpy(fhp, p->n_bytes, sizeof(fhandle_t)); 1272 } 1273 1274 struct vfs_state { 1275 struct mount *vs_mp; 1276 struct vnode *vs_vp; 1277 int vs_vfslocked; 1278 int vs_vnlocked; 1279 }; 1280 1281 static int 1282 nlm_get_vfs_state(struct nlm_host *host, struct svc_req *rqstp, 1283 fhandle_t *fhp, struct vfs_state *vs) 1284 { 1285 int error, exflags, freecred; 1286 struct ucred *cred = NULL, *credanon; 1287 1288 memset(vs, 0, sizeof(*vs)); 1289 freecred = FALSE; 1290 1291 vs->vs_mp = vfs_getvfs(&fhp->fh_fsid); 1292 if (!vs->vs_mp) { 1293 return (ESTALE); 1294 } 1295 vs->vs_vfslocked = VFS_LOCK_GIANT(vs->vs_mp); 1296 1297 error = VFS_CHECKEXP(vs->vs_mp, (struct sockaddr *)&host->nh_addr, 1298 &exflags, &credanon); 1299 if (error) 1300 goto out; 1301 1302 if (exflags & MNT_EXRDONLY || (vs->vs_mp->mnt_flag & MNT_RDONLY)) { 1303 error = EROFS; 1304 goto out; 1305 } 1306 1307 error = VFS_FHTOVP(vs->vs_mp, &fhp->fh_fid, &vs->vs_vp); 1308 if (error) 1309 goto out; 1310 vs->vs_vnlocked = TRUE; 1311 1312 cred = crget(); 1313 freecred = TRUE; 1314 if (!svc_getcred(rqstp, cred, NULL)) { 1315 error = EINVAL; 1316 goto out; 1317 } 1318 if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) { 1319 crfree(cred); 1320 cred = credanon; 1321 freecred = FALSE; 1322 } 1323 1324 /* 1325 * Check cred. 1326 */ 1327 error = VOP_ACCESS(vs->vs_vp, VWRITE, cred, curthread); 1328 if (error) 1329 goto out; 1330 1331 #if __FreeBSD_version < 800011 1332 VOP_UNLOCK(vs->vs_vp, 0, curthread); 1333 #else 1334 VOP_UNLOCK(vs->vs_vp, 0); 1335 #endif 1336 vs->vs_vnlocked = FALSE; 1337 1338 out: 1339 if (freecred) 1340 crfree(cred); 1341 1342 return (error); 1343 } 1344 1345 static void 1346 nlm_release_vfs_state(struct vfs_state *vs) 1347 { 1348 1349 if (vs->vs_vp) { 1350 if (vs->vs_vnlocked) 1351 vput(vs->vs_vp); 1352 else 1353 vrele(vs->vs_vp); 1354 } 1355 if (vs->vs_mp) 1356 vfs_rel(vs->vs_mp); 1357 VFS_UNLOCK_GIANT(vs->vs_vfslocked); 1358 } 1359 1360 static nlm4_stats 1361 nlm_convert_error(int error) 1362 { 1363 1364 if (error == ESTALE) 1365 return nlm4_stale_fh; 1366 else if (error == EROFS) 1367 return nlm4_rofs; 1368 else 1369 return nlm4_failed; 1370 } 1371 1372 struct nlm_host * 1373 nlm_do_test(nlm4_testargs *argp, nlm4_testres *result, struct svc_req *rqstp) 1374 { 1375 fhandle_t fh; 1376 struct vfs_state vs; 1377 struct nlm_host *host, *bhost; 1378 int error, sysid; 1379 struct flock fl; 1380 1381 memset(result, 0, sizeof(*result)); 1382 1383 host = nlm_find_host_by_name(argp->alock.caller_name, rqstp); 1384 if (!host) { 1385 result->stat.stat = nlm4_denied_nolocks; 1386 return (NULL); 1387 } 1388 1389 if (nlm_debug_level >= 3) 1390 printf("nlm_do_test(): caller_name = %s (sysid = %d)\n", 1391 host->nh_caller_name, host->nh_sysid); 1392 1393 nlm_free_finished_locks(host); 1394 sysid = host->nh_sysid; 1395 1396 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 1397 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 1398 1399 if (time_uptime < nlm_grace_threshold) { 1400 result->stat.stat = nlm4_denied_grace_period; 1401 return (host); 1402 } 1403 1404 error = nlm_get_vfs_state(host, rqstp, &fh, &vs); 1405 if (error) { 1406 result->stat.stat = nlm_convert_error(error); 1407 goto out; 1408 } 1409 1410 fl.l_start = argp->alock.l_offset; 1411 fl.l_len = argp->alock.l_len; 1412 fl.l_pid = argp->alock.svid; 1413 fl.l_sysid = sysid; 1414 fl.l_whence = SEEK_SET; 1415 if (argp->exclusive) 1416 fl.l_type = F_WRLCK; 1417 else 1418 fl.l_type = F_RDLCK; 1419 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_GETLK, &fl, F_REMOTE); 1420 if (error) { 1421 result->stat.stat = nlm4_failed; 1422 goto out; 1423 } 1424 1425 if (fl.l_type == F_UNLCK) { 1426 result->stat.stat = nlm4_granted; 1427 } else { 1428 result->stat.stat = nlm4_denied; 1429 result->stat.nlm4_testrply_u.holder.exclusive = 1430 (fl.l_type == F_WRLCK); 1431 result->stat.nlm4_testrply_u.holder.svid = fl.l_pid; 1432 bhost = nlm_find_host_by_sysid(fl.l_sysid); 1433 if (bhost) { 1434 /* 1435 * We don't have any useful way of recording 1436 * the value of oh used in the original lock 1437 * request. Ideally, the test reply would have 1438 * a space for the owning host's name allowing 1439 * our caller's NLM to keep track. 1440 * 1441 * As far as I can see, Solaris uses an eight 1442 * byte structure for oh which contains a four 1443 * byte pid encoded in local byte order and 1444 * the first four bytes of the host 1445 * name. Linux uses a variable length string 1446 * 'pid@hostname' in ascii but doesn't even 1447 * return that in test replies. 1448 * 1449 * For the moment, return nothing in oh 1450 * (already zero'ed above). 1451 */ 1452 } 1453 result->stat.nlm4_testrply_u.holder.l_offset = fl.l_start; 1454 result->stat.nlm4_testrply_u.holder.l_len = fl.l_len; 1455 } 1456 1457 out: 1458 nlm_release_vfs_state(&vs); 1459 return (host); 1460 } 1461 1462 struct nlm_host * 1463 nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp, 1464 bool_t monitor) 1465 { 1466 fhandle_t fh; 1467 struct vfs_state vs; 1468 struct nlm_host *host; 1469 int error, sysid; 1470 struct flock fl; 1471 1472 memset(result, 0, sizeof(*result)); 1473 1474 host = nlm_find_host_by_name(argp->alock.caller_name, rqstp); 1475 if (!host) { 1476 result->stat.stat = nlm4_denied_nolocks; 1477 return (NULL); 1478 } 1479 1480 if (nlm_debug_level >= 3) 1481 printf("nlm_do_lock(): caller_name = %s (sysid = %d)\n", 1482 host->nh_caller_name, host->nh_sysid); 1483 1484 nlm_free_finished_locks(host); 1485 sysid = host->nh_sysid; 1486 1487 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 1488 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 1489 1490 if (time_uptime < nlm_grace_threshold && !argp->reclaim) { 1491 result->stat.stat = nlm4_denied_grace_period; 1492 return (host); 1493 } 1494 1495 error = nlm_get_vfs_state(host, rqstp, &fh, &vs); 1496 if (error) { 1497 result->stat.stat = nlm_convert_error(error); 1498 goto out; 1499 } 1500 1501 fl.l_start = argp->alock.l_offset; 1502 fl.l_len = argp->alock.l_len; 1503 fl.l_pid = argp->alock.svid; 1504 fl.l_sysid = sysid; 1505 fl.l_whence = SEEK_SET; 1506 if (argp->exclusive) 1507 fl.l_type = F_WRLCK; 1508 else 1509 fl.l_type = F_RDLCK; 1510 if (argp->block) { 1511 struct nlm_async_lock *af; 1512 1513 /* 1514 * First, make sure we can contact the host's NLM. 1515 */ 1516 if (!nlm_host_get_rpc(host)) { 1517 result->stat.stat = nlm4_failed; 1518 goto out; 1519 } 1520 1521 /* 1522 * First we need to check and see if there is an 1523 * existing blocked lock that matches. This could be a 1524 * badly behaved client or an RPC re-send. If we find 1525 * one, just return nlm4_blocked. 1526 */ 1527 mtx_lock(&host->nh_lock); 1528 TAILQ_FOREACH(af, &host->nh_pending, af_link) { 1529 if (af->af_fl.l_start == fl.l_start 1530 && af->af_fl.l_len == fl.l_len 1531 && af->af_fl.l_pid == fl.l_pid 1532 && af->af_fl.l_type == fl.l_type) { 1533 break; 1534 } 1535 } 1536 mtx_unlock(&host->nh_lock); 1537 if (af) { 1538 result->stat.stat = nlm4_blocked; 1539 goto out; 1540 } 1541 1542 af = malloc(sizeof(struct nlm_async_lock), M_NLM, 1543 M_WAITOK|M_ZERO); 1544 TASK_INIT(&af->af_task, 0, nlm_lock_callback, af); 1545 af->af_vp = vs.vs_vp; 1546 af->af_fl = fl; 1547 af->af_host = host; 1548 /* 1549 * We use M_RPC here so that we can xdr_free the thing 1550 * later. 1551 */ 1552 af->af_granted.exclusive = argp->exclusive; 1553 af->af_granted.alock.caller_name = 1554 strdup(argp->alock.caller_name, M_RPC); 1555 nlm_copy_netobj(&af->af_granted.alock.fh, 1556 &argp->alock.fh, M_RPC); 1557 nlm_copy_netobj(&af->af_granted.alock.oh, 1558 &argp->alock.oh, M_RPC); 1559 af->af_granted.alock.svid = argp->alock.svid; 1560 af->af_granted.alock.l_offset = argp->alock.l_offset; 1561 af->af_granted.alock.l_len = argp->alock.l_len; 1562 1563 /* 1564 * Put the entry on the pending list before calling 1565 * VOP_ADVLOCKASYNC. We do this in case the lock 1566 * request was blocked (returning EINPROGRESS) but 1567 * then granted before we manage to run again. The 1568 * client may receive the granted message before we 1569 * send our blocked reply but thats their problem. 1570 */ 1571 mtx_lock(&host->nh_lock); 1572 TAILQ_INSERT_TAIL(&host->nh_pending, af, af_link); 1573 mtx_unlock(&host->nh_lock); 1574 1575 error = VOP_ADVLOCKASYNC(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE, 1576 &af->af_task, &af->af_cookie); 1577 1578 /* 1579 * If the lock completed synchronously, just free the 1580 * tracking structure now. 1581 */ 1582 if (error != EINPROGRESS) { 1583 mtx_lock(&host->nh_lock); 1584 TAILQ_REMOVE(&host->nh_pending, af, af_link); 1585 mtx_unlock(&host->nh_lock); 1586 xdr_free((xdrproc_t) xdr_nlm4_testargs, 1587 &af->af_granted); 1588 free(af, M_NLM); 1589 } else { 1590 if (nlm_debug_level >= 2) 1591 printf("NLM: pending async lock %p for %s " 1592 "(sysid %d)\n", 1593 af, host->nh_caller_name, sysid); 1594 /* 1595 * Don't vrele the vnode just yet - this must 1596 * wait until either the async callback 1597 * happens or the lock is cancelled. 1598 */ 1599 vs.vs_vp = NULL; 1600 } 1601 } else { 1602 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE); 1603 } 1604 1605 if (error) { 1606 if (error == EINPROGRESS) { 1607 result->stat.stat = nlm4_blocked; 1608 } else if (error == EDEADLK) { 1609 result->stat.stat = nlm4_deadlck; 1610 } else if (error == EAGAIN) { 1611 result->stat.stat = nlm4_denied; 1612 } else { 1613 result->stat.stat = nlm4_failed; 1614 } 1615 } else { 1616 if (monitor) 1617 nlm_host_monitor(host, argp->state); 1618 result->stat.stat = nlm4_granted; 1619 } 1620 1621 out: 1622 nlm_release_vfs_state(&vs); 1623 1624 return (host); 1625 } 1626 1627 struct nlm_host * 1628 nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *result, struct svc_req *rqstp) 1629 { 1630 fhandle_t fh; 1631 struct vfs_state vs; 1632 struct nlm_host *host; 1633 int error, sysid; 1634 struct flock fl; 1635 struct nlm_async_lock *af; 1636 1637 memset(result, 0, sizeof(*result)); 1638 1639 host = nlm_find_host_by_name(argp->alock.caller_name, rqstp); 1640 if (!host) { 1641 result->stat.stat = nlm4_denied_nolocks; 1642 return (NULL); 1643 } 1644 1645 if (nlm_debug_level >= 3) 1646 printf("nlm_do_cancel(): caller_name = %s (sysid = %d)\n", 1647 host->nh_caller_name, host->nh_sysid); 1648 1649 nlm_free_finished_locks(host); 1650 sysid = host->nh_sysid; 1651 1652 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 1653 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 1654 1655 if (time_uptime < nlm_grace_threshold) { 1656 result->stat.stat = nlm4_denied_grace_period; 1657 return (host); 1658 } 1659 1660 error = nlm_get_vfs_state(host, rqstp, &fh, &vs); 1661 if (error) { 1662 result->stat.stat = nlm_convert_error(error); 1663 goto out; 1664 } 1665 1666 fl.l_start = argp->alock.l_offset; 1667 fl.l_len = argp->alock.l_len; 1668 fl.l_pid = argp->alock.svid; 1669 fl.l_sysid = sysid; 1670 fl.l_whence = SEEK_SET; 1671 if (argp->exclusive) 1672 fl.l_type = F_WRLCK; 1673 else 1674 fl.l_type = F_RDLCK; 1675 1676 /* 1677 * First we need to try and find the async lock request - if 1678 * there isn't one, we give up and return nlm4_denied. 1679 */ 1680 mtx_lock(&host->nh_lock); 1681 1682 TAILQ_FOREACH(af, &host->nh_pending, af_link) { 1683 if (af->af_fl.l_start == fl.l_start 1684 && af->af_fl.l_len == fl.l_len 1685 && af->af_fl.l_pid == fl.l_pid 1686 && af->af_fl.l_type == fl.l_type) { 1687 break; 1688 } 1689 } 1690 1691 if (!af) { 1692 mtx_unlock(&host->nh_lock); 1693 result->stat.stat = nlm4_denied; 1694 goto out; 1695 } 1696 1697 error = nlm_cancel_async_lock(af); 1698 1699 if (error) { 1700 result->stat.stat = nlm4_denied; 1701 } else { 1702 result->stat.stat = nlm4_granted; 1703 } 1704 1705 mtx_unlock(&host->nh_lock); 1706 1707 out: 1708 nlm_release_vfs_state(&vs); 1709 1710 return (host); 1711 } 1712 1713 struct nlm_host * 1714 nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *result, struct svc_req *rqstp) 1715 { 1716 fhandle_t fh; 1717 struct vfs_state vs; 1718 struct nlm_host *host; 1719 int error, sysid; 1720 struct flock fl; 1721 1722 memset(result, 0, sizeof(*result)); 1723 1724 host = nlm_find_host_by_name(argp->alock.caller_name, rqstp); 1725 if (!host) { 1726 result->stat.stat = nlm4_denied_nolocks; 1727 return (NULL); 1728 } 1729 1730 if (nlm_debug_level >= 3) 1731 printf("nlm_do_unlock(): caller_name = %s (sysid = %d)\n", 1732 host->nh_caller_name, host->nh_sysid); 1733 1734 nlm_free_finished_locks(host); 1735 sysid = host->nh_sysid; 1736 1737 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 1738 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 1739 1740 if (time_uptime < nlm_grace_threshold) { 1741 result->stat.stat = nlm4_denied_grace_period; 1742 return (host); 1743 } 1744 1745 error = nlm_get_vfs_state(host, rqstp, &fh, &vs); 1746 if (error) { 1747 result->stat.stat = nlm_convert_error(error); 1748 goto out; 1749 } 1750 1751 fl.l_start = argp->alock.l_offset; 1752 fl.l_len = argp->alock.l_len; 1753 fl.l_pid = argp->alock.svid; 1754 fl.l_sysid = sysid; 1755 fl.l_whence = SEEK_SET; 1756 fl.l_type = F_UNLCK; 1757 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_UNLCK, &fl, F_REMOTE); 1758 1759 /* 1760 * Ignore the error - there is no result code for failure, 1761 * only for grace period. 1762 */ 1763 result->stat.stat = nlm4_granted; 1764 1765 out: 1766 nlm_release_vfs_state(&vs); 1767 1768 return (host); 1769 } 1770 1771 void 1772 nlm_do_free_all(nlm4_notify *argp) 1773 { 1774 struct nlm_host *host, *thost; 1775 1776 TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, thost) { 1777 if (!strcmp(host->nh_caller_name, argp->name)) 1778 nlm_host_notify(host, argp->state, FALSE); 1779 } 1780 } 1781 1782 #define _PATH_RPCLOCKDSOCK "/var/run/rpclockd.sock" 1783 1784 /* 1785 * Make a connection to the userland lockd - we push anything we can't 1786 * handle out to userland. 1787 */ 1788 CLIENT * 1789 nlm_user_lockd(void) 1790 { 1791 struct sockaddr_un sun; 1792 struct netconfig *nconf; 1793 struct timeval zero; 1794 1795 if (nlm_lockd) 1796 return (nlm_lockd); 1797 1798 sun.sun_family = AF_LOCAL; 1799 strcpy(sun.sun_path, _PATH_RPCLOCKDSOCK); 1800 sun.sun_len = SUN_LEN(&sun); 1801 1802 nconf = getnetconfigent("local"); 1803 nlm_lockd = clnt_reconnect_create(nconf, (struct sockaddr *) &sun, 1804 NLM_PROG, NLM_VERS4, RPC_MAXDATASIZE, RPC_MAXDATASIZE); 1805 1806 /* 1807 * Set the send timeout to zero - we only use this rpc handle 1808 * for sending async replies which have no return value. 1809 */ 1810 zero.tv_sec = 0; 1811 zero.tv_usec = 0; 1812 CLNT_CONTROL(nlm_lockd, CLSET_TIMEOUT, &zero); 1813 1814 return (nlm_lockd); 1815 } 1816 1817 /* 1818 * Kernel module glue 1819 */ 1820 static int 1821 nfslockd_modevent(module_t mod, int type, void *data) 1822 { 1823 1824 return (0); 1825 } 1826 static moduledata_t nfslockd_mod = { 1827 "nfslockd", 1828 nfslockd_modevent, 1829 NULL, 1830 }; 1831 DECLARE_MODULE(nfslockd, nfslockd_mod, SI_SUB_VFS, SI_ORDER_ANY); 1832 1833 /* So that loader and kldload(2) can find us, wherever we are.. */ 1834 MODULE_DEPEND(nfslockd, krpc, 1, 1, 1); 1835 MODULE_VERSION(nfslockd, 1); 1836