1 /*- 2 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ 3 * Authors: Doug Rabson <dfr@rabson.org> 4 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include "opt_inet6.h" 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/fcntl.h> 35 #include <sys/kernel.h> 36 #include <sys/lockf.h> 37 #include <sys/malloc.h> 38 #include <sys/mount.h> 39 #if __FreeBSD_version >= 700000 40 #include <sys/priv.h> 41 #endif 42 #include <sys/proc.h> 43 #include <sys/socket.h> 44 #include <sys/socketvar.h> 45 #include <sys/syscall.h> 46 #include <sys/sysctl.h> 47 #include <sys/sysent.h> 48 #include <sys/sysproto.h> 49 #include <sys/systm.h> 50 #include <sys/taskqueue.h> 51 #include <sys/unistd.h> 52 #include <sys/vnode.h> 53 54 #include <nlm/nlm_prot.h> 55 #include <nlm/sm_inter.h> 56 #include <nlm/nlm.h> 57 #include <rpc/rpc_com.h> 58 #include <rpc/rpcb_prot.h> 59 60 MALLOC_DEFINE(M_NLM, "NLM", "Network Lock Manager"); 61 62 /* 63 * If a host is inactive (and holds no locks) for this amount of 64 * seconds, we consider it idle and stop tracking it. 65 */ 66 #define NLM_IDLE_TIMEOUT 30 67 68 /* 69 * We check the host list for idle every few seconds. 70 */ 71 #define NLM_IDLE_PERIOD 5 72 73 /* 74 * Support for sysctl vfs.nlm.sysid 75 */ 76 SYSCTL_NODE(_vfs, OID_AUTO, nlm, CTLFLAG_RW, NULL, "Network Lock Manager"); 77 SYSCTL_NODE(_vfs_nlm, OID_AUTO, sysid, CTLFLAG_RW, NULL, ""); 78 79 /* 80 * Syscall hooks 81 */ 82 static int nlm_syscall_offset = SYS_nlm_syscall; 83 static struct sysent nlm_syscall_prev_sysent; 84 #if __FreeBSD_version < 700000 85 static struct sysent nlm_syscall_sysent = { 86 (sizeof(struct nlm_syscall_args) / sizeof(register_t)) | SYF_MPSAFE, 87 (sy_call_t *) nlm_syscall 88 }; 89 #else 90 MAKE_SYSENT(nlm_syscall); 91 #endif 92 static bool_t nlm_syscall_registered = FALSE; 93 94 /* 95 * Debug level passed in from userland. We also support a sysctl hook 96 * so that it can be changed on a live system. 97 */ 98 static int nlm_debug_level; 99 SYSCTL_INT(_debug, OID_AUTO, nlm_debug, CTLFLAG_RW, &nlm_debug_level, 0, ""); 100 101 /* 102 * Grace period handling. The value of nlm_grace_threshold is the 103 * value of time_uptime after which we are serving requests normally. 104 */ 105 static time_t nlm_grace_threshold; 106 107 /* 108 * We check for idle hosts if time_uptime is greater than 109 * nlm_next_idle_check, 110 */ 111 static time_t nlm_next_idle_check; 112 113 /* 114 * A socket to use for RPC - shared by all IPv4 RPC clients. 115 */ 116 static struct socket *nlm_socket; 117 118 #ifdef INET6 119 120 /* 121 * A socket to use for RPC - shared by all IPv6 RPC clients. 122 */ 123 static struct socket *nlm_socket6; 124 125 #endif 126 127 /* 128 * An RPC client handle that can be used to communicate with the local 129 * NSM. 130 */ 131 static CLIENT *nlm_nsm; 132 133 /* 134 * An RPC client handle that can be used to communicate with the 135 * userland part of lockd. 136 */ 137 static CLIENT *nlm_lockd; 138 139 /* 140 * Locks: 141 * (l) locked by nh_lock 142 * (s) only accessed via server RPC which is single threaded 143 * (c) const until freeing 144 */ 145 146 /* 147 * A pending asynchronous lock request, stored on the nh_pending list 148 * of the NLM host. 149 */ 150 struct nlm_async_lock { 151 TAILQ_ENTRY(nlm_async_lock) af_link; /* (l) host's list of locks */ 152 struct task af_task; /* (c) async callback details */ 153 void *af_cookie; /* (l) lock manager cancel token */ 154 struct vnode *af_vp; /* (l) vnode to lock */ 155 struct flock af_fl; /* (c) lock details */ 156 struct nlm_host *af_host; /* (c) host which is locking */ 157 nlm4_testargs af_granted; /* (c) notification details */ 158 }; 159 TAILQ_HEAD(nlm_async_lock_list, nlm_async_lock); 160 161 /* 162 * NLM host. 163 */ 164 enum nlm_host_state { 165 NLM_UNMONITORED, 166 NLM_MONITORED, 167 NLM_MONITOR_FAILED 168 }; 169 struct nlm_host { 170 struct mtx nh_lock; 171 TAILQ_ENTRY(nlm_host) nh_link; /* (s) global list of hosts */ 172 char *nh_caller_name; /* (c) printable name of host */ 173 uint32_t nh_sysid; /* (c) our allocaed system ID */ 174 char nh_sysid_string[10]; /* (c) string rep. of sysid */ 175 struct sockaddr_storage nh_addr; /* (s) remote address of host */ 176 CLIENT *nh_rpc; /* (s) RPC handle to send to host */ 177 rpcvers_t nh_vers; /* (s) NLM version of host */ 178 int nh_state; /* (s) last seen NSM state of host */ 179 enum nlm_host_state nh_monstate; /* (s) local NSM monitoring state */ 180 time_t nh_idle_timeout; /* (s) Time at which host is idle */ 181 time_t nh_rpc_create_time; /* (s) Time we create RPC client */ 182 struct sysctl_ctx_list nh_sysctl; /* (c) vfs.nlm.sysid nodes */ 183 struct nlm_async_lock_list nh_pending; /* (l) pending async locks */ 184 struct nlm_async_lock_list nh_finished; /* (l) finished async locks */ 185 }; 186 TAILQ_HEAD(nlm_host_list, nlm_host); 187 188 static struct nlm_host_list nlm_hosts; 189 static uint32_t nlm_next_sysid = 1; 190 191 static void nlm_host_unmonitor(struct nlm_host *); 192 193 /**********************************************************************/ 194 195 /* 196 * Initialise NLM globals. 197 */ 198 static void 199 nlm_init(void *dummy) 200 { 201 int error; 202 203 TAILQ_INIT(&nlm_hosts); 204 205 error = syscall_register(&nlm_syscall_offset, &nlm_syscall_sysent, 206 &nlm_syscall_prev_sysent); 207 if (error) 208 printf("Can't register NLM syscall\n"); 209 else 210 nlm_syscall_registered = TRUE; 211 } 212 SYSINIT(nlm_init, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_init, NULL); 213 214 static void 215 nlm_uninit(void *dummy) 216 { 217 218 if (nlm_syscall_registered) 219 syscall_deregister(&nlm_syscall_offset, 220 &nlm_syscall_prev_sysent); 221 } 222 SYSUNINIT(nlm_uninit, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_uninit, NULL); 223 224 /* 225 * Copy a struct netobj. 226 */ 227 void 228 nlm_copy_netobj(struct netobj *dst, struct netobj *src, 229 struct malloc_type *type) 230 { 231 232 dst->n_len = src->n_len; 233 dst->n_bytes = malloc(src->n_len, type, M_WAITOK); 234 memcpy(dst->n_bytes, src->n_bytes, src->n_len); 235 } 236 237 /* 238 * Create an RPC client handle for the given (address,prog,vers) 239 * triple using UDP. 240 */ 241 static CLIENT * 242 nlm_get_rpc(struct sockaddr *sa, rpcprog_t prog, rpcvers_t vers) 243 { 244 const char *wchan = "nlmrcv"; 245 const char* protofmly; 246 struct sockaddr_storage ss; 247 struct socket *so; 248 CLIENT *rpcb; 249 struct timeval timo; 250 RPCB parms; 251 char *uaddr; 252 enum clnt_stat stat; 253 int rpcvers; 254 255 /* 256 * First we need to contact the remote RPCBIND service to find 257 * the right port. 258 */ 259 memcpy(&ss, sa, sa->sa_len); 260 switch (ss.ss_family) { 261 case AF_INET: 262 ((struct sockaddr_in *)&ss)->sin_port = htons(111); 263 protofmly = "inet"; 264 so = nlm_socket; 265 break; 266 267 #ifdef INET6 268 case AF_INET6: 269 ((struct sockaddr_in6 *)&ss)->sin6_port = htons(111); 270 protofmly = "inet6"; 271 so = nlm_socket6; 272 break; 273 #endif 274 275 default: 276 /* 277 * Unsupported address family - fail. 278 */ 279 return (NULL); 280 } 281 282 rpcb = clnt_dg_create(so, (struct sockaddr *)&ss, 283 RPCBPROG, RPCBVERS4, 0, 0); 284 if (!rpcb) 285 return (NULL); 286 287 parms.r_prog = prog; 288 parms.r_vers = vers; 289 parms.r_netid = "udp"; 290 parms.r_addr = ""; 291 parms.r_owner = ""; 292 293 /* 294 * Use the default timeout. 295 */ 296 timo.tv_sec = 25; 297 timo.tv_usec = 0; 298 again: 299 uaddr = NULL; 300 stat = CLNT_CALL(rpcb, (rpcprog_t) RPCBPROC_GETADDR, 301 (xdrproc_t) xdr_rpcb, &parms, 302 (xdrproc_t) xdr_wrapstring, &uaddr, timo); 303 if (stat == RPC_PROGVERSMISMATCH) { 304 /* 305 * Try RPCBIND version 3 if we haven't already. 306 * 307 * XXX fall back to portmap? 308 */ 309 CLNT_CONTROL(rpcb, CLGET_VERS, &rpcvers); 310 if (rpcvers == RPCBVERS4) { 311 rpcvers = RPCBVERS; 312 CLNT_CONTROL(rpcb, CLSET_VERS, &rpcvers); 313 goto again; 314 } 315 } 316 317 if (stat == RPC_SUCCESS) { 318 /* 319 * We have a reply from the remote RPCBIND - turn it into an 320 * appropriate address and make a new client that can talk to 321 * the remote NLM. 322 * 323 * XXX fixup IPv6 scope ID. 324 */ 325 struct netbuf *a; 326 a = __rpc_uaddr2taddr_af(ss.ss_family, uaddr); 327 if (!a) { 328 CLNT_DESTROY(rpcb); 329 return (NULL); 330 } 331 memcpy(&ss, a->buf, a->len); 332 free(a->buf, M_RPC); 333 free(a, M_RPC); 334 xdr_free((xdrproc_t) xdr_wrapstring, &uaddr); 335 } else if (stat == RPC_PROGVERSMISMATCH) { 336 /* 337 * Try portmap. 338 */ 339 struct pmap mapping; 340 u_short port; 341 342 rpcvers = PMAPVERS; 343 CLNT_CONTROL(rpcb, CLSET_VERS, &rpcvers); 344 345 mapping.pm_prog = parms.r_prog; 346 mapping.pm_vers = parms.r_vers; 347 mapping.pm_prot = IPPROTO_UDP; 348 mapping.pm_port = 0; 349 350 stat = CLNT_CALL(rpcb, (rpcprog_t) PMAPPROC_GETPORT, 351 (xdrproc_t) xdr_pmap, &mapping, 352 (xdrproc_t) xdr_u_short, &port, timo); 353 354 if (stat == RPC_SUCCESS) { 355 switch (ss.ss_family) { 356 case AF_INET: 357 ((struct sockaddr_in *)&ss)->sin_port = 358 htons(port); 359 break; 360 361 #ifdef INET6 362 case AF_INET6: 363 ((struct sockaddr_in6 *)&ss)->sin6_port = 364 htons(port); 365 break; 366 #endif 367 } 368 } 369 } 370 if (stat != RPC_SUCCESS) { 371 printf("NLM: failed to contact remote rpcbind, stat = %d\n", 372 (int) stat); 373 CLNT_DESTROY(rpcb); 374 return (NULL); 375 } 376 377 /* 378 * Re-use the client we used to speak to rpcbind. 379 */ 380 CLNT_CONTROL(rpcb, CLSET_SVC_ADDR, &ss); 381 CLNT_CONTROL(rpcb, CLSET_PROG, &prog); 382 CLNT_CONTROL(rpcb, CLSET_VERS, &vers); 383 CLNT_CONTROL(rpcb, CLSET_WAITCHAN, &wchan); 384 rpcb->cl_auth = authunix_create(curthread->td_ucred); 385 386 return (rpcb); 387 } 388 389 /* 390 * This async callback after when an async lock request has been 391 * granted. We notify the host which initiated the request. 392 */ 393 static void 394 nlm_lock_callback(void *arg, int pending) 395 { 396 struct nlm_async_lock *af = (struct nlm_async_lock *) arg; 397 398 if (nlm_debug_level >= 2) 399 printf("NLM: async lock %p for %s (sysid %d) granted\n", 400 af, af->af_host->nh_caller_name, 401 af->af_host->nh_sysid); 402 403 /* 404 * Send the results back to the host. 405 * 406 * Note: there is a possible race here with nlm_host_notify 407 * destroying the RPC client. To avoid problems, the first 408 * thing nlm_host_notify does is to cancel pending async lock 409 * requests. 410 */ 411 if (af->af_host->nh_vers == NLM_VERS4) { 412 nlm4_granted_msg_4(&af->af_granted, 413 NULL, af->af_host->nh_rpc); 414 } else { 415 /* 416 * Back-convert to legacy protocol 417 */ 418 nlm_testargs granted; 419 granted.cookie = af->af_granted.cookie; 420 granted.exclusive = af->af_granted.exclusive; 421 granted.alock.caller_name = 422 af->af_granted.alock.caller_name; 423 granted.alock.fh = af->af_granted.alock.fh; 424 granted.alock.oh = af->af_granted.alock.oh; 425 granted.alock.svid = af->af_granted.alock.svid; 426 granted.alock.l_offset = 427 af->af_granted.alock.l_offset; 428 granted.alock.l_len = 429 af->af_granted.alock.l_len; 430 431 nlm_granted_msg_1(&granted, 432 NULL, af->af_host->nh_rpc); 433 } 434 435 /* 436 * Move this entry to the nh_finished list. Someone else will 437 * free it later - its too hard to do it here safely without 438 * racing with cancel. 439 * 440 * XXX possibly we should have a third "granted sent but not 441 * ack'ed" list so that we can re-send the granted message. 442 */ 443 mtx_lock(&af->af_host->nh_lock); 444 TAILQ_REMOVE(&af->af_host->nh_pending, af, af_link); 445 TAILQ_INSERT_TAIL(&af->af_host->nh_finished, af, af_link); 446 mtx_unlock(&af->af_host->nh_lock); 447 } 448 449 /* 450 * Free an async lock request. The request must have been removed from 451 * any list. 452 */ 453 static void 454 nlm_free_async_lock(struct nlm_async_lock *af) 455 { 456 /* 457 * Free an async lock. 458 */ 459 xdr_free((xdrproc_t) xdr_nlm4_testargs, &af->af_granted); 460 if (af->af_vp) 461 vrele(af->af_vp); 462 free(af, M_NLM); 463 } 464 465 /* 466 * Cancel our async request - this must be called with 467 * af->nh_host->nh_lock held. This is slightly complicated by a 468 * potential race with our own callback. If we fail to cancel the 469 * lock, it must already have been granted - we make sure our async 470 * task has completed by calling taskqueue_drain in this case. 471 */ 472 static int 473 nlm_cancel_async_lock(struct nlm_async_lock *af) 474 { 475 struct nlm_host *host = af->af_host; 476 int error; 477 478 mtx_assert(&host->nh_lock, MA_OWNED); 479 480 mtx_unlock(&host->nh_lock); 481 482 error = VOP_ADVLOCKASYNC(af->af_vp, NULL, F_CANCEL, &af->af_fl, 483 F_REMOTE, NULL, &af->af_cookie); 484 485 if (error) { 486 /* 487 * We failed to cancel - make sure our callback has 488 * completed before we continue. 489 */ 490 taskqueue_drain(taskqueue_thread, &af->af_task); 491 } 492 493 mtx_lock(&host->nh_lock); 494 495 if (!error) { 496 if (nlm_debug_level >= 2) 497 printf("NLM: async lock %p for %s (sysid %d) " 498 "cancelled\n", 499 af, host->nh_caller_name, host->nh_sysid); 500 501 /* 502 * Remove from the nh_pending list and free now that 503 * we are safe from the callback. 504 */ 505 TAILQ_REMOVE(&host->nh_pending, af, af_link); 506 mtx_unlock(&host->nh_lock); 507 nlm_free_async_lock(af); 508 mtx_lock(&host->nh_lock); 509 } 510 511 return (error); 512 } 513 514 static void 515 nlm_free_finished_locks(struct nlm_host *host) 516 { 517 struct nlm_async_lock *af; 518 519 mtx_lock(&host->nh_lock); 520 while ((af = TAILQ_FIRST(&host->nh_finished)) != NULL) { 521 TAILQ_REMOVE(&host->nh_finished, af, af_link); 522 mtx_unlock(&host->nh_lock); 523 nlm_free_async_lock(af); 524 mtx_lock(&host->nh_lock); 525 } 526 mtx_unlock(&host->nh_lock); 527 } 528 529 /* 530 * This is called when we receive a host state change 531 * notification. We unlock any active locks owned by the host. 532 */ 533 static void 534 nlm_host_notify(struct nlm_host *host, int newstate, bool_t destroy) 535 { 536 struct nlm_async_lock *af; 537 538 if (newstate) { 539 if (nlm_debug_level >= 1) 540 printf("NLM: host %s (sysid %d) rebooted, new " 541 "state is %d\n", 542 host->nh_caller_name, host->nh_sysid, newstate); 543 } 544 545 /* 546 * Cancel any pending async locks for this host. 547 */ 548 mtx_lock(&host->nh_lock); 549 while ((af = TAILQ_FIRST(&host->nh_pending)) != NULL) { 550 /* 551 * nlm_cancel_async_lock will remove the entry from 552 * nh_pending and free it. 553 */ 554 nlm_cancel_async_lock(af); 555 } 556 mtx_unlock(&host->nh_lock); 557 nlm_free_finished_locks(host); 558 559 /* 560 * The host just rebooted - trash its locks and forget any 561 * RPC client handle that we may have for it. 562 */ 563 lf_clearremotesys(host->nh_sysid); 564 if (host->nh_rpc) { 565 AUTH_DESTROY(host->nh_rpc->cl_auth); 566 CLNT_DESTROY(host->nh_rpc); 567 host->nh_rpc = NULL; 568 } 569 host->nh_state = newstate; 570 571 /* 572 * Destroy the host if the caller believes that it won't be 573 * used again. This is safe enough - if we see the same name 574 * again, we will just create a new host. 575 */ 576 if (destroy) { 577 TAILQ_REMOVE(&nlm_hosts, host, nh_link); 578 mtx_destroy(&host->nh_lock); 579 sysctl_ctx_free(&host->nh_sysctl); 580 free(host->nh_caller_name, M_NLM); 581 free(host, M_NLM); 582 } 583 } 584 585 /* 586 * Sysctl handler to count the number of locks for a sysid. 587 */ 588 static int 589 nlm_host_lock_count_sysctl(SYSCTL_HANDLER_ARGS) 590 { 591 struct nlm_host *host; 592 int count; 593 594 host = oidp->oid_arg1; 595 count = lf_countlocks(host->nh_sysid); 596 return sysctl_handle_int(oidp, &count, 0, req); 597 } 598 599 /* 600 * Create a new NLM host. 601 */ 602 static struct nlm_host * 603 nlm_create_host(const char* caller_name) 604 { 605 struct nlm_host *host; 606 struct sysctl_oid *oid; 607 608 if (nlm_debug_level >= 1) 609 printf("NLM: new host %s (sysid %d)\n", 610 caller_name, nlm_next_sysid); 611 host = malloc(sizeof(struct nlm_host), M_NLM, M_WAITOK|M_ZERO); 612 mtx_init(&host->nh_lock, "nh_lock", NULL, MTX_DEF); 613 host->nh_caller_name = strdup(caller_name, M_NLM); 614 host->nh_sysid = nlm_next_sysid++; 615 snprintf(host->nh_sysid_string, sizeof(host->nh_sysid_string), 616 "%d", host->nh_sysid); 617 host->nh_rpc = NULL; 618 host->nh_vers = 0; 619 host->nh_state = 0; 620 host->nh_monstate = NLM_UNMONITORED; 621 TAILQ_INIT(&host->nh_pending); 622 TAILQ_INIT(&host->nh_finished); 623 TAILQ_INSERT_TAIL(&nlm_hosts, host, nh_link); 624 625 sysctl_ctx_init(&host->nh_sysctl); 626 oid = SYSCTL_ADD_NODE(&host->nh_sysctl, 627 SYSCTL_STATIC_CHILDREN(_vfs_nlm_sysid), 628 OID_AUTO, host->nh_sysid_string, CTLFLAG_RD, NULL, ""); 629 SYSCTL_ADD_STRING(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 630 "hostname", CTLFLAG_RD, host->nh_caller_name, 0, ""); 631 SYSCTL_ADD_INT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 632 "version", CTLFLAG_RD, &host->nh_vers, 0, ""); 633 SYSCTL_ADD_INT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 634 "monitored", CTLFLAG_RD, &host->nh_monstate, 0, ""); 635 SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 636 "lock_count", CTLTYPE_INT | CTLFLAG_RD, host, 0, 637 nlm_host_lock_count_sysctl, "I", ""); 638 639 return (host); 640 } 641 642 /* 643 * Return non-zero if the address parts of the two sockaddrs are the 644 * same. 645 */ 646 static int 647 nlm_compare_addr(const struct sockaddr *a, const struct sockaddr *b) 648 { 649 const struct sockaddr_in *a4, *b4; 650 #ifdef INET6 651 const struct sockaddr_in6 *a6, *b6; 652 #endif 653 654 if (a->sa_family != b->sa_family) 655 return (FALSE); 656 657 switch (a->sa_family) { 658 case AF_INET: 659 a4 = (const struct sockaddr_in *) a; 660 b4 = (const struct sockaddr_in *) b; 661 return !memcmp(&a4->sin_addr, &b4->sin_addr, 662 sizeof(a4->sin_addr)); 663 #ifdef INET6 664 case AF_INET6: 665 a6 = (const struct sockaddr_in6 *) a; 666 b6 = (const struct sockaddr_in6 *) b; 667 return !memcmp(&a6->sin6_addr, &b6->sin6_addr, 668 sizeof(a6->sin6_addr)); 669 #endif 670 } 671 672 return (0); 673 } 674 675 /* 676 * Check for idle hosts and stop monitoring them. We could also free 677 * the host structure here, possibly after a larger timeout but that 678 * would require some care to avoid races with 679 * e.g. nlm_host_lock_count_sysctl. 680 */ 681 static void 682 nlm_check_idle(void) 683 { 684 struct nlm_host *host; 685 686 if (time_uptime <= nlm_next_idle_check) 687 return; 688 689 nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD; 690 691 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 692 if (host->nh_monstate == NLM_MONITORED 693 && time_uptime > host->nh_idle_timeout) { 694 if (lf_countlocks(host->nh_sysid) > 0) { 695 host->nh_idle_timeout = 696 time_uptime + NLM_IDLE_TIMEOUT; 697 continue; 698 } 699 nlm_host_unmonitor(host); 700 } 701 } 702 } 703 704 /* 705 * Search for an existing NLM host that matches the given name 706 * (typically the caller_name element of an nlm4_lock). If none is 707 * found, create a new host. If 'rqstp' is non-NULL, record the remote 708 * address of the host so that we can call it back for async 709 * responses. 710 */ 711 struct nlm_host * 712 nlm_find_host_by_name(const char *name, struct svc_req *rqstp) 713 { 714 struct nlm_host *host; 715 716 nlm_check_idle(); 717 718 /* 719 * The remote host is determined by caller_name. 720 */ 721 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 722 if (!strcmp(host->nh_caller_name, name)) 723 break; 724 } 725 726 if (!host) 727 host = nlm_create_host(name); 728 host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT; 729 730 /* 731 * If we have an RPC request, record the remote address so 732 * that can send async replies etc. 733 */ 734 if (rqstp) { 735 struct netbuf *addr = &rqstp->rq_xprt->xp_rtaddr; 736 737 KASSERT(addr->len < sizeof(struct sockaddr_storage), 738 ("Strange remote transport address length")); 739 740 /* 741 * If we have seen an address before and we currently 742 * have an RPC client handle, make sure the address is 743 * the same, otherwise discard the client handle. 744 */ 745 if (host->nh_addr.ss_len && host->nh_rpc) { 746 if (!nlm_compare_addr( 747 (struct sockaddr *) &host->nh_addr, 748 (struct sockaddr *) addr->buf) 749 || host->nh_vers != rqstp->rq_vers) { 750 AUTH_DESTROY(host->nh_rpc->cl_auth); 751 CLNT_DESTROY(host->nh_rpc); 752 host->nh_rpc = NULL; 753 } 754 } 755 memcpy(&host->nh_addr, addr->buf, addr->len); 756 host->nh_vers = rqstp->rq_vers; 757 } 758 759 return (host); 760 } 761 762 /* 763 * Search for an existing NLM host that matches the given remote 764 * address. If none is found, create a new host with the requested 765 * address and remember 'vers' as the NLM protocol version to use for 766 * that host. 767 */ 768 struct nlm_host * 769 nlm_find_host_by_addr(const struct sockaddr *addr, int vers) 770 { 771 struct nlm_host *host; 772 773 nlm_check_idle(); 774 775 /* 776 * The remote host is determined by caller_name. 777 */ 778 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 779 if (nlm_compare_addr(addr, 780 (const struct sockaddr *) &host->nh_addr)) 781 break; 782 } 783 784 if (!host) { 785 /* 786 * Fake up a name using inet_ntop. This buffer is 787 * large enough for an IPv6 address. 788 */ 789 char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"]; 790 switch (addr->sa_family) { 791 case AF_INET: 792 __rpc_inet_ntop(AF_INET, 793 &((const struct sockaddr_in *) addr)->sin_addr, 794 tmp, sizeof tmp); 795 break; 796 #ifdef INET6 797 case AF_INET6: 798 __rpc_inet_ntop(AF_INET6, 799 &((const struct sockaddr_in6 *) addr)->sin6_addr, 800 tmp, sizeof tmp); 801 break; 802 #endif 803 default: 804 strcmp(tmp, "<unknown>"); 805 } 806 host = nlm_create_host(tmp); 807 memcpy(&host->nh_addr, addr, addr->sa_len); 808 host->nh_vers = vers; 809 } 810 host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT; 811 812 return (host); 813 } 814 815 /* 816 * Find the NLM host that matches the value of 'sysid'. If none 817 * exists, return NULL. 818 */ 819 static struct nlm_host * 820 nlm_find_host_by_sysid(int sysid) 821 { 822 struct nlm_host *host; 823 824 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 825 if (host->nh_sysid == sysid) 826 return (host); 827 } 828 829 return (NULL); 830 } 831 832 /* 833 * Unregister this NLM host with the local NSM due to idleness. 834 */ 835 static void 836 nlm_host_unmonitor(struct nlm_host *host) 837 { 838 mon_id smmonid; 839 sm_stat_res smstat; 840 struct timeval timo; 841 enum clnt_stat stat; 842 843 if (nlm_debug_level >= 1) 844 printf("NLM: unmonitoring %s (sysid %d)\n", 845 host->nh_caller_name, host->nh_sysid); 846 847 /* 848 * We put our assigned system ID value in the priv field to 849 * make it simpler to find the host if we are notified of a 850 * host restart. 851 */ 852 smmonid.mon_name = host->nh_caller_name; 853 smmonid.my_id.my_name = "localhost"; 854 smmonid.my_id.my_prog = NLM_PROG; 855 smmonid.my_id.my_vers = NLM_SM; 856 smmonid.my_id.my_proc = NLM_SM_NOTIFY; 857 858 timo.tv_sec = 25; 859 timo.tv_usec = 0; 860 stat = CLNT_CALL(nlm_nsm, SM_UNMON, 861 (xdrproc_t) xdr_mon, &smmonid, 862 (xdrproc_t) xdr_sm_stat, &smstat, timo); 863 864 if (stat != RPC_SUCCESS) { 865 printf("Failed to contact local NSM - rpc error %d\n", stat); 866 return; 867 } 868 if (smstat.res_stat == stat_fail) { 869 printf("Local NSM refuses to unmonitor %s\n", 870 host->nh_caller_name); 871 return; 872 } 873 874 host->nh_monstate = NLM_UNMONITORED; 875 } 876 877 /* 878 * Register this NLM host with the local NSM so that we can be 879 * notified if it reboots. 880 */ 881 static void 882 nlm_host_monitor(struct nlm_host *host, int state) 883 { 884 mon smmon; 885 sm_stat_res smstat; 886 struct timeval timo; 887 enum clnt_stat stat; 888 889 if (state && !host->nh_state) { 890 /* 891 * This is the first time we have seen an NSM state 892 * value for this host. We record it here to help 893 * detect host reboots. 894 */ 895 host->nh_state = state; 896 if (nlm_debug_level >= 1) 897 printf("NLM: host %s (sysid %d) has NSM state %d\n", 898 host->nh_caller_name, host->nh_sysid, state); 899 } 900 901 if (host->nh_monstate != NLM_UNMONITORED) 902 return; 903 904 if (nlm_debug_level >= 1) 905 printf("NLM: monitoring %s (sysid %d)\n", 906 host->nh_caller_name, host->nh_sysid); 907 908 /* 909 * We put our assigned system ID value in the priv field to 910 * make it simpler to find the host if we are notified of a 911 * host restart. 912 */ 913 smmon.mon_id.mon_name = host->nh_caller_name; 914 smmon.mon_id.my_id.my_name = "localhost"; 915 smmon.mon_id.my_id.my_prog = NLM_PROG; 916 smmon.mon_id.my_id.my_vers = NLM_SM; 917 smmon.mon_id.my_id.my_proc = NLM_SM_NOTIFY; 918 memcpy(smmon.priv, &host->nh_sysid, sizeof(host->nh_sysid)); 919 920 timo.tv_sec = 25; 921 timo.tv_usec = 0; 922 stat = CLNT_CALL(nlm_nsm, SM_MON, 923 (xdrproc_t) xdr_mon, &smmon, 924 (xdrproc_t) xdr_sm_stat, &smstat, timo); 925 926 if (stat != RPC_SUCCESS) { 927 printf("Failed to contact local NSM - rpc error %d\n", stat); 928 return; 929 } 930 if (smstat.res_stat == stat_fail) { 931 printf("Local NSM refuses to monitor %s\n", 932 host->nh_caller_name); 933 host->nh_monstate = NLM_MONITOR_FAILED; 934 return; 935 } 936 937 host->nh_monstate = NLM_MONITORED; 938 } 939 940 /* 941 * Return an RPC client handle that can be used to talk to the NLM 942 * running on the given host. 943 */ 944 CLIENT * 945 nlm_host_get_rpc(struct nlm_host *host) 946 { 947 struct timeval zero; 948 949 /* 950 * We can't hold onto RPC handles for too long - the async 951 * call/reply protocol used by some NLM clients makes it hard 952 * to tell when they change port numbers (e.g. after a 953 * reboot). Note that if a client reboots while it isn't 954 * holding any locks, it won't bother to notify us. We 955 * expire the RPC handles after two minutes. 956 */ 957 if (host->nh_rpc && time_uptime > host->nh_rpc_create_time + 2*60) { 958 CLIENT *client; 959 client = host->nh_rpc; 960 host->nh_rpc = NULL; 961 CLNT_DESTROY(client); 962 } 963 964 if (host->nh_rpc) 965 return (host->nh_rpc); 966 967 /* 968 * Set the send timeout to zero - we only use this rpc handle 969 * for sending async replies which have no return value. 970 */ 971 host->nh_rpc = nlm_get_rpc((struct sockaddr *)&host->nh_addr, 972 NLM_PROG, host->nh_vers); 973 974 if (host->nh_rpc) { 975 zero.tv_sec = 0; 976 zero.tv_usec = 0; 977 CLNT_CONTROL(host->nh_rpc, CLSET_TIMEOUT, &zero); 978 979 host->nh_rpc_create_time = time_uptime; 980 } 981 982 return (host->nh_rpc); 983 } 984 985 /**********************************************************************/ 986 987 /* 988 * Syscall interface with userland. 989 */ 990 991 extern void nlm_prog_0(struct svc_req *rqstp, SVCXPRT *transp); 992 extern void nlm_prog_1(struct svc_req *rqstp, SVCXPRT *transp); 993 extern void nlm_prog_3(struct svc_req *rqstp, SVCXPRT *transp); 994 extern void nlm_prog_4(struct svc_req *rqstp, SVCXPRT *transp); 995 996 static int 997 nlm_register_services(SVCPOOL *pool, int addr_count, char **addrs) 998 { 999 static rpcvers_t versions[] = { 1000 NLM_SM, NLM_VERS, NLM_VERSX, NLM_VERS4 1001 }; 1002 static void (*dispatchers[])(struct svc_req *, SVCXPRT *) = { 1003 nlm_prog_0, nlm_prog_1, nlm_prog_3, nlm_prog_4 1004 }; 1005 static const int version_count = sizeof(versions) / sizeof(versions[0]); 1006 1007 SVCXPRT **xprts; 1008 char netid[16]; 1009 char uaddr[128]; 1010 struct netconfig *nconf; 1011 int i, j, error; 1012 1013 if (!addr_count) { 1014 printf("NLM: no service addresses given - can't start server"); 1015 return (EINVAL); 1016 } 1017 1018 xprts = malloc(addr_count * sizeof(SVCXPRT *), M_NLM, M_WAITOK); 1019 for (i = 0; i < version_count; i++) { 1020 for (j = 0; j < addr_count; j++) { 1021 /* 1022 * Create transports for the first version and 1023 * then just register everything else to the 1024 * same transports. 1025 */ 1026 if (i == 0) { 1027 char *up; 1028 1029 error = copyin(&addrs[2*j], &up, 1030 sizeof(char*)); 1031 if (error) 1032 goto out; 1033 error = copyinstr(up, netid, sizeof(netid), 1034 NULL); 1035 if (error) 1036 goto out; 1037 error = copyin(&addrs[2*j+1], &up, 1038 sizeof(char*)); 1039 if (error) 1040 goto out; 1041 error = copyinstr(up, uaddr, sizeof(uaddr), 1042 NULL); 1043 if (error) 1044 goto out; 1045 nconf = getnetconfigent(netid); 1046 if (!nconf) { 1047 printf("Can't lookup netid %s\n", 1048 netid); 1049 error = EINVAL; 1050 goto out; 1051 } 1052 xprts[j] = svc_tp_create(pool, dispatchers[i], 1053 NLM_PROG, versions[i], uaddr, nconf); 1054 if (!xprts[j]) { 1055 printf("NLM: unable to create " 1056 "(NLM_PROG, %d).\n", versions[i]); 1057 error = EINVAL; 1058 goto out; 1059 } 1060 freenetconfigent(nconf); 1061 } else { 1062 nconf = getnetconfigent(xprts[j]->xp_netid); 1063 rpcb_unset(NLM_PROG, versions[i], nconf); 1064 if (!svc_reg(xprts[j], NLM_PROG, versions[i], 1065 dispatchers[i], nconf)) { 1066 printf("NLM: can't register " 1067 "(NLM_PROG, %d)\n", versions[i]); 1068 error = EINVAL; 1069 goto out; 1070 } 1071 } 1072 } 1073 } 1074 error = 0; 1075 out: 1076 free(xprts, M_NLM); 1077 return (error); 1078 } 1079 1080 /* 1081 * Main server entry point. Contacts the local NSM to get its current 1082 * state and send SM_UNMON_ALL. Registers the NLM services and then 1083 * services requests. Does not return until the server is interrupted 1084 * by a signal. 1085 */ 1086 static int 1087 nlm_server_main(int addr_count, char **addrs) 1088 { 1089 struct thread *td = curthread; 1090 int error; 1091 SVCPOOL *pool = NULL; 1092 struct sockopt opt; 1093 int portlow; 1094 #ifdef INET6 1095 struct sockaddr_in6 sin6; 1096 #endif 1097 struct sockaddr_in sin; 1098 my_id id; 1099 sm_stat smstat; 1100 struct timeval timo; 1101 enum clnt_stat stat; 1102 struct nlm_host *host; 1103 1104 if (nlm_socket) { 1105 printf("NLM: can't start server - it appears to be running already\n"); 1106 return (EPERM); 1107 } 1108 1109 memset(&opt, 0, sizeof(opt)); 1110 1111 nlm_socket = NULL; 1112 error = socreate(AF_INET, &nlm_socket, SOCK_DGRAM, 0, 1113 td->td_ucred, td); 1114 if (error) { 1115 printf("NLM: can't create IPv4 socket - error %d\n", error); 1116 return (error); 1117 } 1118 opt.sopt_dir = SOPT_SET; 1119 opt.sopt_level = IPPROTO_IP; 1120 opt.sopt_name = IP_PORTRANGE; 1121 portlow = IP_PORTRANGE_LOW; 1122 opt.sopt_val = &portlow; 1123 opt.sopt_valsize = sizeof(portlow); 1124 sosetopt(nlm_socket, &opt); 1125 1126 #ifdef INET6 1127 nlm_socket6 = NULL; 1128 error = socreate(AF_INET6, &nlm_socket6, SOCK_DGRAM, 0, 1129 td->td_ucred, td); 1130 if (error) { 1131 printf("NLM: can't create IPv6 socket - error %d\n", error); 1132 return (error); 1133 } 1134 opt.sopt_dir = SOPT_SET; 1135 opt.sopt_level = IPPROTO_IPV6; 1136 opt.sopt_name = IPV6_PORTRANGE; 1137 portlow = IPV6_PORTRANGE_LOW; 1138 opt.sopt_val = &portlow; 1139 opt.sopt_valsize = sizeof(portlow); 1140 sosetopt(nlm_socket6, &opt); 1141 #endif 1142 1143 #ifdef INET6 1144 memset(&sin6, 0, sizeof(sin6)); 1145 sin6.sin6_len = sizeof(sin6); 1146 sin6.sin6_family = AF_INET6; 1147 sin6.sin6_addr = in6addr_loopback; 1148 nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin6, SM_PROG, SM_VERS); 1149 if (!nlm_nsm) { 1150 #endif 1151 memset(&sin, 0, sizeof(sin)); 1152 sin.sin_len = sizeof(sin); 1153 sin.sin_family = AF_INET; 1154 sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); 1155 nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin, SM_PROG, 1156 SM_VERS); 1157 #ifdef INET6 1158 } 1159 #endif 1160 1161 if (!nlm_nsm) { 1162 printf("Can't start NLM - unable to contact NSM\n"); 1163 error = EINVAL; 1164 goto out; 1165 } 1166 1167 pool = svcpool_create(); 1168 1169 error = nlm_register_services(pool, addr_count, addrs); 1170 if (error) 1171 goto out; 1172 1173 memset(&id, 0, sizeof(id)); 1174 id.my_name = "NFS NLM"; 1175 1176 timo.tv_sec = 25; 1177 timo.tv_usec = 0; 1178 stat = CLNT_CALL(nlm_nsm, SM_UNMON_ALL, 1179 (xdrproc_t) xdr_my_id, &id, 1180 (xdrproc_t) xdr_sm_stat, &smstat, timo); 1181 1182 if (stat != RPC_SUCCESS) { 1183 struct rpc_err err; 1184 1185 CLNT_GETERR(nlm_nsm, &err); 1186 printf("NLM: unexpected error contacting NSM, stat=%d, errno=%d\n", 1187 stat, err.re_errno); 1188 error = EINVAL; 1189 goto out; 1190 } 1191 1192 if (nlm_debug_level >= 1) 1193 printf("NLM: local NSM state is %d\n", smstat.state); 1194 1195 svc_run(pool); 1196 error = 0; 1197 1198 out: 1199 if (pool) 1200 svcpool_destroy(pool); 1201 1202 /* 1203 * Trash all the existing state so that if the server 1204 * restarts, it gets a clean slate. 1205 */ 1206 while ((host = TAILQ_FIRST(&nlm_hosts)) != NULL) { 1207 nlm_host_notify(host, 0, TRUE); 1208 } 1209 if (nlm_nsm) { 1210 AUTH_DESTROY(nlm_nsm->cl_auth); 1211 CLNT_DESTROY(nlm_nsm); 1212 nlm_nsm = NULL; 1213 } 1214 if (nlm_lockd) { 1215 AUTH_DESTROY(nlm_lockd->cl_auth); 1216 CLNT_DESTROY(nlm_lockd); 1217 nlm_lockd = NULL; 1218 } 1219 1220 soclose(nlm_socket); 1221 nlm_socket = NULL; 1222 #ifdef INET6 1223 soclose(nlm_socket6); 1224 nlm_socket6 = NULL; 1225 #endif 1226 1227 return (error); 1228 } 1229 1230 int 1231 nlm_syscall(struct thread *td, struct nlm_syscall_args *uap) 1232 { 1233 int error; 1234 1235 #if __FreeBSD_version >= 700000 1236 error = priv_check(td, PRIV_NFS_LOCKD); 1237 #else 1238 error = suser(td); 1239 #endif 1240 if (error) 1241 return (error); 1242 1243 nlm_debug_level = uap->debug_level; 1244 nlm_grace_threshold = time_uptime + uap->grace_period; 1245 nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD; 1246 1247 return nlm_server_main(uap->addr_count, uap->addrs); 1248 } 1249 1250 /**********************************************************************/ 1251 1252 /* 1253 * NLM implementation details, called from the RPC stubs. 1254 */ 1255 1256 1257 void 1258 nlm_sm_notify(struct nlm_sm_status *argp) 1259 { 1260 uint32_t sysid; 1261 struct nlm_host *host; 1262 1263 if (nlm_debug_level >= 3) 1264 printf("nlm_sm_notify(): mon_name = %s\n", argp->mon_name); 1265 memcpy(&sysid, &argp->priv, sizeof(sysid)); 1266 host = nlm_find_host_by_sysid(sysid); 1267 if (host) 1268 nlm_host_notify(host, argp->state, FALSE); 1269 } 1270 1271 static void 1272 nlm_convert_to_fhandle_t(fhandle_t *fhp, struct netobj *p) 1273 { 1274 memcpy(fhp, p->n_bytes, sizeof(fhandle_t)); 1275 } 1276 1277 struct vfs_state { 1278 struct mount *vs_mp; 1279 struct vnode *vs_vp; 1280 int vs_vfslocked; 1281 int vs_vnlocked; 1282 }; 1283 1284 static int 1285 nlm_get_vfs_state(struct nlm_host *host, struct svc_req *rqstp, 1286 fhandle_t *fhp, struct vfs_state *vs) 1287 { 1288 int error, exflags, freecred; 1289 struct ucred *cred = NULL, *credanon; 1290 1291 memset(vs, 0, sizeof(*vs)); 1292 freecred = FALSE; 1293 1294 vs->vs_mp = vfs_getvfs(&fhp->fh_fsid); 1295 if (!vs->vs_mp) { 1296 return (ESTALE); 1297 } 1298 vs->vs_vfslocked = VFS_LOCK_GIANT(vs->vs_mp); 1299 1300 error = VFS_CHECKEXP(vs->vs_mp, (struct sockaddr *)&host->nh_addr, 1301 &exflags, &credanon); 1302 if (error) 1303 goto out; 1304 1305 if (exflags & MNT_EXRDONLY || (vs->vs_mp->mnt_flag & MNT_RDONLY)) { 1306 error = EROFS; 1307 goto out; 1308 } 1309 1310 error = VFS_FHTOVP(vs->vs_mp, &fhp->fh_fid, &vs->vs_vp); 1311 if (error) 1312 goto out; 1313 vs->vs_vnlocked = TRUE; 1314 1315 cred = crget(); 1316 freecred = TRUE; 1317 if (!svc_getcred(rqstp, cred, NULL)) { 1318 error = EINVAL; 1319 goto out; 1320 } 1321 if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) { 1322 crfree(cred); 1323 cred = credanon; 1324 freecred = FALSE; 1325 } 1326 1327 /* 1328 * Check cred. 1329 */ 1330 error = VOP_ACCESS(vs->vs_vp, VWRITE, cred, curthread); 1331 if (error) 1332 goto out; 1333 1334 #if __FreeBSD_version < 800011 1335 VOP_UNLOCK(vs->vs_vp, 0, curthread); 1336 #else 1337 VOP_UNLOCK(vs->vs_vp, 0); 1338 #endif 1339 vs->vs_vnlocked = FALSE; 1340 1341 out: 1342 if (freecred) 1343 crfree(cred); 1344 1345 return (error); 1346 } 1347 1348 static void 1349 nlm_release_vfs_state(struct vfs_state *vs) 1350 { 1351 1352 if (vs->vs_vp) { 1353 if (vs->vs_vnlocked) 1354 vput(vs->vs_vp); 1355 else 1356 vrele(vs->vs_vp); 1357 } 1358 if (vs->vs_mp) 1359 vfs_rel(vs->vs_mp); 1360 VFS_UNLOCK_GIANT(vs->vs_vfslocked); 1361 } 1362 1363 static nlm4_stats 1364 nlm_convert_error(int error) 1365 { 1366 1367 if (error == ESTALE) 1368 return nlm4_stale_fh; 1369 else if (error == EROFS) 1370 return nlm4_rofs; 1371 else 1372 return nlm4_failed; 1373 } 1374 1375 struct nlm_host * 1376 nlm_do_test(nlm4_testargs *argp, nlm4_testres *result, struct svc_req *rqstp) 1377 { 1378 fhandle_t fh; 1379 struct vfs_state vs; 1380 struct nlm_host *host, *bhost; 1381 int error, sysid; 1382 struct flock fl; 1383 1384 memset(result, 0, sizeof(*result)); 1385 1386 host = nlm_find_host_by_name(argp->alock.caller_name, rqstp); 1387 if (!host) { 1388 result->stat.stat = nlm4_denied_nolocks; 1389 return (NULL); 1390 } 1391 1392 if (nlm_debug_level >= 3) 1393 printf("nlm_do_test(): caller_name = %s (sysid = %d)\n", 1394 host->nh_caller_name, host->nh_sysid); 1395 1396 nlm_free_finished_locks(host); 1397 sysid = host->nh_sysid; 1398 1399 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 1400 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 1401 1402 if (time_uptime < nlm_grace_threshold) { 1403 result->stat.stat = nlm4_denied_grace_period; 1404 return (host); 1405 } 1406 1407 error = nlm_get_vfs_state(host, rqstp, &fh, &vs); 1408 if (error) { 1409 result->stat.stat = nlm_convert_error(error); 1410 goto out; 1411 } 1412 1413 fl.l_start = argp->alock.l_offset; 1414 fl.l_len = argp->alock.l_len; 1415 fl.l_pid = argp->alock.svid; 1416 fl.l_sysid = sysid; 1417 fl.l_whence = SEEK_SET; 1418 if (argp->exclusive) 1419 fl.l_type = F_WRLCK; 1420 else 1421 fl.l_type = F_RDLCK; 1422 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_GETLK, &fl, F_REMOTE); 1423 if (error) { 1424 result->stat.stat = nlm4_failed; 1425 goto out; 1426 } 1427 1428 if (fl.l_type == F_UNLCK) { 1429 result->stat.stat = nlm4_granted; 1430 } else { 1431 result->stat.stat = nlm4_denied; 1432 result->stat.nlm4_testrply_u.holder.exclusive = 1433 (fl.l_type == F_WRLCK); 1434 result->stat.nlm4_testrply_u.holder.svid = fl.l_pid; 1435 bhost = nlm_find_host_by_sysid(fl.l_sysid); 1436 if (bhost) { 1437 /* 1438 * We don't have any useful way of recording 1439 * the value of oh used in the original lock 1440 * request. Ideally, the test reply would have 1441 * a space for the owning host's name allowing 1442 * our caller's NLM to keep track. 1443 * 1444 * As far as I can see, Solaris uses an eight 1445 * byte structure for oh which contains a four 1446 * byte pid encoded in local byte order and 1447 * the first four bytes of the host 1448 * name. Linux uses a variable length string 1449 * 'pid@hostname' in ascii but doesn't even 1450 * return that in test replies. 1451 * 1452 * For the moment, return nothing in oh 1453 * (already zero'ed above). 1454 */ 1455 } 1456 result->stat.nlm4_testrply_u.holder.l_offset = fl.l_start; 1457 result->stat.nlm4_testrply_u.holder.l_len = fl.l_len; 1458 } 1459 1460 out: 1461 nlm_release_vfs_state(&vs); 1462 return (host); 1463 } 1464 1465 struct nlm_host * 1466 nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp, 1467 bool_t monitor) 1468 { 1469 fhandle_t fh; 1470 struct vfs_state vs; 1471 struct nlm_host *host; 1472 int error, sysid; 1473 struct flock fl; 1474 1475 memset(result, 0, sizeof(*result)); 1476 1477 host = nlm_find_host_by_name(argp->alock.caller_name, rqstp); 1478 if (!host) { 1479 result->stat.stat = nlm4_denied_nolocks; 1480 return (NULL); 1481 } 1482 1483 if (nlm_debug_level >= 3) 1484 printf("nlm_do_lock(): caller_name = %s (sysid = %d)\n", 1485 host->nh_caller_name, host->nh_sysid); 1486 1487 if (monitor && host->nh_state && argp->state 1488 && host->nh_state != argp->state) { 1489 /* 1490 * The host rebooted without telling us. Trash its 1491 * locks. 1492 */ 1493 nlm_host_notify(host, argp->state, FALSE); 1494 } 1495 1496 nlm_free_finished_locks(host); 1497 sysid = host->nh_sysid; 1498 1499 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 1500 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 1501 1502 if (time_uptime < nlm_grace_threshold && !argp->reclaim) { 1503 result->stat.stat = nlm4_denied_grace_period; 1504 return (host); 1505 } 1506 1507 error = nlm_get_vfs_state(host, rqstp, &fh, &vs); 1508 if (error) { 1509 result->stat.stat = nlm_convert_error(error); 1510 goto out; 1511 } 1512 1513 fl.l_start = argp->alock.l_offset; 1514 fl.l_len = argp->alock.l_len; 1515 fl.l_pid = argp->alock.svid; 1516 fl.l_sysid = sysid; 1517 fl.l_whence = SEEK_SET; 1518 if (argp->exclusive) 1519 fl.l_type = F_WRLCK; 1520 else 1521 fl.l_type = F_RDLCK; 1522 if (argp->block) { 1523 struct nlm_async_lock *af; 1524 1525 /* 1526 * First, make sure we can contact the host's NLM. 1527 */ 1528 if (!nlm_host_get_rpc(host)) { 1529 result->stat.stat = nlm4_failed; 1530 goto out; 1531 } 1532 1533 /* 1534 * First we need to check and see if there is an 1535 * existing blocked lock that matches. This could be a 1536 * badly behaved client or an RPC re-send. If we find 1537 * one, just return nlm4_blocked. 1538 */ 1539 mtx_lock(&host->nh_lock); 1540 TAILQ_FOREACH(af, &host->nh_pending, af_link) { 1541 if (af->af_fl.l_start == fl.l_start 1542 && af->af_fl.l_len == fl.l_len 1543 && af->af_fl.l_pid == fl.l_pid 1544 && af->af_fl.l_type == fl.l_type) { 1545 break; 1546 } 1547 } 1548 mtx_unlock(&host->nh_lock); 1549 if (af) { 1550 result->stat.stat = nlm4_blocked; 1551 goto out; 1552 } 1553 1554 af = malloc(sizeof(struct nlm_async_lock), M_NLM, 1555 M_WAITOK|M_ZERO); 1556 TASK_INIT(&af->af_task, 0, nlm_lock_callback, af); 1557 af->af_vp = vs.vs_vp; 1558 af->af_fl = fl; 1559 af->af_host = host; 1560 /* 1561 * We use M_RPC here so that we can xdr_free the thing 1562 * later. 1563 */ 1564 af->af_granted.exclusive = argp->exclusive; 1565 af->af_granted.alock.caller_name = 1566 strdup(argp->alock.caller_name, M_RPC); 1567 nlm_copy_netobj(&af->af_granted.alock.fh, 1568 &argp->alock.fh, M_RPC); 1569 nlm_copy_netobj(&af->af_granted.alock.oh, 1570 &argp->alock.oh, M_RPC); 1571 af->af_granted.alock.svid = argp->alock.svid; 1572 af->af_granted.alock.l_offset = argp->alock.l_offset; 1573 af->af_granted.alock.l_len = argp->alock.l_len; 1574 1575 /* 1576 * Put the entry on the pending list before calling 1577 * VOP_ADVLOCKASYNC. We do this in case the lock 1578 * request was blocked (returning EINPROGRESS) but 1579 * then granted before we manage to run again. The 1580 * client may receive the granted message before we 1581 * send our blocked reply but thats their problem. 1582 */ 1583 mtx_lock(&host->nh_lock); 1584 TAILQ_INSERT_TAIL(&host->nh_pending, af, af_link); 1585 mtx_unlock(&host->nh_lock); 1586 1587 error = VOP_ADVLOCKASYNC(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE, 1588 &af->af_task, &af->af_cookie); 1589 1590 /* 1591 * If the lock completed synchronously, just free the 1592 * tracking structure now. 1593 */ 1594 if (error != EINPROGRESS) { 1595 mtx_lock(&host->nh_lock); 1596 TAILQ_REMOVE(&host->nh_pending, af, af_link); 1597 mtx_unlock(&host->nh_lock); 1598 xdr_free((xdrproc_t) xdr_nlm4_testargs, 1599 &af->af_granted); 1600 free(af, M_NLM); 1601 } else { 1602 if (nlm_debug_level >= 2) 1603 printf("NLM: pending async lock %p for %s " 1604 "(sysid %d)\n", 1605 af, host->nh_caller_name, sysid); 1606 /* 1607 * Don't vrele the vnode just yet - this must 1608 * wait until either the async callback 1609 * happens or the lock is cancelled. 1610 */ 1611 vs.vs_vp = NULL; 1612 } 1613 } else { 1614 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE); 1615 } 1616 1617 if (error) { 1618 if (error == EINPROGRESS) { 1619 result->stat.stat = nlm4_blocked; 1620 } else if (error == EDEADLK) { 1621 result->stat.stat = nlm4_deadlck; 1622 } else if (error == EAGAIN) { 1623 result->stat.stat = nlm4_denied; 1624 } else { 1625 result->stat.stat = nlm4_failed; 1626 } 1627 } else { 1628 if (monitor) 1629 nlm_host_monitor(host, argp->state); 1630 result->stat.stat = nlm4_granted; 1631 } 1632 1633 out: 1634 nlm_release_vfs_state(&vs); 1635 1636 return (host); 1637 } 1638 1639 struct nlm_host * 1640 nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *result, struct svc_req *rqstp) 1641 { 1642 fhandle_t fh; 1643 struct vfs_state vs; 1644 struct nlm_host *host; 1645 int error, sysid; 1646 struct flock fl; 1647 struct nlm_async_lock *af; 1648 1649 memset(result, 0, sizeof(*result)); 1650 1651 host = nlm_find_host_by_name(argp->alock.caller_name, rqstp); 1652 if (!host) { 1653 result->stat.stat = nlm4_denied_nolocks; 1654 return (NULL); 1655 } 1656 1657 if (nlm_debug_level >= 3) 1658 printf("nlm_do_cancel(): caller_name = %s (sysid = %d)\n", 1659 host->nh_caller_name, host->nh_sysid); 1660 1661 nlm_free_finished_locks(host); 1662 sysid = host->nh_sysid; 1663 1664 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 1665 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 1666 1667 if (time_uptime < nlm_grace_threshold) { 1668 result->stat.stat = nlm4_denied_grace_period; 1669 return (host); 1670 } 1671 1672 error = nlm_get_vfs_state(host, rqstp, &fh, &vs); 1673 if (error) { 1674 result->stat.stat = nlm_convert_error(error); 1675 goto out; 1676 } 1677 1678 fl.l_start = argp->alock.l_offset; 1679 fl.l_len = argp->alock.l_len; 1680 fl.l_pid = argp->alock.svid; 1681 fl.l_sysid = sysid; 1682 fl.l_whence = SEEK_SET; 1683 if (argp->exclusive) 1684 fl.l_type = F_WRLCK; 1685 else 1686 fl.l_type = F_RDLCK; 1687 1688 /* 1689 * First we need to try and find the async lock request - if 1690 * there isn't one, we give up and return nlm4_denied. 1691 */ 1692 mtx_lock(&host->nh_lock); 1693 1694 TAILQ_FOREACH(af, &host->nh_pending, af_link) { 1695 if (af->af_fl.l_start == fl.l_start 1696 && af->af_fl.l_len == fl.l_len 1697 && af->af_fl.l_pid == fl.l_pid 1698 && af->af_fl.l_type == fl.l_type) { 1699 break; 1700 } 1701 } 1702 1703 if (!af) { 1704 mtx_unlock(&host->nh_lock); 1705 result->stat.stat = nlm4_denied; 1706 goto out; 1707 } 1708 1709 error = nlm_cancel_async_lock(af); 1710 1711 if (error) { 1712 result->stat.stat = nlm4_denied; 1713 } else { 1714 result->stat.stat = nlm4_granted; 1715 } 1716 1717 mtx_unlock(&host->nh_lock); 1718 1719 out: 1720 nlm_release_vfs_state(&vs); 1721 1722 return (host); 1723 } 1724 1725 struct nlm_host * 1726 nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *result, struct svc_req *rqstp) 1727 { 1728 fhandle_t fh; 1729 struct vfs_state vs; 1730 struct nlm_host *host; 1731 int error, sysid; 1732 struct flock fl; 1733 1734 memset(result, 0, sizeof(*result)); 1735 1736 host = nlm_find_host_by_name(argp->alock.caller_name, rqstp); 1737 if (!host) { 1738 result->stat.stat = nlm4_denied_nolocks; 1739 return (NULL); 1740 } 1741 1742 if (nlm_debug_level >= 3) 1743 printf("nlm_do_unlock(): caller_name = %s (sysid = %d)\n", 1744 host->nh_caller_name, host->nh_sysid); 1745 1746 nlm_free_finished_locks(host); 1747 sysid = host->nh_sysid; 1748 1749 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 1750 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 1751 1752 if (time_uptime < nlm_grace_threshold) { 1753 result->stat.stat = nlm4_denied_grace_period; 1754 return (host); 1755 } 1756 1757 error = nlm_get_vfs_state(host, rqstp, &fh, &vs); 1758 if (error) { 1759 result->stat.stat = nlm_convert_error(error); 1760 goto out; 1761 } 1762 1763 fl.l_start = argp->alock.l_offset; 1764 fl.l_len = argp->alock.l_len; 1765 fl.l_pid = argp->alock.svid; 1766 fl.l_sysid = sysid; 1767 fl.l_whence = SEEK_SET; 1768 fl.l_type = F_UNLCK; 1769 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_UNLCK, &fl, F_REMOTE); 1770 1771 /* 1772 * Ignore the error - there is no result code for failure, 1773 * only for grace period. 1774 */ 1775 result->stat.stat = nlm4_granted; 1776 1777 out: 1778 nlm_release_vfs_state(&vs); 1779 1780 return (host); 1781 } 1782 1783 void 1784 nlm_do_free_all(nlm4_notify *argp) 1785 { 1786 struct nlm_host *host, *thost; 1787 1788 TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, thost) { 1789 if (!strcmp(host->nh_caller_name, argp->name)) 1790 nlm_host_notify(host, argp->state, FALSE); 1791 } 1792 } 1793 1794 #define _PATH_RPCLOCKDSOCK "/var/run/rpclockd.sock" 1795 1796 /* 1797 * Make a connection to the userland lockd - we push anything we can't 1798 * handle out to userland. 1799 */ 1800 CLIENT * 1801 nlm_user_lockd(void) 1802 { 1803 struct sockaddr_un sun; 1804 struct netconfig *nconf; 1805 struct timeval zero; 1806 1807 if (nlm_lockd) 1808 return (nlm_lockd); 1809 1810 sun.sun_family = AF_LOCAL; 1811 strcpy(sun.sun_path, _PATH_RPCLOCKDSOCK); 1812 sun.sun_len = SUN_LEN(&sun); 1813 1814 nconf = getnetconfigent("local"); 1815 nlm_lockd = clnt_reconnect_create(nconf, (struct sockaddr *) &sun, 1816 NLM_PROG, NLM_VERS4, RPC_MAXDATASIZE, RPC_MAXDATASIZE); 1817 1818 /* 1819 * Set the send timeout to zero - we only use this rpc handle 1820 * for sending async replies which have no return value. 1821 */ 1822 zero.tv_sec = 0; 1823 zero.tv_usec = 0; 1824 CLNT_CONTROL(nlm_lockd, CLSET_TIMEOUT, &zero); 1825 1826 return (nlm_lockd); 1827 } 1828 1829 /* 1830 * Kernel module glue 1831 */ 1832 static int 1833 nfslockd_modevent(module_t mod, int type, void *data) 1834 { 1835 1836 return (0); 1837 } 1838 static moduledata_t nfslockd_mod = { 1839 "nfslockd", 1840 nfslockd_modevent, 1841 NULL, 1842 }; 1843 DECLARE_MODULE(nfslockd, nfslockd_mod, SI_SUB_VFS, SI_ORDER_ANY); 1844 1845 /* So that loader and kldload(2) can find us, wherever we are.. */ 1846 MODULE_DEPEND(nfslockd, krpc, 1, 1, 1); 1847 MODULE_VERSION(nfslockd, 1); 1848