1 /*- 2 * Copyright (c) 2009-2010 The FreeBSD Foundation 3 * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pjd@FreeBSD.org> 4 * All rights reserved. 5 * 6 * This software was developed by Pawel Jakub Dawidek under sponsorship from 7 * the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/linker.h> 36 #include <sys/module.h> 37 #include <sys/stat.h> 38 #include <sys/wait.h> 39 40 #include <err.h> 41 #include <errno.h> 42 #include <libutil.h> 43 #include <signal.h> 44 #include <stdbool.h> 45 #include <stdio.h> 46 #include <stdlib.h> 47 #include <string.h> 48 #include <sysexits.h> 49 #include <unistd.h> 50 51 #include <activemap.h> 52 #include <pjdlog.h> 53 54 #include "control.h" 55 #include "event.h" 56 #include "hast.h" 57 #include "hast_proto.h" 58 #include "hastd.h" 59 #include "hooks.h" 60 #include "subr.h" 61 62 /* Path to configuration file. */ 63 const char *cfgpath = HAST_CONFIG; 64 /* Hastd configuration. */ 65 static struct hastd_config *cfg; 66 /* Was SIGINT or SIGTERM signal received? */ 67 bool sigexit_received = false; 68 /* PID file handle. */ 69 struct pidfh *pfh; 70 71 /* How often check for hooks running for too long. */ 72 #define REPORT_INTERVAL 5 73 74 static void 75 usage(void) 76 { 77 78 errx(EX_USAGE, "[-dFh] [-c config] [-P pidfile]"); 79 } 80 81 static void 82 g_gate_load(void) 83 { 84 85 if (modfind("g_gate") == -1) { 86 /* Not present in kernel, try loading it. */ 87 if (kldload("geom_gate") == -1 || modfind("g_gate") == -1) { 88 if (errno != EEXIST) { 89 pjdlog_exit(EX_OSERR, 90 "Unable to load geom_gate module"); 91 } 92 } 93 } 94 } 95 96 void 97 descriptors_cleanup(struct hast_resource *res) 98 { 99 struct hast_resource *tres; 100 101 TAILQ_FOREACH(tres, &cfg->hc_resources, hr_next) { 102 if (tres == res) { 103 PJDLOG_VERIFY(res->hr_role == HAST_ROLE_SECONDARY || 104 (res->hr_remotein == NULL && 105 res->hr_remoteout == NULL)); 106 continue; 107 } 108 if (tres->hr_remotein != NULL) 109 proto_close(tres->hr_remotein); 110 if (tres->hr_remoteout != NULL) 111 proto_close(tres->hr_remoteout); 112 if (tres->hr_ctrl != NULL) 113 proto_close(tres->hr_ctrl); 114 if (tres->hr_event != NULL) 115 proto_close(tres->hr_event); 116 if (tres->hr_conn != NULL) 117 proto_close(tres->hr_conn); 118 } 119 if (cfg->hc_controlin != NULL) 120 proto_close(cfg->hc_controlin); 121 proto_close(cfg->hc_controlconn); 122 proto_close(cfg->hc_listenconn); 123 (void)pidfile_close(pfh); 124 hook_fini(); 125 pjdlog_fini(); 126 } 127 128 static const char * 129 dtype2str(mode_t mode) 130 { 131 132 if (S_ISBLK(mode)) 133 return ("block device"); 134 else if (S_ISCHR(mode)) 135 return ("character device"); 136 else if (S_ISDIR(mode)) 137 return ("directory"); 138 else if (S_ISFIFO(mode)) 139 return ("pipe or FIFO"); 140 else if (S_ISLNK(mode)) 141 return ("symbolic link"); 142 else if (S_ISREG(mode)) 143 return ("regular file"); 144 else if (S_ISSOCK(mode)) 145 return ("socket"); 146 else if (S_ISWHT(mode)) 147 return ("whiteout"); 148 else 149 return ("unknown"); 150 } 151 152 void 153 descriptors_assert(const struct hast_resource *res, int pjdlogmode) 154 { 155 char msg[256]; 156 struct stat sb; 157 long maxfd; 158 bool isopen; 159 mode_t mode; 160 int fd; 161 162 /* 163 * At this point descriptor to syslog socket is closed, so if we want 164 * to log assertion message, we have to first store it in 'msg' local 165 * buffer and then open syslog socket and log it. 166 */ 167 msg[0] = '\0'; 168 169 maxfd = sysconf(_SC_OPEN_MAX); 170 if (maxfd < 0) { 171 pjdlog_init(pjdlogmode); 172 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, 173 role2str(res->hr_role)); 174 pjdlog_errno(LOG_WARNING, "sysconf(_SC_OPEN_MAX) failed"); 175 pjdlog_fini(); 176 maxfd = 16384; 177 } 178 for (fd = 0; fd <= maxfd; fd++) { 179 if (fstat(fd, &sb) == 0) { 180 isopen = true; 181 mode = sb.st_mode; 182 } else if (errno == EBADF) { 183 isopen = false; 184 mode = 0; 185 } else { 186 (void)snprintf(msg, sizeof(msg), 187 "Unable to fstat descriptor %d: %s", fd, 188 strerror(errno)); 189 break; 190 } 191 if (fd == STDIN_FILENO || fd == STDOUT_FILENO || 192 fd == STDERR_FILENO) { 193 if (!isopen) { 194 (void)snprintf(msg, sizeof(msg), 195 "Descriptor %d (%s) is closed, but should be open.", 196 fd, (fd == STDIN_FILENO ? "stdin" : 197 (fd == STDOUT_FILENO ? "stdout" : "stderr"))); 198 break; 199 } 200 } else if (fd == proto_descriptor(res->hr_event)) { 201 if (!isopen) { 202 (void)snprintf(msg, sizeof(msg), 203 "Descriptor %d (event) is closed, but should be open.", 204 fd); 205 break; 206 } 207 if (!S_ISSOCK(mode)) { 208 (void)snprintf(msg, sizeof(msg), 209 "Descriptor %d (event) is %s, but should be %s.", 210 fd, dtype2str(mode), dtype2str(S_IFSOCK)); 211 break; 212 } 213 } else if (fd == proto_descriptor(res->hr_ctrl)) { 214 if (!isopen) { 215 (void)snprintf(msg, sizeof(msg), 216 "Descriptor %d (ctrl) is closed, but should be open.", 217 fd); 218 break; 219 } 220 if (!S_ISSOCK(mode)) { 221 (void)snprintf(msg, sizeof(msg), 222 "Descriptor %d (ctrl) is %s, but should be %s.", 223 fd, dtype2str(mode), dtype2str(S_IFSOCK)); 224 break; 225 } 226 } else if (fd == proto_descriptor(res->hr_conn)) { 227 if (!isopen) { 228 (void)snprintf(msg, sizeof(msg), 229 "Descriptor %d (conn) is closed, but should be open.", 230 fd); 231 break; 232 } 233 if (!S_ISSOCK(mode)) { 234 (void)snprintf(msg, sizeof(msg), 235 "Descriptor %d (conn) is %s, but should be %s.", 236 fd, dtype2str(mode), dtype2str(S_IFSOCK)); 237 break; 238 } 239 } else if (res->hr_role == HAST_ROLE_SECONDARY && 240 fd == proto_descriptor(res->hr_remotein)) { 241 if (!isopen) { 242 (void)snprintf(msg, sizeof(msg), 243 "Descriptor %d (remote in) is closed, but should be open.", 244 fd); 245 break; 246 } 247 if (!S_ISSOCK(mode)) { 248 (void)snprintf(msg, sizeof(msg), 249 "Descriptor %d (remote in) is %s, but should be %s.", 250 fd, dtype2str(mode), dtype2str(S_IFSOCK)); 251 break; 252 } 253 } else if (res->hr_role == HAST_ROLE_SECONDARY && 254 fd == proto_descriptor(res->hr_remoteout)) { 255 if (!isopen) { 256 (void)snprintf(msg, sizeof(msg), 257 "Descriptor %d (remote out) is closed, but should be open.", 258 fd); 259 break; 260 } 261 if (!S_ISSOCK(mode)) { 262 (void)snprintf(msg, sizeof(msg), 263 "Descriptor %d (remote out) is %s, but should be %s.", 264 fd, dtype2str(mode), dtype2str(S_IFSOCK)); 265 break; 266 } 267 } else { 268 if (isopen) { 269 (void)snprintf(msg, sizeof(msg), 270 "Descriptor %d is open (%s), but should be closed.", 271 fd, dtype2str(mode)); 272 break; 273 } 274 } 275 } 276 if (msg[0] != '\0') { 277 pjdlog_init(pjdlogmode); 278 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, 279 role2str(res->hr_role)); 280 PJDLOG_ABORT("%s", msg); 281 } 282 } 283 284 static void 285 child_exit_log(unsigned int pid, int status) 286 { 287 288 if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { 289 pjdlog_debug(1, "Worker process exited gracefully (pid=%u).", 290 pid); 291 } else if (WIFSIGNALED(status)) { 292 pjdlog_error("Worker process killed (pid=%u, signal=%d).", 293 pid, WTERMSIG(status)); 294 } else { 295 pjdlog_error("Worker process exited ungracefully (pid=%u, exitcode=%d).", 296 pid, WIFEXITED(status) ? WEXITSTATUS(status) : -1); 297 } 298 } 299 300 static void 301 child_exit(void) 302 { 303 struct hast_resource *res; 304 int status; 305 pid_t pid; 306 307 while ((pid = wait3(&status, WNOHANG, NULL)) > 0) { 308 /* Find resource related to the process that just exited. */ 309 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 310 if (pid == res->hr_workerpid) 311 break; 312 } 313 if (res == NULL) { 314 /* 315 * This can happen when new connection arrives and we 316 * cancel child responsible for the old one or if this 317 * was hook which we executed. 318 */ 319 hook_check_one(pid, status); 320 continue; 321 } 322 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, 323 role2str(res->hr_role)); 324 child_exit_log(pid, status); 325 child_cleanup(res); 326 if (res->hr_role == HAST_ROLE_PRIMARY) { 327 /* 328 * Restart child process if it was killed by signal 329 * or exited because of temporary problem. 330 */ 331 if (WIFSIGNALED(status) || 332 (WIFEXITED(status) && 333 WEXITSTATUS(status) == EX_TEMPFAIL)) { 334 sleep(1); 335 pjdlog_info("Restarting worker process."); 336 hastd_primary(res); 337 } else { 338 res->hr_role = HAST_ROLE_INIT; 339 pjdlog_info("Changing resource role back to %s.", 340 role2str(res->hr_role)); 341 } 342 } 343 pjdlog_prefix_set("%s", ""); 344 } 345 } 346 347 static bool 348 resource_needs_restart(const struct hast_resource *res0, 349 const struct hast_resource *res1) 350 { 351 352 PJDLOG_ASSERT(strcmp(res0->hr_name, res1->hr_name) == 0); 353 354 if (strcmp(res0->hr_provname, res1->hr_provname) != 0) 355 return (true); 356 if (strcmp(res0->hr_localpath, res1->hr_localpath) != 0) 357 return (true); 358 if (res0->hr_role == HAST_ROLE_INIT || 359 res0->hr_role == HAST_ROLE_SECONDARY) { 360 if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0) 361 return (true); 362 if (res0->hr_replication != res1->hr_replication) 363 return (true); 364 if (res0->hr_timeout != res1->hr_timeout) 365 return (true); 366 if (strcmp(res0->hr_exec, res1->hr_exec) != 0) 367 return (true); 368 } 369 return (false); 370 } 371 372 static bool 373 resource_needs_reload(const struct hast_resource *res0, 374 const struct hast_resource *res1) 375 { 376 377 PJDLOG_ASSERT(strcmp(res0->hr_name, res1->hr_name) == 0); 378 PJDLOG_ASSERT(strcmp(res0->hr_provname, res1->hr_provname) == 0); 379 PJDLOG_ASSERT(strcmp(res0->hr_localpath, res1->hr_localpath) == 0); 380 381 if (res0->hr_role != HAST_ROLE_PRIMARY) 382 return (false); 383 384 if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0) 385 return (true); 386 if (res0->hr_replication != res1->hr_replication) 387 return (true); 388 if (res0->hr_timeout != res1->hr_timeout) 389 return (true); 390 if (strcmp(res0->hr_exec, res1->hr_exec) != 0) 391 return (true); 392 return (false); 393 } 394 395 static void 396 resource_reload(const struct hast_resource *res) 397 { 398 struct nv *nvin, *nvout; 399 int error; 400 401 PJDLOG_ASSERT(res->hr_role == HAST_ROLE_PRIMARY); 402 403 nvout = nv_alloc(); 404 nv_add_uint8(nvout, HASTCTL_RELOAD, "cmd"); 405 nv_add_string(nvout, res->hr_remoteaddr, "remoteaddr"); 406 nv_add_int32(nvout, (int32_t)res->hr_replication, "replication"); 407 nv_add_int32(nvout, (int32_t)res->hr_timeout, "timeout"); 408 nv_add_string(nvout, res->hr_exec, "exec"); 409 if (nv_error(nvout) != 0) { 410 nv_free(nvout); 411 pjdlog_error("Unable to allocate header for reload message."); 412 return; 413 } 414 if (hast_proto_send(res, res->hr_ctrl, nvout, NULL, 0) < 0) { 415 pjdlog_errno(LOG_ERR, "Unable to send reload message"); 416 nv_free(nvout); 417 return; 418 } 419 nv_free(nvout); 420 421 /* Receive response. */ 422 if (hast_proto_recv_hdr(res->hr_ctrl, &nvin) < 0) { 423 pjdlog_errno(LOG_ERR, "Unable to receive reload reply"); 424 return; 425 } 426 error = nv_get_int16(nvin, "error"); 427 nv_free(nvin); 428 if (error != 0) { 429 pjdlog_common(LOG_ERR, 0, error, "Reload failed"); 430 return; 431 } 432 } 433 434 static void 435 hastd_reload(void) 436 { 437 struct hastd_config *newcfg; 438 struct hast_resource *nres, *cres, *tres; 439 uint8_t role; 440 441 pjdlog_info("Reloading configuration..."); 442 443 newcfg = yy_config_parse(cfgpath, false); 444 if (newcfg == NULL) 445 goto failed; 446 447 /* 448 * Check if control address has changed. 449 */ 450 if (strcmp(cfg->hc_controladdr, newcfg->hc_controladdr) != 0) { 451 if (proto_server(newcfg->hc_controladdr, 452 &newcfg->hc_controlconn) < 0) { 453 pjdlog_errno(LOG_ERR, 454 "Unable to listen on control address %s", 455 newcfg->hc_controladdr); 456 goto failed; 457 } 458 } 459 /* 460 * Check if listen address has changed. 461 */ 462 if (strcmp(cfg->hc_listenaddr, newcfg->hc_listenaddr) != 0) { 463 if (proto_server(newcfg->hc_listenaddr, 464 &newcfg->hc_listenconn) < 0) { 465 pjdlog_errno(LOG_ERR, "Unable to listen on address %s", 466 newcfg->hc_listenaddr); 467 goto failed; 468 } 469 } 470 /* 471 * Only when both control and listen sockets are successfully 472 * initialized switch them to new configuration. 473 */ 474 if (newcfg->hc_controlconn != NULL) { 475 pjdlog_info("Control socket changed from %s to %s.", 476 cfg->hc_controladdr, newcfg->hc_controladdr); 477 proto_close(cfg->hc_controlconn); 478 cfg->hc_controlconn = newcfg->hc_controlconn; 479 newcfg->hc_controlconn = NULL; 480 strlcpy(cfg->hc_controladdr, newcfg->hc_controladdr, 481 sizeof(cfg->hc_controladdr)); 482 } 483 if (newcfg->hc_listenconn != NULL) { 484 pjdlog_info("Listen socket changed from %s to %s.", 485 cfg->hc_listenaddr, newcfg->hc_listenaddr); 486 proto_close(cfg->hc_listenconn); 487 cfg->hc_listenconn = newcfg->hc_listenconn; 488 newcfg->hc_listenconn = NULL; 489 strlcpy(cfg->hc_listenaddr, newcfg->hc_listenaddr, 490 sizeof(cfg->hc_listenaddr)); 491 } 492 493 /* 494 * Stop and remove resources that were removed from the configuration. 495 */ 496 TAILQ_FOREACH_SAFE(cres, &cfg->hc_resources, hr_next, tres) { 497 TAILQ_FOREACH(nres, &newcfg->hc_resources, hr_next) { 498 if (strcmp(cres->hr_name, nres->hr_name) == 0) 499 break; 500 } 501 if (nres == NULL) { 502 control_set_role(cres, HAST_ROLE_INIT); 503 TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next); 504 pjdlog_info("Resource %s removed.", cres->hr_name); 505 free(cres); 506 } 507 } 508 /* 509 * Move new resources to the current configuration. 510 */ 511 TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) { 512 TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) { 513 if (strcmp(cres->hr_name, nres->hr_name) == 0) 514 break; 515 } 516 if (cres == NULL) { 517 TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next); 518 TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next); 519 pjdlog_info("Resource %s added.", nres->hr_name); 520 } 521 } 522 /* 523 * Deal with modified resources. 524 * Depending on what has changed exactly we might want to perform 525 * different actions. 526 * 527 * We do full resource restart in the following situations: 528 * Resource role is INIT or SECONDARY. 529 * Resource role is PRIMARY and path to local component or provider 530 * name has changed. 531 * In case of PRIMARY, the worker process will be killed and restarted, 532 * which also means removing /dev/hast/<name> provider and 533 * recreating it. 534 * 535 * We do just reload (send SIGHUP to worker process) if we act as 536 * PRIMARY, but only if remote address, replication mode, timeout or 537 * execution path has changed. For those, there is no need to restart 538 * worker process. 539 * If PRIMARY receives SIGHUP, it will reconnect if remote address or 540 * replication mode has changed or simply set new timeout if only 541 * timeout has changed. 542 */ 543 TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) { 544 TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) { 545 if (strcmp(cres->hr_name, nres->hr_name) == 0) 546 break; 547 } 548 PJDLOG_ASSERT(cres != NULL); 549 if (resource_needs_restart(cres, nres)) { 550 pjdlog_info("Resource %s configuration was modified, restarting it.", 551 cres->hr_name); 552 role = cres->hr_role; 553 control_set_role(cres, HAST_ROLE_INIT); 554 TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next); 555 free(cres); 556 TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next); 557 TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next); 558 control_set_role(nres, role); 559 } else if (resource_needs_reload(cres, nres)) { 560 pjdlog_info("Resource %s configuration was modified, reloading it.", 561 cres->hr_name); 562 strlcpy(cres->hr_remoteaddr, nres->hr_remoteaddr, 563 sizeof(cres->hr_remoteaddr)); 564 cres->hr_replication = nres->hr_replication; 565 cres->hr_timeout = nres->hr_timeout; 566 strlcpy(cres->hr_exec, nres->hr_exec, 567 sizeof(cres->hr_exec)); 568 if (cres->hr_workerpid != 0) 569 resource_reload(cres); 570 } 571 } 572 573 yy_config_free(newcfg); 574 pjdlog_info("Configuration reloaded successfully."); 575 return; 576 failed: 577 if (newcfg != NULL) { 578 if (newcfg->hc_controlconn != NULL) 579 proto_close(newcfg->hc_controlconn); 580 if (newcfg->hc_listenconn != NULL) 581 proto_close(newcfg->hc_listenconn); 582 yy_config_free(newcfg); 583 } 584 pjdlog_warning("Configuration not reloaded."); 585 } 586 587 static void 588 terminate_workers(void) 589 { 590 struct hast_resource *res; 591 592 pjdlog_info("Termination signal received, exiting."); 593 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 594 if (res->hr_workerpid == 0) 595 continue; 596 pjdlog_info("Terminating worker process (resource=%s, role=%s, pid=%u).", 597 res->hr_name, role2str(res->hr_role), res->hr_workerpid); 598 if (kill(res->hr_workerpid, SIGTERM) == 0) 599 continue; 600 pjdlog_errno(LOG_WARNING, 601 "Unable to send signal to worker process (resource=%s, role=%s, pid=%u).", 602 res->hr_name, role2str(res->hr_role), res->hr_workerpid); 603 } 604 } 605 606 static void 607 listen_accept(void) 608 { 609 struct hast_resource *res; 610 struct proto_conn *conn; 611 struct nv *nvin, *nvout, *nverr; 612 const char *resname; 613 const unsigned char *token; 614 char laddr[256], raddr[256]; 615 size_t size; 616 pid_t pid; 617 int status; 618 619 proto_local_address(cfg->hc_listenconn, laddr, sizeof(laddr)); 620 pjdlog_debug(1, "Accepting connection to %s.", laddr); 621 622 if (proto_accept(cfg->hc_listenconn, &conn) < 0) { 623 pjdlog_errno(LOG_ERR, "Unable to accept connection %s", laddr); 624 return; 625 } 626 627 proto_local_address(conn, laddr, sizeof(laddr)); 628 proto_remote_address(conn, raddr, sizeof(raddr)); 629 pjdlog_info("Connection from %s to %s.", raddr, laddr); 630 631 /* Error in setting timeout is not critical, but why should it fail? */ 632 if (proto_timeout(conn, HAST_TIMEOUT) < 0) 633 pjdlog_errno(LOG_WARNING, "Unable to set connection timeout"); 634 635 nvin = nvout = nverr = NULL; 636 637 /* 638 * Before receiving any data see if remote host have access to any 639 * resource. 640 */ 641 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 642 if (proto_address_match(conn, res->hr_remoteaddr)) 643 break; 644 } 645 if (res == NULL) { 646 pjdlog_error("Client %s isn't known.", raddr); 647 goto close; 648 } 649 /* Ok, remote host can access at least one resource. */ 650 651 if (hast_proto_recv_hdr(conn, &nvin) < 0) { 652 pjdlog_errno(LOG_ERR, "Unable to receive header from %s", 653 raddr); 654 goto close; 655 } 656 657 resname = nv_get_string(nvin, "resource"); 658 if (resname == NULL) { 659 pjdlog_error("No 'resource' field in the header received from %s.", 660 raddr); 661 goto close; 662 } 663 pjdlog_debug(2, "%s: resource=%s", raddr, resname); 664 token = nv_get_uint8_array(nvin, &size, "token"); 665 /* 666 * NULL token means that this is first conection. 667 */ 668 if (token != NULL && size != sizeof(res->hr_token)) { 669 pjdlog_error("Received token of invalid size from %s (expected %zu, got %zu).", 670 raddr, sizeof(res->hr_token), size); 671 goto close; 672 } 673 674 /* 675 * From now on we want to send errors to the remote node. 676 */ 677 nverr = nv_alloc(); 678 679 /* Find resource related to this connection. */ 680 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 681 if (strcmp(resname, res->hr_name) == 0) 682 break; 683 } 684 /* Have we found the resource? */ 685 if (res == NULL) { 686 pjdlog_error("No resource '%s' as requested by %s.", 687 resname, raddr); 688 nv_add_stringf(nverr, "errmsg", "Resource not configured."); 689 goto fail; 690 } 691 692 /* Now that we know resource name setup log prefix. */ 693 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 694 695 /* Does the remote host have access to this resource? */ 696 if (!proto_address_match(conn, res->hr_remoteaddr)) { 697 pjdlog_error("Client %s has no access to the resource.", raddr); 698 nv_add_stringf(nverr, "errmsg", "No access to the resource."); 699 goto fail; 700 } 701 /* Is the resource marked as secondary? */ 702 if (res->hr_role != HAST_ROLE_SECONDARY) { 703 pjdlog_error("We act as %s for the resource and not as %s as requested by %s.", 704 role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY), 705 raddr); 706 nv_add_stringf(nverr, "errmsg", 707 "Remote node acts as %s for the resource and not as %s.", 708 role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY)); 709 goto fail; 710 } 711 /* Does token (if exists) match? */ 712 if (token != NULL && memcmp(token, res->hr_token, 713 sizeof(res->hr_token)) != 0) { 714 pjdlog_error("Token received from %s doesn't match.", raddr); 715 nv_add_stringf(nverr, "errmsg", "Token doesn't match."); 716 goto fail; 717 } 718 /* 719 * If there is no token, but we have half-open connection 720 * (only remotein) or full connection (worker process is running) 721 * we have to cancel those and accept the new connection. 722 */ 723 if (token == NULL) { 724 PJDLOG_ASSERT(res->hr_remoteout == NULL); 725 pjdlog_debug(1, "Initial connection from %s.", raddr); 726 if (res->hr_workerpid != 0) { 727 PJDLOG_ASSERT(res->hr_remotein == NULL); 728 pjdlog_debug(1, 729 "Worker process exists (pid=%u), stopping it.", 730 (unsigned int)res->hr_workerpid); 731 /* Stop child process. */ 732 if (kill(res->hr_workerpid, SIGINT) < 0) { 733 pjdlog_errno(LOG_ERR, 734 "Unable to stop worker process (pid=%u)", 735 (unsigned int)res->hr_workerpid); 736 /* 737 * Other than logging the problem we 738 * ignore it - nothing smart to do. 739 */ 740 } 741 /* Wait for it to exit. */ 742 else if ((pid = waitpid(res->hr_workerpid, 743 &status, 0)) != res->hr_workerpid) { 744 /* We can only log the problem. */ 745 pjdlog_errno(LOG_ERR, 746 "Waiting for worker process (pid=%u) failed", 747 (unsigned int)res->hr_workerpid); 748 } else { 749 child_exit_log(res->hr_workerpid, status); 750 } 751 child_cleanup(res); 752 } else if (res->hr_remotein != NULL) { 753 char oaddr[256]; 754 755 proto_remote_address(res->hr_remotein, oaddr, 756 sizeof(oaddr)); 757 pjdlog_debug(1, 758 "Canceling half-open connection from %s on connection from %s.", 759 oaddr, raddr); 760 proto_close(res->hr_remotein); 761 res->hr_remotein = NULL; 762 } 763 } 764 765 /* 766 * Checks and cleanups are done. 767 */ 768 769 if (token == NULL) { 770 arc4random_buf(res->hr_token, sizeof(res->hr_token)); 771 nvout = nv_alloc(); 772 nv_add_uint8_array(nvout, res->hr_token, 773 sizeof(res->hr_token), "token"); 774 if (nv_error(nvout) != 0) { 775 pjdlog_common(LOG_ERR, 0, nv_error(nvout), 776 "Unable to prepare return header for %s", raddr); 777 nv_add_stringf(nverr, "errmsg", 778 "Remote node was unable to prepare return header: %s.", 779 strerror(nv_error(nvout))); 780 goto fail; 781 } 782 if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) { 783 int error = errno; 784 785 pjdlog_errno(LOG_ERR, "Unable to send response to %s", 786 raddr); 787 nv_add_stringf(nverr, "errmsg", 788 "Remote node was unable to send response: %s.", 789 strerror(error)); 790 goto fail; 791 } 792 res->hr_remotein = conn; 793 pjdlog_debug(1, "Incoming connection from %s configured.", 794 raddr); 795 } else { 796 res->hr_remoteout = conn; 797 pjdlog_debug(1, "Outgoing connection to %s configured.", raddr); 798 hastd_secondary(res, nvin); 799 } 800 nv_free(nvin); 801 nv_free(nvout); 802 nv_free(nverr); 803 pjdlog_prefix_set("%s", ""); 804 return; 805 fail: 806 if (nv_error(nverr) != 0) { 807 pjdlog_common(LOG_ERR, 0, nv_error(nverr), 808 "Unable to prepare error header for %s", raddr); 809 goto close; 810 } 811 if (hast_proto_send(NULL, conn, nverr, NULL, 0) < 0) { 812 pjdlog_errno(LOG_ERR, "Unable to send error to %s", raddr); 813 goto close; 814 } 815 close: 816 if (nvin != NULL) 817 nv_free(nvin); 818 if (nvout != NULL) 819 nv_free(nvout); 820 if (nverr != NULL) 821 nv_free(nverr); 822 proto_close(conn); 823 pjdlog_prefix_set("%s", ""); 824 } 825 826 static void 827 connection_migrate(struct hast_resource *res) 828 { 829 struct proto_conn *conn; 830 int16_t val = 0; 831 832 if (proto_recv(res->hr_conn, &val, sizeof(val)) < 0) { 833 pjdlog_errno(LOG_WARNING, 834 "Unable to receive connection command"); 835 return; 836 } 837 if (proto_client(res->hr_remoteaddr, &conn) < 0) { 838 val = errno; 839 pjdlog_errno(LOG_WARNING, 840 "Unable to create outgoing connection to %s", 841 res->hr_remoteaddr); 842 goto out; 843 } 844 if (proto_connect(conn, -1) < 0) { 845 val = errno; 846 pjdlog_errno(LOG_WARNING, "Unable to connect to %s", 847 res->hr_remoteaddr); 848 proto_close(conn); 849 goto out; 850 } 851 val = 0; 852 out: 853 if (proto_send(res->hr_conn, &val, sizeof(val)) < 0) { 854 pjdlog_errno(LOG_WARNING, 855 "Unable to send reply to connection request"); 856 } 857 if (val == 0 && proto_connection_send(res->hr_conn, conn) < 0) 858 pjdlog_errno(LOG_WARNING, "Unable to send connection"); 859 } 860 861 static void 862 main_loop(void) 863 { 864 struct hast_resource *res; 865 struct timeval seltimeout; 866 struct timespec sigtimeout; 867 int fd, maxfd, ret, signo; 868 sigset_t mask; 869 fd_set rfds; 870 871 seltimeout.tv_sec = REPORT_INTERVAL; 872 seltimeout.tv_usec = 0; 873 sigtimeout.tv_sec = 0; 874 sigtimeout.tv_nsec = 0; 875 876 PJDLOG_VERIFY(sigemptyset(&mask) == 0); 877 PJDLOG_VERIFY(sigaddset(&mask, SIGHUP) == 0); 878 PJDLOG_VERIFY(sigaddset(&mask, SIGINT) == 0); 879 PJDLOG_VERIFY(sigaddset(&mask, SIGTERM) == 0); 880 PJDLOG_VERIFY(sigaddset(&mask, SIGCHLD) == 0); 881 882 pjdlog_info("Started successfully, running protocol version %d.", 883 HAST_PROTO_VERSION); 884 885 for (;;) { 886 while ((signo = sigtimedwait(&mask, NULL, &sigtimeout)) != -1) { 887 switch (signo) { 888 case SIGINT: 889 case SIGTERM: 890 sigexit_received = true; 891 terminate_workers(); 892 proto_close(cfg->hc_controlconn); 893 exit(EX_OK); 894 break; 895 case SIGCHLD: 896 child_exit(); 897 break; 898 case SIGHUP: 899 hastd_reload(); 900 break; 901 default: 902 PJDLOG_ABORT("Unexpected signal (%d).", signo); 903 } 904 } 905 906 /* Setup descriptors for select(2). */ 907 FD_ZERO(&rfds); 908 maxfd = fd = proto_descriptor(cfg->hc_controlconn); 909 PJDLOG_ASSERT(fd >= 0); 910 FD_SET(fd, &rfds); 911 fd = proto_descriptor(cfg->hc_listenconn); 912 PJDLOG_ASSERT(fd >= 0); 913 FD_SET(fd, &rfds); 914 maxfd = fd > maxfd ? fd : maxfd; 915 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 916 if (res->hr_event == NULL) 917 continue; 918 PJDLOG_ASSERT(res->hr_conn != NULL); 919 fd = proto_descriptor(res->hr_event); 920 PJDLOG_ASSERT(fd >= 0); 921 FD_SET(fd, &rfds); 922 maxfd = fd > maxfd ? fd : maxfd; 923 if (res->hr_role == HAST_ROLE_PRIMARY) { 924 /* Only primary workers asks for connections. */ 925 fd = proto_descriptor(res->hr_conn); 926 PJDLOG_ASSERT(fd >= 0); 927 FD_SET(fd, &rfds); 928 maxfd = fd > maxfd ? fd : maxfd; 929 } 930 } 931 932 PJDLOG_ASSERT(maxfd + 1 <= (int)FD_SETSIZE); 933 ret = select(maxfd + 1, &rfds, NULL, NULL, &seltimeout); 934 if (ret == 0) 935 hook_check(); 936 else if (ret == -1) { 937 if (errno == EINTR) 938 continue; 939 KEEP_ERRNO((void)pidfile_remove(pfh)); 940 pjdlog_exit(EX_OSERR, "select() failed"); 941 } 942 943 if (FD_ISSET(proto_descriptor(cfg->hc_controlconn), &rfds)) 944 control_handle(cfg); 945 if (FD_ISSET(proto_descriptor(cfg->hc_listenconn), &rfds)) 946 listen_accept(); 947 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 948 if (res->hr_event == NULL) 949 continue; 950 PJDLOG_ASSERT(res->hr_conn != NULL); 951 if (FD_ISSET(proto_descriptor(res->hr_event), &rfds)) { 952 if (event_recv(res) == 0) 953 continue; 954 /* The worker process exited? */ 955 proto_close(res->hr_event); 956 res->hr_event = NULL; 957 proto_close(res->hr_conn); 958 res->hr_conn = NULL; 959 continue; 960 } 961 if (res->hr_role == HAST_ROLE_PRIMARY && 962 FD_ISSET(proto_descriptor(res->hr_conn), &rfds)) { 963 connection_migrate(res); 964 } 965 } 966 } 967 } 968 969 static void 970 dummy_sighandler(int sig __unused) 971 { 972 /* Nothing to do. */ 973 } 974 975 int 976 main(int argc, char *argv[]) 977 { 978 const char *pidfile; 979 pid_t otherpid; 980 bool foreground; 981 int debuglevel; 982 sigset_t mask; 983 984 foreground = false; 985 debuglevel = 0; 986 pidfile = HASTD_PIDFILE; 987 988 for (;;) { 989 int ch; 990 991 ch = getopt(argc, argv, "c:dFhP:"); 992 if (ch == -1) 993 break; 994 switch (ch) { 995 case 'c': 996 cfgpath = optarg; 997 break; 998 case 'd': 999 debuglevel++; 1000 break; 1001 case 'F': 1002 foreground = true; 1003 break; 1004 case 'P': 1005 pidfile = optarg; 1006 break; 1007 case 'h': 1008 default: 1009 usage(); 1010 } 1011 } 1012 argc -= optind; 1013 argv += optind; 1014 1015 pjdlog_init(PJDLOG_MODE_STD); 1016 pjdlog_debug_set(debuglevel); 1017 1018 g_gate_load(); 1019 1020 pfh = pidfile_open(pidfile, 0600, &otherpid); 1021 if (pfh == NULL) { 1022 if (errno == EEXIST) { 1023 pjdlog_exitx(EX_TEMPFAIL, 1024 "Another hastd is already running, pid: %jd.", 1025 (intmax_t)otherpid); 1026 } 1027 /* If we cannot create pidfile from other reasons, only warn. */ 1028 pjdlog_errno(LOG_WARNING, "Unable to open or create pidfile"); 1029 } 1030 1031 cfg = yy_config_parse(cfgpath, true); 1032 PJDLOG_ASSERT(cfg != NULL); 1033 1034 /* 1035 * Restore default actions for interesting signals in case parent 1036 * process (like init(8)) decided to ignore some of them (like SIGHUP). 1037 */ 1038 PJDLOG_VERIFY(signal(SIGHUP, SIG_DFL) != SIG_ERR); 1039 PJDLOG_VERIFY(signal(SIGINT, SIG_DFL) != SIG_ERR); 1040 PJDLOG_VERIFY(signal(SIGTERM, SIG_DFL) != SIG_ERR); 1041 /* 1042 * Because SIGCHLD is ignored by default, setup dummy handler for it, 1043 * so we can mask it. 1044 */ 1045 PJDLOG_VERIFY(signal(SIGCHLD, dummy_sighandler) != SIG_ERR); 1046 1047 PJDLOG_VERIFY(sigemptyset(&mask) == 0); 1048 PJDLOG_VERIFY(sigaddset(&mask, SIGHUP) == 0); 1049 PJDLOG_VERIFY(sigaddset(&mask, SIGINT) == 0); 1050 PJDLOG_VERIFY(sigaddset(&mask, SIGTERM) == 0); 1051 PJDLOG_VERIFY(sigaddset(&mask, SIGCHLD) == 0); 1052 PJDLOG_VERIFY(sigprocmask(SIG_SETMASK, &mask, NULL) == 0); 1053 1054 /* Listen on control address. */ 1055 if (proto_server(cfg->hc_controladdr, &cfg->hc_controlconn) < 0) { 1056 KEEP_ERRNO((void)pidfile_remove(pfh)); 1057 pjdlog_exit(EX_OSERR, "Unable to listen on control address %s", 1058 cfg->hc_controladdr); 1059 } 1060 /* Listen for remote connections. */ 1061 if (proto_server(cfg->hc_listenaddr, &cfg->hc_listenconn) < 0) { 1062 KEEP_ERRNO((void)pidfile_remove(pfh)); 1063 pjdlog_exit(EX_OSERR, "Unable to listen on address %s", 1064 cfg->hc_listenaddr); 1065 } 1066 1067 if (!foreground) { 1068 if (daemon(0, 0) < 0) { 1069 KEEP_ERRNO((void)pidfile_remove(pfh)); 1070 pjdlog_exit(EX_OSERR, "Unable to daemonize"); 1071 } 1072 1073 /* Start logging to syslog. */ 1074 pjdlog_mode_set(PJDLOG_MODE_SYSLOG); 1075 1076 /* Write PID to a file. */ 1077 if (pidfile_write(pfh) < 0) { 1078 pjdlog_errno(LOG_WARNING, 1079 "Unable to write PID to a file"); 1080 } 1081 } 1082 1083 hook_init(); 1084 1085 main_loop(); 1086 1087 exit(0); 1088 } 1089