1 /*- 2 * Copyright (c) 2009-2010 The FreeBSD Foundation 3 * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net> 4 * All rights reserved. 5 * 6 * This software was developed by Pawel Jakub Dawidek under sponsorship from 7 * the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/linker.h> 36 #include <sys/module.h> 37 #include <sys/stat.h> 38 #include <sys/wait.h> 39 40 #include <err.h> 41 #include <errno.h> 42 #include <libutil.h> 43 #include <signal.h> 44 #include <stdbool.h> 45 #include <stdio.h> 46 #include <stdlib.h> 47 #include <string.h> 48 #include <sysexits.h> 49 #include <time.h> 50 #include <unistd.h> 51 52 #include <activemap.h> 53 #include <pjdlog.h> 54 55 #include "control.h" 56 #include "event.h" 57 #include "hast.h" 58 #include "hast_proto.h" 59 #include "hastd.h" 60 #include "hooks.h" 61 #include "subr.h" 62 63 /* Path to configuration file. */ 64 const char *cfgpath = HAST_CONFIG; 65 /* Hastd configuration. */ 66 static struct hastd_config *cfg; 67 /* Was SIGINT or SIGTERM signal received? */ 68 bool sigexit_received = false; 69 /* PID file handle. */ 70 struct pidfh *pfh; 71 72 /* How often check for hooks running for too long. */ 73 #define REPORT_INTERVAL 5 74 75 static void 76 usage(void) 77 { 78 79 errx(EX_USAGE, "[-dFh] [-c config] [-P pidfile]"); 80 } 81 82 static void 83 g_gate_load(void) 84 { 85 86 if (modfind("g_gate") == -1) { 87 /* Not present in kernel, try loading it. */ 88 if (kldload("geom_gate") == -1 || modfind("g_gate") == -1) { 89 if (errno != EEXIST) { 90 pjdlog_exit(EX_OSERR, 91 "Unable to load geom_gate module"); 92 } 93 } 94 } 95 } 96 97 void 98 descriptors_cleanup(struct hast_resource *res) 99 { 100 struct hast_resource *tres; 101 struct hastd_listen *lst; 102 103 TAILQ_FOREACH(tres, &cfg->hc_resources, hr_next) { 104 if (tres == res) { 105 PJDLOG_VERIFY(res->hr_role == HAST_ROLE_SECONDARY || 106 (res->hr_remotein == NULL && 107 res->hr_remoteout == NULL)); 108 continue; 109 } 110 if (tres->hr_remotein != NULL) 111 proto_close(tres->hr_remotein); 112 if (tres->hr_remoteout != NULL) 113 proto_close(tres->hr_remoteout); 114 if (tres->hr_ctrl != NULL) 115 proto_close(tres->hr_ctrl); 116 if (tres->hr_event != NULL) 117 proto_close(tres->hr_event); 118 if (tres->hr_conn != NULL) 119 proto_close(tres->hr_conn); 120 } 121 if (cfg->hc_controlin != NULL) 122 proto_close(cfg->hc_controlin); 123 proto_close(cfg->hc_controlconn); 124 TAILQ_FOREACH(lst, &cfg->hc_listen, hl_next) { 125 if (lst->hl_conn != NULL) 126 proto_close(lst->hl_conn); 127 } 128 (void)pidfile_close(pfh); 129 hook_fini(); 130 pjdlog_fini(); 131 } 132 133 static const char * 134 dtype2str(mode_t mode) 135 { 136 137 if (S_ISBLK(mode)) 138 return ("block device"); 139 else if (S_ISCHR(mode)) 140 return ("character device"); 141 else if (S_ISDIR(mode)) 142 return ("directory"); 143 else if (S_ISFIFO(mode)) 144 return ("pipe or FIFO"); 145 else if (S_ISLNK(mode)) 146 return ("symbolic link"); 147 else if (S_ISREG(mode)) 148 return ("regular file"); 149 else if (S_ISSOCK(mode)) 150 return ("socket"); 151 else if (S_ISWHT(mode)) 152 return ("whiteout"); 153 else 154 return ("unknown"); 155 } 156 157 void 158 descriptors_assert(const struct hast_resource *res, int pjdlogmode) 159 { 160 char msg[256]; 161 struct stat sb; 162 long maxfd; 163 bool isopen; 164 mode_t mode; 165 int fd; 166 167 /* 168 * At this point descriptor to syslog socket is closed, so if we want 169 * to log assertion message, we have to first store it in 'msg' local 170 * buffer and then open syslog socket and log it. 171 */ 172 msg[0] = '\0'; 173 174 maxfd = sysconf(_SC_OPEN_MAX); 175 if (maxfd < 0) { 176 pjdlog_init(pjdlogmode); 177 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, 178 role2str(res->hr_role)); 179 pjdlog_errno(LOG_WARNING, "sysconf(_SC_OPEN_MAX) failed"); 180 pjdlog_fini(); 181 maxfd = 16384; 182 } 183 for (fd = 0; fd <= maxfd; fd++) { 184 if (fstat(fd, &sb) == 0) { 185 isopen = true; 186 mode = sb.st_mode; 187 } else if (errno == EBADF) { 188 isopen = false; 189 mode = 0; 190 } else { 191 (void)snprintf(msg, sizeof(msg), 192 "Unable to fstat descriptor %d: %s", fd, 193 strerror(errno)); 194 break; 195 } 196 if (fd == STDIN_FILENO || fd == STDOUT_FILENO || 197 fd == STDERR_FILENO) { 198 if (!isopen) { 199 (void)snprintf(msg, sizeof(msg), 200 "Descriptor %d (%s) is closed, but should be open.", 201 fd, (fd == STDIN_FILENO ? "stdin" : 202 (fd == STDOUT_FILENO ? "stdout" : "stderr"))); 203 break; 204 } 205 } else if (fd == proto_descriptor(res->hr_event)) { 206 if (!isopen) { 207 (void)snprintf(msg, sizeof(msg), 208 "Descriptor %d (event) is closed, but should be open.", 209 fd); 210 break; 211 } 212 if (!S_ISSOCK(mode)) { 213 (void)snprintf(msg, sizeof(msg), 214 "Descriptor %d (event) is %s, but should be %s.", 215 fd, dtype2str(mode), dtype2str(S_IFSOCK)); 216 break; 217 } 218 } else if (fd == proto_descriptor(res->hr_ctrl)) { 219 if (!isopen) { 220 (void)snprintf(msg, sizeof(msg), 221 "Descriptor %d (ctrl) is closed, but should be open.", 222 fd); 223 break; 224 } 225 if (!S_ISSOCK(mode)) { 226 (void)snprintf(msg, sizeof(msg), 227 "Descriptor %d (ctrl) is %s, but should be %s.", 228 fd, dtype2str(mode), dtype2str(S_IFSOCK)); 229 break; 230 } 231 } else if (res->hr_role == HAST_ROLE_PRIMARY && 232 fd == proto_descriptor(res->hr_conn)) { 233 if (!isopen) { 234 (void)snprintf(msg, sizeof(msg), 235 "Descriptor %d (conn) is closed, but should be open.", 236 fd); 237 break; 238 } 239 if (!S_ISSOCK(mode)) { 240 (void)snprintf(msg, sizeof(msg), 241 "Descriptor %d (conn) is %s, but should be %s.", 242 fd, dtype2str(mode), dtype2str(S_IFSOCK)); 243 break; 244 } 245 } else if (res->hr_role == HAST_ROLE_SECONDARY && 246 res->hr_conn != NULL && 247 fd == proto_descriptor(res->hr_conn)) { 248 if (isopen) { 249 (void)snprintf(msg, sizeof(msg), 250 "Descriptor %d (conn) is open, but should be closed.", 251 fd); 252 break; 253 } 254 } else if (res->hr_role == HAST_ROLE_SECONDARY && 255 fd == proto_descriptor(res->hr_remotein)) { 256 if (!isopen) { 257 (void)snprintf(msg, sizeof(msg), 258 "Descriptor %d (remote in) is closed, but should be open.", 259 fd); 260 break; 261 } 262 if (!S_ISSOCK(mode)) { 263 (void)snprintf(msg, sizeof(msg), 264 "Descriptor %d (remote in) is %s, but should be %s.", 265 fd, dtype2str(mode), dtype2str(S_IFSOCK)); 266 break; 267 } 268 } else if (res->hr_role == HAST_ROLE_SECONDARY && 269 fd == proto_descriptor(res->hr_remoteout)) { 270 if (!isopen) { 271 (void)snprintf(msg, sizeof(msg), 272 "Descriptor %d (remote out) is closed, but should be open.", 273 fd); 274 break; 275 } 276 if (!S_ISSOCK(mode)) { 277 (void)snprintf(msg, sizeof(msg), 278 "Descriptor %d (remote out) is %s, but should be %s.", 279 fd, dtype2str(mode), dtype2str(S_IFSOCK)); 280 break; 281 } 282 } else { 283 if (isopen) { 284 (void)snprintf(msg, sizeof(msg), 285 "Descriptor %d is open (%s), but should be closed.", 286 fd, dtype2str(mode)); 287 break; 288 } 289 } 290 } 291 if (msg[0] != '\0') { 292 pjdlog_init(pjdlogmode); 293 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, 294 role2str(res->hr_role)); 295 PJDLOG_ABORT("%s", msg); 296 } 297 } 298 299 static void 300 child_exit_log(unsigned int pid, int status) 301 { 302 303 if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { 304 pjdlog_debug(1, "Worker process exited gracefully (pid=%u).", 305 pid); 306 } else if (WIFSIGNALED(status)) { 307 pjdlog_error("Worker process killed (pid=%u, signal=%d).", 308 pid, WTERMSIG(status)); 309 } else { 310 pjdlog_error("Worker process exited ungracefully (pid=%u, exitcode=%d).", 311 pid, WIFEXITED(status) ? WEXITSTATUS(status) : -1); 312 } 313 } 314 315 static void 316 child_exit(void) 317 { 318 struct hast_resource *res; 319 int status; 320 pid_t pid; 321 322 while ((pid = wait3(&status, WNOHANG, NULL)) > 0) { 323 /* Find resource related to the process that just exited. */ 324 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 325 if (pid == res->hr_workerpid) 326 break; 327 } 328 if (res == NULL) { 329 /* 330 * This can happen when new connection arrives and we 331 * cancel child responsible for the old one or if this 332 * was hook which we executed. 333 */ 334 hook_check_one(pid, status); 335 continue; 336 } 337 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, 338 role2str(res->hr_role)); 339 child_exit_log(pid, status); 340 child_cleanup(res); 341 if (res->hr_role == HAST_ROLE_PRIMARY) { 342 /* 343 * Restart child process if it was killed by signal 344 * or exited because of temporary problem. 345 */ 346 if (WIFSIGNALED(status) || 347 (WIFEXITED(status) && 348 WEXITSTATUS(status) == EX_TEMPFAIL)) { 349 sleep(1); 350 pjdlog_info("Restarting worker process."); 351 hastd_primary(res); 352 } else { 353 res->hr_role = HAST_ROLE_INIT; 354 pjdlog_info("Changing resource role back to %s.", 355 role2str(res->hr_role)); 356 } 357 } 358 pjdlog_prefix_set("%s", ""); 359 } 360 } 361 362 static bool 363 resource_needs_restart(const struct hast_resource *res0, 364 const struct hast_resource *res1) 365 { 366 367 PJDLOG_ASSERT(strcmp(res0->hr_name, res1->hr_name) == 0); 368 369 if (strcmp(res0->hr_provname, res1->hr_provname) != 0) 370 return (true); 371 if (strcmp(res0->hr_localpath, res1->hr_localpath) != 0) 372 return (true); 373 if (res0->hr_role == HAST_ROLE_INIT || 374 res0->hr_role == HAST_ROLE_SECONDARY) { 375 if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0) 376 return (true); 377 if (strcmp(res0->hr_sourceaddr, res1->hr_sourceaddr) != 0) 378 return (true); 379 if (res0->hr_replication != res1->hr_replication) 380 return (true); 381 if (res0->hr_checksum != res1->hr_checksum) 382 return (true); 383 if (res0->hr_compression != res1->hr_compression) 384 return (true); 385 if (res0->hr_timeout != res1->hr_timeout) 386 return (true); 387 if (strcmp(res0->hr_exec, res1->hr_exec) != 0) 388 return (true); 389 } 390 return (false); 391 } 392 393 static bool 394 resource_needs_reload(const struct hast_resource *res0, 395 const struct hast_resource *res1) 396 { 397 398 PJDLOG_ASSERT(strcmp(res0->hr_name, res1->hr_name) == 0); 399 PJDLOG_ASSERT(strcmp(res0->hr_provname, res1->hr_provname) == 0); 400 PJDLOG_ASSERT(strcmp(res0->hr_localpath, res1->hr_localpath) == 0); 401 402 if (res0->hr_role != HAST_ROLE_PRIMARY) 403 return (false); 404 405 if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0) 406 return (true); 407 if (strcmp(res0->hr_sourceaddr, res1->hr_sourceaddr) != 0) 408 return (true); 409 if (res0->hr_replication != res1->hr_replication) 410 return (true); 411 if (res0->hr_checksum != res1->hr_checksum) 412 return (true); 413 if (res0->hr_compression != res1->hr_compression) 414 return (true); 415 if (res0->hr_timeout != res1->hr_timeout) 416 return (true); 417 if (strcmp(res0->hr_exec, res1->hr_exec) != 0) 418 return (true); 419 return (false); 420 } 421 422 static void 423 resource_reload(const struct hast_resource *res) 424 { 425 struct nv *nvin, *nvout; 426 int error; 427 428 PJDLOG_ASSERT(res->hr_role == HAST_ROLE_PRIMARY); 429 430 nvout = nv_alloc(); 431 nv_add_uint8(nvout, CONTROL_RELOAD, "cmd"); 432 nv_add_string(nvout, res->hr_remoteaddr, "remoteaddr"); 433 nv_add_string(nvout, res->hr_sourceaddr, "sourceaddr"); 434 nv_add_int32(nvout, (int32_t)res->hr_replication, "replication"); 435 nv_add_int32(nvout, (int32_t)res->hr_checksum, "checksum"); 436 nv_add_int32(nvout, (int32_t)res->hr_compression, "compression"); 437 nv_add_int32(nvout, (int32_t)res->hr_timeout, "timeout"); 438 nv_add_string(nvout, res->hr_exec, "exec"); 439 if (nv_error(nvout) != 0) { 440 nv_free(nvout); 441 pjdlog_error("Unable to allocate header for reload message."); 442 return; 443 } 444 if (hast_proto_send(res, res->hr_ctrl, nvout, NULL, 0) < 0) { 445 pjdlog_errno(LOG_ERR, "Unable to send reload message"); 446 nv_free(nvout); 447 return; 448 } 449 nv_free(nvout); 450 451 /* Receive response. */ 452 if (hast_proto_recv_hdr(res->hr_ctrl, &nvin) < 0) { 453 pjdlog_errno(LOG_ERR, "Unable to receive reload reply"); 454 return; 455 } 456 error = nv_get_int16(nvin, "error"); 457 nv_free(nvin); 458 if (error != 0) { 459 pjdlog_common(LOG_ERR, 0, error, "Reload failed"); 460 return; 461 } 462 } 463 464 static void 465 hastd_reload(void) 466 { 467 struct hastd_config *newcfg; 468 struct hast_resource *nres, *cres, *tres; 469 struct hastd_listen *nlst, *clst; 470 unsigned int nlisten; 471 uint8_t role; 472 473 pjdlog_info("Reloading configuration..."); 474 475 newcfg = yy_config_parse(cfgpath, false); 476 if (newcfg == NULL) 477 goto failed; 478 479 /* 480 * Check if control address has changed. 481 */ 482 if (strcmp(cfg->hc_controladdr, newcfg->hc_controladdr) != 0) { 483 if (proto_server(newcfg->hc_controladdr, 484 &newcfg->hc_controlconn) < 0) { 485 pjdlog_errno(LOG_ERR, 486 "Unable to listen on control address %s", 487 newcfg->hc_controladdr); 488 goto failed; 489 } 490 } 491 /* 492 * Check if any listen address has changed. 493 */ 494 nlisten = 0; 495 TAILQ_FOREACH(nlst, &newcfg->hc_listen, hl_next) { 496 TAILQ_FOREACH(clst, &cfg->hc_listen, hl_next) { 497 if (strcmp(nlst->hl_addr, clst->hl_addr) == 0) 498 break; 499 } 500 if (clst != NULL && clst->hl_conn != NULL) { 501 pjdlog_info("Keep listening on address %s.", 502 nlst->hl_addr); 503 nlst->hl_conn = clst->hl_conn; 504 nlisten++; 505 } else if (proto_server(nlst->hl_addr, &nlst->hl_conn) == 0) { 506 pjdlog_info("Listening on new address %s.", 507 nlst->hl_addr); 508 nlisten++; 509 } else { 510 pjdlog_errno(LOG_WARNING, 511 "Unable to listen on address %s", nlst->hl_addr); 512 } 513 } 514 if (nlisten == 0) { 515 pjdlog_error("No addresses to listen on."); 516 goto failed; 517 } 518 519 /* No failures from now on. */ 520 521 /* 522 * Switch to new control socket. 523 */ 524 if (newcfg->hc_controlconn != NULL) { 525 pjdlog_info("Control socket changed from %s to %s.", 526 cfg->hc_controladdr, newcfg->hc_controladdr); 527 proto_close(cfg->hc_controlconn); 528 cfg->hc_controlconn = newcfg->hc_controlconn; 529 newcfg->hc_controlconn = NULL; 530 strlcpy(cfg->hc_controladdr, newcfg->hc_controladdr, 531 sizeof(cfg->hc_controladdr)); 532 } 533 /* 534 * Switch to new listen addresses. Close all that were removed. 535 */ 536 while ((clst = TAILQ_FIRST(&cfg->hc_listen)) != NULL) { 537 TAILQ_FOREACH(nlst, &newcfg->hc_listen, hl_next) { 538 if (strcmp(nlst->hl_addr, clst->hl_addr) == 0) 539 break; 540 } 541 if (nlst == NULL && clst->hl_conn != NULL) { 542 proto_close(clst->hl_conn); 543 pjdlog_info("No longer listening on address %s.", 544 clst->hl_addr); 545 } 546 TAILQ_REMOVE(&cfg->hc_listen, clst, hl_next); 547 free(clst); 548 } 549 TAILQ_CONCAT(&cfg->hc_listen, &newcfg->hc_listen, hl_next); 550 551 /* 552 * Stop and remove resources that were removed from the configuration. 553 */ 554 TAILQ_FOREACH_SAFE(cres, &cfg->hc_resources, hr_next, tres) { 555 TAILQ_FOREACH(nres, &newcfg->hc_resources, hr_next) { 556 if (strcmp(cres->hr_name, nres->hr_name) == 0) 557 break; 558 } 559 if (nres == NULL) { 560 control_set_role(cres, HAST_ROLE_INIT); 561 TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next); 562 pjdlog_info("Resource %s removed.", cres->hr_name); 563 free(cres); 564 } 565 } 566 /* 567 * Move new resources to the current configuration. 568 */ 569 TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) { 570 TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) { 571 if (strcmp(cres->hr_name, nres->hr_name) == 0) 572 break; 573 } 574 if (cres == NULL) { 575 TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next); 576 TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next); 577 pjdlog_info("Resource %s added.", nres->hr_name); 578 } 579 } 580 /* 581 * Deal with modified resources. 582 * Depending on what has changed exactly we might want to perform 583 * different actions. 584 * 585 * We do full resource restart in the following situations: 586 * Resource role is INIT or SECONDARY. 587 * Resource role is PRIMARY and path to local component or provider 588 * name has changed. 589 * In case of PRIMARY, the worker process will be killed and restarted, 590 * which also means removing /dev/hast/<name> provider and 591 * recreating it. 592 * 593 * We do just reload (send SIGHUP to worker process) if we act as 594 * PRIMARY, but only if remote address, replication mode, timeout or 595 * execution path has changed. For those, there is no need to restart 596 * worker process. 597 * If PRIMARY receives SIGHUP, it will reconnect if remote address or 598 * replication mode has changed or simply set new timeout if only 599 * timeout has changed. 600 */ 601 TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) { 602 TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) { 603 if (strcmp(cres->hr_name, nres->hr_name) == 0) 604 break; 605 } 606 PJDLOG_ASSERT(cres != NULL); 607 if (resource_needs_restart(cres, nres)) { 608 pjdlog_info("Resource %s configuration was modified, restarting it.", 609 cres->hr_name); 610 role = cres->hr_role; 611 control_set_role(cres, HAST_ROLE_INIT); 612 TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next); 613 free(cres); 614 TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next); 615 TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next); 616 control_set_role(nres, role); 617 } else if (resource_needs_reload(cres, nres)) { 618 pjdlog_info("Resource %s configuration was modified, reloading it.", 619 cres->hr_name); 620 strlcpy(cres->hr_remoteaddr, nres->hr_remoteaddr, 621 sizeof(cres->hr_remoteaddr)); 622 strlcpy(cres->hr_sourceaddr, nres->hr_sourceaddr, 623 sizeof(cres->hr_sourceaddr)); 624 cres->hr_replication = nres->hr_replication; 625 cres->hr_checksum = nres->hr_checksum; 626 cres->hr_compression = nres->hr_compression; 627 cres->hr_timeout = nres->hr_timeout; 628 strlcpy(cres->hr_exec, nres->hr_exec, 629 sizeof(cres->hr_exec)); 630 if (cres->hr_workerpid != 0) 631 resource_reload(cres); 632 } 633 } 634 635 yy_config_free(newcfg); 636 pjdlog_info("Configuration reloaded successfully."); 637 return; 638 failed: 639 if (newcfg != NULL) { 640 if (newcfg->hc_controlconn != NULL) 641 proto_close(newcfg->hc_controlconn); 642 while ((nlst = TAILQ_FIRST(&newcfg->hc_listen)) != NULL) { 643 if (nlst->hl_conn != NULL) { 644 TAILQ_FOREACH(clst, &cfg->hc_listen, hl_next) { 645 if (strcmp(nlst->hl_addr, 646 clst->hl_addr) == 0) { 647 break; 648 } 649 } 650 if (clst == NULL || clst->hl_conn == NULL) 651 proto_close(nlst->hl_conn); 652 } 653 TAILQ_REMOVE(&newcfg->hc_listen, nlst, hl_next); 654 free(nlst); 655 } 656 yy_config_free(newcfg); 657 } 658 pjdlog_warning("Configuration not reloaded."); 659 } 660 661 static void 662 terminate_workers(void) 663 { 664 struct hast_resource *res; 665 666 pjdlog_info("Termination signal received, exiting."); 667 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 668 if (res->hr_workerpid == 0) 669 continue; 670 pjdlog_info("Terminating worker process (resource=%s, role=%s, pid=%u).", 671 res->hr_name, role2str(res->hr_role), res->hr_workerpid); 672 if (kill(res->hr_workerpid, SIGTERM) == 0) 673 continue; 674 pjdlog_errno(LOG_WARNING, 675 "Unable to send signal to worker process (resource=%s, role=%s, pid=%u).", 676 res->hr_name, role2str(res->hr_role), res->hr_workerpid); 677 } 678 } 679 680 static void 681 listen_accept(struct hastd_listen *lst) 682 { 683 struct hast_resource *res; 684 struct proto_conn *conn; 685 struct nv *nvin, *nvout, *nverr; 686 const char *resname; 687 const unsigned char *token; 688 char laddr[256], raddr[256]; 689 size_t size; 690 pid_t pid; 691 int status; 692 693 proto_local_address(lst->hl_conn, laddr, sizeof(laddr)); 694 pjdlog_debug(1, "Accepting connection to %s.", laddr); 695 696 if (proto_accept(lst->hl_conn, &conn) < 0) { 697 pjdlog_errno(LOG_ERR, "Unable to accept connection %s", laddr); 698 return; 699 } 700 701 proto_local_address(conn, laddr, sizeof(laddr)); 702 proto_remote_address(conn, raddr, sizeof(raddr)); 703 pjdlog_info("Connection from %s to %s.", raddr, laddr); 704 705 /* Error in setting timeout is not critical, but why should it fail? */ 706 if (proto_timeout(conn, HAST_TIMEOUT) < 0) 707 pjdlog_errno(LOG_WARNING, "Unable to set connection timeout"); 708 709 nvin = nvout = nverr = NULL; 710 711 /* 712 * Before receiving any data see if remote host have access to any 713 * resource. 714 */ 715 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 716 if (proto_address_match(conn, res->hr_remoteaddr)) 717 break; 718 } 719 if (res == NULL) { 720 pjdlog_error("Client %s isn't known.", raddr); 721 goto close; 722 } 723 /* Ok, remote host can access at least one resource. */ 724 725 if (hast_proto_recv_hdr(conn, &nvin) < 0) { 726 pjdlog_errno(LOG_ERR, "Unable to receive header from %s", 727 raddr); 728 goto close; 729 } 730 731 resname = nv_get_string(nvin, "resource"); 732 if (resname == NULL) { 733 pjdlog_error("No 'resource' field in the header received from %s.", 734 raddr); 735 goto close; 736 } 737 pjdlog_debug(2, "%s: resource=%s", raddr, resname); 738 token = nv_get_uint8_array(nvin, &size, "token"); 739 /* 740 * NULL token means that this is first conection. 741 */ 742 if (token != NULL && size != sizeof(res->hr_token)) { 743 pjdlog_error("Received token of invalid size from %s (expected %zu, got %zu).", 744 raddr, sizeof(res->hr_token), size); 745 goto close; 746 } 747 748 /* 749 * From now on we want to send errors to the remote node. 750 */ 751 nverr = nv_alloc(); 752 753 /* Find resource related to this connection. */ 754 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 755 if (strcmp(resname, res->hr_name) == 0) 756 break; 757 } 758 /* Have we found the resource? */ 759 if (res == NULL) { 760 pjdlog_error("No resource '%s' as requested by %s.", 761 resname, raddr); 762 nv_add_stringf(nverr, "errmsg", "Resource not configured."); 763 goto fail; 764 } 765 766 /* Now that we know resource name setup log prefix. */ 767 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 768 769 /* Does the remote host have access to this resource? */ 770 if (!proto_address_match(conn, res->hr_remoteaddr)) { 771 pjdlog_error("Client %s has no access to the resource.", raddr); 772 nv_add_stringf(nverr, "errmsg", "No access to the resource."); 773 goto fail; 774 } 775 /* Is the resource marked as secondary? */ 776 if (res->hr_role != HAST_ROLE_SECONDARY) { 777 pjdlog_warning("We act as %s for the resource and not as %s as requested by %s.", 778 role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY), 779 raddr); 780 nv_add_stringf(nverr, "errmsg", 781 "Remote node acts as %s for the resource and not as %s.", 782 role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY)); 783 if (res->hr_role == HAST_ROLE_PRIMARY) { 784 /* 785 * If we act as primary request the other side to wait 786 * for us a bit, as we might be finishing cleanups. 787 */ 788 nv_add_uint8(nverr, 1, "wait"); 789 } 790 goto fail; 791 } 792 /* Does token (if exists) match? */ 793 if (token != NULL && memcmp(token, res->hr_token, 794 sizeof(res->hr_token)) != 0) { 795 pjdlog_error("Token received from %s doesn't match.", raddr); 796 nv_add_stringf(nverr, "errmsg", "Token doesn't match."); 797 goto fail; 798 } 799 /* 800 * If there is no token, but we have half-open connection 801 * (only remotein) or full connection (worker process is running) 802 * we have to cancel those and accept the new connection. 803 */ 804 if (token == NULL) { 805 PJDLOG_ASSERT(res->hr_remoteout == NULL); 806 pjdlog_debug(1, "Initial connection from %s.", raddr); 807 if (res->hr_workerpid != 0) { 808 PJDLOG_ASSERT(res->hr_remotein == NULL); 809 pjdlog_debug(1, 810 "Worker process exists (pid=%u), stopping it.", 811 (unsigned int)res->hr_workerpid); 812 /* Stop child process. */ 813 if (kill(res->hr_workerpid, SIGINT) < 0) { 814 pjdlog_errno(LOG_ERR, 815 "Unable to stop worker process (pid=%u)", 816 (unsigned int)res->hr_workerpid); 817 /* 818 * Other than logging the problem we 819 * ignore it - nothing smart to do. 820 */ 821 } 822 /* Wait for it to exit. */ 823 else if ((pid = waitpid(res->hr_workerpid, 824 &status, 0)) != res->hr_workerpid) { 825 /* We can only log the problem. */ 826 pjdlog_errno(LOG_ERR, 827 "Waiting for worker process (pid=%u) failed", 828 (unsigned int)res->hr_workerpid); 829 } else { 830 child_exit_log(res->hr_workerpid, status); 831 } 832 child_cleanup(res); 833 } else if (res->hr_remotein != NULL) { 834 char oaddr[256]; 835 836 proto_remote_address(res->hr_remotein, oaddr, 837 sizeof(oaddr)); 838 pjdlog_debug(1, 839 "Canceling half-open connection from %s on connection from %s.", 840 oaddr, raddr); 841 proto_close(res->hr_remotein); 842 res->hr_remotein = NULL; 843 } 844 } 845 846 /* 847 * Checks and cleanups are done. 848 */ 849 850 if (token == NULL) { 851 arc4random_buf(res->hr_token, sizeof(res->hr_token)); 852 nvout = nv_alloc(); 853 nv_add_uint8_array(nvout, res->hr_token, 854 sizeof(res->hr_token), "token"); 855 if (nv_error(nvout) != 0) { 856 pjdlog_common(LOG_ERR, 0, nv_error(nvout), 857 "Unable to prepare return header for %s", raddr); 858 nv_add_stringf(nverr, "errmsg", 859 "Remote node was unable to prepare return header: %s.", 860 strerror(nv_error(nvout))); 861 goto fail; 862 } 863 if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) { 864 int error = errno; 865 866 pjdlog_errno(LOG_ERR, "Unable to send response to %s", 867 raddr); 868 nv_add_stringf(nverr, "errmsg", 869 "Remote node was unable to send response: %s.", 870 strerror(error)); 871 goto fail; 872 } 873 res->hr_remotein = conn; 874 pjdlog_debug(1, "Incoming connection from %s configured.", 875 raddr); 876 } else { 877 res->hr_remoteout = conn; 878 pjdlog_debug(1, "Outgoing connection to %s configured.", raddr); 879 hastd_secondary(res, nvin); 880 } 881 nv_free(nvin); 882 nv_free(nvout); 883 nv_free(nverr); 884 pjdlog_prefix_set("%s", ""); 885 return; 886 fail: 887 if (nv_error(nverr) != 0) { 888 pjdlog_common(LOG_ERR, 0, nv_error(nverr), 889 "Unable to prepare error header for %s", raddr); 890 goto close; 891 } 892 if (hast_proto_send(NULL, conn, nverr, NULL, 0) < 0) { 893 pjdlog_errno(LOG_ERR, "Unable to send error to %s", raddr); 894 goto close; 895 } 896 close: 897 if (nvin != NULL) 898 nv_free(nvin); 899 if (nvout != NULL) 900 nv_free(nvout); 901 if (nverr != NULL) 902 nv_free(nverr); 903 proto_close(conn); 904 pjdlog_prefix_set("%s", ""); 905 } 906 907 static void 908 connection_migrate(struct hast_resource *res) 909 { 910 struct proto_conn *conn; 911 int16_t val = 0; 912 913 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 914 915 PJDLOG_ASSERT(res->hr_role == HAST_ROLE_PRIMARY); 916 917 if (proto_recv(res->hr_conn, &val, sizeof(val)) < 0) { 918 pjdlog_errno(LOG_WARNING, 919 "Unable to receive connection command"); 920 return; 921 } 922 if (proto_client(res->hr_sourceaddr[0] != '\0' ? res->hr_sourceaddr : NULL, 923 res->hr_remoteaddr, &conn) < 0) { 924 val = errno; 925 pjdlog_errno(LOG_WARNING, 926 "Unable to create outgoing connection to %s", 927 res->hr_remoteaddr); 928 goto out; 929 } 930 if (proto_connect(conn, -1) < 0) { 931 val = errno; 932 pjdlog_errno(LOG_WARNING, "Unable to connect to %s", 933 res->hr_remoteaddr); 934 proto_close(conn); 935 goto out; 936 } 937 val = 0; 938 out: 939 if (proto_send(res->hr_conn, &val, sizeof(val)) < 0) { 940 pjdlog_errno(LOG_WARNING, 941 "Unable to send reply to connection request"); 942 } 943 if (val == 0 && proto_connection_send(res->hr_conn, conn) < 0) 944 pjdlog_errno(LOG_WARNING, "Unable to send connection"); 945 946 pjdlog_prefix_set("%s", ""); 947 } 948 949 static void 950 check_signals(void) 951 { 952 struct timespec sigtimeout; 953 sigset_t mask; 954 int signo; 955 956 sigtimeout.tv_sec = 0; 957 sigtimeout.tv_nsec = 0; 958 959 PJDLOG_VERIFY(sigemptyset(&mask) == 0); 960 PJDLOG_VERIFY(sigaddset(&mask, SIGHUP) == 0); 961 PJDLOG_VERIFY(sigaddset(&mask, SIGINT) == 0); 962 PJDLOG_VERIFY(sigaddset(&mask, SIGTERM) == 0); 963 PJDLOG_VERIFY(sigaddset(&mask, SIGCHLD) == 0); 964 965 while ((signo = sigtimedwait(&mask, NULL, &sigtimeout)) != -1) { 966 switch (signo) { 967 case SIGINT: 968 case SIGTERM: 969 sigexit_received = true; 970 terminate_workers(); 971 proto_close(cfg->hc_controlconn); 972 exit(EX_OK); 973 break; 974 case SIGCHLD: 975 child_exit(); 976 break; 977 case SIGHUP: 978 hastd_reload(); 979 break; 980 default: 981 PJDLOG_ABORT("Unexpected signal (%d).", signo); 982 } 983 } 984 } 985 986 static void 987 main_loop(void) 988 { 989 struct hast_resource *res; 990 struct hastd_listen *lst; 991 struct timeval seltimeout; 992 int fd, maxfd, ret; 993 time_t lastcheck, now; 994 fd_set rfds; 995 996 lastcheck = time(NULL); 997 seltimeout.tv_sec = REPORT_INTERVAL; 998 seltimeout.tv_usec = 0; 999 1000 for (;;) { 1001 check_signals(); 1002 1003 /* Setup descriptors for select(2). */ 1004 FD_ZERO(&rfds); 1005 maxfd = fd = proto_descriptor(cfg->hc_controlconn); 1006 PJDLOG_ASSERT(fd >= 0); 1007 FD_SET(fd, &rfds); 1008 TAILQ_FOREACH(lst, &cfg->hc_listen, hl_next) { 1009 if (lst->hl_conn == NULL) 1010 continue; 1011 fd = proto_descriptor(lst->hl_conn); 1012 PJDLOG_ASSERT(fd >= 0); 1013 FD_SET(fd, &rfds); 1014 maxfd = fd > maxfd ? fd : maxfd; 1015 } 1016 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 1017 if (res->hr_event == NULL) 1018 continue; 1019 fd = proto_descriptor(res->hr_event); 1020 PJDLOG_ASSERT(fd >= 0); 1021 FD_SET(fd, &rfds); 1022 maxfd = fd > maxfd ? fd : maxfd; 1023 if (res->hr_role == HAST_ROLE_PRIMARY) { 1024 /* Only primary workers asks for connections. */ 1025 PJDLOG_ASSERT(res->hr_conn != NULL); 1026 fd = proto_descriptor(res->hr_conn); 1027 PJDLOG_ASSERT(fd >= 0); 1028 FD_SET(fd, &rfds); 1029 maxfd = fd > maxfd ? fd : maxfd; 1030 } else { 1031 PJDLOG_ASSERT(res->hr_conn == NULL); 1032 } 1033 } 1034 1035 PJDLOG_ASSERT(maxfd + 1 <= (int)FD_SETSIZE); 1036 ret = select(maxfd + 1, &rfds, NULL, NULL, &seltimeout); 1037 now = time(NULL); 1038 if (lastcheck + REPORT_INTERVAL <= now) { 1039 hook_check(); 1040 lastcheck = now; 1041 } 1042 if (ret == 0) { 1043 /* 1044 * select(2) timed out, so there should be no 1045 * descriptors to check. 1046 */ 1047 continue; 1048 } else if (ret == -1) { 1049 if (errno == EINTR) 1050 continue; 1051 KEEP_ERRNO((void)pidfile_remove(pfh)); 1052 pjdlog_exit(EX_OSERR, "select() failed"); 1053 } 1054 1055 /* 1056 * Check for signals before we do anything to update our 1057 * info about terminated workers in the meantime. 1058 */ 1059 check_signals(); 1060 1061 if (FD_ISSET(proto_descriptor(cfg->hc_controlconn), &rfds)) 1062 control_handle(cfg); 1063 TAILQ_FOREACH(lst, &cfg->hc_listen, hl_next) { 1064 if (lst->hl_conn == NULL) 1065 continue; 1066 if (FD_ISSET(proto_descriptor(lst->hl_conn), &rfds)) 1067 listen_accept(lst); 1068 } 1069 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 1070 if (res->hr_event == NULL) 1071 continue; 1072 if (FD_ISSET(proto_descriptor(res->hr_event), &rfds)) { 1073 if (event_recv(res) == 0) 1074 continue; 1075 /* The worker process exited? */ 1076 proto_close(res->hr_event); 1077 res->hr_event = NULL; 1078 if (res->hr_conn != NULL) { 1079 proto_close(res->hr_conn); 1080 res->hr_conn = NULL; 1081 } 1082 continue; 1083 } 1084 if (res->hr_role == HAST_ROLE_PRIMARY) { 1085 PJDLOG_ASSERT(res->hr_conn != NULL); 1086 if (FD_ISSET(proto_descriptor(res->hr_conn), 1087 &rfds)) { 1088 connection_migrate(res); 1089 } 1090 } else { 1091 PJDLOG_ASSERT(res->hr_conn == NULL); 1092 } 1093 } 1094 } 1095 } 1096 1097 static void 1098 dummy_sighandler(int sig __unused) 1099 { 1100 /* Nothing to do. */ 1101 } 1102 1103 int 1104 main(int argc, char *argv[]) 1105 { 1106 struct hastd_listen *lst; 1107 const char *pidfile; 1108 pid_t otherpid; 1109 bool foreground; 1110 int debuglevel; 1111 sigset_t mask; 1112 1113 foreground = false; 1114 debuglevel = 0; 1115 pidfile = HASTD_PIDFILE; 1116 1117 for (;;) { 1118 int ch; 1119 1120 ch = getopt(argc, argv, "c:dFhP:"); 1121 if (ch == -1) 1122 break; 1123 switch (ch) { 1124 case 'c': 1125 cfgpath = optarg; 1126 break; 1127 case 'd': 1128 debuglevel++; 1129 break; 1130 case 'F': 1131 foreground = true; 1132 break; 1133 case 'P': 1134 pidfile = optarg; 1135 break; 1136 case 'h': 1137 default: 1138 usage(); 1139 } 1140 } 1141 argc -= optind; 1142 argv += optind; 1143 1144 pjdlog_init(PJDLOG_MODE_STD); 1145 pjdlog_debug_set(debuglevel); 1146 1147 g_gate_load(); 1148 1149 pfh = pidfile_open(pidfile, 0600, &otherpid); 1150 if (pfh == NULL) { 1151 if (errno == EEXIST) { 1152 pjdlog_exitx(EX_TEMPFAIL, 1153 "Another hastd is already running, pid: %jd.", 1154 (intmax_t)otherpid); 1155 } 1156 /* If we cannot create pidfile from other reasons, only warn. */ 1157 pjdlog_errno(LOG_WARNING, "Unable to open or create pidfile"); 1158 } 1159 1160 cfg = yy_config_parse(cfgpath, true); 1161 PJDLOG_ASSERT(cfg != NULL); 1162 1163 /* 1164 * Restore default actions for interesting signals in case parent 1165 * process (like init(8)) decided to ignore some of them (like SIGHUP). 1166 */ 1167 PJDLOG_VERIFY(signal(SIGHUP, SIG_DFL) != SIG_ERR); 1168 PJDLOG_VERIFY(signal(SIGINT, SIG_DFL) != SIG_ERR); 1169 PJDLOG_VERIFY(signal(SIGTERM, SIG_DFL) != SIG_ERR); 1170 /* 1171 * Because SIGCHLD is ignored by default, setup dummy handler for it, 1172 * so we can mask it. 1173 */ 1174 PJDLOG_VERIFY(signal(SIGCHLD, dummy_sighandler) != SIG_ERR); 1175 1176 PJDLOG_VERIFY(sigemptyset(&mask) == 0); 1177 PJDLOG_VERIFY(sigaddset(&mask, SIGHUP) == 0); 1178 PJDLOG_VERIFY(sigaddset(&mask, SIGINT) == 0); 1179 PJDLOG_VERIFY(sigaddset(&mask, SIGTERM) == 0); 1180 PJDLOG_VERIFY(sigaddset(&mask, SIGCHLD) == 0); 1181 PJDLOG_VERIFY(sigprocmask(SIG_SETMASK, &mask, NULL) == 0); 1182 1183 /* Listen on control address. */ 1184 if (proto_server(cfg->hc_controladdr, &cfg->hc_controlconn) < 0) { 1185 KEEP_ERRNO((void)pidfile_remove(pfh)); 1186 pjdlog_exit(EX_OSERR, "Unable to listen on control address %s", 1187 cfg->hc_controladdr); 1188 } 1189 /* Listen for remote connections. */ 1190 TAILQ_FOREACH(lst, &cfg->hc_listen, hl_next) { 1191 if (proto_server(lst->hl_addr, &lst->hl_conn) < 0) { 1192 KEEP_ERRNO((void)pidfile_remove(pfh)); 1193 pjdlog_exit(EX_OSERR, "Unable to listen on address %s", 1194 lst->hl_addr); 1195 } 1196 } 1197 1198 if (!foreground) { 1199 if (daemon(0, 0) < 0) { 1200 KEEP_ERRNO((void)pidfile_remove(pfh)); 1201 pjdlog_exit(EX_OSERR, "Unable to daemonize"); 1202 } 1203 1204 /* Start logging to syslog. */ 1205 pjdlog_mode_set(PJDLOG_MODE_SYSLOG); 1206 1207 /* Write PID to a file. */ 1208 if (pidfile_write(pfh) < 0) { 1209 pjdlog_errno(LOG_WARNING, 1210 "Unable to write PID to a file"); 1211 } 1212 } 1213 1214 pjdlog_info("Started successfully, running protocol version %d.", 1215 HAST_PROTO_VERSION); 1216 1217 pjdlog_debug(1, "Listening on control address %s.", 1218 cfg->hc_controladdr); 1219 TAILQ_FOREACH(lst, &cfg->hc_listen, hl_next) 1220 pjdlog_info("Listening on address %s.", lst->hl_addr); 1221 1222 hook_init(); 1223 1224 main_loop(); 1225 1226 exit(0); 1227 } 1228