1 /*- 2 * Copyright (c) 2009-2010 The FreeBSD Foundation 3 * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net> 4 * All rights reserved. 5 * 6 * This software was developed by Pawel Jakub Dawidek under sponsorship from 7 * the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/linker.h> 36 #include <sys/module.h> 37 #include <sys/stat.h> 38 #include <sys/wait.h> 39 40 #include <err.h> 41 #include <errno.h> 42 #include <libutil.h> 43 #include <signal.h> 44 #include <stdbool.h> 45 #include <stdio.h> 46 #include <stdlib.h> 47 #include <string.h> 48 #include <sysexits.h> 49 #include <time.h> 50 #include <unistd.h> 51 52 #include <activemap.h> 53 #include <pjdlog.h> 54 55 #include "control.h" 56 #include "event.h" 57 #include "hast.h" 58 #include "hast_proto.h" 59 #include "hastd.h" 60 #include "hooks.h" 61 #include "subr.h" 62 63 /* Path to configuration file. */ 64 const char *cfgpath = HAST_CONFIG; 65 /* Hastd configuration. */ 66 static struct hastd_config *cfg; 67 /* Was SIGINT or SIGTERM signal received? */ 68 bool sigexit_received = false; 69 /* PID file handle. */ 70 struct pidfh *pfh; 71 72 /* How often check for hooks running for too long. */ 73 #define REPORT_INTERVAL 5 74 75 static void 76 usage(void) 77 { 78 79 errx(EX_USAGE, "[-dFh] [-c config] [-P pidfile]"); 80 } 81 82 static void 83 g_gate_load(void) 84 { 85 86 if (modfind("g_gate") == -1) { 87 /* Not present in kernel, try loading it. */ 88 if (kldload("geom_gate") == -1 || modfind("g_gate") == -1) { 89 if (errno != EEXIST) { 90 pjdlog_exit(EX_OSERR, 91 "Unable to load geom_gate module"); 92 } 93 } 94 } 95 } 96 97 void 98 descriptors_cleanup(struct hast_resource *res) 99 { 100 struct hast_resource *tres; 101 struct hastd_listen *lst; 102 103 TAILQ_FOREACH(tres, &cfg->hc_resources, hr_next) { 104 if (tres == res) { 105 PJDLOG_VERIFY(res->hr_role == HAST_ROLE_SECONDARY || 106 (res->hr_remotein == NULL && 107 res->hr_remoteout == NULL)); 108 continue; 109 } 110 if (tres->hr_remotein != NULL) 111 proto_close(tres->hr_remotein); 112 if (tres->hr_remoteout != NULL) 113 proto_close(tres->hr_remoteout); 114 if (tres->hr_ctrl != NULL) 115 proto_close(tres->hr_ctrl); 116 if (tres->hr_event != NULL) 117 proto_close(tres->hr_event); 118 if (tres->hr_conn != NULL) 119 proto_close(tres->hr_conn); 120 } 121 if (cfg->hc_controlin != NULL) 122 proto_close(cfg->hc_controlin); 123 proto_close(cfg->hc_controlconn); 124 TAILQ_FOREACH(lst, &cfg->hc_listen, hl_next) { 125 if (lst->hl_conn != NULL) 126 proto_close(lst->hl_conn); 127 } 128 (void)pidfile_close(pfh); 129 hook_fini(); 130 pjdlog_fini(); 131 } 132 133 static const char * 134 dtype2str(mode_t mode) 135 { 136 137 if (S_ISBLK(mode)) 138 return ("block device"); 139 else if (S_ISCHR(mode)) 140 return ("character device"); 141 else if (S_ISDIR(mode)) 142 return ("directory"); 143 else if (S_ISFIFO(mode)) 144 return ("pipe or FIFO"); 145 else if (S_ISLNK(mode)) 146 return ("symbolic link"); 147 else if (S_ISREG(mode)) 148 return ("regular file"); 149 else if (S_ISSOCK(mode)) 150 return ("socket"); 151 else if (S_ISWHT(mode)) 152 return ("whiteout"); 153 else 154 return ("unknown"); 155 } 156 157 void 158 descriptors_assert(const struct hast_resource *res, int pjdlogmode) 159 { 160 char msg[256]; 161 struct stat sb; 162 long maxfd; 163 bool isopen; 164 mode_t mode; 165 int fd; 166 167 /* 168 * At this point descriptor to syslog socket is closed, so if we want 169 * to log assertion message, we have to first store it in 'msg' local 170 * buffer and then open syslog socket and log it. 171 */ 172 msg[0] = '\0'; 173 174 maxfd = sysconf(_SC_OPEN_MAX); 175 if (maxfd < 0) { 176 pjdlog_init(pjdlogmode); 177 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, 178 role2str(res->hr_role)); 179 pjdlog_errno(LOG_WARNING, "sysconf(_SC_OPEN_MAX) failed"); 180 pjdlog_fini(); 181 maxfd = 16384; 182 } 183 for (fd = 0; fd <= maxfd; fd++) { 184 if (fstat(fd, &sb) == 0) { 185 isopen = true; 186 mode = sb.st_mode; 187 } else if (errno == EBADF) { 188 isopen = false; 189 mode = 0; 190 } else { 191 (void)snprintf(msg, sizeof(msg), 192 "Unable to fstat descriptor %d: %s", fd, 193 strerror(errno)); 194 break; 195 } 196 if (fd == STDIN_FILENO || fd == STDOUT_FILENO || 197 fd == STDERR_FILENO) { 198 if (!isopen) { 199 (void)snprintf(msg, sizeof(msg), 200 "Descriptor %d (%s) is closed, but should be open.", 201 fd, (fd == STDIN_FILENO ? "stdin" : 202 (fd == STDOUT_FILENO ? "stdout" : "stderr"))); 203 break; 204 } 205 } else if (fd == proto_descriptor(res->hr_event)) { 206 if (!isopen) { 207 (void)snprintf(msg, sizeof(msg), 208 "Descriptor %d (event) is closed, but should be open.", 209 fd); 210 break; 211 } 212 if (!S_ISSOCK(mode)) { 213 (void)snprintf(msg, sizeof(msg), 214 "Descriptor %d (event) is %s, but should be %s.", 215 fd, dtype2str(mode), dtype2str(S_IFSOCK)); 216 break; 217 } 218 } else if (fd == proto_descriptor(res->hr_ctrl)) { 219 if (!isopen) { 220 (void)snprintf(msg, sizeof(msg), 221 "Descriptor %d (ctrl) is closed, but should be open.", 222 fd); 223 break; 224 } 225 if (!S_ISSOCK(mode)) { 226 (void)snprintf(msg, sizeof(msg), 227 "Descriptor %d (ctrl) is %s, but should be %s.", 228 fd, dtype2str(mode), dtype2str(S_IFSOCK)); 229 break; 230 } 231 } else if (res->hr_role == HAST_ROLE_PRIMARY && 232 fd == proto_descriptor(res->hr_conn)) { 233 if (!isopen) { 234 (void)snprintf(msg, sizeof(msg), 235 "Descriptor %d (conn) is closed, but should be open.", 236 fd); 237 break; 238 } 239 if (!S_ISSOCK(mode)) { 240 (void)snprintf(msg, sizeof(msg), 241 "Descriptor %d (conn) is %s, but should be %s.", 242 fd, dtype2str(mode), dtype2str(S_IFSOCK)); 243 break; 244 } 245 } else if (res->hr_role == HAST_ROLE_SECONDARY && 246 res->hr_conn != NULL && 247 fd == proto_descriptor(res->hr_conn)) { 248 if (isopen) { 249 (void)snprintf(msg, sizeof(msg), 250 "Descriptor %d (conn) is open, but should be closed.", 251 fd); 252 break; 253 } 254 } else if (res->hr_role == HAST_ROLE_SECONDARY && 255 fd == proto_descriptor(res->hr_remotein)) { 256 if (!isopen) { 257 (void)snprintf(msg, sizeof(msg), 258 "Descriptor %d (remote in) is closed, but should be open.", 259 fd); 260 break; 261 } 262 if (!S_ISSOCK(mode)) { 263 (void)snprintf(msg, sizeof(msg), 264 "Descriptor %d (remote in) is %s, but should be %s.", 265 fd, dtype2str(mode), dtype2str(S_IFSOCK)); 266 break; 267 } 268 } else if (res->hr_role == HAST_ROLE_SECONDARY && 269 fd == proto_descriptor(res->hr_remoteout)) { 270 if (!isopen) { 271 (void)snprintf(msg, sizeof(msg), 272 "Descriptor %d (remote out) is closed, but should be open.", 273 fd); 274 break; 275 } 276 if (!S_ISSOCK(mode)) { 277 (void)snprintf(msg, sizeof(msg), 278 "Descriptor %d (remote out) is %s, but should be %s.", 279 fd, dtype2str(mode), dtype2str(S_IFSOCK)); 280 break; 281 } 282 } else { 283 if (isopen) { 284 (void)snprintf(msg, sizeof(msg), 285 "Descriptor %d is open (%s), but should be closed.", 286 fd, dtype2str(mode)); 287 break; 288 } 289 } 290 } 291 if (msg[0] != '\0') { 292 pjdlog_init(pjdlogmode); 293 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, 294 role2str(res->hr_role)); 295 PJDLOG_ABORT("%s", msg); 296 } 297 } 298 299 static void 300 child_exit_log(unsigned int pid, int status) 301 { 302 303 if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { 304 pjdlog_debug(1, "Worker process exited gracefully (pid=%u).", 305 pid); 306 } else if (WIFSIGNALED(status)) { 307 pjdlog_error("Worker process killed (pid=%u, signal=%d).", 308 pid, WTERMSIG(status)); 309 } else { 310 pjdlog_error("Worker process exited ungracefully (pid=%u, exitcode=%d).", 311 pid, WIFEXITED(status) ? WEXITSTATUS(status) : -1); 312 } 313 } 314 315 static void 316 child_exit(void) 317 { 318 struct hast_resource *res; 319 int status; 320 pid_t pid; 321 322 while ((pid = wait3(&status, WNOHANG, NULL)) > 0) { 323 /* Find resource related to the process that just exited. */ 324 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 325 if (pid == res->hr_workerpid) 326 break; 327 } 328 if (res == NULL) { 329 /* 330 * This can happen when new connection arrives and we 331 * cancel child responsible for the old one or if this 332 * was hook which we executed. 333 */ 334 hook_check_one(pid, status); 335 continue; 336 } 337 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, 338 role2str(res->hr_role)); 339 child_exit_log(pid, status); 340 child_cleanup(res); 341 if (res->hr_role == HAST_ROLE_PRIMARY) { 342 /* 343 * Restart child process if it was killed by signal 344 * or exited because of temporary problem. 345 */ 346 if (WIFSIGNALED(status) || 347 (WIFEXITED(status) && 348 WEXITSTATUS(status) == EX_TEMPFAIL)) { 349 sleep(1); 350 pjdlog_info("Restarting worker process."); 351 hastd_primary(res); 352 } else { 353 res->hr_role = HAST_ROLE_INIT; 354 pjdlog_info("Changing resource role back to %s.", 355 role2str(res->hr_role)); 356 } 357 } 358 pjdlog_prefix_set("%s", ""); 359 } 360 } 361 362 static bool 363 resource_needs_restart(const struct hast_resource *res0, 364 const struct hast_resource *res1) 365 { 366 367 PJDLOG_ASSERT(strcmp(res0->hr_name, res1->hr_name) == 0); 368 369 if (strcmp(res0->hr_provname, res1->hr_provname) != 0) 370 return (true); 371 if (strcmp(res0->hr_localpath, res1->hr_localpath) != 0) 372 return (true); 373 if (res0->hr_role == HAST_ROLE_INIT || 374 res0->hr_role == HAST_ROLE_SECONDARY) { 375 if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0) 376 return (true); 377 if (strcmp(res0->hr_sourceaddr, res1->hr_sourceaddr) != 0) 378 return (true); 379 if (res0->hr_replication != res1->hr_replication) 380 return (true); 381 if (res0->hr_checksum != res1->hr_checksum) 382 return (true); 383 if (res0->hr_compression != res1->hr_compression) 384 return (true); 385 if (res0->hr_timeout != res1->hr_timeout) 386 return (true); 387 if (strcmp(res0->hr_exec, res1->hr_exec) != 0) 388 return (true); 389 /* 390 * When metaflush has changed we don't really need restart, 391 * but it is just easier this way. 392 */ 393 if (res0->hr_metaflush != res1->hr_metaflush) 394 return (true); 395 } 396 return (false); 397 } 398 399 static bool 400 resource_needs_reload(const struct hast_resource *res0, 401 const struct hast_resource *res1) 402 { 403 404 PJDLOG_ASSERT(strcmp(res0->hr_name, res1->hr_name) == 0); 405 PJDLOG_ASSERT(strcmp(res0->hr_provname, res1->hr_provname) == 0); 406 PJDLOG_ASSERT(strcmp(res0->hr_localpath, res1->hr_localpath) == 0); 407 408 if (res0->hr_role != HAST_ROLE_PRIMARY) 409 return (false); 410 411 if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0) 412 return (true); 413 if (strcmp(res0->hr_sourceaddr, res1->hr_sourceaddr) != 0) 414 return (true); 415 if (res0->hr_replication != res1->hr_replication) 416 return (true); 417 if (res0->hr_checksum != res1->hr_checksum) 418 return (true); 419 if (res0->hr_compression != res1->hr_compression) 420 return (true); 421 if (res0->hr_timeout != res1->hr_timeout) 422 return (true); 423 if (strcmp(res0->hr_exec, res1->hr_exec) != 0) 424 return (true); 425 if (res0->hr_metaflush != res1->hr_metaflush) 426 return (true); 427 return (false); 428 } 429 430 static void 431 resource_reload(const struct hast_resource *res) 432 { 433 struct nv *nvin, *nvout; 434 int error; 435 436 PJDLOG_ASSERT(res->hr_role == HAST_ROLE_PRIMARY); 437 438 nvout = nv_alloc(); 439 nv_add_uint8(nvout, CONTROL_RELOAD, "cmd"); 440 nv_add_string(nvout, res->hr_remoteaddr, "remoteaddr"); 441 nv_add_string(nvout, res->hr_sourceaddr, "sourceaddr"); 442 nv_add_int32(nvout, (int32_t)res->hr_replication, "replication"); 443 nv_add_int32(nvout, (int32_t)res->hr_checksum, "checksum"); 444 nv_add_int32(nvout, (int32_t)res->hr_compression, "compression"); 445 nv_add_int32(nvout, (int32_t)res->hr_timeout, "timeout"); 446 nv_add_string(nvout, res->hr_exec, "exec"); 447 nv_add_int32(nvout, (int32_t)res->hr_metaflush, "metaflush"); 448 if (nv_error(nvout) != 0) { 449 nv_free(nvout); 450 pjdlog_error("Unable to allocate header for reload message."); 451 return; 452 } 453 if (hast_proto_send(res, res->hr_ctrl, nvout, NULL, 0) < 0) { 454 pjdlog_errno(LOG_ERR, "Unable to send reload message"); 455 nv_free(nvout); 456 return; 457 } 458 nv_free(nvout); 459 460 /* Receive response. */ 461 if (hast_proto_recv_hdr(res->hr_ctrl, &nvin) < 0) { 462 pjdlog_errno(LOG_ERR, "Unable to receive reload reply"); 463 return; 464 } 465 error = nv_get_int16(nvin, "error"); 466 nv_free(nvin); 467 if (error != 0) { 468 pjdlog_common(LOG_ERR, 0, error, "Reload failed"); 469 return; 470 } 471 } 472 473 static void 474 hastd_reload(void) 475 { 476 struct hastd_config *newcfg; 477 struct hast_resource *nres, *cres, *tres; 478 struct hastd_listen *nlst, *clst; 479 unsigned int nlisten; 480 uint8_t role; 481 482 pjdlog_info("Reloading configuration..."); 483 484 newcfg = yy_config_parse(cfgpath, false); 485 if (newcfg == NULL) 486 goto failed; 487 488 /* 489 * Check if control address has changed. 490 */ 491 if (strcmp(cfg->hc_controladdr, newcfg->hc_controladdr) != 0) { 492 if (proto_server(newcfg->hc_controladdr, 493 &newcfg->hc_controlconn) < 0) { 494 pjdlog_errno(LOG_ERR, 495 "Unable to listen on control address %s", 496 newcfg->hc_controladdr); 497 goto failed; 498 } 499 } 500 /* 501 * Check if any listen address has changed. 502 */ 503 nlisten = 0; 504 TAILQ_FOREACH(nlst, &newcfg->hc_listen, hl_next) { 505 TAILQ_FOREACH(clst, &cfg->hc_listen, hl_next) { 506 if (strcmp(nlst->hl_addr, clst->hl_addr) == 0) 507 break; 508 } 509 if (clst != NULL && clst->hl_conn != NULL) { 510 pjdlog_info("Keep listening on address %s.", 511 nlst->hl_addr); 512 nlst->hl_conn = clst->hl_conn; 513 nlisten++; 514 } else if (proto_server(nlst->hl_addr, &nlst->hl_conn) == 0) { 515 pjdlog_info("Listening on new address %s.", 516 nlst->hl_addr); 517 nlisten++; 518 } else { 519 pjdlog_errno(LOG_WARNING, 520 "Unable to listen on address %s", nlst->hl_addr); 521 } 522 } 523 if (nlisten == 0) { 524 pjdlog_error("No addresses to listen on."); 525 goto failed; 526 } 527 528 /* No failures from now on. */ 529 530 /* 531 * Switch to new control socket. 532 */ 533 if (newcfg->hc_controlconn != NULL) { 534 pjdlog_info("Control socket changed from %s to %s.", 535 cfg->hc_controladdr, newcfg->hc_controladdr); 536 proto_close(cfg->hc_controlconn); 537 cfg->hc_controlconn = newcfg->hc_controlconn; 538 newcfg->hc_controlconn = NULL; 539 strlcpy(cfg->hc_controladdr, newcfg->hc_controladdr, 540 sizeof(cfg->hc_controladdr)); 541 } 542 /* 543 * Switch to new listen addresses. Close all that were removed. 544 */ 545 while ((clst = TAILQ_FIRST(&cfg->hc_listen)) != NULL) { 546 TAILQ_FOREACH(nlst, &newcfg->hc_listen, hl_next) { 547 if (strcmp(nlst->hl_addr, clst->hl_addr) == 0) 548 break; 549 } 550 if (nlst == NULL && clst->hl_conn != NULL) { 551 proto_close(clst->hl_conn); 552 pjdlog_info("No longer listening on address %s.", 553 clst->hl_addr); 554 } 555 TAILQ_REMOVE(&cfg->hc_listen, clst, hl_next); 556 free(clst); 557 } 558 TAILQ_CONCAT(&cfg->hc_listen, &newcfg->hc_listen, hl_next); 559 560 /* 561 * Stop and remove resources that were removed from the configuration. 562 */ 563 TAILQ_FOREACH_SAFE(cres, &cfg->hc_resources, hr_next, tres) { 564 TAILQ_FOREACH(nres, &newcfg->hc_resources, hr_next) { 565 if (strcmp(cres->hr_name, nres->hr_name) == 0) 566 break; 567 } 568 if (nres == NULL) { 569 control_set_role(cres, HAST_ROLE_INIT); 570 TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next); 571 pjdlog_info("Resource %s removed.", cres->hr_name); 572 free(cres); 573 } 574 } 575 /* 576 * Move new resources to the current configuration. 577 */ 578 TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) { 579 TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) { 580 if (strcmp(cres->hr_name, nres->hr_name) == 0) 581 break; 582 } 583 if (cres == NULL) { 584 TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next); 585 TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next); 586 pjdlog_info("Resource %s added.", nres->hr_name); 587 } 588 } 589 /* 590 * Deal with modified resources. 591 * Depending on what has changed exactly we might want to perform 592 * different actions. 593 * 594 * We do full resource restart in the following situations: 595 * Resource role is INIT or SECONDARY. 596 * Resource role is PRIMARY and path to local component or provider 597 * name has changed. 598 * In case of PRIMARY, the worker process will be killed and restarted, 599 * which also means removing /dev/hast/<name> provider and 600 * recreating it. 601 * 602 * We do just reload (send SIGHUP to worker process) if we act as 603 * PRIMARY, but only if remote address, source address, replication 604 * mode, timeout, execution path or metaflush has changed. 605 * For those, there is no need to restart worker process. 606 * If PRIMARY receives SIGHUP, it will reconnect if remote address or 607 * source address has changed or it will set new timeout if only timeout 608 * has changed or it will update metaflush if only metaflush has 609 * changed. 610 */ 611 TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) { 612 TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) { 613 if (strcmp(cres->hr_name, nres->hr_name) == 0) 614 break; 615 } 616 PJDLOG_ASSERT(cres != NULL); 617 if (resource_needs_restart(cres, nres)) { 618 pjdlog_info("Resource %s configuration was modified, restarting it.", 619 cres->hr_name); 620 role = cres->hr_role; 621 control_set_role(cres, HAST_ROLE_INIT); 622 TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next); 623 free(cres); 624 TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next); 625 TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next); 626 control_set_role(nres, role); 627 } else if (resource_needs_reload(cres, nres)) { 628 pjdlog_info("Resource %s configuration was modified, reloading it.", 629 cres->hr_name); 630 strlcpy(cres->hr_remoteaddr, nres->hr_remoteaddr, 631 sizeof(cres->hr_remoteaddr)); 632 strlcpy(cres->hr_sourceaddr, nres->hr_sourceaddr, 633 sizeof(cres->hr_sourceaddr)); 634 cres->hr_replication = nres->hr_replication; 635 cres->hr_checksum = nres->hr_checksum; 636 cres->hr_compression = nres->hr_compression; 637 cres->hr_timeout = nres->hr_timeout; 638 strlcpy(cres->hr_exec, nres->hr_exec, 639 sizeof(cres->hr_exec)); 640 cres->hr_metaflush = nres->hr_metaflush; 641 if (cres->hr_workerpid != 0) 642 resource_reload(cres); 643 } 644 } 645 646 yy_config_free(newcfg); 647 pjdlog_info("Configuration reloaded successfully."); 648 return; 649 failed: 650 if (newcfg != NULL) { 651 if (newcfg->hc_controlconn != NULL) 652 proto_close(newcfg->hc_controlconn); 653 while ((nlst = TAILQ_FIRST(&newcfg->hc_listen)) != NULL) { 654 if (nlst->hl_conn != NULL) { 655 TAILQ_FOREACH(clst, &cfg->hc_listen, hl_next) { 656 if (strcmp(nlst->hl_addr, 657 clst->hl_addr) == 0) { 658 break; 659 } 660 } 661 if (clst == NULL || clst->hl_conn == NULL) 662 proto_close(nlst->hl_conn); 663 } 664 TAILQ_REMOVE(&newcfg->hc_listen, nlst, hl_next); 665 free(nlst); 666 } 667 yy_config_free(newcfg); 668 } 669 pjdlog_warning("Configuration not reloaded."); 670 } 671 672 static void 673 terminate_workers(void) 674 { 675 struct hast_resource *res; 676 677 pjdlog_info("Termination signal received, exiting."); 678 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 679 if (res->hr_workerpid == 0) 680 continue; 681 pjdlog_info("Terminating worker process (resource=%s, role=%s, pid=%u).", 682 res->hr_name, role2str(res->hr_role), res->hr_workerpid); 683 if (kill(res->hr_workerpid, SIGTERM) == 0) 684 continue; 685 pjdlog_errno(LOG_WARNING, 686 "Unable to send signal to worker process (resource=%s, role=%s, pid=%u).", 687 res->hr_name, role2str(res->hr_role), res->hr_workerpid); 688 } 689 } 690 691 static void 692 listen_accept(struct hastd_listen *lst) 693 { 694 struct hast_resource *res; 695 struct proto_conn *conn; 696 struct nv *nvin, *nvout, *nverr; 697 const char *resname; 698 const unsigned char *token; 699 char laddr[256], raddr[256]; 700 size_t size; 701 pid_t pid; 702 int status; 703 704 proto_local_address(lst->hl_conn, laddr, sizeof(laddr)); 705 pjdlog_debug(1, "Accepting connection to %s.", laddr); 706 707 if (proto_accept(lst->hl_conn, &conn) < 0) { 708 pjdlog_errno(LOG_ERR, "Unable to accept connection %s", laddr); 709 return; 710 } 711 712 proto_local_address(conn, laddr, sizeof(laddr)); 713 proto_remote_address(conn, raddr, sizeof(raddr)); 714 pjdlog_info("Connection from %s to %s.", raddr, laddr); 715 716 /* Error in setting timeout is not critical, but why should it fail? */ 717 if (proto_timeout(conn, HAST_TIMEOUT) < 0) 718 pjdlog_errno(LOG_WARNING, "Unable to set connection timeout"); 719 720 nvin = nvout = nverr = NULL; 721 722 /* 723 * Before receiving any data see if remote host have access to any 724 * resource. 725 */ 726 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 727 if (proto_address_match(conn, res->hr_remoteaddr)) 728 break; 729 } 730 if (res == NULL) { 731 pjdlog_error("Client %s isn't known.", raddr); 732 goto close; 733 } 734 /* Ok, remote host can access at least one resource. */ 735 736 if (hast_proto_recv_hdr(conn, &nvin) < 0) { 737 pjdlog_errno(LOG_ERR, "Unable to receive header from %s", 738 raddr); 739 goto close; 740 } 741 742 resname = nv_get_string(nvin, "resource"); 743 if (resname == NULL) { 744 pjdlog_error("No 'resource' field in the header received from %s.", 745 raddr); 746 goto close; 747 } 748 pjdlog_debug(2, "%s: resource=%s", raddr, resname); 749 token = nv_get_uint8_array(nvin, &size, "token"); 750 /* 751 * NULL token means that this is first conection. 752 */ 753 if (token != NULL && size != sizeof(res->hr_token)) { 754 pjdlog_error("Received token of invalid size from %s (expected %zu, got %zu).", 755 raddr, sizeof(res->hr_token), size); 756 goto close; 757 } 758 759 /* 760 * From now on we want to send errors to the remote node. 761 */ 762 nverr = nv_alloc(); 763 764 /* Find resource related to this connection. */ 765 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 766 if (strcmp(resname, res->hr_name) == 0) 767 break; 768 } 769 /* Have we found the resource? */ 770 if (res == NULL) { 771 pjdlog_error("No resource '%s' as requested by %s.", 772 resname, raddr); 773 nv_add_stringf(nverr, "errmsg", "Resource not configured."); 774 goto fail; 775 } 776 777 /* Now that we know resource name setup log prefix. */ 778 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 779 780 /* Does the remote host have access to this resource? */ 781 if (!proto_address_match(conn, res->hr_remoteaddr)) { 782 pjdlog_error("Client %s has no access to the resource.", raddr); 783 nv_add_stringf(nverr, "errmsg", "No access to the resource."); 784 goto fail; 785 } 786 /* Is the resource marked as secondary? */ 787 if (res->hr_role != HAST_ROLE_SECONDARY) { 788 pjdlog_warning("We act as %s for the resource and not as %s as requested by %s.", 789 role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY), 790 raddr); 791 nv_add_stringf(nverr, "errmsg", 792 "Remote node acts as %s for the resource and not as %s.", 793 role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY)); 794 if (res->hr_role == HAST_ROLE_PRIMARY) { 795 /* 796 * If we act as primary request the other side to wait 797 * for us a bit, as we might be finishing cleanups. 798 */ 799 nv_add_uint8(nverr, 1, "wait"); 800 } 801 goto fail; 802 } 803 /* Does token (if exists) match? */ 804 if (token != NULL && memcmp(token, res->hr_token, 805 sizeof(res->hr_token)) != 0) { 806 pjdlog_error("Token received from %s doesn't match.", raddr); 807 nv_add_stringf(nverr, "errmsg", "Token doesn't match."); 808 goto fail; 809 } 810 /* 811 * If there is no token, but we have half-open connection 812 * (only remotein) or full connection (worker process is running) 813 * we have to cancel those and accept the new connection. 814 */ 815 if (token == NULL) { 816 PJDLOG_ASSERT(res->hr_remoteout == NULL); 817 pjdlog_debug(1, "Initial connection from %s.", raddr); 818 if (res->hr_workerpid != 0) { 819 PJDLOG_ASSERT(res->hr_remotein == NULL); 820 pjdlog_debug(1, 821 "Worker process exists (pid=%u), stopping it.", 822 (unsigned int)res->hr_workerpid); 823 /* Stop child process. */ 824 if (kill(res->hr_workerpid, SIGINT) < 0) { 825 pjdlog_errno(LOG_ERR, 826 "Unable to stop worker process (pid=%u)", 827 (unsigned int)res->hr_workerpid); 828 /* 829 * Other than logging the problem we 830 * ignore it - nothing smart to do. 831 */ 832 } 833 /* Wait for it to exit. */ 834 else if ((pid = waitpid(res->hr_workerpid, 835 &status, 0)) != res->hr_workerpid) { 836 /* We can only log the problem. */ 837 pjdlog_errno(LOG_ERR, 838 "Waiting for worker process (pid=%u) failed", 839 (unsigned int)res->hr_workerpid); 840 } else { 841 child_exit_log(res->hr_workerpid, status); 842 } 843 child_cleanup(res); 844 } else if (res->hr_remotein != NULL) { 845 char oaddr[256]; 846 847 proto_remote_address(res->hr_remotein, oaddr, 848 sizeof(oaddr)); 849 pjdlog_debug(1, 850 "Canceling half-open connection from %s on connection from %s.", 851 oaddr, raddr); 852 proto_close(res->hr_remotein); 853 res->hr_remotein = NULL; 854 } 855 } 856 857 /* 858 * Checks and cleanups are done. 859 */ 860 861 if (token == NULL) { 862 arc4random_buf(res->hr_token, sizeof(res->hr_token)); 863 nvout = nv_alloc(); 864 nv_add_uint8_array(nvout, res->hr_token, 865 sizeof(res->hr_token), "token"); 866 if (nv_error(nvout) != 0) { 867 pjdlog_common(LOG_ERR, 0, nv_error(nvout), 868 "Unable to prepare return header for %s", raddr); 869 nv_add_stringf(nverr, "errmsg", 870 "Remote node was unable to prepare return header: %s.", 871 strerror(nv_error(nvout))); 872 goto fail; 873 } 874 if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) { 875 int error = errno; 876 877 pjdlog_errno(LOG_ERR, "Unable to send response to %s", 878 raddr); 879 nv_add_stringf(nverr, "errmsg", 880 "Remote node was unable to send response: %s.", 881 strerror(error)); 882 goto fail; 883 } 884 res->hr_remotein = conn; 885 pjdlog_debug(1, "Incoming connection from %s configured.", 886 raddr); 887 } else { 888 res->hr_remoteout = conn; 889 pjdlog_debug(1, "Outgoing connection to %s configured.", raddr); 890 hastd_secondary(res, nvin); 891 } 892 nv_free(nvin); 893 nv_free(nvout); 894 nv_free(nverr); 895 pjdlog_prefix_set("%s", ""); 896 return; 897 fail: 898 if (nv_error(nverr) != 0) { 899 pjdlog_common(LOG_ERR, 0, nv_error(nverr), 900 "Unable to prepare error header for %s", raddr); 901 goto close; 902 } 903 if (hast_proto_send(NULL, conn, nverr, NULL, 0) < 0) { 904 pjdlog_errno(LOG_ERR, "Unable to send error to %s", raddr); 905 goto close; 906 } 907 close: 908 if (nvin != NULL) 909 nv_free(nvin); 910 if (nvout != NULL) 911 nv_free(nvout); 912 if (nverr != NULL) 913 nv_free(nverr); 914 proto_close(conn); 915 pjdlog_prefix_set("%s", ""); 916 } 917 918 static void 919 connection_migrate(struct hast_resource *res) 920 { 921 struct proto_conn *conn; 922 int16_t val = 0; 923 924 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 925 926 PJDLOG_ASSERT(res->hr_role == HAST_ROLE_PRIMARY); 927 928 if (proto_recv(res->hr_conn, &val, sizeof(val)) < 0) { 929 pjdlog_errno(LOG_WARNING, 930 "Unable to receive connection command"); 931 return; 932 } 933 if (proto_client(res->hr_sourceaddr[0] != '\0' ? res->hr_sourceaddr : NULL, 934 res->hr_remoteaddr, &conn) < 0) { 935 val = errno; 936 pjdlog_errno(LOG_WARNING, 937 "Unable to create outgoing connection to %s", 938 res->hr_remoteaddr); 939 goto out; 940 } 941 if (proto_connect(conn, -1) < 0) { 942 val = errno; 943 pjdlog_errno(LOG_WARNING, "Unable to connect to %s", 944 res->hr_remoteaddr); 945 proto_close(conn); 946 goto out; 947 } 948 val = 0; 949 out: 950 if (proto_send(res->hr_conn, &val, sizeof(val)) < 0) { 951 pjdlog_errno(LOG_WARNING, 952 "Unable to send reply to connection request"); 953 } 954 if (val == 0 && proto_connection_send(res->hr_conn, conn) < 0) 955 pjdlog_errno(LOG_WARNING, "Unable to send connection"); 956 957 pjdlog_prefix_set("%s", ""); 958 } 959 960 static void 961 check_signals(void) 962 { 963 struct timespec sigtimeout; 964 sigset_t mask; 965 int signo; 966 967 sigtimeout.tv_sec = 0; 968 sigtimeout.tv_nsec = 0; 969 970 PJDLOG_VERIFY(sigemptyset(&mask) == 0); 971 PJDLOG_VERIFY(sigaddset(&mask, SIGHUP) == 0); 972 PJDLOG_VERIFY(sigaddset(&mask, SIGINT) == 0); 973 PJDLOG_VERIFY(sigaddset(&mask, SIGTERM) == 0); 974 PJDLOG_VERIFY(sigaddset(&mask, SIGCHLD) == 0); 975 976 while ((signo = sigtimedwait(&mask, NULL, &sigtimeout)) != -1) { 977 switch (signo) { 978 case SIGINT: 979 case SIGTERM: 980 sigexit_received = true; 981 terminate_workers(); 982 proto_close(cfg->hc_controlconn); 983 exit(EX_OK); 984 break; 985 case SIGCHLD: 986 child_exit(); 987 break; 988 case SIGHUP: 989 hastd_reload(); 990 break; 991 default: 992 PJDLOG_ABORT("Unexpected signal (%d).", signo); 993 } 994 } 995 } 996 997 static void 998 main_loop(void) 999 { 1000 struct hast_resource *res; 1001 struct hastd_listen *lst; 1002 struct timeval seltimeout; 1003 int fd, maxfd, ret; 1004 time_t lastcheck, now; 1005 fd_set rfds; 1006 1007 lastcheck = time(NULL); 1008 seltimeout.tv_sec = REPORT_INTERVAL; 1009 seltimeout.tv_usec = 0; 1010 1011 for (;;) { 1012 check_signals(); 1013 1014 /* Setup descriptors for select(2). */ 1015 FD_ZERO(&rfds); 1016 maxfd = fd = proto_descriptor(cfg->hc_controlconn); 1017 PJDLOG_ASSERT(fd >= 0); 1018 FD_SET(fd, &rfds); 1019 TAILQ_FOREACH(lst, &cfg->hc_listen, hl_next) { 1020 if (lst->hl_conn == NULL) 1021 continue; 1022 fd = proto_descriptor(lst->hl_conn); 1023 PJDLOG_ASSERT(fd >= 0); 1024 FD_SET(fd, &rfds); 1025 maxfd = fd > maxfd ? fd : maxfd; 1026 } 1027 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 1028 if (res->hr_event == NULL) 1029 continue; 1030 fd = proto_descriptor(res->hr_event); 1031 PJDLOG_ASSERT(fd >= 0); 1032 FD_SET(fd, &rfds); 1033 maxfd = fd > maxfd ? fd : maxfd; 1034 if (res->hr_role == HAST_ROLE_PRIMARY) { 1035 /* Only primary workers asks for connections. */ 1036 PJDLOG_ASSERT(res->hr_conn != NULL); 1037 fd = proto_descriptor(res->hr_conn); 1038 PJDLOG_ASSERT(fd >= 0); 1039 FD_SET(fd, &rfds); 1040 maxfd = fd > maxfd ? fd : maxfd; 1041 } else { 1042 PJDLOG_ASSERT(res->hr_conn == NULL); 1043 } 1044 } 1045 1046 PJDLOG_ASSERT(maxfd + 1 <= (int)FD_SETSIZE); 1047 ret = select(maxfd + 1, &rfds, NULL, NULL, &seltimeout); 1048 now = time(NULL); 1049 if (lastcheck + REPORT_INTERVAL <= now) { 1050 hook_check(); 1051 lastcheck = now; 1052 } 1053 if (ret == 0) { 1054 /* 1055 * select(2) timed out, so there should be no 1056 * descriptors to check. 1057 */ 1058 continue; 1059 } else if (ret == -1) { 1060 if (errno == EINTR) 1061 continue; 1062 KEEP_ERRNO((void)pidfile_remove(pfh)); 1063 pjdlog_exit(EX_OSERR, "select() failed"); 1064 } 1065 1066 /* 1067 * Check for signals before we do anything to update our 1068 * info about terminated workers in the meantime. 1069 */ 1070 check_signals(); 1071 1072 if (FD_ISSET(proto_descriptor(cfg->hc_controlconn), &rfds)) 1073 control_handle(cfg); 1074 TAILQ_FOREACH(lst, &cfg->hc_listen, hl_next) { 1075 if (lst->hl_conn == NULL) 1076 continue; 1077 if (FD_ISSET(proto_descriptor(lst->hl_conn), &rfds)) 1078 listen_accept(lst); 1079 } 1080 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 1081 if (res->hr_event == NULL) 1082 continue; 1083 if (FD_ISSET(proto_descriptor(res->hr_event), &rfds)) { 1084 if (event_recv(res) == 0) 1085 continue; 1086 /* The worker process exited? */ 1087 proto_close(res->hr_event); 1088 res->hr_event = NULL; 1089 if (res->hr_conn != NULL) { 1090 proto_close(res->hr_conn); 1091 res->hr_conn = NULL; 1092 } 1093 continue; 1094 } 1095 if (res->hr_role == HAST_ROLE_PRIMARY) { 1096 PJDLOG_ASSERT(res->hr_conn != NULL); 1097 if (FD_ISSET(proto_descriptor(res->hr_conn), 1098 &rfds)) { 1099 connection_migrate(res); 1100 } 1101 } else { 1102 PJDLOG_ASSERT(res->hr_conn == NULL); 1103 } 1104 } 1105 } 1106 } 1107 1108 static void 1109 dummy_sighandler(int sig __unused) 1110 { 1111 /* Nothing to do. */ 1112 } 1113 1114 int 1115 main(int argc, char *argv[]) 1116 { 1117 struct hastd_listen *lst; 1118 const char *pidfile; 1119 pid_t otherpid; 1120 bool foreground; 1121 int debuglevel; 1122 sigset_t mask; 1123 1124 foreground = false; 1125 debuglevel = 0; 1126 pidfile = HASTD_PIDFILE; 1127 1128 for (;;) { 1129 int ch; 1130 1131 ch = getopt(argc, argv, "c:dFhP:"); 1132 if (ch == -1) 1133 break; 1134 switch (ch) { 1135 case 'c': 1136 cfgpath = optarg; 1137 break; 1138 case 'd': 1139 debuglevel++; 1140 break; 1141 case 'F': 1142 foreground = true; 1143 break; 1144 case 'P': 1145 pidfile = optarg; 1146 break; 1147 case 'h': 1148 default: 1149 usage(); 1150 } 1151 } 1152 argc -= optind; 1153 argv += optind; 1154 1155 pjdlog_init(PJDLOG_MODE_STD); 1156 pjdlog_debug_set(debuglevel); 1157 1158 g_gate_load(); 1159 1160 pfh = pidfile_open(pidfile, 0600, &otherpid); 1161 if (pfh == NULL) { 1162 if (errno == EEXIST) { 1163 pjdlog_exitx(EX_TEMPFAIL, 1164 "Another hastd is already running, pid: %jd.", 1165 (intmax_t)otherpid); 1166 } 1167 /* If we cannot create pidfile from other reasons, only warn. */ 1168 pjdlog_errno(LOG_WARNING, "Unable to open or create pidfile"); 1169 } 1170 1171 cfg = yy_config_parse(cfgpath, true); 1172 PJDLOG_ASSERT(cfg != NULL); 1173 1174 /* 1175 * Restore default actions for interesting signals in case parent 1176 * process (like init(8)) decided to ignore some of them (like SIGHUP). 1177 */ 1178 PJDLOG_VERIFY(signal(SIGHUP, SIG_DFL) != SIG_ERR); 1179 PJDLOG_VERIFY(signal(SIGINT, SIG_DFL) != SIG_ERR); 1180 PJDLOG_VERIFY(signal(SIGTERM, SIG_DFL) != SIG_ERR); 1181 /* 1182 * Because SIGCHLD is ignored by default, setup dummy handler for it, 1183 * so we can mask it. 1184 */ 1185 PJDLOG_VERIFY(signal(SIGCHLD, dummy_sighandler) != SIG_ERR); 1186 1187 PJDLOG_VERIFY(sigemptyset(&mask) == 0); 1188 PJDLOG_VERIFY(sigaddset(&mask, SIGHUP) == 0); 1189 PJDLOG_VERIFY(sigaddset(&mask, SIGINT) == 0); 1190 PJDLOG_VERIFY(sigaddset(&mask, SIGTERM) == 0); 1191 PJDLOG_VERIFY(sigaddset(&mask, SIGCHLD) == 0); 1192 PJDLOG_VERIFY(sigprocmask(SIG_SETMASK, &mask, NULL) == 0); 1193 1194 /* Listen on control address. */ 1195 if (proto_server(cfg->hc_controladdr, &cfg->hc_controlconn) < 0) { 1196 KEEP_ERRNO((void)pidfile_remove(pfh)); 1197 pjdlog_exit(EX_OSERR, "Unable to listen on control address %s", 1198 cfg->hc_controladdr); 1199 } 1200 /* Listen for remote connections. */ 1201 TAILQ_FOREACH(lst, &cfg->hc_listen, hl_next) { 1202 if (proto_server(lst->hl_addr, &lst->hl_conn) < 0) { 1203 KEEP_ERRNO((void)pidfile_remove(pfh)); 1204 pjdlog_exit(EX_OSERR, "Unable to listen on address %s", 1205 lst->hl_addr); 1206 } 1207 } 1208 1209 if (!foreground) { 1210 if (daemon(0, 0) < 0) { 1211 KEEP_ERRNO((void)pidfile_remove(pfh)); 1212 pjdlog_exit(EX_OSERR, "Unable to daemonize"); 1213 } 1214 1215 /* Start logging to syslog. */ 1216 pjdlog_mode_set(PJDLOG_MODE_SYSLOG); 1217 1218 /* Write PID to a file. */ 1219 if (pidfile_write(pfh) < 0) { 1220 pjdlog_errno(LOG_WARNING, 1221 "Unable to write PID to a file"); 1222 } 1223 } 1224 1225 pjdlog_info("Started successfully, running protocol version %d.", 1226 HAST_PROTO_VERSION); 1227 1228 pjdlog_debug(1, "Listening on control address %s.", 1229 cfg->hc_controladdr); 1230 TAILQ_FOREACH(lst, &cfg->hc_listen, hl_next) 1231 pjdlog_info("Listening on address %s.", lst->hl_addr); 1232 1233 hook_init(); 1234 1235 main_loop(); 1236 1237 exit(0); 1238 } 1239