1 /*- 2 * Copyright (c) 2009-2010 The FreeBSD Foundation 3 * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net> 4 * All rights reserved. 5 * 6 * This software was developed by Pawel Jakub Dawidek under sponsorship from 7 * the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/linker.h> 36 #include <sys/module.h> 37 #include <sys/stat.h> 38 #include <sys/wait.h> 39 40 #include <err.h> 41 #include <errno.h> 42 #include <libutil.h> 43 #include <signal.h> 44 #include <stdbool.h> 45 #include <stdio.h> 46 #include <stdlib.h> 47 #include <string.h> 48 #include <sysexits.h> 49 #include <time.h> 50 #include <unistd.h> 51 52 #include <activemap.h> 53 #include <pjdlog.h> 54 55 #include "control.h" 56 #include "event.h" 57 #include "hast.h" 58 #include "hast_proto.h" 59 #include "hastd.h" 60 #include "hooks.h" 61 #include "subr.h" 62 63 /* Path to configuration file. */ 64 const char *cfgpath = HAST_CONFIG; 65 /* Hastd configuration. */ 66 static struct hastd_config *cfg; 67 /* Was SIGINT or SIGTERM signal received? */ 68 bool sigexit_received = false; 69 /* PID file handle. */ 70 struct pidfh *pfh; 71 72 /* How often check for hooks running for too long. */ 73 #define REPORT_INTERVAL 5 74 75 static void 76 usage(void) 77 { 78 79 errx(EX_USAGE, "[-dFh] [-c config] [-P pidfile]"); 80 } 81 82 static void 83 g_gate_load(void) 84 { 85 86 if (modfind("g_gate") == -1) { 87 /* Not present in kernel, try loading it. */ 88 if (kldload("geom_gate") == -1 || modfind("g_gate") == -1) { 89 if (errno != EEXIST) { 90 pjdlog_exit(EX_OSERR, 91 "Unable to load geom_gate module"); 92 } 93 } 94 } 95 } 96 97 void 98 descriptors_cleanup(struct hast_resource *res) 99 { 100 struct hast_resource *tres; 101 102 TAILQ_FOREACH(tres, &cfg->hc_resources, hr_next) { 103 if (tres == res) { 104 PJDLOG_VERIFY(res->hr_role == HAST_ROLE_SECONDARY || 105 (res->hr_remotein == NULL && 106 res->hr_remoteout == NULL)); 107 continue; 108 } 109 if (tres->hr_remotein != NULL) 110 proto_close(tres->hr_remotein); 111 if (tres->hr_remoteout != NULL) 112 proto_close(tres->hr_remoteout); 113 if (tres->hr_ctrl != NULL) 114 proto_close(tres->hr_ctrl); 115 if (tres->hr_event != NULL) 116 proto_close(tres->hr_event); 117 if (tres->hr_conn != NULL) 118 proto_close(tres->hr_conn); 119 } 120 if (cfg->hc_controlin != NULL) 121 proto_close(cfg->hc_controlin); 122 proto_close(cfg->hc_controlconn); 123 proto_close(cfg->hc_listenconn); 124 (void)pidfile_close(pfh); 125 hook_fini(); 126 pjdlog_fini(); 127 } 128 129 static const char * 130 dtype2str(mode_t mode) 131 { 132 133 if (S_ISBLK(mode)) 134 return ("block device"); 135 else if (S_ISCHR(mode)) 136 return ("character device"); 137 else if (S_ISDIR(mode)) 138 return ("directory"); 139 else if (S_ISFIFO(mode)) 140 return ("pipe or FIFO"); 141 else if (S_ISLNK(mode)) 142 return ("symbolic link"); 143 else if (S_ISREG(mode)) 144 return ("regular file"); 145 else if (S_ISSOCK(mode)) 146 return ("socket"); 147 else if (S_ISWHT(mode)) 148 return ("whiteout"); 149 else 150 return ("unknown"); 151 } 152 153 void 154 descriptors_assert(const struct hast_resource *res, int pjdlogmode) 155 { 156 char msg[256]; 157 struct stat sb; 158 long maxfd; 159 bool isopen; 160 mode_t mode; 161 int fd; 162 163 /* 164 * At this point descriptor to syslog socket is closed, so if we want 165 * to log assertion message, we have to first store it in 'msg' local 166 * buffer and then open syslog socket and log it. 167 */ 168 msg[0] = '\0'; 169 170 maxfd = sysconf(_SC_OPEN_MAX); 171 if (maxfd < 0) { 172 pjdlog_init(pjdlogmode); 173 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, 174 role2str(res->hr_role)); 175 pjdlog_errno(LOG_WARNING, "sysconf(_SC_OPEN_MAX) failed"); 176 pjdlog_fini(); 177 maxfd = 16384; 178 } 179 for (fd = 0; fd <= maxfd; fd++) { 180 if (fstat(fd, &sb) == 0) { 181 isopen = true; 182 mode = sb.st_mode; 183 } else if (errno == EBADF) { 184 isopen = false; 185 mode = 0; 186 } else { 187 (void)snprintf(msg, sizeof(msg), 188 "Unable to fstat descriptor %d: %s", fd, 189 strerror(errno)); 190 break; 191 } 192 if (fd == STDIN_FILENO || fd == STDOUT_FILENO || 193 fd == STDERR_FILENO) { 194 if (!isopen) { 195 (void)snprintf(msg, sizeof(msg), 196 "Descriptor %d (%s) is closed, but should be open.", 197 fd, (fd == STDIN_FILENO ? "stdin" : 198 (fd == STDOUT_FILENO ? "stdout" : "stderr"))); 199 break; 200 } 201 } else if (fd == proto_descriptor(res->hr_event)) { 202 if (!isopen) { 203 (void)snprintf(msg, sizeof(msg), 204 "Descriptor %d (event) is closed, but should be open.", 205 fd); 206 break; 207 } 208 if (!S_ISSOCK(mode)) { 209 (void)snprintf(msg, sizeof(msg), 210 "Descriptor %d (event) is %s, but should be %s.", 211 fd, dtype2str(mode), dtype2str(S_IFSOCK)); 212 break; 213 } 214 } else if (fd == proto_descriptor(res->hr_ctrl)) { 215 if (!isopen) { 216 (void)snprintf(msg, sizeof(msg), 217 "Descriptor %d (ctrl) is closed, but should be open.", 218 fd); 219 break; 220 } 221 if (!S_ISSOCK(mode)) { 222 (void)snprintf(msg, sizeof(msg), 223 "Descriptor %d (ctrl) is %s, but should be %s.", 224 fd, dtype2str(mode), dtype2str(S_IFSOCK)); 225 break; 226 } 227 } else if (fd == proto_descriptor(res->hr_conn)) { 228 if (!isopen) { 229 (void)snprintf(msg, sizeof(msg), 230 "Descriptor %d (conn) is closed, but should be open.", 231 fd); 232 break; 233 } 234 if (!S_ISSOCK(mode)) { 235 (void)snprintf(msg, sizeof(msg), 236 "Descriptor %d (conn) is %s, but should be %s.", 237 fd, dtype2str(mode), dtype2str(S_IFSOCK)); 238 break; 239 } 240 } else if (res->hr_role == HAST_ROLE_SECONDARY && 241 fd == proto_descriptor(res->hr_remotein)) { 242 if (!isopen) { 243 (void)snprintf(msg, sizeof(msg), 244 "Descriptor %d (remote in) is closed, but should be open.", 245 fd); 246 break; 247 } 248 if (!S_ISSOCK(mode)) { 249 (void)snprintf(msg, sizeof(msg), 250 "Descriptor %d (remote in) is %s, but should be %s.", 251 fd, dtype2str(mode), dtype2str(S_IFSOCK)); 252 break; 253 } 254 } else if (res->hr_role == HAST_ROLE_SECONDARY && 255 fd == proto_descriptor(res->hr_remoteout)) { 256 if (!isopen) { 257 (void)snprintf(msg, sizeof(msg), 258 "Descriptor %d (remote out) is closed, but should be open.", 259 fd); 260 break; 261 } 262 if (!S_ISSOCK(mode)) { 263 (void)snprintf(msg, sizeof(msg), 264 "Descriptor %d (remote out) is %s, but should be %s.", 265 fd, dtype2str(mode), dtype2str(S_IFSOCK)); 266 break; 267 } 268 } else { 269 if (isopen) { 270 (void)snprintf(msg, sizeof(msg), 271 "Descriptor %d is open (%s), but should be closed.", 272 fd, dtype2str(mode)); 273 break; 274 } 275 } 276 } 277 if (msg[0] != '\0') { 278 pjdlog_init(pjdlogmode); 279 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, 280 role2str(res->hr_role)); 281 PJDLOG_ABORT("%s", msg); 282 } 283 } 284 285 static void 286 child_exit_log(unsigned int pid, int status) 287 { 288 289 if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { 290 pjdlog_debug(1, "Worker process exited gracefully (pid=%u).", 291 pid); 292 } else if (WIFSIGNALED(status)) { 293 pjdlog_error("Worker process killed (pid=%u, signal=%d).", 294 pid, WTERMSIG(status)); 295 } else { 296 pjdlog_error("Worker process exited ungracefully (pid=%u, exitcode=%d).", 297 pid, WIFEXITED(status) ? WEXITSTATUS(status) : -1); 298 } 299 } 300 301 static void 302 child_exit(void) 303 { 304 struct hast_resource *res; 305 int status; 306 pid_t pid; 307 308 while ((pid = wait3(&status, WNOHANG, NULL)) > 0) { 309 /* Find resource related to the process that just exited. */ 310 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 311 if (pid == res->hr_workerpid) 312 break; 313 } 314 if (res == NULL) { 315 /* 316 * This can happen when new connection arrives and we 317 * cancel child responsible for the old one or if this 318 * was hook which we executed. 319 */ 320 hook_check_one(pid, status); 321 continue; 322 } 323 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, 324 role2str(res->hr_role)); 325 child_exit_log(pid, status); 326 child_cleanup(res); 327 if (res->hr_role == HAST_ROLE_PRIMARY) { 328 /* 329 * Restart child process if it was killed by signal 330 * or exited because of temporary problem. 331 */ 332 if (WIFSIGNALED(status) || 333 (WIFEXITED(status) && 334 WEXITSTATUS(status) == EX_TEMPFAIL)) { 335 sleep(1); 336 pjdlog_info("Restarting worker process."); 337 hastd_primary(res); 338 } else { 339 res->hr_role = HAST_ROLE_INIT; 340 pjdlog_info("Changing resource role back to %s.", 341 role2str(res->hr_role)); 342 } 343 } 344 pjdlog_prefix_set("%s", ""); 345 } 346 } 347 348 static bool 349 resource_needs_restart(const struct hast_resource *res0, 350 const struct hast_resource *res1) 351 { 352 353 PJDLOG_ASSERT(strcmp(res0->hr_name, res1->hr_name) == 0); 354 355 if (strcmp(res0->hr_provname, res1->hr_provname) != 0) 356 return (true); 357 if (strcmp(res0->hr_localpath, res1->hr_localpath) != 0) 358 return (true); 359 if (res0->hr_role == HAST_ROLE_INIT || 360 res0->hr_role == HAST_ROLE_SECONDARY) { 361 if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0) 362 return (true); 363 if (strcmp(res0->hr_sourceaddr, res1->hr_sourceaddr) != 0) 364 return (true); 365 if (res0->hr_replication != res1->hr_replication) 366 return (true); 367 if (res0->hr_checksum != res1->hr_checksum) 368 return (true); 369 if (res0->hr_compression != res1->hr_compression) 370 return (true); 371 if (res0->hr_timeout != res1->hr_timeout) 372 return (true); 373 if (strcmp(res0->hr_exec, res1->hr_exec) != 0) 374 return (true); 375 } 376 return (false); 377 } 378 379 static bool 380 resource_needs_reload(const struct hast_resource *res0, 381 const struct hast_resource *res1) 382 { 383 384 PJDLOG_ASSERT(strcmp(res0->hr_name, res1->hr_name) == 0); 385 PJDLOG_ASSERT(strcmp(res0->hr_provname, res1->hr_provname) == 0); 386 PJDLOG_ASSERT(strcmp(res0->hr_localpath, res1->hr_localpath) == 0); 387 388 if (res0->hr_role != HAST_ROLE_PRIMARY) 389 return (false); 390 391 if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0) 392 return (true); 393 if (strcmp(res0->hr_sourceaddr, res1->hr_sourceaddr) != 0) 394 return (true); 395 if (res0->hr_replication != res1->hr_replication) 396 return (true); 397 if (res0->hr_checksum != res1->hr_checksum) 398 return (true); 399 if (res0->hr_compression != res1->hr_compression) 400 return (true); 401 if (res0->hr_timeout != res1->hr_timeout) 402 return (true); 403 if (strcmp(res0->hr_exec, res1->hr_exec) != 0) 404 return (true); 405 return (false); 406 } 407 408 static void 409 resource_reload(const struct hast_resource *res) 410 { 411 struct nv *nvin, *nvout; 412 int error; 413 414 PJDLOG_ASSERT(res->hr_role == HAST_ROLE_PRIMARY); 415 416 nvout = nv_alloc(); 417 nv_add_uint8(nvout, HASTCTL_RELOAD, "cmd"); 418 nv_add_string(nvout, res->hr_remoteaddr, "remoteaddr"); 419 nv_add_string(nvout, res->hr_sourceaddr, "sourceaddr"); 420 nv_add_int32(nvout, (int32_t)res->hr_replication, "replication"); 421 nv_add_int32(nvout, (int32_t)res->hr_checksum, "checksum"); 422 nv_add_int32(nvout, (int32_t)res->hr_compression, "compression"); 423 nv_add_int32(nvout, (int32_t)res->hr_timeout, "timeout"); 424 nv_add_string(nvout, res->hr_exec, "exec"); 425 if (nv_error(nvout) != 0) { 426 nv_free(nvout); 427 pjdlog_error("Unable to allocate header for reload message."); 428 return; 429 } 430 if (hast_proto_send(res, res->hr_ctrl, nvout, NULL, 0) < 0) { 431 pjdlog_errno(LOG_ERR, "Unable to send reload message"); 432 nv_free(nvout); 433 return; 434 } 435 nv_free(nvout); 436 437 /* Receive response. */ 438 if (hast_proto_recv_hdr(res->hr_ctrl, &nvin) < 0) { 439 pjdlog_errno(LOG_ERR, "Unable to receive reload reply"); 440 return; 441 } 442 error = nv_get_int16(nvin, "error"); 443 nv_free(nvin); 444 if (error != 0) { 445 pjdlog_common(LOG_ERR, 0, error, "Reload failed"); 446 return; 447 } 448 } 449 450 static void 451 hastd_reload(void) 452 { 453 struct hastd_config *newcfg; 454 struct hast_resource *nres, *cres, *tres; 455 uint8_t role; 456 457 pjdlog_info("Reloading configuration..."); 458 459 newcfg = yy_config_parse(cfgpath, false); 460 if (newcfg == NULL) 461 goto failed; 462 463 /* 464 * Check if control address has changed. 465 */ 466 if (strcmp(cfg->hc_controladdr, newcfg->hc_controladdr) != 0) { 467 if (proto_server(newcfg->hc_controladdr, 468 &newcfg->hc_controlconn) < 0) { 469 pjdlog_errno(LOG_ERR, 470 "Unable to listen on control address %s", 471 newcfg->hc_controladdr); 472 goto failed; 473 } 474 } 475 /* 476 * Check if listen address has changed. 477 */ 478 if (strcmp(cfg->hc_listenaddr, newcfg->hc_listenaddr) != 0) { 479 if (proto_server(newcfg->hc_listenaddr, 480 &newcfg->hc_listenconn) < 0) { 481 pjdlog_errno(LOG_ERR, "Unable to listen on address %s", 482 newcfg->hc_listenaddr); 483 goto failed; 484 } 485 } 486 /* 487 * Only when both control and listen sockets are successfully 488 * initialized switch them to new configuration. 489 */ 490 if (newcfg->hc_controlconn != NULL) { 491 pjdlog_info("Control socket changed from %s to %s.", 492 cfg->hc_controladdr, newcfg->hc_controladdr); 493 proto_close(cfg->hc_controlconn); 494 cfg->hc_controlconn = newcfg->hc_controlconn; 495 newcfg->hc_controlconn = NULL; 496 strlcpy(cfg->hc_controladdr, newcfg->hc_controladdr, 497 sizeof(cfg->hc_controladdr)); 498 } 499 if (newcfg->hc_listenconn != NULL) { 500 pjdlog_info("Listen socket changed from %s to %s.", 501 cfg->hc_listenaddr, newcfg->hc_listenaddr); 502 proto_close(cfg->hc_listenconn); 503 cfg->hc_listenconn = newcfg->hc_listenconn; 504 newcfg->hc_listenconn = NULL; 505 strlcpy(cfg->hc_listenaddr, newcfg->hc_listenaddr, 506 sizeof(cfg->hc_listenaddr)); 507 } 508 509 /* 510 * Stop and remove resources that were removed from the configuration. 511 */ 512 TAILQ_FOREACH_SAFE(cres, &cfg->hc_resources, hr_next, tres) { 513 TAILQ_FOREACH(nres, &newcfg->hc_resources, hr_next) { 514 if (strcmp(cres->hr_name, nres->hr_name) == 0) 515 break; 516 } 517 if (nres == NULL) { 518 control_set_role(cres, HAST_ROLE_INIT); 519 TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next); 520 pjdlog_info("Resource %s removed.", cres->hr_name); 521 free(cres); 522 } 523 } 524 /* 525 * Move new resources to the current configuration. 526 */ 527 TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) { 528 TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) { 529 if (strcmp(cres->hr_name, nres->hr_name) == 0) 530 break; 531 } 532 if (cres == NULL) { 533 TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next); 534 TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next); 535 pjdlog_info("Resource %s added.", nres->hr_name); 536 } 537 } 538 /* 539 * Deal with modified resources. 540 * Depending on what has changed exactly we might want to perform 541 * different actions. 542 * 543 * We do full resource restart in the following situations: 544 * Resource role is INIT or SECONDARY. 545 * Resource role is PRIMARY and path to local component or provider 546 * name has changed. 547 * In case of PRIMARY, the worker process will be killed and restarted, 548 * which also means removing /dev/hast/<name> provider and 549 * recreating it. 550 * 551 * We do just reload (send SIGHUP to worker process) if we act as 552 * PRIMARY, but only if remote address, replication mode, timeout or 553 * execution path has changed. For those, there is no need to restart 554 * worker process. 555 * If PRIMARY receives SIGHUP, it will reconnect if remote address or 556 * replication mode has changed or simply set new timeout if only 557 * timeout has changed. 558 */ 559 TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) { 560 TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) { 561 if (strcmp(cres->hr_name, nres->hr_name) == 0) 562 break; 563 } 564 PJDLOG_ASSERT(cres != NULL); 565 if (resource_needs_restart(cres, nres)) { 566 pjdlog_info("Resource %s configuration was modified, restarting it.", 567 cres->hr_name); 568 role = cres->hr_role; 569 control_set_role(cres, HAST_ROLE_INIT); 570 TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next); 571 free(cres); 572 TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next); 573 TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next); 574 control_set_role(nres, role); 575 } else if (resource_needs_reload(cres, nres)) { 576 pjdlog_info("Resource %s configuration was modified, reloading it.", 577 cres->hr_name); 578 strlcpy(cres->hr_remoteaddr, nres->hr_remoteaddr, 579 sizeof(cres->hr_remoteaddr)); 580 strlcpy(cres->hr_sourceaddr, nres->hr_sourceaddr, 581 sizeof(cres->hr_sourceaddr)); 582 cres->hr_replication = nres->hr_replication; 583 cres->hr_checksum = nres->hr_checksum; 584 cres->hr_compression = nres->hr_compression; 585 cres->hr_timeout = nres->hr_timeout; 586 strlcpy(cres->hr_exec, nres->hr_exec, 587 sizeof(cres->hr_exec)); 588 if (cres->hr_workerpid != 0) 589 resource_reload(cres); 590 } 591 } 592 593 yy_config_free(newcfg); 594 pjdlog_info("Configuration reloaded successfully."); 595 return; 596 failed: 597 if (newcfg != NULL) { 598 if (newcfg->hc_controlconn != NULL) 599 proto_close(newcfg->hc_controlconn); 600 if (newcfg->hc_listenconn != NULL) 601 proto_close(newcfg->hc_listenconn); 602 yy_config_free(newcfg); 603 } 604 pjdlog_warning("Configuration not reloaded."); 605 } 606 607 static void 608 terminate_workers(void) 609 { 610 struct hast_resource *res; 611 612 pjdlog_info("Termination signal received, exiting."); 613 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 614 if (res->hr_workerpid == 0) 615 continue; 616 pjdlog_info("Terminating worker process (resource=%s, role=%s, pid=%u).", 617 res->hr_name, role2str(res->hr_role), res->hr_workerpid); 618 if (kill(res->hr_workerpid, SIGTERM) == 0) 619 continue; 620 pjdlog_errno(LOG_WARNING, 621 "Unable to send signal to worker process (resource=%s, role=%s, pid=%u).", 622 res->hr_name, role2str(res->hr_role), res->hr_workerpid); 623 } 624 } 625 626 static void 627 listen_accept(void) 628 { 629 struct hast_resource *res; 630 struct proto_conn *conn; 631 struct nv *nvin, *nvout, *nverr; 632 const char *resname; 633 const unsigned char *token; 634 char laddr[256], raddr[256]; 635 size_t size; 636 pid_t pid; 637 int status; 638 639 proto_local_address(cfg->hc_listenconn, laddr, sizeof(laddr)); 640 pjdlog_debug(1, "Accepting connection to %s.", laddr); 641 642 if (proto_accept(cfg->hc_listenconn, &conn) < 0) { 643 pjdlog_errno(LOG_ERR, "Unable to accept connection %s", laddr); 644 return; 645 } 646 647 proto_local_address(conn, laddr, sizeof(laddr)); 648 proto_remote_address(conn, raddr, sizeof(raddr)); 649 pjdlog_info("Connection from %s to %s.", raddr, laddr); 650 651 /* Error in setting timeout is not critical, but why should it fail? */ 652 if (proto_timeout(conn, HAST_TIMEOUT) < 0) 653 pjdlog_errno(LOG_WARNING, "Unable to set connection timeout"); 654 655 nvin = nvout = nverr = NULL; 656 657 /* 658 * Before receiving any data see if remote host have access to any 659 * resource. 660 */ 661 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 662 if (proto_address_match(conn, res->hr_remoteaddr)) 663 break; 664 } 665 if (res == NULL) { 666 pjdlog_error("Client %s isn't known.", raddr); 667 goto close; 668 } 669 /* Ok, remote host can access at least one resource. */ 670 671 if (hast_proto_recv_hdr(conn, &nvin) < 0) { 672 pjdlog_errno(LOG_ERR, "Unable to receive header from %s", 673 raddr); 674 goto close; 675 } 676 677 resname = nv_get_string(nvin, "resource"); 678 if (resname == NULL) { 679 pjdlog_error("No 'resource' field in the header received from %s.", 680 raddr); 681 goto close; 682 } 683 pjdlog_debug(2, "%s: resource=%s", raddr, resname); 684 token = nv_get_uint8_array(nvin, &size, "token"); 685 /* 686 * NULL token means that this is first conection. 687 */ 688 if (token != NULL && size != sizeof(res->hr_token)) { 689 pjdlog_error("Received token of invalid size from %s (expected %zu, got %zu).", 690 raddr, sizeof(res->hr_token), size); 691 goto close; 692 } 693 694 /* 695 * From now on we want to send errors to the remote node. 696 */ 697 nverr = nv_alloc(); 698 699 /* Find resource related to this connection. */ 700 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 701 if (strcmp(resname, res->hr_name) == 0) 702 break; 703 } 704 /* Have we found the resource? */ 705 if (res == NULL) { 706 pjdlog_error("No resource '%s' as requested by %s.", 707 resname, raddr); 708 nv_add_stringf(nverr, "errmsg", "Resource not configured."); 709 goto fail; 710 } 711 712 /* Now that we know resource name setup log prefix. */ 713 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 714 715 /* Does the remote host have access to this resource? */ 716 if (!proto_address_match(conn, res->hr_remoteaddr)) { 717 pjdlog_error("Client %s has no access to the resource.", raddr); 718 nv_add_stringf(nverr, "errmsg", "No access to the resource."); 719 goto fail; 720 } 721 /* Is the resource marked as secondary? */ 722 if (res->hr_role != HAST_ROLE_SECONDARY) { 723 pjdlog_error("We act as %s for the resource and not as %s as requested by %s.", 724 role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY), 725 raddr); 726 nv_add_stringf(nverr, "errmsg", 727 "Remote node acts as %s for the resource and not as %s.", 728 role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY)); 729 goto fail; 730 } 731 /* Does token (if exists) match? */ 732 if (token != NULL && memcmp(token, res->hr_token, 733 sizeof(res->hr_token)) != 0) { 734 pjdlog_error("Token received from %s doesn't match.", raddr); 735 nv_add_stringf(nverr, "errmsg", "Token doesn't match."); 736 goto fail; 737 } 738 /* 739 * If there is no token, but we have half-open connection 740 * (only remotein) or full connection (worker process is running) 741 * we have to cancel those and accept the new connection. 742 */ 743 if (token == NULL) { 744 PJDLOG_ASSERT(res->hr_remoteout == NULL); 745 pjdlog_debug(1, "Initial connection from %s.", raddr); 746 if (res->hr_workerpid != 0) { 747 PJDLOG_ASSERT(res->hr_remotein == NULL); 748 pjdlog_debug(1, 749 "Worker process exists (pid=%u), stopping it.", 750 (unsigned int)res->hr_workerpid); 751 /* Stop child process. */ 752 if (kill(res->hr_workerpid, SIGINT) < 0) { 753 pjdlog_errno(LOG_ERR, 754 "Unable to stop worker process (pid=%u)", 755 (unsigned int)res->hr_workerpid); 756 /* 757 * Other than logging the problem we 758 * ignore it - nothing smart to do. 759 */ 760 } 761 /* Wait for it to exit. */ 762 else if ((pid = waitpid(res->hr_workerpid, 763 &status, 0)) != res->hr_workerpid) { 764 /* We can only log the problem. */ 765 pjdlog_errno(LOG_ERR, 766 "Waiting for worker process (pid=%u) failed", 767 (unsigned int)res->hr_workerpid); 768 } else { 769 child_exit_log(res->hr_workerpid, status); 770 } 771 child_cleanup(res); 772 } else if (res->hr_remotein != NULL) { 773 char oaddr[256]; 774 775 proto_remote_address(res->hr_remotein, oaddr, 776 sizeof(oaddr)); 777 pjdlog_debug(1, 778 "Canceling half-open connection from %s on connection from %s.", 779 oaddr, raddr); 780 proto_close(res->hr_remotein); 781 res->hr_remotein = NULL; 782 } 783 } 784 785 /* 786 * Checks and cleanups are done. 787 */ 788 789 if (token == NULL) { 790 arc4random_buf(res->hr_token, sizeof(res->hr_token)); 791 nvout = nv_alloc(); 792 nv_add_uint8_array(nvout, res->hr_token, 793 sizeof(res->hr_token), "token"); 794 if (nv_error(nvout) != 0) { 795 pjdlog_common(LOG_ERR, 0, nv_error(nvout), 796 "Unable to prepare return header for %s", raddr); 797 nv_add_stringf(nverr, "errmsg", 798 "Remote node was unable to prepare return header: %s.", 799 strerror(nv_error(nvout))); 800 goto fail; 801 } 802 if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) { 803 int error = errno; 804 805 pjdlog_errno(LOG_ERR, "Unable to send response to %s", 806 raddr); 807 nv_add_stringf(nverr, "errmsg", 808 "Remote node was unable to send response: %s.", 809 strerror(error)); 810 goto fail; 811 } 812 res->hr_remotein = conn; 813 pjdlog_debug(1, "Incoming connection from %s configured.", 814 raddr); 815 } else { 816 res->hr_remoteout = conn; 817 pjdlog_debug(1, "Outgoing connection to %s configured.", raddr); 818 hastd_secondary(res, nvin); 819 } 820 nv_free(nvin); 821 nv_free(nvout); 822 nv_free(nverr); 823 pjdlog_prefix_set("%s", ""); 824 return; 825 fail: 826 if (nv_error(nverr) != 0) { 827 pjdlog_common(LOG_ERR, 0, nv_error(nverr), 828 "Unable to prepare error header for %s", raddr); 829 goto close; 830 } 831 if (hast_proto_send(NULL, conn, nverr, NULL, 0) < 0) { 832 pjdlog_errno(LOG_ERR, "Unable to send error to %s", raddr); 833 goto close; 834 } 835 close: 836 if (nvin != NULL) 837 nv_free(nvin); 838 if (nvout != NULL) 839 nv_free(nvout); 840 if (nverr != NULL) 841 nv_free(nverr); 842 proto_close(conn); 843 pjdlog_prefix_set("%s", ""); 844 } 845 846 static void 847 connection_migrate(struct hast_resource *res) 848 { 849 struct proto_conn *conn; 850 int16_t val = 0; 851 852 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 853 854 if (proto_recv(res->hr_conn, &val, sizeof(val)) < 0) { 855 pjdlog_errno(LOG_WARNING, 856 "Unable to receive connection command"); 857 return; 858 } 859 if (proto_client(res->hr_sourceaddr[0] != '\0' ? res->hr_sourceaddr : NULL, 860 res->hr_remoteaddr, &conn) < 0) { 861 val = errno; 862 pjdlog_errno(LOG_WARNING, 863 "Unable to create outgoing connection to %s", 864 res->hr_remoteaddr); 865 goto out; 866 } 867 if (proto_connect(conn, -1) < 0) { 868 val = errno; 869 pjdlog_errno(LOG_WARNING, "Unable to connect to %s", 870 res->hr_remoteaddr); 871 proto_close(conn); 872 goto out; 873 } 874 val = 0; 875 out: 876 if (proto_send(res->hr_conn, &val, sizeof(val)) < 0) { 877 pjdlog_errno(LOG_WARNING, 878 "Unable to send reply to connection request"); 879 } 880 if (val == 0 && proto_connection_send(res->hr_conn, conn) < 0) 881 pjdlog_errno(LOG_WARNING, "Unable to send connection"); 882 883 pjdlog_prefix_set("%s", ""); 884 } 885 886 static void 887 main_loop(void) 888 { 889 struct hast_resource *res; 890 struct timeval seltimeout; 891 struct timespec sigtimeout; 892 int fd, maxfd, ret, signo; 893 time_t lastcheck, now; 894 sigset_t mask; 895 fd_set rfds; 896 897 lastcheck = time(NULL); 898 seltimeout.tv_sec = REPORT_INTERVAL; 899 seltimeout.tv_usec = 0; 900 sigtimeout.tv_sec = 0; 901 sigtimeout.tv_nsec = 0; 902 903 PJDLOG_VERIFY(sigemptyset(&mask) == 0); 904 PJDLOG_VERIFY(sigaddset(&mask, SIGHUP) == 0); 905 PJDLOG_VERIFY(sigaddset(&mask, SIGINT) == 0); 906 PJDLOG_VERIFY(sigaddset(&mask, SIGTERM) == 0); 907 PJDLOG_VERIFY(sigaddset(&mask, SIGCHLD) == 0); 908 909 pjdlog_info("Started successfully, running protocol version %d.", 910 HAST_PROTO_VERSION); 911 912 for (;;) { 913 while ((signo = sigtimedwait(&mask, NULL, &sigtimeout)) != -1) { 914 switch (signo) { 915 case SIGINT: 916 case SIGTERM: 917 sigexit_received = true; 918 terminate_workers(); 919 proto_close(cfg->hc_controlconn); 920 exit(EX_OK); 921 break; 922 case SIGCHLD: 923 child_exit(); 924 break; 925 case SIGHUP: 926 hastd_reload(); 927 break; 928 default: 929 PJDLOG_ABORT("Unexpected signal (%d).", signo); 930 } 931 } 932 933 /* Setup descriptors for select(2). */ 934 FD_ZERO(&rfds); 935 maxfd = fd = proto_descriptor(cfg->hc_controlconn); 936 PJDLOG_ASSERT(fd >= 0); 937 FD_SET(fd, &rfds); 938 fd = proto_descriptor(cfg->hc_listenconn); 939 PJDLOG_ASSERT(fd >= 0); 940 FD_SET(fd, &rfds); 941 maxfd = fd > maxfd ? fd : maxfd; 942 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 943 if (res->hr_event == NULL) 944 continue; 945 PJDLOG_ASSERT(res->hr_conn != NULL); 946 fd = proto_descriptor(res->hr_event); 947 PJDLOG_ASSERT(fd >= 0); 948 FD_SET(fd, &rfds); 949 maxfd = fd > maxfd ? fd : maxfd; 950 if (res->hr_role == HAST_ROLE_PRIMARY) { 951 /* Only primary workers asks for connections. */ 952 fd = proto_descriptor(res->hr_conn); 953 PJDLOG_ASSERT(fd >= 0); 954 FD_SET(fd, &rfds); 955 maxfd = fd > maxfd ? fd : maxfd; 956 } 957 } 958 959 PJDLOG_ASSERT(maxfd + 1 <= (int)FD_SETSIZE); 960 ret = select(maxfd + 1, &rfds, NULL, NULL, &seltimeout); 961 now = time(NULL); 962 if (lastcheck + REPORT_INTERVAL <= now) { 963 hook_check(); 964 lastcheck = now; 965 } 966 if (ret == 0) { 967 /* 968 * select(2) timed out, so there should be no 969 * descriptors to check. 970 */ 971 continue; 972 } else if (ret == -1) { 973 if (errno == EINTR) 974 continue; 975 KEEP_ERRNO((void)pidfile_remove(pfh)); 976 pjdlog_exit(EX_OSERR, "select() failed"); 977 } 978 979 if (FD_ISSET(proto_descriptor(cfg->hc_controlconn), &rfds)) 980 control_handle(cfg); 981 if (FD_ISSET(proto_descriptor(cfg->hc_listenconn), &rfds)) 982 listen_accept(); 983 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 984 if (res->hr_event == NULL) 985 continue; 986 PJDLOG_ASSERT(res->hr_conn != NULL); 987 if (FD_ISSET(proto_descriptor(res->hr_event), &rfds)) { 988 if (event_recv(res) == 0) 989 continue; 990 /* The worker process exited? */ 991 proto_close(res->hr_event); 992 res->hr_event = NULL; 993 proto_close(res->hr_conn); 994 res->hr_conn = NULL; 995 continue; 996 } 997 if (res->hr_role == HAST_ROLE_PRIMARY && 998 FD_ISSET(proto_descriptor(res->hr_conn), &rfds)) { 999 connection_migrate(res); 1000 } 1001 } 1002 } 1003 } 1004 1005 static void 1006 dummy_sighandler(int sig __unused) 1007 { 1008 /* Nothing to do. */ 1009 } 1010 1011 int 1012 main(int argc, char *argv[]) 1013 { 1014 const char *pidfile; 1015 pid_t otherpid; 1016 bool foreground; 1017 int debuglevel; 1018 sigset_t mask; 1019 1020 foreground = false; 1021 debuglevel = 0; 1022 pidfile = HASTD_PIDFILE; 1023 1024 for (;;) { 1025 int ch; 1026 1027 ch = getopt(argc, argv, "c:dFhP:"); 1028 if (ch == -1) 1029 break; 1030 switch (ch) { 1031 case 'c': 1032 cfgpath = optarg; 1033 break; 1034 case 'd': 1035 debuglevel++; 1036 break; 1037 case 'F': 1038 foreground = true; 1039 break; 1040 case 'P': 1041 pidfile = optarg; 1042 break; 1043 case 'h': 1044 default: 1045 usage(); 1046 } 1047 } 1048 argc -= optind; 1049 argv += optind; 1050 1051 pjdlog_init(PJDLOG_MODE_STD); 1052 pjdlog_debug_set(debuglevel); 1053 1054 g_gate_load(); 1055 1056 pfh = pidfile_open(pidfile, 0600, &otherpid); 1057 if (pfh == NULL) { 1058 if (errno == EEXIST) { 1059 pjdlog_exitx(EX_TEMPFAIL, 1060 "Another hastd is already running, pid: %jd.", 1061 (intmax_t)otherpid); 1062 } 1063 /* If we cannot create pidfile from other reasons, only warn. */ 1064 pjdlog_errno(LOG_WARNING, "Unable to open or create pidfile"); 1065 } 1066 1067 cfg = yy_config_parse(cfgpath, true); 1068 PJDLOG_ASSERT(cfg != NULL); 1069 1070 /* 1071 * Restore default actions for interesting signals in case parent 1072 * process (like init(8)) decided to ignore some of them (like SIGHUP). 1073 */ 1074 PJDLOG_VERIFY(signal(SIGHUP, SIG_DFL) != SIG_ERR); 1075 PJDLOG_VERIFY(signal(SIGINT, SIG_DFL) != SIG_ERR); 1076 PJDLOG_VERIFY(signal(SIGTERM, SIG_DFL) != SIG_ERR); 1077 /* 1078 * Because SIGCHLD is ignored by default, setup dummy handler for it, 1079 * so we can mask it. 1080 */ 1081 PJDLOG_VERIFY(signal(SIGCHLD, dummy_sighandler) != SIG_ERR); 1082 1083 PJDLOG_VERIFY(sigemptyset(&mask) == 0); 1084 PJDLOG_VERIFY(sigaddset(&mask, SIGHUP) == 0); 1085 PJDLOG_VERIFY(sigaddset(&mask, SIGINT) == 0); 1086 PJDLOG_VERIFY(sigaddset(&mask, SIGTERM) == 0); 1087 PJDLOG_VERIFY(sigaddset(&mask, SIGCHLD) == 0); 1088 PJDLOG_VERIFY(sigprocmask(SIG_SETMASK, &mask, NULL) == 0); 1089 1090 /* Listen on control address. */ 1091 if (proto_server(cfg->hc_controladdr, &cfg->hc_controlconn) < 0) { 1092 KEEP_ERRNO((void)pidfile_remove(pfh)); 1093 pjdlog_exit(EX_OSERR, "Unable to listen on control address %s", 1094 cfg->hc_controladdr); 1095 } 1096 /* Listen for remote connections. */ 1097 if (proto_server(cfg->hc_listenaddr, &cfg->hc_listenconn) < 0) { 1098 KEEP_ERRNO((void)pidfile_remove(pfh)); 1099 pjdlog_exit(EX_OSERR, "Unable to listen on address %s", 1100 cfg->hc_listenaddr); 1101 } 1102 1103 if (!foreground) { 1104 if (daemon(0, 0) < 0) { 1105 KEEP_ERRNO((void)pidfile_remove(pfh)); 1106 pjdlog_exit(EX_OSERR, "Unable to daemonize"); 1107 } 1108 1109 /* Start logging to syslog. */ 1110 pjdlog_mode_set(PJDLOG_MODE_SYSLOG); 1111 1112 /* Write PID to a file. */ 1113 if (pidfile_write(pfh) < 0) { 1114 pjdlog_errno(LOG_WARNING, 1115 "Unable to write PID to a file"); 1116 } 1117 } 1118 1119 hook_init(); 1120 1121 main_loop(); 1122 1123 exit(0); 1124 } 1125