1 /*- 2 * Copyright (c) 2009-2010 The FreeBSD Foundation 3 * All rights reserved. 4 * 5 * This software was developed by Pawel Jakub Dawidek under sponsorship from 6 * the FreeBSD Foundation. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/linker.h> 35 #include <sys/module.h> 36 #include <sys/wait.h> 37 38 #include <assert.h> 39 #include <err.h> 40 #include <errno.h> 41 #include <libutil.h> 42 #include <signal.h> 43 #include <stdbool.h> 44 #include <stdio.h> 45 #include <stdlib.h> 46 #include <string.h> 47 #include <sysexits.h> 48 #include <unistd.h> 49 50 #include <activemap.h> 51 #include <pjdlog.h> 52 53 #include "control.h" 54 #include "hast.h" 55 #include "hast_proto.h" 56 #include "hastd.h" 57 #include "subr.h" 58 59 /* Path to configuration file. */ 60 static const char *cfgpath = HAST_CONFIG; 61 /* Hastd configuration. */ 62 static struct hastd_config *cfg; 63 /* Was SIGCHLD signal received? */ 64 static bool sigchld_received = false; 65 /* Was SIGHUP signal received? */ 66 static bool sighup_received = false; 67 /* Was SIGINT or SIGTERM signal received? */ 68 bool sigexit_received = false; 69 /* PID file handle. */ 70 struct pidfh *pfh; 71 72 static void 73 usage(void) 74 { 75 76 errx(EX_USAGE, "[-dFh] [-c config] [-P pidfile]"); 77 } 78 79 static void 80 sighandler(int sig) 81 { 82 83 switch (sig) { 84 case SIGCHLD: 85 sigchld_received = true; 86 break; 87 case SIGHUP: 88 sighup_received = true; 89 break; 90 default: 91 assert(!"invalid condition"); 92 } 93 } 94 95 static void 96 g_gate_load(void) 97 { 98 99 if (modfind("g_gate") == -1) { 100 /* Not present in kernel, try loading it. */ 101 if (kldload("geom_gate") == -1 || modfind("g_gate") == -1) { 102 if (errno != EEXIST) { 103 pjdlog_exit(EX_OSERR, 104 "Unable to load geom_gate module"); 105 } 106 } 107 } 108 } 109 110 static void 111 child_exit(void) 112 { 113 struct hast_resource *res; 114 int status; 115 pid_t pid; 116 117 while ((pid = wait3(&status, WNOHANG, NULL)) > 0) { 118 /* Find resource related to the process that just exited. */ 119 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 120 if (pid == res->hr_workerpid) 121 break; 122 } 123 if (res == NULL) { 124 /* 125 * This can happen when new connection arrives and we 126 * cancel child responsible for the old one. 127 */ 128 continue; 129 } 130 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, 131 role2str(res->hr_role)); 132 if (WEXITSTATUS(status) == 0) { 133 pjdlog_debug(1, 134 "Worker process exited gracefully (pid=%u).", 135 (unsigned int)pid); 136 } else { 137 pjdlog_error("Worker process failed (pid=%u, status=%d).", 138 (unsigned int)pid, WEXITSTATUS(status)); 139 } 140 proto_close(res->hr_ctrl); 141 res->hr_workerpid = 0; 142 if (res->hr_role == HAST_ROLE_PRIMARY) { 143 sleep(1); 144 pjdlog_info("Restarting worker process."); 145 hastd_primary(res); 146 } 147 pjdlog_prefix_set("%s", ""); 148 } 149 } 150 151 static void 152 hastd_reload(void) 153 { 154 155 /* TODO */ 156 pjdlog_warning("Configuration reload is not implemented."); 157 } 158 159 static void 160 listen_accept(void) 161 { 162 struct hast_resource *res; 163 struct proto_conn *conn; 164 struct nv *nvin, *nvout, *nverr; 165 const char *resname; 166 const unsigned char *token; 167 char laddr[256], raddr[256]; 168 size_t size; 169 pid_t pid; 170 int status; 171 172 proto_local_address(cfg->hc_listenconn, laddr, sizeof(laddr)); 173 pjdlog_debug(1, "Accepting connection to %s.", laddr); 174 175 if (proto_accept(cfg->hc_listenconn, &conn) < 0) { 176 pjdlog_errno(LOG_ERR, "Unable to accept connection %s", laddr); 177 return; 178 } 179 180 proto_local_address(conn, laddr, sizeof(laddr)); 181 proto_remote_address(conn, raddr, sizeof(raddr)); 182 pjdlog_info("Connection from %s to %s.", laddr, raddr); 183 184 nvin = nvout = nverr = NULL; 185 186 /* 187 * Before receiving any data see if remote host have access to any 188 * resource. 189 */ 190 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 191 if (proto_address_match(conn, res->hr_remoteaddr)) 192 break; 193 } 194 if (res == NULL) { 195 pjdlog_error("Client %s isn't known.", raddr); 196 goto close; 197 } 198 /* Ok, remote host can access at least one resource. */ 199 200 if (hast_proto_recv_hdr(conn, &nvin) < 0) { 201 pjdlog_errno(LOG_ERR, "Unable to receive header from %s", 202 raddr); 203 goto close; 204 } 205 206 resname = nv_get_string(nvin, "resource"); 207 if (resname == NULL) { 208 pjdlog_error("No 'resource' field in the header received from %s.", 209 raddr); 210 goto close; 211 } 212 pjdlog_debug(2, "%s: resource=%s", raddr, resname); 213 token = nv_get_uint8_array(nvin, &size, "token"); 214 /* 215 * NULL token means that this is first conection. 216 */ 217 if (token != NULL && size != sizeof(res->hr_token)) { 218 pjdlog_error("Received token of invalid size from %s (expected %zu, got %zu).", 219 raddr, sizeof(res->hr_token), size); 220 goto close; 221 } 222 223 /* 224 * From now on we want to send errors to the remote node. 225 */ 226 nverr = nv_alloc(); 227 228 /* Find resource related to this connection. */ 229 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 230 if (strcmp(resname, res->hr_name) == 0) 231 break; 232 } 233 /* Have we found the resource? */ 234 if (res == NULL) { 235 pjdlog_error("No resource '%s' as requested by %s.", 236 resname, raddr); 237 nv_add_stringf(nverr, "errmsg", "Resource not configured."); 238 goto fail; 239 } 240 241 /* Now that we know resource name setup log prefix. */ 242 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 243 244 /* Does the remote host have access to this resource? */ 245 if (!proto_address_match(conn, res->hr_remoteaddr)) { 246 pjdlog_error("Client %s has no access to the resource.", raddr); 247 nv_add_stringf(nverr, "errmsg", "No access to the resource."); 248 goto fail; 249 } 250 /* Is the resource marked as secondary? */ 251 if (res->hr_role != HAST_ROLE_SECONDARY) { 252 pjdlog_error("We act as %s for the resource and not as %s as requested by %s.", 253 role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY), 254 raddr); 255 nv_add_stringf(nverr, "errmsg", 256 "Remote node acts as %s for the resource and not as %s.", 257 role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY)); 258 goto fail; 259 } 260 /* Does token (if exists) match? */ 261 if (token != NULL && memcmp(token, res->hr_token, 262 sizeof(res->hr_token)) != 0) { 263 pjdlog_error("Token received from %s doesn't match.", raddr); 264 nv_add_stringf(nverr, "errmsg", "Toke doesn't match."); 265 goto fail; 266 } 267 /* 268 * If there is no token, but we have half-open connection 269 * (only remotein) or full connection (worker process is running) 270 * we have to cancel those and accept the new connection. 271 */ 272 if (token == NULL) { 273 assert(res->hr_remoteout == NULL); 274 pjdlog_debug(1, "Initial connection from %s.", raddr); 275 if (res->hr_workerpid != 0) { 276 assert(res->hr_remotein == NULL); 277 pjdlog_debug(1, 278 "Worker process exists (pid=%u), stopping it.", 279 (unsigned int)res->hr_workerpid); 280 /* Stop child process. */ 281 if (kill(res->hr_workerpid, SIGINT) < 0) { 282 pjdlog_errno(LOG_ERR, 283 "Unable to stop worker process (pid=%u)", 284 (unsigned int)res->hr_workerpid); 285 /* 286 * Other than logging the problem we 287 * ignore it - nothing smart to do. 288 */ 289 } 290 /* Wait for it to exit. */ 291 else if ((pid = waitpid(res->hr_workerpid, 292 &status, 0)) != res->hr_workerpid) { 293 pjdlog_errno(LOG_ERR, 294 "Waiting for worker process (pid=%u) failed", 295 (unsigned int)res->hr_workerpid); 296 /* See above. */ 297 } else if (status != 0) { 298 pjdlog_error("Worker process (pid=%u) exited ungracefully: status=%d.", 299 (unsigned int)res->hr_workerpid, status); 300 /* See above. */ 301 } else { 302 pjdlog_debug(1, 303 "Worker process (pid=%u) exited gracefully.", 304 (unsigned int)res->hr_workerpid); 305 } 306 res->hr_workerpid = 0; 307 } else if (res->hr_remotein != NULL) { 308 char oaddr[256]; 309 310 proto_remote_address(conn, oaddr, sizeof(oaddr)); 311 pjdlog_debug(1, 312 "Canceling half-open connection from %s on connection from %s.", 313 oaddr, raddr); 314 proto_close(res->hr_remotein); 315 res->hr_remotein = NULL; 316 } 317 } 318 319 /* 320 * Checks and cleanups are done. 321 */ 322 323 if (token == NULL) { 324 arc4random_buf(res->hr_token, sizeof(res->hr_token)); 325 nvout = nv_alloc(); 326 nv_add_uint8_array(nvout, res->hr_token, 327 sizeof(res->hr_token), "token"); 328 if (nv_error(nvout) != 0) { 329 pjdlog_common(LOG_ERR, 0, nv_error(nvout), 330 "Unable to prepare return header for %s", raddr); 331 nv_add_stringf(nverr, "errmsg", 332 "Remote node was unable to prepare return header: %s.", 333 strerror(nv_error(nvout))); 334 goto fail; 335 } 336 if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) { 337 int error = errno; 338 339 pjdlog_errno(LOG_ERR, "Unable to send response to %s", 340 raddr); 341 nv_add_stringf(nverr, "errmsg", 342 "Remote node was unable to send response: %s.", 343 strerror(error)); 344 goto fail; 345 } 346 res->hr_remotein = conn; 347 pjdlog_debug(1, "Incoming connection from %s configured.", 348 raddr); 349 } else { 350 res->hr_remoteout = conn; 351 pjdlog_debug(1, "Outgoing connection to %s configured.", raddr); 352 hastd_secondary(res, nvin); 353 } 354 nv_free(nvin); 355 nv_free(nvout); 356 nv_free(nverr); 357 pjdlog_prefix_set("%s", ""); 358 return; 359 fail: 360 if (nv_error(nverr) != 0) { 361 pjdlog_common(LOG_ERR, 0, nv_error(nverr), 362 "Unable to prepare error header for %s", raddr); 363 goto close; 364 } 365 if (hast_proto_send(NULL, conn, nverr, NULL, 0) < 0) { 366 pjdlog_errno(LOG_ERR, "Unable to send error to %s", raddr); 367 goto close; 368 } 369 close: 370 if (nvin != NULL) 371 nv_free(nvin); 372 if (nvout != NULL) 373 nv_free(nvout); 374 if (nverr != NULL) 375 nv_free(nverr); 376 proto_close(conn); 377 pjdlog_prefix_set("%s", ""); 378 } 379 380 static void 381 main_loop(void) 382 { 383 fd_set rfds, wfds; 384 int fd, maxfd, ret; 385 386 for (;;) { 387 if (sigchld_received) { 388 sigchld_received = false; 389 child_exit(); 390 } 391 if (sighup_received) { 392 sighup_received = false; 393 hastd_reload(); 394 } 395 396 maxfd = 0; 397 FD_ZERO(&rfds); 398 FD_ZERO(&wfds); 399 400 /* Setup descriptors for select(2). */ 401 #define SETUP_FD(conn) do { \ 402 fd = proto_descriptor(conn); \ 403 if (fd >= 0) { \ 404 maxfd = fd > maxfd ? fd : maxfd; \ 405 FD_SET(fd, &rfds); \ 406 FD_SET(fd, &wfds); \ 407 } \ 408 } while (0) 409 SETUP_FD(cfg->hc_controlconn); 410 SETUP_FD(cfg->hc_listenconn); 411 #undef SETUP_FD 412 413 ret = select(maxfd + 1, &rfds, &wfds, NULL, NULL); 414 if (ret == -1) { 415 if (errno == EINTR) 416 continue; 417 KEEP_ERRNO((void)pidfile_remove(pfh)); 418 pjdlog_exit(EX_OSERR, "select() failed"); 419 } 420 421 #define ISSET_FD(conn) \ 422 (FD_ISSET((fd = proto_descriptor(conn)), &rfds) || FD_ISSET(fd, &wfds)) 423 if (ISSET_FD(cfg->hc_controlconn)) 424 control_handle(cfg); 425 if (ISSET_FD(cfg->hc_listenconn)) 426 listen_accept(); 427 #undef ISSET_FD 428 } 429 } 430 431 int 432 main(int argc, char *argv[]) 433 { 434 const char *pidfile; 435 pid_t otherpid; 436 bool foreground; 437 int debuglevel; 438 439 g_gate_load(); 440 441 foreground = false; 442 debuglevel = 0; 443 pidfile = HASTD_PIDFILE; 444 445 for (;;) { 446 int ch; 447 448 ch = getopt(argc, argv, "c:dFhP:"); 449 if (ch == -1) 450 break; 451 switch (ch) { 452 case 'c': 453 cfgpath = optarg; 454 break; 455 case 'd': 456 debuglevel++; 457 break; 458 case 'F': 459 foreground = true; 460 break; 461 case 'P': 462 pidfile = optarg; 463 break; 464 case 'h': 465 default: 466 usage(); 467 } 468 } 469 argc -= optind; 470 argv += optind; 471 472 pjdlog_debug_set(debuglevel); 473 474 pfh = pidfile_open(pidfile, 0600, &otherpid); 475 if (pfh == NULL) { 476 if (errno == EEXIST) { 477 pjdlog_exitx(EX_TEMPFAIL, 478 "Another hastd is already running, pid: %jd.", 479 (intmax_t)otherpid); 480 } 481 /* If we cannot create pidfile from other reasons, only warn. */ 482 pjdlog_errno(LOG_WARNING, "Cannot open or create pidfile"); 483 } 484 485 cfg = yy_config_parse(cfgpath); 486 assert(cfg != NULL); 487 488 signal(SIGHUP, sighandler); 489 signal(SIGCHLD, sighandler); 490 491 /* Listen on control address. */ 492 if (proto_server(cfg->hc_controladdr, &cfg->hc_controlconn) < 0) { 493 KEEP_ERRNO((void)pidfile_remove(pfh)); 494 pjdlog_exit(EX_OSERR, "Unable to listen on control address %s", 495 cfg->hc_controladdr); 496 } 497 /* Listen for remote connections. */ 498 if (proto_server(cfg->hc_listenaddr, &cfg->hc_listenconn) < 0) { 499 KEEP_ERRNO((void)pidfile_remove(pfh)); 500 pjdlog_exit(EX_OSERR, "Unable to listen on address %s", 501 cfg->hc_listenaddr); 502 } 503 504 if (!foreground) { 505 if (daemon(0, 0) < 0) { 506 KEEP_ERRNO((void)pidfile_remove(pfh)); 507 pjdlog_exit(EX_OSERR, "Unable to daemonize"); 508 } 509 510 /* Start logging to syslog. */ 511 pjdlog_mode_set(PJDLOG_MODE_SYSLOG); 512 513 /* Write PID to a file. */ 514 if (pidfile_write(pfh) < 0) { 515 pjdlog_errno(LOG_WARNING, 516 "Unable to write PID to a file"); 517 } 518 } 519 520 main_loop(); 521 522 exit(0); 523 } 524