1 /*- 2 * Copyright (c) 2009-2010 The FreeBSD Foundation 3 * All rights reserved. 4 * 5 * This software was developed by Pawel Jakub Dawidek under sponsorship from 6 * the FreeBSD Foundation. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/linker.h> 35 #include <sys/module.h> 36 #include <sys/wait.h> 37 38 #include <assert.h> 39 #include <err.h> 40 #include <errno.h> 41 #include <libutil.h> 42 #include <signal.h> 43 #include <stdbool.h> 44 #include <stdio.h> 45 #include <stdlib.h> 46 #include <string.h> 47 #include <sysexits.h> 48 #include <unistd.h> 49 50 #include <activemap.h> 51 #include <pjdlog.h> 52 53 #include "control.h" 54 #include "hast.h" 55 #include "hast_proto.h" 56 #include "hastd.h" 57 #include "subr.h" 58 59 /* Path to configuration file. */ 60 static const char *cfgpath = HAST_CONFIG; 61 /* Hastd configuration. */ 62 static struct hastd_config *cfg; 63 /* Was SIGCHLD signal received? */ 64 static bool sigchld_received = false; 65 /* Was SIGHUP signal received? */ 66 static bool sighup_received = false; 67 /* Was SIGINT or SIGTERM signal received? */ 68 bool sigexit_received = false; 69 /* PID file handle. */ 70 struct pidfh *pfh; 71 72 static void 73 usage(void) 74 { 75 76 errx(EX_USAGE, "[-dFh] [-c config] [-P pidfile]"); 77 } 78 79 static void 80 sighandler(int sig) 81 { 82 83 switch (sig) { 84 case SIGCHLD: 85 sigchld_received = true; 86 break; 87 case SIGHUP: 88 sighup_received = true; 89 break; 90 default: 91 assert(!"invalid condition"); 92 } 93 } 94 95 static void 96 g_gate_load(void) 97 { 98 99 if (modfind("g_gate") == -1) { 100 /* Not present in kernel, try loading it. */ 101 if (kldload("geom_gate") == -1 || modfind("g_gate") == -1) { 102 if (errno != EEXIST) { 103 pjdlog_exit(EX_OSERR, 104 "Unable to load geom_gate module"); 105 } 106 } 107 } 108 } 109 110 static void 111 child_exit(void) 112 { 113 struct hast_resource *res; 114 int status; 115 pid_t pid; 116 117 while ((pid = wait3(&status, WNOHANG, NULL)) > 0) { 118 /* Find resource related to the process that just exited. */ 119 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 120 if (pid == res->hr_workerpid) 121 break; 122 } 123 if (res == NULL) { 124 /* 125 * This can happen when new connection arrives and we 126 * cancel child responsible for the old one. 127 */ 128 continue; 129 } 130 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, 131 role2str(res->hr_role)); 132 if (WEXITSTATUS(status) == 0) { 133 pjdlog_debug(1, 134 "Worker process exited gracefully (pid=%u).", 135 (unsigned int)pid); 136 } else { 137 pjdlog_error("Worker process failed (pid=%u, status=%d).", 138 (unsigned int)pid, WEXITSTATUS(status)); 139 } 140 res->hr_workerpid = 0; 141 if (res->hr_role == HAST_ROLE_PRIMARY) { 142 sleep(1); 143 pjdlog_info("Restarting worker process."); 144 hastd_primary(res); 145 } 146 pjdlog_prefix_set("%s", ""); 147 } 148 } 149 150 static void 151 hastd_reload(void) 152 { 153 154 /* TODO */ 155 pjdlog_warning("Configuration reload is not implemented."); 156 } 157 158 static void 159 listen_accept(void) 160 { 161 struct hast_resource *res; 162 struct proto_conn *conn; 163 struct nv *nvin, *nvout, *nverr; 164 const char *resname; 165 const unsigned char *token; 166 char laddr[256], raddr[256]; 167 size_t size; 168 pid_t pid; 169 int status; 170 171 proto_local_address(cfg->hc_listenconn, laddr, sizeof(laddr)); 172 pjdlog_debug(1, "Accepting connection to %s.", laddr); 173 174 if (proto_accept(cfg->hc_listenconn, &conn) < 0) { 175 pjdlog_errno(LOG_ERR, "Unable to accept connection %s", laddr); 176 return; 177 } 178 179 proto_local_address(conn, laddr, sizeof(laddr)); 180 proto_remote_address(conn, raddr, sizeof(raddr)); 181 pjdlog_info("Connection from %s to %s.", laddr, raddr); 182 183 nvin = nvout = nverr = NULL; 184 185 /* 186 * Before receiving any data see if remote host have access to any 187 * resource. 188 */ 189 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 190 if (proto_address_match(conn, res->hr_remoteaddr)) 191 break; 192 } 193 if (res == NULL) { 194 pjdlog_error("Client %s isn't known.", raddr); 195 goto close; 196 } 197 /* Ok, remote host can access at least one resource. */ 198 199 if (hast_proto_recv_hdr(conn, &nvin) < 0) { 200 pjdlog_errno(LOG_ERR, "Unable to receive header from %s", 201 raddr); 202 goto close; 203 } 204 205 resname = nv_get_string(nvin, "resource"); 206 if (resname == NULL) { 207 pjdlog_error("No 'resource' field in the header received from %s.", 208 raddr); 209 goto close; 210 } 211 pjdlog_debug(2, "%s: resource=%s", raddr, resname); 212 token = nv_get_uint8_array(nvin, &size, "token"); 213 /* 214 * NULL token means that this is first conection. 215 */ 216 if (token != NULL && size != sizeof(res->hr_token)) { 217 pjdlog_error("Received token of invalid size from %s (expected %zu, got %zu).", 218 raddr, sizeof(res->hr_token), size); 219 goto close; 220 } 221 222 /* 223 * From now on we want to send errors to the remote node. 224 */ 225 nverr = nv_alloc(); 226 227 /* Find resource related to this connection. */ 228 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 229 if (strcmp(resname, res->hr_name) == 0) 230 break; 231 } 232 /* Have we found the resource? */ 233 if (res == NULL) { 234 pjdlog_error("No resource '%s' as requested by %s.", 235 resname, raddr); 236 nv_add_stringf(nverr, "errmsg", "Resource not configured."); 237 goto fail; 238 } 239 240 /* Now that we know resource name setup log prefix. */ 241 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 242 243 /* Does the remote host have access to this resource? */ 244 if (!proto_address_match(conn, res->hr_remoteaddr)) { 245 pjdlog_error("Client %s has no access to the resource.", raddr); 246 nv_add_stringf(nverr, "errmsg", "No access to the resource."); 247 goto fail; 248 } 249 /* Is the resource marked as secondary? */ 250 if (res->hr_role != HAST_ROLE_SECONDARY) { 251 pjdlog_error("We act as %s for the resource and not as %s as requested by %s.", 252 role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY), 253 raddr); 254 nv_add_stringf(nverr, "errmsg", 255 "Remote node acts as %s for the resource and not as %s.", 256 role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY)); 257 goto fail; 258 } 259 /* Does token (if exists) match? */ 260 if (token != NULL && memcmp(token, res->hr_token, 261 sizeof(res->hr_token)) != 0) { 262 pjdlog_error("Token received from %s doesn't match.", raddr); 263 nv_add_stringf(nverr, "errmsg", "Toke doesn't match."); 264 goto fail; 265 } 266 /* 267 * If there is no token, but we have half-open connection 268 * (only remotein) or full connection (worker process is running) 269 * we have to cancel those and accept the new connection. 270 */ 271 if (token == NULL) { 272 assert(res->hr_remoteout == NULL); 273 pjdlog_debug(1, "Initial connection from %s.", raddr); 274 if (res->hr_workerpid != 0) { 275 assert(res->hr_remotein == NULL); 276 pjdlog_debug(1, 277 "Worker process exists (pid=%u), stopping it.", 278 (unsigned int)res->hr_workerpid); 279 /* Stop child process. */ 280 if (kill(res->hr_workerpid, SIGINT) < 0) { 281 pjdlog_errno(LOG_ERR, 282 "Unable to stop worker process (pid=%u)", 283 (unsigned int)res->hr_workerpid); 284 /* 285 * Other than logging the problem we 286 * ignore it - nothing smart to do. 287 */ 288 } 289 /* Wait for it to exit. */ 290 else if ((pid = waitpid(res->hr_workerpid, 291 &status, 0)) != res->hr_workerpid) { 292 pjdlog_errno(LOG_ERR, 293 "Waiting for worker process (pid=%u) failed", 294 (unsigned int)res->hr_workerpid); 295 /* See above. */ 296 } else if (status != 0) { 297 pjdlog_error("Worker process (pid=%u) exited ungracefully: status=%d.", 298 (unsigned int)res->hr_workerpid, status); 299 /* See above. */ 300 } else { 301 pjdlog_debug(1, 302 "Worker process (pid=%u) exited gracefully.", 303 (unsigned int)res->hr_workerpid); 304 } 305 res->hr_workerpid = 0; 306 } else if (res->hr_remotein != NULL) { 307 char oaddr[256]; 308 309 proto_remote_address(conn, oaddr, sizeof(oaddr)); 310 pjdlog_debug(1, 311 "Canceling half-open connection from %s on connection from %s.", 312 oaddr, raddr); 313 proto_close(res->hr_remotein); 314 res->hr_remotein = NULL; 315 } 316 } 317 318 /* 319 * Checks and cleanups are done. 320 */ 321 322 if (token == NULL) { 323 arc4random_buf(res->hr_token, sizeof(res->hr_token)); 324 nvout = nv_alloc(); 325 nv_add_uint8_array(nvout, res->hr_token, 326 sizeof(res->hr_token), "token"); 327 if (nv_error(nvout) != 0) { 328 pjdlog_common(LOG_ERR, 0, nv_error(nvout), 329 "Unable to prepare return header for %s", raddr); 330 nv_add_stringf(nverr, "errmsg", 331 "Remote node was unable to prepare return header: %s.", 332 strerror(nv_error(nvout))); 333 goto fail; 334 } 335 if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) { 336 int error = errno; 337 338 pjdlog_errno(LOG_ERR, "Unable to send response to %s", 339 raddr); 340 nv_add_stringf(nverr, "errmsg", 341 "Remote node was unable to send response: %s.", 342 strerror(error)); 343 goto fail; 344 } 345 res->hr_remotein = conn; 346 pjdlog_debug(1, "Incoming connection from %s configured.", 347 raddr); 348 } else { 349 res->hr_remoteout = conn; 350 pjdlog_debug(1, "Outgoing connection to %s configured.", raddr); 351 hastd_secondary(res, nvin); 352 } 353 nv_free(nvin); 354 nv_free(nvout); 355 nv_free(nverr); 356 pjdlog_prefix_set("%s", ""); 357 return; 358 fail: 359 if (nv_error(nverr) != 0) { 360 pjdlog_common(LOG_ERR, 0, nv_error(nverr), 361 "Unable to prepare error header for %s", raddr); 362 goto close; 363 } 364 if (hast_proto_send(NULL, conn, nverr, NULL, 0) < 0) { 365 pjdlog_errno(LOG_ERR, "Unable to send error to %s", raddr); 366 goto close; 367 } 368 close: 369 if (nvin != NULL) 370 nv_free(nvin); 371 if (nvout != NULL) 372 nv_free(nvout); 373 if (nverr != NULL) 374 nv_free(nverr); 375 proto_close(conn); 376 pjdlog_prefix_set("%s", ""); 377 } 378 379 static void 380 main_loop(void) 381 { 382 fd_set rfds, wfds; 383 int fd, maxfd, ret; 384 385 for (;;) { 386 if (sigchld_received) { 387 sigchld_received = false; 388 child_exit(); 389 } 390 if (sighup_received) { 391 sighup_received = false; 392 hastd_reload(); 393 } 394 395 maxfd = 0; 396 FD_ZERO(&rfds); 397 FD_ZERO(&wfds); 398 399 /* Setup descriptors for select(2). */ 400 #define SETUP_FD(conn) do { \ 401 fd = proto_descriptor(conn); \ 402 if (fd >= 0) { \ 403 maxfd = fd > maxfd ? fd : maxfd; \ 404 FD_SET(fd, &rfds); \ 405 FD_SET(fd, &wfds); \ 406 } \ 407 } while (0) 408 SETUP_FD(cfg->hc_controlconn); 409 SETUP_FD(cfg->hc_listenconn); 410 #undef SETUP_FD 411 412 ret = select(maxfd + 1, &rfds, &wfds, NULL, NULL); 413 if (ret == -1) { 414 if (errno == EINTR) 415 continue; 416 KEEP_ERRNO((void)pidfile_remove(pfh)); 417 pjdlog_exit(EX_OSERR, "select() failed"); 418 } 419 420 #define ISSET_FD(conn) \ 421 (FD_ISSET((fd = proto_descriptor(conn)), &rfds) || FD_ISSET(fd, &wfds)) 422 if (ISSET_FD(cfg->hc_controlconn)) 423 control_handle(cfg); 424 if (ISSET_FD(cfg->hc_listenconn)) 425 listen_accept(); 426 #undef ISSET_FD 427 } 428 } 429 430 int 431 main(int argc, char *argv[]) 432 { 433 const char *pidfile; 434 pid_t otherpid; 435 bool foreground; 436 int debuglevel; 437 438 g_gate_load(); 439 440 foreground = false; 441 debuglevel = 0; 442 pidfile = HASTD_PIDFILE; 443 444 for (;;) { 445 int ch; 446 447 ch = getopt(argc, argv, "c:dFhP:"); 448 if (ch == -1) 449 break; 450 switch (ch) { 451 case 'c': 452 cfgpath = optarg; 453 break; 454 case 'd': 455 debuglevel++; 456 break; 457 case 'F': 458 foreground = true; 459 break; 460 case 'P': 461 pidfile = optarg; 462 break; 463 case 'h': 464 default: 465 usage(); 466 } 467 } 468 argc -= optind; 469 argv += optind; 470 471 pjdlog_debug_set(debuglevel); 472 473 pfh = pidfile_open(pidfile, 0600, &otherpid); 474 if (pfh == NULL) { 475 if (errno == EEXIST) { 476 pjdlog_exitx(EX_TEMPFAIL, 477 "Another hastd is already running, pid: %jd.", 478 (intmax_t)otherpid); 479 } 480 /* If we cannot create pidfile from other reasons, only warn. */ 481 pjdlog_errno(LOG_WARNING, "Cannot open or create pidfile"); 482 } 483 484 cfg = yy_config_parse(cfgpath); 485 assert(cfg != NULL); 486 487 signal(SIGHUP, sighandler); 488 signal(SIGCHLD, sighandler); 489 490 /* Listen on control address. */ 491 if (proto_server(cfg->hc_controladdr, &cfg->hc_controlconn) < 0) { 492 KEEP_ERRNO((void)pidfile_remove(pfh)); 493 pjdlog_exit(EX_OSERR, "Unable to listen on control address %s", 494 cfg->hc_controladdr); 495 } 496 /* Listen for remote connections. */ 497 if (proto_server(cfg->hc_listenaddr, &cfg->hc_listenconn) < 0) { 498 KEEP_ERRNO((void)pidfile_remove(pfh)); 499 pjdlog_exit(EX_OSERR, "Unable to listen on address %s", 500 cfg->hc_listenaddr); 501 } 502 503 if (!foreground) { 504 if (daemon(0, 0) < 0) { 505 KEEP_ERRNO((void)pidfile_remove(pfh)); 506 pjdlog_exit(EX_OSERR, "Unable to daemonize"); 507 } 508 509 /* Start logging to syslog. */ 510 pjdlog_mode_set(PJDLOG_MODE_SYSLOG); 511 512 /* Write PID to a file. */ 513 if (pidfile_write(pfh) < 0) { 514 pjdlog_errno(LOG_WARNING, 515 "Unable to write PID to a file"); 516 } 517 } 518 519 main_loop(); 520 521 exit(0); 522 } 523