xref: /freebsd/sbin/hastd/hastd.c (revision 050570efa79efcc9cf5adeb545f1a679c8dc377b)
1 /*-
2  * Copyright (c) 2009-2010 The FreeBSD Foundation
3  * Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
4  * All rights reserved.
5  *
6  * This software was developed by Pawel Jakub Dawidek under sponsorship from
7  * the FreeBSD Foundation.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include <sys/param.h>
35 #include <sys/linker.h>
36 #include <sys/module.h>
37 #include <sys/wait.h>
38 
39 #include <assert.h>
40 #include <err.h>
41 #include <errno.h>
42 #include <libutil.h>
43 #include <signal.h>
44 #include <stdbool.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <sysexits.h>
49 #include <unistd.h>
50 
51 #include <activemap.h>
52 #include <pjdlog.h>
53 
54 #include "control.h"
55 #include "event.h"
56 #include "hast.h"
57 #include "hast_proto.h"
58 #include "hastd.h"
59 #include "hooks.h"
60 #include "subr.h"
61 
/* Path to configuration file; main() overrides it when -c is given. */
const char *cfgpath = HAST_CONFIG;
/* Hastd configuration currently in effect (parsed in main()). */
static struct hastd_config *cfg;
/* Was SIGINT or SIGTERM signal received? Set by main_loop() before exiting. */
bool sigexit_received = false;
/* PID file handle; NULL when main() was unable to open the PID file. */
struct pidfh *pfh;

/*
 * How often (in seconds) to check for hooks running for too long.
 * Used as the select(2) timeout in main_loop().
 */
#define	REPORT_INTERVAL	5
73 
/*
 * Print the command-line synopsis through errx(3), which terminates the
 * process with EX_USAGE.
 */
static void
usage(void)
{
	static const char synopsis[] = "[-dFh] [-c config] [-P pidfile]";

	errx(EX_USAGE, "%s", synopsis);
}
80 
/*
 * Make sure the g_gate module is available, loading geom_gate if needed.
 * Exits with EX_OSERR when the module cannot be loaded, unless the failure
 * was EEXIST.
 */
static void
g_gate_load(void)
{

	/* Already present in the kernel - nothing to do. */
	if (modfind("g_gate") != -1)
		return;
	/* Not present, try loading it and look it up once more. */
	if (kldload("geom_gate") != -1 && modfind("g_gate") != -1)
		return;
	/* EEXIST is tolerated; anything else is fatal. */
	if (errno == EEXIST)
		return;
	pjdlog_exit(EX_OSERR, "Unable to load geom_gate module");
}
95 
/*
 * Log how a worker process ended.
 *
 * pid    - PID of the worker, used only for the log message.
 * status - wait status as returned by wait3(2)/waitpid(2).
 *
 * A zero exit code is logged at debug level 1; death by signal and any
 * other termination are logged as errors.
 */
static void
child_exit_log(unsigned int pid, int status)
{
	int exitcode;

	if (WIFEXITED(status)) {
		exitcode = WEXITSTATUS(status);
		if (exitcode == 0) {
			pjdlog_debug(1,
			    "Worker process exited gracefully (pid=%u).", pid);
			return;
		}
	} else if (WIFSIGNALED(status)) {
		pjdlog_error("Worker process killed (pid=%u, signal=%d).",
		    pid, WTERMSIG(status));
		return;
	} else {
		/* Neither exited nor signaled: no exit code to report. */
		exitcode = -1;
	}
	pjdlog_error("Worker process exited ungracefully (pid=%u, exitcode=%d).",
	    pid, exitcode);
}
111 
112 static void
113 child_exit(void)
114 {
115 	struct hast_resource *res;
116 	int status;
117 	pid_t pid;
118 
119 	while ((pid = wait3(&status, WNOHANG, NULL)) > 0) {
120 		/* Find resource related to the process that just exited. */
121 		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
122 			if (pid == res->hr_workerpid)
123 				break;
124 		}
125 		if (res == NULL) {
126 			/*
127 			 * This can happen when new connection arrives and we
128 			 * cancel child responsible for the old one or if this
129 			 * was hook which we executed.
130 			 */
131 			hook_check_one(pid, status);
132 			continue;
133 		}
134 		pjdlog_prefix_set("[%s] (%s) ", res->hr_name,
135 		    role2str(res->hr_role));
136 		child_exit_log(pid, status);
137 		child_cleanup(res);
138 		if (res->hr_role == HAST_ROLE_PRIMARY) {
139 			/*
140 			 * Restart child process if it was killed by signal
141 			 * or exited because of temporary problem.
142 			 */
143 			if (WIFSIGNALED(status) ||
144 			    (WIFEXITED(status) &&
145 			     WEXITSTATUS(status) == EX_TEMPFAIL)) {
146 				sleep(1);
147 				pjdlog_info("Restarting worker process.");
148 				hastd_primary(res);
149 			} else {
150 				res->hr_role = HAST_ROLE_INIT;
151 				pjdlog_info("Changing resource role back to %s.",
152 				    role2str(res->hr_role));
153 			}
154 		}
155 		pjdlog_prefix_set("%s", "");
156 	}
157 }
158 
159 static bool
160 resource_needs_restart(const struct hast_resource *res0,
161     const struct hast_resource *res1)
162 {
163 
164 	assert(strcmp(res0->hr_name, res1->hr_name) == 0);
165 
166 	if (strcmp(res0->hr_provname, res1->hr_provname) != 0)
167 		return (true);
168 	if (strcmp(res0->hr_localpath, res1->hr_localpath) != 0)
169 		return (true);
170 	if (res0->hr_role == HAST_ROLE_INIT ||
171 	    res0->hr_role == HAST_ROLE_SECONDARY) {
172 		if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0)
173 			return (true);
174 		if (res0->hr_replication != res1->hr_replication)
175 			return (true);
176 		if (res0->hr_timeout != res1->hr_timeout)
177 			return (true);
178 		if (strcmp(res0->hr_exec, res1->hr_exec) != 0)
179 			return (true);
180 	}
181 	return (false);
182 }
183 
184 static bool
185 resource_needs_reload(const struct hast_resource *res0,
186     const struct hast_resource *res1)
187 {
188 
189 	assert(strcmp(res0->hr_name, res1->hr_name) == 0);
190 	assert(strcmp(res0->hr_provname, res1->hr_provname) == 0);
191 	assert(strcmp(res0->hr_localpath, res1->hr_localpath) == 0);
192 
193 	if (res0->hr_role != HAST_ROLE_PRIMARY)
194 		return (false);
195 
196 	if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0)
197 		return (true);
198 	if (res0->hr_replication != res1->hr_replication)
199 		return (true);
200 	if (res0->hr_timeout != res1->hr_timeout)
201 		return (true);
202 	if (strcmp(res0->hr_exec, res1->hr_exec) != 0)
203 		return (true);
204 	return (false);
205 }
206 
207 static void
208 resource_reload(const struct hast_resource *res)
209 {
210 	struct nv *nvin, *nvout;
211 	int error;
212 
213 	assert(res->hr_role == HAST_ROLE_PRIMARY);
214 
215 	nvout = nv_alloc();
216 	nv_add_uint8(nvout, HASTCTL_RELOAD, "cmd");
217 	nv_add_string(nvout, res->hr_remoteaddr, "remoteaddr");
218 	nv_add_int32(nvout, (int32_t)res->hr_replication, "replication");
219 	nv_add_int32(nvout, (int32_t)res->hr_timeout, "timeout");
220 	nv_add_string(nvout, res->hr_exec, "exec");
221 	if (nv_error(nvout) != 0) {
222 		nv_free(nvout);
223 		pjdlog_error("Unable to allocate header for reload message.");
224 		return;
225 	}
226 	if (hast_proto_send(res, res->hr_ctrl, nvout, NULL, 0) < 0) {
227 		pjdlog_errno(LOG_ERR, "Unable to send reload message");
228 		nv_free(nvout);
229 		return;
230 	}
231 	nv_free(nvout);
232 
233 	/* Receive response. */
234 	if (hast_proto_recv_hdr(res->hr_ctrl, &nvin) < 0) {
235 		pjdlog_errno(LOG_ERR, "Unable to receive reload reply");
236 		return;
237 	}
238 	error = nv_get_int16(nvin, "error");
239 	nv_free(nvin);
240 	if (error != 0) {
241 		pjdlog_common(LOG_ERR, 0, error, "Reload failed");
242 		return;
243 	}
244 }
245 
246 static void
247 hastd_reload(void)
248 {
249 	struct hastd_config *newcfg;
250 	struct hast_resource *nres, *cres, *tres;
251 	uint8_t role;
252 
253 	pjdlog_info("Reloading configuration...");
254 
255 	newcfg = yy_config_parse(cfgpath, false);
256 	if (newcfg == NULL)
257 		goto failed;
258 
259 	/*
260 	 * Check if control address has changed.
261 	 */
262 	if (strcmp(cfg->hc_controladdr, newcfg->hc_controladdr) != 0) {
263 		if (proto_server(newcfg->hc_controladdr,
264 		    &newcfg->hc_controlconn) < 0) {
265 			pjdlog_errno(LOG_ERR,
266 			    "Unable to listen on control address %s",
267 			    newcfg->hc_controladdr);
268 			goto failed;
269 		}
270 	}
271 	/*
272 	 * Check if listen address has changed.
273 	 */
274 	if (strcmp(cfg->hc_listenaddr, newcfg->hc_listenaddr) != 0) {
275 		if (proto_server(newcfg->hc_listenaddr,
276 		    &newcfg->hc_listenconn) < 0) {
277 			pjdlog_errno(LOG_ERR, "Unable to listen on address %s",
278 			    newcfg->hc_listenaddr);
279 			goto failed;
280 		}
281 	}
282 	/*
283 	 * Only when both control and listen sockets are successfully
284 	 * initialized switch them to new configuration.
285 	 */
286 	if (newcfg->hc_controlconn != NULL) {
287 		pjdlog_info("Control socket changed from %s to %s.",
288 		    cfg->hc_controladdr, newcfg->hc_controladdr);
289 		proto_close(cfg->hc_controlconn);
290 		cfg->hc_controlconn = newcfg->hc_controlconn;
291 		newcfg->hc_controlconn = NULL;
292 		strlcpy(cfg->hc_controladdr, newcfg->hc_controladdr,
293 		    sizeof(cfg->hc_controladdr));
294 	}
295 	if (newcfg->hc_listenconn != NULL) {
296 		pjdlog_info("Listen socket changed from %s to %s.",
297 		    cfg->hc_listenaddr, newcfg->hc_listenaddr);
298 		proto_close(cfg->hc_listenconn);
299 		cfg->hc_listenconn = newcfg->hc_listenconn;
300 		newcfg->hc_listenconn = NULL;
301 		strlcpy(cfg->hc_listenaddr, newcfg->hc_listenaddr,
302 		    sizeof(cfg->hc_listenaddr));
303 	}
304 
305 	/*
306 	 * Stop and remove resources that were removed from the configuration.
307 	 */
308 	TAILQ_FOREACH_SAFE(cres, &cfg->hc_resources, hr_next, tres) {
309 		TAILQ_FOREACH(nres, &newcfg->hc_resources, hr_next) {
310 			if (strcmp(cres->hr_name, nres->hr_name) == 0)
311 				break;
312 		}
313 		if (nres == NULL) {
314 			control_set_role(cres, HAST_ROLE_INIT);
315 			TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next);
316 			pjdlog_info("Resource %s removed.", cres->hr_name);
317 			free(cres);
318 		}
319 	}
320 	/*
321 	 * Move new resources to the current configuration.
322 	 */
323 	TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) {
324 		TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) {
325 			if (strcmp(cres->hr_name, nres->hr_name) == 0)
326 				break;
327 		}
328 		if (cres == NULL) {
329 			TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next);
330 			TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next);
331 			pjdlog_info("Resource %s added.", nres->hr_name);
332 		}
333 	}
334 	/*
335 	 * Deal with modified resources.
336 	 * Depending on what has changed exactly we might want to perform
337 	 * different actions.
338 	 *
339 	 * We do full resource restart in the following situations:
340 	 * Resource role is INIT or SECONDARY.
341 	 * Resource role is PRIMARY and path to local component or provider
342 	 * name has changed.
343 	 * In case of PRIMARY, the worker process will be killed and restarted,
344 	 * which also means removing /dev/hast/<name> provider and
345 	 * recreating it.
346 	 *
347 	 * We do just reload (send SIGHUP to worker process) if we act as
348 	 * PRIMARY, but only if remote address, replication mode, timeout or
349 	 * execution path has changed. For those, there is no need to restart
350 	 * worker process.
351 	 * If PRIMARY receives SIGHUP, it will reconnect if remote address or
352 	 * replication mode has changed or simply set new timeout if only
353 	 * timeout has changed.
354 	 */
355 	TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) {
356 		TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) {
357 			if (strcmp(cres->hr_name, nres->hr_name) == 0)
358 				break;
359 		}
360 		assert(cres != NULL);
361 		if (resource_needs_restart(cres, nres)) {
362 			pjdlog_info("Resource %s configuration was modified, restarting it.",
363 			    cres->hr_name);
364 			role = cres->hr_role;
365 			control_set_role(cres, HAST_ROLE_INIT);
366 			TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next);
367 			free(cres);
368 			TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next);
369 			TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next);
370 			control_set_role(nres, role);
371 		} else if (resource_needs_reload(cres, nres)) {
372 			pjdlog_info("Resource %s configuration was modified, reloading it.",
373 			    cres->hr_name);
374 			strlcpy(cres->hr_remoteaddr, nres->hr_remoteaddr,
375 			    sizeof(cres->hr_remoteaddr));
376 			cres->hr_replication = nres->hr_replication;
377 			cres->hr_timeout = nres->hr_timeout;
378 			strlcpy(cres->hr_exec, nres->hr_exec,
379 			    sizeof(cres->hr_exec));
380 			if (cres->hr_workerpid != 0)
381 				resource_reload(cres);
382 		}
383 	}
384 
385 	yy_config_free(newcfg);
386 	pjdlog_info("Configuration reloaded successfully.");
387 	return;
388 failed:
389 	if (newcfg != NULL) {
390 		if (newcfg->hc_controlconn != NULL)
391 			proto_close(newcfg->hc_controlconn);
392 		if (newcfg->hc_listenconn != NULL)
393 			proto_close(newcfg->hc_listenconn);
394 		yy_config_free(newcfg);
395 	}
396 	pjdlog_warning("Configuration not reloaded.");
397 }
398 
399 static void
400 terminate_workers(void)
401 {
402 	struct hast_resource *res;
403 
404 	pjdlog_info("Termination signal received, exiting.");
405 	TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
406 		if (res->hr_workerpid == 0)
407 			continue;
408 		pjdlog_info("Terminating worker process (resource=%s, role=%s, pid=%u).",
409 		    res->hr_name, role2str(res->hr_role), res->hr_workerpid);
410 		if (kill(res->hr_workerpid, SIGTERM) == 0)
411 			continue;
412 		pjdlog_errno(LOG_WARNING,
413 		    "Unable to send signal to worker process (resource=%s, role=%s, pid=%u).",
414 		    res->hr_name, role2str(res->hr_role), res->hr_workerpid);
415 	}
416 }
417 
/*
 * Accept one connection on the listen socket and attach it to a resource.
 *
 * The peer must match the remote address of at least one resource before
 * any data is read from it.  The received header names the resource and may
 * carry a token:
 *
 *  - No token: this is the initial connection.  Any running worker or
 *    half-open connection for the resource is cancelled, a fresh random
 *    token is generated and sent back, and the connection is kept as
 *    hr_remotein.
 *  - Token present and equal to hr_token: the connection is kept as
 *    hr_remoteout and hastd_secondary() is called.
 *
 * Once nverr is allocated, failures are reported back to the peer in an
 * "errmsg" header before the connection is closed.
 */
static void
listen_accept(void)
{
	struct hast_resource *res;
	struct proto_conn *conn;
	struct nv *nvin, *nvout, *nverr;
	const char *resname;
	const unsigned char *token;
	char laddr[256], raddr[256];
	size_t size;
	pid_t pid;
	int status;

	proto_local_address(cfg->hc_listenconn, laddr, sizeof(laddr));
	pjdlog_debug(1, "Accepting connection to %s.", laddr);

	if (proto_accept(cfg->hc_listenconn, &conn) < 0) {
		pjdlog_errno(LOG_ERR, "Unable to accept connection %s", laddr);
		return;
	}

	proto_local_address(conn, laddr, sizeof(laddr));
	proto_remote_address(conn, raddr, sizeof(raddr));
	pjdlog_info("Connection from %s to %s.", raddr, laddr);

	/* Error in setting timeout is not critical, but why should it fail? */
	if (proto_timeout(conn, HAST_TIMEOUT) < 0)
		pjdlog_errno(LOG_WARNING, "Unable to set connection timeout");

	/* NULL-initialized so the cleanup code can tell what was allocated. */
	nvin = nvout = nverr = NULL;

	/*
	 * Before receiving any data see if remote host has access to any
	 * resource.
	 */
	TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
		if (proto_address_match(conn, res->hr_remoteaddr))
			break;
	}
	if (res == NULL) {
		pjdlog_error("Client %s isn't known.", raddr);
		goto close;
	}
	/* Ok, remote host can access at least one resource. */

	if (hast_proto_recv_hdr(conn, &nvin) < 0) {
		pjdlog_errno(LOG_ERR, "Unable to receive header from %s",
		    raddr);
		goto close;
	}

	resname = nv_get_string(nvin, "resource");
	if (resname == NULL) {
		pjdlog_error("No 'resource' field in the header received from %s.",
		    raddr);
		goto close;
	}
	pjdlog_debug(2, "%s: resource=%s", raddr, resname);
	token = nv_get_uint8_array(nvin, &size, "token");
	/*
	 * NULL token means that this is first connection.
	 * Here res is only used for sizeof(), so it does not matter that it
	 * is not yet the resource named in the header.
	 */
	if (token != NULL && size != sizeof(res->hr_token)) {
		pjdlog_error("Received token of invalid size from %s (expected %zu, got %zu).",
		    raddr, sizeof(res->hr_token), size);
		goto close;
	}

	/*
	 * From now on we want to send errors to the remote node.
	 */
	nverr = nv_alloc();

	/* Find resource related to this connection. */
	TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
		if (strcmp(resname, res->hr_name) == 0)
			break;
	}
	/* Have we found the resource? */
	if (res == NULL) {
		pjdlog_error("No resource '%s' as requested by %s.",
		    resname, raddr);
		nv_add_stringf(nverr, "errmsg", "Resource not configured.");
		goto fail;
	}

	/* Now that we know resource name setup log prefix. */
	pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));

	/* Does the remote host have access to this resource? */
	if (!proto_address_match(conn, res->hr_remoteaddr)) {
		pjdlog_error("Client %s has no access to the resource.", raddr);
		nv_add_stringf(nverr, "errmsg", "No access to the resource.");
		goto fail;
	}
	/* Is the resource marked as secondary? */
	if (res->hr_role != HAST_ROLE_SECONDARY) {
		pjdlog_error("We act as %s for the resource and not as %s as requested by %s.",
		    role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY),
		    raddr);
		nv_add_stringf(nverr, "errmsg",
		    "Remote node acts as %s for the resource and not as %s.",
		    role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY));
		goto fail;
	}
	/* Does token (if exists) match? */
	if (token != NULL && memcmp(token, res->hr_token,
	    sizeof(res->hr_token)) != 0) {
		pjdlog_error("Token received from %s doesn't match.", raddr);
		nv_add_stringf(nverr, "errmsg", "Token doesn't match.");
		goto fail;
	}
	/*
	 * If there is no token, but we have half-open connection
	 * (only remotein) or full connection (worker process is running)
	 * we have to cancel those and accept the new connection.
	 */
	if (token == NULL) {
		assert(res->hr_remoteout == NULL);
		pjdlog_debug(1, "Initial connection from %s.", raddr);
		if (res->hr_workerpid != 0) {
			assert(res->hr_remotein == NULL);
			pjdlog_debug(1,
			    "Worker process exists (pid=%u), stopping it.",
			    (unsigned int)res->hr_workerpid);
			/* Stop child process. */
			if (kill(res->hr_workerpid, SIGINT) < 0) {
				pjdlog_errno(LOG_ERR,
				    "Unable to stop worker process (pid=%u)",
				    (unsigned int)res->hr_workerpid);
				/*
				 * Other than logging the problem we
				 * ignore it - nothing smart to do.
				 */
			}
			/* Wait for it to exit. */
			else if ((pid = waitpid(res->hr_workerpid,
			    &status, 0)) != res->hr_workerpid) {
				/* We can only log the problem. */
				pjdlog_errno(LOG_ERR,
				    "Waiting for worker process (pid=%u) failed",
				    (unsigned int)res->hr_workerpid);
			} else {
				child_exit_log(res->hr_workerpid, status);
			}
			/* Cleanup runs whether or not the worker was reaped. */
			child_cleanup(res);
		} else if (res->hr_remotein != NULL) {
			char oaddr[256];

			proto_remote_address(res->hr_remotein, oaddr,
			    sizeof(oaddr));
			pjdlog_debug(1,
			    "Canceling half-open connection from %s on connection from %s.",
			    oaddr, raddr);
			proto_close(res->hr_remotein);
			res->hr_remotein = NULL;
		}
	}

	/*
	 * Checks and cleanups are done.
	 */

	if (token == NULL) {
		/* Generate a new token and hand it to the peer. */
		arc4random_buf(res->hr_token, sizeof(res->hr_token));
		nvout = nv_alloc();
		nv_add_uint8_array(nvout, res->hr_token,
		    sizeof(res->hr_token), "token");
		if (nv_error(nvout) != 0) {
			pjdlog_common(LOG_ERR, 0, nv_error(nvout),
			    "Unable to prepare return header for %s", raddr);
			nv_add_stringf(nverr, "errmsg",
			    "Remote node was unable to prepare return header: %s.",
			    strerror(nv_error(nvout)));
			goto fail;
		}
		if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) {
			/* Save errno before logging; it is used again below. */
			int error = errno;

			pjdlog_errno(LOG_ERR, "Unable to send response to %s",
			    raddr);
			nv_add_stringf(nverr, "errmsg",
			    "Remote node was unable to send response: %s.",
			    strerror(error));
			goto fail;
		}
		res->hr_remotein = conn;
		pjdlog_debug(1, "Incoming connection from %s configured.",
		    raddr);
	} else {
		/*
		 * NOTE(review): a matching token is accepted here without
		 * checking that hr_remotein is set - confirm that this is
		 * intended.
		 */
		res->hr_remoteout = conn;
		pjdlog_debug(1, "Outgoing connection to %s configured.", raddr);
		hastd_secondary(res, nvin);
	}
	/* Success: conn is now referenced by the resource and stays open. */
	nv_free(nvin);
	nv_free(nvout);
	nv_free(nverr);
	pjdlog_prefix_set("%s", "");
	return;
fail:
	/* Report the failure to the peer, then fall through to cleanup. */
	if (nv_error(nverr) != 0) {
		pjdlog_common(LOG_ERR, 0, nv_error(nverr),
		    "Unable to prepare error header for %s", raddr);
		goto close;
	}
	if (hast_proto_send(NULL, conn, nverr, NULL, 0) < 0) {
		pjdlog_errno(LOG_ERR, "Unable to send error to %s", raddr);
		goto close;
	}
close:
	if (nvin != NULL)
		nv_free(nvin);
	if (nvout != NULL)
		nv_free(nvout);
	if (nverr != NULL)
		nv_free(nverr);
	proto_close(conn);
	pjdlog_prefix_set("%s", "");
}
637 
638 static void
639 main_loop(void)
640 {
641 	struct hast_resource *res;
642 	struct timeval seltimeout;
643 	struct timespec sigtimeout;
644 	int fd, maxfd, ret, signo;
645 	sigset_t mask;
646 	fd_set rfds;
647 
648 	seltimeout.tv_sec = REPORT_INTERVAL;
649 	seltimeout.tv_usec = 0;
650 	sigtimeout.tv_sec = 0;
651 	sigtimeout.tv_nsec = 0;
652 
653 	PJDLOG_VERIFY(sigemptyset(&mask) == 0);
654 	PJDLOG_VERIFY(sigaddset(&mask, SIGHUP) == 0);
655 	PJDLOG_VERIFY(sigaddset(&mask, SIGINT) == 0);
656 	PJDLOG_VERIFY(sigaddset(&mask, SIGTERM) == 0);
657 	PJDLOG_VERIFY(sigaddset(&mask, SIGCHLD) == 0);
658 
659 	pjdlog_info("Started successfully, running protocol version %d.",
660 	    HAST_PROTO_VERSION);
661 
662 	for (;;) {
663 		while ((signo = sigtimedwait(&mask, NULL, &sigtimeout)) != -1) {
664 			switch (signo) {
665 			case SIGINT:
666 			case SIGTERM:
667 				sigexit_received = true;
668 				terminate_workers();
669 				exit(EX_OK);
670 				break;
671 			case SIGCHLD:
672 				child_exit();
673 				break;
674 			case SIGHUP:
675 				hastd_reload();
676 				break;
677 			default:
678 				assert(!"invalid condition");
679 			}
680 		}
681 
682 		/* Setup descriptors for select(2). */
683 		FD_ZERO(&rfds);
684 		maxfd = fd = proto_descriptor(cfg->hc_controlconn);
685 		assert(fd >= 0);
686 		FD_SET(fd, &rfds);
687 		fd = proto_descriptor(cfg->hc_listenconn);
688 		assert(fd >= 0);
689 		FD_SET(fd, &rfds);
690 		maxfd = fd > maxfd ? fd : maxfd;
691 		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
692 			if (res->hr_event == NULL)
693 				continue;
694 			fd = proto_descriptor(res->hr_event);
695 			assert(fd >= 0);
696 			FD_SET(fd, &rfds);
697 			maxfd = fd > maxfd ? fd : maxfd;
698 		}
699 
700 		assert(maxfd + 1 <= (int)FD_SETSIZE);
701 		ret = select(maxfd + 1, &rfds, NULL, NULL, &seltimeout);
702 		if (ret == 0)
703 			hook_check();
704 		else if (ret == -1) {
705 			if (errno == EINTR)
706 				continue;
707 			KEEP_ERRNO((void)pidfile_remove(pfh));
708 			pjdlog_exit(EX_OSERR, "select() failed");
709 		}
710 
711 		if (FD_ISSET(proto_descriptor(cfg->hc_controlconn), &rfds))
712 			control_handle(cfg);
713 		if (FD_ISSET(proto_descriptor(cfg->hc_listenconn), &rfds))
714 			listen_accept();
715 		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
716 			if (res->hr_event == NULL)
717 				continue;
718 			if (FD_ISSET(proto_descriptor(res->hr_event), &rfds)) {
719 				if (event_recv(res) == 0)
720 					continue;
721 				/* The worker process exited? */
722 				proto_close(res->hr_event);
723 				res->hr_event = NULL;
724 			}
725 		}
726 	}
727 }
728 
/*
 * Signal handler that deliberately does nothing.  main() installs it for
 * SIGCHLD so that the signal has a handler and can be masked.
 */
static void
dummy_sighandler(int sig)
{

	/* The signal number is of no interest. */
	(void)sig;
}
734 
735 int
736 main(int argc, char *argv[])
737 {
738 	const char *pidfile;
739 	pid_t otherpid;
740 	bool foreground;
741 	int debuglevel;
742 	sigset_t mask;
743 
744 	foreground = false;
745 	debuglevel = 0;
746 	pidfile = HASTD_PIDFILE;
747 
748 	for (;;) {
749 		int ch;
750 
751 		ch = getopt(argc, argv, "c:dFhP:");
752 		if (ch == -1)
753 			break;
754 		switch (ch) {
755 		case 'c':
756 			cfgpath = optarg;
757 			break;
758 		case 'd':
759 			debuglevel++;
760 			break;
761 		case 'F':
762 			foreground = true;
763 			break;
764 		case 'P':
765 			pidfile = optarg;
766 			break;
767 		case 'h':
768 		default:
769 			usage();
770 		}
771 	}
772 	argc -= optind;
773 	argv += optind;
774 
775 	pjdlog_debug_set(debuglevel);
776 
777 	g_gate_load();
778 
779 	pfh = pidfile_open(pidfile, 0600, &otherpid);
780 	if (pfh == NULL) {
781 		if (errno == EEXIST) {
782 			pjdlog_exitx(EX_TEMPFAIL,
783 			    "Another hastd is already running, pid: %jd.",
784 			    (intmax_t)otherpid);
785 		}
786 		/* If we cannot create pidfile from other reasons, only warn. */
787 		pjdlog_errno(LOG_WARNING, "Unable to open or create pidfile");
788 	}
789 
790 	cfg = yy_config_parse(cfgpath, true);
791 	assert(cfg != NULL);
792 
793 	/*
794 	 * Restore default actions for interesting signals in case parent
795 	 * process (like init(8)) decided to ignore some of them (like SIGHUP).
796 	 */
797 	PJDLOG_VERIFY(signal(SIGHUP, SIG_DFL) != SIG_ERR);
798 	PJDLOG_VERIFY(signal(SIGINT, SIG_DFL) != SIG_ERR);
799 	PJDLOG_VERIFY(signal(SIGTERM, SIG_DFL) != SIG_ERR);
800 	/*
801 	 * Because SIGCHLD is ignored by default, setup dummy handler for it,
802 	 * so we can mask it.
803 	 */
804 	PJDLOG_VERIFY(signal(SIGCHLD, dummy_sighandler) != SIG_ERR);
805 
806 	PJDLOG_VERIFY(sigemptyset(&mask) == 0);
807 	PJDLOG_VERIFY(sigaddset(&mask, SIGHUP) == 0);
808 	PJDLOG_VERIFY(sigaddset(&mask, SIGINT) == 0);
809 	PJDLOG_VERIFY(sigaddset(&mask, SIGTERM) == 0);
810 	PJDLOG_VERIFY(sigaddset(&mask, SIGCHLD) == 0);
811 	PJDLOG_VERIFY(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
812 
813 	/* Listen on control address. */
814 	if (proto_server(cfg->hc_controladdr, &cfg->hc_controlconn) < 0) {
815 		KEEP_ERRNO((void)pidfile_remove(pfh));
816 		pjdlog_exit(EX_OSERR, "Unable to listen on control address %s",
817 		    cfg->hc_controladdr);
818 	}
819 	/* Listen for remote connections. */
820 	if (proto_server(cfg->hc_listenaddr, &cfg->hc_listenconn) < 0) {
821 		KEEP_ERRNO((void)pidfile_remove(pfh));
822 		pjdlog_exit(EX_OSERR, "Unable to listen on address %s",
823 		    cfg->hc_listenaddr);
824 	}
825 
826 	if (!foreground) {
827 		if (daemon(0, 0) < 0) {
828 			KEEP_ERRNO((void)pidfile_remove(pfh));
829 			pjdlog_exit(EX_OSERR, "Unable to daemonize");
830 		}
831 
832 		/* Start logging to syslog. */
833 		pjdlog_mode_set(PJDLOG_MODE_SYSLOG);
834 
835 		/* Write PID to a file. */
836 		if (pidfile_write(pfh) < 0) {
837 			pjdlog_errno(LOG_WARNING,
838 			    "Unable to write PID to a file");
839 		}
840 	}
841 
842 	hook_init();
843 
844 	main_loop();
845 
846 	exit(0);
847 }
848