xref: /freebsd/sbin/hastd/hastd.c (revision 9124ddeb4a551977cf6b2218291e7c666ce25f47)
1 /*-
2  * Copyright (c) 2009-2010 The FreeBSD Foundation
3  * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
4  * All rights reserved.
5  *
6  * This software was developed by Pawel Jakub Dawidek under sponsorship from
7  * the FreeBSD Foundation.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include <sys/param.h>
35 #include <sys/linker.h>
36 #include <sys/module.h>
37 #include <sys/stat.h>
38 #include <sys/wait.h>
39 
40 #include <err.h>
41 #include <errno.h>
42 #include <libutil.h>
43 #include <signal.h>
44 #include <stdbool.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <sysexits.h>
49 #include <time.h>
50 #include <unistd.h>
51 
52 #include <activemap.h>
53 #include <pjdlog.h>
54 
55 #include "control.h"
56 #include "event.h"
57 #include "hast.h"
58 #include "hast_proto.h"
59 #include "hastd.h"
60 #include "hooks.h"
61 #include "subr.h"
62 
63 /* Path to configuration file. */
64 const char *cfgpath = HAST_CONFIG;
65 /* Hastd configuration. */
66 static struct hastd_config *cfg;
67 /* Was SIGINT or SIGTERM signal received? */
68 bool sigexit_received = false;
69 /* PID file handle. */
70 struct pidfh *pfh;
71 /* Do we run in foreground? */
72 static bool foreground;
73 
74 /* How often to check for hooks that have been running for too long. */
75 #define	REPORT_INTERVAL	5
76 
/*
 * Report the command-line synopsis through errx(3), which terminates the
 * process with the EX_USAGE exit code.
 */
static void
usage(void)
{
	static const char synopsis[] = "[-dFh] [-c config] [-P pidfile]";

	errx(EX_USAGE, "%s", synopsis);
}
83 
/*
 * Make sure the g_gate kernel module is available, loading geom_gate if it
 * is not found. Exits through pjdlog_exit() with EX_OSERR when the module
 * cannot be loaded, unless the failure is EEXIST.
 */
static void
g_gate_load(void)
{

	/* Already present in the kernel - nothing to do. */
	if (modfind("g_gate") != -1)
		return;
	/* Not present, try loading it and look it up once more. */
	if (kldload("geom_gate") != -1 && modfind("g_gate") != -1)
		return;
	/* EEXIST is tolerated rather than treated as a fatal error. */
	if (errno == EEXIST)
		return;
	pjdlog_exit(EX_OSERR, "Unable to load geom_gate module");
}
98 
/*
 * Release every descriptor and allocation that is not needed by 'res'.
 *
 * All other resources are taken off the configuration's resource list,
 * their open connections are closed and their memory is freed. The control
 * connections, all listen sockets and the pidfile handle are closed too,
 * and the hook and logging subsystems are finalized. 'res' itself is left
 * on the resource list untouched.
 */
void
descriptors_cleanup(struct hast_resource *res)
{
	struct hast_resource *other, *nextres;
	struct hastd_listen *listener;

	TAILQ_FOREACH_SAFE(other, &cfg->hc_resources, hr_next, nextres) {
		if (other == res) {
			/*
			 * Unless we are secondary, our own resource must not
			 * have any remote connections at this point.
			 */
			PJDLOG_VERIFY(res->hr_role == HAST_ROLE_SECONDARY ||
			    (res->hr_remotein == NULL &&
			     res->hr_remoteout == NULL));
		} else {
			/* Foreign resource: drop its connections and memory. */
			if (other->hr_remotein != NULL)
				proto_close(other->hr_remotein);
			if (other->hr_remoteout != NULL)
				proto_close(other->hr_remoteout);
			if (other->hr_ctrl != NULL)
				proto_close(other->hr_ctrl);
			if (other->hr_event != NULL)
				proto_close(other->hr_event);
			if (other->hr_conn != NULL)
				proto_close(other->hr_conn);
			TAILQ_REMOVE(&cfg->hc_resources, other, hr_next);
			free(other);
		}
	}

	/* Control connections. */
	if (cfg->hc_controlin != NULL)
		proto_close(cfg->hc_controlin);
	proto_close(cfg->hc_controlconn);

	/* Drain the list of listen sockets. */
	for (;;) {
		listener = TAILQ_FIRST(&cfg->hc_listen);
		if (listener == NULL)
			break;
		TAILQ_REMOVE(&cfg->hc_listen, listener, hl_next);
		if (listener->hl_conn != NULL)
			proto_close(listener->hl_conn);
		free(listener);
	}

	(void)pidfile_close(pfh);
	hook_fini();
	pjdlog_fini();
}
138 
139 static const char *
140 dtype2str(mode_t mode)
141 {
142 
143 	if (S_ISBLK(mode))
144 		return ("block device");
145 	else if (S_ISCHR(mode))
146 		return ("character device");
147 	else if (S_ISDIR(mode))
148 		return ("directory");
149 	else if (S_ISFIFO(mode))
150 		return ("pipe or FIFO");
151 	else if (S_ISLNK(mode))
152 		return ("symbolic link");
153 	else if (S_ISREG(mode))
154 		return ("regular file");
155 	else if (S_ISSOCK(mode))
156 		return ("socket");
157 	else if (S_ISWHT(mode))
158 		return ("whiteout");
159 	else
160 		return ("unknown");
161 }
162 
163 void
164 descriptors_assert(const struct hast_resource *res, int pjdlogmode)
165 {
166 	char msg[256];
167 	struct stat sb;
168 	long maxfd;
169 	bool isopen;
170 	mode_t mode;
171 	int fd;
172 
173 	/*
174 	 * At this point descriptor to syslog socket is closed, so if we want
175 	 * to log assertion message, we have to first store it in 'msg' local
176 	 * buffer and then open syslog socket and log it.
177 	 */
178 	msg[0] = '\0';
179 
180 	maxfd = sysconf(_SC_OPEN_MAX);
181 	if (maxfd == -1) {
182 		pjdlog_init(pjdlogmode);
183 		pjdlog_prefix_set("[%s] (%s) ", res->hr_name,
184 		    role2str(res->hr_role));
185 		pjdlog_errno(LOG_WARNING, "sysconf(_SC_OPEN_MAX) failed");
186 		pjdlog_fini();
187 		maxfd = 16384;
188 	}
189 	for (fd = 0; fd <= maxfd; fd++) {
190 		if (fstat(fd, &sb) == 0) {
191 			isopen = true;
192 			mode = sb.st_mode;
193 		} else if (errno == EBADF) {
194 			isopen = false;
195 			mode = 0;
196 		} else {
197 			(void)snprintf(msg, sizeof(msg),
198 			    "Unable to fstat descriptor %d: %s", fd,
199 			    strerror(errno));
200 			break;
201 		}
202 		if (fd == STDIN_FILENO || fd == STDOUT_FILENO ||
203 		    fd == STDERR_FILENO) {
204 			if (!isopen) {
205 				(void)snprintf(msg, sizeof(msg),
206 				    "Descriptor %d (%s) is closed, but should be open.",
207 				    fd, (fd == STDIN_FILENO ? "stdin" :
208 				    (fd == STDOUT_FILENO ? "stdout" : "stderr")));
209 				break;
210 			}
211 		} else if (fd == proto_descriptor(res->hr_event)) {
212 			if (!isopen) {
213 				(void)snprintf(msg, sizeof(msg),
214 				    "Descriptor %d (event) is closed, but should be open.",
215 				    fd);
216 				break;
217 			}
218 			if (!S_ISSOCK(mode)) {
219 				(void)snprintf(msg, sizeof(msg),
220 				    "Descriptor %d (event) is %s, but should be %s.",
221 				    fd, dtype2str(mode), dtype2str(S_IFSOCK));
222 				break;
223 			}
224 		} else if (fd == proto_descriptor(res->hr_ctrl)) {
225 			if (!isopen) {
226 				(void)snprintf(msg, sizeof(msg),
227 				    "Descriptor %d (ctrl) is closed, but should be open.",
228 				    fd);
229 				break;
230 			}
231 			if (!S_ISSOCK(mode)) {
232 				(void)snprintf(msg, sizeof(msg),
233 				    "Descriptor %d (ctrl) is %s, but should be %s.",
234 				    fd, dtype2str(mode), dtype2str(S_IFSOCK));
235 				break;
236 			}
237 		} else if (res->hr_role == HAST_ROLE_PRIMARY &&
238 		    fd == proto_descriptor(res->hr_conn)) {
239 			if (!isopen) {
240 				(void)snprintf(msg, sizeof(msg),
241 				    "Descriptor %d (conn) is closed, but should be open.",
242 				    fd);
243 				break;
244 			}
245 			if (!S_ISSOCK(mode)) {
246 				(void)snprintf(msg, sizeof(msg),
247 				    "Descriptor %d (conn) is %s, but should be %s.",
248 				    fd, dtype2str(mode), dtype2str(S_IFSOCK));
249 				break;
250 			}
251 		} else if (res->hr_role == HAST_ROLE_SECONDARY &&
252 		    res->hr_conn != NULL &&
253 		    fd == proto_descriptor(res->hr_conn)) {
254 			if (isopen) {
255 				(void)snprintf(msg, sizeof(msg),
256 				    "Descriptor %d (conn) is open, but should be closed.",
257 				    fd);
258 				break;
259 			}
260 		} else if (res->hr_role == HAST_ROLE_SECONDARY &&
261 		    fd == proto_descriptor(res->hr_remotein)) {
262 			if (!isopen) {
263 				(void)snprintf(msg, sizeof(msg),
264 				    "Descriptor %d (remote in) is closed, but should be open.",
265 				    fd);
266 				break;
267 			}
268 			if (!S_ISSOCK(mode)) {
269 				(void)snprintf(msg, sizeof(msg),
270 				    "Descriptor %d (remote in) is %s, but should be %s.",
271 				    fd, dtype2str(mode), dtype2str(S_IFSOCK));
272 				break;
273 			}
274 		} else if (res->hr_role == HAST_ROLE_SECONDARY &&
275 		    fd == proto_descriptor(res->hr_remoteout)) {
276 			if (!isopen) {
277 				(void)snprintf(msg, sizeof(msg),
278 				    "Descriptor %d (remote out) is closed, but should be open.",
279 				    fd);
280 				break;
281 			}
282 			if (!S_ISSOCK(mode)) {
283 				(void)snprintf(msg, sizeof(msg),
284 				    "Descriptor %d (remote out) is %s, but should be %s.",
285 				    fd, dtype2str(mode), dtype2str(S_IFSOCK));
286 				break;
287 			}
288 		} else {
289 			if (isopen) {
290 				(void)snprintf(msg, sizeof(msg),
291 				    "Descriptor %d is open (%s), but should be closed.",
292 				    fd, dtype2str(mode));
293 				break;
294 			}
295 		}
296 	}
297 	if (msg[0] != '\0') {
298 		pjdlog_init(pjdlogmode);
299 		pjdlog_prefix_set("[%s] (%s) ", res->hr_name,
300 		    role2str(res->hr_role));
301 		PJDLOG_ABORT("%s", msg);
302 	}
303 }
304 
/*
 * Log how a worker process ended, based on its wait(2) status:
 * a clean exit (code 0) is logged at debug level 1, death by signal and
 * any other exit are logged as errors. When the status is neither a normal
 * exit nor a signal, the exit code is reported as -1.
 */
static void
child_exit_log(unsigned int pid, int status)
{
	int exitcode;

	if (WIFEXITED(status)) {
		exitcode = WEXITSTATUS(status);
		if (exitcode == 0) {
			pjdlog_debug(1,
			    "Worker process exited gracefully (pid=%u).", pid);
			return;
		}
	} else {
		exitcode = -1;
	}

	if (WIFSIGNALED(status)) {
		pjdlog_error("Worker process killed (pid=%u, signal=%d).",
		    pid, WTERMSIG(status));
		return;
	}

	pjdlog_error("Worker process exited ungracefully (pid=%u, exitcode=%d).",
	    pid, exitcode);
}
320 
321 static void
322 child_exit(void)
323 {
324 	struct hast_resource *res;
325 	int status;
326 	pid_t pid;
327 
328 	while ((pid = wait3(&status, WNOHANG, NULL)) > 0) {
329 		/* Find resource related to the process that just exited. */
330 		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
331 			if (pid == res->hr_workerpid)
332 				break;
333 		}
334 		if (res == NULL) {
335 			/*
336 			 * This can happen when new connection arrives and we
337 			 * cancel child responsible for the old one or if this
338 			 * was hook which we executed.
339 			 */
340 			hook_check_one(pid, status);
341 			continue;
342 		}
343 		pjdlog_prefix_set("[%s] (%s) ", res->hr_name,
344 		    role2str(res->hr_role));
345 		child_exit_log(pid, status);
346 		child_cleanup(res);
347 		if (res->hr_role == HAST_ROLE_PRIMARY) {
348 			/*
349 			 * Restart child process if it was killed by signal
350 			 * or exited because of temporary problem.
351 			 */
352 			if (WIFSIGNALED(status) ||
353 			    (WIFEXITED(status) &&
354 			     WEXITSTATUS(status) == EX_TEMPFAIL)) {
355 				sleep(1);
356 				pjdlog_info("Restarting worker process.");
357 				hastd_primary(res);
358 			} else {
359 				res->hr_role = HAST_ROLE_INIT;
360 				pjdlog_info("Changing resource role back to %s.",
361 				    role2str(res->hr_role));
362 			}
363 		}
364 		pjdlog_prefix_set("%s", "");
365 	}
366 }
367 
368 static bool
369 resource_needs_restart(const struct hast_resource *res0,
370     const struct hast_resource *res1)
371 {
372 
373 	PJDLOG_ASSERT(strcmp(res0->hr_name, res1->hr_name) == 0);
374 
375 	if (strcmp(res0->hr_provname, res1->hr_provname) != 0)
376 		return (true);
377 	if (strcmp(res0->hr_localpath, res1->hr_localpath) != 0)
378 		return (true);
379 	if (res0->hr_role == HAST_ROLE_INIT ||
380 	    res0->hr_role == HAST_ROLE_SECONDARY) {
381 		if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0)
382 			return (true);
383 		if (strcmp(res0->hr_sourceaddr, res1->hr_sourceaddr) != 0)
384 			return (true);
385 		if (res0->hr_replication != res1->hr_replication)
386 			return (true);
387 		if (res0->hr_checksum != res1->hr_checksum)
388 			return (true);
389 		if (res0->hr_compression != res1->hr_compression)
390 			return (true);
391 		if (res0->hr_timeout != res1->hr_timeout)
392 			return (true);
393 		if (strcmp(res0->hr_exec, res1->hr_exec) != 0)
394 			return (true);
395 		/*
396 		 * When metaflush has changed we don't really need restart,
397 		 * but it is just easier this way.
398 		 */
399 		if (res0->hr_metaflush != res1->hr_metaflush)
400 			return (true);
401 	}
402 	return (false);
403 }
404 
405 static bool
406 resource_needs_reload(const struct hast_resource *res0,
407     const struct hast_resource *res1)
408 {
409 
410 	PJDLOG_ASSERT(strcmp(res0->hr_name, res1->hr_name) == 0);
411 	PJDLOG_ASSERT(strcmp(res0->hr_provname, res1->hr_provname) == 0);
412 	PJDLOG_ASSERT(strcmp(res0->hr_localpath, res1->hr_localpath) == 0);
413 
414 	if (res0->hr_role != HAST_ROLE_PRIMARY)
415 		return (false);
416 
417 	if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0)
418 		return (true);
419 	if (strcmp(res0->hr_sourceaddr, res1->hr_sourceaddr) != 0)
420 		return (true);
421 	if (res0->hr_replication != res1->hr_replication)
422 		return (true);
423 	if (res0->hr_checksum != res1->hr_checksum)
424 		return (true);
425 	if (res0->hr_compression != res1->hr_compression)
426 		return (true);
427 	if (res0->hr_timeout != res1->hr_timeout)
428 		return (true);
429 	if (strcmp(res0->hr_exec, res1->hr_exec) != 0)
430 		return (true);
431 	if (res0->hr_metaflush != res1->hr_metaflush)
432 		return (true);
433 	return (false);
434 }
435 
436 static void
437 resource_reload(const struct hast_resource *res)
438 {
439 	struct nv *nvin, *nvout;
440 	int error;
441 
442 	PJDLOG_ASSERT(res->hr_role == HAST_ROLE_PRIMARY);
443 
444 	nvout = nv_alloc();
445 	nv_add_uint8(nvout, CONTROL_RELOAD, "cmd");
446 	nv_add_string(nvout, res->hr_remoteaddr, "remoteaddr");
447 	nv_add_string(nvout, res->hr_sourceaddr, "sourceaddr");
448 	nv_add_int32(nvout, (int32_t)res->hr_replication, "replication");
449 	nv_add_int32(nvout, (int32_t)res->hr_checksum, "checksum");
450 	nv_add_int32(nvout, (int32_t)res->hr_compression, "compression");
451 	nv_add_int32(nvout, (int32_t)res->hr_timeout, "timeout");
452 	nv_add_string(nvout, res->hr_exec, "exec");
453 	nv_add_int32(nvout, (int32_t)res->hr_metaflush, "metaflush");
454 	if (nv_error(nvout) != 0) {
455 		nv_free(nvout);
456 		pjdlog_error("Unable to allocate header for reload message.");
457 		return;
458 	}
459 	if (hast_proto_send(res, res->hr_ctrl, nvout, NULL, 0) == -1) {
460 		pjdlog_errno(LOG_ERR, "Unable to send reload message");
461 		nv_free(nvout);
462 		return;
463 	}
464 	nv_free(nvout);
465 
466 	/* Receive response. */
467 	if (hast_proto_recv_hdr(res->hr_ctrl, &nvin) == -1) {
468 		pjdlog_errno(LOG_ERR, "Unable to receive reload reply");
469 		return;
470 	}
471 	error = nv_get_int16(nvin, "error");
472 	nv_free(nvin);
473 	if (error != 0) {
474 		pjdlog_common(LOG_ERR, 0, error, "Reload failed");
475 		return;
476 	}
477 }
478 
/*
 * Re-read the configuration file and apply the differences to the running
 * daemon.
 *
 * The steps that may abort the reload (parsing the file, opening a changed
 * control socket, ending up with no listen socket at all) are performed
 * first; if one of them fails, everything opened for the new configuration
 * is released and the current configuration stays in effect. After that
 * the control socket, pidfile, listen sockets and resources are switched
 * over to the new configuration.
 */
static void
hastd_reload(void)
{
	struct hastd_config *newcfg;
	struct hast_resource *newres, *curres, *nextres;
	struct hastd_listen *newlst, *curlst;
	struct pidfh *newpfh;
	unsigned int nlisten;
	uint8_t oldrole;
	pid_t otherpid;

	pjdlog_info("Reloading configuration...");

	newpfh = NULL;

	newcfg = yy_config_parse(cfgpath, false);
	if (newcfg == NULL)
		goto failed;

	/*
	 * Open a new control socket, but only if its address has changed.
	 */
	if (strcmp(cfg->hc_controladdr, newcfg->hc_controladdr) != 0 &&
	    proto_server(newcfg->hc_controladdr,
	    &newcfg->hc_controlconn) == -1) {
		pjdlog_errno(LOG_ERR,
		    "Unable to listen on control address %s",
		    newcfg->hc_controladdr);
		goto failed;
	}

	/*
	 * Prepare listen sockets: reuse those we already listen on and open
	 * the ones that are new.
	 */
	nlisten = 0;
	TAILQ_FOREACH(newlst, &newcfg->hc_listen, hl_next) {
		TAILQ_FOREACH(curlst, &cfg->hc_listen, hl_next) {
			if (strcmp(newlst->hl_addr, curlst->hl_addr) == 0)
				break;
		}
		if (curlst == NULL || curlst->hl_conn == NULL) {
			if (proto_server(newlst->hl_addr,
			    &newlst->hl_conn) != 0) {
				pjdlog_errno(LOG_WARNING,
				    "Unable to listen on address %s",
				    newlst->hl_addr);
				continue;
			}
			pjdlog_info("Listening on new address %s.",
			    newlst->hl_addr);
		} else {
			pjdlog_info("Keep listening on address %s.",
			    newlst->hl_addr);
			newlst->hl_conn = curlst->hl_conn;
		}
		nlisten++;
	}
	if (nlisten == 0) {
		pjdlog_error("No addresses to listen on.");
		goto failed;
	}

	/*
	 * Open the new pidfile if its path has changed. Problems here are
	 * only logged as warnings and do not abort the reload.
	 */
	if (!foreground && strcmp(cfg->hc_pidfile, newcfg->hc_pidfile) != 0) {
		newpfh = pidfile_open(newcfg->hc_pidfile, 0600, &otherpid);
		if (newpfh != NULL) {
			/* Write PID to a file. */
			if (pidfile_write(newpfh) == -1) {
				pjdlog_errno(LOG_WARNING,
				    "Unable to write PID to file %s",
				    newcfg->hc_pidfile);
			} else {
				pjdlog_debug(1, "PID stored in %s.",
				    newcfg->hc_pidfile);
			}
		} else if (errno == EEXIST) {
			pjdlog_errno(LOG_WARNING,
			    "Another hastd is already running, pidfile: %s, pid: %jd.",
			    newcfg->hc_pidfile, (intmax_t)otherpid);
		} else {
			pjdlog_errno(LOG_WARNING,
			    "Unable to open or create pidfile %s",
			    newcfg->hc_pidfile);
		}
	}

	/* Nothing below this point goes to the 'failed' label. */

	/*
	 * Take over the new control socket, if one was opened.
	 */
	if (newcfg->hc_controlconn != NULL) {
		pjdlog_info("Control socket changed from %s to %s.",
		    cfg->hc_controladdr, newcfg->hc_controladdr);
		proto_close(cfg->hc_controlconn);
		cfg->hc_controlconn = newcfg->hc_controlconn;
		newcfg->hc_controlconn = NULL;
		strlcpy(cfg->hc_controladdr, newcfg->hc_controladdr,
		    sizeof(cfg->hc_controladdr));
	}

	/*
	 * Take over the new pidfile, if one was opened.
	 */
	if (newpfh != NULL) {
		pjdlog_info("Pidfile changed from %s to %s.", cfg->hc_pidfile,
		    newcfg->hc_pidfile);
		(void)pidfile_remove(pfh);
		pfh = newpfh;
		(void)strlcpy(cfg->hc_pidfile, newcfg->hc_pidfile,
		    sizeof(cfg->hc_pidfile));
	}

	/*
	 * Replace the list of listen addresses. Sockets for addresses that
	 * are gone from the configuration are closed; the others are already
	 * referenced from the new list.
	 */
	for (;;) {
		curlst = TAILQ_FIRST(&cfg->hc_listen);
		if (curlst == NULL)
			break;
		TAILQ_FOREACH(newlst, &newcfg->hc_listen, hl_next) {
			if (strcmp(newlst->hl_addr, curlst->hl_addr) == 0)
				break;
		}
		if (newlst == NULL && curlst->hl_conn != NULL) {
			proto_close(curlst->hl_conn);
			pjdlog_info("No longer listening on address %s.",
			    curlst->hl_addr);
		}
		TAILQ_REMOVE(&cfg->hc_listen, curlst, hl_next);
		free(curlst);
	}
	TAILQ_CONCAT(&cfg->hc_listen, &newcfg->hc_listen, hl_next);

	/*
	 * Stop and drop resources that are gone from the configuration.
	 */
	TAILQ_FOREACH_SAFE(curres, &cfg->hc_resources, hr_next, nextres) {
		TAILQ_FOREACH(newres, &newcfg->hc_resources, hr_next) {
			if (strcmp(curres->hr_name, newres->hr_name) == 0)
				break;
		}
		if (newres != NULL)
			continue;
		control_set_role(curres, HAST_ROLE_INIT);
		TAILQ_REMOVE(&cfg->hc_resources, curres, hr_next);
		pjdlog_info("Resource %s removed.", curres->hr_name);
		free(curres);
	}

	/*
	 * Adopt resources that appear only in the new configuration.
	 */
	TAILQ_FOREACH_SAFE(newres, &newcfg->hc_resources, hr_next, nextres) {
		TAILQ_FOREACH(curres, &cfg->hc_resources, hr_next) {
			if (strcmp(curres->hr_name, newres->hr_name) == 0)
				break;
		}
		if (curres != NULL)
			continue;
		TAILQ_REMOVE(&newcfg->hc_resources, newres, hr_next);
		TAILQ_INSERT_TAIL(&cfg->hc_resources, newres, hr_next);
		pjdlog_info("Resource %s added.", newres->hr_name);
	}

	/*
	 * Handle resources present in both configurations.
	 *
	 * A full restart (role dropped to INIT, the old entry replaced by
	 * the new one, the previous role applied again) is done whenever
	 * resource_needs_restart() asks for it: the provider name or the
	 * local path has changed, or the role is INIT or SECONDARY and any
	 * other compared setting has changed.
	 *
	 * Otherwise, for a PRIMARY whose remote address, source address,
	 * replication mode, checksum, compression, timeout, exec path or
	 * metaflush has changed, the new values are copied into the current
	 * entry and, if a worker process exists, passed on to it with
	 * resource_reload(). No worker restart is needed in that case.
	 */
	TAILQ_FOREACH_SAFE(newres, &newcfg->hc_resources, hr_next, nextres) {
		TAILQ_FOREACH(curres, &cfg->hc_resources, hr_next) {
			if (strcmp(curres->hr_name, newres->hr_name) == 0)
				break;
		}
		PJDLOG_ASSERT(curres != NULL);
		if (resource_needs_restart(curres, newres)) {
			pjdlog_info("Resource %s configuration was modified, restarting it.",
			    curres->hr_name);
			oldrole = curres->hr_role;
			control_set_role(curres, HAST_ROLE_INIT);
			TAILQ_REMOVE(&cfg->hc_resources, curres, hr_next);
			free(curres);
			TAILQ_REMOVE(&newcfg->hc_resources, newres, hr_next);
			TAILQ_INSERT_TAIL(&cfg->hc_resources, newres, hr_next);
			control_set_role(newres, oldrole);
		} else if (resource_needs_reload(curres, newres)) {
			pjdlog_info("Resource %s configuration was modified, reloading it.",
			    curres->hr_name);
			strlcpy(curres->hr_remoteaddr, newres->hr_remoteaddr,
			    sizeof(curres->hr_remoteaddr));
			strlcpy(curres->hr_sourceaddr, newres->hr_sourceaddr,
			    sizeof(curres->hr_sourceaddr));
			curres->hr_replication = newres->hr_replication;
			curres->hr_checksum = newres->hr_checksum;
			curres->hr_compression = newres->hr_compression;
			curres->hr_timeout = newres->hr_timeout;
			strlcpy(curres->hr_exec, newres->hr_exec,
			    sizeof(curres->hr_exec));
			curres->hr_metaflush = newres->hr_metaflush;
			if (curres->hr_workerpid != 0)
				resource_reload(curres);
		}
	}

	yy_config_free(newcfg);
	pjdlog_info("Configuration reloaded successfully.");
	return;

failed:
	if (newcfg != NULL) {
		if (newcfg->hc_controlconn != NULL)
			proto_close(newcfg->hc_controlconn);
		/*
		 * Close only the listen sockets opened for the new
		 * configuration; those shared with the current one must stay.
		 */
		for (;;) {
			newlst = TAILQ_FIRST(&newcfg->hc_listen);
			if (newlst == NULL)
				break;
			if (newlst->hl_conn != NULL) {
				TAILQ_FOREACH(curlst, &cfg->hc_listen, hl_next) {
					if (strcmp(newlst->hl_addr,
					    curlst->hl_addr) == 0) {
						break;
					}
				}
				if (curlst == NULL || curlst->hl_conn == NULL)
					proto_close(newlst->hl_conn);
			}
			TAILQ_REMOVE(&newcfg->hc_listen, newlst, hl_next);
			free(newlst);
		}
		yy_config_free(newcfg);
	}
	if (newpfh != NULL)
		(void)pidfile_remove(newpfh);
	pjdlog_warning("Configuration not reloaded.");
}
719 
720 static void
721 terminate_workers(void)
722 {
723 	struct hast_resource *res;
724 
725 	pjdlog_info("Termination signal received, exiting.");
726 	TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
727 		if (res->hr_workerpid == 0)
728 			continue;
729 		pjdlog_info("Terminating worker process (resource=%s, role=%s, pid=%u).",
730 		    res->hr_name, role2str(res->hr_role), res->hr_workerpid);
731 		if (kill(res->hr_workerpid, SIGTERM) == 0)
732 			continue;
733 		pjdlog_errno(LOG_WARNING,
734 		    "Unable to send signal to worker process (resource=%s, role=%s, pid=%u).",
735 		    res->hr_name, role2str(res->hr_role), res->hr_workerpid);
736 	}
737 }
738 
739 static void
740 listen_accept(struct hastd_listen *lst)
741 {
742 	struct hast_resource *res;
743 	struct proto_conn *conn;
744 	struct nv *nvin, *nvout, *nverr;
745 	const char *resname;
746 	const unsigned char *token;
747 	char laddr[256], raddr[256];
748 	size_t size;
749 	pid_t pid;
750 	int status;
751 
752 	proto_local_address(lst->hl_conn, laddr, sizeof(laddr));
753 	pjdlog_debug(1, "Accepting connection to %s.", laddr);
754 
755 	if (proto_accept(lst->hl_conn, &conn) == -1) {
756 		pjdlog_errno(LOG_ERR, "Unable to accept connection %s", laddr);
757 		return;
758 	}
759 
760 	proto_local_address(conn, laddr, sizeof(laddr));
761 	proto_remote_address(conn, raddr, sizeof(raddr));
762 	pjdlog_info("Connection from %s to %s.", raddr, laddr);
763 
764 	/* Error in setting timeout is not critical, but why should it fail? */
765 	if (proto_timeout(conn, HAST_TIMEOUT) == -1)
766 		pjdlog_errno(LOG_WARNING, "Unable to set connection timeout");
767 
768 	nvin = nvout = nverr = NULL;
769 
770 	/*
771 	 * Before receiving any data see if remote host have access to any
772 	 * resource.
773 	 */
774 	TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
775 		if (proto_address_match(conn, res->hr_remoteaddr))
776 			break;
777 	}
778 	if (res == NULL) {
779 		pjdlog_error("Client %s isn't known.", raddr);
780 		goto close;
781 	}
782 	/* Ok, remote host can access at least one resource. */
783 
784 	if (hast_proto_recv_hdr(conn, &nvin) == -1) {
785 		pjdlog_errno(LOG_ERR, "Unable to receive header from %s",
786 		    raddr);
787 		goto close;
788 	}
789 
790 	resname = nv_get_string(nvin, "resource");
791 	if (resname == NULL) {
792 		pjdlog_error("No 'resource' field in the header received from %s.",
793 		    raddr);
794 		goto close;
795 	}
796 	pjdlog_debug(2, "%s: resource=%s", raddr, resname);
797 	token = nv_get_uint8_array(nvin, &size, "token");
798 	/*
799 	 * NULL token means that this is first connection.
800 	 */
801 	if (token != NULL && size != sizeof(res->hr_token)) {
802 		pjdlog_error("Received token of invalid size from %s (expected %zu, got %zu).",
803 		    raddr, sizeof(res->hr_token), size);
804 		goto close;
805 	}
806 
807 	/*
808 	 * From now on we want to send errors to the remote node.
809 	 */
810 	nverr = nv_alloc();
811 
812 	/* Find resource related to this connection. */
813 	TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
814 		if (strcmp(resname, res->hr_name) == 0)
815 			break;
816 	}
817 	/* Have we found the resource? */
818 	if (res == NULL) {
819 		pjdlog_error("No resource '%s' as requested by %s.",
820 		    resname, raddr);
821 		nv_add_stringf(nverr, "errmsg", "Resource not configured.");
822 		goto fail;
823 	}
824 
825 	/* Now that we know resource name setup log prefix. */
826 	pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));
827 
828 	/* Does the remote host have access to this resource? */
829 	if (!proto_address_match(conn, res->hr_remoteaddr)) {
830 		pjdlog_error("Client %s has no access to the resource.", raddr);
831 		nv_add_stringf(nverr, "errmsg", "No access to the resource.");
832 		goto fail;
833 	}
834 	/* Is the resource marked as secondary? */
835 	if (res->hr_role != HAST_ROLE_SECONDARY) {
836 		pjdlog_warning("We act as %s for the resource and not as %s as requested by %s.",
837 		    role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY),
838 		    raddr);
839 		nv_add_stringf(nverr, "errmsg",
840 		    "Remote node acts as %s for the resource and not as %s.",
841 		    role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY));
842 		if (res->hr_role == HAST_ROLE_PRIMARY) {
843 			/*
844 			 * If we act as primary request the other side to wait
845 			 * for us a bit, as we might be finishing cleanups.
846 			 */
847 			nv_add_uint8(nverr, 1, "wait");
848 		}
849 		goto fail;
850 	}
851 	/* Does token (if exists) match? */
852 	if (token != NULL && memcmp(token, res->hr_token,
853 	    sizeof(res->hr_token)) != 0) {
854 		pjdlog_error("Token received from %s doesn't match.", raddr);
855 		nv_add_stringf(nverr, "errmsg", "Token doesn't match.");
856 		goto fail;
857 	}
858 	/*
859 	 * If there is no token, but we have half-open connection
860 	 * (only remotein) or full connection (worker process is running)
861 	 * we have to cancel those and accept the new connection.
862 	 */
863 	if (token == NULL) {
864 		PJDLOG_ASSERT(res->hr_remoteout == NULL);
865 		pjdlog_debug(1, "Initial connection from %s.", raddr);
866 		if (res->hr_workerpid != 0) {
867 			PJDLOG_ASSERT(res->hr_remotein == NULL);
868 			pjdlog_debug(1,
869 			    "Worker process exists (pid=%u), stopping it.",
870 			    (unsigned int)res->hr_workerpid);
871 			/* Stop child process. */
872 			if (kill(res->hr_workerpid, SIGINT) == -1) {
873 				pjdlog_errno(LOG_ERR,
874 				    "Unable to stop worker process (pid=%u)",
875 				    (unsigned int)res->hr_workerpid);
876 				/*
877 				 * Other than logging the problem we
878 				 * ignore it - nothing smart to do.
879 				 */
880 			}
881 			/* Wait for it to exit. */
882 			else if ((pid = waitpid(res->hr_workerpid,
883 			    &status, 0)) != res->hr_workerpid) {
884 				/* We can only log the problem. */
885 				pjdlog_errno(LOG_ERR,
886 				    "Waiting for worker process (pid=%u) failed",
887 				    (unsigned int)res->hr_workerpid);
888 			} else {
889 				child_exit_log(res->hr_workerpid, status);
890 			}
891 			child_cleanup(res);
892 		} else if (res->hr_remotein != NULL) {
893 			char oaddr[256];
894 
895 			proto_remote_address(res->hr_remotein, oaddr,
896 			    sizeof(oaddr));
897 			pjdlog_debug(1,
898 			    "Canceling half-open connection from %s on connection from %s.",
899 			    oaddr, raddr);
900 			proto_close(res->hr_remotein);
901 			res->hr_remotein = NULL;
902 		}
903 	}
904 
905 	/*
906 	 * Checks and cleanups are done.
907 	 */
908 
909 	if (token == NULL) {
910 		arc4random_buf(res->hr_token, sizeof(res->hr_token));
911 		nvout = nv_alloc();
912 		nv_add_uint8_array(nvout, res->hr_token,
913 		    sizeof(res->hr_token), "token");
914 		if (nv_error(nvout) != 0) {
915 			pjdlog_common(LOG_ERR, 0, nv_error(nvout),
916 			    "Unable to prepare return header for %s", raddr);
917 			nv_add_stringf(nverr, "errmsg",
918 			    "Remote node was unable to prepare return header: %s.",
919 			    strerror(nv_error(nvout)));
920 			goto fail;
921 		}
922 		if (hast_proto_send(NULL, conn, nvout, NULL, 0) == -1) {
923 			int error = errno;
924 
925 			pjdlog_errno(LOG_ERR, "Unable to send response to %s",
926 			    raddr);
927 			nv_add_stringf(nverr, "errmsg",
928 			    "Remote node was unable to send response: %s.",
929 			    strerror(error));
930 			goto fail;
931 		}
932 		res->hr_remotein = conn;
933 		pjdlog_debug(1, "Incoming connection from %s configured.",
934 		    raddr);
935 	} else {
936 		res->hr_remoteout = conn;
937 		pjdlog_debug(1, "Outgoing connection to %s configured.", raddr);
938 		hastd_secondary(res, nvin);
939 	}
940 	nv_free(nvin);
941 	nv_free(nvout);
942 	nv_free(nverr);
943 	pjdlog_prefix_set("%s", "");
944 	return;
945 fail:
946 	if (nv_error(nverr) != 0) {
947 		pjdlog_common(LOG_ERR, 0, nv_error(nverr),
948 		    "Unable to prepare error header for %s", raddr);
949 		goto close;
950 	}
951 	if (hast_proto_send(NULL, conn, nverr, NULL, 0) == -1) {
952 		pjdlog_errno(LOG_ERR, "Unable to send error to %s", raddr);
953 		goto close;
954 	}
955 close:
956 	if (nvin != NULL)
957 		nv_free(nvin);
958 	if (nvout != NULL)
959 		nv_free(nvout);
960 	if (nverr != NULL)
961 		nv_free(nverr);
962 	proto_close(conn);
963 	pjdlog_prefix_set("%s", "");
964 }
965 
966 static void
967 connection_migrate(struct hast_resource *res)
968 {
969 	struct proto_conn *conn;
970 	int16_t val = 0;
971 
972 	pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));
973 
974 	PJDLOG_ASSERT(res->hr_role == HAST_ROLE_PRIMARY);
975 
976 	if (proto_recv(res->hr_conn, &val, sizeof(val)) == -1) {
977 		pjdlog_errno(LOG_WARNING,
978 		    "Unable to receive connection command");
979 		return;
980 	}
981 	if (proto_client(res->hr_sourceaddr[0] != '\0' ? res->hr_sourceaddr : NULL,
982 	    res->hr_remoteaddr, &conn) == -1) {
983 		val = errno;
984 		pjdlog_errno(LOG_WARNING,
985 		    "Unable to create outgoing connection to %s",
986 		    res->hr_remoteaddr);
987 		goto out;
988 	}
989 	if (proto_connect(conn, -1) == -1) {
990 		val = errno;
991 		pjdlog_errno(LOG_WARNING, "Unable to connect to %s",
992 		    res->hr_remoteaddr);
993 		proto_close(conn);
994 		goto out;
995 	}
996 	val = 0;
997 out:
998 	if (proto_send(res->hr_conn, &val, sizeof(val)) == -1) {
999 		pjdlog_errno(LOG_WARNING,
1000 		    "Unable to send reply to connection request");
1001 	}
1002 	if (val == 0 && proto_connection_send(res->hr_conn, conn) == -1)
1003 		pjdlog_errno(LOG_WARNING, "Unable to send connection");
1004 
1005 	pjdlog_prefix_set("%s", "");
1006 }
1007 
1008 static void
1009 check_signals(void)
1010 {
1011 	struct timespec sigtimeout;
1012 	sigset_t mask;
1013 	int signo;
1014 
1015 	sigtimeout.tv_sec = 0;
1016 	sigtimeout.tv_nsec = 0;
1017 
1018 	PJDLOG_VERIFY(sigemptyset(&mask) == 0);
1019 	PJDLOG_VERIFY(sigaddset(&mask, SIGHUP) == 0);
1020 	PJDLOG_VERIFY(sigaddset(&mask, SIGINT) == 0);
1021 	PJDLOG_VERIFY(sigaddset(&mask, SIGTERM) == 0);
1022 	PJDLOG_VERIFY(sigaddset(&mask, SIGCHLD) == 0);
1023 
1024 	while ((signo = sigtimedwait(&mask, NULL, &sigtimeout)) != -1) {
1025 		switch (signo) {
1026 		case SIGINT:
1027 		case SIGTERM:
1028 			sigexit_received = true;
1029 			terminate_workers();
1030 			proto_close(cfg->hc_controlconn);
1031 			exit(EX_OK);
1032 			break;
1033 		case SIGCHLD:
1034 			child_exit();
1035 			break;
1036 		case SIGHUP:
1037 			hastd_reload();
1038 			break;
1039 		default:
1040 			PJDLOG_ABORT("Unexpected signal (%d).", signo);
1041 		}
1042 	}
1043 }
1044 
1045 static void
1046 main_loop(void)
1047 {
1048 	struct hast_resource *res;
1049 	struct hastd_listen *lst;
1050 	struct timeval seltimeout;
1051 	int fd, maxfd, ret;
1052 	time_t lastcheck, now;
1053 	fd_set rfds;
1054 
1055 	lastcheck = time(NULL);
1056 	seltimeout.tv_sec = REPORT_INTERVAL;
1057 	seltimeout.tv_usec = 0;
1058 
1059 	for (;;) {
1060 		check_signals();
1061 
1062 		/* Setup descriptors for select(2). */
1063 		FD_ZERO(&rfds);
1064 		maxfd = fd = proto_descriptor(cfg->hc_controlconn);
1065 		PJDLOG_ASSERT(fd >= 0);
1066 		FD_SET(fd, &rfds);
1067 		TAILQ_FOREACH(lst, &cfg->hc_listen, hl_next) {
1068 			if (lst->hl_conn == NULL)
1069 				continue;
1070 			fd = proto_descriptor(lst->hl_conn);
1071 			PJDLOG_ASSERT(fd >= 0);
1072 			FD_SET(fd, &rfds);
1073 			maxfd = fd > maxfd ? fd : maxfd;
1074 		}
1075 		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
1076 			if (res->hr_event == NULL)
1077 				continue;
1078 			fd = proto_descriptor(res->hr_event);
1079 			PJDLOG_ASSERT(fd >= 0);
1080 			FD_SET(fd, &rfds);
1081 			maxfd = fd > maxfd ? fd : maxfd;
1082 			if (res->hr_role == HAST_ROLE_PRIMARY) {
1083 				/* Only primary workers asks for connections. */
1084 				PJDLOG_ASSERT(res->hr_conn != NULL);
1085 				fd = proto_descriptor(res->hr_conn);
1086 				PJDLOG_ASSERT(fd >= 0);
1087 				FD_SET(fd, &rfds);
1088 				maxfd = fd > maxfd ? fd : maxfd;
1089 			} else {
1090 				PJDLOG_ASSERT(res->hr_conn == NULL);
1091 			}
1092 		}
1093 
1094 		PJDLOG_ASSERT(maxfd + 1 <= (int)FD_SETSIZE);
1095 		ret = select(maxfd + 1, &rfds, NULL, NULL, &seltimeout);
1096 		now = time(NULL);
1097 		if (lastcheck + REPORT_INTERVAL <= now) {
1098 			hook_check();
1099 			lastcheck = now;
1100 		}
1101 		if (ret == 0) {
1102 			/*
1103 			 * select(2) timed out, so there should be no
1104 			 * descriptors to check.
1105 			 */
1106 			continue;
1107 		} else if (ret == -1) {
1108 			if (errno == EINTR)
1109 				continue;
1110 			KEEP_ERRNO((void)pidfile_remove(pfh));
1111 			pjdlog_exit(EX_OSERR, "select() failed");
1112 		}
1113 
1114 		/*
1115 		 * Check for signals before we do anything to update our
1116 		 * info about terminated workers in the meantime.
1117 		 */
1118 		check_signals();
1119 
1120 		if (FD_ISSET(proto_descriptor(cfg->hc_controlconn), &rfds))
1121 			control_handle(cfg);
1122 		TAILQ_FOREACH(lst, &cfg->hc_listen, hl_next) {
1123 			if (lst->hl_conn == NULL)
1124 				continue;
1125 			if (FD_ISSET(proto_descriptor(lst->hl_conn), &rfds))
1126 				listen_accept(lst);
1127 		}
1128 		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
1129 			if (res->hr_event == NULL)
1130 				continue;
1131 			if (FD_ISSET(proto_descriptor(res->hr_event), &rfds)) {
1132 				if (event_recv(res) == 0)
1133 					continue;
1134 				/* The worker process exited? */
1135 				proto_close(res->hr_event);
1136 				res->hr_event = NULL;
1137 				if (res->hr_conn != NULL) {
1138 					proto_close(res->hr_conn);
1139 					res->hr_conn = NULL;
1140 				}
1141 				continue;
1142 			}
1143 			if (res->hr_role == HAST_ROLE_PRIMARY) {
1144 				PJDLOG_ASSERT(res->hr_conn != NULL);
1145 				if (FD_ISSET(proto_descriptor(res->hr_conn),
1146 				    &rfds)) {
1147 					connection_migrate(res);
1148 				}
1149 			} else {
1150 				PJDLOG_ASSERT(res->hr_conn == NULL);
1151 			}
1152 		}
1153 	}
1154 }
1155 
/*
 * Signal handler that intentionally does nothing.  main() installs it for
 * SIGCHLD so the signal has a handler and can therefore be masked.
 */
static void
dummy_sighandler(int sig)
{

	/* The signal number is deliberately ignored. */
	(void)sig;
}
1161 
1162 int
1163 main(int argc, char *argv[])
1164 {
1165 	struct hastd_listen *lst;
1166 	const char *pidfile;
1167 	pid_t otherpid;
1168 	int debuglevel;
1169 	sigset_t mask;
1170 
1171 	foreground = false;
1172 	debuglevel = 0;
1173 	pidfile = NULL;
1174 
1175 	for (;;) {
1176 		int ch;
1177 
1178 		ch = getopt(argc, argv, "c:dFhP:");
1179 		if (ch == -1)
1180 			break;
1181 		switch (ch) {
1182 		case 'c':
1183 			cfgpath = optarg;
1184 			break;
1185 		case 'd':
1186 			debuglevel++;
1187 			break;
1188 		case 'F':
1189 			foreground = true;
1190 			break;
1191 		case 'P':
1192 			pidfile = optarg;
1193 			break;
1194 		case 'h':
1195 		default:
1196 			usage();
1197 		}
1198 	}
1199 	argc -= optind;
1200 	argv += optind;
1201 
1202 	pjdlog_init(PJDLOG_MODE_STD);
1203 	pjdlog_debug_set(debuglevel);
1204 
1205 	g_gate_load();
1206 
1207 	/*
1208 	 * When path to the configuration file is relative, obtain full path,
1209 	 * so we can always find the file, even after daemonizing and changing
1210 	 * working directory to /.
1211 	 */
1212 	if (cfgpath[0] != '/') {
1213 		const char *newcfgpath;
1214 
1215 		newcfgpath = realpath(cfgpath, NULL);
1216 		if (newcfgpath == NULL) {
1217 			pjdlog_exit(EX_CONFIG,
1218 			    "Unable to obtain full path of %s", cfgpath);
1219 		}
1220 		cfgpath = newcfgpath;
1221 	}
1222 
1223 	cfg = yy_config_parse(cfgpath, true);
1224 	PJDLOG_ASSERT(cfg != NULL);
1225 
1226 	if (pidfile != NULL) {
1227 		if (strlcpy(cfg->hc_pidfile, pidfile,
1228 		    sizeof(cfg->hc_pidfile)) >= sizeof(cfg->hc_pidfile)) {
1229 			pjdlog_exitx(EX_CONFIG, "Pidfile path is too long.");
1230 		}
1231 	}
1232 
1233 	if (!foreground) {
1234 		pfh = pidfile_open(cfg->hc_pidfile, 0600, &otherpid);
1235 		if (pfh == NULL) {
1236 			if (errno == EEXIST) {
1237 				pjdlog_exitx(EX_TEMPFAIL,
1238 				    "Another hastd is already running, pidfile: %s, pid: %jd.",
1239 				    cfg->hc_pidfile, (intmax_t)otherpid);
1240 			}
1241 			/*
1242 			 * If we cannot create pidfile for other reasons,
1243 			 * only warn.
1244 			 */
1245 			pjdlog_errno(LOG_WARNING,
1246 			    "Unable to open or create pidfile %s",
1247 			    cfg->hc_pidfile);
1248 		}
1249 	}
1250 
1251 	/*
1252 	 * Restore default actions for interesting signals in case parent
1253 	 * process (like init(8)) decided to ignore some of them (like SIGHUP).
1254 	 */
1255 	PJDLOG_VERIFY(signal(SIGHUP, SIG_DFL) != SIG_ERR);
1256 	PJDLOG_VERIFY(signal(SIGINT, SIG_DFL) != SIG_ERR);
1257 	PJDLOG_VERIFY(signal(SIGTERM, SIG_DFL) != SIG_ERR);
1258 	/*
1259 	 * Because SIGCHLD is ignored by default, setup dummy handler for it,
1260 	 * so we can mask it.
1261 	 */
1262 	PJDLOG_VERIFY(signal(SIGCHLD, dummy_sighandler) != SIG_ERR);
1263 
1264 	PJDLOG_VERIFY(sigemptyset(&mask) == 0);
1265 	PJDLOG_VERIFY(sigaddset(&mask, SIGHUP) == 0);
1266 	PJDLOG_VERIFY(sigaddset(&mask, SIGINT) == 0);
1267 	PJDLOG_VERIFY(sigaddset(&mask, SIGTERM) == 0);
1268 	PJDLOG_VERIFY(sigaddset(&mask, SIGCHLD) == 0);
1269 	PJDLOG_VERIFY(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1270 
1271 	/* Listen on control address. */
1272 	if (proto_server(cfg->hc_controladdr, &cfg->hc_controlconn) == -1) {
1273 		KEEP_ERRNO((void)pidfile_remove(pfh));
1274 		pjdlog_exit(EX_OSERR, "Unable to listen on control address %s",
1275 		    cfg->hc_controladdr);
1276 	}
1277 	/* Listen for remote connections. */
1278 	TAILQ_FOREACH(lst, &cfg->hc_listen, hl_next) {
1279 		if (proto_server(lst->hl_addr, &lst->hl_conn) == -1) {
1280 			KEEP_ERRNO((void)pidfile_remove(pfh));
1281 			pjdlog_exit(EX_OSERR, "Unable to listen on address %s",
1282 			    lst->hl_addr);
1283 		}
1284 	}
1285 
1286 	if (!foreground) {
1287 		if (daemon(0, 0) == -1) {
1288 			KEEP_ERRNO((void)pidfile_remove(pfh));
1289 			pjdlog_exit(EX_OSERR, "Unable to daemonize");
1290 		}
1291 
1292 		/* Start logging to syslog. */
1293 		pjdlog_mode_set(PJDLOG_MODE_SYSLOG);
1294 
1295 		/* Write PID to a file. */
1296 		if (pidfile_write(pfh) == -1) {
1297 			pjdlog_errno(LOG_WARNING,
1298 			    "Unable to write PID to a file %s",
1299 			    cfg->hc_pidfile);
1300 		} else {
1301 			pjdlog_debug(1, "PID stored in %s.", cfg->hc_pidfile);
1302 		}
1303 	}
1304 
1305 	pjdlog_info("Started successfully, running protocol version %d.",
1306 	    HAST_PROTO_VERSION);
1307 
1308 	pjdlog_debug(1, "Listening on control address %s.",
1309 	    cfg->hc_controladdr);
1310 	TAILQ_FOREACH(lst, &cfg->hc_listen, hl_next)
1311 		pjdlog_info("Listening on address %s.", lst->hl_addr);
1312 
1313 	hook_init();
1314 
1315 	main_loop();
1316 
1317 	exit(0);
1318 }
1319