xref: /freebsd/sbin/ggate/ggated/ggated.c (revision 96474d2a3fa895fb9636183403fc8ca7ccf60216)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2004 Pawel Jakub Dawidek <pjd@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include <sys/param.h>
32 #include <sys/bio.h>
33 #include <sys/disk.h>
34 #include <sys/endian.h>
35 #include <sys/ioctl.h>
36 #include <sys/queue.h>
37 #include <sys/socket.h>
38 #include <sys/stat.h>
39 #include <sys/time.h>
40 #include <arpa/inet.h>
41 #include <netinet/in.h>
42 #include <netinet/tcp.h>
43 #include <assert.h>
44 #include <err.h>
45 #include <errno.h>
46 #include <inttypes.h>
47 #include <fcntl.h>
48 #include <libgen.h>
49 #include <libutil.h>
50 #include <paths.h>
51 #include <pthread.h>
52 #include <signal.h>
53 #include <stdarg.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <stdint.h>
57 #include <string.h>
58 #include <syslog.h>
59 #include <unistd.h>
60 
61 #include "ggate.h"
62 
63 
/* Exports file read when no path is given on the command line. */
#define	GGATED_EXPORT_FILE	"/etc/gg.exports"
65 
/*
 * One client connection.  It is assembled from two sockets that arrive in
 * separate handshakes and are matched to each other by c_token.
 */
struct ggd_connection {
	off_t		 c_mediasize;	/* media size obtained for c_diskfd */
	unsigned int	 c_sectorsize;	/* sector size obtained for c_diskfd */
	unsigned int	 c_flags;	/* flags (RO/RW) */
	int		 c_diskfd;	/* opened export, -1 if not opened */
	int		 c_sendfd;	/* socket used by send_thread(), or -1 */
	int		 c_recvfd;	/* socket used by recv_thread(), or -1 */
	time_t		 c_birthtime;	/* creation time, used for expiry */
	char		*c_path;	/* heap copy of the requested path */
	uint64_t	 c_token;	/* token pairing the two sockets */
	in_addr_t	 c_srcip;	/* client address, host byte order */
	LIST_ENTRY(ggd_connection) c_next;
};
79 
80 struct ggd_request {
81 	struct g_gate_hdr	 r_hdr;
82 	char			*r_data;
83 	TAILQ_ENTRY(ggd_request) r_next;
84 };
85 #define	r_cmd		r_hdr.gh_cmd
86 #define	r_offset	r_hdr.gh_offset
87 #define	r_length	r_hdr.gh_length
88 #define	r_error		r_hdr.gh_error
89 
/* One entry of the exports file. */
struct ggd_export {
	char		*e_path;	/* path to device/file */
	in_addr_t	 e_ip;		/* remote IP address, already masked */
	in_addr_t	 e_mask;	/* IP mask */
	unsigned int	 e_flags;	/* flags (RO/RW) */
	SLIST_ENTRY(ggd_export) e_next;
};
97 
98 static const char *exports_file = GGATED_EXPORT_FILE;
99 static int got_sighup = 0;
100 static in_addr_t bindaddr;
101 
102 static TAILQ_HEAD(, ggd_request) inqueue = TAILQ_HEAD_INITIALIZER(inqueue);
103 static TAILQ_HEAD(, ggd_request) outqueue = TAILQ_HEAD_INITIALIZER(outqueue);
104 static pthread_mutex_t inqueue_mtx, outqueue_mtx;
105 static pthread_cond_t inqueue_cond, outqueue_cond;
106 
107 static SLIST_HEAD(, ggd_export) exports = SLIST_HEAD_INITIALIZER(exports);
108 static LIST_HEAD(, ggd_connection) connections = LIST_HEAD_INITIALIZER(connections);
109 
110 static void *recv_thread(void *arg);
111 static void *disk_thread(void *arg);
112 static void *send_thread(void *arg);
113 
/* Print the command line synopsis to stderr and exit with failure. */
static void
usage(void)
{
	const char *prog;

	prog = getprogname();
	fprintf(stderr, "usage: %s [-nv] [-a address] [-F pidfile] [-p port] "
	    "[-R rcvbuf] [-S sndbuf] [exports file]\n", prog);
	exit(EXIT_FAILURE);
}
122 
/*
 * Format an IPv4 address, most significant octet first, as dotted quad.
 *
 * The result lives in a static buffer: it is overwritten by the next call,
 * so two calls must not be used as arguments of a single expression.
 */
static char *
ip2str(in_addr_t ip)
{
	static char sip[16];
	unsigned int a, b, c, d;

	a = (ip >> 24) & 0xff;
	b = (ip >> 16) & 0xff;
	c = (ip >> 8) & 0xff;
	d = ip & 0xff;
	snprintf(sip, sizeof(sip), "%u.%u.%u.%u", a, b, c, d);
	return (sip);
}
135 
/*
 * Convert a prefix length into an IPv4 netmask: the top m bits are set.
 *
 * m == 0 yields 0 and m >= 32 yields all ones.  The shift is done on an
 * unsigned value and the count is kept in 1..31, so neither a shift into the
 * sign bit nor a shift by the full width can occur.
 */
static in_addr_t
countmask(unsigned m)
{
	const in_addr_t allones = ~(in_addr_t)0;

	if (m == 0)
		return (0);
	if (m >= 32)
		return (allones);
	return ((in_addr_t)(allones << (32 - m)));
}
150 
151 static void
152 line_parse(char *line, unsigned lineno)
153 {
154 	struct ggd_export *ex;
155 	char *word, *path, *sflags;
156 	unsigned flags, i, vmask;
157 	in_addr_t ip, mask;
158 
159 	ip = mask = flags = vmask = 0;
160 	path = NULL;
161 	sflags = NULL;
162 
163 	for (i = 0, word = strtok(line, " \t"); word != NULL;
164 	    i++, word = strtok(NULL, " \t")) {
165 		switch (i) {
166 		case 0: /* IP address or host name */
167 			ip = g_gate_str2ip(strsep(&word, "/"));
168 			if (ip == INADDR_NONE) {
169 				g_gate_xlog("Invalid IP/host name at line %u.",
170 				    lineno);
171 			}
172 			ip = ntohl(ip);
173 			if (word == NULL)
174 				vmask = 32;
175 			else {
176 				errno = 0;
177 				vmask = strtoul(word, NULL, 10);
178 				if (vmask == 0 && errno != 0) {
179 					g_gate_xlog("Invalid IP mask value at "
180 					    "line %u.", lineno);
181 				}
182 				if ((unsigned)vmask > 32) {
183 					g_gate_xlog("Invalid IP mask value at line %u.",
184 					    lineno);
185 				}
186 			}
187 			mask = countmask(vmask);
188 			break;
189 		case 1:	/* flags */
190 			if (strcasecmp("rd", word) == 0 ||
191 			    strcasecmp("ro", word) == 0) {
192 				flags = O_RDONLY;
193 			} else if (strcasecmp("wo", word) == 0) {
194 				flags = O_WRONLY;
195 			} else if (strcasecmp("rw", word) == 0) {
196 				flags = O_RDWR;
197 			} else {
198 				g_gate_xlog("Invalid value in flags field at "
199 				    "line %u.", lineno);
200 			}
201 			sflags = word;
202 			break;
203 		case 2:	/* path */
204 			if (strlen(word) >= MAXPATHLEN) {
205 				g_gate_xlog("Path too long at line %u. ",
206 				    lineno);
207 			}
208 			path = word;
209 			break;
210 		default:
211 			g_gate_xlog("Too many arguments at line %u. ", lineno);
212 		}
213 	}
214 	if (i != 3)
215 		g_gate_xlog("Too few arguments at line %u.", lineno);
216 
217 	ex = malloc(sizeof(*ex));
218 	if (ex == NULL)
219 		g_gate_xlog("Not enough memory.");
220 	ex->e_path = strdup(path);
221 	if (ex->e_path == NULL)
222 		g_gate_xlog("Not enough memory.");
223 
224 	/* Made 'and' here. */
225 	ex->e_ip = (ip & mask);
226 	ex->e_mask = mask;
227 	ex->e_flags = flags;
228 
229 	SLIST_INSERT_HEAD(&exports, ex, e_next);
230 
231 	g_gate_log(LOG_DEBUG, "Added %s/%u %s %s to exports list.",
232 	    ip2str(ex->e_ip), vmask, path, sflags);
233 }
234 
235 static void
236 exports_clear(void)
237 {
238 	struct ggd_export *ex;
239 
240 	while (!SLIST_EMPTY(&exports)) {
241 		ex = SLIST_FIRST(&exports);
242 		SLIST_REMOVE_HEAD(&exports, e_next);
243 		free(ex);
244 	}
245 }
246 
247 #define	EXPORTS_LINE_SIZE	2048
248 static void
249 exports_get(void)
250 {
251 	char buf[EXPORTS_LINE_SIZE], *line;
252 	unsigned lineno = 0, objs = 0, len;
253 	FILE *fd;
254 
255 	exports_clear();
256 
257 	fd = fopen(exports_file, "r");
258 	if (fd == NULL) {
259 		g_gate_xlog("Cannot open exports file (%s): %s.", exports_file,
260 		    strerror(errno));
261 	}
262 
263 	g_gate_log(LOG_INFO, "Reading exports file (%s).", exports_file);
264 
265 	for (;;) {
266 		if (fgets(buf, sizeof(buf), fd) == NULL) {
267 			if (feof(fd))
268 				break;
269 
270 			g_gate_xlog("Error while reading exports file: %s.",
271 			    strerror(errno));
272 		}
273 
274 		/* Increase line count. */
275 		lineno++;
276 
277 		/* Skip spaces and tabs. */
278 		for (line = buf; *line == ' ' || *line == '\t'; ++line)
279 			;
280 
281 		/* Empty line, comment or empty line at the end of file. */
282 		if (*line == '\n' || *line == '#' || *line == '\0')
283 			continue;
284 
285 		len = strlen(line);
286 		if (line[len - 1] == '\n') {
287 			/* Remove new line char. */
288 			line[len - 1] = '\0';
289 		} else {
290 			if (!feof(fd))
291 				g_gate_xlog("Line %u too long.", lineno);
292 		}
293 
294 		line_parse(line, lineno);
295 		objs++;
296 	}
297 
298 	fclose(fd);
299 
300 	if (objs == 0)
301 		g_gate_xlog("There are no objects to export.");
302 
303 	g_gate_log(LOG_INFO, "Exporting %u object(s).", objs);
304 }
305 
306 static int
307 exports_check(struct ggd_export *ex, struct g_gate_cinit *cinit,
308     struct ggd_connection *conn)
309 {
310 	char ipmask[32]; /* 32 == strlen("xxx.xxx.xxx.xxx/xxx.xxx.xxx.xxx")+1 */
311 	int error = 0, flags;
312 
313 	strlcpy(ipmask, ip2str(ex->e_ip), sizeof(ipmask));
314 	strlcat(ipmask, "/", sizeof(ipmask));
315 	strlcat(ipmask, ip2str(ex->e_mask), sizeof(ipmask));
316 	if ((cinit->gc_flags & GGATE_FLAG_RDONLY) != 0) {
317 		if (ex->e_flags == O_WRONLY) {
318 			g_gate_log(LOG_WARNING, "Read-only access requested, "
319 			    "but %s (%s) is exported write-only.", ex->e_path,
320 			    ipmask);
321 			return (EPERM);
322 		} else {
323 			conn->c_flags |= GGATE_FLAG_RDONLY;
324 		}
325 	} else if ((cinit->gc_flags & GGATE_FLAG_WRONLY) != 0) {
326 		if (ex->e_flags == O_RDONLY) {
327 			g_gate_log(LOG_WARNING, "Write-only access requested, "
328 			    "but %s (%s) is exported read-only.", ex->e_path,
329 			    ipmask);
330 			return (EPERM);
331 		} else {
332 			conn->c_flags |= GGATE_FLAG_WRONLY;
333 		}
334 	} else {
335 		if (ex->e_flags == O_RDONLY) {
336 			g_gate_log(LOG_WARNING, "Read-write access requested, "
337 			    "but %s (%s) is exported read-only.", ex->e_path,
338 			    ipmask);
339 			return (EPERM);
340 		} else if (ex->e_flags == O_WRONLY) {
341 			g_gate_log(LOG_WARNING, "Read-write access requested, "
342 			    "but %s (%s) is exported write-only.", ex->e_path,
343 			    ipmask);
344 			return (EPERM);
345 		}
346 	}
347 	if ((conn->c_flags & GGATE_FLAG_RDONLY) != 0)
348 		flags = O_RDONLY;
349 	else if ((conn->c_flags & GGATE_FLAG_WRONLY) != 0)
350 		flags = O_WRONLY;
351 	else
352 		flags = O_RDWR;
353 	if (conn->c_diskfd != -1) {
354 		if (strcmp(conn->c_path, ex->e_path) != 0) {
355 			g_gate_log(LOG_ERR, "old %s and new %s: "
356 			    "Path mismatch during handshakes.",
357 			    conn->c_path, ex->e_path);
358 			return (EPERM);
359 		}
360 		return (0);
361 	}
362 
363 	conn->c_diskfd = open(ex->e_path, flags);
364 	if (conn->c_diskfd == -1) {
365 		error = errno;
366 		g_gate_log(LOG_ERR, "Cannot open %s: %s.", ex->e_path,
367 		    strerror(error));
368 		return (error);
369 	}
370 	return (0);
371 }
372 
373 static struct ggd_export *
374 exports_find(struct sockaddr *s, struct g_gate_cinit *cinit,
375     struct ggd_connection *conn)
376 {
377 	struct ggd_export *ex;
378 	in_addr_t ip;
379 	int error;
380 
381 	ip = htonl(((struct sockaddr_in *)(void *)s)->sin_addr.s_addr);
382 	SLIST_FOREACH(ex, &exports, e_next) {
383 		if ((ip & ex->e_mask) != ex->e_ip) {
384 			g_gate_log(LOG_DEBUG, "exports[%s]: IP mismatch.",
385 			    ex->e_path);
386 			continue;
387 		}
388 		if (strcmp(cinit->gc_path, ex->e_path) != 0) {
389 			g_gate_log(LOG_DEBUG, "exports[%s]: Path mismatch.",
390 			    ex->e_path);
391 			continue;
392 		}
393 		error = exports_check(ex, cinit, conn);
394 		if (error == 0)
395 			return (ex);
396 		else {
397 			errno = error;
398 			return (NULL);
399 		}
400 	}
401 	g_gate_log(LOG_WARNING, "Unauthorized connection from: %s.",
402 	    ip2str(ip));
403 	errno = EPERM;
404 	return (NULL);
405 }
406 
407 /*
408  * Remove timed out connections.
409  */
410 static void
411 connection_cleanups(void)
412 {
413 	struct ggd_connection *conn, *tconn;
414 	time_t now;
415 
416 	time(&now);
417 	LIST_FOREACH_SAFE(conn, &connections, c_next, tconn) {
418 		if (now - conn->c_birthtime > 10) {
419 			LIST_REMOVE(conn, c_next);
420 			g_gate_log(LOG_NOTICE,
421 			    "Connection from %s [%s] removed.",
422 			    ip2str(conn->c_srcip), conn->c_path);
423 			close(conn->c_diskfd);
424 			close(conn->c_sendfd);
425 			close(conn->c_recvfd);
426 			free(conn->c_path);
427 			free(conn);
428 		}
429 	}
430 }
431 
432 static struct ggd_connection *
433 connection_find(struct g_gate_cinit *cinit)
434 {
435 	struct ggd_connection *conn;
436 
437 	LIST_FOREACH(conn, &connections, c_next) {
438 		if (conn->c_token == cinit->gc_token)
439 			break;
440 	}
441 	return (conn);
442 }
443 
444 static struct ggd_connection *
445 connection_new(struct g_gate_cinit *cinit, struct sockaddr *s, int sfd)
446 {
447 	struct ggd_connection *conn;
448 	in_addr_t ip;
449 
450 	/*
451 	 * First, look for old connections.
452 	 * We probably should do it every X seconds, but what for?
453 	 * It is only dangerous if an attacker wants to overload connections
454 	 * queue, so here is a good place to do the cleanups.
455 	 */
456 	connection_cleanups();
457 
458 	conn = malloc(sizeof(*conn));
459 	if (conn == NULL)
460 		return (NULL);
461 	conn->c_path = strdup(cinit->gc_path);
462 	if (conn->c_path == NULL) {
463 		free(conn);
464 		return (NULL);
465 	}
466 	conn->c_token = cinit->gc_token;
467 	ip = htonl(((struct sockaddr_in *)(void *)s)->sin_addr.s_addr);
468 	conn->c_srcip = ip;
469 	conn->c_diskfd = conn->c_sendfd = conn->c_recvfd = -1;
470 	if ((cinit->gc_flags & GGATE_FLAG_SEND) != 0)
471 		conn->c_sendfd = sfd;
472 	else
473 		conn->c_recvfd = sfd;
474 	conn->c_mediasize = 0;
475 	conn->c_sectorsize = 0;
476 	time(&conn->c_birthtime);
477 	conn->c_flags = cinit->gc_flags;
478 	LIST_INSERT_HEAD(&connections, conn, c_next);
479 	g_gate_log(LOG_DEBUG, "Connection created [%s, %s].", ip2str(ip),
480 	    conn->c_path);
481 	return (conn);
482 }
483 
484 static int
485 connection_add(struct ggd_connection *conn, struct g_gate_cinit *cinit,
486     struct sockaddr *s, int sfd)
487 {
488 	in_addr_t ip;
489 
490 	ip = htonl(((struct sockaddr_in *)(void *)s)->sin_addr.s_addr);
491 	if ((cinit->gc_flags & GGATE_FLAG_SEND) != 0) {
492 		if (conn->c_sendfd != -1) {
493 			g_gate_log(LOG_WARNING,
494 			    "Send socket already exists [%s, %s].", ip2str(ip),
495 			    conn->c_path);
496 			return (EEXIST);
497 		}
498 		conn->c_sendfd = sfd;
499 	} else {
500 		if (conn->c_recvfd != -1) {
501 			g_gate_log(LOG_WARNING,
502 			    "Receive socket already exists [%s, %s].",
503 			    ip2str(ip), conn->c_path);
504 			return (EEXIST);
505 		}
506 		conn->c_recvfd = sfd;
507 	}
508 	g_gate_log(LOG_DEBUG, "Connection added [%s, %s].", ip2str(ip),
509 	    conn->c_path);
510 	return (0);
511 }
512 
513 /*
514  * Remove one socket from the given connection or the whole
515  * connection if sfd == -1.
516  */
517 static void
518 connection_remove(struct ggd_connection *conn)
519 {
520 
521 	LIST_REMOVE(conn, c_next);
522 	g_gate_log(LOG_DEBUG, "Connection removed [%s %s].",
523 	    ip2str(conn->c_srcip), conn->c_path);
524 	if (conn->c_diskfd != -1)
525 		close(conn->c_diskfd);
526 	if (conn->c_sendfd != -1)
527 		close(conn->c_sendfd);
528 	if (conn->c_recvfd != -1)
529 		close(conn->c_recvfd);
530 	free(conn->c_path);
531 	free(conn);
532 }
533 
534 static int
535 connection_ready(struct ggd_connection *conn)
536 {
537 
538 	return (conn->c_sendfd != -1 && conn->c_recvfd != -1);
539 }
540 
541 static void
542 connection_launch(struct ggd_connection *conn)
543 {
544 	pthread_t td;
545 	int error, pid;
546 
547 	pid = fork();
548 	if (pid > 0)
549 		return;
550 	else if (pid == -1) {
551 		g_gate_log(LOG_ERR, "Cannot fork: %s.", strerror(errno));
552 		return;
553 	}
554 	g_gate_log(LOG_DEBUG, "Process created [%s].", conn->c_path);
555 
556 	/*
557 	 * Create condition variables and mutexes for in-queue and out-queue
558 	 * synchronization.
559 	 */
560 	error = pthread_mutex_init(&inqueue_mtx, NULL);
561 	if (error != 0) {
562 		g_gate_xlog("pthread_mutex_init(inqueue_mtx): %s.",
563 		    strerror(error));
564 	}
565 	error = pthread_cond_init(&inqueue_cond, NULL);
566 	if (error != 0) {
567 		g_gate_xlog("pthread_cond_init(inqueue_cond): %s.",
568 		    strerror(error));
569 	}
570 	error = pthread_mutex_init(&outqueue_mtx, NULL);
571 	if (error != 0) {
572 		g_gate_xlog("pthread_mutex_init(outqueue_mtx): %s.",
573 		    strerror(error));
574 	}
575 	error = pthread_cond_init(&outqueue_cond, NULL);
576 	if (error != 0) {
577 		g_gate_xlog("pthread_cond_init(outqueue_cond): %s.",
578 		    strerror(error));
579 	}
580 
581 	/*
582 	 * Create threads:
583 	 * recvtd - thread for receiving I/O request
584 	 * diskio - thread for doing I/O request
585 	 * sendtd - thread for sending I/O requests back
586 	 */
587 	error = pthread_create(&td, NULL, send_thread, conn);
588 	if (error != 0) {
589 		g_gate_xlog("pthread_create(send_thread): %s.",
590 		    strerror(error));
591 	}
592 	error = pthread_create(&td, NULL, recv_thread, conn);
593 	if (error != 0) {
594 		g_gate_xlog("pthread_create(recv_thread): %s.",
595 		    strerror(error));
596 	}
597 	disk_thread(conn);
598 }
599 
600 static void
601 sendfail(int sfd, int error, const char *fmt, ...)
602 {
603 	struct g_gate_sinit sinit;
604 	va_list ap;
605 	ssize_t data;
606 
607 	memset(&sinit, 0, sizeof(sinit));
608 	sinit.gs_error = error;
609 	g_gate_swap2n_sinit(&sinit);
610 	data = g_gate_send(sfd, &sinit, sizeof(sinit), 0);
611 	g_gate_swap2h_sinit(&sinit);
612 	if (data != sizeof(sinit)) {
613 		g_gate_log(LOG_WARNING, "Cannot send initial packet: %s.",
614 		    strerror(errno));
615 		return;
616 	}
617 	if (fmt != NULL) {
618 		va_start(ap, fmt);
619 		g_gate_vlog(LOG_WARNING, fmt, ap);
620 		va_end(ap);
621 	}
622 }
623 
/*
 * Allocate size bytes, retrying once per second until it succeeds; never
 * returns NULL.
 *
 * A zero-byte request is rounded up to one byte: malloc(0) is allowed to
 * return NULL, which this loop would otherwise treat as a failure and retry
 * forever.
 */
static void *
malloc_waitok(size_t size)
{
	void *p;

	if (size == 0)
		size = 1;
	while ((p = malloc(size)) == NULL) {
		g_gate_log(LOG_DEBUG, "Cannot allocate %zu bytes.", size);
		sleep(1);
	}
	return (p);
}
635 
636 static void *
637 recv_thread(void *arg)
638 {
639 	struct ggd_connection *conn;
640 	struct ggd_request *req;
641 	ssize_t data;
642 	int error, fd;
643 
644 	conn = arg;
645 	g_gate_log(LOG_NOTICE, "%s: started [%s]!", __func__, conn->c_path);
646 	fd = conn->c_recvfd;
647 	for (;;) {
648 		/*
649 		 * Get header packet.
650 		 */
651 		req = malloc_waitok(sizeof(*req));
652 		data = g_gate_recv(fd, &req->r_hdr, sizeof(req->r_hdr),
653 		    MSG_WAITALL);
654 		if (data == 0) {
655 			g_gate_log(LOG_DEBUG, "Process %u exiting.", getpid());
656 			exit(EXIT_SUCCESS);
657 		} else if (data == -1) {
658 			g_gate_xlog("Error while receiving hdr packet: %s.",
659 			    strerror(errno));
660 		} else if (data != sizeof(req->r_hdr)) {
661 			g_gate_xlog("Malformed hdr packet received.");
662 		}
663 		g_gate_log(LOG_DEBUG, "Received hdr packet.");
664 		g_gate_swap2h_hdr(&req->r_hdr);
665 
666 		g_gate_log(LOG_DEBUG, "%s: offset=%" PRIu64 " length=%" PRIu32,
667 		    __func__, req->r_offset, req->r_length);
668 
669 		/*
670 		 * Allocate memory for data.
671 		 */
672 		req->r_data = malloc_waitok(req->r_length);
673 
674 		/*
675 		 * Receive data to write for WRITE request.
676 		 */
677 		if (req->r_cmd == GGATE_CMD_WRITE) {
678 			g_gate_log(LOG_DEBUG, "Waiting for %u bytes of data...",
679 			    req->r_length);
680 			data = g_gate_recv(fd, req->r_data, req->r_length,
681 			    MSG_WAITALL);
682 			if (data == -1) {
683 				g_gate_xlog("Error while receiving data: %s.",
684 				    strerror(errno));
685 			}
686 		}
687 
688 		/*
689 		 * Put the request onto the incoming queue.
690 		 */
691 		error = pthread_mutex_lock(&inqueue_mtx);
692 		assert(error == 0);
693 		TAILQ_INSERT_TAIL(&inqueue, req, r_next);
694 		error = pthread_cond_signal(&inqueue_cond);
695 		assert(error == 0);
696 		error = pthread_mutex_unlock(&inqueue_mtx);
697 		assert(error == 0);
698 	}
699 }
700 
701 static void *
702 disk_thread(void *arg)
703 {
704 	struct ggd_connection *conn;
705 	struct ggd_request *req;
706 	ssize_t data;
707 	int error, fd;
708 
709 	conn = arg;
710 	g_gate_log(LOG_NOTICE, "%s: started [%s]!", __func__, conn->c_path);
711 	fd = conn->c_diskfd;
712 	for (;;) {
713 		/*
714 		 * Get a request from the incoming queue.
715 		 */
716 		error = pthread_mutex_lock(&inqueue_mtx);
717 		assert(error == 0);
718 		while ((req = TAILQ_FIRST(&inqueue)) == NULL) {
719 			error = pthread_cond_wait(&inqueue_cond, &inqueue_mtx);
720 			assert(error == 0);
721 		}
722 		TAILQ_REMOVE(&inqueue, req, r_next);
723 		error = pthread_mutex_unlock(&inqueue_mtx);
724 		assert(error == 0);
725 
726 		/*
727 		 * Check the request.
728 		 */
729 		assert(req->r_cmd == GGATE_CMD_READ || req->r_cmd == GGATE_CMD_WRITE);
730 		assert(req->r_offset + req->r_length <= (uintmax_t)conn->c_mediasize);
731 		assert((req->r_offset % conn->c_sectorsize) == 0);
732 		assert((req->r_length % conn->c_sectorsize) == 0);
733 
734 		g_gate_log(LOG_DEBUG, "%s: offset=%" PRIu64 " length=%" PRIu32,
735 		     __func__, req->r_offset, req->r_length);
736 
737 		/*
738 		 * Do the request.
739 		 */
740 		data = 0;
741 		switch (req->r_cmd) {
742 		case GGATE_CMD_READ:
743 			data = pread(fd, req->r_data, req->r_length,
744 			    req->r_offset);
745 			break;
746 		case GGATE_CMD_WRITE:
747 			data = pwrite(fd, req->r_data, req->r_length,
748 			    req->r_offset);
749 			/* Free data memory here - better sooner. */
750 			free(req->r_data);
751 			req->r_data = NULL;
752 			break;
753 		}
754 		if (data != (ssize_t)req->r_length) {
755 			/* Report short reads/writes as I/O errors. */
756 			if (errno == 0)
757 				errno = EIO;
758 			g_gate_log(LOG_ERR, "Disk error: %s", strerror(errno));
759 			req->r_error = errno;
760 			if (req->r_data != NULL) {
761 				free(req->r_data);
762 				req->r_data = NULL;
763 			}
764 		}
765 
766 		/*
767 		 * Put the request onto the outgoing queue.
768 		 */
769 		error = pthread_mutex_lock(&outqueue_mtx);
770 		assert(error == 0);
771 		TAILQ_INSERT_TAIL(&outqueue, req, r_next);
772 		error = pthread_cond_signal(&outqueue_cond);
773 		assert(error == 0);
774 		error = pthread_mutex_unlock(&outqueue_mtx);
775 		assert(error == 0);
776 	}
777 
778 	/* NOTREACHED */
779 	return (NULL);
780 }
781 
782 static void *
783 send_thread(void *arg)
784 {
785 	struct ggd_connection *conn;
786 	struct ggd_request *req;
787 	ssize_t data;
788 	int error, fd;
789 
790 	conn = arg;
791 	g_gate_log(LOG_NOTICE, "%s: started [%s]!", __func__, conn->c_path);
792 	fd = conn->c_sendfd;
793 	for (;;) {
794 		/*
795 		 * Get a request from the outgoing queue.
796 		 */
797 		error = pthread_mutex_lock(&outqueue_mtx);
798 		assert(error == 0);
799 		while ((req = TAILQ_FIRST(&outqueue)) == NULL) {
800 			error = pthread_cond_wait(&outqueue_cond,
801 			    &outqueue_mtx);
802 			assert(error == 0);
803 		}
804 		TAILQ_REMOVE(&outqueue, req, r_next);
805 		error = pthread_mutex_unlock(&outqueue_mtx);
806 		assert(error == 0);
807 
808 		g_gate_log(LOG_DEBUG, "%s: offset=%" PRIu64 " length=%" PRIu32,
809 		    __func__, req->r_offset, req->r_length);
810 
811 		/*
812 		 * Send the request.
813 		 */
814 		g_gate_swap2n_hdr(&req->r_hdr);
815 		if (g_gate_send(fd, &req->r_hdr, sizeof(req->r_hdr), 0) == -1) {
816 			g_gate_xlog("Error while sending hdr packet: %s.",
817 			    strerror(errno));
818 		}
819 		g_gate_log(LOG_DEBUG, "Sent hdr packet.");
820 		g_gate_swap2h_hdr(&req->r_hdr);
821 		if (req->r_data != NULL) {
822 			data = g_gate_send(fd, req->r_data, req->r_length, 0);
823 			if (data != (ssize_t)req->r_length) {
824 				g_gate_xlog("Error while sending data: %s.",
825 				    strerror(errno));
826 			}
827 			g_gate_log(LOG_DEBUG,
828 			    "Sent %zd bytes (offset=%" PRIu64 ", size=%" PRIu32
829 			    ").", data, req->r_offset, req->r_length);
830 			free(req->r_data);
831 		}
832 		free(req);
833 	}
834 
835 	/* NOTREACHED */
836 	return (NULL);
837 }
838 
/* Log the IPv4 address of a freshly accepted peer. */
static void
log_connection(struct sockaddr *from)
{
	struct sockaddr_in *sin;

	sin = (struct sockaddr_in *)(void *)from;
	/* Network to host order; the same byte swap as htonl(). */
	g_gate_log(LOG_INFO, "Connection from: %s.",
	    ip2str(ntohl(sin->sin_addr.s_addr)));
}
847 
848 static int
849 handshake(struct sockaddr *from, int sfd)
850 {
851 	struct g_gate_version ver;
852 	struct g_gate_cinit cinit;
853 	struct g_gate_sinit sinit;
854 	struct ggd_connection *conn;
855 	struct ggd_export *ex;
856 	ssize_t data;
857 
858 	log_connection(from);
859 	/*
860 	 * Phase 1: Version verification.
861 	 */
862 	g_gate_log(LOG_DEBUG, "Receiving version packet.");
863 	data = g_gate_recv(sfd, &ver, sizeof(ver), MSG_WAITALL);
864 	g_gate_swap2h_version(&ver);
865 	if (data != sizeof(ver)) {
866 		g_gate_log(LOG_WARNING, "Malformed version packet.");
867 		return (0);
868 	}
869 	g_gate_log(LOG_DEBUG, "Version packet received.");
870 	if (memcmp(ver.gv_magic, GGATE_MAGIC, strlen(GGATE_MAGIC)) != 0) {
871 		g_gate_log(LOG_WARNING, "Invalid magic field.");
872 		return (0);
873 	}
874 	if (ver.gv_version != GGATE_VERSION) {
875 		g_gate_log(LOG_WARNING, "Version %u is not supported.",
876 		    ver.gv_version);
877 		return (0);
878 	}
879 	ver.gv_error = 0;
880 	g_gate_swap2n_version(&ver);
881 	data = g_gate_send(sfd, &ver, sizeof(ver), 0);
882 	g_gate_swap2h_version(&ver);
883 	if (data == -1) {
884 		sendfail(sfd, errno, "Error while sending version packet: %s.",
885 		    strerror(errno));
886 		return (0);
887 	}
888 
889 	/*
890 	 * Phase 2: Request verification.
891 	 */
892 	g_gate_log(LOG_DEBUG, "Receiving initial packet.");
893 	data = g_gate_recv(sfd, &cinit, sizeof(cinit), MSG_WAITALL);
894 	g_gate_swap2h_cinit(&cinit);
895 	if (data != sizeof(cinit)) {
896 		g_gate_log(LOG_WARNING, "Malformed initial packet.");
897 		return (0);
898 	}
899 	g_gate_log(LOG_DEBUG, "Initial packet received.");
900 	conn = connection_find(&cinit);
901 	if (conn != NULL) {
902 		/*
903 		 * Connection should already exists.
904 		 */
905 		g_gate_log(LOG_DEBUG, "Found existing connection (token=%lu).",
906 		    (unsigned long)conn->c_token);
907 		if (connection_add(conn, &cinit, from, sfd) == -1) {
908 			connection_remove(conn);
909 			return (0);
910 		}
911 	} else {
912 		/*
913 		 * New connection, allocate space.
914 		 */
915 		conn = connection_new(&cinit, from, sfd);
916 		if (conn == NULL) {
917 			sendfail(sfd, ENOMEM,
918 			    "Cannot allocate new connection.");
919 			return (0);
920 		}
921 		g_gate_log(LOG_DEBUG, "New connection created (token=%lu).",
922 		    (unsigned long)conn->c_token);
923 	}
924 
925 	ex = exports_find(from, &cinit, conn);
926 	if (ex == NULL) {
927 		sendfail(sfd, errno, NULL);
928 		connection_remove(conn);
929 		return (0);
930 	}
931 	if (conn->c_mediasize == 0) {
932 		conn->c_mediasize = g_gate_mediasize(conn->c_diskfd);
933 		conn->c_sectorsize = g_gate_sectorsize(conn->c_diskfd);
934 	}
935 	sinit.gs_mediasize = conn->c_mediasize;
936 	sinit.gs_sectorsize = conn->c_sectorsize;
937 	sinit.gs_error = 0;
938 
939 	g_gate_log(LOG_DEBUG, "Sending initial packet.");
940 
941 	g_gate_swap2n_sinit(&sinit);
942 	data = g_gate_send(sfd, &sinit, sizeof(sinit), 0);
943 	g_gate_swap2h_sinit(&sinit);
944 	if (data == -1) {
945 		sendfail(sfd, errno, "Error while sending initial packet: %s.",
946 		    strerror(errno));
947 		return (0);
948 	}
949 
950 	if (connection_ready(conn)) {
951 		connection_launch(conn);
952 		connection_remove(conn);
953 	}
954 	return (1);
955 }
956 
957 static void
958 huphandler(int sig __unused)
959 {
960 
961 	got_sighup = 1;
962 }
963 
964 int
965 main(int argc, char *argv[])
966 {
967 	const char *ggated_pidfile = _PATH_VARRUN "/ggated.pid";
968 	struct pidfh *pfh;
969 	struct sockaddr_in serv;
970 	struct sockaddr from;
971 	socklen_t fromlen;
972 	pid_t otherpid;
973 	int ch, sfd, tmpsfd;
974 	unsigned port;
975 
976 	bindaddr = htonl(INADDR_ANY);
977 	port = G_GATE_PORT;
978 	while ((ch = getopt(argc, argv, "a:hnp:F:R:S:v")) != -1) {
979 		switch (ch) {
980 		case 'a':
981 			bindaddr = g_gate_str2ip(optarg);
982 			if (bindaddr == INADDR_NONE) {
983 				errx(EXIT_FAILURE,
984 				    "Invalid IP/host name to bind to.");
985 			}
986 			break;
987 		case 'F':
988 			ggated_pidfile = optarg;
989 			break;
990 		case 'n':
991 			nagle = 0;
992 			break;
993 		case 'p':
994 			errno = 0;
995 			port = strtoul(optarg, NULL, 10);
996 			if (port == 0 && errno != 0)
997 				errx(EXIT_FAILURE, "Invalid port.");
998 			break;
999 		case 'R':
1000 			errno = 0;
1001 			rcvbuf = strtoul(optarg, NULL, 10);
1002 			if (rcvbuf == 0 && errno != 0)
1003 				errx(EXIT_FAILURE, "Invalid rcvbuf.");
1004 			break;
1005 		case 'S':
1006 			errno = 0;
1007 			sndbuf = strtoul(optarg, NULL, 10);
1008 			if (sndbuf == 0 && errno != 0)
1009 				errx(EXIT_FAILURE, "Invalid sndbuf.");
1010 			break;
1011 		case 'v':
1012 			g_gate_verbose++;
1013 			break;
1014 		case 'h':
1015 		default:
1016 			usage();
1017 		}
1018 	}
1019 	argc -= optind;
1020 	argv += optind;
1021 
1022 	if (argv[0] != NULL)
1023 		exports_file = argv[0];
1024 	exports_get();
1025 
1026 	pfh = pidfile_open(ggated_pidfile, 0600, &otherpid);
1027 	if (pfh == NULL) {
1028 		if (errno == EEXIST) {
1029 			errx(EXIT_FAILURE, "Daemon already running, pid: %jd.",
1030 			    (intmax_t)otherpid);
1031 		}
1032 		err(EXIT_FAILURE, "Cannot open/create pidfile");
1033 	}
1034 
1035 	if (!g_gate_verbose) {
1036 		/* Run in daemon mode. */
1037 		if (daemon(0, 0) == -1)
1038 			g_gate_xlog("Cannot daemonize: %s", strerror(errno));
1039 	}
1040 
1041 	pidfile_write(pfh);
1042 
1043 	signal(SIGCHLD, SIG_IGN);
1044 
1045 	sfd = socket(AF_INET, SOCK_STREAM, 0);
1046 	if (sfd == -1)
1047 		g_gate_xlog("Cannot open stream socket: %s.", strerror(errno));
1048 	bzero(&serv, sizeof(serv));
1049 	serv.sin_family = AF_INET;
1050 	serv.sin_addr.s_addr = bindaddr;
1051 	serv.sin_port = htons(port);
1052 
1053 	g_gate_socket_settings(sfd);
1054 
1055 	if (bind(sfd, (struct sockaddr *)&serv, sizeof(serv)) == -1)
1056 		g_gate_xlog("bind(): %s.", strerror(errno));
1057 	if (listen(sfd, 5) == -1)
1058 		g_gate_xlog("listen(): %s.", strerror(errno));
1059 
1060 	g_gate_log(LOG_INFO, "Listen on port: %d.", port);
1061 
1062 	signal(SIGHUP, huphandler);
1063 
1064 	for (;;) {
1065 		fromlen = sizeof(from);
1066 		tmpsfd = accept(sfd, &from, &fromlen);
1067 		if (tmpsfd == -1)
1068 			g_gate_xlog("accept(): %s.", strerror(errno));
1069 
1070 		if (got_sighup) {
1071 			got_sighup = 0;
1072 			exports_get();
1073 		}
1074 
1075 		if (!handshake(&from, tmpsfd))
1076 			close(tmpsfd);
1077 	}
1078 	close(sfd);
1079 	pidfile_remove(pfh);
1080 	exit(EXIT_SUCCESS);
1081 }
1082