xref: /freebsd/sbin/ggate/ggated/ggated.c (revision 23f6875a43f7ce365f2d52cf857da010c47fb03b)
1 /*-
2  * Copyright (c) 2004 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include <sys/param.h>
30 #include <sys/bio.h>
31 #include <sys/disk.h>
32 #include <sys/endian.h>
33 #include <sys/ioctl.h>
34 #include <sys/queue.h>
35 #include <sys/socket.h>
36 #include <sys/stat.h>
37 #include <sys/time.h>
38 #include <arpa/inet.h>
39 #include <netinet/in.h>
40 #include <netinet/tcp.h>
41 #include <assert.h>
42 #include <err.h>
43 #include <errno.h>
44 #include <fcntl.h>
45 #include <libgen.h>
46 #include <libutil.h>
47 #include <paths.h>
48 #include <pthread.h>
49 #include <signal.h>
50 #include <stdarg.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <stdint.h>
54 #include <string.h>
55 #include <syslog.h>
56 #include <unistd.h>
57 
58 #include "ggate.h"
59 
60 
/* Default exports file; main() replaces it when a path is given as argument. */
#define	GGATED_EXPORT_FILE	"/etc/gg.exports"
62 
/*
 * State of one client connection while its handshakes are in progress.
 * A connection is identified by c_token and is complete once both a send
 * and a receive socket have been registered (see connection_ready()).
 */
struct ggd_connection {
	off_t		 c_mediasize;	/* 0 until filled in by handshake() */
	unsigned	 c_sectorsize;	/* filled in together with c_mediasize */
	unsigned	 c_flags;	/* flags (RO/RW) */
	int		 c_diskfd;	/* descriptor opened by exports_check() */
	int		 c_sendfd;	/* socket used by send_thread(), -1 if unset */
	int		 c_recvfd;	/* socket used by recv_thread(), -1 if unset */
	time_t		 c_birthtime;	/* creation time, see connection_cleanups() */
	char		*c_path;	/* strdup'ed copy of the requested path */
	uint64_t	 c_token;	/* key compared by connection_find() */
	in_addr_t	 c_srcip;	/* peer address in the form ip2str() expects */
	LIST_ENTRY(ggd_connection) c_next;	/* linkage on 'connections' */
};
76 
/*
 * One I/O request travelling recv_thread() -> disk_thread() -> send_thread().
 * r_data is allocated by recv_thread() and freed either by disk_thread()
 * (writes, failed requests) or by send_thread() (after sending read data).
 */
struct ggd_request {
	struct g_gate_hdr	 r_hdr;		/* header as received, in host order */
	char			*r_data;	/* payload buffer, NULL once released */
	TAILQ_ENTRY(ggd_request) r_next;	/* linkage on inqueue/outqueue */
};
/* Shorthands for the header fields of a request. */
#define	r_cmd		r_hdr.gh_cmd
#define	r_offset	r_hdr.gh_offset
#define	r_length	r_hdr.gh_length
#define	r_error		r_hdr.gh_error
86 
/*
 * One line of the exports file.  line_parse() stores e_ip already masked
 * with e_mask, both after ntohl(), so exports_find() can compare directly.
 */
struct ggd_export {
	char		*e_path;	/* path to device/file */
	in_addr_t	 e_ip;		/* remote IP address */
	in_addr_t	 e_mask;	/* IP mask */
	unsigned	 e_flags;	/* flags (RO/RW) */
	SLIST_ENTRY(ggd_export) e_next;	/* linkage on 'exports' */
};
94 
95 static const char *exports_file = GGATED_EXPORT_FILE;
96 static int got_sighup = 0;
97 static in_addr_t bindaddr;
98 
99 static TAILQ_HEAD(, ggd_request) inqueue = TAILQ_HEAD_INITIALIZER(inqueue);
100 static TAILQ_HEAD(, ggd_request) outqueue = TAILQ_HEAD_INITIALIZER(outqueue);
101 static pthread_mutex_t inqueue_mtx, outqueue_mtx;
102 static pthread_cond_t inqueue_cond, outqueue_cond;
103 
104 static SLIST_HEAD(, ggd_export) exports = SLIST_HEAD_INITIALIZER(exports);
105 static LIST_HEAD(, ggd_connection) connections = LIST_HEAD_INITIALIZER(connections);
106 
/*
 * Per-connection workers, defined further down; connection_launch() needs
 * them declared before their definitions.
 */
static void *recv_thread(void *arg);
static void *disk_thread(void *arg);
static void *send_thread(void *arg);
110 
/*
 * Print the command line synopsis to stderr and terminate with
 * EXIT_FAILURE.  Never returns.
 */
static void
usage(void)
{
	const char *prog;

	prog = getprogname();
	fprintf(stderr,
	    "usage: %s [-nv] [-a address] [-F pidfile] [-p port] "
	    "[-R rcvbuf] [-S sndbuf] [exports file]\n",
	    prog);
	exit(EXIT_FAILURE);
}
119 
/*
 * Format an IPv4 address as dotted quad, most significant byte first.
 *
 * The result lives in a single static buffer: it is overwritten by the
 * next call, so copy it if two addresses are needed at the same time.
 */
static char *
ip2str(in_addr_t ip)
{
	static char sip[16];
	unsigned octet[4];
	int k;

	/* octet[0] is bits 31..24, octet[3] is bits 7..0. */
	for (k = 0; k < 4; k++)
		octet[k] = (unsigned)((ip >> (24 - 8 * k)) & 0xff);

	snprintf(sip, sizeof(sip), "%u.%u.%u.%u",
	    octet[0], octet[1], octet[2], octet[3]);
	return (sip);
}
132 
/*
 * Turn a prefix length into a netmask in host byte order, e.g. 24 becomes
 * 0xffffff00.
 *
 * m == 0 yields an empty mask and anything from 32 upwards yields a full
 * mask.  The shift is done on an unsigned 32-bit value: shifting a plain
 * int by 31 (m == 1) or by a negative count (m > 32) is undefined.
 */
static in_addr_t
countmask(unsigned m)
{

	if (m == 0)
		return ((in_addr_t)0);
	if (m >= 32)
		return ((in_addr_t)0xffffffffU);
	/* Set the (32 - m) low bits, then invert to keep the m high bits. */
	return ((in_addr_t)~((((in_addr_t)1) << (32 - m)) - 1));
}
147 
148 static void
149 line_parse(char *line, unsigned lineno)
150 {
151 	struct ggd_export *ex;
152 	char *word, *path, *sflags;
153 	unsigned flags, i, vmask;
154 	in_addr_t ip, mask;
155 
156 	ip = mask = flags = vmask = 0;
157 	path = NULL;
158 	sflags = NULL;
159 
160 	for (i = 0, word = strtok(line, " \t"); word != NULL;
161 	    i++, word = strtok(NULL, " \t")) {
162 		switch (i) {
163 		case 0: /* IP address or host name */
164 			ip = g_gate_str2ip(strsep(&word, "/"));
165 			if (ip == INADDR_NONE) {
166 				g_gate_xlog("Invalid IP/host name at line %u.",
167 				    lineno);
168 			}
169 			ip = ntohl(ip);
170 			if (word == NULL)
171 				vmask = 32;
172 			else {
173 				errno = 0;
174 				vmask = strtoul(word, NULL, 10);
175 				if (vmask == 0 && errno != 0) {
176 					g_gate_xlog("Invalid IP mask value at "
177 					    "line %u.", lineno);
178 				}
179 				if ((unsigned)vmask > 32) {
180 					g_gate_xlog("Invalid IP mask value at line %u.",
181 					    lineno);
182 				}
183 			}
184 			mask = countmask(vmask);
185 			break;
186 		case 1:	/* flags */
187 			if (strcasecmp("rd", word) == 0 ||
188 			    strcasecmp("ro", word) == 0) {
189 				flags = O_RDONLY;
190 			} else if (strcasecmp("wo", word) == 0) {
191 				flags = O_WRONLY;
192 			} else if (strcasecmp("rw", word) == 0) {
193 				flags = O_RDWR;
194 			} else {
195 				g_gate_xlog("Invalid value in flags field at "
196 				    "line %u.", lineno);
197 			}
198 			sflags = word;
199 			break;
200 		case 2:	/* path */
201 			if (strlen(word) >= MAXPATHLEN) {
202 				g_gate_xlog("Path too long at line %u. ",
203 				    lineno);
204 			}
205 			path = word;
206 			break;
207 		default:
208 			g_gate_xlog("Too many arguments at line %u. ", lineno);
209 		}
210 	}
211 	if (i != 3)
212 		g_gate_xlog("Too few arguments at line %u.", lineno);
213 
214 	ex = malloc(sizeof(*ex));
215 	if (ex == NULL)
216 		g_gate_xlog("Not enough memory.");
217 	ex->e_path = strdup(path);
218 	if (ex->e_path == NULL)
219 		g_gate_xlog("Not enough memory.");
220 
221 	/* Made 'and' here. */
222 	ex->e_ip = (ip & mask);
223 	ex->e_mask = mask;
224 	ex->e_flags = flags;
225 
226 	SLIST_INSERT_HEAD(&exports, ex, e_next);
227 
228 	g_gate_log(LOG_DEBUG, "Added %s/%u %s %s to exports list.",
229 	    ip2str(ex->e_ip), vmask, path, sflags);
230 }
231 
232 static void
233 exports_clear(void)
234 {
235 	struct ggd_export *ex;
236 
237 	while (!SLIST_EMPTY(&exports)) {
238 		ex = SLIST_FIRST(&exports);
239 		SLIST_REMOVE_HEAD(&exports, e_next);
240 		free(ex);
241 	}
242 }
243 
244 #define	EXPORTS_LINE_SIZE	2048
245 static void
246 exports_get(void)
247 {
248 	char buf[EXPORTS_LINE_SIZE], *line;
249 	unsigned lineno = 0, objs = 0, len;
250 	FILE *fd;
251 
252 	exports_clear();
253 
254 	fd = fopen(exports_file, "r");
255 	if (fd == NULL) {
256 		g_gate_xlog("Cannot open exports file (%s): %s.", exports_file,
257 		    strerror(errno));
258 	}
259 
260 	g_gate_log(LOG_INFO, "Reading exports file (%s).", exports_file);
261 
262 	for (;;) {
263 		if (fgets(buf, sizeof(buf), fd) == NULL) {
264 			if (feof(fd))
265 				break;
266 
267 			g_gate_xlog("Error while reading exports file: %s.",
268 			    strerror(errno));
269 		}
270 
271 		/* Increase line count. */
272 		lineno++;
273 
274 		/* Skip spaces and tabs. */
275 		for (line = buf; *line == ' ' || *line == '\t'; ++line)
276 			;
277 
278 		/* Empty line, comment or empty line at the end of file. */
279 		if (*line == '\n' || *line == '#' || *line == '\0')
280 			continue;
281 
282 		len = strlen(line);
283 		if (line[len - 1] == '\n') {
284 			/* Remove new line char. */
285 			line[len - 1] = '\0';
286 		} else {
287 			if (!feof(fd))
288 				g_gate_xlog("Line %u too long.", lineno);
289 		}
290 
291 		line_parse(line, lineno);
292 		objs++;
293 	}
294 
295 	fclose(fd);
296 
297 	if (objs == 0)
298 		g_gate_xlog("There are no objects to export.");
299 
300 	g_gate_log(LOG_INFO, "Exporting %u object(s).", objs);
301 }
302 
303 static int
304 exports_check(struct ggd_export *ex, struct g_gate_cinit *cinit,
305     struct ggd_connection *conn)
306 {
307 	char ipmask[32]; /* 32 == strlen("xxx.xxx.xxx.xxx/xxx.xxx.xxx.xxx")+1 */
308 	int error = 0, flags;
309 
310 	strlcpy(ipmask, ip2str(ex->e_ip), sizeof(ipmask));
311 	strlcat(ipmask, "/", sizeof(ipmask));
312 	strlcat(ipmask, ip2str(ex->e_mask), sizeof(ipmask));
313 	if ((cinit->gc_flags & GGATE_FLAG_RDONLY) != 0) {
314 		if (ex->e_flags == O_WRONLY) {
315 			g_gate_log(LOG_WARNING, "Read-only access requested, "
316 			    "but %s (%s) is exported write-only.", ex->e_path,
317 			    ipmask);
318 			return (EPERM);
319 		} else {
320 			conn->c_flags |= GGATE_FLAG_RDONLY;
321 		}
322 	} else if ((cinit->gc_flags & GGATE_FLAG_WRONLY) != 0) {
323 		if (ex->e_flags == O_RDONLY) {
324 			g_gate_log(LOG_WARNING, "Write-only access requested, "
325 			    "but %s (%s) is exported read-only.", ex->e_path,
326 			    ipmask);
327 			return (EPERM);
328 		} else {
329 			conn->c_flags |= GGATE_FLAG_WRONLY;
330 		}
331 	} else {
332 		if (ex->e_flags == O_RDONLY) {
333 			g_gate_log(LOG_WARNING, "Read-write access requested, "
334 			    "but %s (%s) is exported read-only.", ex->e_path,
335 			    ipmask);
336 			return (EPERM);
337 		} else if (ex->e_flags == O_WRONLY) {
338 			g_gate_log(LOG_WARNING, "Read-write access requested, "
339 			    "but %s (%s) is exported write-only.", ex->e_path,
340 			    ipmask);
341 			return (EPERM);
342 		}
343 	}
344 	if ((conn->c_flags & GGATE_FLAG_RDONLY) != 0)
345 		flags = O_RDONLY;
346 	else if ((conn->c_flags & GGATE_FLAG_WRONLY) != 0)
347 		flags = O_WRONLY;
348 	else
349 		flags = O_RDWR;
350 	conn->c_diskfd = open(ex->e_path, flags);
351 	if (conn->c_diskfd == -1) {
352 		error = errno;
353 		g_gate_log(LOG_ERR, "Cannot open %s: %s.", ex->e_path,
354 		    strerror(error));
355 		return (error);
356 	}
357 	return (0);
358 }
359 
360 static struct ggd_export *
361 exports_find(struct sockaddr *s, struct g_gate_cinit *cinit,
362     struct ggd_connection *conn)
363 {
364 	struct ggd_export *ex;
365 	in_addr_t ip;
366 	int error;
367 
368 	ip = htonl(((struct sockaddr_in *)(void *)s)->sin_addr.s_addr);
369 	SLIST_FOREACH(ex, &exports, e_next) {
370 		if ((ip & ex->e_mask) != ex->e_ip) {
371 			g_gate_log(LOG_DEBUG, "exports[%s]: IP mismatch.",
372 			    ex->e_path);
373 			continue;
374 		}
375 		if (strcmp(cinit->gc_path, ex->e_path) != 0) {
376 			g_gate_log(LOG_DEBUG, "exports[%s]: Path mismatch.",
377 			    ex->e_path);
378 			continue;
379 		}
380 		error = exports_check(ex, cinit, conn);
381 		if (error == 0)
382 			return (ex);
383 		else {
384 			errno = error;
385 			return (NULL);
386 		}
387 	}
388 	g_gate_log(LOG_WARNING, "Unauthorized connection from: %s.",
389 	    ip2str(ip));
390 	errno = EPERM;
391 	return (NULL);
392 }
393 
394 /*
395  * Remove timed out connections.
396  */
397 static void
398 connection_cleanups(void)
399 {
400 	struct ggd_connection *conn, *tconn;
401 	time_t now;
402 
403 	time(&now);
404 	LIST_FOREACH_SAFE(conn, &connections, c_next, tconn) {
405 		if (now - conn->c_birthtime > 10) {
406 			LIST_REMOVE(conn, c_next);
407 			g_gate_log(LOG_NOTICE,
408 			    "Connection from %s [%s] removed.",
409 			    ip2str(conn->c_srcip), conn->c_path);
410 			close(conn->c_diskfd);
411 			close(conn->c_sendfd);
412 			close(conn->c_recvfd);
413 			free(conn->c_path);
414 			free(conn);
415 		}
416 	}
417 }
418 
419 static struct ggd_connection *
420 connection_find(struct g_gate_cinit *cinit)
421 {
422 	struct ggd_connection *conn;
423 
424 	LIST_FOREACH(conn, &connections, c_next) {
425 		if (conn->c_token == cinit->gc_token)
426 			break;
427 	}
428 	return (conn);
429 }
430 
431 static struct ggd_connection *
432 connection_new(struct g_gate_cinit *cinit, struct sockaddr *s, int sfd)
433 {
434 	struct ggd_connection *conn;
435 	in_addr_t ip;
436 
437 	/*
438 	 * First, look for old connections.
439 	 * We probably should do it every X seconds, but what for?
440 	 * It is only dangerous if an attacker wants to overload connections
441 	 * queue, so here is a good place to do the cleanups.
442 	 */
443 	connection_cleanups();
444 
445 	conn = malloc(sizeof(*conn));
446 	if (conn == NULL)
447 		return (NULL);
448 	conn->c_path = strdup(cinit->gc_path);
449 	if (conn->c_path == NULL) {
450 		free(conn);
451 		return (NULL);
452 	}
453 	conn->c_token = cinit->gc_token;
454 	ip = htonl(((struct sockaddr_in *)(void *)s)->sin_addr.s_addr);
455 	conn->c_srcip = ip;
456 	conn->c_sendfd = conn->c_recvfd = -1;
457 	if ((cinit->gc_flags & GGATE_FLAG_SEND) != 0)
458 		conn->c_sendfd = sfd;
459 	else
460 		conn->c_recvfd = sfd;
461 	conn->c_mediasize = 0;
462 	conn->c_sectorsize = 0;
463 	time(&conn->c_birthtime);
464 	conn->c_flags = cinit->gc_flags;
465 	LIST_INSERT_HEAD(&connections, conn, c_next);
466 	g_gate_log(LOG_DEBUG, "Connection created [%s, %s].", ip2str(ip),
467 	    conn->c_path);
468 	return (conn);
469 }
470 
471 static int
472 connection_add(struct ggd_connection *conn, struct g_gate_cinit *cinit,
473     struct sockaddr *s, int sfd)
474 {
475 	in_addr_t ip;
476 
477 	ip = htonl(((struct sockaddr_in *)(void *)s)->sin_addr.s_addr);
478 	if ((cinit->gc_flags & GGATE_FLAG_SEND) != 0) {
479 		if (conn->c_sendfd != -1) {
480 			g_gate_log(LOG_WARNING,
481 			    "Send socket already exists [%s, %s].", ip2str(ip),
482 			    conn->c_path);
483 			return (EEXIST);
484 		}
485 		conn->c_sendfd = sfd;
486 	} else {
487 		if (conn->c_recvfd != -1) {
488 			g_gate_log(LOG_WARNING,
489 			    "Receive socket already exists [%s, %s].",
490 			    ip2str(ip), conn->c_path);
491 			return (EEXIST);
492 		}
493 		conn->c_recvfd = sfd;
494 	}
495 	g_gate_log(LOG_DEBUG, "Connection added [%s, %s].", ip2str(ip),
496 	    conn->c_path);
497 	return (0);
498 }
499 
500 /*
501  * Remove one socket from the given connection or the whole
502  * connection if sfd == -1.
503  */
504 static void
505 connection_remove(struct ggd_connection *conn)
506 {
507 
508 	LIST_REMOVE(conn, c_next);
509 	g_gate_log(LOG_DEBUG, "Connection removed [%s %s].",
510 	    ip2str(conn->c_srcip), conn->c_path);
511 	if (conn->c_sendfd != -1)
512 		close(conn->c_sendfd);
513 	if (conn->c_recvfd != -1)
514 		close(conn->c_recvfd);
515 	free(conn->c_path);
516 	free(conn);
517 }
518 
519 static int
520 connection_ready(struct ggd_connection *conn)
521 {
522 
523 	return (conn->c_sendfd != -1 && conn->c_recvfd != -1);
524 }
525 
526 static void
527 connection_launch(struct ggd_connection *conn)
528 {
529 	pthread_t td;
530 	int error, pid;
531 
532 	pid = fork();
533 	if (pid > 0)
534 		return;
535 	else if (pid == -1) {
536 		g_gate_log(LOG_ERR, "Cannot fork: %s.", strerror(errno));
537 		return;
538 	}
539 	g_gate_log(LOG_DEBUG, "Process created [%s].", conn->c_path);
540 
541 	/*
542 	 * Create condition variables and mutexes for in-queue and out-queue
543 	 * synchronization.
544 	 */
545 	error = pthread_mutex_init(&inqueue_mtx, NULL);
546 	if (error != 0) {
547 		g_gate_xlog("pthread_mutex_init(inqueue_mtx): %s.",
548 		    strerror(error));
549 	}
550 	error = pthread_cond_init(&inqueue_cond, NULL);
551 	if (error != 0) {
552 		g_gate_xlog("pthread_cond_init(inqueue_cond): %s.",
553 		    strerror(error));
554 	}
555 	error = pthread_mutex_init(&outqueue_mtx, NULL);
556 	if (error != 0) {
557 		g_gate_xlog("pthread_mutex_init(outqueue_mtx): %s.",
558 		    strerror(error));
559 	}
560 	error = pthread_cond_init(&outqueue_cond, NULL);
561 	if (error != 0) {
562 		g_gate_xlog("pthread_cond_init(outqueue_cond): %s.",
563 		    strerror(error));
564 	}
565 
566 	/*
567 	 * Create threads:
568 	 * recvtd - thread for receiving I/O request
569 	 * diskio - thread for doing I/O request
570 	 * sendtd - thread for sending I/O requests back
571 	 */
572 	error = pthread_create(&td, NULL, send_thread, conn);
573 	if (error != 0) {
574 		g_gate_xlog("pthread_create(send_thread): %s.",
575 		    strerror(error));
576 	}
577 	error = pthread_create(&td, NULL, recv_thread, conn);
578 	if (error != 0) {
579 		g_gate_xlog("pthread_create(recv_thread): %s.",
580 		    strerror(error));
581 	}
582 	disk_thread(conn);
583 }
584 
585 static void
586 sendfail(int sfd, int error, const char *fmt, ...)
587 {
588 	struct g_gate_sinit sinit;
589 	va_list ap;
590 	ssize_t data;
591 
592 	sinit.gs_error = error;
593 	g_gate_swap2n_sinit(&sinit);
594 	data = g_gate_send(sfd, &sinit, sizeof(sinit), 0);
595 	g_gate_swap2h_sinit(&sinit);
596 	if (data != sizeof(sinit)) {
597 		g_gate_log(LOG_WARNING, "Cannot send initial packet: %s.",
598 		    strerror(errno));
599 		return;
600 	}
601 	if (fmt != NULL) {
602 		va_start(ap, fmt);
603 		g_gate_vlog(LOG_WARNING, fmt, ap);
604 		va_end(ap);
605 	}
606 }
607 
/*
 * Allocate 'size' bytes, retrying once per second until it succeeds.
 * Never returns NULL.
 *
 * malloc(0) is allowed to return NULL, which would be mistaken for a
 * failure and retried forever, so a zero-sized request is rounded up to
 * one byte.  Zero lengths can arrive here from request headers.
 */
static void *
malloc_waitok(size_t size)
{
	void *p;

	if (size == 0)
		size = 1;
	while ((p = malloc(size)) == NULL) {
		g_gate_log(LOG_DEBUG, "Cannot allocate %zu bytes.", size);
		sleep(1);
	}
	return (p);
}
619 
620 static void *
621 recv_thread(void *arg)
622 {
623 	struct ggd_connection *conn;
624 	struct ggd_request *req;
625 	ssize_t data;
626 	int error, fd;
627 
628 	conn = arg;
629 	g_gate_log(LOG_NOTICE, "%s: started [%s]!", __func__, conn->c_path);
630 	fd = conn->c_recvfd;
631 	for (;;) {
632 		/*
633 		 * Get header packet.
634 		 */
635 		req = malloc_waitok(sizeof(*req));
636 		data = g_gate_recv(fd, &req->r_hdr, sizeof(req->r_hdr),
637 		    MSG_WAITALL);
638 		if (data == 0) {
639 			g_gate_log(LOG_DEBUG, "Process %u exiting.", getpid());
640 			exit(EXIT_SUCCESS);
641 		} else if (data == -1) {
642 			g_gate_xlog("Error while receiving hdr packet: %s.",
643 			    strerror(errno));
644 		} else if (data != sizeof(req->r_hdr)) {
645 			g_gate_xlog("Malformed hdr packet received.");
646 		}
647 		g_gate_log(LOG_DEBUG, "Received hdr packet.");
648 		g_gate_swap2h_hdr(&req->r_hdr);
649 
650 		g_gate_log(LOG_DEBUG, "%s: offset=%jd length=%u", __func__,
651 		    (intmax_t)req->r_offset, (unsigned)req->r_length);
652 
653 		/*
654 		 * Allocate memory for data.
655 		 */
656 		req->r_data = malloc_waitok(req->r_length);
657 
658 		/*
659 		 * Receive data to write for WRITE request.
660 		 */
661 		if (req->r_cmd == GGATE_CMD_WRITE) {
662 			g_gate_log(LOG_DEBUG, "Waiting for %u bytes of data...",
663 			    req->r_length);
664 			data = g_gate_recv(fd, req->r_data, req->r_length,
665 			    MSG_WAITALL);
666 			if (data == -1) {
667 				g_gate_xlog("Error while receiving data: %s.",
668 				    strerror(errno));
669 			}
670 		}
671 
672 		/*
673 		 * Put the request onto the incoming queue.
674 		 */
675 		error = pthread_mutex_lock(&inqueue_mtx);
676 		assert(error == 0);
677 		TAILQ_INSERT_TAIL(&inqueue, req, r_next);
678 		error = pthread_cond_signal(&inqueue_cond);
679 		assert(error == 0);
680 		error = pthread_mutex_unlock(&inqueue_mtx);
681 		assert(error == 0);
682 	}
683 }
684 
685 static void *
686 disk_thread(void *arg)
687 {
688 	struct ggd_connection *conn;
689 	struct ggd_request *req;
690 	ssize_t data;
691 	int error, fd;
692 
693 	conn = arg;
694 	g_gate_log(LOG_NOTICE, "%s: started [%s]!", __func__, conn->c_path);
695 	fd = conn->c_diskfd;
696 	for (;;) {
697 		/*
698 		 * Get a request from the incoming queue.
699 		 */
700 		error = pthread_mutex_lock(&inqueue_mtx);
701 		assert(error == 0);
702 		while ((req = TAILQ_FIRST(&inqueue)) == NULL) {
703 			error = pthread_cond_wait(&inqueue_cond, &inqueue_mtx);
704 			assert(error == 0);
705 		}
706 		TAILQ_REMOVE(&inqueue, req, r_next);
707 		error = pthread_mutex_unlock(&inqueue_mtx);
708 		assert(error == 0);
709 
710 		/*
711 		 * Check the request.
712 		 */
713 		assert(req->r_cmd == GGATE_CMD_READ || req->r_cmd == GGATE_CMD_WRITE);
714 		assert(req->r_offset + req->r_length <= (uintmax_t)conn->c_mediasize);
715 		assert((req->r_offset % conn->c_sectorsize) == 0);
716 		assert((req->r_length % conn->c_sectorsize) == 0);
717 
718 		g_gate_log(LOG_DEBUG, "%s: offset=%jd length=%u", __func__,
719 		    (intmax_t)req->r_offset, (unsigned)req->r_length);
720 
721 		/*
722 		 * Do the request.
723 		 */
724 		data = 0;
725 		switch (req->r_cmd) {
726 		case GGATE_CMD_READ:
727 			data = pread(fd, req->r_data, req->r_length,
728 			    req->r_offset);
729 			break;
730 		case GGATE_CMD_WRITE:
731 			data = pwrite(fd, req->r_data, req->r_length,
732 			    req->r_offset);
733 			/* Free data memory here - better sooner. */
734 			free(req->r_data);
735 			req->r_data = NULL;
736 			break;
737 		}
738 		if (data != (ssize_t)req->r_length) {
739 			/* Report short reads/writes as I/O errors. */
740 			if (errno == 0)
741 				errno = EIO;
742 			g_gate_log(LOG_ERR, "Disk error: %s", strerror(errno));
743 			req->r_error = errno;
744 			if (req->r_data != NULL) {
745 				free(req->r_data);
746 				req->r_data = NULL;
747 			}
748 		}
749 
750 		/*
751 		 * Put the request onto the outgoing queue.
752 		 */
753 		error = pthread_mutex_lock(&outqueue_mtx);
754 		assert(error == 0);
755 		TAILQ_INSERT_TAIL(&outqueue, req, r_next);
756 		error = pthread_cond_signal(&outqueue_cond);
757 		assert(error == 0);
758 		error = pthread_mutex_unlock(&outqueue_mtx);
759 		assert(error == 0);
760 	}
761 
762 	/* NOTREACHED */
763 	return (NULL);
764 }
765 
766 static void *
767 send_thread(void *arg)
768 {
769 	struct ggd_connection *conn;
770 	struct ggd_request *req;
771 	ssize_t data;
772 	int error, fd;
773 
774 	conn = arg;
775 	g_gate_log(LOG_NOTICE, "%s: started [%s]!", __func__, conn->c_path);
776 	fd = conn->c_sendfd;
777 	for (;;) {
778 		/*
779 		 * Get a request from the outgoing queue.
780 		 */
781 		error = pthread_mutex_lock(&outqueue_mtx);
782 		assert(error == 0);
783 		while ((req = TAILQ_FIRST(&outqueue)) == NULL) {
784 			error = pthread_cond_wait(&outqueue_cond,
785 			    &outqueue_mtx);
786 			assert(error == 0);
787 		}
788 		TAILQ_REMOVE(&outqueue, req, r_next);
789 		error = pthread_mutex_unlock(&outqueue_mtx);
790 		assert(error == 0);
791 
792 		g_gate_log(LOG_DEBUG, "%s: offset=%jd length=%u", __func__,
793 		    (intmax_t)req->r_offset, (unsigned)req->r_length);
794 
795 		/*
796 		 * Send the request.
797 		 */
798 		g_gate_swap2n_hdr(&req->r_hdr);
799 		if (g_gate_send(fd, &req->r_hdr, sizeof(req->r_hdr), 0) == -1) {
800 			g_gate_xlog("Error while sending hdr packet: %s.",
801 			    strerror(errno));
802 		}
803 		g_gate_log(LOG_DEBUG, "Sent hdr packet.");
804 		g_gate_swap2h_hdr(&req->r_hdr);
805 		if (req->r_data != NULL) {
806 			data = g_gate_send(fd, req->r_data, req->r_length, 0);
807 			if (data != (ssize_t)req->r_length) {
808 				g_gate_xlog("Error while sending data: %s.",
809 				    strerror(errno));
810 			}
811 			g_gate_log(LOG_DEBUG,
812 			    "Sent %zd bytes (offset=%ju, size=%zu).", data,
813 			    (uintmax_t)req->r_offset, (size_t)req->r_length);
814 			free(req->r_data);
815 		}
816 		free(req);
817 	}
818 
819 	/* NOTREACHED */
820 	return (NULL);
821 }
822 
/*
 * Log the IPv4 address of a newly accepted peer at LOG_INFO.
 * 'from' is read as a struct sockaddr_in.
 */
static void
log_connection(struct sockaddr *from)
{
	struct sockaddr_in *sin;
	in_addr_t peer;

	sin = (struct sockaddr_in *)(void *)from;
	peer = htonl(sin->sin_addr.s_addr);
	g_gate_log(LOG_INFO, "Connection from: %s.", ip2str(peer));
}
831 
832 static int
833 handshake(struct sockaddr *from, int sfd)
834 {
835 	struct g_gate_version ver;
836 	struct g_gate_cinit cinit;
837 	struct g_gate_sinit sinit;
838 	struct ggd_connection *conn;
839 	struct ggd_export *ex;
840 	ssize_t data;
841 
842 	log_connection(from);
843 	/*
844 	 * Phase 1: Version verification.
845 	 */
846 	g_gate_log(LOG_DEBUG, "Receiving version packet.");
847 	data = g_gate_recv(sfd, &ver, sizeof(ver), MSG_WAITALL);
848 	g_gate_swap2h_version(&ver);
849 	if (data != sizeof(ver)) {
850 		g_gate_log(LOG_WARNING, "Malformed version packet.");
851 		return (0);
852 	}
853 	g_gate_log(LOG_DEBUG, "Version packet received.");
854 	if (memcmp(ver.gv_magic, GGATE_MAGIC, strlen(GGATE_MAGIC)) != 0) {
855 		g_gate_log(LOG_WARNING, "Invalid magic field.");
856 		return (0);
857 	}
858 	if (ver.gv_version != GGATE_VERSION) {
859 		g_gate_log(LOG_WARNING, "Version %u is not supported.",
860 		    ver.gv_version);
861 		return (0);
862 	}
863 	ver.gv_error = 0;
864 	g_gate_swap2n_version(&ver);
865 	data = g_gate_send(sfd, &ver, sizeof(ver), 0);
866 	g_gate_swap2h_version(&ver);
867 	if (data == -1) {
868 		sendfail(sfd, errno, "Error while sending version packet: %s.",
869 		    strerror(errno));
870 		return (0);
871 	}
872 
873 	/*
874 	 * Phase 2: Request verification.
875 	 */
876 	g_gate_log(LOG_DEBUG, "Receiving initial packet.");
877 	data = g_gate_recv(sfd, &cinit, sizeof(cinit), MSG_WAITALL);
878 	g_gate_swap2h_cinit(&cinit);
879 	if (data != sizeof(cinit)) {
880 		g_gate_log(LOG_WARNING, "Malformed initial packet.");
881 		return (0);
882 	}
883 	g_gate_log(LOG_DEBUG, "Initial packet received.");
884 	conn = connection_find(&cinit);
885 	if (conn != NULL) {
886 		/*
887 		 * Connection should already exists.
888 		 */
889 		g_gate_log(LOG_DEBUG, "Found existing connection (token=%lu).",
890 		    (unsigned long)conn->c_token);
891 		if (connection_add(conn, &cinit, from, sfd) == -1) {
892 			connection_remove(conn);
893 			return (0);
894 		}
895 	} else {
896 		/*
897 		 * New connection, allocate space.
898 		 */
899 		conn = connection_new(&cinit, from, sfd);
900 		if (conn == NULL) {
901 			sendfail(sfd, ENOMEM,
902 			    "Cannot allocate new connection.");
903 			return (0);
904 		}
905 		g_gate_log(LOG_DEBUG, "New connection created (token=%lu).",
906 		    (unsigned long)conn->c_token);
907 	}
908 
909 	ex = exports_find(from, &cinit, conn);
910 	if (ex == NULL) {
911 		sendfail(sfd, errno, NULL);
912 		connection_remove(conn);
913 		return (0);
914 	}
915 	if (conn->c_mediasize == 0) {
916 		conn->c_mediasize = g_gate_mediasize(conn->c_diskfd);
917 		conn->c_sectorsize = g_gate_sectorsize(conn->c_diskfd);
918 	}
919 	sinit.gs_mediasize = conn->c_mediasize;
920 	sinit.gs_sectorsize = conn->c_sectorsize;
921 	sinit.gs_error = 0;
922 
923 	g_gate_log(LOG_DEBUG, "Sending initial packet.");
924 
925 	g_gate_swap2n_sinit(&sinit);
926 	data = g_gate_send(sfd, &sinit, sizeof(sinit), 0);
927 	g_gate_swap2h_sinit(&sinit);
928 	if (data == -1) {
929 		sendfail(sfd, errno, "Error while sending initial packet: %s.",
930 		    strerror(errno));
931 		return (0);
932 	}
933 
934 	if (connection_ready(conn)) {
935 		connection_launch(conn);
936 		connection_remove(conn);
937 	}
938 	return (1);
939 }
940 
941 static void
942 huphandler(int sig __unused)
943 {
944 
945 	got_sighup = 1;
946 }
947 
948 int
949 main(int argc, char *argv[])
950 {
951 	const char *ggated_pidfile = _PATH_VARRUN "/ggated.pid";
952 	struct pidfh *pfh;
953 	struct sockaddr_in serv;
954 	struct sockaddr from;
955 	socklen_t fromlen;
956 	pid_t otherpid;
957 	int ch, sfd, tmpsfd;
958 	unsigned port;
959 
960 	bindaddr = htonl(INADDR_ANY);
961 	port = G_GATE_PORT;
962 	while ((ch = getopt(argc, argv, "a:hnp:F:R:S:v")) != -1) {
963 		switch (ch) {
964 		case 'a':
965 			bindaddr = g_gate_str2ip(optarg);
966 			if (bindaddr == INADDR_NONE) {
967 				errx(EXIT_FAILURE,
968 				    "Invalid IP/host name to bind to.");
969 			}
970 			break;
971 		case 'F':
972 			ggated_pidfile = optarg;
973 			break;
974 		case 'n':
975 			nagle = 0;
976 			break;
977 		case 'p':
978 			errno = 0;
979 			port = strtoul(optarg, NULL, 10);
980 			if (port == 0 && errno != 0)
981 				errx(EXIT_FAILURE, "Invalid port.");
982 			break;
983 		case 'R':
984 			errno = 0;
985 			rcvbuf = strtoul(optarg, NULL, 10);
986 			if (rcvbuf == 0 && errno != 0)
987 				errx(EXIT_FAILURE, "Invalid rcvbuf.");
988 			break;
989 		case 'S':
990 			errno = 0;
991 			sndbuf = strtoul(optarg, NULL, 10);
992 			if (sndbuf == 0 && errno != 0)
993 				errx(EXIT_FAILURE, "Invalid sndbuf.");
994 			break;
995 		case 'v':
996 			g_gate_verbose++;
997 			break;
998 		case 'h':
999 		default:
1000 			usage();
1001 		}
1002 	}
1003 	argc -= optind;
1004 	argv += optind;
1005 
1006 	if (argv[0] != NULL)
1007 		exports_file = argv[0];
1008 	exports_get();
1009 
1010 	pfh = pidfile_open(ggated_pidfile, 0600, &otherpid);
1011 	if (pfh == NULL) {
1012 		if (errno == EEXIST) {
1013 			errx(EXIT_FAILURE, "Daemon already running, pid: %jd.",
1014 			    (intmax_t)otherpid);
1015 		}
1016 		err(EXIT_FAILURE, "Cannot open/create pidfile");
1017 	}
1018 
1019 	if (!g_gate_verbose) {
1020 		/* Run in daemon mode. */
1021 		if (daemon(0, 0) == -1)
1022 			g_gate_xlog("Cannot daemonize: %s", strerror(errno));
1023 	}
1024 
1025 	pidfile_write(pfh);
1026 
1027 	signal(SIGCHLD, SIG_IGN);
1028 
1029 	sfd = socket(AF_INET, SOCK_STREAM, 0);
1030 	if (sfd == -1)
1031 		g_gate_xlog("Cannot open stream socket: %s.", strerror(errno));
1032 	bzero(&serv, sizeof(serv));
1033 	serv.sin_family = AF_INET;
1034 	serv.sin_addr.s_addr = bindaddr;
1035 	serv.sin_port = htons(port);
1036 
1037 	g_gate_socket_settings(sfd);
1038 
1039 	if (bind(sfd, (struct sockaddr *)&serv, sizeof(serv)) == -1)
1040 		g_gate_xlog("bind(): %s.", strerror(errno));
1041 	if (listen(sfd, 5) == -1)
1042 		g_gate_xlog("listen(): %s.", strerror(errno));
1043 
1044 	g_gate_log(LOG_INFO, "Listen on port: %d.", port);
1045 
1046 	signal(SIGHUP, huphandler);
1047 
1048 	for (;;) {
1049 		fromlen = sizeof(from);
1050 		tmpsfd = accept(sfd, &from, &fromlen);
1051 		if (tmpsfd == -1)
1052 			g_gate_xlog("accept(): %s.", strerror(errno));
1053 
1054 		if (got_sighup) {
1055 			got_sighup = 0;
1056 			exports_get();
1057 		}
1058 
1059 		if (!handshake(&from, tmpsfd))
1060 			close(tmpsfd);
1061 	}
1062 	close(sfd);
1063 	pidfile_remove(pfh);
1064 	exit(EXIT_SUCCESS);
1065 }
1066