xref: /freebsd/sbin/ggate/ggated/ggated.c (revision a90b9d0159070121c221b966469c3e36d912bf82)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2004 Pawel Jakub Dawidek <pjd@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/param.h>
30 #include <sys/bio.h>
31 #include <sys/disk.h>
32 #include <sys/endian.h>
33 #include <sys/ioctl.h>
34 #include <sys/queue.h>
35 #include <sys/socket.h>
36 #include <sys/stat.h>
37 #include <sys/time.h>
38 #include <arpa/inet.h>
39 #include <netinet/in.h>
40 #include <netinet/tcp.h>
41 #include <assert.h>
42 #include <err.h>
43 #include <errno.h>
44 #include <inttypes.h>
45 #include <fcntl.h>
46 #include <libgen.h>
47 #include <libutil.h>
48 #include <paths.h>
49 #include <pthread.h>
50 #include <signal.h>
51 #include <stdarg.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <stdint.h>
55 #include <string.h>
56 #include <syslog.h>
57 #include <unistd.h>
58 
59 #include "ggate.h"
60 
61 
62 #define	GGATED_EXPORT_FILE	"/etc/gg.exports"
63 
/*
 * One client connection.  A connection is assembled from two sockets that
 * present the same token during the handshake; once both are attached a
 * worker process is forked to serve it.
 */
struct ggd_connection {
	off_t		 c_mediasize;	/* media size sent to the client */
	unsigned	 c_sectorsize;	/* sector size sent to the client */
	unsigned	 c_flags;	/* flags (RO/RW) */
	int		 c_diskfd;	/* exported file/device, -1 if not open */
	int		 c_sendfd;	/* socket send_thread writes to, or -1 */
	int		 c_recvfd;	/* socket recv_thread reads from, or -1 */
	time_t		 c_birthtime;	/* creation time, used for timeouts */
	char		*c_path;	/* strdup()ed path requested by client */
	uint64_t	 c_token;	/* token pairing the two sockets */
	in_addr_t	 c_srcip;	/* client address, as passed to ip2str() */
	LIST_ENTRY(ggd_connection) c_next;
};
77 
/*
 * One I/O request travelling through the worker: received by recv_thread,
 * executed by disk_thread and answered by send_thread.
 */
struct ggd_request {
	struct g_gate_hdr	 r_hdr;		/* header as exchanged with the client */
	char			*r_data;	/* payload buffer, NULL once released */
	TAILQ_ENTRY(ggd_request) r_next;	/* inqueue/outqueue linkage */
};
/* Shorthands for the header fields used throughout the worker threads. */
#define	r_cmd		r_hdr.gh_cmd
#define	r_offset	r_hdr.gh_offset
#define	r_length	r_hdr.gh_length
#define	r_error		r_hdr.gh_error
87 
/*
 * One entry parsed from the exports file: which clients (address/mask) may
 * access which path, and in which mode.
 */
struct ggd_export {
	char		*e_path;	/* path to device/file */
	in_addr_t	 e_ip;		/* remote IP address (already masked) */
	in_addr_t	 e_mask;	/* IP mask */
	unsigned	 e_flags;	/* flags (RO/RW), stored as O_* values */
	SLIST_ENTRY(ggd_export) e_next;
};
95 
96 static const char *exports_file = GGATED_EXPORT_FILE;
97 static int got_sighup = 0;
98 static in_addr_t bindaddr;
99 
100 static TAILQ_HEAD(, ggd_request) inqueue = TAILQ_HEAD_INITIALIZER(inqueue);
101 static TAILQ_HEAD(, ggd_request) outqueue = TAILQ_HEAD_INITIALIZER(outqueue);
102 static pthread_mutex_t inqueue_mtx, outqueue_mtx;
103 static pthread_cond_t inqueue_cond, outqueue_cond;
104 
105 static SLIST_HEAD(, ggd_export) exports = SLIST_HEAD_INITIALIZER(exports);
106 static LIST_HEAD(, ggd_connection) connections = LIST_HEAD_INITIALIZER(connections);
107 
108 static void *recv_thread(void *arg);
109 static void *disk_thread(void *arg);
110 static void *send_thread(void *arg);
111 
/*
 * Print the command-line synopsis on stderr and terminate with a failure
 * exit status.  Does not return.
 */
static void
usage(void)
{
	const char *prog;

	prog = getprogname();
	fprintf(stderr,
	    "usage: %s [-nv] [-a address] [-F pidfile] [-p port] [-R rcvbuf] "
	    "[-S sndbuf] [exports file]\n", prog);
	exit(EXIT_FAILURE);
}
120 
/*
 * Render an IPv4 address as dotted-quad text, printing the most significant
 * byte of `ip' first.
 *
 * The string lives in a static buffer that the next call overwrites, so a
 * caller needing two addresses at once must copy the first result.
 */
static char *
ip2str(in_addr_t ip)
{
	static char sip[16];
	unsigned octet[4];
	int i;

	/* Peel the octets off starting at the least significant byte. */
	for (i = 3; i >= 0; i--) {
		octet[i] = ip & 0xff;
		ip >>= 8;
	}
	snprintf(sip, sizeof(sip), "%u.%u.%u.%u",
	    octet[0], octet[1], octet[2], octet[3]);
	return (sip);
}
133 
/*
 * Convert a prefix length into a netmask with the `m' most significant
 * bits set (e.g. 24 -> 0xffffff00).
 *
 * A prefix length of 0 yields an empty mask; anything of 32 or more yields
 * the all-ones mask.  The shift is done on an unsigned value so that
 * m == 1 does not shift a 1 into the sign bit of an int.
 */
static in_addr_t
countmask(unsigned m)
{

	if (m == 0)
		return (0);
	if (m >= 32)
		return ((in_addr_t)0xffffffffU);
	return (~(((in_addr_t)1 << (32 - m)) - 1));
}
148 
149 static void
150 line_parse(char *line, unsigned lineno)
151 {
152 	struct ggd_export *ex;
153 	char *word, *path, *sflags;
154 	unsigned flags, i, vmask;
155 	in_addr_t ip, mask;
156 
157 	ip = mask = flags = vmask = 0;
158 	path = NULL;
159 	sflags = NULL;
160 
161 	for (i = 0, word = strtok(line, " \t"); word != NULL;
162 	    i++, word = strtok(NULL, " \t")) {
163 		switch (i) {
164 		case 0: /* IP address or host name */
165 			ip = g_gate_str2ip(strsep(&word, "/"));
166 			if (ip == INADDR_NONE) {
167 				g_gate_xlog("Invalid IP/host name at line %u.",
168 				    lineno);
169 			}
170 			ip = ntohl(ip);
171 			if (word == NULL)
172 				vmask = 32;
173 			else {
174 				errno = 0;
175 				vmask = strtoul(word, NULL, 10);
176 				if (vmask == 0 && errno != 0) {
177 					g_gate_xlog("Invalid IP mask value at "
178 					    "line %u.", lineno);
179 				}
180 				if ((unsigned)vmask > 32) {
181 					g_gate_xlog("Invalid IP mask value at line %u.",
182 					    lineno);
183 				}
184 			}
185 			mask = countmask(vmask);
186 			break;
187 		case 1:	/* flags */
188 			if (strcasecmp("rd", word) == 0 ||
189 			    strcasecmp("ro", word) == 0) {
190 				flags = O_RDONLY;
191 			} else if (strcasecmp("wo", word) == 0) {
192 				flags = O_WRONLY;
193 			} else if (strcasecmp("rw", word) == 0) {
194 				flags = O_RDWR;
195 			} else {
196 				g_gate_xlog("Invalid value in flags field at "
197 				    "line %u.", lineno);
198 			}
199 			sflags = word;
200 			break;
201 		case 2:	/* path */
202 			if (strlen(word) >= MAXPATHLEN) {
203 				g_gate_xlog("Path too long at line %u. ",
204 				    lineno);
205 			}
206 			path = word;
207 			break;
208 		default:
209 			g_gate_xlog("Too many arguments at line %u. ", lineno);
210 		}
211 	}
212 	if (i != 3)
213 		g_gate_xlog("Too few arguments at line %u.", lineno);
214 
215 	ex = malloc(sizeof(*ex));
216 	if (ex == NULL)
217 		g_gate_xlog("Not enough memory.");
218 	ex->e_path = strdup(path);
219 	if (ex->e_path == NULL)
220 		g_gate_xlog("Not enough memory.");
221 
222 	/* Made 'and' here. */
223 	ex->e_ip = (ip & mask);
224 	ex->e_mask = mask;
225 	ex->e_flags = flags;
226 
227 	SLIST_INSERT_HEAD(&exports, ex, e_next);
228 
229 	g_gate_log(LOG_DEBUG, "Added %s/%u %s %s to exports list.",
230 	    ip2str(ex->e_ip), vmask, path, sflags);
231 }
232 
233 static void
234 exports_clear(void)
235 {
236 	struct ggd_export *ex;
237 
238 	while (!SLIST_EMPTY(&exports)) {
239 		ex = SLIST_FIRST(&exports);
240 		SLIST_REMOVE_HEAD(&exports, e_next);
241 		free(ex);
242 	}
243 }
244 
245 #define	EXPORTS_LINE_SIZE	2048
246 static void
247 exports_get(void)
248 {
249 	char buf[EXPORTS_LINE_SIZE], *line;
250 	unsigned lineno = 0, objs = 0, len;
251 	FILE *fd;
252 
253 	exports_clear();
254 
255 	fd = fopen(exports_file, "r");
256 	if (fd == NULL) {
257 		g_gate_xlog("Cannot open exports file (%s): %s.", exports_file,
258 		    strerror(errno));
259 	}
260 
261 	g_gate_log(LOG_INFO, "Reading exports file (%s).", exports_file);
262 
263 	for (;;) {
264 		if (fgets(buf, sizeof(buf), fd) == NULL) {
265 			if (feof(fd))
266 				break;
267 
268 			g_gate_xlog("Error while reading exports file: %s.",
269 			    strerror(errno));
270 		}
271 
272 		/* Increase line count. */
273 		lineno++;
274 
275 		/* Skip spaces and tabs. */
276 		for (line = buf; *line == ' ' || *line == '\t'; ++line)
277 			;
278 
279 		/* Empty line, comment or empty line at the end of file. */
280 		if (*line == '\n' || *line == '#' || *line == '\0')
281 			continue;
282 
283 		len = strlen(line);
284 		if (line[len - 1] == '\n') {
285 			/* Remove new line char. */
286 			line[len - 1] = '\0';
287 		} else {
288 			if (!feof(fd))
289 				g_gate_xlog("Line %u too long.", lineno);
290 		}
291 
292 		line_parse(line, lineno);
293 		objs++;
294 	}
295 
296 	fclose(fd);
297 
298 	if (objs == 0)
299 		g_gate_xlog("There are no objects to export.");
300 
301 	g_gate_log(LOG_INFO, "Exporting %u object(s).", objs);
302 }
303 
304 static int
305 exports_check(struct ggd_export *ex, struct g_gate_cinit *cinit,
306     struct ggd_connection *conn)
307 {
308 	char ipmask[32]; /* 32 == strlen("xxx.xxx.xxx.xxx/xxx.xxx.xxx.xxx")+1 */
309 	int error = 0, flags;
310 
311 	strlcpy(ipmask, ip2str(ex->e_ip), sizeof(ipmask));
312 	strlcat(ipmask, "/", sizeof(ipmask));
313 	strlcat(ipmask, ip2str(ex->e_mask), sizeof(ipmask));
314 	if ((cinit->gc_flags & GGATE_FLAG_RDONLY) != 0) {
315 		if (ex->e_flags == O_WRONLY) {
316 			g_gate_log(LOG_WARNING, "Read-only access requested, "
317 			    "but %s (%s) is exported write-only.", ex->e_path,
318 			    ipmask);
319 			return (EPERM);
320 		} else {
321 			conn->c_flags |= GGATE_FLAG_RDONLY;
322 		}
323 	} else if ((cinit->gc_flags & GGATE_FLAG_WRONLY) != 0) {
324 		if (ex->e_flags == O_RDONLY) {
325 			g_gate_log(LOG_WARNING, "Write-only access requested, "
326 			    "but %s (%s) is exported read-only.", ex->e_path,
327 			    ipmask);
328 			return (EPERM);
329 		} else {
330 			conn->c_flags |= GGATE_FLAG_WRONLY;
331 		}
332 	} else {
333 		if (ex->e_flags == O_RDONLY) {
334 			g_gate_log(LOG_WARNING, "Read-write access requested, "
335 			    "but %s (%s) is exported read-only.", ex->e_path,
336 			    ipmask);
337 			return (EPERM);
338 		} else if (ex->e_flags == O_WRONLY) {
339 			g_gate_log(LOG_WARNING, "Read-write access requested, "
340 			    "but %s (%s) is exported write-only.", ex->e_path,
341 			    ipmask);
342 			return (EPERM);
343 		}
344 	}
345 	if ((conn->c_flags & GGATE_FLAG_RDONLY) != 0)
346 		flags = O_RDONLY;
347 	else if ((conn->c_flags & GGATE_FLAG_WRONLY) != 0)
348 		flags = O_WRONLY;
349 	else
350 		flags = O_RDWR;
351 	if (conn->c_diskfd != -1) {
352 		if (strcmp(conn->c_path, ex->e_path) != 0) {
353 			g_gate_log(LOG_ERR, "old %s and new %s: "
354 			    "Path mismatch during handshakes.",
355 			    conn->c_path, ex->e_path);
356 			return (EPERM);
357 		}
358 		return (0);
359 	}
360 
361 	conn->c_diskfd = open(ex->e_path, flags);
362 	if (conn->c_diskfd == -1) {
363 		error = errno;
364 		g_gate_log(LOG_ERR, "Cannot open %s: %s.", ex->e_path,
365 		    strerror(error));
366 		return (error);
367 	}
368 	return (0);
369 }
370 
371 static struct ggd_export *
372 exports_find(struct sockaddr *s, struct g_gate_cinit *cinit,
373     struct ggd_connection *conn)
374 {
375 	struct ggd_export *ex;
376 	in_addr_t ip;
377 	int error;
378 
379 	ip = htonl(((struct sockaddr_in *)(void *)s)->sin_addr.s_addr);
380 	SLIST_FOREACH(ex, &exports, e_next) {
381 		if ((ip & ex->e_mask) != ex->e_ip) {
382 			g_gate_log(LOG_DEBUG, "exports[%s]: IP mismatch.",
383 			    ex->e_path);
384 			continue;
385 		}
386 		if (strcmp(cinit->gc_path, ex->e_path) != 0) {
387 			g_gate_log(LOG_DEBUG, "exports[%s]: Path mismatch.",
388 			    ex->e_path);
389 			continue;
390 		}
391 		error = exports_check(ex, cinit, conn);
392 		if (error == 0)
393 			return (ex);
394 		else {
395 			errno = error;
396 			return (NULL);
397 		}
398 	}
399 	g_gate_log(LOG_WARNING, "Unauthorized connection from: %s.",
400 	    ip2str(ip));
401 	errno = EPERM;
402 	return (NULL);
403 }
404 
405 /*
406  * Remove timed out connections.
407  */
408 static void
409 connection_cleanups(void)
410 {
411 	struct ggd_connection *conn, *tconn;
412 	time_t now;
413 
414 	time(&now);
415 	LIST_FOREACH_SAFE(conn, &connections, c_next, tconn) {
416 		if (now - conn->c_birthtime > 10) {
417 			LIST_REMOVE(conn, c_next);
418 			g_gate_log(LOG_NOTICE,
419 			    "Connection from %s [%s] removed.",
420 			    ip2str(conn->c_srcip), conn->c_path);
421 			close(conn->c_diskfd);
422 			close(conn->c_sendfd);
423 			close(conn->c_recvfd);
424 			free(conn->c_path);
425 			free(conn);
426 		}
427 	}
428 }
429 
430 static struct ggd_connection *
431 connection_find(struct g_gate_cinit *cinit)
432 {
433 	struct ggd_connection *conn;
434 
435 	LIST_FOREACH(conn, &connections, c_next) {
436 		if (conn->c_token == cinit->gc_token)
437 			break;
438 	}
439 	return (conn);
440 }
441 
442 static struct ggd_connection *
443 connection_new(struct g_gate_cinit *cinit, struct sockaddr *s, int sfd)
444 {
445 	struct ggd_connection *conn;
446 	in_addr_t ip;
447 
448 	/*
449 	 * First, look for old connections.
450 	 * We probably should do it every X seconds, but what for?
451 	 * It is only dangerous if an attacker wants to overload connections
452 	 * queue, so here is a good place to do the cleanups.
453 	 */
454 	connection_cleanups();
455 
456 	conn = malloc(sizeof(*conn));
457 	if (conn == NULL)
458 		return (NULL);
459 	conn->c_path = strdup(cinit->gc_path);
460 	if (conn->c_path == NULL) {
461 		free(conn);
462 		return (NULL);
463 	}
464 	conn->c_token = cinit->gc_token;
465 	ip = htonl(((struct sockaddr_in *)(void *)s)->sin_addr.s_addr);
466 	conn->c_srcip = ip;
467 	conn->c_diskfd = conn->c_sendfd = conn->c_recvfd = -1;
468 	if ((cinit->gc_flags & GGATE_FLAG_SEND) != 0)
469 		conn->c_sendfd = sfd;
470 	else
471 		conn->c_recvfd = sfd;
472 	conn->c_mediasize = 0;
473 	conn->c_sectorsize = 0;
474 	time(&conn->c_birthtime);
475 	conn->c_flags = cinit->gc_flags;
476 	LIST_INSERT_HEAD(&connections, conn, c_next);
477 	g_gate_log(LOG_DEBUG, "Connection created [%s, %s].", ip2str(ip),
478 	    conn->c_path);
479 	return (conn);
480 }
481 
482 static int
483 connection_add(struct ggd_connection *conn, struct g_gate_cinit *cinit,
484     struct sockaddr *s, int sfd)
485 {
486 	in_addr_t ip;
487 
488 	ip = htonl(((struct sockaddr_in *)(void *)s)->sin_addr.s_addr);
489 	if ((cinit->gc_flags & GGATE_FLAG_SEND) != 0) {
490 		if (conn->c_sendfd != -1) {
491 			g_gate_log(LOG_WARNING,
492 			    "Send socket already exists [%s, %s].", ip2str(ip),
493 			    conn->c_path);
494 			return (EEXIST);
495 		}
496 		conn->c_sendfd = sfd;
497 	} else {
498 		if (conn->c_recvfd != -1) {
499 			g_gate_log(LOG_WARNING,
500 			    "Receive socket already exists [%s, %s].",
501 			    ip2str(ip), conn->c_path);
502 			return (EEXIST);
503 		}
504 		conn->c_recvfd = sfd;
505 	}
506 	g_gate_log(LOG_DEBUG, "Connection added [%s, %s].", ip2str(ip),
507 	    conn->c_path);
508 	return (0);
509 }
510 
511 /*
512  * Remove one socket from the given connection or the whole
513  * connection if sfd == -1.
514  */
515 static void
516 connection_remove(struct ggd_connection *conn)
517 {
518 
519 	LIST_REMOVE(conn, c_next);
520 	g_gate_log(LOG_DEBUG, "Connection removed [%s %s].",
521 	    ip2str(conn->c_srcip), conn->c_path);
522 	if (conn->c_diskfd != -1)
523 		close(conn->c_diskfd);
524 	if (conn->c_sendfd != -1)
525 		close(conn->c_sendfd);
526 	if (conn->c_recvfd != -1)
527 		close(conn->c_recvfd);
528 	free(conn->c_path);
529 	free(conn);
530 }
531 
532 static int
533 connection_ready(struct ggd_connection *conn)
534 {
535 
536 	return (conn->c_sendfd != -1 && conn->c_recvfd != -1);
537 }
538 
539 static void
540 connection_launch(struct ggd_connection *conn)
541 {
542 	pthread_t td;
543 	int error, pid;
544 
545 	pid = fork();
546 	if (pid > 0)
547 		return;
548 	else if (pid == -1) {
549 		g_gate_log(LOG_ERR, "Cannot fork: %s.", strerror(errno));
550 		return;
551 	}
552 	g_gate_log(LOG_DEBUG, "Process created [%s].", conn->c_path);
553 
554 	/*
555 	 * Create condition variables and mutexes for in-queue and out-queue
556 	 * synchronization.
557 	 */
558 	error = pthread_mutex_init(&inqueue_mtx, NULL);
559 	if (error != 0) {
560 		g_gate_xlog("pthread_mutex_init(inqueue_mtx): %s.",
561 		    strerror(error));
562 	}
563 	error = pthread_cond_init(&inqueue_cond, NULL);
564 	if (error != 0) {
565 		g_gate_xlog("pthread_cond_init(inqueue_cond): %s.",
566 		    strerror(error));
567 	}
568 	error = pthread_mutex_init(&outqueue_mtx, NULL);
569 	if (error != 0) {
570 		g_gate_xlog("pthread_mutex_init(outqueue_mtx): %s.",
571 		    strerror(error));
572 	}
573 	error = pthread_cond_init(&outqueue_cond, NULL);
574 	if (error != 0) {
575 		g_gate_xlog("pthread_cond_init(outqueue_cond): %s.",
576 		    strerror(error));
577 	}
578 
579 	/*
580 	 * Create threads:
581 	 * recvtd - thread for receiving I/O request
582 	 * diskio - thread for doing I/O request
583 	 * sendtd - thread for sending I/O requests back
584 	 */
585 	error = pthread_create(&td, NULL, send_thread, conn);
586 	if (error != 0) {
587 		g_gate_xlog("pthread_create(send_thread): %s.",
588 		    strerror(error));
589 	}
590 	error = pthread_create(&td, NULL, recv_thread, conn);
591 	if (error != 0) {
592 		g_gate_xlog("pthread_create(recv_thread): %s.",
593 		    strerror(error));
594 	}
595 	disk_thread(conn);
596 }
597 
598 static void
599 sendfail(int sfd, int error, const char *fmt, ...)
600 {
601 	struct g_gate_sinit sinit;
602 	va_list ap;
603 	ssize_t data;
604 
605 	memset(&sinit, 0, sizeof(sinit));
606 	sinit.gs_error = error;
607 	g_gate_swap2n_sinit(&sinit);
608 	data = g_gate_send(sfd, &sinit, sizeof(sinit), 0);
609 	g_gate_swap2h_sinit(&sinit);
610 	if (data != sizeof(sinit)) {
611 		g_gate_log(LOG_WARNING, "Cannot send initial packet: %s.",
612 		    strerror(errno));
613 		return;
614 	}
615 	if (fmt != NULL) {
616 		va_start(ap, fmt);
617 		g_gate_vlog(LOG_WARNING, fmt, ap);
618 		va_end(ap);
619 	}
620 }
621 
/*
 * Allocate `size' bytes, retrying once per second until the allocation
 * succeeds.  Never returns NULL; each failed attempt is logged at debug
 * level.
 */
static void *
malloc_waitok(size_t size)
{
	void *mem;

	for (;;) {
		mem = malloc(size);
		if (mem != NULL)
			return (mem);
		g_gate_log(LOG_DEBUG, "Cannot allocate %zu bytes.", size);
		sleep(1);
	}
}
633 
634 static void *
635 recv_thread(void *arg)
636 {
637 	struct ggd_connection *conn;
638 	struct ggd_request *req;
639 	ssize_t data;
640 	int error, fd;
641 
642 	conn = arg;
643 	g_gate_log(LOG_NOTICE, "%s: started [%s]!", __func__, conn->c_path);
644 	fd = conn->c_recvfd;
645 	for (;;) {
646 		/*
647 		 * Get header packet.
648 		 */
649 		req = malloc_waitok(sizeof(*req));
650 		data = g_gate_recv(fd, &req->r_hdr, sizeof(req->r_hdr),
651 		    MSG_WAITALL);
652 		if (data == 0) {
653 			g_gate_log(LOG_DEBUG, "Process %u exiting.", getpid());
654 			exit(EXIT_SUCCESS);
655 		} else if (data == -1) {
656 			g_gate_xlog("Error while receiving hdr packet: %s.",
657 			    strerror(errno));
658 		} else if (data != sizeof(req->r_hdr)) {
659 			g_gate_xlog("Malformed hdr packet received.");
660 		}
661 		g_gate_log(LOG_DEBUG, "Received hdr packet.");
662 		g_gate_swap2h_hdr(&req->r_hdr);
663 
664 		g_gate_log(LOG_DEBUG, "%s: offset=%" PRIu64 " length=%" PRIu32,
665 		    __func__, req->r_offset, req->r_length);
666 
667 		/*
668 		 * Allocate memory for data.
669 		 */
670 		req->r_data = malloc_waitok(req->r_length);
671 
672 		/*
673 		 * Receive data to write for WRITE request.
674 		 */
675 		if (req->r_cmd == GGATE_CMD_WRITE) {
676 			g_gate_log(LOG_DEBUG, "Waiting for %u bytes of data...",
677 			    req->r_length);
678 			data = g_gate_recv(fd, req->r_data, req->r_length,
679 			    MSG_WAITALL);
680 			if (data == -1) {
681 				g_gate_xlog("Error while receiving data: %s.",
682 				    strerror(errno));
683 			}
684 		}
685 
686 		/*
687 		 * Put the request onto the incoming queue.
688 		 */
689 		error = pthread_mutex_lock(&inqueue_mtx);
690 		assert(error == 0);
691 		TAILQ_INSERT_TAIL(&inqueue, req, r_next);
692 		error = pthread_cond_signal(&inqueue_cond);
693 		assert(error == 0);
694 		error = pthread_mutex_unlock(&inqueue_mtx);
695 		assert(error == 0);
696 	}
697 }
698 
699 static void *
700 disk_thread(void *arg)
701 {
702 	struct ggd_connection *conn;
703 	struct ggd_request *req;
704 	ssize_t data;
705 	int error, fd;
706 
707 	conn = arg;
708 	g_gate_log(LOG_NOTICE, "%s: started [%s]!", __func__, conn->c_path);
709 	fd = conn->c_diskfd;
710 	for (;;) {
711 		/*
712 		 * Get a request from the incoming queue.
713 		 */
714 		error = pthread_mutex_lock(&inqueue_mtx);
715 		assert(error == 0);
716 		while ((req = TAILQ_FIRST(&inqueue)) == NULL) {
717 			error = pthread_cond_wait(&inqueue_cond, &inqueue_mtx);
718 			assert(error == 0);
719 		}
720 		TAILQ_REMOVE(&inqueue, req, r_next);
721 		error = pthread_mutex_unlock(&inqueue_mtx);
722 		assert(error == 0);
723 
724 		/*
725 		 * Check the request.
726 		 */
727 		assert(req->r_offset + req->r_length <= (uintmax_t)conn->c_mediasize);
728 		assert((req->r_offset % conn->c_sectorsize) == 0);
729 		assert((req->r_length % conn->c_sectorsize) == 0);
730 
731 		g_gate_log(LOG_DEBUG, "%s: offset=%" PRIu64 " length=%" PRIu32,
732 		     __func__, req->r_offset, req->r_length);
733 
734 		/*
735 		 * Do the request.
736 		 */
737 		data = 0;
738 		switch (req->r_cmd) {
739 		case GGATE_CMD_READ:
740 			data = pread(fd, req->r_data, req->r_length,
741 			    req->r_offset);
742 			break;
743 		case GGATE_CMD_WRITE:
744 			data = pwrite(fd, req->r_data, req->r_length,
745 			    req->r_offset);
746 			/* Free data memory here - better sooner. */
747 			free(req->r_data);
748 			req->r_data = NULL;
749 			break;
750 		case GGATE_CMD_FLUSH:
751 			data = fsync(fd);
752 			if (data != 0)
753 				req->r_error = errno;
754 			break;
755 		default:
756 			g_gate_log(LOG_DEBUG, "Unsupported request: %i", req->r_cmd);
757 			req->r_error = EOPNOTSUPP;
758 			if (req->r_data != NULL) {
759 				free(req->r_data);
760 				req->r_data = NULL;
761 			}
762 			break;
763 		}
764 		if (data != (ssize_t)req->r_length) {
765 			/* Report short reads/writes as I/O errors. */
766 			if (errno == 0)
767 				errno = EIO;
768 			g_gate_log(LOG_ERR, "Disk error: %s", strerror(errno));
769 			req->r_error = errno;
770 			if (req->r_data != NULL) {
771 				free(req->r_data);
772 				req->r_data = NULL;
773 			}
774 		}
775 
776 		/*
777 		 * Put the request onto the outgoing queue.
778 		 */
779 		error = pthread_mutex_lock(&outqueue_mtx);
780 		assert(error == 0);
781 		TAILQ_INSERT_TAIL(&outqueue, req, r_next);
782 		error = pthread_cond_signal(&outqueue_cond);
783 		assert(error == 0);
784 		error = pthread_mutex_unlock(&outqueue_mtx);
785 		assert(error == 0);
786 	}
787 
788 	/* NOTREACHED */
789 	return (NULL);
790 }
791 
792 static void *
793 send_thread(void *arg)
794 {
795 	struct ggd_connection *conn;
796 	struct ggd_request *req;
797 	ssize_t data;
798 	int error, fd;
799 
800 	conn = arg;
801 	g_gate_log(LOG_NOTICE, "%s: started [%s]!", __func__, conn->c_path);
802 	fd = conn->c_sendfd;
803 	for (;;) {
804 		/*
805 		 * Get a request from the outgoing queue.
806 		 */
807 		error = pthread_mutex_lock(&outqueue_mtx);
808 		assert(error == 0);
809 		while ((req = TAILQ_FIRST(&outqueue)) == NULL) {
810 			error = pthread_cond_wait(&outqueue_cond,
811 			    &outqueue_mtx);
812 			assert(error == 0);
813 		}
814 		TAILQ_REMOVE(&outqueue, req, r_next);
815 		error = pthread_mutex_unlock(&outqueue_mtx);
816 		assert(error == 0);
817 
818 		g_gate_log(LOG_DEBUG, "%s: offset=%" PRIu64 " length=%" PRIu32,
819 		    __func__, req->r_offset, req->r_length);
820 
821 		/*
822 		 * Send the request.
823 		 */
824 		g_gate_swap2n_hdr(&req->r_hdr);
825 		if (g_gate_send(fd, &req->r_hdr, sizeof(req->r_hdr), 0) == -1) {
826 			g_gate_xlog("Error while sending hdr packet: %s.",
827 			    strerror(errno));
828 		}
829 		g_gate_log(LOG_DEBUG, "Sent hdr packet.");
830 		g_gate_swap2h_hdr(&req->r_hdr);
831 		if (req->r_data != NULL) {
832 			data = g_gate_send(fd, req->r_data, req->r_length, 0);
833 			if (data != (ssize_t)req->r_length) {
834 				g_gate_xlog("Error while sending data: %s.",
835 				    strerror(errno));
836 			}
837 			g_gate_log(LOG_DEBUG,
838 			    "Sent %zd bytes (offset=%" PRIu64 ", size=%" PRIu32
839 			    ").", data, req->r_offset, req->r_length);
840 			free(req->r_data);
841 		}
842 		free(req);
843 	}
844 
845 	/* NOTREACHED */
846 	return (NULL);
847 }
848 
/*
 * Log, at info level, the IPv4 address a new connection came from.
 */
static void
log_connection(struct sockaddr *from)
{
	struct sockaddr_in *sin;

	sin = (struct sockaddr_in *)(void *)from;
	g_gate_log(LOG_INFO, "Connection from: %s.",
	    ip2str(htonl(sin->sin_addr.s_addr)));
}
857 
858 static int
859 handshake(struct sockaddr *from, int sfd)
860 {
861 	struct g_gate_version ver;
862 	struct g_gate_cinit cinit;
863 	struct g_gate_sinit sinit;
864 	struct ggd_connection *conn;
865 	struct ggd_export *ex;
866 	ssize_t data;
867 
868 	log_connection(from);
869 	/*
870 	 * Phase 1: Version verification.
871 	 */
872 	g_gate_log(LOG_DEBUG, "Receiving version packet.");
873 	data = g_gate_recv(sfd, &ver, sizeof(ver), MSG_WAITALL);
874 	g_gate_swap2h_version(&ver);
875 	if (data != sizeof(ver)) {
876 		g_gate_log(LOG_WARNING, "Malformed version packet.");
877 		return (0);
878 	}
879 	g_gate_log(LOG_DEBUG, "Version packet received.");
880 	if (memcmp(ver.gv_magic, GGATE_MAGIC, strlen(GGATE_MAGIC)) != 0) {
881 		g_gate_log(LOG_WARNING, "Invalid magic field.");
882 		return (0);
883 	}
884 	if (ver.gv_version != GGATE_VERSION) {
885 		g_gate_log(LOG_WARNING, "Version %u is not supported.",
886 		    ver.gv_version);
887 		return (0);
888 	}
889 	ver.gv_error = 0;
890 	g_gate_swap2n_version(&ver);
891 	data = g_gate_send(sfd, &ver, sizeof(ver), 0);
892 	g_gate_swap2h_version(&ver);
893 	if (data == -1) {
894 		sendfail(sfd, errno, "Error while sending version packet: %s.",
895 		    strerror(errno));
896 		return (0);
897 	}
898 
899 	/*
900 	 * Phase 2: Request verification.
901 	 */
902 	g_gate_log(LOG_DEBUG, "Receiving initial packet.");
903 	data = g_gate_recv(sfd, &cinit, sizeof(cinit), MSG_WAITALL);
904 	g_gate_swap2h_cinit(&cinit);
905 	if (data != sizeof(cinit)) {
906 		g_gate_log(LOG_WARNING, "Malformed initial packet.");
907 		return (0);
908 	}
909 	g_gate_log(LOG_DEBUG, "Initial packet received.");
910 	conn = connection_find(&cinit);
911 	if (conn != NULL) {
912 		/*
913 		 * Connection should already exists.
914 		 */
915 		g_gate_log(LOG_DEBUG, "Found existing connection (token=%lu).",
916 		    (unsigned long)conn->c_token);
917 		if (connection_add(conn, &cinit, from, sfd) == -1) {
918 			connection_remove(conn);
919 			return (0);
920 		}
921 	} else {
922 		/*
923 		 * New connection, allocate space.
924 		 */
925 		conn = connection_new(&cinit, from, sfd);
926 		if (conn == NULL) {
927 			sendfail(sfd, ENOMEM,
928 			    "Cannot allocate new connection.");
929 			return (0);
930 		}
931 		g_gate_log(LOG_DEBUG, "New connection created (token=%lu).",
932 		    (unsigned long)conn->c_token);
933 	}
934 
935 	ex = exports_find(from, &cinit, conn);
936 	if (ex == NULL) {
937 		sendfail(sfd, errno, NULL);
938 		connection_remove(conn);
939 		return (0);
940 	}
941 	if (conn->c_mediasize == 0) {
942 		conn->c_mediasize = g_gate_mediasize(conn->c_diskfd);
943 		conn->c_sectorsize = g_gate_sectorsize(conn->c_diskfd);
944 	}
945 	sinit.gs_mediasize = conn->c_mediasize;
946 	sinit.gs_sectorsize = conn->c_sectorsize;
947 	sinit.gs_error = 0;
948 
949 	g_gate_log(LOG_DEBUG, "Sending initial packet.");
950 
951 	g_gate_swap2n_sinit(&sinit);
952 	data = g_gate_send(sfd, &sinit, sizeof(sinit), 0);
953 	g_gate_swap2h_sinit(&sinit);
954 	if (data == -1) {
955 		sendfail(sfd, errno, "Error while sending initial packet: %s.",
956 		    strerror(errno));
957 		return (0);
958 	}
959 
960 	if (connection_ready(conn)) {
961 		connection_launch(conn);
962 		connection_remove(conn);
963 	}
964 	return (1);
965 }
966 
/*
 * SIGHUP handler: only records that the signal arrived by setting
 * got_sighup.  The flag is examined and cleared by the accept loop in
 * main(), which then re-reads the exports file.
 */
static void
huphandler(int sig __unused)
{

	got_sighup = 1;
}
973 
974 int
975 main(int argc, char *argv[])
976 {
977 	const char *ggated_pidfile = _PATH_VARRUN "/ggated.pid";
978 	struct pidfh *pfh;
979 	struct sockaddr_in serv;
980 	struct sockaddr from;
981 	socklen_t fromlen;
982 	pid_t otherpid;
983 	int ch, sfd, tmpsfd;
984 	unsigned port;
985 
986 	bindaddr = htonl(INADDR_ANY);
987 	port = G_GATE_PORT;
988 	while ((ch = getopt(argc, argv, "a:hnp:F:R:S:v")) != -1) {
989 		switch (ch) {
990 		case 'a':
991 			bindaddr = g_gate_str2ip(optarg);
992 			if (bindaddr == INADDR_NONE) {
993 				errx(EXIT_FAILURE,
994 				    "Invalid IP/host name to bind to.");
995 			}
996 			break;
997 		case 'F':
998 			ggated_pidfile = optarg;
999 			break;
1000 		case 'n':
1001 			nagle = 0;
1002 			break;
1003 		case 'p':
1004 			errno = 0;
1005 			port = strtoul(optarg, NULL, 10);
1006 			if (port == 0 && errno != 0)
1007 				errx(EXIT_FAILURE, "Invalid port.");
1008 			break;
1009 		case 'R':
1010 			errno = 0;
1011 			rcvbuf = strtoul(optarg, NULL, 10);
1012 			if (rcvbuf == 0 && errno != 0)
1013 				errx(EXIT_FAILURE, "Invalid rcvbuf.");
1014 			break;
1015 		case 'S':
1016 			errno = 0;
1017 			sndbuf = strtoul(optarg, NULL, 10);
1018 			if (sndbuf == 0 && errno != 0)
1019 				errx(EXIT_FAILURE, "Invalid sndbuf.");
1020 			break;
1021 		case 'v':
1022 			g_gate_verbose++;
1023 			break;
1024 		case 'h':
1025 		default:
1026 			usage();
1027 		}
1028 	}
1029 	argc -= optind;
1030 	argv += optind;
1031 
1032 	if (argv[0] != NULL)
1033 		exports_file = argv[0];
1034 	exports_get();
1035 
1036 	pfh = pidfile_open(ggated_pidfile, 0600, &otherpid);
1037 	if (pfh == NULL) {
1038 		if (errno == EEXIST) {
1039 			errx(EXIT_FAILURE, "Daemon already running, pid: %jd.",
1040 			    (intmax_t)otherpid);
1041 		}
1042 		err(EXIT_FAILURE, "Cannot open/create pidfile");
1043 	}
1044 
1045 	if (!g_gate_verbose) {
1046 		/* Run in daemon mode. */
1047 		if (daemon(0, 0) == -1)
1048 			g_gate_xlog("Cannot daemonize: %s", strerror(errno));
1049 	}
1050 
1051 	pidfile_write(pfh);
1052 
1053 	signal(SIGCHLD, SIG_IGN);
1054 
1055 	sfd = socket(AF_INET, SOCK_STREAM, 0);
1056 	if (sfd == -1)
1057 		g_gate_xlog("Cannot open stream socket: %s.", strerror(errno));
1058 	bzero(&serv, sizeof(serv));
1059 	serv.sin_family = AF_INET;
1060 	serv.sin_addr.s_addr = bindaddr;
1061 	serv.sin_port = htons(port);
1062 
1063 	g_gate_socket_settings(sfd);
1064 
1065 	if (bind(sfd, (struct sockaddr *)&serv, sizeof(serv)) == -1)
1066 		g_gate_xlog("bind(): %s.", strerror(errno));
1067 	if (listen(sfd, 5) == -1)
1068 		g_gate_xlog("listen(): %s.", strerror(errno));
1069 
1070 	g_gate_log(LOG_INFO, "Listen on port: %d.", port);
1071 
1072 	signal(SIGHUP, huphandler);
1073 
1074 	for (;;) {
1075 		fromlen = sizeof(from);
1076 		tmpsfd = accept(sfd, &from, &fromlen);
1077 		if (tmpsfd == -1)
1078 			g_gate_xlog("accept(): %s.", strerror(errno));
1079 
1080 		if (got_sighup) {
1081 			got_sighup = 0;
1082 			exports_get();
1083 		}
1084 
1085 		if (!handshake(&from, tmpsfd))
1086 			close(tmpsfd);
1087 	}
1088 	close(sfd);
1089 	pidfile_remove(pfh);
1090 	exit(EXIT_SUCCESS);
1091 }
1092