xref: /freebsd/sbin/ggate/ggated/ggated.c (revision 59f5f100b774de8824fb2fc1a8a11a93bbc2dafd)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2004 Pawel Jakub Dawidek <pjd@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/param.h>
30 #include <sys/bio.h>
31 #include <sys/disk.h>
32 #include <sys/endian.h>
33 #include <sys/ioctl.h>
34 #include <sys/queue.h>
35 #include <sys/socket.h>
36 #include <sys/stat.h>
37 #include <sys/time.h>
38 #include <arpa/inet.h>
39 #include <netinet/in.h>
40 #include <netinet/tcp.h>
41 #include <assert.h>
42 #include <err.h>
43 #include <errno.h>
44 #include <inttypes.h>
45 #include <fcntl.h>
46 #include <libgen.h>
47 #include <libutil.h>
48 #include <paths.h>
49 #include <pthread.h>
50 #include <signal.h>
51 #include <stdarg.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <stdint.h>
55 #include <string.h>
56 #include <syslog.h>
57 #include <unistd.h>
58 
59 #include "ggate.h"
60 
61 
62 #define	GGATED_EXPORT_FILE	"/etc/gg.exports"
63 
/*
 * One client connection.  An entry is created by connection_new() when the
 * first of the client's two sockets arrives, completed by connection_add()
 * when the second one arrives, and handed to a forked worker process by
 * connection_launch().
 */
struct ggd_connection {
	off_t		 c_mediasize;	/* media size sent to the client; 0 until queried */
	unsigned	 c_sectorsize;	/* sector size sent to the client */
	int		 c_flags;	/* flags (RO/RW) */
	int		 c_diskfd;	/* exported file/device descriptor, -1 if not open */
	int		 c_sendfd;	/* socket used by send_thread(), -1 if not attached */
	int		 c_recvfd;	/* socket used by recv_thread(), -1 if not attached */
	time_t		 c_birthtime;	/* creation time; see connection_cleanups() */
	char		*c_path;	/* strdup()ed copy of the requested path */
	uint64_t	 c_token;	/* client token matched by connection_find() */
	in_addr_t	 c_srcip;	/* client address in the form ip2str() expects */
	LIST_ENTRY(ggd_connection) c_next;
};
77 
/*
 * One I/O request travelling through the worker: recv_thread() fills it in,
 * disk_thread() executes it and send_thread() returns it to the client.
 */
struct ggd_request {
	struct g_gate_hdr	 r_hdr;		/* header as received from the client */
	char			*r_data;	/* payload buffer; NULL once it has been freed */
	TAILQ_ENTRY(ggd_request) r_next;
};
/* Shorthand accessors for the header fields of a request. */
#define	r_cmd		r_hdr.gh_cmd
#define	r_offset	r_hdr.gh_offset
#define	r_length	r_hdr.gh_length
#define	r_error		r_hdr.gh_error
87 
/*
 * Export flags stored in ggd_export.e_flags.  The access mode lives in the
 * low two bits; EFLAGS_RDONLY is zero, so it must be tested by comparing
 * (e_flags & EFLAGS_ACCMODE) rather than by masking with the flag itself.
 */
#define EFLAGS_RDONLY	0x0000
#define EFLAGS_WRONLY	0x0001
#define EFLAGS_RDWR	0x0002
#define EFLAGS_ACCMODE	0x0003
#define EFLAGS_DIRECT	0x0004
#define EFLAGS_NODIRECT	0x0008
94 
/*
 * One line of the exports file: which clients (address/mask) may access
 * which path and how.  Entries are built by line_parse().
 */
struct ggd_export {
	char		*e_path;	/* path to device/file */
	in_addr_t	 e_ip;		/* remote IP address */
	in_addr_t	 e_mask;	/* IP mask */
	int		 e_flags;	/* flags (WO/RO/RW/DIRECT/NODIRECT) */
	SLIST_ENTRY(ggd_export) e_next;
};
102 
103 static const char *exports_file = GGATED_EXPORT_FILE;
104 static int got_sighup = 0;
105 static in_addr_t bindaddr;
106 
107 static TAILQ_HEAD(, ggd_request) inqueue = TAILQ_HEAD_INITIALIZER(inqueue);
108 static TAILQ_HEAD(, ggd_request) outqueue = TAILQ_HEAD_INITIALIZER(outqueue);
109 static pthread_mutex_t inqueue_mtx, outqueue_mtx;
110 static pthread_cond_t inqueue_cond, outqueue_cond;
111 
112 static SLIST_HEAD(, ggd_export) exports = SLIST_HEAD_INITIALIZER(exports);
113 static LIST_HEAD(, ggd_connection) connections = LIST_HEAD_INITIALIZER(connections);
114 
115 static void *recv_thread(void *arg);
116 static void *disk_thread(void *arg);
117 static void *send_thread(void *arg);
118 
/*
 * Print the command-line synopsis to stderr and terminate with a failure
 * status.  Does not return.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	fprintf(stderr, "usage: %s [-nv] [-a address] [-F pidfile] [-p port] "
	    "[-R rcvbuf] [-S sndbuf] [exports file]\n", prog);
	exit(EXIT_FAILURE);
}
127 
/*
 * Format an IPv4 address as dotted-quad text, most significant byte first.
 *
 * The result lives in a single static buffer: it is overwritten by the next
 * call, so copy it before calling ip2str() again.
 */
static char *
ip2str(in_addr_t ip)
{
	static char text[16];	/* "255.255.255.255" plus the terminator */
	unsigned octet[4];
	int n;

	/* octet[0] is the top byte, octet[3] the bottom one. */
	for (n = 0; n < 4; n++)
		octet[n] = (ip >> (24 - 8 * n)) & 0xff;
	snprintf(text, sizeof(text), "%u.%u.%u.%u",
	    octet[0], octet[1], octet[2], octet[3]);
	return (text);
}
140 
/*
 * Convert a prefix length into an IPv4 netmask with the top m bits set.
 *
 * m == 0 gives an empty mask and m >= 32 gives an all-ones mask.  The shift
 * is done on an unsigned 32-bit value: the previous "1 << (32 - m)" shifted
 * a signed int into its sign bit for m == 1 and by a negative count for
 * m > 32, both of which are undefined behavior.
 */
static in_addr_t
countmask(unsigned m)
{

	if (m == 0)
		return (0);
	if (m >= 32)
		return (~(in_addr_t)0);
	return (~(((in_addr_t)1 << (32 - m)) - 1));
}
155 
156 static int
157 parse_flags(const char *flagsstr, int lineno)
158 {
159 	char *flagscpy;
160 	char *word, *brkf;
161 	int access_flags = -1;
162 	int direct_flags = 0;
163 
164 	flagscpy = strdup(flagsstr);
165 	if (flagscpy == NULL) {
166 		g_gate_xlog("Not enough memory.");
167 	}
168 
169 	for (word = strtok_r(flagscpy, ",", &brkf);
170 	     word != NULL;
171 	     word = strtok_r(NULL, ",", &brkf)) {
172 		if (strcasecmp("ro", word) == 0 ||
173 		    strcasecmp("rd", word) == 0) {
174 			access_flags = EFLAGS_RDONLY;
175 		} else if (strcasecmp("wo", word) == 0) {
176 			access_flags = EFLAGS_WRONLY;
177 		} else if (strcasecmp("rw", word) == 0) {
178 			access_flags = EFLAGS_RDWR;
179 		} else if (strcasecmp("direct", word) == 0) {
180 			direct_flags = EFLAGS_DIRECT;
181 		} else if (strcasecmp("nodirect", word) == 0) {
182 			direct_flags = EFLAGS_NODIRECT;
183 		} else {
184 			g_gate_xlog("Invalid value (%s) in flags field at "
185                             "line %u.", word, lineno);
186 		}
187 	}
188 	free(flagscpy);
189 	if (access_flags == -1) {
190 		g_gate_xlog("Invalid value (%s) in flags field at "
191 		    "line %u.", flagsstr, lineno);
192 	}
193 	return (direct_flags | access_flags);
194 }
195 
196 static void
197 line_parse(char *line, unsigned lineno)
198 {
199 	struct ggd_export *ex;
200 	char *word, *path, *sflags, *brkl;
201 	unsigned i, vmask;
202 	int flags;
203 	in_addr_t ip, mask;
204 
205 	ip = mask = flags = vmask = 0;
206 	path = NULL;
207 	sflags = NULL;
208 
209 	for (i = 0, word = strtok_r(line, " \t", &brkl); word != NULL;
210 	    i++, word = strtok_r(NULL, " \t", &brkl)) {
211 		switch (i) {
212 		case 0: /* IP address or host name */
213 			ip = g_gate_str2ip(strsep(&word, "/"));
214 			if (ip == INADDR_NONE) {
215 				g_gate_xlog("Invalid IP/host name at line %u.",
216 				    lineno);
217 			}
218 			ip = ntohl(ip);
219 			if (word == NULL)
220 				vmask = 32;
221 			else {
222 				errno = 0;
223 				vmask = strtoul(word, NULL, 10);
224 				if (vmask == 0 && errno != 0) {
225 					g_gate_xlog("Invalid IP mask value at "
226 					    "line %u.", lineno);
227 				}
228 				if ((unsigned)vmask > 32) {
229 					g_gate_xlog("Invalid IP mask value at line %u.",
230 					    lineno);
231 				}
232 			}
233 			mask = countmask(vmask);
234 			break;
235 		case 1:	/* flags */
236 			flags = parse_flags(word, lineno);
237 			sflags = word;
238 			break;
239 		case 2:	/* path */
240 			if (strlen(word) >= MAXPATHLEN) {
241 				g_gate_xlog("Path too long at line %u. ",
242 				    lineno);
243 			}
244 			path = word;
245 			break;
246 		default:
247 			g_gate_xlog("Too many arguments at line %u. ", lineno);
248 		}
249 	}
250 	if (i != 3)
251 		g_gate_xlog("Too few arguments at line %u.", lineno);
252 
253 	ex = malloc(sizeof(*ex));
254 	if (ex == NULL)
255 		g_gate_xlog("Not enough memory.");
256 	ex->e_path = strdup(path);
257 	if (ex->e_path == NULL)
258 		g_gate_xlog("Not enough memory.");
259 
260 	/* Made 'and' here. */
261 	ex->e_ip = (ip & mask);
262 	ex->e_mask = mask;
263 	ex->e_flags = flags;
264 
265 	SLIST_INSERT_HEAD(&exports, ex, e_next);
266 
267 	g_gate_log(LOG_DEBUG, "Added %s/%u %s %s to exports list.",
268 	    ip2str(ex->e_ip), vmask, path, sflags);
269 }
270 
271 static void
272 exports_clear(void)
273 {
274 	struct ggd_export *ex;
275 
276 	while (!SLIST_EMPTY(&exports)) {
277 		ex = SLIST_FIRST(&exports);
278 		SLIST_REMOVE_HEAD(&exports, e_next);
279 		free(ex);
280 	}
281 }
282 
283 #define	EXPORTS_LINE_SIZE	2048
284 static void
285 exports_get(void)
286 {
287 	char buf[EXPORTS_LINE_SIZE], *line;
288 	unsigned lineno = 0, objs = 0, len;
289 	FILE *fd;
290 
291 	exports_clear();
292 
293 	fd = fopen(exports_file, "r");
294 	if (fd == NULL) {
295 		g_gate_xlog("Cannot open exports file (%s): %s.", exports_file,
296 		    strerror(errno));
297 	}
298 
299 	g_gate_log(LOG_INFO, "Reading exports file (%s).", exports_file);
300 
301 	for (;;) {
302 		if (fgets(buf, sizeof(buf), fd) == NULL) {
303 			if (feof(fd))
304 				break;
305 
306 			g_gate_xlog("Error while reading exports file: %s.",
307 			    strerror(errno));
308 		}
309 
310 		/* Increase line count. */
311 		lineno++;
312 
313 		/* Skip spaces and tabs. */
314 		for (line = buf; *line == ' ' || *line == '\t'; ++line)
315 			;
316 
317 		/* Empty line, comment or empty line at the end of file. */
318 		if (*line == '\n' || *line == '#' || *line == '\0')
319 			continue;
320 
321 		len = strlen(line);
322 		if (line[len - 1] == '\n') {
323 			/* Remove new line char. */
324 			line[len - 1] = '\0';
325 		} else {
326 			if (!feof(fd))
327 				g_gate_xlog("Line %u too long.", lineno);
328 		}
329 
330 		line_parse(line, lineno);
331 		objs++;
332 	}
333 
334 	fclose(fd);
335 
336 	if (objs == 0)
337 		g_gate_xlog("There are no objects to export.");
338 
339 	g_gate_log(LOG_INFO, "Exporting %u object(s).", objs);
340 }
341 
342 static int
343 exports_check(struct ggd_export *ex, struct g_gate_cinit *cinit,
344     struct ggd_connection *conn)
345 {
346 	char ipmask[32]; /* 32 == strlen("xxx.xxx.xxx.xxx/xxx.xxx.xxx.xxx")+1 */
347 	int error = 0, flags, access_flags, direct_flags = 0;
348 
349 	strlcpy(ipmask, ip2str(ex->e_ip), sizeof(ipmask));
350 	strlcat(ipmask, "/", sizeof(ipmask));
351 	strlcat(ipmask, ip2str(ex->e_mask), sizeof(ipmask));
352 
353 	access_flags = ex->e_flags & EFLAGS_ACCMODE;
354 
355 	if ((cinit->gc_flags & GGATE_FLAG_RDONLY) != 0) {
356 		if (access_flags == EFLAGS_WRONLY) {
357 			g_gate_log(LOG_WARNING, "Read-only access requested, "
358 			    "but %s (%s) is exported write-only.", ex->e_path,
359 			    ipmask);
360 			return (EPERM);
361 		} else {
362 			conn->c_flags |= GGATE_FLAG_RDONLY;
363 		}
364 	} else if ((cinit->gc_flags & GGATE_FLAG_WRONLY) != 0) {
365 		if (access_flags == EFLAGS_RDONLY) {
366 			g_gate_log(LOG_WARNING, "Write-only access requested, "
367 			    "but %s (%s) is exported read-only.", ex->e_path,
368 			    ipmask);
369 			return (EPERM);
370 		} else {
371 			conn->c_flags |= GGATE_FLAG_WRONLY;
372 		}
373 	} else {
374 		if (access_flags == EFLAGS_RDONLY) {
375 			g_gate_log(LOG_WARNING, "Read-write access requested, "
376 			    "but %s (%s) is exported read-only.", ex->e_path,
377 			    ipmask);
378 			return (EPERM);
379 		} else if (access_flags == EFLAGS_WRONLY) {
380 			g_gate_log(LOG_WARNING, "Read-write access requested, "
381 			    "but %s (%s) is exported write-only.", ex->e_path,
382 			    ipmask);
383 			return (EPERM);
384 		}
385 	}
386 
387 	if ((cinit->gc_flags & GGATE_FLAG_DIRECT) != 0) {
388 		if (ex->e_flags & EFLAGS_NODIRECT) {
389 			g_gate_log(LOG_WARNING, "Direct IO requested, "
390 			    "but %s (%s) is exported NODIRECT.", ex->e_path,
391 			    ipmask);
392 		} else {
393 			conn->c_flags |= GGATE_FLAG_DIRECT;
394 			direct_flags = O_DIRECT;
395 		}
396 	}
397 
398 	if (ex->e_flags & EFLAGS_DIRECT) {
399 		direct_flags = O_DIRECT;
400 	}
401 
402 	if ((conn->c_flags & GGATE_FLAG_RDONLY) != 0)
403 		flags = O_RDONLY;
404 	else if ((conn->c_flags & GGATE_FLAG_WRONLY) != 0)
405 		flags = O_WRONLY;
406 	else
407 		flags = O_RDWR;
408 	flags |= direct_flags;
409 	if (conn->c_diskfd != -1) {
410 		if (strcmp(conn->c_path, ex->e_path) != 0) {
411 			g_gate_log(LOG_ERR, "old %s and new %s: "
412 			    "Path mismatch during handshakes.",
413 			    conn->c_path, ex->e_path);
414 			return (EPERM);
415 		}
416 		return (0);
417 	}
418 
419 	conn->c_diskfd = open(ex->e_path, flags);
420 	if (conn->c_diskfd == -1) {
421 		error = errno;
422 		g_gate_log(LOG_ERR, "Cannot open %s: %s.", ex->e_path,
423 		    strerror(error));
424 		return (error);
425 	}
426 	return (0);
427 }
428 
429 static struct ggd_export *
430 exports_find(struct sockaddr *s, struct g_gate_cinit *cinit,
431     struct ggd_connection *conn)
432 {
433 	struct ggd_export *ex;
434 	in_addr_t ip;
435 	int error;
436 
437 	ip = htonl(((struct sockaddr_in *)(void *)s)->sin_addr.s_addr);
438 	SLIST_FOREACH(ex, &exports, e_next) {
439 		if ((ip & ex->e_mask) != ex->e_ip) {
440 			g_gate_log(LOG_DEBUG, "exports[%s]: IP mismatch.",
441 			    ex->e_path);
442 			continue;
443 		}
444 		if (strcmp(cinit->gc_path, ex->e_path) != 0) {
445 			g_gate_log(LOG_DEBUG, "exports[%s]: Path mismatch.",
446 			    ex->e_path);
447 			continue;
448 		}
449 		error = exports_check(ex, cinit, conn);
450 		if (error == 0)
451 			return (ex);
452 		else {
453 			errno = error;
454 			return (NULL);
455 		}
456 	}
457 	g_gate_log(LOG_WARNING, "Unauthorized connection from: %s.",
458 	    ip2str(ip));
459 	errno = EPERM;
460 	return (NULL);
461 }
462 
463 /*
464  * Remove timed out connections.
465  */
466 static void
467 connection_cleanups(void)
468 {
469 	struct ggd_connection *conn, *tconn;
470 	time_t now;
471 
472 	time(&now);
473 	LIST_FOREACH_SAFE(conn, &connections, c_next, tconn) {
474 		if (now - conn->c_birthtime > 10) {
475 			LIST_REMOVE(conn, c_next);
476 			g_gate_log(LOG_NOTICE,
477 			    "Connection from %s [%s] removed.",
478 			    ip2str(conn->c_srcip), conn->c_path);
479 			close(conn->c_diskfd);
480 			close(conn->c_sendfd);
481 			close(conn->c_recvfd);
482 			free(conn->c_path);
483 			free(conn);
484 		}
485 	}
486 }
487 
488 static struct ggd_connection *
489 connection_find(struct g_gate_cinit *cinit)
490 {
491 	struct ggd_connection *conn;
492 
493 	LIST_FOREACH(conn, &connections, c_next) {
494 		if (conn->c_token == cinit->gc_token)
495 			break;
496 	}
497 	return (conn);
498 }
499 
500 static struct ggd_connection *
501 connection_new(struct g_gate_cinit *cinit, struct sockaddr *s, int sfd)
502 {
503 	struct ggd_connection *conn;
504 	in_addr_t ip;
505 
506 	/*
507 	 * First, look for old connections.
508 	 * We probably should do it every X seconds, but what for?
509 	 * It is only dangerous if an attacker wants to overload connections
510 	 * queue, so here is a good place to do the cleanups.
511 	 */
512 	connection_cleanups();
513 
514 	conn = malloc(sizeof(*conn));
515 	if (conn == NULL)
516 		return (NULL);
517 	conn->c_path = strdup(cinit->gc_path);
518 	if (conn->c_path == NULL) {
519 		free(conn);
520 		return (NULL);
521 	}
522 	conn->c_token = cinit->gc_token;
523 	ip = htonl(((struct sockaddr_in *)(void *)s)->sin_addr.s_addr);
524 	conn->c_srcip = ip;
525 	conn->c_diskfd = conn->c_sendfd = conn->c_recvfd = -1;
526 	if ((cinit->gc_flags & GGATE_FLAG_SEND) != 0)
527 		conn->c_sendfd = sfd;
528 	else
529 		conn->c_recvfd = sfd;
530 	conn->c_mediasize = 0;
531 	conn->c_sectorsize = 0;
532 	time(&conn->c_birthtime);
533 	conn->c_flags = cinit->gc_flags;
534 	LIST_INSERT_HEAD(&connections, conn, c_next);
535 	g_gate_log(LOG_DEBUG, "Connection created [%s, %s].", ip2str(ip),
536 	    conn->c_path);
537 	return (conn);
538 }
539 
540 static int
541 connection_add(struct ggd_connection *conn, struct g_gate_cinit *cinit,
542     struct sockaddr *s, int sfd)
543 {
544 	in_addr_t ip;
545 
546 	ip = htonl(((struct sockaddr_in *)(void *)s)->sin_addr.s_addr);
547 	if ((cinit->gc_flags & GGATE_FLAG_SEND) != 0) {
548 		if (conn->c_sendfd != -1) {
549 			g_gate_log(LOG_WARNING,
550 			    "Send socket already exists [%s, %s].", ip2str(ip),
551 			    conn->c_path);
552 			return (EEXIST);
553 		}
554 		conn->c_sendfd = sfd;
555 	} else {
556 		if (conn->c_recvfd != -1) {
557 			g_gate_log(LOG_WARNING,
558 			    "Receive socket already exists [%s, %s].",
559 			    ip2str(ip), conn->c_path);
560 			return (EEXIST);
561 		}
562 		conn->c_recvfd = sfd;
563 	}
564 	g_gate_log(LOG_DEBUG, "Connection added [%s, %s].", ip2str(ip),
565 	    conn->c_path);
566 	return (0);
567 }
568 
569 /*
570  * Remove one socket from the given connection or the whole
571  * connection if sfd == -1.
572  */
573 static void
574 connection_remove(struct ggd_connection *conn)
575 {
576 
577 	LIST_REMOVE(conn, c_next);
578 	g_gate_log(LOG_DEBUG, "Connection removed [%s %s].",
579 	    ip2str(conn->c_srcip), conn->c_path);
580 	if (conn->c_diskfd != -1)
581 		close(conn->c_diskfd);
582 	if (conn->c_sendfd != -1)
583 		close(conn->c_sendfd);
584 	if (conn->c_recvfd != -1)
585 		close(conn->c_recvfd);
586 	free(conn->c_path);
587 	free(conn);
588 }
589 
590 static int
591 connection_ready(struct ggd_connection *conn)
592 {
593 
594 	return (conn->c_sendfd != -1 && conn->c_recvfd != -1);
595 }
596 
597 static void
598 connection_launch(struct ggd_connection *conn)
599 {
600 	pthread_t td;
601 	int error, pid;
602 
603 	pid = fork();
604 	if (pid > 0)
605 		return;
606 	else if (pid == -1) {
607 		g_gate_log(LOG_ERR, "Cannot fork: %s.", strerror(errno));
608 		return;
609 	}
610 	g_gate_log(LOG_DEBUG, "Process created [%s].", conn->c_path);
611 
612 	/*
613 	 * Create condition variables and mutexes for in-queue and out-queue
614 	 * synchronization.
615 	 */
616 	error = pthread_mutex_init(&inqueue_mtx, NULL);
617 	if (error != 0) {
618 		g_gate_xlog("pthread_mutex_init(inqueue_mtx): %s.",
619 		    strerror(error));
620 	}
621 	error = pthread_cond_init(&inqueue_cond, NULL);
622 	if (error != 0) {
623 		g_gate_xlog("pthread_cond_init(inqueue_cond): %s.",
624 		    strerror(error));
625 	}
626 	error = pthread_mutex_init(&outqueue_mtx, NULL);
627 	if (error != 0) {
628 		g_gate_xlog("pthread_mutex_init(outqueue_mtx): %s.",
629 		    strerror(error));
630 	}
631 	error = pthread_cond_init(&outqueue_cond, NULL);
632 	if (error != 0) {
633 		g_gate_xlog("pthread_cond_init(outqueue_cond): %s.",
634 		    strerror(error));
635 	}
636 
637 	/*
638 	 * Create threads:
639 	 * recvtd - thread for receiving I/O request
640 	 * diskio - thread for doing I/O request
641 	 * sendtd - thread for sending I/O requests back
642 	 */
643 	error = pthread_create(&td, NULL, send_thread, conn);
644 	if (error != 0) {
645 		g_gate_xlog("pthread_create(send_thread): %s.",
646 		    strerror(error));
647 	}
648 	error = pthread_create(&td, NULL, recv_thread, conn);
649 	if (error != 0) {
650 		g_gate_xlog("pthread_create(recv_thread): %s.",
651 		    strerror(error));
652 	}
653 	disk_thread(conn);
654 }
655 
656 static void
657 sendfail(int sfd, int error, const char *fmt, ...)
658 {
659 	struct g_gate_sinit sinit;
660 	va_list ap;
661 	ssize_t data;
662 
663 	memset(&sinit, 0, sizeof(sinit));
664 	sinit.gs_error = error;
665 	g_gate_swap2n_sinit(&sinit);
666 	data = g_gate_send(sfd, &sinit, sizeof(sinit), 0);
667 	g_gate_swap2h_sinit(&sinit);
668 	if (data != sizeof(sinit)) {
669 		g_gate_log(LOG_WARNING, "Cannot send initial packet: %s.",
670 		    strerror(errno));
671 		return;
672 	}
673 	if (fmt != NULL) {
674 		va_start(ap, fmt);
675 		g_gate_vlog(LOG_WARNING, fmt, ap);
676 		va_end(ap);
677 	}
678 }
679 
/*
 * Allocate 'size' bytes, retrying once a second for as long as malloc()
 * returns NULL.  Each failed attempt is logged at LOG_DEBUG.  Only returns
 * once an allocation has succeeded.
 */
static void *
malloc_waitok(size_t size)
{
	void *mem;

	for (;;) {
		mem = malloc(size);
		if (mem != NULL)
			return (mem);
		g_gate_log(LOG_DEBUG, "Cannot allocate %zu bytes.", size);
		sleep(1);
	}
}
691 
692 static void *
693 recv_thread(void *arg)
694 {
695 	struct ggd_connection *conn;
696 	struct ggd_request *req;
697 	ssize_t data;
698 	int error, fd;
699 
700 	conn = arg;
701 	g_gate_log(LOG_NOTICE, "%s: started [%s]!", __func__, conn->c_path);
702 	fd = conn->c_recvfd;
703 	for (;;) {
704 		/*
705 		 * Get header packet.
706 		 */
707 		req = malloc_waitok(sizeof(*req));
708 		data = g_gate_recv(fd, &req->r_hdr, sizeof(req->r_hdr),
709 		    MSG_WAITALL);
710 		if (data == 0) {
711 			g_gate_log(LOG_DEBUG, "Process %u exiting.", getpid());
712 			exit(EXIT_SUCCESS);
713 		} else if (data == -1) {
714 			g_gate_xlog("Error while receiving hdr packet: %s.",
715 			    strerror(errno));
716 		} else if (data != sizeof(req->r_hdr)) {
717 			g_gate_xlog("Malformed hdr packet received.");
718 		}
719 		g_gate_log(LOG_DEBUG, "Received hdr packet.");
720 		g_gate_swap2h_hdr(&req->r_hdr);
721 
722 		g_gate_log(LOG_DEBUG, "%s: offset=%" PRIu64 " length=%" PRIu32,
723 		    __func__, req->r_offset, req->r_length);
724 
725 		/*
726 		 * Allocate memory for data.
727 		 */
728 		req->r_data = malloc_waitok(req->r_length);
729 
730 		/*
731 		 * Receive data to write for WRITE request.
732 		 */
733 		if (req->r_cmd == GGATE_CMD_WRITE) {
734 			g_gate_log(LOG_DEBUG, "Waiting for %u bytes of data...",
735 			    req->r_length);
736 			data = g_gate_recv(fd, req->r_data, req->r_length,
737 			    MSG_WAITALL);
738 			if (data == -1) {
739 				g_gate_xlog("Error while receiving data: %s.",
740 				    strerror(errno));
741 			}
742 		}
743 
744 		/*
745 		 * Put the request onto the incoming queue.
746 		 */
747 		error = pthread_mutex_lock(&inqueue_mtx);
748 		assert(error == 0);
749 		TAILQ_INSERT_TAIL(&inqueue, req, r_next);
750 		error = pthread_cond_signal(&inqueue_cond);
751 		assert(error == 0);
752 		error = pthread_mutex_unlock(&inqueue_mtx);
753 		assert(error == 0);
754 	}
755 }
756 
757 static void *
758 disk_thread(void *arg)
759 {
760 	struct ggd_connection *conn;
761 	struct ggd_request *req;
762 	ssize_t data;
763 	int error, fd;
764 
765 	conn = arg;
766 	g_gate_log(LOG_NOTICE, "%s: started [%s]!", __func__, conn->c_path);
767 	fd = conn->c_diskfd;
768 	for (;;) {
769 		/*
770 		 * Get a request from the incoming queue.
771 		 */
772 		error = pthread_mutex_lock(&inqueue_mtx);
773 		assert(error == 0);
774 		while ((req = TAILQ_FIRST(&inqueue)) == NULL) {
775 			error = pthread_cond_wait(&inqueue_cond, &inqueue_mtx);
776 			assert(error == 0);
777 		}
778 		TAILQ_REMOVE(&inqueue, req, r_next);
779 		error = pthread_mutex_unlock(&inqueue_mtx);
780 		assert(error == 0);
781 
782 		/*
783 		 * Check the request.
784 		 */
785 		assert(req->r_offset + req->r_length <= (uintmax_t)conn->c_mediasize);
786 		assert((req->r_offset % conn->c_sectorsize) == 0);
787 		assert((req->r_length % conn->c_sectorsize) == 0);
788 
789 		g_gate_log(LOG_DEBUG, "%s: offset=%" PRIu64 " length=%" PRIu32,
790 		     __func__, req->r_offset, req->r_length);
791 
792 		/*
793 		 * Do the request.
794 		 */
795 		data = 0;
796 		switch (req->r_cmd) {
797 		case GGATE_CMD_READ:
798 			data = pread(fd, req->r_data, req->r_length,
799 			    req->r_offset);
800 			break;
801 		case GGATE_CMD_WRITE:
802 			data = pwrite(fd, req->r_data, req->r_length,
803 			    req->r_offset);
804 			/* Free data memory here - better sooner. */
805 			free(req->r_data);
806 			req->r_data = NULL;
807 			break;
808 		case GGATE_CMD_FLUSH:
809 			data = fsync(fd);
810 			if (data != 0)
811 				req->r_error = errno;
812 			break;
813 		default:
814 			g_gate_log(LOG_DEBUG, "Unsupported request: %i", req->r_cmd);
815 			req->r_error = EOPNOTSUPP;
816 			if (req->r_data != NULL) {
817 				free(req->r_data);
818 				req->r_data = NULL;
819 			}
820 			break;
821 		}
822 		if (data != (ssize_t)req->r_length) {
823 			/* Report short reads/writes as I/O errors. */
824 			if (errno == 0)
825 				errno = EIO;
826 			g_gate_log(LOG_ERR, "Disk error: %s", strerror(errno));
827 			req->r_error = errno;
828 			if (req->r_data != NULL) {
829 				free(req->r_data);
830 				req->r_data = NULL;
831 			}
832 		}
833 
834 		/*
835 		 * Put the request onto the outgoing queue.
836 		 */
837 		error = pthread_mutex_lock(&outqueue_mtx);
838 		assert(error == 0);
839 		TAILQ_INSERT_TAIL(&outqueue, req, r_next);
840 		error = pthread_cond_signal(&outqueue_cond);
841 		assert(error == 0);
842 		error = pthread_mutex_unlock(&outqueue_mtx);
843 		assert(error == 0);
844 	}
845 
846 	/* NOTREACHED */
847 	return (NULL);
848 }
849 
850 static void *
851 send_thread(void *arg)
852 {
853 	struct ggd_connection *conn;
854 	struct ggd_request *req;
855 	ssize_t data;
856 	int error, fd;
857 
858 	conn = arg;
859 	g_gate_log(LOG_NOTICE, "%s: started [%s]!", __func__, conn->c_path);
860 	fd = conn->c_sendfd;
861 	for (;;) {
862 		/*
863 		 * Get a request from the outgoing queue.
864 		 */
865 		error = pthread_mutex_lock(&outqueue_mtx);
866 		assert(error == 0);
867 		while ((req = TAILQ_FIRST(&outqueue)) == NULL) {
868 			error = pthread_cond_wait(&outqueue_cond,
869 			    &outqueue_mtx);
870 			assert(error == 0);
871 		}
872 		TAILQ_REMOVE(&outqueue, req, r_next);
873 		error = pthread_mutex_unlock(&outqueue_mtx);
874 		assert(error == 0);
875 
876 		g_gate_log(LOG_DEBUG, "%s: offset=%" PRIu64 " length=%" PRIu32,
877 		    __func__, req->r_offset, req->r_length);
878 
879 		/*
880 		 * Send the request.
881 		 */
882 		g_gate_swap2n_hdr(&req->r_hdr);
883 		if (g_gate_send(fd, &req->r_hdr, sizeof(req->r_hdr), 0) == -1) {
884 			g_gate_xlog("Error while sending hdr packet: %s.",
885 			    strerror(errno));
886 		}
887 		g_gate_log(LOG_DEBUG, "Sent hdr packet.");
888 		g_gate_swap2h_hdr(&req->r_hdr);
889 		if (req->r_data != NULL) {
890 			data = g_gate_send(fd, req->r_data, req->r_length, 0);
891 			if (data != (ssize_t)req->r_length) {
892 				g_gate_xlog("Error while sending data: %s.",
893 				    strerror(errno));
894 			}
895 			g_gate_log(LOG_DEBUG,
896 			    "Sent %zd bytes (offset=%" PRIu64 ", size=%" PRIu32
897 			    ").", data, req->r_offset, req->r_length);
898 			free(req->r_data);
899 		}
900 		free(req);
901 	}
902 
903 	/* NOTREACHED */
904 	return (NULL);
905 }
906 
/*
 * Log the IPv4 address of a newly accepted peer at LOG_INFO.
 */
static void
log_connection(struct sockaddr *from)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)(void *)from;

	g_gate_log(LOG_INFO, "Connection from: %s.",
	    ip2str(htonl(sin->sin_addr.s_addr)));
}
915 
916 static int
917 handshake(struct sockaddr *from, int sfd)
918 {
919 	struct g_gate_version ver;
920 	struct g_gate_cinit cinit;
921 	struct g_gate_sinit sinit;
922 	struct ggd_connection *conn;
923 	struct ggd_export *ex;
924 	ssize_t data;
925 
926 	log_connection(from);
927 	/*
928 	 * Phase 1: Version verification.
929 	 */
930 	g_gate_log(LOG_DEBUG, "Receiving version packet.");
931 	data = g_gate_recv(sfd, &ver, sizeof(ver), MSG_WAITALL);
932 	g_gate_swap2h_version(&ver);
933 	if (data != sizeof(ver)) {
934 		g_gate_log(LOG_WARNING, "Malformed version packet.");
935 		return (0);
936 	}
937 	g_gate_log(LOG_DEBUG, "Version packet received.");
938 	if (memcmp(ver.gv_magic, GGATE_MAGIC, strlen(GGATE_MAGIC)) != 0) {
939 		g_gate_log(LOG_WARNING, "Invalid magic field.");
940 		return (0);
941 	}
942 	if (ver.gv_version != GGATE_VERSION) {
943 		g_gate_log(LOG_WARNING, "Version %u is not supported.",
944 		    ver.gv_version);
945 		return (0);
946 	}
947 	ver.gv_error = 0;
948 	g_gate_swap2n_version(&ver);
949 	data = g_gate_send(sfd, &ver, sizeof(ver), 0);
950 	g_gate_swap2h_version(&ver);
951 	if (data == -1) {
952 		sendfail(sfd, errno, "Error while sending version packet: %s.",
953 		    strerror(errno));
954 		return (0);
955 	}
956 
957 	/*
958 	 * Phase 2: Request verification.
959 	 */
960 	g_gate_log(LOG_DEBUG, "Receiving initial packet.");
961 	data = g_gate_recv(sfd, &cinit, sizeof(cinit), MSG_WAITALL);
962 	g_gate_swap2h_cinit(&cinit);
963 	if (data != sizeof(cinit)) {
964 		g_gate_log(LOG_WARNING, "Malformed initial packet.");
965 		return (0);
966 	}
967 	g_gate_log(LOG_DEBUG, "Initial packet received.");
968 	conn = connection_find(&cinit);
969 	if (conn != NULL) {
970 		/*
971 		 * Connection should already exists.
972 		 */
973 		g_gate_log(LOG_DEBUG, "Found existing connection (token=%lu).",
974 		    (unsigned long)conn->c_token);
975 		if (connection_add(conn, &cinit, from, sfd) == -1) {
976 			connection_remove(conn);
977 			return (0);
978 		}
979 	} else {
980 		/*
981 		 * New connection, allocate space.
982 		 */
983 		conn = connection_new(&cinit, from, sfd);
984 		if (conn == NULL) {
985 			sendfail(sfd, ENOMEM,
986 			    "Cannot allocate new connection.");
987 			return (0);
988 		}
989 		g_gate_log(LOG_DEBUG, "New connection created (token=%lu).",
990 		    (unsigned long)conn->c_token);
991 	}
992 
993 	ex = exports_find(from, &cinit, conn);
994 	if (ex == NULL) {
995 		sendfail(sfd, errno, NULL);
996 		connection_remove(conn);
997 		return (0);
998 	}
999 	if (conn->c_mediasize == 0) {
1000 		conn->c_mediasize = g_gate_mediasize(conn->c_diskfd);
1001 		conn->c_sectorsize = g_gate_sectorsize(conn->c_diskfd);
1002 	}
1003 	sinit.gs_mediasize = conn->c_mediasize;
1004 	sinit.gs_sectorsize = conn->c_sectorsize;
1005 	sinit.gs_error = 0;
1006 
1007 	g_gate_log(LOG_DEBUG, "Sending initial packet.");
1008 
1009 	g_gate_swap2n_sinit(&sinit);
1010 	data = g_gate_send(sfd, &sinit, sizeof(sinit), 0);
1011 	g_gate_swap2h_sinit(&sinit);
1012 	if (data == -1) {
1013 		sendfail(sfd, errno, "Error while sending initial packet: %s.",
1014 		    strerror(errno));
1015 		return (0);
1016 	}
1017 
1018 	if (connection_ready(conn)) {
1019 		connection_launch(conn);
1020 		connection_remove(conn);
1021 	}
1022 	return (1);
1023 }
1024 
/*
 * SIGHUP handler: only records that the signal arrived.  main() checks
 * got_sighup after each accept() and re-reads the exports file there.
 */
static void
huphandler(int sig __unused)
{

	got_sighup = 1;
}
1031 
1032 int
1033 main(int argc, char *argv[])
1034 {
1035 	const char *ggated_pidfile = _PATH_VARRUN "/ggated.pid";
1036 	struct pidfh *pfh;
1037 	struct sockaddr_in serv;
1038 	struct sockaddr from;
1039 	socklen_t fromlen;
1040 	pid_t otherpid;
1041 	int ch, sfd, tmpsfd;
1042 	unsigned port;
1043 
1044 	bindaddr = htonl(INADDR_ANY);
1045 	port = G_GATE_PORT;
1046 	while ((ch = getopt(argc, argv, "a:hnp:F:R:S:v")) != -1) {
1047 		switch (ch) {
1048 		case 'a':
1049 			bindaddr = g_gate_str2ip(optarg);
1050 			if (bindaddr == INADDR_NONE) {
1051 				errx(EXIT_FAILURE,
1052 				    "Invalid IP/host name to bind to.");
1053 			}
1054 			break;
1055 		case 'F':
1056 			ggated_pidfile = optarg;
1057 			break;
1058 		case 'n':
1059 			nagle = 0;
1060 			break;
1061 		case 'p':
1062 			errno = 0;
1063 			port = strtoul(optarg, NULL, 10);
1064 			if (port == 0 && errno != 0)
1065 				errx(EXIT_FAILURE, "Invalid port.");
1066 			break;
1067 		case 'R':
1068 			errno = 0;
1069 			rcvbuf = strtoul(optarg, NULL, 10);
1070 			if (rcvbuf == 0 && errno != 0)
1071 				errx(EXIT_FAILURE, "Invalid rcvbuf.");
1072 			break;
1073 		case 'S':
1074 			errno = 0;
1075 			sndbuf = strtoul(optarg, NULL, 10);
1076 			if (sndbuf == 0 && errno != 0)
1077 				errx(EXIT_FAILURE, "Invalid sndbuf.");
1078 			break;
1079 		case 'v':
1080 			g_gate_verbose++;
1081 			break;
1082 		case 'h':
1083 		default:
1084 			usage();
1085 		}
1086 	}
1087 	argc -= optind;
1088 	argv += optind;
1089 
1090 	if (argv[0] != NULL)
1091 		exports_file = argv[0];
1092 	exports_get();
1093 
1094 	pfh = pidfile_open(ggated_pidfile, 0600, &otherpid);
1095 	if (pfh == NULL) {
1096 		if (errno == EEXIST) {
1097 			errx(EXIT_FAILURE, "Daemon already running, pid: %jd.",
1098 			    (intmax_t)otherpid);
1099 		}
1100 		err(EXIT_FAILURE, "Cannot open/create pidfile");
1101 	}
1102 
1103 	if (!g_gate_verbose) {
1104 		/* Run in daemon mode. */
1105 		if (daemon(0, 0) == -1)
1106 			g_gate_xlog("Cannot daemonize: %s", strerror(errno));
1107 	}
1108 
1109 	pidfile_write(pfh);
1110 
1111 	signal(SIGCHLD, SIG_IGN);
1112 
1113 	sfd = socket(AF_INET, SOCK_STREAM, 0);
1114 	if (sfd == -1)
1115 		g_gate_xlog("Cannot open stream socket: %s.", strerror(errno));
1116 	bzero(&serv, sizeof(serv));
1117 	serv.sin_family = AF_INET;
1118 	serv.sin_addr.s_addr = bindaddr;
1119 	serv.sin_port = htons(port);
1120 
1121 	g_gate_socket_settings(sfd);
1122 
1123 	if (bind(sfd, (struct sockaddr *)&serv, sizeof(serv)) == -1)
1124 		g_gate_xlog("bind(): %s.", strerror(errno));
1125 	if (listen(sfd, 5) == -1)
1126 		g_gate_xlog("listen(): %s.", strerror(errno));
1127 
1128 	g_gate_log(LOG_INFO, "Listen on port: %d.", port);
1129 
1130 	signal(SIGHUP, huphandler);
1131 
1132 	for (;;) {
1133 		fromlen = sizeof(from);
1134 		tmpsfd = accept(sfd, &from, &fromlen);
1135 		if (tmpsfd == -1)
1136 			g_gate_xlog("accept(): %s.", strerror(errno));
1137 
1138 		if (got_sighup) {
1139 			got_sighup = 0;
1140 			exports_get();
1141 		}
1142 
1143 		if (!handshake(&from, tmpsfd))
1144 			close(tmpsfd);
1145 	}
1146 	close(sfd);
1147 	pidfile_remove(pfh);
1148 	exit(EXIT_SUCCESS);
1149 }
1150