/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 *
 *	Converts binary log files to CLF (Common Log Format).
 *
 */

#include <stdlib.h>
#include <unistd.h>
#include <strings.h>
#include <sys/types.h>
#include <fcntl.h>
#include <stdio.h>
#include <locale.h>
#include <errno.h>
#include <time.h>
#include <synch.h>
#include <syslog.h>

#ifndef	TRUE
#define	TRUE	1
#endif	/* TRUE */

#ifndef	FALSE
#define	FALSE	0
#endif	/* FALSE */

#include "ncadoorhdr.h"
#include "ncalogd.h"

extern char *gettext();

/*
 * File-local boolean type used by the predicates and flags below.
 * NOTE(review): the identifiers bool/true/false defined here would collide
 * with <stdbool.h> if that header were ever included in this file.
 */
typedef	enum	{	/* Boolean type */
	false = 0,
	true  = 1
} bool;

/*
 * Printable names of the HTTP request methods.  http_method() indexes this
 * table with the numeric method code and falls back to entry 0 ("UNKNOWN")
 * for any code outside the table.
 * NOTE(review): the order presumably mirrors the NCA_* method constants from
 * the NCA headers -- confirm before adding or reordering entries.
 */
static const char *const
g_method_strings[8] = {
	"UNKNOWN",
	"OPTIONS",
	"GET",
	"HEAD",
	"POST",
	"PUT",
	"DELETE",
	"TRACE"
};

/*
 * Short (three-letter) English month names, indexed by the tm_mon field of
 * a struct tm (0 = "Jan" ... 11 = "Dec").  Used both to format CLF
 * timestamps and to recognize the month in a CLF-style -s argument.
 */
static const char * const sMonthStr [12] = {
	"Jan",
	"Feb",
	"Mar",
	"Apr",
	"May",
	"Jun",
	"Jul",
	"Aug",
	"Sep",
	"Oct",
	"Nov",
	"Dec",
};

/*
 * Durations expressed in seconds.  SEC_PER_YEAR is a 365-day year.
 * LEAP_TO_70 evaluates to 17 (integer division).
 * NOTE(review): LEAP_TO_70 is presumably the number of leap years between
 * 1900 and 1970; neither it nor SEC_PER_DAY/SEC_PER_YEAR is referenced in
 * the code visible in this file.
 */
#define	SEC_PER_MIN		(60)
#define	SEC_PER_HOUR		(60*60)
#define	SEC_PER_DAY		(24*60*60)
#define	SEC_PER_YEAR		(365*24*60*60)
#define	LEAP_TO_70		(70/4)

/* Binary size multipliers, in bytes */
#define	KILO_BYTE		(1024)
#define	MEGA_BYTE		(KILO_BYTE * KILO_BYTE)
#define	GIGA_BYTE		(KILO_BYTE * MEGA_BYTE)

/*
 * CLF_DATE_BUF_LENGTH: size of the timestamp buffer b2clf_buf() passes to
 * http_clf_date().
 * OUTFILE_BUF_SIZE: size of the text output buffer allocated by b2clf().
 */
#define	CLF_DATE_BUF_LENGTH	(128)
#define	OUTFILE_BUF_SIZE	(256 * KILO_BYTE)

/* request directio on the output file; cleared by the -D option */
static bool	g_enable_directio = true;
/* records dropped because of an unrecognized HTTP method or version */
static ssize_t	g_invalid_count = 0;
/* records dropped because they predate the -s start time */
static ssize_t	g_skip_count = 0;
/* copy of the -s argument as typed, used in the final skip warning */
static char	*g_start_time_str = NULL;

/*
 * expected sequence number of the next log buffer, tracked by
 * is_valid_header(); init value must match logd & NCA kmod
 */
static ssize_t	g_n_log_upcall = 0;

/*
 * size of the input buffer allocated by b2clf(), settable with -b;
 * input binary file was written in 64k chunks by default
 */
static ssize_t	g_infile_blk_size = NCA_DEFAULT_LOG_BUF_SIZE;

/* num of output records (-n), by default infinite (-1) */
static ssize_t	g_out_records = -1;

/*
 * start time for log output (-s), default none (i.e. output all);
 * http_clf_date() only applies the filter when tm_year is greater than 0
 */
static struct tm g_start_time;

/*
 * http_version(http_ver)
 *
 * Maps an HTTP version code onto the protocol string that is printed in
 * the request field of a CLF record.  HTTP_0_0 is reported as "HTTP/0.9"
 * and HTTP_ERR as "HTTP/1.0"; any other unrecognized code yields
 * "HTTP/unknown".  The returned string is a constant and must not be
 * modified or freed.
 */

static char *
http_version(int http_ver)
{
	switch (http_ver) {
	case HTTP_1_1:
		return ("HTTP/1.1");
	case HTTP_1_0:
	case HTTP_ERR:
		return ("HTTP/1.0");
	case HTTP_0_0:
	case HTTP_0_9:
		return ("HTTP/0.9");
	default:
		return ("HTTP/unknown");
	}
}

/*
 * valid_version(http_ver)
 *
 * Returns true when http_ver is one of the version codes accepted for
 * conversion (HTTP_0_9, HTTP_0_0, HTTP_1_0 or HTTP_1_1), false otherwise.
 * HTTP_ERR is not among them, even though http_version() can format it.
 */
static bool
valid_version(int http_ver)
{
	bool	accepted = false;

	if (http_ver == HTTP_0_9 || http_ver == HTTP_0_0 ||
	    http_ver == HTTP_1_0 || http_ver == HTTP_1_1)
		accepted = true;

	return (accepted);
}

/*
 * valid_method(method)
 *
 * Returns true when method is one of the request method codes that get
 * converted (OPTIONS, GET, HEAD, POST, PUT, DELETE or TRACE), false for
 * every other value.
 */
static bool
valid_method(int method)
{
	bool	accepted = false;

	if (method == NCA_OPTIONS || method == NCA_GET ||
	    method == NCA_HEAD || method == NCA_POST ||
	    method == NCA_PUT || method == NCA_DELETE ||
	    method == NCA_TRACE)
		accepted = true;

	return (accepted);
}

/*
 * http_method(method)
 *
 *   Looks up the printable name of a request method code in
 *   g_method_strings.  Codes beyond the end of the table -- including
 *   negative ones, which become very large once converted to an unsigned
 *   index -- map to entry 0, "UNKNOWN".
 */

static char *
http_method(int method)
{
	const size_t	n_methods =
	    sizeof (g_method_strings) / sizeof (g_method_strings[0]);
	size_t		idx = (size_t)method;

	if (idx >= n_methods)
		idx = 0;

	return ((char *)(g_method_strings[idx]));
}

/*
 * sMonth(index)
 *
 * Returns the short month name for a tm_mon style index (0 = "Jan" ...
 * 11 = "Dec").  An index outside the table yields the placeholder "???"
 * instead of reading beyond the end of sMonthStr.
 */

static const char *
sMonth(int index)
{
	if (index < 0 ||
	    index >= (int)(sizeof (sMonthStr) / sizeof (sMonthStr[0])))
		return ("???");

	return (sMonthStr[index]);
}

/*
 * ip_dot_saddr(addr, buf)
 *
 * Formats the four bytes at addr as a dotted quad of the form
 * xxx.xxx.xxx.xxx, each part left-padded with zeros to three digits so
 * the result has a fixed width.  The text is written to buf, which must
 * have room for 16 bytes (15 characters plus the terminating NUL), and
 * buf is returned.
 */

static char *
ip_dot_saddr(uchar_t *addr, char *buf)
{
	int	first = addr[0] & 0xFF;
	int	second = addr[1] & 0xFF;
	int	third = addr[2] & 0xFF;
	int	fourth = addr[3] & 0xFF;

	(void) sprintf(buf, "%03d.%03d.%03d.%03d",
	    first, second, third, fourth);

	return (buf);
}

/*
 * ip_dot_addr(addr, buf)
 *
 * Same as ip_dot_saddr(), but takes the address by value as an ipaddr_t.
 * The bytes of addr are formatted in the order they sit in memory.
 * NOTE(review): this presumably expects addr in network byte order --
 * confirm against the producer of the log records.
 */

static char *
ip_dot_addr(ipaddr_t addr, char *buf)
{
	uchar_t	*octets = (uchar_t *)&addr;

	return (ip_dot_saddr(octets, buf));
}

static int
http_clf_date(char *buf, int bufsize, time_t t)
{
	struct tm	local_time;
	long		time_zone_info;
	char		sign;

	if (localtime_r(&t, &local_time) == NULL)
		return (0);

	if (g_start_time.tm_year > 0 &&
	    (local_time.tm_year < g_start_time.tm_year ||
	    (local_time.tm_year == g_start_time.tm_year &&
	    local_time.tm_mon < g_start_time.tm_mon ||
	    (local_time.tm_mon == g_start_time.tm_mon &&
	    local_time.tm_mday < g_start_time.tm_mday ||
	    (local_time.tm_mday == g_start_time.tm_mday &&
	    local_time.tm_hour < g_start_time.tm_hour ||
	    (local_time.tm_hour == g_start_time.tm_hour &&
	    local_time.tm_min < g_start_time.tm_min ||
	    (local_time.tm_min == g_start_time.tm_min &&
	    local_time.tm_sec < g_start_time.tm_sec))))))) {
		/* clf record before the specified start time */
		return (1);
	}

	if (local_time.tm_isdst)
		time_zone_info = -timezone + SEC_PER_HOUR;
	else
		time_zone_info = -timezone;

	if (time_zone_info < 0) {
		sign = '-';
		time_zone_info = -time_zone_info;
	} else {
		sign = '+';
	}

	(void) snprintf(buf, bufsize,
		"[%02d/%s/%04d:%02d:%02d:%02d %c%02ld%02ld]",
		local_time.tm_mday, sMonth(local_time.tm_mon),
		1900 + local_time.tm_year, local_time.tm_hour,
		local_time.tm_min, local_time.tm_sec,
		sign, time_zone_info / SEC_PER_HOUR,
		time_zone_info % SEC_PER_HOUR);

	return (0);
}

/*
 * xmalloc(size)
 *
 * malloc() wrapper that never returns NULL: a request for zero bytes is
 * rounded up to one byte, and if the allocation fails an error is sent to
 * syslog and the process is terminated with abort().
 */

static void *
xmalloc(size_t size)
{
	void	*mem = malloc(size != 0 ? size : 1);

	if (mem == NULL) {
		syslog(LOG_ERR, gettext("Error: ncab2clf: Out of memory\n"));
		abort();
	}

	return (mem);
}

/*
 * xstrdup(string)
 *
 *   Returns a freshly allocated copy of string, obtained through
 *   xmalloc() (so allocation failure aborts rather than returning NULL).
 *   A NULL argument is passed through as NULL.
 */

static char *
xstrdup(const char *string)
{
	char	*copy;

	if (string == NULL)
		return (NULL);

	copy = xmalloc(strlen(string) + 1);

	return (strcpy(copy, string));
}

/*
 * usage()
 *
 * Prints the (localized) command synopsis and option summary on stderr
 * and terminates the program with exit status 3.  Does not return.
 */
static void
usage()
{
	const char	*text;

	text = gettext(
	    "\nncab2clf [-Dhv] [-b <block-size>] [-i <binary-log-file>] "
	    "[-n <n>]\n"
	    "    [-o <output-file>] [-s <date/time>]\n"
	    "\tconverts a NCA binary log file to HTTP CLF"
	    " (Common Log Format)\n\n"
	    "\t-b <block-size>\n"
	    "\t\tinput file blocking size in KB\n"
	    "\t\t- default is 64K bytes\n"
	    "\t-D\tdisable directio on <output-file-name>\n"
	    "\t-h\tthis usage message\n"
	    "\t-i <binary-log-file>\n"
	    "\t\tspecify input file\n"
	    "\t-n <n>\n"
	    "\t\toutput <n> CLF records\n"
	    "\t-o <output-file>\n"
	    "\t\tspecify output file\n"
	    "\t-s <date/time>\n"
	    "\t\tskip any records before <date/time>\n"
	    "\t\t- <date/time> may be in CLF format\n"
	    "\t\t- <date/time> may be in time format as specified "
	    "by touch(1)\n"
	    "\t-v\tverbose output\n"
	    "\tNote: if no <output-file> - output goes to standard output\n"
	    "\tNote: if no <binary-log-file> - input is taken from standard "
	    "input\n");

	(void) fprintf(stderr, text);

	exit(3);
}

/*
 * atoi_for2(p, value)
 *   - converts the two decimal digits at p[0] and p[1] and stores the
 *     result (0..99) in *value
 *   - returns 1 (TRUE) upon success; returns 0 (FALSE), leaving *value
 *     untouched, unless both characters are decimal digits
 *
 * Each character is checked on its own: summing the two unchecked
 * characters would accept inputs such as "1A" or "/:" whose combined
 * value happens to land inside 0..99.
 */

static int
atoi_for2(char *p, int *value)
{
	/* && short-circuits, so p[1] is never read past a terminating NUL */
	int	two_digits = (p[0] >= '0' && p[0] <= '9' &&
		    p[1] >= '0' && p[1] <= '9');

	if (two_digits)
		*value = (p[0] - '0') * 10 + (p[1] - '0');

	return (two_digits);
}

/*
 * parse_time(t, tm)
 *   - parses the string t, given in the time format accepted by touch(1),
 *     [[CC]YY]MMDDhhmm[.SS], into the broken-down time *tm
 *   - when the year is omitted the current year is used; a two digit year
 *     below 69 is taken to be in the 2000s, otherwise in the 1900s
 *   - the fields are range checked (month 1-12, day 1-31, hour 0-23,
 *     minute 0-59, second 0-60)
 *   - t itself is not modified
 *   - return TRUE upon success and FALSE upon failure; on failure the
 *     contents of *tm are unspecified
 */

static int
parse_time(char *t, struct tm *tm)
{
	int		century = 0;
	int		seconds = 0;
	size_t		len;
	time_t		when;
	struct tm	*now;
	char		*p;

	/*
	 * Split off the optional ".SS" suffix.  Exactly two characters, and
	 * no second '.', may follow the dot; len is the length of the part
	 * in front of it.
	 */
	if ((p = strchr(t, '.')) != NULL) {
		if (strlen(p + 1) != 2 || strchr(p + 1, '.') != NULL)
			return (FALSE);
		if (!atoi_for2(p + 1, &seconds))
			return (FALSE);
		len = (size_t)(p - t);
	} else {
		len = strlen(t);
	}

	/* default to the current year for the MMDDhhmm form */
	when = time(0);
	bzero(tm, sizeof (struct tm));
	if ((now = localtime(&when)) == NULL)
		return (FALSE);
	tm->tm_year = now->tm_year;

	switch (len) {
	case 12:	/* CCYYMMDDhhmm */
		if (!atoi_for2(t, &century))
			return (FALSE);
		t += 2;
		/* FALLTHROUGH */
	case 10:	/* YYMMDDhhmm */
		if (!atoi_for2(t, &tm->tm_year))
			return (FALSE);
		t += 2;
		if (century == 0) {
			if (tm->tm_year < 69)
				tm->tm_year += 100;
		} else {
			/* tm_year counts years since 1900 */
			tm->tm_year += (century - 19) * 100;
		}
		/* FALLTHROUGH */
	case 8:		/* MMDDhhmm */
		if (!atoi_for2(t, &tm->tm_mon))
			return (FALSE);
		tm->tm_mon--;		/* tm_mon is zero based */
		t += 2;

		if (!atoi_for2(t, &tm->tm_mday))
			return (FALSE);
		t += 2;

		if (!atoi_for2(t, &tm->tm_hour))
			return (FALSE);
		t += 2;

		if (!atoi_for2(t, &tm->tm_min))
			return (FALSE);

		tm->tm_sec = seconds;
		break;
	default:
		return (FALSE);
	}

	/* two digits can encode 0..99; reject values no clock can show */
	if (tm->tm_mon < 0 || tm->tm_mon > 11 ||
	    tm->tm_mday < 1 || tm->tm_mday > 31 ||
	    tm->tm_hour < 0 || tm->tm_hour > 23 ||
	    tm->tm_min < 0 || tm->tm_min > 59 ||
	    tm->tm_sec < 0 || tm->tm_sec > 60)
		return (FALSE);

	return (TRUE);
}

/*
 * close_files(ifd, ofd)
 *
 * Closes the input and output descriptors, in that order, leaving a
 * descriptor alone when it is the process's standard input (for ifd) or
 * standard output (for ofd).  Errors from close() are ignored.
 */
static void
close_files(int ifd, int ofd)
{
	const int	fds[2] = { ifd, ofd };
	const int	keep_open[2] = { STDIN_FILENO, STDOUT_FILENO };
	int		i;

	for (i = 0; i < 2; i++) {
		if (fds[i] != keep_open[i])
			(void) close(fds[i]);
	}
}

/*
 * read_n_bytes(fd, buf, bufsize)
 *
 * Reads from fd into buf until bufsize bytes have arrived or end of file
 * is reached, restarting reads that were interrupted by a signal (EINTR).
 * Returns the number of bytes stored, which is less than bufsize only at
 * end of file.  Any other read error is reported on stderr and -1 is
 * returned.
 */

static ssize_t
read_n_bytes(int fd, char *buf, ssize_t bufsize)
{
	ssize_t	total = 0;
	ssize_t	got;

	while (total < bufsize) {
		got = read(fd, buf + total, bufsize - total);

		if (got == 0)
			break;			/* end of file */

		if (got > 0) {
			total += got;
			continue;
		}

		if (errno == EINTR)
			continue;		/* interrupted, try again */

		/* a genuine read failure */
		(void) fprintf(stderr, gettext(
		    "Error: ncab2clf: "
		    "reading input file: %s\n"),
		    strerror(errno));
		return (-1);
	}

	return (total);
}

/*
 * write_n_bytes(fd, buf, bufsize)
 *
 * Writes bufsize bytes from buf to fd, continuing after short writes and
 * restarting writes that were interrupted by a signal (EINTR).  Returns
 * the number of bytes written (bufsize) on success.  Any other write
 * error is reported on stderr and -1 is returned.
 */

static ssize_t
write_n_bytes(int fd, char *buf, ssize_t bufsize)
{
	ssize_t	total = 0;
	ssize_t	put;

	while (total < bufsize) {
		put = write(fd, buf + total, bufsize - total);

		if (put >= 0) {
			total += put;
			continue;
		}

		if (errno == EINTR)
			continue;		/* interrupted, try again */

		/* a genuine write failure */
		(void) fprintf(stderr, gettext(
		    "Error: ncab2clf: "
		    "writing output file: %s\n"),
		    strerror(errno));
		return (-1);
	}

	return (total);
}

/*
 * is_valid_header(ibuf)
 *
 * Checks that ibuf starts with a log buffer header this program
 * understands: the header version must be NCA_LOG_VERSION1 and the
 * operation must be log_op.  Returns false otherwise.
 *
 * For an accepted header the buffer sequence number (n_log_upcall) is
 * compared with the expected one kept in g_n_log_upcall.  A mismatch only
 * produces a warning on stderr; the expectation is then reset to the
 * number just seen and true is still returned.  While g_n_log_upcall is 0
 * no number is expected yet and the one in the header is simply adopted.
 */

static bool
is_valid_header(void *ibuf)
{
	nca_log_buf_hdr_t	*h;
	nca_log_stat_t		*s;

	h = (nca_log_buf_hdr_t *)ibuf;

	/* Do some validity checks on ibuf */

	if (((h->nca_loghdr).nca_version != NCA_LOG_VERSION1) ||
	    ((h->nca_loghdr).nca_op != log_op)) {
		return (false);
	}

	s = &(h->nca_logstats);

	if (g_n_log_upcall == 0) {
		g_n_log_upcall = s->n_log_upcall;
	} else if ((++g_n_log_upcall) != (ssize_t)s->n_log_upcall) {
		/*
		 * The message uses %d, so hand it ints: passing the
		 * ssize_t counter directly is undefined where ssize_t is
		 * wider than int.  The text itself is left untouched so
		 * existing message catalogs keep matching.
		 */
		(void) fprintf(stderr, gettext(
			"Warning: ncab2clf:"
			" expected record number (%d) is"
			" different from the one seen (%d)\n."
			" Resetting the expected record"
			" number.\n"),
			(int)g_n_log_upcall, (int)s->n_log_upcall);

		g_n_log_upcall = s->n_log_upcall;
	}

	return (true);
}

/*
 * b2clf_buf(ibuf, obuf, isize, osize, out_size)
 *
 * Converts one binary log buffer into CLF text.
 *
 *	ibuf		log buffer: a nca_log_buf_hdr_t followed by the
 *			request records
 *	obuf		receives the CLF text, one line per record
 *	isize		number of valid bytes in ibuf (header included)
 *	osize		capacity of obuf in bytes
 *	out_size	set to the number of bytes of text stored in obuf;
 *			never exceeds osize
 *
 * Records older than the -s start time are counted in g_skip_count and
 * records with an unknown method or version in g_invalid_count; neither
 * kind produces output.  Conversion stops early when the record data or
 * the output space is used up (reported on stderr) or when the -n record
 * limit kept in g_out_records is reached.
 *
 * Always returns 0.
 *
 * NOTE(review): the -n budget is decremented for skipped and invalid
 * records too, so fewer than <n> lines may be written -- confirm this is
 * intended.
 * NOTE(review): response_status and response_len are printed with %d;
 * their declared types are not visible here -- confirm they match.
 */

static int
b2clf_buf(
	void	*ibuf,
	char	*obuf,
	ssize_t	isize,
	ssize_t	osize,
	ssize_t	*out_size)
{
	nca_log_buf_hdr_t	*h;
	nca_log_stat_t		*s;
	nca_request_log_t	*r;

	char	*br;
	void	*er;
	char	ip_buf[64];
	ssize_t	max_input_size, num_bytes_read;
	ssize_t	room;
	int	n_recs;
	bool	error_seen;

	ssize_t	count;
	char	clf_timebuf[CLF_DATE_BUF_LENGTH];
	char	*method;
	char	*http_version_string;
	char	*ruser;
	char	*req_url;
	char	*remote_ip;

	h = (nca_log_buf_hdr_t *)ibuf;
	s = &(h->nca_logstats);
	/* the first record starts right behind the buffer header */
	r = (nca_request_log_t *)(&(h[1]));

	/* OK, it's a valid buffer which we can use, go ahead and convert it */

	max_input_size = (ssize_t)isize - sizeof (nca_log_buf_hdr_t);

	*out_size = 0;
	error_seen = false;
	num_bytes_read = 0;
	for (n_recs = 0; n_recs < s->n_log_recs; n_recs++) {

		/* Make sure there is enough space in the output buffer */

		if ((*out_size >= osize) ||
				(num_bytes_read >= max_input_size)) {
			error_seen = true;
			break;
		}

		if (http_clf_date(clf_timebuf, sizeof (clf_timebuf),
		    ((time_t)r->start_process_time))) {
			/* A start time was speced and we're not there yet */
			++g_skip_count;
			goto skip;
		}

		/* Only logs valid HTTP ops */

		if ((! valid_method((int)r->method)) ||
				(! valid_version((int)r->version))) {
			++g_invalid_count;
			goto skip;
		}

		method = http_method((int)r->method);
		http_version_string = http_version((int)r->version);

		remote_ip = ip_dot_addr(r->remote_host, (char *)&ip_buf);
		if (r->remote_user_len) {
			ruser = NCA_REQLOG_RDATA(r, remote_user);
		} else {
			ruser = "-";
		}

		if (r->request_url_len) {
			req_url = NCA_REQLOG_RDATA(r, request_url);
		} else {
			req_url = "UNKNOWN";
		}

		/* room > 0 is guaranteed by the space check above */
		room = osize - *out_size;
		count = (ssize_t)snprintf(&(obuf[*out_size]), (size_t)room,
				"%s %s %s %s \"%s %s %s\" %d %d\n",
				((remote_ip) ? remote_ip : "-"),
				/* should be remote_log_name */
				"-",
				ruser,
				clf_timebuf,
				method,
				req_url,
				http_version_string,
				r->response_status,
				r->response_len);

		/*
		 * snprintf() returns the length the complete line would
		 * have had.  If that does not fit (or formatting failed)
		 * the partial line is dropped; accounting for it would push
		 * *out_size beyond osize and make the caller write bytes
		 * from past the end of obuf.
		 */
		if (count < 0 || count >= room) {
			error_seen = true;
			break;
		}

		*out_size += count;
	skip:
		/* step to the next record, honouring record alignment */
		br = (char *)r;
		er = ((char *)r) + NCA_LOG_REC_SIZE(r);

		/*LINTED*/
		r = (nca_request_log_t *)NCA_LOG_ALIGN(er);
		num_bytes_read += (ssize_t)(((char *)r) - br);
		if (g_out_records > 0 && --g_out_records == 0)
			break;
	}

	if (error_seen) {
		(void) fprintf(stderr, gettext(
			"Error: ncab2clf: "
			"Input buffer not fully converted.\n"));

		if (n_recs != s->n_log_recs)
			(void) fprintf(stderr, gettext(
				"Warning: ncab2clf: "
				"Converted only %d of %d records\n"),
				n_recs, s->n_log_recs);
	}

	return (0);
}

/*
 * b2clf(ifd, ofd)
 *
 * Main conversion loop: reads log buffers from ifd one at a time (header
 * first, then the n_log_size bytes of record data the header announces),
 * converts each with b2clf_buf() and writes the resulting text to ofd.
 * The loop ends at end of input, after an error, or once the -n record
 * budget in g_out_records is used up.  A header that fails
 * is_valid_header() is reported and the loop carries on with the bytes
 * that follow it.
 *
 * The input buffer is g_infile_blk_size bytes long; a log buffer whose
 * header plus data would not fit into it is treated as an error rather
 * than being read past the end of the allocation.
 *
 * Returns 0 on success (including a clean or truncated end of input) and
 * -1 on a read, write, conversion or size error.
 */
static int
b2clf(int ifd, int ofd)
{
	char	*ibuf;
	char	*obuf;
	bool	error_seen;
	bool	eof_seen;
	ssize_t	ni, nh, no, olen;
	ssize_t	max_log_size;

	nca_log_buf_hdr_t	*h;
	nca_log_stat_t		*s;

	/* the input buffer must at least be able to hold a header */
	if (g_infile_blk_size < (ssize_t)sizeof (nca_log_buf_hdr_t)) {
		(void) fprintf(stderr, gettext(
		    "Error: ncab2clf: "
		    "input block size (%ld bytes) is too small\n"),
		    (long)g_infile_blk_size);
		return (-1);
	}
	max_log_size = g_infile_blk_size -
	    (ssize_t)sizeof (nca_log_buf_hdr_t);

	ibuf = xmalloc(g_infile_blk_size);
	obuf = xmalloc(OUTFILE_BUF_SIZE);
	error_seen = false;

	eof_seen = false;
	while (! eof_seen && g_out_records != 0) {
		nh = ni = no = 0;

		/* read the binary header first */
		nh = read_n_bytes(ifd, ibuf, sizeof (nca_log_buf_hdr_t));
		if (nh < 0) {
			/* a read failure is not an end of file */
			error_seen = true;
			break;
		}
		if (nh != sizeof (nca_log_buf_hdr_t)) {
			eof_seen = true;
			break;
		}

		if (! is_valid_header(ibuf)) {
			(void) fprintf(stderr, gettext(
			    "Error: ncab2clf: "
			    "Can't convert the input data to CLF\n"));
			continue;
		}

		/* read the data to be converted */
		/* LINTED */
		h = (nca_log_buf_hdr_t *)ibuf;
		s = &(h->nca_logstats);

		if (s->n_log_size == 0)
			continue;

		/* the size comes from the file: never trust it blindly */
		if ((ssize_t)s->n_log_size < 0 ||
		    (ssize_t)s->n_log_size > max_log_size) {
			(void) fprintf(stderr, gettext(
			    "Error: ncab2clf: "
			    "log buffer of %ld bytes exceeds the input "
			    "block size (see -b)\n"),
			    (long)s->n_log_size);
			error_seen = true;
			break;
		}

		ni = read_n_bytes(ifd, &(ibuf[nh]), (ssize_t)s->n_log_size);
		if (ni < 0) {
			error_seen = true;
			break;
		} else if (ni < (ssize_t)s->n_log_size) {
			/* short buffer: convert what arrived, then stop */
			eof_seen = true;
		}

		if (ni == 0)
			break;

		/* convert binary input into text output */

		if (b2clf_buf(ibuf, obuf, ni + nh, OUTFILE_BUF_SIZE, &olen)) {
			(void) fprintf(stderr, gettext(
			    "Error: ncab2clf: "
			    "Can't convert the input data to CLF\n"));
			error_seen = true;
			break;
		}

		/* write out the text data */
		no = write_n_bytes(ofd, obuf, olen);
		if (no != olen) {
			error_seen = true;
			break;
		}

		/* start the next round with clean buffers */
		bzero(ibuf, nh + ni);
		bzero(obuf, no);
	}

	free(ibuf);
	free(obuf);

	if (error_seen)
		return (-1);

	return (0);
}


int
main(int argc, char **argv)
{
	int	c;
	int	ifd;		/* input fd - binary log file */
	int	ofd;
	struct tm t;

	char	*infile = NULL;  /* input file name */
	char	*outfile = NULL; /* output file name */

	char	monstr[64];

	(void) setlocale(LC_ALL, "");

#if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
#define	TEXT_DOMAIN "SYS_TEST"
#endif

	(void) textdomain(TEXT_DOMAIN);

	/* parse any arguments */
	while ((c = getopt(argc, argv, "hvDi:o:b:n:s:")) != EOF) {
		switch (c) {
		case 'h':
			usage();
			break;
		case 'i':
			infile = xstrdup(optarg);
			break;
		case 'D':
			g_enable_directio = false;
			break;
		case 'o':
			outfile = xstrdup(optarg);
			break;
		case 'b':
			g_infile_blk_size = (KILO_BYTE * atoi(optarg));
			break;
		case 'n':
			g_out_records = atoi(optarg);
			break;
		case 's':
			g_start_time_str = strdup(optarg);
			bzero(&t, sizeof (t));
			if (sscanf(optarg, "%d/%3s/%d:%d:%d:%d", &t.tm_mday,
			    &monstr[0], &t.tm_year, &t.tm_hour, &t.tm_min,
			    &t.tm_sec) == 6) {
				/* Valid CLF time (e.g. 06/Apr/2001:09:14:14) */
				t.tm_mon = 0;
				do {
					if (strcasecmp(monstr,
					    sMonthStr[t.tm_mon]) == 0)
						break;
				} while (t.tm_mon++ < 12);
				t.tm_year -= 1900;
				g_start_time = t;
			} else if (parse_time(optarg, &t)) {
				g_start_time = t;
			} else {
				(void) fprintf(stderr,
				    gettext("Error: ncab2clf:"
				    " %s: unrecognized date/time.\n"),
				    optarg);
			}
			break;
		case 'v':
			(void) fprintf(stderr, gettext("Error: ncab2clf: "
			    "verbose functionality not yet supported\n"));
			exit(3);
			break;
		case '?':
			usage();
			break;
		}
	}

	/* set up the input stream */

	if (infile) {

		if ((ifd = open(infile, O_RDONLY)) < 0) {
			(void) fprintf(stderr,
				gettext("Error: ncab2clf: "
				"Failure to open binary log file %s: %s\n"),
				infile, strerror(errno));
			exit(1);
		}

	} else {
		ifd = STDIN_FILENO;
	}

	/* set up the output stream */

	if (outfile) {

		if ((ofd = open(outfile, O_WRONLY|O_CREAT, 0644)) < 0) {
			(void) fprintf(stderr, gettext(
			    "Error: ncab2clf: "
			    "Failure to open output file %s: %s\n"),
			    outfile, strerror(errno));
			exit(1);
		}

		/* Enable directio on output stream if specified */

		if (g_enable_directio)
			(void) directio(ofd, DIRECTIO_ON);

	} else {
		ofd = STDOUT_FILENO;
	}

	if ((b2clf(ifd, ofd) != 0)) {
		close_files(ifd, ofd);
		exit(2);
	}

	close_files(ifd, ofd);

	if (g_invalid_count) {
		(void) fprintf(stderr, gettext("Warning: ncab2clf: %d"
		" number of invalid log records encountered in binary input"
		" file were skipped\n"), g_invalid_count);
	}
	if (g_skip_count) {
		(void) fprintf(stderr, gettext("Warning: ncab2clf:"
		    " %d log records in binary input file before %s"
		    " were skipped\n"),
		    g_skip_count, g_start_time_str);
	}

	return (0);
}