/*-
 * Copyright (c) 2018 Christos Zoulas
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Parse JSON object serialization format (RFC-7159)
 */

#ifndef TEST
#include "file.h"

#ifndef lint
FILE_RCSID("@(#)$File: is_json.c,v 1.30 2022/09/27 19:12:40 christos Exp $")
#endif

#include "magic.h"
#else
#include <stdio.h>
#include <stddef.h>
#endif
#include <string.h>

#ifdef DEBUG
#include <stdio.h>
#define DPRINTF(a, b, c)	\
    printf("%*s%s [%.2x/%c] %.*s\n", (int)lvl, "", (a), *(b), *(b), \
	(int)(b - c), (const char *)(c))
#define __file_debugused
#else
#define DPRINTF(a, b, c)	do { } while (/*CONSTCOND*/0)
#define __file_debugused __attribute__((__unused__))
#endif

#define JSON_ARRAY	0
#define JSON_CONSTANT	1
#define JSON_NUMBER	2
#define JSON_OBJECT	3
#define JSON_STRING	4
#define JSON_ARRAYN	5
#define JSON_MAX	6

/*
 * if JSON_COUNT != 0:
 *	count all the objects, require that we have the whole data file
 * otherwise:
 *	stop if we find an object or an array
 */
#ifndef JSON_COUNT
#define JSON_COUNT 0
#endif

static int json_parse(const unsigned char **, const unsigned char *, size_t *,
	size_t);

static int
json_isspace(const unsigned char uc)
{
	switch (uc) {
	case ' ':
	case '\n':
	case '\r':
	case '\t':
		return 1;
	default:
		return 0;
	}
}

static int
json_isdigit(unsigned char uc)
{
	switch (uc) {
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
		return 1;
	default:
		return 0;
	}
}

static int
json_isxdigit(unsigned char uc)
{
	if (json_isdigit(uc))
		return 1;
	switch (uc) {
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
		return 1;
	default:
		return 0;
	}
}

static const unsigned char *
json_skip_space(const unsigned char *uc, const unsigned char *ue)
{
	while (uc < ue && json_isspace(*uc))
		uc++;
	return uc;
}

/*ARGSUSED*/
static int
json_parse_string(const unsigned char **ucp, const unsigned char *ue,
    size_t lvl __file_debugused)
{
	const unsigned char *uc = *ucp;
	size_t i;

	DPRINTF("Parse string: ", uc, *ucp);
	while (uc < ue) {
		switch (*uc++) {
		case '\0':
			goto out;
		case '\\':
			if (uc == ue)
				goto out;
			switch (*uc++) {
			case '\0':
				goto out;
			case '"':
			case '\\':
			case '/':
			case 'b':
			case 'f':
			case 'n':
			case 'r':
			case 't':
				continue;
			case 'u':
				if (ue - uc < 4) {
					uc = ue;
					goto out;
				}
				for (i = 0; i < 4; i++)
					if (!json_isxdigit(*uc++))
						goto out;
				continue;
			default:
				goto out;
			}
		case '"':
			DPRINTF("Good string: ", uc, *ucp);
			*ucp = uc;
			return 1;
		default:
			continue;
		}
	}
out:
	DPRINTF("Bad string: ", uc, *ucp);
	*ucp = uc;
	return 0;
}

static int
json_parse_array(const unsigned char **ucp, const unsigned char *ue,
	size_t *st, size_t lvl)
{
	const unsigned char *uc = *ucp;

	DPRINTF("Parse array: ", uc, *ucp);
	while (uc < ue) {
		uc = json_skip_space(uc, ue);
		if (uc == ue)
			goto out;
		if (*uc == ']')
			goto done;
		if (!json_parse(&uc, ue, st, lvl + 1))
			goto out;
		if (uc == ue)
			goto out;
		switch (*uc) {
		case ',':
			uc++;
			continue;
		case ']':
		done:
			st[JSON_ARRAYN]++;
			DPRINTF("Good array: ", uc, *ucp);
			*ucp = uc + 1;
			return 1;
		default:
			goto out;
		}
	}
out:
	DPRINTF("Bad array: ", uc,  *ucp);
	*ucp = uc;
	return 0;
}

static int
json_parse_object(const unsigned char **ucp, const unsigned char *ue,
	size_t *st, size_t lvl)
{
	const unsigned char *uc = *ucp;
	DPRINTF("Parse object: ", uc, *ucp);
	while (uc < ue) {
		uc = json_skip_space(uc, ue);
		if (uc == ue)
			goto out;
		if (*uc == '}') {
			uc++;
			goto done;
		}
		if (*uc++ != '"') {
			DPRINTF("not string", uc, *ucp);
			goto out;
		}
		DPRINTF("next field", uc, *ucp);
		if (!json_parse_string(&uc, ue, lvl)) {
			DPRINTF("not string", uc, *ucp);
			goto out;
		}
		uc = json_skip_space(uc, ue);
		if (uc == ue)
			goto out;
		if (*uc++ != ':') {
			DPRINTF("not colon", uc, *ucp);
			goto out;
		}
		if (!json_parse(&uc, ue, st, lvl + 1)) {
			DPRINTF("not json", uc, *ucp);
			goto out;
		}
		if (uc == ue)
			goto out;
		switch (*uc++) {
		case ',':
			continue;
		case '}': /* { */
		done:
			DPRINTF("Good object: ", uc, *ucp);
			*ucp = uc;
			return 1;
		default:
			DPRINTF("not more", uc, *ucp);
			*ucp = uc - 1;
			goto out;
		}
	}
out:
	DPRINTF("Bad object: ", uc, *ucp);
	*ucp = uc;
	return 0;
}

/*ARGSUSED*/
static int
json_parse_number(const unsigned char **ucp, const unsigned char *ue, 
    size_t lvl __file_debugused)
{
	const unsigned char *uc = *ucp;
	int got = 0;

	DPRINTF("Parse number: ", uc, *ucp);
	if (uc == ue)
		return 0;
	if (*uc == '-')
		uc++;

	for (; uc < ue; uc++) {
		if (!json_isdigit(*uc))
			break;
		got = 1;
	}
	if (uc == ue)
		goto out;
	if (*uc == '.')
		uc++;
	for (; uc < ue; uc++) {
		if (!json_isdigit(*uc))
			break;
		got = 1;
	}
	if (uc == ue)
		goto out;
	if (got && (*uc == 'e' || *uc == 'E')) {
		uc++;
		got = 0;
		if (uc == ue)
			goto out;
		if (*uc == '+' || *uc == '-')
			uc++;
		for (; uc < ue; uc++) {
			if (!json_isdigit(*uc))
				break;
			got = 1;
		}
	}
out:
	if (!got)
		DPRINTF("Bad number: ", uc, *ucp);
	else
		DPRINTF("Good number: ", uc, *ucp);
	*ucp = uc;
	return got;
}

/*ARGSUSED*/
static int
json_parse_const(const unsigned char **ucp, const unsigned char *ue,
    const char *str, size_t len, size_t lvl __file_debugused)
{
	const unsigned char *uc = *ucp;

	DPRINTF("Parse const: ", uc, *ucp);
	*ucp += --len - 1;
	if (*ucp > ue)
		*ucp = ue;
	for (; uc < ue && --len;) {
		if (*uc++ != *++str) {
			DPRINTF("Bad const: ", uc, *ucp);
			return 0;
		}
	}
	DPRINTF("Good const: ", uc, *ucp);
	return 1;
}

static int
json_parse(const unsigned char **ucp, const unsigned char *ue,
    size_t *st, size_t lvl)
{
	const unsigned char *uc, *ouc;
	int rv = 0;
	int t;

	ouc = uc = json_skip_space(*ucp, ue);
	if (uc == ue)
		goto out;

	// Avoid recursion
	if (lvl > 500) {
		DPRINTF("Too many levels", uc, *ucp);
		return 0;
	}
#if JSON_COUNT
	/* bail quickly if not counting */
	if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN]))
		return 1;
#endif

	DPRINTF("Parse general: ", uc, *ucp);
	switch (*uc++) {
	case '"':
		rv = json_parse_string(&uc, ue, lvl + 1);
		t = JSON_STRING;
		break;
	case '[':
		rv = json_parse_array(&uc, ue, st, lvl + 1);
		t = JSON_ARRAY;
		break;
	case '{': /* '}' */
		rv = json_parse_object(&uc, ue, st, lvl + 1);
		t = JSON_OBJECT;
		break;
	case 't':
		rv = json_parse_const(&uc, ue, "true", sizeof("true"), lvl + 1);
		t = JSON_CONSTANT;
		break;
	case 'f':
		rv = json_parse_const(&uc, ue, "false", sizeof("false"),
		    lvl + 1);
		t = JSON_CONSTANT;
		break;
	case 'n':
		rv = json_parse_const(&uc, ue, "null", sizeof("null"), lvl + 1);
		t = JSON_CONSTANT;
		break;
	default:
		--uc;
		rv = json_parse_number(&uc, ue, lvl + 1);
		t = JSON_NUMBER;
		break;
	}
	if (rv)
		st[t]++;
	uc = json_skip_space(uc, ue);
out:
	DPRINTF("End general: ", uc, *ucp);
	*ucp = uc;
	if (lvl == 0) {
		if (!rv)
			return 0;
		if (uc == ue)
			return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 1 : 0;
		if (*ouc == *uc && json_parse(&uc, ue, st, 1))
			return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 2 : 0;
		else
			return 0;
	}
	return rv;
}

#ifndef TEST
int
file_is_json(struct magic_set *ms, const struct buffer *b)
{
	const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
	const unsigned char *ue = uc + b->flen;
	size_t st[JSON_MAX];
	int mime = ms->flags & MAGIC_MIME;
	int jt;


	if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
		return 0;

	memset(st, 0, sizeof(st));

	if ((jt = json_parse(&uc, ue, st, 0)) == 0)
		return 0;

	if (mime == MAGIC_MIME_ENCODING)
		return 1;
	if (mime) {
		if (file_printf(ms, "application/%s",
		    jt == 1 ? "json" : "x-ndjson") == -1)
			return -1;
		return 1;
	}
	if (file_printf(ms, "%sJSON text data",
	    jt == 1 ? "" : "New Line Delimited ") == -1)
		return -1;
#if JSON_COUNT
#define P(n) st[n], st[n] > 1 ? "s" : ""
	if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
	    "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
	    "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT
	    "u >1array%s)",
	    P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT),
	    P(JSON_NUMBER), P(JSON_ARRAYN))
	    == -1)
		return -1;
#endif
	return 1;
}

#else

#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <err.h>

int
main(int argc, char *argv[])
{
	int fd;
	struct stat st;
	unsigned char *p;
	size_t stats[JSON_MAX];

	if ((fd = open(argv[1], O_RDONLY)) == -1)
		err(EXIT_FAILURE, "Can't open `%s'", argv[1]);

	if (fstat(fd, &st) == -1)
		err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);

	if ((p = CAST(char *, malloc(st.st_size))) == NULL)
		err(EXIT_FAILURE, "Can't allocate %jd bytes",
		    (intmax_t)st.st_size);
	if (read(fd, p, st.st_size) != st.st_size)
		err(EXIT_FAILURE, "Can't read %jd bytes",
		    (intmax_t)st.st_size);
	memset(stats, 0, sizeof(stats));
	printf("is json %d\n", json_parse((const unsigned char **)&p,
	    p + st.st_size, stats, 0));
	return 0;
}
#endif