xref: /freebsd/contrib/file/src/is_json.c (revision f5b7695d2d5abd735064870ad43f4b9c723940c1)
1 /*-
2  * Copyright (c) 2018 Christos Zoulas
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
15  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
18  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24  * POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /*
28  * Parse JSON object serialization format (RFC-7159)
29  */
30 
31 #ifndef TEST
32 #include "file.h"
33 
34 #ifndef lint
35 FILE_RCSID("@(#)$File: is_json.c,v 1.13 2019/03/02 01:08:10 christos Exp $")
36 #endif
37 
38 #include <string.h>
39 #include "magic.h"
40 #endif
41 
42 #ifdef DEBUG
43 #include <stdio.h>
44 #define DPRINTF(a, b, c)	\
45     printf("%s [%.2x/%c] %.20s\n", (a), *(b), *(b), (const char *)(c))
46 #else
47 #define DPRINTF(a, b, c)	do { } while (/*CONSTCOND*/0)
48 #endif
49 
/*
 * Kinds of JSON values recognised by the parser.  Each constant is an index
 * into the statistics array that is passed to json_parse(); JSON_ARRAYN is
 * bumped for arrays in which at least one ',' separator was seen, and
 * JSON_MAX is the number of slots in that array.
 */
enum {
	JSON_ARRAY = 0,
	JSON_CONSTANT = 1,
	JSON_NUMBER = 2,
	JSON_OBJECT = 3,
	JSON_STRING = 4,
	JSON_ARRAYN = 5,
	JSON_MAX = 6
};
57 
58 /*
59  * if JSON_COUNT != 0:
60  *	count all the objects, require that we have the whole data file
61  * otherwise:
62  *	stop if we find an object or an array
63  */
64 #ifndef JSON_COUNT
65 #define JSON_COUNT 0
66 #endif
67 
68 static int json_parse(const unsigned char **, const unsigned char *, size_t *,
69 	size_t);
70 
/*
 * Return 1 if uc is one of the four characters treated as JSON whitespace
 * (space, newline, carriage return, horizontal tab), 0 otherwise.
 */
static int
json_isspace(const unsigned char uc)
{
	return uc == ' ' || uc == '\n' || uc == '\r' || uc == '\t';
}
84 
/*
 * Return 1 if uc is a decimal digit '0'..'9', 0 otherwise.
 * Locale-independent, unlike isdigit(3).
 */
static int
json_isdigit(unsigned char uc)
{
	/* C guarantees that '0'..'9' have consecutive values. */
	return uc >= '0' && uc <= '9';
}
96 
/*
 * Return 1 if uc is a hexadecimal digit (0-9, a-f or A-F), 0 otherwise.
 */
static int
json_isxdigit(unsigned char uc)
{
	switch (uc) {
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
		return 1;
	default:
		return 0;
	}
}
110 
/*
 * Advance from uc over any run of JSON whitespace, never going past ue.
 * Returns a pointer to the first non-whitespace byte, or ue if the rest of
 * the buffer is whitespace.
 */
static const unsigned char *
json_skip_space(const unsigned char *uc, const unsigned char *ue)
{
	const unsigned char *p;

	for (p = uc; p < ue; p++) {
		if (!json_isspace(*p))
			break;
	}
	return p;
}
118 
/*
 * Parse the remainder of a JSON string.  On entry *ucp points just past the
 * opening '"'; ue is one past the last byte of the buffer.
 *
 * Returns 1 and leaves *ucp just past the closing '"' on success.  Returns 0
 * on a NUL byte, an unknown or truncated escape sequence, or if the buffer
 * ends before the closing quote; *ucp is then left where parsing stopped.
 */
static int
json_parse_string(const unsigned char **ucp, const unsigned char *ue)
{
	const unsigned char *cur = *ucp;
	unsigned char ch;
	size_t ndigits;

	DPRINTF("Parse string: ", cur, *ucp);
	while (cur < ue) {
		ch = *cur++;
		if (ch == '"') {
			*ucp = cur;
			return 1;
		}
		if (ch == '\0')
			goto bad;
		if (ch != '\\')
			continue;

		/* A backslash must be followed by an escape character. */
		if (cur == ue)
			goto bad;
		switch (*cur++) {
		case '"':
		case '\\':
		case '/':
		case 'b':
		case 'f':
		case 'n':
		case 'r':
		case 't':
			break;
		case 'u':
			/* \uXXXX: exactly four hex digits must follow. */
			if (ue - cur < 4) {
				cur = ue;
				goto bad;
			}
			for (ndigits = 0; ndigits < 4; ndigits++) {
				if (!json_isxdigit(*cur++))
					goto bad;
			}
			break;
		default:
			/* Includes an embedded NUL after the backslash. */
			goto bad;
		}
	}
bad:
	DPRINTF("Bad string: ", cur, *ucp);
	*ucp = cur;
	return 0;
}
169 
170 static int
171 json_parse_array(const unsigned char **ucp, const unsigned char *ue,
172 	size_t *st, size_t lvl)
173 {
174 	const unsigned char *uc = *ucp;
175 	int more = 0;	/* Array has more than 1 element */
176 
177 	DPRINTF("Parse array: ", uc, *ucp);
178 	while (uc < ue) {
179 		if (!json_parse(&uc, ue, st, lvl + 1))
180 			goto out;
181 		if (uc == ue)
182 			goto out;
183 		switch (*uc) {
184 		case ',':
185 			more++;
186 			uc++;
187 			continue;
188 		case ']':
189 			if (more)
190 				st[JSON_ARRAYN]++;
191 			*ucp = uc + 1;
192 			return 1;
193 		default:
194 			goto out;
195 		}
196 	}
197 out:
198 	DPRINTF("Bad array: ", uc,  *ucp);
199 	*ucp = uc;
200 	return 0;
201 }
202 
/*
 * Parse the remainder of a JSON object.  On entry *ucp points just past the
 * opening '{'; ue is one past the last byte of the buffer.
 *
 * Each member must be a string key, a ':' and a value (parsed by
 * json_parse() with the statistics array st and nesting level lvl + 1);
 * members are separated by ','.
 *
 * Returns 1 and leaves *ucp just past the closing '}' on success, 0
 * otherwise (with *ucp left where parsing stopped).
 */
static int
json_parse_object(const unsigned char **ucp, const unsigned char *ue,
	size_t *st, size_t lvl)
{
	const unsigned char *uc = *ucp;

	DPRINTF("Parse object: ", uc, *ucp);

	/*
	 * An object with no members ("{}") is valid JSON; without this check
	 * the '}' would be rejected because a '"' is expected first.
	 */
	uc = json_skip_space(uc, ue);
	if (uc < ue && *uc == '}') { /* { */
		uc++;
		*ucp = uc;
		DPRINTF("Good object: ", uc, *ucp);
		return 1;
	}

	while (uc < ue) {
		/* Member name: a quoted string. */
		uc = json_skip_space(uc, ue);
		if (uc == ue)
			goto out;
		if (*uc++ != '"') {
			DPRINTF("not string", uc, *ucp);
			goto out;
		}
		DPRINTF("next field", uc, *ucp);
		if (!json_parse_string(&uc, ue)) {
			DPRINTF("not string", uc, *ucp);
			goto out;
		}

		/* Name/value separator. */
		uc = json_skip_space(uc, ue);
		if (uc == ue)
			goto out;
		if (*uc++ != ':') {
			DPRINTF("not colon", uc, *ucp);
			goto out;
		}

		/* Member value; json_parse() skips surrounding whitespace. */
		if (!json_parse(&uc, ue, st, lvl + 1)) {
			DPRINTF("not json", uc, *ucp);
			goto out;
		}
		if (uc == ue)
			goto out;
		switch (*uc++) {
		case ',':
			continue;
		case '}': /* { */
			*ucp = uc;
			DPRINTF("Good object: ", uc, *ucp);
			return 1;
		default:
			/* Show the offending byte in the debug trace. */
			DPRINTF("not more", uc, uc - 1);
			goto out;
		}
	}
out:
	DPRINTF("Bad object: ", uc, *ucp);
	*ucp = uc;
	return 0;
}
253 
/*
 * Parse a number starting at *ucp: an optional '-', digits, an optional '.'
 * followed by digits, and an optional exponent ('e' or 'E', optional sign,
 * digits).  The check is deliberately loose: at least one digit is required
 * in the mantissa, and, when an exponent marker is present, in the exponent.
 *
 * Returns 1 if a number was recognised and 0 otherwise.  *ucp is advanced
 * to where scanning stopped, except when the buffer is already exhausted on
 * entry, in which case it is left untouched.
 */
static int
json_parse_number(const unsigned char **ucp, const unsigned char *ue)
{
	const unsigned char *p = *ucp;
	int seen_digit = 0;

	DPRINTF("Parse number: ", p, *ucp);
	if (p == ue)
		return 0;
	if (*p == '-')
		p++;

	/* Integer part. */
	while (p < ue && json_isdigit(*p)) {
		seen_digit = 1;
		p++;
	}
	if (p == ue)
		goto done;

	/* Optional fraction. */
	if (*p == '.')
		p++;
	while (p < ue && json_isdigit(*p)) {
		seen_digit = 1;
		p++;
	}
	if (p == ue)
		goto done;

	/* Optional exponent, only meaningful after a mantissa digit. */
	if (!seen_digit || (*p != 'e' && *p != 'E'))
		goto done;
	p++;
	seen_digit = 0;		/* the exponent needs digits of its own */
	if (p == ue)
		goto done;
	if (*p == '+' || *p == '-')
		p++;
	while (p < ue && json_isdigit(*p)) {
		seen_digit = 1;
		p++;
	}
done:
	if (seen_digit) {
		DPRINTF("Good number: ", p, *ucp);
	} else {
		DPRINTF("Bad number: ", p, *ucp);
	}
	*ucp = p;
	return seen_digit;
}
303 
/*
 * Match the tail of a literal name ("true", "false" or "null").
 *
 * On entry *ucp points just past the first character, which the caller has
 * already matched; str is the complete literal and len is its size
 * including the terminating NUL (i.e. sizeof("true")).
 *
 * Returns 1 and leaves *ucp just past the literal when the bytes
 * str[1..len-2] are all present in the input.  Returns 0 on the first
 * mismatching byte or if the buffer ends too early; *ucp is then left at
 * the point of failure.
 */
static int
json_parse_const(const unsigned char **ucp, const unsigned char *ue,
    const char *str, size_t len)
{
	const unsigned char *uc = *ucp;
	size_t i;
	int ok = 1;

	DPRINTF("Parse const: ", uc, *ucp);
	/* str[0] was consumed by the caller; str[len - 1] is the NUL. */
	for (i = 1; i + 1 < len; i++) {
		if (uc == ue || *uc != (unsigned char)str[i]) {
			ok = 0;
			break;
		}
		uc++;
	}
	if (!ok) {
		DPRINTF("Bad const: ", uc, *ucp);
	}
	*ucp = uc;
	return ok;
}
320 
321 static int
322 json_parse(const unsigned char **ucp, const unsigned char *ue,
323     size_t *st, size_t lvl)
324 {
325 	const unsigned char *uc;
326 	int rv = 0;
327 	int t;
328 
329 	uc = json_skip_space(*ucp, ue);
330 	if (uc == ue)
331 		goto out;
332 
333 	// Avoid recursion
334 	if (lvl > 20)
335 		return 0;
336 #if JSON_COUNT
337 	/* bail quickly if not counting */
338 	if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN]))
339 		return 1;
340 #endif
341 
342 	DPRINTF("Parse general: ", uc, *ucp);
343 	switch (*uc++) {
344 	case '"':
345 		rv = json_parse_string(&uc, ue);
346 		t = JSON_STRING;
347 		break;
348 	case '[':
349 		rv = json_parse_array(&uc, ue, st, lvl + 1);
350 		t = JSON_ARRAY;
351 		break;
352 	case '{': /* '}' */
353 		rv = json_parse_object(&uc, ue, st, lvl + 1);
354 		t = JSON_OBJECT;
355 		break;
356 	case 't':
357 		rv = json_parse_const(&uc, ue, "true", sizeof("true"));
358 		t = JSON_CONSTANT;
359 		break;
360 	case 'f':
361 		rv = json_parse_const(&uc, ue, "false", sizeof("false"));
362 		t = JSON_CONSTANT;
363 		break;
364 	case 'n':
365 		rv = json_parse_const(&uc, ue, "null", sizeof("null"));
366 		t = JSON_CONSTANT;
367 		break;
368 	default:
369 		--uc;
370 		rv = json_parse_number(&uc, ue);
371 		t = JSON_NUMBER;
372 		break;
373 	}
374 	if (rv)
375 		st[t]++;
376 	uc = json_skip_space(uc, ue);
377 out:
378 	*ucp = uc;
379 	DPRINTF("End general: ", uc, *ucp);
380 	if (lvl == 0)
381 		return rv && (st[JSON_ARRAYN] || st[JSON_OBJECT]);
382 	return rv;
383 }
384 
385 #ifndef TEST
386 int
387 file_is_json(struct magic_set *ms, const struct buffer *b)
388 {
389 	const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
390 	const unsigned char *ue = uc + b->flen;
391 	size_t st[JSON_MAX];
392 	int mime = ms->flags & MAGIC_MIME;
393 
394 
395 	if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
396 		return 0;
397 
398 	memset(st, 0, sizeof(st));
399 
400 	if (!json_parse(&uc, ue, st, 0))
401 		return 0;
402 
403 	if (mime == MAGIC_MIME_ENCODING)
404 		return 1;
405 	if (mime) {
406 		if (file_printf(ms, "application/json") == -1)
407 			return -1;
408 		return 1;
409 	}
410 	if (file_printf(ms, "JSON data") == -1)
411 		return -1;
412 #if JSON_COUNT
413 #define P(n) st[n], st[n] > 1 ? "s" : ""
414 	if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
415 	    "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
416 	    "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT
417 	    "u >1array%s)",
418 	    P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT),
419 	    P(JSON_NUMBER), P(JSON_ARRAYN))
420 	    == -1)
421 		return -1;
422 #endif
423 	return 1;
424 }
425 
426 #else
427 
428 #include <sys/types.h>
429 #include <sys/stat.h>
430 #include <stdio.h>
431 #include <fcntl.h>
432 #include <unistd.h>
433 #include <stdlib.h>
434 #include <stdint.h>
435 #include <err.h>
436 
437 int
438 main(int argc, char *argv[])
439 {
440 	int fd, rv;
441 	struct stat st;
442 	unsigned char *p;
443 	size_t stats[JSON_MAX];
444 
445 	if ((fd = open(argv[1], O_RDONLY)) == -1)
446 		err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
447 
448 	if (fstat(fd, &st) == -1)
449 		err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
450 
451 	if ((p = malloc(st.st_size)) == NULL)
452 		err(EXIT_FAILURE, "Can't allocate %jd bytes",
453 		    (intmax_t)st.st_size);
454 	if (read(fd, p, st.st_size) != st.st_size)
455 		err(EXIT_FAILURE, "Can't read %jd bytes",
456 		    (intmax_t)st.st_size);
457 	memset(stats, 0, sizeof(stats));
458 	printf("is json %d\n", json_parse((const unsigned char **)&p,
459 	    p + st.st_size, stats, 0));
460 	return 0;
461 }
462 #endif
463