1 /*- 2 * Copyright (c) 2018 Christos Zoulas 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 /* 28 * Parse JSON object serialization format (RFC-7159) 29 */ 30 31 #ifndef TEST 32 #include "file.h" 33 34 #ifndef lint 35 FILE_RCSID("@(#)$File: is_json.c,v 1.30 2022/09/27 19:12:40 christos Exp $") 36 #endif 37 38 #include "magic.h" 39 #else 40 #include <stdio.h> 41 #include <stddef.h> 42 #endif 43 #include <string.h> 44 45 #ifdef DEBUG 46 #include <stdio.h> 47 #define DPRINTF(a, b, c) \ 48 printf("%*s%s [%.2x/%c] %.*s\n", (int)lvl, "", (a), *(b), *(b), \ 49 (int)(b - c), (const char *)(c)) 50 #define __file_debugused 51 #else 52 #define DPRINTF(a, b, c) do { } while (/*CONSTCOND*/0) 53 #define __file_debugused __attribute__((__unused__)) 54 #endif 55 56 #define JSON_ARRAY 0 57 #define JSON_CONSTANT 1 58 #define JSON_NUMBER 2 59 #define JSON_OBJECT 3 60 #define JSON_STRING 4 61 #define JSON_ARRAYN 5 62 #define JSON_MAX 6 63 64 /* 65 * if JSON_COUNT != 0: 66 * count all the objects, require that we have the whole data file 67 * otherwise: 68 * stop if we find an object or an array 69 */ 70 #ifndef JSON_COUNT 71 #define JSON_COUNT 0 72 #endif 73 74 static int json_parse(const unsigned char **, const unsigned char *, size_t *, 75 size_t); 76 77 static int 78 json_isspace(const unsigned char uc) 79 { 80 switch (uc) { 81 case ' ': 82 case '\n': 83 case '\r': 84 case '\t': 85 return 1; 86 default: 87 return 0; 88 } 89 } 90 91 static int 92 json_isdigit(unsigned char uc) 93 { 94 switch (uc) { 95 case '0': case '1': case '2': case '3': case '4': 96 case '5': case '6': case '7': case '8': case '9': 97 return 1; 98 default: 99 return 0; 100 } 101 } 102 103 static int 104 json_isxdigit(unsigned char uc) 105 { 106 if (json_isdigit(uc)) 107 return 1; 108 switch (uc) { 109 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 110 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 111 return 1; 112 default: 113 return 0; 114 } 115 } 116 117 static const unsigned char * 118 json_skip_space(const unsigned char *uc, const unsigned char *ue) 119 { 120 while (uc < ue && json_isspace(*uc)) 121 uc++; 122 return uc; 123 } 124 125 /*ARGSUSED*/ 126 static int 127 json_parse_string(const unsigned char **ucp, const unsigned char *ue, 128 size_t lvl __file_debugused) 129 { 130 const unsigned char *uc = *ucp; 131 size_t i; 132 133 DPRINTF("Parse string: ", uc, *ucp); 134 while (uc < ue) { 135 switch (*uc++) { 136 case '\0': 137 goto out; 138 case '\\': 139 if (uc == ue) 140 goto out; 141 switch (*uc++) { 142 case '\0': 143 goto out; 144 case '"': 145 case '\\': 146 case '/': 147 case 'b': 148 case 'f': 149 case 'n': 150 case 'r': 151 case 't': 152 continue; 153 case 'u': 154 if (ue - uc < 4) { 155 uc = ue; 156 goto out; 157 } 158 for (i = 0; i < 4; i++) 159 if (!json_isxdigit(*uc++)) 160 goto out; 161 continue; 162 default: 163 goto out; 164 } 165 case '"': 166 DPRINTF("Good string: ", uc, *ucp); 167 *ucp = uc; 168 return 1; 169 default: 170 continue; 171 } 172 } 173 out: 174 DPRINTF("Bad string: ", uc, *ucp); 175 *ucp = uc; 176 return 0; 177 } 178 179 static int 180 json_parse_array(const unsigned char **ucp, const unsigned char *ue, 181 size_t *st, size_t lvl) 182 { 183 const unsigned char *uc = *ucp; 184 185 DPRINTF("Parse array: ", uc, *ucp); 186 while (uc < ue) { 187 uc = json_skip_space(uc, ue); 188 if (uc == ue) 189 goto out; 190 if (*uc == ']') 191 goto done; 192 if (!json_parse(&uc, ue, st, lvl + 1)) 193 goto out; 194 if (uc == ue) 195 goto out; 196 switch (*uc) { 197 case ',': 198 uc++; 199 continue; 200 case ']': 201 done: 202 st[JSON_ARRAYN]++; 203 DPRINTF("Good array: ", uc, *ucp); 204 *ucp = uc + 1; 205 return 1; 206 default: 207 goto out; 208 } 209 } 210 out: 211 DPRINTF("Bad array: ", uc, *ucp); 212 *ucp = uc; 213 return 0; 214 } 215 216 static int 217 json_parse_object(const unsigned char **ucp, const unsigned char *ue, 218 size_t *st, size_t lvl) 219 { 220 const unsigned char *uc = *ucp; 221 DPRINTF("Parse object: ", uc, *ucp); 222 while (uc < ue) { 223 uc = json_skip_space(uc, ue); 224 if (uc == ue) 225 goto out; 226 if (*uc == '}') { 227 uc++; 228 goto done; 229 } 230 if (*uc++ != '"') { 231 DPRINTF("not string", uc, *ucp); 232 goto out; 233 } 234 DPRINTF("next field", uc, *ucp); 235 if (!json_parse_string(&uc, ue, lvl)) { 236 DPRINTF("not string", uc, *ucp); 237 goto out; 238 } 239 uc = json_skip_space(uc, ue); 240 if (uc == ue) 241 goto out; 242 if (*uc++ != ':') { 243 DPRINTF("not colon", uc, *ucp); 244 goto out; 245 } 246 if (!json_parse(&uc, ue, st, lvl + 1)) { 247 DPRINTF("not json", uc, *ucp); 248 goto out; 249 } 250 if (uc == ue) 251 goto out; 252 switch (*uc++) { 253 case ',': 254 continue; 255 case '}': /* { */ 256 done: 257 DPRINTF("Good object: ", uc, *ucp); 258 *ucp = uc; 259 return 1; 260 default: 261 DPRINTF("not more", uc, *ucp); 262 *ucp = uc - 1; 263 goto out; 264 } 265 } 266 out: 267 DPRINTF("Bad object: ", uc, *ucp); 268 *ucp = uc; 269 return 0; 270 } 271 272 /*ARGSUSED*/ 273 static int 274 json_parse_number(const unsigned char **ucp, const unsigned char *ue, 275 size_t lvl __file_debugused) 276 { 277 const unsigned char *uc = *ucp; 278 int got = 0; 279 280 DPRINTF("Parse number: ", uc, *ucp); 281 if (uc == ue) 282 return 0; 283 if (*uc == '-') 284 uc++; 285 286 for (; uc < ue; uc++) { 287 if (!json_isdigit(*uc)) 288 break; 289 got = 1; 290 } 291 if (uc == ue) 292 goto out; 293 if (*uc == '.') 294 uc++; 295 for (; uc < ue; uc++) { 296 if (!json_isdigit(*uc)) 297 break; 298 got = 1; 299 } 300 if (uc == ue) 301 goto out; 302 if (got && (*uc == 'e' || *uc == 'E')) { 303 uc++; 304 got = 0; 305 if (uc == ue) 306 goto out; 307 if (*uc == '+' || *uc == '-') 308 uc++; 309 for (; uc < ue; uc++) { 310 if (!json_isdigit(*uc)) 311 break; 312 got = 1; 313 } 314 } 315 out: 316 if (!got) 317 DPRINTF("Bad number: ", uc, *ucp); 318 else 319 DPRINTF("Good number: ", uc, *ucp); 320 *ucp = uc; 321 return got; 322 } 323 324 /*ARGSUSED*/ 325 static int 326 json_parse_const(const unsigned char **ucp, const unsigned char *ue, 327 const char *str, size_t len, size_t lvl __file_debugused) 328 { 329 const unsigned char *uc = *ucp; 330 331 DPRINTF("Parse const: ", uc, *ucp); 332 *ucp += --len - 1; 333 if (*ucp > ue) 334 *ucp = ue; 335 for (; uc < ue && --len;) { 336 if (*uc++ != *++str) { 337 DPRINTF("Bad const: ", uc, *ucp); 338 return 0; 339 } 340 } 341 DPRINTF("Good const: ", uc, *ucp); 342 return 1; 343 } 344 345 static int 346 json_parse(const unsigned char **ucp, const unsigned char *ue, 347 size_t *st, size_t lvl) 348 { 349 const unsigned char *uc, *ouc; 350 int rv = 0; 351 int t; 352 353 ouc = uc = json_skip_space(*ucp, ue); 354 if (uc == ue) 355 goto out; 356 357 // Avoid recursion 358 if (lvl > 500) { 359 DPRINTF("Too many levels", uc, *ucp); 360 return 0; 361 } 362 #if JSON_COUNT 363 /* bail quickly if not counting */ 364 if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN])) 365 return 1; 366 #endif 367 368 DPRINTF("Parse general: ", uc, *ucp); 369 switch (*uc++) { 370 case '"': 371 rv = json_parse_string(&uc, ue, lvl + 1); 372 t = JSON_STRING; 373 break; 374 case '[': 375 rv = json_parse_array(&uc, ue, st, lvl + 1); 376 t = JSON_ARRAY; 377 break; 378 case '{': /* '}' */ 379 rv = json_parse_object(&uc, ue, st, lvl + 1); 380 t = JSON_OBJECT; 381 break; 382 case 't': 383 rv = json_parse_const(&uc, ue, "true", sizeof("true"), lvl + 1); 384 t = JSON_CONSTANT; 385 break; 386 case 'f': 387 rv = json_parse_const(&uc, ue, "false", sizeof("false"), 388 lvl + 1); 389 t = JSON_CONSTANT; 390 break; 391 case 'n': 392 rv = json_parse_const(&uc, ue, "null", sizeof("null"), lvl + 1); 393 t = JSON_CONSTANT; 394 break; 395 default: 396 --uc; 397 rv = json_parse_number(&uc, ue, lvl + 1); 398 t = JSON_NUMBER; 399 break; 400 } 401 if (rv) 402 st[t]++; 403 uc = json_skip_space(uc, ue); 404 out: 405 DPRINTF("End general: ", uc, *ucp); 406 *ucp = uc; 407 if (lvl == 0) { 408 if (!rv) 409 return 0; 410 if (uc == ue) 411 return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 1 : 0; 412 if (*ouc == *uc && json_parse(&uc, ue, st, 1)) 413 return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 2 : 0; 414 else 415 return 0; 416 } 417 return rv; 418 } 419 420 #ifndef TEST 421 int 422 file_is_json(struct magic_set *ms, const struct buffer *b) 423 { 424 const unsigned char *uc = CAST(const unsigned char *, b->fbuf); 425 const unsigned char *ue = uc + b->flen; 426 size_t st[JSON_MAX]; 427 int mime = ms->flags & MAGIC_MIME; 428 int jt; 429 430 431 if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0) 432 return 0; 433 434 memset(st, 0, sizeof(st)); 435 436 if ((jt = json_parse(&uc, ue, st, 0)) == 0) 437 return 0; 438 439 if (mime == MAGIC_MIME_ENCODING) 440 return 1; 441 if (mime) { 442 if (file_printf(ms, "application/%s", 443 jt == 1 ? "json" : "x-ndjson") == -1) 444 return -1; 445 return 1; 446 } 447 if (file_printf(ms, "%sJSON text data", 448 jt == 1 ? "" : "New Line Delimited ") == -1) 449 return -1; 450 #if JSON_COUNT 451 #define P(n) st[n], st[n] > 1 ? "s" : "" 452 if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT 453 "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT 454 "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT 455 "u >1array%s)", 456 P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT), 457 P(JSON_NUMBER), P(JSON_ARRAYN)) 458 == -1) 459 return -1; 460 #endif 461 return 1; 462 } 463 464 #else 465 466 #include <sys/types.h> 467 #include <sys/stat.h> 468 #include <stdio.h> 469 #include <fcntl.h> 470 #include <unistd.h> 471 #include <stdlib.h> 472 #include <stdint.h> 473 #include <err.h> 474 475 int 476 main(int argc, char *argv[]) 477 { 478 int fd; 479 struct stat st; 480 unsigned char *p; 481 size_t stats[JSON_MAX]; 482 483 if ((fd = open(argv[1], O_RDONLY)) == -1) 484 err(EXIT_FAILURE, "Can't open `%s'", argv[1]); 485 486 if (fstat(fd, &st) == -1) 487 err(EXIT_FAILURE, "Can't stat `%s'", argv[1]); 488 489 if ((p = CAST(char *, malloc(st.st_size))) == NULL) 490 err(EXIT_FAILURE, "Can't allocate %jd bytes", 491 (intmax_t)st.st_size); 492 if (read(fd, p, st.st_size) != st.st_size) 493 err(EXIT_FAILURE, "Can't read %jd bytes", 494 (intmax_t)st.st_size); 495 memset(stats, 0, sizeof(stats)); 496 printf("is json %d\n", json_parse((const unsigned char **)&p, 497 p + st.st_size, stats, 0)); 498 return 0; 499 } 500 #endif 501