1 /*- 2 * Copyright (c) 2018 Christos Zoulas 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 /* 28 * Parse JSON object serialization format (RFC-7159) 29 */ 30 31 #ifndef TEST 32 #include "file.h" 33 34 #ifndef lint 35 FILE_RCSID("@(#)$File: is_json.c,v 1.20 2022/05/28 00:44:22 christos Exp $") 36 #endif 37 38 #include "magic.h" 39 #else 40 #include <stdio.h> 41 #include <stddef.h> 42 #endif 43 #include <string.h> 44 45 #ifdef DEBUG 46 #include <stdio.h> 47 #define DPRINTF(a, b, c) \ 48 printf("%*s%s [%.2x/%c] %.*s\n", (int)lvl, "", (a), *(b), *(b), \ 49 (int)(b - c), (const char *)(c)) 50 #define __file_debugused 51 #else 52 #define DPRINTF(a, b, c) do { } while (/*CONSTCOND*/0) 53 #define __file_debugused __attribute__((__unused__)) 54 #endif 55 56 #define JSON_ARRAY 0 57 #define JSON_CONSTANT 1 58 #define JSON_NUMBER 2 59 #define JSON_OBJECT 3 60 #define JSON_STRING 4 61 #define JSON_ARRAYN 5 62 #define JSON_MAX 6 63 64 /* 65 * if JSON_COUNT != 0: 66 * count all the objects, require that we have the whole data file 67 * otherwise: 68 * stop if we find an object or an array 69 */ 70 #ifndef JSON_COUNT 71 #define JSON_COUNT 0 72 #endif 73 74 static int json_parse(const unsigned char **, const unsigned char *, size_t *, 75 size_t); 76 77 static int 78 json_isspace(const unsigned char uc) 79 { 80 switch (uc) { 81 case ' ': 82 case '\n': 83 case '\r': 84 case '\t': 85 return 1; 86 default: 87 return 0; 88 } 89 } 90 91 static int 92 json_isdigit(unsigned char uc) 93 { 94 switch (uc) { 95 case '0': case '1': case '2': case '3': case '4': 96 case '5': case '6': case '7': case '8': case '9': 97 return 1; 98 default: 99 return 0; 100 } 101 } 102 103 static int 104 json_isxdigit(unsigned char uc) 105 { 106 if (json_isdigit(uc)) 107 return 1; 108 switch (uc) { 109 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 110 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 111 return 1; 112 default: 113 return 0; 114 } 115 } 116 117 static const unsigned char * 118 json_skip_space(const unsigned char *uc, const unsigned char *ue) 119 { 120 while (uc < ue && json_isspace(*uc)) 121 uc++; 122 return uc; 123 } 124 125 static int 126 json_parse_string(const unsigned char **ucp, const unsigned char *ue, 127 size_t lvl __file_debugused) 128 { 129 const unsigned char *uc = *ucp; 130 size_t i; 131 132 DPRINTF("Parse string: ", uc, *ucp); 133 while (uc < ue) { 134 switch (*uc++) { 135 case '\0': 136 goto out; 137 case '\\': 138 if (uc == ue) 139 goto out; 140 switch (*uc++) { 141 case '\0': 142 goto out; 143 case '"': 144 case '\\': 145 case '/': 146 case 'b': 147 case 'f': 148 case 'n': 149 case 'r': 150 case 't': 151 continue; 152 case 'u': 153 if (ue - uc < 4) { 154 uc = ue; 155 goto out; 156 } 157 for (i = 0; i < 4; i++) 158 if (!json_isxdigit(*uc++)) 159 goto out; 160 continue; 161 default: 162 goto out; 163 } 164 case '"': 165 DPRINTF("Good string: ", uc, *ucp); 166 *ucp = uc; 167 return 1; 168 default: 169 continue; 170 } 171 } 172 out: 173 DPRINTF("Bad string: ", uc, *ucp); 174 *ucp = uc; 175 return 0; 176 } 177 178 static int 179 json_parse_array(const unsigned char **ucp, const unsigned char *ue, 180 size_t *st, size_t lvl) 181 { 182 const unsigned char *uc = *ucp; 183 184 DPRINTF("Parse array: ", uc, *ucp); 185 while (uc < ue) { 186 if (*uc == ']') 187 goto done; 188 if (!json_parse(&uc, ue, st, lvl + 1)) 189 goto out; 190 if (uc == ue) 191 goto out; 192 switch (*uc) { 193 case ',': 194 uc++; 195 continue; 196 case ']': 197 done: 198 st[JSON_ARRAYN]++; 199 DPRINTF("Good array: ", uc, *ucp); 200 *ucp = uc + 1; 201 return 1; 202 default: 203 goto out; 204 } 205 } 206 out: 207 DPRINTF("Bad array: ", uc, *ucp); 208 *ucp = uc; 209 return 0; 210 } 211 212 static int 213 json_parse_object(const unsigned char **ucp, const unsigned char *ue, 214 size_t *st, size_t lvl) 215 { 216 const unsigned char *uc = *ucp; 217 DPRINTF("Parse object: ", uc, *ucp); 218 while (uc < ue) { 219 uc = json_skip_space(uc, ue); 220 if (uc == ue) 221 goto out; 222 if (*uc == '}') { 223 uc++; 224 goto done; 225 } 226 if (*uc++ != '"') { 227 DPRINTF("not string", uc, *ucp); 228 goto out; 229 } 230 DPRINTF("next field", uc, *ucp); 231 if (!json_parse_string(&uc, ue, lvl)) { 232 DPRINTF("not string", uc, *ucp); 233 goto out; 234 } 235 uc = json_skip_space(uc, ue); 236 if (uc == ue) 237 goto out; 238 if (*uc++ != ':') { 239 DPRINTF("not colon", uc, *ucp); 240 goto out; 241 } 242 if (!json_parse(&uc, ue, st, lvl + 1)) { 243 DPRINTF("not json", uc, *ucp); 244 goto out; 245 } 246 if (uc == ue) 247 goto out; 248 switch (*uc++) { 249 case ',': 250 continue; 251 case '}': /* { */ 252 done: 253 DPRINTF("Good object: ", uc, *ucp); 254 *ucp = uc; 255 return 1; 256 default: 257 DPRINTF("not more", uc, *ucp); 258 *ucp = uc - 1; 259 goto out; 260 } 261 } 262 out: 263 DPRINTF("Bad object: ", uc, *ucp); 264 *ucp = uc; 265 return 0; 266 } 267 268 static int 269 json_parse_number(const unsigned char **ucp, const unsigned char *ue, 270 size_t lvl __file_debugused) 271 { 272 const unsigned char *uc = *ucp; 273 int got = 0; 274 275 DPRINTF("Parse number: ", uc, *ucp); 276 if (uc == ue) 277 return 0; 278 if (*uc == '-') 279 uc++; 280 281 for (; uc < ue; uc++) { 282 if (!json_isdigit(*uc)) 283 break; 284 got = 1; 285 } 286 if (uc == ue) 287 goto out; 288 if (*uc == '.') 289 uc++; 290 for (; uc < ue; uc++) { 291 if (!json_isdigit(*uc)) 292 break; 293 got = 1; 294 } 295 if (uc == ue) 296 goto out; 297 if (got && (*uc == 'e' || *uc == 'E')) { 298 uc++; 299 got = 0; 300 if (uc == ue) 301 goto out; 302 if (*uc == '+' || *uc == '-') 303 uc++; 304 for (; uc < ue; uc++) { 305 if (!json_isdigit(*uc)) 306 break; 307 got = 1; 308 } 309 } 310 out: 311 if (!got) 312 DPRINTF("Bad number: ", uc, *ucp); 313 else 314 DPRINTF("Good number: ", uc, *ucp); 315 *ucp = uc; 316 return got; 317 } 318 319 static int 320 json_parse_const(const unsigned char **ucp, const unsigned char *ue, 321 const char *str, size_t len, size_t lvl __file_debugused) 322 { 323 const unsigned char *uc = *ucp; 324 325 DPRINTF("Parse const: ", uc, *ucp); 326 for (len--; uc < ue && --len;) { 327 if (*uc++ == *++str) 328 continue; 329 } 330 if (len) 331 DPRINTF("Bad const: ", uc, *ucp); 332 *ucp = uc; 333 return len == 0; 334 } 335 336 static int 337 json_parse(const unsigned char **ucp, const unsigned char *ue, 338 size_t *st, size_t lvl) 339 { 340 const unsigned char *uc; 341 int rv = 0; 342 int t; 343 344 uc = json_skip_space(*ucp, ue); 345 if (uc == ue) 346 goto out; 347 348 // Avoid recursion 349 if (lvl > 500) { 350 DPRINTF("Too many levels", uc, *ucp); 351 return 0; 352 } 353 #if JSON_COUNT 354 /* bail quickly if not counting */ 355 if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN])) 356 return 1; 357 #endif 358 359 DPRINTF("Parse general: ", uc, *ucp); 360 switch (*uc++) { 361 case '"': 362 rv = json_parse_string(&uc, ue, lvl + 1); 363 t = JSON_STRING; 364 break; 365 case '[': 366 rv = json_parse_array(&uc, ue, st, lvl + 1); 367 t = JSON_ARRAY; 368 break; 369 case '{': /* '}' */ 370 rv = json_parse_object(&uc, ue, st, lvl + 1); 371 t = JSON_OBJECT; 372 break; 373 case 't': 374 rv = json_parse_const(&uc, ue, "true", sizeof("true"), lvl + 1); 375 t = JSON_CONSTANT; 376 break; 377 case 'f': 378 rv = json_parse_const(&uc, ue, "false", sizeof("false"), 379 lvl + 1); 380 t = JSON_CONSTANT; 381 break; 382 case 'n': 383 rv = json_parse_const(&uc, ue, "null", sizeof("null"), lvl + 1); 384 t = JSON_CONSTANT; 385 break; 386 default: 387 --uc; 388 rv = json_parse_number(&uc, ue, lvl + 1); 389 t = JSON_NUMBER; 390 break; 391 } 392 if (rv) 393 st[t]++; 394 uc = json_skip_space(uc, ue); 395 out: 396 DPRINTF("End general: ", uc, *ucp); 397 *ucp = uc; 398 if (lvl == 0) 399 return rv && uc == ue && (st[JSON_ARRAYN] || st[JSON_OBJECT]); 400 return rv; 401 } 402 403 #ifndef TEST 404 int 405 file_is_json(struct magic_set *ms, const struct buffer *b) 406 { 407 const unsigned char *uc = CAST(const unsigned char *, b->fbuf); 408 const unsigned char *ue = uc + b->flen; 409 size_t st[JSON_MAX]; 410 int mime = ms->flags & MAGIC_MIME; 411 412 413 if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0) 414 return 0; 415 416 memset(st, 0, sizeof(st)); 417 418 if (!json_parse(&uc, ue, st, 0)) 419 return 0; 420 421 if (mime == MAGIC_MIME_ENCODING) 422 return 1; 423 if (mime) { 424 if (file_printf(ms, "application/json") == -1) 425 return -1; 426 return 1; 427 } 428 if (file_printf(ms, "JSON text data") == -1) 429 return -1; 430 #if JSON_COUNT 431 #define P(n) st[n], st[n] > 1 ? "s" : "" 432 if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT 433 "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT 434 "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT 435 "u >1array%s)", 436 P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT), 437 P(JSON_NUMBER), P(JSON_ARRAYN)) 438 == -1) 439 return -1; 440 #endif 441 return 1; 442 } 443 444 #else 445 446 #include <sys/types.h> 447 #include <sys/stat.h> 448 #include <stdio.h> 449 #include <fcntl.h> 450 #include <unistd.h> 451 #include <stdlib.h> 452 #include <stdint.h> 453 #include <err.h> 454 455 int 456 main(int argc, char *argv[]) 457 { 458 int fd, rv; 459 struct stat st; 460 unsigned char *p; 461 size_t stats[JSON_MAX]; 462 463 if ((fd = open(argv[1], O_RDONLY)) == -1) 464 err(EXIT_FAILURE, "Can't open `%s'", argv[1]); 465 466 if (fstat(fd, &st) == -1) 467 err(EXIT_FAILURE, "Can't stat `%s'", argv[1]); 468 469 if ((p = CAST(char *, malloc(st.st_size))) == NULL) 470 err(EXIT_FAILURE, "Can't allocate %jd bytes", 471 (intmax_t)st.st_size); 472 if (read(fd, p, st.st_size) != st.st_size) 473 err(EXIT_FAILURE, "Can't read %jd bytes", 474 (intmax_t)st.st_size); 475 memset(stats, 0, sizeof(stats)); 476 printf("is json %d\n", json_parse((const unsigned char **)&p, 477 p + st.st_size, stats, 0)); 478 return 0; 479 } 480 #endif 481