1 /*- 2 * Copyright (c) 2018 Christos Zoulas 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 /* 28 * Parse JSON object serialization format (RFC-7159) 29 */ 30 31 #ifndef TEST 32 #include "file.h" 33 34 #ifndef lint 35 FILE_RCSID("@(#)$File: is_json.c,v 1.15 2020/06/07 19:05:47 christos Exp $") 36 #endif 37 38 #include <string.h> 39 #include "magic.h" 40 #endif 41 42 #ifdef DEBUG 43 #include <stdio.h> 44 #define DPRINTF(a, b, c) \ 45 printf("%s [%.2x/%c] %.20s\n", (a), *(b), *(b), (const char *)(c)) 46 #else 47 #define DPRINTF(a, b, c) do { } while (/*CONSTCOND*/0) 48 #endif 49 50 #define JSON_ARRAY 0 51 #define JSON_CONSTANT 1 52 #define JSON_NUMBER 2 53 #define JSON_OBJECT 3 54 #define JSON_STRING 4 55 #define JSON_ARRAYN 5 56 #define JSON_MAX 6 57 58 /* 59 * if JSON_COUNT != 0: 60 * count all the objects, require that we have the whole data file 61 * otherwise: 62 * stop if we find an object or an array 63 */ 64 #ifndef JSON_COUNT 65 #define JSON_COUNT 0 66 #endif 67 68 static int json_parse(const unsigned char **, const unsigned char *, size_t *, 69 size_t); 70 71 static int 72 json_isspace(const unsigned char uc) 73 { 74 switch (uc) { 75 case ' ': 76 case '\n': 77 case '\r': 78 case '\t': 79 return 1; 80 default: 81 return 0; 82 } 83 } 84 85 static int 86 json_isdigit(unsigned char uc) 87 { 88 switch (uc) { 89 case '0': case '1': case '2': case '3': case '4': 90 case '5': case '6': case '7': case '8': case '9': 91 return 1; 92 default: 93 return 0; 94 } 95 } 96 97 static int 98 json_isxdigit(unsigned char uc) 99 { 100 if (json_isdigit(uc)) 101 return 1; 102 switch (uc) { 103 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 104 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 105 return 1; 106 default: 107 return 0; 108 } 109 } 110 111 static const unsigned char * 112 json_skip_space(const unsigned char *uc, const unsigned char *ue) 113 { 114 while (uc < ue && json_isspace(*uc)) 115 uc++; 116 return uc; 117 } 118 119 static int 120 json_parse_string(const unsigned char **ucp, const unsigned char *ue) 121 { 122 const unsigned char *uc = *ucp; 123 size_t i; 124 125 DPRINTF("Parse string: ", uc, *ucp); 126 while (uc < ue) { 127 switch (*uc++) { 128 case '\0': 129 goto out; 130 case '\\': 131 if (uc == ue) 132 goto out; 133 switch (*uc++) { 134 case '\0': 135 goto out; 136 case '"': 137 case '\\': 138 case '/': 139 case 'b': 140 case 'f': 141 case 'n': 142 case 'r': 143 case 't': 144 continue; 145 case 'u': 146 if (ue - uc < 4) { 147 uc = ue; 148 goto out; 149 } 150 for (i = 0; i < 4; i++) 151 if (!json_isxdigit(*uc++)) 152 goto out; 153 continue; 154 default: 155 goto out; 156 } 157 case '"': 158 *ucp = uc; 159 DPRINTF("Good string: ", uc, *ucp); 160 return 1; 161 default: 162 continue; 163 } 164 } 165 out: 166 DPRINTF("Bad string: ", uc, *ucp); 167 *ucp = uc; 168 return 0; 169 } 170 171 static int 172 json_parse_array(const unsigned char **ucp, const unsigned char *ue, 173 size_t *st, size_t lvl) 174 { 175 const unsigned char *uc = *ucp; 176 177 DPRINTF("Parse array: ", uc, *ucp); 178 while (uc < ue) { 179 if (*uc == ']') 180 goto done; 181 if (!json_parse(&uc, ue, st, lvl + 1)) 182 goto out; 183 if (uc == ue) 184 goto out; 185 switch (*uc) { 186 case ',': 187 uc++; 188 continue; 189 case ']': 190 done: 191 st[JSON_ARRAYN]++; 192 *ucp = uc + 1; 193 DPRINTF("Good array: ", uc, *ucp); 194 return 1; 195 default: 196 goto out; 197 } 198 } 199 out: 200 DPRINTF("Bad array: ", uc, *ucp); 201 *ucp = uc; 202 return 0; 203 } 204 205 static int 206 json_parse_object(const unsigned char **ucp, const unsigned char *ue, 207 size_t *st, size_t lvl) 208 { 209 const unsigned char *uc = *ucp; 210 DPRINTF("Parse object: ", uc, *ucp); 211 while (uc < ue) { 212 uc = json_skip_space(uc, ue); 213 if (uc == ue) 214 goto out; 215 if (*uc == '}') { 216 uc++; 217 goto done; 218 } 219 if (*uc++ != '"') { 220 DPRINTF("not string", uc, *ucp); 221 goto out; 222 } 223 DPRINTF("next field", uc, *ucp); 224 if (!json_parse_string(&uc, ue)) { 225 DPRINTF("not string", uc, *ucp); 226 goto out; 227 } 228 uc = json_skip_space(uc, ue); 229 if (uc == ue) 230 goto out; 231 if (*uc++ != ':') { 232 DPRINTF("not colon", uc, *ucp); 233 goto out; 234 } 235 if (!json_parse(&uc, ue, st, lvl + 1)) { 236 DPRINTF("not json", uc, *ucp); 237 goto out; 238 } 239 if (uc == ue) 240 goto out; 241 switch (*uc++) { 242 case ',': 243 continue; 244 case '}': /* { */ 245 done: 246 *ucp = uc; 247 DPRINTF("Good object: ", uc, *ucp); 248 return 1; 249 default: 250 *ucp = uc - 1; 251 DPRINTF("not more", uc, *ucp); 252 goto out; 253 } 254 } 255 out: 256 DPRINTF("Bad object: ", uc, *ucp); 257 *ucp = uc; 258 return 0; 259 } 260 261 static int 262 json_parse_number(const unsigned char **ucp, const unsigned char *ue) 263 { 264 const unsigned char *uc = *ucp; 265 int got = 0; 266 267 DPRINTF("Parse number: ", uc, *ucp); 268 if (uc == ue) 269 return 0; 270 if (*uc == '-') 271 uc++; 272 273 for (; uc < ue; uc++) { 274 if (!json_isdigit(*uc)) 275 break; 276 got = 1; 277 } 278 if (uc == ue) 279 goto out; 280 if (*uc == '.') 281 uc++; 282 for (; uc < ue; uc++) { 283 if (!json_isdigit(*uc)) 284 break; 285 got = 1; 286 } 287 if (uc == ue) 288 goto out; 289 if (got && (*uc == 'e' || *uc == 'E')) { 290 uc++; 291 got = 0; 292 if (uc == ue) 293 goto out; 294 if (*uc == '+' || *uc == '-') 295 uc++; 296 for (; uc < ue; uc++) { 297 if (!json_isdigit(*uc)) 298 break; 299 got = 1; 300 } 301 } 302 out: 303 if (!got) 304 DPRINTF("Bad number: ", uc, *ucp); 305 else 306 DPRINTF("Good number: ", uc, *ucp); 307 *ucp = uc; 308 return got; 309 } 310 311 static int 312 json_parse_const(const unsigned char **ucp, const unsigned char *ue, 313 const char *str, size_t len) 314 { 315 const unsigned char *uc = *ucp; 316 317 DPRINTF("Parse const: ", uc, *ucp); 318 for (len--; uc < ue && --len;) { 319 if (*uc++ == *++str) 320 continue; 321 } 322 if (len) 323 DPRINTF("Bad const: ", uc, *ucp); 324 *ucp = uc; 325 return len == 0; 326 } 327 328 static int 329 json_parse(const unsigned char **ucp, const unsigned char *ue, 330 size_t *st, size_t lvl) 331 { 332 const unsigned char *uc; 333 int rv = 0; 334 int t; 335 336 uc = json_skip_space(*ucp, ue); 337 if (uc == ue) 338 goto out; 339 340 // Avoid recursion 341 if (lvl > 20) 342 return 0; 343 #if JSON_COUNT 344 /* bail quickly if not counting */ 345 if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN])) 346 return 1; 347 #endif 348 349 DPRINTF("Parse general: ", uc, *ucp); 350 switch (*uc++) { 351 case '"': 352 rv = json_parse_string(&uc, ue); 353 t = JSON_STRING; 354 break; 355 case '[': 356 rv = json_parse_array(&uc, ue, st, lvl + 1); 357 t = JSON_ARRAY; 358 break; 359 case '{': /* '}' */ 360 rv = json_parse_object(&uc, ue, st, lvl + 1); 361 t = JSON_OBJECT; 362 break; 363 case 't': 364 rv = json_parse_const(&uc, ue, "true", sizeof("true")); 365 t = JSON_CONSTANT; 366 break; 367 case 'f': 368 rv = json_parse_const(&uc, ue, "false", sizeof("false")); 369 t = JSON_CONSTANT; 370 break; 371 case 'n': 372 rv = json_parse_const(&uc, ue, "null", sizeof("null")); 373 t = JSON_CONSTANT; 374 break; 375 default: 376 --uc; 377 rv = json_parse_number(&uc, ue); 378 t = JSON_NUMBER; 379 break; 380 } 381 if (rv) 382 st[t]++; 383 uc = json_skip_space(uc, ue); 384 out: 385 *ucp = uc; 386 DPRINTF("End general: ", uc, *ucp); 387 if (lvl == 0) 388 return rv && (st[JSON_ARRAYN] || st[JSON_OBJECT]); 389 return rv; 390 } 391 392 #ifndef TEST 393 int 394 file_is_json(struct magic_set *ms, const struct buffer *b) 395 { 396 const unsigned char *uc = CAST(const unsigned char *, b->fbuf); 397 const unsigned char *ue = uc + b->flen; 398 size_t st[JSON_MAX]; 399 int mime = ms->flags & MAGIC_MIME; 400 401 402 if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0) 403 return 0; 404 405 memset(st, 0, sizeof(st)); 406 407 if (!json_parse(&uc, ue, st, 0)) 408 return 0; 409 410 if (mime == MAGIC_MIME_ENCODING) 411 return 1; 412 if (mime) { 413 if (file_printf(ms, "application/json") == -1) 414 return -1; 415 return 1; 416 } 417 if (file_printf(ms, "JSON data") == -1) 418 return -1; 419 #if JSON_COUNT 420 #define P(n) st[n], st[n] > 1 ? "s" : "" 421 if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT 422 "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT 423 "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT 424 "u >1array%s)", 425 P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT), 426 P(JSON_NUMBER), P(JSON_ARRAYN)) 427 == -1) 428 return -1; 429 #endif 430 return 1; 431 } 432 433 #else 434 435 #include <sys/types.h> 436 #include <sys/stat.h> 437 #include <stdio.h> 438 #include <fcntl.h> 439 #include <unistd.h> 440 #include <stdlib.h> 441 #include <stdint.h> 442 #include <err.h> 443 444 int 445 main(int argc, char *argv[]) 446 { 447 int fd, rv; 448 struct stat st; 449 unsigned char *p; 450 size_t stats[JSON_MAX]; 451 452 if ((fd = open(argv[1], O_RDONLY)) == -1) 453 err(EXIT_FAILURE, "Can't open `%s'", argv[1]); 454 455 if (fstat(fd, &st) == -1) 456 err(EXIT_FAILURE, "Can't stat `%s'", argv[1]); 457 458 if ((p = malloc(st.st_size)) == NULL) 459 err(EXIT_FAILURE, "Can't allocate %jd bytes", 460 (intmax_t)st.st_size); 461 if (read(fd, p, st.st_size) != st.st_size) 462 err(EXIT_FAILURE, "Can't read %jd bytes", 463 (intmax_t)st.st_size); 464 memset(stats, 0, sizeof(stats)); 465 printf("is json %d\n", json_parse((const unsigned char **)&p, 466 p + st.st_size, stats, 0)); 467 return 0; 468 } 469 #endif 470