/*-
 * Copyright (c) 2018 Christos Zoulas
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Parse JSON object serialization format (RFC-7159)
 */

#ifndef TEST
#include "file.h"

#ifndef lint
FILE_RCSID("@(#)$File: is_json.c,v 1.13 2019/03/02 01:08:10 christos Exp $")
#endif

#include <string.h>
#include "magic.h"
#else
/*
 * The stand-alone test build does not get these through "file.h":
 * size_t is needed by the prototypes below and memset() by main().
 */
#include <stddef.h>
#include <string.h>
#endif

#ifdef DEBUG
#include <stdio.h>
#define DPRINTF(a, b, c)	\
    printf("%s [%.2x/%c] %.20s\n", (a), *(b), *(b), (const char *)(c))
#else
#define DPRINTF(a, b, c)	do { } while (/*CONSTCOND*/0)
#endif

/* Indices into the statistics array handed to json_parse(). */
#define JSON_ARRAY	0
#define JSON_CONSTANT	1
#define JSON_NUMBER	2
#define JSON_OBJECT	3
#define JSON_STRING	4
#define JSON_ARRAYN	5	/* arrays holding more than one element */
#define JSON_MAX	6

/*
 * if JSON_COUNT != 0:
 *	count all the objects, require that we have the whole data file
 * otherwise:
 *	stop if we find an object or an array
 */
#ifndef JSON_COUNT
#define JSON_COUNT 0
#endif

static int json_parse(const unsigned char **, const unsigned char *, size_t *,
	size_t);

/*
 * Return 1 if uc is one of the four JSON whitespace characters
 * (space, newline, carriage return, tab), 0 otherwise.
 */
static int
json_isspace(const unsigned char uc)
{
	switch (uc) {
	case ' ':
	case '\n':
	case '\r':
	case '\t':
		return 1;
	default:
		return 0;
	}
}

/* Return 1 if uc is an ASCII decimal digit, 0 otherwise. */
static int
json_isdigit(unsigned char uc)
{
	switch (uc) {
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
		return 1;
	default:
		return 0;
	}
}

/* Return 1 if uc is an ASCII hexadecimal digit (either case), 0 otherwise. */
static int
json_isxdigit(unsigned char uc)
{
	if (json_isdigit(uc))
		return 1;
	switch (uc) {
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
		return 1;
	default:
		return 0;
	}
}

/*
 * Advance uc past any JSON whitespace, never going beyond ue.
 * Returns the first non-whitespace position, or ue.
 */
static const unsigned char *
json_skip_space(const unsigned char *uc, const unsigned char *ue)
{
	while (uc < ue && json_isspace(*uc))
		uc++;
	return uc;
}

/*
 * Parse the remainder of a string; the opening '"' has already been
 * consumed by the caller.
 *
 * On success returns 1 and leaves *ucp just past the closing '"'.
 * On failure (NUL byte, unknown escape, bad \u sequence, or the buffer
 * ending before the closing quote) returns 0 and leaves *ucp where
 * parsing stopped.
 */
static int
json_parse_string(const unsigned char **ucp, const unsigned char *ue)
{
	const unsigned char *uc = *ucp;
	size_t i;

	DPRINTF("Parse string: ", uc, *ucp);
	while (uc < ue) {
		switch (*uc++) {
		case '\0':
			goto out;
		case '\\':
			if (uc == ue)
				goto out;
			/* Every path below either continues or bails out. */
			switch (*uc++) {
			case '\0':
				goto out;
			case '"':
			case '\\':
			case '/':
			case 'b':
			case 'f':
			case 'n':
			case 'r':
			case 't':
				continue;
			case 'u':
				/* \u must be followed by 4 hex digits */
				if (ue - uc < 4) {
					uc = ue;
					goto out;
				}
				for (i = 0; i < 4; i++)
					if (!json_isxdigit(*uc++))
						goto out;
				continue;
			default:
				goto out;
			}
		case '"':
			*ucp = uc;
			return 1;
		default:
			continue;
		}
	}
out:
	DPRINTF("Bad string: ", uc, *ucp);
	*ucp = uc;
	return 0;
}

/*
 * Parse the remainder of an array; the opening '[' has already been
 * consumed by the caller.
 *
 * Accepts an empty array, or one or more values separated by ','.
 * st[JSON_ARRAYN] is incremented when the array holds more than one
 * element.  On success returns 1 and leaves *ucp just past the closing
 * ']'; on failure returns 0 and leaves *ucp where parsing stopped.
 */
static int
json_parse_array(const unsigned char **ucp, const unsigned char *ue,
	size_t *st, size_t lvl)
{
	const unsigned char *uc = *ucp;
	int more = 0; /* Array has more than 1 element */

	DPRINTF("Parse array: ", uc, *ucp);

	/*
	 * An array may be empty (RFC 7159, section 5).  Check for that
	 * before the element loop, which always expects a value first.
	 */
	uc = json_skip_space(uc, ue);
	if (uc < ue && *uc == ']') {
		*ucp = uc + 1;
		return 1;
	}

	while (uc < ue) {
		/* json_parse() also skips the whitespace around the value */
		if (!json_parse(&uc, ue, st, lvl + 1))
			goto out;
		if (uc == ue)
			goto out;
		switch (*uc) {
		case ',':
			more++;
			uc++;
			continue;
		case ']':
			if (more)
				st[JSON_ARRAYN]++;
			*ucp = uc + 1;
			return 1;
		default:
			goto out;
		}
	}
out:
	DPRINTF("Bad array: ", uc, *ucp);
	*ucp = uc;
	return 0;
}

/*
 * Parse the remainder of an object; the opening '{' has already been
 * consumed by the caller.
 *
 * Accepts an empty object, or one or more "name" : value members
 * separated by ','.  On success returns 1 and leaves *ucp just past the
 * closing '}'; on failure returns 0 and leaves *ucp where parsing
 * stopped.
 */
static int
json_parse_object(const unsigned char **ucp, const unsigned char *ue,
	size_t *st, size_t lvl)
{
	const unsigned char *uc = *ucp;

	DPRINTF("Parse object: ", uc, *ucp);

	/*
	 * An object may be empty (RFC 7159, section 4).  Check for that
	 * before the member loop, which always expects a name first.
	 */
	uc = json_skip_space(uc, ue);
	if (uc < ue && *uc == '}') {	/* { */
		*ucp = uc + 1;
		DPRINTF("Good object: ", uc, *ucp);
		return 1;
	}

	while (uc < ue) {
		/* member name */
		uc = json_skip_space(uc, ue);
		if (uc == ue)
			goto out;
		if (*uc++ != '"') {
			DPRINTF("not string", uc, *ucp);
			goto out;
		}
		DPRINTF("next field", uc, *ucp);
		if (!json_parse_string(&uc, ue)) {
			DPRINTF("not string", uc, *ucp);
			goto out;
		}

		/* name separator */
		uc = json_skip_space(uc, ue);
		if (uc == ue)
			goto out;
		if (*uc++ != ':') {
			DPRINTF("not colon", uc, *ucp);
			goto out;
		}

		/* member value */
		if (!json_parse(&uc, ue, st, lvl + 1)) {
			DPRINTF("not json", uc, *ucp);
			goto out;
		}
		if (uc == ue)
			goto out;
		switch (*uc++) {
		case ',':
			continue;
		case '}': /* { */
			*ucp = uc;
			DPRINTF("Good object: ", uc, *ucp);
			return 1;
		default:
			DPRINTF("not more", uc, *ucp);
			goto out;
		}
	}
out:
	DPRINTF("Bad object: ", uc, *ucp);
	*ucp = uc;
	return 0;
}

/*
 * Parse a number: optional '-', digits, optional '.', digits, and an
 * optional exponent ('e' or 'E', optional sign, digits).
 *
 * Returns 1 if at least one digit was seen (and, when an exponent is
 * present, at least one exponent digit), 0 otherwise.  *ucp is left at
 * the first byte that is not part of the number.
 */
static int
json_parse_number(const unsigned char **ucp, const unsigned char *ue)
{
	const unsigned char *uc = *ucp;
	int got = 0;

	DPRINTF("Parse number: ", uc, *ucp);
	if (uc == ue)
		return 0;
	if (*uc == '-')
		uc++;

	/* integer part */
	for (; uc < ue; uc++) {
		if (!json_isdigit(*uc))
			break;
		got = 1;
	}
	if (uc == ue)
		goto out;

	/* fraction */
	if (*uc == '.')
		uc++;
	for (; uc < ue; uc++) {
		if (!json_isdigit(*uc))
			break;
		got = 1;
	}
	if (uc == ue)
		goto out;

	/* exponent: needs digits of its own, so reset got */
	if (got && (*uc == 'e' || *uc == 'E')) {
		uc++;
		got = 0;
		if (uc == ue)
			goto out;
		if (*uc == '+' || *uc == '-')
			uc++;
		for (; uc < ue; uc++) {
			if (!json_isdigit(*uc))
				break;
			got = 1;
		}
	}
out:
	if (!got)
		DPRINTF("Bad number: ", uc, *ucp);
	else
		DPRINTF("Good number: ", uc, *ucp);
	*ucp = uc;
	return got;
}

/*
 * Match the tail of a literal name ("true", "false" or "null").
 *
 * str is the full literal and len is sizeof(literal), i.e. it counts the
 * terminating NUL.  The caller has already consumed the first character,
 * so the bytes still to be matched are str[1] .. str[len - 2].
 *
 * Returns 1 and leaves *ucp just past the literal when every byte
 * matches; returns 0 and leaves *ucp at the offending position when a
 * byte differs or the buffer ends early.
 */
static int
json_parse_const(const unsigned char **ucp, const unsigned char *ue,
	const char *str, size_t len)
{
	const unsigned char *uc = *ucp;
	size_t i;

	DPRINTF("Parse const: ", uc, *ucp);
	for (i = 1; i + 1 < len; i++) {
		if (uc == ue || *uc != (unsigned char)str[i]) {
			DPRINTF("Bad const: ", uc, *ucp);
			*ucp = uc;
			return 0;
		}
		uc++;
	}
	*ucp = uc;
	return 1;
}

/*
 * Parse one JSON value starting at *ucp, skipping whitespace on both
 * sides of it.
 *
 * st is an array of JSON_MAX counters indexed by the JSON_* constants;
 * the counter matching the value's type is incremented when the value
 * parses successfully.  lvl is the recursion level: it grows by two for
 * every level of array/object nesting (once here, once in the container
 * parser) and parsing is refused once it exceeds 20.
 *
 * Returns non-zero on success.  At lvl 0 success additionally requires
 * that an object, or an array with more than one element, was seen.
 * *ucp is advanced past whatever was consumed, except when the level
 * limit is hit, in which case it is left untouched.
 */
static int
json_parse(const unsigned char **ucp, const unsigned char *ue,
	size_t *st, size_t lvl)
{
	const unsigned char *uc;
	int rv = 0;
	int t;

	uc = json_skip_space(*ucp, ue);
	if (uc == ue)
		goto out;

	/* Avoid unbounded recursion */
	if (lvl > 20)
		return 0;
#if JSON_COUNT
	/*
	 * bail quickly if not counting
	 *
	 * NOTE(review): this guard reads as inverted with respect to the
	 * comment above and to the JSON_COUNT description at the top of
	 * the file; it is kept unchanged here -- confirm the intent before
	 * touching it, since this return does not advance *ucp.
	 */
	if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN]))
		return 1;
#endif

	DPRINTF("Parse general: ", uc, *ucp);
	/* Dispatch on the first byte; each sub-parser gets the rest. */
	switch (*uc++) {
	case '"':
		rv = json_parse_string(&uc, ue);
		t = JSON_STRING;
		break;
	case '[':
		rv = json_parse_array(&uc, ue, st, lvl + 1);
		t = JSON_ARRAY;
		break;
	case '{': /* '}' */
		rv = json_parse_object(&uc, ue, st, lvl + 1);
		t = JSON_OBJECT;
		break;
	case 't':
		rv = json_parse_const(&uc, ue, "true", sizeof("true"));
		t = JSON_CONSTANT;
		break;
	case 'f':
		rv = json_parse_const(&uc, ue, "false", sizeof("false"));
		t = JSON_CONSTANT;
		break;
	case 'n':
		rv = json_parse_const(&uc, ue, "null", sizeof("null"));
		t = JSON_CONSTANT;
		break;
	default:
		/* The number parser wants to see the first byte itself. */
		--uc;
		rv = json_parse_number(&uc, ue);
		t = JSON_NUMBER;
		break;
	}
	if (rv)
		st[t]++;
	uc = json_skip_space(uc, ue);
out:
	*ucp = uc;
	DPRINTF("End general: ", uc, *ucp);
	if (lvl == 0)
		return rv && (st[JSON_ARRAYN] || st[JSON_OBJECT]);
	return rv;
}

#ifndef TEST
/*
 * Decide whether the buffer b holds JSON and, if so, print a description
 * through file_printf().
 *
 * Returns 0 when the buffer is not recognised as JSON or when
 * MAGIC_APPLE or MAGIC_EXTENSION is set in ms->flags, 1 when it is JSON,
 * and -1 when file_printf() fails.  When the MAGIC_MIME bits equal
 * MAGIC_MIME_ENCODING nothing is printed.
 */
int
file_is_json(struct magic_set *ms, const struct buffer *b)
{
	const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
	const unsigned char *ue = uc + b->flen;
	size_t st[JSON_MAX];
	int mime = ms->flags & MAGIC_MIME;


	if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
		return 0;

	memset(st, 0, sizeof(st));

	if (!json_parse(&uc, ue, st, 0))
		return 0;

	if (mime == MAGIC_MIME_ENCODING)
		return 1;
	if (mime) {
		if (file_printf(ms, "application/json") == -1)
			return -1;
		return 1;
	}
	if (file_printf(ms, "JSON data") == -1)
		return -1;
#if JSON_COUNT
	/* Expands to a counter and its plural suffix for the format below. */
#define P(n) st[n], st[n] > 1 ? "s" : ""
	if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
	    "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
	    "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT
	    "u >1array%s)",
	    P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT),
	    P(JSON_NUMBER), P(JSON_ARRAYN))
	    == -1)
		return -1;
#endif
	return 1;
}

#else

#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <err.h>

/*
 * Stand-alone test driver: read the file named by the single command
 * line argument into memory, run json_parse() over it and print the
 * result as "is json 0" or "is json 1".
 */
int
main(int argc, char *argv[])
{
	int fd;
	struct stat st;
	unsigned char *p;
	const unsigned char *cp;
	size_t len;
	ssize_t nr;
	size_t stats[JSON_MAX];

	if (argc != 2)
		errx(EXIT_FAILURE, "Usage: is_json <file>");

	if ((fd = open(argv[1], O_RDONLY)) == -1)
		err(EXIT_FAILURE, "Can't open `%s'", argv[1]);

	if (fstat(fd, &st) == -1)
		err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);

	len = (size_t)st.st_size;
	/* malloc(0) may legitimately return NULL; always ask for >= 1 byte */
	if ((p = malloc(len == 0 ? 1 : len)) == NULL)
		err(EXIT_FAILURE, "Can't allocate %jd bytes",
		    (intmax_t)st.st_size);

	nr = read(fd, p, len);
	if (nr == -1)
		err(EXIT_FAILURE, "Can't read %jd bytes",
		    (intmax_t)st.st_size);
	if ((size_t)nr != len)
		errx(EXIT_FAILURE, "Short read: got %jd of %jd bytes",
		    (intmax_t)nr, (intmax_t)st.st_size);
	(void)close(fd);

	memset(stats, 0, sizeof(stats));
	/* json_parse() advances its cursor; keep p intact for free() */
	cp = p;
	printf("is json %d\n", json_parse(&cp, p + len, stats, 0));
	free(p);
	return 0;
}
#endif