1 /* 2 * Copyright (c) Ian F. Darwin 1986-1995. 3 * Software written by Ian F. Darwin and others; 4 * maintained 1995-present by Christos Zoulas and others. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice immediately at the beginning of the file, without modification, 11 * this list of conditions, and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 /* 29 * apprentice - make one pass through /etc/magic, learning its secrets. 
30 */ 31 32 #include "file.h" 33 34 #ifndef lint 35 FILE_RCSID("@(#)$File: apprentice.c,v 1.326 2022/09/13 18:46:07 christos Exp $") 36 #endif /* lint */ 37 38 #include "magic.h" 39 #include <stdlib.h> 40 #ifdef HAVE_UNISTD_H 41 #include <unistd.h> 42 #endif 43 #include <stddef.h> 44 #include <string.h> 45 #include <assert.h> 46 #include <ctype.h> 47 #include <fcntl.h> 48 #ifdef QUICK 49 #include <sys/mman.h> 50 #endif 51 #include <dirent.h> 52 #include <limits.h> 53 #ifdef HAVE_BYTESWAP_H 54 #include <byteswap.h> 55 #endif 56 #ifdef HAVE_SYS_BSWAP_H 57 #include <sys/bswap.h> 58 #endif 59 60 61 #define EATAB {while (isascii(CAST(unsigned char, *l)) && \ 62 isspace(CAST(unsigned char, *l))) ++l;} 63 #define LOWCASE(l) (isupper(CAST(unsigned char, l)) ? \ 64 tolower(CAST(unsigned char, l)) : (l)) 65 /* 66 * Work around a bug in headers on Digital Unix. 67 * At least confirmed for: OSF1 V4.0 878 68 */ 69 #if defined(__osf__) && defined(__DECC) 70 #ifdef MAP_FAILED 71 #undef MAP_FAILED 72 #endif 73 #endif 74 75 #ifndef MAP_FAILED 76 #define MAP_FAILED (void *) -1 77 #endif 78 79 #ifndef MAP_FILE 80 #define MAP_FILE 0 81 #endif 82 83 #define ALLOC_CHUNK CAST(size_t, 10) 84 #define ALLOC_INCR CAST(size_t, 200) 85 86 #define MAP_TYPE_USER 0 87 #define MAP_TYPE_MALLOC 1 88 #define MAP_TYPE_MMAP 2 89 90 struct magic_entry { 91 struct magic *mp; 92 uint32_t cont_count; 93 uint32_t max_count; 94 }; 95 96 struct magic_entry_set { 97 struct magic_entry *me; 98 uint32_t count; 99 uint32_t max; 100 }; 101 102 struct magic_map { 103 void *p; 104 size_t len; 105 int type; 106 struct magic *magic[MAGIC_SETS]; 107 uint32_t nmagic[MAGIC_SETS]; 108 }; 109 110 int file_formats[FILE_NAMES_SIZE]; 111 const size_t file_nformats = FILE_NAMES_SIZE; 112 const char *file_names[FILE_NAMES_SIZE]; 113 const size_t file_nnames = FILE_NAMES_SIZE; 114 115 private int getvalue(struct magic_set *ms, struct magic *, const char **, int); 116 private int hextoint(int); 117 private const char 
*getstr(struct magic_set *, struct magic *, const char *, 118 int); 119 private int parse(struct magic_set *, struct magic_entry *, const char *, 120 size_t, int); 121 private void eatsize(const char **); 122 private int apprentice_1(struct magic_set *, const char *, int); 123 private ssize_t apprentice_magic_strength_1(const struct magic *); 124 private size_t apprentice_magic_strength(const struct magic *, size_t); 125 private int apprentice_sort(const void *, const void *); 126 private void apprentice_list(struct mlist *, int ); 127 private struct magic_map *apprentice_load(struct magic_set *, 128 const char *, int); 129 private struct mlist *mlist_alloc(void); 130 private void mlist_free_all(struct magic_set *); 131 private void mlist_free(struct mlist *); 132 private void byteswap(struct magic *, uint32_t); 133 private void bs1(struct magic *); 134 135 #if defined(HAVE_BYTESWAP_H) 136 #define swap2(x) bswap_16(x) 137 #define swap4(x) bswap_32(x) 138 #define swap8(x) bswap_64(x) 139 #elif defined(HAVE_SYS_BSWAP_H) 140 #define swap2(x) bswap16(x) 141 #define swap4(x) bswap32(x) 142 #define swap8(x) bswap64(x) 143 #else 144 private uint16_t swap2(uint16_t); 145 private uint32_t swap4(uint32_t); 146 private uint64_t swap8(uint64_t); 147 #endif 148 149 private char *mkdbname(struct magic_set *, const char *, int); 150 private struct magic_map *apprentice_buf(struct magic_set *, struct magic *, 151 size_t); 152 private struct magic_map *apprentice_map(struct magic_set *, const char *); 153 private int check_buffer(struct magic_set *, struct magic_map *, const char *); 154 private void apprentice_unmap(struct magic_map *); 155 private int apprentice_compile(struct magic_set *, struct magic_map *, 156 const char *); 157 private int check_format_type(const char *, int, const char **); 158 private int check_format(struct magic_set *, struct magic *); 159 private int get_op(char); 160 private int parse_mime(struct magic_set *, struct magic_entry *, const char *, 161 
size_t); 162 private int parse_strength(struct magic_set *, struct magic_entry *, 163 const char *, size_t); 164 private int parse_apple(struct magic_set *, struct magic_entry *, const char *, 165 size_t); 166 private int parse_ext(struct magic_set *, struct magic_entry *, const char *, 167 size_t); 168 169 170 private size_t magicsize = sizeof(struct magic); 171 172 private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc"; 173 174 private struct { 175 const char *name; 176 size_t len; 177 int (*fun)(struct magic_set *, struct magic_entry *, const char *, 178 size_t); 179 } bang[] = { 180 #define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name } 181 DECLARE_FIELD(mime), 182 DECLARE_FIELD(apple), 183 DECLARE_FIELD(ext), 184 DECLARE_FIELD(strength), 185 #undef DECLARE_FIELD 186 { NULL, 0, NULL } 187 }; 188 189 #ifdef COMPILE_ONLY 190 191 int main(int, char *[]); 192 193 int 194 main(int argc, char *argv[]) 195 { 196 int ret; 197 struct magic_set *ms; 198 char *progname; 199 200 if ((progname = strrchr(argv[0], '/')) != NULL) 201 progname++; 202 else 203 progname = argv[0]; 204 205 if (argc != 2) { 206 (void)fprintf(stderr, "Usage: %s file\n", progname); 207 return 1; 208 } 209 210 if ((ms = magic_open(MAGIC_CHECK)) == NULL) { 211 (void)fprintf(stderr, "%s: %s\n", progname, strerror(errno)); 212 return 1; 213 } 214 ret = magic_compile(ms, argv[1]) == -1 ? 1 : 0; 215 if (ret == 1) 216 (void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms)); 217 magic_close(ms); 218 return ret; 219 } 220 #endif /* COMPILE_ONLY */ 221 222 struct type_tbl_s { 223 const char name[16]; 224 const size_t len; 225 const int type; 226 const int format; 227 }; 228 229 /* 230 * XXX - the actual Single UNIX Specification says that "long" means "long", 231 * as in the C data type, but we treat it as meaning "4-byte integer". 
232 * Given that the OS X version of file 5.04 did the same, I guess that passes 233 * the actual test; having "long" be dependent on how big a "long" is on 234 * the machine running "file" is silly. 235 */ 236 static const struct type_tbl_s type_tbl[] = { 237 # define XX(s) s, (sizeof(s) - 1) 238 # define XX_NULL "", 0 239 { XX("invalid"), FILE_INVALID, FILE_FMT_NONE }, 240 { XX("byte"), FILE_BYTE, FILE_FMT_NUM }, 241 { XX("short"), FILE_SHORT, FILE_FMT_NUM }, 242 { XX("default"), FILE_DEFAULT, FILE_FMT_NONE }, 243 { XX("long"), FILE_LONG, FILE_FMT_NUM }, 244 { XX("string"), FILE_STRING, FILE_FMT_STR }, 245 { XX("date"), FILE_DATE, FILE_FMT_STR }, 246 { XX("beshort"), FILE_BESHORT, FILE_FMT_NUM }, 247 { XX("belong"), FILE_BELONG, FILE_FMT_NUM }, 248 { XX("bedate"), FILE_BEDATE, FILE_FMT_STR }, 249 { XX("leshort"), FILE_LESHORT, FILE_FMT_NUM }, 250 { XX("lelong"), FILE_LELONG, FILE_FMT_NUM }, 251 { XX("ledate"), FILE_LEDATE, FILE_FMT_STR }, 252 { XX("pstring"), FILE_PSTRING, FILE_FMT_STR }, 253 { XX("ldate"), FILE_LDATE, FILE_FMT_STR }, 254 { XX("beldate"), FILE_BELDATE, FILE_FMT_STR }, 255 { XX("leldate"), FILE_LELDATE, FILE_FMT_STR }, 256 { XX("regex"), FILE_REGEX, FILE_FMT_STR }, 257 { XX("bestring16"), FILE_BESTRING16, FILE_FMT_STR }, 258 { XX("lestring16"), FILE_LESTRING16, FILE_FMT_STR }, 259 { XX("search"), FILE_SEARCH, FILE_FMT_STR }, 260 { XX("medate"), FILE_MEDATE, FILE_FMT_STR }, 261 { XX("meldate"), FILE_MELDATE, FILE_FMT_STR }, 262 { XX("melong"), FILE_MELONG, FILE_FMT_NUM }, 263 { XX("quad"), FILE_QUAD, FILE_FMT_QUAD }, 264 { XX("lequad"), FILE_LEQUAD, FILE_FMT_QUAD }, 265 { XX("bequad"), FILE_BEQUAD, FILE_FMT_QUAD }, 266 { XX("qdate"), FILE_QDATE, FILE_FMT_STR }, 267 { XX("leqdate"), FILE_LEQDATE, FILE_FMT_STR }, 268 { XX("beqdate"), FILE_BEQDATE, FILE_FMT_STR }, 269 { XX("qldate"), FILE_QLDATE, FILE_FMT_STR }, 270 { XX("leqldate"), FILE_LEQLDATE, FILE_FMT_STR }, 271 { XX("beqldate"), FILE_BEQLDATE, FILE_FMT_STR }, 272 { XX("float"), FILE_FLOAT, 
FILE_FMT_FLOAT }, 273 { XX("befloat"), FILE_BEFLOAT, FILE_FMT_FLOAT }, 274 { XX("lefloat"), FILE_LEFLOAT, FILE_FMT_FLOAT }, 275 { XX("double"), FILE_DOUBLE, FILE_FMT_DOUBLE }, 276 { XX("bedouble"), FILE_BEDOUBLE, FILE_FMT_DOUBLE }, 277 { XX("ledouble"), FILE_LEDOUBLE, FILE_FMT_DOUBLE }, 278 { XX("leid3"), FILE_LEID3, FILE_FMT_NUM }, 279 { XX("beid3"), FILE_BEID3, FILE_FMT_NUM }, 280 { XX("indirect"), FILE_INDIRECT, FILE_FMT_NUM }, 281 { XX("qwdate"), FILE_QWDATE, FILE_FMT_STR }, 282 { XX("leqwdate"), FILE_LEQWDATE, FILE_FMT_STR }, 283 { XX("beqwdate"), FILE_BEQWDATE, FILE_FMT_STR }, 284 { XX("name"), FILE_NAME, FILE_FMT_NONE }, 285 { XX("use"), FILE_USE, FILE_FMT_NONE }, 286 { XX("clear"), FILE_CLEAR, FILE_FMT_NONE }, 287 { XX("der"), FILE_DER, FILE_FMT_STR }, 288 { XX("guid"), FILE_GUID, FILE_FMT_STR }, 289 { XX("offset"), FILE_OFFSET, FILE_FMT_QUAD }, 290 { XX("bevarint"), FILE_BEVARINT, FILE_FMT_STR }, 291 { XX("levarint"), FILE_LEVARINT, FILE_FMT_STR }, 292 { XX("msdosdate"), FILE_MSDOSDATE, FILE_FMT_STR }, 293 { XX("lemsdosdate"), FILE_LEMSDOSDATE, FILE_FMT_STR }, 294 { XX("bemsdosdate"), FILE_BEMSDOSDATE, FILE_FMT_STR }, 295 { XX("msdostime"), FILE_MSDOSTIME, FILE_FMT_STR }, 296 { XX("lemsdostime"), FILE_LEMSDOSTIME, FILE_FMT_STR }, 297 { XX("bemsdostime"), FILE_BEMSDOSTIME, FILE_FMT_STR }, 298 { XX("octal"), FILE_OCTAL, FILE_FMT_STR }, 299 { XX_NULL, FILE_INVALID, FILE_FMT_NONE }, 300 }; 301 302 /* 303 * These are not types, and cannot be preceded by "u" to make them 304 * unsigned. 
305 */ 306 static const struct type_tbl_s special_tbl[] = { 307 { XX("der"), FILE_DER, FILE_FMT_STR }, 308 { XX("name"), FILE_NAME, FILE_FMT_STR }, 309 { XX("use"), FILE_USE, FILE_FMT_STR }, 310 { XX("octal"), FILE_OCTAL, FILE_FMT_STR }, 311 { XX_NULL, FILE_INVALID, FILE_FMT_NONE }, 312 }; 313 # undef XX 314 # undef XX_NULL 315 316 private int 317 get_type(const struct type_tbl_s *tbl, const char *l, const char **t) 318 { 319 const struct type_tbl_s *p; 320 321 for (p = tbl; p->len; p++) { 322 if (strncmp(l, p->name, p->len) == 0) { 323 if (t) 324 *t = l + p->len; 325 break; 326 } 327 } 328 return p->type; 329 } 330 331 private off_t 332 maxoff_t(void) { 333 if (/*CONSTCOND*/sizeof(off_t) == sizeof(int)) 334 return CAST(off_t, INT_MAX); 335 if (/*CONSTCOND*/sizeof(off_t) == sizeof(long)) 336 return CAST(off_t, LONG_MAX); 337 return 0x7fffffff; 338 } 339 340 private int 341 get_standard_integer_type(const char *l, const char **t) 342 { 343 int type; 344 345 if (isalpha(CAST(unsigned char, l[1]))) { 346 switch (l[1]) { 347 case 'C': 348 /* "dC" and "uC" */ 349 type = FILE_BYTE; 350 break; 351 case 'S': 352 /* "dS" and "uS" */ 353 type = FILE_SHORT; 354 break; 355 case 'I': 356 case 'L': 357 /* 358 * "dI", "dL", "uI", and "uL". 359 * 360 * XXX - the actual Single UNIX Specification says 361 * that "L" means "long", as in the C data type, 362 * but we treat it as meaning "4-byte integer". 363 * Given that the OS X version of file 5.04 did 364 * the same, I guess that passes the actual SUS 365 * validation suite; having "dL" be dependent on 366 * how big a "long" is on the machine running 367 * "file" is silly. 
368 */ 369 type = FILE_LONG; 370 break; 371 case 'Q': 372 /* "dQ" and "uQ" */ 373 type = FILE_QUAD; 374 break; 375 default: 376 /* "d{anything else}", "u{anything else}" */ 377 return FILE_INVALID; 378 } 379 l += 2; 380 } else if (isdigit(CAST(unsigned char, l[1]))) { 381 /* 382 * "d{num}" and "u{num}"; we only support {num} values 383 * of 1, 2, 4, and 8 - the Single UNIX Specification 384 * doesn't say anything about whether arbitrary 385 * values should be supported, but both the Solaris 10 386 * and OS X Mountain Lion versions of file passed the 387 * Single UNIX Specification validation suite, and 388 * neither of them support values bigger than 8 or 389 * non-power-of-2 values. 390 */ 391 if (isdigit(CAST(unsigned char, l[2]))) { 392 /* Multi-digit, so > 9 */ 393 return FILE_INVALID; 394 } 395 switch (l[1]) { 396 case '1': 397 type = FILE_BYTE; 398 break; 399 case '2': 400 type = FILE_SHORT; 401 break; 402 case '4': 403 type = FILE_LONG; 404 break; 405 case '8': 406 type = FILE_QUAD; 407 break; 408 default: 409 /* XXX - what about 3, 5, 6, or 7? */ 410 return FILE_INVALID; 411 } 412 l += 2; 413 } else { 414 /* 415 * "d" or "u" by itself. 416 */ 417 type = FILE_LONG; 418 ++l; 419 } 420 if (t) 421 *t = l; 422 return type; 423 } 424 425 private void 426 init_file_tables(void) 427 { 428 static int done = 0; 429 const struct type_tbl_s *p; 430 431 if (done) 432 return; 433 done++; 434 435 for (p = type_tbl; p->len; p++) { 436 assert(p->type < FILE_NAMES_SIZE); 437 file_names[p->type] = p->name; 438 file_formats[p->type] = p->format; 439 } 440 assert(p - type_tbl == FILE_NAMES_SIZE); 441 } 442 443 private int 444 add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx) 445 { 446 struct mlist *ml; 447 448 mlp->map = NULL; 449 if ((ml = CAST(struct mlist *, malloc(sizeof(*ml)))) == NULL) 450 return -1; 451 452 ml->map = idx == 0 ? 
map : NULL; 453 ml->magic = map->magic[idx]; 454 ml->nmagic = map->nmagic[idx]; 455 if (ml->nmagic) { 456 ml->magic_rxcomp = CAST(file_regex_t **, 457 calloc(ml->nmagic, sizeof(*ml->magic_rxcomp))); 458 if (ml->magic_rxcomp == NULL) { 459 free(ml); 460 return -1; 461 } 462 } else 463 ml->magic_rxcomp = NULL; 464 mlp->prev->next = ml; 465 ml->prev = mlp->prev; 466 ml->next = mlp; 467 mlp->prev = ml; 468 return 0; 469 } 470 471 /* 472 * Handle one file or directory. 473 */ 474 private int 475 apprentice_1(struct magic_set *ms, const char *fn, int action) 476 { 477 struct magic_map *map; 478 #ifndef COMPILE_ONLY 479 struct mlist *ml; 480 size_t i; 481 #endif 482 483 if (magicsize != FILE_MAGICSIZE) { 484 file_error(ms, 0, "magic element size %lu != %lu", 485 CAST(unsigned long, sizeof(*map->magic[0])), 486 CAST(unsigned long, FILE_MAGICSIZE)); 487 return -1; 488 } 489 490 if (action == FILE_COMPILE) { 491 map = apprentice_load(ms, fn, action); 492 if (map == NULL) 493 return -1; 494 return apprentice_compile(ms, map, fn); 495 } 496 497 #ifndef COMPILE_ONLY 498 map = apprentice_map(ms, fn); 499 if (map == NULL) { 500 if (ms->flags & MAGIC_CHECK) 501 file_magwarn(ms, "using regular magic file `%s'", fn); 502 map = apprentice_load(ms, fn, action); 503 if (map == NULL) 504 return -1; 505 } 506 507 for (i = 0; i < MAGIC_SETS; i++) { 508 if (add_mlist(ms->mlist[i], map, i) == -1) { 509 /* failed to add to any list, free explicitly */ 510 if (i == 0) 511 apprentice_unmap(map); 512 else 513 mlist_free_all(ms); 514 file_oomem(ms, sizeof(*ml)); 515 return -1; 516 } 517 } 518 519 if (action == FILE_LIST) { 520 for (i = 0; i < MAGIC_SETS; i++) { 521 printf("Set %" SIZE_T_FORMAT "u:\nBinary patterns:\n", 522 i); 523 apprentice_list(ms->mlist[i], BINTEST); 524 printf("Text patterns:\n"); 525 apprentice_list(ms->mlist[i], TEXTTEST); 526 } 527 } 528 return 0; 529 #else 530 return 0; 531 #endif /* COMPILE_ONLY */ 532 } 533 534 protected void 535 file_ms_free(struct magic_set *ms) 536 
{ 537 size_t i; 538 if (ms == NULL) 539 return; 540 for (i = 0; i < MAGIC_SETS; i++) 541 mlist_free(ms->mlist[i]); 542 free(ms->o.pbuf); 543 free(ms->o.buf); 544 free(ms->c.li); 545 #ifdef USE_C_LOCALE 546 freelocale(ms->c_lc_ctype); 547 #endif 548 free(ms); 549 } 550 551 protected struct magic_set * 552 file_ms_alloc(int flags) 553 { 554 struct magic_set *ms; 555 size_t i, len; 556 557 if ((ms = CAST(struct magic_set *, calloc(CAST(size_t, 1u), 558 sizeof(struct magic_set)))) == NULL) 559 return NULL; 560 561 if (magic_setflags(ms, flags) == -1) { 562 errno = EINVAL; 563 goto free; 564 } 565 566 ms->o.buf = ms->o.pbuf = NULL; 567 ms->o.blen = 0; 568 len = (ms->c.len = 10) * sizeof(*ms->c.li); 569 570 if ((ms->c.li = CAST(struct level_info *, malloc(len))) == NULL) 571 goto free; 572 573 ms->event_flags = 0; 574 ms->error = -1; 575 for (i = 0; i < MAGIC_SETS; i++) 576 ms->mlist[i] = NULL; 577 ms->file = "unknown"; 578 ms->line = 0; 579 ms->indir_max = FILE_INDIR_MAX; 580 ms->name_max = FILE_NAME_MAX; 581 ms->elf_shnum_max = FILE_ELF_SHNUM_MAX; 582 ms->elf_phnum_max = FILE_ELF_PHNUM_MAX; 583 ms->elf_notes_max = FILE_ELF_NOTES_MAX; 584 ms->regex_max = FILE_REGEX_MAX; 585 ms->bytes_max = FILE_BYTES_MAX; 586 ms->encoding_max = FILE_ENCODING_MAX; 587 #ifdef USE_C_LOCALE 588 ms->c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0); 589 assert(ms->c_lc_ctype != NULL); 590 #endif 591 return ms; 592 free: 593 free(ms); 594 return NULL; 595 } 596 597 private void 598 apprentice_unmap(struct magic_map *map) 599 { 600 size_t i; 601 char *p; 602 if (map == NULL) 603 return; 604 605 switch (map->type) { 606 case MAP_TYPE_USER: 607 break; 608 case MAP_TYPE_MALLOC: 609 p = CAST(char *, map->p); 610 for (i = 0; i < MAGIC_SETS; i++) { 611 char *b = RCAST(char *, map->magic[i]); 612 if (p != NULL && b >= p && b <= p + map->len) 613 continue; 614 free(b); 615 } 616 free(p); 617 break; 618 #ifdef QUICK 619 case MAP_TYPE_MMAP: 620 if (map->p && map->p != MAP_FAILED) 621 (void)munmap(map->p, 
map->len); 622 break; 623 #endif 624 default: 625 fprintf(stderr, "Bad map type %d", map->type); 626 abort(); 627 } 628 free(map); 629 } 630 631 private struct mlist * 632 mlist_alloc(void) 633 { 634 struct mlist *mlist; 635 if ((mlist = CAST(struct mlist *, calloc(1, sizeof(*mlist)))) == NULL) { 636 return NULL; 637 } 638 mlist->next = mlist->prev = mlist; 639 return mlist; 640 } 641 642 private void 643 mlist_free_all(struct magic_set *ms) 644 { 645 size_t i; 646 647 for (i = 0; i < MAGIC_SETS; i++) { 648 mlist_free(ms->mlist[i]); 649 ms->mlist[i] = NULL; 650 } 651 } 652 653 private void 654 mlist_free_one(struct mlist *ml) 655 { 656 size_t i; 657 658 if (ml->map) 659 apprentice_unmap(CAST(struct magic_map *, ml->map)); 660 661 for (i = 0; i < ml->nmagic; ++i) { 662 if (ml->magic_rxcomp[i]) { 663 file_regfree(ml->magic_rxcomp[i]); 664 free(ml->magic_rxcomp[i]); 665 ml->magic_rxcomp[i] = NULL; 666 } 667 } 668 free(ml->magic_rxcomp); 669 ml->magic_rxcomp = NULL; 670 free(ml); 671 } 672 673 private void 674 mlist_free(struct mlist *mlist) 675 { 676 struct mlist *ml, *next; 677 678 if (mlist == NULL) 679 return; 680 681 for (ml = mlist->next; ml != mlist;) { 682 next = ml->next; 683 mlist_free_one(ml); 684 ml = next; 685 } 686 mlist_free_one(mlist); 687 } 688 689 #ifndef COMPILE_ONLY 690 /* void **bufs: an array of compiled magic files */ 691 protected int 692 buffer_apprentice(struct magic_set *ms, struct magic **bufs, 693 size_t *sizes, size_t nbufs) 694 { 695 size_t i, j; 696 struct mlist *ml; 697 struct magic_map *map; 698 699 if (nbufs == 0) 700 return -1; 701 702 (void)file_reset(ms, 0); 703 704 init_file_tables(); 705 706 for (i = 0; i < MAGIC_SETS; i++) { 707 mlist_free(ms->mlist[i]); 708 if ((ms->mlist[i] = mlist_alloc()) == NULL) { 709 file_oomem(ms, sizeof(*ms->mlist[i])); 710 goto fail; 711 } 712 } 713 714 for (i = 0; i < nbufs; i++) { 715 map = apprentice_buf(ms, bufs[i], sizes[i]); 716 if (map == NULL) 717 goto fail; 718 719 for (j = 0; j < MAGIC_SETS; 
j++) { 720 if (add_mlist(ms->mlist[j], map, j) == -1) { 721 file_oomem(ms, sizeof(*ml)); 722 goto fail; 723 } 724 } 725 } 726 727 return 0; 728 fail: 729 mlist_free_all(ms); 730 return -1; 731 } 732 #endif 733 734 /* const char *fn: list of magic files and directories */ 735 protected int 736 file_apprentice(struct magic_set *ms, const char *fn, int action) 737 { 738 char *p, *mfn; 739 int fileerr, errs = -1; 740 size_t i, j; 741 742 (void)file_reset(ms, 0); 743 744 if ((fn = magic_getpath(fn, action)) == NULL) 745 return -1; 746 747 init_file_tables(); 748 749 if ((mfn = strdup(fn)) == NULL) { 750 file_oomem(ms, strlen(fn)); 751 return -1; 752 } 753 754 for (i = 0; i < MAGIC_SETS; i++) { 755 mlist_free(ms->mlist[i]); 756 if ((ms->mlist[i] = mlist_alloc()) == NULL) { 757 file_oomem(ms, sizeof(*ms->mlist[i])); 758 for (j = 0; j < i; j++) { 759 mlist_free(ms->mlist[j]); 760 ms->mlist[j] = NULL; 761 } 762 free(mfn); 763 return -1; 764 } 765 } 766 fn = mfn; 767 768 while (fn) { 769 p = CCAST(char *, strchr(fn, PATHSEP)); 770 if (p) 771 *p++ = '\0'; 772 if (*fn == '\0') 773 break; 774 fileerr = apprentice_1(ms, fn, action); 775 errs = MAX(errs, fileerr); 776 fn = p; 777 } 778 779 free(mfn); 780 781 if (errs == -1) { 782 for (i = 0; i < MAGIC_SETS; i++) { 783 mlist_free(ms->mlist[i]); 784 ms->mlist[i] = NULL; 785 } 786 file_error(ms, 0, "could not find any valid magic files!"); 787 return -1; 788 } 789 790 #if 0 791 /* 792 * Always leave the database loaded 793 */ 794 if (action == FILE_LOAD) 795 return 0; 796 797 for (i = 0; i < MAGIC_SETS; i++) { 798 mlist_free(ms->mlist[i]); 799 ms->mlist[i] = NULL; 800 } 801 #endif 802 803 switch (action) { 804 case FILE_LOAD: 805 case FILE_COMPILE: 806 case FILE_CHECK: 807 case FILE_LIST: 808 return 0; 809 default: 810 file_error(ms, 0, "Invalid action %d", action); 811 return -1; 812 } 813 } 814 815 /* 816 * Compute the real length of a magic expression, for the purposes 817 * of determining how "strong" a magic expression is 
(approximating 818 * how specific its matches are): 819 * - magic characters count 0 unless escaped. 820 * - [] expressions count 1 821 * - {} expressions count 0 822 * - regular characters or escaped magic characters count 1 823 * - 0 length expressions count as one 824 */ 825 private size_t 826 nonmagic(const char *str) 827 { 828 const char *p; 829 size_t rv = 0; 830 831 for (p = str; *p; p++) 832 switch (*p) { 833 case '\\': /* Escaped anything counts 1 */ 834 if (!*++p) 835 p--; 836 rv++; 837 continue; 838 case '?': /* Magic characters count 0 */ 839 case '*': 840 case '.': 841 case '+': 842 case '^': 843 case '$': 844 continue; 845 case '[': /* Bracketed expressions count 1 the ']' */ 846 while (*p && *p != ']') 847 p++; 848 p--; 849 continue; 850 case '{': /* Braced expressions count 0 */ 851 while (*p && *p != '}') 852 p++; 853 if (!*p) 854 p--; 855 continue; 856 default: /* Anything else counts 1 */ 857 rv++; 858 continue; 859 } 860 861 return rv == 0 ? 1 : rv; /* Return at least 1 */ 862 } 863 864 865 private size_t 866 typesize(int type) 867 { 868 switch (type) { 869 case FILE_BYTE: 870 return 1; 871 872 case FILE_SHORT: 873 case FILE_LESHORT: 874 case FILE_BESHORT: 875 case FILE_MSDOSDATE: 876 case FILE_BEMSDOSDATE: 877 case FILE_LEMSDOSDATE: 878 case FILE_MSDOSTIME: 879 case FILE_BEMSDOSTIME: 880 case FILE_LEMSDOSTIME: 881 return 2; 882 883 case FILE_LONG: 884 case FILE_LELONG: 885 case FILE_BELONG: 886 case FILE_MELONG: 887 return 4; 888 889 case FILE_DATE: 890 case FILE_LEDATE: 891 case FILE_BEDATE: 892 case FILE_MEDATE: 893 case FILE_LDATE: 894 case FILE_LELDATE: 895 case FILE_BELDATE: 896 case FILE_MELDATE: 897 case FILE_FLOAT: 898 case FILE_BEFLOAT: 899 case FILE_LEFLOAT: 900 case FILE_BEID3: 901 case FILE_LEID3: 902 return 4; 903 904 case FILE_QUAD: 905 case FILE_BEQUAD: 906 case FILE_LEQUAD: 907 case FILE_QDATE: 908 case FILE_LEQDATE: 909 case FILE_BEQDATE: 910 case FILE_QLDATE: 911 case FILE_LEQLDATE: 912 case FILE_BEQLDATE: 913 case 
FILE_QWDATE: 914 case FILE_LEQWDATE: 915 case FILE_BEQWDATE: 916 case FILE_DOUBLE: 917 case FILE_BEDOUBLE: 918 case FILE_LEDOUBLE: 919 case FILE_OFFSET: 920 case FILE_BEVARINT: 921 case FILE_LEVARINT: 922 return 8; 923 924 case FILE_GUID: 925 return 16; 926 927 default: 928 return FILE_BADSIZE; 929 } 930 } 931 932 /* 933 * Get weight of this magic entry, for sorting purposes. 934 */ 935 private ssize_t 936 apprentice_magic_strength_1(const struct magic *m) 937 { 938 #define MULT 10U 939 size_t ts, v; 940 ssize_t val = 2 * MULT; /* baseline strength */ 941 942 switch (m->type) { 943 case FILE_DEFAULT: /* make sure this sorts last */ 944 if (m->factor_op != FILE_FACTOR_OP_NONE) { 945 fprintf(stderr, "Bad factor_op %d", m->factor_op); 946 abort(); 947 } 948 return 0; 949 950 case FILE_BYTE: 951 case FILE_SHORT: 952 case FILE_LESHORT: 953 case FILE_BESHORT: 954 case FILE_LONG: 955 case FILE_LELONG: 956 case FILE_BELONG: 957 case FILE_MELONG: 958 case FILE_DATE: 959 case FILE_LEDATE: 960 case FILE_BEDATE: 961 case FILE_MEDATE: 962 case FILE_LDATE: 963 case FILE_LELDATE: 964 case FILE_BELDATE: 965 case FILE_MELDATE: 966 case FILE_FLOAT: 967 case FILE_BEFLOAT: 968 case FILE_LEFLOAT: 969 case FILE_QUAD: 970 case FILE_BEQUAD: 971 case FILE_LEQUAD: 972 case FILE_QDATE: 973 case FILE_LEQDATE: 974 case FILE_BEQDATE: 975 case FILE_QLDATE: 976 case FILE_LEQLDATE: 977 case FILE_BEQLDATE: 978 case FILE_QWDATE: 979 case FILE_LEQWDATE: 980 case FILE_BEQWDATE: 981 case FILE_DOUBLE: 982 case FILE_BEDOUBLE: 983 case FILE_LEDOUBLE: 984 case FILE_BEVARINT: 985 case FILE_LEVARINT: 986 case FILE_GUID: 987 case FILE_BEID3: 988 case FILE_LEID3: 989 case FILE_OFFSET: 990 case FILE_MSDOSDATE: 991 case FILE_BEMSDOSDATE: 992 case FILE_LEMSDOSDATE: 993 case FILE_MSDOSTIME: 994 case FILE_BEMSDOSTIME: 995 case FILE_LEMSDOSTIME: 996 ts = typesize(m->type); 997 if (ts == FILE_BADSIZE) { 998 (void)fprintf(stderr, "Bad size for type %d\n", 999 m->type); 1000 abort(); 1001 } 1002 val += ts * MULT; 1003 
break; 1004 1005 case FILE_PSTRING: 1006 case FILE_STRING: 1007 case FILE_OCTAL: 1008 val += m->vallen * MULT; 1009 break; 1010 1011 case FILE_BESTRING16: 1012 case FILE_LESTRING16: 1013 val += m->vallen * MULT / 2; 1014 break; 1015 1016 case FILE_SEARCH: 1017 if (m->vallen == 0) 1018 break; 1019 val += m->vallen * MAX(MULT / m->vallen, 1); 1020 break; 1021 1022 case FILE_REGEX: 1023 v = nonmagic(m->value.s); 1024 val += v * MAX(MULT / v, 1); 1025 break; 1026 1027 case FILE_INDIRECT: 1028 case FILE_NAME: 1029 case FILE_USE: 1030 case FILE_CLEAR: 1031 break; 1032 1033 case FILE_DER: 1034 val += MULT; 1035 break; 1036 1037 default: 1038 (void)fprintf(stderr, "Bad type %d\n", m->type); 1039 abort(); 1040 } 1041 1042 switch (m->reln) { 1043 case 'x': /* matches anything penalize */ 1044 case '!': /* matches almost anything penalize */ 1045 val = 0; 1046 break; 1047 1048 case '=': /* Exact match, prefer */ 1049 val += MULT; 1050 break; 1051 1052 case '>': 1053 case '<': /* comparison match reduce strength */ 1054 val -= 2 * MULT; 1055 break; 1056 1057 case '^': 1058 case '&': /* masking bits, we could count them too */ 1059 val -= MULT; 1060 break; 1061 1062 default: 1063 (void)fprintf(stderr, "Bad relation %c\n", m->reln); 1064 abort(); 1065 } 1066 1067 return val; 1068 } 1069 1070 1071 /*ARGSUSED*/ 1072 private size_t 1073 apprentice_magic_strength(const struct magic *m, 1074 size_t nmagic __attribute__((__unused__))) 1075 { 1076 ssize_t val = apprentice_magic_strength_1(m); 1077 1078 #ifdef notyet 1079 if (m->desc[0] == '\0') { 1080 size_t i; 1081 /* 1082 * Magic entries with no description get their continuations 1083 * added 1084 */ 1085 for (i = 1; m[i].cont_level != 0 && i < MIN(nmagic, 3); i++) { 1086 ssize_t v = apprentice_magic_strength_1(&m[i]) >> 1087 (i + 1); 1088 val += v; 1089 if (m[i].desc[0] != '\0') 1090 break; 1091 } 1092 } 1093 #endif 1094 1095 switch (m->factor_op) { 1096 case FILE_FACTOR_OP_NONE: 1097 break; 1098 case FILE_FACTOR_OP_PLUS: 1099 val 
+= m->factor; 1100 break; 1101 case FILE_FACTOR_OP_MINUS: 1102 val -= m->factor; 1103 break; 1104 case FILE_FACTOR_OP_TIMES: 1105 val *= m->factor; 1106 break; 1107 case FILE_FACTOR_OP_DIV: 1108 val /= m->factor; 1109 break; 1110 default: 1111 (void)fprintf(stderr, "Bad factor_op %u\n", m->factor_op); 1112 abort(); 1113 } 1114 1115 if (val <= 0) /* ensure we only return 0 for FILE_DEFAULT */ 1116 val = 1; 1117 1118 #ifndef notyet 1119 /* 1120 * Magic entries with no description get a bonus because they depend 1121 * on subsequent magic entries to print something. 1122 */ 1123 if (m->desc[0] == '\0') 1124 val++; 1125 #endif 1126 1127 return val; 1128 } 1129 1130 /* 1131 * Sort callback for sorting entries by "strength" (basically length) 1132 */ 1133 private int 1134 apprentice_sort(const void *a, const void *b) 1135 { 1136 const struct magic_entry *ma = CAST(const struct magic_entry *, a); 1137 const struct magic_entry *mb = CAST(const struct magic_entry *, b); 1138 size_t sa = apprentice_magic_strength(ma->mp, ma->cont_count); 1139 size_t sb = apprentice_magic_strength(mb->mp, mb->cont_count); 1140 if (sa == sb) 1141 return 0; 1142 else if (sa > sb) 1143 return -1; 1144 else 1145 return 1; 1146 } 1147 1148 /* 1149 * Shows sorted patterns list in the order which is used for the matching 1150 */ 1151 private void 1152 apprentice_list(struct mlist *mlist, int mode) 1153 { 1154 uint32_t magindex, descindex, mimeindex, lineindex; 1155 struct mlist *ml; 1156 for (ml = mlist->next; ml != mlist; ml = ml->next) { 1157 for (magindex = 0; magindex < ml->nmagic; magindex++) { 1158 struct magic *m = &ml->magic[magindex]; 1159 if ((m->flag & mode) != mode) { 1160 /* Skip sub-tests */ 1161 while (magindex + 1 < ml->nmagic && 1162 ml->magic[magindex + 1].cont_level != 0) 1163 ++magindex; 1164 continue; /* Skip to next top-level test*/ 1165 } 1166 1167 /* 1168 * Try to iterate over the tree until we find item with 1169 * description/mimetype. 
1170 */ 1171 lineindex = descindex = mimeindex = magindex; 1172 for (magindex++; magindex < ml->nmagic && 1173 ml->magic[magindex].cont_level != 0; magindex++) { 1174 if (*ml->magic[descindex].desc == '\0' 1175 && *ml->magic[magindex].desc) 1176 descindex = magindex; 1177 if (*ml->magic[mimeindex].mimetype == '\0' 1178 && *ml->magic[magindex].mimetype) 1179 mimeindex = magindex; 1180 } 1181 1182 printf("Strength = %3" SIZE_T_FORMAT "u@%u: %s [%s]\n", 1183 apprentice_magic_strength(m, ml->nmagic - magindex), 1184 ml->magic[lineindex].lineno, 1185 ml->magic[descindex].desc, 1186 ml->magic[mimeindex].mimetype); 1187 } 1188 } 1189 } 1190 1191 private void 1192 set_test_type(struct magic *mstart, struct magic *m) 1193 { 1194 switch (m->type) { 1195 case FILE_BYTE: 1196 case FILE_SHORT: 1197 case FILE_LONG: 1198 case FILE_DATE: 1199 case FILE_BESHORT: 1200 case FILE_BELONG: 1201 case FILE_BEDATE: 1202 case FILE_LESHORT: 1203 case FILE_LELONG: 1204 case FILE_LEDATE: 1205 case FILE_LDATE: 1206 case FILE_BELDATE: 1207 case FILE_LELDATE: 1208 case FILE_MEDATE: 1209 case FILE_MELDATE: 1210 case FILE_MELONG: 1211 case FILE_QUAD: 1212 case FILE_LEQUAD: 1213 case FILE_BEQUAD: 1214 case FILE_QDATE: 1215 case FILE_LEQDATE: 1216 case FILE_BEQDATE: 1217 case FILE_QLDATE: 1218 case FILE_LEQLDATE: 1219 case FILE_BEQLDATE: 1220 case FILE_QWDATE: 1221 case FILE_LEQWDATE: 1222 case FILE_BEQWDATE: 1223 case FILE_FLOAT: 1224 case FILE_BEFLOAT: 1225 case FILE_LEFLOAT: 1226 case FILE_DOUBLE: 1227 case FILE_BEDOUBLE: 1228 case FILE_LEDOUBLE: 1229 case FILE_BEVARINT: 1230 case FILE_LEVARINT: 1231 case FILE_DER: 1232 case FILE_GUID: 1233 case FILE_OFFSET: 1234 case FILE_MSDOSDATE: 1235 case FILE_BEMSDOSDATE: 1236 case FILE_LEMSDOSDATE: 1237 case FILE_MSDOSTIME: 1238 case FILE_BEMSDOSTIME: 1239 case FILE_LEMSDOSTIME: 1240 case FILE_OCTAL: 1241 mstart->flag |= BINTEST; 1242 break; 1243 case FILE_STRING: 1244 case FILE_PSTRING: 1245 case FILE_BESTRING16: 1246 case FILE_LESTRING16: 1247 /* Allow 
text overrides */ 1248 if (mstart->str_flags & STRING_TEXTTEST) 1249 mstart->flag |= TEXTTEST; 1250 else 1251 mstart->flag |= BINTEST; 1252 break; 1253 case FILE_REGEX: 1254 case FILE_SEARCH: 1255 /* Check for override */ 1256 if (mstart->str_flags & STRING_BINTEST) 1257 mstart->flag |= BINTEST; 1258 if (mstart->str_flags & STRING_TEXTTEST) 1259 mstart->flag |= TEXTTEST; 1260 1261 if (mstart->flag & (TEXTTEST|BINTEST)) 1262 break; 1263 1264 /* binary test if pattern is not text */ 1265 if (file_looks_utf8(m->value.us, CAST(size_t, m->vallen), NULL, 1266 NULL) <= 0) 1267 mstart->flag |= BINTEST; 1268 else 1269 mstart->flag |= TEXTTEST; 1270 break; 1271 case FILE_DEFAULT: 1272 /* can't deduce anything; we shouldn't see this at the 1273 top level anyway */ 1274 break; 1275 case FILE_INVALID: 1276 default: 1277 /* invalid search type, but no need to complain here */ 1278 break; 1279 } 1280 } 1281 1282 private int 1283 addentry(struct magic_set *ms, struct magic_entry *me, 1284 struct magic_entry_set *mset) 1285 { 1286 size_t i = me->mp->type == FILE_NAME ? 1 : 0; 1287 if (mset[i].me == NULL || mset[i].count == mset[i].max) { 1288 struct magic_entry *mp; 1289 1290 size_t incr = mset[i].max + ALLOC_INCR; 1291 if ((mp = CAST(struct magic_entry *, 1292 realloc(mset[i].me, sizeof(*mp) * incr))) == 1293 NULL) { 1294 file_oomem(ms, sizeof(*mp) * incr); 1295 return -1; 1296 } 1297 (void)memset(&mp[mset[i].count], 0, sizeof(*mp) * 1298 ALLOC_INCR); 1299 mset[i].me = mp; 1300 mset[i].max = CAST(uint32_t, incr); 1301 assert(mset[i].max == incr); 1302 } 1303 mset[i].me[mset[i].count++] = *me; 1304 memset(me, 0, sizeof(*me)); 1305 return 0; 1306 } 1307 1308 /* 1309 * Load and parse one file. 
1310 */ 1311 private void 1312 load_1(struct magic_set *ms, int action, const char *fn, int *errs, 1313 struct magic_entry_set *mset) 1314 { 1315 size_t lineno = 0, llen = 0; 1316 char *line = NULL; 1317 ssize_t len; 1318 struct magic_entry me; 1319 1320 FILE *f = fopen(ms->file = fn, "r"); 1321 if (f == NULL) { 1322 if (errno != ENOENT) 1323 file_error(ms, errno, "cannot read magic file `%s'", 1324 fn); 1325 (*errs)++; 1326 return; 1327 } 1328 1329 memset(&me, 0, sizeof(me)); 1330 /* read and parse this file */ 1331 for (ms->line = 1; (len = getline(&line, &llen, f)) != -1; 1332 ms->line++) { 1333 if (len == 0) /* null line, garbage, etc */ 1334 continue; 1335 if (line[len - 1] == '\n') { 1336 lineno++; 1337 line[len - 1] = '\0'; /* delete newline */ 1338 } 1339 switch (line[0]) { 1340 case '\0': /* empty, do not parse */ 1341 case '#': /* comment, do not parse */ 1342 continue; 1343 case '!': 1344 if (line[1] == ':') { 1345 size_t i; 1346 1347 for (i = 0; bang[i].name != NULL; i++) { 1348 if (CAST(size_t, len - 2) > bang[i].len && 1349 memcmp(bang[i].name, line + 2, 1350 bang[i].len) == 0) 1351 break; 1352 } 1353 if (bang[i].name == NULL) { 1354 file_error(ms, 0, 1355 "Unknown !: entry `%s'", line); 1356 (*errs)++; 1357 continue; 1358 } 1359 if (me.mp == NULL) { 1360 file_error(ms, 0, 1361 "No current entry for :!%s type", 1362 bang[i].name); 1363 (*errs)++; 1364 continue; 1365 } 1366 if ((*bang[i].fun)(ms, &me, 1367 line + bang[i].len + 2, 1368 len - bang[i].len - 2) != 0) { 1369 (*errs)++; 1370 continue; 1371 } 1372 continue; 1373 } 1374 /*FALLTHROUGH*/ 1375 default: 1376 again: 1377 switch (parse(ms, &me, line, lineno, action)) { 1378 case 0: 1379 continue; 1380 case 1: 1381 (void)addentry(ms, &me, mset); 1382 goto again; 1383 default: 1384 (*errs)++; 1385 break; 1386 } 1387 } 1388 } 1389 if (me.mp) 1390 (void)addentry(ms, &me, mset); 1391 free(line); 1392 (void)fclose(f); 1393 } 1394 1395 /* 1396 * parse a file or directory of files 1397 * const char *fn: 
name of magic file or directory 1398 */ 1399 private int 1400 cmpstrp(const void *p1, const void *p2) 1401 { 1402 return strcmp(*RCAST(char *const *, p1), *RCAST(char *const *, p2)); 1403 } 1404 1405 1406 private uint32_t 1407 set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme, 1408 uint32_t starttest) 1409 { 1410 static const char text[] = "text"; 1411 static const char binary[] = "binary"; 1412 static const size_t len = sizeof(text); 1413 1414 uint32_t i = starttest; 1415 1416 do { 1417 set_test_type(me[starttest].mp, me[i].mp); 1418 if ((ms->flags & MAGIC_DEBUG) == 0) 1419 continue; 1420 (void)fprintf(stderr, "%s%s%s: %s\n", 1421 me[i].mp->mimetype, 1422 me[i].mp->mimetype[0] == '\0' ? "" : "; ", 1423 me[i].mp->desc[0] ? me[i].mp->desc : "(no description)", 1424 me[i].mp->flag & BINTEST ? binary : text); 1425 if (me[i].mp->flag & BINTEST) { 1426 char *p = strstr(me[i].mp->desc, text); 1427 if (p && (p == me[i].mp->desc || 1428 isspace(CAST(unsigned char, p[-1]))) && 1429 (p + len - me[i].mp->desc == MAXstring 1430 || (p[len] == '\0' || 1431 isspace(CAST(unsigned char, p[len]))))) 1432 (void)fprintf(stderr, "*** Possible " 1433 "binary test for text type\n"); 1434 } 1435 } while (++i < nme && me[i].mp->cont_level != 0); 1436 return i; 1437 } 1438 1439 private void 1440 set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme) 1441 { 1442 uint32_t i; 1443 for (i = 0; i < nme; i++) { 1444 if (me[i].mp->cont_level == 0 && 1445 me[i].mp->type == FILE_DEFAULT) { 1446 while (++i < nme) 1447 if (me[i].mp->cont_level == 0) 1448 break; 1449 if (i != nme) { 1450 /* XXX - Ugh! 
*/ 1451 ms->line = me[i].mp->lineno; 1452 file_magwarn(ms, 1453 "level 0 \"default\" did not sort last"); 1454 } 1455 return; 1456 } 1457 } 1458 } 1459 1460 private int 1461 coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme, 1462 struct magic **ma, uint32_t *nma) 1463 { 1464 uint32_t i, mentrycount = 0; 1465 size_t slen; 1466 1467 for (i = 0; i < nme; i++) 1468 mentrycount += me[i].cont_count; 1469 1470 if (mentrycount == 0) { 1471 *ma = NULL; 1472 *nma = 0; 1473 return 0; 1474 } 1475 1476 slen = sizeof(**ma) * mentrycount; 1477 if ((*ma = CAST(struct magic *, malloc(slen))) == NULL) { 1478 file_oomem(ms, slen); 1479 return -1; 1480 } 1481 1482 mentrycount = 0; 1483 for (i = 0; i < nme; i++) { 1484 (void)memcpy(*ma + mentrycount, me[i].mp, 1485 me[i].cont_count * sizeof(**ma)); 1486 mentrycount += me[i].cont_count; 1487 } 1488 *nma = mentrycount; 1489 return 0; 1490 } 1491 1492 private void 1493 magic_entry_free(struct magic_entry *me, uint32_t nme) 1494 { 1495 uint32_t i; 1496 if (me == NULL) 1497 return; 1498 for (i = 0; i < nme; i++) 1499 free(me[i].mp); 1500 free(me); 1501 } 1502 1503 private struct magic_map * 1504 apprentice_load(struct magic_set *ms, const char *fn, int action) 1505 { 1506 int errs = 0; 1507 uint32_t i, j; 1508 size_t files = 0, maxfiles = 0; 1509 char **filearr = NULL, *mfn; 1510 struct stat st; 1511 struct magic_map *map; 1512 struct magic_entry_set mset[MAGIC_SETS]; 1513 DIR *dir; 1514 struct dirent *d; 1515 1516 memset(mset, 0, sizeof(mset)); 1517 ms->flags |= MAGIC_CHECK; /* Enable checks for parsed files */ 1518 1519 1520 if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) 1521 { 1522 file_oomem(ms, sizeof(*map)); 1523 return NULL; 1524 } 1525 map->type = MAP_TYPE_MALLOC; 1526 1527 /* print silly verbose header for USG compat. 
*/ 1528 if (action == FILE_CHECK) 1529 (void)fprintf(stderr, "%s\n", usg_hdr); 1530 1531 /* load directory or file */ 1532 if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) { 1533 dir = opendir(fn); 1534 if (!dir) { 1535 errs++; 1536 goto out; 1537 } 1538 while ((d = readdir(dir)) != NULL) { 1539 if (d->d_name[0] == '.') 1540 continue; 1541 if (asprintf(&mfn, "%s/%s", fn, d->d_name) < 0) { 1542 file_oomem(ms, 1543 strlen(fn) + strlen(d->d_name) + 2); 1544 errs++; 1545 closedir(dir); 1546 goto out; 1547 } 1548 if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) { 1549 free(mfn); 1550 continue; 1551 } 1552 if (files >= maxfiles) { 1553 size_t mlen; 1554 char **nfilearr; 1555 maxfiles = (maxfiles + 1) * 2; 1556 mlen = maxfiles * sizeof(*filearr); 1557 if ((nfilearr = CAST(char **, 1558 realloc(filearr, mlen))) == NULL) { 1559 file_oomem(ms, mlen); 1560 free(mfn); 1561 closedir(dir); 1562 errs++; 1563 goto out; 1564 } 1565 filearr = nfilearr; 1566 } 1567 filearr[files++] = mfn; 1568 } 1569 closedir(dir); 1570 if (filearr) { 1571 qsort(filearr, files, sizeof(*filearr), cmpstrp); 1572 for (i = 0; i < files; i++) { 1573 load_1(ms, action, filearr[i], &errs, mset); 1574 free(filearr[i]); 1575 } 1576 free(filearr); 1577 filearr = NULL; 1578 } 1579 } else 1580 load_1(ms, action, fn, &errs, mset); 1581 if (errs) 1582 goto out; 1583 1584 for (j = 0; j < MAGIC_SETS; j++) { 1585 /* Set types of tests */ 1586 for (i = 0; i < mset[j].count; ) { 1587 if (mset[j].me[i].mp->cont_level != 0) { 1588 i++; 1589 continue; 1590 } 1591 i = set_text_binary(ms, mset[j].me, mset[j].count, i); 1592 } 1593 if (mset[j].me) 1594 qsort(mset[j].me, mset[j].count, sizeof(*mset[j].me), 1595 apprentice_sort); 1596 1597 /* 1598 * Make sure that any level 0 "default" line is last 1599 * (if one exists). 
1600 */ 1601 set_last_default(ms, mset[j].me, mset[j].count); 1602 1603 /* coalesce per file arrays into a single one, if needed */ 1604 if (mset[j].count == 0) 1605 continue; 1606 1607 if (coalesce_entries(ms, mset[j].me, mset[j].count, 1608 &map->magic[j], &map->nmagic[j]) == -1) { 1609 errs++; 1610 goto out; 1611 } 1612 } 1613 1614 out: 1615 free(filearr); 1616 for (j = 0; j < MAGIC_SETS; j++) 1617 magic_entry_free(mset[j].me, mset[j].count); 1618 1619 if (errs) { 1620 apprentice_unmap(map); 1621 return NULL; 1622 } 1623 return map; 1624 } 1625 1626 /* 1627 * extend the sign bit if the comparison is to be signed 1628 */ 1629 protected uint64_t 1630 file_signextend(struct magic_set *ms, struct magic *m, uint64_t v) 1631 { 1632 if (!(m->flag & UNSIGNED)) { 1633 switch(m->type) { 1634 /* 1635 * Do not remove the casts below. They are 1636 * vital. When later compared with the data, 1637 * the sign extension must have happened. 1638 */ 1639 case FILE_BYTE: 1640 v = CAST(signed char, v); 1641 break; 1642 case FILE_SHORT: 1643 case FILE_BESHORT: 1644 case FILE_LESHORT: 1645 v = CAST(short, v); 1646 break; 1647 case FILE_DATE: 1648 case FILE_BEDATE: 1649 case FILE_LEDATE: 1650 case FILE_MEDATE: 1651 case FILE_LDATE: 1652 case FILE_BELDATE: 1653 case FILE_LELDATE: 1654 case FILE_MELDATE: 1655 case FILE_LONG: 1656 case FILE_BELONG: 1657 case FILE_LELONG: 1658 case FILE_MELONG: 1659 case FILE_FLOAT: 1660 case FILE_BEFLOAT: 1661 case FILE_LEFLOAT: 1662 case FILE_MSDOSDATE: 1663 case FILE_BEMSDOSDATE: 1664 case FILE_LEMSDOSDATE: 1665 case FILE_MSDOSTIME: 1666 case FILE_BEMSDOSTIME: 1667 case FILE_LEMSDOSTIME: 1668 v = CAST(int32_t, v); 1669 break; 1670 case FILE_QUAD: 1671 case FILE_BEQUAD: 1672 case FILE_LEQUAD: 1673 case FILE_QDATE: 1674 case FILE_QLDATE: 1675 case FILE_QWDATE: 1676 case FILE_BEQDATE: 1677 case FILE_BEQLDATE: 1678 case FILE_BEQWDATE: 1679 case FILE_LEQDATE: 1680 case FILE_LEQLDATE: 1681 case FILE_LEQWDATE: 1682 case FILE_DOUBLE: 1683 case FILE_BEDOUBLE: 
1684 case FILE_LEDOUBLE: 1685 case FILE_OFFSET: 1686 case FILE_BEVARINT: 1687 case FILE_LEVARINT: 1688 v = CAST(int64_t, v); 1689 break; 1690 case FILE_STRING: 1691 case FILE_PSTRING: 1692 case FILE_BESTRING16: 1693 case FILE_LESTRING16: 1694 case FILE_REGEX: 1695 case FILE_SEARCH: 1696 case FILE_DEFAULT: 1697 case FILE_INDIRECT: 1698 case FILE_NAME: 1699 case FILE_USE: 1700 case FILE_CLEAR: 1701 case FILE_DER: 1702 case FILE_GUID: 1703 case FILE_OCTAL: 1704 break; 1705 default: 1706 if (ms->flags & MAGIC_CHECK) 1707 file_magwarn(ms, "cannot happen: m->type=%d\n", 1708 m->type); 1709 return FILE_BADSIZE; 1710 } 1711 } 1712 return v; 1713 } 1714 1715 private int 1716 string_modifier_check(struct magic_set *ms, struct magic *m) 1717 { 1718 if ((ms->flags & MAGIC_CHECK) == 0) 1719 return 0; 1720 1721 if ((m->type != FILE_REGEX || (m->str_flags & REGEX_LINE_COUNT) == 0) && 1722 (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0)) { 1723 file_magwarn(ms, 1724 "'/BHhLl' modifiers are only allowed for pascal strings\n"); 1725 return -1; 1726 } 1727 switch (m->type) { 1728 case FILE_BESTRING16: 1729 case FILE_LESTRING16: 1730 if (m->str_flags != 0) { 1731 file_magwarn(ms, 1732 "no modifiers allowed for 16-bit strings\n"); 1733 return -1; 1734 } 1735 break; 1736 case FILE_STRING: 1737 case FILE_PSTRING: 1738 if ((m->str_flags & REGEX_OFFSET_START) != 0) { 1739 file_magwarn(ms, 1740 "'/%c' only allowed on regex and search\n", 1741 CHAR_REGEX_OFFSET_START); 1742 return -1; 1743 } 1744 break; 1745 case FILE_SEARCH: 1746 if (m->str_range == 0) { 1747 file_magwarn(ms, 1748 "missing range; defaulting to %d\n", 1749 STRING_DEFAULT_RANGE); 1750 m->str_range = STRING_DEFAULT_RANGE; 1751 return -1; 1752 } 1753 break; 1754 case FILE_REGEX: 1755 if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) { 1756 file_magwarn(ms, "'/%c' not allowed on regex\n", 1757 CHAR_COMPACT_WHITESPACE); 1758 return -1; 1759 } 1760 if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) { 
1761 file_magwarn(ms, "'/%c' not allowed on regex\n", 1762 CHAR_COMPACT_OPTIONAL_WHITESPACE); 1763 return -1; 1764 } 1765 break; 1766 default: 1767 file_magwarn(ms, "coding error: m->type=%d\n", 1768 m->type); 1769 return -1; 1770 } 1771 return 0; 1772 } 1773 1774 private int 1775 get_op(char c) 1776 { 1777 switch (c) { 1778 case '&': 1779 return FILE_OPAND; 1780 case '|': 1781 return FILE_OPOR; 1782 case '^': 1783 return FILE_OPXOR; 1784 case '+': 1785 return FILE_OPADD; 1786 case '-': 1787 return FILE_OPMINUS; 1788 case '*': 1789 return FILE_OPMULTIPLY; 1790 case '/': 1791 return FILE_OPDIVIDE; 1792 case '%': 1793 return FILE_OPMODULO; 1794 default: 1795 return -1; 1796 } 1797 } 1798 1799 #ifdef ENABLE_CONDITIONALS 1800 private int 1801 get_cond(const char *l, const char **t) 1802 { 1803 static const struct cond_tbl_s { 1804 char name[8]; 1805 size_t len; 1806 int cond; 1807 } cond_tbl[] = { 1808 { "if", 2, COND_IF }, 1809 { "elif", 4, COND_ELIF }, 1810 { "else", 4, COND_ELSE }, 1811 { "", 0, COND_NONE }, 1812 }; 1813 const struct cond_tbl_s *p; 1814 1815 for (p = cond_tbl; p->len; p++) { 1816 if (strncmp(l, p->name, p->len) == 0 && 1817 isspace(CAST(unsigned char, l[p->len]))) { 1818 if (t) 1819 *t = l + p->len; 1820 break; 1821 } 1822 } 1823 return p->cond; 1824 } 1825 1826 private int 1827 check_cond(struct magic_set *ms, int cond, uint32_t cont_level) 1828 { 1829 int last_cond; 1830 last_cond = ms->c.li[cont_level].last_cond; 1831 1832 switch (cond) { 1833 case COND_IF: 1834 if (last_cond != COND_NONE && last_cond != COND_ELIF) { 1835 if (ms->flags & MAGIC_CHECK) 1836 file_magwarn(ms, "syntax error: `if'"); 1837 return -1; 1838 } 1839 last_cond = COND_IF; 1840 break; 1841 1842 case COND_ELIF: 1843 if (last_cond != COND_IF && last_cond != COND_ELIF) { 1844 if (ms->flags & MAGIC_CHECK) 1845 file_magwarn(ms, "syntax error: `elif'"); 1846 return -1; 1847 } 1848 last_cond = COND_ELIF; 1849 break; 1850 1851 case COND_ELSE: 1852 if (last_cond != COND_IF && last_cond 
!= COND_ELIF) { 1853 if (ms->flags & MAGIC_CHECK) 1854 file_magwarn(ms, "syntax error: `else'"); 1855 return -1; 1856 } 1857 last_cond = COND_NONE; 1858 break; 1859 1860 case COND_NONE: 1861 last_cond = COND_NONE; 1862 break; 1863 } 1864 1865 ms->c.li[cont_level].last_cond = last_cond; 1866 return 0; 1867 } 1868 #endif /* ENABLE_CONDITIONALS */ 1869 1870 private int 1871 parse_indirect_modifier(struct magic_set *ms, struct magic *m, const char **lp) 1872 { 1873 const char *l = *lp; 1874 1875 while (!isspace(CAST(unsigned char, *++l))) 1876 switch (*l) { 1877 case CHAR_INDIRECT_RELATIVE: 1878 m->str_flags |= INDIRECT_RELATIVE; 1879 break; 1880 default: 1881 if (ms->flags & MAGIC_CHECK) 1882 file_magwarn(ms, "indirect modifier `%c' " 1883 "invalid", *l); 1884 *lp = l; 1885 return -1; 1886 } 1887 *lp = l; 1888 return 0; 1889 } 1890 1891 private void 1892 parse_op_modifier(struct magic_set *ms, struct magic *m, const char **lp, 1893 int op) 1894 { 1895 const char *l = *lp; 1896 char *t; 1897 uint64_t val; 1898 1899 ++l; 1900 m->mask_op |= op; 1901 val = CAST(uint64_t, strtoull(l, &t, 0)); 1902 l = t; 1903 m->num_mask = file_signextend(ms, m, val); 1904 eatsize(&l); 1905 *lp = l; 1906 } 1907 1908 private int 1909 parse_string_modifier(struct magic_set *ms, struct magic *m, const char **lp) 1910 { 1911 const char *l = *lp; 1912 char *t; 1913 int have_range = 0; 1914 1915 while (!isspace(CAST(unsigned char, *++l))) { 1916 switch (*l) { 1917 case '0': case '1': case '2': 1918 case '3': case '4': case '5': 1919 case '6': case '7': case '8': 1920 case '9': 1921 if (have_range && (ms->flags & MAGIC_CHECK)) 1922 file_magwarn(ms, "multiple ranges"); 1923 have_range = 1; 1924 m->str_range = CAST(uint32_t, strtoul(l, &t, 0)); 1925 if (m->str_range == 0) 1926 file_magwarn(ms, "zero range"); 1927 l = t - 1; 1928 break; 1929 case CHAR_COMPACT_WHITESPACE: 1930 m->str_flags |= STRING_COMPACT_WHITESPACE; 1931 break; 1932 case CHAR_COMPACT_OPTIONAL_WHITESPACE: 1933 m->str_flags |= 
STRING_COMPACT_OPTIONAL_WHITESPACE; 1934 break; 1935 case CHAR_IGNORE_LOWERCASE: 1936 m->str_flags |= STRING_IGNORE_LOWERCASE; 1937 break; 1938 case CHAR_IGNORE_UPPERCASE: 1939 m->str_flags |= STRING_IGNORE_UPPERCASE; 1940 break; 1941 case CHAR_REGEX_OFFSET_START: 1942 m->str_flags |= REGEX_OFFSET_START; 1943 break; 1944 case CHAR_BINTEST: 1945 m->str_flags |= STRING_BINTEST; 1946 break; 1947 case CHAR_TEXTTEST: 1948 m->str_flags |= STRING_TEXTTEST; 1949 break; 1950 case CHAR_TRIM: 1951 m->str_flags |= STRING_TRIM; 1952 break; 1953 case CHAR_FULL_WORD: 1954 m->str_flags |= STRING_FULL_WORD; 1955 break; 1956 case CHAR_PSTRING_1_LE: 1957 #define SET_LENGTH(a) m->str_flags = (m->str_flags & ~PSTRING_LEN) | (a) 1958 if (m->type != FILE_PSTRING) 1959 goto bad; 1960 SET_LENGTH(PSTRING_1_LE); 1961 break; 1962 case CHAR_PSTRING_2_BE: 1963 if (m->type != FILE_PSTRING) 1964 goto bad; 1965 SET_LENGTH(PSTRING_2_BE); 1966 break; 1967 case CHAR_PSTRING_2_LE: 1968 if (m->type != FILE_PSTRING) 1969 goto bad; 1970 SET_LENGTH(PSTRING_2_LE); 1971 break; 1972 case CHAR_PSTRING_4_BE: 1973 if (m->type != FILE_PSTRING) 1974 goto bad; 1975 SET_LENGTH(PSTRING_4_BE); 1976 break; 1977 case CHAR_PSTRING_4_LE: 1978 switch (m->type) { 1979 case FILE_PSTRING: 1980 case FILE_REGEX: 1981 break; 1982 default: 1983 goto bad; 1984 } 1985 SET_LENGTH(PSTRING_4_LE); 1986 break; 1987 case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF: 1988 if (m->type != FILE_PSTRING) 1989 goto bad; 1990 m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF; 1991 break; 1992 default: 1993 bad: 1994 if (ms->flags & MAGIC_CHECK) 1995 file_magwarn(ms, "string modifier `%c' " 1996 "invalid", *l); 1997 goto out; 1998 } 1999 /* allow multiple '/' for readability */ 2000 if (l[1] == '/' && !isspace(CAST(unsigned char, l[2]))) 2001 l++; 2002 } 2003 if (string_modifier_check(ms, m) == -1) 2004 goto out; 2005 *lp = l; 2006 return 0; 2007 out: 2008 *lp = l; 2009 return -1; 2010 } 2011 2012 /* 2013 * parse one line from magic file, put into 
magic[index++] if valid 2014 */ 2015 private int 2016 parse(struct magic_set *ms, struct magic_entry *me, const char *line, 2017 size_t lineno, int action) 2018 { 2019 #ifdef ENABLE_CONDITIONALS 2020 static uint32_t last_cont_level = 0; 2021 #endif 2022 size_t i; 2023 struct magic *m; 2024 const char *l = line; 2025 char *t; 2026 int op; 2027 uint32_t cont_level; 2028 int32_t diff; 2029 2030 cont_level = 0; 2031 2032 /* 2033 * Parse the offset. 2034 */ 2035 while (*l == '>') { 2036 ++l; /* step over */ 2037 cont_level++; 2038 } 2039 #ifdef ENABLE_CONDITIONALS 2040 if (cont_level == 0 || cont_level > last_cont_level) 2041 if (file_check_mem(ms, cont_level) == -1) 2042 return -1; 2043 last_cont_level = cont_level; 2044 #endif 2045 if (cont_level != 0) { 2046 if (me->mp == NULL) { 2047 file_magerror(ms, "No current entry for continuation"); 2048 return -1; 2049 } 2050 if (me->cont_count == 0) { 2051 file_magerror(ms, "Continuations present with 0 count"); 2052 return -1; 2053 } 2054 m = &me->mp[me->cont_count - 1]; 2055 diff = CAST(int32_t, cont_level) - CAST(int32_t, m->cont_level); 2056 if (diff > 1) 2057 file_magwarn(ms, "New continuation level %u is more " 2058 "than one larger than current level %u", cont_level, 2059 m->cont_level); 2060 if (me->cont_count == me->max_count) { 2061 struct magic *nm; 2062 size_t cnt = me->max_count + ALLOC_CHUNK; 2063 if ((nm = CAST(struct magic *, realloc(me->mp, 2064 sizeof(*nm) * cnt))) == NULL) { 2065 file_oomem(ms, sizeof(*nm) * cnt); 2066 return -1; 2067 } 2068 me->mp = nm; 2069 me->max_count = CAST(uint32_t, cnt); 2070 } 2071 m = &me->mp[me->cont_count++]; 2072 (void)memset(m, 0, sizeof(*m)); 2073 m->cont_level = cont_level; 2074 } else { 2075 static const size_t len = sizeof(*m) * ALLOC_CHUNK; 2076 if (me->mp != NULL) 2077 return 1; 2078 if ((m = CAST(struct magic *, malloc(len))) == NULL) { 2079 file_oomem(ms, len); 2080 return -1; 2081 } 2082 me->mp = m; 2083 me->max_count = ALLOC_CHUNK; 2084 (void)memset(m, 0, 
sizeof(*m)); 2085 m->factor_op = FILE_FACTOR_OP_NONE; 2086 m->cont_level = 0; 2087 me->cont_count = 1; 2088 } 2089 m->lineno = CAST(uint32_t, lineno); 2090 2091 if (*l == '&') { /* m->cont_level == 0 checked below. */ 2092 ++l; /* step over */ 2093 m->flag |= OFFADD; 2094 } 2095 if (*l == '(') { 2096 ++l; /* step over */ 2097 m->flag |= INDIR; 2098 if (m->flag & OFFADD) 2099 m->flag = (m->flag & ~OFFADD) | INDIROFFADD; 2100 2101 if (*l == '&') { /* m->cont_level == 0 checked below */ 2102 ++l; /* step over */ 2103 m->flag |= OFFADD; 2104 } 2105 } 2106 /* Indirect offsets are not valid at level 0. */ 2107 if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD))) { 2108 if (ms->flags & MAGIC_CHECK) 2109 file_magwarn(ms, "relative offset at level 0"); 2110 return -1; 2111 } 2112 2113 /* get offset, then skip over it */ 2114 if (*l == '-') { 2115 ++l; /* step over */ 2116 m->flag |= OFFNEGATIVE; 2117 } 2118 m->offset = CAST(int32_t, strtol(l, &t, 0)); 2119 if (l == t) { 2120 if (ms->flags & MAGIC_CHECK) 2121 file_magwarn(ms, "offset `%s' invalid", l); 2122 return -1; 2123 } 2124 2125 l = t; 2126 2127 if (m->flag & INDIR) { 2128 m->in_type = FILE_LONG; 2129 m->in_offset = 0; 2130 m->in_op = 0; 2131 /* 2132 * read [.,lbs][+-]nnnnn) 2133 */ 2134 if (*l == '.' 
|| *l == ',') { 2135 if (*l == ',') 2136 m->in_op |= FILE_OPSIGNED; 2137 l++; 2138 switch (*l) { 2139 case 'l': 2140 m->in_type = FILE_LELONG; 2141 break; 2142 case 'L': 2143 m->in_type = FILE_BELONG; 2144 break; 2145 case 'm': 2146 m->in_type = FILE_MELONG; 2147 break; 2148 case 'h': 2149 case 's': 2150 m->in_type = FILE_LESHORT; 2151 break; 2152 case 'H': 2153 case 'S': 2154 m->in_type = FILE_BESHORT; 2155 break; 2156 case 'c': 2157 case 'b': 2158 case 'C': 2159 case 'B': 2160 m->in_type = FILE_BYTE; 2161 break; 2162 case 'e': 2163 case 'f': 2164 case 'g': 2165 m->in_type = FILE_LEDOUBLE; 2166 break; 2167 case 'E': 2168 case 'F': 2169 case 'G': 2170 m->in_type = FILE_BEDOUBLE; 2171 break; 2172 case 'i': 2173 m->in_type = FILE_LEID3; 2174 break; 2175 case 'I': 2176 m->in_type = FILE_BEID3; 2177 break; 2178 case 'o': 2179 m->in_type = FILE_OCTAL; 2180 break; 2181 case 'q': 2182 m->in_type = FILE_LEQUAD; 2183 break; 2184 case 'Q': 2185 m->in_type = FILE_BEQUAD; 2186 break; 2187 default: 2188 if (ms->flags & MAGIC_CHECK) 2189 file_magwarn(ms, 2190 "indirect offset type `%c' invalid", 2191 *l); 2192 return -1; 2193 } 2194 l++; 2195 } 2196 2197 if (*l == '~') { 2198 m->in_op |= FILE_OPINVERSE; 2199 l++; 2200 } 2201 if ((op = get_op(*l)) != -1) { 2202 m->in_op |= op; 2203 l++; 2204 } 2205 if (*l == '(') { 2206 m->in_op |= FILE_OPINDIRECT; 2207 l++; 2208 } 2209 if (isdigit(CAST(unsigned char, *l)) || *l == '-') { 2210 m->in_offset = CAST(int32_t, strtol(l, &t, 0)); 2211 if (l == t) { 2212 if (ms->flags & MAGIC_CHECK) 2213 file_magwarn(ms, 2214 "in_offset `%s' invalid", l); 2215 return -1; 2216 } 2217 l = t; 2218 } 2219 if (*l++ != ')' || 2220 ((m->in_op & FILE_OPINDIRECT) && *l++ != ')')) { 2221 if (ms->flags & MAGIC_CHECK) 2222 file_magwarn(ms, 2223 "missing ')' in indirect offset"); 2224 return -1; 2225 } 2226 } 2227 EATAB; 2228 2229 #ifdef ENABLE_CONDITIONALS 2230 m->cond = get_cond(l, &l); 2231 if (check_cond(ms, m->cond, cont_level) == -1) 2232 return -1; 2233 2234 
EATAB; 2235 #endif 2236 2237 /* 2238 * Parse the type. 2239 */ 2240 if (*l == 'u') { 2241 /* 2242 * Try it as a keyword type prefixed by "u"; match what 2243 * follows the "u". If that fails, try it as an SUS 2244 * integer type. 2245 */ 2246 m->type = get_type(type_tbl, l + 1, &l); 2247 if (m->type == FILE_INVALID) { 2248 /* 2249 * Not a keyword type; parse it as an SUS type, 2250 * 'u' possibly followed by a number or C/S/L. 2251 */ 2252 m->type = get_standard_integer_type(l, &l); 2253 } 2254 /* It's unsigned. */ 2255 if (m->type != FILE_INVALID) 2256 m->flag |= UNSIGNED; 2257 } else { 2258 /* 2259 * Try it as a keyword type. If that fails, try it as 2260 * an SUS integer type if it begins with "d" or as an 2261 * SUS string type if it begins with "s". In any case, 2262 * it's not unsigned. 2263 */ 2264 m->type = get_type(type_tbl, l, &l); 2265 if (m->type == FILE_INVALID) { 2266 /* 2267 * Not a keyword type; parse it as an SUS type, 2268 * either 'd' possibly followed by a number or 2269 * C/S/L, or just 's'. 2270 */ 2271 if (*l == 'd') 2272 m->type = get_standard_integer_type(l, &l); 2273 else if (*l == 's' 2274 && !isalpha(CAST(unsigned char, l[1]))) { 2275 m->type = FILE_STRING; 2276 ++l; 2277 } 2278 } 2279 } 2280 2281 if (m->type == FILE_INVALID) { 2282 /* Not found - try it as a special keyword. */ 2283 m->type = get_type(special_tbl, l, &l); 2284 } 2285 2286 if (m->type == FILE_INVALID) { 2287 if (ms->flags & MAGIC_CHECK) 2288 file_magwarn(ms, "type `%s' invalid", l); 2289 return -1; 2290 } 2291 2292 if (m->type == FILE_NAME && cont_level != 0) { 2293 if (ms->flags & MAGIC_CHECK) 2294 file_magwarn(ms, "`name%s' entries can only be " 2295 "declared at top level", l); 2296 return -1; 2297 } 2298 2299 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ 2300 /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? 
*/ 2301 2302 m->mask_op = 0; 2303 if (*l == '~') { 2304 if (!IS_STRING(m->type)) 2305 m->mask_op |= FILE_OPINVERSE; 2306 else if (ms->flags & MAGIC_CHECK) 2307 file_magwarn(ms, "'~' invalid for string types"); 2308 ++l; 2309 } 2310 m->str_range = 0; 2311 m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0; 2312 if ((op = get_op(*l)) != -1) { 2313 if (IS_STRING(m->type)) { 2314 int r; 2315 2316 if (op != FILE_OPDIVIDE) { 2317 if (ms->flags & MAGIC_CHECK) 2318 file_magwarn(ms, 2319 "invalid string/indirect op: " 2320 "`%c'", *t); 2321 return -1; 2322 } 2323 2324 if (m->type == FILE_INDIRECT) 2325 r = parse_indirect_modifier(ms, m, &l); 2326 else 2327 r = parse_string_modifier(ms, m, &l); 2328 if (r == -1) 2329 return -1; 2330 } else 2331 parse_op_modifier(ms, m, &l, op); 2332 } 2333 2334 /* 2335 * We used to set mask to all 1's here, instead let's just not do 2336 * anything if mask = 0 (unless you have a better idea) 2337 */ 2338 EATAB; 2339 2340 switch (*l) { 2341 case '>': 2342 case '<': 2343 m->reln = *l; 2344 ++l; 2345 if (*l == '=') { 2346 if (ms->flags & MAGIC_CHECK) { 2347 file_magwarn(ms, "%c= not supported", 2348 m->reln); 2349 return -1; 2350 } 2351 ++l; 2352 } 2353 break; 2354 /* Old-style anding: "0 byte &0x80 dynamically linked" */ 2355 case '&': 2356 case '^': 2357 case '=': 2358 m->reln = *l; 2359 ++l; 2360 if (*l == '=') { 2361 /* HP compat: ignore &= etc. */ 2362 ++l; 2363 } 2364 break; 2365 case '!': 2366 m->reln = *l; 2367 ++l; 2368 break; 2369 default: 2370 m->reln = '='; /* the default relation */ 2371 if (*l == 'x' && ((isascii(CAST(unsigned char, l[1])) && 2372 isspace(CAST(unsigned char, l[1]))) || !l[1])) { 2373 m->reln = *l; 2374 ++l; 2375 } 2376 break; 2377 } 2378 /* 2379 * Grab the value part, except for an 'x' reln. 2380 */ 2381 if (m->reln != 'x' && getvalue(ms, m, &l, action)) 2382 return -1; 2383 2384 /* 2385 * TODO finish this macro and start using it! 
2386 * #define offsetcheck {if (offset > ms->bytes_max -1) 2387 * magwarn("offset too big"); } 2388 */ 2389 2390 /* 2391 * Now get last part - the description 2392 */ 2393 EATAB; 2394 if (l[0] == '\b') { 2395 ++l; 2396 m->flag |= NOSPACE; 2397 } else if ((l[0] == '\\') && (l[1] == 'b')) { 2398 ++l; 2399 ++l; 2400 m->flag |= NOSPACE; 2401 } 2402 for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); ) 2403 continue; 2404 if (i == sizeof(m->desc)) { 2405 m->desc[sizeof(m->desc) - 1] = '\0'; 2406 if (ms->flags & MAGIC_CHECK) 2407 file_magwarn(ms, "description `%s' truncated", m->desc); 2408 } 2409 2410 /* 2411 * We only do this check while compiling, or if any of the magic 2412 * files were not compiled. 2413 */ 2414 if (ms->flags & MAGIC_CHECK) { 2415 if (check_format(ms, m) == -1) 2416 return -1; 2417 } 2418 #ifndef COMPILE_ONLY 2419 if (action == FILE_CHECK) { 2420 file_mdump(m); 2421 } 2422 #endif 2423 m->mimetype[0] = '\0'; /* initialise MIME type to none */ 2424 return 0; 2425 } 2426 2427 /* 2428 * parse a STRENGTH annotation line from magic file, put into magic[index - 1] 2429 * if valid 2430 */ 2431 /*ARGSUSED*/ 2432 private int 2433 parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line, 2434 size_t len __attribute__((__unused__))) 2435 { 2436 const char *l = line; 2437 char *el; 2438 unsigned long factor; 2439 struct magic *m = &me->mp[0]; 2440 2441 if (m->factor_op != FILE_FACTOR_OP_NONE) { 2442 file_magwarn(ms, 2443 "Current entry already has a strength type: %c %d", 2444 m->factor_op, m->factor); 2445 return -1; 2446 } 2447 if (m->type == FILE_NAME) { 2448 file_magwarn(ms, "%s: Strength setting is not supported in " 2449 "\"name\" magic entries", m->value.s); 2450 return -1; 2451 } 2452 EATAB; 2453 switch (*l) { 2454 case FILE_FACTOR_OP_NONE: 2455 case FILE_FACTOR_OP_PLUS: 2456 case FILE_FACTOR_OP_MINUS: 2457 case FILE_FACTOR_OP_TIMES: 2458 case FILE_FACTOR_OP_DIV: 2459 m->factor_op = *l++; 2460 break; 2461 default: 2462 
file_magwarn(ms, "Unknown factor op `%c'", *l); 2463 return -1; 2464 } 2465 EATAB; 2466 factor = strtoul(l, &el, 0); 2467 if (factor > 255) { 2468 file_magwarn(ms, "Too large factor `%lu'", factor); 2469 goto out; 2470 } 2471 if (*el && !isspace(CAST(unsigned char, *el))) { 2472 file_magwarn(ms, "Bad factor `%s'", l); 2473 goto out; 2474 } 2475 m->factor = CAST(uint8_t, factor); 2476 if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) { 2477 file_magwarn(ms, "Cannot have factor op `%c' and factor %u", 2478 m->factor_op, m->factor); 2479 goto out; 2480 } 2481 return 0; 2482 out: 2483 m->factor_op = FILE_FACTOR_OP_NONE; 2484 m->factor = 0; 2485 return -1; 2486 } 2487 2488 private int 2489 goodchar(unsigned char x, const char *extra) 2490 { 2491 return (isascii(x) && isalnum(x)) || strchr(extra, x); 2492 } 2493 2494 private int 2495 parse_extra(struct magic_set *ms, struct magic_entry *me, const char *line, 2496 size_t llen, off_t off, size_t len, const char *name, const char *extra, 2497 int nt) 2498 { 2499 size_t i; 2500 const char *l = line; 2501 struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1]; 2502 char *buf = CAST(char *, CAST(void *, m)) + off; 2503 2504 if (buf[0] != '\0') { 2505 len = nt ? 
strlen(buf) : len; 2506 file_magwarn(ms, "Current entry already has a %s type " 2507 "`%.*s', new type `%s'", name, CAST(int, len), buf, l); 2508 return -1; 2509 } 2510 2511 if (*m->desc == '\0') { 2512 file_magwarn(ms, "Current entry does not yet have a " 2513 "description for adding a %s type", name); 2514 return -1; 2515 } 2516 2517 EATAB; 2518 for (i = 0; *l && i < llen && i < len && goodchar(*l, extra); 2519 buf[i++] = *l++) 2520 continue; 2521 2522 if (i == len && *l) { 2523 if (nt) 2524 buf[len - 1] = '\0'; 2525 if (ms->flags & MAGIC_CHECK) 2526 file_magwarn(ms, "%s type `%s' truncated %" 2527 SIZE_T_FORMAT "u", name, line, i); 2528 } else { 2529 if (!isspace(CAST(unsigned char, *l)) && !goodchar(*l, extra)) 2530 file_magwarn(ms, "%s type `%s' has bad char '%c'", 2531 name, line, *l); 2532 if (nt) 2533 buf[i] = '\0'; 2534 } 2535 2536 if (i > 0) 2537 return 0; 2538 2539 file_magerror(ms, "Bad magic entry '%s'", line); 2540 return -1; 2541 } 2542 2543 /* 2544 * Parse an Apple CREATOR/TYPE annotation from magic file and put it into 2545 * magic[index - 1] 2546 */ 2547 private int 2548 parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line, 2549 size_t len) 2550 { 2551 return parse_extra(ms, me, line, len, 2552 CAST(off_t, offsetof(struct magic, apple)), 2553 sizeof(me->mp[0].apple), "APPLE", "!+-./?", 0); 2554 } 2555 2556 /* 2557 * Parse a comma-separated list of extensions 2558 */ 2559 private int 2560 parse_ext(struct magic_set *ms, struct magic_entry *me, const char *line, 2561 size_t len) 2562 { 2563 return parse_extra(ms, me, line, len, 2564 CAST(off_t, offsetof(struct magic, ext)), 2565 sizeof(me->mp[0].ext), "EXTENSION", ",!+-/@?_$&", 0); /* & for b&w */ 2566 } 2567 2568 /* 2569 * parse a MIME annotation line from magic file, put into magic[index - 1] 2570 * if valid 2571 */ 2572 private int 2573 parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line, 2574 size_t len) 2575 { 2576 return parse_extra(ms, me, line, 
len, 2577 CAST(off_t, offsetof(struct magic, mimetype)), 2578 sizeof(me->mp[0].mimetype), "MIME", "+-/.$?:{}", 1); 2579 } 2580 2581 private int 2582 check_format_type(const char *ptr, int type, const char **estr) 2583 { 2584 int quad = 0, h; 2585 size_t len, cnt; 2586 if (*ptr == '\0') { 2587 /* Missing format string; bad */ 2588 *estr = "missing format spec"; 2589 return -1; 2590 } 2591 2592 switch (file_formats[type]) { 2593 case FILE_FMT_QUAD: 2594 quad = 1; 2595 /*FALLTHROUGH*/ 2596 case FILE_FMT_NUM: 2597 if (quad == 0) { 2598 switch (type) { 2599 case FILE_BYTE: 2600 h = 2; 2601 break; 2602 case FILE_SHORT: 2603 case FILE_BESHORT: 2604 case FILE_LESHORT: 2605 h = 1; 2606 break; 2607 case FILE_LONG: 2608 case FILE_BELONG: 2609 case FILE_LELONG: 2610 case FILE_MELONG: 2611 case FILE_LEID3: 2612 case FILE_BEID3: 2613 case FILE_INDIRECT: 2614 h = 0; 2615 break; 2616 default: 2617 fprintf(stderr, "Bad number format %d", type); 2618 abort(); 2619 } 2620 } else 2621 h = 0; 2622 while (*ptr && strchr("-.#", *ptr) != NULL) 2623 ptr++; 2624 #define CHECKLEN() do { \ 2625 for (len = cnt = 0; isdigit(CAST(unsigned char, *ptr)); ptr++, cnt++) \ 2626 len = len * 10 + (*ptr - '0'); \ 2627 if (cnt > 5 || len > 1024) \ 2628 goto toolong; \ 2629 } while (/*CONSTCOND*/0) 2630 2631 CHECKLEN(); 2632 if (*ptr == '.') 2633 ptr++; 2634 CHECKLEN(); 2635 if (quad) { 2636 if (*ptr++ != 'l') 2637 goto invalid; 2638 if (*ptr++ != 'l') 2639 goto invalid; 2640 } 2641 2642 switch (*ptr++) { 2643 #ifdef STRICT_FORMAT /* "long" formats are int formats for us */ 2644 /* so don't accept the 'l' modifier */ 2645 case 'l': 2646 switch (*ptr++) { 2647 case 'i': 2648 case 'd': 2649 case 'u': 2650 case 'o': 2651 case 'x': 2652 case 'X': 2653 if (h == 0) 2654 return 0; 2655 /*FALLTHROUGH*/ 2656 default: 2657 goto invalid; 2658 } 2659 2660 /* 2661 * Don't accept h and hh modifiers. 
They make writing 2662 * magic entries more complicated, for very little benefit 2663 */ 2664 case 'h': 2665 if (h-- <= 0) 2666 goto invalid; 2667 switch (*ptr++) { 2668 case 'h': 2669 if (h-- <= 0) 2670 goto invalid; 2671 switch (*ptr++) { 2672 case 'i': 2673 case 'd': 2674 case 'u': 2675 case 'o': 2676 case 'x': 2677 case 'X': 2678 return 0; 2679 default: 2680 goto invalid; 2681 } 2682 case 'i': 2683 case 'd': 2684 case 'u': 2685 case 'o': 2686 case 'x': 2687 case 'X': 2688 if (h == 0) 2689 return 0; 2690 /*FALLTHROUGH*/ 2691 default: 2692 goto invalid; 2693 } 2694 #endif 2695 case 'c': 2696 if (h == 2) 2697 return 0; 2698 goto invalid; 2699 case 'i': 2700 case 'd': 2701 case 'u': 2702 case 'o': 2703 case 'x': 2704 case 'X': 2705 #ifdef STRICT_FORMAT 2706 if (h == 0) 2707 return 0; 2708 /*FALLTHROUGH*/ 2709 #else 2710 return 0; 2711 #endif 2712 default: 2713 goto invalid; 2714 } 2715 2716 case FILE_FMT_FLOAT: 2717 case FILE_FMT_DOUBLE: 2718 if (*ptr == '-') 2719 ptr++; 2720 if (*ptr == '.') 2721 ptr++; 2722 CHECKLEN(); 2723 if (*ptr == '.') 2724 ptr++; 2725 CHECKLEN(); 2726 switch (*ptr++) { 2727 case 'e': 2728 case 'E': 2729 case 'f': 2730 case 'F': 2731 case 'g': 2732 case 'G': 2733 return 0; 2734 2735 default: 2736 goto invalid; 2737 } 2738 2739 2740 case FILE_FMT_STR: 2741 if (*ptr == '-') 2742 ptr++; 2743 while (isdigit(CAST(unsigned char, *ptr))) 2744 ptr++; 2745 if (*ptr == '.') { 2746 ptr++; 2747 while (isdigit(CAST(unsigned char , *ptr))) 2748 ptr++; 2749 } 2750 2751 switch (*ptr++) { 2752 case 's': 2753 return 0; 2754 default: 2755 goto invalid; 2756 } 2757 2758 default: 2759 /* internal error */ 2760 fprintf(stderr, "Bad file format %d", type); 2761 abort(); 2762 } 2763 invalid: 2764 *estr = "not valid"; 2765 toolong: 2766 *estr = "too long"; 2767 return -1; 2768 } 2769 2770 /* 2771 * Check that the optional printf format in description matches 2772 * the type of the magic. 
2773 */ 2774 private int 2775 check_format(struct magic_set *ms, struct magic *m) 2776 { 2777 char *ptr; 2778 const char *estr; 2779 2780 for (ptr = m->desc; *ptr; ptr++) 2781 if (*ptr == '%') 2782 break; 2783 if (*ptr == '\0') { 2784 /* No format string; ok */ 2785 return 1; 2786 } 2787 2788 assert(file_nformats == file_nnames); 2789 2790 if (m->type >= file_nformats) { 2791 file_magwarn(ms, "Internal error inconsistency between " 2792 "m->type and format strings"); 2793 return -1; 2794 } 2795 if (file_formats[m->type] == FILE_FMT_NONE) { 2796 file_magwarn(ms, "No format string for `%s' with description " 2797 "`%s'", m->desc, file_names[m->type]); 2798 return -1; 2799 } 2800 2801 ptr++; 2802 if (check_format_type(ptr, m->type, &estr) == -1) { 2803 /* 2804 * TODO: this error message is unhelpful if the format 2805 * string is not one character long 2806 */ 2807 file_magwarn(ms, "Printf format is %s for type " 2808 "`%s' in description `%s'", estr, 2809 file_names[m->type], m->desc); 2810 return -1; 2811 } 2812 2813 for (; *ptr; ptr++) { 2814 if (*ptr == '%') { 2815 file_magwarn(ms, 2816 "Too many format strings (should have at most one) " 2817 "for `%s' with description `%s'", 2818 file_names[m->type], m->desc); 2819 return -1; 2820 } 2821 } 2822 return 0; 2823 } 2824 2825 /* 2826 * Read a numeric value from a pointer, into the value union of a magic 2827 * pointer, according to the magic type. Update the string pointer to point 2828 * just after the number read. Return 0 for success, non-zero for failure. 
/*
 * Read a numeric value from a pointer, into the value union of a magic
 * pointer, according to the magic type.  Update the string pointer to point
 * just after the number read.  Return 0 for success, non-zero for failure.
 *
 * String-like types are handed to getstr(); for FILE_REGEX the resulting
 * pattern is additionally test-compiled.  For every other type nothing is
 * parsed when the relation is 'x'.  action is only compared against
 * FILE_COMPILE to decide whether getstr() should warn.
 */
private int
getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
{
	char *ep;
	uint64_t ull;

	switch (m->type) {
	case FILE_BESTRING16:
	case FILE_LESTRING16:
	case FILE_STRING:
	case FILE_PSTRING:
	case FILE_REGEX:
	case FILE_SEARCH:
	case FILE_NAME:
	case FILE_USE:
	case FILE_DER:
	case FILE_OCTAL:
		*p = getstr(ms, m, *p, action == FILE_COMPILE);
		if (*p == NULL) {
			if (ms->flags & MAGIC_CHECK)
				file_magwarn(ms, "cannot get string from `%s'",
				    m->value.s);
			return -1;
		}
		if (m->type == FILE_REGEX) {
			/* compile only to validate; the result is discarded */
			file_regex_t rx;
			int rc = file_regcomp(ms, &rx, m->value.s,
			    REG_EXTENDED);
			if (rc == 0) {
				file_regfree(&rx);
			}
			return rc ? -1 : 0;
		}
		return 0;
	default:
		if (m->reln == 'x')
			return 0;
		break;
	}

	switch (m->type) {
	case FILE_FLOAT:
	case FILE_BEFLOAT:
	case FILE_LEFLOAT:
		errno = 0;
#ifdef HAVE_STRTOF
		m->value.f = strtof(*p, &ep);
#else
		m->value.f = (float)strtod(*p, &ep);
#endif
		/* the scan pointer only advances when no error was flagged */
		if (errno == 0)
			*p = ep;
		return 0;
	case FILE_DOUBLE:
	case FILE_BEDOUBLE:
	case FILE_LEDOUBLE:
		errno = 0;
		m->value.d = strtod(*p, &ep);
		if (errno == 0)
			*p = ep;
		return 0;
	case FILE_GUID:
		if (file_parse_guid(*p, m->value.guid) == -1)
			return -1;
		*p += FILE_GUID_SIZE - 1;
		return 0;
	default:
		errno = 0;
		ull = CAST(uint64_t, strtoull(*p, &ep, 0));
		m->value.q = file_signextend(ms, m, ull);
		if (*p == ep) {
			/* no digits consumed; warned about but not fatal */
			file_magwarn(ms, "Unparsable number `%s'", *p);
		} else {
			size_t ts = typesize(m->type);
			uint64_t x;
			const char *q;

			if (ts == FILE_BADSIZE) {
				file_magwarn(ms,
				    "Expected numeric type got `%s'",
				    type_tbl[m->type].name);
			}
			/*
			 * For a negative literal, negate again so that the
			 * width check below looks at the magnitude.
			 */
			for (q = *p; isspace(CAST(unsigned char, *q)); q++)
				continue;
			if (*q == '-')
				ull = -CAST(int64_t, ull);
			/* x keeps the bits that do not fit in ts bytes */
			switch (ts) {
			case 1:
				x = CAST(uint64_t, ull & ~0xffULL);
				break;
			case 2:
				x = CAST(uint64_t, ull & ~0xffffULL);
				break;
			case 4:
				x = CAST(uint64_t, ull & ~0xffffffffULL);
				break;
			case 8:
				x = 0;
				break;
			default:
				fprintf(stderr, "Bad width %zu", ts);
				abort();
			}
			if (x) {
				file_magwarn(ms, "Overflow for numeric"
				    " type `%s' value %#" PRIx64,
				    type_tbl[m->type].name, ull);
			}
		}
		if (errno == 0) {
			*p = ep;
			/* drop a trailing size suffix such as UL */
			eatsize(p);
		}
		return 0;
	}
}

/*
 * Convert a string containing C character escapes.  Stop at an unescaped
 * space or tab.
 * Copy the converted version to "m->value.s", and the length in m->vallen.
 * Return updated scan pointer as function result. Warn if set.
 *
 * Returns NULL if the converted string does not fit in m->value.s, or if
 * m is a FILE_PSTRING whose length-size flags are invalid.  For
 * FILE_PSTRING the size of the length prefix is added to m->vallen.
 */
private const char *
getstr(struct magic_set *ms, struct magic *m, const char *s, int warn)
{
	const char *origs = s;
	char	*p = m->value.s;
	size_t  plen = sizeof(m->value.s);
	char 	*origp = p;
	char	*pmax = p + plen - 1;	/* leave room for the final NUL */
	int	c;
	int	val;
	size_t	bracket_nesting = 0;	/* depth of unescaped '[' ... ']' */

	while ((c = *s++) != '\0') {
		if (isspace(CAST(unsigned char, c)))
			break;
		if (p >= pmax) {
			file_error(ms, 0, "string too long: `%s'", origs);
			return NULL;
		}
		if (c != '\\') {
			/* ordinary character: track brackets, copy verbatim */
			if (c == '[') {
				bracket_nesting++;
			}
			if (c == ']' && bracket_nesting > 0) {
				bracket_nesting--;
			}
			*p++ = CAST(char, c);
			continue;
		}
		/*
		 * Escape sequence.  The first cases only emit warnings and
		 * then fall through to the literal-copy case below.
		 */
		switch(c = *s++) {

		case '\0':
			if (warn)
				file_magwarn(ms, "incomplete escape");
			/* leave s on the NUL so the caller sees end of input */
			s--;
			goto out;
		case '.':
			if (m->type == FILE_REGEX &&
			    bracket_nesting == 0 && warn) {
				file_magwarn(ms, "escaped dot ('.') found, "
				    "use \\\\. instead");
			}
			warn = 0; /* already did */
			/*FALLTHROUGH*/
		case '\t':
			if (warn) {
				file_magwarn(ms,
				    "escaped tab found, use \\\\t instead");
				warn = 0;	/* already did */
			}
			/*FALLTHROUGH*/
		default:
			if (warn) {
				if (isprint(CAST(unsigned char, c))) {
					/* Allow escaping of
					 * ``relations'' */
					if (strchr("<>&^=!", c) == NULL
					    && (m->type != FILE_REGEX ||
					    strchr("[]().*?^$|{}", c)
					    == NULL)) {
						file_magwarn(ms, "no "
						    "need to escape "
						    "`%c'", c);
					}
				} else {
					file_magwarn(ms,
					    "unknown escape sequence: "
					    "\\%03o", c);
				}
			}
			/*FALLTHROUGH*/
		/* space, perhaps force people to use \040? */
		case ' ':
#if 0
		/*
		 * Other things people escape, but shouldn't need to,
		 * so we disallow them
		 */
		case '\'':
		case '"':
		case '?':
#endif
		/* Relations */
		case '>':
		case '<':
		case '&':
		case '^':
		case '=':
		case '!':
		/* and backslash itself */
		case '\\':
			*p++ = CAST(char, c);
			break;

		case 'a':
			*p++ = '\a';
			break;

		case 'b':
			*p++ = '\b';
			break;

		case 'f':
			*p++ = '\f';
			break;

		case 'n':
			*p++ = '\n';
			break;

		case 'r':
			*p++ = '\r';
			break;

		case 't':
			*p++ = '\t';
			break;

		case 'v':
			*p++ = '\v';
			break;

		/* \ and up to 3 octal digits */
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			val = c - '0';
			c = *s++;  /* try for 2 */
			if (c >= '0' && c <= '7') {
				val = (val << 3) | (c - '0');
				c = *s++;  /* try for 3 */
				if (c >= '0' && c <= '7')
					val = (val << 3) | (c-'0');
				else
					--s;
			}
			else
				--s;
			*p++ = CAST(char, val);
			break;

		/* \x and up to 2 hex digits */
		case 'x':
			val = 'x';	/* Default if no digits */
			c = hextoint(*s++);	/* Get next char */
			if (c >= 0) {
				val = c;
				c = hextoint(*s++);
				if (c >= 0)
					val = (val << 4) + c;
				else
					--s;
			} else
				--s;
			*p++ = CAST(char, val);
			break;
		}
	}
	/* the loop read one past the terminating space or NUL; step back */
	--s;
out:
	*p = '\0';
	m->vallen = CAST(unsigned char, (p - origp));
	if (m->type == FILE_PSTRING) {
		size_t l = file_pstring_length_size(ms, m);
		if (l == FILE_BADSIZE)
			return NULL;
		m->vallen += CAST(unsigned char, l);
	}
	return s;
}


/* Single hex char to int; -1 if not a hex char. */
private int
hextoint(int c)
{
	if (!isascii(CAST(unsigned char, c)))
		return -1;
	if (isdigit(CAST(unsigned char, c)))
		return c - '0';
	if ((c >= 'a') && (c <= 'f'))
		return c + 10 - 'a';
	if (( c>= 'A') && (c <= 'F'))
		return c + 10 - 'A';
	return -1;
}
3148 */ 3149 protected void 3150 file_showstr(FILE *fp, const char *s, size_t len) 3151 { 3152 char c; 3153 3154 for (;;) { 3155 if (len == FILE_BADSIZE) { 3156 c = *s++; 3157 if (c == '\0') 3158 break; 3159 } 3160 else { 3161 if (len-- == 0) 3162 break; 3163 c = *s++; 3164 } 3165 if (c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */ 3166 (void) fputc(c, fp); 3167 else { 3168 (void) fputc('\\', fp); 3169 switch (c) { 3170 case '\a': 3171 (void) fputc('a', fp); 3172 break; 3173 3174 case '\b': 3175 (void) fputc('b', fp); 3176 break; 3177 3178 case '\f': 3179 (void) fputc('f', fp); 3180 break; 3181 3182 case '\n': 3183 (void) fputc('n', fp); 3184 break; 3185 3186 case '\r': 3187 (void) fputc('r', fp); 3188 break; 3189 3190 case '\t': 3191 (void) fputc('t', fp); 3192 break; 3193 3194 case '\v': 3195 (void) fputc('v', fp); 3196 break; 3197 3198 default: 3199 (void) fprintf(fp, "%.3o", c & 0377); 3200 break; 3201 } 3202 } 3203 } 3204 } 3205 3206 /* 3207 * eatsize(): Eat the size spec from a number [eg. 10UL] 3208 */ 3209 private void 3210 eatsize(const char **p) 3211 { 3212 const char *l = *p; 3213 3214 if (LOWCASE(*l) == 'u') 3215 l++; 3216 3217 switch (LOWCASE(*l)) { 3218 case 'l': /* long */ 3219 case 's': /* short */ 3220 case 'h': /* short */ 3221 case 'b': /* char/byte */ 3222 case 'c': /* char/byte */ 3223 l++; 3224 /*FALLTHROUGH*/ 3225 default: 3226 break; 3227 } 3228 3229 *p = l; 3230 } 3231 3232 /* 3233 * handle a buffer containing a compiled file. 
3234 */ 3235 private struct magic_map * 3236 apprentice_buf(struct magic_set *ms, struct magic *buf, size_t len) 3237 { 3238 struct magic_map *map; 3239 3240 if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) { 3241 file_oomem(ms, sizeof(*map)); 3242 return NULL; 3243 } 3244 map->len = len; 3245 map->p = buf; 3246 map->type = MAP_TYPE_USER; 3247 if (check_buffer(ms, map, "buffer") != 0) { 3248 apprentice_unmap(map); 3249 return NULL; 3250 } 3251 return map; 3252 } 3253 3254 /* 3255 * handle a compiled file. 3256 */ 3257 3258 private struct magic_map * 3259 apprentice_map(struct magic_set *ms, const char *fn) 3260 { 3261 int fd; 3262 struct stat st; 3263 char *dbname = NULL; 3264 struct magic_map *map; 3265 struct magic_map *rv = NULL; 3266 3267 fd = -1; 3268 if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) { 3269 file_oomem(ms, sizeof(*map)); 3270 goto error; 3271 } 3272 map->type = MAP_TYPE_USER; /* unspecified */ 3273 3274 dbname = mkdbname(ms, fn, 0); 3275 if (dbname == NULL) 3276 goto error; 3277 3278 if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1) 3279 goto error; 3280 3281 if (fstat(fd, &st) == -1) { 3282 file_error(ms, errno, "cannot stat `%s'", dbname); 3283 goto error; 3284 } 3285 if (st.st_size < 8 || st.st_size > maxoff_t()) { 3286 file_error(ms, 0, "file `%s' is too %s", dbname, 3287 st.st_size < 8 ? 
"small" : "large"); 3288 goto error; 3289 } 3290 3291 map->len = CAST(size_t, st.st_size); 3292 #ifdef QUICK 3293 map->type = MAP_TYPE_MMAP; 3294 if ((map->p = mmap(0, CAST(size_t, st.st_size), PROT_READ|PROT_WRITE, 3295 MAP_PRIVATE|MAP_FILE, fd, CAST(off_t, 0))) == MAP_FAILED) { 3296 file_error(ms, errno, "cannot map `%s'", dbname); 3297 goto error; 3298 } 3299 #else 3300 map->type = MAP_TYPE_MALLOC; 3301 if ((map->p = CAST(void *, malloc(map->len))) == NULL) { 3302 file_oomem(ms, map->len); 3303 goto error; 3304 } 3305 if (read(fd, map->p, map->len) != (ssize_t)map->len) { 3306 file_badread(ms); 3307 goto error; 3308 } 3309 #endif 3310 (void)close(fd); 3311 fd = -1; 3312 3313 if (check_buffer(ms, map, dbname) != 0) { 3314 goto error; 3315 } 3316 #ifdef QUICK 3317 if (mprotect(map->p, CAST(size_t, st.st_size), PROT_READ) == -1) { 3318 file_error(ms, errno, "cannot mprotect `%s'", dbname); 3319 goto error; 3320 } 3321 #endif 3322 3323 free(dbname); 3324 return map; 3325 3326 error: 3327 if (fd != -1) 3328 (void)close(fd); 3329 apprentice_unmap(map); 3330 free(dbname); 3331 return rv; 3332 } 3333 3334 private int 3335 check_buffer(struct magic_set *ms, struct magic_map *map, const char *dbname) 3336 { 3337 uint32_t *ptr; 3338 uint32_t entries, nentries; 3339 uint32_t version; 3340 int i, needsbyteswap; 3341 3342 ptr = CAST(uint32_t *, map->p); 3343 if (*ptr != MAGICNO) { 3344 if (swap4(*ptr) != MAGICNO) { 3345 file_error(ms, 0, "bad magic in `%s'", dbname); 3346 return -1; 3347 } 3348 needsbyteswap = 1; 3349 } else 3350 needsbyteswap = 0; 3351 if (needsbyteswap) 3352 version = swap4(ptr[1]); 3353 else 3354 version = ptr[1]; 3355 if (version != VERSIONNO) { 3356 file_error(ms, 0, "File %s supports only version %d magic " 3357 "files. 
`%s' is version %d", VERSION, 3358 VERSIONNO, dbname, version); 3359 return -1; 3360 } 3361 entries = CAST(uint32_t, map->len / sizeof(struct magic)); 3362 if ((entries * sizeof(struct magic)) != map->len) { 3363 file_error(ms, 0, "Size of `%s' %" SIZE_T_FORMAT "u is not " 3364 "a multiple of %" SIZE_T_FORMAT "u", 3365 dbname, map->len, sizeof(struct magic)); 3366 return -1; 3367 } 3368 map->magic[0] = CAST(struct magic *, map->p) + 1; 3369 nentries = 0; 3370 for (i = 0; i < MAGIC_SETS; i++) { 3371 if (needsbyteswap) 3372 map->nmagic[i] = swap4(ptr[i + 2]); 3373 else 3374 map->nmagic[i] = ptr[i + 2]; 3375 if (i != MAGIC_SETS - 1) 3376 map->magic[i + 1] = map->magic[i] + map->nmagic[i]; 3377 nentries += map->nmagic[i]; 3378 } 3379 if (entries != nentries + 1) { 3380 file_error(ms, 0, "Inconsistent entries in `%s' %u != %u", 3381 dbname, entries, nentries + 1); 3382 return -1; 3383 } 3384 if (needsbyteswap) 3385 for (i = 0; i < MAGIC_SETS; i++) 3386 byteswap(map->magic[i], map->nmagic[i]); 3387 return 0; 3388 } 3389 3390 /* 3391 * handle an mmaped file. 
3392 */ 3393 private int 3394 apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn) 3395 { 3396 static const size_t nm = sizeof(*map->nmagic) * MAGIC_SETS; 3397 static const size_t m = sizeof(**map->magic); 3398 int fd = -1; 3399 size_t len; 3400 char *dbname; 3401 int rv = -1; 3402 uint32_t i; 3403 union { 3404 struct magic m; 3405 uint32_t h[2 + MAGIC_SETS]; 3406 } hdr; 3407 3408 dbname = mkdbname(ms, fn, 1); 3409 3410 if (dbname == NULL) 3411 goto out; 3412 3413 if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1) 3414 { 3415 file_error(ms, errno, "cannot open `%s'", dbname); 3416 goto out; 3417 } 3418 memset(&hdr, 0, sizeof(hdr)); 3419 hdr.h[0] = MAGICNO; 3420 hdr.h[1] = VERSIONNO; 3421 memcpy(hdr.h + 2, map->nmagic, nm); 3422 3423 if (write(fd, &hdr, sizeof(hdr)) != CAST(ssize_t, sizeof(hdr))) { 3424 file_error(ms, errno, "error writing `%s'", dbname); 3425 goto out2; 3426 } 3427 3428 for (i = 0; i < MAGIC_SETS; i++) { 3429 len = m * map->nmagic[i]; 3430 if (write(fd, map->magic[i], len) != CAST(ssize_t, len)) { 3431 file_error(ms, errno, "error writing `%s'", dbname); 3432 goto out2; 3433 } 3434 } 3435 3436 rv = 0; 3437 out2: 3438 if (fd != -1) 3439 (void)close(fd); 3440 out: 3441 apprentice_unmap(map); 3442 free(dbname); 3443 return rv; 3444 } 3445 3446 private const char ext[] = ".mgc"; 3447 /* 3448 * make a dbname 3449 */ 3450 private char * 3451 mkdbname(struct magic_set *ms, const char *fn, int strip) 3452 { 3453 const char *p, *q; 3454 char *buf; 3455 3456 if (strip) { 3457 if ((p = strrchr(fn, '/')) != NULL) 3458 fn = ++p; 3459 } 3460 3461 for (q = fn; *q; q++) 3462 continue; 3463 /* Look for .mgc */ 3464 for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--) 3465 if (*p != *q) 3466 break; 3467 3468 /* Did not find .mgc, restore q */ 3469 if (p >= ext) 3470 while (*q) 3471 q++; 3472 3473 q++; 3474 /* Compatibility with old code that looked in .mime */ 3475 if (ms->flags & MAGIC_MIME) { 3476 if 
(asprintf(&buf, "%.*s.mime%s", CAST(int, q - fn), fn, ext) 3477 < 0) 3478 return NULL; 3479 if (access(buf, R_OK) != -1) { 3480 ms->flags &= MAGIC_MIME_TYPE; 3481 return buf; 3482 } 3483 free(buf); 3484 } 3485 if (asprintf(&buf, "%.*s%s", CAST(int, q - fn), fn, ext) < 0) 3486 return NULL; 3487 3488 /* Compatibility with old code that looked in .mime */ 3489 if (strstr(fn, ".mime") != NULL) 3490 ms->flags &= MAGIC_MIME_TYPE; 3491 return buf; 3492 } 3493 3494 /* 3495 * Byteswap an mmap'ed file if needed 3496 */ 3497 private void 3498 byteswap(struct magic *magic, uint32_t nmagic) 3499 { 3500 uint32_t i; 3501 for (i = 0; i < nmagic; i++) 3502 bs1(&magic[i]); 3503 } 3504 3505 #if !defined(HAVE_BYTESWAP_H) && !defined(HAVE_SYS_BSWAP_H) 3506 /* 3507 * swap a short 3508 */ 3509 private uint16_t 3510 swap2(uint16_t sv) 3511 { 3512 uint16_t rv; 3513 uint8_t *s = RCAST(uint8_t *, RCAST(void *, &sv)); 3514 uint8_t *d = RCAST(uint8_t *, RCAST(void *, &rv)); 3515 d[0] = s[1]; 3516 d[1] = s[0]; 3517 return rv; 3518 } 3519 3520 /* 3521 * swap an int 3522 */ 3523 private uint32_t 3524 swap4(uint32_t sv) 3525 { 3526 uint32_t rv; 3527 uint8_t *s = RCAST(uint8_t *, RCAST(void *, &sv)); 3528 uint8_t *d = RCAST(uint8_t *, RCAST(void *, &rv)); 3529 d[0] = s[3]; 3530 d[1] = s[2]; 3531 d[2] = s[1]; 3532 d[3] = s[0]; 3533 return rv; 3534 } 3535 3536 /* 3537 * swap a quad 3538 */ 3539 private uint64_t 3540 swap8(uint64_t sv) 3541 { 3542 uint64_t rv; 3543 uint8_t *s = RCAST(uint8_t *, RCAST(void *, &sv)); 3544 uint8_t *d = RCAST(uint8_t *, RCAST(void *, &rv)); 3545 # if 0 3546 d[0] = s[3]; 3547 d[1] = s[2]; 3548 d[2] = s[1]; 3549 d[3] = s[0]; 3550 d[4] = s[7]; 3551 d[5] = s[6]; 3552 d[6] = s[5]; 3553 d[7] = s[4]; 3554 # else 3555 d[0] = s[7]; 3556 d[1] = s[6]; 3557 d[2] = s[5]; 3558 d[3] = s[4]; 3559 d[4] = s[3]; 3560 d[5] = s[2]; 3561 d[6] = s[1]; 3562 d[7] = s[0]; 3563 # endif 3564 return rv; 3565 } 3566 #endif 3567 3568 protected uintmax_t 3569 file_varint2uintmax_t(const unsigned char 
*us, int t, size_t *l) 3570 { 3571 uintmax_t x = 0; 3572 const unsigned char *c; 3573 if (t == FILE_LEVARINT) { 3574 for (c = us; *c; c++) { 3575 if ((*c & 0x80) == 0) 3576 break; 3577 } 3578 if (l) 3579 *l = c - us + 1; 3580 for (; c >= us; c--) { 3581 x |= *c & 0x7f; 3582 x <<= 7; 3583 } 3584 } else { 3585 for (c = us; *c; c++) { 3586 x |= *c & 0x7f; 3587 if ((*c & 0x80) == 0) 3588 break; 3589 x <<= 7; 3590 } 3591 if (l) 3592 *l = c - us + 1; 3593 } 3594 return x; 3595 } 3596 3597 3598 /* 3599 * byteswap a single magic entry 3600 */ 3601 private void 3602 bs1(struct magic *m) 3603 { 3604 m->cont_level = swap2(m->cont_level); 3605 m->offset = swap4(CAST(uint32_t, m->offset)); 3606 m->in_offset = swap4(CAST(uint32_t, m->in_offset)); 3607 m->lineno = swap4(CAST(uint32_t, m->lineno)); 3608 if (IS_STRING(m->type)) { 3609 m->str_range = swap4(m->str_range); 3610 m->str_flags = swap4(m->str_flags); 3611 } 3612 else { 3613 m->value.q = swap8(m->value.q); 3614 m->num_mask = swap8(m->num_mask); 3615 } 3616 } 3617 3618 protected size_t 3619 file_pstring_length_size(struct magic_set *ms, const struct magic *m) 3620 { 3621 switch (m->str_flags & PSTRING_LEN) { 3622 case PSTRING_1_LE: 3623 return 1; 3624 case PSTRING_2_LE: 3625 case PSTRING_2_BE: 3626 return 2; 3627 case PSTRING_4_LE: 3628 case PSTRING_4_BE: 3629 return 4; 3630 default: 3631 file_error(ms, 0, "corrupt magic file " 3632 "(bad pascal string length %d)", 3633 m->str_flags & PSTRING_LEN); 3634 return FILE_BADSIZE; 3635 } 3636 } 3637 protected size_t 3638 file_pstring_get_length(struct magic_set *ms, const struct magic *m, 3639 const char *ss) 3640 { 3641 size_t len = 0; 3642 const unsigned char *s = RCAST(const unsigned char *, ss); 3643 unsigned int s3, s2, s1, s0; 3644 3645 switch (m->str_flags & PSTRING_LEN) { 3646 case PSTRING_1_LE: 3647 len = *s; 3648 break; 3649 case PSTRING_2_LE: 3650 s0 = s[0]; 3651 s1 = s[1]; 3652 len = (s1 << 8) | s0; 3653 break; 3654 case PSTRING_2_BE: 3655 s0 = s[0]; 3656 s1 = s[1]; 
3657 len = (s0 << 8) | s1; 3658 break; 3659 case PSTRING_4_LE: 3660 s0 = s[0]; 3661 s1 = s[1]; 3662 s2 = s[2]; 3663 s3 = s[3]; 3664 len = (s3 << 24) | (s2 << 16) | (s1 << 8) | s0; 3665 break; 3666 case PSTRING_4_BE: 3667 s0 = s[0]; 3668 s1 = s[1]; 3669 s2 = s[2]; 3670 s3 = s[3]; 3671 len = (s0 << 24) | (s1 << 16) | (s2 << 8) | s3; 3672 break; 3673 default: 3674 file_error(ms, 0, "corrupt magic file " 3675 "(bad pascal string length %d)", 3676 m->str_flags & PSTRING_LEN); 3677 return FILE_BADSIZE; 3678 } 3679 3680 if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF) { 3681 size_t l = file_pstring_length_size(ms, m); 3682 if (l == FILE_BADSIZE) 3683 return l; 3684 len -= l; 3685 } 3686 3687 return len; 3688 } 3689 3690 protected int 3691 file_magicfind(struct magic_set *ms, const char *name, struct mlist *v) 3692 { 3693 uint32_t i, j; 3694 struct mlist *mlist, *ml; 3695 3696 mlist = ms->mlist[1]; 3697 3698 for (ml = mlist->next; ml != mlist; ml = ml->next) { 3699 struct magic *ma = ml->magic; 3700 for (i = 0; i < ml->nmagic; i++) { 3701 if (ma[i].type != FILE_NAME) 3702 continue; 3703 if (strcmp(ma[i].value.s, name) == 0) { 3704 v->magic = &ma[i]; 3705 v->magic_rxcomp = &(ml->magic_rxcomp[i]); 3706 for (j = i + 1; j < ml->nmagic; j++) 3707 if (ma[j].cont_level == 0) 3708 break; 3709 v->nmagic = j - i; 3710 return 0; 3711 } 3712 } 3713 } 3714 return -1; 3715 } 3716