1 /* 2 * Copyright (c) Ian F. Darwin 1986-1995. 3 * Software written by Ian F. Darwin and others; 4 * maintained 1995-present by Christos Zoulas and others. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice immediately at the beginning of the file, without modification, 11 * this list of conditions, and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 /* 29 * apprentice - make one pass through /etc/magic, learning its secrets. 30 */ 31 32 #include "file.h" 33 34 #ifndef lint 35 FILE_RCSID("@(#)$File: apprentice.c,v 1.227 2014/11/28 02:46:39 christos Exp $") 36 #endif /* lint */ 37 38 #include "magic.h" 39 #include <stdlib.h> 40 #ifdef HAVE_UNISTD_H 41 #include <unistd.h> 42 #endif 43 #ifdef HAVE_STDDEF_H 44 #include <stddef.h> 45 #endif 46 #include <string.h> 47 #include <assert.h> 48 #include <ctype.h> 49 #include <fcntl.h> 50 #ifdef QUICK 51 #include <sys/mman.h> 52 #endif 53 #include <dirent.h> 54 #if defined(HAVE_LIMITS_H) 55 #include <limits.h> 56 #endif 57 58 #ifndef SSIZE_MAX 59 #define MAXMAGIC_SIZE ((ssize_t)0x7fffffff) 60 #else 61 #define MAXMAGIC_SIZE SSIZE_MAX 62 #endif 63 64 #define EATAB {while (isascii((unsigned char) *l) && \ 65 isspace((unsigned char) *l)) ++l;} 66 #define LOWCASE(l) (isupper((unsigned char) (l)) ? \ 67 tolower((unsigned char) (l)) : (l)) 68 /* 69 * Work around a bug in headers on Digital Unix. 70 * At least confirmed for: OSF1 V4.0 878 71 */ 72 #if defined(__osf__) && defined(__DECC) 73 #ifdef MAP_FAILED 74 #undef MAP_FAILED 75 #endif 76 #endif 77 78 #ifndef MAP_FAILED 79 #define MAP_FAILED (void *) -1 80 #endif 81 82 #ifndef MAP_FILE 83 #define MAP_FILE 0 84 #endif 85 86 #define ALLOC_CHUNK (size_t)10 87 #define ALLOC_INCR (size_t)200 88 89 #define MAP_TYPE_MMAP 0 90 #define MAP_TYPE_MALLOC 1 91 #define MAP_TYPE_USER 2 92 93 struct magic_entry { 94 struct magic *mp; 95 uint32_t cont_count; 96 uint32_t max_count; 97 }; 98 99 struct magic_entry_set { 100 struct magic_entry *me; 101 uint32_t count; 102 uint32_t max; 103 }; 104 105 struct magic_map { 106 void *p; 107 size_t len; 108 int type; 109 struct magic *magic[MAGIC_SETS]; 110 uint32_t nmagic[MAGIC_SETS]; 111 }; 112 113 int file_formats[FILE_NAMES_SIZE]; 114 const size_t file_nformats = FILE_NAMES_SIZE; 115 const char *file_names[FILE_NAMES_SIZE]; 116 const size_t file_nnames = FILE_NAMES_SIZE; 117 118 private int getvalue(struct magic_set *ms, struct magic *, const char **, int); 119 private int hextoint(int); 120 private const char *getstr(struct magic_set *, struct magic *, const char *, 121 int); 122 private int parse(struct magic_set *, struct magic_entry *, const char *, 123 size_t, int); 124 private void eatsize(const char **); 125 private int apprentice_1(struct magic_set *, const char *, int); 126 private size_t apprentice_magic_strength(const struct magic *); 127 private int apprentice_sort(const void *, const void *); 128 private void apprentice_list(struct mlist *, int ); 129 private struct magic_map *apprentice_load(struct magic_set *, 130 const char *, int); 131 private struct mlist *mlist_alloc(void); 132 private void mlist_free(struct mlist *); 133 private void byteswap(struct magic *, uint32_t); 134 private void bs1(struct magic *); 135 private uint16_t swap2(uint16_t); 136 private uint32_t swap4(uint32_t); 137 private uint64_t swap8(uint64_t); 138 private char *mkdbname(struct magic_set *, const char *, int); 139 private struct magic_map *apprentice_buf(struct magic_set *, struct magic *, 140 size_t); 141 private struct magic_map *apprentice_map(struct magic_set *, const char *); 142 private int check_buffer(struct magic_set *, struct magic_map *, const char *); 143 private void apprentice_unmap(struct magic_map *); 144 private int apprentice_compile(struct magic_set *, struct magic_map *, 145 const char *); 146 private int check_format_type(const char *, int); 147 private int check_format(struct magic_set *, struct magic *); 148 private int get_op(char); 149 private int parse_mime(struct magic_set *, struct magic_entry *, const char *); 150 private int parse_strength(struct magic_set *, struct magic_entry *, const char *); 151 private int parse_apple(struct magic_set *, struct magic_entry *, const char *); 152 153 154 private size_t magicsize = sizeof(struct magic); 155 156 private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc"; 157 158 private struct { 159 const char *name; 160 size_t len; 161 int (*fun)(struct magic_set *, struct magic_entry *, const char *); 162 } bang[] = { 163 #define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name } 164 DECLARE_FIELD(mime), 165 DECLARE_FIELD(apple), 166 DECLARE_FIELD(strength), 167 #undef DECLARE_FIELD 168 { NULL, 0, NULL } 169 }; 170 171 #ifdef COMPILE_ONLY 172 173 int main(int, char *[]); 174 175 int 176 main(int argc, char *argv[]) 177 { 178 int ret; 179 struct magic_set *ms; 180 char *progname; 181 182 if ((progname = strrchr(argv[0], '/')) != NULL) 183 progname++; 184 else 185 progname = argv[0]; 186 187 if (argc != 2) { 188 (void)fprintf(stderr, "Usage: %s file\n", progname); 189 return 1; 190 } 191 192 if ((ms = magic_open(MAGIC_CHECK)) == NULL) { 193 (void)fprintf(stderr, "%s: %s\n", progname, strerror(errno)); 194 return 1; 195 } 196 ret = magic_compile(ms, argv[1]) == -1 ? 1 : 0; 197 if (ret == 1) 198 (void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms)); 199 magic_close(ms); 200 return ret; 201 } 202 #endif /* COMPILE_ONLY */ 203 204 struct type_tbl_s { 205 const char name[16]; 206 const size_t len; 207 const int type; 208 const int format; 209 }; 210 211 /* 212 * XXX - the actual Single UNIX Specification says that "long" means "long", 213 * as in the C data type, but we treat it as meaning "4-byte integer". 214 * Given that the OS X version of file 5.04 did the same, I guess that passes 215 * the actual test; having "long" be dependent on how big a "long" is on 216 * the machine running "file" is silly. 217 */ 218 static const struct type_tbl_s type_tbl[] = { 219 # define XX(s) s, (sizeof(s) - 1) 220 # define XX_NULL "", 0 221 { XX("invalid"), FILE_INVALID, FILE_FMT_NONE }, 222 { XX("byte"), FILE_BYTE, FILE_FMT_NUM }, 223 { XX("short"), FILE_SHORT, FILE_FMT_NUM }, 224 { XX("default"), FILE_DEFAULT, FILE_FMT_NONE }, 225 { XX("long"), FILE_LONG, FILE_FMT_NUM }, 226 { XX("string"), FILE_STRING, FILE_FMT_STR }, 227 { XX("date"), FILE_DATE, FILE_FMT_STR }, 228 { XX("beshort"), FILE_BESHORT, FILE_FMT_NUM }, 229 { XX("belong"), FILE_BELONG, FILE_FMT_NUM }, 230 { XX("bedate"), FILE_BEDATE, FILE_FMT_STR }, 231 { XX("leshort"), FILE_LESHORT, FILE_FMT_NUM }, 232 { XX("lelong"), FILE_LELONG, FILE_FMT_NUM }, 233 { XX("ledate"), FILE_LEDATE, FILE_FMT_STR }, 234 { XX("pstring"), FILE_PSTRING, FILE_FMT_STR }, 235 { XX("ldate"), FILE_LDATE, FILE_FMT_STR }, 236 { XX("beldate"), FILE_BELDATE, FILE_FMT_STR }, 237 { XX("leldate"), FILE_LELDATE, FILE_FMT_STR }, 238 { XX("regex"), FILE_REGEX, FILE_FMT_STR }, 239 { XX("bestring16"), FILE_BESTRING16, FILE_FMT_STR }, 240 { XX("lestring16"), FILE_LESTRING16, FILE_FMT_STR }, 241 { XX("search"), FILE_SEARCH, FILE_FMT_STR }, 242 { XX("medate"), FILE_MEDATE, FILE_FMT_STR }, 243 { XX("meldate"), FILE_MELDATE, FILE_FMT_STR }, 244 { XX("melong"), FILE_MELONG, FILE_FMT_NUM }, 245 { XX("quad"), FILE_QUAD, FILE_FMT_QUAD }, 246 { XX("lequad"), FILE_LEQUAD, FILE_FMT_QUAD }, 247 { XX("bequad"), FILE_BEQUAD, FILE_FMT_QUAD }, 248 { XX("qdate"), FILE_QDATE, FILE_FMT_STR }, 249 { XX("leqdate"), FILE_LEQDATE, FILE_FMT_STR }, 250 { XX("beqdate"), FILE_BEQDATE, FILE_FMT_STR }, 251 { XX("qldate"), FILE_QLDATE, FILE_FMT_STR }, 252 { XX("leqldate"), FILE_LEQLDATE, FILE_FMT_STR }, 253 { XX("beqldate"), FILE_BEQLDATE, FILE_FMT_STR }, 254 { XX("float"), FILE_FLOAT, FILE_FMT_FLOAT }, 255 { XX("befloat"), FILE_BEFLOAT, FILE_FMT_FLOAT }, 256 { XX("lefloat"), FILE_LEFLOAT, FILE_FMT_FLOAT }, 257 { XX("double"), FILE_DOUBLE, FILE_FMT_DOUBLE }, 258 { XX("bedouble"), FILE_BEDOUBLE, FILE_FMT_DOUBLE }, 259 { XX("ledouble"), FILE_LEDOUBLE, FILE_FMT_DOUBLE }, 260 { XX("leid3"), FILE_LEID3, FILE_FMT_NUM }, 261 { XX("beid3"), FILE_BEID3, FILE_FMT_NUM }, 262 { XX("indirect"), FILE_INDIRECT, FILE_FMT_NUM }, 263 { XX("qwdate"), FILE_QWDATE, FILE_FMT_STR }, 264 { XX("leqwdate"), FILE_LEQWDATE, FILE_FMT_STR }, 265 { XX("beqwdate"), FILE_BEQWDATE, FILE_FMT_STR }, 266 { XX("name"), FILE_NAME, FILE_FMT_NONE }, 267 { XX("use"), FILE_USE, FILE_FMT_NONE }, 268 { XX("clear"), FILE_CLEAR, FILE_FMT_NONE }, 269 { XX_NULL, FILE_INVALID, FILE_FMT_NONE }, 270 }; 271 272 /* 273 * These are not types, and cannot be preceded by "u" to make them 274 * unsigned. 275 */ 276 static const struct type_tbl_s special_tbl[] = { 277 { XX("name"), FILE_NAME, FILE_FMT_STR }, 278 { XX("use"), FILE_USE, FILE_FMT_STR }, 279 { XX_NULL, FILE_INVALID, FILE_FMT_NONE }, 280 }; 281 # undef XX 282 # undef XX_NULL 283 284 private int 285 get_type(const struct type_tbl_s *tbl, const char *l, const char **t) 286 { 287 const struct type_tbl_s *p; 288 289 for (p = tbl; p->len; p++) { 290 if (strncmp(l, p->name, p->len) == 0) { 291 if (t) 292 *t = l + p->len; 293 break; 294 } 295 } 296 return p->type; 297 } 298 299 private int 300 get_standard_integer_type(const char *l, const char **t) 301 { 302 int type; 303 304 if (isalpha((unsigned char)l[1])) { 305 switch (l[1]) { 306 case 'C': 307 /* "dC" and "uC" */ 308 type = FILE_BYTE; 309 break; 310 case 'S': 311 /* "dS" and "uS" */ 312 type = FILE_SHORT; 313 break; 314 case 'I': 315 case 'L': 316 /* 317 * "dI", "dL", "uI", and "uL". 318 * 319 * XXX - the actual Single UNIX Specification says 320 * that "L" means "long", as in the C data type, 321 * but we treat it as meaning "4-byte integer". 322 * Given that the OS X version of file 5.04 did 323 * the same, I guess that passes the actual SUS 324 * validation suite; having "dL" be dependent on 325 * how big a "long" is on the machine running 326 * "file" is silly. 327 */ 328 type = FILE_LONG; 329 break; 330 case 'Q': 331 /* "dQ" and "uQ" */ 332 type = FILE_QUAD; 333 break; 334 default: 335 /* "d{anything else}", "u{anything else}" */ 336 return FILE_INVALID; 337 } 338 l += 2; 339 } else if (isdigit((unsigned char)l[1])) { 340 /* 341 * "d{num}" and "u{num}"; we only support {num} values 342 * of 1, 2, 4, and 8 - the Single UNIX Specification 343 * doesn't say anything about whether arbitrary 344 * values should be supported, but both the Solaris 10 345 * and OS X Mountain Lion versions of file passed the 346 * Single UNIX Specification validation suite, and 347 * neither of them support values bigger than 8 or 348 * non-power-of-2 values. 349 */ 350 if (isdigit((unsigned char)l[2])) { 351 /* Multi-digit, so > 9 */ 352 return FILE_INVALID; 353 } 354 switch (l[1]) { 355 case '1': 356 type = FILE_BYTE; 357 break; 358 case '2': 359 type = FILE_SHORT; 360 break; 361 case '4': 362 type = FILE_LONG; 363 break; 364 case '8': 365 type = FILE_QUAD; 366 break; 367 default: 368 /* XXX - what about 3, 5, 6, or 7? */ 369 return FILE_INVALID; 370 } 371 l += 2; 372 } else { 373 /* 374 * "d" or "u" by itself. 375 */ 376 type = FILE_LONG; 377 ++l; 378 } 379 if (t) 380 *t = l; 381 return type; 382 } 383 384 private void 385 init_file_tables(void) 386 { 387 static int done = 0; 388 const struct type_tbl_s *p; 389 390 if (done) 391 return; 392 done++; 393 394 for (p = type_tbl; p->len; p++) { 395 assert(p->type < FILE_NAMES_SIZE); 396 file_names[p->type] = p->name; 397 file_formats[p->type] = p->format; 398 } 399 assert(p - type_tbl == FILE_NAMES_SIZE); 400 } 401 402 private int 403 add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx) 404 { 405 struct mlist *ml; 406 407 mlp->map = idx == 0 ? map : NULL; 408 if ((ml = CAST(struct mlist *, malloc(sizeof(*ml)))) == NULL) 409 return -1; 410 411 ml->map = NULL; 412 ml->magic = map->magic[idx]; 413 ml->nmagic = map->nmagic[idx]; 414 415 mlp->prev->next = ml; 416 ml->prev = mlp->prev; 417 ml->next = mlp; 418 mlp->prev = ml; 419 return 0; 420 } 421 422 /* 423 * Handle one file or directory. 424 */ 425 private int 426 apprentice_1(struct magic_set *ms, const char *fn, int action) 427 { 428 struct magic_map *map; 429 #ifndef COMPILE_ONLY 430 struct mlist *ml; 431 size_t i; 432 #endif 433 434 if (magicsize != FILE_MAGICSIZE) { 435 file_error(ms, 0, "magic element size %lu != %lu", 436 (unsigned long)sizeof(*map->magic[0]), 437 (unsigned long)FILE_MAGICSIZE); 438 return -1; 439 } 440 441 if (action == FILE_COMPILE) { 442 map = apprentice_load(ms, fn, action); 443 if (map == NULL) 444 return -1; 445 return apprentice_compile(ms, map, fn); 446 } 447 448 #ifndef COMPILE_ONLY 449 map = apprentice_map(ms, fn); 450 if (map == NULL) { 451 if (ms->flags & MAGIC_CHECK) 452 file_magwarn(ms, "using regular magic file `%s'", fn); 453 map = apprentice_load(ms, fn, action); 454 if (map == NULL) 455 return -1; 456 } 457 458 for (i = 0; i < MAGIC_SETS; i++) { 459 if (add_mlist(ms->mlist[i], map, i) == -1) { 460 file_oomem(ms, sizeof(*ml)); 461 goto fail; 462 } 463 } 464 465 if (action == FILE_LIST) { 466 for (i = 0; i < MAGIC_SETS; i++) { 467 printf("Set %" SIZE_T_FORMAT "u:\nBinary patterns:\n", 468 i); 469 apprentice_list(ms->mlist[i], BINTEST); 470 printf("Text patterns:\n"); 471 apprentice_list(ms->mlist[i], TEXTTEST); 472 } 473 } 474 return 0; 475 fail: 476 for (i = 0; i < MAGIC_SETS; i++) { 477 mlist_free(ms->mlist[i]); 478 ms->mlist[i] = NULL; 479 } 480 return -1; 481 #else 482 return 0; 483 #endif /* COMPILE_ONLY */ 484 } 485 486 protected void 487 file_ms_free(struct magic_set *ms) 488 { 489 size_t i; 490 if (ms == NULL) 491 return; 492 for (i = 0; i < MAGIC_SETS; i++) 493 mlist_free(ms->mlist[i]); 494 free(ms->o.pbuf); 495 free(ms->o.buf); 496 free(ms->c.li); 497 free(ms); 498 } 499 500 protected struct magic_set * 501 file_ms_alloc(int flags) 502 { 503 struct magic_set *ms; 504 size_t i, len; 505 506 if ((ms = CAST(struct magic_set *, calloc((size_t)1, 507 sizeof(struct magic_set)))) == NULL) 508 return NULL; 509 510 if (magic_setflags(ms, flags) == -1) { 511 errno = EINVAL; 512 goto free; 513 } 514 515 ms->o.buf = ms->o.pbuf = NULL; 516 len = (ms->c.len = 10) * sizeof(*ms->c.li); 517 518 if ((ms->c.li = CAST(struct level_info *, malloc(len))) == NULL) 519 goto free; 520 521 ms->event_flags = 0; 522 ms->error = -1; 523 for (i = 0; i < MAGIC_SETS; i++) 524 ms->mlist[i] = NULL; 525 ms->file = "unknown"; 526 ms->line = 0; 527 ms->indir_max = FILE_INDIR_MAX; 528 ms->name_max = FILE_NAME_MAX; 529 ms->elf_shnum_max = FILE_ELF_SHNUM_MAX; 530 ms->elf_phnum_max = FILE_ELF_PHNUM_MAX; 531 return ms; 532 free: 533 free(ms); 534 return NULL; 535 } 536 537 private void 538 apprentice_unmap(struct magic_map *map) 539 { 540 if (map == NULL) 541 return; 542 543 switch (map->type) { 544 #ifdef QUICK 545 case MAP_TYPE_MMAP: 546 if (map->p) 547 (void)munmap(map->p, map->len); 548 break; 549 #endif 550 case MAP_TYPE_MALLOC: 551 free(map->p); 552 break; 553 case MAP_TYPE_USER: 554 break; 555 default: 556 abort(); 557 } 558 free(map); 559 } 560 561 private struct mlist * 562 mlist_alloc(void) 563 { 564 struct mlist *mlist; 565 if ((mlist = CAST(struct mlist *, calloc(1, sizeof(*mlist)))) == NULL) { 566 return NULL; 567 } 568 mlist->next = mlist->prev = mlist; 569 return mlist; 570 } 571 572 private void 573 mlist_free(struct mlist *mlist) 574 { 575 struct mlist *ml, *next; 576 577 if (mlist == NULL) 578 return; 579 580 ml = mlist->next; 581 for (ml = mlist->next; (next = ml->next) != NULL; ml = next) { 582 if (ml->map) 583 apprentice_unmap(ml->map); 584 free(ml); 585 if (ml == mlist) 586 break; 587 } 588 } 589 590 #ifndef COMPILE_ONLY 591 /* void **bufs: an array of compiled magic files */ 592 protected int 593 buffer_apprentice(struct magic_set *ms, struct magic **bufs, 594 size_t *sizes, size_t nbufs) 595 { 596 size_t i, j; 597 struct mlist *ml; 598 struct magic_map *map; 599 600 if (nbufs == 0) 601 return -1; 602 603 if (ms->mlist[0] != NULL) 604 file_reset(ms); 605 606 init_file_tables(); 607 608 for (i = 0; i < MAGIC_SETS; i++) { 609 mlist_free(ms->mlist[i]); 610 if ((ms->mlist[i] = mlist_alloc()) == NULL) { 611 file_oomem(ms, sizeof(*ms->mlist[i])); 612 goto fail; 613 } 614 } 615 616 for (i = 0; i < nbufs; i++) { 617 map = apprentice_buf(ms, bufs[i], sizes[i]); 618 if (map == NULL) 619 goto fail; 620 621 for (j = 0; j < MAGIC_SETS; j++) { 622 if (add_mlist(ms->mlist[j], map, j) == -1) { 623 file_oomem(ms, sizeof(*ml)); 624 goto fail; 625 } 626 } 627 } 628 629 return 0; 630 fail: 631 for (i = 0; i < MAGIC_SETS; i++) { 632 mlist_free(ms->mlist[i]); 633 ms->mlist[i] = NULL; 634 } 635 return -1; 636 } 637 #endif 638 639 /* const char *fn: list of magic files and directories */ 640 protected int 641 file_apprentice(struct magic_set *ms, const char *fn, int action) 642 { 643 char *p, *mfn; 644 int file_err, errs = -1; 645 size_t i; 646 647 if (ms->mlist[0] != NULL) 648 file_reset(ms); 649 650 if ((fn = magic_getpath(fn, action)) == NULL) 651 return -1; 652 653 init_file_tables(); 654 655 if ((mfn = strdup(fn)) == NULL) { 656 file_oomem(ms, strlen(fn)); 657 return -1; 658 } 659 660 for (i = 0; i < MAGIC_SETS; i++) { 661 mlist_free(ms->mlist[i]); 662 if ((ms->mlist[i] = mlist_alloc()) == NULL) { 663 file_oomem(ms, sizeof(*ms->mlist[i])); 664 while (i-- > 0) { 665 mlist_free(ms->mlist[i]); 666 ms->mlist[i] = NULL; 667 } 668 free(mfn); 669 return -1; 670 } 671 } 672 fn = mfn; 673 674 while (fn) { 675 p = strchr(fn, PATHSEP); 676 if (p) 677 *p++ = '\0'; 678 if (*fn == '\0') 679 break; 680 file_err = apprentice_1(ms, fn, action); 681 errs = MAX(errs, file_err); 682 fn = p; 683 } 684 685 free(mfn); 686 687 if (errs == -1) { 688 for (i = 0; i < MAGIC_SETS; i++) { 689 mlist_free(ms->mlist[i]); 690 ms->mlist[i] = NULL; 691 } 692 file_error(ms, 0, "could not find any valid magic files!"); 693 return -1; 694 } 695 696 #if 0 697 /* 698 * Always leave the database loaded 699 */ 700 if (action == FILE_LOAD) 701 return 0; 702 703 for (i = 0; i < MAGIC_SETS; i++) { 704 mlist_free(ms->mlist[i]); 705 ms->mlist[i] = NULL; 706 } 707 #endif 708 709 switch (action) { 710 case FILE_LOAD: 711 case FILE_COMPILE: 712 case FILE_CHECK: 713 case FILE_LIST: 714 return 0; 715 default: 716 file_error(ms, 0, "Invalid action %d", action); 717 return -1; 718 } 719 } 720 721 /* 722 * Compute the real length of a magic expression, for the purposes 723 * of determining how "strong" a magic expression is (approximating 724 * how specific its matches are): 725 * - magic characters count 0 unless escaped. 726 * - [] expressions count 1 727 * - {} expressions count 0 728 * - regular characters or escaped magic characters count 1 729 * - 0 length expressions count as one 730 */ 731 private size_t 732 nonmagic(const char *str) 733 { 734 const char *p; 735 size_t rv = 0; 736 737 for (p = str; *p; p++) 738 switch (*p) { 739 case '\\': /* Escaped anything counts 1 */ 740 if (!*++p) 741 p--; 742 rv++; 743 continue; 744 case '?': /* Magic characters count 0 */ 745 case '*': 746 case '.': 747 case '+': 748 case '^': 749 case '$': 750 continue; 751 case '[': /* Bracketed expressions count 1 the ']' */ 752 while (*p && *p != ']') 753 p++; 754 p--; 755 continue; 756 case '{': /* Braced expressions count 0 */ 757 while (*p && *p != '}') 758 p++; 759 if (!*p) 760 p--; 761 continue; 762 default: /* Anything else counts 1 */ 763 rv++; 764 continue; 765 } 766 767 return rv == 0 ? 1 : rv; /* Return at least 1 */ 768 } 769 770 /* 771 * Get weight of this magic entry, for sorting purposes. 772 */ 773 private size_t 774 apprentice_magic_strength(const struct magic *m) 775 { 776 #define MULT 10 777 size_t v, val = 2 * MULT; /* baseline strength */ 778 779 switch (m->type) { 780 case FILE_DEFAULT: /* make sure this sorts last */ 781 if (m->factor_op != FILE_FACTOR_OP_NONE) 782 abort(); 783 return 0; 784 785 case FILE_BYTE: 786 val += 1 * MULT; 787 break; 788 789 case FILE_SHORT: 790 case FILE_LESHORT: 791 case FILE_BESHORT: 792 val += 2 * MULT; 793 break; 794 795 case FILE_LONG: 796 case FILE_LELONG: 797 case FILE_BELONG: 798 case FILE_MELONG: 799 val += 4 * MULT; 800 break; 801 802 case FILE_PSTRING: 803 case FILE_STRING: 804 val += m->vallen * MULT; 805 break; 806 807 case FILE_BESTRING16: 808 case FILE_LESTRING16: 809 val += m->vallen * MULT / 2; 810 break; 811 812 case FILE_SEARCH: 813 val += m->vallen * MAX(MULT / m->vallen, 1); 814 break; 815 816 case FILE_REGEX: 817 v = nonmagic(m->value.s); 818 val += v * MAX(MULT / v, 1); 819 break; 820 821 case FILE_DATE: 822 case FILE_LEDATE: 823 case FILE_BEDATE: 824 case FILE_MEDATE: 825 case FILE_LDATE: 826 case FILE_LELDATE: 827 case FILE_BELDATE: 828 case FILE_MELDATE: 829 case FILE_FLOAT: 830 case FILE_BEFLOAT: 831 case FILE_LEFLOAT: 832 val += 4 * MULT; 833 break; 834 835 case FILE_QUAD: 836 case FILE_BEQUAD: 837 case FILE_LEQUAD: 838 case FILE_QDATE: 839 case FILE_LEQDATE: 840 case FILE_BEQDATE: 841 case FILE_QLDATE: 842 case FILE_LEQLDATE: 843 case FILE_BEQLDATE: 844 case FILE_QWDATE: 845 case FILE_LEQWDATE: 846 case FILE_BEQWDATE: 847 case FILE_DOUBLE: 848 case FILE_BEDOUBLE: 849 case FILE_LEDOUBLE: 850 val += 8 * MULT; 851 break; 852 853 case FILE_INDIRECT: 854 case FILE_NAME: 855 case FILE_USE: 856 break; 857 858 default: 859 (void)fprintf(stderr, "Bad type %d\n", m->type); 860 abort(); 861 } 862 863 switch (m->reln) { 864 case 'x': /* matches anything penalize */ 865 case '!': /* matches almost anything penalize */ 866 val = 0; 867 break; 868 869 case '=': /* Exact match, prefer */ 870 val += MULT; 871 break; 872 873 case '>': 874 case '<': /* comparison match reduce strength */ 875 val -= 2 * MULT; 876 break; 877 878 case '^': 879 case '&': /* masking bits, we could count them too */ 880 val -= MULT; 881 break; 882 883 default: 884 (void)fprintf(stderr, "Bad relation %c\n", m->reln); 885 abort(); 886 } 887 888 if (val == 0) /* ensure we only return 0 for FILE_DEFAULT */ 889 val = 1; 890 891 switch (m->factor_op) { 892 case FILE_FACTOR_OP_NONE: 893 break; 894 case FILE_FACTOR_OP_PLUS: 895 val += m->factor; 896 break; 897 case FILE_FACTOR_OP_MINUS: 898 val -= m->factor; 899 break; 900 case FILE_FACTOR_OP_TIMES: 901 val *= m->factor; 902 break; 903 case FILE_FACTOR_OP_DIV: 904 val /= m->factor; 905 break; 906 default: 907 abort(); 908 } 909 910 /* 911 * Magic entries with no description get a bonus because they depend 912 * on subsequent magic entries to print something. 913 */ 914 if (m->desc[0] == '\0') 915 val++; 916 return val; 917 } 918 919 /* 920 * Sort callback for sorting entries by "strength" (basically length) 921 */ 922 private int 923 apprentice_sort(const void *a, const void *b) 924 { 925 const struct magic_entry *ma = CAST(const struct magic_entry *, a); 926 const struct magic_entry *mb = CAST(const struct magic_entry *, b); 927 size_t sa = apprentice_magic_strength(ma->mp); 928 size_t sb = apprentice_magic_strength(mb->mp); 929 if (sa == sb) 930 return 0; 931 else if (sa > sb) 932 return -1; 933 else 934 return 1; 935 } 936 937 /* 938 * Shows sorted patterns list in the order which is used for the matching 939 */ 940 private void 941 apprentice_list(struct mlist *mlist, int mode) 942 { 943 uint32_t magindex = 0; 944 struct mlist *ml; 945 for (ml = mlist->next; ml != mlist; ml = ml->next) { 946 for (magindex = 0; magindex < ml->nmagic; magindex++) { 947 struct magic *m = &ml->magic[magindex]; 948 if ((m->flag & mode) != mode) { 949 /* Skip sub-tests */ 950 while (magindex + 1 < ml->nmagic && 951 ml->magic[magindex + 1].cont_level != 0) 952 ++magindex; 953 continue; /* Skip to next top-level test*/ 954 } 955 956 /* 957 * Try to iterate over the tree until we find item with 958 * description/mimetype. 959 */ 960 while (magindex + 1 < ml->nmagic && 961 ml->magic[magindex + 1].cont_level != 0 && 962 *ml->magic[magindex].desc == '\0' && 963 *ml->magic[magindex].mimetype == '\0') 964 magindex++; 965 966 printf("Strength = %3" SIZE_T_FORMAT "u : %s [%s]\n", 967 apprentice_magic_strength(m), 968 ml->magic[magindex].desc, 969 ml->magic[magindex].mimetype); 970 } 971 } 972 } 973 974 private void 975 set_test_type(struct magic *mstart, struct magic *m) 976 { 977 switch (m->type) { 978 case FILE_BYTE: 979 case FILE_SHORT: 980 case FILE_LONG: 981 case FILE_DATE: 982 case FILE_BESHORT: 983 case FILE_BELONG: 984 case FILE_BEDATE: 985 case FILE_LESHORT: 986 case FILE_LELONG: 987 case FILE_LEDATE: 988 case FILE_LDATE: 989 case FILE_BELDATE: 990 case FILE_LELDATE: 991 case FILE_MEDATE: 992 case FILE_MELDATE: 993 case FILE_MELONG: 994 case FILE_QUAD: 995 case FILE_LEQUAD: 996 case FILE_BEQUAD: 997 case FILE_QDATE: 998 case FILE_LEQDATE: 999 case FILE_BEQDATE: 1000 case FILE_QLDATE: 1001 case FILE_LEQLDATE: 1002 case FILE_BEQLDATE: 1003 case FILE_QWDATE: 1004 case FILE_LEQWDATE: 1005 case FILE_BEQWDATE: 1006 case FILE_FLOAT: 1007 case FILE_BEFLOAT: 1008 case FILE_LEFLOAT: 1009 case FILE_DOUBLE: 1010 case FILE_BEDOUBLE: 1011 case FILE_LEDOUBLE: 1012 mstart->flag |= BINTEST; 1013 break; 1014 case FILE_STRING: 1015 case FILE_PSTRING: 1016 case FILE_BESTRING16: 1017 case FILE_LESTRING16: 1018 /* Allow text overrides */ 1019 if (mstart->str_flags & STRING_TEXTTEST) 1020 mstart->flag |= TEXTTEST; 1021 else 1022 mstart->flag |= BINTEST; 1023 break; 1024 case FILE_REGEX: 1025 case FILE_SEARCH: 1026 /* Check for override */ 1027 if (mstart->str_flags & STRING_BINTEST) 1028 mstart->flag |= BINTEST; 1029 if (mstart->str_flags & STRING_TEXTTEST) 1030 mstart->flag |= TEXTTEST; 1031 1032 if (mstart->flag & (TEXTTEST|BINTEST)) 1033 break; 1034 1035 /* binary test if pattern is not text */ 1036 if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL, 1037 NULL) <= 0) 1038 mstart->flag |= BINTEST; 1039 else 1040 mstart->flag |= TEXTTEST; 1041 break; 1042 case FILE_DEFAULT: 1043 /* can't deduce anything; we shouldn't see this at the 1044 top level anyway */ 1045 break; 1046 case FILE_INVALID: 1047 default: 1048 /* invalid search type, but no need to complain here */ 1049 break; 1050 } 1051 } 1052 1053 private int 1054 addentry(struct magic_set *ms, struct magic_entry *me, 1055 struct magic_entry_set *mset) 1056 { 1057 size_t i = me->mp->type == FILE_NAME ? 1 : 0; 1058 if (mset[i].count == mset[i].max) { 1059 struct magic_entry *mp; 1060 1061 mset[i].max += ALLOC_INCR; 1062 if ((mp = CAST(struct magic_entry *, 1063 realloc(mset[i].me, sizeof(*mp) * mset[i].max))) == 1064 NULL) { 1065 file_oomem(ms, sizeof(*mp) * mset[i].max); 1066 return -1; 1067 } 1068 (void)memset(&mp[mset[i].count], 0, sizeof(*mp) * 1069 ALLOC_INCR); 1070 mset[i].me = mp; 1071 } 1072 mset[i].me[mset[i].count++] = *me; 1073 memset(me, 0, sizeof(*me)); 1074 return 0; 1075 } 1076 1077 /* 1078 * Load and parse one file. 1079 */ 1080 private void 1081 load_1(struct magic_set *ms, int action, const char *fn, int *errs, 1082 struct magic_entry_set *mset) 1083 { 1084 size_t lineno = 0, llen = 0; 1085 char *line = NULL; 1086 ssize_t len; 1087 struct magic_entry me; 1088 1089 FILE *f = fopen(ms->file = fn, "r"); 1090 if (f == NULL) { 1091 if (errno != ENOENT) 1092 file_error(ms, errno, "cannot read magic file `%s'", 1093 fn); 1094 (*errs)++; 1095 return; 1096 } 1097 1098 memset(&me, 0, sizeof(me)); 1099 /* read and parse this file */ 1100 for (ms->line = 1; (len = getline(&line, &llen, f)) != -1; 1101 ms->line++) { 1102 if (len == 0) /* null line, garbage, etc */ 1103 continue; 1104 if (line[len - 1] == '\n') { 1105 lineno++; 1106 line[len - 1] = '\0'; /* delete newline */ 1107 } 1108 switch (line[0]) { 1109 case '\0': /* empty, do not parse */ 1110 case '#': /* comment, do not parse */ 1111 continue; 1112 case '!': 1113 if (line[1] == ':') { 1114 size_t i; 1115 1116 for (i = 0; bang[i].name != NULL; i++) { 1117 if ((size_t)(len - 2) > bang[i].len && 1118 memcmp(bang[i].name, line + 2, 1119 bang[i].len) == 0) 1120 break; 1121 } 1122 if (bang[i].name == NULL) { 1123 file_error(ms, 0, 1124 "Unknown !: entry `%s'", line); 1125 (*errs)++; 1126 continue; 1127 } 1128 if (me.mp == NULL) { 1129 file_error(ms, 0, 1130 "No current entry for :!%s type", 1131 bang[i].name); 1132 (*errs)++; 1133 continue; 1134 } 1135 if ((*bang[i].fun)(ms, &me, 1136 line + bang[i].len + 2) != 0) { 1137 (*errs)++; 1138 continue; 1139 } 1140 continue; 1141 } 1142 /*FALLTHROUGH*/ 1143 default: 1144 again: 1145 switch (parse(ms, &me, line, lineno, action)) { 1146 case 0: 1147 continue; 1148 case 1: 1149 (void)addentry(ms, &me, mset); 1150 goto again; 1151 default: 1152 (*errs)++; 1153 break; 1154 } 1155 } 1156 } 1157 if (me.mp) 1158 (void)addentry(ms, &me, mset); 1159 free(line); 1160 (void)fclose(f); 1161 } 1162 1163 /* 1164 * parse a file or directory of files 1165 * const char *fn: name of magic file or directory 1166 */ 1167 private int 1168 cmpstrp(const void *p1, const void *p2) 1169 { 1170 return strcmp(*(char *const *)p1, *(char *const *)p2); 1171 } 1172 1173 1174 private uint32_t 1175 set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme, 1176 uint32_t starttest) 1177 { 1178 static const char text[] = "text"; 1179 static const char binary[] = "binary"; 1180 static const size_t len = sizeof(text); 1181 1182 uint32_t i = starttest; 1183 1184 do { 1185 set_test_type(me[starttest].mp, me[i].mp); 1186 if ((ms->flags & MAGIC_DEBUG) == 0) 1187 continue; 1188 (void)fprintf(stderr, "%s%s%s: %s\n", 1189 me[i].mp->mimetype, 1190 me[i].mp->mimetype[0] == '\0' ? "" : "; ", 1191 me[i].mp->desc[0] ? me[i].mp->desc : "(no description)", 1192 me[i].mp->flag & BINTEST ? binary : text); 1193 if (me[i].mp->flag & BINTEST) { 1194 char *p = strstr(me[i].mp->desc, text); 1195 if (p && (p == me[i].mp->desc || 1196 isspace((unsigned char)p[-1])) && 1197 (p + len - me[i].mp->desc == MAXstring 1198 || (p[len] == '\0' || 1199 isspace((unsigned char)p[len])))) 1200 (void)fprintf(stderr, "*** Possible " 1201 "binary test for text type\n"); 1202 } 1203 } while (++i < nme && me[i].mp->cont_level != 0); 1204 return i; 1205 } 1206 1207 private void 1208 set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme) 1209 { 1210 uint32_t i; 1211 for (i = 0; i < nme; i++) { 1212 if (me[i].mp->cont_level == 0 && 1213 me[i].mp->type == FILE_DEFAULT) { 1214 while (++i < nme) 1215 if (me[i].mp->cont_level == 0) 1216 break; 1217 if (i != nme) { 1218 /* XXX - Ugh! */ 1219 ms->line = me[i].mp->lineno; 1220 file_magwarn(ms, 1221 "level 0 \"default\" did not sort last"); 1222 } 1223 return; 1224 } 1225 } 1226 } 1227 1228 private int 1229 coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme, 1230 struct magic **ma, uint32_t *nma) 1231 { 1232 uint32_t i, mentrycount = 0; 1233 size_t slen; 1234 1235 for (i = 0; i < nme; i++) 1236 mentrycount += me[i].cont_count; 1237 1238 slen = sizeof(**ma) * mentrycount; 1239 if ((*ma = CAST(struct magic *, malloc(slen))) == NULL) { 1240 file_oomem(ms, slen); 1241 return -1; 1242 } 1243 1244 mentrycount = 0; 1245 for (i = 0; i < nme; i++) { 1246 (void)memcpy(*ma + mentrycount, me[i].mp, 1247 me[i].cont_count * sizeof(**ma)); 1248 mentrycount += me[i].cont_count; 1249 } 1250 *nma = mentrycount; 1251 return 0; 1252 } 1253 1254 private void 1255 magic_entry_free(struct magic_entry *me, uint32_t nme) 1256 { 1257 uint32_t i; 1258 if (me == NULL) 1259 return; 1260 for (i = 0; i < nme; i++) 1261 free(me[i].mp); 1262 free(me); 1263 } 1264 1265 private struct magic_map * 1266 apprentice_load(struct magic_set *ms, const char *fn, int action) 1267 { 1268 int errs = 0; 1269 uint32_t i, j; 1270 size_t files = 0, maxfiles = 0; 1271 char **filearr = NULL, *mfn; 1272 struct stat st; 1273 struct magic_map *map; 1274 struct magic_entry_set mset[MAGIC_SETS]; 1275 DIR *dir; 1276 struct dirent *d; 1277 1278 memset(mset, 0, sizeof(mset)); 1279 ms->flags |= MAGIC_CHECK; /* Enable checks for parsed files */ 1280 1281 1282 if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) 1283 { 1284 file_oomem(ms, sizeof(*map)); 1285 return NULL; 1286 } 1287 1288 /* print silly verbose header for USG compat. */ 1289 if (action == FILE_CHECK) 1290 (void)fprintf(stderr, "%s\n", usg_hdr); 1291 1292 /* load directory or file */ 1293 if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) { 1294 dir = opendir(fn); 1295 if (!dir) { 1296 errs++; 1297 goto out; 1298 } 1299 while ((d = readdir(dir)) != NULL) { 1300 if (asprintf(&mfn, "%s/%s", fn, d->d_name) < 0) { 1301 file_oomem(ms, 1302 strlen(fn) + strlen(d->d_name) + 2); 1303 errs++; 1304 closedir(dir); 1305 goto out; 1306 } 1307 if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) { 1308 free(mfn); 1309 continue; 1310 } 1311 if (files >= maxfiles) { 1312 size_t mlen; 1313 maxfiles = (maxfiles + 1) * 2; 1314 mlen = maxfiles * sizeof(*filearr); 1315 if ((filearr = CAST(char **, 1316 realloc(filearr, mlen))) == NULL) { 1317 file_oomem(ms, mlen); 1318 free(mfn); 1319 closedir(dir); 1320 errs++; 1321 goto out; 1322 } 1323 } 1324 filearr[files++] = mfn; 1325 } 1326 closedir(dir); 1327 qsort(filearr, files, sizeof(*filearr), cmpstrp); 1328 for (i = 0; i < files; i++) { 1329 load_1(ms, action, filearr[i], &errs, mset); 1330 free(filearr[i]); 1331 } 1332 free(filearr); 1333 } else 1334 load_1(ms, action, fn, &errs, mset); 1335 if (errs) 1336 goto out; 1337 1338 for (j = 0; j < MAGIC_SETS; j++) { 1339 /* Set types of tests */ 1340 for (i = 0; i < mset[j].count; ) { 1341 if (mset[j].me[i].mp->cont_level != 0) { 1342 i++; 1343 continue; 1344 } 1345 i = set_text_binary(ms, mset[j].me, mset[j].count, i); 1346 } 1347 qsort(mset[j].me, mset[j].count, sizeof(*mset[j].me), 1348 apprentice_sort); 1349 1350 /* 1351 * Make sure that any level 0 "default" line is last 1352 * (if one exists). 1353 */ 1354 set_last_default(ms, mset[j].me, mset[j].count); 1355 1356 /* coalesce per file arrays into a single one */ 1357 if (coalesce_entries(ms, mset[j].me, mset[j].count, 1358 &map->magic[j], &map->nmagic[j]) == -1) { 1359 errs++; 1360 goto out; 1361 } 1362 } 1363 1364 out: 1365 for (j = 0; j < MAGIC_SETS; j++) 1366 magic_entry_free(mset[j].me, mset[j].count); 1367 1368 if (errs) { 1369 apprentice_unmap(map); 1370 return NULL; 1371 } 1372 return map; 1373 } 1374 1375 /* 1376 * extend the sign bit if the comparison is to be signed 1377 */ 1378 protected uint64_t 1379 file_signextend(struct magic_set *ms, struct magic *m, uint64_t v) 1380 { 1381 if (!(m->flag & UNSIGNED)) { 1382 switch(m->type) { 1383 /* 1384 * Do not remove the casts below. They are 1385 * vital. When later compared with the data, 1386 * the sign extension must have happened. 1387 */ 1388 case FILE_BYTE: 1389 v = (signed char) v; 1390 break; 1391 case FILE_SHORT: 1392 case FILE_BESHORT: 1393 case FILE_LESHORT: 1394 v = (short) v; 1395 break; 1396 case FILE_DATE: 1397 case FILE_BEDATE: 1398 case FILE_LEDATE: 1399 case FILE_MEDATE: 1400 case FILE_LDATE: 1401 case FILE_BELDATE: 1402 case FILE_LELDATE: 1403 case FILE_MELDATE: 1404 case FILE_LONG: 1405 case FILE_BELONG: 1406 case FILE_LELONG: 1407 case FILE_MELONG: 1408 case FILE_FLOAT: 1409 case FILE_BEFLOAT: 1410 case FILE_LEFLOAT: 1411 v = (int32_t) v; 1412 break; 1413 case FILE_QUAD: 1414 case FILE_BEQUAD: 1415 case FILE_LEQUAD: 1416 case FILE_QDATE: 1417 case FILE_QLDATE: 1418 case FILE_QWDATE: 1419 case FILE_BEQDATE: 1420 case FILE_BEQLDATE: 1421 case FILE_BEQWDATE: 1422 case FILE_LEQDATE: 1423 case FILE_LEQLDATE: 1424 case FILE_LEQWDATE: 1425 case FILE_DOUBLE: 1426 case FILE_BEDOUBLE: 1427 case FILE_LEDOUBLE: 1428 v = (int64_t) v; 1429 break; 1430 case FILE_STRING: 1431 case FILE_PSTRING: 1432 case FILE_BESTRING16: 1433 case FILE_LESTRING16: 1434 case FILE_REGEX: 1435 case FILE_SEARCH: 1436 case FILE_DEFAULT: 1437 case FILE_INDIRECT: 1438 case FILE_NAME: 1439 case FILE_USE: 1440 case FILE_CLEAR: 1441 break; 1442 default: 1443 if (ms->flags & MAGIC_CHECK) 1444 file_magwarn(ms, "cannot happen: m->type=%d\n", 1445 m->type); 1446 return ~0U; 1447 } 1448 } 1449 return v; 1450 } 1451 1452 private int 1453 string_modifier_check(struct magic_set *ms, struct magic *m) 1454 { 1455 if ((ms->flags & MAGIC_CHECK) == 0) 1456 return 0; 1457 1458 if ((m->type != FILE_REGEX || (m->str_flags & REGEX_LINE_COUNT) == 0) && 1459 (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0)) { 1460 file_magwarn(ms, 1461 "'/BHhLl' modifiers are only allowed for pascal strings\n"); 1462 return -1; 1463 } 1464 switch (m->type) { 1465 case FILE_BESTRING16: 1466 case FILE_LESTRING16: 1467 if (m->str_flags != 0) { 1468 file_magwarn(ms, 1469 "no modifiers allowed for 16-bit strings\n"); 1470 return -1; 1471 } 1472 break; 1473 case FILE_STRING: 1474 case FILE_PSTRING: 1475 if ((m->str_flags & REGEX_OFFSET_START) != 0) { 1476 file_magwarn(ms, 1477 "'/%c' only allowed on regex and search\n", 1478 CHAR_REGEX_OFFSET_START); 1479 return -1; 1480 } 1481 break; 1482 case FILE_SEARCH: 1483 if (m->str_range == 0) { 1484 file_magwarn(ms, 1485 "missing range; defaulting to %d\n", 1486 STRING_DEFAULT_RANGE); 1487 m->str_range = STRING_DEFAULT_RANGE; 1488 return -1; 1489 } 1490 break; 1491 case FILE_REGEX: 1492 if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) { 1493 file_magwarn(ms, "'/%c' not allowed on regex\n", 1494 CHAR_COMPACT_WHITESPACE); 1495 return -1; 1496 } 1497 if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) { 1498 file_magwarn(ms, "'/%c' not allowed on regex\n", 1499 CHAR_COMPACT_OPTIONAL_WHITESPACE); 1500 return -1; 1501 } 1502 break; 1503 default: 1504 file_magwarn(ms, "coding error: m->type=%d\n", 1505 m->type); 1506 return -1; 1507 } 1508 return 0; 1509 } 1510 1511 private int 1512 get_op(char c) 1513 { 1514 switch (c) { 1515 case '&': 1516 return FILE_OPAND; 1517 case '|': 1518 return FILE_OPOR; 1519 case '^': 1520 return FILE_OPXOR; 1521 case '+': 1522 return FILE_OPADD; 1523 case '-': 1524 return FILE_OPMINUS; 1525 case '*': 1526 return FILE_OPMULTIPLY; 1527 case '/': 1528 return FILE_OPDIVIDE; 1529 case '%': 1530 return FILE_OPMODULO; 1531 default: 1532 return -1; 1533 } 1534 } 1535 1536 #ifdef ENABLE_CONDITIONALS 1537 private int 1538 get_cond(const char *l, const char **t) 1539 { 1540 static const struct cond_tbl_s { 1541 char name[8]; 1542 size_t len; 1543 int cond; 1544 } cond_tbl[] = { 1545 { "if", 2, COND_IF }, 1546 { "elif", 4, COND_ELIF }, 1547 { "else", 4, COND_ELSE }, 1548 { "", 0, COND_NONE }, 1549 }; 1550 const struct cond_tbl_s *p; 1551 1552 for (p = cond_tbl; p->len; p++) { 1553 if (strncmp(l, p->name, p->len) == 0 && 1554 isspace((unsigned char)l[p->len])) { 1555 if (t) 1556 *t = l + p->len; 1557 break; 1558 } 1559 } 1560 return p->cond; 1561 } 1562 1563 private int 1564 check_cond(struct magic_set *ms, int cond, uint32_t cont_level) 1565 { 1566 int last_cond; 1567 last_cond = ms->c.li[cont_level].last_cond; 1568 1569 switch (cond) { 1570 case COND_IF: 1571 if (last_cond != COND_NONE && last_cond != COND_ELIF) { 1572 if (ms->flags & MAGIC_CHECK) 1573 file_magwarn(ms, "syntax error: `if'"); 1574 return -1; 1575 } 1576 last_cond = COND_IF; 1577 break; 1578 1579 case COND_ELIF: 1580 if (last_cond != COND_IF && last_cond != COND_ELIF) { 1581 if (ms->flags & MAGIC_CHECK) 1582 file_magwarn(ms, "syntax error: `elif'"); 1583 return -1; 1584 } 1585 last_cond = COND_ELIF; 1586 break; 1587 1588 case COND_ELSE: 1589 if (last_cond != COND_IF && last_cond != COND_ELIF) { 1590 if (ms->flags & MAGIC_CHECK) 1591 file_magwarn(ms, "syntax error: `else'"); 1592 return -1; 1593 } 1594 last_cond = COND_NONE; 1595 break; 1596 1597 case COND_NONE: 1598 last_cond = COND_NONE; 1599 break; 1600 } 1601 1602 ms->c.li[cont_level].last_cond = last_cond; 1603 return 0; 1604 } 1605 #endif /* ENABLE_CONDITIONALS */ 1606 1607 /* 1608 * parse one line from magic file, put into magic[index++] if valid 1609 */ 1610 private int 1611 parse(struct magic_set *ms, struct magic_entry *me, const char *line, 1612 size_t lineno, int action) 1613 { 1614 #ifdef ENABLE_CONDITIONALS 1615 static uint32_t last_cont_level = 0; 1616 #endif 1617 size_t i; 1618 struct magic *m; 1619 const char *l = line; 1620 char *t; 1621 int op; 1622 uint32_t cont_level; 1623 int32_t diff; 1624 1625 cont_level = 0; 1626 1627 /* 1628 * Parse the offset. 1629 */ 1630 while (*l == '>') { 1631 ++l; /* step over */ 1632 cont_level++; 1633 } 1634 #ifdef ENABLE_CONDITIONALS 1635 if (cont_level == 0 || cont_level > last_cont_level) 1636 if (file_check_mem(ms, cont_level) == -1) 1637 return -1; 1638 last_cont_level = cont_level; 1639 #endif 1640 if (cont_level != 0) { 1641 if (me->mp == NULL) { 1642 file_magerror(ms, "No current entry for continuation"); 1643 return -1; 1644 } 1645 if (me->cont_count == 0) { 1646 file_magerror(ms, "Continuations present with 0 count"); 1647 return -1; 1648 } 1649 m = &me->mp[me->cont_count - 1]; 1650 diff = (int32_t)cont_level - (int32_t)m->cont_level; 1651 if (diff > 1) 1652 file_magwarn(ms, "New continuation level %u is more " 1653 "than one larger than current level %u", cont_level, 1654 m->cont_level); 1655 if (me->cont_count == me->max_count) { 1656 struct magic *nm; 1657 size_t cnt = me->max_count + ALLOC_CHUNK; 1658 if ((nm = CAST(struct magic *, realloc(me->mp, 1659 sizeof(*nm) * cnt))) == NULL) { 1660 file_oomem(ms, sizeof(*nm) * cnt); 1661 return -1; 1662 } 1663 me->mp = m = nm; 1664 me->max_count = CAST(uint32_t, cnt); 1665 } 1666 m = &me->mp[me->cont_count++]; 1667 (void)memset(m, 0, sizeof(*m)); 1668 m->cont_level = cont_level; 1669 } else { 1670 static const size_t len = sizeof(*m) * ALLOC_CHUNK; 1671 if (me->mp != NULL) 1672 return 1; 1673 if ((m = CAST(struct magic *, malloc(len))) == NULL) { 1674 file_oomem(ms, len); 1675 return -1; 1676 } 1677 me->mp = m; 1678 me->max_count = ALLOC_CHUNK; 1679 (void)memset(m, 0, sizeof(*m)); 1680 m->factor_op = FILE_FACTOR_OP_NONE; 1681 m->cont_level = 0; 1682 me->cont_count = 1; 1683 } 1684 m->lineno = CAST(uint32_t, lineno); 1685 1686 if (*l == '&') { /* m->cont_level == 0 checked below. */ 1687 ++l; /* step over */ 1688 m->flag |= OFFADD; 1689 } 1690 if (*l == '(') { 1691 ++l; /* step over */ 1692 m->flag |= INDIR; 1693 if (m->flag & OFFADD) 1694 m->flag = (m->flag & ~OFFADD) | INDIROFFADD; 1695 1696 if (*l == '&') { /* m->cont_level == 0 checked below */ 1697 ++l; /* step over */ 1698 m->flag |= OFFADD; 1699 } 1700 } 1701 /* Indirect offsets are not valid at level 0. */ 1702 if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD))) 1703 if (ms->flags & MAGIC_CHECK) 1704 file_magwarn(ms, "relative offset at level 0"); 1705 1706 /* get offset, then skip over it */ 1707 m->offset = (uint32_t)strtoul(l, &t, 0); 1708 if (l == t) 1709 if (ms->flags & MAGIC_CHECK) 1710 file_magwarn(ms, "offset `%s' invalid", l); 1711 l = t; 1712 1713 if (m->flag & INDIR) { 1714 m->in_type = FILE_LONG; 1715 m->in_offset = 0; 1716 /* 1717 * read [.lbs][+-]nnnnn) 1718 */ 1719 if (*l == '.') { 1720 l++; 1721 switch (*l) { 1722 case 'l': 1723 m->in_type = FILE_LELONG; 1724 break; 1725 case 'L': 1726 m->in_type = FILE_BELONG; 1727 break; 1728 case 'm': 1729 m->in_type = FILE_MELONG; 1730 break; 1731 case 'h': 1732 case 's': 1733 m->in_type = FILE_LESHORT; 1734 break; 1735 case 'H': 1736 case 'S': 1737 m->in_type = FILE_BESHORT; 1738 break; 1739 case 'c': 1740 case 'b': 1741 case 'C': 1742 case 'B': 1743 m->in_type = FILE_BYTE; 1744 break; 1745 case 'e': 1746 case 'f': 1747 case 'g': 1748 m->in_type = FILE_LEDOUBLE; 1749 break; 1750 case 'E': 1751 case 'F': 1752 case 'G': 1753 m->in_type = FILE_BEDOUBLE; 1754 break; 1755 case 'i': 1756 m->in_type = FILE_LEID3; 1757 break; 1758 case 'I': 1759 m->in_type = FILE_BEID3; 1760 break; 1761 default: 1762 if (ms->flags & MAGIC_CHECK) 1763 file_magwarn(ms, 1764 "indirect offset type `%c' invalid", 1765 *l); 1766 break; 1767 } 1768 l++; 1769 } 1770 1771 m->in_op = 0; 1772 if (*l == '~') { 1773 m->in_op |= FILE_OPINVERSE; 1774 l++; 1775 } 1776 if ((op = get_op(*l)) != -1) { 1777 m->in_op |= op; 1778 l++; 1779 } 1780 if (*l == '(') { 1781 m->in_op |= FILE_OPINDIRECT; 1782 l++; 1783 } 1784 if (isdigit((unsigned char)*l) || *l == '-') { 1785 m->in_offset = (int32_t)strtol(l, &t, 0); 1786 if (l == t) 1787 if (ms->flags & MAGIC_CHECK) 1788 file_magwarn(ms, 1789 "in_offset `%s' invalid", l); 1790 l = t; 1791 } 1792 if (*l++ != ')' || 1793 ((m->in_op & FILE_OPINDIRECT) && *l++ != ')')) 1794 if (ms->flags & MAGIC_CHECK) 1795 file_magwarn(ms, 1796 "missing ')' in indirect offset"); 1797 } 1798 EATAB; 1799 1800 #ifdef ENABLE_CONDITIONALS 1801 m->cond = get_cond(l, &l); 1802 if (check_cond(ms, m->cond, cont_level) == -1) 1803 return -1; 1804 1805 EATAB; 1806 #endif 1807 1808 /* 1809 * Parse the type. 1810 */ 1811 if (*l == 'u') { 1812 /* 1813 * Try it as a keyword type prefixed by "u"; match what 1814 * follows the "u". If that fails, try it as an SUS 1815 * integer type. 1816 */ 1817 m->type = get_type(type_tbl, l + 1, &l); 1818 if (m->type == FILE_INVALID) { 1819 /* 1820 * Not a keyword type; parse it as an SUS type, 1821 * 'u' possibly followed by a number or C/S/L. 1822 */ 1823 m->type = get_standard_integer_type(l, &l); 1824 } 1825 /* It's unsigned. */ 1826 if (m->type != FILE_INVALID) 1827 m->flag |= UNSIGNED; 1828 } else { 1829 /* 1830 * Try it as a keyword type. If that fails, try it as 1831 * an SUS integer type if it begins with "d" or as an 1832 * SUS string type if it begins with "s". In any case, 1833 * it's not unsigned. 1834 */ 1835 m->type = get_type(type_tbl, l, &l); 1836 if (m->type == FILE_INVALID) { 1837 /* 1838 * Not a keyword type; parse it as an SUS type, 1839 * either 'd' possibly followed by a number or 1840 * C/S/L, or just 's'. 1841 */ 1842 if (*l == 'd') 1843 m->type = get_standard_integer_type(l, &l); 1844 else if (*l == 's' && !isalpha((unsigned char)l[1])) { 1845 m->type = FILE_STRING; 1846 ++l; 1847 } 1848 } 1849 } 1850 1851 if (m->type == FILE_INVALID) { 1852 /* Not found - try it as a special keyword. */ 1853 m->type = get_type(special_tbl, l, &l); 1854 } 1855 1856 if (m->type == FILE_INVALID) { 1857 if (ms->flags & MAGIC_CHECK) 1858 file_magwarn(ms, "type `%s' invalid", l); 1859 return -1; 1860 } 1861 1862 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ 1863 /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */ 1864 1865 m->mask_op = 0; 1866 if (*l == '~') { 1867 if (!IS_STRING(m->type)) 1868 m->mask_op |= FILE_OPINVERSE; 1869 else if (ms->flags & MAGIC_CHECK) 1870 file_magwarn(ms, "'~' invalid for string types"); 1871 ++l; 1872 } 1873 m->str_range = 0; 1874 m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0; 1875 if ((op = get_op(*l)) != -1) { 1876 if (!IS_STRING(m->type)) { 1877 uint64_t val; 1878 ++l; 1879 m->mask_op |= op; 1880 val = (uint64_t)strtoull(l, &t, 0); 1881 l = t; 1882 m->num_mask = file_signextend(ms, m, val); 1883 eatsize(&l); 1884 } 1885 else if (op == FILE_OPDIVIDE) { 1886 int have_range = 0; 1887 while (!isspace((unsigned char)*++l)) { 1888 switch (*l) { 1889 case '0': case '1': case '2': 1890 case '3': case '4': case '5': 1891 case '6': case '7': case '8': 1892 case '9': 1893 if (have_range && 1894 (ms->flags & MAGIC_CHECK)) 1895 file_magwarn(ms, 1896 "multiple ranges"); 1897 have_range = 1; 1898 m->str_range = CAST(uint32_t, 1899 strtoul(l, &t, 0)); 1900 if (m->str_range == 0) 1901 file_magwarn(ms, 1902 "zero range"); 1903 l = t - 1; 1904 break; 1905 case CHAR_COMPACT_WHITESPACE: 1906 m->str_flags |= 1907 STRING_COMPACT_WHITESPACE; 1908 break; 1909 case CHAR_COMPACT_OPTIONAL_WHITESPACE: 1910 m->str_flags |= 1911 STRING_COMPACT_OPTIONAL_WHITESPACE; 1912 break; 1913 case CHAR_IGNORE_LOWERCASE: 1914 m->str_flags |= STRING_IGNORE_LOWERCASE; 1915 break; 1916 case CHAR_IGNORE_UPPERCASE: 1917 m->str_flags |= STRING_IGNORE_UPPERCASE; 1918 break; 1919 case CHAR_REGEX_OFFSET_START: 1920 m->str_flags |= REGEX_OFFSET_START; 1921 break; 1922 case CHAR_BINTEST: 1923 m->str_flags |= STRING_BINTEST; 1924 break; 1925 case CHAR_TEXTTEST: 1926 m->str_flags |= STRING_TEXTTEST; 1927 break; 1928 case CHAR_TRIM: 1929 m->str_flags |= STRING_TRIM; 1930 break; 1931 case CHAR_PSTRING_1_LE: 1932 if (m->type != FILE_PSTRING) 1933 goto bad; 1934 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_1_LE; 1935 break; 1936 case CHAR_PSTRING_2_BE: 1937 if (m->type != FILE_PSTRING) 1938 goto bad; 1939 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_BE; 1940 break; 1941 case CHAR_PSTRING_2_LE: 1942 if (m->type != FILE_PSTRING) 1943 goto bad; 1944 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_LE; 1945 break; 1946 case CHAR_PSTRING_4_BE: 1947 if (m->type != FILE_PSTRING) 1948 goto bad; 1949 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_BE; 1950 break; 1951 case CHAR_PSTRING_4_LE: 1952 switch (m->type) { 1953 case FILE_PSTRING: 1954 case FILE_REGEX: 1955 break; 1956 default: 1957 goto bad; 1958 } 1959 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_LE; 1960 break; 1961 case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF: 1962 if (m->type != FILE_PSTRING) 1963 goto bad; 1964 m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF; 1965 break; 1966 default: 1967 bad: 1968 if (ms->flags & MAGIC_CHECK) 1969 file_magwarn(ms, 1970 "string extension `%c' " 1971 "invalid", *l); 1972 return -1; 1973 } 1974 /* allow multiple '/' for readability */ 1975 if (l[1] == '/' && 1976 !isspace((unsigned char)l[2])) 1977 l++; 1978 } 1979 if (string_modifier_check(ms, m) == -1) 1980 return -1; 1981 } 1982 else { 1983 if (ms->flags & MAGIC_CHECK) 1984 file_magwarn(ms, "invalid string op: %c", *t); 1985 return -1; 1986 } 1987 } 1988 /* 1989 * We used to set mask to all 1's here, instead let's just not do 1990 * anything if mask = 0 (unless you have a better idea) 1991 */ 1992 EATAB; 1993 1994 switch (*l) { 1995 case '>': 1996 case '<': 1997 m->reln = *l; 1998 ++l; 1999 if (*l == '=') { 2000 if (ms->flags & MAGIC_CHECK) { 2001 file_magwarn(ms, "%c= not supported", 2002 m->reln); 2003 return -1; 2004 } 2005 ++l; 2006 } 2007 break; 2008 /* Old-style anding: "0 byte &0x80 dynamically linked" */ 2009 case '&': 2010 case '^': 2011 case '=': 2012 m->reln = *l; 2013 ++l; 2014 if (*l == '=') { 2015 /* HP compat: ignore &= etc. */ 2016 ++l; 2017 } 2018 break; 2019 case '!': 2020 m->reln = *l; 2021 ++l; 2022 break; 2023 default: 2024 m->reln = '='; /* the default relation */ 2025 if (*l == 'x' && ((isascii((unsigned char)l[1]) && 2026 isspace((unsigned char)l[1])) || !l[1])) { 2027 m->reln = *l; 2028 ++l; 2029 } 2030 break; 2031 } 2032 /* 2033 * Grab the value part, except for an 'x' reln. 2034 */ 2035 if (m->reln != 'x' && getvalue(ms, m, &l, action)) 2036 return -1; 2037 2038 /* 2039 * TODO finish this macro and start using it! 2040 * #define offsetcheck {if (offset > HOWMANY-1) 2041 * magwarn("offset too big"); } 2042 */ 2043 2044 /* 2045 * Now get last part - the description 2046 */ 2047 EATAB; 2048 if (l[0] == '\b') { 2049 ++l; 2050 m->flag |= NOSPACE; 2051 } else if ((l[0] == '\\') && (l[1] == 'b')) { 2052 ++l; 2053 ++l; 2054 m->flag |= NOSPACE; 2055 } 2056 for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); ) 2057 continue; 2058 if (i == sizeof(m->desc)) { 2059 m->desc[sizeof(m->desc) - 1] = '\0'; 2060 if (ms->flags & MAGIC_CHECK) 2061 file_magwarn(ms, "description `%s' truncated", m->desc); 2062 } 2063 2064 /* 2065 * We only do this check while compiling, or if any of the magic 2066 * files were not compiled. 2067 */ 2068 if (ms->flags & MAGIC_CHECK) { 2069 if (check_format(ms, m) == -1) 2070 return -1; 2071 } 2072 #ifndef COMPILE_ONLY 2073 if (action == FILE_CHECK) { 2074 file_mdump(m); 2075 } 2076 #endif 2077 m->mimetype[0] = '\0'; /* initialise MIME type to none */ 2078 return 0; 2079 } 2080 2081 /* 2082 * parse a STRENGTH annotation line from magic file, put into magic[index - 1] 2083 * if valid 2084 */ 2085 private int 2086 parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line) 2087 { 2088 const char *l = line; 2089 char *el; 2090 unsigned long factor; 2091 struct magic *m = &me->mp[0]; 2092 2093 if (m->factor_op != FILE_FACTOR_OP_NONE) { 2094 file_magwarn(ms, 2095 "Current entry already has a strength type: %c %d", 2096 m->factor_op, m->factor); 2097 return -1; 2098 } 2099 if (m->type == FILE_NAME) { 2100 file_magwarn(ms, "%s: Strength setting is not supported in " 2101 "\"name\" magic entries", m->value.s); 2102 return -1; 2103 } 2104 EATAB; 2105 switch (*l) { 2106 case FILE_FACTOR_OP_NONE: 2107 case FILE_FACTOR_OP_PLUS: 2108 case FILE_FACTOR_OP_MINUS: 2109 case FILE_FACTOR_OP_TIMES: 2110 case FILE_FACTOR_OP_DIV: 2111 m->factor_op = *l++; 2112 break; 2113 default: 2114 file_magwarn(ms, "Unknown factor op `%c'", *l); 2115 return -1; 2116 } 2117 EATAB; 2118 factor = strtoul(l, &el, 0); 2119 if (factor > 255) { 2120 file_magwarn(ms, "Too large factor `%lu'", factor); 2121 goto out; 2122 } 2123 if (*el && !isspace((unsigned char)*el)) { 2124 file_magwarn(ms, "Bad factor `%s'", l); 2125 goto out; 2126 } 2127 m->factor = (uint8_t)factor; 2128 if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) { 2129 file_magwarn(ms, "Cannot have factor op `%c' and factor %u", 2130 m->factor_op, m->factor); 2131 goto out; 2132 } 2133 return 0; 2134 out: 2135 m->factor_op = FILE_FACTOR_OP_NONE; 2136 m->factor = 0; 2137 return -1; 2138 } 2139 2140 private int 2141 goodchar(unsigned char x, const char *extra) 2142 { 2143 return (isascii(x) && isalnum(x)) || strchr(extra, x); 2144 } 2145 2146 private int 2147 parse_extra(struct magic_set *ms, struct magic_entry *me, const char *line, 2148 off_t off, size_t len, const char *name, const char *extra, int nt) 2149 { 2150 size_t i; 2151 const char *l = line; 2152 struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1]; 2153 char *buf = (char *)m + off; 2154 2155 if (buf[0] != '\0') { 2156 len = nt ? strlen(buf) : len; 2157 file_magwarn(ms, "Current entry already has a %s type " 2158 "`%.*s', new type `%s'", name, (int)len, buf, l); 2159 return -1; 2160 } 2161 2162 if (*m->desc == '\0') { 2163 file_magwarn(ms, "Current entry does not yet have a " 2164 "description for adding a %s type", name); 2165 return -1; 2166 } 2167 2168 EATAB; 2169 for (i = 0; *l && i < len && goodchar(*l, extra); buf[i++] = *l++) 2170 continue; 2171 2172 if (i == len && *l) { 2173 if (nt) 2174 buf[len - 1] = '\0'; 2175 if (ms->flags & MAGIC_CHECK) 2176 file_magwarn(ms, "%s type `%s' truncated %" 2177 SIZE_T_FORMAT "u", name, line, i); 2178 } else { 2179 if (!isspace((unsigned char)*l) && !goodchar(*l, extra)) 2180 file_magwarn(ms, "%s type `%s' has bad char '%c'", 2181 name, line, *l); 2182 if (nt) 2183 buf[i] = '\0'; 2184 } 2185 2186 if (i > 0) 2187 return 0; 2188 2189 file_magerror(ms, "Bad magic entry '%s'", line); 2190 return -1; 2191 } 2192 2193 /* 2194 * Parse an Apple CREATOR/TYPE annotation from magic file and put it into 2195 * magic[index - 1] 2196 */ 2197 private int 2198 parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line) 2199 { 2200 struct magic *m = &me->mp[0]; 2201 2202 return parse_extra(ms, me, line, offsetof(struct magic, apple), 2203 sizeof(m->apple), "APPLE", "!+-./", 0); 2204 } 2205 2206 /* 2207 * parse a MIME annotation line from magic file, put into magic[index - 1] 2208 * if valid 2209 */ 2210 private int 2211 parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line) 2212 { 2213 struct magic *m = &me->mp[0]; 2214 2215 return parse_extra(ms, me, line, offsetof(struct magic, mimetype), 2216 sizeof(m->mimetype), "MIME", "+-/.", 1); 2217 } 2218 2219 private int 2220 check_format_type(const char *ptr, int type) 2221 { 2222 int quad = 0, h; 2223 if (*ptr == '\0') { 2224 /* Missing format string; bad */ 2225 return -1; 2226 } 2227 2228 switch (file_formats[type]) { 2229 case FILE_FMT_QUAD: 2230 quad = 1; 2231 /*FALLTHROUGH*/ 2232 case FILE_FMT_NUM: 2233 if (quad == 0) { 2234 switch (type) { 2235 case FILE_BYTE: 2236 h = 2; 2237 break; 2238 case FILE_SHORT: 2239 case FILE_BESHORT: 2240 case FILE_LESHORT: 2241 h = 1; 2242 break; 2243 case FILE_LONG: 2244 case FILE_BELONG: 2245 case FILE_LELONG: 2246 case FILE_MELONG: 2247 case FILE_LEID3: 2248 case FILE_BEID3: 2249 case FILE_INDIRECT: 2250 h = 0; 2251 break; 2252 default: 2253 abort(); 2254 } 2255 } else 2256 h = 0; 2257 if (*ptr == '-') 2258 ptr++; 2259 if (*ptr == '.') 2260 ptr++; 2261 while (isdigit((unsigned char)*ptr)) ptr++; 2262 if (*ptr == '.') 2263 ptr++; 2264 while (isdigit((unsigned char)*ptr)) ptr++; 2265 if (quad) { 2266 if (*ptr++ != 'l') 2267 return -1; 2268 if (*ptr++ != 'l') 2269 return -1; 2270 } 2271 2272 switch (*ptr++) { 2273 #ifdef STRICT_FORMAT /* "long" formats are int formats for us */ 2274 /* so don't accept the 'l' modifier */ 2275 case 'l': 2276 switch (*ptr++) { 2277 case 'i': 2278 case 'd': 2279 case 'u': 2280 case 'o': 2281 case 'x': 2282 case 'X': 2283 return h != 0 ? -1 : 0; 2284 default: 2285 return -1; 2286 } 2287 2288 /* 2289 * Don't accept h and hh modifiers. They make writing 2290 * magic entries more complicated, for very little benefit 2291 */ 2292 case 'h': 2293 if (h-- <= 0) 2294 return -1; 2295 switch (*ptr++) { 2296 case 'h': 2297 if (h-- <= 0) 2298 return -1; 2299 switch (*ptr++) { 2300 case 'i': 2301 case 'd': 2302 case 'u': 2303 case 'o': 2304 case 'x': 2305 case 'X': 2306 return 0; 2307 default: 2308 return -1; 2309 } 2310 case 'i': 2311 case 'd': 2312 case 'u': 2313 case 'o': 2314 case 'x': 2315 case 'X': 2316 return h != 0 ? -1 : 0; 2317 default: 2318 return -1; 2319 } 2320 #endif 2321 case 'c': 2322 return h != 2 ? -1 : 0; 2323 case 'i': 2324 case 'd': 2325 case 'u': 2326 case 'o': 2327 case 'x': 2328 case 'X': 2329 #ifdef STRICT_FORMAT 2330 return h != 0 ? -1 : 0; 2331 #else 2332 return 0; 2333 #endif 2334 default: 2335 return -1; 2336 } 2337 2338 case FILE_FMT_FLOAT: 2339 case FILE_FMT_DOUBLE: 2340 if (*ptr == '-') 2341 ptr++; 2342 if (*ptr == '.') 2343 ptr++; 2344 while (isdigit((unsigned char)*ptr)) ptr++; 2345 if (*ptr == '.') 2346 ptr++; 2347 while (isdigit((unsigned char)*ptr)) ptr++; 2348 2349 switch (*ptr++) { 2350 case 'e': 2351 case 'E': 2352 case 'f': 2353 case 'F': 2354 case 'g': 2355 case 'G': 2356 return 0; 2357 2358 default: 2359 return -1; 2360 } 2361 2362 2363 case FILE_FMT_STR: 2364 if (*ptr == '-') 2365 ptr++; 2366 while (isdigit((unsigned char )*ptr)) 2367 ptr++; 2368 if (*ptr == '.') { 2369 ptr++; 2370 while (isdigit((unsigned char )*ptr)) 2371 ptr++; 2372 } 2373 2374 switch (*ptr++) { 2375 case 's': 2376 return 0; 2377 default: 2378 return -1; 2379 } 2380 2381 default: 2382 /* internal error */ 2383 abort(); 2384 } 2385 /*NOTREACHED*/ 2386 return -1; 2387 } 2388 2389 /* 2390 * Check that the optional printf format in description matches 2391 * the type of the magic. 2392 */ 2393 private int 2394 check_format(struct magic_set *ms, struct magic *m) 2395 { 2396 char *ptr; 2397 2398 for (ptr = m->desc; *ptr; ptr++) 2399 if (*ptr == '%') 2400 break; 2401 if (*ptr == '\0') { 2402 /* No format string; ok */ 2403 return 1; 2404 } 2405 2406 assert(file_nformats == file_nnames); 2407 2408 if (m->type >= file_nformats) { 2409 file_magwarn(ms, "Internal error inconsistency between " 2410 "m->type and format strings"); 2411 return -1; 2412 } 2413 if (file_formats[m->type] == FILE_FMT_NONE) { 2414 file_magwarn(ms, "No format string for `%s' with description " 2415 "`%s'", m->desc, file_names[m->type]); 2416 return -1; 2417 } 2418 2419 ptr++; 2420 if (check_format_type(ptr, m->type) == -1) { 2421 /* 2422 * TODO: this error message is unhelpful if the format 2423 * string is not one character long 2424 */ 2425 file_magwarn(ms, "Printf format `%c' is not valid for type " 2426 "`%s' in description `%s'", *ptr ? *ptr : '?', 2427 file_names[m->type], m->desc); 2428 return -1; 2429 } 2430 2431 for (; *ptr; ptr++) { 2432 if (*ptr == '%') { 2433 file_magwarn(ms, 2434 "Too many format strings (should have at most one) " 2435 "for `%s' with description `%s'", 2436 file_names[m->type], m->desc); 2437 return -1; 2438 } 2439 } 2440 return 0; 2441 } 2442 2443 /* 2444 * Read a numeric value from a pointer, into the value union of a magic 2445 * pointer, according to the magic type. Update the string pointer to point 2446 * just after the number read. Return 0 for success, non-zero for failure. 2447 */ 2448 private int 2449 getvalue(struct magic_set *ms, struct magic *m, const char **p, int action) 2450 { 2451 switch (m->type) { 2452 case FILE_BESTRING16: 2453 case FILE_LESTRING16: 2454 case FILE_STRING: 2455 case FILE_PSTRING: 2456 case FILE_REGEX: 2457 case FILE_SEARCH: 2458 case FILE_NAME: 2459 case FILE_USE: 2460 *p = getstr(ms, m, *p, action == FILE_COMPILE); 2461 if (*p == NULL) { 2462 if (ms->flags & MAGIC_CHECK) 2463 file_magwarn(ms, "cannot get string from `%s'", 2464 m->value.s); 2465 return -1; 2466 } 2467 if (m->type == FILE_REGEX) { 2468 file_regex_t rx; 2469 int rc = file_regcomp(&rx, m->value.s, REG_EXTENDED); 2470 if (rc) { 2471 if (ms->flags & MAGIC_CHECK) 2472 file_regerror(&rx, rc, ms); 2473 } 2474 file_regfree(&rx); 2475 return rc ? -1 : 0; 2476 } 2477 return 0; 2478 case FILE_FLOAT: 2479 case FILE_BEFLOAT: 2480 case FILE_LEFLOAT: 2481 if (m->reln != 'x') { 2482 char *ep; 2483 #ifdef HAVE_STRTOF 2484 m->value.f = strtof(*p, &ep); 2485 #else 2486 m->value.f = (float)strtod(*p, &ep); 2487 #endif 2488 *p = ep; 2489 } 2490 return 0; 2491 case FILE_DOUBLE: 2492 case FILE_BEDOUBLE: 2493 case FILE_LEDOUBLE: 2494 if (m->reln != 'x') { 2495 char *ep; 2496 m->value.d = strtod(*p, &ep); 2497 *p = ep; 2498 } 2499 return 0; 2500 default: 2501 if (m->reln != 'x') { 2502 char *ep; 2503 m->value.q = file_signextend(ms, m, 2504 (uint64_t)strtoull(*p, &ep, 0)); 2505 *p = ep; 2506 eatsize(p); 2507 } 2508 return 0; 2509 } 2510 } 2511 2512 /* 2513 * Convert a string containing C character escapes. Stop at an unescaped 2514 * space or tab. 2515 * Copy the converted version to "m->value.s", and the length in m->vallen. 2516 * Return updated scan pointer as function result. Warn if set. 2517 */ 2518 private const char * 2519 getstr(struct magic_set *ms, struct magic *m, const char *s, int warn) 2520 { 2521 const char *origs = s; 2522 char *p = m->value.s; 2523 size_t plen = sizeof(m->value.s); 2524 char *origp = p; 2525 char *pmax = p + plen - 1; 2526 int c; 2527 int val; 2528 2529 while ((c = *s++) != '\0') { 2530 if (isspace((unsigned char) c)) 2531 break; 2532 if (p >= pmax) { 2533 file_error(ms, 0, "string too long: `%s'", origs); 2534 return NULL; 2535 } 2536 if (c == '\\') { 2537 switch(c = *s++) { 2538 2539 case '\0': 2540 if (warn) 2541 file_magwarn(ms, "incomplete escape"); 2542 goto out; 2543 2544 case '\t': 2545 if (warn) { 2546 file_magwarn(ms, 2547 "escaped tab found, use \\t instead"); 2548 warn = 0; /* already did */ 2549 } 2550 /*FALLTHROUGH*/ 2551 default: 2552 if (warn) { 2553 if (isprint((unsigned char)c)) { 2554 /* Allow escaping of 2555 * ``relations'' */ 2556 if (strchr("<>&^=!", c) == NULL 2557 && (m->type != FILE_REGEX || 2558 strchr("[]().*?^$|{}", c) 2559 == NULL)) { 2560 file_magwarn(ms, "no " 2561 "need to escape " 2562 "`%c'", c); 2563 } 2564 } else { 2565 file_magwarn(ms, 2566 "unknown escape sequence: " 2567 "\\%03o", c); 2568 } 2569 } 2570 /*FALLTHROUGH*/ 2571 /* space, perhaps force people to use \040? */ 2572 case ' ': 2573 #if 0 2574 /* 2575 * Other things people escape, but shouldn't need to, 2576 * so we disallow them 2577 */ 2578 case '\'': 2579 case '"': 2580 case '?': 2581 #endif 2582 /* Relations */ 2583 case '>': 2584 case '<': 2585 case '&': 2586 case '^': 2587 case '=': 2588 case '!': 2589 /* and baskslash itself */ 2590 case '\\': 2591 *p++ = (char) c; 2592 break; 2593 2594 case 'a': 2595 *p++ = '\a'; 2596 break; 2597 2598 case 'b': 2599 *p++ = '\b'; 2600 break; 2601 2602 case 'f': 2603 *p++ = '\f'; 2604 break; 2605 2606 case 'n': 2607 *p++ = '\n'; 2608 break; 2609 2610 case 'r': 2611 *p++ = '\r'; 2612 break; 2613 2614 case 't': 2615 *p++ = '\t'; 2616 break; 2617 2618 case 'v': 2619 *p++ = '\v'; 2620 break; 2621 2622 /* \ and up to 3 octal digits */ 2623 case '0': 2624 case '1': 2625 case '2': 2626 case '3': 2627 case '4': 2628 case '5': 2629 case '6': 2630 case '7': 2631 val = c - '0'; 2632 c = *s++; /* try for 2 */ 2633 if (c >= '0' && c <= '7') { 2634 val = (val << 3) | (c - '0'); 2635 c = *s++; /* try for 3 */ 2636 if (c >= '0' && c <= '7') 2637 val = (val << 3) | (c-'0'); 2638 else 2639 --s; 2640 } 2641 else 2642 --s; 2643 *p++ = (char)val; 2644 break; 2645 2646 /* \x and up to 2 hex digits */ 2647 case 'x': 2648 val = 'x'; /* Default if no digits */ 2649 c = hextoint(*s++); /* Get next char */ 2650 if (c >= 0) { 2651 val = c; 2652 c = hextoint(*s++); 2653 if (c >= 0) 2654 val = (val << 4) + c; 2655 else 2656 --s; 2657 } else 2658 --s; 2659 *p++ = (char)val; 2660 break; 2661 } 2662 } else 2663 *p++ = (char)c; 2664 } 2665 out: 2666 *p = '\0'; 2667 m->vallen = CAST(unsigned char, (p - origp)); 2668 if (m->type == FILE_PSTRING) 2669 m->vallen += (unsigned char)file_pstring_length_size(m); 2670 return s; 2671 } 2672 2673 2674 /* Single hex char to int; -1 if not a hex char. */ 2675 private int 2676 hextoint(int c) 2677 { 2678 if (!isascii((unsigned char) c)) 2679 return -1; 2680 if (isdigit((unsigned char) c)) 2681 return c - '0'; 2682 if ((c >= 'a') && (c <= 'f')) 2683 return c + 10 - 'a'; 2684 if (( c>= 'A') && (c <= 'F')) 2685 return c + 10 - 'A'; 2686 return -1; 2687 } 2688 2689 2690 /* 2691 * Print a string containing C character escapes. 2692 */ 2693 protected void 2694 file_showstr(FILE *fp, const char *s, size_t len) 2695 { 2696 char c; 2697 2698 for (;;) { 2699 if (len == ~0U) { 2700 c = *s++; 2701 if (c == '\0') 2702 break; 2703 } 2704 else { 2705 if (len-- == 0) 2706 break; 2707 c = *s++; 2708 } 2709 if (c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */ 2710 (void) fputc(c, fp); 2711 else { 2712 (void) fputc('\\', fp); 2713 switch (c) { 2714 case '\a': 2715 (void) fputc('a', fp); 2716 break; 2717 2718 case '\b': 2719 (void) fputc('b', fp); 2720 break; 2721 2722 case '\f': 2723 (void) fputc('f', fp); 2724 break; 2725 2726 case '\n': 2727 (void) fputc('n', fp); 2728 break; 2729 2730 case '\r': 2731 (void) fputc('r', fp); 2732 break; 2733 2734 case '\t': 2735 (void) fputc('t', fp); 2736 break; 2737 2738 case '\v': 2739 (void) fputc('v', fp); 2740 break; 2741 2742 default: 2743 (void) fprintf(fp, "%.3o", c & 0377); 2744 break; 2745 } 2746 } 2747 } 2748 } 2749 2750 /* 2751 * eatsize(): Eat the size spec from a number [eg. 10UL] 2752 */ 2753 private void 2754 eatsize(const char **p) 2755 { 2756 const char *l = *p; 2757 2758 if (LOWCASE(*l) == 'u') 2759 l++; 2760 2761 switch (LOWCASE(*l)) { 2762 case 'l': /* long */ 2763 case 's': /* short */ 2764 case 'h': /* short */ 2765 case 'b': /* char/byte */ 2766 case 'c': /* char/byte */ 2767 l++; 2768 /*FALLTHROUGH*/ 2769 default: 2770 break; 2771 } 2772 2773 *p = l; 2774 } 2775 2776 /* 2777 * handle a buffer containing a compiled file. 2778 */ 2779 private struct magic_map * 2780 apprentice_buf(struct magic_set *ms, struct magic *buf, size_t len) 2781 { 2782 struct magic_map *map; 2783 2784 if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) { 2785 file_oomem(ms, sizeof(*map)); 2786 return NULL; 2787 } 2788 map->len = len; 2789 map->p = buf; 2790 map->type = MAP_TYPE_USER; 2791 if (check_buffer(ms, map, "buffer") != 0) { 2792 apprentice_unmap(map); 2793 return NULL; 2794 } 2795 return map; 2796 } 2797 2798 /* 2799 * handle a compiled file. 2800 */ 2801 2802 private struct magic_map * 2803 apprentice_map(struct magic_set *ms, const char *fn) 2804 { 2805 int fd; 2806 struct stat st; 2807 char *dbname = NULL; 2808 struct magic_map *map; 2809 2810 fd = -1; 2811 if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) { 2812 file_oomem(ms, sizeof(*map)); 2813 goto error; 2814 } 2815 2816 dbname = mkdbname(ms, fn, 0); 2817 if (dbname == NULL) 2818 goto error; 2819 2820 if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1) 2821 goto error; 2822 2823 if (fstat(fd, &st) == -1) { 2824 file_error(ms, errno, "cannot stat `%s'", dbname); 2825 goto error; 2826 } 2827 if (st.st_size < 8 || st.st_size > MAXMAGIC_SIZE) { 2828 file_error(ms, 0, "file `%s' is too %s", dbname, 2829 st.st_size < 8 ? "small" : "large"); 2830 goto error; 2831 } 2832 2833 map->len = (size_t)st.st_size; 2834 #ifdef QUICK 2835 if ((map->p = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE, 2836 MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) { 2837 file_error(ms, errno, "cannot map `%s'", dbname); 2838 goto error; 2839 } 2840 map->type = MAP_TYPE_MMAP; 2841 #else 2842 if ((map->p = CAST(void *, malloc(map->len))) == NULL) { 2843 file_oomem(ms, map->len); 2844 goto error; 2845 } 2846 if (read(fd, map->p, map->len) != (ssize_t)map->len) { 2847 file_badread(ms); 2848 goto error; 2849 } 2850 map->type = MAP_TYPE_MALLOC; 2851 #define RET 1 2852 #endif 2853 (void)close(fd); 2854 fd = -1; 2855 2856 if (check_buffer(ms, map, dbname) != 0) 2857 goto error; 2858 2859 free(dbname); 2860 return map; 2861 2862 error: 2863 if (fd != -1) 2864 (void)close(fd); 2865 apprentice_unmap(map); 2866 free(dbname); 2867 return NULL; 2868 } 2869 2870 private int 2871 check_buffer(struct magic_set *ms, struct magic_map *map, const char *dbname) 2872 { 2873 uint32_t *ptr; 2874 uint32_t entries, nentries; 2875 uint32_t version; 2876 int i, needsbyteswap; 2877 2878 ptr = CAST(uint32_t *, map->p); 2879 if (*ptr != MAGICNO) { 2880 if (swap4(*ptr) != MAGICNO) { 2881 file_error(ms, 0, "bad magic in `%s'", dbname); 2882 return -1; 2883 } 2884 needsbyteswap = 1; 2885 } else 2886 needsbyteswap = 0; 2887 if (needsbyteswap) 2888 version = swap4(ptr[1]); 2889 else 2890 version = ptr[1]; 2891 if (version != VERSIONNO) { 2892 file_error(ms, 0, "File %s supports only version %d magic " 2893 "files. `%s' is version %d", VERSION, 2894 VERSIONNO, dbname, version); 2895 return -1; 2896 } 2897 entries = (uint32_t)(map->len / sizeof(struct magic)); 2898 if ((entries * sizeof(struct magic)) != map->len) { 2899 file_error(ms, 0, "Size of `%s' %" SIZE_T_FORMAT "u is not " 2900 "a multiple of %" SIZE_T_FORMAT "u", 2901 dbname, map->len, sizeof(struct magic)); 2902 return -1; 2903 } 2904 map->magic[0] = CAST(struct magic *, map->p) + 1; 2905 nentries = 0; 2906 for (i = 0; i < MAGIC_SETS; i++) { 2907 if (needsbyteswap) 2908 map->nmagic[i] = swap4(ptr[i + 2]); 2909 else 2910 map->nmagic[i] = ptr[i + 2]; 2911 if (i != MAGIC_SETS - 1) 2912 map->magic[i + 1] = map->magic[i] + map->nmagic[i]; 2913 nentries += map->nmagic[i]; 2914 } 2915 if (entries != nentries + 1) { 2916 file_error(ms, 0, "Inconsistent entries in `%s' %u != %u", 2917 dbname, entries, nentries + 1); 2918 return -1; 2919 } 2920 if (needsbyteswap) 2921 for (i = 0; i < MAGIC_SETS; i++) 2922 byteswap(map->magic[i], map->nmagic[i]); 2923 return 0; 2924 } 2925 2926 /* 2927 * handle an mmaped file. 2928 */ 2929 private int 2930 apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn) 2931 { 2932 static const size_t nm = sizeof(*map->nmagic) * MAGIC_SETS; 2933 static const size_t m = sizeof(**map->magic); 2934 int fd = -1; 2935 size_t len; 2936 char *dbname; 2937 int rv = -1; 2938 uint32_t i; 2939 union { 2940 struct magic m; 2941 uint32_t h[2 + MAGIC_SETS]; 2942 } hdr; 2943 2944 dbname = mkdbname(ms, fn, 1); 2945 2946 if (dbname == NULL) 2947 goto out; 2948 2949 if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1) 2950 { 2951 file_error(ms, errno, "cannot open `%s'", dbname); 2952 goto out; 2953 } 2954 memset(&hdr, 0, sizeof(hdr)); 2955 hdr.h[0] = MAGICNO; 2956 hdr.h[1] = VERSIONNO; 2957 memcpy(hdr.h + 2, map->nmagic, nm); 2958 2959 if (write(fd, &hdr, sizeof(hdr)) != (ssize_t)sizeof(hdr)) { 2960 file_error(ms, errno, "error writing `%s'", dbname); 2961 goto out; 2962 } 2963 2964 for (i = 0; i < MAGIC_SETS; i++) { 2965 len = m * map->nmagic[i]; 2966 if (write(fd, map->magic[i], len) != (ssize_t)len) { 2967 file_error(ms, errno, "error writing `%s'", dbname); 2968 goto out; 2969 } 2970 } 2971 2972 if (fd != -1) 2973 (void)close(fd); 2974 rv = 0; 2975 out: 2976 free(dbname); 2977 return rv; 2978 } 2979 2980 private const char ext[] = ".mgc"; 2981 /* 2982 * make a dbname 2983 */ 2984 private char * 2985 mkdbname(struct magic_set *ms, const char *fn, int strip) 2986 { 2987 const char *p, *q; 2988 char *buf; 2989 2990 if (strip) { 2991 if ((p = strrchr(fn, '/')) != NULL) 2992 fn = ++p; 2993 } 2994 2995 for (q = fn; *q; q++) 2996 continue; 2997 /* Look for .mgc */ 2998 for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--) 2999 if (*p != *q) 3000 break; 3001 3002 /* Did not find .mgc, restore q */ 3003 if (p >= ext) 3004 while (*q) 3005 q++; 3006 3007 q++; 3008 /* Compatibility with old code that looked in .mime */ 3009 if (ms->flags & MAGIC_MIME) { 3010 if (asprintf(&buf, "%.*s.mime%s", (int)(q - fn), fn, ext) < 0) 3011 return NULL; 3012 if (access(buf, R_OK) != -1) { 3013 ms->flags &= MAGIC_MIME_TYPE; 3014 return buf; 3015 } 3016 free(buf); 3017 } 3018 if (asprintf(&buf, "%.*s%s", (int)(q - fn), fn, ext) < 0) 3019 return NULL; 3020 3021 /* Compatibility with old code that looked in .mime */ 3022 if (strstr(p, ".mime") != NULL) 3023 ms->flags &= MAGIC_MIME_TYPE; 3024 return buf; 3025 } 3026 3027 /* 3028 * Byteswap an mmap'ed file if needed 3029 */ 3030 private void 3031 byteswap(struct magic *magic, uint32_t nmagic) 3032 { 3033 uint32_t i; 3034 for (i = 0; i < nmagic; i++) 3035 bs1(&magic[i]); 3036 } 3037 3038 /* 3039 * swap a short 3040 */ 3041 private uint16_t 3042 swap2(uint16_t sv) 3043 { 3044 uint16_t rv; 3045 uint8_t *s = (uint8_t *)(void *)&sv; 3046 uint8_t *d = (uint8_t *)(void *)&rv; 3047 d[0] = s[1]; 3048 d[1] = s[0]; 3049 return rv; 3050 } 3051 3052 /* 3053 * swap an int 3054 */ 3055 private uint32_t 3056 swap4(uint32_t sv) 3057 { 3058 uint32_t rv; 3059 uint8_t *s = (uint8_t *)(void *)&sv; 3060 uint8_t *d = (uint8_t *)(void *)&rv; 3061 d[0] = s[3]; 3062 d[1] = s[2]; 3063 d[2] = s[1]; 3064 d[3] = s[0]; 3065 return rv; 3066 } 3067 3068 /* 3069 * swap a quad 3070 */ 3071 private uint64_t 3072 swap8(uint64_t sv) 3073 { 3074 uint64_t rv; 3075 uint8_t *s = (uint8_t *)(void *)&sv; 3076 uint8_t *d = (uint8_t *)(void *)&rv; 3077 #if 0 3078 d[0] = s[3]; 3079 d[1] = s[2]; 3080 d[2] = s[1]; 3081 d[3] = s[0]; 3082 d[4] = s[7]; 3083 d[5] = s[6]; 3084 d[6] = s[5]; 3085 d[7] = s[4]; 3086 #else 3087 d[0] = s[7]; 3088 d[1] = s[6]; 3089 d[2] = s[5]; 3090 d[3] = s[4]; 3091 d[4] = s[3]; 3092 d[5] = s[2]; 3093 d[6] = s[1]; 3094 d[7] = s[0]; 3095 #endif 3096 return rv; 3097 } 3098 3099 /* 3100 * byteswap a single magic entry 3101 */ 3102 private void 3103 bs1(struct magic *m) 3104 { 3105 m->cont_level = swap2(m->cont_level); 3106 m->offset = swap4((uint32_t)m->offset); 3107 m->in_offset = swap4((uint32_t)m->in_offset); 3108 m->lineno = swap4((uint32_t)m->lineno); 3109 if (IS_STRING(m->type)) { 3110 m->str_range = swap4(m->str_range); 3111 m->str_flags = swap4(m->str_flags); 3112 } 3113 else { 3114 m->value.q = swap8(m->value.q); 3115 m->num_mask = swap8(m->num_mask); 3116 } 3117 } 3118 3119 protected size_t 3120 file_pstring_length_size(const struct magic *m) 3121 { 3122 switch (m->str_flags & PSTRING_LEN) { 3123 case PSTRING_1_LE: 3124 return 1; 3125 case PSTRING_2_LE: 3126 case PSTRING_2_BE: 3127 return 2; 3128 case PSTRING_4_LE: 3129 case PSTRING_4_BE: 3130 return 4; 3131 default: 3132 abort(); /* Impossible */ 3133 return 1; 3134 } 3135 } 3136 protected size_t 3137 file_pstring_get_length(const struct magic *m, const char *s) 3138 { 3139 size_t len = 0; 3140 3141 switch (m->str_flags & PSTRING_LEN) { 3142 case PSTRING_1_LE: 3143 len = *s; 3144 break; 3145 case PSTRING_2_LE: 3146 len = (s[1] << 8) | s[0]; 3147 break; 3148 case PSTRING_2_BE: 3149 len = (s[0] << 8) | s[1]; 3150 break; 3151 case PSTRING_4_LE: 3152 len = (s[3] << 24) | (s[2] << 16) | (s[1] << 8) | s[0]; 3153 break; 3154 case PSTRING_4_BE: 3155 len = (s[0] << 24) | (s[1] << 16) | (s[2] << 8) | s[3]; 3156 break; 3157 default: 3158 abort(); /* Impossible */ 3159 } 3160 3161 if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF) 3162 len -= file_pstring_length_size(m); 3163 3164 return len; 3165 } 3166 3167 protected int 3168 file_magicfind(struct magic_set *ms, const char *name, struct mlist *v) 3169 { 3170 uint32_t i, j; 3171 struct mlist *mlist, *ml; 3172 3173 mlist = ms->mlist[1]; 3174 3175 for (ml = mlist->next; ml != mlist; ml = ml->next) { 3176 struct magic *ma = ml->magic; 3177 uint32_t nma = ml->nmagic; 3178 for (i = 0; i < nma; i++) { 3179 if (ma[i].type != FILE_NAME) 3180 continue; 3181 if (strcmp(ma[i].value.s, name) == 0) { 3182 v->magic = &ma[i]; 3183 for (j = i + 1; j < nma; j++) 3184 if (ma[j].cont_level == 0) 3185 break; 3186 v->nmagic = j - i; 3187 return 0; 3188 } 3189 } 3190 } 3191 return -1; 3192 } 3193