1 /* 2 * Copyright (c) Ian F. Darwin 1986-1995. 3 * Software written by Ian F. Darwin and others; 4 * maintained 1995-present by Christos Zoulas and others. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice immediately at the beginning of the file, without modification, 11 * this list of conditions, and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 /* 29 * apprentice - make one pass through /etc/magic, learning its secrets. 30 */ 31 32 #include "file.h" 33 34 #ifndef lint 35 FILE_RCSID("@(#)$File: apprentice.c,v 1.297 2020/05/09 18:57:15 christos Exp $") 36 #endif /* lint */ 37 38 #include "magic.h" 39 #include <stdlib.h> 40 #ifdef HAVE_UNISTD_H 41 #include <unistd.h> 42 #endif 43 #include <stddef.h> 44 #include <string.h> 45 #include <assert.h> 46 #include <ctype.h> 47 #include <fcntl.h> 48 #ifdef QUICK 49 #include <sys/mman.h> 50 #endif 51 #include <dirent.h> 52 #include <limits.h> 53 54 55 #define EATAB {while (isascii(CAST(unsigned char, *l)) && \ 56 isspace(CAST(unsigned char, *l))) ++l;} 57 #define LOWCASE(l) (isupper(CAST(unsigned char, l)) ? \ 58 tolower(CAST(unsigned char, l)) : (l)) 59 /* 60 * Work around a bug in headers on Digital Unix. 61 * At least confirmed for: OSF1 V4.0 878 62 */ 63 #if defined(__osf__) && defined(__DECC) 64 #ifdef MAP_FAILED 65 #undef MAP_FAILED 66 #endif 67 #endif 68 69 #ifndef MAP_FAILED 70 #define MAP_FAILED (void *) -1 71 #endif 72 73 #ifndef MAP_FILE 74 #define MAP_FILE 0 75 #endif 76 77 #define ALLOC_CHUNK CAST(size_t, 10) 78 #define ALLOC_INCR CAST(size_t, 200) 79 80 #define MAP_TYPE_USER 0 81 #define MAP_TYPE_MALLOC 1 82 #define MAP_TYPE_MMAP 2 83 84 struct magic_entry { 85 struct magic *mp; 86 uint32_t cont_count; 87 uint32_t max_count; 88 }; 89 90 struct magic_entry_set { 91 struct magic_entry *me; 92 uint32_t count; 93 uint32_t max; 94 }; 95 96 struct magic_map { 97 void *p; 98 size_t len; 99 int type; 100 struct magic *magic[MAGIC_SETS]; 101 uint32_t nmagic[MAGIC_SETS]; 102 }; 103 104 int file_formats[FILE_NAMES_SIZE]; 105 const size_t file_nformats = FILE_NAMES_SIZE; 106 const char *file_names[FILE_NAMES_SIZE]; 107 const size_t file_nnames = FILE_NAMES_SIZE; 108 109 private int getvalue(struct magic_set *ms, struct magic *, const char **, int); 110 private int hextoint(int); 111 private const char *getstr(struct magic_set *, struct magic *, const char *, 112 int); 113 private int parse(struct magic_set *, struct magic_entry *, const char *, 114 size_t, int); 115 private void eatsize(const char **); 116 private int apprentice_1(struct magic_set *, const char *, int); 117 private size_t apprentice_magic_strength(const struct magic *); 118 private int apprentice_sort(const void *, const void *); 119 private void apprentice_list(struct mlist *, int ); 120 private struct magic_map *apprentice_load(struct magic_set *, 121 const char *, int); 122 private struct mlist *mlist_alloc(void); 123 private void mlist_free_all(struct magic_set *); 124 private void mlist_free(struct mlist *); 125 private void byteswap(struct magic *, uint32_t); 126 private void bs1(struct magic *); 127 private uint16_t swap2(uint16_t); 128 private uint32_t swap4(uint32_t); 129 private uint64_t swap8(uint64_t); 130 private char *mkdbname(struct magic_set *, const char *, int); 131 private struct magic_map *apprentice_buf(struct magic_set *, struct magic *, 132 size_t); 133 private struct magic_map *apprentice_map(struct magic_set *, const char *); 134 private int check_buffer(struct magic_set *, struct magic_map *, const char *); 135 private void apprentice_unmap(struct magic_map *); 136 private int apprentice_compile(struct magic_set *, struct magic_map *, 137 const char *); 138 private int check_format_type(const char *, int, const char **); 139 private int check_format(struct magic_set *, struct magic *); 140 private int get_op(char); 141 private int parse_mime(struct magic_set *, struct magic_entry *, const char *, 142 size_t); 143 private int parse_strength(struct magic_set *, struct magic_entry *, 144 const char *, size_t); 145 private int parse_apple(struct magic_set *, struct magic_entry *, const char *, 146 size_t); 147 private int parse_ext(struct magic_set *, struct magic_entry *, const char *, 148 size_t); 149 150 151 private size_t magicsize = sizeof(struct magic); 152 153 private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc"; 154 155 private struct { 156 const char *name; 157 size_t len; 158 int (*fun)(struct magic_set *, struct magic_entry *, const char *, 159 size_t); 160 } bang[] = { 161 #define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name } 162 DECLARE_FIELD(mime), 163 DECLARE_FIELD(apple), 164 DECLARE_FIELD(ext), 165 DECLARE_FIELD(strength), 166 #undef DECLARE_FIELD 167 { NULL, 0, NULL } 168 }; 169 170 #ifdef COMPILE_ONLY 171 172 int main(int, char *[]); 173 174 int 175 main(int argc, char *argv[]) 176 { 177 int ret; 178 struct magic_set *ms; 179 char *progname; 180 181 if ((progname = strrchr(argv[0], '/')) != NULL) 182 progname++; 183 else 184 progname = argv[0]; 185 186 if (argc != 2) { 187 (void)fprintf(stderr, "Usage: %s file\n", progname); 188 return 1; 189 } 190 191 if ((ms = magic_open(MAGIC_CHECK)) == NULL) { 192 (void)fprintf(stderr, "%s: %s\n", progname, strerror(errno)); 193 return 1; 194 } 195 ret = magic_compile(ms, argv[1]) == -1 ? 1 : 0; 196 if (ret == 1) 197 (void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms)); 198 magic_close(ms); 199 return ret; 200 } 201 #endif /* COMPILE_ONLY */ 202 203 struct type_tbl_s { 204 const char name[16]; 205 const size_t len; 206 const int type; 207 const int format; 208 }; 209 210 /* 211 * XXX - the actual Single UNIX Specification says that "long" means "long", 212 * as in the C data type, but we treat it as meaning "4-byte integer". 213 * Given that the OS X version of file 5.04 did the same, I guess that passes 214 * the actual test; having "long" be dependent on how big a "long" is on 215 * the machine running "file" is silly. 216 */ 217 static const struct type_tbl_s type_tbl[] = { 218 # define XX(s) s, (sizeof(s) - 1) 219 # define XX_NULL "", 0 220 { XX("invalid"), FILE_INVALID, FILE_FMT_NONE }, 221 { XX("byte"), FILE_BYTE, FILE_FMT_NUM }, 222 { XX("short"), FILE_SHORT, FILE_FMT_NUM }, 223 { XX("default"), FILE_DEFAULT, FILE_FMT_NONE }, 224 { XX("long"), FILE_LONG, FILE_FMT_NUM }, 225 { XX("string"), FILE_STRING, FILE_FMT_STR }, 226 { XX("date"), FILE_DATE, FILE_FMT_STR }, 227 { XX("beshort"), FILE_BESHORT, FILE_FMT_NUM }, 228 { XX("belong"), FILE_BELONG, FILE_FMT_NUM }, 229 { XX("bedate"), FILE_BEDATE, FILE_FMT_STR }, 230 { XX("leshort"), FILE_LESHORT, FILE_FMT_NUM }, 231 { XX("lelong"), FILE_LELONG, FILE_FMT_NUM }, 232 { XX("ledate"), FILE_LEDATE, FILE_FMT_STR }, 233 { XX("pstring"), FILE_PSTRING, FILE_FMT_STR }, 234 { XX("ldate"), FILE_LDATE, FILE_FMT_STR }, 235 { XX("beldate"), FILE_BELDATE, FILE_FMT_STR }, 236 { XX("leldate"), FILE_LELDATE, FILE_FMT_STR }, 237 { XX("regex"), FILE_REGEX, FILE_FMT_STR }, 238 { XX("bestring16"), FILE_BESTRING16, FILE_FMT_STR }, 239 { XX("lestring16"), FILE_LESTRING16, FILE_FMT_STR }, 240 { XX("search"), FILE_SEARCH, FILE_FMT_STR }, 241 { XX("medate"), FILE_MEDATE, FILE_FMT_STR }, 242 { XX("meldate"), FILE_MELDATE, FILE_FMT_STR }, 243 { XX("melong"), FILE_MELONG, FILE_FMT_NUM }, 244 { XX("quad"), FILE_QUAD, FILE_FMT_QUAD }, 245 { XX("lequad"), FILE_LEQUAD, FILE_FMT_QUAD }, 246 { XX("bequad"), FILE_BEQUAD, FILE_FMT_QUAD }, 247 { XX("qdate"), FILE_QDATE, FILE_FMT_STR }, 248 { XX("leqdate"), FILE_LEQDATE, FILE_FMT_STR }, 249 { XX("beqdate"), FILE_BEQDATE, FILE_FMT_STR }, 250 { XX("qldate"), FILE_QLDATE, FILE_FMT_STR }, 251 { XX("leqldate"), FILE_LEQLDATE, FILE_FMT_STR }, 252 { XX("beqldate"), FILE_BEQLDATE, FILE_FMT_STR }, 253 { XX("float"), FILE_FLOAT, FILE_FMT_FLOAT }, 254 { XX("befloat"), FILE_BEFLOAT, FILE_FMT_FLOAT }, 255 { XX("lefloat"), FILE_LEFLOAT, FILE_FMT_FLOAT }, 256 { XX("double"), FILE_DOUBLE, FILE_FMT_DOUBLE }, 257 { XX("bedouble"), FILE_BEDOUBLE, FILE_FMT_DOUBLE }, 258 { XX("ledouble"), FILE_LEDOUBLE, FILE_FMT_DOUBLE }, 259 { XX("leid3"), FILE_LEID3, FILE_FMT_NUM }, 260 { XX("beid3"), FILE_BEID3, FILE_FMT_NUM }, 261 { XX("indirect"), FILE_INDIRECT, FILE_FMT_NUM }, 262 { XX("qwdate"), FILE_QWDATE, FILE_FMT_STR }, 263 { XX("leqwdate"), FILE_LEQWDATE, FILE_FMT_STR }, 264 { XX("beqwdate"), FILE_BEQWDATE, FILE_FMT_STR }, 265 { XX("name"), FILE_NAME, FILE_FMT_NONE }, 266 { XX("use"), FILE_USE, FILE_FMT_NONE }, 267 { XX("clear"), FILE_CLEAR, FILE_FMT_NONE }, 268 { XX("der"), FILE_DER, FILE_FMT_STR }, 269 { XX("guid"), FILE_GUID, FILE_FMT_STR }, 270 { XX("offset"), FILE_OFFSET, FILE_FMT_QUAD }, 271 { XX_NULL, FILE_INVALID, FILE_FMT_NONE }, 272 }; 273 274 /* 275 * These are not types, and cannot be preceded by "u" to make them 276 * unsigned. 277 */ 278 static const struct type_tbl_s special_tbl[] = { 279 { XX("der"), FILE_DER, FILE_FMT_STR }, 280 { XX("name"), FILE_NAME, FILE_FMT_STR }, 281 { XX("use"), FILE_USE, FILE_FMT_STR }, 282 { XX_NULL, FILE_INVALID, FILE_FMT_NONE }, 283 }; 284 # undef XX 285 # undef XX_NULL 286 287 private int 288 get_type(const struct type_tbl_s *tbl, const char *l, const char **t) 289 { 290 const struct type_tbl_s *p; 291 292 for (p = tbl; p->len; p++) { 293 if (strncmp(l, p->name, p->len) == 0) { 294 if (t) 295 *t = l + p->len; 296 break; 297 } 298 } 299 return p->type; 300 } 301 302 private off_t 303 maxoff_t(void) { 304 if (/*CONSTCOND*/sizeof(off_t) == sizeof(int)) 305 return CAST(off_t, INT_MAX); 306 if (/*CONSTCOND*/sizeof(off_t) == sizeof(long)) 307 return CAST(off_t, LONG_MAX); 308 return 0x7fffffff; 309 } 310 311 private int 312 get_standard_integer_type(const char *l, const char **t) 313 { 314 int type; 315 316 if (isalpha(CAST(unsigned char, l[1]))) { 317 switch (l[1]) { 318 case 'C': 319 /* "dC" and "uC" */ 320 type = FILE_BYTE; 321 break; 322 case 'S': 323 /* "dS" and "uS" */ 324 type = FILE_SHORT; 325 break; 326 case 'I': 327 case 'L': 328 /* 329 * "dI", "dL", "uI", and "uL". 330 * 331 * XXX - the actual Single UNIX Specification says 332 * that "L" means "long", as in the C data type, 333 * but we treat it as meaning "4-byte integer". 334 * Given that the OS X version of file 5.04 did 335 * the same, I guess that passes the actual SUS 336 * validation suite; having "dL" be dependent on 337 * how big a "long" is on the machine running 338 * "file" is silly. 339 */ 340 type = FILE_LONG; 341 break; 342 case 'Q': 343 /* "dQ" and "uQ" */ 344 type = FILE_QUAD; 345 break; 346 default: 347 /* "d{anything else}", "u{anything else}" */ 348 return FILE_INVALID; 349 } 350 l += 2; 351 } else if (isdigit(CAST(unsigned char, l[1]))) { 352 /* 353 * "d{num}" and "u{num}"; we only support {num} values 354 * of 1, 2, 4, and 8 - the Single UNIX Specification 355 * doesn't say anything about whether arbitrary 356 * values should be supported, but both the Solaris 10 357 * and OS X Mountain Lion versions of file passed the 358 * Single UNIX Specification validation suite, and 359 * neither of them support values bigger than 8 or 360 * non-power-of-2 values. 361 */ 362 if (isdigit(CAST(unsigned char, l[2]))) { 363 /* Multi-digit, so > 9 */ 364 return FILE_INVALID; 365 } 366 switch (l[1]) { 367 case '1': 368 type = FILE_BYTE; 369 break; 370 case '2': 371 type = FILE_SHORT; 372 break; 373 case '4': 374 type = FILE_LONG; 375 break; 376 case '8': 377 type = FILE_QUAD; 378 break; 379 default: 380 /* XXX - what about 3, 5, 6, or 7? */ 381 return FILE_INVALID; 382 } 383 l += 2; 384 } else { 385 /* 386 * "d" or "u" by itself. 387 */ 388 type = FILE_LONG; 389 ++l; 390 } 391 if (t) 392 *t = l; 393 return type; 394 } 395 396 private void 397 init_file_tables(void) 398 { 399 static int done = 0; 400 const struct type_tbl_s *p; 401 402 if (done) 403 return; 404 done++; 405 406 for (p = type_tbl; p->len; p++) { 407 assert(p->type < FILE_NAMES_SIZE); 408 file_names[p->type] = p->name; 409 file_formats[p->type] = p->format; 410 } 411 assert(p - type_tbl == FILE_NAMES_SIZE); 412 } 413 414 private int 415 add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx) 416 { 417 struct mlist *ml; 418 419 mlp->map = NULL; 420 if ((ml = CAST(struct mlist *, malloc(sizeof(*ml)))) == NULL) 421 return -1; 422 423 ml->map = idx == 0 ? map : NULL; 424 ml->magic = map->magic[idx]; 425 ml->nmagic = map->nmagic[idx]; 426 427 mlp->prev->next = ml; 428 ml->prev = mlp->prev; 429 ml->next = mlp; 430 mlp->prev = ml; 431 return 0; 432 } 433 434 /* 435 * Handle one file or directory. 436 */ 437 private int 438 apprentice_1(struct magic_set *ms, const char *fn, int action) 439 { 440 struct magic_map *map; 441 #ifndef COMPILE_ONLY 442 struct mlist *ml; 443 size_t i; 444 #endif 445 446 if (magicsize != FILE_MAGICSIZE) { 447 file_error(ms, 0, "magic element size %lu != %lu", 448 CAST(unsigned long, sizeof(*map->magic[0])), 449 CAST(unsigned long, FILE_MAGICSIZE)); 450 return -1; 451 } 452 453 if (action == FILE_COMPILE) { 454 map = apprentice_load(ms, fn, action); 455 if (map == NULL) 456 return -1; 457 return apprentice_compile(ms, map, fn); 458 } 459 460 #ifndef COMPILE_ONLY 461 map = apprentice_map(ms, fn); 462 if (map == NULL) { 463 if (ms->flags & MAGIC_CHECK) 464 file_magwarn(ms, "using regular magic file `%s'", fn); 465 map = apprentice_load(ms, fn, action); 466 if (map == NULL) 467 return -1; 468 } 469 470 for (i = 0; i < MAGIC_SETS; i++) { 471 if (add_mlist(ms->mlist[i], map, i) == -1) { 472 /* failed to add to any list, free explicitly */ 473 if (i == 0) 474 apprentice_unmap(map); 475 else 476 mlist_free_all(ms); 477 file_oomem(ms, sizeof(*ml)); 478 return -1; 479 } 480 } 481 482 if (action == FILE_LIST) { 483 for (i = 0; i < MAGIC_SETS; i++) { 484 printf("Set %" SIZE_T_FORMAT "u:\nBinary patterns:\n", 485 i); 486 apprentice_list(ms->mlist[i], BINTEST); 487 printf("Text patterns:\n"); 488 apprentice_list(ms->mlist[i], TEXTTEST); 489 } 490 } 491 return 0; 492 #else 493 return 0; 494 #endif /* COMPILE_ONLY */ 495 } 496 497 protected void 498 file_ms_free(struct magic_set *ms) 499 { 500 size_t i; 501 if (ms == NULL) 502 return; 503 for (i = 0; i < MAGIC_SETS; i++) 504 mlist_free(ms->mlist[i]); 505 free(ms->o.pbuf); 506 free(ms->o.buf); 507 free(ms->c.li); 508 free(ms); 509 } 510 511 protected struct magic_set * 512 file_ms_alloc(int flags) 513 { 514 struct magic_set *ms; 515 size_t i, len; 516 517 if ((ms = CAST(struct magic_set *, calloc(CAST(size_t, 1u), 518 sizeof(struct magic_set)))) == NULL) 519 return NULL; 520 521 if (magic_setflags(ms, flags) == -1) { 522 errno = EINVAL; 523 goto free; 524 } 525 526 ms->o.buf = ms->o.pbuf = NULL; 527 ms->o.blen = 0; 528 len = (ms->c.len = 10) * sizeof(*ms->c.li); 529 530 if ((ms->c.li = CAST(struct level_info *, malloc(len))) == NULL) 531 goto free; 532 533 ms->event_flags = 0; 534 ms->error = -1; 535 for (i = 0; i < MAGIC_SETS; i++) 536 ms->mlist[i] = NULL; 537 ms->file = "unknown"; 538 ms->line = 0; 539 ms->indir_max = FILE_INDIR_MAX; 540 ms->name_max = FILE_NAME_MAX; 541 ms->elf_shnum_max = FILE_ELF_SHNUM_MAX; 542 ms->elf_phnum_max = FILE_ELF_PHNUM_MAX; 543 ms->elf_notes_max = FILE_ELF_NOTES_MAX; 544 ms->regex_max = FILE_REGEX_MAX; 545 ms->bytes_max = FILE_BYTES_MAX; 546 return ms; 547 free: 548 free(ms); 549 return NULL; 550 } 551 552 private void 553 apprentice_unmap(struct magic_map *map) 554 { 555 size_t i; 556 if (map == NULL) 557 return; 558 559 switch (map->type) { 560 case MAP_TYPE_USER: 561 break; 562 case MAP_TYPE_MALLOC: 563 for (i = 0; i < MAGIC_SETS; i++) { 564 void *b = map->magic[i]; 565 void *p = map->p; 566 if (CAST(char *, b) >= CAST(char *, p) && 567 CAST(char *, b) <= CAST(char *, p) + map->len) 568 continue; 569 free(map->magic[i]); 570 } 571 free(map->p); 572 break; 573 #ifdef QUICK 574 case MAP_TYPE_MMAP: 575 if (map->p && map->p != MAP_FAILED) 576 (void)munmap(map->p, map->len); 577 break; 578 #endif 579 default: 580 abort(); 581 } 582 free(map); 583 } 584 585 private struct mlist * 586 mlist_alloc(void) 587 { 588 struct mlist *mlist; 589 if ((mlist = CAST(struct mlist *, calloc(1, sizeof(*mlist)))) == NULL) { 590 return NULL; 591 } 592 mlist->next = mlist->prev = mlist; 593 return mlist; 594 } 595 596 private void 597 mlist_free_all(struct magic_set *ms) 598 { 599 size_t i; 600 601 for (i = 0; i < MAGIC_SETS; i++) { 602 mlist_free(ms->mlist[i]); 603 ms->mlist[i] = NULL; 604 } 605 } 606 607 private void 608 mlist_free_one(struct mlist *ml) 609 { 610 if (ml->map) 611 apprentice_unmap(CAST(struct magic_map *, ml->map)); 612 free(ml); 613 } 614 615 private void 616 mlist_free(struct mlist *mlist) 617 { 618 struct mlist *ml, *next; 619 620 if (mlist == NULL) 621 return; 622 623 for (ml = mlist->next; ml != mlist;) { 624 next = ml->next; 625 mlist_free_one(ml); 626 ml = next; 627 } 628 mlist_free_one(mlist); 629 } 630 631 #ifndef COMPILE_ONLY 632 /* void **bufs: an array of compiled magic files */ 633 protected int 634 buffer_apprentice(struct magic_set *ms, struct magic **bufs, 635 size_t *sizes, size_t nbufs) 636 { 637 size_t i, j; 638 struct mlist *ml; 639 struct magic_map *map; 640 641 if (nbufs == 0) 642 return -1; 643 644 (void)file_reset(ms, 0); 645 646 init_file_tables(); 647 648 for (i = 0; i < MAGIC_SETS; i++) { 649 mlist_free(ms->mlist[i]); 650 if ((ms->mlist[i] = mlist_alloc()) == NULL) { 651 file_oomem(ms, sizeof(*ms->mlist[i])); 652 goto fail; 653 } 654 } 655 656 for (i = 0; i < nbufs; i++) { 657 map = apprentice_buf(ms, bufs[i], sizes[i]); 658 if (map == NULL) 659 goto fail; 660 661 for (j = 0; j < MAGIC_SETS; j++) { 662 if (add_mlist(ms->mlist[j], map, j) == -1) { 663 file_oomem(ms, sizeof(*ml)); 664 goto fail; 665 } 666 } 667 } 668 669 return 0; 670 fail: 671 mlist_free_all(ms); 672 return -1; 673 } 674 #endif 675 676 /* const char *fn: list of magic files and directories */ 677 protected int 678 file_apprentice(struct magic_set *ms, const char *fn, int action) 679 { 680 char *p, *mfn; 681 int fileerr, errs = -1; 682 size_t i, j; 683 684 (void)file_reset(ms, 0); 685 686 if ((fn = magic_getpath(fn, action)) == NULL) 687 return -1; 688 689 init_file_tables(); 690 691 if ((mfn = strdup(fn)) == NULL) { 692 file_oomem(ms, strlen(fn)); 693 return -1; 694 } 695 696 for (i = 0; i < MAGIC_SETS; i++) { 697 mlist_free(ms->mlist[i]); 698 if ((ms->mlist[i] = mlist_alloc()) == NULL) { 699 file_oomem(ms, sizeof(*ms->mlist[i])); 700 for (j = 0; j < i; j++) { 701 mlist_free(ms->mlist[j]); 702 ms->mlist[j] = NULL; 703 } 704 free(mfn); 705 return -1; 706 } 707 } 708 fn = mfn; 709 710 while (fn) { 711 p = strchr(fn, PATHSEP); 712 if (p) 713 *p++ = '\0'; 714 if (*fn == '\0') 715 break; 716 fileerr = apprentice_1(ms, fn, action); 717 errs = MAX(errs, fileerr); 718 fn = p; 719 } 720 721 free(mfn); 722 723 if (errs == -1) { 724 for (i = 0; i < MAGIC_SETS; i++) { 725 mlist_free(ms->mlist[i]); 726 ms->mlist[i] = NULL; 727 } 728 file_error(ms, 0, "could not find any valid magic files!"); 729 return -1; 730 } 731 732 #if 0 733 /* 734 * Always leave the database loaded 735 */ 736 if (action == FILE_LOAD) 737 return 0; 738 739 for (i = 0; i < MAGIC_SETS; i++) { 740 mlist_free(ms->mlist[i]); 741 ms->mlist[i] = NULL; 742 } 743 #endif 744 745 switch (action) { 746 case FILE_LOAD: 747 case FILE_COMPILE: 748 case FILE_CHECK: 749 case FILE_LIST: 750 return 0; 751 default: 752 file_error(ms, 0, "Invalid action %d", action); 753 return -1; 754 } 755 } 756 757 /* 758 * Compute the real length of a magic expression, for the purposes 759 * of determining how "strong" a magic expression is (approximating 760 * how specific its matches are): 761 * - magic characters count 0 unless escaped. 762 * - [] expressions count 1 763 * - {} expressions count 0 764 * - regular characters or escaped magic characters count 1 765 * - 0 length expressions count as one 766 */ 767 private size_t 768 nonmagic(const char *str) 769 { 770 const char *p; 771 size_t rv = 0; 772 773 for (p = str; *p; p++) 774 switch (*p) { 775 case '\\': /* Escaped anything counts 1 */ 776 if (!*++p) 777 p--; 778 rv++; 779 continue; 780 case '?': /* Magic characters count 0 */ 781 case '*': 782 case '.': 783 case '+': 784 case '^': 785 case '$': 786 continue; 787 case '[': /* Bracketed expressions count 1 the ']' */ 788 while (*p && *p != ']') 789 p++; 790 p--; 791 continue; 792 case '{': /* Braced expressions count 0 */ 793 while (*p && *p != '}') 794 p++; 795 if (!*p) 796 p--; 797 continue; 798 default: /* Anything else counts 1 */ 799 rv++; 800 continue; 801 } 802 803 return rv == 0 ? 1 : rv; /* Return at least 1 */ 804 } 805 806 807 private size_t 808 typesize(int type) 809 { 810 switch (type) { 811 case FILE_BYTE: 812 return 1; 813 814 case FILE_SHORT: 815 case FILE_LESHORT: 816 case FILE_BESHORT: 817 return 2; 818 819 case FILE_LONG: 820 case FILE_LELONG: 821 case FILE_BELONG: 822 case FILE_MELONG: 823 return 4; 824 825 case FILE_DATE: 826 case FILE_LEDATE: 827 case FILE_BEDATE: 828 case FILE_MEDATE: 829 case FILE_LDATE: 830 case FILE_LELDATE: 831 case FILE_BELDATE: 832 case FILE_MELDATE: 833 case FILE_FLOAT: 834 case FILE_BEFLOAT: 835 case FILE_LEFLOAT: 836 return 4; 837 838 case FILE_QUAD: 839 case FILE_BEQUAD: 840 case FILE_LEQUAD: 841 case FILE_QDATE: 842 case FILE_LEQDATE: 843 case FILE_BEQDATE: 844 case FILE_QLDATE: 845 case FILE_LEQLDATE: 846 case FILE_BEQLDATE: 847 case FILE_QWDATE: 848 case FILE_LEQWDATE: 849 case FILE_BEQWDATE: 850 case FILE_DOUBLE: 851 case FILE_BEDOUBLE: 852 case FILE_LEDOUBLE: 853 case FILE_OFFSET: 854 return 8; 855 856 case FILE_GUID: 857 return 16; 858 859 default: 860 return FILE_BADSIZE; 861 } 862 } 863 864 /* 865 * Get weight of this magic entry, for sorting purposes. 866 */ 867 private size_t 868 apprentice_magic_strength(const struct magic *m) 869 { 870 #define MULT 10U 871 size_t ts, v; 872 ssize_t val = 2 * MULT; /* baseline strength */ 873 874 switch (m->type) { 875 case FILE_DEFAULT: /* make sure this sorts last */ 876 if (m->factor_op != FILE_FACTOR_OP_NONE) 877 abort(); 878 return 0; 879 880 case FILE_BYTE: 881 case FILE_SHORT: 882 case FILE_LESHORT: 883 case FILE_BESHORT: 884 case FILE_LONG: 885 case FILE_LELONG: 886 case FILE_BELONG: 887 case FILE_MELONG: 888 case FILE_DATE: 889 case FILE_LEDATE: 890 case FILE_BEDATE: 891 case FILE_MEDATE: 892 case FILE_LDATE: 893 case FILE_LELDATE: 894 case FILE_BELDATE: 895 case FILE_MELDATE: 896 case FILE_FLOAT: 897 case FILE_BEFLOAT: 898 case FILE_LEFLOAT: 899 case FILE_QUAD: 900 case FILE_BEQUAD: 901 case FILE_LEQUAD: 902 case FILE_QDATE: 903 case FILE_LEQDATE: 904 case FILE_BEQDATE: 905 case FILE_QLDATE: 906 case FILE_LEQLDATE: 907 case FILE_BEQLDATE: 908 case FILE_QWDATE: 909 case FILE_LEQWDATE: 910 case FILE_BEQWDATE: 911 case FILE_DOUBLE: 912 case FILE_BEDOUBLE: 913 case FILE_LEDOUBLE: 914 case FILE_GUID: 915 case FILE_OFFSET: 916 ts = typesize(m->type); 917 if (ts == FILE_BADSIZE) 918 abort(); 919 val += ts * MULT; 920 break; 921 922 case FILE_PSTRING: 923 case FILE_STRING: 924 val += m->vallen * MULT; 925 break; 926 927 case FILE_BESTRING16: 928 case FILE_LESTRING16: 929 val += m->vallen * MULT / 2; 930 break; 931 932 case FILE_SEARCH: 933 if (m->vallen == 0) 934 break; 935 val += m->vallen * MAX(MULT / m->vallen, 1); 936 break; 937 938 case FILE_REGEX: 939 v = nonmagic(m->value.s); 940 val += v * MAX(MULT / v, 1); 941 break; 942 943 case FILE_INDIRECT: 944 case FILE_NAME: 945 case FILE_USE: 946 break; 947 948 case FILE_DER: 949 val += MULT; 950 break; 951 952 default: 953 (void)fprintf(stderr, "Bad type %d\n", m->type); 954 abort(); 955 } 956 957 switch (m->reln) { 958 case 'x': /* matches anything penalize */ 959 case '!': /* matches almost anything penalize */ 960 val = 0; 961 break; 962 963 case '=': /* Exact match, prefer */ 964 val += MULT; 965 break; 966 967 case '>': 968 case '<': /* comparison match reduce strength */ 969 val -= 2 * MULT; 970 break; 971 972 case '^': 973 case '&': /* masking bits, we could count them too */ 974 val -= MULT; 975 break; 976 977 default: 978 (void)fprintf(stderr, "Bad relation %c\n", m->reln); 979 abort(); 980 } 981 982 switch (m->factor_op) { 983 case FILE_FACTOR_OP_NONE: 984 break; 985 case FILE_FACTOR_OP_PLUS: 986 val += m->factor; 987 break; 988 case FILE_FACTOR_OP_MINUS: 989 val -= m->factor; 990 break; 991 case FILE_FACTOR_OP_TIMES: 992 val *= m->factor; 993 break; 994 case FILE_FACTOR_OP_DIV: 995 val /= m->factor; 996 break; 997 default: 998 abort(); 999 } 1000 1001 if (val <= 0) /* ensure we only return 0 for FILE_DEFAULT */ 1002 val = 1; 1003 1004 /* 1005 * Magic entries with no description get a bonus because they depend 1006 * on subsequent magic entries to print something. 1007 */ 1008 if (m->desc[0] == '\0') 1009 val++; 1010 return val; 1011 } 1012 1013 /* 1014 * Sort callback for sorting entries by "strength" (basically length) 1015 */ 1016 private int 1017 apprentice_sort(const void *a, const void *b) 1018 { 1019 const struct magic_entry *ma = CAST(const struct magic_entry *, a); 1020 const struct magic_entry *mb = CAST(const struct magic_entry *, b); 1021 size_t sa = apprentice_magic_strength(ma->mp); 1022 size_t sb = apprentice_magic_strength(mb->mp); 1023 if (sa == sb) 1024 return 0; 1025 else if (sa > sb) 1026 return -1; 1027 else 1028 return 1; 1029 } 1030 1031 /* 1032 * Shows sorted patterns list in the order which is used for the matching 1033 */ 1034 private void 1035 apprentice_list(struct mlist *mlist, int mode) 1036 { 1037 uint32_t magindex = 0; 1038 struct mlist *ml; 1039 for (ml = mlist->next; ml != mlist; ml = ml->next) { 1040 for (magindex = 0; magindex < ml->nmagic; magindex++) { 1041 struct magic *m = &ml->magic[magindex]; 1042 if ((m->flag & mode) != mode) { 1043 /* Skip sub-tests */ 1044 while (magindex + 1 < ml->nmagic && 1045 ml->magic[magindex + 1].cont_level != 0) 1046 ++magindex; 1047 continue; /* Skip to next top-level test*/ 1048 } 1049 1050 /* 1051 * Try to iterate over the tree until we find item with 1052 * description/mimetype. 1053 */ 1054 while (magindex + 1 < ml->nmagic && 1055 ml->magic[magindex + 1].cont_level != 0 && 1056 *ml->magic[magindex].desc == '\0' && 1057 *ml->magic[magindex].mimetype == '\0') 1058 magindex++; 1059 1060 printf("Strength = %3" SIZE_T_FORMAT "u@%u: %s [%s]\n", 1061 apprentice_magic_strength(m), 1062 ml->magic[magindex].lineno, 1063 ml->magic[magindex].desc, 1064 ml->magic[magindex].mimetype); 1065 } 1066 } 1067 } 1068 1069 private void 1070 set_test_type(struct magic *mstart, struct magic *m) 1071 { 1072 switch (m->type) { 1073 case FILE_BYTE: 1074 case FILE_SHORT: 1075 case FILE_LONG: 1076 case FILE_DATE: 1077 case FILE_BESHORT: 1078 case FILE_BELONG: 1079 case FILE_BEDATE: 1080 case FILE_LESHORT: 1081 case FILE_LELONG: 1082 case FILE_LEDATE: 1083 case FILE_LDATE: 1084 case FILE_BELDATE: 1085 case FILE_LELDATE: 1086 case FILE_MEDATE: 1087 case FILE_MELDATE: 1088 case FILE_MELONG: 1089 case FILE_QUAD: 1090 case FILE_LEQUAD: 1091 case FILE_BEQUAD: 1092 case FILE_QDATE: 1093 case FILE_LEQDATE: 1094 case FILE_BEQDATE: 1095 case FILE_QLDATE: 1096 case FILE_LEQLDATE: 1097 case FILE_BEQLDATE: 1098 case FILE_QWDATE: 1099 case FILE_LEQWDATE: 1100 case FILE_BEQWDATE: 1101 case FILE_FLOAT: 1102 case FILE_BEFLOAT: 1103 case FILE_LEFLOAT: 1104 case FILE_DOUBLE: 1105 case FILE_BEDOUBLE: 1106 case FILE_LEDOUBLE: 1107 case FILE_DER: 1108 case FILE_GUID: 1109 case FILE_OFFSET: 1110 mstart->flag |= BINTEST; 1111 break; 1112 case FILE_STRING: 1113 case FILE_PSTRING: 1114 case FILE_BESTRING16: 1115 case FILE_LESTRING16: 1116 /* Allow text overrides */ 1117 if (mstart->str_flags & STRING_TEXTTEST) 1118 mstart->flag |= TEXTTEST; 1119 else 1120 mstart->flag |= BINTEST; 1121 break; 1122 case FILE_REGEX: 1123 case FILE_SEARCH: 1124 /* Check for override */ 1125 if (mstart->str_flags & STRING_BINTEST) 1126 mstart->flag |= BINTEST; 1127 if (mstart->str_flags & STRING_TEXTTEST) 1128 mstart->flag |= TEXTTEST; 1129 1130 if (mstart->flag & (TEXTTEST|BINTEST)) 1131 break; 1132 1133 /* binary test if pattern is not text */ 1134 if (file_looks_utf8(m->value.us, CAST(size_t, m->vallen), NULL, 1135 NULL) <= 0) 1136 mstart->flag |= BINTEST; 1137 else 1138 mstart->flag |= TEXTTEST; 1139 break; 1140 case FILE_DEFAULT: 1141 /* can't deduce anything; we shouldn't see this at the 1142 top level anyway */ 1143 break; 1144 case FILE_INVALID: 1145 default: 1146 /* invalid search type, but no need to complain here */ 1147 break; 1148 } 1149 } 1150 1151 private int 1152 addentry(struct magic_set *ms, struct magic_entry *me, 1153 struct magic_entry_set *mset) 1154 { 1155 size_t i = me->mp->type == FILE_NAME ? 1 : 0; 1156 if (mset[i].count == mset[i].max) { 1157 struct magic_entry *mp; 1158 1159 mset[i].max += ALLOC_INCR; 1160 if ((mp = CAST(struct magic_entry *, 1161 realloc(mset[i].me, sizeof(*mp) * mset[i].max))) == 1162 NULL) { 1163 file_oomem(ms, sizeof(*mp) * mset[i].max); 1164 return -1; 1165 } 1166 (void)memset(&mp[mset[i].count], 0, sizeof(*mp) * 1167 ALLOC_INCR); 1168 mset[i].me = mp; 1169 } 1170 mset[i].me[mset[i].count++] = *me; 1171 memset(me, 0, sizeof(*me)); 1172 return 0; 1173 } 1174 1175 /* 1176 * Load and parse one file. 1177 */ 1178 private void 1179 load_1(struct magic_set *ms, int action, const char *fn, int *errs, 1180 struct magic_entry_set *mset) 1181 { 1182 size_t lineno = 0, llen = 0; 1183 char *line = NULL; 1184 ssize_t len; 1185 struct magic_entry me; 1186 1187 FILE *f = fopen(ms->file = fn, "r"); 1188 if (f == NULL) { 1189 if (errno != ENOENT) 1190 file_error(ms, errno, "cannot read magic file `%s'", 1191 fn); 1192 (*errs)++; 1193 return; 1194 } 1195 1196 memset(&me, 0, sizeof(me)); 1197 /* read and parse this file */ 1198 for (ms->line = 1; (len = getline(&line, &llen, f)) != -1; 1199 ms->line++) { 1200 if (len == 0) /* null line, garbage, etc */ 1201 continue; 1202 if (line[len - 1] == '\n') { 1203 lineno++; 1204 line[len - 1] = '\0'; /* delete newline */ 1205 } 1206 switch (line[0]) { 1207 case '\0': /* empty, do not parse */ 1208 case '#': /* comment, do not parse */ 1209 continue; 1210 case '!': 1211 if (line[1] == ':') { 1212 size_t i; 1213 1214 for (i = 0; bang[i].name != NULL; i++) { 1215 if (CAST(size_t, len - 2) > bang[i].len && 1216 memcmp(bang[i].name, line + 2, 1217 bang[i].len) == 0) 1218 break; 1219 } 1220 if (bang[i].name == NULL) { 1221 file_error(ms, 0, 1222 "Unknown !: entry `%s'", line); 1223 (*errs)++; 1224 continue; 1225 } 1226 if (me.mp == NULL) { 1227 file_error(ms, 0, 1228 "No current entry for :!%s type", 1229 bang[i].name); 1230 (*errs)++; 1231 continue; 1232 } 1233 if ((*bang[i].fun)(ms, &me, 1234 line + bang[i].len + 2, 1235 len - bang[i].len - 2) != 0) { 1236 (*errs)++; 1237 continue; 1238 } 1239 continue; 1240 } 1241 /*FALLTHROUGH*/ 1242 default: 1243 again: 1244 switch (parse(ms, &me, line, lineno, action)) { 1245 case 0: 1246 continue; 1247 case 1: 1248 (void)addentry(ms, &me, mset); 1249 goto again; 1250 default: 1251 (*errs)++; 1252 break; 1253 } 1254 } 1255 } 1256 if (me.mp) 1257 (void)addentry(ms, &me, mset); 1258 free(line); 1259 (void)fclose(f); 1260 } 1261 1262 /* 1263 * parse a file or directory of files 1264 * const char *fn: name of magic file or directory 1265 */ 1266 private int 1267 cmpstrp(const void *p1, const void *p2) 1268 { 1269 return strcmp(*RCAST(char *const *, p1), *RCAST(char *const *, p2)); 1270 } 1271 1272 1273 private uint32_t 1274 set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme, 1275 uint32_t starttest) 1276 { 1277 static const char text[] = "text"; 1278 static const char binary[] = "binary"; 1279 static const size_t len = sizeof(text); 1280 1281 uint32_t i = starttest; 1282 1283 do { 1284 set_test_type(me[starttest].mp, me[i].mp); 1285 if ((ms->flags & MAGIC_DEBUG) == 0) 1286 continue; 1287 (void)fprintf(stderr, "%s%s%s: %s\n", 1288 me[i].mp->mimetype, 1289 me[i].mp->mimetype[0] == '\0' ? "" : "; ", 1290 me[i].mp->desc[0] ? me[i].mp->desc : "(no description)", 1291 me[i].mp->flag & BINTEST ? binary : text); 1292 if (me[i].mp->flag & BINTEST) { 1293 char *p = strstr(me[i].mp->desc, text); 1294 if (p && (p == me[i].mp->desc || 1295 isspace(CAST(unsigned char, p[-1]))) && 1296 (p + len - me[i].mp->desc == MAXstring 1297 || (p[len] == '\0' || 1298 isspace(CAST(unsigned char, p[len]))))) 1299 (void)fprintf(stderr, "*** Possible " 1300 "binary test for text type\n"); 1301 } 1302 } while (++i < nme && me[i].mp->cont_level != 0); 1303 return i; 1304 } 1305 1306 private void 1307 set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme) 1308 { 1309 uint32_t i; 1310 for (i = 0; i < nme; i++) { 1311 if (me[i].mp->cont_level == 0 && 1312 me[i].mp->type == FILE_DEFAULT) { 1313 while (++i < nme) 1314 if (me[i].mp->cont_level == 0) 1315 break; 1316 if (i != nme) { 1317 /* XXX - Ugh! */ 1318 ms->line = me[i].mp->lineno; 1319 file_magwarn(ms, 1320 "level 0 \"default\" did not sort last"); 1321 } 1322 return; 1323 } 1324 } 1325 } 1326 1327 private int 1328 coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme, 1329 struct magic **ma, uint32_t *nma) 1330 { 1331 uint32_t i, mentrycount = 0; 1332 size_t slen; 1333 1334 for (i = 0; i < nme; i++) 1335 mentrycount += me[i].cont_count; 1336 1337 slen = sizeof(**ma) * mentrycount; 1338 if ((*ma = CAST(struct magic *, malloc(slen))) == NULL) { 1339 file_oomem(ms, slen); 1340 return -1; 1341 } 1342 1343 mentrycount = 0; 1344 for (i = 0; i < nme; i++) { 1345 (void)memcpy(*ma + mentrycount, me[i].mp, 1346 me[i].cont_count * sizeof(**ma)); 1347 mentrycount += me[i].cont_count; 1348 } 1349 *nma = mentrycount; 1350 return 0; 1351 } 1352 1353 private void 1354 magic_entry_free(struct magic_entry *me, uint32_t nme) 1355 { 1356 uint32_t i; 1357 if (me == NULL) 1358 return; 1359 for (i = 0; i < nme; i++) 1360 free(me[i].mp); 1361 free(me); 1362 } 1363 1364 private struct magic_map * 1365 apprentice_load(struct magic_set *ms, const char *fn, int action) 1366 { 1367 int errs = 0; 1368 uint32_t i, j; 1369 size_t files = 0, maxfiles = 0; 1370 char **filearr = NULL, *mfn; 1371 struct stat st; 1372 struct magic_map *map; 1373 struct magic_entry_set mset[MAGIC_SETS]; 1374 DIR *dir; 1375 struct dirent *d; 1376 1377 memset(mset, 0, sizeof(mset)); 1378 ms->flags |= MAGIC_CHECK; /* Enable checks for parsed files */ 1379 1380 1381 if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) 1382 { 1383 file_oomem(ms, sizeof(*map)); 1384 return NULL; 1385 } 1386 map->type = MAP_TYPE_MALLOC; 1387 1388 /* print silly verbose header for USG compat. */ 1389 if (action == FILE_CHECK) 1390 (void)fprintf(stderr, "%s\n", usg_hdr); 1391 1392 /* load directory or file */ 1393 if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) { 1394 dir = opendir(fn); 1395 if (!dir) { 1396 errs++; 1397 goto out; 1398 } 1399 while ((d = readdir(dir)) != NULL) { 1400 if (d->d_name[0] == '.') 1401 continue; 1402 if (asprintf(&mfn, "%s/%s", fn, d->d_name) < 0) { 1403 file_oomem(ms, 1404 strlen(fn) + strlen(d->d_name) + 2); 1405 errs++; 1406 closedir(dir); 1407 goto out; 1408 } 1409 if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) { 1410 free(mfn); 1411 continue; 1412 } 1413 if (files >= maxfiles) { 1414 size_t mlen; 1415 char **nfilearr; 1416 maxfiles = (maxfiles + 1) * 2; 1417 mlen = maxfiles * sizeof(*filearr); 1418 if ((nfilearr = CAST(char **, 1419 realloc(filearr, mlen))) == NULL) { 1420 file_oomem(ms, mlen); 1421 free(mfn); 1422 closedir(dir); 1423 errs++; 1424 goto out; 1425 } 1426 filearr = nfilearr; 1427 } 1428 filearr[files++] = mfn; 1429 } 1430 closedir(dir); 1431 if (filearr) { 1432 qsort(filearr, files, sizeof(*filearr), cmpstrp); 1433 for (i = 0; i < files; i++) { 1434 load_1(ms, action, filearr[i], &errs, mset); 1435 free(filearr[i]); 1436 } 1437 free(filearr); 1438 filearr = NULL; 1439 } 1440 } else 1441 load_1(ms, action, fn, &errs, mset); 1442 if (errs) 1443 goto out; 1444 1445 for (j = 0; j < MAGIC_SETS; j++) { 1446 /* Set types of tests */ 1447 for (i = 0; i < mset[j].count; ) { 1448 if (mset[j].me[i].mp->cont_level != 0) { 1449 i++; 1450 continue; 1451 } 1452 i = set_text_binary(ms, mset[j].me, mset[j].count, i); 1453 } 1454 if (mset[j].me) 1455 qsort(mset[j].me, mset[j].count, sizeof(*mset[j].me), 1456 apprentice_sort); 1457 1458 /* 1459 * Make sure that any level 0 "default" line is last 1460 * (if one exists). 1461 */ 1462 set_last_default(ms, mset[j].me, mset[j].count); 1463 1464 /* coalesce per file arrays into a single one */ 1465 if (coalesce_entries(ms, mset[j].me, mset[j].count, 1466 &map->magic[j], &map->nmagic[j]) == -1) { 1467 errs++; 1468 goto out; 1469 } 1470 } 1471 1472 out: 1473 free(filearr); 1474 for (j = 0; j < MAGIC_SETS; j++) 1475 magic_entry_free(mset[j].me, mset[j].count); 1476 1477 if (errs) { 1478 apprentice_unmap(map); 1479 return NULL; 1480 } 1481 return map; 1482 } 1483 1484 /* 1485 * extend the sign bit if the comparison is to be signed 1486 */ 1487 protected uint64_t 1488 file_signextend(struct magic_set *ms, struct magic *m, uint64_t v) 1489 { 1490 if (!(m->flag & UNSIGNED)) { 1491 switch(m->type) { 1492 /* 1493 * Do not remove the casts below. They are 1494 * vital. When later compared with the data, 1495 * the sign extension must have happened. 1496 */ 1497 case FILE_BYTE: 1498 v = CAST(signed char, v); 1499 break; 1500 case FILE_SHORT: 1501 case FILE_BESHORT: 1502 case FILE_LESHORT: 1503 v = CAST(short, v); 1504 break; 1505 case FILE_DATE: 1506 case FILE_BEDATE: 1507 case FILE_LEDATE: 1508 case FILE_MEDATE: 1509 case FILE_LDATE: 1510 case FILE_BELDATE: 1511 case FILE_LELDATE: 1512 case FILE_MELDATE: 1513 case FILE_LONG: 1514 case FILE_BELONG: 1515 case FILE_LELONG: 1516 case FILE_MELONG: 1517 case FILE_FLOAT: 1518 case FILE_BEFLOAT: 1519 case FILE_LEFLOAT: 1520 v = CAST(int32_t, v); 1521 break; 1522 case FILE_QUAD: 1523 case FILE_BEQUAD: 1524 case FILE_LEQUAD: 1525 case FILE_QDATE: 1526 case FILE_QLDATE: 1527 case FILE_QWDATE: 1528 case FILE_BEQDATE: 1529 case FILE_BEQLDATE: 1530 case FILE_BEQWDATE: 1531 case FILE_LEQDATE: 1532 case FILE_LEQLDATE: 1533 case FILE_LEQWDATE: 1534 case FILE_DOUBLE: 1535 case FILE_BEDOUBLE: 1536 case FILE_LEDOUBLE: 1537 case FILE_OFFSET: 1538 v = CAST(int64_t, v); 1539 break; 1540 case FILE_STRING: 1541 case FILE_PSTRING: 1542 case FILE_BESTRING16: 1543 case FILE_LESTRING16: 1544 case FILE_REGEX: 1545 case FILE_SEARCH: 1546 case FILE_DEFAULT: 1547 case FILE_INDIRECT: 1548 case FILE_NAME: 1549 case FILE_USE: 1550 case FILE_CLEAR: 1551 case FILE_DER: 1552 case FILE_GUID: 1553 break; 1554 default: 1555 if (ms->flags & MAGIC_CHECK) 1556 file_magwarn(ms, "cannot happen: m->type=%d\n", 1557 m->type); 1558 return FILE_BADSIZE; 1559 } 1560 } 1561 return v; 1562 } 1563 1564 private int 1565 string_modifier_check(struct magic_set *ms, struct magic *m) 1566 { 1567 if ((ms->flags & MAGIC_CHECK) == 0) 1568 return 0; 1569 1570 if ((m->type != FILE_REGEX || (m->str_flags & REGEX_LINE_COUNT) == 0) && 1571 (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0)) { 1572 file_magwarn(ms, 1573 "'/BHhLl' modifiers are only allowed for pascal strings\n"); 1574 return -1; 1575 } 1576 switch (m->type) { 1577 case FILE_BESTRING16: 1578 case FILE_LESTRING16: 1579 if (m->str_flags != 0) { 1580 file_magwarn(ms, 1581 "no modifiers allowed for 16-bit strings\n"); 1582 return -1; 1583 } 1584 break; 1585 case FILE_STRING: 1586 case FILE_PSTRING: 1587 if ((m->str_flags & REGEX_OFFSET_START) != 0) { 1588 file_magwarn(ms, 1589 "'/%c' only allowed on regex and search\n", 1590 CHAR_REGEX_OFFSET_START); 1591 return -1; 1592 } 1593 break; 1594 case FILE_SEARCH: 1595 if (m->str_range == 0) { 1596 file_magwarn(ms, 1597 "missing range; defaulting to %d\n", 1598 STRING_DEFAULT_RANGE); 1599 m->str_range = STRING_DEFAULT_RANGE; 1600 return -1; 1601 } 1602 break; 1603 case FILE_REGEX: 1604 if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) { 1605 file_magwarn(ms, "'/%c' not allowed on regex\n", 1606 CHAR_COMPACT_WHITESPACE); 1607 return -1; 1608 } 1609 if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) { 1610 file_magwarn(ms, "'/%c' not allowed on regex\n", 1611 CHAR_COMPACT_OPTIONAL_WHITESPACE); 1612 return -1; 1613 } 1614 break; 1615 default: 1616 file_magwarn(ms, "coding error: m->type=%d\n", 1617 m->type); 1618 return -1; 1619 } 1620 return 0; 1621 } 1622 1623 private int 1624 get_op(char c) 1625 { 1626 switch (c) { 1627 case '&': 1628 return FILE_OPAND; 1629 case '|': 1630 return FILE_OPOR; 1631 case '^': 1632 return FILE_OPXOR; 1633 case '+': 1634 return FILE_OPADD; 1635 case '-': 1636 return FILE_OPMINUS; 1637 case '*': 1638 return FILE_OPMULTIPLY; 1639 case '/': 1640 return FILE_OPDIVIDE; 1641 case '%': 1642 return FILE_OPMODULO; 1643 default: 1644 return -1; 1645 } 1646 } 1647 1648 #ifdef ENABLE_CONDITIONALS 1649 private int 1650 get_cond(const char *l, const char **t) 1651 { 1652 static const struct cond_tbl_s { 1653 char name[8]; 1654 size_t len; 1655 int cond; 1656 } cond_tbl[] = { 1657 { "if", 2, COND_IF }, 1658 { "elif", 4, COND_ELIF }, 1659 { "else", 4, COND_ELSE }, 1660 { "", 0, COND_NONE }, 1661 }; 1662 const struct cond_tbl_s *p; 1663 1664 for (p = cond_tbl; p->len; p++) { 1665 if (strncmp(l, p->name, p->len) == 0 && 1666 isspace(CAST(unsigned char, l[p->len]))) { 1667 if (t) 1668 *t = l + p->len; 1669 break; 1670 } 1671 } 1672 return p->cond; 1673 } 1674 1675 private int 1676 check_cond(struct magic_set *ms, int cond, uint32_t cont_level) 1677 { 1678 int last_cond; 1679 last_cond = ms->c.li[cont_level].last_cond; 1680 1681 switch (cond) { 1682 case COND_IF: 1683 if (last_cond != COND_NONE && last_cond != COND_ELIF) { 1684 if (ms->flags & MAGIC_CHECK) 1685 file_magwarn(ms, "syntax error: `if'"); 1686 return -1; 1687 } 1688 last_cond = COND_IF; 1689 break; 1690 1691 case COND_ELIF: 1692 if (last_cond != COND_IF && last_cond != COND_ELIF) { 1693 if (ms->flags & MAGIC_CHECK) 1694 file_magwarn(ms, "syntax error: `elif'"); 1695 return -1; 1696 } 1697 last_cond = COND_ELIF; 1698 break; 1699 1700 case COND_ELSE: 1701 if (last_cond != COND_IF && last_cond != COND_ELIF) { 1702 if (ms->flags & MAGIC_CHECK) 1703 file_magwarn(ms, "syntax error: `else'"); 1704 return -1; 1705 } 1706 last_cond = COND_NONE; 1707 break; 1708 1709 case COND_NONE: 1710 last_cond = COND_NONE; 1711 break; 1712 } 1713 1714 ms->c.li[cont_level].last_cond = last_cond; 1715 return 0; 1716 } 1717 #endif /* ENABLE_CONDITIONALS */ 1718 1719 private int 1720 parse_indirect_modifier(struct magic_set *ms, struct magic *m, const char **lp) 1721 { 1722 const char *l = *lp; 1723 1724 while (!isspace(CAST(unsigned char, *++l))) 1725 switch (*l) { 1726 case CHAR_INDIRECT_RELATIVE: 1727 m->str_flags |= INDIRECT_RELATIVE; 1728 break; 1729 default: 1730 if (ms->flags & MAGIC_CHECK) 1731 file_magwarn(ms, "indirect modifier `%c' " 1732 "invalid", *l); 1733 *lp = l; 1734 return -1; 1735 } 1736 *lp = l; 1737 return 0; 1738 } 1739 1740 private void 1741 parse_op_modifier(struct magic_set *ms, struct magic *m, const char **lp, 1742 int op) 1743 { 1744 const char *l = *lp; 1745 char *t; 1746 uint64_t val; 1747 1748 ++l; 1749 m->mask_op |= op; 1750 val = CAST(uint64_t, strtoull(l, &t, 0)); 1751 l = t; 1752 m->num_mask = file_signextend(ms, m, val); 1753 eatsize(&l); 1754 *lp = l; 1755 } 1756 1757 private int 1758 parse_string_modifier(struct magic_set *ms, struct magic *m, const char **lp) 1759 { 1760 const char *l = *lp; 1761 char *t; 1762 int have_range = 0; 1763 1764 while (!isspace(CAST(unsigned char, *++l))) { 1765 switch (*l) { 1766 case '0': case '1': case '2': 1767 case '3': case '4': case '5': 1768 case '6': case '7': case '8': 1769 case '9': 1770 if (have_range && (ms->flags & MAGIC_CHECK)) 1771 file_magwarn(ms, "multiple ranges"); 1772 have_range = 1; 1773 m->str_range = CAST(uint32_t, strtoul(l, &t, 0)); 1774 if (m->str_range == 0) 1775 file_magwarn(ms, "zero range"); 1776 l = t - 1; 1777 break; 1778 case CHAR_COMPACT_WHITESPACE: 1779 m->str_flags |= STRING_COMPACT_WHITESPACE; 1780 break; 1781 case CHAR_COMPACT_OPTIONAL_WHITESPACE: 1782 m->str_flags |= STRING_COMPACT_OPTIONAL_WHITESPACE; 1783 break; 1784 case CHAR_IGNORE_LOWERCASE: 1785 m->str_flags |= STRING_IGNORE_LOWERCASE; 1786 break; 1787 case CHAR_IGNORE_UPPERCASE: 1788 m->str_flags |= STRING_IGNORE_UPPERCASE; 1789 break; 1790 case CHAR_REGEX_OFFSET_START: 1791 m->str_flags |= REGEX_OFFSET_START; 1792 break; 1793 case CHAR_BINTEST: 1794 m->str_flags |= STRING_BINTEST; 1795 break; 1796 case CHAR_TEXTTEST: 1797 m->str_flags |= STRING_TEXTTEST; 1798 break; 1799 case CHAR_TRIM: 1800 m->str_flags |= STRING_TRIM; 1801 break; 1802 case CHAR_PSTRING_1_LE: 1803 #define SET_LENGTH(a) m->str_flags = (m->str_flags & ~PSTRING_LEN) | (a) 1804 if (m->type != FILE_PSTRING) 1805 goto bad; 1806 SET_LENGTH(PSTRING_1_LE); 1807 break; 1808 case CHAR_PSTRING_2_BE: 1809 if (m->type != FILE_PSTRING) 1810 goto bad; 1811 SET_LENGTH(PSTRING_2_BE); 1812 break; 1813 case CHAR_PSTRING_2_LE: 1814 if (m->type != FILE_PSTRING) 1815 goto bad; 1816 SET_LENGTH(PSTRING_2_LE); 1817 break; 1818 case CHAR_PSTRING_4_BE: 1819 if (m->type != FILE_PSTRING) 1820 goto bad; 1821 SET_LENGTH(PSTRING_4_BE); 1822 break; 1823 case CHAR_PSTRING_4_LE: 1824 switch (m->type) { 1825 case FILE_PSTRING: 1826 case FILE_REGEX: 1827 break; 1828 default: 1829 goto bad; 1830 } 1831 SET_LENGTH(PSTRING_4_LE); 1832 break; 1833 case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF: 1834 if (m->type != FILE_PSTRING) 1835 goto bad; 1836 m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF; 1837 break; 1838 default: 1839 bad: 1840 if (ms->flags & MAGIC_CHECK) 1841 file_magwarn(ms, "string modifier `%c' " 1842 "invalid", *l); 1843 goto out; 1844 } 1845 /* allow multiple '/' for readability */ 1846 if (l[1] == '/' && !isspace(CAST(unsigned char, l[2]))) 1847 l++; 1848 } 1849 if (string_modifier_check(ms, m) == -1) 1850 goto out; 1851 *lp = l; 1852 return 0; 1853 out: 1854 *lp = l; 1855 return -1; 1856 } 1857 1858 /* 1859 * parse one line from magic file, put into magic[index++] if valid 1860 */ 1861 private int 1862 parse(struct magic_set *ms, struct magic_entry *me, const char *line, 1863 size_t lineno, int action) 1864 { 1865 #ifdef ENABLE_CONDITIONALS 1866 static uint32_t last_cont_level = 0; 1867 #endif 1868 size_t i; 1869 struct magic *m; 1870 const char *l = line; 1871 char *t; 1872 int op; 1873 uint32_t cont_level; 1874 int32_t diff; 1875 1876 cont_level = 0; 1877 1878 /* 1879 * Parse the offset. 1880 */ 1881 while (*l == '>') { 1882 ++l; /* step over */ 1883 cont_level++; 1884 } 1885 #ifdef ENABLE_CONDITIONALS 1886 if (cont_level == 0 || cont_level > last_cont_level) 1887 if (file_check_mem(ms, cont_level) == -1) 1888 return -1; 1889 last_cont_level = cont_level; 1890 #endif 1891 if (cont_level != 0) { 1892 if (me->mp == NULL) { 1893 file_magerror(ms, "No current entry for continuation"); 1894 return -1; 1895 } 1896 if (me->cont_count == 0) { 1897 file_magerror(ms, "Continuations present with 0 count"); 1898 return -1; 1899 } 1900 m = &me->mp[me->cont_count - 1]; 1901 diff = CAST(int32_t, cont_level) - CAST(int32_t, m->cont_level); 1902 if (diff > 1) 1903 file_magwarn(ms, "New continuation level %u is more " 1904 "than one larger than current level %u", cont_level, 1905 m->cont_level); 1906 if (me->cont_count == me->max_count) { 1907 struct magic *nm; 1908 size_t cnt = me->max_count + ALLOC_CHUNK; 1909 if ((nm = CAST(struct magic *, realloc(me->mp, 1910 sizeof(*nm) * cnt))) == NULL) { 1911 file_oomem(ms, sizeof(*nm) * cnt); 1912 return -1; 1913 } 1914 me->mp = nm; 1915 me->max_count = CAST(uint32_t, cnt); 1916 } 1917 m = &me->mp[me->cont_count++]; 1918 (void)memset(m, 0, sizeof(*m)); 1919 m->cont_level = cont_level; 1920 } else { 1921 static const size_t len = sizeof(*m) * ALLOC_CHUNK; 1922 if (me->mp != NULL) 1923 return 1; 1924 if ((m = CAST(struct magic *, malloc(len))) == NULL) { 1925 file_oomem(ms, len); 1926 return -1; 1927 } 1928 me->mp = m; 1929 me->max_count = ALLOC_CHUNK; 1930 (void)memset(m, 0, sizeof(*m)); 1931 m->factor_op = FILE_FACTOR_OP_NONE; 1932 m->cont_level = 0; 1933 me->cont_count = 1; 1934 } 1935 m->lineno = CAST(uint32_t, lineno); 1936 1937 if (*l == '&') { /* m->cont_level == 0 checked below. */ 1938 ++l; /* step over */ 1939 m->flag |= OFFADD; 1940 } 1941 if (*l == '(') { 1942 ++l; /* step over */ 1943 m->flag |= INDIR; 1944 if (m->flag & OFFADD) 1945 m->flag = (m->flag & ~OFFADD) | INDIROFFADD; 1946 1947 if (*l == '&') { /* m->cont_level == 0 checked below */ 1948 ++l; /* step over */ 1949 m->flag |= OFFADD; 1950 } 1951 } 1952 /* Indirect offsets are not valid at level 0. */ 1953 if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD))) { 1954 if (ms->flags & MAGIC_CHECK) 1955 file_magwarn(ms, "relative offset at level 0"); 1956 return -1; 1957 } 1958 1959 /* get offset, then skip over it */ 1960 if (*l == '-') { 1961 ++l; /* step over */ 1962 m->flag |= OFFNEGATIVE; 1963 } 1964 m->offset = CAST(int32_t, strtol(l, &t, 0)); 1965 if (l == t) { 1966 if (ms->flags & MAGIC_CHECK) 1967 file_magwarn(ms, "offset `%s' invalid", l); 1968 return -1; 1969 } 1970 1971 l = t; 1972 1973 if (m->flag & INDIR) { 1974 m->in_type = FILE_LONG; 1975 m->in_offset = 0; 1976 m->in_op = 0; 1977 /* 1978 * read [.,lbs][+-]nnnnn) 1979 */ 1980 if (*l == '.' || *l == ',') { 1981 if (*l == ',') 1982 m->in_op |= FILE_OPSIGNED; 1983 l++; 1984 switch (*l) { 1985 case 'l': 1986 m->in_type = FILE_LELONG; 1987 break; 1988 case 'L': 1989 m->in_type = FILE_BELONG; 1990 break; 1991 case 'm': 1992 m->in_type = FILE_MELONG; 1993 break; 1994 case 'h': 1995 case 's': 1996 m->in_type = FILE_LESHORT; 1997 break; 1998 case 'H': 1999 case 'S': 2000 m->in_type = FILE_BESHORT; 2001 break; 2002 case 'c': 2003 case 'b': 2004 case 'C': 2005 case 'B': 2006 m->in_type = FILE_BYTE; 2007 break; 2008 case 'e': 2009 case 'f': 2010 case 'g': 2011 m->in_type = FILE_LEDOUBLE; 2012 break; 2013 case 'E': 2014 case 'F': 2015 case 'G': 2016 m->in_type = FILE_BEDOUBLE; 2017 break; 2018 case 'i': 2019 m->in_type = FILE_LEID3; 2020 break; 2021 case 'I': 2022 m->in_type = FILE_BEID3; 2023 break; 2024 case 'q': 2025 m->in_type = FILE_LEQUAD; 2026 break; 2027 case 'Q': 2028 m->in_type = FILE_BEQUAD; 2029 break; 2030 default: 2031 if (ms->flags & MAGIC_CHECK) 2032 file_magwarn(ms, 2033 "indirect offset type `%c' invalid", 2034 *l); 2035 return -1; 2036 } 2037 l++; 2038 } 2039 2040 if (*l == '~') { 2041 m->in_op |= FILE_OPINVERSE; 2042 l++; 2043 } 2044 if ((op = get_op(*l)) != -1) { 2045 m->in_op |= op; 2046 l++; 2047 } 2048 if (*l == '(') { 2049 m->in_op |= FILE_OPINDIRECT; 2050 l++; 2051 } 2052 if (isdigit(CAST(unsigned char, *l)) || *l == '-') { 2053 m->in_offset = CAST(int32_t, strtol(l, &t, 0)); 2054 if (l == t) { 2055 if (ms->flags & MAGIC_CHECK) 2056 file_magwarn(ms, 2057 "in_offset `%s' invalid", l); 2058 return -1; 2059 } 2060 l = t; 2061 } 2062 if (*l++ != ')' || 2063 ((m->in_op & FILE_OPINDIRECT) && *l++ != ')')) { 2064 if (ms->flags & MAGIC_CHECK) 2065 file_magwarn(ms, 2066 "missing ')' in indirect offset"); 2067 return -1; 2068 } 2069 } 2070 EATAB; 2071 2072 #ifdef ENABLE_CONDITIONALS 2073 m->cond = get_cond(l, &l); 2074 if (check_cond(ms, m->cond, cont_level) == -1) 2075 return -1; 2076 2077 EATAB; 2078 #endif 2079 2080 /* 2081 * Parse the type. 2082 */ 2083 if (*l == 'u') { 2084 /* 2085 * Try it as a keyword type prefixed by "u"; match what 2086 * follows the "u". If that fails, try it as an SUS 2087 * integer type. 2088 */ 2089 m->type = get_type(type_tbl, l + 1, &l); 2090 if (m->type == FILE_INVALID) { 2091 /* 2092 * Not a keyword type; parse it as an SUS type, 2093 * 'u' possibly followed by a number or C/S/L. 2094 */ 2095 m->type = get_standard_integer_type(l, &l); 2096 } 2097 /* It's unsigned. */ 2098 if (m->type != FILE_INVALID) 2099 m->flag |= UNSIGNED; 2100 } else { 2101 /* 2102 * Try it as a keyword type. If that fails, try it as 2103 * an SUS integer type if it begins with "d" or as an 2104 * SUS string type if it begins with "s". In any case, 2105 * it's not unsigned. 2106 */ 2107 m->type = get_type(type_tbl, l, &l); 2108 if (m->type == FILE_INVALID) { 2109 /* 2110 * Not a keyword type; parse it as an SUS type, 2111 * either 'd' possibly followed by a number or 2112 * C/S/L, or just 's'. 2113 */ 2114 if (*l == 'd') 2115 m->type = get_standard_integer_type(l, &l); 2116 else if (*l == 's' 2117 && !isalpha(CAST(unsigned char, l[1]))) { 2118 m->type = FILE_STRING; 2119 ++l; 2120 } 2121 } 2122 } 2123 2124 if (m->type == FILE_INVALID) { 2125 /* Not found - try it as a special keyword. */ 2126 m->type = get_type(special_tbl, l, &l); 2127 } 2128 2129 if (m->type == FILE_INVALID) { 2130 if (ms->flags & MAGIC_CHECK) 2131 file_magwarn(ms, "type `%s' invalid", l); 2132 return -1; 2133 } 2134 2135 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ 2136 /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */ 2137 2138 m->mask_op = 0; 2139 if (*l == '~') { 2140 if (!IS_STRING(m->type)) 2141 m->mask_op |= FILE_OPINVERSE; 2142 else if (ms->flags & MAGIC_CHECK) 2143 file_magwarn(ms, "'~' invalid for string types"); 2144 ++l; 2145 } 2146 m->str_range = 0; 2147 m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0; 2148 if ((op = get_op(*l)) != -1) { 2149 if (IS_STRING(m->type)) { 2150 int r; 2151 2152 if (op != FILE_OPDIVIDE) { 2153 if (ms->flags & MAGIC_CHECK) 2154 file_magwarn(ms, 2155 "invalid string/indirect op: " 2156 "`%c'", *t); 2157 return -1; 2158 } 2159 2160 if (m->type == FILE_INDIRECT) 2161 r = parse_indirect_modifier(ms, m, &l); 2162 else 2163 r = parse_string_modifier(ms, m, &l); 2164 if (r == -1) 2165 return -1; 2166 } else 2167 parse_op_modifier(ms, m, &l, op); 2168 } 2169 2170 /* 2171 * We used to set mask to all 1's here, instead let's just not do 2172 * anything if mask = 0 (unless you have a better idea) 2173 */ 2174 EATAB; 2175 2176 switch (*l) { 2177 case '>': 2178 case '<': 2179 m->reln = *l; 2180 ++l; 2181 if (*l == '=') { 2182 if (ms->flags & MAGIC_CHECK) { 2183 file_magwarn(ms, "%c= not supported", 2184 m->reln); 2185 return -1; 2186 } 2187 ++l; 2188 } 2189 break; 2190 /* Old-style anding: "0 byte &0x80 dynamically linked" */ 2191 case '&': 2192 case '^': 2193 case '=': 2194 m->reln = *l; 2195 ++l; 2196 if (*l == '=') { 2197 /* HP compat: ignore &= etc. */ 2198 ++l; 2199 } 2200 break; 2201 case '!': 2202 m->reln = *l; 2203 ++l; 2204 break; 2205 default: 2206 m->reln = '='; /* the default relation */ 2207 if (*l == 'x' && ((isascii(CAST(unsigned char, l[1])) && 2208 isspace(CAST(unsigned char, l[1]))) || !l[1])) { 2209 m->reln = *l; 2210 ++l; 2211 } 2212 break; 2213 } 2214 /* 2215 * Grab the value part, except for an 'x' reln. 2216 */ 2217 if (m->reln != 'x' && getvalue(ms, m, &l, action)) 2218 return -1; 2219 2220 /* 2221 * TODO finish this macro and start using it! 2222 * #define offsetcheck {if (offset > ms->bytes_max -1) 2223 * magwarn("offset too big"); } 2224 */ 2225 2226 /* 2227 * Now get last part - the description 2228 */ 2229 EATAB; 2230 if (l[0] == '\b') { 2231 ++l; 2232 m->flag |= NOSPACE; 2233 } else if ((l[0] == '\\') && (l[1] == 'b')) { 2234 ++l; 2235 ++l; 2236 m->flag |= NOSPACE; 2237 } 2238 for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); ) 2239 continue; 2240 if (i == sizeof(m->desc)) { 2241 m->desc[sizeof(m->desc) - 1] = '\0'; 2242 if (ms->flags & MAGIC_CHECK) 2243 file_magwarn(ms, "description `%s' truncated", m->desc); 2244 } 2245 2246 /* 2247 * We only do this check while compiling, or if any of the magic 2248 * files were not compiled. 2249 */ 2250 if (ms->flags & MAGIC_CHECK) { 2251 if (check_format(ms, m) == -1) 2252 return -1; 2253 } 2254 #ifndef COMPILE_ONLY 2255 if (action == FILE_CHECK) { 2256 file_mdump(m); 2257 } 2258 #endif 2259 m->mimetype[0] = '\0'; /* initialise MIME type to none */ 2260 return 0; 2261 } 2262 2263 /* 2264 * parse a STRENGTH annotation line from magic file, put into magic[index - 1] 2265 * if valid 2266 */ 2267 private int 2268 parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line, 2269 size_t len __attribute__((__unused__))) 2270 { 2271 const char *l = line; 2272 char *el; 2273 unsigned long factor; 2274 struct magic *m = &me->mp[0]; 2275 2276 if (m->factor_op != FILE_FACTOR_OP_NONE) { 2277 file_magwarn(ms, 2278 "Current entry already has a strength type: %c %d", 2279 m->factor_op, m->factor); 2280 return -1; 2281 } 2282 if (m->type == FILE_NAME) { 2283 file_magwarn(ms, "%s: Strength setting is not supported in " 2284 "\"name\" magic entries", m->value.s); 2285 return -1; 2286 } 2287 EATAB; 2288 switch (*l) { 2289 case FILE_FACTOR_OP_NONE: 2290 case FILE_FACTOR_OP_PLUS: 2291 case FILE_FACTOR_OP_MINUS: 2292 case FILE_FACTOR_OP_TIMES: 2293 case FILE_FACTOR_OP_DIV: 2294 m->factor_op = *l++; 2295 break; 2296 default: 2297 file_magwarn(ms, "Unknown factor op `%c'", *l); 2298 return -1; 2299 } 2300 EATAB; 2301 factor = strtoul(l, &el, 0); 2302 if (factor > 255) { 2303 file_magwarn(ms, "Too large factor `%lu'", factor); 2304 goto out; 2305 } 2306 if (*el && !isspace(CAST(unsigned char, *el))) { 2307 file_magwarn(ms, "Bad factor `%s'", l); 2308 goto out; 2309 } 2310 m->factor = CAST(uint8_t, factor); 2311 if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) { 2312 file_magwarn(ms, "Cannot have factor op `%c' and factor %u", 2313 m->factor_op, m->factor); 2314 goto out; 2315 } 2316 return 0; 2317 out: 2318 m->factor_op = FILE_FACTOR_OP_NONE; 2319 m->factor = 0; 2320 return -1; 2321 } 2322 2323 private int 2324 goodchar(unsigned char x, const char *extra) 2325 { 2326 return (isascii(x) && isalnum(x)) || strchr(extra, x); 2327 } 2328 2329 private int 2330 parse_extra(struct magic_set *ms, struct magic_entry *me, const char *line, 2331 size_t llen, off_t off, size_t len, const char *name, const char *extra, 2332 int nt) 2333 { 2334 size_t i; 2335 const char *l = line; 2336 struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1]; 2337 char *buf = CAST(char *, CAST(void *, m)) + off; 2338 2339 if (buf[0] != '\0') { 2340 len = nt ? strlen(buf) : len; 2341 file_magwarn(ms, "Current entry already has a %s type " 2342 "`%.*s', new type `%s'", name, CAST(int, len), buf, l); 2343 return -1; 2344 } 2345 2346 if (*m->desc == '\0') { 2347 file_magwarn(ms, "Current entry does not yet have a " 2348 "description for adding a %s type", name); 2349 return -1; 2350 } 2351 2352 EATAB; 2353 for (i = 0; *l && i < llen && i < len && goodchar(*l, extra); 2354 buf[i++] = *l++) 2355 continue; 2356 2357 if (i == len && *l) { 2358 if (nt) 2359 buf[len - 1] = '\0'; 2360 if (ms->flags & MAGIC_CHECK) 2361 file_magwarn(ms, "%s type `%s' truncated %" 2362 SIZE_T_FORMAT "u", name, line, i); 2363 } else { 2364 if (!isspace(CAST(unsigned char, *l)) && !goodchar(*l, extra)) 2365 file_magwarn(ms, "%s type `%s' has bad char '%c'", 2366 name, line, *l); 2367 if (nt) 2368 buf[i] = '\0'; 2369 } 2370 2371 if (i > 0) 2372 return 0; 2373 2374 file_magerror(ms, "Bad magic entry '%s'", line); 2375 return -1; 2376 } 2377 2378 /* 2379 * Parse an Apple CREATOR/TYPE annotation from magic file and put it into 2380 * magic[index - 1] 2381 */ 2382 private int 2383 parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line, 2384 size_t len) 2385 { 2386 struct magic *m = &me->mp[0]; 2387 2388 return parse_extra(ms, me, line, len, 2389 CAST(off_t, offsetof(struct magic, apple)), 2390 sizeof(m->apple), "APPLE", "!+-./?", 0); 2391 } 2392 2393 /* 2394 * Parse a comma-separated list of extensions 2395 */ 2396 private int 2397 parse_ext(struct magic_set *ms, struct magic_entry *me, const char *line, 2398 size_t len) 2399 { 2400 struct magic *m = &me->mp[0]; 2401 2402 return parse_extra(ms, me, line, len, 2403 CAST(off_t, offsetof(struct magic, ext)), 2404 sizeof(m->ext), "EXTENSION", ",!+-/@?_$", 0); 2405 } 2406 2407 /* 2408 * parse a MIME annotation line from magic file, put into magic[index - 1] 2409 * if valid 2410 */ 2411 private int 2412 parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line, 2413 size_t len) 2414 { 2415 struct magic *m = &me->mp[0]; 2416 2417 return parse_extra(ms, me, line, len, 2418 CAST(off_t, offsetof(struct magic, mimetype)), 2419 sizeof(m->mimetype), "MIME", "+-/.$?:{}", 1); 2420 } 2421 2422 private int 2423 check_format_type(const char *ptr, int type, const char **estr) 2424 { 2425 int quad = 0, h; 2426 size_t len, cnt; 2427 if (*ptr == '\0') { 2428 /* Missing format string; bad */ 2429 *estr = "missing format spec"; 2430 return -1; 2431 } 2432 2433 switch (file_formats[type]) { 2434 case FILE_FMT_QUAD: 2435 quad = 1; 2436 /*FALLTHROUGH*/ 2437 case FILE_FMT_NUM: 2438 if (quad == 0) { 2439 switch (type) { 2440 case FILE_BYTE: 2441 h = 2; 2442 break; 2443 case FILE_SHORT: 2444 case FILE_BESHORT: 2445 case FILE_LESHORT: 2446 h = 1; 2447 break; 2448 case FILE_LONG: 2449 case FILE_BELONG: 2450 case FILE_LELONG: 2451 case FILE_MELONG: 2452 case FILE_LEID3: 2453 case FILE_BEID3: 2454 case FILE_INDIRECT: 2455 h = 0; 2456 break; 2457 default: 2458 abort(); 2459 } 2460 } else 2461 h = 0; 2462 if (*ptr == '-') 2463 ptr++; 2464 if (*ptr == '.') 2465 ptr++; 2466 if (*ptr == '#') 2467 ptr++; 2468 #define CHECKLEN() do { \ 2469 for (len = cnt = 0; isdigit(CAST(unsigned char, *ptr)); ptr++, cnt++) \ 2470 len = len * 10 + (*ptr - '0'); \ 2471 if (cnt > 5 || len > 1024) \ 2472 goto toolong; \ 2473 } while (/*CONSTCOND*/0) 2474 2475 CHECKLEN(); 2476 if (*ptr == '.') 2477 ptr++; 2478 CHECKLEN(); 2479 if (quad) { 2480 if (*ptr++ != 'l') 2481 goto invalid; 2482 if (*ptr++ != 'l') 2483 goto invalid; 2484 } 2485 2486 switch (*ptr++) { 2487 #ifdef STRICT_FORMAT /* "long" formats are int formats for us */ 2488 /* so don't accept the 'l' modifier */ 2489 case 'l': 2490 switch (*ptr++) { 2491 case 'i': 2492 case 'd': 2493 case 'u': 2494 case 'o': 2495 case 'x': 2496 case 'X': 2497 if (h == 0) 2498 return 0; 2499 /*FALLTHROUGH*/ 2500 default: 2501 goto invalid; 2502 } 2503 2504 /* 2505 * Don't accept h and hh modifiers. They make writing 2506 * magic entries more complicated, for very little benefit 2507 */ 2508 case 'h': 2509 if (h-- <= 0) 2510 goto invalid; 2511 switch (*ptr++) { 2512 case 'h': 2513 if (h-- <= 0) 2514 goto invalid; 2515 switch (*ptr++) { 2516 case 'i': 2517 case 'd': 2518 case 'u': 2519 case 'o': 2520 case 'x': 2521 case 'X': 2522 return 0; 2523 default: 2524 goto invalid; 2525 } 2526 case 'i': 2527 case 'd': 2528 case 'u': 2529 case 'o': 2530 case 'x': 2531 case 'X': 2532 if (h == 0) 2533 return 0; 2534 /*FALLTHROUGH*/ 2535 default: 2536 goto invalid; 2537 } 2538 #endif 2539 case 'c': 2540 if (h == 2) 2541 return 0; 2542 goto invalid; 2543 case 'i': 2544 case 'd': 2545 case 'u': 2546 case 'o': 2547 case 'x': 2548 case 'X': 2549 #ifdef STRICT_FORMAT 2550 if (h == 0) 2551 return 0; 2552 /*FALLTHROUGH*/ 2553 #else 2554 return 0; 2555 #endif 2556 default: 2557 goto invalid; 2558 } 2559 2560 case FILE_FMT_FLOAT: 2561 case FILE_FMT_DOUBLE: 2562 if (*ptr == '-') 2563 ptr++; 2564 if (*ptr == '.') 2565 ptr++; 2566 CHECKLEN(); 2567 if (*ptr == '.') 2568 ptr++; 2569 CHECKLEN(); 2570 switch (*ptr++) { 2571 case 'e': 2572 case 'E': 2573 case 'f': 2574 case 'F': 2575 case 'g': 2576 case 'G': 2577 return 0; 2578 2579 default: 2580 goto invalid; 2581 } 2582 2583 2584 case FILE_FMT_STR: 2585 if (*ptr == '-') 2586 ptr++; 2587 while (isdigit(CAST(unsigned char, *ptr))) 2588 ptr++; 2589 if (*ptr == '.') { 2590 ptr++; 2591 while (isdigit(CAST(unsigned char , *ptr))) 2592 ptr++; 2593 } 2594 2595 switch (*ptr++) { 2596 case 's': 2597 return 0; 2598 default: 2599 goto invalid; 2600 } 2601 2602 default: 2603 /* internal error */ 2604 abort(); 2605 } 2606 invalid: 2607 *estr = "not valid"; 2608 toolong: 2609 *estr = "too long"; 2610 return -1; 2611 } 2612 2613 /* 2614 * Check that the optional printf format in description matches 2615 * the type of the magic. 2616 */ 2617 private int 2618 check_format(struct magic_set *ms, struct magic *m) 2619 { 2620 char *ptr; 2621 const char *estr; 2622 2623 for (ptr = m->desc; *ptr; ptr++) 2624 if (*ptr == '%') 2625 break; 2626 if (*ptr == '\0') { 2627 /* No format string; ok */ 2628 return 1; 2629 } 2630 2631 assert(file_nformats == file_nnames); 2632 2633 if (m->type >= file_nformats) { 2634 file_magwarn(ms, "Internal error inconsistency between " 2635 "m->type and format strings"); 2636 return -1; 2637 } 2638 if (file_formats[m->type] == FILE_FMT_NONE) { 2639 file_magwarn(ms, "No format string for `%s' with description " 2640 "`%s'", m->desc, file_names[m->type]); 2641 return -1; 2642 } 2643 2644 ptr++; 2645 if (check_format_type(ptr, m->type, &estr) == -1) { 2646 /* 2647 * TODO: this error message is unhelpful if the format 2648 * string is not one character long 2649 */ 2650 file_magwarn(ms, "Printf format is %s for type " 2651 "`%s' in description `%s'", estr, 2652 file_names[m->type], m->desc); 2653 return -1; 2654 } 2655 2656 for (; *ptr; ptr++) { 2657 if (*ptr == '%') { 2658 file_magwarn(ms, 2659 "Too many format strings (should have at most one) " 2660 "for `%s' with description `%s'", 2661 file_names[m->type], m->desc); 2662 return -1; 2663 } 2664 } 2665 return 0; 2666 } 2667 2668 /* 2669 * Read a numeric value from a pointer, into the value union of a magic 2670 * pointer, according to the magic type. Update the string pointer to point 2671 * just after the number read. Return 0 for success, non-zero for failure. 2672 */ 2673 private int 2674 getvalue(struct magic_set *ms, struct magic *m, const char **p, int action) 2675 { 2676 char *ep; 2677 uint64_t ull; 2678 2679 switch (m->type) { 2680 case FILE_BESTRING16: 2681 case FILE_LESTRING16: 2682 case FILE_STRING: 2683 case FILE_PSTRING: 2684 case FILE_REGEX: 2685 case FILE_SEARCH: 2686 case FILE_NAME: 2687 case FILE_USE: 2688 case FILE_DER: 2689 *p = getstr(ms, m, *p, action == FILE_COMPILE); 2690 if (*p == NULL) { 2691 if (ms->flags & MAGIC_CHECK) 2692 file_magwarn(ms, "cannot get string from `%s'", 2693 m->value.s); 2694 return -1; 2695 } 2696 if (m->type == FILE_REGEX) { 2697 file_regex_t rx; 2698 int rc = file_regcomp(&rx, m->value.s, REG_EXTENDED); 2699 if (rc) { 2700 if (ms->flags & MAGIC_CHECK) 2701 file_regerror(&rx, rc, ms); 2702 } 2703 file_regfree(&rx); 2704 return rc ? -1 : 0; 2705 } 2706 return 0; 2707 default: 2708 if (m->reln == 'x') 2709 return 0; 2710 break; 2711 } 2712 2713 switch (m->type) { 2714 case FILE_FLOAT: 2715 case FILE_BEFLOAT: 2716 case FILE_LEFLOAT: 2717 errno = 0; 2718 #ifdef HAVE_STRTOF 2719 m->value.f = strtof(*p, &ep); 2720 #else 2721 m->value.f = (float)strtod(*p, &ep); 2722 #endif 2723 if (errno == 0) 2724 *p = ep; 2725 return 0; 2726 case FILE_DOUBLE: 2727 case FILE_BEDOUBLE: 2728 case FILE_LEDOUBLE: 2729 errno = 0; 2730 m->value.d = strtod(*p, &ep); 2731 if (errno == 0) 2732 *p = ep; 2733 return 0; 2734 case FILE_GUID: 2735 if (file_parse_guid(*p, m->value.guid) == -1) 2736 return -1; 2737 *p += FILE_GUID_SIZE - 1; 2738 return 0; 2739 default: 2740 errno = 0; 2741 ull = CAST(uint64_t, strtoull(*p, &ep, 0)); 2742 m->value.q = file_signextend(ms, m, ull); 2743 if (*p == ep) { 2744 file_magwarn(ms, "Unparseable number `%s'", *p); 2745 } else { 2746 size_t ts = typesize(m->type); 2747 uint64_t x; 2748 const char *q; 2749 2750 if (ts == FILE_BADSIZE) { 2751 file_magwarn(ms, 2752 "Expected numeric type got `%s'", 2753 type_tbl[m->type].name); 2754 } 2755 for (q = *p; isspace(CAST(unsigned char, *q)); q++) 2756 continue; 2757 if (*q == '-') 2758 ull = -CAST(int64_t, ull); 2759 switch (ts) { 2760 case 1: 2761 x = CAST(uint64_t, ull & ~0xffULL); 2762 break; 2763 case 2: 2764 x = CAST(uint64_t, ull & ~0xffffULL); 2765 break; 2766 case 4: 2767 x = CAST(uint64_t, ull & ~0xffffffffULL); 2768 break; 2769 case 8: 2770 x = 0; 2771 break; 2772 default: 2773 abort(); 2774 } 2775 if (x) { 2776 file_magwarn(ms, "Overflow for numeric" 2777 " type `%s' value %#" PRIx64, 2778 type_tbl[m->type].name, ull); 2779 } 2780 } 2781 if (errno == 0) { 2782 *p = ep; 2783 eatsize(p); 2784 } 2785 return 0; 2786 } 2787 } 2788 2789 /* 2790 * Convert a string containing C character escapes. Stop at an unescaped 2791 * space or tab. 2792 * Copy the converted version to "m->value.s", and the length in m->vallen. 2793 * Return updated scan pointer as function result. Warn if set. 2794 */ 2795 private const char * 2796 getstr(struct magic_set *ms, struct magic *m, const char *s, int warn) 2797 { 2798 const char *origs = s; 2799 char *p = m->value.s; 2800 size_t plen = sizeof(m->value.s); 2801 char *origp = p; 2802 char *pmax = p + plen - 1; 2803 int c; 2804 int val; 2805 2806 while ((c = *s++) != '\0') { 2807 if (isspace(CAST(unsigned char, c))) 2808 break; 2809 if (p >= pmax) { 2810 file_error(ms, 0, "string too long: `%s'", origs); 2811 return NULL; 2812 } 2813 if (c == '\\') { 2814 switch(c = *s++) { 2815 2816 case '\0': 2817 if (warn) 2818 file_magwarn(ms, "incomplete escape"); 2819 s--; 2820 goto out; 2821 2822 case '\t': 2823 if (warn) { 2824 file_magwarn(ms, 2825 "escaped tab found, use \\t instead"); 2826 warn = 0; /* already did */ 2827 } 2828 /*FALLTHROUGH*/ 2829 default: 2830 if (warn) { 2831 if (isprint(CAST(unsigned char, c))) { 2832 /* Allow escaping of 2833 * ``relations'' */ 2834 if (strchr("<>&^=!", c) == NULL 2835 && (m->type != FILE_REGEX || 2836 strchr("[]().*?^$|{}", c) 2837 == NULL)) { 2838 file_magwarn(ms, "no " 2839 "need to escape " 2840 "`%c'", c); 2841 } 2842 } else { 2843 file_magwarn(ms, 2844 "unknown escape sequence: " 2845 "\\%03o", c); 2846 } 2847 } 2848 /*FALLTHROUGH*/ 2849 /* space, perhaps force people to use \040? */ 2850 case ' ': 2851 #if 0 2852 /* 2853 * Other things people escape, but shouldn't need to, 2854 * so we disallow them 2855 */ 2856 case '\'': 2857 case '"': 2858 case '?': 2859 #endif 2860 /* Relations */ 2861 case '>': 2862 case '<': 2863 case '&': 2864 case '^': 2865 case '=': 2866 case '!': 2867 /* and baskslash itself */ 2868 case '\\': 2869 *p++ = CAST(char, c); 2870 break; 2871 2872 case 'a': 2873 *p++ = '\a'; 2874 break; 2875 2876 case 'b': 2877 *p++ = '\b'; 2878 break; 2879 2880 case 'f': 2881 *p++ = '\f'; 2882 break; 2883 2884 case 'n': 2885 *p++ = '\n'; 2886 break; 2887 2888 case 'r': 2889 *p++ = '\r'; 2890 break; 2891 2892 case 't': 2893 *p++ = '\t'; 2894 break; 2895 2896 case 'v': 2897 *p++ = '\v'; 2898 break; 2899 2900 /* \ and up to 3 octal digits */ 2901 case '0': 2902 case '1': 2903 case '2': 2904 case '3': 2905 case '4': 2906 case '5': 2907 case '6': 2908 case '7': 2909 val = c - '0'; 2910 c = *s++; /* try for 2 */ 2911 if (c >= '0' && c <= '7') { 2912 val = (val << 3) | (c - '0'); 2913 c = *s++; /* try for 3 */ 2914 if (c >= '0' && c <= '7') 2915 val = (val << 3) | (c-'0'); 2916 else 2917 --s; 2918 } 2919 else 2920 --s; 2921 *p++ = CAST(char, val); 2922 break; 2923 2924 /* \x and up to 2 hex digits */ 2925 case 'x': 2926 val = 'x'; /* Default if no digits */ 2927 c = hextoint(*s++); /* Get next char */ 2928 if (c >= 0) { 2929 val = c; 2930 c = hextoint(*s++); 2931 if (c >= 0) 2932 val = (val << 4) + c; 2933 else 2934 --s; 2935 } else 2936 --s; 2937 *p++ = CAST(char, val); 2938 break; 2939 } 2940 } else 2941 *p++ = CAST(char, c); 2942 } 2943 --s; 2944 out: 2945 *p = '\0'; 2946 m->vallen = CAST(unsigned char, (p - origp)); 2947 if (m->type == FILE_PSTRING) { 2948 size_t l = file_pstring_length_size(ms, m); 2949 if (l == FILE_BADSIZE) 2950 return NULL; 2951 m->vallen += CAST(unsigned char, l); 2952 } 2953 return s; 2954 } 2955 2956 2957 /* Single hex char to int; -1 if not a hex char. */ 2958 private int 2959 hextoint(int c) 2960 { 2961 if (!isascii(CAST(unsigned char, c))) 2962 return -1; 2963 if (isdigit(CAST(unsigned char, c))) 2964 return c - '0'; 2965 if ((c >= 'a') && (c <= 'f')) 2966 return c + 10 - 'a'; 2967 if (( c>= 'A') && (c <= 'F')) 2968 return c + 10 - 'A'; 2969 return -1; 2970 } 2971 2972 2973 /* 2974 * Print a string containing C character escapes. 2975 */ 2976 protected void 2977 file_showstr(FILE *fp, const char *s, size_t len) 2978 { 2979 char c; 2980 2981 for (;;) { 2982 if (len == FILE_BADSIZE) { 2983 c = *s++; 2984 if (c == '\0') 2985 break; 2986 } 2987 else { 2988 if (len-- == 0) 2989 break; 2990 c = *s++; 2991 } 2992 if (c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */ 2993 (void) fputc(c, fp); 2994 else { 2995 (void) fputc('\\', fp); 2996 switch (c) { 2997 case '\a': 2998 (void) fputc('a', fp); 2999 break; 3000 3001 case '\b': 3002 (void) fputc('b', fp); 3003 break; 3004 3005 case '\f': 3006 (void) fputc('f', fp); 3007 break; 3008 3009 case '\n': 3010 (void) fputc('n', fp); 3011 break; 3012 3013 case '\r': 3014 (void) fputc('r', fp); 3015 break; 3016 3017 case '\t': 3018 (void) fputc('t', fp); 3019 break; 3020 3021 case '\v': 3022 (void) fputc('v', fp); 3023 break; 3024 3025 default: 3026 (void) fprintf(fp, "%.3o", c & 0377); 3027 break; 3028 } 3029 } 3030 } 3031 } 3032 3033 /* 3034 * eatsize(): Eat the size spec from a number [eg. 10UL] 3035 */ 3036 private void 3037 eatsize(const char **p) 3038 { 3039 const char *l = *p; 3040 3041 if (LOWCASE(*l) == 'u') 3042 l++; 3043 3044 switch (LOWCASE(*l)) { 3045 case 'l': /* long */ 3046 case 's': /* short */ 3047 case 'h': /* short */ 3048 case 'b': /* char/byte */ 3049 case 'c': /* char/byte */ 3050 l++; 3051 /*FALLTHROUGH*/ 3052 default: 3053 break; 3054 } 3055 3056 *p = l; 3057 } 3058 3059 /* 3060 * handle a buffer containing a compiled file. 3061 */ 3062 private struct magic_map * 3063 apprentice_buf(struct magic_set *ms, struct magic *buf, size_t len) 3064 { 3065 struct magic_map *map; 3066 3067 if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) { 3068 file_oomem(ms, sizeof(*map)); 3069 return NULL; 3070 } 3071 map->len = len; 3072 map->p = buf; 3073 map->type = MAP_TYPE_USER; 3074 if (check_buffer(ms, map, "buffer") != 0) { 3075 apprentice_unmap(map); 3076 return NULL; 3077 } 3078 return map; 3079 } 3080 3081 /* 3082 * handle a compiled file. 3083 */ 3084 3085 private struct magic_map * 3086 apprentice_map(struct magic_set *ms, const char *fn) 3087 { 3088 int fd; 3089 struct stat st; 3090 char *dbname = NULL; 3091 struct magic_map *map; 3092 struct magic_map *rv = NULL; 3093 3094 fd = -1; 3095 if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) { 3096 file_oomem(ms, sizeof(*map)); 3097 goto error; 3098 } 3099 map->type = MAP_TYPE_USER; /* unspecified */ 3100 3101 dbname = mkdbname(ms, fn, 0); 3102 if (dbname == NULL) 3103 goto error; 3104 3105 if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1) 3106 goto error; 3107 3108 if (fstat(fd, &st) == -1) { 3109 file_error(ms, errno, "cannot stat `%s'", dbname); 3110 goto error; 3111 } 3112 if (st.st_size < 8 || st.st_size > maxoff_t()) { 3113 file_error(ms, 0, "file `%s' is too %s", dbname, 3114 st.st_size < 8 ? "small" : "large"); 3115 goto error; 3116 } 3117 3118 map->len = CAST(size_t, st.st_size); 3119 #ifdef QUICK 3120 map->type = MAP_TYPE_MMAP; 3121 if ((map->p = mmap(0, CAST(size_t, st.st_size), PROT_READ|PROT_WRITE, 3122 MAP_PRIVATE|MAP_FILE, fd, CAST(off_t, 0))) == MAP_FAILED) { 3123 file_error(ms, errno, "cannot map `%s'", dbname); 3124 goto error; 3125 } 3126 #else 3127 map->type = MAP_TYPE_MALLOC; 3128 if ((map->p = CAST(void *, malloc(map->len))) == NULL) { 3129 file_oomem(ms, map->len); 3130 goto error; 3131 } 3132 if (read(fd, map->p, map->len) != (ssize_t)map->len) { 3133 file_badread(ms); 3134 goto error; 3135 } 3136 #endif 3137 (void)close(fd); 3138 fd = -1; 3139 3140 if (check_buffer(ms, map, dbname) != 0) { 3141 goto error; 3142 } 3143 #ifdef QUICK 3144 if (mprotect(map->p, CAST(size_t, st.st_size), PROT_READ) == -1) { 3145 file_error(ms, errno, "cannot mprotect `%s'", dbname); 3146 goto error; 3147 } 3148 #endif 3149 3150 free(dbname); 3151 return map; 3152 3153 error: 3154 if (fd != -1) 3155 (void)close(fd); 3156 apprentice_unmap(map); 3157 free(dbname); 3158 return rv; 3159 } 3160 3161 private int 3162 check_buffer(struct magic_set *ms, struct magic_map *map, const char *dbname) 3163 { 3164 uint32_t *ptr; 3165 uint32_t entries, nentries; 3166 uint32_t version; 3167 int i, needsbyteswap; 3168 3169 ptr = CAST(uint32_t *, map->p); 3170 if (*ptr != MAGICNO) { 3171 if (swap4(*ptr) != MAGICNO) { 3172 file_error(ms, 0, "bad magic in `%s'", dbname); 3173 return -1; 3174 } 3175 needsbyteswap = 1; 3176 } else 3177 needsbyteswap = 0; 3178 if (needsbyteswap) 3179 version = swap4(ptr[1]); 3180 else 3181 version = ptr[1]; 3182 if (version != VERSIONNO) { 3183 file_error(ms, 0, "File %s supports only version %d magic " 3184 "files. `%s' is version %d", VERSION, 3185 VERSIONNO, dbname, version); 3186 return -1; 3187 } 3188 entries = CAST(uint32_t, map->len / sizeof(struct magic)); 3189 if ((entries * sizeof(struct magic)) != map->len) { 3190 file_error(ms, 0, "Size of `%s' %" SIZE_T_FORMAT "u is not " 3191 "a multiple of %" SIZE_T_FORMAT "u", 3192 dbname, map->len, sizeof(struct magic)); 3193 return -1; 3194 } 3195 map->magic[0] = CAST(struct magic *, map->p) + 1; 3196 nentries = 0; 3197 for (i = 0; i < MAGIC_SETS; i++) { 3198 if (needsbyteswap) 3199 map->nmagic[i] = swap4(ptr[i + 2]); 3200 else 3201 map->nmagic[i] = ptr[i + 2]; 3202 if (i != MAGIC_SETS - 1) 3203 map->magic[i + 1] = map->magic[i] + map->nmagic[i]; 3204 nentries += map->nmagic[i]; 3205 } 3206 if (entries != nentries + 1) { 3207 file_error(ms, 0, "Inconsistent entries in `%s' %u != %u", 3208 dbname, entries, nentries + 1); 3209 return -1; 3210 } 3211 if (needsbyteswap) 3212 for (i = 0; i < MAGIC_SETS; i++) 3213 byteswap(map->magic[i], map->nmagic[i]); 3214 return 0; 3215 } 3216 3217 /* 3218 * handle an mmaped file. 3219 */ 3220 private int 3221 apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn) 3222 { 3223 static const size_t nm = sizeof(*map->nmagic) * MAGIC_SETS; 3224 static const size_t m = sizeof(**map->magic); 3225 int fd = -1; 3226 size_t len; 3227 char *dbname; 3228 int rv = -1; 3229 uint32_t i; 3230 union { 3231 struct magic m; 3232 uint32_t h[2 + MAGIC_SETS]; 3233 } hdr; 3234 3235 dbname = mkdbname(ms, fn, 1); 3236 3237 if (dbname == NULL) 3238 goto out; 3239 3240 if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1) 3241 { 3242 file_error(ms, errno, "cannot open `%s'", dbname); 3243 goto out; 3244 } 3245 memset(&hdr, 0, sizeof(hdr)); 3246 hdr.h[0] = MAGICNO; 3247 hdr.h[1] = VERSIONNO; 3248 memcpy(hdr.h + 2, map->nmagic, nm); 3249 3250 if (write(fd, &hdr, sizeof(hdr)) != CAST(ssize_t, sizeof(hdr))) { 3251 file_error(ms, errno, "error writing `%s'", dbname); 3252 goto out2; 3253 } 3254 3255 for (i = 0; i < MAGIC_SETS; i++) { 3256 len = m * map->nmagic[i]; 3257 if (write(fd, map->magic[i], len) != CAST(ssize_t, len)) { 3258 file_error(ms, errno, "error writing `%s'", dbname); 3259 goto out2; 3260 } 3261 } 3262 3263 rv = 0; 3264 out2: 3265 if (fd != -1) 3266 (void)close(fd); 3267 out: 3268 apprentice_unmap(map); 3269 free(dbname); 3270 return rv; 3271 } 3272 3273 private const char ext[] = ".mgc"; 3274 /* 3275 * make a dbname 3276 */ 3277 private char * 3278 mkdbname(struct magic_set *ms, const char *fn, int strip) 3279 { 3280 const char *p, *q; 3281 char *buf; 3282 3283 if (strip) { 3284 if ((p = strrchr(fn, '/')) != NULL) 3285 fn = ++p; 3286 } 3287 3288 for (q = fn; *q; q++) 3289 continue; 3290 /* Look for .mgc */ 3291 for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--) 3292 if (*p != *q) 3293 break; 3294 3295 /* Did not find .mgc, restore q */ 3296 if (p >= ext) 3297 while (*q) 3298 q++; 3299 3300 q++; 3301 /* Compatibility with old code that looked in .mime */ 3302 if (ms->flags & MAGIC_MIME) { 3303 if (asprintf(&buf, "%.*s.mime%s", CAST(int, q - fn), fn, ext) 3304 < 0) 3305 return NULL; 3306 if (access(buf, R_OK) != -1) { 3307 ms->flags &= MAGIC_MIME_TYPE; 3308 return buf; 3309 } 3310 free(buf); 3311 } 3312 if (asprintf(&buf, "%.*s%s", CAST(int, q - fn), fn, ext) < 0) 3313 return NULL; 3314 3315 /* Compatibility with old code that looked in .mime */ 3316 if (strstr(fn, ".mime") != NULL) 3317 ms->flags &= MAGIC_MIME_TYPE; 3318 return buf; 3319 } 3320 3321 /* 3322 * Byteswap an mmap'ed file if needed 3323 */ 3324 private void 3325 byteswap(struct magic *magic, uint32_t nmagic) 3326 { 3327 uint32_t i; 3328 for (i = 0; i < nmagic; i++) 3329 bs1(&magic[i]); 3330 } 3331 3332 /* 3333 * swap a short 3334 */ 3335 private uint16_t 3336 swap2(uint16_t sv) 3337 { 3338 uint16_t rv; 3339 uint8_t *s = RCAST(uint8_t *, RCAST(void *, &sv)); 3340 uint8_t *d = RCAST(uint8_t *, RCAST(void *, &rv)); 3341 d[0] = s[1]; 3342 d[1] = s[0]; 3343 return rv; 3344 } 3345 3346 /* 3347 * swap an int 3348 */ 3349 private uint32_t 3350 swap4(uint32_t sv) 3351 { 3352 uint32_t rv; 3353 uint8_t *s = RCAST(uint8_t *, RCAST(void *, &sv)); 3354 uint8_t *d = RCAST(uint8_t *, RCAST(void *, &rv)); 3355 d[0] = s[3]; 3356 d[1] = s[2]; 3357 d[2] = s[1]; 3358 d[3] = s[0]; 3359 return rv; 3360 } 3361 3362 /* 3363 * swap a quad 3364 */ 3365 private uint64_t 3366 swap8(uint64_t sv) 3367 { 3368 uint64_t rv; 3369 uint8_t *s = RCAST(uint8_t *, RCAST(void *, &sv)); 3370 uint8_t *d = RCAST(uint8_t *, RCAST(void *, &rv)); 3371 #if 0 3372 d[0] = s[3]; 3373 d[1] = s[2]; 3374 d[2] = s[1]; 3375 d[3] = s[0]; 3376 d[4] = s[7]; 3377 d[5] = s[6]; 3378 d[6] = s[5]; 3379 d[7] = s[4]; 3380 #else 3381 d[0] = s[7]; 3382 d[1] = s[6]; 3383 d[2] = s[5]; 3384 d[3] = s[4]; 3385 d[4] = s[3]; 3386 d[5] = s[2]; 3387 d[6] = s[1]; 3388 d[7] = s[0]; 3389 #endif 3390 return rv; 3391 } 3392 3393 /* 3394 * byteswap a single magic entry 3395 */ 3396 private void 3397 bs1(struct magic *m) 3398 { 3399 m->cont_level = swap2(m->cont_level); 3400 m->offset = swap4(CAST(uint32_t, m->offset)); 3401 m->in_offset = swap4(CAST(uint32_t, m->in_offset)); 3402 m->lineno = swap4(CAST(uint32_t, m->lineno)); 3403 if (IS_STRING(m->type)) { 3404 m->str_range = swap4(m->str_range); 3405 m->str_flags = swap4(m->str_flags); 3406 } 3407 else { 3408 m->value.q = swap8(m->value.q); 3409 m->num_mask = swap8(m->num_mask); 3410 } 3411 } 3412 3413 protected size_t 3414 file_pstring_length_size(struct magic_set *ms, const struct magic *m) 3415 { 3416 switch (m->str_flags & PSTRING_LEN) { 3417 case PSTRING_1_LE: 3418 return 1; 3419 case PSTRING_2_LE: 3420 case PSTRING_2_BE: 3421 return 2; 3422 case PSTRING_4_LE: 3423 case PSTRING_4_BE: 3424 return 4; 3425 default: 3426 file_error(ms, 0, "corrupt magic file " 3427 "(bad pascal string length %d)", 3428 m->str_flags & PSTRING_LEN); 3429 return FILE_BADSIZE; 3430 } 3431 } 3432 protected size_t 3433 file_pstring_get_length(struct magic_set *ms, const struct magic *m, 3434 const char *ss) 3435 { 3436 size_t len = 0; 3437 const unsigned char *s = RCAST(const unsigned char *, ss); 3438 unsigned int s3, s2, s1, s0; 3439 3440 switch (m->str_flags & PSTRING_LEN) { 3441 case PSTRING_1_LE: 3442 len = *s; 3443 break; 3444 case PSTRING_2_LE: 3445 s0 = s[0]; 3446 s1 = s[1]; 3447 len = (s1 << 8) | s0; 3448 break; 3449 case PSTRING_2_BE: 3450 s0 = s[0]; 3451 s1 = s[1]; 3452 len = (s0 << 8) | s1; 3453 break; 3454 case PSTRING_4_LE: 3455 s0 = s[0]; 3456 s1 = s[1]; 3457 s2 = s[2]; 3458 s3 = s[3]; 3459 len = (s3 << 24) | (s2 << 16) | (s1 << 8) | s0; 3460 break; 3461 case PSTRING_4_BE: 3462 s0 = s[0]; 3463 s1 = s[1]; 3464 s2 = s[2]; 3465 s3 = s[3]; 3466 len = (s0 << 24) | (s1 << 16) | (s2 << 8) | s3; 3467 break; 3468 default: 3469 file_error(ms, 0, "corrupt magic file " 3470 "(bad pascal string length %d)", 3471 m->str_flags & PSTRING_LEN); 3472 return FILE_BADSIZE; 3473 } 3474 3475 if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF) { 3476 size_t l = file_pstring_length_size(ms, m); 3477 if (l == FILE_BADSIZE) 3478 return l; 3479 len -= l; 3480 } 3481 3482 return len; 3483 } 3484 3485 protected int 3486 file_magicfind(struct magic_set *ms, const char *name, struct mlist *v) 3487 { 3488 uint32_t i, j; 3489 struct mlist *mlist, *ml; 3490 3491 mlist = ms->mlist[1]; 3492 3493 for (ml = mlist->next; ml != mlist; ml = ml->next) { 3494 struct magic *ma = ml->magic; 3495 uint32_t nma = ml->nmagic; 3496 for (i = 0; i < nma; i++) { 3497 if (ma[i].type != FILE_NAME) 3498 continue; 3499 if (strcmp(ma[i].value.s, name) == 0) { 3500 v->magic = &ma[i]; 3501 for (j = i + 1; j < nma; j++) 3502 if (ma[j].cont_level == 0) 3503 break; 3504 v->nmagic = j - i; 3505 return 0; 3506 } 3507 } 3508 } 3509 return -1; 3510 } 3511