1 /* 2 * Copyright (c) Ian F. Darwin 1986-1995. 3 * Software written by Ian F. Darwin and others; 4 * maintained 1995-present by Christos Zoulas and others. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice immediately at the beginning of the file, without modification, 11 * this list of conditions, and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 /* 29 * apprentice - make one pass through /etc/magic, learning its secrets. 
 */

#include "file.h"

#ifndef lint
FILE_RCSID("@(#)$File: apprentice.c,v 1.309 2021/09/24 13:59:19 christos Exp $")
#endif /* lint */

#include "magic.h"
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <stddef.h>
#include <string.h>
#include <assert.h>
#include <ctype.h>
#include <fcntl.h>
#ifdef QUICK
#include <sys/mman.h>
#endif
#include <dirent.h>
#include <limits.h>


/* Advance the local pointer `l' past any run of ASCII whitespace. */
#define EATAB {while (isascii(CAST(unsigned char, *l)) && \
		      isspace(CAST(unsigned char, *l)))  ++l;}
/* Lower-case `l' if it is an upper-case letter; note `l' is evaluated twice. */
#define LOWCASE(l) (isupper(CAST(unsigned char, l)) ? \
			tolower(CAST(unsigned char, l)) : (l))
/*
 * Work around a bug in headers on Digital Unix.
 * At least confirmed for: OSF1 V4.0 878
 */
#if defined(__osf__) && defined(__DECC)
#ifdef MAP_FAILED
#undef MAP_FAILED
#endif
#endif

#ifndef MAP_FAILED
#define MAP_FAILED (void *) -1
#endif

#ifndef MAP_FILE
#define MAP_FILE 0
#endif

#define ALLOC_CHUNK	CAST(size_t, 10)
/* Number of slots by which addentry() grows a magic_entry_set. */
#define ALLOC_INCR	CAST(size_t, 200)

/*
 * How the storage behind a struct magic_map was obtained; selects the
 * release strategy in apprentice_unmap().
 */
#define MAP_TYPE_USER	0	/* nothing but the map itself is released */
#define MAP_TYPE_MALLOC	1	/* released with free() */
#define MAP_TYPE_MMAP	2	/* released with munmap() (QUICK only) */

/*
 * One magic entry while it is being parsed: `mp' points at an array of
 * struct magic of which `cont_count' elements are copied out by
 * coalesce_entries().
 * NOTE(review): max_count is presumably the allocated capacity of `mp';
 * its use is not visible in this part of the file - confirm in parse().
 */
struct magic_entry {
	struct magic *mp;
	uint32_t cont_count;
	uint32_t max_count;
};

/*
 * Growable array of magic entries: `count' slots of `me' are in use out
 * of `max' allocated (see addentry()).
 */
struct magic_entry_set {
	struct magic_entry *me;
	uint32_t count;
	uint32_t max;
};

/*
 * A loaded magic database: the backing region (`p', `len'), how it was
 * obtained (`type', one of MAP_TYPE_*), and for every magic set the
 * array of entries and its element count.
 */
struct magic_map {
	void *p;
	size_t len;
	int type;
	struct magic *magic[MAGIC_SETS];
	uint32_t nmagic[MAGIC_SETS];
};

/* Indexed by FILE_* type number; filled in by init_file_tables(). */
int file_formats[FILE_NAMES_SIZE];
const size_t file_nformats = FILE_NAMES_SIZE;
const char *file_names[FILE_NAMES_SIZE];
const size_t file_nnames = FILE_NAMES_SIZE;

private int getvalue(struct magic_set *ms, struct magic *, const char **, int);
private int hextoint(int);
private const char *getstr(struct magic_set *, struct magic *, const char *,
    int);
private int parse(struct magic_set *, struct magic_entry *, const char *,
    size_t, int);
private void eatsize(const char **);
private int apprentice_1(struct magic_set *, const char *, int);
private size_t apprentice_magic_strength(const struct magic *);
private int apprentice_sort(const void *, const void *);
private void apprentice_list(struct mlist *, int );
private struct magic_map *apprentice_load(struct magic_set *,
    const char *, int);
private struct mlist *mlist_alloc(void);
private void mlist_free_all(struct magic_set *);
private void mlist_free(struct mlist *);
private void byteswap(struct magic *, uint32_t);
private void bs1(struct magic *);
private uint16_t swap2(uint16_t);
private uint32_t swap4(uint32_t);
private uint64_t swap8(uint64_t);
private char *mkdbname(struct magic_set *, const char *, int);
private struct magic_map *apprentice_buf(struct magic_set *, struct magic *,
    size_t);
private struct magic_map *apprentice_map(struct magic_set *, const char *);
private int check_buffer(struct magic_set *, struct magic_map *, const char *);
private void apprentice_unmap(struct magic_map *);
private int apprentice_compile(struct magic_set *, struct magic_map *,
    const char *);
private int check_format_type(const char *, int, const char **);
private int check_format(struct magic_set *, struct magic *);
private int get_op(char);
private int parse_mime(struct magic_set *, struct magic_entry *, const char *,
    size_t);
private int parse_strength(struct magic_set *, struct magic_entry *,
    const char *, size_t);
private int parse_apple(struct magic_set *, struct magic_entry *, const char *,
    size_t);
private int parse_ext(struct magic_set *, struct magic_entry *, const char *,
    size_t);


/* Compared against FILE_MAGICSIZE by apprentice_1() before anything loads. */
private size_t magicsize = sizeof(struct magic);

private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";

/*
 * Table of "!:" directives: the keyword, its length, and the parser that
 * load_1() invokes for it.  Terminated by an all-NULL entry.
 */
private struct {
	const char *name;
	size_t len;
	int (*fun)(struct magic_set *, struct magic_entry *, const char *,
	    size_t);
} bang[] = {
#define	DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
	DECLARE_FIELD(mime),
	DECLARE_FIELD(apple),
	DECLARE_FIELD(ext),
	DECLARE_FIELD(strength),
#undef	DECLARE_FIELD
	{ NULL, 0, NULL }
};

#ifdef COMPILE_ONLY

int main(int, char *[]);

/*
 * Stand-alone driver: compile the single magic file named on the command
 * line.  Exits 1 on a usage error, when magic_open() fails or when
 * magic_compile() reports -1; exits 0 otherwise.
 */
int
main(int argc, char *argv[])
{
	int ret;
	struct magic_set *ms;
	char *progname;

	/* Use the last path component of argv[0] in messages. */
	if ((progname = strrchr(argv[0], '/')) != NULL)
		progname++;
	else
		progname = argv[0];

	if (argc != 2) {
		(void)fprintf(stderr, "Usage: %s file\n", progname);
		return 1;
	}

	if ((ms = magic_open(MAGIC_CHECK)) == NULL) {
		(void)fprintf(stderr, "%s: %s\n", progname, strerror(errno));
		return 1;
	}
	ret = magic_compile(ms, argv[1]) == -1 ? 1 : 0;
	if (ret == 1)
		(void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms));
	magic_close(ms);
	return ret;
}
#endif /* COMPILE_ONLY */

/*
 * One row of a type-name table: the keyword, its length, the FILE_* type
 * it selects and the FILE_FMT_* class of that type.
 */
struct type_tbl_s {
	const char name[16];
	const size_t len;
	const int type;
	const int format;
};

/*
 * XXX - the actual Single UNIX Specification says that "long" means "long",
 * as in the C data type, but we treat it as meaning "4-byte integer".
 * Given that the OS X version of file 5.04 did the same, I guess that passes
 * the actual test; having "long" be dependent on how big a "long" is on
 * the machine running "file" is silly.
216 */ 217 static const struct type_tbl_s type_tbl[] = { 218 # define XX(s) s, (sizeof(s) - 1) 219 # define XX_NULL "", 0 220 { XX("invalid"), FILE_INVALID, FILE_FMT_NONE }, 221 { XX("byte"), FILE_BYTE, FILE_FMT_NUM }, 222 { XX("short"), FILE_SHORT, FILE_FMT_NUM }, 223 { XX("default"), FILE_DEFAULT, FILE_FMT_NONE }, 224 { XX("long"), FILE_LONG, FILE_FMT_NUM }, 225 { XX("string"), FILE_STRING, FILE_FMT_STR }, 226 { XX("date"), FILE_DATE, FILE_FMT_STR }, 227 { XX("beshort"), FILE_BESHORT, FILE_FMT_NUM }, 228 { XX("belong"), FILE_BELONG, FILE_FMT_NUM }, 229 { XX("bedate"), FILE_BEDATE, FILE_FMT_STR }, 230 { XX("leshort"), FILE_LESHORT, FILE_FMT_NUM }, 231 { XX("lelong"), FILE_LELONG, FILE_FMT_NUM }, 232 { XX("ledate"), FILE_LEDATE, FILE_FMT_STR }, 233 { XX("pstring"), FILE_PSTRING, FILE_FMT_STR }, 234 { XX("ldate"), FILE_LDATE, FILE_FMT_STR }, 235 { XX("beldate"), FILE_BELDATE, FILE_FMT_STR }, 236 { XX("leldate"), FILE_LELDATE, FILE_FMT_STR }, 237 { XX("regex"), FILE_REGEX, FILE_FMT_STR }, 238 { XX("bestring16"), FILE_BESTRING16, FILE_FMT_STR }, 239 { XX("lestring16"), FILE_LESTRING16, FILE_FMT_STR }, 240 { XX("search"), FILE_SEARCH, FILE_FMT_STR }, 241 { XX("medate"), FILE_MEDATE, FILE_FMT_STR }, 242 { XX("meldate"), FILE_MELDATE, FILE_FMT_STR }, 243 { XX("melong"), FILE_MELONG, FILE_FMT_NUM }, 244 { XX("quad"), FILE_QUAD, FILE_FMT_QUAD }, 245 { XX("lequad"), FILE_LEQUAD, FILE_FMT_QUAD }, 246 { XX("bequad"), FILE_BEQUAD, FILE_FMT_QUAD }, 247 { XX("qdate"), FILE_QDATE, FILE_FMT_STR }, 248 { XX("leqdate"), FILE_LEQDATE, FILE_FMT_STR }, 249 { XX("beqdate"), FILE_BEQDATE, FILE_FMT_STR }, 250 { XX("qldate"), FILE_QLDATE, FILE_FMT_STR }, 251 { XX("leqldate"), FILE_LEQLDATE, FILE_FMT_STR }, 252 { XX("beqldate"), FILE_BEQLDATE, FILE_FMT_STR }, 253 { XX("float"), FILE_FLOAT, FILE_FMT_FLOAT }, 254 { XX("befloat"), FILE_BEFLOAT, FILE_FMT_FLOAT }, 255 { XX("lefloat"), FILE_LEFLOAT, FILE_FMT_FLOAT }, 256 { XX("double"), FILE_DOUBLE, FILE_FMT_DOUBLE }, 257 { XX("bedouble"), 
FILE_BEDOUBLE, FILE_FMT_DOUBLE }, 258 { XX("ledouble"), FILE_LEDOUBLE, FILE_FMT_DOUBLE }, 259 { XX("leid3"), FILE_LEID3, FILE_FMT_NUM }, 260 { XX("beid3"), FILE_BEID3, FILE_FMT_NUM }, 261 { XX("indirect"), FILE_INDIRECT, FILE_FMT_NUM }, 262 { XX("qwdate"), FILE_QWDATE, FILE_FMT_STR }, 263 { XX("leqwdate"), FILE_LEQWDATE, FILE_FMT_STR }, 264 { XX("beqwdate"), FILE_BEQWDATE, FILE_FMT_STR }, 265 { XX("name"), FILE_NAME, FILE_FMT_NONE }, 266 { XX("use"), FILE_USE, FILE_FMT_NONE }, 267 { XX("clear"), FILE_CLEAR, FILE_FMT_NONE }, 268 { XX("der"), FILE_DER, FILE_FMT_STR }, 269 { XX("guid"), FILE_GUID, FILE_FMT_STR }, 270 { XX("offset"), FILE_OFFSET, FILE_FMT_QUAD }, 271 { XX("bevarint"), FILE_BEVARINT, FILE_FMT_STR }, 272 { XX("levarint"), FILE_LEVARINT, FILE_FMT_STR }, 273 { XX_NULL, FILE_INVALID, FILE_FMT_NONE }, 274 }; 275 276 /* 277 * These are not types, and cannot be preceded by "u" to make them 278 * unsigned. 279 */ 280 static const struct type_tbl_s special_tbl[] = { 281 { XX("der"), FILE_DER, FILE_FMT_STR }, 282 { XX("name"), FILE_NAME, FILE_FMT_STR }, 283 { XX("use"), FILE_USE, FILE_FMT_STR }, 284 { XX_NULL, FILE_INVALID, FILE_FMT_NONE }, 285 }; 286 # undef XX 287 # undef XX_NULL 288 289 private int 290 get_type(const struct type_tbl_s *tbl, const char *l, const char **t) 291 { 292 const struct type_tbl_s *p; 293 294 for (p = tbl; p->len; p++) { 295 if (strncmp(l, p->name, p->len) == 0) { 296 if (t) 297 *t = l + p->len; 298 break; 299 } 300 } 301 return p->type; 302 } 303 304 private off_t 305 maxoff_t(void) { 306 if (/*CONSTCOND*/sizeof(off_t) == sizeof(int)) 307 return CAST(off_t, INT_MAX); 308 if (/*CONSTCOND*/sizeof(off_t) == sizeof(long)) 309 return CAST(off_t, LONG_MAX); 310 return 0x7fffffff; 311 } 312 313 private int 314 get_standard_integer_type(const char *l, const char **t) 315 { 316 int type; 317 318 if (isalpha(CAST(unsigned char, l[1]))) { 319 switch (l[1]) { 320 case 'C': 321 /* "dC" and "uC" */ 322 type = FILE_BYTE; 323 break; 324 case 'S': 325 
/* "dS" and "uS" */ 326 type = FILE_SHORT; 327 break; 328 case 'I': 329 case 'L': 330 /* 331 * "dI", "dL", "uI", and "uL". 332 * 333 * XXX - the actual Single UNIX Specification says 334 * that "L" means "long", as in the C data type, 335 * but we treat it as meaning "4-byte integer". 336 * Given that the OS X version of file 5.04 did 337 * the same, I guess that passes the actual SUS 338 * validation suite; having "dL" be dependent on 339 * how big a "long" is on the machine running 340 * "file" is silly. 341 */ 342 type = FILE_LONG; 343 break; 344 case 'Q': 345 /* "dQ" and "uQ" */ 346 type = FILE_QUAD; 347 break; 348 default: 349 /* "d{anything else}", "u{anything else}" */ 350 return FILE_INVALID; 351 } 352 l += 2; 353 } else if (isdigit(CAST(unsigned char, l[1]))) { 354 /* 355 * "d{num}" and "u{num}"; we only support {num} values 356 * of 1, 2, 4, and 8 - the Single UNIX Specification 357 * doesn't say anything about whether arbitrary 358 * values should be supported, but both the Solaris 10 359 * and OS X Mountain Lion versions of file passed the 360 * Single UNIX Specification validation suite, and 361 * neither of them support values bigger than 8 or 362 * non-power-of-2 values. 363 */ 364 if (isdigit(CAST(unsigned char, l[2]))) { 365 /* Multi-digit, so > 9 */ 366 return FILE_INVALID; 367 } 368 switch (l[1]) { 369 case '1': 370 type = FILE_BYTE; 371 break; 372 case '2': 373 type = FILE_SHORT; 374 break; 375 case '4': 376 type = FILE_LONG; 377 break; 378 case '8': 379 type = FILE_QUAD; 380 break; 381 default: 382 /* XXX - what about 3, 5, 6, or 7? */ 383 return FILE_INVALID; 384 } 385 l += 2; 386 } else { 387 /* 388 * "d" or "u" by itself. 
389 */ 390 type = FILE_LONG; 391 ++l; 392 } 393 if (t) 394 *t = l; 395 return type; 396 } 397 398 private void 399 init_file_tables(void) 400 { 401 static int done = 0; 402 const struct type_tbl_s *p; 403 404 if (done) 405 return; 406 done++; 407 408 for (p = type_tbl; p->len; p++) { 409 assert(p->type < FILE_NAMES_SIZE); 410 file_names[p->type] = p->name; 411 file_formats[p->type] = p->format; 412 } 413 assert(p - type_tbl == FILE_NAMES_SIZE); 414 } 415 416 private int 417 add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx) 418 { 419 struct mlist *ml; 420 421 mlp->map = NULL; 422 if ((ml = CAST(struct mlist *, malloc(sizeof(*ml)))) == NULL) 423 return -1; 424 425 ml->map = idx == 0 ? map : NULL; 426 ml->magic = map->magic[idx]; 427 ml->nmagic = map->nmagic[idx]; 428 429 mlp->prev->next = ml; 430 ml->prev = mlp->prev; 431 ml->next = mlp; 432 mlp->prev = ml; 433 return 0; 434 } 435 436 /* 437 * Handle one file or directory. 438 */ 439 private int 440 apprentice_1(struct magic_set *ms, const char *fn, int action) 441 { 442 struct magic_map *map; 443 #ifndef COMPILE_ONLY 444 struct mlist *ml; 445 size_t i; 446 #endif 447 448 if (magicsize != FILE_MAGICSIZE) { 449 file_error(ms, 0, "magic element size %lu != %lu", 450 CAST(unsigned long, sizeof(*map->magic[0])), 451 CAST(unsigned long, FILE_MAGICSIZE)); 452 return -1; 453 } 454 455 if (action == FILE_COMPILE) { 456 map = apprentice_load(ms, fn, action); 457 if (map == NULL) 458 return -1; 459 return apprentice_compile(ms, map, fn); 460 } 461 462 #ifndef COMPILE_ONLY 463 map = apprentice_map(ms, fn); 464 if (map == NULL) { 465 if (ms->flags & MAGIC_CHECK) 466 file_magwarn(ms, "using regular magic file `%s'", fn); 467 map = apprentice_load(ms, fn, action); 468 if (map == NULL) 469 return -1; 470 } 471 472 for (i = 0; i < MAGIC_SETS; i++) { 473 if (add_mlist(ms->mlist[i], map, i) == -1) { 474 /* failed to add to any list, free explicitly */ 475 if (i == 0) 476 apprentice_unmap(map); 477 else 478 
mlist_free_all(ms); 479 file_oomem(ms, sizeof(*ml)); 480 return -1; 481 } 482 } 483 484 if (action == FILE_LIST) { 485 for (i = 0; i < MAGIC_SETS; i++) { 486 printf("Set %" SIZE_T_FORMAT "u:\nBinary patterns:\n", 487 i); 488 apprentice_list(ms->mlist[i], BINTEST); 489 printf("Text patterns:\n"); 490 apprentice_list(ms->mlist[i], TEXTTEST); 491 } 492 } 493 return 0; 494 #else 495 return 0; 496 #endif /* COMPILE_ONLY */ 497 } 498 499 protected void 500 file_ms_free(struct magic_set *ms) 501 { 502 size_t i; 503 if (ms == NULL) 504 return; 505 for (i = 0; i < MAGIC_SETS; i++) 506 mlist_free(ms->mlist[i]); 507 free(ms->o.pbuf); 508 free(ms->o.buf); 509 free(ms->c.li); 510 free(ms); 511 } 512 513 protected struct magic_set * 514 file_ms_alloc(int flags) 515 { 516 struct magic_set *ms; 517 size_t i, len; 518 519 if ((ms = CAST(struct magic_set *, calloc(CAST(size_t, 1u), 520 sizeof(struct magic_set)))) == NULL) 521 return NULL; 522 523 if (magic_setflags(ms, flags) == -1) { 524 errno = EINVAL; 525 goto free; 526 } 527 528 ms->o.buf = ms->o.pbuf = NULL; 529 ms->o.blen = 0; 530 len = (ms->c.len = 10) * sizeof(*ms->c.li); 531 532 if ((ms->c.li = CAST(struct level_info *, malloc(len))) == NULL) 533 goto free; 534 535 ms->event_flags = 0; 536 ms->error = -1; 537 for (i = 0; i < MAGIC_SETS; i++) 538 ms->mlist[i] = NULL; 539 ms->file = "unknown"; 540 ms->line = 0; 541 ms->indir_max = FILE_INDIR_MAX; 542 ms->name_max = FILE_NAME_MAX; 543 ms->elf_shnum_max = FILE_ELF_SHNUM_MAX; 544 ms->elf_phnum_max = FILE_ELF_PHNUM_MAX; 545 ms->elf_notes_max = FILE_ELF_NOTES_MAX; 546 ms->regex_max = FILE_REGEX_MAX; 547 ms->bytes_max = FILE_BYTES_MAX; 548 ms->encoding_max = FILE_ENCODING_MAX; 549 return ms; 550 free: 551 free(ms); 552 return NULL; 553 } 554 555 private void 556 apprentice_unmap(struct magic_map *map) 557 { 558 size_t i; 559 char *p; 560 if (map == NULL) 561 return; 562 563 switch (map->type) { 564 case MAP_TYPE_USER: 565 break; 566 case MAP_TYPE_MALLOC: 567 p = CAST(char *, 
map->p); 568 for (i = 0; i < MAGIC_SETS; i++) { 569 char *b = RCAST(char *, map->magic[i]); 570 if (p != NULL && b >= p && b <= p + map->len) 571 continue; 572 free(b); 573 } 574 free(p); 575 break; 576 #ifdef QUICK 577 case MAP_TYPE_MMAP: 578 if (map->p && map->p != MAP_FAILED) 579 (void)munmap(map->p, map->len); 580 break; 581 #endif 582 default: 583 abort(); 584 } 585 free(map); 586 } 587 588 private struct mlist * 589 mlist_alloc(void) 590 { 591 struct mlist *mlist; 592 if ((mlist = CAST(struct mlist *, calloc(1, sizeof(*mlist)))) == NULL) { 593 return NULL; 594 } 595 mlist->next = mlist->prev = mlist; 596 return mlist; 597 } 598 599 private void 600 mlist_free_all(struct magic_set *ms) 601 { 602 size_t i; 603 604 for (i = 0; i < MAGIC_SETS; i++) { 605 mlist_free(ms->mlist[i]); 606 ms->mlist[i] = NULL; 607 } 608 } 609 610 private void 611 mlist_free_one(struct mlist *ml) 612 { 613 if (ml->map) 614 apprentice_unmap(CAST(struct magic_map *, ml->map)); 615 free(ml); 616 } 617 618 private void 619 mlist_free(struct mlist *mlist) 620 { 621 struct mlist *ml, *next; 622 623 if (mlist == NULL) 624 return; 625 626 for (ml = mlist->next; ml != mlist;) { 627 next = ml->next; 628 mlist_free_one(ml); 629 ml = next; 630 } 631 mlist_free_one(mlist); 632 } 633 634 #ifndef COMPILE_ONLY 635 /* void **bufs: an array of compiled magic files */ 636 protected int 637 buffer_apprentice(struct magic_set *ms, struct magic **bufs, 638 size_t *sizes, size_t nbufs) 639 { 640 size_t i, j; 641 struct mlist *ml; 642 struct magic_map *map; 643 644 if (nbufs == 0) 645 return -1; 646 647 (void)file_reset(ms, 0); 648 649 init_file_tables(); 650 651 for (i = 0; i < MAGIC_SETS; i++) { 652 mlist_free(ms->mlist[i]); 653 if ((ms->mlist[i] = mlist_alloc()) == NULL) { 654 file_oomem(ms, sizeof(*ms->mlist[i])); 655 goto fail; 656 } 657 } 658 659 for (i = 0; i < nbufs; i++) { 660 map = apprentice_buf(ms, bufs[i], sizes[i]); 661 if (map == NULL) 662 goto fail; 663 664 for (j = 0; j < MAGIC_SETS; j++) { 
665 if (add_mlist(ms->mlist[j], map, j) == -1) { 666 file_oomem(ms, sizeof(*ml)); 667 goto fail; 668 } 669 } 670 } 671 672 return 0; 673 fail: 674 mlist_free_all(ms); 675 return -1; 676 } 677 #endif 678 679 /* const char *fn: list of magic files and directories */ 680 protected int 681 file_apprentice(struct magic_set *ms, const char *fn, int action) 682 { 683 char *p, *mfn; 684 int fileerr, errs = -1; 685 size_t i, j; 686 687 (void)file_reset(ms, 0); 688 689 if ((fn = magic_getpath(fn, action)) == NULL) 690 return -1; 691 692 init_file_tables(); 693 694 if ((mfn = strdup(fn)) == NULL) { 695 file_oomem(ms, strlen(fn)); 696 return -1; 697 } 698 699 for (i = 0; i < MAGIC_SETS; i++) { 700 mlist_free(ms->mlist[i]); 701 if ((ms->mlist[i] = mlist_alloc()) == NULL) { 702 file_oomem(ms, sizeof(*ms->mlist[i])); 703 for (j = 0; j < i; j++) { 704 mlist_free(ms->mlist[j]); 705 ms->mlist[j] = NULL; 706 } 707 free(mfn); 708 return -1; 709 } 710 } 711 fn = mfn; 712 713 while (fn) { 714 p = strchr(fn, PATHSEP); 715 if (p) 716 *p++ = '\0'; 717 if (*fn == '\0') 718 break; 719 fileerr = apprentice_1(ms, fn, action); 720 errs = MAX(errs, fileerr); 721 fn = p; 722 } 723 724 free(mfn); 725 726 if (errs == -1) { 727 for (i = 0; i < MAGIC_SETS; i++) { 728 mlist_free(ms->mlist[i]); 729 ms->mlist[i] = NULL; 730 } 731 file_error(ms, 0, "could not find any valid magic files!"); 732 return -1; 733 } 734 735 #if 0 736 /* 737 * Always leave the database loaded 738 */ 739 if (action == FILE_LOAD) 740 return 0; 741 742 for (i = 0; i < MAGIC_SETS; i++) { 743 mlist_free(ms->mlist[i]); 744 ms->mlist[i] = NULL; 745 } 746 #endif 747 748 switch (action) { 749 case FILE_LOAD: 750 case FILE_COMPILE: 751 case FILE_CHECK: 752 case FILE_LIST: 753 return 0; 754 default: 755 file_error(ms, 0, "Invalid action %d", action); 756 return -1; 757 } 758 } 759 760 /* 761 * Compute the real length of a magic expression, for the purposes 762 * of determining how "strong" a magic expression is (approximating 763 * how 
specific its matches are): 764 * - magic characters count 0 unless escaped. 765 * - [] expressions count 1 766 * - {} expressions count 0 767 * - regular characters or escaped magic characters count 1 768 * - 0 length expressions count as one 769 */ 770 private size_t 771 nonmagic(const char *str) 772 { 773 const char *p; 774 size_t rv = 0; 775 776 for (p = str; *p; p++) 777 switch (*p) { 778 case '\\': /* Escaped anything counts 1 */ 779 if (!*++p) 780 p--; 781 rv++; 782 continue; 783 case '?': /* Magic characters count 0 */ 784 case '*': 785 case '.': 786 case '+': 787 case '^': 788 case '$': 789 continue; 790 case '[': /* Bracketed expressions count 1 the ']' */ 791 while (*p && *p != ']') 792 p++; 793 p--; 794 continue; 795 case '{': /* Braced expressions count 0 */ 796 while (*p && *p != '}') 797 p++; 798 if (!*p) 799 p--; 800 continue; 801 default: /* Anything else counts 1 */ 802 rv++; 803 continue; 804 } 805 806 return rv == 0 ? 1 : rv; /* Return at least 1 */ 807 } 808 809 810 private size_t 811 typesize(int type) 812 { 813 switch (type) { 814 case FILE_BYTE: 815 return 1; 816 817 case FILE_SHORT: 818 case FILE_LESHORT: 819 case FILE_BESHORT: 820 return 2; 821 822 case FILE_LONG: 823 case FILE_LELONG: 824 case FILE_BELONG: 825 case FILE_MELONG: 826 return 4; 827 828 case FILE_DATE: 829 case FILE_LEDATE: 830 case FILE_BEDATE: 831 case FILE_MEDATE: 832 case FILE_LDATE: 833 case FILE_LELDATE: 834 case FILE_BELDATE: 835 case FILE_MELDATE: 836 case FILE_FLOAT: 837 case FILE_BEFLOAT: 838 case FILE_LEFLOAT: 839 return 4; 840 841 case FILE_QUAD: 842 case FILE_BEQUAD: 843 case FILE_LEQUAD: 844 case FILE_QDATE: 845 case FILE_LEQDATE: 846 case FILE_BEQDATE: 847 case FILE_QLDATE: 848 case FILE_LEQLDATE: 849 case FILE_BEQLDATE: 850 case FILE_QWDATE: 851 case FILE_LEQWDATE: 852 case FILE_BEQWDATE: 853 case FILE_DOUBLE: 854 case FILE_BEDOUBLE: 855 case FILE_LEDOUBLE: 856 case FILE_OFFSET: 857 case FILE_BEVARINT: 858 case FILE_LEVARINT: 859 return 8; 860 861 case 
FILE_GUID: 862 return 16; 863 864 default: 865 return FILE_BADSIZE; 866 } 867 } 868 869 /* 870 * Get weight of this magic entry, for sorting purposes. 871 */ 872 private size_t 873 apprentice_magic_strength(const struct magic *m) 874 { 875 #define MULT 10U 876 size_t ts, v; 877 ssize_t val = 2 * MULT; /* baseline strength */ 878 879 switch (m->type) { 880 case FILE_DEFAULT: /* make sure this sorts last */ 881 if (m->factor_op != FILE_FACTOR_OP_NONE) 882 abort(); 883 return 0; 884 885 case FILE_BYTE: 886 case FILE_SHORT: 887 case FILE_LESHORT: 888 case FILE_BESHORT: 889 case FILE_LONG: 890 case FILE_LELONG: 891 case FILE_BELONG: 892 case FILE_MELONG: 893 case FILE_DATE: 894 case FILE_LEDATE: 895 case FILE_BEDATE: 896 case FILE_MEDATE: 897 case FILE_LDATE: 898 case FILE_LELDATE: 899 case FILE_BELDATE: 900 case FILE_MELDATE: 901 case FILE_FLOAT: 902 case FILE_BEFLOAT: 903 case FILE_LEFLOAT: 904 case FILE_QUAD: 905 case FILE_BEQUAD: 906 case FILE_LEQUAD: 907 case FILE_QDATE: 908 case FILE_LEQDATE: 909 case FILE_BEQDATE: 910 case FILE_QLDATE: 911 case FILE_LEQLDATE: 912 case FILE_BEQLDATE: 913 case FILE_QWDATE: 914 case FILE_LEQWDATE: 915 case FILE_BEQWDATE: 916 case FILE_DOUBLE: 917 case FILE_BEDOUBLE: 918 case FILE_LEDOUBLE: 919 case FILE_BEVARINT: 920 case FILE_LEVARINT: 921 case FILE_GUID: 922 case FILE_OFFSET: 923 ts = typesize(m->type); 924 if (ts == FILE_BADSIZE) 925 abort(); 926 val += ts * MULT; 927 break; 928 929 case FILE_PSTRING: 930 case FILE_STRING: 931 val += m->vallen * MULT; 932 break; 933 934 case FILE_BESTRING16: 935 case FILE_LESTRING16: 936 val += m->vallen * MULT / 2; 937 break; 938 939 case FILE_SEARCH: 940 if (m->vallen == 0) 941 break; 942 val += m->vallen * MAX(MULT / m->vallen, 1); 943 break; 944 945 case FILE_REGEX: 946 v = nonmagic(m->value.s); 947 val += v * MAX(MULT / v, 1); 948 break; 949 950 case FILE_INDIRECT: 951 case FILE_NAME: 952 case FILE_USE: 953 break; 954 955 case FILE_DER: 956 val += MULT; 957 break; 958 959 default: 960 
(void)fprintf(stderr, "Bad type %d\n", m->type); 961 abort(); 962 } 963 964 switch (m->reln) { 965 case 'x': /* matches anything penalize */ 966 case '!': /* matches almost anything penalize */ 967 val = 0; 968 break; 969 970 case '=': /* Exact match, prefer */ 971 val += MULT; 972 break; 973 974 case '>': 975 case '<': /* comparison match reduce strength */ 976 val -= 2 * MULT; 977 break; 978 979 case '^': 980 case '&': /* masking bits, we could count them too */ 981 val -= MULT; 982 break; 983 984 default: 985 (void)fprintf(stderr, "Bad relation %c\n", m->reln); 986 abort(); 987 } 988 989 switch (m->factor_op) { 990 case FILE_FACTOR_OP_NONE: 991 break; 992 case FILE_FACTOR_OP_PLUS: 993 val += m->factor; 994 break; 995 case FILE_FACTOR_OP_MINUS: 996 val -= m->factor; 997 break; 998 case FILE_FACTOR_OP_TIMES: 999 val *= m->factor; 1000 break; 1001 case FILE_FACTOR_OP_DIV: 1002 val /= m->factor; 1003 break; 1004 default: 1005 abort(); 1006 } 1007 1008 if (val <= 0) /* ensure we only return 0 for FILE_DEFAULT */ 1009 val = 1; 1010 1011 /* 1012 * Magic entries with no description get a bonus because they depend 1013 * on subsequent magic entries to print something. 
1014 */ 1015 if (m->desc[0] == '\0') 1016 val++; 1017 return val; 1018 } 1019 1020 /* 1021 * Sort callback for sorting entries by "strength" (basically length) 1022 */ 1023 private int 1024 apprentice_sort(const void *a, const void *b) 1025 { 1026 const struct magic_entry *ma = CAST(const struct magic_entry *, a); 1027 const struct magic_entry *mb = CAST(const struct magic_entry *, b); 1028 size_t sa = apprentice_magic_strength(ma->mp); 1029 size_t sb = apprentice_magic_strength(mb->mp); 1030 if (sa == sb) 1031 return 0; 1032 else if (sa > sb) 1033 return -1; 1034 else 1035 return 1; 1036 } 1037 1038 /* 1039 * Shows sorted patterns list in the order which is used for the matching 1040 */ 1041 private void 1042 apprentice_list(struct mlist *mlist, int mode) 1043 { 1044 uint32_t magindex = 0; 1045 struct mlist *ml; 1046 for (ml = mlist->next; ml != mlist; ml = ml->next) { 1047 for (magindex = 0; magindex < ml->nmagic; magindex++) { 1048 struct magic *m = &ml->magic[magindex]; 1049 if ((m->flag & mode) != mode) { 1050 /* Skip sub-tests */ 1051 while (magindex + 1 < ml->nmagic && 1052 ml->magic[magindex + 1].cont_level != 0) 1053 ++magindex; 1054 continue; /* Skip to next top-level test*/ 1055 } 1056 1057 /* 1058 * Try to iterate over the tree until we find item with 1059 * description/mimetype. 
1060 */ 1061 while (magindex + 1 < ml->nmagic && 1062 ml->magic[magindex + 1].cont_level != 0 && 1063 *ml->magic[magindex].desc == '\0' && 1064 *ml->magic[magindex].mimetype == '\0') 1065 magindex++; 1066 1067 printf("Strength = %3" SIZE_T_FORMAT "u@%u: %s [%s]\n", 1068 apprentice_magic_strength(m), 1069 ml->magic[magindex].lineno, 1070 ml->magic[magindex].desc, 1071 ml->magic[magindex].mimetype); 1072 } 1073 } 1074 } 1075 1076 private void 1077 set_test_type(struct magic *mstart, struct magic *m) 1078 { 1079 switch (m->type) { 1080 case FILE_BYTE: 1081 case FILE_SHORT: 1082 case FILE_LONG: 1083 case FILE_DATE: 1084 case FILE_BESHORT: 1085 case FILE_BELONG: 1086 case FILE_BEDATE: 1087 case FILE_LESHORT: 1088 case FILE_LELONG: 1089 case FILE_LEDATE: 1090 case FILE_LDATE: 1091 case FILE_BELDATE: 1092 case FILE_LELDATE: 1093 case FILE_MEDATE: 1094 case FILE_MELDATE: 1095 case FILE_MELONG: 1096 case FILE_QUAD: 1097 case FILE_LEQUAD: 1098 case FILE_BEQUAD: 1099 case FILE_QDATE: 1100 case FILE_LEQDATE: 1101 case FILE_BEQDATE: 1102 case FILE_QLDATE: 1103 case FILE_LEQLDATE: 1104 case FILE_BEQLDATE: 1105 case FILE_QWDATE: 1106 case FILE_LEQWDATE: 1107 case FILE_BEQWDATE: 1108 case FILE_FLOAT: 1109 case FILE_BEFLOAT: 1110 case FILE_LEFLOAT: 1111 case FILE_DOUBLE: 1112 case FILE_BEDOUBLE: 1113 case FILE_LEDOUBLE: 1114 case FILE_BEVARINT: 1115 case FILE_LEVARINT: 1116 case FILE_DER: 1117 case FILE_GUID: 1118 case FILE_OFFSET: 1119 mstart->flag |= BINTEST; 1120 break; 1121 case FILE_STRING: 1122 case FILE_PSTRING: 1123 case FILE_BESTRING16: 1124 case FILE_LESTRING16: 1125 /* Allow text overrides */ 1126 if (mstart->str_flags & STRING_TEXTTEST) 1127 mstart->flag |= TEXTTEST; 1128 else 1129 mstart->flag |= BINTEST; 1130 break; 1131 case FILE_REGEX: 1132 case FILE_SEARCH: 1133 /* Check for override */ 1134 if (mstart->str_flags & STRING_BINTEST) 1135 mstart->flag |= BINTEST; 1136 if (mstart->str_flags & STRING_TEXTTEST) 1137 mstart->flag |= TEXTTEST; 1138 1139 if (mstart->flag & 
(TEXTTEST|BINTEST)) 1140 break; 1141 1142 /* binary test if pattern is not text */ 1143 if (file_looks_utf8(m->value.us, CAST(size_t, m->vallen), NULL, 1144 NULL) <= 0) 1145 mstart->flag |= BINTEST; 1146 else 1147 mstart->flag |= TEXTTEST; 1148 break; 1149 case FILE_DEFAULT: 1150 /* can't deduce anything; we shouldn't see this at the 1151 top level anyway */ 1152 break; 1153 case FILE_INVALID: 1154 default: 1155 /* invalid search type, but no need to complain here */ 1156 break; 1157 } 1158 } 1159 1160 private int 1161 addentry(struct magic_set *ms, struct magic_entry *me, 1162 struct magic_entry_set *mset) 1163 { 1164 size_t i = me->mp->type == FILE_NAME ? 1 : 0; 1165 if (mset[i].count == mset[i].max) { 1166 struct magic_entry *mp; 1167 1168 mset[i].max += ALLOC_INCR; 1169 if ((mp = CAST(struct magic_entry *, 1170 realloc(mset[i].me, sizeof(*mp) * mset[i].max))) == 1171 NULL) { 1172 file_oomem(ms, sizeof(*mp) * mset[i].max); 1173 return -1; 1174 } 1175 (void)memset(&mp[mset[i].count], 0, sizeof(*mp) * 1176 ALLOC_INCR); 1177 mset[i].me = mp; 1178 } 1179 mset[i].me[mset[i].count++] = *me; 1180 memset(me, 0, sizeof(*me)); 1181 return 0; 1182 } 1183 1184 /* 1185 * Load and parse one file. 
1186 */ 1187 private void 1188 load_1(struct magic_set *ms, int action, const char *fn, int *errs, 1189 struct magic_entry_set *mset) 1190 { 1191 size_t lineno = 0, llen = 0; 1192 char *line = NULL; 1193 ssize_t len; 1194 struct magic_entry me; 1195 1196 FILE *f = fopen(ms->file = fn, "r"); 1197 if (f == NULL) { 1198 if (errno != ENOENT) 1199 file_error(ms, errno, "cannot read magic file `%s'", 1200 fn); 1201 (*errs)++; 1202 return; 1203 } 1204 1205 memset(&me, 0, sizeof(me)); 1206 /* read and parse this file */ 1207 for (ms->line = 1; (len = getline(&line, &llen, f)) != -1; 1208 ms->line++) { 1209 if (len == 0) /* null line, garbage, etc */ 1210 continue; 1211 if (line[len - 1] == '\n') { 1212 lineno++; 1213 line[len - 1] = '\0'; /* delete newline */ 1214 } 1215 switch (line[0]) { 1216 case '\0': /* empty, do not parse */ 1217 case '#': /* comment, do not parse */ 1218 continue; 1219 case '!': 1220 if (line[1] == ':') { 1221 size_t i; 1222 1223 for (i = 0; bang[i].name != NULL; i++) { 1224 if (CAST(size_t, len - 2) > bang[i].len && 1225 memcmp(bang[i].name, line + 2, 1226 bang[i].len) == 0) 1227 break; 1228 } 1229 if (bang[i].name == NULL) { 1230 file_error(ms, 0, 1231 "Unknown !: entry `%s'", line); 1232 (*errs)++; 1233 continue; 1234 } 1235 if (me.mp == NULL) { 1236 file_error(ms, 0, 1237 "No current entry for :!%s type", 1238 bang[i].name); 1239 (*errs)++; 1240 continue; 1241 } 1242 if ((*bang[i].fun)(ms, &me, 1243 line + bang[i].len + 2, 1244 len - bang[i].len - 2) != 0) { 1245 (*errs)++; 1246 continue; 1247 } 1248 continue; 1249 } 1250 /*FALLTHROUGH*/ 1251 default: 1252 again: 1253 switch (parse(ms, &me, line, lineno, action)) { 1254 case 0: 1255 continue; 1256 case 1: 1257 (void)addentry(ms, &me, mset); 1258 goto again; 1259 default: 1260 (*errs)++; 1261 break; 1262 } 1263 } 1264 } 1265 if (me.mp) 1266 (void)addentry(ms, &me, mset); 1267 free(line); 1268 (void)fclose(f); 1269 } 1270 1271 /* 1272 * parse a file or directory of files 1273 * const char *fn: 
name of magic file or directory 1274 */ 1275 private int 1276 cmpstrp(const void *p1, const void *p2) 1277 { 1278 return strcmp(*RCAST(char *const *, p1), *RCAST(char *const *, p2)); 1279 } 1280 1281 1282 private uint32_t 1283 set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme, 1284 uint32_t starttest) 1285 { 1286 static const char text[] = "text"; 1287 static const char binary[] = "binary"; 1288 static const size_t len = sizeof(text); 1289 1290 uint32_t i = starttest; 1291 1292 do { 1293 set_test_type(me[starttest].mp, me[i].mp); 1294 if ((ms->flags & MAGIC_DEBUG) == 0) 1295 continue; 1296 (void)fprintf(stderr, "%s%s%s: %s\n", 1297 me[i].mp->mimetype, 1298 me[i].mp->mimetype[0] == '\0' ? "" : "; ", 1299 me[i].mp->desc[0] ? me[i].mp->desc : "(no description)", 1300 me[i].mp->flag & BINTEST ? binary : text); 1301 if (me[i].mp->flag & BINTEST) { 1302 char *p = strstr(me[i].mp->desc, text); 1303 if (p && (p == me[i].mp->desc || 1304 isspace(CAST(unsigned char, p[-1]))) && 1305 (p + len - me[i].mp->desc == MAXstring 1306 || (p[len] == '\0' || 1307 isspace(CAST(unsigned char, p[len]))))) 1308 (void)fprintf(stderr, "*** Possible " 1309 "binary test for text type\n"); 1310 } 1311 } while (++i < nme && me[i].mp->cont_level != 0); 1312 return i; 1313 } 1314 1315 private void 1316 set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme) 1317 { 1318 uint32_t i; 1319 for (i = 0; i < nme; i++) { 1320 if (me[i].mp->cont_level == 0 && 1321 me[i].mp->type == FILE_DEFAULT) { 1322 while (++i < nme) 1323 if (me[i].mp->cont_level == 0) 1324 break; 1325 if (i != nme) { 1326 /* XXX - Ugh! 
*/ 1327 ms->line = me[i].mp->lineno; 1328 file_magwarn(ms, 1329 "level 0 \"default\" did not sort last"); 1330 } 1331 return; 1332 } 1333 } 1334 } 1335 1336 private int 1337 coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme, 1338 struct magic **ma, uint32_t *nma) 1339 { 1340 uint32_t i, mentrycount = 0; 1341 size_t slen; 1342 1343 for (i = 0; i < nme; i++) 1344 mentrycount += me[i].cont_count; 1345 1346 if (mentrycount == 0) { 1347 *ma = NULL; 1348 *nma = 0; 1349 return 0; 1350 } 1351 1352 slen = sizeof(**ma) * mentrycount; 1353 if ((*ma = CAST(struct magic *, malloc(slen))) == NULL) { 1354 file_oomem(ms, slen); 1355 return -1; 1356 } 1357 1358 mentrycount = 0; 1359 for (i = 0; i < nme; i++) { 1360 (void)memcpy(*ma + mentrycount, me[i].mp, 1361 me[i].cont_count * sizeof(**ma)); 1362 mentrycount += me[i].cont_count; 1363 } 1364 *nma = mentrycount; 1365 return 0; 1366 } 1367 1368 private void 1369 magic_entry_free(struct magic_entry *me, uint32_t nme) 1370 { 1371 uint32_t i; 1372 if (me == NULL) 1373 return; 1374 for (i = 0; i < nme; i++) 1375 free(me[i].mp); 1376 free(me); 1377 } 1378 1379 private struct magic_map * 1380 apprentice_load(struct magic_set *ms, const char *fn, int action) 1381 { 1382 int errs = 0; 1383 uint32_t i, j; 1384 size_t files = 0, maxfiles = 0; 1385 char **filearr = NULL, *mfn; 1386 struct stat st; 1387 struct magic_map *map; 1388 struct magic_entry_set mset[MAGIC_SETS]; 1389 DIR *dir; 1390 struct dirent *d; 1391 1392 memset(mset, 0, sizeof(mset)); 1393 ms->flags |= MAGIC_CHECK; /* Enable checks for parsed files */ 1394 1395 1396 if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) 1397 { 1398 file_oomem(ms, sizeof(*map)); 1399 return NULL; 1400 } 1401 map->type = MAP_TYPE_MALLOC; 1402 1403 /* print silly verbose header for USG compat. 
*/ 1404 if (action == FILE_CHECK) 1405 (void)fprintf(stderr, "%s\n", usg_hdr); 1406 1407 /* load directory or file */ 1408 if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) { 1409 dir = opendir(fn); 1410 if (!dir) { 1411 errs++; 1412 goto out; 1413 } 1414 while ((d = readdir(dir)) != NULL) { 1415 if (d->d_name[0] == '.') 1416 continue; 1417 if (asprintf(&mfn, "%s/%s", fn, d->d_name) < 0) { 1418 file_oomem(ms, 1419 strlen(fn) + strlen(d->d_name) + 2); 1420 errs++; 1421 closedir(dir); 1422 goto out; 1423 } 1424 if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) { 1425 free(mfn); 1426 continue; 1427 } 1428 if (files >= maxfiles) { 1429 size_t mlen; 1430 char **nfilearr; 1431 maxfiles = (maxfiles + 1) * 2; 1432 mlen = maxfiles * sizeof(*filearr); 1433 if ((nfilearr = CAST(char **, 1434 realloc(filearr, mlen))) == NULL) { 1435 file_oomem(ms, mlen); 1436 free(mfn); 1437 closedir(dir); 1438 errs++; 1439 goto out; 1440 } 1441 filearr = nfilearr; 1442 } 1443 filearr[files++] = mfn; 1444 } 1445 closedir(dir); 1446 if (filearr) { 1447 qsort(filearr, files, sizeof(*filearr), cmpstrp); 1448 for (i = 0; i < files; i++) { 1449 load_1(ms, action, filearr[i], &errs, mset); 1450 free(filearr[i]); 1451 } 1452 free(filearr); 1453 filearr = NULL; 1454 } 1455 } else 1456 load_1(ms, action, fn, &errs, mset); 1457 if (errs) 1458 goto out; 1459 1460 for (j = 0; j < MAGIC_SETS; j++) { 1461 /* Set types of tests */ 1462 for (i = 0; i < mset[j].count; ) { 1463 if (mset[j].me[i].mp->cont_level != 0) { 1464 i++; 1465 continue; 1466 } 1467 i = set_text_binary(ms, mset[j].me, mset[j].count, i); 1468 } 1469 if (mset[j].me) 1470 qsort(mset[j].me, mset[j].count, sizeof(*mset[j].me), 1471 apprentice_sort); 1472 1473 /* 1474 * Make sure that any level 0 "default" line is last 1475 * (if one exists). 
1476 */ 1477 set_last_default(ms, mset[j].me, mset[j].count); 1478 1479 /* coalesce per file arrays into a single one, if needed */ 1480 if (mset[j].count == 0) 1481 continue; 1482 1483 if (coalesce_entries(ms, mset[j].me, mset[j].count, 1484 &map->magic[j], &map->nmagic[j]) == -1) { 1485 errs++; 1486 goto out; 1487 } 1488 } 1489 1490 out: 1491 free(filearr); 1492 for (j = 0; j < MAGIC_SETS; j++) 1493 magic_entry_free(mset[j].me, mset[j].count); 1494 1495 if (errs) { 1496 apprentice_unmap(map); 1497 return NULL; 1498 } 1499 return map; 1500 } 1501 1502 /* 1503 * extend the sign bit if the comparison is to be signed 1504 */ 1505 protected uint64_t 1506 file_signextend(struct magic_set *ms, struct magic *m, uint64_t v) 1507 { 1508 if (!(m->flag & UNSIGNED)) { 1509 switch(m->type) { 1510 /* 1511 * Do not remove the casts below. They are 1512 * vital. When later compared with the data, 1513 * the sign extension must have happened. 1514 */ 1515 case FILE_BYTE: 1516 v = CAST(signed char, v); 1517 break; 1518 case FILE_SHORT: 1519 case FILE_BESHORT: 1520 case FILE_LESHORT: 1521 v = CAST(short, v); 1522 break; 1523 case FILE_DATE: 1524 case FILE_BEDATE: 1525 case FILE_LEDATE: 1526 case FILE_MEDATE: 1527 case FILE_LDATE: 1528 case FILE_BELDATE: 1529 case FILE_LELDATE: 1530 case FILE_MELDATE: 1531 case FILE_LONG: 1532 case FILE_BELONG: 1533 case FILE_LELONG: 1534 case FILE_MELONG: 1535 case FILE_FLOAT: 1536 case FILE_BEFLOAT: 1537 case FILE_LEFLOAT: 1538 v = CAST(int32_t, v); 1539 break; 1540 case FILE_QUAD: 1541 case FILE_BEQUAD: 1542 case FILE_LEQUAD: 1543 case FILE_QDATE: 1544 case FILE_QLDATE: 1545 case FILE_QWDATE: 1546 case FILE_BEQDATE: 1547 case FILE_BEQLDATE: 1548 case FILE_BEQWDATE: 1549 case FILE_LEQDATE: 1550 case FILE_LEQLDATE: 1551 case FILE_LEQWDATE: 1552 case FILE_DOUBLE: 1553 case FILE_BEDOUBLE: 1554 case FILE_LEDOUBLE: 1555 case FILE_OFFSET: 1556 case FILE_BEVARINT: 1557 case FILE_LEVARINT: 1558 v = CAST(int64_t, v); 1559 break; 1560 case FILE_STRING: 1561 
case FILE_PSTRING: 1562 case FILE_BESTRING16: 1563 case FILE_LESTRING16: 1564 case FILE_REGEX: 1565 case FILE_SEARCH: 1566 case FILE_DEFAULT: 1567 case FILE_INDIRECT: 1568 case FILE_NAME: 1569 case FILE_USE: 1570 case FILE_CLEAR: 1571 case FILE_DER: 1572 case FILE_GUID: 1573 break; 1574 default: 1575 if (ms->flags & MAGIC_CHECK) 1576 file_magwarn(ms, "cannot happen: m->type=%d\n", 1577 m->type); 1578 return FILE_BADSIZE; 1579 } 1580 } 1581 return v; 1582 } 1583 1584 private int 1585 string_modifier_check(struct magic_set *ms, struct magic *m) 1586 { 1587 if ((ms->flags & MAGIC_CHECK) == 0) 1588 return 0; 1589 1590 if ((m->type != FILE_REGEX || (m->str_flags & REGEX_LINE_COUNT) == 0) && 1591 (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0)) { 1592 file_magwarn(ms, 1593 "'/BHhLl' modifiers are only allowed for pascal strings\n"); 1594 return -1; 1595 } 1596 switch (m->type) { 1597 case FILE_BESTRING16: 1598 case FILE_LESTRING16: 1599 if (m->str_flags != 0) { 1600 file_magwarn(ms, 1601 "no modifiers allowed for 16-bit strings\n"); 1602 return -1; 1603 } 1604 break; 1605 case FILE_STRING: 1606 case FILE_PSTRING: 1607 if ((m->str_flags & REGEX_OFFSET_START) != 0) { 1608 file_magwarn(ms, 1609 "'/%c' only allowed on regex and search\n", 1610 CHAR_REGEX_OFFSET_START); 1611 return -1; 1612 } 1613 break; 1614 case FILE_SEARCH: 1615 if (m->str_range == 0) { 1616 file_magwarn(ms, 1617 "missing range; defaulting to %d\n", 1618 STRING_DEFAULT_RANGE); 1619 m->str_range = STRING_DEFAULT_RANGE; 1620 return -1; 1621 } 1622 break; 1623 case FILE_REGEX: 1624 if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) { 1625 file_magwarn(ms, "'/%c' not allowed on regex\n", 1626 CHAR_COMPACT_WHITESPACE); 1627 return -1; 1628 } 1629 if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) { 1630 file_magwarn(ms, "'/%c' not allowed on regex\n", 1631 CHAR_COMPACT_OPTIONAL_WHITESPACE); 1632 return -1; 1633 } 1634 break; 1635 default: 1636 file_magwarn(ms, "coding error: 
m->type=%d\n", 1637 m->type); 1638 return -1; 1639 } 1640 return 0; 1641 } 1642 1643 private int 1644 get_op(char c) 1645 { 1646 switch (c) { 1647 case '&': 1648 return FILE_OPAND; 1649 case '|': 1650 return FILE_OPOR; 1651 case '^': 1652 return FILE_OPXOR; 1653 case '+': 1654 return FILE_OPADD; 1655 case '-': 1656 return FILE_OPMINUS; 1657 case '*': 1658 return FILE_OPMULTIPLY; 1659 case '/': 1660 return FILE_OPDIVIDE; 1661 case '%': 1662 return FILE_OPMODULO; 1663 default: 1664 return -1; 1665 } 1666 } 1667 1668 #ifdef ENABLE_CONDITIONALS 1669 private int 1670 get_cond(const char *l, const char **t) 1671 { 1672 static const struct cond_tbl_s { 1673 char name[8]; 1674 size_t len; 1675 int cond; 1676 } cond_tbl[] = { 1677 { "if", 2, COND_IF }, 1678 { "elif", 4, COND_ELIF }, 1679 { "else", 4, COND_ELSE }, 1680 { "", 0, COND_NONE }, 1681 }; 1682 const struct cond_tbl_s *p; 1683 1684 for (p = cond_tbl; p->len; p++) { 1685 if (strncmp(l, p->name, p->len) == 0 && 1686 isspace(CAST(unsigned char, l[p->len]))) { 1687 if (t) 1688 *t = l + p->len; 1689 break; 1690 } 1691 } 1692 return p->cond; 1693 } 1694 1695 private int 1696 check_cond(struct magic_set *ms, int cond, uint32_t cont_level) 1697 { 1698 int last_cond; 1699 last_cond = ms->c.li[cont_level].last_cond; 1700 1701 switch (cond) { 1702 case COND_IF: 1703 if (last_cond != COND_NONE && last_cond != COND_ELIF) { 1704 if (ms->flags & MAGIC_CHECK) 1705 file_magwarn(ms, "syntax error: `if'"); 1706 return -1; 1707 } 1708 last_cond = COND_IF; 1709 break; 1710 1711 case COND_ELIF: 1712 if (last_cond != COND_IF && last_cond != COND_ELIF) { 1713 if (ms->flags & MAGIC_CHECK) 1714 file_magwarn(ms, "syntax error: `elif'"); 1715 return -1; 1716 } 1717 last_cond = COND_ELIF; 1718 break; 1719 1720 case COND_ELSE: 1721 if (last_cond != COND_IF && last_cond != COND_ELIF) { 1722 if (ms->flags & MAGIC_CHECK) 1723 file_magwarn(ms, "syntax error: `else'"); 1724 return -1; 1725 } 1726 last_cond = COND_NONE; 1727 break; 1728 1729 case 
COND_NONE: 1730 last_cond = COND_NONE; 1731 break; 1732 } 1733 1734 ms->c.li[cont_level].last_cond = last_cond; 1735 return 0; 1736 } 1737 #endif /* ENABLE_CONDITIONALS */ 1738 1739 private int 1740 parse_indirect_modifier(struct magic_set *ms, struct magic *m, const char **lp) 1741 { 1742 const char *l = *lp; 1743 1744 while (!isspace(CAST(unsigned char, *++l))) 1745 switch (*l) { 1746 case CHAR_INDIRECT_RELATIVE: 1747 m->str_flags |= INDIRECT_RELATIVE; 1748 break; 1749 default: 1750 if (ms->flags & MAGIC_CHECK) 1751 file_magwarn(ms, "indirect modifier `%c' " 1752 "invalid", *l); 1753 *lp = l; 1754 return -1; 1755 } 1756 *lp = l; 1757 return 0; 1758 } 1759 1760 private void 1761 parse_op_modifier(struct magic_set *ms, struct magic *m, const char **lp, 1762 int op) 1763 { 1764 const char *l = *lp; 1765 char *t; 1766 uint64_t val; 1767 1768 ++l; 1769 m->mask_op |= op; 1770 val = CAST(uint64_t, strtoull(l, &t, 0)); 1771 l = t; 1772 m->num_mask = file_signextend(ms, m, val); 1773 eatsize(&l); 1774 *lp = l; 1775 } 1776 1777 private int 1778 parse_string_modifier(struct magic_set *ms, struct magic *m, const char **lp) 1779 { 1780 const char *l = *lp; 1781 char *t; 1782 int have_range = 0; 1783 1784 while (!isspace(CAST(unsigned char, *++l))) { 1785 switch (*l) { 1786 case '0': case '1': case '2': 1787 case '3': case '4': case '5': 1788 case '6': case '7': case '8': 1789 case '9': 1790 if (have_range && (ms->flags & MAGIC_CHECK)) 1791 file_magwarn(ms, "multiple ranges"); 1792 have_range = 1; 1793 m->str_range = CAST(uint32_t, strtoul(l, &t, 0)); 1794 if (m->str_range == 0) 1795 file_magwarn(ms, "zero range"); 1796 l = t - 1; 1797 break; 1798 case CHAR_COMPACT_WHITESPACE: 1799 m->str_flags |= STRING_COMPACT_WHITESPACE; 1800 break; 1801 case CHAR_COMPACT_OPTIONAL_WHITESPACE: 1802 m->str_flags |= STRING_COMPACT_OPTIONAL_WHITESPACE; 1803 break; 1804 case CHAR_IGNORE_LOWERCASE: 1805 m->str_flags |= STRING_IGNORE_LOWERCASE; 1806 break; 1807 case CHAR_IGNORE_UPPERCASE: 1808 
m->str_flags |= STRING_IGNORE_UPPERCASE; 1809 break; 1810 case CHAR_REGEX_OFFSET_START: 1811 m->str_flags |= REGEX_OFFSET_START; 1812 break; 1813 case CHAR_BINTEST: 1814 m->str_flags |= STRING_BINTEST; 1815 break; 1816 case CHAR_TEXTTEST: 1817 m->str_flags |= STRING_TEXTTEST; 1818 break; 1819 case CHAR_TRIM: 1820 m->str_flags |= STRING_TRIM; 1821 break; 1822 case CHAR_FULL_WORD: 1823 m->str_flags |= STRING_FULL_WORD; 1824 break; 1825 case CHAR_PSTRING_1_LE: 1826 #define SET_LENGTH(a) m->str_flags = (m->str_flags & ~PSTRING_LEN) | (a) 1827 if (m->type != FILE_PSTRING) 1828 goto bad; 1829 SET_LENGTH(PSTRING_1_LE); 1830 break; 1831 case CHAR_PSTRING_2_BE: 1832 if (m->type != FILE_PSTRING) 1833 goto bad; 1834 SET_LENGTH(PSTRING_2_BE); 1835 break; 1836 case CHAR_PSTRING_2_LE: 1837 if (m->type != FILE_PSTRING) 1838 goto bad; 1839 SET_LENGTH(PSTRING_2_LE); 1840 break; 1841 case CHAR_PSTRING_4_BE: 1842 if (m->type != FILE_PSTRING) 1843 goto bad; 1844 SET_LENGTH(PSTRING_4_BE); 1845 break; 1846 case CHAR_PSTRING_4_LE: 1847 switch (m->type) { 1848 case FILE_PSTRING: 1849 case FILE_REGEX: 1850 break; 1851 default: 1852 goto bad; 1853 } 1854 SET_LENGTH(PSTRING_4_LE); 1855 break; 1856 case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF: 1857 if (m->type != FILE_PSTRING) 1858 goto bad; 1859 m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF; 1860 break; 1861 default: 1862 bad: 1863 if (ms->flags & MAGIC_CHECK) 1864 file_magwarn(ms, "string modifier `%c' " 1865 "invalid", *l); 1866 goto out; 1867 } 1868 /* allow multiple '/' for readability */ 1869 if (l[1] == '/' && !isspace(CAST(unsigned char, l[2]))) 1870 l++; 1871 } 1872 if (string_modifier_check(ms, m) == -1) 1873 goto out; 1874 *lp = l; 1875 return 0; 1876 out: 1877 *lp = l; 1878 return -1; 1879 } 1880 1881 /* 1882 * parse one line from magic file, put into magic[index++] if valid 1883 */ 1884 private int 1885 parse(struct magic_set *ms, struct magic_entry *me, const char *line, 1886 size_t lineno, int action) 1887 { 1888 #ifdef 
ENABLE_CONDITIONALS 1889 static uint32_t last_cont_level = 0; 1890 #endif 1891 size_t i; 1892 struct magic *m; 1893 const char *l = line; 1894 char *t; 1895 int op; 1896 uint32_t cont_level; 1897 int32_t diff; 1898 1899 cont_level = 0; 1900 1901 /* 1902 * Parse the offset. 1903 */ 1904 while (*l == '>') { 1905 ++l; /* step over */ 1906 cont_level++; 1907 } 1908 #ifdef ENABLE_CONDITIONALS 1909 if (cont_level == 0 || cont_level > last_cont_level) 1910 if (file_check_mem(ms, cont_level) == -1) 1911 return -1; 1912 last_cont_level = cont_level; 1913 #endif 1914 if (cont_level != 0) { 1915 if (me->mp == NULL) { 1916 file_magerror(ms, "No current entry for continuation"); 1917 return -1; 1918 } 1919 if (me->cont_count == 0) { 1920 file_magerror(ms, "Continuations present with 0 count"); 1921 return -1; 1922 } 1923 m = &me->mp[me->cont_count - 1]; 1924 diff = CAST(int32_t, cont_level) - CAST(int32_t, m->cont_level); 1925 if (diff > 1) 1926 file_magwarn(ms, "New continuation level %u is more " 1927 "than one larger than current level %u", cont_level, 1928 m->cont_level); 1929 if (me->cont_count == me->max_count) { 1930 struct magic *nm; 1931 size_t cnt = me->max_count + ALLOC_CHUNK; 1932 if ((nm = CAST(struct magic *, realloc(me->mp, 1933 sizeof(*nm) * cnt))) == NULL) { 1934 file_oomem(ms, sizeof(*nm) * cnt); 1935 return -1; 1936 } 1937 me->mp = nm; 1938 me->max_count = CAST(uint32_t, cnt); 1939 } 1940 m = &me->mp[me->cont_count++]; 1941 (void)memset(m, 0, sizeof(*m)); 1942 m->cont_level = cont_level; 1943 } else { 1944 static const size_t len = sizeof(*m) * ALLOC_CHUNK; 1945 if (me->mp != NULL) 1946 return 1; 1947 if ((m = CAST(struct magic *, malloc(len))) == NULL) { 1948 file_oomem(ms, len); 1949 return -1; 1950 } 1951 me->mp = m; 1952 me->max_count = ALLOC_CHUNK; 1953 (void)memset(m, 0, sizeof(*m)); 1954 m->factor_op = FILE_FACTOR_OP_NONE; 1955 m->cont_level = 0; 1956 me->cont_count = 1; 1957 } 1958 m->lineno = CAST(uint32_t, lineno); 1959 1960 if (*l == '&') { /* 
m->cont_level == 0 checked below. */ 1961 ++l; /* step over */ 1962 m->flag |= OFFADD; 1963 } 1964 if (*l == '(') { 1965 ++l; /* step over */ 1966 m->flag |= INDIR; 1967 if (m->flag & OFFADD) 1968 m->flag = (m->flag & ~OFFADD) | INDIROFFADD; 1969 1970 if (*l == '&') { /* m->cont_level == 0 checked below */ 1971 ++l; /* step over */ 1972 m->flag |= OFFADD; 1973 } 1974 } 1975 /* Indirect offsets are not valid at level 0. */ 1976 if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD))) { 1977 if (ms->flags & MAGIC_CHECK) 1978 file_magwarn(ms, "relative offset at level 0"); 1979 return -1; 1980 } 1981 1982 /* get offset, then skip over it */ 1983 if (*l == '-') { 1984 ++l; /* step over */ 1985 m->flag |= OFFNEGATIVE; 1986 } 1987 m->offset = CAST(int32_t, strtol(l, &t, 0)); 1988 if (l == t) { 1989 if (ms->flags & MAGIC_CHECK) 1990 file_magwarn(ms, "offset `%s' invalid", l); 1991 return -1; 1992 } 1993 1994 l = t; 1995 1996 if (m->flag & INDIR) { 1997 m->in_type = FILE_LONG; 1998 m->in_offset = 0; 1999 m->in_op = 0; 2000 /* 2001 * read [.,lbs][+-]nnnnn) 2002 */ 2003 if (*l == '.' 
|| *l == ',') { 2004 if (*l == ',') 2005 m->in_op |= FILE_OPSIGNED; 2006 l++; 2007 switch (*l) { 2008 case 'l': 2009 m->in_type = FILE_LELONG; 2010 break; 2011 case 'L': 2012 m->in_type = FILE_BELONG; 2013 break; 2014 case 'm': 2015 m->in_type = FILE_MELONG; 2016 break; 2017 case 'h': 2018 case 's': 2019 m->in_type = FILE_LESHORT; 2020 break; 2021 case 'H': 2022 case 'S': 2023 m->in_type = FILE_BESHORT; 2024 break; 2025 case 'c': 2026 case 'b': 2027 case 'C': 2028 case 'B': 2029 m->in_type = FILE_BYTE; 2030 break; 2031 case 'e': 2032 case 'f': 2033 case 'g': 2034 m->in_type = FILE_LEDOUBLE; 2035 break; 2036 case 'E': 2037 case 'F': 2038 case 'G': 2039 m->in_type = FILE_BEDOUBLE; 2040 break; 2041 case 'i': 2042 m->in_type = FILE_LEID3; 2043 break; 2044 case 'I': 2045 m->in_type = FILE_BEID3; 2046 break; 2047 case 'q': 2048 m->in_type = FILE_LEQUAD; 2049 break; 2050 case 'Q': 2051 m->in_type = FILE_BEQUAD; 2052 break; 2053 default: 2054 if (ms->flags & MAGIC_CHECK) 2055 file_magwarn(ms, 2056 "indirect offset type `%c' invalid", 2057 *l); 2058 return -1; 2059 } 2060 l++; 2061 } 2062 2063 if (*l == '~') { 2064 m->in_op |= FILE_OPINVERSE; 2065 l++; 2066 } 2067 if ((op = get_op(*l)) != -1) { 2068 m->in_op |= op; 2069 l++; 2070 } 2071 if (*l == '(') { 2072 m->in_op |= FILE_OPINDIRECT; 2073 l++; 2074 } 2075 if (isdigit(CAST(unsigned char, *l)) || *l == '-') { 2076 m->in_offset = CAST(int32_t, strtol(l, &t, 0)); 2077 if (l == t) { 2078 if (ms->flags & MAGIC_CHECK) 2079 file_magwarn(ms, 2080 "in_offset `%s' invalid", l); 2081 return -1; 2082 } 2083 l = t; 2084 } 2085 if (*l++ != ')' || 2086 ((m->in_op & FILE_OPINDIRECT) && *l++ != ')')) { 2087 if (ms->flags & MAGIC_CHECK) 2088 file_magwarn(ms, 2089 "missing ')' in indirect offset"); 2090 return -1; 2091 } 2092 } 2093 EATAB; 2094 2095 #ifdef ENABLE_CONDITIONALS 2096 m->cond = get_cond(l, &l); 2097 if (check_cond(ms, m->cond, cont_level) == -1) 2098 return -1; 2099 2100 EATAB; 2101 #endif 2102 2103 /* 2104 * Parse the type. 
2105 */ 2106 if (*l == 'u') { 2107 /* 2108 * Try it as a keyword type prefixed by "u"; match what 2109 * follows the "u". If that fails, try it as an SUS 2110 * integer type. 2111 */ 2112 m->type = get_type(type_tbl, l + 1, &l); 2113 if (m->type == FILE_INVALID) { 2114 /* 2115 * Not a keyword type; parse it as an SUS type, 2116 * 'u' possibly followed by a number or C/S/L. 2117 */ 2118 m->type = get_standard_integer_type(l, &l); 2119 } 2120 /* It's unsigned. */ 2121 if (m->type != FILE_INVALID) 2122 m->flag |= UNSIGNED; 2123 } else { 2124 /* 2125 * Try it as a keyword type. If that fails, try it as 2126 * an SUS integer type if it begins with "d" or as an 2127 * SUS string type if it begins with "s". In any case, 2128 * it's not unsigned. 2129 */ 2130 m->type = get_type(type_tbl, l, &l); 2131 if (m->type == FILE_INVALID) { 2132 /* 2133 * Not a keyword type; parse it as an SUS type, 2134 * either 'd' possibly followed by a number or 2135 * C/S/L, or just 's'. 2136 */ 2137 if (*l == 'd') 2138 m->type = get_standard_integer_type(l, &l); 2139 else if (*l == 's' 2140 && !isalpha(CAST(unsigned char, l[1]))) { 2141 m->type = FILE_STRING; 2142 ++l; 2143 } 2144 } 2145 } 2146 2147 if (m->type == FILE_INVALID) { 2148 /* Not found - try it as a special keyword. */ 2149 m->type = get_type(special_tbl, l, &l); 2150 } 2151 2152 if (m->type == FILE_INVALID) { 2153 if (ms->flags & MAGIC_CHECK) 2154 file_magwarn(ms, "type `%s' invalid", l); 2155 return -1; 2156 } 2157 2158 if (m->type == FILE_NAME && cont_level != 0) { 2159 if (ms->flags & MAGIC_CHECK) 2160 file_magwarn(ms, "`name%s' entries can only be " 2161 "declared at top level", l); 2162 return -1; 2163 } 2164 2165 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ 2166 /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? 
*/ 2167 2168 m->mask_op = 0; 2169 if (*l == '~') { 2170 if (!IS_STRING(m->type)) 2171 m->mask_op |= FILE_OPINVERSE; 2172 else if (ms->flags & MAGIC_CHECK) 2173 file_magwarn(ms, "'~' invalid for string types"); 2174 ++l; 2175 } 2176 m->str_range = 0; 2177 m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0; 2178 if ((op = get_op(*l)) != -1) { 2179 if (IS_STRING(m->type)) { 2180 int r; 2181 2182 if (op != FILE_OPDIVIDE) { 2183 if (ms->flags & MAGIC_CHECK) 2184 file_magwarn(ms, 2185 "invalid string/indirect op: " 2186 "`%c'", *t); 2187 return -1; 2188 } 2189 2190 if (m->type == FILE_INDIRECT) 2191 r = parse_indirect_modifier(ms, m, &l); 2192 else 2193 r = parse_string_modifier(ms, m, &l); 2194 if (r == -1) 2195 return -1; 2196 } else 2197 parse_op_modifier(ms, m, &l, op); 2198 } 2199 2200 /* 2201 * We used to set mask to all 1's here, instead let's just not do 2202 * anything if mask = 0 (unless you have a better idea) 2203 */ 2204 EATAB; 2205 2206 switch (*l) { 2207 case '>': 2208 case '<': 2209 m->reln = *l; 2210 ++l; 2211 if (*l == '=') { 2212 if (ms->flags & MAGIC_CHECK) { 2213 file_magwarn(ms, "%c= not supported", 2214 m->reln); 2215 return -1; 2216 } 2217 ++l; 2218 } 2219 break; 2220 /* Old-style anding: "0 byte &0x80 dynamically linked" */ 2221 case '&': 2222 case '^': 2223 case '=': 2224 m->reln = *l; 2225 ++l; 2226 if (*l == '=') { 2227 /* HP compat: ignore &= etc. */ 2228 ++l; 2229 } 2230 break; 2231 case '!': 2232 m->reln = *l; 2233 ++l; 2234 break; 2235 default: 2236 m->reln = '='; /* the default relation */ 2237 if (*l == 'x' && ((isascii(CAST(unsigned char, l[1])) && 2238 isspace(CAST(unsigned char, l[1]))) || !l[1])) { 2239 m->reln = *l; 2240 ++l; 2241 } 2242 break; 2243 } 2244 /* 2245 * Grab the value part, except for an 'x' reln. 2246 */ 2247 if (m->reln != 'x' && getvalue(ms, m, &l, action)) 2248 return -1; 2249 2250 /* 2251 * TODO finish this macro and start using it! 
2252 * #define offsetcheck {if (offset > ms->bytes_max -1) 2253 * magwarn("offset too big"); } 2254 */ 2255 2256 /* 2257 * Now get last part - the description 2258 */ 2259 EATAB; 2260 if (l[0] == '\b') { 2261 ++l; 2262 m->flag |= NOSPACE; 2263 } else if ((l[0] == '\\') && (l[1] == 'b')) { 2264 ++l; 2265 ++l; 2266 m->flag |= NOSPACE; 2267 } 2268 for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); ) 2269 continue; 2270 if (i == sizeof(m->desc)) { 2271 m->desc[sizeof(m->desc) - 1] = '\0'; 2272 if (ms->flags & MAGIC_CHECK) 2273 file_magwarn(ms, "description `%s' truncated", m->desc); 2274 } 2275 2276 /* 2277 * We only do this check while compiling, or if any of the magic 2278 * files were not compiled. 2279 */ 2280 if (ms->flags & MAGIC_CHECK) { 2281 if (check_format(ms, m) == -1) 2282 return -1; 2283 } 2284 #ifndef COMPILE_ONLY 2285 if (action == FILE_CHECK) { 2286 file_mdump(m); 2287 } 2288 #endif 2289 m->mimetype[0] = '\0'; /* initialise MIME type to none */ 2290 return 0; 2291 } 2292 2293 /* 2294 * parse a STRENGTH annotation line from magic file, put into magic[index - 1] 2295 * if valid 2296 */ 2297 private int 2298 parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line, 2299 size_t len __attribute__((__unused__))) 2300 { 2301 const char *l = line; 2302 char *el; 2303 unsigned long factor; 2304 struct magic *m = &me->mp[0]; 2305 2306 if (m->factor_op != FILE_FACTOR_OP_NONE) { 2307 file_magwarn(ms, 2308 "Current entry already has a strength type: %c %d", 2309 m->factor_op, m->factor); 2310 return -1; 2311 } 2312 if (m->type == FILE_NAME) { 2313 file_magwarn(ms, "%s: Strength setting is not supported in " 2314 "\"name\" magic entries", m->value.s); 2315 return -1; 2316 } 2317 EATAB; 2318 switch (*l) { 2319 case FILE_FACTOR_OP_NONE: 2320 case FILE_FACTOR_OP_PLUS: 2321 case FILE_FACTOR_OP_MINUS: 2322 case FILE_FACTOR_OP_TIMES: 2323 case FILE_FACTOR_OP_DIV: 2324 m->factor_op = *l++; 2325 break; 2326 default: 2327 file_magwarn(ms, 
"Unknown factor op `%c'", *l); 2328 return -1; 2329 } 2330 EATAB; 2331 factor = strtoul(l, &el, 0); 2332 if (factor > 255) { 2333 file_magwarn(ms, "Too large factor `%lu'", factor); 2334 goto out; 2335 } 2336 if (*el && !isspace(CAST(unsigned char, *el))) { 2337 file_magwarn(ms, "Bad factor `%s'", l); 2338 goto out; 2339 } 2340 m->factor = CAST(uint8_t, factor); 2341 if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) { 2342 file_magwarn(ms, "Cannot have factor op `%c' and factor %u", 2343 m->factor_op, m->factor); 2344 goto out; 2345 } 2346 return 0; 2347 out: 2348 m->factor_op = FILE_FACTOR_OP_NONE; 2349 m->factor = 0; 2350 return -1; 2351 } 2352 2353 private int 2354 goodchar(unsigned char x, const char *extra) 2355 { 2356 return (isascii(x) && isalnum(x)) || strchr(extra, x); 2357 } 2358 2359 private int 2360 parse_extra(struct magic_set *ms, struct magic_entry *me, const char *line, 2361 size_t llen, off_t off, size_t len, const char *name, const char *extra, 2362 int nt) 2363 { 2364 size_t i; 2365 const char *l = line; 2366 struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1]; 2367 char *buf = CAST(char *, CAST(void *, m)) + off; 2368 2369 if (buf[0] != '\0') { 2370 len = nt ? 
strlen(buf) : len; 2371 file_magwarn(ms, "Current entry already has a %s type " 2372 "`%.*s', new type `%s'", name, CAST(int, len), buf, l); 2373 return -1; 2374 } 2375 2376 if (*m->desc == '\0') { 2377 file_magwarn(ms, "Current entry does not yet have a " 2378 "description for adding a %s type", name); 2379 return -1; 2380 } 2381 2382 EATAB; 2383 for (i = 0; *l && i < llen && i < len && goodchar(*l, extra); 2384 buf[i++] = *l++) 2385 continue; 2386 2387 if (i == len && *l) { 2388 if (nt) 2389 buf[len - 1] = '\0'; 2390 if (ms->flags & MAGIC_CHECK) 2391 file_magwarn(ms, "%s type `%s' truncated %" 2392 SIZE_T_FORMAT "u", name, line, i); 2393 } else { 2394 if (!isspace(CAST(unsigned char, *l)) && !goodchar(*l, extra)) 2395 file_magwarn(ms, "%s type `%s' has bad char '%c'", 2396 name, line, *l); 2397 if (nt) 2398 buf[i] = '\0'; 2399 } 2400 2401 if (i > 0) 2402 return 0; 2403 2404 file_magerror(ms, "Bad magic entry '%s'", line); 2405 return -1; 2406 } 2407 2408 /* 2409 * Parse an Apple CREATOR/TYPE annotation from magic file and put it into 2410 * magic[index - 1] 2411 */ 2412 private int 2413 parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line, 2414 size_t len) 2415 { 2416 struct magic *m = &me->mp[0]; 2417 2418 return parse_extra(ms, me, line, len, 2419 CAST(off_t, offsetof(struct magic, apple)), 2420 sizeof(m->apple), "APPLE", "!+-./?", 0); 2421 } 2422 2423 /* 2424 * Parse a comma-separated list of extensions 2425 */ 2426 private int 2427 parse_ext(struct magic_set *ms, struct magic_entry *me, const char *line, 2428 size_t len) 2429 { 2430 struct magic *m = &me->mp[0]; 2431 2432 return parse_extra(ms, me, line, len, 2433 CAST(off_t, offsetof(struct magic, ext)), 2434 sizeof(m->ext), "EXTENSION", ",!+-/@?_$&", 0); /* & for b&w */ 2435 } 2436 2437 /* 2438 * parse a MIME annotation line from magic file, put into magic[index - 1] 2439 * if valid 2440 */ 2441 private int 2442 parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line, 
2443 size_t len) 2444 { 2445 struct magic *m = &me->mp[0]; 2446 2447 return parse_extra(ms, me, line, len, 2448 CAST(off_t, offsetof(struct magic, mimetype)), 2449 sizeof(m->mimetype), "MIME", "+-/.$?:{}", 1); 2450 } 2451 2452 private int 2453 check_format_type(const char *ptr, int type, const char **estr) 2454 { 2455 int quad = 0, h; 2456 size_t len, cnt; 2457 if (*ptr == '\0') { 2458 /* Missing format string; bad */ 2459 *estr = "missing format spec"; 2460 return -1; 2461 } 2462 2463 switch (file_formats[type]) { 2464 case FILE_FMT_QUAD: 2465 quad = 1; 2466 /*FALLTHROUGH*/ 2467 case FILE_FMT_NUM: 2468 if (quad == 0) { 2469 switch (type) { 2470 case FILE_BYTE: 2471 h = 2; 2472 break; 2473 case FILE_SHORT: 2474 case FILE_BESHORT: 2475 case FILE_LESHORT: 2476 h = 1; 2477 break; 2478 case FILE_LONG: 2479 case FILE_BELONG: 2480 case FILE_LELONG: 2481 case FILE_MELONG: 2482 case FILE_LEID3: 2483 case FILE_BEID3: 2484 case FILE_INDIRECT: 2485 h = 0; 2486 break; 2487 default: 2488 abort(); 2489 } 2490 } else 2491 h = 0; 2492 while (*ptr && strchr("-.#", *ptr) != NULL) 2493 ptr++; 2494 #define CHECKLEN() do { \ 2495 for (len = cnt = 0; isdigit(CAST(unsigned char, *ptr)); ptr++, cnt++) \ 2496 len = len * 10 + (*ptr - '0'); \ 2497 if (cnt > 5 || len > 1024) \ 2498 goto toolong; \ 2499 } while (/*CONSTCOND*/0) 2500 2501 CHECKLEN(); 2502 if (*ptr == '.') 2503 ptr++; 2504 CHECKLEN(); 2505 if (quad) { 2506 if (*ptr++ != 'l') 2507 goto invalid; 2508 if (*ptr++ != 'l') 2509 goto invalid; 2510 } 2511 2512 switch (*ptr++) { 2513 #ifdef STRICT_FORMAT /* "long" formats are int formats for us */ 2514 /* so don't accept the 'l' modifier */ 2515 case 'l': 2516 switch (*ptr++) { 2517 case 'i': 2518 case 'd': 2519 case 'u': 2520 case 'o': 2521 case 'x': 2522 case 'X': 2523 if (h == 0) 2524 return 0; 2525 /*FALLTHROUGH*/ 2526 default: 2527 goto invalid; 2528 } 2529 2530 /* 2531 * Don't accept h and hh modifiers. 
They make writing 2532 * magic entries more complicated, for very little benefit 2533 */ 2534 case 'h': 2535 if (h-- <= 0) 2536 goto invalid; 2537 switch (*ptr++) { 2538 case 'h': 2539 if (h-- <= 0) 2540 goto invalid; 2541 switch (*ptr++) { 2542 case 'i': 2543 case 'd': 2544 case 'u': 2545 case 'o': 2546 case 'x': 2547 case 'X': 2548 return 0; 2549 default: 2550 goto invalid; 2551 } 2552 case 'i': 2553 case 'd': 2554 case 'u': 2555 case 'o': 2556 case 'x': 2557 case 'X': 2558 if (h == 0) 2559 return 0; 2560 /*FALLTHROUGH*/ 2561 default: 2562 goto invalid; 2563 } 2564 #endif 2565 case 'c': 2566 if (h == 2) 2567 return 0; 2568 goto invalid; 2569 case 'i': 2570 case 'd': 2571 case 'u': 2572 case 'o': 2573 case 'x': 2574 case 'X': 2575 #ifdef STRICT_FORMAT 2576 if (h == 0) 2577 return 0; 2578 /*FALLTHROUGH*/ 2579 #else 2580 return 0; 2581 #endif 2582 default: 2583 goto invalid; 2584 } 2585 2586 case FILE_FMT_FLOAT: 2587 case FILE_FMT_DOUBLE: 2588 if (*ptr == '-') 2589 ptr++; 2590 if (*ptr == '.') 2591 ptr++; 2592 CHECKLEN(); 2593 if (*ptr == '.') 2594 ptr++; 2595 CHECKLEN(); 2596 switch (*ptr++) { 2597 case 'e': 2598 case 'E': 2599 case 'f': 2600 case 'F': 2601 case 'g': 2602 case 'G': 2603 return 0; 2604 2605 default: 2606 goto invalid; 2607 } 2608 2609 2610 case FILE_FMT_STR: 2611 if (*ptr == '-') 2612 ptr++; 2613 while (isdigit(CAST(unsigned char, *ptr))) 2614 ptr++; 2615 if (*ptr == '.') { 2616 ptr++; 2617 while (isdigit(CAST(unsigned char , *ptr))) 2618 ptr++; 2619 } 2620 2621 switch (*ptr++) { 2622 case 's': 2623 return 0; 2624 default: 2625 goto invalid; 2626 } 2627 2628 default: 2629 /* internal error */ 2630 abort(); 2631 } 2632 invalid: 2633 *estr = "not valid"; 2634 toolong: 2635 *estr = "too long"; 2636 return -1; 2637 } 2638 2639 /* 2640 * Check that the optional printf format in description matches 2641 * the type of the magic. 
2642 */ 2643 private int 2644 check_format(struct magic_set *ms, struct magic *m) 2645 { 2646 char *ptr; 2647 const char *estr; 2648 2649 for (ptr = m->desc; *ptr; ptr++) 2650 if (*ptr == '%') 2651 break; 2652 if (*ptr == '\0') { 2653 /* No format string; ok */ 2654 return 1; 2655 } 2656 2657 assert(file_nformats == file_nnames); 2658 2659 if (m->type >= file_nformats) { 2660 file_magwarn(ms, "Internal error inconsistency between " 2661 "m->type and format strings"); 2662 return -1; 2663 } 2664 if (file_formats[m->type] == FILE_FMT_NONE) { 2665 file_magwarn(ms, "No format string for `%s' with description " 2666 "`%s'", m->desc, file_names[m->type]); 2667 return -1; 2668 } 2669 2670 ptr++; 2671 if (check_format_type(ptr, m->type, &estr) == -1) { 2672 /* 2673 * TODO: this error message is unhelpful if the format 2674 * string is not one character long 2675 */ 2676 file_magwarn(ms, "Printf format is %s for type " 2677 "`%s' in description `%s'", estr, 2678 file_names[m->type], m->desc); 2679 return -1; 2680 } 2681 2682 for (; *ptr; ptr++) { 2683 if (*ptr == '%') { 2684 file_magwarn(ms, 2685 "Too many format strings (should have at most one) " 2686 "for `%s' with description `%s'", 2687 file_names[m->type], m->desc); 2688 return -1; 2689 } 2690 } 2691 return 0; 2692 } 2693 2694 /* 2695 * Read a numeric value from a pointer, into the value union of a magic 2696 * pointer, according to the magic type. Update the string pointer to point 2697 * just after the number read. Return 0 for success, non-zero for failure. 
2698 */ 2699 private int 2700 getvalue(struct magic_set *ms, struct magic *m, const char **p, int action) 2701 { 2702 char *ep; 2703 uint64_t ull; 2704 2705 switch (m->type) { 2706 case FILE_BESTRING16: 2707 case FILE_LESTRING16: 2708 case FILE_STRING: 2709 case FILE_PSTRING: 2710 case FILE_REGEX: 2711 case FILE_SEARCH: 2712 case FILE_NAME: 2713 case FILE_USE: 2714 case FILE_DER: 2715 *p = getstr(ms, m, *p, action == FILE_COMPILE); 2716 if (*p == NULL) { 2717 if (ms->flags & MAGIC_CHECK) 2718 file_magwarn(ms, "cannot get string from `%s'", 2719 m->value.s); 2720 return -1; 2721 } 2722 if (m->type == FILE_REGEX) { 2723 file_regex_t rx; 2724 int rc = file_regcomp(&rx, m->value.s, REG_EXTENDED); 2725 if (rc) { 2726 if (ms->flags & MAGIC_CHECK) 2727 file_regerror(&rx, rc, ms); 2728 } 2729 file_regfree(&rx); 2730 return rc ? -1 : 0; 2731 } 2732 return 0; 2733 default: 2734 if (m->reln == 'x') 2735 return 0; 2736 break; 2737 } 2738 2739 switch (m->type) { 2740 case FILE_FLOAT: 2741 case FILE_BEFLOAT: 2742 case FILE_LEFLOAT: 2743 errno = 0; 2744 #ifdef HAVE_STRTOF 2745 m->value.f = strtof(*p, &ep); 2746 #else 2747 m->value.f = (float)strtod(*p, &ep); 2748 #endif 2749 if (errno == 0) 2750 *p = ep; 2751 return 0; 2752 case FILE_DOUBLE: 2753 case FILE_BEDOUBLE: 2754 case FILE_LEDOUBLE: 2755 errno = 0; 2756 m->value.d = strtod(*p, &ep); 2757 if (errno == 0) 2758 *p = ep; 2759 return 0; 2760 case FILE_GUID: 2761 if (file_parse_guid(*p, m->value.guid) == -1) 2762 return -1; 2763 *p += FILE_GUID_SIZE - 1; 2764 return 0; 2765 default: 2766 errno = 0; 2767 ull = CAST(uint64_t, strtoull(*p, &ep, 0)); 2768 m->value.q = file_signextend(ms, m, ull); 2769 if (*p == ep) { 2770 file_magwarn(ms, "Unparsable number `%s'", *p); 2771 } else { 2772 size_t ts = typesize(m->type); 2773 uint64_t x; 2774 const char *q; 2775 2776 if (ts == FILE_BADSIZE) { 2777 file_magwarn(ms, 2778 "Expected numeric type got `%s'", 2779 type_tbl[m->type].name); 2780 } 2781 for (q = *p; isspace(CAST(unsigned char, 
*q)); q++) 2782 continue; 2783 if (*q == '-') 2784 ull = -CAST(int64_t, ull); 2785 switch (ts) { 2786 case 1: 2787 x = CAST(uint64_t, ull & ~0xffULL); 2788 break; 2789 case 2: 2790 x = CAST(uint64_t, ull & ~0xffffULL); 2791 break; 2792 case 4: 2793 x = CAST(uint64_t, ull & ~0xffffffffULL); 2794 break; 2795 case 8: 2796 x = 0; 2797 break; 2798 default: 2799 abort(); 2800 } 2801 if (x) { 2802 file_magwarn(ms, "Overflow for numeric" 2803 " type `%s' value %#" PRIx64, 2804 type_tbl[m->type].name, ull); 2805 } 2806 } 2807 if (errno == 0) { 2808 *p = ep; 2809 eatsize(p); 2810 } 2811 return 0; 2812 } 2813 } 2814 2815 /* 2816 * Convert a string containing C character escapes. Stop at an unescaped 2817 * space or tab. 2818 * Copy the converted version to "m->value.s", and the length in m->vallen. 2819 * Return updated scan pointer as function result. Warn if set. 2820 */ 2821 private const char * 2822 getstr(struct magic_set *ms, struct magic *m, const char *s, int warn) 2823 { 2824 const char *origs = s; 2825 char *p = m->value.s; 2826 size_t plen = sizeof(m->value.s); 2827 char *origp = p; 2828 char *pmax = p + plen - 1; 2829 int c; 2830 int val; 2831 2832 while ((c = *s++) != '\0') { 2833 if (isspace(CAST(unsigned char, c))) 2834 break; 2835 if (p >= pmax) { 2836 file_error(ms, 0, "string too long: `%s'", origs); 2837 return NULL; 2838 } 2839 if (c == '\\') { 2840 switch(c = *s++) { 2841 2842 case '\0': 2843 if (warn) 2844 file_magwarn(ms, "incomplete escape"); 2845 s--; 2846 goto out; 2847 2848 case '\t': 2849 if (warn) { 2850 file_magwarn(ms, 2851 "escaped tab found, use \\t instead"); 2852 warn = 0; /* already did */ 2853 } 2854 /*FALLTHROUGH*/ 2855 default: 2856 if (warn) { 2857 if (isprint(CAST(unsigned char, c))) { 2858 /* Allow escaping of 2859 * ``relations'' */ 2860 if (strchr("<>&^=!", c) == NULL 2861 && (m->type != FILE_REGEX || 2862 strchr("[]().*?^$|{}", c) 2863 == NULL)) { 2864 file_magwarn(ms, "no " 2865 "need to escape " 2866 "`%c'", c); 2867 } 2868 } 
else { 2869 file_magwarn(ms, 2870 "unknown escape sequence: " 2871 "\\%03o", c); 2872 } 2873 } 2874 /*FALLTHROUGH*/ 2875 /* space, perhaps force people to use \040? */ 2876 case ' ': 2877 #if 0 2878 /* 2879 * Other things people escape, but shouldn't need to, 2880 * so we disallow them 2881 */ 2882 case '\'': 2883 case '"': 2884 case '?': 2885 #endif 2886 /* Relations */ 2887 case '>': 2888 case '<': 2889 case '&': 2890 case '^': 2891 case '=': 2892 case '!': 2893 /* and baskslash itself */ 2894 case '\\': 2895 *p++ = CAST(char, c); 2896 break; 2897 2898 case 'a': 2899 *p++ = '\a'; 2900 break; 2901 2902 case 'b': 2903 *p++ = '\b'; 2904 break; 2905 2906 case 'f': 2907 *p++ = '\f'; 2908 break; 2909 2910 case 'n': 2911 *p++ = '\n'; 2912 break; 2913 2914 case 'r': 2915 *p++ = '\r'; 2916 break; 2917 2918 case 't': 2919 *p++ = '\t'; 2920 break; 2921 2922 case 'v': 2923 *p++ = '\v'; 2924 break; 2925 2926 /* \ and up to 3 octal digits */ 2927 case '0': 2928 case '1': 2929 case '2': 2930 case '3': 2931 case '4': 2932 case '5': 2933 case '6': 2934 case '7': 2935 val = c - '0'; 2936 c = *s++; /* try for 2 */ 2937 if (c >= '0' && c <= '7') { 2938 val = (val << 3) | (c - '0'); 2939 c = *s++; /* try for 3 */ 2940 if (c >= '0' && c <= '7') 2941 val = (val << 3) | (c-'0'); 2942 else 2943 --s; 2944 } 2945 else 2946 --s; 2947 *p++ = CAST(char, val); 2948 break; 2949 2950 /* \x and up to 2 hex digits */ 2951 case 'x': 2952 val = 'x'; /* Default if no digits */ 2953 c = hextoint(*s++); /* Get next char */ 2954 if (c >= 0) { 2955 val = c; 2956 c = hextoint(*s++); 2957 if (c >= 0) 2958 val = (val << 4) + c; 2959 else 2960 --s; 2961 } else 2962 --s; 2963 *p++ = CAST(char, val); 2964 break; 2965 } 2966 } else 2967 *p++ = CAST(char, c); 2968 } 2969 --s; 2970 out: 2971 *p = '\0'; 2972 m->vallen = CAST(unsigned char, (p - origp)); 2973 if (m->type == FILE_PSTRING) { 2974 size_t l = file_pstring_length_size(ms, m); 2975 if (l == FILE_BADSIZE) 2976 return NULL; 2977 m->vallen += CAST(unsigned 
char, l); 2978 } 2979 return s; 2980 } 2981 2982 2983 /* Single hex char to int; -1 if not a hex char. */ 2984 private int 2985 hextoint(int c) 2986 { 2987 if (!isascii(CAST(unsigned char, c))) 2988 return -1; 2989 if (isdigit(CAST(unsigned char, c))) 2990 return c - '0'; 2991 if ((c >= 'a') && (c <= 'f')) 2992 return c + 10 - 'a'; 2993 if (( c>= 'A') && (c <= 'F')) 2994 return c + 10 - 'A'; 2995 return -1; 2996 } 2997 2998 2999 /* 3000 * Print a string containing C character escapes. 3001 */ 3002 protected void 3003 file_showstr(FILE *fp, const char *s, size_t len) 3004 { 3005 char c; 3006 3007 for (;;) { 3008 if (len == FILE_BADSIZE) { 3009 c = *s++; 3010 if (c == '\0') 3011 break; 3012 } 3013 else { 3014 if (len-- == 0) 3015 break; 3016 c = *s++; 3017 } 3018 if (c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */ 3019 (void) fputc(c, fp); 3020 else { 3021 (void) fputc('\\', fp); 3022 switch (c) { 3023 case '\a': 3024 (void) fputc('a', fp); 3025 break; 3026 3027 case '\b': 3028 (void) fputc('b', fp); 3029 break; 3030 3031 case '\f': 3032 (void) fputc('f', fp); 3033 break; 3034 3035 case '\n': 3036 (void) fputc('n', fp); 3037 break; 3038 3039 case '\r': 3040 (void) fputc('r', fp); 3041 break; 3042 3043 case '\t': 3044 (void) fputc('t', fp); 3045 break; 3046 3047 case '\v': 3048 (void) fputc('v', fp); 3049 break; 3050 3051 default: 3052 (void) fprintf(fp, "%.3o", c & 0377); 3053 break; 3054 } 3055 } 3056 } 3057 } 3058 3059 /* 3060 * eatsize(): Eat the size spec from a number [eg. 10UL] 3061 */ 3062 private void 3063 eatsize(const char **p) 3064 { 3065 const char *l = *p; 3066 3067 if (LOWCASE(*l) == 'u') 3068 l++; 3069 3070 switch (LOWCASE(*l)) { 3071 case 'l': /* long */ 3072 case 's': /* short */ 3073 case 'h': /* short */ 3074 case 'b': /* char/byte */ 3075 case 'c': /* char/byte */ 3076 l++; 3077 /*FALLTHROUGH*/ 3078 default: 3079 break; 3080 } 3081 3082 *p = l; 3083 } 3084 3085 /* 3086 * handle a buffer containing a compiled file. 
3087 */ 3088 private struct magic_map * 3089 apprentice_buf(struct magic_set *ms, struct magic *buf, size_t len) 3090 { 3091 struct magic_map *map; 3092 3093 if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) { 3094 file_oomem(ms, sizeof(*map)); 3095 return NULL; 3096 } 3097 map->len = len; 3098 map->p = buf; 3099 map->type = MAP_TYPE_USER; 3100 if (check_buffer(ms, map, "buffer") != 0) { 3101 apprentice_unmap(map); 3102 return NULL; 3103 } 3104 return map; 3105 } 3106 3107 /* 3108 * handle a compiled file. 3109 */ 3110 3111 private struct magic_map * 3112 apprentice_map(struct magic_set *ms, const char *fn) 3113 { 3114 int fd; 3115 struct stat st; 3116 char *dbname = NULL; 3117 struct magic_map *map; 3118 struct magic_map *rv = NULL; 3119 3120 fd = -1; 3121 if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) { 3122 file_oomem(ms, sizeof(*map)); 3123 goto error; 3124 } 3125 map->type = MAP_TYPE_USER; /* unspecified */ 3126 3127 dbname = mkdbname(ms, fn, 0); 3128 if (dbname == NULL) 3129 goto error; 3130 3131 if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1) 3132 goto error; 3133 3134 if (fstat(fd, &st) == -1) { 3135 file_error(ms, errno, "cannot stat `%s'", dbname); 3136 goto error; 3137 } 3138 if (st.st_size < 8 || st.st_size > maxoff_t()) { 3139 file_error(ms, 0, "file `%s' is too %s", dbname, 3140 st.st_size < 8 ? 
"small" : "large"); 3141 goto error; 3142 } 3143 3144 map->len = CAST(size_t, st.st_size); 3145 #ifdef QUICK 3146 map->type = MAP_TYPE_MMAP; 3147 if ((map->p = mmap(0, CAST(size_t, st.st_size), PROT_READ|PROT_WRITE, 3148 MAP_PRIVATE|MAP_FILE, fd, CAST(off_t, 0))) == MAP_FAILED) { 3149 file_error(ms, errno, "cannot map `%s'", dbname); 3150 goto error; 3151 } 3152 #else 3153 map->type = MAP_TYPE_MALLOC; 3154 if ((map->p = CAST(void *, malloc(map->len))) == NULL) { 3155 file_oomem(ms, map->len); 3156 goto error; 3157 } 3158 if (read(fd, map->p, map->len) != (ssize_t)map->len) { 3159 file_badread(ms); 3160 goto error; 3161 } 3162 #endif 3163 (void)close(fd); 3164 fd = -1; 3165 3166 if (check_buffer(ms, map, dbname) != 0) { 3167 goto error; 3168 } 3169 #ifdef QUICK 3170 if (mprotect(map->p, CAST(size_t, st.st_size), PROT_READ) == -1) { 3171 file_error(ms, errno, "cannot mprotect `%s'", dbname); 3172 goto error; 3173 } 3174 #endif 3175 3176 free(dbname); 3177 return map; 3178 3179 error: 3180 if (fd != -1) 3181 (void)close(fd); 3182 apprentice_unmap(map); 3183 free(dbname); 3184 return rv; 3185 } 3186 3187 private int 3188 check_buffer(struct magic_set *ms, struct magic_map *map, const char *dbname) 3189 { 3190 uint32_t *ptr; 3191 uint32_t entries, nentries; 3192 uint32_t version; 3193 int i, needsbyteswap; 3194 3195 ptr = CAST(uint32_t *, map->p); 3196 if (*ptr != MAGICNO) { 3197 if (swap4(*ptr) != MAGICNO) { 3198 file_error(ms, 0, "bad magic in `%s'", dbname); 3199 return -1; 3200 } 3201 needsbyteswap = 1; 3202 } else 3203 needsbyteswap = 0; 3204 if (needsbyteswap) 3205 version = swap4(ptr[1]); 3206 else 3207 version = ptr[1]; 3208 if (version != VERSIONNO) { 3209 file_error(ms, 0, "File %s supports only version %d magic " 3210 "files. 
`%s' is version %d", VERSION, 3211 VERSIONNO, dbname, version); 3212 return -1; 3213 } 3214 entries = CAST(uint32_t, map->len / sizeof(struct magic)); 3215 if ((entries * sizeof(struct magic)) != map->len) { 3216 file_error(ms, 0, "Size of `%s' %" SIZE_T_FORMAT "u is not " 3217 "a multiple of %" SIZE_T_FORMAT "u", 3218 dbname, map->len, sizeof(struct magic)); 3219 return -1; 3220 } 3221 map->magic[0] = CAST(struct magic *, map->p) + 1; 3222 nentries = 0; 3223 for (i = 0; i < MAGIC_SETS; i++) { 3224 if (needsbyteswap) 3225 map->nmagic[i] = swap4(ptr[i + 2]); 3226 else 3227 map->nmagic[i] = ptr[i + 2]; 3228 if (i != MAGIC_SETS - 1) 3229 map->magic[i + 1] = map->magic[i] + map->nmagic[i]; 3230 nentries += map->nmagic[i]; 3231 } 3232 if (entries != nentries + 1) { 3233 file_error(ms, 0, "Inconsistent entries in `%s' %u != %u", 3234 dbname, entries, nentries + 1); 3235 return -1; 3236 } 3237 if (needsbyteswap) 3238 for (i = 0; i < MAGIC_SETS; i++) 3239 byteswap(map->magic[i], map->nmagic[i]); 3240 return 0; 3241 } 3242 3243 /* 3244 * handle an mmaped file. 
3245 */ 3246 private int 3247 apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn) 3248 { 3249 static const size_t nm = sizeof(*map->nmagic) * MAGIC_SETS; 3250 static const size_t m = sizeof(**map->magic); 3251 int fd = -1; 3252 size_t len; 3253 char *dbname; 3254 int rv = -1; 3255 uint32_t i; 3256 union { 3257 struct magic m; 3258 uint32_t h[2 + MAGIC_SETS]; 3259 } hdr; 3260 3261 dbname = mkdbname(ms, fn, 1); 3262 3263 if (dbname == NULL) 3264 goto out; 3265 3266 if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1) 3267 { 3268 file_error(ms, errno, "cannot open `%s'", dbname); 3269 goto out; 3270 } 3271 memset(&hdr, 0, sizeof(hdr)); 3272 hdr.h[0] = MAGICNO; 3273 hdr.h[1] = VERSIONNO; 3274 memcpy(hdr.h + 2, map->nmagic, nm); 3275 3276 if (write(fd, &hdr, sizeof(hdr)) != CAST(ssize_t, sizeof(hdr))) { 3277 file_error(ms, errno, "error writing `%s'", dbname); 3278 goto out2; 3279 } 3280 3281 for (i = 0; i < MAGIC_SETS; i++) { 3282 len = m * map->nmagic[i]; 3283 if (write(fd, map->magic[i], len) != CAST(ssize_t, len)) { 3284 file_error(ms, errno, "error writing `%s'", dbname); 3285 goto out2; 3286 } 3287 } 3288 3289 rv = 0; 3290 out2: 3291 if (fd != -1) 3292 (void)close(fd); 3293 out: 3294 apprentice_unmap(map); 3295 free(dbname); 3296 return rv; 3297 } 3298 3299 private const char ext[] = ".mgc"; 3300 /* 3301 * make a dbname 3302 */ 3303 private char * 3304 mkdbname(struct magic_set *ms, const char *fn, int strip) 3305 { 3306 const char *p, *q; 3307 char *buf; 3308 3309 if (strip) { 3310 if ((p = strrchr(fn, '/')) != NULL) 3311 fn = ++p; 3312 } 3313 3314 for (q = fn; *q; q++) 3315 continue; 3316 /* Look for .mgc */ 3317 for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--) 3318 if (*p != *q) 3319 break; 3320 3321 /* Did not find .mgc, restore q */ 3322 if (p >= ext) 3323 while (*q) 3324 q++; 3325 3326 q++; 3327 /* Compatibility with old code that looked in .mime */ 3328 if (ms->flags & MAGIC_MIME) { 3329 if 
(asprintf(&buf, "%.*s.mime%s", CAST(int, q - fn), fn, ext) 3330 < 0) 3331 return NULL; 3332 if (access(buf, R_OK) != -1) { 3333 ms->flags &= MAGIC_MIME_TYPE; 3334 return buf; 3335 } 3336 free(buf); 3337 } 3338 if (asprintf(&buf, "%.*s%s", CAST(int, q - fn), fn, ext) < 0) 3339 return NULL; 3340 3341 /* Compatibility with old code that looked in .mime */ 3342 if (strstr(fn, ".mime") != NULL) 3343 ms->flags &= MAGIC_MIME_TYPE; 3344 return buf; 3345 } 3346 3347 /* 3348 * Byteswap an mmap'ed file if needed 3349 */ 3350 private void 3351 byteswap(struct magic *magic, uint32_t nmagic) 3352 { 3353 uint32_t i; 3354 for (i = 0; i < nmagic; i++) 3355 bs1(&magic[i]); 3356 } 3357 3358 /* 3359 * swap a short 3360 */ 3361 private uint16_t 3362 swap2(uint16_t sv) 3363 { 3364 uint16_t rv; 3365 uint8_t *s = RCAST(uint8_t *, RCAST(void *, &sv)); 3366 uint8_t *d = RCAST(uint8_t *, RCAST(void *, &rv)); 3367 d[0] = s[1]; 3368 d[1] = s[0]; 3369 return rv; 3370 } 3371 3372 /* 3373 * swap an int 3374 */ 3375 private uint32_t 3376 swap4(uint32_t sv) 3377 { 3378 uint32_t rv; 3379 uint8_t *s = RCAST(uint8_t *, RCAST(void *, &sv)); 3380 uint8_t *d = RCAST(uint8_t *, RCAST(void *, &rv)); 3381 d[0] = s[3]; 3382 d[1] = s[2]; 3383 d[2] = s[1]; 3384 d[3] = s[0]; 3385 return rv; 3386 } 3387 3388 /* 3389 * swap a quad 3390 */ 3391 private uint64_t 3392 swap8(uint64_t sv) 3393 { 3394 uint64_t rv; 3395 uint8_t *s = RCAST(uint8_t *, RCAST(void *, &sv)); 3396 uint8_t *d = RCAST(uint8_t *, RCAST(void *, &rv)); 3397 #if 0 3398 d[0] = s[3]; 3399 d[1] = s[2]; 3400 d[2] = s[1]; 3401 d[3] = s[0]; 3402 d[4] = s[7]; 3403 d[5] = s[6]; 3404 d[6] = s[5]; 3405 d[7] = s[4]; 3406 #else 3407 d[0] = s[7]; 3408 d[1] = s[6]; 3409 d[2] = s[5]; 3410 d[3] = s[4]; 3411 d[4] = s[3]; 3412 d[5] = s[2]; 3413 d[6] = s[1]; 3414 d[7] = s[0]; 3415 #endif 3416 return rv; 3417 } 3418 3419 protected uintmax_t 3420 file_varint2uintmax_t(const unsigned char *us, int t, size_t *l) 3421 { 3422 uintmax_t x = 0; 3423 const unsigned char *c; 
3424 if (t == FILE_LEVARINT) { 3425 for (c = us; *c; c++) { 3426 if ((*c & 0x80) == 0) 3427 break; 3428 } 3429 if (l) 3430 *l = c - us + 1; 3431 for (; c >= us; c--) { 3432 x |= *c & 0x7f; 3433 x <<= 7; 3434 } 3435 } else { 3436 for (c = us; *c; c++) { 3437 x |= *c & 0x7f; 3438 if ((*c & 0x80) == 0) 3439 break; 3440 x <<= 7; 3441 } 3442 if (l) 3443 *l = c - us + 1; 3444 } 3445 return x; 3446 } 3447 3448 3449 /* 3450 * byteswap a single magic entry 3451 */ 3452 private void 3453 bs1(struct magic *m) 3454 { 3455 m->cont_level = swap2(m->cont_level); 3456 m->offset = swap4(CAST(uint32_t, m->offset)); 3457 m->in_offset = swap4(CAST(uint32_t, m->in_offset)); 3458 m->lineno = swap4(CAST(uint32_t, m->lineno)); 3459 if (IS_STRING(m->type)) { 3460 m->str_range = swap4(m->str_range); 3461 m->str_flags = swap4(m->str_flags); 3462 } 3463 else { 3464 m->value.q = swap8(m->value.q); 3465 m->num_mask = swap8(m->num_mask); 3466 } 3467 } 3468 3469 protected size_t 3470 file_pstring_length_size(struct magic_set *ms, const struct magic *m) 3471 { 3472 switch (m->str_flags & PSTRING_LEN) { 3473 case PSTRING_1_LE: 3474 return 1; 3475 case PSTRING_2_LE: 3476 case PSTRING_2_BE: 3477 return 2; 3478 case PSTRING_4_LE: 3479 case PSTRING_4_BE: 3480 return 4; 3481 default: 3482 file_error(ms, 0, "corrupt magic file " 3483 "(bad pascal string length %d)", 3484 m->str_flags & PSTRING_LEN); 3485 return FILE_BADSIZE; 3486 } 3487 } 3488 protected size_t 3489 file_pstring_get_length(struct magic_set *ms, const struct magic *m, 3490 const char *ss) 3491 { 3492 size_t len = 0; 3493 const unsigned char *s = RCAST(const unsigned char *, ss); 3494 unsigned int s3, s2, s1, s0; 3495 3496 switch (m->str_flags & PSTRING_LEN) { 3497 case PSTRING_1_LE: 3498 len = *s; 3499 break; 3500 case PSTRING_2_LE: 3501 s0 = s[0]; 3502 s1 = s[1]; 3503 len = (s1 << 8) | s0; 3504 break; 3505 case PSTRING_2_BE: 3506 s0 = s[0]; 3507 s1 = s[1]; 3508 len = (s0 << 8) | s1; 3509 break; 3510 case PSTRING_4_LE: 3511 s0 = s[0]; 3512 
s1 = s[1]; 3513 s2 = s[2]; 3514 s3 = s[3]; 3515 len = (s3 << 24) | (s2 << 16) | (s1 << 8) | s0; 3516 break; 3517 case PSTRING_4_BE: 3518 s0 = s[0]; 3519 s1 = s[1]; 3520 s2 = s[2]; 3521 s3 = s[3]; 3522 len = (s0 << 24) | (s1 << 16) | (s2 << 8) | s3; 3523 break; 3524 default: 3525 file_error(ms, 0, "corrupt magic file " 3526 "(bad pascal string length %d)", 3527 m->str_flags & PSTRING_LEN); 3528 return FILE_BADSIZE; 3529 } 3530 3531 if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF) { 3532 size_t l = file_pstring_length_size(ms, m); 3533 if (l == FILE_BADSIZE) 3534 return l; 3535 len -= l; 3536 } 3537 3538 return len; 3539 } 3540 3541 protected int 3542 file_magicfind(struct magic_set *ms, const char *name, struct mlist *v) 3543 { 3544 uint32_t i, j; 3545 struct mlist *mlist, *ml; 3546 3547 mlist = ms->mlist[1]; 3548 3549 for (ml = mlist->next; ml != mlist; ml = ml->next) { 3550 struct magic *ma = ml->magic; 3551 uint32_t nma = ml->nmagic; 3552 for (i = 0; i < nma; i++) { 3553 if (ma[i].type != FILE_NAME) 3554 continue; 3555 if (strcmp(ma[i].value.s, name) == 0) { 3556 v->magic = &ma[i]; 3557 for (j = i + 1; j < nma; j++) 3558 if (ma[j].cont_level == 0) 3559 break; 3560 v->nmagic = j - i; 3561 return 0; 3562 } 3563 } 3564 } 3565 return -1; 3566 } 3567