1 /* 2 * Copyright (c) Ian F. Darwin 1986-1995. 3 * Software written by Ian F. Darwin and others; 4 * maintained 1995-present by Christos Zoulas and others. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice immediately at the beginning of the file, without modification, 11 * this list of conditions, and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 /* 29 * file - find type of a file or files - main program. 30 */ 31 32 #include "file.h" 33 34 #ifndef lint 35 FILE_RCSID("@(#)$File: file.c,v 1.190 2021/09/24 14:14:26 christos Exp $") 36 #endif /* lint */ 37 38 #include "magic.h" 39 40 #include <stdlib.h> 41 #include <unistd.h> 42 #include <string.h> 43 #ifdef RESTORE_TIME 44 # if (__COHERENT__ >= 0x420) 45 # include <sys/utime.h> 46 # else 47 # ifdef USE_UTIMES 48 # include <sys/time.h> 49 # else 50 # include <utime.h> 51 # endif 52 # endif 53 #endif 54 #ifdef HAVE_UNISTD_H 55 #include <unistd.h> /* for read() */ 56 #endif 57 #ifdef HAVE_WCHAR_H 58 #include <wchar.h> 59 #endif 60 61 #if defined(HAVE_GETOPT_H) && defined(HAVE_STRUCT_OPTION) 62 # include <getopt.h> 63 # ifndef HAVE_GETOPT_LONG 64 int getopt_long(int, char * const *, const char *, 65 const struct option *, int *); 66 # endif 67 # else 68 # include "mygetopt.h" 69 #endif 70 71 #ifdef S_IFLNK 72 # define IFLNK_h "h" 73 # define IFLNK_L "L" 74 #else 75 # define IFLNK_h "" 76 # define IFLNK_L "" 77 #endif 78 79 #define FILE_FLAGS "bcCdE" IFLNK_h "ik" IFLNK_L "lNnprsSvzZ0" 80 #define OPTSTRING "bcCde:Ef:F:hiklLm:nNpP:rsSvzZ0" 81 82 # define USAGE \ 83 "Usage: %s [-" FILE_FLAGS "] [--apple] [--extension] [--mime-encoding]\n" \ 84 " [--mime-type] [-e <testname>] [-F <separator>] " \ 85 " [-f <namefile>]\n" \ 86 " [-m <magicfiles>] [-P <parameter=value>] [--exclude-quiet]\n" \ 87 " <file> ...\n" \ 88 " %s -C [-m <magicfiles>]\n" \ 89 " %s [--help]\n" 90 91 private int /* Global command-line options */ 92 bflag = 0, /* brief output format */ 93 nopad = 0, /* Don't pad output */ 94 nobuffer = 0, /* Do not buffer stdout */ 95 nulsep = 0; /* Append '\0' to the separator */ 96 97 private const char *separator = ":"; /* Default field separator */ 98 private const struct option long_options[] = { 99 #define OPT_HELP 1 100 #define OPT_APPLE 2 101 #define OPT_EXTENSIONS 3 102 #define OPT_MIME_TYPE 4 103 #define OPT_MIME_ENCODING 5 104 #define OPT_EXCLUDE_QUIET 6 105 #define OPT(shortname, longname, opt, def, doc) \ 106 {longname, opt, NULL, shortname}, 107 #define OPT_LONGONLY(longname, opt, def, doc, id) \ 108 {longname, opt, NULL, id}, 109 #include "file_opts.h" 110 #undef OPT 111 #undef OPT_LONGONLY 112 {0, 0, NULL, 0} 113 }; 114 115 private const struct { 116 const char *name; 117 int value; 118 } nv[] = { 119 { "apptype", MAGIC_NO_CHECK_APPTYPE }, 120 { "ascii", MAGIC_NO_CHECK_ASCII }, 121 { "cdf", MAGIC_NO_CHECK_CDF }, 122 { "compress", MAGIC_NO_CHECK_COMPRESS }, 123 { "csv", MAGIC_NO_CHECK_CSV }, 124 { "elf", MAGIC_NO_CHECK_ELF }, 125 { "encoding", MAGIC_NO_CHECK_ENCODING }, 126 { "soft", MAGIC_NO_CHECK_SOFT }, 127 { "tar", MAGIC_NO_CHECK_TAR }, 128 { "json", MAGIC_NO_CHECK_JSON }, 129 { "text", MAGIC_NO_CHECK_TEXT }, /* synonym for ascii */ 130 { "tokens", MAGIC_NO_CHECK_TOKENS }, /* OBSOLETE: ignored for backwards compatibility */ 131 }; 132 133 private struct { 134 const char *name; 135 size_t value; 136 size_t def; 137 const char *desc; 138 int tag; 139 int set; 140 } pm[] = { 141 { "bytes", 0, FILE_BYTES_MAX, "max bytes to look inside file", 142 MAGIC_PARAM_BYTES_MAX, 0 }, 143 { "elf_notes", 0, FILE_ELF_NOTES_MAX, "max ELF notes processed", 144 MAGIC_PARAM_ELF_NOTES_MAX, 0 }, 145 { "elf_phnum", 0, FILE_ELF_PHNUM_MAX, "max ELF prog sections processed", 146 MAGIC_PARAM_ELF_PHNUM_MAX, 0 }, 147 { "elf_shnum", 0, FILE_ELF_SHNUM_MAX, "max ELF sections processed", 148 MAGIC_PARAM_ELF_SHNUM_MAX, 0 }, 149 { "encoding", 0, FILE_ENCODING_MAX, "max bytes to scan for encoding", 150 MAGIC_PARAM_ENCODING_MAX, 0 }, 151 { "indir", 0, FILE_INDIR_MAX, "recursion limit for indirection", 152 MAGIC_PARAM_INDIR_MAX, 0 }, 153 { "name", 0, FILE_NAME_MAX, "use limit for name/use magic", 154 MAGIC_PARAM_NAME_MAX, 0 }, 155 { "regex", 0, FILE_REGEX_MAX, "length limit for REGEX searches", 156 MAGIC_PARAM_REGEX_MAX, 0 }, 157 }; 158 159 private int posixly; 160 161 #ifdef __dead 162 __dead 163 #endif 164 private void usage(void); 165 private void docprint(const char *, int); 166 #ifdef __dead 167 __dead 168 #endif 169 private void help(void); 170 171 private int unwrap(struct magic_set *, const char *); 172 private int process(struct magic_set *ms, const char *, int); 173 private struct magic_set *load(const char *, int); 174 private void setparam(const char *); 175 private void applyparam(magic_t); 176 177 178 /* 179 * main - parse arguments and handle options 180 */ 181 int 182 main(int argc, char *argv[]) 183 { 184 int c; 185 size_t i; 186 int action = 0, didsomefiles = 0, errflg = 0; 187 int flags = 0, e = 0; 188 #ifdef HAVE_LIBSECCOMP 189 int sandbox = 1; 190 #endif 191 struct magic_set *magic = NULL; 192 int longindex; 193 const char *magicfile = NULL; /* where the magic is */ 194 char *progname; 195 196 /* makes islower etc work for other langs */ 197 (void)setlocale(LC_CTYPE, ""); 198 199 #ifdef __EMX__ 200 /* sh-like wildcard expansion! Shouldn't hurt at least ... */ 201 _wildcard(&argc, &argv); 202 #endif 203 204 if ((progname = strrchr(argv[0], '/')) != NULL) 205 progname++; 206 else 207 progname = argv[0]; 208 209 file_setprogname(progname); 210 211 212 #ifdef S_IFLNK 213 posixly = getenv("POSIXLY_CORRECT") != NULL; 214 flags |= posixly ? MAGIC_SYMLINK : 0; 215 #endif 216 while ((c = getopt_long(argc, argv, OPTSTRING, long_options, 217 &longindex)) != -1) 218 switch (c) { 219 case OPT_HELP: 220 help(); 221 break; 222 case OPT_APPLE: 223 flags |= MAGIC_APPLE; 224 break; 225 case OPT_EXTENSIONS: 226 flags |= MAGIC_EXTENSION; 227 break; 228 case OPT_MIME_TYPE: 229 flags |= MAGIC_MIME_TYPE; 230 break; 231 case OPT_MIME_ENCODING: 232 flags |= MAGIC_MIME_ENCODING; 233 break; 234 case '0': 235 nulsep++; 236 break; 237 case 'b': 238 bflag++; 239 break; 240 case 'c': 241 action = FILE_CHECK; 242 break; 243 case 'C': 244 action = FILE_COMPILE; 245 break; 246 case 'd': 247 flags |= MAGIC_DEBUG|MAGIC_CHECK; 248 break; 249 case 'E': 250 flags |= MAGIC_ERROR; 251 break; 252 case 'e': 253 case OPT_EXCLUDE_QUIET: 254 for (i = 0; i < __arraycount(nv); i++) 255 if (strcmp(nv[i].name, optarg) == 0) 256 break; 257 258 if (i == __arraycount(nv)) { 259 if (c != OPT_EXCLUDE_QUIET) 260 errflg++; 261 } else 262 flags |= nv[i].value; 263 break; 264 265 case 'f': 266 if(action) 267 usage(); 268 if (magic == NULL) 269 if ((magic = load(magicfile, flags)) == NULL) 270 return 1; 271 applyparam(magic); 272 e |= unwrap(magic, optarg); 273 ++didsomefiles; 274 break; 275 case 'F': 276 separator = optarg; 277 break; 278 case 'i': 279 flags |= MAGIC_MIME; 280 break; 281 case 'k': 282 flags |= MAGIC_CONTINUE; 283 break; 284 case 'l': 285 action = FILE_LIST; 286 break; 287 case 'm': 288 magicfile = optarg; 289 break; 290 case 'n': 291 ++nobuffer; 292 break; 293 case 'N': 294 ++nopad; 295 break; 296 #if defined(HAVE_UTIME) || defined(HAVE_UTIMES) 297 case 'p': 298 flags |= MAGIC_PRESERVE_ATIME; 299 break; 300 #endif 301 case 'P': 302 setparam(optarg); 303 break; 304 case 'r': 305 flags |= MAGIC_RAW; 306 break; 307 case 's': 308 flags |= MAGIC_DEVICES; 309 break; 310 case 'S': 311 #ifdef HAVE_LIBSECCOMP 312 sandbox = 0; 313 #endif 314 break; 315 case 'v': 316 if (magicfile == NULL) 317 magicfile = magic_getpath(magicfile, action); 318 (void)fprintf(stdout, "%s-%s\n", file_getprogname(), 319 VERSION); 320 (void)fprintf(stdout, "magic file from %s\n", 321 magicfile); 322 #ifdef HAVE_LIBSECCOMP 323 (void)fprintf(stdout, "seccomp support included\n"); 324 #endif 325 return 0; 326 case 'z': 327 flags |= MAGIC_COMPRESS; 328 break; 329 330 case 'Z': 331 flags |= MAGIC_COMPRESS|MAGIC_COMPRESS_TRANSP; 332 break; 333 #ifdef S_IFLNK 334 case 'L': 335 flags |= MAGIC_SYMLINK; 336 break; 337 case 'h': 338 flags &= ~MAGIC_SYMLINK; 339 break; 340 #endif 341 case '?': 342 default: 343 errflg++; 344 break; 345 } 346 347 if (errflg) { 348 usage(); 349 } 350 if (e) 351 return e; 352 353 #ifdef HAVE_LIBSECCOMP 354 #if 0 355 if (sandbox && enable_sandbox_basic() == -1) 356 #else 357 if (sandbox && enable_sandbox_full() == -1) 358 #endif 359 file_err(EXIT_FAILURE, "SECCOMP initialisation failed"); 360 #endif /* HAVE_LIBSECCOMP */ 361 362 if (MAGIC_VERSION != magic_version()) 363 file_warnx("Compiled magic version [%d] " 364 "does not match with shared library magic version [%d]\n", 365 MAGIC_VERSION, magic_version()); 366 367 switch(action) { 368 case FILE_CHECK: 369 case FILE_COMPILE: 370 case FILE_LIST: 371 /* 372 * Don't try to check/compile ~/.magic unless we explicitly 373 * ask for it. 374 */ 375 magic = magic_open(flags|MAGIC_CHECK); 376 if (magic == NULL) { 377 file_warn("Can't create magic"); 378 return 1; 379 } 380 381 382 switch(action) { 383 case FILE_CHECK: 384 c = magic_check(magic, magicfile); 385 break; 386 case FILE_COMPILE: 387 c = magic_compile(magic, magicfile); 388 break; 389 case FILE_LIST: 390 c = magic_list(magic, magicfile); 391 break; 392 default: 393 abort(); 394 } 395 if (c == -1) { 396 file_warnx("%s", magic_error(magic)); 397 e = 1; 398 goto out; 399 } 400 goto out; 401 default: 402 if (magic == NULL) 403 if ((magic = load(magicfile, flags)) == NULL) 404 return 1; 405 applyparam(magic); 406 } 407 408 if (optind == argc) { 409 if (!didsomefiles) 410 usage(); 411 } 412 else { 413 size_t j, wid, nw; 414 for (wid = 0, j = CAST(size_t, optind); j < CAST(size_t, argc); 415 j++) { 416 nw = file_mbswidth(argv[j]); 417 if (nw > wid) 418 wid = nw; 419 } 420 /* 421 * If bflag is only set twice, set it depending on 422 * number of files [this is undocumented, and subject to change] 423 */ 424 if (bflag == 2) { 425 bflag = optind >= argc - 1; 426 } 427 for (; optind < argc; optind++) 428 e |= process(magic, argv[optind], wid); 429 } 430 431 out: 432 if (magic) 433 magic_close(magic); 434 return e; 435 } 436 437 private void 438 applyparam(magic_t magic) 439 { 440 size_t i; 441 442 for (i = 0; i < __arraycount(pm); i++) { 443 if (!pm[i].set) 444 continue; 445 if (magic_setparam(magic, pm[i].tag, &pm[i].value) == -1) 446 file_err(EXIT_FAILURE, "Can't set %s", pm[i].name); 447 } 448 } 449 450 private void 451 setparam(const char *p) 452 { 453 size_t i; 454 char *s; 455 456 if ((s = strchr(p, '=')) == NULL) 457 goto badparm; 458 459 for (i = 0; i < __arraycount(pm); i++) { 460 if (strncmp(p, pm[i].name, s - p) != 0) 461 continue; 462 pm[i].value = atoi(s + 1); 463 pm[i].set = 1; 464 return; 465 } 466 badparm: 467 file_errx(EXIT_FAILURE, "Unknown param %s", p); 468 } 469 470 private struct magic_set * 471 /*ARGSUSED*/ 472 load(const char *magicfile, int flags) 473 { 474 struct magic_set *magic = magic_open(flags); 475 const char *e; 476 477 if (magic == NULL) { 478 file_warn("Can't create magic"); 479 return NULL; 480 } 481 if (magic_load(magic, magicfile) == -1) { 482 file_warn("%s", magic_error(magic)); 483 magic_close(magic); 484 return NULL; 485 } 486 if ((e = magic_error(magic)) != NULL) 487 file_warn("%s", e); 488 return magic; 489 } 490 491 /* 492 * unwrap -- read a file of filenames, do each one. 493 */ 494 private int 495 unwrap(struct magic_set *ms, const char *fn) 496 { 497 FILE *f; 498 ssize_t len; 499 char *line = NULL; 500 size_t llen = 0; 501 int wid = 0, cwid; 502 int e = 0; 503 504 if (strcmp("-", fn) == 0) { 505 f = stdin; 506 wid = 1; 507 } else { 508 if ((f = fopen(fn, "r")) == NULL) { 509 file_warn("Cannot open `%s'", fn); 510 return 1; 511 } 512 513 while ((len = getline(&line, &llen, f)) > 0) { 514 if (line[len - 1] == '\n') 515 line[len - 1] = '\0'; 516 cwid = file_mbswidth(line); 517 if (cwid > wid) 518 wid = cwid; 519 } 520 521 rewind(f); 522 } 523 524 while ((len = getline(&line, &llen, f)) > 0) { 525 if (line[len - 1] == '\n') 526 line[len - 1] = '\0'; 527 e |= process(ms, line, wid); 528 } 529 530 free(line); 531 (void)fclose(f); 532 return e; 533 } 534 535 /* 536 * Called for each input file on the command line (or in a list of files) 537 */ 538 private int 539 process(struct magic_set *ms, const char *inname, int wid) 540 { 541 const char *type, c = nulsep > 1 ? '\0' : '\n'; 542 int std_in = strcmp(inname, "-") == 0; 543 544 if (wid > 0 && !bflag) { 545 (void)printf("%s", std_in ? "/dev/stdin" : inname); 546 if (nulsep) 547 (void)putc('\0', stdout); 548 if (nulsep < 2) { 549 (void)printf("%s", separator); 550 (void)printf("%*s ", CAST(int, nopad ? 0 551 : (wid - file_mbswidth(inname))), ""); 552 } 553 } 554 555 type = magic_file(ms, std_in ? NULL : inname); 556 557 if (type == NULL) { 558 (void)printf("ERROR: %s%c", magic_error(ms), c); 559 } else { 560 (void)printf("%s%c", type, c); 561 } 562 if (nobuffer) 563 (void)fflush(stdout); 564 return type == NULL; 565 } 566 567 protected size_t 568 file_mbswidth(const char *s) 569 { 570 #if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) 571 size_t bytesconsumed, old_n, n, width = 0; 572 mbstate_t state; 573 wchar_t nextchar; 574 (void)memset(&state, 0, sizeof(mbstate_t)); 575 old_n = n = strlen(s); 576 577 while (n > 0) { 578 bytesconsumed = mbrtowc(&nextchar, s, n, &state); 579 if (bytesconsumed == CAST(size_t, -1) || 580 bytesconsumed == CAST(size_t, -2)) { 581 /* Something went wrong, return something reasonable */ 582 return old_n; 583 } 584 if (s[0] == '\n') { 585 /* 586 * do what strlen() would do, so that caller 587 * is always right 588 */ 589 width++; 590 } else { 591 int w = wcwidth(nextchar); 592 if (w > 0) 593 width += w; 594 } 595 596 s += bytesconsumed, n -= bytesconsumed; 597 } 598 return width; 599 #else 600 return strlen(s); 601 #endif 602 } 603 604 private void 605 usage(void) 606 { 607 const char *pn = file_getprogname(); 608 (void)fprintf(stderr, USAGE, pn, pn, pn); 609 exit(EXIT_FAILURE); 610 } 611 612 private void 613 defprint(int def) 614 { 615 if (!def) 616 return; 617 if (((def & 1) && posixly) || ((def & 2) && !posixly)) 618 fprintf(stdout, " (default)"); 619 fputc('\n', stdout); 620 } 621 622 private void 623 docprint(const char *opts, int def) 624 { 625 size_t i; 626 int comma, pad; 627 char *sp, *p; 628 629 p = strchr(opts, '%'); 630 if (p == NULL) { 631 fprintf(stdout, "%s", opts); 632 defprint(def); 633 return; 634 } 635 636 for (sp = p - 1; sp > opts && *sp == ' '; sp--) 637 continue; 638 639 fprintf(stdout, "%.*s", CAST(int, p - opts), opts); 640 pad = (int)CAST(int, p - sp - 1); 641 642 switch (*++p) { 643 case 'e': 644 comma = 0; 645 for (i = 0; i < __arraycount(nv); i++) { 646 fprintf(stdout, "%s%s", comma++ ? ", " : "", nv[i].name); 647 if (i && i % 5 == 0 && i != __arraycount(nv) - 1) { 648 fprintf(stdout, ",\n%*s", pad, ""); 649 comma = 0; 650 } 651 } 652 break; 653 case 'P': 654 for (i = 0; i < __arraycount(pm); i++) { 655 fprintf(stdout, "%9s %7zu %s", pm[i].name, pm[i].def, 656 pm[i].desc); 657 if (i != __arraycount(pm) - 1) 658 fprintf(stdout, "\n%*s", pad, ""); 659 } 660 break; 661 default: 662 file_errx(EXIT_FAILURE, "Unknown escape `%c' in long options", 663 *p); 664 break; 665 } 666 fprintf(stdout, "%s", opts + (p - opts) + 1); 667 668 } 669 670 private void 671 help(void) 672 { 673 (void)fputs( 674 "Usage: file [OPTION...] [FILE...]\n" 675 "Determine type of FILEs.\n" 676 "\n", stdout); 677 #define OPT(shortname, longname, opt, def, doc) \ 678 fprintf(stdout, " -%c, --" longname, shortname), \ 679 docprint(doc, def); 680 #define OPT_LONGONLY(longname, opt, def, doc, id) \ 681 fprintf(stdout, " --" longname), \ 682 docprint(doc, def); 683 #include "file_opts.h" 684 #undef OPT 685 #undef OPT_LONGONLY 686 fprintf(stdout, "\nReport bugs to https://bugs.astron.com/\n"); 687 exit(EXIT_SUCCESS); 688 } 689 690 private const char *file_progname; 691 692 protected void 693 file_setprogname(const char *progname) 694 { 695 file_progname = progname; 696 } 697 698 protected const char * 699 file_getprogname(void) 700 { 701 return file_progname; 702 } 703 704 protected void 705 file_err(int e, const char *fmt, ...) 706 { 707 va_list ap; 708 int se = errno; 709 710 va_start(ap, fmt); 711 fprintf(stderr, "%s: ", file_progname); 712 vfprintf(stderr, fmt, ap); 713 va_end(ap); 714 if (se) 715 fprintf(stderr, " (%s)\n", strerror(se)); 716 else 717 fputc('\n', stderr); 718 exit(e); 719 } 720 721 protected void 722 file_errx(int e, const char *fmt, ...) 723 { 724 va_list ap; 725 726 va_start(ap, fmt); 727 fprintf(stderr, "%s: ", file_progname); 728 vfprintf(stderr, fmt, ap); 729 va_end(ap); 730 fprintf(stderr, "\n"); 731 exit(e); 732 } 733 734 protected void 735 file_warn(const char *fmt, ...) 736 { 737 va_list ap; 738 int se = errno; 739 740 va_start(ap, fmt); 741 fprintf(stderr, "%s: ", file_progname); 742 vfprintf(stderr, fmt, ap); 743 va_end(ap); 744 if (se) 745 fprintf(stderr, " (%s)\n", strerror(se)); 746 else 747 fputc('\n', stderr); 748 errno = se; 749 } 750 751 protected void 752 file_warnx(const char *fmt, ...) 753 { 754 va_list ap; 755 int se = errno; 756 757 va_start(ap, fmt); 758 fprintf(stderr, "%s: ", file_progname); 759 vfprintf(stderr, fmt, ap); 760 va_end(ap); 761 fprintf(stderr, "\n"); 762 errno = se; 763 } 764