1 /*- 2 * Copyright (c) 2008, 2016 Christos Zoulas 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 */ 26 #include "file.h" 27 28 #ifndef lint 29 FILE_RCSID("@(#)$File: readcdf.c,v 1.65 2017/04/08 20:58:03 christos Exp $") 30 #endif 31 32 #include <assert.h> 33 #include <stdlib.h> 34 #include <unistd.h> 35 #include <string.h> 36 #include <time.h> 37 #include <ctype.h> 38 39 #include "cdf.h" 40 #include "magic.h" 41 42 #ifndef __arraycount 43 #define __arraycount(a) (sizeof(a) / sizeof(a[0])) 44 #endif 45 46 #define NOTMIME(ms) (((ms)->flags & MAGIC_MIME) == 0) 47 48 static const struct nv { 49 const char *pattern; 50 const char *mime; 51 } app2mime[] = { 52 { "Word", "msword", }, 53 { "Excel", "vnd.ms-excel", }, 54 { "Powerpoint", "vnd.ms-powerpoint", }, 55 { "Crystal Reports", "x-rpt", }, 56 { "Advanced Installer", "vnd.ms-msi", }, 57 { "InstallShield", "vnd.ms-msi", }, 58 { "Microsoft Patch Compiler", "vnd.ms-msi", }, 59 { "NAnt", "vnd.ms-msi", }, 60 { "Windows Installer", "vnd.ms-msi", }, 61 { NULL, NULL, }, 62 }, name2mime[] = { 63 { "Book", "vnd.ms-excel", }, 64 { "Workbook", "vnd.ms-excel", }, 65 { "WordDocument", "msword", }, 66 { "PowerPoint", "vnd.ms-powerpoint", }, 67 { "DigitalSignature", "vnd.ms-msi", }, 68 { NULL, NULL, }, 69 }, name2desc[] = { 70 { "Book", "Microsoft Excel", }, 71 { "Workbook", "Microsoft Excel", }, 72 { "WordDocument", "Microsoft Word", }, 73 { "PowerPoint", "Microsoft PowerPoint", }, 74 { "DigitalSignature", "Microsoft Installer", }, 75 { NULL, NULL, }, 76 }; 77 78 static const struct cv { 79 uint64_t clsid[2]; 80 const char *mime; 81 } clsid2mime[] = { 82 { 83 { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, 84 "x-msi", 85 }, 86 { { 0, 0 }, 87 NULL, 88 }, 89 }, clsid2desc[] = { 90 { 91 { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, 92 "MSI Installer", 93 }, 94 { { 0, 0 }, 95 NULL, 96 }, 97 }; 98 99 private const char * 100 cdf_clsid_to_mime(const uint64_t clsid[2], const struct cv *cv) 101 { 102 size_t i; 103 for (i = 0; cv[i].mime != NULL; i++) { 104 if (clsid[0] == cv[i].clsid[0] && clsid[1] == cv[i].clsid[1]) 105 return cv[i].mime; 106 } 107 #ifdef CDF_DEBUG 108 fprintf(stderr, "unknown mime %" PRIx64 ", %" PRIx64 "\n", clsid[0], 109 clsid[1]); 110 #endif 111 return NULL; 112 } 113 114 private const char * 115 cdf_app_to_mime(const char *vbuf, const struct nv *nv) 116 { 117 size_t i; 118 const char *rv = NULL; 119 #ifdef USE_C_LOCALE 120 locale_t old_lc_ctype, c_lc_ctype; 121 122 c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0); 123 assert(c_lc_ctype != NULL); 124 old_lc_ctype = uselocale(c_lc_ctype); 125 assert(old_lc_ctype != NULL); 126 #else 127 char *old_lc_ctype = setlocale(LC_CTYPE, "C"); 128 #endif 129 for (i = 0; nv[i].pattern != NULL; i++) 130 if (strcasestr(vbuf, nv[i].pattern) != NULL) { 131 rv = nv[i].mime; 132 break; 133 } 134 #ifdef CDF_DEBUG 135 fprintf(stderr, "unknown app %s\n", vbuf); 136 #endif 137 #ifdef USE_C_LOCALE 138 (void)uselocale(old_lc_ctype); 139 freelocale(c_lc_ctype); 140 #else 141 setlocale(LC_CTYPE, old_lc_ctype); 142 #endif 143 return rv; 144 } 145 146 private int 147 cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info, 148 size_t count, const cdf_directory_t *root_storage) 149 { 150 size_t i; 151 cdf_timestamp_t tp; 152 struct timespec ts; 153 char buf[64]; 154 const char *str = NULL; 155 const char *s, *e; 156 int len; 157 158 if (!NOTMIME(ms) && root_storage) 159 str = cdf_clsid_to_mime(root_storage->d_storage_uuid, 160 clsid2mime); 161 162 for (i = 0; i < count; i++) { 163 cdf_print_property_name(buf, sizeof(buf), info[i].pi_id); 164 switch (info[i].pi_type) { 165 case CDF_NULL: 166 break; 167 case CDF_SIGNED16: 168 if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf, 169 info[i].pi_s16) == -1) 170 return -1; 171 break; 172 case CDF_SIGNED32: 173 if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf, 174 info[i].pi_s32) == -1) 175 return -1; 176 break; 177 case CDF_UNSIGNED32: 178 if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf, 179 info[i].pi_u32) == -1) 180 return -1; 181 break; 182 case CDF_FLOAT: 183 if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf, 184 info[i].pi_f) == -1) 185 return -1; 186 break; 187 case CDF_DOUBLE: 188 if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf, 189 info[i].pi_d) == -1) 190 return -1; 191 break; 192 case CDF_LENGTH32_STRING: 193 case CDF_LENGTH32_WSTRING: 194 len = info[i].pi_str.s_len; 195 if (len > 1) { 196 char vbuf[1024]; 197 size_t j, k = 1; 198 199 if (info[i].pi_type == CDF_LENGTH32_WSTRING) 200 k++; 201 s = info[i].pi_str.s_buf; 202 e = info[i].pi_str.s_buf + len; 203 for (j = 0; s < e && j < sizeof(vbuf) 204 && len--; s += k) { 205 if (*s == '\0') 206 break; 207 if (isprint((unsigned char)*s)) 208 vbuf[j++] = *s; 209 } 210 if (j == sizeof(vbuf)) 211 --j; 212 vbuf[j] = '\0'; 213 if (NOTMIME(ms)) { 214 if (vbuf[0]) { 215 if (file_printf(ms, ", %s: %s", 216 buf, vbuf) == -1) 217 return -1; 218 } 219 } else if (str == NULL && info[i].pi_id == 220 CDF_PROPERTY_NAME_OF_APPLICATION) { 221 str = cdf_app_to_mime(vbuf, app2mime); 222 } 223 } 224 break; 225 case CDF_FILETIME: 226 tp = info[i].pi_tp; 227 if (tp != 0) { 228 char tbuf[64]; 229 if (tp < 1000000000000000LL) { 230 cdf_print_elapsed_time(tbuf, 231 sizeof(tbuf), tp); 232 if (NOTMIME(ms) && file_printf(ms, 233 ", %s: %s", buf, tbuf) == -1) 234 return -1; 235 } else { 236 char *c, *ec; 237 cdf_timestamp_to_timespec(&ts, tp); 238 c = cdf_ctime(&ts.tv_sec, tbuf); 239 if (c != NULL && 240 (ec = strchr(c, '\n')) != NULL) 241 *ec = '\0'; 242 243 if (NOTMIME(ms) && file_printf(ms, 244 ", %s: %s", buf, c) == -1) 245 return -1; 246 } 247 } 248 break; 249 case CDF_CLIPBOARD: 250 break; 251 default: 252 return -1; 253 } 254 } 255 if (!NOTMIME(ms)) { 256 if (str == NULL) 257 return 0; 258 if (file_printf(ms, "application/%s", str) == -1) 259 return -1; 260 } 261 return 1; 262 } 263 264 private int 265 cdf_file_catalog(struct magic_set *ms, const cdf_header_t *h, 266 const cdf_stream_t *sst) 267 { 268 cdf_catalog_t *cat; 269 size_t i; 270 char buf[256]; 271 cdf_catalog_entry_t *ce; 272 273 if (NOTMIME(ms)) { 274 if (file_printf(ms, "Microsoft Thumbs.db [") == -1) 275 return -1; 276 if (cdf_unpack_catalog(h, sst, &cat) == -1) 277 return -1; 278 ce = cat->cat_e; 279 /* skip first entry since it has a , or paren */ 280 for (i = 1; i < cat->cat_num; i++) 281 if (file_printf(ms, "%s%s", 282 cdf_u16tos8(buf, ce[i].ce_namlen, ce[i].ce_name), 283 i == cat->cat_num - 1 ? "]" : ", ") == -1) { 284 free(cat); 285 return -1; 286 } 287 free(cat); 288 } else { 289 if (file_printf(ms, "application/CDFV2") == -1) 290 return -1; 291 } 292 return 1; 293 } 294 295 private int 296 cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h, 297 const cdf_stream_t *sst, const cdf_directory_t *root_storage) 298 { 299 cdf_summary_info_header_t si; 300 cdf_property_info_t *info; 301 size_t count; 302 int m; 303 304 if (cdf_unpack_summary_info(sst, h, &si, &info, &count) == -1) 305 return -1; 306 307 if (NOTMIME(ms)) { 308 const char *str; 309 310 if (file_printf(ms, "Composite Document File V2 Document") 311 == -1) 312 return -1; 313 314 if (file_printf(ms, ", %s Endian", 315 si.si_byte_order == 0xfffe ? "Little" : "Big") == -1) 316 return -2; 317 switch (si.si_os) { 318 case 2: 319 if (file_printf(ms, ", Os: Windows, Version %d.%d", 320 si.si_os_version & 0xff, 321 (uint32_t)si.si_os_version >> 8) == -1) 322 return -2; 323 break; 324 case 1: 325 if (file_printf(ms, ", Os: MacOS, Version %d.%d", 326 (uint32_t)si.si_os_version >> 8, 327 si.si_os_version & 0xff) == -1) 328 return -2; 329 break; 330 default: 331 if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os, 332 si.si_os_version & 0xff, 333 (uint32_t)si.si_os_version >> 8) == -1) 334 return -2; 335 break; 336 } 337 if (root_storage) { 338 str = cdf_clsid_to_mime(root_storage->d_storage_uuid, 339 clsid2desc); 340 if (str) { 341 if (file_printf(ms, ", %s", str) == -1) 342 return -2; 343 } 344 } 345 } 346 347 m = cdf_file_property_info(ms, info, count, root_storage); 348 free(info); 349 350 return m == -1 ? -2 : m; 351 } 352 353 #ifdef notdef 354 private char * 355 format_clsid(char *buf, size_t len, const uint64_t uuid[2]) { 356 snprintf(buf, len, "%.8" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64 "-%.4" 357 PRIx64 "-%.12" PRIx64, 358 (uuid[0] >> 32) & (uint64_t)0x000000000ffffffffULL, 359 (uuid[0] >> 16) & (uint64_t)0x0000000000000ffffULL, 360 (uuid[0] >> 0) & (uint64_t)0x0000000000000ffffULL, 361 (uuid[1] >> 48) & (uint64_t)0x0000000000000ffffULL, 362 (uuid[1] >> 0) & (uint64_t)0x0000fffffffffffffULL); 363 return buf; 364 } 365 #endif 366 367 private int 368 cdf_file_catalog_info(struct magic_set *ms, const cdf_info_t *info, 369 const cdf_header_t *h, const cdf_sat_t *sat, const cdf_sat_t *ssat, 370 const cdf_stream_t *sst, const cdf_dir_t *dir, cdf_stream_t *scn) 371 { 372 int i; 373 374 if ((i = cdf_read_user_stream(info, h, sat, ssat, sst, 375 dir, "Catalog", scn)) == -1) 376 return i; 377 #ifdef CDF_DEBUG 378 cdf_dump_catalog(h, scn); 379 #endif 380 if ((i = cdf_file_catalog(ms, h, scn)) == -1) 381 return -1; 382 return i; 383 } 384 385 private int 386 cdf_check_summary_info(struct magic_set *ms, const cdf_info_t *info, 387 const cdf_header_t *h, const cdf_sat_t *sat, const cdf_sat_t *ssat, 388 const cdf_stream_t *sst, const cdf_dir_t *dir, cdf_stream_t *scn, 389 const cdf_directory_t *root_storage, const char **expn) 390 { 391 int i; 392 const char *str = NULL; 393 cdf_directory_t *d; 394 char name[__arraycount(d->d_name)]; 395 size_t j, k; 396 397 #ifdef CDF_DEBUG 398 cdf_dump_summary_info(h, scn); 399 #endif 400 if ((i = cdf_file_summary_info(ms, h, scn, root_storage)) < 0) { 401 *expn = "Can't expand summary_info"; 402 return i; 403 } 404 if (i == 1) 405 return i; 406 for (j = 0; str == NULL && j < dir->dir_len; j++) { 407 d = &dir->dir_tab[j]; 408 for (k = 0; k < sizeof(name); k++) 409 name[k] = (char)cdf_tole2(d->d_name[k]); 410 str = cdf_app_to_mime(name, 411 NOTMIME(ms) ? name2desc : name2mime); 412 } 413 if (NOTMIME(ms)) { 414 if (str != NULL) { 415 if (file_printf(ms, "%s", str) == -1) 416 return -1; 417 i = 1; 418 } 419 } else { 420 if (str == NULL) 421 str = "vnd.ms-office"; 422 if (file_printf(ms, "application/%s", str) == -1) 423 return -1; 424 i = 1; 425 } 426 if (i <= 0) { 427 i = cdf_file_catalog_info(ms, info, h, sat, ssat, sst, 428 dir, scn); 429 } 430 return i; 431 } 432 433 private struct sinfo { 434 const char *name; 435 const char *mime; 436 const char *sections[5]; 437 const int types[5]; 438 } sectioninfo[] = { 439 { "Encrypted", "encrypted", 440 { 441 "EncryptedPackage", "EncryptedSummary", 442 NULL, NULL, NULL, 443 }, 444 { 445 CDF_DIR_TYPE_USER_STREAM, 446 CDF_DIR_TYPE_USER_STREAM, 447 0, 0, 0, 448 449 }, 450 }, 451 { "QuickBooks", "quickbooks", 452 { 453 #if 0 454 "TaxForms", "PDFTaxForms", "modulesInBackup", 455 #endif 456 "mfbu_header", NULL, NULL, NULL, NULL, 457 }, 458 { 459 #if 0 460 CDF_DIR_TYPE_USER_STORAGE, 461 CDF_DIR_TYPE_USER_STORAGE, 462 CDF_DIR_TYPE_USER_STREAM, 463 #endif 464 CDF_DIR_TYPE_USER_STREAM, 465 0, 0, 0, 0 466 }, 467 }, 468 { "Microsoft Excel", "vnd.ms-excel", 469 { 470 "Book", "Workbook", NULL, NULL, NULL, 471 }, 472 { 473 CDF_DIR_TYPE_USER_STREAM, 474 CDF_DIR_TYPE_USER_STREAM, 475 0, 0, 0, 476 }, 477 }, 478 { "Microsoft Word", "msword", 479 { 480 "WordDocument", NULL, NULL, NULL, NULL, 481 }, 482 { 483 CDF_DIR_TYPE_USER_STREAM, 484 0, 0, 0, 0, 485 }, 486 }, 487 { "Microsoft PowerPoint", "vnd.ms-powerpoint", 488 { 489 "PowerPoint", NULL, NULL, NULL, NULL, 490 }, 491 { 492 CDF_DIR_TYPE_USER_STREAM, 493 0, 0, 0, 0, 494 }, 495 }, 496 { "Microsoft Outlook Message", "vnd.ms-outlook", 497 { 498 "__properties_version1.0", 499 "__recip_version1.0_#00000000", 500 NULL, NULL, NULL, 501 }, 502 { 503 CDF_DIR_TYPE_USER_STREAM, 504 CDF_DIR_TYPE_USER_STORAGE, 505 0, 0, 0, 506 }, 507 }, 508 }; 509 510 private int 511 cdf_file_dir_info(struct magic_set *ms, const cdf_dir_t *dir) 512 { 513 size_t sd, j; 514 515 for (sd = 0; sd < __arraycount(sectioninfo); sd++) { 516 const struct sinfo *si = §ioninfo[sd]; 517 for (j = 0; si->sections[j]; j++) { 518 if (cdf_find_stream(dir, si->sections[j], si->types[j]) 519 > 0) 520 break; 521 #ifdef CDF_DEBUG 522 fprintf(stderr, "Can't read %s\n", si->sections[j]); 523 #endif 524 } 525 if (si->sections[j] == NULL) 526 continue; 527 if (NOTMIME(ms)) { 528 if (file_printf(ms, "CDFV2 %s", si->name) == -1) 529 return -1; 530 } else { 531 if (file_printf(ms, "application/%s", si->mime) == -1) 532 return -1; 533 } 534 return 1; 535 } 536 return -1; 537 } 538 539 protected int 540 file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf, 541 size_t nbytes) 542 { 543 cdf_info_t info; 544 cdf_header_t h; 545 cdf_sat_t sat, ssat; 546 cdf_stream_t sst, scn; 547 cdf_dir_t dir; 548 int i; 549 const char *expn = ""; 550 const cdf_directory_t *root_storage; 551 552 scn.sst_tab = NULL; 553 info.i_fd = fd; 554 info.i_buf = buf; 555 info.i_len = nbytes; 556 if (ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) 557 return 0; 558 if (cdf_read_header(&info, &h) == -1) 559 return 0; 560 #ifdef CDF_DEBUG 561 cdf_dump_header(&h); 562 #endif 563 564 if ((i = cdf_read_sat(&info, &h, &sat)) == -1) { 565 expn = "Can't read SAT"; 566 goto out0; 567 } 568 #ifdef CDF_DEBUG 569 cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h)); 570 #endif 571 572 if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) { 573 expn = "Can't read SSAT"; 574 goto out1; 575 } 576 #ifdef CDF_DEBUG 577 cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h)); 578 #endif 579 580 if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) { 581 expn = "Can't read directory"; 582 goto out2; 583 } 584 585 if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst, 586 &root_storage)) == -1) { 587 expn = "Cannot read short stream"; 588 goto out3; 589 } 590 #ifdef CDF_DEBUG 591 cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir); 592 #endif 593 #ifdef notdef 594 if (root_storage) { 595 if (NOTMIME(ms)) { 596 char clsbuf[128]; 597 if (file_printf(ms, "CLSID %s, ", 598 format_clsid(clsbuf, sizeof(clsbuf), 599 root_storage->d_storage_uuid)) == -1) 600 return -1; 601 } 602 } 603 #endif 604 605 if ((i = cdf_read_user_stream(&info, &h, &sat, &ssat, &sst, &dir, 606 "FileHeader", &scn)) != -1) { 607 #define HWP5_SIGNATURE "HWP Document File" 608 if (scn.sst_len * scn.sst_ss >= sizeof(HWP5_SIGNATURE) - 1 609 && memcmp(scn.sst_tab, HWP5_SIGNATURE, 610 sizeof(HWP5_SIGNATURE) - 1) == 0) { 611 if (NOTMIME(ms)) { 612 if (file_printf(ms, 613 "Hangul (Korean) Word Processor File 5.x") == -1) 614 return -1; 615 } else { 616 if (file_printf(ms, "application/x-hwp") == -1) 617 return -1; 618 } 619 i = 1; 620 goto out5; 621 } else { 622 cdf_zero_stream(&scn); 623 } 624 } 625 626 if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir, 627 &scn)) == -1) { 628 if (errno != ESRCH) { 629 expn = "Cannot read summary info"; 630 } 631 } else { 632 i = cdf_check_summary_info(ms, &info, &h, 633 &sat, &ssat, &sst, &dir, &scn, root_storage, &expn); 634 cdf_zero_stream(&scn); 635 } 636 if (i <= 0) { 637 if ((i = cdf_read_doc_summary_info(&info, &h, &sat, &ssat, 638 &sst, &dir, &scn)) == -1) { 639 if (errno != ESRCH) { 640 expn = "Cannot read summary info"; 641 } 642 } else { 643 i = cdf_check_summary_info(ms, &info, &h, &sat, &ssat, 644 &sst, &dir, &scn, root_storage, &expn); 645 } 646 } 647 if (i <= 0) { 648 i = cdf_file_dir_info(ms, &dir); 649 if (i < 0) 650 expn = "Cannot read section info"; 651 } 652 out5: 653 cdf_zero_stream(&scn); 654 cdf_zero_stream(&sst); 655 out3: 656 free(dir.dir_tab); 657 out2: 658 free(ssat.sat_tab); 659 out1: 660 free(sat.sat_tab); 661 out0: 662 if (i == -1) { 663 if (NOTMIME(ms)) { 664 if (file_printf(ms, 665 "Composite Document File V2 Document") == -1) 666 return -1; 667 if (*expn) 668 if (file_printf(ms, ", %s", expn) == -1) 669 return -1; 670 } else { 671 if (file_printf(ms, "application/CDFV2") == -1) 672 return -1; 673 } 674 i = 1; 675 } 676 return i; 677 } 678