1 /*- 2 * Copyright (c) 2008, 2016 Christos Zoulas 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 */ 26 #include "file.h" 27 28 #ifndef lint 29 FILE_RCSID("@(#)$File: readcdf.c,v 1.73 2019/03/12 20:43:05 christos Exp $") 30 #endif 31 32 #include <assert.h> 33 #include <stdlib.h> 34 #include <unistd.h> 35 #include <string.h> 36 #include <time.h> 37 #include <ctype.h> 38 39 #include "cdf.h" 40 #include "magic.h" 41 42 #define NOTMIME(ms) (((ms)->flags & MAGIC_MIME) == 0) 43 44 static const struct nv { 45 const char *pattern; 46 const char *mime; 47 } app2mime[] = { 48 { "Word", "msword", }, 49 { "Excel", "vnd.ms-excel", }, 50 { "Powerpoint", "vnd.ms-powerpoint", }, 51 { "Crystal Reports", "x-rpt", }, 52 { "Advanced Installer", "vnd.ms-msi", }, 53 { "InstallShield", "vnd.ms-msi", }, 54 { "Microsoft Patch Compiler", "vnd.ms-msi", }, 55 { "NAnt", "vnd.ms-msi", }, 56 { "Windows Installer", "vnd.ms-msi", }, 57 { NULL, NULL, }, 58 }, name2mime[] = { 59 { "Book", "vnd.ms-excel", }, 60 { "Workbook", "vnd.ms-excel", }, 61 { "WordDocument", "msword", }, 62 { "PowerPoint", "vnd.ms-powerpoint", }, 63 { "DigitalSignature", "vnd.ms-msi", }, 64 { NULL, NULL, }, 65 }, name2desc[] = { 66 { "Book", "Microsoft Excel", }, 67 { "Workbook", "Microsoft Excel", }, 68 { "WordDocument", "Microsoft Word", }, 69 { "PowerPoint", "Microsoft PowerPoint", }, 70 { "DigitalSignature", "Microsoft Installer", }, 71 { NULL, NULL, }, 72 }; 73 74 static const struct cv { 75 uint64_t clsid[2]; 76 const char *mime; 77 } clsid2mime[] = { 78 { 79 { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, 80 "x-msi", 81 }, 82 { { 0, 0 }, 83 NULL, 84 }, 85 }, clsid2desc[] = { 86 { 87 { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, 88 "MSI Installer", 89 }, 90 { { 0, 0 }, 91 NULL, 92 }, 93 }; 94 95 private const char * 96 cdf_clsid_to_mime(const uint64_t clsid[2], const struct cv *cv) 97 { 98 size_t i; 99 for (i = 0; cv[i].mime != NULL; i++) { 100 if (clsid[0] == cv[i].clsid[0] && clsid[1] == cv[i].clsid[1]) 101 return cv[i].mime; 102 } 103 #ifdef CDF_DEBUG 104 fprintf(stderr, "unknown mime %" PRIx64 ", %" PRIx64 "\n", clsid[0], 105 clsid[1]); 106 #endif 107 return NULL; 108 } 109 110 private const char * 111 cdf_app_to_mime(const char *vbuf, const struct nv *nv) 112 { 113 size_t i; 114 const char *rv = NULL; 115 #ifdef USE_C_LOCALE 116 locale_t old_lc_ctype, c_lc_ctype; 117 118 c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0); 119 assert(c_lc_ctype != NULL); 120 old_lc_ctype = uselocale(c_lc_ctype); 121 assert(old_lc_ctype != NULL); 122 #else 123 char *old_lc_ctype = setlocale(LC_CTYPE, "C"); 124 #endif 125 for (i = 0; nv[i].pattern != NULL; i++) 126 if (strcasestr(vbuf, nv[i].pattern) != NULL) { 127 rv = nv[i].mime; 128 break; 129 } 130 #ifdef CDF_DEBUG 131 fprintf(stderr, "unknown app %s\n", vbuf); 132 #endif 133 #ifdef USE_C_LOCALE 134 (void)uselocale(old_lc_ctype); 135 freelocale(c_lc_ctype); 136 #else 137 setlocale(LC_CTYPE, old_lc_ctype); 138 #endif 139 return rv; 140 } 141 142 private int 143 cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info, 144 size_t count, const cdf_directory_t *root_storage) 145 { 146 size_t i; 147 cdf_timestamp_t tp; 148 struct timespec ts; 149 char buf[64]; 150 const char *str = NULL; 151 const char *s, *e; 152 int len; 153 154 if (!NOTMIME(ms) && root_storage) 155 str = cdf_clsid_to_mime(root_storage->d_storage_uuid, 156 clsid2mime); 157 158 for (i = 0; i < count; i++) { 159 cdf_print_property_name(buf, sizeof(buf), info[i].pi_id); 160 switch (info[i].pi_type) { 161 case CDF_NULL: 162 break; 163 case CDF_SIGNED16: 164 if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf, 165 info[i].pi_s16) == -1) 166 return -1; 167 break; 168 case CDF_SIGNED32: 169 if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf, 170 info[i].pi_s32) == -1) 171 return -1; 172 break; 173 case CDF_UNSIGNED32: 174 if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf, 175 info[i].pi_u32) == -1) 176 return -1; 177 break; 178 case CDF_FLOAT: 179 if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf, 180 info[i].pi_f) == -1) 181 return -1; 182 break; 183 case CDF_DOUBLE: 184 if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf, 185 info[i].pi_d) == -1) 186 return -1; 187 break; 188 case CDF_LENGTH32_STRING: 189 case CDF_LENGTH32_WSTRING: 190 len = info[i].pi_str.s_len; 191 if (len > 1) { 192 char vbuf[1024]; 193 size_t j, k = 1; 194 195 if (info[i].pi_type == CDF_LENGTH32_WSTRING) 196 k++; 197 s = info[i].pi_str.s_buf; 198 e = info[i].pi_str.s_buf + len; 199 for (j = 0; s < e && j < sizeof(vbuf) 200 && len--; s += k) { 201 if (*s == '\0') 202 break; 203 if (isprint(CAST(unsigned char, *s))) 204 vbuf[j++] = *s; 205 } 206 if (j == sizeof(vbuf)) 207 --j; 208 vbuf[j] = '\0'; 209 if (NOTMIME(ms)) { 210 if (vbuf[0]) { 211 if (file_printf(ms, ", %s: %s", 212 buf, vbuf) == -1) 213 return -1; 214 } 215 } else if (str == NULL && info[i].pi_id == 216 CDF_PROPERTY_NAME_OF_APPLICATION) { 217 str = cdf_app_to_mime(vbuf, app2mime); 218 } 219 } 220 break; 221 case CDF_FILETIME: 222 tp = info[i].pi_tp; 223 if (tp != 0) { 224 char tbuf[64]; 225 if (tp < 1000000000000000LL) { 226 cdf_print_elapsed_time(tbuf, 227 sizeof(tbuf), tp); 228 if (NOTMIME(ms) && file_printf(ms, 229 ", %s: %s", buf, tbuf) == -1) 230 return -1; 231 } else { 232 char *c, *ec; 233 cdf_timestamp_to_timespec(&ts, tp); 234 c = cdf_ctime(&ts.tv_sec, tbuf); 235 if (c != NULL && 236 (ec = strchr(c, '\n')) != NULL) 237 *ec = '\0'; 238 239 if (NOTMIME(ms) && file_printf(ms, 240 ", %s: %s", buf, c) == -1) 241 return -1; 242 } 243 } 244 break; 245 case CDF_CLIPBOARD: 246 break; 247 default: 248 return -1; 249 } 250 } 251 if (ms->flags & MAGIC_MIME_TYPE) { 252 if (str == NULL) 253 return 0; 254 if (file_printf(ms, "application/%s", str) == -1) 255 return -1; 256 } 257 return 1; 258 } 259 260 private int 261 cdf_file_catalog(struct magic_set *ms, const cdf_header_t *h, 262 const cdf_stream_t *sst) 263 { 264 cdf_catalog_t *cat; 265 size_t i; 266 char buf[256]; 267 cdf_catalog_entry_t *ce; 268 269 if (NOTMIME(ms)) { 270 if (file_printf(ms, "Microsoft Thumbs.db [") == -1) 271 return -1; 272 if (cdf_unpack_catalog(h, sst, &cat) == -1) 273 return -1; 274 ce = cat->cat_e; 275 /* skip first entry since it has a , or paren */ 276 for (i = 1; i < cat->cat_num; i++) 277 if (file_printf(ms, "%s%s", 278 cdf_u16tos8(buf, ce[i].ce_namlen, ce[i].ce_name), 279 i == cat->cat_num - 1 ? "]" : ", ") == -1) { 280 free(cat); 281 return -1; 282 } 283 free(cat); 284 } else if (ms->flags & MAGIC_MIME_TYPE) { 285 if (file_printf(ms, "application/CDFV2") == -1) 286 return -1; 287 } 288 return 1; 289 } 290 291 private int 292 cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h, 293 const cdf_stream_t *sst, const cdf_directory_t *root_storage) 294 { 295 cdf_summary_info_header_t si; 296 cdf_property_info_t *info; 297 size_t count; 298 int m; 299 300 if (cdf_unpack_summary_info(sst, h, &si, &info, &count) == -1) 301 return -1; 302 303 if (NOTMIME(ms)) { 304 const char *str; 305 306 if (file_printf(ms, "Composite Document File V2 Document") 307 == -1) 308 return -1; 309 310 if (file_printf(ms, ", %s Endian", 311 si.si_byte_order == 0xfffe ? "Little" : "Big") == -1) 312 return -2; 313 switch (si.si_os) { 314 case 2: 315 if (file_printf(ms, ", Os: Windows, Version %d.%d", 316 si.si_os_version & 0xff, 317 CAST(uint32_t, si.si_os_version) >> 8) == -1) 318 return -2; 319 break; 320 case 1: 321 if (file_printf(ms, ", Os: MacOS, Version %d.%d", 322 CAST(uint32_t, si.si_os_version) >> 8, 323 si.si_os_version & 0xff) == -1) 324 return -2; 325 break; 326 default: 327 if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os, 328 si.si_os_version & 0xff, 329 CAST(uint32_t, si.si_os_version) >> 8) == -1) 330 return -2; 331 break; 332 } 333 if (root_storage) { 334 str = cdf_clsid_to_mime(root_storage->d_storage_uuid, 335 clsid2desc); 336 if (str) { 337 if (file_printf(ms, ", %s", str) == -1) 338 return -2; 339 } 340 } 341 } 342 343 m = cdf_file_property_info(ms, info, count, root_storage); 344 free(info); 345 346 return m == -1 ? -2 : m; 347 } 348 349 #ifdef notdef 350 private char * 351 format_clsid(char *buf, size_t len, const uint64_t uuid[2]) { 352 snprintf(buf, len, "%.8" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64 "-%.4" 353 PRIx64 "-%.12" PRIx64, 354 (uuid[0] >> 32) & (uint64_t)0x000000000ffffffffULL, 355 (uuid[0] >> 16) & (uint64_t)0x0000000000000ffffULL, 356 (uuid[0] >> 0) & (uint64_t)0x0000000000000ffffULL, 357 (uuid[1] >> 48) & (uint64_t)0x0000000000000ffffULL, 358 (uuid[1] >> 0) & (uint64_t)0x0000fffffffffffffULL); 359 return buf; 360 } 361 #endif 362 363 private int 364 cdf_file_catalog_info(struct magic_set *ms, const cdf_info_t *info, 365 const cdf_header_t *h, const cdf_sat_t *sat, const cdf_sat_t *ssat, 366 const cdf_stream_t *sst, const cdf_dir_t *dir, cdf_stream_t *scn) 367 { 368 int i; 369 370 if ((i = cdf_read_user_stream(info, h, sat, ssat, sst, 371 dir, "Catalog", scn)) == -1) 372 return i; 373 #ifdef CDF_DEBUG 374 cdf_dump_catalog(h, scn); 375 #endif 376 if ((i = cdf_file_catalog(ms, h, scn)) == -1) 377 return -1; 378 return i; 379 } 380 381 private int 382 cdf_check_summary_info(struct magic_set *ms, const cdf_info_t *info, 383 const cdf_header_t *h, const cdf_sat_t *sat, const cdf_sat_t *ssat, 384 const cdf_stream_t *sst, const cdf_dir_t *dir, cdf_stream_t *scn, 385 const cdf_directory_t *root_storage, const char **expn) 386 { 387 int i; 388 const char *str = NULL; 389 cdf_directory_t *d; 390 char name[__arraycount(d->d_name)]; 391 size_t j, k; 392 393 #ifdef CDF_DEBUG 394 cdf_dump_summary_info(h, scn); 395 #endif 396 if ((i = cdf_file_summary_info(ms, h, scn, root_storage)) < 0) { 397 *expn = "Can't expand summary_info"; 398 return i; 399 } 400 if (i == 1) 401 return i; 402 for (j = 0; str == NULL && j < dir->dir_len; j++) { 403 d = &dir->dir_tab[j]; 404 for (k = 0; k < sizeof(name); k++) 405 name[k] = CAST(char, cdf_tole2(d->d_name[k])); 406 str = cdf_app_to_mime(name, 407 NOTMIME(ms) ? name2desc : name2mime); 408 } 409 if (NOTMIME(ms)) { 410 if (str != NULL) { 411 if (file_printf(ms, "%s", str) == -1) 412 return -1; 413 i = 1; 414 } 415 } else if (ms->flags & MAGIC_MIME_TYPE) { 416 if (str == NULL) 417 str = "vnd.ms-office"; 418 if (file_printf(ms, "application/%s", str) == -1) 419 return -1; 420 i = 1; 421 } 422 if (i <= 0) { 423 i = cdf_file_catalog_info(ms, info, h, sat, ssat, sst, 424 dir, scn); 425 } 426 return i; 427 } 428 429 private struct sinfo { 430 const char *name; 431 const char *mime; 432 const char *sections[5]; 433 const int types[5]; 434 } sectioninfo[] = { 435 { "Encrypted", "encrypted", 436 { 437 "EncryptedPackage", "EncryptedSummary", 438 NULL, NULL, NULL, 439 }, 440 { 441 CDF_DIR_TYPE_USER_STREAM, 442 CDF_DIR_TYPE_USER_STREAM, 443 0, 0, 0, 444 445 }, 446 }, 447 { "QuickBooks", "quickbooks", 448 { 449 #if 0 450 "TaxForms", "PDFTaxForms", "modulesInBackup", 451 #endif 452 "mfbu_header", NULL, NULL, NULL, NULL, 453 }, 454 { 455 #if 0 456 CDF_DIR_TYPE_USER_STORAGE, 457 CDF_DIR_TYPE_USER_STORAGE, 458 CDF_DIR_TYPE_USER_STREAM, 459 #endif 460 CDF_DIR_TYPE_USER_STREAM, 461 0, 0, 0, 0 462 }, 463 }, 464 { "Microsoft Excel", "vnd.ms-excel", 465 { 466 "Book", "Workbook", NULL, NULL, NULL, 467 }, 468 { 469 CDF_DIR_TYPE_USER_STREAM, 470 CDF_DIR_TYPE_USER_STREAM, 471 0, 0, 0, 472 }, 473 }, 474 { "Microsoft Word", "msword", 475 { 476 "WordDocument", NULL, NULL, NULL, NULL, 477 }, 478 { 479 CDF_DIR_TYPE_USER_STREAM, 480 0, 0, 0, 0, 481 }, 482 }, 483 { "Microsoft PowerPoint", "vnd.ms-powerpoint", 484 { 485 "PowerPoint", NULL, NULL, NULL, NULL, 486 }, 487 { 488 CDF_DIR_TYPE_USER_STREAM, 489 0, 0, 0, 0, 490 }, 491 }, 492 { "Microsoft Outlook Message", "vnd.ms-outlook", 493 { 494 "__properties_version1.0", 495 "__recip_version1.0_#00000000", 496 NULL, NULL, NULL, 497 }, 498 { 499 CDF_DIR_TYPE_USER_STREAM, 500 CDF_DIR_TYPE_USER_STORAGE, 501 0, 0, 0, 502 }, 503 }, 504 }; 505 506 private int 507 cdf_file_dir_info(struct magic_set *ms, const cdf_dir_t *dir) 508 { 509 size_t sd, j; 510 511 for (sd = 0; sd < __arraycount(sectioninfo); sd++) { 512 const struct sinfo *si = §ioninfo[sd]; 513 for (j = 0; si->sections[j]; j++) { 514 if (cdf_find_stream(dir, si->sections[j], si->types[j]) 515 > 0) 516 break; 517 #ifdef CDF_DEBUG 518 fprintf(stderr, "Can't read %s\n", si->sections[j]); 519 #endif 520 } 521 if (si->sections[j] == NULL) 522 continue; 523 if (NOTMIME(ms)) { 524 if (file_printf(ms, "CDFV2 %s", si->name) == -1) 525 return -1; 526 } else if (ms->flags & MAGIC_MIME_TYPE) { 527 if (file_printf(ms, "application/%s", si->mime) == -1) 528 return -1; 529 } 530 return 1; 531 } 532 return -1; 533 } 534 535 protected int 536 file_trycdf(struct magic_set *ms, const struct buffer *b) 537 { 538 int fd = b->fd; 539 const unsigned char *buf = CAST(const unsigned char *, b->fbuf); 540 size_t nbytes = b->flen; 541 cdf_info_t info; 542 cdf_header_t h; 543 cdf_sat_t sat, ssat; 544 cdf_stream_t sst, scn; 545 cdf_dir_t dir; 546 int i; 547 const char *expn = ""; 548 const cdf_directory_t *root_storage; 549 550 scn.sst_tab = NULL; 551 info.i_fd = fd; 552 info.i_buf = buf; 553 info.i_len = nbytes; 554 if (ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) 555 return 0; 556 if (cdf_read_header(&info, &h) == -1) 557 return 0; 558 #ifdef CDF_DEBUG 559 cdf_dump_header(&h); 560 #endif 561 562 if ((i = cdf_read_sat(&info, &h, &sat)) == -1) { 563 expn = "Can't read SAT"; 564 goto out0; 565 } 566 #ifdef CDF_DEBUG 567 cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h)); 568 #endif 569 570 if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) { 571 expn = "Can't read SSAT"; 572 goto out1; 573 } 574 #ifdef CDF_DEBUG 575 cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h)); 576 #endif 577 578 if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) { 579 expn = "Can't read directory"; 580 goto out2; 581 } 582 583 if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst, 584 &root_storage)) == -1) { 585 expn = "Cannot read short stream"; 586 goto out3; 587 } 588 #ifdef CDF_DEBUG 589 cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir); 590 #endif 591 #ifdef notdef 592 if (root_storage) { 593 if (NOTMIME(ms)) { 594 char clsbuf[128]; 595 if (file_printf(ms, "CLSID %s, ", 596 format_clsid(clsbuf, sizeof(clsbuf), 597 root_storage->d_storage_uuid)) == -1) 598 return -1; 599 } 600 } 601 #endif 602 603 if ((i = cdf_read_user_stream(&info, &h, &sat, &ssat, &sst, &dir, 604 "FileHeader", &scn)) != -1) { 605 #define HWP5_SIGNATURE "HWP Document File" 606 if (scn.sst_len * scn.sst_ss >= sizeof(HWP5_SIGNATURE) - 1 607 && memcmp(scn.sst_tab, HWP5_SIGNATURE, 608 sizeof(HWP5_SIGNATURE) - 1) == 0) { 609 if (NOTMIME(ms)) { 610 if (file_printf(ms, 611 "Hangul (Korean) Word Processor File 5.x") == -1) 612 return -1; 613 } else if (ms->flags & MAGIC_MIME_TYPE) { 614 if (file_printf(ms, "application/x-hwp") == -1) 615 return -1; 616 } 617 i = 1; 618 goto out5; 619 } else { 620 cdf_zero_stream(&scn); 621 } 622 } 623 624 if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir, 625 &scn)) == -1) { 626 if (errno != ESRCH) { 627 expn = "Cannot read summary info"; 628 } 629 } else { 630 i = cdf_check_summary_info(ms, &info, &h, 631 &sat, &ssat, &sst, &dir, &scn, root_storage, &expn); 632 cdf_zero_stream(&scn); 633 } 634 if (i <= 0) { 635 if ((i = cdf_read_doc_summary_info(&info, &h, &sat, &ssat, 636 &sst, &dir, &scn)) == -1) { 637 if (errno != ESRCH) { 638 expn = "Cannot read summary info"; 639 } 640 } else { 641 i = cdf_check_summary_info(ms, &info, &h, &sat, &ssat, 642 &sst, &dir, &scn, root_storage, &expn); 643 } 644 } 645 if (i <= 0) { 646 i = cdf_file_dir_info(ms, &dir); 647 if (i < 0) 648 expn = "Cannot read section info"; 649 } 650 out5: 651 cdf_zero_stream(&scn); 652 cdf_zero_stream(&sst); 653 out3: 654 free(dir.dir_tab); 655 out2: 656 free(ssat.sat_tab); 657 out1: 658 free(sat.sat_tab); 659 out0: 660 /* If we handled it already, return */ 661 if (i != -1) 662 return i; 663 /* Provide a default handler */ 664 if (NOTMIME(ms)) { 665 if (file_printf(ms, 666 "Composite Document File V2 Document") == -1) 667 return -1; 668 if (*expn) 669 if (file_printf(ms, ", %s", expn) == -1) 670 return -1; 671 } else if (ms->flags & MAGIC_MIME_TYPE) { 672 if (file_printf(ms, "application/CDFV2") == -1) 673 return -1; 674 } 675 return 1; 676 } 677