1 /*- 2 * Copyright (c) 2008 Christos Zoulas 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 */ 26 #include "file.h" 27 28 #ifndef lint 29 FILE_RCSID("@(#)$File: readcdf.c,v 1.57 2016/05/03 16:08:49 christos Exp $") 30 #endif 31 32 #include <assert.h> 33 #include <stdlib.h> 34 #include <unistd.h> 35 #include <string.h> 36 #include <time.h> 37 #include <ctype.h> 38 39 #include "cdf.h" 40 #include "magic.h" 41 42 #ifndef __arraycount 43 #define __arraycount(a) (sizeof(a) / sizeof(a[0])) 44 #endif 45 46 #define NOTMIME(ms) (((ms)->flags & MAGIC_MIME) == 0) 47 48 static const struct nv { 49 const char *pattern; 50 const char *mime; 51 } app2mime[] = { 52 { "Word", "msword", }, 53 { "Excel", "vnd.ms-excel", }, 54 { "Powerpoint", "vnd.ms-powerpoint", }, 55 { "Crystal Reports", "x-rpt", }, 56 { "Advanced Installer", "vnd.ms-msi", }, 57 { "InstallShield", "vnd.ms-msi", }, 58 { "Microsoft Patch Compiler", "vnd.ms-msi", }, 59 { "NAnt", "vnd.ms-msi", }, 60 { "Windows Installer", "vnd.ms-msi", }, 61 { NULL, NULL, }, 62 }, name2mime[] = { 63 { "Book", "vnd.ms-excel", }, 64 { "Workbook", "vnd.ms-excel", }, 65 { "WordDocument", "msword", }, 66 { "PowerPoint", "vnd.ms-powerpoint", }, 67 { "DigitalSignature", "vnd.ms-msi", }, 68 { NULL, NULL, }, 69 }, name2desc[] = { 70 { "Book", "Microsoft Excel", }, 71 { "Workbook", "Microsoft Excel", }, 72 { "WordDocument", "Microsoft Word", }, 73 { "PowerPoint", "Microsoft PowerPoint", }, 74 { "DigitalSignature", "Microsoft Installer", }, 75 { NULL, NULL, }, 76 }; 77 78 static const struct cv { 79 uint64_t clsid[2]; 80 const char *mime; 81 } clsid2mime[] = { 82 { 83 { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, 84 "x-msi", 85 }, 86 { { 0, 0 }, 87 NULL, 88 }, 89 }, clsid2desc[] = { 90 { 91 { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, 92 "MSI Installer", 93 }, 94 { { 0, 0 }, 95 NULL, 96 }, 97 }; 98 99 private const char * 100 cdf_clsid_to_mime(const uint64_t clsid[2], const struct cv *cv) 101 { 102 size_t i; 103 for (i = 0; cv[i].mime != NULL; i++) { 104 if (clsid[0] == cv[i].clsid[0] && clsid[1] == cv[i].clsid[1]) 105 return cv[i].mime; 106 } 107 #ifdef CDF_DEBUG 108 fprintf(stderr, "unknown mime %" PRIx64 ", %" PRIx64 "\n", clsid[0], 109 clsid[1]); 110 #endif 111 return NULL; 112 } 113 114 private const char * 115 cdf_app_to_mime(const char *vbuf, const struct nv *nv) 116 { 117 size_t i; 118 const char *rv = NULL; 119 #ifdef USE_C_LOCALE 120 locale_t old_lc_ctype, c_lc_ctype; 121 122 c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0); 123 assert(c_lc_ctype != NULL); 124 old_lc_ctype = uselocale(c_lc_ctype); 125 assert(old_lc_ctype != NULL); 126 #else 127 char *old_lc_ctype = setlocale(LC_CTYPE, "C"); 128 #endif 129 for (i = 0; nv[i].pattern != NULL; i++) 130 if (strcasestr(vbuf, nv[i].pattern) != NULL) { 131 rv = nv[i].mime; 132 break; 133 } 134 #ifdef CDF_DEBUG 135 fprintf(stderr, "unknown app %s\n", vbuf); 136 #endif 137 #ifdef USE_C_LOCALE 138 (void)uselocale(old_lc_ctype); 139 freelocale(c_lc_ctype); 140 #else 141 setlocale(LC_CTYPE, old_lc_ctype); 142 #endif 143 return rv; 144 } 145 146 private int 147 cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info, 148 size_t count, const cdf_directory_t *root_storage) 149 { 150 size_t i; 151 cdf_timestamp_t tp; 152 struct timespec ts; 153 char buf[64]; 154 const char *str = NULL; 155 const char *s; 156 int len; 157 158 if (!NOTMIME(ms) && root_storage) 159 str = cdf_clsid_to_mime(root_storage->d_storage_uuid, 160 clsid2mime); 161 162 for (i = 0; i < count; i++) { 163 cdf_print_property_name(buf, sizeof(buf), info[i].pi_id); 164 switch (info[i].pi_type) { 165 case CDF_NULL: 166 break; 167 case CDF_SIGNED16: 168 if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf, 169 info[i].pi_s16) == -1) 170 return -1; 171 break; 172 case CDF_SIGNED32: 173 if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf, 174 info[i].pi_s32) == -1) 175 return -1; 176 break; 177 case CDF_UNSIGNED32: 178 if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf, 179 info[i].pi_u32) == -1) 180 return -1; 181 break; 182 case CDF_FLOAT: 183 if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf, 184 info[i].pi_f) == -1) 185 return -1; 186 break; 187 case CDF_DOUBLE: 188 if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf, 189 info[i].pi_d) == -1) 190 return -1; 191 break; 192 case CDF_LENGTH32_STRING: 193 case CDF_LENGTH32_WSTRING: 194 len = info[i].pi_str.s_len; 195 if (len > 1) { 196 char vbuf[1024]; 197 size_t j, k = 1; 198 199 if (info[i].pi_type == CDF_LENGTH32_WSTRING) 200 k++; 201 s = info[i].pi_str.s_buf; 202 for (j = 0; j < sizeof(vbuf) && len--; s += k) { 203 if (*s == '\0') 204 break; 205 if (isprint((unsigned char)*s)) 206 vbuf[j++] = *s; 207 } 208 if (j == sizeof(vbuf)) 209 --j; 210 vbuf[j] = '\0'; 211 if (NOTMIME(ms)) { 212 if (vbuf[0]) { 213 if (file_printf(ms, ", %s: %s", 214 buf, vbuf) == -1) 215 return -1; 216 } 217 } else if (str == NULL && info[i].pi_id == 218 CDF_PROPERTY_NAME_OF_APPLICATION) { 219 str = cdf_app_to_mime(vbuf, app2mime); 220 } 221 } 222 break; 223 case CDF_FILETIME: 224 tp = info[i].pi_tp; 225 if (tp != 0) { 226 char tbuf[64]; 227 if (tp < 1000000000000000LL) { 228 cdf_print_elapsed_time(tbuf, 229 sizeof(tbuf), tp); 230 if (NOTMIME(ms) && file_printf(ms, 231 ", %s: %s", buf, tbuf) == -1) 232 return -1; 233 } else { 234 char *c, *ec; 235 cdf_timestamp_to_timespec(&ts, tp); 236 c = cdf_ctime(&ts.tv_sec, tbuf); 237 if (c != NULL && 238 (ec = strchr(c, '\n')) != NULL) 239 *ec = '\0'; 240 241 if (NOTMIME(ms) && file_printf(ms, 242 ", %s: %s", buf, c) == -1) 243 return -1; 244 } 245 } 246 break; 247 case CDF_CLIPBOARD: 248 break; 249 default: 250 return -1; 251 } 252 } 253 if (!NOTMIME(ms)) { 254 if (str == NULL) 255 return 0; 256 if (file_printf(ms, "application/%s", str) == -1) 257 return -1; 258 } 259 return 1; 260 } 261 262 private int 263 cdf_file_catalog(struct magic_set *ms, const cdf_header_t *h, 264 const cdf_stream_t *sst) 265 { 266 cdf_catalog_t *cat; 267 size_t i; 268 char buf[256]; 269 cdf_catalog_entry_t *ce; 270 271 if (NOTMIME(ms)) { 272 if (file_printf(ms, "Microsoft Thumbs.db [") == -1) 273 return -1; 274 if (cdf_unpack_catalog(h, sst, &cat) == -1) 275 return -1; 276 ce = cat->cat_e; 277 /* skip first entry since it has a , or paren */ 278 for (i = 1; i < cat->cat_num; i++) 279 if (file_printf(ms, "%s%s", 280 cdf_u16tos8(buf, ce[i].ce_namlen, ce[i].ce_name), 281 i == cat->cat_num - 1 ? "]" : ", ") == -1) { 282 free(cat); 283 return -1; 284 } 285 free(cat); 286 } else { 287 if (file_printf(ms, "application/CDFV2") == -1) 288 return -1; 289 } 290 return 1; 291 } 292 293 private int 294 cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h, 295 const cdf_stream_t *sst, const cdf_directory_t *root_storage) 296 { 297 cdf_summary_info_header_t si; 298 cdf_property_info_t *info; 299 size_t count; 300 int m; 301 302 if (cdf_unpack_summary_info(sst, h, &si, &info, &count) == -1) 303 return -1; 304 305 if (NOTMIME(ms)) { 306 const char *str; 307 308 if (file_printf(ms, "Composite Document File V2 Document") 309 == -1) 310 return -1; 311 312 if (file_printf(ms, ", %s Endian", 313 si.si_byte_order == 0xfffe ? "Little" : "Big") == -1) 314 return -2; 315 switch (si.si_os) { 316 case 2: 317 if (file_printf(ms, ", Os: Windows, Version %d.%d", 318 si.si_os_version & 0xff, 319 (uint32_t)si.si_os_version >> 8) == -1) 320 return -2; 321 break; 322 case 1: 323 if (file_printf(ms, ", Os: MacOS, Version %d.%d", 324 (uint32_t)si.si_os_version >> 8, 325 si.si_os_version & 0xff) == -1) 326 return -2; 327 break; 328 default: 329 if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os, 330 si.si_os_version & 0xff, 331 (uint32_t)si.si_os_version >> 8) == -1) 332 return -2; 333 break; 334 } 335 if (root_storage) { 336 str = cdf_clsid_to_mime(root_storage->d_storage_uuid, 337 clsid2desc); 338 if (str) { 339 if (file_printf(ms, ", %s", str) == -1) 340 return -2; 341 } 342 } 343 } 344 345 m = cdf_file_property_info(ms, info, count, root_storage); 346 free(info); 347 348 return m == -1 ? -2 : m; 349 } 350 351 #ifdef notdef 352 private char * 353 format_clsid(char *buf, size_t len, const uint64_t uuid[2]) { 354 snprintf(buf, len, "%.8" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64 "-%.4" 355 PRIx64 "-%.12" PRIx64, 356 (uuid[0] >> 32) & (uint64_t)0x000000000ffffffffULL, 357 (uuid[0] >> 16) & (uint64_t)0x0000000000000ffffULL, 358 (uuid[0] >> 0) & (uint64_t)0x0000000000000ffffULL, 359 (uuid[1] >> 48) & (uint64_t)0x0000000000000ffffULL, 360 (uuid[1] >> 0) & (uint64_t)0x0000fffffffffffffULL); 361 return buf; 362 } 363 #endif 364 365 private int 366 cdf_file_catalog_info(struct magic_set *ms, const cdf_info_t *info, 367 const cdf_header_t *h, const cdf_sat_t *sat, const cdf_sat_t *ssat, 368 const cdf_stream_t *sst, const cdf_dir_t *dir, cdf_stream_t *scn) 369 { 370 int i; 371 372 if ((i = cdf_read_user_stream(info, h, sat, ssat, sst, 373 dir, "Catalog", scn)) == -1) 374 return i; 375 #ifdef CDF_DEBUG 376 cdf_dump_catalog(&h, scn); 377 #endif 378 if ((i = cdf_file_catalog(ms, h, scn)) == -1) 379 return -1; 380 return i; 381 } 382 383 private struct sinfo { 384 const char *name; 385 const char *mime; 386 const char *sections[5]; 387 const int types[5]; 388 } sectioninfo[] = { 389 { "Encrypted", "encrypted", 390 { 391 "EncryptedPackage", NULL, NULL, NULL, NULL, 392 }, 393 { 394 CDF_DIR_TYPE_USER_STREAM, 0, 0, 0, 0, 395 396 }, 397 }, 398 { "QuickBooks", "quickbooks", 399 { 400 #if 0 401 "TaxForms", "PDFTaxForms", "modulesInBackup", 402 #endif 403 "mfbu_header", NULL, NULL, NULL, NULL, 404 }, 405 { 406 #if 0 407 CDF_DIR_TYPE_USER_STORAGE, 408 CDF_DIR_TYPE_USER_STORAGE, 409 CDF_DIR_TYPE_USER_STREAM, 410 #endif 411 CDF_DIR_TYPE_USER_STREAM, 412 0, 0, 0, 0 413 }, 414 }, 415 }; 416 417 private int 418 cdf_file_dir_info(struct magic_set *ms, const cdf_dir_t *dir) 419 { 420 size_t sd, j; 421 422 for (sd = 0; sd < __arraycount(sectioninfo); sd++) { 423 const struct sinfo *si = §ioninfo[sd]; 424 for (j = 0; si->sections[j]; j++) { 425 if (cdf_find_stream(dir, si->sections[j], si->types[j]) 426 <= 0) { 427 #ifdef CDF_DEBUG 428 fprintf(stderr, "Can't read %s\n", 429 si->sections[j]); 430 #endif 431 break; 432 } 433 } 434 if (si->sections[j] != NULL) 435 continue; 436 if (NOTMIME(ms)) { 437 if (file_printf(ms, "CDFV2 %s", si->name) == -1) 438 return -1; 439 } else { 440 if (file_printf(ms, "application/CDFV2-%s", 441 si->mime) == -1) 442 return -1; 443 } 444 return 1; 445 } 446 return -1; 447 } 448 449 protected int 450 file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf, 451 size_t nbytes) 452 { 453 cdf_info_t info; 454 cdf_header_t h; 455 cdf_sat_t sat, ssat; 456 cdf_stream_t sst, scn; 457 cdf_dir_t dir; 458 int i; 459 const char *expn = ""; 460 const cdf_directory_t *root_storage; 461 462 info.i_fd = fd; 463 info.i_buf = buf; 464 info.i_len = nbytes; 465 if (ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) 466 return 0; 467 if (cdf_read_header(&info, &h) == -1) 468 return 0; 469 #ifdef CDF_DEBUG 470 cdf_dump_header(&h); 471 #endif 472 473 if ((i = cdf_read_sat(&info, &h, &sat)) == -1) { 474 expn = "Can't read SAT"; 475 goto out0; 476 } 477 #ifdef CDF_DEBUG 478 cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h)); 479 #endif 480 481 if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) { 482 expn = "Can't read SSAT"; 483 goto out1; 484 } 485 #ifdef CDF_DEBUG 486 cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h)); 487 #endif 488 489 if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) { 490 expn = "Can't read directory"; 491 goto out2; 492 } 493 494 if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst, 495 &root_storage)) == -1) { 496 expn = "Cannot read short stream"; 497 goto out3; 498 } 499 #ifdef CDF_DEBUG 500 cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir); 501 #endif 502 #ifdef notdef 503 if (root_storage) { 504 if (NOTMIME(ms)) { 505 char clsbuf[128]; 506 if (file_printf(ms, "CLSID %s, ", 507 format_clsid(clsbuf, sizeof(clsbuf), 508 root_storage->d_storage_uuid)) == -1) 509 return -1; 510 } 511 } 512 #endif 513 514 if ((i = cdf_read_user_stream(&info, &h, &sat, &ssat, &sst, &dir, 515 "FileHeader", &scn)) != -1) { 516 #define HWP5_SIGNATURE "HWP Document File" 517 if (scn.sst_dirlen >= sizeof(HWP5_SIGNATURE) - 1 518 && memcmp(scn.sst_tab, HWP5_SIGNATURE, 519 sizeof(HWP5_SIGNATURE) - 1) == 0) { 520 if (NOTMIME(ms)) { 521 if (file_printf(ms, 522 "Hangul (Korean) Word Processor File 5.x") == -1) 523 return -1; 524 } else { 525 if (file_printf(ms, "application/x-hwp") == -1) 526 return -1; 527 } 528 i = 1; 529 goto out5; 530 } else { 531 free(scn.sst_tab); 532 scn.sst_tab = NULL; 533 scn.sst_len = 0; 534 scn.sst_dirlen = 0; 535 } 536 } 537 538 if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir, 539 &scn)) == -1) { 540 if (errno != ESRCH) { 541 expn = "Cannot read summary info"; 542 goto out4; 543 } 544 i = cdf_file_catalog_info(ms, &info, &h, &sat, &ssat, &sst, 545 &dir, &scn); 546 if (i > 0) 547 goto out4; 548 i = cdf_file_dir_info(ms, &dir); 549 if (i < 0) 550 expn = "Cannot read section info"; 551 goto out4; 552 } 553 554 555 #ifdef CDF_DEBUG 556 cdf_dump_summary_info(&h, &scn); 557 #endif 558 if ((i = cdf_file_summary_info(ms, &h, &scn, root_storage)) < 0) 559 expn = "Can't expand summary_info"; 560 561 if (i == 0) { 562 const char *str = NULL; 563 cdf_directory_t *d; 564 char name[__arraycount(d->d_name)]; 565 size_t j, k; 566 567 for (j = 0; str == NULL && j < dir.dir_len; j++) { 568 d = &dir.dir_tab[j]; 569 for (k = 0; k < sizeof(name); k++) 570 name[k] = (char)cdf_tole2(d->d_name[k]); 571 str = cdf_app_to_mime(name, 572 NOTMIME(ms) ? name2desc : name2mime); 573 } 574 if (NOTMIME(ms)) { 575 if (str != NULL) { 576 if (file_printf(ms, "%s", str) == -1) 577 return -1; 578 i = 1; 579 } 580 } else { 581 if (str == NULL) 582 str = "vnd.ms-office"; 583 if (file_printf(ms, "application/%s", str) == -1) 584 return -1; 585 i = 1; 586 } 587 } 588 out5: 589 free(scn.sst_tab); 590 out4: 591 free(sst.sst_tab); 592 out3: 593 free(dir.dir_tab); 594 out2: 595 free(ssat.sat_tab); 596 out1: 597 free(sat.sat_tab); 598 out0: 599 if (i == -1) { 600 if (NOTMIME(ms)) { 601 if (file_printf(ms, 602 "Composite Document File V2 Document") == -1) 603 return -1; 604 if (*expn) 605 if (file_printf(ms, ", %s", expn) == -1) 606 return -1; 607 } else { 608 if (file_printf(ms, "application/CDFV2-unknown") == -1) 609 return -1; 610 } 611 i = 1; 612 } 613 return i; 614 } 615