1 /*- 2 * Copyright (c) 2008 Christos Zoulas 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 */ 26 #include "file.h" 27 28 #ifndef lint 29 FILE_RCSID("@(#)$File: readcdf.c,v 1.53 2015/04/09 20:01:41 christos Exp $") 30 #endif 31 32 #include <assert.h> 33 #include <stdlib.h> 34 #include <unistd.h> 35 #include <string.h> 36 #include <time.h> 37 #include <ctype.h> 38 39 #include "cdf.h" 40 #include "magic.h" 41 42 #ifndef __arraycount 43 #define __arraycount(a) (sizeof(a) / sizeof(a[0])) 44 #endif 45 46 #define NOTMIME(ms) (((ms)->flags & MAGIC_MIME) == 0) 47 48 static const struct nv { 49 const char *pattern; 50 const char *mime; 51 } app2mime[] = { 52 { "Word", "msword", }, 53 { "Excel", "vnd.ms-excel", }, 54 { "Powerpoint", "vnd.ms-powerpoint", }, 55 { "Crystal Reports", "x-rpt", }, 56 { "Advanced Installer", "vnd.ms-msi", }, 57 { "InstallShield", "vnd.ms-msi", }, 58 { "Microsoft Patch Compiler", "vnd.ms-msi", }, 59 { "NAnt", "vnd.ms-msi", }, 60 { "Windows Installer", "vnd.ms-msi", }, 61 { NULL, NULL, }, 62 }, name2mime[] = { 63 { "WordDocument", "msword", }, 64 { "PowerPoint", "vnd.ms-powerpoint", }, 65 { "DigitalSignature", "vnd.ms-msi", }, 66 { NULL, NULL, }, 67 }, name2desc[] = { 68 { "WordDocument", "Microsoft Office Word",}, 69 { "PowerPoint", "Microsoft PowerPoint", }, 70 { "DigitalSignature", "Microsoft Installer", }, 71 { NULL, NULL, }, 72 }; 73 74 static const struct cv { 75 uint64_t clsid[2]; 76 const char *mime; 77 } clsid2mime[] = { 78 { 79 { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, 80 "x-msi", 81 }, 82 { { 0, 0 }, 83 NULL, 84 }, 85 }, clsid2desc[] = { 86 { 87 { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, 88 "MSI Installer", 89 }, 90 { { 0, 0 }, 91 NULL, 92 }, 93 }; 94 95 private const char * 96 cdf_clsid_to_mime(const uint64_t clsid[2], const struct cv *cv) 97 { 98 size_t i; 99 for (i = 0; cv[i].mime != NULL; i++) { 100 if (clsid[0] == cv[i].clsid[0] && clsid[1] == cv[i].clsid[1]) 101 return cv[i].mime; 102 } 103 #ifdef CDF_DEBUG 104 fprintf(stderr, "unknown mime %" PRIx64 ", %" PRIx64 "\n", clsid[0], 105 clsid[1]); 106 #endif 107 return NULL; 108 } 109 110 private const char * 111 cdf_app_to_mime(const char *vbuf, const struct nv *nv) 112 { 113 size_t i; 114 const char *rv = NULL; 115 #ifdef USE_C_LOCALE 116 locale_t old_lc_ctype, c_lc_ctype; 117 118 c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0); 119 assert(c_lc_ctype != NULL); 120 old_lc_ctype = uselocale(c_lc_ctype); 121 assert(old_lc_ctype != NULL); 122 #endif 123 for (i = 0; nv[i].pattern != NULL; i++) 124 if (strcasestr(vbuf, nv[i].pattern) != NULL) { 125 rv = nv[i].mime; 126 break; 127 } 128 #ifdef CDF_DEBUG 129 fprintf(stderr, "unknown app %s\n", vbuf); 130 #endif 131 #ifdef USE_C_LOCALE 132 (void)uselocale(old_lc_ctype); 133 freelocale(c_lc_ctype); 134 #endif 135 return rv; 136 } 137 138 private int 139 cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info, 140 size_t count, const cdf_directory_t *root_storage) 141 { 142 size_t i; 143 cdf_timestamp_t tp; 144 struct timespec ts; 145 char buf[64]; 146 const char *str = NULL; 147 const char *s; 148 int len; 149 150 if (!NOTMIME(ms) && root_storage) 151 str = cdf_clsid_to_mime(root_storage->d_storage_uuid, 152 clsid2mime); 153 154 for (i = 0; i < count; i++) { 155 cdf_print_property_name(buf, sizeof(buf), info[i].pi_id); 156 switch (info[i].pi_type) { 157 case CDF_NULL: 158 break; 159 case CDF_SIGNED16: 160 if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf, 161 info[i].pi_s16) == -1) 162 return -1; 163 break; 164 case CDF_SIGNED32: 165 if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf, 166 info[i].pi_s32) == -1) 167 return -1; 168 break; 169 case CDF_UNSIGNED32: 170 if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf, 171 info[i].pi_u32) == -1) 172 return -1; 173 break; 174 case CDF_FLOAT: 175 if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf, 176 info[i].pi_f) == -1) 177 return -1; 178 break; 179 case CDF_DOUBLE: 180 if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf, 181 info[i].pi_d) == -1) 182 return -1; 183 break; 184 case CDF_LENGTH32_STRING: 185 case CDF_LENGTH32_WSTRING: 186 len = info[i].pi_str.s_len; 187 if (len > 1) { 188 char vbuf[1024]; 189 size_t j, k = 1; 190 191 if (info[i].pi_type == CDF_LENGTH32_WSTRING) 192 k++; 193 s = info[i].pi_str.s_buf; 194 for (j = 0; j < sizeof(vbuf) && len--; s += k) { 195 if (*s == '\0') 196 break; 197 if (isprint((unsigned char)*s)) 198 vbuf[j++] = *s; 199 } 200 if (j == sizeof(vbuf)) 201 --j; 202 vbuf[j] = '\0'; 203 if (NOTMIME(ms)) { 204 if (vbuf[0]) { 205 if (file_printf(ms, ", %s: %s", 206 buf, vbuf) == -1) 207 return -1; 208 } 209 } else if (str == NULL && info[i].pi_id == 210 CDF_PROPERTY_NAME_OF_APPLICATION) { 211 str = cdf_app_to_mime(vbuf, app2mime); 212 } 213 } 214 break; 215 case CDF_FILETIME: 216 tp = info[i].pi_tp; 217 if (tp != 0) { 218 char tbuf[64]; 219 if (tp < 1000000000000000LL) { 220 cdf_print_elapsed_time(tbuf, 221 sizeof(tbuf), tp); 222 if (NOTMIME(ms) && file_printf(ms, 223 ", %s: %s", buf, tbuf) == -1) 224 return -1; 225 } else { 226 char *c, *ec; 227 cdf_timestamp_to_timespec(&ts, tp); 228 c = cdf_ctime(&ts.tv_sec, tbuf); 229 if (c != NULL && 230 (ec = strchr(c, '\n')) != NULL) 231 *ec = '\0'; 232 233 if (NOTMIME(ms) && file_printf(ms, 234 ", %s: %s", buf, c) == -1) 235 return -1; 236 } 237 } 238 break; 239 case CDF_CLIPBOARD: 240 break; 241 default: 242 return -1; 243 } 244 } 245 if (!NOTMIME(ms)) { 246 if (str == NULL) 247 return 0; 248 if (file_printf(ms, "application/%s", str) == -1) 249 return -1; 250 } 251 return 1; 252 } 253 254 private int 255 cdf_file_catalog(struct magic_set *ms, const cdf_header_t *h, 256 const cdf_stream_t *sst) 257 { 258 cdf_catalog_t *cat; 259 size_t i; 260 char buf[256]; 261 cdf_catalog_entry_t *ce; 262 263 if (NOTMIME(ms)) { 264 if (file_printf(ms, "Microsoft Thumbs.db [") == -1) 265 return -1; 266 if (cdf_unpack_catalog(h, sst, &cat) == -1) 267 return -1; 268 ce = cat->cat_e; 269 /* skip first entry since it has a , or paren */ 270 for (i = 1; i < cat->cat_num; i++) 271 if (file_printf(ms, "%s%s", 272 cdf_u16tos8(buf, ce[i].ce_namlen, ce[i].ce_name), 273 i == cat->cat_num - 1 ? "]" : ", ") == -1) { 274 free(cat); 275 return -1; 276 } 277 free(cat); 278 } else { 279 if (file_printf(ms, "application/CDFV2") == -1) 280 return -1; 281 } 282 return 1; 283 } 284 285 private int 286 cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h, 287 const cdf_stream_t *sst, const cdf_directory_t *root_storage) 288 { 289 cdf_summary_info_header_t si; 290 cdf_property_info_t *info; 291 size_t count; 292 int m; 293 294 if (cdf_unpack_summary_info(sst, h, &si, &info, &count) == -1) 295 return -1; 296 297 if (NOTMIME(ms)) { 298 const char *str; 299 300 if (file_printf(ms, "Composite Document File V2 Document") 301 == -1) 302 return -1; 303 304 if (file_printf(ms, ", %s Endian", 305 si.si_byte_order == 0xfffe ? "Little" : "Big") == -1) 306 return -2; 307 switch (si.si_os) { 308 case 2: 309 if (file_printf(ms, ", Os: Windows, Version %d.%d", 310 si.si_os_version & 0xff, 311 (uint32_t)si.si_os_version >> 8) == -1) 312 return -2; 313 break; 314 case 1: 315 if (file_printf(ms, ", Os: MacOS, Version %d.%d", 316 (uint32_t)si.si_os_version >> 8, 317 si.si_os_version & 0xff) == -1) 318 return -2; 319 break; 320 default: 321 if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os, 322 si.si_os_version & 0xff, 323 (uint32_t)si.si_os_version >> 8) == -1) 324 return -2; 325 break; 326 } 327 if (root_storage) { 328 str = cdf_clsid_to_mime(root_storage->d_storage_uuid, 329 clsid2desc); 330 if (str) { 331 if (file_printf(ms, ", %s", str) == -1) 332 return -2; 333 } 334 } 335 } 336 337 m = cdf_file_property_info(ms, info, count, root_storage); 338 free(info); 339 340 return m == -1 ? -2 : m; 341 } 342 343 #ifdef notdef 344 private char * 345 format_clsid(char *buf, size_t len, const uint64_t uuid[2]) { 346 snprintf(buf, len, "%.8" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64 "-%.4" 347 PRIx64 "-%.12" PRIx64, 348 (uuid[0] >> 32) & (uint64_t)0x000000000ffffffffULL, 349 (uuid[0] >> 16) & (uint64_t)0x0000000000000ffffULL, 350 (uuid[0] >> 0) & (uint64_t)0x0000000000000ffffULL, 351 (uuid[1] >> 48) & (uint64_t)0x0000000000000ffffULL, 352 (uuid[1] >> 0) & (uint64_t)0x0000fffffffffffffULL); 353 return buf; 354 } 355 #endif 356 357 private int 358 cdf_file_catalog_info(struct magic_set *ms, const cdf_info_t *info, 359 const cdf_header_t *h, const cdf_sat_t *sat, const cdf_sat_t *ssat, 360 const cdf_stream_t *sst, const cdf_dir_t *dir, cdf_stream_t *scn) 361 { 362 int i; 363 364 if ((i = cdf_read_user_stream(info, h, sat, ssat, sst, 365 dir, "Catalog", scn)) == -1) 366 return i; 367 #ifdef CDF_DEBUG 368 cdf_dump_catalog(&h, &scn); 369 #endif 370 if ((i = cdf_file_catalog(ms, h, scn)) == -1) 371 return -1; 372 return i; 373 } 374 375 private struct sinfo { 376 const char *name; 377 const char *mime; 378 const char *sections[5]; 379 const int types[5]; 380 } sectioninfo[] = { 381 { "Encrypted", "encrypted", 382 { 383 "EncryptedPackage", NULL, NULL, NULL, NULL, 384 }, 385 { 386 CDF_DIR_TYPE_USER_STREAM, 0, 0, 0, 0, 387 388 }, 389 }, 390 { "QuickBooks", "quickbooks", 391 { 392 #if 0 393 "TaxForms", "PDFTaxForms", "modulesInBackup", 394 #endif 395 "mfbu_header", NULL, NULL, NULL, NULL, 396 }, 397 { 398 #if 0 399 CDF_DIR_TYPE_USER_STORAGE, 400 CDF_DIR_TYPE_USER_STORAGE, 401 CDF_DIR_TYPE_USER_STREAM, 402 #endif 403 CDF_DIR_TYPE_USER_STREAM, 404 0, 0, 0, 0 405 }, 406 }, 407 }; 408 409 private int 410 cdf_file_dir_info(struct magic_set *ms, const cdf_dir_t *dir) 411 { 412 size_t sd, j; 413 414 for (sd = 0; sd < __arraycount(sectioninfo); sd++) { 415 const struct sinfo *si = §ioninfo[sd]; 416 for (j = 0; si->sections[j]; j++) { 417 if (cdf_find_stream(dir, si->sections[j], si->types[j]) 418 <= 0) { 419 #ifdef CDF_DEBUG 420 fprintf(stderr, "Can't read %s\n", 421 si->sections[j]); 422 #endif 423 break; 424 } 425 } 426 if (si->sections[j] != NULL) 427 continue; 428 if (NOTMIME(ms)) { 429 if (file_printf(ms, "CDFV2 %s", si->name) == -1) 430 return -1; 431 } else { 432 if (file_printf(ms, "application/CDFV2-%s", 433 si->mime) == -1) 434 return -1; 435 } 436 return 1; 437 } 438 return -1; 439 } 440 441 protected int 442 file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf, 443 size_t nbytes) 444 { 445 cdf_info_t info; 446 cdf_header_t h; 447 cdf_sat_t sat, ssat; 448 cdf_stream_t sst, scn; 449 cdf_dir_t dir; 450 int i; 451 const char *expn = ""; 452 const cdf_directory_t *root_storage; 453 454 info.i_fd = fd; 455 info.i_buf = buf; 456 info.i_len = nbytes; 457 if (ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) 458 return 0; 459 if (cdf_read_header(&info, &h) == -1) 460 return 0; 461 #ifdef CDF_DEBUG 462 cdf_dump_header(&h); 463 #endif 464 465 if ((i = cdf_read_sat(&info, &h, &sat)) == -1) { 466 expn = "Can't read SAT"; 467 goto out0; 468 } 469 #ifdef CDF_DEBUG 470 cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h)); 471 #endif 472 473 if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) { 474 expn = "Can't read SSAT"; 475 goto out1; 476 } 477 #ifdef CDF_DEBUG 478 cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h)); 479 #endif 480 481 if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) { 482 expn = "Can't read directory"; 483 goto out2; 484 } 485 486 if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst, 487 &root_storage)) == -1) { 488 expn = "Cannot read short stream"; 489 goto out3; 490 } 491 #ifdef CDF_DEBUG 492 cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir); 493 #endif 494 #ifdef notdef 495 if (root_storage) { 496 if (NOTMIME(ms)) { 497 char clsbuf[128]; 498 if (file_printf(ms, "CLSID %s, ", 499 format_clsid(clsbuf, sizeof(clsbuf), 500 root_storage->d_storage_uuid)) == -1) 501 return -1; 502 } 503 } 504 #endif 505 506 if ((i = cdf_read_user_stream(&info, &h, &sat, &ssat, &sst, &dir, 507 "FileHeader", &scn)) != -1) { 508 #define HWP5_SIGNATURE "HWP Document File" 509 if (scn.sst_dirlen >= sizeof(HWP5_SIGNATURE) - 1 510 && memcmp(scn.sst_tab, HWP5_SIGNATURE, 511 sizeof(HWP5_SIGNATURE) - 1) == 0) { 512 if (NOTMIME(ms)) { 513 if (file_printf(ms, 514 "Hangul (Korean) Word Processor File 5.x") == -1) 515 return -1; 516 } else { 517 if (file_printf(ms, "application/x-hwp") == -1) 518 return -1; 519 } 520 i = 1; 521 goto out5; 522 } else { 523 free(scn.sst_tab); 524 scn.sst_tab = NULL; 525 scn.sst_len = 0; 526 scn.sst_dirlen = 0; 527 } 528 } 529 530 if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir, 531 &scn)) == -1) { 532 if (errno != ESRCH) { 533 expn = "Cannot read summary info"; 534 goto out4; 535 } 536 i = cdf_file_catalog_info(ms, &info, &h, &sat, &ssat, &sst, 537 &dir, &scn); 538 if (i > 0) 539 goto out4; 540 i = cdf_file_dir_info(ms, &dir); 541 if (i < 0) 542 expn = "Cannot read section info"; 543 goto out4; 544 } 545 546 547 #ifdef CDF_DEBUG 548 cdf_dump_summary_info(&h, &scn); 549 #endif 550 if ((i = cdf_file_summary_info(ms, &h, &scn, root_storage)) < 0) 551 expn = "Can't expand summary_info"; 552 553 if (i == 0) { 554 const char *str = NULL; 555 cdf_directory_t *d; 556 char name[__arraycount(d->d_name)]; 557 size_t j, k; 558 559 for (j = 0; str == NULL && j < dir.dir_len; j++) { 560 d = &dir.dir_tab[j]; 561 for (k = 0; k < sizeof(name); k++) 562 name[k] = (char)cdf_tole2(d->d_name[k]); 563 str = cdf_app_to_mime(name, 564 NOTMIME(ms) ? name2desc : name2mime); 565 } 566 if (NOTMIME(ms)) { 567 if (str != NULL) { 568 if (file_printf(ms, "%s", str) == -1) 569 return -1; 570 i = 1; 571 } 572 } else { 573 if (str == NULL) 574 str = "vnd.ms-office"; 575 if (file_printf(ms, "application/%s", str) == -1) 576 return -1; 577 i = 1; 578 } 579 } 580 out5: 581 free(scn.sst_tab); 582 out4: 583 free(sst.sst_tab); 584 out3: 585 free(dir.dir_tab); 586 out2: 587 free(ssat.sat_tab); 588 out1: 589 free(sat.sat_tab); 590 out0: 591 if (i == -1) { 592 if (NOTMIME(ms)) { 593 if (file_printf(ms, 594 "Composite Document File V2 Document") == -1) 595 return -1; 596 if (*expn) 597 if (file_printf(ms, ", %s", expn) == -1) 598 return -1; 599 } else { 600 if (file_printf(ms, "application/CDFV2-unknown") == -1) 601 return -1; 602 } 603 i = 1; 604 } 605 return i; 606 } 607