/*- * Copyright (c) 2008, 2016 Christos Zoulas * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include "file.h" #ifndef lint FILE_RCSID("@(#)$File: readcdf.c,v 1.80 2023/01/24 20:13:40 christos Exp $") #endif #include <assert.h> #include <stdlib.h> #include <unistd.h> #include <string.h> #include <time.h> #include <ctype.h> #include "cdf.h" #include "magic.h" #define NOTMIME(ms) (((ms)->flags & MAGIC_MIME) == 0) static const struct nv { const char *pattern; const char *mime; } app2mime[] = { { "Word", "msword", }, { "Excel", "vnd.ms-excel", }, { "Powerpoint", "vnd.ms-powerpoint", }, { "Crystal Reports", "x-rpt", }, { "Advanced Installer", "vnd.ms-msi", }, { "InstallShield", "vnd.ms-msi", }, { "Microsoft Patch Compiler", "vnd.ms-msi", }, { "NAnt", "vnd.ms-msi", }, { "Windows Installer", "vnd.ms-msi", }, { NULL, NULL, }, }, name2mime[] = { { "Book", "vnd.ms-excel", }, { "Workbook", "vnd.ms-excel", }, { "WordDocument", "msword", }, { "PowerPoint", "vnd.ms-powerpoint", }, { "DigitalSignature", "vnd.ms-msi", }, { NULL, NULL, }, }, name2desc[] = { { "Book", "Microsoft Excel", }, { "Workbook", "Microsoft Excel", }, { "WordDocument", "Microsoft Word", }, { "PowerPoint", "Microsoft PowerPoint", }, { "DigitalSignature", "Microsoft Installer", }, { NULL, NULL, }, }; static const struct cv { uint64_t clsid[2]; const char *mime; } clsid2mime[] = { { { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, "x-msi", }, { { 0, 0 }, NULL, }, }, clsid2desc[] = { { { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, "MSI Installer", }, { { 0, 0 }, NULL, }, }; file_private const char * cdf_clsid_to_mime(const uint64_t clsid[2], const struct cv *cv) { size_t i; for (i = 0; cv[i].mime != NULL; i++) { if (clsid[0] == cv[i].clsid[0] && clsid[1] == cv[i].clsid[1]) return cv[i].mime; } #ifdef CDF_DEBUG fprintf(stderr, "unknown mime %" PRIx64 ", %" PRIx64 "\n", clsid[0], clsid[1]); #endif return NULL; } file_private const char * cdf_app_to_mime(const char *vbuf, const struct nv *nv) { size_t i; const char *rv = NULL; #ifdef USE_C_LOCALE locale_t old_lc_ctype, c_lc_ctype; c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0); assert(c_lc_ctype != NULL); old_lc_ctype = uselocale(c_lc_ctype); assert(old_lc_ctype != NULL); #else char *old_lc_ctype = setlocale(LC_CTYPE, NULL); assert(old_lc_ctype != NULL); old_lc_ctype = strdup(old_lc_ctype); assert(old_lc_ctype != NULL); (void)setlocale(LC_CTYPE, "C"); #endif for (i = 0; nv[i].pattern != NULL; i++) if (strcasestr(vbuf, nv[i].pattern) != NULL) { rv = nv[i].mime; break; } #ifdef CDF_DEBUG fprintf(stderr, "unknown app %s\n", vbuf); #endif #ifdef USE_C_LOCALE (void)uselocale(old_lc_ctype); freelocale(c_lc_ctype); #else (void)setlocale(LC_CTYPE, old_lc_ctype); free(old_lc_ctype); #endif return rv; } file_private int cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info, size_t count, const cdf_directory_t *root_storage) { size_t i; cdf_timestamp_t tp; struct timespec ts; char buf[64]; const char *str = NULL; const char *s, *e; int len; if (!NOTMIME(ms) && root_storage) str = cdf_clsid_to_mime(root_storage->d_storage_uuid, clsid2mime); for (i = 0; i < count; i++) { cdf_print_property_name(buf, sizeof(buf), info[i].pi_id); switch (info[i].pi_type) { case CDF_NULL: break; case CDF_SIGNED16: if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf, info[i].pi_s16) == -1) return -1; break; case CDF_SIGNED32: if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf, info[i].pi_s32) == -1) return -1; break; case CDF_UNSIGNED32: if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf, info[i].pi_u32) == -1) return -1; break; case CDF_FLOAT: if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf, info[i].pi_f) == -1) return -1; break; case CDF_DOUBLE: if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf, info[i].pi_d) == -1) return -1; break; case CDF_LENGTH32_STRING: case CDF_LENGTH32_WSTRING: len = info[i].pi_str.s_len; if (len > 1) { char vbuf[1024]; size_t j, k = 1; if (info[i].pi_type == CDF_LENGTH32_WSTRING) k++; s = info[i].pi_str.s_buf; e = info[i].pi_str.s_buf + len; for (j = 0; s < e && j < sizeof(vbuf) && len--; s += k) { if (*s == '\0') break; if (isprint(CAST(unsigned char, *s))) vbuf[j++] = *s; } if (j == sizeof(vbuf)) --j; vbuf[j] = '\0'; if (NOTMIME(ms)) { if (vbuf[0]) { if (file_printf(ms, ", %s: %s", buf, vbuf) == -1) return -1; } } else if (str == NULL && info[i].pi_id == CDF_PROPERTY_NAME_OF_APPLICATION) { str = cdf_app_to_mime(vbuf, app2mime); } } break; case CDF_FILETIME: tp = info[i].pi_tp; if (tp != 0) { char tbuf[64]; if (tp < 1000000000000000LL) { cdf_print_elapsed_time(tbuf, sizeof(tbuf), tp); if (NOTMIME(ms) && file_printf(ms, ", %s: %s", buf, tbuf) == -1) return -1; } else { char *c, *ec; cdf_timestamp_to_timespec(&ts, tp); c = cdf_ctime(&ts.tv_sec, tbuf); if (c != NULL && (ec = strchr(c, '\n')) != NULL) *ec = '\0'; if (NOTMIME(ms) && file_printf(ms, ", %s: %s", buf, c) == -1) return -1; } } break; case CDF_CLIPBOARD: break; default: return -1; } } if (ms->flags & MAGIC_MIME_TYPE) { if (str == NULL) return 0; if (file_printf(ms, "application/%s", str) == -1) return -1; } return 1; } file_private int cdf_file_catalog(struct magic_set *ms, const cdf_header_t *h, const cdf_stream_t *sst) { cdf_catalog_t *cat; size_t i; char buf[256]; cdf_catalog_entry_t *ce; if (NOTMIME(ms)) { if (file_printf(ms, "Microsoft Thumbs.db [") == -1) return -1; if (cdf_unpack_catalog(h, sst, &cat) == -1) return -1; ce = cat->cat_e; /* skip first entry since it has a , or paren */ for (i = 1; i < cat->cat_num; i++) if (file_printf(ms, "%s%s", cdf_u16tos8(buf, ce[i].ce_namlen, ce[i].ce_name), i == cat->cat_num - 1 ? "]" : ", ") == -1) { free(cat); return -1; } free(cat); } else if (ms->flags & MAGIC_MIME_TYPE) { if (file_printf(ms, "application/CDFV2") == -1) return -1; } return 1; } file_private int cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h, const cdf_stream_t *sst, const cdf_directory_t *root_storage) { cdf_summary_info_header_t si; cdf_property_info_t *info; size_t count; int m; if (cdf_unpack_summary_info(sst, h, &si, &info, &count) == -1) return -1; if (NOTMIME(ms)) { const char *str; if (file_printf(ms, "Composite Document File V2 Document") == -1) return -1; if (file_printf(ms, ", %s Endian", si.si_byte_order == 0xfffe ? "Little" : "Big") == -1) return -2; switch (si.si_os) { case 2: if (file_printf(ms, ", Os: Windows, Version %d.%d", si.si_os_version & 0xff, CAST(uint32_t, si.si_os_version) >> 8) == -1) return -2; break; case 1: if (file_printf(ms, ", Os: MacOS, Version %d.%d", CAST(uint32_t, si.si_os_version) >> 8, si.si_os_version & 0xff) == -1) return -2; break; default: if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os, si.si_os_version & 0xff, CAST(uint32_t, si.si_os_version) >> 8) == -1) return -2; break; } if (root_storage) { str = cdf_clsid_to_mime(root_storage->d_storage_uuid, clsid2desc); if (str) { if (file_printf(ms, ", %s", str) == -1) return -2; } } } m = cdf_file_property_info(ms, info, count, root_storage); free(info); return m == -1 ? -2 : m; } #ifdef notdef file_private char * format_clsid(char *buf, size_t len, const uint64_t uuid[2]) { snprintf(buf, len, "%.8" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64 "-%.12" PRIx64, (uuid[0] >> 32) & (uint64_t)0x000000000ffffffffULL, (uuid[0] >> 16) & (uint64_t)0x0000000000000ffffULL, (uuid[0] >> 0) & (uint64_t)0x0000000000000ffffULL, (uuid[1] >> 48) & (uint64_t)0x0000000000000ffffULL, (uuid[1] >> 0) & (uint64_t)0x0000fffffffffffffULL); return buf; } #endif file_private int cdf_file_catalog_info(struct magic_set *ms, const cdf_info_t *info, const cdf_header_t *h, const cdf_sat_t *sat, const cdf_sat_t *ssat, const cdf_stream_t *sst, const cdf_dir_t *dir, cdf_stream_t *scn) { int i; if ((i = cdf_read_user_stream(info, h, sat, ssat, sst, dir, "Catalog", scn)) == -1) return i; #ifdef CDF_DEBUG cdf_dump_catalog(h, scn); #endif if ((i = cdf_file_catalog(ms, h, scn)) == -1) return -1; return i; } file_private int cdf_check_summary_info(struct magic_set *ms, const cdf_info_t *info, const cdf_header_t *h, const cdf_sat_t *sat, const cdf_sat_t *ssat, const cdf_stream_t *sst, const cdf_dir_t *dir, cdf_stream_t *scn, const cdf_directory_t *root_storage, const char **expn) { int i; const char *str = NULL; cdf_directory_t *d; char name[__arraycount(d->d_name)]; size_t j, k; #ifdef CDF_DEBUG cdf_dump_summary_info(h, scn); #endif if ((i = cdf_file_summary_info(ms, h, scn, root_storage)) < 0) { *expn = "Can't expand summary_info"; return i; } if (i == 1) return i; for (j = 0; str == NULL && j < dir->dir_len; j++) { d = &dir->dir_tab[j]; for (k = 0; k < sizeof(name); k++) name[k] = CAST(char, cdf_tole2(d->d_name[k])); str = cdf_app_to_mime(name, NOTMIME(ms) ? name2desc : name2mime); } if (NOTMIME(ms)) { if (str != NULL) { if (file_printf(ms, "%s", str) == -1) return -1; i = 1; } } else if (ms->flags & MAGIC_MIME_TYPE) { if (str == NULL) str = "vnd.ms-office"; if (file_printf(ms, "application/%s", str) == -1) return -1; i = 1; } if (i <= 0) { i = cdf_file_catalog_info(ms, info, h, sat, ssat, sst, dir, scn); } return i; } file_private struct sinfo { const char *name; const char *mime; const char *sections[5]; const int types[5]; } sectioninfo[] = { { "Encrypted", "encrypted", { "EncryptedPackage", "EncryptedSummary", NULL, NULL, NULL, }, { CDF_DIR_TYPE_USER_STREAM, CDF_DIR_TYPE_USER_STREAM, 0, 0, 0, }, }, { "QuickBooks", "quickbooks", { #if 0 "TaxForms", "PDFTaxForms", "modulesInBackup", #endif "mfbu_header", NULL, NULL, NULL, NULL, }, { #if 0 CDF_DIR_TYPE_USER_STORAGE, CDF_DIR_TYPE_USER_STORAGE, CDF_DIR_TYPE_USER_STREAM, #endif CDF_DIR_TYPE_USER_STREAM, 0, 0, 0, 0 }, }, { "Microsoft Excel", "vnd.ms-excel", { "Book", "Workbook", NULL, NULL, NULL, }, { CDF_DIR_TYPE_USER_STREAM, CDF_DIR_TYPE_USER_STREAM, 0, 0, 0, }, }, { "Microsoft Word", "msword", { "WordDocument", NULL, NULL, NULL, NULL, }, { CDF_DIR_TYPE_USER_STREAM, 0, 0, 0, 0, }, }, { "Microsoft PowerPoint", "vnd.ms-powerpoint", { "PowerPoint", NULL, NULL, NULL, NULL, }, { CDF_DIR_TYPE_USER_STREAM, 0, 0, 0, 0, }, }, { "Microsoft Outlook Message", "vnd.ms-outlook", { "__properties_version1.0", "__recip_version1.0_#00000000", NULL, NULL, NULL, }, { CDF_DIR_TYPE_USER_STREAM, CDF_DIR_TYPE_USER_STORAGE, 0, 0, 0, }, }, }; file_private int cdf_file_dir_info(struct magic_set *ms, const cdf_dir_t *dir) { size_t sd, j; for (sd = 0; sd < __arraycount(sectioninfo); sd++) { const struct sinfo *si = §ioninfo[sd]; for (j = 0; si->sections[j]; j++) { if (cdf_find_stream(dir, si->sections[j], si->types[j]) > 0) break; #ifdef CDF_DEBUG fprintf(stderr, "Can't read %s\n", si->sections[j]); #endif } if (si->sections[j] == NULL) continue; if (NOTMIME(ms)) { if (file_printf(ms, "CDFV2 %s", si->name) == -1) return -1; } else if (ms->flags & MAGIC_MIME_TYPE) { if (file_printf(ms, "application/%s", si->mime) == -1) return -1; } return 1; } return -1; } file_protected int file_trycdf(struct magic_set *ms, const struct buffer *b) { int fd = b->fd; const unsigned char *buf = CAST(const unsigned char *, b->fbuf); size_t nbytes = b->flen; cdf_info_t info; cdf_header_t h; cdf_sat_t sat, ssat; cdf_stream_t sst, scn; cdf_dir_t dir; int i; const char *expn = ""; const cdf_directory_t *root_storage; scn.sst_tab = NULL; info.i_fd = fd; info.i_buf = buf; info.i_len = nbytes; if (ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) return 0; if (cdf_read_header(&info, &h) == -1) return 0; #ifdef CDF_DEBUG cdf_dump_header(&h); #endif if ((i = cdf_read_sat(&info, &h, &sat)) == -1) { expn = "Can't read SAT"; goto out0; } #ifdef CDF_DEBUG cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h)); #endif if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) { expn = "Can't read SSAT"; goto out1; } #ifdef CDF_DEBUG cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h)); #endif if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) { expn = "Can't read directory"; goto out2; } if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst, &root_storage)) == -1) { expn = "Cannot read short stream"; goto out3; } #ifdef CDF_DEBUG cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir); #endif #ifdef notdef if (root_storage) { if (NOTMIME(ms)) { char clsbuf[128]; if (file_printf(ms, "CLSID %s, ", format_clsid(clsbuf, sizeof(clsbuf), root_storage->d_storage_uuid)) == -1) return -1; } } #endif if (cdf_read_user_stream(&info, &h, &sat, &ssat, &sst, &dir, "FileHeader", &scn) != -1) { #define HWP5_SIGNATURE "HWP Document File" if (scn.sst_len * scn.sst_ss >= sizeof(HWP5_SIGNATURE) - 1 && memcmp(scn.sst_tab, HWP5_SIGNATURE, sizeof(HWP5_SIGNATURE) - 1) == 0) { if (NOTMIME(ms)) { if (file_printf(ms, "Hancom HWP (Hangul Word Processor) file, version 5.0") == -1) return -1; } else if (ms->flags & MAGIC_MIME_TYPE) { if (file_printf(ms, "application/x-hwp") == -1) return -1; } i = 1; goto out5; } else { cdf_zero_stream(&scn); } } if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir, &scn)) == -1) { if (errno != ESRCH) { expn = "Cannot read summary info"; } } else { i = cdf_check_summary_info(ms, &info, &h, &sat, &ssat, &sst, &dir, &scn, root_storage, &expn); cdf_zero_stream(&scn); } if (i <= 0) { if ((i = cdf_read_doc_summary_info(&info, &h, &sat, &ssat, &sst, &dir, &scn)) == -1) { if (errno != ESRCH) { expn = "Cannot read summary info"; } } else { i = cdf_check_summary_info(ms, &info, &h, &sat, &ssat, &sst, &dir, &scn, root_storage, &expn); } } if (i <= 0) { i = cdf_file_dir_info(ms, &dir); if (i < 0) expn = "Cannot read section info"; } out5: cdf_zero_stream(&scn); cdf_zero_stream(&sst); out3: free(dir.dir_tab); out2: free(ssat.sat_tab); out1: free(sat.sat_tab); out0: /* If we handled it already, return */ if (i != -1) return i; /* Provide a default handler */ if (NOTMIME(ms)) { if (file_printf(ms, "Composite Document File V2 Document") == -1) return -1; if (*expn) if (file_printf(ms, ", %s", expn) == -1) return -1; } else if (ms->flags & MAGIC_MIME_TYPE) { /* https://reposcope.com/mimetype/application/x-ole-storage */ if (file_printf(ms, "application/x-ole-storage") == -1) return -1; } return 1; }