1 /*- 2 * Copyright (c) 2008 Christos Zoulas 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 */ 26 #include "file.h" 27 28 #ifndef lint 29 FILE_RCSID("@(#)$File: readcdf.c,v 1.44 2014/05/14 23:22:48 christos Exp $") 30 #endif 31 32 #include <assert.h> 33 #include <stdlib.h> 34 #include <unistd.h> 35 #include <string.h> 36 #include <time.h> 37 #include <ctype.h> 38 #if defined(HAVE_LOCALE_H) 39 #include <locale.h> 40 #endif 41 42 #include "cdf.h" 43 #include "magic.h" 44 45 #define NOTMIME(ms) (((ms)->flags & MAGIC_MIME) == 0) 46 47 static const struct nv { 48 const char *pattern; 49 const char *mime; 50 } app2mime[] = { 51 { "Word", "msword", }, 52 { "Excel", "vnd.ms-excel", }, 53 { "Powerpoint", "vnd.ms-powerpoint", }, 54 { "Crystal Reports", "x-rpt", }, 55 { "Advanced Installer", "vnd.ms-msi", }, 56 { "InstallShield", "vnd.ms-msi", }, 57 { "Microsoft Patch Compiler", "vnd.ms-msi", }, 58 { "NAnt", "vnd.ms-msi", }, 59 { "Windows Installer", "vnd.ms-msi", }, 60 { NULL, NULL, }, 61 }, name2mime[] = { 62 { "WordDocument", "msword", }, 63 { "PowerPoint", "vnd.ms-powerpoint", }, 64 { "DigitalSignature", "vnd.ms-msi", }, 65 { NULL, NULL, }, 66 }, name2desc[] = { 67 { "WordDocument", "Microsoft Office Word",}, 68 { "PowerPoint", "Microsoft PowerPoint", }, 69 { "DigitalSignature", "Microsoft Installer", }, 70 { NULL, NULL, }, 71 }; 72 73 static const struct cv { 74 uint64_t clsid[2]; 75 const char *mime; 76 } clsid2mime[] = { 77 { 78 { 0x00000000000c1084LLU, 0x46000000000000c0LLU }, 79 "x-msi", 80 }, 81 { { 0, 0 }, 82 NULL, 83 }, 84 }, clsid2desc[] = { 85 { 86 { 0x00000000000c1084LLU, 0x46000000000000c0LLU }, 87 "MSI Installer", 88 }, 89 { { 0, 0 }, 90 NULL, 91 }, 92 }; 93 94 private const char * 95 cdf_clsid_to_mime(const uint64_t clsid[2], const struct cv *cv) 96 { 97 size_t i; 98 for (i = 0; cv[i].mime != NULL; i++) { 99 if (clsid[0] == cv[i].clsid[0] && clsid[1] == cv[i].clsid[1]) 100 return cv[i].mime; 101 } 102 return NULL; 103 } 104 105 private const char * 106 cdf_app_to_mime(const char *vbuf, const struct nv *nv) 107 { 108 size_t i; 109 const char *rv = NULL; 110 char *old_lc_ctype; 111 112 old_lc_ctype = setlocale(LC_CTYPE, NULL); 113 assert(old_lc_ctype != NULL); 114 old_lc_ctype = strdup(old_lc_ctype); 115 assert(old_lc_ctype != NULL); 116 (void)setlocale(LC_CTYPE, "C"); 117 for (i = 0; nv[i].pattern != NULL; i++) 118 if (strcasestr(vbuf, nv[i].pattern) != NULL) { 119 rv = nv[i].mime; 120 break; 121 } 122 (void)setlocale(LC_CTYPE, old_lc_ctype); 123 free(old_lc_ctype); 124 return rv; 125 } 126 127 private int 128 cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info, 129 size_t count, const cdf_directory_t *root_storage) 130 { 131 size_t i; 132 cdf_timestamp_t tp; 133 struct timespec ts; 134 char buf[64]; 135 const char *str = NULL; 136 const char *s; 137 int len; 138 139 if (!NOTMIME(ms) && root_storage) 140 str = cdf_clsid_to_mime(root_storage->d_storage_uuid, 141 clsid2mime); 142 143 for (i = 0; i < count; i++) { 144 cdf_print_property_name(buf, sizeof(buf), info[i].pi_id); 145 switch (info[i].pi_type) { 146 case CDF_NULL: 147 break; 148 case CDF_SIGNED16: 149 if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf, 150 info[i].pi_s16) == -1) 151 return -1; 152 break; 153 case CDF_SIGNED32: 154 if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf, 155 info[i].pi_s32) == -1) 156 return -1; 157 break; 158 case CDF_UNSIGNED32: 159 if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf, 160 info[i].pi_u32) == -1) 161 return -1; 162 break; 163 case CDF_FLOAT: 164 if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf, 165 info[i].pi_f) == -1) 166 return -1; 167 break; 168 case CDF_DOUBLE: 169 if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf, 170 info[i].pi_d) == -1) 171 return -1; 172 break; 173 case CDF_LENGTH32_STRING: 174 case CDF_LENGTH32_WSTRING: 175 len = info[i].pi_str.s_len; 176 if (len > 1) { 177 char vbuf[1024]; 178 size_t j, k = 1; 179 180 if (info[i].pi_type == CDF_LENGTH32_WSTRING) 181 k++; 182 s = info[i].pi_str.s_buf; 183 for (j = 0; j < sizeof(vbuf) && len--; s += k) { 184 if (*s == '\0') 185 break; 186 if (isprint((unsigned char)*s)) 187 vbuf[j++] = *s; 188 } 189 if (j == sizeof(vbuf)) 190 --j; 191 vbuf[j] = '\0'; 192 if (NOTMIME(ms)) { 193 if (vbuf[0]) { 194 if (file_printf(ms, ", %s: %s", 195 buf, vbuf) == -1) 196 return -1; 197 } 198 } else if (str == NULL && info[i].pi_id == 199 CDF_PROPERTY_NAME_OF_APPLICATION) { 200 str = cdf_app_to_mime(vbuf, app2mime); 201 } 202 } 203 break; 204 case CDF_FILETIME: 205 tp = info[i].pi_tp; 206 if (tp != 0) { 207 char tbuf[64]; 208 if (tp < 1000000000000000LL) { 209 cdf_print_elapsed_time(tbuf, 210 sizeof(tbuf), tp); 211 if (NOTMIME(ms) && file_printf(ms, 212 ", %s: %s", buf, tbuf) == -1) 213 return -1; 214 } else { 215 char *c, *ec; 216 cdf_timestamp_to_timespec(&ts, tp); 217 c = cdf_ctime(&ts.tv_sec, tbuf); 218 if (c != NULL && 219 (ec = strchr(c, '\n')) != NULL) 220 *ec = '\0'; 221 222 if (NOTMIME(ms) && file_printf(ms, 223 ", %s: %s", buf, c) == -1) 224 return -1; 225 } 226 } 227 break; 228 case CDF_CLIPBOARD: 229 break; 230 default: 231 return -1; 232 } 233 } 234 if (!NOTMIME(ms)) { 235 if (str == NULL) 236 return 0; 237 if (file_printf(ms, "application/%s", str) == -1) 238 return -1; 239 } 240 return 1; 241 } 242 243 private int 244 cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h, 245 const cdf_stream_t *sst, const cdf_directory_t *root_storage) 246 { 247 cdf_summary_info_header_t si; 248 cdf_property_info_t *info; 249 size_t count; 250 int m; 251 252 if (cdf_unpack_summary_info(sst, h, &si, &info, &count) == -1) 253 return -1; 254 255 if (NOTMIME(ms)) { 256 const char *str; 257 258 if (file_printf(ms, "Composite Document File V2 Document") 259 == -1) 260 return -1; 261 262 if (file_printf(ms, ", %s Endian", 263 si.si_byte_order == 0xfffe ? "Little" : "Big") == -1) 264 return -2; 265 switch (si.si_os) { 266 case 2: 267 if (file_printf(ms, ", Os: Windows, Version %d.%d", 268 si.si_os_version & 0xff, 269 (uint32_t)si.si_os_version >> 8) == -1) 270 return -2; 271 break; 272 case 1: 273 if (file_printf(ms, ", Os: MacOS, Version %d.%d", 274 (uint32_t)si.si_os_version >> 8, 275 si.si_os_version & 0xff) == -1) 276 return -2; 277 break; 278 default: 279 if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os, 280 si.si_os_version & 0xff, 281 (uint32_t)si.si_os_version >> 8) == -1) 282 return -2; 283 break; 284 } 285 if (root_storage) { 286 str = cdf_clsid_to_mime(root_storage->d_storage_uuid, 287 clsid2desc); 288 if (str) 289 if (file_printf(ms, ", %s", str) == -1) 290 return -2; 291 } 292 } 293 294 m = cdf_file_property_info(ms, info, count, root_storage); 295 free(info); 296 297 return m == -1 ? -2 : m; 298 } 299 300 #ifdef notdef 301 private char * 302 format_clsid(char *buf, size_t len, const uint64_t uuid[2]) { 303 snprintf(buf, len, "%.8" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64 "-%.4" 304 PRIx64 "-%.12" PRIx64, 305 (uuid[0] >> 32) & (uint64_t)0x000000000ffffffffLLU, 306 (uuid[0] >> 16) & (uint64_t)0x0000000000000ffffLLU, 307 (uuid[0] >> 0) & (uint64_t)0x0000000000000ffffLLU, 308 (uuid[1] >> 48) & (uint64_t)0x0000000000000ffffLLU, 309 (uuid[1] >> 0) & (uint64_t)0x0000fffffffffffffLLU); 310 return buf; 311 } 312 #endif 313 314 protected int 315 file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf, 316 size_t nbytes) 317 { 318 cdf_info_t info; 319 cdf_header_t h; 320 cdf_sat_t sat, ssat; 321 cdf_stream_t sst, scn; 322 cdf_dir_t dir; 323 int i; 324 const char *expn = ""; 325 const char *corrupt = "corrupt: "; 326 327 info.i_fd = fd; 328 info.i_buf = buf; 329 info.i_len = nbytes; 330 if (ms->flags & MAGIC_APPLE) 331 return 0; 332 if (cdf_read_header(&info, &h) == -1) 333 return 0; 334 #ifdef CDF_DEBUG 335 cdf_dump_header(&h); 336 #endif 337 338 if ((i = cdf_read_sat(&info, &h, &sat)) == -1) { 339 expn = "Can't read SAT"; 340 goto out0; 341 } 342 #ifdef CDF_DEBUG 343 cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h)); 344 #endif 345 346 if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) { 347 expn = "Can't read SSAT"; 348 goto out1; 349 } 350 #ifdef CDF_DEBUG 351 cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h)); 352 #endif 353 354 if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) { 355 expn = "Can't read directory"; 356 goto out2; 357 } 358 359 const cdf_directory_t *root_storage; 360 if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst, 361 &root_storage)) == -1) { 362 expn = "Cannot read short stream"; 363 goto out3; 364 } 365 #ifdef CDF_DEBUG 366 cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir); 367 #endif 368 #ifdef notdef 369 if (root_storage) { 370 if (NOTMIME(ms)) { 371 char clsbuf[128]; 372 if (file_printf(ms, "CLSID %s, ", 373 format_clsid(clsbuf, sizeof(clsbuf), 374 root_storage->d_storage_uuid)) == -1) 375 return -1; 376 } 377 } 378 #endif 379 380 if ((i = cdf_read_user_stream(&info, &h, &sat, &ssat, &sst, &dir, 381 "FileHeader", &scn)) != -1) { 382 #define HWP5_SIGNATURE "HWP Document File" 383 if (scn.sst_dirlen >= sizeof(HWP5_SIGNATURE) - 1 384 && memcmp(scn.sst_tab, HWP5_SIGNATURE, 385 sizeof(HWP5_SIGNATURE) - 1) == 0) { 386 if (NOTMIME(ms)) { 387 if (file_printf(ms, 388 "Hangul (Korean) Word Processor File 5.x") == -1) 389 return -1; 390 } else { 391 if (file_printf(ms, "application/x-hwp") == -1) 392 return -1; 393 } 394 i = 1; 395 goto out5; 396 } else { 397 free(scn.sst_tab); 398 scn.sst_tab = NULL; 399 scn.sst_len = 0; 400 scn.sst_dirlen = 0; 401 } 402 } 403 404 if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir, 405 &scn)) == -1) { 406 if (errno == ESRCH) { 407 corrupt = expn; 408 expn = "No summary info"; 409 } else { 410 expn = "Cannot read summary info"; 411 } 412 goto out4; 413 } 414 #ifdef CDF_DEBUG 415 cdf_dump_summary_info(&h, &scn); 416 #endif 417 if ((i = cdf_file_summary_info(ms, &h, &scn, root_storage)) < 0) 418 expn = "Can't expand summary_info"; 419 420 if (i == 0) { 421 const char *str = NULL; 422 cdf_directory_t *d; 423 char name[__arraycount(d->d_name)]; 424 size_t j, k; 425 426 for (j = 0; str == NULL && j < dir.dir_len; j++) { 427 d = &dir.dir_tab[j]; 428 for (k = 0; k < sizeof(name); k++) 429 name[k] = (char)cdf_tole2(d->d_name[k]); 430 str = cdf_app_to_mime(name, 431 NOTMIME(ms) ? name2desc : name2mime); 432 } 433 if (NOTMIME(ms)) { 434 if (str != NULL) { 435 if (file_printf(ms, "%s", str) == -1) 436 return -1; 437 i = 1; 438 } 439 } else { 440 if (str == NULL) 441 str = "vnd.ms-office"; 442 if (file_printf(ms, "application/%s", str) == -1) 443 return -1; 444 i = 1; 445 } 446 } 447 out5: 448 free(scn.sst_tab); 449 out4: 450 free(sst.sst_tab); 451 out3: 452 free(dir.dir_tab); 453 out2: 454 free(ssat.sat_tab); 455 out1: 456 free(sat.sat_tab); 457 out0: 458 if (i == -1) { 459 if (NOTMIME(ms)) { 460 if (file_printf(ms, 461 "Composite Document File V2 Document") == -1) 462 return -1; 463 if (*expn) 464 if (file_printf(ms, ", %s%s", corrupt, expn) == -1) 465 return -1; 466 } else { 467 if (file_printf(ms, "application/CDFV2-corrupt") == -1) 468 return -1; 469 } 470 i = 1; 471 } 472 return i; 473 } 474