xref: /freebsd/contrib/file/src/readcdf.c (revision 6b7b2d80ed4d728d3ffd12c422e57798c1b63a84)
1 /*-
2  * Copyright (c) 2008 Christos Zoulas
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
15  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
18  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24  * POSSIBILITY OF SUCH DAMAGE.
25  */
26 #include "file.h"
27 
28 #ifndef lint
29 FILE_RCSID("@(#)$File: readcdf.c,v 1.53 2015/04/09 20:01:41 christos Exp $")
30 #endif
31 
32 #include <assert.h>
33 #include <stdlib.h>
34 #include <unistd.h>
35 #include <string.h>
36 #include <time.h>
37 #include <ctype.h>
38 
39 #include "cdf.h"
40 #include "magic.h"
41 
/*
 * Number of elements in a true array (not a pointer).  Only defined here
 * when the platform headers have not already provided it.  The argument is
 * parenthesized so that any array-valued expression can be passed.
 */
#ifndef __arraycount
#define __arraycount(a) (sizeof(a) / sizeof((a)[0]))
#endif

/* True when none of the MAGIC_MIME flag bits are set in ms->flags. */
#define NOTMIME(ms) (((ms)->flags & MAGIC_MIME) == 0)
47 
/*
 * A search pattern and the string reported when the pattern is found.
 * Every table below is terminated by an entry whose fields are NULL.
 */
struct nv {
	const char *pattern;
	const char *mime;
};

/*
 * Substrings of the "name of application" property and the MIME subtype
 * (printed after "application/") that each one selects.
 */
static const struct nv app2mime[] = {
	{ "Word", "msword" },
	{ "Excel", "vnd.ms-excel" },
	{ "Powerpoint", "vnd.ms-powerpoint" },
	{ "Crystal Reports", "x-rpt" },
	{ "Advanced Installer", "vnd.ms-msi" },
	{ "InstallShield", "vnd.ms-msi" },
	{ "Microsoft Patch Compiler", "vnd.ms-msi" },
	{ "NAnt", "vnd.ms-msi" },
	{ "Windows Installer", "vnd.ms-msi" },
	{ NULL, NULL },
};

/* Directory entry names and the MIME subtype each one selects. */
static const struct nv name2mime[] = {
	{ "WordDocument", "msword" },
	{ "PowerPoint", "vnd.ms-powerpoint" },
	{ "DigitalSignature", "vnd.ms-msi" },
	{ NULL, NULL },
};

/* Directory entry names and the textual description each one selects. */
static const struct nv name2desc[] = {
	{ "WordDocument", "Microsoft Office Word" },
	{ "PowerPoint", "Microsoft PowerPoint" },
	{ "DigitalSignature", "Microsoft Installer" },
	{ NULL, NULL },
};
73 
/*
 * A 128-bit class id, stored as two 64-bit halves, and the string reported
 * for it.  Every table below is terminated by an entry whose string is NULL.
 */
struct cv {
	uint64_t clsid[2];
	const char *mime;
};

/* Class ids and the MIME subtype (printed after "application/"). */
static const struct cv clsid2mime[] = {
	{ { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, "x-msi" },
	{ { 0, 0 }, NULL },
};

/* Class ids and the textual description reported for them. */
static const struct cv clsid2desc[] = {
	{ { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, "MSI Installer" },
	{ { 0, 0 }, NULL },
};
94 
95 private const char *
96 cdf_clsid_to_mime(const uint64_t clsid[2], const struct cv *cv)
97 {
98 	size_t i;
99 	for (i = 0; cv[i].mime != NULL; i++) {
100 		if (clsid[0] == cv[i].clsid[0] && clsid[1] == cv[i].clsid[1])
101 			return cv[i].mime;
102 	}
103 #ifdef CDF_DEBUG
104 	fprintf(stderr, "unknown mime %" PRIx64 ", %" PRIx64 "\n", clsid[0],
105 	    clsid[1]);
106 #endif
107 	return NULL;
108 }
109 
110 private const char *
111 cdf_app_to_mime(const char *vbuf, const struct nv *nv)
112 {
113 	size_t i;
114 	const char *rv = NULL;
115 #ifdef USE_C_LOCALE
116 	locale_t old_lc_ctype, c_lc_ctype;
117 
118 	c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0);
119 	assert(c_lc_ctype != NULL);
120 	old_lc_ctype = uselocale(c_lc_ctype);
121 	assert(old_lc_ctype != NULL);
122 #endif
123 	for (i = 0; nv[i].pattern != NULL; i++)
124 		if (strcasestr(vbuf, nv[i].pattern) != NULL) {
125 			rv = nv[i].mime;
126 			break;
127 		}
128 #ifdef CDF_DEBUG
129 	fprintf(stderr, "unknown app %s\n", vbuf);
130 #endif
131 #ifdef USE_C_LOCALE
132 	(void)uselocale(old_lc_ctype);
133 	freelocale(c_lc_ctype);
134 #endif
135 	return rv;
136 }
137 
138 private int
139 cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info,
140     size_t count, const cdf_directory_t *root_storage)
141 {
142         size_t i;
143         cdf_timestamp_t tp;
144         struct timespec ts;
145         char buf[64];
146         const char *str = NULL;
147         const char *s;
148         int len;
149 
150         if (!NOTMIME(ms) && root_storage)
151 		str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
152 		    clsid2mime);
153 
154         for (i = 0; i < count; i++) {
155                 cdf_print_property_name(buf, sizeof(buf), info[i].pi_id);
156                 switch (info[i].pi_type) {
157                 case CDF_NULL:
158                         break;
159                 case CDF_SIGNED16:
160                         if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf,
161                             info[i].pi_s16) == -1)
162                                 return -1;
163                         break;
164                 case CDF_SIGNED32:
165                         if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf,
166                             info[i].pi_s32) == -1)
167                                 return -1;
168                         break;
169                 case CDF_UNSIGNED32:
170                         if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf,
171                             info[i].pi_u32) == -1)
172                                 return -1;
173                         break;
174                 case CDF_FLOAT:
175                         if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
176                             info[i].pi_f) == -1)
177                                 return -1;
178                         break;
179                 case CDF_DOUBLE:
180                         if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
181                             info[i].pi_d) == -1)
182                                 return -1;
183                         break;
184                 case CDF_LENGTH32_STRING:
185                 case CDF_LENGTH32_WSTRING:
186                         len = info[i].pi_str.s_len;
187                         if (len > 1) {
188                                 char vbuf[1024];
189                                 size_t j, k = 1;
190 
191                                 if (info[i].pi_type == CDF_LENGTH32_WSTRING)
192                                     k++;
193                                 s = info[i].pi_str.s_buf;
194                                 for (j = 0; j < sizeof(vbuf) && len--; s += k) {
195                                         if (*s == '\0')
196                                                 break;
197                                         if (isprint((unsigned char)*s))
198                                                 vbuf[j++] = *s;
199                                 }
200                                 if (j == sizeof(vbuf))
201                                         --j;
202                                 vbuf[j] = '\0';
203                                 if (NOTMIME(ms)) {
204                                         if (vbuf[0]) {
205                                                 if (file_printf(ms, ", %s: %s",
206                                                     buf, vbuf) == -1)
207                                                         return -1;
208                                         }
209                                 } else if (str == NULL && info[i].pi_id ==
210 				    CDF_PROPERTY_NAME_OF_APPLICATION) {
211 					str = cdf_app_to_mime(vbuf, app2mime);
212 				}
213 			}
214                         break;
215                 case CDF_FILETIME:
216                         tp = info[i].pi_tp;
217                         if (tp != 0) {
218 				char tbuf[64];
219                                 if (tp < 1000000000000000LL) {
220                                         cdf_print_elapsed_time(tbuf,
221                                             sizeof(tbuf), tp);
222                                         if (NOTMIME(ms) && file_printf(ms,
223                                             ", %s: %s", buf, tbuf) == -1)
224                                                 return -1;
225                                 } else {
226                                         char *c, *ec;
227                                         cdf_timestamp_to_timespec(&ts, tp);
228                                         c = cdf_ctime(&ts.tv_sec, tbuf);
229                                         if (c != NULL &&
230 					    (ec = strchr(c, '\n')) != NULL)
231 						*ec = '\0';
232 
233                                         if (NOTMIME(ms) && file_printf(ms,
234                                             ", %s: %s", buf, c) == -1)
235                                                 return -1;
236                                 }
237                         }
238                         break;
239                 case CDF_CLIPBOARD:
240                         break;
241                 default:
242                         return -1;
243                 }
244         }
245         if (!NOTMIME(ms)) {
246 		if (str == NULL)
247 			return 0;
248                 if (file_printf(ms, "application/%s", str) == -1)
249                         return -1;
250         }
251         return 1;
252 }
253 
254 private int
255 cdf_file_catalog(struct magic_set *ms, const cdf_header_t *h,
256     const cdf_stream_t *sst)
257 {
258 	cdf_catalog_t *cat;
259 	size_t i;
260 	char buf[256];
261 	cdf_catalog_entry_t *ce;
262 
263         if (NOTMIME(ms)) {
264 		if (file_printf(ms, "Microsoft Thumbs.db [") == -1)
265 			return -1;
266 		if (cdf_unpack_catalog(h, sst, &cat) == -1)
267 			return -1;
268 		ce = cat->cat_e;
269 		/* skip first entry since it has a , or paren */
270 		for (i = 1; i < cat->cat_num; i++)
271 			if (file_printf(ms, "%s%s",
272 			    cdf_u16tos8(buf, ce[i].ce_namlen, ce[i].ce_name),
273 			    i == cat->cat_num - 1 ? "]" : ", ") == -1) {
274 				free(cat);
275 				return -1;
276 			}
277 		free(cat);
278 	} else {
279 		if (file_printf(ms, "application/CDFV2") == -1)
280 			return -1;
281 	}
282 	return 1;
283 }
284 
285 private int
286 cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h,
287     const cdf_stream_t *sst, const cdf_directory_t *root_storage)
288 {
289         cdf_summary_info_header_t si;
290         cdf_property_info_t *info;
291         size_t count;
292         int m;
293 
294         if (cdf_unpack_summary_info(sst, h, &si, &info, &count) == -1)
295                 return -1;
296 
297         if (NOTMIME(ms)) {
298 		const char *str;
299 
300                 if (file_printf(ms, "Composite Document File V2 Document")
301 		    == -1)
302                         return -1;
303 
304                 if (file_printf(ms, ", %s Endian",
305                     si.si_byte_order == 0xfffe ?  "Little" : "Big") == -1)
306                         return -2;
307                 switch (si.si_os) {
308                 case 2:
309                         if (file_printf(ms, ", Os: Windows, Version %d.%d",
310                             si.si_os_version & 0xff,
311                             (uint32_t)si.si_os_version >> 8) == -1)
312                                 return -2;
313                         break;
314                 case 1:
315                         if (file_printf(ms, ", Os: MacOS, Version %d.%d",
316                             (uint32_t)si.si_os_version >> 8,
317                             si.si_os_version & 0xff) == -1)
318                                 return -2;
319                         break;
320                 default:
321                         if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os,
322                             si.si_os_version & 0xff,
323                             (uint32_t)si.si_os_version >> 8) == -1)
324                                 return -2;
325                         break;
326                 }
327 		if (root_storage) {
328 			str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
329 			    clsid2desc);
330 			if (str) {
331 				if (file_printf(ms, ", %s", str) == -1)
332 					return -2;
333 			}
334 		}
335 	}
336 
337         m = cdf_file_property_info(ms, info, count, root_storage);
338         free(info);
339 
340         return m == -1 ? -2 : m;
341 }
342 
#ifdef notdef
/*
 * Format the class id held in uuid[0..1] into buf (at most len bytes,
 * via snprintf) as five hexadecimal groups of 8, 4, 4, 4 and 12 digits
 * separated by '-', and return buf.
 *
 * The last group is the low 48 bits of uuid[1]; the mask is exactly 48
 * bits wide so that it cannot overlap the preceding group (bits 48..63)
 * or produce a 13th digit.
 */
private char *
format_clsid(char *buf, size_t len, const uint64_t uuid[2]) {
	snprintf(buf, len, "%.8" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64 "-%.4"
	    PRIx64 "-%.12" PRIx64,
	    (uuid[0] >> 32) & (uint64_t)0x00000000ffffffffULL,
	    (uuid[0] >> 16) & (uint64_t)0x000000000000ffffULL,
	    (uuid[0] >>  0) & (uint64_t)0x000000000000ffffULL,
	    (uuid[1] >> 48) & (uint64_t)0x000000000000ffffULL,
	    (uuid[1] >>  0) & (uint64_t)0x0000ffffffffffffULL);
	return buf;
}
#endif
356 
357 private int
358 cdf_file_catalog_info(struct magic_set *ms, const cdf_info_t *info,
359     const cdf_header_t *h, const cdf_sat_t *sat, const cdf_sat_t *ssat,
360     const cdf_stream_t *sst, const cdf_dir_t *dir, cdf_stream_t *scn)
361 {
362 	int i;
363 
364 	if ((i = cdf_read_user_stream(info, h, sat, ssat, sst,
365 	    dir, "Catalog", scn)) == -1)
366 		return i;
367 #ifdef CDF_DEBUG
368 	cdf_dump_catalog(&h, &scn);
369 #endif
370 	if ((i = cdf_file_catalog(ms, h, scn)) == -1)
371 		return -1;
372 	return i;
373 }
374 
375 private struct sinfo {
376 	const char *name;
377 	const char *mime;
378 	const char *sections[5];
379 	const int  types[5];
380 } sectioninfo[] = {
381 	{ "Encrypted", "encrypted",
382 		{
383 			"EncryptedPackage", NULL, NULL, NULL, NULL,
384 		},
385 		{
386 			CDF_DIR_TYPE_USER_STREAM, 0, 0, 0, 0,
387 
388 		},
389 	},
390 	{ "QuickBooks", "quickbooks",
391 		{
392 #if 0
393 			"TaxForms", "PDFTaxForms", "modulesInBackup",
394 #endif
395 			"mfbu_header", NULL, NULL, NULL, NULL,
396 		},
397 		{
398 #if 0
399 			CDF_DIR_TYPE_USER_STORAGE,
400 			CDF_DIR_TYPE_USER_STORAGE,
401 			CDF_DIR_TYPE_USER_STREAM,
402 #endif
403 			CDF_DIR_TYPE_USER_STREAM,
404 			0, 0, 0, 0
405 		},
406 	},
407 };
408 
409 private int
410 cdf_file_dir_info(struct magic_set *ms, const cdf_dir_t *dir)
411 {
412 	size_t sd, j;
413 
414 	for (sd = 0; sd < __arraycount(sectioninfo); sd++) {
415 		const struct sinfo *si = &sectioninfo[sd];
416 		for (j = 0; si->sections[j]; j++) {
417 			if (cdf_find_stream(dir, si->sections[j], si->types[j])
418 			    <= 0) {
419 #ifdef CDF_DEBUG
420 				fprintf(stderr, "Can't read %s\n",
421 				    si->sections[j]);
422 #endif
423 				break;
424 			}
425 		}
426 		if (si->sections[j] != NULL)
427 			continue;
428 		if (NOTMIME(ms)) {
429 			if (file_printf(ms, "CDFV2 %s", si->name) == -1)
430 				return -1;
431 		} else {
432 			if (file_printf(ms, "application/CDFV2-%s",
433 			    si->mime) == -1)
434 				return -1;
435 		}
436 		return 1;
437 	}
438 	return -1;
439 }
440 
441 protected int
442 file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf,
443     size_t nbytes)
444 {
445         cdf_info_t info;
446         cdf_header_t h;
447         cdf_sat_t sat, ssat;
448         cdf_stream_t sst, scn;
449         cdf_dir_t dir;
450         int i;
451         const char *expn = "";
452         const cdf_directory_t *root_storage;
453 
454         info.i_fd = fd;
455         info.i_buf = buf;
456         info.i_len = nbytes;
457         if (ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION))
458                 return 0;
459         if (cdf_read_header(&info, &h) == -1)
460                 return 0;
461 #ifdef CDF_DEBUG
462         cdf_dump_header(&h);
463 #endif
464 
465         if ((i = cdf_read_sat(&info, &h, &sat)) == -1) {
466                 expn = "Can't read SAT";
467                 goto out0;
468         }
469 #ifdef CDF_DEBUG
470         cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h));
471 #endif
472 
473         if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) {
474                 expn = "Can't read SSAT";
475                 goto out1;
476         }
477 #ifdef CDF_DEBUG
478         cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h));
479 #endif
480 
481         if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) {
482                 expn = "Can't read directory";
483                 goto out2;
484         }
485 
486         if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst,
487 	    &root_storage)) == -1) {
488                 expn = "Cannot read short stream";
489                 goto out3;
490         }
491 #ifdef CDF_DEBUG
492         cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir);
493 #endif
494 #ifdef notdef
495 	if (root_storage) {
496 		if (NOTMIME(ms)) {
497 			char clsbuf[128];
498 			if (file_printf(ms, "CLSID %s, ",
499 			    format_clsid(clsbuf, sizeof(clsbuf),
500 			    root_storage->d_storage_uuid)) == -1)
501 				return -1;
502 		}
503 	}
504 #endif
505 
506 	if ((i = cdf_read_user_stream(&info, &h, &sat, &ssat, &sst, &dir,
507 	    "FileHeader", &scn)) != -1) {
508 #define HWP5_SIGNATURE "HWP Document File"
509 		if (scn.sst_dirlen >= sizeof(HWP5_SIGNATURE) - 1
510 		    && memcmp(scn.sst_tab, HWP5_SIGNATURE,
511 		    sizeof(HWP5_SIGNATURE) - 1) == 0) {
512 		    if (NOTMIME(ms)) {
513 			if (file_printf(ms,
514 			    "Hangul (Korean) Word Processor File 5.x") == -1)
515 			    return -1;
516 		    } else {
517 			if (file_printf(ms, "application/x-hwp") == -1)
518 			    return -1;
519 		    }
520 		    i = 1;
521 		    goto out5;
522 		} else {
523 		    free(scn.sst_tab);
524 		    scn.sst_tab = NULL;
525 		    scn.sst_len = 0;
526 		    scn.sst_dirlen = 0;
527 		}
528 	}
529 
530         if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir,
531             &scn)) == -1) {
532                 if (errno != ESRCH) {
533                         expn = "Cannot read summary info";
534 			goto out4;
535 		}
536 		i = cdf_file_catalog_info(ms, &info, &h, &sat, &ssat, &sst,
537 		    &dir, &scn);
538 		if (i > 0)
539 			goto out4;
540 		i = cdf_file_dir_info(ms, &dir);
541 		if (i < 0)
542                         expn = "Cannot read section info";
543 		goto out4;
544 	}
545 
546 
547 #ifdef CDF_DEBUG
548         cdf_dump_summary_info(&h, &scn);
549 #endif
550         if ((i = cdf_file_summary_info(ms, &h, &scn, root_storage)) < 0)
551             expn = "Can't expand summary_info";
552 
553 	if (i == 0) {
554 		const char *str = NULL;
555 		cdf_directory_t *d;
556 		char name[__arraycount(d->d_name)];
557 		size_t j, k;
558 
559 		for (j = 0; str == NULL && j < dir.dir_len; j++) {
560 			d = &dir.dir_tab[j];
561 			for (k = 0; k < sizeof(name); k++)
562 				name[k] = (char)cdf_tole2(d->d_name[k]);
563 			str = cdf_app_to_mime(name,
564 			    NOTMIME(ms) ? name2desc : name2mime);
565 		}
566 		if (NOTMIME(ms)) {
567 			if (str != NULL) {
568 				if (file_printf(ms, "%s", str) == -1)
569 					return -1;
570 				i = 1;
571 			}
572 		} else {
573 			if (str == NULL)
574 				str = "vnd.ms-office";
575 			if (file_printf(ms, "application/%s", str) == -1)
576 				return -1;
577 			i = 1;
578 		}
579 	}
580 out5:
581         free(scn.sst_tab);
582 out4:
583         free(sst.sst_tab);
584 out3:
585         free(dir.dir_tab);
586 out2:
587         free(ssat.sat_tab);
588 out1:
589         free(sat.sat_tab);
590 out0:
591 	if (i == -1) {
592 	    if (NOTMIME(ms)) {
593 		if (file_printf(ms,
594 		    "Composite Document File V2 Document") == -1)
595 		    return -1;
596 		if (*expn)
597 		    if (file_printf(ms, ", %s", expn) == -1)
598 			return -1;
599 	    } else {
600 		if (file_printf(ms, "application/CDFV2-unknown") == -1)
601 		    return -1;
602 	    }
603 	    i = 1;
604 	}
605         return i;
606 }
607