xref: /freebsd/contrib/file/src/readcdf.c (revision fed1ca4b719c56c930f2259d80663cd34be812bb)
1 /*-
2  * Copyright (c) 2008 Christos Zoulas
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
15  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
18  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24  * POSSIBILITY OF SUCH DAMAGE.
25  */
26 #include "file.h"
27 
28 #ifndef lint
29 FILE_RCSID("@(#)$File: readcdf.c,v 1.57 2016/05/03 16:08:49 christos Exp $")
30 #endif
31 
32 #include <assert.h>
33 #include <stdlib.h>
34 #include <unistd.h>
35 #include <string.h>
36 #include <time.h>
37 #include <ctype.h>
38 
39 #include "cdf.h"
40 #include "magic.h"
41 
#ifndef __arraycount
/*
 * Number of elements in a true array (must not be used on a pointer).
 * The argument is parenthesized so that an expression argument is
 * indexed as a whole.
 */
#define __arraycount(a) (sizeof(a) / sizeof((a)[0]))
#endif

/* True when none of the MAGIC_MIME bits is set in ms->flags. */
#define NOTMIME(ms) (((ms)->flags & MAGIC_MIME) == 0)
47 
/*
 * Name/value pair used by the lookup tables below.  Each table is
 * terminated by an entry whose pattern is NULL.
 */
struct nv {
	const char *pattern;
	const char *mime;
};

/* Application-name substring -> MIME subtype. */
static const struct nv app2mime[] = {
	{ .pattern = "Word",			 .mime = "msword" },
	{ .pattern = "Excel",			 .mime = "vnd.ms-excel" },
	{ .pattern = "Powerpoint",		 .mime = "vnd.ms-powerpoint" },
	{ .pattern = "Crystal Reports",		 .mime = "x-rpt" },
	{ .pattern = "Advanced Installer",	 .mime = "vnd.ms-msi" },
	{ .pattern = "InstallShield",		 .mime = "vnd.ms-msi" },
	{ .pattern = "Microsoft Patch Compiler", .mime = "vnd.ms-msi" },
	{ .pattern = "NAnt",			 .mime = "vnd.ms-msi" },
	{ .pattern = "Windows Installer",	 .mime = "vnd.ms-msi" },
	{ .pattern = NULL,			 .mime = NULL },
};

/* Directory-entry name substring -> MIME subtype. */
static const struct nv name2mime[] = {
	{ .pattern = "Book",			 .mime = "vnd.ms-excel" },
	{ .pattern = "Workbook",		 .mime = "vnd.ms-excel" },
	{ .pattern = "WordDocument",		 .mime = "msword" },
	{ .pattern = "PowerPoint",		 .mime = "vnd.ms-powerpoint" },
	{ .pattern = "DigitalSignature",	 .mime = "vnd.ms-msi" },
	{ .pattern = NULL,			 .mime = NULL },
};

/* Directory-entry name substring -> human-readable description. */
static const struct nv name2desc[] = {
	{ .pattern = "Book",			 .mime = "Microsoft Excel" },
	{ .pattern = "Workbook",		 .mime = "Microsoft Excel" },
	{ .pattern = "WordDocument",		 .mime = "Microsoft Word" },
	{ .pattern = "PowerPoint",		 .mime = "Microsoft PowerPoint" },
	{ .pattern = "DigitalSignature",	 .mime = "Microsoft Installer" },
	{ .pattern = NULL,			 .mime = NULL },
};
77 
/*
 * Class-id/value pair.  The class id is held as two 64-bit words; each
 * table is terminated by an entry whose mime member is NULL.
 */
struct cv {
	uint64_t clsid[2];
	const char *mime;
};

/* Root storage class id -> MIME subtype. */
static const struct cv clsid2mime[] = {
	{
		.clsid = { 0x00000000000c1084ULL, 0x46000000000000c0ULL },
		.mime = "x-msi",
	},
	{
		.clsid = { 0, 0 },
		.mime = NULL,
	},
};

/* Root storage class id -> human-readable description. */
static const struct cv clsid2desc[] = {
	{
		.clsid = { 0x00000000000c1084ULL, 0x46000000000000c0ULL },
		.mime = "MSI Installer",
	},
	{
		.clsid = { 0, 0 },
		.mime = NULL,
	},
};
98 
99 private const char *
100 cdf_clsid_to_mime(const uint64_t clsid[2], const struct cv *cv)
101 {
102 	size_t i;
103 	for (i = 0; cv[i].mime != NULL; i++) {
104 		if (clsid[0] == cv[i].clsid[0] && clsid[1] == cv[i].clsid[1])
105 			return cv[i].mime;
106 	}
107 #ifdef CDF_DEBUG
108 	fprintf(stderr, "unknown mime %" PRIx64 ", %" PRIx64 "\n", clsid[0],
109 	    clsid[1]);
110 #endif
111 	return NULL;
112 }
113 
114 private const char *
115 cdf_app_to_mime(const char *vbuf, const struct nv *nv)
116 {
117 	size_t i;
118 	const char *rv = NULL;
119 #ifdef USE_C_LOCALE
120 	locale_t old_lc_ctype, c_lc_ctype;
121 
122 	c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0);
123 	assert(c_lc_ctype != NULL);
124 	old_lc_ctype = uselocale(c_lc_ctype);
125 	assert(old_lc_ctype != NULL);
126 #else
127 	char *old_lc_ctype = setlocale(LC_CTYPE, "C");
128 #endif
129 	for (i = 0; nv[i].pattern != NULL; i++)
130 		if (strcasestr(vbuf, nv[i].pattern) != NULL) {
131 			rv = nv[i].mime;
132 			break;
133 		}
134 #ifdef CDF_DEBUG
135 	fprintf(stderr, "unknown app %s\n", vbuf);
136 #endif
137 #ifdef USE_C_LOCALE
138 	(void)uselocale(old_lc_ctype);
139 	freelocale(c_lc_ctype);
140 #else
141 	setlocale(LC_CTYPE, old_lc_ctype);
142 #endif
143 	return rv;
144 }
145 
146 private int
147 cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info,
148     size_t count, const cdf_directory_t *root_storage)
149 {
150         size_t i;
151         cdf_timestamp_t tp;
152         struct timespec ts;
153         char buf[64];
154         const char *str = NULL;
155         const char *s;
156         int len;
157 
158         if (!NOTMIME(ms) && root_storage)
159 		str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
160 		    clsid2mime);
161 
162         for (i = 0; i < count; i++) {
163                 cdf_print_property_name(buf, sizeof(buf), info[i].pi_id);
164                 switch (info[i].pi_type) {
165                 case CDF_NULL:
166                         break;
167                 case CDF_SIGNED16:
168                         if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf,
169                             info[i].pi_s16) == -1)
170                                 return -1;
171                         break;
172                 case CDF_SIGNED32:
173                         if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf,
174                             info[i].pi_s32) == -1)
175                                 return -1;
176                         break;
177                 case CDF_UNSIGNED32:
178                         if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf,
179                             info[i].pi_u32) == -1)
180                                 return -1;
181                         break;
182                 case CDF_FLOAT:
183                         if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
184                             info[i].pi_f) == -1)
185                                 return -1;
186                         break;
187                 case CDF_DOUBLE:
188                         if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
189                             info[i].pi_d) == -1)
190                                 return -1;
191                         break;
192                 case CDF_LENGTH32_STRING:
193                 case CDF_LENGTH32_WSTRING:
194                         len = info[i].pi_str.s_len;
195                         if (len > 1) {
196                                 char vbuf[1024];
197                                 size_t j, k = 1;
198 
199                                 if (info[i].pi_type == CDF_LENGTH32_WSTRING)
200                                     k++;
201                                 s = info[i].pi_str.s_buf;
202                                 for (j = 0; j < sizeof(vbuf) && len--; s += k) {
203                                         if (*s == '\0')
204                                                 break;
205                                         if (isprint((unsigned char)*s))
206                                                 vbuf[j++] = *s;
207                                 }
208                                 if (j == sizeof(vbuf))
209                                         --j;
210                                 vbuf[j] = '\0';
211                                 if (NOTMIME(ms)) {
212                                         if (vbuf[0]) {
213                                                 if (file_printf(ms, ", %s: %s",
214                                                     buf, vbuf) == -1)
215                                                         return -1;
216                                         }
217                                 } else if (str == NULL && info[i].pi_id ==
218 				    CDF_PROPERTY_NAME_OF_APPLICATION) {
219 					str = cdf_app_to_mime(vbuf, app2mime);
220 				}
221 			}
222                         break;
223                 case CDF_FILETIME:
224                         tp = info[i].pi_tp;
225                         if (tp != 0) {
226 				char tbuf[64];
227                                 if (tp < 1000000000000000LL) {
228                                         cdf_print_elapsed_time(tbuf,
229                                             sizeof(tbuf), tp);
230                                         if (NOTMIME(ms) && file_printf(ms,
231                                             ", %s: %s", buf, tbuf) == -1)
232                                                 return -1;
233                                 } else {
234                                         char *c, *ec;
235                                         cdf_timestamp_to_timespec(&ts, tp);
236                                         c = cdf_ctime(&ts.tv_sec, tbuf);
237                                         if (c != NULL &&
238 					    (ec = strchr(c, '\n')) != NULL)
239 						*ec = '\0';
240 
241                                         if (NOTMIME(ms) && file_printf(ms,
242                                             ", %s: %s", buf, c) == -1)
243                                                 return -1;
244                                 }
245                         }
246                         break;
247                 case CDF_CLIPBOARD:
248                         break;
249                 default:
250                         return -1;
251                 }
252         }
253         if (!NOTMIME(ms)) {
254 		if (str == NULL)
255 			return 0;
256                 if (file_printf(ms, "application/%s", str) == -1)
257                         return -1;
258         }
259         return 1;
260 }
261 
262 private int
263 cdf_file_catalog(struct magic_set *ms, const cdf_header_t *h,
264     const cdf_stream_t *sst)
265 {
266 	cdf_catalog_t *cat;
267 	size_t i;
268 	char buf[256];
269 	cdf_catalog_entry_t *ce;
270 
271         if (NOTMIME(ms)) {
272 		if (file_printf(ms, "Microsoft Thumbs.db [") == -1)
273 			return -1;
274 		if (cdf_unpack_catalog(h, sst, &cat) == -1)
275 			return -1;
276 		ce = cat->cat_e;
277 		/* skip first entry since it has a , or paren */
278 		for (i = 1; i < cat->cat_num; i++)
279 			if (file_printf(ms, "%s%s",
280 			    cdf_u16tos8(buf, ce[i].ce_namlen, ce[i].ce_name),
281 			    i == cat->cat_num - 1 ? "]" : ", ") == -1) {
282 				free(cat);
283 				return -1;
284 			}
285 		free(cat);
286 	} else {
287 		if (file_printf(ms, "application/CDFV2") == -1)
288 			return -1;
289 	}
290 	return 1;
291 }
292 
293 private int
294 cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h,
295     const cdf_stream_t *sst, const cdf_directory_t *root_storage)
296 {
297         cdf_summary_info_header_t si;
298         cdf_property_info_t *info;
299         size_t count;
300         int m;
301 
302         if (cdf_unpack_summary_info(sst, h, &si, &info, &count) == -1)
303                 return -1;
304 
305         if (NOTMIME(ms)) {
306 		const char *str;
307 
308                 if (file_printf(ms, "Composite Document File V2 Document")
309 		    == -1)
310                         return -1;
311 
312                 if (file_printf(ms, ", %s Endian",
313                     si.si_byte_order == 0xfffe ?  "Little" : "Big") == -1)
314                         return -2;
315                 switch (si.si_os) {
316                 case 2:
317                         if (file_printf(ms, ", Os: Windows, Version %d.%d",
318                             si.si_os_version & 0xff,
319                             (uint32_t)si.si_os_version >> 8) == -1)
320                                 return -2;
321                         break;
322                 case 1:
323                         if (file_printf(ms, ", Os: MacOS, Version %d.%d",
324                             (uint32_t)si.si_os_version >> 8,
325                             si.si_os_version & 0xff) == -1)
326                                 return -2;
327                         break;
328                 default:
329                         if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os,
330                             si.si_os_version & 0xff,
331                             (uint32_t)si.si_os_version >> 8) == -1)
332                                 return -2;
333                         break;
334                 }
335 		if (root_storage) {
336 			str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
337 			    clsid2desc);
338 			if (str) {
339 				if (file_printf(ms, ", %s", str) == -1)
340 					return -2;
341 			}
342 		}
343 	}
344 
345         m = cdf_file_property_info(ms, info, count, root_storage);
346         free(info);
347 
348         return m == -1 ? -2 : m;
349 }
350 
#ifdef notdef
/*
 * Format a two-word class id into buf (of size len) as five
 * dash-separated hexadecimal groups of 8, 4, 4, 4 and 12 digits and
 * return buf.  The first three groups come from uuid[0], the last two
 * from uuid[1].
 */
private char *
format_clsid(char *buf, size_t len, const uint64_t uuid[2]) {
	snprintf(buf, len, "%.8" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64 "-%.4"
	    PRIx64 "-%.12" PRIx64,
	    (uuid[0] >> 32) & (uint64_t)0x00000000ffffffffULL,
	    (uuid[0] >> 16) & (uint64_t)0x000000000000ffffULL,
	    (uuid[0] >>  0) & (uint64_t)0x000000000000ffffULL,
	    (uuid[1] >> 48) & (uint64_t)0x000000000000ffffULL,
	    /* low 48 bits only: the top 16 were printed by the group above */
	    (uuid[1] >>  0) & (uint64_t)0x0000ffffffffffffULL);
	return buf;
}
#endif
364 
365 private int
366 cdf_file_catalog_info(struct magic_set *ms, const cdf_info_t *info,
367     const cdf_header_t *h, const cdf_sat_t *sat, const cdf_sat_t *ssat,
368     const cdf_stream_t *sst, const cdf_dir_t *dir, cdf_stream_t *scn)
369 {
370 	int i;
371 
372 	if ((i = cdf_read_user_stream(info, h, sat, ssat, sst,
373 	    dir, "Catalog", scn)) == -1)
374 		return i;
375 #ifdef CDF_DEBUG
376 	cdf_dump_catalog(&h, scn);
377 #endif
378 	if ((i = cdf_file_catalog(ms, h, scn)) == -1)
379 		return -1;
380 	return i;
381 }
382 
383 private struct sinfo {
384 	const char *name;
385 	const char *mime;
386 	const char *sections[5];
387 	const int  types[5];
388 } sectioninfo[] = {
389 	{ "Encrypted", "encrypted",
390 		{
391 			"EncryptedPackage", NULL, NULL, NULL, NULL,
392 		},
393 		{
394 			CDF_DIR_TYPE_USER_STREAM, 0, 0, 0, 0,
395 
396 		},
397 	},
398 	{ "QuickBooks", "quickbooks",
399 		{
400 #if 0
401 			"TaxForms", "PDFTaxForms", "modulesInBackup",
402 #endif
403 			"mfbu_header", NULL, NULL, NULL, NULL,
404 		},
405 		{
406 #if 0
407 			CDF_DIR_TYPE_USER_STORAGE,
408 			CDF_DIR_TYPE_USER_STORAGE,
409 			CDF_DIR_TYPE_USER_STREAM,
410 #endif
411 			CDF_DIR_TYPE_USER_STREAM,
412 			0, 0, 0, 0
413 		},
414 	},
415 };
416 
417 private int
418 cdf_file_dir_info(struct magic_set *ms, const cdf_dir_t *dir)
419 {
420 	size_t sd, j;
421 
422 	for (sd = 0; sd < __arraycount(sectioninfo); sd++) {
423 		const struct sinfo *si = &sectioninfo[sd];
424 		for (j = 0; si->sections[j]; j++) {
425 			if (cdf_find_stream(dir, si->sections[j], si->types[j])
426 			    <= 0) {
427 #ifdef CDF_DEBUG
428 				fprintf(stderr, "Can't read %s\n",
429 				    si->sections[j]);
430 #endif
431 				break;
432 			}
433 		}
434 		if (si->sections[j] != NULL)
435 			continue;
436 		if (NOTMIME(ms)) {
437 			if (file_printf(ms, "CDFV2 %s", si->name) == -1)
438 				return -1;
439 		} else {
440 			if (file_printf(ms, "application/CDFV2-%s",
441 			    si->mime) == -1)
442 				return -1;
443 		}
444 		return 1;
445 	}
446 	return -1;
447 }
448 
449 protected int
450 file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf,
451     size_t nbytes)
452 {
453         cdf_info_t info;
454         cdf_header_t h;
455         cdf_sat_t sat, ssat;
456         cdf_stream_t sst, scn;
457         cdf_dir_t dir;
458         int i;
459         const char *expn = "";
460         const cdf_directory_t *root_storage;
461 
462         info.i_fd = fd;
463         info.i_buf = buf;
464         info.i_len = nbytes;
465         if (ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION))
466                 return 0;
467         if (cdf_read_header(&info, &h) == -1)
468                 return 0;
469 #ifdef CDF_DEBUG
470         cdf_dump_header(&h);
471 #endif
472 
473         if ((i = cdf_read_sat(&info, &h, &sat)) == -1) {
474                 expn = "Can't read SAT";
475                 goto out0;
476         }
477 #ifdef CDF_DEBUG
478         cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h));
479 #endif
480 
481         if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) {
482                 expn = "Can't read SSAT";
483                 goto out1;
484         }
485 #ifdef CDF_DEBUG
486         cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h));
487 #endif
488 
489         if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) {
490                 expn = "Can't read directory";
491                 goto out2;
492         }
493 
494         if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst,
495 	    &root_storage)) == -1) {
496                 expn = "Cannot read short stream";
497                 goto out3;
498         }
499 #ifdef CDF_DEBUG
500         cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir);
501 #endif
502 #ifdef notdef
503 	if (root_storage) {
504 		if (NOTMIME(ms)) {
505 			char clsbuf[128];
506 			if (file_printf(ms, "CLSID %s, ",
507 			    format_clsid(clsbuf, sizeof(clsbuf),
508 			    root_storage->d_storage_uuid)) == -1)
509 				return -1;
510 		}
511 	}
512 #endif
513 
514 	if ((i = cdf_read_user_stream(&info, &h, &sat, &ssat, &sst, &dir,
515 	    "FileHeader", &scn)) != -1) {
516 #define HWP5_SIGNATURE "HWP Document File"
517 		if (scn.sst_dirlen >= sizeof(HWP5_SIGNATURE) - 1
518 		    && memcmp(scn.sst_tab, HWP5_SIGNATURE,
519 		    sizeof(HWP5_SIGNATURE) - 1) == 0) {
520 		    if (NOTMIME(ms)) {
521 			if (file_printf(ms,
522 			    "Hangul (Korean) Word Processor File 5.x") == -1)
523 			    return -1;
524 		    } else {
525 			if (file_printf(ms, "application/x-hwp") == -1)
526 			    return -1;
527 		    }
528 		    i = 1;
529 		    goto out5;
530 		} else {
531 		    free(scn.sst_tab);
532 		    scn.sst_tab = NULL;
533 		    scn.sst_len = 0;
534 		    scn.sst_dirlen = 0;
535 		}
536 	}
537 
538         if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir,
539             &scn)) == -1) {
540                 if (errno != ESRCH) {
541                         expn = "Cannot read summary info";
542 			goto out4;
543 		}
544 		i = cdf_file_catalog_info(ms, &info, &h, &sat, &ssat, &sst,
545 		    &dir, &scn);
546 		if (i > 0)
547 			goto out4;
548 		i = cdf_file_dir_info(ms, &dir);
549 		if (i < 0)
550                         expn = "Cannot read section info";
551 		goto out4;
552 	}
553 
554 
555 #ifdef CDF_DEBUG
556         cdf_dump_summary_info(&h, &scn);
557 #endif
558         if ((i = cdf_file_summary_info(ms, &h, &scn, root_storage)) < 0)
559             expn = "Can't expand summary_info";
560 
561 	if (i == 0) {
562 		const char *str = NULL;
563 		cdf_directory_t *d;
564 		char name[__arraycount(d->d_name)];
565 		size_t j, k;
566 
567 		for (j = 0; str == NULL && j < dir.dir_len; j++) {
568 			d = &dir.dir_tab[j];
569 			for (k = 0; k < sizeof(name); k++)
570 				name[k] = (char)cdf_tole2(d->d_name[k]);
571 			str = cdf_app_to_mime(name,
572 			    NOTMIME(ms) ? name2desc : name2mime);
573 		}
574 		if (NOTMIME(ms)) {
575 			if (str != NULL) {
576 				if (file_printf(ms, "%s", str) == -1)
577 					return -1;
578 				i = 1;
579 			}
580 		} else {
581 			if (str == NULL)
582 				str = "vnd.ms-office";
583 			if (file_printf(ms, "application/%s", str) == -1)
584 				return -1;
585 			i = 1;
586 		}
587 	}
588 out5:
589         free(scn.sst_tab);
590 out4:
591         free(sst.sst_tab);
592 out3:
593         free(dir.dir_tab);
594 out2:
595         free(ssat.sat_tab);
596 out1:
597         free(sat.sat_tab);
598 out0:
599 	if (i == -1) {
600 	    if (NOTMIME(ms)) {
601 		if (file_printf(ms,
602 		    "Composite Document File V2 Document") == -1)
603 		    return -1;
604 		if (*expn)
605 		    if (file_printf(ms, ", %s", expn) == -1)
606 			return -1;
607 	    } else {
608 		if (file_printf(ms, "application/CDFV2-unknown") == -1)
609 		    return -1;
610 	    }
611 	    i = 1;
612 	}
613         return i;
614 }
615