xref: /freebsd/contrib/file/src/readcdf.c (revision ec0e626bafb335b30c499d06066997f54b10c092)
1 /*-
2  * Copyright (c) 2008 Christos Zoulas
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
15  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
18  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24  * POSSIBILITY OF SUCH DAMAGE.
25  */
26 #include "file.h"
27 
28 #ifndef lint
29 FILE_RCSID("@(#)$File: readcdf.c,v 1.49 2014/12/04 15:56:46 christos Exp $")
30 #endif
31 
32 #include <assert.h>
33 #include <stdlib.h>
34 #include <unistd.h>
35 #include <string.h>
36 #include <time.h>
37 #include <ctype.h>
38 
39 #include "cdf.h"
40 #include "magic.h"
41 
/*
 * Evaluates to non-zero when none of the bits in MAGIC_MIME are set in
 * ms->flags.  The functions in this file use it to choose between emitting
 * descriptive text (non-zero) and emitting an "application/..." string (zero).
 */
#define NOTMIME(ms) (((ms)->flags & MAGIC_MIME) == 0)
43 
/*
 * Pattern -> result lookup tables consumed by cdf_app_to_mime().
 *
 * Every table ends with an entry whose pattern is NULL.  Lookup is a
 * case-insensitive substring search (strcasestr) and the first entry that
 * matches wins, so order matters.
 *
 *   app2mime   - matched against the "name of application" property string;
 *                yields the part after "application/".
 *   name2mime  - matched against directory entry names; yields the part
 *                after "application/".
 *   name2desc  - matched against directory entry names; yields descriptive
 *                text (despite the field being called "mime").
 */
static const struct nv {
	const char *pattern;	/* substring to look for */
	const char *mime;	/* value returned on a match */
} app2mime[] = {
	{ .pattern = "Word",			 .mime = "msword" },
	{ .pattern = "Excel",			 .mime = "vnd.ms-excel" },
	{ .pattern = "Powerpoint",		 .mime = "vnd.ms-powerpoint" },
	{ .pattern = "Crystal Reports",		 .mime = "x-rpt" },
	{ .pattern = "Advanced Installer",	 .mime = "vnd.ms-msi" },
	{ .pattern = "InstallShield",		 .mime = "vnd.ms-msi" },
	{ .pattern = "Microsoft Patch Compiler", .mime = "vnd.ms-msi" },
	{ .pattern = "NAnt",			 .mime = "vnd.ms-msi" },
	{ .pattern = "Windows Installer",	 .mime = "vnd.ms-msi" },
	{ .pattern = NULL,			 .mime = NULL },
}, name2mime[] = {
	{ .pattern = "WordDocument",		 .mime = "msword" },
	{ .pattern = "PowerPoint",		 .mime = "vnd.ms-powerpoint" },
	{ .pattern = "DigitalSignature",	 .mime = "vnd.ms-msi" },
	{ .pattern = NULL,			 .mime = NULL },
}, name2desc[] = {
	{ .pattern = "WordDocument",		 .mime = "Microsoft Office Word" },
	{ .pattern = "PowerPoint",		 .mime = "Microsoft PowerPoint" },
	{ .pattern = "DigitalSignature",	 .mime = "Microsoft Installer" },
	{ .pattern = NULL,			 .mime = NULL },
};
69 
/*
 * Class-id lookup tables consumed by cdf_clsid_to_mime().
 *
 * A class id is stored as two 64-bit words and both must compare equal.
 * Each table ends with an entry whose mime pointer is NULL.
 *
 *   clsid2mime - yields the part after "application/".
 *   clsid2desc - yields descriptive text (despite the field name).
 */
static const struct cv {
	uint64_t clsid[2];	/* class id, compared word by word */
	const char *mime;	/* value returned on a match */
} clsid2mime[] = {
	{
		.clsid = { 0x00000000000c1084ULL, 0x46000000000000c0ULL },
		.mime = "x-msi",
	},
	{
		.clsid = { 0, 0 },
		.mime = NULL,
	},
}, clsid2desc[] = {
	{
		.clsid = { 0x00000000000c1084ULL, 0x46000000000000c0ULL },
		.mime = "MSI Installer",
	},
	{
		.clsid = { 0, 0 },
		.mime = NULL,
	},
};
90 
91 private const char *
92 cdf_clsid_to_mime(const uint64_t clsid[2], const struct cv *cv)
93 {
94 	size_t i;
95 	for (i = 0; cv[i].mime != NULL; i++) {
96 		if (clsid[0] == cv[i].clsid[0] && clsid[1] == cv[i].clsid[1])
97 			return cv[i].mime;
98 	}
99 	return NULL;
100 }
101 
102 private const char *
103 cdf_app_to_mime(const char *vbuf, const struct nv *nv)
104 {
105 	size_t i;
106 	const char *rv = NULL;
107 #ifdef USE_C_LOCALE
108 	locale_t old_lc_ctype, c_lc_ctype;
109 
110 	c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0);
111 	assert(c_lc_ctype != NULL);
112 	old_lc_ctype = uselocale(c_lc_ctype);
113 	assert(old_lc_ctype != NULL);
114 #endif
115 	for (i = 0; nv[i].pattern != NULL; i++)
116 		if (strcasestr(vbuf, nv[i].pattern) != NULL) {
117 			rv = nv[i].mime;
118 			break;
119 		}
120 #ifdef USE_C_LOCALE
121 	(void)uselocale(old_lc_ctype);
122 	freelocale(c_lc_ctype);
123 #endif
124 	return rv;
125 }
126 
127 private int
128 cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info,
129     size_t count, const cdf_directory_t *root_storage)
130 {
131         size_t i;
132         cdf_timestamp_t tp;
133         struct timespec ts;
134         char buf[64];
135         const char *str = NULL;
136         const char *s;
137         int len;
138 
139         if (!NOTMIME(ms) && root_storage)
140 		str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
141 		    clsid2mime);
142 
143         for (i = 0; i < count; i++) {
144                 cdf_print_property_name(buf, sizeof(buf), info[i].pi_id);
145                 switch (info[i].pi_type) {
146                 case CDF_NULL:
147                         break;
148                 case CDF_SIGNED16:
149                         if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf,
150                             info[i].pi_s16) == -1)
151                                 return -1;
152                         break;
153                 case CDF_SIGNED32:
154                         if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf,
155                             info[i].pi_s32) == -1)
156                                 return -1;
157                         break;
158                 case CDF_UNSIGNED32:
159                         if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf,
160                             info[i].pi_u32) == -1)
161                                 return -1;
162                         break;
163                 case CDF_FLOAT:
164                         if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
165                             info[i].pi_f) == -1)
166                                 return -1;
167                         break;
168                 case CDF_DOUBLE:
169                         if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
170                             info[i].pi_d) == -1)
171                                 return -1;
172                         break;
173                 case CDF_LENGTH32_STRING:
174                 case CDF_LENGTH32_WSTRING:
175                         len = info[i].pi_str.s_len;
176                         if (len > 1) {
177                                 char vbuf[1024];
178                                 size_t j, k = 1;
179 
180                                 if (info[i].pi_type == CDF_LENGTH32_WSTRING)
181                                     k++;
182                                 s = info[i].pi_str.s_buf;
183                                 for (j = 0; j < sizeof(vbuf) && len--; s += k) {
184                                         if (*s == '\0')
185                                                 break;
186                                         if (isprint((unsigned char)*s))
187                                                 vbuf[j++] = *s;
188                                 }
189                                 if (j == sizeof(vbuf))
190                                         --j;
191                                 vbuf[j] = '\0';
192                                 if (NOTMIME(ms)) {
193                                         if (vbuf[0]) {
194                                                 if (file_printf(ms, ", %s: %s",
195                                                     buf, vbuf) == -1)
196                                                         return -1;
197                                         }
198                                 } else if (str == NULL && info[i].pi_id ==
199 				    CDF_PROPERTY_NAME_OF_APPLICATION) {
200 					str = cdf_app_to_mime(vbuf, app2mime);
201 				}
202 			}
203                         break;
204                 case CDF_FILETIME:
205                         tp = info[i].pi_tp;
206                         if (tp != 0) {
207 				char tbuf[64];
208                                 if (tp < 1000000000000000LL) {
209                                         cdf_print_elapsed_time(tbuf,
210                                             sizeof(tbuf), tp);
211                                         if (NOTMIME(ms) && file_printf(ms,
212                                             ", %s: %s", buf, tbuf) == -1)
213                                                 return -1;
214                                 } else {
215                                         char *c, *ec;
216                                         cdf_timestamp_to_timespec(&ts, tp);
217                                         c = cdf_ctime(&ts.tv_sec, tbuf);
218                                         if (c != NULL &&
219 					    (ec = strchr(c, '\n')) != NULL)
220 						*ec = '\0';
221 
222                                         if (NOTMIME(ms) && file_printf(ms,
223                                             ", %s: %s", buf, c) == -1)
224                                                 return -1;
225                                 }
226                         }
227                         break;
228                 case CDF_CLIPBOARD:
229                         break;
230                 default:
231                         return -1;
232                 }
233         }
234         if (!NOTMIME(ms)) {
235 		if (str == NULL)
236 			return 0;
237                 if (file_printf(ms, "application/%s", str) == -1)
238                         return -1;
239         }
240         return 1;
241 }
242 
243 private int
244 cdf_file_catalog(struct magic_set *ms, const cdf_header_t *h,
245     const cdf_stream_t *sst)
246 {
247 	cdf_catalog_t *cat;
248 	size_t i;
249 	char buf[256];
250 	cdf_catalog_entry_t *ce;
251 
252         if (NOTMIME(ms)) {
253 		if (file_printf(ms, "Microsoft Thumbs.db [") == -1)
254 			return -1;
255 		if (cdf_unpack_catalog(h, sst, &cat) == -1)
256 			return -1;
257 		ce = cat->cat_e;
258 		/* skip first entry since it has a , or paren */
259 		for (i = 1; i < cat->cat_num; i++)
260 			if (file_printf(ms, "%s%s",
261 			    cdf_u16tos8(buf, ce[i].ce_namlen, ce[i].ce_name),
262 			    i == cat->cat_num - 1 ? "]" : ", ") == -1) {
263 				free(cat);
264 				return -1;
265 			}
266 		free(cat);
267 	} else {
268 		if (file_printf(ms, "application/CDFV2") == -1)
269 			return -1;
270 	}
271 	return 1;
272 }
273 
274 private int
275 cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h,
276     const cdf_stream_t *sst, const cdf_directory_t *root_storage)
277 {
278         cdf_summary_info_header_t si;
279         cdf_property_info_t *info;
280         size_t count;
281         int m;
282 
283         if (cdf_unpack_summary_info(sst, h, &si, &info, &count) == -1)
284                 return -1;
285 
286         if (NOTMIME(ms)) {
287 		const char *str;
288 
289                 if (file_printf(ms, "Composite Document File V2 Document")
290 		    == -1)
291                         return -1;
292 
293                 if (file_printf(ms, ", %s Endian",
294                     si.si_byte_order == 0xfffe ?  "Little" : "Big") == -1)
295                         return -2;
296                 switch (si.si_os) {
297                 case 2:
298                         if (file_printf(ms, ", Os: Windows, Version %d.%d",
299                             si.si_os_version & 0xff,
300                             (uint32_t)si.si_os_version >> 8) == -1)
301                                 return -2;
302                         break;
303                 case 1:
304                         if (file_printf(ms, ", Os: MacOS, Version %d.%d",
305                             (uint32_t)si.si_os_version >> 8,
306                             si.si_os_version & 0xff) == -1)
307                                 return -2;
308                         break;
309                 default:
310                         if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os,
311                             si.si_os_version & 0xff,
312                             (uint32_t)si.si_os_version >> 8) == -1)
313                                 return -2;
314                         break;
315                 }
316 		if (root_storage) {
317 			str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
318 			    clsid2desc);
319 			if (str) {
320 				if (file_printf(ms, ", %s", str) == -1)
321 					return -2;
322 			}
323 		}
324 	}
325 
326         m = cdf_file_property_info(ms, info, count, root_storage);
327         free(info);
328 
329         return m == -1 ? -2 : m;
330 }
331 
#ifdef notdef
/*
 * Format a class id held as two 64-bit words into buf (at most len bytes,
 * always NUL-terminated by snprintf when len > 0) as five hex groups of
 * 8-4-4-4-12 digits and return buf.
 *
 * The groups are taken from uuid[0] bits 63..32, 31..16 and 15..0, then
 * uuid[1] bits 63..48 and 47..0.  The last mask must cover exactly 48 bits:
 * a wider one would repeat bits already printed in the fourth group and let
 * the final group grow beyond 12 digits.
 */
private char *
format_clsid(char *buf, size_t len, const uint64_t uuid[2]) {
	(void)snprintf(buf, len, "%.8" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64
	    "-%.4" PRIx64 "-%.12" PRIx64,
	    (uuid[0] >> 32) & (uint64_t)0x00000000ffffffffULL,
	    (uuid[0] >> 16) & (uint64_t)0x000000000000ffffULL,
	    (uuid[0] >>  0) & (uint64_t)0x000000000000ffffULL,
	    (uuid[1] >> 48) & (uint64_t)0x000000000000ffffULL,
	    (uuid[1] >>  0) & (uint64_t)0x0000ffffffffffffULL);
	return buf;
}
#endif
345 
346 protected int
347 file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf,
348     size_t nbytes)
349 {
350         cdf_info_t info;
351         cdf_header_t h;
352         cdf_sat_t sat, ssat;
353         cdf_stream_t sst, scn;
354         cdf_dir_t dir;
355         int i;
356         const char *expn = "";
357         const char *corrupt = "corrupt: ";
358         const cdf_directory_t *root_storage;
359 
360         info.i_fd = fd;
361         info.i_buf = buf;
362         info.i_len = nbytes;
363         if (ms->flags & MAGIC_APPLE)
364                 return 0;
365         if (cdf_read_header(&info, &h) == -1)
366                 return 0;
367 #ifdef CDF_DEBUG
368         cdf_dump_header(&h);
369 #endif
370 
371         if ((i = cdf_read_sat(&info, &h, &sat)) == -1) {
372                 expn = "Can't read SAT";
373                 goto out0;
374         }
375 #ifdef CDF_DEBUG
376         cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h));
377 #endif
378 
379         if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) {
380                 expn = "Can't read SSAT";
381                 goto out1;
382         }
383 #ifdef CDF_DEBUG
384         cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h));
385 #endif
386 
387         if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) {
388                 expn = "Can't read directory";
389                 goto out2;
390         }
391 
392         if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst,
393 	    &root_storage)) == -1) {
394                 expn = "Cannot read short stream";
395                 goto out3;
396         }
397 #ifdef CDF_DEBUG
398         cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir);
399 #endif
400 #ifdef notdef
401 	if (root_storage) {
402 		if (NOTMIME(ms)) {
403 			char clsbuf[128];
404 			if (file_printf(ms, "CLSID %s, ",
405 			    format_clsid(clsbuf, sizeof(clsbuf),
406 			    root_storage->d_storage_uuid)) == -1)
407 				return -1;
408 		}
409 	}
410 #endif
411 
412 	if ((i = cdf_read_user_stream(&info, &h, &sat, &ssat, &sst, &dir,
413 	    "FileHeader", &scn)) != -1) {
414 #define HWP5_SIGNATURE "HWP Document File"
415 		if (scn.sst_dirlen >= sizeof(HWP5_SIGNATURE) - 1
416 		    && memcmp(scn.sst_tab, HWP5_SIGNATURE,
417 		    sizeof(HWP5_SIGNATURE) - 1) == 0) {
418 		    if (NOTMIME(ms)) {
419 			if (file_printf(ms,
420 			    "Hangul (Korean) Word Processor File 5.x") == -1)
421 			    return -1;
422 		    } else {
423 			if (file_printf(ms, "application/x-hwp") == -1)
424 			    return -1;
425 		    }
426 		    i = 1;
427 		    goto out5;
428 		} else {
429 		    free(scn.sst_tab);
430 		    scn.sst_tab = NULL;
431 		    scn.sst_len = 0;
432 		    scn.sst_dirlen = 0;
433 		}
434 	}
435 
436         if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir,
437             &scn)) == -1) {
438                 if (errno == ESRCH) {
439 			if ((i = cdf_read_catalog(&info, &h, &sat, &ssat, &sst,
440 			    &dir, &scn)) == -1) {
441 				corrupt = expn;
442 				if ((i = cdf_read_encrypted_package(&info, &h,
443 				    &sat, &ssat, &sst, &dir, &scn)) == -1)
444 					expn = "No summary info";
445 				else {
446 					expn = "Encrypted";
447 					i = -1;
448 				}
449 				goto out4;
450 			}
451 #ifdef CDF_DEBUG
452 			cdf_dump_catalog(&h, &scn);
453 #endif
454 			if ((i = cdf_file_catalog(ms, &h, &scn))
455 			    < 0)
456 				expn = "Can't expand catalog";
457                 } else {
458                         expn = "Cannot read summary info";
459                 }
460                 goto out4;
461         }
462 #ifdef CDF_DEBUG
463         cdf_dump_summary_info(&h, &scn);
464 #endif
465         if ((i = cdf_file_summary_info(ms, &h, &scn, root_storage)) < 0)
466             expn = "Can't expand summary_info";
467 
468 	if (i == 0) {
469 		const char *str = NULL;
470 		cdf_directory_t *d;
471 		char name[__arraycount(d->d_name)];
472 		size_t j, k;
473 
474 		for (j = 0; str == NULL && j < dir.dir_len; j++) {
475 			d = &dir.dir_tab[j];
476 			for (k = 0; k < sizeof(name); k++)
477 				name[k] = (char)cdf_tole2(d->d_name[k]);
478 			str = cdf_app_to_mime(name,
479 			    NOTMIME(ms) ? name2desc : name2mime);
480 		}
481 		if (NOTMIME(ms)) {
482 			if (str != NULL) {
483 				if (file_printf(ms, "%s", str) == -1)
484 					return -1;
485 				i = 1;
486 			}
487 		} else {
488 			if (str == NULL)
489 				str = "vnd.ms-office";
490 			if (file_printf(ms, "application/%s", str) == -1)
491 				return -1;
492 			i = 1;
493 		}
494 	}
495 out5:
496         free(scn.sst_tab);
497 out4:
498         free(sst.sst_tab);
499 out3:
500         free(dir.dir_tab);
501 out2:
502         free(ssat.sat_tab);
503 out1:
504         free(sat.sat_tab);
505 out0:
506 	if (i == -1) {
507 	    if (NOTMIME(ms)) {
508 		if (file_printf(ms,
509 		    "Composite Document File V2 Document") == -1)
510 		    return -1;
511 		if (*expn)
512 		    if (file_printf(ms, ", %s%s", corrupt, expn) == -1)
513 			return -1;
514 	    } else {
515 		if (file_printf(ms, "application/CDFV2-%s",
516 		    *corrupt ? "corrupt" : "encrypted") == -1)
517 		    return -1;
518 	    }
519 	    i = 1;
520 	}
521         return i;
522 }
523