xref: /freebsd/contrib/file/src/apprentice.c (revision ae316d1d1cffd71ab7751f94e10118777a88e027)
1 /*
2  * Copyright (c) Ian F. Darwin 1986-1995.
3  * Software written by Ian F. Darwin and others;
4  * maintained 1995-present by Christos Zoulas and others.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice immediately at the beginning of the file, without modification,
11  *    this list of conditions, and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 /*
29  * apprentice - make one pass through /etc/magic, learning its secrets.
30  */
31 
32 #include "file.h"
33 
34 #ifndef	lint
35 FILE_RCSID("@(#)$File: apprentice.c,v 1.356 2024/11/27 15:37:00 christos Exp $")
36 #endif	/* lint */
37 
38 #include "magic.h"
39 #include <stdlib.h>
40 #ifdef HAVE_UNISTD_H
41 #include <unistd.h>
42 #endif
43 #include <stddef.h>
44 #include <string.h>
45 #include <assert.h>
46 #include <ctype.h>
47 #include <fcntl.h>
48 #ifdef QUICK
49 #include <sys/mman.h>
50 #endif
51 #include <dirent.h>
52 #include <limits.h>
53 #ifdef HAVE_BYTESWAP_H
54 #include <byteswap.h>
55 #endif
56 #ifdef HAVE_SYS_BSWAP_H
57 #include <sys/bswap.h>
58 #endif
59 
60 
/*
 * EATAB: advance the pointer named `l' (which must be in scope where the
 * macro is used) past any run of ASCII whitespace characters.
 */
#define	EATAB {while (isascii(CAST(unsigned char, *l)) && \
		      isspace(CAST(unsigned char, *l)))  ++l;}
/*
 * LOWCASE: yield the lower-case form of `l' when it is an upper-case
 * letter, otherwise `l' unchanged.  The argument is evaluated more than
 * once, so it must not have side effects.
 */
#define LOWCASE(l) (isupper(CAST(unsigned char, l)) ? \
			tolower(CAST(unsigned char, l)) : (l))
65 /*
66  * Work around a bug in headers on Digital Unix.
67  * At least confirmed for: OSF1 V4.0 878
68  */
69 #if defined(__osf__) && defined(__DECC)
70 #ifdef MAP_FAILED
71 #undef MAP_FAILED
72 #endif
73 #endif
74 
75 #ifndef MAP_FAILED
76 #define MAP_FAILED (void *) -1
77 #endif
78 
79 #ifndef MAP_FILE
80 #define MAP_FILE 0
81 #endif
82 
83 #define ALLOC_CHUNK	CAST(size_t, 10)
84 #define ALLOC_INCR	CAST(size_t, 200)
85 
86 #define MAP_TYPE_USER	0
87 #define MAP_TYPE_MALLOC	1
88 #define MAP_TYPE_MMAP	2
89 
/*
 * One entry handed to the sort callback: `mp' points at the entry's
 * struct magic and `cont_count' is passed alongside it to
 * file_magic_strength().
 * NOTE(review): presumably mp is an array holding the top-level test plus
 * its continuations, cont_count the number in use and max_count the
 * allocated capacity -- confirm against the parser.
 */
struct magic_entry {
	struct magic *mp;
	uint32_t cont_count;
	uint32_t max_count;
};
95 
/*
 * A collection of magic_entry records.
 * NOTE(review): presumably `count' is the number of entries in use and
 * `max' the allocated capacity of `me' -- confirm against the loader.
 */
struct magic_entry_set {
	struct magic_entry *me;
	uint32_t count;
	uint32_t max;
};
101 
/*
 * A loaded magic database.  apprentice_unmap() releases it according to
 * `type' (one of the MAP_TYPE_* constants).
 */
struct magic_map {
	void *p;			/* backing region; freed or unmapped on release */
	size_t len;			/* size of the region at p, in bytes */
	int type;			/* MAP_TYPE_USER, MAP_TYPE_MALLOC or MAP_TYPE_MMAP */
	struct magic *magic[MAGIC_SETS];	/* per-set magic arrays */
	uint32_t nmagic[MAGIC_SETS];	/* number of entries in each magic[] array */
};
109 
/*
 * Per-type lookup tables, indexed by magic type number and filled in once
 * by init_file_tables() from type_tbl: file_formats[] receives each type's
 * FILE_FMT_* class and file_names[] its keyword.
 */
file_private int file_formats[FILE_NAMES_SIZE];
file_private const size_t file_nformats = FILE_NAMES_SIZE;
file_protected const char *file_names[FILE_NAMES_SIZE];
file_protected const size_t file_nnames = FILE_NAMES_SIZE;
114 
115 file_private int getvalue(struct magic_set *ms, struct magic *, const char **, int);
116 file_private int hextoint(int);
117 file_private const char *getstr(struct magic_set *, struct magic *, const char *,
118     int);
119 file_private int parse(struct magic_set *, struct magic_entry *, const char *,
120     const char *, size_t, int);
121 file_private void eatsize(const char **);
122 file_private int apprentice_1(struct magic_set *, const char *, int);
123 file_private ssize_t apprentice_magic_strength_1(const struct magic *);
124 file_private int apprentice_sort(const void *, const void *);
125 file_private void apprentice_list(struct mlist *, int );
126 file_private struct magic_map *apprentice_load(struct magic_set *,
127     const char *, int);
128 file_private struct mlist *mlist_alloc(void);
129 file_private void mlist_free_all(struct magic_set *);
130 file_private void mlist_free(struct mlist *);
131 file_private void byteswap(struct magic *, uint32_t);
132 file_private void bs1(struct magic *);
133 
134 #if defined(HAVE_BYTESWAP_H)
135 #define swap2(x)	bswap_16(x)
136 #define swap4(x)	bswap_32(x)
137 #define swap8(x)	bswap_64(x)
138 #elif defined(HAVE_SYS_BSWAP_H)
139 #define swap2(x)	bswap16(x)
140 #define swap4(x)	bswap32(x)
141 #define swap8(x)	bswap64(x)
142 #else
143 file_private uint16_t swap2(uint16_t);
144 file_private uint32_t swap4(uint32_t);
145 file_private uint64_t swap8(uint64_t);
146 #endif
147 
148 file_private char *mkdbname(struct magic_set *, const char *, int);
149 file_private struct magic_map *apprentice_buf(struct magic_set *, struct magic *,
150     size_t);
151 file_private struct magic_map *apprentice_map(struct magic_set *, const char *);
152 file_private int check_buffer(struct magic_set *, struct magic_map *, const char *);
153 file_private void apprentice_unmap(struct magic_map *);
154 file_private int apprentice_compile(struct magic_set *, struct magic_map *,
155     const char *);
156 file_private int check_format_type(const char *, int, const char **);
157 file_private int check_format(struct magic_set *, struct magic *);
158 file_private int get_op(char);
159 file_private int parse_mime(struct magic_set *, struct magic_entry *, const char *,
160     size_t);
161 file_private int parse_strength(struct magic_set *, struct magic_entry *,
162     const char *, size_t);
163 file_private int parse_apple(struct magic_set *, struct magic_entry *, const char *,
164     size_t);
165 file_private int parse_ext(struct magic_set *, struct magic_entry *, const char *,
166     size_t);
167 
168 
/* Compared against FILE_MAGICSIZE by apprentice_1() before any loading. */
file_private size_t magicsize = sizeof(struct magic);

/* Tab-separated column header text. */
file_private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";

/*
 * Table of named directives: each row holds the keyword, its length
 * (without the terminating NUL) and the parse_<keyword>() handler.  The
 * table is terminated by an all-NULL/zero row.
 * NOTE(review): presumably these are the "!:" annotation lines of magic
 * files -- confirm against the parser that walks this table.
 */
file_private struct {
	const char *name;
	size_t len;
	int (*fun)(struct magic_set *, struct magic_entry *, const char *,
	    size_t);
} bang[] = {
#define	DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
	DECLARE_FIELD(mime),
	DECLARE_FIELD(apple),
	DECLARE_FIELD(ext),
	DECLARE_FIELD(strength),
#undef	DECLARE_FIELD
	{ NULL, 0, NULL }
};
187 
188 #ifdef COMPILE_ONLY
189 
190 int main(int, char *[]);
191 
192 int
main(int argc,char * argv[])193 main(int argc, char *argv[])
194 {
195 	int ret;
196 	struct magic_set *ms;
197 	char *progname;
198 
199 	if ((progname = strrchr(argv[0], '/')) != NULL)
200 		progname++;
201 	else
202 		progname = argv[0];
203 
204 	if (argc != 2) {
205 		(void)fprintf(stderr, "Usage: %s file\n", progname);
206 		return 1;
207 	}
208 
209 	if ((ms = magic_open(MAGIC_CHECK)) == NULL) {
210 		(void)fprintf(stderr, "%s: %s\n", progname, strerror(errno));
211 		return 1;
212 	}
213 	ret = magic_compile(ms, argv[1]) == -1 ? 1 : 0;
214 	if (ret == 1)
215 		(void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms));
216 	magic_close(ms);
217 	return ret;
218 }
219 #endif /* COMPILE_ONLY */
220 
/*
 * One row of a type keyword table (see type_tbl and special_tbl).
 */
struct type_tbl_s {
	const char name[16];	/* keyword as written in a magic file */
	const size_t len;	/* strlen(name); 0 marks the end of a table */
	const int type;		/* FILE_* type code for this keyword */
	const int format;	/* FILE_FMT_* class associated with the type */
};
227 
228 /*
229  * XXX - the actual Single UNIX Specification says that "long" means "long",
230  * as in the C data type, but we treat it as meaning "4-byte integer".
231  * Given that the OS X version of file 5.04 did the same, I guess that passes
232  * the actual test; having "long" be dependent on how big a "long" is on
233  * the machine running "file" is silly.
234  */
/*
 * Table of type keywords.  init_file_tables() walks it up to the
 * zero-length terminator row, filling file_names[] and file_formats[], and
 * asserts that the number of rows before the terminator equals
 * FILE_NAMES_SIZE.
 *
 * XX(s) expands to the keyword and its length; XX_NULL is the terminator.
 * Both macros stay defined until after special_tbl.
 */
static const struct type_tbl_s type_tbl[] = {
# define XX(s)		s, (sizeof(s) - 1)
# define XX_NULL	"", 0
	{ XX("invalid"),	FILE_INVALID,		FILE_FMT_NONE },
	{ XX("byte"),		FILE_BYTE,		FILE_FMT_NUM },
	{ XX("short"),		FILE_SHORT,		FILE_FMT_NUM },
	{ XX("default"),	FILE_DEFAULT,		FILE_FMT_NONE },
	{ XX("long"),		FILE_LONG,		FILE_FMT_NUM },
	{ XX("string"),		FILE_STRING,		FILE_FMT_STR },
	{ XX("date"),		FILE_DATE,		FILE_FMT_STR },
	{ XX("beshort"),	FILE_BESHORT,		FILE_FMT_NUM },
	{ XX("belong"),		FILE_BELONG,		FILE_FMT_NUM },
	{ XX("bedate"),		FILE_BEDATE,		FILE_FMT_STR },
	{ XX("leshort"),	FILE_LESHORT,		FILE_FMT_NUM },
	{ XX("lelong"),		FILE_LELONG,		FILE_FMT_NUM },
	{ XX("ledate"),		FILE_LEDATE,		FILE_FMT_STR },
	{ XX("pstring"),	FILE_PSTRING,		FILE_FMT_STR },
	{ XX("ldate"),		FILE_LDATE,		FILE_FMT_STR },
	{ XX("beldate"),	FILE_BELDATE,		FILE_FMT_STR },
	{ XX("leldate"),	FILE_LELDATE,		FILE_FMT_STR },
	{ XX("regex"),		FILE_REGEX,		FILE_FMT_STR },
	{ XX("bestring16"),	FILE_BESTRING16,	FILE_FMT_STR },
	{ XX("lestring16"),	FILE_LESTRING16,	FILE_FMT_STR },
	{ XX("search"),		FILE_SEARCH,		FILE_FMT_STR },
	{ XX("medate"),		FILE_MEDATE,		FILE_FMT_STR },
	{ XX("meldate"),	FILE_MELDATE,		FILE_FMT_STR },
	{ XX("melong"),		FILE_MELONG,		FILE_FMT_NUM },
	{ XX("quad"),		FILE_QUAD,		FILE_FMT_QUAD },
	{ XX("lequad"),		FILE_LEQUAD,		FILE_FMT_QUAD },
	{ XX("bequad"),		FILE_BEQUAD,		FILE_FMT_QUAD },
	{ XX("qdate"),		FILE_QDATE,		FILE_FMT_STR },
	{ XX("leqdate"),	FILE_LEQDATE,		FILE_FMT_STR },
	{ XX("beqdate"),	FILE_BEQDATE,		FILE_FMT_STR },
	{ XX("qldate"),		FILE_QLDATE,		FILE_FMT_STR },
	{ XX("leqldate"),	FILE_LEQLDATE,		FILE_FMT_STR },
	{ XX("beqldate"),	FILE_BEQLDATE,		FILE_FMT_STR },
	{ XX("float"),		FILE_FLOAT,		FILE_FMT_FLOAT },
	{ XX("befloat"),	FILE_BEFLOAT,		FILE_FMT_FLOAT },
	{ XX("lefloat"),	FILE_LEFLOAT,		FILE_FMT_FLOAT },
	{ XX("double"),		FILE_DOUBLE,		FILE_FMT_DOUBLE },
	{ XX("bedouble"),	FILE_BEDOUBLE,		FILE_FMT_DOUBLE },
	{ XX("ledouble"),	FILE_LEDOUBLE,		FILE_FMT_DOUBLE },
	{ XX("leid3"),		FILE_LEID3,		FILE_FMT_NUM },
	{ XX("beid3"),		FILE_BEID3,		FILE_FMT_NUM },
	{ XX("indirect"),	FILE_INDIRECT,		FILE_FMT_NUM },
	{ XX("qwdate"),		FILE_QWDATE,		FILE_FMT_STR },
	{ XX("leqwdate"),	FILE_LEQWDATE,		FILE_FMT_STR },
	{ XX("beqwdate"),	FILE_BEQWDATE,		FILE_FMT_STR },
	{ XX("name"),		FILE_NAME,		FILE_FMT_NONE },
	{ XX("use"),		FILE_USE,		FILE_FMT_NONE },
	{ XX("clear"),		FILE_CLEAR,		FILE_FMT_NONE },
	{ XX("der"),		FILE_DER,		FILE_FMT_STR },
	{ XX("guid"),		FILE_GUID,		FILE_FMT_STR },
	{ XX("offset"),		FILE_OFFSET,		FILE_FMT_QUAD },
	{ XX("bevarint"),	FILE_BEVARINT,		FILE_FMT_STR },
	{ XX("levarint"),	FILE_LEVARINT,		FILE_FMT_STR },
	{ XX("msdosdate"),	FILE_MSDOSDATE,		FILE_FMT_STR },
	{ XX("lemsdosdate"),	FILE_LEMSDOSDATE,	FILE_FMT_STR },
	{ XX("bemsdosdate"),	FILE_BEMSDOSDATE,	FILE_FMT_STR },
	{ XX("msdostime"),	FILE_MSDOSTIME,		FILE_FMT_STR },
	{ XX("lemsdostime"),	FILE_LEMSDOSTIME,	FILE_FMT_STR },
	{ XX("bemsdostime"),	FILE_BEMSDOSTIME,	FILE_FMT_STR },
	{ XX("octal"),		FILE_OCTAL,		FILE_FMT_STR },
	{ XX_NULL,		FILE_INVALID,		FILE_FMT_NONE },
};
300 
301 /*
302  * These are not types, and cannot be preceded by "u" to make them
303  * unsigned.
304  */
/*
 * Keywords that are looked up separately from type_tbl; the table ends
 * with a zero-length row whose type is FILE_INVALID.  The XX helper
 * macros are retired once this table has been defined.
 */
static const struct type_tbl_s special_tbl[] = {
	{ XX("der"),		FILE_DER,		FILE_FMT_STR },
	{ XX("name"),		FILE_NAME,		FILE_FMT_STR },
	{ XX("use"),		FILE_USE,		FILE_FMT_STR },
	{ XX("octal"),		FILE_OCTAL,		FILE_FMT_STR },
	{ XX_NULL,		FILE_INVALID,		FILE_FMT_NONE },
};
# undef XX
# undef XX_NULL
314 
315 file_private int
get_type(const struct type_tbl_s * tbl,const char * l,const char ** t)316 get_type(const struct type_tbl_s *tbl, const char *l, const char **t)
317 {
318 	const struct type_tbl_s *p;
319 
320 	for (p = tbl; p->len; p++) {
321 		if (strncmp(l, p->name, p->len) == 0) {
322 			if (t)
323 				*t = l + p->len;
324 			break;
325 		}
326 	}
327 	return p->type;
328 }
329 
330 file_private off_t
maxoff_t(void)331 maxoff_t(void) {
332 	if (/*CONSTCOND*/sizeof(off_t) == sizeof(int))
333 		return CAST(off_t, INT_MAX);
334 	if (/*CONSTCOND*/sizeof(off_t) == sizeof(long))
335 		return CAST(off_t, LONG_MAX);
336 	return 0x7fffffff;
337 }
338 
339 file_private int
get_standard_integer_type(const char * l,const char ** t)340 get_standard_integer_type(const char *l, const char **t)
341 {
342 	int type;
343 
344 	if (isalpha(CAST(unsigned char, l[1]))) {
345 		switch (l[1]) {
346 		case 'C':
347 			/* "dC" and "uC" */
348 			type = FILE_BYTE;
349 			break;
350 		case 'S':
351 			/* "dS" and "uS" */
352 			type = FILE_SHORT;
353 			break;
354 		case 'I':
355 		case 'L':
356 			/*
357 			 * "dI", "dL", "uI", and "uL".
358 			 *
359 			 * XXX - the actual Single UNIX Specification says
360 			 * that "L" means "long", as in the C data type,
361 			 * but we treat it as meaning "4-byte integer".
362 			 * Given that the OS X version of file 5.04 did
363 			 * the same, I guess that passes the actual SUS
364 			 * validation suite; having "dL" be dependent on
365 			 * how big a "long" is on the machine running
366 			 * "file" is silly.
367 			 */
368 			type = FILE_LONG;
369 			break;
370 		case 'Q':
371 			/* "dQ" and "uQ" */
372 			type = FILE_QUAD;
373 			break;
374 		default:
375 			/* "d{anything else}", "u{anything else}" */
376 			return FILE_INVALID;
377 		}
378 		l += 2;
379 	} else if (isdigit(CAST(unsigned char, l[1]))) {
380 		/*
381 		 * "d{num}" and "u{num}"; we only support {num} values
382 		 * of 1, 2, 4, and 8 - the Single UNIX Specification
383 		 * doesn't say anything about whether arbitrary
384 		 * values should be supported, but both the Solaris 10
385 		 * and OS X Mountain Lion versions of file passed the
386 		 * Single UNIX Specification validation suite, and
387 		 * neither of them support values bigger than 8 or
388 		 * non-power-of-2 values.
389 		 */
390 		if (isdigit(CAST(unsigned char, l[2]))) {
391 			/* Multi-digit, so > 9 */
392 			return FILE_INVALID;
393 		}
394 		switch (l[1]) {
395 		case '1':
396 			type = FILE_BYTE;
397 			break;
398 		case '2':
399 			type = FILE_SHORT;
400 			break;
401 		case '4':
402 			type = FILE_LONG;
403 			break;
404 		case '8':
405 			type = FILE_QUAD;
406 			break;
407 		default:
408 			/* XXX - what about 3, 5, 6, or 7? */
409 			return FILE_INVALID;
410 		}
411 		l += 2;
412 	} else {
413 		/*
414 		 * "d" or "u" by itself.
415 		 */
416 		type = FILE_LONG;
417 		++l;
418 	}
419 	if (t)
420 		*t = l;
421 	return type;
422 }
423 
424 file_private void
init_file_tables(void)425 init_file_tables(void)
426 {
427 	static int done = 0;
428 	const struct type_tbl_s *p;
429 
430 	if (done)
431 		return;
432 	done++;
433 
434 	for (p = type_tbl; p->len; p++) {
435 		assert(p->type < FILE_NAMES_SIZE);
436 		file_names[p->type] = p->name;
437 		file_formats[p->type] = p->format;
438 	}
439 	assert(p - type_tbl == FILE_NAMES_SIZE);
440 }
441 
442 file_private int
add_mlist(struct mlist * mlp,struct magic_map * map,size_t idx)443 add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx)
444 {
445 	struct mlist *ml;
446 
447 	mlp->map = NULL;
448 	if ((ml = CAST(struct mlist *, malloc(sizeof(*ml)))) == NULL)
449 		return -1;
450 
451 	ml->map = idx == 0 ? map : NULL;
452 	ml->magic = map->magic[idx];
453 	ml->nmagic = map->nmagic[idx];
454 	if (ml->nmagic) {
455 		ml->magic_rxcomp = CAST(file_regex_t **,
456 		    calloc(ml->nmagic, sizeof(*ml->magic_rxcomp)));
457 		if (ml->magic_rxcomp == NULL) {
458 			free(ml);
459 			return -1;
460 		}
461 	} else
462 		ml->magic_rxcomp = NULL;
463 	mlp->prev->next = ml;
464 	ml->prev = mlp->prev;
465 	ml->next = mlp;
466 	mlp->prev = ml;
467 	return 0;
468 }
469 
470 /*
471  * Handle one file or directory.
472  */
473 file_private int
apprentice_1(struct magic_set * ms,const char * fn,int action)474 apprentice_1(struct magic_set *ms, const char *fn, int action)
475 {
476 	struct magic_map *map;
477 #ifndef COMPILE_ONLY
478 	size_t i;
479 #endif
480 
481 	if (magicsize != FILE_MAGICSIZE) {
482 		file_error(ms, 0, "magic element size %lu != %lu",
483 		    CAST(unsigned long, sizeof(*map->magic[0])),
484 		    CAST(unsigned long, FILE_MAGICSIZE));
485 		return -1;
486 	}
487 
488 	if (action == FILE_COMPILE) {
489 		map = apprentice_load(ms, fn, action);
490 		if (map == NULL)
491 			return -1;
492 		return apprentice_compile(ms, map, fn);
493 	}
494 
495 #ifndef COMPILE_ONLY
496 	map = apprentice_map(ms, fn);
497 	if (map == NULL) {
498 		if (ms->flags & MAGIC_CHECK)
499 			file_magwarn(ms, "using regular magic file `%s'", fn);
500 		map = apprentice_load(ms, fn, action);
501 		if (map == NULL)
502 			return -1;
503 	}
504 
505 	for (i = 0; i < MAGIC_SETS; i++) {
506 		if (add_mlist(ms->mlist[i], map, i) == -1) {
507 			/* failed to add to any list, free explicitly */
508 			if (i == 0)
509 				apprentice_unmap(map);
510 			else
511 				mlist_free_all(ms);
512 			file_oomem(ms, sizeof(*ms->mlist[0]));
513 			return -1;
514 		}
515 	}
516 
517 	if (action == FILE_LIST) {
518 		for (i = 0; i < MAGIC_SETS; i++) {
519 			printf("Set %" SIZE_T_FORMAT "u:\nBinary patterns:\n",
520 			    i);
521 			apprentice_list(ms->mlist[i], BINTEST);
522 			printf("Text patterns:\n");
523 			apprentice_list(ms->mlist[i], TEXTTEST);
524 		}
525 	}
526 	return 0;
527 #else
528 	return 0;
529 #endif /* COMPILE_ONLY */
530 }
531 
532 file_protected void
file_ms_free(struct magic_set * ms)533 file_ms_free(struct magic_set *ms)
534 {
535 	size_t i;
536 	if (ms == NULL)
537 		return;
538 	for (i = 0; i < MAGIC_SETS; i++)
539 		mlist_free(ms->mlist[i]);
540 	free(ms->o.pbuf);
541 	free(ms->o.buf);
542 	free(ms->c.li);
543 	free(ms->fnamebuf);
544 #ifdef USE_C_LOCALE
545 	freelocale(ms->c_lc_ctype);
546 #endif
547 	free(ms);
548 }
549 
550 file_protected struct magic_set *
file_ms_alloc(int flags)551 file_ms_alloc(int flags)
552 {
553 	struct magic_set *ms;
554 	size_t i, len;
555 
556 	if ((ms = CAST(struct magic_set *, calloc(CAST(size_t, 1u),
557 	    sizeof(*ms)))) == NULL)
558 		return NULL;
559 
560 	if (magic_setflags(ms, flags) == -1) {
561 		errno = EINVAL;
562 		goto free;
563 	}
564 
565 	ms->o.buf = ms->o.pbuf = NULL;
566 	ms->o.blen = 0;
567 	len = (ms->c.len = 10) * sizeof(*ms->c.li);
568 
569 	if ((ms->c.li = CAST(struct level_info *, malloc(len))) == NULL)
570 		goto free;
571 
572 	ms->event_flags = 0;
573 	ms->error = -1;
574 	for (i = 0; i < MAGIC_SETS; i++)
575 		ms->mlist[i] = NULL;
576 	ms->fnamebuf = NULL;
577 	ms->file = "unknown";
578 	ms->line = 0;
579 	ms->magwarn = 0;
580 	ms->indir_max = FILE_INDIR_MAX;
581 	ms->name_max = FILE_NAME_MAX;
582 	ms->elf_shnum_max = FILE_ELF_SHNUM_MAX;
583 	ms->elf_shsize_max = FILE_ELF_SHSIZE_MAX;
584 	ms->elf_phnum_max = FILE_ELF_PHNUM_MAX;
585 	ms->elf_notes_max = FILE_ELF_NOTES_MAX;
586 	ms->regex_max = FILE_REGEX_MAX;
587 	ms->bytes_max = FILE_BYTES_MAX;
588 	ms->encoding_max = FILE_ENCODING_MAX;
589 	ms->magwarn_max = FILE_MAGWARN_MAX;
590 #ifdef USE_C_LOCALE
591 	ms->c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0);
592 	assert(ms->c_lc_ctype != NULL);
593 #endif
594 	return ms;
595 free:
596 	free(ms);
597 	return NULL;
598 }
599 
600 file_private void
apprentice_unmap(struct magic_map * map)601 apprentice_unmap(struct magic_map *map)
602 {
603 	size_t i;
604 	char *p;
605 	if (map == NULL)
606 		return;
607 
608 	switch (map->type) {
609 	case MAP_TYPE_USER:
610 		break;
611 	case MAP_TYPE_MALLOC:
612 		p = CAST(char *, map->p);
613 		for (i = 0; i < MAGIC_SETS; i++) {
614 			char *b = RCAST(char *, map->magic[i]);
615 			if (p != NULL && b >= p && b <= p + map->len)
616 				continue;
617 			free(b);
618 		}
619 		free(p);
620 		break;
621 #ifdef QUICK
622 	case MAP_TYPE_MMAP:
623 		if (map->p && map->p != MAP_FAILED)
624 			(void)munmap(map->p, map->len);
625 		break;
626 #endif
627 	default:
628 		fprintf(stderr, "Bad map type %d", map->type);
629 		abort();
630 	}
631 	free(map);
632 }
633 
634 file_private struct mlist *
mlist_alloc(void)635 mlist_alloc(void)
636 {
637 	struct mlist *mlist;
638 	if ((mlist = CAST(struct mlist *, calloc(1, sizeof(*mlist)))) == NULL) {
639 		return NULL;
640 	}
641 	mlist->next = mlist->prev = mlist;
642 	return mlist;
643 }
644 
645 file_private void
mlist_free_all(struct magic_set * ms)646 mlist_free_all(struct magic_set *ms)
647 {
648 	size_t i;
649 
650 	for (i = 0; i < MAGIC_SETS; i++) {
651 		mlist_free(ms->mlist[i]);
652 		ms->mlist[i] = NULL;
653 	}
654 }
655 
656 file_private void
mlist_free_one(struct mlist * ml)657 mlist_free_one(struct mlist *ml)
658 {
659 	size_t i;
660 
661 	if (ml->map)
662 		apprentice_unmap(CAST(struct magic_map *, ml->map));
663 
664 	for (i = 0; i < ml->nmagic; ++i) {
665 		if (ml->magic_rxcomp[i]) {
666 			file_regfree(ml->magic_rxcomp[i]);
667 			free(ml->magic_rxcomp[i]);
668 			ml->magic_rxcomp[i] = NULL;
669 		}
670 	}
671 	free(ml->magic_rxcomp);
672 	ml->magic_rxcomp = NULL;
673 	free(ml);
674 }
675 
676 file_private void
mlist_free(struct mlist * mlist)677 mlist_free(struct mlist *mlist)
678 {
679 	struct mlist *ml, *next;
680 
681 	if (mlist == NULL)
682 		return;
683 
684 	for (ml = mlist->next; ml != mlist;) {
685 		next = ml->next;
686 		mlist_free_one(ml);
687 		ml = next;
688 	}
689 	mlist_free_one(mlist);
690 }
691 
692 #ifndef COMPILE_ONLY
/* struct magic **bufs: an array of compiled magic files */
694 file_protected int
buffer_apprentice(struct magic_set * ms,struct magic ** bufs,size_t * sizes,size_t nbufs)695 buffer_apprentice(struct magic_set *ms, struct magic **bufs,
696     size_t *sizes, size_t nbufs)
697 {
698 	size_t i, j;
699 	struct magic_map *map;
700 
701 	if (nbufs == 0)
702 		return -1;
703 
704 	(void)file_reset(ms, 0);
705 
706 	init_file_tables();
707 
708 	for (i = 0; i < MAGIC_SETS; i++) {
709 		mlist_free(ms->mlist[i]);
710 		if ((ms->mlist[i] = mlist_alloc()) == NULL) {
711 			file_oomem(ms, sizeof(*ms->mlist[0]));
712 			goto fail;
713 		}
714 	}
715 
716 	for (i = 0; i < nbufs; i++) {
717 		map = apprentice_buf(ms, bufs[i], sizes[i]);
718 		if (map == NULL)
719 			goto fail;
720 
721 		for (j = 0; j < MAGIC_SETS; j++) {
722 			if (add_mlist(ms->mlist[j], map, j) == -1) {
723 				file_oomem(ms, sizeof(*ms->mlist[0]));
724 				goto fail;
725 			}
726 		}
727 	}
728 
729 	return 0;
730 fail:
731 	mlist_free_all(ms);
732 	return -1;
733 }
734 #endif
735 
736 /* const char *fn: list of magic files and directories */
737 file_protected int
file_apprentice(struct magic_set * ms,const char * fn,int action)738 file_apprentice(struct magic_set *ms, const char *fn, int action)
739 {
740 	char *p;
741 	int fileerr, errs = -1;
742 	size_t i, j;
743 
744 	(void)file_reset(ms, 0);
745 
746 	if ((fn = magic_getpath(fn, action)) == NULL)
747 		return -1;
748 
749 	init_file_tables();
750 
751 	free(ms->fnamebuf);
752 	if ((ms->fnamebuf = strdup(fn)) == NULL) {
753 		file_oomem(ms, strlen(fn));
754 		return -1;
755 	}
756 
757 	for (i = 0; i < MAGIC_SETS; i++) {
758 		mlist_free(ms->mlist[i]);
759 		if ((ms->mlist[i] = mlist_alloc()) == NULL) {
760 			file_oomem(ms, sizeof(*ms->mlist[0]));
761 			for (j = 0; j < i; j++) {
762 				mlist_free(ms->mlist[j]);
763 				ms->mlist[j] = NULL;
764 			}
765 			return -1;
766 		}
767 	}
768 	fn = ms->fnamebuf;
769 
770 	while (fn) {
771 		p = CCAST(char *, strchr(fn, PATHSEP));
772 		if (p)
773 			*p++ = '\0';
774 		if (*fn == '\0')
775 			break;
776 		fileerr = apprentice_1(ms, fn, action);
777 		errs = MAX(errs, fileerr);
778 		fn = p;
779 	}
780 
781 	if (errs == -1) {
782 		for (i = 0; i < MAGIC_SETS; i++) {
783 			mlist_free(ms->mlist[i]);
784 			ms->mlist[i] = NULL;
785 		}
786 		file_error(ms, 0, "could not find any valid magic files!");
787 		return -1;
788 	}
789 
790 #if 0
791 	/*
792 	 * Always leave the database loaded
793 	 */
794 	if (action == FILE_LOAD)
795 		return 0;
796 
797 	for (i = 0; i < MAGIC_SETS; i++) {
798 		mlist_free(ms->mlist[i]);
799 		ms->mlist[i] = NULL;
800 	}
801 #endif
802 
803 	switch (action) {
804 	case FILE_LOAD:
805 	case FILE_COMPILE:
806 	case FILE_CHECK:
807 	case FILE_LIST:
808 		return 0;
809 	default:
810 		file_error(ms, 0, "Invalid action %d", action);
811 		return -1;
812 	}
813 }
814 
815 /*
816  * Compute the real length of a magic expression, for the purposes
817  * of determining how "strong" a magic expression is (approximating
818  * how specific its matches are):
819  *	- magic characters count 0 unless escaped.
820  *	- [] expressions count 1
821  *	- {} expressions count 0
822  *	- regular characters or escaped magic characters count 1
823  *	- 0 length expressions count as one
824  */
825 file_private size_t
nonmagic(const char * str)826 nonmagic(const char *str)
827 {
828 	const char *p;
829 	size_t rv = 0;
830 
831 	for (p = str; *p; p++)
832 		switch (*p) {
833 		case '\\':	/* Escaped anything counts 1 */
834 			if (!*++p)
835 				p--;
836 			rv++;
837 			continue;
838 		case '?':	/* Magic characters count 0 */
839 		case '*':
840 		case '.':
841 		case '+':
842 		case '^':
843 		case '$':
844 			continue;
845 		case '[':	/* Bracketed expressions count 1 the ']' */
846 			while (*p && *p != ']')
847 				p++;
848 			p--;
849 			continue;
850 		case '{':	/* Braced expressions count 0 */
851 			while (*p && *p != '}')
852 				p++;
853 			if (!*p)
854 				p--;
855 			continue;
856 		default:	/* Anything else counts 1 */
857 			rv++;
858 			continue;
859 		}
860 
861 	return rv == 0 ? 1 : rv;	/* Return at least 1 */
862 }
863 
864 
865 file_private size_t
typesize(int type)866 typesize(int type)
867 {
868 	switch (type) {
869 	case FILE_BYTE:
870 		return 1;
871 
872 	case FILE_SHORT:
873 	case FILE_LESHORT:
874 	case FILE_BESHORT:
875 	case FILE_MSDOSDATE:
876 	case FILE_BEMSDOSDATE:
877 	case FILE_LEMSDOSDATE:
878 	case FILE_MSDOSTIME:
879 	case FILE_BEMSDOSTIME:
880 	case FILE_LEMSDOSTIME:
881 		return 2;
882 
883 	case FILE_LONG:
884 	case FILE_LELONG:
885 	case FILE_BELONG:
886 	case FILE_MELONG:
887 		return 4;
888 
889 	case FILE_DATE:
890 	case FILE_LEDATE:
891 	case FILE_BEDATE:
892 	case FILE_MEDATE:
893 	case FILE_LDATE:
894 	case FILE_LELDATE:
895 	case FILE_BELDATE:
896 	case FILE_MELDATE:
897 	case FILE_FLOAT:
898 	case FILE_BEFLOAT:
899 	case FILE_LEFLOAT:
900 	case FILE_BEID3:
901 	case FILE_LEID3:
902 		return 4;
903 
904 	case FILE_QUAD:
905 	case FILE_BEQUAD:
906 	case FILE_LEQUAD:
907 	case FILE_QDATE:
908 	case FILE_LEQDATE:
909 	case FILE_BEQDATE:
910 	case FILE_QLDATE:
911 	case FILE_LEQLDATE:
912 	case FILE_BEQLDATE:
913 	case FILE_QWDATE:
914 	case FILE_LEQWDATE:
915 	case FILE_BEQWDATE:
916 	case FILE_DOUBLE:
917 	case FILE_BEDOUBLE:
918 	case FILE_LEDOUBLE:
919 	case FILE_OFFSET:
920 	case FILE_BEVARINT:
921 	case FILE_LEVARINT:
922 		return 8;
923 
924 	case FILE_GUID:
925 		return 16;
926 
927 	default:
928 		return FILE_BADSIZE;
929 	}
930 }
931 
932 /*
933  * Get weight of this magic entry, for sorting purposes.
934  */
935 file_private ssize_t
apprentice_magic_strength_1(const struct magic * m)936 apprentice_magic_strength_1(const struct magic *m)
937 {
938 #define MULT 10U
939 	size_t ts, v;
940 	ssize_t val = 2 * MULT;	/* baseline strength */
941 
942 	switch (m->type) {
943 	case FILE_DEFAULT:	/* make sure this sorts last */
944 		if (m->factor_op != FILE_FACTOR_OP_NONE) {
945 			file_magwarn1("Unsupported factor_op in default %d",
946 			    m->factor_op);
947 		}
948 		return 0;
949 
950 	case FILE_BYTE:
951 	case FILE_SHORT:
952 	case FILE_LESHORT:
953 	case FILE_BESHORT:
954 	case FILE_LONG:
955 	case FILE_LELONG:
956 	case FILE_BELONG:
957 	case FILE_MELONG:
958 	case FILE_DATE:
959 	case FILE_LEDATE:
960 	case FILE_BEDATE:
961 	case FILE_MEDATE:
962 	case FILE_LDATE:
963 	case FILE_LELDATE:
964 	case FILE_BELDATE:
965 	case FILE_MELDATE:
966 	case FILE_FLOAT:
967 	case FILE_BEFLOAT:
968 	case FILE_LEFLOAT:
969 	case FILE_QUAD:
970 	case FILE_BEQUAD:
971 	case FILE_LEQUAD:
972 	case FILE_QDATE:
973 	case FILE_LEQDATE:
974 	case FILE_BEQDATE:
975 	case FILE_QLDATE:
976 	case FILE_LEQLDATE:
977 	case FILE_BEQLDATE:
978 	case FILE_QWDATE:
979 	case FILE_LEQWDATE:
980 	case FILE_BEQWDATE:
981 	case FILE_DOUBLE:
982 	case FILE_BEDOUBLE:
983 	case FILE_LEDOUBLE:
984 	case FILE_BEVARINT:
985 	case FILE_LEVARINT:
986 	case FILE_GUID:
987 	case FILE_BEID3:
988 	case FILE_LEID3:
989 	case FILE_OFFSET:
990 	case FILE_MSDOSDATE:
991 	case FILE_BEMSDOSDATE:
992 	case FILE_LEMSDOSDATE:
993 	case FILE_MSDOSTIME:
994 	case FILE_BEMSDOSTIME:
995 	case FILE_LEMSDOSTIME:
996 		ts = typesize(m->type);
997 		if (ts == FILE_BADSIZE) {
998 			(void)fprintf(stderr, "Bad size for type %d\n",
999 			    m->type);
1000 			abort();
1001 		}
1002 		val += ts * MULT;
1003 		break;
1004 
1005 	case FILE_PSTRING:
1006 	case FILE_STRING:
1007 	case FILE_OCTAL:
1008 		val += m->vallen * MULT;
1009 		break;
1010 
1011 	case FILE_BESTRING16:
1012 	case FILE_LESTRING16:
1013 		val += m->vallen * MULT / 2;
1014 		break;
1015 
1016 	case FILE_SEARCH:
1017 		if (m->vallen == 0)
1018 			break;
1019 		val += m->vallen * MAX(MULT / m->vallen, 1);
1020 		break;
1021 
1022 	case FILE_REGEX:
1023 		v = nonmagic(m->value.s);
1024 		val += v * MAX(MULT / v, 1);
1025 		break;
1026 
1027 	case FILE_INDIRECT:
1028 	case FILE_NAME:
1029 	case FILE_USE:
1030 	case FILE_CLEAR:
1031 		break;
1032 
1033 	case FILE_DER:
1034 		val += MULT;
1035 		break;
1036 
1037 	default:
1038 		(void)fprintf(stderr, "Bad type %d\n", m->type);
1039 		abort();
1040 	}
1041 
1042 	switch (m->reln) {
1043 	case 'x':	/* matches anything penalize */
1044 	case '!':       /* matches almost anything penalize */
1045 		val = 0;
1046 		break;
1047 
1048 	case '=':	/* Exact match, prefer */
1049 		val += MULT;
1050 		break;
1051 
1052 	case '>':
1053 	case '<':	/* comparison match reduce strength */
1054 		val -= 2 * MULT;
1055 		break;
1056 
1057 	case '^':
1058 	case '&':	/* masking bits, we could count them too */
1059 		val -= MULT;
1060 		break;
1061 
1062 	default:
1063 		(void)fprintf(stderr, "Bad relation %c\n", m->reln);
1064 		abort();
1065 	}
1066 
1067 	return val;
1068 }
1069 
1070 
1071 /*ARGSUSED*/
1072 file_protected size_t
file_magic_strength(const struct magic * m,size_t nmagic)1073 file_magic_strength(const struct magic *m,
1074     size_t nmagic __attribute__((__unused__)))
1075 {
1076 	ssize_t val = apprentice_magic_strength_1(m);
1077 
1078 #ifdef notyet
1079 	if (m->desc[0] == '\0') {
1080 		size_t i;
1081 		/*
1082 		 * Magic entries with no description get their continuations
1083 		 * added
1084 		 */
1085 		for (i = 1; m[i].cont_level != 0 && i < MIN(nmagic, 3); i++) {
1086 			ssize_t v = apprentice_magic_strength_1(&m[i]) >>
1087 			    (i + 1);
1088 			val += v;
1089 			if (m[i].desc[0] != '\0')
1090 				break;
1091 		}
1092 	}
1093 #endif
1094 
1095 	switch (m->factor_op) {
1096 	case FILE_FACTOR_OP_NONE:
1097 		break;
1098 	case FILE_FACTOR_OP_PLUS:
1099 		val += m->factor;
1100 		break;
1101 	case FILE_FACTOR_OP_MINUS:
1102 		val -= m->factor;
1103 		break;
1104 	case FILE_FACTOR_OP_TIMES:
1105 		val *= m->factor;
1106 		break;
1107 	case FILE_FACTOR_OP_DIV:
1108 		val /= m->factor;
1109 		break;
1110 	default:
1111 		(void)fprintf(stderr, "Bad factor_op %u\n", m->factor_op);
1112 		abort();
1113 	}
1114 
1115 	if (val <= 0)	/* ensure we only return 0 for FILE_DEFAULT */
1116 		val = 1;
1117 
1118 #ifndef notyet
1119 	/*
1120 	 * Magic entries with no description get a bonus because they depend
1121 	 * on subsequent magic entries to print something.
1122 	 */
1123 	if (m->desc[0] == '\0')
1124 		val++;
1125 #endif
1126 
1127 	return val;
1128 }
1129 
1130 /*
1131  * Sort callback for sorting entries by "strength" (basically length)
1132  */
1133 file_private int
apprentice_sort(const void * a,const void * b)1134 apprentice_sort(const void *a, const void *b)
1135 {
1136 	const struct magic_entry *ma = CAST(const struct magic_entry *, a);
1137 	const struct magic_entry *mb = CAST(const struct magic_entry *, b);
1138 	size_t sa = file_magic_strength(ma->mp, ma->cont_count);
1139 	size_t sb = file_magic_strength(mb->mp, mb->cont_count);
1140 	if (sa == sb) {
1141 		struct magic mpa = *ma->mp;
1142 		struct magic mpb = *mb->mp;
1143 		mpa.lineno = mpb.lineno = 0;
1144 		int x = memcmp(&mpa, &mpb, sizeof(mpa));
1145 		if (x == 0) {
1146 			// Don't warn for DER
1147 			if (mpa.type == FILE_DER)
1148 				return 0;
1149 			file_magwarn1("Duplicate magic entry `%s'",
1150 			    ma->mp->desc);
1151 #ifndef	COMPILE_ONLY
1152 			file_mdump(ma->mp);
1153 			file_mdump(mb->mp);
1154 #endif
1155 			return 0;
1156 		}
1157 		return x > 0 ? -1 : 1;
1158 	}
1159 	return sa > sb ? -1 : 1;
1160 }
1161 
1162 /*
1163  * Shows sorted patterns list in the order which is used for the matching
1164  */
1165 file_private void
apprentice_list(struct mlist * mlist,int mode)1166 apprentice_list(struct mlist *mlist, int mode)
1167 {
1168 	uint32_t magindex, descindex, mimeindex, lineindex;
1169 	struct mlist *ml;
1170 	for (ml = mlist->next; ml != mlist; ml = ml->next) {
1171 		for (magindex = 0; magindex < ml->nmagic; magindex++) {
1172 			struct magic *m = &ml->magic[magindex];
1173 			if ((m->flag & mode) != mode) {
1174 				/* Skip sub-tests */
1175 				while (magindex + 1 < ml->nmagic &&
1176 				       ml->magic[magindex + 1].cont_level != 0)
1177 					++magindex;
1178 				continue; /* Skip to next top-level test*/
1179 			}
1180 
1181 			/*
1182 			 * Try to iterate over the tree until we find item with
1183 			 * description/mimetype.
1184 			 */
1185 			lineindex = descindex = mimeindex = magindex;
1186 			for (; magindex + 1 < ml->nmagic &&
1187 			   ml->magic[magindex + 1].cont_level != 0;
1188 			   magindex++) {
1189 				uint32_t mi = magindex + 1;
1190 				if (*ml->magic[descindex].desc == '\0'
1191 				    && *ml->magic[mi].desc)
1192 					descindex = mi;
1193 				if (*ml->magic[mimeindex].mimetype == '\0'
1194 				    && *ml->magic[mi].mimetype)
1195 					mimeindex = mi;
1196 			}
1197 
1198 			printf("Strength = %3" SIZE_T_FORMAT "u@%u: %s [%s]\n",
1199 			    file_magic_strength(m, ml->nmagic - magindex),
1200 			    ml->magic[lineindex].lineno,
1201 			    ml->magic[descindex].desc,
1202 			    ml->magic[mimeindex].mimetype);
1203 		}
1204 	}
1205 }
1206 
1207 file_private void
set_test_type(struct magic * mstart,struct magic * m)1208 set_test_type(struct magic *mstart, struct magic *m)
1209 {
1210 	switch (m->type) {
1211 	case FILE_BYTE:
1212 	case FILE_SHORT:
1213 	case FILE_LONG:
1214 	case FILE_DATE:
1215 	case FILE_BESHORT:
1216 	case FILE_BELONG:
1217 	case FILE_BEDATE:
1218 	case FILE_LESHORT:
1219 	case FILE_LELONG:
1220 	case FILE_LEDATE:
1221 	case FILE_LDATE:
1222 	case FILE_BELDATE:
1223 	case FILE_LELDATE:
1224 	case FILE_MEDATE:
1225 	case FILE_MELDATE:
1226 	case FILE_MELONG:
1227 	case FILE_QUAD:
1228 	case FILE_LEQUAD:
1229 	case FILE_BEQUAD:
1230 	case FILE_QDATE:
1231 	case FILE_LEQDATE:
1232 	case FILE_BEQDATE:
1233 	case FILE_QLDATE:
1234 	case FILE_LEQLDATE:
1235 	case FILE_BEQLDATE:
1236 	case FILE_QWDATE:
1237 	case FILE_LEQWDATE:
1238 	case FILE_BEQWDATE:
1239 	case FILE_FLOAT:
1240 	case FILE_BEFLOAT:
1241 	case FILE_LEFLOAT:
1242 	case FILE_DOUBLE:
1243 	case FILE_BEDOUBLE:
1244 	case FILE_LEDOUBLE:
1245 	case FILE_BEVARINT:
1246 	case FILE_LEVARINT:
1247 	case FILE_DER:
1248 	case FILE_GUID:
1249 	case FILE_OFFSET:
1250 	case FILE_MSDOSDATE:
1251 	case FILE_BEMSDOSDATE:
1252 	case FILE_LEMSDOSDATE:
1253 	case FILE_MSDOSTIME:
1254 	case FILE_BEMSDOSTIME:
1255 	case FILE_LEMSDOSTIME:
1256 	case FILE_OCTAL:
1257 		mstart->flag |= BINTEST;
1258 		break;
1259 	case FILE_STRING:
1260 	case FILE_PSTRING:
1261 	case FILE_BESTRING16:
1262 	case FILE_LESTRING16:
1263 		/* Allow text overrides */
1264 		if (mstart->str_flags & STRING_TEXTTEST)
1265 			mstart->flag |= TEXTTEST;
1266 		else
1267 			mstart->flag |= BINTEST;
1268 		break;
1269 	case FILE_REGEX:
1270 	case FILE_SEARCH:
1271 		/* Check for override */
1272 		if (mstart->str_flags & STRING_BINTEST)
1273 			mstart->flag |= BINTEST;
1274 		if (mstart->str_flags & STRING_TEXTTEST)
1275 			mstart->flag |= TEXTTEST;
1276 
1277 		if (mstart->flag & (TEXTTEST|BINTEST))
1278 			break;
1279 
1280 		/* binary test if pattern is not text */
1281 		if (file_looks_utf8(m->value.us, CAST(size_t, m->vallen), NULL,
1282 		    NULL) <= 0)
1283 			mstart->flag |= BINTEST;
1284 		else
1285 			mstart->flag |= TEXTTEST;
1286 		break;
1287 	case FILE_DEFAULT:
1288 		/* can't deduce anything; we shouldn't see this at the
1289 		   top level anyway */
1290 		break;
1291 	case FILE_INVALID:
1292 	default:
1293 		/* invalid search type, but no need to complain here */
1294 		break;
1295 	}
1296 }
1297 
1298 file_private int
addentry(struct magic_set * ms,struct magic_entry * me,struct magic_entry_set * mset)1299 addentry(struct magic_set *ms, struct magic_entry *me,
1300    struct magic_entry_set *mset)
1301 {
1302 	size_t i = me->mp->type == FILE_NAME ? 1 : 0;
1303 	if (mset[i].me == NULL || mset[i].count == mset[i].max) {
1304 		struct magic_entry *mp;
1305 
1306 		size_t incr = mset[i].max + ALLOC_INCR;
1307 		if ((mp = CAST(struct magic_entry *,
1308 		    realloc(mset[i].me, sizeof(*mp) * incr))) ==
1309 		    NULL) {
1310 			file_oomem(ms, sizeof(*mp) * incr);
1311 			return -1;
1312 		}
1313 		(void)memset(&mp[mset[i].count], 0, sizeof(*mp) *
1314 		    ALLOC_INCR);
1315 		mset[i].me = mp;
1316 		mset[i].max = CAST(uint32_t, incr);
1317 		assert(mset[i].max == incr);
1318 	}
1319 	mset[i].me[mset[i].count++] = *me;
1320 	memset(me, 0, sizeof(*me));
1321 	return 0;
1322 }
1323 
1324 /*
1325  * Load and parse one file.
1326  */
1327 file_private void
load_1(struct magic_set * ms,int action,const char * fn,int * errs,struct magic_entry_set * mset)1328 load_1(struct magic_set *ms, int action, const char *fn, int *errs,
1329    struct magic_entry_set *mset)
1330 {
1331 	size_t lineno = 0, llen = 0;
1332 	char *line = NULL;
1333 	ssize_t len;
1334 	struct magic_entry me;
1335 
1336 	FILE *f = fopen(ms->file = fn, "r");
1337 	if (f == NULL) {
1338 		if (errno != ENOENT)
1339 			file_error(ms, errno, "cannot read magic file `%s'",
1340 				   fn);
1341 		(*errs)++;
1342 		return;
1343 	}
1344 
1345 	memset(&me, 0, sizeof(me));
1346 	/* read and parse this file */
1347 	for (ms->line = 1; (len = getline(&line, &llen, f)) != -1;
1348 	    ms->line++) {
1349 		if (ms->magwarn >= ms->magwarn_max)
1350 			break;
1351 		if (len == 0) /* null line, garbage, etc */
1352 			continue;
1353 		if (line[len - 1] == '\n') {
1354 			lineno++;
1355 			line[len - 1] = '\0'; /* delete newline */
1356 		}
1357 		switch (line[0]) {
1358 		case '\0':	/* empty, do not parse */
1359 		case '#':	/* comment, do not parse */
1360 			continue;
1361 		case '!':
1362 			if (line[1] == ':') {
1363 				size_t i;
1364 
1365 				for (i = 0; bang[i].name != NULL; i++) {
1366 					if (CAST(size_t, len - 2) > bang[i].len &&
1367 					    memcmp(bang[i].name, line + 2,
1368 					    bang[i].len) == 0)
1369 						break;
1370 				}
1371 				if (bang[i].name == NULL) {
1372 					file_error(ms, 0,
1373 					    "Unknown !: entry `%s'", line);
1374 					(*errs)++;
1375 					continue;
1376 				}
1377 				if (me.mp == NULL) {
1378 					file_error(ms, 0,
1379 					    "No current entry for :!%s type",
1380 						bang[i].name);
1381 					(*errs)++;
1382 					continue;
1383 				}
1384 				if ((*bang[i].fun)(ms, &me,
1385 				    line + bang[i].len + 2,
1386 				    len - bang[i].len - 2) != 0) {
1387 					(*errs)++;
1388 					continue;
1389 				}
1390 				continue;
1391 			}
1392 			/*FALLTHROUGH*/
1393 		default:
1394 		again:
1395 			switch (parse(ms, &me, fn, line, lineno, action)) {
1396 			case 0:
1397 				continue;
1398 			case 1:
1399 				(void)addentry(ms, &me, mset);
1400 				goto again;
1401 			default:
1402 				(*errs)++;
1403 				break;
1404 			}
1405 		}
1406 	}
1407 	if (me.mp)
1408 		(void)addentry(ms, &me, mset);
1409 	free(line);
1410 	(void)fclose(f);
1411 }
1412 
1413 /*
1414  * parse a file or directory of files
1415  * const char *fn: name of magic file or directory
1416  */
1417 file_private int
cmpstrp(const void * p1,const void * p2)1418 cmpstrp(const void *p1, const void *p2)
1419 {
1420         return strcmp(*RCAST(char *const *, p1), *RCAST(char *const *, p2));
1421 }
1422 
1423 
1424 file_private uint32_t
set_text_binary(struct magic_set * ms,struct magic_entry * me,uint32_t nme,uint32_t starttest)1425 set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1426     uint32_t starttest)
1427 {
1428 	static const char text[] = "text";
1429 	static const char binary[] = "binary";
1430 	static const size_t len = sizeof(text);
1431 
1432 	uint32_t i = starttest;
1433 
1434 	do {
1435 		set_test_type(me[starttest].mp, me[i].mp);
1436 		if ((ms->flags & MAGIC_DEBUG) == 0)
1437 			continue;
1438 		(void)fprintf(stderr, "%s%s%s: %s\n",
1439 		    me[i].mp->mimetype,
1440 		    me[i].mp->mimetype[0] == '\0' ? "" : "; ",
1441 		    me[i].mp->desc[0] ? me[i].mp->desc : "(no description)",
1442 		    me[i].mp->flag & BINTEST ? binary : text);
1443 		if (me[i].mp->flag & BINTEST) {
1444 			char *p = strstr(me[i].mp->desc, text);
1445 			if (p && (p == me[i].mp->desc ||
1446 			    isspace(CAST(unsigned char, p[-1]))) &&
1447 			    (p + len - me[i].mp->desc == MAXstring
1448 			    || (p[len] == '\0' ||
1449 			    isspace(CAST(unsigned char, p[len])))))
1450 				(void)fprintf(stderr, "*** Possible "
1451 				    "binary test for text type\n");
1452 		}
1453 	} while (++i < nme && me[i].mp->cont_level != 0);
1454 	return i;
1455 }
1456 
1457 file_private void
set_last_default(struct magic_set * ms,struct magic_entry * me,uint32_t nme)1458 set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme)
1459 {
1460 	uint32_t i;
1461 	for (i = 0; i < nme; i++) {
1462 		if (me[i].mp->cont_level == 0 &&
1463 		    me[i].mp->type == FILE_DEFAULT) {
1464 			while (++i < nme)
1465 				if (me[i].mp->cont_level == 0)
1466 					break;
1467 			if (i != nme) {
1468 				/* XXX - Ugh! */
1469 				ms->line = me[i].mp->lineno;
1470 				file_magwarn(ms,
1471 				    "level 0 \"default\" did not sort last");
1472 			}
1473 			return;
1474 		}
1475 	}
1476 }
1477 
1478 file_private int
coalesce_entries(struct magic_set * ms,struct magic_entry * me,uint32_t nme,struct magic ** ma,uint32_t * nma)1479 coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1480     struct magic **ma, uint32_t *nma)
1481 {
1482 	uint32_t i, mentrycount = 0;
1483 	size_t slen;
1484 
1485 	for (i = 0; i < nme; i++)
1486 		mentrycount += me[i].cont_count;
1487 
1488 	if (mentrycount == 0) {
1489 		*ma = NULL;
1490 		*nma = 0;
1491 		return 0;
1492 	}
1493 
1494 	slen = sizeof(**ma) * mentrycount;
1495 	if ((*ma = CAST(struct magic *, malloc(slen))) == NULL) {
1496 		file_oomem(ms, slen);
1497 		return -1;
1498 	}
1499 
1500 	mentrycount = 0;
1501 	for (i = 0; i < nme; i++) {
1502 		(void)memcpy(*ma + mentrycount, me[i].mp,
1503 		    me[i].cont_count * sizeof(**ma));
1504 		mentrycount += me[i].cont_count;
1505 	}
1506 	*nma = mentrycount;
1507 	return 0;
1508 }
1509 
1510 file_private void
magic_entry_free(struct magic_entry * me,uint32_t nme)1511 magic_entry_free(struct magic_entry *me, uint32_t nme)
1512 {
1513 	uint32_t i;
1514 	if (me == NULL)
1515 		return;
1516 	for (i = 0; i < nme; i++)
1517 		free(me[i].mp);
1518 	free(me);
1519 }
1520 
1521 file_private struct magic_map *
apprentice_load(struct magic_set * ms,const char * fn,int action)1522 apprentice_load(struct magic_set *ms, const char *fn, int action)
1523 {
1524 	int errs = 0;
1525 	uint32_t i, j;
1526 	size_t files = 0, maxfiles = 0;
1527 	char **filearr = NULL, *mfn;
1528 	struct stat st;
1529 	struct magic_map *map;
1530 	struct magic_entry_set mset[MAGIC_SETS];
1531 	DIR *dir;
1532 	struct dirent *d;
1533 
1534 	memset(mset, 0, sizeof(mset));
1535 	ms->flags |= MAGIC_CHECK;	/* Enable checks for parsed files */
1536 
1537 
1538 	if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL)
1539 	{
1540 		file_oomem(ms, sizeof(*map));
1541 		return NULL;
1542 	}
1543 	map->type = MAP_TYPE_MALLOC;
1544 
1545 	/* print silly verbose header for USG compat. */
1546 	if (action == FILE_CHECK)
1547 		(void)fprintf(stderr, "%s\n", usg_hdr);
1548 
1549 	/* load directory or file */
1550 	if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
1551 		dir = opendir(fn);
1552 		if (!dir) {
1553 			errs++;
1554 			goto out;
1555 		}
1556 		while ((d = readdir(dir)) != NULL) {
1557 			if (d->d_name[0] == '.')
1558 				continue;
1559 			if (asprintf(&mfn, "%s/%s", fn, d->d_name) < 0) {
1560 				file_oomem(ms,
1561 				    strlen(fn) + strlen(d->d_name) + 2);
1562 				errs++;
1563 				closedir(dir);
1564 				goto out;
1565 			}
1566 			if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) {
1567 				free(mfn);
1568 				continue;
1569 			}
1570 			if (files >= maxfiles) {
1571 				size_t mlen;
1572 				char **nfilearr;
1573 				maxfiles = (maxfiles + 1) * 2;
1574 				mlen = maxfiles * sizeof(*filearr);
1575 				if ((nfilearr = CAST(char **,
1576 				    realloc(filearr, mlen))) == NULL) {
1577 					file_oomem(ms, mlen);
1578 					free(mfn);
1579 					closedir(dir);
1580 					errs++;
1581 					goto out;
1582 				}
1583 				filearr = nfilearr;
1584 			}
1585 			filearr[files++] = mfn;
1586 		}
1587 		closedir(dir);
1588 		if (filearr) {
1589 			qsort(filearr, files, sizeof(*filearr), cmpstrp);
1590 			for (i = 0; i < files; i++) {
1591 				load_1(ms, action, filearr[i], &errs, mset);
1592 				free(filearr[i]);
1593 			}
1594 			free(filearr);
1595 			filearr = NULL;
1596 		}
1597 	} else
1598 		load_1(ms, action, fn, &errs, mset);
1599 	if (errs)
1600 		goto out;
1601 
1602 	for (j = 0; j < MAGIC_SETS; j++) {
1603 		/* Set types of tests */
1604 		for (i = 0; i < mset[j].count; ) {
1605 			if (mset[j].me[i].mp->cont_level != 0) {
1606 				i++;
1607 				continue;
1608 			}
1609 			i = set_text_binary(ms, mset[j].me, mset[j].count, i);
1610 		}
1611 		if (mset[j].me)
1612 			qsort(mset[j].me, mset[j].count, sizeof(*mset[0].me),
1613 			    apprentice_sort);
1614 
1615 		/*
1616 		 * Make sure that any level 0 "default" line is last
1617 		 * (if one exists).
1618 		 */
1619 		set_last_default(ms, mset[j].me, mset[j].count);
1620 
1621 		/* coalesce per file arrays into a single one, if needed */
1622 		if (mset[j].count == 0)
1623 			continue;
1624 
1625 		if (coalesce_entries(ms, mset[j].me, mset[j].count,
1626 		    &map->magic[j], &map->nmagic[j]) == -1) {
1627 			errs++;
1628 			goto out;
1629 		}
1630 	}
1631 
1632 out:
1633 	free(filearr);
1634 	for (j = 0; j < MAGIC_SETS; j++)
1635 		magic_entry_free(mset[j].me, mset[j].count);
1636 
1637 	if (errs) {
1638 		apprentice_unmap(map);
1639 		return NULL;
1640 	}
1641 	return map;
1642 }
1643 
1644 /*
1645  * extend the sign bit if the comparison is to be signed
1646  */
1647 file_protected uint64_t
file_signextend(struct magic_set * ms,struct magic * m,uint64_t v)1648 file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
1649 {
1650 	if (!(m->flag & UNSIGNED)) {
1651 		switch(m->type) {
1652 		/*
1653 		 * Do not remove the casts below.  They are
1654 		 * vital.  When later compared with the data,
1655 		 * the sign extension must have happened.
1656 		 */
1657 		case FILE_BYTE:
1658 			v = CAST(signed char,  v);
1659 			break;
1660 		case FILE_SHORT:
1661 		case FILE_BESHORT:
1662 		case FILE_LESHORT:
1663 			v = CAST(short, v);
1664 			break;
1665 		case FILE_DATE:
1666 		case FILE_BEDATE:
1667 		case FILE_LEDATE:
1668 		case FILE_MEDATE:
1669 		case FILE_LDATE:
1670 		case FILE_BELDATE:
1671 		case FILE_LELDATE:
1672 		case FILE_MELDATE:
1673 		case FILE_LONG:
1674 		case FILE_BELONG:
1675 		case FILE_LELONG:
1676 		case FILE_MELONG:
1677 		case FILE_FLOAT:
1678 		case FILE_BEFLOAT:
1679 		case FILE_LEFLOAT:
1680 		case FILE_MSDOSDATE:
1681 		case FILE_BEMSDOSDATE:
1682 		case FILE_LEMSDOSDATE:
1683 		case FILE_MSDOSTIME:
1684 		case FILE_BEMSDOSTIME:
1685 		case FILE_LEMSDOSTIME:
1686 			v = CAST(int32_t, v);
1687 			break;
1688 		case FILE_QUAD:
1689 		case FILE_BEQUAD:
1690 		case FILE_LEQUAD:
1691 		case FILE_QDATE:
1692 		case FILE_QLDATE:
1693 		case FILE_QWDATE:
1694 		case FILE_BEQDATE:
1695 		case FILE_BEQLDATE:
1696 		case FILE_BEQWDATE:
1697 		case FILE_LEQDATE:
1698 		case FILE_LEQLDATE:
1699 		case FILE_LEQWDATE:
1700 		case FILE_DOUBLE:
1701 		case FILE_BEDOUBLE:
1702 		case FILE_LEDOUBLE:
1703 		case FILE_OFFSET:
1704 		case FILE_BEVARINT:
1705 		case FILE_LEVARINT:
1706 			v = CAST(int64_t, v);
1707 			break;
1708 		case FILE_STRING:
1709 		case FILE_PSTRING:
1710 		case FILE_BESTRING16:
1711 		case FILE_LESTRING16:
1712 		case FILE_REGEX:
1713 		case FILE_SEARCH:
1714 		case FILE_DEFAULT:
1715 		case FILE_INDIRECT:
1716 		case FILE_NAME:
1717 		case FILE_USE:
1718 		case FILE_CLEAR:
1719 		case FILE_DER:
1720 		case FILE_GUID:
1721 		case FILE_OCTAL:
1722 			break;
1723 		default:
1724 			if (ms->flags & MAGIC_CHECK)
1725 			    file_magwarn(ms, "cannot happen: m->type=%d\n",
1726 				    m->type);
1727 			return FILE_BADSIZE;
1728 		}
1729 	}
1730 	return v;
1731 }
1732 
1733 file_private int
string_modifier_check(struct magic_set * ms,struct magic * m)1734 string_modifier_check(struct magic_set *ms, struct magic *m)
1735 {
1736 	if ((ms->flags & MAGIC_CHECK) == 0)
1737 		return 0;
1738 
1739 	if ((m->type != FILE_REGEX || (m->str_flags & REGEX_LINE_COUNT) == 0) &&
1740 	    (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0)) {
1741 		file_magwarn(ms,
1742 		    "'/BHhLl' modifiers are only allowed for pascal strings\n");
1743 		return -1;
1744 	}
1745 	switch (m->type) {
1746 	case FILE_BESTRING16:
1747 	case FILE_LESTRING16:
1748 		if (m->str_flags != 0) {
1749 			file_magwarn(ms,
1750 			    "no modifiers allowed for 16-bit strings\n");
1751 			return -1;
1752 		}
1753 		break;
1754 	case FILE_STRING:
1755 	case FILE_PSTRING:
1756 		if ((m->str_flags & REGEX_OFFSET_START) != 0) {
1757 			file_magwarn(ms,
1758 			    "'/%c' only allowed on regex and search\n",
1759 			    CHAR_REGEX_OFFSET_START);
1760 			return -1;
1761 		}
1762 		break;
1763 	case FILE_SEARCH:
1764 		if (m->str_range == 0) {
1765 			file_magwarn(ms,
1766 			    "missing range; defaulting to %d\n",
1767                             STRING_DEFAULT_RANGE);
1768 			m->str_range = STRING_DEFAULT_RANGE;
1769 			return -1;
1770 		}
1771 		break;
1772 	case FILE_REGEX:
1773 		if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) {
1774 			file_magwarn(ms, "'/%c' not allowed on regex\n",
1775 			    CHAR_COMPACT_WHITESPACE);
1776 			return -1;
1777 		}
1778 		if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) {
1779 			file_magwarn(ms, "'/%c' not allowed on regex\n",
1780 			    CHAR_COMPACT_OPTIONAL_WHITESPACE);
1781 			return -1;
1782 		}
1783 		break;
1784 	default:
1785 		file_magwarn(ms, "coding error: m->type=%d\n",
1786 		    m->type);
1787 		return -1;
1788 	}
1789 	return 0;
1790 }
1791 
1792 file_private int
get_op(char c)1793 get_op(char c)
1794 {
1795 	switch (c) {
1796 	case '&':
1797 		return FILE_OPAND;
1798 	case '|':
1799 		return FILE_OPOR;
1800 	case '^':
1801 		return FILE_OPXOR;
1802 	case '+':
1803 		return FILE_OPADD;
1804 	case '-':
1805 		return FILE_OPMINUS;
1806 	case '*':
1807 		return FILE_OPMULTIPLY;
1808 	case '/':
1809 		return FILE_OPDIVIDE;
1810 	case '%':
1811 		return FILE_OPMODULO;
1812 	default:
1813 		return -1;
1814 	}
1815 }
1816 
1817 #ifdef ENABLE_CONDITIONALS
1818 file_private int
get_cond(const char * l,const char ** t)1819 get_cond(const char *l, const char **t)
1820 {
1821 	static const struct cond_tbl_s {
1822 		char name[8];
1823 		size_t len;
1824 		int cond;
1825 	} cond_tbl[] = {
1826 		{ "if",		2,	COND_IF },
1827 		{ "elif",	4,	COND_ELIF },
1828 		{ "else",	4,	COND_ELSE },
1829 		{ "",		0,	COND_NONE },
1830 	};
1831 	const struct cond_tbl_s *p;
1832 
1833 	for (p = cond_tbl; p->len; p++) {
1834 		if (strncmp(l, p->name, p->len) == 0 &&
1835 		    isspace(CAST(unsigned char, l[p->len]))) {
1836 			if (t)
1837 				*t = l + p->len;
1838 			break;
1839 		}
1840 	}
1841 	return p->cond;
1842 }
1843 
1844 file_private int
check_cond(struct magic_set * ms,int cond,uint32_t cont_level)1845 check_cond(struct magic_set *ms, int cond, uint32_t cont_level)
1846 {
1847 	int last_cond;
1848 	last_cond = ms->c.li[cont_level].last_cond;
1849 
1850 	switch (cond) {
1851 	case COND_IF:
1852 		if (last_cond != COND_NONE && last_cond != COND_ELIF) {
1853 			if (ms->flags & MAGIC_CHECK)
1854 				file_magwarn(ms, "syntax error: `if'");
1855 			return -1;
1856 		}
1857 		last_cond = COND_IF;
1858 		break;
1859 
1860 	case COND_ELIF:
1861 		if (last_cond != COND_IF && last_cond != COND_ELIF) {
1862 			if (ms->flags & MAGIC_CHECK)
1863 				file_magwarn(ms, "syntax error: `elif'");
1864 			return -1;
1865 		}
1866 		last_cond = COND_ELIF;
1867 		break;
1868 
1869 	case COND_ELSE:
1870 		if (last_cond != COND_IF && last_cond != COND_ELIF) {
1871 			if (ms->flags & MAGIC_CHECK)
1872 				file_magwarn(ms, "syntax error: `else'");
1873 			return -1;
1874 		}
1875 		last_cond = COND_NONE;
1876 		break;
1877 
1878 	case COND_NONE:
1879 		last_cond = COND_NONE;
1880 		break;
1881 	}
1882 
1883 	ms->c.li[cont_level].last_cond = last_cond;
1884 	return 0;
1885 }
1886 #endif /* ENABLE_CONDITIONALS */
1887 
1888 file_private int
parse_indirect_modifier(struct magic_set * ms,struct magic * m,const char ** lp)1889 parse_indirect_modifier(struct magic_set *ms, struct magic *m, const char **lp)
1890 {
1891 	const char *l = *lp;
1892 
1893 	while (!isspace(CAST(unsigned char, *++l)))
1894 		switch (*l) {
1895 		case CHAR_INDIRECT_RELATIVE:
1896 			m->str_flags |= INDIRECT_RELATIVE;
1897 			break;
1898 		default:
1899 			if (ms->flags & MAGIC_CHECK)
1900 				file_magwarn(ms, "indirect modifier `%c' "
1901 					"invalid", *l);
1902 			*lp = l;
1903 			return -1;
1904 		}
1905 	*lp = l;
1906 	return 0;
1907 }
1908 
1909 file_private void
parse_op_modifier(struct magic_set * ms,struct magic * m,const char ** lp,int op)1910 parse_op_modifier(struct magic_set *ms, struct magic *m, const char **lp,
1911     int op)
1912 {
1913 	const char *l = *lp;
1914 	char *t;
1915 	uint64_t val;
1916 
1917 	++l;
1918 	m->mask_op |= op;
1919 	val = CAST(uint64_t, strtoull(l, &t, 0));
1920 	l = t;
1921 	m->num_mask = file_signextend(ms, m, val);
1922 	eatsize(&l);
1923 	*lp = l;
1924 }
1925 
1926 file_private int
parse_string_modifier(struct magic_set * ms,struct magic * m,const char ** lp)1927 parse_string_modifier(struct magic_set *ms, struct magic *m, const char **lp)
1928 {
1929 	const char *l = *lp;
1930 	char *t;
1931 	int have_range = 0;
1932 
1933 	while (!isspace(CAST(unsigned char, *++l))) {
1934 		switch (*l) {
1935 		case '0':  case '1':  case '2':
1936 		case '3':  case '4':  case '5':
1937 		case '6':  case '7':  case '8':
1938 		case '9':
1939 			if (have_range && (ms->flags & MAGIC_CHECK))
1940 				file_magwarn(ms, "multiple ranges");
1941 			have_range = 1;
1942 			m->str_range = CAST(uint32_t, strtoul(l, &t, 0));
1943 			if (m->str_range == 0)
1944 				file_magwarn(ms, "zero range");
1945 			l = t - 1;
1946 			break;
1947 		case CHAR_COMPACT_WHITESPACE:
1948 			m->str_flags |= STRING_COMPACT_WHITESPACE;
1949 			break;
1950 		case CHAR_COMPACT_OPTIONAL_WHITESPACE:
1951 			m->str_flags |= STRING_COMPACT_OPTIONAL_WHITESPACE;
1952 			break;
1953 		case CHAR_IGNORE_LOWERCASE:
1954 			m->str_flags |= STRING_IGNORE_LOWERCASE;
1955 			break;
1956 		case CHAR_IGNORE_UPPERCASE:
1957 			m->str_flags |= STRING_IGNORE_UPPERCASE;
1958 			break;
1959 		case CHAR_REGEX_OFFSET_START:
1960 			m->str_flags |= REGEX_OFFSET_START;
1961 			break;
1962 		case CHAR_BINTEST:
1963 			m->str_flags |= STRING_BINTEST;
1964 			break;
1965 		case CHAR_TEXTTEST:
1966 			m->str_flags |= STRING_TEXTTEST;
1967 			break;
1968 		case CHAR_TRIM:
1969 			m->str_flags |= STRING_TRIM;
1970 			break;
1971 		case CHAR_FULL_WORD:
1972 			m->str_flags |= STRING_FULL_WORD;
1973 			break;
1974 		case CHAR_PSTRING_1_LE:
1975 #define SET_LENGTH(a) m->str_flags = (m->str_flags & ~PSTRING_LEN) | (a)
1976 			if (m->type != FILE_PSTRING)
1977 				goto bad;
1978 			SET_LENGTH(PSTRING_1_LE);
1979 			break;
1980 		case CHAR_PSTRING_2_BE:
1981 			if (m->type != FILE_PSTRING)
1982 				goto bad;
1983 			SET_LENGTH(PSTRING_2_BE);
1984 			break;
1985 		case CHAR_PSTRING_2_LE:
1986 			if (m->type != FILE_PSTRING)
1987 				goto bad;
1988 			SET_LENGTH(PSTRING_2_LE);
1989 			break;
1990 		case CHAR_PSTRING_4_BE:
1991 			if (m->type != FILE_PSTRING)
1992 				goto bad;
1993 			SET_LENGTH(PSTRING_4_BE);
1994 			break;
1995 		case CHAR_PSTRING_4_LE:
1996 			switch (m->type) {
1997 			case FILE_PSTRING:
1998 			case FILE_REGEX:
1999 				break;
2000 			default:
2001 				goto bad;
2002 			}
2003 			SET_LENGTH(PSTRING_4_LE);
2004 			break;
2005 		case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:
2006 			if (m->type != FILE_PSTRING)
2007 				goto bad;
2008 			m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF;
2009 			break;
2010 		default:
2011 		bad:
2012 			if (ms->flags & MAGIC_CHECK)
2013 				file_magwarn(ms, "string modifier `%c' "
2014 					"invalid", *l);
2015 			goto out;
2016 		}
2017 		/* allow multiple '/' for readability */
2018 		if (l[1] == '/' && !isspace(CAST(unsigned char, l[2])))
2019 			l++;
2020 	}
2021 	if (string_modifier_check(ms, m) == -1)
2022 		goto out;
2023 	*lp = l;
2024 	return 0;
2025 out:
2026 	*lp = l;
2027 	return -1;
2028 }
2029 
2030 /*
2031  * parse one line from magic file, put into magic[index++] if valid
2032  */
2033 file_private int
parse(struct magic_set * ms,struct magic_entry * me,const char * file,const char * line,size_t lineno,int action)2034 parse(struct magic_set *ms, struct magic_entry *me, const char *file,
2035     const char *line, size_t lineno, int action)
2036 {
2037 #ifdef ENABLE_CONDITIONALS
2038 	static uint32_t last_cont_level = 0;
2039 #endif
2040 	size_t i;
2041 	struct magic *m;
2042 	const char *l = line;
2043 	char *t;
2044 	int op;
2045 	uint32_t cont_level;
2046 	int32_t diff;
2047 
2048 	cont_level = 0;
2049 
2050 	/*
2051 	 * Parse the offset.
2052 	 */
2053 	while (*l == '>') {
2054 		++l;		/* step over */
2055 		cont_level++;
2056 	}
2057 #ifdef ENABLE_CONDITIONALS
2058 	if (cont_level == 0 || cont_level > last_cont_level)
2059 		if (file_check_mem(ms, cont_level) == -1)
2060 			return -1;
2061 	last_cont_level = cont_level;
2062 #endif
2063 	if (cont_level != 0) {
2064 		if (me->mp == NULL) {
2065 			file_magerror(ms, "No current entry for continuation");
2066 			return -1;
2067 		}
2068 		if (me->cont_count == 0) {
2069 			file_magerror(ms, "Continuations present with 0 count");
2070 			return -1;
2071 		}
2072 		m = &me->mp[me->cont_count - 1];
2073 		diff = CAST(int32_t, cont_level) - CAST(int32_t, m->cont_level);
2074 		if (diff > 1)
2075 			file_magwarn(ms, "New continuation level %u is more "
2076 			    "than one larger than current level %u", cont_level,
2077 			    m->cont_level);
2078 		if (me->cont_count == me->max_count) {
2079 			struct magic *nm;
2080 			size_t cnt = me->max_count + ALLOC_CHUNK;
2081 			if ((nm = CAST(struct magic *, realloc(me->mp,
2082 			    sizeof(*nm) * cnt))) == NULL) {
2083 				file_oomem(ms, sizeof(*nm) * cnt);
2084 				return -1;
2085 			}
2086 			me->mp = nm;
2087 			me->max_count = CAST(uint32_t, cnt);
2088 		}
2089 		m = &me->mp[me->cont_count++];
2090 		(void)memset(m, 0, sizeof(*m));
2091 		m->cont_level = cont_level;
2092 	} else {
2093 		static const size_t len = sizeof(*m) * ALLOC_CHUNK;
2094 		if (me->mp != NULL)
2095 			return 1;
2096 		if ((m = CAST(struct magic *, malloc(len))) == NULL) {
2097 			file_oomem(ms, len);
2098 			return -1;
2099 		}
2100 		me->mp = m;
2101 		me->max_count = ALLOC_CHUNK;
2102 		(void)memset(m, 0, sizeof(*m));
2103 		m->factor_op = FILE_FACTOR_OP_NONE;
2104 		m->cont_level = 0;
2105 		me->cont_count = 1;
2106 	}
2107 	m->lineno = CAST(uint32_t, lineno);
2108 
2109 	if (*l == '&') {  /* m->cont_level == 0 checked below. */
2110                 ++l;            /* step over */
2111                 m->flag |= OFFADD;
2112         }
2113 	if (*l == '(') {
2114 		++l;		/* step over */
2115 		m->flag |= INDIR;
2116 		if (m->flag & OFFADD)
2117 			m->flag = (m->flag & ~OFFADD) | INDIROFFADD;
2118 
2119 		if (*l == '&') {  /* m->cont_level == 0 checked below */
2120 			++l;            /* step over */
2121 			m->flag |= OFFADD;
2122 		}
2123 	}
2124 	/* Indirect offsets are not valid at level 0. */
2125 	if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD))) {
2126 		if (ms->flags & MAGIC_CHECK)
2127 			file_magwarn(ms, "relative offset at level 0");
2128 		return -1;
2129 	}
2130 
2131 	/* get offset, then skip over it */
2132 	if (*l == '-' || *l == '+') {
2133 		++l;            /* step over */
2134 		m->flag |= l[-1] == '-' ? OFFNEGATIVE : OFFPOSITIVE;
2135 	}
2136 	m->offset = CAST(int32_t, strtol(l, &t, 0));
2137         if (l == t) {
2138 		if (ms->flags & MAGIC_CHECK)
2139 			file_magwarn(ms, "offset `%s' invalid", l);
2140 		return -1;
2141 	}
2142 
2143         l = t;
2144 
2145 	if (m->flag & INDIR) {
2146 		m->in_type = FILE_LONG;
2147 		m->in_offset = 0;
2148 		m->in_op = 0;
2149 		/*
2150 		 * read [.,lbs][+-]nnnnn)
2151 		 */
2152 		if (*l == '.' || *l == ',') {
2153 			if (*l == ',')
2154 				m->in_op |= FILE_OPSIGNED;
2155 			l++;
2156 			switch (*l) {
2157 			case 'l':
2158 				m->in_type = FILE_LELONG;
2159 				break;
2160 			case 'L':
2161 				m->in_type = FILE_BELONG;
2162 				break;
2163 			case 'm':
2164 				m->in_type = FILE_MELONG;
2165 				break;
2166 			case 'h':
2167 			case 's':
2168 				m->in_type = FILE_LESHORT;
2169 				break;
2170 			case 'H':
2171 			case 'S':
2172 				m->in_type = FILE_BESHORT;
2173 				break;
2174 			case 'c':
2175 			case 'b':
2176 			case 'C':
2177 			case 'B':
2178 				m->in_type = FILE_BYTE;
2179 				break;
2180 			case 'e':
2181 			case 'f':
2182 			case 'g':
2183 				m->in_type = FILE_LEDOUBLE;
2184 				break;
2185 			case 'E':
2186 			case 'F':
2187 			case 'G':
2188 				m->in_type = FILE_BEDOUBLE;
2189 				break;
2190 			case 'i':
2191 				m->in_type = FILE_LEID3;
2192 				break;
2193 			case 'I':
2194 				m->in_type = FILE_BEID3;
2195 				break;
2196 			case 'o':
2197 				m->in_type = FILE_OCTAL;
2198 				break;
2199 			case 'q':
2200 				m->in_type = FILE_LEQUAD;
2201 				break;
2202 			case 'Q':
2203 				m->in_type = FILE_BEQUAD;
2204 				break;
2205 			default:
2206 				if (ms->flags & MAGIC_CHECK)
2207 					file_magwarn(ms,
2208 					    "indirect offset type `%c' invalid",
2209 					    *l);
2210 				return -1;
2211 			}
2212 			l++;
2213 		}
2214 
2215 		if (*l == '~') {
2216 			m->in_op |= FILE_OPINVERSE;
2217 			l++;
2218 		}
2219 		if ((op = get_op(*l)) != -1) {
2220 			m->in_op |= op;
2221 			l++;
2222 		}
2223 		if (*l == '(') {
2224 			m->in_op |= FILE_OPINDIRECT;
2225 			l++;
2226 		}
2227 		if (isdigit(CAST(unsigned char, *l)) || *l == '-') {
2228 			m->in_offset = CAST(int32_t, strtol(l, &t, 0));
2229 			if (l == t) {
2230 				if (ms->flags & MAGIC_CHECK)
2231 					file_magwarn(ms,
2232 					    "in_offset `%s' invalid", l);
2233 				return -1;
2234 			}
2235 			l = t;
2236 		}
2237 		if (*l++ != ')' ||
2238 		    ((m->in_op & FILE_OPINDIRECT) && *l++ != ')')) {
2239 			if (ms->flags & MAGIC_CHECK)
2240 				file_magwarn(ms,
2241 				    "missing ')' in indirect offset");
2242 			return -1;
2243 		}
2244 	}
2245 	EATAB;
2246 
2247 #ifdef ENABLE_CONDITIONALS
2248 	m->cond = get_cond(l, &l);
2249 	if (check_cond(ms, m->cond, cont_level) == -1)
2250 		return -1;
2251 
2252 	EATAB;
2253 #endif
2254 
2255 	/*
2256 	 * Parse the type.
2257 	 */
2258 	if (*l == 'u') {
2259 		/*
2260 		 * Try it as a keyword type prefixed by "u"; match what
2261 		 * follows the "u".  If that fails, try it as an SUS
2262 		 * integer type.
2263 		 */
2264 		m->type = get_type(type_tbl, l + 1, &l);
2265 		if (m->type == FILE_INVALID) {
2266 			/*
2267 			 * Not a keyword type; parse it as an SUS type,
2268 			 * 'u' possibly followed by a number or C/S/L.
2269 			 */
2270 			m->type = get_standard_integer_type(l, &l);
2271 		}
2272 		/* It's unsigned. */
2273 		if (m->type != FILE_INVALID)
2274 			m->flag |= UNSIGNED;
2275 	} else {
2276 		/*
2277 		 * Try it as a keyword type.  If that fails, try it as
2278 		 * an SUS integer type if it begins with "d" or as an
2279 		 * SUS string type if it begins with "s".  In any case,
2280 		 * it's not unsigned.
2281 		 */
2282 		m->type = get_type(type_tbl, l, &l);
2283 		if (m->type == FILE_INVALID) {
2284 			/*
2285 			 * Not a keyword type; parse it as an SUS type,
2286 			 * either 'd' possibly followed by a number or
2287 			 * C/S/L, or just 's'.
2288 			 */
2289 			if (*l == 'd')
2290 				m->type = get_standard_integer_type(l, &l);
2291 			else if (*l == 's'
2292 			    && !isalpha(CAST(unsigned char, l[1]))) {
2293 				m->type = FILE_STRING;
2294 				++l;
2295 			}
2296 		}
2297 	}
2298 
2299 	if (m->type == FILE_INVALID) {
2300 		/* Not found - try it as a special keyword. */
2301 		m->type = get_type(special_tbl, l, &l);
2302 	}
2303 
2304 	if (m->type == FILE_INVALID) {
2305 		if (ms->flags & MAGIC_CHECK)
2306 			file_magwarn(ms, "type `%s' invalid", l);
2307 		return -1;
2308 	}
2309 
2310 	if (m->type == FILE_NAME && cont_level != 0) {
2311 		if (ms->flags & MAGIC_CHECK)
2312 			file_magwarn(ms, "`name%s' entries can only be "
2313 			    "declared at top level", l);
2314 		return -1;
2315 	}
2316 
2317 	/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
2318 	/* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
2319 
2320 	m->mask_op = 0;
2321 	if (*l == '~') {
2322 		if (!IS_STRING(m->type))
2323 			m->mask_op |= FILE_OPINVERSE;
2324 		else if (ms->flags & MAGIC_CHECK)
2325 			file_magwarn(ms, "'~' invalid for string types");
2326 		++l;
2327 	}
2328 	m->str_range = 0;
2329 	m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0;
2330 	if ((op = get_op(*l)) != -1) {
2331 		if (IS_STRING(m->type)) {
2332 			int r;
2333 
2334 			if (op != FILE_OPDIVIDE) {
2335 				if (ms->flags & MAGIC_CHECK)
2336 					file_magwarn(ms,
2337 					    "invalid string/indirect op: "
2338 					    "`%c'", *t);
2339 				return -1;
2340 			}
2341 
2342 			if (m->type == FILE_INDIRECT)
2343 				r = parse_indirect_modifier(ms, m, &l);
2344 			else
2345 				r = parse_string_modifier(ms, m, &l);
2346 			if (r == -1)
2347 				return -1;
2348 		} else
2349 			parse_op_modifier(ms, m, &l, op);
2350 	}
2351 
2352 	/*
2353 	 * We used to set mask to all 1's here, instead let's just not do
2354 	 * anything if mask = 0 (unless you have a better idea)
2355 	 */
2356 	EATAB;
2357 
2358 	switch (*l) {
2359 	case '>':
2360 	case '<':
2361   		m->reln = *l;
2362   		++l;
2363 		if (*l == '=') {
2364 			if (ms->flags & MAGIC_CHECK) {
2365 				file_magwarn(ms, "%c= not supported",
2366 				    m->reln);
2367 				return -1;
2368 			}
2369 		   ++l;
2370 		}
2371 		break;
2372 	/* Old-style anding: "0 byte &0x80 dynamically linked" */
2373 	case '&':
2374 	case '^':
2375 	case '=':
2376   		m->reln = *l;
2377   		++l;
2378 		if (*l == '=') {
2379 		   /* HP compat: ignore &= etc. */
2380 		   ++l;
2381 		}
2382 		break;
2383 	case '!':
2384 		m->reln = *l;
2385 		++l;
2386 		break;
2387 	default:
2388   		m->reln = '=';	/* the default relation */
2389 		if (*l == 'x' && ((isascii(CAST(unsigned char, l[1])) &&
2390 		    isspace(CAST(unsigned char, l[1]))) || !l[1])) {
2391 			m->reln = *l;
2392 			++l;
2393 		}
2394 		break;
2395 	}
2396 	/*
2397 	 * Grab the value part, except for an 'x' reln.
2398 	 */
2399 	if (m->reln != 'x' && getvalue(ms, m, &l, action))
2400 		return -1;
2401 
2402 	/*
2403 	 * TODO finish this macro and start using it!
2404 	 * #define offsetcheck {if (offset > ms->bytes_max -1)
2405 	 *	magwarn("offset too big"); }
2406 	 */
2407 
2408 	/*
2409 	 * Now get last part - the description
2410 	 */
2411 	EATAB;
2412 	if (l[0] == '\b') {
2413 		++l;
2414 		m->flag |= NOSPACE;
2415 	} else if ((l[0] == '\\') && (l[1] == 'b')) {
2416 		++l;
2417 		++l;
2418 		m->flag |= NOSPACE;
2419 	}
2420 	for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); )
2421 		continue;
2422 	if (m->desc[0] == '\0') {
2423 		// Tuck in the filename for debugging.
2424 		strlcpy(m->desc + 1, file, sizeof(m->desc) - 1);
2425 	}
2426 	if (i == sizeof(m->desc)) {
2427 		m->desc[sizeof(m->desc) - 1] = '\0';
2428 		if (ms->flags & MAGIC_CHECK)
2429 			file_magwarn(ms, "description `%s' truncated", m->desc);
2430 	}
2431 
2432         /*
2433 	 * We only do this check while compiling, or if any of the magic
2434 	 * files were not compiled.
2435          */
2436         if (ms->flags & MAGIC_CHECK) {
2437 		if (check_format(ms, m) == -1)
2438 			return -1;
2439 	}
2440 #ifndef COMPILE_ONLY
2441 	if (action == FILE_CHECK) {
2442 		file_mdump(m);
2443 	}
2444 #endif
2445 	m->mimetype[0] = '\0';		/* initialise MIME type to none */
2446 	return 0;
2447 }
2448 
2449 /*
2450  * parse a STRENGTH annotation line from magic file, put into magic[index - 1]
2451  * if valid
2452  */
2453 /*ARGSUSED*/
2454 file_private int
parse_strength(struct magic_set * ms,struct magic_entry * me,const char * line,size_t len)2455 parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line,
2456     size_t len __attribute__((__unused__)))
2457 {
2458 	const char *l = line;
2459 	char *el;
2460 	unsigned long factor;
2461 	char sbuf[512];
2462 	struct magic *m = &me->mp[0];
2463 
2464 	if (m->factor_op != FILE_FACTOR_OP_NONE) {
2465 		file_magwarn(ms,
2466 		    "Current entry already has a strength type: %c %d",
2467 		    m->factor_op, m->factor);
2468 		return -1;
2469 	}
2470 	if (m->type == FILE_NAME) {
2471 		file_magwarn(ms, "%s: Strength setting is not supported in "
2472 		    "\"name\" magic entries",
2473 		    file_printable(ms, sbuf, sizeof(sbuf), m->value.s,
2474 		    sizeof(m->value.s)));
2475 		return -1;
2476 	}
2477 	EATAB;
2478 	switch (*l) {
2479 	case FILE_FACTOR_OP_NONE:
2480 		break;
2481 	case FILE_FACTOR_OP_PLUS:
2482 	case FILE_FACTOR_OP_MINUS:
2483 	case FILE_FACTOR_OP_TIMES:
2484 	case FILE_FACTOR_OP_DIV:
2485 		m->factor_op = *l++;
2486 		break;
2487 	default:
2488 		file_magwarn(ms, "Unknown factor op `%c'", *l);
2489 		return -1;
2490 	}
2491 	EATAB;
2492 	factor = strtoul(l, &el, 0);
2493 	if (factor > 255) {
2494 		file_magwarn(ms, "Too large factor `%lu'", factor);
2495 		goto out;
2496 	}
2497 	if (*el && !isspace(CAST(unsigned char, *el))) {
2498 		file_magwarn(ms, "Bad factor `%s'", l);
2499 		goto out;
2500 	}
2501 	m->factor = CAST(uint8_t, factor);
2502 	if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) {
2503 		file_magwarn(ms, "Cannot have factor op `%c' and factor %u",
2504 		    m->factor_op, m->factor);
2505 		goto out;
2506 	}
2507 	return 0;
2508 out:
2509 	m->factor_op = FILE_FACTOR_OP_NONE;
2510 	m->factor = 0;
2511 	return -1;
2512 }
2513 
2514 file_private int
goodchar(unsigned char x,const char * extra)2515 goodchar(unsigned char x, const char *extra)
2516 {
2517 	return (isascii(x) && isalnum(x)) || strchr(extra, x);
2518 }
2519 
2520 file_private int
parse_extra(struct magic_set * ms,struct magic_entry * me,const char * line,size_t llen,off_t off,size_t len,const char * name,const char * extra,int nt)2521 parse_extra(struct magic_set *ms, struct magic_entry *me, const char *line,
2522     size_t llen, off_t off, size_t len, const char *name, const char *extra,
2523     int nt)
2524 {
2525 	size_t i;
2526 	const char *l = line;
2527 	struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
2528 	char *buf = CAST(char *, CAST(void *, m)) + off;
2529 
2530 	if (buf[0] != '\0') {
2531 		len = nt ? strlen(buf) : len;
2532 		file_magwarn(ms, "Current entry already has a %s type "
2533 		    "`%.*s', new type `%s'", name, CAST(int, len), buf, l);
2534 		return -1;
2535 	}
2536 
2537 	if (*m->desc == '\0') {
2538 		file_magwarn(ms, "Current entry does not yet have a "
2539 		    "description for adding a %s type", name);
2540 		return -1;
2541 	}
2542 
2543 	EATAB;
2544 	for (i = 0; *l && i < llen && i < len && goodchar(*l, extra);
2545 	    buf[i++] = *l++)
2546 		continue;
2547 
2548 	if (i == len && *l) {
2549 		if (nt)
2550 			buf[len - 1] = '\0';
2551 		if (ms->flags & MAGIC_CHECK)
2552 			file_magwarn(ms, "%s type `%s' truncated %"
2553 			    SIZE_T_FORMAT "u", name, line, i);
2554 	} else {
2555 		if (!isspace(CAST(unsigned char, *l)) && !goodchar(*l, extra))
2556 			file_magwarn(ms, "%s type `%s' has bad char '%c'",
2557 			    name, line, *l);
2558 		if (nt)
2559 			buf[i] = '\0';
2560 	}
2561 
2562 	if (i > 0)
2563 		return 0;
2564 
2565 	file_magerror(ms, "Bad magic entry '%s'", line);
2566 	return -1;
2567 }
2568 
2569 /*
2570  * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
2571  * magic[index - 1]
2572  */
2573 file_private int
parse_apple(struct magic_set * ms,struct magic_entry * me,const char * line,size_t len)2574 parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line,
2575     size_t len)
2576 {
2577 	return parse_extra(ms, me, line, len,
2578 	    CAST(off_t, offsetof(struct magic, apple)),
2579 	    sizeof(me->mp[me->cont_count - 1].apple), "APPLE", "!+-./?", 0);
2580 }
2581 
2582 /*
2583  * Parse a comma-separated list of extensions
2584  */
2585 file_private int
parse_ext(struct magic_set * ms,struct magic_entry * me,const char * line,size_t len)2586 parse_ext(struct magic_set *ms, struct magic_entry *me, const char *line,
2587     size_t len)
2588 {
2589 	return parse_extra(ms, me, line, len,
2590 	    CAST(off_t, offsetof(struct magic, ext)),
2591 	    sizeof(me->mp[me->cont_count - 1].ext), "EXTENSION", ",!+-/@?_$&~",
2592 	    0);
2593 	    /* & for b&w */
2594 	    /* ~ for journal~ */
2595 }
2596 
2597 /*
2598  * parse a MIME annotation line from magic file, put into magic[index - 1]
2599  * if valid
2600  */
2601 file_private int
parse_mime(struct magic_set * ms,struct magic_entry * me,const char * line,size_t len)2602 parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line,
2603     size_t len)
2604 {
2605 	return parse_extra(ms, me, line, len,
2606 	    CAST(off_t, offsetof(struct magic, mimetype)),
2607 	    sizeof(me->mp[me->cont_count - 1].mimetype), "MIME", "+-/.$?:{};=",
2608 	    1);
2609 }
2610 
/*
 * Check that the printf conversion starting at ptr is acceptable for magic
 * type `type'.  check_format() calls this with ptr pointing just past the
 * first '%' of the description.
 *
 * The class of conversion allowed is looked up in file_formats[type]:
 * numeric (optionally "ll"-qualified for quads), floating point, or string.
 *
 * Returns 0 if the conversion is acceptable.  Returns -1 and sets *estr to
 * a short reason ("missing format spec", "not valid", "too long") otherwise.
 * An unexpected type or format class is treated as an internal error and
 * aborts the process.
 */
file_private int
check_format_type(const char *ptr, int type, const char **estr)
{
	/*
	 * quad: set when the type needs an "ll" length modifier.
	 * h: how many 'h' length modifiers the type's width allows
	 *    (2 for byte, 1 for the shorts, 0 otherwise); %c is only
	 *    accepted when h == 2.
	 */
	int quad = 0, h;
	size_t len, cnt;
	if (*ptr == '\0') {
		/* Missing format string; bad */
		*estr = "missing format spec";
		return -1;
	}

	switch (file_formats[type]) {
	case FILE_FMT_QUAD:
		quad = 1;
		/*FALLTHROUGH*/
	case FILE_FMT_NUM:
		if (quad == 0) {
			switch (type) {
			case FILE_BYTE:
				h = 2;
				break;
			case FILE_SHORT:
			case FILE_BESHORT:
			case FILE_LESHORT:
				h = 1;
				break;
			case FILE_LONG:
			case FILE_BELONG:
			case FILE_LELONG:
			case FILE_MELONG:
			case FILE_LEID3:
			case FILE_BEID3:
			case FILE_INDIRECT:
				h = 0;
				break;
			default:
				fprintf(stderr, "Bad number format %d", type);
				abort();
			}
		} else
			h = 0;
		/* Skip any run of the flag characters '-', '.' and '#'. */
		while (*ptr && strchr("-.#", *ptr) != NULL)
			ptr++;
/*
 * Consume a run of decimal digits at ptr (a width or precision) and jump
 * to `toolong' if it has more than 5 digits or its value exceeds 1024.
 */
#define CHECKLEN() do { \
	for (len = cnt = 0; isdigit(CAST(unsigned char, *ptr)); ptr++, cnt++) \
		len = len * 10 + (*ptr - '0'); \
	if (cnt > 5 || len > 1024) \
		goto toolong; \
} while (/*CONSTCOND*/0)

		/* width, optional '.', precision */
		CHECKLEN();
		if (*ptr == '.')
			ptr++;
		CHECKLEN();
		/* Quad types must spell out the "ll" length modifier. */
		if (quad) {
			if (*ptr++ != 'l')
				goto invalid;
			if (*ptr++ != 'l')
				goto invalid;
		}

		switch (*ptr++) {
#ifdef STRICT_FORMAT 	/* "long" formats are int formats for us */
		/* so don't accept the 'l' modifier */
		case 'l':
			switch (*ptr++) {
			case 'i':
			case 'd':
			case 'u':
			case 'o':
			case 'x':
			case 'X':
				if (h == 0)
					return 0;
				/*FALLTHROUGH*/
			default:
				goto invalid;
			}

		/*
		 * Don't accept h and hh modifiers. They make writing
		 * magic entries more complicated, for very little benefit
		 */
		case 'h':
			if (h-- <= 0)
				goto invalid;
			switch (*ptr++) {
			case 'h':
				if (h-- <= 0)
					goto invalid;
				switch (*ptr++) {
				case 'i':
				case 'd':
				case 'u':
				case 'o':
				case 'x':
				case 'X':
					return 0;
				default:
					goto invalid;
				}
			case 'i':
			case 'd':
			case 'u':
			case 'o':
			case 'x':
			case 'X':
				if (h == 0)
					return 0;
				/*FALLTHROUGH*/
			default:
				goto invalid;
			}
#endif
		case 'c':
			/* %c is only valid for byte-sized types. */
			if (h == 2)
				return 0;
			goto invalid;
		case 'i':
		case 'd':
		case 'u':
		case 'o':
		case 'x':
		case 'X':
#ifdef STRICT_FORMAT
			if (h == 0)
				return 0;
			/*FALLTHROUGH*/
#else
			return 0;
#endif
		default:
			goto invalid;
		}

	case FILE_FMT_FLOAT:
	case FILE_FMT_DOUBLE:
		/* optional '-', optional '.', digits, optional '.', digits */
		if (*ptr == '-')
			ptr++;
		if (*ptr == '.')
			ptr++;
		CHECKLEN();
		if (*ptr == '.')
			ptr++;
		CHECKLEN();
		switch (*ptr++) {
		case 'e':
		case 'E':
		case 'f':
		case 'F':
		case 'g':
		case 'G':
			return 0;

		default:
			goto invalid;
		}


	case FILE_FMT_STR:
		/*
		 * optional '-', width, optional ".precision"; unlike the
		 * numeric cases the digit runs are not length-limited here.
		 */
		if (*ptr == '-')
			ptr++;
		while (isdigit(CAST(unsigned char, *ptr)))
			ptr++;
		if (*ptr == '.') {
			ptr++;
			while (isdigit(CAST(unsigned char , *ptr)))
				ptr++;
		}

		switch (*ptr++) {
		case 's':
			return 0;
		default:
			goto invalid;
		}

	default:
		/* internal error */
		fprintf(stderr, "Bad file format %d", type);
		abort();
	}
invalid:
	*estr = "not valid";
	return -1;
toolong:
	*estr = "too long";
	return -1;
}
2800 
2801 /*
2802  * Check that the optional printf format in description matches
2803  * the type of the magic.
2804  */
2805 file_private int
check_format(struct magic_set * ms,struct magic * m)2806 check_format(struct magic_set *ms, struct magic *m)
2807 {
2808 	char *ptr;
2809 	const char *estr;
2810 
2811 	for (ptr = m->desc; *ptr; ptr++)
2812 		if (*ptr == '%')
2813 			break;
2814 	if (*ptr == '\0') {
2815 		/* No format string; ok */
2816 		return 1;
2817 	}
2818 
2819 	assert(file_nformats == file_nnames);
2820 
2821 	if (m->type >= file_nformats) {
2822 		file_magwarn(ms, "Internal error inconsistency between "
2823 		    "m->type and format strings");
2824 		return -1;
2825 	}
2826 	if (file_formats[m->type] == FILE_FMT_NONE) {
2827 		file_magwarn(ms, "No format string for `%s' with description "
2828 		    "`%s'", m->desc, file_names[m->type]);
2829 		return -1;
2830 	}
2831 
2832 	ptr++;
2833 	if (check_format_type(ptr, m->type, &estr) == -1) {
2834 		/*
2835 		 * TODO: this error message is unhelpful if the format
2836 		 * string is not one character long
2837 		 */
2838 		file_magwarn(ms, "Printf format is %s for type "
2839 		    "`%s' in description `%s'", estr,
2840 		    file_names[m->type], m->desc);
2841 		return -1;
2842 	}
2843 
2844 	for (; *ptr; ptr++) {
2845 		if (*ptr == '%') {
2846 			file_magwarn(ms,
2847 			    "Too many format strings (should have at most one) "
2848 			    "for `%s' with description `%s'",
2849 			    file_names[m->type], m->desc);
2850 			return -1;
2851 		}
2852 	}
2853 	return 0;
2854 }
2855 
2856 /*
2857  * Read a numeric value from a pointer, into the value union of a magic
2858  * pointer, according to the magic type.  Update the string pointer to point
2859  * just after the number read.  Return 0 for success, non-zero for failure.
2860  */
2861 file_private int
getvalue(struct magic_set * ms,struct magic * m,const char ** p,int action)2862 getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
2863 {
2864 	char *ep;
2865 	uint64_t ull;
2866 	int y;
2867 
2868 	switch (m->type) {
2869 	case FILE_BESTRING16:
2870 	case FILE_LESTRING16:
2871 	case FILE_STRING:
2872 	case FILE_PSTRING:
2873 	case FILE_REGEX:
2874 	case FILE_SEARCH:
2875 	case FILE_NAME:
2876 	case FILE_USE:
2877 	case FILE_DER:
2878 	case FILE_OCTAL:
2879 		*p = getstr(ms, m, *p, action == FILE_COMPILE);
2880 		if (*p == NULL) {
2881 			if (ms->flags & MAGIC_CHECK)
2882 				file_magwarn(ms, "cannot get string from `%s'",
2883 				    m->value.s);
2884 			return -1;
2885 		}
2886 		if (m->type == FILE_REGEX) {
2887 			file_regex_t rx;
2888 			int rc =
2889 			    file_regcomp(ms, &rx, m->value.s, REG_EXTENDED);
2890 			if (rc == 0) {
2891 				file_regfree(&rx);
2892 			}
2893 			return rc ? -1 : 0;
2894 		}
2895 		return 0;
2896 	default:
2897 		if (m->reln == 'x')
2898 			return 0;
2899 		break;
2900 	}
2901 
2902 	switch (m->type) {
2903 	case FILE_FLOAT:
2904 	case FILE_BEFLOAT:
2905 	case FILE_LEFLOAT:
2906 		errno = 0;
2907 #ifdef HAVE_STRTOF
2908 		m->value.f = strtof(*p, &ep);
2909 #else
2910 		m->value.f = (float)strtod(*p, &ep);
2911 #endif
2912 		if (errno == 0)
2913 			*p = ep;
2914 		return 0;
2915 	case FILE_DOUBLE:
2916 	case FILE_BEDOUBLE:
2917 	case FILE_LEDOUBLE:
2918 		errno = 0;
2919 		m->value.d = strtod(*p, &ep);
2920 		if (errno == 0)
2921 			*p = ep;
2922 		return 0;
2923 	case FILE_GUID:
2924 		if (file_parse_guid(*p, m->value.guid) == -1)
2925 			return -1;
2926 		*p += FILE_GUID_SIZE - 1;
2927 		return 0;
2928 	default:
2929 		errno = 0;
2930 		ull = CAST(uint64_t, strtoull(*p, &ep, 0));
2931 		m->value.q = file_signextend(ms, m, ull);
2932 		if (*p == ep) {
2933 			file_magwarn(ms, "Unparsable number `%s'", *p);
2934 			return -1;
2935 		} else {
2936 			size_t ts = typesize(m->type);
2937 			uint64_t x;
2938 			const char *q;
2939 
2940 			if (ts == FILE_BADSIZE) {
2941 				file_magwarn(ms,
2942 				    "Expected numeric type got `%s'",
2943 				    type_tbl[m->type].name);
2944 				return -1;
2945 			}
2946 			for (q = *p; isspace(CAST(unsigned char, *q)); q++)
2947 				continue;
2948 			if (*q == '-' && ull != UINT64_MAX)
2949 				ull = -CAST(int64_t, ull);
2950 			switch (ts) {
2951 			case 1:
2952 				x = CAST(uint64_t, ull & ~0xffULL);
2953 				y = (x & ~0xffULL) != ~0xffULL;
2954 				break;
2955 			case 2:
2956 				x = CAST(uint64_t, ull & ~0xffffULL);
2957 				y = (x & ~0xffffULL) != ~0xffffULL;
2958 				break;
2959 			case 4:
2960 				x = CAST(uint64_t, ull & ~0xffffffffULL);
2961 				y = (x & ~0xffffffffULL) != ~0xffffffffULL;
2962 				break;
2963 			case 8:
2964 				x = 0;
2965 				y = 0;
2966 				break;
2967 			default:
2968 				fprintf(stderr, "Bad width %zu", ts);
2969 				abort();
2970 			}
2971 			if (x && y) {
2972 				file_magwarn(ms, "Overflow for numeric"
2973 				    " type `%s' value %#" PRIx64,
2974 				    type_tbl[m->type].name, ull);
2975 				return -1;
2976 			}
2977 		}
2978 		if (errno == 0) {
2979 			*p = ep;
2980 			eatsize(p);
2981 		}
2982 		return 0;
2983 	}
2984 }
2985 
/*
 * Convert a string containing C character escapes.  Stop at the first
 * unescaped whitespace character (as reported by isspace()) or at the end
 * of the input.
 * Copy the converted version to "m->value.s", and the length in m->vallen
 * (for FILE_PSTRING the size of the length prefix is added to it).
 * Return the updated scan pointer, which points at the character that
 * ended the scan, as function result.  Return NULL if the converted string
 * does not fit in m->value.s or if the pstring length size is bad.
 * Diagnostics about questionable escapes are issued only if warn is set.
 */
file_private const char *
getstr(struct magic_set *ms, struct magic *m, const char *s, int warn)
{
	const char *origs = s;
	char	*p = m->value.s;
	size_t  plen = sizeof(m->value.s);
	char 	*origp = p;
	/* Last usable position; keeps one byte free for the terminator. */
	char	*pmax = p + plen - 1;
	int	c;
	int	val;
	/* Depth of unescaped '[' ... ']' pairs seen so far. */
	size_t	bracket_nesting = 0;

	while ((c = *s++) != '\0') {
		if (isspace(CAST(unsigned char, c)))
			break;
		/* Each iteration stores at most one byte. */
		if (p >= pmax) {
			file_error(ms, 0, "string too long: `%s'", origs);
			return NULL;
		}
		if (c != '\\') {
		    if (c == '[') {
			    bracket_nesting++;
		    }
		    if (c == ']' && bracket_nesting > 0) {
			    bracket_nesting--;
		    }
		    *p++ = CAST(char, c);
		    continue;
		}
		/* c was a backslash: decode the escape that follows. */
		switch(c = *s++) {

		case '\0':
			if (warn)
				file_magwarn(ms, "incomplete escape");
			/* Step back so the caller sees the NUL. */
			s--;
			goto out;
		case '.':
			if (m->type == FILE_REGEX &&
			    bracket_nesting == 0 && warn) {
				file_magwarn(ms, "escaped dot ('.') found, "
				    "use \\\\. instead");
			}
			warn = 0; /* already did */
			/*FALLTHROUGH*/
		case '\t':
			if (warn) {
				file_magwarn(ms,
				    "escaped tab found, use \\\\t instead");
				warn = 0;	/* already did */
			}
			/*FALLTHROUGH*/
		default:
			if (warn) {
				if (isprint(CAST(unsigned char, c))) {
					/* Allow escaping of
					 * ``relations'' */
					if (strchr("<>&^=!", c) == NULL
					    && (m->type != FILE_REGEX ||
					    strchr("[]().*?^$|{}", c)
					    == NULL)) {
						file_magwarn(ms, "no "
						    "need to escape "
						    "`%c'", c);
					}
				} else {
					file_magwarn(ms,
					    "unknown escape sequence: "
					    "\\%03o", c);
				}
			}
			/*FALLTHROUGH*/
		/* space, perhaps force people to use \040? */
		case ' ':
#if 0
		/*
		 * Other things people escape, but shouldn't need to,
		 * so we disallow them
		 */
		case '\'':
		case '"':
		case '?':
#endif
		/* Relations */
		case '>':
		case '<':
		case '&':
		case '^':
		case '=':
		case '!':
		/* and backslash itself */
		case '\\':
			/* The escaped character is stored literally. */
			*p++ = CAST(char, c);
			break;

		case 'a':
			*p++ = '\a';
			break;

		case 'b':
			*p++ = '\b';
			break;

		case 'f':
			*p++ = '\f';
			break;

		case 'n':
			*p++ = '\n';
			break;

		case 'r':
			*p++ = '\r';
			break;

		case 't':
			*p++ = '\t';
			break;

		case 'v':
			*p++ = '\v';
			break;

		/* \ and up to 3 octal digits */
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			val = c - '0';
			c = *s++;  /* try for 2 */
			if (c >= '0' && c <= '7') {
				val = (val << 3) | (c - '0');
				c = *s++;  /* try for 3 */
				if (c >= '0' && c <= '7')
					val = (val << 3) | (c-'0');
				else
					--s;
			}
			else
				--s;
			*p++ = CAST(char, val);
			break;

		/* \x and up to 2 hex digits */
		case 'x':
			val = 'x';	/* Default if no digits */
			c = hextoint(*s++);	/* Get next char */
			if (c >= 0) {
				val = c;
				c = hextoint(*s++);
				if (c >= 0)
					val = (val << 4) + c;
				else
					--s;
			} else
				--s;
			*p++ = CAST(char, val);
			break;
		}
	}
	/* Undo the post-increment so s points at the stopping character. */
	--s;
out:
	*p = '\0';
	m->vallen = CAST(unsigned char, (p - origp));
	if (m->type == FILE_PSTRING) {
		size_t l =  file_pstring_length_size(ms, m);
		if (l == FILE_BADSIZE)
			return NULL;
		m->vallen += CAST(unsigned char, l);
	}
	return s;
}
3167 
3168 
3169 /* Single hex char to int; -1 if not a hex char. */
3170 file_private int
hextoint(int c)3171 hextoint(int c)
3172 {
3173 	if (!isascii(CAST(unsigned char, c)))
3174 		return -1;
3175 	if (isdigit(CAST(unsigned char, c)))
3176 		return c - '0';
3177 	if ((c >= 'a') && (c <= 'f'))
3178 		return c + 10 - 'a';
3179 	if (( c>= 'A') && (c <= 'F'))
3180 		return c + 10 - 'A';
3181 	return -1;
3182 }
3183 
3184 
3185 /*
3186  * Print a string containing C character escapes.
3187  */
3188 file_protected void
file_showstr(FILE * fp,const char * s,size_t len)3189 file_showstr(FILE *fp, const char *s, size_t len)
3190 {
3191 	char	c;
3192 
3193 	for (;;) {
3194 		if (len == FILE_BADSIZE) {
3195 			c = *s++;
3196 			if (c == '\0')
3197 				break;
3198 		}
3199 		else  {
3200 			if (len-- == 0)
3201 				break;
3202 			c = *s++;
3203 		}
3204 		if (c >= 040 && c <= 0176)	/* TODO isprint && !iscntrl */
3205 			(void) fputc(c, fp);
3206 		else {
3207 			(void) fputc('\\', fp);
3208 			switch (c) {
3209 			case '\a':
3210 				(void) fputc('a', fp);
3211 				break;
3212 
3213 			case '\b':
3214 				(void) fputc('b', fp);
3215 				break;
3216 
3217 			case '\f':
3218 				(void) fputc('f', fp);
3219 				break;
3220 
3221 			case '\n':
3222 				(void) fputc('n', fp);
3223 				break;
3224 
3225 			case '\r':
3226 				(void) fputc('r', fp);
3227 				break;
3228 
3229 			case '\t':
3230 				(void) fputc('t', fp);
3231 				break;
3232 
3233 			case '\v':
3234 				(void) fputc('v', fp);
3235 				break;
3236 
3237 			default:
3238 				(void) fprintf(fp, "%.3o", c & 0377);
3239 				break;
3240 			}
3241 		}
3242 	}
3243 }
3244 
3245 /*
3246  * eatsize(): Eat the size spec from a number [eg. 10UL]
3247  */
3248 file_private void
eatsize(const char ** p)3249 eatsize(const char **p)
3250 {
3251 	const char *l = *p;
3252 
3253 	if (LOWCASE(*l) == 'u')
3254 		l++;
3255 
3256 	switch (LOWCASE(*l)) {
3257 	case 'l':    /* long */
3258 	case 's':    /* short */
3259 	case 'h':    /* short */
3260 	case 'b':    /* char/byte */
3261 	case 'c':    /* char/byte */
3262 		l++;
3263 		/*FALLTHROUGH*/
3264 	default:
3265 		break;
3266 	}
3267 
3268 	*p = l;
3269 }
3270 
3271 /*
3272  * handle a buffer containing a compiled file.
3273  */
3274 file_private struct magic_map *
apprentice_buf(struct magic_set * ms,struct magic * buf,size_t len)3275 apprentice_buf(struct magic_set *ms, struct magic *buf, size_t len)
3276 {
3277 	struct magic_map *map;
3278 
3279 	if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) {
3280 		file_oomem(ms, sizeof(*map));
3281 		return NULL;
3282 	}
3283 	map->len = len;
3284 	map->p = buf;
3285 	map->type = MAP_TYPE_USER;
3286 	if (check_buffer(ms, map, "buffer") != 0) {
3287 		apprentice_unmap(map);
3288 		return NULL;
3289 	}
3290 	return map;
3291 }
3292 
3293 /*
3294  * handle a compiled file.
3295  */
3296 
3297 file_private struct magic_map *
apprentice_map(struct magic_set * ms,const char * fn)3298 apprentice_map(struct magic_set *ms, const char *fn)
3299 {
3300 	int fd;
3301 	struct stat st;
3302 	char *dbname = NULL;
3303 	struct magic_map *map;
3304 	struct magic_map *rv = NULL;
3305 
3306 	fd = -1;
3307 	if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) {
3308 		file_oomem(ms, sizeof(*map));
3309 		goto error;
3310 	}
3311 	map->type = MAP_TYPE_USER;	/* unspecified */
3312 
3313 	dbname = mkdbname(ms, fn, 0);
3314 	if (dbname == NULL)
3315 		goto error;
3316 
3317 	if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1)
3318 		goto error;
3319 
3320 	if (fstat(fd, &st) == -1) {
3321 		file_error(ms, errno, "cannot stat `%s'", dbname);
3322 		goto error;
3323 	}
3324 	if (st.st_size < 8 || st.st_size > maxoff_t()) {
3325 		file_error(ms, 0, "file `%s' is too %s", dbname,
3326 		    st.st_size < 8 ? "small" : "large");
3327 		goto error;
3328 	}
3329 
3330 	map->len = CAST(size_t, st.st_size);
3331 #ifdef QUICK
3332 	map->type = MAP_TYPE_MMAP;
3333 	if ((map->p = mmap(0, CAST(size_t, st.st_size), PROT_READ|PROT_WRITE,
3334 	    MAP_PRIVATE|MAP_FILE, fd, CAST(off_t, 0))) == MAP_FAILED) {
3335 		file_error(ms, errno, "cannot map `%s'", dbname);
3336 		goto error;
3337 	}
3338 #else
3339 	map->type = MAP_TYPE_MALLOC;
3340 	if ((map->p = CAST(void *, malloc(map->len))) == NULL) {
3341 		file_oomem(ms, map->len);
3342 		goto error;
3343 	}
3344 	if (read(fd, map->p, map->len) != (ssize_t)map->len) {
3345 		file_badread(ms);
3346 		goto error;
3347 	}
3348 #endif
3349 	(void)close(fd);
3350 	fd = -1;
3351 
3352 	if (check_buffer(ms, map, dbname) != 0) {
3353 		goto error;
3354 	}
3355 #ifdef QUICK
3356 	if (mprotect(map->p, CAST(size_t, st.st_size), PROT_READ) == -1) {
3357 		file_error(ms, errno, "cannot mprotect `%s'", dbname);
3358 		goto error;
3359 	}
3360 #endif
3361 
3362 	free(dbname);
3363 	return map;
3364 
3365 error:
3366 	if (fd != -1)
3367 		(void)close(fd);
3368 	apprentice_unmap(map);
3369 	free(dbname);
3370 	return rv;
3371 }
3372 
3373 file_private int
check_buffer(struct magic_set * ms,struct magic_map * map,const char * dbname)3374 check_buffer(struct magic_set *ms, struct magic_map *map, const char *dbname)
3375 {
3376 	uint32_t *ptr;
3377 	uint32_t entries, nentries;
3378 	uint32_t version;
3379 	int i, needsbyteswap;
3380 
3381 	entries = CAST(uint32_t, map->len / sizeof(struct magic));
3382 	if (entries < MAGIC_SETS + 1) {
3383 		file_error(ms, 0, "Too few magic entries %u in `%s'",
3384 		    entries, dbname);
3385 		return -1;
3386 	}
3387 	if ((entries * sizeof(struct magic)) != map->len) {
3388 		file_error(ms, 0, "Size of `%s' %" SIZE_T_FORMAT "u is not "
3389 		    "a multiple of %" SIZE_T_FORMAT "u",
3390 		    dbname, map->len, sizeof(struct magic));
3391 		return -1;
3392 	}
3393 
3394 	ptr = CAST(uint32_t *, map->p);
3395 	if (*ptr != MAGICNO) {
3396 		if (swap4(*ptr) != MAGICNO) {
3397 			file_error(ms, 0, "bad magic in `%s'", dbname);
3398 			return -1;
3399 		}
3400 		needsbyteswap = 1;
3401 	} else
3402 		needsbyteswap = 0;
3403 	if (needsbyteswap)
3404 		version = swap4(ptr[1]);
3405 	else
3406 		version = ptr[1];
3407 	if (version != VERSIONNO) {
3408 		file_error(ms, 0, "File %s supports only version %d magic "
3409 		    "files. `%s' is version %d", VERSION,
3410 		    VERSIONNO, dbname, version);
3411 		return -1;
3412 	}
3413 	map->magic[0] = CAST(struct magic *, map->p) + 1;
3414 	nentries = 0;
3415 	for (i = 0; i < MAGIC_SETS; i++) {
3416 		if (needsbyteswap)
3417 			map->nmagic[i] = swap4(ptr[i + 2]);
3418 		else
3419 			map->nmagic[i] = ptr[i + 2];
3420 		if (i != MAGIC_SETS - 1)
3421 			map->magic[i + 1] = map->magic[i] + map->nmagic[i];
3422 		nentries += map->nmagic[i];
3423 	}
3424 	if (entries != nentries + 1) {
3425 		file_error(ms, 0, "Inconsistent entries in `%s' %u != %u",
3426 		    dbname, entries, nentries + 1);
3427 		return -1;
3428 	}
3429 	if (needsbyteswap)
3430 		for (i = 0; i < MAGIC_SETS; i++)
3431 			byteswap(map->magic[i], map->nmagic[i]);
3432 	return 0;
3433 }
3434 
3435 /*
3436  * handle an mmaped file.
3437  */
3438 file_private int
apprentice_compile(struct magic_set * ms,struct magic_map * map,const char * fn)3439 apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn)
3440 {
3441 	static const size_t nm = sizeof(*map->nmagic) * MAGIC_SETS;
3442 	static const size_t m = sizeof(**map->magic);
3443 	int fd = -1;
3444 	size_t len;
3445 	char *dbname;
3446 	int rv = -1;
3447 	uint32_t i;
3448 	union {
3449 		struct magic m;
3450 		uint32_t h[2 + MAGIC_SETS];
3451 	} hdr;
3452 
3453 	dbname = mkdbname(ms, fn, 1);
3454 
3455 	if (dbname == NULL)
3456 		goto out;
3457 
3458 	if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1)
3459 	{
3460 		file_error(ms, errno, "cannot open `%s'", dbname);
3461 		goto out;
3462 	}
3463 	memset(&hdr, 0, sizeof(hdr));
3464 	hdr.h[0] = MAGICNO;
3465 	hdr.h[1] = VERSIONNO;
3466 	memcpy(hdr.h + 2, map->nmagic, nm);
3467 
3468 	if (write(fd, &hdr, sizeof(hdr)) != CAST(ssize_t, sizeof(hdr))) {
3469 		file_error(ms, errno, "error writing `%s'", dbname);
3470 		goto out2;
3471 	}
3472 
3473 	for (i = 0; i < MAGIC_SETS; i++) {
3474 		len = m * map->nmagic[i];
3475 		if (write(fd, map->magic[i], len) != CAST(ssize_t, len)) {
3476 			file_error(ms, errno, "error writing `%s'", dbname);
3477 			goto out2;
3478 		}
3479 	}
3480 
3481 	rv = 0;
3482 out2:
3483 	if (fd != -1)
3484 		(void)close(fd);
3485 out:
3486 	apprentice_unmap(map);
3487 	free(dbname);
3488 	return rv;
3489 }
3490 
3491 file_private const char ext[] = ".mgc";
3492 /*
3493  * make a dbname
3494  */
3495 file_private char *
mkdbname(struct magic_set * ms,const char * fn,int strip)3496 mkdbname(struct magic_set *ms, const char *fn, int strip)
3497 {
3498 	const char *p, *q;
3499 	char *buf;
3500 
3501 	if (strip) {
3502 		if ((p = strrchr(fn, '/')) != NULL)
3503 			fn = ++p;
3504 	}
3505 
3506 	for (q = fn; *q; q++)
3507 		continue;
3508 	/* Look for .mgc */
3509 	for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--)
3510 		if (*p != *q)
3511 			break;
3512 
3513 	/* Did not find .mgc, restore q */
3514 	if (p >= ext)
3515 		for (q = fn; *q; q++)
3516 			continue;
3517 
3518 	q++;
3519 	/* Compatibility with old code that looked in .mime */
3520 	if (ms->flags & MAGIC_MIME) {
3521 		if (asprintf(&buf, "%.*s.mime%s", CAST(int, q - fn), fn, ext)
3522 		    < 0)
3523 			return NULL;
3524 		if (access(buf, R_OK) != -1) {
3525 			ms->flags &= MAGIC_MIME_TYPE;
3526 			return buf;
3527 		}
3528 		free(buf);
3529 	}
3530 	if (asprintf(&buf, "%.*s%s", CAST(int, q - fn), fn, ext) < 0)
3531 		return NULL;
3532 
3533 	/* Compatibility with old code that looked in .mime */
3534 	if (strstr(fn, ".mime") != NULL)
3535 		ms->flags &= MAGIC_MIME_TYPE;
3536 	return buf;
3537 }
3538 
3539 /*
3540  * Byteswap an mmap'ed file if needed
3541  */
3542 file_private void
byteswap(struct magic * magic,uint32_t nmagic)3543 byteswap(struct magic *magic, uint32_t nmagic)
3544 {
3545 	uint32_t i;
3546 	for (i = 0; i < nmagic; i++)
3547 		bs1(&magic[i]);
3548 }
3549 
3550 #if !defined(HAVE_BYTESWAP_H) && !defined(HAVE_SYS_BSWAP_H)
/*
 * Return `sv' with its two bytes in reverse order.
 */
file_private uint16_t
swap2(uint16_t sv)
{
	uint16_t rv;
	const uint8_t *src = RCAST(const uint8_t *, RCAST(const void *, &sv));
	uint8_t *dst = RCAST(uint8_t *, RCAST(void *, &rv));
	size_t k;

	for (k = 0; k < sizeof(sv); k++)
		dst[k] = src[sizeof(sv) - 1 - k];
	return rv;
}
3564 
/*
 * Return `sv' with its four bytes in reverse order.
 */
file_private uint32_t
swap4(uint32_t sv)
{
	uint32_t rv = 0;
	size_t k;

	/* Peel bytes off the low end and push them in at the low end. */
	for (k = 0; k < sizeof(sv); k++) {
		rv = (rv << 8) | (sv & 0xffU);
		sv >>= 8;
	}
	return rv;
}
3580 
/*
 * Return `sv' with all eight bytes in reverse order.
 */
file_private uint64_t
swap8(uint64_t sv)
{
	uint64_t rv = 0;
	size_t k;

	/* Peel bytes off the low end and push them in at the low end. */
	for (k = 0; k < sizeof(sv); k++) {
		rv = (rv << 8) | (sv & 0xffU);
		sv >>= 8;
	}
	return rv;
}
3611 #endif
3612 
/*
 * Decode a variable-length integer stored 7 bits per byte.
 *
 * Every byte but the last has bit 0x80 set; the low 7 bits of each byte
 * are payload.  With t == FILE_LEVARINT the first byte holds the least
 * significant group, for any other t the first byte holds the most
 * significant group.
 *
 * If `l' is not NULL it receives the number of bytes the encoding
 * occupies.  Bits shifted beyond the width of uintmax_t are discarded.
 *
 * NOTE(review): there is no length argument, so the caller has to make
 * sure a byte with bit 0x80 clear follows within the buffer.
 */
file_protected uintmax_t
file_varint2uintmax_t(const unsigned char *us, int t, size_t *l)
{
	uintmax_t x = 0;
	size_t n, i;

	/* The encoding ends at the first byte without the continuation bit. */
	for (n = 0; (us[n] & 0x80) != 0; n++)
		continue;
	n++;
	if (l)
		*l = n;

	if (t == FILE_LEVARINT) {
		/*
		 * Most significant group is last: walk backwards.  Shift
		 * before adding each group so the final group is not shifted
		 * out of place, and index from the front so no pointer ever
		 * moves below `us'.
		 */
		for (i = n; i-- > 0;)
			x = (x << 7) | (us[i] & 0x7f);
	} else {
		for (i = 0; i < n; i++)
			x = (x << 7) | (us[i] & 0x7f);
	}
	return x;
}
3641 
3642 
3643 /*
3644  * byteswap a single magic entry
3645  */
3646 file_private void
bs1(struct magic * m)3647 bs1(struct magic *m)
3648 {
3649 	m->cont_level = swap2(m->cont_level);
3650 	m->offset = swap4(CAST(uint32_t, m->offset));
3651 	m->in_offset = swap4(CAST(uint32_t, m->in_offset));
3652 	m->lineno = swap4(CAST(uint32_t, m->lineno));
3653 	if (IS_STRING(m->type)) {
3654 		m->str_range = swap4(m->str_range);
3655 		m->str_flags = swap4(m->str_flags);
3656 	}
3657 	else {
3658 		m->value.q = swap8(m->value.q);
3659 		m->num_mask = swap8(m->num_mask);
3660 	}
3661 }
3662 
3663 file_protected size_t
file_pstring_length_size(struct magic_set * ms,const struct magic * m)3664 file_pstring_length_size(struct magic_set *ms, const struct magic *m)
3665 {
3666 	switch (m->str_flags & PSTRING_LEN) {
3667 	case PSTRING_1_LE:
3668 		return 1;
3669 	case PSTRING_2_LE:
3670 	case PSTRING_2_BE:
3671 		return 2;
3672 	case PSTRING_4_LE:
3673 	case PSTRING_4_BE:
3674 		return 4;
3675 	default:
3676 		file_error(ms, 0, "corrupt magic file "
3677 		    "(bad pascal string length %d)",
3678 		    m->str_flags & PSTRING_LEN);
3679 		return FILE_BADSIZE;
3680 	}
3681 }
3682 file_protected size_t
file_pstring_get_length(struct magic_set * ms,const struct magic * m,const char * ss)3683 file_pstring_get_length(struct magic_set *ms, const struct magic *m,
3684     const char *ss)
3685 {
3686 	size_t len = 0;
3687 	const unsigned char *s = RCAST(const unsigned char *, ss);
3688 	unsigned int s3, s2, s1, s0;
3689 
3690 	switch (m->str_flags & PSTRING_LEN) {
3691 	case PSTRING_1_LE:
3692 		len = *s;
3693 		break;
3694 	case PSTRING_2_LE:
3695 		s0 = s[0];
3696 		s1 = s[1];
3697 		len = (s1 << 8) | s0;
3698 		break;
3699 	case PSTRING_2_BE:
3700 		s0 = s[0];
3701 		s1 = s[1];
3702 		len = (s0 << 8) | s1;
3703 		break;
3704 	case PSTRING_4_LE:
3705 		s0 = s[0];
3706 		s1 = s[1];
3707 		s2 = s[2];
3708 		s3 = s[3];
3709 		len = (s3 << 24) | (s2 << 16) | (s1 << 8) | s0;
3710 		break;
3711 	case PSTRING_4_BE:
3712 		s0 = s[0];
3713 		s1 = s[1];
3714 		s2 = s[2];
3715 		s3 = s[3];
3716 		len = (s0 << 24) | (s1 << 16) | (s2 << 8) | s3;
3717 		break;
3718 	default:
3719 		file_error(ms, 0, "corrupt magic file "
3720 		    "(bad pascal string length %d)",
3721 		    m->str_flags & PSTRING_LEN);
3722 		return FILE_BADSIZE;
3723 	}
3724 
3725 	if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF) {
3726 		size_t l = file_pstring_length_size(ms, m);
3727 		if (l == FILE_BADSIZE)
3728 			return l;
3729 		len -= l;
3730 	}
3731 
3732 	return len;
3733 }
3734 
3735 file_protected int
file_magicfind(struct magic_set * ms,const char * name,struct mlist * v)3736 file_magicfind(struct magic_set *ms, const char *name, struct mlist *v)
3737 {
3738 	uint32_t i, j;
3739 	struct mlist *mlist, *ml;
3740 
3741 	mlist = ms->mlist[1];
3742 
3743 	for (ml = mlist->next; ml != mlist; ml = ml->next) {
3744 		struct magic *ma = ml->magic;
3745 		for (i = 0; i < ml->nmagic; i++) {
3746 			if (ma[i].type != FILE_NAME)
3747 				continue;
3748 			if (strcmp(ma[i].value.s, name) == 0) {
3749 				v->magic = &ma[i];
3750 				v->magic_rxcomp = &(ml->magic_rxcomp[i]);
3751 				for (j = i + 1; j < ml->nmagic; j++)
3752 				    if (ma[j].cont_level == 0)
3753 					    break;
3754 				v->nmagic = j - i;
3755 				return 0;
3756 			}
3757 		}
3758 	}
3759 	return -1;
3760 }
3761