xref: /freebsd/contrib/file/src/apprentice.c (revision 43a5ec4eb41567cc92586503212743d89686d78f)
/*
 * Copyright (c) Ian F. Darwin 1986-1995.
 * Software written by Ian F. Darwin and others;
 * maintained 1995-present by Christos Zoulas and others.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * apprentice - make one pass through /etc/magic, learning its secrets.
 */
31b6cee71dSXin LI 
32b6cee71dSXin LI #include "file.h"
33b6cee71dSXin LI 
34b6cee71dSXin LI #ifndef	lint
35*43a5ec4eSXin LI FILE_RCSID("@(#)$File: apprentice.c,v 1.309 2021/09/24 13:59:19 christos Exp $")
36b6cee71dSXin LI #endif	/* lint */
37b6cee71dSXin LI 
38b6cee71dSXin LI #include "magic.h"
39b6cee71dSXin LI #include <stdlib.h>
40b6cee71dSXin LI #ifdef HAVE_UNISTD_H
41b6cee71dSXin LI #include <unistd.h>
42b6cee71dSXin LI #endif
43b6cee71dSXin LI #include <stddef.h>
44b6cee71dSXin LI #include <string.h>
45b6cee71dSXin LI #include <assert.h>
46b6cee71dSXin LI #include <ctype.h>
47b6cee71dSXin LI #include <fcntl.h>
48b6cee71dSXin LI #ifdef QUICK
49b6cee71dSXin LI #include <sys/mman.h>
50b6cee71dSXin LI #endif
51b6cee71dSXin LI #include <dirent.h>
52b6cee71dSXin LI #include <limits.h>
53b6cee71dSXin LI 
54b6cee71dSXin LI 
/*
 * Advance the cursor variable `l' (which must be in scope at the point of
 * use) past any run of ASCII whitespace.
 */
#define	EATAB \
	{ \
		while (isascii(CAST(unsigned char, *l)) && \
		    isspace(CAST(unsigned char, *l))) \
			++l; \
	}

/* Yield the lower-case form of `l' if it is upper case, else `l' itself. */
#define LOWCASE(l) \
	(isupper(CAST(unsigned char, l)) ? tolower(CAST(unsigned char, l)) : (l))

/*
 * Work around a bug in headers on Digital Unix.
 * At least confirmed for: OSF1 V4.0 878
 */
#if defined(__osf__) && defined(__DECC)
# ifdef MAP_FAILED
#  undef MAP_FAILED
# endif
#endif

/* Fallback definitions for headers that do not provide these constants. */
#ifndef MAP_FAILED
# define MAP_FAILED (void *) -1
#endif

#ifndef MAP_FILE
# define MAP_FILE 0
#endif

#define ALLOC_CHUNK	CAST(size_t, 10)
#define ALLOC_INCR	CAST(size_t, 200)

/*
 * Values for magic_map.type; apprentice_unmap() switches on them to decide
 * how (and whether) the underlying storage is released.
 */
#define MAP_TYPE_USER	0
#define MAP_TYPE_MALLOC	1
#define MAP_TYPE_MMAP	2
83c2931133SXin LI 
/*
 * One magic entry under construction.
 * NOTE(review): presumably cont_count is the number of slots of mp in use
 * and max_count the number allocated -- confirm against parse().
 */
struct magic_entry {
	struct magic	*mp;		/* array of magic records */
	uint32_t	 cont_count;
	uint32_t	 max_count;
};
89b6cee71dSXin LI 
/*
 * A collection of magic_entry objects.
 * NOTE(review): presumably count is the number of entries in use and max
 * the allocated capacity of me -- confirm against the loader.
 */
struct magic_entry_set {
	struct magic_entry	*me;	/* array of entries */
	uint32_t		 count;
	uint32_t		 max;
};
95b6cee71dSXin LI 
96b6cee71dSXin LI struct magic_map {
97b6cee71dSXin LI 	void *p;
98b6cee71dSXin LI 	size_t len;
99c2931133SXin LI 	int type;
100b6cee71dSXin LI 	struct magic *magic[MAGIC_SETS];
101b6cee71dSXin LI 	uint32_t nmagic[MAGIC_SETS];
102b6cee71dSXin LI };
103b6cee71dSXin LI 
104b6cee71dSXin LI int file_formats[FILE_NAMES_SIZE];
105b6cee71dSXin LI const size_t file_nformats = FILE_NAMES_SIZE;
106b6cee71dSXin LI const char *file_names[FILE_NAMES_SIZE];
107b6cee71dSXin LI const size_t file_nnames = FILE_NAMES_SIZE;
108b6cee71dSXin LI 
109b6cee71dSXin LI private int getvalue(struct magic_set *ms, struct magic *, const char **, int);
110b6cee71dSXin LI private int hextoint(int);
111b6cee71dSXin LI private const char *getstr(struct magic_set *, struct magic *, const char *,
112b6cee71dSXin LI     int);
113b6cee71dSXin LI private int parse(struct magic_set *, struct magic_entry *, const char *,
114b6cee71dSXin LI     size_t, int);
115b6cee71dSXin LI private void eatsize(const char **);
116b6cee71dSXin LI private int apprentice_1(struct magic_set *, const char *, int);
117b6cee71dSXin LI private size_t apprentice_magic_strength(const struct magic *);
118b6cee71dSXin LI private int apprentice_sort(const void *, const void *);
119b6cee71dSXin LI private void apprentice_list(struct mlist *, int );
120b6cee71dSXin LI private struct magic_map *apprentice_load(struct magic_set *,
121b6cee71dSXin LI     const char *, int);
122b6cee71dSXin LI private struct mlist *mlist_alloc(void);
1232726a701SXin LI private void mlist_free_all(struct magic_set *);
124b6cee71dSXin LI private void mlist_free(struct mlist *);
125b6cee71dSXin LI private void byteswap(struct magic *, uint32_t);
126b6cee71dSXin LI private void bs1(struct magic *);
127b6cee71dSXin LI private uint16_t swap2(uint16_t);
128b6cee71dSXin LI private uint32_t swap4(uint32_t);
129b6cee71dSXin LI private uint64_t swap8(uint64_t);
130b6cee71dSXin LI private char *mkdbname(struct magic_set *, const char *, int);
131c2931133SXin LI private struct magic_map *apprentice_buf(struct magic_set *, struct magic *,
132c2931133SXin LI     size_t);
133b6cee71dSXin LI private struct magic_map *apprentice_map(struct magic_set *, const char *);
134c2931133SXin LI private int check_buffer(struct magic_set *, struct magic_map *, const char *);
135b6cee71dSXin LI private void apprentice_unmap(struct magic_map *);
136b6cee71dSXin LI private int apprentice_compile(struct magic_set *, struct magic_map *,
137b6cee71dSXin LI     const char *);
1383e41d09dSXin LI private int check_format_type(const char *, int, const char **);
139b6cee71dSXin LI private int check_format(struct magic_set *, struct magic *);
140b6cee71dSXin LI private int get_op(char);
1412726a701SXin LI private int parse_mime(struct magic_set *, struct magic_entry *, const char *,
1422726a701SXin LI     size_t);
1432726a701SXin LI private int parse_strength(struct magic_set *, struct magic_entry *,
1442726a701SXin LI     const char *, size_t);
1452726a701SXin LI private int parse_apple(struct magic_set *, struct magic_entry *, const char *,
1462726a701SXin LI     size_t);
1472726a701SXin LI private int parse_ext(struct magic_set *, struct magic_entry *, const char *,
1482726a701SXin LI     size_t);
149b6cee71dSXin LI 
150b6cee71dSXin LI 
151b6cee71dSXin LI private size_t magicsize = sizeof(struct magic);
152b6cee71dSXin LI 
153b6cee71dSXin LI private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
154b6cee71dSXin LI 
155b6cee71dSXin LI private struct {
156b6cee71dSXin LI 	const char *name;
157b6cee71dSXin LI 	size_t len;
1582726a701SXin LI 	int (*fun)(struct magic_set *, struct magic_entry *, const char *,
1592726a701SXin LI 	    size_t);
160b6cee71dSXin LI } bang[] = {
161b6cee71dSXin LI #define	DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
162b6cee71dSXin LI 	DECLARE_FIELD(mime),
163b6cee71dSXin LI 	DECLARE_FIELD(apple),
1645f0216bdSXin LI 	DECLARE_FIELD(ext),
165b6cee71dSXin LI 	DECLARE_FIELD(strength),
166b6cee71dSXin LI #undef	DECLARE_FIELD
167b6cee71dSXin LI 	{ NULL, 0, NULL }
168b6cee71dSXin LI };
169b6cee71dSXin LI 
#ifdef COMPILE_ONLY

int main(int, char *[]);

/*
 * Stand-alone driver, built only when COMPILE_ONLY is defined: compile the
 * single magic file named on the command line.
 *
 * Returns 0 on success and 1 on a usage error, when magic_open() fails, or
 * when magic_compile() reports an error.
 */
int
main(int argc, char *argv[])
{
	char *slash = strrchr(argv[0], '/');
	char *progname = slash != NULL ? slash + 1 : argv[0];
	struct magic_set *ms;
	int ret;

	if (argc != 2) {
		(void)fprintf(stderr, "Usage: %s file\n", progname);
		return 1;
	}

	ms = magic_open(MAGIC_CHECK);
	if (ms == NULL) {
		(void)fprintf(stderr, "%s: %s\n", progname, strerror(errno));
		return 1;
	}

	if (magic_compile(ms, argv[1]) == -1) {
		ret = 1;
		(void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms));
	} else {
		ret = 0;
	}
	magic_close(ms);
	return ret;
}
#endif /* COMPILE_ONLY */
202b6cee71dSXin LI 
/*
 * One row of a type-name table: the spelling of a type, the length of that
 * spelling, the type code it maps to and the format class of the type.
 * A row with len == 0 terminates a table.
 */
struct type_tbl_s {
	const char	name[16];	/* type name, NUL padded */
	const size_t	len;		/* strlen(name); 0 ends the table */
	const int	type;		/* FILE_* type code */
	const int	format;		/* FILE_FMT_* format class */
};
209b6cee71dSXin LI 
210b6cee71dSXin LI /*
211b6cee71dSXin LI  * XXX - the actual Single UNIX Specification says that "long" means "long",
212b6cee71dSXin LI  * as in the C data type, but we treat it as meaning "4-byte integer".
213b6cee71dSXin LI  * Given that the OS X version of file 5.04 did the same, I guess that passes
214b6cee71dSXin LI  * the actual test; having "long" be dependent on how big a "long" is on
215b6cee71dSXin LI  * the machine running "file" is silly.
216b6cee71dSXin LI  */
217b6cee71dSXin LI static const struct type_tbl_s type_tbl[] = {
218b6cee71dSXin LI # define XX(s)		s, (sizeof(s) - 1)
219b6cee71dSXin LI # define XX_NULL	"", 0
220b6cee71dSXin LI 	{ XX("invalid"),	FILE_INVALID,		FILE_FMT_NONE },
221b6cee71dSXin LI 	{ XX("byte"),		FILE_BYTE,		FILE_FMT_NUM },
222b6cee71dSXin LI 	{ XX("short"),		FILE_SHORT,		FILE_FMT_NUM },
223b6cee71dSXin LI 	{ XX("default"),	FILE_DEFAULT,		FILE_FMT_NONE },
224b6cee71dSXin LI 	{ XX("long"),		FILE_LONG,		FILE_FMT_NUM },
225b6cee71dSXin LI 	{ XX("string"),		FILE_STRING,		FILE_FMT_STR },
226b6cee71dSXin LI 	{ XX("date"),		FILE_DATE,		FILE_FMT_STR },
227b6cee71dSXin LI 	{ XX("beshort"),	FILE_BESHORT,		FILE_FMT_NUM },
228b6cee71dSXin LI 	{ XX("belong"),		FILE_BELONG,		FILE_FMT_NUM },
229b6cee71dSXin LI 	{ XX("bedate"),		FILE_BEDATE,		FILE_FMT_STR },
230b6cee71dSXin LI 	{ XX("leshort"),	FILE_LESHORT,		FILE_FMT_NUM },
231b6cee71dSXin LI 	{ XX("lelong"),		FILE_LELONG,		FILE_FMT_NUM },
232b6cee71dSXin LI 	{ XX("ledate"),		FILE_LEDATE,		FILE_FMT_STR },
233b6cee71dSXin LI 	{ XX("pstring"),	FILE_PSTRING,		FILE_FMT_STR },
234b6cee71dSXin LI 	{ XX("ldate"),		FILE_LDATE,		FILE_FMT_STR },
235b6cee71dSXin LI 	{ XX("beldate"),	FILE_BELDATE,		FILE_FMT_STR },
236b6cee71dSXin LI 	{ XX("leldate"),	FILE_LELDATE,		FILE_FMT_STR },
237b6cee71dSXin LI 	{ XX("regex"),		FILE_REGEX,		FILE_FMT_STR },
238b6cee71dSXin LI 	{ XX("bestring16"),	FILE_BESTRING16,	FILE_FMT_STR },
239b6cee71dSXin LI 	{ XX("lestring16"),	FILE_LESTRING16,	FILE_FMT_STR },
240b6cee71dSXin LI 	{ XX("search"),		FILE_SEARCH,		FILE_FMT_STR },
241b6cee71dSXin LI 	{ XX("medate"),		FILE_MEDATE,		FILE_FMT_STR },
242b6cee71dSXin LI 	{ XX("meldate"),	FILE_MELDATE,		FILE_FMT_STR },
243b6cee71dSXin LI 	{ XX("melong"),		FILE_MELONG,		FILE_FMT_NUM },
244b6cee71dSXin LI 	{ XX("quad"),		FILE_QUAD,		FILE_FMT_QUAD },
245b6cee71dSXin LI 	{ XX("lequad"),		FILE_LEQUAD,		FILE_FMT_QUAD },
246b6cee71dSXin LI 	{ XX("bequad"),		FILE_BEQUAD,		FILE_FMT_QUAD },
247b6cee71dSXin LI 	{ XX("qdate"),		FILE_QDATE,		FILE_FMT_STR },
248b6cee71dSXin LI 	{ XX("leqdate"),	FILE_LEQDATE,		FILE_FMT_STR },
249b6cee71dSXin LI 	{ XX("beqdate"),	FILE_BEQDATE,		FILE_FMT_STR },
250b6cee71dSXin LI 	{ XX("qldate"),		FILE_QLDATE,		FILE_FMT_STR },
251b6cee71dSXin LI 	{ XX("leqldate"),	FILE_LEQLDATE,		FILE_FMT_STR },
252b6cee71dSXin LI 	{ XX("beqldate"),	FILE_BEQLDATE,		FILE_FMT_STR },
253b6cee71dSXin LI 	{ XX("float"),		FILE_FLOAT,		FILE_FMT_FLOAT },
254b6cee71dSXin LI 	{ XX("befloat"),	FILE_BEFLOAT,		FILE_FMT_FLOAT },
255b6cee71dSXin LI 	{ XX("lefloat"),	FILE_LEFLOAT,		FILE_FMT_FLOAT },
256b6cee71dSXin LI 	{ XX("double"),		FILE_DOUBLE,		FILE_FMT_DOUBLE },
257b6cee71dSXin LI 	{ XX("bedouble"),	FILE_BEDOUBLE,		FILE_FMT_DOUBLE },
258b6cee71dSXin LI 	{ XX("ledouble"),	FILE_LEDOUBLE,		FILE_FMT_DOUBLE },
259b6cee71dSXin LI 	{ XX("leid3"),		FILE_LEID3,		FILE_FMT_NUM },
260b6cee71dSXin LI 	{ XX("beid3"),		FILE_BEID3,		FILE_FMT_NUM },
261b6cee71dSXin LI 	{ XX("indirect"),	FILE_INDIRECT,		FILE_FMT_NUM },
262b6cee71dSXin LI 	{ XX("qwdate"),		FILE_QWDATE,		FILE_FMT_STR },
263b6cee71dSXin LI 	{ XX("leqwdate"),	FILE_LEQWDATE,		FILE_FMT_STR },
264b6cee71dSXin LI 	{ XX("beqwdate"),	FILE_BEQWDATE,		FILE_FMT_STR },
265b6cee71dSXin LI 	{ XX("name"),		FILE_NAME,		FILE_FMT_NONE },
266b6cee71dSXin LI 	{ XX("use"),		FILE_USE,		FILE_FMT_NONE },
267b6cee71dSXin LI 	{ XX("clear"),		FILE_CLEAR,		FILE_FMT_NONE },
2683e41d09dSXin LI 	{ XX("der"),		FILE_DER,		FILE_FMT_STR },
2692726a701SXin LI 	{ XX("guid"),		FILE_GUID,		FILE_FMT_STR },
2702726a701SXin LI 	{ XX("offset"),		FILE_OFFSET,		FILE_FMT_QUAD },
271*43a5ec4eSXin LI 	{ XX("bevarint"),	FILE_BEVARINT,		FILE_FMT_STR },
272*43a5ec4eSXin LI 	{ XX("levarint"),	FILE_LEVARINT,		FILE_FMT_STR },
273b6cee71dSXin LI 	{ XX_NULL,		FILE_INVALID,		FILE_FMT_NONE },
274b6cee71dSXin LI };
275b6cee71dSXin LI 
276b6cee71dSXin LI /*
277b6cee71dSXin LI  * These are not types, and cannot be preceded by "u" to make them
278b6cee71dSXin LI  * unsigned.
279b6cee71dSXin LI  */
280b6cee71dSXin LI static const struct type_tbl_s special_tbl[] = {
2813e41d09dSXin LI 	{ XX("der"),		FILE_DER,		FILE_FMT_STR },
282b6cee71dSXin LI 	{ XX("name"),		FILE_NAME,		FILE_FMT_STR },
283b6cee71dSXin LI 	{ XX("use"),		FILE_USE,		FILE_FMT_STR },
284b6cee71dSXin LI 	{ XX_NULL,		FILE_INVALID,		FILE_FMT_NONE },
285b6cee71dSXin LI };
286b6cee71dSXin LI # undef XX
287b6cee71dSXin LI # undef XX_NULL
288b6cee71dSXin LI 
289b6cee71dSXin LI private int
290b6cee71dSXin LI get_type(const struct type_tbl_s *tbl, const char *l, const char **t)
291b6cee71dSXin LI {
292b6cee71dSXin LI 	const struct type_tbl_s *p;
293b6cee71dSXin LI 
294b6cee71dSXin LI 	for (p = tbl; p->len; p++) {
295b6cee71dSXin LI 		if (strncmp(l, p->name, p->len) == 0) {
296b6cee71dSXin LI 			if (t)
297b6cee71dSXin LI 				*t = l + p->len;
298b6cee71dSXin LI 			break;
299b6cee71dSXin LI 		}
300b6cee71dSXin LI 	}
301b6cee71dSXin LI 	return p->type;
302b6cee71dSXin LI }
303b6cee71dSXin LI 
30448c779cdSXin LI private off_t
30548c779cdSXin LI maxoff_t(void) {
30648c779cdSXin LI 	if (/*CONSTCOND*/sizeof(off_t) == sizeof(int))
30748c779cdSXin LI 		return CAST(off_t, INT_MAX);
30848c779cdSXin LI 	if (/*CONSTCOND*/sizeof(off_t) == sizeof(long))
30948c779cdSXin LI 		return CAST(off_t, LONG_MAX);
31048c779cdSXin LI 	return 0x7fffffff;
31148c779cdSXin LI }
31248c779cdSXin LI 
313b6cee71dSXin LI private int
314b6cee71dSXin LI get_standard_integer_type(const char *l, const char **t)
315b6cee71dSXin LI {
316b6cee71dSXin LI 	int type;
317b6cee71dSXin LI 
31848c779cdSXin LI 	if (isalpha(CAST(unsigned char, l[1]))) {
319b6cee71dSXin LI 		switch (l[1]) {
320b6cee71dSXin LI 		case 'C':
321b6cee71dSXin LI 			/* "dC" and "uC" */
322b6cee71dSXin LI 			type = FILE_BYTE;
323b6cee71dSXin LI 			break;
324b6cee71dSXin LI 		case 'S':
325b6cee71dSXin LI 			/* "dS" and "uS" */
326b6cee71dSXin LI 			type = FILE_SHORT;
327b6cee71dSXin LI 			break;
328b6cee71dSXin LI 		case 'I':
329b6cee71dSXin LI 		case 'L':
330b6cee71dSXin LI 			/*
331b6cee71dSXin LI 			 * "dI", "dL", "uI", and "uL".
332b6cee71dSXin LI 			 *
333b6cee71dSXin LI 			 * XXX - the actual Single UNIX Specification says
334b6cee71dSXin LI 			 * that "L" means "long", as in the C data type,
335b6cee71dSXin LI 			 * but we treat it as meaning "4-byte integer".
336b6cee71dSXin LI 			 * Given that the OS X version of file 5.04 did
337b6cee71dSXin LI 			 * the same, I guess that passes the actual SUS
338b6cee71dSXin LI 			 * validation suite; having "dL" be dependent on
339b6cee71dSXin LI 			 * how big a "long" is on the machine running
340b6cee71dSXin LI 			 * "file" is silly.
341b6cee71dSXin LI 			 */
342b6cee71dSXin LI 			type = FILE_LONG;
343b6cee71dSXin LI 			break;
344b6cee71dSXin LI 		case 'Q':
345b6cee71dSXin LI 			/* "dQ" and "uQ" */
346b6cee71dSXin LI 			type = FILE_QUAD;
347b6cee71dSXin LI 			break;
348b6cee71dSXin LI 		default:
349b6cee71dSXin LI 			/* "d{anything else}", "u{anything else}" */
350b6cee71dSXin LI 			return FILE_INVALID;
351b6cee71dSXin LI 		}
352b6cee71dSXin LI 		l += 2;
35348c779cdSXin LI 	} else if (isdigit(CAST(unsigned char, l[1]))) {
354b6cee71dSXin LI 		/*
355b6cee71dSXin LI 		 * "d{num}" and "u{num}"; we only support {num} values
356b6cee71dSXin LI 		 * of 1, 2, 4, and 8 - the Single UNIX Specification
357b6cee71dSXin LI 		 * doesn't say anything about whether arbitrary
358b6cee71dSXin LI 		 * values should be supported, but both the Solaris 10
359b6cee71dSXin LI 		 * and OS X Mountain Lion versions of file passed the
360b6cee71dSXin LI 		 * Single UNIX Specification validation suite, and
361b6cee71dSXin LI 		 * neither of them support values bigger than 8 or
362b6cee71dSXin LI 		 * non-power-of-2 values.
363b6cee71dSXin LI 		 */
36448c779cdSXin LI 		if (isdigit(CAST(unsigned char, l[2]))) {
365b6cee71dSXin LI 			/* Multi-digit, so > 9 */
366b6cee71dSXin LI 			return FILE_INVALID;
367b6cee71dSXin LI 		}
368b6cee71dSXin LI 		switch (l[1]) {
369b6cee71dSXin LI 		case '1':
370b6cee71dSXin LI 			type = FILE_BYTE;
371b6cee71dSXin LI 			break;
372b6cee71dSXin LI 		case '2':
373b6cee71dSXin LI 			type = FILE_SHORT;
374b6cee71dSXin LI 			break;
375b6cee71dSXin LI 		case '4':
376b6cee71dSXin LI 			type = FILE_LONG;
377b6cee71dSXin LI 			break;
378b6cee71dSXin LI 		case '8':
379b6cee71dSXin LI 			type = FILE_QUAD;
380b6cee71dSXin LI 			break;
381b6cee71dSXin LI 		default:
382b6cee71dSXin LI 			/* XXX - what about 3, 5, 6, or 7? */
383b6cee71dSXin LI 			return FILE_INVALID;
384b6cee71dSXin LI 		}
385b6cee71dSXin LI 		l += 2;
386b6cee71dSXin LI 	} else {
387b6cee71dSXin LI 		/*
388b6cee71dSXin LI 		 * "d" or "u" by itself.
389b6cee71dSXin LI 		 */
390b6cee71dSXin LI 		type = FILE_LONG;
391b6cee71dSXin LI 		++l;
392b6cee71dSXin LI 	}
393b6cee71dSXin LI 	if (t)
394b6cee71dSXin LI 		*t = l;
395b6cee71dSXin LI 	return type;
396b6cee71dSXin LI }
397b6cee71dSXin LI 
398b6cee71dSXin LI private void
399b6cee71dSXin LI init_file_tables(void)
400b6cee71dSXin LI {
401b6cee71dSXin LI 	static int done = 0;
402b6cee71dSXin LI 	const struct type_tbl_s *p;
403b6cee71dSXin LI 
404b6cee71dSXin LI 	if (done)
405b6cee71dSXin LI 		return;
406b6cee71dSXin LI 	done++;
407b6cee71dSXin LI 
408b6cee71dSXin LI 	for (p = type_tbl; p->len; p++) {
409b6cee71dSXin LI 		assert(p->type < FILE_NAMES_SIZE);
410b6cee71dSXin LI 		file_names[p->type] = p->name;
411b6cee71dSXin LI 		file_formats[p->type] = p->format;
412b6cee71dSXin LI 	}
413b6cee71dSXin LI 	assert(p - type_tbl == FILE_NAMES_SIZE);
414b6cee71dSXin LI }
415b6cee71dSXin LI 
416b6cee71dSXin LI private int
417b6cee71dSXin LI add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx)
418b6cee71dSXin LI {
419b6cee71dSXin LI 	struct mlist *ml;
420b6cee71dSXin LI 
421a5d223e6SXin LI 	mlp->map = NULL;
422b6cee71dSXin LI 	if ((ml = CAST(struct mlist *, malloc(sizeof(*ml)))) == NULL)
423b6cee71dSXin LI 		return -1;
424b6cee71dSXin LI 
425a5d223e6SXin LI 	ml->map = idx == 0 ? map : NULL;
426b6cee71dSXin LI 	ml->magic = map->magic[idx];
427b6cee71dSXin LI 	ml->nmagic = map->nmagic[idx];
428b6cee71dSXin LI 
429b6cee71dSXin LI 	mlp->prev->next = ml;
430b6cee71dSXin LI 	ml->prev = mlp->prev;
431b6cee71dSXin LI 	ml->next = mlp;
432b6cee71dSXin LI 	mlp->prev = ml;
433b6cee71dSXin LI 	return 0;
434b6cee71dSXin LI }
435b6cee71dSXin LI 
436b6cee71dSXin LI /*
437b6cee71dSXin LI  * Handle one file or directory.
438b6cee71dSXin LI  */
439b6cee71dSXin LI private int
440b6cee71dSXin LI apprentice_1(struct magic_set *ms, const char *fn, int action)
441b6cee71dSXin LI {
442b6cee71dSXin LI 	struct magic_map *map;
443b6cee71dSXin LI #ifndef COMPILE_ONLY
444c2931133SXin LI 	struct mlist *ml;
445b6cee71dSXin LI 	size_t i;
446c2931133SXin LI #endif
447b6cee71dSXin LI 
448b6cee71dSXin LI 	if (magicsize != FILE_MAGICSIZE) {
449b6cee71dSXin LI 		file_error(ms, 0, "magic element size %lu != %lu",
45048c779cdSXin LI 		    CAST(unsigned long, sizeof(*map->magic[0])),
45148c779cdSXin LI 		    CAST(unsigned long, FILE_MAGICSIZE));
452b6cee71dSXin LI 		return -1;
453b6cee71dSXin LI 	}
454b6cee71dSXin LI 
455b6cee71dSXin LI 	if (action == FILE_COMPILE) {
456b6cee71dSXin LI 		map = apprentice_load(ms, fn, action);
457b6cee71dSXin LI 		if (map == NULL)
458b6cee71dSXin LI 			return -1;
459b6cee71dSXin LI 		return apprentice_compile(ms, map, fn);
460b6cee71dSXin LI 	}
461b6cee71dSXin LI 
462b6cee71dSXin LI #ifndef COMPILE_ONLY
463b6cee71dSXin LI 	map = apprentice_map(ms, fn);
464b6cee71dSXin LI 	if (map == NULL) {
465b6cee71dSXin LI 		if (ms->flags & MAGIC_CHECK)
466b6cee71dSXin LI 			file_magwarn(ms, "using regular magic file `%s'", fn);
467b6cee71dSXin LI 		map = apprentice_load(ms, fn, action);
468b6cee71dSXin LI 		if (map == NULL)
469b6cee71dSXin LI 			return -1;
470b6cee71dSXin LI 	}
471b6cee71dSXin LI 
472b6cee71dSXin LI 	for (i = 0; i < MAGIC_SETS; i++) {
473b6cee71dSXin LI 		if (add_mlist(ms->mlist[i], map, i) == -1) {
4742726a701SXin LI 			/* failed to add to any list, free explicitly */
4752726a701SXin LI 			if (i == 0)
4762726a701SXin LI 				apprentice_unmap(map);
4772726a701SXin LI 			else
4782726a701SXin LI 				mlist_free_all(ms);
479b6cee71dSXin LI 			file_oomem(ms, sizeof(*ml));
480a5d223e6SXin LI 			return -1;
481b6cee71dSXin LI 		}
482b6cee71dSXin LI 	}
483b6cee71dSXin LI 
484b6cee71dSXin LI 	if (action == FILE_LIST) {
485b6cee71dSXin LI 		for (i = 0; i < MAGIC_SETS; i++) {
486c2931133SXin LI 			printf("Set %" SIZE_T_FORMAT "u:\nBinary patterns:\n",
487c2931133SXin LI 			    i);
488b6cee71dSXin LI 			apprentice_list(ms->mlist[i], BINTEST);
489b6cee71dSXin LI 			printf("Text patterns:\n");
490b6cee71dSXin LI 			apprentice_list(ms->mlist[i], TEXTTEST);
491b6cee71dSXin LI 		}
492b6cee71dSXin LI 	}
493b6cee71dSXin LI 	return 0;
494c2931133SXin LI #else
495c2931133SXin LI 	return 0;
496c2931133SXin LI #endif /* COMPILE_ONLY */
497b6cee71dSXin LI }
498b6cee71dSXin LI 
499b6cee71dSXin LI protected void
500b6cee71dSXin LI file_ms_free(struct magic_set *ms)
501b6cee71dSXin LI {
502b6cee71dSXin LI 	size_t i;
503b6cee71dSXin LI 	if (ms == NULL)
504b6cee71dSXin LI 		return;
505b6cee71dSXin LI 	for (i = 0; i < MAGIC_SETS; i++)
506b6cee71dSXin LI 		mlist_free(ms->mlist[i]);
507b6cee71dSXin LI 	free(ms->o.pbuf);
508b6cee71dSXin LI 	free(ms->o.buf);
509b6cee71dSXin LI 	free(ms->c.li);
510b6cee71dSXin LI 	free(ms);
511b6cee71dSXin LI }
512b6cee71dSXin LI 
513b6cee71dSXin LI protected struct magic_set *
514b6cee71dSXin LI file_ms_alloc(int flags)
515b6cee71dSXin LI {
516b6cee71dSXin LI 	struct magic_set *ms;
517b6cee71dSXin LI 	size_t i, len;
518b6cee71dSXin LI 
51948c779cdSXin LI 	if ((ms = CAST(struct magic_set *, calloc(CAST(size_t, 1u),
520b6cee71dSXin LI 	    sizeof(struct magic_set)))) == NULL)
521b6cee71dSXin LI 		return NULL;
522b6cee71dSXin LI 
523b6cee71dSXin LI 	if (magic_setflags(ms, flags) == -1) {
524b6cee71dSXin LI 		errno = EINVAL;
525b6cee71dSXin LI 		goto free;
526b6cee71dSXin LI 	}
527b6cee71dSXin LI 
528b6cee71dSXin LI 	ms->o.buf = ms->o.pbuf = NULL;
5292726a701SXin LI 	ms->o.blen = 0;
530b6cee71dSXin LI 	len = (ms->c.len = 10) * sizeof(*ms->c.li);
531b6cee71dSXin LI 
532b6cee71dSXin LI 	if ((ms->c.li = CAST(struct level_info *, malloc(len))) == NULL)
533b6cee71dSXin LI 		goto free;
534b6cee71dSXin LI 
535b6cee71dSXin LI 	ms->event_flags = 0;
536b6cee71dSXin LI 	ms->error = -1;
537b6cee71dSXin LI 	for (i = 0; i < MAGIC_SETS; i++)
538b6cee71dSXin LI 		ms->mlist[i] = NULL;
539b6cee71dSXin LI 	ms->file = "unknown";
540b6cee71dSXin LI 	ms->line = 0;
541c2931133SXin LI 	ms->indir_max = FILE_INDIR_MAX;
542c2931133SXin LI 	ms->name_max = FILE_NAME_MAX;
543c2931133SXin LI 	ms->elf_shnum_max = FILE_ELF_SHNUM_MAX;
544c2931133SXin LI 	ms->elf_phnum_max = FILE_ELF_PHNUM_MAX;
5454460e5b0SXin LI 	ms->elf_notes_max = FILE_ELF_NOTES_MAX;
5469ce06829SXin LI 	ms->regex_max = FILE_REGEX_MAX;
5473e41d09dSXin LI 	ms->bytes_max = FILE_BYTES_MAX;
548*43a5ec4eSXin LI 	ms->encoding_max = FILE_ENCODING_MAX;
549b6cee71dSXin LI 	return ms;
550b6cee71dSXin LI free:
551b6cee71dSXin LI 	free(ms);
552b6cee71dSXin LI 	return NULL;
553b6cee71dSXin LI }
554b6cee71dSXin LI 
555b6cee71dSXin LI private void
556b6cee71dSXin LI apprentice_unmap(struct magic_map *map)
557b6cee71dSXin LI {
5589ce06829SXin LI 	size_t i;
559*43a5ec4eSXin LI 	char *p;
560b6cee71dSXin LI 	if (map == NULL)
561b6cee71dSXin LI 		return;
562c2931133SXin LI 
563c2931133SXin LI 	switch (map->type) {
5643e41d09dSXin LI 	case MAP_TYPE_USER:
5653e41d09dSXin LI 		break;
5663e41d09dSXin LI 	case MAP_TYPE_MALLOC:
567*43a5ec4eSXin LI 		p = CAST(char *, map->p);
5683e41d09dSXin LI 		for (i = 0; i < MAGIC_SETS; i++) {
569*43a5ec4eSXin LI 			char *b = RCAST(char *, map->magic[i]);
570*43a5ec4eSXin LI 			if (p != NULL && b >= p && b <= p + map->len)
5713e41d09dSXin LI 				continue;
572*43a5ec4eSXin LI 			free(b);
5733e41d09dSXin LI 		}
574*43a5ec4eSXin LI 		free(p);
5753e41d09dSXin LI 		break;
576b6cee71dSXin LI #ifdef QUICK
577c2931133SXin LI 	case MAP_TYPE_MMAP:
5783e41d09dSXin LI 		if (map->p && map->p != MAP_FAILED)
579b6cee71dSXin LI 			(void)munmap(map->p, map->len);
580c2931133SXin LI 		break;
581b6cee71dSXin LI #endif
582c2931133SXin LI 	default:
583c2931133SXin LI 		abort();
584b6cee71dSXin LI 	}
585b6cee71dSXin LI 	free(map);
586b6cee71dSXin LI }
587b6cee71dSXin LI 
588b6cee71dSXin LI private struct mlist *
589b6cee71dSXin LI mlist_alloc(void)
590b6cee71dSXin LI {
591b6cee71dSXin LI 	struct mlist *mlist;
592b6cee71dSXin LI 	if ((mlist = CAST(struct mlist *, calloc(1, sizeof(*mlist)))) == NULL) {
593b6cee71dSXin LI 		return NULL;
594b6cee71dSXin LI 	}
595b6cee71dSXin LI 	mlist->next = mlist->prev = mlist;
596b6cee71dSXin LI 	return mlist;
597b6cee71dSXin LI }
598b6cee71dSXin LI 
599b6cee71dSXin LI private void
6002726a701SXin LI mlist_free_all(struct magic_set *ms)
6012726a701SXin LI {
6022726a701SXin LI 	size_t i;
6032726a701SXin LI 
6042726a701SXin LI 	for (i = 0; i < MAGIC_SETS; i++) {
6052726a701SXin LI 		mlist_free(ms->mlist[i]);
6062726a701SXin LI 		ms->mlist[i] = NULL;
6072726a701SXin LI 	}
6082726a701SXin LI }
6092726a701SXin LI 
6102726a701SXin LI private void
61148c779cdSXin LI mlist_free_one(struct mlist *ml)
61248c779cdSXin LI {
61348c779cdSXin LI 	if (ml->map)
61448c779cdSXin LI 		apprentice_unmap(CAST(struct magic_map *, ml->map));
61548c779cdSXin LI 	free(ml);
61648c779cdSXin LI }
61748c779cdSXin LI 
61848c779cdSXin LI private void
619b6cee71dSXin LI mlist_free(struct mlist *mlist)
620b6cee71dSXin LI {
621c2931133SXin LI 	struct mlist *ml, *next;
622b6cee71dSXin LI 
623b6cee71dSXin LI 	if (mlist == NULL)
624b6cee71dSXin LI 		return;
625b6cee71dSXin LI 
6262726a701SXin LI 	for (ml = mlist->next; ml != mlist;) {
62748c779cdSXin LI 		next = ml->next;
62848c779cdSXin LI 		mlist_free_one(ml);
6292726a701SXin LI 		ml = next;
630b6cee71dSXin LI 	}
63148c779cdSXin LI 	mlist_free_one(mlist);
632b6cee71dSXin LI }
633b6cee71dSXin LI 
634c2931133SXin LI #ifndef COMPILE_ONLY
635c2931133SXin LI /* void **bufs: an array of compiled magic files */
636c2931133SXin LI protected int
637c2931133SXin LI buffer_apprentice(struct magic_set *ms, struct magic **bufs,
638c2931133SXin LI     size_t *sizes, size_t nbufs)
639c2931133SXin LI {
640c2931133SXin LI 	size_t i, j;
641c2931133SXin LI 	struct mlist *ml;
642c2931133SXin LI 	struct magic_map *map;
643c2931133SXin LI 
644c2931133SXin LI 	if (nbufs == 0)
645c2931133SXin LI 		return -1;
646c2931133SXin LI 
64740427ccaSGordon Tetlow 	(void)file_reset(ms, 0);
648c2931133SXin LI 
649c2931133SXin LI 	init_file_tables();
650c2931133SXin LI 
651c2931133SXin LI 	for (i = 0; i < MAGIC_SETS; i++) {
652c2931133SXin LI 		mlist_free(ms->mlist[i]);
653c2931133SXin LI 		if ((ms->mlist[i] = mlist_alloc()) == NULL) {
654c2931133SXin LI 			file_oomem(ms, sizeof(*ms->mlist[i]));
655c2931133SXin LI 			goto fail;
656c2931133SXin LI 		}
657c2931133SXin LI 	}
658c2931133SXin LI 
659c2931133SXin LI 	for (i = 0; i < nbufs; i++) {
660c2931133SXin LI 		map = apprentice_buf(ms, bufs[i], sizes[i]);
661c2931133SXin LI 		if (map == NULL)
662c2931133SXin LI 			goto fail;
663c2931133SXin LI 
664c2931133SXin LI 		for (j = 0; j < MAGIC_SETS; j++) {
665c2931133SXin LI 			if (add_mlist(ms->mlist[j], map, j) == -1) {
666c2931133SXin LI 				file_oomem(ms, sizeof(*ml));
667c2931133SXin LI 				goto fail;
668c2931133SXin LI 			}
669c2931133SXin LI 		}
670c2931133SXin LI 	}
671c2931133SXin LI 
672c2931133SXin LI 	return 0;
673c2931133SXin LI fail:
6742726a701SXin LI 	mlist_free_all(ms);
675c2931133SXin LI 	return -1;
676c2931133SXin LI }
677c2931133SXin LI #endif
678c2931133SXin LI 
679b6cee71dSXin LI /* const char *fn: list of magic files and directories */
680b6cee71dSXin LI protected int
681b6cee71dSXin LI file_apprentice(struct magic_set *ms, const char *fn, int action)
682b6cee71dSXin LI {
683b6cee71dSXin LI 	char *p, *mfn;
68458a0f0d0SEitan Adler 	int fileerr, errs = -1;
6852726a701SXin LI 	size_t i, j;
686b6cee71dSXin LI 
68740427ccaSGordon Tetlow 	(void)file_reset(ms, 0);
688b6cee71dSXin LI 
689b6cee71dSXin LI 	if ((fn = magic_getpath(fn, action)) == NULL)
690b6cee71dSXin LI 		return -1;
691b6cee71dSXin LI 
692b6cee71dSXin LI 	init_file_tables();
693b6cee71dSXin LI 
694b6cee71dSXin LI 	if ((mfn = strdup(fn)) == NULL) {
695b6cee71dSXin LI 		file_oomem(ms, strlen(fn));
696b6cee71dSXin LI 		return -1;
697b6cee71dSXin LI 	}
698b6cee71dSXin LI 
699b6cee71dSXin LI 	for (i = 0; i < MAGIC_SETS; i++) {
700b6cee71dSXin LI 		mlist_free(ms->mlist[i]);
701b6cee71dSXin LI 		if ((ms->mlist[i] = mlist_alloc()) == NULL) {
702b6cee71dSXin LI 			file_oomem(ms, sizeof(*ms->mlist[i]));
7032726a701SXin LI 			for (j = 0; j < i; j++) {
7042726a701SXin LI 				mlist_free(ms->mlist[j]);
7052726a701SXin LI 				ms->mlist[j] = NULL;
706b6cee71dSXin LI 			}
707b6cee71dSXin LI 			free(mfn);
708b6cee71dSXin LI 			return -1;
709b6cee71dSXin LI 		}
710b6cee71dSXin LI 	}
711b6cee71dSXin LI 	fn = mfn;
712b6cee71dSXin LI 
713b6cee71dSXin LI 	while (fn) {
714b6cee71dSXin LI 		p = strchr(fn, PATHSEP);
715b6cee71dSXin LI 		if (p)
716b6cee71dSXin LI 			*p++ = '\0';
717b6cee71dSXin LI 		if (*fn == '\0')
718b6cee71dSXin LI 			break;
71958a0f0d0SEitan Adler 		fileerr = apprentice_1(ms, fn, action);
72058a0f0d0SEitan Adler 		errs = MAX(errs, fileerr);
721b6cee71dSXin LI 		fn = p;
722b6cee71dSXin LI 	}
723b6cee71dSXin LI 
724b6cee71dSXin LI 	free(mfn);
725b6cee71dSXin LI 
726b6cee71dSXin LI 	if (errs == -1) {
727b6cee71dSXin LI 		for (i = 0; i < MAGIC_SETS; i++) {
728b6cee71dSXin LI 			mlist_free(ms->mlist[i]);
729b6cee71dSXin LI 			ms->mlist[i] = NULL;
730b6cee71dSXin LI 		}
731b6cee71dSXin LI 		file_error(ms, 0, "could not find any valid magic files!");
732b6cee71dSXin LI 		return -1;
733b6cee71dSXin LI 	}
734b6cee71dSXin LI 
735b6cee71dSXin LI #if 0
736b6cee71dSXin LI 	/*
737b6cee71dSXin LI 	 * Always leave the database loaded
738b6cee71dSXin LI 	 */
739b6cee71dSXin LI 	if (action == FILE_LOAD)
740b6cee71dSXin LI 		return 0;
741b6cee71dSXin LI 
742b6cee71dSXin LI 	for (i = 0; i < MAGIC_SETS; i++) {
743b6cee71dSXin LI 		mlist_free(ms->mlist[i]);
744b6cee71dSXin LI 		ms->mlist[i] = NULL;
745b6cee71dSXin LI 	}
746b6cee71dSXin LI #endif
747b6cee71dSXin LI 
748b6cee71dSXin LI 	switch (action) {
749b6cee71dSXin LI 	case FILE_LOAD:
750b6cee71dSXin LI 	case FILE_COMPILE:
751b6cee71dSXin LI 	case FILE_CHECK:
752b6cee71dSXin LI 	case FILE_LIST:
753b6cee71dSXin LI 		return 0;
754b6cee71dSXin LI 	default:
755b6cee71dSXin LI 		file_error(ms, 0, "Invalid action %d", action);
756b6cee71dSXin LI 		return -1;
757b6cee71dSXin LI 	}
758b6cee71dSXin LI }
759b6cee71dSXin LI 
/*
 * Estimate how many "real" characters a regular expression contains.
 * The result feeds the strength computation, where a longer literal
 * part is taken to mean a more specific match:
 *	- a backslash together with the character it escapes counts 1
 *	- the magic characters ? * . + ^ $ count 0
 *	- a [...] expression counts 1 (its closing ']' is what gets counted)
 *	- a {...} expression counts 0
 *	- every other character counts 1
 *	- the result is never less than 1
 */
private size_t
nonmagic(const char *str)
{
	const char *cur = str;
	size_t weight = 0;

	while (*cur != '\0') {
		switch (*cur) {
		case '\\':
			/* swallow the escaped character, if there is one */
			if (cur[1] != '\0')
				cur++;
			weight++;
			break;

		case '?':
		case '*':
		case '.':
		case '+':
		case '^':
		case '$':
			break;

		case '[':
			/*
			 * Stop on the ']' (or the end of the string) and
			 * look at that character on the next round: a ']'
			 * is then counted as an ordinary character.
			 */
			while (*cur != '\0' && *cur != ']')
				cur++;
			continue;

		case '{':
			while (*cur != '\0' && *cur != '}')
				cur++;
			if (*cur == '\0')
				continue;	/* unterminated: loop ends */
			break;			/* step over the '}' */

		default:
			weight++;
			break;
		}
		cur++;
	}

	if (weight == 0)
		weight = 1;
	return weight;
}
808b6cee71dSXin LI 
80940427ccaSGordon Tetlow 
81040427ccaSGordon Tetlow private size_t
81140427ccaSGordon Tetlow typesize(int type)
81240427ccaSGordon Tetlow {
81340427ccaSGordon Tetlow 	switch (type) {
81440427ccaSGordon Tetlow 	case FILE_BYTE:
81540427ccaSGordon Tetlow 		return 1;
81640427ccaSGordon Tetlow 
81740427ccaSGordon Tetlow 	case FILE_SHORT:
81840427ccaSGordon Tetlow 	case FILE_LESHORT:
81940427ccaSGordon Tetlow 	case FILE_BESHORT:
82040427ccaSGordon Tetlow 		return 2;
82140427ccaSGordon Tetlow 
82240427ccaSGordon Tetlow 	case FILE_LONG:
82340427ccaSGordon Tetlow 	case FILE_LELONG:
82440427ccaSGordon Tetlow 	case FILE_BELONG:
82540427ccaSGordon Tetlow 	case FILE_MELONG:
82640427ccaSGordon Tetlow 		return 4;
82740427ccaSGordon Tetlow 
82840427ccaSGordon Tetlow 	case FILE_DATE:
82940427ccaSGordon Tetlow 	case FILE_LEDATE:
83040427ccaSGordon Tetlow 	case FILE_BEDATE:
83140427ccaSGordon Tetlow 	case FILE_MEDATE:
83240427ccaSGordon Tetlow 	case FILE_LDATE:
83340427ccaSGordon Tetlow 	case FILE_LELDATE:
83440427ccaSGordon Tetlow 	case FILE_BELDATE:
83540427ccaSGordon Tetlow 	case FILE_MELDATE:
83640427ccaSGordon Tetlow 	case FILE_FLOAT:
83740427ccaSGordon Tetlow 	case FILE_BEFLOAT:
83840427ccaSGordon Tetlow 	case FILE_LEFLOAT:
83940427ccaSGordon Tetlow 		return 4;
84040427ccaSGordon Tetlow 
84140427ccaSGordon Tetlow 	case FILE_QUAD:
84240427ccaSGordon Tetlow 	case FILE_BEQUAD:
84340427ccaSGordon Tetlow 	case FILE_LEQUAD:
84440427ccaSGordon Tetlow 	case FILE_QDATE:
84540427ccaSGordon Tetlow 	case FILE_LEQDATE:
84640427ccaSGordon Tetlow 	case FILE_BEQDATE:
84740427ccaSGordon Tetlow 	case FILE_QLDATE:
84840427ccaSGordon Tetlow 	case FILE_LEQLDATE:
84940427ccaSGordon Tetlow 	case FILE_BEQLDATE:
85040427ccaSGordon Tetlow 	case FILE_QWDATE:
85140427ccaSGordon Tetlow 	case FILE_LEQWDATE:
85240427ccaSGordon Tetlow 	case FILE_BEQWDATE:
85340427ccaSGordon Tetlow 	case FILE_DOUBLE:
85440427ccaSGordon Tetlow 	case FILE_BEDOUBLE:
85540427ccaSGordon Tetlow 	case FILE_LEDOUBLE:
8562726a701SXin LI 	case FILE_OFFSET:
857*43a5ec4eSXin LI 	case FILE_BEVARINT:
858*43a5ec4eSXin LI 	case FILE_LEVARINT:
85940427ccaSGordon Tetlow 		return 8;
8602726a701SXin LI 
8612726a701SXin LI 	case FILE_GUID:
8622726a701SXin LI 		return 16;
8632726a701SXin LI 
86440427ccaSGordon Tetlow 	default:
8652726a701SXin LI 		return FILE_BADSIZE;
86640427ccaSGordon Tetlow 	}
86740427ccaSGordon Tetlow }
86840427ccaSGordon Tetlow 
869b6cee71dSXin LI /*
870b6cee71dSXin LI  * Get weight of this magic entry, for sorting purposes.
871b6cee71dSXin LI  */
872b6cee71dSXin LI private size_t
873b6cee71dSXin LI apprentice_magic_strength(const struct magic *m)
874b6cee71dSXin LI {
87548c779cdSXin LI #define MULT 10U
87648c779cdSXin LI 	size_t ts, v;
87748c779cdSXin LI 	ssize_t val = 2 * MULT;	/* baseline strength */
878b6cee71dSXin LI 
879b6cee71dSXin LI 	switch (m->type) {
880b6cee71dSXin LI 	case FILE_DEFAULT:	/* make sure this sorts last */
881b6cee71dSXin LI 		if (m->factor_op != FILE_FACTOR_OP_NONE)
882b6cee71dSXin LI 			abort();
883b6cee71dSXin LI 		return 0;
884b6cee71dSXin LI 
885b6cee71dSXin LI 	case FILE_BYTE:
886b6cee71dSXin LI 	case FILE_SHORT:
887b6cee71dSXin LI 	case FILE_LESHORT:
888b6cee71dSXin LI 	case FILE_BESHORT:
889b6cee71dSXin LI 	case FILE_LONG:
890b6cee71dSXin LI 	case FILE_LELONG:
891b6cee71dSXin LI 	case FILE_BELONG:
892b6cee71dSXin LI 	case FILE_MELONG:
89340427ccaSGordon Tetlow 	case FILE_DATE:
89440427ccaSGordon Tetlow 	case FILE_LEDATE:
89540427ccaSGordon Tetlow 	case FILE_BEDATE:
89640427ccaSGordon Tetlow 	case FILE_MEDATE:
89740427ccaSGordon Tetlow 	case FILE_LDATE:
89840427ccaSGordon Tetlow 	case FILE_LELDATE:
89940427ccaSGordon Tetlow 	case FILE_BELDATE:
90040427ccaSGordon Tetlow 	case FILE_MELDATE:
90140427ccaSGordon Tetlow 	case FILE_FLOAT:
90240427ccaSGordon Tetlow 	case FILE_BEFLOAT:
90340427ccaSGordon Tetlow 	case FILE_LEFLOAT:
90440427ccaSGordon Tetlow 	case FILE_QUAD:
90540427ccaSGordon Tetlow 	case FILE_BEQUAD:
90640427ccaSGordon Tetlow 	case FILE_LEQUAD:
90740427ccaSGordon Tetlow 	case FILE_QDATE:
90840427ccaSGordon Tetlow 	case FILE_LEQDATE:
90940427ccaSGordon Tetlow 	case FILE_BEQDATE:
91040427ccaSGordon Tetlow 	case FILE_QLDATE:
91140427ccaSGordon Tetlow 	case FILE_LEQLDATE:
91240427ccaSGordon Tetlow 	case FILE_BEQLDATE:
91340427ccaSGordon Tetlow 	case FILE_QWDATE:
91440427ccaSGordon Tetlow 	case FILE_LEQWDATE:
91540427ccaSGordon Tetlow 	case FILE_BEQWDATE:
91640427ccaSGordon Tetlow 	case FILE_DOUBLE:
91740427ccaSGordon Tetlow 	case FILE_BEDOUBLE:
91840427ccaSGordon Tetlow 	case FILE_LEDOUBLE:
919*43a5ec4eSXin LI 	case FILE_BEVARINT:
920*43a5ec4eSXin LI 	case FILE_LEVARINT:
9212726a701SXin LI 	case FILE_GUID:
9222726a701SXin LI 	case FILE_OFFSET:
92340427ccaSGordon Tetlow 		ts = typesize(m->type);
9242726a701SXin LI 		if (ts == FILE_BADSIZE)
92540427ccaSGordon Tetlow 			abort();
92640427ccaSGordon Tetlow 		val += ts * MULT;
927b6cee71dSXin LI 		break;
928b6cee71dSXin LI 
929b6cee71dSXin LI 	case FILE_PSTRING:
930b6cee71dSXin LI 	case FILE_STRING:
931b6cee71dSXin LI 		val += m->vallen * MULT;
932b6cee71dSXin LI 		break;
933b6cee71dSXin LI 
934b6cee71dSXin LI 	case FILE_BESTRING16:
935b6cee71dSXin LI 	case FILE_LESTRING16:
936b6cee71dSXin LI 		val += m->vallen * MULT / 2;
937b6cee71dSXin LI 		break;
938b6cee71dSXin LI 
939b6cee71dSXin LI 	case FILE_SEARCH:
94048c779cdSXin LI 		if (m->vallen == 0)
94148c779cdSXin LI 			break;
942b6cee71dSXin LI 		val += m->vallen * MAX(MULT / m->vallen, 1);
943b6cee71dSXin LI 		break;
944b6cee71dSXin LI 
945b6cee71dSXin LI 	case FILE_REGEX:
946b6cee71dSXin LI 		v = nonmagic(m->value.s);
947b6cee71dSXin LI 		val += v * MAX(MULT / v, 1);
948b6cee71dSXin LI 		break;
949b6cee71dSXin LI 
950b6cee71dSXin LI 	case FILE_INDIRECT:
951b6cee71dSXin LI 	case FILE_NAME:
952b6cee71dSXin LI 	case FILE_USE:
953b6cee71dSXin LI 		break;
954b6cee71dSXin LI 
9553e41d09dSXin LI 	case FILE_DER:
9563e41d09dSXin LI 		val += MULT;
9573e41d09dSXin LI 		break;
9583e41d09dSXin LI 
959b6cee71dSXin LI 	default:
960b6cee71dSXin LI 		(void)fprintf(stderr, "Bad type %d\n", m->type);
961b6cee71dSXin LI 		abort();
962b6cee71dSXin LI 	}
963b6cee71dSXin LI 
964b6cee71dSXin LI 	switch (m->reln) {
965b6cee71dSXin LI 	case 'x':	/* matches anything penalize */
966b6cee71dSXin LI 	case '!':       /* matches almost anything penalize */
967b6cee71dSXin LI 		val = 0;
968b6cee71dSXin LI 		break;
969b6cee71dSXin LI 
970b6cee71dSXin LI 	case '=':	/* Exact match, prefer */
971b6cee71dSXin LI 		val += MULT;
972b6cee71dSXin LI 		break;
973b6cee71dSXin LI 
974b6cee71dSXin LI 	case '>':
975b6cee71dSXin LI 	case '<':	/* comparison match reduce strength */
976b6cee71dSXin LI 		val -= 2 * MULT;
977b6cee71dSXin LI 		break;
978b6cee71dSXin LI 
979b6cee71dSXin LI 	case '^':
980b6cee71dSXin LI 	case '&':	/* masking bits, we could count them too */
981b6cee71dSXin LI 		val -= MULT;
982b6cee71dSXin LI 		break;
983b6cee71dSXin LI 
984b6cee71dSXin LI 	default:
985b6cee71dSXin LI 		(void)fprintf(stderr, "Bad relation %c\n", m->reln);
986b6cee71dSXin LI 		abort();
987b6cee71dSXin LI 	}
988b6cee71dSXin LI 
989b6cee71dSXin LI 	switch (m->factor_op) {
990b6cee71dSXin LI 	case FILE_FACTOR_OP_NONE:
991b6cee71dSXin LI 		break;
992b6cee71dSXin LI 	case FILE_FACTOR_OP_PLUS:
993b6cee71dSXin LI 		val += m->factor;
994b6cee71dSXin LI 		break;
995b6cee71dSXin LI 	case FILE_FACTOR_OP_MINUS:
996b6cee71dSXin LI 		val -= m->factor;
997b6cee71dSXin LI 		break;
998b6cee71dSXin LI 	case FILE_FACTOR_OP_TIMES:
999b6cee71dSXin LI 		val *= m->factor;
1000b6cee71dSXin LI 		break;
1001b6cee71dSXin LI 	case FILE_FACTOR_OP_DIV:
1002b6cee71dSXin LI 		val /= m->factor;
1003b6cee71dSXin LI 		break;
1004b6cee71dSXin LI 	default:
1005b6cee71dSXin LI 		abort();
1006b6cee71dSXin LI 	}
1007b6cee71dSXin LI 
100848c779cdSXin LI 	if (val <= 0)	/* ensure we only return 0 for FILE_DEFAULT */
100948c779cdSXin LI 		val = 1;
101048c779cdSXin LI 
1011b6cee71dSXin LI 	/*
1012b6cee71dSXin LI 	 * Magic entries with no description get a bonus because they depend
1013b6cee71dSXin LI 	 * on subsequent magic entries to print something.
1014b6cee71dSXin LI 	 */
1015b6cee71dSXin LI 	if (m->desc[0] == '\0')
1016b6cee71dSXin LI 		val++;
1017b6cee71dSXin LI 	return val;
1018b6cee71dSXin LI }
1019b6cee71dSXin LI 
1020b6cee71dSXin LI /*
1021b6cee71dSXin LI  * Sort callback for sorting entries by "strength" (basically length)
1022b6cee71dSXin LI  */
1023b6cee71dSXin LI private int
1024b6cee71dSXin LI apprentice_sort(const void *a, const void *b)
1025b6cee71dSXin LI {
1026b6cee71dSXin LI 	const struct magic_entry *ma = CAST(const struct magic_entry *, a);
1027b6cee71dSXin LI 	const struct magic_entry *mb = CAST(const struct magic_entry *, b);
1028b6cee71dSXin LI 	size_t sa = apprentice_magic_strength(ma->mp);
1029b6cee71dSXin LI 	size_t sb = apprentice_magic_strength(mb->mp);
1030b6cee71dSXin LI 	if (sa == sb)
1031b6cee71dSXin LI 		return 0;
1032b6cee71dSXin LI 	else if (sa > sb)
1033b6cee71dSXin LI 		return -1;
1034b6cee71dSXin LI 	else
1035b6cee71dSXin LI 		return 1;
1036b6cee71dSXin LI }
1037b6cee71dSXin LI 
1038b6cee71dSXin LI /*
1039b6cee71dSXin LI  * Shows sorted patterns list in the order which is used for the matching
1040b6cee71dSXin LI  */
1041b6cee71dSXin LI private void
1042b6cee71dSXin LI apprentice_list(struct mlist *mlist, int mode)
1043b6cee71dSXin LI {
1044b6cee71dSXin LI 	uint32_t magindex = 0;
1045b6cee71dSXin LI 	struct mlist *ml;
1046b6cee71dSXin LI 	for (ml = mlist->next; ml != mlist; ml = ml->next) {
1047b6cee71dSXin LI 		for (magindex = 0; magindex < ml->nmagic; magindex++) {
1048b6cee71dSXin LI 			struct magic *m = &ml->magic[magindex];
1049b6cee71dSXin LI 			if ((m->flag & mode) != mode) {
1050b6cee71dSXin LI 				/* Skip sub-tests */
1051b6cee71dSXin LI 				while (magindex + 1 < ml->nmagic &&
1052b6cee71dSXin LI 				       ml->magic[magindex + 1].cont_level != 0)
1053b6cee71dSXin LI 					++magindex;
1054b6cee71dSXin LI 				continue; /* Skip to next top-level test*/
1055b6cee71dSXin LI 			}
1056b6cee71dSXin LI 
1057b6cee71dSXin LI 			/*
1058b6cee71dSXin LI 			 * Try to iterate over the tree until we find item with
1059b6cee71dSXin LI 			 * description/mimetype.
1060b6cee71dSXin LI 			 */
1061b6cee71dSXin LI 			while (magindex + 1 < ml->nmagic &&
1062b6cee71dSXin LI 			       ml->magic[magindex + 1].cont_level != 0 &&
1063b6cee71dSXin LI 			       *ml->magic[magindex].desc == '\0' &&
1064b6cee71dSXin LI 			       *ml->magic[magindex].mimetype == '\0')
1065b6cee71dSXin LI 				magindex++;
1066b6cee71dSXin LI 
10675f0216bdSXin LI 			printf("Strength = %3" SIZE_T_FORMAT "u@%u: %s [%s]\n",
1068b6cee71dSXin LI 			    apprentice_magic_strength(m),
10695f0216bdSXin LI 			    ml->magic[magindex].lineno,
1070b6cee71dSXin LI 			    ml->magic[magindex].desc,
1071b6cee71dSXin LI 			    ml->magic[magindex].mimetype);
1072b6cee71dSXin LI 		}
1073b6cee71dSXin LI 	}
1074b6cee71dSXin LI }
1075b6cee71dSXin LI 
1076b6cee71dSXin LI private void
1077b6cee71dSXin LI set_test_type(struct magic *mstart, struct magic *m)
1078b6cee71dSXin LI {
1079b6cee71dSXin LI 	switch (m->type) {
1080b6cee71dSXin LI 	case FILE_BYTE:
1081b6cee71dSXin LI 	case FILE_SHORT:
1082b6cee71dSXin LI 	case FILE_LONG:
1083b6cee71dSXin LI 	case FILE_DATE:
1084b6cee71dSXin LI 	case FILE_BESHORT:
1085b6cee71dSXin LI 	case FILE_BELONG:
1086b6cee71dSXin LI 	case FILE_BEDATE:
1087b6cee71dSXin LI 	case FILE_LESHORT:
1088b6cee71dSXin LI 	case FILE_LELONG:
1089b6cee71dSXin LI 	case FILE_LEDATE:
1090b6cee71dSXin LI 	case FILE_LDATE:
1091b6cee71dSXin LI 	case FILE_BELDATE:
1092b6cee71dSXin LI 	case FILE_LELDATE:
1093b6cee71dSXin LI 	case FILE_MEDATE:
1094b6cee71dSXin LI 	case FILE_MELDATE:
1095b6cee71dSXin LI 	case FILE_MELONG:
1096b6cee71dSXin LI 	case FILE_QUAD:
1097b6cee71dSXin LI 	case FILE_LEQUAD:
1098b6cee71dSXin LI 	case FILE_BEQUAD:
1099b6cee71dSXin LI 	case FILE_QDATE:
1100b6cee71dSXin LI 	case FILE_LEQDATE:
1101b6cee71dSXin LI 	case FILE_BEQDATE:
1102b6cee71dSXin LI 	case FILE_QLDATE:
1103b6cee71dSXin LI 	case FILE_LEQLDATE:
1104b6cee71dSXin LI 	case FILE_BEQLDATE:
1105b6cee71dSXin LI 	case FILE_QWDATE:
1106b6cee71dSXin LI 	case FILE_LEQWDATE:
1107b6cee71dSXin LI 	case FILE_BEQWDATE:
1108b6cee71dSXin LI 	case FILE_FLOAT:
1109b6cee71dSXin LI 	case FILE_BEFLOAT:
1110b6cee71dSXin LI 	case FILE_LEFLOAT:
1111b6cee71dSXin LI 	case FILE_DOUBLE:
1112b6cee71dSXin LI 	case FILE_BEDOUBLE:
1113b6cee71dSXin LI 	case FILE_LEDOUBLE:
1114*43a5ec4eSXin LI 	case FILE_BEVARINT:
1115*43a5ec4eSXin LI 	case FILE_LEVARINT:
11163e41d09dSXin LI 	case FILE_DER:
11172726a701SXin LI 	case FILE_GUID:
11182726a701SXin LI 	case FILE_OFFSET:
1119b6cee71dSXin LI 		mstart->flag |= BINTEST;
1120b6cee71dSXin LI 		break;
1121b6cee71dSXin LI 	case FILE_STRING:
1122b6cee71dSXin LI 	case FILE_PSTRING:
1123b6cee71dSXin LI 	case FILE_BESTRING16:
1124b6cee71dSXin LI 	case FILE_LESTRING16:
1125b6cee71dSXin LI 		/* Allow text overrides */
1126b6cee71dSXin LI 		if (mstart->str_flags & STRING_TEXTTEST)
1127b6cee71dSXin LI 			mstart->flag |= TEXTTEST;
1128b6cee71dSXin LI 		else
1129b6cee71dSXin LI 			mstart->flag |= BINTEST;
1130b6cee71dSXin LI 		break;
1131b6cee71dSXin LI 	case FILE_REGEX:
1132b6cee71dSXin LI 	case FILE_SEARCH:
1133b6cee71dSXin LI 		/* Check for override */
1134b6cee71dSXin LI 		if (mstart->str_flags & STRING_BINTEST)
1135b6cee71dSXin LI 			mstart->flag |= BINTEST;
1136b6cee71dSXin LI 		if (mstart->str_flags & STRING_TEXTTEST)
1137b6cee71dSXin LI 			mstart->flag |= TEXTTEST;
1138b6cee71dSXin LI 
1139b6cee71dSXin LI 		if (mstart->flag & (TEXTTEST|BINTEST))
1140b6cee71dSXin LI 			break;
1141b6cee71dSXin LI 
1142b6cee71dSXin LI 		/* binary test if pattern is not text */
114348c779cdSXin LI 		if (file_looks_utf8(m->value.us, CAST(size_t, m->vallen), NULL,
1144b6cee71dSXin LI 		    NULL) <= 0)
1145b6cee71dSXin LI 			mstart->flag |= BINTEST;
1146b6cee71dSXin LI 		else
1147b6cee71dSXin LI 			mstart->flag |= TEXTTEST;
1148b6cee71dSXin LI 		break;
1149b6cee71dSXin LI 	case FILE_DEFAULT:
1150b6cee71dSXin LI 		/* can't deduce anything; we shouldn't see this at the
1151b6cee71dSXin LI 		   top level anyway */
1152b6cee71dSXin LI 		break;
1153b6cee71dSXin LI 	case FILE_INVALID:
1154b6cee71dSXin LI 	default:
1155b6cee71dSXin LI 		/* invalid search type, but no need to complain here */
1156b6cee71dSXin LI 		break;
1157b6cee71dSXin LI 	}
1158b6cee71dSXin LI }
1159b6cee71dSXin LI 
1160b6cee71dSXin LI private int
1161b6cee71dSXin LI addentry(struct magic_set *ms, struct magic_entry *me,
1162b6cee71dSXin LI    struct magic_entry_set *mset)
1163b6cee71dSXin LI {
1164b6cee71dSXin LI 	size_t i = me->mp->type == FILE_NAME ? 1 : 0;
1165b6cee71dSXin LI 	if (mset[i].count == mset[i].max) {
1166b6cee71dSXin LI 		struct magic_entry *mp;
1167b6cee71dSXin LI 
1168b6cee71dSXin LI 		mset[i].max += ALLOC_INCR;
1169b6cee71dSXin LI 		if ((mp = CAST(struct magic_entry *,
1170b6cee71dSXin LI 		    realloc(mset[i].me, sizeof(*mp) * mset[i].max))) ==
1171b6cee71dSXin LI 		    NULL) {
1172b6cee71dSXin LI 			file_oomem(ms, sizeof(*mp) * mset[i].max);
1173b6cee71dSXin LI 			return -1;
1174b6cee71dSXin LI 		}
1175b6cee71dSXin LI 		(void)memset(&mp[mset[i].count], 0, sizeof(*mp) *
1176b6cee71dSXin LI 		    ALLOC_INCR);
1177b6cee71dSXin LI 		mset[i].me = mp;
1178b6cee71dSXin LI 	}
1179b6cee71dSXin LI 	mset[i].me[mset[i].count++] = *me;
1180b6cee71dSXin LI 	memset(me, 0, sizeof(*me));
1181b6cee71dSXin LI 	return 0;
1182b6cee71dSXin LI }
1183b6cee71dSXin LI 
1184b6cee71dSXin LI /*
1185b6cee71dSXin LI  * Load and parse one file.
1186b6cee71dSXin LI  */
1187b6cee71dSXin LI private void
1188b6cee71dSXin LI load_1(struct magic_set *ms, int action, const char *fn, int *errs,
1189b6cee71dSXin LI    struct magic_entry_set *mset)
1190b6cee71dSXin LI {
1191b6cee71dSXin LI 	size_t lineno = 0, llen = 0;
1192b6cee71dSXin LI 	char *line = NULL;
1193b6cee71dSXin LI 	ssize_t len;
1194b6cee71dSXin LI 	struct magic_entry me;
1195b6cee71dSXin LI 
1196b6cee71dSXin LI 	FILE *f = fopen(ms->file = fn, "r");
1197b6cee71dSXin LI 	if (f == NULL) {
1198b6cee71dSXin LI 		if (errno != ENOENT)
1199b6cee71dSXin LI 			file_error(ms, errno, "cannot read magic file `%s'",
1200b6cee71dSXin LI 				   fn);
1201b6cee71dSXin LI 		(*errs)++;
1202b6cee71dSXin LI 		return;
1203b6cee71dSXin LI 	}
1204b6cee71dSXin LI 
1205b6cee71dSXin LI 	memset(&me, 0, sizeof(me));
1206b6cee71dSXin LI 	/* read and parse this file */
1207b6cee71dSXin LI 	for (ms->line = 1; (len = getline(&line, &llen, f)) != -1;
1208b6cee71dSXin LI 	    ms->line++) {
1209b6cee71dSXin LI 		if (len == 0) /* null line, garbage, etc */
1210b6cee71dSXin LI 			continue;
1211b6cee71dSXin LI 		if (line[len - 1] == '\n') {
1212b6cee71dSXin LI 			lineno++;
1213b6cee71dSXin LI 			line[len - 1] = '\0'; /* delete newline */
1214b6cee71dSXin LI 		}
1215b6cee71dSXin LI 		switch (line[0]) {
1216b6cee71dSXin LI 		case '\0':	/* empty, do not parse */
1217b6cee71dSXin LI 		case '#':	/* comment, do not parse */
1218b6cee71dSXin LI 			continue;
1219b6cee71dSXin LI 		case '!':
1220b6cee71dSXin LI 			if (line[1] == ':') {
1221b6cee71dSXin LI 				size_t i;
1222b6cee71dSXin LI 
1223b6cee71dSXin LI 				for (i = 0; bang[i].name != NULL; i++) {
122448c779cdSXin LI 					if (CAST(size_t, len - 2) > bang[i].len &&
1225b6cee71dSXin LI 					    memcmp(bang[i].name, line + 2,
1226b6cee71dSXin LI 					    bang[i].len) == 0)
1227b6cee71dSXin LI 						break;
1228b6cee71dSXin LI 				}
1229b6cee71dSXin LI 				if (bang[i].name == NULL) {
1230b6cee71dSXin LI 					file_error(ms, 0,
1231b6cee71dSXin LI 					    "Unknown !: entry `%s'", line);
1232b6cee71dSXin LI 					(*errs)++;
1233b6cee71dSXin LI 					continue;
1234b6cee71dSXin LI 				}
1235b6cee71dSXin LI 				if (me.mp == NULL) {
1236b6cee71dSXin LI 					file_error(ms, 0,
1237b6cee71dSXin LI 					    "No current entry for :!%s type",
1238b6cee71dSXin LI 						bang[i].name);
1239b6cee71dSXin LI 					(*errs)++;
1240b6cee71dSXin LI 					continue;
1241b6cee71dSXin LI 				}
1242b6cee71dSXin LI 				if ((*bang[i].fun)(ms, &me,
12432726a701SXin LI 				    line + bang[i].len + 2,
12442726a701SXin LI 				    len - bang[i].len - 2) != 0) {
1245b6cee71dSXin LI 					(*errs)++;
1246b6cee71dSXin LI 					continue;
1247b6cee71dSXin LI 				}
1248b6cee71dSXin LI 				continue;
1249b6cee71dSXin LI 			}
1250b6cee71dSXin LI 			/*FALLTHROUGH*/
1251b6cee71dSXin LI 		default:
1252b6cee71dSXin LI 		again:
1253b6cee71dSXin LI 			switch (parse(ms, &me, line, lineno, action)) {
1254b6cee71dSXin LI 			case 0:
1255b6cee71dSXin LI 				continue;
1256b6cee71dSXin LI 			case 1:
1257b6cee71dSXin LI 				(void)addentry(ms, &me, mset);
1258b6cee71dSXin LI 				goto again;
1259b6cee71dSXin LI 			default:
1260b6cee71dSXin LI 				(*errs)++;
1261b6cee71dSXin LI 				break;
1262b6cee71dSXin LI 			}
1263b6cee71dSXin LI 		}
1264b6cee71dSXin LI 	}
1265b6cee71dSXin LI 	if (me.mp)
1266b6cee71dSXin LI 		(void)addentry(ms, &me, mset);
1267b6cee71dSXin LI 	free(line);
1268b6cee71dSXin LI 	(void)fclose(f);
1269b6cee71dSXin LI }
1270b6cee71dSXin LI 
/*
 * qsort() comparator for an array of C strings: p1 and p2 each point to
 * an array element (a char *), and the strings they refer to are
 * compared with strcmp().
 */
private int
cmpstrp(const void *p1, const void *p2)
{
	char *const *lhs = RCAST(char *const *, p1);
	char *const *rhs = RCAST(char *const *, p2);

	return strcmp(*lhs, *rhs);
}
1280b6cee71dSXin LI 
1281b6cee71dSXin LI 
1282b6cee71dSXin LI private uint32_t
1283b6cee71dSXin LI set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1284b6cee71dSXin LI     uint32_t starttest)
1285b6cee71dSXin LI {
1286b6cee71dSXin LI 	static const char text[] = "text";
1287b6cee71dSXin LI 	static const char binary[] = "binary";
1288b6cee71dSXin LI 	static const size_t len = sizeof(text);
1289b6cee71dSXin LI 
1290b6cee71dSXin LI 	uint32_t i = starttest;
1291b6cee71dSXin LI 
1292b6cee71dSXin LI 	do {
1293b6cee71dSXin LI 		set_test_type(me[starttest].mp, me[i].mp);
1294b6cee71dSXin LI 		if ((ms->flags & MAGIC_DEBUG) == 0)
1295b6cee71dSXin LI 			continue;
1296b6cee71dSXin LI 		(void)fprintf(stderr, "%s%s%s: %s\n",
1297b6cee71dSXin LI 		    me[i].mp->mimetype,
1298b6cee71dSXin LI 		    me[i].mp->mimetype[0] == '\0' ? "" : "; ",
1299b6cee71dSXin LI 		    me[i].mp->desc[0] ? me[i].mp->desc : "(no description)",
1300b6cee71dSXin LI 		    me[i].mp->flag & BINTEST ? binary : text);
1301b6cee71dSXin LI 		if (me[i].mp->flag & BINTEST) {
1302b6cee71dSXin LI 			char *p = strstr(me[i].mp->desc, text);
1303b6cee71dSXin LI 			if (p && (p == me[i].mp->desc ||
130448c779cdSXin LI 			    isspace(CAST(unsigned char, p[-1]))) &&
1305b6cee71dSXin LI 			    (p + len - me[i].mp->desc == MAXstring
1306b6cee71dSXin LI 			    || (p[len] == '\0' ||
130748c779cdSXin LI 			    isspace(CAST(unsigned char, p[len])))))
1308b6cee71dSXin LI 				(void)fprintf(stderr, "*** Possible "
1309b6cee71dSXin LI 				    "binary test for text type\n");
1310b6cee71dSXin LI 		}
1311b6cee71dSXin LI 	} while (++i < nme && me[i].mp->cont_level != 0);
1312b6cee71dSXin LI 	return i;
1313b6cee71dSXin LI }
1314b6cee71dSXin LI 
1315b6cee71dSXin LI private void
1316b6cee71dSXin LI set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme)
1317b6cee71dSXin LI {
1318b6cee71dSXin LI 	uint32_t i;
1319b6cee71dSXin LI 	for (i = 0; i < nme; i++) {
1320b6cee71dSXin LI 		if (me[i].mp->cont_level == 0 &&
1321b6cee71dSXin LI 		    me[i].mp->type == FILE_DEFAULT) {
1322b6cee71dSXin LI 			while (++i < nme)
1323b6cee71dSXin LI 				if (me[i].mp->cont_level == 0)
1324b6cee71dSXin LI 					break;
1325b6cee71dSXin LI 			if (i != nme) {
1326b6cee71dSXin LI 				/* XXX - Ugh! */
1327b6cee71dSXin LI 				ms->line = me[i].mp->lineno;
1328b6cee71dSXin LI 				file_magwarn(ms,
1329b6cee71dSXin LI 				    "level 0 \"default\" did not sort last");
1330b6cee71dSXin LI 			}
1331b6cee71dSXin LI 			return;
1332b6cee71dSXin LI 		}
1333b6cee71dSXin LI 	}
1334b6cee71dSXin LI }
1335b6cee71dSXin LI 
1336b6cee71dSXin LI private int
1337b6cee71dSXin LI coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1338b6cee71dSXin LI     struct magic **ma, uint32_t *nma)
1339b6cee71dSXin LI {
1340b6cee71dSXin LI 	uint32_t i, mentrycount = 0;
1341b6cee71dSXin LI 	size_t slen;
1342b6cee71dSXin LI 
1343b6cee71dSXin LI 	for (i = 0; i < nme; i++)
1344b6cee71dSXin LI 		mentrycount += me[i].cont_count;
1345b6cee71dSXin LI 
1346*43a5ec4eSXin LI 	if (mentrycount == 0) {
1347*43a5ec4eSXin LI 		*ma = NULL;
1348*43a5ec4eSXin LI 		*nma = 0;
1349*43a5ec4eSXin LI 		return 0;
1350*43a5ec4eSXin LI 	}
1351*43a5ec4eSXin LI 
1352b6cee71dSXin LI 	slen = sizeof(**ma) * mentrycount;
1353b6cee71dSXin LI 	if ((*ma = CAST(struct magic *, malloc(slen))) == NULL) {
1354b6cee71dSXin LI 		file_oomem(ms, slen);
1355b6cee71dSXin LI 		return -1;
1356b6cee71dSXin LI 	}
1357b6cee71dSXin LI 
1358b6cee71dSXin LI 	mentrycount = 0;
1359b6cee71dSXin LI 	for (i = 0; i < nme; i++) {
1360b6cee71dSXin LI 		(void)memcpy(*ma + mentrycount, me[i].mp,
1361b6cee71dSXin LI 		    me[i].cont_count * sizeof(**ma));
1362b6cee71dSXin LI 		mentrycount += me[i].cont_count;
1363b6cee71dSXin LI 	}
1364b6cee71dSXin LI 	*nma = mentrycount;
1365b6cee71dSXin LI 	return 0;
1366b6cee71dSXin LI }
1367b6cee71dSXin LI 
1368b6cee71dSXin LI private void
1369b6cee71dSXin LI magic_entry_free(struct magic_entry *me, uint32_t nme)
1370b6cee71dSXin LI {
1371b6cee71dSXin LI 	uint32_t i;
1372b6cee71dSXin LI 	if (me == NULL)
1373b6cee71dSXin LI 		return;
1374b6cee71dSXin LI 	for (i = 0; i < nme; i++)
1375b6cee71dSXin LI 		free(me[i].mp);
1376b6cee71dSXin LI 	free(me);
1377b6cee71dSXin LI }
1378b6cee71dSXin LI 
1379b6cee71dSXin LI private struct magic_map *
1380b6cee71dSXin LI apprentice_load(struct magic_set *ms, const char *fn, int action)
1381b6cee71dSXin LI {
1382b6cee71dSXin LI 	int errs = 0;
1383b6cee71dSXin LI 	uint32_t i, j;
1384b6cee71dSXin LI 	size_t files = 0, maxfiles = 0;
1385b6cee71dSXin LI 	char **filearr = NULL, *mfn;
1386b6cee71dSXin LI 	struct stat st;
1387b6cee71dSXin LI 	struct magic_map *map;
1388b6cee71dSXin LI 	struct magic_entry_set mset[MAGIC_SETS];
1389b6cee71dSXin LI 	DIR *dir;
1390b6cee71dSXin LI 	struct dirent *d;
1391b6cee71dSXin LI 
1392b6cee71dSXin LI 	memset(mset, 0, sizeof(mset));
1393b6cee71dSXin LI 	ms->flags |= MAGIC_CHECK;	/* Enable checks for parsed files */
1394b6cee71dSXin LI 
1395b6cee71dSXin LI 
1396b6cee71dSXin LI 	if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL)
1397b6cee71dSXin LI 	{
1398b6cee71dSXin LI 		file_oomem(ms, sizeof(*map));
1399b6cee71dSXin LI 		return NULL;
1400b6cee71dSXin LI 	}
14019ce06829SXin LI 	map->type = MAP_TYPE_MALLOC;
1402b6cee71dSXin LI 
1403b6cee71dSXin LI 	/* print silly verbose header for USG compat. */
1404b6cee71dSXin LI 	if (action == FILE_CHECK)
1405b6cee71dSXin LI 		(void)fprintf(stderr, "%s\n", usg_hdr);
1406b6cee71dSXin LI 
1407b6cee71dSXin LI 	/* load directory or file */
1408b6cee71dSXin LI 	if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
1409b6cee71dSXin LI 		dir = opendir(fn);
1410b6cee71dSXin LI 		if (!dir) {
1411b6cee71dSXin LI 			errs++;
1412b6cee71dSXin LI 			goto out;
1413b6cee71dSXin LI 		}
1414b6cee71dSXin LI 		while ((d = readdir(dir)) != NULL) {
141540427ccaSGordon Tetlow 			if (d->d_name[0] == '.')
141640427ccaSGordon Tetlow 				continue;
1417b6cee71dSXin LI 			if (asprintf(&mfn, "%s/%s", fn, d->d_name) < 0) {
1418b6cee71dSXin LI 				file_oomem(ms,
1419b6cee71dSXin LI 				    strlen(fn) + strlen(d->d_name) + 2);
1420b6cee71dSXin LI 				errs++;
1421b6cee71dSXin LI 				closedir(dir);
1422b6cee71dSXin LI 				goto out;
1423b6cee71dSXin LI 			}
1424b6cee71dSXin LI 			if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) {
1425b6cee71dSXin LI 				free(mfn);
1426b6cee71dSXin LI 				continue;
1427b6cee71dSXin LI 			}
1428b6cee71dSXin LI 			if (files >= maxfiles) {
1429b6cee71dSXin LI 				size_t mlen;
14302726a701SXin LI 				char **nfilearr;
1431b6cee71dSXin LI 				maxfiles = (maxfiles + 1) * 2;
1432b6cee71dSXin LI 				mlen = maxfiles * sizeof(*filearr);
14332726a701SXin LI 				if ((nfilearr = CAST(char **,
1434b6cee71dSXin LI 				    realloc(filearr, mlen))) == NULL) {
1435b6cee71dSXin LI 					file_oomem(ms, mlen);
1436b6cee71dSXin LI 					free(mfn);
1437b6cee71dSXin LI 					closedir(dir);
1438b6cee71dSXin LI 					errs++;
1439b6cee71dSXin LI 					goto out;
1440b6cee71dSXin LI 				}
14412726a701SXin LI 				filearr = nfilearr;
1442b6cee71dSXin LI 			}
1443b6cee71dSXin LI 			filearr[files++] = mfn;
1444b6cee71dSXin LI 		}
1445b6cee71dSXin LI 		closedir(dir);
144648c779cdSXin LI 		if (filearr) {
1447b6cee71dSXin LI 			qsort(filearr, files, sizeof(*filearr), cmpstrp);
1448b6cee71dSXin LI 			for (i = 0; i < files; i++) {
1449b6cee71dSXin LI 				load_1(ms, action, filearr[i], &errs, mset);
1450b6cee71dSXin LI 				free(filearr[i]);
1451b6cee71dSXin LI 			}
1452b6cee71dSXin LI 			free(filearr);
14532726a701SXin LI 			filearr = NULL;
145448c779cdSXin LI 		}
1455b6cee71dSXin LI 	} else
1456b6cee71dSXin LI 		load_1(ms, action, fn, &errs, mset);
1457b6cee71dSXin LI 	if (errs)
1458b6cee71dSXin LI 		goto out;
1459b6cee71dSXin LI 
1460b6cee71dSXin LI 	for (j = 0; j < MAGIC_SETS; j++) {
1461b6cee71dSXin LI 		/* Set types of tests */
1462b6cee71dSXin LI 		for (i = 0; i < mset[j].count; ) {
1463b6cee71dSXin LI 			if (mset[j].me[i].mp->cont_level != 0) {
1464b6cee71dSXin LI 				i++;
1465b6cee71dSXin LI 				continue;
1466b6cee71dSXin LI 			}
1467b6cee71dSXin LI 			i = set_text_binary(ms, mset[j].me, mset[j].count, i);
1468b6cee71dSXin LI 		}
14699ce06829SXin LI 		if (mset[j].me)
1470b6cee71dSXin LI 			qsort(mset[j].me, mset[j].count, sizeof(*mset[j].me),
1471b6cee71dSXin LI 			    apprentice_sort);
1472b6cee71dSXin LI 
1473b6cee71dSXin LI 		/*
1474b6cee71dSXin LI 		 * Make sure that any level 0 "default" line is last
1475b6cee71dSXin LI 		 * (if one exists).
1476b6cee71dSXin LI 		 */
1477b6cee71dSXin LI 		set_last_default(ms, mset[j].me, mset[j].count);
1478b6cee71dSXin LI 
1479*43a5ec4eSXin LI 		/* coalesce per file arrays into a single one, if needed */
1480*43a5ec4eSXin LI 		if (mset[j].count == 0)
1481*43a5ec4eSXin LI 			continue;
1482*43a5ec4eSXin LI 
1483b6cee71dSXin LI 		if (coalesce_entries(ms, mset[j].me, mset[j].count,
1484b6cee71dSXin LI 		    &map->magic[j], &map->nmagic[j]) == -1) {
1485b6cee71dSXin LI 			errs++;
1486b6cee71dSXin LI 			goto out;
1487b6cee71dSXin LI 		}
1488b6cee71dSXin LI 	}
1489b6cee71dSXin LI 
1490b6cee71dSXin LI out:
14912726a701SXin LI 	free(filearr);
1492b6cee71dSXin LI 	for (j = 0; j < MAGIC_SETS; j++)
1493b6cee71dSXin LI 		magic_entry_free(mset[j].me, mset[j].count);
1494b6cee71dSXin LI 
1495b6cee71dSXin LI 	if (errs) {
1496b6cee71dSXin LI 		apprentice_unmap(map);
1497b6cee71dSXin LI 		return NULL;
1498b6cee71dSXin LI 	}
1499b6cee71dSXin LI 	return map;
1500b6cee71dSXin LI }
1501b6cee71dSXin LI 
1502b6cee71dSXin LI /*
1503b6cee71dSXin LI  * extend the sign bit if the comparison is to be signed
1504b6cee71dSXin LI  */
1505b6cee71dSXin LI protected uint64_t
1506b6cee71dSXin LI file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
1507b6cee71dSXin LI {
1508b6cee71dSXin LI 	if (!(m->flag & UNSIGNED)) {
1509b6cee71dSXin LI 		switch(m->type) {
1510b6cee71dSXin LI 		/*
1511b6cee71dSXin LI 		 * Do not remove the casts below.  They are
1512b6cee71dSXin LI 		 * vital.  When later compared with the data,
1513b6cee71dSXin LI 		 * the sign extension must have happened.
1514b6cee71dSXin LI 		 */
1515b6cee71dSXin LI 		case FILE_BYTE:
151648c779cdSXin LI 			v = CAST(signed char,  v);
1517b6cee71dSXin LI 			break;
1518b6cee71dSXin LI 		case FILE_SHORT:
1519b6cee71dSXin LI 		case FILE_BESHORT:
1520b6cee71dSXin LI 		case FILE_LESHORT:
152148c779cdSXin LI 			v = CAST(short, v);
1522b6cee71dSXin LI 			break;
1523b6cee71dSXin LI 		case FILE_DATE:
1524b6cee71dSXin LI 		case FILE_BEDATE:
1525b6cee71dSXin LI 		case FILE_LEDATE:
1526b6cee71dSXin LI 		case FILE_MEDATE:
1527b6cee71dSXin LI 		case FILE_LDATE:
1528b6cee71dSXin LI 		case FILE_BELDATE:
1529b6cee71dSXin LI 		case FILE_LELDATE:
1530b6cee71dSXin LI 		case FILE_MELDATE:
1531b6cee71dSXin LI 		case FILE_LONG:
1532b6cee71dSXin LI 		case FILE_BELONG:
1533b6cee71dSXin LI 		case FILE_LELONG:
1534b6cee71dSXin LI 		case FILE_MELONG:
1535b6cee71dSXin LI 		case FILE_FLOAT:
1536b6cee71dSXin LI 		case FILE_BEFLOAT:
1537b6cee71dSXin LI 		case FILE_LEFLOAT:
153848c779cdSXin LI 			v = CAST(int32_t, v);
1539b6cee71dSXin LI 			break;
1540b6cee71dSXin LI 		case FILE_QUAD:
1541b6cee71dSXin LI 		case FILE_BEQUAD:
1542b6cee71dSXin LI 		case FILE_LEQUAD:
1543b6cee71dSXin LI 		case FILE_QDATE:
1544b6cee71dSXin LI 		case FILE_QLDATE:
1545b6cee71dSXin LI 		case FILE_QWDATE:
1546b6cee71dSXin LI 		case FILE_BEQDATE:
1547b6cee71dSXin LI 		case FILE_BEQLDATE:
1548b6cee71dSXin LI 		case FILE_BEQWDATE:
1549b6cee71dSXin LI 		case FILE_LEQDATE:
1550b6cee71dSXin LI 		case FILE_LEQLDATE:
1551b6cee71dSXin LI 		case FILE_LEQWDATE:
1552b6cee71dSXin LI 		case FILE_DOUBLE:
1553b6cee71dSXin LI 		case FILE_BEDOUBLE:
1554b6cee71dSXin LI 		case FILE_LEDOUBLE:
15552726a701SXin LI 		case FILE_OFFSET:
1556*43a5ec4eSXin LI 		case FILE_BEVARINT:
1557*43a5ec4eSXin LI 		case FILE_LEVARINT:
155848c779cdSXin LI 			v = CAST(int64_t, v);
1559b6cee71dSXin LI 			break;
1560b6cee71dSXin LI 		case FILE_STRING:
1561b6cee71dSXin LI 		case FILE_PSTRING:
1562b6cee71dSXin LI 		case FILE_BESTRING16:
1563b6cee71dSXin LI 		case FILE_LESTRING16:
1564b6cee71dSXin LI 		case FILE_REGEX:
1565b6cee71dSXin LI 		case FILE_SEARCH:
1566b6cee71dSXin LI 		case FILE_DEFAULT:
1567b6cee71dSXin LI 		case FILE_INDIRECT:
1568b6cee71dSXin LI 		case FILE_NAME:
1569b6cee71dSXin LI 		case FILE_USE:
1570b6cee71dSXin LI 		case FILE_CLEAR:
15713e41d09dSXin LI 		case FILE_DER:
15722726a701SXin LI 		case FILE_GUID:
1573b6cee71dSXin LI 			break;
1574b6cee71dSXin LI 		default:
1575b6cee71dSXin LI 			if (ms->flags & MAGIC_CHECK)
1576b6cee71dSXin LI 			    file_magwarn(ms, "cannot happen: m->type=%d\n",
1577b6cee71dSXin LI 				    m->type);
15782726a701SXin LI 			return FILE_BADSIZE;
1579b6cee71dSXin LI 		}
1580b6cee71dSXin LI 	}
1581b6cee71dSXin LI 	return v;
1582b6cee71dSXin LI }
1583b6cee71dSXin LI 
1584b6cee71dSXin LI private int
1585b6cee71dSXin LI string_modifier_check(struct magic_set *ms, struct magic *m)
1586b6cee71dSXin LI {
1587b6cee71dSXin LI 	if ((ms->flags & MAGIC_CHECK) == 0)
1588b6cee71dSXin LI 		return 0;
1589b6cee71dSXin LI 
1590b6cee71dSXin LI 	if ((m->type != FILE_REGEX || (m->str_flags & REGEX_LINE_COUNT) == 0) &&
1591b6cee71dSXin LI 	    (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0)) {
1592b6cee71dSXin LI 		file_magwarn(ms,
1593b6cee71dSXin LI 		    "'/BHhLl' modifiers are only allowed for pascal strings\n");
1594b6cee71dSXin LI 		return -1;
1595b6cee71dSXin LI 	}
1596b6cee71dSXin LI 	switch (m->type) {
1597b6cee71dSXin LI 	case FILE_BESTRING16:
1598b6cee71dSXin LI 	case FILE_LESTRING16:
1599b6cee71dSXin LI 		if (m->str_flags != 0) {
1600b6cee71dSXin LI 			file_magwarn(ms,
1601b6cee71dSXin LI 			    "no modifiers allowed for 16-bit strings\n");
1602b6cee71dSXin LI 			return -1;
1603b6cee71dSXin LI 		}
1604b6cee71dSXin LI 		break;
1605b6cee71dSXin LI 	case FILE_STRING:
1606b6cee71dSXin LI 	case FILE_PSTRING:
1607b6cee71dSXin LI 		if ((m->str_flags & REGEX_OFFSET_START) != 0) {
1608b6cee71dSXin LI 			file_magwarn(ms,
1609b6cee71dSXin LI 			    "'/%c' only allowed on regex and search\n",
1610b6cee71dSXin LI 			    CHAR_REGEX_OFFSET_START);
1611b6cee71dSXin LI 			return -1;
1612b6cee71dSXin LI 		}
1613b6cee71dSXin LI 		break;
1614b6cee71dSXin LI 	case FILE_SEARCH:
1615b6cee71dSXin LI 		if (m->str_range == 0) {
1616b6cee71dSXin LI 			file_magwarn(ms,
1617b6cee71dSXin LI 			    "missing range; defaulting to %d\n",
1618b6cee71dSXin LI                             STRING_DEFAULT_RANGE);
1619b6cee71dSXin LI 			m->str_range = STRING_DEFAULT_RANGE;
1620b6cee71dSXin LI 			return -1;
1621b6cee71dSXin LI 		}
1622b6cee71dSXin LI 		break;
1623b6cee71dSXin LI 	case FILE_REGEX:
1624b6cee71dSXin LI 		if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) {
1625b6cee71dSXin LI 			file_magwarn(ms, "'/%c' not allowed on regex\n",
1626b6cee71dSXin LI 			    CHAR_COMPACT_WHITESPACE);
1627b6cee71dSXin LI 			return -1;
1628b6cee71dSXin LI 		}
1629b6cee71dSXin LI 		if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) {
1630b6cee71dSXin LI 			file_magwarn(ms, "'/%c' not allowed on regex\n",
1631b6cee71dSXin LI 			    CHAR_COMPACT_OPTIONAL_WHITESPACE);
1632b6cee71dSXin LI 			return -1;
1633b6cee71dSXin LI 		}
1634b6cee71dSXin LI 		break;
1635b6cee71dSXin LI 	default:
1636b6cee71dSXin LI 		file_magwarn(ms, "coding error: m->type=%d\n",
1637b6cee71dSXin LI 		    m->type);
1638b6cee71dSXin LI 		return -1;
1639b6cee71dSXin LI 	}
1640b6cee71dSXin LI 	return 0;
1641b6cee71dSXin LI }
1642b6cee71dSXin LI 
1643b6cee71dSXin LI private int
1644b6cee71dSXin LI get_op(char c)
1645b6cee71dSXin LI {
1646b6cee71dSXin LI 	switch (c) {
1647b6cee71dSXin LI 	case '&':
1648b6cee71dSXin LI 		return FILE_OPAND;
1649b6cee71dSXin LI 	case '|':
1650b6cee71dSXin LI 		return FILE_OPOR;
1651b6cee71dSXin LI 	case '^':
1652b6cee71dSXin LI 		return FILE_OPXOR;
1653b6cee71dSXin LI 	case '+':
1654b6cee71dSXin LI 		return FILE_OPADD;
1655b6cee71dSXin LI 	case '-':
1656b6cee71dSXin LI 		return FILE_OPMINUS;
1657b6cee71dSXin LI 	case '*':
1658b6cee71dSXin LI 		return FILE_OPMULTIPLY;
1659b6cee71dSXin LI 	case '/':
1660b6cee71dSXin LI 		return FILE_OPDIVIDE;
1661b6cee71dSXin LI 	case '%':
1662b6cee71dSXin LI 		return FILE_OPMODULO;
1663b6cee71dSXin LI 	default:
1664b6cee71dSXin LI 		return -1;
1665b6cee71dSXin LI 	}
1666b6cee71dSXin LI }
1667b6cee71dSXin LI 
1668b6cee71dSXin LI #ifdef ENABLE_CONDITIONALS
1669b6cee71dSXin LI private int
1670b6cee71dSXin LI get_cond(const char *l, const char **t)
1671b6cee71dSXin LI {
1672b6cee71dSXin LI 	static const struct cond_tbl_s {
1673b6cee71dSXin LI 		char name[8];
1674b6cee71dSXin LI 		size_t len;
1675b6cee71dSXin LI 		int cond;
1676b6cee71dSXin LI 	} cond_tbl[] = {
1677b6cee71dSXin LI 		{ "if",		2,	COND_IF },
1678b6cee71dSXin LI 		{ "elif",	4,	COND_ELIF },
1679b6cee71dSXin LI 		{ "else",	4,	COND_ELSE },
1680b6cee71dSXin LI 		{ "",		0,	COND_NONE },
1681b6cee71dSXin LI 	};
1682b6cee71dSXin LI 	const struct cond_tbl_s *p;
1683b6cee71dSXin LI 
1684b6cee71dSXin LI 	for (p = cond_tbl; p->len; p++) {
1685b6cee71dSXin LI 		if (strncmp(l, p->name, p->len) == 0 &&
168648c779cdSXin LI 		    isspace(CAST(unsigned char, l[p->len]))) {
1687b6cee71dSXin LI 			if (t)
1688b6cee71dSXin LI 				*t = l + p->len;
1689b6cee71dSXin LI 			break;
1690b6cee71dSXin LI 		}
1691b6cee71dSXin LI 	}
1692b6cee71dSXin LI 	return p->cond;
1693b6cee71dSXin LI }
1694b6cee71dSXin LI 
1695b6cee71dSXin LI private int
1696b6cee71dSXin LI check_cond(struct magic_set *ms, int cond, uint32_t cont_level)
1697b6cee71dSXin LI {
1698b6cee71dSXin LI 	int last_cond;
1699b6cee71dSXin LI 	last_cond = ms->c.li[cont_level].last_cond;
1700b6cee71dSXin LI 
1701b6cee71dSXin LI 	switch (cond) {
1702b6cee71dSXin LI 	case COND_IF:
1703b6cee71dSXin LI 		if (last_cond != COND_NONE && last_cond != COND_ELIF) {
1704b6cee71dSXin LI 			if (ms->flags & MAGIC_CHECK)
1705b6cee71dSXin LI 				file_magwarn(ms, "syntax error: `if'");
1706b6cee71dSXin LI 			return -1;
1707b6cee71dSXin LI 		}
1708b6cee71dSXin LI 		last_cond = COND_IF;
1709b6cee71dSXin LI 		break;
1710b6cee71dSXin LI 
1711b6cee71dSXin LI 	case COND_ELIF:
1712b6cee71dSXin LI 		if (last_cond != COND_IF && last_cond != COND_ELIF) {
1713b6cee71dSXin LI 			if (ms->flags & MAGIC_CHECK)
1714b6cee71dSXin LI 				file_magwarn(ms, "syntax error: `elif'");
1715b6cee71dSXin LI 			return -1;
1716b6cee71dSXin LI 		}
1717b6cee71dSXin LI 		last_cond = COND_ELIF;
1718b6cee71dSXin LI 		break;
1719b6cee71dSXin LI 
1720b6cee71dSXin LI 	case COND_ELSE:
1721b6cee71dSXin LI 		if (last_cond != COND_IF && last_cond != COND_ELIF) {
1722b6cee71dSXin LI 			if (ms->flags & MAGIC_CHECK)
1723b6cee71dSXin LI 				file_magwarn(ms, "syntax error: `else'");
1724b6cee71dSXin LI 			return -1;
1725b6cee71dSXin LI 		}
1726b6cee71dSXin LI 		last_cond = COND_NONE;
1727b6cee71dSXin LI 		break;
1728b6cee71dSXin LI 
1729b6cee71dSXin LI 	case COND_NONE:
1730b6cee71dSXin LI 		last_cond = COND_NONE;
1731b6cee71dSXin LI 		break;
1732b6cee71dSXin LI 	}
1733b6cee71dSXin LI 
1734b6cee71dSXin LI 	ms->c.li[cont_level].last_cond = last_cond;
1735b6cee71dSXin LI 	return 0;
1736b6cee71dSXin LI }
1737b6cee71dSXin LI #endif /* ENABLE_CONDITIONALS */
1738b6cee71dSXin LI 
17394460e5b0SXin LI private int
17404460e5b0SXin LI parse_indirect_modifier(struct magic_set *ms, struct magic *m, const char **lp)
17414460e5b0SXin LI {
17424460e5b0SXin LI 	const char *l = *lp;
17434460e5b0SXin LI 
174448c779cdSXin LI 	while (!isspace(CAST(unsigned char, *++l)))
17454460e5b0SXin LI 		switch (*l) {
17464460e5b0SXin LI 		case CHAR_INDIRECT_RELATIVE:
17474460e5b0SXin LI 			m->str_flags |= INDIRECT_RELATIVE;
17484460e5b0SXin LI 			break;
17494460e5b0SXin LI 		default:
17504460e5b0SXin LI 			if (ms->flags & MAGIC_CHECK)
17514460e5b0SXin LI 				file_magwarn(ms, "indirect modifier `%c' "
17524460e5b0SXin LI 					"invalid", *l);
17534460e5b0SXin LI 			*lp = l;
17544460e5b0SXin LI 			return -1;
17554460e5b0SXin LI 		}
17564460e5b0SXin LI 	*lp = l;
17574460e5b0SXin LI 	return 0;
17584460e5b0SXin LI }
17594460e5b0SXin LI 
17604460e5b0SXin LI private void
17614460e5b0SXin LI parse_op_modifier(struct magic_set *ms, struct magic *m, const char **lp,
17624460e5b0SXin LI     int op)
17634460e5b0SXin LI {
17644460e5b0SXin LI 	const char *l = *lp;
17654460e5b0SXin LI 	char *t;
17664460e5b0SXin LI 	uint64_t val;
17674460e5b0SXin LI 
17684460e5b0SXin LI 	++l;
17694460e5b0SXin LI 	m->mask_op |= op;
177048c779cdSXin LI 	val = CAST(uint64_t, strtoull(l, &t, 0));
17714460e5b0SXin LI 	l = t;
17724460e5b0SXin LI 	m->num_mask = file_signextend(ms, m, val);
17734460e5b0SXin LI 	eatsize(&l);
17744460e5b0SXin LI 	*lp = l;
17754460e5b0SXin LI }
17764460e5b0SXin LI 
17774460e5b0SXin LI private int
17784460e5b0SXin LI parse_string_modifier(struct magic_set *ms, struct magic *m, const char **lp)
17794460e5b0SXin LI {
17804460e5b0SXin LI 	const char *l = *lp;
17814460e5b0SXin LI 	char *t;
17824460e5b0SXin LI 	int have_range = 0;
17834460e5b0SXin LI 
178448c779cdSXin LI 	while (!isspace(CAST(unsigned char, *++l))) {
17854460e5b0SXin LI 		switch (*l) {
17864460e5b0SXin LI 		case '0':  case '1':  case '2':
17874460e5b0SXin LI 		case '3':  case '4':  case '5':
17884460e5b0SXin LI 		case '6':  case '7':  case '8':
17894460e5b0SXin LI 		case '9':
17904460e5b0SXin LI 			if (have_range && (ms->flags & MAGIC_CHECK))
17914460e5b0SXin LI 				file_magwarn(ms, "multiple ranges");
17924460e5b0SXin LI 			have_range = 1;
17934460e5b0SXin LI 			m->str_range = CAST(uint32_t, strtoul(l, &t, 0));
17944460e5b0SXin LI 			if (m->str_range == 0)
17954460e5b0SXin LI 				file_magwarn(ms, "zero range");
17964460e5b0SXin LI 			l = t - 1;
17974460e5b0SXin LI 			break;
17984460e5b0SXin LI 		case CHAR_COMPACT_WHITESPACE:
17994460e5b0SXin LI 			m->str_flags |= STRING_COMPACT_WHITESPACE;
18004460e5b0SXin LI 			break;
18014460e5b0SXin LI 		case CHAR_COMPACT_OPTIONAL_WHITESPACE:
18024460e5b0SXin LI 			m->str_flags |= STRING_COMPACT_OPTIONAL_WHITESPACE;
18034460e5b0SXin LI 			break;
18044460e5b0SXin LI 		case CHAR_IGNORE_LOWERCASE:
18054460e5b0SXin LI 			m->str_flags |= STRING_IGNORE_LOWERCASE;
18064460e5b0SXin LI 			break;
18074460e5b0SXin LI 		case CHAR_IGNORE_UPPERCASE:
18084460e5b0SXin LI 			m->str_flags |= STRING_IGNORE_UPPERCASE;
18094460e5b0SXin LI 			break;
18104460e5b0SXin LI 		case CHAR_REGEX_OFFSET_START:
18114460e5b0SXin LI 			m->str_flags |= REGEX_OFFSET_START;
18124460e5b0SXin LI 			break;
18134460e5b0SXin LI 		case CHAR_BINTEST:
18144460e5b0SXin LI 			m->str_flags |= STRING_BINTEST;
18154460e5b0SXin LI 			break;
18164460e5b0SXin LI 		case CHAR_TEXTTEST:
18174460e5b0SXin LI 			m->str_flags |= STRING_TEXTTEST;
18184460e5b0SXin LI 			break;
18194460e5b0SXin LI 		case CHAR_TRIM:
18204460e5b0SXin LI 			m->str_flags |= STRING_TRIM;
18214460e5b0SXin LI 			break;
1822*43a5ec4eSXin LI 		case CHAR_FULL_WORD:
1823*43a5ec4eSXin LI 			m->str_flags |= STRING_FULL_WORD;
1824*43a5ec4eSXin LI 			break;
18254460e5b0SXin LI 		case CHAR_PSTRING_1_LE:
18264460e5b0SXin LI #define SET_LENGTH(a) m->str_flags = (m->str_flags & ~PSTRING_LEN) | (a)
18274460e5b0SXin LI 			if (m->type != FILE_PSTRING)
18284460e5b0SXin LI 				goto bad;
18294460e5b0SXin LI 			SET_LENGTH(PSTRING_1_LE);
18304460e5b0SXin LI 			break;
18314460e5b0SXin LI 		case CHAR_PSTRING_2_BE:
18324460e5b0SXin LI 			if (m->type != FILE_PSTRING)
18334460e5b0SXin LI 				goto bad;
18344460e5b0SXin LI 			SET_LENGTH(PSTRING_2_BE);
18354460e5b0SXin LI 			break;
18364460e5b0SXin LI 		case CHAR_PSTRING_2_LE:
18374460e5b0SXin LI 			if (m->type != FILE_PSTRING)
18384460e5b0SXin LI 				goto bad;
18394460e5b0SXin LI 			SET_LENGTH(PSTRING_2_LE);
18404460e5b0SXin LI 			break;
18414460e5b0SXin LI 		case CHAR_PSTRING_4_BE:
18424460e5b0SXin LI 			if (m->type != FILE_PSTRING)
18434460e5b0SXin LI 				goto bad;
18444460e5b0SXin LI 			SET_LENGTH(PSTRING_4_BE);
18454460e5b0SXin LI 			break;
18464460e5b0SXin LI 		case CHAR_PSTRING_4_LE:
18474460e5b0SXin LI 			switch (m->type) {
18484460e5b0SXin LI 			case FILE_PSTRING:
18494460e5b0SXin LI 			case FILE_REGEX:
18504460e5b0SXin LI 				break;
18514460e5b0SXin LI 			default:
18524460e5b0SXin LI 				goto bad;
18534460e5b0SXin LI 			}
18544460e5b0SXin LI 			SET_LENGTH(PSTRING_4_LE);
18554460e5b0SXin LI 			break;
18564460e5b0SXin LI 		case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:
18574460e5b0SXin LI 			if (m->type != FILE_PSTRING)
18584460e5b0SXin LI 				goto bad;
18594460e5b0SXin LI 			m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF;
18604460e5b0SXin LI 			break;
18614460e5b0SXin LI 		default:
18624460e5b0SXin LI 		bad:
18634460e5b0SXin LI 			if (ms->flags & MAGIC_CHECK)
18644460e5b0SXin LI 				file_magwarn(ms, "string modifier `%c' "
18654460e5b0SXin LI 					"invalid", *l);
18664460e5b0SXin LI 			goto out;
18674460e5b0SXin LI 		}
18684460e5b0SXin LI 		/* allow multiple '/' for readability */
186948c779cdSXin LI 		if (l[1] == '/' && !isspace(CAST(unsigned char, l[2])))
18704460e5b0SXin LI 			l++;
18714460e5b0SXin LI 	}
18724460e5b0SXin LI 	if (string_modifier_check(ms, m) == -1)
18734460e5b0SXin LI 		goto out;
18744460e5b0SXin LI 	*lp = l;
18754460e5b0SXin LI 	return 0;
18764460e5b0SXin LI out:
18774460e5b0SXin LI 	*lp = l;
18784460e5b0SXin LI 	return -1;
18794460e5b0SXin LI }
18804460e5b0SXin LI 
1881b6cee71dSXin LI /*
1882b6cee71dSXin LI  * parse one line from magic file, put into magic[index++] if valid
1883b6cee71dSXin LI  */
1884b6cee71dSXin LI private int
1885b6cee71dSXin LI parse(struct magic_set *ms, struct magic_entry *me, const char *line,
1886b6cee71dSXin LI     size_t lineno, int action)
1887b6cee71dSXin LI {
1888b6cee71dSXin LI #ifdef ENABLE_CONDITIONALS
1889b6cee71dSXin LI 	static uint32_t last_cont_level = 0;
1890b6cee71dSXin LI #endif
1891b6cee71dSXin LI 	size_t i;
1892b6cee71dSXin LI 	struct magic *m;
1893b6cee71dSXin LI 	const char *l = line;
1894b6cee71dSXin LI 	char *t;
1895b6cee71dSXin LI 	int op;
1896b6cee71dSXin LI 	uint32_t cont_level;
1897b6cee71dSXin LI 	int32_t diff;
1898b6cee71dSXin LI 
1899b6cee71dSXin LI 	cont_level = 0;
1900b6cee71dSXin LI 
1901b6cee71dSXin LI 	/*
1902b6cee71dSXin LI 	 * Parse the offset.
1903b6cee71dSXin LI 	 */
1904b6cee71dSXin LI 	while (*l == '>') {
1905b6cee71dSXin LI 		++l;		/* step over */
1906b6cee71dSXin LI 		cont_level++;
1907b6cee71dSXin LI 	}
1908b6cee71dSXin LI #ifdef ENABLE_CONDITIONALS
1909b6cee71dSXin LI 	if (cont_level == 0 || cont_level > last_cont_level)
1910b6cee71dSXin LI 		if (file_check_mem(ms, cont_level) == -1)
1911b6cee71dSXin LI 			return -1;
1912b6cee71dSXin LI 	last_cont_level = cont_level;
1913b6cee71dSXin LI #endif
1914b6cee71dSXin LI 	if (cont_level != 0) {
1915b6cee71dSXin LI 		if (me->mp == NULL) {
1916b6cee71dSXin LI 			file_magerror(ms, "No current entry for continuation");
1917b6cee71dSXin LI 			return -1;
1918b6cee71dSXin LI 		}
1919b6cee71dSXin LI 		if (me->cont_count == 0) {
1920b6cee71dSXin LI 			file_magerror(ms, "Continuations present with 0 count");
1921b6cee71dSXin LI 			return -1;
1922b6cee71dSXin LI 		}
1923b6cee71dSXin LI 		m = &me->mp[me->cont_count - 1];
192448c779cdSXin LI 		diff = CAST(int32_t, cont_level) - CAST(int32_t, m->cont_level);
1925b6cee71dSXin LI 		if (diff > 1)
1926b6cee71dSXin LI 			file_magwarn(ms, "New continuation level %u is more "
1927b6cee71dSXin LI 			    "than one larger than current level %u", cont_level,
1928b6cee71dSXin LI 			    m->cont_level);
1929b6cee71dSXin LI 		if (me->cont_count == me->max_count) {
1930b6cee71dSXin LI 			struct magic *nm;
1931b6cee71dSXin LI 			size_t cnt = me->max_count + ALLOC_CHUNK;
1932b6cee71dSXin LI 			if ((nm = CAST(struct magic *, realloc(me->mp,
1933b6cee71dSXin LI 			    sizeof(*nm) * cnt))) == NULL) {
1934b6cee71dSXin LI 				file_oomem(ms, sizeof(*nm) * cnt);
1935b6cee71dSXin LI 				return -1;
1936b6cee71dSXin LI 			}
193748c779cdSXin LI 			me->mp = nm;
1938b6cee71dSXin LI 			me->max_count = CAST(uint32_t, cnt);
1939b6cee71dSXin LI 		}
1940b6cee71dSXin LI 		m = &me->mp[me->cont_count++];
1941b6cee71dSXin LI 		(void)memset(m, 0, sizeof(*m));
1942b6cee71dSXin LI 		m->cont_level = cont_level;
1943b6cee71dSXin LI 	} else {
1944b6cee71dSXin LI 		static const size_t len = sizeof(*m) * ALLOC_CHUNK;
1945b6cee71dSXin LI 		if (me->mp != NULL)
1946b6cee71dSXin LI 			return 1;
1947b6cee71dSXin LI 		if ((m = CAST(struct magic *, malloc(len))) == NULL) {
1948b6cee71dSXin LI 			file_oomem(ms, len);
1949b6cee71dSXin LI 			return -1;
1950b6cee71dSXin LI 		}
1951b6cee71dSXin LI 		me->mp = m;
1952b6cee71dSXin LI 		me->max_count = ALLOC_CHUNK;
1953b6cee71dSXin LI 		(void)memset(m, 0, sizeof(*m));
1954b6cee71dSXin LI 		m->factor_op = FILE_FACTOR_OP_NONE;
1955b6cee71dSXin LI 		m->cont_level = 0;
1956b6cee71dSXin LI 		me->cont_count = 1;
1957b6cee71dSXin LI 	}
1958b6cee71dSXin LI 	m->lineno = CAST(uint32_t, lineno);
1959b6cee71dSXin LI 
1960b6cee71dSXin LI 	if (*l == '&') {  /* m->cont_level == 0 checked below. */
1961b6cee71dSXin LI                 ++l;            /* step over */
1962b6cee71dSXin LI                 m->flag |= OFFADD;
1963b6cee71dSXin LI         }
1964b6cee71dSXin LI 	if (*l == '(') {
1965b6cee71dSXin LI 		++l;		/* step over */
1966b6cee71dSXin LI 		m->flag |= INDIR;
1967b6cee71dSXin LI 		if (m->flag & OFFADD)
1968b6cee71dSXin LI 			m->flag = (m->flag & ~OFFADD) | INDIROFFADD;
1969b6cee71dSXin LI 
1970b6cee71dSXin LI 		if (*l == '&') {  /* m->cont_level == 0 checked below */
1971b6cee71dSXin LI 			++l;            /* step over */
1972b6cee71dSXin LI 			m->flag |= OFFADD;
1973b6cee71dSXin LI 		}
1974b6cee71dSXin LI 	}
1975b6cee71dSXin LI 	/* Indirect offsets are not valid at level 0. */
19765f0216bdSXin LI 	if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD))) {
1977b6cee71dSXin LI 		if (ms->flags & MAGIC_CHECK)
1978b6cee71dSXin LI 			file_magwarn(ms, "relative offset at level 0");
19795f0216bdSXin LI 		return -1;
19805f0216bdSXin LI 	}
1981b6cee71dSXin LI 
1982b6cee71dSXin LI 	/* get offset, then skip over it */
19832726a701SXin LI 	if (*l == '-') {
19842726a701SXin LI 		++l;            /* step over */
19852726a701SXin LI 		m->flag |= OFFNEGATIVE;
19862726a701SXin LI 	}
198748c779cdSXin LI 	m->offset = CAST(int32_t, strtol(l, &t, 0));
19885f0216bdSXin LI         if (l == t) {
1989b6cee71dSXin LI 		if (ms->flags & MAGIC_CHECK)
1990b6cee71dSXin LI 			file_magwarn(ms, "offset `%s' invalid", l);
19915f0216bdSXin LI 		return -1;
19925f0216bdSXin LI 	}
1993d38c30c0SXin LI 
1994b6cee71dSXin LI         l = t;
1995b6cee71dSXin LI 
1996b6cee71dSXin LI 	if (m->flag & INDIR) {
1997b6cee71dSXin LI 		m->in_type = FILE_LONG;
1998b6cee71dSXin LI 		m->in_offset = 0;
1999a5d223e6SXin LI 		m->in_op = 0;
2000b6cee71dSXin LI 		/*
2001a5d223e6SXin LI 		 * read [.,lbs][+-]nnnnn)
2002b6cee71dSXin LI 		 */
2003a5d223e6SXin LI 		if (*l == '.' || *l == ',') {
2004a5d223e6SXin LI 			if (*l == ',')
2005a5d223e6SXin LI 				m->in_op |= FILE_OPSIGNED;
2006b6cee71dSXin LI 			l++;
2007b6cee71dSXin LI 			switch (*l) {
2008b6cee71dSXin LI 			case 'l':
2009b6cee71dSXin LI 				m->in_type = FILE_LELONG;
2010b6cee71dSXin LI 				break;
2011b6cee71dSXin LI 			case 'L':
2012b6cee71dSXin LI 				m->in_type = FILE_BELONG;
2013b6cee71dSXin LI 				break;
2014b6cee71dSXin LI 			case 'm':
2015b6cee71dSXin LI 				m->in_type = FILE_MELONG;
2016b6cee71dSXin LI 				break;
2017b6cee71dSXin LI 			case 'h':
2018b6cee71dSXin LI 			case 's':
2019b6cee71dSXin LI 				m->in_type = FILE_LESHORT;
2020b6cee71dSXin LI 				break;
2021b6cee71dSXin LI 			case 'H':
2022b6cee71dSXin LI 			case 'S':
2023b6cee71dSXin LI 				m->in_type = FILE_BESHORT;
2024b6cee71dSXin LI 				break;
2025b6cee71dSXin LI 			case 'c':
2026b6cee71dSXin LI 			case 'b':
2027b6cee71dSXin LI 			case 'C':
2028b6cee71dSXin LI 			case 'B':
2029b6cee71dSXin LI 				m->in_type = FILE_BYTE;
2030b6cee71dSXin LI 				break;
2031b6cee71dSXin LI 			case 'e':
2032b6cee71dSXin LI 			case 'f':
2033b6cee71dSXin LI 			case 'g':
2034b6cee71dSXin LI 				m->in_type = FILE_LEDOUBLE;
2035b6cee71dSXin LI 				break;
2036b6cee71dSXin LI 			case 'E':
2037b6cee71dSXin LI 			case 'F':
2038b6cee71dSXin LI 			case 'G':
2039b6cee71dSXin LI 				m->in_type = FILE_BEDOUBLE;
2040b6cee71dSXin LI 				break;
2041b6cee71dSXin LI 			case 'i':
2042b6cee71dSXin LI 				m->in_type = FILE_LEID3;
2043b6cee71dSXin LI 				break;
2044b6cee71dSXin LI 			case 'I':
2045b6cee71dSXin LI 				m->in_type = FILE_BEID3;
2046b6cee71dSXin LI 				break;
20472dc4dbb9SEitan Adler 			case 'q':
20482dc4dbb9SEitan Adler 				m->in_type = FILE_LEQUAD;
20492dc4dbb9SEitan Adler 				break;
20502dc4dbb9SEitan Adler 			case 'Q':
20512dc4dbb9SEitan Adler 				m->in_type = FILE_BEQUAD;
20522dc4dbb9SEitan Adler 				break;
2053b6cee71dSXin LI 			default:
2054b6cee71dSXin LI 				if (ms->flags & MAGIC_CHECK)
2055b6cee71dSXin LI 					file_magwarn(ms,
2056b6cee71dSXin LI 					    "indirect offset type `%c' invalid",
2057b6cee71dSXin LI 					    *l);
20585f0216bdSXin LI 				return -1;
2059b6cee71dSXin LI 			}
2060b6cee71dSXin LI 			l++;
2061b6cee71dSXin LI 		}
2062b6cee71dSXin LI 
2063b6cee71dSXin LI 		if (*l == '~') {
2064b6cee71dSXin LI 			m->in_op |= FILE_OPINVERSE;
2065b6cee71dSXin LI 			l++;
2066b6cee71dSXin LI 		}
2067b6cee71dSXin LI 		if ((op = get_op(*l)) != -1) {
2068b6cee71dSXin LI 			m->in_op |= op;
2069b6cee71dSXin LI 			l++;
2070b6cee71dSXin LI 		}
2071b6cee71dSXin LI 		if (*l == '(') {
2072b6cee71dSXin LI 			m->in_op |= FILE_OPINDIRECT;
2073b6cee71dSXin LI 			l++;
2074b6cee71dSXin LI 		}
207548c779cdSXin LI 		if (isdigit(CAST(unsigned char, *l)) || *l == '-') {
207648c779cdSXin LI 			m->in_offset = CAST(int32_t, strtol(l, &t, 0));
20775f0216bdSXin LI 			if (l == t) {
2078b6cee71dSXin LI 				if (ms->flags & MAGIC_CHECK)
2079b6cee71dSXin LI 					file_magwarn(ms,
2080b6cee71dSXin LI 					    "in_offset `%s' invalid", l);
20815f0216bdSXin LI 				return -1;
20825f0216bdSXin LI 			}
2083b6cee71dSXin LI 			l = t;
2084b6cee71dSXin LI 		}
2085b6cee71dSXin LI 		if (*l++ != ')' ||
20865f0216bdSXin LI 		    ((m->in_op & FILE_OPINDIRECT) && *l++ != ')')) {
2087b6cee71dSXin LI 			if (ms->flags & MAGIC_CHECK)
2088b6cee71dSXin LI 				file_magwarn(ms,
2089b6cee71dSXin LI 				    "missing ')' in indirect offset");
20905f0216bdSXin LI 			return -1;
20915f0216bdSXin LI 		}
2092b6cee71dSXin LI 	}
2093b6cee71dSXin LI 	EATAB;
2094b6cee71dSXin LI 
2095b6cee71dSXin LI #ifdef ENABLE_CONDITIONALS
2096b6cee71dSXin LI 	m->cond = get_cond(l, &l);
2097b6cee71dSXin LI 	if (check_cond(ms, m->cond, cont_level) == -1)
2098b6cee71dSXin LI 		return -1;
2099b6cee71dSXin LI 
2100b6cee71dSXin LI 	EATAB;
2101b6cee71dSXin LI #endif
2102b6cee71dSXin LI 
2103b6cee71dSXin LI 	/*
2104b6cee71dSXin LI 	 * Parse the type.
2105b6cee71dSXin LI 	 */
2106b6cee71dSXin LI 	if (*l == 'u') {
2107b6cee71dSXin LI 		/*
2108b6cee71dSXin LI 		 * Try it as a keyword type prefixed by "u"; match what
2109b6cee71dSXin LI 		 * follows the "u".  If that fails, try it as an SUS
2110b6cee71dSXin LI 		 * integer type.
2111b6cee71dSXin LI 		 */
2112b6cee71dSXin LI 		m->type = get_type(type_tbl, l + 1, &l);
2113b6cee71dSXin LI 		if (m->type == FILE_INVALID) {
2114b6cee71dSXin LI 			/*
2115b6cee71dSXin LI 			 * Not a keyword type; parse it as an SUS type,
2116b6cee71dSXin LI 			 * 'u' possibly followed by a number or C/S/L.
2117b6cee71dSXin LI 			 */
2118b6cee71dSXin LI 			m->type = get_standard_integer_type(l, &l);
2119b6cee71dSXin LI 		}
2120b6cee71dSXin LI 		/* It's unsigned. */
2121b6cee71dSXin LI 		if (m->type != FILE_INVALID)
2122b6cee71dSXin LI 			m->flag |= UNSIGNED;
2123b6cee71dSXin LI 	} else {
2124b6cee71dSXin LI 		/*
2125b6cee71dSXin LI 		 * Try it as a keyword type.  If that fails, try it as
2126b6cee71dSXin LI 		 * an SUS integer type if it begins with "d" or as an
2127b6cee71dSXin LI 		 * SUS string type if it begins with "s".  In any case,
2128b6cee71dSXin LI 		 * it's not unsigned.
2129b6cee71dSXin LI 		 */
2130b6cee71dSXin LI 		m->type = get_type(type_tbl, l, &l);
2131b6cee71dSXin LI 		if (m->type == FILE_INVALID) {
2132b6cee71dSXin LI 			/*
2133b6cee71dSXin LI 			 * Not a keyword type; parse it as an SUS type,
2134b6cee71dSXin LI 			 * either 'd' possibly followed by a number or
2135b6cee71dSXin LI 			 * C/S/L, or just 's'.
2136b6cee71dSXin LI 			 */
2137b6cee71dSXin LI 			if (*l == 'd')
2138b6cee71dSXin LI 				m->type = get_standard_integer_type(l, &l);
213948c779cdSXin LI 			else if (*l == 's'
214048c779cdSXin LI 			    && !isalpha(CAST(unsigned char, l[1]))) {
2141b6cee71dSXin LI 				m->type = FILE_STRING;
2142b6cee71dSXin LI 				++l;
2143b6cee71dSXin LI 			}
2144b6cee71dSXin LI 		}
2145b6cee71dSXin LI 	}
2146b6cee71dSXin LI 
2147b6cee71dSXin LI 	if (m->type == FILE_INVALID) {
2148b6cee71dSXin LI 		/* Not found - try it as a special keyword. */
2149b6cee71dSXin LI 		m->type = get_type(special_tbl, l, &l);
2150b6cee71dSXin LI 	}
2151b6cee71dSXin LI 
2152b6cee71dSXin LI 	if (m->type == FILE_INVALID) {
2153b6cee71dSXin LI 		if (ms->flags & MAGIC_CHECK)
2154b6cee71dSXin LI 			file_magwarn(ms, "type `%s' invalid", l);
2155b6cee71dSXin LI 		return -1;
2156b6cee71dSXin LI 	}
2157b6cee71dSXin LI 
2158*43a5ec4eSXin LI 	if (m->type == FILE_NAME && cont_level != 0) {
2159*43a5ec4eSXin LI 		if (ms->flags & MAGIC_CHECK)
2160*43a5ec4eSXin LI 			file_magwarn(ms, "`name%s' entries can only be "
2161*43a5ec4eSXin LI 			    "declared at top level", l);
2162*43a5ec4eSXin LI 		return -1;
2163*43a5ec4eSXin LI 	}
2164*43a5ec4eSXin LI 
2165b6cee71dSXin LI 	/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
2166b6cee71dSXin LI 	/* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
2167b6cee71dSXin LI 
2168b6cee71dSXin LI 	m->mask_op = 0;
2169b6cee71dSXin LI 	if (*l == '~') {
2170b6cee71dSXin LI 		if (!IS_STRING(m->type))
2171b6cee71dSXin LI 			m->mask_op |= FILE_OPINVERSE;
2172b6cee71dSXin LI 		else if (ms->flags & MAGIC_CHECK)
2173b6cee71dSXin LI 			file_magwarn(ms, "'~' invalid for string types");
2174b6cee71dSXin LI 		++l;
2175b6cee71dSXin LI 	}
2176b6cee71dSXin LI 	m->str_range = 0;
2177b6cee71dSXin LI 	m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0;
2178b6cee71dSXin LI 	if ((op = get_op(*l)) != -1) {
21794460e5b0SXin LI 		if (IS_STRING(m->type)) {
21804460e5b0SXin LI 			int r;
21814460e5b0SXin LI 
21824460e5b0SXin LI 			if (op != FILE_OPDIVIDE) {
2183b6cee71dSXin LI 				if (ms->flags & MAGIC_CHECK)
2184b6cee71dSXin LI 					file_magwarn(ms,
21854460e5b0SXin LI 					    "invalid string/indirect op: "
21864460e5b0SXin LI 					    "`%c'", *t);
2187b6cee71dSXin LI 				return -1;
2188b6cee71dSXin LI 			}
21894460e5b0SXin LI 
21904460e5b0SXin LI 			if (m->type == FILE_INDIRECT)
21914460e5b0SXin LI 				r = parse_indirect_modifier(ms, m, &l);
21924460e5b0SXin LI 			else
21934460e5b0SXin LI 				r = parse_string_modifier(ms, m, &l);
21944460e5b0SXin LI 			if (r == -1)
2195b6cee71dSXin LI 				return -1;
21964460e5b0SXin LI 		} else
21974460e5b0SXin LI 			parse_op_modifier(ms, m, &l, op);
2198b6cee71dSXin LI 	}
21994460e5b0SXin LI 
2200b6cee71dSXin LI 	/*
2201b6cee71dSXin LI 	 * We used to set mask to all 1's here, instead let's just not do
2202b6cee71dSXin LI 	 * anything if mask = 0 (unless you have a better idea)
2203b6cee71dSXin LI 	 */
2204b6cee71dSXin LI 	EATAB;
2205b6cee71dSXin LI 
2206b6cee71dSXin LI 	switch (*l) {
2207b6cee71dSXin LI 	case '>':
2208b6cee71dSXin LI 	case '<':
2209b6cee71dSXin LI   		m->reln = *l;
2210b6cee71dSXin LI   		++l;
2211b6cee71dSXin LI 		if (*l == '=') {
2212b6cee71dSXin LI 			if (ms->flags & MAGIC_CHECK) {
2213b6cee71dSXin LI 				file_magwarn(ms, "%c= not supported",
2214b6cee71dSXin LI 				    m->reln);
2215b6cee71dSXin LI 				return -1;
2216b6cee71dSXin LI 			}
2217b6cee71dSXin LI 		   ++l;
2218b6cee71dSXin LI 		}
2219b6cee71dSXin LI 		break;
2220b6cee71dSXin LI 	/* Old-style anding: "0 byte &0x80 dynamically linked" */
2221b6cee71dSXin LI 	case '&':
2222b6cee71dSXin LI 	case '^':
2223b6cee71dSXin LI 	case '=':
2224b6cee71dSXin LI   		m->reln = *l;
2225b6cee71dSXin LI   		++l;
2226b6cee71dSXin LI 		if (*l == '=') {
2227b6cee71dSXin LI 		   /* HP compat: ignore &= etc. */
2228b6cee71dSXin LI 		   ++l;
2229b6cee71dSXin LI 		}
2230b6cee71dSXin LI 		break;
2231b6cee71dSXin LI 	case '!':
2232b6cee71dSXin LI 		m->reln = *l;
2233b6cee71dSXin LI 		++l;
2234b6cee71dSXin LI 		break;
2235b6cee71dSXin LI 	default:
2236b6cee71dSXin LI   		m->reln = '=';	/* the default relation */
223748c779cdSXin LI 		if (*l == 'x' && ((isascii(CAST(unsigned char, l[1])) &&
223848c779cdSXin LI 		    isspace(CAST(unsigned char, l[1]))) || !l[1])) {
2239b6cee71dSXin LI 			m->reln = *l;
2240b6cee71dSXin LI 			++l;
2241b6cee71dSXin LI 		}
2242b6cee71dSXin LI 		break;
2243b6cee71dSXin LI 	}
2244b6cee71dSXin LI 	/*
2245b6cee71dSXin LI 	 * Grab the value part, except for an 'x' reln.
2246b6cee71dSXin LI 	 */
2247b6cee71dSXin LI 	if (m->reln != 'x' && getvalue(ms, m, &l, action))
2248b6cee71dSXin LI 		return -1;
2249b6cee71dSXin LI 
2250b6cee71dSXin LI 	/*
2251b6cee71dSXin LI 	 * TODO finish this macro and start using it!
22523e41d09dSXin LI 	 * #define offsetcheck {if (offset > ms->bytes_max -1)
2253b6cee71dSXin LI 	 *	magwarn("offset too big"); }
2254b6cee71dSXin LI 	 */
2255b6cee71dSXin LI 
2256b6cee71dSXin LI 	/*
2257b6cee71dSXin LI 	 * Now get last part - the description
2258b6cee71dSXin LI 	 */
2259b6cee71dSXin LI 	EATAB;
2260b6cee71dSXin LI 	if (l[0] == '\b') {
2261b6cee71dSXin LI 		++l;
2262b6cee71dSXin LI 		m->flag |= NOSPACE;
2263b6cee71dSXin LI 	} else if ((l[0] == '\\') && (l[1] == 'b')) {
2264b6cee71dSXin LI 		++l;
2265b6cee71dSXin LI 		++l;
2266b6cee71dSXin LI 		m->flag |= NOSPACE;
2267b6cee71dSXin LI 	}
2268b6cee71dSXin LI 	for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); )
2269b6cee71dSXin LI 		continue;
2270b6cee71dSXin LI 	if (i == sizeof(m->desc)) {
2271b6cee71dSXin LI 		m->desc[sizeof(m->desc) - 1] = '\0';
2272b6cee71dSXin LI 		if (ms->flags & MAGIC_CHECK)
2273b6cee71dSXin LI 			file_magwarn(ms, "description `%s' truncated", m->desc);
2274b6cee71dSXin LI 	}
2275b6cee71dSXin LI 
2276b6cee71dSXin LI         /*
2277b6cee71dSXin LI 	 * We only do this check while compiling, or if any of the magic
2278b6cee71dSXin LI 	 * files were not compiled.
2279b6cee71dSXin LI          */
2280b6cee71dSXin LI         if (ms->flags & MAGIC_CHECK) {
2281b6cee71dSXin LI 		if (check_format(ms, m) == -1)
2282b6cee71dSXin LI 			return -1;
2283b6cee71dSXin LI 	}
2284b6cee71dSXin LI #ifndef COMPILE_ONLY
2285b6cee71dSXin LI 	if (action == FILE_CHECK) {
2286b6cee71dSXin LI 		file_mdump(m);
2287b6cee71dSXin LI 	}
2288b6cee71dSXin LI #endif
2289b6cee71dSXin LI 	m->mimetype[0] = '\0';		/* initialise MIME type to none */
2290b6cee71dSXin LI 	return 0;
2291b6cee71dSXin LI }
2292b6cee71dSXin LI 
2293b6cee71dSXin LI /*
2294b6cee71dSXin LI  * parse a STRENGTH annotation line from magic file, put into magic[index - 1]
2295b6cee71dSXin LI  * if valid
2296b6cee71dSXin LI  */
2297b6cee71dSXin LI private int
22982726a701SXin LI parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line,
22992726a701SXin LI     size_t len __attribute__((__unused__)))
2300b6cee71dSXin LI {
2301b6cee71dSXin LI 	const char *l = line;
2302b6cee71dSXin LI 	char *el;
2303b6cee71dSXin LI 	unsigned long factor;
2304b6cee71dSXin LI 	struct magic *m = &me->mp[0];
2305b6cee71dSXin LI 
2306b6cee71dSXin LI 	if (m->factor_op != FILE_FACTOR_OP_NONE) {
2307b6cee71dSXin LI 		file_magwarn(ms,
2308b6cee71dSXin LI 		    "Current entry already has a strength type: %c %d",
2309b6cee71dSXin LI 		    m->factor_op, m->factor);
2310b6cee71dSXin LI 		return -1;
2311b6cee71dSXin LI 	}
2312b6cee71dSXin LI 	if (m->type == FILE_NAME) {
2313b6cee71dSXin LI 		file_magwarn(ms, "%s: Strength setting is not supported in "
2314b6cee71dSXin LI 		    "\"name\" magic entries", m->value.s);
2315b6cee71dSXin LI 		return -1;
2316b6cee71dSXin LI 	}
2317b6cee71dSXin LI 	EATAB;
2318b6cee71dSXin LI 	switch (*l) {
2319b6cee71dSXin LI 	case FILE_FACTOR_OP_NONE:
2320b6cee71dSXin LI 	case FILE_FACTOR_OP_PLUS:
2321b6cee71dSXin LI 	case FILE_FACTOR_OP_MINUS:
2322b6cee71dSXin LI 	case FILE_FACTOR_OP_TIMES:
2323b6cee71dSXin LI 	case FILE_FACTOR_OP_DIV:
2324b6cee71dSXin LI 		m->factor_op = *l++;
2325b6cee71dSXin LI 		break;
2326b6cee71dSXin LI 	default:
2327b6cee71dSXin LI 		file_magwarn(ms, "Unknown factor op `%c'", *l);
2328b6cee71dSXin LI 		return -1;
2329b6cee71dSXin LI 	}
2330b6cee71dSXin LI 	EATAB;
2331b6cee71dSXin LI 	factor = strtoul(l, &el, 0);
2332b6cee71dSXin LI 	if (factor > 255) {
2333b6cee71dSXin LI 		file_magwarn(ms, "Too large factor `%lu'", factor);
2334b6cee71dSXin LI 		goto out;
2335b6cee71dSXin LI 	}
233648c779cdSXin LI 	if (*el && !isspace(CAST(unsigned char, *el))) {
2337b6cee71dSXin LI 		file_magwarn(ms, "Bad factor `%s'", l);
2338b6cee71dSXin LI 		goto out;
2339b6cee71dSXin LI 	}
234048c779cdSXin LI 	m->factor = CAST(uint8_t, factor);
2341b6cee71dSXin LI 	if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) {
2342b6cee71dSXin LI 		file_magwarn(ms, "Cannot have factor op `%c' and factor %u",
2343b6cee71dSXin LI 		    m->factor_op, m->factor);
2344b6cee71dSXin LI 		goto out;
2345b6cee71dSXin LI 	}
2346b6cee71dSXin LI 	return 0;
2347b6cee71dSXin LI out:
2348b6cee71dSXin LI 	m->factor_op = FILE_FACTOR_OP_NONE;
2349b6cee71dSXin LI 	m->factor = 0;
2350b6cee71dSXin LI 	return -1;
2351b6cee71dSXin LI }
2352b6cee71dSXin LI 
2353b6cee71dSXin LI private int
2354c2931133SXin LI goodchar(unsigned char x, const char *extra)
2355c2931133SXin LI {
2356c2931133SXin LI 	return (isascii(x) && isalnum(x)) || strchr(extra, x);
2357c2931133SXin LI }
2358c2931133SXin LI 
2359c2931133SXin LI private int
2360b6cee71dSXin LI parse_extra(struct magic_set *ms, struct magic_entry *me, const char *line,
23612726a701SXin LI     size_t llen, off_t off, size_t len, const char *name, const char *extra,
23622726a701SXin LI     int nt)
2363b6cee71dSXin LI {
2364b6cee71dSXin LI 	size_t i;
2365b6cee71dSXin LI 	const char *l = line;
2366b6cee71dSXin LI 	struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
23675f0216bdSXin LI 	char *buf = CAST(char *, CAST(void *, m)) + off;
2368b6cee71dSXin LI 
2369b6cee71dSXin LI 	if (buf[0] != '\0') {
2370b6cee71dSXin LI 		len = nt ? strlen(buf) : len;
2371b6cee71dSXin LI 		file_magwarn(ms, "Current entry already has a %s type "
237248c779cdSXin LI 		    "`%.*s', new type `%s'", name, CAST(int, len), buf, l);
2373b6cee71dSXin LI 		return -1;
2374b6cee71dSXin LI 	}
2375b6cee71dSXin LI 
2376b6cee71dSXin LI 	if (*m->desc == '\0') {
2377b6cee71dSXin LI 		file_magwarn(ms, "Current entry does not yet have a "
2378b6cee71dSXin LI 		    "description for adding a %s type", name);
2379b6cee71dSXin LI 		return -1;
2380b6cee71dSXin LI 	}
2381b6cee71dSXin LI 
2382b6cee71dSXin LI 	EATAB;
23832726a701SXin LI 	for (i = 0; *l && i < llen && i < len && goodchar(*l, extra);
23842726a701SXin LI 	    buf[i++] = *l++)
2385b6cee71dSXin LI 		continue;
2386b6cee71dSXin LI 
2387b6cee71dSXin LI 	if (i == len && *l) {
2388b6cee71dSXin LI 		if (nt)
2389b6cee71dSXin LI 			buf[len - 1] = '\0';
2390b6cee71dSXin LI 		if (ms->flags & MAGIC_CHECK)
2391b6cee71dSXin LI 			file_magwarn(ms, "%s type `%s' truncated %"
2392b6cee71dSXin LI 			    SIZE_T_FORMAT "u", name, line, i);
2393b6cee71dSXin LI 	} else {
239448c779cdSXin LI 		if (!isspace(CAST(unsigned char, *l)) && !goodchar(*l, extra))
2395c2931133SXin LI 			file_magwarn(ms, "%s type `%s' has bad char '%c'",
2396c2931133SXin LI 			    name, line, *l);
2397b6cee71dSXin LI 		if (nt)
2398b6cee71dSXin LI 			buf[i] = '\0';
2399b6cee71dSXin LI 	}
2400b6cee71dSXin LI 
2401b6cee71dSXin LI 	if (i > 0)
2402b6cee71dSXin LI 		return 0;
2403c2931133SXin LI 
2404c2931133SXin LI 	file_magerror(ms, "Bad magic entry '%s'", line);
2405b6cee71dSXin LI 	return -1;
2406b6cee71dSXin LI }
2407b6cee71dSXin LI 
2408b6cee71dSXin LI /*
2409b6cee71dSXin LI  * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
2410b6cee71dSXin LI  * magic[index - 1]
2411b6cee71dSXin LI  */
2412b6cee71dSXin LI private int
24132726a701SXin LI parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line,
24142726a701SXin LI     size_t len)
2415b6cee71dSXin LI {
2416b6cee71dSXin LI 	struct magic *m = &me->mp[0];
2417b6cee71dSXin LI 
24182726a701SXin LI 	return parse_extra(ms, me, line, len,
24195f0216bdSXin LI 	    CAST(off_t, offsetof(struct magic, apple)),
24203e41d09dSXin LI 	    sizeof(m->apple), "APPLE", "!+-./?", 0);
2421b6cee71dSXin LI }
2422b6cee71dSXin LI 
2423b6cee71dSXin LI /*
24245f0216bdSXin LI  * Parse a comma-separated list of extensions
24255f0216bdSXin LI  */
24265f0216bdSXin LI private int
24272726a701SXin LI parse_ext(struct magic_set *ms, struct magic_entry *me, const char *line,
24282726a701SXin LI     size_t len)
24295f0216bdSXin LI {
24305f0216bdSXin LI 	struct magic *m = &me->mp[0];
24315f0216bdSXin LI 
24322726a701SXin LI 	return parse_extra(ms, me, line, len,
24335f0216bdSXin LI 	    CAST(off_t, offsetof(struct magic, ext)),
2434*43a5ec4eSXin LI 	    sizeof(m->ext), "EXTENSION", ",!+-/@?_$&", 0); /* & for b&w */
24355f0216bdSXin LI }
24365f0216bdSXin LI 
24375f0216bdSXin LI /*
2438b6cee71dSXin LI  * parse a MIME annotation line from magic file, put into magic[index - 1]
2439b6cee71dSXin LI  * if valid
2440b6cee71dSXin LI  */
2441b6cee71dSXin LI private int
24422726a701SXin LI parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line,
24432726a701SXin LI     size_t len)
2444b6cee71dSXin LI {
2445b6cee71dSXin LI 	struct magic *m = &me->mp[0];
2446b6cee71dSXin LI 
24472726a701SXin LI 	return parse_extra(ms, me, line, len,
24485f0216bdSXin LI 	    CAST(off_t, offsetof(struct magic, mimetype)),
244958a0f0d0SEitan Adler 	    sizeof(m->mimetype), "MIME", "+-/.$?:{}", 1);
2450b6cee71dSXin LI }
2451b6cee71dSXin LI 
2452b6cee71dSXin LI private int
24533e41d09dSXin LI check_format_type(const char *ptr, int type, const char **estr)
2454b6cee71dSXin LI {
2455b6cee71dSXin LI 	int quad = 0, h;
24563e41d09dSXin LI 	size_t len, cnt;
2457b6cee71dSXin LI 	if (*ptr == '\0') {
2458b6cee71dSXin LI 		/* Missing format string; bad */
24593e41d09dSXin LI 		*estr = "missing format spec";
2460b6cee71dSXin LI 		return -1;
2461b6cee71dSXin LI 	}
2462b6cee71dSXin LI 
2463b6cee71dSXin LI 	switch (file_formats[type]) {
2464b6cee71dSXin LI 	case FILE_FMT_QUAD:
2465b6cee71dSXin LI 		quad = 1;
2466b6cee71dSXin LI 		/*FALLTHROUGH*/
2467b6cee71dSXin LI 	case FILE_FMT_NUM:
2468b6cee71dSXin LI 		if (quad == 0) {
2469b6cee71dSXin LI 			switch (type) {
2470b6cee71dSXin LI 			case FILE_BYTE:
2471b6cee71dSXin LI 				h = 2;
2472b6cee71dSXin LI 				break;
2473b6cee71dSXin LI 			case FILE_SHORT:
2474b6cee71dSXin LI 			case FILE_BESHORT:
2475b6cee71dSXin LI 			case FILE_LESHORT:
2476b6cee71dSXin LI 				h = 1;
2477b6cee71dSXin LI 				break;
2478b6cee71dSXin LI 			case FILE_LONG:
2479b6cee71dSXin LI 			case FILE_BELONG:
2480b6cee71dSXin LI 			case FILE_LELONG:
2481b6cee71dSXin LI 			case FILE_MELONG:
2482b6cee71dSXin LI 			case FILE_LEID3:
2483b6cee71dSXin LI 			case FILE_BEID3:
2484b6cee71dSXin LI 			case FILE_INDIRECT:
2485b6cee71dSXin LI 				h = 0;
2486b6cee71dSXin LI 				break;
2487b6cee71dSXin LI 			default:
2488b6cee71dSXin LI 				abort();
2489b6cee71dSXin LI 			}
2490b6cee71dSXin LI 		} else
2491b6cee71dSXin LI 			h = 0;
2492*43a5ec4eSXin LI 		while (*ptr && strchr("-.#", *ptr) != NULL)
249340427ccaSGordon Tetlow 			ptr++;
24943e41d09dSXin LI #define CHECKLEN() do { \
249548c779cdSXin LI 	for (len = cnt = 0; isdigit(CAST(unsigned char, *ptr)); ptr++, cnt++) \
24963e41d09dSXin LI 		len = len * 10 + (*ptr - '0'); \
24973e41d09dSXin LI 	if (cnt > 5 || len > 1024) \
24983e41d09dSXin LI 		goto toolong; \
24993e41d09dSXin LI } while (/*CONSTCOND*/0)
25003e41d09dSXin LI 
25013e41d09dSXin LI 		CHECKLEN();
2502b6cee71dSXin LI 		if (*ptr == '.')
2503b6cee71dSXin LI 			ptr++;
25043e41d09dSXin LI 		CHECKLEN();
2505b6cee71dSXin LI 		if (quad) {
2506b6cee71dSXin LI 			if (*ptr++ != 'l')
25073e41d09dSXin LI 				goto invalid;
2508b6cee71dSXin LI 			if (*ptr++ != 'l')
25093e41d09dSXin LI 				goto invalid;
2510b6cee71dSXin LI 		}
2511b6cee71dSXin LI 
2512b6cee71dSXin LI 		switch (*ptr++) {
2513b6cee71dSXin LI #ifdef STRICT_FORMAT 	/* "long" formats are int formats for us */
2514b6cee71dSXin LI 		/* so don't accept the 'l' modifier */
2515b6cee71dSXin LI 		case 'l':
2516b6cee71dSXin LI 			switch (*ptr++) {
2517b6cee71dSXin LI 			case 'i':
2518b6cee71dSXin LI 			case 'd':
2519b6cee71dSXin LI 			case 'u':
2520b6cee71dSXin LI 			case 'o':
2521b6cee71dSXin LI 			case 'x':
2522b6cee71dSXin LI 			case 'X':
25233e41d09dSXin LI 				if (h == 0)
25243e41d09dSXin LI 					return 0;
25253e41d09dSXin LI 				/*FALLTHROUGH*/
2526b6cee71dSXin LI 			default:
25273e41d09dSXin LI 				goto invalid;
2528b6cee71dSXin LI 			}
2529b6cee71dSXin LI 
2530b6cee71dSXin LI 		/*
2531b6cee71dSXin LI 		 * Don't accept h and hh modifiers. They make writing
2532b6cee71dSXin LI 		 * magic entries more complicated, for very little benefit
2533b6cee71dSXin LI 		 */
2534b6cee71dSXin LI 		case 'h':
2535b6cee71dSXin LI 			if (h-- <= 0)
25363e41d09dSXin LI 				goto invalid;
2537b6cee71dSXin LI 			switch (*ptr++) {
2538b6cee71dSXin LI 			case 'h':
2539b6cee71dSXin LI 				if (h-- <= 0)
25403e41d09dSXin LI 					goto invalid;
2541b6cee71dSXin LI 				switch (*ptr++) {
2542b6cee71dSXin LI 				case 'i':
2543b6cee71dSXin LI 				case 'd':
2544b6cee71dSXin LI 				case 'u':
2545b6cee71dSXin LI 				case 'o':
2546b6cee71dSXin LI 				case 'x':
2547b6cee71dSXin LI 				case 'X':
2548b6cee71dSXin LI 					return 0;
2549b6cee71dSXin LI 				default:
25503e41d09dSXin LI 					goto invalid;
2551b6cee71dSXin LI 				}
2552b6cee71dSXin LI 			case 'i':
2553b6cee71dSXin LI 			case 'd':
2554b6cee71dSXin LI 			case 'u':
2555b6cee71dSXin LI 			case 'o':
2556b6cee71dSXin LI 			case 'x':
2557b6cee71dSXin LI 			case 'X':
25583e41d09dSXin LI 				if (h == 0)
25593e41d09dSXin LI 					return 0;
25603e41d09dSXin LI 				/*FALLTHROUGH*/
2561b6cee71dSXin LI 			default:
25623e41d09dSXin LI 				goto invalid;
2563b6cee71dSXin LI 			}
2564b6cee71dSXin LI #endif
2565b6cee71dSXin LI 		case 'c':
25663e41d09dSXin LI 			if (h == 2)
25673e41d09dSXin LI 				return 0;
25683e41d09dSXin LI 			goto invalid;
2569b6cee71dSXin LI 		case 'i':
2570b6cee71dSXin LI 		case 'd':
2571b6cee71dSXin LI 		case 'u':
2572b6cee71dSXin LI 		case 'o':
2573b6cee71dSXin LI 		case 'x':
2574b6cee71dSXin LI 		case 'X':
2575b6cee71dSXin LI #ifdef STRICT_FORMAT
25763e41d09dSXin LI 			if (h == 0)
25773e41d09dSXin LI 				return 0;
25783e41d09dSXin LI 			/*FALLTHROUGH*/
2579b6cee71dSXin LI #else
2580b6cee71dSXin LI 			return 0;
2581b6cee71dSXin LI #endif
2582b6cee71dSXin LI 		default:
25833e41d09dSXin LI 			goto invalid;
2584b6cee71dSXin LI 		}
2585b6cee71dSXin LI 
2586b6cee71dSXin LI 	case FILE_FMT_FLOAT:
2587b6cee71dSXin LI 	case FILE_FMT_DOUBLE:
2588b6cee71dSXin LI 		if (*ptr == '-')
2589b6cee71dSXin LI 			ptr++;
2590b6cee71dSXin LI 		if (*ptr == '.')
2591b6cee71dSXin LI 			ptr++;
25923e41d09dSXin LI 		CHECKLEN();
2593b6cee71dSXin LI 		if (*ptr == '.')
2594b6cee71dSXin LI 			ptr++;
25953e41d09dSXin LI 		CHECKLEN();
2596b6cee71dSXin LI 		switch (*ptr++) {
2597b6cee71dSXin LI 		case 'e':
2598b6cee71dSXin LI 		case 'E':
2599b6cee71dSXin LI 		case 'f':
2600b6cee71dSXin LI 		case 'F':
2601b6cee71dSXin LI 		case 'g':
2602b6cee71dSXin LI 		case 'G':
2603b6cee71dSXin LI 			return 0;
2604b6cee71dSXin LI 
2605b6cee71dSXin LI 		default:
26063e41d09dSXin LI 			goto invalid;
2607b6cee71dSXin LI 		}
2608b6cee71dSXin LI 
2609b6cee71dSXin LI 
2610b6cee71dSXin LI 	case FILE_FMT_STR:
2611b6cee71dSXin LI 		if (*ptr == '-')
2612b6cee71dSXin LI 			ptr++;
261348c779cdSXin LI 		while (isdigit(CAST(unsigned char, *ptr)))
2614b6cee71dSXin LI 			ptr++;
2615b6cee71dSXin LI 		if (*ptr == '.') {
2616b6cee71dSXin LI 			ptr++;
261748c779cdSXin LI 			while (isdigit(CAST(unsigned char , *ptr)))
2618b6cee71dSXin LI 				ptr++;
2619b6cee71dSXin LI 		}
2620b6cee71dSXin LI 
2621b6cee71dSXin LI 		switch (*ptr++) {
2622b6cee71dSXin LI 		case 's':
2623b6cee71dSXin LI 			return 0;
2624b6cee71dSXin LI 		default:
26253e41d09dSXin LI 			goto invalid;
2626b6cee71dSXin LI 		}
2627b6cee71dSXin LI 
2628b6cee71dSXin LI 	default:
2629b6cee71dSXin LI 		/* internal error */
2630b6cee71dSXin LI 		abort();
2631b6cee71dSXin LI 	}
26323e41d09dSXin LI invalid:
26333e41d09dSXin LI 	*estr = "not valid";
26343e41d09dSXin LI toolong:
26353e41d09dSXin LI 	*estr = "too long";
2636b6cee71dSXin LI 	return -1;
2637b6cee71dSXin LI }
2638b6cee71dSXin LI 
2639b6cee71dSXin LI /*
2640b6cee71dSXin LI  * Check that the optional printf format in description matches
2641b6cee71dSXin LI  * the type of the magic.
2642b6cee71dSXin LI  */
2643b6cee71dSXin LI private int
2644b6cee71dSXin LI check_format(struct magic_set *ms, struct magic *m)
2645b6cee71dSXin LI {
2646b6cee71dSXin LI 	char *ptr;
26473e41d09dSXin LI 	const char *estr;
2648b6cee71dSXin LI 
2649b6cee71dSXin LI 	for (ptr = m->desc; *ptr; ptr++)
2650b6cee71dSXin LI 		if (*ptr == '%')
2651b6cee71dSXin LI 			break;
2652b6cee71dSXin LI 	if (*ptr == '\0') {
2653b6cee71dSXin LI 		/* No format string; ok */
2654b6cee71dSXin LI 		return 1;
2655b6cee71dSXin LI 	}
2656b6cee71dSXin LI 
2657b6cee71dSXin LI 	assert(file_nformats == file_nnames);
2658b6cee71dSXin LI 
2659b6cee71dSXin LI 	if (m->type >= file_nformats) {
2660b6cee71dSXin LI 		file_magwarn(ms, "Internal error inconsistency between "
2661b6cee71dSXin LI 		    "m->type and format strings");
2662b6cee71dSXin LI 		return -1;
2663b6cee71dSXin LI 	}
2664b6cee71dSXin LI 	if (file_formats[m->type] == FILE_FMT_NONE) {
2665b6cee71dSXin LI 		file_magwarn(ms, "No format string for `%s' with description "
2666b6cee71dSXin LI 		    "`%s'", m->desc, file_names[m->type]);
2667b6cee71dSXin LI 		return -1;
2668b6cee71dSXin LI 	}
2669b6cee71dSXin LI 
2670b6cee71dSXin LI 	ptr++;
26713e41d09dSXin LI 	if (check_format_type(ptr, m->type, &estr) == -1) {
2672b6cee71dSXin LI 		/*
2673b6cee71dSXin LI 		 * TODO: this error message is unhelpful if the format
2674b6cee71dSXin LI 		 * string is not one character long
2675b6cee71dSXin LI 		 */
26763e41d09dSXin LI 		file_magwarn(ms, "Printf format is %s for type "
26773e41d09dSXin LI 		    "`%s' in description `%s'", estr,
2678b6cee71dSXin LI 		    file_names[m->type], m->desc);
2679b6cee71dSXin LI 		return -1;
2680b6cee71dSXin LI 	}
2681b6cee71dSXin LI 
2682b6cee71dSXin LI 	for (; *ptr; ptr++) {
2683b6cee71dSXin LI 		if (*ptr == '%') {
2684b6cee71dSXin LI 			file_magwarn(ms,
2685b6cee71dSXin LI 			    "Too many format strings (should have at most one) "
2686b6cee71dSXin LI 			    "for `%s' with description `%s'",
2687b6cee71dSXin LI 			    file_names[m->type], m->desc);
2688b6cee71dSXin LI 			return -1;
2689b6cee71dSXin LI 		}
2690b6cee71dSXin LI 	}
2691b6cee71dSXin LI 	return 0;
2692b6cee71dSXin LI }
2693b6cee71dSXin LI 
2694b6cee71dSXin LI /*
2695b6cee71dSXin LI  * Read a numeric value from a pointer, into the value union of a magic
2696b6cee71dSXin LI  * pointer, according to the magic type.  Update the string pointer to point
2697b6cee71dSXin LI  * just after the number read.  Return 0 for success, non-zero for failure.
2698b6cee71dSXin LI  */
2699b6cee71dSXin LI private int
2700b6cee71dSXin LI getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
2701b6cee71dSXin LI {
270258a0f0d0SEitan Adler 	char *ep;
270358a0f0d0SEitan Adler 	uint64_t ull;
270458a0f0d0SEitan Adler 
2705b6cee71dSXin LI 	switch (m->type) {
2706b6cee71dSXin LI 	case FILE_BESTRING16:
2707b6cee71dSXin LI 	case FILE_LESTRING16:
2708b6cee71dSXin LI 	case FILE_STRING:
2709b6cee71dSXin LI 	case FILE_PSTRING:
2710b6cee71dSXin LI 	case FILE_REGEX:
2711b6cee71dSXin LI 	case FILE_SEARCH:
2712b6cee71dSXin LI 	case FILE_NAME:
2713b6cee71dSXin LI 	case FILE_USE:
27143e41d09dSXin LI 	case FILE_DER:
2715b6cee71dSXin LI 		*p = getstr(ms, m, *p, action == FILE_COMPILE);
2716b6cee71dSXin LI 		if (*p == NULL) {
2717b6cee71dSXin LI 			if (ms->flags & MAGIC_CHECK)
2718b6cee71dSXin LI 				file_magwarn(ms, "cannot get string from `%s'",
2719b6cee71dSXin LI 				    m->value.s);
2720b6cee71dSXin LI 			return -1;
2721b6cee71dSXin LI 		}
2722b6cee71dSXin LI 		if (m->type == FILE_REGEX) {
2723b6cee71dSXin LI 			file_regex_t rx;
2724b6cee71dSXin LI 			int rc = file_regcomp(&rx, m->value.s, REG_EXTENDED);
2725b6cee71dSXin LI 			if (rc) {
2726b6cee71dSXin LI 				if (ms->flags & MAGIC_CHECK)
2727b6cee71dSXin LI 					file_regerror(&rx, rc, ms);
2728b6cee71dSXin LI 			}
2729b6cee71dSXin LI 			file_regfree(&rx);
2730b6cee71dSXin LI 			return rc ? -1 : 0;
2731b6cee71dSXin LI 		}
2732b6cee71dSXin LI 		return 0;
273358a0f0d0SEitan Adler 	default:
273458a0f0d0SEitan Adler 		if (m->reln == 'x')
273558a0f0d0SEitan Adler 			return 0;
273658a0f0d0SEitan Adler 		break;
273758a0f0d0SEitan Adler 	}
273858a0f0d0SEitan Adler 
273958a0f0d0SEitan Adler 	switch (m->type) {
2740b6cee71dSXin LI 	case FILE_FLOAT:
2741b6cee71dSXin LI 	case FILE_BEFLOAT:
2742b6cee71dSXin LI 	case FILE_LEFLOAT:
27439ce06829SXin LI 		errno = 0;
2744b6cee71dSXin LI #ifdef HAVE_STRTOF
2745b6cee71dSXin LI 		m->value.f = strtof(*p, &ep);
2746b6cee71dSXin LI #else
2747b6cee71dSXin LI 		m->value.f = (float)strtod(*p, &ep);
2748b6cee71dSXin LI #endif
27499ce06829SXin LI 		if (errno == 0)
2750b6cee71dSXin LI 			*p = ep;
2751b6cee71dSXin LI 		return 0;
2752b6cee71dSXin LI 	case FILE_DOUBLE:
2753b6cee71dSXin LI 	case FILE_BEDOUBLE:
2754b6cee71dSXin LI 	case FILE_LEDOUBLE:
27559ce06829SXin LI 		errno = 0;
2756b6cee71dSXin LI 		m->value.d = strtod(*p, &ep);
27579ce06829SXin LI 		if (errno == 0)
2758b6cee71dSXin LI 			*p = ep;
2759b6cee71dSXin LI 		return 0;
27602726a701SXin LI 	case FILE_GUID:
27612726a701SXin LI 		if (file_parse_guid(*p, m->value.guid) == -1)
27622726a701SXin LI 			return -1;
27632726a701SXin LI 		*p += FILE_GUID_SIZE - 1;
27642726a701SXin LI 		return 0;
2765b6cee71dSXin LI 	default:
27669ce06829SXin LI 		errno = 0;
276748c779cdSXin LI 		ull = CAST(uint64_t, strtoull(*p, &ep, 0));
276840427ccaSGordon Tetlow 		m->value.q = file_signextend(ms, m, ull);
276940427ccaSGordon Tetlow 		if (*p == ep) {
2770*43a5ec4eSXin LI 			file_magwarn(ms, "Unparsable number `%s'", *p);
277140427ccaSGordon Tetlow 		} else {
277240427ccaSGordon Tetlow 			size_t ts = typesize(m->type);
277340427ccaSGordon Tetlow 			uint64_t x;
277440427ccaSGordon Tetlow 			const char *q;
277540427ccaSGordon Tetlow 
27762726a701SXin LI 			if (ts == FILE_BADSIZE) {
277758a0f0d0SEitan Adler 				file_magwarn(ms,
277858a0f0d0SEitan Adler 				    "Expected numeric type got `%s'",
277940427ccaSGordon Tetlow 				    type_tbl[m->type].name);
278040427ccaSGordon Tetlow 			}
278148c779cdSXin LI 			for (q = *p; isspace(CAST(unsigned char, *q)); q++)
278240427ccaSGordon Tetlow 				continue;
278340427ccaSGordon Tetlow 			if (*q == '-')
278448c779cdSXin LI 				ull = -CAST(int64_t, ull);
278540427ccaSGordon Tetlow 			switch (ts) {
278640427ccaSGordon Tetlow 			case 1:
278748c779cdSXin LI 				x = CAST(uint64_t, ull & ~0xffULL);
278840427ccaSGordon Tetlow 				break;
278940427ccaSGordon Tetlow 			case 2:
279048c779cdSXin LI 				x = CAST(uint64_t, ull & ~0xffffULL);
279140427ccaSGordon Tetlow 				break;
279240427ccaSGordon Tetlow 			case 4:
279348c779cdSXin LI 				x = CAST(uint64_t, ull & ~0xffffffffULL);
279440427ccaSGordon Tetlow 				break;
279540427ccaSGordon Tetlow 			case 8:
279640427ccaSGordon Tetlow 				x = 0;
279740427ccaSGordon Tetlow 				break;
279840427ccaSGordon Tetlow 			default:
279940427ccaSGordon Tetlow 				abort();
280040427ccaSGordon Tetlow 			}
280140427ccaSGordon Tetlow 			if (x) {
280258a0f0d0SEitan Adler 				file_magwarn(ms, "Overflow for numeric"
280358a0f0d0SEitan Adler 				    " type `%s' value %#" PRIx64,
280440427ccaSGordon Tetlow 				    type_tbl[m->type].name, ull);
280540427ccaSGordon Tetlow 			}
280640427ccaSGordon Tetlow 		}
28079ce06829SXin LI 		if (errno == 0) {
2808b6cee71dSXin LI 			*p = ep;
2809b6cee71dSXin LI 			eatsize(p);
2810b6cee71dSXin LI 		}
2811b6cee71dSXin LI 		return 0;
2812b6cee71dSXin LI 	}
2813b6cee71dSXin LI }
2814b6cee71dSXin LI 
2815b6cee71dSXin LI /*
2816b6cee71dSXin LI  * Convert a string containing C character escapes.  Stop at an unescaped
2817b6cee71dSXin LI  * space or tab.
2818b6cee71dSXin LI  * Copy the converted version to "m->value.s", and the length in m->vallen.
2819b6cee71dSXin LI  * Return updated scan pointer as function result. Warn if set.
2820b6cee71dSXin LI  */
2821b6cee71dSXin LI private const char *
2822b6cee71dSXin LI getstr(struct magic_set *ms, struct magic *m, const char *s, int warn)
2823b6cee71dSXin LI {
2824b6cee71dSXin LI 	const char *origs = s;
2825b6cee71dSXin LI 	char	*p = m->value.s;
2826b6cee71dSXin LI 	size_t  plen = sizeof(m->value.s);
2827b6cee71dSXin LI 	char 	*origp = p;
2828b6cee71dSXin LI 	char	*pmax = p + plen - 1;
2829b6cee71dSXin LI 	int	c;
2830b6cee71dSXin LI 	int	val;
2831b6cee71dSXin LI 
2832b6cee71dSXin LI 	while ((c = *s++) != '\0') {
283348c779cdSXin LI 		if (isspace(CAST(unsigned char, c)))
2834b6cee71dSXin LI 			break;
2835b6cee71dSXin LI 		if (p >= pmax) {
2836b6cee71dSXin LI 			file_error(ms, 0, "string too long: `%s'", origs);
2837b6cee71dSXin LI 			return NULL;
2838b6cee71dSXin LI 		}
2839b6cee71dSXin LI 		if (c == '\\') {
2840b6cee71dSXin LI 			switch(c = *s++) {
2841b6cee71dSXin LI 
2842b6cee71dSXin LI 			case '\0':
2843b6cee71dSXin LI 				if (warn)
2844b6cee71dSXin LI 					file_magwarn(ms, "incomplete escape");
28459ce06829SXin LI 				s--;
2846b6cee71dSXin LI 				goto out;
2847b6cee71dSXin LI 
2848b6cee71dSXin LI 			case '\t':
2849b6cee71dSXin LI 				if (warn) {
2850b6cee71dSXin LI 					file_magwarn(ms,
2851b6cee71dSXin LI 					    "escaped tab found, use \\t instead");
2852b6cee71dSXin LI 					warn = 0;	/* already did */
2853b6cee71dSXin LI 				}
2854b6cee71dSXin LI 				/*FALLTHROUGH*/
2855b6cee71dSXin LI 			default:
2856b6cee71dSXin LI 				if (warn) {
285748c779cdSXin LI 					if (isprint(CAST(unsigned char, c))) {
2858b6cee71dSXin LI 						/* Allow escaping of
2859b6cee71dSXin LI 						 * ``relations'' */
2860b6cee71dSXin LI 						if (strchr("<>&^=!", c) == NULL
2861b6cee71dSXin LI 						    && (m->type != FILE_REGEX ||
2862b6cee71dSXin LI 						    strchr("[]().*?^$|{}", c)
2863b6cee71dSXin LI 						    == NULL)) {
2864b6cee71dSXin LI 							file_magwarn(ms, "no "
2865b6cee71dSXin LI 							    "need to escape "
2866b6cee71dSXin LI 							    "`%c'", c);
2867b6cee71dSXin LI 						}
2868b6cee71dSXin LI 					} else {
2869b6cee71dSXin LI 						file_magwarn(ms,
2870b6cee71dSXin LI 						    "unknown escape sequence: "
2871b6cee71dSXin LI 						    "\\%03o", c);
2872b6cee71dSXin LI 					}
2873b6cee71dSXin LI 				}
2874b6cee71dSXin LI 				/*FALLTHROUGH*/
2875b6cee71dSXin LI 			/* space, perhaps force people to use \040? */
2876b6cee71dSXin LI 			case ' ':
2877b6cee71dSXin LI #if 0
2878b6cee71dSXin LI 			/*
2879b6cee71dSXin LI 			 * Other things people escape, but shouldn't need to,
2880b6cee71dSXin LI 			 * so we disallow them
2881b6cee71dSXin LI 			 */
2882b6cee71dSXin LI 			case '\'':
2883b6cee71dSXin LI 			case '"':
2884b6cee71dSXin LI 			case '?':
2885b6cee71dSXin LI #endif
2886b6cee71dSXin LI 			/* Relations */
2887b6cee71dSXin LI 			case '>':
2888b6cee71dSXin LI 			case '<':
2889b6cee71dSXin LI 			case '&':
2890b6cee71dSXin LI 			case '^':
2891b6cee71dSXin LI 			case '=':
2892b6cee71dSXin LI 			case '!':
2893b6cee71dSXin LI 			/* and baskslash itself */
2894b6cee71dSXin LI 			case '\\':
289548c779cdSXin LI 				*p++ = CAST(char, c);
2896b6cee71dSXin LI 				break;
2897b6cee71dSXin LI 
2898b6cee71dSXin LI 			case 'a':
2899b6cee71dSXin LI 				*p++ = '\a';
2900b6cee71dSXin LI 				break;
2901b6cee71dSXin LI 
2902b6cee71dSXin LI 			case 'b':
2903b6cee71dSXin LI 				*p++ = '\b';
2904b6cee71dSXin LI 				break;
2905b6cee71dSXin LI 
2906b6cee71dSXin LI 			case 'f':
2907b6cee71dSXin LI 				*p++ = '\f';
2908b6cee71dSXin LI 				break;
2909b6cee71dSXin LI 
2910b6cee71dSXin LI 			case 'n':
2911b6cee71dSXin LI 				*p++ = '\n';
2912b6cee71dSXin LI 				break;
2913b6cee71dSXin LI 
2914b6cee71dSXin LI 			case 'r':
2915b6cee71dSXin LI 				*p++ = '\r';
2916b6cee71dSXin LI 				break;
2917b6cee71dSXin LI 
2918b6cee71dSXin LI 			case 't':
2919b6cee71dSXin LI 				*p++ = '\t';
2920b6cee71dSXin LI 				break;
2921b6cee71dSXin LI 
2922b6cee71dSXin LI 			case 'v':
2923b6cee71dSXin LI 				*p++ = '\v';
2924b6cee71dSXin LI 				break;
2925b6cee71dSXin LI 
2926b6cee71dSXin LI 			/* \ and up to 3 octal digits */
2927b6cee71dSXin LI 			case '0':
2928b6cee71dSXin LI 			case '1':
2929b6cee71dSXin LI 			case '2':
2930b6cee71dSXin LI 			case '3':
2931b6cee71dSXin LI 			case '4':
2932b6cee71dSXin LI 			case '5':
2933b6cee71dSXin LI 			case '6':
2934b6cee71dSXin LI 			case '7':
2935b6cee71dSXin LI 				val = c - '0';
2936b6cee71dSXin LI 				c = *s++;  /* try for 2 */
2937b6cee71dSXin LI 				if (c >= '0' && c <= '7') {
2938b6cee71dSXin LI 					val = (val << 3) | (c - '0');
2939b6cee71dSXin LI 					c = *s++;  /* try for 3 */
2940b6cee71dSXin LI 					if (c >= '0' && c <= '7')
2941b6cee71dSXin LI 						val = (val << 3) | (c-'0');
2942b6cee71dSXin LI 					else
2943b6cee71dSXin LI 						--s;
2944b6cee71dSXin LI 				}
2945b6cee71dSXin LI 				else
2946b6cee71dSXin LI 					--s;
294748c779cdSXin LI 				*p++ = CAST(char, val);
2948b6cee71dSXin LI 				break;
2949b6cee71dSXin LI 
2950b6cee71dSXin LI 			/* \x and up to 2 hex digits */
2951b6cee71dSXin LI 			case 'x':
2952b6cee71dSXin LI 				val = 'x';	/* Default if no digits */
2953b6cee71dSXin LI 				c = hextoint(*s++);	/* Get next char */
2954b6cee71dSXin LI 				if (c >= 0) {
2955b6cee71dSXin LI 					val = c;
2956b6cee71dSXin LI 					c = hextoint(*s++);
2957b6cee71dSXin LI 					if (c >= 0)
2958b6cee71dSXin LI 						val = (val << 4) + c;
2959b6cee71dSXin LI 					else
2960b6cee71dSXin LI 						--s;
2961b6cee71dSXin LI 				} else
2962b6cee71dSXin LI 					--s;
296348c779cdSXin LI 				*p++ = CAST(char, val);
2964b6cee71dSXin LI 				break;
2965b6cee71dSXin LI 			}
2966b6cee71dSXin LI 		} else
296748c779cdSXin LI 			*p++ = CAST(char, c);
2968b6cee71dSXin LI 	}
29699ce06829SXin LI 	--s;
2970b6cee71dSXin LI out:
2971b6cee71dSXin LI 	*p = '\0';
2972b6cee71dSXin LI 	m->vallen = CAST(unsigned char, (p - origp));
29732726a701SXin LI 	if (m->type == FILE_PSTRING) {
29742726a701SXin LI 		size_t l =  file_pstring_length_size(ms, m);
29752726a701SXin LI 		if (l == FILE_BADSIZE)
29762726a701SXin LI 			return NULL;
29772726a701SXin LI 		m->vallen += CAST(unsigned char, l);
29782726a701SXin LI 	}
2979b6cee71dSXin LI 	return s;
2980b6cee71dSXin LI }
2981b6cee71dSXin LI 
2982b6cee71dSXin LI 
2983b6cee71dSXin LI /* Single hex char to int; -1 if not a hex char. */
2984b6cee71dSXin LI private int
2985b6cee71dSXin LI hextoint(int c)
2986b6cee71dSXin LI {
298748c779cdSXin LI 	if (!isascii(CAST(unsigned char, c)))
2988b6cee71dSXin LI 		return -1;
298948c779cdSXin LI 	if (isdigit(CAST(unsigned char, c)))
2990b6cee71dSXin LI 		return c - '0';
2991b6cee71dSXin LI 	if ((c >= 'a') && (c <= 'f'))
2992b6cee71dSXin LI 		return c + 10 - 'a';
2993b6cee71dSXin LI 	if (( c>= 'A') && (c <= 'F'))
2994b6cee71dSXin LI 		return c + 10 - 'A';
2995b6cee71dSXin LI 	return -1;
2996b6cee71dSXin LI }
2997b6cee71dSXin LI 
2998b6cee71dSXin LI 
2999b6cee71dSXin LI /*
3000b6cee71dSXin LI  * Print a string containing C character escapes.
3001b6cee71dSXin LI  */
3002b6cee71dSXin LI protected void
3003b6cee71dSXin LI file_showstr(FILE *fp, const char *s, size_t len)
3004b6cee71dSXin LI {
3005b6cee71dSXin LI 	char	c;
3006b6cee71dSXin LI 
3007b6cee71dSXin LI 	for (;;) {
30082726a701SXin LI 		if (len == FILE_BADSIZE) {
3009b6cee71dSXin LI 			c = *s++;
3010b6cee71dSXin LI 			if (c == '\0')
3011b6cee71dSXin LI 				break;
3012b6cee71dSXin LI 		}
3013b6cee71dSXin LI 		else  {
3014b6cee71dSXin LI 			if (len-- == 0)
3015b6cee71dSXin LI 				break;
3016b6cee71dSXin LI 			c = *s++;
3017b6cee71dSXin LI 		}
3018b6cee71dSXin LI 		if (c >= 040 && c <= 0176)	/* TODO isprint && !iscntrl */
3019b6cee71dSXin LI 			(void) fputc(c, fp);
3020b6cee71dSXin LI 		else {
3021b6cee71dSXin LI 			(void) fputc('\\', fp);
3022b6cee71dSXin LI 			switch (c) {
3023b6cee71dSXin LI 			case '\a':
3024b6cee71dSXin LI 				(void) fputc('a', fp);
3025b6cee71dSXin LI 				break;
3026b6cee71dSXin LI 
3027b6cee71dSXin LI 			case '\b':
3028b6cee71dSXin LI 				(void) fputc('b', fp);
3029b6cee71dSXin LI 				break;
3030b6cee71dSXin LI 
3031b6cee71dSXin LI 			case '\f':
3032b6cee71dSXin LI 				(void) fputc('f', fp);
3033b6cee71dSXin LI 				break;
3034b6cee71dSXin LI 
3035b6cee71dSXin LI 			case '\n':
3036b6cee71dSXin LI 				(void) fputc('n', fp);
3037b6cee71dSXin LI 				break;
3038b6cee71dSXin LI 
3039b6cee71dSXin LI 			case '\r':
3040b6cee71dSXin LI 				(void) fputc('r', fp);
3041b6cee71dSXin LI 				break;
3042b6cee71dSXin LI 
3043b6cee71dSXin LI 			case '\t':
3044b6cee71dSXin LI 				(void) fputc('t', fp);
3045b6cee71dSXin LI 				break;
3046b6cee71dSXin LI 
3047b6cee71dSXin LI 			case '\v':
3048b6cee71dSXin LI 				(void) fputc('v', fp);
3049b6cee71dSXin LI 				break;
3050b6cee71dSXin LI 
3051b6cee71dSXin LI 			default:
3052b6cee71dSXin LI 				(void) fprintf(fp, "%.3o", c & 0377);
3053b6cee71dSXin LI 				break;
3054b6cee71dSXin LI 			}
3055b6cee71dSXin LI 		}
3056b6cee71dSXin LI 	}
3057b6cee71dSXin LI }
3058b6cee71dSXin LI 
3059b6cee71dSXin LI /*
3060b6cee71dSXin LI  * eatsize(): Eat the size spec from a number [eg. 10UL]
3061b6cee71dSXin LI  */
3062b6cee71dSXin LI private void
3063b6cee71dSXin LI eatsize(const char **p)
3064b6cee71dSXin LI {
3065b6cee71dSXin LI 	const char *l = *p;
3066b6cee71dSXin LI 
3067b6cee71dSXin LI 	if (LOWCASE(*l) == 'u')
3068b6cee71dSXin LI 		l++;
3069b6cee71dSXin LI 
3070b6cee71dSXin LI 	switch (LOWCASE(*l)) {
3071b6cee71dSXin LI 	case 'l':    /* long */
3072b6cee71dSXin LI 	case 's':    /* short */
3073b6cee71dSXin LI 	case 'h':    /* short */
3074b6cee71dSXin LI 	case 'b':    /* char/byte */
3075b6cee71dSXin LI 	case 'c':    /* char/byte */
3076b6cee71dSXin LI 		l++;
3077b6cee71dSXin LI 		/*FALLTHROUGH*/
3078b6cee71dSXin LI 	default:
3079b6cee71dSXin LI 		break;
3080b6cee71dSXin LI 	}
3081b6cee71dSXin LI 
3082b6cee71dSXin LI 	*p = l;
3083b6cee71dSXin LI }
3084b6cee71dSXin LI 
3085b6cee71dSXin LI /*
3086c2931133SXin LI  * handle a buffer containing a compiled file.
3087c2931133SXin LI  */
3088c2931133SXin LI private struct magic_map *
3089c2931133SXin LI apprentice_buf(struct magic_set *ms, struct magic *buf, size_t len)
3090c2931133SXin LI {
3091c2931133SXin LI 	struct magic_map *map;
3092c2931133SXin LI 
3093c2931133SXin LI 	if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) {
3094c2931133SXin LI 		file_oomem(ms, sizeof(*map));
3095c2931133SXin LI 		return NULL;
3096c2931133SXin LI 	}
3097c2931133SXin LI 	map->len = len;
3098c2931133SXin LI 	map->p = buf;
3099c2931133SXin LI 	map->type = MAP_TYPE_USER;
3100c2931133SXin LI 	if (check_buffer(ms, map, "buffer") != 0) {
3101c2931133SXin LI 		apprentice_unmap(map);
3102c2931133SXin LI 		return NULL;
3103c2931133SXin LI 	}
3104c2931133SXin LI 	return map;
3105c2931133SXin LI }
3106c2931133SXin LI 
3107c2931133SXin LI /*
3108b6cee71dSXin LI  * handle a compiled file.
3109b6cee71dSXin LI  */
3110b6cee71dSXin LI 
3111b6cee71dSXin LI private struct magic_map *
3112b6cee71dSXin LI apprentice_map(struct magic_set *ms, const char *fn)
3113b6cee71dSXin LI {
3114b6cee71dSXin LI 	int fd;
3115b6cee71dSXin LI 	struct stat st;
3116b6cee71dSXin LI 	char *dbname = NULL;
3117b6cee71dSXin LI 	struct magic_map *map;
3118a5d223e6SXin LI 	struct magic_map *rv = NULL;
3119b6cee71dSXin LI 
3120b6cee71dSXin LI 	fd = -1;
3121b6cee71dSXin LI 	if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) {
3122b6cee71dSXin LI 		file_oomem(ms, sizeof(*map));
3123b6cee71dSXin LI 		goto error;
3124b6cee71dSXin LI 	}
31253e41d09dSXin LI 	map->type = MAP_TYPE_USER;	/* unspecified */
3126b6cee71dSXin LI 
3127b6cee71dSXin LI 	dbname = mkdbname(ms, fn, 0);
3128b6cee71dSXin LI 	if (dbname == NULL)
3129b6cee71dSXin LI 		goto error;
3130b6cee71dSXin LI 
3131b6cee71dSXin LI 	if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1)
3132b6cee71dSXin LI 		goto error;
3133b6cee71dSXin LI 
3134b6cee71dSXin LI 	if (fstat(fd, &st) == -1) {
3135b6cee71dSXin LI 		file_error(ms, errno, "cannot stat `%s'", dbname);
3136b6cee71dSXin LI 		goto error;
3137b6cee71dSXin LI 	}
313848c779cdSXin LI 	if (st.st_size < 8 || st.st_size > maxoff_t()) {
3139b6cee71dSXin LI 		file_error(ms, 0, "file `%s' is too %s", dbname,
3140b6cee71dSXin LI 		    st.st_size < 8 ? "small" : "large");
3141b6cee71dSXin LI 		goto error;
3142b6cee71dSXin LI 	}
3143b6cee71dSXin LI 
314448c779cdSXin LI 	map->len = CAST(size_t, st.st_size);
3145b6cee71dSXin LI #ifdef QUICK
31463e41d09dSXin LI 	map->type = MAP_TYPE_MMAP;
314748c779cdSXin LI 	if ((map->p = mmap(0, CAST(size_t, st.st_size), PROT_READ|PROT_WRITE,
314848c779cdSXin LI 	    MAP_PRIVATE|MAP_FILE, fd, CAST(off_t, 0))) == MAP_FAILED) {
3149b6cee71dSXin LI 		file_error(ms, errno, "cannot map `%s'", dbname);
3150b6cee71dSXin LI 		goto error;
3151b6cee71dSXin LI 	}
3152b6cee71dSXin LI #else
31533e41d09dSXin LI 	map->type = MAP_TYPE_MALLOC;
3154b6cee71dSXin LI 	if ((map->p = CAST(void *, malloc(map->len))) == NULL) {
3155b6cee71dSXin LI 		file_oomem(ms, map->len);
3156b6cee71dSXin LI 		goto error;
3157b6cee71dSXin LI 	}
3158b6cee71dSXin LI 	if (read(fd, map->p, map->len) != (ssize_t)map->len) {
3159b6cee71dSXin LI 		file_badread(ms);
3160b6cee71dSXin LI 		goto error;
3161b6cee71dSXin LI 	}
3162b6cee71dSXin LI #endif
3163b6cee71dSXin LI 	(void)close(fd);
3164b6cee71dSXin LI 	fd = -1;
3165c2931133SXin LI 
3166a5d223e6SXin LI 	if (check_buffer(ms, map, dbname) != 0) {
3167c2931133SXin LI 		goto error;
3168a5d223e6SXin LI 	}
31693e41d09dSXin LI #ifdef QUICK
317048c779cdSXin LI 	if (mprotect(map->p, CAST(size_t, st.st_size), PROT_READ) == -1) {
31713e41d09dSXin LI 		file_error(ms, errno, "cannot mprotect `%s'", dbname);
31723e41d09dSXin LI 		goto error;
31733e41d09dSXin LI 	}
31743e41d09dSXin LI #endif
3175c2931133SXin LI 
3176c2931133SXin LI 	free(dbname);
3177c2931133SXin LI 	return map;
3178c2931133SXin LI 
3179c2931133SXin LI error:
3180c2931133SXin LI 	if (fd != -1)
3181c2931133SXin LI 		(void)close(fd);
3182c2931133SXin LI 	apprentice_unmap(map);
3183c2931133SXin LI 	free(dbname);
3184a5d223e6SXin LI 	return rv;
3185c2931133SXin LI }
3186c2931133SXin LI 
3187c2931133SXin LI private int
3188c2931133SXin LI check_buffer(struct magic_set *ms, struct magic_map *map, const char *dbname)
3189c2931133SXin LI {
3190c2931133SXin LI 	uint32_t *ptr;
3191c2931133SXin LI 	uint32_t entries, nentries;
3192c2931133SXin LI 	uint32_t version;
3193c2931133SXin LI 	int i, needsbyteswap;
3194c2931133SXin LI 
3195b6cee71dSXin LI 	ptr = CAST(uint32_t *, map->p);
3196b6cee71dSXin LI 	if (*ptr != MAGICNO) {
3197b6cee71dSXin LI 		if (swap4(*ptr) != MAGICNO) {
3198b6cee71dSXin LI 			file_error(ms, 0, "bad magic in `%s'", dbname);
3199c2931133SXin LI 			return -1;
3200b6cee71dSXin LI 		}
3201b6cee71dSXin LI 		needsbyteswap = 1;
3202b6cee71dSXin LI 	} else
3203b6cee71dSXin LI 		needsbyteswap = 0;
3204b6cee71dSXin LI 	if (needsbyteswap)
3205b6cee71dSXin LI 		version = swap4(ptr[1]);
3206b6cee71dSXin LI 	else
3207b6cee71dSXin LI 		version = ptr[1];
3208b6cee71dSXin LI 	if (version != VERSIONNO) {
3209b6cee71dSXin LI 		file_error(ms, 0, "File %s supports only version %d magic "
3210b6cee71dSXin LI 		    "files. `%s' is version %d", VERSION,
3211b6cee71dSXin LI 		    VERSIONNO, dbname, version);
3212c2931133SXin LI 		return -1;
3213b6cee71dSXin LI 	}
321448c779cdSXin LI 	entries = CAST(uint32_t, map->len / sizeof(struct magic));
3215c2931133SXin LI 	if ((entries * sizeof(struct magic)) != map->len) {
3216c2931133SXin LI 		file_error(ms, 0, "Size of `%s' %" SIZE_T_FORMAT "u is not "
3217b6cee71dSXin LI 		    "a multiple of %" SIZE_T_FORMAT "u",
3218c2931133SXin LI 		    dbname, map->len, sizeof(struct magic));
3219c2931133SXin LI 		return -1;
3220b6cee71dSXin LI 	}
3221b6cee71dSXin LI 	map->magic[0] = CAST(struct magic *, map->p) + 1;
3222b6cee71dSXin LI 	nentries = 0;
3223b6cee71dSXin LI 	for (i = 0; i < MAGIC_SETS; i++) {
3224b6cee71dSXin LI 		if (needsbyteswap)
3225b6cee71dSXin LI 			map->nmagic[i] = swap4(ptr[i + 2]);
3226b6cee71dSXin LI 		else
3227b6cee71dSXin LI 			map->nmagic[i] = ptr[i + 2];
3228b6cee71dSXin LI 		if (i != MAGIC_SETS - 1)
3229b6cee71dSXin LI 			map->magic[i + 1] = map->magic[i] + map->nmagic[i];
3230b6cee71dSXin LI 		nentries += map->nmagic[i];
3231b6cee71dSXin LI 	}
3232b6cee71dSXin LI 	if (entries != nentries + 1) {
3233b6cee71dSXin LI 		file_error(ms, 0, "Inconsistent entries in `%s' %u != %u",
3234b6cee71dSXin LI 		    dbname, entries, nentries + 1);
3235c2931133SXin LI 		return -1;
3236b6cee71dSXin LI 	}
3237b6cee71dSXin LI 	if (needsbyteswap)
3238b6cee71dSXin LI 		for (i = 0; i < MAGIC_SETS; i++)
3239b6cee71dSXin LI 			byteswap(map->magic[i], map->nmagic[i]);
3240c2931133SXin LI 	return 0;
3241b6cee71dSXin LI }
3242b6cee71dSXin LI 
3243b6cee71dSXin LI /*
3244b6cee71dSXin LI  * handle an mmaped file.
3245b6cee71dSXin LI  */
3246b6cee71dSXin LI private int
3247b6cee71dSXin LI apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn)
3248b6cee71dSXin LI {
3249b6cee71dSXin LI 	static const size_t nm = sizeof(*map->nmagic) * MAGIC_SETS;
3250b6cee71dSXin LI 	static const size_t m = sizeof(**map->magic);
3251b6cee71dSXin LI 	int fd = -1;
3252b6cee71dSXin LI 	size_t len;
3253b6cee71dSXin LI 	char *dbname;
3254b6cee71dSXin LI 	int rv = -1;
3255b6cee71dSXin LI 	uint32_t i;
3256b6cee71dSXin LI 	union {
3257b6cee71dSXin LI 		struct magic m;
3258b6cee71dSXin LI 		uint32_t h[2 + MAGIC_SETS];
3259b6cee71dSXin LI 	} hdr;
3260b6cee71dSXin LI 
3261b6cee71dSXin LI 	dbname = mkdbname(ms, fn, 1);
3262b6cee71dSXin LI 
3263b6cee71dSXin LI 	if (dbname == NULL)
3264b6cee71dSXin LI 		goto out;
3265b6cee71dSXin LI 
3266b6cee71dSXin LI 	if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1)
3267b6cee71dSXin LI 	{
3268b6cee71dSXin LI 		file_error(ms, errno, "cannot open `%s'", dbname);
3269b6cee71dSXin LI 		goto out;
3270b6cee71dSXin LI 	}
3271b6cee71dSXin LI 	memset(&hdr, 0, sizeof(hdr));
3272b6cee71dSXin LI 	hdr.h[0] = MAGICNO;
3273b6cee71dSXin LI 	hdr.h[1] = VERSIONNO;
3274b6cee71dSXin LI 	memcpy(hdr.h + 2, map->nmagic, nm);
3275b6cee71dSXin LI 
327648c779cdSXin LI 	if (write(fd, &hdr, sizeof(hdr)) != CAST(ssize_t, sizeof(hdr))) {
3277b6cee71dSXin LI 		file_error(ms, errno, "error writing `%s'", dbname);
327858a0f0d0SEitan Adler 		goto out2;
3279b6cee71dSXin LI 	}
3280b6cee71dSXin LI 
3281b6cee71dSXin LI 	for (i = 0; i < MAGIC_SETS; i++) {
3282b6cee71dSXin LI 		len = m * map->nmagic[i];
328348c779cdSXin LI 		if (write(fd, map->magic[i], len) != CAST(ssize_t, len)) {
3284b6cee71dSXin LI 			file_error(ms, errno, "error writing `%s'", dbname);
328558a0f0d0SEitan Adler 			goto out2;
3286b6cee71dSXin LI 		}
3287b6cee71dSXin LI 	}
3288b6cee71dSXin LI 
328958a0f0d0SEitan Adler 	rv = 0;
329058a0f0d0SEitan Adler out2:
3291b6cee71dSXin LI 	if (fd != -1)
3292b6cee71dSXin LI 		(void)close(fd);
3293b6cee71dSXin LI out:
3294282e23f0SXin LI 	apprentice_unmap(map);
3295b6cee71dSXin LI 	free(dbname);
3296b6cee71dSXin LI 	return rv;
3297b6cee71dSXin LI }
3298b6cee71dSXin LI 
3299b6cee71dSXin LI private const char ext[] = ".mgc";
3300b6cee71dSXin LI /*
3301b6cee71dSXin LI  * make a dbname
3302b6cee71dSXin LI  */
3303b6cee71dSXin LI private char *
3304b6cee71dSXin LI mkdbname(struct magic_set *ms, const char *fn, int strip)
3305b6cee71dSXin LI {
3306b6cee71dSXin LI 	const char *p, *q;
3307b6cee71dSXin LI 	char *buf;
3308b6cee71dSXin LI 
3309b6cee71dSXin LI 	if (strip) {
3310b6cee71dSXin LI 		if ((p = strrchr(fn, '/')) != NULL)
3311b6cee71dSXin LI 			fn = ++p;
3312b6cee71dSXin LI 	}
3313b6cee71dSXin LI 
3314b6cee71dSXin LI 	for (q = fn; *q; q++)
3315b6cee71dSXin LI 		continue;
3316b6cee71dSXin LI 	/* Look for .mgc */
3317b6cee71dSXin LI 	for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--)
3318b6cee71dSXin LI 		if (*p != *q)
3319b6cee71dSXin LI 			break;
3320b6cee71dSXin LI 
3321b6cee71dSXin LI 	/* Did not find .mgc, restore q */
3322b6cee71dSXin LI 	if (p >= ext)
3323b6cee71dSXin LI 		while (*q)
3324b6cee71dSXin LI 			q++;
3325b6cee71dSXin LI 
3326b6cee71dSXin LI 	q++;
3327b6cee71dSXin LI 	/* Compatibility with old code that looked in .mime */
3328b6cee71dSXin LI 	if (ms->flags & MAGIC_MIME) {
332948c779cdSXin LI 		if (asprintf(&buf, "%.*s.mime%s", CAST(int, q - fn), fn, ext)
333048c779cdSXin LI 		    < 0)
3331b6cee71dSXin LI 			return NULL;
3332b6cee71dSXin LI 		if (access(buf, R_OK) != -1) {
3333b6cee71dSXin LI 			ms->flags &= MAGIC_MIME_TYPE;
3334b6cee71dSXin LI 			return buf;
3335b6cee71dSXin LI 		}
3336b6cee71dSXin LI 		free(buf);
3337b6cee71dSXin LI 	}
333848c779cdSXin LI 	if (asprintf(&buf, "%.*s%s", CAST(int, q - fn), fn, ext) < 0)
3339b6cee71dSXin LI 		return NULL;
3340b6cee71dSXin LI 
3341b6cee71dSXin LI 	/* Compatibility with old code that looked in .mime */
3342a5d223e6SXin LI 	if (strstr(fn, ".mime") != NULL)
3343b6cee71dSXin LI 		ms->flags &= MAGIC_MIME_TYPE;
3344b6cee71dSXin LI 	return buf;
3345b6cee71dSXin LI }
3346b6cee71dSXin LI 
3347b6cee71dSXin LI /*
3348b6cee71dSXin LI  * Byteswap an mmap'ed file if needed
3349b6cee71dSXin LI  */
3350b6cee71dSXin LI private void
3351b6cee71dSXin LI byteswap(struct magic *magic, uint32_t nmagic)
3352b6cee71dSXin LI {
3353b6cee71dSXin LI 	uint32_t i;
3354b6cee71dSXin LI 	for (i = 0; i < nmagic; i++)
3355b6cee71dSXin LI 		bs1(&magic[i]);
3356b6cee71dSXin LI }
3357b6cee71dSXin LI 
3358b6cee71dSXin LI /*
3359b6cee71dSXin LI  * swap a short
3360b6cee71dSXin LI  */
3361b6cee71dSXin LI private uint16_t
3362b6cee71dSXin LI swap2(uint16_t sv)
3363b6cee71dSXin LI {
3364b6cee71dSXin LI 	uint16_t rv;
336548c779cdSXin LI 	uint8_t *s = RCAST(uint8_t *, RCAST(void *, &sv));
336648c779cdSXin LI 	uint8_t *d = RCAST(uint8_t *, RCAST(void *, &rv));
3367b6cee71dSXin LI 	d[0] = s[1];
3368b6cee71dSXin LI 	d[1] = s[0];
3369b6cee71dSXin LI 	return rv;
3370b6cee71dSXin LI }
3371b6cee71dSXin LI 
3372b6cee71dSXin LI /*
3373b6cee71dSXin LI  * swap an int
3374b6cee71dSXin LI  */
3375b6cee71dSXin LI private uint32_t
3376b6cee71dSXin LI swap4(uint32_t sv)
3377b6cee71dSXin LI {
3378b6cee71dSXin LI 	uint32_t rv;
337948c779cdSXin LI 	uint8_t *s = RCAST(uint8_t *, RCAST(void *, &sv));
338048c779cdSXin LI 	uint8_t *d = RCAST(uint8_t *, RCAST(void *, &rv));
3381b6cee71dSXin LI 	d[0] = s[3];
3382b6cee71dSXin LI 	d[1] = s[2];
3383b6cee71dSXin LI 	d[2] = s[1];
3384b6cee71dSXin LI 	d[3] = s[0];
3385b6cee71dSXin LI 	return rv;
3386b6cee71dSXin LI }
3387b6cee71dSXin LI 
3388b6cee71dSXin LI /*
3389b6cee71dSXin LI  * swap a quad
3390b6cee71dSXin LI  */
3391b6cee71dSXin LI private uint64_t
3392b6cee71dSXin LI swap8(uint64_t sv)
3393b6cee71dSXin LI {
3394b6cee71dSXin LI 	uint64_t rv;
339548c779cdSXin LI 	uint8_t *s = RCAST(uint8_t *, RCAST(void *, &sv));
339648c779cdSXin LI 	uint8_t *d = RCAST(uint8_t *, RCAST(void *, &rv));
3397b6cee71dSXin LI #if 0
3398b6cee71dSXin LI 	d[0] = s[3];
3399b6cee71dSXin LI 	d[1] = s[2];
3400b6cee71dSXin LI 	d[2] = s[1];
3401b6cee71dSXin LI 	d[3] = s[0];
3402b6cee71dSXin LI 	d[4] = s[7];
3403b6cee71dSXin LI 	d[5] = s[6];
3404b6cee71dSXin LI 	d[6] = s[5];
3405b6cee71dSXin LI 	d[7] = s[4];
3406b6cee71dSXin LI #else
3407b6cee71dSXin LI 	d[0] = s[7];
3408b6cee71dSXin LI 	d[1] = s[6];
3409b6cee71dSXin LI 	d[2] = s[5];
3410b6cee71dSXin LI 	d[3] = s[4];
3411b6cee71dSXin LI 	d[4] = s[3];
3412b6cee71dSXin LI 	d[5] = s[2];
3413b6cee71dSXin LI 	d[6] = s[1];
3414b6cee71dSXin LI 	d[7] = s[0];
3415b6cee71dSXin LI #endif
3416b6cee71dSXin LI 	return rv;
3417b6cee71dSXin LI }
3418b6cee71dSXin LI 
3419*43a5ec4eSXin LI protected uintmax_t
3420*43a5ec4eSXin LI file_varint2uintmax_t(const unsigned char *us, int t, size_t *l)
3421*43a5ec4eSXin LI {
3422*43a5ec4eSXin LI         uintmax_t x = 0;
3423*43a5ec4eSXin LI         const unsigned char *c;
3424*43a5ec4eSXin LI         if (t == FILE_LEVARINT) {
3425*43a5ec4eSXin LI                 for (c = us; *c; c++) {
3426*43a5ec4eSXin LI                         if ((*c & 0x80) == 0)
3427*43a5ec4eSXin LI                                 break;
3428*43a5ec4eSXin LI                 }
3429*43a5ec4eSXin LI 		if (l)
3430*43a5ec4eSXin LI 			*l = c - us + 1;
3431*43a5ec4eSXin LI                 for (; c >= us; c--) {
3432*43a5ec4eSXin LI                         x |= *c & 0x7f;
3433*43a5ec4eSXin LI                         x <<= 7;
3434*43a5ec4eSXin LI                 }
3435*43a5ec4eSXin LI         } else {
3436*43a5ec4eSXin LI                 for (c = us; *c; c++) {
3437*43a5ec4eSXin LI 			x |= *c & 0x7f;
3438*43a5ec4eSXin LI 			if ((*c & 0x80) == 0)
3439*43a5ec4eSXin LI 				break;
3440*43a5ec4eSXin LI 			x <<= 7;
3441*43a5ec4eSXin LI                 }
3442*43a5ec4eSXin LI 		if (l)
3443*43a5ec4eSXin LI 			*l = c - us + 1;
3444*43a5ec4eSXin LI         }
3445*43a5ec4eSXin LI 	return x;
3446*43a5ec4eSXin LI }
3447*43a5ec4eSXin LI 
3448*43a5ec4eSXin LI 
3449b6cee71dSXin LI /*
3450b6cee71dSXin LI  * byteswap a single magic entry
3451b6cee71dSXin LI  */
3452b6cee71dSXin LI private void
3453b6cee71dSXin LI bs1(struct magic *m)
3454b6cee71dSXin LI {
3455b6cee71dSXin LI 	m->cont_level = swap2(m->cont_level);
345648c779cdSXin LI 	m->offset = swap4(CAST(uint32_t, m->offset));
345748c779cdSXin LI 	m->in_offset = swap4(CAST(uint32_t, m->in_offset));
345848c779cdSXin LI 	m->lineno = swap4(CAST(uint32_t, m->lineno));
3459b6cee71dSXin LI 	if (IS_STRING(m->type)) {
3460b6cee71dSXin LI 		m->str_range = swap4(m->str_range);
3461b6cee71dSXin LI 		m->str_flags = swap4(m->str_flags);
3462b6cee71dSXin LI 	}
3463b6cee71dSXin LI 	else {
3464b6cee71dSXin LI 		m->value.q = swap8(m->value.q);
3465b6cee71dSXin LI 		m->num_mask = swap8(m->num_mask);
3466b6cee71dSXin LI 	}
3467b6cee71dSXin LI }
3468b6cee71dSXin LI 
3469b6cee71dSXin LI protected size_t
34702726a701SXin LI file_pstring_length_size(struct magic_set *ms, const struct magic *m)
3471b6cee71dSXin LI {
3472b6cee71dSXin LI 	switch (m->str_flags & PSTRING_LEN) {
3473b6cee71dSXin LI 	case PSTRING_1_LE:
3474b6cee71dSXin LI 		return 1;
3475b6cee71dSXin LI 	case PSTRING_2_LE:
3476b6cee71dSXin LI 	case PSTRING_2_BE:
3477b6cee71dSXin LI 		return 2;
3478b6cee71dSXin LI 	case PSTRING_4_LE:
3479b6cee71dSXin LI 	case PSTRING_4_BE:
3480b6cee71dSXin LI 		return 4;
3481b6cee71dSXin LI 	default:
34822726a701SXin LI 		file_error(ms, 0, "corrupt magic file "
34832726a701SXin LI 		    "(bad pascal string length %d)",
34842726a701SXin LI 		    m->str_flags & PSTRING_LEN);
34852726a701SXin LI 		return FILE_BADSIZE;
3486b6cee71dSXin LI 	}
3487b6cee71dSXin LI }
3488b6cee71dSXin LI protected size_t
34892726a701SXin LI file_pstring_get_length(struct magic_set *ms, const struct magic *m,
34902726a701SXin LI     const char *ss)
3491b6cee71dSXin LI {
3492b6cee71dSXin LI 	size_t len = 0;
349348c779cdSXin LI 	const unsigned char *s = RCAST(const unsigned char *, ss);
349440427ccaSGordon Tetlow 	unsigned int s3, s2, s1, s0;
3495b6cee71dSXin LI 
3496b6cee71dSXin LI 	switch (m->str_flags & PSTRING_LEN) {
3497b6cee71dSXin LI 	case PSTRING_1_LE:
3498b6cee71dSXin LI 		len = *s;
3499b6cee71dSXin LI 		break;
3500b6cee71dSXin LI 	case PSTRING_2_LE:
350140427ccaSGordon Tetlow 		s0 = s[0];
350240427ccaSGordon Tetlow 		s1 = s[1];
350340427ccaSGordon Tetlow 		len = (s1 << 8) | s0;
3504b6cee71dSXin LI 		break;
3505b6cee71dSXin LI 	case PSTRING_2_BE:
350640427ccaSGordon Tetlow 		s0 = s[0];
350740427ccaSGordon Tetlow 		s1 = s[1];
350840427ccaSGordon Tetlow 		len = (s0 << 8) | s1;
3509b6cee71dSXin LI 		break;
3510b6cee71dSXin LI 	case PSTRING_4_LE:
351140427ccaSGordon Tetlow 		s0 = s[0];
351240427ccaSGordon Tetlow 		s1 = s[1];
351340427ccaSGordon Tetlow 		s2 = s[2];
351440427ccaSGordon Tetlow 		s3 = s[3];
351540427ccaSGordon Tetlow 		len = (s3 << 24) | (s2 << 16) | (s1 << 8) | s0;
3516b6cee71dSXin LI 		break;
3517b6cee71dSXin LI 	case PSTRING_4_BE:
351840427ccaSGordon Tetlow 		s0 = s[0];
351940427ccaSGordon Tetlow 		s1 = s[1];
352040427ccaSGordon Tetlow 		s2 = s[2];
352140427ccaSGordon Tetlow 		s3 = s[3];
352240427ccaSGordon Tetlow 		len = (s0 << 24) | (s1 << 16) | (s2 << 8) | s3;
3523b6cee71dSXin LI 		break;
3524b6cee71dSXin LI 	default:
35252726a701SXin LI 		file_error(ms, 0, "corrupt magic file "
35262726a701SXin LI 		    "(bad pascal string length %d)",
35272726a701SXin LI 		    m->str_flags & PSTRING_LEN);
35282726a701SXin LI 		return FILE_BADSIZE;
3529b6cee71dSXin LI 	}
3530b6cee71dSXin LI 
35312726a701SXin LI 	if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF) {
35322726a701SXin LI 		size_t l = file_pstring_length_size(ms, m);
35332726a701SXin LI 		if (l == FILE_BADSIZE)
35342726a701SXin LI 			return l;
35352726a701SXin LI 		len -= l;
35362726a701SXin LI 	}
3537b6cee71dSXin LI 
3538b6cee71dSXin LI 	return len;
3539b6cee71dSXin LI }
3540b6cee71dSXin LI 
3541b6cee71dSXin LI protected int
3542b6cee71dSXin LI file_magicfind(struct magic_set *ms, const char *name, struct mlist *v)
3543b6cee71dSXin LI {
3544b6cee71dSXin LI 	uint32_t i, j;
3545b6cee71dSXin LI 	struct mlist *mlist, *ml;
3546b6cee71dSXin LI 
3547b6cee71dSXin LI 	mlist = ms->mlist[1];
3548b6cee71dSXin LI 
3549b6cee71dSXin LI 	for (ml = mlist->next; ml != mlist; ml = ml->next) {
3550b6cee71dSXin LI 		struct magic *ma = ml->magic;
3551b6cee71dSXin LI 		uint32_t nma = ml->nmagic;
3552b6cee71dSXin LI 		for (i = 0; i < nma; i++) {
3553b6cee71dSXin LI 			if (ma[i].type != FILE_NAME)
3554b6cee71dSXin LI 				continue;
3555b6cee71dSXin LI 			if (strcmp(ma[i].value.s, name) == 0) {
3556b6cee71dSXin LI 				v->magic = &ma[i];
3557b6cee71dSXin LI 				for (j = i + 1; j < nma; j++)
3558b6cee71dSXin LI 				    if (ma[j].cont_level == 0)
3559b6cee71dSXin LI 					    break;
3560b6cee71dSXin LI 				v->nmagic = j - i;
3561b6cee71dSXin LI 				return 0;
3562b6cee71dSXin LI 			}
3563b6cee71dSXin LI 		}
3564b6cee71dSXin LI 	}
3565b6cee71dSXin LI 	return -1;
3566b6cee71dSXin LI }
3567