1b6cee71dSXin LI /*
2b6cee71dSXin LI * Copyright (c) Ian F. Darwin 1986-1995.
3b6cee71dSXin LI * Software written by Ian F. Darwin and others;
4b6cee71dSXin LI * maintained 1995-present by Christos Zoulas and others.
5b6cee71dSXin LI *
6b6cee71dSXin LI * Redistribution and use in source and binary forms, with or without
7b6cee71dSXin LI * modification, are permitted provided that the following conditions
8b6cee71dSXin LI * are met:
9b6cee71dSXin LI * 1. Redistributions of source code must retain the above copyright
10b6cee71dSXin LI * notice immediately at the beginning of the file, without modification,
11b6cee71dSXin LI * this list of conditions, and the following disclaimer.
12b6cee71dSXin LI * 2. Redistributions in binary form must reproduce the above copyright
13b6cee71dSXin LI * notice, this list of conditions and the following disclaimer in the
14b6cee71dSXin LI * documentation and/or other materials provided with the distribution.
15b6cee71dSXin LI *
16b6cee71dSXin LI * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17b6cee71dSXin LI * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18b6cee71dSXin LI * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19b6cee71dSXin LI * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20b6cee71dSXin LI * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21b6cee71dSXin LI * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22b6cee71dSXin LI * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23b6cee71dSXin LI * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24b6cee71dSXin LI * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25b6cee71dSXin LI * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26b6cee71dSXin LI * SUCH DAMAGE.
27b6cee71dSXin LI */
28b6cee71dSXin LI /*
29b6cee71dSXin LI * apprentice - make one pass through /etc/magic, learning its secrets.
30b6cee71dSXin LI */
31b6cee71dSXin LI
32b6cee71dSXin LI #include "file.h"
33b6cee71dSXin LI
34b6cee71dSXin LI #ifndef lint
35*ae316d1dSXin LI FILE_RCSID("@(#)$File: apprentice.c,v 1.356 2024/11/27 15:37:00 christos Exp $")
36b6cee71dSXin LI #endif /* lint */
37b6cee71dSXin LI
38b6cee71dSXin LI #include "magic.h"
39b6cee71dSXin LI #include <stdlib.h>
40b6cee71dSXin LI #ifdef HAVE_UNISTD_H
41b6cee71dSXin LI #include <unistd.h>
42b6cee71dSXin LI #endif
43b6cee71dSXin LI #include <stddef.h>
44b6cee71dSXin LI #include <string.h>
45b6cee71dSXin LI #include <assert.h>
46b6cee71dSXin LI #include <ctype.h>
47b6cee71dSXin LI #include <fcntl.h>
48b6cee71dSXin LI #ifdef QUICK
49b6cee71dSXin LI #include <sys/mman.h>
50b6cee71dSXin LI #endif
51b6cee71dSXin LI #include <dirent.h>
52b6cee71dSXin LI #include <limits.h>
53a4d6d3b8SXin LI #ifdef HAVE_BYTESWAP_H
54a4d6d3b8SXin LI #include <byteswap.h>
55a4d6d3b8SXin LI #endif
56a4d6d3b8SXin LI #ifdef HAVE_SYS_BSWAP_H
57a4d6d3b8SXin LI #include <sys/bswap.h>
58a4d6d3b8SXin LI #endif
59b6cee71dSXin LI
60b6cee71dSXin LI
/*
 * EATAB: advance the in-scope pointer `l' while it points at a character
 * that is both in the ASCII range and classified as space by isspace().
 * Expands to a brace-enclosed compound statement.
 */
#define	EATAB \
	{ \
		while (isascii(CAST(unsigned char, *l)) && \
		    isspace(CAST(unsigned char, *l))) \
			++l; \
	}

/*
 * LOWCASE: lower-case form of `l' when it is an upper-case character,
 * otherwise `l' itself.  The argument is evaluated more than once.
 */
#define	LOWCASE(l) \
	(isupper(CAST(unsigned char, l)) ? \
	    tolower(CAST(unsigned char, l)) : (l))
/*
 * Header bug workaround for Digital Unix (confirmed at least on
 * OSF1 V4.0 878): drop the system definition of MAP_FAILED so that the
 * fallback definition below is used instead.
 */
#if defined(__osf__) && defined(__DECC)
# ifdef MAP_FAILED
#  undef MAP_FAILED
# endif
#endif

/* Fallback when no MAP_FAILED is defined at this point. */
#ifndef MAP_FAILED
# define MAP_FAILED (void *) -1
#endif

/* Fallback when no MAP_FILE is defined at this point. */
#ifndef MAP_FILE
# define MAP_FILE 0
#endif
82b6cee71dSXin LI
/* Allocation step sizes, expressed as size_t values. */
#define	ALLOC_CHUNK	CAST(size_t, 10)
#define	ALLOC_INCR	CAST(size_t, 200)

/*
 * Values for struct magic_map.type; apprentice_unmap() switches on them
 * to decide how the storage behind a map is released.
 */
#define	MAP_TYPE_USER	0	/* apprentice_unmap() releases only the map */
#define	MAP_TYPE_MALLOC	1	/* released with free() */
#define	MAP_TYPE_MMAP	2	/* released with munmap() (QUICK builds) */
89c2931133SXin LI
/*
 * One magic entry.
 * NOTE(review): cont_count/max_count presumably track the number of
 * used and allocated elements behind `mp' -- confirm against the parser.
 */
struct magic_entry {
	struct magic	*mp;		/* magic record storage */
	uint32_t	 cont_count;
	uint32_t	 max_count;
};
95b6cee71dSXin LI
/*
 * A collection of struct magic_entry.
 * NOTE(review): count/max presumably are the used and allocated lengths
 * of the `me' array -- confirm against the code that grows it.
 */
struct magic_entry_set {
	struct magic_entry	*me;	/* entry array */
	uint32_t		 count;
	uint32_t		 max;
};
101b6cee71dSXin LI
102b6cee71dSXin LI struct magic_map {
103b6cee71dSXin LI void *p;
104b6cee71dSXin LI size_t len;
105c2931133SXin LI int type;
106b6cee71dSXin LI struct magic *magic[MAGIC_SETS];
107b6cee71dSXin LI uint32_t nmagic[MAGIC_SETS];
108b6cee71dSXin LI };
109b6cee71dSXin LI
110*ae316d1dSXin LI file_private int file_formats[FILE_NAMES_SIZE];
111*ae316d1dSXin LI file_private const size_t file_nformats = FILE_NAMES_SIZE;
112*ae316d1dSXin LI file_protected const char *file_names[FILE_NAMES_SIZE];
113*ae316d1dSXin LI file_protected const size_t file_nnames = FILE_NAMES_SIZE;
114b6cee71dSXin LI
115898496eeSXin LI file_private int getvalue(struct magic_set *ms, struct magic *, const char **, int);
116898496eeSXin LI file_private int hextoint(int);
117898496eeSXin LI file_private const char *getstr(struct magic_set *, struct magic *, const char *,
118b6cee71dSXin LI int);
119898496eeSXin LI file_private int parse(struct magic_set *, struct magic_entry *, const char *,
120*ae316d1dSXin LI const char *, size_t, int);
121898496eeSXin LI file_private void eatsize(const char **);
122898496eeSXin LI file_private int apprentice_1(struct magic_set *, const char *, int);
123898496eeSXin LI file_private ssize_t apprentice_magic_strength_1(const struct magic *);
124898496eeSXin LI file_private int apprentice_sort(const void *, const void *);
125898496eeSXin LI file_private void apprentice_list(struct mlist *, int );
126898496eeSXin LI file_private struct magic_map *apprentice_load(struct magic_set *,
127b6cee71dSXin LI const char *, int);
128898496eeSXin LI file_private struct mlist *mlist_alloc(void);
129898496eeSXin LI file_private void mlist_free_all(struct magic_set *);
130898496eeSXin LI file_private void mlist_free(struct mlist *);
131898496eeSXin LI file_private void byteswap(struct magic *, uint32_t);
132898496eeSXin LI file_private void bs1(struct magic *);
133a4d6d3b8SXin LI
134a4d6d3b8SXin LI #if defined(HAVE_BYTESWAP_H)
135a4d6d3b8SXin LI #define swap2(x) bswap_16(x)
136a4d6d3b8SXin LI #define swap4(x) bswap_32(x)
137a4d6d3b8SXin LI #define swap8(x) bswap_64(x)
138a4d6d3b8SXin LI #elif defined(HAVE_SYS_BSWAP_H)
139a4d6d3b8SXin LI #define swap2(x) bswap16(x)
140a4d6d3b8SXin LI #define swap4(x) bswap32(x)
141a4d6d3b8SXin LI #define swap8(x) bswap64(x)
142a4d6d3b8SXin LI #else
143898496eeSXin LI file_private uint16_t swap2(uint16_t);
144898496eeSXin LI file_private uint32_t swap4(uint32_t);
145898496eeSXin LI file_private uint64_t swap8(uint64_t);
146a4d6d3b8SXin LI #endif
147a4d6d3b8SXin LI
148898496eeSXin LI file_private char *mkdbname(struct magic_set *, const char *, int);
149898496eeSXin LI file_private struct magic_map *apprentice_buf(struct magic_set *, struct magic *,
150c2931133SXin LI size_t);
151898496eeSXin LI file_private struct magic_map *apprentice_map(struct magic_set *, const char *);
152898496eeSXin LI file_private int check_buffer(struct magic_set *, struct magic_map *, const char *);
153898496eeSXin LI file_private void apprentice_unmap(struct magic_map *);
154898496eeSXin LI file_private int apprentice_compile(struct magic_set *, struct magic_map *,
155b6cee71dSXin LI const char *);
156898496eeSXin LI file_private int check_format_type(const char *, int, const char **);
157898496eeSXin LI file_private int check_format(struct magic_set *, struct magic *);
158898496eeSXin LI file_private int get_op(char);
159898496eeSXin LI file_private int parse_mime(struct magic_set *, struct magic_entry *, const char *,
1602726a701SXin LI size_t);
161898496eeSXin LI file_private int parse_strength(struct magic_set *, struct magic_entry *,
1622726a701SXin LI const char *, size_t);
163898496eeSXin LI file_private int parse_apple(struct magic_set *, struct magic_entry *, const char *,
1642726a701SXin LI size_t);
165898496eeSXin LI file_private int parse_ext(struct magic_set *, struct magic_entry *, const char *,
1662726a701SXin LI size_t);
167b6cee71dSXin LI
168b6cee71dSXin LI
169898496eeSXin LI file_private size_t magicsize = sizeof(struct magic);
170b6cee71dSXin LI
171898496eeSXin LI file_private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
172b6cee71dSXin LI
173898496eeSXin LI file_private struct {
174b6cee71dSXin LI const char *name;
175b6cee71dSXin LI size_t len;
1762726a701SXin LI int (*fun)(struct magic_set *, struct magic_entry *, const char *,
1772726a701SXin LI size_t);
178b6cee71dSXin LI } bang[] = {
179b6cee71dSXin LI #define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
180b6cee71dSXin LI DECLARE_FIELD(mime),
181b6cee71dSXin LI DECLARE_FIELD(apple),
1825f0216bdSXin LI DECLARE_FIELD(ext),
183b6cee71dSXin LI DECLARE_FIELD(strength),
184b6cee71dSXin LI #undef DECLARE_FIELD
185b6cee71dSXin LI { NULL, 0, NULL }
186b6cee71dSXin LI };
187b6cee71dSXin LI
#ifdef COMPILE_ONLY

int main(int, char *[]);

/*
 * Entry point of the COMPILE_ONLY build.
 *
 * Takes exactly one argument, hands it to magic_compile() using a
 * handle opened with MAGIC_CHECK, and reports failures on stderr
 * prefixed with the basename of argv[0].
 *
 * Returns 0 on success and 1 on a usage error, a failed magic_open(),
 * or a failed magic_compile().
 */
int
main(int argc, char *argv[])
{
	struct magic_set *ms;
	char *progname;
	int ret;

	/* Use whatever follows the last '/' of argv[0], if there is one. */
	progname = strrchr(argv[0], '/');
	progname = (progname != NULL) ? progname + 1 : argv[0];

	if (argc != 2) {
		(void)fprintf(stderr, "Usage: %s file\n", progname);
		return 1;
	}

	ms = magic_open(MAGIC_CHECK);
	if (ms == NULL) {
		(void)fprintf(stderr, "%s: %s\n", progname, strerror(errno));
		return 1;
	}

	if (magic_compile(ms, argv[1]) == -1) {
		ret = 1;
		(void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms));
	} else {
		ret = 0;
	}

	magic_close(ms);
	return ret;
}
#endif /* COMPILE_ONLY */
220b6cee71dSXin LI
/*
 * Row layout shared by type_tbl[] and special_tbl[].  A row whose `len'
 * is 0 terminates a table (see get_type()).
 */
struct type_tbl_s {
	const char	name[16];	/* type keyword */
	const size_t	len;		/* length of the keyword in name */
	const int	type;		/* FILE_* type value */
	const int	format;		/* FILE_FMT_* value */
};
227b6cee71dSXin LI
228b6cee71dSXin LI /*
229b6cee71dSXin LI * XXX - the actual Single UNIX Specification says that "long" means "long",
230b6cee71dSXin LI * as in the C data type, but we treat it as meaning "4-byte integer".
231b6cee71dSXin LI * Given that the OS X version of file 5.04 did the same, I guess that passes
232b6cee71dSXin LI * the actual test; having "long" be dependent on how big a "long" is on
233b6cee71dSXin LI * the machine running "file" is silly.
234b6cee71dSXin LI */
235b6cee71dSXin LI static const struct type_tbl_s type_tbl[] = {
236b6cee71dSXin LI # define XX(s) s, (sizeof(s) - 1)
237b6cee71dSXin LI # define XX_NULL "", 0
238b6cee71dSXin LI { XX("invalid"), FILE_INVALID, FILE_FMT_NONE },
239b6cee71dSXin LI { XX("byte"), FILE_BYTE, FILE_FMT_NUM },
240b6cee71dSXin LI { XX("short"), FILE_SHORT, FILE_FMT_NUM },
241b6cee71dSXin LI { XX("default"), FILE_DEFAULT, FILE_FMT_NONE },
242b6cee71dSXin LI { XX("long"), FILE_LONG, FILE_FMT_NUM },
243b6cee71dSXin LI { XX("string"), FILE_STRING, FILE_FMT_STR },
244b6cee71dSXin LI { XX("date"), FILE_DATE, FILE_FMT_STR },
245b6cee71dSXin LI { XX("beshort"), FILE_BESHORT, FILE_FMT_NUM },
246b6cee71dSXin LI { XX("belong"), FILE_BELONG, FILE_FMT_NUM },
247b6cee71dSXin LI { XX("bedate"), FILE_BEDATE, FILE_FMT_STR },
248b6cee71dSXin LI { XX("leshort"), FILE_LESHORT, FILE_FMT_NUM },
249b6cee71dSXin LI { XX("lelong"), FILE_LELONG, FILE_FMT_NUM },
250b6cee71dSXin LI { XX("ledate"), FILE_LEDATE, FILE_FMT_STR },
251b6cee71dSXin LI { XX("pstring"), FILE_PSTRING, FILE_FMT_STR },
252b6cee71dSXin LI { XX("ldate"), FILE_LDATE, FILE_FMT_STR },
253b6cee71dSXin LI { XX("beldate"), FILE_BELDATE, FILE_FMT_STR },
254b6cee71dSXin LI { XX("leldate"), FILE_LELDATE, FILE_FMT_STR },
255b6cee71dSXin LI { XX("regex"), FILE_REGEX, FILE_FMT_STR },
256b6cee71dSXin LI { XX("bestring16"), FILE_BESTRING16, FILE_FMT_STR },
257b6cee71dSXin LI { XX("lestring16"), FILE_LESTRING16, FILE_FMT_STR },
258b6cee71dSXin LI { XX("search"), FILE_SEARCH, FILE_FMT_STR },
259b6cee71dSXin LI { XX("medate"), FILE_MEDATE, FILE_FMT_STR },
260b6cee71dSXin LI { XX("meldate"), FILE_MELDATE, FILE_FMT_STR },
261b6cee71dSXin LI { XX("melong"), FILE_MELONG, FILE_FMT_NUM },
262b6cee71dSXin LI { XX("quad"), FILE_QUAD, FILE_FMT_QUAD },
263b6cee71dSXin LI { XX("lequad"), FILE_LEQUAD, FILE_FMT_QUAD },
264b6cee71dSXin LI { XX("bequad"), FILE_BEQUAD, FILE_FMT_QUAD },
265b6cee71dSXin LI { XX("qdate"), FILE_QDATE, FILE_FMT_STR },
266b6cee71dSXin LI { XX("leqdate"), FILE_LEQDATE, FILE_FMT_STR },
267b6cee71dSXin LI { XX("beqdate"), FILE_BEQDATE, FILE_FMT_STR },
268b6cee71dSXin LI { XX("qldate"), FILE_QLDATE, FILE_FMT_STR },
269b6cee71dSXin LI { XX("leqldate"), FILE_LEQLDATE, FILE_FMT_STR },
270b6cee71dSXin LI { XX("beqldate"), FILE_BEQLDATE, FILE_FMT_STR },
271b6cee71dSXin LI { XX("float"), FILE_FLOAT, FILE_FMT_FLOAT },
272b6cee71dSXin LI { XX("befloat"), FILE_BEFLOAT, FILE_FMT_FLOAT },
273b6cee71dSXin LI { XX("lefloat"), FILE_LEFLOAT, FILE_FMT_FLOAT },
274b6cee71dSXin LI { XX("double"), FILE_DOUBLE, FILE_FMT_DOUBLE },
275b6cee71dSXin LI { XX("bedouble"), FILE_BEDOUBLE, FILE_FMT_DOUBLE },
276b6cee71dSXin LI { XX("ledouble"), FILE_LEDOUBLE, FILE_FMT_DOUBLE },
277b6cee71dSXin LI { XX("leid3"), FILE_LEID3, FILE_FMT_NUM },
278b6cee71dSXin LI { XX("beid3"), FILE_BEID3, FILE_FMT_NUM },
279b6cee71dSXin LI { XX("indirect"), FILE_INDIRECT, FILE_FMT_NUM },
280b6cee71dSXin LI { XX("qwdate"), FILE_QWDATE, FILE_FMT_STR },
281b6cee71dSXin LI { XX("leqwdate"), FILE_LEQWDATE, FILE_FMT_STR },
282b6cee71dSXin LI { XX("beqwdate"), FILE_BEQWDATE, FILE_FMT_STR },
283b6cee71dSXin LI { XX("name"), FILE_NAME, FILE_FMT_NONE },
284b6cee71dSXin LI { XX("use"), FILE_USE, FILE_FMT_NONE },
285b6cee71dSXin LI { XX("clear"), FILE_CLEAR, FILE_FMT_NONE },
2863e41d09dSXin LI { XX("der"), FILE_DER, FILE_FMT_STR },
2872726a701SXin LI { XX("guid"), FILE_GUID, FILE_FMT_STR },
2882726a701SXin LI { XX("offset"), FILE_OFFSET, FILE_FMT_QUAD },
28943a5ec4eSXin LI { XX("bevarint"), FILE_BEVARINT, FILE_FMT_STR },
29043a5ec4eSXin LI { XX("levarint"), FILE_LEVARINT, FILE_FMT_STR },
291a4d6d3b8SXin LI { XX("msdosdate"), FILE_MSDOSDATE, FILE_FMT_STR },
292a4d6d3b8SXin LI { XX("lemsdosdate"), FILE_LEMSDOSDATE, FILE_FMT_STR },
293a4d6d3b8SXin LI { XX("bemsdosdate"), FILE_BEMSDOSDATE, FILE_FMT_STR },
294a4d6d3b8SXin LI { XX("msdostime"), FILE_MSDOSTIME, FILE_FMT_STR },
295a4d6d3b8SXin LI { XX("lemsdostime"), FILE_LEMSDOSTIME, FILE_FMT_STR },
296a4d6d3b8SXin LI { XX("bemsdostime"), FILE_BEMSDOSTIME, FILE_FMT_STR },
297a2dfb722SXin LI { XX("octal"), FILE_OCTAL, FILE_FMT_STR },
298b6cee71dSXin LI { XX_NULL, FILE_INVALID, FILE_FMT_NONE },
299b6cee71dSXin LI };
300b6cee71dSXin LI
301b6cee71dSXin LI /*
302b6cee71dSXin LI * These are not types, and cannot be preceded by "u" to make them
303b6cee71dSXin LI * unsigned.
304b6cee71dSXin LI */
305b6cee71dSXin LI static const struct type_tbl_s special_tbl[] = {
3063e41d09dSXin LI { XX("der"), FILE_DER, FILE_FMT_STR },
307b6cee71dSXin LI { XX("name"), FILE_NAME, FILE_FMT_STR },
308b6cee71dSXin LI { XX("use"), FILE_USE, FILE_FMT_STR },
309a2dfb722SXin LI { XX("octal"), FILE_OCTAL, FILE_FMT_STR },
310b6cee71dSXin LI { XX_NULL, FILE_INVALID, FILE_FMT_NONE },
311b6cee71dSXin LI };
312b6cee71dSXin LI # undef XX
313b6cee71dSXin LI # undef XX_NULL
314b6cee71dSXin LI
315898496eeSXin LI file_private int
get_type(const struct type_tbl_s * tbl,const char * l,const char ** t)316b6cee71dSXin LI get_type(const struct type_tbl_s *tbl, const char *l, const char **t)
317b6cee71dSXin LI {
318b6cee71dSXin LI const struct type_tbl_s *p;
319b6cee71dSXin LI
320b6cee71dSXin LI for (p = tbl; p->len; p++) {
321b6cee71dSXin LI if (strncmp(l, p->name, p->len) == 0) {
322b6cee71dSXin LI if (t)
323b6cee71dSXin LI *t = l + p->len;
324b6cee71dSXin LI break;
325b6cee71dSXin LI }
326b6cee71dSXin LI }
327b6cee71dSXin LI return p->type;
328b6cee71dSXin LI }
329b6cee71dSXin LI
330898496eeSXin LI file_private off_t
maxoff_t(void)33148c779cdSXin LI maxoff_t(void) {
33248c779cdSXin LI if (/*CONSTCOND*/sizeof(off_t) == sizeof(int))
33348c779cdSXin LI return CAST(off_t, INT_MAX);
33448c779cdSXin LI if (/*CONSTCOND*/sizeof(off_t) == sizeof(long))
33548c779cdSXin LI return CAST(off_t, LONG_MAX);
33648c779cdSXin LI return 0x7fffffff;
33748c779cdSXin LI }
33848c779cdSXin LI
339898496eeSXin LI file_private int
get_standard_integer_type(const char * l,const char ** t)340b6cee71dSXin LI get_standard_integer_type(const char *l, const char **t)
341b6cee71dSXin LI {
342b6cee71dSXin LI int type;
343b6cee71dSXin LI
34448c779cdSXin LI if (isalpha(CAST(unsigned char, l[1]))) {
345b6cee71dSXin LI switch (l[1]) {
346b6cee71dSXin LI case 'C':
347b6cee71dSXin LI /* "dC" and "uC" */
348b6cee71dSXin LI type = FILE_BYTE;
349b6cee71dSXin LI break;
350b6cee71dSXin LI case 'S':
351b6cee71dSXin LI /* "dS" and "uS" */
352b6cee71dSXin LI type = FILE_SHORT;
353b6cee71dSXin LI break;
354b6cee71dSXin LI case 'I':
355b6cee71dSXin LI case 'L':
356b6cee71dSXin LI /*
357b6cee71dSXin LI * "dI", "dL", "uI", and "uL".
358b6cee71dSXin LI *
359b6cee71dSXin LI * XXX - the actual Single UNIX Specification says
360b6cee71dSXin LI * that "L" means "long", as in the C data type,
361b6cee71dSXin LI * but we treat it as meaning "4-byte integer".
362b6cee71dSXin LI * Given that the OS X version of file 5.04 did
363b6cee71dSXin LI * the same, I guess that passes the actual SUS
364b6cee71dSXin LI * validation suite; having "dL" be dependent on
365b6cee71dSXin LI * how big a "long" is on the machine running
366b6cee71dSXin LI * "file" is silly.
367b6cee71dSXin LI */
368b6cee71dSXin LI type = FILE_LONG;
369b6cee71dSXin LI break;
370b6cee71dSXin LI case 'Q':
371b6cee71dSXin LI /* "dQ" and "uQ" */
372b6cee71dSXin LI type = FILE_QUAD;
373b6cee71dSXin LI break;
374b6cee71dSXin LI default:
375b6cee71dSXin LI /* "d{anything else}", "u{anything else}" */
376b6cee71dSXin LI return FILE_INVALID;
377b6cee71dSXin LI }
378b6cee71dSXin LI l += 2;
37948c779cdSXin LI } else if (isdigit(CAST(unsigned char, l[1]))) {
380b6cee71dSXin LI /*
381b6cee71dSXin LI * "d{num}" and "u{num}"; we only support {num} values
382b6cee71dSXin LI * of 1, 2, 4, and 8 - the Single UNIX Specification
383b6cee71dSXin LI * doesn't say anything about whether arbitrary
384b6cee71dSXin LI * values should be supported, but both the Solaris 10
385b6cee71dSXin LI * and OS X Mountain Lion versions of file passed the
386b6cee71dSXin LI * Single UNIX Specification validation suite, and
387b6cee71dSXin LI * neither of them support values bigger than 8 or
388b6cee71dSXin LI * non-power-of-2 values.
389b6cee71dSXin LI */
39048c779cdSXin LI if (isdigit(CAST(unsigned char, l[2]))) {
391b6cee71dSXin LI /* Multi-digit, so > 9 */
392b6cee71dSXin LI return FILE_INVALID;
393b6cee71dSXin LI }
394b6cee71dSXin LI switch (l[1]) {
395b6cee71dSXin LI case '1':
396b6cee71dSXin LI type = FILE_BYTE;
397b6cee71dSXin LI break;
398b6cee71dSXin LI case '2':
399b6cee71dSXin LI type = FILE_SHORT;
400b6cee71dSXin LI break;
401b6cee71dSXin LI case '4':
402b6cee71dSXin LI type = FILE_LONG;
403b6cee71dSXin LI break;
404b6cee71dSXin LI case '8':
405b6cee71dSXin LI type = FILE_QUAD;
406b6cee71dSXin LI break;
407b6cee71dSXin LI default:
408b6cee71dSXin LI /* XXX - what about 3, 5, 6, or 7? */
409b6cee71dSXin LI return FILE_INVALID;
410b6cee71dSXin LI }
411b6cee71dSXin LI l += 2;
412b6cee71dSXin LI } else {
413b6cee71dSXin LI /*
414b6cee71dSXin LI * "d" or "u" by itself.
415b6cee71dSXin LI */
416b6cee71dSXin LI type = FILE_LONG;
417b6cee71dSXin LI ++l;
418b6cee71dSXin LI }
419b6cee71dSXin LI if (t)
420b6cee71dSXin LI *t = l;
421b6cee71dSXin LI return type;
422b6cee71dSXin LI }
423b6cee71dSXin LI
424898496eeSXin LI file_private void
init_file_tables(void)425b6cee71dSXin LI init_file_tables(void)
426b6cee71dSXin LI {
427b6cee71dSXin LI static int done = 0;
428b6cee71dSXin LI const struct type_tbl_s *p;
429b6cee71dSXin LI
430b6cee71dSXin LI if (done)
431b6cee71dSXin LI return;
432b6cee71dSXin LI done++;
433b6cee71dSXin LI
434b6cee71dSXin LI for (p = type_tbl; p->len; p++) {
435b6cee71dSXin LI assert(p->type < FILE_NAMES_SIZE);
436b6cee71dSXin LI file_names[p->type] = p->name;
437b6cee71dSXin LI file_formats[p->type] = p->format;
438b6cee71dSXin LI }
439b6cee71dSXin LI assert(p - type_tbl == FILE_NAMES_SIZE);
440b6cee71dSXin LI }
441b6cee71dSXin LI
442898496eeSXin LI file_private int
add_mlist(struct mlist * mlp,struct magic_map * map,size_t idx)443b6cee71dSXin LI add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx)
444b6cee71dSXin LI {
445b6cee71dSXin LI struct mlist *ml;
446b6cee71dSXin LI
447a5d223e6SXin LI mlp->map = NULL;
448b6cee71dSXin LI if ((ml = CAST(struct mlist *, malloc(sizeof(*ml)))) == NULL)
449b6cee71dSXin LI return -1;
450b6cee71dSXin LI
451a5d223e6SXin LI ml->map = idx == 0 ? map : NULL;
452b6cee71dSXin LI ml->magic = map->magic[idx];
453b6cee71dSXin LI ml->nmagic = map->nmagic[idx];
454a4d6d3b8SXin LI if (ml->nmagic) {
455a4d6d3b8SXin LI ml->magic_rxcomp = CAST(file_regex_t **,
456a4d6d3b8SXin LI calloc(ml->nmagic, sizeof(*ml->magic_rxcomp)));
457a4d6d3b8SXin LI if (ml->magic_rxcomp == NULL) {
458a4d6d3b8SXin LI free(ml);
459a4d6d3b8SXin LI return -1;
460a4d6d3b8SXin LI }
461a4d6d3b8SXin LI } else
462a4d6d3b8SXin LI ml->magic_rxcomp = NULL;
463b6cee71dSXin LI mlp->prev->next = ml;
464b6cee71dSXin LI ml->prev = mlp->prev;
465b6cee71dSXin LI ml->next = mlp;
466b6cee71dSXin LI mlp->prev = ml;
467b6cee71dSXin LI return 0;
468b6cee71dSXin LI }
469b6cee71dSXin LI
470b6cee71dSXin LI /*
471b6cee71dSXin LI * Handle one file or directory.
472b6cee71dSXin LI */
473898496eeSXin LI file_private int
apprentice_1(struct magic_set * ms,const char * fn,int action)474b6cee71dSXin LI apprentice_1(struct magic_set *ms, const char *fn, int action)
475b6cee71dSXin LI {
476b6cee71dSXin LI struct magic_map *map;
477b6cee71dSXin LI #ifndef COMPILE_ONLY
478b6cee71dSXin LI size_t i;
479c2931133SXin LI #endif
480b6cee71dSXin LI
481b6cee71dSXin LI if (magicsize != FILE_MAGICSIZE) {
482b6cee71dSXin LI file_error(ms, 0, "magic element size %lu != %lu",
48348c779cdSXin LI CAST(unsigned long, sizeof(*map->magic[0])),
48448c779cdSXin LI CAST(unsigned long, FILE_MAGICSIZE));
485b6cee71dSXin LI return -1;
486b6cee71dSXin LI }
487b6cee71dSXin LI
488b6cee71dSXin LI if (action == FILE_COMPILE) {
489b6cee71dSXin LI map = apprentice_load(ms, fn, action);
490b6cee71dSXin LI if (map == NULL)
491b6cee71dSXin LI return -1;
492b6cee71dSXin LI return apprentice_compile(ms, map, fn);
493b6cee71dSXin LI }
494b6cee71dSXin LI
495b6cee71dSXin LI #ifndef COMPILE_ONLY
496b6cee71dSXin LI map = apprentice_map(ms, fn);
497b6cee71dSXin LI if (map == NULL) {
498b6cee71dSXin LI if (ms->flags & MAGIC_CHECK)
499*ae316d1dSXin LI file_magwarn(ms, "using regular magic file `%s'", fn);
500b6cee71dSXin LI map = apprentice_load(ms, fn, action);
501b6cee71dSXin LI if (map == NULL)
502b6cee71dSXin LI return -1;
503b6cee71dSXin LI }
504b6cee71dSXin LI
505b6cee71dSXin LI for (i = 0; i < MAGIC_SETS; i++) {
506b6cee71dSXin LI if (add_mlist(ms->mlist[i], map, i) == -1) {
5072726a701SXin LI /* failed to add to any list, free explicitly */
5082726a701SXin LI if (i == 0)
5092726a701SXin LI apprentice_unmap(map);
5102726a701SXin LI else
5112726a701SXin LI mlist_free_all(ms);
512898496eeSXin LI file_oomem(ms, sizeof(*ms->mlist[0]));
513a5d223e6SXin LI return -1;
514b6cee71dSXin LI }
515b6cee71dSXin LI }
516b6cee71dSXin LI
517b6cee71dSXin LI if (action == FILE_LIST) {
518b6cee71dSXin LI for (i = 0; i < MAGIC_SETS; i++) {
519c2931133SXin LI printf("Set %" SIZE_T_FORMAT "u:\nBinary patterns:\n",
520c2931133SXin LI i);
521b6cee71dSXin LI apprentice_list(ms->mlist[i], BINTEST);
522b6cee71dSXin LI printf("Text patterns:\n");
523b6cee71dSXin LI apprentice_list(ms->mlist[i], TEXTTEST);
524b6cee71dSXin LI }
525b6cee71dSXin LI }
526b6cee71dSXin LI return 0;
527c2931133SXin LI #else
528c2931133SXin LI return 0;
529c2931133SXin LI #endif /* COMPILE_ONLY */
530b6cee71dSXin LI }
531b6cee71dSXin LI
532898496eeSXin LI file_protected void
file_ms_free(struct magic_set * ms)533b6cee71dSXin LI file_ms_free(struct magic_set *ms)
534b6cee71dSXin LI {
535b6cee71dSXin LI size_t i;
536b6cee71dSXin LI if (ms == NULL)
537b6cee71dSXin LI return;
538b6cee71dSXin LI for (i = 0; i < MAGIC_SETS; i++)
539b6cee71dSXin LI mlist_free(ms->mlist[i]);
540b6cee71dSXin LI free(ms->o.pbuf);
541b6cee71dSXin LI free(ms->o.buf);
542b6cee71dSXin LI free(ms->c.li);
543*ae316d1dSXin LI free(ms->fnamebuf);
544a4d6d3b8SXin LI #ifdef USE_C_LOCALE
545a4d6d3b8SXin LI freelocale(ms->c_lc_ctype);
546a4d6d3b8SXin LI #endif
547b6cee71dSXin LI free(ms);
548b6cee71dSXin LI }
549b6cee71dSXin LI
550898496eeSXin LI file_protected struct magic_set *
file_ms_alloc(int flags)551b6cee71dSXin LI file_ms_alloc(int flags)
552b6cee71dSXin LI {
553b6cee71dSXin LI struct magic_set *ms;
554b6cee71dSXin LI size_t i, len;
555b6cee71dSXin LI
55648c779cdSXin LI if ((ms = CAST(struct magic_set *, calloc(CAST(size_t, 1u),
557898496eeSXin LI sizeof(*ms)))) == NULL)
558b6cee71dSXin LI return NULL;
559b6cee71dSXin LI
560b6cee71dSXin LI if (magic_setflags(ms, flags) == -1) {
561b6cee71dSXin LI errno = EINVAL;
562b6cee71dSXin LI goto free;
563b6cee71dSXin LI }
564b6cee71dSXin LI
565b6cee71dSXin LI ms->o.buf = ms->o.pbuf = NULL;
5662726a701SXin LI ms->o.blen = 0;
567b6cee71dSXin LI len = (ms->c.len = 10) * sizeof(*ms->c.li);
568b6cee71dSXin LI
569b6cee71dSXin LI if ((ms->c.li = CAST(struct level_info *, malloc(len))) == NULL)
570b6cee71dSXin LI goto free;
571b6cee71dSXin LI
572b6cee71dSXin LI ms->event_flags = 0;
573b6cee71dSXin LI ms->error = -1;
574b6cee71dSXin LI for (i = 0; i < MAGIC_SETS; i++)
575b6cee71dSXin LI ms->mlist[i] = NULL;
576*ae316d1dSXin LI ms->fnamebuf = NULL;
577b6cee71dSXin LI ms->file = "unknown";
578b6cee71dSXin LI ms->line = 0;
579*ae316d1dSXin LI ms->magwarn = 0;
580c2931133SXin LI ms->indir_max = FILE_INDIR_MAX;
581c2931133SXin LI ms->name_max = FILE_NAME_MAX;
582c2931133SXin LI ms->elf_shnum_max = FILE_ELF_SHNUM_MAX;
583898496eeSXin LI ms->elf_shsize_max = FILE_ELF_SHSIZE_MAX;
584c2931133SXin LI ms->elf_phnum_max = FILE_ELF_PHNUM_MAX;
5854460e5b0SXin LI ms->elf_notes_max = FILE_ELF_NOTES_MAX;
5869ce06829SXin LI ms->regex_max = FILE_REGEX_MAX;
5873e41d09dSXin LI ms->bytes_max = FILE_BYTES_MAX;
58843a5ec4eSXin LI ms->encoding_max = FILE_ENCODING_MAX;
589*ae316d1dSXin LI ms->magwarn_max = FILE_MAGWARN_MAX;
590a4d6d3b8SXin LI #ifdef USE_C_LOCALE
591a4d6d3b8SXin LI ms->c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0);
592a4d6d3b8SXin LI assert(ms->c_lc_ctype != NULL);
593a4d6d3b8SXin LI #endif
594b6cee71dSXin LI return ms;
595b6cee71dSXin LI free:
596b6cee71dSXin LI free(ms);
597b6cee71dSXin LI return NULL;
598b6cee71dSXin LI }
599b6cee71dSXin LI
600898496eeSXin LI file_private void
apprentice_unmap(struct magic_map * map)601b6cee71dSXin LI apprentice_unmap(struct magic_map *map)
602b6cee71dSXin LI {
6039ce06829SXin LI size_t i;
60443a5ec4eSXin LI char *p;
605b6cee71dSXin LI if (map == NULL)
606b6cee71dSXin LI return;
607c2931133SXin LI
608c2931133SXin LI switch (map->type) {
6093e41d09dSXin LI case MAP_TYPE_USER:
6103e41d09dSXin LI break;
6113e41d09dSXin LI case MAP_TYPE_MALLOC:
61243a5ec4eSXin LI p = CAST(char *, map->p);
6133e41d09dSXin LI for (i = 0; i < MAGIC_SETS; i++) {
61443a5ec4eSXin LI char *b = RCAST(char *, map->magic[i]);
61543a5ec4eSXin LI if (p != NULL && b >= p && b <= p + map->len)
6163e41d09dSXin LI continue;
61743a5ec4eSXin LI free(b);
6183e41d09dSXin LI }
61943a5ec4eSXin LI free(p);
6203e41d09dSXin LI break;
621b6cee71dSXin LI #ifdef QUICK
622c2931133SXin LI case MAP_TYPE_MMAP:
6233e41d09dSXin LI if (map->p && map->p != MAP_FAILED)
624b6cee71dSXin LI (void)munmap(map->p, map->len);
625c2931133SXin LI break;
626b6cee71dSXin LI #endif
627c2931133SXin LI default:
628a4d6d3b8SXin LI fprintf(stderr, "Bad map type %d", map->type);
629c2931133SXin LI abort();
630b6cee71dSXin LI }
631b6cee71dSXin LI free(map);
632b6cee71dSXin LI }
633b6cee71dSXin LI
634898496eeSXin LI file_private struct mlist *
mlist_alloc(void)635b6cee71dSXin LI mlist_alloc(void)
636b6cee71dSXin LI {
637b6cee71dSXin LI struct mlist *mlist;
638b6cee71dSXin LI if ((mlist = CAST(struct mlist *, calloc(1, sizeof(*mlist)))) == NULL) {
639b6cee71dSXin LI return NULL;
640b6cee71dSXin LI }
641b6cee71dSXin LI mlist->next = mlist->prev = mlist;
642b6cee71dSXin LI return mlist;
643b6cee71dSXin LI }
644b6cee71dSXin LI
645898496eeSXin LI file_private void
mlist_free_all(struct magic_set * ms)6462726a701SXin LI mlist_free_all(struct magic_set *ms)
6472726a701SXin LI {
6482726a701SXin LI size_t i;
6492726a701SXin LI
6502726a701SXin LI for (i = 0; i < MAGIC_SETS; i++) {
6512726a701SXin LI mlist_free(ms->mlist[i]);
6522726a701SXin LI ms->mlist[i] = NULL;
6532726a701SXin LI }
6542726a701SXin LI }
6552726a701SXin LI
656898496eeSXin LI file_private void
mlist_free_one(struct mlist * ml)65748c779cdSXin LI mlist_free_one(struct mlist *ml)
65848c779cdSXin LI {
659a4d6d3b8SXin LI size_t i;
660a4d6d3b8SXin LI
66148c779cdSXin LI if (ml->map)
66248c779cdSXin LI apprentice_unmap(CAST(struct magic_map *, ml->map));
663a4d6d3b8SXin LI
664a4d6d3b8SXin LI for (i = 0; i < ml->nmagic; ++i) {
665a4d6d3b8SXin LI if (ml->magic_rxcomp[i]) {
666a4d6d3b8SXin LI file_regfree(ml->magic_rxcomp[i]);
667a4d6d3b8SXin LI free(ml->magic_rxcomp[i]);
668a4d6d3b8SXin LI ml->magic_rxcomp[i] = NULL;
669a4d6d3b8SXin LI }
670a4d6d3b8SXin LI }
671a4d6d3b8SXin LI free(ml->magic_rxcomp);
672a4d6d3b8SXin LI ml->magic_rxcomp = NULL;
67348c779cdSXin LI free(ml);
67448c779cdSXin LI }
67548c779cdSXin LI
676898496eeSXin LI file_private void
mlist_free(struct mlist * mlist)677b6cee71dSXin LI mlist_free(struct mlist *mlist)
678b6cee71dSXin LI {
679c2931133SXin LI struct mlist *ml, *next;
680b6cee71dSXin LI
681b6cee71dSXin LI if (mlist == NULL)
682b6cee71dSXin LI return;
683b6cee71dSXin LI
6842726a701SXin LI for (ml = mlist->next; ml != mlist;) {
68548c779cdSXin LI next = ml->next;
68648c779cdSXin LI mlist_free_one(ml);
6872726a701SXin LI ml = next;
688b6cee71dSXin LI }
68948c779cdSXin LI mlist_free_one(mlist);
690b6cee71dSXin LI }
691b6cee71dSXin LI
692c2931133SXin LI #ifndef COMPILE_ONLY
693c2931133SXin LI /* void **bufs: an array of compiled magic files */
694898496eeSXin LI file_protected int
buffer_apprentice(struct magic_set * ms,struct magic ** bufs,size_t * sizes,size_t nbufs)695c2931133SXin LI buffer_apprentice(struct magic_set *ms, struct magic **bufs,
696c2931133SXin LI size_t *sizes, size_t nbufs)
697c2931133SXin LI {
698c2931133SXin LI size_t i, j;
699c2931133SXin LI struct magic_map *map;
700c2931133SXin LI
701c2931133SXin LI if (nbufs == 0)
702c2931133SXin LI return -1;
703c2931133SXin LI
70440427ccaSGordon Tetlow (void)file_reset(ms, 0);
705c2931133SXin LI
706c2931133SXin LI init_file_tables();
707c2931133SXin LI
708c2931133SXin LI for (i = 0; i < MAGIC_SETS; i++) {
709c2931133SXin LI mlist_free(ms->mlist[i]);
710c2931133SXin LI if ((ms->mlist[i] = mlist_alloc()) == NULL) {
711898496eeSXin LI file_oomem(ms, sizeof(*ms->mlist[0]));
712c2931133SXin LI goto fail;
713c2931133SXin LI }
714c2931133SXin LI }
715c2931133SXin LI
716c2931133SXin LI for (i = 0; i < nbufs; i++) {
717c2931133SXin LI map = apprentice_buf(ms, bufs[i], sizes[i]);
718c2931133SXin LI if (map == NULL)
719c2931133SXin LI goto fail;
720c2931133SXin LI
721c2931133SXin LI for (j = 0; j < MAGIC_SETS; j++) {
722c2931133SXin LI if (add_mlist(ms->mlist[j], map, j) == -1) {
723898496eeSXin LI file_oomem(ms, sizeof(*ms->mlist[0]));
724c2931133SXin LI goto fail;
725c2931133SXin LI }
726c2931133SXin LI }
727c2931133SXin LI }
728c2931133SXin LI
729c2931133SXin LI return 0;
730c2931133SXin LI fail:
7312726a701SXin LI mlist_free_all(ms);
732c2931133SXin LI return -1;
733c2931133SXin LI }
734c2931133SXin LI #endif
735c2931133SXin LI
736b6cee71dSXin LI /* const char *fn: list of magic files and directories */
737898496eeSXin LI file_protected int
file_apprentice(struct magic_set * ms,const char * fn,int action)738b6cee71dSXin LI file_apprentice(struct magic_set *ms, const char *fn, int action)
739b6cee71dSXin LI {
740*ae316d1dSXin LI char *p;
74158a0f0d0SEitan Adler int fileerr, errs = -1;
7422726a701SXin LI size_t i, j;
743b6cee71dSXin LI
74440427ccaSGordon Tetlow (void)file_reset(ms, 0);
745b6cee71dSXin LI
746b6cee71dSXin LI if ((fn = magic_getpath(fn, action)) == NULL)
747b6cee71dSXin LI return -1;
748b6cee71dSXin LI
749b6cee71dSXin LI init_file_tables();
750b6cee71dSXin LI
751*ae316d1dSXin LI free(ms->fnamebuf);
752*ae316d1dSXin LI if ((ms->fnamebuf = strdup(fn)) == NULL) {
753b6cee71dSXin LI file_oomem(ms, strlen(fn));
754b6cee71dSXin LI return -1;
755b6cee71dSXin LI }
756b6cee71dSXin LI
757b6cee71dSXin LI for (i = 0; i < MAGIC_SETS; i++) {
758b6cee71dSXin LI mlist_free(ms->mlist[i]);
759b6cee71dSXin LI if ((ms->mlist[i] = mlist_alloc()) == NULL) {
760898496eeSXin LI file_oomem(ms, sizeof(*ms->mlist[0]));
7612726a701SXin LI for (j = 0; j < i; j++) {
7622726a701SXin LI mlist_free(ms->mlist[j]);
7632726a701SXin LI ms->mlist[j] = NULL;
764b6cee71dSXin LI }
765b6cee71dSXin LI return -1;
766b6cee71dSXin LI }
767b6cee71dSXin LI }
768*ae316d1dSXin LI fn = ms->fnamebuf;
769b6cee71dSXin LI
770b6cee71dSXin LI while (fn) {
771a4d6d3b8SXin LI p = CCAST(char *, strchr(fn, PATHSEP));
772b6cee71dSXin LI if (p)
773b6cee71dSXin LI *p++ = '\0';
774b6cee71dSXin LI if (*fn == '\0')
775b6cee71dSXin LI break;
77658a0f0d0SEitan Adler fileerr = apprentice_1(ms, fn, action);
77758a0f0d0SEitan Adler errs = MAX(errs, fileerr);
778b6cee71dSXin LI fn = p;
779b6cee71dSXin LI }
780b6cee71dSXin LI
781b6cee71dSXin LI if (errs == -1) {
782b6cee71dSXin LI for (i = 0; i < MAGIC_SETS; i++) {
783b6cee71dSXin LI mlist_free(ms->mlist[i]);
784b6cee71dSXin LI ms->mlist[i] = NULL;
785b6cee71dSXin LI }
786b6cee71dSXin LI file_error(ms, 0, "could not find any valid magic files!");
787b6cee71dSXin LI return -1;
788b6cee71dSXin LI }
789b6cee71dSXin LI
790b6cee71dSXin LI #if 0
791b6cee71dSXin LI /*
792b6cee71dSXin LI * Always leave the database loaded
793b6cee71dSXin LI */
794b6cee71dSXin LI if (action == FILE_LOAD)
795b6cee71dSXin LI return 0;
796b6cee71dSXin LI
797b6cee71dSXin LI for (i = 0; i < MAGIC_SETS; i++) {
798b6cee71dSXin LI mlist_free(ms->mlist[i]);
799b6cee71dSXin LI ms->mlist[i] = NULL;
800b6cee71dSXin LI }
801b6cee71dSXin LI #endif
802b6cee71dSXin LI
803b6cee71dSXin LI switch (action) {
804b6cee71dSXin LI case FILE_LOAD:
805b6cee71dSXin LI case FILE_COMPILE:
806b6cee71dSXin LI case FILE_CHECK:
807b6cee71dSXin LI case FILE_LIST:
808b6cee71dSXin LI return 0;
809b6cee71dSXin LI default:
810b6cee71dSXin LI file_error(ms, 0, "Invalid action %d", action);
811b6cee71dSXin LI return -1;
812b6cee71dSXin LI }
813b6cee71dSXin LI }
814b6cee71dSXin LI
/*
 * Compute the real length of a magic expression, for the purposes
 * of determining how "strong" a magic expression is (approximating
 * how specific its matches are):
 *	- magic characters count 0 unless escaped.
 *	- [] expressions count 1
 *	- {} expressions count 0
 *	- regular characters or escaped magic characters count 1
 *	- 0 length expressions count as one
 */
file_private size_t
nonmagic(const char *str)
{
	const char *cur = str;
	size_t count = 0;

	while (*cur != '\0') {
		switch (*cur) {
		case '\\':
			/*
			 * Escaped anything counts 1; swallow the escaped
			 * character unless the backslash ends the string.
			 */
			if (cur[1] != '\0')
				cur++;
			count++;
			break;
		case '?':	/* Magic characters count 0 */
		case '*':
		case '.':
		case '+':
		case '^':
		case '$':
			break;
		case '[':
			/*
			 * Skip to the closing ']' (or the end of the
			 * string) and step back one, so that the ']' is
			 * the next character examined and counts 1.
			 */
			do {
				cur++;
			} while (*cur != '\0' && *cur != ']');
			cur--;
			break;
		case '{':
			/* Braced expressions count 0, '}' included */
			do {
				cur++;
			} while (*cur != '\0' && *cur != '}');
			if (*cur == '\0')
				cur--;	/* unterminated: stay on the string */
			break;
		default:	/* Anything else counts 1 */
			count++;
			break;
		}
		cur++;
	}

	/* Return at least 1 */
	if (count == 0)
		return 1;
	return count;
}
863b6cee71dSXin LI
86440427ccaSGordon Tetlow
865898496eeSXin LI file_private size_t
typesize(int type)86640427ccaSGordon Tetlow typesize(int type)
86740427ccaSGordon Tetlow {
86840427ccaSGordon Tetlow switch (type) {
86940427ccaSGordon Tetlow case FILE_BYTE:
87040427ccaSGordon Tetlow return 1;
87140427ccaSGordon Tetlow
87240427ccaSGordon Tetlow case FILE_SHORT:
87340427ccaSGordon Tetlow case FILE_LESHORT:
87440427ccaSGordon Tetlow case FILE_BESHORT:
875a4d6d3b8SXin LI case FILE_MSDOSDATE:
876a4d6d3b8SXin LI case FILE_BEMSDOSDATE:
877a4d6d3b8SXin LI case FILE_LEMSDOSDATE:
878a4d6d3b8SXin LI case FILE_MSDOSTIME:
879a4d6d3b8SXin LI case FILE_BEMSDOSTIME:
880a4d6d3b8SXin LI case FILE_LEMSDOSTIME:
88140427ccaSGordon Tetlow return 2;
88240427ccaSGordon Tetlow
88340427ccaSGordon Tetlow case FILE_LONG:
88440427ccaSGordon Tetlow case FILE_LELONG:
88540427ccaSGordon Tetlow case FILE_BELONG:
88640427ccaSGordon Tetlow case FILE_MELONG:
88740427ccaSGordon Tetlow return 4;
88840427ccaSGordon Tetlow
88940427ccaSGordon Tetlow case FILE_DATE:
89040427ccaSGordon Tetlow case FILE_LEDATE:
89140427ccaSGordon Tetlow case FILE_BEDATE:
89240427ccaSGordon Tetlow case FILE_MEDATE:
89340427ccaSGordon Tetlow case FILE_LDATE:
89440427ccaSGordon Tetlow case FILE_LELDATE:
89540427ccaSGordon Tetlow case FILE_BELDATE:
89640427ccaSGordon Tetlow case FILE_MELDATE:
89740427ccaSGordon Tetlow case FILE_FLOAT:
89840427ccaSGordon Tetlow case FILE_BEFLOAT:
89940427ccaSGordon Tetlow case FILE_LEFLOAT:
900a4d6d3b8SXin LI case FILE_BEID3:
901a4d6d3b8SXin LI case FILE_LEID3:
90240427ccaSGordon Tetlow return 4;
90340427ccaSGordon Tetlow
90440427ccaSGordon Tetlow case FILE_QUAD:
90540427ccaSGordon Tetlow case FILE_BEQUAD:
90640427ccaSGordon Tetlow case FILE_LEQUAD:
90740427ccaSGordon Tetlow case FILE_QDATE:
90840427ccaSGordon Tetlow case FILE_LEQDATE:
90940427ccaSGordon Tetlow case FILE_BEQDATE:
91040427ccaSGordon Tetlow case FILE_QLDATE:
91140427ccaSGordon Tetlow case FILE_LEQLDATE:
91240427ccaSGordon Tetlow case FILE_BEQLDATE:
91340427ccaSGordon Tetlow case FILE_QWDATE:
91440427ccaSGordon Tetlow case FILE_LEQWDATE:
91540427ccaSGordon Tetlow case FILE_BEQWDATE:
91640427ccaSGordon Tetlow case FILE_DOUBLE:
91740427ccaSGordon Tetlow case FILE_BEDOUBLE:
91840427ccaSGordon Tetlow case FILE_LEDOUBLE:
9192726a701SXin LI case FILE_OFFSET:
92043a5ec4eSXin LI case FILE_BEVARINT:
92143a5ec4eSXin LI case FILE_LEVARINT:
92240427ccaSGordon Tetlow return 8;
9232726a701SXin LI
9242726a701SXin LI case FILE_GUID:
9252726a701SXin LI return 16;
9262726a701SXin LI
92740427ccaSGordon Tetlow default:
9282726a701SXin LI return FILE_BADSIZE;
92940427ccaSGordon Tetlow }
93040427ccaSGordon Tetlow }
93140427ccaSGordon Tetlow
932b6cee71dSXin LI /*
933b6cee71dSXin LI * Get weight of this magic entry, for sorting purposes.
934b6cee71dSXin LI */
935898496eeSXin LI file_private ssize_t
apprentice_magic_strength_1(const struct magic * m)936a4d6d3b8SXin LI apprentice_magic_strength_1(const struct magic *m)
937b6cee71dSXin LI {
93848c779cdSXin LI #define MULT 10U
93948c779cdSXin LI size_t ts, v;
94048c779cdSXin LI ssize_t val = 2 * MULT; /* baseline strength */
941b6cee71dSXin LI
942b6cee71dSXin LI switch (m->type) {
943b6cee71dSXin LI case FILE_DEFAULT: /* make sure this sorts last */
944a4d6d3b8SXin LI if (m->factor_op != FILE_FACTOR_OP_NONE) {
945*ae316d1dSXin LI file_magwarn1("Unsupported factor_op in default %d",
946898496eeSXin LI m->factor_op);
947a4d6d3b8SXin LI }
948b6cee71dSXin LI return 0;
949b6cee71dSXin LI
950b6cee71dSXin LI case FILE_BYTE:
951b6cee71dSXin LI case FILE_SHORT:
952b6cee71dSXin LI case FILE_LESHORT:
953b6cee71dSXin LI case FILE_BESHORT:
954b6cee71dSXin LI case FILE_LONG:
955b6cee71dSXin LI case FILE_LELONG:
956b6cee71dSXin LI case FILE_BELONG:
957b6cee71dSXin LI case FILE_MELONG:
95840427ccaSGordon Tetlow case FILE_DATE:
95940427ccaSGordon Tetlow case FILE_LEDATE:
96040427ccaSGordon Tetlow case FILE_BEDATE:
96140427ccaSGordon Tetlow case FILE_MEDATE:
96240427ccaSGordon Tetlow case FILE_LDATE:
96340427ccaSGordon Tetlow case FILE_LELDATE:
96440427ccaSGordon Tetlow case FILE_BELDATE:
96540427ccaSGordon Tetlow case FILE_MELDATE:
96640427ccaSGordon Tetlow case FILE_FLOAT:
96740427ccaSGordon Tetlow case FILE_BEFLOAT:
96840427ccaSGordon Tetlow case FILE_LEFLOAT:
96940427ccaSGordon Tetlow case FILE_QUAD:
97040427ccaSGordon Tetlow case FILE_BEQUAD:
97140427ccaSGordon Tetlow case FILE_LEQUAD:
97240427ccaSGordon Tetlow case FILE_QDATE:
97340427ccaSGordon Tetlow case FILE_LEQDATE:
97440427ccaSGordon Tetlow case FILE_BEQDATE:
97540427ccaSGordon Tetlow case FILE_QLDATE:
97640427ccaSGordon Tetlow case FILE_LEQLDATE:
97740427ccaSGordon Tetlow case FILE_BEQLDATE:
97840427ccaSGordon Tetlow case FILE_QWDATE:
97940427ccaSGordon Tetlow case FILE_LEQWDATE:
98040427ccaSGordon Tetlow case FILE_BEQWDATE:
98140427ccaSGordon Tetlow case FILE_DOUBLE:
98240427ccaSGordon Tetlow case FILE_BEDOUBLE:
98340427ccaSGordon Tetlow case FILE_LEDOUBLE:
98443a5ec4eSXin LI case FILE_BEVARINT:
98543a5ec4eSXin LI case FILE_LEVARINT:
9862726a701SXin LI case FILE_GUID:
987a4d6d3b8SXin LI case FILE_BEID3:
988a4d6d3b8SXin LI case FILE_LEID3:
9892726a701SXin LI case FILE_OFFSET:
990a4d6d3b8SXin LI case FILE_MSDOSDATE:
991a4d6d3b8SXin LI case FILE_BEMSDOSDATE:
992a4d6d3b8SXin LI case FILE_LEMSDOSDATE:
993a4d6d3b8SXin LI case FILE_MSDOSTIME:
994a4d6d3b8SXin LI case FILE_BEMSDOSTIME:
995a4d6d3b8SXin LI case FILE_LEMSDOSTIME:
99640427ccaSGordon Tetlow ts = typesize(m->type);
997a4d6d3b8SXin LI if (ts == FILE_BADSIZE) {
998a4d6d3b8SXin LI (void)fprintf(stderr, "Bad size for type %d\n",
999a4d6d3b8SXin LI m->type);
100040427ccaSGordon Tetlow abort();
1001a4d6d3b8SXin LI }
100240427ccaSGordon Tetlow val += ts * MULT;
1003b6cee71dSXin LI break;
1004b6cee71dSXin LI
1005b6cee71dSXin LI case FILE_PSTRING:
1006b6cee71dSXin LI case FILE_STRING:
1007a2dfb722SXin LI case FILE_OCTAL:
1008b6cee71dSXin LI val += m->vallen * MULT;
1009b6cee71dSXin LI break;
1010b6cee71dSXin LI
1011b6cee71dSXin LI case FILE_BESTRING16:
1012b6cee71dSXin LI case FILE_LESTRING16:
1013b6cee71dSXin LI val += m->vallen * MULT / 2;
1014b6cee71dSXin LI break;
1015b6cee71dSXin LI
1016b6cee71dSXin LI case FILE_SEARCH:
101748c779cdSXin LI if (m->vallen == 0)
101848c779cdSXin LI break;
1019b6cee71dSXin LI val += m->vallen * MAX(MULT / m->vallen, 1);
1020b6cee71dSXin LI break;
1021b6cee71dSXin LI
1022b6cee71dSXin LI case FILE_REGEX:
1023b6cee71dSXin LI v = nonmagic(m->value.s);
1024b6cee71dSXin LI val += v * MAX(MULT / v, 1);
1025b6cee71dSXin LI break;
1026b6cee71dSXin LI
1027b6cee71dSXin LI case FILE_INDIRECT:
1028b6cee71dSXin LI case FILE_NAME:
1029b6cee71dSXin LI case FILE_USE:
1030a4d6d3b8SXin LI case FILE_CLEAR:
1031b6cee71dSXin LI break;
1032b6cee71dSXin LI
10333e41d09dSXin LI case FILE_DER:
10343e41d09dSXin LI val += MULT;
10353e41d09dSXin LI break;
10363e41d09dSXin LI
1037b6cee71dSXin LI default:
1038b6cee71dSXin LI (void)fprintf(stderr, "Bad type %d\n", m->type);
1039b6cee71dSXin LI abort();
1040b6cee71dSXin LI }
1041b6cee71dSXin LI
1042b6cee71dSXin LI switch (m->reln) {
1043b6cee71dSXin LI case 'x': /* matches anything penalize */
1044b6cee71dSXin LI case '!': /* matches almost anything penalize */
1045b6cee71dSXin LI val = 0;
1046b6cee71dSXin LI break;
1047b6cee71dSXin LI
1048b6cee71dSXin LI case '=': /* Exact match, prefer */
1049b6cee71dSXin LI val += MULT;
1050b6cee71dSXin LI break;
1051b6cee71dSXin LI
1052b6cee71dSXin LI case '>':
1053b6cee71dSXin LI case '<': /* comparison match reduce strength */
1054b6cee71dSXin LI val -= 2 * MULT;
1055b6cee71dSXin LI break;
1056b6cee71dSXin LI
1057b6cee71dSXin LI case '^':
1058b6cee71dSXin LI case '&': /* masking bits, we could count them too */
1059b6cee71dSXin LI val -= MULT;
1060b6cee71dSXin LI break;
1061b6cee71dSXin LI
1062b6cee71dSXin LI default:
1063b6cee71dSXin LI (void)fprintf(stderr, "Bad relation %c\n", m->reln);
1064b6cee71dSXin LI abort();
1065b6cee71dSXin LI }
1066b6cee71dSXin LI
1067a4d6d3b8SXin LI return val;
1068a4d6d3b8SXin LI }
1069a4d6d3b8SXin LI
1070a4d6d3b8SXin LI
1071a2dfb722SXin LI /*ARGSUSED*/
1072898496eeSXin LI file_protected size_t
file_magic_strength(const struct magic * m,size_t nmagic)1073898496eeSXin LI file_magic_strength(const struct magic *m,
1074a4d6d3b8SXin LI size_t nmagic __attribute__((__unused__)))
1075a4d6d3b8SXin LI {
1076a4d6d3b8SXin LI ssize_t val = apprentice_magic_strength_1(m);
1077a4d6d3b8SXin LI
1078a4d6d3b8SXin LI #ifdef notyet
1079a4d6d3b8SXin LI if (m->desc[0] == '\0') {
1080a4d6d3b8SXin LI size_t i;
1081a4d6d3b8SXin LI /*
1082a4d6d3b8SXin LI * Magic entries with no description get their continuations
1083a4d6d3b8SXin LI * added
1084a4d6d3b8SXin LI */
1085a4d6d3b8SXin LI for (i = 1; m[i].cont_level != 0 && i < MIN(nmagic, 3); i++) {
1086a4d6d3b8SXin LI ssize_t v = apprentice_magic_strength_1(&m[i]) >>
1087a4d6d3b8SXin LI (i + 1);
1088a4d6d3b8SXin LI val += v;
1089a4d6d3b8SXin LI if (m[i].desc[0] != '\0')
1090a4d6d3b8SXin LI break;
1091a4d6d3b8SXin LI }
1092a4d6d3b8SXin LI }
1093a4d6d3b8SXin LI #endif
1094a4d6d3b8SXin LI
1095b6cee71dSXin LI switch (m->factor_op) {
1096b6cee71dSXin LI case FILE_FACTOR_OP_NONE:
1097b6cee71dSXin LI break;
1098b6cee71dSXin LI case FILE_FACTOR_OP_PLUS:
1099b6cee71dSXin LI val += m->factor;
1100b6cee71dSXin LI break;
1101b6cee71dSXin LI case FILE_FACTOR_OP_MINUS:
1102b6cee71dSXin LI val -= m->factor;
1103b6cee71dSXin LI break;
1104b6cee71dSXin LI case FILE_FACTOR_OP_TIMES:
1105b6cee71dSXin LI val *= m->factor;
1106b6cee71dSXin LI break;
1107b6cee71dSXin LI case FILE_FACTOR_OP_DIV:
1108b6cee71dSXin LI val /= m->factor;
1109b6cee71dSXin LI break;
1110b6cee71dSXin LI default:
1111a4d6d3b8SXin LI (void)fprintf(stderr, "Bad factor_op %u\n", m->factor_op);
1112b6cee71dSXin LI abort();
1113b6cee71dSXin LI }
1114b6cee71dSXin LI
111548c779cdSXin LI if (val <= 0) /* ensure we only return 0 for FILE_DEFAULT */
111648c779cdSXin LI val = 1;
111748c779cdSXin LI
1118a4d6d3b8SXin LI #ifndef notyet
1119b6cee71dSXin LI /*
1120b6cee71dSXin LI * Magic entries with no description get a bonus because they depend
1121b6cee71dSXin LI * on subsequent magic entries to print something.
1122b6cee71dSXin LI */
1123b6cee71dSXin LI if (m->desc[0] == '\0')
1124b6cee71dSXin LI val++;
1125a4d6d3b8SXin LI #endif
1126a4d6d3b8SXin LI
1127b6cee71dSXin LI return val;
1128b6cee71dSXin LI }
1129b6cee71dSXin LI
1130b6cee71dSXin LI /*
1131b6cee71dSXin LI * Sort callback for sorting entries by "strength" (basically length)
1132b6cee71dSXin LI */
1133898496eeSXin LI file_private int
apprentice_sort(const void * a,const void * b)1134b6cee71dSXin LI apprentice_sort(const void *a, const void *b)
1135b6cee71dSXin LI {
1136b6cee71dSXin LI const struct magic_entry *ma = CAST(const struct magic_entry *, a);
1137b6cee71dSXin LI const struct magic_entry *mb = CAST(const struct magic_entry *, b);
1138898496eeSXin LI size_t sa = file_magic_strength(ma->mp, ma->cont_count);
1139898496eeSXin LI size_t sb = file_magic_strength(mb->mp, mb->cont_count);
1140*ae316d1dSXin LI if (sa == sb) {
1141*ae316d1dSXin LI struct magic mpa = *ma->mp;
1142*ae316d1dSXin LI struct magic mpb = *mb->mp;
1143*ae316d1dSXin LI mpa.lineno = mpb.lineno = 0;
1144*ae316d1dSXin LI int x = memcmp(&mpa, &mpb, sizeof(mpa));
1145*ae316d1dSXin LI if (x == 0) {
1146*ae316d1dSXin LI // Don't warn for DER
1147*ae316d1dSXin LI if (mpa.type == FILE_DER)
1148b6cee71dSXin LI return 0;
1149*ae316d1dSXin LI file_magwarn1("Duplicate magic entry `%s'",
1150*ae316d1dSXin LI ma->mp->desc);
1151*ae316d1dSXin LI #ifndef COMPILE_ONLY
1152*ae316d1dSXin LI file_mdump(ma->mp);
1153*ae316d1dSXin LI file_mdump(mb->mp);
1154*ae316d1dSXin LI #endif
1155*ae316d1dSXin LI return 0;
1156*ae316d1dSXin LI }
1157*ae316d1dSXin LI return x > 0 ? -1 : 1;
1158*ae316d1dSXin LI }
1159*ae316d1dSXin LI return sa > sb ? -1 : 1;
1160b6cee71dSXin LI }
1161b6cee71dSXin LI
1162b6cee71dSXin LI /*
1163b6cee71dSXin LI * Shows sorted patterns list in the order which is used for the matching
1164b6cee71dSXin LI */
1165898496eeSXin LI file_private void
apprentice_list(struct mlist * mlist,int mode)1166b6cee71dSXin LI apprentice_list(struct mlist *mlist, int mode)
1167b6cee71dSXin LI {
1168a4d6d3b8SXin LI uint32_t magindex, descindex, mimeindex, lineindex;
1169b6cee71dSXin LI struct mlist *ml;
1170b6cee71dSXin LI for (ml = mlist->next; ml != mlist; ml = ml->next) {
1171b6cee71dSXin LI for (magindex = 0; magindex < ml->nmagic; magindex++) {
1172b6cee71dSXin LI struct magic *m = &ml->magic[magindex];
1173b6cee71dSXin LI if ((m->flag & mode) != mode) {
1174b6cee71dSXin LI /* Skip sub-tests */
1175b6cee71dSXin LI while (magindex + 1 < ml->nmagic &&
1176b6cee71dSXin LI ml->magic[magindex + 1].cont_level != 0)
1177b6cee71dSXin LI ++magindex;
1178b6cee71dSXin LI continue; /* Skip to next top-level test*/
1179b6cee71dSXin LI }
1180b6cee71dSXin LI
1181b6cee71dSXin LI /*
1182b6cee71dSXin LI * Try to iterate over the tree until we find item with
1183b6cee71dSXin LI * description/mimetype.
1184b6cee71dSXin LI */
1185a4d6d3b8SXin LI lineindex = descindex = mimeindex = magindex;
1186898496eeSXin LI for (; magindex + 1 < ml->nmagic &&
1187898496eeSXin LI ml->magic[magindex + 1].cont_level != 0;
1188898496eeSXin LI magindex++) {
1189898496eeSXin LI uint32_t mi = magindex + 1;
1190a4d6d3b8SXin LI if (*ml->magic[descindex].desc == '\0'
1191898496eeSXin LI && *ml->magic[mi].desc)
1192898496eeSXin LI descindex = mi;
1193a4d6d3b8SXin LI if (*ml->magic[mimeindex].mimetype == '\0'
1194898496eeSXin LI && *ml->magic[mi].mimetype)
1195898496eeSXin LI mimeindex = mi;
1196a4d6d3b8SXin LI }
1197b6cee71dSXin LI
11985f0216bdSXin LI printf("Strength = %3" SIZE_T_FORMAT "u@%u: %s [%s]\n",
1199898496eeSXin LI file_magic_strength(m, ml->nmagic - magindex),
1200a4d6d3b8SXin LI ml->magic[lineindex].lineno,
1201a4d6d3b8SXin LI ml->magic[descindex].desc,
1202a4d6d3b8SXin LI ml->magic[mimeindex].mimetype);
1203b6cee71dSXin LI }
1204b6cee71dSXin LI }
1205b6cee71dSXin LI }
1206b6cee71dSXin LI
1207898496eeSXin LI file_private void
set_test_type(struct magic * mstart,struct magic * m)1208b6cee71dSXin LI set_test_type(struct magic *mstart, struct magic *m)
1209b6cee71dSXin LI {
1210b6cee71dSXin LI switch (m->type) {
1211b6cee71dSXin LI case FILE_BYTE:
1212b6cee71dSXin LI case FILE_SHORT:
1213b6cee71dSXin LI case FILE_LONG:
1214b6cee71dSXin LI case FILE_DATE:
1215b6cee71dSXin LI case FILE_BESHORT:
1216b6cee71dSXin LI case FILE_BELONG:
1217b6cee71dSXin LI case FILE_BEDATE:
1218b6cee71dSXin LI case FILE_LESHORT:
1219b6cee71dSXin LI case FILE_LELONG:
1220b6cee71dSXin LI case FILE_LEDATE:
1221b6cee71dSXin LI case FILE_LDATE:
1222b6cee71dSXin LI case FILE_BELDATE:
1223b6cee71dSXin LI case FILE_LELDATE:
1224b6cee71dSXin LI case FILE_MEDATE:
1225b6cee71dSXin LI case FILE_MELDATE:
1226b6cee71dSXin LI case FILE_MELONG:
1227b6cee71dSXin LI case FILE_QUAD:
1228b6cee71dSXin LI case FILE_LEQUAD:
1229b6cee71dSXin LI case FILE_BEQUAD:
1230b6cee71dSXin LI case FILE_QDATE:
1231b6cee71dSXin LI case FILE_LEQDATE:
1232b6cee71dSXin LI case FILE_BEQDATE:
1233b6cee71dSXin LI case FILE_QLDATE:
1234b6cee71dSXin LI case FILE_LEQLDATE:
1235b6cee71dSXin LI case FILE_BEQLDATE:
1236b6cee71dSXin LI case FILE_QWDATE:
1237b6cee71dSXin LI case FILE_LEQWDATE:
1238b6cee71dSXin LI case FILE_BEQWDATE:
1239b6cee71dSXin LI case FILE_FLOAT:
1240b6cee71dSXin LI case FILE_BEFLOAT:
1241b6cee71dSXin LI case FILE_LEFLOAT:
1242b6cee71dSXin LI case FILE_DOUBLE:
1243b6cee71dSXin LI case FILE_BEDOUBLE:
1244b6cee71dSXin LI case FILE_LEDOUBLE:
124543a5ec4eSXin LI case FILE_BEVARINT:
124643a5ec4eSXin LI case FILE_LEVARINT:
12473e41d09dSXin LI case FILE_DER:
12482726a701SXin LI case FILE_GUID:
12492726a701SXin LI case FILE_OFFSET:
1250a4d6d3b8SXin LI case FILE_MSDOSDATE:
1251a4d6d3b8SXin LI case FILE_BEMSDOSDATE:
1252a4d6d3b8SXin LI case FILE_LEMSDOSDATE:
1253a4d6d3b8SXin LI case FILE_MSDOSTIME:
1254a4d6d3b8SXin LI case FILE_BEMSDOSTIME:
1255a4d6d3b8SXin LI case FILE_LEMSDOSTIME:
1256a2dfb722SXin LI case FILE_OCTAL:
1257b6cee71dSXin LI mstart->flag |= BINTEST;
1258b6cee71dSXin LI break;
1259b6cee71dSXin LI case FILE_STRING:
1260b6cee71dSXin LI case FILE_PSTRING:
1261b6cee71dSXin LI case FILE_BESTRING16:
1262b6cee71dSXin LI case FILE_LESTRING16:
1263b6cee71dSXin LI /* Allow text overrides */
1264b6cee71dSXin LI if (mstart->str_flags & STRING_TEXTTEST)
1265b6cee71dSXin LI mstart->flag |= TEXTTEST;
1266b6cee71dSXin LI else
1267b6cee71dSXin LI mstart->flag |= BINTEST;
1268b6cee71dSXin LI break;
1269b6cee71dSXin LI case FILE_REGEX:
1270b6cee71dSXin LI case FILE_SEARCH:
1271b6cee71dSXin LI /* Check for override */
1272b6cee71dSXin LI if (mstart->str_flags & STRING_BINTEST)
1273b6cee71dSXin LI mstart->flag |= BINTEST;
1274b6cee71dSXin LI if (mstart->str_flags & STRING_TEXTTEST)
1275b6cee71dSXin LI mstart->flag |= TEXTTEST;
1276b6cee71dSXin LI
1277b6cee71dSXin LI if (mstart->flag & (TEXTTEST|BINTEST))
1278b6cee71dSXin LI break;
1279b6cee71dSXin LI
1280b6cee71dSXin LI /* binary test if pattern is not text */
128148c779cdSXin LI if (file_looks_utf8(m->value.us, CAST(size_t, m->vallen), NULL,
1282b6cee71dSXin LI NULL) <= 0)
1283b6cee71dSXin LI mstart->flag |= BINTEST;
1284b6cee71dSXin LI else
1285b6cee71dSXin LI mstart->flag |= TEXTTEST;
1286b6cee71dSXin LI break;
1287b6cee71dSXin LI case FILE_DEFAULT:
1288b6cee71dSXin LI /* can't deduce anything; we shouldn't see this at the
1289b6cee71dSXin LI top level anyway */
1290b6cee71dSXin LI break;
1291b6cee71dSXin LI case FILE_INVALID:
1292b6cee71dSXin LI default:
1293b6cee71dSXin LI /* invalid search type, but no need to complain here */
1294b6cee71dSXin LI break;
1295b6cee71dSXin LI }
1296b6cee71dSXin LI }
1297b6cee71dSXin LI
1298898496eeSXin LI file_private int
addentry(struct magic_set * ms,struct magic_entry * me,struct magic_entry_set * mset)1299b6cee71dSXin LI addentry(struct magic_set *ms, struct magic_entry *me,
1300b6cee71dSXin LI struct magic_entry_set *mset)
1301b6cee71dSXin LI {
1302b6cee71dSXin LI size_t i = me->mp->type == FILE_NAME ? 1 : 0;
1303a4d6d3b8SXin LI if (mset[i].me == NULL || mset[i].count == mset[i].max) {
1304b6cee71dSXin LI struct magic_entry *mp;
1305b6cee71dSXin LI
1306a4d6d3b8SXin LI size_t incr = mset[i].max + ALLOC_INCR;
1307b6cee71dSXin LI if ((mp = CAST(struct magic_entry *,
1308a4d6d3b8SXin LI realloc(mset[i].me, sizeof(*mp) * incr))) ==
1309b6cee71dSXin LI NULL) {
1310a4d6d3b8SXin LI file_oomem(ms, sizeof(*mp) * incr);
1311b6cee71dSXin LI return -1;
1312b6cee71dSXin LI }
1313b6cee71dSXin LI (void)memset(&mp[mset[i].count], 0, sizeof(*mp) *
1314b6cee71dSXin LI ALLOC_INCR);
1315b6cee71dSXin LI mset[i].me = mp;
1316a2dfb722SXin LI mset[i].max = CAST(uint32_t, incr);
1317a2dfb722SXin LI assert(mset[i].max == incr);
1318b6cee71dSXin LI }
1319b6cee71dSXin LI mset[i].me[mset[i].count++] = *me;
1320b6cee71dSXin LI memset(me, 0, sizeof(*me));
1321b6cee71dSXin LI return 0;
1322b6cee71dSXin LI }
1323b6cee71dSXin LI
1324b6cee71dSXin LI /*
1325b6cee71dSXin LI * Load and parse one file.
1326b6cee71dSXin LI */
1327898496eeSXin LI file_private void
load_1(struct magic_set * ms,int action,const char * fn,int * errs,struct magic_entry_set * mset)1328b6cee71dSXin LI load_1(struct magic_set *ms, int action, const char *fn, int *errs,
1329b6cee71dSXin LI struct magic_entry_set *mset)
1330b6cee71dSXin LI {
1331b6cee71dSXin LI size_t lineno = 0, llen = 0;
1332b6cee71dSXin LI char *line = NULL;
1333b6cee71dSXin LI ssize_t len;
1334b6cee71dSXin LI struct magic_entry me;
1335b6cee71dSXin LI
1336b6cee71dSXin LI FILE *f = fopen(ms->file = fn, "r");
1337b6cee71dSXin LI if (f == NULL) {
1338b6cee71dSXin LI if (errno != ENOENT)
1339b6cee71dSXin LI file_error(ms, errno, "cannot read magic file `%s'",
1340b6cee71dSXin LI fn);
1341b6cee71dSXin LI (*errs)++;
1342b6cee71dSXin LI return;
1343b6cee71dSXin LI }
1344b6cee71dSXin LI
1345b6cee71dSXin LI memset(&me, 0, sizeof(me));
1346b6cee71dSXin LI /* read and parse this file */
1347b6cee71dSXin LI for (ms->line = 1; (len = getline(&line, &llen, f)) != -1;
1348b6cee71dSXin LI ms->line++) {
1349*ae316d1dSXin LI if (ms->magwarn >= ms->magwarn_max)
1350*ae316d1dSXin LI break;
1351b6cee71dSXin LI if (len == 0) /* null line, garbage, etc */
1352b6cee71dSXin LI continue;
1353b6cee71dSXin LI if (line[len - 1] == '\n') {
1354b6cee71dSXin LI lineno++;
1355b6cee71dSXin LI line[len - 1] = '\0'; /* delete newline */
1356b6cee71dSXin LI }
1357b6cee71dSXin LI switch (line[0]) {
1358b6cee71dSXin LI case '\0': /* empty, do not parse */
1359b6cee71dSXin LI case '#': /* comment, do not parse */
1360b6cee71dSXin LI continue;
1361b6cee71dSXin LI case '!':
1362b6cee71dSXin LI if (line[1] == ':') {
1363b6cee71dSXin LI size_t i;
1364b6cee71dSXin LI
1365b6cee71dSXin LI for (i = 0; bang[i].name != NULL; i++) {
136648c779cdSXin LI if (CAST(size_t, len - 2) > bang[i].len &&
1367b6cee71dSXin LI memcmp(bang[i].name, line + 2,
1368b6cee71dSXin LI bang[i].len) == 0)
1369b6cee71dSXin LI break;
1370b6cee71dSXin LI }
1371b6cee71dSXin LI if (bang[i].name == NULL) {
1372b6cee71dSXin LI file_error(ms, 0,
1373b6cee71dSXin LI "Unknown !: entry `%s'", line);
1374b6cee71dSXin LI (*errs)++;
1375b6cee71dSXin LI continue;
1376b6cee71dSXin LI }
1377b6cee71dSXin LI if (me.mp == NULL) {
1378b6cee71dSXin LI file_error(ms, 0,
1379b6cee71dSXin LI "No current entry for :!%s type",
1380b6cee71dSXin LI bang[i].name);
1381b6cee71dSXin LI (*errs)++;
1382b6cee71dSXin LI continue;
1383b6cee71dSXin LI }
1384b6cee71dSXin LI if ((*bang[i].fun)(ms, &me,
13852726a701SXin LI line + bang[i].len + 2,
13862726a701SXin LI len - bang[i].len - 2) != 0) {
1387b6cee71dSXin LI (*errs)++;
1388b6cee71dSXin LI continue;
1389b6cee71dSXin LI }
1390b6cee71dSXin LI continue;
1391b6cee71dSXin LI }
1392b6cee71dSXin LI /*FALLTHROUGH*/
1393b6cee71dSXin LI default:
1394b6cee71dSXin LI again:
1395*ae316d1dSXin LI switch (parse(ms, &me, fn, line, lineno, action)) {
1396b6cee71dSXin LI case 0:
1397b6cee71dSXin LI continue;
1398b6cee71dSXin LI case 1:
1399b6cee71dSXin LI (void)addentry(ms, &me, mset);
1400b6cee71dSXin LI goto again;
1401b6cee71dSXin LI default:
1402b6cee71dSXin LI (*errs)++;
1403b6cee71dSXin LI break;
1404b6cee71dSXin LI }
1405b6cee71dSXin LI }
1406b6cee71dSXin LI }
1407b6cee71dSXin LI if (me.mp)
1408b6cee71dSXin LI (void)addentry(ms, &me, mset);
1409b6cee71dSXin LI free(line);
1410b6cee71dSXin LI (void)fclose(f);
1411b6cee71dSXin LI }
1412b6cee71dSXin LI
1413b6cee71dSXin LI /*
1414b6cee71dSXin LI * parse a file or directory of files
1415b6cee71dSXin LI * const char *fn: name of magic file or directory
1416b6cee71dSXin LI */
/*
 * Comparison callback for arrays of C strings: each argument points
 * to a "char *" element, and the strings are ordered with strcmp().
 */
file_private int
cmpstrp(const void *p1, const void *p2)
{
	const char *lhs = *RCAST(char *const *, p1);
	const char *rhs = *RCAST(char *const *, p2);

	return strcmp(lhs, rhs);
}
1422b6cee71dSXin LI
1423b6cee71dSXin LI
1424898496eeSXin LI file_private uint32_t
set_text_binary(struct magic_set * ms,struct magic_entry * me,uint32_t nme,uint32_t starttest)1425b6cee71dSXin LI set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1426b6cee71dSXin LI uint32_t starttest)
1427b6cee71dSXin LI {
1428b6cee71dSXin LI static const char text[] = "text";
1429b6cee71dSXin LI static const char binary[] = "binary";
1430b6cee71dSXin LI static const size_t len = sizeof(text);
1431b6cee71dSXin LI
1432b6cee71dSXin LI uint32_t i = starttest;
1433b6cee71dSXin LI
1434b6cee71dSXin LI do {
1435b6cee71dSXin LI set_test_type(me[starttest].mp, me[i].mp);
1436b6cee71dSXin LI if ((ms->flags & MAGIC_DEBUG) == 0)
1437b6cee71dSXin LI continue;
1438b6cee71dSXin LI (void)fprintf(stderr, "%s%s%s: %s\n",
1439b6cee71dSXin LI me[i].mp->mimetype,
1440b6cee71dSXin LI me[i].mp->mimetype[0] == '\0' ? "" : "; ",
1441b6cee71dSXin LI me[i].mp->desc[0] ? me[i].mp->desc : "(no description)",
1442b6cee71dSXin LI me[i].mp->flag & BINTEST ? binary : text);
1443b6cee71dSXin LI if (me[i].mp->flag & BINTEST) {
1444b6cee71dSXin LI char *p = strstr(me[i].mp->desc, text);
1445b6cee71dSXin LI if (p && (p == me[i].mp->desc ||
144648c779cdSXin LI isspace(CAST(unsigned char, p[-1]))) &&
1447b6cee71dSXin LI (p + len - me[i].mp->desc == MAXstring
1448b6cee71dSXin LI || (p[len] == '\0' ||
144948c779cdSXin LI isspace(CAST(unsigned char, p[len])))))
1450b6cee71dSXin LI (void)fprintf(stderr, "*** Possible "
1451b6cee71dSXin LI "binary test for text type\n");
1452b6cee71dSXin LI }
1453b6cee71dSXin LI } while (++i < nme && me[i].mp->cont_level != 0);
1454b6cee71dSXin LI return i;
1455b6cee71dSXin LI }
1456b6cee71dSXin LI
1457898496eeSXin LI file_private void
set_last_default(struct magic_set * ms,struct magic_entry * me,uint32_t nme)1458b6cee71dSXin LI set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme)
1459b6cee71dSXin LI {
1460b6cee71dSXin LI uint32_t i;
1461b6cee71dSXin LI for (i = 0; i < nme; i++) {
1462b6cee71dSXin LI if (me[i].mp->cont_level == 0 &&
1463b6cee71dSXin LI me[i].mp->type == FILE_DEFAULT) {
1464b6cee71dSXin LI while (++i < nme)
1465b6cee71dSXin LI if (me[i].mp->cont_level == 0)
1466b6cee71dSXin LI break;
1467b6cee71dSXin LI if (i != nme) {
1468b6cee71dSXin LI /* XXX - Ugh! */
1469b6cee71dSXin LI ms->line = me[i].mp->lineno;
1470b6cee71dSXin LI file_magwarn(ms,
1471b6cee71dSXin LI "level 0 \"default\" did not sort last");
1472b6cee71dSXin LI }
1473b6cee71dSXin LI return;
1474b6cee71dSXin LI }
1475b6cee71dSXin LI }
1476b6cee71dSXin LI }
1477b6cee71dSXin LI
1478898496eeSXin LI file_private int
coalesce_entries(struct magic_set * ms,struct magic_entry * me,uint32_t nme,struct magic ** ma,uint32_t * nma)1479b6cee71dSXin LI coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1480b6cee71dSXin LI struct magic **ma, uint32_t *nma)
1481b6cee71dSXin LI {
1482b6cee71dSXin LI uint32_t i, mentrycount = 0;
1483b6cee71dSXin LI size_t slen;
1484b6cee71dSXin LI
1485b6cee71dSXin LI for (i = 0; i < nme; i++)
1486b6cee71dSXin LI mentrycount += me[i].cont_count;
1487b6cee71dSXin LI
148843a5ec4eSXin LI if (mentrycount == 0) {
148943a5ec4eSXin LI *ma = NULL;
149043a5ec4eSXin LI *nma = 0;
149143a5ec4eSXin LI return 0;
149243a5ec4eSXin LI }
149343a5ec4eSXin LI
1494b6cee71dSXin LI slen = sizeof(**ma) * mentrycount;
1495b6cee71dSXin LI if ((*ma = CAST(struct magic *, malloc(slen))) == NULL) {
1496b6cee71dSXin LI file_oomem(ms, slen);
1497b6cee71dSXin LI return -1;
1498b6cee71dSXin LI }
1499b6cee71dSXin LI
1500b6cee71dSXin LI mentrycount = 0;
1501b6cee71dSXin LI for (i = 0; i < nme; i++) {
1502b6cee71dSXin LI (void)memcpy(*ma + mentrycount, me[i].mp,
1503b6cee71dSXin LI me[i].cont_count * sizeof(**ma));
1504b6cee71dSXin LI mentrycount += me[i].cont_count;
1505b6cee71dSXin LI }
1506b6cee71dSXin LI *nma = mentrycount;
1507b6cee71dSXin LI return 0;
1508b6cee71dSXin LI }
1509b6cee71dSXin LI
1510898496eeSXin LI file_private void
magic_entry_free(struct magic_entry * me,uint32_t nme)1511b6cee71dSXin LI magic_entry_free(struct magic_entry *me, uint32_t nme)
1512b6cee71dSXin LI {
1513b6cee71dSXin LI uint32_t i;
1514b6cee71dSXin LI if (me == NULL)
1515b6cee71dSXin LI return;
1516b6cee71dSXin LI for (i = 0; i < nme; i++)
1517b6cee71dSXin LI free(me[i].mp);
1518b6cee71dSXin LI free(me);
1519b6cee71dSXin LI }
1520b6cee71dSXin LI
1521898496eeSXin LI file_private struct magic_map *
apprentice_load(struct magic_set * ms,const char * fn,int action)1522b6cee71dSXin LI apprentice_load(struct magic_set *ms, const char *fn, int action)
1523b6cee71dSXin LI {
1524b6cee71dSXin LI int errs = 0;
1525b6cee71dSXin LI uint32_t i, j;
1526b6cee71dSXin LI size_t files = 0, maxfiles = 0;
1527b6cee71dSXin LI char **filearr = NULL, *mfn;
1528b6cee71dSXin LI struct stat st;
1529b6cee71dSXin LI struct magic_map *map;
1530b6cee71dSXin LI struct magic_entry_set mset[MAGIC_SETS];
1531b6cee71dSXin LI DIR *dir;
1532b6cee71dSXin LI struct dirent *d;
1533b6cee71dSXin LI
1534b6cee71dSXin LI memset(mset, 0, sizeof(mset));
1535b6cee71dSXin LI ms->flags |= MAGIC_CHECK; /* Enable checks for parsed files */
1536b6cee71dSXin LI
1537b6cee71dSXin LI
1538b6cee71dSXin LI if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL)
1539b6cee71dSXin LI {
1540b6cee71dSXin LI file_oomem(ms, sizeof(*map));
1541b6cee71dSXin LI return NULL;
1542b6cee71dSXin LI }
15439ce06829SXin LI map->type = MAP_TYPE_MALLOC;
1544b6cee71dSXin LI
1545b6cee71dSXin LI /* print silly verbose header for USG compat. */
1546b6cee71dSXin LI if (action == FILE_CHECK)
1547b6cee71dSXin LI (void)fprintf(stderr, "%s\n", usg_hdr);
1548b6cee71dSXin LI
1549b6cee71dSXin LI /* load directory or file */
1550b6cee71dSXin LI if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
1551b6cee71dSXin LI dir = opendir(fn);
1552b6cee71dSXin LI if (!dir) {
1553b6cee71dSXin LI errs++;
1554b6cee71dSXin LI goto out;
1555b6cee71dSXin LI }
1556b6cee71dSXin LI while ((d = readdir(dir)) != NULL) {
155740427ccaSGordon Tetlow if (d->d_name[0] == '.')
155840427ccaSGordon Tetlow continue;
1559b6cee71dSXin LI if (asprintf(&mfn, "%s/%s", fn, d->d_name) < 0) {
1560b6cee71dSXin LI file_oomem(ms,
1561b6cee71dSXin LI strlen(fn) + strlen(d->d_name) + 2);
1562b6cee71dSXin LI errs++;
1563b6cee71dSXin LI closedir(dir);
1564b6cee71dSXin LI goto out;
1565b6cee71dSXin LI }
1566b6cee71dSXin LI if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) {
1567b6cee71dSXin LI free(mfn);
1568b6cee71dSXin LI continue;
1569b6cee71dSXin LI }
1570b6cee71dSXin LI if (files >= maxfiles) {
1571b6cee71dSXin LI size_t mlen;
15722726a701SXin LI char **nfilearr;
1573b6cee71dSXin LI maxfiles = (maxfiles + 1) * 2;
1574b6cee71dSXin LI mlen = maxfiles * sizeof(*filearr);
15752726a701SXin LI if ((nfilearr = CAST(char **,
1576b6cee71dSXin LI realloc(filearr, mlen))) == NULL) {
1577b6cee71dSXin LI file_oomem(ms, mlen);
1578b6cee71dSXin LI free(mfn);
1579b6cee71dSXin LI closedir(dir);
1580b6cee71dSXin LI errs++;
1581b6cee71dSXin LI goto out;
1582b6cee71dSXin LI }
15832726a701SXin LI filearr = nfilearr;
1584b6cee71dSXin LI }
1585b6cee71dSXin LI filearr[files++] = mfn;
1586b6cee71dSXin LI }
1587b6cee71dSXin LI closedir(dir);
158848c779cdSXin LI if (filearr) {
1589b6cee71dSXin LI qsort(filearr, files, sizeof(*filearr), cmpstrp);
1590b6cee71dSXin LI for (i = 0; i < files; i++) {
1591b6cee71dSXin LI load_1(ms, action, filearr[i], &errs, mset);
1592b6cee71dSXin LI free(filearr[i]);
1593b6cee71dSXin LI }
1594b6cee71dSXin LI free(filearr);
15952726a701SXin LI filearr = NULL;
159648c779cdSXin LI }
1597b6cee71dSXin LI } else
1598b6cee71dSXin LI load_1(ms, action, fn, &errs, mset);
1599b6cee71dSXin LI if (errs)
1600b6cee71dSXin LI goto out;
1601b6cee71dSXin LI
1602b6cee71dSXin LI for (j = 0; j < MAGIC_SETS; j++) {
1603b6cee71dSXin LI /* Set types of tests */
1604b6cee71dSXin LI for (i = 0; i < mset[j].count; ) {
1605b6cee71dSXin LI if (mset[j].me[i].mp->cont_level != 0) {
1606b6cee71dSXin LI i++;
1607b6cee71dSXin LI continue;
1608b6cee71dSXin LI }
1609b6cee71dSXin LI i = set_text_binary(ms, mset[j].me, mset[j].count, i);
1610b6cee71dSXin LI }
16119ce06829SXin LI if (mset[j].me)
1612898496eeSXin LI qsort(mset[j].me, mset[j].count, sizeof(*mset[0].me),
1613b6cee71dSXin LI apprentice_sort);
1614b6cee71dSXin LI
1615b6cee71dSXin LI /*
1616b6cee71dSXin LI * Make sure that any level 0 "default" line is last
1617b6cee71dSXin LI * (if one exists).
1618b6cee71dSXin LI */
1619b6cee71dSXin LI set_last_default(ms, mset[j].me, mset[j].count);
1620b6cee71dSXin LI
162143a5ec4eSXin LI /* coalesce per file arrays into a single one, if needed */
162243a5ec4eSXin LI if (mset[j].count == 0)
162343a5ec4eSXin LI continue;
162443a5ec4eSXin LI
1625b6cee71dSXin LI if (coalesce_entries(ms, mset[j].me, mset[j].count,
1626b6cee71dSXin LI &map->magic[j], &map->nmagic[j]) == -1) {
1627b6cee71dSXin LI errs++;
1628b6cee71dSXin LI goto out;
1629b6cee71dSXin LI }
1630b6cee71dSXin LI }
1631b6cee71dSXin LI
1632b6cee71dSXin LI out:
16332726a701SXin LI free(filearr);
1634b6cee71dSXin LI for (j = 0; j < MAGIC_SETS; j++)
1635b6cee71dSXin LI magic_entry_free(mset[j].me, mset[j].count);
1636b6cee71dSXin LI
1637b6cee71dSXin LI if (errs) {
1638b6cee71dSXin LI apprentice_unmap(map);
1639b6cee71dSXin LI return NULL;
1640b6cee71dSXin LI }
1641b6cee71dSXin LI return map;
1642b6cee71dSXin LI }
1643b6cee71dSXin LI
1644b6cee71dSXin LI /*
1645b6cee71dSXin LI * extend the sign bit if the comparison is to be signed
1646b6cee71dSXin LI */
1647898496eeSXin LI file_protected uint64_t
file_signextend(struct magic_set * ms,struct magic * m,uint64_t v)1648b6cee71dSXin LI file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
1649b6cee71dSXin LI {
1650b6cee71dSXin LI if (!(m->flag & UNSIGNED)) {
1651b6cee71dSXin LI switch(m->type) {
1652b6cee71dSXin LI /*
1653b6cee71dSXin LI * Do not remove the casts below. They are
1654b6cee71dSXin LI * vital. When later compared with the data,
1655b6cee71dSXin LI * the sign extension must have happened.
1656b6cee71dSXin LI */
1657b6cee71dSXin LI case FILE_BYTE:
165848c779cdSXin LI v = CAST(signed char, v);
1659b6cee71dSXin LI break;
1660b6cee71dSXin LI case FILE_SHORT:
1661b6cee71dSXin LI case FILE_BESHORT:
1662b6cee71dSXin LI case FILE_LESHORT:
166348c779cdSXin LI v = CAST(short, v);
1664b6cee71dSXin LI break;
1665b6cee71dSXin LI case FILE_DATE:
1666b6cee71dSXin LI case FILE_BEDATE:
1667b6cee71dSXin LI case FILE_LEDATE:
1668b6cee71dSXin LI case FILE_MEDATE:
1669b6cee71dSXin LI case FILE_LDATE:
1670b6cee71dSXin LI case FILE_BELDATE:
1671b6cee71dSXin LI case FILE_LELDATE:
1672b6cee71dSXin LI case FILE_MELDATE:
1673b6cee71dSXin LI case FILE_LONG:
1674b6cee71dSXin LI case FILE_BELONG:
1675b6cee71dSXin LI case FILE_LELONG:
1676b6cee71dSXin LI case FILE_MELONG:
1677b6cee71dSXin LI case FILE_FLOAT:
1678b6cee71dSXin LI case FILE_BEFLOAT:
1679b6cee71dSXin LI case FILE_LEFLOAT:
1680a4d6d3b8SXin LI case FILE_MSDOSDATE:
1681a4d6d3b8SXin LI case FILE_BEMSDOSDATE:
1682a4d6d3b8SXin LI case FILE_LEMSDOSDATE:
1683a4d6d3b8SXin LI case FILE_MSDOSTIME:
1684a4d6d3b8SXin LI case FILE_BEMSDOSTIME:
1685a4d6d3b8SXin LI case FILE_LEMSDOSTIME:
168648c779cdSXin LI v = CAST(int32_t, v);
1687b6cee71dSXin LI break;
1688b6cee71dSXin LI case FILE_QUAD:
1689b6cee71dSXin LI case FILE_BEQUAD:
1690b6cee71dSXin LI case FILE_LEQUAD:
1691b6cee71dSXin LI case FILE_QDATE:
1692b6cee71dSXin LI case FILE_QLDATE:
1693b6cee71dSXin LI case FILE_QWDATE:
1694b6cee71dSXin LI case FILE_BEQDATE:
1695b6cee71dSXin LI case FILE_BEQLDATE:
1696b6cee71dSXin LI case FILE_BEQWDATE:
1697b6cee71dSXin LI case FILE_LEQDATE:
1698b6cee71dSXin LI case FILE_LEQLDATE:
1699b6cee71dSXin LI case FILE_LEQWDATE:
1700b6cee71dSXin LI case FILE_DOUBLE:
1701b6cee71dSXin LI case FILE_BEDOUBLE:
1702b6cee71dSXin LI case FILE_LEDOUBLE:
17032726a701SXin LI case FILE_OFFSET:
170443a5ec4eSXin LI case FILE_BEVARINT:
170543a5ec4eSXin LI case FILE_LEVARINT:
170648c779cdSXin LI v = CAST(int64_t, v);
1707b6cee71dSXin LI break;
1708b6cee71dSXin LI case FILE_STRING:
1709b6cee71dSXin LI case FILE_PSTRING:
1710b6cee71dSXin LI case FILE_BESTRING16:
1711b6cee71dSXin LI case FILE_LESTRING16:
1712b6cee71dSXin LI case FILE_REGEX:
1713b6cee71dSXin LI case FILE_SEARCH:
1714b6cee71dSXin LI case FILE_DEFAULT:
1715b6cee71dSXin LI case FILE_INDIRECT:
1716b6cee71dSXin LI case FILE_NAME:
1717b6cee71dSXin LI case FILE_USE:
1718b6cee71dSXin LI case FILE_CLEAR:
17193e41d09dSXin LI case FILE_DER:
17202726a701SXin LI case FILE_GUID:
1721a2dfb722SXin LI case FILE_OCTAL:
1722b6cee71dSXin LI break;
1723b6cee71dSXin LI default:
1724b6cee71dSXin LI if (ms->flags & MAGIC_CHECK)
1725b6cee71dSXin LI file_magwarn(ms, "cannot happen: m->type=%d\n",
1726b6cee71dSXin LI m->type);
17272726a701SXin LI return FILE_BADSIZE;
1728b6cee71dSXin LI }
1729b6cee71dSXin LI }
1730b6cee71dSXin LI return v;
1731b6cee71dSXin LI }
1732b6cee71dSXin LI
1733898496eeSXin LI file_private int
string_modifier_check(struct magic_set * ms,struct magic * m)1734b6cee71dSXin LI string_modifier_check(struct magic_set *ms, struct magic *m)
1735b6cee71dSXin LI {
1736b6cee71dSXin LI if ((ms->flags & MAGIC_CHECK) == 0)
1737b6cee71dSXin LI return 0;
1738b6cee71dSXin LI
1739b6cee71dSXin LI if ((m->type != FILE_REGEX || (m->str_flags & REGEX_LINE_COUNT) == 0) &&
1740b6cee71dSXin LI (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0)) {
1741b6cee71dSXin LI file_magwarn(ms,
1742b6cee71dSXin LI "'/BHhLl' modifiers are only allowed for pascal strings\n");
1743b6cee71dSXin LI return -1;
1744b6cee71dSXin LI }
1745b6cee71dSXin LI switch (m->type) {
1746b6cee71dSXin LI case FILE_BESTRING16:
1747b6cee71dSXin LI case FILE_LESTRING16:
1748b6cee71dSXin LI if (m->str_flags != 0) {
1749b6cee71dSXin LI file_magwarn(ms,
1750b6cee71dSXin LI "no modifiers allowed for 16-bit strings\n");
1751b6cee71dSXin LI return -1;
1752b6cee71dSXin LI }
1753b6cee71dSXin LI break;
1754b6cee71dSXin LI case FILE_STRING:
1755b6cee71dSXin LI case FILE_PSTRING:
1756b6cee71dSXin LI if ((m->str_flags & REGEX_OFFSET_START) != 0) {
1757b6cee71dSXin LI file_magwarn(ms,
1758b6cee71dSXin LI "'/%c' only allowed on regex and search\n",
1759b6cee71dSXin LI CHAR_REGEX_OFFSET_START);
1760b6cee71dSXin LI return -1;
1761b6cee71dSXin LI }
1762b6cee71dSXin LI break;
1763b6cee71dSXin LI case FILE_SEARCH:
1764b6cee71dSXin LI if (m->str_range == 0) {
1765b6cee71dSXin LI file_magwarn(ms,
1766b6cee71dSXin LI "missing range; defaulting to %d\n",
1767b6cee71dSXin LI STRING_DEFAULT_RANGE);
1768b6cee71dSXin LI m->str_range = STRING_DEFAULT_RANGE;
1769b6cee71dSXin LI return -1;
1770b6cee71dSXin LI }
1771b6cee71dSXin LI break;
1772b6cee71dSXin LI case FILE_REGEX:
1773b6cee71dSXin LI if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) {
1774b6cee71dSXin LI file_magwarn(ms, "'/%c' not allowed on regex\n",
1775b6cee71dSXin LI CHAR_COMPACT_WHITESPACE);
1776b6cee71dSXin LI return -1;
1777b6cee71dSXin LI }
1778b6cee71dSXin LI if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) {
1779b6cee71dSXin LI file_magwarn(ms, "'/%c' not allowed on regex\n",
1780b6cee71dSXin LI CHAR_COMPACT_OPTIONAL_WHITESPACE);
1781b6cee71dSXin LI return -1;
1782b6cee71dSXin LI }
1783b6cee71dSXin LI break;
1784b6cee71dSXin LI default:
1785b6cee71dSXin LI file_magwarn(ms, "coding error: m->type=%d\n",
1786b6cee71dSXin LI m->type);
1787b6cee71dSXin LI return -1;
1788b6cee71dSXin LI }
1789b6cee71dSXin LI return 0;
1790b6cee71dSXin LI }
1791b6cee71dSXin LI
1792898496eeSXin LI file_private int
get_op(char c)1793b6cee71dSXin LI get_op(char c)
1794b6cee71dSXin LI {
1795b6cee71dSXin LI switch (c) {
1796b6cee71dSXin LI case '&':
1797b6cee71dSXin LI return FILE_OPAND;
1798b6cee71dSXin LI case '|':
1799b6cee71dSXin LI return FILE_OPOR;
1800b6cee71dSXin LI case '^':
1801b6cee71dSXin LI return FILE_OPXOR;
1802b6cee71dSXin LI case '+':
1803b6cee71dSXin LI return FILE_OPADD;
1804b6cee71dSXin LI case '-':
1805b6cee71dSXin LI return FILE_OPMINUS;
1806b6cee71dSXin LI case '*':
1807b6cee71dSXin LI return FILE_OPMULTIPLY;
1808b6cee71dSXin LI case '/':
1809b6cee71dSXin LI return FILE_OPDIVIDE;
1810b6cee71dSXin LI case '%':
1811b6cee71dSXin LI return FILE_OPMODULO;
1812b6cee71dSXin LI default:
1813b6cee71dSXin LI return -1;
1814b6cee71dSXin LI }
1815b6cee71dSXin LI }
1816b6cee71dSXin LI
1817b6cee71dSXin LI #ifdef ENABLE_CONDITIONALS
1818898496eeSXin LI file_private int
get_cond(const char * l,const char ** t)1819b6cee71dSXin LI get_cond(const char *l, const char **t)
1820b6cee71dSXin LI {
1821b6cee71dSXin LI static const struct cond_tbl_s {
1822b6cee71dSXin LI char name[8];
1823b6cee71dSXin LI size_t len;
1824b6cee71dSXin LI int cond;
1825b6cee71dSXin LI } cond_tbl[] = {
1826b6cee71dSXin LI { "if", 2, COND_IF },
1827b6cee71dSXin LI { "elif", 4, COND_ELIF },
1828b6cee71dSXin LI { "else", 4, COND_ELSE },
1829b6cee71dSXin LI { "", 0, COND_NONE },
1830b6cee71dSXin LI };
1831b6cee71dSXin LI const struct cond_tbl_s *p;
1832b6cee71dSXin LI
1833b6cee71dSXin LI for (p = cond_tbl; p->len; p++) {
1834b6cee71dSXin LI if (strncmp(l, p->name, p->len) == 0 &&
183548c779cdSXin LI isspace(CAST(unsigned char, l[p->len]))) {
1836b6cee71dSXin LI if (t)
1837b6cee71dSXin LI *t = l + p->len;
1838b6cee71dSXin LI break;
1839b6cee71dSXin LI }
1840b6cee71dSXin LI }
1841b6cee71dSXin LI return p->cond;
1842b6cee71dSXin LI }
1843b6cee71dSXin LI
1844898496eeSXin LI file_private int
check_cond(struct magic_set * ms,int cond,uint32_t cont_level)1845b6cee71dSXin LI check_cond(struct magic_set *ms, int cond, uint32_t cont_level)
1846b6cee71dSXin LI {
1847b6cee71dSXin LI int last_cond;
1848b6cee71dSXin LI last_cond = ms->c.li[cont_level].last_cond;
1849b6cee71dSXin LI
1850b6cee71dSXin LI switch (cond) {
1851b6cee71dSXin LI case COND_IF:
1852b6cee71dSXin LI if (last_cond != COND_NONE && last_cond != COND_ELIF) {
1853b6cee71dSXin LI if (ms->flags & MAGIC_CHECK)
1854b6cee71dSXin LI file_magwarn(ms, "syntax error: `if'");
1855b6cee71dSXin LI return -1;
1856b6cee71dSXin LI }
1857b6cee71dSXin LI last_cond = COND_IF;
1858b6cee71dSXin LI break;
1859b6cee71dSXin LI
1860b6cee71dSXin LI case COND_ELIF:
1861b6cee71dSXin LI if (last_cond != COND_IF && last_cond != COND_ELIF) {
1862b6cee71dSXin LI if (ms->flags & MAGIC_CHECK)
1863b6cee71dSXin LI file_magwarn(ms, "syntax error: `elif'");
1864b6cee71dSXin LI return -1;
1865b6cee71dSXin LI }
1866b6cee71dSXin LI last_cond = COND_ELIF;
1867b6cee71dSXin LI break;
1868b6cee71dSXin LI
1869b6cee71dSXin LI case COND_ELSE:
1870b6cee71dSXin LI if (last_cond != COND_IF && last_cond != COND_ELIF) {
1871b6cee71dSXin LI if (ms->flags & MAGIC_CHECK)
1872b6cee71dSXin LI file_magwarn(ms, "syntax error: `else'");
1873b6cee71dSXin LI return -1;
1874b6cee71dSXin LI }
1875b6cee71dSXin LI last_cond = COND_NONE;
1876b6cee71dSXin LI break;
1877b6cee71dSXin LI
1878b6cee71dSXin LI case COND_NONE:
1879b6cee71dSXin LI last_cond = COND_NONE;
1880b6cee71dSXin LI break;
1881b6cee71dSXin LI }
1882b6cee71dSXin LI
1883b6cee71dSXin LI ms->c.li[cont_level].last_cond = last_cond;
1884b6cee71dSXin LI return 0;
1885b6cee71dSXin LI }
1886b6cee71dSXin LI #endif /* ENABLE_CONDITIONALS */
1887b6cee71dSXin LI
1888898496eeSXin LI file_private int
parse_indirect_modifier(struct magic_set * ms,struct magic * m,const char ** lp)18894460e5b0SXin LI parse_indirect_modifier(struct magic_set *ms, struct magic *m, const char **lp)
18904460e5b0SXin LI {
18914460e5b0SXin LI const char *l = *lp;
18924460e5b0SXin LI
189348c779cdSXin LI while (!isspace(CAST(unsigned char, *++l)))
18944460e5b0SXin LI switch (*l) {
18954460e5b0SXin LI case CHAR_INDIRECT_RELATIVE:
18964460e5b0SXin LI m->str_flags |= INDIRECT_RELATIVE;
18974460e5b0SXin LI break;
18984460e5b0SXin LI default:
18994460e5b0SXin LI if (ms->flags & MAGIC_CHECK)
19004460e5b0SXin LI file_magwarn(ms, "indirect modifier `%c' "
19014460e5b0SXin LI "invalid", *l);
19024460e5b0SXin LI *lp = l;
19034460e5b0SXin LI return -1;
19044460e5b0SXin LI }
19054460e5b0SXin LI *lp = l;
19064460e5b0SXin LI return 0;
19074460e5b0SXin LI }
19084460e5b0SXin LI
1909898496eeSXin LI file_private void
parse_op_modifier(struct magic_set * ms,struct magic * m,const char ** lp,int op)19104460e5b0SXin LI parse_op_modifier(struct magic_set *ms, struct magic *m, const char **lp,
19114460e5b0SXin LI int op)
19124460e5b0SXin LI {
19134460e5b0SXin LI const char *l = *lp;
19144460e5b0SXin LI char *t;
19154460e5b0SXin LI uint64_t val;
19164460e5b0SXin LI
19174460e5b0SXin LI ++l;
19184460e5b0SXin LI m->mask_op |= op;
191948c779cdSXin LI val = CAST(uint64_t, strtoull(l, &t, 0));
19204460e5b0SXin LI l = t;
19214460e5b0SXin LI m->num_mask = file_signextend(ms, m, val);
19224460e5b0SXin LI eatsize(&l);
19234460e5b0SXin LI *lp = l;
19244460e5b0SXin LI }
19254460e5b0SXin LI
1926898496eeSXin LI file_private int
parse_string_modifier(struct magic_set * ms,struct magic * m,const char ** lp)19274460e5b0SXin LI parse_string_modifier(struct magic_set *ms, struct magic *m, const char **lp)
19284460e5b0SXin LI {
19294460e5b0SXin LI const char *l = *lp;
19304460e5b0SXin LI char *t;
19314460e5b0SXin LI int have_range = 0;
19324460e5b0SXin LI
193348c779cdSXin LI while (!isspace(CAST(unsigned char, *++l))) {
19344460e5b0SXin LI switch (*l) {
19354460e5b0SXin LI case '0': case '1': case '2':
19364460e5b0SXin LI case '3': case '4': case '5':
19374460e5b0SXin LI case '6': case '7': case '8':
19384460e5b0SXin LI case '9':
19394460e5b0SXin LI if (have_range && (ms->flags & MAGIC_CHECK))
19404460e5b0SXin LI file_magwarn(ms, "multiple ranges");
19414460e5b0SXin LI have_range = 1;
19424460e5b0SXin LI m->str_range = CAST(uint32_t, strtoul(l, &t, 0));
19434460e5b0SXin LI if (m->str_range == 0)
19444460e5b0SXin LI file_magwarn(ms, "zero range");
19454460e5b0SXin LI l = t - 1;
19464460e5b0SXin LI break;
19474460e5b0SXin LI case CHAR_COMPACT_WHITESPACE:
19484460e5b0SXin LI m->str_flags |= STRING_COMPACT_WHITESPACE;
19494460e5b0SXin LI break;
19504460e5b0SXin LI case CHAR_COMPACT_OPTIONAL_WHITESPACE:
19514460e5b0SXin LI m->str_flags |= STRING_COMPACT_OPTIONAL_WHITESPACE;
19524460e5b0SXin LI break;
19534460e5b0SXin LI case CHAR_IGNORE_LOWERCASE:
19544460e5b0SXin LI m->str_flags |= STRING_IGNORE_LOWERCASE;
19554460e5b0SXin LI break;
19564460e5b0SXin LI case CHAR_IGNORE_UPPERCASE:
19574460e5b0SXin LI m->str_flags |= STRING_IGNORE_UPPERCASE;
19584460e5b0SXin LI break;
19594460e5b0SXin LI case CHAR_REGEX_OFFSET_START:
19604460e5b0SXin LI m->str_flags |= REGEX_OFFSET_START;
19614460e5b0SXin LI break;
19624460e5b0SXin LI case CHAR_BINTEST:
19634460e5b0SXin LI m->str_flags |= STRING_BINTEST;
19644460e5b0SXin LI break;
19654460e5b0SXin LI case CHAR_TEXTTEST:
19664460e5b0SXin LI m->str_flags |= STRING_TEXTTEST;
19674460e5b0SXin LI break;
19684460e5b0SXin LI case CHAR_TRIM:
19694460e5b0SXin LI m->str_flags |= STRING_TRIM;
19704460e5b0SXin LI break;
197143a5ec4eSXin LI case CHAR_FULL_WORD:
197243a5ec4eSXin LI m->str_flags |= STRING_FULL_WORD;
197343a5ec4eSXin LI break;
19744460e5b0SXin LI case CHAR_PSTRING_1_LE:
19754460e5b0SXin LI #define SET_LENGTH(a) m->str_flags = (m->str_flags & ~PSTRING_LEN) | (a)
19764460e5b0SXin LI if (m->type != FILE_PSTRING)
19774460e5b0SXin LI goto bad;
19784460e5b0SXin LI SET_LENGTH(PSTRING_1_LE);
19794460e5b0SXin LI break;
19804460e5b0SXin LI case CHAR_PSTRING_2_BE:
19814460e5b0SXin LI if (m->type != FILE_PSTRING)
19824460e5b0SXin LI goto bad;
19834460e5b0SXin LI SET_LENGTH(PSTRING_2_BE);
19844460e5b0SXin LI break;
19854460e5b0SXin LI case CHAR_PSTRING_2_LE:
19864460e5b0SXin LI if (m->type != FILE_PSTRING)
19874460e5b0SXin LI goto bad;
19884460e5b0SXin LI SET_LENGTH(PSTRING_2_LE);
19894460e5b0SXin LI break;
19904460e5b0SXin LI case CHAR_PSTRING_4_BE:
19914460e5b0SXin LI if (m->type != FILE_PSTRING)
19924460e5b0SXin LI goto bad;
19934460e5b0SXin LI SET_LENGTH(PSTRING_4_BE);
19944460e5b0SXin LI break;
19954460e5b0SXin LI case CHAR_PSTRING_4_LE:
19964460e5b0SXin LI switch (m->type) {
19974460e5b0SXin LI case FILE_PSTRING:
19984460e5b0SXin LI case FILE_REGEX:
19994460e5b0SXin LI break;
20004460e5b0SXin LI default:
20014460e5b0SXin LI goto bad;
20024460e5b0SXin LI }
20034460e5b0SXin LI SET_LENGTH(PSTRING_4_LE);
20044460e5b0SXin LI break;
20054460e5b0SXin LI case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:
20064460e5b0SXin LI if (m->type != FILE_PSTRING)
20074460e5b0SXin LI goto bad;
20084460e5b0SXin LI m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF;
20094460e5b0SXin LI break;
20104460e5b0SXin LI default:
20114460e5b0SXin LI bad:
20124460e5b0SXin LI if (ms->flags & MAGIC_CHECK)
20134460e5b0SXin LI file_magwarn(ms, "string modifier `%c' "
20144460e5b0SXin LI "invalid", *l);
20154460e5b0SXin LI goto out;
20164460e5b0SXin LI }
20174460e5b0SXin LI /* allow multiple '/' for readability */
201848c779cdSXin LI if (l[1] == '/' && !isspace(CAST(unsigned char, l[2])))
20194460e5b0SXin LI l++;
20204460e5b0SXin LI }
20214460e5b0SXin LI if (string_modifier_check(ms, m) == -1)
20224460e5b0SXin LI goto out;
20234460e5b0SXin LI *lp = l;
20244460e5b0SXin LI return 0;
20254460e5b0SXin LI out:
20264460e5b0SXin LI *lp = l;
20274460e5b0SXin LI return -1;
20284460e5b0SXin LI }
20294460e5b0SXin LI
2030b6cee71dSXin LI /*
2031b6cee71dSXin LI * parse one line from magic file, put into magic[index++] if valid
2032b6cee71dSXin LI */
2033898496eeSXin LI file_private int
parse(struct magic_set * ms,struct magic_entry * me,const char * file,const char * line,size_t lineno,int action)2034*ae316d1dSXin LI parse(struct magic_set *ms, struct magic_entry *me, const char *file,
2035*ae316d1dSXin LI const char *line, size_t lineno, int action)
2036b6cee71dSXin LI {
2037b6cee71dSXin LI #ifdef ENABLE_CONDITIONALS
2038b6cee71dSXin LI static uint32_t last_cont_level = 0;
2039b6cee71dSXin LI #endif
2040b6cee71dSXin LI size_t i;
2041b6cee71dSXin LI struct magic *m;
2042b6cee71dSXin LI const char *l = line;
2043b6cee71dSXin LI char *t;
2044b6cee71dSXin LI int op;
2045b6cee71dSXin LI uint32_t cont_level;
2046b6cee71dSXin LI int32_t diff;
2047b6cee71dSXin LI
2048b6cee71dSXin LI cont_level = 0;
2049b6cee71dSXin LI
2050b6cee71dSXin LI /*
2051b6cee71dSXin LI * Parse the offset.
2052b6cee71dSXin LI */
2053b6cee71dSXin LI while (*l == '>') {
2054b6cee71dSXin LI ++l; /* step over */
2055b6cee71dSXin LI cont_level++;
2056b6cee71dSXin LI }
2057b6cee71dSXin LI #ifdef ENABLE_CONDITIONALS
2058b6cee71dSXin LI if (cont_level == 0 || cont_level > last_cont_level)
2059b6cee71dSXin LI if (file_check_mem(ms, cont_level) == -1)
2060b6cee71dSXin LI return -1;
2061b6cee71dSXin LI last_cont_level = cont_level;
2062b6cee71dSXin LI #endif
2063b6cee71dSXin LI if (cont_level != 0) {
2064b6cee71dSXin LI if (me->mp == NULL) {
2065b6cee71dSXin LI file_magerror(ms, "No current entry for continuation");
2066b6cee71dSXin LI return -1;
2067b6cee71dSXin LI }
2068b6cee71dSXin LI if (me->cont_count == 0) {
2069b6cee71dSXin LI file_magerror(ms, "Continuations present with 0 count");
2070b6cee71dSXin LI return -1;
2071b6cee71dSXin LI }
2072b6cee71dSXin LI m = &me->mp[me->cont_count - 1];
207348c779cdSXin LI diff = CAST(int32_t, cont_level) - CAST(int32_t, m->cont_level);
2074b6cee71dSXin LI if (diff > 1)
2075b6cee71dSXin LI file_magwarn(ms, "New continuation level %u is more "
2076b6cee71dSXin LI "than one larger than current level %u", cont_level,
2077b6cee71dSXin LI m->cont_level);
2078b6cee71dSXin LI if (me->cont_count == me->max_count) {
2079b6cee71dSXin LI struct magic *nm;
2080b6cee71dSXin LI size_t cnt = me->max_count + ALLOC_CHUNK;
2081b6cee71dSXin LI if ((nm = CAST(struct magic *, realloc(me->mp,
2082b6cee71dSXin LI sizeof(*nm) * cnt))) == NULL) {
2083b6cee71dSXin LI file_oomem(ms, sizeof(*nm) * cnt);
2084b6cee71dSXin LI return -1;
2085b6cee71dSXin LI }
208648c779cdSXin LI me->mp = nm;
2087b6cee71dSXin LI me->max_count = CAST(uint32_t, cnt);
2088b6cee71dSXin LI }
2089b6cee71dSXin LI m = &me->mp[me->cont_count++];
2090b6cee71dSXin LI (void)memset(m, 0, sizeof(*m));
2091b6cee71dSXin LI m->cont_level = cont_level;
2092b6cee71dSXin LI } else {
2093b6cee71dSXin LI static const size_t len = sizeof(*m) * ALLOC_CHUNK;
2094b6cee71dSXin LI if (me->mp != NULL)
2095b6cee71dSXin LI return 1;
2096b6cee71dSXin LI if ((m = CAST(struct magic *, malloc(len))) == NULL) {
2097b6cee71dSXin LI file_oomem(ms, len);
2098b6cee71dSXin LI return -1;
2099b6cee71dSXin LI }
2100b6cee71dSXin LI me->mp = m;
2101b6cee71dSXin LI me->max_count = ALLOC_CHUNK;
2102b6cee71dSXin LI (void)memset(m, 0, sizeof(*m));
2103b6cee71dSXin LI m->factor_op = FILE_FACTOR_OP_NONE;
2104b6cee71dSXin LI m->cont_level = 0;
2105b6cee71dSXin LI me->cont_count = 1;
2106b6cee71dSXin LI }
2107b6cee71dSXin LI m->lineno = CAST(uint32_t, lineno);
2108b6cee71dSXin LI
2109b6cee71dSXin LI if (*l == '&') { /* m->cont_level == 0 checked below. */
2110b6cee71dSXin LI ++l; /* step over */
2111b6cee71dSXin LI m->flag |= OFFADD;
2112b6cee71dSXin LI }
2113b6cee71dSXin LI if (*l == '(') {
2114b6cee71dSXin LI ++l; /* step over */
2115b6cee71dSXin LI m->flag |= INDIR;
2116b6cee71dSXin LI if (m->flag & OFFADD)
2117b6cee71dSXin LI m->flag = (m->flag & ~OFFADD) | INDIROFFADD;
2118b6cee71dSXin LI
2119b6cee71dSXin LI if (*l == '&') { /* m->cont_level == 0 checked below */
2120b6cee71dSXin LI ++l; /* step over */
2121b6cee71dSXin LI m->flag |= OFFADD;
2122b6cee71dSXin LI }
2123b6cee71dSXin LI }
2124b6cee71dSXin LI /* Indirect offsets are not valid at level 0. */
21255f0216bdSXin LI if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD))) {
2126b6cee71dSXin LI if (ms->flags & MAGIC_CHECK)
2127b6cee71dSXin LI file_magwarn(ms, "relative offset at level 0");
21285f0216bdSXin LI return -1;
21295f0216bdSXin LI }
2130b6cee71dSXin LI
2131b6cee71dSXin LI /* get offset, then skip over it */
2132*ae316d1dSXin LI if (*l == '-' || *l == '+') {
21332726a701SXin LI ++l; /* step over */
2134*ae316d1dSXin LI m->flag |= l[-1] == '-' ? OFFNEGATIVE : OFFPOSITIVE;
21352726a701SXin LI }
213648c779cdSXin LI m->offset = CAST(int32_t, strtol(l, &t, 0));
21375f0216bdSXin LI if (l == t) {
2138b6cee71dSXin LI if (ms->flags & MAGIC_CHECK)
2139b6cee71dSXin LI file_magwarn(ms, "offset `%s' invalid", l);
21405f0216bdSXin LI return -1;
21415f0216bdSXin LI }
2142d38c30c0SXin LI
2143b6cee71dSXin LI l = t;
2144b6cee71dSXin LI
2145b6cee71dSXin LI if (m->flag & INDIR) {
2146b6cee71dSXin LI m->in_type = FILE_LONG;
2147b6cee71dSXin LI m->in_offset = 0;
2148a5d223e6SXin LI m->in_op = 0;
2149b6cee71dSXin LI /*
2150a5d223e6SXin LI * read [.,lbs][+-]nnnnn)
2151b6cee71dSXin LI */
2152a5d223e6SXin LI if (*l == '.' || *l == ',') {
2153a5d223e6SXin LI if (*l == ',')
2154a5d223e6SXin LI m->in_op |= FILE_OPSIGNED;
2155b6cee71dSXin LI l++;
2156b6cee71dSXin LI switch (*l) {
2157b6cee71dSXin LI case 'l':
2158b6cee71dSXin LI m->in_type = FILE_LELONG;
2159b6cee71dSXin LI break;
2160b6cee71dSXin LI case 'L':
2161b6cee71dSXin LI m->in_type = FILE_BELONG;
2162b6cee71dSXin LI break;
2163b6cee71dSXin LI case 'm':
2164b6cee71dSXin LI m->in_type = FILE_MELONG;
2165b6cee71dSXin LI break;
2166b6cee71dSXin LI case 'h':
2167b6cee71dSXin LI case 's':
2168b6cee71dSXin LI m->in_type = FILE_LESHORT;
2169b6cee71dSXin LI break;
2170b6cee71dSXin LI case 'H':
2171b6cee71dSXin LI case 'S':
2172b6cee71dSXin LI m->in_type = FILE_BESHORT;
2173b6cee71dSXin LI break;
2174b6cee71dSXin LI case 'c':
2175b6cee71dSXin LI case 'b':
2176b6cee71dSXin LI case 'C':
2177b6cee71dSXin LI case 'B':
2178b6cee71dSXin LI m->in_type = FILE_BYTE;
2179b6cee71dSXin LI break;
2180b6cee71dSXin LI case 'e':
2181b6cee71dSXin LI case 'f':
2182b6cee71dSXin LI case 'g':
2183b6cee71dSXin LI m->in_type = FILE_LEDOUBLE;
2184b6cee71dSXin LI break;
2185b6cee71dSXin LI case 'E':
2186b6cee71dSXin LI case 'F':
2187b6cee71dSXin LI case 'G':
2188b6cee71dSXin LI m->in_type = FILE_BEDOUBLE;
2189b6cee71dSXin LI break;
2190b6cee71dSXin LI case 'i':
2191b6cee71dSXin LI m->in_type = FILE_LEID3;
2192b6cee71dSXin LI break;
2193b6cee71dSXin LI case 'I':
2194b6cee71dSXin LI m->in_type = FILE_BEID3;
2195b6cee71dSXin LI break;
2196a2dfb722SXin LI case 'o':
2197a2dfb722SXin LI m->in_type = FILE_OCTAL;
2198a2dfb722SXin LI break;
21992dc4dbb9SEitan Adler case 'q':
22002dc4dbb9SEitan Adler m->in_type = FILE_LEQUAD;
22012dc4dbb9SEitan Adler break;
22022dc4dbb9SEitan Adler case 'Q':
22032dc4dbb9SEitan Adler m->in_type = FILE_BEQUAD;
22042dc4dbb9SEitan Adler break;
2205b6cee71dSXin LI default:
2206b6cee71dSXin LI if (ms->flags & MAGIC_CHECK)
2207b6cee71dSXin LI file_magwarn(ms,
2208b6cee71dSXin LI "indirect offset type `%c' invalid",
2209b6cee71dSXin LI *l);
22105f0216bdSXin LI return -1;
2211b6cee71dSXin LI }
2212b6cee71dSXin LI l++;
2213b6cee71dSXin LI }
2214b6cee71dSXin LI
2215b6cee71dSXin LI if (*l == '~') {
2216b6cee71dSXin LI m->in_op |= FILE_OPINVERSE;
2217b6cee71dSXin LI l++;
2218b6cee71dSXin LI }
2219b6cee71dSXin LI if ((op = get_op(*l)) != -1) {
2220b6cee71dSXin LI m->in_op |= op;
2221b6cee71dSXin LI l++;
2222b6cee71dSXin LI }
2223b6cee71dSXin LI if (*l == '(') {
2224b6cee71dSXin LI m->in_op |= FILE_OPINDIRECT;
2225b6cee71dSXin LI l++;
2226b6cee71dSXin LI }
222748c779cdSXin LI if (isdigit(CAST(unsigned char, *l)) || *l == '-') {
222848c779cdSXin LI m->in_offset = CAST(int32_t, strtol(l, &t, 0));
22295f0216bdSXin LI if (l == t) {
2230b6cee71dSXin LI if (ms->flags & MAGIC_CHECK)
2231b6cee71dSXin LI file_magwarn(ms,
2232b6cee71dSXin LI "in_offset `%s' invalid", l);
22335f0216bdSXin LI return -1;
22345f0216bdSXin LI }
2235b6cee71dSXin LI l = t;
2236b6cee71dSXin LI }
2237b6cee71dSXin LI if (*l++ != ')' ||
22385f0216bdSXin LI ((m->in_op & FILE_OPINDIRECT) && *l++ != ')')) {
2239b6cee71dSXin LI if (ms->flags & MAGIC_CHECK)
2240b6cee71dSXin LI file_magwarn(ms,
2241b6cee71dSXin LI "missing ')' in indirect offset");
22425f0216bdSXin LI return -1;
22435f0216bdSXin LI }
2244b6cee71dSXin LI }
2245b6cee71dSXin LI EATAB;
2246b6cee71dSXin LI
2247b6cee71dSXin LI #ifdef ENABLE_CONDITIONALS
2248b6cee71dSXin LI m->cond = get_cond(l, &l);
2249b6cee71dSXin LI if (check_cond(ms, m->cond, cont_level) == -1)
2250b6cee71dSXin LI return -1;
2251b6cee71dSXin LI
2252b6cee71dSXin LI EATAB;
2253b6cee71dSXin LI #endif
2254b6cee71dSXin LI
2255b6cee71dSXin LI /*
2256b6cee71dSXin LI * Parse the type.
2257b6cee71dSXin LI */
2258b6cee71dSXin LI if (*l == 'u') {
2259b6cee71dSXin LI /*
2260b6cee71dSXin LI * Try it as a keyword type prefixed by "u"; match what
2261b6cee71dSXin LI * follows the "u". If that fails, try it as an SUS
2262b6cee71dSXin LI * integer type.
2263b6cee71dSXin LI */
2264b6cee71dSXin LI m->type = get_type(type_tbl, l + 1, &l);
2265b6cee71dSXin LI if (m->type == FILE_INVALID) {
2266b6cee71dSXin LI /*
2267b6cee71dSXin LI * Not a keyword type; parse it as an SUS type,
2268b6cee71dSXin LI * 'u' possibly followed by a number or C/S/L.
2269b6cee71dSXin LI */
2270b6cee71dSXin LI m->type = get_standard_integer_type(l, &l);
2271b6cee71dSXin LI }
2272b6cee71dSXin LI /* It's unsigned. */
2273b6cee71dSXin LI if (m->type != FILE_INVALID)
2274b6cee71dSXin LI m->flag |= UNSIGNED;
2275b6cee71dSXin LI } else {
2276b6cee71dSXin LI /*
2277b6cee71dSXin LI * Try it as a keyword type. If that fails, try it as
2278b6cee71dSXin LI * an SUS integer type if it begins with "d" or as an
2279b6cee71dSXin LI * SUS string type if it begins with "s". In any case,
2280b6cee71dSXin LI * it's not unsigned.
2281b6cee71dSXin LI */
2282b6cee71dSXin LI m->type = get_type(type_tbl, l, &l);
2283b6cee71dSXin LI if (m->type == FILE_INVALID) {
2284b6cee71dSXin LI /*
2285b6cee71dSXin LI * Not a keyword type; parse it as an SUS type,
2286b6cee71dSXin LI * either 'd' possibly followed by a number or
2287b6cee71dSXin LI * C/S/L, or just 's'.
2288b6cee71dSXin LI */
2289b6cee71dSXin LI if (*l == 'd')
2290b6cee71dSXin LI m->type = get_standard_integer_type(l, &l);
229148c779cdSXin LI else if (*l == 's'
229248c779cdSXin LI && !isalpha(CAST(unsigned char, l[1]))) {
2293b6cee71dSXin LI m->type = FILE_STRING;
2294b6cee71dSXin LI ++l;
2295b6cee71dSXin LI }
2296b6cee71dSXin LI }
2297b6cee71dSXin LI }
2298b6cee71dSXin LI
2299b6cee71dSXin LI if (m->type == FILE_INVALID) {
2300b6cee71dSXin LI /* Not found - try it as a special keyword. */
2301b6cee71dSXin LI m->type = get_type(special_tbl, l, &l);
2302b6cee71dSXin LI }
2303b6cee71dSXin LI
2304b6cee71dSXin LI if (m->type == FILE_INVALID) {
2305b6cee71dSXin LI if (ms->flags & MAGIC_CHECK)
2306b6cee71dSXin LI file_magwarn(ms, "type `%s' invalid", l);
2307b6cee71dSXin LI return -1;
2308b6cee71dSXin LI }
2309b6cee71dSXin LI
231043a5ec4eSXin LI if (m->type == FILE_NAME && cont_level != 0) {
231143a5ec4eSXin LI if (ms->flags & MAGIC_CHECK)
231243a5ec4eSXin LI file_magwarn(ms, "`name%s' entries can only be "
231343a5ec4eSXin LI "declared at top level", l);
231443a5ec4eSXin LI return -1;
231543a5ec4eSXin LI }
231643a5ec4eSXin LI
2317b6cee71dSXin LI /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
2318b6cee71dSXin LI /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
2319b6cee71dSXin LI
2320b6cee71dSXin LI m->mask_op = 0;
2321b6cee71dSXin LI if (*l == '~') {
2322b6cee71dSXin LI if (!IS_STRING(m->type))
2323b6cee71dSXin LI m->mask_op |= FILE_OPINVERSE;
2324b6cee71dSXin LI else if (ms->flags & MAGIC_CHECK)
2325b6cee71dSXin LI file_magwarn(ms, "'~' invalid for string types");
2326b6cee71dSXin LI ++l;
2327b6cee71dSXin LI }
2328b6cee71dSXin LI m->str_range = 0;
2329b6cee71dSXin LI m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0;
2330b6cee71dSXin LI if ((op = get_op(*l)) != -1) {
23314460e5b0SXin LI if (IS_STRING(m->type)) {
23324460e5b0SXin LI int r;
23334460e5b0SXin LI
23344460e5b0SXin LI if (op != FILE_OPDIVIDE) {
2335b6cee71dSXin LI if (ms->flags & MAGIC_CHECK)
2336b6cee71dSXin LI file_magwarn(ms,
23374460e5b0SXin LI "invalid string/indirect op: "
23384460e5b0SXin LI "`%c'", *t);
2339b6cee71dSXin LI return -1;
2340b6cee71dSXin LI }
23414460e5b0SXin LI
23424460e5b0SXin LI if (m->type == FILE_INDIRECT)
23434460e5b0SXin LI r = parse_indirect_modifier(ms, m, &l);
23444460e5b0SXin LI else
23454460e5b0SXin LI r = parse_string_modifier(ms, m, &l);
23464460e5b0SXin LI if (r == -1)
2347b6cee71dSXin LI return -1;
23484460e5b0SXin LI } else
23494460e5b0SXin LI parse_op_modifier(ms, m, &l, op);
2350b6cee71dSXin LI }
23514460e5b0SXin LI
2352b6cee71dSXin LI /*
2353b6cee71dSXin LI * We used to set mask to all 1's here, instead let's just not do
2354b6cee71dSXin LI * anything if mask = 0 (unless you have a better idea)
2355b6cee71dSXin LI */
2356b6cee71dSXin LI EATAB;
2357b6cee71dSXin LI
2358b6cee71dSXin LI switch (*l) {
2359b6cee71dSXin LI case '>':
2360b6cee71dSXin LI case '<':
2361b6cee71dSXin LI m->reln = *l;
2362b6cee71dSXin LI ++l;
2363b6cee71dSXin LI if (*l == '=') {
2364b6cee71dSXin LI if (ms->flags & MAGIC_CHECK) {
2365b6cee71dSXin LI file_magwarn(ms, "%c= not supported",
2366b6cee71dSXin LI m->reln);
2367b6cee71dSXin LI return -1;
2368b6cee71dSXin LI }
2369b6cee71dSXin LI ++l;
2370b6cee71dSXin LI }
2371b6cee71dSXin LI break;
2372b6cee71dSXin LI /* Old-style anding: "0 byte &0x80 dynamically linked" */
2373b6cee71dSXin LI case '&':
2374b6cee71dSXin LI case '^':
2375b6cee71dSXin LI case '=':
2376b6cee71dSXin LI m->reln = *l;
2377b6cee71dSXin LI ++l;
2378b6cee71dSXin LI if (*l == '=') {
2379b6cee71dSXin LI /* HP compat: ignore &= etc. */
2380b6cee71dSXin LI ++l;
2381b6cee71dSXin LI }
2382b6cee71dSXin LI break;
2383b6cee71dSXin LI case '!':
2384b6cee71dSXin LI m->reln = *l;
2385b6cee71dSXin LI ++l;
2386b6cee71dSXin LI break;
2387b6cee71dSXin LI default:
2388b6cee71dSXin LI m->reln = '='; /* the default relation */
238948c779cdSXin LI if (*l == 'x' && ((isascii(CAST(unsigned char, l[1])) &&
239048c779cdSXin LI isspace(CAST(unsigned char, l[1]))) || !l[1])) {
2391b6cee71dSXin LI m->reln = *l;
2392b6cee71dSXin LI ++l;
2393b6cee71dSXin LI }
2394b6cee71dSXin LI break;
2395b6cee71dSXin LI }
2396b6cee71dSXin LI /*
2397b6cee71dSXin LI * Grab the value part, except for an 'x' reln.
2398b6cee71dSXin LI */
2399b6cee71dSXin LI if (m->reln != 'x' && getvalue(ms, m, &l, action))
2400b6cee71dSXin LI return -1;
2401b6cee71dSXin LI
2402b6cee71dSXin LI /*
2403b6cee71dSXin LI * TODO finish this macro and start using it!
24043e41d09dSXin LI * #define offsetcheck {if (offset > ms->bytes_max -1)
2405b6cee71dSXin LI * magwarn("offset too big"); }
2406b6cee71dSXin LI */
2407b6cee71dSXin LI
2408b6cee71dSXin LI /*
2409b6cee71dSXin LI * Now get last part - the description
2410b6cee71dSXin LI */
2411b6cee71dSXin LI EATAB;
2412b6cee71dSXin LI if (l[0] == '\b') {
2413b6cee71dSXin LI ++l;
2414b6cee71dSXin LI m->flag |= NOSPACE;
2415b6cee71dSXin LI } else if ((l[0] == '\\') && (l[1] == 'b')) {
2416b6cee71dSXin LI ++l;
2417b6cee71dSXin LI ++l;
2418b6cee71dSXin LI m->flag |= NOSPACE;
2419b6cee71dSXin LI }
2420b6cee71dSXin LI for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); )
2421b6cee71dSXin LI continue;
2422*ae316d1dSXin LI if (m->desc[0] == '\0') {
2423*ae316d1dSXin LI // Tuck in the filename for debugging.
2424*ae316d1dSXin LI strlcpy(m->desc + 1, file, sizeof(m->desc) - 1);
2425*ae316d1dSXin LI }
2426b6cee71dSXin LI if (i == sizeof(m->desc)) {
2427b6cee71dSXin LI m->desc[sizeof(m->desc) - 1] = '\0';
2428b6cee71dSXin LI if (ms->flags & MAGIC_CHECK)
2429b6cee71dSXin LI file_magwarn(ms, "description `%s' truncated", m->desc);
2430b6cee71dSXin LI }
2431b6cee71dSXin LI
2432b6cee71dSXin LI /*
2433b6cee71dSXin LI * We only do this check while compiling, or if any of the magic
2434b6cee71dSXin LI * files were not compiled.
2435b6cee71dSXin LI */
2436b6cee71dSXin LI if (ms->flags & MAGIC_CHECK) {
2437b6cee71dSXin LI if (check_format(ms, m) == -1)
2438b6cee71dSXin LI return -1;
2439b6cee71dSXin LI }
2440b6cee71dSXin LI #ifndef COMPILE_ONLY
2441b6cee71dSXin LI if (action == FILE_CHECK) {
2442b6cee71dSXin LI file_mdump(m);
2443b6cee71dSXin LI }
2444b6cee71dSXin LI #endif
2445b6cee71dSXin LI m->mimetype[0] = '\0'; /* initialise MIME type to none */
2446b6cee71dSXin LI return 0;
2447b6cee71dSXin LI }
2448b6cee71dSXin LI
2449b6cee71dSXin LI /*
2450b6cee71dSXin LI * parse a STRENGTH annotation line from magic file, put into magic[index - 1]
2451b6cee71dSXin LI * if valid
2452b6cee71dSXin LI */
2453a2dfb722SXin LI /*ARGSUSED*/
2454898496eeSXin LI file_private int
parse_strength(struct magic_set * ms,struct magic_entry * me,const char * line,size_t len)24552726a701SXin LI parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line,
24562726a701SXin LI size_t len __attribute__((__unused__)))
2457b6cee71dSXin LI {
2458b6cee71dSXin LI const char *l = line;
2459b6cee71dSXin LI char *el;
2460b6cee71dSXin LI unsigned long factor;
2461898496eeSXin LI char sbuf[512];
2462b6cee71dSXin LI struct magic *m = &me->mp[0];
2463b6cee71dSXin LI
2464b6cee71dSXin LI if (m->factor_op != FILE_FACTOR_OP_NONE) {
2465b6cee71dSXin LI file_magwarn(ms,
2466b6cee71dSXin LI "Current entry already has a strength type: %c %d",
2467b6cee71dSXin LI m->factor_op, m->factor);
2468b6cee71dSXin LI return -1;
2469b6cee71dSXin LI }
2470b6cee71dSXin LI if (m->type == FILE_NAME) {
2471b6cee71dSXin LI file_magwarn(ms, "%s: Strength setting is not supported in "
2472898496eeSXin LI "\"name\" magic entries",
2473898496eeSXin LI file_printable(ms, sbuf, sizeof(sbuf), m->value.s,
2474898496eeSXin LI sizeof(m->value.s)));
2475b6cee71dSXin LI return -1;
2476b6cee71dSXin LI }
2477b6cee71dSXin LI EATAB;
2478b6cee71dSXin LI switch (*l) {
2479b6cee71dSXin LI case FILE_FACTOR_OP_NONE:
2480898496eeSXin LI break;
2481b6cee71dSXin LI case FILE_FACTOR_OP_PLUS:
2482b6cee71dSXin LI case FILE_FACTOR_OP_MINUS:
2483b6cee71dSXin LI case FILE_FACTOR_OP_TIMES:
2484b6cee71dSXin LI case FILE_FACTOR_OP_DIV:
2485b6cee71dSXin LI m->factor_op = *l++;
2486b6cee71dSXin LI break;
2487b6cee71dSXin LI default:
2488b6cee71dSXin LI file_magwarn(ms, "Unknown factor op `%c'", *l);
2489b6cee71dSXin LI return -1;
2490b6cee71dSXin LI }
2491b6cee71dSXin LI EATAB;
2492b6cee71dSXin LI factor = strtoul(l, &el, 0);
2493b6cee71dSXin LI if (factor > 255) {
2494b6cee71dSXin LI file_magwarn(ms, "Too large factor `%lu'", factor);
2495b6cee71dSXin LI goto out;
2496b6cee71dSXin LI }
249748c779cdSXin LI if (*el && !isspace(CAST(unsigned char, *el))) {
2498b6cee71dSXin LI file_magwarn(ms, "Bad factor `%s'", l);
2499b6cee71dSXin LI goto out;
2500b6cee71dSXin LI }
250148c779cdSXin LI m->factor = CAST(uint8_t, factor);
2502b6cee71dSXin LI if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) {
2503b6cee71dSXin LI file_magwarn(ms, "Cannot have factor op `%c' and factor %u",
2504b6cee71dSXin LI m->factor_op, m->factor);
2505b6cee71dSXin LI goto out;
2506b6cee71dSXin LI }
2507b6cee71dSXin LI return 0;
2508b6cee71dSXin LI out:
2509b6cee71dSXin LI m->factor_op = FILE_FACTOR_OP_NONE;
2510b6cee71dSXin LI m->factor = 0;
2511b6cee71dSXin LI return -1;
2512b6cee71dSXin LI }
2513b6cee71dSXin LI
2514898496eeSXin LI file_private int
goodchar(unsigned char x,const char * extra)2515c2931133SXin LI goodchar(unsigned char x, const char *extra)
2516c2931133SXin LI {
2517c2931133SXin LI return (isascii(x) && isalnum(x)) || strchr(extra, x);
2518c2931133SXin LI }
2519c2931133SXin LI
2520898496eeSXin LI file_private int
parse_extra(struct magic_set * ms,struct magic_entry * me,const char * line,size_t llen,off_t off,size_t len,const char * name,const char * extra,int nt)2521b6cee71dSXin LI parse_extra(struct magic_set *ms, struct magic_entry *me, const char *line,
25222726a701SXin LI size_t llen, off_t off, size_t len, const char *name, const char *extra,
25232726a701SXin LI int nt)
2524b6cee71dSXin LI {
2525b6cee71dSXin LI size_t i;
2526b6cee71dSXin LI const char *l = line;
2527b6cee71dSXin LI struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
25285f0216bdSXin LI char *buf = CAST(char *, CAST(void *, m)) + off;
2529b6cee71dSXin LI
2530b6cee71dSXin LI if (buf[0] != '\0') {
2531b6cee71dSXin LI len = nt ? strlen(buf) : len;
2532b6cee71dSXin LI file_magwarn(ms, "Current entry already has a %s type "
253348c779cdSXin LI "`%.*s', new type `%s'", name, CAST(int, len), buf, l);
2534b6cee71dSXin LI return -1;
2535b6cee71dSXin LI }
2536b6cee71dSXin LI
2537b6cee71dSXin LI if (*m->desc == '\0') {
2538b6cee71dSXin LI file_magwarn(ms, "Current entry does not yet have a "
2539b6cee71dSXin LI "description for adding a %s type", name);
2540b6cee71dSXin LI return -1;
2541b6cee71dSXin LI }
2542b6cee71dSXin LI
2543b6cee71dSXin LI EATAB;
25442726a701SXin LI for (i = 0; *l && i < llen && i < len && goodchar(*l, extra);
25452726a701SXin LI buf[i++] = *l++)
2546b6cee71dSXin LI continue;
2547b6cee71dSXin LI
2548b6cee71dSXin LI if (i == len && *l) {
2549b6cee71dSXin LI if (nt)
2550b6cee71dSXin LI buf[len - 1] = '\0';
2551b6cee71dSXin LI if (ms->flags & MAGIC_CHECK)
2552b6cee71dSXin LI file_magwarn(ms, "%s type `%s' truncated %"
2553b6cee71dSXin LI SIZE_T_FORMAT "u", name, line, i);
2554b6cee71dSXin LI } else {
255548c779cdSXin LI if (!isspace(CAST(unsigned char, *l)) && !goodchar(*l, extra))
2556c2931133SXin LI file_magwarn(ms, "%s type `%s' has bad char '%c'",
2557c2931133SXin LI name, line, *l);
2558b6cee71dSXin LI if (nt)
2559b6cee71dSXin LI buf[i] = '\0';
2560b6cee71dSXin LI }
2561b6cee71dSXin LI
2562b6cee71dSXin LI if (i > 0)
2563b6cee71dSXin LI return 0;
2564c2931133SXin LI
2565c2931133SXin LI file_magerror(ms, "Bad magic entry '%s'", line);
2566b6cee71dSXin LI return -1;
2567b6cee71dSXin LI }
2568b6cee71dSXin LI
2569b6cee71dSXin LI /*
2570b6cee71dSXin LI * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
2571b6cee71dSXin LI * magic[index - 1]
2572b6cee71dSXin LI */
2573898496eeSXin LI file_private int
parse_apple(struct magic_set * ms,struct magic_entry * me,const char * line,size_t len)25742726a701SXin LI parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line,
25752726a701SXin LI size_t len)
2576b6cee71dSXin LI {
25772726a701SXin LI return parse_extra(ms, me, line, len,
25785f0216bdSXin LI CAST(off_t, offsetof(struct magic, apple)),
2579*ae316d1dSXin LI sizeof(me->mp[me->cont_count - 1].apple), "APPLE", "!+-./?", 0);
2580b6cee71dSXin LI }
2581b6cee71dSXin LI
2582b6cee71dSXin LI /*
25835f0216bdSXin LI * Parse a comma-separated list of extensions
25845f0216bdSXin LI */
2585898496eeSXin LI file_private int
parse_ext(struct magic_set * ms,struct magic_entry * me,const char * line,size_t len)25862726a701SXin LI parse_ext(struct magic_set *ms, struct magic_entry *me, const char *line,
25872726a701SXin LI size_t len)
25885f0216bdSXin LI {
25892726a701SXin LI return parse_extra(ms, me, line, len,
25905f0216bdSXin LI CAST(off_t, offsetof(struct magic, ext)),
2591*ae316d1dSXin LI sizeof(me->mp[me->cont_count - 1].ext), "EXTENSION", ",!+-/@?_$&~",
2592*ae316d1dSXin LI 0);
2593898496eeSXin LI /* & for b&w */
2594898496eeSXin LI /* ~ for journal~ */
25955f0216bdSXin LI }
25965f0216bdSXin LI
25975f0216bdSXin LI /*
2598b6cee71dSXin LI * parse a MIME annotation line from magic file, put into magic[index - 1]
2599b6cee71dSXin LI * if valid
2600b6cee71dSXin LI */
2601898496eeSXin LI file_private int
parse_mime(struct magic_set * ms,struct magic_entry * me,const char * line,size_t len)26022726a701SXin LI parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line,
26032726a701SXin LI size_t len)
2604b6cee71dSXin LI {
26052726a701SXin LI return parse_extra(ms, me, line, len,
26065f0216bdSXin LI CAST(off_t, offsetof(struct magic, mimetype)),
2607*ae316d1dSXin LI sizeof(me->mp[me->cont_count - 1].mimetype), "MIME", "+-/.$?:{};=",
2608*ae316d1dSXin LI 1);
2609b6cee71dSXin LI }
2610b6cee71dSXin LI
2611898496eeSXin LI file_private int
check_format_type(const char * ptr,int type,const char ** estr)26123e41d09dSXin LI check_format_type(const char *ptr, int type, const char **estr)
2613b6cee71dSXin LI {
2614b6cee71dSXin LI int quad = 0, h;
26153e41d09dSXin LI size_t len, cnt;
2616b6cee71dSXin LI if (*ptr == '\0') {
2617b6cee71dSXin LI /* Missing format string; bad */
26183e41d09dSXin LI *estr = "missing format spec";
2619b6cee71dSXin LI return -1;
2620b6cee71dSXin LI }
2621b6cee71dSXin LI
2622b6cee71dSXin LI switch (file_formats[type]) {
2623b6cee71dSXin LI case FILE_FMT_QUAD:
2624b6cee71dSXin LI quad = 1;
2625b6cee71dSXin LI /*FALLTHROUGH*/
2626b6cee71dSXin LI case FILE_FMT_NUM:
2627b6cee71dSXin LI if (quad == 0) {
2628b6cee71dSXin LI switch (type) {
2629b6cee71dSXin LI case FILE_BYTE:
2630b6cee71dSXin LI h = 2;
2631b6cee71dSXin LI break;
2632b6cee71dSXin LI case FILE_SHORT:
2633b6cee71dSXin LI case FILE_BESHORT:
2634b6cee71dSXin LI case FILE_LESHORT:
2635b6cee71dSXin LI h = 1;
2636b6cee71dSXin LI break;
2637b6cee71dSXin LI case FILE_LONG:
2638b6cee71dSXin LI case FILE_BELONG:
2639b6cee71dSXin LI case FILE_LELONG:
2640b6cee71dSXin LI case FILE_MELONG:
2641b6cee71dSXin LI case FILE_LEID3:
2642b6cee71dSXin LI case FILE_BEID3:
2643b6cee71dSXin LI case FILE_INDIRECT:
2644b6cee71dSXin LI h = 0;
2645b6cee71dSXin LI break;
2646b6cee71dSXin LI default:
2647a4d6d3b8SXin LI fprintf(stderr, "Bad number format %d", type);
2648b6cee71dSXin LI abort();
2649b6cee71dSXin LI }
2650b6cee71dSXin LI } else
2651b6cee71dSXin LI h = 0;
265243a5ec4eSXin LI while (*ptr && strchr("-.#", *ptr) != NULL)
265340427ccaSGordon Tetlow ptr++;
26543e41d09dSXin LI #define CHECKLEN() do { \
265548c779cdSXin LI for (len = cnt = 0; isdigit(CAST(unsigned char, *ptr)); ptr++, cnt++) \
26563e41d09dSXin LI len = len * 10 + (*ptr - '0'); \
26573e41d09dSXin LI if (cnt > 5 || len > 1024) \
26583e41d09dSXin LI goto toolong; \
26593e41d09dSXin LI } while (/*CONSTCOND*/0)
26603e41d09dSXin LI
26613e41d09dSXin LI CHECKLEN();
2662b6cee71dSXin LI if (*ptr == '.')
2663b6cee71dSXin LI ptr++;
26643e41d09dSXin LI CHECKLEN();
2665b6cee71dSXin LI if (quad) {
2666b6cee71dSXin LI if (*ptr++ != 'l')
26673e41d09dSXin LI goto invalid;
2668b6cee71dSXin LI if (*ptr++ != 'l')
26693e41d09dSXin LI goto invalid;
2670b6cee71dSXin LI }
2671b6cee71dSXin LI
2672b6cee71dSXin LI switch (*ptr++) {
2673b6cee71dSXin LI #ifdef STRICT_FORMAT /* "long" formats are int formats for us */
2674b6cee71dSXin LI /* so don't accept the 'l' modifier */
2675b6cee71dSXin LI case 'l':
2676b6cee71dSXin LI switch (*ptr++) {
2677b6cee71dSXin LI case 'i':
2678b6cee71dSXin LI case 'd':
2679b6cee71dSXin LI case 'u':
2680b6cee71dSXin LI case 'o':
2681b6cee71dSXin LI case 'x':
2682b6cee71dSXin LI case 'X':
26833e41d09dSXin LI if (h == 0)
26843e41d09dSXin LI return 0;
26853e41d09dSXin LI /*FALLTHROUGH*/
2686b6cee71dSXin LI default:
26873e41d09dSXin LI goto invalid;
2688b6cee71dSXin LI }
2689b6cee71dSXin LI
2690b6cee71dSXin LI /*
2691b6cee71dSXin LI * Don't accept h and hh modifiers. They make writing
2692b6cee71dSXin LI * magic entries more complicated, for very little benefit
2693b6cee71dSXin LI */
2694b6cee71dSXin LI case 'h':
2695b6cee71dSXin LI if (h-- <= 0)
26963e41d09dSXin LI goto invalid;
2697b6cee71dSXin LI switch (*ptr++) {
2698b6cee71dSXin LI case 'h':
2699b6cee71dSXin LI if (h-- <= 0)
27003e41d09dSXin LI goto invalid;
2701b6cee71dSXin LI switch (*ptr++) {
2702b6cee71dSXin LI case 'i':
2703b6cee71dSXin LI case 'd':
2704b6cee71dSXin LI case 'u':
2705b6cee71dSXin LI case 'o':
2706b6cee71dSXin LI case 'x':
2707b6cee71dSXin LI case 'X':
2708b6cee71dSXin LI return 0;
2709b6cee71dSXin LI default:
27103e41d09dSXin LI goto invalid;
2711b6cee71dSXin LI }
2712b6cee71dSXin LI case 'i':
2713b6cee71dSXin LI case 'd':
2714b6cee71dSXin LI case 'u':
2715b6cee71dSXin LI case 'o':
2716b6cee71dSXin LI case 'x':
2717b6cee71dSXin LI case 'X':
27183e41d09dSXin LI if (h == 0)
27193e41d09dSXin LI return 0;
27203e41d09dSXin LI /*FALLTHROUGH*/
2721b6cee71dSXin LI default:
27223e41d09dSXin LI goto invalid;
2723b6cee71dSXin LI }
2724b6cee71dSXin LI #endif
2725b6cee71dSXin LI case 'c':
27263e41d09dSXin LI if (h == 2)
27273e41d09dSXin LI return 0;
27283e41d09dSXin LI goto invalid;
2729b6cee71dSXin LI case 'i':
2730b6cee71dSXin LI case 'd':
2731b6cee71dSXin LI case 'u':
2732b6cee71dSXin LI case 'o':
2733b6cee71dSXin LI case 'x':
2734b6cee71dSXin LI case 'X':
2735b6cee71dSXin LI #ifdef STRICT_FORMAT
27363e41d09dSXin LI if (h == 0)
27373e41d09dSXin LI return 0;
27383e41d09dSXin LI /*FALLTHROUGH*/
2739b6cee71dSXin LI #else
2740b6cee71dSXin LI return 0;
2741b6cee71dSXin LI #endif
2742b6cee71dSXin LI default:
27433e41d09dSXin LI goto invalid;
2744b6cee71dSXin LI }
2745b6cee71dSXin LI
2746b6cee71dSXin LI case FILE_FMT_FLOAT:
2747b6cee71dSXin LI case FILE_FMT_DOUBLE:
2748b6cee71dSXin LI if (*ptr == '-')
2749b6cee71dSXin LI ptr++;
2750b6cee71dSXin LI if (*ptr == '.')
2751b6cee71dSXin LI ptr++;
27523e41d09dSXin LI CHECKLEN();
2753b6cee71dSXin LI if (*ptr == '.')
2754b6cee71dSXin LI ptr++;
27553e41d09dSXin LI CHECKLEN();
2756b6cee71dSXin LI switch (*ptr++) {
2757b6cee71dSXin LI case 'e':
2758b6cee71dSXin LI case 'E':
2759b6cee71dSXin LI case 'f':
2760b6cee71dSXin LI case 'F':
2761b6cee71dSXin LI case 'g':
2762b6cee71dSXin LI case 'G':
2763b6cee71dSXin LI return 0;
2764b6cee71dSXin LI
2765b6cee71dSXin LI default:
27663e41d09dSXin LI goto invalid;
2767b6cee71dSXin LI }
2768b6cee71dSXin LI
2769b6cee71dSXin LI
2770b6cee71dSXin LI case FILE_FMT_STR:
2771b6cee71dSXin LI if (*ptr == '-')
2772b6cee71dSXin LI ptr++;
277348c779cdSXin LI while (isdigit(CAST(unsigned char, *ptr)))
2774b6cee71dSXin LI ptr++;
2775b6cee71dSXin LI if (*ptr == '.') {
2776b6cee71dSXin LI ptr++;
277748c779cdSXin LI while (isdigit(CAST(unsigned char , *ptr)))
2778b6cee71dSXin LI ptr++;
2779b6cee71dSXin LI }
2780b6cee71dSXin LI
2781b6cee71dSXin LI switch (*ptr++) {
2782b6cee71dSXin LI case 's':
2783b6cee71dSXin LI return 0;
2784b6cee71dSXin LI default:
27853e41d09dSXin LI goto invalid;
2786b6cee71dSXin LI }
2787b6cee71dSXin LI
2788b6cee71dSXin LI default:
2789b6cee71dSXin LI /* internal error */
2790a4d6d3b8SXin LI fprintf(stderr, "Bad file format %d", type);
2791b6cee71dSXin LI abort();
2792b6cee71dSXin LI }
27933e41d09dSXin LI invalid:
27943e41d09dSXin LI *estr = "not valid";
2795898496eeSXin LI return -1;
27963e41d09dSXin LI toolong:
27973e41d09dSXin LI *estr = "too long";
2798b6cee71dSXin LI return -1;
2799b6cee71dSXin LI }
2800b6cee71dSXin LI
2801b6cee71dSXin LI /*
2802b6cee71dSXin LI * Check that the optional printf format in description matches
2803b6cee71dSXin LI * the type of the magic.
2804b6cee71dSXin LI */
2805898496eeSXin LI file_private int
check_format(struct magic_set * ms,struct magic * m)2806b6cee71dSXin LI check_format(struct magic_set *ms, struct magic *m)
2807b6cee71dSXin LI {
2808b6cee71dSXin LI char *ptr;
28093e41d09dSXin LI const char *estr;
2810b6cee71dSXin LI
2811b6cee71dSXin LI for (ptr = m->desc; *ptr; ptr++)
2812b6cee71dSXin LI if (*ptr == '%')
2813b6cee71dSXin LI break;
2814b6cee71dSXin LI if (*ptr == '\0') {
2815b6cee71dSXin LI /* No format string; ok */
2816b6cee71dSXin LI return 1;
2817b6cee71dSXin LI }
2818b6cee71dSXin LI
2819b6cee71dSXin LI assert(file_nformats == file_nnames);
2820b6cee71dSXin LI
2821b6cee71dSXin LI if (m->type >= file_nformats) {
2822b6cee71dSXin LI file_magwarn(ms, "Internal error inconsistency between "
2823b6cee71dSXin LI "m->type and format strings");
2824b6cee71dSXin LI return -1;
2825b6cee71dSXin LI }
2826b6cee71dSXin LI if (file_formats[m->type] == FILE_FMT_NONE) {
2827b6cee71dSXin LI file_magwarn(ms, "No format string for `%s' with description "
2828b6cee71dSXin LI "`%s'", m->desc, file_names[m->type]);
2829b6cee71dSXin LI return -1;
2830b6cee71dSXin LI }
2831b6cee71dSXin LI
2832b6cee71dSXin LI ptr++;
28333e41d09dSXin LI if (check_format_type(ptr, m->type, &estr) == -1) {
2834b6cee71dSXin LI /*
2835b6cee71dSXin LI * TODO: this error message is unhelpful if the format
2836b6cee71dSXin LI * string is not one character long
2837b6cee71dSXin LI */
28383e41d09dSXin LI file_magwarn(ms, "Printf format is %s for type "
28393e41d09dSXin LI "`%s' in description `%s'", estr,
2840b6cee71dSXin LI file_names[m->type], m->desc);
2841b6cee71dSXin LI return -1;
2842b6cee71dSXin LI }
2843b6cee71dSXin LI
2844b6cee71dSXin LI for (; *ptr; ptr++) {
2845b6cee71dSXin LI if (*ptr == '%') {
2846b6cee71dSXin LI file_magwarn(ms,
2847b6cee71dSXin LI "Too many format strings (should have at most one) "
2848b6cee71dSXin LI "for `%s' with description `%s'",
2849b6cee71dSXin LI file_names[m->type], m->desc);
2850b6cee71dSXin LI return -1;
2851b6cee71dSXin LI }
2852b6cee71dSXin LI }
2853b6cee71dSXin LI return 0;
2854b6cee71dSXin LI }
2855b6cee71dSXin LI
2856b6cee71dSXin LI /*
2857b6cee71dSXin LI * Read a numeric value from a pointer, into the value union of a magic
2858b6cee71dSXin LI * pointer, according to the magic type. Update the string pointer to point
2859b6cee71dSXin LI * just after the number read. Return 0 for success, non-zero for failure.
2860b6cee71dSXin LI */
2861898496eeSXin LI file_private int
getvalue(struct magic_set * ms,struct magic * m,const char ** p,int action)2862b6cee71dSXin LI getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
2863b6cee71dSXin LI {
286458a0f0d0SEitan Adler char *ep;
286558a0f0d0SEitan Adler uint64_t ull;
2866898496eeSXin LI int y;
286758a0f0d0SEitan Adler
2868b6cee71dSXin LI switch (m->type) {
2869b6cee71dSXin LI case FILE_BESTRING16:
2870b6cee71dSXin LI case FILE_LESTRING16:
2871b6cee71dSXin LI case FILE_STRING:
2872b6cee71dSXin LI case FILE_PSTRING:
2873b6cee71dSXin LI case FILE_REGEX:
2874b6cee71dSXin LI case FILE_SEARCH:
2875b6cee71dSXin LI case FILE_NAME:
2876b6cee71dSXin LI case FILE_USE:
28773e41d09dSXin LI case FILE_DER:
2878a2dfb722SXin LI case FILE_OCTAL:
2879b6cee71dSXin LI *p = getstr(ms, m, *p, action == FILE_COMPILE);
2880b6cee71dSXin LI if (*p == NULL) {
2881b6cee71dSXin LI if (ms->flags & MAGIC_CHECK)
2882b6cee71dSXin LI file_magwarn(ms, "cannot get string from `%s'",
2883b6cee71dSXin LI m->value.s);
2884b6cee71dSXin LI return -1;
2885b6cee71dSXin LI }
2886b6cee71dSXin LI if (m->type == FILE_REGEX) {
2887b6cee71dSXin LI file_regex_t rx;
2888898496eeSXin LI int rc =
2889898496eeSXin LI file_regcomp(ms, &rx, m->value.s, REG_EXTENDED);
2890a4d6d3b8SXin LI if (rc == 0) {
2891b6cee71dSXin LI file_regfree(&rx);
2892a4d6d3b8SXin LI }
2893b6cee71dSXin LI return rc ? -1 : 0;
2894b6cee71dSXin LI }
2895b6cee71dSXin LI return 0;
289658a0f0d0SEitan Adler default:
289758a0f0d0SEitan Adler if (m->reln == 'x')
289858a0f0d0SEitan Adler return 0;
289958a0f0d0SEitan Adler break;
290058a0f0d0SEitan Adler }
290158a0f0d0SEitan Adler
290258a0f0d0SEitan Adler switch (m->type) {
2903b6cee71dSXin LI case FILE_FLOAT:
2904b6cee71dSXin LI case FILE_BEFLOAT:
2905b6cee71dSXin LI case FILE_LEFLOAT:
29069ce06829SXin LI errno = 0;
2907b6cee71dSXin LI #ifdef HAVE_STRTOF
2908b6cee71dSXin LI m->value.f = strtof(*p, &ep);
2909b6cee71dSXin LI #else
2910b6cee71dSXin LI m->value.f = (float)strtod(*p, &ep);
2911b6cee71dSXin LI #endif
29129ce06829SXin LI if (errno == 0)
2913b6cee71dSXin LI *p = ep;
2914b6cee71dSXin LI return 0;
2915b6cee71dSXin LI case FILE_DOUBLE:
2916b6cee71dSXin LI case FILE_BEDOUBLE:
2917b6cee71dSXin LI case FILE_LEDOUBLE:
29189ce06829SXin LI errno = 0;
2919b6cee71dSXin LI m->value.d = strtod(*p, &ep);
29209ce06829SXin LI if (errno == 0)
2921b6cee71dSXin LI *p = ep;
2922b6cee71dSXin LI return 0;
29232726a701SXin LI case FILE_GUID:
29242726a701SXin LI if (file_parse_guid(*p, m->value.guid) == -1)
29252726a701SXin LI return -1;
29262726a701SXin LI *p += FILE_GUID_SIZE - 1;
29272726a701SXin LI return 0;
2928b6cee71dSXin LI default:
29299ce06829SXin LI errno = 0;
293048c779cdSXin LI ull = CAST(uint64_t, strtoull(*p, &ep, 0));
293140427ccaSGordon Tetlow m->value.q = file_signextend(ms, m, ull);
293240427ccaSGordon Tetlow if (*p == ep) {
293343a5ec4eSXin LI file_magwarn(ms, "Unparsable number `%s'", *p);
2934898496eeSXin LI return -1;
293540427ccaSGordon Tetlow } else {
293640427ccaSGordon Tetlow size_t ts = typesize(m->type);
293740427ccaSGordon Tetlow uint64_t x;
293840427ccaSGordon Tetlow const char *q;
293940427ccaSGordon Tetlow
29402726a701SXin LI if (ts == FILE_BADSIZE) {
294158a0f0d0SEitan Adler file_magwarn(ms,
294258a0f0d0SEitan Adler "Expected numeric type got `%s'",
294340427ccaSGordon Tetlow type_tbl[m->type].name);
2944898496eeSXin LI return -1;
294540427ccaSGordon Tetlow }
294648c779cdSXin LI for (q = *p; isspace(CAST(unsigned char, *q)); q++)
294740427ccaSGordon Tetlow continue;
2948898496eeSXin LI if (*q == '-' && ull != UINT64_MAX)
294948c779cdSXin LI ull = -CAST(int64_t, ull);
295040427ccaSGordon Tetlow switch (ts) {
295140427ccaSGordon Tetlow case 1:
295248c779cdSXin LI x = CAST(uint64_t, ull & ~0xffULL);
2953898496eeSXin LI y = (x & ~0xffULL) != ~0xffULL;
295440427ccaSGordon Tetlow break;
295540427ccaSGordon Tetlow case 2:
295648c779cdSXin LI x = CAST(uint64_t, ull & ~0xffffULL);
2957898496eeSXin LI y = (x & ~0xffffULL) != ~0xffffULL;
295840427ccaSGordon Tetlow break;
295940427ccaSGordon Tetlow case 4:
296048c779cdSXin LI x = CAST(uint64_t, ull & ~0xffffffffULL);
2961898496eeSXin LI y = (x & ~0xffffffffULL) != ~0xffffffffULL;
296240427ccaSGordon Tetlow break;
296340427ccaSGordon Tetlow case 8:
296440427ccaSGordon Tetlow x = 0;
2965898496eeSXin LI y = 0;
296640427ccaSGordon Tetlow break;
296740427ccaSGordon Tetlow default:
2968a4d6d3b8SXin LI fprintf(stderr, "Bad width %zu", ts);
296940427ccaSGordon Tetlow abort();
297040427ccaSGordon Tetlow }
2971898496eeSXin LI if (x && y) {
297258a0f0d0SEitan Adler file_magwarn(ms, "Overflow for numeric"
297358a0f0d0SEitan Adler " type `%s' value %#" PRIx64,
297440427ccaSGordon Tetlow type_tbl[m->type].name, ull);
2975898496eeSXin LI return -1;
297640427ccaSGordon Tetlow }
297740427ccaSGordon Tetlow }
29789ce06829SXin LI if (errno == 0) {
2979b6cee71dSXin LI *p = ep;
2980b6cee71dSXin LI eatsize(p);
2981b6cee71dSXin LI }
2982b6cee71dSXin LI return 0;
2983b6cee71dSXin LI }
2984b6cee71dSXin LI }
2985b6cee71dSXin LI
2986b6cee71dSXin LI /*
2987b6cee71dSXin LI * Convert a string containing C character escapes. Stop at an unescaped
2988b6cee71dSXin LI * space or tab.
2989b6cee71dSXin LI * Copy the converted version to "m->value.s", and the length in m->vallen.
2990b6cee71dSXin LI * Return updated scan pointer as function result. Warn if set.
2991b6cee71dSXin LI */
2992898496eeSXin LI file_private const char *
getstr(struct magic_set * ms,struct magic * m,const char * s,int warn)2993b6cee71dSXin LI getstr(struct magic_set *ms, struct magic *m, const char *s, int warn)
2994b6cee71dSXin LI {
2995b6cee71dSXin LI const char *origs = s;
2996b6cee71dSXin LI char *p = m->value.s;
2997b6cee71dSXin LI size_t plen = sizeof(m->value.s);
2998b6cee71dSXin LI char *origp = p;
2999b6cee71dSXin LI char *pmax = p + plen - 1;
3000b6cee71dSXin LI int c;
3001b6cee71dSXin LI int val;
3002a4d6d3b8SXin LI size_t bracket_nesting = 0;
3003b6cee71dSXin LI
3004b6cee71dSXin LI while ((c = *s++) != '\0') {
300548c779cdSXin LI if (isspace(CAST(unsigned char, c)))
3006b6cee71dSXin LI break;
3007b6cee71dSXin LI if (p >= pmax) {
3008b6cee71dSXin LI file_error(ms, 0, "string too long: `%s'", origs);
3009b6cee71dSXin LI return NULL;
3010b6cee71dSXin LI }
3011a4d6d3b8SXin LI if (c != '\\') {
3012a4d6d3b8SXin LI if (c == '[') {
3013a4d6d3b8SXin LI bracket_nesting++;
3014a4d6d3b8SXin LI }
3015a4d6d3b8SXin LI if (c == ']' && bracket_nesting > 0) {
3016a4d6d3b8SXin LI bracket_nesting--;
3017a4d6d3b8SXin LI }
3018a4d6d3b8SXin LI *p++ = CAST(char, c);
3019a4d6d3b8SXin LI continue;
3020a4d6d3b8SXin LI }
3021b6cee71dSXin LI switch(c = *s++) {
3022b6cee71dSXin LI
3023b6cee71dSXin LI case '\0':
3024b6cee71dSXin LI if (warn)
3025b6cee71dSXin LI file_magwarn(ms, "incomplete escape");
30269ce06829SXin LI s--;
3027b6cee71dSXin LI goto out;
3028a4d6d3b8SXin LI case '.':
3029a4d6d3b8SXin LI if (m->type == FILE_REGEX &&
3030a4d6d3b8SXin LI bracket_nesting == 0 && warn) {
3031a4d6d3b8SXin LI file_magwarn(ms, "escaped dot ('.') found, "
3032a4d6d3b8SXin LI "use \\\\. instead");
3033a4d6d3b8SXin LI }
3034a4d6d3b8SXin LI warn = 0; /* already did */
3035a4d6d3b8SXin LI /*FALLTHROUGH*/
3036b6cee71dSXin LI case '\t':
3037b6cee71dSXin LI if (warn) {
3038b6cee71dSXin LI file_magwarn(ms,
3039a4d6d3b8SXin LI "escaped tab found, use \\\\t instead");
3040b6cee71dSXin LI warn = 0; /* already did */
3041b6cee71dSXin LI }
3042b6cee71dSXin LI /*FALLTHROUGH*/
3043b6cee71dSXin LI default:
3044b6cee71dSXin LI if (warn) {
304548c779cdSXin LI if (isprint(CAST(unsigned char, c))) {
3046b6cee71dSXin LI /* Allow escaping of
3047b6cee71dSXin LI * ``relations'' */
3048b6cee71dSXin LI if (strchr("<>&^=!", c) == NULL
3049b6cee71dSXin LI && (m->type != FILE_REGEX ||
3050b6cee71dSXin LI strchr("[]().*?^$|{}", c)
3051b6cee71dSXin LI == NULL)) {
3052b6cee71dSXin LI file_magwarn(ms, "no "
3053b6cee71dSXin LI "need to escape "
3054b6cee71dSXin LI "`%c'", c);
3055b6cee71dSXin LI }
3056b6cee71dSXin LI } else {
3057b6cee71dSXin LI file_magwarn(ms,
3058b6cee71dSXin LI "unknown escape sequence: "
3059b6cee71dSXin LI "\\%03o", c);
3060b6cee71dSXin LI }
3061b6cee71dSXin LI }
3062b6cee71dSXin LI /*FALLTHROUGH*/
3063b6cee71dSXin LI /* space, perhaps force people to use \040? */
3064b6cee71dSXin LI case ' ':
3065b6cee71dSXin LI #if 0
3066b6cee71dSXin LI /*
3067b6cee71dSXin LI * Other things people escape, but shouldn't need to,
3068b6cee71dSXin LI * so we disallow them
3069b6cee71dSXin LI */
3070b6cee71dSXin LI case '\'':
3071b6cee71dSXin LI case '"':
3072b6cee71dSXin LI case '?':
3073b6cee71dSXin LI #endif
3074b6cee71dSXin LI /* Relations */
3075b6cee71dSXin LI case '>':
3076b6cee71dSXin LI case '<':
3077b6cee71dSXin LI case '&':
3078b6cee71dSXin LI case '^':
3079b6cee71dSXin LI case '=':
3080b6cee71dSXin LI case '!':
3081a4d6d3b8SXin LI /* and backslash itself */
3082b6cee71dSXin LI case '\\':
308348c779cdSXin LI *p++ = CAST(char, c);
3084b6cee71dSXin LI break;
3085b6cee71dSXin LI
3086b6cee71dSXin LI case 'a':
3087b6cee71dSXin LI *p++ = '\a';
3088b6cee71dSXin LI break;
3089b6cee71dSXin LI
3090b6cee71dSXin LI case 'b':
3091b6cee71dSXin LI *p++ = '\b';
3092b6cee71dSXin LI break;
3093b6cee71dSXin LI
3094b6cee71dSXin LI case 'f':
3095b6cee71dSXin LI *p++ = '\f';
3096b6cee71dSXin LI break;
3097b6cee71dSXin LI
3098b6cee71dSXin LI case 'n':
3099b6cee71dSXin LI *p++ = '\n';
3100b6cee71dSXin LI break;
3101b6cee71dSXin LI
3102b6cee71dSXin LI case 'r':
3103b6cee71dSXin LI *p++ = '\r';
3104b6cee71dSXin LI break;
3105b6cee71dSXin LI
3106b6cee71dSXin LI case 't':
3107b6cee71dSXin LI *p++ = '\t';
3108b6cee71dSXin LI break;
3109b6cee71dSXin LI
3110b6cee71dSXin LI case 'v':
3111b6cee71dSXin LI *p++ = '\v';
3112b6cee71dSXin LI break;
3113b6cee71dSXin LI
3114b6cee71dSXin LI /* \ and up to 3 octal digits */
3115b6cee71dSXin LI case '0':
3116b6cee71dSXin LI case '1':
3117b6cee71dSXin LI case '2':
3118b6cee71dSXin LI case '3':
3119b6cee71dSXin LI case '4':
3120b6cee71dSXin LI case '5':
3121b6cee71dSXin LI case '6':
3122b6cee71dSXin LI case '7':
3123b6cee71dSXin LI val = c - '0';
3124b6cee71dSXin LI c = *s++; /* try for 2 */
3125b6cee71dSXin LI if (c >= '0' && c <= '7') {
3126b6cee71dSXin LI val = (val << 3) | (c - '0');
3127b6cee71dSXin LI c = *s++; /* try for 3 */
3128b6cee71dSXin LI if (c >= '0' && c <= '7')
3129b6cee71dSXin LI val = (val << 3) | (c-'0');
3130b6cee71dSXin LI else
3131b6cee71dSXin LI --s;
3132b6cee71dSXin LI }
3133b6cee71dSXin LI else
3134b6cee71dSXin LI --s;
313548c779cdSXin LI *p++ = CAST(char, val);
3136b6cee71dSXin LI break;
3137b6cee71dSXin LI
3138b6cee71dSXin LI /* \x and up to 2 hex digits */
3139b6cee71dSXin LI case 'x':
3140b6cee71dSXin LI val = 'x'; /* Default if no digits */
3141b6cee71dSXin LI c = hextoint(*s++); /* Get next char */
3142b6cee71dSXin LI if (c >= 0) {
3143b6cee71dSXin LI val = c;
3144b6cee71dSXin LI c = hextoint(*s++);
3145b6cee71dSXin LI if (c >= 0)
3146b6cee71dSXin LI val = (val << 4) + c;
3147b6cee71dSXin LI else
3148b6cee71dSXin LI --s;
3149b6cee71dSXin LI } else
3150b6cee71dSXin LI --s;
315148c779cdSXin LI *p++ = CAST(char, val);
3152b6cee71dSXin LI break;
3153b6cee71dSXin LI }
3154b6cee71dSXin LI }
31559ce06829SXin LI --s;
3156b6cee71dSXin LI out:
3157b6cee71dSXin LI *p = '\0';
3158b6cee71dSXin LI m->vallen = CAST(unsigned char, (p - origp));
31592726a701SXin LI if (m->type == FILE_PSTRING) {
31602726a701SXin LI size_t l = file_pstring_length_size(ms, m);
31612726a701SXin LI if (l == FILE_BADSIZE)
31622726a701SXin LI return NULL;
31632726a701SXin LI m->vallen += CAST(unsigned char, l);
31642726a701SXin LI }
3165b6cee71dSXin LI return s;
3166b6cee71dSXin LI }
3167b6cee71dSXin LI
3168b6cee71dSXin LI
3169b6cee71dSXin LI /* Single hex char to int; -1 if not a hex char. */
3170898496eeSXin LI file_private int
hextoint(int c)3171b6cee71dSXin LI hextoint(int c)
3172b6cee71dSXin LI {
317348c779cdSXin LI if (!isascii(CAST(unsigned char, c)))
3174b6cee71dSXin LI return -1;
317548c779cdSXin LI if (isdigit(CAST(unsigned char, c)))
3176b6cee71dSXin LI return c - '0';
3177b6cee71dSXin LI if ((c >= 'a') && (c <= 'f'))
3178b6cee71dSXin LI return c + 10 - 'a';
3179b6cee71dSXin LI if (( c>= 'A') && (c <= 'F'))
3180b6cee71dSXin LI return c + 10 - 'A';
3181b6cee71dSXin LI return -1;
3182b6cee71dSXin LI }
3183b6cee71dSXin LI
3184b6cee71dSXin LI
3185b6cee71dSXin LI /*
3186b6cee71dSXin LI * Print a string containing C character escapes.
3187b6cee71dSXin LI */
3188898496eeSXin LI file_protected void
file_showstr(FILE * fp,const char * s,size_t len)3189b6cee71dSXin LI file_showstr(FILE *fp, const char *s, size_t len)
3190b6cee71dSXin LI {
3191b6cee71dSXin LI char c;
3192b6cee71dSXin LI
3193b6cee71dSXin LI for (;;) {
31942726a701SXin LI if (len == FILE_BADSIZE) {
3195b6cee71dSXin LI c = *s++;
3196b6cee71dSXin LI if (c == '\0')
3197b6cee71dSXin LI break;
3198b6cee71dSXin LI }
3199b6cee71dSXin LI else {
3200b6cee71dSXin LI if (len-- == 0)
3201b6cee71dSXin LI break;
3202b6cee71dSXin LI c = *s++;
3203b6cee71dSXin LI }
3204b6cee71dSXin LI if (c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */
3205b6cee71dSXin LI (void) fputc(c, fp);
3206b6cee71dSXin LI else {
3207b6cee71dSXin LI (void) fputc('\\', fp);
3208b6cee71dSXin LI switch (c) {
3209b6cee71dSXin LI case '\a':
3210b6cee71dSXin LI (void) fputc('a', fp);
3211b6cee71dSXin LI break;
3212b6cee71dSXin LI
3213b6cee71dSXin LI case '\b':
3214b6cee71dSXin LI (void) fputc('b', fp);
3215b6cee71dSXin LI break;
3216b6cee71dSXin LI
3217b6cee71dSXin LI case '\f':
3218b6cee71dSXin LI (void) fputc('f', fp);
3219b6cee71dSXin LI break;
3220b6cee71dSXin LI
3221b6cee71dSXin LI case '\n':
3222b6cee71dSXin LI (void) fputc('n', fp);
3223b6cee71dSXin LI break;
3224b6cee71dSXin LI
3225b6cee71dSXin LI case '\r':
3226b6cee71dSXin LI (void) fputc('r', fp);
3227b6cee71dSXin LI break;
3228b6cee71dSXin LI
3229b6cee71dSXin LI case '\t':
3230b6cee71dSXin LI (void) fputc('t', fp);
3231b6cee71dSXin LI break;
3232b6cee71dSXin LI
3233b6cee71dSXin LI case '\v':
3234b6cee71dSXin LI (void) fputc('v', fp);
3235b6cee71dSXin LI break;
3236b6cee71dSXin LI
3237b6cee71dSXin LI default:
3238b6cee71dSXin LI (void) fprintf(fp, "%.3o", c & 0377);
3239b6cee71dSXin LI break;
3240b6cee71dSXin LI }
3241b6cee71dSXin LI }
3242b6cee71dSXin LI }
3243b6cee71dSXin LI }
3244b6cee71dSXin LI
3245b6cee71dSXin LI /*
3246b6cee71dSXin LI * eatsize(): Eat the size spec from a number [eg. 10UL]
3247b6cee71dSXin LI */
3248898496eeSXin LI file_private void
eatsize(const char ** p)3249b6cee71dSXin LI eatsize(const char **p)
3250b6cee71dSXin LI {
3251b6cee71dSXin LI const char *l = *p;
3252b6cee71dSXin LI
3253b6cee71dSXin LI if (LOWCASE(*l) == 'u')
3254b6cee71dSXin LI l++;
3255b6cee71dSXin LI
3256b6cee71dSXin LI switch (LOWCASE(*l)) {
3257b6cee71dSXin LI case 'l': /* long */
3258b6cee71dSXin LI case 's': /* short */
3259b6cee71dSXin LI case 'h': /* short */
3260b6cee71dSXin LI case 'b': /* char/byte */
3261b6cee71dSXin LI case 'c': /* char/byte */
3262b6cee71dSXin LI l++;
3263b6cee71dSXin LI /*FALLTHROUGH*/
3264b6cee71dSXin LI default:
3265b6cee71dSXin LI break;
3266b6cee71dSXin LI }
3267b6cee71dSXin LI
3268b6cee71dSXin LI *p = l;
3269b6cee71dSXin LI }
3270b6cee71dSXin LI
3271b6cee71dSXin LI /*
3272c2931133SXin LI * handle a buffer containing a compiled file.
3273c2931133SXin LI */
3274898496eeSXin LI file_private struct magic_map *
apprentice_buf(struct magic_set * ms,struct magic * buf,size_t len)3275c2931133SXin LI apprentice_buf(struct magic_set *ms, struct magic *buf, size_t len)
3276c2931133SXin LI {
3277c2931133SXin LI struct magic_map *map;
3278c2931133SXin LI
3279c2931133SXin LI if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) {
3280c2931133SXin LI file_oomem(ms, sizeof(*map));
3281c2931133SXin LI return NULL;
3282c2931133SXin LI }
3283c2931133SXin LI map->len = len;
3284c2931133SXin LI map->p = buf;
3285c2931133SXin LI map->type = MAP_TYPE_USER;
3286c2931133SXin LI if (check_buffer(ms, map, "buffer") != 0) {
3287c2931133SXin LI apprentice_unmap(map);
3288c2931133SXin LI return NULL;
3289c2931133SXin LI }
3290c2931133SXin LI return map;
3291c2931133SXin LI }
3292c2931133SXin LI
3293c2931133SXin LI /*
3294b6cee71dSXin LI * handle a compiled file.
3295b6cee71dSXin LI */
3296b6cee71dSXin LI
3297898496eeSXin LI file_private struct magic_map *
apprentice_map(struct magic_set * ms,const char * fn)3298b6cee71dSXin LI apprentice_map(struct magic_set *ms, const char *fn)
3299b6cee71dSXin LI {
3300b6cee71dSXin LI int fd;
3301b6cee71dSXin LI struct stat st;
3302b6cee71dSXin LI char *dbname = NULL;
3303b6cee71dSXin LI struct magic_map *map;
3304a5d223e6SXin LI struct magic_map *rv = NULL;
3305b6cee71dSXin LI
3306b6cee71dSXin LI fd = -1;
3307b6cee71dSXin LI if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) {
3308b6cee71dSXin LI file_oomem(ms, sizeof(*map));
3309b6cee71dSXin LI goto error;
3310b6cee71dSXin LI }
33113e41d09dSXin LI map->type = MAP_TYPE_USER; /* unspecified */
3312b6cee71dSXin LI
3313b6cee71dSXin LI dbname = mkdbname(ms, fn, 0);
3314b6cee71dSXin LI if (dbname == NULL)
3315b6cee71dSXin LI goto error;
3316b6cee71dSXin LI
3317b6cee71dSXin LI if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1)
3318b6cee71dSXin LI goto error;
3319b6cee71dSXin LI
3320b6cee71dSXin LI if (fstat(fd, &st) == -1) {
3321b6cee71dSXin LI file_error(ms, errno, "cannot stat `%s'", dbname);
3322b6cee71dSXin LI goto error;
3323b6cee71dSXin LI }
332448c779cdSXin LI if (st.st_size < 8 || st.st_size > maxoff_t()) {
3325b6cee71dSXin LI file_error(ms, 0, "file `%s' is too %s", dbname,
3326b6cee71dSXin LI st.st_size < 8 ? "small" : "large");
3327b6cee71dSXin LI goto error;
3328b6cee71dSXin LI }
3329b6cee71dSXin LI
333048c779cdSXin LI map->len = CAST(size_t, st.st_size);
3331b6cee71dSXin LI #ifdef QUICK
33323e41d09dSXin LI map->type = MAP_TYPE_MMAP;
333348c779cdSXin LI if ((map->p = mmap(0, CAST(size_t, st.st_size), PROT_READ|PROT_WRITE,
333448c779cdSXin LI MAP_PRIVATE|MAP_FILE, fd, CAST(off_t, 0))) == MAP_FAILED) {
3335b6cee71dSXin LI file_error(ms, errno, "cannot map `%s'", dbname);
3336b6cee71dSXin LI goto error;
3337b6cee71dSXin LI }
3338b6cee71dSXin LI #else
33393e41d09dSXin LI map->type = MAP_TYPE_MALLOC;
3340b6cee71dSXin LI if ((map->p = CAST(void *, malloc(map->len))) == NULL) {
3341b6cee71dSXin LI file_oomem(ms, map->len);
3342b6cee71dSXin LI goto error;
3343b6cee71dSXin LI }
3344b6cee71dSXin LI if (read(fd, map->p, map->len) != (ssize_t)map->len) {
3345b6cee71dSXin LI file_badread(ms);
3346b6cee71dSXin LI goto error;
3347b6cee71dSXin LI }
3348b6cee71dSXin LI #endif
3349b6cee71dSXin LI (void)close(fd);
3350b6cee71dSXin LI fd = -1;
3351c2931133SXin LI
3352a5d223e6SXin LI if (check_buffer(ms, map, dbname) != 0) {
3353c2931133SXin LI goto error;
3354a5d223e6SXin LI }
33553e41d09dSXin LI #ifdef QUICK
335648c779cdSXin LI if (mprotect(map->p, CAST(size_t, st.st_size), PROT_READ) == -1) {
33573e41d09dSXin LI file_error(ms, errno, "cannot mprotect `%s'", dbname);
33583e41d09dSXin LI goto error;
33593e41d09dSXin LI }
33603e41d09dSXin LI #endif
3361c2931133SXin LI
3362c2931133SXin LI free(dbname);
3363c2931133SXin LI return map;
3364c2931133SXin LI
3365c2931133SXin LI error:
3366c2931133SXin LI if (fd != -1)
3367c2931133SXin LI (void)close(fd);
3368c2931133SXin LI apprentice_unmap(map);
3369c2931133SXin LI free(dbname);
3370a5d223e6SXin LI return rv;
3371c2931133SXin LI }
3372c2931133SXin LI
3373898496eeSXin LI file_private int
check_buffer(struct magic_set * ms,struct magic_map * map,const char * dbname)3374c2931133SXin LI check_buffer(struct magic_set *ms, struct magic_map *map, const char *dbname)
3375c2931133SXin LI {
3376c2931133SXin LI uint32_t *ptr;
3377c2931133SXin LI uint32_t entries, nentries;
3378c2931133SXin LI uint32_t version;
3379c2931133SXin LI int i, needsbyteswap;
3380c2931133SXin LI
3381*ae316d1dSXin LI entries = CAST(uint32_t, map->len / sizeof(struct magic));
3382*ae316d1dSXin LI if (entries < MAGIC_SETS + 1) {
3383*ae316d1dSXin LI file_error(ms, 0, "Too few magic entries %u in `%s'",
3384*ae316d1dSXin LI entries, dbname);
3385*ae316d1dSXin LI return -1;
3386*ae316d1dSXin LI }
3387*ae316d1dSXin LI if ((entries * sizeof(struct magic)) != map->len) {
3388*ae316d1dSXin LI file_error(ms, 0, "Size of `%s' %" SIZE_T_FORMAT "u is not "
3389*ae316d1dSXin LI "a multiple of %" SIZE_T_FORMAT "u",
3390*ae316d1dSXin LI dbname, map->len, sizeof(struct magic));
3391*ae316d1dSXin LI return -1;
3392*ae316d1dSXin LI }
3393*ae316d1dSXin LI
3394b6cee71dSXin LI ptr = CAST(uint32_t *, map->p);
3395b6cee71dSXin LI if (*ptr != MAGICNO) {
3396b6cee71dSXin LI if (swap4(*ptr) != MAGICNO) {
3397b6cee71dSXin LI file_error(ms, 0, "bad magic in `%s'", dbname);
3398c2931133SXin LI return -1;
3399b6cee71dSXin LI }
3400b6cee71dSXin LI needsbyteswap = 1;
3401b6cee71dSXin LI } else
3402b6cee71dSXin LI needsbyteswap = 0;
3403b6cee71dSXin LI if (needsbyteswap)
3404b6cee71dSXin LI version = swap4(ptr[1]);
3405b6cee71dSXin LI else
3406b6cee71dSXin LI version = ptr[1];
3407b6cee71dSXin LI if (version != VERSIONNO) {
3408b6cee71dSXin LI file_error(ms, 0, "File %s supports only version %d magic "
3409b6cee71dSXin LI "files. `%s' is version %d", VERSION,
3410b6cee71dSXin LI VERSIONNO, dbname, version);
3411c2931133SXin LI return -1;
3412b6cee71dSXin LI }
3413b6cee71dSXin LI map->magic[0] = CAST(struct magic *, map->p) + 1;
3414b6cee71dSXin LI nentries = 0;
3415b6cee71dSXin LI for (i = 0; i < MAGIC_SETS; i++) {
3416b6cee71dSXin LI if (needsbyteswap)
3417b6cee71dSXin LI map->nmagic[i] = swap4(ptr[i + 2]);
3418b6cee71dSXin LI else
3419b6cee71dSXin LI map->nmagic[i] = ptr[i + 2];
3420b6cee71dSXin LI if (i != MAGIC_SETS - 1)
3421b6cee71dSXin LI map->magic[i + 1] = map->magic[i] + map->nmagic[i];
3422b6cee71dSXin LI nentries += map->nmagic[i];
3423b6cee71dSXin LI }
3424b6cee71dSXin LI if (entries != nentries + 1) {
3425b6cee71dSXin LI file_error(ms, 0, "Inconsistent entries in `%s' %u != %u",
3426b6cee71dSXin LI dbname, entries, nentries + 1);
3427c2931133SXin LI return -1;
3428b6cee71dSXin LI }
3429b6cee71dSXin LI if (needsbyteswap)
3430b6cee71dSXin LI for (i = 0; i < MAGIC_SETS; i++)
3431b6cee71dSXin LI byteswap(map->magic[i], map->nmagic[i]);
3432c2931133SXin LI return 0;
3433b6cee71dSXin LI }
3434b6cee71dSXin LI
3435b6cee71dSXin LI /*
3436b6cee71dSXin LI * handle an mmaped file.
3437b6cee71dSXin LI */
3438898496eeSXin LI file_private int
apprentice_compile(struct magic_set * ms,struct magic_map * map,const char * fn)3439b6cee71dSXin LI apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn)
3440b6cee71dSXin LI {
3441b6cee71dSXin LI static const size_t nm = sizeof(*map->nmagic) * MAGIC_SETS;
3442b6cee71dSXin LI static const size_t m = sizeof(**map->magic);
3443b6cee71dSXin LI int fd = -1;
3444b6cee71dSXin LI size_t len;
3445b6cee71dSXin LI char *dbname;
3446b6cee71dSXin LI int rv = -1;
3447b6cee71dSXin LI uint32_t i;
3448b6cee71dSXin LI union {
3449b6cee71dSXin LI struct magic m;
3450b6cee71dSXin LI uint32_t h[2 + MAGIC_SETS];
3451b6cee71dSXin LI } hdr;
3452b6cee71dSXin LI
3453b6cee71dSXin LI dbname = mkdbname(ms, fn, 1);
3454b6cee71dSXin LI
3455b6cee71dSXin LI if (dbname == NULL)
3456b6cee71dSXin LI goto out;
3457b6cee71dSXin LI
3458b6cee71dSXin LI if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1)
3459b6cee71dSXin LI {
3460b6cee71dSXin LI file_error(ms, errno, "cannot open `%s'", dbname);
3461b6cee71dSXin LI goto out;
3462b6cee71dSXin LI }
3463b6cee71dSXin LI memset(&hdr, 0, sizeof(hdr));
3464b6cee71dSXin LI hdr.h[0] = MAGICNO;
3465b6cee71dSXin LI hdr.h[1] = VERSIONNO;
3466b6cee71dSXin LI memcpy(hdr.h + 2, map->nmagic, nm);
3467b6cee71dSXin LI
346848c779cdSXin LI if (write(fd, &hdr, sizeof(hdr)) != CAST(ssize_t, sizeof(hdr))) {
3469b6cee71dSXin LI file_error(ms, errno, "error writing `%s'", dbname);
347058a0f0d0SEitan Adler goto out2;
3471b6cee71dSXin LI }
3472b6cee71dSXin LI
3473b6cee71dSXin LI for (i = 0; i < MAGIC_SETS; i++) {
3474b6cee71dSXin LI len = m * map->nmagic[i];
347548c779cdSXin LI if (write(fd, map->magic[i], len) != CAST(ssize_t, len)) {
3476b6cee71dSXin LI file_error(ms, errno, "error writing `%s'", dbname);
347758a0f0d0SEitan Adler goto out2;
3478b6cee71dSXin LI }
3479b6cee71dSXin LI }
3480b6cee71dSXin LI
348158a0f0d0SEitan Adler rv = 0;
348258a0f0d0SEitan Adler out2:
3483b6cee71dSXin LI if (fd != -1)
3484b6cee71dSXin LI (void)close(fd);
3485b6cee71dSXin LI out:
3486282e23f0SXin LI apprentice_unmap(map);
3487b6cee71dSXin LI free(dbname);
3488b6cee71dSXin LI return rv;
3489b6cee71dSXin LI }
3490b6cee71dSXin LI
3491898496eeSXin LI file_private const char ext[] = ".mgc";
3492b6cee71dSXin LI /*
3493b6cee71dSXin LI * make a dbname
3494b6cee71dSXin LI */
3495898496eeSXin LI file_private char *
mkdbname(struct magic_set * ms,const char * fn,int strip)3496b6cee71dSXin LI mkdbname(struct magic_set *ms, const char *fn, int strip)
3497b6cee71dSXin LI {
3498b6cee71dSXin LI const char *p, *q;
3499b6cee71dSXin LI char *buf;
3500b6cee71dSXin LI
3501b6cee71dSXin LI if (strip) {
3502b6cee71dSXin LI if ((p = strrchr(fn, '/')) != NULL)
3503b6cee71dSXin LI fn = ++p;
3504b6cee71dSXin LI }
3505b6cee71dSXin LI
3506b6cee71dSXin LI for (q = fn; *q; q++)
3507b6cee71dSXin LI continue;
3508b6cee71dSXin LI /* Look for .mgc */
3509b6cee71dSXin LI for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--)
3510b6cee71dSXin LI if (*p != *q)
3511b6cee71dSXin LI break;
3512b6cee71dSXin LI
3513b6cee71dSXin LI /* Did not find .mgc, restore q */
3514b6cee71dSXin LI if (p >= ext)
3515*ae316d1dSXin LI for (q = fn; *q; q++)
3516*ae316d1dSXin LI continue;
3517b6cee71dSXin LI
3518b6cee71dSXin LI q++;
3519b6cee71dSXin LI /* Compatibility with old code that looked in .mime */
3520b6cee71dSXin LI if (ms->flags & MAGIC_MIME) {
352148c779cdSXin LI if (asprintf(&buf, "%.*s.mime%s", CAST(int, q - fn), fn, ext)
352248c779cdSXin LI < 0)
3523b6cee71dSXin LI return NULL;
3524b6cee71dSXin LI if (access(buf, R_OK) != -1) {
3525b6cee71dSXin LI ms->flags &= MAGIC_MIME_TYPE;
3526b6cee71dSXin LI return buf;
3527b6cee71dSXin LI }
3528b6cee71dSXin LI free(buf);
3529b6cee71dSXin LI }
353048c779cdSXin LI if (asprintf(&buf, "%.*s%s", CAST(int, q - fn), fn, ext) < 0)
3531b6cee71dSXin LI return NULL;
3532b6cee71dSXin LI
3533b6cee71dSXin LI /* Compatibility with old code that looked in .mime */
3534a5d223e6SXin LI if (strstr(fn, ".mime") != NULL)
3535b6cee71dSXin LI ms->flags &= MAGIC_MIME_TYPE;
3536b6cee71dSXin LI return buf;
3537b6cee71dSXin LI }
3538b6cee71dSXin LI
3539b6cee71dSXin LI /*
3540b6cee71dSXin LI * Byteswap an mmap'ed file if needed
3541b6cee71dSXin LI */
3542898496eeSXin LI file_private void
byteswap(struct magic * magic,uint32_t nmagic)3543b6cee71dSXin LI byteswap(struct magic *magic, uint32_t nmagic)
3544b6cee71dSXin LI {
3545b6cee71dSXin LI uint32_t i;
3546b6cee71dSXin LI for (i = 0; i < nmagic; i++)
3547b6cee71dSXin LI bs1(&magic[i]);
3548b6cee71dSXin LI }
3549b6cee71dSXin LI
3550a4d6d3b8SXin LI #if !defined(HAVE_BYTESWAP_H) && !defined(HAVE_SYS_BSWAP_H)
3551b6cee71dSXin LI /*
3552b6cee71dSXin LI * swap a short
3553b6cee71dSXin LI */
3554898496eeSXin LI file_private uint16_t
swap2(uint16_t sv)3555b6cee71dSXin LI swap2(uint16_t sv)
3556b6cee71dSXin LI {
3557b6cee71dSXin LI uint16_t rv;
355848c779cdSXin LI uint8_t *s = RCAST(uint8_t *, RCAST(void *, &sv));
355948c779cdSXin LI uint8_t *d = RCAST(uint8_t *, RCAST(void *, &rv));
3560b6cee71dSXin LI d[0] = s[1];
3561b6cee71dSXin LI d[1] = s[0];
3562b6cee71dSXin LI return rv;
3563b6cee71dSXin LI }
3564b6cee71dSXin LI
3565b6cee71dSXin LI /*
3566b6cee71dSXin LI * swap an int
3567b6cee71dSXin LI */
3568898496eeSXin LI file_private uint32_t
swap4(uint32_t sv)3569b6cee71dSXin LI swap4(uint32_t sv)
3570b6cee71dSXin LI {
3571b6cee71dSXin LI uint32_t rv;
357248c779cdSXin LI uint8_t *s = RCAST(uint8_t *, RCAST(void *, &sv));
357348c779cdSXin LI uint8_t *d = RCAST(uint8_t *, RCAST(void *, &rv));
3574b6cee71dSXin LI d[0] = s[3];
3575b6cee71dSXin LI d[1] = s[2];
3576b6cee71dSXin LI d[2] = s[1];
3577b6cee71dSXin LI d[3] = s[0];
3578b6cee71dSXin LI return rv;
3579b6cee71dSXin LI }
3580b6cee71dSXin LI
3581b6cee71dSXin LI /*
3582b6cee71dSXin LI * swap a quad
3583b6cee71dSXin LI */
3584898496eeSXin LI file_private uint64_t
swap8(uint64_t sv)3585b6cee71dSXin LI swap8(uint64_t sv)
3586b6cee71dSXin LI {
3587b6cee71dSXin LI uint64_t rv;
358848c779cdSXin LI uint8_t *s = RCAST(uint8_t *, RCAST(void *, &sv));
358948c779cdSXin LI uint8_t *d = RCAST(uint8_t *, RCAST(void *, &rv));
3590b6cee71dSXin LI # if 0
3591b6cee71dSXin LI d[0] = s[3];
3592b6cee71dSXin LI d[1] = s[2];
3593b6cee71dSXin LI d[2] = s[1];
3594b6cee71dSXin LI d[3] = s[0];
3595b6cee71dSXin LI d[4] = s[7];
3596b6cee71dSXin LI d[5] = s[6];
3597b6cee71dSXin LI d[6] = s[5];
3598b6cee71dSXin LI d[7] = s[4];
3599b6cee71dSXin LI # else
3600b6cee71dSXin LI d[0] = s[7];
3601b6cee71dSXin LI d[1] = s[6];
3602b6cee71dSXin LI d[2] = s[5];
3603b6cee71dSXin LI d[3] = s[4];
3604b6cee71dSXin LI d[4] = s[3];
3605b6cee71dSXin LI d[5] = s[2];
3606b6cee71dSXin LI d[6] = s[1];
3607b6cee71dSXin LI d[7] = s[0];
3608b6cee71dSXin LI # endif
3609b6cee71dSXin LI return rv;
3610b6cee71dSXin LI }
3611a4d6d3b8SXin LI #endif
3612b6cee71dSXin LI
3613898496eeSXin LI file_protected uintmax_t
file_varint2uintmax_t(const unsigned char * us,int t,size_t * l)361443a5ec4eSXin LI file_varint2uintmax_t(const unsigned char *us, int t, size_t *l)
361543a5ec4eSXin LI {
361643a5ec4eSXin LI uintmax_t x = 0;
361743a5ec4eSXin LI const unsigned char *c;
361843a5ec4eSXin LI if (t == FILE_LEVARINT) {
361943a5ec4eSXin LI for (c = us; *c; c++) {
362043a5ec4eSXin LI if ((*c & 0x80) == 0)
362143a5ec4eSXin LI break;
362243a5ec4eSXin LI }
362343a5ec4eSXin LI if (l)
362443a5ec4eSXin LI *l = c - us + 1;
362543a5ec4eSXin LI for (; c >= us; c--) {
362643a5ec4eSXin LI x |= *c & 0x7f;
362743a5ec4eSXin LI x <<= 7;
362843a5ec4eSXin LI }
362943a5ec4eSXin LI } else {
363043a5ec4eSXin LI for (c = us; *c; c++) {
363143a5ec4eSXin LI x |= *c & 0x7f;
363243a5ec4eSXin LI if ((*c & 0x80) == 0)
363343a5ec4eSXin LI break;
363443a5ec4eSXin LI x <<= 7;
363543a5ec4eSXin LI }
363643a5ec4eSXin LI if (l)
363743a5ec4eSXin LI *l = c - us + 1;
363843a5ec4eSXin LI }
363943a5ec4eSXin LI return x;
364043a5ec4eSXin LI }
364143a5ec4eSXin LI
364243a5ec4eSXin LI
3643b6cee71dSXin LI /*
3644b6cee71dSXin LI * byteswap a single magic entry
3645b6cee71dSXin LI */
3646898496eeSXin LI file_private void
bs1(struct magic * m)3647b6cee71dSXin LI bs1(struct magic *m)
3648b6cee71dSXin LI {
3649b6cee71dSXin LI m->cont_level = swap2(m->cont_level);
365048c779cdSXin LI m->offset = swap4(CAST(uint32_t, m->offset));
365148c779cdSXin LI m->in_offset = swap4(CAST(uint32_t, m->in_offset));
365248c779cdSXin LI m->lineno = swap4(CAST(uint32_t, m->lineno));
3653b6cee71dSXin LI if (IS_STRING(m->type)) {
3654b6cee71dSXin LI m->str_range = swap4(m->str_range);
3655b6cee71dSXin LI m->str_flags = swap4(m->str_flags);
3656b6cee71dSXin LI }
3657b6cee71dSXin LI else {
3658b6cee71dSXin LI m->value.q = swap8(m->value.q);
3659b6cee71dSXin LI m->num_mask = swap8(m->num_mask);
3660b6cee71dSXin LI }
3661b6cee71dSXin LI }
3662b6cee71dSXin LI
3663898496eeSXin LI file_protected size_t
file_pstring_length_size(struct magic_set * ms,const struct magic * m)36642726a701SXin LI file_pstring_length_size(struct magic_set *ms, const struct magic *m)
3665b6cee71dSXin LI {
3666b6cee71dSXin LI switch (m->str_flags & PSTRING_LEN) {
3667b6cee71dSXin LI case PSTRING_1_LE:
3668b6cee71dSXin LI return 1;
3669b6cee71dSXin LI case PSTRING_2_LE:
3670b6cee71dSXin LI case PSTRING_2_BE:
3671b6cee71dSXin LI return 2;
3672b6cee71dSXin LI case PSTRING_4_LE:
3673b6cee71dSXin LI case PSTRING_4_BE:
3674b6cee71dSXin LI return 4;
3675b6cee71dSXin LI default:
36762726a701SXin LI file_error(ms, 0, "corrupt magic file "
36772726a701SXin LI "(bad pascal string length %d)",
36782726a701SXin LI m->str_flags & PSTRING_LEN);
36792726a701SXin LI return FILE_BADSIZE;
3680b6cee71dSXin LI }
3681b6cee71dSXin LI }
3682898496eeSXin LI file_protected size_t
file_pstring_get_length(struct magic_set * ms,const struct magic * m,const char * ss)36832726a701SXin LI file_pstring_get_length(struct magic_set *ms, const struct magic *m,
36842726a701SXin LI const char *ss)
3685b6cee71dSXin LI {
3686b6cee71dSXin LI size_t len = 0;
368748c779cdSXin LI const unsigned char *s = RCAST(const unsigned char *, ss);
368840427ccaSGordon Tetlow unsigned int s3, s2, s1, s0;
3689b6cee71dSXin LI
3690b6cee71dSXin LI switch (m->str_flags & PSTRING_LEN) {
3691b6cee71dSXin LI case PSTRING_1_LE:
3692b6cee71dSXin LI len = *s;
3693b6cee71dSXin LI break;
3694b6cee71dSXin LI case PSTRING_2_LE:
369540427ccaSGordon Tetlow s0 = s[0];
369640427ccaSGordon Tetlow s1 = s[1];
369740427ccaSGordon Tetlow len = (s1 << 8) | s0;
3698b6cee71dSXin LI break;
3699b6cee71dSXin LI case PSTRING_2_BE:
370040427ccaSGordon Tetlow s0 = s[0];
370140427ccaSGordon Tetlow s1 = s[1];
370240427ccaSGordon Tetlow len = (s0 << 8) | s1;
3703b6cee71dSXin LI break;
3704b6cee71dSXin LI case PSTRING_4_LE:
370540427ccaSGordon Tetlow s0 = s[0];
370640427ccaSGordon Tetlow s1 = s[1];
370740427ccaSGordon Tetlow s2 = s[2];
370840427ccaSGordon Tetlow s3 = s[3];
370940427ccaSGordon Tetlow len = (s3 << 24) | (s2 << 16) | (s1 << 8) | s0;
3710b6cee71dSXin LI break;
3711b6cee71dSXin LI case PSTRING_4_BE:
371240427ccaSGordon Tetlow s0 = s[0];
371340427ccaSGordon Tetlow s1 = s[1];
371440427ccaSGordon Tetlow s2 = s[2];
371540427ccaSGordon Tetlow s3 = s[3];
371640427ccaSGordon Tetlow len = (s0 << 24) | (s1 << 16) | (s2 << 8) | s3;
3717b6cee71dSXin LI break;
3718b6cee71dSXin LI default:
37192726a701SXin LI file_error(ms, 0, "corrupt magic file "
37202726a701SXin LI "(bad pascal string length %d)",
37212726a701SXin LI m->str_flags & PSTRING_LEN);
37222726a701SXin LI return FILE_BADSIZE;
3723b6cee71dSXin LI }
3724b6cee71dSXin LI
37252726a701SXin LI if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF) {
37262726a701SXin LI size_t l = file_pstring_length_size(ms, m);
37272726a701SXin LI if (l == FILE_BADSIZE)
37282726a701SXin LI return l;
37292726a701SXin LI len -= l;
37302726a701SXin LI }
3731b6cee71dSXin LI
3732b6cee71dSXin LI return len;
3733b6cee71dSXin LI }
3734b6cee71dSXin LI
3735898496eeSXin LI file_protected int
file_magicfind(struct magic_set * ms,const char * name,struct mlist * v)3736b6cee71dSXin LI file_magicfind(struct magic_set *ms, const char *name, struct mlist *v)
3737b6cee71dSXin LI {
3738b6cee71dSXin LI uint32_t i, j;
3739b6cee71dSXin LI struct mlist *mlist, *ml;
3740b6cee71dSXin LI
3741b6cee71dSXin LI mlist = ms->mlist[1];
3742b6cee71dSXin LI
3743b6cee71dSXin LI for (ml = mlist->next; ml != mlist; ml = ml->next) {
3744b6cee71dSXin LI struct magic *ma = ml->magic;
3745a4d6d3b8SXin LI for (i = 0; i < ml->nmagic; i++) {
3746b6cee71dSXin LI if (ma[i].type != FILE_NAME)
3747b6cee71dSXin LI continue;
3748b6cee71dSXin LI if (strcmp(ma[i].value.s, name) == 0) {
3749b6cee71dSXin LI v->magic = &ma[i];
3750a4d6d3b8SXin LI v->magic_rxcomp = &(ml->magic_rxcomp[i]);
3751a4d6d3b8SXin LI for (j = i + 1; j < ml->nmagic; j++)
3752b6cee71dSXin LI if (ma[j].cont_level == 0)
3753b6cee71dSXin LI break;
3754b6cee71dSXin LI v->nmagic = j - i;
3755b6cee71dSXin LI return 0;
3756b6cee71dSXin LI }
3757b6cee71dSXin LI }
3758b6cee71dSXin LI }
3759b6cee71dSXin LI return -1;
3760b6cee71dSXin LI }
3761