1*b6cee71dSXin LI /* 2*b6cee71dSXin LI * Copyright (c) Ian F. Darwin 1986-1995. 3*b6cee71dSXin LI * Software written by Ian F. Darwin and others; 4*b6cee71dSXin LI * maintained 1995-present by Christos Zoulas and others. 5*b6cee71dSXin LI * 6*b6cee71dSXin LI * Redistribution and use in source and binary forms, with or without 7*b6cee71dSXin LI * modification, are permitted provided that the following conditions 8*b6cee71dSXin LI * are met: 9*b6cee71dSXin LI * 1. Redistributions of source code must retain the above copyright 10*b6cee71dSXin LI * notice immediately at the beginning of the file, without modification, 11*b6cee71dSXin LI * this list of conditions, and the following disclaimer. 12*b6cee71dSXin LI * 2. Redistributions in binary form must reproduce the above copyright 13*b6cee71dSXin LI * notice, this list of conditions and the following disclaimer in the 14*b6cee71dSXin LI * documentation and/or other materials provided with the distribution. 15*b6cee71dSXin LI * 16*b6cee71dSXin LI * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17*b6cee71dSXin LI * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18*b6cee71dSXin LI * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19*b6cee71dSXin LI * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 20*b6cee71dSXin LI * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21*b6cee71dSXin LI * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22*b6cee71dSXin LI * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23*b6cee71dSXin LI * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24*b6cee71dSXin LI * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25*b6cee71dSXin LI * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26*b6cee71dSXin LI * SUCH DAMAGE. 27*b6cee71dSXin LI */ 28*b6cee71dSXin LI /* 29*b6cee71dSXin LI * apprentice - make one pass through /etc/magic, learning its secrets. 30*b6cee71dSXin LI */ 31*b6cee71dSXin LI 32*b6cee71dSXin LI #include "file.h" 33*b6cee71dSXin LI 34*b6cee71dSXin LI #ifndef lint 35*b6cee71dSXin LI FILE_RCSID("@(#)$File: apprentice.c,v 1.211 2014/06/03 19:01:34 christos Exp $") 36*b6cee71dSXin LI #endif /* lint */ 37*b6cee71dSXin LI 38*b6cee71dSXin LI #include "magic.h" 39*b6cee71dSXin LI #include <stdlib.h> 40*b6cee71dSXin LI #ifdef HAVE_UNISTD_H 41*b6cee71dSXin LI #include <unistd.h> 42*b6cee71dSXin LI #endif 43*b6cee71dSXin LI #ifdef HAVE_STDDEF_H 44*b6cee71dSXin LI #include <stddef.h> 45*b6cee71dSXin LI #endif 46*b6cee71dSXin LI #include <string.h> 47*b6cee71dSXin LI #include <assert.h> 48*b6cee71dSXin LI #include <ctype.h> 49*b6cee71dSXin LI #include <fcntl.h> 50*b6cee71dSXin LI #ifdef QUICK 51*b6cee71dSXin LI #include <sys/mman.h> 52*b6cee71dSXin LI #endif 53*b6cee71dSXin LI #include <dirent.h> 54*b6cee71dSXin LI #if defined(HAVE_LIMITS_H) 55*b6cee71dSXin LI #include <limits.h> 56*b6cee71dSXin LI #endif 57*b6cee71dSXin LI 58*b6cee71dSXin LI #ifndef SSIZE_MAX 59*b6cee71dSXin LI #define MAXMAGIC_SIZE ((ssize_t)0x7fffffff) 60*b6cee71dSXin LI #else 61*b6cee71dSXin LI #define MAXMAGIC_SIZE SSIZE_MAX 62*b6cee71dSXin LI #endif 63*b6cee71dSXin LI 64*b6cee71dSXin LI #define EATAB {while (isascii((unsigned char) *l) && \ 65*b6cee71dSXin LI isspace((unsigned char) *l)) ++l;} 66*b6cee71dSXin LI #define LOWCASE(l) (isupper((unsigned char) (l)) ? \ 67*b6cee71dSXin LI tolower((unsigned char) (l)) : (l)) 68*b6cee71dSXin LI /* 69*b6cee71dSXin LI * Work around a bug in headers on Digital Unix. 70*b6cee71dSXin LI * At least confirmed for: OSF1 V4.0 878 71*b6cee71dSXin LI */ 72*b6cee71dSXin LI #if defined(__osf__) && defined(__DECC) 73*b6cee71dSXin LI #ifdef MAP_FAILED 74*b6cee71dSXin LI #undef MAP_FAILED 75*b6cee71dSXin LI #endif 76*b6cee71dSXin LI #endif 77*b6cee71dSXin LI 78*b6cee71dSXin LI #ifndef MAP_FAILED 79*b6cee71dSXin LI #define MAP_FAILED (void *) -1 80*b6cee71dSXin LI #endif 81*b6cee71dSXin LI 82*b6cee71dSXin LI #ifndef MAP_FILE 83*b6cee71dSXin LI #define MAP_FILE 0 84*b6cee71dSXin LI #endif 85*b6cee71dSXin LI 86*b6cee71dSXin LI #define ALLOC_CHUNK (size_t)10 87*b6cee71dSXin LI #define ALLOC_INCR (size_t)200 88*b6cee71dSXin LI 89*b6cee71dSXin LI struct magic_entry { 90*b6cee71dSXin LI struct magic *mp; 91*b6cee71dSXin LI uint32_t cont_count; 92*b6cee71dSXin LI uint32_t max_count; 93*b6cee71dSXin LI }; 94*b6cee71dSXin LI 95*b6cee71dSXin LI struct magic_entry_set { 96*b6cee71dSXin LI struct magic_entry *me; 97*b6cee71dSXin LI uint32_t count; 98*b6cee71dSXin LI uint32_t max; 99*b6cee71dSXin LI }; 100*b6cee71dSXin LI 101*b6cee71dSXin LI struct magic_map { 102*b6cee71dSXin LI void *p; 103*b6cee71dSXin LI size_t len; 104*b6cee71dSXin LI struct magic *magic[MAGIC_SETS]; 105*b6cee71dSXin LI uint32_t nmagic[MAGIC_SETS]; 106*b6cee71dSXin LI }; 107*b6cee71dSXin LI 108*b6cee71dSXin LI int file_formats[FILE_NAMES_SIZE]; 109*b6cee71dSXin LI const size_t file_nformats = FILE_NAMES_SIZE; 110*b6cee71dSXin LI const char *file_names[FILE_NAMES_SIZE]; 111*b6cee71dSXin LI const size_t file_nnames = FILE_NAMES_SIZE; 112*b6cee71dSXin LI 113*b6cee71dSXin LI private int getvalue(struct magic_set *ms, struct magic *, const char **, int); 114*b6cee71dSXin LI private int hextoint(int); 115*b6cee71dSXin LI private const char *getstr(struct magic_set *, struct magic *, const char *, 116*b6cee71dSXin LI int); 117*b6cee71dSXin LI private int parse(struct magic_set *, struct magic_entry *, const char *, 118*b6cee71dSXin LI size_t, int); 119*b6cee71dSXin LI private void eatsize(const char **); 120*b6cee71dSXin LI private int apprentice_1(struct magic_set *, const char *, int); 121*b6cee71dSXin LI private size_t apprentice_magic_strength(const struct magic *); 122*b6cee71dSXin LI private int apprentice_sort(const void *, const void *); 123*b6cee71dSXin LI private void apprentice_list(struct mlist *, int ); 124*b6cee71dSXin LI private struct magic_map *apprentice_load(struct magic_set *, 125*b6cee71dSXin LI const char *, int); 126*b6cee71dSXin LI private struct mlist *mlist_alloc(void); 127*b6cee71dSXin LI private void mlist_free(struct mlist *); 128*b6cee71dSXin LI private void byteswap(struct magic *, uint32_t); 129*b6cee71dSXin LI private void bs1(struct magic *); 130*b6cee71dSXin LI private uint16_t swap2(uint16_t); 131*b6cee71dSXin LI private uint32_t swap4(uint32_t); 132*b6cee71dSXin LI private uint64_t swap8(uint64_t); 133*b6cee71dSXin LI private char *mkdbname(struct magic_set *, const char *, int); 134*b6cee71dSXin LI private struct magic_map *apprentice_map(struct magic_set *, const char *); 135*b6cee71dSXin LI private void apprentice_unmap(struct magic_map *); 136*b6cee71dSXin LI private int apprentice_compile(struct magic_set *, struct magic_map *, 137*b6cee71dSXin LI const char *); 138*b6cee71dSXin LI private int check_format_type(const char *, int); 139*b6cee71dSXin LI private int check_format(struct magic_set *, struct magic *); 140*b6cee71dSXin LI private int get_op(char); 141*b6cee71dSXin LI private int parse_mime(struct magic_set *, struct magic_entry *, const char *); 142*b6cee71dSXin LI private int parse_strength(struct magic_set *, struct magic_entry *, const char *); 143*b6cee71dSXin LI private int parse_apple(struct magic_set *, struct magic_entry *, const char *); 144*b6cee71dSXin LI 145*b6cee71dSXin LI 146*b6cee71dSXin LI private size_t magicsize = sizeof(struct magic); 147*b6cee71dSXin LI 148*b6cee71dSXin LI private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc"; 149*b6cee71dSXin LI 150*b6cee71dSXin LI private struct { 151*b6cee71dSXin LI const char *name; 152*b6cee71dSXin LI size_t len; 153*b6cee71dSXin LI int (*fun)(struct magic_set *, struct magic_entry *, const char *); 154*b6cee71dSXin LI } bang[] = { 155*b6cee71dSXin LI #define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name } 156*b6cee71dSXin LI DECLARE_FIELD(mime), 157*b6cee71dSXin LI DECLARE_FIELD(apple), 158*b6cee71dSXin LI DECLARE_FIELD(strength), 159*b6cee71dSXin LI #undef DECLARE_FIELD 160*b6cee71dSXin LI { NULL, 0, NULL } 161*b6cee71dSXin LI }; 162*b6cee71dSXin LI 163*b6cee71dSXin LI #ifdef COMPILE_ONLY 164*b6cee71dSXin LI 165*b6cee71dSXin LI int main(int, char *[]); 166*b6cee71dSXin LI 167*b6cee71dSXin LI int 168*b6cee71dSXin LI main(int argc, char *argv[]) 169*b6cee71dSXin LI { 170*b6cee71dSXin LI int ret; 171*b6cee71dSXin LI struct magic_set *ms; 172*b6cee71dSXin LI char *progname; 173*b6cee71dSXin LI 174*b6cee71dSXin LI if ((progname = strrchr(argv[0], '/')) != NULL) 175*b6cee71dSXin LI progname++; 176*b6cee71dSXin LI else 177*b6cee71dSXin LI progname = argv[0]; 178*b6cee71dSXin LI 179*b6cee71dSXin LI if (argc != 2) { 180*b6cee71dSXin LI (void)fprintf(stderr, "Usage: %s file\n", progname); 181*b6cee71dSXin LI return 1; 182*b6cee71dSXin LI } 183*b6cee71dSXin LI 184*b6cee71dSXin LI if ((ms = magic_open(MAGIC_CHECK)) == NULL) { 185*b6cee71dSXin LI (void)fprintf(stderr, "%s: %s\n", progname, strerror(errno)); 186*b6cee71dSXin LI return 1; 187*b6cee71dSXin LI } 188*b6cee71dSXin LI ret = magic_compile(ms, argv[1]) == -1 ? 1 : 0; 189*b6cee71dSXin LI if (ret == 1) 190*b6cee71dSXin LI (void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms)); 191*b6cee71dSXin LI magic_close(ms); 192*b6cee71dSXin LI return ret; 193*b6cee71dSXin LI } 194*b6cee71dSXin LI #endif /* COMPILE_ONLY */ 195*b6cee71dSXin LI 196*b6cee71dSXin LI struct type_tbl_s { 197*b6cee71dSXin LI const char name[16]; 198*b6cee71dSXin LI const size_t len; 199*b6cee71dSXin LI const int type; 200*b6cee71dSXin LI const int format; 201*b6cee71dSXin LI }; 202*b6cee71dSXin LI 203*b6cee71dSXin LI /* 204*b6cee71dSXin LI * XXX - the actual Single UNIX Specification says that "long" means "long", 205*b6cee71dSXin LI * as in the C data type, but we treat it as meaning "4-byte integer". 206*b6cee71dSXin LI * Given that the OS X version of file 5.04 did the same, I guess that passes 207*b6cee71dSXin LI * the actual test; having "long" be dependent on how big a "long" is on 208*b6cee71dSXin LI * the machine running "file" is silly. 209*b6cee71dSXin LI */ 210*b6cee71dSXin LI static const struct type_tbl_s type_tbl[] = { 211*b6cee71dSXin LI # define XX(s) s, (sizeof(s) - 1) 212*b6cee71dSXin LI # define XX_NULL "", 0 213*b6cee71dSXin LI { XX("invalid"), FILE_INVALID, FILE_FMT_NONE }, 214*b6cee71dSXin LI { XX("byte"), FILE_BYTE, FILE_FMT_NUM }, 215*b6cee71dSXin LI { XX("short"), FILE_SHORT, FILE_FMT_NUM }, 216*b6cee71dSXin LI { XX("default"), FILE_DEFAULT, FILE_FMT_NONE }, 217*b6cee71dSXin LI { XX("long"), FILE_LONG, FILE_FMT_NUM }, 218*b6cee71dSXin LI { XX("string"), FILE_STRING, FILE_FMT_STR }, 219*b6cee71dSXin LI { XX("date"), FILE_DATE, FILE_FMT_STR }, 220*b6cee71dSXin LI { XX("beshort"), FILE_BESHORT, FILE_FMT_NUM }, 221*b6cee71dSXin LI { XX("belong"), FILE_BELONG, FILE_FMT_NUM }, 222*b6cee71dSXin LI { XX("bedate"), FILE_BEDATE, FILE_FMT_STR }, 223*b6cee71dSXin LI { XX("leshort"), FILE_LESHORT, FILE_FMT_NUM }, 224*b6cee71dSXin LI { XX("lelong"), FILE_LELONG, FILE_FMT_NUM }, 225*b6cee71dSXin LI { XX("ledate"), FILE_LEDATE, FILE_FMT_STR }, 226*b6cee71dSXin LI { XX("pstring"), FILE_PSTRING, FILE_FMT_STR }, 227*b6cee71dSXin LI { XX("ldate"), FILE_LDATE, FILE_FMT_STR }, 228*b6cee71dSXin LI { XX("beldate"), FILE_BELDATE, FILE_FMT_STR }, 229*b6cee71dSXin LI { XX("leldate"), FILE_LELDATE, FILE_FMT_STR }, 230*b6cee71dSXin LI { XX("regex"), FILE_REGEX, FILE_FMT_STR }, 231*b6cee71dSXin LI { XX("bestring16"), FILE_BESTRING16, FILE_FMT_STR }, 232*b6cee71dSXin LI { XX("lestring16"), FILE_LESTRING16, FILE_FMT_STR }, 233*b6cee71dSXin LI { XX("search"), FILE_SEARCH, FILE_FMT_STR }, 234*b6cee71dSXin LI { XX("medate"), FILE_MEDATE, FILE_FMT_STR }, 235*b6cee71dSXin LI { XX("meldate"), FILE_MELDATE, FILE_FMT_STR }, 236*b6cee71dSXin LI { XX("melong"), FILE_MELONG, FILE_FMT_NUM }, 237*b6cee71dSXin LI { XX("quad"), FILE_QUAD, FILE_FMT_QUAD }, 238*b6cee71dSXin LI { XX("lequad"), FILE_LEQUAD, FILE_FMT_QUAD }, 239*b6cee71dSXin LI { XX("bequad"), FILE_BEQUAD, FILE_FMT_QUAD }, 240*b6cee71dSXin LI { XX("qdate"), FILE_QDATE, FILE_FMT_STR }, 241*b6cee71dSXin LI { XX("leqdate"), FILE_LEQDATE, FILE_FMT_STR }, 242*b6cee71dSXin LI { XX("beqdate"), FILE_BEQDATE, FILE_FMT_STR }, 243*b6cee71dSXin LI { XX("qldate"), FILE_QLDATE, FILE_FMT_STR }, 244*b6cee71dSXin LI { XX("leqldate"), FILE_LEQLDATE, FILE_FMT_STR }, 245*b6cee71dSXin LI { XX("beqldate"), FILE_BEQLDATE, FILE_FMT_STR }, 246*b6cee71dSXin LI { XX("float"), FILE_FLOAT, FILE_FMT_FLOAT }, 247*b6cee71dSXin LI { XX("befloat"), FILE_BEFLOAT, FILE_FMT_FLOAT }, 248*b6cee71dSXin LI { XX("lefloat"), FILE_LEFLOAT, FILE_FMT_FLOAT }, 249*b6cee71dSXin LI { XX("double"), FILE_DOUBLE, FILE_FMT_DOUBLE }, 250*b6cee71dSXin LI { XX("bedouble"), FILE_BEDOUBLE, FILE_FMT_DOUBLE }, 251*b6cee71dSXin LI { XX("ledouble"), FILE_LEDOUBLE, FILE_FMT_DOUBLE }, 252*b6cee71dSXin LI { XX("leid3"), FILE_LEID3, FILE_FMT_NUM }, 253*b6cee71dSXin LI { XX("beid3"), FILE_BEID3, FILE_FMT_NUM }, 254*b6cee71dSXin LI { XX("indirect"), FILE_INDIRECT, FILE_FMT_NUM }, 255*b6cee71dSXin LI { XX("qwdate"), FILE_QWDATE, FILE_FMT_STR }, 256*b6cee71dSXin LI { XX("leqwdate"), FILE_LEQWDATE, FILE_FMT_STR }, 257*b6cee71dSXin LI { XX("beqwdate"), FILE_BEQWDATE, FILE_FMT_STR }, 258*b6cee71dSXin LI { XX("name"), FILE_NAME, FILE_FMT_NONE }, 259*b6cee71dSXin LI { XX("use"), FILE_USE, FILE_FMT_NONE }, 260*b6cee71dSXin LI { XX("clear"), FILE_CLEAR, FILE_FMT_NONE }, 261*b6cee71dSXin LI { XX_NULL, FILE_INVALID, FILE_FMT_NONE }, 262*b6cee71dSXin LI }; 263*b6cee71dSXin LI 264*b6cee71dSXin LI /* 265*b6cee71dSXin LI * These are not types, and cannot be preceded by "u" to make them 266*b6cee71dSXin LI * unsigned. 267*b6cee71dSXin LI */ 268*b6cee71dSXin LI static const struct type_tbl_s special_tbl[] = { 269*b6cee71dSXin LI { XX("name"), FILE_NAME, FILE_FMT_STR }, 270*b6cee71dSXin LI { XX("use"), FILE_USE, FILE_FMT_STR }, 271*b6cee71dSXin LI { XX_NULL, FILE_INVALID, FILE_FMT_NONE }, 272*b6cee71dSXin LI }; 273*b6cee71dSXin LI # undef XX 274*b6cee71dSXin LI # undef XX_NULL 275*b6cee71dSXin LI 276*b6cee71dSXin LI private int 277*b6cee71dSXin LI get_type(const struct type_tbl_s *tbl, const char *l, const char **t) 278*b6cee71dSXin LI { 279*b6cee71dSXin LI const struct type_tbl_s *p; 280*b6cee71dSXin LI 281*b6cee71dSXin LI for (p = tbl; p->len; p++) { 282*b6cee71dSXin LI if (strncmp(l, p->name, p->len) == 0) { 283*b6cee71dSXin LI if (t) 284*b6cee71dSXin LI *t = l + p->len; 285*b6cee71dSXin LI break; 286*b6cee71dSXin LI } 287*b6cee71dSXin LI } 288*b6cee71dSXin LI return p->type; 289*b6cee71dSXin LI } 290*b6cee71dSXin LI 291*b6cee71dSXin LI private int 292*b6cee71dSXin LI get_standard_integer_type(const char *l, const char **t) 293*b6cee71dSXin LI { 294*b6cee71dSXin LI int type; 295*b6cee71dSXin LI 296*b6cee71dSXin LI if (isalpha((unsigned char)l[1])) { 297*b6cee71dSXin LI switch (l[1]) { 298*b6cee71dSXin LI case 'C': 299*b6cee71dSXin LI /* "dC" and "uC" */ 300*b6cee71dSXin LI type = FILE_BYTE; 301*b6cee71dSXin LI break; 302*b6cee71dSXin LI case 'S': 303*b6cee71dSXin LI /* "dS" and "uS" */ 304*b6cee71dSXin LI type = FILE_SHORT; 305*b6cee71dSXin LI break; 306*b6cee71dSXin LI case 'I': 307*b6cee71dSXin LI case 'L': 308*b6cee71dSXin LI /* 309*b6cee71dSXin LI * "dI", "dL", "uI", and "uL". 310*b6cee71dSXin LI * 311*b6cee71dSXin LI * XXX - the actual Single UNIX Specification says 312*b6cee71dSXin LI * that "L" means "long", as in the C data type, 313*b6cee71dSXin LI * but we treat it as meaning "4-byte integer". 314*b6cee71dSXin LI * Given that the OS X version of file 5.04 did 315*b6cee71dSXin LI * the same, I guess that passes the actual SUS 316*b6cee71dSXin LI * validation suite; having "dL" be dependent on 317*b6cee71dSXin LI * how big a "long" is on the machine running 318*b6cee71dSXin LI * "file" is silly. 319*b6cee71dSXin LI */ 320*b6cee71dSXin LI type = FILE_LONG; 321*b6cee71dSXin LI break; 322*b6cee71dSXin LI case 'Q': 323*b6cee71dSXin LI /* "dQ" and "uQ" */ 324*b6cee71dSXin LI type = FILE_QUAD; 325*b6cee71dSXin LI break; 326*b6cee71dSXin LI default: 327*b6cee71dSXin LI /* "d{anything else}", "u{anything else}" */ 328*b6cee71dSXin LI return FILE_INVALID; 329*b6cee71dSXin LI } 330*b6cee71dSXin LI l += 2; 331*b6cee71dSXin LI } else if (isdigit((unsigned char)l[1])) { 332*b6cee71dSXin LI /* 333*b6cee71dSXin LI * "d{num}" and "u{num}"; we only support {num} values 334*b6cee71dSXin LI * of 1, 2, 4, and 8 - the Single UNIX Specification 335*b6cee71dSXin LI * doesn't say anything about whether arbitrary 336*b6cee71dSXin LI * values should be supported, but both the Solaris 10 337*b6cee71dSXin LI * and OS X Mountain Lion versions of file passed the 338*b6cee71dSXin LI * Single UNIX Specification validation suite, and 339*b6cee71dSXin LI * neither of them support values bigger than 8 or 340*b6cee71dSXin LI * non-power-of-2 values. 341*b6cee71dSXin LI */ 342*b6cee71dSXin LI if (isdigit((unsigned char)l[2])) { 343*b6cee71dSXin LI /* Multi-digit, so > 9 */ 344*b6cee71dSXin LI return FILE_INVALID; 345*b6cee71dSXin LI } 346*b6cee71dSXin LI switch (l[1]) { 347*b6cee71dSXin LI case '1': 348*b6cee71dSXin LI type = FILE_BYTE; 349*b6cee71dSXin LI break; 350*b6cee71dSXin LI case '2': 351*b6cee71dSXin LI type = FILE_SHORT; 352*b6cee71dSXin LI break; 353*b6cee71dSXin LI case '4': 354*b6cee71dSXin LI type = FILE_LONG; 355*b6cee71dSXin LI break; 356*b6cee71dSXin LI case '8': 357*b6cee71dSXin LI type = FILE_QUAD; 358*b6cee71dSXin LI break; 359*b6cee71dSXin LI default: 360*b6cee71dSXin LI /* XXX - what about 3, 5, 6, or 7? */ 361*b6cee71dSXin LI return FILE_INVALID; 362*b6cee71dSXin LI } 363*b6cee71dSXin LI l += 2; 364*b6cee71dSXin LI } else { 365*b6cee71dSXin LI /* 366*b6cee71dSXin LI * "d" or "u" by itself. 367*b6cee71dSXin LI */ 368*b6cee71dSXin LI type = FILE_LONG; 369*b6cee71dSXin LI ++l; 370*b6cee71dSXin LI } 371*b6cee71dSXin LI if (t) 372*b6cee71dSXin LI *t = l; 373*b6cee71dSXin LI return type; 374*b6cee71dSXin LI } 375*b6cee71dSXin LI 376*b6cee71dSXin LI private void 377*b6cee71dSXin LI init_file_tables(void) 378*b6cee71dSXin LI { 379*b6cee71dSXin LI static int done = 0; 380*b6cee71dSXin LI const struct type_tbl_s *p; 381*b6cee71dSXin LI 382*b6cee71dSXin LI if (done) 383*b6cee71dSXin LI return; 384*b6cee71dSXin LI done++; 385*b6cee71dSXin LI 386*b6cee71dSXin LI for (p = type_tbl; p->len; p++) { 387*b6cee71dSXin LI assert(p->type < FILE_NAMES_SIZE); 388*b6cee71dSXin LI file_names[p->type] = p->name; 389*b6cee71dSXin LI file_formats[p->type] = p->format; 390*b6cee71dSXin LI } 391*b6cee71dSXin LI assert(p - type_tbl == FILE_NAMES_SIZE); 392*b6cee71dSXin LI } 393*b6cee71dSXin LI 394*b6cee71dSXin LI private int 395*b6cee71dSXin LI add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx) 396*b6cee71dSXin LI { 397*b6cee71dSXin LI struct mlist *ml; 398*b6cee71dSXin LI 399*b6cee71dSXin LI if ((ml = CAST(struct mlist *, malloc(sizeof(*ml)))) == NULL) 400*b6cee71dSXin LI return -1; 401*b6cee71dSXin LI 402*b6cee71dSXin LI ml->map = idx == 0 ? map : NULL; 403*b6cee71dSXin LI ml->magic = map->magic[idx]; 404*b6cee71dSXin LI ml->nmagic = map->nmagic[idx]; 405*b6cee71dSXin LI 406*b6cee71dSXin LI mlp->prev->next = ml; 407*b6cee71dSXin LI ml->prev = mlp->prev; 408*b6cee71dSXin LI ml->next = mlp; 409*b6cee71dSXin LI mlp->prev = ml; 410*b6cee71dSXin LI return 0; 411*b6cee71dSXin LI } 412*b6cee71dSXin LI 413*b6cee71dSXin LI /* 414*b6cee71dSXin LI * Handle one file or directory. 415*b6cee71dSXin LI */ 416*b6cee71dSXin LI private int 417*b6cee71dSXin LI apprentice_1(struct magic_set *ms, const char *fn, int action) 418*b6cee71dSXin LI { 419*b6cee71dSXin LI #ifndef COMPILE_ONLY 420*b6cee71dSXin LI struct mlist *ml; 421*b6cee71dSXin LI #endif /* COMPILE_ONLY */ 422*b6cee71dSXin LI struct magic_map *map; 423*b6cee71dSXin LI #ifndef COMPILE_ONLY 424*b6cee71dSXin LI size_t i; 425*b6cee71dSXin LI #endif /* COMPILE_ONLY */ 426*b6cee71dSXin LI 427*b6cee71dSXin LI if (magicsize != FILE_MAGICSIZE) { 428*b6cee71dSXin LI file_error(ms, 0, "magic element size %lu != %lu", 429*b6cee71dSXin LI (unsigned long)sizeof(*map->magic[0]), 430*b6cee71dSXin LI (unsigned long)FILE_MAGICSIZE); 431*b6cee71dSXin LI return -1; 432*b6cee71dSXin LI } 433*b6cee71dSXin LI 434*b6cee71dSXin LI if (action == FILE_COMPILE) { 435*b6cee71dSXin LI map = apprentice_load(ms, fn, action); 436*b6cee71dSXin LI if (map == NULL) 437*b6cee71dSXin LI return -1; 438*b6cee71dSXin LI return apprentice_compile(ms, map, fn); 439*b6cee71dSXin LI } 440*b6cee71dSXin LI 441*b6cee71dSXin LI #ifndef COMPILE_ONLY 442*b6cee71dSXin LI map = apprentice_map(ms, fn); 443*b6cee71dSXin LI if (map == NULL) { 444*b6cee71dSXin LI if (ms->flags & MAGIC_CHECK) 445*b6cee71dSXin LI file_magwarn(ms, "using regular magic file `%s'", fn); 446*b6cee71dSXin LI map = apprentice_load(ms, fn, action); 447*b6cee71dSXin LI if (map == NULL) 448*b6cee71dSXin LI return -1; 449*b6cee71dSXin LI } 450*b6cee71dSXin LI 451*b6cee71dSXin LI for (i = 0; i < MAGIC_SETS; i++) { 452*b6cee71dSXin LI if (add_mlist(ms->mlist[i], map, i) == -1) { 453*b6cee71dSXin LI file_oomem(ms, sizeof(*ml)); 454*b6cee71dSXin LI apprentice_unmap(map); 455*b6cee71dSXin LI return -1; 456*b6cee71dSXin LI } 457*b6cee71dSXin LI } 458*b6cee71dSXin LI 459*b6cee71dSXin LI if (action == FILE_LIST) { 460*b6cee71dSXin LI for (i = 0; i < MAGIC_SETS; i++) { 461*b6cee71dSXin LI printf("Set %zu:\nBinary patterns:\n", i); 462*b6cee71dSXin LI apprentice_list(ms->mlist[i], BINTEST); 463*b6cee71dSXin LI printf("Text patterns:\n"); 464*b6cee71dSXin LI apprentice_list(ms->mlist[i], TEXTTEST); 465*b6cee71dSXin LI } 466*b6cee71dSXin LI } 467*b6cee71dSXin LI #endif /* COMPILE_ONLY */ 468*b6cee71dSXin LI 469*b6cee71dSXin LI return 0; 470*b6cee71dSXin LI } 471*b6cee71dSXin LI 472*b6cee71dSXin LI protected void 473*b6cee71dSXin LI file_ms_free(struct magic_set *ms) 474*b6cee71dSXin LI { 475*b6cee71dSXin LI size_t i; 476*b6cee71dSXin LI if (ms == NULL) 477*b6cee71dSXin LI return; 478*b6cee71dSXin LI for (i = 0; i < MAGIC_SETS; i++) 479*b6cee71dSXin LI mlist_free(ms->mlist[i]); 480*b6cee71dSXin LI free(ms->o.pbuf); 481*b6cee71dSXin LI free(ms->o.buf); 482*b6cee71dSXin LI free(ms->c.li); 483*b6cee71dSXin LI free(ms); 484*b6cee71dSXin LI } 485*b6cee71dSXin LI 486*b6cee71dSXin LI protected struct magic_set * 487*b6cee71dSXin LI file_ms_alloc(int flags) 488*b6cee71dSXin LI { 489*b6cee71dSXin LI struct magic_set *ms; 490*b6cee71dSXin LI size_t i, len; 491*b6cee71dSXin LI 492*b6cee71dSXin LI if ((ms = CAST(struct magic_set *, calloc((size_t)1, 493*b6cee71dSXin LI sizeof(struct magic_set)))) == NULL) 494*b6cee71dSXin LI return NULL; 495*b6cee71dSXin LI 496*b6cee71dSXin LI if (magic_setflags(ms, flags) == -1) { 497*b6cee71dSXin LI errno = EINVAL; 498*b6cee71dSXin LI goto free; 499*b6cee71dSXin LI } 500*b6cee71dSXin LI 501*b6cee71dSXin LI ms->o.buf = ms->o.pbuf = NULL; 502*b6cee71dSXin LI len = (ms->c.len = 10) * sizeof(*ms->c.li); 503*b6cee71dSXin LI 504*b6cee71dSXin LI if ((ms->c.li = CAST(struct level_info *, malloc(len))) == NULL) 505*b6cee71dSXin LI goto free; 506*b6cee71dSXin LI 507*b6cee71dSXin LI ms->event_flags = 0; 508*b6cee71dSXin LI ms->error = -1; 509*b6cee71dSXin LI for (i = 0; i < MAGIC_SETS; i++) 510*b6cee71dSXin LI ms->mlist[i] = NULL; 511*b6cee71dSXin LI ms->file = "unknown"; 512*b6cee71dSXin LI ms->line = 0; 513*b6cee71dSXin LI return ms; 514*b6cee71dSXin LI free: 515*b6cee71dSXin LI free(ms); 516*b6cee71dSXin LI return NULL; 517*b6cee71dSXin LI } 518*b6cee71dSXin LI 519*b6cee71dSXin LI private void 520*b6cee71dSXin LI apprentice_unmap(struct magic_map *map) 521*b6cee71dSXin LI { 522*b6cee71dSXin LI if (map == NULL) 523*b6cee71dSXin LI return; 524*b6cee71dSXin LI if (map->p != NULL) { 525*b6cee71dSXin LI #ifdef QUICK 526*b6cee71dSXin LI if (map->len) 527*b6cee71dSXin LI (void)munmap(map->p, map->len); 528*b6cee71dSXin LI else 529*b6cee71dSXin LI #endif 530*b6cee71dSXin LI free(map->p); 531*b6cee71dSXin LI } else { 532*b6cee71dSXin LI uint32_t j; 533*b6cee71dSXin LI for (j = 0; j < MAGIC_SETS; j++) 534*b6cee71dSXin LI free(map->magic[j]); 535*b6cee71dSXin LI } 536*b6cee71dSXin LI free(map); 537*b6cee71dSXin LI } 538*b6cee71dSXin LI 539*b6cee71dSXin LI private struct mlist * 540*b6cee71dSXin LI mlist_alloc(void) 541*b6cee71dSXin LI { 542*b6cee71dSXin LI struct mlist *mlist; 543*b6cee71dSXin LI if ((mlist = CAST(struct mlist *, calloc(1, sizeof(*mlist)))) == NULL) { 544*b6cee71dSXin LI return NULL; 545*b6cee71dSXin LI } 546*b6cee71dSXin LI mlist->next = mlist->prev = mlist; 547*b6cee71dSXin LI return mlist; 548*b6cee71dSXin LI } 549*b6cee71dSXin LI 550*b6cee71dSXin LI private void 551*b6cee71dSXin LI mlist_free(struct mlist *mlist) 552*b6cee71dSXin LI { 553*b6cee71dSXin LI struct mlist *ml; 554*b6cee71dSXin LI 555*b6cee71dSXin LI if (mlist == NULL) 556*b6cee71dSXin LI return; 557*b6cee71dSXin LI 558*b6cee71dSXin LI for (ml = mlist->next; ml != mlist;) { 559*b6cee71dSXin LI struct mlist *next = ml->next; 560*b6cee71dSXin LI if (ml->map) 561*b6cee71dSXin LI apprentice_unmap(ml->map); 562*b6cee71dSXin LI free(ml); 563*b6cee71dSXin LI ml = next; 564*b6cee71dSXin LI } 565*b6cee71dSXin LI free(ml); 566*b6cee71dSXin LI } 567*b6cee71dSXin LI 568*b6cee71dSXin LI /* const char *fn: list of magic files and directories */ 569*b6cee71dSXin LI protected int 570*b6cee71dSXin LI file_apprentice(struct magic_set *ms, const char *fn, int action) 571*b6cee71dSXin LI { 572*b6cee71dSXin LI char *p, *mfn; 573*b6cee71dSXin LI int file_err, errs = -1; 574*b6cee71dSXin LI size_t i; 575*b6cee71dSXin LI 576*b6cee71dSXin LI if (ms->mlist[0] != NULL) 577*b6cee71dSXin LI file_reset(ms); 578*b6cee71dSXin LI 579*b6cee71dSXin LI if ((fn = magic_getpath(fn, action)) == NULL) 580*b6cee71dSXin LI return -1; 581*b6cee71dSXin LI 582*b6cee71dSXin LI init_file_tables(); 583*b6cee71dSXin LI 584*b6cee71dSXin LI if ((mfn = strdup(fn)) == NULL) { 585*b6cee71dSXin LI file_oomem(ms, strlen(fn)); 586*b6cee71dSXin LI return -1; 587*b6cee71dSXin LI } 588*b6cee71dSXin LI 589*b6cee71dSXin LI for (i = 0; i < MAGIC_SETS; i++) { 590*b6cee71dSXin LI mlist_free(ms->mlist[i]); 591*b6cee71dSXin LI if ((ms->mlist[i] = mlist_alloc()) == NULL) { 592*b6cee71dSXin LI file_oomem(ms, sizeof(*ms->mlist[i])); 593*b6cee71dSXin LI if (i != 0) { 594*b6cee71dSXin LI --i; 595*b6cee71dSXin LI do 596*b6cee71dSXin LI mlist_free(ms->mlist[i]); 597*b6cee71dSXin LI while (i != 0); 598*b6cee71dSXin LI } 599*b6cee71dSXin LI free(mfn); 600*b6cee71dSXin LI return -1; 601*b6cee71dSXin LI } 602*b6cee71dSXin LI } 603*b6cee71dSXin LI fn = mfn; 604*b6cee71dSXin LI 605*b6cee71dSXin LI while (fn) { 606*b6cee71dSXin LI p = strchr(fn, PATHSEP); 607*b6cee71dSXin LI if (p) 608*b6cee71dSXin LI *p++ = '\0'; 609*b6cee71dSXin LI if (*fn == '\0') 610*b6cee71dSXin LI break; 611*b6cee71dSXin LI file_err = apprentice_1(ms, fn, action); 612*b6cee71dSXin LI errs = MAX(errs, file_err); 613*b6cee71dSXin LI fn = p; 614*b6cee71dSXin LI } 615*b6cee71dSXin LI 616*b6cee71dSXin LI free(mfn); 617*b6cee71dSXin LI 618*b6cee71dSXin LI if (errs == -1) { 619*b6cee71dSXin LI for (i = 0; i < MAGIC_SETS; i++) { 620*b6cee71dSXin LI mlist_free(ms->mlist[i]); 621*b6cee71dSXin LI ms->mlist[i] = NULL; 622*b6cee71dSXin LI } 623*b6cee71dSXin LI file_error(ms, 0, "could not find any valid magic files!"); 624*b6cee71dSXin LI return -1; 625*b6cee71dSXin LI } 626*b6cee71dSXin LI 627*b6cee71dSXin LI #if 0 628*b6cee71dSXin LI /* 629*b6cee71dSXin LI * Always leave the database loaded 630*b6cee71dSXin LI */ 631*b6cee71dSXin LI if (action == FILE_LOAD) 632*b6cee71dSXin LI return 0; 633*b6cee71dSXin LI 634*b6cee71dSXin LI for (i = 0; i < MAGIC_SETS; i++) { 635*b6cee71dSXin LI mlist_free(ms->mlist[i]); 636*b6cee71dSXin LI ms->mlist[i] = NULL; 637*b6cee71dSXin LI } 638*b6cee71dSXin LI #endif 639*b6cee71dSXin LI 640*b6cee71dSXin LI switch (action) { 641*b6cee71dSXin LI case FILE_LOAD: 642*b6cee71dSXin LI case FILE_COMPILE: 643*b6cee71dSXin LI case FILE_CHECK: 644*b6cee71dSXin LI case FILE_LIST: 645*b6cee71dSXin LI return 0; 646*b6cee71dSXin LI default: 647*b6cee71dSXin LI file_error(ms, 0, "Invalid action %d", action); 648*b6cee71dSXin LI return -1; 649*b6cee71dSXin LI } 650*b6cee71dSXin LI } 651*b6cee71dSXin LI 652*b6cee71dSXin LI /* 653*b6cee71dSXin LI * Compute the real length of a magic expression, for the purposes 654*b6cee71dSXin LI * of determining how "strong" a magic expression is (approximating 655*b6cee71dSXin LI * how specific its matches are): 656*b6cee71dSXin LI * - magic characters count 0 unless escaped. 657*b6cee71dSXin LI * - [] expressions count 1 658*b6cee71dSXin LI * - {} expressions count 0 659*b6cee71dSXin LI * - regular characters or escaped magic characters count 1 660*b6cee71dSXin LI * - 0 length expressions count as one 661*b6cee71dSXin LI */ 662*b6cee71dSXin LI private size_t 663*b6cee71dSXin LI nonmagic(const char *str) 664*b6cee71dSXin LI { 665*b6cee71dSXin LI const char *p; 666*b6cee71dSXin LI size_t rv = 0; 667*b6cee71dSXin LI 668*b6cee71dSXin LI for (p = str; *p; p++) 669*b6cee71dSXin LI switch (*p) { 670*b6cee71dSXin LI case '\\': /* Escaped anything counts 1 */ 671*b6cee71dSXin LI if (!*++p) 672*b6cee71dSXin LI p--; 673*b6cee71dSXin LI rv++; 674*b6cee71dSXin LI continue; 675*b6cee71dSXin LI case '?': /* Magic characters count 0 */ 676*b6cee71dSXin LI case '*': 677*b6cee71dSXin LI case '.': 678*b6cee71dSXin LI case '+': 679*b6cee71dSXin LI case '^': 680*b6cee71dSXin LI case '$': 681*b6cee71dSXin LI continue; 682*b6cee71dSXin LI case '[': /* Bracketed expressions count 1 the ']' */ 683*b6cee71dSXin LI while (*p && *p != ']') 684*b6cee71dSXin LI p++; 685*b6cee71dSXin LI p--; 686*b6cee71dSXin LI continue; 687*b6cee71dSXin LI case '{': /* Braced expressions count 0 */ 688*b6cee71dSXin LI while (*p && *p != '}') 689*b6cee71dSXin LI p++; 690*b6cee71dSXin LI if (!*p) 691*b6cee71dSXin LI p--; 692*b6cee71dSXin LI continue; 693*b6cee71dSXin LI default: /* Anything else counts 1 */ 694*b6cee71dSXin LI rv++; 695*b6cee71dSXin LI continue; 696*b6cee71dSXin LI } 697*b6cee71dSXin LI 698*b6cee71dSXin LI return rv == 0 ? 1 : rv; /* Return at least 1 */ 699*b6cee71dSXin LI } 700*b6cee71dSXin LI 701*b6cee71dSXin LI /* 702*b6cee71dSXin LI * Get weight of this magic entry, for sorting purposes. 703*b6cee71dSXin LI */ 704*b6cee71dSXin LI private size_t 705*b6cee71dSXin LI apprentice_magic_strength(const struct magic *m) 706*b6cee71dSXin LI { 707*b6cee71dSXin LI #define MULT 10 708*b6cee71dSXin LI size_t v, val = 2 * MULT; /* baseline strength */ 709*b6cee71dSXin LI 710*b6cee71dSXin LI switch (m->type) { 711*b6cee71dSXin LI case FILE_DEFAULT: /* make sure this sorts last */ 712*b6cee71dSXin LI if (m->factor_op != FILE_FACTOR_OP_NONE) 713*b6cee71dSXin LI abort(); 714*b6cee71dSXin LI return 0; 715*b6cee71dSXin LI 716*b6cee71dSXin LI case FILE_BYTE: 717*b6cee71dSXin LI val += 1 * MULT; 718*b6cee71dSXin LI break; 719*b6cee71dSXin LI 720*b6cee71dSXin LI case FILE_SHORT: 721*b6cee71dSXin LI case FILE_LESHORT: 722*b6cee71dSXin LI case FILE_BESHORT: 723*b6cee71dSXin LI val += 2 * MULT; 724*b6cee71dSXin LI break; 725*b6cee71dSXin LI 726*b6cee71dSXin LI case FILE_LONG: 727*b6cee71dSXin LI case FILE_LELONG: 728*b6cee71dSXin LI case FILE_BELONG: 729*b6cee71dSXin LI case FILE_MELONG: 730*b6cee71dSXin LI val += 4 * MULT; 731*b6cee71dSXin LI break; 732*b6cee71dSXin LI 733*b6cee71dSXin LI case FILE_PSTRING: 734*b6cee71dSXin LI case FILE_STRING: 735*b6cee71dSXin LI val += m->vallen * MULT; 736*b6cee71dSXin LI break; 737*b6cee71dSXin LI 738*b6cee71dSXin LI case FILE_BESTRING16: 739*b6cee71dSXin LI case FILE_LESTRING16: 740*b6cee71dSXin LI val += m->vallen * MULT / 2; 741*b6cee71dSXin LI break; 742*b6cee71dSXin LI 743*b6cee71dSXin LI case FILE_SEARCH: 744*b6cee71dSXin LI val += m->vallen * MAX(MULT / m->vallen, 1); 745*b6cee71dSXin LI break; 746*b6cee71dSXin LI 747*b6cee71dSXin LI case FILE_REGEX: 748*b6cee71dSXin LI v = nonmagic(m->value.s); 749*b6cee71dSXin LI val += v * MAX(MULT / v, 1); 750*b6cee71dSXin LI break; 751*b6cee71dSXin LI 752*b6cee71dSXin LI case FILE_DATE: 753*b6cee71dSXin LI case FILE_LEDATE: 754*b6cee71dSXin LI case FILE_BEDATE: 755*b6cee71dSXin LI case FILE_MEDATE: 756*b6cee71dSXin LI case FILE_LDATE: 757*b6cee71dSXin LI case FILE_LELDATE: 758*b6cee71dSXin LI case FILE_BELDATE: 759*b6cee71dSXin LI case FILE_MELDATE: 760*b6cee71dSXin LI case FILE_FLOAT: 761*b6cee71dSXin LI case FILE_BEFLOAT: 762*b6cee71dSXin LI case FILE_LEFLOAT: 763*b6cee71dSXin LI val += 4 * MULT; 764*b6cee71dSXin LI break; 765*b6cee71dSXin LI 766*b6cee71dSXin LI case FILE_QUAD: 767*b6cee71dSXin LI case FILE_BEQUAD: 768*b6cee71dSXin LI case FILE_LEQUAD: 769*b6cee71dSXin LI case FILE_QDATE: 770*b6cee71dSXin LI case FILE_LEQDATE: 771*b6cee71dSXin LI case FILE_BEQDATE: 772*b6cee71dSXin LI case FILE_QLDATE: 773*b6cee71dSXin LI case FILE_LEQLDATE: 774*b6cee71dSXin LI case FILE_BEQLDATE: 775*b6cee71dSXin LI case FILE_QWDATE: 776*b6cee71dSXin LI case FILE_LEQWDATE: 777*b6cee71dSXin LI case FILE_BEQWDATE: 778*b6cee71dSXin LI case FILE_DOUBLE: 779*b6cee71dSXin LI case FILE_BEDOUBLE: 780*b6cee71dSXin LI case FILE_LEDOUBLE: 781*b6cee71dSXin LI val += 8 * MULT; 782*b6cee71dSXin LI break; 783*b6cee71dSXin LI 784*b6cee71dSXin LI case FILE_INDIRECT: 785*b6cee71dSXin LI case FILE_NAME: 786*b6cee71dSXin LI case FILE_USE: 787*b6cee71dSXin LI break; 788*b6cee71dSXin LI 789*b6cee71dSXin LI default: 790*b6cee71dSXin LI (void)fprintf(stderr, "Bad type %d\n", m->type); 791*b6cee71dSXin LI abort(); 792*b6cee71dSXin LI } 793*b6cee71dSXin LI 794*b6cee71dSXin LI switch (m->reln) { 795*b6cee71dSXin LI case 'x': /* matches anything penalize */ 796*b6cee71dSXin LI case '!': /* matches almost anything penalize */ 797*b6cee71dSXin LI val = 0; 798*b6cee71dSXin LI break; 799*b6cee71dSXin LI 800*b6cee71dSXin LI case '=': /* Exact match, prefer */ 801*b6cee71dSXin LI val += MULT; 802*b6cee71dSXin LI break; 803*b6cee71dSXin LI 804*b6cee71dSXin LI case '>': 805*b6cee71dSXin LI case '<': /* comparison match reduce strength */ 806*b6cee71dSXin LI val -= 2 * MULT; 807*b6cee71dSXin LI break; 808*b6cee71dSXin LI 809*b6cee71dSXin LI case '^': 810*b6cee71dSXin LI case '&': /* masking bits, we could count them too */ 811*b6cee71dSXin LI val -= MULT; 812*b6cee71dSXin LI break; 813*b6cee71dSXin LI 814*b6cee71dSXin LI default: 815*b6cee71dSXin LI (void)fprintf(stderr, "Bad relation %c\n", m->reln); 816*b6cee71dSXin LI abort(); 817*b6cee71dSXin LI } 818*b6cee71dSXin LI 819*b6cee71dSXin LI if (val == 0) /* ensure we only return 0 for FILE_DEFAULT */ 820*b6cee71dSXin LI val = 1; 821*b6cee71dSXin LI 822*b6cee71dSXin LI switch (m->factor_op) { 823*b6cee71dSXin LI case FILE_FACTOR_OP_NONE: 824*b6cee71dSXin LI break; 825*b6cee71dSXin LI case FILE_FACTOR_OP_PLUS: 826*b6cee71dSXin LI val += m->factor; 827*b6cee71dSXin LI break; 828*b6cee71dSXin LI case FILE_FACTOR_OP_MINUS: 829*b6cee71dSXin LI val -= m->factor; 830*b6cee71dSXin LI break; 831*b6cee71dSXin LI case FILE_FACTOR_OP_TIMES: 832*b6cee71dSXin LI val *= m->factor; 833*b6cee71dSXin LI break; 834*b6cee71dSXin LI case FILE_FACTOR_OP_DIV: 835*b6cee71dSXin LI val /= m->factor; 836*b6cee71dSXin LI break; 837*b6cee71dSXin LI default: 838*b6cee71dSXin LI abort(); 839*b6cee71dSXin LI } 840*b6cee71dSXin LI 841*b6cee71dSXin LI /* 842*b6cee71dSXin LI * Magic entries with no description get a bonus because they depend 843*b6cee71dSXin LI * on subsequent magic entries to print something. 844*b6cee71dSXin LI */ 845*b6cee71dSXin LI if (m->desc[0] == '\0') 846*b6cee71dSXin LI val++; 847*b6cee71dSXin LI return val; 848*b6cee71dSXin LI } 849*b6cee71dSXin LI 850*b6cee71dSXin LI /* 851*b6cee71dSXin LI * Sort callback for sorting entries by "strength" (basically length) 852*b6cee71dSXin LI */ 853*b6cee71dSXin LI private int 854*b6cee71dSXin LI apprentice_sort(const void *a, const void *b) 855*b6cee71dSXin LI { 856*b6cee71dSXin LI const struct magic_entry *ma = CAST(const struct magic_entry *, a); 857*b6cee71dSXin LI const struct magic_entry *mb = CAST(const struct magic_entry *, b); 858*b6cee71dSXin LI size_t sa = apprentice_magic_strength(ma->mp); 859*b6cee71dSXin LI size_t sb = apprentice_magic_strength(mb->mp); 860*b6cee71dSXin LI if (sa == sb) 861*b6cee71dSXin LI return 0; 862*b6cee71dSXin LI else if (sa > sb) 863*b6cee71dSXin LI return -1; 864*b6cee71dSXin LI else 865*b6cee71dSXin LI return 1; 866*b6cee71dSXin LI } 867*b6cee71dSXin LI 868*b6cee71dSXin LI /* 869*b6cee71dSXin LI * Shows sorted patterns list in the order which is used for the matching 870*b6cee71dSXin LI */ 871*b6cee71dSXin LI private void 872*b6cee71dSXin LI apprentice_list(struct mlist *mlist, int mode) 873*b6cee71dSXin LI { 874*b6cee71dSXin LI uint32_t magindex = 0; 875*b6cee71dSXin LI struct mlist *ml; 876*b6cee71dSXin LI for (ml = mlist->next; ml != mlist; ml = ml->next) { 877*b6cee71dSXin LI for (magindex = 0; magindex < ml->nmagic; magindex++) { 878*b6cee71dSXin LI struct magic *m = &ml->magic[magindex]; 879*b6cee71dSXin LI if ((m->flag & mode) != mode) { 880*b6cee71dSXin LI /* Skip sub-tests */ 881*b6cee71dSXin LI while (magindex + 1 < ml->nmagic && 882*b6cee71dSXin LI ml->magic[magindex + 1].cont_level != 0) 883*b6cee71dSXin LI ++magindex; 884*b6cee71dSXin LI continue; /* Skip to next top-level test*/ 885*b6cee71dSXin LI } 886*b6cee71dSXin LI 887*b6cee71dSXin LI /* 888*b6cee71dSXin LI * Try to iterate over the tree until we find item with 889*b6cee71dSXin LI * description/mimetype. 890*b6cee71dSXin LI */ 891*b6cee71dSXin LI while (magindex + 1 < ml->nmagic && 892*b6cee71dSXin LI ml->magic[magindex + 1].cont_level != 0 && 893*b6cee71dSXin LI *ml->magic[magindex].desc == '\0' && 894*b6cee71dSXin LI *ml->magic[magindex].mimetype == '\0') 895*b6cee71dSXin LI magindex++; 896*b6cee71dSXin LI 897*b6cee71dSXin LI printf("Strength = %3" SIZE_T_FORMAT "u : %s [%s]\n", 898*b6cee71dSXin LI apprentice_magic_strength(m), 899*b6cee71dSXin LI ml->magic[magindex].desc, 900*b6cee71dSXin LI ml->magic[magindex].mimetype); 901*b6cee71dSXin LI } 902*b6cee71dSXin LI } 903*b6cee71dSXin LI } 904*b6cee71dSXin LI 905*b6cee71dSXin LI private void 906*b6cee71dSXin LI set_test_type(struct magic *mstart, struct magic *m) 907*b6cee71dSXin LI { 908*b6cee71dSXin LI switch (m->type) { 909*b6cee71dSXin LI case FILE_BYTE: 910*b6cee71dSXin LI case FILE_SHORT: 911*b6cee71dSXin LI case FILE_LONG: 912*b6cee71dSXin LI case FILE_DATE: 913*b6cee71dSXin LI case FILE_BESHORT: 914*b6cee71dSXin LI case FILE_BELONG: 915*b6cee71dSXin LI case FILE_BEDATE: 916*b6cee71dSXin LI case FILE_LESHORT: 917*b6cee71dSXin LI case FILE_LELONG: 918*b6cee71dSXin LI case FILE_LEDATE: 919*b6cee71dSXin LI case FILE_LDATE: 920*b6cee71dSXin LI case FILE_BELDATE: 921*b6cee71dSXin LI case FILE_LELDATE: 922*b6cee71dSXin LI case FILE_MEDATE: 923*b6cee71dSXin LI case FILE_MELDATE: 924*b6cee71dSXin LI case FILE_MELONG: 925*b6cee71dSXin LI case FILE_QUAD: 926*b6cee71dSXin LI case FILE_LEQUAD: 927*b6cee71dSXin LI case FILE_BEQUAD: 928*b6cee71dSXin LI case FILE_QDATE: 929*b6cee71dSXin LI case FILE_LEQDATE: 930*b6cee71dSXin LI case FILE_BEQDATE: 931*b6cee71dSXin LI case FILE_QLDATE: 932*b6cee71dSXin LI case FILE_LEQLDATE: 933*b6cee71dSXin LI case FILE_BEQLDATE: 934*b6cee71dSXin LI case FILE_QWDATE: 935*b6cee71dSXin LI case FILE_LEQWDATE: 936*b6cee71dSXin LI case FILE_BEQWDATE: 937*b6cee71dSXin LI case FILE_FLOAT: 938*b6cee71dSXin LI case FILE_BEFLOAT: 939*b6cee71dSXin LI case FILE_LEFLOAT: 940*b6cee71dSXin LI case FILE_DOUBLE: 941*b6cee71dSXin LI case FILE_BEDOUBLE: 942*b6cee71dSXin LI case FILE_LEDOUBLE: 943*b6cee71dSXin LI mstart->flag |= BINTEST; 944*b6cee71dSXin LI break; 945*b6cee71dSXin LI case FILE_STRING: 946*b6cee71dSXin LI case FILE_PSTRING: 947*b6cee71dSXin LI case FILE_BESTRING16: 948*b6cee71dSXin LI case FILE_LESTRING16: 949*b6cee71dSXin LI /* Allow text overrides */ 950*b6cee71dSXin LI if (mstart->str_flags & STRING_TEXTTEST) 951*b6cee71dSXin LI mstart->flag |= TEXTTEST; 952*b6cee71dSXin LI else 953*b6cee71dSXin LI mstart->flag |= BINTEST; 954*b6cee71dSXin LI break; 955*b6cee71dSXin LI case FILE_REGEX: 956*b6cee71dSXin LI case FILE_SEARCH: 957*b6cee71dSXin LI /* Check for override */ 958*b6cee71dSXin LI if (mstart->str_flags & STRING_BINTEST) 959*b6cee71dSXin LI mstart->flag |= BINTEST; 960*b6cee71dSXin LI if (mstart->str_flags & STRING_TEXTTEST) 961*b6cee71dSXin LI mstart->flag |= TEXTTEST; 962*b6cee71dSXin LI 963*b6cee71dSXin LI if (mstart->flag & (TEXTTEST|BINTEST)) 964*b6cee71dSXin LI break; 965*b6cee71dSXin LI 966*b6cee71dSXin LI /* binary test if pattern is not text */ 967*b6cee71dSXin LI if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL, 968*b6cee71dSXin LI NULL) <= 0) 969*b6cee71dSXin LI mstart->flag |= BINTEST; 970*b6cee71dSXin LI else 971*b6cee71dSXin LI mstart->flag |= TEXTTEST; 972*b6cee71dSXin LI break; 973*b6cee71dSXin LI case FILE_DEFAULT: 974*b6cee71dSXin LI /* can't deduce anything; we shouldn't see this at the 975*b6cee71dSXin LI top level anyway */ 976*b6cee71dSXin LI break; 977*b6cee71dSXin LI case FILE_INVALID: 978*b6cee71dSXin LI default: 979*b6cee71dSXin LI /* invalid search type, but no need to complain here */ 980*b6cee71dSXin LI break; 981*b6cee71dSXin LI } 982*b6cee71dSXin LI } 983*b6cee71dSXin LI 984*b6cee71dSXin LI private int 985*b6cee71dSXin LI addentry(struct magic_set *ms, struct magic_entry *me, 986*b6cee71dSXin LI struct magic_entry_set *mset) 987*b6cee71dSXin LI { 988*b6cee71dSXin LI size_t i = me->mp->type == FILE_NAME ? 1 : 0; 989*b6cee71dSXin LI if (mset[i].count == mset[i].max) { 990*b6cee71dSXin LI struct magic_entry *mp; 991*b6cee71dSXin LI 992*b6cee71dSXin LI mset[i].max += ALLOC_INCR; 993*b6cee71dSXin LI if ((mp = CAST(struct magic_entry *, 994*b6cee71dSXin LI realloc(mset[i].me, sizeof(*mp) * mset[i].max))) == 995*b6cee71dSXin LI NULL) { 996*b6cee71dSXin LI file_oomem(ms, sizeof(*mp) * mset[i].max); 997*b6cee71dSXin LI return -1; 998*b6cee71dSXin LI } 999*b6cee71dSXin LI (void)memset(&mp[mset[i].count], 0, sizeof(*mp) * 1000*b6cee71dSXin LI ALLOC_INCR); 1001*b6cee71dSXin LI mset[i].me = mp; 1002*b6cee71dSXin LI } 1003*b6cee71dSXin LI mset[i].me[mset[i].count++] = *me; 1004*b6cee71dSXin LI memset(me, 0, sizeof(*me)); 1005*b6cee71dSXin LI return 0; 1006*b6cee71dSXin LI } 1007*b6cee71dSXin LI 1008*b6cee71dSXin LI /* 1009*b6cee71dSXin LI * Load and parse one file. 1010*b6cee71dSXin LI */ 1011*b6cee71dSXin LI private void 1012*b6cee71dSXin LI load_1(struct magic_set *ms, int action, const char *fn, int *errs, 1013*b6cee71dSXin LI struct magic_entry_set *mset) 1014*b6cee71dSXin LI { 1015*b6cee71dSXin LI size_t lineno = 0, llen = 0; 1016*b6cee71dSXin LI char *line = NULL; 1017*b6cee71dSXin LI ssize_t len; 1018*b6cee71dSXin LI struct magic_entry me; 1019*b6cee71dSXin LI 1020*b6cee71dSXin LI FILE *f = fopen(ms->file = fn, "r"); 1021*b6cee71dSXin LI if (f == NULL) { 1022*b6cee71dSXin LI if (errno != ENOENT) 1023*b6cee71dSXin LI file_error(ms, errno, "cannot read magic file `%s'", 1024*b6cee71dSXin LI fn); 1025*b6cee71dSXin LI (*errs)++; 1026*b6cee71dSXin LI return; 1027*b6cee71dSXin LI } 1028*b6cee71dSXin LI 1029*b6cee71dSXin LI memset(&me, 0, sizeof(me)); 1030*b6cee71dSXin LI /* read and parse this file */ 1031*b6cee71dSXin LI for (ms->line = 1; (len = getline(&line, &llen, f)) != -1; 1032*b6cee71dSXin LI ms->line++) { 1033*b6cee71dSXin LI if (len == 0) /* null line, garbage, etc */ 1034*b6cee71dSXin LI continue; 1035*b6cee71dSXin LI if (line[len - 1] == '\n') { 1036*b6cee71dSXin LI lineno++; 1037*b6cee71dSXin LI line[len - 1] = '\0'; /* delete newline */ 1038*b6cee71dSXin LI } 1039*b6cee71dSXin LI switch (line[0]) { 1040*b6cee71dSXin LI case '\0': /* empty, do not parse */ 1041*b6cee71dSXin LI case '#': /* comment, do not parse */ 1042*b6cee71dSXin LI continue; 1043*b6cee71dSXin LI case '!': 1044*b6cee71dSXin LI if (line[1] == ':') { 1045*b6cee71dSXin LI size_t i; 1046*b6cee71dSXin LI 1047*b6cee71dSXin LI for (i = 0; bang[i].name != NULL; i++) { 1048*b6cee71dSXin LI if ((size_t)(len - 2) > bang[i].len && 1049*b6cee71dSXin LI memcmp(bang[i].name, line + 2, 1050*b6cee71dSXin LI bang[i].len) == 0) 1051*b6cee71dSXin LI break; 1052*b6cee71dSXin LI } 1053*b6cee71dSXin LI if (bang[i].name == NULL) { 1054*b6cee71dSXin LI file_error(ms, 0, 1055*b6cee71dSXin LI "Unknown !: entry `%s'", line); 1056*b6cee71dSXin LI (*errs)++; 1057*b6cee71dSXin LI continue; 1058*b6cee71dSXin LI } 1059*b6cee71dSXin LI if (me.mp == NULL) { 1060*b6cee71dSXin LI file_error(ms, 0, 1061*b6cee71dSXin LI "No current entry for :!%s type", 1062*b6cee71dSXin LI bang[i].name); 1063*b6cee71dSXin LI (*errs)++; 1064*b6cee71dSXin LI continue; 1065*b6cee71dSXin LI } 1066*b6cee71dSXin LI if ((*bang[i].fun)(ms, &me, 1067*b6cee71dSXin LI line + bang[i].len + 2) != 0) { 1068*b6cee71dSXin LI (*errs)++; 1069*b6cee71dSXin LI continue; 1070*b6cee71dSXin LI } 1071*b6cee71dSXin LI continue; 1072*b6cee71dSXin LI } 1073*b6cee71dSXin LI /*FALLTHROUGH*/ 1074*b6cee71dSXin LI default: 1075*b6cee71dSXin LI again: 1076*b6cee71dSXin LI switch (parse(ms, &me, line, lineno, action)) { 1077*b6cee71dSXin LI case 0: 1078*b6cee71dSXin LI continue; 1079*b6cee71dSXin LI case 1: 1080*b6cee71dSXin LI (void)addentry(ms, &me, mset); 1081*b6cee71dSXin LI goto again; 1082*b6cee71dSXin LI default: 1083*b6cee71dSXin LI (*errs)++; 1084*b6cee71dSXin LI break; 1085*b6cee71dSXin LI } 1086*b6cee71dSXin LI } 1087*b6cee71dSXin LI } 1088*b6cee71dSXin LI if (me.mp) 1089*b6cee71dSXin LI (void)addentry(ms, &me, mset); 1090*b6cee71dSXin LI free(line); 1091*b6cee71dSXin LI (void)fclose(f); 1092*b6cee71dSXin LI } 1093*b6cee71dSXin LI 1094*b6cee71dSXin LI /* 1095*b6cee71dSXin LI * parse a file or directory of files 1096*b6cee71dSXin LI * const char *fn: name of magic file or directory 1097*b6cee71dSXin LI */ 1098*b6cee71dSXin LI private int 1099*b6cee71dSXin LI cmpstrp(const void *p1, const void *p2) 1100*b6cee71dSXin LI { 1101*b6cee71dSXin LI return strcmp(*(char *const *)p1, *(char *const *)p2); 1102*b6cee71dSXin LI } 1103*b6cee71dSXin LI 1104*b6cee71dSXin LI 1105*b6cee71dSXin LI private uint32_t 1106*b6cee71dSXin LI set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme, 1107*b6cee71dSXin LI uint32_t starttest) 1108*b6cee71dSXin LI { 1109*b6cee71dSXin LI static const char text[] = "text"; 1110*b6cee71dSXin LI static const char binary[] = "binary"; 1111*b6cee71dSXin LI static const size_t len = sizeof(text); 1112*b6cee71dSXin LI 1113*b6cee71dSXin LI uint32_t i = starttest; 1114*b6cee71dSXin LI 1115*b6cee71dSXin LI do { 1116*b6cee71dSXin LI set_test_type(me[starttest].mp, me[i].mp); 1117*b6cee71dSXin LI if ((ms->flags & MAGIC_DEBUG) == 0) 1118*b6cee71dSXin LI continue; 1119*b6cee71dSXin LI (void)fprintf(stderr, "%s%s%s: %s\n", 1120*b6cee71dSXin LI me[i].mp->mimetype, 1121*b6cee71dSXin LI me[i].mp->mimetype[0] == '\0' ? "" : "; ", 1122*b6cee71dSXin LI me[i].mp->desc[0] ? me[i].mp->desc : "(no description)", 1123*b6cee71dSXin LI me[i].mp->flag & BINTEST ? binary : text); 1124*b6cee71dSXin LI if (me[i].mp->flag & BINTEST) { 1125*b6cee71dSXin LI char *p = strstr(me[i].mp->desc, text); 1126*b6cee71dSXin LI if (p && (p == me[i].mp->desc || 1127*b6cee71dSXin LI isspace((unsigned char)p[-1])) && 1128*b6cee71dSXin LI (p + len - me[i].mp->desc == MAXstring 1129*b6cee71dSXin LI || (p[len] == '\0' || 1130*b6cee71dSXin LI isspace((unsigned char)p[len])))) 1131*b6cee71dSXin LI (void)fprintf(stderr, "*** Possible " 1132*b6cee71dSXin LI "binary test for text type\n"); 1133*b6cee71dSXin LI } 1134*b6cee71dSXin LI } while (++i < nme && me[i].mp->cont_level != 0); 1135*b6cee71dSXin LI return i; 1136*b6cee71dSXin LI } 1137*b6cee71dSXin LI 1138*b6cee71dSXin LI private void 1139*b6cee71dSXin LI set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme) 1140*b6cee71dSXin LI { 1141*b6cee71dSXin LI uint32_t i; 1142*b6cee71dSXin LI for (i = 0; i < nme; i++) { 1143*b6cee71dSXin LI if (me[i].mp->cont_level == 0 && 1144*b6cee71dSXin LI me[i].mp->type == FILE_DEFAULT) { 1145*b6cee71dSXin LI while (++i < nme) 1146*b6cee71dSXin LI if (me[i].mp->cont_level == 0) 1147*b6cee71dSXin LI break; 1148*b6cee71dSXin LI if (i != nme) { 1149*b6cee71dSXin LI /* XXX - Ugh! */ 1150*b6cee71dSXin LI ms->line = me[i].mp->lineno; 1151*b6cee71dSXin LI file_magwarn(ms, 1152*b6cee71dSXin LI "level 0 \"default\" did not sort last"); 1153*b6cee71dSXin LI } 1154*b6cee71dSXin LI return; 1155*b6cee71dSXin LI } 1156*b6cee71dSXin LI } 1157*b6cee71dSXin LI } 1158*b6cee71dSXin LI 1159*b6cee71dSXin LI private int 1160*b6cee71dSXin LI coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme, 1161*b6cee71dSXin LI struct magic **ma, uint32_t *nma) 1162*b6cee71dSXin LI { 1163*b6cee71dSXin LI uint32_t i, mentrycount = 0; 1164*b6cee71dSXin LI size_t slen; 1165*b6cee71dSXin LI 1166*b6cee71dSXin LI for (i = 0; i < nme; i++) 1167*b6cee71dSXin LI mentrycount += me[i].cont_count; 1168*b6cee71dSXin LI 1169*b6cee71dSXin LI slen = sizeof(**ma) * mentrycount; 1170*b6cee71dSXin LI if ((*ma = CAST(struct magic *, malloc(slen))) == NULL) { 1171*b6cee71dSXin LI file_oomem(ms, slen); 1172*b6cee71dSXin LI return -1; 1173*b6cee71dSXin LI } 1174*b6cee71dSXin LI 1175*b6cee71dSXin LI mentrycount = 0; 1176*b6cee71dSXin LI for (i = 0; i < nme; i++) { 1177*b6cee71dSXin LI (void)memcpy(*ma + mentrycount, me[i].mp, 1178*b6cee71dSXin LI me[i].cont_count * sizeof(**ma)); 1179*b6cee71dSXin LI mentrycount += me[i].cont_count; 1180*b6cee71dSXin LI } 1181*b6cee71dSXin LI *nma = mentrycount; 1182*b6cee71dSXin LI return 0; 1183*b6cee71dSXin LI } 1184*b6cee71dSXin LI 1185*b6cee71dSXin LI private void 1186*b6cee71dSXin LI magic_entry_free(struct magic_entry *me, uint32_t nme) 1187*b6cee71dSXin LI { 1188*b6cee71dSXin LI uint32_t i; 1189*b6cee71dSXin LI if (me == NULL) 1190*b6cee71dSXin LI return; 1191*b6cee71dSXin LI for (i = 0; i < nme; i++) 1192*b6cee71dSXin LI free(me[i].mp); 1193*b6cee71dSXin LI free(me); 1194*b6cee71dSXin LI } 1195*b6cee71dSXin LI 1196*b6cee71dSXin LI private struct magic_map * 1197*b6cee71dSXin LI apprentice_load(struct magic_set *ms, const char *fn, int action) 1198*b6cee71dSXin LI { 1199*b6cee71dSXin LI int errs = 0; 1200*b6cee71dSXin LI uint32_t i, j; 1201*b6cee71dSXin LI size_t files = 0, maxfiles = 0; 1202*b6cee71dSXin LI char **filearr = NULL, *mfn; 1203*b6cee71dSXin LI struct stat st; 1204*b6cee71dSXin LI struct magic_map *map; 1205*b6cee71dSXin LI struct magic_entry_set mset[MAGIC_SETS]; 1206*b6cee71dSXin LI DIR *dir; 1207*b6cee71dSXin LI struct dirent *d; 1208*b6cee71dSXin LI 1209*b6cee71dSXin LI memset(mset, 0, sizeof(mset)); 1210*b6cee71dSXin LI ms->flags |= MAGIC_CHECK; /* Enable checks for parsed files */ 1211*b6cee71dSXin LI 1212*b6cee71dSXin LI 1213*b6cee71dSXin LI if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) 1214*b6cee71dSXin LI { 1215*b6cee71dSXin LI file_oomem(ms, sizeof(*map)); 1216*b6cee71dSXin LI return NULL; 1217*b6cee71dSXin LI } 1218*b6cee71dSXin LI 1219*b6cee71dSXin LI /* print silly verbose header for USG compat. */ 1220*b6cee71dSXin LI if (action == FILE_CHECK) 1221*b6cee71dSXin LI (void)fprintf(stderr, "%s\n", usg_hdr); 1222*b6cee71dSXin LI 1223*b6cee71dSXin LI /* load directory or file */ 1224*b6cee71dSXin LI if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) { 1225*b6cee71dSXin LI dir = opendir(fn); 1226*b6cee71dSXin LI if (!dir) { 1227*b6cee71dSXin LI errs++; 1228*b6cee71dSXin LI goto out; 1229*b6cee71dSXin LI } 1230*b6cee71dSXin LI while ((d = readdir(dir)) != NULL) { 1231*b6cee71dSXin LI if (asprintf(&mfn, "%s/%s", fn, d->d_name) < 0) { 1232*b6cee71dSXin LI file_oomem(ms, 1233*b6cee71dSXin LI strlen(fn) + strlen(d->d_name) + 2); 1234*b6cee71dSXin LI errs++; 1235*b6cee71dSXin LI closedir(dir); 1236*b6cee71dSXin LI goto out; 1237*b6cee71dSXin LI } 1238*b6cee71dSXin LI if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) { 1239*b6cee71dSXin LI free(mfn); 1240*b6cee71dSXin LI continue; 1241*b6cee71dSXin LI } 1242*b6cee71dSXin LI if (files >= maxfiles) { 1243*b6cee71dSXin LI size_t mlen; 1244*b6cee71dSXin LI maxfiles = (maxfiles + 1) * 2; 1245*b6cee71dSXin LI mlen = maxfiles * sizeof(*filearr); 1246*b6cee71dSXin LI if ((filearr = CAST(char **, 1247*b6cee71dSXin LI realloc(filearr, mlen))) == NULL) { 1248*b6cee71dSXin LI file_oomem(ms, mlen); 1249*b6cee71dSXin LI free(mfn); 1250*b6cee71dSXin LI closedir(dir); 1251*b6cee71dSXin LI errs++; 1252*b6cee71dSXin LI goto out; 1253*b6cee71dSXin LI } 1254*b6cee71dSXin LI } 1255*b6cee71dSXin LI filearr[files++] = mfn; 1256*b6cee71dSXin LI } 1257*b6cee71dSXin LI closedir(dir); 1258*b6cee71dSXin LI qsort(filearr, files, sizeof(*filearr), cmpstrp); 1259*b6cee71dSXin LI for (i = 0; i < files; i++) { 1260*b6cee71dSXin LI load_1(ms, action, filearr[i], &errs, mset); 1261*b6cee71dSXin LI free(filearr[i]); 1262*b6cee71dSXin LI } 1263*b6cee71dSXin LI free(filearr); 1264*b6cee71dSXin LI } else 1265*b6cee71dSXin LI load_1(ms, action, fn, &errs, mset); 1266*b6cee71dSXin LI if (errs) 1267*b6cee71dSXin LI goto out; 1268*b6cee71dSXin LI 1269*b6cee71dSXin LI for (j = 0; j < MAGIC_SETS; j++) { 1270*b6cee71dSXin LI /* Set types of tests */ 1271*b6cee71dSXin LI for (i = 0; i < mset[j].count; ) { 1272*b6cee71dSXin LI if (mset[j].me[i].mp->cont_level != 0) { 1273*b6cee71dSXin LI i++; 1274*b6cee71dSXin LI continue; 1275*b6cee71dSXin LI } 1276*b6cee71dSXin LI i = set_text_binary(ms, mset[j].me, mset[j].count, i); 1277*b6cee71dSXin LI } 1278*b6cee71dSXin LI qsort(mset[j].me, mset[j].count, sizeof(*mset[j].me), 1279*b6cee71dSXin LI apprentice_sort); 1280*b6cee71dSXin LI 1281*b6cee71dSXin LI /* 1282*b6cee71dSXin LI * Make sure that any level 0 "default" line is last 1283*b6cee71dSXin LI * (if one exists). 1284*b6cee71dSXin LI */ 1285*b6cee71dSXin LI set_last_default(ms, mset[j].me, mset[j].count); 1286*b6cee71dSXin LI 1287*b6cee71dSXin LI /* coalesce per file arrays into a single one */ 1288*b6cee71dSXin LI if (coalesce_entries(ms, mset[j].me, mset[j].count, 1289*b6cee71dSXin LI &map->magic[j], &map->nmagic[j]) == -1) { 1290*b6cee71dSXin LI errs++; 1291*b6cee71dSXin LI goto out; 1292*b6cee71dSXin LI } 1293*b6cee71dSXin LI } 1294*b6cee71dSXin LI 1295*b6cee71dSXin LI out: 1296*b6cee71dSXin LI for (j = 0; j < MAGIC_SETS; j++) 1297*b6cee71dSXin LI magic_entry_free(mset[j].me, mset[j].count); 1298*b6cee71dSXin LI 1299*b6cee71dSXin LI if (errs) { 1300*b6cee71dSXin LI apprentice_unmap(map); 1301*b6cee71dSXin LI return NULL; 1302*b6cee71dSXin LI } 1303*b6cee71dSXin LI return map; 1304*b6cee71dSXin LI } 1305*b6cee71dSXin LI 1306*b6cee71dSXin LI /* 1307*b6cee71dSXin LI * extend the sign bit if the comparison is to be signed 1308*b6cee71dSXin LI */ 1309*b6cee71dSXin LI protected uint64_t 1310*b6cee71dSXin LI file_signextend(struct magic_set *ms, struct magic *m, uint64_t v) 1311*b6cee71dSXin LI { 1312*b6cee71dSXin LI if (!(m->flag & UNSIGNED)) { 1313*b6cee71dSXin LI switch(m->type) { 1314*b6cee71dSXin LI /* 1315*b6cee71dSXin LI * Do not remove the casts below. They are 1316*b6cee71dSXin LI * vital. When later compared with the data, 1317*b6cee71dSXin LI * the sign extension must have happened. 1318*b6cee71dSXin LI */ 1319*b6cee71dSXin LI case FILE_BYTE: 1320*b6cee71dSXin LI v = (char) v; 1321*b6cee71dSXin LI break; 1322*b6cee71dSXin LI case FILE_SHORT: 1323*b6cee71dSXin LI case FILE_BESHORT: 1324*b6cee71dSXin LI case FILE_LESHORT: 1325*b6cee71dSXin LI v = (short) v; 1326*b6cee71dSXin LI break; 1327*b6cee71dSXin LI case FILE_DATE: 1328*b6cee71dSXin LI case FILE_BEDATE: 1329*b6cee71dSXin LI case FILE_LEDATE: 1330*b6cee71dSXin LI case FILE_MEDATE: 1331*b6cee71dSXin LI case FILE_LDATE: 1332*b6cee71dSXin LI case FILE_BELDATE: 1333*b6cee71dSXin LI case FILE_LELDATE: 1334*b6cee71dSXin LI case FILE_MELDATE: 1335*b6cee71dSXin LI case FILE_LONG: 1336*b6cee71dSXin LI case FILE_BELONG: 1337*b6cee71dSXin LI case FILE_LELONG: 1338*b6cee71dSXin LI case FILE_MELONG: 1339*b6cee71dSXin LI case FILE_FLOAT: 1340*b6cee71dSXin LI case FILE_BEFLOAT: 1341*b6cee71dSXin LI case FILE_LEFLOAT: 1342*b6cee71dSXin LI v = (int32_t) v; 1343*b6cee71dSXin LI break; 1344*b6cee71dSXin LI case FILE_QUAD: 1345*b6cee71dSXin LI case FILE_BEQUAD: 1346*b6cee71dSXin LI case FILE_LEQUAD: 1347*b6cee71dSXin LI case FILE_QDATE: 1348*b6cee71dSXin LI case FILE_QLDATE: 1349*b6cee71dSXin LI case FILE_QWDATE: 1350*b6cee71dSXin LI case FILE_BEQDATE: 1351*b6cee71dSXin LI case FILE_BEQLDATE: 1352*b6cee71dSXin LI case FILE_BEQWDATE: 1353*b6cee71dSXin LI case FILE_LEQDATE: 1354*b6cee71dSXin LI case FILE_LEQLDATE: 1355*b6cee71dSXin LI case FILE_LEQWDATE: 1356*b6cee71dSXin LI case FILE_DOUBLE: 1357*b6cee71dSXin LI case FILE_BEDOUBLE: 1358*b6cee71dSXin LI case FILE_LEDOUBLE: 1359*b6cee71dSXin LI v = (int64_t) v; 1360*b6cee71dSXin LI break; 1361*b6cee71dSXin LI case FILE_STRING: 1362*b6cee71dSXin LI case FILE_PSTRING: 1363*b6cee71dSXin LI case FILE_BESTRING16: 1364*b6cee71dSXin LI case FILE_LESTRING16: 1365*b6cee71dSXin LI case FILE_REGEX: 1366*b6cee71dSXin LI case FILE_SEARCH: 1367*b6cee71dSXin LI case FILE_DEFAULT: 1368*b6cee71dSXin LI case FILE_INDIRECT: 1369*b6cee71dSXin LI case FILE_NAME: 1370*b6cee71dSXin LI case FILE_USE: 1371*b6cee71dSXin LI case FILE_CLEAR: 1372*b6cee71dSXin LI break; 1373*b6cee71dSXin LI default: 1374*b6cee71dSXin LI if (ms->flags & MAGIC_CHECK) 1375*b6cee71dSXin LI file_magwarn(ms, "cannot happen: m->type=%d\n", 1376*b6cee71dSXin LI m->type); 1377*b6cee71dSXin LI return ~0U; 1378*b6cee71dSXin LI } 1379*b6cee71dSXin LI } 1380*b6cee71dSXin LI return v; 1381*b6cee71dSXin LI } 1382*b6cee71dSXin LI 1383*b6cee71dSXin LI private int 1384*b6cee71dSXin LI string_modifier_check(struct magic_set *ms, struct magic *m) 1385*b6cee71dSXin LI { 1386*b6cee71dSXin LI if ((ms->flags & MAGIC_CHECK) == 0) 1387*b6cee71dSXin LI return 0; 1388*b6cee71dSXin LI 1389*b6cee71dSXin LI if ((m->type != FILE_REGEX || (m->str_flags & REGEX_LINE_COUNT) == 0) && 1390*b6cee71dSXin LI (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0)) { 1391*b6cee71dSXin LI file_magwarn(ms, 1392*b6cee71dSXin LI "'/BHhLl' modifiers are only allowed for pascal strings\n"); 1393*b6cee71dSXin LI return -1; 1394*b6cee71dSXin LI } 1395*b6cee71dSXin LI switch (m->type) { 1396*b6cee71dSXin LI case FILE_BESTRING16: 1397*b6cee71dSXin LI case FILE_LESTRING16: 1398*b6cee71dSXin LI if (m->str_flags != 0) { 1399*b6cee71dSXin LI file_magwarn(ms, 1400*b6cee71dSXin LI "no modifiers allowed for 16-bit strings\n"); 1401*b6cee71dSXin LI return -1; 1402*b6cee71dSXin LI } 1403*b6cee71dSXin LI break; 1404*b6cee71dSXin LI case FILE_STRING: 1405*b6cee71dSXin LI case FILE_PSTRING: 1406*b6cee71dSXin LI if ((m->str_flags & REGEX_OFFSET_START) != 0) { 1407*b6cee71dSXin LI file_magwarn(ms, 1408*b6cee71dSXin LI "'/%c' only allowed on regex and search\n", 1409*b6cee71dSXin LI CHAR_REGEX_OFFSET_START); 1410*b6cee71dSXin LI return -1; 1411*b6cee71dSXin LI } 1412*b6cee71dSXin LI break; 1413*b6cee71dSXin LI case FILE_SEARCH: 1414*b6cee71dSXin LI if (m->str_range == 0) { 1415*b6cee71dSXin LI file_magwarn(ms, 1416*b6cee71dSXin LI "missing range; defaulting to %d\n", 1417*b6cee71dSXin LI STRING_DEFAULT_RANGE); 1418*b6cee71dSXin LI m->str_range = STRING_DEFAULT_RANGE; 1419*b6cee71dSXin LI return -1; 1420*b6cee71dSXin LI } 1421*b6cee71dSXin LI break; 1422*b6cee71dSXin LI case FILE_REGEX: 1423*b6cee71dSXin LI if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) { 1424*b6cee71dSXin LI file_magwarn(ms, "'/%c' not allowed on regex\n", 1425*b6cee71dSXin LI CHAR_COMPACT_WHITESPACE); 1426*b6cee71dSXin LI return -1; 1427*b6cee71dSXin LI } 1428*b6cee71dSXin LI if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) { 1429*b6cee71dSXin LI file_magwarn(ms, "'/%c' not allowed on regex\n", 1430*b6cee71dSXin LI CHAR_COMPACT_OPTIONAL_WHITESPACE); 1431*b6cee71dSXin LI return -1; 1432*b6cee71dSXin LI } 1433*b6cee71dSXin LI break; 1434*b6cee71dSXin LI default: 1435*b6cee71dSXin LI file_magwarn(ms, "coding error: m->type=%d\n", 1436*b6cee71dSXin LI m->type); 1437*b6cee71dSXin LI return -1; 1438*b6cee71dSXin LI } 1439*b6cee71dSXin LI return 0; 1440*b6cee71dSXin LI } 1441*b6cee71dSXin LI 1442*b6cee71dSXin LI private int 1443*b6cee71dSXin LI get_op(char c) 1444*b6cee71dSXin LI { 1445*b6cee71dSXin LI switch (c) { 1446*b6cee71dSXin LI case '&': 1447*b6cee71dSXin LI return FILE_OPAND; 1448*b6cee71dSXin LI case '|': 1449*b6cee71dSXin LI return FILE_OPOR; 1450*b6cee71dSXin LI case '^': 1451*b6cee71dSXin LI return FILE_OPXOR; 1452*b6cee71dSXin LI case '+': 1453*b6cee71dSXin LI return FILE_OPADD; 1454*b6cee71dSXin LI case '-': 1455*b6cee71dSXin LI return FILE_OPMINUS; 1456*b6cee71dSXin LI case '*': 1457*b6cee71dSXin LI return FILE_OPMULTIPLY; 1458*b6cee71dSXin LI case '/': 1459*b6cee71dSXin LI return FILE_OPDIVIDE; 1460*b6cee71dSXin LI case '%': 1461*b6cee71dSXin LI return FILE_OPMODULO; 1462*b6cee71dSXin LI default: 1463*b6cee71dSXin LI return -1; 1464*b6cee71dSXin LI } 1465*b6cee71dSXin LI } 1466*b6cee71dSXin LI 1467*b6cee71dSXin LI #ifdef ENABLE_CONDITIONALS 1468*b6cee71dSXin LI private int 1469*b6cee71dSXin LI get_cond(const char *l, const char **t) 1470*b6cee71dSXin LI { 1471*b6cee71dSXin LI static const struct cond_tbl_s { 1472*b6cee71dSXin LI char name[8]; 1473*b6cee71dSXin LI size_t len; 1474*b6cee71dSXin LI int cond; 1475*b6cee71dSXin LI } cond_tbl[] = { 1476*b6cee71dSXin LI { "if", 2, COND_IF }, 1477*b6cee71dSXin LI { "elif", 4, COND_ELIF }, 1478*b6cee71dSXin LI { "else", 4, COND_ELSE }, 1479*b6cee71dSXin LI { "", 0, COND_NONE }, 1480*b6cee71dSXin LI }; 1481*b6cee71dSXin LI const struct cond_tbl_s *p; 1482*b6cee71dSXin LI 1483*b6cee71dSXin LI for (p = cond_tbl; p->len; p++) { 1484*b6cee71dSXin LI if (strncmp(l, p->name, p->len) == 0 && 1485*b6cee71dSXin LI isspace((unsigned char)l[p->len])) { 1486*b6cee71dSXin LI if (t) 1487*b6cee71dSXin LI *t = l + p->len; 1488*b6cee71dSXin LI break; 1489*b6cee71dSXin LI } 1490*b6cee71dSXin LI } 1491*b6cee71dSXin LI return p->cond; 1492*b6cee71dSXin LI } 1493*b6cee71dSXin LI 1494*b6cee71dSXin LI private int 1495*b6cee71dSXin LI check_cond(struct magic_set *ms, int cond, uint32_t cont_level) 1496*b6cee71dSXin LI { 1497*b6cee71dSXin LI int last_cond; 1498*b6cee71dSXin LI last_cond = ms->c.li[cont_level].last_cond; 1499*b6cee71dSXin LI 1500*b6cee71dSXin LI switch (cond) { 1501*b6cee71dSXin LI case COND_IF: 1502*b6cee71dSXin LI if (last_cond != COND_NONE && last_cond != COND_ELIF) { 1503*b6cee71dSXin LI if (ms->flags & MAGIC_CHECK) 1504*b6cee71dSXin LI file_magwarn(ms, "syntax error: `if'"); 1505*b6cee71dSXin LI return -1; 1506*b6cee71dSXin LI } 1507*b6cee71dSXin LI last_cond = COND_IF; 1508*b6cee71dSXin LI break; 1509*b6cee71dSXin LI 1510*b6cee71dSXin LI case COND_ELIF: 1511*b6cee71dSXin LI if (last_cond != COND_IF && last_cond != COND_ELIF) { 1512*b6cee71dSXin LI if (ms->flags & MAGIC_CHECK) 1513*b6cee71dSXin LI file_magwarn(ms, "syntax error: `elif'"); 1514*b6cee71dSXin LI return -1; 1515*b6cee71dSXin LI } 1516*b6cee71dSXin LI last_cond = COND_ELIF; 1517*b6cee71dSXin LI break; 1518*b6cee71dSXin LI 1519*b6cee71dSXin LI case COND_ELSE: 1520*b6cee71dSXin LI if (last_cond != COND_IF && last_cond != COND_ELIF) { 1521*b6cee71dSXin LI if (ms->flags & MAGIC_CHECK) 1522*b6cee71dSXin LI file_magwarn(ms, "syntax error: `else'"); 1523*b6cee71dSXin LI return -1; 1524*b6cee71dSXin LI } 1525*b6cee71dSXin LI last_cond = COND_NONE; 1526*b6cee71dSXin LI break; 1527*b6cee71dSXin LI 1528*b6cee71dSXin LI case COND_NONE: 1529*b6cee71dSXin LI last_cond = COND_NONE; 1530*b6cee71dSXin LI break; 1531*b6cee71dSXin LI } 1532*b6cee71dSXin LI 1533*b6cee71dSXin LI ms->c.li[cont_level].last_cond = last_cond; 1534*b6cee71dSXin LI return 0; 1535*b6cee71dSXin LI } 1536*b6cee71dSXin LI #endif /* ENABLE_CONDITIONALS */ 1537*b6cee71dSXin LI 1538*b6cee71dSXin LI /* 1539*b6cee71dSXin LI * parse one line from magic file, put into magic[index++] if valid 1540*b6cee71dSXin LI */ 1541*b6cee71dSXin LI private int 1542*b6cee71dSXin LI parse(struct magic_set *ms, struct magic_entry *me, const char *line, 1543*b6cee71dSXin LI size_t lineno, int action) 1544*b6cee71dSXin LI { 1545*b6cee71dSXin LI #ifdef ENABLE_CONDITIONALS 1546*b6cee71dSXin LI static uint32_t last_cont_level = 0; 1547*b6cee71dSXin LI #endif 1548*b6cee71dSXin LI size_t i; 1549*b6cee71dSXin LI struct magic *m; 1550*b6cee71dSXin LI const char *l = line; 1551*b6cee71dSXin LI char *t; 1552*b6cee71dSXin LI int op; 1553*b6cee71dSXin LI uint32_t cont_level; 1554*b6cee71dSXin LI int32_t diff; 1555*b6cee71dSXin LI 1556*b6cee71dSXin LI cont_level = 0; 1557*b6cee71dSXin LI 1558*b6cee71dSXin LI /* 1559*b6cee71dSXin LI * Parse the offset. 1560*b6cee71dSXin LI */ 1561*b6cee71dSXin LI while (*l == '>') { 1562*b6cee71dSXin LI ++l; /* step over */ 1563*b6cee71dSXin LI cont_level++; 1564*b6cee71dSXin LI } 1565*b6cee71dSXin LI #ifdef ENABLE_CONDITIONALS 1566*b6cee71dSXin LI if (cont_level == 0 || cont_level > last_cont_level) 1567*b6cee71dSXin LI if (file_check_mem(ms, cont_level) == -1) 1568*b6cee71dSXin LI return -1; 1569*b6cee71dSXin LI last_cont_level = cont_level; 1570*b6cee71dSXin LI #endif 1571*b6cee71dSXin LI if (cont_level != 0) { 1572*b6cee71dSXin LI if (me->mp == NULL) { 1573*b6cee71dSXin LI file_magerror(ms, "No current entry for continuation"); 1574*b6cee71dSXin LI return -1; 1575*b6cee71dSXin LI } 1576*b6cee71dSXin LI if (me->cont_count == 0) { 1577*b6cee71dSXin LI file_magerror(ms, "Continuations present with 0 count"); 1578*b6cee71dSXin LI return -1; 1579*b6cee71dSXin LI } 1580*b6cee71dSXin LI m = &me->mp[me->cont_count - 1]; 1581*b6cee71dSXin LI diff = (int32_t)cont_level - (int32_t)m->cont_level; 1582*b6cee71dSXin LI if (diff > 1) 1583*b6cee71dSXin LI file_magwarn(ms, "New continuation level %u is more " 1584*b6cee71dSXin LI "than one larger than current level %u", cont_level, 1585*b6cee71dSXin LI m->cont_level); 1586*b6cee71dSXin LI if (me->cont_count == me->max_count) { 1587*b6cee71dSXin LI struct magic *nm; 1588*b6cee71dSXin LI size_t cnt = me->max_count + ALLOC_CHUNK; 1589*b6cee71dSXin LI if ((nm = CAST(struct magic *, realloc(me->mp, 1590*b6cee71dSXin LI sizeof(*nm) * cnt))) == NULL) { 1591*b6cee71dSXin LI file_oomem(ms, sizeof(*nm) * cnt); 1592*b6cee71dSXin LI return -1; 1593*b6cee71dSXin LI } 1594*b6cee71dSXin LI me->mp = m = nm; 1595*b6cee71dSXin LI me->max_count = CAST(uint32_t, cnt); 1596*b6cee71dSXin LI } 1597*b6cee71dSXin LI m = &me->mp[me->cont_count++]; 1598*b6cee71dSXin LI (void)memset(m, 0, sizeof(*m)); 1599*b6cee71dSXin LI m->cont_level = cont_level; 1600*b6cee71dSXin LI } else { 1601*b6cee71dSXin LI static const size_t len = sizeof(*m) * ALLOC_CHUNK; 1602*b6cee71dSXin LI if (me->mp != NULL) 1603*b6cee71dSXin LI return 1; 1604*b6cee71dSXin LI if ((m = CAST(struct magic *, malloc(len))) == NULL) { 1605*b6cee71dSXin LI file_oomem(ms, len); 1606*b6cee71dSXin LI return -1; 1607*b6cee71dSXin LI } 1608*b6cee71dSXin LI me->mp = m; 1609*b6cee71dSXin LI me->max_count = ALLOC_CHUNK; 1610*b6cee71dSXin LI (void)memset(m, 0, sizeof(*m)); 1611*b6cee71dSXin LI m->factor_op = FILE_FACTOR_OP_NONE; 1612*b6cee71dSXin LI m->cont_level = 0; 1613*b6cee71dSXin LI me->cont_count = 1; 1614*b6cee71dSXin LI } 1615*b6cee71dSXin LI m->lineno = CAST(uint32_t, lineno); 1616*b6cee71dSXin LI 1617*b6cee71dSXin LI if (*l == '&') { /* m->cont_level == 0 checked below. */ 1618*b6cee71dSXin LI ++l; /* step over */ 1619*b6cee71dSXin LI m->flag |= OFFADD; 1620*b6cee71dSXin LI } 1621*b6cee71dSXin LI if (*l == '(') { 1622*b6cee71dSXin LI ++l; /* step over */ 1623*b6cee71dSXin LI m->flag |= INDIR; 1624*b6cee71dSXin LI if (m->flag & OFFADD) 1625*b6cee71dSXin LI m->flag = (m->flag & ~OFFADD) | INDIROFFADD; 1626*b6cee71dSXin LI 1627*b6cee71dSXin LI if (*l == '&') { /* m->cont_level == 0 checked below */ 1628*b6cee71dSXin LI ++l; /* step over */ 1629*b6cee71dSXin LI m->flag |= OFFADD; 1630*b6cee71dSXin LI } 1631*b6cee71dSXin LI } 1632*b6cee71dSXin LI /* Indirect offsets are not valid at level 0. */ 1633*b6cee71dSXin LI if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD))) 1634*b6cee71dSXin LI if (ms->flags & MAGIC_CHECK) 1635*b6cee71dSXin LI file_magwarn(ms, "relative offset at level 0"); 1636*b6cee71dSXin LI 1637*b6cee71dSXin LI /* get offset, then skip over it */ 1638*b6cee71dSXin LI m->offset = (uint32_t)strtoul(l, &t, 0); 1639*b6cee71dSXin LI if (l == t) 1640*b6cee71dSXin LI if (ms->flags & MAGIC_CHECK) 1641*b6cee71dSXin LI file_magwarn(ms, "offset `%s' invalid", l); 1642*b6cee71dSXin LI l = t; 1643*b6cee71dSXin LI 1644*b6cee71dSXin LI if (m->flag & INDIR) { 1645*b6cee71dSXin LI m->in_type = FILE_LONG; 1646*b6cee71dSXin LI m->in_offset = 0; 1647*b6cee71dSXin LI /* 1648*b6cee71dSXin LI * read [.lbs][+-]nnnnn) 1649*b6cee71dSXin LI */ 1650*b6cee71dSXin LI if (*l == '.') { 1651*b6cee71dSXin LI l++; 1652*b6cee71dSXin LI switch (*l) { 1653*b6cee71dSXin LI case 'l': 1654*b6cee71dSXin LI m->in_type = FILE_LELONG; 1655*b6cee71dSXin LI break; 1656*b6cee71dSXin LI case 'L': 1657*b6cee71dSXin LI m->in_type = FILE_BELONG; 1658*b6cee71dSXin LI break; 1659*b6cee71dSXin LI case 'm': 1660*b6cee71dSXin LI m->in_type = FILE_MELONG; 1661*b6cee71dSXin LI break; 1662*b6cee71dSXin LI case 'h': 1663*b6cee71dSXin LI case 's': 1664*b6cee71dSXin LI m->in_type = FILE_LESHORT; 1665*b6cee71dSXin LI break; 1666*b6cee71dSXin LI case 'H': 1667*b6cee71dSXin LI case 'S': 1668*b6cee71dSXin LI m->in_type = FILE_BESHORT; 1669*b6cee71dSXin LI break; 1670*b6cee71dSXin LI case 'c': 1671*b6cee71dSXin LI case 'b': 1672*b6cee71dSXin LI case 'C': 1673*b6cee71dSXin LI case 'B': 1674*b6cee71dSXin LI m->in_type = FILE_BYTE; 1675*b6cee71dSXin LI break; 1676*b6cee71dSXin LI case 'e': 1677*b6cee71dSXin LI case 'f': 1678*b6cee71dSXin LI case 'g': 1679*b6cee71dSXin LI m->in_type = FILE_LEDOUBLE; 1680*b6cee71dSXin LI break; 1681*b6cee71dSXin LI case 'E': 1682*b6cee71dSXin LI case 'F': 1683*b6cee71dSXin LI case 'G': 1684*b6cee71dSXin LI m->in_type = FILE_BEDOUBLE; 1685*b6cee71dSXin LI break; 1686*b6cee71dSXin LI case 'i': 1687*b6cee71dSXin LI m->in_type = FILE_LEID3; 1688*b6cee71dSXin LI break; 1689*b6cee71dSXin LI case 'I': 1690*b6cee71dSXin LI m->in_type = FILE_BEID3; 1691*b6cee71dSXin LI break; 1692*b6cee71dSXin LI default: 1693*b6cee71dSXin LI if (ms->flags & MAGIC_CHECK) 1694*b6cee71dSXin LI file_magwarn(ms, 1695*b6cee71dSXin LI "indirect offset type `%c' invalid", 1696*b6cee71dSXin LI *l); 1697*b6cee71dSXin LI break; 1698*b6cee71dSXin LI } 1699*b6cee71dSXin LI l++; 1700*b6cee71dSXin LI } 1701*b6cee71dSXin LI 1702*b6cee71dSXin LI m->in_op = 0; 1703*b6cee71dSXin LI if (*l == '~') { 1704*b6cee71dSXin LI m->in_op |= FILE_OPINVERSE; 1705*b6cee71dSXin LI l++; 1706*b6cee71dSXin LI } 1707*b6cee71dSXin LI if ((op = get_op(*l)) != -1) { 1708*b6cee71dSXin LI m->in_op |= op; 1709*b6cee71dSXin LI l++; 1710*b6cee71dSXin LI } 1711*b6cee71dSXin LI if (*l == '(') { 1712*b6cee71dSXin LI m->in_op |= FILE_OPINDIRECT; 1713*b6cee71dSXin LI l++; 1714*b6cee71dSXin LI } 1715*b6cee71dSXin LI if (isdigit((unsigned char)*l) || *l == '-') { 1716*b6cee71dSXin LI m->in_offset = (int32_t)strtol(l, &t, 0); 1717*b6cee71dSXin LI if (l == t) 1718*b6cee71dSXin LI if (ms->flags & MAGIC_CHECK) 1719*b6cee71dSXin LI file_magwarn(ms, 1720*b6cee71dSXin LI "in_offset `%s' invalid", l); 1721*b6cee71dSXin LI l = t; 1722*b6cee71dSXin LI } 1723*b6cee71dSXin LI if (*l++ != ')' || 1724*b6cee71dSXin LI ((m->in_op & FILE_OPINDIRECT) && *l++ != ')')) 1725*b6cee71dSXin LI if (ms->flags & MAGIC_CHECK) 1726*b6cee71dSXin LI file_magwarn(ms, 1727*b6cee71dSXin LI "missing ')' in indirect offset"); 1728*b6cee71dSXin LI } 1729*b6cee71dSXin LI EATAB; 1730*b6cee71dSXin LI 1731*b6cee71dSXin LI #ifdef ENABLE_CONDITIONALS 1732*b6cee71dSXin LI m->cond = get_cond(l, &l); 1733*b6cee71dSXin LI if (check_cond(ms, m->cond, cont_level) == -1) 1734*b6cee71dSXin LI return -1; 1735*b6cee71dSXin LI 1736*b6cee71dSXin LI EATAB; 1737*b6cee71dSXin LI #endif 1738*b6cee71dSXin LI 1739*b6cee71dSXin LI /* 1740*b6cee71dSXin LI * Parse the type. 1741*b6cee71dSXin LI */ 1742*b6cee71dSXin LI if (*l == 'u') { 1743*b6cee71dSXin LI /* 1744*b6cee71dSXin LI * Try it as a keyword type prefixed by "u"; match what 1745*b6cee71dSXin LI * follows the "u". If that fails, try it as an SUS 1746*b6cee71dSXin LI * integer type. 1747*b6cee71dSXin LI */ 1748*b6cee71dSXin LI m->type = get_type(type_tbl, l + 1, &l); 1749*b6cee71dSXin LI if (m->type == FILE_INVALID) { 1750*b6cee71dSXin LI /* 1751*b6cee71dSXin LI * Not a keyword type; parse it as an SUS type, 1752*b6cee71dSXin LI * 'u' possibly followed by a number or C/S/L. 1753*b6cee71dSXin LI */ 1754*b6cee71dSXin LI m->type = get_standard_integer_type(l, &l); 1755*b6cee71dSXin LI } 1756*b6cee71dSXin LI /* It's unsigned. */ 1757*b6cee71dSXin LI if (m->type != FILE_INVALID) 1758*b6cee71dSXin LI m->flag |= UNSIGNED; 1759*b6cee71dSXin LI } else { 1760*b6cee71dSXin LI /* 1761*b6cee71dSXin LI * Try it as a keyword type. If that fails, try it as 1762*b6cee71dSXin LI * an SUS integer type if it begins with "d" or as an 1763*b6cee71dSXin LI * SUS string type if it begins with "s". In any case, 1764*b6cee71dSXin LI * it's not unsigned. 1765*b6cee71dSXin LI */ 1766*b6cee71dSXin LI m->type = get_type(type_tbl, l, &l); 1767*b6cee71dSXin LI if (m->type == FILE_INVALID) { 1768*b6cee71dSXin LI /* 1769*b6cee71dSXin LI * Not a keyword type; parse it as an SUS type, 1770*b6cee71dSXin LI * either 'd' possibly followed by a number or 1771*b6cee71dSXin LI * C/S/L, or just 's'. 1772*b6cee71dSXin LI */ 1773*b6cee71dSXin LI if (*l == 'd') 1774*b6cee71dSXin LI m->type = get_standard_integer_type(l, &l); 1775*b6cee71dSXin LI else if (*l == 's' && !isalpha((unsigned char)l[1])) { 1776*b6cee71dSXin LI m->type = FILE_STRING; 1777*b6cee71dSXin LI ++l; 1778*b6cee71dSXin LI } 1779*b6cee71dSXin LI } 1780*b6cee71dSXin LI } 1781*b6cee71dSXin LI 1782*b6cee71dSXin LI if (m->type == FILE_INVALID) { 1783*b6cee71dSXin LI /* Not found - try it as a special keyword. */ 1784*b6cee71dSXin LI m->type = get_type(special_tbl, l, &l); 1785*b6cee71dSXin LI } 1786*b6cee71dSXin LI 1787*b6cee71dSXin LI if (m->type == FILE_INVALID) { 1788*b6cee71dSXin LI if (ms->flags & MAGIC_CHECK) 1789*b6cee71dSXin LI file_magwarn(ms, "type `%s' invalid", l); 1790*b6cee71dSXin LI return -1; 1791*b6cee71dSXin LI } 1792*b6cee71dSXin LI 1793*b6cee71dSXin LI /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ 1794*b6cee71dSXin LI /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */ 1795*b6cee71dSXin LI 1796*b6cee71dSXin LI m->mask_op = 0; 1797*b6cee71dSXin LI if (*l == '~') { 1798*b6cee71dSXin LI if (!IS_STRING(m->type)) 1799*b6cee71dSXin LI m->mask_op |= FILE_OPINVERSE; 1800*b6cee71dSXin LI else if (ms->flags & MAGIC_CHECK) 1801*b6cee71dSXin LI file_magwarn(ms, "'~' invalid for string types"); 1802*b6cee71dSXin LI ++l; 1803*b6cee71dSXin LI } 1804*b6cee71dSXin LI m->str_range = 0; 1805*b6cee71dSXin LI m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0; 1806*b6cee71dSXin LI if ((op = get_op(*l)) != -1) { 1807*b6cee71dSXin LI if (!IS_STRING(m->type)) { 1808*b6cee71dSXin LI uint64_t val; 1809*b6cee71dSXin LI ++l; 1810*b6cee71dSXin LI m->mask_op |= op; 1811*b6cee71dSXin LI val = (uint64_t)strtoull(l, &t, 0); 1812*b6cee71dSXin LI l = t; 1813*b6cee71dSXin LI m->num_mask = file_signextend(ms, m, val); 1814*b6cee71dSXin LI eatsize(&l); 1815*b6cee71dSXin LI } 1816*b6cee71dSXin LI else if (op == FILE_OPDIVIDE) { 1817*b6cee71dSXin LI int have_range = 0; 1818*b6cee71dSXin LI while (!isspace((unsigned char)*++l)) { 1819*b6cee71dSXin LI switch (*l) { 1820*b6cee71dSXin LI case '0': case '1': case '2': 1821*b6cee71dSXin LI case '3': case '4': case '5': 1822*b6cee71dSXin LI case '6': case '7': case '8': 1823*b6cee71dSXin LI case '9': 1824*b6cee71dSXin LI if (have_range && 1825*b6cee71dSXin LI (ms->flags & MAGIC_CHECK)) 1826*b6cee71dSXin LI file_magwarn(ms, 1827*b6cee71dSXin LI "multiple ranges"); 1828*b6cee71dSXin LI have_range = 1; 1829*b6cee71dSXin LI m->str_range = CAST(uint32_t, 1830*b6cee71dSXin LI strtoul(l, &t, 0)); 1831*b6cee71dSXin LI if (m->str_range == 0) 1832*b6cee71dSXin LI file_magwarn(ms, 1833*b6cee71dSXin LI "zero range"); 1834*b6cee71dSXin LI l = t - 1; 1835*b6cee71dSXin LI break; 1836*b6cee71dSXin LI case CHAR_COMPACT_WHITESPACE: 1837*b6cee71dSXin LI m->str_flags |= 1838*b6cee71dSXin LI STRING_COMPACT_WHITESPACE; 1839*b6cee71dSXin LI break; 1840*b6cee71dSXin LI case CHAR_COMPACT_OPTIONAL_WHITESPACE: 1841*b6cee71dSXin LI m->str_flags |= 1842*b6cee71dSXin LI STRING_COMPACT_OPTIONAL_WHITESPACE; 1843*b6cee71dSXin LI break; 1844*b6cee71dSXin LI case CHAR_IGNORE_LOWERCASE: 1845*b6cee71dSXin LI m->str_flags |= STRING_IGNORE_LOWERCASE; 1846*b6cee71dSXin LI break; 1847*b6cee71dSXin LI case CHAR_IGNORE_UPPERCASE: 1848*b6cee71dSXin LI m->str_flags |= STRING_IGNORE_UPPERCASE; 1849*b6cee71dSXin LI break; 1850*b6cee71dSXin LI case CHAR_REGEX_OFFSET_START: 1851*b6cee71dSXin LI m->str_flags |= REGEX_OFFSET_START; 1852*b6cee71dSXin LI break; 1853*b6cee71dSXin LI case CHAR_BINTEST: 1854*b6cee71dSXin LI m->str_flags |= STRING_BINTEST; 1855*b6cee71dSXin LI break; 1856*b6cee71dSXin LI case CHAR_TEXTTEST: 1857*b6cee71dSXin LI m->str_flags |= STRING_TEXTTEST; 1858*b6cee71dSXin LI break; 1859*b6cee71dSXin LI case CHAR_TRIM: 1860*b6cee71dSXin LI m->str_flags |= STRING_TRIM; 1861*b6cee71dSXin LI break; 1862*b6cee71dSXin LI case CHAR_PSTRING_1_LE: 1863*b6cee71dSXin LI if (m->type != FILE_PSTRING) 1864*b6cee71dSXin LI goto bad; 1865*b6cee71dSXin LI m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_1_LE; 1866*b6cee71dSXin LI break; 1867*b6cee71dSXin LI case CHAR_PSTRING_2_BE: 1868*b6cee71dSXin LI if (m->type != FILE_PSTRING) 1869*b6cee71dSXin LI goto bad; 1870*b6cee71dSXin LI m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_BE; 1871*b6cee71dSXin LI break; 1872*b6cee71dSXin LI case CHAR_PSTRING_2_LE: 1873*b6cee71dSXin LI if (m->type != FILE_PSTRING) 1874*b6cee71dSXin LI goto bad; 1875*b6cee71dSXin LI m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_LE; 1876*b6cee71dSXin LI break; 1877*b6cee71dSXin LI case CHAR_PSTRING_4_BE: 1878*b6cee71dSXin LI if (m->type != FILE_PSTRING) 1879*b6cee71dSXin LI goto bad; 1880*b6cee71dSXin LI m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_BE; 1881*b6cee71dSXin LI break; 1882*b6cee71dSXin LI case CHAR_PSTRING_4_LE: 1883*b6cee71dSXin LI switch (m->type) { 1884*b6cee71dSXin LI case FILE_PSTRING: 1885*b6cee71dSXin LI case FILE_REGEX: 1886*b6cee71dSXin LI break; 1887*b6cee71dSXin LI default: 1888*b6cee71dSXin LI goto bad; 1889*b6cee71dSXin LI } 1890*b6cee71dSXin LI m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_LE; 1891*b6cee71dSXin LI break; 1892*b6cee71dSXin LI case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF: 1893*b6cee71dSXin LI if (m->type != FILE_PSTRING) 1894*b6cee71dSXin LI goto bad; 1895*b6cee71dSXin LI m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF; 1896*b6cee71dSXin LI break; 1897*b6cee71dSXin LI default: 1898*b6cee71dSXin LI bad: 1899*b6cee71dSXin LI if (ms->flags & MAGIC_CHECK) 1900*b6cee71dSXin LI file_magwarn(ms, 1901*b6cee71dSXin LI "string extension `%c' " 1902*b6cee71dSXin LI "invalid", *l); 1903*b6cee71dSXin LI return -1; 1904*b6cee71dSXin LI } 1905*b6cee71dSXin LI /* allow multiple '/' for readability */ 1906*b6cee71dSXin LI if (l[1] == '/' && 1907*b6cee71dSXin LI !isspace((unsigned char)l[2])) 1908*b6cee71dSXin LI l++; 1909*b6cee71dSXin LI } 1910*b6cee71dSXin LI if (string_modifier_check(ms, m) == -1) 1911*b6cee71dSXin LI return -1; 1912*b6cee71dSXin LI } 1913*b6cee71dSXin LI else { 1914*b6cee71dSXin LI if (ms->flags & MAGIC_CHECK) 1915*b6cee71dSXin LI file_magwarn(ms, "invalid string op: %c", *t); 1916*b6cee71dSXin LI return -1; 1917*b6cee71dSXin LI } 1918*b6cee71dSXin LI } 1919*b6cee71dSXin LI /* 1920*b6cee71dSXin LI * We used to set mask to all 1's here, instead let's just not do 1921*b6cee71dSXin LI * anything if mask = 0 (unless you have a better idea) 1922*b6cee71dSXin LI */ 1923*b6cee71dSXin LI EATAB; 1924*b6cee71dSXin LI 1925*b6cee71dSXin LI switch (*l) { 1926*b6cee71dSXin LI case '>': 1927*b6cee71dSXin LI case '<': 1928*b6cee71dSXin LI m->reln = *l; 1929*b6cee71dSXin LI ++l; 1930*b6cee71dSXin LI if (*l == '=') { 1931*b6cee71dSXin LI if (ms->flags & MAGIC_CHECK) { 1932*b6cee71dSXin LI file_magwarn(ms, "%c= not supported", 1933*b6cee71dSXin LI m->reln); 1934*b6cee71dSXin LI return -1; 1935*b6cee71dSXin LI } 1936*b6cee71dSXin LI ++l; 1937*b6cee71dSXin LI } 1938*b6cee71dSXin LI break; 1939*b6cee71dSXin LI /* Old-style anding: "0 byte &0x80 dynamically linked" */ 1940*b6cee71dSXin LI case '&': 1941*b6cee71dSXin LI case '^': 1942*b6cee71dSXin LI case '=': 1943*b6cee71dSXin LI m->reln = *l; 1944*b6cee71dSXin LI ++l; 1945*b6cee71dSXin LI if (*l == '=') { 1946*b6cee71dSXin LI /* HP compat: ignore &= etc. */ 1947*b6cee71dSXin LI ++l; 1948*b6cee71dSXin LI } 1949*b6cee71dSXin LI break; 1950*b6cee71dSXin LI case '!': 1951*b6cee71dSXin LI m->reln = *l; 1952*b6cee71dSXin LI ++l; 1953*b6cee71dSXin LI break; 1954*b6cee71dSXin LI default: 1955*b6cee71dSXin LI m->reln = '='; /* the default relation */ 1956*b6cee71dSXin LI if (*l == 'x' && ((isascii((unsigned char)l[1]) && 1957*b6cee71dSXin LI isspace((unsigned char)l[1])) || !l[1])) { 1958*b6cee71dSXin LI m->reln = *l; 1959*b6cee71dSXin LI ++l; 1960*b6cee71dSXin LI } 1961*b6cee71dSXin LI break; 1962*b6cee71dSXin LI } 1963*b6cee71dSXin LI /* 1964*b6cee71dSXin LI * Grab the value part, except for an 'x' reln. 1965*b6cee71dSXin LI */ 1966*b6cee71dSXin LI if (m->reln != 'x' && getvalue(ms, m, &l, action)) 1967*b6cee71dSXin LI return -1; 1968*b6cee71dSXin LI 1969*b6cee71dSXin LI /* 1970*b6cee71dSXin LI * TODO finish this macro and start using it! 1971*b6cee71dSXin LI * #define offsetcheck {if (offset > HOWMANY-1) 1972*b6cee71dSXin LI * magwarn("offset too big"); } 1973*b6cee71dSXin LI */ 1974*b6cee71dSXin LI 1975*b6cee71dSXin LI /* 1976*b6cee71dSXin LI * Now get last part - the description 1977*b6cee71dSXin LI */ 1978*b6cee71dSXin LI EATAB; 1979*b6cee71dSXin LI if (l[0] == '\b') { 1980*b6cee71dSXin LI ++l; 1981*b6cee71dSXin LI m->flag |= NOSPACE; 1982*b6cee71dSXin LI } else if ((l[0] == '\\') && (l[1] == 'b')) { 1983*b6cee71dSXin LI ++l; 1984*b6cee71dSXin LI ++l; 1985*b6cee71dSXin LI m->flag |= NOSPACE; 1986*b6cee71dSXin LI } 1987*b6cee71dSXin LI for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); ) 1988*b6cee71dSXin LI continue; 1989*b6cee71dSXin LI if (i == sizeof(m->desc)) { 1990*b6cee71dSXin LI m->desc[sizeof(m->desc) - 1] = '\0'; 1991*b6cee71dSXin LI if (ms->flags & MAGIC_CHECK) 1992*b6cee71dSXin LI file_magwarn(ms, "description `%s' truncated", m->desc); 1993*b6cee71dSXin LI } 1994*b6cee71dSXin LI 1995*b6cee71dSXin LI /* 1996*b6cee71dSXin LI * We only do this check while compiling, or if any of the magic 1997*b6cee71dSXin LI * files were not compiled. 1998*b6cee71dSXin LI */ 1999*b6cee71dSXin LI if (ms->flags & MAGIC_CHECK) { 2000*b6cee71dSXin LI if (check_format(ms, m) == -1) 2001*b6cee71dSXin LI return -1; 2002*b6cee71dSXin LI } 2003*b6cee71dSXin LI #ifndef COMPILE_ONLY 2004*b6cee71dSXin LI if (action == FILE_CHECK) { 2005*b6cee71dSXin LI file_mdump(m); 2006*b6cee71dSXin LI } 2007*b6cee71dSXin LI #endif 2008*b6cee71dSXin LI m->mimetype[0] = '\0'; /* initialise MIME type to none */ 2009*b6cee71dSXin LI return 0; 2010*b6cee71dSXin LI } 2011*b6cee71dSXin LI 2012*b6cee71dSXin LI /* 2013*b6cee71dSXin LI * parse a STRENGTH annotation line from magic file, put into magic[index - 1] 2014*b6cee71dSXin LI * if valid 2015*b6cee71dSXin LI */ 2016*b6cee71dSXin LI private int 2017*b6cee71dSXin LI parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line) 2018*b6cee71dSXin LI { 2019*b6cee71dSXin LI const char *l = line; 2020*b6cee71dSXin LI char *el; 2021*b6cee71dSXin LI unsigned long factor; 2022*b6cee71dSXin LI struct magic *m = &me->mp[0]; 2023*b6cee71dSXin LI 2024*b6cee71dSXin LI if (m->factor_op != FILE_FACTOR_OP_NONE) { 2025*b6cee71dSXin LI file_magwarn(ms, 2026*b6cee71dSXin LI "Current entry already has a strength type: %c %d", 2027*b6cee71dSXin LI m->factor_op, m->factor); 2028*b6cee71dSXin LI return -1; 2029*b6cee71dSXin LI } 2030*b6cee71dSXin LI if (m->type == FILE_NAME) { 2031*b6cee71dSXin LI file_magwarn(ms, "%s: Strength setting is not supported in " 2032*b6cee71dSXin LI "\"name\" magic entries", m->value.s); 2033*b6cee71dSXin LI return -1; 2034*b6cee71dSXin LI } 2035*b6cee71dSXin LI EATAB; 2036*b6cee71dSXin LI switch (*l) { 2037*b6cee71dSXin LI case FILE_FACTOR_OP_NONE: 2038*b6cee71dSXin LI case FILE_FACTOR_OP_PLUS: 2039*b6cee71dSXin LI case FILE_FACTOR_OP_MINUS: 2040*b6cee71dSXin LI case FILE_FACTOR_OP_TIMES: 2041*b6cee71dSXin LI case FILE_FACTOR_OP_DIV: 2042*b6cee71dSXin LI m->factor_op = *l++; 2043*b6cee71dSXin LI break; 2044*b6cee71dSXin LI default: 2045*b6cee71dSXin LI file_magwarn(ms, "Unknown factor op `%c'", *l); 2046*b6cee71dSXin LI return -1; 2047*b6cee71dSXin LI } 2048*b6cee71dSXin LI EATAB; 2049*b6cee71dSXin LI factor = strtoul(l, &el, 0); 2050*b6cee71dSXin LI if (factor > 255) { 2051*b6cee71dSXin LI file_magwarn(ms, "Too large factor `%lu'", factor); 2052*b6cee71dSXin LI goto out; 2053*b6cee71dSXin LI } 2054*b6cee71dSXin LI if (*el && !isspace((unsigned char)*el)) { 2055*b6cee71dSXin LI file_magwarn(ms, "Bad factor `%s'", l); 2056*b6cee71dSXin LI goto out; 2057*b6cee71dSXin LI } 2058*b6cee71dSXin LI m->factor = (uint8_t)factor; 2059*b6cee71dSXin LI if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) { 2060*b6cee71dSXin LI file_magwarn(ms, "Cannot have factor op `%c' and factor %u", 2061*b6cee71dSXin LI m->factor_op, m->factor); 2062*b6cee71dSXin LI goto out; 2063*b6cee71dSXin LI } 2064*b6cee71dSXin LI return 0; 2065*b6cee71dSXin LI out: 2066*b6cee71dSXin LI m->factor_op = FILE_FACTOR_OP_NONE; 2067*b6cee71dSXin LI m->factor = 0; 2068*b6cee71dSXin LI return -1; 2069*b6cee71dSXin LI } 2070*b6cee71dSXin LI 2071*b6cee71dSXin LI private int 2072*b6cee71dSXin LI parse_extra(struct magic_set *ms, struct magic_entry *me, const char *line, 2073*b6cee71dSXin LI off_t off, size_t len, const char *name, int nt) 2074*b6cee71dSXin LI { 2075*b6cee71dSXin LI size_t i; 2076*b6cee71dSXin LI const char *l = line; 2077*b6cee71dSXin LI struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1]; 2078*b6cee71dSXin LI char *buf = (char *)m + off; 2079*b6cee71dSXin LI 2080*b6cee71dSXin LI if (buf[0] != '\0') { 2081*b6cee71dSXin LI len = nt ? strlen(buf) : len; 2082*b6cee71dSXin LI file_magwarn(ms, "Current entry already has a %s type " 2083*b6cee71dSXin LI "`%.*s', new type `%s'", name, (int)len, buf, l); 2084*b6cee71dSXin LI return -1; 2085*b6cee71dSXin LI } 2086*b6cee71dSXin LI 2087*b6cee71dSXin LI if (*m->desc == '\0') { 2088*b6cee71dSXin LI file_magwarn(ms, "Current entry does not yet have a " 2089*b6cee71dSXin LI "description for adding a %s type", name); 2090*b6cee71dSXin LI return -1; 2091*b6cee71dSXin LI } 2092*b6cee71dSXin LI 2093*b6cee71dSXin LI EATAB; 2094*b6cee71dSXin LI for (i = 0; *l && ((isascii((unsigned char)*l) && 2095*b6cee71dSXin LI isalnum((unsigned char)*l)) || strchr("-+/.", *l)) && 2096*b6cee71dSXin LI i < len; buf[i++] = *l++) 2097*b6cee71dSXin LI continue; 2098*b6cee71dSXin LI 2099*b6cee71dSXin LI if (i == len && *l) { 2100*b6cee71dSXin LI if (nt) 2101*b6cee71dSXin LI buf[len - 1] = '\0'; 2102*b6cee71dSXin LI if (ms->flags & MAGIC_CHECK) 2103*b6cee71dSXin LI file_magwarn(ms, "%s type `%s' truncated %" 2104*b6cee71dSXin LI SIZE_T_FORMAT "u", name, line, i); 2105*b6cee71dSXin LI } else { 2106*b6cee71dSXin LI if (nt) 2107*b6cee71dSXin LI buf[i] = '\0'; 2108*b6cee71dSXin LI } 2109*b6cee71dSXin LI 2110*b6cee71dSXin LI if (i > 0) 2111*b6cee71dSXin LI return 0; 2112*b6cee71dSXin LI else 2113*b6cee71dSXin LI return -1; 2114*b6cee71dSXin LI } 2115*b6cee71dSXin LI 2116*b6cee71dSXin LI /* 2117*b6cee71dSXin LI * Parse an Apple CREATOR/TYPE annotation from magic file and put it into 2118*b6cee71dSXin LI * magic[index - 1] 2119*b6cee71dSXin LI */ 2120*b6cee71dSXin LI private int 2121*b6cee71dSXin LI parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line) 2122*b6cee71dSXin LI { 2123*b6cee71dSXin LI struct magic *m = &me->mp[0]; 2124*b6cee71dSXin LI 2125*b6cee71dSXin LI return parse_extra(ms, me, line, offsetof(struct magic, apple), 2126*b6cee71dSXin LI sizeof(m->apple), "APPLE", 0); 2127*b6cee71dSXin LI } 2128*b6cee71dSXin LI 2129*b6cee71dSXin LI /* 2130*b6cee71dSXin LI * parse a MIME annotation line from magic file, put into magic[index - 1] 2131*b6cee71dSXin LI * if valid 2132*b6cee71dSXin LI */ 2133*b6cee71dSXin LI private int 2134*b6cee71dSXin LI parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line) 2135*b6cee71dSXin LI { 2136*b6cee71dSXin LI struct magic *m = &me->mp[0]; 2137*b6cee71dSXin LI 2138*b6cee71dSXin LI return parse_extra(ms, me, line, offsetof(struct magic, mimetype), 2139*b6cee71dSXin LI sizeof(m->mimetype), "MIME", 1); 2140*b6cee71dSXin LI } 2141*b6cee71dSXin LI 2142*b6cee71dSXin LI private int 2143*b6cee71dSXin LI check_format_type(const char *ptr, int type) 2144*b6cee71dSXin LI { 2145*b6cee71dSXin LI int quad = 0, h; 2146*b6cee71dSXin LI if (*ptr == '\0') { 2147*b6cee71dSXin LI /* Missing format string; bad */ 2148*b6cee71dSXin LI return -1; 2149*b6cee71dSXin LI } 2150*b6cee71dSXin LI 2151*b6cee71dSXin LI switch (file_formats[type]) { 2152*b6cee71dSXin LI case FILE_FMT_QUAD: 2153*b6cee71dSXin LI quad = 1; 2154*b6cee71dSXin LI /*FALLTHROUGH*/ 2155*b6cee71dSXin LI case FILE_FMT_NUM: 2156*b6cee71dSXin LI if (quad == 0) { 2157*b6cee71dSXin LI switch (type) { 2158*b6cee71dSXin LI case FILE_BYTE: 2159*b6cee71dSXin LI h = 2; 2160*b6cee71dSXin LI break; 2161*b6cee71dSXin LI case FILE_SHORT: 2162*b6cee71dSXin LI case FILE_BESHORT: 2163*b6cee71dSXin LI case FILE_LESHORT: 2164*b6cee71dSXin LI h = 1; 2165*b6cee71dSXin LI break; 2166*b6cee71dSXin LI case FILE_LONG: 2167*b6cee71dSXin LI case FILE_BELONG: 2168*b6cee71dSXin LI case FILE_LELONG: 2169*b6cee71dSXin LI case FILE_MELONG: 2170*b6cee71dSXin LI case FILE_LEID3: 2171*b6cee71dSXin LI case FILE_BEID3: 2172*b6cee71dSXin LI case FILE_INDIRECT: 2173*b6cee71dSXin LI h = 0; 2174*b6cee71dSXin LI break; 2175*b6cee71dSXin LI default: 2176*b6cee71dSXin LI abort(); 2177*b6cee71dSXin LI } 2178*b6cee71dSXin LI } else 2179*b6cee71dSXin LI h = 0; 2180*b6cee71dSXin LI if (*ptr == '-') 2181*b6cee71dSXin LI ptr++; 2182*b6cee71dSXin LI if (*ptr == '.') 2183*b6cee71dSXin LI ptr++; 2184*b6cee71dSXin LI while (isdigit((unsigned char)*ptr)) ptr++; 2185*b6cee71dSXin LI if (*ptr == '.') 2186*b6cee71dSXin LI ptr++; 2187*b6cee71dSXin LI while (isdigit((unsigned char)*ptr)) ptr++; 2188*b6cee71dSXin LI if (quad) { 2189*b6cee71dSXin LI if (*ptr++ != 'l') 2190*b6cee71dSXin LI return -1; 2191*b6cee71dSXin LI if (*ptr++ != 'l') 2192*b6cee71dSXin LI return -1; 2193*b6cee71dSXin LI } 2194*b6cee71dSXin LI 2195*b6cee71dSXin LI switch (*ptr++) { 2196*b6cee71dSXin LI #ifdef STRICT_FORMAT /* "long" formats are int formats for us */ 2197*b6cee71dSXin LI /* so don't accept the 'l' modifier */ 2198*b6cee71dSXin LI case 'l': 2199*b6cee71dSXin LI switch (*ptr++) { 2200*b6cee71dSXin LI case 'i': 2201*b6cee71dSXin LI case 'd': 2202*b6cee71dSXin LI case 'u': 2203*b6cee71dSXin LI case 'o': 2204*b6cee71dSXin LI case 'x': 2205*b6cee71dSXin LI case 'X': 2206*b6cee71dSXin LI return h != 0 ? -1 : 0; 2207*b6cee71dSXin LI default: 2208*b6cee71dSXin LI return -1; 2209*b6cee71dSXin LI } 2210*b6cee71dSXin LI 2211*b6cee71dSXin LI /* 2212*b6cee71dSXin LI * Don't accept h and hh modifiers. They make writing 2213*b6cee71dSXin LI * magic entries more complicated, for very little benefit 2214*b6cee71dSXin LI */ 2215*b6cee71dSXin LI case 'h': 2216*b6cee71dSXin LI if (h-- <= 0) 2217*b6cee71dSXin LI return -1; 2218*b6cee71dSXin LI switch (*ptr++) { 2219*b6cee71dSXin LI case 'h': 2220*b6cee71dSXin LI if (h-- <= 0) 2221*b6cee71dSXin LI return -1; 2222*b6cee71dSXin LI switch (*ptr++) { 2223*b6cee71dSXin LI case 'i': 2224*b6cee71dSXin LI case 'd': 2225*b6cee71dSXin LI case 'u': 2226*b6cee71dSXin LI case 'o': 2227*b6cee71dSXin LI case 'x': 2228*b6cee71dSXin LI case 'X': 2229*b6cee71dSXin LI return 0; 2230*b6cee71dSXin LI default: 2231*b6cee71dSXin LI return -1; 2232*b6cee71dSXin LI } 2233*b6cee71dSXin LI case 'i': 2234*b6cee71dSXin LI case 'd': 2235*b6cee71dSXin LI case 'u': 2236*b6cee71dSXin LI case 'o': 2237*b6cee71dSXin LI case 'x': 2238*b6cee71dSXin LI case 'X': 2239*b6cee71dSXin LI return h != 0 ? -1 : 0; 2240*b6cee71dSXin LI default: 2241*b6cee71dSXin LI return -1; 2242*b6cee71dSXin LI } 2243*b6cee71dSXin LI #endif 2244*b6cee71dSXin LI case 'c': 2245*b6cee71dSXin LI return h != 2 ? -1 : 0; 2246*b6cee71dSXin LI case 'i': 2247*b6cee71dSXin LI case 'd': 2248*b6cee71dSXin LI case 'u': 2249*b6cee71dSXin LI case 'o': 2250*b6cee71dSXin LI case 'x': 2251*b6cee71dSXin LI case 'X': 2252*b6cee71dSXin LI #ifdef STRICT_FORMAT 2253*b6cee71dSXin LI return h != 0 ? -1 : 0; 2254*b6cee71dSXin LI #else 2255*b6cee71dSXin LI return 0; 2256*b6cee71dSXin LI #endif 2257*b6cee71dSXin LI default: 2258*b6cee71dSXin LI return -1; 2259*b6cee71dSXin LI } 2260*b6cee71dSXin LI 2261*b6cee71dSXin LI case FILE_FMT_FLOAT: 2262*b6cee71dSXin LI case FILE_FMT_DOUBLE: 2263*b6cee71dSXin LI if (*ptr == '-') 2264*b6cee71dSXin LI ptr++; 2265*b6cee71dSXin LI if (*ptr == '.') 2266*b6cee71dSXin LI ptr++; 2267*b6cee71dSXin LI while (isdigit((unsigned char)*ptr)) ptr++; 2268*b6cee71dSXin LI if (*ptr == '.') 2269*b6cee71dSXin LI ptr++; 2270*b6cee71dSXin LI while (isdigit((unsigned char)*ptr)) ptr++; 2271*b6cee71dSXin LI 2272*b6cee71dSXin LI switch (*ptr++) { 2273*b6cee71dSXin LI case 'e': 2274*b6cee71dSXin LI case 'E': 2275*b6cee71dSXin LI case 'f': 2276*b6cee71dSXin LI case 'F': 2277*b6cee71dSXin LI case 'g': 2278*b6cee71dSXin LI case 'G': 2279*b6cee71dSXin LI return 0; 2280*b6cee71dSXin LI 2281*b6cee71dSXin LI default: 2282*b6cee71dSXin LI return -1; 2283*b6cee71dSXin LI } 2284*b6cee71dSXin LI 2285*b6cee71dSXin LI 2286*b6cee71dSXin LI case FILE_FMT_STR: 2287*b6cee71dSXin LI if (*ptr == '-') 2288*b6cee71dSXin LI ptr++; 2289*b6cee71dSXin LI while (isdigit((unsigned char )*ptr)) 2290*b6cee71dSXin LI ptr++; 2291*b6cee71dSXin LI if (*ptr == '.') { 2292*b6cee71dSXin LI ptr++; 2293*b6cee71dSXin LI while (isdigit((unsigned char )*ptr)) 2294*b6cee71dSXin LI ptr++; 2295*b6cee71dSXin LI } 2296*b6cee71dSXin LI 2297*b6cee71dSXin LI switch (*ptr++) { 2298*b6cee71dSXin LI case 's': 2299*b6cee71dSXin LI return 0; 2300*b6cee71dSXin LI default: 2301*b6cee71dSXin LI return -1; 2302*b6cee71dSXin LI } 2303*b6cee71dSXin LI 2304*b6cee71dSXin LI default: 2305*b6cee71dSXin LI /* internal error */ 2306*b6cee71dSXin LI abort(); 2307*b6cee71dSXin LI } 2308*b6cee71dSXin LI /*NOTREACHED*/ 2309*b6cee71dSXin LI return -1; 2310*b6cee71dSXin LI } 2311*b6cee71dSXin LI 2312*b6cee71dSXin LI /* 2313*b6cee71dSXin LI * Check that the optional printf format in description matches 2314*b6cee71dSXin LI * the type of the magic. 2315*b6cee71dSXin LI */ 2316*b6cee71dSXin LI private int 2317*b6cee71dSXin LI check_format(struct magic_set *ms, struct magic *m) 2318*b6cee71dSXin LI { 2319*b6cee71dSXin LI char *ptr; 2320*b6cee71dSXin LI 2321*b6cee71dSXin LI for (ptr = m->desc; *ptr; ptr++) 2322*b6cee71dSXin LI if (*ptr == '%') 2323*b6cee71dSXin LI break; 2324*b6cee71dSXin LI if (*ptr == '\0') { 2325*b6cee71dSXin LI /* No format string; ok */ 2326*b6cee71dSXin LI return 1; 2327*b6cee71dSXin LI } 2328*b6cee71dSXin LI 2329*b6cee71dSXin LI assert(file_nformats == file_nnames); 2330*b6cee71dSXin LI 2331*b6cee71dSXin LI if (m->type >= file_nformats) { 2332*b6cee71dSXin LI file_magwarn(ms, "Internal error inconsistency between " 2333*b6cee71dSXin LI "m->type and format strings"); 2334*b6cee71dSXin LI return -1; 2335*b6cee71dSXin LI } 2336*b6cee71dSXin LI if (file_formats[m->type] == FILE_FMT_NONE) { 2337*b6cee71dSXin LI file_magwarn(ms, "No format string for `%s' with description " 2338*b6cee71dSXin LI "`%s'", m->desc, file_names[m->type]); 2339*b6cee71dSXin LI return -1; 2340*b6cee71dSXin LI } 2341*b6cee71dSXin LI 2342*b6cee71dSXin LI ptr++; 2343*b6cee71dSXin LI if (check_format_type(ptr, m->type) == -1) { 2344*b6cee71dSXin LI /* 2345*b6cee71dSXin LI * TODO: this error message is unhelpful if the format 2346*b6cee71dSXin LI * string is not one character long 2347*b6cee71dSXin LI */ 2348*b6cee71dSXin LI file_magwarn(ms, "Printf format `%c' is not valid for type " 2349*b6cee71dSXin LI "`%s' in description `%s'", *ptr ? *ptr : '?', 2350*b6cee71dSXin LI file_names[m->type], m->desc); 2351*b6cee71dSXin LI return -1; 2352*b6cee71dSXin LI } 2353*b6cee71dSXin LI 2354*b6cee71dSXin LI for (; *ptr; ptr++) { 2355*b6cee71dSXin LI if (*ptr == '%') { 2356*b6cee71dSXin LI file_magwarn(ms, 2357*b6cee71dSXin LI "Too many format strings (should have at most one) " 2358*b6cee71dSXin LI "for `%s' with description `%s'", 2359*b6cee71dSXin LI file_names[m->type], m->desc); 2360*b6cee71dSXin LI return -1; 2361*b6cee71dSXin LI } 2362*b6cee71dSXin LI } 2363*b6cee71dSXin LI return 0; 2364*b6cee71dSXin LI } 2365*b6cee71dSXin LI 2366*b6cee71dSXin LI /* 2367*b6cee71dSXin LI * Read a numeric value from a pointer, into the value union of a magic 2368*b6cee71dSXin LI * pointer, according to the magic type. Update the string pointer to point 2369*b6cee71dSXin LI * just after the number read. Return 0 for success, non-zero for failure. 2370*b6cee71dSXin LI */ 2371*b6cee71dSXin LI private int 2372*b6cee71dSXin LI getvalue(struct magic_set *ms, struct magic *m, const char **p, int action) 2373*b6cee71dSXin LI { 2374*b6cee71dSXin LI switch (m->type) { 2375*b6cee71dSXin LI case FILE_BESTRING16: 2376*b6cee71dSXin LI case FILE_LESTRING16: 2377*b6cee71dSXin LI case FILE_STRING: 2378*b6cee71dSXin LI case FILE_PSTRING: 2379*b6cee71dSXin LI case FILE_REGEX: 2380*b6cee71dSXin LI case FILE_SEARCH: 2381*b6cee71dSXin LI case FILE_NAME: 2382*b6cee71dSXin LI case FILE_USE: 2383*b6cee71dSXin LI *p = getstr(ms, m, *p, action == FILE_COMPILE); 2384*b6cee71dSXin LI if (*p == NULL) { 2385*b6cee71dSXin LI if (ms->flags & MAGIC_CHECK) 2386*b6cee71dSXin LI file_magwarn(ms, "cannot get string from `%s'", 2387*b6cee71dSXin LI m->value.s); 2388*b6cee71dSXin LI return -1; 2389*b6cee71dSXin LI } 2390*b6cee71dSXin LI if (m->type == FILE_REGEX) { 2391*b6cee71dSXin LI file_regex_t rx; 2392*b6cee71dSXin LI int rc = file_regcomp(&rx, m->value.s, REG_EXTENDED); 2393*b6cee71dSXin LI if (rc) { 2394*b6cee71dSXin LI if (ms->flags & MAGIC_CHECK) 2395*b6cee71dSXin LI file_regerror(&rx, rc, ms); 2396*b6cee71dSXin LI } 2397*b6cee71dSXin LI file_regfree(&rx); 2398*b6cee71dSXin LI return rc ? -1 : 0; 2399*b6cee71dSXin LI } 2400*b6cee71dSXin LI return 0; 2401*b6cee71dSXin LI case FILE_FLOAT: 2402*b6cee71dSXin LI case FILE_BEFLOAT: 2403*b6cee71dSXin LI case FILE_LEFLOAT: 2404*b6cee71dSXin LI if (m->reln != 'x') { 2405*b6cee71dSXin LI char *ep; 2406*b6cee71dSXin LI #ifdef HAVE_STRTOF 2407*b6cee71dSXin LI m->value.f = strtof(*p, &ep); 2408*b6cee71dSXin LI #else 2409*b6cee71dSXin LI m->value.f = (float)strtod(*p, &ep); 2410*b6cee71dSXin LI #endif 2411*b6cee71dSXin LI *p = ep; 2412*b6cee71dSXin LI } 2413*b6cee71dSXin LI return 0; 2414*b6cee71dSXin LI case FILE_DOUBLE: 2415*b6cee71dSXin LI case FILE_BEDOUBLE: 2416*b6cee71dSXin LI case FILE_LEDOUBLE: 2417*b6cee71dSXin LI if (m->reln != 'x') { 2418*b6cee71dSXin LI char *ep; 2419*b6cee71dSXin LI m->value.d = strtod(*p, &ep); 2420*b6cee71dSXin LI *p = ep; 2421*b6cee71dSXin LI } 2422*b6cee71dSXin LI return 0; 2423*b6cee71dSXin LI default: 2424*b6cee71dSXin LI if (m->reln != 'x') { 2425*b6cee71dSXin LI char *ep; 2426*b6cee71dSXin LI m->value.q = file_signextend(ms, m, 2427*b6cee71dSXin LI (uint64_t)strtoull(*p, &ep, 0)); 2428*b6cee71dSXin LI *p = ep; 2429*b6cee71dSXin LI eatsize(p); 2430*b6cee71dSXin LI } 2431*b6cee71dSXin LI return 0; 2432*b6cee71dSXin LI } 2433*b6cee71dSXin LI } 2434*b6cee71dSXin LI 2435*b6cee71dSXin LI /* 2436*b6cee71dSXin LI * Convert a string containing C character escapes. Stop at an unescaped 2437*b6cee71dSXin LI * space or tab. 2438*b6cee71dSXin LI * Copy the converted version to "m->value.s", and the length in m->vallen. 2439*b6cee71dSXin LI * Return updated scan pointer as function result. Warn if set. 2440*b6cee71dSXin LI */ 2441*b6cee71dSXin LI private const char * 2442*b6cee71dSXin LI getstr(struct magic_set *ms, struct magic *m, const char *s, int warn) 2443*b6cee71dSXin LI { 2444*b6cee71dSXin LI const char *origs = s; 2445*b6cee71dSXin LI char *p = m->value.s; 2446*b6cee71dSXin LI size_t plen = sizeof(m->value.s); 2447*b6cee71dSXin LI char *origp = p; 2448*b6cee71dSXin LI char *pmax = p + plen - 1; 2449*b6cee71dSXin LI int c; 2450*b6cee71dSXin LI int val; 2451*b6cee71dSXin LI 2452*b6cee71dSXin LI while ((c = *s++) != '\0') { 2453*b6cee71dSXin LI if (isspace((unsigned char) c)) 2454*b6cee71dSXin LI break; 2455*b6cee71dSXin LI if (p >= pmax) { 2456*b6cee71dSXin LI file_error(ms, 0, "string too long: `%s'", origs); 2457*b6cee71dSXin LI return NULL; 2458*b6cee71dSXin LI } 2459*b6cee71dSXin LI if (c == '\\') { 2460*b6cee71dSXin LI switch(c = *s++) { 2461*b6cee71dSXin LI 2462*b6cee71dSXin LI case '\0': 2463*b6cee71dSXin LI if (warn) 2464*b6cee71dSXin LI file_magwarn(ms, "incomplete escape"); 2465*b6cee71dSXin LI goto out; 2466*b6cee71dSXin LI 2467*b6cee71dSXin LI case '\t': 2468*b6cee71dSXin LI if (warn) { 2469*b6cee71dSXin LI file_magwarn(ms, 2470*b6cee71dSXin LI "escaped tab found, use \\t instead"); 2471*b6cee71dSXin LI warn = 0; /* already did */ 2472*b6cee71dSXin LI } 2473*b6cee71dSXin LI /*FALLTHROUGH*/ 2474*b6cee71dSXin LI default: 2475*b6cee71dSXin LI if (warn) { 2476*b6cee71dSXin LI if (isprint((unsigned char)c)) { 2477*b6cee71dSXin LI /* Allow escaping of 2478*b6cee71dSXin LI * ``relations'' */ 2479*b6cee71dSXin LI if (strchr("<>&^=!", c) == NULL 2480*b6cee71dSXin LI && (m->type != FILE_REGEX || 2481*b6cee71dSXin LI strchr("[]().*?^$|{}", c) 2482*b6cee71dSXin LI == NULL)) { 2483*b6cee71dSXin LI file_magwarn(ms, "no " 2484*b6cee71dSXin LI "need to escape " 2485*b6cee71dSXin LI "`%c'", c); 2486*b6cee71dSXin LI } 2487*b6cee71dSXin LI } else { 2488*b6cee71dSXin LI file_magwarn(ms, 2489*b6cee71dSXin LI "unknown escape sequence: " 2490*b6cee71dSXin LI "\\%03o", c); 2491*b6cee71dSXin LI } 2492*b6cee71dSXin LI } 2493*b6cee71dSXin LI /*FALLTHROUGH*/ 2494*b6cee71dSXin LI /* space, perhaps force people to use \040? */ 2495*b6cee71dSXin LI case ' ': 2496*b6cee71dSXin LI #if 0 2497*b6cee71dSXin LI /* 2498*b6cee71dSXin LI * Other things people escape, but shouldn't need to, 2499*b6cee71dSXin LI * so we disallow them 2500*b6cee71dSXin LI */ 2501*b6cee71dSXin LI case '\'': 2502*b6cee71dSXin LI case '"': 2503*b6cee71dSXin LI case '?': 2504*b6cee71dSXin LI #endif 2505*b6cee71dSXin LI /* Relations */ 2506*b6cee71dSXin LI case '>': 2507*b6cee71dSXin LI case '<': 2508*b6cee71dSXin LI case '&': 2509*b6cee71dSXin LI case '^': 2510*b6cee71dSXin LI case '=': 2511*b6cee71dSXin LI case '!': 2512*b6cee71dSXin LI /* and baskslash itself */ 2513*b6cee71dSXin LI case '\\': 2514*b6cee71dSXin LI *p++ = (char) c; 2515*b6cee71dSXin LI break; 2516*b6cee71dSXin LI 2517*b6cee71dSXin LI case 'a': 2518*b6cee71dSXin LI *p++ = '\a'; 2519*b6cee71dSXin LI break; 2520*b6cee71dSXin LI 2521*b6cee71dSXin LI case 'b': 2522*b6cee71dSXin LI *p++ = '\b'; 2523*b6cee71dSXin LI break; 2524*b6cee71dSXin LI 2525*b6cee71dSXin LI case 'f': 2526*b6cee71dSXin LI *p++ = '\f'; 2527*b6cee71dSXin LI break; 2528*b6cee71dSXin LI 2529*b6cee71dSXin LI case 'n': 2530*b6cee71dSXin LI *p++ = '\n'; 2531*b6cee71dSXin LI break; 2532*b6cee71dSXin LI 2533*b6cee71dSXin LI case 'r': 2534*b6cee71dSXin LI *p++ = '\r'; 2535*b6cee71dSXin LI break; 2536*b6cee71dSXin LI 2537*b6cee71dSXin LI case 't': 2538*b6cee71dSXin LI *p++ = '\t'; 2539*b6cee71dSXin LI break; 2540*b6cee71dSXin LI 2541*b6cee71dSXin LI case 'v': 2542*b6cee71dSXin LI *p++ = '\v'; 2543*b6cee71dSXin LI break; 2544*b6cee71dSXin LI 2545*b6cee71dSXin LI /* \ and up to 3 octal digits */ 2546*b6cee71dSXin LI case '0': 2547*b6cee71dSXin LI case '1': 2548*b6cee71dSXin LI case '2': 2549*b6cee71dSXin LI case '3': 2550*b6cee71dSXin LI case '4': 2551*b6cee71dSXin LI case '5': 2552*b6cee71dSXin LI case '6': 2553*b6cee71dSXin LI case '7': 2554*b6cee71dSXin LI val = c - '0'; 2555*b6cee71dSXin LI c = *s++; /* try for 2 */ 2556*b6cee71dSXin LI if (c >= '0' && c <= '7') { 2557*b6cee71dSXin LI val = (val << 3) | (c - '0'); 2558*b6cee71dSXin LI c = *s++; /* try for 3 */ 2559*b6cee71dSXin LI if (c >= '0' && c <= '7') 2560*b6cee71dSXin LI val = (val << 3) | (c-'0'); 2561*b6cee71dSXin LI else 2562*b6cee71dSXin LI --s; 2563*b6cee71dSXin LI } 2564*b6cee71dSXin LI else 2565*b6cee71dSXin LI --s; 2566*b6cee71dSXin LI *p++ = (char)val; 2567*b6cee71dSXin LI break; 2568*b6cee71dSXin LI 2569*b6cee71dSXin LI /* \x and up to 2 hex digits */ 2570*b6cee71dSXin LI case 'x': 2571*b6cee71dSXin LI val = 'x'; /* Default if no digits */ 2572*b6cee71dSXin LI c = hextoint(*s++); /* Get next char */ 2573*b6cee71dSXin LI if (c >= 0) { 2574*b6cee71dSXin LI val = c; 2575*b6cee71dSXin LI c = hextoint(*s++); 2576*b6cee71dSXin LI if (c >= 0) 2577*b6cee71dSXin LI val = (val << 4) + c; 2578*b6cee71dSXin LI else 2579*b6cee71dSXin LI --s; 2580*b6cee71dSXin LI } else 2581*b6cee71dSXin LI --s; 2582*b6cee71dSXin LI *p++ = (char)val; 2583*b6cee71dSXin LI break; 2584*b6cee71dSXin LI } 2585*b6cee71dSXin LI } else 2586*b6cee71dSXin LI *p++ = (char)c; 2587*b6cee71dSXin LI } 2588*b6cee71dSXin LI out: 2589*b6cee71dSXin LI *p = '\0'; 2590*b6cee71dSXin LI m->vallen = CAST(unsigned char, (p - origp)); 2591*b6cee71dSXin LI if (m->type == FILE_PSTRING) 2592*b6cee71dSXin LI m->vallen += (unsigned char)file_pstring_length_size(m); 2593*b6cee71dSXin LI return s; 2594*b6cee71dSXin LI } 2595*b6cee71dSXin LI 2596*b6cee71dSXin LI 2597*b6cee71dSXin LI /* Single hex char to int; -1 if not a hex char. */ 2598*b6cee71dSXin LI private int 2599*b6cee71dSXin LI hextoint(int c) 2600*b6cee71dSXin LI { 2601*b6cee71dSXin LI if (!isascii((unsigned char) c)) 2602*b6cee71dSXin LI return -1; 2603*b6cee71dSXin LI if (isdigit((unsigned char) c)) 2604*b6cee71dSXin LI return c - '0'; 2605*b6cee71dSXin LI if ((c >= 'a') && (c <= 'f')) 2606*b6cee71dSXin LI return c + 10 - 'a'; 2607*b6cee71dSXin LI if (( c>= 'A') && (c <= 'F')) 2608*b6cee71dSXin LI return c + 10 - 'A'; 2609*b6cee71dSXin LI return -1; 2610*b6cee71dSXin LI } 2611*b6cee71dSXin LI 2612*b6cee71dSXin LI 2613*b6cee71dSXin LI /* 2614*b6cee71dSXin LI * Print a string containing C character escapes. 2615*b6cee71dSXin LI */ 2616*b6cee71dSXin LI protected void 2617*b6cee71dSXin LI file_showstr(FILE *fp, const char *s, size_t len) 2618*b6cee71dSXin LI { 2619*b6cee71dSXin LI char c; 2620*b6cee71dSXin LI 2621*b6cee71dSXin LI for (;;) { 2622*b6cee71dSXin LI if (len == ~0U) { 2623*b6cee71dSXin LI c = *s++; 2624*b6cee71dSXin LI if (c == '\0') 2625*b6cee71dSXin LI break; 2626*b6cee71dSXin LI } 2627*b6cee71dSXin LI else { 2628*b6cee71dSXin LI if (len-- == 0) 2629*b6cee71dSXin LI break; 2630*b6cee71dSXin LI c = *s++; 2631*b6cee71dSXin LI } 2632*b6cee71dSXin LI if (c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */ 2633*b6cee71dSXin LI (void) fputc(c, fp); 2634*b6cee71dSXin LI else { 2635*b6cee71dSXin LI (void) fputc('\\', fp); 2636*b6cee71dSXin LI switch (c) { 2637*b6cee71dSXin LI case '\a': 2638*b6cee71dSXin LI (void) fputc('a', fp); 2639*b6cee71dSXin LI break; 2640*b6cee71dSXin LI 2641*b6cee71dSXin LI case '\b': 2642*b6cee71dSXin LI (void) fputc('b', fp); 2643*b6cee71dSXin LI break; 2644*b6cee71dSXin LI 2645*b6cee71dSXin LI case '\f': 2646*b6cee71dSXin LI (void) fputc('f', fp); 2647*b6cee71dSXin LI break; 2648*b6cee71dSXin LI 2649*b6cee71dSXin LI case '\n': 2650*b6cee71dSXin LI (void) fputc('n', fp); 2651*b6cee71dSXin LI break; 2652*b6cee71dSXin LI 2653*b6cee71dSXin LI case '\r': 2654*b6cee71dSXin LI (void) fputc('r', fp); 2655*b6cee71dSXin LI break; 2656*b6cee71dSXin LI 2657*b6cee71dSXin LI case '\t': 2658*b6cee71dSXin LI (void) fputc('t', fp); 2659*b6cee71dSXin LI break; 2660*b6cee71dSXin LI 2661*b6cee71dSXin LI case '\v': 2662*b6cee71dSXin LI (void) fputc('v', fp); 2663*b6cee71dSXin LI break; 2664*b6cee71dSXin LI 2665*b6cee71dSXin LI default: 2666*b6cee71dSXin LI (void) fprintf(fp, "%.3o", c & 0377); 2667*b6cee71dSXin LI break; 2668*b6cee71dSXin LI } 2669*b6cee71dSXin LI } 2670*b6cee71dSXin LI } 2671*b6cee71dSXin LI } 2672*b6cee71dSXin LI 2673*b6cee71dSXin LI /* 2674*b6cee71dSXin LI * eatsize(): Eat the size spec from a number [eg. 10UL] 2675*b6cee71dSXin LI */ 2676*b6cee71dSXin LI private void 2677*b6cee71dSXin LI eatsize(const char **p) 2678*b6cee71dSXin LI { 2679*b6cee71dSXin LI const char *l = *p; 2680*b6cee71dSXin LI 2681*b6cee71dSXin LI if (LOWCASE(*l) == 'u') 2682*b6cee71dSXin LI l++; 2683*b6cee71dSXin LI 2684*b6cee71dSXin LI switch (LOWCASE(*l)) { 2685*b6cee71dSXin LI case 'l': /* long */ 2686*b6cee71dSXin LI case 's': /* short */ 2687*b6cee71dSXin LI case 'h': /* short */ 2688*b6cee71dSXin LI case 'b': /* char/byte */ 2689*b6cee71dSXin LI case 'c': /* char/byte */ 2690*b6cee71dSXin LI l++; 2691*b6cee71dSXin LI /*FALLTHROUGH*/ 2692*b6cee71dSXin LI default: 2693*b6cee71dSXin LI break; 2694*b6cee71dSXin LI } 2695*b6cee71dSXin LI 2696*b6cee71dSXin LI *p = l; 2697*b6cee71dSXin LI } 2698*b6cee71dSXin LI 2699*b6cee71dSXin LI /* 2700*b6cee71dSXin LI * handle a compiled file. 2701*b6cee71dSXin LI */ 2702*b6cee71dSXin LI 2703*b6cee71dSXin LI private struct magic_map * 2704*b6cee71dSXin LI apprentice_map(struct magic_set *ms, const char *fn) 2705*b6cee71dSXin LI { 2706*b6cee71dSXin LI int fd; 2707*b6cee71dSXin LI struct stat st; 2708*b6cee71dSXin LI uint32_t *ptr; 2709*b6cee71dSXin LI uint32_t version, entries, nentries; 2710*b6cee71dSXin LI int needsbyteswap; 2711*b6cee71dSXin LI char *dbname = NULL; 2712*b6cee71dSXin LI struct magic_map *map; 2713*b6cee71dSXin LI size_t i; 2714*b6cee71dSXin LI 2715*b6cee71dSXin LI fd = -1; 2716*b6cee71dSXin LI if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) { 2717*b6cee71dSXin LI file_oomem(ms, sizeof(*map)); 2718*b6cee71dSXin LI goto error; 2719*b6cee71dSXin LI } 2720*b6cee71dSXin LI 2721*b6cee71dSXin LI dbname = mkdbname(ms, fn, 0); 2722*b6cee71dSXin LI if (dbname == NULL) 2723*b6cee71dSXin LI goto error; 2724*b6cee71dSXin LI 2725*b6cee71dSXin LI if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1) 2726*b6cee71dSXin LI goto error; 2727*b6cee71dSXin LI 2728*b6cee71dSXin LI if (fstat(fd, &st) == -1) { 2729*b6cee71dSXin LI file_error(ms, errno, "cannot stat `%s'", dbname); 2730*b6cee71dSXin LI goto error; 2731*b6cee71dSXin LI } 2732*b6cee71dSXin LI if (st.st_size < 8 || st.st_size > MAXMAGIC_SIZE) { 2733*b6cee71dSXin LI file_error(ms, 0, "file `%s' is too %s", dbname, 2734*b6cee71dSXin LI st.st_size < 8 ? "small" : "large"); 2735*b6cee71dSXin LI goto error; 2736*b6cee71dSXin LI } 2737*b6cee71dSXin LI 2738*b6cee71dSXin LI map->len = (size_t)st.st_size; 2739*b6cee71dSXin LI #ifdef QUICK 2740*b6cee71dSXin LI if ((map->p = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE, 2741*b6cee71dSXin LI MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) { 2742*b6cee71dSXin LI file_error(ms, errno, "cannot map `%s'", dbname); 2743*b6cee71dSXin LI goto error; 2744*b6cee71dSXin LI } 2745*b6cee71dSXin LI #else 2746*b6cee71dSXin LI if ((map->p = CAST(void *, malloc(map->len))) == NULL) { 2747*b6cee71dSXin LI file_oomem(ms, map->len); 2748*b6cee71dSXin LI goto error; 2749*b6cee71dSXin LI } 2750*b6cee71dSXin LI if (read(fd, map->p, map->len) != (ssize_t)map->len) { 2751*b6cee71dSXin LI file_badread(ms); 2752*b6cee71dSXin LI goto error; 2753*b6cee71dSXin LI } 2754*b6cee71dSXin LI map->len = 0; 2755*b6cee71dSXin LI #define RET 1 2756*b6cee71dSXin LI #endif 2757*b6cee71dSXin LI (void)close(fd); 2758*b6cee71dSXin LI fd = -1; 2759*b6cee71dSXin LI ptr = CAST(uint32_t *, map->p); 2760*b6cee71dSXin LI if (*ptr != MAGICNO) { 2761*b6cee71dSXin LI if (swap4(*ptr) != MAGICNO) { 2762*b6cee71dSXin LI file_error(ms, 0, "bad magic in `%s'", dbname); 2763*b6cee71dSXin LI goto error; 2764*b6cee71dSXin LI } 2765*b6cee71dSXin LI needsbyteswap = 1; 2766*b6cee71dSXin LI } else 2767*b6cee71dSXin LI needsbyteswap = 0; 2768*b6cee71dSXin LI if (needsbyteswap) 2769*b6cee71dSXin LI version = swap4(ptr[1]); 2770*b6cee71dSXin LI else 2771*b6cee71dSXin LI version = ptr[1]; 2772*b6cee71dSXin LI if (version != VERSIONNO) { 2773*b6cee71dSXin LI file_error(ms, 0, "File %s supports only version %d magic " 2774*b6cee71dSXin LI "files. `%s' is version %d", VERSION, 2775*b6cee71dSXin LI VERSIONNO, dbname, version); 2776*b6cee71dSXin LI goto error; 2777*b6cee71dSXin LI } 2778*b6cee71dSXin LI entries = (uint32_t)(st.st_size / sizeof(struct magic)); 2779*b6cee71dSXin LI if ((off_t)(entries * sizeof(struct magic)) != st.st_size) { 2780*b6cee71dSXin LI file_error(ms, 0, "Size of `%s' %" INT64_T_FORMAT "u is not " 2781*b6cee71dSXin LI "a multiple of %" SIZE_T_FORMAT "u", 2782*b6cee71dSXin LI dbname, (unsigned long long)st.st_size, 2783*b6cee71dSXin LI sizeof(struct magic)); 2784*b6cee71dSXin LI goto error; 2785*b6cee71dSXin LI } 2786*b6cee71dSXin LI map->magic[0] = CAST(struct magic *, map->p) + 1; 2787*b6cee71dSXin LI nentries = 0; 2788*b6cee71dSXin LI for (i = 0; i < MAGIC_SETS; i++) { 2789*b6cee71dSXin LI if (needsbyteswap) 2790*b6cee71dSXin LI map->nmagic[i] = swap4(ptr[i + 2]); 2791*b6cee71dSXin LI else 2792*b6cee71dSXin LI map->nmagic[i] = ptr[i + 2]; 2793*b6cee71dSXin LI if (i != MAGIC_SETS - 1) 2794*b6cee71dSXin LI map->magic[i + 1] = map->magic[i] + map->nmagic[i]; 2795*b6cee71dSXin LI nentries += map->nmagic[i]; 2796*b6cee71dSXin LI } 2797*b6cee71dSXin LI if (entries != nentries + 1) { 2798*b6cee71dSXin LI file_error(ms, 0, "Inconsistent entries in `%s' %u != %u", 2799*b6cee71dSXin LI dbname, entries, nentries + 1); 2800*b6cee71dSXin LI goto error; 2801*b6cee71dSXin LI } 2802*b6cee71dSXin LI if (needsbyteswap) 2803*b6cee71dSXin LI for (i = 0; i < MAGIC_SETS; i++) 2804*b6cee71dSXin LI byteswap(map->magic[i], map->nmagic[i]); 2805*b6cee71dSXin LI free(dbname); 2806*b6cee71dSXin LI return map; 2807*b6cee71dSXin LI 2808*b6cee71dSXin LI error: 2809*b6cee71dSXin LI if (fd != -1) 2810*b6cee71dSXin LI (void)close(fd); 2811*b6cee71dSXin LI apprentice_unmap(map); 2812*b6cee71dSXin LI free(dbname); 2813*b6cee71dSXin LI return NULL; 2814*b6cee71dSXin LI } 2815*b6cee71dSXin LI 2816*b6cee71dSXin LI /* 2817*b6cee71dSXin LI * handle an mmaped file. 2818*b6cee71dSXin LI */ 2819*b6cee71dSXin LI private int 2820*b6cee71dSXin LI apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn) 2821*b6cee71dSXin LI { 2822*b6cee71dSXin LI static const size_t nm = sizeof(*map->nmagic) * MAGIC_SETS; 2823*b6cee71dSXin LI static const size_t m = sizeof(**map->magic); 2824*b6cee71dSXin LI int fd = -1; 2825*b6cee71dSXin LI size_t len; 2826*b6cee71dSXin LI char *dbname; 2827*b6cee71dSXin LI int rv = -1; 2828*b6cee71dSXin LI uint32_t i; 2829*b6cee71dSXin LI union { 2830*b6cee71dSXin LI struct magic m; 2831*b6cee71dSXin LI uint32_t h[2 + MAGIC_SETS]; 2832*b6cee71dSXin LI } hdr; 2833*b6cee71dSXin LI 2834*b6cee71dSXin LI dbname = mkdbname(ms, fn, 1); 2835*b6cee71dSXin LI 2836*b6cee71dSXin LI if (dbname == NULL) 2837*b6cee71dSXin LI goto out; 2838*b6cee71dSXin LI 2839*b6cee71dSXin LI if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1) 2840*b6cee71dSXin LI { 2841*b6cee71dSXin LI file_error(ms, errno, "cannot open `%s'", dbname); 2842*b6cee71dSXin LI goto out; 2843*b6cee71dSXin LI } 2844*b6cee71dSXin LI memset(&hdr, 0, sizeof(hdr)); 2845*b6cee71dSXin LI hdr.h[0] = MAGICNO; 2846*b6cee71dSXin LI hdr.h[1] = VERSIONNO; 2847*b6cee71dSXin LI memcpy(hdr.h + 2, map->nmagic, nm); 2848*b6cee71dSXin LI 2849*b6cee71dSXin LI if (write(fd, &hdr, sizeof(hdr)) != (ssize_t)sizeof(hdr)) { 2850*b6cee71dSXin LI file_error(ms, errno, "error writing `%s'", dbname); 2851*b6cee71dSXin LI goto out; 2852*b6cee71dSXin LI } 2853*b6cee71dSXin LI 2854*b6cee71dSXin LI for (i = 0; i < MAGIC_SETS; i++) { 2855*b6cee71dSXin LI len = m * map->nmagic[i]; 2856*b6cee71dSXin LI if (write(fd, map->magic[i], len) != (ssize_t)len) { 2857*b6cee71dSXin LI file_error(ms, errno, "error writing `%s'", dbname); 2858*b6cee71dSXin LI goto out; 2859*b6cee71dSXin LI } 2860*b6cee71dSXin LI } 2861*b6cee71dSXin LI 2862*b6cee71dSXin LI if (fd != -1) 2863*b6cee71dSXin LI (void)close(fd); 2864*b6cee71dSXin LI rv = 0; 2865*b6cee71dSXin LI out: 2866*b6cee71dSXin LI free(dbname); 2867*b6cee71dSXin LI return rv; 2868*b6cee71dSXin LI } 2869*b6cee71dSXin LI 2870*b6cee71dSXin LI private const char ext[] = ".mgc"; 2871*b6cee71dSXin LI /* 2872*b6cee71dSXin LI * make a dbname 2873*b6cee71dSXin LI */ 2874*b6cee71dSXin LI private char * 2875*b6cee71dSXin LI mkdbname(struct magic_set *ms, const char *fn, int strip) 2876*b6cee71dSXin LI { 2877*b6cee71dSXin LI const char *p, *q; 2878*b6cee71dSXin LI char *buf; 2879*b6cee71dSXin LI 2880*b6cee71dSXin LI if (strip) { 2881*b6cee71dSXin LI if ((p = strrchr(fn, '/')) != NULL) 2882*b6cee71dSXin LI fn = ++p; 2883*b6cee71dSXin LI } 2884*b6cee71dSXin LI 2885*b6cee71dSXin LI for (q = fn; *q; q++) 2886*b6cee71dSXin LI continue; 2887*b6cee71dSXin LI /* Look for .mgc */ 2888*b6cee71dSXin LI for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--) 2889*b6cee71dSXin LI if (*p != *q) 2890*b6cee71dSXin LI break; 2891*b6cee71dSXin LI 2892*b6cee71dSXin LI /* Did not find .mgc, restore q */ 2893*b6cee71dSXin LI if (p >= ext) 2894*b6cee71dSXin LI while (*q) 2895*b6cee71dSXin LI q++; 2896*b6cee71dSXin LI 2897*b6cee71dSXin LI q++; 2898*b6cee71dSXin LI /* Compatibility with old code that looked in .mime */ 2899*b6cee71dSXin LI if (ms->flags & MAGIC_MIME) { 2900*b6cee71dSXin LI if (asprintf(&buf, "%.*s.mime%s", (int)(q - fn), fn, ext) < 0) 2901*b6cee71dSXin LI return NULL; 2902*b6cee71dSXin LI if (access(buf, R_OK) != -1) { 2903*b6cee71dSXin LI ms->flags &= MAGIC_MIME_TYPE; 2904*b6cee71dSXin LI return buf; 2905*b6cee71dSXin LI } 2906*b6cee71dSXin LI free(buf); 2907*b6cee71dSXin LI } 2908*b6cee71dSXin LI if (asprintf(&buf, "%.*s%s", (int)(q - fn), fn, ext) < 0) 2909*b6cee71dSXin LI return NULL; 2910*b6cee71dSXin LI 2911*b6cee71dSXin LI /* Compatibility with old code that looked in .mime */ 2912*b6cee71dSXin LI if (strstr(p, ".mime") != NULL) 2913*b6cee71dSXin LI ms->flags &= MAGIC_MIME_TYPE; 2914*b6cee71dSXin LI return buf; 2915*b6cee71dSXin LI } 2916*b6cee71dSXin LI 2917*b6cee71dSXin LI /* 2918*b6cee71dSXin LI * Byteswap an mmap'ed file if needed 2919*b6cee71dSXin LI */ 2920*b6cee71dSXin LI private void 2921*b6cee71dSXin LI byteswap(struct magic *magic, uint32_t nmagic) 2922*b6cee71dSXin LI { 2923*b6cee71dSXin LI uint32_t i; 2924*b6cee71dSXin LI for (i = 0; i < nmagic; i++) 2925*b6cee71dSXin LI bs1(&magic[i]); 2926*b6cee71dSXin LI } 2927*b6cee71dSXin LI 2928*b6cee71dSXin LI /* 2929*b6cee71dSXin LI * swap a short 2930*b6cee71dSXin LI */ 2931*b6cee71dSXin LI private uint16_t 2932*b6cee71dSXin LI swap2(uint16_t sv) 2933*b6cee71dSXin LI { 2934*b6cee71dSXin LI uint16_t rv; 2935*b6cee71dSXin LI uint8_t *s = (uint8_t *)(void *)&sv; 2936*b6cee71dSXin LI uint8_t *d = (uint8_t *)(void *)&rv; 2937*b6cee71dSXin LI d[0] = s[1]; 2938*b6cee71dSXin LI d[1] = s[0]; 2939*b6cee71dSXin LI return rv; 2940*b6cee71dSXin LI } 2941*b6cee71dSXin LI 2942*b6cee71dSXin LI /* 2943*b6cee71dSXin LI * swap an int 2944*b6cee71dSXin LI */ 2945*b6cee71dSXin LI private uint32_t 2946*b6cee71dSXin LI swap4(uint32_t sv) 2947*b6cee71dSXin LI { 2948*b6cee71dSXin LI uint32_t rv; 2949*b6cee71dSXin LI uint8_t *s = (uint8_t *)(void *)&sv; 2950*b6cee71dSXin LI uint8_t *d = (uint8_t *)(void *)&rv; 2951*b6cee71dSXin LI d[0] = s[3]; 2952*b6cee71dSXin LI d[1] = s[2]; 2953*b6cee71dSXin LI d[2] = s[1]; 2954*b6cee71dSXin LI d[3] = s[0]; 2955*b6cee71dSXin LI return rv; 2956*b6cee71dSXin LI } 2957*b6cee71dSXin LI 2958*b6cee71dSXin LI /* 2959*b6cee71dSXin LI * swap a quad 2960*b6cee71dSXin LI */ 2961*b6cee71dSXin LI private uint64_t 2962*b6cee71dSXin LI swap8(uint64_t sv) 2963*b6cee71dSXin LI { 2964*b6cee71dSXin LI uint64_t rv; 2965*b6cee71dSXin LI uint8_t *s = (uint8_t *)(void *)&sv; 2966*b6cee71dSXin LI uint8_t *d = (uint8_t *)(void *)&rv; 2967*b6cee71dSXin LI #if 0 2968*b6cee71dSXin LI d[0] = s[3]; 2969*b6cee71dSXin LI d[1] = s[2]; 2970*b6cee71dSXin LI d[2] = s[1]; 2971*b6cee71dSXin LI d[3] = s[0]; 2972*b6cee71dSXin LI d[4] = s[7]; 2973*b6cee71dSXin LI d[5] = s[6]; 2974*b6cee71dSXin LI d[6] = s[5]; 2975*b6cee71dSXin LI d[7] = s[4]; 2976*b6cee71dSXin LI #else 2977*b6cee71dSXin LI d[0] = s[7]; 2978*b6cee71dSXin LI d[1] = s[6]; 2979*b6cee71dSXin LI d[2] = s[5]; 2980*b6cee71dSXin LI d[3] = s[4]; 2981*b6cee71dSXin LI d[4] = s[3]; 2982*b6cee71dSXin LI d[5] = s[2]; 2983*b6cee71dSXin LI d[6] = s[1]; 2984*b6cee71dSXin LI d[7] = s[0]; 2985*b6cee71dSXin LI #endif 2986*b6cee71dSXin LI return rv; 2987*b6cee71dSXin LI } 2988*b6cee71dSXin LI 2989*b6cee71dSXin LI /* 2990*b6cee71dSXin LI * byteswap a single magic entry 2991*b6cee71dSXin LI */ 2992*b6cee71dSXin LI private void 2993*b6cee71dSXin LI bs1(struct magic *m) 2994*b6cee71dSXin LI { 2995*b6cee71dSXin LI m->cont_level = swap2(m->cont_level); 2996*b6cee71dSXin LI m->offset = swap4((uint32_t)m->offset); 2997*b6cee71dSXin LI m->in_offset = swap4((uint32_t)m->in_offset); 2998*b6cee71dSXin LI m->lineno = swap4((uint32_t)m->lineno); 2999*b6cee71dSXin LI if (IS_STRING(m->type)) { 3000*b6cee71dSXin LI m->str_range = swap4(m->str_range); 3001*b6cee71dSXin LI m->str_flags = swap4(m->str_flags); 3002*b6cee71dSXin LI } 3003*b6cee71dSXin LI else { 3004*b6cee71dSXin LI m->value.q = swap8(m->value.q); 3005*b6cee71dSXin LI m->num_mask = swap8(m->num_mask); 3006*b6cee71dSXin LI } 3007*b6cee71dSXin LI } 3008*b6cee71dSXin LI 3009*b6cee71dSXin LI protected size_t 3010*b6cee71dSXin LI file_pstring_length_size(const struct magic *m) 3011*b6cee71dSXin LI { 3012*b6cee71dSXin LI switch (m->str_flags & PSTRING_LEN) { 3013*b6cee71dSXin LI case PSTRING_1_LE: 3014*b6cee71dSXin LI return 1; 3015*b6cee71dSXin LI case PSTRING_2_LE: 3016*b6cee71dSXin LI case PSTRING_2_BE: 3017*b6cee71dSXin LI return 2; 3018*b6cee71dSXin LI case PSTRING_4_LE: 3019*b6cee71dSXin LI case PSTRING_4_BE: 3020*b6cee71dSXin LI return 4; 3021*b6cee71dSXin LI default: 3022*b6cee71dSXin LI abort(); /* Impossible */ 3023*b6cee71dSXin LI return 1; 3024*b6cee71dSXin LI } 3025*b6cee71dSXin LI } 3026*b6cee71dSXin LI protected size_t 3027*b6cee71dSXin LI file_pstring_get_length(const struct magic *m, const char *s) 3028*b6cee71dSXin LI { 3029*b6cee71dSXin LI size_t len = 0; 3030*b6cee71dSXin LI 3031*b6cee71dSXin LI switch (m->str_flags & PSTRING_LEN) { 3032*b6cee71dSXin LI case PSTRING_1_LE: 3033*b6cee71dSXin LI len = *s; 3034*b6cee71dSXin LI break; 3035*b6cee71dSXin LI case PSTRING_2_LE: 3036*b6cee71dSXin LI len = (s[1] << 8) | s[0]; 3037*b6cee71dSXin LI break; 3038*b6cee71dSXin LI case PSTRING_2_BE: 3039*b6cee71dSXin LI len = (s[0] << 8) | s[1]; 3040*b6cee71dSXin LI break; 3041*b6cee71dSXin LI case PSTRING_4_LE: 3042*b6cee71dSXin LI len = (s[3] << 24) | (s[2] << 16) | (s[1] << 8) | s[0]; 3043*b6cee71dSXin LI break; 3044*b6cee71dSXin LI case PSTRING_4_BE: 3045*b6cee71dSXin LI len = (s[0] << 24) | (s[1] << 16) | (s[2] << 8) | s[3]; 3046*b6cee71dSXin LI break; 3047*b6cee71dSXin LI default: 3048*b6cee71dSXin LI abort(); /* Impossible */ 3049*b6cee71dSXin LI } 3050*b6cee71dSXin LI 3051*b6cee71dSXin LI if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF) 3052*b6cee71dSXin LI len -= file_pstring_length_size(m); 3053*b6cee71dSXin LI 3054*b6cee71dSXin LI return len; 3055*b6cee71dSXin LI } 3056*b6cee71dSXin LI 3057*b6cee71dSXin LI protected int 3058*b6cee71dSXin LI file_magicfind(struct magic_set *ms, const char *name, struct mlist *v) 3059*b6cee71dSXin LI { 3060*b6cee71dSXin LI uint32_t i, j; 3061*b6cee71dSXin LI struct mlist *mlist, *ml; 3062*b6cee71dSXin LI 3063*b6cee71dSXin LI mlist = ms->mlist[1]; 3064*b6cee71dSXin LI 3065*b6cee71dSXin LI for (ml = mlist->next; ml != mlist; ml = ml->next) { 3066*b6cee71dSXin LI struct magic *ma = ml->magic; 3067*b6cee71dSXin LI uint32_t nma = ml->nmagic; 3068*b6cee71dSXin LI for (i = 0; i < nma; i++) { 3069*b6cee71dSXin LI if (ma[i].type != FILE_NAME) 3070*b6cee71dSXin LI continue; 3071*b6cee71dSXin LI if (strcmp(ma[i].value.s, name) == 0) { 3072*b6cee71dSXin LI v->magic = &ma[i]; 3073*b6cee71dSXin LI for (j = i + 1; j < nma; j++) 3074*b6cee71dSXin LI if (ma[j].cont_level == 0) 3075*b6cee71dSXin LI break; 3076*b6cee71dSXin LI v->nmagic = j - i; 3077*b6cee71dSXin LI return 0; 3078*b6cee71dSXin LI } 3079*b6cee71dSXin LI } 3080*b6cee71dSXin LI } 3081*b6cee71dSXin LI return -1; 3082*b6cee71dSXin LI } 3083