xref: /freebsd/contrib/file/src/BNF (revision 43a5ec4eb41567cc92586503212743d89686d78f)
1*43a5ec4eSXin LIThis is a first attempt to document the grammar used by magic(5), with
2*43a5ec4eSXin LIhopes of eventually incorporating something like this into the manpage.
3*43a5ec4eSXin LI
4*43a5ec4eSXin LINote: Currently, the parser varies slightly from this, but only in
5*43a5ec4eSXin LIvery minor ways, e.g., the strflags may be separated by '/' characters
6*43a5ec4eSXin LIand at most one strcount is allowed; likewise for regflags.
7*43a5ec4eSXin LI
8*43a5ec4eSXin LI------------------------------------------------------------------------
9*43a5ec4eSXin LImagic = 1*query
10*43a5ec4eSXin LI
11*43a5ec4eSXin LIquery = line *( 1*level line )
12*43a5ec4eSXin LI
13*43a5ec4eSXin LIlevel = ">"		;; Increment the level by 1.
14*43a5ec4eSXin LI			;; The first line of a query is at level 0.
15*43a5ec4eSXin LI
16*43a5ec4eSXin LIline = offset HWS type HWS test HWS message EOL
17*43a5ec4eSXin LI
18*43a5ec4eSXin LI------------------------------------------------------------------------
19*43a5ec4eSXin LIoffset = absoffset | reloffset | indoffset
20*43a5ec4eSXin LI			;; The offset in the file at which to apply
21*43a5ec4eSXin LI			;; the <test>.
22*43a5ec4eSXin LI
23*43a5ec4eSXin LIabsoffset = NUMBER	;; An absolute offset from the start of the file.
24*43a5ec4eSXin LI
25*43a5ec4eSXin LIreloffset = "&" NUMBER	;; The offset relative to the last match offset
26*43a5ec4eSXin LI			;; at one level up.
27*43a5ec4eSXin LI			;; Not allowed at level == 0.
28*43a5ec4eSXin LI
29*43a5ec4eSXin LIindoffset = indoff | relindoff
30*43a5ec4eSXin LI
31*43a5ec4eSXin LIindoff = "(" offset1 [ "." size ] [ op disp ] ")"
32*43a5ec4eSXin LI			;; Read the file at <offset1> of width <size>.
33*43a5ec4eSXin LI			;; If size is not specified, assume a long.
34*43a5ec4eSXin LI			;; If <op> is given, then perform that
35*43a5ec4eSXin LI			;; operation on the result and the <disp>.
36*43a5ec4eSXin LI
37*43a5ec4eSXin LIoffset1 = absoffset | reloffset
38*43a5ec4eSXin LI
39*43a5ec4eSXin LIsize = byte | leshort | beshort | lelong | belong | melong
40*43a5ec4eSXin LI
41*43a5ec4eSXin LIbyte = "B" | "b" | "C" | "c"	;; A one-byte value.
42*43a5ec4eSXin LIleshort = "s" | "h"		;; A two-byte little-endian value.
43*43a5ec4eSXin LIbeshort = "S" | "H"		;; A two-byte big-endian value.
44*43a5ec4eSXin LIlelong = "l"			;; A four-byte little-endian value.
45*43a5ec4eSXin LIbelong = "L"			;; A four-byte big-endian value.
46*43a5ec4eSXin LImelong = "m"			;; A four-byte middle-endian value.
47*43a5ec4eSXin LI
48*43a5ec4eSXin LIop = [ invert ] ( "+" | "-" | "*" | "/" | "%" | "&" | "|" | "^" )
49*43a5ec4eSXin LI
50*43a5ec4eSXin LIinvert = "~"		;; Flip the bits on result of the <op>.
51*43a5ec4eSXin LI
52*43a5ec4eSXin LIdisp =  NUMBER | memvalue
53*43a5ec4eSXin LI
54*43a5ec4eSXin LImemvalue = "(" NUMBER ")"
55*43a5ec4eSXin LI			;; NUMBER is interpreted as an absolute or
56*43a5ec4eSXin LI			;; relative offset matching that of <offset1>.
57*43a5ec4eSXin LI			;; Read the file at the resulting offset with
58*43a5ec4eSXin LI			;; the same size as <offset1>.
59*43a5ec4eSXin LI
60*43a5ec4eSXin LIrelindoff = "&" indoff	;; add <indoff> to the last match offset at
61*43a5ec4eSXin LI			;; one level up.
62*43a5ec4eSXin LI
63*43a5ec4eSXin LI------------------------------------------------------------------------
64*43a5ec4eSXin LItype = [ unsigned ] ( numeric | strtype | default )
65*43a5ec4eSXin LI
66*43a5ec4eSXin LIunsigned = "u"		;; The value is unsigned.
67*43a5ec4eSXin LI			;; This affects the sign extension of numeric
68*43a5ec4eSXin LI			;; types and the '<' and '>' compares.  It is
69*43a5ec4eSXin LI			;; intended for numeric types, but allowed on
70*43a5ec4eSXin LI			;; all types.
71*43a5ec4eSXin LI
72*43a5ec4eSXin LInumeric = ( numtype | datetype ) [ nummask ]
73*43a5ec4eSXin LI
74*43a5ec4eSXin LInumtype = bytetype | short | long | quad
75*43a5ec4eSXin LI
76*43a5ec4eSXin LIbytetype = "byte"	;; Named <bytetype> to avoid clashing with the
			;; <byte> size suffix used in <indoff>.
77*43a5ec4eSXin LIshort = "short" | "beshort" | "leshort"
78*43a5ec4eSXin LIlong = "long" | "lelong" | "belong" | "melong"
79*43a5ec4eSXin LIquad = "quad" | "lequad" | "bequad"
80*43a5ec4eSXin LI
81*43a5ec4eSXin LIdatetype = udate32 | ldate32 | udate64 | ldate64
82*43a5ec4eSXin LI
83*43a5ec4eSXin LIudate32 = "date" | "bedate" | "ledate" | "medate"	;; UTC dates
84*43a5ec4eSXin LIldate32 = "ldate" | "beldate" | "leldate" | "meldate"	;; local dates
85*43a5ec4eSXin LIudate64 = "qdate" | "leqdate" | "beqdate"		;; UTC dates
86*43a5ec4eSXin LIldate64 = "qldate" | "leqldate" | "beqldate"		;; local dates
87*43a5ec4eSXin LI
88*43a5ec4eSXin LInummask = op NUMBER
89*43a5ec4eSXin LI
90*43a5ec4eSXin LIstrtype = regex | search | string8 | string16
91*43a5ec4eSXin LI
92*43a5ec4eSXin LIregex = "regex" [ "/" 1*regflag ]
93*43a5ec4eSXin LI
94*43a5ec4eSXin LIregflag = "c" | "s" | linecnt
95*43a5ec4eSXin LI
96*43a5ec4eSXin LIlinecnt = NUMBER	;; The number of lines to search.  If this
97*43a5ec4eSXin LI			;; is missing or zero, the rest of the
98*43a5ec4eSXin LI			;; file is searched.
99*43a5ec4eSXin LI
100*43a5ec4eSXin LIsearch = "search" [ "/" 1*srchflag ]
101*43a5ec4eSXin LI
102*43a5ec4eSXin LIsrchflag = strflag | srchcnt
103*43a5ec4eSXin LI
104*43a5ec4eSXin LIsrchcnt = NUMBER	;; The number of search tries.  If this
105*43a5ec4eSXin LI			;; is missing or zero, the rest of the
106*43a5ec4eSXin LI			;; file is searched.
107*43a5ec4eSXin LI
108*43a5ec4eSXin LIstring8 = ( "string" | "pstring" ) [ "/" 1*strflag ]
109*43a5ec4eSXin LI
110*43a5ec4eSXin LIstrflag = "b" | "B" | "c" | "C"
111*43a5ec4eSXin LI
112*43a5ec4eSXin LIstring16 = "bestring16" | "lestring16"
113*43a5ec4eSXin LI
114*43a5ec4eSXin LIdefault = "default"	;; This is intended to be used with the
115*43a5ec4eSXin LI			;; <truetest> ("x" below).  It is matched if
116*43a5ec4eSXin LI			;; there has been no previous match at its
117*43a5ec4eSXin LI			;; level or none since the last default at
118*43a5ec4eSXin LI			;; that level.  It is useful for implementing
119*43a5ec4eSXin LI			;; switch-like and if/else constructions.
120*43a5ec4eSXin LI
121*43a5ec4eSXin LI------------------------------------------------------------------------
122*43a5ec4eSXin LItest = numtest | strtest | truetest
123*43a5ec4eSXin LI				;; Test to perform on <type> read from file.
124*43a5ec4eSXin LI
125*43a5ec4eSXin LInumtest = [ compare ] NUMBER	;; If compare is missing, "=" is assumed.
126*43a5ec4eSXin LI
127*43a5ec4eSXin LIstrtest = [ compare ] STRING	;; If compare is missing, "=" is assumed.
128*43a5ec4eSXin LI				;; Note: If the STRING begins with a <compare>
129*43a5ec4eSXin LI				;; character, the <compare> field cannot be
130*43a5ec4eSXin LI				;; omitted.
131*43a5ec4eSXin LI
132*43a5ec4eSXin LIcompare = "=" | "!" | "<" | ">" | "&" | "^"
133*43a5ec4eSXin LI
134*43a5ec4eSXin LItruetest = "x"		;; This always returns true.
135*43a5ec4eSXin LI			;; To test for the string "x" use "=x".
136*43a5ec4eSXin LI
137*43a5ec4eSXin LI------------------------------------------------------------------------
138*43a5ec4eSXin LImessage = [ nospflag ] ( STRING | FMTSTR )
139*43a5ec4eSXin LI			;; Message to print if test result is true.
140*43a5ec4eSXin LI
141*43a5ec4eSXin LInospflag = %x08 | "\\b"	;; Do not insert a space before the message.
142*43a5ec4eSXin LI			;; By default, messages are separated by a " ".
143*43a5ec4eSXin LI
144*43a5ec4eSXin LI------------------------------------------------------------------------
145*43a5ec4eSXin LIHWS = <horizontal white space>
146*43a5ec4eSXin LIEOL = <end of line marker>
147*43a5ec4eSXin LINUMBER = <C-style unsigned number>
148*43a5ec4eSXin LISTRING = <C-style string without delimiting quotes>
149*43a5ec4eSXin LIFMTSTR = <printf format string with exactly one % construct>
150*43a5ec4eSXin LI
151*43a5ec4eSXin LI------------------------------------------------------------------------
152