xref: /freebsd/contrib/file/magic/Magdir/ispell (revision 0e8011faf58b743cc652e3b2ad0f7671227610df)
1
2#------------------------------------------------------------------------------
3# $File: ispell,v 1.10 2023/10/23 19:49:58 christos Exp $
4# ispell:  file(1) magic for ispell, MySpell, Hunspell and aspell
5#
6# Ispell 3.0 has a magic of 0x9601 and ispell 3.1 has 0x9602.  This magic
7# will match 0x9600 through 0x9603 in *both* little endian and big endian.
8# (No other current magic entries collide.)
9#
10# Updated by Daniel Quinlan (quinlan@yggdrasil.com)
11#
# Little-endian ispell 3.x hash file: the first 16-bit word, with its two low
# bits masked off by &0xFFFC, must equal 0x9600 (so 0x9600..0x9603 all match).
120	leshort&0xFFFC	0x9600		little endian ispell
# Byte 0 is the low byte of the little-endian magic word and selects the
# version text (1 -> 3.0, 2 -> 3.1; 0 and 3 are reported with a question mark).
13>0	byte		0		hash file (?),
14>0	byte		1		3.0 hash file,
15>0	byte		2		3.1 hash file,
16>0	byte		3		hash file (?),
# 16-bit option word at offset 2. As the table below shows, bit 0 set means
# 7-bit, bit 1 set means capitalization, and bits 2-3 pick 26/52/128/256 flags.
17>2	leshort		0x00		8-bit, no capitalization, 26 flags
18>2	leshort		0x01		7-bit, no capitalization, 26 flags
19>2	leshort		0x02		8-bit, capitalization, 26 flags
20>2	leshort		0x03		7-bit, capitalization, 26 flags
21>2	leshort		0x04		8-bit, no capitalization, 52 flags
22>2	leshort		0x05		7-bit, no capitalization, 52 flags
23>2	leshort		0x06		8-bit, capitalization, 52 flags
24>2	leshort		0x07		7-bit, capitalization, 52 flags
25>2	leshort		0x08		8-bit, no capitalization, 128 flags
26>2	leshort		0x09		7-bit, no capitalization, 128 flags
27>2	leshort		0x0A		8-bit, capitalization, 128 flags
28>2	leshort		0x0B		7-bit, capitalization, 128 flags
29>2	leshort		0x0C		8-bit, no capitalization, 256 flags
30>2	leshort		0x0D		7-bit, no capitalization, 256 flags
31>2	leshort		0x0E		8-bit, capitalization, 256 flags
32>2	leshort		0x0F		7-bit, capitalization, 256 flags
# The 16-bit word at offset 4 is printed as the string character count, but
# only when it is greater than zero.
33>4	leshort		>0		and %d string characters
# Big-endian ispell 3.x hash file: same test as the little-endian entry, but
# the first 16-bit word is read big-endian before masking with 0xFFFC.
340	beshort&0xFFFC	0x9600		big endian ispell
# With big-endian storage the low byte of the magic word is byte 1, so the
# version text is selected from offset 1 (1 -> 3.0, 2 -> 3.1).
35>1	byte		0		hash file (?),
36>1	byte		1		3.0 hash file,
37>1	byte		2		3.1 hash file,
38>1	byte		3		hash file (?),
# 16-bit option word at offset 2. As the table below shows, bit 0 set means
# 7-bit, bit 1 set means capitalization, and bits 2-3 pick 26/52/128/256 flags.
39>2	beshort		0x00		8-bit, no capitalization, 26 flags
40>2	beshort		0x01		7-bit, no capitalization, 26 flags
41>2	beshort		0x02		8-bit, capitalization, 26 flags
42>2	beshort		0x03		7-bit, capitalization, 26 flags
43>2	beshort		0x04		8-bit, no capitalization, 52 flags
44>2	beshort		0x05		7-bit, no capitalization, 52 flags
45>2	beshort		0x06		8-bit, capitalization, 52 flags
46>2	beshort		0x07		7-bit, capitalization, 52 flags
47>2	beshort		0x08		8-bit, no capitalization, 128 flags
48>2	beshort		0x09		7-bit, no capitalization, 128 flags
49>2	beshort		0x0A		8-bit, capitalization, 128 flags
50>2	beshort		0x0B		7-bit, capitalization, 128 flags
51>2	beshort		0x0C		8-bit, no capitalization, 256 flags
52>2	beshort		0x0D		7-bit, no capitalization, 256 flags
53>2	beshort		0x0E		8-bit, capitalization, 256 flags
54>2	beshort		0x0F		7-bit, capitalization, 256 flags
# The 16-bit word at offset 4 is printed as the string character count, but
# only when it is greater than zero.
55>4	beshort		>0		and %d string characters
56# ispell 4.0 hash files  kromJx <kromJx@crosswinds.net>
57# Ispell 4.0
# Ispell 4.0 hash file: literal "ISPL" at offset 0. The five 32-bit values at
# offsets 4..20 are printed unconditionally ("x" matches any value); "long"
# carries no le/be prefix, so the values are read in the byte order magic(5)
# uses for unprefixed types.
580       string          ISPL            ispell
59>4      long            x               hash file version %d,
60>8      long            x               lexletters %d,
61>12     long            x               lexsize %d,
62>16     long            x               hashsize %d,
63>20     long            x               stblsize %d
64
65# Summary:	affix definition text files for Ispell/MySpell/Hunspell
66# From:		Joerg Jenderek
67# URL:		https://www.openoffice.org/lingucomponent/affix.readme
68#		https://man.archlinux.org/man/hunspell.5.en
69# Reference:	http://mark0.net/download/triddefs_xml.7z/defs/a/affix.trid.xml
70# Note:		called "Affix file" by TrID
71# variant starting with comment character
# Affix text file whose first byte is '#' (0x23). A leading '#' is far too
# common to identify the format, so spell-aff is only invoked after one of the
# following is found: a "SET " command with a recognised charset argument, an
# "SFX " command, a "defstringtype" declaration, or a "suffixes\n" section.
720		ubyte		0x23
73# look for SET character command followed by whitespace (seems to be often 1 space character) like in:
74# /usr/share/calibre/dictionaries/en-GB/en-GB.aff
75>0		search/60459	SET\040
76# skip scripts like /bin/affixcompress /bin/setupcon /bin/imdbpy2sql.py by checking for valid character SET argument
77# character SET argument like: UTF-8
# "&0" makes the offset relative to the end of the "SET " match above.
78>>&0		string		UTF-8
79>>>0		use					spell-aff
80# character SET argument like: ISO8859-1 - ISO8859-10 ISO8859-13 - ISO8859-15
81>>&0		string		ISO8859-
82>>>0		use				spell-aff
83# character SET argument for Russian with Cyrillic alphabet like: KOI8-R KOI8-U
84# no russian support until war against ukraine
# The "use" line for this charset is commented out, so this test has no
# continuation line that prints a description.
85>>&0		string		KOI8-
86#>>>0		use				spell-aff
87# character SET argument for languages with Cyrillic alphabet like: cp1251
88# no cyrillic support until russia war against ukraine
# The "use" line for this charset is commented out as well.
89>>&0		string		cp1251
90#>>>0		use				spell-aff
91# character SET argument for Indian Script Code for Information Interchange (ISCII) like: ISCII-DEVANAGARI
92>>&0		string		ISCII-
93# no example found
94>>>0		use				spell-aff
95# not "real" affix rule files but found as tests unit inside thunderbird sources like:
96# 1463589.aff 1695964.aff 2970240.aff
# Fallback chain: each "default" below is tried only when the preceding tests
# at the same continuation level did not match (see magic(5), type "default").
97>0		default		x
98# look for suffix SFX command followed by whitespace like in:
99# 1695964.aff
100>>0		search/164	SFX\040
101>>>0		use				spell-aff
102# if not real Hunspell/MySpell affix look for ispell variant
103>>0		default		x
104# URL:		https://manpages.debian.org/testing/ispell/ispell.5.en.html
105# look for ispell declaration like in: /usr/lib/ispell/espanol.aff
106>>>0		search/8251	defstringtype
107# defstringtype declaration start with unique name (like "list" "lat" "utf8" "iso" "nroff" often like formatter name)
108# followed by formatter name (like "nroff" "tex")
109# followed by suffix list (like ".mm" ".ms" ".me" ".man" ".NeXT" ".txt" ".list")
110#>>>>&1		string		x		DECLARATION=%s
111>>>>0		use				spell-aff
112# ispell variant without declaration like in: /usr/lib/ispell/bulgarian.aff /usr/lib/ispell/russian.aff
113>>>0		default		x
114# skip /etc/nilfs_cleanerd.conf by looking for ispell suffix section
115>>>>0		search/3233	suffixes\n
116>>>>>0		use				spell-aff
117# variant starting with empty line and comment character at the beginning of 2nd line like in: /usr/lib/ispell/polish.aff
# Affix text whose first line is empty and whose second line starts with '#':
# bytes 0-1 are 0x0a 0x23 (newline, '#'), read as one big-endian 16-bit value.
1180		ubeshort	0x0a23
119# skip /etc/discover-modprobe.conf by looking for ispell declaration
# The search starts at offset 2, i.e. right after the "\n#" prefix.
120>2		search/3118	defstringtype
121>>0		use				spell-aff
122# starting with UTF-8 Byte Order Mark (BOM) https://en.wikipedia.org/wiki/Byte_order_mark
# Affix text that starts with the 3-byte UTF-8 BOM. spell-aff is invoked when
# "SET " is found within the search range starting at offset 3.
1230		string		\xEF\xBB\xBF
124# starting with UTF-8 Byte Order Mark (BOM) followed by comment starting character
# NOTE(review): this '#' test prints nothing and has no continuation lines of
# its own; the SET search below sits at the same ">" level, so it is a sibling
# and does not depend on this test matching - confirm that this is intended.
125>3		string		\x23
126# starting with UTF-8 BOM and with SET character command followed by whitespace
127# like in: /opt/Wolfram/WolframEngine/13.1/SystemFiles/Components/SpellingData/SpellingDictionaries/lt.aff
128# look for character SET command used in MySpell and Hunspell
129>3		search/9883	SET\040
130>>0		use				spell-aff
131# look for FLAG type command used in MySpell and Hunspell
# Affix text that starts with the FLAG command: the byte right after "FLAG"
# (offset 4) must be a space (0x20) or a tab (0x09) before spell-aff is used.
1320		string		FLAG
133# followed by space character like in
134# /opt/Wolfram/WolframEngine/13.1/SystemFiles/Components/SpellingData/SpellingDictionaries/en_US.aff
135>4		ubyte		0x20
136>>0		use				spell-aff
137# or followed by tabulator character like in
138# /opt/Wolfram/WolframEngine/13.1/SystemFiles/Components/SpellingData/SpellingDictionaries/ar.aff
139>4		ubyte		0x09
140>>0		use				spell-aff
141# starting with character SET command used in MySpell and Hunspell like in: org/languagetool/resource/sv/hunspell/sv_SE.aff
# Affix text whose very first bytes are "SET" followed by a space (\040);
# no further check is made before handing over to spell-aff.
1420		string		SET\040
143>0		use				spell-aff
144# starting with language code LANG used in MySpell and Hunspell like in: /usr/share/hunspell/tr_TR.aff
# Affix text whose very first bytes are "LANG" followed by a space (\040);
# no further check is made before handing over to spell-aff.
1450		string		LANG\040
146>0		use				spell-aff
147# starting with affix flag command AF used in MySpell and Hunspell like in: /usr/lib/thunderbird/extensions/langpack-hu@thunderbird.mozilla.org/dictionaries/hu.aff
# Affix text that starts with "AF" followed by a space. Because that prefix is
# short, a run of 1 to 4 decimal digits is additionally required by the regex
# applied at offset 3 before spell-aff is invoked.
1480		string		AF\040
149# look for number of flag vector aliases
150>3		regex		[0-9]{1,4}
151>>0		use				spell-aff
152#	display information (encoding, language, ...) about affix rule text files for Ispell/MySpell/Hunspell
# Named routine, invoked through "use spell-aff" by the detection entries in
# this file. It prints "affix definition", sets MIME type and extension, labels
# the file as Ispell or MySpell/Hunspell, reports the LANG and SET arguments
# when present, and finally echoes the first non-empty lines of the file.
1530		name				spell-aff
# "x" matches any value, so this line only serves to print the label.
154>1		ubeshort	x		affix definition
155#!:mime		text/plain
156!:mime		text/x-affix
157!:ext		aff
158# GRR: need extra test so that default clause works
159>0		ubyte		x
160# look for ispell declaration
161>>0		search/8251	defstringtype	for Ispell
162# ispell variant without declaration
163>>0		default		x
164# look for ispell suffixes command
165>>>0		search/3233	suffixes
166# skip "suffixes used to create first part of a compound" by checking for flag argument like in: languagetool\resource\sv\hunspell\sv_SE.aff
167>>>>&0		search/2	flag		for Ispell
168>>>>&0		default		x		for MySpell/Hunspell
169# without suffixes keyword
170>>>0		default		x		for MySpell/Hunspell
171# look for language code command used in MySpell and Hunspell
172# like in: /usr/share/hunspell/de_AT.aff /usr/share/hunspell/it_IT.aff /usr/share/hunspell/tr_TR.aff /usr/lib/firefox/browser/extensions/langpack-hu@firefox.mozilla.org/dictionaries/hu.aff
173>>0		search/1117643	LANG\040	\b, language
174# language code argument like: de_DE hu_HU it_IT mn_MN tr_TR
175>>>&0		string		x		%s
176# look for character SET command used in MySpell and Hunspell
177>>0		search/1117729	SET
178# skip SETTINGS like in /usr/lib/ispell/ngerman.aff
179# SET command followed often by space character (0x20) or tabulator (0x09) like in
180# /opt/Wolfram/WolframEngine/13.1/SystemFiles/Components/SpellingData/SpellingDictionaries/ar.aff
# Under mask 0xD6 both space (0x20) and tab (0x09) yield 0, whereas 'T' (0x54,
# as in SETTINGS) does not, so only "SET" followed by whitespace passes.
181>>>&0	ubyte&0xD6	=0x00
182# skip SSET	#     schosS in /usr/lib/ispell/ogerman.aff
183>>>>&0		ubyte		>0x48		\b,
184# character SET argument like: cp1251 ISCII-DEVANAGAR ISO8859-1 - ISO8859-10 ISO8859-13 - ISO8859-15 KOI8-R KOI8-U UTF-8
# "&-1" steps back one byte so the string starts at the byte just tested.
185>>>>>&-1	string	x			"%s" encoded
186# for control reasons show first non empty lines for ASCII or ISO-8859 text variant
# Bytes 1-2 equal to 0xBBBF are the tail of the UTF-8 BOM (EF BB BF); this
# branch handles files without it, the "=0xBBBF" branch further down those with.
187>1		ubeshort	!0xBBBF
188# 1st line starting with 0x0A like in /usr/src/dicts/sjp-ispell-pl-20140213/polish.aff
189>>0		ubyte		=0x0A
190>>>1		ubyte		!0x0A		\b, 2nd line
191>>>>&-1		string		x		"%s"
192# 3rd line starting with 0x0A like in polish.aff
193>>>>>&1		ubyte		=0x0A
194>>>>>>&0	string		x		\b, 4th line "%s"
195# 1st line starting with ASCII text like:
196# this is the affix file of the de_DE Hunspell dictionary
197>>0		ubyte		!0x0A
198>>>0		string		x		\b, 1st line "%s"
199>>>>&1		ubyte		>0x1F		\b, 2nd line
200>>>>>&-1	string		x		"%s"
201# 2nd line starting with 0x0A like in /usr/lib/ispell/bulgarian.aff
202>>>>&1		ubyte		=0x0A		\b, 3rd line
203>>>>>&0		string		x		"%s"
204# for control reasons show first lines for variant starting with ByteOrderMark (BOM=\xEF\xBB\xBF)
205>1		ubeshort	=0xBBBF	   	\b, with BOM
# The first line is read from offset 3, i.e. just past the 3-byte BOM.
206>>3		string		x		\b, 1st line "%s"
207>>>&1		ubyte		>0x1F		\b, 2nd line
208>>>>&-1		string		x		"%s"
209
210# From:		Joerg Jenderek
211# URL:		https://en.wikipedia.org/wiki/GNU_Aspell
212#		https://manpages.ubuntu.com/manpages/trusty/en/man8/aspell-autobuildhash.8.html
213# Reference:	http://mark0.net/download/triddefs_xml.7z/defs/r/rws-aspell.trid.xml
214#		https://ftp.gnu.org/gnu/aspell/aspell-0.60.8.tar.gz
215#		aspell-0.60.8/modules/speller/default/data.cpp
216#		aspell-0.60.8/modules/speller/default/readonly_ws.cpp
217# Note:		called "aspell dictionary" by TrID
# Compiled aspell dictionary (.rws): identified by the literal text
# "aspell default speller rowl" at offset 0 (\040 is an escaped space).
2180	string	aspell\040default\040speller\040rowl	aspell dictionary
219#!:mime	application/octet-stream
220!:mime	application/x-aspell-dictionary
221!:ext	rws
222# version like: 1.10 1.4
223>28	string	x					\b, version %s
224# u32int endian_check; 12345678=00BC614Eh
225#>64	ulelong	x					\b, endian_check=%u
# The three ">>64" lines are continuations of the version line above. The
# 32-bit value at offset 64 is compared with 12345678 in little- and then
# big-endian order; if neither matches, the "default" line reports "old".
226>>64	ulelong	12345678				\b, little endian
227# not tested
228>>64	ubelong	12345678				\b, big endian
229# older aspell version not like 0.60.8
230>>64	default	x					\b, old
231# URL:		https://en.wikipedia.org/wiki/GNU_Aspell
232# Reference:	http://aspell.net/man-html/Format-of-the-Personal-and-Replacement-Dictionaries.html
233# personal_ws-1.1 lang num [encoding]
# aspell personal word list or replacement list: both start with the 9-byte
# prefix "personal_"; the text at offset 9 ("ws-" or "repl-") selects which
# description, MIME type and file extension are reported.
2340	string	personal_				aspell personal
235# Reference:	http://mark0.net/download/triddefs_xml.7z/defs/p/pws-aspell.trid.xml
236# Note:		called "aspell Personal dictionary" by TrID
237>9	string	ws-					dictionary
238#!:mime	text/plain
239!:mime	text/x-aspell-dictionary
240# like: ~/.aspell.en.pws ~/.aspell.de_DE.pws ~/.aspell.it.pws
241!:ext	pws
242# Reference:	http://mark0.net/download/triddefs_xml.7z/defs/p/prepl-aspell.trid.xml
243# Note:		called "aspell Personal Replacement dictionary" by TrID
244# personal_repl-1.1 lang num [encoding]
245>9	string	repl-					replacement dictionary
246#!:mime	text/plain
247!:mime	text/x-aspell-dictionary
248# like: ~/.aspell.en.prepl ~/.aspell.de_DE.prepl ~/.aspell.it.prepl
249!:ext	prepl
250