# xref: /freebsd/contrib/file/magic/Magdir/ispell (revision ae316d1d1cffd71ab7751f94e10118777a88e027)

#------------------------------------------------------------------------------
# $File: ispell,v 1.10 2023/10/23 19:49:58 christos Exp $
# ispell:  file(1) magic for ispell, MySpell, Hunspell and aspell
#
# Ispell 3.0 has a magic of 0x9601 and ispell 3.1 has 0x9602.  This magic
# will match 0x9600 through 0x9603 in *both* little endian and big endian.
# (No other current magic entries collide.)
#
# Updated by Daniel Quinlan (quinlan@yggdrasil.com)
#
# Little-endian ispell 3.x hash file: the 16-bit value at offset 0 is masked
# with 0xFFFC, so 0x9600 through 0x9603 all match.
0	leshort&0xFFFC	0x9600		little endian ispell
# byte at offset 0 (the low byte of the little-endian magic) selects the version text
>0	byte		0		hash file (?),
>0	byte		1		3.0 hash file,
>0	byte		2		3.1 hash file,
>0	byte		3		hash file (?),
# 16-bit value at offset 2: one description per value 0x00 through 0x0F
>2	leshort		0x00		8-bit, no capitalization, 26 flags
>2	leshort		0x01		7-bit, no capitalization, 26 flags
>2	leshort		0x02		8-bit, capitalization, 26 flags
>2	leshort		0x03		7-bit, capitalization, 26 flags
>2	leshort		0x04		8-bit, no capitalization, 52 flags
>2	leshort		0x05		7-bit, no capitalization, 52 flags
>2	leshort		0x06		8-bit, capitalization, 52 flags
>2	leshort		0x07		7-bit, capitalization, 52 flags
>2	leshort		0x08		8-bit, no capitalization, 128 flags
>2	leshort		0x09		7-bit, no capitalization, 128 flags
>2	leshort		0x0A		8-bit, capitalization, 128 flags
>2	leshort		0x0B		7-bit, capitalization, 128 flags
>2	leshort		0x0C		8-bit, no capitalization, 256 flags
>2	leshort		0x0D		7-bit, no capitalization, 256 flags
>2	leshort		0x0E		8-bit, capitalization, 256 flags
>2	leshort		0x0F		7-bit, capitalization, 256 flags
# 16-bit value at offset 4, printed only when it is greater than zero
>4	leshort		>0		and %d string characters
# Big-endian ispell 3.x hash file: same masked match as the little-endian
# entry, read with big-endian byte order.
0	beshort&0xFFFC	0x9600		big endian ispell
# byte at offset 1 (the low byte of the big-endian magic) selects the version text
>1	byte		0		hash file (?),
>1	byte		1		3.0 hash file,
>1	byte		2		3.1 hash file,
>1	byte		3		hash file (?),
# 16-bit value at offset 2: one description per value 0x00 through 0x0F
>2	beshort		0x00		8-bit, no capitalization, 26 flags
>2	beshort		0x01		7-bit, no capitalization, 26 flags
>2	beshort		0x02		8-bit, capitalization, 26 flags
>2	beshort		0x03		7-bit, capitalization, 26 flags
>2	beshort		0x04		8-bit, no capitalization, 52 flags
>2	beshort		0x05		7-bit, no capitalization, 52 flags
>2	beshort		0x06		8-bit, capitalization, 52 flags
>2	beshort		0x07		7-bit, capitalization, 52 flags
>2	beshort		0x08		8-bit, no capitalization, 128 flags
>2	beshort		0x09		7-bit, no capitalization, 128 flags
>2	beshort		0x0A		8-bit, capitalization, 128 flags
>2	beshort		0x0B		7-bit, capitalization, 128 flags
>2	beshort		0x0C		8-bit, no capitalization, 256 flags
>2	beshort		0x0D		7-bit, no capitalization, 256 flags
>2	beshort		0x0E		8-bit, capitalization, 256 flags
>2	beshort		0x0F		7-bit, capitalization, 256 flags
# 16-bit value at offset 4, printed only when it is greater than zero
>4	beshort		>0		and %d string characters
# ispell 4.0 hash files  kromJx <kromJx@crosswinds.net>
# Ispell 4.0
# The "ISPL" tag at offset 0 is followed by five fields at offsets 4, 8, 12,
# 16 and 20; each is printed unconditionally (test value `x`).
0       string          ISPL            ispell
>4      long            x               hash file version %d,
>8      long            x               lexletters %d,
>12     long            x               lexsize %d,
>16     long            x               hashsize %d,
>20     long            x               stblsize %d

# Summary:	affix definition text files for Ispell/MySpell/Hunspell
# From:		Joerg Jenderek
# URL:		https://www.openoffice.org/lingucomponent/affix.readme
#		https://man.archlinux.org/man/hunspell.5.en
# Reference:	http://mark0.net/download/triddefs_xml.7z/defs/a/affix.trid.xml
# Note:		called "Affix file" by TrID
# variant starting with comment character
0		ubyte		0x23
# look for SET character command followed by whitespace (seems to be often 1 space character) like in:
# /usr/share/calibre/dictionaries/en-GB/en-GB.aff
>0		search/60459	SET\040
# skip scripts like /bin/affixcompress /bin/setupcon /bin/imdbpy2sql.py by checking for valid character SET argument
# character SET argument like: UTF-8
>>&0		string		UTF-8
>>>0		use					spell-aff
# character SET argument like: ISO8859-1 - ISO8859-10 ISO8859-13 - ISO8859-15
>>&0		string		ISO8859-
>>>0		use				spell-aff
# character SET argument for Russian with Cyrillic alphabet like: KOI8-R KOI8-U
# no russian support until war against ukraine
# NOTE(review): while the `use` line below stays commented out, this test has
# neither a message nor children, so a match prints nothing.
>>&0		string		KOI8-
#>>>0		use				spell-aff
# character SET argument for languages with Cyrillic alphabet like: cp1251
# no cyrillic support until russia war against ukraine
# NOTE(review): same as KOI8- above, a match prints nothing while `use` is disabled.
>>&0		string		cp1251
#>>>0		use				spell-aff
# character SET argument for Indian Script Code for Information Interchange (ISCII) like: ISCII-DEVANAGARI
>>&0		string		ISCII-
# no example found
>>>0		use				spell-aff
# not "real" affix rule files but found as unit tests inside thunderbird sources like:
# 1463589.aff 1695964.aff 2970240.aff
>0		default		x
# look for suffix SFX command followed by whitespace like in:
# 1695964.aff
>>0		search/164	SFX\040
>>>0		use				spell-aff
# if not real Hunspell/MySpell affix look for ispell variant
>>0		default		x
# URL:		https://manpages.debian.org/testing/ispell/ispell.5.en.html
# look for ispell declaration like in: /usr/lib/ispell/espanol.aff
>>>0		search/8251	defstringtype
# defstringtype declaration starts with unique name (like "list" "lat" "utf8" "iso" "nroff" often like formatter name)
# followed by formatter name (like "nroff" "tex")
# followed by suffix list (like ".mm" ".ms" ".me" ".man" ".NeXT" ".txt" ".list")
#>>>>&1		string		x		DECLARATION=%s
>>>>0		use				spell-aff
# ispell variant without declaration like in: /usr/lib/ispell/bulgarian.aff /usr/lib/ispell/russian.aff
>>>0		default		x
# skip /etc/nilfs_cleanerd.conf by looking for ispell suffix section
>>>>0		search/3233	suffixes\n
>>>>>0		use				spell-aff
# variant starting with empty line and comment character at the beginning of 2nd line like in: /usr/lib/ispell/polish.aff
# (0x0a23 is a newline byte followed by '#')
0		ubeshort	0x0a23
# skip /etc/discover-modprobe.conf by looking for ispell declaration
>2		search/3118	defstringtype
>>0		use				spell-aff
# starting with UTF-8 Byte Order Mark (BOM) https://en.wikipedia.org/wiki/Byte_order_mark
0		string		\xEF\xBB\xBF
# starting with UTF-8 Byte Order Mark (BOM) followed by comment starting character
# NOTE(review): this test has no message and no children, and the SET search
# below sits at the same continuation level, so it does not gate that search.
# Confirm whether the search was meant to be nested under it.
>3		string		\x23
# starting with UTF-8 BOM and with SET character command followed by whitespace
# like in: /opt/Wolfram/WolframEngine/13.1/SystemFiles/Components/SpellingData/SpellingDictionaries/lt.aff
# look for character SET command used in MySpell and Hunspell
>3		search/9883	SET\040
>>0		use				spell-aff
# look for FLAG type command used in MySpell and Hunspell
0		string		FLAG
# followed by space character like in
# /opt/Wolfram/WolframEngine/13.1/SystemFiles/Components/SpellingData/SpellingDictionaries/en_US.aff
>4		ubyte		0x20
>>0		use				spell-aff
# or followed by tabulator character like in
# /opt/Wolfram/WolframEngine/13.1/SystemFiles/Components/SpellingData/SpellingDictionaries/ar.aff
>4		ubyte		0x09
>>0		use				spell-aff
# starting with character SET command used in MySpell and Hunspell like in: org/languagetool/resource/sv/hunspell/sv_SE.aff
0		string		SET\040
>0		use				spell-aff
# starting with language code LANG used in MySpell and Hunspell like in: /usr/share/hunspell/tr_TR.aff
0		string		LANG\040
>0		use				spell-aff
# starting with affix flag command AF used in MySpell and Hunspell like in: /usr/lib/thunderbird/extensions/langpack-hu@thunderbird.mozilla.org/dictionaries/hu.aff
0		string		AF\040
# look for number of flag vector aliases
# NOTE(review): the pattern is not anchored, so it presumably accepts digits
# anywhere in the scanned range rather than only directly after "AF " - confirm.
>3		regex		[0-9]{1,4}
>>0		use				spell-aff
#	display information (encoding,language,...) about affixes rules text for Ispell/MySpell/Hunspell
#	Named routine: reached through `use spell-aff` from the affix entries of this file.
0		name				spell-aff
>1		ubeshort	x		affix definition
#!:mime		text/plain
!:mime		text/x-affix
!:ext		aff
# GRR: need extra test so that default clause works
>0		ubyte		x
# look for ispell declaration
>>0		search/8251	defstringtype	for Ispell
# ispell variant without declaration
>>0		default		x
# look for ispell suffixes command
>>>0		search/3233	suffixes
# skip "suffixes used to create first part of a compound" by checking for flag argument like in: languagetool\resource\sv\hunspell\sv_SE.aff
>>>>&0		search/2	flag		for Ispell
>>>>&0		default		x		for MySpell/Hunspell
# without suffixes keyword
>>>0		default		x		for MySpell/Hunspell
# look for language code command used in MySpell and Hunspell
# like in: /usr/share/hunspell/de_AT.aff /usr/share/hunspell/it_IT.aff /usr/share/hunspell/tr_TR.aff /usr/lib/firefox/browser/extensions/langpack-hu@firefox.mozilla.org/dictionaries/hu.aff
>>0		search/1117643	LANG\040	\b, language
# language code argument like: de_DE hu_HU it_IT mn_MN tr_TR
>>>&0		string		x		%s
# look for character SET command used in MySpell and Hunspell
>>0		search/1117729	SET
# skip SETTINGS like in /usr/lib/ispell/ngerman.aff
# SET command followed often by space character (0x20) or tabulator (0x09) like in
# /opt/Wolfram/WolframEngine/13.1/SystemFiles/Components/SpellingData/SpellingDictionaries/ar.aff
# (both 0x20 and 0x09 have no bits in common with the mask 0xD6, so both pass this test)
>>>&0	ubyte&0xD6	=0x00
# skip SSET	#     schosS in /usr/lib/ispell/ogerman.aff
>>>>&0		ubyte		>0x48		\b,
# character SET argument like: cp1251 ISCII-DEVANAGARI ISO8859-1 - ISO8859-10 ISO8859-13 - ISO8859-15 KOI8-R KOI8-U UTF-8
>>>>>&-1	string	x			"%s" encoded
# for control reasons show first non empty lines for ASCII or ISO-8859 text variant
>1		ubeshort	!0xBBBF
# 1st line starting with 0x0A like in /usr/src/dicts/sjp-ispell-pl-20140213/polish.aff
>>0		ubyte		=0x0A
>>>1		ubyte		!0x0A		\b, 2nd line
>>>>&-1		string		x		"%s"
# 3rd line starting with 0x0A like in polish.aff
>>>>>&1		ubyte		=0x0A
>>>>>>&0	string		x		\b, 4th line "%s"
# 1st line starting with ASCII text like:
# this is the affix file of the de_DE Hunspell dictionary
>>0		ubyte		!0x0A
>>>0		string		x		\b, 1st line "%s"
>>>>&1		ubyte		>0x1F		\b, 2nd line
>>>>>&-1	string		x		"%s"
# 2nd line starting with 0x0A like in /usr/lib/ispell/bulgarian.aff
>>>>&1		ubyte		=0x0A		\b, 3rd line
>>>>>&0		string		x		"%s"
# for control reasons show first lines for variant starting with ByteOrderMark (BOM=\xEF\xBB\xBF)
>1		ubeshort	=0xBBBF	   	\b, with BOM
>>3		string		x		\b, 1st line "%s"
>>>&1		ubyte		>0x1F		\b, 2nd line
>>>>&-1		string		x		"%s"

# From:		Joerg Jenderek
# URL:		https://en.wikipedia.org/wiki/GNU_Aspell
#		https://manpages.ubuntu.com/manpages/trusty/en/man8/aspell-autobuildhash.8.html
# Reference:	http://mark0.net/download/triddefs_xml.7z/defs/r/rws-aspell.trid.xml
#		https://ftp.gnu.org/gnu/aspell/aspell-0.60.8.tar.gz
#		aspell-0.60.8/modules/speller/default/data.cpp
#		aspell-0.60.8/modules/speller/default/readonly_ws.cpp
# Note:		called "aspell dictionary" by TrID
0	string	aspell\040default\040speller\040rowl	aspell dictionary
#!:mime	application/octet-stream
!:mime	application/x-aspell-dictionary
!:ext	rws
# version like: 1.10 1.4
>28	string	x					\b, version %s
# u32int endian_check; 12345678=00BC614Eh
#>64	ulelong	x					\b, endian_check=%u
# the 32-bit value at offset 64 is compared against 12345678 in each byte order
>>64	ulelong	12345678				\b, little endian
# not tested
>>64	ubelong	12345678				\b, big endian
# older aspell version not like 0.60.8
>>64	default	x					\b, old
# URL:		https://en.wikipedia.org/wiki/GNU_Aspell
# Reference:	http://aspell.net/man-html/Format-of-the-Personal-and-Replacement-Dictionaries.html
# personal_ws-1.1 lang num [encoding]
0	string	personal_				aspell personal
# Reference:	http://mark0.net/download/triddefs_xml.7z/defs/p/pws-aspell.trid.xml
# Note:		called "aspell Personal dictionary" by TrID
# offset 9 is the first byte after the 9-character "personal_" prefix
>9	string	ws-					dictionary
#!:mime	text/plain
!:mime	text/x-aspell-dictionary
# like: ~/.aspell.en.pws ~/.aspell.de_DE.pws ~/.aspell.it.pws
!:ext	pws
# Reference:	http://mark0.net/download/triddefs_xml.7z/defs/p/prepl-aspell.trid.xml
# Note:		called "aspell Personal Replacement dictionary" by TrID
# personal_repl-1.1 lang num [encoding]
>9	string	repl-					replacement dictionary
#!:mime	text/plain
!:mime	text/x-aspell-dictionary
# like: ~/.aspell.en.prepl ~/.aspell.de_DE.prepl ~/.aspell.it.prepl
!:ext	prepl