#------------------------------------------------------------------------------
# $File: ispell,v 1.10 2023/10/23 19:49:58 christos Exp $
# ispell:  file(1) magic for ispell, MySpell, Hunspell and aspell
#
# Hash files of ispell 3.0 carry the magic 0x9601, those of ispell 3.1 carry
# 0x9602.  Because the tests below mask with 0xFFFC, they accept 0x9600
# through 0x9603 in *both* little endian and big endian byte order.
# (No other current magic entries collide.)
#
# Updated by Daniel Quinlan (quinlan@yggdrasil.com)
#

# --- ispell 3.x hash file, little endian ------------------------------------
# The low byte of the magic sits at offset 0 and selects the version text;
# the short at offset 2 selects the option text.
0       leshort&0xFFFC  0x9600  little endian ispell
>0      byte            0       hash file (?),
>0      byte            1       3.0 hash file,
>0      byte            2       3.1 hash file,
>0      byte            3       hash file (?),
>2      leshort         0x00    8-bit, no capitalization, 26 flags
>2      leshort         0x01    7-bit, no capitalization, 26 flags
>2      leshort         0x02    8-bit, capitalization, 26 flags
>2      leshort         0x03    7-bit, capitalization, 26 flags
>2      leshort         0x04    8-bit, no capitalization, 52 flags
>2      leshort         0x05    7-bit, no capitalization, 52 flags
>2      leshort         0x06    8-bit, capitalization, 52 flags
>2      leshort         0x07    7-bit, capitalization, 52 flags
>2      leshort         0x08    8-bit, no capitalization, 128 flags
>2      leshort         0x09    7-bit, no capitalization, 128 flags
>2      leshort         0x0A    8-bit, capitalization, 128 flags
>2      leshort         0x0B    7-bit, capitalization, 128 flags
>2      leshort         0x0C    8-bit, no capitalization, 256 flags
>2      leshort         0x0D    7-bit, no capitalization, 256 flags
>2      leshort         0x0E    8-bit, capitalization, 256 flags
>2      leshort         0x0F    7-bit, capitalization, 256 flags
>4      leshort         >0      and %d string characters

# --- ispell 3.x hash file, big endian ---------------------------------------
# Same layout as above; here the low byte of the magic sits at offset 1.
0       beshort&0xFFFC  0x9600  big endian ispell
>1      byte            0       hash file (?),
>1      byte            1       3.0 hash file,
>1      byte            2       3.1 hash file,
>1      byte            3       hash file (?),
>2      beshort         0x00    8-bit, no capitalization, 26 flags
>2      beshort         0x01    7-bit, no capitalization, 26 flags
>2      beshort         0x02    8-bit, capitalization, 26 flags
>2      beshort         0x03    7-bit, capitalization, 26 flags
>2      beshort         0x04    8-bit, no capitalization, 52 flags
>2      beshort         0x05    7-bit, no capitalization, 52 flags
>2      beshort         0x06    8-bit, capitalization, 52 flags
>2      beshort         0x07    7-bit, capitalization, 52 flags
>2      beshort         0x08    8-bit, no capitalization, 128 flags
>2      beshort         0x09    7-bit, no capitalization, 128 flags
>2      beshort         0x0A    8-bit, capitalization, 128 flags
>2      beshort         0x0B    7-bit, capitalization, 128 flags
>2      beshort         0x0C    8-bit, no capitalization, 256 flags
>2      beshort         0x0D    7-bit, no capitalization, 256 flags
>2      beshort         0x0E    8-bit, capitalization, 256 flags
>2      beshort         0x0F    7-bit, capitalization, 256 flags
>4      beshort         >0      and %d string characters

# --- ispell 4.0 hash file ---------------------------------------------------
# Contributed by kromJx <kromJx@crosswinds.net>
# Starts with the text "ISPL" followed by five long header fields.
0       string          ISPL    ispell
>4      long            x       hash file version %d,
>8      long            x       lexletters %d,
>12     long            x       lexsize %d,
>16     long            x       hashsize %d,
>20     long            x       stblsize %d

# Summary:      affix definition text files for Ispell/MySpell/Hunspell
# From:         Joerg Jenderek
# URL:          https://www.openoffice.org/lingucomponent/affix.readme
#               https://man.archlinux.org/man/hunspell.5.en
# Reference:    http://mark0.net/download/triddefs_xml.7z/defs/a/affix.trid.xml
# Note:         called "Affix file" by TrID

# --- affix file whose first byte is the comment character '#' ----------------
0       ubyte           0x23
# Search for the character SET command followed by whitespace (often exactly
# one space character), as found in:
#   /usr/share/calibre/dictionaries/en-GB/en-GB.aff
>0      search/60459    SET\040
# Scripts such as /bin/affixcompress /bin/setupcon /bin/imdbpy2sql.py would
# also get this far; reject them by requiring a valid character SET argument.
# SET argument like: UTF-8
>>&0    string          UTF-8
>>>0    use             spell-aff
# SET argument like: ISO8859-1 - ISO8859-10 ISO8859-13 - ISO8859-15
>>&0    string          ISO8859-
>>>0    use             spell-aff
# SET argument for Russian with Cyrillic alphabet like: KOI8-R KOI8-U
# The display call is disabled on purpose; original note:
#   no russian support until war against ukraine
>>&0    string          KOI8-
#>>>0   use             spell-aff
# SET argument for languages with Cyrillic alphabet like: cp1251
# The display call is disabled on purpose; original note:
#   no cyrillic support until russia war against ukraine
>>&0    string          cp1251
#>>>0   use             spell-aff
# SET argument for Indian Script Code for Information Interchange (ISCII)
# like: ISCII-DEVANAGARI
>>&0    string          ISCII-
# no example found
>>>0    use             spell-aff
# No SET command found.  Such files are not "real" affix rule files, but they
# occur as unit tests inside the thunderbird sources, like:
#   1463589.aff 1695964.aff 2970240.aff
>0      default         x
# Search for the suffix SFX command followed by whitespace, like in:
#   1695964.aff
>>0     search/164      SFX\040
>>>0    use             spell-aff
# Not a Hunspell/MySpell affix file: try the ispell variant instead.
>>0     default         x
# URL:  https://manpages.debian.org/testing/ispell/ispell.5.en.html
# Search for an ispell declaration, like in: /usr/lib/ispell/espanol.aff
>>>0    search/8251     defstringtype
# A defstringtype declaration starts with a unique name (like "list" "lat"
# "utf8" "iso" "nroff", often the same as a formatter name),
# followed by the formatter name (like "nroff" "tex"),
# followed by a suffix list (like ".mm" ".ms" ".me" ".man" ".NeXT" ".txt" ".list")
#>>>>&1 string          x       DECLARATION=%s
>>>>0   use             spell-aff
# ispell variant without declaration, like in:
#   /usr/lib/ispell/bulgarian.aff /usr/lib/ispell/russian.aff
>>>0    default         x
# Requiring an ispell suffix section skips /etc/nilfs_cleanerd.conf
>>>>0   search/3233     suffixes\n
>>>>>0  use             spell-aff

# --- affix file with an empty 1st line and '#' starting the 2nd line ---------
# like in: /usr/lib/ispell/polish.aff
0       ubeshort        0x0a23
# Requiring an ispell declaration skips /etc/discover-modprobe.conf
>2      search/3118     defstringtype
>>0     use             spell-aff

# --- affix file starting with a UTF-8 Byte Order Mark (BOM) ------------------
# https://en.wikipedia.org/wiki/Byte_order_mark
0       string          \xEF\xBB\xBF
# BOM directly followed by the comment character
>3      string          \x23
# BOM and, further on, the character SET command followed by whitespace,
# like in: /opt/Wolfram/WolframEngine/13.1/SystemFiles/Components/SpellingData/SpellingDictionaries/lt.aff
# (SET is the character set command used in MySpell and Hunspell)
>3      search/9883     SET\040
>>0     use             spell-aff

# --- affix file starting with the FLAG type command (MySpell/Hunspell) -------
0       string          FLAG
# FLAG followed by a space character, like in
#   /opt/Wolfram/WolframEngine/13.1/SystemFiles/Components/SpellingData/SpellingDictionaries/en_US.aff
>4      ubyte           0x20
>>0     use             spell-aff
# FLAG followed by a tabulator character, like in
#   /opt/Wolfram/WolframEngine/13.1/SystemFiles/Components/SpellingData/SpellingDictionaries/ar.aff
>4      ubyte           0x09
>>0     use             spell-aff

# --- affix file starting with the character SET command (MySpell/Hunspell) ---
# like in: org/languagetool/resource/sv/hunspell/sv_SE.aff
0       string          SET\040
>0      use             spell-aff

# --- affix file starting with the language code LANG (MySpell/Hunspell) ------
# like in: /usr/share/hunspell/tr_TR.aff
0       string          LANG\040
>0      use             spell-aff

# --- affix file starting with the affix flag command AF (MySpell/Hunspell) ---
# like in: /usr/lib/thunderbird/extensions/langpack-hu@thunderbird.mozilla.org/dictionaries/hu.aff
0       string          AF\040
# the command is followed by the number of flag vector aliases
>3      regex           [0-9]{1,4}
>>0     use             spell-aff

# --- spell-aff: shared display routine ---------------------------------------
# Prints information (flavour, language, encoding, first lines) about affix
# rule text files for Ispell/MySpell/Hunspell.  Called by the entries above.
0       name            spell-aff
>1      ubeshort        x       affix definition
#!:mime text/plain
!:mime  text/x-affix
!:ext   aff
# GRR: an extra always-true test is needed so that the default clauses work
>0      ubyte           x
# an ispell declaration identifies the Ispell flavour
>>0     search/8251     defstringtype   for Ispell
# ispell variant without declaration
>>0     default         x
# search for the ispell suffixes command
>>>0    search/3233     suffixes
# Checking for the flag argument skips texts like "suffixes used to create
# first part of a compound", as in: languagetool\resource\sv\hunspell\sv_SE.aff
>>>>&0  search/2        flag    for Ispell
>>>>&0  default         x       for MySpell/Hunspell
# no suffixes keyword at all
>>>0    default         x       for MySpell/Hunspell
# Search for the language code command used in MySpell and Hunspell, like in:
#   /usr/share/hunspell/de_AT.aff /usr/share/hunspell/it_IT.aff
#   /usr/share/hunspell/tr_TR.aff
#   /usr/lib/firefox/browser/extensions/langpack-hu@firefox.mozilla.org/dictionaries/hu.aff
>>0     search/1117643  LANG\040        \b, language
# language code argument like: de_DE hu_HU it_IT mn_MN tr_TR
>>>&0   string          x       %s
# Search for the character SET command used in MySpell and Hunspell
>>0     search/1117729  SET
# Skip SETTINGS, like in /usr/lib/ispell/ngerman.aff: the SET command is often
# followed by a space character (0x20) or a tabulator (0x09), like in
#   /opt/Wolfram/WolframEngine/13.1/SystemFiles/Components/SpellingData/SpellingDictionaries/ar.aff
>>>&0   ubyte&0xD6      =0x00
# skip SSET # schosS in /usr/lib/ispell/ogerman.aff
>>>>&0  ubyte           >0x48   \b,
# character SET argument like: cp1251 ISCII-DEVANAGARI ISO8859-1 - ISO8859-10
# ISO8859-13 - ISO8859-15 KOI8-R KOI8-U UTF-8
>>>>>&-1        string  x       "%s" encoded
# For control reasons show the first non-empty lines of the ASCII or ISO-8859
# text variant (bytes 1-2 are not the tail of a UTF-8 BOM)
>1      ubeshort        !0xBBBF
# 1st line is empty (file starts with 0x0A), like in
#   /usr/src/dicts/sjp-ispell-pl-20140213/polish.aff
>>0     ubyte           =0x0A
>>>1    ubyte           !0x0A   \b, 2nd line
>>>>&-1 string          x       "%s"
# 3rd line is empty as well (starts with 0x0A), like in polish.aff
>>>>>&1 ubyte           =0x0A
>>>>>>&0        string  x       \b, 4th line "%s"
# 1st line starts with ASCII text like:
#   this is the affix file of the de_DE Hunspell dictionary
>>0     ubyte           !0x0A
>>>0    string          x       \b, 1st line "%s"
>>>>&1  ubyte           >0x1F   \b, 2nd line
>>>>>&-1        string  x       "%s"
# 2nd line is empty (starts with 0x0A), like in /usr/lib/ispell/bulgarian.aff
>>>>&1  ubyte           =0x0A   \b, 3rd line
>>>>>&0 string          x       "%s"
# For control reasons show the first lines of the variant starting with a
# ByteOrderMark (BOM=\xEF\xBB\xBF)
>1      ubeshort        =0xBBBF \b, with BOM
>>3     string          x       \b, 1st line "%s"
>>>&1   ubyte           >0x1F   \b, 2nd line
>>>>&-1 string          x       "%s"

# --- aspell compiled dictionary ----------------------------------------------
# From:         Joerg Jenderek
# URL:          https://en.wikipedia.org/wiki/GNU_Aspell
#               https://manpages.ubuntu.com/manpages/trusty/en/man8/aspell-autobuildhash.8.html
# Reference:    http://mark0.net/download/triddefs_xml.7z/defs/r/rws-aspell.trid.xml
#               https://ftp.gnu.org/gnu/aspell/aspell-0.60.8.tar.gz
#               aspell-0.60.8/modules/speller/default/data.cpp
#               aspell-0.60.8/modules/speller/default/readonly_ws.cpp
# Note:         called "aspell dictionary" by TrID
0       string          aspell\040default\040speller\040rowl    aspell dictionary
#!:mime application/octet-stream
!:mime  application/x-aspell-dictionary
!:ext   rws
# version like: 1.10 1.4
>28     string          x       \b, version %s
# u32int endian_check; 12345678=00BC614Eh
#>64    ulelong         x       \b, endian_check=%u
>>64    ulelong         12345678        \b, little endian
# not tested
>>64    ubelong         12345678        \b, big endian
# older aspell version, not like 0.60.8
>>64    default         x       \b, old

# --- aspell personal and personal replacement dictionaries -------------------
# URL:          https://en.wikipedia.org/wiki/GNU_Aspell
# Reference:    http://aspell.net/man-html/Format-of-the-Personal-and-Replacement-Dictionaries.html
# header line:  personal_ws-1.1 lang num [encoding]
0       string          personal_       aspell personal
# Reference:    http://mark0.net/download/triddefs_xml.7z/defs/p/pws-aspell.trid.xml
# Note:         called "aspell Personal dictionary" by TrID
>9      string          ws-     dictionary
#!:mime text/plain
!:mime  text/x-aspell-dictionary
# like: ~/.aspell.en.pws ~/.aspell.de_DE.pws ~/.aspell.it.pws
!:ext   pws
# Reference:    http://mark0.net/download/triddefs_xml.7z/defs/p/prepl-aspell.trid.xml
# Note:         called "aspell Personal Replacement dictionary" by TrID
# header line:  personal_repl-1.1 lang num [encoding]
>9      string          repl-   replacement dictionary
#!:mime text/plain
!:mime  text/x-aspell-dictionary
# like: ~/.aspell.en.prepl ~/.aspell.de_DE.prepl ~/.aspell.it.prepl
!:ext   prepl