xref: /freebsd/contrib/file/magic/Magdir/archive (revision e949ce9dc0e6fff26e83904f1008b76d36ba0a37)
1#------------------------------------------------------------------------------
2# $File: archive,v 1.218 2026/01/10 16:16:27 christos Exp $
3# archive:  file(1) magic for archive formats (see also "msdos" for self-
4#           extracting compressed archives)
5#
6# cpio, ar, arc, arj, hpack, lha/lharc, rar, squish, uc2, zip, zoo, etc.
7# pre-POSIX "tar" archives are also handled in the C code ../../src/is_tar.c.
8
9# POSIX tar archives
10# URL: https://en.wikipedia.org/wiki/Tar_(computing)
11# Reference: https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5&manpath=FreeBSD+8-current
12# header mainly padded with nul bytes
13500	quad		0
14!:strength /2
15# filename or extended attribute starts with printable characters in the range from space, nul (0x2000) to u-umlaut (0xFCFC)
16>0	ubeshort	>0x1F00
17>>0	ubeshort	<0xFCFD
18# last 4 header bytes often null but tar\0 in gtarfail2.tar gtarfail.tar-bad
19# at https://sourceforge.net/projects/s-tar/files/testscripts/
20>>>508	ubelong&0x8B9E8DFF	0
21# nul, space or ascii digit 0-7 at start of mode
22>>>>100	ubyte&0xC8	=0
23>>>>>101 ubyte&0xC8	=0
24# nul, space at end of check sum
25>>>>>>155 ubyte&0xDF	=0
26# space or ascii digit 0 at start of check sum
27>>>>>>>148	ubyte&0xEF	=0x20
28# check for specific 1st member name that indicates other mime type and file name suffix
29>>>>>>>>0	string		TpmEmuTpms/permall
30# maybe also look for 2nd tar member efi/nvram containing UEFI variables part
31#>>>>>>>>>512	search/0x1800	efi/nvram\0		EFI_PART_FOUND
32>>>>>>>>>0	use	tar-nvram
33# FOR DEBUGGING:
34#>>>>>>>>0	regex		\^[0-9]{2,4}[.](png|jpg|jpeg|tif|tiff|gif|bmp)	NAME "%s"
35# check for 1st image main name with digits used for sorting
36# and for name extension case insensitive like: PNG JPG JPEG TIF TIFF GIF BMP
37>>>>>>>>0	regex		\^[0-9]{2,4}[.](png|jpg|jpeg|tif|tiff|gif|bmp)
38>>>>>>>>>0	use	tar-cbt
39# check for 1st member name with ovf suffix
40>>>>>>>>0	regex		\^.{1,96}[.](ovf)
41>>>>>>>>>0	use	tar-ova
42# look for relative directory ./var/ or ./lte/ as 1st member name that indicates AVM firmware with other file name suffix
43>>>>>>>>0	ubequad&0xFFffE5eaE8ffFFff	0x2e2f6460602f0000
44>>>>>>>>>0	use	tar-avm
45# maybe look for AVM specific 2nd name entry
46# >>>>>>>>>517	string		/content\0	content~
47# >>>>>>>>>>0	use	tar-avm
48# >>>>>>>>>517	string		/install\0	install~
49# >>>>>>>>>>0	use	tar-avm
50# >>>>>>>>>517	string		/chksum\0	chksum~
51# >>>>>>>>>>0	use	tar-avm
52# >>>>>>>>>517	string		/modfw.nfo\0	modfw~
53# >>>>>>>>>>0	use	tar-avm
54# most (419/429) *.WBM (71/71) *.WBT with user name jcameron of Webmin developer Jamie Cameron in first tar archive member
55>>>>>>>>265	string		jcameron
56>>>>>>>>>0	use	tar-webmin
57# if 1st member name without digits and without used image suffix, without *.ovf,
58# ./var/ , ./lte/ and TpmEmuTpms/ then it is a pure TAR archive or Webmin without jcameron user name
59>>>>>>>>0	default		x
60# few (10/429) *.WBM without user name jcameron in 1st tar member but with WBM module.info name like:
61# apcupsd-0.81-2.wbm csavupdate.wbm cwmail.wbm dac960.wbm etcupdate.wbm logviewer.wbm memcached.wbm rinetd.wbm shoutcast.wbm vacationadmin-webmin-module-1.1.2.wbm
62# few (10/95) *.WBT without user name jcameron in 1st tar member but with WBT theme.info name like:
63# authentic-theme-21.09.5.wbt Mozilla-Modern.wbt virtual-server-theme-2.7.wbt fkn-webmintheme.0.6.0.wbt
64>>>>>>>>>512	search/210965/s	e.info\0
65>>>>>>>>>>0	use	tar-webmin
66# pure TAR
67>>>>>>>>>0	default		x
68>>>>>>>>>>0	use	tar-file
69# Note:	called "TAR - Tape ARchive" by TrID, "Tape Archive Format" by DROID via PUID x-fmt/265
70#	and "Tar archive" by shared MIME-info database from freedesktop.org
71#	minimal check and then display tar archive information which can also be
72#	embedded inside others like Android Backup, Clam AntiVirus database
730	name		tar-file
74>257	string		!ustar
75# header padded with nuls
76>>257	ulong		=0
77# GNU tar version 1.29 with non pax format option without refusing
78# creates misleading V7 header for Long path, Multi-volume, Volume type
79>>>156	ubyte		0x4c		GNU tar archive
80!:mime	application/x-gtar
81!:ext	tar/gtar
82>>>156	ubyte		0x4d		GNU tar archive
83!:mime	application/x-gtar
84!:ext	tar/gtar
85>>>156	ubyte		0x56		GNU tar archive
86!:mime	application/x-gtar
87!:ext	tar/gtar
88>>>156	default		x		tar archive (V7)
89!:mime	application/x-tar
90!:ext	tar
91# other stuff in padding
92# some implementations add new fields to the blank area at the end of the header record
93# created for example by DOS TAR 3.20g 1994 Tim V.Shapore with -j option
94>>257	ulong		!0		tar archive (old)
95!:mime	application/x-tar
96!:ext	tar
97# magic in newer, GNU, posix variants
98>257	string		=ustar
99# 2 last char of magic and UStar version because string expression does not work
100# 2 space characters followed by a null for GNU variant
101>>261	ubelong		=0x72202000	POSIX tar archive (GNU)
102!:mime	application/x-gtar
103!:ext	tar/gtar
104# UStar version with ASCII "00"
105>>261	ubelong		0x72003030	POSIX
106# gLOBAL and ExTENSION type only found in POSIX.1-2001 format
107>>>156	ubyte		0x67		\b.1-2001
108>>>156	ubyte		0x78		\b.1-2001
109>>>156	ubyte		x		tar archive
110!:mime	application/x-ustar
111!:ext	tar/ustar
112# version with 2 binary nuls embedded in Android Backup like com.android.settings.ab
113>>261	ubelong		0x72000000	tar archive (ustar)
114!:mime	application/x-ustar
115!:ext	tar/ustar
116# not seen ustar variant with garbage version
117>>261	default		x		tar archive (unknown ustar)
118!:mime	application/x-ustar
119!:ext	tar/ustar
120# show information for 1st tar archive member
121>0	use	tar-entry
122#	display information of tar archive member (file type, name, permissions, user, group)
1230	name		tar-entry
124# type flag of tar archive member
125#>156	ubyte		x		\b, %c-type
126>156	ubyte		x
127>>156	ubyte		0		\b, file
128>>156	ubyte		0x30		\b, file
129>>156	ubyte		0x31		\b, hard link
130>>156	ubyte		0x32		\b, symlink
131>>156	ubyte		0x33		\b, char device
132>>156	ubyte		0x34		\b, block device
133>>156	ubyte		0x35		\b, directory
134>>156	ubyte		0x36		\b, fifo
135>>156	ubyte		0x37		\b, reserved
136>>156	ubyte		0x4c		\b, long path
137>>156	ubyte		0x4d		\b, multi volume
138>>156	ubyte		0x56		\b, volume
139>>156	ubyte		0x67		\b, global
140>>156	ubyte		0x78		\b, extension
141>>156	default		x		\b, type
142>>>156	ubyte		x		'%c'
143# name[100]
144>0	string		>\0		%-.60s
145# mode mainly stored as an octal number in ASCII null or space terminated
146>100	string		>\0		\b, mode %-.7s
147# user id mainly as octal numbers in ASCII null or space terminated
148>108	string		>\0		\b, uid %-.7s
149# group id mainly as octal numbers in ASCII null or space terminated
150>116	string		>\0		\b, gid %-.7s
151# size mainly as octal number in ASCII
152>124	ubyte		<0x38
153>>124	string		>\0		\b, size %-.12s
154# coding indicated by setting the high-order bit of the leftmost byte
155>124	ubyte		>0xEF		\b, size 0x
156>>124	ubyte		!0xff		\b%2.2x
157>>125	ubyte		!0xff		\b%2.2x
158>>126	ubyte		!0xff		\b%2.2x
159>>127	ubyte		!0xff		\b%2.2x
160>>128	ubyte		!0xff		\b%2.2x
161>>129	ubyte		!0xff		\b%2.2x
162>>130	ubyte		!0xff		\b%2.2x
163>>131	ubyte		!0xff		\b%2.2x
164>>132	ubyte		!0xff		\b%2.2x
165>>133	ubyte		!0xff		\b%2.2x
166>>134	ubyte		!0xff		\b%2.2x
167>>135	ubyte		!0xff		\b%2.2x
168# seconds since 0:0:0 1 jan 1970 UTC as octal number mainly in ASCII null or space terminated
169>136	string		>\0		\b, seconds %-.11s
170# header checksum stored as an octal number in ASCII null or space terminated
171#>148	string		x		\b, cksum %.7s
172# linkname[100]
173>157	string		>\0		\b, linkname %-.40s
174# additional fields for ustar
175>257	string		=ustar
176# owner user name null terminated
177>>265	string		>\0		\b, user %-.32s
178# group name null terminated
179>>297	string		>\0		\b, group %-.32s
180# device major minor if not zero (binary or ASCII)
181>>329	ubequad&0xCFCFCFCFcFcFcFdf	!0
182>>>329	string		x		\b, devmaj %-.7s
183>>337	ubequad&0xCFCFCFCFcFcFcFdf	!0
184>>>337	string		x		\b, devmin %-.7s
185# prefix[155]
186>>345	string		>\0		\b, prefix %-.155s
187# old non ustar/POSIX tar
188>257	string		!ustar
189>>508	string		=tar\0
190# padding[255] in old star
191>>>257	string		>\0		\b, padding: %-.40s
192>>508	default		x
193# padding[255] in old tar sometimes comment field
194>>>257	string		>\0		\b, comment: %-.40s
195# Summary:	VirtualBox NvramFile with UEFI variables packed inside TAR archive
196# URL:		https://www.virtualbox.org/manual/ch08.html#vboxmanage-modifynvram
197# Reference:	http://mark0.net/download/triddefs_xml.7z/defs/n/nvram-virtualbox-tar.trid.xml
198# Note:		called "VirtualBox saved (U)EFI BIOS settings (TAR)" by TrID and
199#		verified by 7-Zip `7z l -ttar Mint-21.1.nvram` and
200#		VirtualBox `VBoxManage modifynvram "Mint-21.1" listvars`
2010	name		tar-nvram
202#
203>0	string		x		VirtualBox NVRAM file
204#!:mime	application/x-gtar
205!:mime	application/x-virtualbox-nvram
206!:ext	nvram
207# first name[100] like: TpmEmuTpms/permall
208>0	use	tar-entry
209# 2nd tar member efi/nvram contains UEFI variables part described by ./virtual
210>512	search/0x1800/s	efi/nvram\0
211>>&0	use	tar-entry
212# 2nd tar member efi/nvram content could be described by ./virtual
213#>>&512	indirect	x
214# Summary:	Comic Book Archive *.CBT with TAR format
215# URL:		https://en.wikipedia.org/wiki/Comic_book_archive
216#		http://fileformats.archiveteam.org/wiki/Comic_Book_Archive
217# Note:		there exist also RAR, ZIP, ACE and 7Z packed variants
2180	name		tar-cbt
219>0	string		x		Comic Book archive, tar archive
220#!:mime	application/x-tar
221!:mime	application/vnd.comicbook
222#!:mime	application/vnd.comicbook+tar
223!:ext	cbt
224# name[100] probably like: 19.jpg 0001.png 0002.png
225# or maybe like ComicInfo.xml
226#>0	string		>\0		\b, 1st image %-.60s
227>0	use	tar-entry
228# Summary:	Open Virtualization Format *.OVF with disk images and more packed as TAR archive *.OVA
229# From:		Joerg Jenderek
230# URL:		https://en.wikipedia.org/wiki/Open_Virtualization_Format
231#		http://fileformats.archiveteam.org/wiki/OVF_(Open_Virtualization_Format)
232# Reference:	http://mark0.net/download/triddefs_xml.7z/defs/o/ova.trid.xml
233# Note:		called "Open Virtualization Format package" by TrID
234#		assuming *.ovf comes first
2350	name		tar-ova
236>0	string		x		Open Virtualization Format Archive
237#!:mime	application/x-ustar
238# http://extension.nirsoft.net/ova
239!:mime	application/x-virtualbox-ova
240!:ext	ova
241# assuming name[100] like: DOS-0.9.ovf FreeDOS_1.ovf Win98SE_DE.ovf
242#>0	string		>\0		\b, with %-.60s
243>0	use	tar-entry
244# Summary:	AVM firmware (FRITZ!OS) for the FRITZ!Box (router)
245# From:		Joerg Jenderek
246# URL:		https://en.wikipedia.org/wiki/Fritz!Box
247#		https://www.redteam-pentesting.de/de/advisories/rt-sa-2014-010/-avm-fritz-box-firmware-signature-bypass
248# Reference:	http://mark0.net/download/triddefs_xml.7z/defs/i/image-avm.trid.xml
249# Note:		verified by 7-Zip `7z l -ttar FRITZ.Box_4040-07.57.image`
2500	name		tar-avm
251>0	string		x		AVM FRITZ!Box firmware
252#!:mime	application/x-gtar
253!:mime	application/x-avm-image
254!:ext	image
255# tar member ./var/content starts with line like "Product=Fritz_Box_HW227 (FRITZ!Box 4040)"
256>>1024	search/512	Product=Fritz_Box_
257>>>&0	string		x		%s
258# version string like: 07.57 07.58
259>>>1044	search		Version=	\b, version
260>>>>&0	string		x		%s
261# product phrase too far behind (dozen MB) in many samples like: FRITZ.Box_4040-07.12.image FRITZ.Box_6820v3_LTE-07.57.image
262# so try to look for other characteristic foo
263# >>1024	default		x		OTHER_PATTERN!
264# >>>1023	search		AVM_PATTERN	PATTERNfound
265# first name[100] like: ./var/ ./lte/
266>0	use	tar-entry
267# if 1st entry is directory then show 2nd entry
268>156	ubyte		0x35
269# 2nd tar member name like: ./var/content (often ) ./var/install ./var/chksum ./lte/modfw.nfo
270>>512	use	tar-entry
271# Summary:	Webmin Module or Theme
272# From:		Joerg Jenderek
273# URL:		https://en.wikipedia.org/wiki/Webmin
274#		https://webmin.com/docs/development/creating-modules/
275#		https://webmin.com/docs/development/creating-themes/
276# Reference:	http://mark0.net/download/triddefs_xml.7z/defs/w/wbm.trid.xml
277#		http://mark0.net/download/triddefs_xml.7z/defs/w/wbt.trid.xml
278#		http://mark0.net/download/triddefs_xml.7z/defs/w/wbt-gif.trid.xml
279# Note:		called "Webmin Module" "Webmin Theme" by TrID
280#		mostly verified by 7-Zip `7z l -ttar *.wbm | grep "\module.info"` and
281#		`7z l -ttar *.wbt | grep "\theme.info"`
2820	name		tar-webmin
283>0	string		x		Webmin
284# Webmin module or theme
285>>512	search/1767941/s /module.info	Module
286!:mime	application/x-webmin-module
287!:ext	wbm
288# According to documentation module.info is mandatory but instead theme.info is found in
289# old-blue-theme.wbm old-blue-theme-1.0.wbm old-mscstyle3.wbm virtual-server-mobile.wbm
290# GRR:  maybe here wrong file name suffix WBM instead of WBT
291>>512	default		x
292>>>512	search/3149333/s /theme.info	Theme
293!:mime	application/x-webmin-theme
294!:ext	wbt
295# next 3 lines should not happen
296>>>512	default		x		Module or Theme
297!:mime	application/x-webmin
298!:ext	wbm/wbt
299# GNU or POSIX tar
300>257	string		=ustar		(
301# 2 space characters followed by a null for GNU variant for most (428/429) WBM samples
302>>261	ubelong		=0x72202000	\bGNU tar)
303#!:mime	application/x-gtar
304# UStar version variant with ASCII "00" as in few (1/429) samples like cwmail.wbm
305>>261	ubelong		0x72003030	\bPOSIX tar)
306#!:mime	application/x-ustar
307#>>>156	ubyte		x		tar archive
308# Apparently first archive member name[100] is directory like: dynbind/ ssh/ virtualmin-powerdns/ virtual-server-mobile/ vnc/
309>>0	use	tar-entry
310# look for characteristic WBM module info name starting with "module.info" for language variant like in: ssh2.wbm
311>>512	search/1767941/s /module.info
312# look for TAR magic of WBM archive module info
313>>>&0	search/257/s	ustar
314# show details for WBM archive member module info
315>>>>&-257 use	tar-entry
316# look for characteristic WBT theme info name with "theme.info" like in: authentic-theme-21.09.5.wbt
317>>512	search/3149333/s /theme.info\0
318# look for TAR magic of WBT archive theme info
319>>>&0	search/257/s	ustar
320>>>>&-257 use	tar-entry
321
322# Incremental snapshot gnu-tar format from:
323# https://www.gnu.org/software/tar/manual/html_node/Snapshot-Files.html
3240	string		GNU\ tar-	GNU tar incremental snapshot data
325>&0	regex		[0-9]\\.[0-9]+-[0-9]+	version %s
326
327# cpio archives
328#
329# Yes, the top two "cpio archive" formats *are* supposed to just be "short".
330# The idea is to indicate archives produced on machines with the same
331# byte order as the machine running "file" with "cpio archive", and
332# to indicate archives produced on machines with the opposite byte order
333# from the machine running "file" with "byte-swapped cpio archive".
334#
335# The SVR4 "cpio(4)" hints that there are additional formats, but they
336# are defined as "short"s; I think all the new formats are
337# character-header formats and thus are strings, not numbers.
338# URL:		http://fileformats.archiveteam.org/wiki/Cpio
339#		https://en.wikipedia.org/wiki/Cpio
340# Reference:	https://people.freebsd.org/~kientzle/libarchive/man/cpio.5.txt
341# Update:	Joerg Jenderek
342#
343# Reference:    http://mark0.net/download/triddefs_xml.7z/defs/a/ark-cpio-bin.trid.xml
344# Note:		called "CPIO archive (binary)" by TrID, "cpio/Binary LE" by 7-Zip and "CPIO" by DROID via PUID fmt/635
3450	short		070707
346# skip DROID fmt-635-signature-id-960.cpio by looking for pathname of 1st entry
347>26	string		>\0		cpio archive
348!:mime	application/x-cpio
349# https://download.opensuse.org/distribution/leap/15.4/iso/openSUSE-Leap-15.4-NET-x86_64-Media.iso
350# boot/x86_64/loader/bootlogo
351# message.cpi
352!:ext	/cpio/cpi
353>>0	use	cpio-bin
354# Reference:    http://mark0.net/download/triddefs_xml.7z/defs/a/ark-cpio-bin-sw.trid.xml
355# Note:		called "CPIO archive (byte swapped binary)" by TrID and "Cpio/Binary BE" by 7-Zip
3560	short		0143561		byte-swapped cpio archive
357!:mime	application/x-cpio # encoding: swapped
358# https://telparia.com/fileFormatSamples/archive/cpio/skeleton2.cpio
359!:ext	cpio
360>0	use	cpio-bin-be
361# Reference:    http://mark0.net/download/triddefs_xml.7z/defs/a/ark-cpio.trid.xml
362# Note:		called "CPIO archive (portable)" by TrID, "cpio/Portable ASCII" by 7-Zip and "cpio/odc" by GNU cpio
3630	string		070707		ASCII cpio archive (pre-SVR4 or odc)
364!:mime	application/x-cpio
365# https://telparia.com/fileFormatSamples/archive/cpio/ pthreads-1.60B5.osr5src.cpio cinema.cpi VOL.000.008 VOL.000.012
366!:ext	cpio/cpi/008/012
367# Note:		called "CPIO archive (portable)" by TrID, "cpio/New ASCII" by 7-Zip and "cpio/newc" by GNU cpio
3680	string		070701		ASCII cpio archive (SVR4 with no CRC)
369!:mime	application/x-cpio
370# https://telparia.com/fileFormatSamples/archive/cpio/MainActor-2.06.3.cpio
371!:ext	cpio
372# Note:		called "CPIO archive (portable)" by TrID, "cpio/New CRC" by 7-Zip and "cpio/crc" by GNU cpio
3730	string		070702		ASCII cpio archive (SVR4 with CRC)
374!:mime	application/x-cpio
375# http://ftp.gnu.org/gnu/tar/tar-1.27.cpio.gz
376# https://telparia.com/fileFormatSamples/archive/cpio/pcmcia
377!:ext	/cpio
378#	display information of old binary cpio archive (header fields read as little-endian 16-bit words)
379# Note:	verified by 7-Zip `7z l -tcpio -slt *.cpio` and
380#	`cpio -ivt --numeric-uid-gid --file=clam.bin-le.cpio`
3810	name	cpio-bin
382# c_dev; device number; presumably of the device the entry was read from, see cpio(5) (to be confirmed)
383>2	uleshort	x		\b; device %u
384# c_ino; truncated inode number; use `ls --inode`
385>4	uleshort	x		\b, inode %u
386# c_mode; mode specifies permissions and file type like: ?622~?rw-r--r-- by `ls -l`
387>6	uleshort	x		\b, mode %o
388# c_uid; numeric user id; use `ls --numeric-uid-gid`
389>8	uleshort	x		\b, uid %u
390# c_gid; numeric group id
391>10	uleshort	x		\b, gid %u
392# c_nlink; links to this file; directories at least 2; only shown if more than 1
393>12	uleshort	>1		\b, %u links
394# c_rdev; device number for block and character entries; zero for all other entries by writers
395# like 0x0440 for /dev/ttyS0; only shown if not zero
396>14	uleshort	>0		\b, device %#4.4x
397# c_mtime[2]; modification time in seconds since 1 January 1970; most-significant 16 bits first
398>16	medate		x		\b, modified %s
399# c_filesize[2]; size of the file data (not of the pathname); most-significant 16 bits first like: 544
400>22	melong		x		\b, %u bytes
401# c_namesize; bytes in the pathname that follows the header like: 9
402#>20	uleshort	x		\b, namesize %u
403# pathname of entry like: "clam.exe"
404>26	string		x		"%s"
405#	display information of old binary byte swapped cpio archive; same fields and offsets as in cpio-bin but read as big-endian
406# Note:	verified by 7-Zip `7z l -tcpio -slt *.cpio` and
407#	`LANGUAGE=C cpio -ivt --numeric-uid-gid --file=clam.bin-be.cpio`
4080	name	cpio-bin-be
409>2	ubeshort	x		\b; device %u
410>4	ubeshort	x		\b, inode %u
411>6	ubeshort	x		\b, mode %o
412>8	ubeshort	x		\b, uid %u
413>10	ubeshort	x		\b, gid %u
414>12	ubeshort	>1		\b, %u links
415>14	ubeshort	>0		\b, device %#4.4x
416>16	bedate		x		\b, modified %s
417>22	ubelong	 	x		\b, %u bytes
418#>20	ubeshort	x		\b, namesize %u
419>26	string		x		"%s"
420
421#
422# Various archive formats used by various versions of the "ar"
423# command.
424#
425
426#
427# Original UNIX archive formats.
428# They were written with binary values in host byte order, and
429# the magic number was a host "int", which might have been 16 bits
430# or 32 bits.  We don't say "PDP-11" or "VAX", as there might have
431# been ports to little-endian 16-bit-int or 32-bit-int platforms
432# (x86?) using some of those formats; if none existed, feel free
433# to use "PDP-11" for little-endian 16-bit and "VAX" for little-endian
434# 32-bit.  There might have been big-endian ports of that sort as
435# well.
436#
4370	leshort		0177555		very old 16-bit-int little-endian archive
4380	beshort		0177555		very old 16-bit-int big-endian archive
4390	lelong		0177555		very old 32-bit-int little-endian archive
4400	belong		0177555		very old 32-bit-int big-endian archive
441
4420	leshort		0177545		old 16-bit-int little-endian archive
443>2	string		__.SYMDEF	random library
4440	beshort		0177545		old 16-bit-int big-endian archive
445>2	string		__.SYMDEF	random library
4460	lelong		0177545		old 32-bit-int little-endian archive
447>4	string		__.SYMDEF	random library
4480	belong		0177545		old 32-bit-int big-endian archive
449>4	string		__.SYMDEF	random library
450
451#
452# From "pdp" (but why a 4-byte quantity?)
453#
4540	lelong		0x39bed		PDP-11 old archive
4550	lelong		0x39bee		PDP-11 4.0 archive
456
457#
458# XXX - what flavor of APL used this, and was it a variant of
459# some ar archive format?  It's similar to, but not the same
460# as, the APL workspace magic numbers in pdp.
461#
4620	long		0100554		apl workspace
463
464#
465# System V Release 1 portable(?) archive format.
466#
4670	string		=<ar>		System V Release 1 ar archive
468!:mime	application/x-archive
469
470#
471# Debian package; it's in the portable archive format, and needs to go
472# before the entry for regular portable archives, as it's recognized as
473# a portable archive whose first member has a name beginning with
474# "debian".
475#
476# Update: Joerg Jenderek
477# URL: https://en.wikipedia.org/wiki/Deb_(file_format)
4780	string		=!<arch>\ndebian
479# https://manpages.debian.org/testing/dpkg/dpkg-split.1.en.html
480>14	string		-split	part of multipart Debian package
481!:mime	application/vnd.debian.binary-package
482# udeb is used for stripped down deb file
483!:ext	deb/udeb
484>14	string		-binary	Debian binary package
485!:mime	application/vnd.debian.binary-package
486# For ipk packager see also https://en.wikipedia.org/wiki/Opkg
487!:ext	deb/udeb/ipk
488# This should not happen
489>14	default		x	Unknown Debian package
490# NL terminated version; for most Debian cases this is 2.0 or 2.1 for split
491>68	string		>\0		(format %s)
492#>68	string		!2.0\n
493#>>68	string		x		(format %.3s)
494>68	string		=2.0\n
495# 2nd archive name=control archive name like control.tar.gz or control.tar.xz
496# or control.tar.zst
497>>72	string		>\0		\b, with %.15s
498# look for 3rd archive name=data archive name like data.tar.{gz,xz,bz2,lzma}
499>>0	search/0x93e4f	data.tar.	\b, data compression
500# the above line only works if FILE_BYTES_MAX in ../../src/file.h is raised
501# for example like libreoffice-dev-doc_1%3a5.2.7-1+rpi1+deb9u3_all.deb
502>>>&0	string		x		%.2s
503# skip space (0x20 BSD) and slash (0x2f System V) character marking end of name
504>>>&2	ubyte		!0x20
505>>>>&-1	ubyte		!0x2f
506# display 3rd character of file name extension like 2 of bz2 or m of lzma
507>>>>>&-1	ubyte	x		\b%c
508>>>>>>&0	ubyte	!0x20
509>>>>>>>&-1	ubyte	!0x2f
510# display 4th character of file name extension like a of lzma
511>>>>>>>>&-1	ubyte	x		\b%c
512# split debian package case
513>68	string		=2.1\n
514# dpkg-1.18.25/dpkg-split/info.c
515# NL terminated ASCII package name like ckermit
516>>&0	string		x		\b, %s
517# NL terminated package version like 302-5.3
518>>>&1	string		x		%s
519# NL terminated MD5 checksum
520>>>>&1	string		x		\b, MD5 %s
521# NL terminated original package length
522>>>>>&1	string		x		\b, unsplitted size %s
523# NL terminated part length
524>>>>>>&1	string	x		\b, part length %s
525# NL terminated package part like n/m
526>>>>>>>&1	string	x		\b, part %s
527# NL terminated package architecture like armhf since dpkg 1.16.1 or later
528>>>>>>>>&1	string	x		\b, %s
529
530#
531# MIPS archive; they're in the portable archive format, and need to go
532# before the entry for regular portable archives, as it's recognized as
533# a portable archive whose first member has a name beginning with
534# "__________E".
535#
5360	string	=!<arch>\n__________E	MIPS archive
537!:mime	application/x-archive
538>20	string	U			with MIPS Ucode members
539>21	string	L			with MIPSEL members
540>21	string	B			with MIPSEB members
541>19	string	L			and an EL hash table
542>19	string	B			and an EB hash table
543>22	string	X			-- out of date
544
545#
546# BSD/SVR2-and-later portable archive formats.
547#
548# Update: Joerg Jenderek
549# URL:		http://fileformats.archiveteam.org/wiki/AR
550# Reference:	https://www.unix.com/man-page/opensolaris/3HEAD/ar.h/
551# Note:		Mach-O universal binary in ./cafebabe is dependent
552# TODO:		unify current ar archive, MIPS archive, Debian package
553#		distinguish BSD, SVR; 32, 64 bit; HP from other 32-bit SVR;
554#		*.ar packages from *.a libraries. handle empty archive
5550	string		=!<arch>\n		current ar archive
556# print first and possibly second ar_name[16] for debugging purpose
557#>8			string	x	\b, 1st "%.16s"
558#>68			string	x	\b, 2nd "%.16s"
559!:mime	application/x-archive
560# a in most cases for libraries; lib for Microsoft libraries; ar in other cases
561!:ext	a/lib/ar
562>8	string		__.SYMDEF	random library
563# first member with long marked name __.SYMDEF SORTED implies BSD library
564>68	string		__.SYMDEF\ SORTED	random library
565# Reference: https://parisc.wiki.kernel.org/images-parisc/b/b2/Rad_11_0_32.pdf
566# "archive file" entry moved from ./hp
567# LST header system_id 0210h~PA-RISC 1.1,... identifies the target architecture
568# LST header a_magic 0619h~relocatable library
569>68	belong 		0x020b0619	- PA-RISC1.0 relocatable library
570>68	belong	 	0x02100619	- PA-RISC1.1 relocatable library
571>68	belong 		0x02110619	- PA-RISC1.2 relocatable library
572>68	belong 		0x02140619	- PA-RISC2.0 relocatable library
573#EOF for common ar archives
574
575#
576# "Thin" archive, as can be produced by GNU ar.
577#
5780	string		=!<thin>\n	thin archive with
579>68	belong		0		no symbol entries
580>68	belong		1		%d symbol entry
581>68	belong		>1		%d symbol entries
582
5830	search/1	-h-		Software Tools format archive text
584
585# ARC archiver, from Daniel Quinlan (quinlan@yggdrasil.com)
586#
587# The first byte is the magic (0x1a), byte 2 is the compression type for
588# the first file (0x01 through 0x09), and bytes 3 to 15 are the MS-DOS
589# filename of the first file (null terminated).  Since some types collide
590# we only test some types on basis of frequency: 0x08 (83%), 0x09 (5%),
591# 0x02 (5%), 0x03 (3%), 0x04 (2%), 0x06 (2%).  0x01 collides with terminfo.
5920	lelong&0x8080ffff	0x0000081a	ARC archive data, dynamic LZW
593!:mime	application/x-arc
5940	lelong&0x8080ffff	0x0000091a	ARC archive data, squashed
595!:mime	application/x-arc
5960	lelong&0x8080ffff	0x0000021a	ARC archive data, uncompressed
597!:mime	application/x-arc
5980	lelong&0x8080ffff	0x0000031a	ARC archive data, packed
599!:mime	application/x-arc
6000	lelong&0x8080ffff	0x0000041a	ARC archive data, squeezed
601!:mime	application/x-arc
6020	lelong&0x8080ffff	0x0000061a	ARC archive data, crunched
603!:mime	application/x-arc
604# [JW] stuff taken from idarc, obviously ARC successors:
6050	lelong&0x8080ffff	0x00000a1a	PAK archive data
606!:mime	application/x-arc
6070	lelong&0x8080ffff	0x0000141a	ARC+ archive data
608!:mime	application/x-arc
6090	lelong&0x8080ffff	0x0000481a	HYP archive data
610!:mime	application/x-arc
611
612# Acorn archive formats (Disaster prone simpleton, m91dps@ecs.ox.ac.uk)
613# I can't create either SPARK or ArcFS archives so I have not tested this stuff
614# [GRR:  the original entries collide with ARC, above; replaced with combined
615#  version (not tested)]
616#0	byte		0x1a		RISC OS archive (spark format)
6170	string		\032archive	RISC OS archive (ArcFS format)
6180       string          Archive\000     RISC OS archive (ArcFS format)
619
620# All these were taken from idarc, many could not be verified. Unfortunately,
621# there were many low-quality sigs, i.e. easy to trigger false positives.
622# Please notify me of any real-world fishy/ambiguous signatures and I'll try
623# to get my hands on the actual archiver and see if I find something better. [JW]
624# probably many can be enhanced by finding some 0-byte or control char near the start
625
626# idarc calls this Crush/Uncompressed... *shrug*
6270	string	CRUSH Crush archive data
628# Squeeze It (.sqz)
6290	string	HLSQZ Squeeze It archive data
630# SQWEZ
6310	string	SQWEZ SQWEZ archive data
632# HPack (.hpk)
6330	string	HPAK HPack archive data
634# HAP
6350	string	\x91\x33HF HAP archive data
636# MD/MDCD
6370	string	MDmd MDCD archive data
638# LIM
6390	string	LIM\x1a LIM archive data
640# SAR
6413	string	LH5 SAR archive data
642# BSArc/BS2
6430	string	\212\3SB\020\0	BSArc/BS2 archive data
644# Bethesda Softworks Archive (Oblivion)
6450	string	BSA\0 		BSArc archive data
646>4	lelong	x		version %d
647# MAR
6482	string	=-ah MAR archive data
649# ACB
650#0	belong&0x00f800ff	0x00800000 ACB archive data
651# CPZ
652# TODO, this is what idarc says: 0	string	\0\0\0 CPZ archive data
653# JRC
6540	string	JRchive JRC archive data
655# Quantum
6560	string	DS\0 Quantum archive data
657# ReSOF
6580	string	PK\3\6 ReSOF archive data
659# QuArk
6600	string	7\4 QuArk archive data
661# YAC
66214	string	YC YAC archive data
663# X1
6640	string	X1 X1 archive data
6650	string	XhDr X1 archive data
666# CDC Codec (.dqt)
6670	belong&0xffffe000	0x76ff2000 CDC Codec archive data
668# AMGC
6690	string	\xad6" AMGC archive data
670# NuLIB
6710	string	N\xc3\xb5F\xc3\xa9lx\xc3\xa5 NuLIB archive data
672# PakLeo
6730	string	LEOLZW PAKLeo archive data
674# ChArc
6750	string	SChF ChArc archive data
676# PSA
6770	string	PSA PSA archive data
678# CrossePAC
6790	string	DSIGDCC CrossePAC archive data
680# Freeze
6810	string	\x1f\x9f\x4a\x10\x0a Freeze archive data
682# KBoom
6830	string	\xc2\xa8MP\xc2\xa8 KBoom archive data
684# NSQ, must go after CDC Codec
6850	string	\x76\xff NSQ archive data
686# DPA
6870	string	Dirk\ Paehl DPA archive data
688# BA
689# TODO: idarc says "bytes 0-2 == bytes 3-5"
690# TTComp
691# URL: http://fileformats.archiveteam.org/wiki/TTComp_archive
692# Update: Joerg Jenderek, A Iooss
693# GRR: line below is too general as it matches also Panorama database "TCDB 2003-10 demo.pan", others
6940	string	\0\6
695# look for first keyword of Panorama database *.pan
696>12	search/261	DESIGN
697# skip keyword with low entropy
698>12	default		x
699# skip DOS 2.0 backup id file, sequence 6 with many nils like BACKUPID_xx6.@@@ handled by ./msdos
700>>8	quad		!0
701>>>0	use	ttcomp
702# variant ASCII, 4K dictionary (strength=48=50-2). With strength=49 wrong order! WHY?
7030	string	\1\6
704# TODO:
705# skip VAX-order 68k Blit mpx/mux executable (strength=50) handled by ./blit
706!:strength	-2
707>0	use	ttcomp
7080	string	\0\5
709# skip some DOS 2.0 backup id file, sequence 5 with many nils like BACKUPID_075.@@@ handled by ./msdos
710>8	quad	!0
711>>0	use	ttcomp
7120	string	\1\5
713# TODO:
714# variant ASCII, 2K dictionary (strength=48=50-2). With strength=49 wrong order! WHY?
715# skip ctab data (strength=50) handled by ./ibm6000
716# skip locale data table (strength=50) handled by ./digital
717!:strength	-2
718>0	use	ttcomp
7190	string	\0\4
720# skip many Maple help database *.hdb with version tag handled by ./maple
721>1028	string	!version
722# skip veclib maple.hdb by looking for Maple keyword
723>>4	search/1091	Maple\040
724#>4	search/34090	Maple\040
725>>4	default		x
726# skip DOS 2.0-3.2 backed up sequence 4 with many nils like LOTUS5.RAR handled by ./msdos
727# skip xBASE Compound Index file *.CDX with many nils
728>>>0x54	quad		!0
729>>>>0	use	ttcomp
7300	string	\1\4
731# TODO:
732# skip shared library (strength=50) handled by ./ibm6000
733!:strength	-2
734# skip Commodore PET BASIC programs (Mastermind.prg) with last 3 nil bytes (\0~end of line followed by 0000h line offset)
735#>-4	ubelong		x	LAST_BYTES=%8.8x
736>-4	ubelong&0x00FFffFF	!0
737>>0	use	ttcomp
738# match the last 2 bytes of the file (negative offset -2 counts from end of file) against known TTComp endings to reduce false positives
739# see https://mark0.net/forum/index.php?topic=848
7400	name	ttcomp
741>-2	string	\x01\xff
742>>+0	use	ttcomp-display
743>-2	string	\x80\x7f
744>>+0	use	ttcomp-display
745>-2	string	\xc0\x3f
746>>+0	use	ttcomp-display
747>-2	string	\xe0\x1f
748>>+0	use	ttcomp-display
749>-2	string	\xf0\x0f
750>>+0	use	ttcomp-display
751>-2	string	\xf8\x07
752>>+0	use	ttcomp-display
753>-2	string	\xfc\x03
754>>+0	use	ttcomp-display
755>-2	string	\xfe\x01
756>>+0	use	ttcomp-display
757# display information of TTComp archive
7580	name	ttcomp-display
759# (version 5.25) labeled the entry as "TTComp archive data"
760>0	ubyte	x	TTComp archive data
761!:mime	application/x-compress-ttcomp
762# PBACKSCR.PI1
763!:ext	$xe/$ts/pi1/__d
764# compression type: 0~binary compression 1~ASCII compression
765>0	ubyte	0	\b, binary
766>0	ubyte	1	\b, ASCII
767# size of the dictionary:  4~1024 bytes 5~2048 bytes 6~4096 bytes
768>1	ubyte	4	\b, 1K dictionary
769>1	ubyte	5	\b, 2K dictionary
770>1	ubyte	6	\b, 4K dictionary
771
772# From:		Joerg Jenderek
773# URL:		https://en.wikipedia.org/wiki/Disk_Copy
774# reference:	http://nulib.com/library/FTN.e00005.htm
7750x52	ubeshort	0x0100
776# test for disk image size equal or above 400k
777>0x40	ubelong		>409599
778# test also for disk image size equal or below 1440k to skip
779# windows7en.mbr UNICODE.DAT
780#>>0x40	ubelong		<1474561
781# test now for "low" disk image size equal or below 64 MiB to skip
782# windows7en.mbr (B441BBAAh) UNICODE.DAT (0400AF05h)
783>>0x40	ubelong		<0x04000001
784# To skip Flags$StringJoiner.class with size 00106A61h test also for valid disk image sizes
785# 00064000 for  400k GCR disks	dc42-400k-gcr.trid.xml
786# 000c8000 for  800k GCR disks	dc42-800k-gcr.trid.xml
787# 000b4000 for  720k MFM disks	dc42-720k-mfm.trid.xml
788# 00168000 for 1440k MFM disks	dc42-1440k-mfm.trid.xml
789#	https://lisaem.sunder.net/LisaProjectDocs.txt
790# 00500000	05M	available
791# 00A00000	10M	available
792# 01800000	24M	possible
793# 02000000	32M	uncertain
794# 04000000	64M	uncertain
795>>>0x40	ubelong&0xf8003fFF	0
796# skip samples with invalid disk name length like:
797# 181 (biosmd80.rom) 202 (Flags$StringJoiner.class) 90 (UNICODE.DAT)
798>>>>0x0	ubyte			<64
799>>>>>0	use			dc42-floppy
800#	display information of Apple DiskCopy 4.2 floppy image
8010	name		dc42-floppy
802# disk name length; maximal 63
803#>0	ubyte	    	x	DISK NAME LENGTH %u
804# ASCII image pascal (maximal 63 bytes) name padded with NULs like:
805# "Microsoft Mail" "Disquette 2" "IIe Installer Disk"
806# "-lisaem.sunder.net hd-" (dc42-lisaem.trid.xml) "-not a Macintosh disk" (dc42-nonmac.trid.xml)
807>00	pstring/B	x	Apple DiskCopy 4.2 image %s
808#!:mime	application/octet-stream
809!:mime	application/x-dc42-floppy-image
810!:apple	dCpydImg
811# probably also img like: "Utilitaires 2.img" "Installation 7.img"
812!:ext	image/dc42/img
813# data size in bytes like: 409600 737280 819200 1474560
814>0x40	ubelong		x	\b, %u bytes
815# for debugging purpose size in hexadecimal
816#>0x40	ubelong		x	(%#8.8x)
817# tag size in bytes like: 0 (often) 2580h (PUID fmt/625) 4B00h (Microsoft Mail.image)
818>0x44	ubelong		>0	\b, %#x tag size
819# data checksum
820#>0x48	ubelong		x	\b, %#x checksum
821# tag checksum
822#>0x4c	ubelong		x	\b, %#x tag checksum
823# disk encoding like: 0 1 2 3 (PUID: fmt/625)
824>0x50	ubyte		0	\b, GCR CLV ssdd (400k)
825>0x50	ubyte		1	\b, GCR CLV dsdd (800k)
826>0x50	ubyte		2	\b, MFM CAV dsdd (720k)
827>0x50	ubyte		3	\b, MFM CAV dshd (1440k)
828>0x50	ubyte		>3	\b, %#x encoding
829# format byte like: 12h (Lisa 400K) 24h (400K Macintosh) 96h (800K Apple II disk)
830# 2 (Mac 400k "Disquette Installation 13.image")
831# 22h (double-sided MFM or Mac 800k "Disco 12.image" "IIe Installer Disk.image")
832>0x51	ubyte		x	\b, %#x format
833#>0x54	ubequad		x	\b, data %#16.16llx
834# ESP, could this conflict with Easy Software Products' (e.g.ESP ghostscript) documentation?
8350	string	ESP ESP archive data
836# ZPack
8370	string	\1ZPK\1 ZPack archive data
838# Sky
8390	string	\xbc\x40 Sky archive data
840# UFA
8410	string	UFA UFA archive data
842# Dry
8430	string	=-H2O DRY archive data
844# FoxSQZ
8450	string	FOXSQZ FoxSQZ archive data
846# AR7
8470	string	,AR7 AR7 archive data
848# PPMZ
8490	string	PPMZ PPMZ archive data
850# MS Compress
851# Update: Joerg Jenderek
852# URL: http://fileformats.archiveteam.org/wiki/MS-DOS_installation_compression
853# Reference: https://hwiegman.home.xs4all.nl/fileformats/compress/szdd_kwaj_format.html
854# Note: use correct version of extracting tool like EXPAND, UNPACK, DECOMP or 7Z
8554	string	\x88\xf0\x27
856#		KWAJ variant
857>0	string	KWAJ		MS Compress archive data, KWAJ variant
858!:mime	application/x-ms-compress-kwaj
859# extension not working in version 5.32
860# magic/Magdir/archive, 284: Warning: EXTENSION type ` ??_' has bad char '?'
861# file: line 284: Bad magic entry '   ??_'
862!:ext	??_
863# compression method (0-4)
864>>8	uleshort	x	\b, %u method
865# offset of compressed data
866>>10	uleshort	x	\b, %#x offset
867#>>(10.s)	uleshort	x
868#>>>&-6		string	x	\b, TEST extension %-.3s
869# header flags to mark header extensions
870>>12	uleshort	>0	\b, %#x flags
871# 4 bytes: decompressed length of file
872>>12	uleshort	&0x01
873>>>14	ulelong		x	\b, original size: %u bytes
874# 2 bytes: unknown purpose
875# 2 bytes: length of unknown data + mentioned bytes
876# 1-9 bytes: null-terminated file name
877# 1-4 bytes: null-terminated file extension
878>>12	uleshort	&0x08
879>>>12	uleshort				^0x01
880>>>>12		uleshort			^0x02
881>>>>>12			uleshort		^0x04
882>>>>>>12			uleshort	^0x10
883>>>>>>>14				string	x	\b, %-.8s
884>>>>>>12			uleshort	&0x10
885>>>>>>>14				string	x	\b, %-.8s
886>>>>>>>>&1				string	x	\b.%-.3s
887>>>>>12			uleshort		&0x04
888>>>>>>12			uleshort	^0x10
889>>>>>>>(14.s)			uleshort	x
890>>>>>>>>&14				string	x	\b, %-.8s
891>>>>>>12			uleshort	&0x10
892>>>>>>>(14.s)			uleshort	x
893>>>>>>>>&14				string	x	\b, %-.8s
894>>>>>>>>>&1				string	x	\b.%-.3s
895>>>>12		uleshort			&0x02
896>>>>>12			uleshort		^0x04
897>>>>>>12			uleshort	^0x10
898>>>>>>>16				string	x	\b, %-.8s
899>>>>>>12			uleshort	&0x10
900>>>>>>>16				string	x	\b, %-.8s
901>>>>>>>>&1				string	x	\b.%-.3s
902>>>>>12			uleshort		&0x04
903>>>>>>12			uleshort	^0x10
904>>>>>>>(16.s)			uleshort	x
905>>>>>>>>&16				string	x	\b, %-.8s
906>>>>>>12			uleshort	&0x10
907>>>>>>>(16.s)			uleshort	x
908>>>>>>>>&16				string	x	\b, %-.8s
909>>>>>>>>>&1				string	x	\b.%-.3s
910>>>12	uleshort				&0x01
911>>>>12		uleshort			^0x02
912>>>>>12			uleshort		^0x04
913>>>>>>12			uleshort	^0x10
914>>>>>>>18				string	x	\b, %-.8s
915>>>>>>12			uleshort	&0x10
916>>>>>>>18				string	x	\b, %-.8s
917>>>>>>>>&1				string	x	\b.%-.3s
918>>>>>12			uleshort		&0x04
919>>>>>>12			uleshort	^0x10
920>>>>>>>(18.s)			uleshort	x
921>>>>>>>>&18				string	x	\b, %-.8s
922>>>>>>12			uleshort	&0x10
923>>>>>>>(18.s)			uleshort	x
924>>>>>>>>&18				string	x	\b, %-.8s
925>>>>>>>>>&1				string	x	\b.%-.3s
926>>>>12		uleshort			&0x02
927>>>>>12			uleshort		^0x04
928>>>>>>12			uleshort	^0x10
929>>>>>>>20				string	x	\b, %-.8s
930>>>>>>12			uleshort	&0x10
931>>>>>>>20				string	x	\b, %-.8s
932>>>>>>>>&1				string	x	\b.%-.3s
933>>>>>12			uleshort		&0x04
934>>>>>>12			uleshort	^0x10
935>>>>>>>(20.s)			uleshort	x
936>>>>>>>>&20				string	x	\b, %-.8s
937>>>>>>12			uleshort	&0x10
938>>>>>>>(20.s)			uleshort	x
939>>>>>>>>&20				string	x	\b, %-.8s
940>>>>>>>>>&1				string	x	\b.%-.3s
941# 2 bytes: length of data + mentioned bytes
942#
943#		SZDD variant Haruhiko Okumura's LZSS or 7z type MsLZ
944# URL:		http://fileformats.archiveteam.org/wiki/MS-DOS_installation_compression
945# Reference:	http://www.cabextract.org.uk/libmspack/doc/szdd_kwaj_format.html
946#		http://mark0.net/download/triddefs_xml.7z/defs/s/szdd.trid.xml
947# Note:		called "Microsoft SZDD compressed (Haruhiko Okumura's LZSS)" by TrID
948#		verified by 7-Zip `7z l -tMsLZ -slt *.??_` as MsLZ
949#		`deark -l -m lzss_oku -d2 setup-1-41.bin` as "LZSS.C by Haruhiko Okumura"
950>0	string	SZDD		MS Compress archive data, SZDD variant
951# 2nd part of signature
952#>>4	ubelong	0x88F02733	\b, SIGNATURE OK
953!:mime	application/x-ms-compress-szdd
954!:ext	??_
955# The character missing from the end of the filename (0=unknown)
956>>9	string	>\0		\b, %-.1s is last character of original name
957# https://www.betaarchive.com/forum/viewtopic.php?t=26161
958# Compression mode: "A" (0x41) found but sometimes "B" in Windows 3.1 builds 026 and 034e
959>>8	string	!A		\b, %-.1s method
960>>10	ulelong	>0		\b, original size: %u bytes
961# Summary:	InstallShield archive with SZDD compressed
962# URL:		https://community.flexera.com/t5/InstallShield-Knowledge-Base/InstallShield-Redistributable-Files/ta-p/5647
963# From:		Joerg Jenderek
9641	search/48/bs	SZDD\x88\xF0\x27\x33	InstallShield archive
965#!:mime	application/octet-stream
966!:mime	application/x-installshield-compress-szdd
967!:ext	ibt
968# name of compressed archive member like: setup.dl_ _setup7int.dl_ _setup2k.dl_ _igdi.dl_ cabinet.dl_
969>0	string	x		%s
970# name of uncompressed archive member like: setup.dll _Setup.dll IGdi.dll CABINET.DLL
971>>&1	string	x		(%s)
972# probably version like: 9.0.0.333 9.1.0.429 11.50.0.42618
973>>>&1	string	x		\b, version %s
974# SZDD member length like: 168048 169333 181842
975>>>>&1	string	x		\b, %s bytes
976# MS Compress archive data
977#>&0	string		SZDD	\b, SIGNATURE FOUND
978>&0	indirect	x
979#		QBasic SZDD variant
9803	string	\x88\xf0\x27
981>0	string	SZ\x20		MS Compress archive data, QBasic variant
982!:mime	application/x-ms-compress-sz
983!:ext	??$
984>>8	ulelong	>0		\b, original size: %u bytes
985
986# Summary:	lzss compressed/EDI Pack
987# From:		Joerg Jenderek
988# URL:		http://fileformats.archiveteam.org/wiki/EDI_Install_packed_file
989# Note:		called "EDI Install LZS compressed data" by TrID and verified by
990#		command like `deark -l -m edi_pack -d2 BOOK01A.IC$` as "EDI Pack LZSS1"
9910	string					EDILZSS
992>7	string					1
993# look for point character before original file name extension
994>>8	search/9/b				.
995# check suffix of possible original file name
996#>>>&0		ubelong				x	SUFFIX=%8.8x
997# samples without valid character after point in original file name field like: FENNEL.LZS PLANTAIN.LZS
998>>>&0		ubyte				<0x20
999>>>>0			use				edi-lzs
1000# samples with valid character after point in original file name field
1001>>>&0		ubyte				>0x1F
1002# check 2nd character of suffix
1003#>>>>&0			ubyte	x			2ND_SUFFIX=%x
1004# sample with one valid character after point followed by \0 in original file name field like: SPELMATE.H$
1005>>>>&0			ubyte			=0
1006>>>>>0				use			edi-pack
1007>>>>&0			ubyte			>0x1F
1008# check 3rd character of suffix
1009#>>>>>&0				ubyte		x	3RD_SUFFIX=%x
1010# no sample with 2 valid characters after point followed by \0 in original file name field
1011>>>>>&0				ubyte		=0
1012>>>>>>0					use		edi-pack
1013# samples with valid 3rd character after point in original file name field
1014>>>>>&0				ubyte		>0x1F
1015# sample with 3 valid characters after point followed by \0 in original file name field like: BOOK01A.IC$ CTL3D.DL$
1016>>>>>>&0				ubyte	=0
1017>>>>>>>0					use	edi-pack
1018# sample with 3 valid characters after point followed by no \0 in original file name field like: HERBTEXT.LZS
1019>>>>>>&0				ubyte	!0
1020>>>>>>>0					use	edi-lzs
1021# no sample with invalid 3rd character after point in original file name field
1022>>>>>&0				default		x
1023>>>>>>0					use		edi-lzs
1024# sample with invalid 2nd character after point in original file name field like: LACERATE.LZS SPLINTER.LZS
1025>>>>&0			default			x
1026>>>>>0	use						edi-lzs
1027# sample without point character in original file name field like GUNSHOT.LZS
1028>>8	default					x
1029>>>0		use					edi-lzs
1030# Reference:	http://mark0.net/download/triddefs_xml.7z/defs/e/edi-lzss2.trid.xml
1031# Note:		called "EDI Install Pro LZSS2 compressed data" by TrID and verified by
1032#		command like `deark -l -m edi_pack -d2 4WAY.WA$` as "EDI Pack LZSS2"
1033>7	string			2			EDI LZSS2 packed
1034#!:mime	application/octet-stream
1035!:mime	application/x-edi-pack-lzss
1036# the name of a compressed file often ends in character '$' or '_'
1037!:ext	??$/??_
1038# original filename, NUL-terminated, padded to 13 bytes like: mci.vbx 4way.wav skymap.exe cmdialog.vbx
1039>>8		string		x			"%-0.13s"
1040# original file size, as a 4-byte integer.
1041>>21		ulelong		x			\b, %u bytes
1042# compressed data like: ff5249464606ec00 ff4d5aa601010000
1043>>>25		ubequad		x			\b, data %#16.16llx...
10440	name		edi-pack
1045# Note:		verified by command like `deark -l -d2 SPELMATE.H$` as "EDI Pack LZSS1"
1046# original filename, NUL-terminated, padded to 13 bytes like: ctl3d.dll spelmate.h filemenu.rc owl.def index-it.exe
1047# but not like \377Aloe.lzs\273 (HERBTEXT.LZS)
1048>8	string		x				EDI LZSS packed "%-.13s"
1049#!:mime	application/octet-stream
1050!:mime	application/x-edi-pack-lzss
1051# the name of a compressed file often ends in character '$' or '_'
1052!:ext	??$/?$
1053# compressed data like: f7000001eff02020 ff4d5aa900020000 ff2f2a207370656c
1054>21	ubequad		x				\b, data %#16.16llx...
1055# URL:		http://fileformats.archiveteam.org/wiki/EDI_LZSSLib
1056# Note:		verified partly by command like `deark -l -m edi_pack -d2 GUNSHOT.LZS` as "EDI LZSSLib"
10570	name		edi-lzs
1058# Note:		verified by command like `deark -l -d2 GUNSHOT.LZS` as "EDI LZSSLib"
1059# no original filename looks like: \277BM\226.\0 \277BM.n\001 \277BM\226.\0 \277BM.g\001 \377Aloe.lzs\273
1060>8	string		x				EDI LZSSLib packed
1061#!:mime	application/octet-stream
1062!:mime	application/x-edi-pack-lzss
1063# The name of a compressed file ends with LZS suffix
1064!:ext	lzs
1065# compressed data like: bf424df6e10100f3 ff416c6f652e6c7a ff416c6f652e6c7a
1066>8	ubequad		x				\b, data %#16.16llx...
1067
1068# Summary:	CAZIP compressed file
1069# From:		Joerg Jenderek
1070# URL:		http://fileformats.archiveteam.org/wiki/CAZIP
1071# Reference:	http://mark0.net/download/triddefs_xml.7z/defs/c/caz.trid.xml
1072# Note:		Format is distinct from CAZIPXP compressed
10730	string	\x0D\x0A\x1ACAZIP	CAZIP compressed file
1074#!:mime	application/octet-stream
1075!:mime	application/x-compress-cazip
1076# like: BLINKER.WR_ CLIPDEFS._ CAOSETUP.EX_ CLIPPER.EX_ FILEIO.C_
1077!:ext	??_/?_/_
1078
1079# Summary:	FTCOMP compressed archive
1080# From:		Joerg Jenderek
1081# URL:		http://fileformats.archiveteam.org/wiki/FTCOMP
1082# Reference:    http://mark0.net/download/triddefs_xml.7z/defs/a/ark-ftcomp.trid.xml
1083# Note:		called by TrID "FTCOMP compressed archive"
1084#		extracted by `unpack seahelp.hl_`
108524	string/b	FTCOMP		FTCOMP compressed archive
1086#!:mime	application/octet-stream
1087!:mime	application/x-compress-ftcomp
1088!:ext	??_/??@/dll/drv/pk2/
1089# probably A596FDFF magic at the beginning
1090>0	ubelong		!0xA596FDFF	\b, at beginning %#x
1091# probably original file name with directory like: \OS2\unpack.exe \SYSTEM\8514.DRV MAHJONGG.EXE
1092>41	string		x		"%s"
1093
1094# MP3 (archiver, not lossy audio compression)
10950	string	MP3\x1a MP3-Archiver archive data
1096# ZET
10970	string	OZ\xc3\x9d ZET archive data
1098# TSComp
1099# Update:	Joerg Jenderek 2023 Nov
1100# URL:		http://fileformats.archiveteam.org/wiki/TSComp
1101# Reference:	http://mark0.net/download/triddefs_xml.7z/defs/t/tscomp.trid.xml
1102#		https://entropymine.com/deark/releases/deark-1.6.5.tar.gz
1103#		deark-1.6.5/modules/installshld.c
1104# Note:		called "TSComp compressed data" by TrID
1105#		verified by command like `deark -m tscomp -l -d2 MAKERRES.DL$`
1106#		The "13" might be a version number. The "8c" is a mystery
11070	string	\x65\x5d\x13\x8c\x08\x01\x03\x00 TSComp archive
1108#!:mime	application/octet-stream
1109!:mime	application/x-tscomp-compressed
1110# filename style: 0~old version 1~without wildcard 2~with wildcard
1111#>0x08	ubyte		x				\b, filename style %u
1112>0x08	ubyte		0				data, filename style 0
1113# no example found
1114!:ext	??$
1115#>0x08	ubyte		1				data, without wildcard
1116>0x08	ubyte		1				data
1117# for single-file archives, often the last letter of the filename extension is changed to "$"; but also name like: BUILD3.BM!
1118!:ext	??$/??!
1119>0x08	ubyte		2				data, with wildcard
1120# for multi-file archives common extensions seem to be .lib and .cmp, but also names like: SAMPMIF$ OTDATA.$$$ TWOFILES.TSC WIN.PAK
1121!:ext	/lib/cmp/$$$/tsc/pak
1122# fnlen; pascal string length; original 1st file name like: CHFORMAT.MML
1123>0x1c	pstring		x				\b, %s
1124# md->fi->timestamp
1125>0x16	lemsdosdate	x				\b, modified %s
1126>0x18	lemsdostime	x				%s
1127# 1st compressed size: like 180 (SAMPMML$$)
1128>0x0E	ulelong		x				\b, compressed size %u
1129# de_dbg_indent(c, 1): like: 12h
1130#>0x0d	ubyte		x				\b, at 0xD %#x
1131# like: 0
1132#>0x1A	ubeshort	x				\b, at 0x1A %#x
1133# 2nd member offset
1134#>0x12	ulelong		x				\b, next offset %#x
1135>0x12	ulelong		>0
1136# original 2nd archive member name like: FORMATS.MML
1137>>(0x12.l+15)	pstring	x				\b, %s ...
1138# ARQ
11390	string	gW\4\1 ARQ archive data
1140# Squash
11413	string	OctSqu Squash archive data
1142# Terse
11430	string	\5\1\1\0 Terse archive data
1144# UHarc
11450	string	UHA UHarc archive data
1146# ABComp
11470	string	\2AB ABComp archive data
11480	string	\3AB2 ABComp archive data
1149# CMP
11500	string	CO\0 CMP archive data
1151# Splint
11520	string	\x93\xb9\x06 Splint archive data
1153# InstallShield
11540	string	\x13\x5d\x65\x8c InstallShield Z archive Data
1155# Gather
11561	string	GTH Gather archive data
1157# BOA
11580	string	BOA BOA archive data
1159# RAX
11600	string	ULEB\xa RAX archive data
1161# Xtreme
11620	string	ULEB\0 Xtreme archive data
1163# Pack Magic
11640	string	@\xc3\xa2\1\0 Pack Magic archive data
1165# BTS
11660	belong&0xfeffffff	0x1a034465 BTS archive data
1167# ELI 5750
11680	string	Ora\  ELI 5750 archive data
1169# QFC
11700	string	\x1aFC\x1a QFC archive data
11710	string	\x1aQF\x1a QFC archive data
1172# PRO-PACK https://www.segaretro.org/Rob_Northen_compression
11730	string	RNC
1174>3	byte	1	PRO-PACK archive data (compression 1)
1175>3	byte	2	PRO-PACK archive data (compression 2)
1176# 777
11770	string	777 777 archive data
1178# LZS221
11790	string	sTaC LZS221 archive data
1180# HPA
11810	string	HPA HPA archive data
1182# Arhangel
11830	string	LG Arhangel archive data
1184# EXP1, uses bzip2
11850	string	0123456789012345BZh EXP1 archive data
1186# IMP
11870	string	IMP\xa IMP archive data
1188# NRV
11890	string	\x00\x9E\x6E\x72\x76\xFF NRV archive data
1190# Squish
11910	string	\x73\xb2\x90\xf4 Squish archive data
1192# Par
11930	string	PHILIPP Par archive data
11940	string	PAR Par archive data
1195# HIT
11960	string	UB HIT archive data
1197# SBX
11980	belong&0xfffff000	0x53423000 SBX archive data
1199# NaShrink
12000	string	NSK NaShrink archive data
1201# SAPCAR
12020	string	#\ CAR\ archive\ header SAPCAR archive data
12030	string	CAR\ 2.00 SAPCAR archive data
12040	string	CAR\ 2.01 SAPCAR archive data
1205#!:mime	application/octet-stream
1206!:mime	application/vnd.sar
1207!:ext	sar
1208# Disintegrator
12090	string	DST Disintegrator archive data
1210# ASD
12110	string	ASD ASD archive data
1212# InstallShield CAB
1213# Update:	Joerg Jenderek at Nov 2021
1214# URL:		https://en.wikipedia.org/wiki/InstallShield
1215# Reference:	https://github.com/twogood/unshield/blob/master/lib/cabfile.h
1216# Note:		Not compatible with Microsoft CAB files
1217# http://mark0.net/download/triddefs_xml.7z/defs/a/ark-cab-ishield.trid.xml
1218# CAB_SIGNATURE 0x28635349
12190	string	ISc( InstallShield
1220#!:mime		application/octet-stream
1221!:mime		application/x-installshield
1222# http://mark0.net/download/triddefs_xml.7z/defs/a/ark-cab-ishield-hdr.trid.xml
1223>16	ulelong	!0	setup header
1224# like: _SYS1.HDR _USER1.HDR data1.hdr
1225!:ext	hdr
1226>16	ulelong	=0	CAB
1227# like: _SYS1.CAB _USER1.CAB DATA1.CAB  data2.cab
1228!:ext	cab
1229# https://github.com/twogood/unshield/blob/master/lib/helper.c
1230# version like:	0x1005201 0x100600c 0x1007000 0x1009500
1231#		0x2000578 0x20005dc 0x2000640 0x40007d0 0x4000834
1232>4	ulelong	x	\b, version %#x
1233# volume_info like: 0
1234>8	ulelong	!0	\b, volume_info %#x
1235# cab_descriptor_offset like: 0x200
1236>12	ulelong	!0x200	\b, offset %#x
1237#>0x200	ubequad	x	\b, at 0x200 %#16.16llx
1238# cab_descriptor_size like: 0 (*.cab) BD5 C8B DA5 E2A E36 116C 251D 4DA9 56F0 5CC2 6E4B 777D 779E 1F7C2
1239>16	ulelong	!0	\b, descriptor size %#x
1240>(12.l+40)	lelong	x	\b, %u files
1241
1242# TOP4
12430	string	T4\x1a TOP4 archive data
1244# BatComp left out: sig looks like COM executable
1245# so TODO: get real 4dos batcomp file and find sig
1246# BlakHole
12470	string	BH\5\7 BlakHole archive data
1248# BIX
12490	string	BIX0 BIX archive data
1250# ChiefLZA
12510	string	ChfLZ ChiefLZA archive data
1252# Blink
12530	string	Blink Blink archive data
1254# Logitech Compress
12550	string	\xda\xfa Logitech Compress archive data
1256# ARS-Sfx (FIXME: really a SFX? then goto COM/EXE)
12571	string	(C)\ STEPANYUK ARS-Sfx archive data
1258# AKT/AKT32
12590	string	AKT32 AKT32 archive data
12600	string	AKT AKT archive data
1261# NPack
12620	string	MSTSM NPack archive data
1263# PFT
12640	string	\0\x50\0\x14 PFT archive data
1265# SemOne
12660	string	SEM SemOne archive data
1267# PPMD
12680	string	\x8f\xaf\xac\x84 PPMD archive data
1269# FIZ
12700	string	FIZ FIZ archive data
1271# MSXiE
12720	belong&0xfffff0f0	0x4d530000 MSXiE archive data
1273# DeepFreezer
12740	belong&0xfffffff0	0x797a3030 DeepFreezer archive data
1275# DC
12760	string	=<DC- DC archive data
1277# TPac
12780	string	\4TPAC\3 TPac archive data
1279# Ai
1280# Update:	Joerg Jenderek
1281# URL:		http://fileformats.archiveteam.org/wiki/Ai_Archiver
12820	string	Ai\1\1\0 Ai archive data
1283#!:mime	application/octet-stream
1284!:mime	application/x-compress-ai
1285!:ext	ai
12860	string	Ai\1\0\0 Ai archive data
1287#!:mime	application/octet-stream
1288!:mime	application/x-compress-ai
1289!:ext	ai
1290# Ai32
1291# Reference:	http://mark0.net/download/triddefs_xml.7z/defs/a/ark-ai.trid.xml
1292# Note:		called "Ai Archivator compressed archive" by TrID
12930	string	Ai\2\0 Ai32 archive data
1294#!:mime	application/octet-stream
1295!:mime	application/x-compress-ai
1296!:ext	ai
1297# original file name
1298>8	pstring/h x	"%s"
1299# according to TrID the next 3 bytes are nil
1300>5	ubyte	!0	\b, at 5 %#x
1301>6	ubyte	!0	\b, at 6 %#x
1302>7	ubyte	!0	\b, at 7 %#x
1303# the fourth byte with value 0 is probably a flag for "non solid" mode
1304#>3	ubyte	=0x00	\b, unsolid mode
13050	string	Ai\2\1 Ai32 archive data
1306#!:mime	application/octet-stream
1307!:mime	application/x-compress-ai
1308!:ext	ai
1309# original file name
1310>8	pstring/h x	"%s"
1311# the fourth byte with value 0x01 is probably a flag for "solid" mode; this is not the default
1312>3	ubyte	=0x01	\b, solid mode
1313# SBC
13140	string	SBC SBC archive data
1315# Ybs
13160	string	YBS Ybs archive data
1317# DitPack
13180	string	\x9e\0\0 DitPack archive data
1319# DMS
13200	string	DMS! DMS archive data
1321# EPC
13220	string	\x8f\xaf\xac\x8c EPC archive data
1323# VSARC
13240	string	VS\x1a VSARC archive data
1325# PDZ
13260	string	PDZ PDZ archive data
1327# ReDuq
13280	string	rdqx ReDuq archive data
1329# GCA
13300	string	GCAX GCA archive data
1331# PPMN
13320	string	pN PPMN archive data
1333# WinImage
13343	string	WINIMAGE WinImage archive data
1335# Compressia
13360	string	CMP0CMP Compressia archive data
1337# UHBC
13380	string	UHB UHBC archive data
1339# WinHKI
13400	string	\x61\x5C\x04\x05 WinHKI archive data
1341# WWPack data file
13420	string	WWP WWPack archive data
1343# BSN (BSA, PTS-DOS)
13440	string	\xffBSG BSN archive data
13451	string	\xffBSG BSN archive data
13463	string	\xffBSG BSN archive data
13471	string	\0\xae\2 BSN archive data
13481	string	\0\xae\3 BSN archive data
13491	string	\0\xae\7 BSN archive data
1350# AIN
13510	string	\x33\x18 AIN archive data
13520	string	\x33\x17 AIN archive data
1353# XPA32 test moved and merged with XPA by Joerg Jenderek at Sep 2015
1354# SZip (TODO: doesn't catch all versions)
13550	string	SZ\x0a\4 SZip archive data
1356# XPack DiskImage
1357# *.XDI updated by Joerg Jenderek Sep 2015
1358# ftp://ftp.sac.sk/pub/sac/pack/0index.txt
1359# GRR: this test is still too general as it catches also text files starting with jm
13600	string	jm
1361# only found examples with this additional characteristic 2 bytes
1362>2	string	\x2\x4	Xpack DiskImage archive data
1363#!:ext xdi
1364# XPack Data
1365# *.xpa updated by Joerg Jenderek Sep 2015
1366# ftp://ftp.elf.stuba.sk/pub/pc/pack/
13670	string	xpa	XPA
1368!:ext	xpa
1369# XPA32
1370# ftp://ftp.elf.stuba.sk/pub/pc/pack/xpa32.zip
1371# created by XPA32.EXE version 1.0.2 for Windows
1372>0	string	xpa\0\1 \b32 archive data
1373# created by XPACK.COM version 1.67m or 1.67r with short 0x1800
1374>3	ubeshort	!0x0001	\bck archive data
1375# XPack Single Data
1376# changed by Joerg Jenderek Sep 2015 back to like in version 5.12
1377# letter 'I'+ acute accent is equivalent to \xcd
13780	string	\xcd\ jm	Xpack single archive data
1379#!:mime	application/x-xpa-compressed
1380!:ext xpa
1381
1382# TODO: missing due to unknown magic/magic at end of file:
1383#DWC
1384#ARG
1385#ZAR
1386#PC/3270
1387#InstallIt
1388#RKive
1389#RK
1390#XPack Diskimage
1391
1392# These were inspired by idarc, but actually verified
1393# Dzip archiver (.dz)
1394# Update: Joerg Jenderek
1395# URL: http://speeddemosarchive.com/dzip/
1396# reference: http://speeddemosarchive.com/dzip/dz29src.zip/main.c
1397# GRR: line below is too general as it matches also ASCII texts like Doszip commander help dz.txt
13980	string	DZ
1399# latest version is 2.9 dated 7 may 2003
1400>2	byte	<4 Dzip archive data
1401!:mime	application/x-dzip
1402!:ext	dz
1403>>2	byte	x \b, version %i
1404>>3	byte	x \b.%i
1405>>4	ulelong	x \b, offset %#x
1406>>8	ulelong	x \b, %u files
1407# ZZip archiver (.zz)
14080	string	ZZ\ \0\0 ZZip archive data
14090	string	ZZ0 ZZip archive data
1410# PAQ archiver (.paq)
14110	string	\xaa\x40\x5f\x77\x1f\xe5\x82\x0d PAQ archive data
14120	string	PAQ PAQ archive data
1413>3	byte&0xf0	0x30
1414>>3	byte	x (v%c)
1415# JAR archiver (.j), this is the successor to ARJ, not Java's JAR (which is essentially ZIP)
1416# Update:	Joerg Jenderek
1417# URL:		http://fileformats.archiveteam.org/wiki/JAR_(ARJ_Software)
1418# reference:	http://mark0.net/download/triddefs_xml.7z/defs/a/ark-jar.trid.xml
1419#		https://www.sac.sk/download/pack/jar102x.exe/TECHNOTE.DOC
1420# Note:		called "JAR compressed archive" by TrID
14210xe	string	\x1aJar\x1b JAR (ARJ Software, Inc.) archive data
1422#!:mime	application/octet-stream
1423!:mime	application/x-compress-j
1424>0	ulelong	x		\b, CRC32 %#x
1425# standard suffix is ".j"; for multi volumes following order j01 j02 ... j99 100 ... 990
1426!:ext	j/j01/j02
1427# URL:		http://fileformats.archiveteam.org/wiki/JARCS
1428# reference:	http://mark0.net/download/triddefs_xml.7z/defs/a/ark-jarcs.trid.xml
1429# Note:		called "JARCS compressed archive" by TrID
14300	string	JARCS JAR (ARJ Software, Inc.) archive data
1431#!:mime	application/octet-stream
1432!:mime	application/x-compress-jar
1433!:ext	jar
1434
1435# ARJ archiver (jason@jarthur.Claremont.EDU)
1436# URL:		http://fileformats.archiveteam.org/wiki/ARJ
1437# reference:	http://mark0.net/download/triddefs_xml.7z/defs/a/ark-arj.trid.xml
1438#		https://github.com/FarGroup/FarManager/
1439#		blob/master/plugins/multiarc/arc.doc/arj.txt
1440# Note:		called "ARJ compressed archive" by TrID and
1441#		"ARJ File Format" by DROID via PUID fmt/610
1442#		verified by `7z l -tarj PHRACK1.ARJ` and
1443#		`arj.exe l TEST-hk9.ARJ`
14440	leshort		0xea60
1445# skip DROID fmt-610-signature-id-946.arj by check for valid file type of main header
1446>0xA	ubyte		2
1447>>0	use		arj-archive
14480	name		arj-archive
1449>0	leshort		x		ARJ archive
1450!:mime	application/x-arj
1451# look for terminating 0-character of filename
1452>0x26	search/1024	\0
1453# file name extension is normally .arj but not for parts of multi volume
1454#>>&-5	string		x		extension %.4s
1455>>&-5	string/c	.arj		data
1456!:ext	arj
1457>>&-5	default		x
1458# for multi volume first name is archive.arj then following parts archive.a01 archive.a02 ...
1459>>>8	byte		&0x04		data
1460!:ext	a01/a02
1461# for SFX first name is archive.exe then following parts archive.e01 archive.e02 ...
1462>>>8	byte		^0x04		data, SFX multi-volume
1463!:ext	e01/e02
1464# basic header size like: 0x002b 0x002c 0x04e0 0x04e3 0x04e7
1465#>2	uleshort	x		basic header size %#4.4x
1466# next fragment content like: 0x0a200a003a8fc713 0x524a000010bb3471 0x524a0000c73c70f9
1467#>(2.s)	ubequad		x		NEXT FRAGMENT CONTENT %#16.16llx
1468# first_hdr_size; seems to be same as basic header size
1469#>2	uleshort	x		1st header size %#x
1470# archiver version number like: 3 4 6 11 102
1471>5	byte		x		\b, v%d
1472# minimum archiver version to extract like: 1
1473>6	ubyte		!1		\b, minimum %u to extract
1474# FOR DEBUGGING
1475#>8	byte		x		\b, FLAGS %#x
1476# GARBLED_FLAG1; garble with password; g switch
1477>8	byte		&0x01		\b, password-protected
1478# encryption version: 0~old  1~old 2~new 3~reserved 4~40 bit key GOST
1479>>0x20	ubyte		x		(v%u)
1480#>8	byte		&0x02		\b, secured
1481# ANSIPAGE_FLAG; indicates ANSI codepage used by ARJ32; hy switch
1482>8	byte		&0x02		\b, ANSI codepage
1483# VOLUME_FLAG indicates presence of succeeding volume; but apparently not for SFX
1484>8	byte		&0x04		\b, multi-volume
1485#>8	byte		&0x08		\b, file-offset
1486# ARJPROT_FLAG; build with data protection record; hk switch
1487>8	byte		&0x08		\b, recoverable
1488# arj protection factor; maximal 10; switch hky -> factor=y+1
1489>>0x22	byte		x		(factor %u)
1490>8	byte		&0x10		\b, slash-switched
1491# BACKUP_FLAG; obsolete
1492>8	byte		&0x20		\b, backup
1493# SECURED_FLAG;
1494>8	byte		&0x40		\b, secured
1495# ALTNAME_FLAG; indicates dual-name archive
1496>8	byte		&0x80		\b, dual-name
1497# security version; 0~old 2~current
1498>9	ubyte		!0
1499>>9	ubyte		!2		\b, security version %u
1500# file type; 2 in main header; 0~binary 1~7-bitText 2~comment 3~directory 4~VolumeLabel 5=ChapterLabel
1501>0xA	ubyte		!2		\b, file type %u
1502# date+time when original archive was created in MS-DOS format
1503>0xE	lemsdosdate	x		\b, created %s
1504>0xC	lemsdostime	x		%s
1505# Archive mod time, added in format v6 (ARJ 2.39c)
1506>5	ubyte		>5
1507>>0x10	ulelong		>0		\b, modified
1508>>>0x12	lemsdosdate	x		%s
1509>>>0x10	lemsdostime	x		%s
1510
1511# FOR DEBUGGING
1512#>0x12	uleshort	x		RAW DATE %#4.4x
1513#>0x10	uleshort	x		RAW TIME %#4.4x
1514# archive size (currently used only for secured archives); MAYBE?
1515#>0x14	ulelong		!0		\b, file size %u
1516# security envelope file position; MAYBE?
1517#>0x18	ulelong		!0		\b, at %#x security envelope
1518# filespec position in filename; WHAT IS THAT?
1519#>0x1C	uleshort	>0		\b, filespec position %#x
1520# length in bytes of security envelope data like: 2CAh 301h 364h 471h
1521>0x1E	uleshort	!0		\b, security envelope length %#x
1522# last chapter like: 0 1
1523>0x21	ubyte		!0		\b, last chapter %u
1524# filename (null-terminated string); sometimes at 0x26 when 4 bytes for extra data
1525>34	byte		x		\b, original name:
1526# with extras data
1527>34	byte		<0x0B
1528>>38	string		x		%s
1529# without extras data
1530>34	byte		>0x0A
1531>>34	string		x		%s
1532# host OS: 0~MSDOS ... 11~WIN32
1533>7	byte		0		\b, os: MS-DOS
1534>7	byte		1		\b, os: PRIMOS
1535>7	byte		2		\b, os: Unix
1536>7	byte		3		\b, os: Amiga
1537>7	byte		4		\b, os: Macintosh
1538>7	byte		5		\b, os: OS/2
1539>7	byte		6		\b, os: Apple ][ GS
1540>7	byte		7		\b, os: Atari ST
1541>7	byte		8		\b, os: NeXT
1542>7	byte		9		\b, os: VAX/VMS
1543>7	byte		10		\b, os: WIN95
1544>7	byte		11		\b, os: WIN32
1545# [JW] idarc says this is also possible
15462	leshort		0xea60		ARJ archive data
1547#2	leshort		0xea60
1548#>2	use		arj-archive
1549
1550# HA archiver (Greg Roelofs, newt@uchicago.edu)
1551# This is a really bad format. A file containing HAWAII will match this...
1552#0	string		HA		HA archive data,
1553#>2	leshort		=1		1 file,
1554#>2	leshort		>1		%u files,
1555#>4	byte&0x0f	=0		first is type CPY
1556#>4	byte&0x0f	=1		first is type ASC
1557#>4	byte&0x0f	=2		first is type HSC
1558#>4	byte&0x0f	=0x0e		first is type DIR
1559#>4	byte&0x0f	=0x0f		first is type SPECIAL
1560# suggestion: at least identify small archives (<1024 files)
15610  belong&0xffff00fc 0x48410000 HA archive data
1562>2	leshort		=1		1 file,
1563>2	leshort		>1		%u files,
1564>4	byte&0x0f	=0		first is type CPY
1565>4	byte&0x0f	=1		first is type ASC
1566>4	byte&0x0f	=2		first is type HSC
1567>4	byte&0x0f	=0x0e		first is type DIR
1568>4	byte&0x0f	=0x0f		first is type SPECIAL
1569
1570# HPACK archiver (Peter Gutmann, pgut1@cs.aukuni.ac.nz)
15710	string		HPAK		HPACK archive data
1572
1573# JAM Archive volume format, by Dmitry.Kohmanyuk@UA.net
15740	string		\351,\001JAM\ 		JAM archive,
1575>7	string		>\0			version %.4s
1576>0x26	byte		=0x27			-
1577>>0x2b	string          >\0			label %.11s,
1578>>0x27	lelong		x			serial %08x,
1579>>0x36	string		>\0			fstype %.8s
1580
1581# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
1582# Update: Joerg Jenderek
1583# URL: https://en.wikipedia.org/wiki/LHA_(file_format)
1584# Reference: https://web.archive.org/web/20021005080911/http://www.osirusoft.com/joejared/lzhformat.html
1585#
1586#	check and display information of lharc (LHa,PMarc) file
15870	name				lharc-file
1588# check 1st character of method id like -lz4- -lh5- or -pm2-
1589>2	string		-
1590# check 5th character of method id
1591>>6	string		-
1592# check header level 0 1 2 3
1593>>>20	ubyte		<4
1594# check 2nd, 3rd and 4th character of method id
1595>>>>3	regex		\^(lh[0-9a-ex]|lz[s2-8]|pm[012]|pc1)		\b
1596!:mime	application/x-lzh-compressed
1597# creator type "LHA "
1598!:apple	????LHA
1599# display archive type name like "LHa/LZS archive data" or "LArc archive"
1600>>>>>2	string		-lz		\b
1601!:ext	lzs
1602# already known  -lzs- -lz4- -lz5- with old names
1603>>>>>>2	string	-lzs		LHa/LZS archive data
1604>>>>>>3	regex	\^lz[45]	LHarc 1.x archive data
1605# missing -lz?- with wikipedia names
1606>>>>>>3	regex	\^lz[2378]	LArc archive
1607# display archive type name like "LHa (2.x) archive data"
1608>>>>>2	string		-lh		\b
1609# already known -lh0- -lh1- -lh2- -lh3-  -lh4- -lh5- -lh6- -lh7- -lhd- variants with old names
1610>>>>>>3	regex		\^lh[01]	LHarc 1.x/ARX archive data
1611# The LHice archiver uses ".ICE" as name extension instead of the usual ".lzh"
1612# The FOOBAR archiver uses ".foo" as name extension instead of the usual one
1613# "Florian Orjanov's and Olga Bachetska's ARchiver" not found at the moment
1614>>>>>>>2	string	-lh1		\b
1615!:ext lha/lzh/ice
1616>>>>>>3	regex		\^lh[23d]	LHa 2.x? archive data
1617>>>>>>3	regex		\^lh[7]		LHa (2.x)/LHark archive data
1618>>>>>>3	regex		\^lh[456]	LHa (2.x) archive data
1619>>>>>>>2	string	-lh5		\b
1620# https://en.wikipedia.org/wiki/BIOS
1621# Some mainboard BIOSes, like Award, use LHa compression, so archives with unusual extensions are found, like
1622# bios.rom , kd7_v14.bin, 1010.004, ...
1623!:ext lha/lzh/rom/bin
1624# missing -lh?- variants (Joe Jared)
1625>>>>>>3	regex		\^lh[89a-ce]	LHa (Joe Jared) archive
1626# UNLHA32 2.67a
1627>>>>>>2	string		-lhx		LHa (UNLHA32) archive
1628# lha archives with standard file name extensions ".lha" ".lzh"
1629>>>>>>3	regex		!\^(lh1|lh5)	\b
1630!:ext lha/lzh
1631# this should not happen if all -lh variants are described
1632>>>>>>2	default		x		LHa (unknown) archive
1633#!:ext	lha
1634# PMarc
1635>>>>>3	regex		\^pm[012]	PMarc archive data
1636!:ext pma
1637# append method id without leading and trailing minus character
1638>>>>>3	string		x		[%3.3s]
1639>>>>>>0	use	lharc-header
1640#
1641#	sub-routine to display information of an lharc header: OS ID and 1st file name
16420	name				lharc-header
1643# header size like: 0x4 , 0x1b-0x61 (not shown)
1644#>0	ubyte		x
1645# compressed data size != compressed file size
1646#>7	ulelong		x		\b, data size %d
1647# attribute: 0x2~?? 0x10~symlink|target 0x20~normal
1648#>19	ubyte		x		\b, 19_%#x
1649# level identifier 0 1 2 3
1650#>20	ubyte		x		\b, level %d
1651# time stamp
1652#>15		ubelong	x		DATE %#8.8x
1653# OS ID for level 1 headers; read at offset (file name length + 24), the length being the byte at offset 21
1654>20	ubyte		1
1655# values up to 0x20 (0x20 was found in *.rom files) are shown as hex number
1656>>(21.b+24)	ubyte	<0x21		\b, %#x OS
1657# values above 0x20 are shown as ASCII character like M for MSDOS
1658>>(21.b+24)	ubyte	>0x20		\b, '%c' OS
1659# OS ID for level 2 headers; read at fixed offset 23
1660>20	ubyte		2
1661#>>23	ubyte		x		\b, OS ID %#x
1662>>23	ubyte		<0x21		\b, %#x OS
1663>>23	ubyte		>0x20		\b, '%c' OS
1664# file name is shown only for header level 0 and 1
1665>20	ubyte		<2
1666# length of file name; nothing is shown for an empty name
1667>>21		ubyte	>0		\b, with
1668# file name, read as length-prefixed (Pascal) string
1669>>>21		pstring	x		"%s"
1670#
1671#2	string		-lh0-		LHarc 1.x/ARX archive data [lh0]
1672#!:mime	application/x-lharc
16732	string		-lh0-
1674>0	use	lharc-file
1675#2	string		-lh1-		LHarc 1.x/ARX archive data [lh1]
1676#!:mime	application/x-lharc
16772	string		-lh1-
1678>0	use	lharc-file
1679# NEW -lz2- ... -lz8-
16802	string		-lz2-
1681>0	use	lharc-file
16822	string		-lz3-
1683>0	use	lharc-file
16842	string		-lz4-
1685>0	use	lharc-file
16862	string		-lz5-
1687>0	use	lharc-file
16882	string		-lz7-
1689>0	use	lharc-file
16902	string		-lz8-
1691>0	use	lharc-file
1692#	[never seen any but the last; -lh4- reported in comp.compression:]
1693#2	string		-lzs-		LHa/LZS archive data [lzs]
16942	string		-lzs-
1695>0	use	lharc-file
1696# According to wikipedia and others such a version does not exist
1697#2	string		-lh\40-		LHa 2.x? archive data [lh ]
1698#2	string		-lhd-		LHa 2.x? archive data [lhd]
16992	string		-lhd-
1700>0	use	lharc-file
1701#2	string		-lh2-		LHa 2.x? archive data [lh2]
17022	string		-lh2-
1703>0	use	lharc-file
1704#2	string		-lh3-		LHa 2.x? archive data [lh3]
17052	string		-lh3-
1706>0	use	lharc-file
1707#2	string		-lh4-		LHa (2.x) archive data [lh4]
17082	string		-lh4-
1709>0	use	lharc-file
1710#2	string		-lh5-		LHa (2.x) archive data [lh5]
17112	string		-lh5-
1712>0	use	lharc-file
1713#2	string		-lh6-		LHa (2.x) archive data [lh6]
17142	string		-lh6-
1715>0	use	lharc-file
1716#2	string		-lh7-		LHa (2.x)/LHark archive data [lh7]
17172	string		-lh7-
1718# !:mime	application/x-lha
1719# >20	byte		x		- header level %d
1720>0	use	lharc-file
1721# NEW -lh8- ... -lhe- , -lhx-
17222	string		-lh8-
1723>0	use	lharc-file
17242	string		-lh9-
1725>0	use	lharc-file
17262	string		-lha-
1727>0	use	lharc-file
17282	string		-lhb-
1729>0	use	lharc-file
17302	string		-lhc-
1731>0	use	lharc-file
17322	string		-lhe-
1733>0	use	lharc-file
17342	string		-lhx-
1735>0	use	lharc-file
1736# taken from idarc [JW]
17372   string      -lZ         PUT archive data
1738# already done by LHarc magics
1739# this should never happen if all sub types of LZS archive are identified
1740#2   string      -lz         LZS archive data
17412   string      -sw1-       Swag archive data
1742
17430	name		rar-file-header
1744>24	byte		15		\b, v1.5
1745>24	byte		20		\b, v2.0
1746>24	byte		29		\b, v4
1747>15	byte		0		\b, os: MS-DOS
1748>15	byte		1		\b, os: OS/2
1749>15	byte		2		\b, os: Win32
1750>15	byte		3		\b, os: Unix
1751>15	byte		4		\b, os: Mac OS
1752>15	byte		5		\b, os: BeOS
1753
17540	name		rar-archive-header
1755>3	leshort&0x1ff	>0		\b, flags:
1756>>3	leshort		&0x01		ArchiveVolume
1757>>3	leshort		&0x02		Commented
1758>>3	leshort		&0x04		Locked
1759>>3	leshort		&0x10		NewVolumeNaming
1760>>3	leshort		&0x08		Solid
1761>>3	leshort		&0x20		Authenticated
1762>>3	leshort		&0x40		RecoveryRecordPresent
1763>>3	leshort		&0x80		EncryptedBlockHeader
1764>>3	leshort		&0x100		FirstVolume
1765
1766# RAR (Roshal Archive) archive
17670	string		Rar!\x1a\7\0		RAR archive data
1768!:mime	application/vnd.rar
1769!:ext	rar/cbr
1770# file header
1771>(0xc.l+9)	byte	0x74
1772>>(0xc.l+7)	use	rar-file-header
1773# subblock seems to share information with file header
1774>(0xc.l+9)	byte	0x7a
1775>>(0xc.l+7)	use	rar-file-header
1776>9		byte	0x73
1777>>7		use	rar-archive-header
1778
17790	string		Rar!\x1a\7\1\0		RAR archive data, v5
1780!:mime	application/vnd.rar
1781!:ext	rar/cbr
1782
1783# Very old RAR archive
1784# https://jasonblanks.com/wp-includes/images/papers/KnowyourarchiveRAR.pdf
17850	string		RE\x7e\x5e  RAR archive data (<v1.5)
1786!:mime	application/vnd.rar
1787!:ext	rar/cbr
1788
1789# SQUISH archiver (Greg Roelofs, newt@uchicago.edu)
17900	string		SQSH		squished archive data (Acorn RISCOS)
1791
1792# UC2 archiver (Greg Roelofs, newt@uchicago.edu)
1793# [JW] see exe section for self-extracting version
17940	string		UC2\x1a		UC2 archive data
1795
1796# PKZIP multi-volume archive
17970	string		PK\x07\x08PK\x03\x04	Zip multi-volume archive data, at least PKZIP v2.50 to extract
1798!:mime	application/zip
1799!:ext zip/cbz
1800
1801
1802# Generic zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
1803#   Next line excludes specialized formats:
18040	name	zipgeneric
1805>4	beshort		x			Zip archive data, at least
1806!:mime	application/zip
1807>4	use		zipversion
1808>4	beshort		x			to extract
1809>8	beshort		x			\b, compression method=
1810>8	use		zipcompression
1811>0x161	string		WINZIP		\b, WinZIP self-extracting
1812
1813# Zip archives that can be either APK or JAR. Checks for resources.arsc, classes.dex, etc.
18140	name	apk_or_jar
1815# Contains resources.arsc (near the end, in the central directory)
1816>-512	search	resources.arsc	Android package (APK), with MANIFEST.MF and resources.arsc
1817!:mime	application/vnd.android.package-archive
1818!:ext	apk
1819>>-22	string	PK\005\006
1820>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	\b, with APK Signing Block
1821>-512	default x
1822# Contains classes.dex (near the end, in the central directory)
1823>>-512	search	classes.dex	Android package (APK), with MANIFEST.MF and classes.dex
1824!:mime	application/vnd.android.package-archive
1825!:ext	apk
1826>>>-22	string	PK\005\006
1827>>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	\b, with APK Signing Block
1828>>-512	default x
1829# Contains lib/armeabi (near the end, in the central directory)
1830>>>-512	search	lib/armeabi	Android package (APK), with MANIFEST.MF and armeabi lib
1831!:mime	application/vnd.android.package-archive
1832!:ext	apk
1833>>>>-22	string	PK\005\006
1834>>>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	\b, with APK Signing Block
1835>>>-512	default x
1836# Contains drawables (near the end, in the central directory)
1837>>>>-512	search	res/drawable	Android package (APK), with MANIFEST.MF and drawables
1838!:mime	application/vnd.android.package-archive
1839!:ext	apk
1840>>>>>-22	string	PK\005\006
1841>>>>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	\b, with APK Signing Block
1842# It may or may not be an APK file, but it's definitely a Java JAR file
1843>>>>-512	default x	Java archive data (JAR)
1844!:mime	application/java-archive
1845!:ext	jar
1846
18470	string		PK\003\004
1848!:strength +1
1849# IOS/IPadOS IPA file (Zip archive)
1850# Starts with Payload/ folder (file name length = 8)
1851>26	uleshort	8
1852>>30  	string		Payload		IOS/iPadOS IPA file
1853>>>&26  search/6000	PK\003\004
1854>>>>&34	string		x	containing %s
1855!:mime	application/x-ios-app
1856!:ext	ipa
1857
1858# Android APK file (Zip archive)
1859# Starts with AndroidManifest.xml (file name length = 19)
1860>26	uleshort	19
1861>>30	string	AndroidManifest.xml	Android package (APK), with AndroidManifest.xml
1862!:mime	application/vnd.android.package-archive
1863!:ext	apk
1864>>>-22	string	PK\005\006
1865>>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	\b, with APK Signing Block
1866# Starts with META-INF/com/android/build/gradle/app-metadata.properties
1867>26	uleshort	57
1868>>30	string	META-INF/com/android/build/gradle/
1869>>>&0	string	app-metadata.properties	Android package (APK), with gradle app-metadata.properties
1870!:mime	application/vnd.android.package-archive
1871!:ext	apk
1872>>>>-22	string	PK\005\006
1873>>>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	\b, with APK Signing Block
1874# Starts with classes.dex (file name length = 11)
1875>26	uleshort	11
1876>>30	string	classes.dex	Android package (APK), with classes.dex
1877!:mime	application/vnd.android.package-archive
1878!:ext	apk
1879>>>-22	string	PK\005\006
1880>>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	\b, with APK Signing Block
1881# Starts with META-INF/MANIFEST.MF (file name length = 20)
1882>26	uleshort	20
1883>>30	string	META-INF/MANIFEST.MF
1884>>>0	use		apk_or_jar
1885# Starts with META-INF/ folder (file name length = 9)
1886>26	uleshort	9
1887>>30	string	META-INF/
1888>>>0	use		apk_or_jar
1889
1890# Starts with zipflinger virtual entry (28 + 104 = 132 bytes)
1891# See https://github.com/obfusk/apksigcopier/blob/666f5b7/apksigcopier/__init__.py#L230
1892>4	string	\x00\x00\x00\x00\x00\x00
1893>>&0	string	\x21\x08\x21\x02
1894>>>&0	string	\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00
1895>>>>&0	string	\x00\x00	Android package (APK), with zipflinger virtual entry
1896!:mime	application/vnd.android.package-archive
1897!:ext	apk
1898>>>>>-22	string	PK\005\006
1899>>>>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	\b, with APK Signing Block
1900
1901
1902# Specialised zip formats which start with a member named 'mimetype'
1903# (stored uncompressed, with no 'extra field') containing the file's MIME type.
1904# Check for an 8-byte name, 0-byte extra field, name "mimetype", and
1905#  contents starting with "application/":
1906>26	string		\x8\0\0\0mimetypeapplication/
1907
1908#  KOffice / OpenOffice & StarOffice / OpenDocument formats
1909#    From: Abel Cheung <abel@oaka.org>
1910
1911#   KOffice (1.2 or above) formats
1912#    (mimetype contains "application/vnd.kde.<SUBTYPE>")
1913>>50	string	vnd.kde.		KOffice (>=1.2)
1914>>>58	string	karbon			Karbon document
1915>>>58	string	kchart			KChart document
1916>>>58	string	kformula		KFormula document
1917>>>58	string	kivio			Kivio document
1918>>>58	string	kontour			Kontour document
1919>>>58	string	kpresenter		KPresenter document
1920>>>58	string	kspread			KSpread document
1921>>>58	string	kword			KWord document
1922
1923#   OpenOffice formats (for OpenOffice 1.x / StarOffice 6/7)
1924#    (mimetype contains "application/vnd.sun.xml.<SUBTYPE>")
1925# URL:		https://en.wikipedia.org/wiki/OpenOffice.org_XML
1926# reference:	http://fileformats.archiveteam.org/wiki/OpenOffice.org_XML
1927>>50	string	vnd.sun.xml.		OpenOffice.org 1.x
1928>>>62	string	writer			Writer
1929>>>>68	byte	!0x2e			document
1930!:mime	application/vnd.sun.xml.writer
1931!:ext	sxw
1932>>>>68	string	.template		template
1933!:mime	application/vnd.sun.xml.writer.template
1934!:ext	stw
1935>>>>68	string	.web			Web template
1936!:mime	application/vnd.sun.xml.writer.web
1937!:ext	stw
1938>>>>68	string	.global			global document
1939!:mime	application/vnd.sun.xml.writer.global
1940!:ext	sxg
1941>>>62	string	calc			Calc
1942>>>>66	byte	!0x2e			spreadsheet
1943!:mime	application/vnd.sun.xml.calc
1944!:ext	sxc
1945>>>>66	string	.template		template
1946!:mime	application/vnd.sun.xml.calc.template
1947!:ext	stc
1948>>>62	string	draw			Draw
1949>>>>66	byte	!0x2e			document
1950!:mime	application/vnd.sun.xml.draw
1951!:ext	sxd
1952>>>>66	string	.template		template
1953!:mime	application/vnd.sun.xml.draw.template
1954!:ext	std
1955>>>62	string	impress			Impress
1956>>>>69	byte	!0x2e			presentation
1957!:mime	application/vnd.sun.xml.impress
1958!:ext	sxi
1959>>>>69	string	.template		template
1960!:mime	application/vnd.sun.xml.impress.template
1961!:ext	sti
1962>>>62	string	math			Math document
1963!:mime	application/vnd.sun.xml.math
1964!:ext	sxm
1965>>>62	string	base			Database file
1966!:mime	application/vnd.sun.xml.base
1967!:ext	sdb
1968
1969# URL:	https://wiki.openoffice.org/wiki/Documentation/DevGuide/Extensions/File_Format
1970# From:	Joerg Jenderek
1971# Note:	only few OXT samples are detected here by mimetype member
1972#	is used by OpenOffice and LibreOffice and probably also NeoOffice
1973#	verified by `unzip -Zv *.oxt` or `7z l -slt *.oxt`
1974>>50	string	vnd.openofficeorg.		OpenOffice
1975>>>68	string	extension			\b/LibreOffice Extension
1976# http://extension.nirsoft.net/oxt
1977!:mime	application/vnd.openofficeorg.extension
1978# like: Gallery-Puzzle.2.1.0.1.oxt
1979!:ext	oxt
1980
1981#   OpenDocument formats (for OpenOffice 2.x / StarOffice >= 8)
1982#   URL: http://fileformats.archiveteam.org/wiki/OpenDocument
1983#    https://lists.oasis-open.org/archives/office/200505/msg00006.html
1984#    (mimetype contains "application/vnd.oasis.opendocument.<SUBTYPE>")
1985>>50	string	vnd.oasis.opendocument.	OpenDocument
1986>>>73	string	text
1987>>>>77	byte	!0x2d			Text
1988!:mime	application/vnd.oasis.opendocument.text
1989!:ext	odt
1990>>>>77	string	-template		Text Template
1991!:mime	application/vnd.oasis.opendocument.text-template
1992!:ext	ott
1993>>>>77	string	-web			HTML Document Template
1994!:mime	application/vnd.oasis.opendocument.text-web
1995!:ext	oth
1996>>>>77	string	-master
1997>>>>>84	byte	!0x2d			Master Document
1998!:mime	application/vnd.oasis.opendocument.text-master
1999!:ext	odm
2000>>>>>84	string	-template		Master Template
2001!:mime	application/vnd.oasis.opendocument.text-master-template
2002!:ext	otm
2003>>>73	string	graphics
2004>>>>81	byte	!0x2d			Drawing
2005!:mime	application/vnd.oasis.opendocument.graphics
2006!:ext	odg
2007>>>>81	string	-template		Drawing Template
2008!:mime	application/vnd.oasis.opendocument.graphics-template
2009!:ext	otg
2010>>>73	string	presentation
2011>>>>85	byte	!0x2d			Presentation
2012!:mime	application/vnd.oasis.opendocument.presentation
2013!:ext	odp
2014>>>>85	string	-template		Presentation Template
2015!:mime	application/vnd.oasis.opendocument.presentation-template
2016!:ext	otp
2017>>>73	string	spreadsheet
2018>>>>84	byte	!0x2d			Spreadsheet
2019!:mime	application/vnd.oasis.opendocument.spreadsheet
2020!:ext	ods
2021>>>>84	string	-template		Spreadsheet Template
2022!:mime	application/vnd.oasis.opendocument.spreadsheet-template
2023!:ext	ots
2024>>>73	string	chart
2025>>>>78	byte	!0x2d			Chart
2026!:mime	application/vnd.oasis.opendocument.chart
2027!:ext	odc
2028>>>>78	string	-template		Chart Template
2029!:mime	application/vnd.oasis.opendocument.chart-template
2030!:ext	otc
2031>>>73	string	formula
2032>>>>80	byte	!0x2d			Formula
2033!:mime	application/vnd.oasis.opendocument.formula
2034!:ext	odf
2035>>>>80	string	-template		Formula Template
2036!:mime	application/vnd.oasis.opendocument.formula-template
2037!:ext	otf
2038# https://www.loc.gov/preservation/digital/formats/fdd/fdd000441.shtml
2039>>>73	string	database		Database
2040!:mime	application/vnd.oasis.opendocument.database
2041!:ext	odb
2042# Valid for LibreOffice Base 6.0.1.1 at least
2043>>>73	string	base 			Database
2044# https://bugs.documentfoundation.org/show_bug.cgi?id=45854
2045!:mime	application/vnd.oasis.opendocument.base
2046!:ext	odb
2047>>>73	string	image
2048>>>>78	byte	!0x2d			Image
2049!:mime	application/vnd.oasis.opendocument.image
2050!:ext	odi
2051>>>>78	string	-template		Image Template
2052!:mime	application/vnd.oasis.opendocument.image-template
2053!:ext	oti
2054
2055#  EPUB (OEBPS) books using OCF (OEBPS Container Format)
2056#    https://www.idpf.org/ocf/ocf1.0/download/ocf10.htm, section 4.
2057#    From: Ralf Brown <ralf.brown@gmail.com>
2058>>50	string	epub+zip	EPUB document
2059!:mime application/epub+zip
2060
2061# From: Hajin Jang <jb6804@naver.com>
2062# hwpx (OWPML) document format follows OCF specification.
2063# Hangul Word Processor 2010+ supports HWPX format.
2064# URL: https://www.hancom.com/etc/hwpDownload.do
2065#      https://standard.go.kr/KSCI/standardIntro/getStandardSearchView.do?menuId=503&topMenuId=502&ksNo=KSX6101
2066#      https://e-ks.kr/streamdocs/view/sd;streamdocsId=72059197557727331
2067>>50	string	hwp+zip     Hancom HWP (Hangul Word Processor) file, HWPX
2068!:mime application/x-hwp+zip
2069!:ext	hwpx
2070
2071# From:	Joerg Jenderek
2072# URL:	http://en.wikipedia.org/wiki/CorelDRAW
2073# NOTE:	versions up to 2 are WL-based; 3 to 13 are handled by ./riff; from 14 on zip based
2074>>50	string	x-vnd.corel.	 Corel
2075>>>62	string	draw.document+zip	Draw drawing, version 14-16
2076!:mime	application/x-vnd.corel.draw.document+zip
2077!:ext	cdr
2078>>>62	string	draw.template+zip	Draw template, version 14-16
2079!:mime	application/x-vnd.corel.draw.template+zip
2080!:ext	cdrt
2081>>>62	string	zcf.draw.document+zip	Draw drawing, version 17-22
2082!:mime	application/x-vnd.corel.zcf.draw.document+zip
2083!:ext	cdr
2084>>>62	string	zcf.draw.template+zip	Draw template, version 17-22
2085!:mime	application/x-vnd.corel.zcf.draw.template+zip
2086!:ext	cdt/cdrt
2087# URL:	http://product.corel.com/help/CorelDRAW/540240626/Main/EN/Doc/CorelDRAW-Other-file-formats.html
2088>>>62	string	zcf.pattern+zip		Draw pattern, version 22
2089!:mime	application/x-vnd.corel.zcf.pattern+zip
2090!:ext	pat
2091# URL:		https://en.wikipedia.org/wiki/Corel_Designer
2092# Reference:	http://fileformats.archiveteam.org/wiki/Corel_Designer
2093# Note:		called by TrID "Corel DESIGN graphics"
2094>>>62	string	designer.document+zip		DESIGNER graphics, version 14-16
2095!:mime	application/x-vnd.corel.designer.document+zip
2096!:ext	des
2097>>>62	string	zcf.designer.document+zip	DESIGNER graphics, version 17-21
2098!:mime	application/x-vnd.corel.zcf.designer.document+zip
2099!:ext	des
2100# URL:	http://product.corel.com/help/CorelDRAW/540223850/Main/EN/Documentation/
2101#	CorelDRAW-Corel-Symbol-Library-CSL.html
2102>>>62	string	symbol.library+zip		Symbol Library, version 6-16.3
2103!:mime	application/x-vnd.corel.symbol.library+zip
2104!:ext	csl
2105>>>62	string	zcf.symbol.library+zip		Symbol Library, version 17-22
2106!:mime	application/x-vnd.corel.zcf.symbol.library+zip
2107!:ext	csl
2108
2109#  Catch other ZIP-with-mimetype formats
2110#	In a ZIP file, the bytes immediately after a member's contents are
2111#	always "PK". The 2 regex rules here print the "mimetype" member's
2112#	contents up to the first 'P'. Luckily, most MIME types don't contain
2113#	any capital 'P's. This is a kludge.
2114#    (mimetype contains "application/<OTHER>")
2115>>50		default	x			Zip data
2116>>>38		regex	[!-OQ-~]+		(MIME type "%s"?)
2117!:mime	application/zip
2118#    (mimetype contents other than "application/*")
2119>26		string	\x8\0\0\0mimetype
2120>>38		string	!application/
2121>>>38		regex	[!-OQ-~]+		Zip data (MIME type "%s"?)
2122!:mime	application/zip
2123
2124# Java Jar files (see also APK files above)
2125>(26.s+30)	leshort	0xcafe		Java archive data (JAR)
2126!:mime	application/java-archive
2127!:ext	jar
2128
2129# iOS App
2130>(26.s+30)	leshort	!0xcafe
2131>>26		string	!\x8\0\0\0mimetype
2132>>>30		string	Payload/
2133>>>>38		search/64       .app/   iOS App
2134!:mime application/x-ios-app
2135
2136# Dup, see above.
2137#>30	search/100/b application/epub+zip	EPUB document
2138#!:mime application/epub+zip
2139
2140# APK Signing Block
2141>(26.s+30)	leshort	!0xcafe
2142>>30	search/100/b !application/epub+zip
2143>>>26    string          !\x8\0\0\0mimetype
2144>>>>-22	string	PK\005\006
2145>>>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	Android package (APK), with APK Signing Block
2146!:mime	application/vnd.android.package-archive
2147!:ext	apk
2148
2149# Keyman Compiled Package File (keyman.com)
2150# https://help.keyman.com/developer/current-version/reference/file-types/kmp
2151# Find start of central directory
2152>>>>>(-6.l)	string		PK\001\002
2153# Scan central directory for string 'kmp.json', will suffice for a
2154# package containing about 150 files
2155>>>>>>(-6.l)	search/9000	kmp.json	Keyman Compiled Package File
2156!:mime	application/vnd.keyman.kmp+zip
2157!:ext kmp
2158>>>>>+4	default		x
2159>>>>>>0	use		zipgeneric
2160
2161>>>>+4	default		x
2162>>>>>0	use		zipgeneric
2163
2164# Zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
21650	string		PK\005\006	Zip archive data (empty)
2166!:mime application/zip
2167!:ext zip/cbz
2168!:strength +1
2169
2170# StarView Metafile
2171# From Pierre Ducroquet <pinaraf@pinaraf.info>
21720	string	VCLMTF	StarView MetaFile
2173>6	beshort	x	\b, version %d
2174>8	belong	x	\b, size %d
2175
2176# Zoo archiver
2177# Update: Joerg Jenderek
2178# URL:		https://en.wikipedia.org/wiki/Zoo_(file_format)
2179#		http://fileformats.archiveteam.org/wiki/Zoo
2180# Reference:	http://mark0.net/download/triddefs_xml.7z/defs/a/ark-zoo-strict.trid.xml
2181#		http://distcache.freebsd.org/ports-distfiles/zoo-2.10pl1.tar.gz/zoo.h
2182# Note:		called "ZOO compressed archive (strict)" by TrID and "ZOO Compressed Archive" by DROID via PUID x-fmt/269
2183#		verified by command like `deark -m zoo -l -d2 WHRCGA.ZOO`
218420	lelong		0xfdc4a7dc
2185# skip DROID x-fmt-269-signature-id-621.zoo by looking for valid major version to manipulate archive
2186>32	byte		>0		Zoo archive data
2187!:mime	application/x-zoo
2188# bak is the extension of a backed-up zoo archive
2189!:ext	zoo/bak
2190# version in text form like: 1.50 2.00 2.10
2191>>4	byte		>48		\b, v%c.
2192>>>6	byte		>47		\b%c
2193>>>>7	byte		>47		\b%c
2194# ZOO files typically start with "ZOO ?.?? Archive.", followed by the bytes 0x1a 0x0 0x0; this text is not used by Zoo and may be anything
2195>>8	string		!\040Archive.\032 \b, at 8
2196>>>8	string		x		text "%0.10s"
2197# major_ver.minor_ver; minimum version needed to manipulate archive like: 1.0 2.0
2198>>32	byte		>0		\b, modify: v%d
2199>>>33	byte		x		\b.%d+
2200# major_ver.minor_ver; minimum version needed to extract after modify like in old versions
2201>>(24.l+28)	ubyte	x		\b, extract: v%u
2202>>(24.l+29)	ubyte	x		\b.%u+
2203# with zoo 2.00 additional fields have been added in the archive header
2204>>32	byte		>1
2205# type; type of archive header like: 1 2
2206>>>34		ubyte	!1		\b, header type %u
2207# acmt_pos; position of archive comment like: 6258 30599 61369 149501
2208>>>35		lelong	>0		\b, at %d
2209# acmt_len; length of archive comment like: 258
2210>>>>39			uleshort x	%u bytes comment
2211#>>>>(35.l)		ubequad	x	COMMENT=%16.16llx
2212# 1st character of comment maybe is CarriageReturn (0x0d)
2213>>>>(35.l) 		ubyte	<040
2214# 2nd character of comment maybe is LineFeed (0x0a)
2215>>>>>(35.l+1) 		ubyte	<040
2216# comment string after CRLF like "Anonymous ftp site garbo.uwasa.fi 128.214.87.1 moderated by"
2217>>>>>>(35.l+2)		string	x	%s
2218# next character of remaining comment maybe is CarriageReturn (0x0d)
2219>>>>>>>&0		ubyte	<040
2220>>>>>>>>&0		ubyte	<040
2221# 2nd comment part like: Timo Salmi ts@chyde.uwasa.fi      PC directories and uploads\015\012Harri Valkama hv@chyde.uwasa.fi   PC, Mac, Unix files, and upload
2222>>>>>>>>>&0		string	>037	%s
2223# vdata; archive-level versioning byte like: 1 3
2224>>>41		ubyte	!1		\b, vdata %#x
2225# zoo_start; pointer to 1st entry header
2226>>24	lelong		x		\b; at %u
2227# zoo_minus; zoo_start -1 for consistency checking
2228#>>28	lelong		x		\b, zoo_minus %#x
2229# zoo_tag; tag for check
2230#>>(24.l+0) ulelong	!0xfdc4a7dc	\b, zoo_tag=%8.8x
2231# type; type of directory entry like: 1 2
2232>>(24.l+4)	ubyte	!2		type=%u
2233# packing_method; 0~no packing 1~normal LZW 2~lzh
2234>>(24.l+5)	ubyte		x	method=
2235>>>(24.l+5)	ubyte		0	\bnot-compressed
2236>>>(24.l+5)	ubyte		1	\blzd
2237>>>(24.l+5)	ubyte		2	\blzh
2238# next; position of next directory entry
2239>>(24.l+6)	ulelong		x	\b, next entry at %u
2240# offset; position of file data for this entry
2241#>>(24.l+10) ulelong		x	\b, data at %u
2242# file_crc; CRC-16 of file data
2243>>(24.l+18)	uleshort	x	\b, CRC %#4.4x
2244# comment; zero if none or points to entry comment like ADD9h (WHRCGA.ZOO)
2245>>(24.l+32)	lelong		>0	\b, at %#x
2246# cmt_size; if not 0 for none then length of entry comment like: 46
2247>>>(24.l+36)	uleshort	>0	%u bytes comment
2248# entry comment itself like: "CGA .GL file showing menu input from keyboard"
2249>>>>(&-6.l)	string		x	"%s"
2250# org_size; original size of file
2251>>(24.l+20)	ulelong		x	\b, size %u
2252# size_now; compressed size of file
2253>>(24.l+24)	ulelong		x	(%u compressed)
2254# major_ver.minor_ver; minimum version needed to extract already done
2255# deleted; will be 1 if deleted, 0 if not
2256>>(24.l+30)	ubyte		=1	\b, deleted
2257# struc; file structure if any; WHAT IS THAT?
2258>>(24.l+31)	ubyte		!0	\b, structured
2259# fname[13]; short/DOS file name like 12345678.012
2260>>(24.l+38)	string	x		\b, %0.13s
2261# for directory entry type 2 with variable part
2262>>(24.l+4)	ubyte	=2
2263# var_dir_len; length of variable part of dir entry
2264>>>(24.l+51)		uleshort >0
2265#>>>(24.l+51)		uleshort >0	\b, variable part length %u
2266# namlen; length of long filename
2267#>>>>(24.l+56)		ubyte	x	\b, namlen %u
2268# dirlen; length of directory name
2269#>>>>(24.l+57)		ubyte	x	\b, dirlen %u
2270# if file length positive then show long file name
2271>>>>(24.l+56)		ubyte	>0
2272# lfname[256]; long file name \0-terminated
2273>>>>>(24.l+58)		string	x	"%s"
2274# if directory length is positive then jump before the file name field and then skip this additional length plus 2 (\0-terminator + dirlen field) to reach the following directory name
2275>>>>(24.l+57)		ubyte	>0
2276>>>>>(24.l+55)		ubyte	x
2277# dirname[256]; directory name \0-terminated
2278>>>>>>&(&0.b+2)		string	x	in "%s"
2279# dir_crc; CRC of directory entry
2280#>>>(24.l+54)		uleshort x	\b, entry CRC %#4.4x
2281# tz; timezone where file was archived; 7Fh~unknown 4~1.00hoursWestOfUTC 12 16 20~5.00hoursWestOfUTC -107~26.75hoursEastOfUTC -4~1.00hoursEastOfUTC
2282>>>(24.l+53)		byte	!0x7f	\b, time zone %d/4
2283# date; last mod file date in DOS format
2284>>>(24.l+14)		lemsdosdate x	\b, modified %s
2285# time; last mod file time in DOS format
2286>>>(24.l+16)		lemsdostime x	%s
2287
2288# Shell archives
228910	string		#\ This\ is\ a\ shell\ archive	shell archive text
2290!:mime	application/octet-stream
2291
2292#
2293# LBR. NB: May conflict with the questionable
2294#          "binary Computer Graphics Metafile" format.
2295#
22960       string  \0\ \ \ \ \ \ \ \ \ \ \ \0\0    LBR archive data
2297#
2298# PMA (CP/M derivative of LHA)
2299# Update: Joerg Jenderek
2300# URL: https://en.wikipedia.org/wiki/LHA_(file_format)
2301#
2302#2       string          -pm0-           PMarc archive data [pm0]
23032	string		-pm0-
2304>0	use	lharc-file
2305#2       string          -pm1-           PMarc archive data [pm1]
23062	string		-pm1-
2307>0	use	lharc-file
2308#2       string          -pm2-           PMarc archive data [pm2]
23092	string		-pm2-
2310>0	use	lharc-file
23112       string          -pms-           PMarc SFX archive (CP/M, DOS)
2312#!:mime	application/x-foobar-exec
2313!:ext com
23145       string          -pc1-           PopCom compressed executable (CP/M)
2315#!:mime	application/x-
2316#!:ext com
2317
2318# From Rafael Laboissiere <rafael@laboissiere.net>
2319# The Project Revision Control System (see
2320# http://prcs.sourceforge.net) generates a packaged project
2321# file which is recognized by the following entry:
23220	leshort		0xeb81	PRCS packaged project
2323
2324# Microsoft cabinets
2325# by David Necas (Yeti) <yeti@physics.muni.cz>
2326#0	string	MSCF\0\0\0\0	Microsoft cabinet file data,
2327#>25	byte	x		v%d
2328#>24	byte	x		\b.%d
2329# MPi: All CABs have version 1.3, so this is pointless.
2330# Better magic in debian-additions.
2331
2332# GTKtalog catalogs
2333# by David Necas (Yeti) <yeti@physics.muni.cz>
23344	string	gtktalog\ 	GTKtalog catalog data,
2335>13	string	3		version 3
2336>>14	beshort	0x677a		(gzipped)
2337>>14	beshort	!0x677a		(not gzipped)
2338>13	string	>3		version %s
2339
2340############################################################################
2341# Parity archive reconstruction file, the 'par' file format now used on Usenet.
23420       string          PAR\0	PARity archive data
2343>48	leshort		=0	- Index file
2344>48	leshort		>0	- file number %d
2345
2346# Felix von Leitner <felix-file@fefe.de>
23470	string	d8:announce	BitTorrent file
2348!:mime	application/x-bittorrent
2349!:ext	torrent
2350# Durval Menezes, <jmgthbfile at durval dot com>
23510	string	d13:announce-list	BitTorrent file
2352!:mime	application/x-bittorrent
2353!:ext	torrent
23540	string	d7:comment	BitTorrent file
2355!:mime	application/x-bittorrent
2356!:ext	torrent
23570	string	d4:info		BitTorrent file
2358!:mime	application/x-bittorrent
2359!:ext	torrent
2360
2361# Atari MSA archive - Teemu Hukkanen <tjhukkan@iki.fi>
2362# URL:		http://fileformats.archiveteam.org/wiki/MSA_(Magic_Shadow_Archiver)
2363# Reference:	http://info-coach.fr/atari/documents/_mydoc/FD_Image_File_Format.pdf
2364#		http://mark0.net/download/triddefs_xml.7z/defs/m/msa.trid.xml
2365# Update:	Joerg Jenderek
2366# Note:		called by TrID "Atari MSA Disk Image" and verified by
2367#		command like `deark -l -m msa -d2 PDATS578.msa` as " Atari ST floppy disk image"
2368# GRR: line below is too general as it matches setup.skin
23690	beshort 0x0e0f
2370# skip foo setup.skin with unrealistic high number 52255 of sides by check for valid "low" value
2371>4	ubeshort <2		Atari MSA archive data
2372#!:mime	application/octet-stream
2373!:mime	application/x-atari-msa
2374!:ext	msa
2375# sectors per track like: 9 10
2376>>2	beshort x		\b, %d sectors per track
2377# sides (0 or 1; add 1 to this to get correct number of sides)
2378>>4	beshort 0		\b, 1 sided
2379>>4	beshort 1		\b, 2 sided
2380# starting track like: 0
2381>>6	beshort x		\b, starting track: %d
2382# ending track like: 39 79 80 81
2383>>8	beshort x		\b, ending track: %d
2384# tracks content
2385#>>10	ubequad x		\b, track content %#16.16llx
2386
2387# Alternate ZIP string (amc@arwen.cs.berkeley.edu)
23880	string	PK00PK\003\004	Zip archive data
2389!:mime	application/zip
2390!:ext zip/cbz
2391
2392# Recognize ZIP archives with prepended data by end-of-central-directory record
2393# https://en.wikipedia.org/wiki/ZIP_(file_format)#End_of_central_directory_record_(EOCD)
2394# by Michal Gorny <mgorny@gentoo.org>
2395-2	uleshort	0
2396>&-22	string	PK\005\006
2397# without #!
2398>>0	string	!#!	Zip archive, with extra data prepended
2399!:mime	application/zip
2400!:ext zip/cbz
2401# with #!
2402>>0	string/w	#!\ 	a
2403>>>&-1	string/T	x	%s script executable (Zip archive)
2404
2405# ACE archive (from http://www.wotsit.org/download.asp?f=ace)
2406# by Stefan `Sec` Zehl <sec@42.org>
24077	string		**ACE**		ACE archive data
2408!:mime	application/x-ace-compressed
2409!:ext	ace
2410>15	byte	>0		version %d
2411>16	byte	=0x00		\b, from MS-DOS
2412>16	byte	=0x01		\b, from OS/2
2413>16	byte	=0x02		\b, from Win/32
2414>16	byte	=0x03		\b, from Unix
2415>16	byte	=0x04		\b, from MacOS
2416>16	byte	=0x05		\b, from WinNT
2417>16	byte	=0x06		\b, from Primos
2418>16	byte	=0x07		\b, from AppleGS
2419>16	byte	=0x08		\b, from Atari
2420>16	byte	=0x09		\b, from Vax/VMS
2421>16	byte	=0x0A		\b, from Amiga
2422>16	byte	=0x0B		\b, from Next
2423>14	byte	x		\b, version %d to extract
2424>5	leshort &0x0080		\b, multiple volumes,
2425>>17	byte	x		\b (part %d),
2426>5	leshort &0x0002		\b, contains comment
2427>5	leshort	&0x0200		\b, sfx
2428>5	leshort	&0x0400		\b, small dictionary
2429>5	leshort	&0x0800		\b, multi-volume
2430>5	leshort	&0x1000		\b, contains AV-String
2431>>30	string	\x16*UNREGISTERED\x20VERSION*	(unregistered)
2432>5	leshort &0x2000		\b, with recovery record
2433>5	leshort &0x4000		\b, locked
2434>5	leshort &0x8000		\b, solid
2435# Date in MS-DOS format (whatever that is)
2436#>18	lelong	x		Created on
2437
2438# sfArk : compression program for Soundfonts (sf2) by Dirk Jagdmann
2439# <doj@cubic.org>
24400x1A	string	sfArk		sfArk compressed Soundfont
2441>0x15	string	2
2442>>0x1	string	>\0		Version %s
2443>>0x2A	string	>\0		: %s
2444
2445# DR-DOS 7.03 Packed File *.??_
2446# Reference: http://www.antonis.de/dos/dos-tuts/mpdostip/html/nwdostip.htm
2447# Note:	unpacked by PNUNPACK.EXE
24480	string	Packed\ File\040
2449# skip plain ASCII text that merely starts with "Packed File " by requiring Control-Z at 0x18
2450>0x18	ubyte	0x1a		Personal NetWare Packed File
2451!:mime	application/x-novell-compress
2452!:ext	??_
2453>>12	string	x		\b, was "%.12s"
2454# 1 or 2
2455#>>0x19	ubyte	x		\b, at 0x19 %u
2456>>0x1b	ulelong	x		with %u bytes
2457
2458# EET archive
2459# From: Tilman Sauerbeck <tilman@code-monkey.de>
24600	belong	0x1ee7ff00	EET archive
2461!:mime	application/x-eet
2462
2463# From:		Joerg Jenderek
2464# URL:		https://help.foxitsoftware.com/kb/install-fzip-file.php
2465# reference:	http://mark0.net/download/triddefs_xml.7z/
2466#		defs/f/fzip.trid.xml
2467# Note: unknown compression; No "PK" zip magic; normally in directory like
2468#	"%APPDATA%\Foxit Software\Addon\Foxit Reader\Install"
24690	ubequad	0x2506781901010000	Foxit add-on/update
2470!:mime	application/x-fzip
2471!:ext	fzip
2472
2473# From: "Robert Dale" <robdale@gmail.com>
24740	belong	123		dar archive,
2475>4	belong	x		label "%.8x
2476>>8	belong	x		%.8x
2477>>>12	beshort	x		%.4x"
2478>14	byte	0x54		end slice
2479>14	beshort	0x4e4e		multi-part
2480>14	beshort	0x4e53		multi-part, with -S
2481
2482# Symbian installation files
2483#  https://www.thouky.co.uk/software/psifs/sis.html
2484#  http://developer.symbian.com/main/downloads/papers/SymbianOSv91/softwareinstallsis.pdf
24858	lelong	0x10000419	Symbian installation file
2486!:mime	application/vnd.symbian.install
2487>4	lelong	0x1000006D	(EPOC release 3/4/5)
2488>4	lelong	0x10003A12	(EPOC release 6)
24890	lelong	0x10201A7A	Symbian installation file (Symbian OS 9.x)
2490!:mime	x-epoc/x-sisx-app
2491
2492# From "Nelson A. de Oliveira" <naoliv@gmail.com>
24930	string	MPQ\032		MoPaQ (MPQ) archive
2494
2495# From: "Nelson A. de Oliveira" <naoliv@gmail.com>
2496# .kgb
24970	string KGB_arch		KGB Archiver file
2498>10	string x		with compression level %.1s
2499
2500# xar (eXtensible ARchiver) archive
2501# URL: https://en.wikipedia.org/wiki/Xar_(archiver)
2502# xar archive format: https://code.google.com/p/xar/
2503# From: "David Remahl" <dremahl@apple.com>
2504# Update: Joerg Jenderek
2505# TODO: lzma compression; X509Data for pkg and xip
2506# Note: verified by `xar --dump-header -f FullBundleUpdate.xar` or
2507# 7z t -txar Xcode_10.2_beta_4.xip`
25080	string	xar!		xar archive
2509!:mime	application/x-xar
2510# pkg for Mac OSX installer package like FullBundleUpdate.pkg
2511# xip for signed Apple software like Xcode_10.2_beta_4.xip
2512!:ext	xar/pkg/xip
2513# always 28 in older archives
2514>4	ubeshort >28		\b, header size %u
2515# currently there exit only version 1 since about 2014
2516>6	ubeshort >1		version %u,
2517>8	ubequad	x		compressed TOC: %llu,
2518#>16	ubequad	x		uncompressed TOC: %llu,
2519# cksum_alg 0-2 in older and also 3-4 in newer
2520>24	belong	0		no checksum
2521>24	belong	1		SHA-1 checksum
2522>24	belong	2		MD5 checksum
2523>24	belong	3		SHA-256 checksum
2524>24	belong	4		SHA-512 checksum
2525>24	belong	>4		unknown %#x checksum
2526#>24	belong	>4		checksum
2527#			For no compression jump 0 bytes
2528>24	belong	0
2529>>0		ubyte	x
2530# jump more bytes forward by header size
2531>>>&(4.S)	ubyte	x
2532# jump more bytes forward by compressed table of contents size
2533#>>>>&(8.Q)	ubequad	x	\b, heap data %#llx
2534>>>>&(8.Q)	ubyte	x
2535# look for data by ./compress after message with 1 space at end
2536>>>>>&-3	indirect x	\b, contains
2537#			For SHA-1 jump 20 minus 2 bytes
2538>24	belong	1
2539>>18		ubyte	x
2540# jump more bytes forward by header size
2541>>>&(4.S)	ubyte	x
2542# jump more bytes forward by compressed table of contents size
2543>>>>&(8.Q)	ubyte	x
2544# data compressed by gzip, bzip, lzma or none
2545>>>>>&-1	indirect x	\b, contains
2546#			For SHA-256 jump 32 minus 2 bytes
2547>24	belong	3
2548>>30		ubyte	x
2549# jump more bytes forward by header size
2550>>>&(4.S)	ubyte	x
2551# jump more bytes forward by compressed table of contents size
2552>>>>&(8.Q)	ubyte	x
2553>>>>>&-1	indirect x	\b, contains
2554#			For SHA-512 jump 64 minus 2 bytes
2555>24	belong	4
2556>>62		ubyte	x
2557# jump more bytes forward by header size
2558>>>&(4.S)	ubyte	x
2559# jump more bytes forward by compressed table of contents size
2560>>>>&(8.Q)	ubyte	x
2561>>>>>&-1	indirect x	\b, contains
2562
2563# Type: Parity Archive
2564# From: Daniel van Eeden <daniel_e@dds.nl>
25650	string	PAR2		Parity Archive Volume Set
2566
2567# Bacula volume format. (Volumes always start with a block header.)
2568# URL: https://bacula.org/3.0.x-manuals/en/developers/developers/Block_Header.html
2569# From: Adam Buchbinder <adam.buchbinder@gmail.com>
257012	string	BB02		Bacula volume
2571>20	bedate	x		\b, started %s
2572
2573# ePub is XHTML + XML inside a ZIP archive.  The first member of the
2574#   archive must be an uncompressed file called 'mimetype' with contents
2575#   'application/epub+zip'
2576
2577
2578# From: "Michael Gorny" <mgorny@gentoo.org>
2579# ZPAQ: http://mattmahoney.net/dc/zpaq.html
25800	string	zPQ	ZPAQ stream
2581>3	byte	x	\b, level %d
2582# From: Barry Carter <carter.barry@gmail.com>
2583# https://encode.ru/threads/456-zpaq-updates/page32
25840	string	7kSt	ZPAQ file
2585
2586# BBeB ebook, unencrypted (LRF format); header words are little-endian like the UTF-16LE signature
2587# URL: https://www.sven.de/librie/Librie/LrfFormat
2588# From: Adam Buchbinder <adam.buchbinder@gmail.com>
25890	string	L\0R\0F\0\0\0	BBeB ebook data, unencrypted
2590>8	uleshort	x		\b, version %u
2591>36	byte	1		\b, front-to-back
2592>36	byte	16		\b, back-to-front
2593>42	uleshort	x		\b, (%ux,
2594>44	uleshort	x		%u)
2595
2596# Symantec GHOST image by Joerg Jenderek at May 2014
2597# https://us.norton.com/ghost/
2598# https://www.garykessler.net/library/file_sigs.html
25990		ubelong&0xFFFFf7f0	0xFEEF0100	Norton GHost image
2600# *.GHO
2601>2		ubyte&0x08		0x00		\b, first file
2602# *.GHS or *.[0-9] with cns program option
2603>2		ubyte&0x08		0x08		\b, split file
2604# part of split index interesting for *.ghs
2605>>4		ubyte			x		id=%#x
2606# compression tag minus one equals numeric compression command line switch z[1-9]
2607>3		ubyte			0		\b, no compression
2608>3		ubyte			2		\b, fast compression (Z1)
2609>3		ubyte			3		\b, medium compression (Z2)
2610>3		ubyte			>3
2611>>3		ubyte			<11		\b, compression (Z%d-1)
2612>2		ubyte&0x08		0x00
2613# ~ 30 byte password field only for *.gho
2614>>12		ubequad			!0		\b, password protected
2615>>44		ubyte			!1
2616# 1~Image All, sector-by-sector only for *.gho
2617>>>10		ubyte			1		\b, sector copy
2618# 1~Image Boot track only for *.gho
2619>>>43		ubyte			1		\b, boot track
2620# 1~Image Disc only for *.gho implies Image Boot track and sector copy
2621>>44		ubyte			1		\b, disc sector copy
2622# optional image description only *.gho
2623>>0xff		string			>\0		"%-.254s"
2624# look for DOS sector end sequence
2625>0xE08	search/7776		\x55\xAA
2626>>&-512	indirect		x		\b; contains
2627
2628# Google Chrome extensions (CRX); the version field is a little-endian 32-bit integer
2629# https://developer.chrome.com/extensions/crx
2630# https://developer.chrome.com/extensions/hosting
26310	string	Cr24	Google Chrome extension
2632!:mime	application/x-chrome-extension
2633>4	ulelong	x	\b, version %u
2634
2635# SeqBox - Sequenced container
2636# ext: sbx, seqbox
2637# Marco Pontello marcopon@gmail.com
2638# reference: https://github.com/MarcoPon/SeqBox
26390	string	SBx	SeqBox,
2640>3	byte	x	version %d
2641
2642# LyNX archive
2643# Update:	Joerg Jenderek
2644# URL:		http://fileformats.archiveteam.org/wiki/Lynx_archive
2645# Reference:	http://ist.uwaterloo.ca/~schepers/formats/LNX.TXT
2646#		http://mark0.net/download/triddefs_xml.7z/defs/a/ark-lnx.trid.xml
2647# Note:		called "Lynx archive" by TrID and "Commodore C64 BASIC program" with "POKE 53280" by ./c64
2648# TODO:		merge and unify with Commodore C64 BASIC program
264956	string	USE\040LYNX\040TO\040DISSOLVE\040THIS\040FILE	 LyNX archive
2650# display "Lynx archive" (strength=330) before Commodore C64 BASIC program (strength=50) handled by ./c64
2651#!:strength +0
2652#!:mime	application/octet-stream
2653!:mime	application/x-commodore-lnx
2654!:ext	lnx
2655# afterwards look for BASIC tokenized GOTO (89h) 10, line terminator \0, end of programm tag \0\0 and CarriageReturn
2656>86		search/10	\x8910\0\0\0\r	\b,
2657# for DEBUGGING
2658#>>&0		string		x	STRING="%s"
2659# number in ASCII of directory blocks with spaces on both sides like: 1 2 3 5
2660>>&0		regex		[0-9]{1,5}	%s directory blocks
2661# signature like: "*LYNX XII BY WILL CORLEY" " LYNX IX  BY WILL CORLEY" "*LYNX BY CBMCONVERT 2.0*"
2662>>>&2		regex		[^\r]{1,24}	\b, signature "%s"
2663# number of files in ASCII surrounded by spaces and delimited by CR like: 2 3 6 13 69 144 (maximum?)
2664>>>>&1		regex		[0-9]{1,3}	\b, %s files
2665
2666# From: Joerg Jenderek
2667# URL: https://www.acronis.com/
2668# Reference: https://en.wikipedia.org/wiki/TIB_(file_format)
2669# Note: only tested with True Image 2013 Build 5962 and 2019 Build 14110
26700	ubequad		0xce24b9a220000000	Acronis True Image backup
2671!:mime	application/x-acronis-tib
2672!:ext	tib
2673# 01000000
2674#>20	ubelong		x			\b, at 20 %#x
2675# 20000000
2676#>28	ubelong		x			\b, at 28 %#x
2677# strings like "Generic- SD/MMC 1.00" "Unknown Disk" "Msft Virtual Disk 1.0"
2678# ???
2679# strings like "\Device\0000011e" "\Device\0000015a"
2680#>0	search/0x6852300/cs	\\Device\\
2681#>>&-1	pstring		x			\b, %s
2682# "\Device\HarddiskVolume30" "\Device\HarddiskVolume39"
2683#>>>&1	search/180/cs	\\Device\\
2684#>>>>&-1	pstring		x			\b, %s
2685#>>>>>&0	search/29/cs	\0\0\xc8\0
2686# disk label
2687#>>>>>>&10	lestring16	x		\b, disk label %11.11s
2688#>>>>>>&9	plestring16	x		\b, disk label "%11.11s"
2689#>>>>>>&10	ubequad	x			%16.16llx
2690
2691
2692# Gentoo XPAK binary package
2693# by Michal Gorny <mgorny@gentoo.org>
2694# https://gitweb.gentoo.org/proj/portage.git/tree/man/xpak.5
2695-4	string	STOP
2696>-16	string	XPAKSTOP	Gentoo binary package (XPAK)
2697!:mime	application/vnd.gentoo.xpak
2698
2699# From:		Joerg Jenderek
2700# URL:		https://kodi.wiki/view/TexturePacker
2701# Reference:	https://mirrors.kodi.tv/releases/source/17.3-Krypton.tar.gz
2702# /xbmc-Krypton/xbmc/guilib/XBTF.h
2703# /xbmc-Krypton/xbmc/guilib/XBTF.cpp
27040	string	XBTF
2705# skip ASCII text by looking for terminating \0 of path
2706>264	ubyte	0		XBMC texture package
2707!:mime	application/x-xbmc-xbt
2708!:ext	xbt
2709# XBTF_VERSION 2
2710>>4	string	!2		\b, version %-.1s
2711# nofFiles /xbmc-Krypton/xbmc/guilib/XBTFReader.cpp
2712>>5	ulelong	x		\b, %u file
2713# plural s
2714>>5	ulelong	>1		\bs
2715# path[CXBTFFile[MaximumPathLength=256]
2716>>9	string	x		\b, 1st %s
2717
2718# ALZIP archive
2719# by Hyungjun Park <hyungjun.park@worksmobile.com>, Hajin Jang <hajin_jang@worksmobile.com>
2720# http://kippler.com/win/unalz/
2721# https://salsa.debian.org/l10n-korean-team/unalz
27220	string	ALZ\001		ALZ archive data
2723!:ext   alz
2724
2725# https://cf-aldn.altools.co.kr/setup/EGG_Specification.zip
27260	string	EGGA		EGG archive data,
2727!:ext   egg
2728>5	byte	x		version %u
2729>4	byte	x		\b.%u
2730>>0x0E	ulelong	=0x08E28222
2731>>0x0E	ulelong	=0x24F5A262	\b, split
2732>>0x0E	ulelong	=0x24E5A060	\b, solid
2733>>0x0E	default	x		\b, unknown
2734
2735# PAQ9A archive
2736# URL: http://mattmahoney.net/dc/#paq9a
2737# Note: Line 1186 of paq9a.cpp gives the magic bytes
27380	string	pQ9\001		PAQ9A archive
2739
2740# From wof (wof@stachelkaktus.net)
27410	string	Unison\ archive\ format	Unison archive format
2742
2743# https://ankiweb.net
274430	string	collection.anki2	Anki APKG file
2745#!:ext	.apkg
2746
2747# Synology archive (DiskStation Manager 7.0+)
2748# From: Alexandre Iooss <erdnaxe@crans.org>
2749# Note: These archives are signed and encrypted.
27500		ulelong&0xFFFFFF00	0xEFBEAD00
2751# MessagePack header (fixarray of 5 elements starting with a bin of 32 bytes)
2752>8  	ulelong&0x00FFFFFF	0x20C495	Synology archive
2753!:ext	spk
2754# Extract some properties from MessagePack third item
2755>>43	search/0x10000		package=
2756>>>&0	string				x			\b, package %s
2757>>43	search/0x10000		arch=
2758>>>&0	string				x			%s
2759>>43	search/0x10000		version=
2760>>>&0	string				x			%s
2761>>43	search/0x10000		create_time=
2762>>>&0	string				x			\b, created on %s
2763
2764# MonoGame/XNA processed assets archive
2765# From: Alexandre Iooss <erdnaxe@crans.org>
2766# URL: https://github.com/MonoGame/MonoGame/blob/v3.8.1/MonoGame.Framework/Content/ContentManager.cs
27670	string	XNB
2768# XNB must be version 4 or 5
2769>4	byte	<6
2770>>4	byte	>3
2771# Size must be positive
2772>>>6	lelong	>0	MonoGame/XNA processed assets
2773!:ext	xnb
2774>>>>3	string	=w	\b, for Windows
2775>>>>3	string	=x	\b, for Xbox360
2776>>>>3	string	=i	\b, for iOS
2777>>>>3	string	=a	\b, for Android
2778>>>>3	string	=d	\b, for DesktopGL
2779>>>>3	string	=X	\b, for MacOSX
2780>>>>3	string	=W	\b, for WindowsStoreApp
2781>>>>3	string	=n	\b, for NativeClient
2782>>>>3	string	=M	\b, for WindowsPhone8
2783>>>>3	string	=r	\b, for RaspberryPi
2784>>>>3	string	=P	\b, for PlayStation4
2785>>>>3	string	=5	\b, for PlayStation5
2786>>>>3	string	=O	\b, for XboxOne
2787>>>>3	string	=S	\b, for Nintendo Switch
2788>>>>3	string	=G	\b, for Google Stadia
2789>>>>3	string	=b	\b, for WebAssembly and Bridge.NET
2790>>>>3	string	=m	\b, for WindowsPhone7.0 (XNA)
2791>>>>3	string	=p	\b, for PlayStationMobile
2792>>>>3	string	=v	\b, for PSVita
2793>>>>3	string	=g	\b, for Windows (OpenGL)
2794>>>>3	string	=l	\b, for Linux
2795>>>>4	byte	x	\b, version %d
2796>>>>5	byte	&0x80	\b, LZX compressed
2797>>>>>10	lelong	x	\b, decompressed size: %d bytes
2798>>>>5	byte	&0x40	\b, LZ4 compressed
2799>>>>>10	lelong	x	\b, decompressed size: %d bytes
2800
2801# Electron ASAR archive
2802# From: Alexandre Iooss <erdnaxe@crans.org>
2803# URL: https://github.com/electron/asar
28040		ulelong	4
2805# Match JSON header start and end
2806>16		string	{"files":{"
2807>>(12.l+12)	string }}}}		Electron ASAR archive
2808!:ext	asar
2809>>>12		ulelong	x		\b, header length: %d bytes
2810
2811# Wasay ImageIt DataPack
2812# From: Alexandre Iooss <erdnaxe@crans.org>
2813# URL: https://www.neowin.net/forum/topic/615151-anyone-know-what-program-opens-dsi-and-wsi-files/
2814# Note: Used in Acer eRecovery and Lenovo OneKey Recovery (OKR)
28154	string		WSVD
2816# bytes 3-4 are the checksum or the first 32 bytes of the file
2817>0	uleshort	0x40	Wasay ImageIt DataPack
2818>>8	uleshort	x	v%u
2819>>10	uleshort	x	\b.%u
2820>>16	lestring16/8	x	\b, "%s"
2821>>12	uleshort	x	(%u)
2822>>32	byte		x	\b, created on %02d
2823>>33	byte		x	\b%02d
2824>>34	byte		x	\b/%02d
2825>>35	byte		x	\b/%02d
2826>>36	byte		x	%02d
2827>>37	byte		x	\b:%02d
2828>>38	byte		x	\b:%02d
2829>>56	ulelong		x	\b, size: %u bytes
2830
2831# Stone archive file - Serpent OS moss package manager's native format
2832# https://github.com/serpent-os/tools,
2833# (Ikey Doherty)
28340	string	\0mos	Stone archive
2835>28	belong	1	(format v%d)
2836>>27	byte	1	binary package
2837!:mime	application/x-stone-binary
2838!:ext	stone
2839>>27	byte	2	delta package
2840!:mime	application/x-stone-delta
2841!:ext	stone
2842>>27	byte	3	repository index
2843!:mime	application/x-stone-repository
2844!:ext	index
2845>>27	byte	4	build manifest
2846!:mime	application/x-stone-manifest
2847!:ext	bin
2848
2849# * VOS <file_name> <sequential|relative|fixed|stream> <record_size> ,
2850# * [encapsulated|not_encapsulated] =
2851# * [encoded|not_encoded|seq_encoded|base64_encoded]
28520	string		VOS\040	Stratus OpenVOS EFV archive
2853>4	regex	[^[:space:]]+ 	\b, (%s)
2854>>&0	regex	[^[:space:]]+	\b, %s
2855>>>&0	regex	[^[:space:]]+	\b, record_size=%s
2856>>>>&0	regex	[^[:space:]]+	\b, %s
2857>>>>>&0	regex	[^[:space:]]+	\b, %s
2858
2859
2860# https://www.vm.ibm.com/devpages/bkw/vmarc.html magic in EBCDIC
28610	string	\x7a\xc3\xc6\xc6\x40\x40\x40\x40	VM Archive
2862
2863# https://pbs.proxmox.com/docs/file-formats.html
28640	string	\x42\xab\x38\x07\xbe\x83\x70\xa1 Proxmox Backup Server unencrypted uncompressed blob
2865
28660	string	\x31\xb9\x58\x42\x6f\xb6\xa3\x7f Proxmox Backup Server unencrypted compressed blob
2867
28680	string	\x7b\x67\x85\xbe\x22\x2d\x4c\xf0 Proxmox Backup Server encrypted uncompressed blob
2869
28700	string	\xe6\x59\x1b\xbf\x0b\xbf\xd8\x0b Proxmox Backup Server encrypted compressed blob
2871
28720	string	\x2f\x7f\x41\xed\x91\xfd\x0f\xcd Proxmox Backup Server fixed index
2873
28740	string	\x1c\x91\x4e\xa5\x19\xba\xb3\xcd Proxmox Backup Server dynamic index
2875
28760	string	\xef\xac\x88\xe5\x74\x64\x95\xd5 Proxmox File Archive Format v1 / pxar
2877
28780	string	\x0d\xa4\x16\xdf\x75\x6c\x0f\x73\x18\x00\x00\x00\x00\x00\x00\x00\x02 Proxmox File Archive Format v2+ / mpxar
2879
28800	string	\xd2\x4e\x4a\x19\xc2\x68\x4c\x83\x10 Proxmox File Archive Format payload stream / ppxar
2881
2882