xref: /freebsd/contrib/file/magic/Magdir/archive (revision ae316d1d1cffd71ab7751f94e10118777a88e027)
1#------------------------------------------------------------------------------
2# $File: archive,v 1.207 2024/11/27 15:37:46 christos Exp $
3# archive:  file(1) magic for archive formats (see also "msdos" for self-
4#           extracting compressed archives)
5#
6# cpio, ar, arc, arj, hpack, lha/lharc, rar, squish, uc2, zip, zoo, etc.
7# pre-POSIX "tar" archives are also handled in the C code ../../src/is_tar.c.
8
9# POSIX tar archives
10# URL: https://en.wikipedia.org/wiki/Tar_(computing)
11# Reference: https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5&manpath=FreeBSD+8-current
12# header mainly padded with nul bytes
# POSIX tar detection chain: each deeper level adds one more plausibility test
# on the first 512-byte header; the name of the first member then selects which
# tar-* subroutine prints the result. The entry starts from 8 nul bytes at
# offset 500 and has its strength halved.
13500	quad		0
14!:strength /2
15# filename or extended attribute printable strings in range space null til umlaut ue
16>0	ubeshort	>0x1F00
17>>0	ubeshort	<0xFCFD
18# last 4 header bytes often null but tar\0 in gtarfail2.tar gtarfail.tar-bad
19# at https://sourceforge.net/projects/s-tar/files/testscripts/
20>>>508	ubelong&0x8B9E8DFF	0
21# nul, space or ascii digit 0-7 at start of mode
22>>>>100	ubyte&0xC8	=0
23>>>>>101 ubyte&0xC8	=0
24# nul, space at end of check sum
25>>>>>>155 ubyte&0xDF	=0
26# space or ascii digit 0 at start of check sum
27>>>>>>>148	ubyte&0xEF	=0x20
28# check for specific 1st member name that indicates other mime type and file name suffix
29>>>>>>>>0	string		TpmEmuTpms/permall
30# maybe also look for 2nd tar member efi/nvram containing UEFI variables part
31#>>>>>>>>>512	search/0x1800	efi/nvram\0		EFI_PART_FOUND
32>>>>>>>>>0	use	tar-nvram
33# FOR DEBUGGING:
34#>>>>>>>>0	regex/c		\^[0-9]{2,4}[.](png|jpg|jpeg|tif|tiff|gif|bmp)	NAME "%s"
35# check for 1st image main name with digits used for sorting
36# and for name extension case insensitive like: PNG JPG JPEG TIF TIFF GIF BMP
# the /c flag makes the regex case insensitive, so the upper-case suffixes
# listed above are accepted as well as the lower-case ones in the pattern
37>>>>>>>>0	regex/c		\^[0-9]{2,4}[.](png|jpg|jpeg|tif|tiff|gif|bmp)
38>>>>>>>>>0	use	tar-cbt
39# check for 1st member name with ovf suffix
40>>>>>>>>0	regex		\^.{1,96}[.](ovf)
41>>>>>>>>>0	use	tar-ova
42# look for relative directory ./var/ or ./lte/ as 1st member name that indicates AVM firmware with other file name suffix
43>>>>>>>>0	ubequad&0xFFffE5eaE8ffFFff	0x2e2f6460602f0000
44>>>>>>>>>0	use	tar-avm
45# maybe look for AVM specific 2nd name entry
46# >>>>>>>>>517	string		/content\0	content~
47# >>>>>>>>>>0	use	tar-avm
48# >>>>>>>>>517	string		/install\0	install~
49# >>>>>>>>>>0	use	tar-avm
50# >>>>>>>>>517	string		/chksum\0	chksum~
51# >>>>>>>>>>0	use	tar-avm
52# >>>>>>>>>517	string		/modfw.nfo\0	modfw~
53# >>>>>>>>>>0	use	tar-avm
54# most (419/429) *.WBM (71/71) *.WBT with user name jcameron of Webmin developer Jamie Cameron in first tar archive member
55>>>>>>>>265	string		jcameron
56>>>>>>>>>0	use	tar-webmin
57# if 1st member name without digits and without used image suffix, without *.ovf,
58# ./var/ , ./lte/ and TpmEmuTpms/ then it is a pure TAR archive or Webmin without jcameron user name
59>>>>>>>>0	default		x
60# few (10/429) *.WBM without user name jcameron in 1st tar member but with WBM module.info name like:
61# apcupsd-0.81-2.wbm csavupdate.wbm cwmail.wbm dac960.wbm etcupdate.wbm logviewer.wbm memcached.wbm rinetd.wbm shoutcast.wbm vacationadmin-webmin-module-1.1.2.wbm
62# few (10/95) *.WBT without user name jcameron in 1st tar member but with WBT theme.info name like:
63# authentic-theme-21.09.5.wbt Mozilla-Modern.wbt virtual-server-theme-2.7.wbt fkn-webmintheme.0.6.0.wbt
64>>>>>>>>>512	search/210965/s	e.info\0
65>>>>>>>>>>0	use	tar-webmin
66# pure TAR
67>>>>>>>>>0	default		x
68>>>>>>>>>>0	use	tar-file
69# Note:	called "TAR - Tape ARchive" by TrID, "Tape Archive Format" by DROID via PUID x-fmt/265
70#	and "Tar archive" by shared MIME-info database from freedesktop.org
71#	minimal check and then display tar archive information which can also be
72#	embedded inside others like Android Backup, Clam AntiVirus database
# tar-file: names the tar flavour from the magic string at offset 257 and the
# version bytes that follow it, sets mime type and suffix, and finally calls
# tar-entry to describe the first archive member.
730	name		tar-file
74>257	string		!ustar
75# header padded with nuls
76>>257	ulong		=0
77# GNU tar version 1.29 with non pax format option without refusing
78# creates misleading V7 header for Long path, Multi-volume, Volume type
79>>>156	ubyte		0x4c		GNU tar archive
80!:mime	application/x-gtar
81!:ext	tar/gtar
82>>>156	ubyte		0x4d		GNU tar archive
83!:mime	application/x-gtar
84!:ext	tar/gtar
85>>>156	ubyte		0x56		GNU tar archive
86!:mime	application/x-gtar
87!:ext	tar/gtar
88>>>156	default		x		tar archive (V7)
89!:mime	application/x-tar
90!:ext	tar
91# other stuff in padding
92# some implementations add new fields to the blank area at the end of the header record
93# created for example by DOS TAR 3.20g 1994 Tim V.Shapore with -j option
94>>257	ulong		!0		tar archive (old)
95!:mime	application/x-tar
96!:ext	tar
97# magic in newer, GNU, posix variants
98>257	string		=ustar
99# 2 last char of magic and UStar version because string expression does not work
100# 2 space characters followed by a null for GNU variant
# the long at 261 covers the final "r" of the magic plus the 3 bytes after it
101>>261	ubelong		=0x72202000	POSIX tar archive (GNU)
102!:mime	application/x-gtar
103!:ext	tar/gtar
104# UStar version with ASCII "00"
105>>261	ubelong		0x72003030	POSIX
106# gLOBAL and ExTENSION type only found in POSIX.1-2001 format
107>>>156	ubyte		0x67		\b.1-2001
108>>>156	ubyte		0x78		\b.1-2001
109>>>156	ubyte		x		tar archive
110!:mime	application/x-ustar
111!:ext	tar/ustar
112# version with 2 binary nuls embedded in Android Backup like com.android.settings.ab
113>>261	ubelong		0x72000000	tar archive (ustar)
114!:mime	application/x-ustar
115!:ext	tar/ustar
116# not seen ustar variant with garbage version
117>>261	default		x		tar archive (unknown ustar)
118!:mime	application/x-ustar
119!:ext	tar/ustar
120# show information for 1st tar archive member
121>0	use	tar-entry
122#	display information of tar archive member (file type, name, permissions, user, group)
# tar-entry: prints the fields of one tar header found at the offset the caller
# passed to `use`: type flag (156), name (0), mode (100), uid (108), gid (116),
# size (124), time (136), linkname (157) and, when "ustar" is at 257, the
# user/group names, device numbers and prefix. Text fields are printed only
# when they are non-empty.
1230	name		tar-entry
124# type flag of tar archive member
125#>156	ubyte		x		\b, %c-type
# always-true parent: puts the type tests and their `default` on one level
126>156	ubyte		x
127>>156	ubyte		0		\b, file
128>>156	ubyte		0x30		\b, file
129>>156	ubyte		0x31		\b, hard link
130>>156	ubyte		0x32		\b, symlink
131>>156	ubyte		0x33		\b, char device
132>>156	ubyte		0x34		\b, block device
133>>156	ubyte		0x35		\b, directory
134>>156	ubyte		0x36		\b, fifo
135>>156	ubyte		0x37		\b, reserved
136>>156	ubyte		0x4c		\b, long path
137>>156	ubyte		0x4d		\b, multi volume
138>>156	ubyte		0x56		\b, volume
139>>156	ubyte		0x67		\b, global
140>>156	ubyte		0x78		\b, extension
# any other type flag is shown as its raw character
141>>156	default		x		\b, type
142>>>156	ubyte		x		'%c'
143# name[100]
144>0	string		>\0		%-.60s
145# mode mainly stored as an octal number in ASCII null or space terminated
146>100	string		>\0		\b, mode %-.7s
147# user id mainly as octal numbers in ASCII null or space terminated
148>108	string		>\0		\b, uid %-.7s
149# group id mainly as octal numbers in ASCII null or space terminated
150>116	string		>\0		\b, gid %-.7s
151# size mainly as octal number in ASCII
152>124	ubyte		<0x38
153>>124	string		>\0		\b, size %-.12s
154# coding indicated by setting the high-order bit of the leftmost byte
# NOTE(review): the test below only accepts a first byte above 0xEF, not every
# byte with the high bit set - confirm this is intended
# each of the 12 size bytes is printed as two hex digits unless it is 0xff
155>124	ubyte		>0xEF		\b, size 0x
156>>124	ubyte		!0xff		\b%2.2x
157>>125	ubyte		!0xff		\b%2.2x
158>>126	ubyte		!0xff		\b%2.2x
159>>127	ubyte		!0xff		\b%2.2x
160>>128	ubyte		!0xff		\b%2.2x
161>>129	ubyte		!0xff		\b%2.2x
162>>130	ubyte		!0xff		\b%2.2x
163>>131	ubyte		!0xff		\b%2.2x
164>>132	ubyte		!0xff		\b%2.2x
165>>133	ubyte		!0xff		\b%2.2x
166>>134	ubyte		!0xff		\b%2.2x
167>>135	ubyte		!0xff		\b%2.2x
168# seconds since 0:0:0 1 jan 1970 UTC as octal number mainly in ASCII null or space terminated
169>136	string		>\0		\b, seconds %-.11s
170# header checksum stored as an octal number in ASCII null or space terminated
171#>148	string		x		\b, cksum %.7s
172# linkname[100]
173>157	string		>\0		\b, linkname %-.40s
174# additional fields for ustar
175>257	string		=ustar
176# owner user name null terminated
177>>265	string		>\0		\b, user %-.32s
178# group name null terminated
179>>297	string		>\0		\b, group %-.32s
180# device major minor if not zero (binary or ASCII)
181>>329	ubequad&0xCFCFCFCFcFcFcFdf	!0
182>>>329	string		x		\b, devmaj %-.7s
183>>337	ubequad&0xCFCFCFCFcFcFcFdf	!0
184>>>337	string		x		\b, devmin %-.7s
185# prefix[155]
186>>345	string		>\0		\b, prefix %-.155s
187# old non ustar/POSIX tar
188>257	string		!ustar
189>>508	string		=tar\0
190# padding[255] in old star
191>>>257	string		>\0		\b, padding: %-.40s
192>>508	default		x
193# padding[255] in old tar sometimes comment field
194>>>257	string		>\0		\b, comment: %-.40s
195# Summary:	VirtualBox NvramFile with UEFI variables packed inside TAR archive
196# URL:		https://www.virtualbox.org/manual/ch08.html#vboxmanage-modifynvram
197# Reference:	http://mark0.net/download/triddefs_xml.7z/defs/n/nvram-virtualbox-tar.trid.xml
198# Note:		called "VirtualBox saved (U)EFI BIOS settings (TAR)" by TrID and
199#		verified by 7-Zip `7z l -ttar Mint-21.1.nvram` and
200#		VirtualBox `VBoxManage modifynvram "Mint-21.1" listvars`
# tar-nvram: labels the file as a VirtualBox NVRAM file, describes the first
# tar member via tar-entry, then searches from offset 512 for a member named
# efi/nvram and describes that header too.
2010	name		tar-nvram
202#
203>0	string		x		VirtualBox NVRAM file
204#!:mime	application/x-gtar
205!:mime	application/x-virtualbox-nvram
206!:ext	nvram
207# first name[100] like: TpmEmuTpms/permall
208>0	use	tar-entry
209# 2nd tar member efi/nvram contains UEFI variables part described by ./virtual
# /s leaves the offset at the start of the match, so &0 is the member header
210>512	search/0x1800/s	efi/nvram\0
211>>&0	use	tar-entry
212# 2nd tar member efi/nvram content could be described by ./virtual
213#>>&512	indirect	x
214# Summary:	Comic Book Archive *.CBT with TAR format
215# URL:		https://en.wikipedia.org/wiki/Comic_book_archive
216#		http://fileformats.archiveteam.org/wiki/Comic_Book_Archive
217# Note:		there exist also RAR, ZIP, ACE and 7Z packed variants
# tar-cbt: prints the Comic Book archive label with its mime type and cbt
# suffix, then describes the first tar member via tar-entry.
2180	name		tar-cbt
219>0	string		x		Comic Book archive, tar archive
220#!:mime	application/x-tar
221!:mime	application/vnd.comicbook
222#!:mime	application/vnd.comicbook+tar
223!:ext	cbt
224# name[100] probably like: 19.jpg 0001.png 0002.png
225# or maybe like ComicInfo.xml
226#>0	string		>\0		\b, 1st image %-.60s
227>0	use	tar-entry
228# Summary:	Open Virtualization Format *.OVF with disk images and more packed as TAR archive *.OVA
229# From:		Joerg Jenderek
230# URL:		https://en.wikipedia.org/wiki/Open_Virtualization_Format
231#		http://fileformats.archiveteam.org/wiki/OVF_(Open_Virtualization_Format)
232# Reference:	http://mark0.net/download/triddefs_xml.7z/defs/o/ova.trid.xml
233# Note:		called "Open Virtualization Format package" by TrID
234#		assuming *.ovf comes first
# tar-ova: prints the Open Virtualization Format Archive label with its mime
# type and ova suffix, then describes the first tar member via tar-entry.
2350	name		tar-ova
236>0	string		x		Open Virtualization Format Archive
237#!:mime	application/x-ustar
238# http://extension.nirsoft.net/ova
239!:mime	application/x-virtualbox-ova
240!:ext	ova
241# assuming name[100] like: DOS-0.9.ovf FreeDOS_1.ovf Win98SE_DE.ovf
242#>0	string		>\0		\b, with %-.60s
243>0	use	tar-entry
244# Summary:	AVM firmware (FRITZ!OS) for the FRITZ!Box (router)
245# From:		Joerg Jenderek
246# URL:		https://en.wikipedia.org/wiki/Fritz!Box
247#		https://www.redteam-pentesting.de/de/advisories/rt-sa-2014-010/-avm-fritz-box-firmware-signature-bypass
248# Reference:	http://mark0.net/download/triddefs_xml.7z/defs/i/image-avm.trid.xml
249# Note:		verified by 7-Zip `7z l -ttar FRITZ.Box_4040-07.57.image`
# tar-avm: prints the AVM FRITZ!Box firmware label, tries to show the product
# and version strings found near the start of the archive, describes the first
# tar member and, when that member is a directory, the header at offset 512.
2500	name		tar-avm
251>0	string		x		AVM FRITZ!Box firmware
252#!:mime	application/x-gtar
253!:mime	application/x-avm-image
254!:ext	image
255# tar member ./var/content starts with line like "Product=Fritz_Box_HW227 (FRITZ!Box 4040)"
# the product text printed is whatever follows the matched keyword
256>>1024	search/512	Product=Fritz_Box_
257>>>&0	string		x		%s
258# version string like: 07.57 07.58
259>>>1044	search		Version=	\b, version
260>>>>&0	string		x		%s
261# product phrase too far behind (dozen MB) in many samples like: FRITZ.Box_4040-07.12.image FRITZ.Box_6820v3_LTE-07.57.image
262# so try to look for other characteristic foo
263# >>1024	default		x		OTHER_PATTERN!
264# >>>1023	search		AVM_PATTERN	PATTERNfound
265# first name[100] like: ./var/ ./lte/
266>0	use	tar-entry
267# if 1st entry is directory then show 2nd entry
268>156	ubyte		0x35
269# 2nd tar member name like: ./var/content (often ) ./var/install ./var/chksum ./lte/modfw.nfo
270>>512	use	tar-entry
271# Summary:	Webmin Module or Theme
272# From:		Joerg Jenderek
273# URL:		https://en.wikipedia.org/wiki/Webmin
274#		https://webmin.com/docs/development/creating-modules/
275#		https://webmin.com/docs/development/creating-themes/
276# Reference:	http://mark0.net/download/triddefs_xml.7z/defs/w/wbm.trid.xml
277#		http://mark0.net/download/triddefs_xml.7z/defs/w/wbt.trid.xml
278#		http://mark0.net/download/triddefs_xml.7z/defs/w/wbt-gif.trid.xml
279# Note:		called "Webmin Module" "Webmin Theme" by TrID
280#		mostly verified by 7-Zip `7z l -ttar *.wbm | grep "\module.info"` and
281#		`7z l -ttar *.wbt | grep "\theme.info"`
# tar-webmin: prints "Webmin" followed by Module, Theme or "Module or Theme"
# depending on which *.info member name is found after the first header, then
# the tar flavour in parentheses, the first tar member, and the header of the
# module.info / theme.info member when one is found.
2820	name		tar-webmin
283>0	string		x		Webmin
284# Webmin module or theme
285>>512	search/1767941/s /module.info	Module
286!:mime	application/x-webmin-module
287!:ext	wbm
288# According to documentation module.info is mandatory but instead theme.info is found in
289# old-blue-theme.wbm old-blue-theme-1.0.wbm old-mscstyle3.wbm virtual-server-mobile.wbm
290# GRR:  maybe here wrong file name suffix WBM instead of WBT
291>>512	default		x
292>>>512	search/3149333/s /theme.info	Theme
293!:mime	application/x-webmin-theme
294!:ext	wbt
295# next 3 lines should not happen
296>>>512	default		x		Module or Theme
297!:mime	application/x-webmin
298!:ext	wbm/wbt
299# GNU or POSIX tar
300>257	string		=ustar		(
301# 2 space characters followed by a null for GNU variant for most (428/429) WBM samples
302>>261	ubelong		=0x72202000	\bGNU tar)
303#!:mime	application/x-gtar
304# UStar version variant with ASCII "00" as in few (1/429) samples like cwmail.wbm
305>>261	ubelong		0x72003030	\bPOSIX tar)
306#!:mime	application/x-ustar
307#>>>156	ubyte		x		tar archive
# any other ustar version: still close the parenthesis opened above so the
# output stays balanced (tar-file likewise has a default for unknown versions)
>>261	default		x		\bustar tar)
308# Apparently first archive member name[100] is directory like: dynbind/ ssh/ virtualmin-powerdns/ virtual-server-mobile/ vnc/
309>>0	use	tar-entry
310# look for characteristic WBM module info name starting with "module.info" for language variant like in: ssh2.wbm
311>>512	search/1767941/s /module.info
312# look for TAR magic of WBM archive module info
313>>>&0	search/257/s	ustar
314# show details for WBM archive member module info
# the magic sits 257 bytes into a header, so step back to the header start
315>>>>&-257 use	tar-entry
316# look for characteristic WBT theme info name with "theme.info" like in: authentic-theme-21.09.5.wbt
317>>512	search/3149333/s /theme.info\0
318# look for TAR magic of WBT archive theme info
319>>>&0	search/257/s	ustar
320>>>>&-257 use	tar-entry
321
322# Incremental snapshot gnu-tar format from:
323# https://www.gnu.org/software/tar/manual/html_node/Snapshot-Files.html
# matches the literal "GNU tar-" at the start of the file; the regex applied
# right after that literal prints the version text that follows
3240	string		GNU\ tar-	GNU tar incremental snapshot data
325>&0	regex		[0-9]\\.[0-9]+-[0-9]+	version %s
326
327# cpio archives
328#
329# Yes, the top two "cpio archive" formats *are* supposed to just be "short".
330# The idea is to indicate archives produced on machines with the same
331# byte order as the machine running "file" with "cpio archive", and
332# to indicate archives produced on machines with the opposite byte order
333# from the machine running "file" with "byte-swapped cpio archive".
334#
335# The SVR4 "cpio(4)" hints that there are additional formats, but they
336# are defined as "short"s; I think all the new formats are
337# character-header formats and thus are strings, not numbers.
338# URL:		http://fileformats.archiveteam.org/wiki/Cpio
339#		https://en.wikipedia.org/wiki/Cpio
340# Reference:	https://people.freebsd.org/~kientzle/libarchive/man/cpio.5.txt
341# Update:	Joerg Jenderek
342#
343# Reference:    http://mark0.net/download/triddefs_xml.7z/defs/a/ark-cpio-bin.trid.xml
344# Note:		called "CPIO archive (binary)" by TrID, "cpio/Binary LE" by 7-Zip and "CPIO" by DROID via PUID fmt/635
# cpio entries: two binary variants keyed on a 16-bit magic (host order and
# its byte-swapped value) and three ASCII variants keyed on a 6-character
# magic string. Only the binary variants print header details, through the
# cpio-bin and cpio-bin-be subroutines.
3450	short		070707
346# skip DROID fmt-635-signature-id-960.cpio by looking for pathname of 1st entry
347>26	string		>\0		cpio archive
348!:mime	application/x-cpio
349# https://download.opensuse.org/distribution/leap/15.4/iso/openSUSE-Leap-15.4-NET-x86_64-Media.iso
350# boot/x86_64/loader/bootlogo
351# message.cpi
352!:ext	/cpio/cpi
353>>0	use	cpio-bin
354# Reference:    http://mark0.net/download/triddefs_xml.7z/defs/a/ark-cpio-bin-sw.trid.xml
355# Note:		called "CPIO archive (byte swapped binary)" by TrID and "Cpio/Binary BE" by 7-Zip
3560	short		0143561		byte-swapped cpio archive
357!:mime	application/x-cpio # encoding: swapped
358# https://telparia.com/fileFormatSamples/archive/cpio/skeleton2.cpio
359!:ext	cpio
360>0	use	cpio-bin-be
361# Reference:    http://mark0.net/download/triddefs_xml.7z/defs/a/ark-cpio.trid.xml
362# Note:		called "CPIO archive (portable)" by TrID, "cpio/Portable ASCII" by 7-Zip and "cpio/odc" by GNU cpio
3630	string		070707		ASCII cpio archive (pre-SVR4 or odc)
364!:mime	application/x-cpio
365# https://telparia.com/fileFormatSamples/archive/cpio/ pthreads-1.60B5.osr5src.cpio cinema.cpi VOL.000.008 VOL.000.012
366!:ext	cpio/cpi/008/012
367# Note:		called "CPIO archive (portable)" by TrID, "cpio/New ASCII" by 7-Zip and "cpio/newc" by GNU cpio
3680	string		070701		ASCII cpio archive (SVR4 with no CRC)
369!:mime	application/x-cpio
370# https://telparia.com/fileFormatSamples/archive/cpio/MainActor-2.06.3.cpio
371!:ext	cpio
372# Note:		called "CPIO archive (portable)" by TrID, "cpio/New CRC" by 7-Zip and "cpio/crc" by GNU cpio
3730	string		070702		ASCII cpio archive (SVR4 with CRC)
374!:mime	application/x-cpio
375# http://ftp.gnu.org/gnu/tar/tar-1.27.cpio.gz
376# https://telparia.com/fileFormatSamples/archive/cpio/pcmcia
377!:ext	/cpio
378#	display information of old binary cpio archive
379# Note:	verified by 7-Zip `7z l -tcpio -slt *.cpio` and
380#	`cpio -ivt --numeric-uid-gid --file=clam.bin-le.cpio`
# cpio-bin: prints the header fields of the first entry of a binary cpio
# archive, reading 16-bit fields little-endian and the two 32-bit fields
# (time, size) in middle-endian order, then the entry pathname at offset 26.
3810	name	cpio-bin
382# c_dev; device number; WHAT IS THAT?
383>2	uleshort	x		\b; device %u
384# c_ino; truncated inode number; use `ls --inode`
385>4	uleshort	x		\b, inode %u
386# c_mode; mode specifies permissions and file type like: ?622~?rw-r--r-- by `ls -l`
387>6	uleshort	x		\b, mode %o
388# c_uid; numeric user id; use `ls --numeric-uid-gid`
389>8	uleshort	x		\b, uid %u
390# c_gid; numeric group id
391>10	uleshort	x		\b, gid %u
392# c_nlink; links to this file; directories at least 2
# the link count is shown only when it is greater than 1
393>12	uleshort	>1		\b, %u links
394# c_rdev; device number for block and character entries; zero for all other entries by writers
395# like 0x0440 for /dev/ttyS0
396>14	uleshort	>0		\b, device %#4.4x
397# c_mtime[2]; modification time in seconds since 1 January 1970; most-significant 16 bits first
398>16	medate		x		\b, modified %s
399# c_filesize[2]; size of pathname; most-significant 16 bits first like: 544
400>22	melong		x		\b, %u bytes
401# c_namesize; bytes in the pathname that follows the header like: 9
402#>20	uleshort	x		\b, namesize %u
403# pathname of entry like: "clam.exe"
404>26	string		x		"%s"
405#	display information of old binary byte swapped cpio archive
406# Note:	verified by 7-Zip `7z l -tcpio -slt *.cpio` and
407#	`LANGUAGE=C cpio -ivt --numeric-uid-gid --file=clam.bin-be.cpio`
# cpio-bin-be: same fields, offsets and output texts as cpio-bin, but every
# value is read big-endian (ubeshort, bedate, ubelong).
4080	name	cpio-bin-be
409>2	ubeshort	x		\b; device %u
410>4	ubeshort	x		\b, inode %u
411>6	ubeshort	x		\b, mode %o
412>8	ubeshort	x		\b, uid %u
413>10	ubeshort	x		\b, gid %u
# the link count is shown only when greater than 1, the device only when non-zero
414>12	ubeshort	>1		\b, %u links
415>14	ubeshort	>0		\b, device %#4.4x
416>16	bedate		x		\b, modified %s
417>22	ubelong	 	x		\b, %u bytes
418#>20	ubeshort	x		\b, namesize %u
419>26	string		x		"%s"
420
421#
422# Various archive formats used by various versions of the "ar"
423# command.
424#
425
426#
427# Original UNIX archive formats.
428# They were written with binary values in host byte order, and
429# the magic number was a host "int", which might have been 16 bits
430# or 32 bits.  We don't say "PDP-11" or "VAX", as there might have
431# been ports to little-endian 16-bit-int or 32-bit-int platforms
432# (x86?) using some of those formats; if none existed, feel free
433# to use "PDP-11" for little-endian 16-bit and "VAX" for little-endian
434# 32-bit.  There might have been big-endian ports of that sort as
435# well.
436#
# Old ar magics: the same octal value is tested as a 16-bit and as a 32-bit
# integer in both byte orders. For magic 0177545 the text "random library" is
# added when __.SYMDEF directly follows the magic.
4370	leshort		0177555		very old 16-bit-int little-endian archive
4380	beshort		0177555		very old 16-bit-int big-endian archive
4390	lelong		0177555		very old 32-bit-int little-endian archive
4400	belong		0177555		very old 32-bit-int big-endian archive
441
4420	leshort		0177545		old 16-bit-int little-endian archive
443>2	string		__.SYMDEF	random library
4440	beshort		0177545		old 16-bit-int big-endian archive
445>2	string		__.SYMDEF	random library
4460	lelong		0177545		old 32-bit-int little-endian archive
447>4	string		__.SYMDEF	random library
4480	belong		0177545		old 32-bit-int big-endian archive
449>4	string		__.SYMDEF	random library
450
451#
452# From "pdp" (but why a 4-byte quantity?)
453#
4540	lelong		0x39bed		PDP-11 old archive
4550	lelong		0x39bee		PDP-11 4.0 archive
456
457#
458# XXX - what flavor of APL used this, and was it a variant of
459# some ar archive format?  It's similar to, but not the same
460# as, the APL workspace magic numbers in pdp.
461#
# tested as a host-order long, unlike the explicit-endian entries above
4620	long		0100554		apl workspace
463
464#
465# System V Release 1 portable(?) archive format.
466#
# matches the literal string "<ar>" at the start of the file
4670	string		=<ar>		System V Release 1 ar archive
468!:mime	application/x-archive
469
470#
471# Debian package; it's in the portable archive format, and needs to go
472# before the entry for regular portable archives, as it's recognized as
473# a portable archive whose first member has a name beginning with
474# "debian".
475#
476# Update: Joerg Jenderek
477# URL: https://en.wikipedia.org/wiki/Deb_(file_format)
# Debian package: an ar archive whose first member name starts with "debian".
# The text after "debian" at offset 14 selects binary or split package; the
# format version string at offset 68 then selects which details are printed.
4780	string		=!<arch>\ndebian
479# https://manpages.debian.org/testing/dpkg/dpkg-split.1.en.html
480>14	string		-split	part of multipart Debian package
481!:mime	application/vnd.debian.binary-package
482# udeb is used for stripped down deb file
483!:ext	deb/udeb
484>14	string		-binary	Debian binary package
485!:mime	application/vnd.debian.binary-package
486# For ipk packager see also https://en.wikipedia.org/wiki/Opkg
487!:ext	deb/udeb/ipk
488# This should not happen
489>14	default		x	Unknown Debian package
490# NL terminated version; for most Debian cases this is 2.0 or 2.1 for split
491>68	string		>\0		(format %s)
492#>68	string		!2.0\n
493#>>68	string		x		(format %.3s)
494>68	string		=2.0\n
495# 2nd archive name=control archive name like control.tar.gz or control.tar.xz
496# or control.tar.zst
497>>72	string		>\0		\b, with %.15s
498# look for 3rd archive name=data archive name like data.tar.{gz,xz,bz2,lzma}
499>>0	search/0x93e4f	data.tar.	\b, data compression
500# the above line only works if FILE_BYTES_MAX in ../../src/file.h is raised
501# for example like libreoffice-dev-doc_1%3a5.2.7-1+rpi1+deb9u3_all.deb
# the first 2 suffix characters are always printed; the 3rd and 4th only when
# the character at that position is neither space nor slash
502>>>&0	string		x		%.2s
503# skip space (0x20 BSD) and slash (0x2f System V) character marking end of name
504>>>&2	ubyte		!0x20
505>>>>&-1	ubyte		!0x2f
506# display 3rd character of file name extension like 2 of bz2 or m of lzma
507>>>>>&-1	ubyte	x		\b%c
508>>>>>>&0	ubyte	!0x20
509>>>>>>>&-1	ubyte	!0x2f
510# display 4th character of file name extension like a of lzma
511>>>>>>>>&-1	ubyte	x		\b%c
512# split debian package case
513>68	string		=2.1\n
514# dpkg-1.18.25/dpkg-split/info.c
515# NL terminated ASCII package name like ckermit
# each following field is read relative to the end of the previous one
516>>&0	string		x		\b, %s
517# NL terminated package version like 302-5.3
518>>>&1	string		x		%s
519# NL terminated MD5 checksum
520>>>>&1	string		x		\b, MD5 %s
521# NL terminated original package length
522>>>>>&1	string		x		\b, unsplitted size %s
523# NL terminated part length
524>>>>>>&1	string	x		\b, part length %s
525# NL terminated package part like n/m
526>>>>>>>&1	string	x		\b, part %s
527# NL terminated package architecture like armhf since dpkg 1.16.1 or later
528>>>>>>>>&1	string	x		\b, %s
529
530#
531# MIPS archive; they're in the portable archive format, and need to go
532# before the entry for regular portable archives, as it's recognized as
533# a portable archive whose first member has a name beginning with
534# "__________E".
535#
# MIPS archive: ar magic followed by a first member name starting with
# "__________E"; single characters at offsets 19-22 add the qualifiers below.
5360	string	=!<arch>\n__________E	MIPS archive
537!:mime	application/x-archive
538>20	string	U			with MIPS Ucode members
539>21	string	L			with MIPSEL members
540>21	string	B			with MIPSEB members
541>19	string	L			and an EL hash table
542>19	string	B			and an EB hash table
543>22	string	X			-- out of date
544
545#
546# BSD/SVR2-and-later portable archive formats.
547#
548# Update: Joerg Jenderek
549# URL:		http://fileformats.archiveteam.org/wiki/AR
550# Reference:	https://www.unix.com/man-page/opensolaris/3HEAD/ar.h/
551# Note:		Mach-O universal binary in ./cafebabe is dependent
552# TODO:		unify current ar archive, MIPS archive, Debian package
553#		distinguish BSD, SVR; 32, 64 bit; HP from other 32-bit SVR;
554#		*.ar packages from *.a libraries. handle empty archive
# Generic ar archive: matches the 8-byte magic "!<arch>\n". Extra text is
# added when the first name is __.SYMDEF, when "__.SYMDEF SORTED" is found at
# offset 68, or when one of the PA-RISC values is found at offset 68.
5550	string		=!<arch>\n		current ar archive
556# print first and possibly second ar_name[16] for debugging purpose
557#>8			string	x	\b, 1st "%.16s"
558#>68			string	x	\b, 2nd "%.16s"
559!:mime	application/x-archive
560# a in most case for libraries; lib for Microsoft libraries; ar else cases
561!:ext	a/lib/ar
562>8	string		__.SYMDEF	random library
563# first member with long marked name __.SYMDEF SORTED implies BSD library
564>68	string		__.SYMDEF\ SORTED	random library
565# Reference: https://parisc.wiki.kernel.org/images-parisc/b/b2/Rad_11_0_32.pdf
566# "archive file" entry moved from ./hp
567# LST header system_id 0210h~PA-RISC 1.1,... identifies the target architecture
568# LST header a_magic 0619h~relocatable library
569>68	belong 		0x020b0619	- PA-RISC1.0 relocatable library
570>68	belong	 	0x02100619	- PA-RISC1.1 relocatable library
571>68	belong 		0x02110619	- PA-RISC1.2 relocatable library
572>68	belong 		0x02140619	- PA-RISC2.0 relocatable library
573#EOF for common ar archives
574
575#
576# "Thin" archive, as can be produced by GNU ar.
577#
# matches "!<thin>\n" at offset 0; the big-endian long at offset 68 is printed
# as the symbol count, with separate wording for 0, 1 and more than 1
5780	string		=!<thin>\n	thin archive with
579>68	belong		0		no symbol entries
580>68	belong		1		%d symbol entry
581>68	belong		>1		%d symbol entries
582
# search with range 1 from offset 0 for the text "-h-"
5830	search/1	-h-		Software Tools format archive text
584
585# ARC archiver, from Daniel Quinlan (quinlan@yggdrasil.com)
586#
587# The first byte is the magic (0x1a), byte 2 is the compression type for
588# the first file (0x01 through 0x09), and bytes 3 to 15 are the MS-DOS
589# filename of the first file (null terminated).  Since some types collide
590# we only test some types on basis of frequency: 0x08 (83%), 0x09 (5%),
591# 0x02 (5%), 0x03 (3%), 0x04 (2%), 0x06 (2%).  0x01 collides with terminfo.
# ARC family: all entries read one little-endian long at offset 0 under mask
# 0x8080ffff. The low byte must be 0x1a and the second byte picks the type;
# the mask also requires the top bit of the third and fourth byte to be clear.
5920	lelong&0x8080ffff	0x0000081a	ARC archive data, dynamic LZW
593!:mime	application/x-arc
5940	lelong&0x8080ffff	0x0000091a	ARC archive data, squashed
595!:mime	application/x-arc
5960	lelong&0x8080ffff	0x0000021a	ARC archive data, uncompressed
597!:mime	application/x-arc
5980	lelong&0x8080ffff	0x0000031a	ARC archive data, packed
599!:mime	application/x-arc
6000	lelong&0x8080ffff	0x0000041a	ARC archive data, squeezed
601!:mime	application/x-arc
6020	lelong&0x8080ffff	0x0000061a	ARC archive data, crunched
603!:mime	application/x-arc
604# [JW] stuff taken from idarc, obviously ARC successors:
6050	lelong&0x8080ffff	0x00000a1a	PAK archive data
606!:mime	application/x-arc
6070	lelong&0x8080ffff	0x0000141a	ARC+ archive data
608!:mime	application/x-arc
6090	lelong&0x8080ffff	0x0000481a	HYP archive data
610!:mime	application/x-arc
611
612# Acorn archive formats (Disaster prone simpleton, m91dps@ecs.ox.ac.uk)
613# I can't create either SPARK or ArcFS archives so I have not tested this stuff
614# [GRR:  the original entries collide with ARC, above; replaced with combined
615#  version (not tested)]
616#0	byte		0x1a		RISC OS archive (spark format)
# two spellings of the magic print the same text: 0x1a followed by "archive",
# and "Archive" followed by a nul byte
6170	string		\032archive	RISC OS archive (ArcFS format)
6180       string          Archive\000     RISC OS archive (ArcFS format)
619
620# All these were taken from idarc, many could not be verified. Unfortunately,
621# there were many low-quality sigs, i.e. easy to trigger false positives.
622# Please notify me of any real-world fishy/ambiguous signatures and I'll try
623# to get my hands on the actual archiver and see if I find something better. [JW]
624# probably many can be enhanced by finding some 0-byte or control char near the start
625
626# idarc calls this Crush/Uncompressed... *shrug*
# One-line signatures: each tests a fixed byte string (or one masked long) at
# the given offset and prints only a format name. The BSA entry is the only
# one with a sub-test; it prints the long at offset 4 as the version.
6270	string	CRUSH Crush archive data
628# Squeeze It (.sqz)
6290	string	HLSQZ Squeeze It archive data
630# SQWEZ
6310	string	SQWEZ SQWEZ archive data
632# HPack (.hpk)
6330	string	HPAK HPack archive data
634# HAP
6350	string	\x91\x33HF HAP archive data
636# MD/MDCD
6370	string	MDmd MDCD archive data
638# LIM
6390	string	LIM\x1a LIM archive data
640# SAR
6413	string	LH5 SAR archive data
642# BSArc/BS2
6430	string	\212\3SB\020\0	BSArc/BS2 archive data
644# Bethesda Softworks Archive (Oblivion)
6450	string	BSA\0 		BSArc archive data
646>4	lelong	x		version %d
647# MAR
6482	string	=-ah MAR archive data
649# ACB
650#0	belong&0x00f800ff	0x00800000 ACB archive data
651# CPZ
652# TODO, this is what idarc says: 0	string	\0\0\0 CPZ archive data
653# JRC
6540	string	JRchive JRC archive data
655# Quantum
6560	string	DS\0 Quantum archive data
657# ReSOF
6580	string	PK\3\6 ReSOF archive data
659# QuArk
6600	string	7\4 QuArk archive data
661# YAC
66214	string	YC YAC archive data
663# X1
6640	string	X1 X1 archive data
6650	string	XhDr X1 archive data
666# CDC Codec (.dqt)
6670	belong&0xffffe000	0x76ff2000 CDC Codec archive data
668# AMGC
6690	string	\xad6" AMGC archive data
670# NuLIB
6710	string	N\xc3\xb5F\xc3\xa9lx\xc3\xa5 NuLIB archive data
672# PakLeo
6730	string	LEOLZW PAKLeo archive data
674# ChArc
6750	string	SChF ChArc archive data
676# PSA
6770	string	PSA PSA archive data
678# CrossePAC
6790	string	DSIGDCC CrossePAC archive data
680# Freeze
6810	string	\x1f\x9f\x4a\x10\x0a Freeze archive data
682# KBoom
6830	string	\xc2\xa8MP\xc2\xa8 KBoom archive data
684# NSQ, must go after CDC Codec
6850	string	\x76\xff NSQ archive data
686# DPA
6870	string	Dirk\ Paehl DPA archive data
688# BA
689# TODO: idarc says "bytes 0-2 == bytes 3-5"
690# TTComp
691# URL: http://fileformats.archiveteam.org/wiki/TTComp_archive
692# Update: Joerg Jenderek
693# GRR: line below is too general as it matches also Panorama database "TCDB 2003-10 demo.pan", others
# TTComp detection: six entries, one per combination of first byte (0 or 1)
# and second byte (4, 5 or 6). Each applies its own exclusion tests before
# calling the ttcomp subroutine; the three entries whose first byte is 1 also
# lower their strength by 2.
6940	string	\0\6
695# look for first keyword of Panorama database *.pan
# a successful search prints nothing; it only stops the default branch below
696>12	search/261	DESIGN
697# skip keyword with low entropy
698>12	default		x
699# skip DOS 2.0 backup id file, sequence 6 with many nils like BACKUPID_xx6.@@@ handled by ./msdos
700>>8	quad		!0
701>>>0	use	ttcomp
702# variant ASCII, 4K dictionary (strength=48=50-2). With strength=49 wrong order! WHY?
7030	string	\1\6
704# TODO:
705# skip VAX-order 68k Blit mpx/mux executable (strength=50) handled by ./blit
706!:strength	-2
707>0	use	ttcomp
7080	string	\0\5
709# skip some DOS 2.0 backup id file, sequence 5 with many nils like BACKUPID_075.@@@ handled by ./msdos
710>8	quad	!0
711>>0	use	ttcomp
7120	string	\1\5
713# TODO:
714# variant ASCII, 2K dictionary (strength=48=50-2). With strength=49 wrong order! WHY?
715# skip ctab data (strength=50) handled by ./ibm6000
716# skip locale data table (strength=50) handled by ./digital
717!:strength	-2
718>0	use	ttcomp
7190	string	\0\4
720# skip many Maple help database *.hdb with version tag handled by ./maple
721>1028	string	!version
722# skip veclib maple.hdb by looking for Maple keyword
723>>4	search/1091	Maple\040
724#>4	search/34090	Maple\040
725>>4	default		x
726# skip DOS 2.0-3.2 backed up sequence 4 with many nils like LOTUS5.RAR handled by ./msdos
727# skip xBASE Compound Index file *.CDX with many nils
728>>>0x54	quad		!0
729>>>>0	use	ttcomp
7300	string	\1\4
731# TODO:
732# skip shared library (strength=50) handled by ./ibm6000
733!:strength	-2
734# skip Commodore PET BASIC programs (Mastermind.prg) with last 3 nil bytes (\0~end of line followed by 0000h line offset)
735#>-4	ubelong		x	LAST_BYTES=%8.8x
# negative offset: the long is read from the last 4 bytes of the file
736>-4	ubelong&0x00FFffFF	!0
737>>0	use	ttcomp
738#	display information of TTComp archive
# ttcomp: prints the TTComp label with mime type and suffixes, then the
# compression type from byte 0 and the dictionary size from byte 1.
7390	name	ttcomp
740# (version 5.25) labeled the entry as "TTComp archive data"
741>0	ubyte	x	TTComp archive data
742!:mime	application/x-compress-ttcomp
743# PBACKSCR.PI1
744!:ext	$xe/$ts/pi1/__d
745# compression type: 0~binary compression 1~ASCII compression
746>0	ubyte	0	\b, binary
747>0	ubyte	1	\b, ASCII
748# size of the dictionary:  4~1024 bytes 5~2048 bytes 6~4096 bytes
749>1	ubyte	4	\b, 1K
750>1	ubyte	5	\b, 2K
751>1	ubyte	6	\b, 4K
# the word "dictionary" is printed for every value of byte 1
752>1	ubyte	x	dictionary
753#	https://mark0.net/forum/index.php?topic=848
754# last 3 bytes probably have only 8 possible bit sequences
755# xxxxxxxx 0000000x 11111111	____FFh
756# xxxxxxxx 10000000 01111111	__807Fh
757# 0xxxxxxx 11000000 00111111	__C03Fh
758# 00xxxxxx 11100000 00011111	__E01Fh
759# 000xxxxx 11110000 00001111	__F00Fh
760# 0000xxxx 11111000 00000111	__F807h
761# 00000xxx 11111100 00000011	__FC03h
762# 000000xx 11111110 00000001	__FE01h
763# but for quickgif.__d 0A7DD4h
764#>-3	ubyte		x	\b, last 3 bytes 0x%2.2x
765#>-2	ubeshort	x	\b%4.4x
766# From:		Joerg Jenderek
767# URL:		https://en.wikipedia.org/wiki/Disk_Copy
768# reference:	http://nulib.com/library/FTN.e00005.htm
7690x52	ubeshort	0x0100
770# test for disk image size equal or above 400k
771>0x40	ubelong		>409599
772# test also for disk image size equal or below 1440k to skip
773# windows7en.mbr UNICODE.DAT
774#>>0x40	ubelong		<1474561
775# test now for "low" disk image size equal or below 64 MiB to skip
776# windows7en.mbr (B441BBAAh) UNICODE.DAT (0400AF05h)
777>>0x40	ubelong		<0x04000001
778# To skip Flags$StringJoiner.class with size 00106A61h test also for valid disk image sizes
779# 00064000 for  400k GCR disks	dc42-400k-gcr.trid.xml
780# 000c8000 for  800k GCR disks	dc42-800k-gcr.trid.xml
781# 000b4000 for  720k MFM disks	dc42-720k-mfm.trid.xml
782# 00168000 for 1440k MFM disks	dc42-1440k-mfm.trid.xml
783#	https://lisaem.sunder.net/LisaProjectDocs.txt
784# 00500000	05M	available
785# 00A00000	10M	available
786# 01800000	24M	possible
787# 02000000	32M	uncertain
788# 04000000	64M	uncertain
789>>>0x40	ubelong&0xf8003fFF	0
790# skip samples with invalid disk name length like:
791# 181 (biosmd80.rom) 202 (Flags$StringJoiner.class) 90 (UNICODE.DAT)
792>>>>0x0	ubyte			<64
793>>>>>0	use			dc42-floppy
794#	display information of Apple DiskCopy 4.2 floppy image
7950	name		dc42-floppy
796# disk name length; maximal 63
797#>0	ubyte	    	x	DISK NAME LENGTH %u
798# ASCII image pascal (maximal 63 bytes) name padded with NULs like:
799# "Microsoft Mail" "Disquette 2" "IIe Installer Disk"
800# "-lisaem.sunder.net hd-" (dc42-lisaem.trid.xml) "-not a Macintosh disk" (dc42-nonmac.trid.xml)
801>00	pstring/B	x	Apple DiskCopy 4.2 image %s
802#!:mime	application/octet-stream
803!:mime	application/x-dc42-floppy-image
804!:apple	dCpydImg
805# probably also img like: "Utilitaires 2.img" "Installation 7.img"
806!:ext	image/dc42/img
807# data size in bytes like: 409600 737280 819200 1474560
808>0x40	ubelong		x	\b, %u bytes
809# for debugging purpose size in hexadecimal
810#>0x40	ubelong		x	(%#8.8x)
811# tag size in bytes like: 0 (often) 2580h (PUID fmt/625) 4B00h (Microsoft Mail.image)
812>0x44	ubelong		>0	\b, %#x tag size
813# data checksum
814#>0x48	ubelong		x	\b, %#x checksum
815# tag checksum
816#>0x4c	ubelong		x	\b, %#x tag checksum
817# disk encoding like: 0 1 2 3 (PUID: fmt/625)
818>0x50	ubyte		0	\b, GCR CLV ssdd (400k)
819>0x50	ubyte		1	\b, GCR CLV dsdd (800k)
820>0x50	ubyte		2	\b, MFM CAV dsdd (720k)
821>0x50	ubyte		3	\b, MFM CAV dshd (1440k)
822>0x50	ubyte		>3	\b, %#x encoding
823# format byte like: 12h (Lisa 400K) 24h (400K Macintosh) 96h (800K Apple II disk)
824# 2 (Mac 400k "Disquette Installation 13.image")
825# 22h (double-sided MFM or Mac 800k "Disco 12.image" "IIe Installer Disk.image")
826>0x51	ubyte		x	\b, %#x format
827#>0x54	ubequad		x	\b, data %#16.16llx
828# ESP, could this conflict with Easy Software Products' (e.g.ESP ghostscript) documentation?
8290	string	ESP ESP archive data
830# ZPack
8310	string	\1ZPK\1 ZPack archive data
832# Sky
8330	string	\xbc\x40 Sky archive data
834# UFA
8350	string	UFA UFA archive data
836# Dry
8370	string	=-H2O DRY archive data
838# FoxSQZ
8390	string	FOXSQZ FoxSQZ archive data
840# AR7
8410	string	,AR7 AR7 archive data
842# PPMZ
8430	string	PPMZ PPMZ archive data
844# MS Compress
845# Update: Joerg Jenderek
846# URL: http://fileformats.archiveteam.org/wiki/MS-DOS_installation_compression
847# Reference: https://hwiegman.home.xs4all.nl/fileformats/compress/szdd_kwaj_format.html
848# Note: use correct version of extracting tool like EXPAND, UNPACK, DECOMP or 7Z
8494	string	\x88\xf0\x27
850#		KWAJ variant
851>0	string	KWAJ		MS Compress archive data, KWAJ variant
852!:mime	application/x-ms-compress-kwaj
853# extension not working in version 5.32
854# magic/Magdir/archive, 284: Warning: EXTENSION type ` ??_' has bad char '?'
855# file: line 284: Bad magic entry '   ??_'
856!:ext	??_
857# compression method (0-4)
858>>8	uleshort	x	\b, %u method
859# offset of compressed data
860>>10	uleshort	x	\b, %#x offset
861#>>(10.s)	uleshort	x
862#>>>&-6		string	x	\b, TEST extension %-.3s
863# header flags to mark header extensions
864>>12	uleshort	>0	\b, %#x flags
865# 4 bytes: decompressed length of file
866>>12	uleshort	&0x01
867>>>14	ulelong		x	\b, original size: %u bytes
868# 2 bytes: unknown purpose
869# 2 bytes: length of unknown data + mentioned bytes
870# 1-9 bytes: null-terminated file name
871# 1-4 bytes: null-terminated file extension
872>>12	uleshort	&0x08
873>>>12	uleshort				^0x01
874>>>>12		uleshort			^0x02
875>>>>>12			uleshort		^0x04
876>>>>>>12			uleshort	^0x10
877>>>>>>>14				string	x	\b, %-.8s
878>>>>>>12			uleshort	&0x10
879>>>>>>>14				string	x	\b, %-.8s
880>>>>>>>>&1				string	x	\b.%-.3s
881>>>>>12			uleshort		&0x04
882>>>>>>12			uleshort	^0x10
883>>>>>>>(14.s)			uleshort	x
884>>>>>>>>&14				string	x	\b, %-.8s
885>>>>>>12			uleshort	&0x10
886>>>>>>>(14.s)			uleshort	x
887>>>>>>>>&14				string	x	\b, %-.8s
888>>>>>>>>>&1				string	x	\b.%-.3s
889>>>>12		uleshort			&0x02
890>>>>>12			uleshort		^0x04
891>>>>>>12			uleshort	^0x10
892>>>>>>>16				string	x	\b, %-.8s
893>>>>>>12			uleshort	&0x10
894>>>>>>>16				string	x	\b, %-.8s
895>>>>>>>>&1				string	x	\b.%-.3s
# header flag 0x04 set (here with 0x02 set and 0x01 clear): a length word at 16
# precedes extra data, so the file name is located through the indirect offset
# (16.s) followed by a relative &16, like the sibling (14.s) (18.s) (20.s) branches
896>>>>>12			uleshort		&0x04
# no extension flag 0x10: print the name only
897>>>>>>12			uleshort	^0x10
898>>>>>>>(16.s)			uleshort	x
899>>>>>>>>&16				string	x	\b, %-.8s
# extension flag 0x10: print the name and then the extension after its NUL
900>>>>>>12			uleshort	&0x10
901>>>>>>>(16.s)			uleshort	x
902>>>>>>>>&16				string	x	\b, %-.8s
903>>>>>>>>>&1				string	x	\b.%-.3s
904>>>12	uleshort				&0x01
905>>>>12		uleshort			^0x02
906>>>>>12			uleshort		^0x04
907>>>>>>12			uleshort	^0x10
908>>>>>>>18				string	x	\b, %-.8s
909>>>>>>12			uleshort	&0x10
910>>>>>>>18				string	x	\b, %-.8s
911>>>>>>>>&1				string	x	\b.%-.3s
912>>>>>12			uleshort		&0x04
913>>>>>>12			uleshort	^0x10
914>>>>>>>(18.s)			uleshort	x
915>>>>>>>>&18				string	x	\b, %-.8s
916>>>>>>12			uleshort	&0x10
917>>>>>>>(18.s)			uleshort	x
918>>>>>>>>&18				string	x	\b, %-.8s
919>>>>>>>>>&1				string	x	\b.%-.3s
920>>>>12		uleshort			&0x02
921>>>>>12			uleshort		^0x04
922>>>>>>12			uleshort	^0x10
923>>>>>>>20				string	x	\b, %-.8s
924>>>>>>12			uleshort	&0x10
925>>>>>>>20				string	x	\b, %-.8s
926>>>>>>>>&1				string	x	\b.%-.3s
927>>>>>12			uleshort		&0x04
928>>>>>>12			uleshort	^0x10
929>>>>>>>(20.s)			uleshort	x
930>>>>>>>>&20				string	x	\b, %-.8s
931>>>>>>12			uleshort	&0x10
932>>>>>>>(20.s)			uleshort	x
933>>>>>>>>&20				string	x	\b, %-.8s
934>>>>>>>>>&1				string	x	\b.%-.3s
935# 2 bytes: length of data + mentioned bytes
936#
937#		SZDD variant Haruhiko Okumura's LZSS or 7z type MsLZ
938# URL:		http://fileformats.archiveteam.org/wiki/MS-DOS_installation_compression
939# Reference:	http://www.cabextract.org.uk/libmspack/doc/szdd_kwaj_format.html
940#		http://mark0.net/download/triddefs_xml.7z/defs/s/szdd.trid.xml
941# Note:		called "Microsoft SZDD compressed (Haruhiko Okumura's LZSS)" by TrID
942#		verified by 7-Zip `7z l -tMsLZ -slt *.??_` as MsLZ
943#		`deark -l -m lzss_oku -d2 setup-1-41.bin` as "LZSS.C by Haruhiko Okumura"
944>0	string	SZDD		MS Compress archive data, SZDD variant
945# 2nd part of signature
946#>>4	ubelong	0x88F02733	\b, SIGNATURE OK
947!:mime	application/x-ms-compress-szdd
948!:ext	??_
949# The character missing from the end of the filename (0=unknown)
950>>9	string	>\0		\b, %-.1s is last character of original name
951# https://www.betaarchive.com/forum/viewtopic.php?t=26161
952# Compression mode: "A" (0x41) found but sometimes "B" in Windows 3.1 builds 026 and 034e
953>>8	string	!A		\b, %-.1s method
954>>10	ulelong	>0		\b, original size: %u bytes
955# Summary:	InstallShield archive with SZDD compressed
956# URL:		https://community.flexera.com/t5/InstallShield-Knowledge-Base/InstallShield-Redistributable-Files/ta-p/5647
957# From:		Joerg Jenderek
9581	search/48/bs	SZDD\x88\xF0\x27\x33	InstallShield archive
959#!:mime	application/octet-stream
960!:mime	application/x-installshield-compress-szdd
961!:ext	ibt
962# name of compressed archive member like: setup.dl_ _setup7int.dl_ _setup2k.dl_ _igdi.dl_ cabinet.dl_
963>0	string	x		%s
964# name of uncompressed archive member like: setup.dll _Setup.dll IGdi.dll CABINET.DLL
965>>&1	string	x		(%s)
966# probably version like: 9.0.0.333 9.1.0.429 11.50.0.42618
967>>>&1	string	x		\b, version %s
968# SZDD member length like: 168048 169333 181842
969>>>>&1	string	x		\b, %s bytes
970# MS Compress archive data
971#>&0	string		SZDD	\b, SIGNATURE FOUND
972>&0	indirect	x
973#		QBasic SZDD variant
9743	string	\x88\xf0\x27
975>0	string	SZ\x20		MS Compress archive data, QBasic variant
976!:mime	application/x-ms-compress-sz
977!:ext	??$
978>>8	ulelong	>0		\b, original size: %u bytes
979
980# Summary:	lzss compressed/EDI Pack
981# From:		Joerg Jenderek
982# URL:		http://fileformats.archiveteam.org/wiki/EDI_Install_packed_file
983# Note:		called "EDI Install LZS compressed data" by TrID and verified by
984#		command like `deark -l -m edi_pack -d2 BOOK01A.IC$` as "EDI Pack LZSS1"
9850	string					EDILZSS
986>7	string					1
987# look for point character before original file name extension
988>>8	search/9/b				.
989# check suffix of possible original file name
990#>>>&0		ubelong				x	SUFFIX=%8.8x
991# samples without valid character after point in original file name field like: FENNEL.LZS PLANTAIN.LZS
992>>>&0		ubyte				<0x20
993>>>>0			use				edi-lzs
994# samples with valid character after point in original file name field
995>>>&0		ubyte				>0x1F
996# check 2nd character of suffix
997#>>>>&0			ubyte	x			2ND_SUFFIX=%x
998# sample with one valid character after point followed by \0 in original file name field like: SPELMATE.H$
999>>>>&0			ubyte			=0
1000>>>>>0				use			edi-pack
1001>>>>&0			ubyte			>0x1F
1002# check 3rd character of suffix
1003#>>>>>&0				ubyte		x	3RD_SUFFIX=%x
1004# no sample with 2 valid characters after point followed by \0 in original file name field
1005>>>>>&0				ubyte		=0
1006>>>>>>0					use		edi-pack
1007# samples with valid 3rd character after point in original file name field
1008>>>>>&0				ubyte		>0x1F
1009# sample with 3 valid character after point followed by \0 in original file name field like: BOOK01A.IC$ CTL3D.DL$
1010>>>>>>&0				ubyte	=0
1011>>>>>>>0					use	edi-pack
1012# sample with 3 valid character after point followed by no \0 in original file name field like: HERBTEXT.LZS
1013>>>>>>&0				ubyte	!0
1014>>>>>>>0					use	edi-lzs
1015# no sample with invalid 3rd character after point in original file name field
1016>>>>>&0				default		x
1017>>>>>>0					use		edi-lzs
1018# sample with invalid 2nd character after point in original file name field like: LACERATE.LZS SPLINTER.LZS
1019>>>>&0			default			x
1020>>>>>0	use						edi-lzs
1021# sample without point character in original file name field like GUNSHOT.LZS
1022>>8	default					x
1023>>>0		use					edi-lzs
1024# Reference:	http://mark0.net/download/triddefs_xml.7z/defs/e/edi-lzss2.trid.xml
1025# Note:		called "EDI Install Pro LZSS2 compressed data" by TrID and verified by
1026#		command like `deark -l -m edi_pack -d2 4WAY.WA$` as "EDI Pack LZSS2"
1027>7	string			2			EDI LZSS2 packed
1028#!:mime	application/octet-stream
1029!:mime	application/x-edi-pack-lzss
1030# the name of a compressed file often ends in character '$' or '_'
1031!:ext	??$/??_
1032# original filename, NUL-terminated, padded to 13 bytes like: mci.vbx 4way.wav skymap.exe cmdialog.vbx
1033>>8		string		x			"%-0.13s"
1034# original file size, as a 4-byte integer.
1035>>21		ulelong		x			\b, %u bytes
1036# compressed data like: ff5249464606ec00 ff4d5aa601010000
1037>>>25		ubequad		x			\b, data %#16.16llx...
# Subroutine reached via `use edi-pack` from the EDILZSS entry (version byte "1")
# when the field at offset 8 contains a point followed by 1 to 3 printable
# characters and a terminating NUL, i.e. it looks like an original file name.
10380	name		edi-pack
1039# Note:		verified by command like `deark -l -d2 SPELMATE.H$` as "EDI Pack LZSS1"
1040# original filename, NUL-terminated, padded to 13 bytes like: ctl3d.dll spelmate.h filemenu.rc owl.def index-it.exe
1041# but not like \377Aloe.lzs\273 (HERBTEXT.LZS)
1042>8	string		x				EDI LZSS packed "%-.13s"
1043#!:mime	application/octet-stream
1044!:mime	application/x-edi-pack-lzss
1045# the name of a compressed file often ends in character '$' or '_'
# NOTE(review): the list below has no '_' variant although the comment above mentions one; confirm
1046!:ext	??$/?$
1047# compressed data like: f7000001eff02020 ff4d5aa900020000 ff2f2a207370656c
# dump starts at 21 = 8 + 13, directly after the padded name field
1048>21	ubequad		x				\b, data %#16.16llx...
1049# URL:		http://fileformats.archiveteam.org/wiki/EDI_LZSSLib
1050# Note:		verified partly by command like `deark -l -m edi_pack -d2 GUNSHOT.LZS` as "EDI LZSSLib"
# Subroutine reached via `use edi-lzs` from the EDILZSS entry (version byte "1")
# for every case where the bytes at offset 8 do not form a point plus
# NUL-terminated suffix; unlike edi-pack no file name is printed.
10510	name		edi-lzs
1052# Note:		verified by command like `deark -l -d2 GUNSHOT.LZS` as "EDI LZSSLib"
1053# no original filename looks like: \277BM\226.\0 \277BM.n\001 \277BM\226.\0 \277BM.g\001 \377Aloe.lzs\273
# unconditional test (x): only emits the fixed description text
1054>8	string		x				EDI LZSSLib packed
1055#!:mime	application/octet-stream
1056!:mime	application/x-edi-pack-lzss
1057# The name of a compressed file ends with LZS suffix
1058!:ext	lzs
1059# compressed data like: bf424df6e10100f3 ff416c6f652e6c7a ff416c6f652e6c7a
# dump starts at 8 because no name field is assumed in front of the data
1060>8	ubequad		x				\b, data %#16.16llx...
1061
1062# Summary:	CAZIP compressed file
1063# From:		Joerg Jenderek
1064# URL:		http://fileformats.archiveteam.org/wiki/CAZIP
1065# Reference:	http://mark0.net/download/triddefs_xml.7z/defs/c/caz.trid.xml
1066# Note:		Format is distinct from CAZIPXP compressed
10670	string	\x0D\x0A\x1ACAZIP	CAZIP compressed file
1068#!:mime	application/octet-stream
1069!:mime	application/x-compress-cazip
1070# like: BLINKER.WR_ CLIPDEFS._ CAOSETUP.EX_ CLIPPER.EX_ FILEIO.C_
1071!:ext	??_/?_/_
1072
1073# Summary:	FTCOMP compressed archive
1074# From:		Joerg Jenderek
1075# URL:		http://fileformats.archiveteam.org/wiki/FTCOMP
1076# Reference:    http://mark0.net/download/triddefs_xml.7z/defs/a/ark-ftcomp.trid.xml
1077# Note:		called by TrID "FTCOMP compressed archive"
1078#		extracted by `unpack seahelp.hl_`
107924	string/b	FTCOMP		FTCOMP compressed archive
1080#!:mime	application/octet-stream
1081!:mime	application/x-compress-ftcomp
1082!:ext	??_/??@/dll/drv/pk2/
1083# probably A596FDFF magic at the beginning
1084>0	ubelong		!0xA596FDFF	\b, at beginning %#x
1085# probably original file name with directory like: \OS2\unpack.exe \SYSTEM\8514.DRV MAHJONGG.EXE
1086>41	string		x		"%s"
1087
1088# MP3 (archiver, not lossy audio compression)
10890	string	MP3\x1a MP3-Archiver archive data
1090# ZET
10910	string	OZ\xc3\x9d ZET archive data
1092# TSComp
1093# Update:	Joerg Jenderek 2023 Nov
1094# URL:		http://fileformats.archiveteam.org/wiki/TSComp
1095# Reference:	http://mark0.net/download/triddefs_xml.7z/defs/t/tscomp.trid.xml
1096#		https://entropymine.com/deark/releases/deark-1.6.5.tar.gz
1097#		deark-1.6.5/modules/installshld.c
1098# Note:		called "TSComp compressed data" by TrID
1099#		verified by command like `deark -m tscomp -l -d2 MAKERRES.DL$`
1100#		The "13" might be a version number. The "8c" is a mystery
11010	string	\x65\x5d\x13\x8c\x08\x01\x03\x00 TSComp archive
1102#!:mime	application/octet-stream
1103!:mime	application/x-tscomp-compressed
1104# filename style: 0~old version 1~without wildcard 2~with wildcard
1105#>0x08	ubyte		x				\b, filename style %u
1106>0x08	ubyte		0				data, filename style 0
1107# no example found
1108!:ext	??$
1109#>0x08	ubyte		1				data, without wildcard
1110>0x08	ubyte		1				data
1111# for single-file archives, often the last letter of the filename extension is changed to "$"; but also name like: BUILD3.BM!
1112!:ext	??$/??!
1113>0x08	ubyte		2				data, with wildcard
1114# for multi-file archives common extensions seem to be .lib and .cmp, but also names like: SAMPMIF$ OTDATA.$$$ TWOFILES.TSC WIN.PAK
1115!:ext	/lib/cmp/$$$/tsc/pak
1116# fnlen; pascal string length; original 1st file name like: CHFORMAT.MML
1117>0x1c	pstring		x				\b, %s
1118# md->fi->timestamp
1119>0x16	lemsdosdate	x				\b, modified %s
1120>0x18	lemsdostime	x				%s
1121# 1st compressed size: like 180 (SAMPMML$$)
1122>0x0E	ulelong		x				\b, compressed size %u
1123# de_dbg_indent(c, 1): like: 12h
1124#>0x0d	ubyte		x				\b, at 0xD %#x
1125# like: 0
1126#>0x1A	ubeshort	x				\b, at 0x1A %#x
1127# 2nd member offset
1128#>0x12	ulelong		x				\b, next offset %#x
1129>0x12	ulelong		>0
1130# original 2nd archive member name like: FORMATS.MML
1131>>(0x12.l+15)	pstring	x				\b, %s ...
1132# ARQ
11330	string	gW\4\1 ARQ archive data
1134# Squash
11353	string	OctSqu Squash archive data
1136# Terse
11370	string	\5\1\1\0 Terse archive data
1138# UHarc
11390	string	UHA UHarc archive data
1140# ABComp
11410	string	\2AB ABComp archive data
11420	string	\3AB2 ABComp archive data
1143# CMP
11440	string	CO\0 CMP archive data
1145# Splint
11460	string	\x93\xb9\x06 Splint archive data
1147# InstallShield
11480	string	\x13\x5d\x65\x8c InstallShield Z archive Data
1149# Gather
11501	string	GTH Gather archive data
1151# BOA
11520	string	BOA BOA archive data
1153# RAX
11540	string	ULEB\xa RAX archive data
1155# Xtreme
11560	string	ULEB\0 Xtreme archive data
1157# Pack Magic
11580	string	@\xc3\xa2\1\0 Pack Magic archive data
1159# BTS
11600	belong&0xfeffffff	0x1a034465 BTS archive data
1161# ELI 5750
11620	string	Ora\  ELI 5750 archive data
1163# QFC
11640	string	\x1aFC\x1a QFC archive data
11650	string	\x1aQF\x1a QFC archive data
1166# PRO-PACK https://www.segaretro.org/Rob_Northen_compression
11670	string	RNC
1168>3	byte	1	PRO-PACK archive data (compression 1)
1169>3	byte	2	PRO-PACK archive data (compression 2)
1170# 777
11710	string	777 777 archive data
1172# LZS221
11730	string	sTaC LZS221 archive data
1174# HPA
11750	string	HPA HPA archive data
1176# Arhangel
11770	string	LG Arhangel archive data
1178# EXP1, uses bzip2
11790	string	0123456789012345BZh EXP1 archive data
1180# IMP
11810	string	IMP\xa IMP archive data
1182# NRV
11830	string	\x00\x9E\x6E\x72\x76\xFF NRV archive data
1184# Squish
11850	string	\x73\xb2\x90\xf4 Squish archive data
1186# Par
11870	string	PHILIPP Par archive data
11880	string	PAR Par archive data
1189# HIT
11900	string	UB HIT archive data
1191# SBX
11920	belong&0xfffff000	0x53423000 SBX archive data
1193# NaShrink
11940	string	NSK NaShrink archive data
1195# SAPCAR
11960	string	#\ CAR\ archive\ header SAPCAR archive data
11970	string	CAR\ 2.00 SAPCAR archive data
11980	string	CAR\ 2.01 SAPCAR archive data
1199#!:mime	application/octet-stream
1200!:mime	application/vnd.sar
1201!:ext	sar
1202# Disintegrator
12030	string	DST Disintegrator archive data
1204# ASD
12050	string	ASD ASD archive data
1206# InstallShield CAB
1207# Update:	Joerg Jenderek at Nov 2021
1208# URL:		https://en.wikipedia.org/wiki/InstallShield
1209# Reference:	https://github.com/twogood/unshield/blob/master/lib/cabfile.h
1210# Note:		Not compatible with Microsoft CAB files
1211# http://mark0.net/download/triddefs_xml.7z/defs/a/ark-cab-ishield.trid.xml
1212# CAB_SIGNATURE 0x28635349
12130	string	ISc( InstallShield
1214#!:mime		application/octet-stream
1215!:mime		application/x-installshield
1216# http://mark0.net/download/triddefs_xml.7z/defs/a/ark-cab-ishield-hdr.trid.xml
1217>16	ulelong	!0	setup header
1218# like: _SYS1.HDR _USER1.HDR data1.hdr
1219!:ext	hdr
1220>16	ulelong	=0	CAB
1221# like: _SYS1.CAB _USER1.CAB DATA1.CAB  data2.cab
1222!:ext	cab
1223# https://github.com/twogood/unshield/blob/master/lib/helper.c
1224# version like:	0x1005201 0x100600c 0x1007000 0x1009500
1225#		0x2000578 0x20005dc 0x2000640 0x40007d0 0x4000834
1226>4	ulelong	x	\b, version %#x
1227# volume_info like: 0
1228>8	ulelong	!0	\b, volume_info %#x
1229# cab_descriptor_offset like: 0x200
1230>12	ulelong	!0x200	\b, offset %#x
1231#>0x200	ubequad	x	\b, at 0x200 %#16.16llx
1232# cab_descriptor_size like: 0 (*.cab) BD5 C8B DA5 E2A E36 116C 251D 4DA9 56F0 5CC2 6E4B 777D 779E 1F7C2
1233>16	ulelong	!0	\b, descriptor size %#x
1234# TOP4
12350	string	T4\x1a TOP4 archive data
1236# BatComp left out: sig looks like COM executable
1237# so TODO: get real 4dos batcomp file and find sig
1238# BlakHole
12390	string	BH\5\7 BlakHole archive data
1240# BIX
12410	string	BIX0 BIX archive data
1242# ChiefLZA
12430	string	ChfLZ ChiefLZA archive data
1244# Blink
12450	string	Blink Blink archive data
1246# Logitech Compress
12470	string	\xda\xfa Logitech Compress archive data
1248# ARS-Sfx (FIXME: really a SFX? then goto COM/EXE)
12491	string	(C)\ STEPANYUK ARS-Sfx archive data
1250# AKT/AKT32
12510	string	AKT32 AKT32 archive data
12520	string	AKT AKT archive data
1253# NPack
12540	string	MSTSM NPack archive data
1255# PFT
12560	string	\0\x50\0\x14 PFT archive data
1257# SemOne
12580	string	SEM SemOne archive data
1259# PPMD
12600	string	\x8f\xaf\xac\x84 PPMD archive data
1261# FIZ
12620	string	FIZ FIZ archive data
1263# MSXiE
12640	belong&0xfffff0f0	0x4d530000 MSXiE archive data
1265# DeepFreezer
12660	belong&0xfffffff0	0x797a3030 DeepFreezer archive data
1267# DC
12680	string	=<DC- DC archive data
1269# TPac
12700	string	\4TPAC\3 TPac archive data
1271# Ai
1272# Update:	Joerg Jenderek
1273# URL:		http://fileformats.archiveteam.org/wiki/Ai_Archiver
12740	string	Ai\1\1\0 Ai archive data
1275#!:mime	application/octet-stream
1276!:mime	application/x-compress-ai
1277!:ext	ai
12780	string	Ai\1\0\0 Ai archive data
1279#!:mime	application/octet-stream
1280!:mime	application/x-compress-ai
1281!:ext	ai
1282# Ai32
1283# Reference:	http://mark0.net/download/triddefs_xml.7z/defs/a/ark-ai.trid.xml
1284# Note:		called "Ai Archivator compressed archive" by TrID
12850	string	Ai\2\0 Ai32 archive data
1286#!:mime	application/octet-stream
1287!:mime	application/x-compress-ai
1288!:ext	ai
1289# original file name
1290>8	pstring/h x	"%s"
1291# according to TrID the next 3 bytes are nil
1292>5	ubyte	!0	\b, at 5 %#x
1293>6	ubyte	!0	\b, at 6 %#x
1294>7	ubyte	!0	\b, at 7 %#x
1295# the fourth byte with value 0 is probably a flag for "non solid" mode
1296#>3	ubyte	=0x00	\b, unsolid mode
12970	string	Ai\2\1 Ai32 archive data
1298#!:mime	application/octet-stream
1299!:mime	application/x-compress-ai
1300!:ext	ai
1301# original file name
1302>8	pstring/h x	"%s"
1303# the fourth byte with value 0x01 is probably a flag for "solid" mode; this is not the default
1304>3	ubyte	=0x01	\b, solid mode
1305# SBC
13060	string	SBC SBC archive data
1307# Ybs
13080	string	YBS Ybs archive data
1309# DitPack
13100	string	\x9e\0\0 DitPack archive data
1311# DMS
13120	string	DMS! DMS archive data
1313# EPC
13140	string	\x8f\xaf\xac\x8c EPC archive data
1315# VSARC
13160	string	VS\x1a VSARC archive data
1317# PDZ
13180	string	PDZ PDZ archive data
1319# ReDuq
13200	string	rdqx ReDuq archive data
1321# GCA
13220	string	GCAX GCA archive data
1323# PPMN
13240	string	pN PPMN archive data
1325# WinImage
13263	string	WINIMAGE WinImage archive data
1327# Compressia
13280	string	CMP0CMP Compressia archive data
1329# UHBC
13300	string	UHB UHBC archive data
1331# WinHKI
13320	string	\x61\x5C\x04\x05 WinHKI archive data
1333# WWPack data file
13340	string	WWP WWPack archive data
1335# BSN (BSA, PTS-DOS)
13360	string	\xffBSG BSN archive data
13371	string	\xffBSG BSN archive data
13383	string	\xffBSG BSN archive data
13391	string	\0\xae\2 BSN archive data
13401	string	\0\xae\3 BSN archive data
13411	string	\0\xae\7 BSN archive data
1342# AIN
13430	string	\x33\x18 AIN archive data
13440	string	\x33\x17 AIN archive data
1345# XPA32 test moved and merged with XPA by Joerg Jenderek at Sep 2015
1346# SZip (TODO: doesn't catch all versions)
13470	string	SZ\x0a\4 SZip archive data
1348# XPack DiskImage
1349# *.XDI updated by Joerg Jenderek Sep 2015
1350# ftp://ftp.sac.sk/pub/sac/pack/0index.txt
1351# GRR: this test is still too general as it catches also text files starting with jm
13520	string	jm
1353# only found examples with this additional characteristic 2 bytes
1354>2	string	\x2\x4	Xpack DiskImage archive data
1355#!:ext xdi
1356# XPack Data
1357# *.xpa updated by Joerg Jenderek Sep 2015
1358# ftp://ftp.elf.stuba.sk/pub/pc/pack/
13590	string	xpa	XPA
1360!:ext	xpa
1361# XPA32
1362# ftp://ftp.elf.stuba.sk/pub/pc/pack/xpa32.zip
1363# created by XPA32.EXE version 1.0.2 for Windows
1364>0	string	xpa\0\1 \b32 archive data
1365# created by XPACK.COM version 1.67m or 1.67r with short 0x1800
1366>3	ubeshort	!0x0001	\bck archive data
1367# XPack Single Data
1368# changed by Joerg Jenderek Sep 2015 back to like in version 5.12
1369# letter 'I'+ acute accent is equivalent to \xcd
13700	string	\xcd\ jm	Xpack single archive data
1371#!:mime	application/x-xpa-compressed
1372!:ext xpa
1373
1374# TODO: missing due to unknown magic/magic at end of file:
1375#DWC
1376#ARG
1377#ZAR
1378#PC/3270
1379#InstallIt
1380#RKive
1381#RK
1382#XPack Diskimage
1383
1384# These were inspired by idarc, but actually verified
1385# Dzip archiver (.dz)
1386# Update: Joerg Jenderek
1387# URL: http://speeddemosarchive.com/dzip/
1388# reference: http://speeddemosarchive.com/dzip/dz29src.zip/main.c
1389# GRR: line below is too general as it matches also ASCII texts like Doszip commander help dz.txt
13900	string	DZ
1391# latest version is 2.9 dated 7 may 2003
1392>2	byte	<4 Dzip archive data
1393!:mime	application/x-dzip
1394!:ext	dz
1395>>2	byte	x \b, version %i
1396>>3	byte	x \b.%i
1397>>4	ulelong	x \b, offset %#x
1398>>8	ulelong	x \b, %u files
1399# ZZip archiver (.zz)
14000	string	ZZ\ \0\0 ZZip archive data
14010	string	ZZ0 ZZip archive data
1402# PAQ archiver (.paq)
14030	string	\xaa\x40\x5f\x77\x1f\xe5\x82\x0d PAQ archive data
14040	string	PAQ PAQ archive data
1405>3	byte&0xf0	0x30
1406>>3	byte	x (v%c)
1407# JAR archiver (.j), this is the successor to ARJ, not Java's JAR (which is essentially ZIP)
1408# Update:	Joerg Jenderek
1409# URL:		http://fileformats.archiveteam.org/wiki/JAR_(ARJ_Software)
1410# reference:	http://mark0.net/download/triddefs_xml.7z/defs/a/ark-jar.trid.xml
1411#		https://www.sac.sk/download/pack/jar102x.exe/TECHNOTE.DOC
1412# Note:		called "JAR compressed archive" by TrID
14130xe	string	\x1aJar\x1b JAR (ARJ Software, Inc.) archive data
1414#!:mime	application/octet-stream
1415!:mime	application/x-compress-j
1416>0	ulelong	x		\b, CRC32 %#x
1417# standard suffix is ".j"; for multi volumes following order j01 j02 ... j99 100 ... 990
1418!:ext	j/j01/j02
1419# URL:		http://fileformats.archiveteam.org/wiki/JARCS
1420# reference:	http://mark0.net/download/triddefs_xml.7z/defs/a/ark-jarcs.trid.xml
1421# Note:		called "JARCS compressed archive" by TrID
14220	string	JARCS JAR (ARJ Software, Inc.) archive data
1423#!:mime	application/octet-stream
1424!:mime	application/x-compress-jar
1425!:ext	jar
1426
1427# ARJ archiver (jason@jarthur.Claremont.EDU)
1428# URL:		http://fileformats.archiveteam.org/wiki/ARJ
1429# reference:	http://mark0.net/download/triddefs_xml.7z/defs/a/ark-arj.trid.xml
1430#		https://github.com/FarGroup/FarManager/
1431#		blob/master/plugins/multiarc/arc.doc/arj.txt
1432# Note:		called "ARJ compressed archive" by TrID and
1433#		"ARJ File Format" by DROID via PUID fmt/610
1434#		verified by `7z l -tarj PHRACK1.ARJ` and
1435#		`arj.exe l TEST-hk9.ARJ`
14360	leshort		0xea60
1437# skip DROID fmt-610-signature-id-946.arj by check for valid file type of main header
1438>0xA	ubyte		2
1439>>0	use		arj-archive
14400	name		arj-archive
1441>0	leshort		x		ARJ archive
1442!:mime	application/x-arj
1443# look for terminating 0-character of filename
1444>0x26	search/1024	\0
1445# file name extension is normally .arj but not for parts of multi volume
1446#>>&-5	string		x		extension %.4s
1447>>&-5	string/c	.arj		data
1448!:ext	arj
1449>>&-5	default		x
1450# for multi volume first name is archive.arj then following parts archive.a01 archive.a02 ...
1451>>>8	byte		&0x04		data
1452!:ext	a01/a02
1453# for SFX first name is archive.exe then following parts archive.e01 archive.e02 ...
1454>>>8	byte		^0x04		data, SFX multi-volume
1455!:ext	e01/e02
1456# basic header size like: 0x002b 0x002c 0x04e0 0x04e3 0x04e7
1457#>2	uleshort	x		basic header size %#4.4x
1458# next fragment content like: 0x0a200a003a8fc713 0x524a000010bb3471 0x524a0000c73c70f9
1459#>(2.s)	ubequad		x		NEXT FRAGMENT CONTENT %#16.16llx
1460# first_hdr_size; seems to be same as basic header size
1461#>2	uleshort	x		1st header size %#x
1462# archiver version number like: 3 4 6 11 102
1463>5	byte		x		\b, v%d
1464# minimum archiver version to extract like: 1
1465>6	ubyte		!1		\b, minimum %u to extract
1466# FOR DEBUGGING
1467#>8	byte		x		\b, FLAGS %#x
1468# GARBLED_FLAG1; garble with password; g switch
1469>8	byte		&0x01		\b, password-protected
1470# encryption version: 0~old  1~old 2~new 3~reserved 4~40 bit key GOST
1471>>0x20	ubyte		x		(v%u)
1472#>8	byte		&0x02		\b, secured
1473# ANSIPAGE_FLAG; indicates ANSI codepage used by ARJ32; hy switch
1474>8	byte		&0x02		\b, ANSI codepage
1475# VOLUME_FLAG indicates presence of succeeding volume; but apparently not for SFX
1476>8	byte		&0x04		\b, multi-volume
1477#>8	byte		&0x08		\b, file-offset
1478# ARJPROT_FLAG; build with data protection record; hk switch
1479>8	byte		&0x08		\b, recoverable
1480# arj protection factor; maximal 10; switch hky -> factor=y+1
1481>>0x22	byte		x		(factor %u)
1482>8	byte		&0x10		\b, slash-switched
# remaining bits of the flag byte at offset 8; every message starts with "\b, "
# and must not end with a comma, otherwise the output shows ",," before the next item
1483# BACKUP_FLAG; obsolete
1484>8	byte		&0x20		\b, backup
1485# SECURED_FLAG;
1486>8	byte		&0x40		\b, secured
1487# ALTNAME_FLAG; indicates dual-name archive
1488>8	byte		&0x80		\b, dual-name
1489# security version; 0~old 2~current
1490>9	ubyte		!0
1491>>9	ubyte		!2		\b, security version %u
1492# file type; 2 in main header; 0~binary 1~7-bitText 2~comment 3~directory 4~VolumeLabel 5=ChapterLabel
1493>0xA	ubyte		!2		\b, file type %u
1494# date+time when original archive was created in MS-DOS format via ./msdos
1495>0xC	ulelong		x		\b, created
1496>0xC	use		dos-date
1497# or date and time by new internal function
1498#>0xE	lemsdosdate	x		%s
1499#>0xC	lemsdostime	x		%s
1500# FOR DEBUGGING
1501#>0x12	uleshort	x		RAW DATE %#4.4x
1502#>0x10	uleshort	x		RAW TIME %#4.4x
1503# date+time when archive was last modified; sometimes nil or
1504# maybe wrong like in HP4DRVR.ARJ
1505#>0x10	ulelong		>0		\b, modified
1506#>>0x10	use		dos-date
1507# or date and time by new internal function
1508#>>0x12	lemsdosdate	x		%s
1509#>>0x10	lemsdostime	x		%s
1510# archive size (currently used only for secured archives); MAYBE?
1511#>0x14	ulelong		!0		\b, file size %u
1512# security envelope file position; MAYBE?
1513#>0x18	ulelong		!0		\b, at %#x security envelope
1514# filespec position in filename; WHAT IS THAT?
1515#>0x1C	uleshort	>0		\b, filespec position %#x
1516# length in bytes of security envelope data like: 2CAh 301h 364h 471h
1517>0x1E	uleshort	!0		\b, security envelope length %#x
1518# last chapter like: 0 1
1519>0x21	ubyte		!0		\b, last chapter %u
1520# filename (null-terminated string); sometimes at 0x26 when 4 bytes for extra data
1521>34	byte		x		\b, original name:
1522# with extras data
1523>34	byte		<0x0B
1524>>38	string		x		%s
1525# without extras data
1526>34	byte		>0x0A
1527>>34	string		x		%s
1528# host OS: 0~MSDOS ... 11~WIN32
1529>7	byte		0		\b, os: MS-DOS
1530>7	byte		1		\b, os: PRIMOS
1531>7	byte		2		\b, os: Unix
1532>7	byte		3		\b, os: Amiga
1533>7	byte		4		\b, os: Macintosh
1534>7	byte		5		\b, os: OS/2
1535>7	byte		6		\b, os: Apple ][ GS
1536>7	byte		7		\b, os: Atari ST
1537>7	byte		8		\b, os: NeXT
1538>7	byte		9		\b, os: VAX/VMS
1539>7	byte		10		\b, os: WIN95
1540>7	byte		11		\b, os: WIN32
1541# [JW] idarc says this is also possible
15422	leshort		0xea60		ARJ archive data
1543#2	leshort		0xea60
1544#>2	use		arj-archive
1545
1546# HA archiver (Greg Roelofs, newt@uchicago.edu)
1547# This is a really bad format. A file containing HAWAII will match this...
1548#0	string		HA		HA archive data,
1549#>2	leshort		=1		1 file,
1550#>2	leshort		>1		%u files,
1551#>4	byte&0x0f	=0		first is type CPY
1552#>4	byte&0x0f	=1		first is type ASC
1553#>4	byte&0x0f	=2		first is type HSC
1554#>4	byte&0x0f	=0x0e		first is type DIR
1555#>4	byte&0x0f	=0x0f		first is type SPECIAL
1556# suggestion: at least identify small archives (<1024 files)
# Top-level test on the big-endian 32-bit value at offset 0, masked with
# 0xffff00fc: bytes 0-1 must be 0x48 0x41 ("HA"), byte 2 is ignored and the
# six high bits of byte 3 must be clear.  Bytes 2-3 are the little-endian
# 16-bit value printed below as the file count, so the mask keeps it < 1024.
15570  belong&0xffff00fc 0x48410000 HA archive data
# 16-bit little-endian value at offset 2, printed as the number of files
1558>2	leshort		=1		1 file,
1559>2	leshort		>1		%u files,
# low nibble of the byte at offset 4 selects the type text for the first member
1560>4	byte&0x0f	=0		first is type CPY
1561>4	byte&0x0f	=1		first is type ASC
1562>4	byte&0x0f	=2		first is type HSC
1563>4	byte&0x0f	=0x0e		first is type DIR
1564>4	byte&0x0f	=0x0f		first is type SPECIAL
1565
1566# HPACK archiver (Peter Gutmann, pgut1@cs.aukuni.ac.nz)
15670	string		HPAK		HPACK archive data
1568
1569# JAM Archive volume format, by Dmitry.Kohmanyuk@UA.net
15700	string		\351,\001JAM\ 		JAM archive,
1571>7	string		>\0			version %.4s
1572>0x26	byte		=0x27			-
1573>>0x2b	string          >\0			label %.11s,
1574>>0x27	lelong		x			serial %08x,
1575>>0x36	string		>\0			fstype %.8s
1576
1577# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
1578# Update: Joerg Jenderek
1579# URL: https://en.wikipedia.org/wiki/LHA_(file_format)
1580# Reference: https://web.archive.org/web/20021005080911/http://www.osirusoft.com/joejared/lzhformat.html
1581#
1582#	check and display information of lharc (LHa,PMarc) file
15830	name				lharc-file
1584# check 1st character of method id like -lz4- -lh5- or -pm2-
1585>2	string		-
1586# check 5th character of method id
1587>>6	string		-
1588# check header level 0 1 2 3
1589>>>20	ubyte		<4
1590# check 2nd, 3rd and 4th character of method id
1591>>>>3	regex		\^(lh[0-9a-ex]|lz[s2-8]|pm[012]|pc1)		\b
1592!:mime	application/x-lzh-compressed
1593# creator type "LHA "
1594!:apple	????LHA
1595# display archive type name like "LHa/LZS archive data" or "LArc archive"
1596>>>>>2	string		-lz		\b
1597!:ext	lzs
1598# already known  -lzs- -lz4- -lz5- with old names
1599>>>>>>2	string	-lzs		LHa/LZS archive data
1600>>>>>>3	regex	\^lz[45]	LHarc 1.x archive data
1601# missing -lz?- with wikipedia names
1602>>>>>>3	regex	\^lz[2378]	LArc archive
1603# display archive type name like "LHa (2.x) archive data"
1604>>>>>2	string		-lh		\b
1605# already known -lh0- -lh1- -lh2- -lh3-  -lh4- -lh5- -lh6- -lh7- -lhd- variants with old names
1606>>>>>>3	regex		\^lh[01]	LHarc 1.x/ARX archive data
1607# LHice archiver uses ".ICE" as name extension instead of the usual one ".lzh"
1608# FOOBAR archiver uses ".foo" as name extension instead of the usual one;
1609# "Florian Orjanov's and Olga Bachetska's ARchiver" could not be found at the moment
1610>>>>>>>2	string	-lh1		\b
1611!:ext lha/lzh/ice
1612>>>>>>3	regex		\^lh[23d]	LHa 2.x? archive data
1613>>>>>>3	regex		\^lh[7]		LHa (2.x)/LHark archive data
1614>>>>>>3	regex		\^lh[456]	LHa (2.x) archive data
1615>>>>>>>2	string	-lh5		\b
1616# https://en.wikipedia.org/wiki/BIOS
1617# Some mainboard BIOS like Award use LHa compression. So archives with unusual extension are found like
1618# bios.rom , kd7_v14.bin, 1010.004, ...
1619!:ext lha/lzh/rom/bin
1620# missing -lh?- variants (Joe Jared)
1621>>>>>>3	regex		\^lh[89a-ce]	LHa (Joe Jared) archive
1622# UNLHA32 2.67a
1623>>>>>>2	string		-lhx		LHa (UNLHA32) archive
1624# lha archives with standard file name extensions ".lha" ".lzh"
1625>>>>>>3	regex		!\^(lh1|lh5)	\b
1626!:ext lha/lzh
1627# this should not happen if all -lh variants are described
1628>>>>>>2	default		x		LHa (unknown) archive
1629#!:ext	lha
1630# PMarc
1631>>>>>3	regex		\^pm[012]	PMarc archive data
1632!:ext pma
1633# append method id without leading and trailing minus character
1634>>>>>3	string		x		[%3.3s]
1635>>>>>>0	use	lharc-header
1636#
1637#	check and display information of lharc header
16380	name				lharc-header
1639# header size 0x4 , 0x1b-0x61
1640#>0	ubyte		x
1641# compressed data size != compressed file size
1642#>7	ulelong		x		\b, data size %d
1643# attribute: 0x2~?? 0x10~symlink|target 0x20~normal
1644#>19	ubyte		x		\b, 19_%#x
1645# level identifier 0 1 2 3
1646#>20	ubyte		x		\b, level %d
1647# time stamp
1648#>15		ubelong	x		DATE %#8.8x
1649# OS ID for level 1
1650>20	ubyte		1
1651# 0x20 types found for *.rom files
1652>>(21.b+24)	ubyte	<0x21		\b, %#x OS
1653# ascii type like M for MSDOS
1654>>(21.b+24)	ubyte	>0x20		\b, '%c' OS
1655# OS ID for level 2
1656>20	ubyte		2
1657#>>23	ubyte		x		\b, OS ID %#x
1658>>23	ubyte		<0x21		\b, %#x OS
1659>>23	ubyte		>0x20		\b, '%c' OS
1660# filename only for level 0 and 1
1661>20	ubyte		<2
1662# length of filename
1663>>21		ubyte	>0		\b, with
1664# filename
1665>>>21		pstring	x		"%s"
1666#
1667#2	string		-lh0-		LHarc 1.x/ARX archive data [lh0]
1668#!:mime	application/x-lharc
16692	string		-lh0-
1670>0	use	lharc-file
1671#2	string		-lh1-		LHarc 1.x/ARX archive data [lh1]
1672#!:mime	application/x-lharc
16732	string		-lh1-
1674>0	use	lharc-file
1675# NEW -lz2- ... -lz8-
16762	string		-lz2-
1677>0	use	lharc-file
16782	string		-lz3-
1679>0	use	lharc-file
16802	string		-lz4-
1681>0	use	lharc-file
16822	string		-lz5-
1683>0	use	lharc-file
16842	string		-lz7-
1685>0	use	lharc-file
16862	string		-lz8-
1687>0	use	lharc-file
1688#	[never seen any but the last; -lh4- reported in comp.compression:]
1689#2	string		-lzs-		LHa/LZS archive data [lzs]
16902	string		-lzs-
1691>0	use	lharc-file
1692# According to wikipedia and others such a version does not exist
1693#2	string		-lh\40-		LHa 2.x? archive data [lh ]
1694#2	string		-lhd-		LHa 2.x? archive data [lhd]
16952	string		-lhd-
1696>0	use	lharc-file
1697#2	string		-lh2-		LHa 2.x? archive data [lh2]
16982	string		-lh2-
1699>0	use	lharc-file
1700#2	string		-lh3-		LHa 2.x? archive data [lh3]
17012	string		-lh3-
1702>0	use	lharc-file
1703#2	string		-lh4-		LHa (2.x) archive data [lh4]
17042	string		-lh4-
1705>0	use	lharc-file
1706#2	string		-lh5-		LHa (2.x) archive data [lh5]
17072	string		-lh5-
1708>0	use	lharc-file
1709#2	string		-lh6-		LHa (2.x) archive data [lh6]
17102	string		-lh6-
1711>0	use	lharc-file
1712#2	string		-lh7-		LHa (2.x)/LHark archive data [lh7]
17132	string		-lh7-
1714# !:mime	application/x-lha
1715# >20	byte		x		- header level %d
1716>0	use	lharc-file
1717# NEW -lh8- ... -lhe- , -lhx-
17182	string		-lh8-
1719>0	use	lharc-file
17202	string		-lh9-
1721>0	use	lharc-file
17222	string		-lha-
1723>0	use	lharc-file
17242	string		-lhb-
1725>0	use	lharc-file
17262	string		-lhc-
1727>0	use	lharc-file
17282	string		-lhe-
1729>0	use	lharc-file
17302	string		-lhx-
1731>0	use	lharc-file
1732# taken from idarc [JW]
17332   string      -lZ         PUT archive data
1734# already done by LHarc magics
1735# this should never happen if all sub types of LZS archive are identified
1736#2   string      -lz         LZS archive data
17372   string      -sw1-       Swag archive data
1738
# Named sub-pattern, invoked with "use rar-file-header" by the RAR entry.
# All offsets below are relative to the offset given at the "use" site.
17390	name		rar-file-header
# byte at +24: printed as a version string for the values 15, 20 and 29;
# any other value prints nothing
1740>24	byte		15		\b, v1.5
1741>24	byte		20		\b, v2.0
1742>24	byte		29		\b, v4
# byte at +15: printed as the operating system for the values 0 to 5
1743>15	byte		0		\b, os: MS-DOS
1744>15	byte		1		\b, os: OS/2
1745>15	byte		2		\b, os: Win32
1746>15	byte		3		\b, os: Unix
1747>15	byte		4		\b, os: Mac OS
1748>15	byte		5		\b, os: BeOS
1749
# Named sub-pattern, invoked with "use rar-archive-header" by the RAR entry.
# Decodes the little-endian 16-bit flag word at +3 relative to the "use" offset.
17500	name		rar-archive-header
# only print the "flags:" label when at least one of the low nine bits is set
1751>3	leshort&0x1ff	>0		\b, flags:
# one line per flag bit; each set bit appends its name to the description
# (note: the 0x10 test is listed before the 0x08 test, which fixes output order)
1752>>3	leshort		&0x01		ArchiveVolume
1753>>3	leshort		&0x02		Commented
1754>>3	leshort		&0x04		Locked
1755>>3	leshort		&0x10		NewVolumeNaming
1756>>3	leshort		&0x08		Solid
1757>>3	leshort		&0x20		Authenticated
1758>>3	leshort		&0x40		RecoveryRecordPresent
1759>>3	leshort		&0x80		EncryptedBlockHeader
1760>>3	leshort		&0x100		FirstVolume
1761
1762# RAR (Roshal Archive) archive
# 7-byte signature "Rar!" 0x1a 0x07 0x00 at offset 0; the first block header
# follows at offset 7 (type byte at 9, flag word at 10, 16-bit size at 0xc).
17630	string		Rar!\x1a\7\0		RAR archive data
1764!:mime	application/vnd.rar
1765!:ext	rar/cbr
# The size field at 0xc is only two bytes wide, so it is read with ".s".
# A four-byte ".l" read would also pick up the two bytes that follow it and
# yield a wrong offset whenever those bytes are not zero.
# size+7 is the start of the block after the first one, size+9 its type byte.
1766# file header
1767>(0xc.s+9)	byte	0x74
1768>>(0xc.s+7)	use	rar-file-header
1769# subblock seems to share information with file header
1770>(0xc.s+9)	byte	0x7a
1771>>(0xc.s+7)	use	rar-file-header
# first block is of type 0x73: decode its flag word via rar-archive-header
1772>9		byte	0x73
1773>>7		use	rar-archive-header
1774
17750	string		Rar!\x1a\7\1\0		RAR archive data, v5
1776!:mime	application/vnd.rar
1777!:ext	rar
1778
1779# Very old RAR archive
1780# https://jasonblanks.com/wp-includes/images/papers/KnowyourarchiveRAR.pdf
17810	string		RE\x7e\x5e  RAR archive data (<v1.5)
1782!:mime	application/vnd.rar
1783!:ext	rar/cbr
1784
1785# SQUISH archiver (Greg Roelofs, newt@uchicago.edu)
17860	string		SQSH		squished archive data (Acorn RISCOS)
1787
1788# UC2 archiver (Greg Roelofs, newt@uchicago.edu)
1789# [JW] see exe section for self-extracting version
17900	string		UC2\x1a		UC2 archive data
1791
1792# PKZIP multi-volume archive
17930	string		PK\x07\x08PK\x03\x04	Zip multi-volume archive data, at least PKZIP v2.50 to extract
1794!:mime	application/zip
1795!:ext zip/cbz
1796
1797
17980	string		PK\003\004
1799!:strength +1
1800# IOS/IPadOS IPA file (Zip archive)
1801# Starts with Payload (file name length = 8, i.e. presumably "Payload/")
1802>26	uleshort	8
1803>>30  	string		Payload		IOS/iPadOS IPA file
1804>>>&26  search/6000	PK\003\004
1805>>>>&34	string		x	containing %s
1806!:mime	application/x-ios-app
1807!:ext	ipa
1808
1809# Android APK file (Zip archive)
1810# Starts with AndroidManifest.xml (file name length = 19)
1811>26	uleshort	19
1812>>30	string	AndroidManifest.xml	Android package (APK), with AndroidManifest.xml
1813!:mime	application/vnd.android.package-archive
1814!:ext	apk
1815>>>-22	string	PK\005\006
1816>>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	\b, with APK Signing Block
1817# Starts with META-INF/com/android/build/gradle/app-metadata.properties
1818>26	uleshort	57
1819>>30	string	META-INF/com/android/build/gradle/
1820>>>&0	string	app-metadata.properties	Android package (APK), with gradle app-metadata.properties
1821!:mime	application/vnd.android.package-archive
1822!:ext	apk
1823>>>>-22	string	PK\005\006
1824>>>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	\b, with APK Signing Block
1825# Starts with classes.dex (file name length = 11)
1826>26	uleshort	11
1827>>30	string	classes.dex	Android package (APK), with classes.dex
1828!:mime	application/vnd.android.package-archive
1829!:ext	apk
1830>>>-22	string	PK\005\006
1831>>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	\b, with APK Signing Block
1832# Starts with META-INF/MANIFEST.MF (file name length = 20)
1833# NB: checks for resources.arsc, classes.dex, etc. as well to avoid matching JAR files
1834>26	uleshort	20
1835>>30	string	META-INF/MANIFEST.MF
1836# Contains resources.arsc (near the end, in the central directory)
1837>>>-512	search	resources.arsc	Android package (APK), with MANIFEST.MF and resources.arsc
1838!:mime	application/vnd.android.package-archive
1839!:ext	apk
1840>>>>-22	string	PK\005\006
1841>>>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	\b, with APK Signing Block
1842>>>-512	default x
1843# Contains classes.dex (near the end, in the central directory)
1844>>>>-512	search	classes.dex	Android package (APK), with MANIFEST.MF and classes.dex
1845!:mime	application/vnd.android.package-archive
1846!:ext	apk
1847>>>>>-22	string	PK\005\006
1848>>>>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	\b, with APK Signing Block
1849>>>>-512	default x
1850# Contains lib/armeabi (near the end, in the central directory)
1851>>>>>-512	search	lib/armeabi	Android package (APK), with MANIFEST.MF and armeabi lib
1852!:mime	application/vnd.android.package-archive
1853!:ext	apk
1854>>>>>>-22	string	PK\005\006
1855>>>>>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	\b, with APK Signing Block
1856>>>>>-512	default x
1857# Contains drawables (near the end, in the central directory)
1858>>>>>>-512	search	res/drawable	Android package (APK), with MANIFEST.MF and drawables
1859!:mime	application/vnd.android.package-archive
1860!:ext	apk
1861>>>>>>>-22	string	PK\005\006
1862>>>>>>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	\b, with APK Signing Block
1863# It may or may not be an APK file, but it's definitely a Java JAR file
1864>>>>>>-512	default x	Java archive data (JAR)
1865!:mime	application/java-archive
1866!:ext	jar
1867# Starts with zipflinger virtual entry (28 + 104 = 132 bytes)
1868# See https://github.com/obfusk/apksigcopier/blob/666f5b7/apksigcopier/__init__.py#L230
1869>4	string	\x00\x00\x00\x00\x00\x00
1870>>&0	string	\x21\x08\x21\x02
1871>>>&0	string	\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00
1872>>>>&0	string	\x00\x00	Android package (APK), with zipflinger virtual entry
1873!:mime	application/vnd.android.package-archive
1874!:ext	apk
1875>>>>>-22	string	PK\005\006
1876>>>>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	\b, with APK Signing Block
1877
1878
1879# Specialised zip formats which start with a member named 'mimetype'
1880# (stored uncompressed, with no 'extra field') containing the file's MIME type.
1881# Check for have 8-byte name, 0-byte extra field, name "mimetype", and
1882#  contents starting with "application/":
1883>26	string		\x8\0\0\0mimetypeapplication/
1884
1885#  KOffice / OpenOffice & StarOffice / OpenDocument formats
1886#    From: Abel Cheung <abel@oaka.org>
1887
1888#   KOffice (1.2 or above) formats
1889#    (mimetype contains "application/vnd.kde.<SUBTYPE>")
1890>>50	string	vnd.kde.		KOffice (>=1.2)
1891>>>58	string	karbon			Karbon document
1892>>>58	string	kchart			KChart document
1893>>>58	string	kformula		KFormula document
1894>>>58	string	kivio			Kivio document
1895>>>58	string	kontour			Kontour document
1896>>>58	string	kpresenter		KPresenter document
1897>>>58	string	kspread			KSpread document
1898>>>58	string	kword			KWord document
1899
1900#   OpenOffice formats (for OpenOffice 1.x / StarOffice 6/7)
1901#    (mimetype contains "application/vnd.sun.xml.<SUBTYPE>")
1902# URL:		https://en.wikipedia.org/wiki/OpenOffice.org_XML
1903# reference:	http://fileformats.archiveteam.org/wiki/OpenOffice.org_XML
1904>>50	string	vnd.sun.xml.		OpenOffice.org 1.x
1905>>>62	string	writer			Writer
1906>>>>68	byte	!0x2e			document
1907!:mime	application/vnd.sun.xml.writer
1908!:ext	sxw
1909>>>>68	string	.template		template
1910!:mime	application/vnd.sun.xml.writer.template
1911!:ext	stw
1912>>>>68	string	.web			Web template
1913!:mime	application/vnd.sun.xml.writer.web
1914!:ext	stw
1915>>>>68	string	.global			global document
1916!:mime	application/vnd.sun.xml.writer.global
1917!:ext	sxg
1918>>>62	string	calc			Calc
1919>>>>66	byte	!0x2e			spreadsheet
1920!:mime	application/vnd.sun.xml.calc
1921!:ext	sxc
1922>>>>66	string	.template		template
1923!:mime	application/vnd.sun.xml.calc.template
1924!:ext	stc
1925>>>62	string	draw			Draw
1926>>>>66	byte	!0x2e			document
1927!:mime	application/vnd.sun.xml.draw
1928!:ext	sxd
1929>>>>66	string	.template		template
1930!:mime	application/vnd.sun.xml.draw.template
1931!:ext	std
1932>>>62	string	impress			Impress
1933>>>>69	byte	!0x2e			presentation
1934!:mime	application/vnd.sun.xml.impress
1935!:ext	sxi
1936>>>>69	string	.template		template
1937!:mime	application/vnd.sun.xml.impress.template
1938!:ext	sti
1939>>>62	string	math			Math document
1940!:mime	application/vnd.sun.xml.math
1941!:ext	sxm
1942>>>62	string	base			Database file
1943!:mime	application/vnd.sun.xml.base
1944!:ext	sdb
1945
1946# URL:	https://wiki.openoffice.org/wiki/Documentation/DevGuide/Extensions/File_Format
1947# From:	Joerg Jenderek
1948# Note:	only few OXT samples are detected here by mimetype member
1949#	is used by OpenOffice and LibreOffice and probably also NeoOffice
1950#	verified by `unzip -Zv *.oxt` or `7z l -slt *.oxt`
1951>>50	string	vnd.openofficeorg.		OpenOffice
1952>>>68	string	extension			\b/LibreOffice Extension
1953# http://extension.nirsoft.net/oxt
1954!:mime	application/vnd.openofficeorg.extension
1955# like: Gallery-Puzzle.2.1.0.1.oxt
1956!:ext	oxt
1957
1958#   OpenDocument formats (for OpenOffice 2.x / StarOffice >= 8)
1959#   URL: http://fileformats.archiveteam.org/wiki/OpenDocument
1960#    https://lists.oasis-open.org/archives/office/200505/msg00006.html
1961#    (mimetype contains "application/vnd.oasis.opendocument.<SUBTYPE>")
1962>>50	string	vnd.oasis.opendocument.	OpenDocument
1963>>>73	string	text
1964>>>>77	byte	!0x2d			Text
1965!:mime	application/vnd.oasis.opendocument.text
1966!:ext	odt
1967>>>>77	string	-template		Text Template
1968!:mime	application/vnd.oasis.opendocument.text-template
1969!:ext	ott
1970>>>>77	string	-web			HTML Document Template
1971!:mime	application/vnd.oasis.opendocument.text-web
1972!:ext	oth
1973>>>>77	string	-master
1974>>>>>84	byte	!0x2d			Master Document
1975!:mime	application/vnd.oasis.opendocument.text-master
1976!:ext	odm
1977>>>>>84	string	-template		Master Template
1978!:mime	application/vnd.oasis.opendocument.text-master-template
1979!:ext	otm
1980>>>73	string	graphics
1981>>>>81	byte	!0x2d			Drawing
1982!:mime	application/vnd.oasis.opendocument.graphics
1983!:ext	odg
1984>>>>81	string	-template		Drawing Template
1985!:mime	application/vnd.oasis.opendocument.graphics-template
1986!:ext	otg
1987>>>73	string	presentation
1988>>>>85	byte	!0x2d			Presentation
1989!:mime	application/vnd.oasis.opendocument.presentation
1990!:ext	odp
1991>>>>85	string	-template		Presentation Template
1992!:mime	application/vnd.oasis.opendocument.presentation-template
1993!:ext	otp
1994>>>73	string	spreadsheet
1995>>>>84	byte	!0x2d			Spreadsheet
1996!:mime	application/vnd.oasis.opendocument.spreadsheet
1997!:ext	ods
1998>>>>84	string	-template		Spreadsheet Template
1999!:mime	application/vnd.oasis.opendocument.spreadsheet-template
2000!:ext	ots
2001>>>73	string	chart
2002>>>>78	byte	!0x2d			Chart
2003!:mime	application/vnd.oasis.opendocument.chart
2004!:ext	odc
2005>>>>78	string	-template		Chart Template
2006!:mime	application/vnd.oasis.opendocument.chart-template
2007!:ext	otc
2008>>>73	string	formula
2009>>>>80	byte	!0x2d			Formula
2010!:mime	application/vnd.oasis.opendocument.formula
2011!:ext	odf
2012>>>>80	string	-template		Formula Template
2013!:mime	application/vnd.oasis.opendocument.formula-template
2014!:ext	otf
2015# https://www.loc.gov/preservation/digital/formats/fdd/fdd000441.shtml
2016>>>73	string	database		Database
2017!:mime	application/vnd.oasis.opendocument.database
2018!:ext	odb
2019# Valid for LibreOffice Base 6.0.1.1 at least
2020>>>73	string	base 			Database
2021# https://bugs.documentfoundation.org/show_bug.cgi?id=45854
2022!:mime	application/vnd.oasis.opendocument.base
2023!:ext	odb
2024>>>73	string	image
2025>>>>78	byte	!0x2d			Image
2026!:mime	application/vnd.oasis.opendocument.image
2027!:ext	odi
2028>>>>78	string	-template		Image Template
2029!:mime	application/vnd.oasis.opendocument.image-template
2030!:ext	oti
2031
2032#  EPUB (OEBPS) books using OCF (OEBPS Container Format)
2033#    https://www.idpf.org/ocf/ocf1.0/download/ocf10.htm, section 4.
2034#    From: Ralf Brown <ralf.brown@gmail.com>
# Continuation of the "mimetypeapplication/" test: offset 50 is the text
# right after "application/" in the stored mimetype member.
2035>>50	string	epub+zip	EPUB document
2036!:mime application/epub+zip
# declare the file name suffix too, like the other mimetype-based entries
!:ext	epub
2037
2038# From: Hajin Jang <jb6804@naver.com>
2039# hwpx (OWPML) document format follows OCF specification.
2040# Hangul Word Processor 2010+ supports HWPX format.
2041# URL: https://www.hancom.com/etc/hwpDownload.do
2042#      https://standard.go.kr/KSCI/standardIntro/getStandardSearchView.do?menuId=503&topMenuId=502&ksNo=KSX6101
2043#      https://e-ks.kr/streamdocs/view/sd;streamdocsId=72059197557727331
2044>>50	string	hwp+zip     Hancom HWP (Hangul Word Processor) file, HWPX
2045!:mime application/x-hwp+zip
2046!:ext	hwpx
2047
2048# From:	Joerg Jenderek
2049# URL:	http://en.wikipedia.org/wiki/CorelDRAW
2050# NOTE:	version; til 2 WL-based; from 3 til 13 by ./riff; from 14 zip based
2051>>50	string	x-vnd.corel.	 Corel
2052>>>62	string	draw.document+zip	Draw drawing, version 14-16
2053!:mime	application/x-vnd.corel.draw.document+zip
2054!:ext	cdr
2055>>>62	string	draw.template+zip	Draw template, version 14-16
2056!:mime	application/x-vnd.corel.draw.template+zip
2057!:ext	cdrt
2058>>>62	string	zcf.draw.document+zip	Draw drawing, version 17-22
2059!:mime	application/x-vnd.corel.zcf.draw.document+zip
2060!:ext	cdr
2061>>>62	string	zcf.draw.template+zip	Draw template, version 17-22
2062!:mime	application/x-vnd.corel.zcf.draw.template+zip
2063!:ext	cdt/cdrt
2064# URL:	http://product.corel.com/help/CorelDRAW/540240626/Main/EN/Doc/CorelDRAW-Other-file-formats.html
2065>>>62	string	zcf.pattern+zip		Draw pattern, version 22
2066!:mime	application/x-vnd.corel.zcf.pattern+zip
2067!:ext	pat
2068# URL:		https://en.wikipedia.org/wiki/Corel_Designer
2069# Reference:	http://fileformats.archiveteam.org/wiki/Corel_Designer
2070# Note:		called by TrID "Corel DESIGN graphics"
2071>>>62	string	designer.document+zip		DESIGNER graphics, version 14-16
2072!:mime	application/x-vnd.corel.designer.document+zip
2073!:ext	des
2074>>>62	string	zcf.designer.document+zip	DESIGNER graphics, version 17-21
2075!:mime	application/x-vnd.corel.zcf.designer.document+zip
2076!:ext	des
2077# URL:	http://product.corel.com/help/CorelDRAW/540223850/Main/EN/Documentation/
2078#	CorelDRAW-Corel-Symbol-Library-CSL.html
2079>>>62	string	symbol.library+zip		Symbol Library, version 6-16.3
2080!:mime	application/x-vnd.corel.symbol.library+zip
2081!:ext	csl
2082>>>62	string	zcf.symbol.library+zip		Symbol Library, version 17-22
2083!:mime	application/x-vnd.corel.zcf.symbol.library+zip
2084!:ext	csl
2085
2086#  Catch other ZIP-with-mimetype formats
2087#	In a ZIP file, the bytes immediately after a member's contents are
2088#	always "PK". The 2 regex rules here print the "mimetype" member's
2089#	contents up to the first 'P'. Luckily, most MIME types don't contain
2090#	any capital 'P's. This is a kludge.
2091#    (mimetype contains "application/<OTHER>")
2092>>50		default	x			Zip data
2093>>>38		regex	[!-OQ-~]+		(MIME type "%s"?)
2094!:mime	application/zip
2095#    (mimetype contents other than "application/*")
2096>26		string	\x8\0\0\0mimetype
2097>>38		string	!application/
2098>>>38		regex	[!-OQ-~]+		Zip data (MIME type "%s"?)
2099!:mime	application/zip
2100
2101# Java Jar files (see also APK files above)
2102>(26.s+30)	leshort	0xcafe		Java archive data (JAR)
2103!:mime	application/java-archive
2104!:ext	jar
2105
2106# iOS App
# Reached only when the 16-bit value after the first member's name is not
# 0xcafe (i.e. not the JAR case) and the first member is not named "mimetype".
2107>(26.s+30)	leshort	!0xcafe
2108>>26		string	!\x8\0\0\0mimetype
# first member name begins with "Payload/" and ".app/" occurs within the
# 64 bytes searched from offset 38
2109>>>30		string	Payload/
2110>>>>38		search/64       .app/   iOS App
2111!:mime application/x-ios-app
# same suffix as the IPA entry above, which reports the same MIME type
!:ext	ipa
2112
2113# Dup, see above.
2114#>30	search/100/b application/epub+zip	EPUB document
2115#!:mime application/epub+zip
2116
2117# APK Signing Block
2118>(26.s+30)	leshort	!0xcafe
2119>>30	search/100/b !application/epub+zip
2120>>>26    string          !\x8\0\0\0mimetype
2121>>>>-22	string	PK\005\006
2122>>>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	Android package (APK), with APK Signing Block
2123!:mime	application/vnd.android.package-archive
2124!:ext	apk
2125
2126# Keyman Compiled Package File (keyman.com)
2127# https://help.keyman.com/developer/current-version/reference/file-types/kmp
2128# Find start of central directory
2129>>>>>(-6.l)	string		PK\001\002
2130# Scan central directory for string 'kmp.json', will suffice for a
2131# package containing about 150 files
2132>>>>>>(-6.l)	search/9000	kmp.json	Keyman Compiled Package File
2133!:mime	application/vnd.keyman.kmp+zip
2134!:ext kmp
2135
2136# Generic zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
2137#   Next line excludes specialized formats:
2138>>>>+4	default		x
2139>>>>>4	beshort		x			Zip archive data, at least
2140!:mime	application/zip
2141>>>>>4	use		zipversion
2142>>>>>4	beshort		x			to extract
2143>>>>>8	beshort		x			\b, compression method=
2144>>>>>8	use		zipcompression
2145>>>>>0x161	string		WINZIP		\b, WinZIP self-extracting
2146
2147# Zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
21480	string		PK\005\006	Zip archive data (empty)
2149!:mime application/zip
2150!:ext zip/cbz
2151!:strength +1
2152
2153# StarView Metafile
2154# From Pierre Ducroquet <pinaraf@pinaraf.info>
21550	string	VCLMTF	StarView MetaFile
2156>6	beshort	x	\b, version %d
2157>8	belong	x	\b, size %d
2158
2159# Zoo archiver
2160# Update: Joerg Jenderek
2161# URL:		https://en.wikipedia.org/wiki/Zoo_(file_format)
2162#		http://fileformats.archiveteam.org/wiki/Zoo
2163# Reference:	http://mark0.net/download/triddefs_xml.7z/defs/a/ark-zoo-strict.trid.xml
2164#		http://distcache.freebsd.org/ports-distfiles/zoo-2.10pl1.tar.gz/zoo.h
2165# Note:		called "ZOO compressed archive (strict)" by TrID and "ZOO Compressed Archive" by DROID via PUID x-fmt/269
2166#		verified by command like `deark -m zoo -l -d2 WHRCGA.ZOO`
216720	lelong		0xfdc4a7dc
2168# skip DROID x-fmt-269-signature-id-621.zoo by looking for valid major version to manipulate archive
2169>32	byte		>0		Zoo archive data
2170!:mime	application/x-zoo
2171# bak is extension of backup-ed zoo
2172!:ext	zoo/bak
2173# version in text form like: 1.50 2.00 2.10
2174>>4	byte		>48		\b, v%c.
2175>>>6	byte		>47		\b%c
2176>>>>7	byte		>47		\b%c
2177# ZOO files typically start with "ZOO ?.?? Archive.", followed by the bytes 0x1a 0x0 0x0; not used by Zoo and they may be anything
2178>>8	string		!\040Archive.\032 \b, at 8
2179>>>8	string		x		text "%0.10s"
2180# major_ver.minor_ver; minimum version needed to manipulate archive like: 1.0 2.0
2181>>32	byte		>0		\b, modify: v%d
2182>>>33	byte		x		\b.%d+
2183# major_ver.minor_ver; minimum version needed to extract after modify like in old versions
2184>>(24.l+28)	ubyte	x		\b, extract: v%u
2185>>(24.l+29)	ubyte	x		\b.%u+
2186# with zoo 2.00 additional fields have been added in the archive header
2187>>32	byte		>1
2188# type; type of archive header like: 1 2
2189>>>34		ubyte	!1		\b, header type %u
2190# acmt_pos; position of archive comment like: 6258 30599 61369 149501
2191>>>35		lelong	>0		\b, at %d
2192# acmt_len; length of archive comment like: 258
2193>>>>39			uleshort x	%u bytes comment
2194#>>>>(35.l)		ubequad	x	COMMENT=%16.16llx
2195# 1st character of comment maybe is CarriageReturn (0x0d)
2196>>>>(35.l) 		ubyte	<040
2197# 2nd character of comment maybe is LineFeed (0x0a)
2198>>>>>(35.l+1) 		ubyte	<040
2199# comment string after CRLF like "Anonymous ftp site garbo.uwasa.fi 128.214.87.1 moderated by"
2200>>>>>>(35.l+2)		string	x	%s
2201# next character of remaining comment maybe is CarriageReturn (0x0d)
2202>>>>>>>&0		ubyte	<040
2203>>>>>>>>&0		ubyte	<040
2204# 2nd comment part like: Timo Salmi ts@chyde.uwasa.fi      PC directories and uploads\015\012Harri Valkama hv@chyde.uwasa.fi   PC, Mac, Unix files, and upload
2205>>>>>>>>>&0		string	>037	%s
2206# vdata; archive-level versioning byte like: 1 3
2207>>>41		ubyte	!1		\b, vdata %#x
2208# zoo_start; pointer to 1st entry header
2209>>24	lelong		x		\b; at %u
2210# zoo_minus; zoo_start -1 for consistency checking
2211#>>28	lelong		x		\b, zoo_minus %#x
2212# zoo_tag; tag for check
2213#>>(24.l+0) ulelong	!0xfdc4a7dc	\b, zoo_tag=%8.8x
2214# type; type of directory entry like: 1 2
2215>>(24.l+4)	ubyte	!2		type=%u
2216# packing_method; 0~no packing 1~normal LZW 2~lzh
2217>>(24.l+5)	ubyte		x	method=
2218>>>(24.l+5)	ubyte		0	\bnot-compressed
2219>>>(24.l+5)	ubyte		1	\blzd
2220>>>(24.l+5)	ubyte		2	\blzh
2221# next; position of next directory entry
2222>>(24.l+6)	ulelong		x	\b, next entry at %u
2223# offset; position of file data for this entry
2224#>>(24.l+10) ulelong		x	\b, data at %u
2225# file_crc; CRC-16 of file data
2226>>(24.l+18)	uleshort	x	\b, CRC %#4.4x
2227# comment; zero if none or points to entry comment like ADD9h (WHRCGA.ZOO)
2228>>(24.l+32)	lelong		>0	\b, at %#x
2229# cmt_size; if not 0 for none then length of entry comment like: 46
2230>>>(24.l+36)	uleshort	>0	%u bytes comment
2231# entry comment itself like: "CGA .GL file showing menu input from keyboard"
2232>>>>(&-6.l)	string		x	"%s"
2233# org_size; original size of file
2234>>(24.l+20)	ulelong		x	\b, size %u
2235# size_now; compressed size of file
2236>>(24.l+24)	ulelong		x	(%u compressed)
2237# major_ver.minor_ver; minimum version needed to extract already done
2238# deleted; will be 1 if deleted, 0 if not
2239>>(24.l+30)	ubyte		=1	\b, deleted
2240# struc; file structure if any; WHAT IS THAT?
2241>>(24.l+31)	ubyte		!0	\b, structured
2242# fname[13]; short/DOS file name like 12345678.012
2243>>(24.l+38)	string	x		\b, %0.13s
2244# for directory entry type 2 with variable part
2245>>(24.l+4)	ubyte	=2
2246# var_dir_len; length of variable part of dir entry
2247>>>(24.l+51)		uleshort >0
2248#>>>(24.l+51)		uleshort >0	\b, variable part length %u
2249# namlen; length of long filename
2250#>>>>(24.l+56)		ubyte	x	\b, namlen %u
2251# dirlen; length of directory name
2252#>>>>(24.l+57)		ubyte	x	\b, dirlen %u
2253# if long file name length (namlen) positive then show long file name
2254>>>>(24.l+56)		ubyte	>0
2255# lfname[256]; long file name \0-terminated
2256>>>>>(24.l+58)		string	x	"%s"
2257# if directory length positive then jump before file name field and then jump this additional length plus 2 (\0-terminator + dirlen field) to following directory name
2258>>>>(24.l+57)		ubyte	>0
2259>>>>>(24.l+55)		ubyte	x
2260# dirname[256]; directory name \0-terminated
2261>>>>>>&(&0.b+2)		string	x	in "%s"
2262# dir_crc; CRC of directory entry
2263#>>>(24.l+54)		uleshort x	\b, entry CRC %#4.4x
2264# tz; timezone where file was archived; 7Fh~unknown 4~1.00hoursWestOfUTC 12 16 20~5.00hoursWestOfUTC -107~26.75hoursEastOfUTC -4~1.00hoursEastOfUTC
2265>>>(24.l+53)		byte	!0x7f	\b, time zone %d/4
2266# date; last mod file date in DOS format
2267>>>(24.l+14)		lemsdosdate x	\b, modified %s
2268# time; last mod file time in DOS format
2269>>>(24.l+16)		lemsdostime x	%s
2270
2271# Shell archives
227210	string		#\ This\ is\ a\ shell\ archive	shell archive text
2273!:mime	application/octet-stream
2274
2275#
2276# LBR. NB: May conflict with the questionable
2277#          "binary Computer Graphics Metafile" format.
2278#
22790       string  \0\ \ \ \ \ \ \ \ \ \ \ \0\0    LBR archive data
2280#
2281# PMA (CP/M derivative of LHA)
2282# Update: Joerg Jenderek
2283# URL: https://en.wikipedia.org/wiki/LHA_(file_format)
2284#
2285#2       string          -pm0-           PMarc archive data [pm0]
22862	string		-pm0-
2287>0	use	lharc-file
2288#2       string          -pm1-           PMarc archive data [pm1]
22892	string		-pm1-
2290>0	use	lharc-file
2291#2       string          -pm2-           PMarc archive data [pm2]
22922	string		-pm2-
2293>0	use	lharc-file
22942       string          -pms-           PMarc SFX archive (CP/M, DOS)
2295#!:mime	application/x-foobar-exec
2296!:ext com
22975       string          -pc1-           PopCom compressed executable (CP/M)
2298#!:mime	application/x-
2299#!:ext com
2300
2301# From Rafael Laboissiere <rafael@laboissiere.net>
2302# The Project Revision Control System (see
2303# http://prcs.sourceforge.net) generates a packaged project
2304# file which is recognized by the following entry:
23050	leshort		0xeb81	PRCS packaged project
2306
2307# Microsoft cabinets
2308# by David Necas (Yeti) <yeti@physics.muni.cz>
2309#0	string	MSCF\0\0\0\0	Microsoft cabinet file data,
2310#>25	byte	x		v%d
2311#>24	byte	x		\b.%d
2312# MPi: All CABs have version 1.3, so this is pointless.
2313# Better magic in debian-additions.
2314
2315# GTKtalog catalogs
2316# by David Necas (Yeti) <yeti@physics.muni.cz>
23174	string	gtktalog\ 	GTKtalog catalog data,
2318>13	string	3		version 3
2319>>14	beshort	0x677a		(gzipped)
2320>>14	beshort	!0x677a		(not gzipped)
2321>13	string	>3		version %s
2322
2323############################################################################
2324# Parity archive reconstruction file, the 'par' file format now used on Usenet.
23250       string          PAR\0	PARity archive data
2326>48	leshort		=0	- Index file
2327>48	leshort		>0	- file number %d
2328
# Four independent top-level tests at offset 0.  Each expects the letter "d"
# followed by a length-prefixed key ("8:announce", "13:announce-list",
# "7:comment" or "4:info"), presumably the first key of a bencoded dictionary.
# All four report the same description, MIME type and suffix.
2329# Felix von Leitner <felix-file@fefe.de>
23300	string	d8:announce	BitTorrent file
2331!:mime	application/x-bittorrent
2332!:ext	torrent
2333# Durval Menezes, <jmgthbfile at durval dot com>
23340	string	d13:announce-list	BitTorrent file
2335!:mime	application/x-bittorrent
2336!:ext	torrent
23370	string	d7:comment	BitTorrent file
2338!:mime	application/x-bittorrent
2339!:ext	torrent
23400	string	d4:info		BitTorrent file
2341!:mime	application/x-bittorrent
2342!:ext	torrent
2343
2344# Atari MSA archive - Teemu Hukkanen <tjhukkan@iki.fi>
2345# URL:		http://fileformats.archiveteam.org/wiki/MSA_(Magic_Shadow_Archiver)
2346# Reference:	http://info-coach.fr/atari/documents/_mydoc/FD_Image_File_Format.pdf
2347#		http://mark0.net/download/triddefs_xml.7z/defs/m/msa.trid.xml
2348# Update:	Joerg Jenderek
2349# Note:		called by TrID "Atari MSA Disk Image" and verified by
2350#		command like `deark -l -m msa -d2 PDATS578.msa` as " Atari ST floppy disk image"
2351# GRR: line below is too general as it matches setup.skin
23520	beshort 0x0e0f
2353# skip foo setup.skin with unrealistic high number 52255 of sides by check for valid "low" value
2354>4	ubeshort <2		Atari MSA archive data
2355#!:mime	application/octet-stream
2356!:mime	application/x-atari-msa
2357!:ext	msa
2358# sectors per track like: 9 10
2359>>2	beshort x		\b, %d sectors per track
2360# sides (0 or 1; add 1 to this to get correct number of sides)
2361>>4	beshort 0		\b, 1 sided
2362>>4	beshort 1		\b, 2 sided
2363# starting track like: 0
2364>>6	beshort x		\b, starting track: %d
2365# ending track like: 39 79 80 81
2366>>8	beshort x		\b, ending track: %d
2367# tracks content
2368#>>10	ubequad x		\b, track content %#16.16llx
2369
2370# Alternate ZIP string (amc@arwen.cs.berkeley.edu)
23710	string	PK00PK\003\004	Zip archive data
2372!:mime	application/zip
2373!:ext zip/cbz
2374
2375# Recognize ZIP archives with prepended data by end-of-central-directory record
2376# https://en.wikipedia.org/wiki/ZIP_(file_format)#End_of_central_directory_record_(EOCD)
2377# by Michal Gorny <mgorny@gentoo.org>
# The negative offset -2 addresses the last two bytes of the file, which must be
# zero (presumably the EOCD comment-length field, i.e. no trailing comment).
2378-2	uleshort	0
# Relative to the end of those two bytes, step back 22 bytes and require the
# EOCD signature "PK\005\006" there.
2379>&-22	string	PK\005\006
2380# without #!
2381>>0	string	!#!	Zip archive, with extra data prepended
2382!:mime	application/zip
2383!:ext zip/cbz
2384# with #!
# Prints "a", then the text found right after the "#!" marker, then the
# description, so the file is reported as a script wrapping a Zip archive.
2385>>0	string/w	#!\ 	a
2386>>>&-1	string/T	x	%s script executable (Zip archive)
2387
2388# ACE archive (from http://www.wotsit.org/download.asp?f=ace)
2389# by Stefan `Sec` Zehl <sec@42.org>
23907	string		**ACE**		ACE archive data
2391!:mime	application/x-ace-compressed
2392!:ext	ace
2393>15	byte	>0		version %d
2394>16	byte	=0x00		\b, from MS-DOS
2395>16	byte	=0x01		\b, from OS/2
2396>16	byte	=0x02		\b, from Win/32
2397>16	byte	=0x03		\b, from Unix
2398>16	byte	=0x04		\b, from MacOS
2399>16	byte	=0x05		\b, from WinNT
2400>16	byte	=0x06		\b, from Primos
2401>16	byte	=0x07		\b, from AppleGS
2402>16	byte	=0x08		\b, from Atari
2403>16	byte	=0x09		\b, from Vax/VMS
2404>16	byte	=0x0A		\b, from Amiga
2405>16	byte	=0x0B		\b, from Next
2406>14	byte	x		\b, version %d to extract
2407>5	leshort &0x0080		\b, multiple volumes,
2408>>17	byte	x		\b (part %d),
2409>5	leshort &0x0002		\b, contains comment
2410>5	leshort	&0x0200		\b, sfx
2411>5	leshort	&0x0400		\b, small dictionary
2412>5	leshort	&0x0800		\b, multi-volume
2413>5	leshort	&0x1000		\b, contains AV-String
2414>>30	string	\x16*UNREGISTERED\x20VERSION*	(unregistered)
2415>5	leshort &0x2000		\b, with recovery record
2416>5	leshort &0x4000		\b, locked
2417>5	leshort &0x8000		\b, solid
2418# Date in MS-DOS format (whatever that is)
2419#>18	lelong	x		Created on
2420
2421# sfArk : compression program for Soundfonts (sf2) by Dirk Jagdmann
2422# <doj@cubic.org>
24230x1A	string	sfArk		sfArk compressed Soundfont
2424>0x15	string	2
2425>>0x1	string	>\0		Version %s
2426>>0x2A	string	>\0		: %s
2427
2428# DR-DOS 7.03 Packed File *.??_
2429# Reference: http://www.antonis.de/dos/dos-tuts/mpdostip/html/nwdostip.htm
2430# Note:	unpacked by PNUNPACK.EXE
24310	string	Packed\ File\
2432# by looking for Control-Z skip ASCII text starting with Packed File
2433>0x18	ubyte	0x1a		Personal NetWare Packed File
2434!:mime	application/x-novell-compress
2435!:ext	??_
2436>>12	string	x		\b, was "%.12s"
2437# 1 or 2
2438#>>0x19	ubyte	x		\b, at 0x19 %u
2439>>0x1b	ulelong	x		with %u bytes
2440
2441# EET archive
2442# From: Tilman Sauerbeck <tilman@code-monkey.de>
24430	belong	0x1ee7ff00	EET archive
2444!:mime	application/x-eet
2445
2446# From:		Joerg Jenderek
2447# URL:		https://help.foxitsoftware.com/kb/install-fzip-file.php
2448# reference:	http://mark0.net/download/triddefs_xml.7z/
2449#		defs/f/fzip.trid.xml
2450# Note: unknown compression; No "PK" zip magic; normally in directory like
2451#	"%APPDATA%\Foxit Software\Addon\Foxit Reader\Install"
24520	ubequad	0x2506781901010000	Foxit add-on/update
2453!:mime	application/x-fzip
2454!:ext	fzip
2455
2456# From: "Robert Dale" <robdale@gmail.com>
# The magic is the number 123 stored as a big-endian 32-bit value at offset 0.
24570	belong	123		dar archive,
# The 10 bytes at offsets 4-13 are printed in hex as the label (4 + 4 + 2 bytes);
# the opening and closing quotes come from the first and last message.
2458>4	belong	x		label "%.8x
2459>>8	belong	x		%.8x
2460>>>12	beshort	x		%.4x"
# Offset 14 selects the slice description: 0x54 is ASCII "T",
# 0x4e4e is "NN" and 0x4e53 is "NS".
2461>14	byte	0x54		end slice
2462>14	beshort	0x4e4e		multi-part
2463>14	beshort	0x4e53		multi-part, with -S
2464
2465# Symbian installation files
2466#  https://www.thouky.co.uk/software/psifs/sis.html
2467#  http://developer.symbian.com/main/downloads/papers/SymbianOSv91/softwareinstallsis.pdf
# First variant: identifying 32-bit little-endian value at offset 8; the value
# at offset 4 then selects which EPOC release is reported.
24688	lelong	0x10000419	Symbian installation file
2469!:mime	application/vnd.symbian.install
2470>4	lelong	0x1000006D	(EPOC release 3/4/5)
2471>4	lelong	0x10003A12	(EPOC release 6)
# Second variant (Symbian OS 9.x): identifying value at offset 0, reported with
# a different MIME type.
24720	lelong	0x10201A7A	Symbian installation file (Symbian OS 9.x)
2473!:mime	x-epoc/x-sisx-app
2474
2475# From "Nelson A. de Oliveira" <naoliv@gmail.com>
24760	string	MPQ\032		MoPaQ (MPQ) archive
2477
2478# From: "Nelson A. de Oliveira" <naoliv@gmail.com>
2479# .kgb
24800	string KGB_arch		KGB Archiver file
2481>10	string x		with compression level %.1s
2482
2483# xar (eXtensible ARchiver) archive
2484# URL: https://en.wikipedia.org/wiki/Xar_(archiver)
2485# xar archive format: https://code.google.com/p/xar/
2486# From: "David Remahl" <dremahl@apple.com>
2487# Update: Joerg Jenderek
2488# TODO: lzma compression; X509Data for pkg and xip
2489# Note: verified by `xar --dump-header -f FullBundleUpdate.xar` or
2490# 7z t -txar Xcode_10.2_beta_4.xip`
24910	string	xar!		xar archive
2492!:mime	application/x-xar
2493# pkg for Mac OSX installer package like FullBundleUpdate.pkg
2494# xip for signed Apple software like Xcode_10.2_beta_4.xip
2495!:ext	xar/pkg/xip
2496# always 28 in older archives
2497>4	ubeshort >28		\b, header size %u
2498# currently there exit only version 1 since about 2014
2499>6	ubeshort >1		version %u,
2500>8	ubequad	x		compressed TOC: %llu,
2501#>16	ubequad	x		uncompressed TOC: %llu,
2502# cksum_alg 0-2 in older and also 3-4 in newer
2503>24	belong	0		no checksum
2504>24	belong	1		SHA-1 checksum
2505>24	belong	2		MD5 checksum
2506>24	belong	3		SHA-256 checksum
2507>24	belong	4		SHA-512 checksum
2508>24	belong	>4		unknown %#x checksum
2509#>24	belong	>4		checksum
2510#			For no compression jump 0 bytes
2511>24	belong	0
2512>>0		ubyte	x
2513# jump more bytes forward by header size
2514>>>&(4.S)	ubyte	x
2515# jump more bytes forward by compressed table of contents size
2516#>>>>&(8.Q)	ubequad	x	\b, heap data %#llx
2517>>>>&(8.Q)	ubyte	x
2518# look for data by ./compress after message with 1 space at end
2519>>>>>&-3	indirect x	\b, contains
2520#			For SHA-1 jump 20 minus 2 bytes
2521>24	belong	1
2522>>18		ubyte	x
2523# jump more bytes forward by header size
2524>>>&(4.S)	ubyte	x
2525# jump more bytes forward by compressed table of contents size
2526>>>>&(8.Q)	ubyte	x
2527# data compressed by gzip, bzip, lzma or none
2528>>>>>&-1	indirect x	\b, contains
2529#			For SHA-256 jump 32 minus 2 bytes
2530>24	belong	3
2531>>30		ubyte	x
2532# jump more bytes forward by header size
2533>>>&(4.S)	ubyte	x
2534# jump more bytes forward by compressed table of contents size
2535>>>>&(8.Q)	ubyte	x
2536>>>>>&-1	indirect x	\b, contains
2537#			For SHA-512 jump 64 minus 2 bytes
2538>24	belong	4
2539>>62		ubyte	x
2540# jump more bytes forward by header size
2541>>>&(4.S)	ubyte	x
2542# jump more bytes forward by compressed table of contents size
2543>>>>&(8.Q)	ubyte	x
2544>>>>>&-1	indirect x	\b, contains
2545
2546# Type: Parity Archive
2547# From: Daniel van Eeden <daniel_e@dds.nl>
25480	string	PAR2		Parity Archive Volume Set
2549
2550# Bacula volume format. (Volumes always start with a block header.)
2551# URL: https://bacula.org/3.0.x-manuals/en/developers/developers/Block_Header.html
2552# From: Adam Buchbinder <adam.buchbinder@gmail.com>
255312	string	BB02		Bacula volume
2554>20	bedate	x		\b, started %s
2555
2556# ePub is XHTML + XML inside a ZIP archive.  The first member of the
2557#   archive must be an uncompressed file called 'mimetype' with contents
2558#   'application/epub+zip'
2559
2560
2561# From: "Michael Gorny" <mgorny@gentoo.org>
2562# ZPAQ: http://mattmahoney.net/dc/zpaq.html
25630	string	zPQ	ZPAQ stream
2564>3	byte	x	\b, level %d
2565# From: Barry Carter <carter.barry@gmail.com>
2566# https://encode.ru/threads/456-zpaq-updates/page32
25670	string	7kSt	ZPAQ file
2568
2569# BBeB ebook, unencrypted (LRF format)
2570# URL: https://www.sven.de/librie/Librie/LrfFormat
2571# From: Adam Buchbinder <adam.buchbinder@gmail.com>
# The signature is the text "LRF" in UTF-16LE followed by two NUL bytes.
25720	string	L\0R\0F\0\0\0	BBeB ebook data, unencrypted
# Header fields are little-endian like the signature; reading them big-endian
# printed byte-swapped values (e.g. version 999 appeared as -6397).
2573>8	leshort	x		\b, version %d
# Page direction flag: 1 or 16.
2574>36	byte	1		\b, front-to-back
2575>36	byte	16		\b, back-to-front
# Two consecutive 16-bit values printed as "(<first>x, <second>)".
2576>42	leshort	x		\b, (%dx,
2577>44	leshort	x		%d)
2578
2579# Symantec GHOST image by Joerg Jenderek at May 2014
2580# https://us.norton.com/ghost/
2581# https://www.garykessler.net/library/file_sigs.html
25820		ubelong&0xFFFFf7f0	0xFEEF0100	Norton GHost image
2583# *.GHO
2584>2		ubyte&0x08		0x00		\b, first file
2585# *.GHS or *.[0-9] with cns program option
2586>2		ubyte&0x08		0x08		\b, split file
2587# part of split index interesting for *.ghs
2588>>4		ubyte			x		id=%#x
2589# compression tag minus one equals numeric compression command line switch z[1-9]
2590>3		ubyte			0		\b, no compression
2591>3		ubyte			2		\b, fast compression (Z1)
2592>3		ubyte			3		\b, medium compression (Z2)
2593>3		ubyte			>3
2594>>3		ubyte			<11		\b, compression (Z%d-1)
2595>2		ubyte&0x08		0x00
2596# ~ 30 byte password field only for *.gho
2597>>12		ubequad			!0		\b, password protected
2598>>44		ubyte			!1
2599# 1~Image All, sector-by-sector only for *.gho
2600>>>10		ubyte			1		\b, sector copy
2601# 1~Image Boot track only for *.gho
2602>>>43		ubyte			1		\b, boot track
2603# 1~Image Disc only for *.gho implies Image Boot track and sector copy
2604>>44		ubyte			1		\b, disc sector copy
2605# optional image description only *.gho
2606>>0xff		string			>\0		"%-.254s"
2607# look for DOS sector end sequence
2608>0xE08	search/7776		\x55\xAA
2609>>&-512	indirect		x		\b; contains
2610
2611# Google Chrome extensions
2612# https://developer.chrome.com/extensions/crx
2613# https://developer.chrome.com/extensions/hosting
# Files start with the 4-byte signature "Cr24".
26140	string	Cr24	Google Chrome extension
2615!:mime	application/x-chrome-extension
# The 32-bit version that follows is stored little-endian. Read it with an
# explicit byte order so the reported number does not depend on the endianness
# of the host running file(1).
2616>4	ulelong	x	\b, version %u
2617
2618# SeqBox - Sequenced container
2619# ext: sbx, seqbox
2620# Marco Pontello marcopon@gmail.com
2621# reference: https://github.com/MarcoPon/SeqBox
26220	string	SBx	SeqBox,
2623>3	byte	x	version %d
2624
2625# LyNX archive
2626# Update:	Joerg Jenderek
2627# URL:		http://fileformats.archiveteam.org/wiki/Lynx_archive
2628# Reference:	http://ist.uwaterloo.ca/~schepers/formats/LNX.TXT
2629#		http://mark0.net/download/triddefs_xml.7z/defs/a/ark-lnx.trid.xml
2630# Note:		called "Lynx archive" by TrID and "Commodore C64 BASIC program" with "POKE 53280" by ./c64
2631# TODO:		merge and unify with Commodore C64 BASIC program
263256	string	USE\040LYNX\040TO\040DISSOLVE\040THIS\040FILE	 LyNX archive
2633# display "Lynx archive" (strength=330) before Commodore C64 BASIC program (strength=50) handled by ./c64
2634#!:strength +0
2635#!:mime	application/octet-stream
2636!:mime	application/x-commodore-lnx
2637!:ext	lnx
2638# afterwards look for BASIC tokenized GOTO (89h) 10, line terminator \0, end of programm tag \0\0 and CarriageReturn
2639>86		search/10	\x8910\0\0\0\r	\b,
2640# for DEBUGGING
2641#>>&0		string		x	STRING="%s"
2642# number in ASCII of directory blocks with spaces on both sides like: 1 2 3 5
2643>>&0		regex		[0-9]{1,5}	%s directory blocks
2644# signature like: "*LYNX XII BY WILL CORLEY" " LYNX IX  BY WILL CORLEY" "*LYNX BY CBMCONVERT 2.0*"
2645>>>&2		regex		[^\r]{1,24}	\b, signature "%s"
2646# number of files in ASCII surrounded by spaces and delimited by CR like: 2 3 6 13 69 144 (maximum?)
2647>>>>&1		regex		[0-9]{1,3}	\b, %s files
2648
2649# From: Joerg Jenderek
2650# URL: https://www.acronis.com/
2651# Reference: https://en.wikipedia.org/wiki/TIB_(file_format)
2652# Note: only tested with True Image 2013 Build 5962 and 2019 Build 14110
26530	ubequad		0xce24b9a220000000	Acronis True Image backup
2654!:mime	application/x-acronis-tib
2655!:ext	tib
2656# 01000000
2657#>20	ubelong		x			\b, at 20 %#x
2658# 20000000
2659#>28	ubelong		x			\b, at 28 %#x
2660# strings like "Generic- SD/MMC 1.00" "Unknown Disk" "Msft Virtual Disk 1.0"
2661# ???
2662# strings like "\Device\0000011e" "\Device\0000015a"
2663#>0	search/0x6852300/cs	\\Device\\
2664#>>&-1	pstring		x			\b, %s
2665# "\Device\HarddiskVolume30" "\Device\HarddiskVolume39"
2666#>>>&1	search/180/cs	\\Device\\
2667#>>>>&-1	pstring		x			\b, %s
2668#>>>>>&0	search/29/cs	\0\0\xc8\0
2669# disk label
2670#>>>>>>&10	lestring16	x		\b, disk label %11.11s
2671#>>>>>>&9	plestring16	x		\b, disk label "%11.11s"
2672#>>>>>>&10	ubequad	x			%16.16llx
2673
2674
2675# Gentoo XPAK binary package
2676# by Michal Gorny <mgorny@gentoo.org>
2677# https://gitweb.gentoo.org/proj/portage.git/tree/man/xpak.5
# Both offsets are negative, i.e. counted from the end of the file:
# the file must end with "STOP", and the 8-byte text "XPAKSTOP" must start
# 16 bytes before the end, which leaves 4 bytes between the two markers.
2678-4	string	STOP
2679>-16	string	XPAKSTOP	Gentoo binary package (XPAK)
2680!:mime	application/vnd.gentoo.xpak
2681
2682# From:		Joerg Jenderek
2683# URL:		https://kodi.wiki/view/TexturePacker
2684# Reference:	https://mirrors.kodi.tv/releases/source/17.3-Krypton.tar.gz
2685# /xbmc-Krypton/xbmc/guilib/XBTF.h
2686# /xbmc-Krypton/xbmc/guilib/XBTF.cpp
26870	string	XBTF
2688# skip ASCII text by looking for terminating \0 of path
2689>264	ubyte	0		XBMC texture package
2690!:mime	application/x-xbmc-xbt
2691!:ext	xbt
2692# XBTF_VERSION 2
2693>>4	string	!2		\b, version %-.1s
2694# nofFiles /xbmc-Krypton/xbmc/guilib/XBTFReader.cpp
2695>>5	ulelong	x		\b, %u file
2696# plural s
2697>>5	ulelong	>1		\bs
2698# path[CXBTFFile[MaximumPathLength=256]
2699>>9	string	x		\b, 1st %s
2700
2701# ALZIP archive
2702# by Hyungjun Park <hyungjun.park@worksmobile.com>, Hajin Jang <hajin_jang@worksmobile.com>
2703# http://kippler.com/win/unalz/
2704# https://salsa.debian.org/l10n-korean-team/unalz
27050	string	ALZ\001		ALZ archive data
2706!:ext   alz
2707
2708# https://cf-aldn.altools.co.kr/setup/EGG_Specification.zip
27090	string	EGGA		EGG archive data,
2710!:ext   egg
# The version is printed as <byte at offset 5>.<byte at offset 4>.
2711>5	byte	x		version %u
2712>4	byte	x		\b.%u
# The 32-bit little-endian value at offset 0x0E is then classified: the first
# value prints nothing, the next two print "split" or "solid", and the default
# line catches everything else as "unknown".
2713>>0x0E	ulelong	=0x08E28222
2714>>0x0E	ulelong	=0x24F5A262	\b, split
2715>>0x0E	ulelong	=0x24E5A060	\b, solid
2716>>0x0E	default	x		\b, unknown
2717
2718# PAQ9A archive
2719# URL: http://mattmahoney.net/dc/#paq9a
2720# Note: Line 1186 of paq9a.cpp gives the magic bytes
27210	string	pQ9\001		PAQ9A archive
2722
2723# From wof (wof@stachelkaktus.net)
27240	string	Unison\ archive\ format	Unison archive format
2725
2726# https://ankiweb.net
272730	string	collection.anki2	Anki APKG file
2728#!:ext	.apkg
2729
2730# Synology archive (DiskStation Manager 7.0+)
2731# From: Alexandre Iooss <erdnaxe@crans.org>
2732# Note: These archives are signed and encrypted.
27330		ulelong&0xFFFFFF00	0xEFBEAD00
2734# MessagePack header (fixarray of 5 elements starting with a bin of 32 bytes)
2735>8  	ulelong&0x00FFFFFF	0x20C495	Synology archive
2736!:ext	spk
2737# Extract some properties from MessagePack third item
2738>>43	search/0x10000		package=
2739>>>&0	string				x			\b, package %s
2740>>43	search/0x10000		arch=
2741>>>&0	string				x			%s
2742>>43	search/0x10000		version=
2743>>>&0	string				x			%s
2744>>43	search/0x10000		create_time=
2745>>>&0	string				x			\b, created on %s
2746
2747# MonoGame/XNA processed assets archive
2748# From: Alexandre Iooss <erdnaxe@crans.org>
2749# URL: https://github.com/MonoGame/MonoGame/blob/v3.8.1/MonoGame.Framework/Content/ContentManager.cs
27500	string	XNB
2751# XNB must be version 4 or 5
2752>4	byte	<6
2753>>4	byte	>3
2754# Size must be positive
2755>>>6	lelong	>0	MonoGame/XNA processed assets
2756!:ext	xnb
2757>>>>3	string	=w	\b, for Windows
2758>>>>3	string	=x	\b, for Xbox360
2759>>>>3	string	=i	\b, for iOS
2760>>>>3	string	=a	\b, for Android
2761>>>>3	string	=d	\b, for DesktopGL
2762>>>>3	string	=X	\b, for MacOSX
2763>>>>3	string	=W	\b, for WindowsStoreApp
2764>>>>3	string	=n	\b, for NativeClient
2765>>>>3	string	=M	\b, for WindowsPhone8
2766>>>>3	string	=r	\b, for RaspberryPi
2767>>>>3	string	=P	\b, for PlayStation4
2768>>>>3	string	=5	\b, for PlayStation5
2769>>>>3	string	=O	\b, for XboxOne
2770>>>>3	string	=S	\b, for Nintendo Switch
2771>>>>3	string	=G	\b, for Google Stadia
2772>>>>3	string	=b	\b, for WebAssembly and Bridge.NET
2773>>>>3	string	=m	\b, for WindowsPhone7.0 (XNA)
2774>>>>3	string	=p	\b, for PlayStationMobile
2775>>>>3	string	=v	\b, for PSVita
2776>>>>3	string	=g	\b, for Windows (OpenGL)
2777>>>>3	string	=l	\b, for Linux
2778>>>>4	byte	x	\b, version %d
2779>>>>5	byte	&0x80	\b, LZX compressed
2780>>>>>10	lelong	x	\b, decompressed size: %d bytes
2781>>>>5	byte	&0x40	\b, LZ4 compressed
2782>>>>>10	lelong	x	\b, decompressed size: %d bytes
2783
2784# Electron ASAR archive
2785# From: Alexandre Iooss <erdnaxe@crans.org>
2786# URL: https://github.com/electron/asar
# The file must begin with the 32-bit little-endian value 4.
27870		ulelong	4
2788# Match JSON header start and end
2789>16		string	{"files":{"
# Indirect offset: take the 32-bit little-endian value stored at offset 12 and
# add 12; the closing braces of the JSON header are expected at that position.
2790>>(12.l+12)	string }}}}		Electron ASAR archive
2791!:ext	asar
# The same value at offset 12 is reported as the header length.
2792>>>12		ulelong	x		\b, header length: %d bytes
2793
2794# Wasay ImageIt DataPack
2795# From: Alexandre Iooss <erdnaxe@crans.org>
2796# URL: https://www.neowin.net/forum/topic/615151-anyone-know-what-program-opens-dsi-and-wsi-files/
2797# Note: Used in Acer eRecovery and Lenovo OneKey Recovery (OKR)
27984	string		WSVD
2799# bytes 3-4 are the checksum or the first 32 bytes of the file
2800>0	uleshort	0x40	Wasay ImageIt DataPack
2801>>8	uleshort	x	v%u
2802>>10	uleshort	x	\b.%u
2803>>16	lestring16/8	x	\b, "%s"
2804>>12	uleshort	x	(%u)
2805>>32	byte		x	\b, created on %02d
2806>>33	byte		x	\b%02d
2807>>34	byte		x	\b/%02d
2808>>35	byte		x	\b/%02d
2809>>36	byte		x	%02d
2810>>37	byte		x	\b:%02d
2811>>38	byte		x	\b:%02d
2812>>56	ulelong		x	\b, size: %u bytes
2813