xref: /freebsd/contrib/file/magic/Magdir/archive (revision 1f8b431d185416f70e96f03b8fd69b98442b1913)
1#------------------------------------------------------------------------------
2# $File: archive,v 1.117 2018/03/17 02:11:04 christos Exp $
3# archive:  file(1) magic for archive formats (see also "msdos" for self-
4#           extracting compressed archives)
5#
6# cpio, ar, arc, arj, hpack, lha/lharc, rar, squish, uc2, zip, zoo, etc.
7# pre-POSIX "tar" archives are also handled in the C code ../../src/is_tar.c.
8
9# POSIX tar archives
10# URL: https://en.wikipedia.org/wiki/Tar_(computing)
11# Reference: https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5&manpath=FreeBSD+8-current
12# header mainly padded with nul bytes
13500	quad		0
14# filename or extended attribute printable strings in range space null til umlaut ue
15>0	ubeshort	>0x1F00
16>>0	ubeshort	<0xFCFD
17# last 4 header bytes often null but tar\0 in gtarfail2.tar gtarfail.tar-bad
18# at https://sourceforge.net/projects/s-tar/files/testscripts/
19>>>508	ubelong&0x8B9E8DFF	0
20# nul, space or ascii digit 0-7 at start of mode
21>>>>100	ubyte&0xC8	=0
22>>>>>101 ubyte&0xC8	=0
23# nul, space at end of check sum
24>>>>>>155 ubyte&0xDF	=0
25# space or ascii digit 0 at start of check sum
26>>>>>>>148	ubyte&0xEF	=0x20
27>>>>>>>>0	use	tar-file
28#	minimal check and then display tar archive information which can also be
29#	embedded inside others like Android Backup, Clam AntiVirus database
300	name		tar-file
31>257	string		!ustar
32# header padded with nuls
33>>257	ulong		=0
34# GNU tar version 1.29 with non pax format option without refusing
35# creates misleading V7 header for Long path, Multi-volume, Volume type
36>>>156	ubyte		0x4c		GNU tar archive
37!:mime	application/x-gtar
38!:ext	tar/gtar
39>>>156	ubyte		0x4d		GNU tar archive
40!:mime	application/x-gtar
41!:ext	tar/gtar
42>>>156	ubyte		0x56		GNU tar archive
43!:mime	application/x-gtar
44!:ext	tar/gtar
45>>>156	default		x		tar archive (V7)
46!:mime	application/x-tar
47!:ext	tar
48# other stuff in padding
49# some implementations add new fields to the blank area at the end of the header record
50# created for example by DOS TAR 3.20g 1994 Tim V.Shapore with -j option
51>>257	ulong		!0		tar archive (old)
52!:mime	application/x-tar
53!:ext	tar
54# magic in newer, GNU, posix variants
55>257	string		=ustar
56# 2 last char of magic and UStar version because string expression does not work
57# 2 space characters followed by a null for GNU variant
58>>261	ubelong		=0x72202000	POSIX tar archive (GNU)
59!:mime	application/x-gtar
60!:ext	tar/gtar
61# UStar version with ASCII "00"
62>>261	ubelong		0x72003030	POSIX
63# gLOBAL and ExTENSION type only found in POSIX.1-2001 format
64>>>156	ubyte		0x67		\b.1-2001
65>>>156	ubyte		0x78		\b.1-2001
66>>>156	ubyte		x		tar archive
67!:mime	application/x-ustar
68!:ext	tar/ustar
69# version with 2 binary nuls embedded in Android Backup like com.android.settings.ab
70>>261	ubelong		0x72000000	tar archive (ustar)
71!:mime	application/x-ustar
72!:ext	tar/ustar
73# not seen ustar variant with garbage version
74>>261	default		x		tar archive (unknown ustar)
75!:mime	application/x-ustar
76!:ext	tar/ustar
77# type flag of 1st tar archive member
78#>156	ubyte		x		\b, %c-type
79>156	ubyte		x
80>>156	ubyte		0		\b, file
81>>156	ubyte		0x30		\b, file
82>>156	ubyte		0x31		\b, hard link
83>>156	ubyte		0x32		\b, symlink
84>>156	ubyte		0x33		\b, char device
85>>156	ubyte		0x34		\b, block device
86>>156	ubyte		0x35		\b, directory
87>>156	ubyte		0x36		\b, fifo
88>>156	ubyte		0x37		\b, reserved
89>>156	ubyte		0x4c		\b, long path
90>>156	ubyte		0x4d		\b, multi volume
91>>156	ubyte		0x56		\b, volume
92>>156	ubyte		0x67		\b, global
93>>156	ubyte		0x78		\b, extension
94>>156	default		x		\b, type
95>>>156	ubyte		x		'%c'
96# name[100]
97>0	string		>\0		%-.60s
98# mode mainly stored as an octal number in ASCII null or space terminated
99>100	string		>\0		\b, mode %-.7s
100# user id mainly as octal numbers in ASCII null or space terminated
101>108	string		>\0		\b, uid %-.7s
102# group id mainly as octal numbers in ASCII null or space terminated
103>116	string		>\0		\b, gid %-.7s
104# size mainly as octal number in ASCII
105>124	ubyte		<0x38
106>>124	string		>\0		\b, size %-.12s
107# coding indicated by setting the high-order bit of the leftmost byte
108>124	ubyte		>0xEF		\b, size 0x
109>>124	ubyte		!0xff		\b%2.2x
110>>125	ubyte		!0xff		\b%2.2x
111>>126	ubyte		!0xff		\b%2.2x
112>>127	ubyte		!0xff		\b%2.2x
113>>128	ubyte		!0xff		\b%2.2x
114>>129	ubyte		!0xff		\b%2.2x
115>>130	ubyte		!0xff		\b%2.2x
116>>131	ubyte		!0xff		\b%2.2x
117>>132	ubyte		!0xff		\b%2.2x
118>>133	ubyte		!0xff		\b%2.2x
119>>134	ubyte		!0xff		\b%2.2x
120>>135	ubyte		!0xff		\b%2.2x
121# seconds since 0:0:0 1 jan 1970 UTC as octal number mainly in ASCII null or space terminated
122>136	string		>\0		\b, seconds %-.11s
123# header checksum stored as an octal number in ASCII null or space terminated
124#>148	string		x		\b, cksum %.7s
125# linkname[100]
126>157	string		>\0		\b, linkname %-.40s
127# additional fields for ustar
128>257	string		=ustar
129# owner user name null terminated
130>>265	string		>\0		\b, user %-.32s
131# group name null terminated
132>>297	string		>\0		\b, group %-.32s
133# device major minor if not zero
134>>329	ubequad&0xCFCFCFCFcFcFcFdf	!0
135>>>329	string		x		\b, devmaj %-.7s
136>>337	ubequad&0xCFCFCFCFcFcFcFdf	!0
137>>>337	string		x		\b, devmin %-.7s
138# prefix[155]
139>>345	string		>\0		\b, prefix %-.155s
140# old non ustar/POSIX tar
141>257	string		!ustar
142>>508	string		=tar\0
143# padding[255] in old star
144>>>257	string		>\0		\b, padding: %-.40s
145>>508	default		x
146# padding[255] in old tar sometimes comment field
147>>>257	string		>\0		\b, comment: %-.40s
148
149# Incremental snapshot gnu-tar format from:
150# http://www.gnu.org/software/tar/manual/html_node/Snapshot-Files.html
1510	string		GNU\ tar-	GNU tar incremental snapshot data
152>&0	regex		[0-9]\.[0-9]+-[0-9]+	version %s
153
154# cpio archives
155#
156# Yes, the top two "cpio archive" formats *are* supposed to just be "short".
157# The idea is to indicate archives produced on machines with the same
158# byte order as the machine running "file" with "cpio archive", and
159# to indicate archives produced on machines with the opposite byte order
160# from the machine running "file" with "byte-swapped cpio archive".
161#
162# The SVR4 "cpio(4)" hints that there are additional formats, but they
163# are defined as "short"s; I think all the new formats are
164# character-header formats and thus are strings, not numbers.
1650	short		070707		cpio archive
166!:mime	application/x-cpio
1670	short		0143561		byte-swapped cpio archive
168!:mime	application/x-cpio # encoding: swapped
1690	string		070707		ASCII cpio archive (pre-SVR4 or odc)
1700	string		070701		ASCII cpio archive (SVR4 with no CRC)
1710	string		070702		ASCII cpio archive (SVR4 with CRC)
172
173#
174# Various archive formats used by various versions of the "ar"
175# command.
176#
177
178#
179# Original UNIX archive formats.
180# They were written with binary values in host byte order, and
181# the magic number was a host "int", which might have been 16 bits
182# or 32 bits.  We don't say "PDP-11" or "VAX", as there might have
183# been ports to little-endian 16-bit-int or 32-bit-int platforms
184# (x86?) using some of those formats; if none existed, feel free
185# to use "PDP-11" for little-endian 16-bit and "VAX" for little-endian
186# 32-bit.  There might have been big-endian ports of that sort as
187# well.
188#
1890	leshort		0177555		very old 16-bit-int little-endian archive
1900	beshort		0177555		very old 16-bit-int big-endian archive
1910	lelong		0177555		very old 32-bit-int little-endian archive
1920	belong		0177555		very old 32-bit-int big-endian archive
193
1940	leshort		0177545		old 16-bit-int little-endian archive
195>2	string		__.SYMDEF	random library
1960	beshort		0177545		old 16-bit-int big-endian archive
197>2	string		__.SYMDEF	random library
1980	lelong		0177545		old 32-bit-int little-endian archive
199>4	string		__.SYMDEF	random library
2000	belong		0177545		old 32-bit-int big-endian archive
201>4	string		__.SYMDEF	random library
202
203#
204# From "pdp" (but why a 4-byte quantity?)
205#
2060	lelong		0x39bed		PDP-11 old archive
2070	lelong		0x39bee		PDP-11 4.0 archive
208
209#
210# XXX - what flavor of APL used this, and was it a variant of
211# some ar archive format?  It's similar to, but not the same
212# as, the APL workspace magic numbers in pdp.
213#
2140	long		0100554		apl workspace
215
216#
217# System V Release 1 portable(?) archive format.
218#
2190	string		=<ar>		System V Release 1 ar archive
220!:mime	application/x-archive
221
222#
223# Debian package; it's in the portable archive format, and needs to go
224# before the entry for regular portable archives, as it's recognized as
225# a portable archive whose first member has a name beginning with
226# "debian".
227#
2280	string		=!<arch>\ndebian
229>8	string		debian-split	part of multipart Debian package
230!:mime	application/vnd.debian.binary-package
231>8	string		debian-binary	Debian binary package
232!:mime	application/vnd.debian.binary-package
233>8	string		!debian
234>68	string		>\0		(format %s)
235# These next two lines do not work, because a bzip2 Debian archive
236# still uses gzip for the control.tar (first in the archive).  Only
237# data.tar varies, and the location of its filename varies too.
238# file/libmagic does not currently have support for ascii-string based
239# (offsets) as of 2005-09-15.
240#>81	string		bz2		\b, uses bzip2 compression
241#>84	string		gz		\b, uses gzip compression
242#>136	ledate		x		created: %s
243
244#
245# MIPS archive; they're in the portable archive format, and need to go
246# before the entry for regular portable archives, as it's recognized as
247# a portable archive whose first member has a name beginning with
248# "__________E".
249#
2500	string	=!<arch>\n__________E	MIPS archive
251!:mime	application/x-archive
252>20	string	U			with MIPS Ucode members
253>21	string	L			with MIPSEL members
254>21	string	B			with MIPSEB members
255>19	string	L			and an EL hash table
256>19	string	B			and an EB hash table
257>22	string	X			-- out of date
258
2590	search/1	-h-		Software Tools format archive text
260
261#
262# BSD/SVR2-and-later portable archive formats.
263#
2640	string		=!<arch>		current ar archive
265!:mime	application/x-archive
266>8	string		__.SYMDEF	random library
267>68	string		__.SYMDEF\ SORTED	random library
268
269#
270# "Thin" archive, as can be produced by GNU ar.
271#
2720	string		=!<thin>\n	thin archive with
273>68	belong		0		no symbol entries
274>68	belong		1		%d symbol entry
275>68	belong		>1		%d symbol entries
276
277# ARC archiver, from Daniel Quinlan (quinlan@yggdrasil.com)
278#
279# The first byte is the magic (0x1a), byte 2 is the compression type for
280# the first file (0x01 through 0x09), and bytes 3 to 15 are the MS-DOS
281# filename of the first file (null terminated).  Since some types collide
282# we only test some types on basis of frequency: 0x08 (83%), 0x09 (5%),
283# 0x02 (5%), 0x03 (3%), 0x04 (2%), 0x06 (2%).  0x01 collides with terminfo.
2840	lelong&0x8080ffff	0x0000081a	ARC archive data, dynamic LZW
285!:mime	application/x-arc
2860	lelong&0x8080ffff	0x0000091a	ARC archive data, squashed
287!:mime	application/x-arc
2880	lelong&0x8080ffff	0x0000021a	ARC archive data, uncompressed
289!:mime	application/x-arc
2900	lelong&0x8080ffff	0x0000031a	ARC archive data, packed
291!:mime	application/x-arc
2920	lelong&0x8080ffff	0x0000041a	ARC archive data, squeezed
293!:mime	application/x-arc
2940	lelong&0x8080ffff	0x0000061a	ARC archive data, crunched
295!:mime	application/x-arc
296# [JW] stuff taken from idarc, obviously ARC successors:
2970	lelong&0x8080ffff	0x00000a1a	PAK archive data
298!:mime	application/x-arc
2990	lelong&0x8080ffff	0x0000141a	ARC+ archive data
300!:mime	application/x-arc
3010	lelong&0x8080ffff	0x0000481a	HYP archive data
302!:mime	application/x-arc
303
304# Acorn archive formats (Disaster prone simpleton, m91dps@ecs.ox.ac.uk)
305# I can't create either SPARK or ArcFS archives so I have not tested this stuff
306# [GRR:  the original entries collide with ARC, above; replaced with combined
307#  version (not tested)]
308#0	byte		0x1a		RISC OS archive (spark format)
3090	string		\032archive	RISC OS archive (ArcFS format)
3100       string          Archive\000     RISC OS archive (ArcFS format)
311
312# All these were taken from idarc, many could not be verified. Unfortunately,
313# there were many low-quality sigs, i.e. easy to trigger false positives.
314# Please notify me of any real-world fishy/ambiguous signatures and I'll try
315# to get my hands on the actual archiver and see if I find something better. [JW]
316# probably many can be enhanced by finding some 0-byte or control char near the start
317
318# idarc calls this Crush/Uncompressed... *shrug*
3190	string	CRUSH Crush archive data
320# Squeeze It (.sqz)
3210	string	HLSQZ Squeeze It archive data
322# SQWEZ
3230	string	SQWEZ SQWEZ archive data
324# HPack (.hpk)
3250	string	HPAK HPack archive data
326# HAP
3270	string	\x91\x33HF HAP archive data
328# MD/MDCD
3290	string	MDmd MDCD archive data
330# LIM
3310	string	LIM\x1a LIM archive data
332# SAR
3333	string	LH5 SAR archive data
334# BSArc/BS2
3350	string	\212\3SB\020\0	BSArc/BS2 archive data
336# Bethesda Softworks Archive (Oblivion)
3370	string	BSA\0 		BSArc archive data
338>4	lelong	x		version %d
339# MAR
3402	string	=-ah MAR archive data
341# ACB
342#0	belong&0x00f800ff	0x00800000 ACB archive data
343# CPZ
344# TODO, this is what idarc says: 0	string	\0\0\0 CPZ archive data
345# JRC
3460	string	JRchive JRC archive data
347# Quantum
3480	string	DS\0 Quantum archive data
349# ReSOF
3500	string	PK\3\6 ReSOF archive data
351# QuArk
3520	string	7\4 QuArk archive data
353# YAC
35414	string	YC YAC archive data
355# X1
3560	string	X1 X1 archive data
3570	string	XhDr X1 archive data
358# CDC Codec (.dqt)
3590	belong&0xffffe000	0x76ff2000 CDC Codec archive data
360# AMGC
3610	string	\xad6" AMGC archive data
362# NuLIB
3630	string	N\xc3\xb5F\xc3\xa9lx\xc3\xa5 NuLIB archive data
364# PakLeo
3650	string	LEOLZW PAKLeo archive data
366# ChArc
3670	string	SChF ChArc archive data
368# PSA
3690	string	PSA PSA archive data
370# CrossePAC
3710	string	DSIGDCC CrossePAC archive data
372# Freeze
3730	string	\x1f\x9f\x4a\x10\x0a Freeze archive data
374# KBoom
3750	string	\xc2\xa8MP\xc2\xa8 KBoom archive data
376# NSQ, must go after CDC Codec
3770	string	\x76\xff NSQ archive data
378# DPA
3790	string	Dirk\ Paehl DPA archive data
380# BA
381# TODO: idarc says "bytes 0-2 == bytes 3-5"
382# TTComp
383# URL: http://fileformats.archiveteam.org/wiki/TTComp_archive
384# Update: Joerg Jenderek
385# GRR: line below is too general as it matches also Panorama database "TCDB 2003-10 demo.pan", others
3860	string	\0\6
387# look for first keyword of Panorama database *.pan
388>12	search/261	DESIGN
389# skip keyword with low entropy
390>12	default		x	TTComp archive, binary, 4K dictionary
391# (version 5.25) labeled the above entry as "TTComp archive data"
392# ESP, could this conflict with Easy Software Products' (e.g.ESP ghostscript) documentation?
3930	string	ESP ESP archive data
394# ZPack
3950	string	\1ZPK\1 ZPack archive data
396# Sky
3970	string	\xbc\x40 Sky archive data
398# UFA
3990	string	UFA UFA archive data
400# Dry
4010	string	=-H2O DRY archive data
402# FoxSQZ
4030	string	FOXSQZ FoxSQZ archive data
404# AR7
4050	string	,AR7 AR7 archive data
406# PPMZ
4070	string	PPMZ PPMZ archive data
408# MS Compress
409# Update: Joerg Jenderek
410# URL: http://fileformats.archiveteam.org/wiki/MS-DOS_installation_compression
411# Reference: https://hwiegman.home.xs4all.nl/fileformats/compress/szdd_kwaj_format.html
412# Note: use correct version of extracting tool like EXPAND, UNPACK, DECOMP or 7Z
4134	string	\x88\xf0\x27
414#		KWAJ variant
415>0	string	KWAJ		MS Compress archive data, KWAJ variant
416!:mime	application/x-ms-compress-kwaj
417# extension not working in version 5.32
418# magic/Magdir/archive, 284: Warning: EXTENSION type ` ??_' has bad char '?'
419# file: line 284: Bad magic entry '   ??_'
420!:ext	??_
421# compression method (0-4)
422>>8	uleshort	x	\b, %u method
423# offset of compressed data
424>>10	uleshort	x	\b, 0x%x offset
425#>>(10.s)	uleshort	x
426#>>>&-6		string	x	\b, TEST extension %-.3s
427# header flags to mark header extensions
428>>12	uleshort	>0	\b, 0x%x flags
429# 4 bytes: decompressed length of file
430>>12	uleshort	&0x01
431>>>14	ulelong		x	\b, original size: %u bytes
432# 2 bytes: unknown purpose
433# 2 bytes: length of unknown data + mentioned bytes
434# 1-9 bytes: null-terminated file name
435# 1-4 bytes: null-terminated file extension
436>>12	uleshort	&0x08
437>>>12	uleshort				^0x01
438>>>>12		uleshort			^0x02
439>>>>>12			uleshort		^0x04
440>>>>>>12			uleshort	^0x10
441>>>>>>>14				string	x	\b, %-.8s
442>>>>>>12			uleshort	&0x10
443>>>>>>>14				string	x	\b, %-.8s
444>>>>>>>>&1				string	x	\b.%-.3s
445>>>>>12			uleshort		&0x04
446>>>>>>12			uleshort	^0x10
447>>>>>>>(14.s)			uleshort	x
448>>>>>>>>&14				string	x	\b, %-.8s
449>>>>>>12			uleshort	&0x10
450>>>>>>>(14.s)			uleshort	x
451>>>>>>>>&14				string	x	\b, %-.8s
452>>>>>>>>>&1				string	x	\b.%-.3s
453>>>>12		uleshort			&0x02
454>>>>>12			uleshort		^0x04
455>>>>>>12			uleshort	^0x10
456>>>>>>>16				string	x	\b, %-.8s
457>>>>>>12			uleshort	&0x10
458>>>>>>>16				string	x	\b, %-.8s
459>>>>>>>>&1				string	x	\b.%-.3s
# KWAJ header flag 0x04 set (with 0x02 set and 0x01 clear): the file name is
# located through the indirect offset (16.s), exactly like the parallel
# branches that use (14.s), (18.s) and (20.s).
460>>>>>12			uleshort		&0x04
# flag 0x10 clear: only the file name is shown
461>>>>>>12			uleshort	^0x10
462>>>>>>>(16.s)			uleshort	x
463>>>>>>>>&16				string	x	\b, %-.8s
# flag 0x10 set: file name followed by the file extension
464>>>>>>12			uleshort	&0x10
465>>>>>>>(16.s)			uleshort	x
# The name test must be a child (level 8) of the indirect (16.s) test so that
# &16 is relative to that match, and it needs the "\b, " separator used by all
# sibling branches; the extension then hangs one level below the name.
466>>>>>>>>&16				string	x	\b, %-.8s
467>>>>>>>>>&1				string	x	\b.%-.3s
468>>>12	uleshort				&0x01
469>>>>12		uleshort			^0x02
470>>>>>12			uleshort		^0x04
471>>>>>>12			uleshort	^0x10
472>>>>>>>18				string	x	\b, %-.8s
473>>>>>>12			uleshort	&0x10
474>>>>>>>18				string	x	\b, %-.8s
475>>>>>>>>&1				string	x	\b.%-.3s
476>>>>>12			uleshort		&0x04
477>>>>>>12			uleshort	^0x10
478>>>>>>>(18.s)			uleshort	x
479>>>>>>>>&18				string	x	\b, %-.8s
480>>>>>>12			uleshort	&0x10
481>>>>>>>(18.s)			uleshort	x
482>>>>>>>>&18				string	x	\b, %-.8s
483>>>>>>>>>&1				string	x	\b.%-.3s
484>>>>12		uleshort			&0x02
485>>>>>12			uleshort		^0x04
486>>>>>>12			uleshort	^0x10
487>>>>>>>20				string	x	\b, %-.8s
488>>>>>>12			uleshort	&0x10
489>>>>>>>20				string	x	\b, %-.8s
490>>>>>>>>&1				string	x	\b.%-.3s
491>>>>>12			uleshort		&0x04
492>>>>>>12			uleshort	^0x10
493>>>>>>>(20.s)			uleshort	x
494>>>>>>>>&20				string	x	\b, %-.8s
495>>>>>>12			uleshort	&0x10
496>>>>>>>(20.s)			uleshort	x
497>>>>>>>>&20				string	x	\b, %-.8s
498>>>>>>>>>&1				string	x	\b.%-.3s
499# 2 bytes: length of data + mentioned bytes
500#
501#		SZDD variant Haruhiko Okumura's LZSS or 7z type MsLZ
502>0	string	SZDD		MS Compress archive data, SZDD variant
503!:mime	application/x-ms-compress-szdd
504!:ext	??_
505# The character missing from the end of the filename (0=unknown)
506>>9	string	>\0		\b, %-.1s is last character of original name
507# https://www.betaarchive.com/forum/viewtopic.php?t=26161
508# Compression mode: "A" (0x41) found but sometimes "B" in Windows 3.1 builds 026 and 034e
509>>8	string	!A		\b, %-.1s method
510>>10	ulelong	>0		\b, original size: %u bytes
511#		QBasic SZDD variant
5123	string	\x88\xf0\x27
513>0	string	SZ\x20		MS Compress archive data, QBasic variant
514!:mime	application/x-ms-compress-sz
515!:ext	??$
516>>8	ulelong	>0		\b, original size: %u bytes
517
518# MP3 (archiver, not lossy audio compression)
5190	string	MP3\x1a MP3-Archiver archive data
520# ZET
5210	string	OZ\xc3\x9d ZET archive data
522# TSComp
5230	string	\x65\x5d\x13\x8c\x08\x01\x03\x00 TSComp archive data
524# ARQ
5250	string	gW\4\1 ARQ archive data
526# Squash
5273	string	OctSqu Squash archive data
528# Terse
5290	string	\5\1\1\0 Terse archive data
530# PUCrunch
5310	string	\x01\x08\x0b\x08\xef\x00\x9e\x32\x30\x36\x31 PUCrunch archive data
532# UHarc
5330	string	UHA UHarc archive data
534# ABComp
5350	string	\2AB ABComp archive data
5360	string	\3AB2 ABComp archive data
537# CMP
5380	string	CO\0 CMP archive data
539# Splint
5400	string	\x93\xb9\x06 Splint archive data
541# InstallShield
5420	string	\x13\x5d\x65\x8c InstallShield Z archive Data
543# Gather
5441	string	GTH Gather archive data
545# BOA
5460	string	BOA BOA archive data
547# RAX
5480	string	ULEB\xa RAX archive data
549# Xtreme
5500	string	ULEB\0 Xtreme archive data
551# Pack Magic
5520	string	@\xc3\xa2\1\0 Pack Magic archive data
553# BTS
5540	belong&0xfeffffff	0x1a034465 BTS archive data
555# ELI 5750
5560	string	Ora\  ELI 5750 archive data
557# QFC
5580	string	\x1aFC\x1a QFC archive data
5590	string	\x1aQF\x1a QFC archive data
560# PRO-PACK
5610	string	RNC PRO-PACK archive data
562# 777
5630	string	777 777 archive data
564# LZS221
5650	string	sTaC LZS221 archive data
566# HPA
5670	string	HPA HPA archive data
568# Arhangel
5690	string	LG Arhangel archive data
570# EXP1, uses bzip2
5710	string	0123456789012345BZh EXP1 archive data
572# IMP
5730	string	IMP\xa IMP archive data
574# NRV
5750	string	\x00\x9E\x6E\x72\x76\xFF NRV archive data
576# Squish
5770	string	\x73\xb2\x90\xf4 Squish archive data
578# Par
5790	string	PHILIPP Par archive data
5800	string	PAR Par archive data
581# HIT
5820	string	UB HIT archive data
583# SBX
5840	belong&0xfffff000	0x53423000 SBX archive data
585# NaShrink
5860	string	NSK NaShrink archive data
587# SAPCAR
5880	string	#\ CAR\ archive\ header SAPCAR archive data
5890	string	CAR\ 2.00RG SAPCAR archive data
590# Disintegrator
5910	string	DST Disintegrator archive data
592# ASD
5930	string	ASD ASD archive data
594# InstallShield CAB
5950	string	ISc( InstallShield CAB
596# TOP4
5970	string	T4\x1a TOP4 archive data
598# BatComp left out: sig looks like COM executable
599# so TODO: get real 4dos batcomp file and find sig
600# BlakHole
6010	string	BH\5\7 BlakHole archive data
602# BIX
6030	string	BIX0 BIX archive data
604# ChiefLZA
6050	string	ChfLZ ChiefLZA archive data
606# Blink
6070	string	Blink Blink archive data
608# Logitech Compress
6090	string	\xda\xfa Logitech Compress archive data
610# ARS-Sfx (FIXME: really a SFX? then goto COM/EXE)
6111	string	(C)\ STEPANYUK ARS-Sfx archive data
612# AKT/AKT32
6130	string	AKT32 AKT32 archive data
6140	string	AKT AKT archive data
615# NPack
6160	string	MSTSM NPack archive data
617# PFT
6180	string	\0\x50\0\x14 PFT archive data
619# SemOne
6200	string	SEM SemOne archive data
621# PPMD
6220	string	\x8f\xaf\xac\x84 PPMD archive data
623# FIZ
6240	string	FIZ FIZ archive data
625# MSXiE
6260	belong&0xfffff0f0	0x4d530000 MSXiE archive data
627# DeepFreezer
6280	belong&0xfffffff0	0x797a3030 DeepFreezer archive data
629# DC
6300	string	=<DC- DC archive data
631# TPac
6320	string	\4TPAC\3 TPac archive data
633# Ai
6340	string	Ai\1\1\0 Ai archive data
6350	string	Ai\1\0\0 Ai archive data
636# Ai32
6370	string	Ai\2\0 Ai32 archive data
6380	string	Ai\2\1 Ai32 archive data
639# SBC
6400	string	SBC SBC archive data
641# Ybs
6420	string	YBS Ybs archive data
643# DitPack
6440	string	\x9e\0\0 DitPack archive data
645# DMS
6460	string	DMS! DMS archive data
647# EPC
6480	string	\x8f\xaf\xac\x8c EPC archive data
649# VSARC
6500	string	VS\x1a VSARC archive data
651# PDZ
6520	string	PDZ PDZ archive data
653# ReDuq
6540	string	rdqx ReDuq archive data
655# GCA
6560	string	GCAX GCA archive data
657# PPMN
6580	string	pN PPMN archive data
659# WinImage
6603	string	WINIMAGE WinImage archive data
661# Compressia
6620	string	CMP0CMP Compressia archive data
663# UHBC
6640	string	UHB UHBC archive data
665# WinHKI
6660	string	\x61\x5C\x04\x05 WinHKI archive data
667# WWPack data file
6680	string	WWP WWPack archive data
669# BSN (BSA, PTS-DOS)
6700	string	\xffBSG BSN archive data
6711	string	\xffBSG BSN archive data
6723	string	\xffBSG BSN archive data
6731	string	\0\xae\2 BSN archive data
6741	string	\0\xae\3 BSN archive data
6751	string	\0\xae\7 BSN archive data
676# AIN
6770	string	\x33\x18 AIN archive data
6780	string	\x33\x17 AIN archive data
679# XPA32 test moved and merged with XPA by Joerg Jenderek at Sep 2015
680# SZip (TODO: doesn't catch all versions)
6810	string	SZ\x0a\4 SZip archive data
682# XPack DiskImage
683# *.XDI updated by Joerg Jenderek Sep 2015
684# ftp://ftp.sac.sk/pub/sac/pack/0index.txt
685# GRR: this test is still too general as it catches also text files starting with jm
6860	string	jm
687# only found examples with this additional characteristic 2 bytes
688>2	string	\x2\x4	Xpack DiskImage archive data
689#!:ext xdi
690# XPack Data
691# *.xpa updated by Joerg Jenderek Sep 2015
692# ftp://ftp.elf.stuba.sk/pub/pc/pack/
6930	string	xpa	XPA
694!:ext	xpa
695# XPA32
696# ftp://ftp.elf.stuba.sk/pub/pc/pack/xpa32.zip
697# created by XPA32.EXE version 1.0.2 for Windows
698>0	string	xpa\0\1 \b32 archive data
699# created by XPACK.COM version 1.67m or 1.67r with short 0x1800
700>3	ubeshort	!0x0001	\bck archive data
701# XPack Single Data
702# changed by Joerg Jenderek Sep 2015 back to like in version 5.12
703# letter 'I'+ acute accent is equivalent to \xcd
7040	string	\xcd\ jm	Xpack single archive data
705#!:mime	application/x-xpa-compressed
706!:ext xpa
707
708# TODO: missing due to unknown magic/magic at end of file:
709#DWC
710#ARG
711#ZAR
712#PC/3270
713#InstallIt
714#RKive
715#RK
716#XPack Diskimage
717
718# These were inspired by idarc, but actually verified
719# Dzip archiver (.dz)
720# Update: Joerg Jenderek
721# URL: http://speeddemosarchive.com/dzip/
722# reference: http://speeddemosarchive.com/dzip/dz29src.zip/main.c
723# GRR: line below is too general as it matches also ASCII texts like Doszip commander help dz.txt
7240	string	DZ
725# latest version is 2.9 dated 7 may 2003
726>2	byte	<4 Dzip archive data
727!:mime	application/x-dzip
728!:ext	dz
729>>2	byte	x \b, version %i
730>>3	byte	x \b.%i
731>>4	ulelong	x \b, offset 0x%x
732>>8	ulelong	x \b, %u files
733# ZZip archiver (.zz)
7340	string	ZZ\ \0\0 ZZip archive data
7350	string	ZZ0 ZZip archive data
736# PAQ archiver (.paq)
7370	string	\xaa\x40\x5f\x77\x1f\xe5\x82\x0d PAQ archive data
7380	string	PAQ PAQ archive data
739>3	byte&0xf0	0x30
740>>3	byte	x (v%c)
741# JAR archiver (.j), this is the successor to ARJ, not Java's JAR (which is essentially ZIP)
7420xe	string	\x1aJar\x1b JAR (ARJ Software, Inc.) archive data
7430	string	JARCS JAR (ARJ Software, Inc.) archive data
744
745# ARJ archiver (jason@jarthur.Claremont.EDU)
# Top-level test: 16-bit little-endian value 0xea60 at offset 0.
7460	leshort		0xea60		ARJ archive data
747!:mime	application/x-arj
# byte at offset 5 is printed as the version number
748>5	byte		x		\b, v%d,
# individual bits of the byte at offset 8 are reported as flags
749>8	byte		&0x04		multi-volume,
750>8	byte		&0x10		slash-switched,
751>8	byte		&0x20		backup,
# string at offset 34 is printed as the original name
752>34	string		x		original name: %s,
# byte at offset 7 selects the operating system label
753>7	byte		0		os: MS-DOS
754>7	byte		1		os: PRIMOS
755>7	byte		2		os: Unix
756>7	byte		3		os: Amiga
757>7	byte		4		os: Macintosh
758>7	byte		5		os: OS/2
759>7	byte		6		os: Apple ][ GS
760>7	byte		7		os: Atari ST
761>7	byte		8		os: NeXT
762>7	byte		9		os: VAX/VMS
# NOTE(review): a non-zero byte at offset 3 is printed as "%d]" - a closing
# bracket without a matching opening one in this entry; confirm intended output
763>3	byte		>0		%d]
764# [JW] idarc says this is also possible
7652	leshort		0xea60		ARJ archive data
766
767# HA archiver (Greg Roelofs, newt@uchicago.edu)
768# This is a really bad format. A file containing HAWAII will match this...
769#0	string		HA		HA archive data,
770#>2	leshort		=1		1 file,
771#>2	leshort		>1		%hu files,
772#>4	byte&0x0f	=0		first is type CPY
773#>4	byte&0x0f	=1		first is type ASC
774#>4	byte&0x0f	=2		first is type HSC
775#>4	byte&0x0f	=0x0e		first is type DIR
776#>4	byte&0x0f	=0x0f		first is type SPECIAL
777# suggestion: at least identify small archives (<1024 files)
7780  belong&0xffff00fc 0x48410000 HA archive data
779>2	leshort		=1		1 file,
780>2	leshort		>1		%u files,
781>4	byte&0x0f	=0		first is type CPY
782>4	byte&0x0f	=1		first is type ASC
783>4	byte&0x0f	=2		first is type HSC
784>4	byte&0x0f	=0x0e		first is type DIR
785>4	byte&0x0f	=0x0f		first is type SPECIAL
786
787# HPACK archiver (Peter Gutmann, pgut1@cs.aukuni.ac.nz)
7880	string		HPAK		HPACK archive data
789
790# JAM Archive volume format, by Dmitry.Kohmanyuk@UA.net
7910	string		\351,\001JAM\ 		JAM archive,
792>7	string		>\0			version %.4s
793>0x26	byte		=0x27			-
794>>0x2b	string          >\0			label %.11s,
795>>0x27	lelong		x			serial %08x,
796>>0x36	string		>\0			fstype %.8s
797
798# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
799# Update: Joerg Jenderek
800# URL: https://en.wikipedia.org/wiki/LHA_(file_format)
801# Reference: http://web.archive.org/web/20021005080911/http://www.osirusoft.com/joejared/lzhformat.html
802#
803#	check and display information of lharc (LHa,PMarc) file
8040	name				lharc-file
805# check 1st character of method id like -lz4- -lh5- or -pm2-
806>2	string		-
807# check 5th character of method id
808>>6	string		-
809# check header level 0 1 2 3
810>>>20	ubyte		<4
811# check 2nd, 3rd and 4th character of method id
812>>>>3	regex		\^(lh[0-9a-ex]|lz[s2-8]|pm[012]|pc1)		\b
813!:mime	application/x-lzh-compressed
814# creator type "LHA "
815!:apple	????LHA
816# display archive type name like "LHa/LZS archive data" or "LArc archive"
817>>>>>2	string		-lz		\b
818!:ext	lzs
819# already known  -lzs- -lz4- -lz5- with old names
820>>>>>>2	string	-lzs		LHa/LZS archive data
821>>>>>>3	regex	\^lz[45]	LHarc 1.x archive data
822# missing -lz?- with wikipedia names
823>>>>>>3	regex	\^lz[2378]	LArc archive
824# display archive type name like "LHa (2.x) archive data"
825>>>>>2	string		-lh		\b
826# already known -lh0- -lh1- -lh2- -lh3-  -lh4- -lh5- -lh6- -lh7- -lhd- variants with old names
827>>>>>>3	regex		\^lh[01]	LHarc 1.x/ARX archive data
828# LHice archiver uses ".ICE" as name extension instead of the usual one ".lzh"
829# FOOBAR archiver uses ".foo" as name extension instead of the usual one
830# "Florain Orjanov's and Olga Bachetska's ARchiver" not found at the moment
831>>>>>>>2	string	-lh1		\b
832!:ext lha/lzh/ice
833>>>>>>3	regex		\^lh[23d]	LHa 2.x? archive data
834>>>>>>3	regex		\^lh[7]		LHa (2.x)/LHark archive data
835>>>>>>3	regex		\^lh[456]	LHa (2.x) archive data
836>>>>>>>2	string	-lh5		\b
837# https://en.wikipedia.org/wiki/BIOS
838# Some mainboard BIOS like Award use LHa compression. So archives with unusual extensions are found like
839# bios.rom , kd7_v14.bin, 1010.004, ...
840!:ext lha/lzh/rom/bin
841# missing -lh?- variants (Joe Jared)
842>>>>>>3	regex		\^lh[89a-ce]	LHa (Joe Jared) archive
843# UNLHA32 2.67a
844>>>>>>2	string		-lhx		LHa (UNLHA32) archive
845# lha archives with standard file name extensions ".lha" ".lzh"
846>>>>>>3	regex		!\^(lh1|lh5)	\b
847!:ext lha/lzh
848# this should not happen if all -lh variants are described
849>>>>>>2	default		x		LHa (unknown) archive
850#!:ext	lha
851# PMarc
852>>>>>3	regex		\^pm[012]	PMarc archive data
853!:ext pma
854# append method id without leading and trailing minus character
855>>>>>3	string		x		[%3.3s]
856>>>>>>0	use	lharc-header
857#
858#	check and display information of lharc header
8590	name				lharc-header
860# header size 0x4 , 0x1b-0x61
861>0	ubyte		x
862# compressed data size != compressed file size
863#>7	ulelong		x		\b, data size %d
864# attribute: 0x2~?? 0x10~symlink|target 0x20~normal
865#>19	ubyte		x		\b, 19_0x%x
866# level identifier 0 1 2 3
867#>20	ubyte		x		\b, level %d
868# time stamp
869#>15		ubelong	x		DATE 0x%8.8x
870# OS ID for level 1
871>20	ubyte		1
872# 0x20 type found for *.rom files
873>>(21.b+24)	ubyte	<0x21		\b, 0x%x OS
874# ascii type like M for MSDOS
875>>(21.b+24)	ubyte	>0x20		\b, '%c' OS
876# OS ID for level 2
877>20	ubyte		2
878#>>23	ubyte		x		\b, OS ID 0x%x
879>>23	ubyte		<0x21		\b, 0x%x OS
880>>23	ubyte		>0x20		\b, '%c' OS
881# filename only for level 0 and 1
882>20	ubyte		<2
883# length of filename
884>>21		ubyte	>0		\b, with
885# filename
886>>>21		pstring	x		"%s"
887#
# Top-level LHarc/LHa/LZS entries. Each one matches a 5-byte method id
# ("-l??-") at offset 2 and prints nothing itself; the description is produced
# by the lharc-file subroutine invoked at offset 0. The commented-out lines
# preserve the older one-line forms of these entries.
888#2	string		-lh0-		LHarc 1.x/ARX archive data [lh0]
889#!:mime	application/x-lharc
8902	string		-lh0-
891>0	use	lharc-file
892#2	string		-lh1-		LHarc 1.x/ARX archive data [lh1]
893#!:mime	application/x-lharc
8942	string		-lh1-
895>0	use	lharc-file
896# NEW -lz2- ... -lz8-
8972	string		-lz2-
898>0	use	lharc-file
8992	string		-lz3-
900>0	use	lharc-file
9012	string		-lz4-
902>0	use	lharc-file
9032	string		-lz5-
904>0	use	lharc-file
9052	string		-lz7-
906>0	use	lharc-file
9072	string		-lz8-
908>0	use	lharc-file
909#	[never seen any but the last; -lh4- reported in comp.compression:]
910#2	string		-lzs-		LHa/LZS archive data [lzs]
9112	string		-lzs-
912>0	use	lharc-file
913# According to wikipedia and others such a version does not exist
914#2	string		-lh\40-		LHa 2.x? archive data [lh ]
915#2	string		-lhd-		LHa 2.x? archive data [lhd]
9162	string		-lhd-
917>0	use	lharc-file
918#2	string		-lh2-		LHa 2.x? archive data [lh2]
9192	string		-lh2-
920>0	use	lharc-file
921#2	string		-lh3-		LHa 2.x? archive data [lh3]
9222	string		-lh3-
923>0	use	lharc-file
924#2	string		-lh4-		LHa (2.x) archive data [lh4]
9252	string		-lh4-
926>0	use	lharc-file
927#2	string		-lh5-		LHa (2.x) archive data [lh5]
9282	string		-lh5-
929>0	use	lharc-file
930#2	string		-lh6-		LHa (2.x) archive data [lh6]
9312	string		-lh6-
932>0	use	lharc-file
933#2	string		-lh7-		LHa (2.x)/LHark archive data [lh7]
9342	string		-lh7-
935# !:mime	application/x-lha
936# >20	byte		x		- header level %d
937>0	use	lharc-file
938# NEW -lh8- ... -lhe- , -lhx-
9392	string		-lh8-
940>0	use	lharc-file
9412	string		-lh9-
942>0	use	lharc-file
9432	string		-lha-
944>0	use	lharc-file
9452	string		-lhb-
946>0	use	lharc-file
9472	string		-lhc-
948>0	use	lharc-file
9492	string		-lhe-
950>0	use	lharc-file
9512	string		-lhx-
952>0	use	lharc-file
953# taken from idarc [JW]
# PUT archives: the three bytes "-lZ" (capital Z) at offset 2
9542   string      -lZ         PUT archive data
955# already done by LHarc magics
956# this should never happen if all sub types of LZS archive are identified
957#2   string      -lz         LZS archive data
# Swag archives: "-sw1-" at offset 2, the same position as the LHarc method ids
9582   string      -sw1-       Swag archive data
959
# Subroutine entered through "use rar-file-header": appends a version text
# chosen by the byte at offset 24 and a host OS text chosen by the byte at
# offset 15. Values not listed below add nothing to the output.
9600	name		rar-file-header
# version byte (decimal 15, 20 or 29)
961>24	byte		=15		\b, v1.5
962>24	byte		=20		\b, v2.0
963>24	byte		=29		\b, v4
# host OS byte
964>15	byte		=0		\b, os: MS-DOS
965>15	byte		=1		\b, os: OS/2
966>15	byte		=2		\b, os: Win32
967>15	byte		=3		\b, os: Unix
968>15	byte		=4		\b, os: Mac OS
969>15	byte		=5		\b, os: BeOS
970
# Subroutine entered through "use rar-archive-header": decodes the 16-bit
# little-endian flag word at offset 3. Nothing is printed unless at least one
# of the low nine bits is set; after that every set bit adds its own keyword,
# in the order listed (note 0x0010 is reported before 0x0008).
9710	name		rar-archive-header
972>3	leshort&0x01ff	>0		\b, flags:
973>>3	leshort		&0x0001		ArchiveVolume
974>>3	leshort		&0x0002		Commented
975>>3	leshort		&0x0004		Locked
976>>3	leshort		&0x0010		NewVolumeNaming
977>>3	leshort		&0x0008		Solid
978>>3	leshort		&0x0020		Authenticated
979>>3	leshort		&0x0040		RecoveryRecordPresent
980>>3	leshort		&0x0080		EncryptedBlockHeader
981>>3	leshort		&0x0100		FirstVolume
982
983# RAR (Roshal Archive) archive
# 7-byte signature "Rar!" 0x1a 0x07 0x00 at the start of the file
9840	string		Rar!\x1a\7\0		RAR archive data
985!:mime	application/x-rar
986!:ext	rar/cbr
987# file header
# indirect offsets: the 32-bit little-endian value stored at 0xc is added to
# 9 to find the block type byte and to 7 to find the start of that block
988>(0xc.l+9)	byte	0x74
989>>(0xc.l+7)	use	rar-file-header
990# subblock seems to share information with file header
991>(0xc.l+9)	byte	0x7a
992>>(0xc.l+7)	use	rar-file-header
# block type 0x73 directly after the signature: decode its flags, with the
# block starting at offset 7
993>9		byte	0x73
994>>7		use	rar-archive-header
995
# Same "Rar!" 0x1a 0x07 prefix as the entry above, but followed by 0x01 0x00;
# reported as v5 without decoding any header fields.
9960	string		Rar!\x1a\7\1\0		RAR archive data, v5
997!:mime	application/x-rar
998!:ext	rar
999
1000# Very old RAR archive
1001# http://jasonblanks.com/wp-includes/images/papers/KnowyourarchiveRAR.pdf
# 4-byte signature "RE" 0x7e 0x5e at the start of the file
10020	string		RE\x7e\x5e  RAR archive data (<v1.5)
1003!:mime	application/x-rar
1004!:ext	rar/cbr
1005
1006# SQUISH archiver (Greg Roelofs, newt@uchicago.edu)
# identified only by the 4 bytes "SQSH" at the start of the file
10070	string		SQSH		squished archive data (Acorn RISCOS)
1008
1009# UC2 archiver (Greg Roelofs, newt@uchicago.edu)
1010# [JW] see exe section for self-extracting version
# identified only by "UC2" followed by byte 0x1a at the start of the file
10110	string		UC2\x1a		UC2 archive data
1012
1013# PKZIP multi-volume archive
# matches the 4 bytes "PK" 0x07 0x08 immediately followed by the "PK" 0x03 0x04
# signature that the generic Zip entry tests for at offset 0
10140	string		PK\x07\x08PK\x03\x04	Zip multi-volume archive data, at least PKZIP v2.50 to extract
1015!:mime	application/zip
1016!:ext zip/cbz
1017
1018# Zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
# a file that starts with "PK" 0x05 0x06 is reported as an empty archive
10190	string		PK\005\006	Zip archive data (empty)
1020!:mime application/zip
1021!:ext zip/cbz
# Zip files starting with "PK" 0x03 0x04. This line prints nothing itself; the
# continuation tests below pick a specialised description (mimetype-based
# formats, JAR, iOS App) or fall back to the generic "Zip archive data".
10220	string		PK\003\004
1023
1024# Specialised zip formats which start with a member named 'mimetype'
1025# (stored uncompressed, with no 'extra field') containing the file's MIME type.
1026# Check for an 8-byte name, 0-byte extra field, name "mimetype", and
1027#  contents starting with "application/":
# Offsets used below: the 4 length bytes start at 26, the name "mimetype" at 30,
# the member contents at 38 and the text after "application/" at 50.
1028>26	string		\x8\0\0\0mimetypeapplication/
1029
1030#  KOffice / OpenOffice & StarOffice / OpenDocument formats
1031#    From: Abel Cheung <abel@oaka.org>
1032
1033#   KOffice (1.2 or above) formats
1034#    (mimetype contains "application/vnd.kde.<SUBTYPE>")
1035>>50	string	vnd.kde.		KOffice (>=1.2)
1036>>>58	string	karbon			Karbon document
1037>>>58	string	kchart			KChart document
1038>>>58	string	kformula		KFormula document
1039>>>58	string	kivio			Kivio document
1040>>>58	string	kontour			Kontour document
1041>>>58	string	kpresenter		KPresenter document
1042>>>58	string	kspread			KSpread document
1043>>>58	string	kword			KWord document
1044
1045#   OpenOffice formats (for OpenOffice 1.x / StarOffice 6/7)
1046#    (mimetype contains "application/vnd.sun.xml.<SUBTYPE>")
# After each application name the next byte decides the wording: anything but
# '.' (0x2e) means a plain document, otherwise the suffix string is matched.
1047>>50	string	vnd.sun.xml.		OpenOffice.org 1.x
1048>>>62	string	writer			Writer
1049>>>>68	byte	!0x2e			document
1050>>>>68	string	.template		template
1051>>>>68	string	.global			global document
1052>>>62	string	calc			Calc
1053>>>>66	byte	!0x2e			spreadsheet
1054>>>>66	string	.template		template
1055>>>62	string	draw			Draw
1056>>>>66	byte	!0x2e			document
1057>>>>66	string	.template		template
1058>>>62	string	impress			Impress
1059>>>>69	byte	!0x2e			presentation
1060>>>>69	string	.template		template
1061>>>62	string	math			Math document
1062>>>62	string	base			Database file
1063
1064#   OpenDocument formats (for OpenOffice 2.x / StarOffice >= 8)
1065#    http://lists.oasis-open.org/archives/office/200505/msg00006.html
1066#    (mimetype contains "application/vnd.oasis.opendocument.<SUBTYPE>")
# Same scheme as above with '-' (0x2d) as the separator before a suffix; each
# recognised variant also sets its own MIME type.
1067>>50	string	vnd.oasis.opendocument.	OpenDocument
1068>>>73	string	text
1069>>>>77	byte	!0x2d			Text
1070!:mime	application/vnd.oasis.opendocument.text
1071>>>>77	string	-template		Text Template
1072!:mime	application/vnd.oasis.opendocument.text-template
1073>>>>77	string	-web			HTML Document Template
1074!:mime	application/vnd.oasis.opendocument.text-web
1075>>>>77	string	-master			Master Document
1076!:mime	application/vnd.oasis.opendocument.text-master
1077>>>73	string	graphics
1078>>>>81	byte	!0x2d			Drawing
1079!:mime	application/vnd.oasis.opendocument.graphics
1080>>>>81	string	-template		Template
1081!:mime	application/vnd.oasis.opendocument.graphics-template
1082>>>73	string	presentation
1083>>>>85	byte	!0x2d			Presentation
1084!:mime	application/vnd.oasis.opendocument.presentation
1085>>>>85	string	-template		Template
1086!:mime	application/vnd.oasis.opendocument.presentation-template
1087>>>73	string	spreadsheet
1088>>>>84	byte	!0x2d			Spreadsheet
1089!:mime	application/vnd.oasis.opendocument.spreadsheet
1090>>>>84	string	-template		Template
1091!:mime	application/vnd.oasis.opendocument.spreadsheet-template
1092>>>73	string	chart
1093>>>>78	byte	!0x2d			Chart
1094!:mime	application/vnd.oasis.opendocument.chart
1095>>>>78	string	-template		Template
1096!:mime	application/vnd.oasis.opendocument.chart-template
1097>>>73	string	formula
1098>>>>80	byte	!0x2d			Formula
1099!:mime	application/vnd.oasis.opendocument.formula
1100>>>>80	string	-template		Template
1101!:mime	application/vnd.oasis.opendocument.formula-template
1102>>>73	string	database		Database
1103!:mime	application/vnd.oasis.opendocument.database
1104# Valid for LibreOffice Base 6.0.1.1 at least
1105>>>73	string	base 			Database
1106!:mime	application/vnd.oasis.opendocument.base
1107>>>73	string	image
1108>>>>78	byte	!0x2d			Image
1109!:mime	application/vnd.oasis.opendocument.image
1110>>>>78	string	-template		Template
1111!:mime	application/vnd.oasis.opendocument.image-template
1112
1113#  EPUB (OEBPS) books using OCF (OEBPS Container Format)
1114#    http://www.idpf.org/ocf/ocf1.0/download/ocf10.htm, section 4.
1115#    From: Ralf Brown <ralf.brown@gmail.com>
1116>>50	string	epub+zip	EPUB document
1117!:mime application/epub+zip
1118
1119#  Catch other ZIP-with-mimetype formats
1120#	In a ZIP file, the bytes immediately after a member's contents are
1121#	always "PK". The 2 regex rules here print the "mimetype" member's
1122#	contents up to the first 'P'. Luckily, most MIME types don't contain
1123#	any capital 'P's. This is a kludge.
1124#    (mimetype contains "application/<OTHER>")
# the nested negated tests exclude every prefix already handled above
1125>>50		string	!epub+zip
1126>>>50		string	!vnd.oasis.opendocument.
1127>>>>50		string	!vnd.sun.xml.
1128>>>>>50		string	!vnd.kde.
1129>>>>>>38	regex	[!-OQ-~]+		Zip data (MIME type "%s"?)
1130!:mime	application/zip
1131#    (mimetype contents other than "application/*")
1132>26		string	\x8\0\0\0mimetype
1133>>38		string	!application/
1134>>>38		regex	[!-OQ-~]+		Zip data (MIME type "%s"?)
1135!:mime	application/zip
1136
1137# Java Jar files
# indirect offset: the 16-bit little-endian value at 26 plus 30; a
# little-endian 0xcafe found there marks the archive as a JAR
1138>(26.s+30)	leshort	0xcafe		Java archive data (JAR)
1139!:mime	application/java-archive
1140
1141# iOS App
# requires: no 0xcafe marker, no leading "mimetype" member, a first member
# name starting with "Payload/" and ".app/" within the 64 bytes searched from 38
1142>(26.s+30)	leshort	!0xcafe
1143>>26		string	!\x8\0\0\0mimetype
1144>>>30		string	Payload/
1145>>>>38		search/64       .app/   iOS App
1146!:mime application/x-ios-app
1147
1148
1149# Generic zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
1150#   Next line excludes specialized formats:
1151>(26.s+30)	leshort	!0xcafe
1152>>26    string          !\x8\0\0\0mimetype	Zip archive data
1153!:mime	application/zip
# "at least" + text from the zipversion subroutine (defined elsewhere in this
# file) + "to extract"
1154>>>4	beshort		x			\b, at least
1155>>>4	use		zipversion
1156>>>4	beshort		x			to extract
1157>>>0x161	string		WINZIP		\b, WinZIP self-extracting
1158
1159# StarView Metafile
1160# From Pierre Ducroquet <pinaraf@pinaraf.info>
# "VCLMTF" signature, then a big-endian 16-bit version at offset 6 and a
# big-endian 32-bit size at offset 8, both printed as decimal
11610	string	VCLMTF	StarView MetaFile
1162>6	beshort	x	\b, version %d
1163>8	belong	x	\b, size %d
1164
1165# Zoo archiver
# identified by the little-endian 32-bit value 0xfdc4a7dc at offset 20
116620	lelong		0xfdc4a7dc	Zoo archive data
1167!:mime	application/x-zoo
# characters at offsets 4, 6 and 7 are printed as "vX.YZ"; each is shown only
# if its byte value exceeds the threshold (48 for the first, 47 for the others)
1168>4	byte		>48		\b, v%c.
1169>>6	byte		>47		\b%c
1170>>>7	byte		>47		\b%c
# numeric version pair at offsets 32/33
1171>32	byte		>0		\b, modify: v%d
1172>>33	byte		x		\b.%d+
# if the same tag value is repeated at offset 42, a second version pair is
# read from offsets 70/71
1173>42	lelong		0xfdc4a7dc	\b,
1174>>70	byte		>0		extract: v%d
1175>>>71	byte		x		\b.%d+
1176
1177# Shell archives
# the literal text "# This is a shell archive" must start at offset 10
117810	string		#\ This\ is\ a\ shell\ archive	shell archive text
1179!:mime	application/octet-stream
1180
1181#
1182# LBR. NB: May conflict with the questionable
1183#          "binary Computer Graphics Metafile" format.
1184#
# 14-byte pattern at offset 0: one NUL, eleven spaces, two NULs
11850       string  \0\ \ \ \ \ \ \ \ \ \ \ \0\0    LBR archive data
1186#
1187# PMA (CP/M derivative of LHA)
1188# Update: Joerg Jenderek
1189# URL: https://en.wikipedia.org/wiki/LHA_(file_format)
1190#
# -pm0- / -pm1- / -pm2- sit at offset 2 like the LHarc method ids and are
# handed to the same lharc-file subroutine; the commented-out lines keep the
# older one-line forms.
1191#2       string          -pm0-           PMarc archive data [pm0]
11922	string		-pm0-
1193>0	use	lharc-file
1194#2       string          -pm1-           PMarc archive data [pm1]
11952	string		-pm1-
1196>0	use	lharc-file
1197#2       string          -pm2-           PMarc archive data [pm2]
11982	string		-pm2-
1199>0	use	lharc-file
# self-extracting variant: described directly, not via lharc-file
12002       string          -pms-           PMarc SFX archive (CP/M, DOS)
1201#!:mime	application/x-foobar-exec
1202!:ext com
# PopCom: method-like id "-pc1-" at offset 5 instead of 2
12035       string          -pc1-           PopCom compressed executable (CP/M)
1204#!:mime	application/x-
1205#!:ext com
1206
1207# From Rafael Laboissiere <rafael@laboissiere.net>
1208# The Project Revision Control System (see
1209# http://prcs.sourceforge.net) generates a packaged project
1210# file which is recognized by the following entry:
# the only test is a 16-bit little-endian 0xeb81 at offset 0
12110	leshort		0xeb81	PRCS packaged project
1212
1213# Microsoft cabinets
1214# by David Necas (Yeti) <yeti@physics.muni.cz>
1215#0	string	MSCF\0\0\0\0	Microsoft cabinet file data,
1216#>25	byte	x		v%d
1217#>24	byte	x		\b.%d
1218# MPi: All CABs have version 1.3, so this is pointless.
1219# Better magic in debian-additions.
1220
1221# GTKtalog catalogs
1222# by David Necas (Yeti) <yeti@physics.muni.cz>
# "gtktalog " (with trailing space) at offset 4; the version text follows at 13
12234	string	gtktalog\ 	GTKtalog catalog data,
1224>13	string	3		version 3
# for version 3 the two bytes at offset 14 are compared with 0x677a ("gz")
1225>>14	beshort	0x677a		(gzipped)
1226>>14	beshort	!0x677a		(not gzipped)
# any version string that compares greater than "3" is printed verbatim
1227>13	string	>3		version %s
1228
1229############################################################################
1230# Parity archive reconstruction file, the 'par' file format now used on Usenet.
# "PAR" + NUL signature; the little-endian 16-bit value at offset 48 is
# reported as "Index file" when zero and as the file number when positive
12310       string          PAR\0	PARity archive data
1232>48	leshort		=0	- Index file
1233>48	leshort		>0	- file number %d
1234
1235# Felix von Leitner <felix-file@fefe.de>
# two alternative file starts are accepted; both produce the same description
# and MIME type
12360	string	d8:announce	BitTorrent file
1237!:mime	application/x-bittorrent
1238# Durval Menezes, <jmgthbfile at durval dot com>
12390	string	d13:announce-list	BitTorrent file
1240!:mime	application/x-bittorrent
1241
1242# Atari MSA archive - Teemu Hukkanen <tjhukkan@iki.fi>
# big-endian 16-bit id 0x0e0f, followed by big-endian 16-bit header fields at
# offsets 2, 4, 6 and 8
12430	beshort 0x0e0f		Atari MSA archive data
1244>2	beshort x		\b, %d sectors per track
# the field at offset 4 holds 0 or 1, printed as 1 or 2 sides
1245>4	beshort 0		\b, 1 sided
1246>4	beshort 1		\b, 2 sided
1247>6	beshort x		\b, starting track: %d
1248>8	beshort x		\b, ending track: %d
1249
1250# Alternate ZIP string (amc@arwen.cs.berkeley.edu)
# "PK00" placed in front of the usual "PK" 0x03 0x04 signature
12510	string	PK00PK\003\004	Zip archive data
1252!:mime	application/zip
1253!:ext zip/cbz
1254
1255# ACE archive (from http://www.wotsit.org/download.asp?f=ace)
1256# by Stefan `Sec` Zehl <sec@42.org>
# "**ACE**" at offset 7; the bytes at 14..17 and the 16-bit little-endian flag
# word at offset 5 are decoded below
12577	string		**ACE**		ACE archive data
1258>15	byte	>0		version %d
# byte 16: originating system
1259>16	byte	=0x00		\b, from MS-DOS
1260>16	byte	=0x01		\b, from OS/2
1261>16	byte	=0x02		\b, from Win/32
1262>16	byte	=0x03		\b, from Unix
1263>16	byte	=0x04		\b, from MacOS
1264>16	byte	=0x05		\b, from WinNT
1265>16	byte	=0x06		\b, from Primos
1266>16	byte	=0x07		\b, from AppleGS
1267>16	byte	=0x08		\b, from Atari
1268>16	byte	=0x09		\b, from Vax/VMS
1269>16	byte	=0x0A		\b, from Amiga
1270>16	byte	=0x0B		\b, from Next
1271>14	byte	x		\b, version %d to extract
# flag word at offset 5: each set bit appends its own text; bit 0x0080 also
# prints the part number from byte 17
1272>5	leshort &0x0080		\b, multiple volumes,
1273>>17	byte	x		\b (part %d),
1274>5	leshort &0x0002		\b, contains comment
1275>5	leshort	&0x0200		\b, sfx
1276>5	leshort	&0x0400		\b, small dictionary
1277>5	leshort	&0x0800		\b, multi-volume
1278>5	leshort	&0x1000		\b, contains AV-String
# the string at offset 30 is matched including its leading byte 0x16
1279>>30	string	\x16*UNREGISTERED\x20VERSION*	(unregistered)
1280>5	leshort &0x2000		\b, with recovery record
1281>5	leshort &0x4000		\b, locked
1282>5	leshort &0x8000		\b, solid
1283# Date in MS-DOS format (whatever that is)
1284#>18	lelong	x		Created on
1285
1286# sfArk : compression program for Soundfonts (sf2) by Dirk Jagdmann
1287# <doj@cubic.org>
# "sfArk" at offset 0x1A; the two detail strings are printed only when the
# character at 0x15 is "2" and the respective string is non-empty
12880x1A	string	sfArk		sfArk compressed Soundfont
1289>0x15	string	2
1290>>0x1	string	>\0		Version %s
1291>>0x2A	string	>\0		: %s
1292
1293# DR-DOS 7.03 Packed File *.??_
# the text after the 12-byte "Packed File " tag is printed as the former
# name, limited to 12 characters by the format
12940	string	Packed\ File\ 	Personal NetWare Packed File
1295>12	string	x		\b, was "%.12s"
1296
1297# EET archive
1298# From: Tilman Sauerbeck <tilman@code-monkey.de>
# single test: big-endian 32-bit value 0x1ee7ff00 at offset 0
12990	belong	0x1ee7ff00	EET archive
1300!:mime	application/x-eet
1301
1302# rzip archives
# "RZIP" tag, version as two bytes (printed "major.minor") and a big-endian
# 32-bit byte count at offset 6
13030	string	RZIP		rzip compressed data
1304>4	byte	x		- version %d
1305>5	byte	x		\b.%d
1306>6	belong	x		(%d bytes)
1307
1308# From: "Robert Dale" <robdale@gmail.com>
# big-endian 32-bit value 123 at offset 0
13090	belong	123		dar archive,
# the label is printed as 10 bytes of hex (4 + 4 + 2) taken from offsets 4..13,
# with the quotes opened by the first and closed by the last format
1310>4	belong	x		label "%.8x
1311>>8	belong	x		%.8x
1312>>>12	beshort	x		%.4x"
# slice marker at offset 14: 0x54 ('T') or the two-byte codes 0x4e4e ("NN") /
# 0x4e53 ("NS")
1313>14	byte	0x54		end slice
1314>14	beshort	0x4e4e		multi-part
1315>14	beshort	0x4e53		multi-part, with -S
1316
1317# Symbian installation files
1318#  http://www.thouky.co.uk/software/psifs/sis.html
1319#  http://developer.symbian.com/main/downloads/papers/SymbianOSv91/softwareinstallsis.pdf
# older format: little-endian 0x10000419 at offset 8, with the release derived
# from the little-endian value at offset 4
13208	lelong	0x10000419	Symbian installation file
1321!:mime	application/vnd.symbian.install
1322>4	lelong	0x1000006D	(EPOC release 3/4/5)
1323>4	lelong	0x10003A12	(EPOC release 6)
# Symbian OS 9.x format: little-endian 0x10201A7A at offset 0
13240	lelong	0x10201A7A	Symbian installation file (Symbian OS 9.x)
1325!:mime	x-epoc/x-sisx-app
1326
1327# From "Nelson A. de Oliveira" <naoliv@gmail.com>
# "MPQ" followed by byte 0x1a (octal 032) at offset 0
13280	string	MPQ\032		MoPaQ (MPQ) archive
1329
1330# From: "Nelson A. de Oliveira" <naoliv@gmail.com>
1331# .kgb
# "KGB_arch" at offset 0; one character at offset 10 is printed as the
# compression level
13320	string KGB_arch		KGB Archiver file
1333>10	string x		with compression level %.1s
1334
1335# xar (eXtensible ARchiver) archive
1336# xar archive format: http://code.google.com/p/xar/
1337# From: "David Remahl" <dremahl@apple.com>
# "xar!" signature; only the version (big-endian 16-bit at offset 6) and the
# checksum type (big-endian 32-bit at offset 24) are printed, the remaining
# header fields are left commented out
13380	string	xar!		xar archive
1339!:mime	application/x-xar
1340#>4	beshort	x		header size %d
1341>6	beshort	x		version %d,
1342#>8	quad	x		compressed TOC: %d,
1343#>16	quad	x		uncompressed TOC: %d,
1344>24	belong	0		no checksum
1345>24	belong	1		SHA-1 checksum
1346>24	belong	2		MD5 checksum
1347
1348# Type: Parity Archive
1349# From: Daniel van Eeden <daniel_e@dds.nl>
# matched by the 4 characters "PAR2" at offset 0
13500	string	PAR2		Parity Archive Volume Set
1351
1352# Bacula volume format. (Volumes always start with a block header.)
1353# URL: http://bacula.org/3.0.x-manuals/en/developers/developers/Block_Header.html
1354# From: Adam Buchbinder <adam.buchbinder@gmail.com>
# "BB02" at offset 12; the big-endian 32-bit value at offset 20 is formatted
# as a date
135512	string	BB02		Bacula volume
1356>20	bedate	x		\b, started %s
1357
1358# ePub is XHTML + XML inside a ZIP archive.  The first member of the
1359#   archive must be an uncompressed file called 'mimetype' with contents
1360#   'application/epub+zip'
1361
1362
1363# From: "Michael Gorny" <mgorny@gentoo.org>
1364# ZPAQ: http://mattmahoney.net/dc/zpaq.html
# "zPQ" tag with the level stored in the byte that follows it
13650	string	zPQ	ZPAQ stream
1366>3	byte	x	\b, level %d
1367# From: Barry Carter <carter.barry@gmail.com>
1368# http://encode.ru/threads/456-zpaq-updates/page32
# second start tag "7kSt", reported without further details
13690	string	7kSt	ZPAQ file
1370
1371# BBeB ebook, unencrypted (LRF format)
1372# URL: http://www.sven.de/librie/Librie/LrfFormat
1373# From: Adam Buchbinder <adam.buchbinder@gmail.com>
# 8-byte signature: the letters L, R, F each followed by NUL, then two NULs
13740	string	L\0R\0F\0\0\0	BBeB ebook data, unencrypted
# NOTE(review): the numeric fields are read big-endian here; confirm against
# the LRF specification that this is the stored byte order
1375>8	beshort	x		\b, version %d
# byte 36 selects the reading direction (1 or 16)
1376>36	byte	1		\b, front-to-back
1377>36	byte	16		\b, back-to-front
# two 16-bit values printed together as "(<first>x, <second>)"
1378>42	beshort	x		\b, (%dx,
1379>44	beshort	x		%d)
1380
1381# Symantec GHOST image by Joerg Jenderek at May 2014
1382# http://us.norton.com/ghost/
1383# http://www.garykessler.net/library/file_sigs.html
# The mask drops bit 0x08 of byte 2 and the low nibble of byte 3 from the
# comparison; those bits are decoded separately below (split flag at byte 2,
# compression tag at byte 3).
13840		ubelong&0xFFFFf7f0	0xFEEF0100	Norton GHost image
1385# *.GHO
1386>2		ubyte&0x08		0x00		\b, first file
1387# *.GHS or *.[0-9] with cns program option
1388>2		ubyte&0x08		0x08		\b, split file
1389# part of split index interesting for *.ghs
1390>>4		ubyte			x		id=0x%x
1391# compression tag minus one equals numeric compression command line switch z[1-9]
1392>3		ubyte			0		\b, no compression
1393>3		ubyte			2		\b, fast compression (Z1)
1394>3		ubyte			3		\b, medium compression (Z2)
# tags 4..10: the raw tag is printed followed by the literal text "-1"
1395>3		ubyte			>3
1396>>3		ubyte			<11		\b, compression (Z%d-1)
# the remaining details are evaluated only when the split bit is clear
1397>2		ubyte&0x08		0x00
1398# ~ 30 byte password field only for *.gho
# only the first 8 bytes of that field (offset 12) are tested for non-zero
1399>>12		ubequad			!0		\b, password protected
1400>>44		ubyte			!1
1401# 1~Image All, sector-by-sector only for *.gho
1402>>>10		ubyte			1		\b, sector copy
1403# 1~Image Boot track only for *.gho
1404>>>43		ubyte			1		\b, boot track
1405# 1~Image Disc only for *.gho implies Image Boot track and sector copy
1406>>44		ubyte			1		\b, disc sector copy
1407# optional image description only *.gho
1408>>0xff		string			>\0		"%-.254s"
1409# look for DOS sector end sequence
# search up to 7776 bytes from 0xE08 for 0x55 0xAA, then re-run the magic
# tests on the data 512 bytes before the end of the match
1410>0xE08	search/7776		\x55\xAA
1411>>&-512	indirect		x		\b; contains
1412
1413# Google Chrome extensions
1414# https://developer.chrome.com/extensions/crx
1415# https://developer.chrome.com/extensions/hosting
# "Cr24" tag at offset 0 followed by the 4-byte package format version
14160	string	Cr24	Google Chrome extension
1417!:mime	application/x-chrome-extension
# The CRX header stores the version as a little-endian unsigned 32-bit value.
# Read it as ulelong so the printed number does not depend on the byte order
# of the machine running file(1); plain ulong is host-endian and prints a
# wrong version (e.g. 33554432 instead of 2) on big-endian hosts.
1418>4	ulelong	x	\b, version %u
1419
1420# SeqBox - Sequenced container
1421# ext: sbx, seqbox
1422# Marco Pontello marcopon@gmail.com
1423# reference: https://github.com/MarcoPon/SeqBox
# "SBx" tag with the version in the byte that follows it
14240	string	SBx	SeqBox,
1425>3	byte	x	version %d
1426
1427# LyNX archive
# the text "USE LYNX TO DISSOLVE THIS FILE" (spaces written as \040) must
# appear at offset 56
142856	string	USE\040LYNX\040TO\040DISSOLVE\040THIS\040FILE	 LyNX archive
1429