xref: /freebsd/contrib/file/magic/Magdir/archive (revision 559af1ec16576f9f3e41318d66147f4df4fb8e87)
1#------------------------------------------------------------------------------
2# $File: archive,v 1.119 2018/04/24 23:19:45 christos Exp $
3# archive:  file(1) magic for archive formats (see also "msdos" for self-
4#           extracting compressed archives)
5#
6# cpio, ar, arc, arj, hpack, lha/lharc, rar, squish, uc2, zip, zoo, etc.
7# pre-POSIX "tar" archives are also handled in the C code ../../src/is_tar.c.
8
9# POSIX tar archives
10# URL: https://en.wikipedia.org/wiki/Tar_(computing)
11# Reference: https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5&manpath=FreeBSD+8-current
12# header mainly padded with nul bytes
13500	quad		0
14!:strength /2
15# filename or extended attribute printable strings in range space null til umlaut ue
16>0	ubeshort	>0x1F00
17>>0	ubeshort	<0xFCFD
18# last 4 header bytes often null but tar\0 in gtarfail2.tar gtarfail.tar-bad
19# at https://sourceforge.net/projects/s-tar/files/testscripts/
20>>>508	ubelong&0x8B9E8DFF	0
21# nul, space or ascii digit 0-7 at start of mode
22>>>>100	ubyte&0xC8	=0
23>>>>>101 ubyte&0xC8	=0
24# nul, space at end of check sum
25>>>>>>155 ubyte&0xDF	=0
26# space or ascii digit 0 at start of check sum
27>>>>>>>148	ubyte&0xEF	=0x20
28>>>>>>>>0	use	tar-file
29#	minimal check and then display tar archive information which can also be
30#	embedded inside others like Android Backup, Clam AntiVirus database
310	name		tar-file
32>257	string		!ustar
33# header padded with nuls
34>>257	ulong		=0
35# GNU tar version 1.29 with non pax format option without refusing
36# creates misleading V7 header for Long path, Multi-volume, Volume type
37>>>156	ubyte		0x4c		GNU tar archive
38!:mime	application/x-gtar
39!:ext	tar/gtar
40>>>156	ubyte		0x4d		GNU tar archive
41!:mime	application/x-gtar
42!:ext	tar/gtar
43>>>156	ubyte		0x56		GNU tar archive
44!:mime	application/x-gtar
45!:ext	tar/gtar
46>>>156	default		x		tar archive (V7)
47!:mime	application/x-tar
48!:ext	tar
49# other stuff in padding
50# some implementations add new fields to the blank area at the end of the header record
51# created for example by DOS TAR 3.20g 1994 Tim V.Shapore with -j option
52>>257	ulong		!0		tar archive (old)
53!:mime	application/x-tar
54!:ext	tar
55# magic in newer, GNU, posix variants
56>257	string		=ustar
57# 2 last char of magic and UStar version because string expression does not work
58# 2 space characters followed by a null for GNU variant
59>>261	ubelong		=0x72202000	POSIX tar archive (GNU)
60!:mime	application/x-gtar
61!:ext	tar/gtar
62# UStar version with ASCII "00"
63>>261	ubelong		0x72003030	POSIX
64# gLOBAL and ExTENSION type only found in POSIX.1-2001 format
65>>>156	ubyte		0x67		\b.1-2001
66>>>156	ubyte		0x78		\b.1-2001
67>>>156	ubyte		x		tar archive
68!:mime	application/x-ustar
69!:ext	tar/ustar
70# version with 2 binary nuls embedded in Android Backup like com.android.settings.ab
71>>261	ubelong		0x72000000	tar archive (ustar)
72!:mime	application/x-ustar
73!:ext	tar/ustar
74# not seen ustar variant with garbage version
75>>261	default		x		tar archive (unknown ustar)
76!:mime	application/x-ustar
77!:ext	tar/ustar
78# type flag of 1st tar archive member
79#>156	ubyte		x		\b, %c-type
80>156	ubyte		x
81>>156	ubyte		0		\b, file
82>>156	ubyte		0x30		\b, file
83>>156	ubyte		0x31		\b, hard link
84>>156	ubyte		0x32		\b, symlink
85>>156	ubyte		0x33		\b, char device
86>>156	ubyte		0x34		\b, block device
87>>156	ubyte		0x35		\b, directory
88>>156	ubyte		0x36		\b, fifo
89>>156	ubyte		0x37		\b, reserved
90>>156	ubyte		0x4c		\b, long path
91>>156	ubyte		0x4d		\b, multi volume
92>>156	ubyte		0x56		\b, volume
93>>156	ubyte		0x67		\b, global
94>>156	ubyte		0x78		\b, extension
95>>156	default		x		\b, type
96>>>156	ubyte		x		'%c'
97# name[100]
98>0	string		>\0		%-.60s
99# mode mainly stored as an octal number in ASCII null or space terminated
100>100	string		>\0		\b, mode %-.7s
101# user id mainly as octal numbers in ASCII null or space terminated
102>108	string		>\0		\b, uid %-.7s
103# group id mainly as octal numbers in ASCII null or space terminated
104>116	string		>\0		\b, gid %-.7s
105# size mainly as octal number in ASCII
106>124	ubyte		<0x38
107>>124	string		>\0		\b, size %-.12s
108# coding indicated by setting the high-order bit of the leftmost byte
109>124	ubyte		>0xEF		\b, size 0x
110>>124	ubyte		!0xff		\b%2.2x
111>>125	ubyte		!0xff		\b%2.2x
112>>126	ubyte		!0xff		\b%2.2x
113>>127	ubyte		!0xff		\b%2.2x
114>>128	ubyte		!0xff		\b%2.2x
115>>129	ubyte		!0xff		\b%2.2x
116>>130	ubyte		!0xff		\b%2.2x
117>>131	ubyte		!0xff		\b%2.2x
118>>132	ubyte		!0xff		\b%2.2x
119>>133	ubyte		!0xff		\b%2.2x
120>>134	ubyte		!0xff		\b%2.2x
121>>135	ubyte		!0xff		\b%2.2x
122# seconds since 0:0:0 1 jan 1970 UTC as octal number mainly in ASCII null or space terminated
123>136	string		>\0		\b, seconds %-.11s
124# header checksum stored as an octal number in ASCII null or space terminated
125#>148	string		x		\b, cksum %.7s
126# linkname[100]
127>157	string		>\0		\b, linkname %-.40s
128# additional fields for ustar
129>257	string		=ustar
130# owner user name null terminated
131>>265	string		>\0		\b, user %-.32s
132# group name null terminated
133>>297	string		>\0		\b, group %-.32s
134# device major minor if not zero
135>>329	ubequad&0xCFCFCFCFcFcFcFdf	!0
136>>>329	string		x		\b, devmaj %-.7s
137>>337	ubequad&0xCFCFCFCFcFcFcFdf	!0
138>>>337	string		x		\b, devmin %-.7s
139# prefix[155]
140>>345	string		>\0		\b, prefix %-.155s
141# old non ustar/POSIX tar
142>257	string		!ustar
143>>508	string		=tar\0
144# padding[255] in old star
145>>>257	string		>\0		\b, padding: %-.40s
146>>508	default		x
147# padding[255] in old tar sometimes comment field
148>>>257	string		>\0		\b, comment: %-.40s
149
150# Incremental snapshot gnu-tar format from:
151# http://www.gnu.org/software/tar/manual/html_node/Snapshot-Files.html
1520	string		GNU\ tar-	GNU tar incremental snapshot data
153>&0	regex		[0-9]\.[0-9]+-[0-9]+	version %s
154
155# cpio archives
156#
157# Yes, the top two "cpio archive" formats *are* supposed to just be "short".
158# The idea is to indicate archives produced on machines with the same
159# byte order as the machine running "file" with "cpio archive", and
160# to indicate archives produced on machines with the opposite byte order
161# from the machine running "file" with "byte-swapped cpio archive".
162#
163# The SVR4 "cpio(4)" hints that there are additional formats, but they
164# are defined as "short"s; I think all the new formats are
165# character-header formats and thus are strings, not numbers.
1660	short		070707		cpio archive
167!:mime	application/x-cpio
1680	short		0143561		byte-swapped cpio archive
169!:mime	application/x-cpio # encoding: swapped
1700	string		070707		ASCII cpio archive (pre-SVR4 or odc)
1710	string		070701		ASCII cpio archive (SVR4 with no CRC)
1720	string		070702		ASCII cpio archive (SVR4 with CRC)
173
174#
175# Various archive formats used by various versions of the "ar"
176# command.
177#
178
179#
180# Original UNIX archive formats.
181# They were written with binary values in host byte order, and
182# the magic number was a host "int", which might have been 16 bits
183# or 32 bits.  We don't say "PDP-11" or "VAX", as there might have
184# been ports to little-endian 16-bit-int or 32-bit-int platforms
185# (x86?) using some of those formats; if none existed, feel free
186# to use "PDP-11" for little-endian 16-bit and "VAX" for little-endian
187# 32-bit.  There might have been big-endian ports of that sort as
188# well.
189#
1900	leshort		0177555		very old 16-bit-int little-endian archive
1910	beshort		0177555		very old 16-bit-int big-endian archive
1920	lelong		0177555		very old 32-bit-int little-endian archive
1930	belong		0177555		very old 32-bit-int big-endian archive
194
1950	leshort		0177545		old 16-bit-int little-endian archive
196>2	string		__.SYMDEF	random library
1970	beshort		0177545		old 16-bit-int big-endian archive
198>2	string		__.SYMDEF	random library
1990	lelong		0177545		old 32-bit-int little-endian archive
200>4	string		__.SYMDEF	random library
2010	belong		0177545		old 32-bit-int big-endian archive
202>4	string		__.SYMDEF	random library
203
204#
205# From "pdp" (but why a 4-byte quantity?)
206#
2070	lelong		0x39bed		PDP-11 old archive
2080	lelong		0x39bee		PDP-11 4.0 archive
209
210#
211# XXX - what flavor of APL used this, and was it a variant of
212# some ar archive format?  It's similar to, but not the same
213# as, the APL workspace magic numbers in pdp.
214#
2150	long		0100554		apl workspace
216
217#
218# System V Release 1 portable(?) archive format.
219#
2200	string		=<ar>		System V Release 1 ar archive
221!:mime	application/x-archive
222
223#
224# Debian package; it's in the portable archive format, and needs to go
225# before the entry for regular portable archives, as it's recognized as
226# a portable archive whose first member has a name beginning with
227# "debian".
228#
2290	string		=!<arch>\ndebian
230>8	string		debian-split	part of multipart Debian package
231!:mime	application/vnd.debian.binary-package
232>8	string		debian-binary	Debian binary package
233!:mime	application/vnd.debian.binary-package
234>8	string		!debian
235>68	string		>\0		(format %s)
236# These next two lines do not work, because a bzip2 Debian archive
237# still uses gzip for the control.tar (first in the archive).  Only
238# data.tar varies, and the location of its filename varies too.
239# file/libmagic does not currently have support for ascii-string based
240# (offsets) as of 2005-09-15.
241#>81	string		bz2		\b, uses bzip2 compression
242#>84	string		gz		\b, uses gzip compression
243#>136	ledate		x		created: %s
244
245#
246# MIPS archive; they're in the portable archive format, and need to go
247# before the entry for regular portable archives, as it's recognized as
248# a portable archive whose first member has a name beginning with
249# "__________E".
250#
2510	string	=!<arch>\n__________E	MIPS archive
252!:mime	application/x-archive
253>20	string	U			with MIPS Ucode members
254>21	string	L			with MIPSEL members
255>21	string	B			with MIPSEB members
256>19	string	L			and an EL hash table
257>19	string	B			and an EB hash table
258>22	string	X			-- out of date
259
2600	search/1	-h-		Software Tools format archive text
261
262#
263# BSD/SVR2-and-later portable archive formats.
264#
2650	string		=!<arch>\n		current ar archive
266!:mime	application/x-archive
267>8	string		__.SYMDEF	random library
268>68	string		__.SYMDEF\ SORTED	random library
269
270#
271# "Thin" archive, as can be produced by GNU ar.
272#
2730	string		=!<thin>\n	thin archive with
274>68	belong		0		no symbol entries
275>68	belong		1		%d symbol entry
276>68	belong		>1		%d symbol entries
277
278# ARC archiver, from Daniel Quinlan (quinlan@yggdrasil.com)
279#
280# The first byte is the magic (0x1a), byte 2 is the compression type for
281# the first file (0x01 through 0x09), and bytes 3 to 15 are the MS-DOS
282# filename of the first file (null terminated).  Since some types collide
283# we only test some types on basis of frequency: 0x08 (83%), 0x09 (5%),
284# 0x02 (5%), 0x03 (3%), 0x04 (2%), 0x06 (2%).  0x01 collides with terminfo.
2850	lelong&0x8080ffff	0x0000081a	ARC archive data, dynamic LZW
286!:mime	application/x-arc
2870	lelong&0x8080ffff	0x0000091a	ARC archive data, squashed
288!:mime	application/x-arc
2890	lelong&0x8080ffff	0x0000021a	ARC archive data, uncompressed
290!:mime	application/x-arc
2910	lelong&0x8080ffff	0x0000031a	ARC archive data, packed
292!:mime	application/x-arc
2930	lelong&0x8080ffff	0x0000041a	ARC archive data, squeezed
294!:mime	application/x-arc
2950	lelong&0x8080ffff	0x0000061a	ARC archive data, crunched
296!:mime	application/x-arc
297# [JW] stuff taken from idarc, obviously ARC successors:
2980	lelong&0x8080ffff	0x00000a1a	PAK archive data
299!:mime	application/x-arc
3000	lelong&0x8080ffff	0x0000141a	ARC+ archive data
301!:mime	application/x-arc
3020	lelong&0x8080ffff	0x0000481a	HYP archive data
303!:mime	application/x-arc
304
305# Acorn archive formats (Disaster prone simpleton, m91dps@ecs.ox.ac.uk)
306# I can't create either SPARK or ArcFS archives so I have not tested this stuff
307# [GRR:  the original entries collide with ARC, above; replaced with combined
308#  version (not tested)]
309#0	byte		0x1a		RISC OS archive (spark format)
3100	string		\032archive	RISC OS archive (ArcFS format)
3110       string          Archive\000     RISC OS archive (ArcFS format)
312
313# All these were taken from idarc, many could not be verified. Unfortunately,
314# there were many low-quality sigs, i.e. easy to trigger false positives.
315# Please notify me of any real-world fishy/ambiguous signatures and I'll try
316# to get my hands on the actual archiver and see if I find something better. [JW]
317# probably many can be enhanced by finding some 0-byte or control char near the start
318
319# idarc calls this Crush/Uncompressed... *shrug*
3200	string	CRUSH Crush archive data
321# Squeeze It (.sqz)
3220	string	HLSQZ Squeeze It archive data
323# SQWEZ
3240	string	SQWEZ SQWEZ archive data
325# HPack (.hpk)
3260	string	HPAK HPack archive data
327# HAP
3280	string	\x91\x33HF HAP archive data
329# MD/MDCD
3300	string	MDmd MDCD archive data
331# LIM
3320	string	LIM\x1a LIM archive data
333# SAR
3343	string	LH5 SAR archive data
335# BSArc/BS2
3360	string	\212\3SB\020\0	BSArc/BS2 archive data
337# Bethesda Softworks Archive (Oblivion)
3380	string	BSA\0 		BSArc archive data
339>4	lelong	x		version %d
340# MAR
3412	string	=-ah MAR archive data
342# ACB
343#0	belong&0x00f800ff	0x00800000 ACB archive data
344# CPZ
345# TODO, this is what idarc says: 0	string	\0\0\0 CPZ archive data
346# JRC
3470	string	JRchive JRC archive data
348# Quantum
3490	string	DS\0 Quantum archive data
350# ReSOF
3510	string	PK\3\6 ReSOF archive data
352# QuArk
3530	string	7\4 QuArk archive data
354# YAC
35514	string	YC YAC archive data
356# X1
3570	string	X1 X1 archive data
3580	string	XhDr X1 archive data
359# CDC Codec (.dqt)
3600	belong&0xffffe000	0x76ff2000 CDC Codec archive data
361# AMGC
3620	string	\xad6" AMGC archive data
363# NuLIB
3640	string	N\xc3\xb5F\xc3\xa9lx\xc3\xa5 NuLIB archive data
365# PakLeo
3660	string	LEOLZW PAKLeo archive data
367# ChArc
3680	string	SChF ChArc archive data
369# PSA
3700	string	PSA PSA archive data
371# CrossePAC
3720	string	DSIGDCC CrossePAC archive data
373# Freeze
3740	string	\x1f\x9f\x4a\x10\x0a Freeze archive data
375# KBoom
3760	string	\xc2\xa8MP\xc2\xa8 KBoom archive data
377# NSQ, must go after CDC Codec
3780	string	\x76\xff NSQ archive data
379# DPA
3800	string	Dirk\ Paehl DPA archive data
381# BA
382# TODO: idarc says "bytes 0-2 == bytes 3-5"
383# TTComp
384# URL: http://fileformats.archiveteam.org/wiki/TTComp_archive
385# Update: Joerg Jenderek
386# GRR: line below is too general as it matches also Panorama database "TCDB 2003-10 demo.pan", others
3870	string	\0\6
388# look for first keyword of Panorama database *.pan
389>12	search/261	DESIGN
390# skip keyword with low entropy
391>12	default		x	TTComp archive, binary, 4K dictionary
392# (version 5.25) labeled the above entry as "TTComp archive data"
393# ESP, could this conflict with Easy Software Products' (e.g.ESP ghostscript) documentation?
3940	string	ESP ESP archive data
395# ZPack
3960	string	\1ZPK\1 ZPack archive data
397# Sky
3980	string	\xbc\x40 Sky archive data
399# UFA
4000	string	UFA UFA archive data
401# Dry
4020	string	=-H2O DRY archive data
403# FoxSQZ
4040	string	FOXSQZ FoxSQZ archive data
405# AR7
4060	string	,AR7 AR7 archive data
407# PPMZ
4080	string	PPMZ PPMZ archive data
409# MS Compress
410# Update: Joerg Jenderek
411# URL: http://fileformats.archiveteam.org/wiki/MS-DOS_installation_compression
412# Reference: https://hwiegman.home.xs4all.nl/fileformats/compress/szdd_kwaj_format.html
413# Note: use correct version of extracting tool like EXPAND, UNPACK, DECOMP or 7Z
4144	string	\x88\xf0\x27
415#		KWAJ variant
416>0	string	KWAJ		MS Compress archive data, KWAJ variant
417!:mime	application/x-ms-compress-kwaj
418# extension not working in version 5.32
419# magic/Magdir/archive, 284: Warning: EXTENSION type ` ??_' has bad char '?'
420# file: line 284: Bad magic entry '   ??_'
421!:ext	??_
422# compression method (0-4)
423>>8	uleshort	x	\b, %u method
424# offset of compressed data
425>>10	uleshort	x	\b, 0x%x offset
426#>>(10.s)	uleshort	x
427#>>>&-6		string	x	\b, TEST extension %-.3s
428# header flags to mark header extensions
429>>12	uleshort	>0	\b, 0x%x flags
430# 4 bytes: decompressed length of file
431>>12	uleshort	&0x01
432>>>14	ulelong		x	\b, original size: %u bytes
433# 2 bytes: unknown purpose
434# 2 bytes: length of unknown data + mentioned bytes
435# 1-9 bytes: null-terminated file name
436# 1-4 bytes: null-terminated file extension
437>>12	uleshort	&0x08
438>>>12	uleshort				^0x01
439>>>>12		uleshort			^0x02
440>>>>>12			uleshort		^0x04
441>>>>>>12			uleshort	^0x10
442>>>>>>>14				string	x	\b, %-.8s
443>>>>>>12			uleshort	&0x10
444>>>>>>>14				string	x	\b, %-.8s
445>>>>>>>>&1				string	x	\b.%-.3s
446>>>>>12			uleshort		&0x04
447>>>>>>12			uleshort	^0x10
448>>>>>>>(14.s)			uleshort	x
449>>>>>>>>&14				string	x	\b, %-.8s
450>>>>>>12			uleshort	&0x10
451>>>>>>>(14.s)			uleshort	x
452>>>>>>>>&14				string	x	\b, %-.8s
453>>>>>>>>>&1				string	x	\b.%-.3s
454>>>>12		uleshort			&0x02
455>>>>>12			uleshort		^0x04
456>>>>>>12			uleshort	^0x10
457>>>>>>>16				string	x	\b, %-.8s
458>>>>>>12			uleshort	&0x10
459>>>>>>>16				string	x	\b, %-.8s
460>>>>>>>>&1				string	x	\b.%-.3s
461>>>>>12			uleshort		&0x04
462>>>>>>12			uleshort	^0x10
463>>>>>>>(16.s)			uleshort	x
464>>>>>>>>&16				string	x	\b, %-.8s
# header flag 0x10 set: file name is followed by a file extension.
# The name test must sit one level below the indirect (16.s) test so that its
# relative offset &16 is resolved against that test, and it needs the "\b, "
# separator, exactly like the sibling branches for offsets 14, 18 and 20.
465>>>>>>12			uleshort	&0x10
466>>>>>>>(16.s)			uleshort	x
467>>>>>>>>&16				string	x	\b, %-.8s
468>>>>>>>>>&1				string	x	\b.%-.3s
469>>>12	uleshort				&0x01
470>>>>12		uleshort			^0x02
471>>>>>12			uleshort		^0x04
472>>>>>>12			uleshort	^0x10
473>>>>>>>18				string	x	\b, %-.8s
474>>>>>>12			uleshort	&0x10
475>>>>>>>18				string	x	\b, %-.8s
476>>>>>>>>&1				string	x	\b.%-.3s
477>>>>>12			uleshort		&0x04
478>>>>>>12			uleshort	^0x10
479>>>>>>>(18.s)			uleshort	x
480>>>>>>>>&18				string	x	\b, %-.8s
481>>>>>>12			uleshort	&0x10
482>>>>>>>(18.s)			uleshort	x
483>>>>>>>>&18				string	x	\b, %-.8s
484>>>>>>>>>&1				string	x	\b.%-.3s
485>>>>12		uleshort			&0x02
486>>>>>12			uleshort		^0x04
487>>>>>>12			uleshort	^0x10
488>>>>>>>20				string	x	\b, %-.8s
489>>>>>>12			uleshort	&0x10
490>>>>>>>20				string	x	\b, %-.8s
491>>>>>>>>&1				string	x	\b.%-.3s
492>>>>>12			uleshort		&0x04
493>>>>>>12			uleshort	^0x10
494>>>>>>>(20.s)			uleshort	x
495>>>>>>>>&20				string	x	\b, %-.8s
496>>>>>>12			uleshort	&0x10
497>>>>>>>(20.s)			uleshort	x
498>>>>>>>>&20				string	x	\b, %-.8s
499>>>>>>>>>&1				string	x	\b.%-.3s
500# 2 bytes: length of data + mentioned bytes
501#
502#		SZDD variant Haruhiko Okumura's LZSS or 7z type MsLZ
503>0	string	SZDD		MS Compress archive data, SZDD variant
504!:mime	application/x-ms-compress-szdd
505!:ext	??_
506# The character missing from the end of the filename (0=unknown)
507>>9	string	>\0		\b, %-.1s is last character of original name
508# https://www.betaarchive.com/forum/viewtopic.php?t=26161
509# Compression mode: "A" (0x41) found but sometimes "B" in Windows 3.1 builds 026 and 034e
510>>8	string	!A		\b, %-.1s method
511>>10	ulelong	>0		\b, original size: %u bytes
512#		QBasic SZDD variant
5133	string	\x88\xf0\x27
514>0	string	SZ\x20		MS Compress archive data, QBasic variant
515!:mime	application/x-ms-compress-sz
516!:ext	??$
517>>8	ulelong	>0		\b, original size: %u bytes
518
519# MP3 (archiver, not lossy audio compression)
5200	string	MP3\x1a MP3-Archiver archive data
521# ZET
5220	string	OZ\xc3\x9d ZET archive data
523# TSComp
5240	string	\x65\x5d\x13\x8c\x08\x01\x03\x00 TSComp archive data
525# ARQ
5260	string	gW\4\1 ARQ archive data
527# Squash
5283	string	OctSqu Squash archive data
529# Terse
5300	string	\5\1\1\0 Terse archive data
531# PUCrunch
5320	string	\x01\x08\x0b\x08\xef\x00\x9e\x32\x30\x36\x31 PUCrunch archive data
533# UHarc
5340	string	UHA UHarc archive data
535# ABComp
5360	string	\2AB ABComp archive data
5370	string	\3AB2 ABComp archive data
538# CMP
5390	string	CO\0 CMP archive data
540# Splint
5410	string	\x93\xb9\x06 Splint archive data
542# InstallShield
5430	string	\x13\x5d\x65\x8c InstallShield Z archive Data
544# Gather
5451	string	GTH Gather archive data
546# BOA
5470	string	BOA BOA archive data
548# RAX
5490	string	ULEB\xa RAX archive data
550# Xtreme
5510	string	ULEB\0 Xtreme archive data
552# Pack Magic
5530	string	@\xc3\xa2\1\0 Pack Magic archive data
554# BTS
5550	belong&0xfeffffff	0x1a034465 BTS archive data
556# ELI 5750
5570	string	Ora\  ELI 5750 archive data
558# QFC
5590	string	\x1aFC\x1a QFC archive data
5600	string	\x1aQF\x1a QFC archive data
561# PRO-PACK
5620	string	RNC PRO-PACK archive data
563# 777
5640	string	777 777 archive data
565# LZS221
5660	string	sTaC LZS221 archive data
567# HPA
5680	string	HPA HPA archive data
569# Arhangel
5700	string	LG Arhangel archive data
571# EXP1, uses bzip2
5720	string	0123456789012345BZh EXP1 archive data
573# IMP
5740	string	IMP\xa IMP archive data
575# NRV
5760	string	\x00\x9E\x6E\x72\x76\xFF NRV archive data
577# Squish
5780	string	\x73\xb2\x90\xf4 Squish archive data
579# Par
5800	string	PHILIPP Par archive data
5810	string	PAR Par archive data
582# HIT
5830	string	UB HIT archive data
584# SBX
5850	belong&0xfffff000	0x53423000 SBX archive data
586# NaShrink
5870	string	NSK NaShrink archive data
588# SAPCAR
5890	string	#\ CAR\ archive\ header SAPCAR archive data
5900	string	CAR\ 2.00RG SAPCAR archive data
591# Disintegrator
5920	string	DST Disintegrator archive data
593# ASD
5940	string	ASD ASD archive data
595# InstallShield CAB
5960	string	ISc( InstallShield CAB
597# TOP4
5980	string	T4\x1a TOP4 archive data
599# BatComp left out: sig looks like COM executable
600# so TODO: get real 4dos batcomp file and find sig
601# BlakHole
6020	string	BH\5\7 BlakHole archive data
603# BIX
6040	string	BIX0 BIX archive data
605# ChiefLZA
6060	string	ChfLZ ChiefLZA archive data
607# Blink
6080	string	Blink Blink archive data
609# Logitech Compress
6100	string	\xda\xfa Logitech Compress archive data
611# ARS-Sfx (FIXME: really a SFX? then goto COM/EXE)
6121	string	(C)\ STEPANYUK ARS-Sfx archive data
613# AKT/AKT32
6140	string	AKT32 AKT32 archive data
6150	string	AKT AKT archive data
616# NPack
6170	string	MSTSM NPack archive data
618# PFT
6190	string	\0\x50\0\x14 PFT archive data
620# SemOne
6210	string	SEM SemOne archive data
622# PPMD
6230	string	\x8f\xaf\xac\x84 PPMD archive data
624# FIZ
6250	string	FIZ FIZ archive data
626# MSXiE
6270	belong&0xfffff0f0	0x4d530000 MSXiE archive data
628# DeepFreezer
6290	belong&0xfffffff0	0x797a3030 DeepFreezer archive data
630# DC
6310	string	=<DC- DC archive data
632# TPac
6330	string	\4TPAC\3 TPac archive data
634# Ai
6350	string	Ai\1\1\0 Ai archive data
6360	string	Ai\1\0\0 Ai archive data
637# Ai32
6380	string	Ai\2\0 Ai32 archive data
6390	string	Ai\2\1 Ai32 archive data
640# SBC
6410	string	SBC SBC archive data
642# Ybs
6430	string	YBS Ybs archive data
644# DitPack
6450	string	\x9e\0\0 DitPack archive data
646# DMS
6470	string	DMS! DMS archive data
648# EPC
6490	string	\x8f\xaf\xac\x8c EPC archive data
650# VSARC
6510	string	VS\x1a VSARC archive data
652# PDZ
6530	string	PDZ PDZ archive data
654# ReDuq
6550	string	rdqx ReDuq archive data
656# GCA
6570	string	GCAX GCA archive data
658# PPMN
6590	string	pN PPMN archive data
660# WinImage
6613	string	WINIMAGE WinImage archive data
662# Compressia
6630	string	CMP0CMP Compressia archive data
664# UHBC
6650	string	UHB UHBC archive data
666# WinHKI
6670	string	\x61\x5C\x04\x05 WinHKI archive data
668# WWPack data file
6690	string	WWP WWPack archive data
670# BSN (BSA, PTS-DOS)
6710	string	\xffBSG BSN archive data
6721	string	\xffBSG BSN archive data
6733	string	\xffBSG BSN archive data
6741	string	\0\xae\2 BSN archive data
6751	string	\0\xae\3 BSN archive data
6761	string	\0\xae\7 BSN archive data
677# AIN
6780	string	\x33\x18 AIN archive data
6790	string	\x33\x17 AIN archive data
680# XPA32 test moved and merged with XPA by Joerg Jenderek at Sep 2015
681# SZip (TODO: doesn't catch all versions)
6820	string	SZ\x0a\4 SZip archive data
683# XPack DiskImage
684# *.XDI updated by Joerg Jenderek Sep 2015
685# ftp://ftp.sac.sk/pub/sac/pack/0index.txt
686# GRR: this test is still too general as it catches also text files starting with jm
6870	string	jm
688# only found examples with this additional characteristic 2 bytes
689>2	string	\x2\x4	Xpack DiskImage archive data
690#!:ext xdi
691# XPack Data
692# *.xpa updated by Joerg Jenderek Sep 2015
693# ftp://ftp.elf.stuba.sk/pub/pc/pack/
6940	string	xpa	XPA
695!:ext	xpa
696# XPA32
697# ftp://ftp.elf.stuba.sk/pub/pc/pack/xpa32.zip
698# created by XPA32.EXE version 1.0.2 for Windows
699>0	string	xpa\0\1 \b32 archive data
700# created by XPACK.COM version 1.67m or 1.67r with short 0x1800
701>3	ubeshort	!0x0001	\bck archive data
702# XPack Single Data
703# changed by Joerg Jenderek Sep 2015 back to like in version 5.12
704# letter 'I'+ acute accent is equivalent to \xcd
7050	string	\xcd\ jm	Xpack single archive data
706#!:mime	application/x-xpa-compressed
707!:ext xpa
708
709# TODO: missing due to unknown magic/magic at end of file:
710#DWC
711#ARG
712#ZAR
713#PC/3270
714#InstallIt
715#RKive
716#RK
717#XPack Diskimage
718
719# These were inspired by idarc, but actually verified
720# Dzip archiver (.dz)
721# Update: Joerg Jenderek
722# URL: http://speeddemosarchive.com/dzip/
723# reference: http://speeddemosarchive.com/dzip/dz29src.zip/main.c
724# GRR: line below is too general as it matches also ASCII texts like Doszip commander help dz.txt
7250	string	DZ
726# latest version is 2.9 dated 7 may 2003
727>2	byte	<4 Dzip archive data
728!:mime	application/x-dzip
729!:ext	dz
730>>2	byte	x \b, version %i
731>>3	byte	x \b.%i
732>>4	ulelong	x \b, offset 0x%x
733>>8	ulelong	x \b, %u files
734# ZZip archiver (.zz)
7350	string	ZZ\ \0\0 ZZip archive data
7360	string	ZZ0 ZZip archive data
737# PAQ archiver (.paq)
7380	string	\xaa\x40\x5f\x77\x1f\xe5\x82\x0d PAQ archive data
7390	string	PAQ PAQ archive data
740>3	byte&0xf0	0x30
741>>3	byte	x (v%c)
742# JAR archiver (.j), this is the successor to ARJ, not Java's JAR (which is essentially ZIP)
7430xe	string	\x1aJar\x1b JAR (ARJ Software, Inc.) archive data
7440	string	JARCS JAR (ARJ Software, Inc.) archive data
745
746# ARJ archiver (jason@jarthur.Claremont.EDU)
7470	leshort		0xea60		ARJ archive data
748!:mime	application/x-arj
749>5	byte		x		\b, v%d,
750>8	byte		&0x04		multi-volume,
751>8	byte		&0x10		slash-switched,
752>8	byte		&0x20		backup,
753>34	string		x		original name: %s,
754>7	byte		0		os: MS-DOS
755>7	byte		1		os: PRIMOS
756>7	byte		2		os: Unix
757>7	byte		3		os: Amiga
758>7	byte		4		os: Macintosh
759>7	byte		5		os: OS/2
760>7	byte		6		os: Apple ][ GS
761>7	byte		7		os: Atari ST
762>7	byte		8		os: NeXT
763>7	byte		9		os: VAX/VMS
764>3	byte		>0		%d]
765# [JW] idarc says this is also possible
7662	leshort		0xea60		ARJ archive data
767
768# HA archiver (Greg Roelofs, newt@uchicago.edu)
769# This is a really bad format. A file containing HAWAII will match this...
770#0	string		HA		HA archive data,
771#>2	leshort		=1		1 file,
772#>2	leshort		>1		%hu files,
773#>4	byte&0x0f	=0		first is type CPY
774#>4	byte&0x0f	=1		first is type ASC
775#>4	byte&0x0f	=2		first is type HSC
776#>4	byte&0x0f	=0x0e		first is type DIR
777#>4	byte&0x0f	=0x0f		first is type SPECIAL
778# suggestion: at least identify small archives (<1024 files)
# The big-endian long at offset 0 is masked with 0xffff00fc: bytes 0-1 must be
# "HA" (0x48 0x41), byte 2 is ignored and the upper six bits of byte 3 must be
# clear.  Bytes 2-3 are the little-endian short tested below, so that value is
# limited to 0..1023, which makes the signature stricter than a bare "HA".
7790  belong&0xffff00fc 0x48410000 HA archive data
# little-endian 16-bit value at offset 2, printed as the number of files
780>2	leshort		=1		1 file,
781>2	leshort		>1		%u files,
# low nibble of the byte at offset 4, printed as the type of the first member
782>4	byte&0x0f	=0		first is type CPY
783>4	byte&0x0f	=1		first is type ASC
784>4	byte&0x0f	=2		first is type HSC
785>4	byte&0x0f	=0x0e		first is type DIR
786>4	byte&0x0f	=0x0f		first is type SPECIAL
787
788# HPACK archiver (Peter Gutmann, pgut1@cs.aukuni.ac.nz)
7890	string		HPAK		HPACK archive data
790
791# JAM Archive volume format, by Dmitry.Kohmanyuk@UA.net
7920	string		\351,\001JAM\ 		JAM archive,
793>7	string		>\0			version %.4s
794>0x26	byte		=0x27			-
795>>0x2b	string          >\0			label %.11s,
796>>0x27	lelong		x			serial %08x,
797>>0x36	string		>\0			fstype %.8s
798
799# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
800# Update: Joerg Jenderek
801# URL: https://en.wikipedia.org/wiki/LHA_(file_format)
802# Reference: http://web.archive.org/web/20021005080911/http://www.osirusoft.com/joejared/lzhformat.html
803#
804#	check and display information of lharc (LHa,PMarc) file
8050	name				lharc-file
806# check 1st character of method id like -lz4- -lh5- or -pm2-
807>2	string		-
808# check 5th character of method id
809>>6	string		-
810# check header level 0 1 2 3
811>>>20	ubyte		<4
812# check 2nd, 3rd and 4th character of method id
813>>>>3	regex		\^(lh[0-9a-ex]|lz[s2-8]|pm[012]|pc1)		\b
814!:mime	application/x-lzh-compressed
815# creator type "LHA "
816!:apple	????LHA
817# display archive type name like "LHa/LZS archive data" or "LArc archive"
818>>>>>2	string		-lz		\b
819!:ext	lzs
820# already known  -lzs- -lz4- -lz5- with old names
821>>>>>>2	string	-lzs		LHa/LZS archive data
822>>>>>>3	regex	\^lz[45]	LHarc 1.x archive data
823# missing -lz?- with wikipedia names
824>>>>>>3	regex	\^lz[2378]	LArc archive
825# display archive type name like "LHa (2.x) archive data"
826>>>>>2	string		-lh		\b
827# already known -lh0- -lh1- -lh2- -lh3-  -lh4- -lh5- -lh6- -lh7- -lhd- variants with old names
828>>>>>>3	regex		\^lh[01]	LHarc 1.x/ARX archive data
829# LHice archiver uses ".ICE" as name extension instead of the usual ".lzh"
830# FOOBAR archiver uses ".foo" as name extension instead of the usual one
831# "Florain Orjanov's and Olga Bachetska's ARchiver" not found at the moment
832>>>>>>>2	string	-lh1		\b
833!:ext lha/lzh/ice
834>>>>>>3	regex		\^lh[23d]	LHa 2.x? archive data
835>>>>>>3	regex		\^lh[7]		LHa (2.x)/LHark archive data
836>>>>>>3	regex		\^lh[456]	LHa (2.x) archive data
837>>>>>>>2	string	-lh5		\b
838# https://en.wikipedia.org/wiki/BIOS
839# Some mainboard BIOS like Award use LHa compression. So archives with unusual extensions are found like
840# bios.rom , kd7_v14.bin, 1010.004, ...
841!:ext lha/lzh/rom/bin
842# missing -lh?- variants (Joe Jared)
843>>>>>>3	regex		\^lh[89a-ce]	LHa (Joe Jared) archive
844# UNLHA32 2.67a
845>>>>>>2	string		-lhx		LHa (UNLHA32) archive
846# lha archives with standard file name extensions ".lha" ".lzh"
847>>>>>>3	regex		!\^(lh1|lh5)	\b
848!:ext lha/lzh
849# this should not happen if all -lh variants are described
850>>>>>>2	default		x		LHa (unknown) archive
851#!:ext	lha
852# PMarc
853>>>>>3	regex		\^pm[012]	PMarc archive data
854!:ext pma
855# append method id without leading and trailing minus character
856>>>>>3	string		x		[%3.3s]
857>>>>>>0	use	lharc-header
858#
859#	check and display information of lharc header
8600	name				lharc-header
861# header size 0x4 , 0x1b-0x61
862>0	ubyte		x
863# compressed data size != compressed file size
864#>7	ulelong		x		\b, data size %d
865# attribute: 0x2~?? 0x10~symlink|target 0x20~normal
866#>19	ubyte		x		\b, 19_0x%x
867# level identifier 0 1 2 3
868#>20	ubyte		x		\b, level %d
869# time stamp
870#>15		ubelong	x		DATE 0x%8.8x
871# OS ID for level 1
872>20	ubyte		1
873# 0x20 type found for *.rom files
874>>(21.b+24)	ubyte	<0x21		\b, 0x%x OS
875# ascii type like M for MSDOS
876>>(21.b+24)	ubyte	>0x20		\b, '%c' OS
877# OS ID for level 2
878>20	ubyte		2
879#>>23	ubyte		x		\b, OS ID 0x%x
880>>23	ubyte		<0x21		\b, 0x%x OS
881>>23	ubyte		>0x20		\b, '%c' OS
882# filename only for level 0 and 1
883>20	ubyte		<2
884# length of filename
885>>21		ubyte	>0		\b, with
886# filename
887>>>21		pstring	x		"%s"
888#
889#2	string		-lh0-		LHarc 1.x/ARX archive data [lh0]
890#!:mime	application/x-lharc
8912	string		-lh0-
892>0	use	lharc-file
893#2	string		-lh1-		LHarc 1.x/ARX archive data [lh1]
894#!:mime	application/x-lharc
8952	string		-lh1-
896>0	use	lharc-file
897# NEW -lz2- ... -lz8-
8982	string		-lz2-
899>0	use	lharc-file
9002	string		-lz3-
901>0	use	lharc-file
9022	string		-lz4-
903>0	use	lharc-file
9042	string		-lz5-
905>0	use	lharc-file
9062	string		-lz7-
907>0	use	lharc-file
9082	string		-lz8-
909>0	use	lharc-file
910#	[never seen any but the last; -lh4- reported in comp.compression:]
911#2	string		-lzs-		LHa/LZS archive data [lzs]
9122	string		-lzs-
913>0	use	lharc-file
914# According to wikipedia and others such a version does not exist
915#2	string		-lh\40-		LHa 2.x? archive data [lh ]
916#2	string		-lhd-		LHa 2.x? archive data [lhd]
9172	string		-lhd-
918>0	use	lharc-file
919#2	string		-lh2-		LHa 2.x? archive data [lh2]
9202	string		-lh2-
921>0	use	lharc-file
922#2	string		-lh3-		LHa 2.x? archive data [lh3]
9232	string		-lh3-
924>0	use	lharc-file
925#2	string		-lh4-		LHa (2.x) archive data [lh4]
9262	string		-lh4-
927>0	use	lharc-file
928#2	string		-lh5-		LHa (2.x) archive data [lh5]
9292	string		-lh5-
930>0	use	lharc-file
931#2	string		-lh6-		LHa (2.x) archive data [lh6]
9322	string		-lh6-
933>0	use	lharc-file
934#2	string		-lh7-		LHa (2.x)/LHark archive data [lh7]
9352	string		-lh7-
936# !:mime	application/x-lha
937# >20	byte		x		- header level %d
938>0	use	lharc-file
939# NEW -lh8- ... -lhe- , -lhx-
9402	string		-lh8-
941>0	use	lharc-file
9422	string		-lh9-
943>0	use	lharc-file
9442	string		-lha-
945>0	use	lharc-file
9462	string		-lhb-
947>0	use	lharc-file
9482	string		-lhc-
949>0	use	lharc-file
9502	string		-lhe-
951>0	use	lharc-file
9522	string		-lhx-
953>0	use	lharc-file
954# taken from idarc [JW]
9552   string      -lZ         PUT archive data
956# already done by LHarc magics
957# this should never happen if all sub types of LZS archive are identified
958#2   string      -lz         LZS archive data
9592   string      -sw1-       Swag archive data
960
# rar-file-header: print the version and the host OS found in a RAR header
# block. Offsets are relative to the offset of the calling "use" line.
9610	name		rar-file-header
# version byte at offset 24; only the values 15, 20 and 29 are reported
962>24	byte		15		\b, v1.5
963>24	byte		20		\b, v2.0
964>24	byte		29		\b, v4
# host OS byte at offset 15; only the values 0 to 5 are reported
965>15	byte		0		\b, os: MS-DOS
966>15	byte		1		\b, os: OS/2
967>15	byte		2		\b, os: Win32
968>15	byte		3		\b, os: Unix
969>15	byte		4		\b, os: Mac OS
970>15	byte		5		\b, os: BeOS
971
# rar-archive-header: decode the little-endian flag word at offset 3 of a RAR
# archive header. Offsets are relative to the offset of the calling "use" line.
9720	name		rar-archive-header
# print the "flags:" prefix only when at least one of the low nine bits is set
973>3	leshort&0x1ff	>0		\b, flags:
# one name is printed per set bit, in the order the tests are listed here
974>>3	leshort		&0x01		ArchiveVolume
975>>3	leshort		&0x02		Commented
976>>3	leshort		&0x04		Locked
977>>3	leshort		&0x10		NewVolumeNaming
978>>3	leshort		&0x08		Solid
979>>3	leshort		&0x20		Authenticated
980>>3	leshort		&0x40		RecoveryRecordPresent
981>>3	leshort		&0x80		EncryptedBlockHeader
982>>3	leshort		&0x100		FirstVolume
983
984# RAR (Roshal Archive) archive, recognised by its 7-byte signature at offset 0
9850	string		Rar!\x1a\7\0		RAR archive data
986!:mime	application/x-rar
987!:ext	rar/cbr
988# file header: the little-endian long at offset 0xc is used as an indirect
# base; a type byte of 0x74 at base+9 selects rar-file-header, which is then
# evaluated starting at base+7
989>(0xc.l+9)	byte	0x74
990>>(0xc.l+7)	use	rar-file-header
991# subblock (type byte 0x7a) seems to share information with file header
992>(0xc.l+9)	byte	0x7a
993>>(0xc.l+7)	use	rar-file-header
# a type byte of 0x73 at offset 9 selects rar-archive-header, evaluated from
# offset 7 (presumably the main archive header block - TODO confirm)
994>9		byte	0x73
995>>7		use	rar-archive-header
996
9970	string		Rar!\x1a\7\1\0		RAR archive data, v5
998!:mime	application/x-rar
999!:ext	rar
1000
1001# Very old RAR archive
1002# http://jasonblanks.com/wp-includes/images/papers/KnowyourarchiveRAR.pdf
10030	string		RE\x7e\x5e  RAR archive data (<v1.5)
1004!:mime	application/x-rar
1005!:ext	rar/cbr
1006
1007# SQUISH archiver (Greg Roelofs, newt@uchicago.edu)
10080	string		SQSH		squished archive data (Acorn RISCOS)
1009
1010# UC2 archiver (Greg Roelofs, newt@uchicago.edu)
1011# [JW] see exe section for self-extracting version
10120	string		UC2\x1a		UC2 archive data
1013
1014# PKZIP multi-volume archive
10150	string		PK\x07\x08PK\x03\x04	Zip multi-volume archive data, at least PKZIP v2.50 to extract
1016!:mime	application/zip
1017!:ext zip/cbz
1018
1019# Zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
10200	string		PK\005\006	Zip archive data (empty)
1021!:mime application/zip
1022!:ext zip/cbz
10230	string		PK\003\004
1024
1025# Specialised zip formats which start with a member named 'mimetype'
1026# (stored uncompressed, with no 'extra field') containing the file's MIME type.
1027# Check for an 8-byte name, a 0-byte extra field, the name "mimetype", and
1028#  contents starting with "application/":
1029>26	string		\x8\0\0\0mimetypeapplication/
1030
1031#  KOffice / OpenOffice & StarOffice / OpenDocument formats
1032#    From: Abel Cheung <abel@oaka.org>
1033
1034#   KOffice (1.2 or above) formats
1035#    (mimetype contains "application/vnd.kde.<SUBTYPE>")
1036>>50	string	vnd.kde.		KOffice (>=1.2)
1037>>>58	string	karbon			Karbon document
1038>>>58	string	kchart			KChart document
1039>>>58	string	kformula		KFormula document
1040>>>58	string	kivio			Kivio document
1041>>>58	string	kontour			Kontour document
1042>>>58	string	kpresenter		KPresenter document
1043>>>58	string	kspread			KSpread document
1044>>>58	string	kword			KWord document
1045
1046#   OpenOffice formats (for OpenOffice 1.x / StarOffice 6/7)
1047#    (mimetype contains "application/vnd.sun.xml.<SUBTYPE>")
1048>>50	string	vnd.sun.xml.		OpenOffice.org 1.x
1049>>>62	string	writer			Writer
1050>>>>68	byte	!0x2e			document
1051>>>>68	string	.template		template
1052>>>>68	string	.global			global document
1053>>>62	string	calc			Calc
1054>>>>66	byte	!0x2e			spreadsheet
1055>>>>66	string	.template		template
1056>>>62	string	draw			Draw
1057>>>>66	byte	!0x2e			document
1058>>>>66	string	.template		template
1059>>>62	string	impress			Impress
1060>>>>69	byte	!0x2e			presentation
1061>>>>69	string	.template		template
1062>>>62	string	math			Math document
1063>>>62	string	base			Database file
1064
1065#   OpenDocument formats (for OpenOffice 2.x / StarOffice >= 8)
1066#    http://lists.oasis-open.org/archives/office/200505/msg00006.html
1067#    (mimetype contains "application/vnd.oasis.opendocument.<SUBTYPE>")
1068>>50	string	vnd.oasis.opendocument.	OpenDocument
1069>>>73	string	text
1070>>>>77	byte	!0x2d			Text
1071!:mime	application/vnd.oasis.opendocument.text
1072>>>>77	string	-template		Text Template
1073!:mime	application/vnd.oasis.opendocument.text-template
1074>>>>77	string	-web			HTML Document Template
1075!:mime	application/vnd.oasis.opendocument.text-web
1076>>>>77	string	-master			Master Document
1077!:mime	application/vnd.oasis.opendocument.text-master
1078>>>73	string	graphics
1079>>>>81	byte	!0x2d			Drawing
1080!:mime	application/vnd.oasis.opendocument.graphics
1081>>>>81	string	-template		Template
1082!:mime	application/vnd.oasis.opendocument.graphics-template
1083>>>73	string	presentation
1084>>>>85	byte	!0x2d			Presentation
1085!:mime	application/vnd.oasis.opendocument.presentation
1086>>>>85	string	-template		Template
1087!:mime	application/vnd.oasis.opendocument.presentation-template
1088>>>73	string	spreadsheet
1089>>>>84	byte	!0x2d			Spreadsheet
1090!:mime	application/vnd.oasis.opendocument.spreadsheet
1091>>>>84	string	-template		Template
1092!:mime	application/vnd.oasis.opendocument.spreadsheet-template
1093>>>73	string	chart
1094>>>>78	byte	!0x2d			Chart
1095!:mime	application/vnd.oasis.opendocument.chart
1096>>>>78	string	-template		Template
1097!:mime	application/vnd.oasis.opendocument.chart-template
1098>>>73	string	formula
1099>>>>80	byte	!0x2d			Formula
1100!:mime	application/vnd.oasis.opendocument.formula
1101>>>>80	string	-template		Template
1102!:mime	application/vnd.oasis.opendocument.formula-template
1103>>>73	string	database		Database
1104!:mime	application/vnd.oasis.opendocument.database
1105# Valid for LibreOffice Base 6.0.1.1 at least
1106>>>73	string	base 			Database
1107!:mime	application/vnd.oasis.opendocument.base
1108>>>73	string	image
1109>>>>78	byte	!0x2d			Image
1110!:mime	application/vnd.oasis.opendocument.image
1111>>>>78	string	-template		Template
1112!:mime	application/vnd.oasis.opendocument.image-template
1113
1114#  EPUB (OEBPS) books using OCF (OEBPS Container Format)
1115#    http://www.idpf.org/ocf/ocf1.0/download/ocf10.htm, section 4.
1116#    From: Ralf Brown <ralf.brown@gmail.com>
1117>>50	string	epub+zip	EPUB document
1118!:mime application/epub+zip
1119
1120#  Catch other ZIP-with-mimetype formats
1121#	In a ZIP file, the bytes immediately after a member's contents are
1122#	always "PK". The 2 regex rules here print the "mimetype" member's
1123#	contents up to the first 'P'. Luckily, most MIME types don't contain
1124#	any capital 'P's. This is a kludge.
1125#    (mimetype contains "application/<OTHER>")
1126>>50		string	!epub+zip
1127>>>50		string	!vnd.oasis.opendocument.
1128>>>>50		string	!vnd.sun.xml.
1129>>>>>50		string	!vnd.kde.
1130>>>>>>38	regex	[!-OQ-~]+		Zip data (MIME type "%s"?)
1131!:mime	application/zip
1132#    (mimetype contents other than "application/*")
1133>26		string	\x8\0\0\0mimetype
1134>>38		string	!application/
1135>>>38		regex	[!-OQ-~]+		Zip data (MIME type "%s"?)
1136!:mime	application/zip
1137
1138# Java Jar files
1139>(26.s+30)	leshort	0xcafe		Java archive data (JAR)
1140!:mime	application/java-archive
1141
1142# iOS App
1143>(26.s+30)	leshort	!0xcafe
1144>>26		string	!\x8\0\0\0mimetype
1145>>>30		string	Payload/
1146>>>>38		search/64       .app/   iOS App
1147!:mime application/x-ios-app
1148
1149
1150# Generic zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
1151#   Next line excludes specialized formats:
1152>(26.s+30)	leshort	!0xcafe
1153>>26    string          !\x8\0\0\0mimetype	Zip archive data
1154!:mime	application/zip
1155>>>4	beshort		x			\b, at least
1156>>>4	use		zipversion
1157>>>4	beshort		x			to extract
1158>>>0x161	string		WINZIP		\b, WinZIP self-extracting
1159
1160# StarView Metafile
1161# From Pierre Ducroquet <pinaraf@pinaraf.info>
11620	string	VCLMTF	StarView MetaFile
1163>6	beshort	x	\b, version %d
1164>8	belong	x	\b, size %d
1165
1166# Zoo archiver
116720	lelong		0xfdc4a7dc	Zoo archive data
1168!:mime	application/x-zoo
1169>4	byte		>48		\b, v%c.
1170>>6	byte		>47		\b%c
1171>>>7	byte		>47		\b%c
1172>32	byte		>0		\b, modify: v%d
1173>>33	byte		x		\b.%d+
1174>42	lelong		0xfdc4a7dc	\b,
1175>>70	byte		>0		extract: v%d
1176>>>71	byte		x		\b.%d+
1177
1178# Shell archives
117910	string		#\ This\ is\ a\ shell\ archive	shell archive text
1180!:mime	application/octet-stream
1181
1182#
1183# LBR. NB: May conflict with the questionable
1184#          "binary Computer Graphics Metafile" format.
1185#
11860       string  \0\ \ \ \ \ \ \ \ \ \ \ \0\0    LBR archive data
1187#
1188# PMA (CP/M derivative of LHA)
1189# Update: Joerg Jenderek
1190# URL: https://en.wikipedia.org/wiki/LHA_(file_format)
1191#
1192#2       string          -pm0-           PMarc archive data [pm0]
11932	string		-pm0-
1194>0	use	lharc-file
1195#2       string          -pm1-           PMarc archive data [pm1]
11962	string		-pm1-
1197>0	use	lharc-file
1198#2       string          -pm2-           PMarc archive data [pm2]
11992	string		-pm2-
1200>0	use	lharc-file
12012       string          -pms-           PMarc SFX archive (CP/M, DOS)
1202#!:mime	application/x-foobar-exec
1203!:ext com
12045       string          -pc1-           PopCom compressed executable (CP/M)
1205#!:mime	application/x-
1206#!:ext com
1207
1208# From Rafael Laboissiere <rafael@laboissiere.net>
1209# The Project Revision Control System (see
1210# http://prcs.sourceforge.net) generates a packaged project
1211# file which is recognized by the following entry:
12120	leshort		0xeb81	PRCS packaged project
1213
1214# Microsoft cabinets
1215# by David Necas (Yeti) <yeti@physics.muni.cz>
1216#0	string	MSCF\0\0\0\0	Microsoft cabinet file data,
1217#>25	byte	x		v%d
1218#>24	byte	x		\b.%d
1219# MPi: All CABs have version 1.3, so this is pointless.
1220# Better magic in debian-additions.
1221
1222# GTKtalog catalogs
1223# by David Necas (Yeti) <yeti@physics.muni.cz>
12244	string	gtktalog\ 	GTKtalog catalog data,
1225>13	string	3		version 3
1226>>14	beshort	0x677a		(gzipped)
1227>>14	beshort	!0x677a		(not gzipped)
1228>13	string	>3		version %s
1229
1230############################################################################
1231# Parity archive reconstruction file, the 'par' file format now used on Usenet.
12320       string          PAR\0	PARity archive data
1233>48	leshort		=0	- Index file
1234>48	leshort		>0	- file number %d
1235
1236# Felix von Leitner <felix-file@fefe.de>
12370	string	d8:announce	BitTorrent file
1238!:mime	application/x-bittorrent
1239# Durval Menezes, <jmgthbfile at durval dot com>
12400	string	d13:announce-list	BitTorrent file
1241!:mime	application/x-bittorrent
1242
1243# Atari MSA archive - Teemu Hukkanen <tjhukkan@iki.fi>
12440	beshort 0x0e0f		Atari MSA archive data
1245>2	beshort x		\b, %d sectors per track
1246>4	beshort 0		\b, 1 sided
1247>4	beshort 1		\b, 2 sided
1248>6	beshort x		\b, starting track: %d
1249>8	beshort x		\b, ending track: %d
1250
1251# Alternate ZIP string (amc@arwen.cs.berkeley.edu)
12520	string	PK00PK\003\004	Zip archive data
1253!:mime	application/zip
1254!:ext zip/cbz
1255
1256# ACE archive (from http://www.wotsit.org/download.asp?f=ace)
1257# by Stefan `Sec` Zehl <sec@42.org>
12587	string		**ACE**		ACE archive data
1259>15	byte	>0		version %d
1260>16	byte	=0x00		\b, from MS-DOS
1261>16	byte	=0x01		\b, from OS/2
1262>16	byte	=0x02		\b, from Win/32
1263>16	byte	=0x03		\b, from Unix
1264>16	byte	=0x04		\b, from MacOS
1265>16	byte	=0x05		\b, from WinNT
1266>16	byte	=0x06		\b, from Primos
1267>16	byte	=0x07		\b, from AppleGS
1268>16	byte	=0x08		\b, from Atari
1269>16	byte	=0x09		\b, from Vax/VMS
1270>16	byte	=0x0A		\b, from Amiga
1271>16	byte	=0x0B		\b, from Next
1272>14	byte	x		\b, version %d to extract
1273>5	leshort &0x0080		\b, multiple volumes,
1274>>17	byte	x		\b (part %d),
1275>5	leshort &0x0002		\b, contains comment
1276>5	leshort	&0x0200		\b, sfx
1277>5	leshort	&0x0400		\b, small dictionary
1278>5	leshort	&0x0800		\b, multi-volume
1279>5	leshort	&0x1000		\b, contains AV-String
1280>>30	string	\x16*UNREGISTERED\x20VERSION*	(unregistered)
1281>5	leshort &0x2000		\b, with recovery record
1282>5	leshort &0x4000		\b, locked
1283>5	leshort &0x8000		\b, solid
1284# Date in MS-DOS format (whatever that is)
1285#>18	lelong	x		Created on
1286
1287# sfArk : compression program for Soundfonts (sf2) by Dirk Jagdmann
1288# <doj@cubic.org>
12890x1A	string	sfArk		sfArk compressed Soundfont
1290>0x15	string	2
1291>>0x1	string	>\0		Version %s
1292>>0x2A	string	>\0		: %s
1293
1294# DR-DOS 7.03 Packed File *.??_
12950	string	Packed\ File\ 	Personal NetWare Packed File
1296>12	string	x		\b, was "%.12s"
1297
1298# EET archive
1299# From: Tilman Sauerbeck <tilman@code-monkey.de>
13000	belong	0x1ee7ff00	EET archive
1301!:mime	application/x-eet
1302
1303# rzip archives
13040	string	RZIP		rzip compressed data
1305>4	byte	x		- version %d
1306>5	byte	x		\b.%d
1307>6	belong	x		(%d bytes)
1308
1309# From: "Robert Dale" <robdale@gmail.com>
13100	belong	123		dar archive,
1311>4	belong	x		label "%.8x
1312>>8	belong	x		%.8x
1313>>>12	beshort	x		%.4x"
1314>14	byte	0x54		end slice
1315>14	beshort	0x4e4e		multi-part
1316>14	beshort	0x4e53		multi-part, with -S
1317
1318# Symbian installation files
1319#  http://www.thouky.co.uk/software/psifs/sis.html
1320#  http://developer.symbian.com/main/downloads/papers/SymbianOSv91/softwareinstallsis.pdf
13218	lelong	0x10000419	Symbian installation file
1322!:mime	application/vnd.symbian.install
1323>4	lelong	0x1000006D	(EPOC release 3/4/5)
1324>4	lelong	0x10003A12	(EPOC release 6)
13250	lelong	0x10201A7A	Symbian installation file (Symbian OS 9.x)
1326!:mime	x-epoc/x-sisx-app
1327
1328# From "Nelson A. de Oliveira" <naoliv@gmail.com>
13290	string	MPQ\032		MoPaQ (MPQ) archive
1330
1331# From: "Nelson A. de Oliveira" <naoliv@gmail.com>
1332# .kgb
13330	string KGB_arch		KGB Archiver file
1334>10	string x		with compression level %.1s
1335
1336# xar (eXtensible ARchiver) archive
1337# xar archive format: http://code.google.com/p/xar/
1338# From: "David Remahl" <dremahl@apple.com>
13390	string	xar!		xar archive
1340!:mime	application/x-xar
1341#>4	beshort	x		header size %d
1342>6	beshort	x		version %d,
1343#>8	quad	x		compressed TOC: %d,
1344#>16	quad	x		uncompressed TOC: %d,
1345>24	belong	0		no checksum
1346>24	belong	1		SHA-1 checksum
1347>24	belong	2		MD5 checksum
1348
1349# Type: Parity Archive
1350# From: Daniel van Eeden <daniel_e@dds.nl>
13510	string	PAR2		Parity Archive Volume Set
1352
1353# Bacula volume format. (Volumes always start with a block header.)
1354# URL: http://bacula.org/3.0.x-manuals/en/developers/developers/Block_Header.html
1355# From: Adam Buchbinder <adam.buchbinder@gmail.com>
135612	string	BB02		Bacula volume
1357>20	bedate	x		\b, started %s
1358
1359# ePub is XHTML + XML inside a ZIP archive.  The first member of the
1360#   archive must be an uncompressed file called 'mimetype' with contents
1361#   'application/epub+zip'
1362
1363
1364# From: "Michael Gorny" <mgorny@gentoo.org>
1365# ZPAQ: http://mattmahoney.net/dc/zpaq.html
13660	string	zPQ	ZPAQ stream
1367>3	byte	x	\b, level %d
1368# From: Barry Carter <carter.barry@gmail.com>
1369# http://encode.ru/threads/456-zpaq-updates/page32
13700	string	7kSt	ZPAQ file
1371
1372# BBeB ebook, unencrypted (LRF format)
1373# URL: http://www.sven.de/librie/Librie/LrfFormat
1374# From: Adam Buchbinder <adam.buchbinder@gmail.com>
13750	string	L\0R\0F\0\0\0	BBeB ebook data, unencrypted
1376>8	beshort	x		\b, version %d
1377>36	byte	1		\b, front-to-back
1378>36	byte	16		\b, back-to-front
1379>42	beshort	x		\b, (%dx,
1380>44	beshort	x		%d)
1381
1382# Symantec GHOST image by Joerg Jenderek at May 2014
1383# http://us.norton.com/ghost/
1384# http://www.garykessler.net/library/file_sigs.html
13850		ubelong&0xFFFFf7f0	0xFEEF0100	Norton GHost image
1386# *.GHO
1387>2		ubyte&0x08		0x00		\b, first file
1388# *.GHS or *.[0-9] with cns program option
1389>2		ubyte&0x08		0x08		\b, split file
1390# part of split index interesting for *.ghs
1391>>4		ubyte			x		id=0x%x
1392# compression tag minus one equals numeric compression command line switch z[1-9]
1393>3		ubyte			0		\b, no compression
1394>3		ubyte			2		\b, fast compression (Z1)
1395>3		ubyte			3		\b, medium compression (Z2)
1396>3		ubyte			>3
1397>>3		ubyte			<11		\b, compression (Z%d-1)
1398>2		ubyte&0x08		0x00
1399# ~ 30 byte password field only for *.gho
1400>>12		ubequad			!0		\b, password protected
1401>>44		ubyte			!1
1402# 1~Image All, sector-by-sector only for *.gho
1403>>>10		ubyte			1		\b, sector copy
1404# 1~Image Boot track only for *.gho
1405>>>43		ubyte			1		\b, boot track
1406# 1~Image Disc only for *.gho implies Image Boot track and sector copy
1407>>44		ubyte			1		\b, disc sector copy
1408# optional image description only *.gho
1409>>0xff		string			>\0		"%-.254s"
1410# look for DOS sector end sequence
1411>0xE08	search/7776		\x55\xAA
1412>>&-512	indirect		x		\b; contains
1413
1414# Google Chrome extensions (CRX packages), recognised by the "Cr24" signature
1415# https://developer.chrome.com/extensions/crx
1416# https://developer.chrome.com/extensions/hosting
# The CRX header stores its integer fields little-endian, so the format
# version at offset 4 is read as ulelong. A host-order ulong would print a
# byte-swapped version number on big-endian machines.
14170	string	Cr24	Google Chrome extension
1418!:mime	application/x-chrome-extension
1419>4	ulelong	x	\b, version %u
1420
1421# SeqBox - Sequenced container
1422# ext: sbx, seqbox
1423# Marco Pontello marcopon@gmail.com
1424# reference: https://github.com/MarcoPon/SeqBox
14250	string	SBx	SeqBox,
1426>3	byte	x	version %d
1427
1428# LyNX archive
142956	string	USE\040LYNX\040TO\040DISSOLVE\040THIS\040FILE	 LyNX archive
1430