xref: /freebsd/contrib/file/magic/Magdir/archive (revision 3823d5e198425b4f5e5a80267d195769d1063773)
1#------------------------------------------------------------------------------
2# $File: archive,v 1.87 2014/06/03 19:15:58 christos Exp $
3# archive:  file(1) magic for archive formats (see also "msdos" for self-
4#           extracting compressed archives)
5#
6# cpio, ar, arc, arj, hpack, lha/lharc, rar, squish, uc2, zip, zoo, etc.
7# pre-POSIX "tar" archives are handled in the C code.
8
9# POSIX tar archives
10257	string		ustar\0		POSIX tar archive
11!:mime	application/x-tar # encoding: posix
12257	string		ustar\040\040\0	GNU tar archive
13!:mime	application/x-tar # encoding: gnu
14
15# Incremental snapshot gnu-tar format from:
16# http://www.gnu.org/software/tar/manual/html_node/Snapshot-Files.html
170	string		GNU\ tar-	GNU tar incremental snapshot data
18>&0	regex		[0-9]\.[0-9]+-[0-9]+	version %s
19
20# cpio archives
21#
22# Yes, the top two "cpio archive" formats *are* supposed to just be "short".
23# The idea is to indicate archives produced on machines with the same
24# byte order as the machine running "file" with "cpio archive", and
25# to indicate archives produced on machines with the opposite byte order
26# from the machine running "file" with "byte-swapped cpio archive".
27#
28# The SVR4 "cpio(4)" hints that there are additional formats, but they
29# are defined as "short"s; I think all the new formats are
30# character-header formats and thus are strings, not numbers.
310	short		070707		cpio archive
32!:mime	application/x-cpio
330	short		0143561		byte-swapped cpio archive
34!:mime	application/x-cpio # encoding: swapped
350	string		070707		ASCII cpio archive (pre-SVR4 or odc)
360	string		070701		ASCII cpio archive (SVR4 with no CRC)
370	string		070702		ASCII cpio archive (SVR4 with CRC)
38
39#
40# Various archive formats used by various versions of the "ar"
41# command.
42#
43
44#
45# Original UNIX archive formats.
46# They were written with binary values in host byte order, and
47# the magic number was a host "int", which might have been 16 bits
48# or 32 bits.  We don't say "PDP-11" or "VAX", as there might have
49# been ports to little-endian 16-bit-int or 32-bit-int platforms
50# (x86?) using some of those formats; if none existed, feel free
51# to use "PDP-11" for little-endian 16-bit and "VAX" for little-endian
52# 32-bit.  There might have been big-endian ports of that sort as
53# well.
54#
550	leshort		0177555		very old 16-bit-int little-endian archive
560	beshort		0177555		very old 16-bit-int big-endian archive
570	lelong		0177555		very old 32-bit-int little-endian archive
580	belong		0177555		very old 32-bit-int big-endian archive
59
600	leshort		0177545		old 16-bit-int little-endian archive
61>2	string		__.SYMDEF	random library
620	beshort		0177545		old 16-bit-int big-endian archive
63>2	string		__.SYMDEF	random library
640	lelong		0177545		old 32-bit-int little-endian archive
65>4	string		__.SYMDEF	random library
660	belong		0177545		old 32-bit-int big-endian archive
67>4	string		__.SYMDEF	random library
68
69#
70# From "pdp" (but why a 4-byte quantity?)
71#
720	lelong		0x39bed		PDP-11 old archive
730	lelong		0x39bee		PDP-11 4.0 archive
74
75#
76# XXX - what flavor of APL used this, and was it a variant of
77# some ar archive format?  It's similar to, but not the same
78# as, the APL workspace magic numbers in pdp.
79#
800	long		0100554		apl workspace
81
82#
83# System V Release 1 portable(?) archive format.
84#
850	string		=<ar>		System V Release 1 ar archive
86!:mime	application/x-archive
87
88#
89# Debian package; it's in the portable archive format, and needs to go
90# before the entry for regular portable archives, as it's recognized as
91# a portable archive whose first member has a name beginning with
92# "debian".
93#
940	string		=!<arch>\ndebian
95>8	string		debian-split	part of multipart Debian package
96!:mime	application/vnd.debian.binary-package
97>8	string		debian-binary	Debian binary package
98!:mime	application/vnd.debian.binary-package
99>8	string		!debian
100>68	string		>\0		(format %s)
101# These next two lines do not work, because a bzip2 Debian archive
102# still uses gzip for the control.tar (first in the archive).  Only
103# data.tar varies, and the location of its filename varies too.
104# file/libmagic does not currently have support for ascii-string based
105# (offsets) as of 2005-09-15.
106#>81	string		bz2		\b, uses bzip2 compression
107#>84	string		gz		\b, uses gzip compression
108#>136	ledate		x		created: %s
109
110#
111# MIPS archive; they're in the portable archive format, and need to go
112# before the entry for regular portable archives, as it's recognized as
113# a portable archive whose first member has a name beginning with
114# "__________E".
115#
1160	string	=!<arch>\n__________E	MIPS archive
117!:mime	application/x-archive
118>20	string	U			with MIPS Ucode members
119>21	string	L			with MIPSEL members
120>21	string	B			with MIPSEB members
121>19	string	L			and an EL hash table
122>19	string	B			and an EB hash table
123>22	string	X			-- out of date
124
1250	search/1	-h-		Software Tools format archive text
126
127#
128# BSD/SVR2-and-later portable archive formats.
129#
1300	string		=!<arch>		current ar archive
131!:mime	application/x-archive
132>8	string		__.SYMDEF	random library
133>68	string		__.SYMDEF\ SORTED	random library
134
135#
136# "Thin" archive, as can be produced by GNU ar.
137#
1380	string		=!<thin>\n	thin archive with
139>68	belong		0		no symbol entries
140>68	belong		1		%d symbol entry
141>68	belong		>1		%d symbol entries
142
143# ARC archiver, from Daniel Quinlan (quinlan@yggdrasil.com)
144#
145# The first byte is the magic (0x1a), byte 2 is the compression type for
146# the first file (0x01 through 0x09), and bytes 3 to 15 are the MS-DOS
147# filename of the first file (null terminated).  Since some types collide
148# we only test some types on basis of frequency: 0x08 (83%), 0x09 (5%),
149# 0x02 (5%), 0x03 (3%), 0x04 (2%), 0x06 (2%).  0x01 collides with terminfo.
1500	lelong&0x8080ffff	0x0000081a	ARC archive data, dynamic LZW
151!:mime	application/x-arc
1520	lelong&0x8080ffff	0x0000091a	ARC archive data, squashed
153!:mime	application/x-arc
1540	lelong&0x8080ffff	0x0000021a	ARC archive data, uncompressed
155!:mime	application/x-arc
1560	lelong&0x8080ffff	0x0000031a	ARC archive data, packed
157!:mime	application/x-arc
1580	lelong&0x8080ffff	0x0000041a	ARC archive data, squeezed
159!:mime	application/x-arc
1600	lelong&0x8080ffff	0x0000061a	ARC archive data, crunched
161!:mime	application/x-arc
162# [JW] stuff taken from idarc, obviously ARC successors:
1630	lelong&0x8080ffff	0x00000a1a	PAK archive data
164!:mime	application/x-arc
1650	lelong&0x8080ffff	0x0000141a	ARC+ archive data
166!:mime	application/x-arc
1670	lelong&0x8080ffff	0x0000481a	HYP archive data
168!:mime	application/x-arc
169
170# Acorn archive formats (Disaster prone simpleton, m91dps@ecs.ox.ac.uk)
171# I can't create either SPARK or ArcFS archives so I have not tested this stuff
172# [GRR:  the original entries collide with ARC, above; replaced with combined
173#  version (not tested)]
174#0	byte		0x1a		RISC OS archive (spark format)
1750	string		\032archive	RISC OS archive (ArcFS format)
1760       string          Archive\000     RISC OS archive (ArcFS format)
177
178# All these were taken from idarc, many could not be verified. Unfortunately,
179# there were many low-quality sigs, i.e. easy to trigger false positives.
180# Please notify me of any real-world fishy/ambiguous signatures and I'll try
181# to get my hands on the actual archiver and see if I find something better. [JW]
182# probably many can be enhanced by finding some 0-byte or control char near the start
183
184# idarc calls this Crush/Uncompressed... *shrug*
1850	string	CRUSH Crush archive data
186# Squeeze It (.sqz)
1870	string	HLSQZ Squeeze It archive data
188# SQWEZ
1890	string	SQWEZ SQWEZ archive data
190# HPack (.hpk)
# Disabled: this is the same test ("HPAK" at offset 0) as the
# "HPACK archiver (Peter Gutmann)" entry later in this file.  Two entries with
# an identical test only compete for the same files under two different
# descriptions, so the unverified idarc-derived copy is commented out and the
# credited entry is left as the single owner of the signature.
191#0	string	HPAK HPack archive data
192# HAP
1930	string	\x91\x33HF HAP archive data
194# MD/MDCD
1950	string	MDmd MDCD archive data
196# LIM
1970	string	LIM\x1a LIM archive data
198# SAR
1993	string	LH5 SAR archive data
200# BSArc/BS2
2010	string	\212\3SB\020\0	BSArc/BS2 archive data
202# Bethesda Softworks Archive (Oblivion)
2030	string	BSA\0 		BSArc archive data
204>4	lelong	x		version %d
205# MAR
2062	string	=-ah MAR archive data
207# ACB
208#0	belong&0x00f800ff	0x00800000 ACB archive data
209# CPZ
210# TODO, this is what idarc says: 0	string	\0\0\0 CPZ archive data
211# JRC
2120	string	JRchive JRC archive data
213# Quantum
2140	string	DS\0 Quantum archive data
215# ReSOF
2160	string	PK\3\6 ReSOF archive data
217# QuArk
2180	string	7\4 QuArk archive data
219# YAC
22014	string	YC YAC archive data
221# X1
2220	string	X1 X1 archive data
2230	string	XhDr X1 archive data
224# CDC Codec (.dqt)
2250	belong&0xffffe000	0x76ff2000 CDC Codec archive data
226# AMGC
2270	string	\xad6" AMGC archive data
228# NuLIB
2290	string	N\xc3\xb5F\xc3\xa9lx\xc3\xa5 NuLIB archive data
230# PakLeo
2310	string	LEOLZW PAKLeo archive data
232# ChArc
2330	string	SChF ChArc archive data
234# PSA
2350	string	PSA PSA archive data
236# CrossePAC
2370	string	DSIGDCC CrossePAC archive data
238# Freeze
2390	string	\x1f\x9f\x4a\x10\x0a Freeze archive data
240# KBoom
2410	string	\xc2\xa8MP\xc2\xa8 KBoom archive data
242# NSQ, must go after CDC Codec
2430	string	\x76\xff NSQ archive data
244# DPA
2450	string	Dirk\ Paehl DPA archive data
246# BA
247# TODO: idarc says "bytes 0-2 == bytes 3-5"
248# TTComp
2490	string	\0\6 TTComp archive data
250# ESP, could this conflict with Easy Software Products' (e.g.ESP ghostscript) documentation?
2510	string	ESP ESP archive data
252# ZPack
2530	string	\1ZPK\1 ZPack archive data
254# Sky
2550	string	\xbc\x40 Sky archive data
256# UFA
2570	string	UFA UFA archive data
258# Dry
2590	string	=-H2O DRY archive data
260# FoxSQZ
2610	string	FOXSQZ FoxSQZ archive data
262# AR7
2630	string	,AR7 AR7 archive data
264# PPMZ
2650	string	PPMZ PPMZ archive data
266# MS Compress
2674	string	\x88\xf0\x27 MS Compress archive data
268# updated by Joerg Jenderek
269>9	string	\0
270>>0	string	KWAJ
271>>>7	string	\321\003	MS Compress archive data
272>>>>14	ulong	>0		\b, original size: %d bytes
273>>>>18		ubyte	>0x65
274>>>>>18		string	x       \b, was %.8s
275>>>>>(10.b-4)	string	x       \b.%.3s
276# MP3 (archiver, not lossy audio compression)
2770	string	MP3\x1a MP3-Archiver archive data
278# ZET
2790	string	OZ\xc3\x9d ZET archive data
280# TSComp
2810	string	\x65\x5d\x13\x8c\x08\x01\x03\x00 TSComp archive data
282# ARQ
2830	string	gW\4\1 ARQ archive data
284# Squash
2853	string	OctSqu Squash archive data
286# Terse
2870	string	\5\1\1\0 Terse archive data
288# PUCrunch
2890	string	\x01\x08\x0b\x08\xef\x00\x9e\x32\x30\x36\x31 PUCrunch archive data
290# UHarc
2910	string	UHA UHarc archive data
292# ABComp
2930	string	\2AB ABComp archive data
2940	string	\3AB2 ABComp archive data
295# CMP
2960	string	CO\0 CMP archive data
297# Splint
2980	string	\x93\xb9\x06 Splint archive data
299# InstallShield
3000	string	\x13\x5d\x65\x8c InstallShield Z archive Data
301# Gather
3021	string	GTH Gather archive data
303# BOA
3040	string	BOA BOA archive data
305# RAX
3060	string	ULEB\xa RAX archive data
307# Xtreme
3080	string	ULEB\0 Xtreme archive data
309# Pack Magic
3100	string	@\xc3\xa2\1\0 Pack Magic archive data
311# BTS
3120	belong&0xfeffffff	0x1a034465 BTS archive data
313# ELI 5750
3140	string	Ora\  ELI 5750 archive data
315# QFC
3160	string	\x1aFC\x1a QFC archive data
3170	string	\x1aQF\x1a QFC archive data
318# PRO-PACK
3190	string	RNC PRO-PACK archive data
320# 777
3210	string	777 777 archive data
322# LZS221
3230	string	sTaC LZS221 archive data
324# HPA
3250	string	HPA HPA archive data
326# Arhangel
3270	string	LG Arhangel archive data
328# EXP1, uses bzip2
3290	string	0123456789012345BZh EXP1 archive data
330# IMP
3310	string	IMP\xa IMP archive data
332# NRV
3330	string	\x00\x9E\x6E\x72\x76\xFF NRV archive data
334# Squish
3350	string	\x73\xb2\x90\xf4 Squish archive data
336# Par
3370	string	PHILIPP Par archive data
3380	string	PAR Par archive data
339# HIT
3400	string	UB HIT archive data
341# SBX
3420	belong&0xfffff000	0x53423000 SBX archive data
343# NaShrink
3440	string	NSK NaShrink archive data
345# SAPCAR
3460	string	#\ CAR\ archive\ header SAPCAR archive data
3470	string	CAR\ 2.00RG SAPCAR archive data
348# Disintegrator
3490	string	DST Disintegrator archive data
350# ASD
3510	string	ASD ASD archive data
352# InstallShield CAB
3530	string	ISc( InstallShield CAB
354# TOP4
3550	string	T4\x1a TOP4 archive data
356# BatComp left out: sig looks like COM executable
357# so TODO: get real 4dos batcomp file and find sig
358# BlakHole
3590	string	BH\5\7 BlakHole archive data
360# BIX
3610	string	BIX0 BIX archive data
362# ChiefLZA
3630	string	ChfLZ ChiefLZA archive data
364# Blink
3650	string	Blink Blink archive data
366# Logitech Compress
3670	string	\xda\xfa Logitech Compress archive data
368# ARS-Sfx (FIXME: really a SFX? then goto COM/EXE)
3691	string	(C)\ STEPANYUK ARS-Sfx archive data
370# AKT/AKT32
3710	string	AKT32 AKT32 archive data
3720	string	AKT AKT archive data
373# NPack
3740	string	MSTSM NPack archive data
375# PFT
3760	string	\0\x50\0\x14 PFT archive data
377# SemOne
3780	string	SEM SemOne archive data
379# PPMD
3800	string	\x8f\xaf\xac\x84 PPMD archive data
381# FIZ
3820	string	FIZ FIZ archive data
383# MSXiE
3840	belong&0xfffff0f0	0x4d530000 MSXiE archive data
385# DeepFreezer
3860	belong&0xfffffff0	0x797a3030 DeepFreezer archive data
387# DC
3880	string	=<DC- DC archive data
389# TPac
3900	string	\4TPAC\3 TPac archive data
391# Ai
3920	string	Ai\1\1\0 Ai archive data
3930	string	Ai\1\0\0 Ai archive data
394# Ai32
3950	string	Ai\2\0 Ai32 archive data
3960	string	Ai\2\1 Ai32 archive data
397# SBC
3980	string	SBC SBC archive data
399# Ybs
4000	string	YBS Ybs archive data
401# DitPack
4020	string	\x9e\0\0 DitPack archive data
403# DMS
4040	string	DMS! DMS archive data
405# EPC
4060	string	\x8f\xaf\xac\x8c EPC archive data
407# VSARC
4080	string	VS\x1a VSARC archive data
409# PDZ
4100	string	PDZ PDZ archive data
411# ReDuq
4120	string	rdqx ReDuq archive data
413# GCA
4140	string	GCAX GCA archive data
415# PPMN
4160	string	pN PPMN archive data
417# WinImage
4183	string	WINIMAGE WinImage archive data
419# Compressia
4200	string	CMP0CMP Compressia archive data
421# UHBC
4220	string	UHB UHBC archive data
423# WinHKI
4240	string	\x61\x5C\x04\x05 WinHKI archive data
425# WWPack data file
4260	string	WWP WWPack archive data
427# BSN (BSA, PTS-DOS)
4280	string	\xffBSG BSN archive data
4291	string	\xffBSG BSN archive data
4303	string	\xffBSG BSN archive data
4311	string	\0\xae\2 BSN archive data
4321	string	\0\xae\3 BSN archive data
4331	string	\0\xae\7 BSN archive data
434# AIN
4350	string	\x33\x18 AIN archive data
4360	string	\x33\x17 AIN archive data
437# XPA32
4380	string	xpa\0\1 XPA32 archive data
439# SZip (TODO: doesn't catch all versions)
4400	string	SZ\x0a\4 SZip archive data
441# XPack DiskImage
4420	string	jm XPack DiskImage archive data
443# XPack Data
4440	string	xpa XPack archive data
445# XPack Single Data
4460	string	\xc3\x8d\ jm XPack single archive data
447
448# TODO: missing due to unknown magic/magic at end of file:
449#DWC
450#ARG
451#ZAR
452#PC/3270
453#InstallIt
454#RKive
455#RK
456#XPack Diskimage
457
458# These were inspired by idarc, but actually verified
459# Dzip archiver (.dz)
4600	string	DZ Dzip archive data
461>2	byte	x \b, version %i
462>3	byte	x \b.%i
463# ZZip archiver (.zz)
4640	string	ZZ\ \0\0 ZZip archive data
4650	string	ZZ0 ZZip archive data
466# PAQ archiver (.paq)
4670	string	\xaa\x40\x5f\x77\x1f\xe5\x82\x0d PAQ archive data
4680	string	PAQ PAQ archive data
469>3	byte&0xf0	0x30
470>>3	byte	x (v%c)
471# JAR archiver (.j), this is the successor to ARJ, not Java's JAR (which is essentially ZIP)
4720xe	string	\x1aJar\x1b JAR (ARJ Software, Inc.) archive data
4730	string	JARCS JAR (ARJ Software, Inc.) archive data
474
475# ARJ archiver (jason@jarthur.Claremont.EDU)
# Signature: little-endian 16-bit value 0xea60 at offset 0.
# The continuation lines below read fixed offsets of the same file:
#   byte 5             printed as the version number ("v%d")
#   byte 8, bits 0x04 / 0x10 / 0x20   printed as flag words when set
#   string at offset 34               printed as the original name
#   byte 7             selects one of the "os: ..." descriptions (0-9)
4760	leshort		0xea60		ARJ archive data
477!:mime	application/x-arj
478>5	byte		x		\b, v%d,
479>8	byte		&0x04		multi-volume,
480>8	byte		&0x10		slash-switched,
481>8	byte		&0x20		backup,
482>34	string		x		original name: %s,
483>7	byte		0		os: MS-DOS
484>7	byte		1		os: PRIMOS
485>7	byte		2		os: Unix
486>7	byte		3		os: Amiga
487>7	byte		4		os: Macintosh
488>7	byte		5		os: OS/2
489>7	byte		6		os: Apple ][ GS
490>7	byte		7		os: Atari ST
491>7	byte		8		os: NeXT
492>7	byte		9		os: VAX/VMS
# NOTE(review): prints byte 3 (when non-zero) followed by a ']' that has no
# opening '[' in any description of this entry -- confirm the intended output.
493>3	byte		>0		%d]
494# [JW] idarc says this is also possible
# Same 16-bit signature, but at offset 2.  This second entry has no
# continuation lines, so none of the details above are printed for it.
4952	leshort		0xea60		ARJ archive data
496
497# HA archiver (Greg Roelofs, newt@uchicago.edu)
498# This is a really bad format. A file containing HAWAII will match this...
499#0	string		HA		HA archive data,
500#>2	leshort		=1		1 file,
501#>2	leshort		>1		%hu files,
502#>4	byte&0x0f	=0		first is type CPY
503#>4	byte&0x0f	=1		first is type ASC
504#>4	byte&0x0f	=2		first is type HSC
505#>4	byte&0x0f	=0x0e		first is type DIR
506#>4	byte&0x0f	=0x0f		first is type SPECIAL
507# suggestion: at least identify small archives (<1024 files)
# The big-endian long at offset 0 is masked with 0xffff00fc before comparison:
#   bytes 0-1 must be "HA" (0x48 0x41),
#   byte 2 is ignored,
#   byte 3 must have its upper six bits clear (value 0-3).
# Bytes 2-3 are the little-endian file count printed below, so requiring
# byte 3 < 4 is what limits this entry to archives with fewer than 1024 files.
5080  belong&0xffff00fc 0x48410000 HA archive data
509>2	leshort		=1		1 file,
510>2	leshort		>1		%u files,
# Low nibble of byte 4 selects the description of the first member's type.
511>4	byte&0x0f	=0		first is type CPY
512>4	byte&0x0f	=1		first is type ASC
513>4	byte&0x0f	=2		first is type HSC
514>4	byte&0x0f	=0x0e		first is type DIR
515>4	byte&0x0f	=0x0f		first is type SPECIAL
516
517# HPACK archiver (Peter Gutmann, pgut1@cs.aukuni.ac.nz)
5180	string		HPAK		HPACK archive data
519
520# JAM Archive volume format, by Dmitry.Kohmanyuk@UA.net
5210	string		\351,\001JAM\ 		JAM archive,
522>7	string		>\0			version %.4s
523>0x26	byte		=0x27			-
524>>0x2b	string          >\0			label %.11s,
525>>0x27	lelong		x			serial %08x,
526>>0x36	string		>\0			fstype %.8s
527
528# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
5292	string		-lh0-		LHarc 1.x/ARX archive data [lh0]
530!:mime	application/x-lharc
5312	string		-lh1-		LHarc 1.x/ARX archive data [lh1]
532!:mime	application/x-lharc
5332	string		-lz4-		LHarc 1.x archive data [lz4]
534!:mime	application/x-lharc
5352	string		-lz5-		LHarc 1.x archive data [lz5]
536!:mime	application/x-lharc
537#	[never seen any but the last; -lh4- reported in comp.compression:]
5382	string		-lzs-		LHa/LZS archive data [lzs]
539!:mime	application/x-lha
5402	string		-lh\40-		LHa 2.x? archive data [lh ]
541!:mime	application/x-lha
5422	string		-lhd-		LHa 2.x? archive data [lhd]
543!:mime	application/x-lha
5442	string		-lh2-		LHa 2.x? archive data [lh2]
545!:mime	application/x-lha
5462	string		-lh3-		LHa 2.x? archive data [lh3]
547!:mime	application/x-lha
5482	string		-lh4-		LHa (2.x) archive data [lh4]
549!:mime	application/x-lha
5502	string		-lh5-		LHa (2.x) archive data [lh5]
551!:mime	application/x-lha
5522	string		-lh6-		LHa (2.x) archive data [lh6]
553!:mime	application/x-lha
# "-lh7-" method string at offset 2.
5542	string		-lh7-		LHa (2.x)/LHark archive data [lh7]
555!:mime	application/x-lha
# NOTE(review): a continuation line belongs only to the top-level entry
# directly above it, so the header level (byte 20) is printed for -lh7-
# archives only and not for the other -lh?-/-lz?- entries of this section --
# confirm whether that restriction is intended.
556>20	byte		x		- header level %d
557# taken from idarc [JW]
5582   string      -lZ         PUT archive data
5592   string      -lz         LZS archive data
5602   string      -sw1-       Swag archive data
561
562# RAR archiver (Greg Roelofs, newt@uchicago.edu)
5630	string		Rar!		RAR archive data,
564!:mime	application/x-rar
565>44	byte		x		v%0x,
566>10	byte		>0		flags:
567>>10	byte		&0x01		Archive volume,
568>>10	byte		&0x02		Commented,
569>>10	byte		&0x04		Locked,
570>>10	byte		&0x08		Solid,
571>>10	byte		&0x20		Authenticated,
572>35	byte		0		os: MS-DOS
573>35	byte		1		os: OS/2
574>35	byte		2		os: Win32
575>35	byte		3		os: Unix
576# some old version? idarc says:
5770   string      RE\x7e\x5e  RAR archive data
578
579# SQUISH archiver (Greg Roelofs, newt@uchicago.edu)
5800	string		SQSH		squished archive data (Acorn RISCOS)
581
582# UC2 archiver (Greg Roelofs, newt@uchicago.edu)
583# [JW] see exe section for self-extracting version
5840	string		UC2\x1a		UC2 archive data
585
586# PKZIP multi-volume archive
5870	string		PK\x07\x08PK\x03\x04	Zip multi-volume archive data, at least PKZIP v2.50 to extract
588!:mime	application/zip
589
590# Zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
5910	string		PK\005\006	Zip archive data (empty)
5920	string		PK\003\004
593
594# Specialised zip formats which start with a member named 'mimetype'
595# (stored uncompressed, with no 'extra field') containing the file's MIME type.
596# Check for an 8-byte name, a 0-byte extra field, the name "mimetype", and
597#  contents starting with "application/":
598>26	string		\x8\0\0\0mimetypeapplication/
599
600#  KOffice / OpenOffice & StarOffice / OpenDocument formats
601#    From: Abel Cheung <abel@oaka.org>
602
603#   KOffice (1.2 or above) formats
604#    (mimetype contains "application/vnd.kde.<SUBTYPE>")
605>>50	string	vnd.kde.		KOffice (>=1.2)
606>>>58	string	karbon			Karbon document
607>>>58	string	kchart			KChart document
608>>>58	string	kformula		KFormula document
609>>>58	string	kivio			Kivio document
610>>>58	string	kontour			Kontour document
611>>>58	string	kpresenter		KPresenter document
612>>>58	string	kspread			KSpread document
613>>>58	string	kword			KWord document
614
615#   OpenOffice formats (for OpenOffice 1.x / StarOffice 6/7)
616#    (mimetype contains "application/vnd.sun.xml.<SUBTYPE>")
617>>50	string	vnd.sun.xml.		OpenOffice.org 1.x
618>>>62	string	writer			Writer
619>>>>68	byte	!0x2e			document
620>>>>68	string	.template		template
621>>>>68	string	.global			global document
622>>>62	string	calc			Calc
623>>>>66	byte	!0x2e			spreadsheet
624>>>>66	string	.template		template
625>>>62	string	draw			Draw
626>>>>66	byte	!0x2e			document
627>>>>66	string	.template		template
628>>>62	string	impress			Impress
629>>>>69	byte	!0x2e			presentation
630>>>>69	string	.template		template
631>>>62	string	math			Math document
632>>>62	string	base			Database file
633
634#   OpenDocument formats (for OpenOffice 2.x / StarOffice >= 8)
635#    http://lists.oasis-open.org/archives/office/200505/msg00006.html
636#    (mimetype contains "application/vnd.oasis.opendocument.<SUBTYPE>")
637>>50	string	vnd.oasis.opendocument.	OpenDocument
638>>>73	string	text
639>>>>77	byte	!0x2d			Text
640!:mime	application/vnd.oasis.opendocument.text
641>>>>77	string	-template		Text Template
642!:mime	application/vnd.oasis.opendocument.text-template
643>>>>77	string	-web			HTML Document Template
644!:mime	application/vnd.oasis.opendocument.text-web
645>>>>77	string	-master			Master Document
646!:mime	application/vnd.oasis.opendocument.text-master
647>>>73	string	graphics
648>>>>81	byte	!0x2d			Drawing
649!:mime	application/vnd.oasis.opendocument.graphics
650>>>>81	string	-template		Template
651!:mime	application/vnd.oasis.opendocument.graphics-template
652>>>73	string	presentation
653>>>>85	byte	!0x2d			Presentation
654!:mime	application/vnd.oasis.opendocument.presentation
655>>>>85	string	-template		Template
656!:mime	application/vnd.oasis.opendocument.presentation-template
657>>>73	string	spreadsheet
658>>>>84	byte	!0x2d			Spreadsheet
659!:mime	application/vnd.oasis.opendocument.spreadsheet
660>>>>84	string	-template		Template
661!:mime	application/vnd.oasis.opendocument.spreadsheet-template
662>>>73	string	chart
663>>>>78	byte	!0x2d			Chart
664!:mime	application/vnd.oasis.opendocument.chart
665>>>>78	string	-template		Template
666!:mime	application/vnd.oasis.opendocument.chart-template
667>>>73	string	formula
668>>>>80	byte	!0x2d			Formula
669!:mime	application/vnd.oasis.opendocument.formula
670>>>>80	string	-template		Template
671!:mime	application/vnd.oasis.opendocument.formula-template
672>>>73	string	database		Database
673!:mime	application/vnd.oasis.opendocument.database
674>>>73	string	image
675>>>>78	byte	!0x2d			Image
676!:mime	application/vnd.oasis.opendocument.image
677>>>>78	string	-template		Template
678!:mime	application/vnd.oasis.opendocument.image-template
679
680#  EPUB (OEBPS) books using OCF (OEBPS Container Format)
681#    http://www.idpf.org/ocf/ocf1.0/download/ocf10.htm, section 4.
682#    From: Ralf Brown <ralf.brown@gmail.com>
683>>50	string	epub+zip	EPUB document
684!:mime application/epub+zip
685
686#  Catch other ZIP-with-mimetype formats
687#	In a ZIP file, the bytes immediately after a member's contents are
688#	always "PK". The 2 regex rules here print the "mimetype" member's
689#	contents up to the first 'P'. Luckily, most MIME types don't contain
690#	any capital 'P's. This is a kludge.
691#    (mimetype contains "application/<OTHER>")
692>>50		string	!epub+zip
693>>>50		string	!vnd.oasis.opendocument.
694>>>>50		string	!vnd.sun.xml.
695>>>>>50		string	!vnd.kde.
696>>>>>>38	regex	[!-OQ-~]+		Zip data (MIME type "%s"?)
697!:mime	application/zip
698#    (mimetype contents other than "application/*")
699>26		string	\x8\0\0\0mimetype
700>>38		string	!application/
701>>>38		regex	[!-OQ-~]+		Zip data (MIME type "%s"?)
702!:mime	application/zip
703
704# Java Jar files
705>(26.s+30)	leshort	0xcafe		Java archive data (JAR)
706!:mime	application/java-archive
707
708# Generic zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
709#   Next line excludes specialized formats:
710>(26.s+30)	leshort	!0xcafe
711>>26    string          !\x8\0\0\0mimetype	Zip archive data
712!:mime	application/zip
713>>>4	byte		0x09		\b, at least v0.9 to extract
714>>>4	byte		0x0a		\b, at least v1.0 to extract
715>>>4	byte		0x0b		\b, at least v1.1 to extract
716>>>4	byte		0x14		\b, at least v2.0 to extract
717>>>4	byte		0x2d		\b, at least v3.0 to extract
718>>>0x161	string		WINZIP		\b, WinZIP self-extracting
719
720# StarView Metafile
721# From Pierre Ducroquet <pinaraf@pinaraf.info>
7220	string	VCLMTF	StarView MetaFile
723>6	beshort	x	\b, version %d
724>8	belong	x	\b, size %d
725
726# Zoo archiver
# Signature: little-endian long 0xfdc4a7dc at offset 20 (not at the start of
# the file).
72720	lelong		0xfdc4a7dc	Zoo archive data
728!:mime	application/x-zoo
# Bytes 4, 6 and 7 are printed as characters, each only if it is above the
# given ASCII threshold (48 = '0', 47 = '/'); nesting means a later character
# is printed only when the earlier ones were.  Presumably these are the digits
# of a version number in the header text -- TODO confirm against the format.
729>4	byte		>48		\b, v%c.
730>>6	byte		>47		\b%c
731>>>7	byte		>47		\b%c
# Bytes 32 and 33 are printed as "major.minor+" of the "modify" version.
732>32	byte		>0		\b, modify: v%d
733>>33	byte		x		\b.%d+
# Only if the same magic value is found again at offset 42 are bytes 70 and 71
# printed as the "extract" version.
734>42	lelong		0xfdc4a7dc	\b,
735>>70	byte		>0		extract: v%d
736>>>71	byte		x		\b.%d+
737
738# Shell archives
73910	string		#\ This\ is\ a\ shell\ archive	shell archive text
740!:mime	application/octet-stream
741
742#
743# LBR. NB: May conflict with the questionable
744#          "binary Computer Graphics Metafile" format.
745#
7460       string  \0\ \ \ \ \ \ \ \ \ \ \ \0\0    LBR archive data
747#
748# PMA (CP/M derivative of LHA)
749#
7502       string          -pm0-           PMarc archive data [pm0]
7512       string          -pm1-           PMarc archive data [pm1]
7522       string          -pm2-           PMarc archive data [pm2]
7532       string          -pms-           PMarc SFX archive (CP/M, DOS)
7545       string          -pc1-           PopCom compressed executable (CP/M)
755
756# From Rafael Laboissiere <rafael@laboissiere.net>
757# The Project Revision Control System (see
758# http://prcs.sourceforge.net) generates a packaged project
759# file which is recognized by the following entry:
7600	leshort		0xeb81	PRCS packaged project
761
762# Microsoft cabinets
763# by David Necas (Yeti) <yeti@physics.muni.cz>
764#0	string	MSCF\0\0\0\0	Microsoft cabinet file data,
765#>25	byte	x		v%d
766#>24	byte	x		\b.%d
767# MPi: All CABs have version 1.3, so this is pointless.
768# Better magic in debian-additions.
769
770# GTKtalog catalogs
771# by David Necas (Yeti) <yeti@physics.muni.cz>
7724	string	gtktalog\ 	GTKtalog catalog data,
773>13	string	3		version 3
774>>14	beshort	0x677a		(gzipped)
775>>14	beshort	!0x677a		(not gzipped)
776>13	string	>3		version %s
777
778############################################################################
779# Parity archive reconstruction file, the 'par' file format now used on Usenet.
7800       string          PAR\0	PARity archive data
781>48	leshort		=0	- Index file
782>48	leshort		>0	- file number %d
783
784# Felix von Leitner <felix-file@fefe.de>
7850	string	d8:announce	BitTorrent file
786!:mime	application/x-bittorrent
787
788# Atari MSA archive - Teemu Hukkanen <tjhukkan@iki.fi>
7890	beshort 0x0e0f		Atari MSA archive data
790>2	beshort x		\b, %d sectors per track
791>4	beshort 0		\b, 1 sided
792>4	beshort 1		\b, 2 sided
793>6	beshort x		\b, starting track: %d
794>8	beshort x		\b, ending track: %d
795
796# Alternate ZIP string (amc@arwen.cs.berkeley.edu)
7970	string	PK00PK\003\004	Zip archive data
798
799# ACE archive (from http://www.wotsit.org/download.asp?f=ace)
800# by Stefan `Sec` Zehl <sec@42.org>
8017	string		**ACE**		ACE archive data
802>15	byte	>0		version %d
803>16	byte	=0x00		\b, from MS-DOS
804>16	byte	=0x01		\b, from OS/2
805>16	byte	=0x02		\b, from Win/32
806>16	byte	=0x03		\b, from Unix
807>16	byte	=0x04		\b, from MacOS
808>16	byte	=0x05		\b, from WinNT
809>16	byte	=0x06		\b, from Primos
810>16	byte	=0x07		\b, from AppleGS
811>16	byte	=0x08		\b, from Atari
812>16	byte	=0x09		\b, from Vax/VMS
813>16	byte	=0x0A		\b, from Amiga
814>16	byte	=0x0B		\b, from Next
815>14	byte	x		\b, version %d to extract
816>5	leshort &0x0080		\b, multiple volumes,
817>>17	byte	x		\b (part %d),
818>5	leshort &0x0002		\b, contains comment
819>5	leshort	&0x0200		\b, sfx
820>5	leshort	&0x0400		\b, small dictionary
821>5	leshort	&0x0800		\b, multi-volume
822>5	leshort	&0x1000		\b, contains AV-String
823>>30	string	\x16*UNREGISTERED\x20VERSION*	(unregistered)
824>5	leshort &0x2000		\b, with recovery record
825>5	leshort &0x4000		\b, locked
826>5	leshort &0x8000		\b, solid
827# Date in MS-DOS format (whatever that is)
828#>18	lelong	x		Created on
829
830# sfArk : compression program for Soundfonts (sf2) by Dirk Jagdmann
831# <doj@cubic.org>
8320x1A	string	sfArk		sfArk compressed Soundfont
833>0x15	string	2
834>>0x1	string	>\0		Version %s
835>>0x2A	string	>\0		: %s
836
837# DR-DOS 7.03 Packed File *.??_
8380	string	Packed\ File\ 	Personal NetWare Packed File
839>12	string	x		\b, was "%.12s"
840
841# EET archive
842# From: Tilman Sauerbeck <tilman@code-monkey.de>
8430	belong	0x1ee7ff00	EET archive
844!:mime	application/x-eet
845
846# rzip archives
8470	string	RZIP		rzip compressed data
848>4	byte	x		- version %d
849>5	byte	x		\b.%d
850>6	belong	x		(%d bytes)
851
852# From: "Robert Dale" <robdale@gmail.com>
8530	belong	123		dar archive,
854>4	belong	x		label "%.8x
855>>8	belong	x		%.8x
856>>>12	beshort	x		%.4x"
857>14	byte	0x54		end slice
858>14	beshort	0x4e4e		multi-part
859>14	beshort	0x4e53		multi-part, with -S
860
861# Symbian installation files
862#  http://www.thouky.co.uk/software/psifs/sis.html
863#  http://developer.symbian.com/main/downloads/papers/SymbianOSv91/softwareinstallsis.pdf
8648	lelong	0x10000419	Symbian installation file
865!:mime	application/vnd.symbian.install
866>4	lelong	0x1000006D	(EPOC release 3/4/5)
867>4	lelong	0x10003A12	(EPOC release 6)
8680	lelong	0x10201A7A	Symbian installation file (Symbian OS 9.x)
869!:mime	x-epoc/x-sisx-app
870
871# From "Nelson A. de Oliveira" <naoliv@gmail.com>
8720	string	MPQ\032		MoPaQ (MPQ) archive
873
874# From: Dirk Jagdmann <doj@cubic.org>
875# xar archive format: http://code.google.com/p/xar/
# Disabled: the "xar (eXtensible ARchiver) archive" entry later in this file
# tests the same "xar!" signature at offset 0, prints the same version field
# (beshort at offset 6) and additionally reports the checksum type.  Keeping
# both meant two entries with an identical test competing for the same files,
# with different output formats; the fuller entry is now the only one.
876#0	string	xar!		xar archive
877#>6	beshort	x		- version %d
878
879# From: "Nelson A. de Oliveira" <naoliv@gmail.com>
880# .kgb
8810	string KGB_arch		KGB Archiver file
882>10	string x		with compression level %.1s
883
884# xar (eXtensible ARchiver) archive
885# From: "David Remahl" <dremahl@apple.com>
8860	string	xar!		xar archive
887#>4	beshort	x		header size %d
888>6	beshort	x		version %d,
889#>8	quad	x		compressed TOC: %d,
890#>16	quad	x		uncompressed TOC: %d,
891>24	belong	0		no checksum
892>24	belong	1		SHA-1 checksum
893>24	belong	2		MD5 checksum
894
895# Type: Parity Archive
896# From: Daniel van Eeden <daniel_e@dds.nl>
8970	string	PAR2		Parity Archive Volume Set
898
899# Bacula volume format. (Volumes always start with a block header.)
900# URL: http://bacula.org/3.0.x-manuals/en/developers/developers/Block_Header.html
901# From: Adam Buchbinder <adam.buchbinder@gmail.com>
90212	string	BB02		Bacula volume
903>20	bedate	x		\b, started %s
904
905# ePub is XHTML + XML inside a ZIP archive.  The first member of the
906#   archive must be an uncompressed file called 'mimetype' with contents
907#   'application/epub+zip'; see the "epub+zip" test under the Zip entry.
908
909
910# From: "Michael Gorny" <mgorny@gentoo.org>
911# ZPAQ: http://mattmahoney.net/dc/zpaq.html
9120	string	zPQ	ZPAQ stream
913>3	byte	x	\b, level %d
914
915# BBeB ebook, unencrypted (LRF format)
916# URL: http://www.sven.de/librie/Librie/LrfFormat
917# From: Adam Buchbinder <adam.buchbinder@gmail.com>
9180	string	L\0R\0F\0\0\0	BBeB ebook data, unencrypted
919>8	beshort	x		\b, version %d
920>36	byte	1		\b, front-to-back
921>36	byte	16		\b, back-to-front
922>42	beshort	x		\b, (%dx,
923>44	beshort	x		%d)
924
925# Symantec GHOST image by Joerg Jenderek at May 2014
926# http://us.norton.com/ghost/
927# http://www.garykessler.net/library/file_sigs.html
# This entry used to appear twice in a row, line for line; it is kept once.
#
# The big-endian long at offset 0 is masked with 0xFFFFf7f0, i.e. bytes 0-1
# must be 0xFE 0xEF, byte 2 must be 0x01 apart from bit 0x08, and byte 3 must
# have a zero upper nibble.  The two masked-out fields are exactly the ones
# examined separately below: bit 0x08 of byte 2 (first/split file) and the
# value of byte 3 (compression tag).
9280		ubelong&0xFFFFf7f0	0xFEEF0100	Norton GHost image
929# *.GHO
930>2		ubyte&0x08		0x00		\b, first file
931# *.GHS or *.[0-9] with cns program option
932>2		ubyte&0x08		0x08		\b, split file
933# part of split index interesting for *.ghs
934>>4		ubyte			x		id=0x%x
935# compression tag minus one equals numeric compression command line switch z[1-9]
936>3		ubyte			0		\b, no compression
937>3		ubyte			2		\b, fast compression (Z1)
938>3		ubyte			3		\b, medium compression (Z2)
# Tags 4-10: the pair of tests below bounds the value (>3 and <11) and prints
# the raw tag followed by a literal "-1".
939>3		ubyte			>3
940>>3		ubyte			<11		\b, compression (Z%d-1)
# Everything nested under this test is reported for "first file" images only.
941>2		ubyte&0x08		0x00
942# ~ 30 byte password field only for *.gho
943>>12		ubequad			!0		\b, password protected
# Byte 44 != 1: report the sector-copy and boot-track flags individually.
944>>44		ubyte			!1
945# 1~Image All, sector-by-sector only for *.gho
946>>>10		ubyte			1		\b, sector copy
947# 1~Image Boot track only for *.gho
948>>>43		ubyte			1		\b, boot track
949# 1~Image Disc only for *.gho implies Image Boot track and sector copy
950>>44		ubyte			1		\b, disc sector copy
951# optional image description only *.gho
952>>0xff		string			>\0		"%-.254s"
953# look for DOS sector end sequence
# The search starts at offset 0xE08 and covers a range of 7776 bytes.  On a
# hit, the indirect test re-runs the magic database at a position 512 bytes
# back from the match (relative offset &-512), so the type of the embedded
# sector is appended after "; contains".
954>0xE08	search/7776		\x55\xAA
955>>&-512	indirect		x		\b; contains
988