xref: /freebsd/contrib/file/magic/Magdir/archive (revision 0b37c1590418417c894529d371800dfac71ef887)
1#------------------------------------------------------------------------------
2# $File: archive,v 1.129 2019/05/09 18:58:02 christos Exp $
3# archive:  file(1) magic for archive formats (see also "msdos" for self-
4#           extracting compressed archives)
5#
6# cpio, ar, arc, arj, hpack, lha/lharc, rar, squish, uc2, zip, zoo, etc.
7# pre-POSIX "tar" archives are also handled in the C code ../../src/is_tar.c.
8
9# POSIX tar archives
10# URL: https://en.wikipedia.org/wiki/Tar_(computing)
11# Reference: https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5&manpath=FreeBSD+8-current
12# header mainly padded with nul bytes
13500	quad		0
14!:strength /2
15# filename or extended attribute printable strings in range space null til umlaut ue
16>0	ubeshort	>0x1F00
17>>0	ubeshort	<0xFCFD
18# last 4 header bytes often null but tar\0 in gtarfail2.tar gtarfail.tar-bad
19# at https://sourceforge.net/projects/s-tar/files/testscripts/
20>>>508	ubelong&0x8B9E8DFF	0
21# nul, space or ascii digit 0-7 at start of mode
22>>>>100	ubyte&0xC8	=0
23>>>>>101 ubyte&0xC8	=0
24# nul, space at end of check sum
25>>>>>>155 ubyte&0xDF	=0
26# space or ascii digit 0 at start of check sum
27>>>>>>>148	ubyte&0xEF	=0x20
28>>>>>>>>0	use	tar-file
29#	minimal check and then display tar archive information which can also be
30#	embedded inside others like Android Backup, Clam AntiVirus database
310	name		tar-file
32>257	string		!ustar
33# header padded with nuls
34>>257	ulong		=0
35# GNU tar version 1.29 with non pax format option without refusing
36# creates misleading V7 header for Long path, Multi-volume, Volume type
37>>>156	ubyte		0x4c		GNU tar archive
38!:mime	application/x-gtar
39!:ext	tar/gtar
40>>>156	ubyte		0x4d		GNU tar archive
41!:mime	application/x-gtar
42!:ext	tar/gtar
43>>>156	ubyte		0x56		GNU tar archive
44!:mime	application/x-gtar
45!:ext	tar/gtar
46>>>156	default		x		tar archive (V7)
47!:mime	application/x-tar
48!:ext	tar
49# other stuff in padding
50# some implementations add new fields to the blank area at the end of the header record
51# created for example by DOS TAR 3.20g 1994 Tim V.Shapore with -j option
52>>257	ulong		!0		tar archive (old)
53!:mime	application/x-tar
54!:ext	tar
55# magic in newer, GNU, posix variants
56>257	string		=ustar
57# 2 last char of magic and UStar version because string expression does not work
58# 2 space characters followed by a null for GNU variant
59>>261	ubelong		=0x72202000	POSIX tar archive (GNU)
60!:mime	application/x-gtar
61!:ext	tar/gtar
62# UStar version with ASCII "00"
63>>261	ubelong		0x72003030	POSIX
64# gLOBAL and ExTENSION type only found in POSIX.1-2001 format
65>>>156	ubyte		0x67		\b.1-2001
66>>>156	ubyte		0x78		\b.1-2001
67>>>156	ubyte		x		tar archive
68!:mime	application/x-ustar
69!:ext	tar/ustar
70# version with 2 binary nuls embedded in Android Backup like com.android.settings.ab
71>>261	ubelong		0x72000000	tar archive (ustar)
72!:mime	application/x-ustar
73!:ext	tar/ustar
74# not seen: ustar variant with garbage version
75>>261	default		x		tar archive (unknown ustar)
76!:mime	application/x-ustar
77!:ext	tar/ustar
78# type flag of 1st tar archive member
79#>156	ubyte		x		\b, %c-type
80>156	ubyte		x
81>>156	ubyte		0		\b, file
82>>156	ubyte		0x30		\b, file
83>>156	ubyte		0x31		\b, hard link
84>>156	ubyte		0x32		\b, symlink
85>>156	ubyte		0x33		\b, char device
86>>156	ubyte		0x34		\b, block device
87>>156	ubyte		0x35		\b, directory
88>>156	ubyte		0x36		\b, fifo
89>>156	ubyte		0x37		\b, reserved
90>>156	ubyte		0x4c		\b, long path
91>>156	ubyte		0x4d		\b, multi volume
92>>156	ubyte		0x56		\b, volume
93>>156	ubyte		0x67		\b, global
94>>156	ubyte		0x78		\b, extension
95>>156	default		x		\b, type
96>>>156	ubyte		x		'%c'
97# name[100]
98>0	string		>\0		%-.60s
99# mode mainly stored as an octal number in ASCII null or space terminated
100>100	string		>\0		\b, mode %-.7s
101# user id mainly as octal numbers in ASCII null or space terminated
102>108	string		>\0		\b, uid %-.7s
103# group id mainly as octal numbers in ASCII null or space terminated
104>116	string		>\0		\b, gid %-.7s
105# size mainly as octal number in ASCII
106>124	ubyte		<0x38
107>>124	string		>\0		\b, size %-.12s
108# coding indicated by setting the high-order bit of the leftmost byte
109>124	ubyte		>0xEF		\b, size 0x
110>>124	ubyte		!0xff		\b%2.2x
111>>125	ubyte		!0xff		\b%2.2x
112>>126	ubyte		!0xff		\b%2.2x
113>>127	ubyte		!0xff		\b%2.2x
114>>128	ubyte		!0xff		\b%2.2x
115>>129	ubyte		!0xff		\b%2.2x
116>>130	ubyte		!0xff		\b%2.2x
117>>131	ubyte		!0xff		\b%2.2x
118>>132	ubyte		!0xff		\b%2.2x
119>>133	ubyte		!0xff		\b%2.2x
120>>134	ubyte		!0xff		\b%2.2x
121>>135	ubyte		!0xff		\b%2.2x
122# seconds since 0:0:0 1 jan 1970 UTC as octal number mainly in ASCII null or space terminated
123>136	string		>\0		\b, seconds %-.11s
124# header checksum stored as an octal number in ASCII null or space terminated
125#>148	string		x		\b, cksum %.7s
126# linkname[100]
127>157	string		>\0		\b, linkname %-.40s
128# additional fields for ustar
129>257	string		=ustar
130# owner user name null terminated
131>>265	string		>\0		\b, user %-.32s
132# group name null terminated
133>>297	string		>\0		\b, group %-.32s
134# device major minor if not zero
135>>329	ubequad&0xCFCFCFCFcFcFcFdf	!0
136>>>329	string		x		\b, devmaj %-.7s
137>>337	ubequad&0xCFCFCFCFcFcFcFdf	!0
138>>>337	string		x		\b, devmin %-.7s
139# prefix[155]
140>>345	string		>\0		\b, prefix %-.155s
141# old non ustar/POSIX tar
142>257	string		!ustar
143>>508	string		=tar\0
144# padding[255] in old star
145>>>257	string		>\0		\b, padding: %-.40s
146>>508	default		x
147# padding[255] in old tar sometimes comment field
148>>>257	string		>\0		\b, comment: %-.40s
149
150# Incremental snapshot gnu-tar format from:
151# https://www.gnu.org/software/tar/manual/html_node/Snapshot-Files.html
1520	string		GNU\ tar-	GNU tar incremental snapshot data
153>&0	regex		[0-9]\.[0-9]+-[0-9]+	version %s
154
155# cpio archives
156#
157# Yes, the top two "cpio archive" formats *are* supposed to just be "short".
158# The idea is to indicate archives produced on machines with the same
159# byte order as the machine running "file" with "cpio archive", and
160# to indicate archives produced on machines with the opposite byte order
161# from the machine running "file" with "byte-swapped cpio archive".
162#
163# The SVR4 "cpio(4)" hints that there are additional formats, but they
164# are defined as "short"s; I think all the new formats are
165# character-header formats and thus are strings, not numbers.
1660	short		070707		cpio archive
167!:mime	application/x-cpio
1680	short		0143561		byte-swapped cpio archive
169!:mime	application/x-cpio # encoding: swapped
1700	string		070707		ASCII cpio archive (pre-SVR4 or odc)
1710	string		070701		ASCII cpio archive (SVR4 with no CRC)
1720	string		070702		ASCII cpio archive (SVR4 with CRC)
173
174#
175# Various archive formats used by various versions of the "ar"
176# command.
177#
178
179#
180# Original UNIX archive formats.
181# They were written with binary values in host byte order, and
182# the magic number was a host "int", which might have been 16 bits
183# or 32 bits.  We don't say "PDP-11" or "VAX", as there might have
184# been ports to little-endian 16-bit-int or 32-bit-int platforms
185# (x86?) using some of those formats; if none existed, feel free
186# to use "PDP-11" for little-endian 16-bit and "VAX" for little-endian
187# 32-bit.  There might have been big-endian ports of that sort as
188# well.
189#
1900	leshort		0177555		very old 16-bit-int little-endian archive
1910	beshort		0177555		very old 16-bit-int big-endian archive
1920	lelong		0177555		very old 32-bit-int little-endian archive
1930	belong		0177555		very old 32-bit-int big-endian archive
194
1950	leshort		0177545		old 16-bit-int little-endian archive
196>2	string		__.SYMDEF	random library
1970	beshort		0177545		old 16-bit-int big-endian archive
198>2	string		__.SYMDEF	random library
1990	lelong		0177545		old 32-bit-int little-endian archive
200>4	string		__.SYMDEF	random library
2010	belong		0177545		old 32-bit-int big-endian archive
202>4	string		__.SYMDEF	random library
203
204#
205# From "pdp" (but why a 4-byte quantity?)
206#
2070	lelong		0x39bed		PDP-11 old archive
2080	lelong		0x39bee		PDP-11 4.0 archive
209
210#
211# XXX - what flavor of APL used this, and was it a variant of
212# some ar archive format?  It's similar to, but not the same
213# as, the APL workspace magic numbers in pdp.
214#
2150	long		0100554		apl workspace
216
217#
218# System V Release 1 portable(?) archive format.
219#
2200	string		=<ar>		System V Release 1 ar archive
221!:mime	application/x-archive
222
223#
224# Debian package; it's in the portable archive format, and needs to go
225# before the entry for regular portable archives, as it's recognized as
226# a portable archive whose first member has a name beginning with
227# "debian".
228#
229# Update: Joerg Jenderek
230# URL: https://en.wikipedia.org/wiki/Deb_(file_format)
2310	string		=!<arch>\ndebian
232# https://manpages.debian.org/testing/dpkg/dpkg-split.1.en.html
233>14	string		-split	part of multipart Debian package
234!:mime	application/vnd.debian.binary-package
235# udeb is used for stripped down deb file
236!:ext	deb/udeb
237>14	string		-binary	Debian binary package
238!:mime	application/vnd.debian.binary-package
239!:ext	deb/udeb
240# This should not happen
241>14	default		x	Unknown Debian package
242# NL terminated version; for most Debian cases this is 2.0, or 2.1 for split packages
243>68	string		>\0		(format %s)
244#>68	string		!2.0\n
245#>>68	string		x		(format %.3s)
246>68	string		=2.0\n
247# 2nd archive name=control archive name like control.tar.gz or control.tar.xz
248>>72	string		>\0		\b, with %.14s
249# look for 3rd archive name=data archive name like data.tar.{gz,xz,bz2,lzma}
250>>0	search/0x93e4f	data.tar.	\b, data compression
251# the above line only works if FILE_BYTES_MAX in ../../src/file.h is raised
252# for example like libreoffice-dev-doc_1%3a5.2.7-1+rpi1+deb9u3_all.deb
253>>>&0	string		x		%.4s
254# split Debian package case (part file written by dpkg-split, format 2.1)
255>68	string		=2.1\n
256# dpkg-1.18.25/dpkg-split/info.c
257# NL terminated ASCII package name like ckermit
258>>&0	string		x		\b, %s
259# NL terminated package version like 302-5.3
260>>>&1	string		x		%s
261# NL terminated MD5 checksum
262>>>>&1	string		x		\b, MD5 %s
263# NL terminated original package length
264>>>>>&1	string		x		\b, unsplitted size %s
265# NL terminated part length
266>>>>>>&1	string	x		\b, part length %s
267# NL terminated package part like n/m
268>>>>>>>&1	string	x		\b, part %s
269# NL terminated package architecture like armhf since dpkg 1.16.1 or later
270>>>>>>>>&1	string	x		\b, %s
271
272#
273# MIPS archive; they're in the portable archive format, and need to go
274# before the entry for regular portable archives, as it's recognized as
275# a portable archive whose first member has a name beginning with
276# "__________E".
277#
2780	string	=!<arch>\n__________E	MIPS archive
279!:mime	application/x-archive
280>20	string	U			with MIPS Ucode members
281>21	string	L			with MIPSEL members
282>21	string	B			with MIPSEB members
283>19	string	L			and an EL hash table
284>19	string	B			and an EB hash table
285>22	string	X			-- out of date
286
287#
288# BSD/SVR2-and-later portable archive formats.
289#
290# Update: Joerg Jenderek
291# URL:		http://fileformats.archiveteam.org/wiki/AR
292# Reference:	https://www.unix.com/man-page/opensolaris/3HEAD/ar.h/
293# Note:		Mach-O universal binary in ./cafebabe is dependent
294# TODO:		unify current ar archive, MIPS archive, Debian package
295#		distinguish BSD, SVR; 32, 64 bit; HP from other 32-bit SVR;
296#		*.ar packages from *.a libraries. handle empty archive
2970	string		=!<arch>\n		current ar archive
298# print first and possibly second ar_name[16] for debugging purpose
299#>8			string	x	\b, 1st "%.16s"
300#>68			string	x	\b, 2nd "%.16s"
301!:mime	application/x-archive
302# a in most case for libraries; lib for Microsoft libraries; ar else cases
303!:ext	a/lib/ar
304>8	string		__.SYMDEF	random library
305# first member with long marked name __.SYMDEF SORTED implies BSD library
306>68	string		__.SYMDEF\ SORTED	random library
307# Reference: https://parisc.wiki.kernel.org/images-parisc/b/b2/Rad_11_0_32.pdf
308# "archive file" entry moved from ./hp
309# LST header system_id 0210h~PA-RISC 1.1,... identifies the target architecture
310# LST header a_magic 0619h~relocatable library
311>68	belong 		0x020b0619	- PA-RISC1.0 relocatable library
312>68	belong	 	0x02100619	- PA-RISC1.1 relocatable library
313>68	belong 		0x02110619	- PA-RISC1.2 relocatable library
314>68	belong 		0x02140619	- PA-RISC2.0 relocatable library
315#EOF for common ar archives
316
317#
318# "Thin" archive, as can be produced by GNU ar.
319#
3200	string		=!<thin>\n	thin archive with
321>68	belong		0		no symbol entries
322>68	belong		1		%d symbol entry
323>68	belong		>1		%d symbol entries
324
3250	search/1	-h-		Software Tools format archive text
326
327# ARC archiver, from Daniel Quinlan (quinlan@yggdrasil.com)
328#
329# The first byte is the magic (0x1a), byte 2 is the compression type for
330# the first file (0x01 through 0x09), and bytes 3 to 15 are the MS-DOS
331# filename of the first file (null terminated).  Since some types collide
332# we only test some types on basis of frequency: 0x08 (83%), 0x09 (5%),
333# 0x02 (5%), 0x03 (3%), 0x04 (2%), 0x06 (2%).  0x01 collides with terminfo.
3340	lelong&0x8080ffff	0x0000081a	ARC archive data, dynamic LZW
335!:mime	application/x-arc
3360	lelong&0x8080ffff	0x0000091a	ARC archive data, squashed
337!:mime	application/x-arc
3380	lelong&0x8080ffff	0x0000021a	ARC archive data, uncompressed
339!:mime	application/x-arc
3400	lelong&0x8080ffff	0x0000031a	ARC archive data, packed
341!:mime	application/x-arc
3420	lelong&0x8080ffff	0x0000041a	ARC archive data, squeezed
343!:mime	application/x-arc
3440	lelong&0x8080ffff	0x0000061a	ARC archive data, crunched
345!:mime	application/x-arc
346# [JW] stuff taken from idarc, obviously ARC successors:
3470	lelong&0x8080ffff	0x00000a1a	PAK archive data
348!:mime	application/x-arc
3490	lelong&0x8080ffff	0x0000141a	ARC+ archive data
350!:mime	application/x-arc
3510	lelong&0x8080ffff	0x0000481a	HYP archive data
352!:mime	application/x-arc
353
354# Acorn archive formats (Disaster prone simpleton, m91dps@ecs.ox.ac.uk)
355# I can't create either SPARK or ArcFS archives so I have not tested this stuff
356# [GRR:  the original entries collide with ARC, above; replaced with combined
357#  version (not tested)]
358#0	byte		0x1a		RISC OS archive (spark format)
3590	string		\032archive	RISC OS archive (ArcFS format)
3600       string          Archive\000     RISC OS archive (ArcFS format)
361
362# All these were taken from idarc, many could not be verified. Unfortunately,
363# there were many low-quality sigs, i.e. easy to trigger false positives.
364# Please notify me of any real-world fishy/ambiguous signatures and I'll try
365# to get my hands on the actual archiver and see if I find something better. [JW]
366# probably many can be enhanced by finding some 0-byte or control char near the start
367
368# idarc calls this Crush/Uncompressed... *shrug*
3690	string	CRUSH Crush archive data
370# Squeeze It (.sqz)
3710	string	HLSQZ Squeeze It archive data
372# SQWEZ
3730	string	SQWEZ SQWEZ archive data
374# HPack (.hpk)
3750	string	HPAK HPack archive data
376# HAP
3770	string	\x91\x33HF HAP archive data
378# MD/MDCD
3790	string	MDmd MDCD archive data
380# LIM
3810	string	LIM\x1a LIM archive data
382# SAR
3833	string	LH5 SAR archive data
384# BSArc/BS2
3850	string	\212\3SB\020\0	BSArc/BS2 archive data
386# Bethesda Softworks Archive (Oblivion)
3870	string	BSA\0 		BSArc archive data
388>4	lelong	x		version %d
389# MAR
3902	string	=-ah MAR archive data
391# ACB
392#0	belong&0x00f800ff	0x00800000 ACB archive data
393# CPZ
394# TODO, this is what idarc says: 0	string	\0\0\0 CPZ archive data
395# JRC
3960	string	JRchive JRC archive data
397# Quantum
3980	string	DS\0 Quantum archive data
399# ReSOF
4000	string	PK\3\6 ReSOF archive data
401# QuArk
4020	string	7\4 QuArk archive data
403# YAC
40414	string	YC YAC archive data
405# X1
4060	string	X1 X1 archive data
4070	string	XhDr X1 archive data
408# CDC Codec (.dqt)
4090	belong&0xffffe000	0x76ff2000 CDC Codec archive data
410# AMGC
4110	string	\xad6" AMGC archive data
412# NuLIB
4130	string	N\xc3\xb5F\xc3\xa9lx\xc3\xa5 NuLIB archive data
414# PakLeo
4150	string	LEOLZW PAKLeo archive data
416# ChArc
4170	string	SChF ChArc archive data
418# PSA
4190	string	PSA PSA archive data
420# CrossePAC
4210	string	DSIGDCC CrossePAC archive data
422# Freeze
4230	string	\x1f\x9f\x4a\x10\x0a Freeze archive data
424# KBoom
4250	string	\xc2\xa8MP\xc2\xa8 KBoom archive data
426# NSQ, must go after CDC Codec
4270	string	\x76\xff NSQ archive data
428# DPA
4290	string	Dirk\ Paehl DPA archive data
430# BA
431# TODO: idarc says "bytes 0-2 == bytes 3-5"
432# TTComp
433# URL: http://fileformats.archiveteam.org/wiki/TTComp_archive
434# Update: Joerg Jenderek
435# GRR: line below is too general as it matches also Panorama database "TCDB 2003-10 demo.pan", others
4360	string	\0\6
437# look for first keyword of Panorama database *.pan
438>12	search/261	DESIGN
439# skip keyword with low entropy
440>12	default		x	TTComp archive, binary, 4K dictionary
441# (version 5.25) labeled the above entry as "TTComp archive data"
442# ESP, could this conflict with Easy Software Products' (e.g.ESP ghostscript) documentation?
4430	string	ESP ESP archive data
444# ZPack
4450	string	\1ZPK\1 ZPack archive data
446# Sky
4470	string	\xbc\x40 Sky archive data
448# UFA
4490	string	UFA UFA archive data
450# Dry
4510	string	=-H2O DRY archive data
452# FoxSQZ
4530	string	FOXSQZ FoxSQZ archive data
454# AR7
4550	string	,AR7 AR7 archive data
456# PPMZ
4570	string	PPMZ PPMZ archive data
458# MS Compress
459# Update: Joerg Jenderek
460# URL: http://fileformats.archiveteam.org/wiki/MS-DOS_installation_compression
461# Reference: https://hwiegman.home.xs4all.nl/fileformats/compress/szdd_kwaj_format.html
462# Note: use correct version of extracting tool like EXPAND, UNPACK, DECOMP or 7Z
4634	string	\x88\xf0\x27
464#		KWAJ variant
465>0	string	KWAJ		MS Compress archive data, KWAJ variant
466!:mime	application/x-ms-compress-kwaj
467# extension not working in version 5.32
468# magic/Magdir/archive, 284: Warning: EXTENSION type ` ??_' has bad char '?'
469# file: line 284: Bad magic entry '   ??_'
470!:ext	??_
471# compression method (0-4)
472>>8	uleshort	x	\b, %u method
473# offset of compressed data
474>>10	uleshort	x	\b, 0x%x offset
475#>>(10.s)	uleshort	x
476#>>>&-6		string	x	\b, TEST extension %-.3s
477# header flags to mark header extensions
478>>12	uleshort	>0	\b, 0x%x flags
479# 4 bytes: decompressed length of file
480>>12	uleshort	&0x01
481>>>14	ulelong		x	\b, original size: %u bytes
482# 2 bytes: unknown purpose
483# 2 bytes: length of unknown data + mentioned bytes
484# 1-9 bytes: null-terminated file name
485# 1-4 bytes: null-terminated file extension
486>>12	uleshort	&0x08
487>>>12	uleshort				^0x01
488>>>>12		uleshort			^0x02
489>>>>>12			uleshort		^0x04
490>>>>>>12			uleshort	^0x10
491>>>>>>>14				string	x	\b, %-.8s
492>>>>>>12			uleshort	&0x10
493>>>>>>>14				string	x	\b, %-.8s
494>>>>>>>>&1				string	x	\b.%-.3s
495>>>>>12			uleshort		&0x04
496>>>>>>12			uleshort	^0x10
497>>>>>>>(14.s)			uleshort	x
498>>>>>>>>&14				string	x	\b, %-.8s
499>>>>>>12			uleshort	&0x10
500>>>>>>>(14.s)			uleshort	x
501>>>>>>>>&14				string	x	\b, %-.8s
502>>>>>>>>>&1				string	x	\b.%-.3s
503>>>>12		uleshort			&0x02
504>>>>>12			uleshort		^0x04
505>>>>>>12			uleshort	^0x10
506>>>>>>>16				string	x	\b, %-.8s
507>>>>>>12			uleshort	&0x10
508>>>>>>>16				string	x	\b, %-.8s
509>>>>>>>>&1				string	x	\b.%-.3s
510>>>>>12			uleshort		&0x04
511>>>>>>12			uleshort	^0x10
512>>>>>>>(16.s)			uleshort	x
513>>>>>>>>&16				string	x	\b, %-.8s
# flag 0x10 set: print file name and extension.
# The name test must be one level below the indirect (16.s) test so that
# "&16" is relative to the end of that indirect field (as in the parallel
# 14/18/20 branches); the extension follows one level deeper.
514>>>>>>12			uleshort	&0x10
515>>>>>>>(16.s)			uleshort	x
516>>>>>>>>&16				string	x	\b, %-.8s
517>>>>>>>>>&1				string	x	\b.%-.3s
518>>>12	uleshort				&0x01
519>>>>12		uleshort			^0x02
520>>>>>12			uleshort		^0x04
521>>>>>>12			uleshort	^0x10
522>>>>>>>18				string	x	\b, %-.8s
523>>>>>>12			uleshort	&0x10
524>>>>>>>18				string	x	\b, %-.8s
525>>>>>>>>&1				string	x	\b.%-.3s
526>>>>>12			uleshort		&0x04
527>>>>>>12			uleshort	^0x10
528>>>>>>>(18.s)			uleshort	x
529>>>>>>>>&18				string	x	\b, %-.8s
530>>>>>>12			uleshort	&0x10
531>>>>>>>(18.s)			uleshort	x
532>>>>>>>>&18				string	x	\b, %-.8s
533>>>>>>>>>&1				string	x	\b.%-.3s
534>>>>12		uleshort			&0x02
535>>>>>12			uleshort		^0x04
536>>>>>>12			uleshort	^0x10
537>>>>>>>20				string	x	\b, %-.8s
538>>>>>>12			uleshort	&0x10
539>>>>>>>20				string	x	\b, %-.8s
540>>>>>>>>&1				string	x	\b.%-.3s
541>>>>>12			uleshort		&0x04
542>>>>>>12			uleshort	^0x10
543>>>>>>>(20.s)			uleshort	x
544>>>>>>>>&20				string	x	\b, %-.8s
545>>>>>>12			uleshort	&0x10
546>>>>>>>(20.s)			uleshort	x
547>>>>>>>>&20				string	x	\b, %-.8s
548>>>>>>>>>&1				string	x	\b.%-.3s
549# 2 bytes: length of data + mentioned bytes
550#
551#		SZDD variant Haruhiko Okumura's LZSS or 7z type MsLZ
552>0	string	SZDD		MS Compress archive data, SZDD variant
553!:mime	application/x-ms-compress-szdd
554!:ext	??_
555# The character missing from the end of the filename (0=unknown)
556>>9	string	>\0		\b, %-.1s is last character of original name
557# https://www.betaarchive.com/forum/viewtopic.php?t=26161
558# Compression mode: "A" (0x41) found but sometimes "B" in Windows 3.1 builds 026 and 034e
559>>8	string	!A		\b, %-.1s method
560>>10	ulelong	>0		\b, original size: %u bytes
561#		QBasic SZDD variant
5623	string	\x88\xf0\x27
563>0	string	SZ\x20		MS Compress archive data, QBasic variant
564!:mime	application/x-ms-compress-sz
565!:ext	??$
566>>8	ulelong	>0		\b, original size: %u bytes
567
568# MP3 (archiver, not lossy audio compression)
5690	string	MP3\x1a MP3-Archiver archive data
570# ZET
5710	string	OZ\xc3\x9d ZET archive data
572# TSComp
5730	string	\x65\x5d\x13\x8c\x08\x01\x03\x00 TSComp archive data
574# ARQ
5750	string	gW\4\1 ARQ archive data
576# Squash
5773	string	OctSqu Squash archive data
578# Terse
5790	string	\5\1\1\0 Terse archive data
580# PUCrunch
5810	string	\x01\x08\x0b\x08\xef\x00\x9e\x32\x30\x36\x31 PUCrunch archive data
582# UHarc
5830	string	UHA UHarc archive data
584# ABComp
5850	string	\2AB ABComp archive data
5860	string	\3AB2 ABComp archive data
587# CMP
5880	string	CO\0 CMP archive data
589# Splint
5900	string	\x93\xb9\x06 Splint archive data
591# InstallShield
5920	string	\x13\x5d\x65\x8c InstallShield Z archive Data
593# Gather
5941	string	GTH Gather archive data
595# BOA
5960	string	BOA BOA archive data
597# RAX
5980	string	ULEB\xa RAX archive data
599# Xtreme
6000	string	ULEB\0 Xtreme archive data
601# Pack Magic
6020	string	@\xc3\xa2\1\0 Pack Magic archive data
603# BTS
6040	belong&0xfeffffff	0x1a034465 BTS archive data
605# ELI 5750
6060	string	Ora\  ELI 5750 archive data
607# QFC
6080	string	\x1aFC\x1a QFC archive data
6090	string	\x1aQF\x1a QFC archive data
610# PRO-PACK
6110	string	RNC PRO-PACK archive data
612# 777
6130	string	777 777 archive data
614# LZS221
6150	string	sTaC LZS221 archive data
616# HPA
6170	string	HPA HPA archive data
618# Arhangel
6190	string	LG Arhangel archive data
620# EXP1, uses bzip2
6210	string	0123456789012345BZh EXP1 archive data
622# IMP
6230	string	IMP\xa IMP archive data
624# NRV
6250	string	\x00\x9E\x6E\x72\x76\xFF NRV archive data
626# Squish
6270	string	\x73\xb2\x90\xf4 Squish archive data
628# Par
6290	string	PHILIPP Par archive data
6300	string	PAR Par archive data
631# HIT
6320	string	UB HIT archive data
633# SBX
6340	belong&0xfffff000	0x53423000 SBX archive data
635# NaShrink
6360	string	NSK NaShrink archive data
637# SAPCAR
6380	string	#\ CAR\ archive\ header SAPCAR archive data
6390	string	CAR\ 2.00RG SAPCAR archive data
640# Disintegrator
6410	string	DST Disintegrator archive data
642# ASD
6430	string	ASD ASD archive data
644# InstallShield CAB
6450	string	ISc( InstallShield CAB
646# TOP4
6470	string	T4\x1a TOP4 archive data
648# BatComp left out: sig looks like COM executable
649# so TODO: get real 4dos batcomp file and find sig
650# BlakHole
6510	string	BH\5\7 BlakHole archive data
652# BIX
6530	string	BIX0 BIX archive data
654# ChiefLZA
6550	string	ChfLZ ChiefLZA archive data
656# Blink
6570	string	Blink Blink archive data
658# Logitech Compress
6590	string	\xda\xfa Logitech Compress archive data
660# ARS-Sfx (FIXME: really a SFX? then goto COM/EXE)
6611	string	(C)\ STEPANYUK ARS-Sfx archive data
662# AKT/AKT32
6630	string	AKT32 AKT32 archive data
6640	string	AKT AKT archive data
665# NPack
6660	string	MSTSM NPack archive data
667# PFT
6680	string	\0\x50\0\x14 PFT archive data
669# SemOne
6700	string	SEM SemOne archive data
671# PPMD
6720	string	\x8f\xaf\xac\x84 PPMD archive data
673# FIZ
6740	string	FIZ FIZ archive data
675# MSXiE
6760	belong&0xfffff0f0	0x4d530000 MSXiE archive data
677# DeepFreezer
6780	belong&0xfffffff0	0x797a3030 DeepFreezer archive data
679# DC
6800	string	=<DC- DC archive data
681# TPac
6820	string	\4TPAC\3 TPac archive data
683# Ai
6840	string	Ai\1\1\0 Ai archive data
6850	string	Ai\1\0\0 Ai archive data
686# Ai32
6870	string	Ai\2\0 Ai32 archive data
6880	string	Ai\2\1 Ai32 archive data
689# SBC
6900	string	SBC SBC archive data
691# Ybs
6920	string	YBS Ybs archive data
693# DitPack
6940	string	\x9e\0\0 DitPack archive data
695# DMS
6960	string	DMS! DMS archive data
697# EPC
6980	string	\x8f\xaf\xac\x8c EPC archive data
699# VSARC
7000	string	VS\x1a VSARC archive data
701# PDZ
7020	string	PDZ PDZ archive data
703# ReDuq
7040	string	rdqx ReDuq archive data
705# GCA
7060	string	GCAX GCA archive data
707# PPMN
7080	string	pN PPMN archive data
709# WinImage
7103	string	WINIMAGE WinImage archive data
711# Compressia
7120	string	CMP0CMP Compressia archive data
713# UHBC
7140	string	UHB UHBC archive data
715# WinHKI
7160	string	\x61\x5C\x04\x05 WinHKI archive data
717# WWPack data file
7180	string	WWP WWPack archive data
719# BSN (BSA, PTS-DOS)
7200	string	\xffBSG BSN archive data
7211	string	\xffBSG BSN archive data
7223	string	\xffBSG BSN archive data
7231	string	\0\xae\2 BSN archive data
7241	string	\0\xae\3 BSN archive data
7251	string	\0\xae\7 BSN archive data
726# AIN
7270	string	\x33\x18 AIN archive data
7280	string	\x33\x17 AIN archive data
729# XPA32 test moved and merged with XPA by Joerg Jenderek at Sep 2015
730# SZip (TODO: doesn't catch all versions)
7310	string	SZ\x0a\4 SZip archive data
732# XPack DiskImage
733# *.XDI updated by Joerg Jenderek Sep 2015
734# ftp://ftp.sac.sk/pub/sac/pack/0index.txt
735# GRR: this test is still too general as it catches also text files starting with jm
7360	string	jm
737# only found examples with this additional characteristic 2 bytes
738>2	string	\x2\x4	Xpack DiskImage archive data
739#!:ext xdi
740# XPack Data
741# *.xpa updated by Joerg Jenderek Sep 2015
742# ftp://ftp.elf.stuba.sk/pub/pc/pack/
7430	string	xpa	XPA
744!:ext	xpa
745# XPA32
746# ftp://ftp.elf.stuba.sk/pub/pc/pack/xpa32.zip
747# created by XPA32.EXE version 1.0.2 for Windows
748>0	string	xpa\0\1 \b32 archive data
749# created by XPACK.COM version 1.67m or 1.67r with short 0x1800
750>3	ubeshort	!0x0001	\bck archive data
751# XPack Single Data
752# changed by Joerg Jenderek Sep 2015 back to like in version 5.12
753# letter 'I'+ acute accent is equivalent to \xcd
7540	string	\xcd\ jm	Xpack single archive data
755#!:mime	application/x-xpa-compressed
756!:ext xpa
757
758# TODO: missing due to unknown magic/magic at end of file:
759#DWC
760#ARG
761#ZAR
762#PC/3270
763#InstallIt
764#RKive
765#RK
766#XPack Diskimage
767
768# These were inspired by idarc, but actually verified
769# Dzip archiver (.dz)
770# Update: Joerg Jenderek
771# URL: http://speeddemosarchive.com/dzip/
772# reference: http://speeddemosarchive.com/dzip/dz29src.zip/main.c
773# GRR: line below is too general as it matches also ASCII texts like Doszip commander help dz.txt
7740	string	DZ
775# latest version is 2.9 dated 7 may 2003
776>2	byte	<4 Dzip archive data
777!:mime	application/x-dzip
778!:ext	dz
779>>2	byte	x \b, version %i
780>>3	byte	x \b.%i
781>>4	ulelong	x \b, offset 0x%x
782>>8	ulelong	x \b, %u files
783# ZZip archiver (.zz)
7840	string	ZZ\ \0\0 ZZip archive data
7850	string	ZZ0 ZZip archive data
786# PAQ archiver (.paq)
7870	string	\xaa\x40\x5f\x77\x1f\xe5\x82\x0d PAQ archive data
7880	string	PAQ PAQ archive data
789>3	byte&0xf0	0x30
790>>3	byte	x (v%c)
791# JAR archiver (.j), this is the successor to ARJ, not Java's JAR (which is essentially ZIP)
7920xe	string	\x1aJar\x1b JAR (ARJ Software, Inc.) archive data
7930	string	JARCS JAR (ARJ Software, Inc.) archive data
794
795# ARJ archiver (jason@jarthur.Claremont.EDU)
7960	leshort		0xea60		ARJ archive data
797!:mime	application/x-arj
798>5	byte		x		\b, v%d,
799>8	byte		&0x04		multi-volume,
800>8	byte		&0x10		slash-switched,
801>8	byte		&0x20		backup,
802>34	string		x		original name: %s,
803>7	byte		0		os: MS-DOS
804>7	byte		1		os: PRIMOS
805>7	byte		2		os: Unix
806>7	byte		3		os: Amiga
807>7	byte		4		os: Macintosh
808>7	byte		5		os: OS/2
809>7	byte		6		os: Apple ][ GS
810>7	byte		7		os: Atari ST
811>7	byte		8		os: NeXT
812>7	byte		9		os: VAX/VMS
813>3	byte		>0		%d]
814# [JW] idarc says this is also possible
8152	leshort		0xea60		ARJ archive data
816
817# HA archiver (Greg Roelofs, newt@uchicago.edu)
818# This is a really bad format. A file containing HAWAII will match this...
819#0	string		HA		HA archive data,
820#>2	leshort		=1		1 file,
821#>2	leshort		>1		%hu files,
822#>4	byte&0x0f	=0		first is type CPY
823#>4	byte&0x0f	=1		first is type ASC
824#>4	byte&0x0f	=2		first is type HSC
825#>4	byte&0x0f	=0x0e		first is type DIR
826#>4	byte&0x0f	=0x0f		first is type SPECIAL
827# suggestion: at least identify small archives (<1024 files)
8280  belong&0xffff00fc 0x48410000 HA archive data
829>2	leshort		=1		1 file,
830>2	leshort		>1		%u files,
831>4	byte&0x0f	=0		first is type CPY
832>4	byte&0x0f	=1		first is type ASC
833>4	byte&0x0f	=2		first is type HSC
834>4	byte&0x0f	=0x0e		first is type DIR
835>4	byte&0x0f	=0x0f		first is type SPECIAL
836
837# HPACK archiver (Peter Gutmann, pgut1@cs.aukuni.ac.nz)
8380	string		HPAK		HPACK archive data
839
840# JAM Archive volume format, by Dmitry.Kohmanyuk@UA.net
8410	string		\351,\001JAM\ 		JAM archive,
842>7	string		>\0			version %.4s
843>0x26	byte		=0x27			-
844>>0x2b	string          >\0			label %.11s,
845>>0x27	lelong		x			serial %08x,
846>>0x36	string		>\0			fstype %.8s
847
848# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
849# Update: Joerg Jenderek
850# URL: https://en.wikipedia.org/wiki/LHA_(file_format)
851# Reference: https://web.archive.org/web/20021005080911/http://www.osirusoft.com/joejared/lzhformat.html
852#
853#	check and display information of lharc (LHa,PMarc) file
8540	name				lharc-file
855# check 1st character of method id like -lz4- -lh5- or -pm2-
856>2	string		-
857# check 5th character of method id
858>>6	string		-
859# check header level 0 1 2 3
860>>>20	ubyte		<4
861# check 2nd, 3th and 4th character of method id
862>>>>3	regex		\^(lh[0-9a-ex]|lz[s2-8]|pm[012]|pc1)		\b
863!:mime	application/x-lzh-compressed
864# creator type "LHA "
865!:apple	????LHA
866# display archive type name like "LHa/LZS archive data" or "LArc archive"
867>>>>>2	string		-lz		\b
868!:ext	lzs
869# already known  -lzs- -lz4- -lz5- with old names
870>>>>>>2	string	-lzs		LHa/LZS archive data
871>>>>>>3	regex	\^lz[45]	LHarc 1.x archive data
872# missing -lz?- with wikipedia names
873>>>>>>3	regex	\^lz[2378]	LArc archive
874# display archive type name like "LHa (2.x) archive data"
875>>>>>2	string		-lh		\b
876# already known -lh0- -lh1- -lh2- -lh3-  -lh4- -lh5- -lh6- -lh7- -lhd- variants with old names
877>>>>>>3	regex		\^lh[01]	LHarc 1.x/ARX archive data
878# LHice archiver use ".ICE" as name extension instead usual one ".lzh"
879# FOOBAR archiver use ".foo" as name extension instead usual one
880# "Florain Orjanov's and Olga Bachetska's ARchiver" not found at the moment
881>>>>>>>2	string	-lh1		\b
882!:ext lha/lzh/ice
883>>>>>>3	regex		\^lh[23d]	LHa 2.x? archive data
884>>>>>>3	regex		\^lh[7]		LHa (2.x)/LHark archive data
885>>>>>>3	regex		\^lh[456]	LHa (2.x) archive data
886>>>>>>>2	string	-lh5		\b
887# https://en.wikipedia.org/wiki/BIOS
888# Some mainboard BIOS like Award use LHa compression. So archives with unusal extension are found like
889# bios.rom , kd7_v14.bin, 1010.004, ...
890!:ext lha/lzh/rom/bin
891# missing -lh?- variants (Joe Jared)
892>>>>>>3	regex		\^lh[89a-ce]	LHa (Joe Jared) archive
893# UNLHA32 2.67a
894>>>>>>2	string		-lhx		LHa (UNLHA32) archive
895# lha archives with standard file name extensions ".lha" ".lzh"
896>>>>>>3	regex		!\^(lh1|lh5)	\b
897!:ext lha/lzh
898# this should not happen if all -lh variants are described
899>>>>>>2	default		x		LHa (unknown) archive
900#!:ext	lha
901# PMarc
902>>>>>3	regex		\^pm[012]	PMarc archive data
903!:ext pma
904# append method id without leading and trailing minus character
905>>>>>3	string		x		[%3.3s]
906>>>>>>0	use	lharc-header
907#
908#	check and display information of lharc header
#	Subroutine called from lharc-file. Depending on the header level byte at
#	offset 20 it prints the OS id and, for levels 0 and 1, the stored file name.
9090	name				lharc-header
910# header size 0x4 , 0x1b-0x61
# (test "x" always matches and there is no message, so nothing is printed here)
911>0	ubyte		x
912# compressed data size != compressed file size
913#>7	ulelong		x		\b, data size %d
914# attribute: 0x2~?? 0x10~symlink|target 0x20~normal
915#>19	ubyte		x		\b, 19_0x%x
916# level identifier 0 1 2 3
917#>20	ubyte		x		\b, level %d
918# time stamp
919#>15		ubelong	x		DATE 0x%8.8x
920# OS ID for level 1
# (21.b+24) is an indirect offset: the byte at offset 21 (the file name length,
# see below) plus 24, i.e. the OS id position shifts with the name length
921>20	ubyte		1
922# 0x20 types found for *.rom files
923>>(21.b+24)	ubyte	<0x21		\b, 0x%x OS
924# ascii type like M for MSDOS
925>>(21.b+24)	ubyte	>0x20		\b, '%c' OS
926# OS ID for level 2
# here the OS id is read from the fixed offset 23
927>20	ubyte		2
928#>>23	ubyte		x		\b, OS ID 0x%x
929>>23	ubyte		<0x21		\b, 0x%x OS
930>>23	ubyte		>0x20		\b, '%c' OS
931# filename only for level 0 and 1
932>20	ubyte		<2
933# length of filename
934>>21		ubyte	>0		\b, with
935# filename
# (pstring: the length byte at offset 21 is followed by the name itself)
936>>>21		pstring	x		"%s"
937#
938#2	string		-lh0-		LHarc 1.x/ARX archive data [lh0]
939#!:mime	application/x-lharc
9402	string		-lh0-
941>0	use	lharc-file
942#2	string		-lh1-		LHarc 1.x/ARX archive data [lh1]
943#!:mime	application/x-lharc
9442	string		-lh1-
945>0	use	lharc-file
946# NEW -lz2- ... -lz8-
9472	string		-lz2-
948>0	use	lharc-file
9492	string		-lz3-
950>0	use	lharc-file
9512	string		-lz4-
952>0	use	lharc-file
9532	string		-lz5-
954>0	use	lharc-file
9552	string		-lz7-
956>0	use	lharc-file
9572	string		-lz8-
958>0	use	lharc-file
959#	[never seen any but the last; -lh4- reported in comp.compression:]
960#2	string		-lzs-		LHa/LZS archive data [lzs]
9612	string		-lzs-
962>0	use	lharc-file
963# According to wikipedia and others such a version does not exist
964#2	string		-lh\40-		LHa 2.x? archive data [lh ]
965#2	string		-lhd-		LHa 2.x? archive data [lhd]
9662	string		-lhd-
967>0	use	lharc-file
968#2	string		-lh2-		LHa 2.x? archive data [lh2]
9692	string		-lh2-
970>0	use	lharc-file
971#2	string		-lh3-		LHa 2.x? archive data [lh3]
9722	string		-lh3-
973>0	use	lharc-file
974#2	string		-lh4-		LHa (2.x) archive data [lh4]
9752	string		-lh4-
976>0	use	lharc-file
977#2	string		-lh5-		LHa (2.x) archive data [lh5]
9782	string		-lh5-
979>0	use	lharc-file
980#2	string		-lh6-		LHa (2.x) archive data [lh6]
9812	string		-lh6-
982>0	use	lharc-file
983#2	string		-lh7-		LHa (2.x)/LHark archive data [lh7]
9842	string		-lh7-
985# !:mime	application/x-lha
986# >20	byte		x		- header level %d
987>0	use	lharc-file
988# NEW -lh8- ... -lhe- , -lhx-
9892	string		-lh8-
990>0	use	lharc-file
9912	string		-lh9-
992>0	use	lharc-file
9932	string		-lha-
994>0	use	lharc-file
9952	string		-lhb-
996>0	use	lharc-file
9972	string		-lhc-
998>0	use	lharc-file
9992	string		-lhe-
1000>0	use	lharc-file
10012	string		-lhx-
1002>0	use	lharc-file
1003# taken from idarc [JW]
10042   string      -lZ         PUT archive data
1005# already done by LHarc magics
1006# this should never happen if all sub types of LZS archive are identified
1007#2   string      -lz         LZS archive data
10082   string      -sw1-       Swag archive data
1009
# Subroutine: append the RAR version and the host OS of a block header.
# Called with "use" from the RAR entry; the offsets below are relative to the
# offset given at the call site.
10100	name		rar-file-header
# version byte; only these three values produce a label
1011>24	byte		15		\b, v1.5
1012>24	byte		20		\b, v2.0
1013>24	byte		29		\b, v4
# host OS byte
1014>15	byte		0		\b, os: MS-DOS
1015>15	byte		1		\b, os: OS/2
1016>15	byte		2		\b, os: Win32
1017>15	byte		3		\b, os: Unix
1018>15	byte		4		\b, os: Mac OS
1019>15	byte		5		\b, os: BeOS
1020
# Subroutine: list the archive flags held in the little-endian 16-bit field at
# relative offset 3. The mask 0x1ff guards the output: the "flags:" label and
# the individual names are printed only if at least one of the low nine bits
# is set. Each following line tests one bit and prints its name.
10210	name		rar-archive-header
1022>3	leshort&0x1ff	>0		\b, flags:
1023>>3	leshort		&0x01		ArchiveVolume
1024>>3	leshort		&0x02		Commented
1025>>3	leshort		&0x04		Locked
1026>>3	leshort		&0x10		NewVolumeNaming
1027>>3	leshort		&0x08		Solid
1028>>3	leshort		&0x20		Authenticated
1029>>3	leshort		&0x40		RecoveryRecordPresent
1030>>3	leshort		&0x80		EncryptedBlockHeader
1031>>3	leshort		&0x100		FirstVolume
1032
1033# RAR (Roshal Archive) archive
# The signature below is 7 bytes long, so the first block header starts at
# offset 7 and its type byte sits at offset 9.
10340	string		Rar!\x1a\7\0		RAR archive data
1035!:mime	application/x-rar
1036!:ext	rar/cbr
1037# file header
# (0xc.l+N) is an indirect offset: the little-endian long stored at offset 0xc
# plus N. +9 addresses the type byte of the following block, +7 its start.
1038>(0xc.l+9)	byte	0x74
1039>>(0xc.l+7)	use	rar-file-header
1040# subblock seems to share information with file header
1041>(0xc.l+9)	byte	0x7a
1042>>(0xc.l+7)	use	rar-file-header
# first block has type 0x73: print its flags via rar-archive-header
1043>9		byte	0x73
1044>>7		use	rar-archive-header
1045
10460	string		Rar!\x1a\7\1\0		RAR archive data, v5
1047!:mime	application/x-rar
1048!:ext	rar
1049
1050# Very old RAR archive
1051# https://jasonblanks.com/wp-includes/images/papers/KnowyourarchiveRAR.pdf
10520	string		RE\x7e\x5e  RAR archive data (<v1.5)
1053!:mime	application/x-rar
1054!:ext	rar/cbr
1055
1056# SQUISH archiver (Greg Roelofs, newt@uchicago.edu)
10570	string		SQSH		squished archive data (Acorn RISCOS)
1058
1059# UC2 archiver (Greg Roelofs, newt@uchicago.edu)
1060# [JW] see exe section for self-extracting version
10610	string		UC2\x1a		UC2 archive data
1062
1063# PKZIP multi-volume archive
10640	string		PK\x07\x08PK\x03\x04	Zip multi-volume archive data, at least PKZIP v2.50 to extract
1065!:mime	application/zip
1066!:ext zip/cbz
1067
1068# Zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
10690	string		PK\005\006	Zip archive data (empty)
1070!:mime application/zip
1071!:ext zip/cbz
1072!:strength +1
10730	string		PK\003\004
1074!:strength +1
1075
1076# Specialised zip formats which start with a member named 'mimetype'
1077# (stored uncompressed, with no 'extra field') containing the file's MIME type.
1078# Check for an 8-byte name, a 0-byte extra field, the name "mimetype", and
1079#  contents starting with "application/":
1080>26	string		\x8\0\0\0mimetypeapplication/
1081
1082#  KOffice / OpenOffice & StarOffice / OpenDocument formats
1083#    From: Abel Cheung <abel@oaka.org>
1084
1085#   KOffice (1.2 or above) formats
1086#    (mimetype contains "application/vnd.kde.<SUBTYPE>")
1087>>50	string	vnd.kde.		KOffice (>=1.2)
1088>>>58	string	karbon			Karbon document
1089>>>58	string	kchart			KChart document
1090>>>58	string	kformula		KFormula document
1091>>>58	string	kivio			Kivio document
1092>>>58	string	kontour			Kontour document
1093>>>58	string	kpresenter		KPresenter document
1094>>>58	string	kspread			KSpread document
1095>>>58	string	kword			KWord document
1096
1097#   OpenOffice formats (for OpenOffice 1.x / StarOffice 6/7)
1098#    (mimetype contains "application/vnd.sun.xml.<SUBTYPE>")
1099>>50	string	vnd.sun.xml.		OpenOffice.org 1.x
1100>>>62	string	writer			Writer
1101>>>>68	byte	!0x2e			document
1102>>>>68	string	.template		template
1103>>>>68	string	.global			global document
1104>>>62	string	calc			Calc
1105>>>>66	byte	!0x2e			spreadsheet
1106>>>>66	string	.template		template
1107>>>62	string	draw			Draw
1108>>>>66	byte	!0x2e			document
1109>>>>66	string	.template		template
1110>>>62	string	impress			Impress
1111>>>>69	byte	!0x2e			presentation
1112>>>>69	string	.template		template
1113>>>62	string	math			Math document
1114>>>62	string	base			Database file
1115
1116#   OpenDocument formats (for OpenOffice 2.x / StarOffice >= 8)
1117#    https://lists.oasis-open.org/archives/office/200505/msg00006.html
1118#    (mimetype contains "application/vnd.oasis.opendocument.<SUBTYPE>")
1119>>50	string	vnd.oasis.opendocument.	OpenDocument
1120>>>73	string	text
1121>>>>77	byte	!0x2d			Text
1122!:mime	application/vnd.oasis.opendocument.text
1123>>>>77	string	-template		Text Template
1124!:mime	application/vnd.oasis.opendocument.text-template
1125>>>>77	string	-web			HTML Document Template
1126!:mime	application/vnd.oasis.opendocument.text-web
1127>>>>77	string	-master			Master Document
1128!:mime	application/vnd.oasis.opendocument.text-master
1129>>>73	string	graphics
1130>>>>81	byte	!0x2d			Drawing
1131!:mime	application/vnd.oasis.opendocument.graphics
1132>>>>81	string	-template		Template
1133!:mime	application/vnd.oasis.opendocument.graphics-template
1134>>>73	string	presentation
1135>>>>85	byte	!0x2d			Presentation
1136!:mime	application/vnd.oasis.opendocument.presentation
1137>>>>85	string	-template		Template
1138!:mime	application/vnd.oasis.opendocument.presentation-template
1139>>>73	string	spreadsheet
1140>>>>84	byte	!0x2d			Spreadsheet
1141!:mime	application/vnd.oasis.opendocument.spreadsheet
1142>>>>84	string	-template		Template
1143!:mime	application/vnd.oasis.opendocument.spreadsheet-template
1144>>>73	string	chart
1145>>>>78	byte	!0x2d			Chart
1146!:mime	application/vnd.oasis.opendocument.chart
1147>>>>78	string	-template		Template
1148!:mime	application/vnd.oasis.opendocument.chart-template
1149>>>73	string	formula
1150>>>>80	byte	!0x2d			Formula
1151!:mime	application/vnd.oasis.opendocument.formula
1152>>>>80	string	-template		Template
1153!:mime	application/vnd.oasis.opendocument.formula-template
1154>>>73	string	database		Database
1155!:mime	application/vnd.oasis.opendocument.database
1156# Valid for LibreOffice Base 6.0.1.1 at least
1157>>>73	string	base 			Database
1158!:mime	application/vnd.oasis.opendocument.base
1159>>>73	string	image
1160>>>>78	byte	!0x2d			Image
1161!:mime	application/vnd.oasis.opendocument.image
1162>>>>78	string	-template		Template
1163!:mime	application/vnd.oasis.opendocument.image-template
1164
1165#  EPUB (OEBPS) books using OCF (OEBPS Container Format)
1166#    https://www.idpf.org/ocf/ocf1.0/download/ocf10.htm, section 4.
1167#    From: Ralf Brown <ralf.brown@gmail.com>
1168>>50	string	epub+zip	EPUB document
1169!:mime application/epub+zip
1170
1171#  Catch other ZIP-with-mimetype formats
1172#	In a ZIP file, the bytes immediately after a member's contents are
1173#	always "PK". The 2 regex rules here print the "mimetype" member's
1174#	contents up to the first 'P'. Luckily, most MIME types don't contain
1175#	any capital 'P's. This is a kludge.
1176#    (mimetype contains "application/<OTHER>")
1177>>50		string	!epub+zip
1178>>>50		string	!vnd.oasis.opendocument.
1179>>>>50		string	!vnd.sun.xml.
1180>>>>>50		string	!vnd.kde.
1181>>>>>>38	regex	[!-OQ-~]+		Zip data (MIME type "%s"?)
1182!:mime	application/zip
1183#    (mimetype contents other than "application/*")
1184>26		string	\x8\0\0\0mimetype
1185>>38		string	!application/
1186>>>38		regex	[!-OQ-~]+		Zip data (MIME type "%s"?)
1187!:mime	application/zip
1188
1189# Java Jar files
1190>(26.s+30)	leshort	0xcafe		Java archive data (JAR)
1191!:mime	application/java-archive
1192
1193# iOS App
1194>(26.s+30)	leshort	!0xcafe
1195>>26		string	!\x8\0\0\0mimetype
1196>>>30		string	Payload/
1197>>>>38		search/64       .app/   iOS App
1198!:mime application/x-ios-app
1199
1200
1201# Generic zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
1202#   Next line excludes specialized formats:
1203>(26.s+30)	leshort	!0xcafe
1204>>26    string          !\x8\0\0\0mimetype	Zip archive data
1205!:mime	application/zip
1206>>>4	beshort		x			\b, at least
1207>>>4	use		zipversion
1208>>>4	beshort		x			to extract
1209>>>0x161	string		WINZIP		\b, WinZIP self-extracting
1210
1211# StarView Metafile
1212# From Pierre Ducroquet <pinaraf@pinaraf.info>
12130	string	VCLMTF	StarView MetaFile
1214>6	beshort	x	\b, version %d
1215>8	belong	x	\b, size %d
1216
1217# Zoo archiver
# The identifying little-endian tag is not at the start of the file but at
# offset 20.
121820	lelong		0xfdc4a7dc	Zoo archive data
1219!:mime	application/x-zoo
# version characters taken from the text at offsets 4, 6 and 7; the thresholds
# 48 and 47 are compared against the ASCII codes of '0' and '/'
1220>4	byte		>48		\b, v%c.
1221>>6	byte		>47		\b%c
1222>>>7	byte		>47		\b%c
# major.minor pair printed as the "modify" version
1223>32	byte		>0		\b, modify: v%d
1224>>33	byte		x		\b.%d+
# the same tag repeated at offset 42 enables the "extract" version output
1225>42	lelong		0xfdc4a7dc	\b,
1226>>70	byte		>0		extract: v%d
1227>>>71	byte		x		\b.%d+
1228
1229# Shell archives
123010	string		#\ This\ is\ a\ shell\ archive	shell archive text
1231!:mime	application/octet-stream
1232
1233#
1234# LBR. NB: May conflict with the questionable
1235#          "binary Computer Graphics Metafile" format.
1236#
12370       string  \0\ \ \ \ \ \ \ \ \ \ \ \0\0    LBR archive data
1238#
1239# PMA (CP/M derivative of LHA)
1240# Update: Joerg Jenderek
1241# URL: https://en.wikipedia.org/wiki/LHA_(file_format)
1242#
1243#2       string          -pm0-           PMarc archive data [pm0]
12442	string		-pm0-
1245>0	use	lharc-file
1246#2       string          -pm1-           PMarc archive data [pm1]
12472	string		-pm1-
1248>0	use	lharc-file
1249#2       string          -pm2-           PMarc archive data [pm2]
12502	string		-pm2-
1251>0	use	lharc-file
12522       string          -pms-           PMarc SFX archive (CP/M, DOS)
1253#!:mime	application/x-foobar-exec
1254!:ext com
12555       string          -pc1-           PopCom compressed executable (CP/M)
1256#!:mime	application/x-
1257#!:ext com
1258
1259# From Rafael Laboissiere <rafael@laboissiere.net>
1260# The Project Revision Control System (see
1261# http://prcs.sourceforge.net) generates a packaged project
1262# file which is recognized by the following entry:
12630	leshort		0xeb81	PRCS packaged project
1264
1265# Microsoft cabinets
1266# by David Necas (Yeti) <yeti@physics.muni.cz>
1267#0	string	MSCF\0\0\0\0	Microsoft cabinet file data,
1268#>25	byte	x		v%d
1269#>24	byte	x		\b.%d
1270# MPi: All CABs have version 1.3, so this is pointless.
1271# Better magic in debian-additions.
1272
1273# GTKtalog catalogs
1274# by David Necas (Yeti) <yeti@physics.muni.cz>
12754	string	gtktalog\ 	GTKtalog catalog data,
1276>13	string	3		version 3
1277>>14	beshort	0x677a		(gzipped)
1278>>14	beshort	!0x677a		(not gzipped)
1279>13	string	>3		version %s
1280
1281############################################################################
1282# Parity archive reconstruction file, the 'par' file format now used on Usenet.
12830       string          PAR\0	PARity archive data
1284>48	leshort		=0	- Index file
1285>48	leshort		>0	- file number %d
1286
1287# Felix von Leitner <felix-file@fefe.de>
12880	string	d8:announce	BitTorrent file
1289!:mime	application/x-bittorrent
1290# Durval Menezes, <jmgthbfile at durval dot com>
12910	string	d13:announce-list	BitTorrent file
1292!:mime	application/x-bittorrent
1293
1294# Atari MSA archive - Teemu Hukkanen <tjhukkan@iki.fi>
12950	beshort 0x0e0f		Atari MSA archive data
1296>2	beshort x		\b, %d sectors per track
1297>4	beshort 0		\b, 1 sided
1298>4	beshort 1		\b, 2 sided
1299>6	beshort x		\b, starting track: %d
1300>8	beshort x		\b, ending track: %d
1301
1302# Alternate ZIP string (amc@arwen.cs.berkeley.edu)
13030	string	PK00PK\003\004	Zip archive data
1304!:mime	application/zip
1305!:ext zip/cbz
1306
1307# ACE archive (from http://www.wotsit.org/download.asp?f=ace)
1308# by Stefan `Sec` Zehl <sec@42.org>
13097	string		**ACE**		ACE archive data
1310>15	byte	>0		version %d
1311>16	byte	=0x00		\b, from MS-DOS
1312>16	byte	=0x01		\b, from OS/2
1313>16	byte	=0x02		\b, from Win/32
1314>16	byte	=0x03		\b, from Unix
1315>16	byte	=0x04		\b, from MacOS
1316>16	byte	=0x05		\b, from WinNT
1317>16	byte	=0x06		\b, from Primos
1318>16	byte	=0x07		\b, from AppleGS
1319>16	byte	=0x08		\b, from Atari
1320>16	byte	=0x09		\b, from Vax/VMS
1321>16	byte	=0x0A		\b, from Amiga
1322>16	byte	=0x0B		\b, from Next
1323>14	byte	x		\b, version %d to extract
1324>5	leshort &0x0080		\b, multiple volumes,
1325>>17	byte	x		\b (part %d),
1326>5	leshort &0x0002		\b, contains comment
1327>5	leshort	&0x0200		\b, sfx
1328>5	leshort	&0x0400		\b, small dictionary
1329>5	leshort	&0x0800		\b, multi-volume
1330>5	leshort	&0x1000		\b, contains AV-String
1331>>30	string	\x16*UNREGISTERED\x20VERSION*	(unregistered)
1332>5	leshort &0x2000		\b, with recovery record
1333>5	leshort &0x4000		\b, locked
1334>5	leshort &0x8000		\b, solid
1335# Date in MS-DOS format (whatever that is)
1336#>18	lelong	x		Created on
1337
1338# sfArk : compression program for Soundfonts (sf2) by Dirk Jagdmann
1339# <doj@cubic.org>
13400x1A	string	sfArk		sfArk compressed Soundfont
1341>0x15	string	2
1342>>0x1	string	>\0		Version %s
1343>>0x2A	string	>\0		: %s
1344
1345# DR-DOS 7.03 Packed File *.??_
13460	string	Packed\ File\ 	Personal NetWare Packed File
1347>12	string	x		\b, was "%.12s"
1348
1349# EET archive
1350# From: Tilman Sauerbeck <tilman@code-monkey.de>
13510	belong	0x1ee7ff00	EET archive
1352!:mime	application/x-eet
1353
1354# rzip archives
# Layout used below: "RZIP", one byte major version, one byte minor version,
# then a 32-bit big-endian size. The size is a byte count and therefore read
# and printed as unsigned; a signed read would show sizes of 2 GiB and more
# as negative numbers.
13550	string	RZIP		rzip compressed data
1356>4	byte	x		- version %d
1357>5	byte	x		\b.%d
1358>6	ubelong	x		(%u bytes)
1359
1360# From:		Joerg Jenderek
1361# URL:		https://help.foxitsoftware.com/kb/install-fzip-file.php
1362# reference:	http://mark0.net/download/triddefs_xml.7z/
1363#		defs/f/fzip.trid.xml
1364# Note: unknown compression; No "PK" zip magic; normally in directory like
1365#	"%APPDATA%\Foxit Software\Addon\Foxit Reader\Install"
13660	ubequad	0x2506781901010000	Foxit add-on/update
1367!:mime	application/x-fzip
1368!:ext	fzip
1369
1370# From: "Robert Dale" <robdale@gmail.com>
# The magic is the big-endian 32-bit value 123 at offset 0.
13710	belong	123		dar archive,
# label: the 10 bytes at offsets 4-13, printed as hex in three groups inside
# one pair of double quotes
1371>4	belong	x		label "%.8x
1372>>8	belong	x		%.8x
1373>>>12	beshort	x		%.4x"
# slice marker at offset 14: 0x54 is ASCII 'T', 0x4e4e is "NN", 0x4e53 is "NS"
1374>14	byte	0x54		end slice
1375>14	beshort	0x4e4e		multi-part
1376>14	beshort	0x4e53		multi-part, with -S
1378
1379# Symbian installation files
1380#  https://www.thouky.co.uk/software/psifs/sis.html
1381#  http://developer.symbian.com/main/downloads/papers/SymbianOSv91/softwareinstallsis.pdf
13828	lelong	0x10000419	Symbian installation file
1383!:mime	application/vnd.symbian.install
1384>4	lelong	0x1000006D	(EPOC release 3/4/5)
1385>4	lelong	0x10003A12	(EPOC release 6)
13860	lelong	0x10201A7A	Symbian installation file (Symbian OS 9.x)
1387!:mime	x-epoc/x-sisx-app
1388
1389# From "Nelson A. de Oliveira" <naoliv@gmail.com>
13900	string	MPQ\032		MoPaQ (MPQ) archive
1391
1392# From: "Nelson A. de Oliveira" <naoliv@gmail.com>
1393# .kgb
13940	string KGB_arch		KGB Archiver file
1395>10	string x		with compression level %.1s
1396
1397# xar (eXtensible ARchiver) archive
1398# URL: https://en.wikipedia.org/wiki/Xar_(archiver)
1399# xar archive format: https://code.google.com/p/xar/
1400# From: "David Remahl" <dremahl@apple.com>
1401# Update: Joerg Jenderek
1402# TODO: lzma compression; X509Data for pkg and xip
1403# Note: verified by `xar --dump-header -f FullBundleUpdate.xar` or
1404# `7z t -txar Xcode_10.2_beta_4.xip`
14050	string	xar!		xar archive
1406!:mime	application/x-xar
1407# pkg for Mac OSX installer package like FullBundleUpdate.pkg
1408# xip for signed Apple software like Xcode_10.2_beta_4.xip
1409!:ext	xar/pkg/xip
1410# always 28 in older archives
1411>4	ubeshort >28		\b, header size %u
1412# currently there exists only version 1 since about 2014
1413>6	ubeshort >1		version %u,
1414>8	ubequad	x		compressed TOC: %llu,
1415#>16	ubequad	x		uncompressed TOC: %llu,
1416# cksum_alg 0-2 in older and also 3-4 in newer
1417>24	belong	0		no checksum
1418>24	belong	1		SHA-1 checksum
1419>24	belong	2		MD5 checksum
1420>24	belong	3		SHA-256 checksum
1421>24	belong	4		SHA-512 checksum
1422>24	belong	>4		unknown 0x%x checksum
1423#>24	belong	>4		checksum
1424#			For no checksum jump 0 bytes
1425>24	belong	0
1426>>0		ubyte	x
1427# jump more bytes forward by header size
1428>>>&(4.S)	ubyte	x
1429# jump more bytes forward by compressed table of contents size
1430#>>>>&(8.Q)	ubequad	x	\b, heap data 0x%llx
1431>>>>&(8.Q)	ubyte	x
1432# look for data by ./compress after message with 1 space at end
1433>>>>>&-3	indirect x	\b, contains
1434#			For SHA-1 jump 20 minus 2 bytes
1435>24	belong	1
1436>>18		ubyte	x
1437# jump more bytes forward by header size
1438>>>&(4.S)	ubyte	x
1439# jump more bytes forward by compressed table of contents size
1440>>>>&(8.Q)	ubyte	x
1441# data compressed by gzip, bzip, lzma or none
1442>>>>>&-1	indirect x	\b, contains
1443#			For SHA-256 jump 32 minus 2 bytes
1444>24	belong	3
1445>>30		ubyte	x
1446# jump more bytes forward by header size
1447>>>&(4.S)	ubyte	x
1448# jump more bytes forward by compressed table of contents size
1449>>>>&(8.Q)	ubyte	x
1450>>>>>&-1	indirect x	\b, contains
1451#			For SHA-512 jump 64 minus 2 bytes
1452>24	belong	4
1453>>62		ubyte	x
1454# jump more bytes forward by header size
1455>>>&(4.S)	ubyte	x
1456# jump more bytes forward by compressed table of contents size
1457>>>>&(8.Q)	ubyte	x
1458>>>>>&-1	indirect x	\b, contains
1459
1460# Type: Parity Archive
1461# From: Daniel van Eeden <daniel_e@dds.nl>
14620	string	PAR2		Parity Archive Volume Set
1463
1464# Bacula volume format. (Volumes always start with a block header.)
1465# URL: https://bacula.org/3.0.x-manuals/en/developers/developers/Block_Header.html
1466# From: Adam Buchbinder <adam.buchbinder@gmail.com>
146712	string	BB02		Bacula volume
1468>20	bedate	x		\b, started %s
1469
1470# ePub is XHTML + XML inside a ZIP archive.  The first member of the
1471#   archive must be an uncompressed file called 'mimetype' with contents
1472#   'application/epub+zip'
1473
1474
1475# From: "Michael Gorny" <mgorny@gentoo.org>
1476# ZPAQ: http://mattmahoney.net/dc/zpaq.html
14770	string	zPQ	ZPAQ stream
1478>3	byte	x	\b, level %d
1479# From: Barry Carter <carter.barry@gmail.com>
1480# https://encode.ru/threads/456-zpaq-updates/page32
14810	string	7kSt	ZPAQ file
1482
1483# BBeB ebook, unencrypted (LRF format)
1484# URL: https://www.sven.de/librie/Librie/LrfFormat
1485# From: Adam Buchbinder <adam.buchbinder@gmail.com>
14860	string	L\0R\0F\0\0\0	BBeB ebook data, unencrypted
1487>8	beshort	x		\b, version %d
1488>36	byte	1		\b, front-to-back
1489>36	byte	16		\b, back-to-front
1490>42	beshort	x		\b, (%dx,
1491>44	beshort	x		%d)
1492
1493# Symantec GHOST image by Joerg Jenderek at May 2014
1494# https://us.norton.com/ghost/
1495# https://www.garykessler.net/library/file_sigs.html
14960		ubelong&0xFFFFf7f0	0xFEEF0100	Norton GHost image
1497# *.GHO
1498>2		ubyte&0x08		0x00		\b, first file
1499# *.GHS or *.[0-9] with cns program option
1500>2		ubyte&0x08		0x08		\b, split file
1501# part of split index interesting for *.ghs
1502>>4		ubyte			x		id=0x%x
1503# compression tag minus one equals numeric compression command line switch z[1-9]
1504>3		ubyte			0		\b, no compression
1505>3		ubyte			2		\b, fast compression (Z1)
1506>3		ubyte			3		\b, medium compression (Z2)
1507>3		ubyte			>3
1508>>3		ubyte			<11		\b, compression (Z%d-1)
1509>2		ubyte&0x08		0x00
1510# ~ 30 byte password field only for *.gho
1511>>12		ubequad			!0		\b, password protected
1512>>44		ubyte			!1
1513# 1~Image All, sector-by-sector only for *.gho
1514>>>10		ubyte			1		\b, sector copy
1515# 1~Image Boot track only for *.gho
1516>>>43		ubyte			1		\b, boot track
1517# 1~Image Disc only for *.gho implies Image Boot track and sector copy
1518>>44		ubyte			1		\b, disc sector copy
1519# optional image description only *.gho
1520>>0xff		string			>\0		"%-.254s"
1521# look for DOS sector end sequence
1522>0xE08	search/7776		\x55\xAA
1523>>&-512	indirect		x		\b; contains
1524
1525# Google Chrome extensions
1526# https://developer.chrome.com/extensions/crx
1527# https://developer.chrome.com/extensions/hosting
# The version field that follows the "Cr24" signature is a 32-bit
# little-endian integer. It is read with an explicit byte order so that the
# printed value does not depend on the endianness of the host running file(1).
15280	string	Cr24	Google Chrome extension
1529!:mime	application/x-chrome-extension
1530>4	ulelong	x	\b, version %u
1531
1532# SeqBox - Sequenced container
1533# ext: sbx, seqbox
1534# Marco Pontello marcopon@gmail.com
1535# reference: https://github.com/MarcoPon/SeqBox
15360	string	SBx	SeqBox,
1537>3	byte	x	version %d
1538
1539# LyNX archive
154056	string	USE\040LYNX\040TO\040DISSOLVE\040THIS\040FILE	 LyNX archive
1541
1542# From: Joerg Jenderek
1543# URL: https://www.acronis.com/
1544# Reference: https://en.wikipedia.org/wiki/TIB_(file_format)
1545# Note: only tested with True Image 2013 Build 5962 and 2019 Build 14110
15460	ubequad		0xce24b9a220000000	Acronis True Image backup
1547!:mime	application/x-acronis-tib
1548!:ext	tib
1549# 01000000
1550#>20	ubelong		x			\b, at 20 0x%x
1551# 20000000
1552#>28	ubelong		x			\b, at 28 0x%x
1553# strings like "Generic- SD/MMC 1.00" "Unknown Disk" "Msft Virtual Disk 1.0"
1554# ???
1555# strings like "\Device\0000011e" "\Device\0000015a"
1556#>0	search/0x6852300/cs	\\Device\\
1557#>>&-1	pstring		x			\b, %s
1558# "\Device\HarddiskVolume30" "\Device\HarddiskVolume39"
1559#>>>&1	search/180/cs	\\Device\\
1560#>>>>&-1	pstring		x			\b, %s
1561#>>>>>&0	search/29/cs	\0\0\xc8\0
1562# disk label
1563#>>>>>>&10	lestring16	x		\b, disk label %11.11s
1564#>>>>>>&9	plestring16	x		\b, disk label "%11.11s"
1565#>>>>>>&10	ubequad	x			%16.16llx
1566
1567
1568# Gentoo XPAK binary package
1569# by Michal Gorny <mgorny@gentoo.org>
1570# https://gitweb.gentoo.org/proj/portage.git/tree/man/xpak.5
1571-4	string	STOP
1572>-16	string	XPAKSTOP	Gentoo binary package (XPAK)
1573
1574# From:		Joerg Jenderek
1575# URL:		https://kodi.wiki/view/TexturePacker
1576# Reference:	https://mirrors.kodi.tv/releases/source/17.3-Krypton.tar.gz
1577# /xbmc-Krypton/xbmc/guilib/XBTF.h
1578# /xbmc-Krypton/xbmc/guilib/XBTF.cpp
15790	string	XBTF
1580# skip ASCII text by looking for terminating \0 of path
1581>264	ubyte	0		XBMC texture package
1582!:mime	application/x-xbmc-xbt
1583!:ext	xbt
1584# XBTF_VERSION 2
1585>>4	string	!2		\b, version %-.1s
1586# nofFiles /xbmc-Krypton/xbmc/guilib/XBTFReader.cpp
1587>>5	ulelong	x		\b, %u file
1588# plural s
1589>>5	ulelong	>1		\bs
1590# path[CXBTFFile[MaximumPathLength=256]
1591>>9	string	x		\b, 1st %s
1592
1593