xref: /freebsd/contrib/file/magic/Magdir/compress (revision e1e636193db45630c7881246d25902e57c43d24e)
1#------------------------------------------------------------------------------
2# $File: compress,v 1.91 2023/06/16 19:37:47 christos Exp $
3# compress:  file(1) magic for pure-compression formats (no archives)
4#
5# compress, gzip, pack, compact, huf, squeeze, crunch, freeze, yabba, etc.
6#
7# Formats for various forms of compressed data
8# Formats for "compress" proper have been moved into "compress.c",
9# because it tries to uncompress it to figure out what's inside.
10
11# standard unix compress: two-byte signature \037\235 at offset 0
120	string		\037\235	compress'd data
13!:mime	application/x-compress
14!:apple	LZIVZIVU
15!:ext	Z
# Byte 2 carries two fields: a set top bit (0x80) is reported as
# "block compressed", and the low five bits (0x1f) are printed as a bit count.
16>2	byte&0x80	>0		block compressed
17>2	byte&0x1f	x		%d bits
18
19# gzip (GNU zip, not to be confused with Info-ZIP or PKWARE zip archiver)
20# URL: https://en.wikipedia.org/wiki/Gzip
21# Reference: https://tools.ietf.org/html/rfc1952
22# Update: Joerg Jenderek, Apr 2019, Dec 2022
23#   Edited by Chris Chittleborough <cchittleborough@yahoo.com.au>, March 2002
24#	* Original filename is only at offset 10 if "extra field" absent
25#	* Produce shorter output - notably, only report compression methods
26#         other than 8 ("deflate", the only method defined in RFC 1952).
27# Note: find defs -iname '*.trid.xml' -exec grep -q '<Bytes>1F8B08' {} \; -ls
28# TODO:
29# FBR	Blueberry FlashBack screen Record	https://www.flashbackrecorder.com/
30# KPR	KOffice/Calligra KPresenter		application/x-kpresenter
31# KPT	KOffice/Calligra KPresenter template?	application/x-kpresenter
32# SAV	Diggles Saved Game File			http://www.innonics.com
33# SAV	FarCry (demo) saved game		http://www.farcry-thegame.com
34# DAT	ZOAGZIP game data format		http://en.wikipedia.org/wiki/SD_Gundam_Capsule_Fighter
350       string          \037\213
36# to display gzip compressed (strength=100=2*50) before other (strength=50)?
37#!:strength * 2
38# neither the FNAME nor the FCOMMENT bit is set, so there is no file name or comment; the rest of the header is binary only
39>3	byte&0x18	=0
40# In such a binary-only gzip header no ASCII text should occur
41#	mcd-monu-cad.trid.xml
42>>10	string		MCD			Monu-Cad Drawing, Component or Font
43#>>36	string		Created\ with\ MONU-CAD
44#!:mime	application/octet-stream
45# http://fileformats.archiveteam.org/wiki/Monu-CAD
46#	http://www.monucad.com/downloads/FullDemo-2005.EXE
47#	/HANDS96.MCC	Component
48#	/DEMO_DD01.MCD	Drawing
49#	/MCALF020.FNT	Font
50!:ext	mcc/mcd/fnt
51# http://www.generalcadd.com
52>>10	string		GXD			General CADD, Drawing or Component
53#!:mime	application/octet-stream
54#	/gxc/BUILDINGEDGE.gxc			Component
55#	/gxd/HOCKETT-STPAUL-WRHSE.gxd		Drawing
56#	/gxd/POWERLAND-MILL-ADD-11.gxd		Drawing		v9.1.06
57!:ext	gxc/gxd
58#>>>13	ubyte		0			\b, version 0
59>>>13	string		09			\b, version 9
60# other gzipped binary like gzipped tar, VirtualBox extension package,...
61>>10	default		x		gzip compressed data
62!:mime	application/gzip
63>>>0	use	gzip-info
64# size of the original (uncompressed) input data modulo 2^32
65# TODO: check for GXD MCD cad the reported size
66>>>-4	ulelong		x		\b, original size modulo 2^32 %u
67# gzipped TAR or VirtualBox extension package
68#!:mime	application/x-compressed-tar
69#!:mime	application/x-virtualbox-vbox-extpack
70# https://www.w3.org/TR/SVG/mimereg.html
71#!:mime	image/svg+xml-compressed
72#	zlib.3.gz
73#	microcode-20180312.tgz
74#	tpz same as tgz
75#	lua-md5_1.2-1_i386_i486.ipk	https://en.wikipedia.org/wiki/Opkg
76#	Oracle_VM_VirtualBox_Extension_Pack-5.0.12-104815.vbox-extpack
77#	trees.blend			http://fileformats.archiveteam.org/wiki/BLEND
78#	2020-07-19-Note-16-24.xoj	https://xournal.sourceforge.net/manual.html
79#	MYgnucash-gz.gnucash		https://wiki.gnucash.org/wiki/GnuCash_XML_format
80#	text-rotate.dia			https://en.wikipedia.org/wiki/Dia_(software)
81#	MYrdata.RData			https://en.wikipedia.org/wiki/R_(programming_language)
82!:ext	gz/tgz/tpz/ipk/vbox-extpack/svgz/blend/dia/gnucash/rdata/xoj
83# FNAME/FCOMMENT bit implies file name/comment as iso-8859-1 text
84>3	byte&0x18	>0		gzip compressed data
85!:mime	application/gzip
86# gzipped tar, gzipped Abiword document
87#!:mime	application/x-compressed-tar
88#!:mime	application/x-abiword-compressed
89#!:mime	image/svg+xml-compressed
90#	kleopatra_splashscreen.svgz	gzipped .svg
91#	RSI-Mega-Demo_Disk1.adz		gzipped .adf	http://fileformats.archiveteam.org/wiki/ADF_(Amiga)
92#	PostbankTest.kmy		gzipped XML	https://docs.kde.org/stable5/en/kmymoney/kmymoney/details.formats.compressed.html
93#	Logo.xcfgz			gzipped .xcf	http://fileformats.archiveteam.org/wiki/XCF
94!:ext	gz/tgz/tpz/zabw/svgz/adz/kmy/xcfgz
95>>0	use	gzip-info
96# size of the original (uncompressed) input data modulo 2^32
97>>-4	ulelong		x		\b, original size modulo 2^32 %u
98#	display information of gzip compressed files
#	Subroutine invoked through "use gzip-info" at offset 0 of the gzip data.
#	Each line below appends one optional detail to the description.
990	name				gzip-info
100#>2	byte		x		THIS iS GZIP
# byte 2: compression method; only values other than 8 are reported
101>2	byte		<8		\b, reserved method
102>2	byte		>8		\b, unknown method
# byte 3: flag bits
103>3	byte		&0x01		\b, ASCII
104>3	byte		&0x02		\b, has CRC
105>3	byte		&0x04		\b, extra field
# the original file name is read from offset 10 only when bit 0x08 is set
# and bit 0x04 (extra field) is clear
106>3	byte&0xC	=0x08
107>>10	string		x		\b, was "%s"
108>3	byte		&0x10		\b, has comment
109>3	byte		&0x20		\b, encrypted
# offset 4: little-endian date, printed only when greater than zero
110>4	ledate		>0		\b, last modified: %s
# byte 8: compression level hint
111>8	byte		2		\b, max compression
112>8	byte		4		\b, max speed
# byte 9: system or filesystem the file was created on
113>9	byte		=0x00		\b, from FAT filesystem (MS-DOS, OS/2, NT)
114>9	byte		=0x01		\b, from Amiga
115>9	byte		=0x02		\b, from VMS
116>9	byte		=0x03		\b, from Unix
117>9	byte		=0x04		\b, from VM/CMS
118>9	byte		=0x05		\b, from Atari
119>9	byte		=0x06		\b, from HPFS filesystem (OS/2, NT)
120>9	byte		=0x07		\b, from MacOS
121>9	byte		=0x08		\b, from Z-System
122>9	byte		=0x09		\b, from CP/M
123>9	byte		=0x0A		\b, from TOPS/20
124>9	byte		=0x0B		\b, from NTFS filesystem (NT)
125>9	byte		=0x0C		\b, from QDOS
126>9	byte		=0x0D		\b, from Acorn RISCOS
127# size of the original (uncompressed) input data modulo 2^32
128#>-4	ulelong		x		\b, original size modulo 2^32 %u
129#ERROR: line 114: non zero offset 1048572 at level 1
130
131# packed data, Huffman (minimum redundancy) codes on a byte-by-byte basis
1320	string		\037\036	packed data
133!:mime	application/octet-stream
134!:ext	z
135>2	belong		>1		\b, %d characters originally
136>2	belong		=1		\b, %d character originally
137#
138# This magic number is byte-order-independent.
1390	short		0x1f1f		old packed data
140!:mime	application/octet-stream
141
142# XXX - why *two* entries for "compacted data", one of which is
143# byte-order independent, and one of which is byte-order dependent?
144#
1450	short		0x1fff		compacted data
146!:mime	application/octet-stream
147# This string is valid for SunOS (BE) and a matching "short" is listed
148# in the Ultrix (LE) magic file.
1490	string		\377\037	compacted data
150!:mime	application/octet-stream
1510	short		0145405		huf output
152!:mime	application/octet-stream
153
154# bzip2
1550	string		BZh		bzip2 compressed data
156!:mime	application/x-bzip2
157!:ext	bz2
158>3	byte		>47		\b, block size = %c00k
159
160# bzip	a block-sorting file compressor
161#	by Julian Seward <sewardj@cs.man.ac.uk> and others
1620	string		BZ0		bzip compressed data
163!:mime	application/x-bzip
164>3	byte		>47		\b, block size = %c00k
165
166# lzip
1670	string		LZIP		lzip compressed data
168!:mime application/x-lzip
169!:ext lz
170>4	byte		x		\b, version: %d
171
172# squeeze and crunch
173# Michael Haardt <michael@cantor.informatik.rwth-aachen.de>
1740	beshort		0x76FF		squeezed data,
175>4	string		x		original name %s
1760	beshort		0x76FE		crunched data,
177>2	string		x		original name %s
1780	beshort		0x76FD		LZH compressed data,
179>2	string		x		original name %s
180
181# Freeze
1820	string		\037\237	frozen file 2.1
1830	string		\037\236	frozen file 1.0 (or gzip 0.5)
184
185# SCO compress -H (LZH)
1860	string		\037\240	SCO compress -H (LZH) data
187
188# European GSM 06.10 is a provisional standard for full-rate speech
189# transcoding, prI-ETS 300 036, which uses RPE/LTP (residual pulse
190# excitation/long term prediction) coding at 13 kbit/s.
191#
192# There's only a magic nibble (4 bits); that nibble repeats every 33
193# bytes.  This isn't suited for use, but maybe we can use it someday.
194#
195# This will cause very short GSM files to be declared as data and
196# mismatches to be declared as data too!
197#0	byte&0xF0	0xd0		data
198#>33	byte&0xF0	0xd0
199#>66	byte&0xF0	0xd0
200#>99	byte&0xF0	0xd0
201#>132	byte&0xF0	0xd0		GSM 06.10 compressed audio
202
203# lzop from <markus.oberhumer@jk.uni-linz.ac.at>
# The big-endian short at offset 9 is printed as the version. It also selects
# one of two branches that read the method and OS bytes from different offsets.
2040	string		\x89\x4c\x5a\x4f\x00\x0d\x0a\x1a\x0a	lzop compressed data
205!:ext	lzo
# version below 0x0940: method byte at offset 13, OS byte at offset 14
206>9	beshort		<0x0940
207>>9	byte&0xf0	=0x00		- version 0.
208>>9	beshort&0x0fff	x		\b%03x,
209>>13	byte		1		LZO1X-1,
210>>13	byte		2		LZO1X-1(15),
211>>13	byte		3		LZO1X-999,
212## >>22	bedate		>0		last modified: %s,
213>>14	byte		=0x00		os: MS-DOS
214>>14	byte		=0x01		os: Amiga
215>>14	byte		=0x02		os: VMS
216>>14	byte		=0x03		os: Unix
217>>14	byte		=0x05		os: Atari
218>>14	byte		=0x06		os: OS/2
219>>14	byte		=0x07		os: MacOS
220>>14	byte		=0x0A		os: Tops/20
221>>14	byte		=0x0B		os: WinNT
222>>14	byte		=0x0E		os: Win32
# version 0x0940 and above (>0x0939): method byte at offset 15, OS byte at offset 17
223>9	beshort		>0x0939
224>>9	byte&0xf0	=0x00		- version 0.
225>>9	byte&0xf0	=0x10		- version 1.
226>>9	byte&0xf0	=0x20		- version 2.
227>>9	beshort&0x0fff	x		\b%03x,
228>>15	byte		1		LZO1X-1,
229>>15	byte		2		LZO1X-1(15),
230>>15	byte		3		LZO1X-999,
231## >>25	bedate		>0		last modified: %s,
232>>17	byte		=0x00		os: MS-DOS
233>>17	byte		=0x01		os: Amiga
234>>17	byte		=0x02		os: VMS
235>>17	byte		=0x03		os: Unix
236>>17	byte		=0x05		os: Atari
237>>17	byte		=0x06		os: OS/2
238>>17	byte		=0x07		os: MacOS
239>>17	byte		=0x0A		os: Tops/20
240>>17	byte		=0x0B		os: WinNT
241>>17	byte		=0x0E		os: Win32
242
243# 4.3BSD-Quasijarus Strong Compression
244# https://minnie.tuhs.org/Quasijarus/compress.html
2450	string		\037\241	Quasijarus strong compressed data
246
247# From: Cory Dikkers <cdikkers@swbell.net>
2480	string		XPKF		Amiga xpkf.library compressed data
2490	string		PP11		Power Packer 1.1 compressed data
2500	string		PP20		Power Packer 2.0 compressed data,
251>4	belong		0x09090909	fast compression
252>4	belong		0x090A0A0A	mediocre compression
253>4	belong		0x090A0B0B	good compression
254>4	belong		0x090A0C0C	very good compression
255>4	belong		0x090A0C0D	best compression
256
257# 7-zip archiver, from Thomas Klausner (wiz@danbala.tuwien.ac.at)
258# https://www.7-zip.org or DOC/7zFormat.txt
259#
2600	string		7z\274\257\047\034	7-zip archive data,
261>6	byte		x			version %d
262>7	byte		x			\b.%d
263!:mime	application/x-7z-compressed
264!:ext 7z/cb7
265
# Subroutine "lzma": prints the LZMA description, MIME type and extension.
# It performs no signature check of its own.
2660	name		lzma			LZMA compressed data,
267!:mime	application/x-lzma
268!:ext	lzma
# little-endian quad at offset 5: all ones is reported as "streamed",
# any other value is printed as the size
269>5	lequad		=0xffffffffffffffff	streamed
270>5	lequad		!0xffffffffffffffff	non-streamed, size %lld
271
271# Type: LZMA
# Candidate when the low three bytes of the little-endian long at offset 0
# equal 0x5d. Accepted, and handed to the "lzma" subroutine, when the
# little-endian short at offset 12 is either 0xff or 0.
2720	lelong&0xffffff	=0x5d
273>12	leshort		0xff
274>>0	use		lzma
275>12	leshort		0
276>>0	use		lzma
278
279# http://tukaani.org/xz/xz-file-format.txt
2800	ustring		\xFD7zXZ\x00		XZ compressed data, checksum
281!:strength * 2
282!:mime	application/x-xz
283!:ext	xz
284>7	byte&0xf	0x0			NONE
285>7	byte&0xf	0x1			CRC32
286>7	byte&0xf	0x4			CRC64
287>7	byte&0xf	0xa			SHA-256
288
289# https://github.com/ckolivas/lrzip/blob/master/doc/magic.header.txt
2900	string		LRZI			LRZIP compressed data
291!:mime  application/x-lrzip
292>4	byte		x			- version %d
293>5	byte		x			\b.%d
294>22	byte		1			\b, encrypted
295
296# https://fastcompression.blogspot.fi/2013/04/lz4-streaming-format-final.html
2970	lelong		0x184d2204	LZ4 compressed data (v1.4+)
298!:mime	application/x-lz4
299!:ext	lz4
300# Added by osm0sis@xda-developers.com
3010 	lelong		0x184c2103	LZ4 compressed data (v1.0-v1.3)
302!:mime	application/x-lz4
3030	lelong		0x184c2102	LZ4 compressed data (v0.1-v0.9)
304!:mime	application/x-lz4
305
305# Zstandard/LZ4 skippable frames
306# https://github.com/facebook/zstd/blob/dev/zstd_compression_format.md
# The low nibble of the magic is masked off, so 0x184D2A50..0x184D2A5F all match.
# The little-endian long at offset 4, plus 8, gives the offset at which the
# magic database is consulted again ("indirect").
3070         lelong&0xFFFFFFF0  0x184D2A50
308>(4.l+8)  indirect	x
310
310# Zstandard Dictionary ID subroutine
# Invoked with "use" at offset 4 of a Zstandard frame, so offset 0 here is the
# byte that follows the 4-byte magic. The low two bits of that byte select the
# width of the ID (0: none, 1: byte, 2: leshort, 3: lelong). Bit 0x20 selects
# whether the ID is read from offset 1 (set) or offset 2 (clear).
3110     name        zstd-dictionary-id
312# Single Segment = True
313>0    byte        &0x20   \b, Dictionary ID:
314>>0   byte&0x03   0       None
315>>0   byte&0x03   1
316>>>1  byte        x       %u
317>>0   byte&0x03   2
318>>>1  leshort     x       %u
319>>0   byte&0x03   3
320>>>1  lelong      x       %u
321# Single Segment = False
322>0    byte        ^0x20   \b, Dictionary ID:
323>>0   byte&0x03   0       None
324>>0   byte&0x03   1
325>>>2  byte        x       %u
326>>0   byte&0x03   2
327>>>2  leshort     x       %u
328>>0   byte&0x03   3
329>>>2  lelong      x       %u
331
332# Zstandard compressed data
333# https://github.com/facebook/zstd/blob/dev/zstd_compression_format.md
3340     lelong       0xFD2FB522  Zstandard compressed data (v0.2)
335!:mime  application/zstd
336!:ext zst
3370     lelong       0xFD2FB523  Zstandard compressed data (v0.3)
338!:mime  application/zstd
339!:ext zst
3400     lelong       0xFD2FB524  Zstandard compressed data (v0.4)
341!:mime  application/zstd
342!:ext zst
3430     lelong       0xFD2FB525  Zstandard compressed data (v0.5)
344!:mime  application/zstd
345!:ext zst
3460     lelong       0xFD2FB526  Zstandard compressed data (v0.6)
347!:mime  application/zstd
348!:ext zst
3490     lelong       0xFD2FB527  Zstandard compressed data (v0.7)
350!:mime  application/zstd
351!:ext zst
352>4    use          zstd-dictionary-id
3530     lelong       0xFD2FB528  Zstandard compressed data (v0.8+)
354!:mime  application/zstd
355!:ext zst
356>4    use          zstd-dictionary-id
357
358# https://github.com/facebook/zstd/blob/dev/zstd_compression_format.md
3590  lelong    0xEC30A437  Zstandard dictionary
360!:mime  application/x-std-dictionary
361>4 lelong    x           (ID %u)
362
363# AFX compressed files (Wolfram Kleff)
3642	string		-afx-		AFX compressed file data
365
366# Supplementary magic data for the file(1) command to support
367# rzip(1).  The format is described in magic(5).
368#
369# Copyright (C) 2003 by Andrew Tridgell.  You may do whatever you want with
370# this file.
371#
3720	string		RZIP		rzip compressed data
373>4	byte		x		- version %d
374>5	byte		x		\b.%d
375>6	belong		x		(%d bytes)
376
3770	string		ArC\x01		FreeArc archive <http://freearc.org>
378
379# Type:	DACT compressed files
3800	long	0x444354C3	DACT compressed data
381>4	byte	>-1		(version %i.
382>5	byte	>-1		%i.
383>6	byte	>-1		%i)
384>7	long	>0		, original size: %i bytes
385>15	long	>30		, block size: %i bytes
386
387# Valve Pack (VPK) files
3880	lelong	0x55aa1234	Valve Pak file
389>0x4	lelong	x		\b, version %u
390>0x8	lelong	x		\b, %u entries
391
392# Snappy framing format
393# https://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
3940	string	\377\006\0\0sNaPpY	snappy framed data
395!:mime	application/x-snappy-framed
396
397# qpress, https://www.quicklz.com/
3980	string	qpress10	qpress compressed data
399!:mime	application/x-qpress
400
400# Zlib https://www.ietf.org/rfc/rfc6713.txt
# There is no magic string. A match requires all of the following:
#   - the first two bytes, read as a big-endian short, are divisible by 31
#   - the low nibble of byte 0 equals 8
#   - the top bit (0x80) of byte 0 is clear
4010	string/b	x
402>0	beshort%31	=0
403>>0	byte&0xf	=8
404>>>0	byte&0x80 	=0	zlib compressed data
405!:mime	application/zlib
407
408# BWC compression
4090	string		BWC
410>3	byte		0	BWC compressed data
411
412# UCL compression
4130	bequad		0x00e955434cff011a	UCL compressed data
414
415# Softlib archive
4160	string		SLIB	Softlib archive
417>4	leshort		x	\b, version %d
418>6	leshort		x	(contains %d files)
419
420# URL:  https://github.com/lzfse/lzfse/blob/master/src/lzfse_internal.h#L276
421# From: Eric Hall <eric.hall@darkart.com>
4220	string	bvx-	lzfse encoded, no compression
4230	string	bvx1	lzfse compressed, uncompressed tables
4240	string	bvx2	lzfse compressed, compressed tables
4250	string	bvxn	lzfse encoded, lzvn compressed
426
426# pcxLib.exe compression program
427# http://www.shikadi.net/moddingwiki/PCX_Library
# The words of the copyright text at offset 0x0A are matched with ordinary
# spaces between them, written as the octal escape \040. The escape \020
# would denote the control byte 0x10 instead of a space.
4280	string/b	pcxLib
429>0x0A	string/b	Copyright\040(c)\040Genus\040Microprogramming,\040Inc.	pcxLib compressed
431
432# https://support-docs.illumina.com/SW/ORA_Format_Specification/Content/SW/ORA/ORAFormatSpecification.htm
4330	uleshort	0x7c49
434>2	lelong		0x80	ORA FASTQ compressed file
435>>6	ulelong		x	\b, DNA size %u
436>>10	ulelong		x	\b, read names size %u
437>>14	ulelong		x	\b, quality buffer 1 size %u
438>>18	ulelong		x	\b, quality buffer 2 size %u
439>>22	ulelong		x	\b, sequence buffer size %u
440>>26	ulelong		x	\b, N-position buffer size %u
441>>30	ulelong		x	\b, crypto buffer size %u
442>>34	ulelong		x	\b, misc  buffer 1 size %u
443>>38	ulelong		x	\b, misc  buffer 2 size %u
444>>42	ulelong		x	\b, flags %#x
445>>46	lelong		x	\b, read size %d
446>>50	lelong		x	\b, number of reads %d
447>>54	leshort		x	\b, version %d
448
449# https://github.com/kspalaiologos/bzip3/blob/master/doc/file_format.md
4500	string/b	BZ3v1	bzip3 compressed data
451>5	ulelong		x	\b, blocksize %u
452
453
454# https://support-docs.illumina.com/SW/ORA_Format_Specification/Content/\
455# SW/ORA/ORAFormatSpecification.htm
456# From Guillaume Rizk
4570	short	=0x7C49 DRAGEN ORA file,
458>-261	short	=0x7C49 with metadata:
459>-125	u8	x	NB reads: %llu,
460>-109	u8	x	NB bases: %llu.
461>-219	u4&0x02	2	File contains interleaved paired reads
462