xref: /freebsd/contrib/file/magic/Magdir/compress (revision dd21556857e8d40f66bf5ad54754d9d52669ebf7)
1#------------------------------------------------------------------------------
2# $File: compress,v 1.96 2024/11/09 23:47:04 christos Exp $
3# compress:  file(1) magic for pure-compression formats (no archives)
4#
5# compress, gzip, pack, compact, huf, squeeze, crunch, freeze, yabba, etc.
6#
7# Formats for various forms of compressed data
8# Formats for "compress" proper have been moved into "compress.c",
9# because it tries to uncompress it to figure out what's inside.
10
11# standard unix compress; byte 2: 0x80 flags block compression, low 5 bits give the reported bit count
120	string		\037\235	compress'd data
13!:mime	application/x-compress
14!:apple	LZIVZIVU
15!:ext	Z
16>2	byte&0x80	>0		block compressed
17>2	byte&0x1f	x		%d bits
18
19# gzip (GNU zip, not to be confused with Info-ZIP or PKWARE zip archiver)
20# URL: https://en.wikipedia.org/wiki/Gzip
21# Reference: https://tools.ietf.org/html/rfc1952
22# Update: Joerg Jenderek, Apr 2019, Dec 2022
23#   Edited by Chris Chittleborough <cchittleborough@yahoo.com.au>, March 2002
24#	* Original filename is only at offset 10 if "extra field" absent
25#	* Produce shorter output - notably, only report compression methods
26#         other than 8 ("deflate", the only method defined in RFC 1952).
27# Note: find defs -iname '*.trid.xml' -exec grep -q '<Bytes>1F8B08' {} \; -ls
28# TODO:
29# FBR	Blueberry FlashBack screen Record	https://www.flashbackrecorder.com/
30# KPR	KOffice/Calligra KPresenter		application/x-kpresenter
31# KPT	KOffice/Calligra KPresenter template?	application/x-kpresenter
32# SAV	Diggles Saved Game File			http://www.innonics.com
33# SAV	FarCry (demo) saved game		http://www.farcry-thegame.com
34# DAT	ZOAGZIP game data format		http://en.wikipedia.org/wiki/SD_Gundam_Capsule_Fighter
350       string          \037\213
36# to display gzip compressed (strength=100=2*50) before other (strength=50)?
37#!:strength * 2
38# neither FNAME (0x08) nor FCOMMENT (0x10) flag set: no embedded file name/comment, so only binary data follows the header
39>3	byte&0x18	=0
40# For binary gzipped no ASCII text should occur
41#	mcd-monu-cad.trid.xml
42>>10	string		MCD			Monu-Cad Drawing, Component or Font
43#>>36	string		Created\ with\ MONU-CAD
44#!:mime	application/octet-stream
45# http://fileformats.archiveteam.org/wiki/Monu-CAD
46#	http://www.monucad.com/downloads/FullDemo-2005.EXE
47#	/HANDS96.MCC	Component
48#	/DEMO_DD01.MCD	Drawing
49#	/MCALF020.FNT	Font
50!:ext	mcc/mcd/fnt
51# http://www.generalcadd.com
52>>10	string		GXD			General CADD, Drawing or Component
53#!:mime	application/octet-stream
54#	/gxc/BUILDINGEDGE.gxc			Component
55#	/gxd/HOCKETT-STPAUL-WRHSE.gxd		Drawing
56#	/gxd/POWERLAND-MILL-ADD-11.gxd		Drawing		v9.1.06
57!:ext	gxc/gxd
58#>>>13	ubyte		0			\b, version 0
59>>>13	string		09			\b, version 9
60# other gzipped binary like gzipped tar, VirtualBox extension package,...
61>>10	default		x		gzip compressed data
62!:mime	application/gzip
63>>>0	use	gzip-info
64# size of the original (uncompressed) input data modulo 2^32
65# TODO: check for GXD MCD cad the reported size
66>>>-4	ulelong		x		\b, original size modulo 2^32 %u
67# gzipped TAR or VirtualBox extension package
68#!:mime	application/x-compressed-tar
69#!:mime	application/x-virtualbox-vbox-extpack
70# https://www.w3.org/TR/SVG/mimereg.html
71#!:mime	image/svg+xml-compressed
72#	zlib.3.gz
73#	microcode-20180312.tgz
74#	tpz same as tgz
75#	lua-md5_1.2-1_i386_i486.ipk	https://en.wikipedia.org/wiki/Opkg
76#	Oracle_VM_VirtualBox_Extension_Pack-5.0.12-104815.vbox-extpack
77#	trees.blend			http://fileformats.archiveteam.org/wiki/BLEND
78#	2020-07-19-Note-16-24.xoj	https://xournal.sourceforge.net/manual.html
79#	MYgnucash-gz.gnucash		https://wiki.gnucash.org/wiki/GnuCash_XML_format
80#	text-rotate.dia			https://en.wikipedia.org/wiki/Dia_(software)
81#	MYrdata.RData			https://en.wikipedia.org/wiki/R_(programming_language)
82!:ext	gz/tgz/tpz/ipk/vbox-extpack/svgz/blend/dia/gnucash/rdata/xoj
83# FNAME/FCOMMENT bit implies file name/comment as iso-8859-1 text
84>3	byte&0x18	>0		gzip compressed data
85!:mime	application/gzip
86# gzipped tar, gzipped Abiword document
87#!:mime	application/x-compressed-tar
88#!:mime	application/x-abiword-compressed
89#!:mime	image/svg+xml-compressed
90#	kleopatra_splashscreen.svgz	gzipped .svg
91#	RSI-Mega-Demo_Disk1.adz		gzipped .adf	http://fileformats.archiveteam.org/wiki/ADF_(Amiga)
92#	PostbankTest.kmy		gzipped XML	https://docs.kde.org/stable5/en/kmymoney/kmymoney/details.formats.compressed.html
93#	Logo.xcfgz			gzipped .xcf	http://fileformats.archiveteam.org/wiki/XCF
94!:ext	gz/tgz/tpz/zabw/svgz/adz/kmy/xcfgz
95>>0	use	gzip-info
96# size of the original (uncompressed) input data modulo 2^32
97>>-4	ulelong		x		\b, original size modulo 2^32 %u
98#	gzip-info: shared subroutine printing gzip header details (method at 2, flag bits at 3, modification time at 4, compression hint at 8, originating OS at 9)
990	name				gzip-info
100#>2	byte		x		THIS iS GZIP
101>2	byte		<8		\b, reserved method
102>2	byte		>8		\b, unknown method
103>3	byte		&0x01		\b, ASCII
104>3	byte		&0x02		\b, has CRC
105>3	byte		&0x04		\b, extra field
106>3	byte&0xC	=0x08
107>>10	string		x		\b, was "%s"
108>3	byte		&0x10		\b, has comment
109>3	byte		&0x20		\b, encrypted
110>4	ledate		>0		\b, last modified: %s
111>8	byte		2		\b, max compression
112>8	byte		4		\b, max speed
113>9	byte		=0x00		\b, from FAT filesystem (MS-DOS, OS/2, NT)
114>9	byte		=0x01		\b, from Amiga
115>9	byte		=0x02		\b, from VMS
116>9	byte		=0x03		\b, from Unix
117>9	byte		=0x04		\b, from VM/CMS
118>9	byte		=0x05		\b, from Atari
119>9	byte		=0x06		\b, from HPFS filesystem (OS/2, NT)
120>9	byte		=0x07		\b, from MacOS
121>9	byte		=0x08		\b, from Z-System
122>9	byte		=0x09		\b, from CP/M
123>9	byte		=0x0A		\b, from TOPS/20
124>9	byte		=0x0B		\b, from NTFS filesystem (NT)
125>9	byte		=0x0C		\b, from QDOS
126>9	byte		=0x0D		\b, from Acorn RISCOS
127# size of the original (uncompressed) input data modulo 2^32
128#>-4	ulelong		x		\b, original size modulo 2^32 %u
129#ERROR: line 114: non zero offset 1048572 at level 1
130
131# packed data, Huffman (minimum redundancy) codes on a byte-by-byte basis; the big-endian long at offset 2 is the original character count
1320	string		\037\036	packed data
133!:mime	application/octet-stream
134!:ext	z
135>2	belong		>1		\b, %d characters originally
136>2	belong		=1		\b, %d character originally
137#
138# This magic number is byte-order-independent: both bytes of the short are 0x1f.
1390	short		0x1f1f		old packed data
140!:mime	application/octet-stream
141
142# XXX - why *two* entries for "compacted data", one of which is
143# byte-order independent, and one of which is byte-order dependent?
144# (the "short" entries below carry no le/be prefix; the string entry pins the byte sequence ff 1f)
1450	short		0x1fff		compacted data
146!:mime	application/octet-stream
147# This string is valid for SunOS (BE) and a matching "short" is listed
148# in the Ultrix (LE) magic file.
1490	string		\377\037	compacted data
150!:mime	application/octet-stream
1510	short		0145405		huf output
152!:mime	application/octet-stream
153
154# bzip2: byte 3 (any value above 47, i.e. from ASCII '0') is printed as a character followed by "00k" for the block size
1550	string		BZh		bzip2 compressed data
156!:mime	application/x-bzip2
157!:ext	bz2
158>3	byte		>47		\b, block size = %c00k
159
160# bzip	a block-sorting file compressor ("BZ0" signature; block size byte at 3 is reported as for bzip2)
161#	by Julian Seward <sewardj@cs.man.ac.uk> and others
1620	string		BZ0		bzip compressed data
163!:mime	application/x-bzip
164>3	byte		>47		\b, block size = %c00k
165
166# lzip: "LZIP" signature, byte 4 is reported as the version
1670	string		LZIP		lzip compressed data
168!:mime application/x-lzip
169!:ext lz
170>4	byte		x		\b, version: %d
171
172# squeeze, crunch and LZH: big-endian 16-bit magic 0x76FF/0x76FE/0x76FD; original name at offset 4 (squeezed) or 2 (crunched, LZH)
173# Michael Haardt <michael@cantor.informatik.rwth-aachen.de>
1740	beshort		0x76FF		squeezed data,
175>4	string		x		original name %s
1760	beshort		0x76FE		crunched data,
177>2	string		x		original name %s
1780	beshort		0x76FD		LZH compressed data,
179>2	string		x		original name %s
180
181# Freeze: two-byte magics 1f 9f (2.1) and 1f 9e (1.0); the latter is reported as possibly gzip 0.5
1820	string		\037\237	frozen file 2.1
1830	string		\037\236	frozen file 1.0 (or gzip 0.5)
184
185# SCO compress -H (LZH)
1860	string		\037\240	SCO compress -H (LZH) data
187
188# European GSM 06.10 is a provisional standard for full-rate speech
189# transcoding, prI-ETS 300 036, which uses RPE/LTP (residual pulse
190# excitation/long term prediction) coding at 13 kbit/s.
191#
192# There's only a magic nibble (4 bits); that nibble repeats every 33
193# bytes.  This isn't suited for use, but maybe we can use it someday.
194#
195# This will cause very short GSM files to be declared as data and
196# mismatches to be declared as data too!
197#0	byte&0xF0	0xd0		data
198#>33	byte&0xF0	0xd0
199#>66	byte&0xF0	0xd0
200#>99	byte&0xF0	0xd0
201#>132	byte&0xF0	0xd0		GSM 06.10 compressed audio
202
203# lzop from <markus.oberhumer@jk.uni-linz.ac.at>; version (BE short at 9) below 0x0940 reads method at 13 and OS at 14, otherwise method at 15 and OS at 17
2040	string		\x89\x4c\x5a\x4f\x00\x0d\x0a\x1a\x0a	lzop compressed data
205!:ext	lzo
206!:mime	application/x-lzop
207>9	beshort		<0x0940
208>>9	byte&0xf0	=0x00		- version 0.
209>>9	beshort&0x0fff	x		\b%03x,
210>>13	byte		1		LZO1X-1,
211>>13	byte		2		LZO1X-1(15),
212>>13	byte		3		LZO1X-999,
213## >>22	bedate		>0		last modified: %s,
214>>14	byte		=0x00		os: MS-DOS
215>>14	byte		=0x01		os: Amiga
216>>14	byte		=0x02		os: VMS
217>>14	byte		=0x03		os: Unix
218>>14	byte		=0x05		os: Atari
219>>14	byte		=0x06		os: OS/2
220>>14	byte		=0x07		os: MacOS
221>>14	byte		=0x0A		os: Tops/20
222>>14	byte		=0x0B		os: WinNT
223>>14	byte		=0x0E		os: Win32
224>9	beshort		>0x0939
225>>9	byte&0xf0	=0x00		- version 0.
226>>9	byte&0xf0	=0x10		- version 1.
227>>9	byte&0xf0	=0x20		- version 2.
228>>9	beshort&0x0fff	x		\b%03x,
229>>15	byte		1		LZO1X-1,
230>>15	byte		2		LZO1X-1(15),
231>>15	byte		3		LZO1X-999,
232## >>25	bedate		>0		last modified: %s,
233>>17	byte		=0x00		os: MS-DOS
234>>17	byte		=0x01		os: Amiga
235>>17	byte		=0x02		os: VMS
236>>17	byte		=0x03		os: Unix
237>>17	byte		=0x05		os: Atari
238>>17	byte		=0x06		os: OS/2
239>>17	byte		=0x07		os: MacOS
240>>17	byte		=0x0A		os: Tops/20
241>>17	byte		=0x0B		os: WinNT
242>>17	byte		=0x0E		os: Win32
243
244# 4.3BSD-Quasijarus Strong Compression
245# https://minnie.tuhs.org/Quasijarus/compress.html
2460	string		\037\241	Quasijarus strong compressed data
247
248# Amiga XPKF and Power Packer; for PP20 the big-endian long at offset 4 selects the compression label. From: Cory Dikkers <cdikkers@swbell.net>
2490	string		XPKF		Amiga xpkf.library compressed data
2500	string		PP11		Power Packer 1.1 compressed data
2510	string		PP20		Power Packer 2.0 compressed data,
252>4	belong		0x09090909	fast compression
253>4	belong		0x090A0A0A	mediocre compression
254>4	belong		0x090A0B0B	good compression
255>4	belong		0x090A0C0C	very good compression
256>4	belong		0x090A0C0D	best compression
257
258# 7-zip archiver, from Thomas Klausner (wiz@danbala.tuwien.ac.at)
259# https://www.7-zip.org or DOC/7zFormat.txt
260# bytes 6 and 7 are printed as "version <byte 6>.<byte 7>"
2610	string		7z\274\257\047\034	7-zip archive data,
262>6	byte		x			version %d
263>7	byte		x			\b.%d
264!:mime	application/x-7z-compressed
265!:ext 7z/cb7
266
2670	name		lzma			LZMA compressed data,
268!:mime	application/x-lzma
269!:ext	lzma
270>5	lequad		=0xffffffffffffffff	streamed
271>5	lequad		!0xffffffffffffffff	non-streamed, size %lld
272
273# Type: LZMA; matches when the low 24 bits of the LE long at 0 equal 0x5d and the LE short at 12 is 0xff or 0, then hands over to the lzma subroutine above
2740	lelong&0xffffff	=0x5d
275>12	leshort		0xff
276>>0	use		lzma
277>12	leshort		0
278>>0	use		lzma
279
280# http://tukaani.org/xz/xz-file-format.txt (the low nibble of byte 7 selects the checksum name printed)
2810	ustring		\xFD7zXZ\x00		XZ compressed data, checksum
282!:strength * 2
283!:mime	application/x-xz
284!:ext	xz
285>7	byte&0xf	0x0			NONE
286>7	byte&0xf	0x1			CRC32
287>7	byte&0xf	0x4			CRC64
288>7	byte&0xf	0xa			SHA-256
289
290# https://github.com/ckolivas/lrzip/blob/master/doc/magic.header.txt (bytes 4 and 5 printed as version; byte 22 equal to 1 reported as encrypted)
2910	string		LRZI			LRZIP compressed data
292!:mime  application/x-lrzip
293>4	byte		x			- version %d
294>5	byte		x			\b.%d
295>22	byte		1			\b, encrypted
296
297# https://fastcompression.blogspot.fi/2013/04/lz4-streaming-format-final.html (three little-endian magics, one per version range, newest first)
2980	lelong		0x184d2204	LZ4 compressed data (v1.4+)
299!:mime	application/x-lz4
300!:ext	lz4
301# Added by osm0sis@xda-developers.com
3020 	lelong		0x184c2103	LZ4 compressed data (v1.0-v1.3)
303!:mime	application/x-lz4
3040	lelong		0x184c2102	LZ4 compressed data (v0.1-v0.9)
305!:mime	application/x-lz4
306
307# Zstandard/LZ4 skippable frames: magic 0x184D2A50 with the low nibble masked off; matching restarts at (long at offset 4) + 8
308# https://github.com/facebook/zstd/blob/dev/zstd_compression_format.md
3090         lelong&0xFFFFFFF0  0x184D2A50
310>(4.l+8)  indirect	x
311
312# Zstandard Dictionary ID subroutine: byte 0 bit 0x20 = single segment (ID read at 1, otherwise at 2); low 2 bits select none / 1-byte / 2-byte / 4-byte ID
3130     name        zstd-dictionary-id
314# Single Segment = True
315>0    byte        &0x20   \b, Dictionary ID:
316>>0   byte&0x03   0       None
317>>0   byte&0x03   1
318>>>1  byte        x       %u
319>>0   byte&0x03   2
320>>>1  leshort     x       %u
321>>0   byte&0x03   3
322>>>1  lelong      x       %u
323# Single Segment = False
324>0    byte        ^0x20   \b, Dictionary ID:
325>>0   byte&0x03   0       None
326>>0   byte&0x03   1
327>>>2  byte        x       %u
328>>0   byte&0x03   2
329>>>2  leshort     x       %u
330>>0   byte&0x03   3
331>>>2  lelong      x       %u
332
333# Zstandard compressed data: one little-endian magic per format version; only v0.7 and v0.8+ decode the dictionary ID from offset 4
334# https://github.com/facebook/zstd/blob/dev/zstd_compression_format.md
3350     lelong       0xFD2FB522  Zstandard compressed data (v0.2)
336!:mime  application/zstd
337!:ext zst
3380     lelong       0xFD2FB523  Zstandard compressed data (v0.3)
339!:mime  application/zstd
340!:ext zst
3410     lelong       0xFD2FB524  Zstandard compressed data (v0.4)
342!:mime  application/zstd
343!:ext zst
3440     lelong       0xFD2FB525  Zstandard compressed data (v0.5)
345!:mime  application/zstd
346!:ext zst
3470     lelong       0xFD2FB526  Zstandard compressed data (v0.6)
348!:mime  application/zstd
349!:ext zst
3500     lelong       0xFD2FB527  Zstandard compressed data (v0.7)
351!:mime  application/zstd
352!:ext zst
353>4    use          zstd-dictionary-id
3540     lelong       0xFD2FB528  Zstandard compressed data (v0.8+)
355!:mime  application/zstd
356!:ext zst
357>4    use          zstd-dictionary-id
358
359# https://github.com/facebook/zstd/blob/dev/zstd_compression_format.md (dictionary file: LE magic, then the LE long at 4 printed as the ID); NOTE(review): MIME reads "x-std-dictionary" - confirm "std" vs "zstd" is intended
3600  lelong    0xEC30A437  Zstandard dictionary
361!:mime  application/x-std-dictionary
362>4 lelong    x           (ID %u)
363
364# AFX compressed files (Wolfram Kleff); the "-afx-" signature sits at offset 2, not 0
3652	string		-afx-		AFX compressed file data
366
367# Supplementary magic data for the file(1) command to support
368# rzip(1).  The format is described in magic(5).
369#
370# Copyright (C) 2003 by Andrew Tridgell.  You may do whatever you want with
371# this file.
372# Bytes 4 and 5 are printed as the version, the big-endian long at 6 as a byte count.
3730	string		RZIP		rzip compressed data
374>4	byte		x		- version %d
375>5	byte		x		\b.%d
376>6	belong		x		(%d bytes)
377
3780	string		ArC\x01		FreeArc archive <http://freearc.org>
379
380# Valve Pak (VPK) files: after the magic the header is a run of consecutive 32-bit LE fields at 0x4, 0x8, 0xc, 0x10, 0x14, 0x18. NOTE(review): per the Valve wiki the fields from 0xc on exist only in version 2 headers - confirm whether they should be gated on the version
381# https://developer.valvesoftware.com/wiki/VPK_(file_format)#File_Format
3820	lelong	0x55aa1234	Valve Pak file
383>0x4	lelong	x		\b, version %u
384>0x8	lelong	x		\b, tree size %u
385>0xc	lelong	x		\b, file data size %u
386>0x10	lelong	x		\b, archive MD5 size %u
387>0x14	lelong	x		\b, other MD5 size %u
388>0x18	lelong	x		\b, signature size %u
389
390# Snappy framing format
391# https://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
3920	string	\377\006\0\0sNaPpY	snappy framed data
393!:mime	application/x-snappy-framed
394
395# qpress, https://www.quicklz.com/
3960	string	qpress10	qpress compressed data
397!:mime	application/x-qpress
398
399# Zlib https://www.ietf.org/rfc/rfc6713.txt - no fixed signature: the BE short at 0 must be divisible by 31, the low nibble of byte 0 must be 8 and its top bit clear
4000	string/b	x
401>0	beshort%31	=0
402>>0	byte&0xf	=8
403>>>0	byte&0x80 	=0	zlib compressed data
404!:mime	application/zlib
405
406# BWC compression: "BWC" must be followed by a zero byte at offset 3
4070	string		BWC
408>3	byte		0	BWC compressed data
409
410# UCL compression
4110	bequad		0x00e955434cff011a	UCL compressed data
412
413# Softlib archive: LE shorts at 4 and 6 are printed as the version and the file count
4140	string		SLIB	Softlib archive
415>4	leshort		x	\b, version %d
416>6	leshort		x	(contains %d files)
417
418# URL:  https://github.com/lzfse/lzfse/blob/master/src/lzfse_internal.h#L276
419# From: Eric Hall <eric.hall@darkart.com> (four 4-byte signatures sharing the "bvx" prefix; the 4th byte selects the description)
4200	string	bvx-	lzfse encoded, no compression
4210	string	bvx1	lzfse compressed, uncompressed tables
4220	string	bvx2	lzfse compressed, compressed tables
4230	string	bvxn	lzfse encoded, lzvn compressed
424
425# https://support-docs.illumina.com/SW/ORA_Format_Specification/Content/SW/ORA/ORAFormatSpecification.htm (LE short 0x7c49 at 0, LE long 0x80 at 2, then LE fields every 4 bytes from offset 6 up to the 16-bit version at 54)
4260	uleshort	0x7c49
427>2	lelong		0x80	ORA FASTQ compressed file
428>>6	ulelong		x	\b, DNA size %u
429>>10	ulelong		x	\b, read names size %u
430>>14	ulelong		x	\b, quality buffer 1 size %u
431>>18	ulelong		x	\b, quality buffer 2 size %u
432>>22	ulelong		x	\b, sequence buffer size %u
433>>26	ulelong		x	\b, N-position buffer size %u
434>>30	ulelong		x	\b, crypto buffer size %u
435>>34	ulelong		x	\b, misc  buffer 1 size %u
436>>38	ulelong		x	\b, misc  buffer 2 size %u
437>>42	ulelong		x	\b, flags %#x
438>>46	lelong		x	\b, read size %d
439>>50	lelong		x	\b, number of reads %d
440>>54	leshort		x	\b, version %d
441
442# https://github.com/kspalaiologos/bzip3/blob/master/doc/file_format.md ("BZ3v1" signature, LE long at 5 printed as the block size)
4430	string/b	BZ3v1	bzip3 compressed data
444>5	ulelong		x	\b, blocksize %u
445
446
447# https://support-docs.illumina.com/SW/ORA_Format_Specification/Content/\
448# SW/ORA/ORAFormatSpecification.htm
449# From Guillaume Rizk; metadata fields use negative offsets; NOTE(review): same 0x7C49 value as the ORA FASTQ rule earlier in this file, here tested as a "short" without le/be prefix
4500	short	=0x7C49 DRAGEN ORA file,
451>-261	short	=0x7C49 with metadata:
452>-125	u8	x	NB reads: %llu,
453>-109	u8	x	NB bases: %llu.
454>-219	u4&0x02	2	File contains interleaved paired reads
455
456# https://github.com/xamarin/xamarin-android/pull/4686 ("XALZ" signature, LE long at 8 printed as the uncompressed size)
4570	string	XALZ	Xamarin LZ4-compressed assembly
458>8	ulelong	x	\b, uncompressed size %u
459