#------------------------------------------------------------------------------
# $File: archive,v 1.133 2019/11/15 21:03:14 christos Exp $
# archive: file(1) magic for archive formats (see also "msdos" for self-
# extracting compressed archives)
#
# cpio, ar, arc, arj, hpack, lha/lharc, rar, squish, uc2, zip, zoo, etc.
# pre-POSIX "tar" archives are also handled in the C code ../../src/is_tar.c.

# POSIX tar archives
# URL: https://en.wikipedia.org/wiki/Tar_(computing)
# Reference: https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5&manpath=FreeBSD+8-current
# the header is mainly padded with nul bytes
500 quad 0
!:strength /2
# filename or extended attribute: printable strings in range space null until umlaut ue
>0 ubeshort >0x1F00
>>0 ubeshort <0xFCFD
# last 4 header bytes are often null, but tar\0 in gtarfail2.tar gtarfail.tar-bad
# at https://sourceforge.net/projects/s-tar/files/testscripts/
>>>508 ubelong&0x8B9E8DFF 0
# nul, space or ASCII digit 0-7 at start of mode
>>>>100 ubyte&0xC8 =0
>>>>>101 ubyte&0xC8 =0
# nul or space at end of checksum
>>>>>>155 ubyte&0xDF =0
# space or ASCII digit 0 at start of checksum
>>>>>>>148 ubyte&0xEF =0x20
>>>>>>>>0 use tar-file
# minimal check, then display tar archive information; tar data can also be
# embedded inside other formats like Android Backup, Clam AntiVirus database
0 name tar-file
>257 string !ustar
# header padded with nuls
>>257 ulong =0
# GNU tar version 1.29 with non pax format option without refusing
# creates misleading V7 header for Long path, Multi-volume, Volume type
>>>156 ubyte 0x4c GNU tar archive
!:mime application/x-gtar
!:ext tar/gtar
>>>156 ubyte 0x4d GNU tar archive
!:mime application/x-gtar
!:ext tar/gtar
>>>156 ubyte 0x56 GNU tar archive
!:mime application/x-gtar
!:ext tar/gtar
>>>156 default x tar archive (V7)
!:mime application/x-tar
!:ext tar
# other stuff in padding
# some implementations add new fields to the blank area at the end of the header record
# created for example by DOS TAR 3.20g 1994 Tim V.Shapore with -j option
>>257 ulong !0 tar archive (old)
!:mime application/x-tar
!:ext tar
# magic in newer, GNU, posix variants
>257 string =ustar
# test the last 2 chars of the magic plus the UStar version, because a string expression does not work
# 2 space characters followed by a null for GNU variant
>>261 ubelong =0x72202000 POSIX tar archive (GNU)
!:mime application/x-gtar
!:ext tar/gtar
# UStar version with ASCII "00"
>>261 ubelong 0x72003030 POSIX
# gLOBAL and ExTENSION type only found in POSIX.1-2001 format
>>>156 ubyte 0x67 \b.1-2001
>>>156 ubyte 0x78 \b.1-2001
>>>156 ubyte x tar archive
!:mime application/x-ustar
!:ext tar/ustar
# version with 2 binary nuls embedded in Android Backup like com.android.settings.ab
>>261 ubelong 0x72000000 tar archive (ustar)
!:mime application/x-ustar
!:ext tar/ustar
# ustar variant with garbage version; not seen so far
>>261 default x tar archive (unknown ustar)
!:mime application/x-ustar
!:ext tar/ustar
# type flag of 1st tar archive member
#>156 ubyte x \b, %c-type
>156 ubyte x
>>156 ubyte 0 \b, file
>>156 ubyte 0x30 \b, file
>>156 ubyte 0x31 \b, hard link
>>156 ubyte 0x32 \b, symlink
>>156 ubyte 0x33 \b, char device
>>156 ubyte 0x34 \b, block device
>>156 ubyte 0x35 \b, directory
>>156 ubyte 0x36 \b, fifo
>>156 ubyte 0x37 \b, reserved
>>156 ubyte 0x4c \b, long path
>>156 ubyte 0x4d \b, multi volume
>>156 ubyte 0x56 \b, volume
>>156 ubyte 0x67 \b, global
>>156 ubyte 0x78 \b, extension
>>156 default x \b, type
>>>156 ubyte x '%c'
# name[100]
>0 string >\0 %-.60s
# mode mainly stored as an octal number in ASCII, null or space terminated
>100 string >\0 \b, mode %-.7s
# user id mainly as octal number in ASCII, null or space terminated
>108 string >\0 \b, uid %-.7s
# group id mainly as octal number in ASCII, null or space terminated
>116 string >\0 \b, gid %-.7s
# size mainly as octal number in ASCII
>124 ubyte <0x38
>>124 string >\0 \b, size %-.12s
# coding indicated by setting the high-order bit of the leftmost byte
>124 ubyte >0xEF \b, size 0x
>>124 ubyte !0xff \b%2.2x
>>125 ubyte !0xff \b%2.2x
>>126 ubyte !0xff \b%2.2x
>>127 ubyte !0xff \b%2.2x
>>128 ubyte !0xff \b%2.2x
>>129 ubyte !0xff \b%2.2x
>>130 ubyte !0xff \b%2.2x
>>131 ubyte !0xff \b%2.2x
>>132 ubyte !0xff \b%2.2x
>>133 ubyte !0xff \b%2.2x
>>134 ubyte !0xff \b%2.2x
>>135 ubyte !0xff \b%2.2x
# seconds since 0:0:0 1 jan 1970 UTC as octal number, mainly in ASCII, null or space terminated
>136 string >\0 \b, seconds %-.11s
# header checksum stored as an octal number in ASCII, null or space terminated
#>148 string x \b, cksum %.7s
# linkname[100]
>157 string >\0 \b, linkname %-.40s
# additional fields for ustar
>257 string =ustar
# owner user name, null terminated
>>265 string >\0 \b, user %-.32s
# group name, null terminated
>>297 string >\0 \b, group %-.32s
# device major minor if not zero
>>329 ubequad&0xCFCFCFCFcFcFcFdf !0
>>>329 string x \b, devmaj %-.7s
>>337 ubequad&0xCFCFCFCFcFcFcFdf !0
>>>337 string x \b, devmin %-.7s
# prefix[155]
>>345 string >\0 \b, prefix %-.155s
# old non ustar/POSIX tar
>257 string !ustar
>>508 string =tar\0
# padding[255] in old star
>>>257 string >\0 \b, padding: %-.40s
>>508 default x
# padding[255] in old tar, sometimes used as a comment field
>>>257 string >\0 \b, comment: %-.40s

# Incremental snapshot gnu-tar format from:
# https://www.gnu.org/software/tar/manual/html_node/Snapshot-Files.html
0 string GNU\ tar- GNU tar incremental snapshot data
>&0 regex [0-9]\.[0-9]+-[0-9]+ version %s

# cpio archives
#
# Yes, the top two "cpio archive" formats *are* supposed to just be "short".
# The idea is to indicate archives produced on machines with the same
# byte order as the machine running "file" with "cpio archive", and
# to indicate archives produced on machines with the opposite byte order
# from the machine running "file" with "byte-swapped cpio archive".
#
# The SVR4 "cpio(4)" hints that there are additional formats, but they
# are defined as "short"s; I think all the new formats are
# character-header formats and thus are strings, not numbers.
0 short 070707 cpio archive
!:mime application/x-cpio
0 short 0143561 byte-swapped cpio archive
!:mime application/x-cpio # encoding: swapped
# The character-header variants are cpio archives as well, so they report
# the same MIME type as the binary-header entries above.
0 string 070707 ASCII cpio archive (pre-SVR4 or odc)
!:mime application/x-cpio
0 string 070701 ASCII cpio archive (SVR4 with no CRC)
!:mime application/x-cpio
0 string 070702 ASCII cpio archive (SVR4 with CRC)
!:mime application/x-cpio

#
# Various archive formats used by various versions of the "ar"
# command.
#

#
# Original UNIX archive formats.
# They were written with binary values in host byte order, and
# the magic number was a host "int", which might have been 16 bits
# or 32 bits. We don't say "PDP-11" or "VAX", as there might have
# been ports to little-endian 16-bit-int or 32-bit-int platforms
# (x86?) using some of those formats; if none existed, feel free
# to use "PDP-11" for little-endian 16-bit and "VAX" for little-endian
# 32-bit. There might have been big-endian ports of that sort as
# well.
#
# magic 0177555: "very old" archives, one entry per int width and byte order
0 leshort 0177555 very old 16-bit-int little-endian archive
0 beshort 0177555 very old 16-bit-int big-endian archive
0 lelong 0177555 very old 32-bit-int little-endian archive
0 belong 0177555 very old 32-bit-int big-endian archive

# magic 0177545: "old" archives; a first member named __.SYMDEF, found
# right after the magic, is reported as a random library
0 leshort 0177545 old 16-bit-int little-endian archive
>2 string __.SYMDEF random library
0 beshort 0177545 old 16-bit-int big-endian archive
>2 string __.SYMDEF random library
0 lelong 0177545 old 32-bit-int little-endian archive
>4 string __.SYMDEF random library
0 belong 0177545 old 32-bit-int big-endian archive
>4 string __.SYMDEF random library

#
# From "pdp" (but why a 4-byte quantity?)
#
0 lelong 0x39bed PDP-11 old archive
0 lelong 0x39bee PDP-11 4.0 archive

#
# XXX - what flavor of APL used this, and was it a variant of
# some ar archive format? It's similar to, but not the same
# as, the APL workspace magic numbers in pdp.
#
0 long 0100554 apl workspace

#
# System V Release 1 portable(?) archive format.
#
0 string =<ar> System V Release 1 ar archive
!:mime application/x-archive

#
# Debian package; it's in the portable archive format, and needs to go
# before the entry for regular portable archives, as it's recognized as
# a portable archive whose first member has a name beginning with
# "debian".
#
# Update: Joerg Jenderek
# URL: https://en.wikipedia.org/wiki/Deb_(file_format)
0 string =!<arch>\ndebian
# https://manpages.debian.org/testing/dpkg/dpkg-split.1.en.html
>14 string -split part of multipart Debian package
!:mime application/vnd.debian.binary-package
# udeb is used for stripped down deb files
!:ext deb/udeb
>14 string -binary Debian binary package
!:mime application/vnd.debian.binary-package
!:ext deb/udeb
# This should not happen
>14 default x Unknown Debian package
# NL terminated version; for most Debian cases this is 2.0, or 2.1 for split packages
>68 string >\0 (format %s)
#>68 string !2.0\n
#>>68 string x (format %.3s)
>68 string =2.0\n
# 2nd archive name=control archive name like control.tar.gz or control.tar.xz
>>72 string >\0 \b, with %.14s
# look for 3rd archive name=data archive name like data.tar.{gz,xz,bz2,lzma}
>>0 search/0x93e4f data.tar. \b, data compression
# the above line only works if FILE_BYTES_MAX in ../../src/file.h is raised
# for example like libreoffice-dev-doc_1%3a5.2.7-1+rpi1+deb9u3_all.deb
>>>&0 string x %.4s
# split Debian package case
>68 string =2.1\n
# dpkg-1.18.25/dpkg-split/info.c
# NL terminated ASCII package name like ckermit
>>&0 string x \b, %s
# NL terminated package version like 302-5.3
>>>&1 string x %s
# NL terminated MD5 checksum
>>>>&1 string x \b, MD5 %s
# NL terminated original package length
>>>>>&1 string x \b, unsplitted size %s
# NL terminated part length
>>>>>>&1 string x \b, part length %s
# NL terminated package part like n/m
>>>>>>>&1 string x \b, part %s
# NL terminated package architecture like armhf since dpkg 1.16.1 or later
>>>>>>>>&1 string x \b, %s

#
# MIPS archive; they're in the portable archive format, and need to go
# before the entry for regular portable archives, as it's recognized as
# a portable archive whose first member has a name beginning with
# "__________E".
#
0 string =!<arch>\n__________E MIPS archive
!:mime application/x-archive
>20 string U with MIPS Ucode members
>21 string L with MIPSEL members
>21 string B with MIPSEB members
>19 string L and an EL hash table
>19 string B and an EB hash table
>22 string X -- out of date

#
# BSD/SVR2-and-later portable archive formats.
#
# Update: Joerg Jenderek
# URL: http://fileformats.archiveteam.org/wiki/AR
# Reference: https://www.unix.com/man-page/opensolaris/3HEAD/ar.h/
# Note: Mach-O universal binary in ./cafebabe is dependent
# TODO: unify current ar archive, MIPS archive, Debian package
# distinguish BSD, SVR; 32, 64 bit; HP from other 32-bit SVR;
# *.ar packages from *.a libraries. handle empty archive
0 string =!<arch>\n current ar archive
# print first and possibly second ar_name[16] for debugging purposes
#>8 string x \b, 1st "%.16s"
#>68 string x \b, 2nd "%.16s"
!:mime application/x-archive
# a in most cases for libraries; lib for Microsoft libraries; ar in other cases
!:ext a/lib/ar
>8 string __.SYMDEF random library
# first member with long marked name __.SYMDEF SORTED implies BSD library
>68 string __.SYMDEF\ SORTED random library
# Reference: https://parisc.wiki.kernel.org/images-parisc/b/b2/Rad_11_0_32.pdf
# "archive file" entry moved from ./hp
# LST header system_id 0210h~PA-RISC 1.1,... identifies the target architecture
# LST header a_magic 0619h~relocatable library
>68 belong 0x020b0619 - PA-RISC1.0 relocatable library
>68 belong 0x02100619 - PA-RISC1.1 relocatable library
>68 belong 0x02110619 - PA-RISC1.2 relocatable library
>68 belong 0x02140619 - PA-RISC2.0 relocatable library
#EOF for common ar archives

#
# "Thin" archive, as can be produced by GNU ar.
#
0 string =!<thin>\n thin archive with
>68 belong 0 no symbol entries
>68 belong 1 %d symbol entry
>68 belong >1 %d symbol entries

0 search/1 -h- Software Tools format archive text

# ARC archiver, from Daniel Quinlan (quinlan@yggdrasil.com)
#
# The first byte is the magic (0x1a), byte 2 is the compression type for
# the first file (0x01 through 0x09), and bytes 3 to 15 are the MS-DOS
# filename of the first file (null terminated). Since some types collide
# we only test some types on basis of frequency: 0x08 (83%), 0x09 (5%),
# 0x02 (5%), 0x03 (3%), 0x04 (2%), 0x06 (2%). 0x01 collides with terminfo.
0 lelong&0x8080ffff 0x0000081a ARC archive data, dynamic LZW
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000091a ARC archive data, squashed
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000021a ARC archive data, uncompressed
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000031a ARC archive data, packed
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000041a ARC archive data, squeezed
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000061a ARC archive data, crunched
!:mime application/x-arc
# [JW] stuff taken from idarc, obviously ARC successors:
0 lelong&0x8080ffff 0x00000a1a PAK archive data
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000141a ARC+ archive data
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000481a HYP archive data
!:mime application/x-arc

# Acorn archive formats (Disaster prone simpleton, m91dps@ecs.ox.ac.uk)
# I can't create either SPARK or ArcFS archives so I have not tested this stuff
# [GRR: the original entries collide with ARC, above; replaced with combined
# version (not tested)]
#0 byte 0x1a RISC OS archive (spark format)
0 string \032archive RISC OS archive (ArcFS format)
0 string Archive\000 RISC OS archive (ArcFS format)

# All these were taken from idarc, many could not be verified. Unfortunately,
# there were many low-quality sigs, i.e. easy to trigger false positives.
# Please notify me of any real-world fishy/ambiguous signatures and I'll try
# to get my hands on the actual archiver and see if I find something better. [JW]
# probably many can be enhanced by finding some 0-byte or control char near the start

# idarc calls this Crush/Uncompressed... *shrug*
0 string CRUSH Crush archive data
# Squeeze It (.sqz)
0 string HLSQZ Squeeze It archive data
# SQWEZ
0 string SQWEZ SQWEZ archive data
# HPack (.hpk)
0 string HPAK HPack archive data
# HAP
0 string \x91\x33HF HAP archive data
# MD/MDCD
0 string MDmd MDCD archive data
# LIM
0 string LIM\x1a LIM archive data
# SAR
3 string LH5 SAR archive data
# BSArc/BS2
0 string \212\3SB\020\0 BSArc/BS2 archive data
# Bethesda Softworks Archive (Oblivion)
0 string BSA\0 BSArc archive data
>4 lelong x version %d
# MAR
2 string =-ah MAR archive data
# ACB
#0 belong&0x00f800ff 0x00800000 ACB archive data
# CPZ
# TODO, this is what idarc says: 0 string \0\0\0 CPZ archive data
# JRC
0 string JRchive JRC archive data
# Quantum
0 string DS\0 Quantum archive data
# ReSOF
0 string PK\3\6 ReSOF archive data
# QuArk
0 string 7\4 QuArk archive data
# YAC
14 string YC YAC archive data
# X1
0 string X1 X1 archive data
0 string XhDr X1 archive data
# CDC Codec (.dqt)
0 belong&0xffffe000 0x76ff2000 CDC Codec archive data
# AMGC
0 string \xad6" AMGC archive data
# NuLIB
0 string N\xc3\xb5F\xc3\xa9lx\xc3\xa5 NuLIB archive data
# PakLeo
0 string LEOLZW PAKLeo archive data
# ChArc
0 string SChF ChArc archive data
# PSA
0 string PSA PSA archive data
# CrossePAC
0 string DSIGDCC CrossePAC archive data
# Freeze
0 string \x1f\x9f\x4a\x10\x0a Freeze archive data
# KBoom
0 string \xc2\xa8MP\xc2\xa8 KBoom archive data
# NSQ, must go after CDC Codec
0 string \x76\xff NSQ archive data
# DPA
0 string Dirk\ Paehl DPA archive data
# BA
# TODO: idarc says "bytes 0-2 == bytes 3-5"
# TTComp
# URL: http://fileformats.archiveteam.org/wiki/TTComp_archive
# Update: Joerg Jenderek
# GRR: line below is too general as it matches also Panorama database "TCDB 2003-10 demo.pan", others
0 string \0\6
# look for first keyword of Panorama database *.pan
>12 search/261 DESIGN
# skip keyword with low entropy
>12 default x TTComp archive, binary, 4K dictionary
# (version 5.25) labeled the above entry as "TTComp archive data"
# From: Joerg Jenderek
# URL: https://wiki.68kmla.org/DiskCopy_4.2_format_specification
# reference: http://nulib.com/library/FTN.e00005.htm
0x52 ubeshort 0x0100
# test for disk size equal or above 400k
>0x40 ubelong >409599 Apple DiskCopy 4.2 image
#!:mime application/octet-stream
!:apple dCpydImg
!:ext image/dc42
# image pascal name padded with NULs like Microsoft Mail
>>00 pstring/B x %s
# data size in bytes like 409600
>>0x40 ubelong x \b, %u bytes
# tag size in bytes
>>0x44 ubelong >0 \b, 0x%x tag size
# data checksum
#>>0x48 ubelong x \b, 0x%x checksum
# tag checksum
#>>0x4c ubelong x \b, 0x%x tag checksum
# disk encoding
>>0x50 ubyte 0 \b, GCR CLV ssdd (400k)
>>0x50 ubyte 1 \b, GCR CLV dsdd (800k)
>>0x50 ubyte 2 \b, MFM CAV dsdd (720k)
>>0x50 ubyte 3 \b, MFM CAV dshd (1440k)
>>0x50 ubyte >3 \b, 0x%x encoding
# format byte
>>0x51 ubyte x \b, 0x%x format
#>>0x54 ubequad x \b, data 0x%16.16llx
# ESP, could this conflict with Easy Software Products' (e.g.ESP ghostscript) documentation?
0 string ESP ESP archive data
# ZPack
0 string \1ZPK\1 ZPack archive data
# Sky
0 string \xbc\x40 Sky archive data
# UFA
0 string UFA UFA archive data
# Dry
0 string =-H2O DRY archive data
# FoxSQZ
0 string FOXSQZ FoxSQZ archive data
# AR7
0 string ,AR7 AR7 archive data
# PPMZ
0 string PPMZ PPMZ archive data
# MS Compress
# Update: Joerg Jenderek
# URL: http://fileformats.archiveteam.org/wiki/MS-DOS_installation_compression
# Reference: https://hwiegman.home.xs4all.nl/fileformats/compress/szdd_kwaj_format.html
# Note: use correct version of extracting tool like EXPAND, UNPACK, DECOMP or 7Z
4 string \x88\xf0\x27
# KWAJ variant
>0 string KWAJ MS Compress archive data, KWAJ variant
!:mime application/x-ms-compress-kwaj
# extension not working in version 5.32
# magic/Magdir/archive, 284: Warning: EXTENSION type ` ??_' has bad char '?'
# file: line 284: Bad magic entry ' ??_'
!:ext ??_
# compression method (0-4)
>>8 uleshort x \b, %u method
# offset of compressed data
>>10 uleshort x \b, 0x%x offset
#>>(10.s) uleshort x
#>>>&-6 string x \b, TEST extension %-.3s
# header flags to mark header extensions
>>12 uleshort >0 \b, 0x%x flags
# 4 bytes: decompressed length of file
>>12 uleshort &0x01
>>>14 ulelong x \b, original size: %u bytes
# 2 bytes: unknown purpose
# 2 bytes: length of unknown data + mentioned bytes
# 1-9 bytes: null-terminated file name
# 1-4 bytes: null-terminated file extension
#
# The tree below is entered when flag 0x08 is set and branches on flags
# 0x01, 0x02 and 0x04 to pick the offset of the name (14, 16, 18 or 20, or
# behind an indirect (N.s) rule when 0x04 is set). Every leaf prints the
# name as "\b, %-.8s"; when flag 0x10 is set a child rule appends the
# extension as "\b.%-.3s". The relative "&N" name rule must be a child of
# the "(N.s)" rule, i.e. exactly one level deeper.
>>12 uleshort &0x08
>>>12 uleshort ^0x01
>>>>12 uleshort ^0x02
>>>>>12 uleshort ^0x04
>>>>>>12 uleshort ^0x10
>>>>>>>14 string x \b, %-.8s
>>>>>>12 uleshort &0x10
>>>>>>>14 string x \b, %-.8s
>>>>>>>>&1 string x \b.%-.3s
>>>>>12 uleshort &0x04
>>>>>>12 uleshort ^0x10
>>>>>>>(14.s) uleshort x
>>>>>>>>&14 string x \b, %-.8s
>>>>>>12 uleshort &0x10
>>>>>>>(14.s) uleshort x
>>>>>>>>&14 string x \b, %-.8s
>>>>>>>>>&1 string x \b.%-.3s
>>>>12 uleshort &0x02
>>>>>12 uleshort ^0x04
>>>>>>12 uleshort ^0x10
>>>>>>>16 string x \b, %-.8s
>>>>>>12 uleshort &0x10
>>>>>>>16 string x \b, %-.8s
>>>>>>>>&1 string x \b.%-.3s
>>>>>12 uleshort &0x04
>>>>>>12 uleshort ^0x10
>>>>>>>(16.s) uleshort x
>>>>>>>>&16 string x \b, %-.8s
>>>>>>12 uleshort &0x10
>>>>>>>(16.s) uleshort x
>>>>>>>>&16 string x \b, %-.8s
>>>>>>>>>&1 string x \b.%-.3s
>>>12 uleshort &0x01
>>>>12 uleshort ^0x02
>>>>>12 uleshort ^0x04
>>>>>>12 uleshort ^0x10
>>>>>>>18 string x \b, %-.8s
>>>>>>12 uleshort &0x10
>>>>>>>18 string x \b, %-.8s
>>>>>>>>&1 string x \b.%-.3s
>>>>>12 uleshort &0x04
>>>>>>12 uleshort ^0x10
>>>>>>>(18.s) uleshort x
>>>>>>>>&18 string x \b, %-.8s
>>>>>>12 uleshort &0x10
>>>>>>>(18.s) uleshort x
>>>>>>>>&18 string x \b, %-.8s
>>>>>>>>>&1 string x \b.%-.3s
>>>>12 uleshort &0x02
>>>>>12 uleshort ^0x04
>>>>>>12 uleshort ^0x10
>>>>>>>20 string x \b, %-.8s
>>>>>>12 uleshort &0x10
>>>>>>>20 string x \b, %-.8s
>>>>>>>>&1 string x \b.%-.3s
>>>>>12 uleshort &0x04
>>>>>>12 uleshort ^0x10
>>>>>>>(20.s) uleshort x
>>>>>>>>&20 string x \b, %-.8s
>>>>>>12 uleshort &0x10
>>>>>>>(20.s) uleshort x
>>>>>>>>&20 string x \b, %-.8s
>>>>>>>>>&1 string x \b.%-.3s
# 2 bytes: length of data + mentioned bytes
#
# SZDD variant Haruhiko Okumura's LZSS or 7z type MsLZ
>0 string SZDD MS Compress archive data, SZDD variant
!:mime application/x-ms-compress-szdd
!:ext ??_
# The character missing from the end of the filename (0=unknown)
>>9 string >\0 \b, %-.1s is last character of original name
# https://www.betaarchive.com/forum/viewtopic.php?t=26161
# Compression mode: "A" (0x41) found but sometimes "B" in Windows 3.1 builds 026 and 034e
>>8 string !A \b, %-.1s method
>>10 ulelong >0 \b, original size: %u bytes
# QBasic SZDD variant
3 string \x88\xf0\x27
>0 string SZ\x20 MS Compress archive data, QBasic variant
!:mime application/x-ms-compress-sz
!:ext ??$
>>8 ulelong >0 \b, original size: %u bytes

# MP3 (archiver, not lossy audio compression)
0 string MP3\x1a MP3-Archiver archive data
# ZET
0 string OZ\xc3\x9d ZET archive data
# TSComp
0 string \x65\x5d\x13\x8c\x08\x01\x03\x00 TSComp archive data
# ARQ
0 string gW\4\1 ARQ archive data
# Squash
3 string OctSqu Squash archive data
# Terse
0 string \5\1\1\0 Terse archive data
# PUCrunch
0 string \x01\x08\x0b\x08\xef\x00\x9e\x32\x30\x36\x31 PUCrunch archive data
# UHarc
0 string UHA UHarc archive data
# ABComp
0 string \2AB ABComp archive data
0 string \3AB2 ABComp archive data
# CMP
0 string CO\0 CMP archive data
# Splint
0 string \x93\xb9\x06 Splint archive data
# InstallShield
0 string \x13\x5d\x65\x8c InstallShield Z archive Data
# Gather
1 string GTH Gather archive data
# BOA
0 string BOA BOA archive data
# RAX
0 string ULEB\xa RAX archive data
# Xtreme
0 string ULEB\0 Xtreme archive data
# Pack Magic
0 string @\xc3\xa2\1\0 Pack Magic archive data
# BTS
0 belong&0xfeffffff 0x1a034465 BTS archive data
# ELI 5750
# the pattern ends in an escaped blank ("Ora" plus one space); keep the
# separator that follows it, or "ELI" becomes part of the pattern
0 string Ora\  ELI 5750 archive data
# QFC
0 string \x1aFC\x1a QFC archive data
0 string \x1aQF\x1a QFC archive data
# PRO-PACK
0 string RNC PRO-PACK archive data
# 777
0 string 777 777 archive data
# LZS221
0 string sTaC LZS221 archive data
# HPA
0 string HPA HPA archive data
# Arhangel
0 string LG Arhangel archive data
# EXP1, uses bzip2
0 string 0123456789012345BZh EXP1 archive data
# IMP
0 string IMP\xa IMP archive data
# NRV
0 string \x00\x9E\x6E\x72\x76\xFF NRV archive data
# Squish
0 string \x73\xb2\x90\xf4 Squish archive data
# Par
0 string PHILIPP Par archive data
0 string PAR Par archive data
# HIT
0 string UB HIT archive data
# SBX
0 belong&0xfffff000 0x53423000 SBX archive data
# NaShrink
0 string NSK NaShrink archive data
# SAPCAR
0 string #\ CAR\ archive\ header SAPCAR archive data
0 string CAR\ 2.00RG SAPCAR archive data
# Disintegrator
0 string DST Disintegrator archive data
# ASD
0 string ASD ASD archive data
# InstallShield CAB
0 string ISc( InstallShield CAB
# TOP4
0 string T4\x1a TOP4 archive data
# BatComp left out: sig looks like COM executable
# so TODO: get real 4dos batcomp file and find sig
# BlakHole
0 string BH\5\7 BlakHole archive data
# BIX
0 string BIX0 BIX archive data
# ChiefLZA
0 string ChfLZ ChiefLZA archive data
# Blink
0 string Blink Blink archive data
# Logitech Compress
0 string \xda\xfa Logitech Compress archive data
# ARS-Sfx (FIXME: really a SFX? then goto COM/EXE)
1 string (C)\ STEPANYUK ARS-Sfx archive data
# AKT/AKT32
0 string AKT32 AKT32 archive data
0 string AKT AKT archive data
# NPack
0 string MSTSM NPack archive data
# PFT
0 string \0\x50\0\x14 PFT archive data
# SemOne
0 string SEM SemOne archive data
# PPMD
0 string \x8f\xaf\xac\x84 PPMD archive data
# FIZ
0 string FIZ FIZ archive data
# MSXiE
0 belong&0xfffff0f0 0x4d530000 MSXiE archive data
# DeepFreezer
0 belong&0xfffffff0 0x797a3030 DeepFreezer archive data
# DC
0 string =<DC- DC archive data
# TPac
0 string \4TPAC\3 TPac archive data
# Ai
0 string Ai\1\1\0 Ai archive data
0 string Ai\1\0\0 Ai archive data
# Ai32
0 string Ai\2\0 Ai32 archive data
0 string Ai\2\1 Ai32 archive data
# SBC
0 string SBC SBC archive data
# Ybs
0 string YBS Ybs archive data
# DitPack
0 string \x9e\0\0 DitPack archive data
# DMS
0 string DMS! DMS archive data
# EPC
0 string \x8f\xaf\xac\x8c EPC archive data
# VSARC
0 string VS\x1a VSARC archive data
# PDZ
0 string PDZ PDZ archive data
# ReDuq
0 string rdqx ReDuq archive data
# GCA
0 string GCAX GCA archive data
# PPMN
0 string pN PPMN archive data
# WinImage
3 string WINIMAGE WinImage archive data
# Compressia
0 string CMP0CMP Compressia archive data
# UHBC
0 string UHB UHBC archive data
# WinHKI
0 string \x61\x5C\x04\x05 WinHKI archive data
# WWPack data file
0 string WWP WWPack archive data
# BSN (BSA, PTS-DOS)
0 string \xffBSG BSN archive data
1 string \xffBSG BSN archive data
3 string \xffBSG BSN archive data
1 string \0\xae\2 BSN archive data
1 string \0\xae\3 BSN archive data
1 string \0\xae\7 BSN archive data
# AIN
0 string \x33\x18 AIN archive data
0 string \x33\x17 AIN archive data
# XPA32 test moved and merged with XPA by Joerg Jenderek at Sep 2015
# SZip (TODO: doesn't catch all versions)
0 string SZ\x0a\4 SZip archive data
# XPack DiskImage
# *.XDI updated by Joerg Jenderek Sep 2015
# ftp://ftp.sac.sk/pub/sac/pack/0index.txt
# GRR: this test is still too general as it also catches text files starting with jm
0 string jm
# the examples found so far all carry these 2 additional characteristic bytes
>2 string \x2\x4 Xpack DiskImage archive data
#!:ext xdi
# XPack Data
# *.xpa updated by Joerg Jenderek Sep 2015
# ftp://ftp.elf.stuba.sk/pub/pc/pack/
0 string xpa XPA
!:ext xpa
# XPA32
# ftp://ftp.elf.stuba.sk/pub/pc/pack/xpa32.zip
# created by XPA32.EXE version 1.0.2 for Windows
>0 string xpa\0\1 \b32 archive data
# created by XPACK.COM version 1.67m or 1.67r with short 0x1800
>3 ubeshort !0x0001 \bck archive data
# XPack Single Data
# changed by Joerg Jenderek Sep 2015 back to like in version 5.12
# letter 'I'+ acute accent is equivalent to \xcd
0 string \xcd\ jm Xpack single archive data
#!:mime application/x-xpa-compressed
!:ext xpa

# TODO: missing due to unknown magic/magic at end of file:
#DWC
#ARG
#ZAR
#PC/3270
#InstallIt
#RKive
#RK
#XPack Diskimage

# These were inspired by idarc, but actually verified
# Dzip archiver (.dz)
# Update: Joerg Jenderek
# URL: http://speeddemosarchive.com/dzip/
# reference: http://speeddemosarchive.com/dzip/dz29src.zip/main.c
# GRR: line below is too general as it matches also ASCII texts like Doszip commander help dz.txt
0 string DZ
# latest version is 2.9 dated 7 may 2003
>2 byte <4 Dzip archive data
!:mime application/x-dzip
!:ext dz
>>2 byte x \b, version %i
>>3 byte x \b.%i
>>4 ulelong x \b, offset 0x%x
>>8 ulelong x \b, %u files
# ZZip archiver (.zz)
0 string ZZ\ \0\0 ZZip archive data
0 string ZZ0 ZZip archive data
# PAQ archiver (.paq)
0 string \xaa\x40\x5f\x77\x1f\xe5\x82\x0d PAQ archive data
0 string PAQ PAQ archive data
>3 byte&0xf0 0x30
>>3 byte x (v%c)
# JAR archiver (.j), this is the successor to ARJ, not Java's JAR (which is essentially ZIP)
0xe string \x1aJar\x1b JAR (ARJ Software, Inc.) archive data
0 string JARCS JAR (ARJ Software, Inc.) archive data

# ARJ archiver (jason@jarthur.Claremont.EDU)
0 leshort 0xea60 ARJ archive data
!:mime application/x-arj
>5 byte x \b, v%d,
>8 byte &0x04 multi-volume,
>8 byte &0x10 slash-switched,
>8 byte &0x20 backup,
>34 string x original name: %s,
>7 byte 0 os: MS-DOS
>7 byte 1 os: PRIMOS
>7 byte 2 os: Unix
>7 byte 3 os: Amiga
>7 byte 4 os: Macintosh
>7 byte 5 os: OS/2
>7 byte 6 os: Apple ][ GS
>7 byte 7 os: Atari ST
>7 byte 8 os: NeXT
>7 byte 9 os: VAX/VMS
>3 byte >0 %d]
# [JW] idarc says this is also possible
2 leshort 0xea60 ARJ archive data

# HA archiver (Greg Roelofs, newt@uchicago.edu)
# This is a really bad format. A file containing HAWAII will match this...
#0 string HA HA archive data,
#>2 leshort =1 1 file,
#>2 leshort >1 %hu files,
#>4 byte&0x0f =0 first is type CPY
#>4 byte&0x0f =1 first is type ASC
#>4 byte&0x0f =2 first is type HSC
#>4 byte&0x0f =0x0e first is type DIR
#>4 byte&0x0f =0x0f first is type SPECIAL
# suggestion: at least identify small archives (<1024 files)
0 belong&0xffff00fc 0x48410000 HA archive data
>2 leshort =1 1 file,
>2 leshort >1 %u files,
>4 byte&0x0f =0 first is type CPY
>4 byte&0x0f =1 first is type ASC
>4 byte&0x0f =2 first is type HSC
>4 byte&0x0f =0x0e first is type DIR
>4 byte&0x0f =0x0f first is type SPECIAL

# HPACK archiver (Peter Gutmann, pgut1@cs.aukuni.ac.nz)
# NOTE(review): the same "HPAK" signature at offset 0 is also listed among
# the idarc-derived entries of this file ("HPack archive data")
0 string HPAK HPACK archive data

# JAM Archive volume format, by Dmitry.Kohmanyuk@UA.net
# the pattern ends in an escaped blank; keep the separator that follows it
0 string \351,\001JAM\  JAM archive,
>7 string >\0 version %.4s
>0x26 byte =0x27 -
>>0x2b string >\0 label %.11s,
>>0x27 lelong x serial %08x,
>>0x36 string >\0 fstype %.8s

# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
# Update: Joerg Jenderek
# URL: https://en.wikipedia.org/wiki/LHA_(file_format)
# Reference: https://web.archive.org/web/20021005080911/http://www.osirusoft.com/joejared/lzhformat.html
#
# check and display information of lharc (LHa,PMarc) file
0 name lharc-file
# check 1st character of method id like -lz4- -lh5- or -pm2-
>2 string -
# check 5th character of method id
>>6 string -
# check header level 0 1 2 3
>>>20 ubyte <4
# check 2nd, 3rd and 4th character of method id
>>>>3 regex \^(lh[0-9a-ex]|lz[s2-8]|pm[012]|pc1) \b
!:mime application/x-lzh-compressed
# creator type "LHA "
!:apple ????LHA
# display archive type name like "LHa/LZS archive data" or "LArc archive"
>>>>>2 string -lz \b
!:ext lzs
# already known -lzs- -lz4- -lz5- with old names
>>>>>>2 string -lzs LHa/LZS archive data
>>>>>>3 regex \^lz[45] LHarc 1.x archive data
# missing -lz?- with wikipedia names
>>>>>>3 regex \^lz[2378] LArc archive
# display archive type name like "LHa (2.x) archive data"
>>>>>2 string -lh \b
# already known -lh0- -lh1- -lh2- -lh3- -lh4- -lh5- -lh6- -lh7- -lhd- variants with old names
>>>>>>3 regex \^lh[01] LHarc 1.x/ARX archive data
# LHice archiver uses ".ICE" as name extension instead of the usual ".lzh"
# FOOBAR archiver uses ".foo" as name extension instead of the usual one
# "Florain Orjanov's and Olga Bachetska's ARchiver" not found at the moment
>>>>>>>2 string -lh1 \b
!:ext lha/lzh/ice
>>>>>>3 regex \^lh[23d] LHa 2.x? archive data
>>>>>>3 regex \^lh[7] LHa (2.x)/LHark archive data
>>>>>>3 regex \^lh[456] LHa (2.x) archive data
>>>>>>>2 string -lh5 \b
# https://en.wikipedia.org/wiki/BIOS
# Some mainboard BIOS like Award use LHa compression. So archives with unusual extensions are found like
# bios.rom , kd7_v14.bin, 1010.004, ...
!:ext lha/lzh/rom/bin
# missing -lh?- variants (Joe Jared)
>>>>>>3 regex \^lh[89a-ce] LHa (Joe Jared) archive
# UNLHA32 2.67a
>>>>>>2 string -lhx LHa (UNLHA32) archive
# lha archives with standard file name extensions ".lha" ".lzh"
>>>>>>3 regex !\^(lh1|lh5) \b
!:ext lha/lzh
# this should not happen if all -lh variants are described
>>>>>>2 default x LHa (unknown) archive
#!:ext lha
# PMarc
>>>>>3 regex \^pm[012] PMarc archive data
!:ext pma
# append method id without leading and trailing minus character
>>>>>3 string x [%3.3s]
>>>>>>0 use lharc-header
#
# check and display information of lharc header
0 name lharc-header
# header size 0x4 , 0x1b-0x61
>0 ubyte x
# compressed data size != compressed file size
#>7 ulelong x \b, data size %d
# attribute: 0x2~?? 0x10~symlink|target 0x20~normal
#>19 ubyte x \b, 19_0x%x
# level identifier 0 1 2 3
#>20 ubyte x \b, level %d
# time stamp
#>15 ubelong x DATE 0x%8.8x
# OS ID for level 1
>20 ubyte 1
# 0x20 types found for *.rom files
>>(21.b+24) ubyte <0x21 \b, 0x%x OS
# ascii type like M for MSDOS
>>(21.b+24) ubyte >0x20 \b, '%c' OS
# OS ID for level 2
>20 ubyte 2
#>>23 ubyte x \b, OS ID 0x%x
>>23 ubyte <0x21 \b, 0x%x OS
>>23 ubyte >0x20 \b, '%c' OS
# filename only for level 0 and 1
>20 ubyte <2
# length of filename
>>21 ubyte >0 \b, with
# filename
>>>21 pstring x "%s"
#
#2 string -lh0- LHarc 1.x/ARX archive data [lh0]
#!:mime application/x-lharc
2 string -lh0-
>0 use lharc-file
#2 string -lh1- LHarc 1.x/ARX archive data [lh1]
#!:mime application/x-lharc
2 string -lh1-
>0 use lharc-file
# NEW -lz2- ... -lz8-
2 string -lz2-
>0 use lharc-file
2 string -lz3-
>0 use lharc-file
2 string -lz4-
>0 use lharc-file
2 string -lz5-
>0 use lharc-file
2 string -lz7-
>0 use lharc-file
2 string -lz8-
>0 use lharc-file
# [never seen any but the last; -lh4- reported in comp.compression:]
#2 string -lzs- LHa/LZS archive data [lzs]
2 string -lzs-
>0 use lharc-file
# According to wikipedia and others such a version does not exist
#2 string -lh\40- LHa 2.x? archive data [lh ]
#2 string -lhd- LHa 2.x? archive data [lhd]
2 string -lhd-
>0 use lharc-file
#2 string -lh2- LHa 2.x? archive data [lh2]
2 string -lh2-
>0 use lharc-file
#2 string -lh3- LHa 2.x?
archive data [lh3] 10002 string -lh3- 1001>0 use lharc-file 1002#2 string -lh4- LHa (2.x) archive data [lh4] 10032 string -lh4- 1004>0 use lharc-file 1005#2 string -lh5- LHa (2.x) archive data [lh5] 10062 string -lh5- 1007>0 use lharc-file 1008#2 string -lh6- LHa (2.x) archive data [lh6] 10092 string -lh6- 1010>0 use lharc-file 1011#2 string -lh7- LHa (2.x)/LHark archive data [lh7] 10122 string -lh7- 1013# !:mime application/x-lha 1014# >20 byte x - header level %d 1015>0 use lharc-file 1016# NEW -lh8- ... -lhe- , -lhx- 10172 string -lh8- 1018>0 use lharc-file 10192 string -lh9- 1020>0 use lharc-file 10212 string -lha- 1022>0 use lharc-file 10232 string -lhb- 1024>0 use lharc-file 10252 string -lhc- 1026>0 use lharc-file 10272 string -lhe- 1028>0 use lharc-file 10292 string -lhx- 1030>0 use lharc-file 1031# taken from idarc [JW] 10322 string -lZ PUT archive data 1033# already done by LHarc magics 1034# this should never happen if all sub types of LZS archive are identified 1035#2 string -lz LZS archive data 10362 string -sw1- Swag archive data 1037 10380 name rar-file-header 1039>24 byte 15 \b, v1.5 1040>24 byte 20 \b, v2.0 1041>24 byte 29 \b, v4 1042>15 byte 0 \b, os: MS-DOS 1043>15 byte 1 \b, os: OS/2 1044>15 byte 2 \b, os: Win32 1045>15 byte 3 \b, os: Unix 1046>15 byte 4 \b, os: Mac OS 1047>15 byte 5 \b, os: BeOS 1048 10490 name rar-archive-header 1050>3 leshort&0x1ff >0 \b, flags: 1051>>3 leshort &0x01 ArchiveVolume 1052>>3 leshort &0x02 Commented 1053>>3 leshort &0x04 Locked 1054>>3 leshort &0x10 NewVolumeNaming 1055>>3 leshort &0x08 Solid 1056>>3 leshort &0x20 Authenticated 1057>>3 leshort &0x40 RecoveryRecordPresent 1058>>3 leshort &0x80 EncryptedBlockHeader 1059>>3 leshort &0x100 FirstVolume 1060 1061# RAR (Roshal Archive) archive 10620 string Rar!\x1a\7\0 RAR archive data 1063!:mime application/x-rar 1064!:ext rar/cbr 1065# file header 1066>(0xc.l+9) byte 0x74 1067>>(0xc.l+7) use rar-file-header 1068# subblock seems to share information with file header 
1069>(0xc.l+9) byte 0x7a 1070>>(0xc.l+7) use rar-file-header 1071>9 byte 0x73 1072>>7 use rar-archive-header 1073 10740 string Rar!\x1a\7\1\0 RAR archive data, v5 1075!:mime application/x-rar 1076!:ext rar 1077 1078# Very old RAR archive 1079# https://jasonblanks.com/wp-includes/images/papers/KnowyourarchiveRAR.pdf 10800 string RE\x7e\x5e RAR archive data (<v1.5) 1081!:mime application/x-rar 1082!:ext rar/cbr 1083 1084# SQUISH archiver (Greg Roelofs, newt@uchicago.edu) 10850 string SQSH squished archive data (Acorn RISCOS) 1086 1087# UC2 archiver (Greg Roelofs, newt@uchicago.edu) 1088# [JW] see exe section for self-extracting version 10890 string UC2\x1a UC2 archive data 1090 1091# PKZIP multi-volume archive 10920 string PK\x07\x08PK\x03\x04 Zip multi-volume archive data, at least PKZIP v2.50 to extract 1093!:mime application/zip 1094!:ext zip/cbz 1095 1096# Zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu) 10970 string PK\005\006 Zip archive data (empty) 1098!:mime application/zip 1099!:ext zip/cbz 1100!:strength +1 11010 string PK\003\004 1102!:strength +1 1103 1104# Specialised zip formats which start with a member named 'mimetype' 1105# (stored uncompressed, with no 'extra field') containing the file's MIME type. 1106# Check for have 8-byte name, 0-byte extra field, name "mimetype", and 1107# contents starting with "application/": 1108>26 string \x8\0\0\0mimetypeapplication/ 1109 1110# KOffice / OpenOffice & StarOffice / OpenDocument formats 1111# From: Abel Cheung <abel@oaka.org> 1112 1113# KOffice (1.2 or above) formats 1114# (mimetype contains "application/vnd.kde.<SUBTYPE>") 1115>>50 string vnd.kde. 
KOffice (>=1.2) 1116>>>58 string karbon Karbon document 1117>>>58 string kchart KChart document 1118>>>58 string kformula KFormula document 1119>>>58 string kivio Kivio document 1120>>>58 string kontour Kontour document 1121>>>58 string kpresenter KPresenter document 1122>>>58 string kspread KSpread document 1123>>>58 string kword KWord document 1124 1125# OpenOffice formats (for OpenOffice 1.x / StarOffice 6/7) 1126# (mimetype contains "application/vnd.sun.xml.<SUBTYPE>") 1127>>50 string vnd.sun.xml. OpenOffice.org 1.x 1128>>>62 string writer Writer 1129>>>>68 byte !0x2e document 1130>>>>68 string .template template 1131>>>>68 string .global global document 1132>>>62 string calc Calc 1133>>>>66 byte !0x2e spreadsheet 1134>>>>66 string .template template 1135>>>62 string draw Draw 1136>>>>66 byte !0x2e document 1137>>>>66 string .template template 1138>>>62 string impress Impress 1139>>>>69 byte !0x2e presentation 1140>>>>69 string .template template 1141>>>62 string math Math document 1142>>>62 string base Database file 1143 1144# OpenDocument formats (for OpenOffice 2.x / StarOffice >= 8) 1145# https://lists.oasis-open.org/archives/office/200505/msg00006.html 1146# (mimetype contains "application/vnd.oasis.opendocument.<SUBTYPE>") 1147>>50 string vnd.oasis.opendocument. 
OpenDocument 1148>>>73 string text 1149>>>>77 byte !0x2d Text 1150!:mime application/vnd.oasis.opendocument.text 1151>>>>77 string -template Text Template 1152!:mime application/vnd.oasis.opendocument.text-template 1153>>>>77 string -web HTML Document Template 1154!:mime application/vnd.oasis.opendocument.text-web 1155>>>>77 string -master Master Document 1156!:mime application/vnd.oasis.opendocument.text-master 1157>>>73 string graphics 1158>>>>81 byte !0x2d Drawing 1159!:mime application/vnd.oasis.opendocument.graphics 1160>>>>81 string -template Template 1161!:mime application/vnd.oasis.opendocument.graphics-template 1162>>>73 string presentation 1163>>>>85 byte !0x2d Presentation 1164!:mime application/vnd.oasis.opendocument.presentation 1165>>>>85 string -template Template 1166!:mime application/vnd.oasis.opendocument.presentation-template 1167>>>73 string spreadsheet 1168>>>>84 byte !0x2d Spreadsheet 1169!:mime application/vnd.oasis.opendocument.spreadsheet 1170>>>>84 string -template Template 1171!:mime application/vnd.oasis.opendocument.spreadsheet-template 1172>>>73 string chart 1173>>>>78 byte !0x2d Chart 1174!:mime application/vnd.oasis.opendocument.chart 1175>>>>78 string -template Template 1176!:mime application/vnd.oasis.opendocument.chart-template 1177>>>73 string formula 1178>>>>80 byte !0x2d Formula 1179!:mime application/vnd.oasis.opendocument.formula 1180>>>>80 string -template Template 1181!:mime application/vnd.oasis.opendocument.formula-template 1182>>>73 string database Database 1183!:mime application/vnd.oasis.opendocument.database 1184# Valid for LibreOffice Base 6.0.1.1 at least 1185>>>73 string base Database 1186!:mime application/vnd.oasis.opendocument.base 1187>>>73 string image 1188>>>>78 byte !0x2d Image 1189!:mime application/vnd.oasis.opendocument.image 1190>>>>78 string -template Template 1191!:mime application/vnd.oasis.opendocument.image-template 1192 1193# EPUB (OEBPS) books using OCF (OEBPS Container Format) 1194# 
https://www.idpf.org/ocf/ocf1.0/download/ocf10.htm, section 4. 1195# From: Ralf Brown <ralf.brown@gmail.com> 1196>>50 string epub+zip EPUB document 1197!:mime application/epub+zip 1198 1199# From: Joerg Jenderek 1200# URL: http://en.wikipedia.org/wiki/CorelDRAW 1201# NOTE: version; til 2 WL-based; from 3 til 13 by ./riff; from 14 zip based 1202>>50 string x-vnd.corel. Corel 1203>>>62 string draw.document+zip Draw drawing, version 14-16 1204!:mime application/x-vnd.corel.draw.document+zip 1205!:ext cdr 1206>>>62 string draw.template+zip Draw template, version 14-16 1207!:mime application/x-vnd.corel.draw.template+zip 1208!:ext cdrt 1209>>>62 string zcf.draw.document+zip Draw drawing, version 17-21 1210!:mime application/x-vnd.corel.zcf.draw.document+zip 1211!:ext cdr 1212>>>62 string zcf.draw.template+zip Draw template, version 17-21 1213!:mime application/x-vnd.corel.zcf.draw.template+zip 1214!:ext cdt/cdrt 1215 1216# Catch other ZIP-with-mimetype formats 1217# In a ZIP file, the bytes immediately after a member's contents are 1218# always "PK". The 2 regex rules here print the "mimetype" member's 1219# contents up to the first 'P'. Luckily, most MIME types don't contain 1220# any capital 'P's. This is a kludge. 1221# (mimetype contains "application/<OTHER>") 1222>>50 default x Zip data 1223>>>38 regex [!-OQ-~]+ (MIME type "%s"?) 1224!:mime application/zip 1225# (mimetype contents other than "application/*") 1226>26 string \x8\0\0\0mimetype 1227>>38 string !application/ 1228>>>38 regex [!-OQ-~]+ Zip data (MIME type "%s"?) 
1229!:mime application/zip 1230 1231# Java Jar files 1232>(26.s+30) leshort 0xcafe Java archive data (JAR) 1233!:mime application/java-archive 1234 1235# iOS App 1236>(26.s+30) leshort !0xcafe 1237>>26 string !\x8\0\0\0mimetype 1238>>>30 string Payload/ 1239>>>>38 search/64 .app/ iOS App 1240!:mime application/x-ios-app 1241 1242 1243# Generic zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu) 1244# Next line excludes specialized formats: 1245>(26.s+30) leshort !0xcafe 1246>>26 string !\x8\0\0\0mimetype Zip archive data 1247!:mime application/zip 1248>>>4 beshort x \b, at least 1249>>>4 use zipversion 1250>>>4 beshort x to extract 1251>>>0x161 string WINZIP \b, WinZIP self-extracting 1252 1253# StarView Metafile 1254# From Pierre Ducroquet <pinaraf@pinaraf.info> 12550 string VCLMTF StarView MetaFile 1256>6 beshort x \b, version %d 1257>8 belong x \b, size %d 1258 1259# Zoo archiver 126020 lelong 0xfdc4a7dc Zoo archive data 1261!:mime application/x-zoo 1262>4 byte >48 \b, v%c. 1263>>6 byte >47 \b%c 1264>>>7 byte >47 \b%c 1265>32 byte >0 \b, modify: v%d 1266>>33 byte x \b.%d+ 1267>42 lelong 0xfdc4a7dc \b, 1268>>70 byte >0 extract: v%d 1269>>>71 byte x \b.%d+ 1270 1271# Shell archives 127210 string #\ This\ is\ a\ shell\ archive shell archive text 1273!:mime application/octet-stream 1274 1275# 1276# LBR. NB: May conflict with the questionable 1277# "binary Computer Graphics Metafile" format. 
#
# The LBR test string is NUL, eleven escaped spaces, then two NULs.
0	string	\0\ \ \ \ \ \ \ \ \ \ \ \0\0	LBR archive data
#
# PMA (CP/M derivative of LHA)
# Update: Joerg Jenderek
# URL: https://en.wikipedia.org/wiki/LHA_(file_format)
#
# -pm0- .. -pm2- only select the file; all output comes from lharc-file.
#2	string	-pm0-	PMarc archive data [pm0]
2	string	-pm0-
>0	use	lharc-file
#2	string	-pm1-	PMarc archive data [pm1]
2	string	-pm1-
>0	use	lharc-file
#2	string	-pm2-	PMarc archive data [pm2]
2	string	-pm2-
>0	use	lharc-file
2	string	-pms-	PMarc SFX archive (CP/M, DOS)
#!:mime	application/x-foobar-exec
!:ext	com
5	string	-pc1-	PopCom compressed executable (CP/M)
#!:mime	application/x-
#!:ext	com

# From Rafael Laboissiere <rafael@laboissiere.net>
# The Project Revision Control System (see
# http://prcs.sourceforge.net) generates a packaged project
# file which is recognized by the following entry:
0	leshort	0xeb81	PRCS packaged project

# Microsoft cabinets
# by David Necas (Yeti) <yeti@physics.muni.cz>
#0	string	MSCF\0\0\0\0	Microsoft cabinet file data,
#>25	byte	x	v%d
#>24	byte	x	\b.%d
# MPi: All CABs have version 1.3, so this is pointless.
# Better magic in debian-additions.

# GTKtalog catalogs
# by David Necas (Yeti) <yeti@physics.muni.cz>
# (the test string ends with an escaped space)
4	string	gtktalog\ 	GTKtalog catalog data,
>13	string	3	version 3
>>14	beshort	0x677a	(gzipped)
>>14	beshort	!0x677a	(not gzipped)
>13	string	>3	version %s

############################################################################
# Parity archive reconstruction file, the 'par' file format now used on Usenet.
0	string	PAR\0	PARity archive data
>48	leshort	=0	- Index file
>48	leshort	>0	- file number %d

# Felix von Leitner <felix-file@fefe.de>
0	string	d8:announce	BitTorrent file
!:mime	application/x-bittorrent
# Durval Menezes, <jmgthbfile at durval dot com>
0	string	d13:announce-list	BitTorrent file
!:mime	application/x-bittorrent
0	string	d7:comment	BitTorrent file
!:mime	application/x-bittorrent
0	string	d4:info	BitTorrent file
!:mime	application/x-bittorrent

# Atari MSA archive - Teemu Hukkanen <tjhukkan@iki.fi>
0	beshort	0x0e0f	Atari MSA archive data
>2	beshort	x	\b, %d sectors per track
>4	beshort	0	\b, 1 sided
>4	beshort	1	\b, 2 sided
>6	beshort	x	\b, starting track: %d
>8	beshort	x	\b, ending track: %d

# Alternate ZIP string (amc@arwen.cs.berkeley.edu)
0	string	PK00PK\003\004	Zip archive data
!:mime	application/zip
!:ext	zip/cbz

# ACE archive (from http://www.wotsit.org/download.asp?f=ace)
# by Stefan `Sec` Zehl <sec@42.org>
7	string	**ACE**	ACE archive data
>15	byte	>0	version %d
>16	byte	=0x00	\b, from MS-DOS
>16	byte	=0x01	\b, from OS/2
>16	byte	=0x02	\b, from Win/32
>16	byte	=0x03	\b, from Unix
>16	byte	=0x04	\b, from MacOS
>16	byte	=0x05	\b, from WinNT
>16	byte	=0x06	\b, from Primos
>16	byte	=0x07	\b, from AppleGS
>16	byte	=0x08	\b, from Atari
>16	byte	=0x09	\b, from Vax/VMS
>16	byte	=0x0A	\b, from Amiga
>16	byte	=0x0B	\b, from Next
>14	byte	x	\b, version %d to extract
>5	leshort	&0x0080	\b, multiple volumes,
>>17	byte	x	\b (part %d),
>5	leshort	&0x0002	\b, contains comment
>5	leshort	&0x0200	\b, sfx
>5	leshort	&0x0400	\b, small dictionary
>5	leshort	&0x0800	\b, multi-volume
>5	leshort	&0x1000	\b, contains AV-String
>>30	string	\x16*UNREGISTERED\x20VERSION*	(unregistered)
>5	leshort	&0x2000	\b, with recovery record
>5	leshort	&0x4000	\b, locked
>5	leshort	&0x8000	\b, solid
# Date in MS-DOS format (whatever that is)
#>18	lelong	x	Created on

# sfArk : compression program for Soundfonts (sf2) by Dirk Jagdmann
# <doj@cubic.org>
0x1A	string	sfArk	sfArk compressed Soundfont
>0x15	string	2
>>0x1	string	>\0	Version %s
>>0x2A	string	>\0	: %s

# DR-DOS 7.03 Packed File *.??_
# (the test string ends with an escaped space)
0	string	Packed\ File\ 	Personal NetWare Packed File
>12	string	x	\b, was "%.12s"

# EET archive
# From: Tilman Sauerbeck <tilman@code-monkey.de>
0	belong	0x1ee7ff00	EET archive
!:mime	application/x-eet

# rzip archives
0	string	RZIP	rzip compressed data
>4	byte	x	- version %d
>5	byte	x	\b.%d
# size is read unsigned so that values of 2 GiB and above do not print as negative
>6	ubelong	x	(%u bytes)

# From: Joerg Jenderek
# URL: https://help.foxitsoftware.com/kb/install-fzip-file.php
# reference: http://mark0.net/download/triddefs_xml.7z/
# defs/f/fzip.trid.xml
# Note: unknown compression; No "PK" zip magic; normally in directory like
# "%APPDATA%\Foxit Software\Addon\Foxit Reader\Install"
0	ubequad	0x2506781901010000	Foxit add-on/update
!:mime	application/x-fzip
!:ext	fzip

# From: "Robert Dale" <robdale@gmail.com>
0	belong	123	dar archive,
>4	belong	x	label "%.8x
>>8	belong	x	%.8x
>>>12	beshort	x	%.4x"
>14	byte	0x54	end slice
>14	beshort	0x4e4e	multi-part
>14	beshort	0x4e53	multi-part, with -S

# Symbian installation files
# https://www.thouky.co.uk/software/psifs/sis.html
# http://developer.symbian.com/main/downloads/papers/SymbianOSv91/softwareinstallsis.pdf
8	lelong	0x10000419	Symbian installation file
!:mime	application/vnd.symbian.install
>4	lelong	0x1000006D	(EPOC release 3/4/5)
>4	lelong	0x10003A12	(EPOC release 6)
0	lelong	0x10201A7A	Symbian installation file (Symbian OS 9.x)
!:mime	x-epoc/x-sisx-app

# From "Nelson A. de Oliveira" <naoliv@gmail.com>
0	string	MPQ\032	MoPaQ (MPQ) archive

# From: "Nelson A. de Oliveira" <naoliv@gmail.com>
# .kgb
0	string	KGB_arch	KGB Archiver file
>10	string	x	with compression level %.1s

# xar (eXtensible ARchiver) archive
# URL: https://en.wikipedia.org/wiki/Xar_(archiver)
# xar archive format: https://code.google.com/p/xar/
# From: "David Remahl" <dremahl@apple.com>
# Update: Joerg Jenderek
# TODO: lzma compression; X509Data for pkg and xip
# Note: verified by `xar --dump-header -f FullBundleUpdate.xar` or
# `7z t -txar Xcode_10.2_beta_4.xip`
0	string	xar!	xar archive
!:mime	application/x-xar
# pkg for Mac OSX installer package like FullBundleUpdate.pkg
# xip for signed Apple software like Xcode_10.2_beta_4.xip
!:ext	xar/pkg/xip
# always 28 in older archives
>4	ubeshort	>28	\b, header size %u
# currently there exists only version 1 since about 2014
>6	ubeshort	>1	version %u,
>8	ubequad	x	compressed TOC: %llu,
#>16	ubequad	x	uncompressed TOC: %llu,
# cksum_alg 0-2 in older and also 3-4 in newer
>24	belong	0	no checksum
>24	belong	1	SHA-1 checksum
>24	belong	2	MD5 checksum
>24	belong	3	SHA-256 checksum
>24	belong	4	SHA-512 checksum
>24	belong	>4	unknown 0x%x checksum
#>24	belong	>4	checksum
# The rules below chain relative offsets past the header and the compressed
# TOC and then hand the data found there to "indirect".  Each "contains"
# message ends with exactly one space so the indirect result is separated.
# NOTE(review): there is no jump rule for MD5 (cksum_alg 2); by the pattern
# below it would start at 16 minus 2 bytes -- confirm before adding.
# For no checksum jump 0 bytes
>24	belong	0
>>0	ubyte	x
# jump more bytes forward by header size
>>>&(4.S)	ubyte	x
# jump more bytes forward by compressed table of contents size
#>>>>&(8.Q)	ubequad	x	\b, heap data 0x%llx
>>>>&(8.Q)	ubyte	x
# look for data by ./compress after message with 1 space at end
>>>>>&-3	indirect	x	\b, contains 
# For SHA-1 jump 20 minus 2 bytes
>24	belong	1
>>18	ubyte	x
# jump more bytes forward by header size
>>>&(4.S)	ubyte	x
# jump more bytes forward by compressed table of contents size
>>>>&(8.Q)	ubyte	x
# data compressed by gzip, bzip, lzma or none
>>>>>&-1	indirect	x	\b, contains 
# For SHA-256 jump 32 minus 2 bytes
>24	belong	3
>>30	ubyte	x
# jump more bytes forward by header size
>>>&(4.S)	ubyte	x
# jump more bytes forward by compressed table of contents size
>>>>&(8.Q)	ubyte	x
>>>>>&-1	indirect	x	\b, contains 
# For SHA-512 jump 64 minus 2 bytes
>24	belong	4
>>62	ubyte	x
# jump more bytes forward by header size
>>>&(4.S)	ubyte	x
# jump more bytes forward by compressed table of contents size
>>>>&(8.Q)	ubyte	x
>>>>>&-1	indirect	x	\b, contains 

# Type: Parity Archive
# From: Daniel van Eeden <daniel_e@dds.nl>
0	string	PAR2	Parity Archive Volume Set

# Bacula volume format. (Volumes always start with a block header.)
# URL: https://bacula.org/3.0.x-manuals/en/developers/developers/Block_Header.html
# From: Adam Buchbinder <adam.buchbinder@gmail.com>
12	string	BB02	Bacula volume
>20	bedate	x	\b, started %s

# ePub is XHTML + XML inside a ZIP archive. The first member of the
# archive must be an uncompressed file called 'mimetype' with contents
# 'application/epub+zip'


# From: "Michael Gorny" <mgorny@gentoo.org>
# ZPAQ: http://mattmahoney.net/dc/zpaq.html
0	string	zPQ	ZPAQ stream
>3	byte	x	\b, level %d
# From: Barry Carter <carter.barry@gmail.com>
# https://encode.ru/threads/456-zpaq-updates/page32
0	string	7kSt	ZPAQ file

# BBeB ebook, unencrypted (LRF format)
# URL: https://www.sven.de/librie/Librie/LrfFormat
# From: Adam Buchbinder <adam.buchbinder@gmail.com>
0	string	L\0R\0F\0\0\0	BBeB ebook data, unencrypted
>8	beshort	x	\b, version %d
>36	byte	1	\b, front-to-back
>36	byte	16	\b, back-to-front
>42	beshort	x	\b, (%dx,
>44	beshort	x	%d)

# Symantec GHOST image by Joerg Jenderek at May 2014
# https://us.norton.com/ghost/
# https://www.garykessler.net/library/file_sigs.html
0	ubelong&0xFFFFf7f0	0xFEEF0100	Norton GHost image
# *.GHO
>2	ubyte&0x08	0x00	\b, first file
# *.GHS or *.[0-9] with cns program option
>2	ubyte&0x08	0x08	\b, split file
# part of split index interesting for *.ghs
>>4	ubyte	x	id=0x%x
# compression tag minus one equals numeric compression command line switch z[1-9]
>3	ubyte	0	\b, no compression
>3	ubyte	2	\b, fast compression (Z1)
>3	ubyte	3	\b, medium compression (Z2)
>3	ubyte	>3
>>3	ubyte	<11	\b, compression (Z%d-1)
>2	ubyte&0x08	0x00
# ~ 30 byte password field only for *.gho
>>12	ubequad	!0	\b, password protected
>>44	ubyte	!1
# 1~Image All, sector-by-sector only for *.gho
>>>10	ubyte	1	\b, sector copy
# 1~Image Boot track only for *.gho
>>>43	ubyte	1	\b, boot track
# 1~Image Disc only for *.gho implies Image Boot track and sector copy
>>44	ubyte	1	\b, disc sector copy
# optional image description only *.gho
>>0xff	string	>\0	"%-.254s"
# look for DOS sector end sequence
>0xE08	search/7776	\x55\xAA
>>&-512	indirect	x	\b; contains 

# Google Chrome extensions
# https://developer.chrome.com/extensions/crx
# https://developer.chrome.com/extensions/hosting
0	string	Cr24	Google Chrome extension
!:mime	application/x-chrome-extension
# read explicitly as little-endian; a native-endian "ulong" prints a
# byte-swapped version number on big-endian hosts
>4	ulelong	x	\b, version %u

# SeqBox - Sequenced container
# ext: sbx, seqbox
# Marco Pontello marcopon@gmail.com
# reference: https://github.com/MarcoPon/SeqBox
0	string	SBx	SeqBox,
>3	byte	x	version %d

# LyNX archive
56	string	USE\040LYNX\040TO\040DISSOLVE\040THIS\040FILE	LyNX archive

# From: Joerg Jenderek
# URL: https://www.acronis.com/
# Reference: https://en.wikipedia.org/wiki/TIB_(file_format)
# Note: only tested with True Image 2013 Build 5962 and 2019 Build 14110
0	ubequad	0xce24b9a220000000	Acronis True Image backup
!:mime	application/x-acronis-tib
!:ext	tib
# 01000000
#>20	ubelong	x	\b, at 20 0x%x
# 20000000
#>28	ubelong	x	\b, at 28 0x%x
# strings like "Generic- SD/MMC 1.00" "Unknown Disk" "Msft Virtual Disk 1.0"
# ???
# strings like "\Device\0000011e" "\Device\0000015a"
#>0	search/0x6852300/cs	\\Device\\
#>>&-1	pstring	x	\b, %s
# "\Device\HarddiskVolume30" "\Device\HarddiskVolume39"
#>>>&1	search/180/cs	\\Device\\
#>>>>&-1	pstring	x	\b, %s
#>>>>>&0	search/29/cs	\0\0\xc8\0
# disk label
#>>>>>>&10	lestring16	x	\b, disk label %11.11s
#>>>>>>&9	plestring16	x	\b, disk label "%11.11s"
#>>>>>>&10	ubequad	x	%16.16llx


# Gentoo XPAK binary package
# by Michal Gorny <mgorny@gentoo.org>
# https://gitweb.gentoo.org/proj/portage.git/tree/man/xpak.5
# negative offsets are counted from the end of the file
-4	string	STOP
>-16	string	XPAKSTOP	Gentoo binary package (XPAK)

# From: Joerg Jenderek
# URL: https://kodi.wiki/view/TexturePacker
# Reference: https://mirrors.kodi.tv/releases/source/17.3-Krypton.tar.gz
# /xbmc-Krypton/xbmc/guilib/XBTF.h
# /xbmc-Krypton/xbmc/guilib/XBTF.cpp
0	string	XBTF
# skip ASCII text by looking for terminating \0 of path
>264	ubyte	0	XBMC texture package
!:mime	application/x-xbmc-xbt
!:ext	xbt
# XBTF_VERSION 2
>>4	string	!2	\b, version %-.1s
# nofFiles /xbmc-Krypton/xbmc/guilib/XBTFReader.cpp
>>5	ulelong	x	\b, %u file
# plural s
>>5	ulelong	>1	\bs
# path[CXBTFFile[MaximumPathLength=256]
>>9	string	x	\b, 1st %s