1#------------------------------------------------------------------------------ 2# $File: archive,v 1.129 2019/05/09 18:58:02 christos Exp $ 3# archive: file(1) magic for archive formats (see also "msdos" for self- 4# extracting compressed archives) 5# 6# cpio, ar, arc, arj, hpack, lha/lharc, rar, squish, uc2, zip, zoo, etc. 7# pre-POSIX "tar" archives are also handled in the C code ../../src/is_tar.c. 8 9# POSIX tar archives 10# URL: https://en.wikipedia.org/wiki/Tar_(computing) 11# Reference: https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5&manpath=FreeBSD+8-current 12# header mainly padded with nul bytes 13500 quad 0 14!:strength /2 15# filename or extended attribute printable strings in range space null til umlaut ue 16>0 ubeshort >0x1F00 17>>0 ubeshort <0xFCFD 18# last 4 header bytes often null but tar\0 in gtarfail2.tar gtarfail.tar-bad 19# at https://sourceforge.net/projects/s-tar/files/testscripts/ 20>>>508 ubelong&0x8B9E8DFF 0 21# nul, space or ascii digit 0-7 at start of mode 22>>>>100 ubyte&0xC8 =0 23>>>>>101 ubyte&0xC8 =0 24# nul, space at end of check sum 25>>>>>>155 ubyte&0xDF =0 26# space or ascii digit 0 at start of check sum 27>>>>>>>148 ubyte&0xEF =0x20 28>>>>>>>>0 use tar-file 29# minimal check and then display tar archive information which can also be 30# embedded inside others like Android Backup, Clam AntiVirus database 310 name tar-file 32>257 string !ustar 33# header padded with nuls 34>>257 ulong =0 35# GNU tar version 1.29 with non pax format option without refusing 36# creates misleading V7 header for Long path, Multi-volume, Volume type 37>>>156 ubyte 0x4c GNU tar archive 38!:mime application/x-gtar 39!:ext tar/gtar 40>>>156 ubyte 0x4d GNU tar archive 41!:mime application/x-gtar 42!:ext tar/gtar 43>>>156 ubyte 0x56 GNU tar archive 44!:mime application/x-gtar 45!:ext tar/gtar 46>>>156 default x tar archive (V7) 47!:mime application/x-tar 48!:ext tar 49# other stuff in padding 50# some implementations add new fields to the blank 
area at the end of the header record 51# created for example by DOS TAR 3.20g 1994 Tim V.Shapore with -j option 52>>257 ulong !0 tar archive (old) 53!:mime application/x-tar 54!:ext tar 55# magic in newer, GNU, posix variants 56>257 string =ustar 57# 2 last char of magic and UStar version because string expression does not work 58# 2 space characters followed by a null for GNU variant 59>>261 ubelong =0x72202000 POSIX tar archive (GNU) 60!:mime application/x-gtar 61!:ext tar/gtar 62# UStar version with ASCII "00" 63>>261 ubelong 0x72003030 POSIX 64# gLOBAL and ExTENSION type only found in POSIX.1-2001 format 65>>>156 ubyte 0x67 \b.1-2001 66>>>156 ubyte 0x78 \b.1-2001 67>>>156 ubyte x tar archive 68!:mime application/x-ustar 69!:ext tar/ustar 70# version with 2 binary nuls embedded in Android Backup like com.android.settings.ab 71>>261 ubelong 0x72000000 tar archive (ustar) 72!:mime application/x-ustar 73!:ext tar/ustar 74# not seen ustar variant with garbage version 75>>261 default x tar archive (unknown ustar) 76!:mime application/x-ustar 77!:ext tar/ustar 78# type flag of 1st tar archive member 79#>156 ubyte x \b, %c-type 80>156 ubyte x 81>>156 ubyte 0 \b, file 82>>156 ubyte 0x30 \b, file 83>>156 ubyte 0x31 \b, hard link 84>>156 ubyte 0x32 \b, symlink 85>>156 ubyte 0x33 \b, char device 86>>156 ubyte 0x34 \b, block device 87>>156 ubyte 0x35 \b, directory 88>>156 ubyte 0x36 \b, fifo 89>>156 ubyte 0x37 \b, reserved 90>>156 ubyte 0x4c \b, long path 91>>156 ubyte 0x4d \b, multi volume 92>>156 ubyte 0x56 \b, volume 93>>156 ubyte 0x67 \b, global 94>>156 ubyte 0x78 \b, extension 95>>156 default x \b, type 96>>>156 ubyte x '%c' 97# name[100] 98>0 string >\0 %-.60s 99# mode mainly stored as an octal number in ASCII null or space terminated 100>100 string >\0 \b, mode %-.7s 101# user id mainly as octal numbers in ASCII null or space terminated 102>108 string >\0 \b, uid %-.7s 103# group id mainly as octal numbers in ASCII null or space terminated 104>116 string >\0 \b, gid 
%-.7s 105# size mainly as octal number in ASCII 106>124 ubyte <0x38 107>>124 string >\0 \b, size %-.12s 108# coding indicated by setting the high-order bit of the leftmost byte 109>124 ubyte >0xEF \b, size 0x 110>>124 ubyte !0xff \b%2.2x 111>>125 ubyte !0xff \b%2.2x 112>>126 ubyte !0xff \b%2.2x 113>>127 ubyte !0xff \b%2.2x 114>>128 ubyte !0xff \b%2.2x 115>>129 ubyte !0xff \b%2.2x 116>>130 ubyte !0xff \b%2.2x 117>>131 ubyte !0xff \b%2.2x 118>>132 ubyte !0xff \b%2.2x 119>>133 ubyte !0xff \b%2.2x 120>>134 ubyte !0xff \b%2.2x 121>>135 ubyte !0xff \b%2.2x 122# seconds since 0:0:0 1 jan 1970 UTC as octal number mainly in ASCII null or space terminated 123>136 string >\0 \b, seconds %-.11s 124# header checksum stored as an octal number in ASCII null or space terminated 125#>148 string x \b, cksum %.7s 126# linkname[100] 127>157 string >\0 \b, linkname %-.40s 128# additional fields for ustar 129>257 string =ustar 130# owner user name null terminated 131>>265 string >\0 \b, user %-.32s 132# group name null terminated 133>>297 string >\0 \b, group %-.32s 134# device major minor if not zero 135>>329 ubequad&0xCFCFCFCFcFcFcFdf !0 136>>>329 string x \b, devmaj %-.7s 137>>337 ubequad&0xCFCFCFCFcFcFcFdf !0 138>>>337 string x \b, devmin %-.7s 139# prefix[155] 140>>345 string >\0 \b, prefix %-.155s 141# old non ustar/POSIX tar 142>257 string !ustar 143>>508 string =tar\0 144# padding[255] in old star 145>>>257 string >\0 \b, padding: %-.40s 146>>508 default x 147# padding[255] in old tar sometimes comment field 148>>>257 string >\0 \b, comment: %-.40s 149 150# Incremental snapshot gnu-tar format from: 151# https://www.gnu.org/software/tar/manual/html_node/Snapshot-Files.html 1520 string GNU\ tar- GNU tar incremental snapshot data 153>&0 regex [0-9]\.[0-9]+-[0-9]+ version %s 154 155# cpio archives 156# 157# Yes, the top two "cpio archive" formats *are* supposed to just be "short". 
158# The idea is to indicate archives produced on machines with the same 159# byte order as the machine running "file" with "cpio archive", and 160# to indicate archives produced on machines with the opposite byte order 161# from the machine running "file" with "byte-swapped cpio archive". 162# 163# The SVR4 "cpio(4)" hints that there are additional formats, but they 164# are defined as "short"s; I think all the new formats are 165# character-header formats and thus are strings, not numbers. 1660 short 070707 cpio archive 167!:mime application/x-cpio 1680 short 0143561 byte-swapped cpio archive 169!:mime application/x-cpio # encoding: swapped 1700 string 070707 ASCII cpio archive (pre-SVR4 or odc) 1710 string 070701 ASCII cpio archive (SVR4 with no CRC) 1720 string 070702 ASCII cpio archive (SVR4 with CRC) 173 174# 175# Various archive formats used by various versions of the "ar" 176# command. 177# 178 179# 180# Original UNIX archive formats. 181# They were written with binary values in host byte order, and 182# the magic number was a host "int", which might have been 16 bits 183# or 32 bits. We don't say "PDP-11" or "VAX", as there might have 184# been ports to little-endian 16-bit-int or 32-bit-int platforms 185# (x86?) using some of those formats; if none existed, feel free 186# to use "PDP-11" for little-endian 16-bit and "VAX" for little-endian 187# 32-bit. There might have been big-endian ports of that sort as 188# well. 
189# 1900 leshort 0177555 very old 16-bit-int little-endian archive 1910 beshort 0177555 very old 16-bit-int big-endian archive 1920 lelong 0177555 very old 32-bit-int little-endian archive 1930 belong 0177555 very old 32-bit-int big-endian archive 194 1950 leshort 0177545 old 16-bit-int little-endian archive 196>2 string __.SYMDEF random library 1970 beshort 0177545 old 16-bit-int big-endian archive 198>2 string __.SYMDEF random library 1990 lelong 0177545 old 32-bit-int little-endian archive 200>4 string __.SYMDEF random library 2010 belong 0177545 old 32-bit-int big-endian archive 202>4 string __.SYMDEF random library 203 204# 205# From "pdp" (but why a 4-byte quantity?) 206# 2070 lelong 0x39bed PDP-11 old archive 2080 lelong 0x39bee PDP-11 4.0 archive 209 210# 211# XXX - what flavor of APL used this, and was it a variant of 212# some ar archive format? It's similar to, but not the same 213# as, the APL workspace magic numbers in pdp. 214# 2150 long 0100554 apl workspace 216 217# 218# System V Release 1 portable(?) archive format. 219# 2200 string =<ar> System V Release 1 ar archive 221!:mime application/x-archive 222 223# 224# Debian package; it's in the portable archive format, and needs to go 225# before the entry for regular portable archives, as it's recognized as 226# a portable archive whose first member has a name beginning with 227# "debian". 
228# 229# Update: Joerg Jenderek 230# URL: https://en.wikipedia.org/wiki/Deb_(file_format) 2310 string =!<arch>\ndebian 232# https://manpages.debian.org/testing/dpkg/dpkg-split.1.en.html 233>14 string -split part of multipart Debian package 234!:mime application/vnd.debian.binary-package 235# udeb is used for stripped down deb file 236!:ext deb/udeb 237>14 string -binary Debian binary package 238!:mime application/vnd.debian.binary-package 239!:ext deb/udeb 240# This should not happen 241>14 default x Unknown Debian package 242# NL terminated version; for most Debian cases this is 2.0 or 2.1 for split packages 243>68 string >\0 (format %s) 244#>68 string !2.0\n 245#>>68 string x (format %.3s) 246>68 string =2.0\n 247# 2nd archive name=control archive name like control.tar.gz or control.tar.xz 248>>72 string >\0 \b, with %.14s 249# look for 3rd archive name=data archive name like data.tar.{gz,xz,bz2,lzma} 250>>0 search/0x93e4f data.tar. \b, data compression 251# the above line only works if FILE_BYTES_MAX in ../../src/file.h is raised 252# for example like libreoffice-dev-doc_1%3a5.2.7-1+rpi1+deb9u3_all.deb 253>>>&0 string x %.4s 254# split debian package case 255>68 string =2.1\n 256# dpkg-1.18.25/dpkg-split/info.c 257# NL terminated ASCII package name like ckermit 258>>&0 string x \b, %s 259# NL terminated package version like 302-5.3 260>>>&1 string x %s 261# NL terminated MD5 checksum 262>>>>&1 string x \b, MD5 %s 263# NL terminated original package length 264>>>>>&1 string x \b, unsplitted size %s 265# NL terminated part length 266>>>>>>&1 string x \b, part length %s 267# NL terminated package part like n/m 268>>>>>>>&1 string x \b, part %s 269# NL terminated package architecture like armhf since dpkg 1.16.1 or later 270>>>>>>>>&1 string x \b, %s 271 272# 273# MIPS archive; they're in the portable archive format, and need to go 274# before the entry for regular portable archives, as it's recognized as 275# a portable archive whose first member has a name beginning 
with 276# "__________E". 277# 2780 string =!<arch>\n__________E MIPS archive 279!:mime application/x-archive 280>20 string U with MIPS Ucode members 281>21 string L with MIPSEL members 282>21 string B with MIPSEB members 283>19 string L and an EL hash table 284>19 string B and an EB hash table 285>22 string X -- out of date 286 287# 288# BSD/SVR2-and-later portable archive formats. 289# 290# Update: Joerg Jenderek 291# URL: http://fileformats.archiveteam.org/wiki/AR 292# Reference: https://www.unix.com/man-page/opensolaris/3HEAD/ar.h/ 293# Note: Mach-O universal binary in ./cafebabe is dependent 294# TODO: unify current ar archive, MIPS archive, Debian package 295# distinguish BSD, SVR; 32, 64 bit; HP from other 32-bit SVR; 296# *.ar packages from *.a libraries. handle empty archive 2970 string =!<arch>\n current ar archive 298# print first and possibly second ar_name[16] for debugging purpose 299#>8 string x \b, 1st "%.16s" 300#>68 string x \b, 2nd "%.16s" 301!:mime application/x-archive 302# a in most case for libraries; lib for Microsoft libraries; ar else cases 303!:ext a/lib/ar 304>8 string __.SYMDEF random library 305# first member with long marked name __.SYMDEF SORTED implies BSD library 306>68 string __.SYMDEF\ SORTED random library 307# Reference: https://parisc.wiki.kernel.org/images-parisc/b/b2/Rad_11_0_32.pdf 308# "archive file" entry moved from ./hp 309# LST header system_id 0210h~PA-RISC 1.1,... identifies the target architecture 310# LST header a_magic 0619h~relocatable library 311>68 belong 0x020b0619 - PA-RISC1.0 relocatable library 312>68 belong 0x02100619 - PA-RISC1.1 relocatable library 313>68 belong 0x02110619 - PA-RISC1.2 relocatable library 314>68 belong 0x02140619 - PA-RISC2.0 relocatable library 315#EOF for common ar archives 316 317# 318# "Thin" archive, as can be produced by GNU ar. 
319# 3200 string =!<thin>\n thin archive with 321>68 belong 0 no symbol entries 322>68 belong 1 %d symbol entry 323>68 belong >1 %d symbol entries 324 3250 search/1 -h- Software Tools format archive text 326 327# ARC archiver, from Daniel Quinlan (quinlan@yggdrasil.com) 328# 329# The first byte is the magic (0x1a), byte 2 is the compression type for 330# the first file (0x01 through 0x09), and bytes 3 to 15 are the MS-DOS 331# filename of the first file (null terminated). Since some types collide 332# we only test some types on basis of frequency: 0x08 (83%), 0x09 (5%), 333# 0x02 (5%), 0x03 (3%), 0x04 (2%), 0x06 (2%). 0x01 collides with terminfo. 3340 lelong&0x8080ffff 0x0000081a ARC archive data, dynamic LZW 335!:mime application/x-arc 3360 lelong&0x8080ffff 0x0000091a ARC archive data, squashed 337!:mime application/x-arc 3380 lelong&0x8080ffff 0x0000021a ARC archive data, uncompressed 339!:mime application/x-arc 3400 lelong&0x8080ffff 0x0000031a ARC archive data, packed 341!:mime application/x-arc 3420 lelong&0x8080ffff 0x0000041a ARC archive data, squeezed 343!:mime application/x-arc 3440 lelong&0x8080ffff 0x0000061a ARC archive data, crunched 345!:mime application/x-arc 346# [JW] stuff taken from idarc, obviously ARC successors: 3470 lelong&0x8080ffff 0x00000a1a PAK archive data 348!:mime application/x-arc 3490 lelong&0x8080ffff 0x0000141a ARC+ archive data 350!:mime application/x-arc 3510 lelong&0x8080ffff 0x0000481a HYP archive data 352!:mime application/x-arc 353 354# Acorn archive formats (Disaster prone simpleton, m91dps@ecs.ox.ac.uk) 355# I can't create either SPARK or ArcFS archives so I have not tested this stuff 356# [GRR: the original entries collide with ARC, above; replaced with combined 357# version (not tested)] 358#0 byte 0x1a RISC OS archive (spark format) 3590 string \032archive RISC OS archive (ArcFS format) 3600 string Archive\000 RISC OS archive (ArcFS format) 361 362# All these were taken from idarc, many could not be verified. 
Unfortunately, 363# there were many low-quality sigs, i.e. easy to trigger false positives. 364# Please notify me of any real-world fishy/ambiguous signatures and I'll try 365# to get my hands on the actual archiver and see if I find something better. [JW] 366# probably many can be enhanced by finding some 0-byte or control char near the start 367 368# idarc calls this Crush/Uncompressed... *shrug* 3690 string CRUSH Crush archive data 370# Squeeze It (.sqz) 3710 string HLSQZ Squeeze It archive data 372# SQWEZ 3730 string SQWEZ SQWEZ archive data 374# HPack (.hpk) 3750 string HPAK HPack archive data 376# HAP 3770 string \x91\x33HF HAP archive data 378# MD/MDCD 3790 string MDmd MDCD archive data 380# LIM 3810 string LIM\x1a LIM archive data 382# SAR 3833 string LH5 SAR archive data 384# BSArc/BS2 3850 string \212\3SB\020\0 BSArc/BS2 archive data 386# Bethesda Softworks Archive (Oblivion) 3870 string BSA\0 BSArc archive data 388>4 lelong x version %d 389# MAR 3902 string =-ah MAR archive data 391# ACB 392#0 belong&0x00f800ff 0x00800000 ACB archive data 393# CPZ 394# TODO, this is what idarc says: 0 string \0\0\0 CPZ archive data 395# JRC 3960 string JRchive JRC archive data 397# Quantum 3980 string DS\0 Quantum archive data 399# ReSOF 4000 string PK\3\6 ReSOF archive data 401# QuArk 4020 string 7\4 QuArk archive data 403# YAC 40414 string YC YAC archive data 405# X1 4060 string X1 X1 archive data 4070 string XhDr X1 archive data 408# CDC Codec (.dqt) 4090 belong&0xffffe000 0x76ff2000 CDC Codec archive data 410# AMGC 4110 string \xad6" AMGC archive data 412# NuLIB 4130 string N\xc3\xb5F\xc3\xa9lx\xc3\xa5 NuLIB archive data 414# PakLeo 4150 string LEOLZW PAKLeo archive data 416# ChArc 4170 string SChF ChArc archive data 418# PSA 4190 string PSA PSA archive data 420# CrossePAC 4210 string DSIGDCC CrossePAC archive data 422# Freeze 4230 string \x1f\x9f\x4a\x10\x0a Freeze archive data 424# KBoom 4250 string \xc2\xa8MP\xc2\xa8 KBoom archive data 426# NSQ, must go after CDC 
Codec 4270 string \x76\xff NSQ archive data 428# DPA 4290 string Dirk\ Paehl DPA archive data 430# BA 431# TODO: idarc says "bytes 0-2 == bytes 3-5" 432# TTComp 433# URL: http://fileformats.archiveteam.org/wiki/TTComp_archive 434# Update: Joerg Jenderek 435# GRR: line below is too general as it matches also Panorama database "TCDB 2003-10 demo.pan", others 4360 string \0\6 437# look for first keyword of Panorama database *.pan 438>12 search/261 DESIGN 439# skip keyword with low entropy 440>12 default x TTComp archive, binary, 4K dictionary 441# (version 5.25) labeled the above entry as "TTComp archive data" 442# ESP, could this conflict with Easy Software Products' (e.g.ESP ghostscript) documentation? 4430 string ESP ESP archive data 444# ZPack 4450 string \1ZPK\1 ZPack archive data 446# Sky 4470 string \xbc\x40 Sky archive data 448# UFA 4490 string UFA UFA archive data 450# Dry 4510 string =-H2O DRY archive data 452# FoxSQZ 4530 string FOXSQZ FoxSQZ archive data 454# AR7 4550 string ,AR7 AR7 archive data 456# PPMZ 4570 string PPMZ PPMZ archive data 458# MS Compress 459# Update: Joerg Jenderek 460# URL: http://fileformats.archiveteam.org/wiki/MS-DOS_installation_compression 461# Reference: https://hwiegman.home.xs4all.nl/fileformats/compress/szdd_kwaj_format.html 462# Note: use correct version of extracting tool like EXPAND, UNPACK, DECOMP or 7Z 4634 string \x88\xf0\x27 464# KWAJ variant 465>0 string KWAJ MS Compress archive data, KWAJ variant 466!:mime application/x-ms-compress-kwaj 467# extension not working in version 5.32 468# magic/Magdir/archive, 284: Warning: EXTENSION type ` ??_' has bad char '?' 
469# file: line 284: Bad magic entry ' ??_' 470!:ext ??_ 471# compression method (0-4) 472>>8 uleshort x \b, %u method 473# offset of compressed data 474>>10 uleshort x \b, 0x%x offset 475#>>(10.s) uleshort x 476#>>>&-6 string x \b, TEST extension %-.3s 477# header flags to mark header extensions 478>>12 uleshort >0 \b, 0x%x flags 479# 4 bytes: decompressed length of file 480>>12 uleshort &0x01 481>>>14 ulelong x \b, original size: %u bytes 482# 2 bytes: unknown purpose 483# 2 bytes: length of unknown data + mentioned bytes 484# 1-9 bytes: null-terminated file name 485# 1-4 bytes: null-terminated file extension; in the tests below flag 0x01 shifts the name by 4 bytes, 0x02 by 2 bytes, 0x04 makes its position depend on the (N.s) indirection (name test nested one level below it), and 0x10 appends the extension after the name 486>>12 uleshort &0x08 487>>>12 uleshort ^0x01 488>>>>12 uleshort ^0x02 489>>>>>12 uleshort ^0x04 490>>>>>>12 uleshort ^0x10 491>>>>>>>14 string x \b, %-.8s 492>>>>>>12 uleshort &0x10 493>>>>>>>14 string x \b, %-.8s 494>>>>>>>>&1 string x \b.%-.3s 495>>>>>12 uleshort &0x04 496>>>>>>12 uleshort ^0x10 497>>>>>>>(14.s) uleshort x 498>>>>>>>>&14 string x \b, %-.8s 499>>>>>>12 uleshort &0x10 500>>>>>>>(14.s) uleshort x 501>>>>>>>>&14 string x \b, %-.8s 502>>>>>>>>>&1 string x \b.%-.3s 503>>>>12 uleshort &0x02 504>>>>>12 uleshort ^0x04 505>>>>>>12 uleshort ^0x10 506>>>>>>>16 string x \b, %-.8s 507>>>>>>12 uleshort &0x10 508>>>>>>>16 string x \b, %-.8s 509>>>>>>>>&1 string x \b.%-.3s 510>>>>>12 uleshort &0x04 511>>>>>>12 uleshort ^0x10 512>>>>>>>(16.s) uleshort x 513>>>>>>>>&16 string x \b, %-.8s 514>>>>>>12 uleshort &0x10 515>>>>>>>(16.s) uleshort x 516>>>>>>>>&16 string x \b, %-.8s 517>>>>>>>>>&1 string x \b.%-.3s 518>>>12 uleshort &0x01 519>>>>12 uleshort ^0x02 520>>>>>12 uleshort ^0x04 521>>>>>>12 uleshort ^0x10 522>>>>>>>18 string x \b, %-.8s 523>>>>>>12 uleshort &0x10 524>>>>>>>18 string x \b, %-.8s 525>>>>>>>>&1 string x \b.%-.3s 526>>>>>12 uleshort &0x04 527>>>>>>12 uleshort ^0x10 528>>>>>>>(18.s) uleshort x 529>>>>>>>>&18 string x \b, %-.8s 530>>>>>>12 uleshort &0x10 531>>>>>>>(18.s) uleshort x 532>>>>>>>>&18 string x \b, %-.8s 533>>>>>>>>>&1 string x \b.%-.3s 
534>>>>12 uleshort &0x02 535>>>>>12 uleshort ^0x04 536>>>>>>12 uleshort ^0x10 537>>>>>>>20 string x \b, %-.8s 538>>>>>>12 uleshort &0x10 539>>>>>>>20 string x \b, %-.8s 540>>>>>>>>&1 string x \b.%-.3s 541>>>>>12 uleshort &0x04 542>>>>>>12 uleshort ^0x10 543>>>>>>>(20.s) uleshort x 544>>>>>>>>&20 string x \b, %-.8s 545>>>>>>12 uleshort &0x10 546>>>>>>>(20.s) uleshort x 547>>>>>>>>&20 string x \b, %-.8s 548>>>>>>>>>&1 string x \b.%-.3s 549# 2 bytes: length of data + mentioned bytes 550# 551# SZDD variant Haruhiko Okumura's LZSS or 7z type MsLZ 552>0 string SZDD MS Compress archive data, SZDD variant 553!:mime application/x-ms-compress-szdd 554!:ext ??_ 555# The character missing from the end of the filename (0=unknown) 556>>9 string >\0 \b, %-.1s is last character of original name 557# https://www.betaarchive.com/forum/viewtopic.php?t=26161 558# Compression mode: "A" (0x41) found but sometimes "B" in Windows 3.1 builds 026 and 034e 559>>8 string !A \b, %-.1s method 560>>10 ulelong >0 \b, original size: %u bytes 561# QBasic SZDD variant 5623 string \x88\xf0\x27 563>0 string SZ\x20 MS Compress archive data, QBasic variant 564!:mime application/x-ms-compress-sz 565!:ext ??$ 566>>8 ulelong >0 \b, original size: %u bytes 567 568# MP3 (archiver, not lossy audio compression) 5690 string MP3\x1a MP3-Archiver archive data 570# ZET 5710 string OZ\xc3\x9d ZET archive data 572# TSComp 5730 string \x65\x5d\x13\x8c\x08\x01\x03\x00 TSComp archive data 574# ARQ 5750 string gW\4\1 ARQ archive data 576# Squash 5773 string OctSqu Squash archive data 578# Terse 5790 string \5\1\1\0 Terse archive data 580# PUCrunch 5810 string \x01\x08\x0b\x08\xef\x00\x9e\x32\x30\x36\x31 PUCrunch archive data 582# UHarc 5830 string UHA UHarc archive data 584# ABComp 5850 string \2AB ABComp archive data 5860 string \3AB2 ABComp archive data 587# CMP 5880 string CO\0 CMP archive data 589# Splint 5900 string \x93\xb9\x06 Splint archive data 591# InstallShield 5920 string \x13\x5d\x65\x8c InstallShield Z 
archive Data 593# Gather 5941 string GTH Gather archive data 595# BOA 5960 string BOA BOA archive data 597# RAX 5980 string ULEB\xa RAX archive data 599# Xtreme 6000 string ULEB\0 Xtreme archive data 601# Pack Magic 6020 string @\xc3\xa2\1\0 Pack Magic archive data 603# BTS 6040 belong&0xfeffffff 0x1a034465 BTS archive data 605# ELI 5750 6060 string Ora\ ELI 5750 archive data 607# QFC 6080 string \x1aFC\x1a QFC archive data 6090 string \x1aQF\x1a QFC archive data 610# PRO-PACK 6110 string RNC PRO-PACK archive data 612# 777 6130 string 777 777 archive data 614# LZS221 6150 string sTaC LZS221 archive data 616# HPA 6170 string HPA HPA archive data 618# Arhangel 6190 string LG Arhangel archive data 620# EXP1, uses bzip2 6210 string 0123456789012345BZh EXP1 archive data 622# IMP 6230 string IMP\xa IMP archive data 624# NRV 6250 string \x00\x9E\x6E\x72\x76\xFF NRV archive data 626# Squish 6270 string \x73\xb2\x90\xf4 Squish archive data 628# Par 6290 string PHILIPP Par archive data 6300 string PAR Par archive data 631# HIT 6320 string UB HIT archive data 633# SBX 6340 belong&0xfffff000 0x53423000 SBX archive data 635# NaShrink 6360 string NSK NaShrink archive data 637# SAPCAR 6380 string #\ CAR\ archive\ header SAPCAR archive data 6390 string CAR\ 2.00RG SAPCAR archive data 640# Disintegrator 6410 string DST Disintegrator archive data 642# ASD 6430 string ASD ASD archive data 644# InstallShield CAB 6450 string ISc( InstallShield CAB 646# TOP4 6470 string T4\x1a TOP4 archive data 648# BatComp left out: sig looks like COM executable 649# so TODO: get real 4dos batcomp file and find sig 650# BlakHole 6510 string BH\5\7 BlakHole archive data 652# BIX 6530 string BIX0 BIX archive data 654# ChiefLZA 6550 string ChfLZ ChiefLZA archive data 656# Blink 6570 string Blink Blink archive data 658# Logitech Compress 6590 string \xda\xfa Logitech Compress archive data 660# ARS-Sfx (FIXME: really a SFX? 
then goto COM/EXE) 6611 string (C)\ STEPANYUK ARS-Sfx archive data 662# AKT/AKT32 6630 string AKT32 AKT32 archive data 6640 string AKT AKT archive data 665# NPack 6660 string MSTSM NPack archive data 667# PFT 6680 string \0\x50\0\x14 PFT archive data 669# SemOne 6700 string SEM SemOne archive data 671# PPMD 6720 string \x8f\xaf\xac\x84 PPMD archive data 673# FIZ 6740 string FIZ FIZ archive data 675# MSXiE 6760 belong&0xfffff0f0 0x4d530000 MSXiE archive data 677# DeepFreezer 6780 belong&0xfffffff0 0x797a3030 DeepFreezer archive data 679# DC 6800 string =<DC- DC archive data 681# TPac 6820 string \4TPAC\3 TPac archive data 683# Ai 6840 string Ai\1\1\0 Ai archive data 6850 string Ai\1\0\0 Ai archive data 686# Ai32 6870 string Ai\2\0 Ai32 archive data 6880 string Ai\2\1 Ai32 archive data 689# SBC 6900 string SBC SBC archive data 691# Ybs 6920 string YBS Ybs archive data 693# DitPack 6940 string \x9e\0\0 DitPack archive data 695# DMS 6960 string DMS! DMS archive data 697# EPC 6980 string \x8f\xaf\xac\x8c EPC archive data 699# VSARC 7000 string VS\x1a VSARC archive data 701# PDZ 7020 string PDZ PDZ archive data 703# ReDuq 7040 string rdqx ReDuq archive data 705# GCA 7060 string GCAX GCA archive data 707# PPMN 7080 string pN PPMN archive data 709# WinImage 7103 string WINIMAGE WinImage archive data 711# Compressia 7120 string CMP0CMP Compressia archive data 713# UHBC 7140 string UHB UHBC archive data 715# WinHKI 7160 string \x61\x5C\x04\x05 WinHKI archive data 717# WWPack data file 7180 string WWP WWPack archive data 719# BSN (BSA, PTS-DOS) 7200 string \xffBSG BSN archive data 7211 string \xffBSG BSN archive data 7223 string \xffBSG BSN archive data 7231 string \0\xae\2 BSN archive data 7241 string \0\xae\3 BSN archive data 7251 string \0\xae\7 BSN archive data 726# AIN 7270 string \x33\x18 AIN archive data 7280 string \x33\x17 AIN archive data 729# XPA32 test moved and merged with XPA by Joerg Jenderek at Sep 2015 730# SZip (TODO: doesn't catch all versions) 7310 string 
SZ\x0a\4 SZip archive data 732# XPack DiskImage 733# *.XDI updated by Joerg Jenderek Sep 2015 734# ftp://ftp.sac.sk/pub/sac/pack/0index.txt 735# GRR: this test is still too general as it catches also text files starting with jm 7360 string jm 737# only found examples with this additional characteristic 2 bytes 738>2 string \x2\x4 Xpack DiskImage archive data 739#!:ext xdi 740# XPack Data 741# *.xpa updated by Joerg Jenderek Sep 2015 742# ftp://ftp.elf.stuba.sk/pub/pc/pack/ 7430 string xpa XPA 744!:ext xpa 745# XPA32 746# ftp://ftp.elf.stuba.sk/pub/pc/pack/xpa32.zip 747# created by XPA32.EXE version 1.0.2 for Windows 748>0 string xpa\0\1 \b32 archive data 749# created by XPACK.COM version 1.67m or 1.67r with short 0x1800 750>3 ubeshort !0x0001 \bck archive data 751# XPack Single Data 752# changed by Joerg Jenderek Sep 2015 back to like in version 5.12 753# letter 'I'+ acute accent is equivalent to \xcd 7540 string \xcd\ jm Xpack single archive data 755#!:mime application/x-xpa-compressed 756!:ext xpa 757 758# TODO: missing due to unknown magic/magic at end of file: 759#DWC 760#ARG 761#ZAR 762#PC/3270 763#InstallIt 764#RKive 765#RK 766#XPack Diskimage 767 768# These were inspired by idarc, but actually verified 769# Dzip archiver (.dz) 770# Update: Joerg Jenderek 771# URL: http://speeddemosarchive.com/dzip/ 772# reference: http://speeddemosarchive.com/dzip/dz29src.zip/main.c 773# GRR: line below is too general as it matches also ASCII texts like Doszip commander help dz.txt 7740 string DZ 775# latest version is 2.9 dated 7 may 2003 776>2 byte <4 Dzip archive data 777!:mime application/x-dzip 778!:ext dz 779>>2 byte x \b, version %i 780>>3 byte x \b.%i 781>>4 ulelong x \b, offset 0x%x 782>>8 ulelong x \b, %u files 783# ZZip archiver (.zz) 7840 string ZZ\ \0\0 ZZip archive data 7850 string ZZ0 ZZip archive data 786# PAQ archiver (.paq) 7870 string \xaa\x40\x5f\x77\x1f\xe5\x82\x0d PAQ archive data 7880 string PAQ PAQ archive data 789>3 byte&0xf0 0x30 790>>3 byte x (v%c) 
791# JAR archiver (.j), this is the successor to ARJ, not Java's JAR (which is essentially ZIP) 7920xe string \x1aJar\x1b JAR (ARJ Software, Inc.) archive data 7930 string JARCS JAR (ARJ Software, Inc.) archive data 794 795# ARJ archiver (jason@jarthur.Claremont.EDU) 7960 leshort 0xea60 ARJ archive data 797!:mime application/x-arj 798>5 byte x \b, v%d, 799>8 byte &0x04 multi-volume, 800>8 byte &0x10 slash-switched, 801>8 byte &0x20 backup, 802>34 string x original name: %s, 803>7 byte 0 os: MS-DOS 804>7 byte 1 os: PRIMOS 805>7 byte 2 os: Unix 806>7 byte 3 os: Amiga 807>7 byte 4 os: Macintosh 808>7 byte 5 os: OS/2 809>7 byte 6 os: Apple ][ GS 810>7 byte 7 os: Atari ST 811>7 byte 8 os: NeXT 812>7 byte 9 os: VAX/VMS 813>3 byte >0 %d] 814# [JW] idarc says this is also possible 8152 leshort 0xea60 ARJ archive data 816 817# HA archiver (Greg Roelofs, newt@uchicago.edu) 818# This is a really bad format. A file containing HAWAII will match this... 819#0 string HA HA archive data, 820#>2 leshort =1 1 file, 821#>2 leshort >1 %hu files, 822#>4 byte&0x0f =0 first is type CPY 823#>4 byte&0x0f =1 first is type ASC 824#>4 byte&0x0f =2 first is type HSC 825#>4 byte&0x0f =0x0e first is type DIR 826#>4 byte&0x0f =0x0f first is type SPECIAL 827# suggestion: at least identify small archives (<1024 files) 8280 belong&0xffff00fc 0x48410000 HA archive data 829>2 leshort =1 1 file, 830>2 leshort >1 %u files, 831>4 byte&0x0f =0 first is type CPY 832>4 byte&0x0f =1 first is type ASC 833>4 byte&0x0f =2 first is type HSC 834>4 byte&0x0f =0x0e first is type DIR 835>4 byte&0x0f =0x0f first is type SPECIAL 836 837# HPACK archiver (Peter Gutmann, pgut1@cs.aukuni.ac.nz) 8380 string HPAK HPACK archive data 839 840# JAM Archive volume format, by Dmitry.Kohmanyuk@UA.net 8410 string \351,\001JAM\ JAM archive, 842>7 string >\0 version %.4s 843>0x26 byte =0x27 - 844>>0x2b string >\0 label %.11s, 845>>0x27 lelong x serial %08x, 846>>0x36 string >\0 fstype %.8s 847 848# LHARC/LHA archiver (Greg Roelofs, 
newt@uchicago.edu) 849# Update: Joerg Jenderek 850# URL: https://en.wikipedia.org/wiki/LHA_(file_format) 851# Reference: https://web.archive.org/web/20021005080911/http://www.osirusoft.com/joejared/lzhformat.html 852# 853# check and display information of lharc (LHa,PMarc) file 8540 name lharc-file 855# check 1st character of method id like -lz4- -lh5- or -pm2- 856>2 string - 857# check 5th character of method id 858>>6 string - 859# check header level 0 1 2 3 860>>>20 ubyte <4 861# check 2nd, 3rd and 4th character of method id 862>>>>3 regex \^(lh[0-9a-ex]|lz[s2-8]|pm[012]|pc1) \b 863!:mime application/x-lzh-compressed 864# creator type "LHA " 865!:apple ????LHA 866# display archive type name like "LHa/LZS archive data" or "LArc archive" 867>>>>>2 string -lz \b 868!:ext lzs 869# already known -lzs- -lz4- -lz5- with old names 870>>>>>>2 string -lzs LHa/LZS archive data 871>>>>>>3 regex \^lz[45] LHarc 1.x archive data 872# missing -lz?- with wikipedia names 873>>>>>>3 regex \^lz[2378] LArc archive 874# display archive type name like "LHa (2.x) archive data" 875>>>>>2 string -lh \b 876# already known -lh0- -lh1- -lh2- -lh3- -lh4- -lh5- -lh6- -lh7- -lhd- variants with old names 877>>>>>>3 regex \^lh[01] LHarc 1.x/ARX archive data 878# LHice archiver uses ".ICE" as name extension instead of the usual one ".lzh" 879# FOOBAR archiver uses ".foo" as name extension instead of the usual one 880# "Florain Orjanov's and Olga Bachetska's ARchiver" not found at the moment 881>>>>>>>2 string -lh1 \b 882!:ext lha/lzh/ice 883>>>>>>3 regex \^lh[23d] LHa 2.x? archive data 884>>>>>>3 regex \^lh[7] LHa (2.x)/LHark archive data 885>>>>>>3 regex \^lh[456] LHa (2.x) archive data 886>>>>>>>2 string -lh5 \b 887# https://en.wikipedia.org/wiki/BIOS 888# Some mainboard BIOS like Award use LHa compression. So archives with unusual extensions are found like 889# bios.rom , kd7_v14.bin, 1010.004, ... 
890!:ext lha/lzh/rom/bin 891# missing -lh?- variants (Joe Jared) 892>>>>>>3 regex \^lh[89a-ce] LHa (Joe Jared) archive 893# UNLHA32 2.67a 894>>>>>>2 string -lhx LHa (UNLHA32) archive 895# lha archives with standard file name extensions ".lha" ".lzh" 896>>>>>>3 regex !\^(lh1|lh5) \b 897!:ext lha/lzh 898# this should not happen if all -lh variants are described 899>>>>>>2 default x LHa (unknown) archive 900#!:ext lha 901# PMarc 902>>>>>3 regex \^pm[012] PMarc archive data 903!:ext pma 904# append method id without leading and trailing minus character 905>>>>>3 string x [%3.3s] 906>>>>>>0 use lharc-header 907# 908# check and display information of lharc header 9090 name lharc-header 910# header size 0x4 , 0x1b-0x61 911>0 ubyte x 912# compressed data size != compressed file size 913#>7 ulelong x \b, data size %d 914# attribute: 0x2~?? 0x10~symlink|target 0x20~normal 915#>19 ubyte x \b, 19_0x%x 916# level identifier 0 1 2 3 917#>20 ubyte x \b, level %d 918# time stamp 919#>15 ubelong x DATE 0x%8.8x 920# OS ID for level 1 921>20 ubyte 1 922# 0x20 types find for *.rom files 923>>(21.b+24) ubyte <0x21 \b, 0x%x OS 924# ascii type like M for MSDOS 925>>(21.b+24) ubyte >0x20 \b, '%c' OS 926# OS ID for level 2 927>20 ubyte 2 928#>>23 ubyte x \b, OS ID 0x%x 929>>23 ubyte <0x21 \b, 0x%x OS 930>>23 ubyte >0x20 \b, '%c' OS 931# filename only for level 0 and 1 932>20 ubyte <2 933# length of filename 934>>21 ubyte >0 \b, with 935# filename 936>>>21 pstring x "%s" 937# 938#2 string -lh0- LHarc 1.x/ARX archive data [lh0] 939#!:mime application/x-lharc 9402 string -lh0- 941>0 use lharc-file 942#2 string -lh1- LHarc 1.x/ARX archive data [lh1] 943#!:mime application/x-lharc 9442 string -lh1- 945>0 use lharc-file 946# NEW -lz2- ... 
-lz8- 9472 string -lz2- 948>0 use lharc-file 9492 string -lz3- 950>0 use lharc-file 9512 string -lz4- 952>0 use lharc-file 9532 string -lz5- 954>0 use lharc-file 9552 string -lz7- 956>0 use lharc-file 9572 string -lz8- 958>0 use lharc-file 959# [never seen any but the last; -lh4- reported in comp.compression:] 960#2 string -lzs- LHa/LZS archive data [lzs] 9612 string -lzs- 962>0 use lharc-file 963# According to wikipedia and others such a version does not exist 964#2 string -lh\40- LHa 2.x? archive data [lh ] 965#2 string -lhd- LHa 2.x? archive data [lhd] 9662 string -lhd- 967>0 use lharc-file 968#2 string -lh2- LHa 2.x? archive data [lh2] 9692 string -lh2- 970>0 use lharc-file 971#2 string -lh3- LHa 2.x? archive data [lh3] 9722 string -lh3- 973>0 use lharc-file 974#2 string -lh4- LHa (2.x) archive data [lh4] 9752 string -lh4- 976>0 use lharc-file 977#2 string -lh5- LHa (2.x) archive data [lh5] 9782 string -lh5- 979>0 use lharc-file 980#2 string -lh6- LHa (2.x) archive data [lh6] 9812 string -lh6- 982>0 use lharc-file 983#2 string -lh7- LHa (2.x)/LHark archive data [lh7] 9842 string -lh7- 985# !:mime application/x-lha 986# >20 byte x - header level %d 987>0 use lharc-file 988# NEW -lh8- ... 
-lhe- , -lhx- 9892 string -lh8- 990>0 use lharc-file 9912 string -lh9- 992>0 use lharc-file 9932 string -lha- 994>0 use lharc-file 9952 string -lhb- 996>0 use lharc-file 9972 string -lhc- 998>0 use lharc-file 9992 string -lhe- 1000>0 use lharc-file 10012 string -lhx- 1002>0 use lharc-file 1003# taken from idarc [JW] 10042 string -lZ PUT archive data 1005# already done by LHarc magics 1006# this should never happen if all sub types of LZS archive are identified 1007#2 string -lz LZS archive data 10082 string -sw1- Swag archive data 1009 10100 name rar-file-header 1011>24 byte 15 \b, v1.5 1012>24 byte 20 \b, v2.0 1013>24 byte 29 \b, v4 1014>15 byte 0 \b, os: MS-DOS 1015>15 byte 1 \b, os: OS/2 1016>15 byte 2 \b, os: Win32 1017>15 byte 3 \b, os: Unix 1018>15 byte 4 \b, os: Mac OS 1019>15 byte 5 \b, os: BeOS 1020 10210 name rar-archive-header 1022>3 leshort&0x1ff >0 \b, flags: 1023>>3 leshort &0x01 ArchiveVolume 1024>>3 leshort &0x02 Commented 1025>>3 leshort &0x04 Locked 1026>>3 leshort &0x10 NewVolumeNaming 1027>>3 leshort &0x08 Solid 1028>>3 leshort &0x20 Authenticated 1029>>3 leshort &0x40 RecoveryRecordPresent 1030>>3 leshort &0x80 EncryptedBlockHeader 1031>>3 leshort &0x100 FirstVolume 1032 1033# RAR (Roshal Archive) archive 10340 string Rar!\x1a\7\0 RAR archive data 1035!:mime application/x-rar 1036!:ext rar/cbr 1037# file header 1038>(0xc.l+9) byte 0x74 1039>>(0xc.l+7) use rar-file-header 1040# subblock seems to share information with file header 1041>(0xc.l+9) byte 0x7a 1042>>(0xc.l+7) use rar-file-header 1043>9 byte 0x73 1044>>7 use rar-archive-header 1045 10460 string Rar!\x1a\7\1\0 RAR archive data, v5 1047!:mime application/x-rar 1048!:ext rar 1049 1050# Very old RAR archive 1051# https://jasonblanks.com/wp-includes/images/papers/KnowyourarchiveRAR.pdf 10520 string RE\x7e\x5e RAR archive data (<v1.5) 1053!:mime application/x-rar 1054!:ext rar/cbr 1055 1056# SQUISH archiver (Greg Roelofs, newt@uchicago.edu) 10570 string SQSH squished archive data (Acorn 
RISCOS) 1058 1059# UC2 archiver (Greg Roelofs, newt@uchicago.edu) 1060# [JW] see exe section for self-extracting version 10610 string UC2\x1a UC2 archive data 1062 1063# PKZIP multi-volume archive 10640 string PK\x07\x08PK\x03\x04 Zip multi-volume archive data, at least PKZIP v2.50 to extract 1065!:mime application/zip 1066!:ext zip/cbz 1067 1068# Zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu) 10690 string PK\005\006 Zip archive data (empty) 1070!:mime application/zip 1071!:ext zip/cbz 1072!:strength +1 10730 string PK\003\004 1074!:strength +1 1075 1076# Specialised zip formats which start with a member named 'mimetype' 1077# (stored uncompressed, with no 'extra field') containing the file's MIME type. 1078# Check for have 8-byte name, 0-byte extra field, name "mimetype", and 1079# contents starting with "application/": 1080>26 string \x8\0\0\0mimetypeapplication/ 1081 1082# KOffice / OpenOffice & StarOffice / OpenDocument formats 1083# From: Abel Cheung <abel@oaka.org> 1084 1085# KOffice (1.2 or above) formats 1086# (mimetype contains "application/vnd.kde.<SUBTYPE>") 1087>>50 string vnd.kde. KOffice (>=1.2) 1088>>>58 string karbon Karbon document 1089>>>58 string kchart KChart document 1090>>>58 string kformula KFormula document 1091>>>58 string kivio Kivio document 1092>>>58 string kontour Kontour document 1093>>>58 string kpresenter KPresenter document 1094>>>58 string kspread KSpread document 1095>>>58 string kword KWord document 1096 1097# OpenOffice formats (for OpenOffice 1.x / StarOffice 6/7) 1098# (mimetype contains "application/vnd.sun.xml.<SUBTYPE>") 1099>>50 string vnd.sun.xml. 
OpenOffice.org 1.x 1100>>>62 string writer Writer 1101>>>>68 byte !0x2e document 1102>>>>68 string .template template 1103>>>>68 string .global global document 1104>>>62 string calc Calc 1105>>>>66 byte !0x2e spreadsheet 1106>>>>66 string .template template 1107>>>62 string draw Draw 1108>>>>66 byte !0x2e document 1109>>>>66 string .template template 1110>>>62 string impress Impress 1111>>>>69 byte !0x2e presentation 1112>>>>69 string .template template 1113>>>62 string math Math document 1114>>>62 string base Database file 1115 1116# OpenDocument formats (for OpenOffice 2.x / StarOffice >= 8) 1117# https://lists.oasis-open.org/archives/office/200505/msg00006.html 1118# (mimetype contains "application/vnd.oasis.opendocument.<SUBTYPE>") 1119>>50 string vnd.oasis.opendocument. OpenDocument 1120>>>73 string text 1121>>>>77 byte !0x2d Text 1122!:mime application/vnd.oasis.opendocument.text 1123>>>>77 string -template Text Template 1124!:mime application/vnd.oasis.opendocument.text-template 1125>>>>77 string -web HTML Document Template 1126!:mime application/vnd.oasis.opendocument.text-web 1127>>>>77 string -master Master Document 1128!:mime application/vnd.oasis.opendocument.text-master 1129>>>73 string graphics 1130>>>>81 byte !0x2d Drawing 1131!:mime application/vnd.oasis.opendocument.graphics 1132>>>>81 string -template Template 1133!:mime application/vnd.oasis.opendocument.graphics-template 1134>>>73 string presentation 1135>>>>85 byte !0x2d Presentation 1136!:mime application/vnd.oasis.opendocument.presentation 1137>>>>85 string -template Template 1138!:mime application/vnd.oasis.opendocument.presentation-template 1139>>>73 string spreadsheet 1140>>>>84 byte !0x2d Spreadsheet 1141!:mime application/vnd.oasis.opendocument.spreadsheet 1142>>>>84 string -template Template 1143!:mime application/vnd.oasis.opendocument.spreadsheet-template 1144>>>73 string chart 1145>>>>78 byte !0x2d Chart 1146!:mime application/vnd.oasis.opendocument.chart 1147>>>>78 string -template 
Template 1148!:mime application/vnd.oasis.opendocument.chart-template 1149>>>73 string formula 1150>>>>80 byte !0x2d Formula 1151!:mime application/vnd.oasis.opendocument.formula 1152>>>>80 string -template Template 1153!:mime application/vnd.oasis.opendocument.formula-template 1154>>>73 string database Database 1155!:mime application/vnd.oasis.opendocument.database 1156# Valid for LibreOffice Base 6.0.1.1 at least 1157>>>73 string base Database 1158!:mime application/vnd.oasis.opendocument.base 1159>>>73 string image 1160>>>>78 byte !0x2d Image 1161!:mime application/vnd.oasis.opendocument.image 1162>>>>78 string -template Template 1163!:mime application/vnd.oasis.opendocument.image-template 1164 1165# EPUB (OEBPS) books using OCF (OEBPS Container Format) 1166# https://www.idpf.org/ocf/ocf1.0/download/ocf10.htm, section 4. 1167# From: Ralf Brown <ralf.brown@gmail.com> 1168>>50 string epub+zip EPUB document 1169!:mime application/epub+zip 1170 1171# Catch other ZIP-with-mimetype formats 1172# In a ZIP file, the bytes immediately after a member's contents are 1173# always "PK". The 2 regex rules here print the "mimetype" member's 1174# contents up to the first 'P'. Luckily, most MIME types don't contain 1175# any capital 'P's. This is a kludge. 1176# (mimetype contains "application/<OTHER>") 1177>>50 string !epub+zip 1178>>>50 string !vnd.oasis.opendocument. 1179>>>>50 string !vnd.sun.xml. 1180>>>>>50 string !vnd.kde. 1181>>>>>>38 regex [!-OQ-~]+ Zip data (MIME type "%s"?) 1182!:mime application/zip 1183# (mimetype contents other than "application/*") 1184>26 string \x8\0\0\0mimetype 1185>>38 string !application/ 1186>>>38 regex [!-OQ-~]+ Zip data (MIME type "%s"?) 
1187!:mime application/zip 1188 1189# Java Jar files 1190>(26.s+30) leshort 0xcafe Java archive data (JAR) 1191!:mime application/java-archive 1192 1193# iOS App 1194>(26.s+30) leshort !0xcafe 1195>>26 string !\x8\0\0\0mimetype 1196>>>30 string Payload/ 1197>>>>38 search/64 .app/ iOS App 1198!:mime application/x-ios-app 1199 1200 1201# Generic zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu) 1202# Next line excludes specialized formats: 1203>(26.s+30) leshort !0xcafe 1204>>26 string !\x8\0\0\0mimetype Zip archive data 1205!:mime application/zip 1206>>>4 beshort x \b, at least 1207>>>4 use zipversion 1208>>>4 beshort x to extract 1209>>>0x161 string WINZIP \b, WinZIP self-extracting 1210 1211# StarView Metafile 1212# From Pierre Ducroquet <pinaraf@pinaraf.info> 12130 string VCLMTF StarView MetaFile 1214>6 beshort x \b, version %d 1215>8 belong x \b, size %d 1216 1217# Zoo archiver 121820 lelong 0xfdc4a7dc Zoo archive data 1219!:mime application/x-zoo 1220>4 byte >48 \b, v%c. 1221>>6 byte >47 \b%c 1222>>>7 byte >47 \b%c 1223>32 byte >0 \b, modify: v%d 1224>>33 byte x \b.%d+ 1225>42 lelong 0xfdc4a7dc \b, 1226>>70 byte >0 extract: v%d 1227>>>71 byte x \b.%d+ 1228 1229# Shell archives 123010 string #\ This\ is\ a\ shell\ archive shell archive text 1231!:mime application/octet-stream 1232 1233# 1234# LBR. NB: May conflict with the questionable 1235# "binary Computer Graphics Metafile" format. 
1236# 12370 string \0\ \ \ \ \ \ \ \ \ \ \ \0\0 LBR archive data 1238# 1239# PMA (CP/M derivative of LHA) 1240# Update: Joerg Jenderek 1241# URL: https://en.wikipedia.org/wiki/LHA_(file_format) 1242# 1243#2 string -pm0- PMarc archive data [pm0] 12442 string -pm0- 1245>0 use lharc-file 1246#2 string -pm1- PMarc archive data [pm1] 12472 string -pm1- 1248>0 use lharc-file 1249#2 string -pm2- PMarc archive data [pm2] 12502 string -pm2- 1251>0 use lharc-file 12522 string -pms- PMarc SFX archive (CP/M, DOS) 1253#!:mime application/x-foobar-exec 1254!:ext com 12555 string -pc1- PopCom compressed executable (CP/M) 1256#!:mime application/x- 1257#!:ext com 1258 1259# From Rafael Laboissiere <rafael@laboissiere.net> 1260# The Project Revision Control System (see 1261# http://prcs.sourceforge.net) generates a packaged project 1262# file which is recognized by the following entry: 12630 leshort 0xeb81 PRCS packaged project 1264 1265# Microsoft cabinets 1266# by David Necas (Yeti) <yeti@physics.muni.cz> 1267#0 string MSCF\0\0\0\0 Microsoft cabinet file data, 1268#>25 byte x v%d 1269#>24 byte x \b.%d 1270# MPi: All CABs have version 1.3, so this is pointless. 1271# Better magic in debian-additions. 1272 1273# GTKtalog catalogs 1274# by David Necas (Yeti) <yeti@physics.muni.cz> 12754 string gtktalog\ GTKtalog catalog data, 1276>13 string 3 version 3 1277>>14 beshort 0x677a (gzipped) 1278>>14 beshort !0x677a (not gzipped) 1279>13 string >3 version %s 1280 1281############################################################################ 1282# Parity archive reconstruction file, the 'par' file format now used on Usenet. 
0 string PAR\0 PARity archive data
>48 leshort =0 - Index file
>48 leshort >0 - file number %d

# Felix von Leitner <felix-file@fefe.de>
0 string d8:announce BitTorrent file
!:mime application/x-bittorrent
# Durval Menezes, <jmgthbfile at durval dot com>
0 string d13:announce-list BitTorrent file
!:mime application/x-bittorrent

# Atari MSA archive - Teemu Hukkanen <tjhukkan@iki.fi>
0 beshort 0x0e0f Atari MSA archive data
>2 beshort x \b, %d sectors per track
>4 beshort 0 \b, 1 sided
>4 beshort 1 \b, 2 sided
>6 beshort x \b, starting track: %d
>8 beshort x \b, ending track: %d

# Alternate ZIP string (amc@arwen.cs.berkeley.edu)
0 string PK00PK\003\004 Zip archive data
!:mime application/zip
!:ext zip/cbz

# ACE archive (from http://www.wotsit.org/download.asp?f=ace)
# by Stefan `Sec` Zehl <sec@42.org>
7 string **ACE** ACE archive data
>15 byte >0 version %d
>16 byte =0x00 \b, from MS-DOS
>16 byte =0x01 \b, from OS/2
>16 byte =0x02 \b, from Win/32
>16 byte =0x03 \b, from Unix
>16 byte =0x04 \b, from MacOS
>16 byte =0x05 \b, from WinNT
>16 byte =0x06 \b, from Primos
>16 byte =0x07 \b, from AppleGS
>16 byte =0x08 \b, from Atari
>16 byte =0x09 \b, from Vax/VMS
>16 byte =0x0A \b, from Amiga
>16 byte =0x0B \b, from Next
>14 byte x \b, version %d to extract
>5 leshort &0x0080 \b, multiple volumes,
>>17 byte x \b (part %d),
>5 leshort &0x0002 \b, contains comment
>5 leshort &0x0200 \b, sfx
>5 leshort &0x0400 \b, small dictionary
>5 leshort &0x0800 \b, multi-volume
>5 leshort &0x1000 \b, contains AV-String
>>30 string \x16*UNREGISTERED\x20VERSION* (unregistered)
>5 leshort &0x2000 \b, with recovery record
>5 leshort &0x4000 \b, locked
>5 leshort &0x8000 \b, solid
# Date in MS-DOS format (whatever that is)
#>18 lelong x Created on

# sfArk : compression program for Soundfonts (sf2) by Dirk Jagdmann
# <doj@cubic.org>
0x1A string sfArk sfArk compressed Soundfont
>0x15 string 2
>>0x1 string >\0 Version %s
>>0x2A string >\0 : %s

# DR-DOS 7.03 Packed File *.??_
# The signature is the 12 bytes "Packed File " including the final blank (the
# original name follows at offset 12); the blank is written as \040 so it
# cannot merge with the field separator.
0 string Packed\ File\040 Personal NetWare Packed File
>12 string x \b, was "%.12s"

# EET archive
# From: Tilman Sauerbeck <tilman@code-monkey.de>
0 belong 0x1ee7ff00 EET archive
!:mime application/x-eet

# rzip archives
0 string RZIP rzip compressed data
>4 byte x - version %d
>5 byte x \b.%d
>6 belong x (%d bytes)

# From: Joerg Jenderek
# URL: https://help.foxitsoftware.com/kb/install-fzip-file.php
# reference: http://mark0.net/download/triddefs_xml.7z/
# defs/f/fzip.trid.xml
# Note: unknown compression; No "PK" zip magic; normally in directory like
# "%APPDATA%\Foxit Software\Addon\Foxit Reader\Install"
0 ubequad 0x2506781901010000 Foxit add-on/update
!:mime application/x-fzip
!:ext fzip

# From: "Robert Dale" <robdale@gmail.com>
0 belong 123 dar archive,
>4 belong x label "%.8x
>>8 belong x %.8x
>>>12 beshort x %.4x"
>14 byte 0x54 end slice
>14 beshort 0x4e4e multi-part
>14 beshort 0x4e53 multi-part, with -S

# Symbian installation files
# https://www.thouky.co.uk/software/psifs/sis.html
# http://developer.symbian.com/main/downloads/papers/SymbianOSv91/softwareinstallsis.pdf
8 lelong 0x10000419 Symbian installation file
!:mime application/vnd.symbian.install
>4 lelong 0x1000006D (EPOC release 3/4/5)
>4 lelong 0x10003A12 (EPOC release 6)
0 lelong 0x10201A7A Symbian installation file (Symbian OS 9.x)
!:mime x-epoc/x-sisx-app

# From "Nelson A. de Oliveira" <naoliv@gmail.com>
0 string MPQ\032 MoPaQ (MPQ) archive

# From: "Nelson A. de Oliveira" <naoliv@gmail.com>
# .kgb
0 string KGB_arch KGB Archiver file
>10 string x with compression level %.1s

# xar (eXtensible ARchiver) archive
# URL: https://en.wikipedia.org/wiki/Xar_(archiver)
# xar archive format: https://code.google.com/p/xar/
# From: "David Remahl" <dremahl@apple.com>
# Update: Joerg Jenderek
# TODO: lzma compression; X509Data for pkg and xip
# Note: verified by `xar --dump-header -f FullBundleUpdate.xar` or
# `7z t -txar Xcode_10.2_beta_4.xip`
0 string xar! xar archive
!:mime application/x-xar
# pkg for Mac OSX installer package like FullBundleUpdate.pkg
# xip for signed Apple software like Xcode_10.2_beta_4.xip
!:ext xar/pkg/xip
# always 28 in older archives
>4 ubeshort >28 \b, header size %u
# currently there exists only version 1 since about 2014
>6 ubeshort >1 version %u,
>8 ubequad x compressed TOC: %llu,
#>16 ubequad x uncompressed TOC: %llu,
# cksum_alg 0-2 in older and also 3-4 in newer
>24 belong 0 no checksum
>24 belong 1 SHA-1 checksum
>24 belong 2 MD5 checksum
>24 belong 3 SHA-256 checksum
>24 belong 4 SHA-512 checksum
>24 belong >4 unknown 0x%x checksum
#>24 belong >4 checksum
# For no checksum jump 0 bytes
>24 belong 0
>>0 ubyte x
# jump more bytes forward by header size
>>>&(4.S) ubyte x
# jump more bytes forward by compressed table of contents size
#>>>>&(8.Q) ubequad x \b, heap data 0x%llx
>>>>&(8.Q) ubyte x
# look for data by ./compress after message with 1 space at end
# (keep the single trailing blank on the next line)
>>>>>&-3 indirect x \b, contains 
# For SHA-1 jump 20 minus 2 bytes
>24 belong 1
>>18 ubyte x
# jump more bytes forward by header size
>>>&(4.S) ubyte x
# jump more bytes forward by compressed table of contents size
>>>>&(8.Q) ubyte x
# data compressed by gzip, bzip, lzma or none
>>>>>&-1 indirect x \b, contains
# For SHA-256 jump 32 minus 2 bytes
>24 belong 3
>>30 ubyte x
# jump more bytes forward by header size
>>>&(4.S) ubyte x
# jump more bytes forward by compressed table of contents size
>>>>&(8.Q) ubyte x
>>>>>&-1 indirect x \b, contains
# For SHA-512 jump 64 minus 2 bytes
>24 belong 4
>>62 ubyte x
# jump more bytes forward by header size
>>>&(4.S) ubyte x
# jump more bytes forward by compressed table of contents size
>>>>&(8.Q) ubyte x
>>>>>&-1 indirect x \b, contains

# Type: Parity Archive
# From: Daniel van Eeden <daniel_e@dds.nl>
0 string PAR2 Parity Archive Volume Set

# Bacula volume format. (Volumes always start with a block header.)
# URL: https://bacula.org/3.0.x-manuals/en/developers/developers/Block_Header.html
# From: Adam Buchbinder <adam.buchbinder@gmail.com>
12 string BB02 Bacula volume
>20 bedate x \b, started %s

# ePub is XHTML + XML inside a ZIP archive. The first member of the
# archive must be an uncompressed file called 'mimetype' with contents
# 'application/epub+zip'


# From: "Michael Gorny" <mgorny@gentoo.org>
# ZPAQ: http://mattmahoney.net/dc/zpaq.html
0 string zPQ ZPAQ stream
>3 byte x \b, level %d
# From: Barry Carter <carter.barry@gmail.com>
# https://encode.ru/threads/456-zpaq-updates/page32
0 string 7kSt ZPAQ file

# BBeB ebook, unencrypted (LRF format)
# URL: https://www.sven.de/librie/Librie/LrfFormat
# From: Adam Buchbinder <adam.buchbinder@gmail.com>
0 string L\0R\0F\0\0\0 BBeB ebook data, unencrypted
>8 beshort x \b, version %d
>36 byte 1 \b, front-to-back
>36 byte 16 \b, back-to-front
>42 beshort x \b, (%dx,
>44 beshort x %d)

# Symantec GHOST image by Joerg Jenderek at May 2014
# https://us.norton.com/ghost/
# https://www.garykessler.net/library/file_sigs.html
0 ubelong&0xFFFFf7f0 0xFEEF0100 Norton GHost image
# *.GHO
>2 ubyte&0x08 0x00 \b, first file
# *.GHS or *.[0-9] with cns program option
>2 ubyte&0x08 0x08 \b, split file
# part of split index interesting for *.ghs
>>4 ubyte x id=0x%x
# compression tag minus one equals numeric compression command line switch z[1-9]
>3 ubyte 0 \b, no compression
>3 ubyte 2 \b, fast compression (Z1)
>3 ubyte 3 \b, medium compression (Z2)
>3 ubyte >3
>>3 ubyte <11 \b, compression (Z%d-1)
>2 ubyte&0x08 0x00
# ~ 30 byte password field only for *.gho
>>12 ubequad !0 \b, password protected
>>44 ubyte !1
# 1~Image All, sector-by-sector only for *.gho
>>>10 ubyte 1 \b, sector copy
# 1~Image Boot track only for *.gho
>>>43 ubyte 1 \b, boot track
# 1~Image Disc only for *.gho implies Image Boot track and sector copy
>>44 ubyte 1 \b, disc sector copy
# optional image description only *.gho
>>0xff string >\0 "%-.254s"
# look for DOS sector end sequence
>0xE08 search/7776 \x55\xAA
>>&-512 indirect x \b; contains

# Google Chrome extensions
# https://developer.chrome.com/extensions/crx
# https://developer.chrome.com/extensions/hosting
0 string Cr24 Google Chrome extension
!:mime application/x-chrome-extension
# The CRX header stores the version as a little-endian 32-bit integer, so it
# is read as ulelong; a native-order ulong would print a byte-swapped number
# on big-endian hosts.
>4 ulelong x \b, version %u

# SeqBox - Sequenced container
# ext: sbx, seqbox
# Marco Pontello marcopon@gmail.com
# reference: https://github.com/MarcoPon/SeqBox
0 string SBx SeqBox,
>3 byte x version %d

# LyNX archive
56 string USE\040LYNX\040TO\040DISSOLVE\040THIS\040FILE LyNX archive

# From: Joerg Jenderek
# URL: https://www.acronis.com/
# Reference: https://en.wikipedia.org/wiki/TIB_(file_format)
# Note: only tested with True Image 2013 Build 5962 and 2019 Build 14110
0 ubequad 0xce24b9a220000000 Acronis True Image backup
!:mime application/x-acronis-tib
!:ext tib
# 01000000
#>20 ubelong x \b, at 20 0x%x
# 20000000
#>28 ubelong x \b, at 28 0x%x
# strings like "Generic- SD/MMC 1.00" "Unknown Disk" "Msft Virtual Disk 1.0"
# ???
# strings like "\Device\0000011e" "\Device\0000015a"
#>0 search/0x6852300/cs \\Device\\
#>>&-1 pstring x \b, %s
# "\Device\HarddiskVolume30" "\Device\HarddiskVolume39"
#>>>&1 search/180/cs \\Device\\
#>>>>&-1 pstring x \b, %s
#>>>>>&0 search/29/cs \0\0\xc8\0
# disk label
#>>>>>>&10 lestring16 x \b, disk label %11.11s
#>>>>>>&9 plestring16 x \b, disk label "%11.11s"
#>>>>>>&10 ubequad x %16.16llx


# Gentoo XPAK binary package
# by Michal Gorny <mgorny@gentoo.org>
# https://gitweb.gentoo.org/proj/portage.git/tree/man/xpak.5
-4 string STOP
>-16 string XPAKSTOP Gentoo binary package (XPAK)

# From: Joerg Jenderek
# URL: https://kodi.wiki/view/TexturePacker
# Reference: https://mirrors.kodi.tv/releases/source/17.3-Krypton.tar.gz
# /xbmc-Krypton/xbmc/guilib/XBTF.h
# /xbmc-Krypton/xbmc/guilib/XBTF.cpp
0 string XBTF
# skip ASCII text by looking for terminating \0 of path
>264 ubyte 0 XBMC texture package
!:mime application/x-xbmc-xbt
!:ext xbt
# XBTF_VERSION 2
>>4 string !2 \b, version %-.1s
# nofFiles /xbmc-Krypton/xbmc/guilib/XBTFReader.cpp
>>5 ulelong x \b, %u file
# plural s
>>5 ulelong >1 \bs
# path[CXBTFFile[MaximumPathLength=256]
>>9 string x \b, 1st %s
