#------------------------------------------------------------------------------
# $File: archive,v 1.162 2022/05/27 21:27:59 christos Exp $
# archive:  file(1) magic for archive formats (see also "msdos" for self-
#           extracting compressed archives)
#
# cpio, ar, arc, arj, hpack, lha/lharc, rar, squish, uc2, zip, zoo, etc.
# pre-POSIX "tar" archives are also handled in the C code ../../src/is_tar.c.

# POSIX tar archives
# URL:		https://en.wikipedia.org/wiki/Tar_(computing)
# Reference:	https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5&manpath=FreeBSD+8-current
# header mainly padded with nul bytes
500	quad	0
!:strength /2
# filename or extended attribute: printable strings in the range from space/null up to umlaut ue
>0	ubeshort	>0x1F00
>>0	ubeshort	<0xFCFD
# last 4 header bytes are often null, but tar\0 in gtarfail2.tar gtarfail.tar-bad
# at https://sourceforge.net/projects/s-tar/files/testscripts/
>>>508	ubelong&0x8B9E8DFF	0
# nul, space or ascii digit 0-7 at start of mode
>>>>100	ubyte&0xC8	=0
>>>>>101	ubyte&0xC8	=0
# nul, space at end of check sum
>>>>>>155	ubyte&0xDF	=0
# space or ascii digit 0 at start of check sum
>>>>>>>148	ubyte&0xEF	=0x20
>>>>>>>>0	use	tar-file
# minimal check and then display tar archive information which can also be
# embedded inside others like Android Backup, Clam AntiVirus database
0	name	tar-file
>257	string	!ustar
# header padded with nuls
>>257	ulong	=0
# GNU tar version 1.29 with non pax format option without refusing
# creates misleading V7 header for Long path, Multi-volume, Volume type
>>>156	ubyte	0x4c	GNU tar archive
!:mime	application/x-gtar
!:ext	tar/gtar
>>>156	ubyte	0x4d	GNU tar archive
!:mime	application/x-gtar
!:ext	tar/gtar
>>>156	ubyte	0x56	GNU tar archive
!:mime	application/x-gtar
!:ext	tar/gtar
>>>156	default	x	tar archive (V7)
!:mime	application/x-tar
!:ext	tar
# other stuff in padding
# some implementations add new fields to the blank area at the end of the header record
# created for example by DOS TAR 3.20g 1994 Tim V.Shapore with -j option
>>257	ulong	!0	tar archive (old)
!:mime	application/x-tar
!:ext	tar
# magic in newer, GNU, posix variants
>257	string	=ustar
# 2 last char of magic and UStar version because string expression does not work
# 2 space characters followed by a null for GNU variant
>>261	ubelong	=0x72202000	POSIX tar archive (GNU)
!:mime	application/x-gtar
!:ext	tar/gtar
# UStar version with ASCII "00"
>>261	ubelong	0x72003030	POSIX
# gLOBAL and ExTENSION type only found in POSIX.1-2001 format
>>>156	ubyte	0x67	\b.1-2001
>>>156	ubyte	0x78	\b.1-2001
>>>156	ubyte	x	tar archive
!:mime	application/x-ustar
!:ext	tar/ustar
# version with 2 binary nuls embedded in Android Backup like com.android.settings.ab
>>261	ubelong	0x72000000	tar archive (ustar)
!:mime	application/x-ustar
!:ext	tar/ustar
# not yet seen ustar variant with garbage version
>>261	default	x	tar archive (unknown ustar)
!:mime	application/x-ustar
!:ext	tar/ustar
# type flag of 1st tar archive member
#>156	ubyte	x	\b, %c-type
>156	ubyte	x
>>156	ubyte	0	\b, file
>>156	ubyte	0x30	\b, file
>>156	ubyte	0x31	\b, hard link
>>156	ubyte	0x32	\b, symlink
>>156	ubyte	0x33	\b, char device
>>156	ubyte	0x34	\b, block device
>>156	ubyte	0x35	\b, directory
>>156	ubyte	0x36	\b, fifo
>>156	ubyte	0x37	\b, reserved
>>156	ubyte	0x4c	\b, long path
>>156	ubyte	0x4d	\b, multi volume
>>156	ubyte	0x56	\b, volume
>>156	ubyte	0x67	\b, global
>>156	ubyte	0x78	\b, extension
>>156	default	x	\b, type
>>>156	ubyte	x	'%c'
# name[100]
>0	string	>\0	%-.60s
# mode mainly stored as an octal number in ASCII null or space terminated
>100	string	>\0	\b, mode %-.7s
# user id mainly as octal numbers in ASCII null or space terminated
>108	string	>\0	\b, uid %-.7s
# group id mainly as octal numbers in ASCII null or space terminated
>116	string	>\0	\b, gid %-.7s
# size mainly as octal number in ASCII
>124	ubyte	<0x38
>>124	string	>\0	\b, size %-.12s
# coding indicated by setting the high-order bit of the leftmost byte
>124	ubyte	>0xEF	\b, size 0x
>>124	ubyte	!0xff	\b%2.2x
>>125	ubyte	!0xff	\b%2.2x
>>126	ubyte	!0xff	\b%2.2x
>>127	ubyte	!0xff	\b%2.2x
>>128	ubyte	!0xff	\b%2.2x
>>129	ubyte	!0xff	\b%2.2x
>>130	ubyte	!0xff	\b%2.2x
>>131	ubyte	!0xff	\b%2.2x
>>132	ubyte	!0xff	\b%2.2x
>>133	ubyte	!0xff	\b%2.2x
>>134	ubyte	!0xff	\b%2.2x
>>135	ubyte	!0xff	\b%2.2x
# seconds since 0:0:0 1 jan 1970 UTC as octal number mainly in ASCII null or space terminated
>136	string	>\0	\b, seconds %-.11s
# header checksum stored as an octal number in ASCII null or space terminated
#>148	string	x	\b, cksum %.7s
# linkname[100]
>157	string	>\0	\b, linkname %-.40s
# additional fields for ustar
>257	string	=ustar
# owner user name null terminated
>>265	string	>\0	\b, user %-.32s
# group name null terminated
>>297	string	>\0	\b, group %-.32s
# device major minor if not zero
>>329	ubequad&0xCFCFCFCFcFcFcFdf	!0
>>>329	string	x	\b, devmaj %-.7s
>>337	ubequad&0xCFCFCFCFcFcFcFdf	!0
>>>337	string	x	\b, devmin %-.7s
# prefix[155]
>>345	string	>\0	\b, prefix %-.155s
# old non ustar/POSIX tar
>257	string	!ustar
>>508	string	=tar\0
# padding[255] in old star
>>>257	string	>\0	\b, padding: %-.40s
>>508	default	x
# padding[255] in old tar sometimes comment field
>>>257	string	>\0	\b, comment: %-.40s

# Incremental snapshot gnu-tar format from:
# https://www.gnu.org/software/tar/manual/html_node/Snapshot-Files.html
0	string	GNU\ tar-	GNU tar incremental snapshot data
>&0	regex	[0-9]\\.[0-9]+-[0-9]+	version %s

# cpio archives
#
# Yes, the top two "cpio archive" formats *are* supposed to just be "short".
# The idea is to indicate archives produced on machines with the same
# byte order as the machine running "file" with "cpio archive", and
# to indicate archives produced on machines with the opposite byte order
# from the machine running "file" with "byte-swapped cpio archive".
#
# The SVR4 "cpio(4)" hints that there are additional formats, but they
# are defined as "short"s; I think all the new formats are
# character-header formats and thus are strings, not numbers.
0	short	070707	cpio archive
!:mime	application/x-cpio
0	short	0143561	byte-swapped cpio archive
!:mime	application/x-cpio # encoding: swapped
0	string	070707	ASCII cpio archive (pre-SVR4 or odc)
!:mime	application/x-cpio
0	string	070701	ASCII cpio archive (SVR4 with no CRC)
!:mime	application/x-cpio
0	string	070702	ASCII cpio archive (SVR4 with CRC)
!:mime	application/x-cpio

#
# Various archive formats used by various versions of the "ar"
# command.
#

#
# Original UNIX archive formats.
# They were written with binary values in host byte order, and
# the magic number was a host "int", which might have been 16 bits
# or 32 bits.  We don't say "PDP-11" or "VAX", as there might have
# been ports to little-endian 16-bit-int or 32-bit-int platforms
# (x86?) using some of those formats; if none existed, feel free
# to use "PDP-11" for little-endian 16-bit and "VAX" for little-endian
# 32-bit.  There might have been big-endian ports of that sort as
# well.
#
0	leshort	0177555	very old 16-bit-int little-endian archive
0	beshort	0177555	very old 16-bit-int big-endian archive
0	lelong	0177555	very old 32-bit-int little-endian archive
0	belong	0177555	very old 32-bit-int big-endian archive

0	leshort	0177545	old 16-bit-int little-endian archive
>2	string	__.SYMDEF	random library
0	beshort	0177545	old 16-bit-int big-endian archive
>2	string	__.SYMDEF	random library
0	lelong	0177545	old 32-bit-int little-endian archive
>4	string	__.SYMDEF	random library
0	belong	0177545	old 32-bit-int big-endian archive
>4	string	__.SYMDEF	random library

#
# From "pdp" (but why a 4-byte quantity?)
#
0	lelong	0x39bed	PDP-11 old archive
0	lelong	0x39bee	PDP-11 4.0 archive

#
# XXX - what flavor of APL used this, and was it a variant of
# some ar archive format?  It's similar to, but not the same
# as, the APL workspace magic numbers in pdp.
#
0	long	0100554	apl workspace

#
# System V Release 1 portable(?) archive format.
#
0	string	=<ar>	System V Release 1 ar archive
!:mime	application/x-archive

#
# Debian package; it's in the portable archive format, and needs to go
# before the entry for regular portable archives, as it's recognized as
# a portable archive whose first member has a name beginning with
# "debian".
#
# Update: Joerg Jenderek
# URL: https://en.wikipedia.org/wiki/Deb_(file_format)
0	string	=!<arch>\ndebian
# https://manpages.debian.org/testing/dpkg/dpkg-split.1.en.html
>14	string	-split	part of multipart Debian package
!:mime	application/vnd.debian.binary-package
# udeb is used for stripped down deb file
!:ext	deb/udeb
>14	string	-binary	Debian binary package
!:mime	application/vnd.debian.binary-package
# For ipk packager see also https://en.wikipedia.org/wiki/Opkg
!:ext	deb/udeb/ipk
# This should not happen
>14	default	x	Unknown Debian package
# NL terminated version; for most Debian cases this is 2.0 or 2.1 for split
>68	string	>\0	(format %s)
#>68	string	!2.0\n
#>>68	string	x	(format %.3s)
>68	string	=2.0\n
# 2nd archive name=control archive name like control.tar.gz or control.tar.xz
>>72	string	>\0	\b, with %.14s
# look for 3rd archive name=data archive name like data.tar.{gz,xz,bz2,lzma}
>>0	search/0x93e4f	data.tar.	\b, data compression
# the above line only works if FILE_BYTES_MAX in ../../src/file.h is raised
# for example like libreoffice-dev-doc_1%3a5.2.7-1+rpi1+deb9u3_all.deb
>>>&0	string	x	%.2s
# skip space (0x20 BSD) and slash (0x2f System V) character marking end of name
>>>&2	ubyte	!0x20
>>>>&-1	ubyte	!0x2f
# display 3rd character of file name extension like 2 of bz2 or m of lzma
>>>>>&-1	ubyte	x	\b%c
>>>>>>&0	ubyte	!0x20
>>>>>>>&-1	ubyte	!0x2f
# display 4th character of file name extension like a of lzma
>>>>>>>>&-1	ubyte	x	\b%c
# split debian package case
>68	string	=2.1\n
# dpkg-1.18.25/dpkg-split/info.c
# NL terminated ASCII package name like ckermit
>>&0	string	x	\b, %s
# NL terminated package version like 302-5.3
>>>&1	string	x	%s
# NL terminated MD5 checksum
>>>>&1	string	x	\b, MD5 %s
# NL terminated original package length
>>>>>&1	string	x	\b, unsplitted size %s
# NL terminated part length
>>>>>>&1	string	x	\b, part length %s
# NL terminated package part like n/m
>>>>>>>&1	string	x	\b, part %s
# NL terminated package architecture like armhf since dpkg 1.16.1 or later
>>>>>>>>&1	string	x	\b, %s

#
# MIPS archive; they're in the portable archive format, and need to go
# before the entry for regular portable archives, as it's recognized as
# a portable archive whose first member has a name beginning with
# "__________E".
#
0	string	=!<arch>\n__________E	MIPS archive
!:mime	application/x-archive
>20	string	U	with MIPS Ucode members
>21	string	L	with MIPSEL members
>21	string	B	with MIPSEB members
>19	string	L	and an EL hash table
>19	string	B	and an EB hash table
>22	string	X	-- out of date

#
# BSD/SVR2-and-later portable archive formats.
#
# Update: Joerg Jenderek
# URL: http://fileformats.archiveteam.org/wiki/AR
# Reference: https://www.unix.com/man-page/opensolaris/3HEAD/ar.h/
# Note: Mach-O universal binary in ./cafebabe is dependent
# TODO: unify current ar archive, MIPS archive, Debian package
#	distinguish BSD, SVR; 32, 64 bit; HP from other 32-bit SVR;
#	*.ar packages from *.a libraries. handle empty archive
0	string	=!<arch>\n	current ar archive
# print first and possibly second ar_name[16] for debugging purpose
#>8	string	x	\b, 1st "%.16s"
#>68	string	x	\b, 2nd "%.16s"
!:mime	application/x-archive
# a in most cases for libraries; lib for Microsoft libraries; ar in other cases
!:ext	a/lib/ar
>8	string	__.SYMDEF	random library
# first member with long marked name __.SYMDEF SORTED implies BSD library
>68	string	__.SYMDEF\ SORTED	random library
# Reference: https://parisc.wiki.kernel.org/images-parisc/b/b2/Rad_11_0_32.pdf
# "archive file" entry moved from ./hp
# LST header system_id 0210h~PA-RISC 1.1,... identifies the target architecture
# LST header a_magic 0619h~relocatable library
>68	belong	0x020b0619	- PA-RISC1.0 relocatable library
>68	belong	0x02100619	- PA-RISC1.1 relocatable library
>68	belong	0x02110619	- PA-RISC1.2 relocatable library
>68	belong	0x02140619	- PA-RISC2.0 relocatable library
#EOF for common ar archives

#
# "Thin" archive, as can be produced by GNU ar.
#
0	string	=!<thin>\n	thin archive with
>68	belong	0	no symbol entries
>68	belong	1	%d symbol entry
>68	belong	>1	%d symbol entries

0	search/1	-h-	Software Tools format archive text

# ARC archiver, from Daniel Quinlan (quinlan@yggdrasil.com)
#
# The first byte is the magic (0x1a), byte 2 is the compression type for
# the first file (0x01 through 0x09), and bytes 3 to 15 are the MS-DOS
# filename of the first file (null terminated).  Since some types collide
# we only test some types on basis of frequency: 0x08 (83%), 0x09 (5%),
# 0x02 (5%), 0x03 (3%), 0x04 (2%), 0x06 (2%).  0x01 collides with terminfo.
0	lelong&0x8080ffff	0x0000081a	ARC archive data, dynamic LZW
!:mime	application/x-arc
0	lelong&0x8080ffff	0x0000091a	ARC archive data, squashed
!:mime	application/x-arc
0	lelong&0x8080ffff	0x0000021a	ARC archive data, uncompressed
!:mime	application/x-arc
0	lelong&0x8080ffff	0x0000031a	ARC archive data, packed
!:mime	application/x-arc
0	lelong&0x8080ffff	0x0000041a	ARC archive data, squeezed
!:mime	application/x-arc
0	lelong&0x8080ffff	0x0000061a	ARC archive data, crunched
!:mime	application/x-arc
# [JW] stuff taken from idarc, obviously ARC successors:
0	lelong&0x8080ffff	0x00000a1a	PAK archive data
!:mime	application/x-arc
0	lelong&0x8080ffff	0x0000141a	ARC+ archive data
!:mime	application/x-arc
0	lelong&0x8080ffff	0x0000481a	HYP archive data
!:mime	application/x-arc

# Acorn archive formats (Disaster prone simpleton, m91dps@ecs.ox.ac.uk)
# I can't create either SPARK or ArcFS archives so I have not tested this stuff
# [GRR:  the original entries collide with ARC, above; replaced with combined
#  version (not tested)]
#0	byte	0x1a	RISC OS archive (spark format)
0	string	\032archive	RISC OS archive (ArcFS format)
0	string	Archive\000	RISC OS archive (ArcFS format)

# All these were taken from idarc, many could not be verified. Unfortunately,
# there were many low-quality sigs, i.e. easy to trigger false positives.
# Please notify me of any real-world fishy/ambiguous signatures and I'll try
# to get my hands on the actual archiver and see if I find something better. [JW]
# probably many can be enhanced by finding some 0-byte or control char near the start

# idarc calls this Crush/Uncompressed... *shrug*
0	string	CRUSH	Crush archive data
# Squeeze It (.sqz)
0	string	HLSQZ	Squeeze It archive data
# SQWEZ
0	string	SQWEZ	SQWEZ archive data
# HPack (.hpk)
0	string	HPAK	HPack archive data
# HAP
0	string	\x91\x33HF	HAP archive data
# MD/MDCD
0	string	MDmd	MDCD archive data
# LIM
0	string	LIM\x1a	LIM archive data
# SAR
3	string	LH5	SAR archive data
# BSArc/BS2
0	string	\212\3SB\020\0	BSArc/BS2 archive data
# Bethesda Softworks Archive (Oblivion)
0	string	BSA\0	BSArc archive data
>4	lelong	x	version %d
# MAR
2	string	=-ah	MAR archive data
# ACB
#0	belong&0x00f800ff	0x00800000	ACB archive data
# CPZ
# TODO, this is what idarc says: 0 string \0\0\0 CPZ archive data
# JRC
0	string	JRchive	JRC archive data
# Quantum
0	string	DS\0	Quantum archive data
# ReSOF
0	string	PK\3\6	ReSOF archive data
# QuArk
0	string	7\4	QuArk archive data
# YAC
14	string	YC	YAC archive data
# X1
0	string	X1	X1 archive data
0	string	XhDr	X1 archive data
# CDC Codec (.dqt)
0	belong&0xffffe000	0x76ff2000	CDC Codec archive data
# AMGC
0	string	\xad6"	AMGC archive data
# NuLIB
0	string	N\xc3\xb5F\xc3\xa9lx\xc3\xa5	NuLIB archive data
# PakLeo
0	string	LEOLZW	PAKLeo archive data
# ChArc
0	string	SChF	ChArc archive data
# PSA
0	string	PSA	PSA archive data
# CrossePAC
0	string	DSIGDCC	CrossePAC archive data
# Freeze
0	string	\x1f\x9f\x4a\x10\x0a	Freeze archive data
# KBoom
0	string	\xc2\xa8MP\xc2\xa8	KBoom archive data
# NSQ, must go after CDC Codec
0	string	\x76\xff	NSQ archive data
# DPA
0	string	Dirk\ Paehl	DPA archive data
# BA
# TODO: idarc says "bytes 0-2 == bytes 3-5"
# TTComp
# URL: http://fileformats.archiveteam.org/wiki/TTComp_archive
# Update: Joerg Jenderek
# GRR: line below is too general as it matches also Panorama database "TCDB 2003-10 demo.pan", others
0	string	\0\6
# look for first keyword of Panorama database *.pan
>12	search/261	DESIGN
# skip keyword with low entropy
>12	default	x
# skip DOS 2.0 backup id file, sequence 6 with many nils like BACKUPID_xx6.@@@ handled by ./msdos
>>8	quad	!0
>>>0	use	ttcomp
# variant ASCII, 4K dictionary (strength=48=50-2). With strength=49 wrong order! WHY?
0	string	\1\6
# TODO:
# skip VAX-order 68k Blit mpx/mux executable (strength=50) handled by ./blit
!:strength -2
>0	use	ttcomp
0	string	\0\5
# skip some DOS 2.0 backup id file, sequence 5 with many nils like BACKUPID_075.@@@ handled by ./msdos
>8	quad	!0
>>0	use	ttcomp
0	string	\1\5
# TODO:
# variant ASCII, 2K dictionary (strength=48=50-2). With strength=49 wrong order! WHY?
# skip ctab data (strength=50) handled by ./ibm6000
# skip locale data table (strength=50) handled by ./digital
!:strength -2
>0	use	ttcomp
0	string	\0\4
# skip many Maple help database *.hdb with version tag handled by ./maple
>1028	string	!version
# skip veclib maple.hdb by looking for Maple keyword
>>4	search/1091	Maple\040
#>4	search/34090	Maple\040
>>4	default	x
# skip DOS 2.0-3.2 backed up sequence 4 with many nils like LOTUS5.RAR handled by ./msdos
# skip xBASE Compound Index file *.CDX with many nils
>>>0x54	quad	!0
>>>>0	use	ttcomp
0	string	\1\4
# TODO:
# skip Commodore PET BASIC 4.0 program *.prg
# variant ASCII, 1K dictionary (strength=48=50-2). With strength=49 wrong order! WHY?
# skip shared library (strength=50) handled by ./ibm6000
!:strength -2
>0	use	ttcomp
# display information of TTComp archive
0	name	ttcomp
# (version 5.25) labeled the entry as "TTComp archive data"
>0	ubyte	x	TTComp archive data
!:mime	application/x-compress-ttcomp
# PBACKSCR.PI1
!:ext	$xe/$ts/pi1/__d
# compression type: 0~binary compression 1~ASCII compression
>0	ubyte	0	\b, binary
>0	ubyte	1	\b, ASCII
# size of the dictionary: 4~1024 bytes 5~2048 bytes 6~4096 bytes
>1	ubyte	4	\b, 1K
>1	ubyte	5	\b, 2K
>1	ubyte	6	\b, 4K
>1	ubyte	x	dictionary
# https://mark0.net/forum/index.php?topic=848
# last 3 bytes probably have only 8 possible bit sequences
#	xxxxxxxx 0000000x 11111111	____FFh
#	xxxxxxxx 10000000 01111111	__807Fh
#	0xxxxxxx 11000000 00111111	__C03Fh
#	00xxxxxx 11100000 00011111	__E01Fh
#	000xxxxx 11110000 00001111	__F00Fh
#	0000xxxx 11111000 00000111	__F807h
#	00000xxx 11111100 00000011	__FC03h
#	000000xx 11111110 00000001	__FE01h
# but for quickgif.__d	0A7DD4h
#>-3	ubyte	x	\b, last 3 bytes 0x%2.2x
#>-2	ubeshort	x	\b%4.4x
# From: Joerg Jenderek
# URL: https://en.wikipedia.org/wiki/Disk_Copy
# reference: http://nulib.com/library/FTN.e00005.htm
0x52	ubeshort	0x0100
# test for disk image size equal or above 400k
>0x40	ubelong	>409599
# test also for disk image size equal or below 1440k to skip
# windows7en.mbr UNICODE.DAT
#>>0x40	ubelong	<1474561
# test now for "low" disk image size equal or below 64 MiB to skip
# windows7en.mbr (B441BBAAh) UNICODE.DAT (0400AF05h)
>>0x40	ubelong	<0x04000001
# To skip Flags$StringJoiner.class with size 00106A61h test also for valid disk image sizes
#	00064000 for 400k GCR disks	dc42-400k-gcr.trid.xml
#	000c8000 for 800k GCR disks	dc42-800k-gcr.trid.xml
#	000b4000 for 720k MFM disks	dc42-720k-mfm.trid.xml
#	00168000 for 1440k MFM disks	dc42-1440k-mfm.trid.xml
# https://lisaem.sunder.net/LisaProjectDocs.txt
#	00500000 05M available
#	00A00000 10M available
#	01800000 24M possible
#	02000000 32M uncertain
#	04000000 64M uncertain
>>>0x40	ubelong&0xf8003fFF	0
# skip samples with invalid disk name length like:
# 181 (biosmd80.rom) 202 (Flags$StringJoiner.class) 90 (UNICODE.DAT)
>>>>0x0	ubyte	<64
>>>>>0	use	dc42-floppy
# display information of Apple DiskCopy 4.2 floppy image
0	name	dc42-floppy
# disk name length; maximal 63
#>0	ubyte	x	DISK NAME LENGTH %u
# ASCII image pascal (maximal 63 bytes) name padded with NULs like:
# "Microsoft Mail" "Disquette 2" "IIe Installer Disk"
# "-lisaem.sunder.net hd-" (dc42-lisaem.trid.xml) "-not a Macintosh disk" (dc42-nonmac.trid.xml)
>00	pstring/B	x	Apple DiskCopy 4.2 image %s
#!:mime	application/octet-stream
!:mime	application/x-dc42-floppy-image
!:apple	dCpydImg
# probably also img like: "Utilitaires 2.img" "Installation 7.img"
!:ext	image/dc42/img
# data size in bytes like: 409600 737280 819200 1474560
>0x40	ubelong	x	\b, %u bytes
# for debugging purpose size in hexadecimal
#>0x40	ubelong	x	(%#8.8x)
# tag size in bytes like: 0 (often) 2580h (PUID fmt/625) 4B00h (Microsoft Mail.image)
>0x44	ubelong	>0	\b, %#x tag size
# data checksum
#>0x48	ubelong	x	\b, %#x checksum
# tag checksum
#>0x4c	ubelong	x	\b, %#x tag checksum
# disk encoding like: 0 1 2 3 (PUID: fmt/625)
>0x50	ubyte	0	\b, GCR CLV ssdd (400k)
>0x50	ubyte	1	\b, GCR CLV dsdd (800k)
>0x50	ubyte	2	\b, MFM CAV dsdd (720k)
>0x50	ubyte	3	\b, MFM CAV dshd (1440k)
>0x50	ubyte	>3	\b, %#x encoding
# format byte like: 12h (Lisa 400K) 24h (400K Macintosh) 96h (800K Apple II disk)
# 2 (Mac 400k "Disquette Installation 13.image")
# 22h (double-sided MFM or Mac 800k "Disco 12.image" "IIe Installer Disk.image")
>0x51	ubyte	x	\b, %#x format
#>0x54	ubequad	x	\b, data %#16.16llx
# ESP, could this conflict with Easy Software Products' (e.g.ESP ghostscript) documentation?
0	string	ESP	ESP archive data
# ZPack
0	string	\1ZPK\1	ZPack archive data
# Sky
0	string	\xbc\x40	Sky archive data
# UFA
0	string	UFA	UFA archive data
# Dry
0	string	=-H2O	DRY archive data
# FoxSQZ
0	string	FOXSQZ	FoxSQZ archive data
# AR7
0	string	,AR7	AR7 archive data
# PPMZ
0	string	PPMZ	PPMZ archive data
# MS Compress
# Update: Joerg Jenderek
# URL: http://fileformats.archiveteam.org/wiki/MS-DOS_installation_compression
# Reference: https://hwiegman.home.xs4all.nl/fileformats/compress/szdd_kwaj_format.html
# Note: use correct version of extracting tool like EXPAND, UNPACK, DECOMP or 7Z
4	string	\x88\xf0\x27
# KWAJ variant
>0	string	KWAJ	MS Compress archive data, KWAJ variant
!:mime	application/x-ms-compress-kwaj
# extension not working in version 5.32
# magic/Magdir/archive, 284: Warning: EXTENSION type ` ??_' has bad char '?'
# file: line 284: Bad magic entry ' ??_'
!:ext	??_
# compression method (0-4)
>>8	uleshort	x	\b, %u method
# offset of compressed data
>>10	uleshort	x	\b, %#x offset
#>>(10.s)	uleshort	x
#>>>&-6	string	x	\b, TEST extension %-.3s
# header flags to mark header extensions
>>12	uleshort	>0	\b, %#x flags
# 4 bytes: decompressed length of file
>>12	uleshort	&0x01
>>>14	ulelong	x	\b, original size: %u bytes
# 2 bytes: unknown purpose
# 2 bytes: length of unknown data + mentioned bytes
# 1-9 bytes: null-terminated file name
# 1-4 bytes: null-terminated file extension
>>12	uleshort	&0x08
>>>12	uleshort	^0x01
>>>>12	uleshort	^0x02
>>>>>12	uleshort	^0x04
>>>>>>12	uleshort	^0x10
>>>>>>>14	string	x	\b, %-.8s
>>>>>>12	uleshort	&0x10
>>>>>>>14	string	x	\b, %-.8s
>>>>>>>>&1	string	x	\b.%-.3s
>>>>>12	uleshort	&0x04
>>>>>>12	uleshort	^0x10
>>>>>>>(14.s)	uleshort	x
>>>>>>>>&14	string	x	\b, %-.8s
>>>>>>12	uleshort	&0x10
>>>>>>>(14.s)	uleshort	x
>>>>>>>>&14	string	x	\b, %-.8s
>>>>>>>>>&1	string	x	\b.%-.3s
>>>>12	uleshort	&0x02
>>>>>12	uleshort	^0x04
>>>>>>12	uleshort	^0x10
>>>>>>>16	string	x	\b, %-.8s
>>>>>>12	uleshort	&0x10
>>>>>>>16	string	x	\b, %-.8s
>>>>>>>>&1	string	x	\b.%-.3s
>>>>>12	uleshort	&0x04
>>>>>>12	uleshort	^0x10
>>>>>>>(16.s)	uleshort	x
>>>>>>>>&16	string	x	\b, %-.8s
>>>>>>12	uleshort	&0x10
>>>>>>>(16.s)	uleshort	x
# The name test must sit one level below the indirect (16.s) test so that
# &16 is resolved relative to that match, and it needs the "\b, " separator;
# this mirrors the parallel (14.s), (18.s) and (20.s) branches.
>>>>>>>>&16	string	x	\b, %-.8s
>>>>>>>>>&1	string	x	\b.%-.3s
>>>12	uleshort	&0x01
>>>>12	uleshort	^0x02
>>>>>12	uleshort	^0x04
>>>>>>12	uleshort	^0x10
>>>>>>>18	string	x	\b, %-.8s
>>>>>>12	uleshort	&0x10
>>>>>>>18	string	x	\b, %-.8s
>>>>>>>>&1	string	x	\b.%-.3s
>>>>>12	uleshort	&0x04
>>>>>>12	uleshort	^0x10
>>>>>>>(18.s)	uleshort	x
>>>>>>>>&18	string	x	\b, %-.8s
>>>>>>12	uleshort	&0x10
>>>>>>>(18.s)	uleshort	x
>>>>>>>>&18	string	x	\b, %-.8s
>>>>>>>>>&1	string	x	\b.%-.3s
>>>>12	uleshort	&0x02
>>>>>12	uleshort	^0x04
>>>>>>12	uleshort	^0x10
>>>>>>>20	string	x	\b, %-.8s
>>>>>>12	uleshort	&0x10
>>>>>>>20	string	x	\b, %-.8s
>>>>>>>>&1	string	x	\b.%-.3s
>>>>>12	uleshort	&0x04
>>>>>>12	uleshort	^0x10
>>>>>>>(20.s)	uleshort	x
>>>>>>>>&20	string	x	\b, %-.8s
>>>>>>12	uleshort	&0x10
>>>>>>>(20.s)	uleshort	x
>>>>>>>>&20	string	x	\b, %-.8s
>>>>>>>>>&1	string	x	\b.%-.3s
# 2 bytes: length of data + mentioned bytes
#
# SZDD variant Haruhiko Okumura's LZSS or 7z type MsLZ
# URL: http://fileformats.archiveteam.org/wiki/MS-DOS_installation_compression
# Reference: http://www.cabextract.org.uk/libmspack/doc/szdd_kwaj_format.html
# http://mark0.net/download/triddefs_xml.7z/defs/s/szdd.trid.xml
# Note: called "Microsoft SZDD compressed (Haruhiko Okumura's LZSS)" by TrID
# verified by 7-Zip `7z l -tMsLZ -slt *.??_` as MsLZ
# `deark -l -m lzss_oku -d2 setup-1-41.bin` as "LZSS.C by Haruhiko Okumura"
>0	string	SZDD	MS Compress archive data, SZDD variant
# 2nd part of signature
#>>4	ubelong	0x88F02733	\b, SIGNATURE OK
!:mime	application/x-ms-compress-szdd
!:ext	??_
# The character missing from the end of the filename (0=unknown)
>>9	string	>\0	\b, %-.1s is last character of original name
# https://www.betaarchive.com/forum/viewtopic.php?t=26161
# Compression mode: "A" (0x41) found but sometimes "B" in Windows 3.1 builds 026 and 034e
>>8	string	!A	\b, %-.1s method
>>10	ulelong	>0	\b, original size: %u bytes
# Summary: InstallShield archive with SZDD compressed
# URL: https://community.flexera.com/t5/InstallShield-Knowledge-Base/InstallShield-Redistributable-Files/ta-p/5647
# From: Joerg Jenderek
1	search/48/bs	SZDD\x88\xF0\x27\x33	InstallShield archive
#!:mime	application/octet-stream
!:mime	application/x-installshield-compress-szdd
!:ext	ibt
# name of compressed archive member like: setup.dl_ _setup7int.dl_ _setup2k.dl_ _igdi.dl_ cabinet.dl_
>0	string	x	%s
# name of uncompressed archive member like: setup.dll _Setup.dll IGdi.dll CABINET.DLL
>>&1	string	x	(%s)
# probably version like: 9.0.0.333 9.1.0.429 11.50.0.42618
>>>&1	string	x	\b, version %s
# SZDD member length like: 168048 169333 181842
>>>>&1	string	x	\b, %s bytes
# MS Compress archive data
#>&0	string	SZDD	\b, SIGNATURE FOUND
>&0	indirect	x
# QBasic SZDD variant
3	string	\x88\xf0\x27
>0	string	SZ\x20	MS Compress archive data, QBasic variant
!:mime	application/x-ms-compress-sz
!:ext	??$
>>8	ulelong	>0	\b, original size: %u bytes

# Summary: CAZIP compressed file
# From: Joerg Jenderek
# URL: http://fileformats.archiveteam.org/wiki/CAZIP
# Reference: http://mark0.net/download/triddefs_xml.7z/defs/c/caz.trid.xml
# Note: Format is distinct from CAZIPXP compressed
0	string	\x0D\x0A\x1ACAZIP	CAZIP compressed file
#!:mime	application/octet-stream
!:mime	application/x-compress-cazip
# like: BLINKER.WR_ CLIPDEFS._ CAOSETUP.EX_ CLIPPER.EX_ FILEIO.C_
!:ext	??_/?_/_

# Summary: FTCOMP compressed archive
# From: Joerg Jenderek
# URL: http://fileformats.archiveteam.org/wiki/FTCOMP
# Reference: http://mark0.net/download/triddefs_xml.7z/defs/a/ark-ftcomp.trid.xml
# Note: called by TrID "FTCOMP compressed archive"
# extracted by `unpack seahelp.hl_`
24	string/b	FTCOMP	FTCOMP compressed archive
#!:mime	application/octet-stream
!:mime	application/x-compress-ftcomp
!:ext	??_/??@/dll/drv/pk2/
# probably A596FDFF magic at the beginning
>0	ubelong	!0xA596FDFF	\b, at beginning %#x
# probably original file name with directory like: \OS2\unpack.exe \SYSTEM\8514.DRV MAHJONGG.EXE
>41	string	x	"%s"

# MP3 (archiver, not lossy audio compression)
0	string	MP3\x1a	MP3-Archiver archive data
# ZET
0	string	OZ\xc3\x9d	ZET archive data
# TSComp
0	string	\x65\x5d\x13\x8c\x08\x01\x03\x00	TSComp archive data
# ARQ
0	string	gW\4\1	ARQ archive data
# Squash
3	string	OctSqu	Squash archive data
# Terse
0	string	\5\1\1\0	Terse archive data
# PUCrunch
0	string	\x01\x08\x0b\x08\xef\x00\x9e\x32\x30\x36\x31	PUCrunch archive data
# UHarc
0	string	UHA	UHarc archive data
# ABComp
0	string	\2AB	ABComp archive data
0	string	\3AB2	ABComp archive data
# CMP
0	string	CO\0	CMP archive data
# Splint
0	string	\x93\xb9\x06	Splint archive data
# InstallShield
0	string	\x13\x5d\x65\x8c	InstallShield Z archive Data
# Gather
1	string	GTH	Gather archive data
# BOA
0	string	BOA	BOA archive data
# RAX
0	string	ULEB\xa	RAX archive data
# Xtreme
0	string	ULEB\0	Xtreme archive data
# Pack Magic
0	string	@\xc3\xa2\1\0	Pack Magic archive data
# BTS
0	belong&0xfeffffff	0x1a034465	BTS archive data
# ELI 5750
0	string	Ora\ 	ELI 5750 archive data
# QFC
0	string	\x1aFC\x1a	QFC archive data
0	string	\x1aQF\x1a	QFC archive data
# PRO-PACK
0	string	RNC	PRO-PACK archive data
# 777
0	string	777	777 archive data
# LZS221
0	string	sTaC	LZS221 archive data
# HPA
0	string	HPA	HPA archive data
# Arhangel
0	string	LG	Arhangel archive data
# EXP1, uses bzip2
0	string	0123456789012345BZh	EXP1 archive data
# IMP
0	string	IMP\xa	IMP archive data
# NRV
0	string	\x00\x9E\x6E\x72\x76\xFF	NRV archive data
# Squish
0	string	\x73\xb2\x90\xf4	Squish archive data
# Par
0	string	PHILIPP	Par archive data
0	string	PAR	Par archive data
# HIT
0	string	UB	HIT archive data
# SBX
0	belong&0xfffff000	0x53423000	SBX archive data
# NaShrink
0	string	NSK	NaShrink archive data
# SAPCAR
0	string	#\ CAR\ archive\ header	SAPCAR archive data
0	string	CAR\ 2.00	SAPCAR archive data
0	string	CAR\ 2.01	SAPCAR archive data
#!:mime	application/octet-stream
!:mime	application/vnd.sar
!:ext	sar
# Disintegrator
0	string	DST	Disintegrator archive data
# ASD
0	string	ASD	ASD archive data
# InstallShield CAB
# Update: Joerg Jenderek at Nov 2021
# URL: https://en.wikipedia.org/wiki/InstallShield
# Reference: https://github.com/twogood/unshield/blob/master/lib/cabfile.h
# Note: Not compatible with Microsoft CAB files
# http://mark0.net/download/triddefs_xml.7z/defs/a/ark-cab-ishield.trid.xml
# CAB_SIGNATURE 0x28635349
0	string	ISc(	InstallShield
#!:mime	application/octet-stream
!:mime	application/x-installshield
# http://mark0.net/download/triddefs_xml.7z/defs/a/ark-cab-ishield-hdr.trid.xml
>16	ulelong	!0	setup header
# like: _SYS1.HDR _USER1.HDR data1.hdr
!:ext	hdr
>16	ulelong	=0	CAB
# like: _SYS1.CAB _USER1.CAB DATA1.CAB data2.cab
!:ext	cab
# https://github.com/twogood/unshield/blob/master/lib/helper.c
# version like: 0x1005201 0x100600c 0x1007000 0x1009500
# 0x2000578 0x20005dc 0x2000640 0x40007d0 0x4000834
>4	ulelong	x	\b, version %#x
# volume_info like: 0
>8	ulelong	!0	\b, volume_info %#x
# cab_descriptor_offset like: 0x200
>12	ulelong	!0x200	\b, offset %#x
#>0x200	ubequad	x	\b, at 0x200 %#16.16llx
# cab_descriptor_size like: 0 (*.cab) BD5 C8B DA5 E2A E36 116C 251D 4DA9 56F0 5CC2 6E4B 777D 779E 1F7C2
>16	ulelong	!0	\b, descriptor size %#x
# TOP4
0	string	T4\x1a	TOP4 archive data
# BatComp left out: sig looks like COM executable
# so TODO: get real 4dos batcomp file and find sig
# BlakHole
0	string	BH\5\7	BlakHole archive data
# BIX
0	string	BIX0	BIX archive data
# ChiefLZA
0	string	ChfLZ	ChiefLZA archive data
# Blink
0	string	Blink	Blink archive data
# Logitech Compress
0	string	\xda\xfa	Logitech Compress archive data
# ARS-Sfx (FIXME: really a SFX? then goto COM/EXE)
1	string	(C)\ STEPANYUK	ARS-Sfx archive data
# AKT/AKT32
0	string	AKT32	AKT32 archive data
0	string	AKT	AKT archive data
# NPack
0	string	MSTSM	NPack archive data
# PFT
0	string	\0\x50\0\x14	PFT archive data
# SemOne
0	string	SEM	SemOne archive data
# PPMD
0	string	\x8f\xaf\xac\x84	PPMD archive data
# FIZ
0	string	FIZ	FIZ archive data
# MSXiE
0	belong&0xfffff0f0	0x4d530000	MSXiE archive data
# DeepFreezer
0	belong&0xfffffff0	0x797a3030	DeepFreezer archive data
# DC
0	string	=<DC-	DC archive data
# TPac
0	string	\4TPAC\3	TPac archive data
# Ai
0	string	Ai\1\1\0	Ai archive data
0	string	Ai\1\0\0	Ai archive data
# Ai32
0	string	Ai\2\0	Ai32 archive data
0	string	Ai\2\1	Ai32 archive data
# SBC
0	string	SBC	SBC archive data
# Ybs
0	string	YBS	Ybs archive data
# DitPack
0	string	\x9e\0\0	DitPack archive data
# DMS
0	string	DMS!	DMS archive data
# EPC
0	string	\x8f\xaf\xac\x8c	EPC archive data
# VSARC
0	string	VS\x1a	VSARC archive data
# PDZ
0	string	PDZ	PDZ archive data
# ReDuq
0	string	rdqx	ReDuq archive data
# GCA
0	string	GCAX	GCA archive data
# PPMN
0	string	pN	PPMN archive data
# WinImage
3	string	WINIMAGE	WinImage archive data
# Compressia
0	string	CMP0CMP	Compressia archive data
# UHBC
0	string	UHB	UHBC archive data
# WinHKI
0	string	\x61\x5C\x04\x05	WinHKI archive data
# WWPack data file
0	string	WWP	WWPack archive data
# BSN (BSA, PTS-DOS)
0	string	\xffBSG	BSN archive data
1	string	\xffBSG	BSN archive data
3	string	\xffBSG	BSN archive data
1	string	\0\xae\2	BSN archive data
1	string	\0\xae\3	BSN archive data
1	string	\0\xae\7	BSN archive data
# AIN
0	string	\x33\x18	AIN archive data
0	string	\x33\x17	AIN archive data
# XPA32 test moved and merged with XPA by Joerg Jenderek at Sep 2015
# SZip (TODO: doesn't catch all versions)
0	string	SZ\x0a\4	SZip archive data
# XPack DiskImage
# *.XDI updated by Joerg Jenderek Sep 2015
# ftp://ftp.sac.sk/pub/sac/pack/0index.txt
# GRR: this test is still too general as it catches also text files starting with jm
0	string	jm
# only found examples with this additional characteristic 2 bytes
>2	string	\x2\x4	Xpack DiskImage archive data
#!:ext	xdi
# XPack Data
# *.xpa updated by Joerg Jenderek Sep 2015
# ftp://ftp.elf.stuba.sk/pub/pc/pack/
0	string	xpa	XPA
!:ext	xpa
# XPA32
# ftp://ftp.elf.stuba.sk/pub/pc/pack/xpa32.zip
# created by XPA32.EXE version 1.0.2 for Windows
>0	string	xpa\0\1	\b32 archive data
# created by XPACK.COM version 1.67m or 1.67r with short 0x1800
>3	ubeshort	!0x0001	\bck archive data
# XPack Single Data
# changed by Joerg Jenderek Sep 2015 back to like in version 5.12
# letter 'I'+ acute accent is equivalent to \xcd
0	string	\xcd\ jm	Xpack single archive
data 977#!:mime application/x-xpa-compressed 978!:ext xpa 979 980# TODO: missing due to unknown magic/magic at end of file: 981#DWC 982#ARG 983#ZAR 984#PC/3270 985#InstallIt 986#RKive 987#RK 988#XPack Diskimage 989 990# These were inspired by idarc, but actually verified 991# Dzip archiver (.dz) 992# Update: Joerg Jenderek 993# URL: http://speeddemosarchive.com/dzip/ 994# reference: http://speeddemosarchive.com/dzip/dz29src.zip/main.c 995# GRR: line below is too general as it matches also ASCII texts like Doszip commander help dz.txt 9960 string DZ 997# latest version is 2.9 dated 7 may 2003 998>2 byte <4 Dzip archive data 999!:mime application/x-dzip 1000!:ext dz 1001>>2 byte x \b, version %i 1002>>3 byte x \b.%i 1003>>4 ulelong x \b, offset %#x 1004>>8 ulelong x \b, %u files 1005# ZZip archiver (.zz) 10060 string ZZ\ \0\0 ZZip archive data 10070 string ZZ0 ZZip archive data 1008# PAQ archiver (.paq) 10090 string \xaa\x40\x5f\x77\x1f\xe5\x82\x0d PAQ archive data 10100 string PAQ PAQ archive data 1011>3 byte&0xf0 0x30 1012>>3 byte x (v%c) 1013# JAR archiver (.j), this is the successor to ARJ, not Java's JAR (which is essentially ZIP) 1014# Update: Joerg Jenderek 1015# URL: http://fileformats.archiveteam.org/wiki/JAR_(ARJ_Software) 1016# reference: http://mark0.net/download/triddefs_xml.7z/defs/a/ark-jar.trid.xml 1017# https://www.sac.sk/download/pack/jar102x.exe/TECHNOTE.DOC 1018# Note: called "JAR compressed archive" by TrID 10190xe string \x1aJar\x1b JAR (ARJ Software, Inc.) archive data 1020#!:mime application/octet-stream 1021!:mime application/x-compress-j 1022>0 ulelong x \b, CRC32 %#x 1023# standard suffix is ".j"; for multi volumes following order j01 j02 ... j99 100 ... 990 1024!:ext j/j01/j02 1025# URL: http://fileformats.archiveteam.org/wiki/JARCS 1026# reference: http://mark0.net/download/triddefs_xml.7z/defs/a/ark-jarcs.trid.xml 1027# Note: called "JARCS compressed archive" by TrID 10280 string JARCS JAR (ARJ Software, Inc.) 
archive data 1029#!:mime application/octet-stream 1030!:mime application/x-compress-jar 1031!:ext jar 1032 1033# ARJ archiver (jason@jarthur.Claremont.EDU) 1034# URL: http://fileformats.archiveteam.org/wiki/ARJ 1035# reference: http://mark0.net/download/triddefs_xml.7z/defs/a/ark-arj.trid.xml 1036# https://github.com/FarGroup/FarManager/ 1037# blob/master/plugins/multiarc/arc.doc/arj.txt 1038# Note: called "ARJ compressed archive" by TrID and 1039# "ARJ File Format" by DROID via PUID fmt/610 1040# verified by `7z l -tarj PHRACK1.ARJ` and 1041# `arj.exe l TEST-hk9.ARJ` 10420 leshort 0xea60 1043# skip DROID fmt-610-signature-id-946.arj by check for valid file type of main header 1044>0xA ubyte 2 1045>>0 use arj-archive 10460 name arj-archive 1047>0 leshort x ARJ archive 1048!:mime application/x-arj 1049# look for terminating 0-character of filename 1050>0x26 search/1024 \0 1051# file name extension is normally .arj but not for parts of multi volume 1052#>>&-5 string x extension %.4s 1053>>&-5 string/c .arj data 1054!:ext arj 1055>>&-5 default x 1056# for multi volume first name is archive.arj then following parts archive.a01 archive.a02 ... 1057>>>8 byte &0x04 data 1058!:ext a01/a02 1059# for SFX first name is archive.exe then following parts archive.e01 archive.e02 ... 
>>>8	byte	^0x04	data, SFX multi-volume
!:ext	e01/e02
# basic header size like: 0x002b 0x002c 0x04e0 0x04e3 0x04e7
#>2	uleshort	x	basic header size %#4.4x
# next fragment content like: 0x0a200a003a8fc713 0x524a000010bb3471 0x524a0000c73c70f9
#>(2.s)	ubequad	x	NEXT FRAGMENT CONTENT %#16.16llx
# first_hdr_size; seems to be same as basic header size
#>2	uleshort	x	1st header size %#x
# archiver version number like: 3 4 6 11 102
>5	byte	x	\b, v%d
# minimum archiver version to extract like: 1
>6	ubyte	!1	\b, minimum %u to extract
# FOR DEBUGGING
#>8	byte	x	\b, FLAGS %#x
# GARBLED_FLAG1; garble with password; g switch
>8	byte	&0x01	\b, password-protected
# encryption version: 0~old 1~old 2~new 3~reserved 4~40 bit key GOST
>>0x20	ubyte	x	(v%u)
#>8	byte	&0x02	\b, secured
# ANSIPAGE_FLAG; indicates ANSI codepage used by ARJ32; hy switch
>8	byte	&0x02	\b, ANSI codepage
# VOLUME_FLAG indicates presence of succeeding volume; but apparently not for SFX
>8	byte	&0x04	\b, multi-volume
#>8	byte	&0x08	\b, file-offset
# ARJPROT_FLAG; build with data protection record; hk switch
>8	byte	&0x08	\b, recoverable
# arj protection factor; maximal 10; switch hky -> factor=y+1
>>0x22	byte	x	(factor %u)
>8	byte	&0x10	\b, slash-switched
# BACKUP_FLAG; obsolete
>8	byte	&0x20	\b, backup
# SECURED_FLAG;
# no trailing comma here: every message printed after this one already starts
# with "\b, ", so a trailing comma showed up as "secured,, ..." in the output
>8	byte	&0x40	\b, secured
# ALTNAME_FLAG; indicates dual-name archive
>8	byte	&0x80	\b, dual-name
# security version; 0~old 2~current; only other values are reported
>9	ubyte	!0
>>9	ubyte	!2	\b, security version %u
# file type; 2 in main header; 0~binary 1~7-bitText 2~comment 3~directory 4~VolumeLabel 5=ChapterLabel
>0xA	ubyte	!2	\b, file type %u
# date+time when original archive was created in MS-DOS format via ./msdos
>0xC	ulelong	x	\b, created
>0xC	use	dos-date
# or date and time by new internal function
#>0xE	lemsdosdate	x	%s
#>0xC	lemsdostime	x	%s
# FOR DEBUGGING
#>0x12	uleshort	x	RAW DATE %#4.4x
#>0x10	uleshort	x	RAW TIME %#4.4x
# date+time when archive was last modified; sometimes nil or
# maybe wrong like in HP4DRVR.ARJ
#>0x10	ulelong	>0	\b, modified
#>>0x10	use	dos-date
# or date and time by new internal function
#>>0x12	lemsdosdate	x	%s
#>>0x10	lemsdostime	x	%s
# archive size (currently used only for secured archives); MAYBE?
#>0x14	ulelong	!0	\b, file size %u
# security envelope file position; MAYBE?
#>0x18	ulelong	!0	\b, at %#x security envelope
# filespec position in filename; WHAT IS THAT?
#>0x1C	uleshort	>0	\b, filespec position %#x
# length in bytes of security envelope data like: 2CAh 301h 364h 471h
>0x1E	uleshort	!0	\b, security envelope length %#x
# last chapter like: 0 1
>0x21	ubyte	!0	\b, last chapter %u
# filename (null-terminated string); sometimes at 0x26 when 4 bytes for extra data
>34	byte	x	\b, original name:
# with extra data
>34	byte	<0x0B
>>38	string	x	%s
# without extra data
>34	byte	>0x0A
>>34	string	x	%s
# host OS: 0~MSDOS ... 11~WIN32
>7	byte	0	\b, os: MS-DOS
>7	byte	1	\b, os: PRIMOS
>7	byte	2	\b, os: Unix
>7	byte	3	\b, os: Amiga
>7	byte	4	\b, os: Macintosh
>7	byte	5	\b, os: OS/2
>7	byte	6	\b, os: Apple ][ GS
>7	byte	7	\b, os: Atari ST
>7	byte	8	\b, os: NeXT
>7	byte	9	\b, os: VAX/VMS
>7	byte	10	\b, os: WIN95
>7	byte	11	\b, os: WIN32
# [JW] idarc says this is also possible
2	leshort	0xea60	ARJ archive data
#2	leshort	0xea60
#>2	use	arj-archive

# HA archiver (Greg Roelofs, newt@uchicago.edu)
# This is a really bad format. A file containing HAWAII will match this...
#0	string	HA	HA archive data,
#>2	leshort	=1	1 file,
#>2	leshort	>1	%hu files,
#>4	byte&0x0f	=0	first is type CPY
#>4	byte&0x0f	=1	first is type ASC
#>4	byte&0x0f	=2	first is type HSC
#>4	byte&0x0f	=0x0e	first is type DIR
#>4	byte&0x0f	=0x0f	first is type SPECIAL
# suggestion: at least identify small archives (<1024 files)
0	belong&0xffff00fc	0x48410000	HA archive data
>2	leshort	=1	1 file,
>2	leshort	>1	%u files,
>4	byte&0x0f	=0	first is type CPY
>4	byte&0x0f	=1	first is type ASC
>4	byte&0x0f	=2	first is type HSC
>4	byte&0x0f	=0x0e	first is type DIR
>4	byte&0x0f	=0x0f	first is type SPECIAL

# HPACK archiver (Peter Gutmann, pgut1@cs.aukuni.ac.nz)
0	string	HPAK	HPACK archive data

# JAM Archive volume format, by Dmitry.Kohmanyuk@UA.net
0	string	\351,\001JAM\ 	JAM archive,
>7	string	>\0	version %.4s
>0x26	byte	=0x27	-
>>0x2b	string	>\0	label %.11s,
>>0x27	lelong	x	serial %08x,
>>0x36	string	>\0	fstype %.8s

# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
# Update: Joerg Jenderek
# URL: https://en.wikipedia.org/wiki/LHA_(file_format)
# Reference: https://web.archive.org/web/20021005080911/http://www.osirusoft.com/joejared/lzhformat.html
#
# check and display information of lharc (LHa,PMarc) file
0	name	lharc-file
# check 1st character of method id like -lz4- -lh5- or -pm2-
>2	string	-
# check 5th character of method id
>>6	string	-
# check header level 0 1 2 3
>>>20	ubyte	<4
# check 2nd, 3rd and 4th character of method id
>>>>3	regex	\^(lh[0-9a-ex]|lz[s2-8]|pm[012]|pc1)	\b
!:mime	application/x-lzh-compressed
# creator type "LHA "
!:apple	????LHA
# display archive type name like "LHa/LZS archive data" or "LArc archive"
>>>>>2	string	-lz	\b
!:ext	lzs
# already known -lzs- -lz4- -lz5- with old names
>>>>>>2	string	-lzs	LHa/LZS archive data
>>>>>>3	regex	\^lz[45]	LHarc 1.x archive data
# missing -lz?- with wikipedia names
>>>>>>3	regex	\^lz[2378]	LArc archive
# display archive type name like "LHa (2.x) archive data"
>>>>>2	string	-lh	\b
# already known -lh0- -lh1- -lh2- -lh3- -lh4- -lh5- -lh6- -lh7- -lhd- variants with old names
>>>>>>3	regex	\^lh[01]	LHarc 1.x/ARX archive data
# LHice archiver uses ".ICE" as name extension instead of the usual ".lzh"
# FOOBAR archiver uses ".foo" as name extension instead of the usual one
# "Florain Orjanov's and Olga Bachetska's ARchiver" not found at the moment
>>>>>>>2	string	-lh1	\b
!:ext	lha/lzh/ice
>>>>>>3	regex	\^lh[23d]	LHa 2.x? archive data
>>>>>>3	regex	\^lh[7]	LHa (2.x)/LHark archive data
>>>>>>3	regex	\^lh[456]	LHa (2.x) archive data
>>>>>>>2	string	-lh5	\b
# https://en.wikipedia.org/wiki/BIOS
# Some mainboard BIOS like Award use LHa compression. So archives with unusual extensions are found like
# bios.rom , kd7_v14.bin, 1010.004, ...
!:ext	lha/lzh/rom/bin
# missing -lh?- variants (Joe Jared)
>>>>>>3	regex	\^lh[89a-ce]	LHa (Joe Jared) archive
# UNLHA32 2.67a
>>>>>>2	string	-lhx	LHa (UNLHA32) archive
# lha archives with standard file name extensions ".lha" ".lzh"
>>>>>>3	regex	!\^(lh1|lh5)	\b
!:ext	lha/lzh
# this should not happen if all -lh variants are described
>>>>>>2	default	x	LHa (unknown) archive
#!:ext	lha
# PMarc
>>>>>3	regex	\^pm[012]	PMarc archive data
!:ext	pma
# append method id without leading and trailing minus character
>>>>>3	string	x	[%3.3s]
>>>>>>0	use	lharc-header
#
# check and display information of lharc header
0	name	lharc-header
# header size 0x4 , 0x1b-0x61
>0	ubyte	x
# compressed data size != compressed file size
#>7	ulelong	x	\b, data size %d
# attribute: 0x2~?? 0x10~symlink|target 0x20~normal
#>19	ubyte	x	\b, 19_%#x
# level identifier 0 1 2 3
#>20	ubyte	x	\b, level %d
# time stamp
#>15	ubelong	x	DATE %#8.8x
# OS ID for level 1
>20	ubyte	1
# 0x20 types found for *.rom files
>>(21.b+24)	ubyte	<0x21	\b, %#x OS
# ascii type like M for MSDOS
>>(21.b+24)	ubyte	>0x20	\b, '%c' OS
# OS ID for level 2
>20	ubyte	2
#>>23	ubyte	x	\b, OS ID %#x
>>23	ubyte	<0x21	\b, %#x OS
>>23	ubyte	>0x20	\b, '%c' OS
# filename only for level 0 and 1
>20	ubyte	<2
# length of filename
>>21	ubyte	>0	\b, with
# filename
>>>21	pstring	x	"%s"
#
# each method id at offset 2 below hands the file over to lharc-file
#2	string	-lh0-	LHarc 1.x/ARX archive data [lh0]
#!:mime	application/x-lharc
2	string	-lh0-
>0	use	lharc-file
#2	string	-lh1-	LHarc 1.x/ARX archive data [lh1]
#!:mime	application/x-lharc
2	string	-lh1-
>0	use	lharc-file
# NEW -lz2- ... -lz8-
2	string	-lz2-
>0	use	lharc-file
2	string	-lz3-
>0	use	lharc-file
2	string	-lz4-
>0	use	lharc-file
2	string	-lz5-
>0	use	lharc-file
2	string	-lz7-
>0	use	lharc-file
2	string	-lz8-
>0	use	lharc-file
# [never seen any but the last; -lh4- reported in comp.compression:]
#2	string	-lzs-	LHa/LZS archive data [lzs]
2	string	-lzs-
>0	use	lharc-file
# According to wikipedia and others such a version does not exist
#2	string	-lh\40-	LHa 2.x? archive data [lh ]
#2	string	-lhd-	LHa 2.x? archive data [lhd]
2	string	-lhd-
>0	use	lharc-file
#2	string	-lh2-	LHa 2.x? archive data [lh2]
2	string	-lh2-
>0	use	lharc-file
#2	string	-lh3-	LHa 2.x? archive data [lh3]
2	string	-lh3-
>0	use	lharc-file
#2	string	-lh4-	LHa (2.x) archive data [lh4]
2	string	-lh4-
>0	use	lharc-file
#2	string	-lh5-	LHa (2.x) archive data [lh5]
2	string	-lh5-
>0	use	lharc-file
#2	string	-lh6-	LHa (2.x) archive data [lh6]
2	string	-lh6-
>0	use	lharc-file
#2	string	-lh7-	LHa (2.x)/LHark archive data [lh7]
2	string	-lh7-
# !:mime	application/x-lha
# >20	byte	x	- header level %d
>0	use	lharc-file
# NEW -lh8- ... -lhe- , -lhx-
2	string	-lh8-
>0	use	lharc-file
2	string	-lh9-
>0	use	lharc-file
2	string	-lha-
>0	use	lharc-file
2	string	-lhb-
>0	use	lharc-file
2	string	-lhc-
>0	use	lharc-file
2	string	-lhe-
>0	use	lharc-file
2	string	-lhx-
>0	use	lharc-file
# taken from idarc [JW]
2	string	-lZ	PUT archive data
# already done by LHarc magics
# this should never happen if all sub types of LZS archive are identified
#2	string	-lz	LZS archive data
2	string	-sw1-	Swag archive data

# version and host OS of a RAR file header; offsets are relative to the
# position passed in by the calling "use" line
0	name	rar-file-header
>24	byte	15	\b, v1.5
>24	byte	20	\b, v2.0
>24	byte	29	\b, v4
>15	byte	0	\b, os: MS-DOS
>15	byte	1	\b, os: OS/2
>15	byte	2	\b, os: Win32
>15	byte	3	\b, os: Unix
>15	byte	4	\b, os: Mac OS
>15	byte	5	\b, os: BeOS

# flag names of a RAR archive header, printed only if one of the low 9 bits is set
0	name	rar-archive-header
>3	leshort&0x1ff	>0	\b, flags:
>>3	leshort	&0x01	ArchiveVolume
>>3	leshort	&0x02	Commented
>>3	leshort	&0x04	Locked
>>3	leshort	&0x10	NewVolumeNaming
>>3	leshort	&0x08	Solid
>>3	leshort	&0x20	Authenticated
>>3	leshort	&0x40	RecoveryRecordPresent
>>3	leshort	&0x80	EncryptedBlockHeader
>>3	leshort	&0x100	FirstVolume

# RAR (Roshal Archive) archive
0	string	Rar!\x1a\7\0	RAR archive data
!:mime	application/x-rar
!:ext	rar/cbr
# file header
>(0xc.l+9)	byte	0x74
>>(0xc.l+7)	use	rar-file-header
# subblock seems to share information with file header
>(0xc.l+9)	byte	0x7a
>>(0xc.l+7)	use	rar-file-header
>9	byte	0x73
>>7	use	rar-archive-header

0	string	Rar!\x1a\7\1\0	RAR archive data, v5
!:mime	application/x-rar
!:ext	rar

# Very old RAR archive
# https://jasonblanks.com/wp-includes/images/papers/KnowyourarchiveRAR.pdf
0	string	RE\x7e\x5e	RAR archive data (<v1.5)
!:mime	application/x-rar
!:ext	rar/cbr

# SQUISH archiver (Greg Roelofs, newt@uchicago.edu)
0	string	SQSH	squished archive data (Acorn RISCOS)

# UC2 archiver (Greg Roelofs, newt@uchicago.edu)
# [JW] see exe section for self-extracting version
0	string	UC2\x1a	UC2 archive data

# PKZIP multi-volume archive
0	string	PK\x07\x08PK\x03\x04	Zip multi-volume archive data, at least PKZIP v2.50 to extract
!:mime	application/zip
!:ext	zip/cbz

# Zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
0	string	PK\005\006	Zip archive data (empty)
!:mime	application/zip
!:ext	zip/cbz
!:strength +1
0	string	PK\003\004
!:strength +1

# Specialised zip formats which start with a member named 'mimetype'
# (stored uncompressed, with no 'extra field') containing the file's MIME type.
# Check for an 8-byte name, a 0-byte extra field, the name "mimetype", and
# contents starting with "application/":
>26	string	\x8\0\0\0mimetypeapplication/

# KOffice / OpenOffice & StarOffice / OpenDocument formats
# From: Abel Cheung <abel@oaka.org>

# KOffice (1.2 or above) formats
# (mimetype contains "application/vnd.kde.<SUBTYPE>")
>>50	string	vnd.kde.	KOffice (>=1.2)
>>>58	string	karbon	Karbon document
>>>58	string	kchart	KChart document
>>>58	string	kformula	KFormula document
>>>58	string	kivio	Kivio document
>>>58	string	kontour	Kontour document
>>>58	string	kpresenter	KPresenter document
>>>58	string	kspread	KSpread document
>>>58	string	kword	KWord document

# OpenOffice formats (for OpenOffice 1.x / StarOffice 6/7)
# (mimetype contains "application/vnd.sun.xml.<SUBTYPE>")
# URL: https://en.wikipedia.org/wiki/OpenOffice.org_XML
# reference: http://fileformats.archiveteam.org/wiki/OpenOffice.org_XML
>>50	string	vnd.sun.xml.	OpenOffice.org 1.x
>>>62	string	writer	Writer
# a '.' (0x2e) right after the subtype name introduces a variant suffix
>>>>68	byte	!0x2e	document
!:mime	application/vnd.sun.xml.writer
!:ext	sxw
>>>>68	string	.template	template
!:mime	application/vnd.sun.xml.writer.template
!:ext	stw
>>>>68	string	.web	Web template
!:mime	application/vnd.sun.xml.writer.web
!:ext	stw
>>>>68	string	.global	global document
!:mime	application/vnd.sun.xml.writer.global
!:ext	sxg
>>>62	string	calc	Calc
>>>>66	byte	!0x2e	spreadsheet
!:mime	application/vnd.sun.xml.calc
!:ext	sxc
>>>>66	string	.template	template
!:mime	application/vnd.sun.xml.calc.template
!:ext	stc
>>>62	string	draw	Draw
>>>>66	byte	!0x2e	document
!:mime	application/vnd.sun.xml.draw
!:ext	sxd
>>>>66	string	.template	template
!:mime	application/vnd.sun.xml.draw.template
!:ext	std
>>>62	string	impress	Impress
>>>>69	byte	!0x2e	presentation
!:mime	application/vnd.sun.xml.impress
!:ext	sxi
>>>>69	string	.template	template
!:mime	application/vnd.sun.xml.impress.template
!:ext	sti
>>>62	string	math	Math document
!:mime	application/vnd.sun.xml.math
!:ext	sxm
>>>62	string	base	Database file
!:mime	application/vnd.sun.xml.base
!:ext	sdb

# URL: https://wiki.openoffice.org/wiki/Documentation/DevGuide/Extensions/File_Format
# From: Joerg Jenderek
# Note: only few OXT samples are detected here by mimetype member
# is used by OpenOffice and LibreOffice and probably also NeoOffice
# verified by `unzip -Zv *.oxt` or `7z l -slt *.oxt`
>>50	string	vnd.openofficeorg.	OpenOffice
>>>68	string	extension	\b/LibreOffice Extension
# http://extension.nirsoft.net/oxt
!:mime	application/vnd.openofficeorg.extension
# like: Gallery-Puzzle.2.1.0.1.oxt
!:ext	oxt

# OpenDocument formats (for OpenOffice 2.x / StarOffice >= 8)
# URL: http://fileformats.archiveteam.org/wiki/OpenDocument
# https://lists.oasis-open.org/archives/office/200505/msg00006.html
# (mimetype contains "application/vnd.oasis.opendocument.<SUBTYPE>")
>>50	string	vnd.oasis.opendocument.	OpenDocument
>>>73	string	text
# a '-' (0x2d) right after the subtype name introduces a variant suffix
>>>>77	byte	!0x2d	Text
!:mime	application/vnd.oasis.opendocument.text
!:ext	odt
>>>>77	string	-template	Text Template
!:mime	application/vnd.oasis.opendocument.text-template
!:ext	ott
>>>>77	string	-web	HTML Document Template
!:mime	application/vnd.oasis.opendocument.text-web
!:ext	oth
>>>>77	string	-master	Master Document
!:mime	application/vnd.oasis.opendocument.text-master
!:ext	odm
>>>73	string	graphics
>>>>81	byte	!0x2d	Drawing
!:mime	application/vnd.oasis.opendocument.graphics
!:ext	odg
>>>>81	string	-template	Drawing Template
!:mime	application/vnd.oasis.opendocument.graphics-template
!:ext	otg
>>>73	string	presentation
>>>>85	byte	!0x2d	Presentation
!:mime	application/vnd.oasis.opendocument.presentation
!:ext	odp
>>>>85	string	-template	Presentation Template
!:mime	application/vnd.oasis.opendocument.presentation-template
!:ext	otp
>>>73	string	spreadsheet
>>>>84	byte	!0x2d	Spreadsheet
!:mime	application/vnd.oasis.opendocument.spreadsheet
!:ext	ods
>>>>84	string	-template	Spreadsheet Template
!:mime	application/vnd.oasis.opendocument.spreadsheet-template
!:ext	ots
>>>73	string	chart
>>>>78	byte	!0x2d	Chart
!:mime	application/vnd.oasis.opendocument.chart
!:ext	odc
>>>>78	string	-template	Chart Template
!:mime	application/vnd.oasis.opendocument.chart-template
!:ext	otc
>>>73	string	formula
>>>>80	byte	!0x2d	Formula
!:mime	application/vnd.oasis.opendocument.formula
!:ext	odf
>>>>80	string	-template	Formula Template
!:mime	application/vnd.oasis.opendocument.formula-template
!:ext	otf
# https://www.loc.gov/preservation/digital/formats/fdd/fdd000441.shtml
>>>73	string	database	Database
!:mime	application/vnd.oasis.opendocument.database
!:ext	odb
# Valid for LibreOffice Base 6.0.1.1 at least
>>>73	string	base	Database
# https://bugs.documentfoundation.org/show_bug.cgi?id=45854
!:mime	application/vnd.oasis.opendocument.database
#!:mime	application/vnd.oasis.opendocument.base
!:ext	odb
>>>73	string	image
>>>>78	byte	!0x2d	Image
!:mime	application/vnd.oasis.opendocument.image
!:ext	odi
>>>>78	string	-template	Image Template
!:mime	application/vnd.oasis.opendocument.image-template
!:ext	oti

# EPUB (OEBPS) books using OCF (OEBPS Container Format)
# https://www.idpf.org/ocf/ocf1.0/download/ocf10.htm, section 4.
# From: Ralf Brown <ralf.brown@gmail.com>
>>50	string	epub+zip	EPUB document
!:mime	application/epub+zip

# From: Joerg Jenderek
# URL: http://en.wikipedia.org/wiki/CorelDRAW
# NOTE: version; til 2 WL-based; from 3 til 13 by ./riff; from 14 zip based
>>50	string	x-vnd.corel.	Corel
>>>62	string	draw.document+zip	Draw drawing, version 14-16
!:mime	application/x-vnd.corel.draw.document+zip
!:ext	cdr
>>>62	string	draw.template+zip	Draw template, version 14-16
!:mime	application/x-vnd.corel.draw.template+zip
!:ext	cdrt
>>>62	string	zcf.draw.document+zip	Draw drawing, version 17-22
!:mime	application/x-vnd.corel.zcf.draw.document+zip
!:ext	cdr
>>>62	string	zcf.draw.template+zip	Draw template, version 17-22
!:mime	application/x-vnd.corel.zcf.draw.template+zip
!:ext	cdt/cdrt
# URL: http://product.corel.com/help/CorelDRAW/540240626/Main/EN/Doc/CorelDRAW-Other-file-formats.html
>>>62	string	zcf.pattern+zip	Draw pattern, version 22
!:mime	application/x-vnd.corel.zcf.pattern+zip
!:ext	pat
# URL: https://en.wikipedia.org/wiki/Corel_Designer
# Reference: http://fileformats.archiveteam.org/wiki/Corel_Designer
# Note: called by TrID "Corel DESIGN graphics"
>>>62	string	designer.document+zip	DESIGNER graphics, version 14-16
!:mime	application/x-vnd.corel.designer.document+zip
!:ext	des
>>>62	string	zcf.designer.document+zip	DESIGNER graphics, version 17-21
!:mime	application/x-vnd.corel.zcf.designer.document+zip
!:ext	des
# URL: http://product.corel.com/help/CorelDRAW/540223850/Main/EN/Documentation/
# CorelDRAW-Corel-Symbol-Library-CSL.html
>>>62	string	symbol.library+zip	Symbol Library, version 6-16.3
!:mime	application/x-vnd.corel.symbol.library+zip
!:ext	csl
>>>62	string	zcf.symbol.library+zip	Symbol Library, version 17-22
!:mime	application/x-vnd.corel.zcf.symbol.library+zip
!:ext	csl

# Catch other ZIP-with-mimetype formats
# In a ZIP file, the bytes immediately after a member's contents are
# always "PK". The 2 regex rules here print the "mimetype" member's
# contents up to the first 'P'. Luckily, most MIME types don't contain
# any capital 'P's. This is a kludge.
# (mimetype contains "application/<OTHER>")
>>50	default	x	Zip data
>>>38	regex	[!-OQ-~]+	(MIME type "%s"?)
!:mime	application/zip
# (mimetype contents other than "application/*")
>26	string	\x8\0\0\0mimetype
>>38	string	!application/
>>>38	regex	[!-OQ-~]+	Zip data (MIME type "%s"?)
!:mime	application/zip

# Java Jar files
>(26.s+30)	leshort	0xcafe	Java archive data (JAR)
!:mime	application/java-archive

# iOS App
>(26.s+30)	leshort	!0xcafe
>>26	string	!\x8\0\0\0mimetype
>>>30	string	Payload/
>>>>38	search/64	.app/	iOS App
!:mime	application/x-ios-app

# Dup, see above.
#>30	search/100/b	application/epub+zip	EPUB document
#!:mime	application/epub+zip

# Generic zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
# The next lines exclude the specialized formats handled above:
>(26.s+30)	leshort	!0xcafe
>>30	search/100/b	!application/epub+zip
>>>26	string	!\x8\0\0\0mimetype	Zip archive data
!:mime	application/zip
>>>>4	beshort	x	\b, at least
>>>>4	use	zipversion
>>>>4	beshort	x	to extract
>>>>8	beshort	x	\b, compression method=
>>>>8	use	zipcompression
>>>>0x161	string	WINZIP	\b, WinZIP self-extracting

# StarView Metafile
# From Pierre Ducroquet <pinaraf@pinaraf.info>
0	string	VCLMTF	StarView MetaFile
>6	beshort	x	\b, version %d
>8	belong	x	\b, size %d

# Zoo archiver
20	lelong	0xfdc4a7dc	Zoo archive data
!:mime	application/x-zoo
>4	byte	>48	\b, v%c.
>>6	byte	>47	\b%c
>>>7	byte	>47	\b%c
>32	byte	>0	\b, modify: v%d
>>33	byte	x	\b.%d+
>42	lelong	0xfdc4a7dc	\b,
>>70	byte	>0	extract: v%d
>>>71	byte	x	\b.%d+

# Shell archives
10	string	#\ This\ is\ a\ shell\ archive	shell archive text
!:mime	application/octet-stream

#
# LBR. NB: May conflict with the questionable
# "binary Computer Graphics Metafile" format.
#
0	string	\0\ \ \ \ \ \ \ \ \ \ \ \0\0	LBR archive data
#
# PMA (CP/M derivative of LHA)
# Update: Joerg Jenderek
# URL: https://en.wikipedia.org/wiki/LHA_(file_format)
#
# the -pm?- method ids are handed over to lharc-file like the -lh?- ones
#2	string	-pm0-	PMarc archive data [pm0]
2	string	-pm0-
>0	use	lharc-file
#2	string	-pm1-	PMarc archive data [pm1]
2	string	-pm1-
>0	use	lharc-file
#2	string	-pm2-	PMarc archive data [pm2]
2	string	-pm2-
>0	use	lharc-file
2	string	-pms-	PMarc SFX archive (CP/M, DOS)
#!:mime	application/x-foobar-exec
!:ext	com
5	string	-pc1-	PopCom compressed executable (CP/M)
#!:mime	application/x-
#!:ext	com

# From Rafael Laboissiere <rafael@laboissiere.net>
# The Project Revision Control System (see
# http://prcs.sourceforge.net) generates a packaged project
# file which is recognized by the following entry:
0	leshort	0xeb81	PRCS packaged project

# Microsoft cabinets
# by David Necas (Yeti) <yeti@physics.muni.cz>
#0	string	MSCF\0\0\0\0	Microsoft cabinet file data,
#>25	byte	x	v%d
#>24	byte	x	\b.%d
# MPi: All CABs have version 1.3, so this is pointless.
# Better magic in debian-additions.

# GTKtalog catalogs
# by David Necas (Yeti) <yeti@physics.muni.cz>
4	string	gtktalog\ 	GTKtalog catalog data,
>13	string	3	version 3
>>14	beshort	0x677a	(gzipped)
>>14	beshort	!0x677a	(not gzipped)
>13	string	>3	version %s

############################################################################
# Parity archive reconstruction file, the 'par' file format now used on Usenet.
0	string	PAR\0	PARity archive data
# little-endian 16-bit value at offset 48: 0 for the index file, else file number
>48	leshort	=0	- Index file
>48	leshort	>0	- file number %d

# BitTorrent metainfo: tested on the first dictionary key at offset 0
# Felix von Leitner <felix-file@fefe.de>
0	string	d8:announce	BitTorrent file
!:mime	application/x-bittorrent
# Durval Menezes, <jmgthbfile at durval dot com>
0	string	d13:announce-list	BitTorrent file
!:mime	application/x-bittorrent
0	string	d7:comment	BitTorrent file
!:mime	application/x-bittorrent
0	string	d4:info	BitTorrent file
!:mime	application/x-bittorrent

# Atari MSA archive - Teemu Hukkanen <tjhukkan@iki.fi>
# URL: http://fileformats.archiveteam.org/wiki/MSA_(Magic_Shadow_Archiver)
# Reference: http://info-coach.fr/atari/documents/_mydoc/FD_Image_File_Format.pdf
# http://mark0.net/download/triddefs_xml.7z/defs/m/msa.trid.xml
# Update: Joerg Jenderek
# Note: called by TrID "Atari MSA Disk Image" and verified by
# command like `deark -l -m msa -d2 PDATS578.msa` as " Atari ST floppy disk image"
# GRR: line below is too general as it matches setup.skin
0	beshort	0x0e0f
# skip foo setup.skin with unrealistic high number 52255 of sides by check for valid "low" value
>4	ubeshort	<2	Atari MSA archive data
#!:mime	application/octet-stream
!:mime	application/x-atari-msa
!:ext	msa
# sectors per track like: 9 10
>>2	beshort	x	\b, %d sectors per track
# sides (0 or 1; add 1 to this to get correct number of sides)
>>4	beshort	0	\b, 1 sided
>>4	beshort	1	\b, 2 sided
# starting track like: 0
>>6	beshort	x	\b, starting track: %d
# ending track like: 39 79 80 81
>>8	beshort	x	\b, ending track: %d
# tracks content
#>>10	ubequad	x	\b, track content %#16.16llx

# Alternate ZIP string (amc@arwen.cs.berkeley.edu)
0	string	PK00PK\003\004	Zip archive data
!:mime	application/zip
!:ext	zip/cbz

# ACE archive (from http://www.wotsit.org/download.asp?f=ace)
# by Stefan `Sec` Zehl <sec@42.org>
7	string	**ACE**	ACE archive data
!:mime	application/x-ace-compressed
!:ext	ace
# byte 15: version that created the archive (printed only when non-zero)
>15	byte	>0	version %d
# byte 16: originating host system
>16	byte	=0x00	\b, from MS-DOS
>16	byte	=0x01	\b, from OS/2
>16	byte	=0x02	\b, from Win/32
>16	byte	=0x03	\b, from Unix
>16	byte	=0x04	\b, from MacOS
>16	byte	=0x05	\b, from WinNT
>16	byte	=0x06	\b, from Primos
>16	byte	=0x07	\b, from AppleGS
>16	byte	=0x08	\b, from Atari
>16	byte	=0x09	\b, from Vax/VMS
>16	byte	=0x0A	\b, from Amiga
>16	byte	=0x0B	\b, from Next
# byte 14: version needed to extract
>14	byte	x	\b, version %d to extract
# little-endian 16-bit word at offset 5, tested bit by bit
>5	leshort	&0x0080	\b, multiple volumes,
>>17	byte	x	\b (part %d),
>5	leshort	&0x0002	\b, contains comment
>5	leshort	&0x0200	\b, sfx
>5	leshort	&0x0400	\b, small dictionary
>5	leshort	&0x0800	\b, multi-volume
>5	leshort	&0x1000	\b, contains AV-String
>>30	string	\x16*UNREGISTERED\x20VERSION*	(unregistered)
>5	leshort	&0x2000	\b, with recovery record
>5	leshort	&0x4000	\b, locked
>5	leshort	&0x8000	\b, solid
# Date in MS-DOS format (whatever that is)
#>18	lelong	x	Created on

# sfArk : compression program for Soundfonts (sf2) by Dirk Jagdmann
# <doj@cubic.org>
0x1A	string	sfArk	sfArk compressed Soundfont
# version and trailing string are only printed when offset 0x15 holds "2"
>0x15	string	2
>>0x1	string	>\0	Version %s
>>0x2A	string	>\0	: %s

# DR-DOS 7.03 Packed File *.??_
# Reference: http://www.antonis.de/dos/dos-tuts/mpdostip/html/nwdostip.htm
# Note: unpacked by PNUNPACK.EXE
0	string	Packed\ File\ 
# by looking for Control-Z skip ASCII text starting with Packed File
>0x18	ubyte	0x1a	Personal NetWare Packed File
!:mime	application/x-novell-compress
!:ext	??_
>>12	string	x	\b, was "%.12s"
# 1 or 2
#>>0x19	ubyte	x	\b, at 0x19 %u
>>0x1b	ulelong	x	with %u bytes

# EET archive
# From: Tilman Sauerbeck <tilman@code-monkey.de>
0	belong	0x1ee7ff00	EET archive
!:mime	application/x-eet

# rzip archives
0	string	RZIP	rzip compressed data
# bytes 4 and 5 are printed as major.minor version, followed by a
# big-endian 32-bit byte count at offset 6
>4	byte	x	- version %d
>5	byte	x	\b.%d
>6	belong	x	(%d bytes)

# From: Joerg Jenderek
# URL: https://help.foxitsoftware.com/kb/install-fzip-file.php
# reference: http://mark0.net/download/triddefs_xml.7z/
# defs/f/fzip.trid.xml
# Note: unknown compression; No "PK" zip magic; normally in directory like
# "%APPDATA%\Foxit Software\Addon\Foxit Reader\Install"
0	ubequad	0x2506781901010000	Foxit add-on/update
!:mime	application/x-fzip
!:ext	fzip

# From: "Robert Dale" <robdale@gmail.com>
0	belong	123	dar archive,
# the label is printed as hex from three consecutive fields (4+4+2 bytes);
# the opening and closing quote are spread over the first and last message
>4	belong	x	label "%.8x
>>8	belong	x	%.8x
>>>12	beshort	x	%.4x"
>14	byte	0x54	end slice
>14	beshort	0x4e4e	multi-part
>14	beshort	0x4e53	multi-part, with -S

# Symbian installation files
# https://www.thouky.co.uk/software/psifs/sis.html
# http://developer.symbian.com/main/downloads/papers/SymbianOSv91/softwareinstallsis.pdf
8	lelong	0x10000419	Symbian installation file
!:mime	application/vnd.symbian.install
>4	lelong	0x1000006D	(EPOC release 3/4/5)
>4	lelong	0x10003A12	(EPOC release 6)
0	lelong	0x10201A7A	Symbian installation file (Symbian OS 9.x)
!:mime	x-epoc/x-sisx-app

# From "Nelson A. de Oliveira" <naoliv@gmail.com>
0	string	MPQ\032	MoPaQ (MPQ) archive

# From: "Nelson A. de Oliveira" <naoliv@gmail.com>
# .kgb
0	string	KGB_arch	KGB Archiver file
>10	string	x	with compression level %.1s

# xar (eXtensible ARchiver) archive
# URL: https://en.wikipedia.org/wiki/Xar_(archiver)
# xar archive format: https://code.google.com/p/xar/
# From: "David Remahl" <dremahl@apple.com>
# Update: Joerg Jenderek
# TODO: lzma compression; X509Data for pkg and xip
# Note: verified by `xar --dump-header -f FullBundleUpdate.xar` or
# `7z t -txar Xcode_10.2_beta_4.xip`
0	string	xar!	xar archive
!:mime	application/x-xar
# pkg for Mac OSX installer package like FullBundleUpdate.pkg
# xip for signed Apple software like Xcode_10.2_beta_4.xip
!:ext	xar/pkg/xip
# always 28 in older archives
>4	ubeshort	>28	\b, header size %u
# currently there exists only version 1 since about 2014
>6	ubeshort	>1	version %u,
>8	ubequad	x	compressed TOC: %llu,
#>16	ubequad	x	uncompressed TOC: %llu,
# cksum_alg 0-2 in older and also 3-4 in newer
>24	belong	0	no checksum
>24	belong	1	SHA-1 checksum
>24	belong	2	MD5 checksum
>24	belong	3	SHA-256 checksum
>24	belong	4	SHA-512 checksum
>24	belong	>4	unknown %#x checksum
#>24	belong	>4	checksum
# The branches below all use the same three relative jumps (start offset,
# + header size, + compressed TOC size) to reach the data that follows the
# table of contents and its checksum, then hand it to an indirect lookup.
# For no compression jump 0 bytes
>24	belong	0
>>0	ubyte	x
# jump more bytes forward by header size
>>>&(4.S)	ubyte	x
# jump more bytes forward by compressed table of contents size
#>>>>&(8.Q)	ubequad	x	\b, heap data %#llx
>>>>&(8.Q)	ubyte	x
# look for data by ./compress after message with 1 space at end
>>>>>&-3	indirect	x	\b, contains 
# For SHA-1 jump 20 minus 2 bytes
>24	belong	1
>>18	ubyte	x
# jump more bytes forward by header size
>>>&(4.S)	ubyte	x
# jump more bytes forward by compressed table of contents size
>>>>&(8.Q)	ubyte	x
# data compressed by gzip, bzip, lzma or none
>>>>>&-1	indirect	x	\b, contains 
# For MD5 jump 16 minus 2 bytes (same scheme as the other digests; this
# branch was missing although "MD5 checksum" is reported above)
>24	belong	2
>>14	ubyte	x
# jump more bytes forward by header size
>>>&(4.S)	ubyte	x
# jump more bytes forward by compressed table of contents size
>>>>&(8.Q)	ubyte	x
>>>>>&-1	indirect	x	\b, contains 
# For SHA-256 jump 32 minus 2 bytes
>24	belong	3
>>30	ubyte	x
# jump more bytes forward by header size
>>>&(4.S)	ubyte	x
# jump more bytes forward by compressed table of contents size
>>>>&(8.Q)	ubyte	x
>>>>>&-1	indirect	x	\b, contains 
# For SHA-512 jump 64 minus 2 bytes
>24	belong	4
>>62	ubyte	x
# jump more bytes forward by header size
>>>&(4.S)	ubyte	x
# jump more bytes forward by compressed table of contents size
>>>>&(8.Q)	ubyte	x
>>>>>&-1	indirect	x	\b, contains 

# Type: Parity Archive
# From: Daniel van
# Eeden <daniel_e@dds.nl>
0	string	PAR2	Parity Archive Volume Set

# Bacula volume format. (Volumes always start with a block header.)
# URL: https://bacula.org/3.0.x-manuals/en/developers/developers/Block_Header.html
# From: Adam Buchbinder <adam.buchbinder@gmail.com>
12	string	BB02	Bacula volume
# big-endian timestamp at offset 20
>20	bedate	x	\b, started %s

# ePub is XHTML + XML inside a ZIP archive. The first member of the
# archive must be an uncompressed file called 'mimetype' with contents
# 'application/epub+zip'


# From: "Michael Gorny" <mgorny@gentoo.org>
# ZPAQ: http://mattmahoney.net/dc/zpaq.html
0	string	zPQ	ZPAQ stream
>3	byte	x	\b, level %d
# From: Barry Carter <carter.barry@gmail.com>
# https://encode.ru/threads/456-zpaq-updates/page32
0	string	7kSt	ZPAQ file

# BBeB ebook, unencrypted (LRF format)
# URL: https://www.sven.de/librie/Librie/LrfFormat
# From: Adam Buchbinder <adam.buchbinder@gmail.com>
0	string	L\0R\0F\0\0\0	BBeB ebook data, unencrypted
# The 16-bit header fields are stored little-endian (the signature itself is
# UTF-16LE "LRF"); reading them big-endian printed byte-swapped values for
# version, width and height.
>8	leshort	x	\b, version %d
>36	byte	1	\b, front-to-back
>36	byte	16	\b, back-to-front
# width and height; the parenthesis is spread over both messages
>42	leshort	x	\b, (%dx,
>44	leshort	x	%d)

# Symantec GHOST image by Joerg Jenderek at May 2014
# https://us.norton.com/ghost/
# https://www.garykessler.net/library/file_sigs.html
# the mask ignores bit 0x08 of byte 2 and the low nibble of byte 3, which are
# examined separately below
0	ubelong&0xFFFFf7f0	0xFEEF0100	Norton GHost image
# *.GHO
>2	ubyte&0x08	0x00	\b, first file
# *.GHS or *.[0-9] with cns program option
>2	ubyte&0x08	0x08	\b, split file
# part of split index interesting for *.ghs
>>4	ubyte	x	id=%#x
# compression tag minus one equals numeric compression command line switch z[1-9]
>3	ubyte	0	\b, no compression
>3	ubyte	2	\b, fast compression (Z1)
>3	ubyte	3	\b, medium compression (Z2)
>3	ubyte	>3
>>3	ubyte	<11	\b, compression (Z%d-1)
# remaining details are only evaluated for the first file (*.gho)
>2	ubyte&0x08	0x00
# ~ 30 byte password field only for *.gho
>>12	ubequad	!0	\b, password protected
>>44	ubyte	!1
# 1~Image All, sector-by-sector only for *.gho
>>>10	ubyte	1	\b, sector copy
# 1~Image Boot track only for *.gho
>>>43	ubyte	1	\b, boot track
# 1~Image Disc only for *.gho implies Image Boot track and sector copy
>>44	ubyte	1	\b, disc sector copy
# optional image description only *.gho
>>0xff	string	>\0	"%-.254s"
# look for DOS sector end sequence
>0xE08	search/7776	\x55\xAA
# step back 512 bytes from the end of the found signature and describe
# whatever starts there
>>&-512	indirect	x	\b; contains 

# Google Chrome extensions
# https://developer.chrome.com/extensions/crx
# https://developer.chrome.com/extensions/hosting
0	string	Cr24	Google Chrome extension
!:mime	application/x-chrome-extension
# The CRX header stores its integers little-endian; a plain "ulong" is read
# in host byte order and printed a wrong version on big-endian machines.
>4	ulelong	x	\b, version %u

# SeqBox - Sequenced container
# ext: sbx, seqbox
# Marco Pontello marcopon@gmail.com
# reference: https://github.com/MarcoPon/SeqBox
0	string	SBx	SeqBox,
>3	byte	x	version %d

# LyNX archive
56	string	USE\040LYNX\040TO\040DISSOLVE\040THIS\040FILE	LyNX archive

# From: Joerg Jenderek
# URL: https://www.acronis.com/
# Reference: https://en.wikipedia.org/wiki/TIB_(file_format)
# Note: only tested with True Image 2013 Build 5962 and 2019 Build 14110
0	ubequad	0xce24b9a220000000	Acronis True Image backup
!:mime	application/x-acronis-tib
!:ext	tib
# 01000000
#>20	ubelong	x	\b, at 20 %#x
# 20000000
#>28	ubelong	x	\b, at 28 %#x
# strings like "Generic- SD/MMC 1.00" "Unknown Disk" "Msft Virtual Disk 1.0"
# ???
# strings like "\Device\0000011e" "\Device\0000015a"
# (the rules below are disabled and kept only as notes)
#>0	search/0x6852300/cs	\\Device\\
#>>&-1	pstring	x	\b, %s
# "\Device\HarddiskVolume30" "\Device\HarddiskVolume39"
#>>>&1	search/180/cs	\\Device\\
#>>>>&-1	pstring	x	\b, %s
#>>>>>&0	search/29/cs	\0\0\xc8\0
# disk label
#>>>>>>&10	lestring16	x	\b, disk label %11.11s
#>>>>>>&9	plestring16	x	\b, disk label "%11.11s"
#>>>>>>&10	ubequad	x	%16.16llx


# Gentoo XPAK binary package
# by Michal Gorny <mgorny@gentoo.org>
# https://gitweb.gentoo.org/proj/portage.git/tree/man/xpak.5
# negative offsets are counted from the end of the file: first the last
# 4 bytes, then the last 16 bytes
-4	string	STOP
>-16	string	XPAKSTOP	Gentoo binary package (XPAK)

# From: Joerg Jenderek
# URL: https://kodi.wiki/view/TexturePacker
# Reference: https://mirrors.kodi.tv/releases/source/17.3-Krypton.tar.gz
# /xbmc-Krypton/xbmc/guilib/XBTF.h
# /xbmc-Krypton/xbmc/guilib/XBTF.cpp
0	string	XBTF
# reject plain ASCII text: the byte at offset 264 must be the \0 ending the path
>264	ubyte	0	XBMC texture package
!:mime	application/x-xbmc-xbt
!:ext	xbt
# XBTF_VERSION 2; the version character is shown only when it is not "2"
>>4	string	!2	\b, version %-.1s
# nofFiles /xbmc-Krypton/xbmc/guilib/XBTFReader.cpp
>>5	ulelong	x	\b, %u file
# append the plural "s" for more than one file
>>5	ulelong	>1	\bs
# path[CXBTFFile[MaximumPathLength=256]
>>9	string	x	\b, 1st %s

# ALZIP archive
# by Hyungjun Park <hyungjun.park@worksmobile.com>, Hajin Jang <hajin_jang@worksmobile.com>
# http://kippler.com/win/unalz/
# https://salsa.debian.org/l10n-korean-team/unalz
0	string	ALZ\001	ALZ archive data
!:ext	alz

# https://cf-aldn.altools.co.kr/setup/EGG_Specification.zip
0	string	EGGA	EGG archive data,
!:ext	egg
# byte 5 is printed first, then byte 4, giving "version <byte5>.<byte4>"
>5	byte	x	version %u
>4	byte	x	\b.%u
# 32-bit little-endian value at offset 0x0E: the first value prints nothing,
# two others are reported as split or solid, anything else as unknown
>>0x0E	ulelong	=0x08E28222
>>0x0E	ulelong	=0x24F5A262	\b, split
>>0x0E	ulelong	=0x24E5A060	\b, solid
>>0x0E	default	x	\b, unknown

# PAQ9A archive
# URL: http://mattmahoney.net/dc/#paq9a
# Note: Line 1186 of paq9a.cpp gives the magic bytes
0	string	pQ9\001	PAQ9A archive
