1#------------------------------------------------------------------------------ 2# $File: archive,v 1.169 2022/09/12 13:13:28 christos Exp $ 3# archive: file(1) magic for archive formats (see also "msdos" for self- 4# extracting compressed archives) 5# 6# cpio, ar, arc, arj, hpack, lha/lharc, rar, squish, uc2, zip, zoo, etc. 7# pre-POSIX "tar" archives are also handled in the C code ../../src/is_tar.c. 8 9# POSIX tar archives 10# URL: https://en.wikipedia.org/wiki/Tar_(computing) 11# Reference: https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5&manpath=FreeBSD+8-current 12# header mainly padded with nul bytes 13500 quad 0 14!:strength /2 15# filename or extended attribute printable strings in range space null til umlaut ue 16>0 ubeshort >0x1F00 17>>0 ubeshort <0xFCFD 18# last 4 header bytes often null but tar\0 in gtarfail2.tar gtarfail.tar-bad 19# at https://sourceforge.net/projects/s-tar/files/testscripts/ 20>>>508 ubelong&0x8B9E8DFF 0 21# nul, space or ascii digit 0-7 at start of mode 22>>>>100 ubyte&0xC8 =0 23>>>>>101 ubyte&0xC8 =0 24# nul, space at end of check sum 25>>>>>>155 ubyte&0xDF =0 26# space or ascii digit 0 at start of check sum 27>>>>>>>148 ubyte&0xEF =0x20 28# FOR DEBUGGING: 29#>>>>>>>>0 regex \^[0-9]{2,4}[.](png|jpg|jpeg|tif|tiff|gif|bmp) NAME "%s" 30# check for 1st image main name with digits used for sorting 31# and for name extension case insensitive like: PNG JPG JPEG TIF TIFF GIF BMP 32>>>>>>>>0 regex \^[0-9]{2,4}[.](png|jpg|jpeg|tif|tiff|gif|bmp) 33#foo 34>>>>>>>>>0 use tar-cbt 35# if 1st member name without digits and without used image suffix then it is a TAR archive 36>>>>>>>>0 default x 37>>>>>>>>>0 use tar-file 38# minimal check and then display tar archive information which can also be 39# embedded inside others like Android Backup, Clam AntiVirus database 400 name tar-file 41>257 string !ustar 42# header padded with nuls 43>>257 ulong =0 44# GNU tar version 1.29 with non pax format option without refusing 45# creates misleading 
V7 header for Long path, Multi-volume, Volume type 46>>>156 ubyte 0x4c GNU tar archive 47!:mime application/x-gtar 48!:ext tar/gtar 49>>>156 ubyte 0x4d GNU tar archive 50!:mime application/x-gtar 51!:ext tar/gtar 52>>>156 ubyte 0x56 GNU tar archive 53!:mime application/x-gtar 54!:ext tar/gtar 55>>>156 default x tar archive (V7) 56!:mime application/x-tar 57!:ext tar 58# other stuff in padding 59# some implementations add new fields to the blank area at the end of the header record 60# created for example by DOS TAR 3.20g 1994 Tim V.Shapore with -j option 61>>257 ulong !0 tar archive (old) 62!:mime application/x-tar 63!:ext tar 64# magic in newer, GNU, posix variants 65>257 string =ustar 66# 2 last char of magic and UStar version because string expression does not work 67# 2 space characters followed by a null for GNU variant 68>>261 ubelong =0x72202000 POSIX tar archive (GNU) 69!:mime application/x-gtar 70!:ext tar/gtar 71# UStar version with ASCII "00" 72>>261 ubelong 0x72003030 POSIX 73# gLOBAL and ExTENSION type only found in POSIX.1-2001 format 74>>>156 ubyte 0x67 \b.1-2001 75>>>156 ubyte 0x78 \b.1-2001 76>>>156 ubyte x tar archive 77!:mime application/x-ustar 78!:ext tar/ustar 79# version with 2 binary nuls embedded in Android Backup like com.android.settings.ab 80>>261 ubelong 0x72000000 tar archive (ustar) 81!:mime application/x-ustar 82!:ext tar/ustar 83# not seen ustar variant with garbage version 84>>261 default x tar archive (unknown ustar) 85!:mime application/x-ustar 86!:ext tar/ustar 87# type flag of 1st tar archive member 88#>156 ubyte x \b, %c-type 89>156 ubyte x 90>>156 ubyte 0 \b, file 91>>156 ubyte 0x30 \b, file 92>>156 ubyte 0x31 \b, hard link 93>>156 ubyte 0x32 \b, symlink 94>>156 ubyte 0x33 \b, char device 95>>156 ubyte 0x34 \b, block device 96>>156 ubyte 0x35 \b, directory 97>>156 ubyte 0x36 \b, fifo 98>>156 ubyte 0x37 \b, reserved 99>>156 ubyte 0x4c \b, long path 100>>156 ubyte 0x4d \b, multi volume 101>>156 ubyte 0x56 \b, volume 102>>156
ubyte 0x67 \b, global 103>>156 ubyte 0x78 \b, extension 104>>156 default x \b, type 105>>>156 ubyte x '%c' 106# name[100] 107>0 string >\0 %-.60s 108# mode mainly stored as an octal number in ASCII null or space terminated 109>100 string >\0 \b, mode %-.7s 110# user id mainly as octal numbers in ASCII null or space terminated 111>108 string >\0 \b, uid %-.7s 112# group id mainly as octal numbers in ASCII null or space terminated 113>116 string >\0 \b, gid %-.7s 114# size mainly as octal number in ASCII 115>124 ubyte <0x38 116>>124 string >\0 \b, size %-.12s 117# coding indicated by setting the high-order bit of the leftmost byte 118>124 ubyte >0xEF \b, size 0x 119>>124 ubyte !0xff \b%2.2x 120>>125 ubyte !0xff \b%2.2x 121>>126 ubyte !0xff \b%2.2x 122>>127 ubyte !0xff \b%2.2x 123>>128 ubyte !0xff \b%2.2x 124>>129 ubyte !0xff \b%2.2x 125>>130 ubyte !0xff \b%2.2x 126>>131 ubyte !0xff \b%2.2x 127>>132 ubyte !0xff \b%2.2x 128>>133 ubyte !0xff \b%2.2x 129>>134 ubyte !0xff \b%2.2x 130>>135 ubyte !0xff \b%2.2x 131# seconds since 0:0:0 1 jan 1970 UTC as octal number mainly in ASCII null or space terminated 132>136 string >\0 \b, seconds %-.11s 133# header checksum stored as an octal number in ASCII null or space terminated 134#>148 string x \b, cksum %.7s 135# linkname[100] 136>157 string >\0 \b, linkname %-.40s 137# additional fields for ustar 138>257 string =ustar 139# owner user name null terminated 140>>265 string >\0 \b, user %-.32s 141# group name null terminated 142>>297 string >\0 \b, group %-.32s 143# device major minor if not zero 144>>329 ubequad&0xCFCFCFCFcFcFcFdf !0 145>>>329 string x \b, devmaj %-.7s 146>>337 ubequad&0xCFCFCFCFcFcFcFdf !0 147>>>337 string x \b, devmin %-.7s 148# prefix[155] 149>>345 string >\0 \b, prefix %-.155s 150# old non ustar/POSIX tar 151>257 string !ustar 152>>508 string =tar\0 153# padding[255] in old star 154>>>257 string >\0 \b, padding: %-.40s 155>>508 default x 156# padding[255] in old tar sometimes comment field 157>>>257 string 
>\0 \b, comment: %-.40s 158# Summary: Comic Book Archive *.CBT with TAR format 159# URL: https://en.wikipedia.org/wiki/Comic_book_archive 160# http://fileformats.archiveteam.org/wiki/Comic_Book_Archive 161# Note: there exist also RAR, ZIP, ACE and 7Z packed variants 1620 name tar-cbt 163>0 string x Comic Book archive, tar archive 164#!:mime application/x-tar 165!:mime application/vnd.comicbook 166#!:mime application/vnd.comicbook+tar 167!:ext cbt 168# name[100] probably like: 19.jpg 0001.png 0002.png 169# or maybe like ComicInfo.xml 170>0 string >\0 \b, 1st image %-.60s 171 172# Incremental snapshot gnu-tar format from: 173# https://www.gnu.org/software/tar/manual/html_node/Snapshot-Files.html 1740 string GNU\ tar- GNU tar incremental snapshot data 175>&0 regex [0-9]\\.[0-9]+-[0-9]+ version %s 176 177# cpio archives 178# 179# Yes, the top two "cpio archive" formats *are* supposed to just be "short". 180# The idea is to indicate archives produced on machines with the same 181# byte order as the machine running "file" with "cpio archive", and 182# to indicate archives produced on machines with the opposite byte order 183# from the machine running "file" with "byte-swapped cpio archive". 184# 185# The SVR4 "cpio(4)" hints that there are additional formats, but they 186# are defined as "short"s; I think all the new formats are 187# character-header formats and thus are strings, not numbers. 1880 short 070707 cpio archive 189!:mime application/x-cpio 1900 short 0143561 byte-swapped cpio archive 191!:mime application/x-cpio # encoding: swapped 1920 string 070707 ASCII cpio archive (pre-SVR4 or odc) 193!:mime application/x-cpio 1940 string 070701 ASCII cpio archive (SVR4 with no CRC) 195!:mime application/x-cpio 1960 string 070702 ASCII cpio archive (SVR4 with CRC) 197!:mime application/x-cpio 198 199# 200# Various archive formats used by various versions of the "ar" 201# command. 202# 203 204# 205# Original UNIX archive formats. 
206# They were written with binary values in host byte order, and 207# the magic number was a host "int", which might have been 16 bits 208# or 32 bits. We don't say "PDP-11" or "VAX", as there might have 209# been ports to little-endian 16-bit-int or 32-bit-int platforms 210# (x86?) using some of those formats; if none existed, feel free 211# to use "PDP-11" for little-endian 16-bit and "VAX" for little-endian 212# 32-bit. There might have been big-endian ports of that sort as 213# well. 214# 2150 leshort 0177555 very old 16-bit-int little-endian archive 2160 beshort 0177555 very old 16-bit-int big-endian archive 2170 lelong 0177555 very old 32-bit-int little-endian archive 2180 belong 0177555 very old 32-bit-int big-endian archive 219 2200 leshort 0177545 old 16-bit-int little-endian archive 221>2 string __.SYMDEF random library 2220 beshort 0177545 old 16-bit-int big-endian archive 223>2 string __.SYMDEF random library 2240 lelong 0177545 old 32-bit-int little-endian archive 225>4 string __.SYMDEF random library 2260 belong 0177545 old 32-bit-int big-endian archive 227>4 string __.SYMDEF random library 228 229# 230# From "pdp" (but why a 4-byte quantity?) 231# 2320 lelong 0x39bed PDP-11 old archive 2330 lelong 0x39bee PDP-11 4.0 archive 234 235# 236# XXX - what flavor of APL used this, and was it a variant of 237# some ar archive format? It's similar to, but not the same 238# as, the APL workspace magic numbers in pdp. 239# 2400 long 0100554 apl workspace 241 242# 243# System V Release 1 portable(?) archive format. 244# 2450 string =<ar> System V Release 1 ar archive 246!:mime application/x-archive 247 248# 249# Debian package; it's in the portable archive format, and needs to go 250# before the entry for regular portable archives, as it's recognized as 251# a portable archive whose first member has a name beginning with 252# "debian". 
253# 254# Update: Joerg Jenderek 255# URL: https://en.wikipedia.org/wiki/Deb_(file_format) 2560 string =!<arch>\ndebian 257# https://manpages.debian.org/testing/dpkg/dpkg-split.1.en.html 258>14 string -split part of multipart Debian package 259!:mime application/vnd.debian.binary-package 260# udeb is used for stripped down deb file 261!:ext deb/udeb 262>14 string -binary Debian binary package 263!:mime application/vnd.debian.binary-package 264# For ipk packager see also https://en.wikipedia.org/wiki/Opkg 265!:ext deb/udeb/ipk 266# This should not happen 267>14 default x Unknown Debian package 268# NL terminated version; for most Debian cases this is 2.0 or 2.1 for split 269>68 string >\0 (format %s) 270#>68 string !2.0\n 271#>>68 string x (format %.3s) 272>68 string =2.0\n 273# 2nd archive name=control archive name like control.tar.gz or control.tar.xz 274>>72 string >\0 \b, with %.14s 275# look for 3rd archive name=data archive name like data.tar.{gz,xz,bz2,lzma} 276>>0 search/0x93e4f data.tar. 
\b, data compression 277# the above line only works if FILE_BYTES_MAX in ../../src/file.h is raised 278# for example like libreoffice-dev-doc_1%3a5.2.7-1+rpi1+deb9u3_all.deb 279>>>&0 string x %.2s 280# skip space (0x20 BSD) and slash (0x2f System V) character marking end of name 281>>>&2 ubyte !0x20 282>>>>&-1 ubyte !0x2f 283# display 3rd character of file name extension like 2 of bz2 or m of lzma 284>>>>>&-1 ubyte x \b%c 285>>>>>>&0 ubyte !0x20 286>>>>>>>&-1 ubyte !0x2f 287# display 4th character of file name extension like a of lzma 288>>>>>>>>&-1 ubyte x \b%c 289# split debian package case 290>68 string =2.1\n 291# dpkg-1.18.25/dpkg-split/info.c 292# NL terminated ASCII package name like ckermit 293>>&0 string x \b, %s 294# NL terminated package version like 302-5.3 295>>>&1 string x %s 296# NL terminated MD5 checksum 297>>>>&1 string x \b, MD5 %s 298# NL terminated original package length 299>>>>>&1 string x \b, unsplitted size %s 300# NL terminated part length 301>>>>>>&1 string x \b, part length %s 302# NL terminated package part like n/m 303>>>>>>>&1 string x \b, part %s 304# NL terminated package architecture like armhf since dpkg 1.16.1 or later 305>>>>>>>>&1 string x \b, %s 306 307# 308# MIPS archive; they're in the portable archive format, and need to go 309# before the entry for regular portable archives, as it's recognized as 310# a portable archive whose first member has a name beginning with 311# "__________E". 312# 3130 string =!<arch>\n__________E MIPS archive 314!:mime application/x-archive 315>20 string U with MIPS Ucode members 316>21 string L with MIPSEL members 317>21 string B with MIPSEB members 318>19 string L and an EL hash table 319>19 string B and an EB hash table 320>22 string X -- out of date 321 322# 323# BSD/SVR2-and-later portable archive formats. 
324# 325# Update: Joerg Jenderek 326# URL: http://fileformats.archiveteam.org/wiki/AR 327# Reference: https://www.unix.com/man-page/opensolaris/3HEAD/ar.h/ 328# Note: Mach-O universal binary in ./cafebabe is dependent 329# TODO: unify current ar archive, MIPS archive, Debian package 330# distinguish BSD, SVR; 32, 64 bit; HP from other 32-bit SVR; 331# *.ar packages from *.a libraries. handle empty archive 3320 string =!<arch>\n current ar archive 333# print first and possibly second ar_name[16] for debugging purpose 334#>8 string x \b, 1st "%.16s" 335#>68 string x \b, 2nd "%.16s" 336!:mime application/x-archive 337# a in most case for libraries; lib for Microsoft libraries; ar else cases 338!:ext a/lib/ar 339>8 string __.SYMDEF random library 340# first member with long marked name __.SYMDEF SORTED implies BSD library 341>68 string __.SYMDEF\ SORTED random library 342# Reference: https://parisc.wiki.kernel.org/images-parisc/b/b2/Rad_11_0_32.pdf 343# "archive file" entry moved from ./hp 344# LST header system_id 0210h~PA-RISC 1.1,... identifies the target architecture 345# LST header a_magic 0619h~relocatable library 346>68 belong 0x020b0619 - PA-RISC1.0 relocatable library 347>68 belong 0x02100619 - PA-RISC1.1 relocatable library 348>68 belong 0x02110619 - PA-RISC1.2 relocatable library 349>68 belong 0x02140619 - PA-RISC2.0 relocatable library 350#EOF for common ar archives 351 352# 353# "Thin" archive, as can be produced by GNU ar. 354# 3550 string =!<thin>\n thin archive with 356>68 belong 0 no symbol entries 357>68 belong 1 %d symbol entry 358>68 belong >1 %d symbol entries 359 3600 search/1 -h- Software Tools format archive text 361 362# ARC archiver, from Daniel Quinlan (quinlan@yggdrasil.com) 363# 364# The first byte is the magic (0x1a), byte 2 is the compression type for 365# the first file (0x01 through 0x09), and bytes 3 to 15 are the MS-DOS 366# filename of the first file (null terminated). 
Since some types collide 367# we only test some types on basis of frequency: 0x08 (83%), 0x09 (5%), 368# 0x02 (5%), 0x03 (3%), 0x04 (2%), 0x06 (2%). 0x01 collides with terminfo. 3690 lelong&0x8080ffff 0x0000081a ARC archive data, dynamic LZW 370!:mime application/x-arc 3710 lelong&0x8080ffff 0x0000091a ARC archive data, squashed 372!:mime application/x-arc 3730 lelong&0x8080ffff 0x0000021a ARC archive data, uncompressed 374!:mime application/x-arc 3750 lelong&0x8080ffff 0x0000031a ARC archive data, packed 376!:mime application/x-arc 3770 lelong&0x8080ffff 0x0000041a ARC archive data, squeezed 378!:mime application/x-arc 3790 lelong&0x8080ffff 0x0000061a ARC archive data, crunched 380!:mime application/x-arc 381# [JW] stuff taken from idarc, obviously ARC successors: 3820 lelong&0x8080ffff 0x00000a1a PAK archive data 383!:mime application/x-arc 3840 lelong&0x8080ffff 0x0000141a ARC+ archive data 385!:mime application/x-arc 3860 lelong&0x8080ffff 0x0000481a HYP archive data 387!:mime application/x-arc 388 389# Acorn archive formats (Disaster prone simpleton, m91dps@ecs.ox.ac.uk) 390# I can't create either SPARK or ArcFS archives so I have not tested this stuff 391# [GRR: the original entries collide with ARC, above; replaced with combined 392# version (not tested)] 393#0 byte 0x1a RISC OS archive (spark format) 3940 string \032archive RISC OS archive (ArcFS format) 3950 string Archive\000 RISC OS archive (ArcFS format) 396 397# All these were taken from idarc, many could not be verified. Unfortunately, 398# there were many low-quality sigs, i.e. easy to trigger false positives. 399# Please notify me of any real-world fishy/ambiguous signatures and I'll try 400# to get my hands on the actual archiver and see if I find something better. [JW] 401# probably many can be enhanced by finding some 0-byte or control char near the start 402 403# idarc calls this Crush/Uncompressed... 
*shrug* 4040 string CRUSH Crush archive data 405# Squeeze It (.sqz) 4060 string HLSQZ Squeeze It archive data 407# SQWEZ 4080 string SQWEZ SQWEZ archive data 409# HPack (.hpk) 4100 string HPAK HPack archive data 411# HAP 4120 string \x91\x33HF HAP archive data 413# MD/MDCD 4140 string MDmd MDCD archive data 415# LIM 4160 string LIM\x1a LIM archive data 417# SAR 4183 string LH5 SAR archive data 419# BSArc/BS2 4200 string \212\3SB\020\0 BSArc/BS2 archive data 421# Bethesda Softworks Archive (Oblivion) 4220 string BSA\0 BSArc archive data 423>4 lelong x version %d 424# MAR 4252 string =-ah MAR archive data 426# ACB 427#0 belong&0x00f800ff 0x00800000 ACB archive data 428# CPZ 429# TODO, this is what idarc says: 0 string \0\0\0 CPZ archive data 430# JRC 4310 string JRchive JRC archive data 432# Quantum 4330 string DS\0 Quantum archive data 434# ReSOF 4350 string PK\3\6 ReSOF archive data 436# QuArk 4370 string 7\4 QuArk archive data 438# YAC 43914 string YC YAC archive data 440# X1 4410 string X1 X1 archive data 4420 string XhDr X1 archive data 443# CDC Codec (.dqt) 4440 belong&0xffffe000 0x76ff2000 CDC Codec archive data 445# AMGC 4460 string \xad6" AMGC archive data 447# NuLIB 4480 string N\xc3\xb5F\xc3\xa9lx\xc3\xa5 NuLIB archive data 449# PakLeo 4500 string LEOLZW PAKLeo archive data 451# ChArc 4520 string SChF ChArc archive data 453# PSA 4540 string PSA PSA archive data 455# CrossePAC 4560 string DSIGDCC CrossePAC archive data 457# Freeze 4580 string \x1f\x9f\x4a\x10\x0a Freeze archive data 459# KBoom 4600 string \xc2\xa8MP\xc2\xa8 KBoom archive data 461# NSQ, must go after CDC Codec 4620 string \x76\xff NSQ archive data 463# DPA 4640 string Dirk\ Paehl DPA archive data 465# BA 466# TODO: idarc says "bytes 0-2 == bytes 3-5" 467# TTComp 468# URL: http://fileformats.archiveteam.org/wiki/TTComp_archive 469# Update: Joerg Jenderek 470# GRR: line below is too general as it matches also Panorama database "TCDB 2003-10 demo.pan", others 4710 string \0\6 472# look for 
first keyword of Panorama database *.pan 473>12 search/261 DESIGN 474# skip keyword with low entropy 475>12 default x 476# skip DOS 2.0 backup id file, sequence 6 with many nuls like BACKUPID_xx6.@@@ handled by ./msdos 477>>8 quad !0 478>>>0 use ttcomp 479# variant ASCII, 4K dictionary (strength=48=50-2). With strength=49 wrong order! WHY? 4800 string \1\6 481# TODO: 482# skip VAX-order 68k Blit mpx/mux executable (strength=50) handled by ./blit 483!:strength -2 484>0 use ttcomp 4850 string \0\5 486# skip some DOS 2.0 backup id file, sequence 5 with many nuls like BACKUPID_075.@@@ handled by ./msdos 487>8 quad !0 488>>0 use ttcomp 4890 string \1\5 490# TODO: 491# variant ASCII, 2K dictionary (strength=48=50-2). With strength=49 wrong order! WHY? 492# skip ctab data (strength=50) handled by ./ibm6000 493# skip locale data table (strength=50) handled by ./digital 494!:strength -2 495>0 use ttcomp 4960 string \0\4 497# skip many Maple help database *.hdb with version tag handled by ./maple 498>1028 string !version 499# skip veclib maple.hdb by looking for Maple keyword 500>>4 search/1091 Maple\040 501#>4 search/34090 Maple\040 502>>4 default x 503# skip DOS 2.0-3.2 backed up sequence 4 with many nuls like LOTUS5.RAR handled by ./msdos 504# skip xBASE Compound Index file *.CDX with many nuls 505>>>0x54 quad !0 506>>>>0 use ttcomp 5070 string \1\4 508# TODO: 509# skip Commodore PET BASIC 4.0 program *.prg 510# variant ASCII, 1K dictionary (strength=48=50-2). With strength=49 wrong order! WHY?
511# skip shared library (strength=50) handled by ./ibm6000 512!:strength -2 513>0 use ttcomp 514# display information of TTComp archive 5150 name ttcomp 516# (version 5.25) labeled the entry as "TTComp archive data" 517>0 ubyte x TTComp archive data 518!:mime application/x-compress-ttcomp 519# PBACKSCR.PI1 520!:ext $xe/$ts/pi1/__d 521# compression type: 0~binary compression 1~ASCII compression 522>0 ubyte 0 \b, binary 523>0 ubyte 1 \b, ASCII 524# size of the dictionary: 4~1024 bytes 5~2048 bytes 6~4096 bytes 525>1 ubyte 4 \b, 1K 526>1 ubyte 5 \b, 2K 527>1 ubyte 6 \b, 4K 528>1 ubyte x dictionary 529# https://mark0.net/forum/index.php?topic=848 530# last 3 bytes probably have only 8 possible bit sequences 531# xxxxxxxx 0000000x 11111111 ____FFh 532# xxxxxxxx 10000000 01111111 __807Fh 533# 0xxxxxxx 11000000 00111111 __C03Fh 534# 00xxxxxx 11100000 00011111 __E01Fh 535# 000xxxxx 11110000 00001111 __F00Fh 536# 0000xxxx 11111000 00000111 __F807h 537# 00000xxx 11111100 00000011 __FC03h 538# 000000xx 11111110 00000001 __FE01h 539# but for quickgif.__d 0A7DD4h 540#>-3 ubyte x \b, last 3 bytes 0x%2.2x 541#>-2 ubeshort x \b%4.4x 542# From: Joerg Jenderek 543# URL: https://en.wikipedia.org/wiki/Disk_Copy 544# reference: http://nulib.com/library/FTN.e00005.htm 5450x52 ubeshort 0x0100 546# test for disk image size equal or above 400k 547>0x40 ubelong >409599 548# test also for disk image size equal or below 1440k to skip 549# windows7en.mbr UNICODE.DAT 550#>>0x40 ubelong <1474561 551# test now for "low" disk image size equal or below 64 MiB to skip 552# windows7en.mbr (B441BBAAh) UNICODE.DAT (0400AF05h) 553>>0x40 ubelong <0x04000001 554# To skip Flags$StringJoiner.class with size 00106A61h test also for valid disk image sizes 555# 00064000 for 400k GCR disks dc42-400k-gcr.trid.xml 556# 000c8000 for 800k GCR disks dc42-800k-gcr.trid.xml 557# 000b4000 for 720k MFM disks dc42-720k-mfm.trid.xml 558# 00168000 for 1440k MFM disks dc42-1440k-mfm.trid.xml 559# 
https://lisaem.sunder.net/LisaProjectDocs.txt 560# 00500000 05M available 561# 00A00000 10M available 562# 01800000 24M possible 563# 02000000 32M uncertain 564# 04000000 64M uncertain 565>>>0x40 ubelong&0xf8003fFF 0 566# skip samples with invalid disk name length like: 567# 181 (biosmd80.rom) 202 (Flags$StringJoiner.class) 90 (UNICODE.DAT) 568>>>>0x0 ubyte <64 569>>>>>0 use dc42-floppy 570# display information of Apple DiskCopy 4.2 floppy image 5710 name dc42-floppy 572# disk name length; maximal 63 573#>0 ubyte x DISK NAME LENGTH %u 574# ASCII image pascal (maximal 63 bytes) name padded with NULs like: 575# "Microsoft Mail" "Disquette 2" "IIe Installer Disk" 576# "-lisaem.sunder.net hd-" (dc42-lisaem.trid.xml) "-not a Macintosh disk" (dc42-nonmac.trid.xml) 577>00 pstring/B x Apple DiskCopy 4.2 image %s 578#!:mime application/octet-stream 579!:mime application/x-dc42-floppy-image 580!:apple dCpydImg 581# probably also img like: "Utilitaires 2.img" "Installation 7.img" 582!:ext image/dc42/img 583# data size in bytes like: 409600 737280 819200 1474560 584>0x40 ubelong x \b, %u bytes 585# for debugging purpose size in hexadecimal 586#>0x40 ubelong x (%#8.8x) 587# tag size in bytes like: 0 (often) 2580h (PUID fmt/625) 4B00h (Microsoft Mail.image) 588>0x44 ubelong >0 \b, %#x tag size 589# data checksum 590#>0x48 ubelong x \b, %#x checksum 591# tag checksum 592#>0x4c ubelong x \b, %#x tag checksum 593# disk encoding like: 0 1 2 3 (PUID: fmt/625) 594>0x50 ubyte 0 \b, GCR CLV ssdd (400k) 595>0x50 ubyte 1 \b, GCR CLV dsdd (800k) 596>0x50 ubyte 2 \b, MFM CAV dsdd (720k) 597>0x50 ubyte 3 \b, MFM CAV dshd (1440k) 598>0x50 ubyte >3 \b, %#x encoding 599# format byte like: 12h (Lisa 400K) 24h (400K Macintosh) 96h (800K Apple II disk) 600# 2 (Mac 400k "Disquette Installation 13.image") 601# 22h (double-sided MFM or Mac 800k "Disco 12.image" "IIe Installer Disk.image") 602>0x51 ubyte x \b, %#x format 603#>0x54 ubequad x \b, data %#16.16llx 604# ESP, could this conflict with Easy 
Software Products' (e.g.ESP ghostscript) documentation? 6050 string ESP ESP archive data 606# ZPack 6070 string \1ZPK\1 ZPack archive data 608# Sky 6090 string \xbc\x40 Sky archive data 610# UFA 6110 string UFA UFA archive data 612# Dry 6130 string =-H2O DRY archive data 614# FoxSQZ 6150 string FOXSQZ FoxSQZ archive data 616# AR7 6170 string ,AR7 AR7 archive data 618# PPMZ 6190 string PPMZ PPMZ archive data 620# MS Compress 621# Update: Joerg Jenderek 622# URL: http://fileformats.archiveteam.org/wiki/MS-DOS_installation_compression 623# Reference: https://hwiegman.home.xs4all.nl/fileformats/compress/szdd_kwaj_format.html 624# Note: use correct version of extracting tool like EXPAND, UNPACK, DECOMP or 7Z 6254 string \x88\xf0\x27 626# KWAJ variant 627>0 string KWAJ MS Compress archive data, KWAJ variant 628!:mime application/x-ms-compress-kwaj 629# extension not working in version 5.32 630# magic/Magdir/archive, 284: Warning: EXTENSION type ` ??_' has bad char '?' 631# file: line 284: Bad magic entry ' ??_' 632!:ext ??_ 633# compression method (0-4) 634>>8 uleshort x \b, %u method 635# offset of compressed data 636>>10 uleshort x \b, %#x offset 637#>>(10.s) uleshort x 638#>>>&-6 string x \b, TEST extension %-.3s 639# header flags to mark header extensions 640>>12 uleshort >0 \b, %#x flags 641# 4 bytes: decompressed length of file 642>>12 uleshort &0x01 643>>>14 ulelong x \b, original size: %u bytes 644# 2 bytes: unknown purpose 645# 2 bytes: length of unknown data + mentioned bytes 646# 1-9 bytes: null-terminated file name 647# 1-4 bytes: null-terminated file extension 648>>12 uleshort &0x08 649>>>12 uleshort ^0x01 650>>>>12 uleshort ^0x02 651>>>>>12 uleshort ^0x04 652>>>>>>12 uleshort ^0x10 653>>>>>>>14 string x \b, %-.8s 654>>>>>>12 uleshort &0x10 655>>>>>>>14 string x \b, %-.8s 656>>>>>>>>&1 string x \b.%-.3s 657>>>>>12 uleshort &0x04 658>>>>>>12 uleshort ^0x10 659>>>>>>>(14.s) uleshort x 660>>>>>>>>&14 string x \b, %-.8s 661>>>>>>12 uleshort &0x10 
662>>>>>>>(14.s) uleshort x 663>>>>>>>>&14 string x \b, %-.8s 664>>>>>>>>>&1 string x \b.%-.3s 665>>>>12 uleshort &0x02 666>>>>>12 uleshort ^0x04 667>>>>>>12 uleshort ^0x10 668>>>>>>>16 string x \b, %-.8s 669>>>>>>12 uleshort &0x10 670>>>>>>>16 string x \b, %-.8s 671>>>>>>>>&1 string x \b.%-.3s 672>>>>>12 uleshort &0x04 673>>>>>>12 uleshort ^0x10 674>>>>>>>(16.s) uleshort x 675>>>>>>>>&16 string x \b, %-.8s 676>>>>>>12 uleshort &0x10 677>>>>>>>(16.s) uleshort x 678>>>>>>>>&16 string x \b, %-.8s 679>>>>>>>>>&1 string x \b.%-.3s 680>>>12 uleshort &0x01 681>>>>12 uleshort ^0x02 682>>>>>12 uleshort ^0x04 683>>>>>>12 uleshort ^0x10 684>>>>>>>18 string x \b, %-.8s 685>>>>>>12 uleshort &0x10 686>>>>>>>18 string x \b, %-.8s 687>>>>>>>>&1 string x \b.%-.3s 688>>>>>12 uleshort &0x04 689>>>>>>12 uleshort ^0x10 690>>>>>>>(18.s) uleshort x 691>>>>>>>>&18 string x \b, %-.8s 692>>>>>>12 uleshort &0x10 693>>>>>>>(18.s) uleshort x 694>>>>>>>>&18 string x \b, %-.8s 695>>>>>>>>>&1 string x \b.%-.3s 696>>>>12 uleshort &0x02 697>>>>>12 uleshort ^0x04 698>>>>>>12 uleshort ^0x10 699>>>>>>>20 string x \b, %-.8s 700>>>>>>12 uleshort &0x10 701>>>>>>>20 string x \b, %-.8s 702>>>>>>>>&1 string x \b.%-.3s 703>>>>>12 uleshort &0x04 704>>>>>>12 uleshort ^0x10 705>>>>>>>(20.s) uleshort x 706>>>>>>>>&20 string x \b, %-.8s 707>>>>>>12 uleshort &0x10 708>>>>>>>(20.s) uleshort x 709>>>>>>>>&20 string x \b, %-.8s 710>>>>>>>>>&1 string x \b.%-.3s 711# 2 bytes: length of data + mentioned bytes 712# 713# SZDD variant Haruhiko Okumura's LZSS or 7z type MsLZ 714# URL: http://fileformats.archiveteam.org/wiki/MS-DOS_installation_compression 715# Reference: http://www.cabextract.org.uk/libmspack/doc/szdd_kwaj_format.html 716# http://mark0.net/download/triddefs_xml.7z/defs/s/szdd.trid.xml 717# Note: called "Microsoft SZDD compressed (Haruhiko Okumura's LZSS)" by TrID 718# verified by 7-Zip `7z l -tMsLZ -slt *.??_` as MsLZ 719# `deark -l -m lzss_oku -d2 setup-1-41.bin` as "LZSS.C by Haruhiko Okumura" 720>0 string SZDD
MS Compress archive data, SZDD variant 721# 2nd part of signature 722#>>4 ubelong 0x88F02733 \b, SIGNATURE OK 723!:mime application/x-ms-compress-szdd 724!:ext ??_ 725# The character missing from the end of the filename (0=unknown) 726>>9 string >\0 \b, %-.1s is last character of original name 727# https://www.betaarchive.com/forum/viewtopic.php?t=26161 728# Compression mode: "A" (0x41) found but sometimes "B" in Windows 3.1 builds 026 and 034e 729>>8 string !A \b, %-.1s method 730>>10 ulelong >0 \b, original size: %u bytes 731# Summary: InstallShield archive with SZDD compressed 732# URL: https://community.flexera.com/t5/InstallShield-Knowledge-Base/InstallShield-Redistributable-Files/ta-p/5647 733# From: Joerg Jenderek 7341 search/48/bs SZDD\x88\xF0\x27\x33 InstallShield archive 735#!:mime application/octet-stream 736!:mime application/x-installshield-compress-szdd 737!:ext ibt 738# name of compressed archive member like: setup.dl_ _setup7int.dl_ _setup2k.dl_ _igdi.dl_ cabinet.dl_ 739>0 string x %s 740# name of uncompressed archive member like: setup.dll _Setup.dll IGdi.dll CABINET.DLL 741>>&1 string x (%s) 742# probably version like: 9.0.0.333 9.1.0.429 11.50.0.42618 743>>>&1 string x \b, version %s 744# SZDD member length like: 168048 169333 181842 745>>>>&1 string x \b, %s bytes 746# MS Compress archive data 747#>&0 string SZDD \b, SIGNATURE FOUND 748>&0 indirect x 749# QBasic SZDD variant 7503 string \x88\xf0\x27 751>0 string SZ\x20 MS Compress archive data, QBasic variant 752!:mime application/x-ms-compress-sz 753!:ext ??$ 754>>8 ulelong >0 \b, original size: %u bytes 755 756# Summary: CAZIP compressed file 757# From: Joerg Jenderek 758# URL: http://fileformats.archiveteam.org/wiki/CAZIP 759# Reference: http://mark0.net/download/triddefs_xml.7z/defs/c/caz.trid.xml 760# Note: Format is distinct from CAZIPXP compressed 7610 string \x0D\x0A\x1ACAZIP CAZIP compressed file 762#!:mime application/octet-stream 763!:mime application/x-compress-cazip 764# like: 
BLINKER.WR_ CLIPDEFS._ CAOSETUP.EX_ CLIPPER.EX_ FILEIO.C_ 765!:ext ??_/?_/_ 766 767# Summary: FTCOMP compressed archive 768# From: Joerg Jenderek 769# URL: http://fileformats.archiveteam.org/wiki/FTCOMP 770# Reference: http://mark0.net/download/triddefs_xml.7z/defs/a/ark-ftcomp.trid.xml 771# Note: called by TrID "FTCOMP compressed archive" 772# extracted by `unpack seahelp.hl_` 77324 string/b FTCOMP FTCOMP compressed archive 774#!:mime application/octet-stream 775!:mime application/x-compress-ftcomp 776!:ext ??_/??@/dll/drv/pk2/ 777# probably A596FDFF magic at the beginning 778>0 ubelong !0xA596FDFF \b, at beginning %#x 779# probably original file name with directory like: \OS2\unpack.exe \SYSTEM\8514.DRV MAHJONGG.EXE 780>41 string x "%s" 781 782# MP3 (archiver, not lossy audio compression) 7830 string MP3\x1a MP3-Archiver archive data 784# ZET 7850 string OZ\xc3\x9d ZET archive data 786# TSComp 7870 string \x65\x5d\x13\x8c\x08\x01\x03\x00 TSComp archive data 788# ARQ 7890 string gW\4\1 ARQ archive data 790# Squash 7913 string OctSqu Squash archive data 792# Terse 7930 string \5\1\1\0 Terse archive data 794# PUCrunch 7950 string \x01\x08\x0b\x08\xef\x00\x9e\x32\x30\x36\x31 PUCrunch archive data 796# UHarc 7970 string UHA UHarc archive data 798# ABComp 7990 string \2AB ABComp archive data 8000 string \3AB2 ABComp archive data 801# CMP 8020 string CO\0 CMP archive data 803# Splint 8040 string \x93\xb9\x06 Splint archive data 805# InstallShield 8060 string \x13\x5d\x65\x8c InstallShield Z archive Data 807# Gather 8081 string GTH Gather archive data 809# BOA 8100 string BOA BOA archive data 811# RAX 8120 string ULEB\xa RAX archive data 813# Xtreme 8140 string ULEB\0 Xtreme archive data 815# Pack Magic 8160 string @\xc3\xa2\1\0 Pack Magic archive data 817# BTS 8180 belong&0xfeffffff 0x1a034465 BTS archive data 819# ELI 5750 8200 string Ora\ ELI 5750 archive data 821# QFC 8220 string \x1aFC\x1a QFC archive data 8230 string \x1aQF\x1a QFC archive data 824# PRO-PACK 8250 
string RNC PRO-PACK archive data
# 777
0	string	777	777 archive data
# LZS221
0	string	sTaC	LZS221 archive data
# HPA
0	string	HPA	HPA archive data
# Arhangel
0	string	LG	Arhangel archive data
# EXP1, uses bzip2
0	string	0123456789012345BZh	EXP1 archive data
# IMP
0	string	IMP\xa	IMP archive data
# NRV
0	string	\x00\x9E\x6E\x72\x76\xFF	NRV archive data
# Squish
0	string	\x73\xb2\x90\xf4	Squish archive data
# Par
0	string	PHILIPP	Par archive data
0	string	PAR	Par archive data
# HIT
0	string	UB	HIT archive data
# SBX
0	belong&0xfffff000	0x53423000	SBX archive data
# NaShrink
0	string	NSK	NaShrink archive data
# SAPCAR
0	string	#\ CAR\ archive\ header	SAPCAR archive data
0	string	CAR\ 2.00	SAPCAR archive data
0	string	CAR\ 2.01	SAPCAR archive data
#!:mime	application/octet-stream
!:mime	application/vnd.sar
!:ext	sar
# Disintegrator
0	string	DST	Disintegrator archive data
# ASD
0	string	ASD	ASD archive data
# InstallShield CAB
# Update: Joerg Jenderek at Nov 2021
# URL: https://en.wikipedia.org/wiki/InstallShield
# Reference: https://github.com/twogood/unshield/blob/master/lib/cabfile.h
# Note: Not compatible with Microsoft CAB files
# http://mark0.net/download/triddefs_xml.7z/defs/a/ark-cab-ishield.trid.xml
# CAB_SIGNATURE 0x28635349
0	string	ISc(	InstallShield
#!:mime	application/octet-stream
!:mime	application/x-installshield
# http://mark0.net/download/triddefs_xml.7z/defs/a/ark-cab-ishield-hdr.trid.xml
>16	ulelong	!0	setup header
# like: _SYS1.HDR _USER1.HDR data1.hdr
!:ext	hdr
>16	ulelong	=0	CAB
# like: _SYS1.CAB _USER1.CAB DATA1.CAB data2.cab
!:ext	cab
# https://github.com/twogood/unshield/blob/master/lib/helper.c
# version like: 0x1005201 0x100600c 0x1007000 0x1009500
# 0x2000578 0x20005dc 0x2000640 0x40007d0 0x4000834
>4	ulelong	x	\b, version %#x
# volume_info like: 0
>8	ulelong	!0	\b, volume_info %#x
# cab_descriptor_offset like: 0x200
>12	ulelong	!0x200	\b, offset %#x
#>0x200	ubequad	x	\b, at 0x200 %#16.16llx
# cab_descriptor_size like: 0 (*.cab) BD5 C8B DA5 E2A E36 116C 251D 4DA9 56F0 5CC2 6E4B 777D 779E 1F7C2
>16	ulelong	!0	\b, descriptor size %#x
# TOP4
0	string	T4\x1a	TOP4 archive data
# BatComp left out: sig looks like COM executable
# so TODO: get real 4dos batcomp file and find sig
# BlakHole
0	string	BH\5\7	BlakHole archive data
# BIX
0	string	BIX0	BIX archive data
# ChiefLZA
0	string	ChfLZ	ChiefLZA archive data
# Blink
0	string	Blink	Blink archive data
# Logitech Compress
0	string	\xda\xfa	Logitech Compress archive data
# ARS-Sfx (FIXME: really a SFX? then goto COM/EXE)
1	string	(C)\ STEPANYUK	ARS-Sfx archive data
# AKT/AKT32
0	string	AKT32	AKT32 archive data
0	string	AKT	AKT archive data
# NPack
0	string	MSTSM	NPack archive data
# PFT
0	string	\0\x50\0\x14	PFT archive data
# SemOne
0	string	SEM	SemOne archive data
# PPMD
0	string	\x8f\xaf\xac\x84	PPMD archive data
# FIZ
0	string	FIZ	FIZ archive data
# MSXiE
0	belong&0xfffff0f0	0x4d530000	MSXiE archive data
# DeepFreezer
0	belong&0xfffffff0	0x797a3030	DeepFreezer archive data
# DC
0	string	=<DC-	DC archive data
# TPac
0	string	\4TPAC\3	TPac archive data
# Ai
0	string	Ai\1\1\0	Ai archive data
0	string	Ai\1\0\0	Ai archive data
# Ai32
0	string	Ai\2\0	Ai32 archive data
0	string	Ai\2\1	Ai32 archive data
# SBC
0	string	SBC	SBC archive data
# Ybs
0	string	YBS	Ybs archive data
# DitPack
0	string	\x9e\0\0	DitPack archive data
# DMS
0	string	DMS!	DMS archive data
# EPC
0	string	\x8f\xaf\xac\x8c	EPC archive data
# VSARC
0	string	VS\x1a	VSARC archive data
# PDZ
0	string	PDZ	PDZ archive data
# ReDuq
0	string	rdqx	ReDuq archive data
# GCA
0	string	GCAX	GCA archive data
# PPMN
0	string	pN	PPMN archive data
# WinImage
3	string	WINIMAGE	WinImage archive data
# Compressia
0	string	CMP0CMP	Compressia archive data
# UHBC
0	string	UHB	UHBC archive data
# WinHKI
0	string	\x61\x5C\x04\x05	WinHKI archive data
# WWPack data file
0	string	WWP	WWPack archive data
# BSN (BSA, PTS-DOS)
0	string	\xffBSG	BSN archive data
1	string	\xffBSG	BSN archive data
3	string	\xffBSG	BSN archive data
1	string	\0\xae\2	BSN archive data
1	string	\0\xae\3	BSN archive data
1	string	\0\xae\7	BSN archive data
# AIN
0	string	\x33\x18	AIN archive data
0	string	\x33\x17	AIN archive data
# XPA32 test moved and merged with XPA by Joerg Jenderek at Sep 2015
# SZip (TODO: doesn't catch all versions)
0	string	SZ\x0a\4	SZip archive data
# XPack DiskImage
# *.XDI updated by Joerg Jenderek Sep 2015
# ftp://ftp.sac.sk/pub/sac/pack/0index.txt
# GRR: this test is still too general as it catches also text files starting with jm
0	string	jm
# only found examples with this additional characteristic 2 bytes
>2	string	\x2\x4	Xpack DiskImage archive data
#!:ext xdi
# XPack Data
# *.xpa updated by Joerg Jenderek Sep 2015
# ftp://ftp.elf.stuba.sk/pub/pc/pack/
0	string	xpa	XPA
!:ext	xpa
# XPA32
# ftp://ftp.elf.stuba.sk/pub/pc/pack/xpa32.zip
# created by XPA32.EXE version 1.0.2 for Windows
>0	string	xpa\0\1	\b32 archive data
# created by XPACK.COM version 1.67m or 1.67r with short 0x1800
>3	ubeshort	!0x0001	\bck archive data
# XPack Single Data
# changed by Joerg Jenderek Sep 2015 back to like in version 5.12
# letter 'I'+ acute accent is equivalent to \xcd
0	string	\xcd\ jm	Xpack single archive data
#!:mime application/x-xpa-compressed
!:ext	xpa

# TODO: missing due to unknown magic/magic at end of file:
#DWC
#ARG
#ZAR
#PC/3270
#InstallIt
#RKive
#RK
#XPack Diskimage

# These were inspired by idarc, but actually verified
# Dzip archiver (.dz)
# Update: Joerg Jenderek
# URL: http://speeddemosarchive.com/dzip/
# reference: http://speeddemosarchive.com/dzip/dz29src.zip/main.c
# GRR: line below is too general as it matches also ASCII texts like Doszip commander help dz.txt
0	string	DZ
# latest version is 2.9 dated 7 may 2003
>2	byte	<4	Dzip archive data
!:mime	application/x-dzip
!:ext	dz
>>2	byte	x	\b, version %i
>>3	byte	x	\b.%i
>>4	ulelong	x	\b, offset %#x
>>8	ulelong	x	\b, %u files
# ZZip archiver (.zz)
0	string	ZZ\ \0\0	ZZip archive data
0	string	ZZ0	ZZip archive data
# PAQ archiver (.paq)
0	string	\xaa\x40\x5f\x77\x1f\xe5\x82\x0d	PAQ archive data
0	string	PAQ	PAQ archive data
>3	byte&0xf0	0x30
>>3	byte	x	(v%c)
# JAR archiver (.j), this is the successor to ARJ, not Java's JAR (which is essentially ZIP)
# Update: Joerg Jenderek
# URL: http://fileformats.archiveteam.org/wiki/JAR_(ARJ_Software)
# reference: http://mark0.net/download/triddefs_xml.7z/defs/a/ark-jar.trid.xml
# https://www.sac.sk/download/pack/jar102x.exe/TECHNOTE.DOC
# Note: called "JAR compressed archive" by TrID
0xe	string	\x1aJar\x1b	JAR (ARJ Software, Inc.) archive data
#!:mime application/octet-stream
!:mime	application/x-compress-j
>0	ulelong	x	\b, CRC32 %#x
# standard suffix is ".j"; for multi volumes following order j01 j02 ... j99 100 ... 990
!:ext	j/j01/j02
# URL: http://fileformats.archiveteam.org/wiki/JARCS
# reference: http://mark0.net/download/triddefs_xml.7z/defs/a/ark-jarcs.trid.xml
# Note: called "JARCS compressed archive" by TrID
0	string	JARCS	JAR (ARJ Software, Inc.) archive data
#!:mime application/octet-stream
!:mime	application/x-compress-jar
!:ext	jar

# ARJ archiver (jason@jarthur.Claremont.EDU)
# URL: http://fileformats.archiveteam.org/wiki/ARJ
# reference: http://mark0.net/download/triddefs_xml.7z/defs/a/ark-arj.trid.xml
# https://github.com/FarGroup/FarManager/
# blob/master/plugins/multiarc/arc.doc/arj.txt
# Note: called "ARJ compressed archive" by TrID and
# "ARJ File Format" by DROID via PUID fmt/610
# verified by `7z l -tarj PHRACK1.ARJ` and
# `arj.exe l TEST-hk9.ARJ`
0	leshort	0xea60
# skip DROID fmt-610-signature-id-946.arj by check for valid file type of main header
>0xA	ubyte	2
>>0	use	arj-archive
# named entry describing the ARJ main header; invoked by the `use` line above
0	name	arj-archive
>0	leshort	x	ARJ archive
!:mime	application/x-arj
# look for terminating 0-character of filename
>0x26	search/1024	\0
# file name extension is normally .arj but not for parts of multi volume
#>>&-5	string	x	extension %.4s
>>&-5	string/c	.arj	data
!:ext	arj
>>&-5	default	x
# for multi volume first name is archive.arj then following parts archive.a01 archive.a02 ...
>>>8	byte	&0x04	data
!:ext	a01/a02
# for SFX first name is archive.exe then following parts archive.e01 archive.e02 ...
>>>8	byte	^0x04	data, SFX multi-volume
!:ext	e01/e02
# basic header size like: 0x002b 0x002c 0x04e0 0x04e3 0x04e7
#>2	uleshort	x	basic header size %#4.4x
# next fragment content like: 0x0a200a003a8fc713 0x524a000010bb3471 0x524a0000c73c70f9
#>(2.s)	ubequad	x	NEXT FRAGMENT CONTENT %#16.16llx
# first_hdr_size; seems to be same as basic header size
#>2	uleshort	x	1st header size %#x
# archiver version number like: 3 4 6 11 102
>5	byte	x	\b, v%d
# minimum archiver version to extract like: 1
>6	ubyte	!1	\b, minimum %u to extract
# FOR DEBUGGING
#>8	byte	x	\b, FLAGS %#x
# GARBLED_FLAG1; garble with password; g switch
>8	byte	&0x01	\b, password-protected
# encryption version: 0~old 1~old 2~new 3~reserved 4~40 bit key GOST
>>0x20	ubyte	x	(v%u)
#>8	byte	&0x02	\b, secured
# ANSIPAGE_FLAG; indicates ANSI codepage used by ARJ32; hy switch
>8	byte	&0x02	\b, ANSI codepage
# VOLUME_FLAG indicates presence of succeeding volume; but apparently not for SFX
>8	byte	&0x04	\b, multi-volume
#>8	byte	&0x08	\b, file-offset
# ARJPROT_FLAG; build with data protection record; hk switch
>8	byte	&0x08	\b, recoverable
# arj protection factor; maximal 10; switch hky -> factor=y+1
>>0x22	byte	x	(factor %u)
>8	byte	&0x10	\b, slash-switched
# BACKUP_FLAG; obsolete
>8	byte	&0x20	\b, backup
# SECURED_FLAG;
# no trailing comma here: every following message already starts with "\b, "
>8	byte	&0x40	\b, secured
# ALTNAME_FLAG; indicates dual-name archive
>8	byte	&0x80	\b, dual-name
# security version; 0~old 2~current
>9	ubyte	!0
>>9	ubyte	!2	\b, security version %u
# file type; 2 in main header; 0~binary 1~7-bitText 2~comment 3~directory 4~VolumeLabel 5=ChapterLabel
>0xA	ubyte	!2	\b, file type %u
# date+time when original archive was created in MS-DOS format via ./msdos
>0xC	ulelong	x	\b, created
>0xC	use	dos-date
# or date and time by new internal function
#>0xE	lemsdosdate	x	%s
#>0xC	lemsdostime	x	%s
# FOR DEBUGGING
#>0x12	uleshort	x	RAW DATE %#4.4x
#>0x10	uleshort	x	RAW TIME %#4.4x
# date+time when archive was last modified; sometimes nil or
# maybe wrong like in HP4DRVR.ARJ
#>0x10	ulelong	>0	\b, modified
#>>0x10	use	dos-date
# or date and time by new internal function
#>>0x12	lemsdosdate	x	%s
#>>0x10	lemsdostime	x	%s
# archive size (currently used only for secured archives); MAYBE?
#>0x14	ulelong	!0	\b, file size %u
# security envelope file position; MAYBE?
#>0x18	ulelong	!0	\b, at %#x security envelope
# filespec position in filename; WHAT IS THAT?
#>0x1C	uleshort	>0	\b, filespec position %#x
# length in bytes of security envelope data like: 2CAh 301h 364h 471h
>0x1E	uleshort	!0	\b, security envelope length %#x
# last chapter like: 0 1
>0x21	ubyte	!0	\b, last chapter %u
# filename (null-terminated string); sometimes at 0x26 when 4 bytes for extra data
>34	byte	x	\b, original name:
# with extras data
>34	byte	<0x0B
>>38	string	x	%s
# without extras data
>34	byte	>0x0A
>>34	string	x	%s
# host OS: 0~MSDOS ... 11~WIN32
>7	byte	0	\b, os: MS-DOS
>7	byte	1	\b, os: PRIMOS
>7	byte	2	\b, os: Unix
>7	byte	3	\b, os: Amiga
>7	byte	4	\b, os: Macintosh
>7	byte	5	\b, os: OS/2
>7	byte	6	\b, os: Apple ][ GS
>7	byte	7	\b, os: Atari ST
>7	byte	8	\b, os: NeXT
>7	byte	9	\b, os: VAX/VMS
>7	byte	10	\b, os: WIN95
>7	byte	11	\b, os: WIN32
# [JW] idarc says this is also possible
2	leshort	0xea60	ARJ archive data
#2	leshort	0xea60
#>2	use	arj-archive

# HA archiver (Greg Roelofs, newt@uchicago.edu)
# This is a really bad format. A file containing HAWAII will match this...
#0	string	HA	HA archive data,
#>2	leshort	=1	1 file,
#>2	leshort	>1	%hu files,
#>4	byte&0x0f	=0	first is type CPY
#>4	byte&0x0f	=1	first is type ASC
#>4	byte&0x0f	=2	first is type HSC
#>4	byte&0x0f	=0x0e	first is type DIR
#>4	byte&0x0f	=0x0f	first is type SPECIAL
# suggestion: at least identify small archives (<1024 files)
0	belong&0xffff00fc	0x48410000	HA archive data
>2	leshort	=1	1 file,
>2	leshort	>1	%u files,
>4	byte&0x0f	=0	first is type CPY
>4	byte&0x0f	=1	first is type ASC
>4	byte&0x0f	=2	first is type HSC
>4	byte&0x0f	=0x0e	first is type DIR
>4	byte&0x0f	=0x0f	first is type SPECIAL

# HPACK archiver (Peter Gutmann, pgut1@cs.aukuni.ac.nz)
0	string	HPAK	HPACK archive data

# JAM Archive volume format, by Dmitry.Kohmanyuk@UA.net
0	string	\351,\001JAM\ 	JAM archive,
>7	string	>\0	version %.4s
>0x26	byte	=0x27	-
>>0x2b	string	>\0	label %.11s,
>>0x27	lelong	x	serial %08x,
>>0x36	string	>\0	fstype %.8s

# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
# Update: Joerg Jenderek
# URL: https://en.wikipedia.org/wiki/LHA_(file_format)
# Reference: https://web.archive.org/web/20021005080911/http://www.osirusoft.com/joejared/lzhformat.html
#
# check and display information of lharc (LHa,PMarc) file
0	name	lharc-file
# check 1st character of method id like -lz4- -lh5- or -pm2-
>2	string	-
# check 5th character of method id
>>6	string	-
# check header level 0 1 2 3
>>>20	ubyte	<4
# check 2nd, 3rd and 4th character of method id
>>>>3	regex	\^(lh[0-9a-ex]|lz[s2-8]|pm[012]|pc1)	\b
!:mime	application/x-lzh-compressed
# creator type "LHA "
!:apple	????LHA
# display archive type name like "LHa/LZS archive data" or "LArc archive"
>>>>>2	string	-lz	\b
!:ext	lzs
# already known -lzs- -lz4- -lz5- with old names
>>>>>>2	string	-lzs	LHa/LZS archive data
>>>>>>3	regex	\^lz[45]	LHarc 1.x archive data
# missing -lz?- with wikipedia names
>>>>>>3	regex	\^lz[2378]	LArc archive
# display archive type name like "LHa (2.x) archive data"
>>>>>2	string	-lh	\b
# already known -lh0- -lh1- -lh2- -lh3- -lh4- -lh5- -lh6- -lh7- -lhd- variants with old names
>>>>>>3	regex	\^lh[01]	LHarc 1.x/ARX archive data
# LHice archiver use ".ICE" as name extension instead usual one ".lzh"
# FOOBAR archiver use ".foo" as name extension instead usual one
# "Florain Orjanov's and Olga Bachetska's ARchiver" not found at the moment
>>>>>>>2	string	-lh1	\b
!:ext	lha/lzh/ice
>>>>>>3	regex	\^lh[23d]	LHa 2.x? archive data
>>>>>>3	regex	\^lh[7]	LHa (2.x)/LHark archive data
>>>>>>3	regex	\^lh[456]	LHa (2.x) archive data
>>>>>>>2	string	-lh5	\b
# https://en.wikipedia.org/wiki/BIOS
# Some mainboard BIOS like Award use LHa compression. So archives with unusual extension are found like
# bios.rom , kd7_v14.bin, 1010.004, ...
!:ext	lha/lzh/rom/bin
# missing -lh?- variants (Joe Jared)
>>>>>>3	regex	\^lh[89a-ce]	LHa (Joe Jared) archive
# UNLHA32 2.67a
>>>>>>2	string	-lhx	LHa (UNLHA32) archive
# lha archives with standard file name extensions ".lha" ".lzh"
>>>>>>3	regex	!\^(lh1|lh5)	\b
!:ext	lha/lzh
# this should not happen if all -lh variants are described
>>>>>>2	default	x	LHa (unknown) archive
#!:ext	lha
# PMarc
>>>>>3	regex	\^pm[012]	PMarc archive data
!:ext	pma
# append method id without leading and trailing minus character
>>>>>3	string	x	[%3.3s]
>>>>>>0	use	lharc-header
#
# check and display information of lharc header
0	name	lharc-header
# header size 0x4 , 0x1b-0x61
>0	ubyte	x
# compressed data size != compressed file size
#>7	ulelong	x	\b, data size %d
# attribute: 0x2~?? 0x10~symlink|target 0x20~normal
#>19	ubyte	x	\b, 19_%#x
# level identifier 0 1 2 3
#>20	ubyte	x	\b, level %d
# time stamp
#>15	ubelong	x	DATE %#8.8x
# OS ID for level 1
>20	ubyte	1
# 0x20 types find for *.rom files
>>(21.b+24)	ubyte	<0x21	\b, %#x OS
# ascii type like M for MSDOS
>>(21.b+24)	ubyte	>0x20	\b, '%c' OS
# OS ID for level 2
>20	ubyte	2
#>>23	ubyte	x	\b, OS ID %#x
>>23	ubyte	<0x21	\b, %#x OS
>>23	ubyte	>0x20	\b, '%c' OS
# filename only for level 0 and 1
>20	ubyte	<2
# length of filename
>>21	ubyte	>0	\b, with
# filename
>>>21	pstring	x	"%s"
#
#2	string	-lh0-	LHarc 1.x/ARX archive data [lh0]
#!:mime	application/x-lharc
2	string	-lh0-
>0	use	lharc-file
#2	string	-lh1-	LHarc 1.x/ARX archive data [lh1]
#!:mime	application/x-lharc
2	string	-lh1-
>0	use	lharc-file
# NEW -lz2- ... -lz8-
2	string	-lz2-
>0	use	lharc-file
2	string	-lz3-
>0	use	lharc-file
2	string	-lz4-
>0	use	lharc-file
2	string	-lz5-
>0	use	lharc-file
2	string	-lz7-
>0	use	lharc-file
2	string	-lz8-
>0	use	lharc-file
# [never seen any but the last; -lh4- reported in comp.compression:]
#2	string	-lzs-	LHa/LZS archive data [lzs]
2	string	-lzs-
>0	use	lharc-file
# According to wikipedia and others such a version does not exist
#2	string	-lh\40-	LHa 2.x? archive data [lh ]
#2	string	-lhd-	LHa 2.x? archive data [lhd]
2	string	-lhd-
>0	use	lharc-file
#2	string	-lh2-	LHa 2.x? archive data [lh2]
2	string	-lh2-
>0	use	lharc-file
#2	string	-lh3-	LHa 2.x? archive data [lh3]
2	string	-lh3-
>0	use	lharc-file
#2	string	-lh4-	LHa (2.x) archive data [lh4]
2	string	-lh4-
>0	use	lharc-file
#2	string	-lh5-	LHa (2.x) archive data [lh5]
2	string	-lh5-
>0	use	lharc-file
#2	string	-lh6-	LHa (2.x) archive data [lh6]
2	string	-lh6-
>0	use	lharc-file
#2	string	-lh7-	LHa (2.x)/LHark archive data [lh7]
2	string	-lh7-
# !:mime application/x-lha
# >20 byte x - header level %d
>0	use	lharc-file
# NEW -lh8- ... -lhe- , -lhx-
2	string	-lh8-
>0	use	lharc-file
2	string	-lh9-
>0	use	lharc-file
2	string	-lha-
>0	use	lharc-file
2	string	-lhb-
>0	use	lharc-file
2	string	-lhc-
>0	use	lharc-file
2	string	-lhe-
>0	use	lharc-file
2	string	-lhx-
>0	use	lharc-file
# taken from idarc [JW]
2	string	-lZ	PUT archive data
# already done by LHarc magics
# this should never happen if all sub types of LZS archive are identified
#2	string	-lz	LZS archive data
2	string	-sw1-	Swag archive data

# named entry: print version (byte 24) and host OS (byte 15), both relative
# to the offset at which the entry is invoked with `use`
0	name	rar-file-header
>24	byte	15	\b, v1.5
>24	byte	20	\b, v2.0
>24	byte	29	\b, v4
>15	byte	0	\b, os: MS-DOS
>15	byte	1	\b, os: OS/2
>15	byte	2	\b, os: Win32
>15	byte	3	\b, os: Unix
>15	byte	4	\b, os: Mac OS
>15	byte	5	\b, os: BeOS

# named entry: print the flag names set in the little-endian short at
# relative offset 3; nothing is printed when none of the low 9 bits is set
0	name	rar-archive-header
>3	leshort&0x1ff	>0	\b, flags:
>>3	leshort	&0x01	ArchiveVolume
>>3	leshort	&0x02	Commented
>>3	leshort	&0x04	Locked
>>3	leshort	&0x10	NewVolumeNaming
>>3	leshort	&0x08	Solid
>>3	leshort	&0x20	Authenticated
>>3	leshort	&0x40	RecoveryRecordPresent
>>3	leshort	&0x80	EncryptedBlockHeader
>>3	leshort	&0x100	FirstVolume

# RAR (Roshal Archive) archive
0	string	Rar!\x1a\7\0	RAR archive data
!:mime	application/x-rar
!:ext	rar/cbr
# file header
>(0xc.l+9)	byte	0x74
>>(0xc.l+7)	use	rar-file-header
# subblock seems to share information with file header
>(0xc.l+9)	byte	0x7a
>>(0xc.l+7)	use	rar-file-header
>9	byte	0x73
>>7	use	rar-archive-header

0	string	Rar!\x1a\7\1\0	RAR archive data, v5
!:mime	application/x-rar
!:ext	rar

# Very old RAR archive
# https://jasonblanks.com/wp-includes/images/papers/KnowyourarchiveRAR.pdf
0	string	RE\x7e\x5e	RAR archive data (<v1.5)
!:mime	application/x-rar
!:ext	rar/cbr

# SQUISH archiver (Greg Roelofs, newt@uchicago.edu)
0	string	SQSH	squished archive data (Acorn RISCOS)

# UC2 archiver (Greg Roelofs, newt@uchicago.edu)
# [JW] see exe section for self-extracting version
0	string	UC2\x1a	UC2 archive data

# PKZIP multi-volume archive
0	string	PK\x07\x08PK\x03\x04	Zip multi-volume archive data, at least PKZIP v2.50 to extract
!:mime	application/zip
!:ext	zip/cbz

# Zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
0	string	PK\005\006	Zip archive data (empty)
!:mime	application/zip
!:ext	zip/cbz
!:strength +1
0	string	PK\003\004
!:strength +1

# Specialised zip formats which start with a member named 'mimetype'
# (stored uncompressed, with no 'extra field') containing the file's MIME type.
# Check for have 8-byte name, 0-byte extra field, name "mimetype", and
# contents starting with "application/":
>26	string	\x8\0\0\0mimetypeapplication/

# KOffice / OpenOffice & StarOffice / OpenDocument formats
# From: Abel Cheung <abel@oaka.org>

# KOffice (1.2 or above) formats
# (mimetype contains "application/vnd.kde.<SUBTYPE>")
>>50	string	vnd.kde.	KOffice (>=1.2)
>>>58	string	karbon	Karbon document
>>>58	string	kchart	KChart document
>>>58	string	kformula	KFormula document
>>>58	string	kivio	Kivio document
>>>58	string	kontour	Kontour document
>>>58	string	kpresenter	KPresenter document
>>>58	string	kspread	KSpread document
>>>58	string	kword	KWord document

# OpenOffice formats (for OpenOffice 1.x / StarOffice 6/7)
# (mimetype contains "application/vnd.sun.xml.<SUBTYPE>")
# URL: https://en.wikipedia.org/wiki/OpenOffice.org_XML
# reference: http://fileformats.archiveteam.org/wiki/OpenOffice.org_XML
>>50	string	vnd.sun.xml.	OpenOffice.org 1.x
>>>62	string	writer	Writer
>>>>68	byte	!0x2e	document
!:mime	application/vnd.sun.xml.writer
!:ext	sxw
>>>>68	string	.template	template
!:mime	application/vnd.sun.xml.writer.template
!:ext	stw
>>>>68	string	.web	Web template
!:mime	application/vnd.sun.xml.writer.web
!:ext	stw
>>>>68	string	.global	global document
!:mime	application/vnd.sun.xml.writer.global
!:ext	sxg
>>>62	string	calc	Calc
>>>>66	byte	!0x2e	spreadsheet
!:mime	application/vnd.sun.xml.calc
!:ext	sxc
>>>>66	string	.template	template
!:mime	application/vnd.sun.xml.calc.template
!:ext	stc
>>>62	string	draw	Draw
>>>>66	byte	!0x2e	document
!:mime	application/vnd.sun.xml.draw
!:ext	sxd
>>>>66	string	.template	template
!:mime	application/vnd.sun.xml.draw.template
!:ext	std
>>>62	string	impress	Impress
>>>>69	byte	!0x2e	presentation
!:mime	application/vnd.sun.xml.impress
!:ext	sxi
>>>>69	string	.template	template
!:mime	application/vnd.sun.xml.impress.template
!:ext	sti
>>>62	string	math	Math document
!:mime	application/vnd.sun.xml.math
!:ext	sxm
>>>62	string	base	Database file
!:mime	application/vnd.sun.xml.base
!:ext	sdb

# URL: https://wiki.openoffice.org/wiki/Documentation/DevGuide/Extensions/File_Format
# From: Joerg Jenderek
# Note: only few OXT samples are detected here by mimetype member
# is used by OpenOffice and LibreOffice and probably also NeoOffice
# verified by `unzip -Zv *.oxt` or `7z l -slt *.oxt`
>>50	string	vnd.openofficeorg.	OpenOffice
>>>68	string	extension	\b/LibreOffice Extension
# http://extension.nirsoft.net/oxt
!:mime	application/vnd.openofficeorg.extension
# like: Gallery-Puzzle.2.1.0.1.oxt
!:ext	oxt

# OpenDocument formats (for OpenOffice 2.x / StarOffice >= 8)
# URL: http://fileformats.archiveteam.org/wiki/OpenDocument
# https://lists.oasis-open.org/archives/office/200505/msg00006.html
# (mimetype contains "application/vnd.oasis.opendocument.<SUBTYPE>")
>>50	string	vnd.oasis.opendocument.	OpenDocument
>>>73	string	text
>>>>77	byte	!0x2d	Text
!:mime	application/vnd.oasis.opendocument.text
!:ext	odt
>>>>77	string	-template	Text Template
!:mime	application/vnd.oasis.opendocument.text-template
!:ext	ott
>>>>77	string	-web	HTML Document Template
!:mime	application/vnd.oasis.opendocument.text-web
!:ext	oth
>>>>77	string	-master	Master Document
!:mime	application/vnd.oasis.opendocument.text-master
!:ext	odm
>>>73	string	graphics
>>>>81	byte	!0x2d	Drawing
!:mime	application/vnd.oasis.opendocument.graphics
!:ext	odg
>>>>81	string	-template	Drawing Template
!:mime	application/vnd.oasis.opendocument.graphics-template
!:ext	otg
>>>73	string	presentation
>>>>85	byte	!0x2d	Presentation
!:mime	application/vnd.oasis.opendocument.presentation
!:ext	odp
>>>>85	string	-template	Presentation Template
!:mime	application/vnd.oasis.opendocument.presentation-template
!:ext	otp
>>>73	string	spreadsheet
>>>>84	byte	!0x2d	Spreadsheet
!:mime	application/vnd.oasis.opendocument.spreadsheet
!:ext	ods
>>>>84	string	-template	Spreadsheet Template
!:mime	application/vnd.oasis.opendocument.spreadsheet-template
!:ext	ots
>>>73	string	chart
>>>>78	byte	!0x2d	Chart
!:mime	application/vnd.oasis.opendocument.chart
!:ext	odc
>>>>78	string	-template	Chart Template
!:mime	application/vnd.oasis.opendocument.chart-template
!:ext	otc
>>>73	string	formula
>>>>80	byte	!0x2d	Formula
!:mime	application/vnd.oasis.opendocument.formula
!:ext	odf
>>>>80	string	-template	Formula Template
!:mime	application/vnd.oasis.opendocument.formula-template
!:ext	otf
# https://www.loc.gov/preservation/digital/formats/fdd/fdd000441.shtml
>>>73	string	database	Database
!:mime	application/vnd.oasis.opendocument.database
!:ext	odb
# Valid for LibreOffice Base 6.0.1.1 at least
>>>73	string	base	Database
# https://bugs.documentfoundation.org/show_bug.cgi?id=45854
!:mime	application/vnd.oasis.opendocument.database
#!:mime	application/vnd.oasis.opendocument.base
!:ext	odb
>>>73	string	image
>>>>78	byte	!0x2d	Image
!:mime	application/vnd.oasis.opendocument.image
!:ext	odi
>>>>78	string	-template	Image Template
!:mime	application/vnd.oasis.opendocument.image-template
!:ext	oti

# EPUB (OEBPS) books using OCF (OEBPS Container Format)
# https://www.idpf.org/ocf/ocf1.0/download/ocf10.htm, section 4.
# From: Ralf Brown <ralf.brown@gmail.com>
>>50	string	epub+zip	EPUB document
!:mime	application/epub+zip

# From: Joerg Jenderek
# URL: http://en.wikipedia.org/wiki/CorelDRAW
# NOTE: version; til 2 WL-based; from 3 til 13 by ./riff; from 14 zip based
>>50	string	x-vnd.corel.	Corel
>>>62	string	draw.document+zip	Draw drawing, version 14-16
!:mime	application/x-vnd.corel.draw.document+zip
!:ext	cdr
>>>62	string	draw.template+zip	Draw template, version 14-16
!:mime	application/x-vnd.corel.draw.template+zip
!:ext	cdrt
>>>62	string	zcf.draw.document+zip	Draw drawing, version 17-22
!:mime	application/x-vnd.corel.zcf.draw.document+zip
!:ext	cdr
>>>62	string	zcf.draw.template+zip	Draw template, version 17-22
!:mime	application/x-vnd.corel.zcf.draw.template+zip
!:ext	cdt/cdrt
# URL: http://product.corel.com/help/CorelDRAW/540240626/Main/EN/Doc/CorelDRAW-Other-file-formats.html
>>>62	string	zcf.pattern+zip	Draw pattern, version 22
!:mime	application/x-vnd.corel.zcf.pattern+zip
!:ext	pat
# URL: https://en.wikipedia.org/wiki/Corel_Designer
# Reference: http://fileformats.archiveteam.org/wiki/Corel_Designer
# Note: called by TrID "Corel DESIGN graphics"
>>>62	string	designer.document+zip	DESIGNER graphics, version 14-16
!:mime	application/x-vnd.corel.designer.document+zip
!:ext	des
>>>62	string	zcf.designer.document+zip	DESIGNER graphics, version 17-21
!:mime	application/x-vnd.corel.zcf.designer.document+zip
!:ext	des
# URL: http://product.corel.com/help/CorelDRAW/540223850/Main/EN/Documentation/
# CorelDRAW-Corel-Symbol-Library-CSL.html
>>>62	string	symbol.library+zip	Symbol Library, version 6-16.3
!:mime	application/x-vnd.corel.symbol.library+zip
!:ext	csl
>>>62	string	zcf.symbol.library+zip	Symbol Library, version 17-22
!:mime	application/x-vnd.corel.zcf.symbol.library+zip
!:ext	csl

# Catch other ZIP-with-mimetype formats
# In a ZIP file, the bytes immediately after a member's contents are
# always "PK". The 2 regex rules here print the "mimetype" member's
# contents up to the first 'P'. Luckily, most MIME types don't contain
# any capital 'P's. This is a kludge.
# (mimetype contains "application/<OTHER>")
>>50	default	x	Zip data
>>>38	regex	[!-OQ-~]+	(MIME type "%s"?)
!:mime	application/zip
# (mimetype contents other than "application/*")
>26	string	\x8\0\0\0mimetype
>>38	string	!application/
>>>38	regex	[!-OQ-~]+	Zip data (MIME type "%s"?)
!:mime	application/zip

# Java Jar files
>(26.s+30)	leshort	0xcafe	Java archive data (JAR)
!:mime	application/java-archive

# iOS App
>(26.s+30)	leshort	!0xcafe
>>26	string	!\x8\0\0\0mimetype
>>>30	string	Payload/
>>>>38	search/64	.app/	iOS App
!:mime	application/x-ios-app

# Dup, see above.
#>30	search/100/b	application/epub+zip	EPUB document
#!:mime	application/epub+zip

# Generic zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
# Next line excludes specialized formats:
>(26.s+30)	leshort	!0xcafe
>>30	search/100/b	!application/epub+zip
>>>26	string	!\x8\0\0\0mimetype	Zip archive data
!:mime	application/zip
>>>>4	beshort	x	\b, at least
>>>>4	use	zipversion
>>>>4	beshort	x	to extract
>>>>8	beshort	x	\b, compression method=
>>>>8	use	zipcompression
>>>>0x161	string	WINZIP	\b, WinZIP self-extracting

# StarView Metafile
# From Pierre Ducroquet <pinaraf@pinaraf.info>
0	string	VCLMTF	StarView MetaFile
>6	beshort	x	\b, version %d
>8	belong	x	\b, size %d

# Zoo archiver
20	lelong	0xfdc4a7dc	Zoo archive data
!:mime	application/x-zoo
>4	byte	>48	\b, v%c.
>>6	byte	>47	\b%c
>>>7	byte	>47	\b%c
>32	byte	>0	\b, modify: v%d
>>33	byte	x	\b.%d+
>42	lelong	0xfdc4a7dc	\b,
>>70	byte	>0	extract: v%d
>>>71	byte	x	\b.%d+

# Shell archives
10	string	#\ This\ is\ a\ shell\ archive	shell archive text
!:mime	application/octet-stream

#
# LBR. NB: May conflict with the questionable
# "binary Computer Graphics Metafile" format.
#
0	string	\0\ \ \ \ \ \ \ \ \ \ \ \0\0	LBR archive data
#
# PMA (CP/M derivative of LHA)
# Update: Joerg Jenderek
# URL: https://en.wikipedia.org/wiki/LHA_(file_format)
#
#2	string	-pm0-	PMarc archive data [pm0]
2	string	-pm0-
>0	use	lharc-file
#2	string	-pm1-	PMarc archive data [pm1]
2	string	-pm1-
>0	use	lharc-file
#2	string	-pm2-	PMarc archive data [pm2]
2	string	-pm2-
>0	use	lharc-file
2	string	-pms-	PMarc SFX archive (CP/M, DOS)
#!:mime	application/x-foobar-exec
!:ext	com
5	string	-pc1-	PopCom compressed executable (CP/M)
#!:mime	application/x-
#!:ext com

# From Rafael Laboissiere <rafael@laboissiere.net>
# The Project Revision Control System (see
# http://prcs.sourceforge.net) generates a packaged project
# file which is recognized by the following entry:
0	leshort	0xeb81	PRCS packaged project

# Microsoft cabinets
# by David Necas (Yeti) <yeti@physics.muni.cz>
#0	string	MSCF\0\0\0\0	Microsoft cabinet file data,
#>25	byte	x	v%d
#>24	byte	x	\b.%d
# MPi: All CABs have version 1.3, so this is pointless.
# Better magic in debian-additions.

# GTKtalog catalogs
# by David Necas (Yeti) <yeti@physics.muni.cz>
4	string	gtktalog\ 	GTKtalog catalog data,
>13	string	3	version 3
>>14	beshort	0x677a	(gzipped)
>>14	beshort	!0x677a	(not gzipped)
>13	string	>3	version %s

############################################################################
# Parity archive reconstruction file, the 'par' file format now used on Usenet.
17420 string PAR\0 PARity archive data 1743>48 leshort =0 - Index file 1744>48 leshort >0 - file number %d 1745 1746# Felix von Leitner <felix-file@fefe.de> 17470 string d8:announce BitTorrent file 1748!:mime application/x-bittorrent 1749!:ext torrent 1750# Durval Menezes, <jmgthbfile at durval dot com> 17510 string d13:announce-list BitTorrent file 1752!:mime application/x-bittorrent 1753!:ext torrent 17540 string d7:comment BitTorrent file 1755!:mime application/x-bittorrent 1756!:ext torrent 17570 string d4:info BitTorrent file 1758!:mime application/x-bittorrent 1759!:ext torrent 1760 1761# Atari MSA archive - Teemu Hukkanen <tjhukkan@iki.fi> 1762# URL: http://fileformats.archiveteam.org/wiki/MSA_(Magic_Shadow_Archiver) 1763# Reference: http://info-coach.fr/atari/documents/_mydoc/FD_Image_File_Format.pdf 1764# http://mark0.net/download/triddefs_xml.7z/defs/m/msa.trid.xml 1765# Update: Joerg Jenderek 1766# Note: called by TrID "Atari MSA Disk Image" and verified by 1767# command like `deark -l -m msa -d2 PDATS578.msa` as " Atari ST floppy disk image" 1768# GRR: line below is too general as it matches setup.skin 17690 beshort 0x0e0f 1770# skip foo setup.skin with unrealistic high number 52255 of sides by check for valid "low" value 1771>4 ubeshort <2 Atari MSA archive data 1772#!:mime application/octet-stream 1773!:mime application/x-atari-msa 1774!:ext msa 1775# sectors per track like: 9 10 1776>>2 beshort x \b, %d sectors per track 1777# sides (0 or 1; add 1 to this to get correct number of sides) 1778>>4 beshort 0 \b, 1 sided 1779>>4 beshort 1 \b, 2 sided 1780# starting track like: 0 1781>>6 beshort x \b, starting track: %d 1782# ending track like: 39 79 80 81 1783>>8 beshort x \b, ending track: %d 1784# tracks content 1785#>>10 ubequad x \b, track content %#16.16llx 1786 1787# Alternate ZIP string (amc@arwen.cs.berkeley.edu) 17880 string PK00PK\003\004 Zip archive data 1789!:mime application/zip 1790!:ext zip/cbz 1791 1792# ACE archive (from 
# http://www.wotsit.org/download.asp?f=ace)
# by Stefan `Sec` Zehl <sec@42.org>
7	string	**ACE**	ACE archive data
!:mime	application/x-ace-compressed
!:ext	ace
>15	byte	>0	version %d
# byte 16: originating host system
>16	byte	=0x00	\b, from MS-DOS
>16	byte	=0x01	\b, from OS/2
>16	byte	=0x02	\b, from Win/32
>16	byte	=0x03	\b, from Unix
>16	byte	=0x04	\b, from MacOS
>16	byte	=0x05	\b, from WinNT
>16	byte	=0x06	\b, from Primos
>16	byte	=0x07	\b, from AppleGS
>16	byte	=0x08	\b, from Atari
>16	byte	=0x09	\b, from Vax/VMS
>16	byte	=0x0A	\b, from Amiga
>16	byte	=0x0B	\b, from Next
>14	byte	x	\b, version %d to extract
# little-endian flag word at offset 5, tested bit by bit
>5	leshort	&0x0080	\b, multiple volumes,
>>17	byte	x	\b (part %d),
>5	leshort	&0x0002	\b, contains comment
>5	leshort	&0x0200	\b, sfx
>5	leshort	&0x0400	\b, small dictionary
>5	leshort	&0x0800	\b, multi-volume
>5	leshort	&0x1000	\b, contains AV-String
>>30	string	\x16*UNREGISTERED\x20VERSION*	(unregistered)
>5	leshort	&0x2000	\b, with recovery record
>5	leshort	&0x4000	\b, locked
>5	leshort	&0x8000	\b, solid
# Date in MS-DOS format (whatever that is)
#>18	lelong	x	Created on

# sfArk : compression program for Soundfonts (sf2) by Dirk Jagdmann
# <doj@cubic.org>
0x1A	string	sfArk	sfArk compressed Soundfont
>0x15	string	2
>>0x1	string	>\0	Version %s
>>0x2A	string	>\0	: %s

# DR-DOS 7.03 Packed File *.??_
# Reference: http://www.antonis.de/dos/dos-tuts/mpdostip/html/nwdostip.htm
# Note: unpacked by PNUNPACK.EXE
# The signature string on the next line ends with an escaped space.
0	string	Packed\ File\ 
# by looking for Control-Z skip ASCII text starting with Packed File
>0x18	ubyte	0x1a	Personal NetWare Packed File
!:mime	application/x-novell-compress
!:ext	??_
>>12	string	x	\b, was "%.12s"
# 1 or 2
#>>0x19	ubyte	x	\b, at 0x19 %u
>>0x1b	ulelong	x	with %u bytes

# EET archive
# From: Tilman Sauerbeck <tilman@code-monkey.de>
0	belong	0x1ee7ff00	EET archive
!:mime	application/x-eet

# rzip archives
0	string	RZIP	rzip compressed data
>4	byte	x	- version %d
>5	byte	x	\b.%d
>6	belong	x	(%d bytes)

# From: Joerg Jenderek
# URL: https://help.foxitsoftware.com/kb/install-fzip-file.php
# reference: http://mark0.net/download/triddefs_xml.7z/
# defs/f/fzip.trid.xml
# Note: unknown compression; No "PK" zip magic; normally in directory like
# "%APPDATA%\Foxit Software\Addon\Foxit Reader\Install"
0	ubequad	0x2506781901010000	Foxit add-on/update
!:mime	application/x-fzip
!:ext	fzip

# From: "Robert Dale" <robdale@gmail.com>
0	belong	123	dar archive,
# the label is printed from three consecutive fields inside one pair of quotes
>4	belong	x	label "%.8x
>>8	belong	x	%.8x
>>>12	beshort	x	%.4x"
>14	byte	0x54	end slice
>14	beshort	0x4e4e	multi-part
>14	beshort	0x4e53	multi-part, with -S

# Symbian installation files
# https://www.thouky.co.uk/software/psifs/sis.html
# http://developer.symbian.com/main/downloads/papers/SymbianOSv91/softwareinstallsis.pdf
8	lelong	0x10000419	Symbian installation file
!:mime	application/vnd.symbian.install
>4	lelong	0x1000006D	(EPOC release 3/4/5)
>4	lelong	0x10003A12	(EPOC release 6)
0	lelong	0x10201A7A	Symbian installation file (Symbian OS 9.x)
!:mime	x-epoc/x-sisx-app

# From "Nelson A. de Oliveira" <naoliv@gmail.com>
0	string	MPQ\032	MoPaQ (MPQ) archive

# From: "Nelson A. de Oliveira" <naoliv@gmail.com>
# .kgb
0	string	KGB_arch	KGB Archiver file
>10	string	x	with compression level %.1s

# xar (eXtensible ARchiver) archive
# URL: https://en.wikipedia.org/wiki/Xar_(archiver)
# xar archive format: https://code.google.com/p/xar/
# From: "David Remahl" <dremahl@apple.com>
# Update: Joerg Jenderek
# TODO: lzma compression; X509Data for pkg and xip
# Note: verified by `xar --dump-header -f FullBundleUpdate.xar` or
# `7z t -txar Xcode_10.2_beta_4.xip`
0	string	xar!	xar archive
!:mime	application/x-xar
# pkg for Mac OSX installer package like FullBundleUpdate.pkg
# xip for signed Apple software like Xcode_10.2_beta_4.xip
!:ext	xar/pkg/xip
# always 28 in older archives
>4	ubeshort	>28	\b, header size %u
# currently there exist only version 1 since about 2014
>6	ubeshort	>1	version %u,
>8	ubequad	x	compressed TOC: %llu,
#>16	ubequad	x	uncompressed TOC: %llu,
# cksum_alg 0-2 in older and also 3-4 in newer
>24	belong	0	no checksum
>24	belong	1	SHA-1 checksum
>24	belong	2	MD5 checksum
>24	belong	3	SHA-256 checksum
>24	belong	4	SHA-512 checksum
>24	belong	>4	unknown %#x checksum
#>24	belong	>4	checksum
# The branches below chain relative offsets: a start offset chosen per
# checksum type, plus the header size (offset 4), plus the compressed TOC
# size (offset 8), and then inspect the data found there via "indirect".
# NOTE(review): there is no such branch for MD5 (cksum_alg 2).
# For no checksum jump 0 bytes
>24	belong	0
>>0	ubyte	x
# jump more bytes forward by header size
>>>&(4.S)	ubyte	x
# jump more bytes forward by compressed table of contents size
#>>>>&(8.Q)	ubequad	x	\b, heap data %#llx
>>>>&(8.Q)	ubyte	x
# look for data by ./compress after message with 1 space at end
>>>>>&-3	indirect	x	\b, contains 
# For SHA-1 jump 20 minus 2 bytes
>24	belong	1
>>18	ubyte	x
# jump more bytes forward by header size
>>>&(4.S)	ubyte	x
# jump more bytes forward by compressed table of contents size
>>>>&(8.Q)	ubyte	x
# data compressed by gzip, bzip, lzma or none
>>>>>&-1	indirect	x	\b, contains 
# For SHA-256 jump 32 minus 2 bytes
>24	belong	3
>>30	ubyte	x
# jump more bytes forward by header size
>>>&(4.S)	ubyte	x
# jump more bytes forward by compressed table of contents size
>>>>&(8.Q)	ubyte	x
>>>>>&-1	indirect	x	\b, contains 
# For SHA-512 jump 64 minus 2 bytes
>24	belong	4
>>62	ubyte	x
# jump more bytes forward by header size
>>>&(4.S)	ubyte	x
# jump more bytes forward by compressed table of contents size
>>>>&(8.Q)	ubyte	x
>>>>>&-1	indirect	x	\b, contains 

# Type: Parity Archive
# From: Daniel van
# Eeden <daniel_e@dds.nl>
0	string	PAR2	Parity Archive Volume Set

# Bacula volume format. (Volumes always start with a block header.)
# URL: https://bacula.org/3.0.x-manuals/en/developers/developers/Block_Header.html
# From: Adam Buchbinder <adam.buchbinder@gmail.com>
12	string	BB02	Bacula volume
>20	bedate	x	\b, started %s

# ePub is XHTML + XML inside a ZIP archive. The first member of the
# archive must be an uncompressed file called 'mimetype' with contents
# 'application/epub+zip'


# From: "Michael Gorny" <mgorny@gentoo.org>
# ZPAQ: http://mattmahoney.net/dc/zpaq.html
0	string	zPQ	ZPAQ stream
>3	byte	x	\b, level %d
# From: Barry Carter <carter.barry@gmail.com>
# https://encode.ru/threads/456-zpaq-updates/page32
0	string	7kSt	ZPAQ file

# BBeB ebook, unencrypted (LRF format)
# URL: https://www.sven.de/librie/Librie/LrfFormat
# From: Adam Buchbinder <adam.buchbinder@gmail.com>
0	string	L\0R\0F\0\0\0	BBeB ebook data, unencrypted
>8	beshort	x	\b, version %d
>36	byte	1	\b, front-to-back
>36	byte	16	\b, back-to-front
# the two values at offsets 42 and 44 are printed together as "(<a>x, <b>)"
>42	beshort	x	\b, (%dx,
>44	beshort	x	%d)

# Symantec GHOST image by Joerg Jenderek at May 2014
# https://us.norton.com/ghost/
# https://www.garykessler.net/library/file_sigs.html
# the mask 0xFFFFf7f0 ignores bit 0x08 of byte 2 and the low nibble of byte 3
0	ubelong&0xFFFFf7f0	0xFEEF0100	Norton GHost image
# *.GHO
>2	ubyte&0x08	0x00	\b, first file
# *.GHS or *.[0-9] with cns program option
>2	ubyte&0x08	0x08	\b, split file
# part of split index interesting for *.ghs
>>4	ubyte	x	id=%#x
# compression tag minus one equals numeric compression command line switch z[1-9]
>3	ubyte	0	\b, no compression
>3	ubyte	2	\b, fast compression (Z1)
>3	ubyte	3	\b, medium compression (Z2)
>3	ubyte	>3
>>3	ubyte	<11	\b, compression (Z%d-1)
>2	ubyte&0x08	0x00
# ~ 30 byte password field only for *.gho
>>12	ubequad	!0	\b, password protected
>>44	ubyte	!1
# 1~Image All, sector-by-sector only for *.gho
>>>10	ubyte	1	\b, sector copy
# 1~Image Boot track only for *.gho
>>>43	ubyte	1	\b, boot track
# 1~Image Disc only for *.gho implies Image Boot track and sector copy
>>44	ubyte	1	\b, disc sector copy
# optional image description only *.gho
>>0xff	string	>\0	"%-.254s"
# look for DOS sector end sequence
>0xE08	search/7776	\x55\xAA
# step back 512 bytes from the end of the match and inspect that data
>>&-512	indirect	x	\b; contains 

# Google Chrome extensions
# https://developer.chrome.com/extensions/crx
# https://developer.chrome.com/extensions/hosting
0	string	Cr24	Google Chrome extension
!:mime	application/x-chrome-extension
>4	ulong	x	\b, version %u

# SeqBox - Sequenced container
# ext: sbx, seqbox
# Marco Pontello marcopon@gmail.com
# reference: https://github.com/MarcoPon/SeqBox
0	string	SBx	SeqBox,
>3	byte	x	version %d

# LyNX archive
56	string	USE\040LYNX\040TO\040DISSOLVE\040THIS\040FILE	LyNX archive

# From: Joerg Jenderek
# URL: https://www.acronis.com/
# Reference: https://en.wikipedia.org/wiki/TIB_(file_format)
# Note: only tested with True Image 2013 Build 5962 and 2019 Build 14110
0	ubequad	0xce24b9a220000000	Acronis True Image backup
!:mime	application/x-acronis-tib
!:ext	tib
# 01000000
#>20	ubelong	x	\b, at 20 %#x
# 20000000
#>28	ubelong	x	\b, at 28 %#x
# strings like "Generic- SD/MMC 1.00" "Unknown Disk" "Msft Virtual Disk 1.0"
# ???
# strings like "\Device\0000011e" "\Device\0000015a"
#>0	search/0x6852300/cs	\\Device\\
#>>&-1	pstring	x	\b, %s
# "\Device\HarddiskVolume30" "\Device\HarddiskVolume39"
#>>>&1	search/180/cs	\\Device\\
#>>>>&-1	pstring	x	\b, %s
#>>>>>&0	search/29/cs	\0\0\xc8\0
# disk label
#>>>>>>&10	lestring16	x	\b, disk label %11.11s
#>>>>>>&9	plestring16	x	\b, disk label "%11.11s"
#>>>>>>&10	ubequad	x	%16.16llx


# Gentoo XPAK binary package
# by Michal Gorny <mgorny@gentoo.org>
# https://gitweb.gentoo.org/proj/portage.git/tree/man/xpak.5
# negative offsets are counted from the end of the file
-4	string	STOP
>-16	string	XPAKSTOP	Gentoo binary package (XPAK)

# From: Joerg Jenderek
# URL: https://kodi.wiki/view/TexturePacker
# Reference: https://mirrors.kodi.tv/releases/source/17.3-Krypton.tar.gz
# /xbmc-Krypton/xbmc/guilib/XBTF.h
# /xbmc-Krypton/xbmc/guilib/XBTF.cpp
0	string	XBTF
# skip ASCII text by looking for terminating \0 of path
>264	ubyte	0	XBMC texture package
!:mime	application/x-xbmc-xbt
!:ext	xbt
# XBTF_VERSION 2
>>4	string	!2	\b, version %-.1s
# nofFiles /xbmc-Krypton/xbmc/guilib/XBTFReader.cpp
>>5	ulelong	x	\b, %u file
# plural s
>>5	ulelong	>1	\bs
# path[CXBTFFile[MaximumPathLength=256]
>>9	string	x	\b, 1st %s

# ALZIP archive
# by Hyungjun Park <hyungjun.park@worksmobile.com>, Hajin Jang <hajin_jang@worksmobile.com>
# http://kippler.com/win/unalz/
# https://salsa.debian.org/l10n-korean-team/unalz
0	string	ALZ\001	ALZ archive data
!:ext	alz

# https://cf-aldn.altools.co.kr/setup/EGG_Specification.zip
0	string	EGGA	EGG archive data,
!:ext	egg
# printed as "version <byte 5>.<byte 4>"
>5	byte	x	version %u
>4	byte	x	\b.%u
# value at 0x0E: the first listed value prints nothing, two others are
# labelled, anything else is reported as unknown
>>0x0E	ulelong	=0x08E28222
>>0x0E	ulelong	=0x24F5A262	\b, split
>>0x0E	ulelong	=0x24E5A060	\b, solid
>>0x0E	default	x	\b, unknown

# PAQ9A archive
# URL: http://mattmahoney.net/dc/#paq9a
# Note: Line 1186 of paq9a.cpp gives the magic bytes
0	string	pQ9\001	PAQ9A archive

# From wof (wof@stachelkaktus.net)
0	string	Unison\ archive\ format	Unison archive format
