#------------------------------------------------------------------------------
# $File: scientific,v 1.14 2023/04/29 17:28:09 christos Exp $
# scientific:  file(1) magic for scientific formats
#
# From: Joe Krahn <krahn@niehs.nih.gov>

########################################################
# CCP4 data and plot files:
# MTZ reflection data: the file starts with "MTZ" followed by one space
# (written as \040 so the space is part of the pattern).
0	string		MTZ\040		MTZ reflection file

# Plot84 plot files: identified by the signature string at offset 92.
92	string		PLOT%%84	Plot84 plotting file
# Two independent one-byte tests: a value of 1 at offset 52 is reported as
# little-endian, a value of 1 at offset 55 as big-endian.
>52	byte		1		, Little-endian
>55	byte		1		, Big-endian

########################################################
# Electron density MAP/MASK formats

# New-style EZD maps start with the literal text "EZD_MAP".
0	string		EZD_MAP	NEWEZD Electron Density Map
# Old-style EZD maps have "MAP (" at offset 109.  The unescaped blanks after
# the "(" end the pattern field, so only those five characters are compared.
109	string		MAP\040(  Old EZD Electron Density Map

# BRIX maps start with ":-) Origin".  The /c flag lets the lowercase letters
# of the pattern match either case in the file.
0	string/c	:-)\040Origin	BRIX Electron Density Map
# Print at most 12 characters of the text at offset 170 as the sigma value;
# the test succeeds when that text compares greater than the string "0".
>170	string		>0	, Sigma:%.12s
# Disabled alternatives that would have printed header text from offset 4.
#>4	string		>0	%.178s
#>4	addr		x	%.178s

# X-PLOR ASCII maps: "18 !NTITLE" at offset 7.
7	string		18\040!NTITLE	XPLOR ASCII Electron Density Map
# CNS ASCII maps: " !NTITLE", a newline (\012) and " REMARK" at offset 9.
9	string		\040!NTITLE\012\040REMARK	CNS ASCII electron density map

# CCP4 maps: "MAP " at offset 208, immediately followed (offset 212) by a
# stamp byte that is mapped to a number-format description below.
208	string		MAP\040	CCP4 Electron Density Map
# Assumes same stamp for float and double (normal case)
# The stamp values are written in decimal: 17 = 0x11, 34 = 0x22, 68 = 0x44,
# 85 = 0x55.  \b suppresses the blank before the appended text.
>212	byte		17	\b, Big-endian
>212	byte		34	\b, VAX format
>212	byte		68	\b, Little-endian
>212	byte		85	\b, Convex native

############################################################
# X-Ray Area Detector images
# R-AXIS4 images: "R-AXIS4" padded with three spaces at offset 0.
0	string	R-AXIS4\ \ \ 	R-Axis Area Detector Image:
# No byte-order flag is tested; instead the 32-bit value at offset 796 is
# read in both byte orders and whichever reading is below 20 is printed as
# the IP number, followed by the two size fields at offsets 768 and 772 read
# in the same byte order.
# NOTE(review): lelong/belong are signed, so a negative reading also satisfies
# "<20", and a value of 0 satisfies both tests - confirm this is acceptable.
>796	lelong	<20		Little-endian, IP #%d,
>>768	lelong	>0		Size=%dx
>>772	lelong	>0		\b%d
>796	belong	<20		Big-endian, IP #%d,
>>768	belong	>0		Size=%dx
>>772	belong	>0		\b%d

# Win32 variant: "RAXIS" padded with five spaces at offset 0.
0	string	RAXIS\ \ \ \ \ 	R-Axis Area Detector Image, Win32:
# The 32-bit value at offset 796 is read in both byte orders; the reading
# that is below 20 is printed as the IP number together with the size fields
# at offsets 768 and 772 read in the same byte order.
# NOTE(review): lelong/belong are signed, so a negative reading also satisfies
# "<20", and a value of 0 satisfies both tests - confirm this is acceptable.
>796	lelong	<20		Little-endian, IP #%d,
>>768	lelong	>0		Size=%dx
>>772	lelong	>0		\b%d
>796	belong	<20		Big-endian, IP #%d,
>>768	belong	>0		Size=%dx
>>772	belong	>0		\b%d


# MAR images: "MMX" followed by NUL padding at offset 1028.
1028	string	MMX\000\000\000\000\000\000\000\000\000\000\000\000\000	MAR Area Detector Image,
# The header fields below use "ulong", i.e. unsigned 32-bit values read in
# the byte order of the machine running file(1).
# NOTE(review): files written on a host of the opposite byte order will print
# byte-swapped numbers here - confirm whether that case matters.
>1072	ulong	>1		Compressed(%d),
>1100	ulong	>1		%d headers,
>1104	ulong	>0		%d x
>1108	ulong	>0		%d,
>1120	ulong	>0		%d bits/pixel

# Type: GEDCOM genealogical (family history) data
# From: Giuseppe Bilotta
# Update:	Joerg Jenderek
# URL:		http://fileformats.archiveteam.org/wiki/GEDCOM
#		https://en.wikipedia.org/wiki/GEDCOM
# Reference:	http://mark0.net/download/triddefs_xml.7z/defs/g/
#		ged.trid.xml ged-utf8.trid.xml ged-utf16.trid.xml
# Note:		called "GEDCOM Family History" by TrID and "Genealogical Data Communication (GEDCOM) Format" by DROID via PUID fmt/851
# Look for the record "0 HEAD" within a 1-byte search window starting at
# offset 0.
# NOTE(review): /c only makes lowercase pattern letters case-insensitive and
# this pattern contains none, so the flag may have no effect - confirm intent.
0       search/1/c	0\ HEAD         GEDCOM genealogy text
#!:mime	text/plain
#!:mime	application/x-gedcom
# https://www.iana.org/assignments/media-types/text/vnd.familysearch.gedcom
!:mime	text/vnd.familysearch.gedcom
!:ext	ged
# no gedcom sample found and ged suffix also used for other formats
#!:ext	ged/gedcom
# After the header line, find the "1 GEDC" record (prints nothing itself),
# then the "2 VERS" record that follows it.
>&0     search		1\ GEDC
>>&0    search		2\ VERS         version
# 4 5.0 5.3 5.4 5.5 5.5.1 5.5.5 5.6 7.0 or no version
# Skip the single separator byte after "2 VERS" and print the version text if
# it is non-empty.
>>>&1   string		>\0		%s
# From: Phil Endecott <phil05@chezphil.org>
# 0\040HEAD as UTF-16 big endian without BOM
# The 12 bytes below spell "0 HEAD" with a zero byte before each character.
0	string	\000\060\000\040\000\110\000\105\000\101\000\104		GEDCOM genealogy text
!:mime	text/vnd.familysearch.gedcom
!:ext	ged
# look for VERS tag encoded as UTF-16 big endian
# The search starts right after the 12-byte header match and covers 0x65
# bytes.  The pattern ends on the "S" byte, which in big-endian order is the
# last byte of that character.
>12		search/0x65	V\0E\0R\0S					version
# version like: 5.5.1
# Skip the two bytes of the space character that follows "VERS".
>>&2		bestring16	x						%s
# Always-true test used only to append the encoding note; because it is nested
# under the VERS search it is printed only when that search succeeded.
>>0		string		x						\b, UTF-16 (without BOM) big-endian text
# 0\040HEAD as UTF-16 little endian without BOM
# The 12 bytes below spell "0 HEAD" with a zero byte after each character.
0	string	\060\000\040\000\110\000\105\000\101\000\104\000		GEDCOM genealogy text
!:mime	text/vnd.familysearch.gedcom
!:ext	ged
# look for VERS tag encoded as UTF-16 little endian
# The search starts right after the 12-byte header match and covers 0x65
# bytes.  The pattern ends on the "S" byte, which in little-endian order is
# the first byte of that character.
>12		search/0x65	V\0E\0R\0S					version
# version like: 5.5.1
# Skip three bytes: the zero byte completing "S" plus the two bytes of the
# space character that follows "VERS".
>>&3		lestring16	x						%s
# Always-true test used only to append the encoding note; because it is nested
# under the VERS search it is printed only when that search succeeded.
>>2		string		x						\b, UTF-16 (without BOM) little-endian text
# Note:		UTF-16 with BOM variants already described above by first test as "GEDCOM genealogy text"
# 0\040HEAD as UTF-16 big endian with BOM
#0	string	\376\377\000\060\000\040\000\110\000\105\000\101\000\104	GEDCOM data
# 0\040HEAD as UTF-16 little endian with BOM
#0	string	\377\376\060\000\040\000\110\000\105\000\101\000\104\000	GEDCOM data

# PDB: Protein Data Bank files
# Adam Buchbinder <adam.buchbinder@gmail.com>
#
# https://www.wwpdb.org/documentation/format32/sect2.html
# https://www.ch.ic.ac.uk/chemime/
#
# The PDB file format is fixed-field, 80 columns. From the spec:
#
# COLS        DATA
#  1 -  6      "HEADER"
#  11 - 50     String(40)
#  51 - 59     Date
#  63 - 66     IDcode
#
# Thus, positions 7-10, 60-62 and 67-80 are spaces. The Date must be in the
# format DD-MMM-YY, e.g., 01-JAN-70, and the IDcode consists of numbers and
# uppercase letters. However, examples have been seen without the date string,
# e.g., the example on the chemime site.
# "HEADER" followed by four spaces (columns 1-10 of the first record).
0	string	HEADER\ \ \ \040
# Each regex below is limited to one line (/1l) and continues from where the
# previous match ended (&0).
# 40 characters: presumably the String(40) field of columns 11-50.
>&0	regex/1l	\^.{40}
# Date in DD-MMM-YY form followed by three spaces.
>>&0	regex/1l	[0-9]{2}-[A-Z]{3}-[0-9]{2}\ {3}
# Four-character ID code plus 14 more characters up to end of line; /s leaves
# the offset at the start of the match so the ID code can be matched again.
>>>&0	regex/1ls	[A-Z0-9]{4}.{14}$
# Match the ID code alone and print it.
>>>>&0	regex/1l	[A-Z0-9]{4}	Protein Data Bank data, ID Code %s
!:mime	chemical/x-pdb
# Search the first line again from offset 0 for the date and append it.
>>>>0	regex/1l	[0-9]{2}-[A-Z]{3}-[0-9]{2}	\b, %s

# Type:	GDSII Stream file
# A GDSII stream opens with a HEADER record: record length 0x0006, record
# type 0x00 and data type 0x02 (two-byte integer), followed by the version.
0	belong	0x00060002	GDSII Stream file
# The version is a single 16-bit big-endian integer, not a major/minor byte
# pair.  Older releases store the plain release number (0, 3, 4, 5), which is
# printed as "N.0" exactly as before.  From release 6.0 on the value is the
# release number times 100 (600 = 0x0258); printing the two bytes separately
# would turn that into "2.88", so the stored value is reported as-is.
# The unsigned type keeps values with the high bit set from being skipped.
>4	ubeshort	<100		version %d.0
>4	ubeshort	>99		version %d

# Type: LXT (interLaced eXtensible Trace)
# chrysn <chrysn@fsfe.org>
# LXT traces are identified by the 16-bit big-endian value 0x0138 at offset 0.
0	beshort	0x0138	interLaced eXtensible Trace (LXT) file
# The next 16-bit big-endian field is printed as the version when positive.
>2	beshort	>0	(Version %u)