# coding: utf-8

'''
Python bindings for libmagic
'''

import ctypes
import threading

from collections import namedtuple

# NOTE: the star import is kept on purpose.  It is what brings Structure,
# POINTER, byref and the c_* types used below into scope, and removing it
# would change the set of names this module exposes.
from ctypes import *
from ctypes.util import find_library


def _init():
    """
    Loads the shared library through ctypes and returns a library
    L{ctypes.CDLL} instance

    Raises ImportError if no libmagic shared library can be located.
    """
    path = find_library('magic')
    if path is None:
        # find_library() returns None when nothing is found.  Handing that
        # to LoadLibrary() never loads libmagic and only shows up later as
        # a confusing missing-symbol error, so fail early and clearly.
        raise ImportError('failed to find libmagic; is it installed?')
    return ctypes.cdll.LoadLibrary(path)

_libraries = {}
_libraries['magic'] = _init()

# Flag constants for open and setflags
MAGIC_NONE = NONE = 0
MAGIC_DEBUG = DEBUG = 1
MAGIC_SYMLINK = SYMLINK = 2
MAGIC_COMPRESS = COMPRESS = 4
MAGIC_DEVICES = DEVICES = 8
MAGIC_MIME_TYPE = MIME_TYPE = 16
MAGIC_CONTINUE = CONTINUE = 32
MAGIC_CHECK = CHECK = 64
MAGIC_PRESERVE_ATIME = PRESERVE_ATIME = 128
MAGIC_RAW = RAW = 256
MAGIC_ERROR = ERROR = 512
MAGIC_MIME_ENCODING = MIME_ENCODING = 1024
MAGIC_MIME = MIME = 1040  # MIME_TYPE + MIME_ENCODING
MAGIC_APPLE = APPLE = 2048

MAGIC_NO_CHECK_COMPRESS = NO_CHECK_COMPRESS = 4096
MAGIC_NO_CHECK_TAR = NO_CHECK_TAR = 8192
MAGIC_NO_CHECK_SOFT = NO_CHECK_SOFT = 16384
MAGIC_NO_CHECK_APPTYPE = NO_CHECK_APPTYPE = 32768
MAGIC_NO_CHECK_ELF = NO_CHECK_ELF = 65536
MAGIC_NO_CHECK_TEXT = NO_CHECK_TEXT = 131072
MAGIC_NO_CHECK_CDF = NO_CHECK_CDF = 262144
MAGIC_NO_CHECK_TOKENS = NO_CHECK_TOKENS = 1048576
MAGIC_NO_CHECK_ENCODING = NO_CHECK_ENCODING = 2097152

MAGIC_NO_CHECK_BUILTIN = NO_CHECK_BUILTIN = 4173824

# Parameter selectors for getparam and setparam
MAGIC_PARAM_INDIR_MAX = PARAM_INDIR_MAX = 0
MAGIC_PARAM_NAME_MAX = PARAM_NAME_MAX = 1
MAGIC_PARAM_ELF_PHNUM_MAX = PARAM_ELF_PHNUM_MAX = 2
MAGIC_PARAM_ELF_SHNUM_MAX = PARAM_ELF_SHNUM_MAX = 3
MAGIC_PARAM_ELF_NOTES_MAX = PARAM_ELF_NOTES_MAX = 4
MAGIC_PARAM_REGEX_MAX = PARAM_REGEX_MAX = 5
MAGIC_PARAM_BYTES_MAX = PARAM_BYTES_MAX = 6

# Result type of the `detect_from_` functions
FileMagic = namedtuple('FileMagic', ('mime_type', 'encoding', 'name'))


class magic_set(Structure):
    # Opaque handle: the layout is private to libmagic, so no fields are
    # declared and instances are only ever used through magic_t pointers.
    pass
magic_set._fields_ = []
magic_t = POINTER(magic_set)

# ctypes prototypes for the libmagic entry points used below
_open = _libraries['magic'].magic_open
_open.restype = magic_t
_open.argtypes = [c_int]

_close = _libraries['magic'].magic_close
_close.restype = None
_close.argtypes = [magic_t]

_file = _libraries['magic'].magic_file
_file.restype = c_char_p
_file.argtypes = [magic_t, c_char_p]

_descriptor = _libraries['magic'].magic_descriptor
_descriptor.restype = c_char_p
_descriptor.argtypes = [magic_t, c_int]

_buffer = _libraries['magic'].magic_buffer
_buffer.restype = c_char_p
_buffer.argtypes = [magic_t, c_void_p, c_size_t]

_error = _libraries['magic'].magic_error
_error.restype = c_char_p
_error.argtypes = [magic_t]

_setflags = _libraries['magic'].magic_setflags
_setflags.restype = c_int
_setflags.argtypes = [magic_t, c_int]

_load = _libraries['magic'].magic_load
_load.restype = c_int
_load.argtypes = [magic_t, c_char_p]

_compile = _libraries['magic'].magic_compile
_compile.restype = c_int
_compile.argtypes = [magic_t, c_char_p]

_check = _libraries['magic'].magic_check
_check.restype = c_int
_check.argtypes = [magic_t, c_char_p]

_list = _libraries['magic'].magic_list
_list.restype = c_int
_list.argtypes = [magic_t, c_char_p]

_errno = _libraries['magic'].magic_errno
_errno.restype = c_int
_errno.argtypes = [magic_t]

_getparam = _libraries['magic'].magic_getparam
_getparam.restype = c_int
_getparam.argtypes = [magic_t, c_int, c_void_p]

_setparam = _libraries['magic'].magic_setparam
_setparam.restype = c_int
_setparam.argtypes = [magic_t, c_int, c_void_p]


class Magic(object):
    """
    Thin object wrapper around a libmagic handle (a magic_t pointer).

    Instances are normally obtained from the module level open() function.
    """

    def __init__(self, ms):
        self._magic_t = ms

    def close(self):
        """
        Closes the magic database and deallocates any resources used.

        Calling close() again on the same object does nothing.
        """
        if self._magic_t is not None:
            _close(self._magic_t)
            # Forget the handle so that a second close() (for example an
            # explicit close followed by a finalizer) cannot free it twice.
            self._magic_t = None

    @staticmethod
    def __tostr(s):
        # Convert a C string result (bytes or None) to a native str.
        if s is None:
            return None
        if isinstance(s, str):
            return s
        try:  # keep Python 2 compatibility
            return str(s, 'utf-8')
        except TypeError:
            return str(s)

    @staticmethod
    def __tobytes(b):
        # Convert a str argument to the bytes expected by c_char_p; None is
        # passed through so that it reaches libmagic as a NULL pointer.
        if b is None:
            return None
        if isinstance(b, bytes):
            return b
        try:  # keep Python 2 compatibility
            return bytes(b, 'utf-8')
        except TypeError:
            return bytes(b)

    def file(self, filename):
        """
        Returns a textual description of the contents of the argument passed
        as a filename or None if an error occurred and the MAGIC_ERROR flag
        is set. A call to errno() will return the numeric error code.
        """
        return Magic.__tostr(_file(self._magic_t, Magic.__tobytes(filename)))

    def descriptor(self, fd):
        """
        Returns a textual description of the contents of the argument passed
        as a file descriptor or None if an error occurred and the MAGIC_ERROR
        flag is set. A call to errno() will return the numeric error code.
        """
        return Magic.__tostr(_descriptor(self._magic_t, fd))

    def buffer(self, buf):
        """
        Returns a textual description of the contents of the argument passed
        as a buffer or None if an error occurred and the MAGIC_ERROR flag
        is set. A call to errno() will return the numeric error code.

        len(buf) is passed to libmagic as the buffer size, so buf should be
        a bytes-like object whose length is its size in bytes.
        """
        return Magic.__tostr(_buffer(self._magic_t, buf, len(buf)))

    def error(self):
        """
        Returns a textual explanation of the last error or None
        if there was no error.
        """
        return Magic.__tostr(_error(self._magic_t))

    def setflags(self, flags):
        """
        Set flags on the magic object which determine how magic checking
        behaves; a bitwise OR of the flags described in libmagic(3), but
        without the MAGIC_ prefix.

        Returns -1 on systems that don't support utime(2) or utimes(2)
        when PRESERVE_ATIME is set.
        """
        return _setflags(self._magic_t, flags)

    def load(self, filename=None):
        """
        Must be called before any magic queries can be performed. Loads
        the entries in the colon separated list of database files passed
        as argument, or the default database file if no argument is given.

        Returns 0 on success and -1 on failure.
        """
        return _load(self._magic_t, Magic.__tobytes(filename))

    def compile(self, dbs=None):
        """
        Compile entries in the colon separated list of database files
        passed as argument or the default database file if no argument.
        The compiled files created are named from the basename(1) of each file
        argument with ".mgc" appended to it.

        Returns 0 on success and -1 on failure.
        """
        return _compile(self._magic_t, Magic.__tobytes(dbs))

    def check(self, dbs=None):
        """
        Check the validity of entries in the colon separated list of
        database files passed as argument or the default database file
        if no argument.

        Returns 0 on success and -1 on failure.
        """
        return _check(self._magic_t, Magic.__tobytes(dbs))

    def list(self, dbs=None):
        """
        Dump the magic entries in the colon separated list of database
        files passed as argument or the default database file if no
        argument, in a human readable format.

        Returns 0 on success and -1 on failure.
        """
        return _list(self._magic_t, Magic.__tobytes(dbs))

    def errno(self):
        """
        Returns a numeric error code. If return value is 0, an internal
        magic error occurred. If return value is non-zero, the value is
        an OS error code. The errno module or os.strerror() can be used
        to provide detailed error information.
        """
        return _errno(self._magic_t)

    def getparam(self, param):
        """
        Returns the param value if successful and -1 if the parameter
        was unknown.
        """
        # libmagic stores the result through a size_t pointer, so the
        # receiving buffer must be size_t wide (a c_int is too small on
        # LP64 platforms).
        v = c_size_t()
        i = _getparam(self._magic_t, param, byref(v))
        if i == -1:
            return -1
        return v.value

    def setparam(self, param, value):
        """
        Returns 0 if successful and -1 if the parameter was unknown.
        """
        # libmagic reads the new value through a size_t pointer.
        v = c_size_t(value)
        return _setparam(self._magic_t, param, byref(v))


def open(flags):
    """
    Returns a magic object on success and None on failure.
    Flags argument as for setflags.
    """
    ms = _open(flags)
    # With a POINTER restype a failed magic_open() comes back as a NULL
    # pointer object, not as None; NULL ctypes pointers are falsy.
    if not ms:
        return None
    return Magic(ms)


# Objects used by `detect_from_` functions
class error(Exception):
    """Raised when a magic handle cannot be opened or its database loaded."""
    pass


class MagicDetect(object):
    """
    Pair of loaded magic handles used by the `detect_from_` functions:
    mime_magic is opened with MAGIC_MIME and none_magic with MAGIC_NONE.
    """

    def __init__(self):
        # Bind both attributes before anything can fail so that close()
        # and __del__ always find them, even on a half-built instance.
        self.mime_magic = None
        self.none_magic = None
        try:
            self.mime_magic = MagicDetect._open_loaded(MAGIC_MIME)
            self.none_magic = MagicDetect._open_loaded(MAGIC_NONE)
        except error:
            # Release whatever was opened before reporting the failure.
            self.close()
            raise

    @staticmethod
    def _open_loaded(flags):
        # Open a handle with the given flags and load the default database;
        # raises error (closing the handle first) if either step fails.
        handle = open(flags)
        if handle is None:
            raise error('magic_open failed')
        if handle.load() == -1:
            handle.close()
            raise error('magic_load failed')
        return handle

    def close(self):
        """
        Closes both magic handles. Safe to call more than once.
        """
        for name in ('mime_magic', 'none_magic'):
            handle = getattr(self, name, None)
            if handle is not None:
                handle.close()
                setattr(self, name, None)

    def __del__(self):
        self.close()

# One MagicDetect instance is cached per thread
threadlocal = threading.local()


def _detect_make():
    # Return the calling thread's MagicDetect, creating it on first use.
    v = getattr(threadlocal, "magic_instance", None)
    if v is None:
        v = MagicDetect()
        setattr(threadlocal, "magic_instance", v)
    return v


def _create_filemagic(mime_detected, type_detected):
    # Build a FileMagic from a "type; charset=encoding" MIME result and a
    # plain description.  A MIME string that does not split into exactly
    # two parts is reported as ValueError carrying the offending string.
    try:
        mime_type, mime_encoding = mime_detected.split('; ')
    except ValueError:
        raise ValueError(mime_detected)

    return FileMagic(name=type_detected, mime_type=mime_type,
                     encoding=mime_encoding.replace('charset=', ''))


def detect_from_filename(filename):
    '''Detect mime type, encoding and file type from a filename

    Returns a `FileMagic` namedtuple.
    '''
    x = _detect_make()
    return _create_filemagic(x.mime_magic.file(filename),
                             x.none_magic.file(filename))


def detect_from_fobj(fobj):
    '''Detect mime type, encoding and file type from file-like object

    The object must provide a fileno() method.

    Returns a `FileMagic` namedtuple.
    '''

    file_descriptor = fobj.fileno()
    x = _detect_make()
    return _create_filemagic(x.mime_magic.descriptor(file_descriptor),
                             x.none_magic.descriptor(file_descriptor))


def detect_from_content(byte_content):
    '''Detect mime type, encoding and file type from bytes

    Returns a `FileMagic` namedtuple.
    '''

    x = _detect_make()
    return _create_filemagic(x.mime_magic.buffer(byte_content),
                             x.none_magic.buffer(byte_content))