# coding: utf-8

'''
Python bindings for libmagic
'''

import ctypes

from collections import namedtuple

from ctypes import *
from ctypes.util import find_library


def _init():
    """
    Loads the shared library through ctypes and returns a library
    L{ctypes.CDLL} instance
    """
    # NOTE(review): find_library() returns None when no 'magic' library can
    # be located; that value is handed to LoadLibrary() unchanged, so a
    # missing library only surfaces later, when the symbols are looked up.
    return ctypes.cdll.LoadLibrary(find_library('magic'))

_libraries = {}
_libraries['magic'] = _init()

# Flag constants for open and setflags.
# Every flag is exported twice: with and without the MAGIC_ prefix.
MAGIC_NONE = NONE = 0
MAGIC_DEBUG = DEBUG = 1
MAGIC_SYMLINK = SYMLINK = 2
MAGIC_COMPRESS = COMPRESS = 4
MAGIC_DEVICES = DEVICES = 8
MAGIC_MIME_TYPE = MIME_TYPE = 16
MAGIC_CONTINUE = CONTINUE = 32
MAGIC_CHECK = CHECK = 64
MAGIC_PRESERVE_ATIME = PRESERVE_ATIME = 128
MAGIC_RAW = RAW = 256
MAGIC_ERROR = ERROR = 512
MAGIC_MIME_ENCODING = MIME_ENCODING = 1024
MAGIC_MIME = MIME = 1040  # MIME_TYPE + MIME_ENCODING
MAGIC_APPLE = APPLE = 2048

MAGIC_NO_CHECK_COMPRESS = NO_CHECK_COMPRESS = 4096
MAGIC_NO_CHECK_TAR = NO_CHECK_TAR = 8192
MAGIC_NO_CHECK_SOFT = NO_CHECK_SOFT = 16384
MAGIC_NO_CHECK_APPTYPE = NO_CHECK_APPTYPE = 32768
MAGIC_NO_CHECK_ELF = NO_CHECK_ELF = 65536
MAGIC_NO_CHECK_TEXT = NO_CHECK_TEXT = 131072
MAGIC_NO_CHECK_CDF = NO_CHECK_CDF = 262144
MAGIC_NO_CHECK_TOKENS = NO_CHECK_TOKENS = 1048576
MAGIC_NO_CHECK_ENCODING = NO_CHECK_ENCODING = 2097152

MAGIC_NO_CHECK_BUILTIN = NO_CHECK_BUILTIN = 4173824

# Result record returned by the `detect_from_` functions.
FileMagic = namedtuple('FileMagic', ('mime_type', 'encoding', 'name'))


class magic_set(Structure):
    """
    Opaque libmagic handle structure; no fields are declared because the
    bindings only ever pass pointers to it around.
    """
    pass
magic_set._fields_ = []
magic_t = POINTER(magic_set)

# ctypes prototypes for the libmagic entry points used below.
_open = _libraries['magic'].magic_open
_open.restype = magic_t
_open.argtypes = [c_int]

_close = _libraries['magic'].magic_close
_close.restype = None
_close.argtypes = [magic_t]

_file = _libraries['magic'].magic_file
_file.restype = c_char_p
_file.argtypes = [magic_t, c_char_p]

_descriptor = _libraries['magic'].magic_descriptor
_descriptor.restype = c_char_p
_descriptor.argtypes = [magic_t, c_int]

_buffer = _libraries['magic'].magic_buffer
_buffer.restype = c_char_p
_buffer.argtypes = [magic_t, c_void_p, c_size_t]

_error = _libraries['magic'].magic_error
_error.restype = c_char_p
_error.argtypes = [magic_t]

_setflags = _libraries['magic'].magic_setflags
_setflags.restype = c_int
_setflags.argtypes = [magic_t, c_int]

_load = _libraries['magic'].magic_load
_load.restype = c_int
_load.argtypes = [magic_t, c_char_p]

_compile = _libraries['magic'].magic_compile
_compile.restype = c_int
_compile.argtypes = [magic_t, c_char_p]

_check = _libraries['magic'].magic_check
_check.restype = c_int
_check.argtypes = [magic_t, c_char_p]

_list = _libraries['magic'].magic_list
_list.restype = c_int
_list.argtypes = [magic_t, c_char_p]

_errno = _libraries['magic'].magic_errno
_errno.restype = c_int
_errno.argtypes = [magic_t]


class Magic(object):
    """
    Thin object wrapper around a libmagic handle (a `magic_t` pointer).

    Text results coming back from the library are converted to `str`;
    filename/database arguments are converted to `bytes` before the call.
    """

    def __init__(self, ms):
        self._magic_t = ms

    def close(self):
        """
        Closes the magic database and deallocates any resources used.
        """
        _close(self._magic_t)

    @staticmethod
    def __tostr(s):
        """
        Converts a library result to `str`; None is passed through.
        """
        if s is None:
            return None
        if isinstance(s, str):
            return s
        try:  # keep Python 2 compatibility
            return str(s, 'utf-8')
        except TypeError:
            # Python 2: str() does not accept an encoding argument
            return str(s)

    @staticmethod
    def __tobytes(b):
        """
        Converts an argument to `bytes`; None is passed through (it reaches
        the library as a NULL pointer).
        """
        if b is None:
            return None
        if isinstance(b, bytes):
            return b
        try:  # keep Python 2 compatibility
            return bytes(b, 'utf-8')
        except TypeError:
            # Python 2: bytes() does not accept an encoding argument
            return bytes(b)

    def file(self, filename):
        """
        Returns a textual description of the contents of the argument passed
        as a filename or None if an error occurred and the MAGIC_ERROR flag
        is set. A call to errno() will return the numeric error code.
        """
        return Magic.__tostr(_file(self._magic_t, Magic.__tobytes(filename)))

    def descriptor(self, fd):
        """
        Returns a textual description of the contents of the argument passed
        as a file descriptor or None if an error occurred and the MAGIC_ERROR
        flag is set. A call to errno() will return the numeric error code.
        """
        return Magic.__tostr(_descriptor(self._magic_t, fd))

    def buffer(self, buf):
        """
        Returns a textual description of the contents of the argument passed
        as a buffer or None if an error occurred and the MAGIC_ERROR flag
        is set. A call to errno() will return the numeric error code.

        The length handed to the library is len(buf).
        """
        return Magic.__tostr(_buffer(self._magic_t, buf, len(buf)))

    def error(self):
        """
        Returns a textual explanation of the last error or None
        if there was no error.
        """
        return Magic.__tostr(_error(self._magic_t))

    def setflags(self, flags):
        """
        Set flags on the magic object which determine how magic checking
        behaves; a bitwise OR of the flags described in libmagic(3), but
        without the MAGIC_ prefix.

        Returns -1 on systems that don't support utime(2) or utimes(2)
        when PRESERVE_ATIME is set.
        """
        return _setflags(self._magic_t, flags)

    def load(self, filename=None):
        """
        Must be called to load entries in the colon separated list of database
        files passed as argument or the default database file if no argument
        before any magic queries can be performed.

        Returns 0 on success and -1 on failure.
        """
        return _load(self._magic_t, Magic.__tobytes(filename))

    def compile(self, dbs=None):
        """
        Compile entries in the colon separated list of database files
        passed as argument or the default database file if no argument.
        The compiled files created are named from the basename(1) of each file
        argument with ".mgc" appended to it.

        Returns 0 on success and -1 on failure.
        """
        return _compile(self._magic_t, Magic.__tobytes(dbs))

    def check(self, dbs=None):
        """
        Check the validity of entries in the colon separated list of
        database files passed as argument or the default database file
        if no argument.

        Returns 0 on success and -1 on failure.
        """
        return _check(self._magic_t, Magic.__tobytes(dbs))

    def list(self, dbs=None):
        """
        Dump all magic entries in a human readable format, first the
        entries that are matched against binary files and then the ones
        that match text files. Entries are taken from the colon separated
        list of database files passed as argument or the default database
        file if no argument.

        Returns 0 on success and -1 on failure.
        """
        return _list(self._magic_t, Magic.__tobytes(dbs))

    def errno(self):
        """
        Returns a numeric error code. If return value is 0, an internal
        magic error occurred. If return value is non-zero, the value is
        an OS error code. The errno module or os.strerror() can be used
        to provide detailed error information.
        """
        return _errno(self._magic_t)


def open(flags):
    """
    Returns a magic object on success and None on failure.
    Flags argument as for setflags.
    """
    ms = _open(flags)
    # A failed magic_open() comes back as a NULL pointer, which ctypes
    # represents as a pointer object that is false in a boolean context.
    if not ms:
        return None
    return Magic(ms)


# Objects used by `detect_from_` functions
# NOTE(review): the return values of load() are not checked here.
mime_magic = Magic(_open(MAGIC_MIME))
mime_magic.load()
none_magic = Magic(_open(MAGIC_NONE))
none_magic.load()


def _create_filemagic(mime_detected, type_detected):
    """
    Builds a `FileMagic` from the result of a MIME query and the result of
    a plain description query.

    `mime_detected` is split on '; ' into the mime type and the encoding;
    a leading 'charset=' is removed from the encoding. If the split does
    not give exactly two parts, ValueError is raised with the unparsed
    string as its argument.
    """
    try:
        mime_type, mime_encoding = mime_detected.split('; ')
    except ValueError:
        raise ValueError(mime_detected)

    return FileMagic(name=type_detected, mime_type=mime_type,
                     encoding=mime_encoding.replace('charset=', ''))


def detect_from_filename(filename):
    '''Detect mime type, encoding and file type from a filename

    Returns a `FileMagic` namedtuple.
    '''

    return _create_filemagic(mime_magic.file(filename),
                             none_magic.file(filename))


def detect_from_fobj(fobj):
    '''Detect mime type, encoding and file type from file-like object

    The object must provide fileno(); the descriptor it returns is what
    gets inspected.

    Returns a `FileMagic` namedtuple.
    '''

    file_descriptor = fobj.fileno()
    return _create_filemagic(mime_magic.descriptor(file_descriptor),
                             none_magic.descriptor(file_descriptor))


def detect_from_content(byte_content):
    '''Detect mime type, encoding and file type from bytes

    Returns a `FileMagic` namedtuple.
    '''

    return _create_filemagic(mime_magic.buffer(byte_content),
                             none_magic.buffer(byte_content))