xref: /freebsd/contrib/file/python/magic.py (revision a2dfb7224ec9933ee804cae54d51848dce938b6b)
13e41d09dSXin LI# coding: utf-8
23e41d09dSXin LI
3b6cee71dSXin LI'''
4b6cee71dSXin LIPython bindings for libmagic
5b6cee71dSXin LI'''
6b6cee71dSXin LI
7b6cee71dSXin LIimport ctypes
843a5ec4eSXin LIimport threading
9b6cee71dSXin LI
103e41d09dSXin LIfrom collections import namedtuple
113e41d09dSXin LI
12b6cee71dSXin LIfrom ctypes import *
13b6cee71dSXin LIfrom ctypes.util import find_library
14b6cee71dSXin LI
15b6cee71dSXin LI
16b6cee71dSXin LIdef _init():
17b6cee71dSXin LI    """
18b6cee71dSXin LI    Loads the shared library through ctypes and returns a library
19b6cee71dSXin LI    L{ctypes.CDLL} instance
20b6cee71dSXin LI    """
21b6cee71dSXin LI    return ctypes.cdll.LoadLibrary(find_library('magic'))
22b6cee71dSXin LI
# Registry of loaded shared libraries, keyed by short library name.
_libraries = {'magic': _init()}
25b6cee71dSXin LI
# Flag constants for open and setflags.
# Every constant is exported twice: with and without the MAGIC_ prefix.
# Values are written in hexadecimal so the bit layout is visible.
MAGIC_NONE = NONE = 0x000000
MAGIC_DEBUG = DEBUG = 0x000001
MAGIC_SYMLINK = SYMLINK = 0x000002
MAGIC_COMPRESS = COMPRESS = 0x000004
MAGIC_DEVICES = DEVICES = 0x000008
MAGIC_MIME_TYPE = MIME_TYPE = 0x000010
MAGIC_CONTINUE = CONTINUE = 0x000020
MAGIC_CHECK = CHECK = 0x000040
MAGIC_PRESERVE_ATIME = PRESERVE_ATIME = 0x000080
MAGIC_RAW = RAW = 0x000100
MAGIC_ERROR = ERROR = 0x000200
MAGIC_MIME_ENCODING = MIME_ENCODING = 0x000400
MAGIC_MIME = MIME = MAGIC_MIME_TYPE | MAGIC_MIME_ENCODING  # 0x410 == 1040
MAGIC_APPLE = APPLE = 0x000800

MAGIC_NO_CHECK_COMPRESS = NO_CHECK_COMPRESS = 0x001000
MAGIC_NO_CHECK_TAR = NO_CHECK_TAR = 0x002000
MAGIC_NO_CHECK_SOFT = NO_CHECK_SOFT = 0x004000
MAGIC_NO_CHECK_APPTYPE = NO_CHECK_APPTYPE = 0x008000
MAGIC_NO_CHECK_ELF = NO_CHECK_ELF = 0x010000
MAGIC_NO_CHECK_TEXT = NO_CHECK_TEXT = 0x020000
MAGIC_NO_CHECK_CDF = NO_CHECK_CDF = 0x040000
MAGIC_NO_CHECK_TOKENS = NO_CHECK_TOKENS = 0x100000
MAGIC_NO_CHECK_ENCODING = NO_CHECK_ENCODING = 0x200000

# Composite mask, kept as the literal value 4173824.
MAGIC_NO_CHECK_BUILTIN = NO_CHECK_BUILTIN = 0x3FB000

# Parameter selectors passed as `param` to Magic.getparam() / Magic.setparam()
MAGIC_PARAM_INDIR_MAX = PARAM_INDIR_MAX = 0
MAGIC_PARAM_NAME_MAX = PARAM_NAME_MAX = 1
MAGIC_PARAM_ELF_PHNUM_MAX = PARAM_ELF_PHNUM_MAX = 2
MAGIC_PARAM_ELF_SHNUM_MAX = PARAM_ELF_SHNUM_MAX = 3
MAGIC_PARAM_ELF_NOTES_MAX = PARAM_ELF_NOTES_MAX = 4
MAGIC_PARAM_REGEX_MAX = PARAM_REGEX_MAX = 5
MAGIC_PARAM_BYTES_MAX = PARAM_BYTES_MAX = 6
6143a5ec4eSXin LI
# Result record returned by the detect_from_* helpers.
FileMagic = namedtuple('FileMagic', ['mime_type', 'encoding', 'name'])
633e41d09dSXin LI
64b6cee71dSXin LI
class magic_set(Structure):
    """Opaque libmagic state; no fields are exposed to Python."""
    _fields_ = []


# Pointer-to-magic_set type, used as the handle type in the ctypes
# prototypes declared for the magic_* functions.
magic_t = POINTER(magic_set)
69b6cee71dSXin LI
# ctypes prototypes for the libmagic entry points. The C signatures in the
# comments are taken from libmagic(3).

# magic_t magic_open(int flags)
_open = _libraries['magic'].magic_open
_open.argtypes = [c_int]
_open.restype = magic_t

# void magic_close(magic_t cookie)
_close = _libraries['magic'].magic_close
_close.argtypes = [magic_t]
_close.restype = None

# const char *magic_file(magic_t cookie, const char *filename)
_file = _libraries['magic'].magic_file
_file.argtypes = [magic_t, c_char_p]
_file.restype = c_char_p

# const char *magic_descriptor(magic_t cookie, int fd)
_descriptor = _libraries['magic'].magic_descriptor
_descriptor.argtypes = [magic_t, c_int]
_descriptor.restype = c_char_p

# const char *magic_buffer(magic_t cookie, const void *buffer, size_t length)
_buffer = _libraries['magic'].magic_buffer
_buffer.argtypes = [magic_t, c_void_p, c_size_t]
_buffer.restype = c_char_p

# const char *magic_error(magic_t cookie)
_error = _libraries['magic'].magic_error
_error.argtypes = [magic_t]
_error.restype = c_char_p

# int magic_setflags(magic_t cookie, int flags)
_setflags = _libraries['magic'].magic_setflags
_setflags.argtypes = [magic_t, c_int]
_setflags.restype = c_int

# int magic_load(magic_t cookie, const char *filename)
_load = _libraries['magic'].magic_load
_load.argtypes = [magic_t, c_char_p]
_load.restype = c_int

# int magic_compile(magic_t cookie, const char *filename)
_compile = _libraries['magic'].magic_compile
_compile.argtypes = [magic_t, c_char_p]
_compile.restype = c_int

# int magic_check(magic_t cookie, const char *filename)
_check = _libraries['magic'].magic_check
_check.argtypes = [magic_t, c_char_p]
_check.restype = c_int

# int magic_list(magic_t cookie, const char *filename)
_list = _libraries['magic'].magic_list
_list.argtypes = [magic_t, c_char_p]
_list.restype = c_int

# int magic_errno(magic_t cookie)
_errno = _libraries['magic'].magic_errno
_errno.argtypes = [magic_t]
_errno.restype = c_int

# int magic_getparam(magic_t cookie, int param, void *value)
_getparam = _libraries['magic'].magic_getparam
_getparam.argtypes = [magic_t, c_int, c_void_p]
_getparam.restype = c_int

# int magic_setparam(magic_t cookie, int param, const void *value)
_setparam = _libraries['magic'].magic_setparam
_setparam.argtypes = [magic_t, c_int, c_void_p]
_setparam.restype = c_int
12543a5ec4eSXin LI
126b6cee71dSXin LI
class Magic(object):
    """
    Object wrapper around a libmagic handle.

    Text arguments are encoded to bytes before being handed to the C
    library, and C string results are decoded back to str.
    """

    def __init__(self, ms):
        # ms is the handle that is passed as first argument to every
        # magic_* call made by this object.
        self._magic_t = ms

    def close(self):
        """
        Closes the magic database and deallocates any resources used.
        """
        _close(self._magic_t)

    @staticmethod
    def __tostr(s):
        """
        Converts a C string result to str; None is passed through.
        """
        if s is None:
            return None
        if isinstance(s, str):
            return s
        try:  # keep Python 2 compatibility
            # libmagic output is not guaranteed to be valid UTF-8; escape
            # undecodable bytes instead of raising UnicodeDecodeError.
            return str(s, 'utf-8', 'backslashreplace')
        except TypeError:
            return str(s)

    @staticmethod
    def __tobytes(b):
        """
        Converts a text argument to bytes; None is passed through.
        """
        if b is None:
            return None
        if isinstance(b, bytes):
            return b
        try:  # keep Python 2 compatibility
            return bytes(b, 'utf-8')
        except TypeError:
            return bytes(b)

    def file(self, filename):
        """
        Returns a textual description of the contents of the argument passed
        as a filename or None if an error occurred and the MAGIC_ERROR flag
        is set. A call to errno() will return the numeric error code.
        """
        return Magic.__tostr(_file(self._magic_t, Magic.__tobytes(filename)))

    def descriptor(self, fd):
        """
        Returns a textual description of the contents of the argument passed
        as a file descriptor or None if an error occurred and the MAGIC_ERROR
        flag is set. A call to errno() will return the numeric error code.
        """
        return Magic.__tostr(_descriptor(self._magic_t, fd))

    def buffer(self, buf):
        """
        Returns a textual description of the contents of the argument passed
        as a buffer or None if an error occurred and the MAGIC_ERROR flag
        is set. A call to errno() will return the numeric error code.
        """
        return Magic.__tostr(_buffer(self._magic_t, buf, len(buf)))

    def error(self):
        """
        Returns a textual explanation of the last error or None
        if there was no error.
        """
        return Magic.__tostr(_error(self._magic_t))

    def setflags(self, flags):
        """
        Set flags on the magic object which determine how magic checking
        behaves; a bitwise OR of the flags described in libmagic(3), but
        without the MAGIC_ prefix.

        Returns -1 on systems that don't support utime(2) or utimes(2)
        when PRESERVE_ATIME is set.
        """
        return _setflags(self._magic_t, flags)

    def load(self, filename=None):
        """
        Must be called to load entries in the colon separated list of database
        files passed as argument or the default database file if no argument
        before any magic queries can be performed.

        Returns 0 on success and -1 on failure.
        """
        return _load(self._magic_t, Magic.__tobytes(filename))

    def compile(self, dbs=None):
        """
        Compile entries in the colon separated list of database files
        passed as argument or the default database file if no argument.
        The compiled files created are named from the basename(1) of each file
        argument with ".mgc" appended to it.

        Returns 0 on success and -1 on failure.
        """
        return _compile(self._magic_t, Magic.__tobytes(dbs))

    def check(self, dbs=None):
        """
        Check the validity of entries in the colon separated list of
        database files passed as argument or the default database file
        if no argument.

        Returns 0 on success and -1 on failure.
        """
        return _check(self._magic_t, Magic.__tobytes(dbs))

    def list(self, dbs=None):
        """
        Dump, in human readable form, the entries in the colon separated
        list of database files passed as argument or the default database
        file if no argument.

        Returns 0 on success and -1 on failure.
        """
        return _list(self._magic_t, Magic.__tobytes(dbs))

    def errno(self):
        """
        Returns a numeric error code. If return value is 0, an internal
        magic error occurred. If return value is non-zero, the value is
        an OS error code. The errno module or os.strerror() can be used
        to provide detailed error information.
        """
        return _errno(self._magic_t)

    def getparam(self, param):
        """
        Returns the param value if successful and -1 if the parameter
        was unknown.
        """
        # libmagic(3) documents every parameter as a size_t; a c_int buffer
        # is too small for the library to write into on LP64 platforms.
        v = c_size_t()
        i = _getparam(self._magic_t, param, byref(v))
        if i == -1:
            return -1
        return v.value

    def setparam(self, param, value):
        """
        Returns 0 if successful and -1 if the parameter was unknown.
        """
        # The library reads a full size_t through the pointer, so the
        # value has to be stored in one.
        v = c_size_t(value)
        return _setparam(self._magic_t, param, byref(v))
26843a5ec4eSXin LI
269b6cee71dSXin LI
def open(flags):
    """
    Returns a magic object on success and None on failure.
    Flags argument as for setflags.
    """
    ms = _open(flags)
    # _open.restype is a ctypes POINTER type, so a NULL return value arrives
    # as a null pointer instance (which is falsy), never as None. Testing
    # truthiness catches that case; an identity check against None does not.
    if not ms:
        return None
    return Magic(ms)
2793e41d09dSXin LI
2803e41d09dSXin LI
2813e41d09dSXin LI# Objects used by `detect_from_` functions
class error(Exception):
    """Raised when a magic handle cannot be opened or loaded."""
284*a2dfb722SXin LI
class MagicDetect(object):
    """
    Holds the two magic handles used by the `detect_from_` functions:
    `mime_magic` (opened with MAGIC_MIME) and `none_magic` (opened with
    MAGIC_NONE), both with the default database loaded.
    """

    def __init__(self):
        """
        Opens and loads both handles.

        Raises `error` if either handle cannot be opened or loaded; any
        handle that was already opened is closed first.
        """
        # Bind both attributes before anything can fail so that __del__
        # never sees a partially initialised instance.
        self.mime_magic = None
        self.none_magic = None
        try:
            self.mime_magic = MagicDetect._open_loaded(MAGIC_MIME)
            self.none_magic = MagicDetect._open_loaded(MAGIC_NONE)
        except error:
            self._release()
            raise

    @staticmethod
    def _open_loaded(flags):
        # Opens a handle with `flags` and loads the default database.
        handle = open(flags)
        if handle is None:
            raise error('unable to open a magic handle')
        if handle.load() == -1:
            # Fetch the message before the handle is closed.
            message = handle.error()
            handle.close()
            raise error(message or 'unable to load the magic database')
        return handle

    def _release(self):
        # Closes whichever handles are still open and forgets them, so a
        # second call is a no-op.
        for attr in ('mime_magic', 'none_magic'):
            handle = getattr(self, attr, None)
            if handle is not None:
                setattr(self, attr, None)
                handle.close()

    def __del__(self):
        self._release()
31143a5ec4eSXin LI
31243a5ec4eSXin LIthreadlocal = threading.local()
31343a5ec4eSXin LI
31443a5ec4eSXin LIdef _detect_make():
31543a5ec4eSXin LI    v = getattr(threadlocal, "magic_instance", None)
31643a5ec4eSXin LI    if v is None:
31743a5ec4eSXin LI        v = MagicDetect()
31843a5ec4eSXin LI        setattr(threadlocal, "magic_instance", v)
31943a5ec4eSXin LI    return v
3203e41d09dSXin LI
3213e41d09dSXin LIdef _create_filemagic(mime_detected, type_detected):
3222dc4dbb9SEitan Adler    try:
3233e41d09dSXin LI        mime_type, mime_encoding = mime_detected.split('; ')
3242dc4dbb9SEitan Adler    except ValueError:
3252dc4dbb9SEitan Adler        raise ValueError(mime_detected)
3263e41d09dSXin LI
3273e41d09dSXin LI    return FileMagic(name=type_detected, mime_type=mime_type,
3283e41d09dSXin LI                     encoding=mime_encoding.replace('charset=', ''))
3293e41d09dSXin LI
3303e41d09dSXin LI
def detect_from_filename(filename):
    '''Detect mime type, encoding and file type of the file at `filename`

    Returns a `FileMagic` namedtuple.
    '''
    detector = _detect_make()
    mime_description = detector.mime_magic.file(filename)
    type_description = detector.none_magic.file(filename)
    return _create_filemagic(mime_description, type_description)
3393e41d09dSXin LI
3403e41d09dSXin LI
def detect_from_fobj(fobj):
    '''Detect mime type, encoding and file type of a file-like object

    The object must provide `fileno()`; detection runs on that descriptor.

    Returns a `FileMagic` namedtuple.
    '''
    fd = fobj.fileno()
    detector = _detect_make()
    mime_description = detector.mime_magic.descriptor(fd)
    type_description = detector.none_magic.descriptor(fd)
    return _create_filemagic(mime_description, type_description)
3513e41d09dSXin LI
3523e41d09dSXin LI
def detect_from_content(byte_content):
    '''Detect mime type, encoding and file type of in-memory bytes

    Returns a `FileMagic` namedtuple.
    '''
    detector = _detect_make()
    mime_description = detector.mime_magic.buffer(byte_content)
    type_description = detector.none_magic.buffer(byte_content)
    return _create_filemagic(mime_description, type_description)
362