# xref: /freebsd/contrib/file/python/magic.py (revision 40427cca7a9ae77b095936fb1954417c290cfb17)
# coding: utf-8

'''
Python bindings for libmagic
'''
6b6cee71dSXin LI
7b6cee71dSXin LIimport ctypes
8b6cee71dSXin LI
93e41d09dSXin LIfrom collections import namedtuple
103e41d09dSXin LI
11b6cee71dSXin LIfrom ctypes import *
12b6cee71dSXin LIfrom ctypes.util import find_library
13b6cee71dSXin LI
14b6cee71dSXin LI
def _init():
    """
    Loads the libmagic shared library through ctypes and returns a
    L{ctypes.CDLL} instance for it.

    Raises ImportError when find_library() cannot locate a `magic`
    library, instead of handing None to the loader.
    """
    path = find_library('magic')
    if path is None:
        # find_library() reports "not found" as None.  Failing here gives a
        # clear message rather than an error at the first symbol lookup.
        raise ImportError('failed to find the libmagic shared library')
    return ctypes.cdll.LoadLibrary(path)
21b6cee71dSXin LI
# Loaded shared libraries, keyed by short library name.
_libraries = {'magic': _init()}
24b6cee71dSXin LI
# Flag constants for open and setflags.
# Every flag is available under two names: with and without the MAGIC_
# prefix.
MAGIC_NONE = NONE = 0x000000
MAGIC_DEBUG = DEBUG = 0x000001
MAGIC_SYMLINK = SYMLINK = 0x000002
MAGIC_COMPRESS = COMPRESS = 0x000004
MAGIC_DEVICES = DEVICES = 0x000008
MAGIC_MIME_TYPE = MIME_TYPE = 0x000010
MAGIC_CONTINUE = CONTINUE = 0x000020
MAGIC_CHECK = CHECK = 0x000040
MAGIC_PRESERVE_ATIME = PRESERVE_ATIME = 0x000080
MAGIC_RAW = RAW = 0x000100
MAGIC_ERROR = ERROR = 0x000200
MAGIC_MIME_ENCODING = MIME_ENCODING = 0x000400
MAGIC_MIME = MIME = MAGIC_MIME_TYPE | MAGIC_MIME_ENCODING  # 1040
MAGIC_APPLE = APPLE = 0x000800

MAGIC_NO_CHECK_COMPRESS = NO_CHECK_COMPRESS = 0x001000
MAGIC_NO_CHECK_TAR = NO_CHECK_TAR = 0x002000
MAGIC_NO_CHECK_SOFT = NO_CHECK_SOFT = 0x004000
MAGIC_NO_CHECK_APPTYPE = NO_CHECK_APPTYPE = 0x008000
MAGIC_NO_CHECK_ELF = NO_CHECK_ELF = 0x010000
MAGIC_NO_CHECK_TEXT = NO_CHECK_TEXT = 0x020000
MAGIC_NO_CHECK_CDF = NO_CHECK_CDF = 0x040000
MAGIC_NO_CHECK_TOKENS = NO_CHECK_TOKENS = 0x100000
MAGIC_NO_CHECK_ENCODING = NO_CHECK_ENCODING = 0x200000

# 4173824.  Not the plain OR of the NO_CHECK_* flags above: the mask leaves
# out NO_CHECK_SOFT (0x004000) and includes bit 0x080000, which has no named
# constant in this module.
MAGIC_NO_CHECK_BUILTIN = NO_CHECK_BUILTIN = 0x3FB000
52b6cee71dSXin LI
# Result record built by `_create_filemagic` for the `detect_from_` functions.
FileMagic = namedtuple('FileMagic', ('mime_type', 'encoding', 'name'))
543e41d09dSXin LI
55b6cee71dSXin LI
class magic_set(Structure):
    """
    Placeholder for the C library's magic set structure.

    No fields are declared: instances are only referred to through
    pointers (see `magic_t`).
    """
    _fields_ = []


# Pointer-to-magic_set; the handle type used by the libmagic prototypes.
magic_t = POINTER(magic_set)
60b6cee71dSXin LI
# ctypes prototypes for the libmagic entry points used by this module.
# Each binding declares its result type and argument types explicitly.

# magic_open(int flags) -> magic_t
_open = _libraries['magic'].magic_open
_open.restype = magic_t
_open.argtypes = [c_int]

# magic_close(magic_t) -> no result
_close = _libraries['magic'].magic_close
_close.restype = None
_close.argtypes = [magic_t]

# magic_file(magic_t, char *filename) -> char *
_file = _libraries['magic'].magic_file
_file.restype = c_char_p
_file.argtypes = [magic_t, c_char_p]

# magic_descriptor(magic_t, int fd) -> char *
_descriptor = _libraries['magic'].magic_descriptor
_descriptor.restype = c_char_p
_descriptor.argtypes = [magic_t, c_int]

# magic_buffer(magic_t, void *buffer, size_t length) -> char *
_buffer = _libraries['magic'].magic_buffer
_buffer.restype = c_char_p
_buffer.argtypes = [magic_t, c_void_p, c_size_t]

# magic_error(magic_t) -> char *
_error = _libraries['magic'].magic_error
_error.restype = c_char_p
_error.argtypes = [magic_t]

# magic_setflags(magic_t, int flags) -> int
_setflags = _libraries['magic'].magic_setflags
_setflags.restype = c_int
_setflags.argtypes = [magic_t, c_int]

# magic_load(magic_t, char *filename) -> int
_load = _libraries['magic'].magic_load
_load.restype = c_int
_load.argtypes = [magic_t, c_char_p]

# magic_compile(magic_t, char *filename) -> int
_compile = _libraries['magic'].magic_compile
_compile.restype = c_int
_compile.argtypes = [magic_t, c_char_p]

# magic_check(magic_t, char *filename) -> int
_check = _libraries['magic'].magic_check
_check.restype = c_int
_check.argtypes = [magic_t, c_char_p]

# magic_list(magic_t, char *filename) -> int
_list = _libraries['magic'].magic_list
_list.restype = c_int
_list.argtypes = [magic_t, c_char_p]

# magic_errno(magic_t) -> int
_errno = _libraries['magic'].magic_errno
_errno.restype = c_int
_errno.argtypes = [magic_t]
108b6cee71dSXin LI
109b6cee71dSXin LI
class Magic(object):
    """
    Object wrapper around a libmagic handle.

    Text arguments are converted to bytes (UTF-8) before they are passed
    to the library, and byte strings returned by the library are converted
    to str.
    """

    def __init__(self, ms):
        # ms is the handle passed as first argument to every library call.
        self._magic_t = ms

    def close(self):
        """
        Closes the magic database and deallocates any resources used.

        Calling close() again on an already closed object does nothing.
        """
        if self._magic_t:
            _close(self._magic_t)
            # Forget the handle so it cannot be released a second time.
            self._magic_t = None

    @staticmethod
    def __tostr(s):
        """Converts a library result to str; None is passed through."""
        if s is None:
            return None
        if isinstance(s, str):
            return s
        try:  # keep Python 2 compatibility
            return str(s, 'utf-8')
        except TypeError:
            return str(s)

    @staticmethod
    def __tobytes(b):
        """Converts an argument to bytes; None is passed through."""
        if b is None:
            return None
        if isinstance(b, bytes):
            return b
        try:  # keep Python 2 compatibility
            return bytes(b, 'utf-8')
        except TypeError:
            return bytes(b)

    def file(self, filename):
        """
        Returns a textual description of the contents of the argument passed
        as a filename or None if an error occurred and the MAGIC_ERROR flag
        is set. A call to errno() will return the numeric error code.
        """
        return Magic.__tostr(_file(self._magic_t, Magic.__tobytes(filename)))

    def descriptor(self, fd):
        """
        Returns a textual description of the contents of the argument passed
        as a file descriptor or None if an error occurred and the MAGIC_ERROR
        flag is set. A call to errno() will return the numeric error code.
        """
        return Magic.__tostr(_descriptor(self._magic_t, fd))

    def buffer(self, buf):
        """
        Returns a textual description of the contents of the argument passed
        as a buffer or None if an error occurred and the MAGIC_ERROR flag
        is set. A call to errno() will return the numeric error code.

        A str argument is encoded as UTF-8 before it is examined.
        """
        if isinstance(buf, str):
            # A str is not handed to the c_void_p parameter as its encoded
            # bytes, and len() would count characters rather than bytes.
            # On Python 2 str is bytes, so this leaves buf unchanged.
            buf = Magic.__tobytes(buf)
        return Magic.__tostr(_buffer(self._magic_t, buf, len(buf)))

    def error(self):
        """
        Returns a textual explanation of the last error or None
        if there was no error.
        """
        return Magic.__tostr(_error(self._magic_t))

    def setflags(self, flags):
        """
        Set flags on the magic object which determine how magic checking
        behaves; a bitwise OR of the flags described in libmagic(3), but
        without the MAGIC_ prefix.

        Returns -1 on systems that don't support utime(2) or utimes(2)
        when PRESERVE_ATIME is set.
        """
        return _setflags(self._magic_t, flags)

    def load(self, filename=None):
        """
        Must be called to load entries in the colon separated list of database
        files passed as argument or the default database file if no argument
        before any magic queries can be performed.

        Returns 0 on success and -1 on failure.
        """
        return _load(self._magic_t, Magic.__tobytes(filename))

    def compile(self, dbs=None):
        """
        Compile entries in the colon separated list of database files
        passed as argument or the default database file if no argument.
        The compiled files created are named from the basename(1) of each file
        argument with ".mgc" appended to it.

        Returns 0 on success and -1 on failure.
        """
        return _compile(self._magic_t, Magic.__tobytes(dbs))

    def check(self, dbs=None):
        """
        Check the validity of entries in the colon separated list of
        database files passed as argument or the default database file
        if no argument.

        Returns 0 on success and -1 on failure.
        """
        return _check(self._magic_t, Magic.__tobytes(dbs))

    def list(self, dbs=None):
        """
        Dump the entries in the colon separated list of database files
        passed as argument or the default database file if no argument,
        in a human readable format.

        Returns 0 on success and -1 on failure.
        """
        return _list(self._magic_t, Magic.__tobytes(dbs))

    def errno(self):
        """
        Returns a numeric error code. If return value is 0, an internal
        magic error occurred. If return value is non-zero, the value is
        an OS error code. The errno module or os.strerror() can be used
        to provide detailed error information.
        """
        return _errno(self._magic_t)
233b6cee71dSXin LI
234b6cee71dSXin LI
def open(flags):
    """
    Returns a magic object on success and None on failure.
    Flags argument as for setflags.
    """
    ms = _open(flags)
    if not ms:
        # A NULL pointer result is falsy: report the failure as None, as
        # documented, rather than wrapping an unusable handle.
        return None
    return Magic(ms)
2413e41d09dSXin LI
2423e41d09dSXin LI
# Objects used by `detect_from_` functions.
# Both are created and have the default database loaded at import time.
mime_magic = Magic(_open(MAGIC_MIME))
mime_magic.load()
none_magic = Magic(_open(MAGIC_NONE))
none_magic.load()
2483e41d09dSXin LI
2493e41d09dSXin LI
def _create_filemagic(mime_detected, type_detected):
    """
    Builds a `FileMagic` namedtuple from two descriptions of one input.

    `mime_detected` must have the form "<mime type>; <encoding>"; a leading
    "charset=" is stripped from the encoding part. `type_detected` is
    stored unchanged as the `name` field.

    Raises ValueError carrying `mime_detected` itself when it does not
    split into exactly those two parts.
    """
    try:
        mime_type, mime_encoding = mime_detected.split('; ')
    except ValueError:
        # Presumably an error text returned in place of a description;
        # surface it instead of a bare unpacking error.
        raise ValueError(mime_detected)

    return FileMagic(name=type_detected, mime_type=mime_type,
                     encoding=mime_encoding.replace('charset=', ''))
2553e41d09dSXin LI
2563e41d09dSXin LI
def detect_from_filename(filename):
    '''Detect mime type, encoding and file type from a filename

    The file is described twice: once by `mime_magic` and once by
    `none_magic`.

    Returns a `FileMagic` namedtuple.
    '''
    mime_detected = mime_magic.file(filename)
    type_detected = none_magic.file(filename)
    return _create_filemagic(mime_detected, type_detected)
2653e41d09dSXin LI
2663e41d09dSXin LI
def detect_from_fobj(fobj):
    '''Detect mime type, encoding and file type from file-like object

    `fobj` must provide fileno(); the descriptor it returns is described
    once by `mime_magic` and once by `none_magic`.

    Returns a `FileMagic` namedtuple.
    '''
    file_descriptor = fobj.fileno()
    mime_detected = mime_magic.descriptor(file_descriptor)
    type_detected = none_magic.descriptor(file_descriptor)
    return _create_filemagic(mime_detected, type_detected)
2763e41d09dSXin LI
2773e41d09dSXin LI
def detect_from_content(byte_content):
    '''Detect mime type, encoding and file type from bytes

    The content is described twice: once by `mime_magic` and once by
    `none_magic`.

    Returns a `FileMagic` namedtuple.
    '''
    mime_detected = mime_magic.buffer(byte_content)
    type_detected = none_magic.buffer(byte_content)
    return _create_filemagic(mime_detected, type_detected)
286