Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/magic/__init__.py: 59%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2magic is a wrapper around the libmagic file identification library.
4See README for more information.
6Usage:
8>>> import magic
9>>> magic.from_file("testdata/test.pdf")
10'PDF document, version 1.2'
11>>> magic.from_file("testdata/test.pdf", mime=True)
12'application/pdf'
13>>> magic.from_buffer(open("testdata/test.pdf").read(1024))
14'PDF document, version 1.2'
15>>>
17"""
19import sys
20import glob
21import ctypes
22import ctypes.util
23import threading
24import logging
26from ctypes import c_char_p, c_int, c_size_t, c_void_p, byref, POINTER
# Keep a reference to the builtin open(). _add_compat() at the bottom of this
# module installs the compat module's `open` into this module's globals,
# which would otherwise shadow the builtin for code in this file
# (Magic.from_file relies on _real_open).
_real_open = open
class MagicException(Exception):
    """
    Error raised when a libmagic call reports a failure.

    message - the value passed to the constructor; also available as
              ``args[0]``.  It may be None when libmagic supplied no error
              text (see Magic._handle509Bug).
    """

    def __init__(self, message):
        # super() must name *this* class.  Naming Exception here started the
        # MRO lookup after Exception and skipped its initializer.
        super(MagicException, self).__init__(message)
        self.message = message
class Magic:
    """
    Magic is a wrapper around the libmagic C library.

    Each instance owns one libmagic cookie (``self.cookie``) opened with
    the flags selected in the constructor.  The identification methods
    (from_buffer, from_file, from_descriptor) hold ``self.lock`` while
    they call into libmagic.
    """

    def __init__(self, mime=False, magic_file=None, mime_encoding=False,
                 keep_going=False, uncompress=False, raw=False, extension=False):
        """
        Create a new libmagic wrapper.

        mime - if True, mimetypes are returned instead of textual descriptions
        mime_encoding - if True, codec is returned
        magic_file - use a mime database other than the system default
        keep_going - don't stop at the first match, keep going
        uncompress - Try to look inside compressed files.
        raw - Do not try to decode "non-printable" chars.
        extension - Print a slash-separated list of valid extensions for the file type found.

        Raises NotImplementedError if `extension` is requested but the
        loaded libmagic does not appear to support it.
        """
        self.flags = MAGIC_NONE
        if mime:
            self.flags |= MAGIC_MIME_TYPE
        if mime_encoding:
            self.flags |= MAGIC_MIME_ENCODING
        if keep_going:
            self.flags |= MAGIC_CONTINUE
        if uncompress:
            self.flags |= MAGIC_COMPRESS
        if raw:
            self.flags |= MAGIC_RAW
        if extension:
            self.flags |= MAGIC_EXTENSION

        self.cookie = magic_open(self.flags)
        self.lock = threading.Lock()

        magic_load(self.cookie, magic_file)

        # MAGIC_EXTENSION was added in 523 or 524, so bail if
        # it doesn't appear to be available
        if extension and (not _has_version or version() < 524):
            raise NotImplementedError('MAGIC_EXTENSION is not supported in this version of libmagic')

        # For https://github.com/ahupp/python-magic/issues/190
        # libmagic has fixed internal limits that some files exceed, causing
        # an error.  We can avoid this (at least for the sample file given)
        # by bumping the limit up.  It's not clear if this is a general solution
        # or whether other internal limits should be increased, but given
        # the lack of other reports I'll assume this is rare.
        if _has_param:
            try:
                self.setparam(MAGIC_PARAM_NAME_MAX, 64)
            except MagicException:
                # some versions of libmagic fail this call,
                # so rather than fail hard just use default behavior
                pass

    def from_buffer(self, buf):
        """
        Identify the contents of `buf`

        `buf` may be bytes or, on python3, a str (including str
        subclasses), which is encoded as UTF-8 before being handed to
        libmagic.
        """
        with self.lock:
            try:
                # if we're on python3, convert buf to bytes
                # otherwise this string is passed as wchar*
                # which is not what libmagic expects.
                # isinstance (rather than an exact type comparison) so that
                # str subclasses are encoded too.
                # NEXTBREAK: only take bytes
                if isinstance(buf, str) and str != bytes:
                    buf = buf.encode('utf-8', errors='replace')
                return maybe_decode(magic_buffer(self.cookie, buf))
            except MagicException as e:
                return self._handle509Bug(e)

    def from_file(self, filename):
        """
        Identify the contents of the file named `filename`.

        The file is opened (and immediately closed) first so that a missing
        or unreadable file raises the usual Python I/O error.
        """
        # raise FileNotFoundException or IOError if the file does not exist
        with _real_open(filename):
            pass

        with self.lock:
            try:
                return maybe_decode(magic_file(self.cookie, filename))
            except MagicException as e:
                return self._handle509Bug(e)

    def from_descriptor(self, fd):
        """
        Identify the contents of the open file descriptor `fd`.
        """
        with self.lock:
            try:
                return maybe_decode(magic_descriptor(self.cookie, fd))
            except MagicException as e:
                return self._handle509Bug(e)

    def _handle509Bug(self, e):
        """
        Translate the libmagic 5.09 "null result, no error" failure.

        Returns "application/octet-stream" when `e` carries no message and
        this instance was created with MAGIC_MIME_TYPE; otherwise re-raises
        `e`.
        """
        # libmagic 5.09 has a bug where it might fail to identify the
        # mimetype of a file and returns null from magic_file (and
        # likely _buffer), but also does not return an error message.
        if e.message is None and (self.flags & MAGIC_MIME_TYPE):
            return "application/octet-stream"
        else:
            raise e

    def setparam(self, param, val):
        """Set libmagic parameter `param` (a MAGIC_PARAM_* value) to `val`."""
        return magic_setparam(self.cookie, param, val)

    def getparam(self, param):
        """Return the current value of libmagic parameter `param`."""
        return magic_getparam(self.cookie, param)

    def __del__(self):
        # no _thread_check here because there can be no other
        # references to this object at this point.

        # during shutdown magic_close may have been cleared already so
        # make sure it exists before using it.

        # the self.cookie check should be unnecessary and was an
        # incorrect fix for a threading problem, however I'm leaving
        # it in because it's harmless and I'm slightly afraid to
        # remove it.
        if hasattr(self, 'cookie') and self.cookie and magic_close:
            magic_close(self.cookie)
            self.cookie = None
# Cache of shared Magic instances used by the module-level helpers,
# keyed by the `mime` argument they were created with.
_instances = {}


def _get_magic_type(mime):
    """Return the cached Magic instance for `mime`, creating it on first use."""
    cached = _instances.get(mime)
    if cached is not None:
        return cached

    created = Magic(mime=mime)
    _instances[mime] = created
    return created
def from_file(filename, mime=False):
    """
    Identify the file named `filename` and return the detected filetype.

    The result is the mimetype when mime=True, otherwise a human readable
    description.

    >>> magic.from_file("testdata/test.pdf", mime=True)
    'application/pdf'
    """
    return _get_magic_type(mime).from_file(filename)
def from_buffer(buffer, mime=False):
    """
    Identify the binary string `buffer` and return the detected filetype.

    The result is the mimetype when mime=True, otherwise a human readable
    description.

    >>> magic.from_buffer(open("testdata/test.pdf").read(1024))
    'PDF document, version 1.2'
    """
    return _get_magic_type(mime).from_buffer(buffer)
def from_descriptor(fd, mime=False):
    """
    Identify the open file descriptor `fd` and return the detected filetype.

    The result is the mimetype when mime=True, otherwise a human readable
    description.

    >>> f = open("testdata/test.pdf")
    >>> magic.from_descriptor(f.fileno())
    'PDF document, version 1.2'
    """
    return _get_magic_type(mime).from_descriptor(fd)
from . import loader
# Handle to the libmagic shared library; every magic_* binding below is
# looked up as an attribute of this object.
# NOTE(review): presumably a ctypes library object -- confirm in loader.load_lib.
libmagic = loader.load_lib()

# ctypes type used for the libmagic cookie in the signatures below.
magic_t = ctypes.c_void_p
def errorcheck_null(result, func, args):
    """
    errcheck hook for bindings whose failure value is NULL (None).

    Returns `result` unchanged unless it is None, in which case a
    MagicException carrying magic_error() for the cookie in args[0] is
    raised.
    """
    if result is not None:
        return result
    raise MagicException(magic_error(args[0]))
def errorcheck_negative_one(result, func, args):
    """
    errcheck hook for bindings whose failure value is -1.

    Returns `result` unchanged unless it equals -1, in which case a
    MagicException carrying magic_error() for the cookie in args[0] is
    raised.
    """
    if result != -1:
        return result
    raise MagicException(magic_error(args[0]))
# return str on python3.  Don't want to unconditionally
# decode because that results in unicode on python2
def maybe_decode(s):
    """Return `s` as the native str type: decoded on python3, unchanged on python2."""
    # NEXTBREAK: remove
    if str != bytes:
        # backslashreplace here because sometimes libmagic will return metadata in the charset
        # of the file, which is unknown to us (e.g the title of a Word doc)
        return s.decode('utf-8', 'backslashreplace')
    return s
try:
    from os import PathLike
except ImportError:
    # os.PathLike is not available on this interpreter: nothing to unwrap.
    def unpath(filename):
        """Return `filename` unchanged."""
        return filename
else:
    def unpath(filename):
        """Return the filesystem path of an os.PathLike object; pass other values through."""
        if not isinstance(filename, PathLike):
            return filename
        return filename.__fspath__()
def coerce_filename(filename):
    """
    Convert `filename` into the form passed to libmagic.

    None is returned as None.  Path objects are unwrapped with unpath();
    unicode text is encoded as UTF-8 with 'surrogateescape'; anything else
    is returned as-is.
    """
    if filename is None:
        return None

    path = unpath(filename)

    # ctypes will implicitly convert unicode strings to bytes with
    # .encode('ascii').  If you use the filesystem encoding
    # then you'll get inconsistent behavior (crashes) depending on the user's
    # LANG environment variable
    # NEXTBREAK: remove
    if sys.version_info[0] >= 3:
        text_type = str
    else:
        text_type = unicode

    if isinstance(path, text_type):
        return path.encode('utf-8', 'surrogateescape')
    return path
# Raw ctypes bindings for cookie lifecycle and error reporting.  None of
# these has an errcheck hook, so the C return value reaches the caller
# unchanged.

# magic_open(flags) -> cookie
magic_open = libmagic.magic_open
magic_open.restype = magic_t
magic_open.argtypes = [c_int]

# magic_close(cookie); declared with no return value
magic_close = libmagic.magic_close
magic_close.restype = None
magic_close.argtypes = [magic_t]

# magic_error(cookie) -> char*; used by the errorcheck_* hooks to build
# the MagicException message
magic_error = libmagic.magic_error
magic_error.restype = c_char_p
magic_error.argtypes = [magic_t]

# magic_errno(cookie) -> int
magic_errno = libmagic.magic_errno
magic_errno.restype = c_int
magic_errno.argtypes = [magic_t]
# Raw binding; a NULL result is turned into MagicException by errorcheck_null.
_magic_file = libmagic.magic_file
_magic_file.argtypes = [magic_t, c_char_p]
_magic_file.restype = c_char_p
_magic_file.errcheck = errorcheck_null


def magic_file(cookie, filename):
    """Identify the file `filename` using `cookie`; the name is passed through coerce_filename() first."""
    encoded_name = coerce_filename(filename)
    return _magic_file(cookie, encoded_name)
# Raw binding; a NULL result is turned into MagicException by errorcheck_null.
_magic_buffer = libmagic.magic_buffer
_magic_buffer.argtypes = [magic_t, c_void_p, c_size_t]
_magic_buffer.restype = c_char_p
_magic_buffer.errcheck = errorcheck_null


def magic_buffer(cookie, buf):
    """Identify the contents of `buf` using `cookie`; the length passed to libmagic is len(buf)."""
    size = len(buf)
    return _magic_buffer(cookie, buf, size)
# Raw binding; a NULL result is turned into MagicException by errorcheck_null.
# The symbol is bound once, under the private name only.  The previous
# additional public binding configured the same libmagic function a second
# time and was immediately replaced by the wrapper function below.
_magic_descriptor = libmagic.magic_descriptor
_magic_descriptor.restype = c_char_p
_magic_descriptor.argtypes = [magic_t, c_int]
_magic_descriptor.errcheck = errorcheck_null


def magic_descriptor(cookie, fd):
    """Identify the contents of the open file descriptor `fd` using `cookie`."""
    return _magic_descriptor(cookie, fd)
# Raw binding; a -1 result is turned into MagicException by errorcheck_negative_one.
_magic_load = libmagic.magic_load
_magic_load.argtypes = [magic_t, c_char_p]
_magic_load.restype = c_int
_magic_load.errcheck = errorcheck_negative_one


def magic_load(cookie, filename):
    """Load the magic database `filename` into `cookie`; the name is passed through coerce_filename() first."""
    database = coerce_filename(filename)
    return _magic_load(cookie, database)
# Raw ctypes bindings exposed as-is.  No errcheck hook is installed on these,
# so callers receive the C int return value directly and must check it
# themselves.

# magic_setflags(cookie, flags) -> int
magic_setflags = libmagic.magic_setflags
magic_setflags.restype = c_int
magic_setflags.argtypes = [magic_t, c_int]

# magic_check(cookie, filename) -> int
magic_check = libmagic.magic_check
magic_check.restype = c_int
magic_check.argtypes = [magic_t, c_char_p]

# magic_compile(cookie, filename) -> int
magic_compile = libmagic.magic_compile
magic_compile.restype = c_int
magic_compile.argtypes = [magic_t, c_char_p]
# magic_setparam / magic_getparam are optional: bind them only when the
# loaded libmagic exports both symbols.
_has_param = (hasattr(libmagic, 'magic_setparam')
              and hasattr(libmagic, 'magic_getparam'))
if _has_param:
    _magic_setparam = libmagic.magic_setparam
    _magic_setparam.argtypes = [magic_t, c_int, POINTER(c_size_t)]
    _magic_setparam.restype = c_int
    _magic_setparam.errcheck = errorcheck_negative_one

    _magic_getparam = libmagic.magic_getparam
    _magic_getparam.argtypes = [magic_t, c_int, POINTER(c_size_t)]
    _magic_getparam.restype = c_int
    _magic_getparam.errcheck = errorcheck_negative_one
def magic_setparam(cookie, param, val):
    """
    Set libmagic parameter `param` on `cookie` to `val`.

    Raises NotImplementedError when the loaded libmagic has no
    magic_setparam/magic_getparam.
    """
    if _has_param:
        # libmagic takes the value by pointer
        boxed = c_size_t(val)
        return _magic_setparam(cookie, param, byref(boxed))
    raise NotImplementedError("magic_setparam not implemented")
def magic_getparam(cookie, param):
    """
    Return the value of libmagic parameter `param` for `cookie`.

    Raises NotImplementedError when the loaded libmagic has no
    magic_setparam/magic_getparam.
    """
    if _has_param:
        # libmagic writes the value through the pointer we pass in
        out = c_size_t()
        _magic_getparam(cookie, param, byref(out))
        return out.value
    raise NotImplementedError("magic_getparam not implemented")
# magic_version is optional: bind it only when the loaded libmagic exports it.
_has_version = hasattr(libmagic, "magic_version")
if _has_version:
    magic_version = libmagic.magic_version
    magic_version.argtypes = []
    magic_version.restype = c_int
def version():
    """
    Return the integer reported by libmagic's magic_version().

    Raises NotImplementedError when the loaded libmagic does not export
    magic_version.
    """
    if _has_version:
        return magic_version()
    raise NotImplementedError("magic_version not implemented")
# Flag bits accepted by magic_open() / magic_setflags().  They are combined
# with bitwise OR (see Magic.__init__).
MAGIC_NONE = 0x000000  # No flags
MAGIC_DEBUG = 0x000001  # Turn on debugging
MAGIC_SYMLINK = 0x000002  # Follow symlinks
MAGIC_COMPRESS = 0x000004  # Check inside compressed files
MAGIC_DEVICES = 0x000008  # Look at the contents of devices
MAGIC_MIME_TYPE = 0x000010  # Return a mime string
MAGIC_MIME_ENCODING = 0x000400  # Return the MIME encoding
# TODO: should be
# MAGIC_MIME = MAGIC_MIME_TYPE | MAGIC_MIME_ENCODING
# As defined here MAGIC_MIME has the same value as MAGIC_MIME_TYPE.
MAGIC_MIME = 0x000010  # Return a mime string
MAGIC_EXTENSION = 0x1000000  # Return a /-separated list of extensions

MAGIC_CONTINUE = 0x000020  # Return all matches
MAGIC_CHECK = 0x000040  # Print warnings to stderr
MAGIC_PRESERVE_ATIME = 0x000080  # Restore access time on exit
MAGIC_RAW = 0x000100  # Don't translate unprintable chars
MAGIC_ERROR = 0x000200  # Handle ENOENT etc as real errors

# Flags that switch individual checks off
MAGIC_NO_CHECK_COMPRESS = 0x001000  # Don't check for compressed files
MAGIC_NO_CHECK_TAR = 0x002000  # Don't check for tar files
MAGIC_NO_CHECK_SOFT = 0x004000  # Don't check magic entries
MAGIC_NO_CHECK_APPTYPE = 0x008000  # Don't check application type
MAGIC_NO_CHECK_ELF = 0x010000  # Don't check for elf details
MAGIC_NO_CHECK_ASCII = 0x020000  # Don't check for ascii files
MAGIC_NO_CHECK_TROFF = 0x040000  # Don't check ascii/troff
MAGIC_NO_CHECK_FORTRAN = 0x080000  # Don't check ascii/fortran
MAGIC_NO_CHECK_TOKENS = 0x100000  # Don't check ascii/tokens
# Parameter identifiers for magic_setparam() / magic_getparam().
MAGIC_PARAM_INDIR_MAX = 0  # Recursion limit for indirect magic
MAGIC_PARAM_NAME_MAX = 1  # Use count limit for name/use magic
MAGIC_PARAM_ELF_PHNUM_MAX = 2  # Max ELF program sections processed
MAGIC_PARAM_ELF_SHNUM_MAX = 3  # Max ELF sections processed
MAGIC_PARAM_ELF_NOTES_MAX = 4  # Max ELF notes processed
MAGIC_PARAM_REGEX_MAX = 5  # Length limit for regex searches
MAGIC_PARAM_BYTES_MAX = 6  # Max number of bytes to read from file
# This package name conflicts with the one provided by upstream
# libmagic.  This is a common source of confusion for users.  To
# resolve, we ship a copy of that module, and expose its functions
# wrapped in deprecation warnings.
def _add_compat(to_module):
    """
    Install the compat module's public API into the namespace `to_module`.

    to_module - a dict-like namespace (this module's globals()).

    The compat functions are added wrapped so that each call emits a
    PendingDeprecationWarning.  Upper-case constants from compat are copied
    over when not already present.

    Raises Exception if a constant exists in both namespaces with different
    values (MAGIC_MIME is exempt).
    """
    import functools
    import re
    import warnings
    from magic import compat

    def deprecation_wrapper(fn):
        # wraps() keeps the wrapped function's __name__/__doc__ so the
        # exported compat functions remain introspectable.
        @functools.wraps(fn)
        def _(*args, **kwargs):
            warnings.warn(
                "Using compatibility mode with libmagic's python binding. "
                "See https://github.com/ahupp/python-magic/blob/master/COMPAT.md for details.",
                PendingDeprecationWarning,
                # attribute the warning to the caller, not to this wrapper
                stacklevel=2)

            return fn(*args, **kwargs)

        return _

    compat_functions = ('detect_from_filename',
                        'detect_from_content',
                        'detect_from_fobj',
                        'open')
    for fname in compat_functions:
        to_module[fname] = deprecation_wrapper(compat.__dict__[fname])

    # copy constants over, ensuring there's no conflicts
    is_const_re = re.compile("^[A-Z_]+$")
    allowed_inconsistent = {'MAGIC_MIME'}
    for name, value in compat.__dict__.items():
        if not is_const_re.match(name):
            continue
        if name not in to_module:
            to_module[name] = value
        elif name not in allowed_inconsistent and to_module[name] != value:
            raise Exception("inconsistent value for " + name)


_add_compat(globals())