Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/magic/__init__.py: 59%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

255 statements  

1""" 

2magic is a wrapper around the libmagic file identification library. 

3 

4See README for more information. 

5 

6Usage: 

7 

8>>> import magic 

9>>> magic.from_file("testdata/test.pdf") 

10'PDF document, version 1.2' 

11>>> magic.from_file("testdata/test.pdf", mime=True) 

12'application/pdf' 

13>>> magic.from_buffer(open("testdata/test.pdf", "rb").read(1024)) 

14'PDF document, version 1.2' 

15>>> 

16 

17""" 

18 

19import sys 

20import glob 

21import ctypes 

22import ctypes.util 

23import threading 

24import logging 

25 

26from ctypes import c_char_p, c_int, c_size_t, c_void_p, byref, POINTER 

27 

# avoid shadowing the real open with the version from compat.py:
# _add_compat() at the bottom of this module stores compat's `open` in this
# module's globals, so keep a reference to the builtin under another name.
_real_open = open

30 

31 

class MagicException(Exception):
    """
    Error raised when a libmagic call reports failure.

    message - the text returned by magic_error() for the failing cookie.
              It may be None when libmagic gave no message; callers such
              as Magic._handle509Bug test for that explicitly.
    """

    def __init__(self, message):
        # Name this class (not Exception) in super() so the normal MRO is
        # followed and Exception's own initialiser is not skipped.
        super(MagicException, self).__init__(message)
        self.message = message

36 

37 

class Magic:
    """
    Magic is a wrapper around the libmagic C library.

    Each instance opens its own libmagic cookie in __init__ and closes it in
    __del__.  The from_* methods hold self.lock around every libmagic call
    they make on that cookie.
    """

    def __init__(self, mime=False, magic_file=None, mime_encoding=False,
                 keep_going=False, uncompress=False, raw=False, extension=False):
        """
        Create a new libmagic wrapper.

        mime - if True, mimetypes are returned instead of textual descriptions
        mime_encoding - if True, codec is returned
        magic_file - use a mime database other than the system default
        keep_going - don't stop at the first match, keep going
        uncompress - Try to look inside compressed files.
        raw - Do not try to decode "non-printable" chars.
        extension - Print a slash-separated list of valid extensions for the file type found.

        Raises MagicException if loading the database fails, and
        NotImplementedError if `extension` is requested but the loaded
        libmagic has no magic_version or reports a version below 524.
        """
        self.flags = MAGIC_NONE
        if mime:
            self.flags |= MAGIC_MIME_TYPE
        if mime_encoding:
            self.flags |= MAGIC_MIME_ENCODING
        if keep_going:
            self.flags |= MAGIC_CONTINUE
        if uncompress:
            self.flags |= MAGIC_COMPRESS
        if raw:
            self.flags |= MAGIC_RAW
        if extension:
            self.flags |= MAGIC_EXTENSION

        self.cookie = magic_open(self.flags)
        self.lock = threading.Lock()

        magic_load(self.cookie, magic_file)

        # MAGIC_EXTENSION was added in 523 or 524, so bail if
        # it doesn't appear to be available
        if extension and (not _has_version or version() < 524):
            raise NotImplementedError('MAGIC_EXTENSION is not supported in this version of libmagic')

        # For https://github.com/ahupp/python-magic/issues/190
        # libmagic has fixed internal limits that some files exceed, causing
        # an error.  We can avoid this (at least for the sample file given)
        # by bumping the limit up.  It's not clear if this is a general solution
        # or whether other internal limits should be increased, but given
        # the lack of other reports I'll assume this is rare.
        if _has_param:
            try:
                self.setparam(MAGIC_PARAM_NAME_MAX, 64)
            except MagicException:
                # some versions of libmagic fail this call,
                # so rather than fail hard just use default behavior
                pass

    def from_buffer(self, buf):
        """
        Identify the contents of `buf`

        On Python 3 a str (or str subclass) is encoded to UTF-8 bytes first;
        anything else is handed to libmagic unchanged.
        """
        with self.lock:
            try:
                # if we're on python3, convert buf to bytes
                # otherwise this string is passed as wchar*
                # which is not what libmagic expects
                # NEXTBREAK: only take bytes
                # isinstance (rather than an exact type comparison) so that
                # str subclasses are encoded as well.
                if isinstance(buf, str) and str != bytes:
                    buf = buf.encode('utf-8', errors='replace')
                return maybe_decode(magic_buffer(self.cookie, buf))
            except MagicException as e:
                return self._handle509Bug(e)

    def from_file(self, filename):
        """
        Identify the file named `filename`.

        The file is opened and immediately closed first, so a missing or
        unreadable file raises the usual open() error before libmagic runs.
        """
        # raise FileNotFoundException or IOError if the file does not exist
        with _real_open(filename):
            pass

        with self.lock:
            try:
                return maybe_decode(magic_file(self.cookie, filename))
            except MagicException as e:
                return self._handle509Bug(e)

    def from_descriptor(self, fd):
        """
        Identify the contents of the open file descriptor `fd`.
        """
        with self.lock:
            try:
                return maybe_decode(magic_descriptor(self.cookie, fd))
            except MagicException as e:
                return self._handle509Bug(e)

    def _handle509Bug(self, e):
        """
        Return a fallback mimetype for the libmagic 5.09 bug, else re-raise `e`.
        """
        # libmagic 5.09 has a bug where it might fail to identify the
        # mimetype of a file and returns null from magic_file (and
        # likely _buffer), but also does not return an error message.
        if e.message is None and (self.flags & MAGIC_MIME_TYPE):
            return "application/octet-stream"
        else:
            raise e

    def setparam(self, param, val):
        """Set libmagic parameter `param` to `val` on this instance's cookie."""
        return magic_setparam(self.cookie, param, val)

    def getparam(self, param):
        """Return the value of libmagic parameter `param` for this instance's cookie."""
        return magic_getparam(self.cookie, param)

    def __del__(self):
        # no _thread_check here because there can be no other
        # references to this object at this point.

        # during shutdown magic_close may have been cleared already so
        # make sure it exists before using it.

        # the self.cookie check should be unnecessary and was an
        # incorrect fix for a threading problem, however I'm leaving
        # it in because it's harmless and I'm slightly afraid to
        # remove it.
        if hasattr(self, 'cookie') and self.cookie and magic_close:
            magic_close(self.cookie)
            self.cookie = None

157 

158 

# Cache of Magic objects used by the module-level from_* helpers,
# keyed by the `mime` argument they were created with.
_instances = {}


def _get_magic_type(mime):
    """Return the cached Magic(mime=mime), creating and storing it on first use."""
    cached = _instances.get(mime)
    if cached is not None:
        return cached
    created = Magic(mime=mime)
    _instances[mime] = created
    return created

167 

168 

def from_file(filename, mime=False):
    """
    Accepts a filename and returns the detected filetype.  Return
    value is the mimetype if mime=True, otherwise a human readable
    name.

    >>> magic.from_file("testdata/test.pdf", mime=True)
    'application/pdf'
    """
    m = _get_magic_type(mime)
    return m.from_file(filename)

180 

181 

def from_buffer(buffer, mime=False):
    """
    Accepts a binary string and returns the detected filetype.  Return
    value is the mimetype if mime=True, otherwise a human readable
    name.

    >>> magic.from_buffer(open("testdata/test.pdf", "rb").read(1024))
    'PDF document, version 1.2'
    """
    m = _get_magic_type(mime)
    return m.from_buffer(buffer)

193 

194 

def from_descriptor(fd, mime=False):
    """
    Identify the file behind an open file descriptor.

    Returns the mimetype when mime=True, otherwise a human readable
    description.

    >>> f = open("testdata/test.pdf")
    >>> magic.from_descriptor(f.fileno())
    'PDF document, version 1.2'
    """
    return _get_magic_type(mime).from_descriptor(fd)

207 

208from . import loader 

# Handle to the libmagic shared library; every binding below is looked up
# as an attribute of this object.
# NOTE(review): how loader.load_lib() locates the library is not visible
# here - see loader.py.
libmagic = loader.load_lib()

# ctypes type used for the libmagic cookie in the restype/argtypes below.
magic_t = ctypes.c_void_p

212 

213 

def errorcheck_null(result, func, args):
    """
    ctypes errcheck hook: pass `result` through unless it is None.

    On None, raise MagicException carrying magic_error() for the cookie
    in args[0].
    """
    if result is not None:
        return result
    raise MagicException(magic_error(args[0]))

220 

221 

def errorcheck_negative_one(result, func, args):
    """
    ctypes errcheck hook: pass `result` through unless it equals -1.

    On -1, raise MagicException carrying magic_error() for the cookie
    in args[0].
    """
    if result == -1:
        raise MagicException(magic_error(args[0]))
    return result

228 

229 

# return str on python3.  Don't want to unconditionally
# decode because that results in unicode on python2
def maybe_decode(s):
    """Return `s` unchanged where str is bytes, else decode it as UTF-8."""
    # NEXTBREAK: remove
    if str == bytes:
        return s
    # backslashreplace here because sometimes libmagic will return metadata in the charset
    # of the file, which is unknown to us (e.g the title of a Word doc)
    return s.decode('utf-8', 'backslashreplace')

240 

241 

try:
    from os import PathLike
except ImportError:
    # No os.PathLike on this interpreter: nothing to unwrap.
    def unpath(filename):
        """Return `filename` unchanged."""
        return filename
else:
    def unpath(filename):
        """Return filename.__fspath__() for os.PathLike objects, else `filename` itself."""
        return filename.__fspath__() if isinstance(filename, PathLike) else filename

252 

def coerce_filename(filename):
    """
    Prepare `filename` for a c_char_p argument.

    None is returned as None.  Otherwise the value goes through unpath(),
    and a text string is encoded to UTF-8 bytes with 'surrogateescape';
    any other value is returned as unpath() produced it.
    """
    if filename is None:
        return None

    filename = unpath(filename)

    # ctypes will implicitly convert unicode strings to bytes with
    # .encode('ascii').  If you use the filesystem encoding
    # then you'll get inconsistent behavior (crashes) depending on the user's
    # LANG environment variable
    # NEXTBREAK: remove
    if sys.version_info[0] <= 2:
        is_unicode = isinstance(filename, unicode)
    else:
        is_unicode = isinstance(filename, str)

    return filename.encode('utf-8', 'surrogateescape') if is_unicode else filename

272 

273 

# magic_open(flags) -> cookie.  Used by Magic.__init__ to create the cookie.
magic_open = libmagic.magic_open
magic_open.restype = magic_t
magic_open.argtypes = [c_int]

# magic_close(cookie).  Declared with no return value; called from
# Magic.__del__.
magic_close = libmagic.magic_close
magic_close.restype = None
magic_close.argtypes = [magic_t]

# magic_error(cookie) -> message.  The errorcheck_* helpers call this to
# build the MagicException they raise.
magic_error = libmagic.magic_error
magic_error.restype = c_char_p
magic_error.argtypes = [magic_t]

# magic_errno(cookie) -> int.
magic_errno = libmagic.magic_errno
magic_errno.restype = c_int
magic_errno.argtypes = [magic_t]

289 

# Raw binding; a None result is turned into MagicException by errorcheck_null.
_magic_file = libmagic.magic_file
_magic_file.argtypes = [magic_t, c_char_p]
_magic_file.restype = c_char_p
_magic_file.errcheck = errorcheck_null


def magic_file(cookie, filename):
    """Call libmagic's magic_file on `cookie` with `filename` passed through coerce_filename()."""
    encoded = coerce_filename(filename)
    return _magic_file(cookie, encoded)

298 

299 

# Raw binding; a None result is turned into MagicException by errorcheck_null.
_magic_buffer = libmagic.magic_buffer
_magic_buffer.argtypes = [magic_t, c_void_p, c_size_t]
_magic_buffer.restype = c_char_p
_magic_buffer.errcheck = errorcheck_null


def magic_buffer(cookie, buf):
    """Call libmagic's magic_buffer on `cookie` with `buf` and its length."""
    size = len(buf)
    return _magic_buffer(cookie, buf, size)

308 

309 

# Raw binding; a None result is turned into MagicException by errorcheck_null.
# The binding is configured once, under the private name only: the public
# name `magic_descriptor` is the wrapper function defined below.
_magic_descriptor = libmagic.magic_descriptor
_magic_descriptor.restype = c_char_p
_magic_descriptor.argtypes = [magic_t, c_int]
_magic_descriptor.errcheck = errorcheck_null


def magic_descriptor(cookie, fd):
    """Call libmagic's magic_descriptor on `cookie` for the file descriptor `fd`."""
    return _magic_descriptor(cookie, fd)

323 

324 

# Raw binding; a -1 result is turned into MagicException by
# errorcheck_negative_one.
_magic_load = libmagic.magic_load
_magic_load.argtypes = [magic_t, c_char_p]
_magic_load.restype = c_int
_magic_load.errcheck = errorcheck_negative_one


def magic_load(cookie, filename):
    """Call libmagic's magic_load on `cookie` with `filename` passed through coerce_filename()."""
    database = coerce_filename(filename)
    return _magic_load(cookie, database)

333 

334 

# The three bindings below are exposed as-is: no errcheck hook is attached,
# so callers receive the raw int result.

# magic_setflags(cookie, flags) -> int
magic_setflags = libmagic.magic_setflags
magic_setflags.restype = c_int
magic_setflags.argtypes = [magic_t, c_int]

# magic_check(cookie, filename) -> int
magic_check = libmagic.magic_check
magic_check.restype = c_int
magic_check.argtypes = [magic_t, c_char_p]

# magic_compile(cookie, filename) -> int
magic_compile = libmagic.magic_compile
magic_compile.restype = c_int
magic_compile.argtypes = [magic_t, c_char_p]

346 

# magic_setparam / magic_getparam are bound only when the loaded libmagic
# exports both of them; _has_param records whether that is the case.
_has_param = (hasattr(libmagic, 'magic_setparam')
              and hasattr(libmagic, 'magic_getparam'))
if _has_param:
    _magic_setparam = libmagic.magic_setparam
    _magic_setparam.argtypes = [magic_t, c_int, POINTER(c_size_t)]
    _magic_setparam.restype = c_int
    _magic_setparam.errcheck = errorcheck_negative_one

    _magic_getparam = libmagic.magic_getparam
    _magic_getparam.argtypes = [magic_t, c_int, POINTER(c_size_t)]
    _magic_getparam.restype = c_int
    _magic_getparam.errcheck = errorcheck_negative_one

359 

360 

def magic_setparam(cookie, param, val):
    """
    Set libmagic parameter `param` to `val` on `cookie`.

    Raises NotImplementedError when the parameter functions were not bound.
    """
    if _has_param:
        # libmagic takes the value by pointer, so wrap it in a c_size_t.
        boxed = c_size_t(val)
        return _magic_setparam(cookie, param, byref(boxed))
    raise NotImplementedError("magic_setparam not implemented")

366 

367 

def magic_getparam(cookie, param):
    """
    Return the current value of libmagic parameter `param` for `cookie`.

    Raises NotImplementedError when the parameter functions were not bound.
    """
    if _has_param:
        # libmagic writes the value through the pointer we pass in.
        out = c_size_t()
        _magic_getparam(cookie, param, byref(out))
        return out.value
    raise NotImplementedError("magic_getparam not implemented")

374 

375 

# magic_version is bound only when the loaded libmagic exports it;
# _has_version records whether that is the case.
_has_version = hasattr(libmagic, "magic_version")
if _has_version:
    magic_version = libmagic.magic_version
    magic_version.argtypes = []
    magic_version.restype = c_int

382 

383 

def version():
    """
    Return the integer reported by libmagic's magic_version().

    Raises NotImplementedError when magic_version was not bound.
    """
    if _has_version:
        return magic_version()
    raise NotImplementedError("magic_version not implemented")

388 

389 

# Flag bits accepted by magic_open / magic_setflags.
MAGIC_NONE = 0x000000  # No flags
MAGIC_DEBUG = 0x000001  # Turn on debugging
MAGIC_SYMLINK = 0x000002  # Follow symlinks
MAGIC_COMPRESS = 0x000004  # Check inside compressed files
MAGIC_DEVICES = 0x000008  # Look at the contents of devices
MAGIC_MIME_TYPE = 0x000010  # Return a mime string
MAGIC_MIME_ENCODING = 0x000400  # Return the MIME encoding
# TODO: should be
# MAGIC_MIME = MAGIC_MIME_TYPE | MAGIC_MIME_ENCODING
MAGIC_MIME = 0x000010  # Return a mime string
MAGIC_EXTENSION = 0x1000000  # Return a /-separated list of extensions

MAGIC_CONTINUE = 0x000020  # Return all matches
MAGIC_CHECK = 0x000040  # Print warnings to stderr
MAGIC_PRESERVE_ATIME = 0x000080  # Restore access time on exit
MAGIC_RAW = 0x000100  # Don't translate unprintable chars
MAGIC_ERROR = 0x000200  # Handle ENOENT etc as real errors

MAGIC_NO_CHECK_COMPRESS = 0x001000  # Don't check for compressed files
MAGIC_NO_CHECK_TAR = 0x002000  # Don't check for tar files
MAGIC_NO_CHECK_SOFT = 0x004000  # Don't check magic entries
MAGIC_NO_CHECK_APPTYPE = 0x008000  # Don't check application type
MAGIC_NO_CHECK_ELF = 0x010000  # Don't check for elf details
MAGIC_NO_CHECK_ASCII = 0x020000  # Don't check for ascii files
MAGIC_NO_CHECK_TROFF = 0x040000  # Don't check ascii/troff
MAGIC_NO_CHECK_FORTRAN = 0x080000  # Don't check ascii/fortran
MAGIC_NO_CHECK_TOKENS = 0x100000  # Don't check ascii/tokens

# Parameter ids for magic_setparam / magic_getparam.
MAGIC_PARAM_INDIR_MAX = 0  # Recursion limit for indirect magic
MAGIC_PARAM_NAME_MAX = 1  # Use count limit for name/use magic
MAGIC_PARAM_ELF_PHNUM_MAX = 2  # Max ELF program sections processed
MAGIC_PARAM_ELF_SHNUM_MAX = 3  # Max ELF sections processed
MAGIC_PARAM_ELF_NOTES_MAX = 4  # Max ELF notes processed
MAGIC_PARAM_REGEX_MAX = 5  # Length limit for regex searches
MAGIC_PARAM_BYTES_MAX = 6  # Max number of bytes to read from file

425 

426 

# This package name conflicts with the one provided by upstream
# libmagic.  This is a common source of confusion for users.  To
# resolve, we ship a copy of that module, and expose its functions
# wrapped in deprecation warnings.
def _add_compat(to_module):
    """
    Copy the compat module's public API into the mapping `to_module`.

    The four compat entry points are installed wrapped so that each call
    emits a PendingDeprecationWarning.  Every compat name made up solely of
    upper-case letters and underscores is copied too, unless `to_module`
    already has it.

    Raises Exception if a name present in both has different values, other
    than the names listed in `allowed_inconsistent`.
    """
    import functools
    import re
    import warnings
    from magic import compat

    def deprecation_wrapper(fn):
        # wraps() keeps the wrapped function's __name__/__doc__ on the wrapper.
        @functools.wraps(fn)
        def _(*args, **kwargs):
            warnings.warn(
                "Using compatibility mode with libmagic's python binding. "
                "See https://github.com/ahupp/python-magic/blob/master/COMPAT.md for details.",
                PendingDeprecationWarning,
                # attribute the warning to the caller, not to this wrapper
                stacklevel=2)

            return fn(*args, **kwargs)

        return _

    fn = ['detect_from_filename',
          'detect_from_content',
          'detect_from_fobj',
          'open']
    for fname in fn:
        to_module[fname] = deprecation_wrapper(compat.__dict__[fname])

    # copy constants over, ensuring there's no conflicts
    is_const_re = re.compile("^[A-Z_]+$")
    allowed_inconsistent = {'MAGIC_MIME'}
    for name, value in compat.__dict__.items():
        if not is_const_re.match(name):
            continue
        if name not in to_module:
            to_module[name] = value
        elif name not in allowed_inconsistent and to_module[name] != value:
            raise Exception("inconsistent value for " + name)


_add_compat(globals())

469_add_compat(globals())