Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/xdg/Mime.py: 44%
458 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:37 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:37 +0000
1"""
2This module is based on a rox module (LGPL):
4http://cvs.sourceforge.net/viewcvs.py/rox/ROX-Lib2/python/rox/mime.py?rev=1.21&view=log
6This module provides access to the shared MIME database.
8types is a dictionary of all known MIME types, indexed by the type name, e.g.
9types['application/x-python']
11Applications can install information about MIME types by storing an
12XML file as <MIME>/packages/<application>.xml and running the
13update-mime-database command, which is provided by the freedesktop.org
14shared mime database package.
16See http://www.freedesktop.org/standards/shared-mime-info-spec/ for
17information about the format of these files.
19(based on version 0.13)
20"""
22import os
23import re
24import stat
25import sys
26import fnmatch
28from xdg import BaseDirectory
29import xdg.Locale
31from xml.dom import minidom, XML_NAMESPACE
32from collections import defaultdict
# XML namespace used by the shared MIME info XML documents.
FREE_NS = 'http://www.freedesktop.org/standards/shared-mime-info'

# Cache of MIMEtype objects, keyed by (media, subtype) tuples.
types = {}

exts = None       # Maps extensions to types
globs = None      # Replaced by a GlobDB instance once the cache is loaded
literals = None   # Maps literal names to types
magic = None      # Replaced by a MagicDB instance once the cache is loaded

# True when running on Python 3 or later.
PY3 = sys.version_info[0] >= 3
def _get_node_data(node):
    """Return the text held by the children of an XML node, stripped of
    surrounding whitespace."""
    pieces = []
    for child in node.childNodes:
        pieces.append(child.nodeValue)
    return ''.join(pieces).strip()
def lookup(media, subtype=None):
    """Return the MIMEtype object for the given type name.

    Kept for backwards compatibility: it simply calls MIMEtype.

    The name may be given in one piece ('text/plain') or as two separate
    arguments ('text', 'plain').
    """
    return MIMEtype(media, subtype=subtype)
class MIMEtype(object):
    """Holds the data known about one MIME type.

    Instances are cached in the module-level ``types`` dict, so calling the
    class twice with the same name returns the same object. The name may be
    given in one piece ('text/plain') or as two ('text', 'plain').
    """
    def __new__(cls, media, subtype=None):
        if subtype is None and '/' in media:
            media, subtype = media.split('/', 1)
        assert '/' not in subtype
        key = (media.lower(), subtype.lower())

        if key in types:
            return types[key]

        instance = super(MIMEtype, cls).__new__(cls)
        instance._init(*key)
        types[key] = instance
        return instance

    # Initialisation lives in _init rather than __init__ because __init__
    # would run again every time __new__ hands back a cached instance.
    # __new__ calls _init explicitly, and only for newly created objects.
    def _init(self, media, subtype):
        self.media, self.subtype = media, subtype
        self._comment = None

    def _load(self):
        "Loads comment for current language. Use get_comment() instead."
        xml_name = os.path.join('mime', self.media, self.subtype + '.xml')
        for path in BaseDirectory.load_data_paths(xml_name):
            document = minidom.parse(path)
            if document is None:
                continue
            root = document.documentElement
            for node in root.getElementsByTagNameNS(FREE_NS, 'comment'):
                lang = node.getAttributeNS(XML_NAMESPACE, 'lang') or 'en'
                # 2 for a comment in one of the user's languages, else 1.
                score = 2 if lang in xdg.Locale.langs else 1
                if score > self._comment[0]:
                    self._comment = (score, _get_node_data(node))
                if score == 2:
                    return

    # FIXME: add get_icon method
    def get_comment(self):
        """Return the comment for the current language, loading it on the
        first call."""
        # Should we ever reload?
        if self._comment is not None:
            return self._comment[1]
        # Fallback used when no comment is found: the type name itself.
        self._comment = (0, str(self))
        self._load()
        return self._comment[1]

    def canonical(self):
        """Return the canonical MIMEtype if this one is an alias, otherwise
        return self."""
        update_cache()
        name = str(self)
        if name not in aliases:
            return self
        return lookup(aliases[name])

    def inherits_from(self):
        """Return the set of MIMEtype objects this type inherits from."""
        update_cache()
        return set(map(lookup, inheritance[str(self)]))

    def __str__(self):
        return '%s/%s' % (self.media, self.subtype)

    def __repr__(self):
        return 'MIMEtype(%r, %r)' % (self.media, self.subtype)

    def __hash__(self):
        return hash(self.media) ^ hash(self.subtype)
class UnknownMagicRuleFormat(ValueError):
    """Raised when the trailing part of a magic rule is not understood."""
class DiscardMagicRules(Exception):
    """Signals that __NOMAGIC__ was read; the handler discards the rules
    collected so far."""
class MagicRule:
    """One match rule read from a binary MIME magic file.

    The rule matches when ``value`` is found in the data at any offset from
    ``start`` to ``start + range - 1``. When ``mask`` is set, the data is
    ANDed with it byte by byte before the comparison. ``also`` optionally
    holds nested rules which must match too. ``word`` is parsed and stored
    but is not consulted by the matching code in this class.
    """
    also = None

    def __init__(self, start, value, mask, word, range):
        self.start = start
        self.value = value
        self.mask = mask
        self.word = word
        self.range = range

    rule_ending_re = re.compile(br'(?:~(\d+))?(?:\+(\d+))?\n$')

    @staticmethod
    def _read_until(f, line, needed):
        """Append lines from f to line until it holds at least needed bytes.

        Raises ValueError if the file ends first; without this check a
        truncated file would make the caller loop forever, because
        readline() keeps returning an empty string at end of file.
        """
        while len(line) < needed:
            more = f.readline()
            if not more:
                raise ValueError('Truncated magic rule')
            line += more
        return line

    @classmethod
    def from_file(cls, f):
        """Read a rule from the binary magics file. Returns a 2-tuple of
        the nesting depth and the MagicRule.

        Raises DiscardMagicRules when the rule is the __NOMAGIC__ marker,
        UnknownMagicRuleFormat when the end of the rule is not understood,
        and ValueError when the rule is malformed or the file is truncated.
        """
        line = f.readline()

        # [indent] '>'
        nest_depth, line = line.split(b'>', 1)
        nest_depth = int(nest_depth) if nest_depth else 0

        # start-offset '='
        start, line = line.split(b'=', 1)
        start = int(start)

        if line == b'__NOMAGIC__\n':
            raise DiscardMagicRules

        # value length (2 bytes, big endian)
        # The length bytes may themselves be newlines, in which case
        # readline() stopped early, so make sure both bytes are present.
        line = cls._read_until(f, line, 2)
        if sys.version_info[0] >= 3:
            lenvalue = int.from_bytes(line[:2], byteorder='big')
        else:
            lenvalue = (ord(line[0]) << 8) + ord(line[1])
        line = line[2:]

        # value
        # This can contain newlines, so we may need to read more lines.
        # One byte more than the value is required so that whatever follows
        # it (mask or rule ending) has been read as well.
        line = cls._read_until(f, line, lenvalue + 1)
        value, line = line[:lenvalue], line[lenvalue:]

        # ['&' mask]
        if line.startswith(b'&'):
            # The mask can contain newlines too. Need the '&', the mask
            # itself and at least one byte of the rule ending.
            line = cls._read_until(f, line, lenvalue + 2)
            mask, line = line[1:lenvalue+1], line[lenvalue+1:]
        else:
            mask = None

        # ['~' word-size] ['+' range-length]
        ending = cls.rule_ending_re.match(line)
        if not ending:
            # Per the spec, this will be caught and ignored, to allow
            # for future extensions.
            raise UnknownMagicRuleFormat(repr(line))

        word, range = ending.groups()
        word = int(word) if (word is not None) else 1
        range = int(range) if (range is not None) else 1

        return nest_depth, cls(start, value, mask, word, range)

    def maxlen(self):
        """Return how many leading bytes of data this rule (including its
        nested rules) may need to look at."""
        l = self.start + len(self.value) + self.range
        if self.also:
            return max(l, self.also.maxlen())
        return l

    def match(self, buffer):
        """Return a true value if this rule and its nested rules, if any,
        match the bytes in buffer, and False otherwise."""
        if not self.match0(buffer):
            return False
        if self.also:
            return self.also.match(buffer)
        return True

    def _apply_mask(self, window):
        """Return window ANDed byte by byte with self.mask, as a byte
        string that can be compared with self.value."""
        if sys.version_info[0] >= 3:
            return bytes(b & m for b, m in zip(window, self.mask))
        return ''.join(chr(ord(b) & ord(m)) for b, m in zip(window, self.mask))

    def match0(self, buffer):
        """Return True if this rule alone (ignoring nested rules) matches."""
        buflen = len(buffer)
        lenvalue = len(self.value)
        for offset in range(self.range):
            begin = self.start + offset
            end = begin + lenvalue
            if buflen < end:
                # Later offsets would need even more data.
                return False
            window = buffer[begin:end]
            if self.mask:
                window = self._apply_mask(window)
            if window == self.value:
                return True
        return False

    def __repr__(self):
        return 'MagicRule(start=%r, value=%r, mask=%r, word=%r, range=%r)' %(
                                  self.start,
                                  self.value,
                                  self.mask,
                                  self.word,
                                  self.range)
class MagicMatchAny(object):
    """Match any of a set of magic rules.

    This has a similar interface to MagicRule objects (i.e. its match() and
    maxlen() methods), to allow for duck typing.
    """
    def __init__(self, rules):
        self.rules = rules

    def match(self, buffer):
        """Return True if at least one of the rules matches buffer."""
        return any(r.match(buffer) for r in self.rules)

    def maxlen(self):
        """Return the largest maxlen() of the contained rules."""
        return max(r.maxlen() for r in self.rules)

    @staticmethod
    def _peek(f):
        """Return the next byte of f without consuming it ('' at EOF)."""
        c = f.read(1)
        if c:
            # Only step back when a byte was really read; seeking back at
            # end of file would rewind into data that was already consumed.
            f.seek(-1, 1)
        return c

    @classmethod
    def from_file(cls, f):
        """Read a set of rules from the binary magic file.

        Rules are read until the next section heading ('[') or the end of
        the file. Returns a MagicMatchAny, a single rule, or None when the
        section holds no rules (see from_rule_tree).
        """
        depths_rules = []
        c = cls._peek(f)
        while c and c != b'[':
            try:
                depths_rules.append(MagicRule.from_file(f))
            except UnknownMagicRuleFormat:
                # Ignored to allow for extensions to the rule format.
                pass
            c = cls._peek(f)

        # Build the rule tree
        tree = []  # (rule, [(subrule,[subsubrule,...]), ...])
        # Maps a nesting depth to the list that rules of that depth are
        # appended to: the subrules of the most recent shallower rule.
        insert_points = {0: tree}
        for depth, rule in depths_rules:
            subrules = []
            insert_points[depth].append((rule, subrules))
            insert_points[depth+1] = subrules

        return cls.from_rule_tree(tree)

    @classmethod
    def from_rule_tree(cls, tree):
        """From a nested list of (rule, subrules) pairs, build a MagicMatchAny
        instance, recursing down the tree.

        Where there's only one top-level rule, this is returned directly,
        to simplify the nested structure. Returns None if no rules were read.
        """
        rules = []
        for rule, subrules in tree:
            if subrules:
                rule.also = cls.from_rule_tree(subrules)
            rules.append(rule)

        if not rules:
            return None
        if len(rules) == 1:
            return rules[0]
        return cls(rules)
class MagicDB:
    """Collection of magic rules, grouped by MIME type.

    Fill it with merge_file(), then call finalise() before using match()
    or match_data().
    """
    def __init__(self):
        self.bytype   = defaultdict(list)  # mimetype -> [(priority, rule), ...]

    def merge_file(self, fname):
        """Read a magic binary file, and add its rules to this MagicDB.

        Raises IOError if the file does not start with the MIME-Magic
        header and ValueError on a malformed section heading.
        """
        with open(fname, 'rb') as f:
            line = f.readline()
            if line != b'MIME-Magic\0\n':
                raise IOError('Not a MIME magic file')

            while True:
                shead = f.readline().decode('ascii')
                if not shead:
                    break
                if shead[0] != '[' or shead[-2:] != ']\n':
                    raise ValueError('Malformed section heading', shead)
                pri, tname = shead[1:-2].split(':')
                pri = int(pri)
                mtype = lookup(tname)
                try:
                    rule = MagicMatchAny.from_file(f)
                except DiscardMagicRules:
                    # __NOMAGIC__: forget the rules merged so far for this
                    # type, then read the rest of the section.
                    self.bytype.pop(mtype, None)
                    rule = MagicMatchAny.from_file(f)
                if rule is None:
                    continue

                self.bytype[mtype].append((pri, rule))

    def finalise(self):
        """Prepare the MagicDB for matching.

        This should be called after all rules have been merged into it.
        """
        maxlen = 0
        self.alltypes = []  # (priority, mimetype, rule)

        for mtype, rules in self.bytype.items():
            for pri, rule in rules:
                self.alltypes.append((pri, mtype, rule))
                maxlen = max(maxlen, rule.maxlen())

        self.maxlen = maxlen  # Number of bytes to read from files
        self.alltypes.sort(key=lambda x: x[0], reverse=True)

    def match_data(self, data, max_pri=100, min_pri=0, possible=None):
        """Do magic sniffing on some bytes.

        max_pri & min_pri can be used to specify the maximum & minimum priority
        rules to look for. possible can be a list of mimetypes to check, or None
        (the default) to check all mimetypes until one matches.

        Rules are tried from the highest priority to the lowest.

        Returns the MIMEtype found, or None if no entries match.
        """
        if possible is not None:
            candidates = []
            for mt in possible:
                # .get() avoids creating empty entries for unknown types.
                for pri, rule in self.bytype.get(mt, ()):
                    candidates.append((pri, mt, rule))
            # Highest priority first, like self.alltypes: the loop below
            # stops at the first rule under min_pri, so it needs this order.
            candidates.sort(key=lambda x: x[0], reverse=True)
        else:
            candidates = self.alltypes

        for priority, mimetype, rule in candidates:
            if priority > max_pri:
                continue
            if priority < min_pri:
                break

            if rule.match(data):
                return mimetype
        return None

    def match(self, path, max_pri=100, min_pri=0, possible=None):
        """Read data from the file and do magic sniffing on it.

        max_pri & min_pri can be used to specify the maximum & minimum priority
        rules to look for. possible can be a list of mimetypes to check, or None
        (the default) to check all mimetypes until one matches.

        Returns the MIMEtype found, or None if no entries match. Raises IOError
        if the file can't be opened.
        """
        with open(path, 'rb') as f:
            buf = f.read(self.maxlen)
        return self.match_data(buf, max_pri, min_pri, possible)

    def __repr__(self):
        return '<MagicDB (%d types)>' % len(self.alltypes)
class GlobDB(object):
    """Database of filename patterns read from globs2 files."""

    def __init__(self):
        """Prepare the GlobDB. It can't actually be used until .finalise() is
        called, but merge_file() can be used to add data before that.
        """
        # Maps mimetype to {(weight, glob, flags), ...}
        self.allglobs = defaultdict(set)

    def merge_file(self, path):
        """Loads name matching information from a globs2 file.

        Each line has the form weight:type:pattern[:flag,flag...]. Lines
        starting with '#' and empty lines are skipped.
        """
        allglobs = self.allglobs
        with open(path) as f:
            for line in f:
                if line.startswith('#'):
                    continue  # Comment

                # Strip only the newline: slicing off the last character
                # would damage a final line that has no trailing newline.
                line = line.rstrip('\n')
                if not line:
                    continue

                fields = line.split(':')
                weight, type_name, pattern = fields[:3]
                weight = int(weight)
                mtype = lookup(type_name)
                if len(fields) > 3:
                    flags = fields[3].split(',')
                else:
                    flags = ()

                if pattern == '__NOGLOBS__':
                    # This signals to discard any previous globs
                    allglobs.pop(mtype, None)
                    continue

                allglobs[mtype].add((weight, pattern, tuple(flags)))

    def finalise(self):
        """Prepare the GlobDB for matching.

        This should be called after all files have been merged into it.
        """
        self.exts = defaultdict(list)  # Maps extensions to [(type, weight),...]
        self.cased_exts = defaultdict(list)
        self.globs = []                # List of (regex, type, weight) triplets
        self.literals = {}             # Maps literal names to (type, weight)
        self.cased_literals = {}

        for mtype, globs in self.allglobs.items():
            mtype = mtype.canonical()
            for weight, pattern, flags in globs:

                cased = 'cs' in flags

                if pattern.startswith('*.'):
                    # *.foo -- extension pattern
                    rest = pattern[2:]
                    if not ('*' in rest or '[' in rest or '?' in rest):
                        if cased:
                            self.cased_exts[rest].append((mtype, weight))
                        else:
                            self.exts[rest.lower()].append((mtype, weight))
                        continue

                if ('*' in pattern or '[' in pattern or '?' in pattern):
                    # Translate the glob pattern to a regex & compile it
                    re_flags = 0 if cased else re.I
                    pattern = re.compile(fnmatch.translate(pattern), flags=re_flags)
                    self.globs.append((pattern, mtype, weight))
                else:
                    # No wildcards - literal pattern
                    if cased:
                        self.cased_literals[pattern] = (mtype, weight)
                    else:
                        self.literals[pattern.lower()] = (mtype, weight)

        # Sort globs by weight & length
        self.globs.sort(reverse=True, key=lambda x: (x[2], len(x[0].pattern)) )

    def first_match(self, path):
        """Return the first match found for a given path, or None if no match
        is found."""
        try:
            return next(self._match_path(path))[0]
        except StopIteration:
            return None

    def all_matches(self, path):
        """Return a list of (MIMEtype, glob weight) pairs for the path."""
        return list(self._match_path(path))

    @staticmethod
    def _suffixes(name):
        """Yield what follows each successive '.' in name, longest first."""
        dot = name.find('.')
        while dot >= 0:
            name = name[dot + 1:]
            yield name
            dot = name.find('.')

    def _match_path(self, path):
        """Yields pairs of (mimetype, glob weight)."""
        leaf = os.path.basename(path)

        # Literals (no wildcards)
        if leaf in self.cased_literals:
            yield self.cased_literals[leaf]

        lleaf = leaf.lower()
        if lleaf in self.literals:
            yield self.literals[lleaf]

        # Extensions: case-sensitive ones first, then case-insensitive
        for ext in self._suffixes(leaf):
            if ext in self.cased_exts:
                for res in self.cased_exts[ext]:
                    yield res
        for ext in self._suffixes(lleaf):
            if ext in self.exts:
                for res in self.exts[ext]:
                    yield res

        # Other globs
        for (regex, mime_type, weight) in self.globs:
            if regex.match(leaf):
                yield (mime_type, weight)
# Some well-known types

# Regular file contents
text = MIMEtype('text', 'plain')
octet_stream = MIMEtype('application', 'octet-stream')

# Special filesystem objects
inode_block = MIMEtype('inode', 'blockdevice')
inode_char = MIMEtype('inode', 'chardevice')
inode_dir = MIMEtype('inode', 'directory')
inode_fifo = MIMEtype('inode', 'fifo')
inode_socket = MIMEtype('inode', 'socket')
inode_symlink = MIMEtype('inode', 'symlink')
inode_door = MIMEtype('inode', 'door')

# Executable files
app_exe = MIMEtype('application', 'executable')
_cache_uptodate = False

def _cache_database():
    """(Re)load the MIME database into the module-level caches.

    Reads the 'mime/aliases', 'mime/globs2', 'mime/magic' and
    'mime/subclasses' files found by BaseDirectory.load_data_paths and
    rebuilds the aliases, globs, magic and inheritance globals.

    If loading fails, the cache is marked stale again before the exception
    is re-raised, so that the next update_cache() call retries instead of
    using a partly built cache.
    """
    global globs, magic, aliases, inheritance, _cache_uptodate

    # The flag has to be set before loading: GlobDB.finalise() calls
    # MIMEtype.canonical(), which calls update_cache(), and that must not
    # start another load while this one is running.
    _cache_uptodate = True

    try:
        aliases = {}  # Maps alias Mime types to canonical names
        inheritance = defaultdict(set)  # Maps to sets of parent mime types.

        # Load aliases
        for path in BaseDirectory.load_data_paths(os.path.join('mime', 'aliases')):
            with open(path, 'r') as f:
                for line in f:
                    alias, canonical = line.strip().split(None, 1)
                    aliases[alias] = canonical

        # Load filename patterns (globs)
        globs = GlobDB()
        for path in BaseDirectory.load_data_paths(os.path.join('mime', 'globs2')):
            globs.merge_file(path)
        globs.finalise()

        # Load magic sniffing data
        magic = MagicDB()
        for path in BaseDirectory.load_data_paths(os.path.join('mime', 'magic')):
            magic.merge_file(path)
        magic.finalise()

        # Load subclasses
        for path in BaseDirectory.load_data_paths(os.path.join('mime', 'subclasses')):
            with open(path, 'r') as f:
                for line in f:
                    sub, parent = line.strip().split(None, 1)
                    inheritance[sub].add(parent)
    except BaseException:
        _cache_uptodate = False
        raise
def update_cache():
    """Load the MIME database unless the cache is already up to date."""
    if _cache_uptodate:
        return
    _cache_database()
def get_type_by_name(path):
    """Return the type matching the file's name, or None if there is no
    match."""
    update_cache()
    first = globs.first_match(path)
    return first
def get_type_by_contents(path, max_pri=100, min_pri=0):
    """Return the type found by magic sniffing on the file at path, or None
    if no rule matches."""
    update_cache()
    found = magic.match(path, max_pri, min_pri)
    return found
def get_type_by_data(data, max_pri=100, min_pri=0):
    """Return the type found by magic sniffing on data, which should be
    bytes."""
    update_cache()
    found = magic.match_data(data, max_pri, min_pri)
    return found
def _get_type_by_stat(st_mode):
    """Map the mode of a special filesystem object to its MIMEtype."""
    # Checked in order; the first predicate that accepts the mode wins.
    checks = (
        (stat.S_ISDIR, inode_dir),
        (stat.S_ISCHR, inode_char),
        (stat.S_ISBLK, inode_block),
        (stat.S_ISFIFO, inode_fifo),
        (stat.S_ISLNK, inode_symlink),
        (stat.S_ISSOCK, inode_socket),
    )
    for is_kind, mtype in checks:
        if is_kind(st_mode):
            return mtype
    # Nothing above matched.
    return inode_door
def get_type(path, follow=True, name_pri=100):
    """Returns type of file indicated by path.

    This function is *deprecated* - :func:`get_type2` is more accurate.

    :param path: pathname to check (need not exist)
    :param follow: when reading file, follow symbolic links
    :param name_pri: Priority to do name matches. 100=override magic

    This tries to use the contents of the file, and falls back to the name. It
    can also handle special filesystem objects like directories and sockets.
    """
    update_cache()

    try:
        st = os.stat(path) if follow else os.lstat(path)
    except (OSError, ValueError):
        # OSError: the path is missing or cannot be examined.
        # ValueError: Python 3 rejects paths with an embedded NUL byte.
        # Either way only the name is left to go on. Other exceptions,
        # such as KeyboardInterrupt, are no longer swallowed here.
        t = get_type_by_name(path)
        return t or text

    if not stat.S_ISREG(st.st_mode):
        return _get_type_by_stat(st.st_mode)

    # Regular file: magic rules of at least name_pri, then the name, then
    # the remaining magic rules.
    t = get_type_by_contents(path, min_pri=name_pri)
    if not t: t = get_type_by_name(path)
    if not t: t = get_type_by_contents(path, max_pri=name_pri)
    if t is None:
        if stat.S_IMODE(st.st_mode) & 0o111:
            return app_exe
        else:
            return text
    return t
def get_type2(path, follow=True):
    """Find the MIMEtype of a file using the XDG recommended checking order.

    The filename is checked first; the file contents are used when the name
    does not give an unambiguous MIMEtype. Special filesystem objects such
    as directories and sockets are handled as well.

    :param path: file path to examine (need not exist)
    :param follow: whether to follow symlinks

    :rtype: :class:`MIMEtype`

    .. versionadded:: 1.0
    """
    update_cache()

    try:
        if follow:
            st = os.stat(path)
        else:
            st = os.lstat(path)
    except OSError:
        return get_type_by_name(path) or octet_stream

    if not stat.S_ISREG(st.st_mode):
        # Special filesystem objects
        return _get_type_by_stat(st.st_mode)

    # Name matches, heaviest glob weight first.
    by_name = globs.all_matches(path)
    by_name.sort(key=(lambda pair: pair[1]), reverse=True)
    possible = None  # Try all magic matches
    if by_name:
        # Keep only the matches sharing the top weight; the list is sorted,
        # so these are exactly its leading entries.
        top_weight = by_name[0][1]
        by_name = [pair for pair in by_name if pair[1] >= top_weight]
        if len(by_name) == 1:
            return by_name[0][0]

        possible = [mt for mt, _weight in by_name]

    try:
        sniffed = magic.match(path, possible=possible)
    except IOError:
        sniffed = None

    if sniffed:
        return sniffed
    if by_name:
        return by_name[0][0]
    if stat.S_IMODE(st.st_mode) & 0o111:
        return app_exe
    if is_text_file(path):
        return text
    return octet_stream
def is_text_file(path):
    """Guess whether a file contains text or binary data.

    Heuristic: the file counts as binary if its first 32 bytes include
    ASCII control characters. This rule may change in future versions.
    Returns False when the file cannot be opened.

    .. versionadded:: 1.0
    """
    try:
        handle = open(path, 'rb')
    except IOError:
        return False

    with handle:
        head = handle.read(32)
        return _is_text(head)
# Iterating over bytes gives ints on Python 3 and one-character strings on
# Python 2, hence the two variants.
if PY3:
    def _is_text(data):
        """Return False if data contains an ASCII control character."""
        for b in data:
            if b <= 0x8 or 0xe <= b < 0x20 or b == 0x7f:
                return False
        return True
else:
    def _is_text(data):
        """Return False if data contains an ASCII control character."""
        for b in data:
            if b <= '\x08' or '\x0e' <= b < '\x20' or b == '\x7f':
                return False
        return True
_mime2ext_cache = None
_mime2ext_cache_uptodate = False

def get_extensions(mimetype):
    """Retrieve the set of filename extensions matching a given MIMEtype.

    Extensions carry no leading dot, e.g. 'py'. A MIMEtype with no
    registered extensions yields an empty set.

    The reverse mapping is built from globs.exts on the first call and
    cached for later calls.

    .. versionadded:: 1.0
    """
    global _mime2ext_cache, _mime2ext_cache_uptodate
    update_cache()
    if _mime2ext_cache_uptodate:
        return _mime2ext_cache[mimetype]

    _mime2ext_cache = defaultdict(set)
    for extension, pairs in globs.exts.items():
        for mtype, _weight in pairs:
            _mime2ext_cache[mtype].add(extension)
    _mime2ext_cache_uptodate = True

    return _mime2ext_cache[mimetype]
def install_mime_info(application, package_file):
    """Copy 'package_file' as ``~/.local/share/mime/packages/<application>.xml.``
    If already installed, does nothing. May overwrite an existing
    file with the same name (if the contents are different).

    package_file must be the path of a readable file. After the copy the
    ``update-mime-database`` command is run; if it returns an error code
    the new file is removed again and an Exception is raised.
    """
    global _cache_uptodate

    application += '.xml'

    with open(package_file) as f:
        new_data = f.read()

    # See if the file is already installed
    package_dir = os.path.join('mime', 'packages')
    resource = os.path.join(package_dir, application)
    for x in BaseDirectory.load_data_paths(resource):
        try:
            with open(x) as f:
                old_data = f.read()
        except (IOError, OSError, ValueError):
            # Unreadable or undecodable copy: treat it as not matching.
            continue
        if old_data == new_data:
            return  # Already installed

    # The database is about to change, so the cached copy is stale.
    _cache_uptodate = False

    # Not already installed; add a new copy
    # Create the directory structure...
    new_file = os.path.join(BaseDirectory.save_data_path(package_dir), application)

    # Write the file...
    with open(new_file, 'w') as f:
        f.write(new_data)

    # Update the database...
    command = 'update-mime-database'
    if os.spawnlp(os.P_WAIT, command, command, BaseDirectory.save_data_path('mime')):
        os.unlink(new_file)
        raise Exception("The '%s' command returned an error code!\n" \
                  "Make sure you have the freedesktop.org shared MIME package:\n" \
                  "http://standards.freedesktop.org/shared-mime-info/" % command)