Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/xdg/Mime.py: 44%

1"""

2This module is based on a rox module (LGPL):

4http://cvs.sourceforge.net/viewcvs.py/rox/ROX-Lib2/python/rox/mime.py?rev=1.21&view=log

6This module provides access to the shared MIME database.

8types is a dictionary of all known MIME types, indexed by the type name, e.g.

9types['application/x-python']

11Applications can install information about MIME types by storing an

12XML file as <MIME>/packages/<application>.xml and running the

13update-mime-database command, which is provided by the freedesktop.org

14shared mime database package.

16See http://www.freedesktop.org/standards/shared-mime-info-spec/ for

17information about the format of these files.

19(based on version 0.13)

20"""

22import os

23import re

24import stat

25import sys

26import fnmatch

28from xdg import BaseDirectory

29import xdg.Locale

31from xml.dom import minidom, XML_NAMESPACE

32from collections import defaultdict

# XML namespace of the elements read from the per-type MIME XML files.
FREE_NS = 'http://www.freedesktop.org/standards/shared-mime-info'

# Cache of MIMEtype instances; MIMEtype.__new__ keys it by (media, subtype).
types = {} # Maps MIME names to type objects

exts = None # Maps extensions to types
globs = None # List of (glob, type) pairs
literals = None # Maps literal names to types
magic = None

# True when running under Python 3 or later.
PY3 = (sys.version_info[0] >= 3)

45def _get_node_data(node):

46 """Get text of XML node"""

47 return ''.join([n.nodeValue for n in node.childNodes]).strip()

def lookup(media, subtype=None):
    """Return the MIMEtype object for the given type name.

    Kept for backwards compatibility: it simply instantiates MIMEtype,
    which performs the same lookup.

    The name may be supplied in one piece ('text/plain') or as two
    arguments ('text', 'plain').
    """
    mime_type = MIMEtype(media, subtype)
    return mime_type

class MIMEtype(object):
    """Holds the data known about one MIME type.

    Instantiating the class hands back a cached object, so every MIME type
    is represented by exactly one instance. The name may be given in one
    piece ('text/plain') or as two arguments ('text', 'plain').
    """

    def __new__(cls, media, subtype=None):
        if subtype is None and '/' in media:
            media, subtype = media.split('/', 1)
        assert '/' not in subtype
        key = (media.lower(), subtype.lower())

        if key in types:
            return types[key]

        instance = super(MIMEtype, cls).__new__(cls)
        instance._init(*key)
        types[key] = instance
        return instance

    # Python would re-run __init__ every time __new__ returned the cached
    # object, so the one-time setup lives in _init and is invoked by hand
    # only for freshly created instances.
    def _init(self, media, subtype):
        self.media = media
        self.subtype = subtype
        self._comment = None

    def _load(self):
        """Load the comment for the current language; use get_comment() instead."""
        resource = os.path.join('mime', self.media, self.subtype + '.xml')
        for xml_path in BaseDirectory.load_data_paths(resource):
            document = minidom.parse(xml_path)
            if document is None:
                continue
            root = document.documentElement
            for node in root.getElementsByTagNameNS(FREE_NS, 'comment'):
                language = node.getAttributeNS(XML_NAMESPACE, 'lang') or 'en'
                # 2 when the language is one of xdg.Locale.langs, else 1.
                score = 2 if language in xdg.Locale.langs else 1
                if score > self._comment[0]:
                    self._comment = (score, _get_node_data(node))
                if score == 2:
                    return

    # FIXME: add get_icon method
    def get_comment(self):
        """Return the comment for the current language, loading it on first use."""
        # Should we ever reload?
        if self._comment is None:
            # Start from the type name; _load() replaces it when it finds
            # a comment with a higher score.
            self._comment = (0, str(self))
            self._load()
        _, comment = self._comment
        return comment

    def canonical(self):
        """Return the canonical MIMEtype if this is an alias, else this object."""
        update_cache()
        name = str(self)
        try:
            target = aliases[name]
        except KeyError:
            return self
        return lookup(target)

    def inherits_from(self):
        """Return the set of MIMEtype objects this type inherits from."""
        update_cache()
        parent_names = inheritance[str(self)]
        return set(lookup(name) for name in parent_names)

    def __str__(self):
        return '/'.join((self.media, self.subtype))

    def __repr__(self):
        parts = (self.media, self.subtype)
        return 'MIMEtype(%r, %r)' % parts

    def __hash__(self):
        media_hash = hash(self.media)
        subtype_hash = hash(self.subtype)
        return media_hash ^ subtype_hash

134

class UnknownMagicRuleFormat(ValueError):
    """Raised for a magic rule whose trailing part is not in the known format."""

137

class DiscardMagicRules(Exception):
    """Raised on a __NOMAGIC__ entry; caught to discard the previous rules."""

141

class MagicRule:
    """One content-sniffing rule read from a binary MIME magic file.

    A rule matches when ``value`` (optionally after AND-ing the data with
    ``mask``) is found in the buffer at any of ``range`` consecutive offsets
    beginning at ``start``. ``word`` is parsed and stored but is not used
    by the matching code in this class.
    """

    # Sub-rule (a MagicRule or MagicMatchAny) that must match as well;
    # assigned by MagicMatchAny.from_rule_tree for nested rules.
    also = None

    def __init__(self, start, value, mask, word, range):
        self.start = start
        self.value = value
        self.mask = mask
        self.word = word
        self.range = range

    rule_ending_re = re.compile(br'(?:~(\d+))?(?:\+(\d+))?\n$')

    @staticmethod
    def _fill(f, line, needed):
        """Append lines read from f until line holds at least `needed` bytes.

        Raises UnknownMagicRuleFormat if the file ends first, instead of
        looping forever on the empty reads that EOF produces.
        """
        while len(line) < needed:
            more = f.readline()
            if not more:
                raise UnknownMagicRuleFormat('Truncated magic rule: %r' % line)
            line += more
        return line

    @classmethod
    def from_file(cls, f):
        """Read a rule from the binary magics file.

        Returns a 2-tuple of the nesting depth and the MagicRule.

        Raises DiscardMagicRules when the rule is the __NOMAGIC__ marker and
        UnknownMagicRuleFormat when the rule is truncated or its trailing
        part is not understood.
        """
        line = f.readline()

        # [indent] '>'
        nest_depth, line = line.split(b'>', 1)
        nest_depth = int(nest_depth) if nest_depth else 0

        # start-offset '='
        start, line = line.split(b'=', 1)
        start = int(start)

        if line == b'__NOMAGIC__\n':
            raise DiscardMagicRules

        # value length (2 bytes, big endian). A length byte may itself be a
        # newline, so make sure both bytes have really been read. bytearray
        # yields integers on both Python 2 and Python 3.
        line = cls._fill(f, line, 2)
        high, low = bytearray(line[:2])
        lenvalue = (high << 8) | low
        line = line[2:]

        # value
        # This can contain newlines, so we may need to read more lines. At
        # least one byte must be left over for the rest of the rule.
        line = cls._fill(f, line, lenvalue + 1)
        value, line = line[:lenvalue], line[lenvalue:]

        # ['&' mask]
        if line.startswith(b'&'):
            # '&' + mask + at least one following byte. Requiring the extra
            # byte keeps a mask that ends in a newline from being mistaken
            # for the end of the rule.
            line = cls._fill(f, line, lenvalue + 2)
            mask, line = line[1:lenvalue + 1], line[lenvalue + 1:]
        else:
            mask = None

        # ['~' word-size] ['+' range-length]
        ending = cls.rule_ending_re.match(line)
        if not ending:
            # Per the spec, this will be caught and ignored, to allow
            # for future extensions.
            raise UnknownMagicRuleFormat(repr(line))

        word, range_length = ending.groups()
        word = int(word) if (word is not None) else 1
        range_length = int(range_length) if (range_length is not None) else 1

        return nest_depth, cls(start, value, mask, word, range_length)

    def maxlen(self):
        """Return how many leading bytes this rule (and its sub-rules) may inspect."""
        l = self.start + len(self.value) + self.range
        if self.also:
            return max(l, self.also.maxlen())
        return l

    def match(self, buffer):
        """Return a true value if this rule and any sub-rule match buffer."""
        if not self.match0(buffer):
            return False
        if self.also:
            return self.also.match(buffer)
        return True

    def match0(self, buffer):
        """Return True if this rule alone (ignoring ``also``) matches buffer."""
        buflen = len(buffer)
        lenvalue = len(self.value)
        mask = bytearray(self.mask) if self.mask else None
        for offset in range(self.range):
            begin = self.start + offset
            end = begin + lenvalue
            if buflen < end:
                return False
            window = buffer[begin:end]
            if mask is not None:
                # Build the masked bytes as a bytes object so the comparison
                # with self.value (bytes) can succeed on Python 3 too.
                window = bytes(bytearray(
                    b & m for b, m in zip(bytearray(window), mask)))
            if window == self.value:
                return True
        return False

    def __repr__(self):
        return 'MagicRule(start=%r, value=%r, mask=%r, word=%r, range=%r)' % (
            self.start,
            self.value,
            self.mask,
            self.word,
            self.range)

248

249

class MagicMatchAny(object):
    """Match any of a set of magic rules.

    This has a similar interface to MagicRule objects (i.e. its match() and
    maxlen() methods), to allow for duck typing.
    """

    def __init__(self, rules):
        self.rules = rules

    def match(self, buffer):
        """Return True if at least one of the rules matches buffer."""
        return any(r.match(buffer) for r in self.rules)

    def maxlen(self):
        """Return the largest maxlen() of the contained rules."""
        return max(r.maxlen() for r in self.rules)

    @staticmethod
    def _peek(f):
        """Return the next byte of f without consuming it (b'' at EOF)."""
        c = f.read(1)
        if c:
            # Only step back when a byte was actually read; seeking back at
            # EOF would rewind into data that was already consumed.
            f.seek(-1, 1)
        return c

    @classmethod
    def from_file(cls, f):
        """Read a set of rules from the binary magic file.

        Reading stops at end of file or at the '[' that opens the next
        section heading, which is left unread. Returns whatever
        from_rule_tree() builds from the rules read (None if there were none).
        DiscardMagicRules raised by MagicRule.from_file propagates.
        """
        depths_rules = []
        c = cls._peek(f)
        while c and c != b'[':
            try:
                depths_rules.append(MagicRule.from_file(f))
            except UnknownMagicRuleFormat:
                # Ignored to allow for extensions to the rule format.
                pass
            c = cls._peek(f)

        # Build the rule tree
        tree = []  # (rule, [(subrule,[subsubrule,...]), ...])
        # Maps a nesting depth to the list that rules of that depth go into.
        insert_points = {0: tree}
        for depth, rule in depths_rules:
            subrules = []
            insert_points[depth].append((rule, subrules))
            insert_points[depth + 1] = subrules

        return cls.from_rule_tree(tree)

    @classmethod
    def from_rule_tree(cls, tree):
        """From a nested list of (rule, subrules) pairs, build a MagicMatchAny
        instance, recursing down the tree.

        Where there's only one top-level rule, this is returned directly,
        to simplify the nested structure. Returns None if no rules were read.
        """
        rules = []
        for rule, subrules in tree:
            if subrules:
                rule.also = cls.from_rule_tree(subrules)
            rules.append(rule)

        if not rules:
            return None
        if len(rules) == 1:
            return rules[0]
        return cls(rules)

310

class MagicDB:
    """Collection of magic rules grouped by MIME type.

    Fill it with merge_file(), call finalise() once everything is merged,
    then use match() or match_data() to sniff content.
    """

    def __init__(self):
        self.bytype = defaultdict(list)  # mimetype -> [(priority, rule), ...]
        # Both are rebuilt by finalise(); they start empty so that repr()
        # works on a database that has not been finalised yet.
        self.alltypes = []  # (priority, mimetype, rule)
        self.maxlen = 0     # Number of bytes to read from files

    def merge_file(self, fname):
        """Read a magic binary file, and add its rules to this MagicDB.

        Raises IOError if the file does not start with the MIME-Magic
        header and ValueError for a malformed section heading.
        """
        with open(fname, 'rb') as f:
            line = f.readline()
            if line != b'MIME-Magic\0\n':
                raise IOError('Not a MIME magic file')

            while True:
                shead = f.readline().decode('ascii')
                if not shead:
                    break
                if shead[0] != '[' or shead[-2:] != ']\n':
                    raise ValueError('Malformed section heading', shead)
                pri, tname = shead[1:-2].split(':')
                pri = int(pri)
                mtype = lookup(tname)
                try:
                    rule = MagicMatchAny.from_file(f)
                except DiscardMagicRules:
                    # __NOMAGIC__: drop what was collected for this type so
                    # far, then read the rules that follow the marker.
                    self.bytype.pop(mtype, None)
                    rule = MagicMatchAny.from_file(f)
                if rule is None:
                    continue

                self.bytype[mtype].append((pri, rule))

    def finalise(self):
        """Prepare the MagicDB for matching.

        This should be called after all rules have been merged into it.
        """
        maxlen = 0
        self.alltypes = []  # (priority, mimetype, rule)

        for mtype, rules in self.bytype.items():
            for pri, rule in rules:
                self.alltypes.append((pri, mtype, rule))
                maxlen = max(maxlen, rule.maxlen())

        self.maxlen = maxlen  # Number of bytes to read from files
        self.alltypes.sort(key=lambda x: x[0], reverse=True)

    def match_data(self, data, max_pri=100, min_pri=0, possible=None):
        """Do magic sniffing on some bytes.

        max_pri & min_pri can be used to specify the maximum & minimum priority
        rules to look for. possible can be a list of mimetypes to check, or None
        (the default) to check all mimetypes until one matches.

        Returns the MIMEtype found, or None if no entries match.
        """
        if possible is not None:
            candidates = []
            for mt in possible:
                # .get() avoids creating empty entries in the defaultdict
                # for types that have no magic rules.
                for pri, rule in self.bytype.get(mt, ()):
                    candidates.append((pri, mt, rule))
            # Highest priority first, like self.alltypes: the loop below
            # relies on descending order when it stops at min_pri.
            candidates.sort(key=lambda x: x[0], reverse=True)
        else:
            candidates = self.alltypes

        for priority, mimetype, rule in candidates:
            if priority > max_pri:
                continue
            if priority < min_pri:
                break

            if rule.match(data):
                return mimetype
        return None

    def match(self, path, max_pri=100, min_pri=0, possible=None):
        """Read data from the file and do magic sniffing on it.

        max_pri & min_pri can be used to specify the maximum & minimum priority
        rules to look for. possible can be a list of mimetypes to check, or None
        (the default) to check all mimetypes until one matches.

        Returns the MIMEtype found, or None if no entries match. Raises IOError
        if the file can't be opened.
        """
        with open(path, 'rb') as f:
            buf = f.read(self.maxlen)
        return self.match_data(buf, max_pri, min_pri, possible)

    def __repr__(self):
        return '<MagicDB (%d types)>' % len(self.alltypes)

404

class GlobDB(object):
    """Filename-pattern database built from globs2 files."""

    def __init__(self):
        """Prepare the GlobDB. It can't actually be used until .finalise() is
        called, but merge_file() can be used to add data before that.
        """
        # Maps mimetype to {(weight, glob, flags), ...}
        self.allglobs = defaultdict(set)

    @staticmethod
    def _has_wildcard(text):
        """Return True if text contains one of the characters '*', '[' or '?'."""
        return '*' in text or '[' in text or '?' in text

    @staticmethod
    def _suffixes(name):
        """Yield what follows each successive '.' in name, longest first."""
        while True:
            dot = name.find('.')
            if dot < 0:
                return
            name = name[dot + 1:]
            yield name

    def merge_file(self, path):
        """Loads name matching information from a globs2 file."""
        allglobs = self.allglobs
        with open(path) as f:
            for line in f:
                if line.startswith('#'):
                    continue  # Comment

                # Strip only the line terminator: blindly dropping the last
                # character would eat real data on a final line that has no
                # trailing newline.
                line = line.rstrip('\n')
                if not line:
                    continue  # Blank line

                fields = line.split(':')
                weight, type_name, pattern = fields[:3]
                weight = int(weight)
                mtype = lookup(type_name)
                if len(fields) > 3:
                    flags = fields[3].split(',')
                else:
                    flags = ()

                if pattern == '__NOGLOBS__':
                    # This signals to discard any previous globs
                    allglobs.pop(mtype, None)
                    continue

                allglobs[mtype].add((weight, pattern, tuple(flags)))

    def finalise(self):
        """Prepare the GlobDB for matching.

        This should be called after all files have been merged into it.
        """
        self.exts = defaultdict(list)  # Maps extensions to [(type, weight),...]
        self.cased_exts = defaultdict(list)
        self.globs = []  # List of (regex, type, weight) triplets
        self.literals = {}  # Maps literal names to (type, weight)
        self.cased_literals = {}

        for mtype, globs in self.allglobs.items():
            mtype = mtype.canonical()
            for weight, pattern, flags in globs:
                # The 'cs' flag selects the case-sensitive tables.
                cased = 'cs' in flags

                if pattern.startswith('*.'):
                    # *.foo -- extension pattern
                    rest = pattern[2:]
                    if not self._has_wildcard(rest):
                        if cased:
                            self.cased_exts[rest].append((mtype, weight))
                        else:
                            self.exts[rest.lower()].append((mtype, weight))
                        continue

                if self._has_wildcard(pattern):
                    # Translate the glob pattern to a regex & compile it
                    re_flags = 0 if cased else re.I
                    pattern = re.compile(fnmatch.translate(pattern), flags=re_flags)
                    self.globs.append((pattern, mtype, weight))
                else:
                    # No wildcards - literal pattern
                    if cased:
                        self.cased_literals[pattern] = (mtype, weight)
                    else:
                        self.literals[pattern.lower()] = (mtype, weight)

        # Sort globs by weight & length
        self.globs.sort(reverse=True, key=lambda x: (x[2], len(x[0].pattern)))

    def first_match(self, path):
        """Return the first match found for a given path, or None if no match
        is found."""
        try:
            return next(self._match_path(path))[0]
        except StopIteration:
            return None

    def all_matches(self, path):
        """Return a list of (MIMEtype, glob weight) pairs for the path."""
        return list(self._match_path(path))

    def _match_path(self, path):
        """Yields pairs of (mimetype, glob weight).

        Order: case-sensitive literal, case-insensitive literal,
        case-sensitive extensions, case-insensitive extensions, then the
        remaining globs.
        """
        leaf = os.path.basename(path)

        # Literals (no wildcards)
        if leaf in self.cased_literals:
            yield self.cased_literals[leaf]

        lleaf = leaf.lower()
        if lleaf in self.literals:
            yield self.literals[lleaf]

        # Extensions
        for ext in self._suffixes(leaf):
            if ext in self.cased_exts:
                for res in self.cased_exts[ext]:
                    yield res
        for ext in self._suffixes(lleaf):
            if ext in self.exts:
                for res in self.exts[ext]:
                    yield res

        # Other globs
        for (regex, mime_type, weight) in self.globs:
            if regex.match(leaf):
                yield (mime_type, weight)

524

# Some well-known types
text = lookup('text', 'plain')
octet_stream = lookup('application', 'octet-stream')
inode_block = lookup('inode', 'blockdevice')
inode_char = lookup('inode', 'chardevice')
inode_dir = lookup('inode', 'directory')
inode_fifo = lookup('inode', 'fifo')
inode_socket = lookup('inode', 'socket')
inode_symlink = lookup('inode', 'symlink')
inode_door = lookup('inode', 'door')
app_exe = lookup('application', 'executable')

# Set to True by _cache_database(); install_mime_info() resets it to False
# so that the next update_cache() call reloads the database.
_cache_uptodate = False

538

def _cache_database():
    """(Re)load aliases, globs, magic rules and subclasses into module globals.

    Rebinds the globals ``aliases``, ``globs``, ``magic`` and ``inheritance``
    and marks the extension cache used by get_extensions() as stale, since
    it is derived from ``globs``. If loading fails, the cache is marked as
    not up to date again and the exception is re-raised.
    """
    global globs, magic, aliases, inheritance, _cache_uptodate
    global _mime2ext_cache_uptodate

    # The flag has to be set before loading: GlobDB.finalise() calls
    # MIMEtype.canonical(), which calls update_cache() and would otherwise
    # re-enter this function.
    _cache_uptodate = True
    # Whatever get_extensions() cached was built from the previous globs.
    _mime2ext_cache_uptodate = False

    try:
        aliases = {}  # Maps alias Mime types to canonical names
        inheritance = defaultdict(set)  # Maps to sets of parent mime types.

        # Load aliases
        for path in BaseDirectory.load_data_paths(os.path.join('mime', 'aliases')):
            with open(path, 'r') as f:
                for line in f:
                    alias, canonical = line.strip().split(None, 1)
                    aliases[alias] = canonical

        # Load filename patterns (globs)
        globs = GlobDB()
        for path in BaseDirectory.load_data_paths(os.path.join('mime', 'globs2')):
            globs.merge_file(path)
        globs.finalise()

        # Load magic sniffing data
        magic = MagicDB()
        for path in BaseDirectory.load_data_paths(os.path.join('mime', 'magic')):
            magic.merge_file(path)
        magic.finalise()

        # Load subclasses
        for path in BaseDirectory.load_data_paths(os.path.join('mime', 'subclasses')):
            with open(path, 'r') as f:
                for line in f:
                    sub, parent = line.strip().split(None, 1)
                    inheritance[sub].add(parent)
    except BaseException:
        # Do not leave a half-loaded database flagged as complete; the next
        # update_cache() call will try again.
        _cache_uptodate = False
        raise

572

def update_cache():
    """Load the MIME database unless it is already flagged as up to date."""
    if _cache_uptodate:
        return
    _cache_database()

576

def get_type_by_name(path):
    """Return the type matched by the file's name, or None if not known."""
    update_cache()
    matched = globs.first_match(path)
    return matched

581

def get_type_by_contents(path, max_pri=100, min_pri=0):
    """Return the type matched by the file's contents, or None if not known."""
    update_cache()
    sniffed = magic.match(path, max_pri=max_pri, min_pri=min_pri)
    return sniffed

587

def get_type_by_data(data, max_pri=100, min_pri=0):
    """Return the type matched by data, which should be bytes (None if no match)."""
    update_cache()
    sniffed = magic.match_data(data, max_pri=max_pri, min_pri=min_pri)
    return sniffed

593

def _get_type_by_stat(st_mode):
    """Map the st_mode of a special filesystem object to its MIMEtype."""
    # Checked in order; the first predicate that accepts st_mode wins.
    checks = (
        (stat.S_ISDIR, inode_dir),
        (stat.S_ISCHR, inode_char),
        (stat.S_ISBLK, inode_block),
        (stat.S_ISFIFO, inode_fifo),
        (stat.S_ISLNK, inode_symlink),
        (stat.S_ISSOCK, inode_socket),
    )
    for is_kind, mime_type in checks:
        if is_kind(st_mode):
            return mime_type
    # Nothing above matched.
    return inode_door

603

def get_type(path, follow=True, name_pri=100):
    """Returns type of file indicated by path.

    This function is *deprecated* - :func:`get_type2` is more accurate.

    :param path: pathname to check (need not exist)
    :param follow: when reading file, follow symbolic links
    :param name_pri: Priority to do name matches. 100=override magic

    This tries to use the contents of the file, and falls back to the name. It
    can also handle special filesystem objects like directories and sockets.

    If the path cannot be stat'ed, the type is guessed from the name alone,
    defaulting to text/plain.
    """
    update_cache()

    try:
        st = os.stat(path) if follow else os.lstat(path)
    except (OSError, ValueError):
        # Missing/inaccessible file, or a path the OS rejects outright.
        # Catching only these keeps KeyboardInterrupt and SystemExit from
        # being swallowed, which the former bare except did.
        return get_type_by_name(path) or text

    if not stat.S_ISREG(st.st_mode):
        return _get_type_by_stat(st.st_mode)

    # Regular file: magic rules at or above name_pri first, then the name,
    # then the magic rules at or below name_pri.
    t = get_type_by_contents(path, min_pri=name_pri)
    if not t:
        t = get_type_by_name(path)
    if not t:
        t = get_type_by_contents(path, max_pri=name_pri)
    if t is not None:
        return t

    # Unknown content: executables get application/executable, the rest text.
    if stat.S_IMODE(st.st_mode) & 0o111:
        return app_exe
    return text

640

def get_type2(path, follow=True):
    """Find the MIMEtype of a file using the XDG recommended checking order.

    The filename is checked first; file contents are consulted only when the
    name does not give an unambiguous MIMEtype. Special filesystem objects
    such as directories and sockets are handled as well.

    :param path: file path to examine (need not exist)
    :param follow: whether to follow symlinks

    :rtype: :class:`MIMEtype`

    .. versionadded:: 1.0
    """
    update_cache()

    stat_fn = os.stat if follow else os.lstat
    try:
        st = stat_fn(path)
    except OSError:
        return get_type_by_name(path) or octet_stream

    mode = st.st_mode
    if not stat.S_ISREG(mode):
        # Special filesystem objects
        return _get_type_by_stat(mode)

    mtypes = sorted(globs.all_matches(path), key=(lambda x: x[1]), reverse=True)
    if mtypes:
        # Keep only the name matches that share the highest weight; the
        # list is sorted by descending weight, so they form its head.
        max_weight = mtypes[0][1]
        mtypes = [pair for pair in mtypes if not pair[1] < max_weight]
        if len(mtypes) == 1:
            return mtypes[0][0]

        possible = [mt for mt, _ in mtypes]
    else:
        possible = None  # Try all magic matches

    try:
        t = magic.match(path, possible=possible)
    except IOError:
        t = None

    if t:
        return t
    if mtypes:
        return mtypes[0][0]
    if stat.S_IMODE(mode) & 0o111:
        return app_exe
    return text if is_text_file(path) else octet_stream

695

def is_text_file(path):
    """Guess whether a file holds text or binary data.

    Heuristic: the file counts as binary if its first 32 bytes include ASCII
    control characters. This rule may change in future versions. A file that
    cannot be opened is reported as not text.

    .. versionadded:: 1.0
    """
    try:
        handle = open(path, 'rb')
    except IOError:
        return False

    with handle:
        head = handle.read(32)
    return _is_text(head)

711

if PY3:
    def _is_text(data):
        """Return False if data (iterated as integers) holds a control byte."""
        for byte in data:
            if byte <= 0x8 or 0xe <= byte < 0x20 or byte == 0x7f:
                return False
        return True
else:
    def _is_text(data):
        """Return False if data (iterated as characters) holds a control char."""
        for char in data:
            if char <= '\x08' or '\x0e' <= char < '\x20' or char == '\x7f':
                return False
        return True

719

# Built on demand by get_extensions(): maps a MIMEtype to its set of extensions.
_mime2ext_cache = None
# False until get_extensions() has filled _mime2ext_cache.
_mime2ext_cache_uptodate = False

722

def get_extensions(mimetype):
    """Return the set of filename extensions registered for a MIMEtype.

    Extensions come without a leading dot, e.g. 'py'. An empty set is
    returned when no extensions are registered for the MIMEtype.

    The mapping is built and cached the first time this is called.

    .. versionadded:: 1.0
    """
    global _mime2ext_cache, _mime2ext_cache_uptodate
    update_cache()
    if not _mime2ext_cache_uptodate:
        # Invert globs.exts (extension -> [(type, weight), ...]).
        _mime2ext_cache = by_type = defaultdict(set)
        for extension, weighted_types in globs.exts.items():
            for mtype, _weight in weighted_types:
                by_type[mtype].add(extension)
        _mime2ext_cache_uptodate = True

    return _mime2ext_cache[mimetype]

743

744

def install_mime_info(application, package_file):
    """Install 'package_file' as ``<application>.xml`` in the ``mime/packages``
    directory returned by ``BaseDirectory.save_data_path``.

    :param application: name used for the installed file ('.xml' is appended)
    :param package_file: path of the XML file to install; it is opened
        unconditionally, so it must be a readable path (``None`` is not
        supported by this implementation)

    If a file with the same contents is already installed, does nothing.
    May overwrite an existing file with the same name (if the contents are
    different). After writing, ``update-mime-database`` is run; if it
    reports failure the new file is removed and an Exception is raised.
    """
    global _cache_uptodate

    application += '.xml'

    with open(package_file) as f:
        new_data = f.read()

    # See if the file is already installed
    package_dir = os.path.join('mime', 'packages')
    resource = os.path.join(package_dir, application)
    for x in BaseDirectory.load_data_paths(resource):
        try:
            with open(x) as f:
                old_data = f.read()
        except (IOError, OSError, ValueError):
            # Unreadable or undecodable copy: treat it as "not this one".
            # Named exception types keep KeyboardInterrupt/SystemExit from
            # being swallowed.
            continue
        if old_data == new_data:
            return  # Already installed

    # Force the next update_cache() call to reload the database.
    _cache_uptodate = False

    # Not already installed; add a new copy
    # Create the directory structure...
    new_file = os.path.join(BaseDirectory.save_data_path(package_dir), application)

    # Write the file...
    with open(new_file, 'w') as f:
        f.write(new_data)

    # Update the database...
    command = 'update-mime-database'
    if os.spawnlp(os.P_WAIT, command, command, BaseDirectory.save_data_path('mime')):
        # A non-zero result is treated as failure: undo the install.
        os.unlink(new_file)
        raise Exception("The '%s' command returned an error code!\n" \
                        "Make sure you have the freedesktop.org shared MIME package:\n" \
                        "http://standards.freedesktop.org/shared-mime-info/" % command)

784 "http://standards.freedesktop.org/shared-mime-info/" % command)