1"""
2APIs exposing metadata from third-party Python packages.
3
4This codebase is shared between importlib.metadata in the stdlib
5and importlib_metadata in PyPI. See
6https://github.com/python/importlib_metadata/wiki/Development-Methodology
7for more detail.
8"""
9
10from __future__ import annotations
11
12import abc
13import collections
14import email
15import functools
16import itertools
17import operator
18import os
19import pathlib
20import posixpath
21import re
22import sys
23import textwrap
24import types
25from collections.abc import Iterable, Mapping
26from contextlib import suppress
27from importlib import import_module
28from importlib.abc import MetaPathFinder
29from itertools import starmap
30from typing import Any
31
32from . import _meta
33from ._collections import FreezableDefaultDict, Pair
34from ._compat import (
35 NullFinder,
36 install,
37)
38from ._context import ExceptionTrap
39from ._functools import method_cache, noop, pass_none, passthrough
40from ._itertools import always_iterable, bucket, unique_everseen
41from ._meta import PackageMetadata, SimplePath
42from .compat import py311
43
# Public names of this module; this list controls what
# ``from <package> import *`` exports.
__all__ = [
    'Distribution',
    'DistributionFinder',
    'PackageMetadata',
    'PackageNotFoundError',
    'MetadataNotFound',
    'SimplePath',
    'distribution',
    'distributions',
    'entry_points',
    'files',
    'metadata',
    'packages_distributions',
    'requires',
    'version',
]
60
61
class PackageNotFoundError(ModuleNotFoundError):
    """No metadata could be found for the requested package.

    The exception is constructed with a single positional argument,
    the package name, which is exposed read-only as ``name``.
    """

    def __str__(self) -> str:
        return "No package metadata was found for {}".format(self.name)

    @property
    def name(self) -> str:  # type: ignore[override]  # make readonly
        # Unpacking enforces that exactly one argument was supplied.
        [package] = self.args
        return package
72
73
class MetadataNotFound(FileNotFoundError):
    """Raised when a distribution contains no metadata file."""
76
77
class Sectioned:
    """
    A simple entry point config parser for performance

    ``read`` yields one ``Pair(section, line)`` per non-header line;
    ``section_pairs`` additionally drops comments and lines outside any
    section, and parses each line into a nested ``Pair``.

    >>> for item in Sectioned.read(Sectioned._sample):
    ...     print(item)
    Pair(name='sec1', value='# comments ignored')
    Pair(name='sec1', value='a = 1')
    Pair(name='sec1', value='b = 2')
    Pair(name='sec2', value='a = 2')

    >>> res = Sectioned.section_pairs(Sectioned._sample)
    >>> item = next(res)
    >>> item.name
    'sec1'
    >>> item.value
    Pair(name='a', value='1')
    >>> item = next(res)
    >>> item.value
    Pair(name='b', value='2')
    >>> item = next(res)
    >>> item.name
    'sec2'
    >>> item.value
    Pair(name='a', value='2')
    >>> list(res)
    []
    """

    _sample = textwrap.dedent(
        """
        [sec1]
        # comments ignored
        a = 1
        b = 2

        [sec2]
        a = 2
        """
    ).lstrip()

    @classmethod
    def section_pairs(cls, text):
        """Yield ``Pair(section, Pair(key, value))`` for each valid line.

        Lines that appear before the first section header are skipped.
        """
        for entry in cls.read(text, filter_=cls.valid):
            if entry.name is None:
                continue
            yield entry._replace(value=Pair.parse(entry.value))

    @staticmethod
    def read(text, filter_=None):
        """Yield ``Pair(section, line)`` for each stripped line of *text*.

        *filter_* is handed to the builtin ``filter``; the default of
        ``None`` therefore discards empty lines only. Section headers
        (``[name]``) update the current section and are not yielded.
        """
        current = None
        stripped = (raw.strip() for raw in text.splitlines())
        for line in filter(filter_, stripped):
            if line.startswith('[') and line.endswith(']'):
                current = line.strip('[]')
            else:
                yield Pair(current, line)

    @staticmethod
    def valid(line: str):
        """Return a truthy value for lines that are non-empty and not comments."""
        if not line:
            # Preserve the falsy input itself, as ``line and ...`` would.
            return line
        return not line.startswith('#')
141
142
143class _EntryPointMatch(types.SimpleNamespace):
144 module: str
145 attr: str
146 extras: str
147
148
class EntryPoint:
    """An entry point as defined by Python packaging conventions.

    See `the packaging docs on entry points
    <https://packaging.python.org/specifications/entry-points/>`_
    for more information.

    >>> ep = EntryPoint(
    ...     name=None, group=None, value='package.module:attr [extra1, extra2]')
    >>> ep.module
    'package.module'
    >>> ep.attr
    'attr'
    >>> ep.extras
    ['extra1', 'extra2']

    If the value package or module are not valid identifiers, a
    ValueError is raised on access.

    >>> EntryPoint(name=None, group=None, value='invalid-name').module
    Traceback (most recent call last):
    ...
    ValueError: ('Invalid object reference...invalid-name...
    >>> EntryPoint(name=None, group=None, value='invalid-name').attr
    Traceback (most recent call last):
    ...
    ValueError: ('Invalid object reference...invalid-name...
    >>> EntryPoint(name=None, group=None, value='invalid-name').extras
    Traceback (most recent call last):
    ...
    ValueError: ('Invalid object reference...invalid-name...

    The same thing happens on construction.

    >>> EntryPoint(name=None, group=None, value='invalid-name')
    Traceback (most recent call last):
    ...
    ValueError: ('Invalid object reference...invalid-name...

    Comparison with an object that is not an entry point is declined
    instead of failing with an AttributeError.

    >>> EntryPoint(name='a', value='b', group='c') == 'b'
    False
    """

    pattern = re.compile(
        r'(?P<module>[\w.]+)\s*'
        r'(:\s*(?P<attr>[\w.]+)\s*)?'
        r'((?P<extras>\[.*\])\s*)?$'
    )
    """
    A regular expression describing the syntax for an entry point,
    which might look like:

        - module
        - package.module
        - package.module:attribute
        - package.module:object.attribute
        - package.module:attr [extra1, extra2]

    Other combinations are possible as well.

    The expression is lenient about whitespace around the ':',
    following the attr, and following any extras.
    """

    name: str
    value: str
    group: str

    dist: Distribution | None = None

    def __init__(self, name: str, value: str, group: str) -> None:
        # ``__setattr__`` always raises, so fill the instance dict directly.
        vars(self).update(name=name, value=value, group=group)
        # Touch ``module`` so that ``value`` is parsed eagerly; an invalid
        # value raises ValueError here rather than on first use.
        self.module

    def load(self) -> Any:
        """Load the entry point from its definition. If only a module
        is indicated by the value, return that module. Otherwise,
        return the named object.
        """
        module = import_module(self.module)
        # ``attr`` may be a dotted path (``object.attribute``) or absent.
        attrs = filter(None, (self.attr or '').split('.'))
        return functools.reduce(getattr, attrs, module)

    @property
    def module(self) -> str:
        """The module portion of ``value``."""
        return self._match.module

    @property
    def attr(self) -> str | None:
        """The attribute portion of ``value``; None when not specified."""
        return self._match.attr

    @property
    def extras(self) -> list[str]:
        """The extras named in ``value``; empty when none are given."""
        return re.findall(r'\w+', self._match.extras or '')

    @functools.cached_property
    def _match(self) -> _EntryPointMatch:
        """Parse ``value`` with ``pattern``.

        :raises ValueError: If ``value`` does not match ``pattern``.
        """
        match = self.pattern.match(self.value)
        if not match:
            raise ValueError(
                'Invalid object reference. '
                'See https://packaging.python.org'
                '/en/latest/specifications/entry-points/#data-model',
                self.value,
            )
        return _EntryPointMatch(**match.groupdict())

    def _for(self, dist):
        """Attach *dist* to this entry point and return self."""
        vars(self).update(dist=dist)
        return self

    def matches(self, **params):
        """
        EntryPoint matches the given parameters.

        >>> ep = EntryPoint(group='foo', name='bar', value='bing:bong [extra1, extra2]')
        >>> ep.matches(group='foo')
        True
        >>> ep.matches(name='bar', value='bing:bong [extra1, extra2]')
        True
        >>> ep.matches(group='foo', name='other')
        False
        >>> ep.matches()
        True
        >>> ep.matches(extras=['extra1', 'extra2'])
        True
        >>> ep.matches(module='bing')
        True
        >>> ep.matches(attr='bong')
        True
        """
        self._disallow_dist(params)
        attrs = (getattr(self, param) for param in params)
        return all(map(operator.eq, params.values(), attrs))

    @staticmethod
    def _disallow_dist(params):
        """
        Querying by dist is not allowed (dist objects are not comparable).
        >>> EntryPoint(name='fan', value='fav', group='fag').matches(dist='foo')
        Traceback (most recent call last):
        ...
        ValueError: "dist" is not suitable for matching...
        """
        if "dist" in params:
            raise ValueError(
                '"dist" is not suitable for matching. '
                "Instead, use Distribution.entry_points.select() on a "
                "located distribution."
            )

    def _key(self):
        """Return the tuple that drives ordering, equality and hashing."""
        return self.name, self.value, self.group

    def __lt__(self, other):
        # Any object exposing ``_key`` is comparable (duck typing);
        # anything else is declined so Python raises TypeError.
        other_key = getattr(other, '_key', None)
        if other_key is None:
            return NotImplemented
        return self._key() < other_key()

    def __eq__(self, other):
        # Decline instead of raising AttributeError for unrelated objects.
        other_key = getattr(other, '_key', None)
        if other_key is None:
            return NotImplemented
        return self._key() == other_key()

    def __setattr__(self, name, value):
        raise AttributeError("EntryPoint objects are immutable.")

    def __repr__(self):
        return (
            f'EntryPoint(name={self.name!r}, value={self.value!r}, '
            f'group={self.group!r})'
        )

    def __hash__(self) -> int:
        return hash(self._key())
318
319
class EntryPoints(tuple):
    """
    An immutable collection of selectable EntryPoint objects.
    """

    __slots__ = ()

    def __getitem__(self, name: str) -> EntryPoint:  # type: ignore[override]  # Work with str instead of int
        """
        Get the EntryPoint in self matching name.

        :raises KeyError: If no entry point has the given name.
        """
        try:
            return next(iter(self.select(name=name)))
        except StopIteration:
            # The exhausted iterator is an implementation detail; keep it
            # out of the traceback shown to the caller.
            raise KeyError(name) from None

    def __repr__(self):
        """
        Repr with classname and tuple constructor to
        signal that we deviate from regular tuple behavior.
        """
        return f'{self.__class__.__name__}({tuple(self)!r})'

    def select(self, **params) -> EntryPoints:
        """
        Select entry points from self that match the
        given parameters (typically group and/or name).
        """
        return EntryPoints(ep for ep in self if ep.matches(**params))

    @property
    def names(self) -> set[str]:
        """
        Return the set of all names of all entry points.
        """
        return {ep.name for ep in self}

    @property
    def groups(self) -> set[str]:
        """
        Return the set of all groups of all entry points.
        """
        return {ep.group for ep in self}

    @classmethod
    def _from_text_for(cls, text, dist):
        """Parse *text* into entry points, each bound to *dist*."""
        return cls(ep._for(dist) for ep in cls._from_text(text))

    @staticmethod
    def _from_text(text):
        """Generate EntryPoint objects from entry-points text.

        A falsy *text* (``None`` or empty) produces no entry points.
        """
        return (
            EntryPoint(name=item.value.name, value=item.value.value, group=item.name)
            for item in Sectioned.section_pairs(text or '')
        )
374
375
class PackagePath(pathlib.PurePosixPath):
    """A reference to a path in a package"""

    # These attributes are assigned after construction by
    # ``Distribution.files``; ``hash`` and ``size`` are None when the
    # corresponding column is missing from the file listing.
    hash: FileHash | None
    size: int | None
    dist: Distribution

    def read_text(self, encoding: str = 'utf-8') -> str:
        """Return the located file's content decoded with *encoding*."""
        return self.locate().read_text(encoding=encoding)

    def read_binary(self) -> bytes:
        """Return the located file's content as bytes."""
        return self.locate().read_bytes()

    def locate(self) -> SimplePath:
        """Return a path-like object for this path"""
        return self.dist.locate_file(self)
392
393
class FileHash:
    """A file hash given as a ``mode=value`` spec string."""

    def __init__(self, spec: str) -> None:
        # Split on the first '=' only; any later '=' stays in the value.
        mode, _sep, value = spec.partition('=')
        self.mode = mode
        self.value = value

    def __repr__(self) -> str:
        return '<FileHash mode: {} value: {}>'.format(self.mode, self.value)
400
401
class Distribution(metaclass=abc.ABCMeta):
    """
    An abstract Python distribution package.

    A provider supplies a concrete subclass by implementing the
    abstract methods ``read_text`` and ``locate_file``. The remaining
    properties are derived from ``read_text``; a provider may override
    any of them to bypass the file-reading mechanism.
    """

    @abc.abstractmethod
    def read_text(self, filename) -> str | None:
        """Attempt to load metadata file given by the name.

        Distribution metadata is organized as blobs of text, typically
        stored as "files" in the metadata directory (for example
        ``package-1.0.dist-info``). Commonly present files are:

        - METADATA: The distribution metadata including fields
          like Name and Version and Description.
        - entry_points.txt: A series of entry points as defined in
          `the entry points spec <https://packaging.python.org/en/latest/specifications/entry-points/#file-format>`_.
        - RECORD: A record of files according to
          `this recording spec <https://packaging.python.org/en/latest/specifications/recording-installed-packages/#the-record-file>`_.

        A package may provide any set of files, including those
        not listed here or none at all.

        :param filename: The name of the file in the distribution info.
        :return: The text if found, otherwise None.
        """

    @abc.abstractmethod
    def locate_file(self, path: str | os.PathLike[str]) -> SimplePath:
        """
        Resolve a path inside this distribution to a SimplePath.

        Callers of ``Distribution.files()`` use this method to locate
        the listed files. A Distribution that can represent its files
        as ``SimplePath`` objects should implement it accordingly.

        A provider may decline by raising NotImplementedError, in which
        case consumers of that Distribution cannot invoke
        ``Distribution.files()``.
        """

    @classmethod
    def from_name(cls, name: str) -> Distribution:
        """Return the Distribution for the given package name.

        :param name: The name of the distribution package to search for.
        :return: The Distribution instance (or subclass thereof) for the named
            package, if found.
        :raises PackageNotFoundError: When the named package's distribution
            metadata cannot be found.
        :raises ValueError: When an invalid value is supplied for name.
        """
        if not name:
            raise ValueError("A distribution name is required.")
        try:
            ranked = cls._prefer_valid(cls.discover(name=name))
            return next(iter(ranked))
        except StopIteration:
            raise PackageNotFoundError(name)

    @classmethod
    def discover(
        cls, *, context: DistributionFinder.Context | None = None, **kwargs
    ) -> Iterable[Distribution]:
        """Return an iterable of Distribution objects for all packages.

        Either pass a ready-made ``context`` or pass keyword arguments
        from which a context is constructed; supplying both is an error.

        :param context: A ``DistributionFinder.Context`` object.
        :return: Iterable of Distribution objects for packages matching
          the context.
        :raises ValueError: When both context and keyword arguments are given.
        """
        if context and kwargs:
            raise ValueError("cannot accept context and kwargs")
        if not context:
            context = DistributionFinder.Context(**kwargs)
        resolvers = cls._discover_resolvers()
        return itertools.chain.from_iterable(
            resolve(context) for resolve in resolvers
        )

    @staticmethod
    def _prefer_valid(dists: Iterable[Distribution]) -> Iterable[Distribution]:
        """
        Prefer (move to the front) distributions that have metadata.
        """
        # Reading ``metadata`` raises MetadataNotFound when it is absent;
        # the trap turns that into a boolean predicate.
        metadata_loads = ExceptionTrap(MetadataNotFound).passes(
            operator.attrgetter('metadata')
        )

        grouped = bucket(dists, metadata_loads)
        return itertools.chain(grouped[True], grouped[False])

    @staticmethod
    def at(path: str | os.PathLike[str]) -> Distribution:
        """Return a Distribution for the indicated metadata path.

        :param path: a string or path-like object
        :return: a concrete Distribution instance for the path
        """
        metadata_dir = pathlib.Path(path)
        return PathDistribution(metadata_dir)

    @staticmethod
    def _discover_resolvers():
        """Search the meta_path for resolvers (MetadataPathFinders)."""
        candidates = (
            getattr(finder, 'find_distributions', None) for finder in sys.meta_path
        )
        # Finders lacking ``find_distributions`` contribute None; drop them.
        return filter(None, candidates)

    @property
    def metadata(self) -> _meta.PackageMetadata:
        """Return the parsed metadata for this Distribution.

        The returned object will have keys that name the various bits of
        metadata per the
        `Core metadata specifications <https://packaging.python.org/en/latest/specifications/core-metadata/#core-metadata>`_.

        Custom providers may provide the METADATA file or override this
        property.

        :raises MetadataNotFound: If no metadata file is present.
        """
        text = (
            self.read_text('METADATA')
            or self.read_text('PKG-INFO')
            # This last clause is here to support old egg-info files.  Its
            # effect is to just end up using the PathDistribution's self._path
            # (which points to the egg-info file) attribute unchanged.
            or self.read_text('')
        )
        present = self._ensure_metadata_present(text)
        return self._assemble_message(present)

    @staticmethod
    def _assemble_message(text: str) -> _meta.PackageMetadata:
        """Parse *text* as an email-style message and wrap it in the adapter."""
        # deferred for performance (python/cpython#109829)
        from . import _adapters

        parsed = email.message_from_string(text)
        return _adapters.Message(parsed)

    def _ensure_metadata_present(self, text: str | None) -> str:
        """Return *text* unchanged, raising MetadataNotFound when it is None."""
        if text is None:
            raise MetadataNotFound('No package metadata was found.')
        return text

    @property
    def name(self) -> str:
        """Return the 'Name' metadata for the distribution package."""
        return self.metadata['Name']

    @property
    def _normalized_name(self):
        """Return a normalized version of the name."""
        return Prepared.normalize(self.name)

    @property
    def version(self) -> str:
        """Return the 'Version' metadata for the distribution package."""
        return self.metadata['Version']

    @property
    def entry_points(self) -> EntryPoints:
        """
        Return EntryPoints for this distribution.

        Custom providers may provide the ``entry_points.txt`` file
        or override this property.
        """
        text = self.read_text('entry_points.txt')
        return EntryPoints._from_text_for(text, self)

    @property
    def files(self) -> list[PackagePath] | None:
        """Files in this distribution.

        :return: List of PackagePath for this distribution or None

        Result is `None` if the metadata file that enumerates files
        (i.e. RECORD for dist-info, or installed-files.txt or
        SOURCES.txt for egg-info) is missing.
        Result may be empty if the metadata exists but is empty.

        Custom providers are recommended to provide a "RECORD" file (in
        ``read_text``) or override this property to allow for callers to be
        able to resolve filenames provided by the package.
        """

        def make_file(name, digest=None, size_str=None):
            # Called positionally with the columns of one CSV row.
            entry = PackagePath(name)
            entry.hash = FileHash(digest) if digest else None
            entry.size = int(size_str) if size_str else None
            entry.dist = self
            return entry

        @pass_none
        def make_files(lines):
            # Delay csv import, since Distribution.files is not as widely used
            # as other parts of importlib.metadata
            import csv

            return starmap(make_file, csv.reader(lines))

        @pass_none
        def skip_missing_files(package_paths):
            return [path for path in package_paths if path.locate().exists()]

        # The first listing that is present (and non-empty) wins.
        lines = (
            self._read_files_distinfo()
            or self._read_files_egginfo_installed()
            or self._read_files_egginfo_sources()
        )
        return skip_missing_files(make_files(lines))

    def _read_files_distinfo(self):
        """
        Read the lines of RECORD.
        """
        text = self.read_text('RECORD')
        if not text:
            # Missing (None) or empty ('') is passed through unchanged.
            return text
        return text.splitlines()

    def _read_files_egginfo_installed(self):
        """
        Read installed-files.txt and return lines in a similar
        CSV-parsable format as RECORD: each file must be placed
        relative to the site-packages directory and must also be
        quoted (since file names can contain literal commas).

        This file is written when the package is installed by pip,
        but it might not be written for other installation methods.
        Assume the file is accurate if it exists.
        """
        text = self.read_text('installed-files.txt')
        # Prepend the .egg-info/ subdir to the lines in this file.
        # But this subdir is only available from PathDistribution's
        # self._path.
        subdir = getattr(self, '_path', None)
        if not text or not subdir:
            return

        relative = (
            py311.relative_fix((subdir / name).resolve())
            .relative_to(self.locate_file('').resolve(), walk_up=True)
            .as_posix()
            for name in text.splitlines()
        )
        return map('"{}"'.format, relative)

    def _read_files_egginfo_sources(self):
        """
        Read SOURCES.txt and return lines in a similar CSV-parsable
        format as RECORD: each file name must be quoted (since it
        might contain literal commas).

        Note that SOURCES.txt is not a reliable source for what
        files are installed by a package. This file is generated
        for a source archive, and the files that are present
        there (e.g. setup.py) may not correctly reflect the files
        that are present after the package has been installed.
        """
        text = self.read_text('SOURCES.txt')
        if not text:
            return text
        return map('"{}"'.format, text.splitlines())

    @property
    def requires(self) -> list[str] | None:
        """Generated requirements specified for this Distribution"""
        reqs = self._read_dist_info_reqs() or self._read_egg_info_reqs()
        if not reqs:
            return reqs
        return list(reqs)

    def _read_dist_info_reqs(self):
        """Return every 'Requires-Dist' value from the metadata."""
        return self.metadata.get_all('Requires-Dist')

    def _read_egg_info_reqs(self):
        """Return requirements derived from requires.txt, or None if absent."""
        source = self.read_text('requires.txt')
        parse = pass_none(self._deps_from_requires_text)
        return parse(source)

    @classmethod
    def _deps_from_requires_text(cls, source):
        """Convert the text of a requires.txt into simple requirements."""
        sections = Sectioned.read(source)
        return cls._convert_egg_info_reqs_to_simple_reqs(sections)

    @staticmethod
    def _convert_egg_info_reqs_to_simple_reqs(sections):
        """
        Historically, setuptools would solicit and store 'extra'
        requirements, including those with environment markers,
        in separate sections. More modern tools expect each
        dependency to be defined separately, with any relevant
        extras and environment markers attached directly to that
        requirement. This method converts the former to the
        latter. See _test_deps_from_requires_text for an example.
        """

        def marker_suffix(section_name):
            # A section name has the form ``extra:markers``; both parts
            # are optional.
            extra, _sep, markers = (section_name or '').partition(':')
            if extra and markers:
                markers = f'({markers})'
            extra_clause = extra and f'extra == "{extra}"'
            clauses = [clause for clause in (markers, extra_clause) if clause]
            if not clauses:
                return ''
            return '; ' + ' and '.join(clauses)

        for section in sections:
            requirement = section.value
            # PEP 508 requires a space between the url_spec and the
            # quoted_marker. '@' is uniquely indicative of a url_req.
            # Ref python/importlib_metadata#357.
            separator = ' ' if '@' in requirement else ''
            yield requirement + separator + marker_suffix(section.name)

    @property
    def origin(self):
        """Return the parsed ``direct_url.json``, or None when it is absent."""
        return self._load_json('direct_url.json')

    def _load_json(self, filename):
        """Parse the named metadata file as JSON into SimpleNamespace objects."""
        # Deferred for performance (python/importlib_metadata#503)
        import json

        return pass_none(json.loads)(
            self.read_text(filename),
            object_hook=lambda data: types.SimpleNamespace(**data),
        )
743
744
class DistributionFinder(MetaPathFinder):
    """
    A MetaPathFinder capable of discovering installed distributions.

    Custom providers should implement this interface in order to
    supply metadata.
    """

    class Context:
        """
        Keyword arguments presented by the caller to
        ``distributions()`` or ``Distribution.discover()``
        to narrow the scope of a search for distributions
        in all DistributionFinders.

        Each DistributionFinder may expect any parameters
        and should attempt to honor the canonical
        parameters defined below when appropriate.

        This mechanism gives a custom provider a means to
        solicit additional details from the caller beyond
        "name" and "path" when searching distributions.
        For example, imagine a provider that exposes suites
        of packages in either a "public" or "private" ``realm``.
        A caller may wish to query only for distributions in
        a particular realm and could call
        ``distributions(realm="private")`` to signal to the
        custom provider to only include distributions from that
        realm.
        """

        name = None
        """
        Specific name for which a distribution finder should match.
        A name of ``None`` matches all distributions.
        """

        def __init__(self, **kwargs):
            # Every keyword becomes an instance attribute. ``path`` is a
            # read-only property, so it is stored in the instance dict
            # and served back by the property below.
            self.__dict__.update(kwargs)

        @property
        def path(self) -> list[str]:
            """
            The sequence of directory path that a distribution finder
            should search.

            Typically refers to Python installed package paths such as
            "site-packages" directories and defaults to ``sys.path``.
            """
            return self.__dict__.get('path', sys.path)

    @abc.abstractmethod
    def find_distributions(self, context=Context()) -> Iterable[Distribution]:
        """
        Find distributions.

        Return an iterable of all Distribution instances capable of
        loading the metadata for packages matching the ``context``,
        a DistributionFinder.Context instance.
        """
805
806
@passthrough
def _clear_after_fork(cached):
    """Ensure ``cached`` clears its cached state after ``fork`` when supported.

    ``FastPath`` caches zip-backed ``pathlib.Path`` objects that retain a
    reference to the parent's open ``ZipFile`` handle. Re-using a cached
    instance in a forked child can therefore resurrect invalid file pointers
    and trigger ``BadZipFile``/``OSError`` failures (python/importlib_metadata#520).
    Registering ``cache_clear`` with ``os.register_at_fork`` keeps each process
    on its own cache.

    NOTE(review): this function returns nothing itself; presumably the
    ``passthrough`` decorator makes it hand back ``cached`` so it can be
    used as a decorator. Confirm against ``_functools.passthrough``.
    """
    # ``os.register_at_fork`` is absent on some platforms; use ``noop`` there.
    register = getattr(os, 'register_at_fork', noop)
    register(after_in_child=cached.cache_clear)
819
820
class FastPath:
    """
    Micro-optimized class for searching a root for children.

    Root is a path on the file system that may contain metadata
    directories either as natural directories or within a zip file.

    >>> FastPath('').children()
    ['...']

    FastPath objects are cached and recycled for any given root.

    >>> FastPath('foobar') is FastPath('foobar')
    True
    """

    @_clear_after_fork  # type: ignore[misc]
    @functools.lru_cache()
    def __new__(cls, root):
        return super().__new__(cls)

    def __init__(self, root):
        self.root = root

    def joinpath(self, child):
        """Return the path of *child* beneath the root."""
        return pathlib.Path(self.root, child)

    def children(self):
        """List the names directly under the root.

        The root is first treated as a directory, then as a zip file;
        if both attempts fail the result is an empty list.
        """
        try:
            return os.listdir(self.root or '.')
        except Exception:
            pass
        try:
            return self.zip_children()
        except Exception:
            pass
        return []

    def zip_children(self):
        """Return the top-level names of the zip file at the root."""
        # deferred for performance (python/importlib_metadata#502)
        from zipp.compat.overlay import zipfile

        archive = zipfile.Path(self.root)
        members = archive.root.namelist()
        # From now on, children are resolved inside the zip file.
        self.joinpath = archive.joinpath

        # A dict is used to de-duplicate while keeping first-seen order.
        return dict.fromkeys(member.split(posixpath.sep, 1)[0] for member in members)

    def search(self, name):
        """Search the lookup built for the root's current mtime."""
        lookup = self.lookup(self.mtime)
        return lookup.search(name)

    @property
    def mtime(self):
        """The root's modification time.

        When the root cannot be stat'ed, the lookup cache is cleared
        and None is returned.
        """
        try:
            return os.stat(self.root).st_mtime
        except OSError:
            pass
        self.lookup.cache_clear()

    @method_cache
    def lookup(self, mtime):
        # ``mtime`` is unused in the body; it only takes part in the
        # cache key so that a changed mtime yields a fresh Lookup.
        return Lookup(self)
877
878
class Lookup:
    """
    A micro-optimized class for searching a (fast) path for metadata.
    """

    def __init__(self, path: FastPath):
        """
        Calculate all of the children representing metadata.

        From the children in the path, calculate early all of the
        children that appear to represent metadata (infos) or legacy
        metadata (eggs).
        """
        root_name = os.path.basename(path.root).lower()
        root_is_egg = root_name.endswith(".egg")
        self.infos = FreezableDefaultDict(list)
        self.eggs = FreezableDefaultDict(list)

        for child in path.children():
            lowered = child.lower()
            if lowered.endswith((".dist-info", ".egg-info")):
                # rpartition is faster than splitext and suitable for this purpose.
                project = lowered.rpartition(".")[0].partition("-")[0]
                key = Prepared.normalize(project)
                self.infos[key].append(path.joinpath(child))
            elif root_is_egg and lowered == "egg-info":
                # For an ``.egg`` root, the project name comes from the
                # root's own file name.
                project = root_name.rpartition(".")[0].partition("-")[0]
                legacy_key = Prepared.legacy_normalize(project)
                self.eggs[legacy_key].append(path.joinpath(child))

        self.infos.freeze()
        self.eggs.freeze()

    def search(self, prepared: Prepared):
        """
        Yield all infos and eggs matching the Prepared query.

        A falsy query matches every known info and egg.
        """
        if prepared:
            infos = self.infos[prepared.normalized]
            eggs = self.eggs[prepared.legacy_normalized]
        else:
            infos = itertools.chain.from_iterable(self.infos.values())
            eggs = itertools.chain.from_iterable(self.eggs.values())
        return itertools.chain(infos, eggs)
928
929
class Prepared:
    """
    A prepared search query for metadata on a possibly-named package.

    Pre-calculates the normalization to prevent repeated operations.

    >>> none = Prepared(None)
    >>> none.normalized
    >>> none.legacy_normalized
    >>> bool(none)
    False
    >>> sample = Prepared('Sample__Pkg-name.foo')
    >>> sample.normalized
    'sample_pkg_name_foo'
    >>> sample.legacy_normalized
    'sample__pkg_name.foo'
    >>> bool(sample)
    True
    """

    # Class-level defaults, left in place when the query has no name.
    normalized = None
    legacy_normalized = None

    def __init__(self, name: str | None):
        self.name = name
        if name is not None:
            self.normalized = self.normalize(name)
            self.legacy_normalized = self.legacy_normalize(name)

    @staticmethod
    def normalize(name):
        """
        PEP 503 normalization plus dashes as underscores.
        """
        collapsed = re.sub(r"[-_.]+", "-", name)
        return collapsed.lower().replace('-', '_')

    @staticmethod
    def legacy_normalize(name):
        """
        Normalize the package name as found in the convention in
        older packaging tools versions and specs.
        """
        lowered = name.lower()
        return lowered.replace('-', '_')

    def __bool__(self):
        # A query is truthy only when it carries a non-empty name.
        return bool(self.name)
977
978
@install
class MetadataPathFinder(NullFinder, DistributionFinder):
    """A degenerate finder for distribution packages on the file system.

    This finder supplies only a find_distributions() method for versions
    of Python that do not have a PathFinder find_distributions().
    """

    @classmethod
    def find_distributions(
        cls, context=DistributionFinder.Context()
    ) -> Iterable[PathDistribution]:
        """
        Find distributions.

        Return an iterable of all Distribution instances capable of
        loading the metadata for packages matching ``context.name``
        (or all names if ``None`` indicated) along the paths in the list
        of directories ``context.path``.
        """
        metadata_paths = cls._search_paths(context.name, context.path)
        return map(PathDistribution, metadata_paths)

    @classmethod
    def _search_paths(cls, name, paths):
        """Find metadata directories in paths heuristically."""
        query = Prepared(name)
        roots = map(FastPath, paths)
        return itertools.chain.from_iterable(root.search(query) for root in roots)

    @classmethod
    def invalidate_caches(cls) -> None:
        """Discard all cached FastPath instances."""
        FastPath.__new__.cache_clear()
1013
1014
class PathDistribution(Distribution):
    """A Distribution whose metadata is read from a metadata directory."""

    def __init__(self, path: SimplePath) -> None:
        """Construct a distribution.

        :param path: SimplePath indicating the metadata directory.
        """
        self._path = path

    def read_text(self, filename: str | os.PathLike[str]) -> str | None:
        try:
            return self._path.joinpath(filename).read_text(encoding='utf-8')
        except (
            FileNotFoundError,
            IsADirectoryError,
            KeyError,
            NotADirectoryError,
            PermissionError,
        ):
            # Any of these means the file is not available.
            return None

    read_text.__doc__ = Distribution.read_text.__doc__

    def locate_file(self, path: str | os.PathLike[str]) -> SimplePath:
        # Paths are resolved relative to the parent of the metadata directory.
        return self._path.parent / path

    @property
    def _normalized_name(self):
        """
        Performance optimization: where possible, resolve the
        normalized name from the file system path.
        """
        basename = os.path.basename(str(self._path))
        from_path = pass_none(Prepared.normalize)(self._name_from_stem(basename))
        # Fall back to the metadata-derived name when the path gives none.
        return from_path or super()._normalized_name

    @staticmethod
    def _name_from_stem(stem):
        """
        >>> PathDistribution._name_from_stem('foo-3.0.egg-info')
        'foo'
        >>> PathDistribution._name_from_stem('CherryPy-3.0.dist-info')
        'CherryPy'
        >>> PathDistribution._name_from_stem('face.egg-info')
        'face'
        >>> PathDistribution._name_from_stem('foo.bar')
        """
        filename, ext = os.path.splitext(stem)
        if ext in ('.dist-info', '.egg-info'):
            # The name is everything before the first dash, if any.
            return filename.partition('-')[0]
        return None
1068
1069
def distribution(distribution_name: str) -> Distribution:
    """Look up the ``Distribution`` instance for the named package.

    :param distribution_name: The name of the distribution package as a string.
    :return: A ``Distribution`` instance (or subclass thereof), as
        produced by ``Distribution.from_name``.
    """
    found = Distribution.from_name(distribution_name)
    return found
1077
1078
def distributions(**kwargs) -> Iterable[Distribution]:
    """Discover all ``Distribution`` instances in the current environment.

    :param kwargs: Keyword arguments forwarded unchanged to
        ``Distribution.discover``.
    :return: An iterable of ``Distribution`` instances.
    """
    discovered = Distribution.discover(**kwargs)
    return discovered
1085
1086
def metadata(distribution_name: str) -> _meta.PackageMetadata:
    """Fetch the metadata for the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: A PackageMetadata containing the parsed metadata.
    :raises MetadataNotFound: If no metadata file is present in the distribution.
    """
    dist = Distribution.from_name(distribution_name)
    return dist.metadata
1095
1096
def version(distribution_name: str) -> str:
    """Look up the version string of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: The version string for the package as defined in the package's
        "Version" metadata key.
    """
    dist = distribution(distribution_name)
    return dist.version
1105
1106
# ``unique_everseen`` with the key pre-bound, so uniqueness is judged by
# each item's ``_normalized_name`` attribute.
_unique = functools.partial(
    unique_everseen,
    key=operator.attrgetter('_normalized_name'),
)
"""
Wrapper for ``distributions`` to return unique distributions by name.
"""
1114
1115
def entry_points(**params) -> EntryPoints:
    """Collect the EntryPoint objects of all installed packages.

    Selection parameters (group or name) narrow the result to the
    entry points matching those properties (see
    EntryPoints.select()). Distributions are passed through
    ``_unique`` before their entry points are gathered.

    :return: EntryPoints for all installed packages.
    """
    unique_dists = _unique(distributions())
    per_dist = (dist.entry_points for dist in unique_dists)
    combined = itertools.chain.from_iterable(per_dist)
    return EntryPoints(combined).select(**params)
1129
1130
def files(distribution_name: str) -> list[PackagePath] | None:
    """Look up the files belonging to the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: List of files composing the distribution, i.e. the value
        of the distribution's ``files`` attribute (the annotation
        also permits ``None``).
    """
    dist = distribution(distribution_name)
    return dist.files
1138
1139
def requires(distribution_name: str) -> list[str] | None:
    """
    Look up the requirements of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: The requirements, suitable for
        packaging.requirement.Requirement, i.e. the value of the
        distribution's ``requires`` attribute (the annotation also
        permits ``None``).
    """
    dist = distribution(distribution_name)
    return dist.requires
1148
1149
def packages_distributions() -> Mapping[str, list[str]]:
    """
    Return a mapping of top-level packages to their
    distributions.

    For each discovered distribution, the top-level packages are the
    ones declared in its ``top_level.txt`` when that yields any
    names, and are otherwise inferred from the distribution's files.
    Each package maps to the list of ``Name`` metadata values of the
    distributions providing it.

    >>> import collections.abc
    >>> pkgs = packages_distributions()
    >>> all(isinstance(dist, collections.abc.Sequence) for dist in pkgs.values())
    True
    """
    pkg_to_dist = collections.defaultdict(list)
    for dist in distributions():
        pkgs = list(_top_level_declared(dist) or _top_level_inferred(dist))
        if not pkgs:
            # Nothing to record, so the metadata is never consulted
            # for this distribution.
            continue
        # Look the name up once per distribution rather than once per
        # package; the value does not change between packages.
        dist_name = dist.metadata['Name']
        for pkg in pkgs:
            pkg_to_dist[pkg].append(dist_name)
    return dict(pkg_to_dist)
1165
1166
def _top_level_declared(dist):
    """Return the whitespace-separated names in ``top_level.txt``.

    The result is an empty list when ``dist.read_text`` yields
    nothing for that file.
    """
    declared = dist.read_text('top_level.txt')
    if not declared:
        return []
    return declared.split()
1169
1170
def _topmost(name: PackagePath) -> str | None:
    """
    Return the top-most parent as long as there is a parent.

    :param name: A path exposing its components as ``parts``.
    :return: The first component when ``name`` has more than one
        component; ``None`` for a single-component or empty path.
    """
    parts = name.parts
    # Index instead of unpacking so that a path with no components
    # (e.g. an empty path) yields None rather than raising ValueError.
    return parts[0] if len(parts) > 1 else None
1177
1178
def _get_toplevel_name(name: PackagePath) -> str:
    """
    Derive a possibly importable module name from a path presumed
    to be relative to sys.path.

    The top-most parent is preferred; failing that, the module name
    reported by ``inspect.getmodulename``; failing that, the path
    rendered as a string.

    >>> _get_toplevel_name(PackagePath('foo.py'))
    'foo'
    >>> _get_toplevel_name(PackagePath('foo'))
    'foo'
    >>> _get_toplevel_name(PackagePath('foo.pyc'))
    'foo'
    >>> _get_toplevel_name(PackagePath('foo/__init__.py'))
    'foo'
    >>> _get_toplevel_name(PackagePath('foo.pth'))
    'foo.pth'
    >>> _get_toplevel_name(PackagePath('foo.dist-info'))
    'foo.dist-info'
    """
    # Defer import of inspect for performance (python/cpython#118761)
    import inspect

    parent = _topmost(name)
    if parent:
        return parent
    module = inspect.getmodulename(name)
    if module:
        return module
    return str(name)
1201
1202
def _top_level_inferred(dist):
    """Infer top-level names from the distribution's files.

    Every file is reduced to a candidate name via
    ``_get_toplevel_name``; candidates containing a dot are dropped.
    The result is a lazy ``filter`` over the set of candidates.
    """
    candidates = {
        _get_toplevel_name(path) for path in always_iterable(dist.files)
    }
    return filter(lambda candidate: '.' not in candidate, candidates)