1"""
2APIs exposing metadata from third-party Python packages.
3
4This codebase is shared between importlib.metadata in the stdlib
5and importlib_metadata in PyPI. See
6https://github.com/python/importlib_metadata/wiki/Development-Methodology
7for more detail.
8"""
9
10from __future__ import annotations
11
12import abc
13import collections
14import email
15import functools
16import itertools
17import operator
18import os
19import pathlib
20import posixpath
21import re
22import sys
23import textwrap
24import types
25from collections.abc import Iterable, Mapping
26from contextlib import suppress
27from importlib import import_module
28from importlib.abc import MetaPathFinder
29from itertools import starmap
30from typing import Any
31
32from . import _meta
33from ._collections import FreezableDefaultDict, Pair
34from ._compat import (
35 NullFinder,
36 install,
37)
38from ._context import ExceptionTrap
39from ._functools import method_cache, noop, pass_none, passthrough
40from ._itertools import always_iterable, bucket, unique_everseen
41from ._meta import PackageMetadata, SimplePath
42from .compat import py311
43
# Names exported by ``from <package> import *``; this list declares the
# public API of the module.
__all__ = [
    'Distribution',
    'DistributionFinder',
    'PackageMetadata',
    'PackageNotFoundError',
    'PackagePath',
    'MetadataNotFound',
    'SimplePath',
    'distribution',
    'distributions',
    'entry_points',
    'files',
    'metadata',
    'packages_distributions',
    'requires',
    'version',
]
61
62
class PackageNotFoundError(ModuleNotFoundError):
    """Raised when no metadata can be located for a requested package.

    The exception is expected to be constructed with exactly one
    positional argument: the name of the package that was looked up.
    """

    @property
    def name(self) -> str:  # type: ignore[override]  # make readonly
        """The package name, taken from the exception's sole argument."""
        # Unpacking (rather than indexing) insists on exactly one argument.
        [package_name] = self.args
        return package_name

    def __str__(self) -> str:
        return f"No package metadata was found for {self.name}"
73
74
class MetadataNotFound(FileNotFoundError):
    """No metadata file is present in the distribution.

    Raised by ``Distribution._ensure_metadata_present`` when none of the
    candidate metadata files yields any text.
    """
77
78
class Sectioned:
    """
    A minimal, fast parser for INI-like sectioned text such as
    ``entry_points.txt``.

    ``read`` yields one Pair per non-header line, tagged with the name
    of the most recent ``[section]`` header:

    >>> for item in Sectioned.read(Sectioned._sample):
    ...     print(item)
    Pair(name='sec1', value='# comments ignored')
    Pair(name='sec1', value='a = 1')
    Pair(name='sec1', value='b = 2')
    Pair(name='sec2', value='a = 2')

    ``section_pairs`` additionally drops comments and parses each
    line into a nested Pair:

    >>> res = Sectioned.section_pairs(Sectioned._sample)
    >>> item = next(res)
    >>> item.name
    'sec1'
    >>> item.value
    Pair(name='a', value='1')
    >>> item = next(res)
    >>> item.value
    Pair(name='b', value='2')
    >>> item = next(res)
    >>> item.name
    'sec2'
    >>> item.value
    Pair(name='a', value='2')
    >>> list(res)
    []
    """

    _sample = textwrap.dedent(
        """
        [sec1]
        # comments ignored
        a = 1
        b = 2

        [sec2]
        a = 2
        """
    ).lstrip()

    @classmethod
    def section_pairs(cls, text):
        """
        Yield a Pair for every valid line of ``text`` that belongs to a
        section; the Pair's value is itself a parsed Pair.
        """
        for item in cls.read(text, filter_=cls.valid):
            # Lines appearing before any section header carry no name.
            if item.name is None:
                continue
            yield item._replace(value=Pair.parse(item.value))

    @staticmethod
    def read(text, filter_=None):
        """
        Yield ``Pair(section_name, line)`` for each stripped line of
        ``text`` accepted by ``filter_``, skipping section headers.

        With the default ``filter_`` of None, empty lines are dropped
        (the behavior of the builtin ``filter``).
        """
        current = None
        stripped = (raw.strip() for raw in text.splitlines())
        for entry in filter(filter_, stripped):
            if entry.startswith('[') and entry.endswith(']'):
                current = entry.strip('[]')
            else:
                yield Pair(current, entry)

    @staticmethod
    def valid(line: str):
        """Truthy when ``line`` is non-empty and is not a ``#`` comment."""
        return line and not line.startswith('#')
142
143
class _EntryPointMatch(types.SimpleNamespace):
    """Namespace holding the named groups captured by ``EntryPoint.pattern``.

    ``EntryPoint._match`` builds it from ``match.groupdict()``.
    """

    # NOTE(review): ``attr`` and ``extras`` come from optional regex groups,
    # so they can be None at runtime despite the ``str`` annotations.
    module: str
    attr: str
    extras: str
148
149
class EntryPoint:
    """An entry point as defined by Python packaging conventions.

    See `the packaging docs on entry points
    <https://packaging.python.org/specifications/entry-points/>`_
    for more information.

    >>> ep = EntryPoint(
    ...     name=None, group=None, value='package.module:attr [extra1, extra2]')
    >>> ep.module
    'package.module'
    >>> ep.attr
    'attr'
    >>> ep.extras
    ['extra1', 'extra2']

    If the value package or module are not valid identifiers, a
    ValueError is raised on access.

    >>> EntryPoint(name=None, group=None, value='invalid-name').module
    Traceback (most recent call last):
    ...
    ValueError: ('Invalid object reference...invalid-name...
    >>> EntryPoint(name=None, group=None, value='invalid-name').attr
    Traceback (most recent call last):
    ...
    ValueError: ('Invalid object reference...invalid-name...
    >>> EntryPoint(name=None, group=None, value='invalid-name').extras
    Traceback (most recent call last):
    ...
    ValueError: ('Invalid object reference...invalid-name...

    The same thing happens on construction.

    >>> EntryPoint(name=None, group=None, value='invalid-name')
    Traceback (most recent call last):
    ...
    ValueError: ('Invalid object reference...invalid-name...

    """

    pattern = re.compile(
        r'(?P<module>[\w.]+)\s*'
        r'(:\s*(?P<attr>[\w.]+)\s*)?'
        r'((?P<extras>\[.*\])\s*)?$'
    )
    """
    A regular expression describing the syntax for an entry point,
    which might look like:

        - module
        - package.module
        - package.module:attribute
        - package.module:object.attribute
        - package.module:attr [extra1, extra2]

    Other combinations are possible as well.

    The expression is lenient about whitespace around the ':',
    following the attr, and following any extras.
    """

    name: str
    value: str
    group: str

    dist: Distribution | None = None

    def __init__(self, name: str, value: str, group: str) -> None:
        """Store the fields and validate ``value`` eagerly.

        :raises ValueError: If ``value`` does not match ``pattern``.
        """
        # __setattr__ is disabled below, so populate the instance dict directly.
        vars(self).update(name=name, value=value, group=group)
        # Touch the parsed value so that an invalid reference fails here.
        self.module

    def load(self) -> Any:
        """Load the entry point from its definition. If only a module
        is indicated by the value, return that module. Otherwise,
        return the named object.
        """
        module = import_module(self.module)
        # ``attr`` may be dotted (object.attribute); walk it piece by piece.
        attrs = filter(None, (self.attr or '').split('.'))
        return functools.reduce(getattr, attrs, module)

    @property
    def module(self) -> str:
        """The module portion of ``value``."""
        return self._match.module

    @property
    def attr(self) -> str:
        """The attribute portion of ``value`` (after the ':'), if any."""
        return self._match.attr

    @property
    def extras(self) -> list[str]:
        """The words found inside the bracketed extras of ``value``."""
        return re.findall(r'\w+', self._match.extras or '')

    @functools.cached_property
    def _match(self) -> _EntryPointMatch:
        """Parse ``value`` against ``pattern``, caching the result.

        :raises ValueError: If ``value`` does not match ``pattern``.
        """
        match = self.pattern.match(self.value)
        if not match:
            raise ValueError(
                'Invalid object reference. '
                'See https://packaging.python.org'
                '/en/latest/specifications/entry-points/#data-model',
                self.value,
            )
        return _EntryPointMatch(**match.groupdict())

    def _for(self, dist):
        """Associate this entry point with ``dist`` and return self."""
        vars(self).update(dist=dist)
        return self

    def matches(self, **params):
        """
        EntryPoint matches the given parameters.

        >>> ep = EntryPoint(group='foo', name='bar', value='bing:bong [extra1, extra2]')
        >>> ep.matches(group='foo')
        True
        >>> ep.matches(name='bar', value='bing:bong [extra1, extra2]')
        True
        >>> ep.matches(group='foo', name='other')
        False
        >>> ep.matches()
        True
        >>> ep.matches(extras=['extra1', 'extra2'])
        True
        >>> ep.matches(module='bing')
        True
        >>> ep.matches(attr='bong')
        True
        """
        self._disallow_dist(params)
        attrs = (getattr(self, param) for param in params)
        return all(map(operator.eq, params.values(), attrs))

    @staticmethod
    def _disallow_dist(params):
        """
        Querying by dist is not allowed (dist objects are not comparable).
        >>> EntryPoint(name='fan', value='fav', group='fag').matches(dist='foo')
        Traceback (most recent call last):
        ...
        ValueError: "dist" is not suitable for matching...
        """
        if "dist" in params:
            raise ValueError(
                '"dist" is not suitable for matching. '
                "Instead, use Distribution.entry_points.select() on a "
                "located distribution."
            )

    def _key(self):
        """The tuple that defines identity, ordering and hashing."""
        return self.name, self.value, self.group

    def __lt__(self, other):
        # Defer to the other operand (ultimately TypeError) rather than
        # failing with AttributeError on objects that are not entry points.
        if not isinstance(other, EntryPoint):
            return NotImplemented
        return self._key() < other._key()

    def __eq__(self, other):
        # Comparing against an unrelated object (None, str, ...) must yield
        # False via the default protocol instead of raising AttributeError.
        if not isinstance(other, EntryPoint):
            return NotImplemented
        return self._key() == other._key()

    def __setattr__(self, name, value):
        raise AttributeError("EntryPoint objects are immutable.")

    def __repr__(self):
        return (
            f'EntryPoint(name={self.name!r}, value={self.value!r}, '
            f'group={self.group!r})'
        )

    def __hash__(self) -> int:
        return hash(self._key())
319
320
class EntryPoints(tuple):
    """
    An immutable collection of selectable EntryPoint objects.
    """

    __slots__ = ()

    def __getitem__(self, name: str) -> EntryPoint:  # type: ignore[override] # Work with str instead of int
        """
        Get the EntryPoint in self matching name.

        :raises KeyError: If no entry point has the given name.
        """
        try:
            return next(iter(self.select(name=name)))
        except StopIteration:
            # The StopIteration is an implementation detail; keep it out of
            # the traceback shown to the caller.
            raise KeyError(name) from None

    def __repr__(self):
        """
        Repr with classname and tuple constructor to
        signal that we deviate from regular tuple behavior.
        """
        return f'{self.__class__.__name__}({tuple(self)!r})'

    def select(self, **params) -> EntryPoints:
        """
        Select entry points from self that match the
        given parameters (typically group and/or name).
        """
        return EntryPoints(ep for ep in self if ep.matches(**params))

    @property
    def names(self) -> set[str]:
        """
        Return the set of all names of all entry points.
        """
        return {ep.name for ep in self}

    @property
    def groups(self) -> set[str]:
        """
        Return the set of all groups of all entry points.
        """
        return {ep.group for ep in self}

    @classmethod
    def _from_text_for(cls, text, dist):
        """Parse ``text`` into entry points, each associated with ``dist``."""
        return cls(ep._for(dist) for ep in cls._from_text(text))

    @staticmethod
    def _from_text(text):
        """Lazily build EntryPoint objects from sectioned ``text``.

        A falsy ``text`` (None or empty) yields no entry points.
        """
        return (
            EntryPoint(name=item.value.name, value=item.value.value, group=item.name)
            for item in Sectioned.section_pairs(text or '')
        )
375
376
class PackagePath(pathlib.PurePosixPath):
    """A pure POSIX path naming a file that belongs to a distribution."""

    # These attributes are assigned after construction
    # (see ``Distribution.files``).
    hash: FileHash | None
    size: int
    dist: Distribution

    def locate(self) -> SimplePath:
        """Ask the owning distribution to resolve this path."""
        return self.dist.locate_file(self)

    def read_text(self, encoding: str = 'utf-8') -> str:
        """Return the located file's content decoded with ``encoding``."""
        target = self.locate()
        return target.read_text(encoding=encoding)

    def read_binary(self) -> bytes:
        """Return the located file's content as bytes."""
        target = self.locate()
        return target.read_bytes()
393
394
class FileHash:
    """A file hash given as a ``mode=value`` spec."""

    def __init__(self, spec: str) -> None:
        # Split on the first '=' only; the value may itself contain '='.
        mode, _sep, value = spec.partition('=')
        self.mode = mode
        self.value = value

    def __repr__(self) -> str:
        return f'<FileHash mode: {self.mode} value: {self.value}>'
401
402
class Distribution(metaclass=abc.ABCMeta):
    """
    An abstract Python distribution package.

    Custom providers may derive from this class and define
    the abstract methods to provide a concrete implementation
    for their environment. Some providers may opt to override
    the default implementation of some properties to bypass
    the file-reading mechanism.
    """

    @abc.abstractmethod
    def read_text(self, filename) -> str | None:
        """Attempt to load metadata file given by the name.

        Python distribution metadata is organized by blobs of text
        typically represented as "files" in the metadata directory
        (e.g. package-1.0.dist-info). These files include things
        like:

        - METADATA: The distribution metadata including fields
          like Name and Version and Description.
        - entry_points.txt: A series of entry points as defined in
          `the entry points spec <https://packaging.python.org/en/latest/specifications/entry-points/#file-format>`_.
        - RECORD: A record of files according to
          `this recording spec <https://packaging.python.org/en/latest/specifications/recording-installed-packages/#the-record-file>`_.

        A package may provide any set of files, including those
        not listed here or none at all.

        :param filename: The name of the file in the distribution info.
        :return: The text if found, otherwise None.
        """

    @abc.abstractmethod
    def locate_file(self, path: str | os.PathLike[str]) -> SimplePath:
        """
        Given a path to a file in this distribution, return a SimplePath
        to it.

        This method is used by callers of ``Distribution.files()`` to
        locate files within the distribution. If it's possible for a
        Distribution to represent files in the distribution as
        ``SimplePath`` objects, it should implement this method
        to resolve such objects.

        Some Distribution providers may elect not to resolve SimplePath
        objects within the distribution by raising a
        NotImplementedError, but consumers of such a Distribution would
        be unable to invoke ``Distribution.files()``.
        """

    @classmethod
    def from_name(cls, name: str) -> Distribution:
        """Return the Distribution for the given package name.

        :param name: The name of the distribution package to search for.
        :return: The Distribution instance (or subclass thereof) for the named
            package, if found.
        :raises PackageNotFoundError: When the named package's distribution
            metadata cannot be found.
        :raises ValueError: When an invalid value is supplied for name.
        """
        if not name:
            raise ValueError("A distribution name is required.")
        try:
            ranked = cls._prefer_valid(cls.discover(name=name))
            # Only the first (most preferred) match is of interest.
            return next(iter(ranked))
        except StopIteration:
            raise PackageNotFoundError(name) from None

    @classmethod
    def discover(
        cls, *, context: DistributionFinder.Context | None = None, **kwargs
    ) -> Iterable[Distribution]:
        """Return an iterable of Distribution objects for all packages.

        Pass a ``context`` or pass keyword arguments for constructing
        a context.

        :context: A ``DistributionFinder.Context`` object.
        :return: Iterable of Distribution objects for packages matching
          the context.
        """
        if context and kwargs:
            raise ValueError("cannot accept context and kwargs")
        if not context:
            context = DistributionFinder.Context(**kwargs)
        return itertools.chain.from_iterable(
            resolver(context) for resolver in cls._discover_resolvers()
        )

    @staticmethod
    def _prefer_valid(dists: Iterable[Distribution]) -> Iterable[Distribution]:
        """
        Prefer (move to the front) distributions that have metadata.

        Ref python/importlib_resources#489.
        """
        # The predicate reports whether reading ``.metadata`` completes
        # without raising MetadataNotFound.
        trap = ExceptionTrap(MetadataNotFound)
        has_metadata = trap.passes(operator.attrgetter('metadata'))

        partitioned = bucket(dists, has_metadata)
        return itertools.chain(partitioned[True], partitioned[False])

    @staticmethod
    def at(path: str | os.PathLike[str]) -> Distribution:
        """Return a Distribution for the indicated metadata path.

        :param path: a string or path-like object
        :return: a concrete Distribution instance for the path
        """
        metadata_dir = pathlib.Path(path)
        return PathDistribution(metadata_dir)

    @staticmethod
    def _discover_resolvers():
        """Search the meta_path for resolvers (MetadataPathFinders)."""
        # Finders lacking ``find_distributions`` contribute None, which the
        # filter below discards.
        candidates = (
            getattr(finder, 'find_distributions', None) for finder in sys.meta_path
        )
        return filter(None, candidates)

    @property
    def metadata(self) -> _meta.PackageMetadata:
        """Return the parsed metadata for this Distribution.

        The returned object will have keys that name the various bits of
        metadata per the
        `Core metadata specifications <https://packaging.python.org/en/latest/specifications/core-metadata/#core-metadata>`_.

        Custom providers may provide the METADATA file or override this
        property.

        :raises MetadataNotFound: If no metadata file is present.
        """
        # The final, empty name is here to support old egg-info files. Its
        # effect is to just end up using the PathDistribution's self._path
        # (which points to the egg-info file) attribute unchanged.
        text = None
        for candidate in ('METADATA', 'PKG-INFO', ''):
            text = self.read_text(candidate)
            if text:
                break
        return self._assemble_message(self._ensure_metadata_present(text))

    @staticmethod
    def _assemble_message(text: str) -> _meta.PackageMetadata:
        """Parse ``text`` as an email-style message and wrap it."""
        # deferred for performance (python/cpython#109829)
        from . import _adapters

        parsed = email.message_from_string(text)
        return _adapters.Message(parsed)

    def _ensure_metadata_present(self, text: str | None) -> str:
        """Return ``text`` unchanged, or raise MetadataNotFound if it is None."""
        if text is None:
            raise MetadataNotFound('No package metadata was found.')
        return text

    @property
    def name(self) -> str:
        """Return the 'Name' metadata for the distribution package."""
        meta = self.metadata
        return meta['Name']

    @property
    def _normalized_name(self):
        """Return a normalized version of the name."""
        return Prepared.normalize(self.name)

    @property
    def version(self) -> str:
        """Return the 'Version' metadata for the distribution package."""
        meta = self.metadata
        return meta['Version']

    @property
    def entry_points(self) -> EntryPoints:
        """
        Return EntryPoints for this distribution.

        Custom providers may provide the ``entry_points.txt`` file
        or override this property.
        """
        text = self.read_text('entry_points.txt')
        return EntryPoints._from_text_for(text, self)

    @property
    def files(self) -> list[PackagePath] | None:
        """Files in this distribution.

        :return: List of PackagePath for this distribution or None

        Result is `None` if the metadata file that enumerates files
        (i.e. RECORD for dist-info, or installed-files.txt or
        SOURCES.txt for egg-info) is missing.
        Result may be empty if the metadata exists but is empty.

        Custom providers are recommended to provide a "RECORD" file (in
        ``read_text``) or override this property to allow for callers to be
        able to resolve filenames provided by the package.
        """

        def make_file(name, hash=None, size_str=None):
            # One CSV row: name, optional hash spec, optional size.
            entry = PackagePath(name)
            entry.hash = FileHash(hash) if hash else None
            entry.size = int(size_str) if size_str else None
            entry.dist = self
            return entry

        @pass_none
        def make_files(lines):
            # Delay csv import, since Distribution.files is not as widely used
            # as other parts of importlib.metadata
            import csv

            rows = csv.reader(lines)
            return starmap(make_file, rows)

        @pass_none
        def skip_missing_files(package_paths):
            return [path for path in package_paths if path.locate().exists()]

        # The first source that produces a truthy result wins.
        lines = (
            self._read_files_distinfo()
            or self._read_files_egginfo_installed()
            or self._read_files_egginfo_sources()
        )
        return skip_missing_files(make_files(lines))

    def _read_files_distinfo(self):
        """
        Read the lines of RECORD.
        """
        text = self.read_text('RECORD')
        return text.splitlines() if text else text

    def _read_files_egginfo_installed(self):
        """
        Read installed-files.txt and return lines in a similar
        CSV-parsable format as RECORD: each file must be placed
        relative to the site-packages directory and must also be
        quoted (since file names can contain literal commas).

        This file is written when the package is installed by pip,
        but it might not be written for other installation methods.
        Assume the file is accurate if it exists.
        """
        text = self.read_text('installed-files.txt')
        # Prepend the .egg-info/ subdir to the lines in this file.
        # But this subdir is only available from PathDistribution's
        # self._path.
        subdir = getattr(self, '_path', None)
        if not (text and subdir):
            return None

        paths = (
            py311
            .relative_fix((subdir / name).resolve())
            .relative_to(self.locate_file('').resolve(), walk_up=True)
            .as_posix()
            for name in text.splitlines()
        )
        return map('"{}"'.format, paths)

    def _read_files_egginfo_sources(self):
        """
        Read SOURCES.txt and return lines in a similar CSV-parsable
        format as RECORD: each file name must be quoted (since it
        might contain literal commas).

        Note that SOURCES.txt is not a reliable source for what
        files are installed by a package. This file is generated
        for a source archive, and the files that are present
        there (e.g. setup.py) may not correctly reflect the files
        that are present after the package has been installed.
        """
        text = self.read_text('SOURCES.txt')
        if not text:
            return text
        return map('"{}"'.format, text.splitlines())

    @property
    def requires(self) -> list[str] | None:
        """Generated requirements specified for this Distribution"""
        reqs = self._read_dist_info_reqs() or self._read_egg_info_reqs()
        return list(reqs) if reqs else reqs

    def _read_dist_info_reqs(self):
        """Return every 'Requires-Dist' entry from the metadata."""
        meta = self.metadata
        return meta.get_all('Requires-Dist')

    def _read_egg_info_reqs(self):
        """Derive requirements from ``requires.txt`` when that file exists."""
        source = self.read_text('requires.txt')
        convert = pass_none(self._deps_from_requires_text)
        return convert(source)

    @classmethod
    def _deps_from_requires_text(cls, source):
        """Convert the sectioned text of ``requires.txt`` into requirements."""
        sections = Sectioned.read(source)
        return cls._convert_egg_info_reqs_to_simple_reqs(sections)

    @staticmethod
    def _convert_egg_info_reqs_to_simple_reqs(sections):
        """
        Historically, setuptools would solicit and store 'extra'
        requirements, including those with environment markers,
        in separate sections. More modern tools expect each
        dependency to be defined separately, with any relevant
        extras and environment markers attached directly to that
        requirement. This method converts the former to the
        latter. See _test_deps_from_requires_text for an example.
        """

        def make_condition(name):
            return name and f'extra == "{name}"'

        def quoted_marker(section):
            # A section name has the form ``extra:markers``; either part
            # (or the whole name) may be absent.
            extra, _, markers = (section or '').partition(':')
            if extra and markers:
                markers = f'({markers})'
            parts = [part for part in (markers, make_condition(extra)) if part]
            if not parts:
                return ''
            return '; ' + ' and '.join(parts)

        def url_req_space(req):
            """
            PEP 508 requires a space between the url_spec and the quoted_marker.
            Ref python/importlib_metadata#357.
            """
            # '@' is uniquely indicative of a url_req.
            return ' ' if '@' in req else ''

        for section in sections:
            requirement = section.value
            yield requirement + url_req_space(requirement) + quoted_marker(section.name)

    @property
    def origin(self):
        """The parsed content of ``direct_url.json``, or None if absent."""
        return self._load_json('direct_url.json')

    def _load_json(self, filename):
        """Parse ``filename`` as JSON, mapping each object to a SimpleNamespace."""
        # Deferred for performance (python/importlib_metadata#503)
        import json

        def to_namespace(data):
            return types.SimpleNamespace(**data)

        loader = pass_none(json.loads)
        return loader(self.read_text(filename), object_hook=to_namespace)
744
745
class DistributionFinder(MetaPathFinder):
    """
    A MetaPathFinder capable of discovering installed distributions.

    Custom providers should implement this interface in order to
    supply metadata.
    """

    class Context:
        """
        Keyword arguments presented by the caller to
        ``distributions()`` or ``Distribution.discover()``
        to narrow the scope of a search for distributions
        in all DistributionFinders.

        Each DistributionFinder may expect any parameters
        and should attempt to honor the canonical
        parameters defined below when appropriate.

        This mechanism gives a custom provider a means to
        solicit additional details from the caller beyond
        "name" and "path" when searching distributions.
        For example, imagine a provider that exposes suites
        of packages in either a "public" or "private" ``realm``.
        A caller may wish to query only for distributions in
        a particular realm and could call
        ``distributions(realm="private")`` to signal to the
        custom provider to only include distributions from that
        realm.
        """

        name = None
        """
        Specific name for which a distribution finder should match.
        A name of ``None`` matches all distributions.
        """

        def __init__(self, **kwargs):
            # Every keyword lands in the instance dict, including ``path``,
            # which the property below reads back from there.
            self.__dict__.update(kwargs)

        @property
        def path(self) -> list[str]:
            """
            The sequence of directory path that a distribution finder
            should search.

            Typically refers to Python installed package paths such as
            "site-packages" directories and defaults to ``sys.path``.
            """
            own = vars(self)
            return own['path'] if 'path' in own else sys.path

    @abc.abstractmethod
    def find_distributions(self, context=Context()) -> Iterable[Distribution]:
        """
        Find distributions.

        Return an iterable of all Distribution instances capable of
        loading the metadata for packages matching the ``context``,
        a DistributionFinder.Context instance.
        """
806
807
@passthrough
def _clear_after_fork(cached):
    """Register ``cached.cache_clear`` to run in the child after ``fork``.

    ``FastPath`` caches zip-backed ``pathlib.Path`` objects that retain a
    reference to the parent's open ``ZipFile`` handle. Re-using a cached
    instance in a forked child can therefore resurrect invalid file pointers
    and trigger ``BadZipFile``/``OSError`` failures (python/importlib_metadata#520).
    Registering ``cache_clear`` with ``os.register_at_fork`` keeps each process
    on its own cache.
    """
    # Where ``os.register_at_fork`` is unavailable, ``noop`` stands in for it.
    register = getattr(os, 'register_at_fork', noop)
    register(after_in_child=cached.cache_clear)
820
821
class FastPath:
    """
    Micro-optimized class for searching a root for children.

    Root is a path on the file system that may contain metadata
    directories either as natural directories or within a zip file.

    >>> FastPath('').children()
    ['...']

    FastPath objects are cached and recycled for any given root.

    >>> FastPath('foobar') is FastPath('foobar')
    True
    """

    @_clear_after_fork  # type: ignore[misc]
    @functools.lru_cache()
    def __new__(cls, root):
        # Cached on (cls, root), so equal roots share a single instance.
        return super().__new__(cls)

    def __init__(self, root):
        self.root = root

    def joinpath(self, child):
        """Return ``child`` joined onto the root as a ``pathlib.Path``."""
        return pathlib.Path(self.root, child)

    def children(self):
        """List the root's children: directory entries first, then zip
        members, and an empty list when both attempts fail."""
        try:
            return os.listdir(self.root or '.')
        except Exception:
            pass
        try:
            return self.zip_children()
        except Exception:
            pass
        return []

    def zip_children(self):
        """Return the top-level names of the zip file at the root."""
        # deferred for performance (python/importlib_metadata#502)
        from zipp.compat.overlay import zipfile

        zip_path = zipfile.Path(self.root)
        names = zip_path.root.namelist()
        # From here on, children are resolved inside the zip file.
        self.joinpath = zip_path.joinpath

        # dict.fromkeys de-duplicates while keeping first-seen order.
        top_level = (entry.partition(posixpath.sep)[0] for entry in names)
        return dict.fromkeys(top_level)

    def search(self, name):
        """Search the Lookup for the root's current mtime for ``name``."""
        current = self.lookup(self.mtime)
        return current.search(name)

    @property
    def mtime(self):
        """The root's modification time; if it cannot be stat'ed, clear the
        lookup cache and return None."""
        try:
            return os.stat(self.root).st_mtime
        except OSError:
            pass
        self.lookup.cache_clear()

    @method_cache
    def lookup(self, mtime):
        # ``mtime`` is unused in the body; it serves as part of the cache key.
        return Lookup(self)
878
879
class Lookup:
    """
    A micro-optimized class for searching a (fast) path for metadata.
    """

    def __init__(self, path: FastPath):
        """
        Calculate all of the children representing metadata.

        From the children in the path, calculate early all of the
        children that appear to represent metadata (infos) or legacy
        metadata (eggs).
        """
        root_name = os.path.basename(path.root).lower()
        root_is_egg = root_name.endswith(".egg")
        self.infos = FreezableDefaultDict(list)
        self.eggs = FreezableDefaultDict(list)

        for child in path.children():
            lowered = child.lower()
            if lowered.endswith((".dist-info", ".egg-info")):
                # rpartition is faster than splitext and suitable for this purpose.
                stem, _, _ = lowered.rpartition(".")
                project, _, _ = stem.partition("-")
                key = Prepared.normalize(project)
                self.infos[key].append(path.joinpath(child))
                continue
            if root_is_egg and lowered == "egg-info":
                # Here the project name comes from the root's own basename.
                stem, _, _ = root_name.rpartition(".")
                project, _, _ = stem.partition("-")
                key = Prepared.legacy_normalize(project)
                self.eggs[key].append(path.joinpath(child))

        self.infos.freeze()
        self.eggs.freeze()

    def search(self, prepared: Prepared):
        """
        Yield all infos and eggs matching the Prepared query.

        A falsy ``prepared`` matches everything.
        """
        if prepared:
            infos = self.infos[prepared.normalized]
            eggs = self.eggs[prepared.legacy_normalized]
        else:
            infos = itertools.chain.from_iterable(self.infos.values())
            eggs = itertools.chain.from_iterable(self.eggs.values())
        return itertools.chain(infos, eggs)
929
930
class Prepared:
    """
    A prepared search query for metadata on a possibly-named package.

    Pre-calculates the normalization to prevent repeated operations.

    >>> none = Prepared(None)
    >>> none.normalized
    >>> none.legacy_normalized
    >>> bool(none)
    False
    >>> sample = Prepared('Sample__Pkg-name.foo')
    >>> sample.normalized
    'sample_pkg_name_foo'
    >>> sample.legacy_normalized
    'sample__pkg_name.foo'
    >>> bool(sample)
    True
    """

    # Class-level defaults, left in place when ``name`` is None.
    normalized = None
    legacy_normalized = None

    def __init__(self, name: str | None):
        self.name = name
        if name is not None:
            self.normalized = self.normalize(name)
            self.legacy_normalized = self.legacy_normalize(name)

    @staticmethod
    def normalize(name):
        """
        PEP 503 normalization plus dashes as underscores.

        Specifically avoids ``re.sub`` as prescribed for performance
        benefits (see python/cpython#143658).
        """
        lowered = name.lower()
        value = lowered.replace("-", "_")
        value = value.replace(".", "_")
        # Condense repeats
        while "__" in value:
            value = value.replace("__", "_")
        return value

    @staticmethod
    def legacy_normalize(name):
        """
        Normalize the package name as found in the convention in
        older packaging tools versions and specs.
        """
        lowered = name.lower()
        return lowered.replace('-', '_')

    def __bool__(self):
        return True if self.name else False
985
986
@install
class MetadataPathFinder(NullFinder, DistributionFinder):
    """A degenerate finder for distribution packages on the file system.

    This finder supplies only a find_distributions() method for versions
    of Python that do not have a PathFinder find_distributions().
    """

    @classmethod
    def find_distributions(
        cls, context=DistributionFinder.Context()
    ) -> Iterable[PathDistribution]:
        """
        Find distributions.

        Return an iterable of all Distribution instances capable of
        loading the metadata for packages matching ``context.name``
        (or all names if ``None`` indicated) along the paths in the list
        of directories ``context.path``.
        """
        metadata_dirs = cls._search_paths(context.name, context.path)
        return map(PathDistribution, metadata_dirs)

    @classmethod
    def _search_paths(cls, name, paths):
        """Find metadata directories in paths heuristically."""
        query = Prepared(name)
        roots = map(FastPath, paths)
        return itertools.chain.from_iterable(root.search(query) for root in roots)

    @classmethod
    def invalidate_caches(cls) -> None:
        """Discard the cached FastPath instances."""
        FastPath.__new__.cache_clear()
1021
1022
class PathDistribution(Distribution):
    """A Distribution whose metadata lives in a directory-like path."""

    def __init__(self, path: SimplePath) -> None:
        """Construct a distribution.

        :param path: SimplePath indicating the metadata directory.
        """
        self._path = path

    def read_text(self, filename: str | os.PathLike[str]) -> str | None:
        # An absent or unreadable file is reported as None, not as an error.
        try:
            return self._path.joinpath(filename).read_text(encoding='utf-8')
        except (
            FileNotFoundError,
            IsADirectoryError,
            KeyError,
            NotADirectoryError,
            PermissionError,
        ):
            return None

    read_text.__doc__ = Distribution.read_text.__doc__

    def locate_file(self, path: str | os.PathLike[str]) -> SimplePath:
        # Files are resolved relative to the metadata directory's parent.
        return self._path.parent / path

    @property
    def _normalized_name(self):
        """
        Performance optimization: where possible, resolve the
        normalized name from the file system path.
        """
        stem = os.path.basename(str(self._path))
        from_path = pass_none(Prepared.normalize)(self._name_from_stem(stem))
        # Fall back to the metadata-based name when the path yields nothing.
        return from_path or super()._normalized_name

    @staticmethod
    def _name_from_stem(stem):
        """
        >>> PathDistribution._name_from_stem('foo-3.0.egg-info')
        'foo'
        >>> PathDistribution._name_from_stem('CherryPy-3.0.dist-info')
        'CherryPy'
        >>> PathDistribution._name_from_stem('face.egg-info')
        'face'
        >>> PathDistribution._name_from_stem('foo.bar')
        """
        base, suffix = os.path.splitext(stem)
        if suffix in ('.dist-info', '.egg-info'):
            return base.partition('-')[0]
        return None
1076
1077
def distribution(distribution_name: str) -> Distribution:
    """Look up the ``Distribution`` for a package by name.

    Delegates to ``Distribution.from_name``.

    :param distribution_name: The name of the distribution package as a string.
    :return: A ``Distribution`` instance (or subclass thereof).
    """
    found = Distribution.from_name(distribution_name)
    return found
1085
1086
def distributions(**kwargs) -> Iterable[Distribution]:
    """Discover ``Distribution`` instances in the current environment.

    All keyword arguments are forwarded unchanged to
    ``Distribution.discover``.

    :return: An iterable of ``Distribution`` instances.
    """
    discovered = Distribution.discover(**kwargs)
    return discovered
1093
1094
def metadata(distribution_name: str) -> _meta.PackageMetadata:
    """Return the parsed metadata of the named package.

    The distribution is resolved with ``Distribution.from_name`` and its
    ``metadata`` attribute is returned.

    :param distribution_name: The name of the distribution package to query.
    :return: A PackageMetadata containing the parsed metadata.
    :raises MetadataNotFound: If no metadata file is present in the distribution.
    """
    dist = Distribution.from_name(distribution_name)
    return dist.metadata
1103
1104
def version(distribution_name: str) -> str:
    """Return the version string of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: The version string for the package as defined in the package's
        "Version" metadata key.
    """
    dist = distribution(distribution_name)
    return dist.version
1113
1114
_unique = functools.partial(
    unique_everseen, key=operator.attrgetter('_normalized_name')
)
"""
Filter for the result of ``distributions`` that yields unique
distributions, comparing them by their ``_normalized_name`` attribute.
"""
1122
1123
def entry_points(**params) -> EntryPoints:
    """Collect the entry points of every installed package.

    Distributions are first passed through ``_unique``; the
    ``entry_points`` of each remaining distribution are then chained
    together. Selection parameters (group or name) narrow the result to
    entry points matching those properties (see EntryPoints.select()).

    :return: EntryPoints for all installed packages.
    """
    unique_dists = _unique(distributions())
    per_dist = map(operator.attrgetter('entry_points'), unique_dists)
    combined = EntryPoints(itertools.chain.from_iterable(per_dist))
    return combined.select(**params)
1137
1138
def files(distribution_name: str) -> list[PackagePath] | None:
    """Return the files recorded for the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: The value of the distribution's ``files`` attribute: a list of
        the files composing the distribution, or ``None``.
    """
    dist = distribution(distribution_name)
    return dist.files
1146
1147
def requires(distribution_name: str) -> list[str] | None:
    """
    Return the requirements declared by the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: The value of the distribution's ``requires`` attribute: a list
        of requirements, suitable for packaging.requirement.Requirement,
        or ``None``.
    """
    dist = distribution(distribution_name)
    return dist.requires
1156
1157
def packages_distributions() -> Mapping[str, list[str]]:
    """
    Map each top-level package name to the distributions providing it.

    For every discovered distribution, the names declared in
    ``top_level.txt`` are used; when none are declared, names are
    inferred from the distribution's files. Each value lists the
    ``Name`` metadata of every distribution contributing that package.

    >>> import collections.abc
    >>> pkgs = packages_distributions()
    >>> all(isinstance(dist, collections.abc.Sequence) for dist in pkgs.values())
    True
    """
    providers = collections.defaultdict(list)
    for dist in distributions():
        top_level_names = _top_level_declared(dist) or _top_level_inferred(dist)
        for pkg in top_level_names:
            # Metadata is only read once a package name is actually produced.
            providers[pkg].append(dist.metadata['Name'])
    return dict(providers)
1173
1174
def _top_level_declared(dist):
    """
    Return the whitespace-separated names listed in the distribution's
    ``top_level.txt``, or an empty list when that file has no content
    or cannot be read.
    """
    declared = dist.read_text('top_level.txt')
    return declared.split() if declared else []
1177
1178
def _topmost(name: PackagePath) -> str | None:
    """
    Return the first path component of ``name`` when further
    components follow it, otherwise ``None``.
    """
    head, *tail = name.parts
    if not tail:
        return None
    return head
1185
1186
def _get_toplevel_name(name: PackagePath) -> str:
    """
    Infer a possibly importable module name from a name presumed on
    sys.path.

    Preference order: the top-most parent directory, then the module
    name reported by ``inspect.getmodulename``, then ``name`` itself
    as a string.

    >>> _get_toplevel_name(PackagePath('foo.py'))
    'foo'
    >>> _get_toplevel_name(PackagePath('foo'))
    'foo'
    >>> _get_toplevel_name(PackagePath('foo.pyc'))
    'foo'
    >>> _get_toplevel_name(PackagePath('foo/__init__.py'))
    'foo'
    >>> _get_toplevel_name(PackagePath('foo.pth'))
    'foo.pth'
    >>> _get_toplevel_name(PackagePath('foo.dist-info'))
    'foo.dist-info'
    """
    # Defer import of inspect for performance (python/cpython#118761)
    import inspect

    parent = _topmost(name)
    if parent:
        return parent

    module_name = inspect.getmodulename(name)
    if module_name:
        return module_name

    return str(name)
1209
1210
def _top_level_inferred(dist):
    """
    Infer top-level names from the distribution's files, keeping only
    candidates that contain no ``.``.
    """
    candidates = {
        _get_toplevel_name(entry) for entry in always_iterable(dist.files)
    }
    return filter(lambda candidate: '.' not in candidate, candidates)