1"""
2APIs exposing metadata from third-party Python packages.
3
4This codebase is shared between importlib.metadata in the stdlib
5and importlib_metadata in PyPI. See
6https://github.com/python/importlib_metadata/wiki/Development-Methodology
7for more detail.
8"""
9
10from __future__ import annotations
11
12import abc
13import collections
14import email
15import functools
16import itertools
17import operator
18import os
19import pathlib
20import posixpath
21import re
22import sys
23import textwrap
24import types
25from collections.abc import Iterable, Mapping
26from contextlib import suppress
27from importlib import import_module
28from importlib.abc import MetaPathFinder
29from itertools import starmap
30from typing import Any
31
32from . import _meta
33from ._collections import FreezableDefaultDict, Pair
34from ._compat import (
35 NullFinder,
36 install,
37)
38from ._functools import method_cache, noop, pass_none, passthrough
39from ._itertools import always_iterable, bucket, unique_everseen
40from ._meta import PackageMetadata, SimplePath
41from ._typing import md_none
42from .compat import py39, py311
43
# Names re-exported as the public API of this package.
__all__ = [
    'Distribution', 'DistributionFinder',
    'PackageMetadata', 'PackageNotFoundError',
    'SimplePath',
    'distribution', 'distributions',
    'entry_points',
    'files',
    'metadata',
    'packages_distributions',
    'requires',
    'version',
]
59
60
class PackageNotFoundError(ModuleNotFoundError):
    """Raised when no metadata can be located for a requested package."""

    @property
    def name(self) -> str:  # type: ignore[override]  # make readonly
        """The package name, taken from the exception's single argument."""
        # Sequence unpacking insists on exactly one positional argument.
        [only_arg] = self.args
        return only_arg

    def __str__(self) -> str:
        return f"No package metadata was found for {self.name}"
71
72
class Sectioned:
    """
    A minimal, fast parser for ini-style text made of ``[section]``
    headers followed by lines.

    >>> for item in Sectioned.read(Sectioned._sample):
    ...     print(item)
    Pair(name='sec1', value='# comments ignored')
    Pair(name='sec1', value='a = 1')
    Pair(name='sec1', value='b = 2')
    Pair(name='sec2', value='a = 2')

    >>> res = Sectioned.section_pairs(Sectioned._sample)
    >>> item = next(res)
    >>> item.name
    'sec1'
    >>> item.value
    Pair(name='a', value='1')
    >>> item = next(res)
    >>> item.value
    Pair(name='b', value='2')
    >>> item = next(res)
    >>> item.name
    'sec2'
    >>> item.value
    Pair(name='a', value='2')
    >>> list(res)
    []
    """

    _sample = textwrap.dedent(
        """
        [sec1]
        # comments ignored
        a = 1
        b = 2

        [sec2]
        a = 2
        """
    ).lstrip()

    @classmethod
    def section_pairs(cls, text):
        """
        Yield one Pair per valid line that sits under a section header,
        with the line itself parsed into a nested Pair.
        """
        for section in cls.read(text, filter_=cls.valid):
            # Lines seen before the first header carry no section name.
            if section.name is not None:
                yield section._replace(value=Pair.parse(section.value))

    @staticmethod
    def read(text, filter_=None):
        """
        Yield ``Pair(section_name, line)`` for each stripped line of
        ``text`` accepted by ``filter_``; header lines only switch the
        current section name and are not yielded.
        """
        current = None
        stripped = (raw.strip() for raw in text.splitlines())
        for line in filter(filter_, stripped):
            if line.startswith('[') and line.endswith(']'):
                current = line.strip('[]')
            else:
                yield Pair(current, line)

    @staticmethod
    def valid(line: str):
        """Truthy for a non-empty line that is not a ``#`` comment."""
        if not line:
            return line
        return not line.startswith('#')
136
137
class _EntryPointMatch(types.SimpleNamespace):
    """Holder for the named groups captured when parsing an entry point value."""

    # Field names mirror the named groups of ``EntryPoint.pattern``.
    module: str
    attr: str
    extras: str
142
143
144class EntryPoint:
145 """An entry point as defined by Python packaging conventions.
146
147 See `the packaging docs on entry points
148 <https://packaging.python.org/specifications/entry-points/>`_
149 for more information.
150
151 >>> ep = EntryPoint(
152 ... name=None, group=None, value='package.module:attr [extra1, extra2]')
153 >>> ep.module
154 'package.module'
155 >>> ep.attr
156 'attr'
157 >>> ep.extras
158 ['extra1', 'extra2']
159
160 If the value package or module are not valid identifiers, a
161 ValueError is raised on access.
162
163 >>> EntryPoint(name=None, group=None, value='invalid-name').module
164 Traceback (most recent call last):
165 ...
166 ValueError: ('Invalid object reference...invalid-name...
167 >>> EntryPoint(name=None, group=None, value='invalid-name').attr
168 Traceback (most recent call last):
169 ...
170 ValueError: ('Invalid object reference...invalid-name...
171 >>> EntryPoint(name=None, group=None, value='invalid-name').extras
172 Traceback (most recent call last):
173 ...
174 ValueError: ('Invalid object reference...invalid-name...
175
176 The same thing happens on construction.
177
178 >>> EntryPoint(name=None, group=None, value='invalid-name')
179 Traceback (most recent call last):
180 ...
181 ValueError: ('Invalid object reference...invalid-name...
182
183 """
184
185 pattern = re.compile(
186 r'(?P<module>[\w.]+)\s*'
187 r'(:\s*(?P<attr>[\w.]+)\s*)?'
188 r'((?P<extras>\[.*\])\s*)?$'
189 )
190 """
191 A regular expression describing the syntax for an entry point,
192 which might look like:
193
194 - module
195 - package.module
196 - package.module:attribute
197 - package.module:object.attribute
198 - package.module:attr [extra1, extra2]
199
200 Other combinations are possible as well.
201
202 The expression is lenient about whitespace around the ':',
203 following the attr, and following any extras.
204 """
205
206 name: str
207 value: str
208 group: str
209
210 dist: Distribution | None = None
211
212 def __init__(self, name: str, value: str, group: str) -> None:
213 vars(self).update(name=name, value=value, group=group)
214 self.module
215
216 def load(self) -> Any:
217 """Load the entry point from its definition. If only a module
218 is indicated by the value, return that module. Otherwise,
219 return the named object.
220 """
221 module = import_module(self.module)
222 attrs = filter(None, (self.attr or '').split('.'))
223 return functools.reduce(getattr, attrs, module)
224
225 @property
226 def module(self) -> str:
227 return self._match.module
228
229 @property
230 def attr(self) -> str:
231 return self._match.attr
232
233 @property
234 def extras(self) -> list[str]:
235 return re.findall(r'\w+', self._match.extras or '')
236
237 @functools.cached_property
238 def _match(self) -> _EntryPointMatch:
239 match = self.pattern.match(self.value)
240 if not match:
241 raise ValueError(
242 'Invalid object reference. '
243 'See https://packaging.python.org'
244 '/en/latest/specifications/entry-points/#data-model',
245 self.value,
246 )
247 return _EntryPointMatch(**match.groupdict())
248
249 def _for(self, dist):
250 vars(self).update(dist=dist)
251 return self
252
253 def matches(self, **params):
254 """
255 EntryPoint matches the given parameters.
256
257 >>> ep = EntryPoint(group='foo', name='bar', value='bing:bong [extra1, extra2]')
258 >>> ep.matches(group='foo')
259 True
260 >>> ep.matches(name='bar', value='bing:bong [extra1, extra2]')
261 True
262 >>> ep.matches(group='foo', name='other')
263 False
264 >>> ep.matches()
265 True
266 >>> ep.matches(extras=['extra1', 'extra2'])
267 True
268 >>> ep.matches(module='bing')
269 True
270 >>> ep.matches(attr='bong')
271 True
272 """
273 self._disallow_dist(params)
274 attrs = (getattr(self, param) for param in params)
275 return all(map(operator.eq, params.values(), attrs))
276
277 @staticmethod
278 def _disallow_dist(params):
279 """
280 Querying by dist is not allowed (dist objects are not comparable).
281 >>> EntryPoint(name='fan', value='fav', group='fag').matches(dist='foo')
282 Traceback (most recent call last):
283 ...
284 ValueError: "dist" is not suitable for matching...
285 """
286 if "dist" in params:
287 raise ValueError(
288 '"dist" is not suitable for matching. '
289 "Instead, use Distribution.entry_points.select() on a "
290 "located distribution."
291 )
292
293 def _key(self):
294 return self.name, self.value, self.group
295
296 def __lt__(self, other):
297 return self._key() < other._key()
298
299 def __eq__(self, other):
300 return self._key() == other._key()
301
302 def __setattr__(self, name, value):
303 raise AttributeError("EntryPoint objects are immutable.")
304
305 def __repr__(self):
306 return (
307 f'EntryPoint(name={self.name!r}, value={self.value!r}, '
308 f'group={self.group!r})'
309 )
310
311 def __hash__(self) -> int:
312 return hash(self._key())
313
314
class EntryPoints(tuple):
    """
    A tuple-backed, immutable collection of EntryPoint objects that
    can be narrowed down with ``select``.
    """

    __slots__ = ()

    def __getitem__(self, name: str) -> EntryPoint:  # type: ignore[override]  # Work with str instead of int
        """
        Return the first EntryPoint in self whose name matches ``name``.

        :raises KeyError: when no entry point has that name.
        """
        candidates = iter(self.select(name=name))
        try:
            return next(candidates)
        except StopIteration:
            raise KeyError(name)

    def __repr__(self):
        """
        Show the class name wrapped around the plain tuple repr, making
        it visible that this is not an ordinary tuple.
        """
        return f'{self.__class__.__name__}({tuple(self)!r})'

    def select(self, **params) -> EntryPoints:
        """
        Return the entry points in self that match all of the given
        parameters (typically group and/or name).
        """
        selected = (ep for ep in self if py39.ep_matches(ep, **params))
        return EntryPoints(selected)

    @property
    def names(self) -> set[str]:
        """
        The set of distinct names across all entry points.
        """
        return set(map(operator.attrgetter('name'), self))

    @property
    def groups(self) -> set[str]:
        """
        The set of distinct groups across all entry points.
        """
        return set(map(operator.attrgetter('group'), self))

    @classmethod
    def _from_text_for(cls, text, dist):
        """Parse ``text`` and bind every resulting entry point to ``dist``."""
        bound = (ep._for(dist) for ep in cls._from_text(text))
        return cls(bound)

    @staticmethod
    def _from_text(text):
        """Yield an EntryPoint for each section/name/value triple in ``text``."""
        for item in Sectioned.section_pairs(text or ''):
            pair = item.value
            yield EntryPoint(name=pair.name, value=pair.value, group=item.name)
369
370
class PackagePath(pathlib.PurePosixPath):
    """A POSIX-style path naming a file that belongs to a distribution."""

    # These attributes are not set by the constructor; whoever builds
    # the path is expected to assign them afterwards.
    hash: FileHash | None
    size: int
    dist: Distribution

    def locate(self) -> SimplePath:
        """Ask the owning distribution for a path-like object for this path."""
        return self.dist.locate_file(self)

    def read_text(self, encoding: str = 'utf-8') -> str:
        """Return the located file's contents decoded with ``encoding``."""
        target = self.locate()
        return target.read_text(encoding=encoding)

    def read_binary(self) -> bytes:
        """Return the located file's contents as bytes."""
        target = self.locate()
        return target.read_bytes()
387
388
class FileHash:
    """A file hash parsed from a ``mode=value`` specification string."""

    def __init__(self, spec: str) -> None:
        # Split on the first '=' only; without one, ``value`` is empty.
        mode, _sep, value = spec.partition('=')
        self.mode = mode
        self.value = value

    def __repr__(self) -> str:
        return f'<FileHash mode: {self.mode} value: {self.value}>'
395
396
class Distribution(metaclass=abc.ABCMeta):
    """
    An abstract Python distribution package.

    Custom providers may derive from this class and define
    the abstract methods to provide a concrete implementation
    for their environment. Some providers may opt to override
    the default implementation of some properties to bypass
    the file-reading mechanism.
    """

    @abc.abstractmethod
    def read_text(self, filename) -> str | None:
        """Attempt to load metadata file given by the name.

        Python distribution metadata is organized by blobs of text
        typically represented as "files" in the metadata directory
        (e.g. package-1.0.dist-info). These files include things
        like:

        - METADATA: The distribution metadata including fields
          like Name and Version and Description.
        - entry_points.txt: A series of entry points as defined in
          `the entry points spec <https://packaging.python.org/en/latest/specifications/entry-points/#file-format>`_.
        - RECORD: A record of files according to
          `this recording spec <https://packaging.python.org/en/latest/specifications/recording-installed-packages/#the-record-file>`_.

        A package may provide any set of files, including those
        not listed here or none at all.

        :param filename: The name of the file in the distribution info.
        :return: The text if found, otherwise None.
        """

    @abc.abstractmethod
    def locate_file(self, path: str | os.PathLike[str]) -> SimplePath:
        """
        Given a path to a file in this distribution, return a SimplePath
        to it.

        This method is used by callers of ``Distribution.files()`` to
        locate files within the distribution. If it's possible for a
        Distribution to represent files in the distribution as
        ``SimplePath`` objects, it should implement this method
        to resolve such objects.

        Some Distribution providers may elect not to resolve SimplePath
        objects within the distribution by raising a
        NotImplementedError, but consumers of such a Distribution would
        be unable to invoke ``Distribution.files()``.
        """

    @classmethod
    def from_name(cls, name: str) -> Distribution:
        """Return the Distribution for the given package name.

        :param name: The name of the distribution package to search for.
        :return: The Distribution instance (or subclass thereof) for the named
            package, if found.
        :raises PackageNotFoundError: When the named package's distribution
            metadata cannot be found.
        :raises ValueError: When an invalid value is supplied for name.
        """
        if not name:
            raise ValueError("A distribution name is required.")
        try:
            return next(iter(cls._prefer_valid(cls.discover(name=name))))
        except StopIteration:
            raise PackageNotFoundError(name)

    @classmethod
    def discover(
        cls, *, context: DistributionFinder.Context | None = None, **kwargs
    ) -> Iterable[Distribution]:
        """Return an iterable of Distribution objects for all packages.

        Pass a ``context`` or pass keyword arguments for constructing
        a context.

        :context: A ``DistributionFinder.Context`` object.
        :return: Iterable of Distribution objects for packages matching
          the context.
        :raises ValueError: When both ``context`` and keyword arguments
          are supplied.
        """
        if context and kwargs:
            raise ValueError("cannot accept context and kwargs")
        context = context or DistributionFinder.Context(**kwargs)
        return itertools.chain.from_iterable(
            resolver(context) for resolver in cls._discover_resolvers()
        )

    @staticmethod
    def _prefer_valid(dists: Iterable[Distribution]) -> Iterable[Distribution]:
        """
        Prefer (move to the front) distributions that have metadata.

        Ref python/importlib_resources#489.
        """
        buckets = bucket(dists, lambda dist: bool(dist.metadata))
        return itertools.chain(buckets[True], buckets[False])

    @staticmethod
    def at(path: str | os.PathLike[str]) -> Distribution:
        """Return a Distribution for the indicated metadata path.

        :param path: a string or path-like object
        :return: a concrete Distribution instance for the path
        """
        return PathDistribution(pathlib.Path(path))

    @staticmethod
    def _discover_resolvers():
        """Search the meta_path for resolvers (MetadataPathFinders)."""
        declared = (
            getattr(finder, 'find_distributions', None) for finder in sys.meta_path
        )
        # Finders without a ``find_distributions`` attribute contribute None.
        return filter(None, declared)

    @property
    def metadata(self) -> _meta.PackageMetadata | None:
        """Return the parsed metadata for this Distribution.

        The returned object will have keys that name the various bits of
        metadata per the
        `Core metadata specifications <https://packaging.python.org/en/latest/specifications/core-metadata/#core-metadata>`_.

        Custom providers may provide the METADATA file or override this
        property.
        """

        text = (
            self.read_text('METADATA')
            or self.read_text('PKG-INFO')
            # This last clause is here to support old egg-info files.  Its
            # effect is to just end up using the PathDistribution's self._path
            # (which points to the egg-info file) attribute unchanged.
            or self.read_text('')
        )
        return self._assemble_message(text)

    @staticmethod
    @pass_none
    def _assemble_message(text: str) -> _meta.PackageMetadata:
        """Parse ``text`` as an email-style message wrapped in the adapter."""
        # deferred for performance (python/cpython#109829)
        from . import _adapters

        return _adapters.Message(email.message_from_string(text))

    @property
    def name(self) -> str:
        """Return the 'Name' metadata for the distribution package."""
        return md_none(self.metadata)['Name']

    @property
    def _normalized_name(self):
        """Return a normalized version of the name."""
        return Prepared.normalize(self.name)

    @property
    def version(self) -> str:
        """Return the 'Version' metadata for the distribution package."""
        return md_none(self.metadata)['Version']

    @property
    def entry_points(self) -> EntryPoints:
        """
        Return EntryPoints for this distribution.

        Custom providers may provide the ``entry_points.txt`` file
        or override this property.
        """
        return EntryPoints._from_text_for(self.read_text('entry_points.txt'), self)

    @property
    def files(self) -> list[PackagePath] | None:
        """Files in this distribution.

        :return: List of PackagePath for this distribution or None

        Result is `None` if the metadata file that enumerates files
        (i.e. RECORD for dist-info, or installed-files.txt or
        SOURCES.txt for egg-info) is missing.
        Result may be empty if the metadata exists but is empty.

        Custom providers are recommended to provide a "RECORD" file (in
        ``read_text``) or override this property to allow for callers to be
        able to resolve filenames provided by the package.
        """

        def make_file(name, hash=None, size_str=None):
            # One CSV row -> one PackagePath; hash and size are optional
            # columns and are left as None when absent or empty.
            result = PackagePath(name)
            result.hash = FileHash(hash) if hash else None
            result.size = int(size_str) if size_str else None
            result.dist = self
            return result

        @pass_none
        def make_files(lines):
            # Delay csv import, since Distribution.files is not as widely used
            # as other parts of importlib.metadata
            import csv

            return starmap(make_file, csv.reader(lines))

        @pass_none
        def skip_missing_files(package_paths):
            # Keep only the entries whose located path actually exists.
            return list(filter(lambda path: path.locate().exists(), package_paths))

        return skip_missing_files(
            make_files(
                self._read_files_distinfo()
                or self._read_files_egginfo_installed()
                or self._read_files_egginfo_sources()
            )
        )

    def _read_files_distinfo(self):
        """
        Read the lines of RECORD.
        """
        text = self.read_text('RECORD')
        return text and text.splitlines()

    def _read_files_egginfo_installed(self):
        """
        Read installed-files.txt and return lines in a similar
        CSV-parsable format as RECORD: each file must be placed
        relative to the site-packages directory and must also be
        quoted (since file names can contain literal commas).

        This file is written when the package is installed by pip,
        but it might not be written for other installation methods.
        Assume the file is accurate if it exists.
        """
        text = self.read_text('installed-files.txt')
        # Prepend the .egg-info/ subdir to the lines in this file.
        # But this subdir is only available from PathDistribution's
        # self._path.
        subdir = getattr(self, '_path', None)
        if not text or not subdir:
            return

        paths = (
            py311
            .relative_fix((subdir / name).resolve())
            .relative_to(self.locate_file('').resolve(), walk_up=True)
            .as_posix()
            for name in text.splitlines()
        )
        return map('"{}"'.format, paths)

    def _read_files_egginfo_sources(self):
        """
        Read SOURCES.txt and return lines in a similar CSV-parsable
        format as RECORD: each file name must be quoted (since it
        might contain literal commas).

        Note that SOURCES.txt is not a reliable source for what
        files are installed by a package. This file is generated
        for a source archive, and the files that are present
        there (e.g. setup.py) may not correctly reflect the files
        that are present after the package has been installed.
        """
        text = self.read_text('SOURCES.txt')
        return text and map('"{}"'.format, text.splitlines())

    @property
    def requires(self) -> list[str] | None:
        """Generated requirements specified for this Distribution"""
        reqs = self._read_dist_info_reqs() or self._read_egg_info_reqs()
        return reqs and list(reqs)

    def _read_dist_info_reqs(self):
        """
        Return the ``Requires-Dist`` entries from the metadata.

        Returns None when the distribution has no metadata at all, so
        that ``requires`` falls back to ``requires.txt`` instead of
        failing with an AttributeError on None.
        """
        metadata = self.metadata
        if metadata is None:
            return None
        return metadata.get_all('Requires-Dist')

    def _read_egg_info_reqs(self):
        """Return requirements derived from ``requires.txt``, if that file is readable."""
        source = self.read_text('requires.txt')
        return pass_none(self._deps_from_requires_text)(source)

    @classmethod
    def _deps_from_requires_text(cls, source):
        """Convert the sectioned text of ``requires.txt`` into simple requirements."""
        return cls._convert_egg_info_reqs_to_simple_reqs(Sectioned.read(source))

    @staticmethod
    def _convert_egg_info_reqs_to_simple_reqs(sections):
        """
        Historically, setuptools would solicit and store 'extra'
        requirements, including those with environment markers,
        in separate sections. More modern tools expect each
        dependency to be defined separately, with any relevant
        extras and environment markers attached directly to that
        requirement. This method converts the former to the
        latter. See _test_deps_from_requires_text for an example.
        """

        def make_condition(name):
            return name and f'extra == "{name}"'

        def quoted_marker(section):
            # A section name has the form 'extra:markers'; either part
            # may be empty, and a missing name means no conditions.
            section = section or ''
            extra, sep, markers = section.partition(':')
            if extra and markers:
                markers = f'({markers})'
            conditions = list(filter(None, [markers, make_condition(extra)]))
            return '; ' + ' and '.join(conditions) if conditions else ''

        def url_req_space(req):
            """
            PEP 508 requires a space between the url_spec and the quoted_marker.
            Ref python/importlib_metadata#357.
            """
            # '@' is uniquely indicative of a url_req.
            return ' ' * ('@' in req)

        for section in sections:
            space = url_req_space(section.value)
            yield section.value + space + quoted_marker(section.name)

    @property
    def origin(self):
        """The contents of ``direct_url.json`` as loaded by ``_load_json``."""
        return self._load_json('direct_url.json')

    def _load_json(self, filename):
        """
        Read ``filename`` from the metadata and decode it as JSON, turning
        every JSON object into a ``types.SimpleNamespace``.
        """
        # Deferred for performance (python/importlib_metadata#503)
        import json

        return pass_none(json.loads)(
            self.read_text(filename),
            object_hook=lambda data: types.SimpleNamespace(**data),
        )
726
727
class DistributionFinder(MetaPathFinder):
    """
    A MetaPathFinder that can also discover installed distributions.

    Custom providers implement this interface in order to supply
    metadata.
    """

    class Context:
        """
        The keyword arguments a caller passed to ``distributions()``
        or ``Distribution.discover()`` in order to narrow the search
        for distributions across all DistributionFinders.

        Each DistributionFinder may expect any parameters
        and should attempt to honor the canonical
        parameters defined below when appropriate.

        This mechanism gives a custom provider a means to
        solicit additional details from the caller beyond
        "name" and "path" when searching distributions.
        For example, imagine a provider that exposes suites
        of packages in either a "public" or "private" ``realm``.
        A caller may wish to query only for distributions in
        a particular realm and could call
        ``distributions(realm="private")`` to signal to the
        custom provider to only include distributions from that
        realm.
        """

        name = None
        """
        Specific name for which a distribution finder should match.
        A name of ``None`` matches all distributions.
        """

        def __init__(self, **kwargs):
            # Every keyword becomes an instance attribute as given.
            self.__dict__.update(kwargs)

        @property
        def path(self) -> list[str]:
            """
            The sequence of directory path that a distribution finder
            should search.

            Typically refers to Python installed package paths such as
            "site-packages" directories and defaults to ``sys.path``.
            """
            own = vars(self)
            return own['path'] if 'path' in own else sys.path

    @abc.abstractmethod
    def find_distributions(self, context=Context()) -> Iterable[Distribution]:
        """
        Find distributions.

        Return an iterable of all Distribution instances capable of
        loading the metadata for packages matching the ``context``,
        a DistributionFinder.Context instance.
        """
788
789
@passthrough
def _clear_after_fork(cached):
    """Arrange for ``cached.cache_clear`` to run in the child after ``fork``.

    ``FastPath`` caches zip-backed ``pathlib.Path`` objects that retain a
    reference to the parent's open ``ZipFile`` handle. Re-using a cached
    instance in a forked child can therefore resurrect invalid file pointers
    and trigger ``BadZipFile``/``OSError`` failures (python/importlib_metadata#520).
    Registering ``cache_clear`` with ``os.register_at_fork`` keeps each process
    on its own cache.
    """
    # Where ``os.register_at_fork`` does not exist, ``noop`` stands in for it.
    register = getattr(os, 'register_at_fork', noop)
    register(after_in_child=cached.cache_clear)
802
803
class FastPath:
    """
    Micro-optimized class for searching a root for children.

    Root is a path on the file system that may contain metadata
    directories either as natural directories or within a zip file.

    >>> FastPath('').children()
    ['...']

    FastPath objects are cached and recycled for any given root.

    >>> FastPath('foobar') is FastPath('foobar')
    True
    """

    @_clear_after_fork  # type: ignore[misc]
    @functools.lru_cache()
    def __new__(cls, root):
        return super().__new__(cls)

    def __init__(self, root):
        self.root = root

    def joinpath(self, child):
        """Return ``child`` joined onto the root as a ``pathlib.Path``."""
        return pathlib.Path(self.root, child)

    def children(self):
        """
        List the names directly under the root: first as a directory,
        then as a zip file, and finally as an empty list.
        """
        try:
            return os.listdir(self.root or '.')
        except Exception:
            pass
        try:
            return self.zip_children()
        except Exception:
            pass
        return []

    def zip_children(self):
        """Return the top-level names inside the zip file at the root."""
        # deferred for performance (python/importlib_metadata#502)
        from zipp.compat.overlay import zipfile

        archive = zipfile.Path(self.root)
        members = archive.root.namelist()
        # From here on, children are joined relative to the zip file.
        self.joinpath = archive.joinpath

        # dict.fromkeys removes duplicates while keeping first-seen order.
        return dict.fromkeys(
            member.partition(posixpath.sep)[0] for member in members
        )

    def search(self, name):
        """Search the lookup for the root's current mtime for ``name``."""
        lookup = self.lookup(self.mtime)
        return lookup.search(name)

    @property
    def mtime(self):
        """
        The root's modification time, or None when it cannot be
        stat'ed, in which case the cached lookups are discarded.
        """
        try:
            return os.stat(self.root).st_mtime
        except OSError:
            pass
        self.lookup.cache_clear()
        return None

    @method_cache
    def lookup(self, mtime):
        # ``mtime`` is not used in the body; it only distinguishes calls
        # made to the cached method.
        return Lookup(self)
860
861
class Lookup:
    """
    A micro-optimized index of the metadata entries found under a
    (fast) path.
    """

    def __init__(self, path: FastPath):
        """
        Index, up front, every child of ``path`` that looks like
        metadata.

        Children ending in ``.dist-info`` or ``.egg-info`` are stored
        in ``infos``; an ``egg-info`` child of a root ending in
        ``.egg`` is stored in ``eggs`` (legacy metadata).
        """

        root_name = os.path.basename(path.root).lower()
        root_is_egg = root_name.endswith(".egg")
        self.infos = FreezableDefaultDict(list)
        self.eggs = FreezableDefaultDict(list)

        for child in path.children():
            lowered = child.lower()
            if lowered.endswith((".dist-info", ".egg-info")):
                # rpartition is faster than splitext and suitable for this purpose.
                stem = lowered.rpartition(".")[0]
                project = stem.partition("-")[0]
                key = Prepared.normalize(project)
                self.infos[key].append(path.joinpath(child))
                continue
            if root_is_egg and lowered == "egg-info":
                # Here the project name comes from the root, not the child.
                stem = root_name.rpartition(".")[0]
                project = stem.partition("-")[0]
                legacy_key = Prepared.legacy_normalize(project)
                self.eggs[legacy_key].append(path.joinpath(child))

        self.infos.freeze()
        self.eggs.freeze()

    def search(self, prepared: Prepared):
        """
        Return an iterator over the infos, then the eggs, matching the
        Prepared query; a falsy query selects every indexed entry.
        """
        if prepared:
            infos = self.infos[prepared.normalized]
            eggs = self.eggs[prepared.legacy_normalized]
        else:
            infos = itertools.chain.from_iterable(self.infos.values())
            eggs = itertools.chain.from_iterable(self.eggs.values())
        return itertools.chain(infos, eggs)
911
912
class Prepared:
    """
    A search query for the metadata of a package whose name may or
    may not be given.

    Both normalized forms are computed once, at construction, so they
    need not be recomputed on every comparison.

    >>> none = Prepared(None)
    >>> none.normalized
    >>> none.legacy_normalized
    >>> bool(none)
    False
    >>> sample = Prepared('Sample__Pkg-name.foo')
    >>> sample.normalized
    'sample_pkg_name_foo'
    >>> sample.legacy_normalized
    'sample__pkg_name.foo'
    >>> bool(sample)
    True
    """

    # Class-level defaults, used when no name is supplied.
    normalized = None
    legacy_normalized = None

    def __init__(self, name: str | None):
        self.name = name
        if name is not None:
            self.normalized = self.normalize(name)
            self.legacy_normalized = self.legacy_normalize(name)

    @staticmethod
    def normalize(name):
        """
        PEP 503 normalization plus dashes as underscores.
        """
        collapsed = re.sub(r"[-_.]+", "-", name)
        return collapsed.lower().replace('-', '_')

    @staticmethod
    def legacy_normalize(name):
        """
        Normalize the package name according to the convention used by
        older versions of packaging tools and specs.
        """
        lowered = name.lower()
        return lowered.replace('-', '_')

    def __bool__(self):
        # An empty name is as falsy as a missing one.
        return bool(self.name)
960
961
@install
class MetadataPathFinder(NullFinder, DistributionFinder):
    """A degenerate finder for distribution packages on the file system.

    This finder supplies only a find_distributions() method for versions
    of Python that do not have a PathFinder find_distributions().
    """

    @classmethod
    def find_distributions(
        cls, context=DistributionFinder.Context()
    ) -> Iterable[PathDistribution]:
        """
        Find distributions.

        Return an iterable of all Distribution instances capable of
        loading the metadata for packages matching ``context.name``
        (or all names if ``None`` indicated) along the paths in the list
        of directories ``context.path``.
        """
        metadata_paths = cls._search_paths(context.name, context.path)
        return map(PathDistribution, metadata_paths)

    @classmethod
    def _search_paths(cls, name, paths):
        """Find metadata directories in paths heuristically."""
        query = Prepared(name)
        per_path = (FastPath(root).search(query) for root in paths)
        return itertools.chain.from_iterable(per_path)

    @classmethod
    def invalidate_caches(cls) -> None:
        """Discard the cached FastPath instances."""
        FastPath.__new__.cache_clear()
996
997
class PathDistribution(Distribution):
    """A Distribution whose metadata lives at a ``SimplePath``."""

    def __init__(self, path: SimplePath) -> None:
        """Construct a distribution.

        :param path: SimplePath indicating the metadata directory.
        """
        self._path = path

    def read_text(self, filename: str | os.PathLike[str]) -> str | None:
        try:
            return self._path.joinpath(filename).read_text(encoding='utf-8')
        except (
            FileNotFoundError,
            IsADirectoryError,
            KeyError,
            NotADirectoryError,
            PermissionError,
        ):
            # Any of these is treated as "the file is not available".
            return None

    read_text.__doc__ = Distribution.read_text.__doc__

    def locate_file(self, path: str | os.PathLike[str]) -> SimplePath:
        # Resolved against the parent of the metadata path.
        return self._path.parent / path

    @property
    def _normalized_name(self):
        """
        Performance optimization: derive the normalized name from the
        file system path when it can be, and only otherwise fall back
        to the name stored in the metadata.
        """
        basename = os.path.basename(str(self._path))
        from_path = pass_none(Prepared.normalize)(self._name_from_stem(basename))
        return from_path or super()._normalized_name

    @staticmethod
    def _name_from_stem(stem):
        """
        >>> PathDistribution._name_from_stem('foo-3.0.egg-info')
        'foo'
        >>> PathDistribution._name_from_stem('CherryPy-3.0.dist-info')
        'CherryPy'
        >>> PathDistribution._name_from_stem('face.egg-info')
        'face'
        >>> PathDistribution._name_from_stem('foo.bar')
        """
        base, extension = os.path.splitext(stem)
        if extension in ('.dist-info', '.egg-info'):
            # The name is whatever precedes the first dash, if any.
            return base.partition('-')[0]
        return None
1051
1052
def distribution(distribution_name: str) -> Distribution:
    """Look up the ``Distribution`` for a package by name.

    :param distribution_name: The name of the distribution package as a string.
    :return: A ``Distribution`` instance (or subclass thereof).
    """
    found = Distribution.from_name(distribution_name)
    return found
1060
1061
def distributions(**kwargs) -> Iterable[Distribution]:
    """Discover ``Distribution`` instances in the current environment.

    Keyword arguments are forwarded unchanged to ``Distribution.discover``.

    :return: An iterable of ``Distribution`` instances.
    """
    discovered = Distribution.discover(**kwargs)
    return discovered
1068
1069
def metadata(distribution_name: str) -> _meta.PackageMetadata | None:
    """Fetch the metadata of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: A PackageMetadata containing the parsed metadata.
    """
    dist = Distribution.from_name(distribution_name)
    return dist.metadata
1077
1078
def version(distribution_name: str) -> str:
    """Fetch the version string of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: The version string for the package as defined in the package's
        "Version" metadata key.
    """
    dist = distribution(distribution_name)
    return dist.version
1087
1088
_unique = functools.partial(unique_everseen, key=py39.normalized_name)
"""
Filter for the output of ``distributions`` that keeps distributions
unique by name, using ``py39.normalized_name`` as the key.
"""
1096
1097
def entry_points(**params) -> EntryPoints:
    """Collect the EntryPoint objects of all installed packages.

    Selection parameters (group or name) narrow the result to the
    entry points matching those properties; they are handed to
    ``EntryPoints.select()``.

    :return: EntryPoints for all installed packages.
    """
    unique_dists = _unique(distributions())
    # Flatten each distribution's entry points into a single stream.
    collected = (ep for dist in unique_dists for ep in dist.entry_points)
    return EntryPoints(collected).select(**params)
1111
1112
def files(distribution_name: str) -> list[PackagePath] | None:
    """List the files of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: List of files composing the distribution.
    """
    dist = distribution(distribution_name)
    return dist.files
1120
1121
def requires(distribution_name: str) -> list[str] | None:
    """
    List the requirements of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: The requirements of the distribution, each suitable for
        packaging.requirements.Requirement.
    """
    dist = distribution(distribution_name)
    return dist.requires
1130
1131
def packages_distributions() -> Mapping[str, list[str]]:
    """
    Map each top-level package name to the names of the
    distributions that provide it.

    >>> import collections.abc
    >>> pkgs = packages_distributions()
    >>> all(isinstance(dist, collections.abc.Sequence) for dist in pkgs.values())
    True
    """
    providers: dict[str, list[str]] = {}
    for dist in distributions():
        # Prefer names declared in top_level.txt; fall back to names
        # inferred from the distribution's files.
        top_levels = _top_level_declared(dist) or _top_level_inferred(dist)
        for pkg in top_levels:
            providers.setdefault(pkg, []).append(md_none(dist.metadata)['Name'])
    return providers
1147
1148
def _top_level_declared(dist):
    """
    Return the whitespace-separated names found in the distribution's
    ``top_level.txt``, or an empty list when that text is missing or
    empty.
    """
    declared = dist.read_text('top_level.txt')
    if not declared:
        return []
    return declared.split()
1151
1152
def _topmost(name: PackagePath) -> str | None:
    """
    Return the top-most parent as long as there is a parent.

    :param name: A path object exposing ``parts``.
    :return: The first component when the path has more than one
        component; ``None`` when it has a single component or no
        components at all.
    """
    parts = name.parts
    # Index instead of unpacking ``top, *rest``: unpacking raises
    # ValueError when ``parts`` is empty (e.g. an empty path).
    return parts[0] if len(parts) > 1 else None
1159
1160
def _get_toplevel_name(name: PackagePath) -> str:
    """
    Derive a possibly importable module name from a name presumed to
    be on sys.path.

    >>> _get_toplevel_name(PackagePath('foo.py'))
    'foo'
    >>> _get_toplevel_name(PackagePath('foo'))
    'foo'
    >>> _get_toplevel_name(PackagePath('foo.pyc'))
    'foo'
    >>> _get_toplevel_name(PackagePath('foo/__init__.py'))
    'foo'
    >>> _get_toplevel_name(PackagePath('foo.pth'))
    'foo.pth'
    >>> _get_toplevel_name(PackagePath('foo.dist-info'))
    'foo.dist-info'
    """
    # Defer import of inspect for performance (python/cpython#118761)
    import inspect

    # The first truthy candidate wins: the leading directory, then the
    # module name implied by the file name, then the path text itself.
    candidate = _topmost(name)
    if not candidate:
        candidate = inspect.getmodulename(name)
    if not candidate:
        candidate = str(name)
    return candidate
1183
1184
def _top_level_inferred(dist):
    """
    Infer top-level names from the distribution's files, keeping only
    candidates that contain no dot. Returns a lazy ``filter`` over the
    distinct candidates.
    """
    candidates = {_get_toplevel_name(path) for path in always_iterable(dist.files)}
    return filter(lambda candidate: '.' not in candidate, candidates)