1"""
2APIs exposing metadata from third-party Python packages.
3
4This codebase is shared between importlib.metadata in the stdlib
5and importlib_metadata in PyPI. See
6https://github.com/python/importlib_metadata/wiki/Development-Methodology
7for more detail.
8"""
9
10from __future__ import annotations
11
12import abc
13import collections
14import email
15import functools
16import itertools
17import operator
18import os
19import pathlib
20import posixpath
21import re
22import sys
23import textwrap
24import types
25from collections.abc import Iterable, Mapping
26from contextlib import suppress
27from importlib import import_module
28from importlib.abc import MetaPathFinder
29from itertools import starmap
30from typing import Any
31
32from . import _meta
33from ._collections import FreezableDefaultDict, Pair
34from ._compat import (
35 NullFinder,
36 install,
37)
38from ._functools import method_cache, pass_none
39from ._itertools import always_iterable, bucket, unique_everseen
40from ._meta import PackageMetadata, SimplePath
41from ._typing import md_none
42from .compat import py39, py311
43
# Names that make up this module's public API; this is the list that a
# star-import of the module exports.
__all__ = [
    'Distribution',
    'DistributionFinder',
    'PackageMetadata',
    'PackageNotFoundError',
    'SimplePath',
    'distribution',
    'distributions',
    'entry_points',
    'files',
    'metadata',
    'packages_distributions',
    'requires',
    'version',
]
59
60
class PackageNotFoundError(ModuleNotFoundError):
    """Raised when no metadata can be located for a requested package."""

    def __str__(self) -> str:
        missing = self.name
        return f"No package metadata was found for {missing}"

    @property
    def name(self) -> str:  # type: ignore[override] # make readonly
        # The unpacking requires the exception to carry exactly one
        # argument: the name that was looked up.
        [requested] = self.args
        return requested
71
72
class Sectioned:
    """
    A simple entry point config parser for performance

    >>> for item in Sectioned.read(Sectioned._sample):
    ...     print(item)
    Pair(name='sec1', value='# comments ignored')
    Pair(name='sec1', value='a = 1')
    Pair(name='sec1', value='b = 2')
    Pair(name='sec2', value='a = 2')

    >>> res = Sectioned.section_pairs(Sectioned._sample)
    >>> item = next(res)
    >>> item.name
    'sec1'
    >>> item.value
    Pair(name='a', value='1')
    >>> item = next(res)
    >>> item.value
    Pair(name='b', value='2')
    >>> item = next(res)
    >>> item.name
    'sec2'
    >>> item.value
    Pair(name='a', value='2')
    >>> list(res)
    []
    """

    # Sample input used by the doctests above.
    _sample = textwrap.dedent(
        """
        [sec1]
        # comments ignored
        a = 1
        b = 2

        [sec2]
        a = 2
        """
    ).lstrip()

    @classmethod
    def section_pairs(cls, text):
        """
        Yield one item per valid line of ``text`` that appears under a
        section header, with the item's value parsed by ``Pair.parse``.
        """
        for entry in cls.read(text, filter_=cls.valid):
            # Lines seen before any ``[section]`` header carry no name.
            if entry.name is None:
                continue
            yield entry._replace(value=Pair.parse(entry.value))

    @staticmethod
    def read(text, filter_=None):
        """
        Yield a ``Pair(section, line)`` for each stripped line of
        ``text`` accepted by ``filter_``; ``[header]`` lines switch the
        current section instead of being yielded.
        """
        stripped = (line.strip() for line in text.splitlines())
        section = None
        for entry in filter(filter_, stripped):
            if entry.startswith('[') and entry.endswith(']'):
                section = entry.strip('[]')
            else:
                yield Pair(section, entry)

    @staticmethod
    def valid(line: str):
        """Return a truthy value for lines that are neither empty nor comments."""
        if not line:
            return line
        return not line.startswith('#')
136
137
class _EntryPointMatch(types.SimpleNamespace):
    """Named groups captured when ``EntryPoint.pattern`` matches a value."""

    # Dotted module path (the pattern's ``module`` group).
    module: str
    # Dotted attribute path (the ``attr`` group); that group is optional
    # in the pattern, so this is None when the value names only a module.
    attr: str
    # Raw bracketed extras text (the ``extras`` group); also optional,
    # so None when no extras were given.
    extras: str
142
143
class EntryPoint:
    """An entry point as defined by Python packaging conventions.

    See `the packaging docs on entry points
    <https://packaging.python.org/specifications/entry-points/>`_
    for more information.

    >>> ep = EntryPoint(
    ...     name=None, group=None, value='package.module:attr [extra1, extra2]')
    >>> ep.module
    'package.module'
    >>> ep.attr
    'attr'
    >>> ep.extras
    ['extra1', 'extra2']

    If the value package or module are not valid identifiers, a
    ValueError is raised on access.

    >>> EntryPoint(name=None, group=None, value='invalid-name').module
    Traceback (most recent call last):
    ...
    ValueError: ('Invalid object reference...invalid-name...
    >>> EntryPoint(name=None, group=None, value='invalid-name').attr
    Traceback (most recent call last):
    ...
    ValueError: ('Invalid object reference...invalid-name...
    >>> EntryPoint(name=None, group=None, value='invalid-name').extras
    Traceback (most recent call last):
    ...
    ValueError: ('Invalid object reference...invalid-name...

    The same thing happens on construction.

    >>> EntryPoint(name=None, group=None, value='invalid-name')
    Traceback (most recent call last):
    ...
    ValueError: ('Invalid object reference...invalid-name...

    """

    pattern = re.compile(
        r'(?P<module>[\w.]+)\s*'
        r'(:\s*(?P<attr>[\w.]+)\s*)?'
        r'((?P<extras>\[.*\])\s*)?$'
    )
    """
    A regular expression describing the syntax for an entry point,
    which might look like:

        - module
        - package.module
        - package.module:attribute
        - package.module:object.attribute
        - package.module:attr [extra1, extra2]

    Other combinations are possible as well.

    The expression is lenient about whitespace around the ':',
    following the attr, and following any extras.
    """

    name: str
    value: str
    group: str

    dist: Distribution | None = None

    def __init__(self, name: str, value: str, group: str) -> None:
        """
        Store the three defining fields and validate ``value``.

        :raises ValueError: When ``value`` does not match ``pattern``.
        """
        # Write through the instance dict: ``__setattr__`` is disabled
        # to keep instances immutable.
        vars(self).update(name=name, value=value, group=group)
        # Accessing ``module`` parses ``value``, so an invalid object
        # reference fails at construction time.
        self.module

    def load(self) -> Any:
        """Load the entry point from its definition. If only a module
        is indicated by the value, return that module. Otherwise,
        return the named object.
        """
        module = import_module(self.module)
        # ``attr`` may be a dotted path; walk it one attribute at a time.
        attrs = filter(None, (self.attr or '').split('.'))
        return functools.reduce(getattr, attrs, module)

    @property
    def module(self) -> str:
        """The module portion of ``value``."""
        return self._match.module

    @property
    def attr(self) -> str:
        """The attribute portion of ``value`` (None when not given)."""
        return self._match.attr

    @property
    def extras(self) -> list[str]:
        """The extras named in ``value``, as a list of words."""
        return re.findall(r'\w+', self._match.extras or '')

    @functools.cached_property
    def _match(self) -> _EntryPointMatch:
        """
        Parse ``value`` against ``pattern`` (computed once per instance).

        :raises ValueError: When ``value`` is not a valid object reference.
        """
        match = self.pattern.match(self.value)
        if not match:
            raise ValueError(
                'Invalid object reference. '
                'See https://packaging.python.org'
                '/en/latest/specifications/entry-points/#data-model',
                self.value,
            )
        return _EntryPointMatch(**match.groupdict())

    def _for(self, dist):
        """Record ``dist`` as the owning distribution and return self."""
        vars(self).update(dist=dist)
        return self

    def matches(self, **params):
        """
        EntryPoint matches the given parameters.

        >>> ep = EntryPoint(group='foo', name='bar', value='bing:bong [extra1, extra2]')
        >>> ep.matches(group='foo')
        True
        >>> ep.matches(name='bar', value='bing:bong [extra1, extra2]')
        True
        >>> ep.matches(group='foo', name='other')
        False
        >>> ep.matches()
        True
        >>> ep.matches(extras=['extra1', 'extra2'])
        True
        >>> ep.matches(module='bing')
        True
        >>> ep.matches(attr='bong')
        True
        """
        self._disallow_dist(params)
        attrs = (getattr(self, param) for param in params)
        return all(map(operator.eq, params.values(), attrs))

    @staticmethod
    def _disallow_dist(params):
        """
        Querying by dist is not allowed (dist objects are not comparable).
        >>> EntryPoint(name='fan', value='fav', group='fag').matches(dist='foo')
        Traceback (most recent call last):
        ...
        ValueError: "dist" is not suitable for matching...
        """
        if "dist" in params:
            raise ValueError(
                '"dist" is not suitable for matching. '
                "Instead, use Distribution.entry_points.select() on a "
                "located distribution."
            )

    def _key(self):
        """Return the tuple used for comparison and hashing."""
        return self.name, self.value, self.group

    def __lt__(self, other):
        # Objects without a ``_key`` are not comparable; returning
        # NotImplemented lets Python raise the usual TypeError instead
        # of leaking an AttributeError.
        other_key = getattr(other, '_key', None)
        if other_key is None:
            return NotImplemented
        return self._key() < other_key()

    def __eq__(self, other):
        # Returning NotImplemented makes ``ep == "text"`` evaluate to
        # False rather than raising AttributeError.
        other_key = getattr(other, '_key', None)
        if other_key is None:
            return NotImplemented
        return self._key() == other_key()

    def __setattr__(self, name, value):
        raise AttributeError("EntryPoint objects are immutable.")

    def __repr__(self):
        return (
            f'EntryPoint(name={self.name!r}, value={self.value!r}, '
            f'group={self.group!r})'
        )

    def __hash__(self) -> int:
        return hash(self._key())
313
314
class EntryPoints(tuple):
    """
    An immutable, tuple-backed collection of EntryPoint objects that
    can be filtered with ``select``.
    """

    __slots__ = ()

    def __getitem__(self, name: str) -> EntryPoint:  # type: ignore[override] # Work with str instead of int
        """
        Return the first EntryPoint whose name equals ``name``.

        :raises KeyError: When no entry point has that name.
        """
        candidates = iter(self.select(name=name))
        try:
            return next(candidates)
        except StopIteration:
            raise KeyError(name)

    def __repr__(self):
        """
        Show the class name wrapped around the plain tuple repr, to
        make clear this is not an ordinary tuple.
        """
        cls_name = self.__class__.__name__
        items = tuple(self)
        return '%s(%r)' % (cls_name, items)

    def select(self, **params) -> EntryPoints:
        """
        Return a new EntryPoints holding only the members that match
        every given parameter (typically group and/or name).
        """
        return EntryPoints(
            filter(lambda ep: py39.ep_matches(ep, **params), self)
        )

    @property
    def names(self) -> set[str]:
        """
        The distinct names across all entry points.
        """
        return set(map(operator.attrgetter('name'), self))

    @property
    def groups(self) -> set[str]:
        """
        The distinct groups across all entry points.
        """
        return set(map(operator.attrgetter('group'), self))

    @classmethod
    def _from_text_for(cls, text, dist):
        """Parse ``text`` and attach ``dist`` to each resulting entry point."""
        parsed = cls._from_text(text)
        return cls(ep._for(dist) for ep in parsed)

    @staticmethod
    def _from_text(text):
        """Yield an EntryPoint for each section pair in ``text`` (None allowed)."""
        for item in Sectioned.section_pairs(text or ''):
            pair = item.value
            yield EntryPoint(name=pair.name, value=pair.value, group=item.name)
369
370
class PackagePath(pathlib.PurePosixPath):
    """A POSIX-style path naming a file that belongs to a distribution."""

    # These attributes are assigned after construction (see
    # ``Distribution.files``), not by ``__init__``.
    hash: FileHash | None
    size: int
    dist: Distribution

    def read_text(self, encoding: str = 'utf-8') -> str:
        """Return the located file's contents decoded with ``encoding``."""
        located = self.locate()
        return located.read_text(encoding=encoding)

    def read_binary(self) -> bytes:
        """Return the located file's contents as bytes."""
        located = self.locate()
        return located.read_bytes()

    def locate(self) -> SimplePath:
        """Ask the owning distribution to resolve this path."""
        owner = self.dist
        return owner.locate_file(self)
387
388
class FileHash:
    """A hash specification of the form ``mode=value``."""

    def __init__(self, spec: str) -> None:
        # Split on the first '=' only; when there is none, ``value``
        # ends up as the empty string.
        mode, _, value = spec.partition('=')
        self.mode = mode
        self.value = value

    def __repr__(self) -> str:
        mode, value = self.mode, self.value
        return f'<FileHash mode: {mode} value: {value}>'
395
396
class Distribution(metaclass=abc.ABCMeta):
    """
    An abstract Python distribution package.

    Custom providers may derive from this class and define
    the abstract methods to provide a concrete implementation
    for their environment. Some providers may opt to override
    the default implementation of some properties to bypass
    the file-reading mechanism.
    """

    @abc.abstractmethod
    def read_text(self, filename) -> str | None:
        """Attempt to load metadata file given by the name.

        Python distribution metadata is organized by blobs of text
        typically represented as "files" in the metadata directory
        (e.g. package-1.0.dist-info). These files include things
        like:

        - METADATA: The distribution metadata including fields
          like Name and Version and Description.
        - entry_points.txt: A series of entry points as defined in
          `the entry points spec <https://packaging.python.org/en/latest/specifications/entry-points/#file-format>`_.
        - RECORD: A record of files according to
          `this recording spec <https://packaging.python.org/en/latest/specifications/recording-installed-packages/#the-record-file>`_.

        A package may provide any set of files, including those
        not listed here or none at all.

        :param filename: The name of the file in the distribution info.
        :return: The text if found, otherwise None.
        """

    @abc.abstractmethod
    def locate_file(self, path: str | os.PathLike[str]) -> SimplePath:
        """
        Given a path to a file in this distribution, return a SimplePath
        to it.

        This method is used by callers of ``Distribution.files()`` to
        locate files within the distribution. If it's possible for a
        Distribution to represent files in the distribution as
        ``SimplePath`` objects, it should implement this method
        to resolve such objects.

        Some Distribution providers may elect not to resolve SimplePath
        objects within the distribution by raising a
        NotImplementedError, but consumers of such a Distribution would
        be unable to invoke ``Distribution.files()``.
        """

    @classmethod
    def from_name(cls, name: str) -> Distribution:
        """Return the Distribution for the given package name.

        :param name: The name of the distribution package to search for.
        :return: The Distribution instance (or subclass thereof) for the named
            package, if found.
        :raises PackageNotFoundError: When the named package's distribution
            metadata cannot be found.
        :raises ValueError: When an invalid value is supplied for name.
        """
        if not name:
            raise ValueError("A distribution name is required.")
        try:
            return next(iter(cls._prefer_valid(cls.discover(name=name))))
        except StopIteration:
            raise PackageNotFoundError(name)

    @classmethod
    def discover(
        cls, *, context: DistributionFinder.Context | None = None, **kwargs
    ) -> Iterable[Distribution]:
        """Return an iterable of Distribution objects for all packages.

        Pass a ``context`` or pass keyword arguments for constructing
        a context.

        :context: A ``DistributionFinder.Context`` object.
        :return: Iterable of Distribution objects for packages matching
          the context.
        :raises ValueError: When both ``context`` and keyword arguments
          are supplied.
        """
        if context and kwargs:
            raise ValueError("cannot accept context and kwargs")
        context = context or DistributionFinder.Context(**kwargs)
        return itertools.chain.from_iterable(
            resolver(context) for resolver in cls._discover_resolvers()
        )

    @staticmethod
    def _prefer_valid(dists: Iterable[Distribution]) -> Iterable[Distribution]:
        """
        Prefer (move to the front) distributions that have metadata.

        Ref python/importlib_resources#489.
        """
        buckets = bucket(dists, lambda dist: bool(dist.metadata))
        return itertools.chain(buckets[True], buckets[False])

    @staticmethod
    def at(path: str | os.PathLike[str]) -> Distribution:
        """Return a Distribution for the indicated metadata path.

        :param path: a string or path-like object
        :return: a concrete Distribution instance for the path
        """
        return PathDistribution(pathlib.Path(path))

    @staticmethod
    def _discover_resolvers():
        """Search the meta_path for resolvers (MetadataPathFinders)."""
        # Finders that do not define ``find_distributions`` contribute
        # None, which the filter below drops.
        declared = (
            getattr(finder, 'find_distributions', None) for finder in sys.meta_path
        )
        return filter(None, declared)

    @property
    def metadata(self) -> _meta.PackageMetadata | None:
        """Return the parsed metadata for this Distribution.

        The returned object will have keys that name the various bits of
        metadata per the
        `Core metadata specifications <https://packaging.python.org/en/latest/specifications/core-metadata/#core-metadata>`_.

        Custom providers may provide the METADATA file or override this
        property.
        """

        text = (
            self.read_text('METADATA')
            or self.read_text('PKG-INFO')
            # This last clause is here to support old egg-info files.  Its
            # effect is to just end up using the PathDistribution's self._path
            # (which points to the egg-info file) attribute unchanged.
            or self.read_text('')
        )
        return self._assemble_message(text)

    @staticmethod
    @pass_none
    def _assemble_message(text: str) -> _meta.PackageMetadata:
        """Parse ``text`` as an email-style message wrapped in the adapter."""
        # deferred for performance (python/cpython#109829)
        from . import _adapters

        return _adapters.Message(email.message_from_string(text))

    @property
    def name(self) -> str:
        """Return the 'Name' metadata for the distribution package."""
        return md_none(self.metadata)['Name']

    @property
    def _normalized_name(self):
        """Return a normalized version of the name."""
        return Prepared.normalize(self.name)

    @property
    def version(self) -> str:
        """Return the 'Version' metadata for the distribution package."""
        return md_none(self.metadata)['Version']

    @property
    def entry_points(self) -> EntryPoints:
        """
        Return EntryPoints for this distribution.

        Custom providers may provide the ``entry_points.txt`` file
        or override this property.
        """
        return EntryPoints._from_text_for(self.read_text('entry_points.txt'), self)

    @property
    def files(self) -> list[PackagePath] | None:
        """Files in this distribution.

        :return: List of PackagePath for this distribution or None

        Result is `None` if the metadata file that enumerates files
        (i.e. RECORD for dist-info, or installed-files.txt or
        SOURCES.txt for egg-info) is missing.
        Result may be empty if the metadata exists but is empty.

        Custom providers are recommended to provide a "RECORD" file (in
        ``read_text``) or override this property to allow for callers to be
        able to resolve filenames provided by the package.
        """

        def make_file(name, hash=None, size_str=None):
            # One CSV row: the path, then an optional hash spec and an
            # optional size; absent or empty columns become None.
            result = PackagePath(name)
            result.hash = FileHash(hash) if hash else None
            result.size = int(size_str) if size_str else None
            result.dist = self
            return result

        @pass_none
        def make_files(lines):
            # Delay csv import, since Distribution.files is not as widely used
            # as other parts of importlib.metadata
            import csv

            return starmap(make_file, csv.reader(lines))

        @pass_none
        def skip_missing_files(package_paths):
            # Keep only entries whose located file actually exists.
            return list(filter(lambda path: path.locate().exists(), package_paths))

        # Sources are tried in order; the first one that yields a truthy
        # result wins.
        return skip_missing_files(
            make_files(
                self._read_files_distinfo()
                or self._read_files_egginfo_installed()
                or self._read_files_egginfo_sources()
            )
        )

    def _read_files_distinfo(self):
        """
        Read the lines of RECORD.
        """
        text = self.read_text('RECORD')
        return text and text.splitlines()

    def _read_files_egginfo_installed(self):
        """
        Read installed-files.txt and return lines in a similar
        CSV-parsable format as RECORD: each file must be placed
        relative to the site-packages directory and must also be
        quoted (since file names can contain literal commas).

        This file is written when the package is installed by pip,
        but it might not be written for other installation methods.
        Assume the file is accurate if it exists.
        """
        text = self.read_text('installed-files.txt')
        # Prepend the .egg-info/ subdir to the lines in this file.
        # But this subdir is only available from PathDistribution's
        # self._path.
        subdir = getattr(self, '_path', None)
        if not text or not subdir:
            return

        paths = (
            py311.relative_fix((subdir / name).resolve())
            .relative_to(self.locate_file('').resolve(), walk_up=True)
            .as_posix()
            for name in text.splitlines()
        )
        return map('"{}"'.format, paths)

    def _read_files_egginfo_sources(self):
        """
        Read SOURCES.txt and return lines in a similar CSV-parsable
        format as RECORD: each file name must be quoted (since it
        might contain literal commas).

        Note that SOURCES.txt is not a reliable source for what
        files are installed by a package. This file is generated
        for a source archive, and the files that are present
        there (e.g. setup.py) may not correctly reflect the files
        that are present after the package has been installed.
        """
        text = self.read_text('SOURCES.txt')
        return text and map('"{}"'.format, text.splitlines())

    @property
    def requires(self) -> list[str] | None:
        """Generated requirements specified for this Distribution"""
        reqs = self._read_dist_info_reqs() or self._read_egg_info_reqs()
        return reqs and list(reqs)

    def _read_dist_info_reqs(self):
        """
        Return the 'Requires-Dist' metadata values, or None when this
        distribution has no metadata.
        """
        metadata = self.metadata
        # ``metadata`` is annotated as possibly None; report "no
        # requirements" in that case so ``requires`` can fall back to
        # requires.txt instead of failing with AttributeError.
        if metadata is None:
            return None
        return metadata.get_all('Requires-Dist')

    def _read_egg_info_reqs(self):
        """Return requirements derived from ``requires.txt``."""
        source = self.read_text('requires.txt')
        return pass_none(self._deps_from_requires_text)(source)

    @classmethod
    def _deps_from_requires_text(cls, source):
        """Convert the sectioned text of ``requires.txt`` into requirement strings."""
        return cls._convert_egg_info_reqs_to_simple_reqs(Sectioned.read(source))

    @staticmethod
    def _convert_egg_info_reqs_to_simple_reqs(sections):
        """
        Historically, setuptools would solicit and store 'extra'
        requirements, including those with environment markers,
        in separate sections. More modern tools expect each
        dependency to be defined separately, with any relevant
        extras and environment markers attached directly to that
        requirement. This method converts the former to the
        latter. See _test_deps_from_requires_text for an example.
        """

        def make_condition(name):
            return name and f'extra == "{name}"'

        def quoted_marker(section):
            # A section name has the form "extra:markers"; either part
            # may be empty.
            section = section or ''
            extra, sep, markers = section.partition(':')
            if extra and markers:
                markers = f'({markers})'
            conditions = list(filter(None, [markers, make_condition(extra)]))
            return '; ' + ' and '.join(conditions) if conditions else ''

        def url_req_space(req):
            """
            PEP 508 requires a space between the url_spec and the quoted_marker.
            Ref python/importlib_metadata#357.
            """
            # '@' is uniquely indicative of a url_req.
            return ' ' * ('@' in req)

        for section in sections:
            space = url_req_space(section.value)
            yield section.value + space + quoted_marker(section.name)

    @property
    def origin(self):
        """Return the parsed contents of ``direct_url.json``."""
        return self._load_json('direct_url.json')

    def _load_json(self, filename):
        """
        Read ``filename`` and parse it as JSON, converting each JSON
        object into a ``types.SimpleNamespace``.
        """
        # Deferred for performance (python/importlib_metadata#503)
        import json

        # NOTE(review): ``pass_none`` presumably short-circuits to None
        # when the file could not be read -- confirm in ._functools.
        return pass_none(json.loads)(
            self.read_text(filename),
            object_hook=lambda data: types.SimpleNamespace(**data),
        )
725
726
class DistributionFinder(MetaPathFinder):
    """
    A MetaPathFinder that can also discover installed distributions.

    Custom providers implement this interface in order to supply
    metadata.
    """

    class Context:
        """
        Keyword arguments presented by the caller to
        ``distributions()`` or ``Distribution.discover()``
        to narrow the scope of a search for distributions
        in all DistributionFinders.

        Each DistributionFinder may expect any parameters
        and should attempt to honor the canonical
        parameters defined below when appropriate.

        This mechanism gives a custom provider a means to
        solicit additional details from the caller beyond
        "name" and "path" when searching distributions.
        For example, imagine a provider that exposes suites
        of packages in either a "public" or "private" ``realm``.
        A caller may wish to query only for distributions in
        a particular realm and could call
        ``distributions(realm="private")`` to signal to the
        custom provider to only include distributions from that
        realm.
        """

        name = None
        """
        Specific name for which a distribution finder should match.
        A name of ``None`` matches all distributions.
        """

        def __init__(self, **kwargs):
            # Every keyword becomes an instance attribute.
            self.__dict__.update(kwargs)

        @property
        def path(self) -> list[str]:
            """
            The sequence of directory paths that a distribution finder
            should search.

            Typically refers to Python installed package paths such as
            "site-packages" directories and defaults to ``sys.path``.
            """
            supplied = vars(self)
            return supplied['path'] if 'path' in supplied else sys.path

    @abc.abstractmethod
    def find_distributions(self, context=Context()) -> Iterable[Distribution]:
        """
        Find distributions.

        Return an iterable of all Distribution instances capable of
        loading the metadata for packages matching the ``context``,
        a DistributionFinder.Context instance.
        """
787
788
class FastPath:
    """
    Micro-optimized class for searching a root for children.

    Root is a path on the file system that may contain metadata
    directories either as natural directories or within a zip file.

    >>> FastPath('').children()
    ['...']

    FastPath objects are cached and recycled for any given root.

    >>> FastPath('foobar') is FastPath('foobar')
    True
    """

    @functools.lru_cache()  # type: ignore[misc]
    def __new__(cls, root):
        # Cached on (cls, root), so constructing with the same root
        # returns the same instance.
        return super().__new__(cls)

    def __init__(self, root):
        self.root = root

    def joinpath(self, child):
        """Return ``child`` joined onto the root as a ``pathlib.Path``."""
        return pathlib.Path(self.root, child)

    def children(self):
        """
        Return the names directly under the root: first as a directory
        listing, then as a zip listing, and finally as an empty list
        when both attempts fail.
        """
        strategies = (
            lambda: os.listdir(self.root or '.'),
            self.zip_children,
        )
        for list_children in strategies:
            # Best effort: any failure just moves on to the next strategy.
            with suppress(Exception):
                return list_children()
        return []

    def zip_children(self):
        """Return the top-level names of the zip file at the root."""
        # deferred for performance (python/importlib_metadata#502)
        from zipp.compat.overlay import zipfile

        archive = zipfile.Path(self.root)
        members = archive.root.namelist()
        # From here on, children are resolved inside the archive.
        self.joinpath = archive.joinpath

        top_level = (member.partition(posixpath.sep)[0] for member in members)
        return dict.fromkeys(top_level)

    def search(self, name):
        """Search the lookup that corresponds to the root's current mtime."""
        current = self.lookup(self.mtime)
        return current.search(name)

    @property
    def mtime(self):
        """
        The root's modification time; when it cannot be stat'ed, the
        lookup cache is cleared and None is returned.
        """
        with suppress(OSError):
            status = os.stat(self.root)
            return status.st_mtime
        self.lookup.cache_clear()
        return None

    @method_cache
    def lookup(self, mtime):
        # ``mtime`` is unused in the body; it only takes part in the
        # cache key so a changed mtime produces a fresh Lookup.
        return Lookup(self)
844
845
class Lookup:
    """
    A micro-optimized index of the metadata entries found in a (fast) path.
    """

    def __init__(self, path: FastPath):
        """
        Index the children of ``path`` that look like metadata.

        Children ending in ``.dist-info`` or ``.egg-info`` are recorded
        in ``infos``; an ``egg-info`` child of a root ending in ``.egg``
        is recorded in ``eggs`` as legacy metadata.
        """
        root_name = os.path.basename(path.root).lower()
        root_is_egg = root_name.endswith(".egg")
        self.infos = FreezableDefaultDict(list)
        self.eggs = FreezableDefaultDict(list)

        for entry in path.children():
            lowered = entry.lower()
            if lowered.endswith((".dist-info", ".egg-info")):
                # rpartition is faster than splitext and suitable for this purpose.
                stem, _, _ = lowered.rpartition(".")
                project, _, _ = stem.partition("-")
                key = Prepared.normalize(project)
                self.infos[key].append(path.joinpath(entry))
            elif root_is_egg and lowered == "egg-info":
                # The project name comes from the root itself.
                stem, _, _ = root_name.rpartition(".")
                project, _, _ = stem.partition("-")
                key = Prepared.legacy_normalize(project)
                self.eggs[key].append(path.joinpath(entry))

        self.infos.freeze()
        self.eggs.freeze()

    def search(self, prepared: Prepared):
        """
        Return the infos followed by the eggs matching ``prepared``; a
        falsy query matches every entry.
        """
        if prepared:
            infos = self.infos[prepared.normalized]
        else:
            infos = itertools.chain.from_iterable(self.infos.values())
        if prepared:
            eggs = self.eggs[prepared.legacy_normalized]
        else:
            eggs = itertools.chain.from_iterable(self.eggs.values())
        return itertools.chain(infos, eggs)
895
896
class Prepared:
    """
    A prepared search query for metadata on a possibly-named package.

    Pre-calculates the normalization to prevent repeated operations.

    >>> none = Prepared(None)
    >>> none.normalized
    >>> none.legacy_normalized
    >>> bool(none)
    False
    >>> sample = Prepared('Sample__Pkg-name.foo')
    >>> sample.normalized
    'sample_pkg_name_foo'
    >>> sample.legacy_normalized
    'sample__pkg_name.foo'
    >>> bool(sample)
    True
    """

    # Class-level defaults, left in place when no name is given.
    normalized = None
    legacy_normalized = None

    def __init__(self, name: str | None):
        self.name = name
        if name is not None:
            self.normalized = self.normalize(name)
            self.legacy_normalized = self.legacy_normalize(name)

    @staticmethod
    def normalize(name):
        """
        PEP 503 normalization plus dashes as underscores.
        """
        collapsed = re.sub(r"[-_.]+", "-", name)
        return collapsed.lower().replace('-', '_')

    @staticmethod
    def legacy_normalize(name):
        """
        Normalize the package name as found in the convention in
        older packaging tools versions and specs.
        """
        lowered = name.lower()
        return lowered.replace('-', '_')

    def __bool__(self):
        # A query is truthy only when it carries a non-empty name.
        return bool(self.name)
944
945
@install
class MetadataPathFinder(NullFinder, DistributionFinder):
    """A degenerate finder for distribution packages on the file system.

    This finder supplies only a find_distributions() method for versions
    of Python that do not have a PathFinder find_distributions().
    """

    @classmethod
    def find_distributions(
        cls, context=DistributionFinder.Context()
    ) -> Iterable[PathDistribution]:
        """
        Find distributions.

        Return an iterable of all Distribution instances capable of
        loading the metadata for packages matching ``context.name``
        (or all names if ``None`` indicated) along the paths in the list
        of directories ``context.path``.
        """
        metadata_paths = cls._search_paths(context.name, context.path)
        return map(PathDistribution, metadata_paths)

    @classmethod
    def _search_paths(cls, name, paths):
        """Find metadata directories in paths heuristically."""
        query = Prepared(name)
        roots = map(FastPath, paths)
        return itertools.chain.from_iterable(
            root.search(query) for root in roots
        )

    @classmethod
    def invalidate_caches(cls) -> None:
        """Drop the cached FastPath instances."""
        FastPath.__new__.cache_clear()
980
981
class PathDistribution(Distribution):
    """A Distribution whose metadata is read from a path."""

    def __init__(self, path: SimplePath) -> None:
        """Construct a distribution.

        :param path: SimplePath indicating the metadata directory.
        """
        self._path = path

    def read_text(self, filename: str | os.PathLike[str]) -> str | None:
        # The listed failures all mean the file is not readable here
        # and are reported as None.
        try:
            return self._path.joinpath(filename).read_text(encoding='utf-8')
        except (
            FileNotFoundError,
            IsADirectoryError,
            KeyError,
            NotADirectoryError,
            PermissionError,
        ):
            return None

    read_text.__doc__ = Distribution.read_text.__doc__

    def locate_file(self, path: str | os.PathLike[str]) -> SimplePath:
        # Files are resolved relative to the metadata directory's parent.
        container = self._path.parent
        return container / path

    @property
    def _normalized_name(self):
        """
        Performance optimization: where possible, resolve the
        normalized name from the file system path.
        """
        basename = os.path.basename(str(self._path))
        inferred = self._name_from_stem(basename)
        from_path = pass_none(Prepared.normalize)(inferred)
        # Fall back to the metadata-derived name when the path gave none.
        return from_path or super()._normalized_name

    @staticmethod
    def _name_from_stem(stem):
        """
        >>> PathDistribution._name_from_stem('foo-3.0.egg-info')
        'foo'
        >>> PathDistribution._name_from_stem('CherryPy-3.0.dist-info')
        'CherryPy'
        >>> PathDistribution._name_from_stem('face.egg-info')
        'face'
        >>> PathDistribution._name_from_stem('foo.bar')
        """
        filename, ext = os.path.splitext(stem)
        if ext in ('.dist-info', '.egg-info'):
            # The name is everything before the first dash.
            return filename.partition('-')[0]
        return None
1035
1036
def distribution(distribution_name: str) -> Distribution:
    """Look up the ``Distribution`` for a package by name.

    :param distribution_name: The name of the distribution package as a string.
    :return: A ``Distribution`` instance (or subclass thereof).
    """
    found = Distribution.from_name(distribution_name)
    return found
1044
1045
def distributions(**kwargs) -> Iterable[Distribution]:
    """Discover ``Distribution`` instances, forwarding ``kwargs`` to
    ``Distribution.discover``.

    :return: An iterable of ``Distribution`` instances.
    """
    discovered = Distribution.discover(**kwargs)
    return discovered
1052
1053
def metadata(distribution_name: str) -> _meta.PackageMetadata | None:
    """Return the parsed metadata of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: A PackageMetadata containing the parsed metadata.
    """
    dist = Distribution.from_name(distribution_name)
    return dist.metadata
1061
1062
def version(distribution_name: str) -> str:
    """Return the version string of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: The version string for the package as defined in the package's
        "Version" metadata key.
    """
    dist = distribution(distribution_name)
    return dist.version
1071
1072
# ``unique_everseen`` pre-bound with ``py39.normalized_name`` as its key
# function, so callers pass only the iterable of distributions.
_unique = functools.partial(
    unique_everseen,
    key=py39.normalized_name,
)
"""
Wrapper for ``distributions`` to return unique distributions by name.
"""
1080
1081
def entry_points(**params) -> EntryPoints:
    """Return EntryPoint objects for all installed packages.

    Selection parameters (group or name) narrow the result to the
    entry points matching those properties (see EntryPoints.select()).

    :return: EntryPoints for all installed packages.
    """
    # Consider each distribution name once.
    unique_dists = _unique(distributions())
    collected = itertools.chain.from_iterable(
        dist.entry_points for dist in unique_dists
    )
    return EntryPoints(collected).select(**params)
1095
1096
def files(distribution_name: str) -> list[PackagePath] | None:
    """Return the files of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: List of files composing the distribution.
    """
    dist = distribution(distribution_name)
    return dist.files
1104
1105
def requires(distribution_name: str) -> list[str] | None:
    """
    Return the requirements of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: An iterable of requirements, suitable for
        packaging.requirement.Requirement.
    """
    dist = distribution(distribution_name)
    return dist.requires
1114
1115
def packages_distributions() -> Mapping[str, list[str]]:
    """
    Return a mapping of top-level packages to their
    distributions.

    >>> import collections.abc
    >>> pkgs = packages_distributions()
    >>> all(isinstance(dist, collections.abc.Sequence) for dist in pkgs.values())
    True
    """
    by_package = collections.defaultdict(list)
    for dist in distributions():
        # Prefer the declared top-level names; infer them from the file
        # list only when none are declared.
        top_levels = _top_level_declared(dist) or _top_level_inferred(dist)
        for package in top_levels:
            by_package[package].append(md_none(dist.metadata)['Name'])
    return dict(by_package)
1131
1132
1133def _top_level_declared(dist):
1134 return (dist.read_text('top_level.txt') or '').split()
1135
1136
1137def _topmost(name: PackagePath) -> str | None:
1138 """
1139 Return the top-most parent as long as there is a parent.
1140 """
1141 top, *rest = name.parts
1142 return top if rest else None
1143
1144
def _get_toplevel_name(name: PackagePath) -> str:
    """
    Infer a possibly importable module name from a name presumed on
    sys.path.

    >>> _get_toplevel_name(PackagePath('foo.py'))
    'foo'
    >>> _get_toplevel_name(PackagePath('foo'))
    'foo'
    >>> _get_toplevel_name(PackagePath('foo.pyc'))
    'foo'
    >>> _get_toplevel_name(PackagePath('foo/__init__.py'))
    'foo'
    >>> _get_toplevel_name(PackagePath('foo.pth'))
    'foo.pth'
    >>> _get_toplevel_name(PackagePath('foo.dist-info'))
    'foo.dist-info'
    """
    # Defer import of inspect for performance (python/cpython#118761)
    import inspect

    # A path inside a directory is named after its top-most directory.
    parent = _topmost(name)
    if parent:
        return parent
    # Otherwise use the module name when the file has a module suffix.
    module_name = inspect.getmodulename(name)
    if module_name:
        return module_name
    return str(name)
1167
1168
def _top_level_inferred(dist):
    """
    Infer top-level names from the distribution's file list, keeping
    only the names that contain no dot.
    """
    candidates = {
        _get_toplevel_name(path) for path in always_iterable(dist.files)
    }
    return filter(lambda name: '.' not in name, candidates)