1from __future__ import annotations
2
3import csv
4import email.message
5import functools
6import json
7import logging
8import pathlib
9import re
10import zipfile
11from collections.abc import Collection, Container, Iterable, Iterator
12from typing import (
13 IO,
14 Any,
15 NamedTuple,
16 Protocol,
17 Union,
18)
19
20from pip._vendor.packaging.requirements import Requirement
21from pip._vendor.packaging.specifiers import InvalidSpecifier, SpecifierSet
22from pip._vendor.packaging.utils import NormalizedName, canonicalize_name
23from pip._vendor.packaging.version import Version
24
25from pip._internal.exceptions import NoneMetadataError
26from pip._internal.locations import site_packages, user_site
27from pip._internal.models.direct_url import (
28 DIRECT_URL_METADATA_NAME,
29 DirectUrl,
30 DirectUrlValidationError,
31)
32from pip._internal.utils.compat import stdlib_pkgs # TODO: Move definition here.
33from pip._internal.utils.egg_link import egg_link_path_from_sys_path
34from pip._internal.utils.misc import is_local, normalize_path
35from pip._internal.utils.urls import url_to_path
36
37from ._json import msg_to_json
38
# Type accepted for the ``path`` argument of ``BaseDistribution.is_file()`` and
# ``BaseDistribution.read_text()``: a plain string or a pure (non-I/O) path.
InfoPath = Union[str, pathlib.PurePath]

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
42
43
class BaseEntryPoint(Protocol):
    """Structural interface for one entry point exposed by a distribution.

    Every property here is a placeholder that raises ``NotImplementedError``;
    concrete implementations are expected to provide real values.
    """

    @property
    def name(self) -> str:
        """The entry point's ``name`` field."""
        raise NotImplementedError

    @property
    def value(self) -> str:
        """The entry point's ``value`` field."""
        raise NotImplementedError

    @property
    def group(self) -> str:
        """The entry point's ``group`` field."""
        raise NotImplementedError
56
57
58def _convert_installed_files_path(
59 entry: tuple[str, ...],
60 info: tuple[str, ...],
61) -> str:
62 """Convert a legacy installed-files.txt path into modern RECORD path.
63
64 The legacy format stores paths relative to the info directory, while the
65 modern format stores paths relative to the package root, e.g. the
66 site-packages directory.
67
68 :param entry: Path parts of the installed-files.txt entry.
69 :param info: Path parts of the egg-info directory relative to package root.
70 :returns: The converted entry.
71
72 For best compatibility with symlinks, this does not use ``abspath()`` or
73 ``Path.resolve()``, but tries to work with path parts:
74
75 1. While ``entry`` starts with ``..``, remove the equal amounts of parts
76 from ``info``; if ``info`` is empty, start appending ``..`` instead.
77 2. Join the two directly.
78 """
79 while entry and entry[0] == "..":
80 if not info or info[-1] == "..":
81 info += ("..",)
82 else:
83 info = info[:-1]
84 entry = entry[1:]
85 return str(pathlib.Path(*info, *entry))
86
87
class RequiresEntry(NamedTuple):
    """One dependency line parsed from an egg-info ``requires.txt`` file."""

    # The requirement line itself, stripped of surrounding whitespace.
    requirement: str
    # Text before ":" in the enclosing section header ("" outside a section).
    extra: str
    # Text after ":" in the enclosing section header ("" when absent).
    marker: str
92
93
class BaseDistribution(Protocol):
    """Interface for a distribution and the metadata attached to it.

    Members that raise ``NotImplementedError`` must be supplied by a concrete
    implementation; the remaining members are derived from those.
    """

    @classmethod
    def from_directory(cls, directory: str) -> BaseDistribution:
        """Load the distribution from a metadata directory.

        :param directory: Path to a metadata directory, e.g. ``.dist-info``.
        """
        raise NotImplementedError()

    @classmethod
    def from_metadata_file_contents(
        cls,
        metadata_contents: bytes,
        filename: str,
        project_name: str,
    ) -> BaseDistribution:
        """Load the distribution from the contents of a METADATA file.

        This is used to implement PEP 658 by generating a "shallow" dist object that can
        be used for resolution without downloading or building the actual dist yet.

        :param metadata_contents: The contents of a METADATA file.
        :param filename: File name for the dist with this metadata.
        :param project_name: Name of the project this dist represents.
        """
        raise NotImplementedError()

    @classmethod
    def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
        """Load the distribution from a given wheel.

        :param wheel: A concrete wheel definition.
        :param name: File name of the wheel.

        :raises InvalidWheel: Whenever loading of the wheel causes a
            :py:exc:`zipfile.BadZipFile` exception to be thrown.
        :raises UnsupportedWheel: If the wheel is a valid zip, but malformed
            internally.
        """
        raise NotImplementedError()

    def __repr__(self) -> str:
        """Return ``"<raw_name> <raw_version> (<location>)"``."""
        return f"{self.raw_name} {self.raw_version} ({self.location})"

    def __str__(self) -> str:
        """Return ``"<raw_name> <raw_version>"``."""
        return f"{self.raw_name} {self.raw_version}"

    @property
    def location(self) -> str | None:
        """Where the distribution is loaded from.

        A string value is not necessarily a filesystem path, since distributions
        can be loaded from other sources, e.g. arbitrary zip archives. ``None``
        means the distribution is created in-memory.

        Do not canonicalize this value with e.g. ``pathlib.Path.resolve()``. If
        this is a symbolic link, we want to preserve the relative path between
        it and files in the distribution.
        """
        raise NotImplementedError()

    @property
    def editable_project_location(self) -> str | None:
        """The project location for editable distributions.

        This is the directory where pyproject.toml or setup.py is located.
        None if the distribution is not installed in editable mode.
        """
        # TODO: this property is relatively costly to compute, memoize it ?
        direct_url = self.direct_url
        if direct_url:
            if direct_url.is_local_editable():
                return url_to_path(direct_url.url)
        else:
            # Search for an .egg-link file by walking sys.path, as it was
            # done before by dist_is_editable().
            egg_link_path = egg_link_path_from_sys_path(self.raw_name)
            if egg_link_path:
                # TODO: get project location from second line of egg_link file
                #       (https://github.com/pypa/pip/issues/10243)
                return self.location
        return None

    @property
    def installed_location(self) -> str | None:
        """The distribution's "installed" location.

        This should generally be a ``site-packages`` directory. This is
        usually ``dist.location``, except for legacy develop-installed packages,
        where ``dist.location`` is the source code location, and this is where
        the ``.egg-link`` file is.

        The returned location is normalized (in particular, with symlinks removed).
        """
        raise NotImplementedError()

    @property
    def info_location(self) -> str | None:
        """Location of the .[egg|dist]-info directory or file.

        Similarly to ``location``, a string value is not necessarily a
        filesystem path. ``None`` means the distribution is created in-memory.

        For a modern .dist-info installation on disk, this should be something
        like ``{location}/{raw_name}-{version}.dist-info``.

        Do not canonicalize this value with e.g. ``pathlib.Path.resolve()``. If
        this is a symbolic link, we want to preserve the relative path between
        it and other files in the distribution.
        """
        raise NotImplementedError()

    @property
    def installed_by_distutils(self) -> bool:
        """Whether this distribution is installed with legacy distutils format.

        A distribution installed with "raw" distutils not patched by setuptools
        uses one single file at ``info_location`` to store metadata. We need to
        treat this specially on uninstallation.
        """
        info_location = self.info_location
        if not info_location:
            return False
        return pathlib.Path(info_location).is_file()

    @property
    def installed_as_egg(self) -> bool:
        """Whether this distribution is installed as an egg.

        This usually indicates the distribution was installed by (older versions
        of) easy_install.
        """
        location = self.location
        if not location:
            return False
        # XXX if the distribution is a zipped egg, location has a trailing /
        # so we resort to pathlib.Path to check the suffix in a reliable way.
        return pathlib.Path(location).suffix == ".egg"

    @property
    def installed_with_setuptools_egg_info(self) -> bool:
        """Whether this distribution is installed with the ``.egg-info`` format.

        This usually indicates the distribution was installed with setuptools
        with an old pip version or with ``single-version-externally-managed``.

        Note that this ensures the metadata store is a directory. distutils can
        also install an ``.egg-info``, but as a file, not a directory. This
        property is *False* for that case. Also see ``installed_by_distutils``.
        """
        info_location = self.info_location
        if not info_location:
            return False
        if not info_location.endswith(".egg-info"):
            return False
        return pathlib.Path(info_location).is_dir()

    @property
    def installed_with_dist_info(self) -> bool:
        """Whether this distribution is installed with the "modern format".

        This indicates a "modern" installation, e.g. storing metadata in the
        ``.dist-info`` directory. This applies to installations made by
        setuptools (but through pip, not directly), or anything using the
        standardized build backend interface (PEP 517).
        """
        info_location = self.info_location
        if not info_location:
            return False
        if not info_location.endswith(".dist-info"):
            return False
        return pathlib.Path(info_location).is_dir()

    @property
    def canonical_name(self) -> NormalizedName:
        """The distribution's name as a ``NormalizedName``."""
        raise NotImplementedError()

    @property
    def version(self) -> Version:
        """The distribution's version as a ``Version`` object."""
        raise NotImplementedError()

    @property
    def raw_version(self) -> str:
        """The distribution's version as a string."""
        raise NotImplementedError()

    @property
    def setuptools_filename(self) -> str:
        """Convert a project name to its setuptools-compatible filename.

        This is a copy of ``pkg_resources.to_filename()`` for compatibility.
        """
        return self.raw_name.replace("-", "_")

    @property
    def direct_url(self) -> DirectUrl | None:
        """Obtain a DirectUrl from this distribution.

        Returns None if the distribution has no `direct_url.json` metadata,
        or if `direct_url.json` is invalid.
        """
        try:
            content = self.read_text(DIRECT_URL_METADATA_NAME)
        except FileNotFoundError:
            return None
        try:
            return DirectUrl.from_json(content)
        except (
            UnicodeDecodeError,
            json.JSONDecodeError,
            DirectUrlValidationError,
        ) as e:
            # Invalid content is reported but treated like a missing file.
            logger.warning(
                "Error parsing %s for %s: %s",
                DIRECT_URL_METADATA_NAME,
                self.canonical_name,
                e,
            )
            return None

    @property
    def installer(self) -> str:
        """First non-blank line of the ``INSTALLER`` file, stripped.

        Returns an empty string when the file cannot be read or contains no
        non-blank line.
        """
        try:
            installer_text = self.read_text("INSTALLER")
        except (OSError, ValueError, NoneMetadataError):
            return ""  # Fail silently if the installer file cannot be read.
        for line in installer_text.splitlines():
            cleaned_line = line.strip()
            if cleaned_line:
                return cleaned_line
        return ""

    @property
    def requested(self) -> bool:
        """Whether a ``REQUESTED`` file exists in the info directory."""
        return self.is_file("REQUESTED")

    @property
    def editable(self) -> bool:
        """Whether ``editable_project_location`` is set (truthy)."""
        return bool(self.editable_project_location)

    @property
    def local(self) -> bool:
        """If distribution is installed in the current virtual environment.

        Always True if we're not in a virtualenv.
        """
        if self.installed_location is None:
            return False
        return is_local(self.installed_location)

    @property
    def in_usersite(self) -> bool:
        """Whether ``installed_location`` starts with the normalized user site.

        False when either ``installed_location`` or ``user_site`` is ``None``.
        """
        if self.installed_location is None or user_site is None:
            return False
        return self.installed_location.startswith(normalize_path(user_site))

    @property
    def in_site_packages(self) -> bool:
        """Whether ``installed_location`` starts with normalized site-packages.

        False when either ``installed_location`` or ``site_packages`` is
        ``None``.
        """
        if self.installed_location is None or site_packages is None:
            return False
        return self.installed_location.startswith(normalize_path(site_packages))

    def is_file(self, path: InfoPath) -> bool:
        """Check whether an entry in the info directory is a file."""
        raise NotImplementedError()

    def iter_distutils_script_names(self) -> Iterator[str]:
        """Find distutils 'scripts' entries metadata.

        If 'scripts' is supplied in ``setup.py``, distutils records those in the
        installed distribution's ``scripts`` directory, a file for each script.
        """
        raise NotImplementedError()

    def read_text(self, path: InfoPath) -> str:
        """Read a file in the info directory.

        :raise FileNotFoundError: If ``path`` does not exist in the directory.
        :raise NoneMetadataError: If ``path`` exists in the info directory, but
            cannot be read.
        """
        raise NotImplementedError()

    def iter_entry_points(self) -> Iterable[BaseEntryPoint]:
        """Iterate over this distribution's entry points."""
        raise NotImplementedError()

    def _metadata_impl(self) -> email.message.Message:
        """Produce the metadata message that ``metadata`` post-processes."""
        raise NotImplementedError()

    @functools.cached_property
    def metadata(self) -> email.message.Message:
        """Metadata of distribution parsed from e.g. METADATA or PKG-INFO.

        This should return an empty message if the metadata file is unavailable.

        :raises NoneMetadataError: If the metadata file is available, but does
            not contain valid metadata.
        """
        metadata = self._metadata_impl()
        self._add_egg_info_requires(metadata)
        return metadata

    @property
    def metadata_dict(self) -> dict[str, Any]:
        """PEP 566 compliant JSON-serializable representation of METADATA or PKG-INFO.

        This should return an empty dict if the metadata file is unavailable.

        :raises NoneMetadataError: If the metadata file is available, but does
            not contain valid metadata.
        """
        return msg_to_json(self.metadata)

    @property
    def metadata_version(self) -> str | None:
        """Value of "Metadata-Version:" in distribution metadata, if available."""
        return self.metadata.get("Metadata-Version")

    @property
    def raw_name(self) -> str:
        """Value of "Name:" in distribution metadata."""
        # The metadata should NEVER be missing the Name: key, but if it somehow
        # does, fall back to the known canonical name.
        return self.metadata.get("Name", self.canonical_name)

    @property
    def requires_python(self) -> SpecifierSet:
        """Value of "Requires-Python:" in distribution metadata.

        If the key does not exist or contains an invalid value, an empty
        SpecifierSet should be returned.
        """
        value = self.metadata.get("Requires-Python")
        if value is None:
            return SpecifierSet()
        try:
            # Convert to str to satisfy the type checker; this can be a Header object.
            spec = SpecifierSet(str(value))
        except InvalidSpecifier as e:
            message = "Package %r has an invalid Requires-Python: %s"
            logger.warning(message, self.raw_name, e)
            return SpecifierSet()
        return spec

    def iter_dependencies(self, extras: Collection[str] = ()) -> Iterable[Requirement]:
        """Dependencies of this distribution.

        For modern .dist-info distributions, this is the collection of
        "Requires-Dist:" entries in distribution metadata.
        """
        raise NotImplementedError()

    def iter_raw_dependencies(self) -> Iterable[str]:
        """Raw Requires-Dist metadata."""
        return self.metadata.get_all("Requires-Dist", [])

    def iter_provided_extras(self) -> Iterable[NormalizedName]:
        """Extras provided by this distribution.

        For modern .dist-info distributions, this is the collection of
        "Provides-Extra:" entries in distribution metadata.

        The return value of this function is expected to be normalised names,
        per PEP 685, with the returned value being handled appropriately by
        `iter_dependencies`.
        """
        raise NotImplementedError()

    def _iter_declared_entries_from_record(self) -> Iterator[str] | None:
        """Iterate over the first column of each ``RECORD`` row.

        :return: An iterator of normalized paths, or None if the distribution
            has no ``RECORD`` file.
        """
        try:
            text = self.read_text("RECORD")
        except FileNotFoundError:
            return None
        # This extra Path-str cast normalizes entries.
        return (
            str(pathlib.Path(row[0]))
            for row in csv.reader(text.splitlines())
            # csv.reader yields an empty list for a blank line; indexing it
            # would raise IndexError, so such rows are skipped.
            if row
        )

    def _iter_declared_entries_from_legacy(self) -> Iterator[str] | None:
        """Iterate over non-empty lines of ``installed-files.txt``.

        When ``info_location`` lies strictly below ``location``, each entry is
        converted with ``_convert_installed_files_path()``; otherwise the lines
        are returned unchanged.

        :return: An iterator of entries, or None if the distribution has no
            ``installed-files.txt`` file.
        """
        try:
            text = self.read_text("installed-files.txt")
        except FileNotFoundError:
            return None
        paths = (p for p in text.splitlines(keepends=False) if p)
        root = self.location
        info = self.info_location
        if root is None or info is None:
            return paths
        try:
            info_rel = pathlib.Path(info).relative_to(root)
        except ValueError:  # info is not relative to root.
            return paths
        if not info_rel.parts:  # info *is* root.
            return paths
        return (
            _convert_installed_files_path(pathlib.Path(p).parts, info_rel.parts)
            for p in paths
        )

    def iter_declared_entries(self) -> Iterator[str] | None:
        """Iterate through file entries declared in this distribution.

        For modern .dist-info distributions, this is the files listed in the
        ``RECORD`` metadata file. For legacy setuptools distributions, this
        comes from ``installed-files.txt``, with entries normalized to be
        compatible with the format used by ``RECORD``.

        :return: An iterator for listed entries, or None if the distribution
            contains neither ``RECORD`` nor ``installed-files.txt``.
        """
        return (
            self._iter_declared_entries_from_record()
            or self._iter_declared_entries_from_legacy()
        )

    def _iter_requires_txt_entries(self) -> Iterator[RequiresEntry]:
        """Parse a ``requires.txt`` in an egg-info directory.

        This is an INI-ish format where an egg-info stores dependencies. A
        section name holds an extra and/or environment markers, separated by a
        colon (``[extra:marker]``), while each entry is an arbitrary string
        (not a key-value pair) representing a dependency as a requirement
        string (no markers).

        There is a construct in ``importlib.metadata`` called ``Sectioned`` that
        does mostly the same, but the format is currently considered private.
        """
        try:
            content = self.read_text("requires.txt")
        except FileNotFoundError:
            return
        extra = marker = ""  # Section-less entries don't have markers.
        for line in content.splitlines():
            line = line.strip()
            if not line or line.startswith("#"):  # Comment; ignored.
                continue
            if line.startswith("[") and line.endswith("]"):  # A section header.
                extra, _, marker = line.strip("[]").partition(":")
                continue
            yield RequiresEntry(requirement=line, extra=extra, marker=marker)

    def _iter_egg_info_extras(self) -> Iterable[str]:
        """Get extras from the egg-info directory."""
        # Seeded with "" so that section-less entries never produce an extra.
        known_extras = {""}
        for entry in self._iter_requires_txt_entries():
            extra = canonicalize_name(entry.extra)
            if extra in known_extras:
                continue
            known_extras.add(extra)
            yield extra

    def _iter_egg_info_dependencies(self) -> Iterable[str]:
        """Get distribution dependencies from the egg-info directory.

        To ease parsing, this converts a legacy dependency entry into a PEP 508
        requirement string. Like ``_iter_requires_txt_entries()``, there is code
        in ``importlib.metadata`` that does mostly the same, but does not do
        exactly what we need.

        Namely, ``importlib.metadata`` does not normalize the extra name before
        putting it into the requirement string, which causes marker comparison
        to fail because the dist-info format does normalize. This is consistent
        in all currently available PEP 517 backends, although not standardized.
        """
        for entry in self._iter_requires_txt_entries():
            extra = canonicalize_name(entry.extra)
            if extra and entry.marker:
                marker = f'({entry.marker}) and extra == "{extra}"'
            elif extra:
                marker = f'extra == "{extra}"'
            elif entry.marker:
                marker = entry.marker
            else:
                marker = ""
            if marker:
                yield f"{entry.requirement} ; {marker}"
            else:
                yield entry.requirement

    def _add_egg_info_requires(self, metadata: email.message.Message) -> None:
        """Add egg-info requires.txt information to the metadata."""
        # Each header is only filled in when the metadata has no value for it.
        if not metadata.get_all("Requires-Dist"):
            for dep in self._iter_egg_info_dependencies():
                metadata["Requires-Dist"] = dep
        if not metadata.get_all("Provides-Extra"):
            for extra in self._iter_egg_info_extras():
                metadata["Provides-Extra"] = extra
577
578
class BaseEnvironment:
    """An environment containing distributions to introspect."""

    @classmethod
    def default(cls) -> BaseEnvironment:
        """Create the default environment; subclasses must override this."""
        raise NotImplementedError

    @classmethod
    def from_paths(cls, paths: list[str] | None) -> BaseEnvironment:
        """Create an environment from ``paths``; subclasses must override this."""
        raise NotImplementedError

    def get_distribution(self, name: str) -> BaseDistribution | None:
        """Look up an installed distribution by requirement name.

        ``name`` is not guaranteed to be normalized, so implementations have
        to canonicalize it before performing the lookup.
        """
        raise NotImplementedError

    def _iter_distributions(self) -> Iterator[BaseDistribution]:
        """Iterate through installed distributions.

        Subclasses implement this hook, but callers should not use it
        directly: ``iter_all_distributions()`` wraps it and drops
        distributions whose project name is not valid.
        """
        raise NotImplementedError

    def iter_all_distributions(self) -> Iterator[BaseDistribution]:
        """Iterate through every installed distribution with a valid name.

        Distributions whose canonical name is not a valid project name are
        logged with a warning and skipped; no other filtering is applied.
        """
        # Make sure each distribution actually comes from a valid Python
        # packaging distribution. Pip's AdjacentTempDirectory leaves folders
        # e.g. ``~atplotlib.dist-info`` if cleanup was interrupted. The
        # valid project name pattern is taken from PEP 508.
        valid_project_name = re.compile(
            r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$",
            flags=re.IGNORECASE,
        )
        for dist in self._iter_distributions():
            if valid_project_name.match(dist.canonical_name):
                yield dist
                continue
            logger.warning(
                "Ignoring invalid distribution %s (%s)",
                dist.canonical_name,
                dist.location,
            )

    def iter_installed_distributions(
        self,
        local_only: bool = True,
        skip: Container[str] = stdlib_pkgs,
        include_editables: bool = True,
        editables_only: bool = False,
        user_only: bool = False,
    ) -> Iterator[BaseDistribution]:
        """Return an iterator over installed distributions.

        This builds on ``iter_all_distributions()`` and applies additional
        filters. Calling it without arguments is *not* the same as calling
        ``iter_all_distributions()``, because some of the defaults already
        exclude packages.

        :param local_only: If True (default), only return installations
            local to the current virtualenv, if in a virtualenv.
        :param skip: A container of canonicalized project names to ignore;
            defaults to ``stdlib_pkgs``.
        :param include_editables: If False, don't report editables.
        :param editables_only: If True, only report editables.
        :param user_only: If True, only report installations in the user
            site directory.
        """

        def wanted(dist: BaseDistribution) -> bool:
            # Checks run in a fixed order and stop at the first failure.
            if local_only and not dist.local:
                return False
            if not include_editables and dist.editable:
                return False
            if editables_only and not dist.editable:
                return False
            if user_only and not dist.in_usersite:
                return False
            return dist.canonical_name not in skip

        return (dist for dist in self.iter_all_distributions() if wanted(dist))
662
663
class Wheel(Protocol):
    """Structural interface for a wheel that can be opened as a zip archive."""

    # Identifies where the wheel comes from; set by implementations.
    location: str

    def as_zipfile(self) -> zipfile.ZipFile:
        """Return the wheel as a ``zipfile.ZipFile``; must be overridden."""
        raise NotImplementedError
669
670
class FilesystemWheel(Wheel):
    """Wheel whose ``location`` is opened directly by ``zipfile.ZipFile``."""

    def __init__(self, location: str) -> None:
        """Remember ``location`` for later use by ``as_zipfile()``."""
        self.location = location

    def as_zipfile(self) -> zipfile.ZipFile:
        """Open ``self.location`` as a zip archive with Zip64 allowed."""
        archive = zipfile.ZipFile(self.location, allowZip64=True)
        return archive
677
678
class MemoryWheel(Wheel):
    """Wheel read from a binary stream rather than from ``location``."""

    def __init__(self, location: str, stream: IO[bytes]) -> None:
        """Store ``location`` and the ``stream`` holding the wheel's bytes."""
        self.stream = stream
        self.location = location

    def as_zipfile(self) -> zipfile.ZipFile:
        """Open ``self.stream`` as a zip archive with Zip64 allowed."""
        archive = zipfile.ZipFile(self.stream, allowZip64=True)
        return archive