1from __future__ import annotations
2
3import csv
4import email.message
5import functools
6import json
7import logging
8import pathlib
9import re
10import zipfile
11from collections.abc import Collection, Container, Iterable, Iterator
12from typing import (
13 IO,
14 Any,
15 NamedTuple,
16 Protocol,
17)
18
19from pip._vendor.packaging.requirements import Requirement
20from pip._vendor.packaging.specifiers import InvalidSpecifier, SpecifierSet
21from pip._vendor.packaging.utils import NormalizedName, canonicalize_name
22from pip._vendor.packaging.version import Version
23
24from pip._internal.exceptions import NoneMetadataError
25from pip._internal.locations import site_packages, user_site
26from pip._internal.models.direct_url import (
27 DIRECT_URL_METADATA_NAME,
28 DirectUrl,
29 DirectUrlValidationError,
30)
31from pip._internal.utils.compat import stdlib_pkgs # TODO: Move definition here.
32from pip._internal.utils.egg_link import egg_link_path_from_sys_path
33from pip._internal.utils.misc import is_local, normalize_path
34from pip._internal.utils.urls import url_to_path
35
36from ._json import msg_to_json
37
38InfoPath = str | pathlib.PurePath
39
40logger = logging.getLogger(__name__)
41
42
class BaseEntryPoint(Protocol):
    """Structural interface of an entry point: ``name``, ``value``, ``group``.

    Every property here is a stub that raises ``NotImplementedError``.
    """

    @property
    def name(self) -> str:
        """The entry point's name, as a string."""
        raise NotImplementedError

    @property
    def value(self) -> str:
        """The entry point's value, as a string."""
        raise NotImplementedError

    @property
    def group(self) -> str:
        """The entry point's group, as a string."""
        raise NotImplementedError
55
56
def _convert_installed_files_path(
    entry: tuple[str, ...],
    info: tuple[str, ...],
) -> str:
    """Rewrite a legacy ``installed-files.txt`` entry as a ``RECORD``-style path.

    Legacy entries are written relative to the info directory, whereas
    ``RECORD`` entries are relative to the package root (e.g. the
    site-packages directory).

    :param entry: Path parts of the installed-files.txt entry.
    :param info: Path parts of the egg-info directory relative to package root.
    :returns: The converted entry.

    To stay friendly to symlinks, neither ``abspath()`` nor
    ``Path.resolve()`` is used; the path parts are manipulated directly:

    1. Each leading ``..`` of ``entry`` cancels the last part of ``info``;
       once ``info`` is exhausted (or already ends in ``..``), a ``..`` is
       appended to it instead.
    2. What remains of both is joined.
    """
    base_parts = list(info)
    leading_parents = 0
    for part in entry:
        if part != "..":
            break
        leading_parents += 1
        if base_parts and base_parts[-1] != "..":
            # Step up out of the innermost directory of ``info``.
            base_parts.pop()
        else:
            # Nothing left to cancel; climb above the package root.
            base_parts.append("..")
    remainder = entry[leading_parents:]
    return str(pathlib.Path(*base_parts, *remainder))
85
86
class RequiresEntry(NamedTuple):
    """One dependency line of a ``requires.txt``, with its section context."""

    # The requirement string exactly as written on the line.
    requirement: str
    # Extra named by the enclosing section header; "" when there is none.
    extra: str
    # Marker text from the enclosing section header; "" when there is none.
    marker: str
91
92
class BaseDistribution(Protocol):
    """Interface used to introspect a single distribution's metadata.

    Members that raise ``NotImplementedError`` must be supplied by an
    implementation; the remaining members are shared logic built on them.
    """

    @classmethod
    def from_directory(cls, directory: str) -> BaseDistribution:
        """Load the distribution from a metadata directory.

        :param directory: Path to a metadata directory, e.g. ``.dist-info``.
        """
        raise NotImplementedError()

    @classmethod
    def from_metadata_file_contents(
        cls,
        metadata_contents: bytes,
        filename: str,
        project_name: str,
    ) -> BaseDistribution:
        """Load the distribution from the contents of a METADATA file.

        This is used to implement PEP 658 by generating a "shallow" dist object that can
        be used for resolution without downloading or building the actual dist yet.

        :param metadata_contents: The contents of a METADATA file.
        :param filename: File name for the dist with this metadata.
        :param project_name: Name of the project this dist represents.
        """
        raise NotImplementedError()

    @classmethod
    def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
        """Load the distribution from a given wheel.

        :param wheel: A concrete wheel definition.
        :param name: File name of the wheel.

        :raises InvalidWheel: Whenever loading of the wheel causes a
            :py:exc:`zipfile.BadZipFile` exception to be thrown.
        :raises UnsupportedWheel: If the wheel is a valid zip, but malformed
            internally.
        """
        raise NotImplementedError()

    def __repr__(self) -> str:
        return f"{self.raw_name} {self.raw_version} ({self.location})"

    def __str__(self) -> str:
        return f"{self.raw_name} {self.raw_version}"

    @property
    def location(self) -> str | None:
        """Where the distribution is loaded from.

        A string value is not necessarily a filesystem path, since distributions
        can be loaded from other sources, e.g. arbitrary zip archives. ``None``
        means the distribution is created in-memory.

        Do not canonicalize this value with e.g. ``pathlib.Path.resolve()``. If
        this is a symbolic link, we want to preserve the relative path between
        it and files in the distribution.
        """
        raise NotImplementedError()

    @property
    def editable_project_location(self) -> str | None:
        """The project location for editable distributions.

        This is the directory where pyproject.toml or setup.py is located.
        None if the distribution is not installed in editable mode.
        """
        # TODO: this property is relatively costly to compute, memoize it ?
        direct_url = self.direct_url
        if direct_url:
            if direct_url.is_local_editable():
                return url_to_path(direct_url.url)
        else:
            # Search for an .egg-link file by walking sys.path, as it was
            # done before by dist_is_editable().
            egg_link_path = egg_link_path_from_sys_path(self.raw_name)
            if egg_link_path:
                # TODO: get project location from second line of egg_link file
                #       (https://github.com/pypa/pip/issues/10243)
                return self.location
        return None

    @property
    def installed_location(self) -> str | None:
        """The distribution's "installed" location.

        This should generally be a ``site-packages`` directory. This is
        usually ``dist.location``, except for legacy develop-installed packages,
        where ``dist.location`` is the source code location, and this is where
        the ``.egg-link`` file is.

        The returned location is normalized (in particular, with symlinks removed).
        """
        raise NotImplementedError()

    @property
    def info_location(self) -> str | None:
        """Location of the .[egg|dist]-info directory or file.

        Similarly to ``location``, a string value is not necessarily a
        filesystem path. ``None`` means the distribution is created in-memory.

        For a modern .dist-info installation on disk, this should be something
        like ``{location}/{raw_name}-{version}.dist-info``.

        Do not canonicalize this value with e.g. ``pathlib.Path.resolve()``. If
        this is a symbolic link, we want to preserve the relative path between
        it and other files in the distribution.
        """
        raise NotImplementedError()

    @property
    def installed_by_distutils(self) -> bool:
        """Whether this distribution is installed with legacy distutils format.

        A distribution installed with "raw" distutils not patched by setuptools
        uses one single file at ``info_location`` to store metadata. We need to
        treat this specially on uninstallation.
        """
        info_location = self.info_location
        if not info_location:
            return False
        return pathlib.Path(info_location).is_file()

    @property
    def installed_as_egg(self) -> bool:
        """Whether this distribution is installed as an egg.

        This usually indicates the distribution was installed by (older versions
        of) easy_install.
        """
        location = self.location
        if not location:
            return False
        # XXX if the distribution is a zipped egg, location has a trailing /
        # so we resort to pathlib.Path to check the suffix in a reliable way.
        return pathlib.Path(location).suffix == ".egg"

    @property
    def installed_with_setuptools_egg_info(self) -> bool:
        """Whether this distribution is installed with the ``.egg-info`` format.

        This usually indicates the distribution was installed with setuptools
        with an old pip version or with ``single-version-externally-managed``.

        Note that this ensures the metadata store is a directory. distutils can
        also install an ``.egg-info``, but as a file, not a directory. This
        property is *False* for that case. Also see ``installed_by_distutils``.
        """
        info_location = self.info_location
        if not info_location:
            return False
        if not info_location.endswith(".egg-info"):
            return False
        return pathlib.Path(info_location).is_dir()

    @property
    def installed_with_dist_info(self) -> bool:
        """Whether this distribution is installed with the "modern format".

        This indicates a "modern" installation, e.g. storing metadata in the
        ``.dist-info`` directory. This applies to installations made by
        setuptools (but through pip, not directly), or anything using the
        standardized build backend interface (PEP 517).
        """
        info_location = self.info_location
        if not info_location:
            return False
        if not info_location.endswith(".dist-info"):
            return False
        return pathlib.Path(info_location).is_dir()

    @property
    def canonical_name(self) -> NormalizedName:
        """Normalized project name; must be supplied by implementations."""
        raise NotImplementedError()

    @property
    def version(self) -> Version:
        """Version as a ``Version`` object; must be supplied by implementations."""
        raise NotImplementedError()

    @property
    def raw_version(self) -> str:
        """Version as a string; must be supplied by implementations."""
        raise NotImplementedError()

    @property
    def setuptools_filename(self) -> str:
        """Convert a project name to its setuptools-compatible filename.

        This is a copy of ``pkg_resources.to_filename()`` for compatibility.
        """
        return self.raw_name.replace("-", "_")

    @property
    def direct_url(self) -> DirectUrl | None:
        """Obtain a DirectUrl from this distribution.

        Returns None if the distribution has no `direct_url.json` metadata,
        or if `direct_url.json` is invalid (a warning is logged in that case).
        """
        try:
            content = self.read_text(DIRECT_URL_METADATA_NAME)
        except FileNotFoundError:
            return None
        try:
            return DirectUrl.from_json(content)
        except (
            UnicodeDecodeError,
            json.JSONDecodeError,
            DirectUrlValidationError,
        ) as e:
            logger.warning(
                "Error parsing %s for %s: %s",
                DIRECT_URL_METADATA_NAME,
                self.canonical_name,
                e,
            )
            return None

    @property
    def installer(self) -> str:
        """First non-blank line of the ``INSTALLER`` file, stripped.

        Returns an empty string if the file cannot be read or has no
        non-blank line.
        """
        try:
            installer_text = self.read_text("INSTALLER")
        except (OSError, ValueError, NoneMetadataError):
            return ""  # Fail silently if the installer file cannot be read.
        for line in installer_text.splitlines():
            cleaned_line = line.strip()
            if cleaned_line:
                return cleaned_line
        return ""

    @property
    def requested(self) -> bool:
        """Whether the info directory contains a ``REQUESTED`` file."""
        return self.is_file("REQUESTED")

    @property
    def editable(self) -> bool:
        """Whether ``editable_project_location`` yields a non-empty value."""
        return bool(self.editable_project_location)

    @property
    def local(self) -> bool:
        """If distribution is installed in the current virtual environment.

        Always True if we're not in a virtualenv. False when
        ``installed_location`` is ``None``.
        """
        if self.installed_location is None:
            return False
        return is_local(self.installed_location)

    @property
    def in_usersite(self) -> bool:
        """Whether ``installed_location`` starts with the normalized user site.

        False when either ``installed_location`` or ``user_site`` is ``None``.
        """
        if self.installed_location is None or user_site is None:
            return False
        # NOTE(review): this is a plain string-prefix comparison, not a
        # path-component comparison.
        return self.installed_location.startswith(normalize_path(user_site))

    @property
    def in_site_packages(self) -> bool:
        """Whether ``installed_location`` starts with normalized site-packages.

        False when either ``installed_location`` or ``site_packages`` is
        ``None``.
        """
        if self.installed_location is None or site_packages is None:
            return False
        # NOTE(review): this is a plain string-prefix comparison, not a
        # path-component comparison.
        return self.installed_location.startswith(normalize_path(site_packages))

    def is_file(self, path: InfoPath) -> bool:
        """Check whether an entry in the info directory is a file."""
        raise NotImplementedError()

    def iter_distutils_script_names(self) -> Iterator[str]:
        """Find distutils 'scripts' entries metadata.

        If 'scripts' is supplied in ``setup.py``, distutils records those in the
        installed distribution's ``scripts`` directory, a file for each script.
        """
        raise NotImplementedError()

    def read_text(self, path: InfoPath) -> str:
        """Read a file in the info directory.

        :raise FileNotFoundError: If ``path`` does not exist in the directory.
        :raise NoneMetadataError: If ``path`` exists in the info directory, but
            cannot be read.
        """
        raise NotImplementedError()

    def iter_entry_points(self) -> Iterable[BaseEntryPoint]:
        """Iterate over entry points; must be supplied by implementations."""
        raise NotImplementedError()

    def _metadata_impl(self) -> email.message.Message:
        """Produce the message that ``metadata`` post-processes and caches.

        Must be supplied by implementations.
        """
        raise NotImplementedError()

    @functools.cached_property
    def metadata(self) -> email.message.Message:
        """Metadata of distribution parsed from e.g. METADATA or PKG-INFO.

        This should return an empty message if the metadata file is unavailable.

        :raises NoneMetadataError: If the metadata file is available, but does
            not contain valid metadata.
        """
        metadata = self._metadata_impl()
        self._add_egg_info_requires(metadata)
        return metadata

    @property
    def metadata_dict(self) -> dict[str, Any]:
        """PEP 566 compliant JSON-serializable representation of METADATA or PKG-INFO.

        This should return an empty dict if the metadata file is unavailable.

        :raises NoneMetadataError: If the metadata file is available, but does
            not contain valid metadata.
        """
        return msg_to_json(self.metadata)

    @property
    def metadata_version(self) -> str | None:
        """Value of "Metadata-Version:" in distribution metadata, if available."""
        return self.metadata.get("Metadata-Version")

    @property
    def raw_name(self) -> str:
        """Value of "Name:" in distribution metadata."""
        # The metadata should NEVER be missing the Name: key, but if it somehow
        # does, fall back to the known canonical name.
        return self.metadata.get("Name", self.canonical_name)

    @property
    def requires_python(self) -> SpecifierSet:
        """Value of "Requires-Python:" in distribution metadata.

        If the key does not exist or contains an invalid value, an empty
        SpecifierSet should be returned. An invalid value is also logged as a
        warning.
        """
        value = self.metadata.get("Requires-Python")
        if value is None:
            return SpecifierSet()
        try:
            # Convert to str to satisfy the type checker; this can be a Header object.
            spec = SpecifierSet(str(value))
        except InvalidSpecifier as e:
            message = "Package %r has an invalid Requires-Python: %s"
            logger.warning(message, self.raw_name, e)
            return SpecifierSet()
        return spec

    def iter_dependencies(self, extras: Collection[str] = ()) -> Iterable[Requirement]:
        """Dependencies of this distribution.

        For modern .dist-info distributions, this is the collection of
        "Requires-Dist:" entries in distribution metadata.
        """
        raise NotImplementedError()

    def iter_raw_dependencies(self) -> Iterable[str]:
        """Raw Requires-Dist metadata."""
        return self.metadata.get_all("Requires-Dist", [])

    def iter_provided_extras(self) -> Iterable[NormalizedName]:
        """Extras provided by this distribution.

        For modern .dist-info distributions, this is the collection of
        "Provides-Extra:" entries in distribution metadata.

        The return value of this function is expected to be normalised names,
        per PEP 685, with the returned value being handled appropriately by
        `iter_dependencies`.
        """
        raise NotImplementedError()

    def _iter_declared_entries_from_record(self) -> Iterator[str] | None:
        """Iterate over the path column of ``RECORD``.

        :return: A lazy iterator of normalized paths, or ``None`` if the
            distribution has no ``RECORD`` file.
        """
        try:
            text = self.read_text("RECORD")
        except FileNotFoundError:
            return None
        rows = csv.reader(text.splitlines())
        # A blank line is parsed by ``csv.reader`` as an empty row; skip those
        # rather than failing with IndexError on ``row[0]``.
        # The extra Path-str cast normalizes entries.
        return (str(pathlib.Path(row[0])) for row in rows if row)

    def _iter_declared_entries_from_legacy(self) -> Iterator[str] | None:
        """Iterate over the entries of the legacy ``installed-files.txt``.

        Entries are converted to ``RECORD``-style paths when the info
        directory is located strictly below ``location``; otherwise they are
        yielded as written.

        :return: A lazy iterator of paths, or ``None`` if the distribution has
            no ``installed-files.txt`` file.
        """
        try:
            text = self.read_text("installed-files.txt")
        except FileNotFoundError:
            return None
        paths = (p for p in text.splitlines(keepends=False) if p)
        root = self.location
        info = self.info_location
        if root is None or info is None:
            return paths
        try:
            info_rel = pathlib.Path(info).relative_to(root)
        except ValueError:  # info is not relative to root.
            return paths
        if not info_rel.parts:  # info *is* root.
            return paths
        return (
            _convert_installed_files_path(pathlib.Path(p).parts, info_rel.parts)
            for p in paths
        )

    def iter_declared_entries(self) -> Iterator[str] | None:
        """Iterate through file entries declared in this distribution.

        For modern .dist-info distributions, this is the files listed in the
        ``RECORD`` metadata file. For legacy setuptools distributions, this
        comes from ``installed-files.txt``, with entries normalized to be
        compatible with the format used by ``RECORD``.

        :return: An iterator for listed entries, or None if the distribution
            contains neither ``RECORD`` nor ``installed-files.txt``.
        """
        # A generator object is always truthy, so ``or`` only falls through to
        # the legacy file when RECORD is missing (i.e. the first call is None).
        return (
            self._iter_declared_entries_from_record()
            or self._iter_declared_entries_from_legacy()
        )

    def _iter_requires_txt_entries(self) -> Iterator[RequiresEntry]:
        """Parse a ``requires.txt`` in an egg-info directory.

        This is an INI-ish format where an egg-info stores dependencies. A
        section header has the form ``[extra:marker]``, where either part may
        be empty; each entry is an arbitrary string (not a key-value pair)
        representing a dependency as a requirement string (no markers).
        Yields nothing if ``requires.txt`` does not exist.

        There is a construct in ``importlib.metadata`` called ``Sectioned`` that
        does mostly the same, but the format is currently considered private.
        """
        try:
            content = self.read_text("requires.txt")
        except FileNotFoundError:
            return
        extra = marker = ""  # Section-less entries don't have markers.
        for line in content.splitlines():
            line = line.strip()
            if not line or line.startswith("#"):  # Blank or comment; ignored.
                continue
            if line.startswith("[") and line.endswith("]"):  # A section header.
                extra, _, marker = line.strip("[]").partition(":")
                continue
            yield RequiresEntry(requirement=line, extra=extra, marker=marker)

    def _iter_egg_info_extras(self) -> Iterable[str]:
        """Get extras from the egg-info directory.

        Each canonicalized extra name is yielded once, in order of first
        appearance; the empty (section-less) extra is never yielded.
        """
        known_extras = {""}
        for entry in self._iter_requires_txt_entries():
            extra = canonicalize_name(entry.extra)
            if extra in known_extras:
                continue
            known_extras.add(extra)
            yield extra

    def _iter_egg_info_dependencies(self) -> Iterable[str]:
        """Get distribution dependencies from the egg-info directory.

        To ease parsing, this converts a legacy dependency entry into a PEP 508
        requirement string. Like ``_iter_requires_txt_entries()``, there is code
        in ``importlib.metadata`` that does mostly the same, but does not do
        exactly what we need.

        Namely, ``importlib.metadata`` does not normalize the extra name before
        putting it into the requirement string, which causes marker comparison
        to fail because the dist-info format does normalize. This is consistent
        in all currently available PEP 517 backends, although not standardized.
        """
        for entry in self._iter_requires_txt_entries():
            extra = canonicalize_name(entry.extra)
            if extra and entry.marker:
                marker = f'({entry.marker}) and extra == "{extra}"'
            elif extra:
                marker = f'extra == "{extra}"'
            elif entry.marker:
                marker = entry.marker
            else:
                marker = ""
            if marker:
                yield f"{entry.requirement} ; {marker}"
            else:
                yield entry.requirement

    def _add_egg_info_requires(self, metadata: email.message.Message) -> None:
        """Add egg-info requires.txt information to the metadata.

        Each header kind is only filled in when ``metadata`` does not already
        carry at least one header of that kind.
        """
        # Item assignment on a Message appends a header; it does not replace
        # existing ones, so each loop adds one header per value.
        if not metadata.get_all("Requires-Dist"):
            for dep in self._iter_egg_info_dependencies():
                metadata["Requires-Dist"] = dep
        if not metadata.get_all("Provides-Extra"):
            for extra in self._iter_egg_info_extras():
                metadata["Provides-Extra"] = extra
576
577
class BaseEnvironment:
    """An environment containing distributions to introspect."""

    @classmethod
    def default(cls) -> BaseEnvironment:
        """Subclasses must override; this base implementation always raises."""
        raise NotImplementedError()

    @classmethod
    def from_paths(cls, paths: list[str] | None) -> BaseEnvironment:
        """Subclasses must override; this base implementation always raises.

        :param paths: A list of path strings, or ``None``.
        """
        raise NotImplementedError()

    def get_distribution(self, name: str) -> BaseDistribution | None:
        """Given a requirement name, return the installed distribution, if any.

        The name may not be normalized. The implementation must canonicalize
        it for lookup.
        """
        raise NotImplementedError()

    def _iter_distributions(self) -> Iterator[BaseDistribution]:
        """Iterate through installed distributions.

        This function should be implemented by subclass, but never called
        directly. Use the public ``iter_all_distributions()`` instead, which
        implements additional logic to make sure the distributions are valid.
        """
        raise NotImplementedError()

    def iter_all_distributions(self) -> Iterator[BaseDistribution]:
        """Iterate through all installed distributions with a valid name.

        Distributions whose canonical name is not a valid project name are
        skipped with a warning; no other filtering is applied.
        """
        # Make sure the distribution actually comes from a valid Python
        # packaging distribution. Pip's AdjacentTempDirectory leaves folders
        # e.g. ``~atplotlib.dist-info`` if cleanup was interrupted. The
        # valid project name pattern is taken from PEP 508.
        # Compiled once here rather than once per distribution.
        valid_project_name = re.compile(
            r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$",
            flags=re.IGNORECASE,
        )
        for dist in self._iter_distributions():
            if not valid_project_name.match(dist.canonical_name):
                logger.warning(
                    "Ignoring invalid distribution %s (%s)",
                    dist.canonical_name,
                    dist.location,
                )
                continue
            yield dist

    def iter_installed_distributions(
        self,
        local_only: bool = True,
        skip: Container[str] = stdlib_pkgs,
        include_editables: bool = True,
        editables_only: bool = False,
        user_only: bool = False,
    ) -> Iterator[BaseDistribution]:
        """Return an iterator of installed distributions.

        This is based on ``iter_all_distributions()`` with additional filtering
        options. Note that ``iter_installed_distributions()`` without arguments
        is *not* equal to ``iter_all_distributions()``, since some of the
        configurations exclude packages by default.

        :param local_only: If True (default), only return installations
            local to the current virtualenv, if in a virtualenv.
        :param skip: A container of canonicalized project names to ignore;
            defaults to ``stdlib_pkgs``.
        :param include_editables: If False, don't report editables.
        :param editables_only: If True, only report editables.
        :param user_only: If True, only report installations in the user
            site directory.
        """
        # Filters are stacked lazily; nothing is evaluated until the returned
        # iterator is consumed.
        it = self.iter_all_distributions()
        if local_only:
            it = (d for d in it if d.local)
        if not include_editables:
            it = (d for d in it if not d.editable)
        if editables_only:
            it = (d for d in it if d.editable)
        if user_only:
            it = (d for d in it if d.in_usersite)
        return (d for d in it if d.canonical_name not in skip)
661
662
class Wheel(Protocol):
    """Structural interface of a wheel that can be opened as a zip archive."""

    # String identifying where the wheel comes from.
    location: str

    def as_zipfile(self) -> zipfile.ZipFile:
        """Return the wheel as a ``zipfile.ZipFile``; stub that always raises."""
        raise NotImplementedError
668
669
class FilesystemWheel(Wheel):
    """A wheel opened directly from its ``location``."""

    def __init__(self, location: str) -> None:
        """Remember ``location``; nothing is opened until ``as_zipfile()``."""
        self.location = location

    def as_zipfile(self) -> zipfile.ZipFile:
        """Open ``location`` as a zip archive with ZIP64 support enabled."""
        archive = zipfile.ZipFile(self.location, allowZip64=True)
        return archive
676
677
class MemoryWheel(Wheel):
    """A wheel whose archive bytes are read from a binary stream."""

    def __init__(self, location: str, stream: IO[bytes]) -> None:
        """Remember ``location`` and the ``stream`` holding the archive."""
        self.stream = stream
        self.location = location

    def as_zipfile(self) -> zipfile.ZipFile:
        """Open ``stream`` as a zip archive with ZIP64 support enabled."""
        archive = zipfile.ZipFile(self.stream, allowZip64=True)
        return archive