1# This file is dual licensed under the terms of the Apache License, Version
2# 2.0, and the BSD License. See the LICENSE file in the root of this repository
3# for complete details.
4
5from __future__ import annotations
6
7import re
8from typing import NewType, Tuple, Union, cast
9
10from .tags import Tag, parse_tag
11from .version import InvalidVersion, Version
12
# A wheel build tag: the empty tuple when the filename carries no build tag,
# otherwise a pair of (leading integer, remaining text).
BuildTag = Union[Tuple[()], Tuple[int, str]]
# Distinct static type for names returned by ``canonicalize_name``; at runtime
# the value is a plain ``str``.
NormalizedName = NewType("NormalizedName", str)
15
16
class InvalidName(ValueError):
    """
    Raised for a distribution name that fails validation.

    Users should refer to the packaging user guide for the naming rules.
    """
21
22
class InvalidWheelFilename(ValueError):
    """
    Raised when a wheel filename cannot be parsed.

    Users should refer to PEP 427 for the expected filename format.
    """
27
28
class InvalidSdistFilename(ValueError):
    """
    Raised when an sdist filename cannot be parsed.

    Users should refer to the packaging user guide for the expected format.
    """
33
34
# Core metadata spec for `Name`: letters, digits, ".", "_" and "-", beginning
# and ending with a letter or digit.  The pattern is anchored with ``\Z``
# rather than ``$`` because ``$`` also matches just before a trailing newline,
# which would let a name ending in a newline pass validation.
_validate_regex = re.compile(
    r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])\Z", re.IGNORECASE
)
# Runs of "-", "_" and "." are collapsed into a single "-" when normalizing.
_canonicalize_regex = re.compile(r"[-_.]+")
# A normalized name is one or more runs of lowercase letters/digits joined by
# single hyphens.  Expressing it this way rules out "--" at every position,
# including directly after the first character (e.g. "a--b").
_normalized_regex = re.compile(r"^[a-z0-9]+(?:-[a-z0-9]+)*\Z")
# PEP 427: The build number must start with a digit.
_build_tag_regex = re.compile(r"(\d+)(.*)")
43
44
def canonicalize_name(name: str, *, validate: bool = False) -> NormalizedName:
    """
    Return the normalized form of a distribution name.

    Every run of ``-``, ``_`` and ``.`` is replaced by a single ``-`` and the
    result is lowercased.

    :param name: The distribution name to normalize.
    :param validate: When true, check ``name`` against the core metadata
        ``Name`` pattern before normalizing.
    :raises InvalidName: If ``validate`` is true and ``name`` does not match.
    """
    # fullmatch() requires the whole string to be consumed, so a name with a
    # trailing newline is rejected even where a "$" anchor would let it by.
    if validate and _validate_regex.fullmatch(name) is None:
        raise InvalidName(f"name is invalid: {name!r}")
    # This is taken from PEP 503.
    value = _canonicalize_regex.sub("-", name).lower()
    return cast(NormalizedName, value)
51
52
def is_normalized_name(name: str) -> bool:
    """
    Return whether ``name`` is already in normalized form.

    A normalized name consists only of lowercase ASCII letters, digits and
    hyphens, starts and ends with a letter or digit, and never contains two
    hyphens in a row.

    :param name: The name to check.
    """
    # Consecutive hyphens are rejected explicitly so that inputs such as
    # "a--b" are never reported as normalized.
    if "--" in name:
        return False
    # fullmatch() requires the whole string to be consumed, so a trailing
    # newline cannot slip past the end-of-pattern anchor.
    return _normalized_regex.fullmatch(name) is not None
55
56
def canonicalize_version(
    version: Version | str, *, strip_trailing_zero: bool = True
) -> str:
    """
    Render a version as a canonical string.

    The output closely follows ``Version.__str__``; the one subtle difference
    is in the release segment, whose trailing ``.0`` components are removed
    when ``strip_trailing_zero`` is true.

    A string that cannot be parsed as a ``Version`` is returned unchanged.
    """
    if not isinstance(version, str):
        parsed = version
    else:
        try:
            parsed = Version(version)
        except InvalidVersion:
            # Legacy versions cannot be normalized
            return version

    # Each optional segment renders as an empty string when it is absent.
    epoch = f"{parsed.epoch}!" if parsed.epoch != 0 else ""

    release = ".".join(map(str, parsed.release))
    if strip_trailing_zero:
        # NB: This strips trailing '.0's to normalize
        release = re.sub(r"(\.0)+$", "", release)

    pre = "".join(map(str, parsed.pre)) if parsed.pre is not None else ""
    post = f".post{parsed.post}" if parsed.post is not None else ""
    dev = f".dev{parsed.dev}" if parsed.dev is not None else ""
    local = f"+{parsed.local}" if parsed.local is not None else ""

    return f"{epoch}{release}{pre}{post}{dev}{local}"
103
104
def parse_wheel_filename(
    filename: str,
) -> tuple[NormalizedName, Version, BuildTag, frozenset[Tag]]:
    """
    Split a wheel filename into its name, version, build tag and tag set.

    After the ``.whl`` extension is removed, the filename must contain four
    or five dashes: a project name, a version, an optional build tag, and a
    final group of three dash-separated fields that is passed to
    ``parse_tag``.

    :raises InvalidWheelFilename: If the extension, the number of parts, the
        project name, the version or the build tag is invalid.
    """
    extension = ".whl"
    if not filename.endswith(extension):
        raise InvalidWheelFilename(
            f"Invalid wheel filename (extension must be '.whl'): {filename}"
        )

    # From here on, error messages report the filename without its extension.
    stem = filename[: -len(extension)]
    dash_count = stem.count("-")
    if dash_count != 4 and dash_count != 5:
        raise InvalidWheelFilename(
            f"Invalid wheel filename (wrong number of parts): {stem}"
        )

    # Limiting the split keeps the last three fields joined as one item, so
    # ``remainder`` is either [tags] or [build, tags].
    name_part, version_part, *remainder = stem.split("-", dash_count - 2)

    # See PEP 427 for the rules on escaping the project name.
    has_double_underscore = "__" in name_part
    if (
        has_double_underscore
        or re.match(r"^[\w\d._]*$", name_part, re.UNICODE) is None
    ):
        raise InvalidWheelFilename(f"Invalid project name: {stem}")
    name = canonicalize_name(name_part)

    try:
        version = Version(version_part)
    except InvalidVersion as exc:
        raise InvalidWheelFilename(
            f"Invalid wheel filename (invalid version): {stem}"
        ) from exc

    # No build tag unless the filename has the extra (fifth) dash.
    build: BuildTag = ()
    if dash_count == 5:
        build_part = remainder[0]
        build_match = _build_tag_regex.match(build_part)
        if build_match is None:
            raise InvalidWheelFilename(
                f"Invalid build number: {build_part} in '{stem}'"
            )
        number, suffix = build_match.groups()
        build = cast(BuildTag, (int(number), suffix))

    tags = parse_tag(remainder[-1])
    return (name, version, build, tags)
146
147
def parse_sdist_filename(filename: str) -> tuple[NormalizedName, Version]:
    """
    Split an sdist filename into its normalized name and version.

    The filename must end in ``.tar.gz`` or ``.zip``; everything before the
    last dash of the remaining stem is the project name and everything after
    it is the version.

    :raises InvalidSdistFilename: If the extension is unsupported, the stem
        contains no dash, or the version cannot be parsed.
    """
    for extension in (".tar.gz", ".zip"):
        if filename.endswith(extension):
            file_stem = filename[: -len(extension)]
            break
    else:
        raise InvalidSdistFilename(
            f"Invalid sdist filename (extension must be '.tar.gz' or '.zip'):"
            f" {filename}"
        )

    # We are requiring a PEP 440 version, which cannot contain dashes,
    # so the last dash is the boundary between name and version.
    dash_index = file_stem.rfind("-")
    if dash_index == -1:
        raise InvalidSdistFilename(f"Invalid sdist filename: {filename}")
    name_part = file_stem[:dash_index]
    version_part = file_stem[dash_index + 1 :]

    name = canonicalize_name(name_part)

    try:
        version = Version(version_part)
    except InvalidVersion as exc:
        raise InvalidSdistFilename(
            f"Invalid sdist filename (invalid version): {filename}"
        ) from exc

    return (name, version)