1# This file is dual licensed under the terms of the Apache License, Version
2# 2.0, and the BSD License. See the LICENSE file in the root of this repository
3# for complete details.
4
5from __future__ import annotations
6
7import re
8from typing import NewType, Tuple, Union, cast
9
10from .tags import Tag, UnsortedTagsError, parse_tag
11from .version import InvalidVersion, Version, _TrimmedRelease
12
# Public names of this module. The module-level ``__dir__`` defined below
# returns this same list.
__all__ = [
    "BuildTag",
    "InvalidName",
    "InvalidSdistFilename",
    "InvalidWheelFilename",
    "NormalizedName",
    "canonicalize_name",
    "canonicalize_version",
    "is_normalized_name",
    "parse_sdist_filename",
    "parse_wheel_filename",
]
25
26
def __dir__() -> list[str]:
    """
    Module-level ``__dir__`` hook: report only the names listed in ``__all__``.

    The ``__all__`` list object itself is returned, not a copy.
    """
    return __all__
29
30
# Build tag of a wheel as produced by ``parse_wheel_filename``: the empty
# tuple when the filename carries no build tag, otherwise a pair of
# (leading digits as ``int``, remainder of the tag as ``str``).
BuildTag = Union[Tuple[()], Tuple[int, str]]

NormalizedName = NewType("NormalizedName", str)
"""
A :class:`typing.NewType` of :class:`str`, representing a normalized name.
"""
37
38
class InvalidName(ValueError):
    """
    Raised when a distribution name is not valid.

    Users should refer to the packaging user guide for the naming rules.
    """
43
44
class InvalidWheelFilename(ValueError):
    """
    Raised when a wheel filename is not valid.

    Users should refer to PEP 427 for the expected filename format.
    """
49
50
class InvalidSdistFilename(ValueError):
    """
    Raised when an sdist filename is not valid.

    Users should refer to the packaging user guide for the expected
    filename format.
    """
55
56
# NOTE: the name patterns below carry no anchors; they are applied with
# ``fullmatch``.

# Core metadata spec for `Name`: ASCII letters and digits (any case), with
# ``.``, ``_`` and ``-`` allowed in between; the first and last character
# must be a letter or digit.
_validate_regex = re.compile(
    r"[a-z0-9]|[a-z0-9][a-z0-9._-]*[a-z0-9]", re.IGNORECASE | re.ASCII
)
# Normalized form: runs of lowercase ASCII letters/digits joined by single
# dashes. The separator is spelled out explicitly rather than guarded with a
# ``(?!--)`` lookahead on each interior character, because such a lookahead
# never runs after the *first* character and therefore lets ``a--b`` through.
_normalized_regex = re.compile(r"[a-z0-9]+(?:-[a-z0-9]+)*", re.ASCII)
# PEP 427: The build number must start with a digit.
# Group 1 is the leading digits, group 2 whatever follows them.
_build_tag_regex = re.compile(r"(\d+)(.*)", re.ASCII)
64
65
def canonicalize_name(name: str, *, validate: bool = False) -> NormalizedName:
    """
    Return the normalized form of a Python package or extra name.

    The result is lowercased, every ``.`` and ``_`` becomes ``-``, and runs
    of ``-`` are collapsed into a single ``-``.

    The return value is typed as :class:`NormalizedName` so that type
    checkers can help require that a string has passed through this
    function before use.

    When **validate** is true, **name** is first checked to be a valid
    distribution name; normalization only happens if that check passes.

    :param str name: The name to normalize.
    :param bool validate: Check whether the name is a valid distribution name.
    :raises InvalidName: If **validate** is true and the name is not an
        acceptable distribution name.

    >>> from packaging.utils import canonicalize_name
    >>> canonicalize_name("Django")
    'django'
    >>> canonicalize_name("oslo.concurrency")
    'oslo-concurrency'
    >>> canonicalize_name("requests")
    'requests'
    """
    if validate and _validate_regex.fullmatch(name) is None:
        raise InvalidName(f"name is invalid: {name!r}")

    # Equivalent to ``re.sub(r"[-_.]+", "-", name).lower()`` from PEP 503, but
    # plain ``str.replace`` calls are much faster than ``re`` (and faster than
    # ``str.translate``).
    lowered = name.lower()
    dashed = lowered.replace("_", "-").replace(".", "-")

    # Collapse runs of dashes; looping over ``replace`` beats a regex here.
    while "--" in dashed:
        dashed = dashed.replace("--", "-")

    return cast("NormalizedName", dashed)
101
102
def is_normalized_name(name: str) -> bool:
    """
    Tell whether a name is already in normalized form.

    The whole of **name** must match the module's normalized-name pattern;
    a partial match does not count.

    :param str name: The name to check.

    >>> from packaging.utils import is_normalized_name
    >>> is_normalized_name("requests")
    True
    >>> is_normalized_name("Django")
    False
    """
    full_match = _normalized_regex.fullmatch(name)
    return full_match is not None
117
118
def canonicalize_version(
    version: Version | str, *, strip_trailing_zero: bool = True
) -> str:
    """Canonicalize a version and return it as a string.

    **version** may be a string or a :class:`~packaging.version.Version`
    instance. Strings are parsed first. Unless **strip_trailing_zero** is
    false, trailing zeros are stripped from the release segment.

    >>> from packaging.utils import canonicalize_version
    >>> canonicalize_version('1.0.1')
    '1.0.1'

    Per PEP 625, versions may have multiple canonical forms, differing
    only by trailing zeros.

    >>> canonicalize_version('1.0.0')
    '1'
    >>> canonicalize_version('1.0.0', strip_trailing_zero=False)
    '1.0.0'

    A string that cannot be parsed as a version is returned unaltered.

    >>> canonicalize_version('foo bar baz')
    'foo bar baz'

    >>> canonicalize_version('1.4.0.0.0')
    '1.4'
    """
    if isinstance(version, str):
        raw = version
        try:
            parsed = Version(raw)
        except InvalidVersion:
            # Not parseable: hand the caller's string back as-is.
            return str(raw)
    else:
        parsed = version

    if strip_trailing_zero:
        return str(_TrimmedRelease(parsed))
    return str(parsed)
155
156
def parse_wheel_filename(
    filename: str,
    *,
    validate_order: bool = False,
) -> tuple[NormalizedName, Version, BuildTag, frozenset[Tag]]:
    """
    This function takes the filename of a wheel file, and parses it,
    returning a tuple of name, version, build number, and tags.

    The name part of the tuple is normalized and typed as
    :class:`NormalizedName`. The version portion is an instance of
    :class:`~packaging.version.Version`. The build number is ``()`` if
    there is no build number in the wheel filename, otherwise a
    two-item tuple of an integer for the leading digits and
    a string for the rest of the build number. The tags portion is a
    frozen set of :class:`~packaging.tags.Tag` instances (as the tag
    string format allows multiple tags to be combined into a single
    string).

    The project name component must be non-empty, must not contain ``__``,
    and may only consist of word characters and ``.``.

    If **validate_order** is true, compressed tag set components are
    checked to be in sorted order as required by PEP 425.

    :param str filename: The name of the wheel file.
    :param bool validate_order: Check whether compressed tag set components
        are in sorted order.
    :raises InvalidWheelFilename: If the filename in question
        does not follow the :ref:`wheel specification
        <pypug:binary-distribution-format>`. The message always quotes
        **filename** exactly as it was passed in.

    >>> from packaging.utils import parse_wheel_filename
    >>> from packaging.tags import Tag
    >>> from packaging.version import Version
    >>> name, ver, build, tags = parse_wheel_filename("foo-1.0-py3-none-any.whl")
    >>> name
    'foo'
    >>> ver == Version('1.0')
    True
    >>> tags == {Tag("py3", "none", "any")}
    True
    >>> not build
    True

    .. versionadded:: 26.1
       The *validate_order* parameter.
    """
    if not filename.endswith(".whl"):
        raise InvalidWheelFilename(
            f"Invalid wheel filename (extension must be '.whl'): {filename!r}"
        )

    # Work on a separate ``stem`` so that ``filename`` stays untouched and
    # every error below reports the name the caller actually passed.
    stem = filename[: -len(".whl")]
    dashes = stem.count("-")
    if dashes not in (4, 5):
        raise InvalidWheelFilename(
            f"Invalid wheel filename (wrong number of parts): {filename!r}"
        )

    # Splitting ``dashes - 2`` times keeps the three tag components together
    # as the final element: [name, version, (build,) "py-abi-platform"].
    parts = stem.split("-", dashes - 2)
    name_part = parts[0]
    # See PEP 427 for the rules on escaping the project name.
    # ``fullmatch`` with ``+`` rejects an empty name, and also a name ending
    # in a newline (which a ``$`` anchor would silently accept).
    if (
        "__" in name_part
        or re.fullmatch(r"[\w\d._]+", name_part, re.UNICODE) is None
    ):
        raise InvalidWheelFilename(f"Invalid project name: {filename!r}")
    name = canonicalize_name(name_part)

    try:
        version = Version(parts[1])
    except InvalidVersion as e:
        raise InvalidWheelFilename(
            f"Invalid wheel filename (invalid version): {filename!r}"
        ) from e

    if dashes == 5:
        # Five dashes means an optional build tag sits between version and tags.
        build_part = parts[2]
        build_match = _build_tag_regex.match(build_part)
        if build_match is None:
            raise InvalidWheelFilename(
                f"Invalid build number: {build_part} in {filename!r}"
            )
        build = cast("BuildTag", (int(build_match.group(1)), build_match.group(2)))
    else:
        build = ()

    tag_str = parts[-1]
    try:
        tags = parse_tag(tag_str, validate_order=validate_order)
    except UnsortedTagsError:
        # The ordering error carries no extra detail worth chaining.
        raise InvalidWheelFilename(
            f"Invalid wheel filename (compressed tag set components must be in "
            f"sorted order per PEP 425): {filename!r}"
        ) from None
    return (name, version, build, tags)
247
248
def parse_sdist_filename(filename: str) -> tuple[NormalizedName, Version]:
    """
    This function takes the filename of a sdist file (as specified
    in the `Source distribution format`_ documentation), and parses
    it, returning a tuple of the normalized name and version as
    represented by an instance of :class:`~packaging.version.Version`.

    :param str filename: The name of the sdist file.
    :raises InvalidSdistFilename: If the filename does not end
        with an sdist extension (``.zip`` or ``.tar.gz``), if it does not
        contain a dash separating the name and the version of the
        distribution, if the name before that dash is empty, or if the
        version is not valid.

    >>> from packaging.utils import parse_sdist_filename
    >>> from packaging.version import Version
    >>> name, ver = parse_sdist_filename("foo-1.0.tar.gz")
    >>> name
    'foo'
    >>> ver == Version('1.0')
    True

    .. _Source distribution format: https://packaging.python.org/specifications/source-distribution-format/#source-distribution-file-name
    """
    if filename.endswith(".tar.gz"):
        file_stem = filename[: -len(".tar.gz")]
    elif filename.endswith(".zip"):
        file_stem = filename[: -len(".zip")]
    else:
        raise InvalidSdistFilename(
            f"Invalid sdist filename (extension must be '.tar.gz' or '.zip'):"
            f" {filename!r}"
        )

    # We are requiring a PEP 440 version, which cannot contain dashes,
    # so we split on the last dash.
    name_part, sep, version_part = file_stem.rpartition("-")
    # No dash at all, or nothing in front of it (e.g. "-1.0.tar.gz"): there
    # is no project name to return.
    if not sep or not name_part:
        raise InvalidSdistFilename(f"Invalid sdist filename: {filename!r}")

    name = canonicalize_name(name_part)

    try:
        version = Version(version_part)
    except InvalidVersion as e:
        raise InvalidSdistFilename(
            f"Invalid sdist filename (invalid version): {filename!r}"
        ) from e

    return (name, version)