1from __future__ import annotations
2
3import collections
4import contextlib
5import functools
6import os
7import re
8import sys
9import warnings
10from typing import Generator, Iterator, NamedTuple, Sequence
11
12from ._elffile import EIClass, EIData, ELFFile, EMachine
13
# Bit masks / values that `_is_linux_armhf` applies to the `flags` field of
# the interpreter's ELF header: the masked ABI field must equal
# EF_ARM_ABI_VER5 and the EF_ARM_ABI_FLOAT_HARD bit must be set.
EF_ARM_ABIMASK = 0xFF000000
EF_ARM_ABI_VER5 = 0x05000000
EF_ARM_ABI_FLOAT_HARD = 0x00000400

# Architectures that `_have_compatible_abi` accepts by name alone, i.e.
# without inspecting the ELF header of the running interpreter ("armv7l" and
# "i686" are handled separately through the ELF checks).
_ALLOWED_ARCHS = {
    "x86_64",
    "aarch64",
    "ppc64",
    "ppc64le",
    "s390x",
    "loongarch64",
    "riscv64",
}
27
28
29# `os.PathLike` not a generic type until Python 3.9, so sticking with `str`
30# as the type for `path` until then.
31@contextlib.contextmanager
32def _parse_elf(path: str) -> Generator[ELFFile | None, None, None]:
33 try:
34 with open(path, "rb") as f:
35 yield ELFFile(f)
36 except (OSError, TypeError, ValueError):
37 yield None
38
39
def _is_linux_armhf(executable: str) -> bool:
    """Return True if *executable* is a 32-bit little-endian ARM hard-float ELF.

    The hard-float ABI is detected from the ELF header flags of the given
    executable (normally the running interpreter).
    https://static.docs.arm.com/ihi0044/g/aaelf32.pdf
    """
    with _parse_elf(executable) as elf:
        # Unreadable or non-ELF file: cannot be an armhf binary.
        if elf is None:
            return False
        is_arm32_le = (
            elf.capacity == EIClass.C32
            and elf.encoding == EIData.Lsb
            and elf.machine == EMachine.Arm
        )
        if not is_arm32_le:
            return False
        flags = elf.flags
        # The ABI field must equal EF_ARM_ABI_VER5 and the hard-float bit
        # must be set.
        return (
            flags & EF_ARM_ABIMASK == EF_ARM_ABI_VER5
            and flags & EF_ARM_ABI_FLOAT_HARD == EF_ARM_ABI_FLOAT_HARD
        )
53
54
def _is_linux_i686(executable: str) -> bool:
    """Return True if *executable* is a 32-bit little-endian i386 ELF file."""
    with _parse_elf(executable) as elf:
        # Unreadable or non-ELF file: cannot be an i686 binary.
        if elf is None:
            return False
        return (
            elf.capacity == EIClass.C32
            and elf.encoding == EIData.Lsb
            and elf.machine == EMachine.I386
        )
63
64
65def _have_compatible_abi(executable: str, archs: Sequence[str]) -> bool:
66 if "armv7l" in archs:
67 return _is_linux_armhf(executable)
68 if "i686" in archs:
69 return _is_linux_i686(executable)
70 return any(arch in _ALLOWED_ARCHS for arch in archs)
71
72
# If glibc ever changes its major version, we need to know what the last
# minor version of each earlier major was, so that we can build the complete
# list of all versions. Until that actually happens we can only guess: every
# major version is assumed to have ended at minor 50 (a placeholder that is
# convenient for testing). Once a real value is known, record it in this
# dictionary.
_LAST_GLIBC_MINOR: dict[int, int] = collections.defaultdict(lambda: 50)
79
80
class _GLibCVersion(NamedTuple):
    """A glibc version as a ``(major, minor)`` pair.

    Instances compare as ordinary tuples. Within this module
    ``_GLibCVersion(-1, -1)`` is returned when the glibc version is
    unavailable or cannot be parsed.
    """

    major: int
    minor: int
84
85
86def _glibc_version_string_confstr() -> str | None:
87 """
88 Primary implementation of glibc_version_string using os.confstr.
89 """
90 # os.confstr is quite a bit faster than ctypes.DLL. It's also less likely
91 # to be broken or missing. This strategy is used in the standard library
92 # platform module.
93 # https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183
94 try:
95 # Should be a string like "glibc 2.17".
96 version_string: str | None = os.confstr("CS_GNU_LIBC_VERSION")
97 assert version_string is not None
98 _, version = version_string.rsplit()
99 except (AssertionError, AttributeError, OSError, ValueError):
100 # os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)...
101 return None
102 return version
103
104
105def _glibc_version_string_ctypes() -> str | None:
106 """
107 Fallback implementation of glibc_version_string using ctypes.
108 """
109 try:
110 import ctypes # noqa: PLC0415
111 except ImportError:
112 return None
113
114 # ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen
115 # manpage says, "If filename is NULL, then the returned handle is for the
116 # main program". This way we can let the linker do the work to figure out
117 # which libc our process is actually using.
118 #
119 # We must also handle the special case where the executable is not a
120 # dynamically linked executable. This can occur when using musl libc,
121 # for example. In this situation, dlopen() will error, leading to an
122 # OSError. Interestingly, at least in the case of musl, there is no
123 # errno set on the OSError. The single string argument used to construct
124 # OSError comes from libc itself and is therefore not portable to
125 # hard code here. In any case, failure to call dlopen() means we
126 # can proceed, so we bail on our attempt.
127 try:
128 process_namespace = ctypes.CDLL(None)
129 except OSError:
130 return None
131
132 try:
133 gnu_get_libc_version = process_namespace.gnu_get_libc_version
134 except AttributeError:
135 # Symbol doesn't exist -> therefore, we are not linked to
136 # glibc.
137 return None
138
139 # Call gnu_get_libc_version, which returns a string like "2.5"
140 gnu_get_libc_version.restype = ctypes.c_char_p
141 version_str: str = gnu_get_libc_version()
142 # py2 / py3 compatibility:
143 if not isinstance(version_str, str):
144 version_str = version_str.decode("ascii")
145
146 return version_str
147
148
def _glibc_version_string() -> str | None:
    """Returns glibc version string, or None if not using glibc.

    The os.confstr strategy is tried first; ctypes is consulted only when
    that yields nothing.
    """
    from_confstr = _glibc_version_string_confstr()
    if from_confstr:
        return from_confstr
    return _glibc_version_string_ctypes()
152
153
def _parse_glibc_version(version_str: str) -> _GLibCVersion:
    """Parse a glibc version string into a ``_GLibCVersion``.

    Only the leading ``major.minor`` is read; a regexp is used rather than
    str.split so that trailing junk is ignored. Patched/forked glibc builds
    produce such strings (e.g. Linaro's "2.20-2014.11"). See gh-3588.

    If the string does not start with ``major.minor``, a RuntimeWarning is
    issued and ``_GLibCVersion(-1, -1)`` is returned.
    """
    parsed = re.match(r"(?P<major>[0-9]+)\.(?P<minor>[0-9]+)", version_str)
    if parsed:
        return _GLibCVersion(int(parsed["major"]), int(parsed["minor"]))
    warnings.warn(
        f"Expected glibc version with 2 components major.minor, got: {version_str}",
        RuntimeWarning,
        stacklevel=2,
    )
    return _GLibCVersion(-1, -1)
171
172
@functools.lru_cache
def _get_glibc_version() -> _GLibCVersion:
    """Return the glibc version of the running process (cached).

    ``_GLibCVersion(-1, -1)`` is returned when no glibc version string is
    available.
    """
    raw_version = _glibc_version_string()
    return (
        _GLibCVersion(-1, -1)
        if raw_version is None
        else _parse_glibc_version(raw_version)
    )
179
180
181# From PEP 513, PEP 600
def _is_compatible(arch: str, version: _GLibCVersion) -> bool:
    """Return True if tags for glibc *version* on *arch* may be emitted.

    The system glibc must be at least *version*. Beyond that, an optional
    ``_manylinux`` module can override the decision, either through a
    ``manylinux_compatible(major, minor, arch)`` function or through the
    legacy ``manylinux1_compatible`` / ``manylinux2010_compatible`` /
    ``manylinux2014_compatible`` attributes.
    """
    if _get_glibc_version() < version:
        return False
    # Check for presence of _manylinux module.
    try:
        import _manylinux  # noqa: PLC0415
    except ImportError:
        return True
    if hasattr(_manylinux, "manylinux_compatible"):
        verdict = _manylinux.manylinux_compatible(version[0], version[1], arch)
        # A None verdict means "no opinion", which counts as compatible; the
        # legacy attributes are not consulted in that case.
        return True if verdict is None else bool(verdict)
    legacy_flags = (
        (_GLibCVersion(2, 5), "manylinux1_compatible"),
        (_GLibCVersion(2, 12), "manylinux2010_compatible"),
        (_GLibCVersion(2, 17), "manylinux2014_compatible"),
    )
    for legacy_version, flag_name in legacy_flags:
        if version == legacy_version and hasattr(_manylinux, flag_name):
            return bool(getattr(_manylinux, flag_name))
    return True
207
208
# Maps a glibc version to the name of the legacy manylinux tag that
# `platform_tags` emits in addition to the `manylinux_<major>_<minor>` tag
# for that version.
_LEGACY_MANYLINUX_MAP: dict[_GLibCVersion, str] = {
    # CentOS 7 w/ glibc 2.17 (PEP 599)
    _GLibCVersion(2, 17): "manylinux2014",
    # CentOS 6 w/ glibc 2.12 (PEP 571)
    _GLibCVersion(2, 12): "manylinux2010",
    # CentOS 5 w/ glibc 2.5 (PEP 513)
    _GLibCVersion(2, 5): "manylinux1",
}
217
218
def platform_tags(archs: Sequence[str]) -> Iterator[str]:
    """Generate manylinux tags compatible to the current platform.

    :param archs: Sequence of compatible architectures.
        The first one shall be the closest to the actual architecture and be the part of
        platform tag after the ``linux_`` prefix, e.g. ``x86_64``.
        The ``linux_`` prefix is assumed as a prerequisite for the current platform to
        be manylinux-compatible.

    :returns: An iterator of compatible manylinux tags. For each architecture,
        tags run from the current glibc version downwards; nothing is yielded
        when the running interpreter's ABI is not compatible.
    """
    if not _have_compatible_abi(sys.executable, archs):
        return
    # The newest glibc that is *not* supported: (2, 4) on x86_64/i686, where
    # support goes back to (2, 5); (2, 16) everywhere else, where the oldest
    # supported glibc is (2, 17).
    if set(archs) & {"x86_64", "i686"}:
        unsupported = _GLibCVersion(2, 4)
    else:
        unsupported = _GLibCVersion(2, 16)
    system_glibc = _GLibCVersion(*_get_glibc_version())
    # We can assume compatibility across glibc major versions.
    # https://sourceware.org/bugzilla/show_bug.cgi?id=24636
    #
    # Collect the highest version of every glibc major from the current one
    # down to 2, so that every intermediate version can be listed below.
    ceilings = [system_glibc]
    ceilings.extend(
        _GLibCVersion(major, _LAST_GLIBC_MINOR[major])
        for major in range(system_glibc.major - 1, 1, -1)
    )
    for arch in archs:
        for ceiling in ceilings:
            # Within the oldest supported major, stop above the unsupported
            # minor; for any other major the oldest supported is (x, 0).
            if ceiling.major == unsupported.major:
                stop_minor = unsupported.minor
            else:
                stop_minor = -1
            for minor in range(ceiling.minor, stop_minor, -1):
                candidate = _GLibCVersion(ceiling.major, minor)
                if not _is_compatible(arch, candidate):
                    continue
                yield "manylinux_{}_{}_{}".format(*candidate, arch)

                # Handle the legacy manylinux1, manylinux2010, manylinux2014 tags.
                legacy_name = _LEGACY_MANYLINUX_MAP.get(candidate)
                if legacy_name:
                    yield f"{legacy_name}_{arch}"