1import collections
2import contextlib
3import functools
4import os
5import re
6import sys
7import warnings
8from typing import Dict, Generator, Iterator, NamedTuple, Optional, Sequence, Tuple
9
10from ._elffile import EIClass, EIData, ELFFile, EMachine
11
# Bit masks applied to the flags word of an ELF header when checking for the
# ARM hard-float ABI (see ``_is_linux_armhf``).
EF_ARM_ABIMASK = 0xFF00_0000  # selects the bits compared against the ABI version
EF_ARM_ABI_VER5 = 0x0500_0000  # value of the masked bits required for a match
EF_ARM_ABI_FLOAT_HARD = 0x0000_0400  # bit that must be set for hard-float
15
16
17# `os.PathLike` not a generic type until Python 3.9, so sticking with `str`
18# as the type for `path` until then.
@contextlib.contextmanager
def _parse_elf(path: str) -> Generator[Optional[ELFFile], None, None]:
    """Context manager yielding *path* parsed as an ``ELFFile``, or ``None``.

    ``None`` is yielded when opening the file or constructing the ``ELFFile``
    raises ``OSError``, ``TypeError`` or ``ValueError``. The file, if it was
    opened, stays open for the duration of the ``with`` body and is closed
    afterwards.

    Exceptions raised by the caller inside the ``with`` body propagate
    unchanged.

    :param path: Filesystem path of the file to parse.
    """
    try:
        f = open(path, "rb")
    except (OSError, TypeError, ValueError):
        yield None
        return
    with f:
        try:
            elf: Optional[ELFFile] = ELFFile(f)
        except (OSError, TypeError, ValueError):
            elf = None
        # Yield outside of any ``try``/``except``: a generator-based context
        # manager must yield exactly once.  If this yield were guarded by the
        # handler above, an OSError/TypeError/ValueError raised in the
        # caller's ``with`` body would be caught here and followed by a second
        # yield, which contextlib reports as
        # ``RuntimeError: generator didn't stop after throw()``.
        yield elf
26
27
def _is_linux_armhf(executable: str) -> bool:
    """Return whether *executable* is a 32-bit little-endian ARM hard-float ELF.

    The hard-float ABI is detected from the ELF header of the given
    executable (normally the running interpreter).
    https://static.docs.arm.com/ihi0044/g/aaelf32.pdf

    :param executable: Path of the ELF file to inspect.
    :returns: ``False`` if the file cannot be parsed or any check fails.
    """
    with _parse_elf(executable) as elf:
        if elf is None:
            return False
        # Header identity: 32-bit, little-endian, ARM.
        if elf.capacity != EIClass.C32:
            return False
        if elf.encoding != EIData.Lsb:
            return False
        if elf.machine != EMachine.Arm:
            return False
        # Flag checks: ABI version bits must equal VER5 and the hard-float
        # bit must be set.
        if (elf.flags & EF_ARM_ABIMASK) != EF_ARM_ABI_VER5:
            return False
        return (elf.flags & EF_ARM_ABI_FLOAT_HARD) == EF_ARM_ABI_FLOAT_HARD
41
42
def _is_linux_i686(executable: str) -> bool:
    """Return whether *executable* is a 32-bit little-endian i386 ELF file.

    :param executable: Path of the ELF file to inspect.
    :returns: ``False`` if the file cannot be parsed or any check fails.
    """
    with _parse_elf(executable) as elf:
        if elf is None:
            return False
        if elf.capacity != EIClass.C32:
            return False
        if elf.encoding != EIData.Lsb:
            return False
        return elf.machine == EMachine.I386
51
52
def _have_compatible_abi(executable: str, archs: Sequence[str]) -> bool:
    """Return whether *executable* has an ABI usable for any of *archs*.

    For ``armv7l`` and ``i686`` the answer comes from inspecting the ELF
    header of *executable*; ``armv7l`` takes precedence when both are listed.
    Otherwise the result is ``True`` when at least one entry of *archs* is in
    the fixed set of accepted architectures.

    :param executable: Path of the ELF file to inspect when required.
    :param archs: Architecture names to check.
    """
    if "armv7l" in archs:
        return _is_linux_armhf(executable)
    if "i686" in archs:
        return _is_linux_i686(executable)
    accepted = frozenset(
        (
            "x86_64",
            "aarch64",
            "ppc64",
            "ppc64le",
            "s390x",
            "loongarch64",
            "riscv64",
        )
    )
    # True as soon as the two collections share at least one name.
    return not accepted.isdisjoint(archs)
68
69
# Maps a glibc major version to the last minor version released under it.
# This is only needed once glibc bumps its major version: to enumerate every
# version we must know where the previous major series ended.  Until that
# happens no real value exists, so every major version defaults to 50 (a
# guess, good enough for testing).  Replace the default with explicit entries
# once an actual value is known.
_LAST_GLIBC_MINOR: Dict[int, int] = collections.defaultdict(lambda: 50)
76
77
class _GLibCVersion(NamedTuple):
    """A glibc version as a ``(major, minor)`` pair of integers.

    Being a tuple, instances compare and order element-wise, and compare
    equal to plain ``(major, minor)`` tuples.
    """

    # First component of the version, e.g. 2 in "2.17".
    major: int
    # Second component of the version, e.g. 17 in "2.17".
    minor: int
81
82
def _glibc_version_string_confstr() -> Optional[str]:
    """
    Primary implementation of glibc_version_string using os.confstr.

    :returns: The version part of the ``CS_GNU_LIBC_VERSION`` value (e.g.
        ``"2.17"``), or ``None`` when the value is unavailable or does not
        consist of exactly two whitespace-separated fields.
    """
    # os.confstr is quite a bit faster than ctypes.DLL. It's also less likely
    # to be broken or missing. This strategy is used in the standard library
    # platform module.
    # https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183
    try:
        # Should be a string like "glibc 2.17".
        raw: Optional[str] = os.confstr("CS_GNU_LIBC_VERSION")
    except (AttributeError, OSError, ValueError):
        # os.confstr() or CS_GNU_LIBC_VERSION not available.
        return None
    if raw is None:
        return None
    fields = raw.split()
    if len(fields) != 2:
        # Not of the form "<name> <version>": treat as a bad value.
        return None
    return fields[1]
100
101
def _glibc_version_string_ctypes() -> Optional[str]:
    """
    Fallback implementation of glibc_version_string using ctypes.

    :returns: The string reported by ``gnu_get_libc_version()`` (e.g.
        ``"2.5"``), or ``None`` when ctypes is unavailable, the process
        namespace cannot be opened, or the symbol does not exist.
    """
    try:
        import ctypes
    except ImportError:
        return None

    # ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen
    # manpage says, "If filename is NULL, then the returned handle is for the
    # main program". This lets the linker work out which libc the process is
    # actually using.
    #
    # A statically linked executable (which can occur with musl libc, for
    # example) makes dlopen() fail, surfacing as an OSError. At least with
    # musl that OSError carries no errno, and its message comes from libc
    # itself, so neither is portable to match on. Whatever the reason, if
    # dlopen() fails we cannot proceed, so give up on this strategy.
    try:
        namespace = ctypes.CDLL(None)
    except OSError:
        return None

    try:
        get_version = namespace.gnu_get_libc_version
    except AttributeError:
        # Symbol doesn't exist -> therefore, we are not linked to glibc.
        return None

    # gnu_get_libc_version returns a C string like "2.5".
    get_version.restype = ctypes.c_char_p
    raw = get_version()
    # With a c_char_p restype the value is not a ``str``; decode it unless it
    # already is one.
    return raw if isinstance(raw, str) else raw.decode("ascii")
144
145
def _glibc_version_string() -> Optional[str]:
    """Returns glibc version string, or None if not using glibc.

    The ``os.confstr`` strategy is tried first; the ctypes strategy is used
    only when the first one yields nothing (``None`` or an empty string).
    """
    version = _glibc_version_string_confstr()
    if version:
        return version
    return _glibc_version_string_ctypes()
149
150
def _parse_glibc_version(version_str: str) -> Tuple[int, int]:
    """Parse a glibc version string into ``(major, minor)``.

    Only the leading ``major.minor`` digits are consumed. A regexp is used
    rather than str.split so that trailing junk after the minor version is
    discarded -- patched/forked glibc builds produce it (e.g. Linaro's glibc
    reports strings like "2.20-2014.11"). See gh-3588.

    :param version_str: Version string to parse.
    :returns: ``(major, minor)`` as integers, or ``(-1, -1)`` after emitting
        a ``RuntimeWarning`` when the string does not start with
        ``major.minor``.
    """
    match = re.match(r"(?P<major>[0-9]+)\.(?P<minor>[0-9]+)", version_str)
    if match is None:
        warnings.warn(
            "Expected glibc version with 2 components major.minor,"
            f" got: {version_str}",
            RuntimeWarning,
        )
        return -1, -1
    major, minor = match.group("major", "minor")
    return int(major), int(minor)
168
169
@functools.lru_cache()
def _get_glibc_version() -> Tuple[int, int]:
    """Return the glibc version of this process as ``(major, minor)``.

    The result is cached after the first call. ``(-1, -1)`` is returned when
    no glibc version string can be obtained.
    """
    raw = _glibc_version_string()
    return (-1, -1) if raw is None else _parse_glibc_version(raw)
176
177
178# From PEP 513, PEP 600
def _is_compatible(arch: str, version: _GLibCVersion) -> bool:
    """Return whether tags for glibc *version* on *arch* may be used here.

    The system glibc must be at least *version*. Beyond that, an optional
    importable ``_manylinux`` module can override the answer:

    * if it defines ``manylinux_compatible``, that callable is invoked with
      ``(major, minor, arch)``; a ``None`` result means "no opinion" and is
      treated as compatible, anything else is converted to ``bool``;
    * otherwise, for glibc 2.5 / 2.12 / 2.17 the attributes
      ``manylinux1_compatible`` / ``manylinux2010_compatible`` /
      ``manylinux2014_compatible`` are consulted when present.

    :param arch: Architecture name passed through to ``manylinux_compatible``.
    :param version: glibc version the candidate tag requires.
    """
    if _get_glibc_version() < version:
        return False
    # Check for presence of _manylinux module.
    try:
        import _manylinux
    except ImportError:
        return True
    if hasattr(_manylinux, "manylinux_compatible"):
        verdict = _manylinux.manylinux_compatible(version[0], version[1], arch)
        return True if verdict is None else bool(verdict)
    # Legacy per-tag flags, keyed by the glibc version each one stands for.
    legacy_flags = {
        _GLibCVersion(2, 5): "manylinux1_compatible",
        _GLibCVersion(2, 12): "manylinux2010_compatible",
        _GLibCVersion(2, 17): "manylinux2014_compatible",
    }
    flag_name = legacy_flags.get(version)
    if flag_name is not None and hasattr(_manylinux, flag_name):
        return bool(getattr(_manylinux, flag_name))
    return True
203
204
# glibc (major, minor) -> legacy manylinux tag name that aliases it.
_LEGACY_MANYLINUX_MAP: Dict[Tuple[int, int], str] = {
    (2, 17): "manylinux2014",  # CentOS 7 w/ glibc 2.17 (PEP 599)
    (2, 12): "manylinux2010",  # CentOS 6 w/ glibc 2.12 (PEP 571)
    (2, 5): "manylinux1",  # CentOS 5 w/ glibc 2.5 (PEP 513)
}
213
214
def platform_tags(archs: Sequence[str]) -> Iterator[str]:
    """Generate manylinux tags compatible to the current platform.

    Tags are produced per architecture (in the order given), from the newest
    glibc version down to the oldest supported one. Each ``manylinux_X_Y``
    tag is immediately followed by its legacy alias (``manylinux2014``,
    ``manylinux2010``, ``manylinux1``) when one exists for that version.

    :param archs: Sequence of compatible architectures.
        The first one shall be the closest to the actual architecture and be the part of
        platform tag after the ``linux_`` prefix, e.g. ``x86_64``.
        The ``linux_`` prefix is assumed as a prerequisite for the current platform to
        be manylinux-compatible.

    :returns: An iterator of compatible manylinux tags.
    """
    if not _have_compatible_abi(sys.executable, archs):
        return
    # Oldest glibc to be supported regardless of architecture is (2, 17).
    too_old_glibc2 = _GLibCVersion(2, 16)
    if set(archs) & {"x86_64", "i686"}:
        # On x86/i686 also oldest glibc to be supported is (2, 5).
        too_old_glibc2 = _GLibCVersion(2, 4)
    current_glibc = _GLibCVersion(*_get_glibc_version())
    # We can assume compatibility across glibc major versions.
    # https://sourceware.org/bugzilla/show_bug.cgi?id=24636
    #
    # Build a list of maximum glibc versions so that we can
    # output the canonical list of all glibc from current_glibc
    # down to too_old_glibc2, including all intermediary versions.
    glibc_max_list = [current_glibc]
    glibc_max_list.extend(
        _GLibCVersion(major, _LAST_GLIBC_MINOR[major])
        for major in range(current_glibc.major - 1, 1, -1)
    )
    for arch in archs:
        for glibc_max in glibc_max_list:
            if glibc_max.major == too_old_glibc2.major:
                min_minor = too_old_glibc2.minor
            else:
                # For other glibc major versions oldest supported is (x, 0).
                min_minor = -1
            for glibc_minor in range(glibc_max.minor, min_minor, -1):
                glibc_version = _GLibCVersion(glibc_max.major, glibc_minor)
                # Decide compatibility once per (arch, version): both the
                # PEP 600 tag and its legacy alias depend on the same answer,
                # so there is no need to run the check (and any user-supplied
                # ``_manylinux`` hook behind it) a second time.
                if not _is_compatible(arch, glibc_version):
                    continue
                yield f"manylinux_{glibc_version.major}_{glibc_version.minor}_{arch}"
                # Handle the legacy manylinux1, manylinux2010, manylinux2014 tags.
                legacy_tag = _LEGACY_MANYLINUX_MAP.get(glibc_version)
                if legacy_tag is not None:
                    yield f"{legacy_tag}_{arch}"