1# Licensed under the LGPL: https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html
2# For details: https://github.com/pylint-dev/astroid/blob/main/LICENSE
3# Copyright (c) https://github.com/pylint-dev/astroid/blob/main/CONTRIBUTORS.txt
4
5"""Python modules manipulation utility functions.
6
7:type PY_SOURCE_EXTS: tuple(str)
:var PY_SOURCE_EXTS: tuple of possible Python source file extensions
9
10:type STD_LIB_DIRS: set of str
11:var STD_LIB_DIRS: directories where standard modules are located
12
13:type BUILTIN_MODULES: dict
:var BUILTIN_MODULES: dictionary with builtin module names as keys
15"""
16
17from __future__ import annotations
18
19import importlib
20import importlib.machinery
21import importlib.util
22import io
23import itertools
24import logging
25import os
26import sys
27import sysconfig
28import types
29import warnings
30from collections.abc import Callable, Iterable, Sequence
31from contextlib import redirect_stderr, redirect_stdout
32from functools import lru_cache
33
34from astroid.const import IS_JYTHON, PY310_PLUS
35from astroid.interpreter._import import spec, util
36
37if PY310_PLUS:
38 from sys import stdlib_module_names
39else:
40 from astroid._backport_stdlib_names import stdlib_module_names
41
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# File extensions (without the leading dot) that are treated as Python source
# files and as compiled extension modules on the current platform.
# The *_STUBS_FIRST variant lists the same source extensions, but with the
# ``pyi`` stub extension in first position.
if not sys.platform.startswith("win"):
    PY_SOURCE_EXTS = ("py", "pyi")
    PY_SOURCE_EXTS_STUBS_FIRST = ("pyi", "py")
    PY_COMPILED_EXTS = ("so",)
else:
    PY_SOURCE_EXTS = ("py", "pyw", "pyi")
    PY_SOURCE_EXTS_STUBS_FIRST = ("pyi", "pyw", "py")
    PY_COMPILED_EXTS = ("dll", "pyd")
53
54
# Directories in which standard library modules are expected to live.
# TODO: Adding `platstdlib` is a fix for a workaround in virtualenv. At some point we should
# revisit whether this is still necessary. See https://github.com/pylint-dev/astroid/pull/1323.
STD_LIB_DIRS = {sysconfig.get_path(key) for key in ("stdlib", "platstdlib")}

if os.name == "nt":
    STD_LIB_DIRS.add(os.path.join(sys.prefix, "dlls"))
    if hasattr(sys, "real_prefix"):
        # real_prefix is defined when running inside virtual environments
        # created with the **virtualenv** library.
        # Deprecated in virtualenv==16.7.9
        # See: https://github.com/pypa/virtualenv/issues/1622
        STD_LIB_DIRS.add(os.path.join(sys.real_prefix, "dlls"))  # type: ignore[attr-defined]
    elif hasattr(sys, "base_exec_prefix"):
        # sys.base_exec_prefix is always defined, but in a virtual environment
        # created with the stdlib **venv** module, it points to the original
        # installation, if the virtual env is activated.
        STD_LIB_DIRS.add(os.path.join(sys.base_exec_prefix, "dlls"))
75
if os.name == "posix":
    # Inside a virtualenv the real prefix is needed; everywhere else the
    # regular one is fine.
    # ``sys.real_prefix`` is deprecated in virtualenv==16.7.9
    # See: https://github.com/pypa/virtualenv/issues/1622
    prefix: str = getattr(sys, "real_prefix", sys.prefix)

    def _posix_path(path: str) -> str:
        """Return ``<prefix>/<path>/pythonX.Y`` for the running interpreter."""
        version_dir = "python%d.%d" % sys.version_info[:2]
        return os.path.join(prefix, path, version_dir)

    STD_LIB_DIRS.add(_posix_path("lib"))
    if sys.maxsize > 2**32:
        # Work around /usr/lib64 builds: on systems that run both 32-bit and
        # 64-bit code, the standard library may live under ``lib64``.
        # More details: http://bugs.python.org/issue1294959.
        # An easy reproducing case would be
        # https://github.com/pylint-dev/pylint/issues/712#issuecomment-163178753
        STD_LIB_DIRS.add(_posix_path("lib64"))
100
# Directories sysconfig reports for the "purelib" and "platlib" install paths.
EXT_LIB_DIRS = {sysconfig.get_path(key) for key in ("purelib", "platlib")}
# Maps each name in ``sys.builtin_module_names`` to True (used for fast lookups).
BUILTIN_MODULES = {name: True for name in sys.builtin_module_names}
103
104
class NoSourceFile(Exception):
    """Signal that no Python source file could be located.

    Raised when a source file is requested for a (typically precompiled)
    file and none exists on the file system.
    """
109
110
111def _normalize_path(path: str) -> str:
112 """Resolve symlinks in path and convert to absolute path.
113
114 Note that environment variables and ~ in the path need to be expanded in
115 advance.
116
117 This can be cached by using _cache_normalize_path.
118 """
119 return os.path.normcase(os.path.realpath(path))
120
121
def _path_from_filename(filename: str, is_jython: bool = IS_JYTHON) -> str:
    """Map a Jython ``$py.class`` file name back to its ``.py`` name.

    When *is_jython* is false, or *filename* does not contain ``$py.class``,
    *filename* is returned unchanged. Otherwise everything before the first
    ``$py.class`` is kept and ``.py`` is appended.
    """
    marker = "$py.class"
    if is_jython and marker in filename:
        return filename.partition(marker)[0] + ".py"
    return filename
129
130
131def _handle_blacklist(
132 blacklist: Sequence[str], dirnames: list[str], filenames: list[str]
133) -> None:
134 """Remove files/directories in the black list.
135
136 dirnames/filenames are usually from os.walk
137 """
138 for norecurs in blacklist:
139 if norecurs in dirnames:
140 dirnames.remove(norecurs)
141 elif norecurs in filenames:
142 filenames.remove(norecurs)
143
144
@lru_cache
def _cache_normalize_path_(path: str) -> str:
    """Memoized wrapper around ``_normalize_path``."""
    normalized = _normalize_path(path)
    return normalized
148
149
def _cache_normalize_path(path: str) -> str:
    """Return the normalized form of *path*, using a cache when possible."""
    # _module_file calls abspath on every path in sys.path every time it's
    # called; on a larger codebase this easily adds up to half a second just
    # assembling path components. This cache alleviates that.
    # The empty string is deliberately never cached.
    normalizer = _cache_normalize_path_ if path else _normalize_path
    return normalizer(path)
158
159
def load_module_from_name(dotted_name: str) -> types.ModuleType:
    """Return the module called *dotted_name*, importing it if necessary.

    :param dotted_name: python name of a module or package

    :raise ImportError: if the module or package is not found

    :return: the loaded module
    """
    if dotted_name in sys.modules:
        return sys.modules[dotted_name]

    # Anything the module prints while being imported is captured and logged
    # instead, to avoid contaminating JSON reports in pylint.
    captured_err = io.StringIO()
    captured_out = io.StringIO()
    with redirect_stderr(captured_err), redirect_stdout(captured_out):
        module = importlib.import_module(dotted_name)

    err_text = captured_err.getvalue()
    if err_text:
        logger.error("Captured stderr while importing %s:\n%s", dotted_name, err_text)
    out_text = captured_out.getvalue()
    if out_text:
        logger.info("Captured stdout while importing %s:\n%s", dotted_name, out_text)

    return module
196
197
def load_module_from_modpath(parts: Sequence[str]) -> types.ModuleType:
    """Load a Python module given its name split on '.'.

    :param parts:
        python name of a module or package split on '.'

    :raise ImportError: if the module or package is not found

    :return: the loaded module
    """
    dotted_name = ".".join(parts)
    return load_module_from_name(dotted_name)
209
210
def load_module_from_file(filepath: str) -> types.ModuleType:
    """Load a Python module from its path.

    The path is first converted to a split module name, which is then loaded.

    :param filepath: path to the python module or package

    :raise ImportError: if the module or package is not found

    :return: the loaded module
    """
    return load_module_from_modpath(modpath_from_file(filepath))
224
225
def check_modpath_has_init(path: str, mod_path: list[str]) -> bool:
    """Check that every package along *mod_path* below *path* is importable.

    Each directory on the way must either contain an ``__init__`` file or
    correspond to a module name that ``util.is_namespace`` accepts.

    :return: False as soon as one directory satisfies neither, else True
    """
    for depth, part in enumerate(mod_path, start=1):
        path = os.path.join(path, part)
        if _has_init(path):
            continue
        # No __init__ file: only acceptable for namespace packages.
        if not util.is_namespace(".".join(mod_path[:depth])):
            return False
    return True
237
238
239def _get_relative_base_path(filename: str, path_to_check: str) -> list[str] | None:
240 """Extracts the relative mod path of the file to import from.
241
242 Check if a file is within the passed in path and if so, returns the
243 relative mod path from the one passed in.
244
245 If the filename is no in path_to_check, returns None
246
247 Note this function will look for both abs and realpath of the file,
248 this allows to find the relative base path even if the file is a
249 symlink of a file in the passed in path
250
251 Examples:
252 _get_relative_base_path("/a/b/c/d.py", "/a/b") -> ["c","d"]
253 _get_relative_base_path("/a/b/c/d.py", "/dev") -> None
254 """
255 importable_path = None
256 path_to_check = os.path.normcase(path_to_check)
257 abs_filename = os.path.abspath(filename)
258 if os.path.normcase(abs_filename).startswith(path_to_check):
259 importable_path = abs_filename
260
261 real_filename = os.path.realpath(filename)
262 if os.path.normcase(real_filename).startswith(path_to_check):
263 importable_path = real_filename
264
265 if importable_path:
266 base_path = os.path.splitext(importable_path)[0]
267 relative_base_path = base_path[len(path_to_check) :]
268 return [pkg for pkg in relative_base_path.split(os.sep) if pkg]
269
270 return None
271
272
def modpath_from_file_with_callback(
    filename: str,
    path: list[str] | None = None,
    is_package_cb: Callable[[str, list[str]], bool] | None = None,
) -> list[str]:
    """Return the split module name for *filename*, validated by a callback.

    Every entry of *path* (if given) followed by every entry of ``sys.path``
    is tried, first as given and then in normalized form. The first entry
    that contains the file and for which
    ``is_package_cb(entry, modpath[:-1])`` is true determines the result.

    :param filename: path of the file whose module name is wanted
    :param path: optional extra search paths, tried before ``sys.path``
    :param is_package_cb:
        callable receiving the search path entry and the package part of the
        module path; must not be None once a candidate is found

    :raise ImportError: if no search path entry yields an accepted module path

    :return: the split module name
    """
    filename = os.path.expanduser(_path_from_filename(filename))
    paths_to_check = (path or []) + sys.path.copy()
    candidates = itertools.chain(
        paths_to_check,
        (_cache_normalize_path(entry) for entry in paths_to_check),
    )
    for pathname in candidates:
        # Empty entries are skipped without being examined.
        modpath = _get_relative_base_path(filename, pathname) if pathname else None
        if modpath:
            assert is_package_cb is not None
            if is_package_cb(pathname, modpath[:-1]):
                return modpath

    searched = ", \n".join(paths_to_check)
    raise ImportError("Unable to find module for {} in {}".format(filename, searched))
299
300
def modpath_from_file(filename: str, path: list[str] | None = None) -> list[str]:
    """Get the corresponding split module's name from a filename.

    The result is the name of a module or package split on `.`. Candidate
    packages are validated with ``check_modpath_has_init``.

    :param filename: file's path for which we want the module's name

    :param path:
        Optional list of paths where the module or package should be
        searched, additionally to sys.path

    :raise ImportError:
        if the corresponding module's name has not been found

    :return: the corresponding split module's name
    """
    return modpath_from_file_with_callback(
        filename, path=path, is_package_cb=check_modpath_has_init
    )
320
321
def file_from_modpath(
    modpath: list[str],
    path: Sequence[str] | None = None,
    context_file: str | None = None,
) -> str | None:
    """Return the ``location`` of the spec found for *modpath*.

    The arguments are forwarded unchanged to ``file_info_from_modpath``;
    see that function for their meaning and for the errors raised.
    """
    module_spec = file_info_from_modpath(modpath, path, context_file)
    return module_spec.location
328
329
def file_info_from_modpath(
    modpath: list[str],
    path: Sequence[str] | None = None,
    context_file: str | None = None,
) -> spec.ModuleSpec:
    """Given a mod path (i.e. split module / package name), return the
    spec describing the corresponding file.

    Giving priority to source file over precompiled file if it exists.

    :param modpath:
        split module's name (i.e name of a module or package split
        on '.')
        (this means explicit relative imports that start with dots have
        empty strings in this list!)

    :param path:
        optional list of path where the module or package should be
        searched (use sys.path if nothing or None is given)

    :param context_file:
        context file to consider, necessary if the identifier has been
        introduced using a relative import unresolvable in the actual
        context (i.e. modutils)

    :raise ImportError: if there is no such module in the directory

    :return:
        the module spec; its ``location`` is the path to the module's file,
        or None if it's an integrated builtin module such as 'sys'
    """
    # The directory of the context file (if any) is what gets searched.
    context: str | None = (
        None if context_file is None else os.path.dirname(context_file)
    )

    if modpath == ["os", "path"]:
        # FIXME: currently ignoring search_path...
        return spec.ModuleSpec(
            name="os.path",
            location=os.path.__file__,
            type=spec.ModuleType.PY_SOURCE,
        )

    if modpath[0] != "xml":
        return _spec_from_modpath(modpath, path, context)

    # handle _xmlplus: try it first, then fall back on the plain xml package
    try:
        return _spec_from_modpath(["_xmlplus"] + modpath[1:], path, context)
    except ImportError:
        return _spec_from_modpath(modpath, path, context)
379
380
def get_module_part(dotted_name: str, context_file: str | None = None) -> str:
    """Given a dotted name return the module part of the name :

    >>> get_module_part('astroid.as_string.dump')
    'astroid.as_string'

    :param dotted_name: full name of the identifier we are interested in

    :param context_file:
        context file to consider, necessary if the identifier has been
        introduced using a relative import unresolvable in the actual
        context (i.e. modutils)

    :raise ImportError:
        if a builtin module name is followed by more than one further
        component (only checked when a context file is given), or if
        resolution fails before index ``max(1, len(parts) - 2)`` of the
        split name

    :return:
        the module part of the name; this is ``dotted_name`` itself when
        every component resolves to a module or package

    XXX: deprecated, since it doesn't handle package precedence over module
    (see #10066)
    """
    # os.path trick
    # NOTE(review): this is a plain prefix test, so any name starting with
    # "os.path" (not only "os.path" and "os.path.<attr>") returns "os.path".
    if dotted_name.startswith("os.path"):
        return "os.path"
    parts = dotted_name.split(".")
    if context_file is not None:
        # first check for builtin module which won't be considered later
        # in that case (path != None)
        if parts[0] in BUILTIN_MODULES:
            if len(parts) > 2:
                raise ImportError(dotted_name)
            return parts[0]
        # don't use += or insert, we want a new list to be created !
    path: list[str] | None = None
    starti = 0
    # A leading empty component means the name started with a dot, i.e. an
    # explicit relative import.
    if parts[0] == "":
        assert (
            context_file is not None
        ), "explicit relative import, but no context_file?"
        path = []  # prevent resolving the import non-relatively
        starti = 1
    # for all further dots: change context (one directory up per extra dot)
    while starti < len(parts) and parts[starti] == "":
        starti += 1
        assert (
            context_file is not None
        ), "explicit relative import, but no context_file?"
        context_file = os.path.dirname(context_file)
    # Resolve increasingly long prefixes; the first prefix that cannot be
    # resolved marks the end of the module part.
    for i in range(starti, len(parts)):
        try:
            file_from_modpath(
                parts[starti : i + 1], path=path, context_file=context_file
            )
        except ImportError:
            # A failure this early is propagated instead of being treated as
            # "the remaining components are attributes".
            if i < max(1, len(parts) - 2):
                raise
            return ".".join(parts[:i])
    return dotted_name
440
441
def get_module_files(
    src_directory: str, blacklist: Sequence[str], list_all: bool = False
) -> list[str]:
    """Given a package directory return a list of all available python
    module's files in the package and its subpackages.

    :param src_directory:
        path of the directory corresponding to the package

    :param blacklist:
        names of files or directories to ignore

    :param list_all:
        get files from all paths, including ones without __init__.py

    :return:
        the list of all available python module's files in the package and
        its subpackages
    """
    init_names = {"__init__.py", "__init__.pyi"}
    collected: list[str] = []
    for directory, dirnames, filenames in os.walk(src_directory):
        if directory in blacklist:
            continue
        _handle_blacklist(blacklist, dirnames, filenames)
        is_package = not init_names.isdisjoint(filenames)
        if not (list_all or is_package):
            # Not a package: do not descend any further below this directory.
            dirnames[:] = ()
            continue
        collected.extend(
            os.path.join(directory, name)
            for name in filenames
            if _is_python_file(name)
        )
    return collected
475
476
def get_source_file(
    filename: str, include_no_ext: bool = False, prefer_stubs: bool = False
) -> str:
    """Given a python module's file name return the matching source file
    name (the filename will be returned identically if it's already an
    absolute path to a python source file).

    :param filename: python module's file name

    :param include_no_ext:
        also accept the extension-less path itself, provided *filename* has
        no extension and that path exists

    :param prefer_stubs:
        try the source extensions in the order that puts ``pyi`` first

    :raise NoSourceFile: if no source file exists on the file system

    :return: the absolute path of the source file if it exists
    """
    absolute = os.path.abspath(_path_from_filename(filename))
    stem, suffix = os.path.splitext(absolute)

    # An existing stub file that was asked for explicitly is returned as is.
    if suffix == ".pyi":
        stub_path = f"{stem}{suffix}"
        if os.path.exists(stub_path):
            return stub_path

    extensions = PY_SOURCE_EXTS_STUBS_FIRST if prefer_stubs else PY_SOURCE_EXTS
    for ext in extensions:
        source_path = f"{stem}.{ext}"
        if os.path.exists(source_path):
            return source_path

    if include_no_ext and not suffix and os.path.exists(stem):
        return stem
    raise NoSourceFile(absolute)
501
502
def is_python_source(filename: str | None) -> bool:
    """Return True if *filename* has a Python source file extension.

    A missing or empty *filename* is never a source file.
    """
    if not filename:
        return False
    _, dotted_ext = os.path.splitext(filename)
    # Drop the leading dot before comparing with the known extensions.
    return dotted_ext[1:] in PY_SOURCE_EXTS
508
509
510def is_stdlib_module(modname: str) -> bool:
511 """Return: True if the modname is in the standard library"""
512 return modname.split(".")[0] in stdlib_module_names
513
514
def module_in_path(modname: str, path: str | Iterable[str]) -> bool:
    """Try to determine if a module is imported from one of the specified paths

    Only the top-level package of *modname* is looked up.

    :param modname: name of the module

    :param path: a single path or several paths to consider

    :return:
        true if the normalized file of the module starts with one of the
        normalized paths; false if the module cannot be found or has no file
    """
    top_level = modname.split(".")[0]
    try:
        filename = file_from_modpath([top_level])
    except ImportError:
        # Import failed, we can't check path if we don't know it
        return False

    if filename is None:
        # No filename likely means it's compiled in, or potentially a namespace
        return False
    filename = _normalize_path(filename)

    # A lone string is treated as a one-element collection of paths.
    candidates = [path] if isinstance(path, str) else path
    return any(
        filename.startswith(_cache_normalize_path(entry)) for entry in candidates
    )
543
544
def is_standard_module(modname: str, std_path: Iterable[str] | None = None) -> bool:
    """Try to guess if a module is a standard python module (by default,
    see `std_path` parameter's description).

    Deprecated: emits a ``DeprecationWarning`` on every call.

    :param modname: name of the module we are interested in

    :param std_path:
        paths considered as standard; ``STD_LIB_DIRS`` when None

    :return:
        true if the module:
        - is located on the path listed in one of the directory in `std_path`
        - is a built-in module
    """
    warnings.warn(
        "is_standard_module() is deprecated. Use, is_stdlib_module() or module_in_path() instead",
        DeprecationWarning,
        stacklevel=2,
    )

    top_level = modname.split(".")[0]
    try:
        filename = file_from_modpath([top_level])
    except ImportError:
        # The import failed, so the module is most likely not standard.
        return False

    if filename is None:
        # Modules which are not living in a file are considered standard
        # (sys and __builtin__ for instance); we assume there are no
        # namespaces in stdlib.
        return not util.is_namespace(top_level)

    filename = _normalize_path(filename)

    def _lives_under(directories: Iterable[str]) -> bool:
        return any(
            filename.startswith(_cache_normalize_path(directory))
            for directory in directories
        )

    # Anything found in an external library directory is not standard.
    if _lives_under(EXT_LIB_DIRS):
        return False
    return _lives_under(STD_LIB_DIRS if std_path is None else std_path)
584
585
def is_relative(modname: str, from_file: str) -> bool:
    """Return true if the given module name is relative to the given
    file name.

    :param modname: name of the module we are interested in

    :param from_file:
        path of the module from which modname has been imported; if it is
        not a directory, its parent directory is used

    :return:
        false if that directory is listed in ``sys.path``; otherwise true
        if the top-level name of `modname` can be found in that directory
    """
    directory = from_file if os.path.isdir(from_file) else os.path.dirname(from_file)
    if directory in sys.path:
        return False
    top_level = modname.split(".", maxsplit=1)[0]
    found = importlib.machinery.PathFinder.find_spec(top_level, [directory])
    return bool(found)
607
608
@lru_cache(maxsize=1024)
def cached_os_path_isfile(path: str | os.PathLike[str]) -> bool:
    """Return ``os.path.isfile(path)``, memoized to avoid repetitive I/O.

    Up to 1024 results are kept in the cache.
    """
    is_file = os.path.isfile(path)
    return is_file
613
614
615# internal only functions #####################################################
616
617
def _spec_from_modpath(
    modpath: list[str],
    path: Sequence[str] | None = None,
    context: str | None = None,
) -> spec.ModuleSpec:
    """Given a mod path (i.e. split module / package name), return the
    corresponding spec.

    When *context* is given, the module is first searched in that directory
    only, falling back on *path* if that raises ImportError.

    The spec that was found is then adjusted according to its type:
    compiled modules are pointed at their source file when one exists,
    builtin modules get no location, and package directories are pointed at
    their ``__init__`` file.

    this function is used internally, see `file_from_modpath`'s
    documentation for more information
    """
    assert modpath
    # Only set when a context directory was given; used as the location of a
    # compiled module whose source file cannot be found.
    location = None
    if context is None:
        found_spec = spec.find_spec(modpath, path)
    else:
        try:
            found_spec = spec.find_spec(modpath, [context])
        except ImportError:
            found_spec = spec.find_spec(modpath, path)
        location = found_spec.location

    spec_type = found_spec.type
    if spec_type == spec.ModuleType.PY_COMPILED:
        assert found_spec.location is not None
        try:
            source_location = get_source_file(found_spec.location)
        except NoSourceFile:
            return found_spec._replace(location=location)
        return found_spec._replace(
            location=source_location, type=spec.ModuleType.PY_SOURCE
        )
    if spec_type == spec.ModuleType.C_BUILTIN:
        # integrated builtin module
        return found_spec._replace(location=None)
    if spec_type == spec.ModuleType.PKG_DIRECTORY:
        assert found_spec.location is not None
        init_file = _has_init(found_spec.location)
        return found_spec._replace(location=init_file, type=spec.ModuleType.PY_SOURCE)
    return found_spec
657
658
659def _is_python_file(filename: str) -> bool:
660 """Return true if the given filename should be considered as a python file.
661
662 .pyc and .pyo are ignored
663 """
664 return filename.endswith((".py", ".pyi", ".so", ".pyd", ".pyw"))
665
666
@lru_cache(maxsize=1024)
def _has_init(directory: str) -> str | None:
    """Return the path of the ``__init__`` file of *directory*, if any.

    Source extensions are tried first, then ``pyc`` and ``pyo``; the first
    existing file wins. None is returned when no such file exists.
    """
    stem = os.path.join(directory, "__init__")
    candidates = (stem + "." + ext for ext in (*PY_SOURCE_EXTS, "pyc", "pyo"))
    return next(
        (candidate for candidate in candidates if os.path.exists(candidate)), None
    )
677
678
def is_namespace(specobj: spec.ModuleSpec) -> bool:
    """Return True if the type of *specobj* is ``ModuleType.PY_NAMESPACE``."""
    namespace_type = spec.ModuleType.PY_NAMESPACE
    return specobj.type == namespace_type
681
682
def is_directory(specobj: spec.ModuleSpec) -> bool:
    """Return True if the type of *specobj* is ``ModuleType.PKG_DIRECTORY``."""
    directory_type = spec.ModuleType.PKG_DIRECTORY
    return specobj.type == directory_type
685
686
def is_module_name_part_of_extension_package_whitelist(
    module_name: str, package_whitelist: set[str]
) -> bool:
    """
    Returns True if the module name, or any of its leading dotted prefixes,
    is in the package whitelist.

    >>> is_module_name_part_of_extension_package_whitelist('numpy.core.umath', {'numpy'})
    True
    """
    # Build "a", "a.b", "a.b.c", ... from the dotted name.
    dotted_prefixes = itertools.accumulate(
        module_name.split("."), lambda head, tail: f"{head}.{tail}"
    )
    return any(prefix in package_whitelist for prefix in dotted_prefixes)