1"""Automatic discovery of Python modules and packages (for inclusion in the
2distribution) and other config values.
3
4For the purposes of this module, the following nomenclature is used:
5
6- "src-layout": a directory representing a Python project that contains a "src"
7 folder. Everything under the "src" folder is meant to be included in the
8 distribution when packaging the project. Example::
9
10 .
11 ├── tox.ini
12 ├── pyproject.toml
13 └── src/
14 └── mypkg/
15 ├── __init__.py
16 ├── mymodule.py
17 └── my_data_file.txt
18
- "flat-layout": a Python project that does not use "src-layout" but instead
  has a directory under the project root for each package::
21
22 .
23 ├── tox.ini
24 ├── pyproject.toml
25 └── mypkg/
26 ├── __init__.py
27 ├── mymodule.py
28 └── my_data_file.txt
29
- "single-module": a project that contains a single Python script directly
  under the project root (no directory used)::
32
33 .
34 ├── tox.ini
35 ├── pyproject.toml
36 └── mymodule.py
37
38"""
39
40from __future__ import annotations
41
42import itertools
43import os
44from fnmatch import fnmatchcase
45from glob import glob
46from pathlib import Path
47from typing import TYPE_CHECKING, Iterable, Iterator, Mapping
48
49import _distutils_hack.override # noqa: F401
50
51from ._path import StrPath
52
53from distutils import log
54from distutils.util import convert_path
55
# Alias used as the return annotation of the finders' ``_find_iter`` methods.
StrIter = Iterator[str]

# Shorthand used in this module to flatten an iterable of iterables.
chain_iter = itertools.chain.from_iterable
59
60if TYPE_CHECKING:
61 from setuptools import Distribution
62
63
def _valid_name(path: StrPath) -> bool:
    """Tell whether the last component of ``path`` is a valid identifier.

    Names that are not identifiers cannot be imported directly, so callers
    use this check to ignore them.
    """
    _parent, leaf = os.path.split(path)
    return leaf.isidentifier()
67
68
class _Filter:
    """
    Callable predicate built from a collection of shell-style patterns.

    Calling an instance with a string tells whether the string matches at
    least one of the patterns (case-sensitive ``fnmatch`` rules), while the
    ``in`` operator tells whether a pattern was literally registered.
    """

    def __init__(self, *patterns: str):
        # A dict keyed by pattern removes duplicates, keeps the given order
        # and allows hash-based literal lookups in ``__contains__``.
        self._patterns = dict.fromkeys(patterns)

    def __call__(self, item: str) -> bool:
        for pattern in self._patterns:
            if fnmatchcase(item, pattern):
                return True
        return False

    def __contains__(self, item: str) -> bool:
        return item in self._patterns
83
84
class _Finder:
    """Common entry point shared by the module and package finders."""

    ALWAYS_EXCLUDE: tuple[str, ...] = ()
    DEFAULT_EXCLUDE: tuple[str, ...] = ()

    @classmethod
    def find(
        cls,
        where: StrPath = '.',
        exclude: Iterable[str] = (),
        include: Iterable[str] = ('*',),
    ) -> list[str]:
        """List the Python items (packages or modules, depending on the
        concrete finder) discovered inside the directory 'where'.

        'where' is the root directory of the search. It is expected in
        "cross-platform" (i.e. URL-style) form and gets converted to the
        local path syntax before being used.

        'exclude' holds the names to be left out; '*' works as a wildcard.
        For package finders, 'foo.*' leaves out every subpackage of 'foo'
        (but keeps 'foo' itself). When it is empty, the finder's
        ``DEFAULT_EXCLUDE`` is used instead. The patterns in
        ``ALWAYS_EXCLUDE`` are applied in every case.

        'include' holds the names to be kept and accepts the same shell
        style wildcards as 'exclude'. By default everything found is kept.
        """
        if not exclude:
            exclude = cls.DEFAULT_EXCLUDE

        search_root = convert_path(str(where))
        rejected = _Filter(*cls.ALWAYS_EXCLUDE, *exclude)
        accepted = _Filter(*include)
        return list(cls._find_iter(search_root, rejected, accepted))

    @classmethod
    def _find_iter(cls, where: StrPath, exclude: _Filter, include: _Filter) -> StrIter:
        """Yield the discovered names; concrete finders must override this."""
        raise NotImplementedError
129
130
class PackageFinder(_Finder):
    """
    Finder that lists the Python packages located under a directory.
    """

    ALWAYS_EXCLUDE = ("ez_setup", "*__pycache__")

    @classmethod
    def _find_iter(cls, where: StrPath, exclude: _Filter, include: _Filter) -> StrIter:
        """
        Yield the dotted name of each package under 'where' that is accepted
        by the 'include' filter and not rejected by the 'exclude' filter.
        """
        for current, subdirs, _files in os.walk(str(where), followlinks=True):
            # Snapshot the sub-directories and clear the list ``os.walk``
            # descends into: only the entries put back below get visited.
            candidates = list(subdirs)
            del subdirs[:]

            for entry in candidates:
                entry_path = os.path.join(current, entry)
                relative = os.path.relpath(entry_path, where)
                dotted = relative.replace(os.path.sep, '.')

                # Directory trees that are not valid packages are skipped
                # entirely (they are never put back in ``subdirs``).
                is_candidate = '.' not in entry and cls._looks_like_package(
                    entry_path, dotted
                )
                if not is_candidate:
                    continue

                # Report the package when the filters allow it.
                if include(dotted) and not exclude(dotted):
                    yield dotted

                # Descend unless a literal exclude pattern covers the whole
                # subtree; children may still be packages even when the
                # parent itself was excluded.
                prunable = f"{dotted}*" in exclude or f"{dotted}.*" in exclude
                if not prunable:
                    subdirs.append(entry)

    @staticmethod
    def _looks_like_package(path: StrPath, _package_name: str) -> bool:
        """Tell whether the directory contains an ``__init__.py`` file."""
        init_file = os.path.join(path, '__init__.py')
        return os.path.isfile(init_file)
174
175
class PEP420PackageFinder(PackageFinder):
    """Package finder that does not require an ``__init__.py`` file.

    Every directory visited by the inherited search is considered a package
    candidate, which also covers namespace packages (:pep:`420`).
    """

    @staticmethod
    def _looks_like_package(_path: StrPath, _package_name: str) -> bool:
        """Accept any directory, regardless of its path or dotted name."""
        return True
180
181
class ModuleFinder(_Finder):
    """Find isolated Python modules.
    Only the ``*.py`` files placed directly in the searched directory are
    considered: subdirectories are **not** visited.
    """

    @classmethod
    def _find_iter(cls, where: StrPath, exclude: _Filter, include: _Filter) -> StrIter:
        """Yield the name (file name without extension) of each accepted module."""
        pattern = os.path.join(where, "*.py")
        for script in glob(pattern):
            stem = os.path.splitext(os.path.basename(script))[0]

            # Names that cannot be imported are dropped before filtering.
            if cls._looks_like_module(stem) and include(stem) and not exclude(stem):
                yield stem

    _looks_like_module = staticmethod(_valid_name)
199
200
201# We have to be extra careful in the case of flat layout to not include files
202# and directories not meant for distribution (e.g. tool-related)
203
204
class FlatLayoutPackageFinder(PEP420PackageFinder):
    """Package finder for flat layouts, with a list of reserved directory names
    that are excluded by default.
    """

    _EXCLUDE = (
        "ci",
        "bin",
        "debian",
        "doc",
        "docs",
        "documentation",
        "manpages",
        "news",
        "newsfragments",
        "changelog",
        "test",
        "tests",
        "unit_test",
        "unit_tests",
        "example",
        "examples",
        "scripts",
        "tools",
        "util",
        "utils",
        "python",
        "build",
        "dist",
        "venv",
        "env",
        "requirements",
        # ---- Task runners / Build tools ----
        "tasks",  # invoke
        "fabfile",  # fabric
        "site_scons",  # SCons
        # ---- Other tools ----
        "benchmark",
        "benchmarks",
        "exercise",
        "exercises",
        "htmlcov",  # Coverage.py
        # ---- Hidden directories/Private packages ----
        "[._]*",
    )

    # Each reserved name is followed by the pattern matching its subpackages.
    DEFAULT_EXCLUDE = tuple(
        pattern for name in _EXCLUDE for pattern in (name, f"{name}.*")
    )
    """Reserved package names"""

    @staticmethod
    def _looks_like_package(_path: StrPath, package_name: str) -> bool:
        """Validate the dotted ``package_name`` component by component.

        The first component must be an identifier or end with ``-stubs``
        (:pep:`561`); the remaining components must all be identifiers.
        """
        root, *nested = package_name.split('.')
        if not (root.isidentifier() or root.endswith("-stubs")):
            return False
        return all(part.isidentifier() for part in nested)
256
257
class FlatLayoutModuleFinder(ModuleFinder):
    """Module finder for flat layouts, with a list of reserved top-level
    module names that are excluded by default.
    """

    DEFAULT_EXCLUDE = (
        "setup",
        "conftest",
        "test",
        "tests",
        "example",
        "examples",
        "build",
        # ---- Task runners ----
        "toxfile",
        "noxfile",
        "pavement",
        "dodo",
        "tasks",
        "fabfile",
        # ---- Other tools ----
        "[Ss][Cc]onstruct",  # SCons
        "conanfile",  # Conan: C/C++ build tool
        "manage",  # Django
        "benchmark",
        "benchmarks",
        "exercise",
        "exercises",
        # ---- Hidden files/Private modules ----
        "[._]*",
    )
    """Reserved top-level module names"""
286
287
def _find_packages_within(root_pkg: str, pkg_dir: StrPath) -> list[str]:
    """Return ``root_pkg`` followed by the packages found inside ``pkg_dir``.

    The names found by ``PEP420PackageFinder`` are relative to ``pkg_dir``,
    so each of them is prefixed with ``root_pkg`` and a dot.
    """
    children = PEP420PackageFinder.find(pkg_dir)
    found = [root_pkg]
    found.extend(".".join((root_pkg, child)) for child in children)
    return found
291
292
class ConfigDiscovery:
    """Fill-in metadata and options that can be automatically derived
    (from other metadata/options, the file system or conventions)
    """

    def __init__(self, distribution: Distribution):
        self.dist = distribution
        self._called = False  # set once discovery ran to completion
        self._disabled = False
        self._skip_ext_modules = False

    def _disable(self):
        """Internal API to disable automatic discovery"""
        self._disabled = True

    def _ignore_ext_modules(self):
        """Internal API to disregard ext_modules.

        Normally auto-discovery would not be triggered if ``ext_modules`` are set
        (this is done for backward compatibility with existing packages relying on
        ``setup.py`` or ``setup.cfg``). However, ``setuptools`` can call this function
        to ignore given ``ext_modules`` and proceed with the auto-discovery if
        ``packages`` and ``py_modules`` are not given (e.g. when using pyproject.toml
        metadata).
        """
        self._skip_ext_modules = True

    @property
    def _root_dir(self) -> StrPath:
        """Directory used as the starting point for discovery: ``dist.src_root``
        when set, the current directory otherwise.
        """
        # The best is to wait until `src_root` is set in dist, before using _root_dir.
        return self.dist.src_root or os.curdir

    @property
    def _package_dir(self) -> dict[str, str]:
        """``dist.package_dir``, or a new empty dict when it is ``None``."""
        if self.dist.package_dir is None:
            return {}
        return self.dist.package_dir

    def __call__(self, force=False, name=True, ignore_ext_modules=False):
        """Automatically discover missing configuration fields
        and modifies the given ``distribution`` object in-place.

        Note that by default this will only have an effect the first time the
        ``ConfigDiscovery`` object is called.

        To repeatedly invoke automatic discovery (e.g. when the project
        directory changes), please use ``force=True`` (or create a new
        ``ConfigDiscovery`` instance).

        :param force: run even if discovery already ran or was disabled.
        :param name: also try to derive the distribution name.
        :param ignore_ext_modules: do not consider ``ext_modules`` as an
            explicit package/module listing.
        """
        if force is False and (self._called or self._disabled):
            # Avoid overhead of multiple calls
            return

        self._analyse_package_layout(ignore_ext_modules)
        if name:
            self.analyse_name()  # depends on ``packages`` and ``py_modules``

        self._called = True

    def _explicitly_specified(self, ignore_ext_modules: bool) -> bool:
        """``True`` if the user has specified some form of package/module listing"""
        ignore_ext_modules = ignore_ext_modules or self._skip_ext_modules
        has_ext_modules = not (self.dist.ext_modules is None or ignore_ext_modules)
        # ``bool(...)`` guarantees the annotated return type: the last operand
        # is an arbitrary object (or ``None``), not a boolean.
        return bool(
            self.dist.packages is not None
            or self.dist.py_modules is not None
            or has_ext_modules
            or getattr(self.dist, "configuration", None)
            # ^ Some projects use numpy.distutils.misc_util.Configuration
        )

    def _analyse_package_layout(self, ignore_ext_modules: bool) -> bool:
        """Try each supported layout in turn until one of them applies.

        Returns ``True`` when the listing was explicitly given (nothing is
        discovered in that case) or when one of the layouts was detected.
        """
        if self._explicitly_specified(ignore_ext_modules):
            # For backward compatibility, just try to find modules/packages
            # when nothing is given
            return True

        log.debug(
            "No `packages` or `py_modules` configuration, performing "
            "automatic discovery."
        )

        return (
            self._analyse_explicit_layout()
            or self._analyse_src_layout()
            # flat-layout is the trickiest for discovery so it should be last
            or self._analyse_flat_layout()
        )

    def _analyse_explicit_layout(self) -> bool:
        """The user can explicitly give a package layout via ``package_dir``"""
        package_dir = self._package_dir.copy()  # don't modify directly
        package_dir.pop("", None)  # This falls under the "src-layout" umbrella
        root_dir = self._root_dir

        if not package_dir:
            return False

        log.debug(f"`explicit-layout` detected -- analysing {package_dir}")
        pkgs = chain_iter(
            _find_packages_within(pkg, os.path.join(root_dir, parent_dir))
            for pkg, parent_dir in package_dir.items()
        )
        self.dist.packages = list(pkgs)
        log.debug(f"discovered packages -- {self.dist.packages}")
        return True

    def _analyse_src_layout(self) -> bool:
        """Try to find all packages or modules under the ``src`` directory
        (or anything pointed by ``package_dir[""]``).

        The "src-layout" is relatively safe for automatic discovery.
        We assume that everything within is meant to be included in the
        distribution.

        If ``package_dir[""]`` is not given, but the ``src`` directory exists,
        this function will set ``package_dir[""] = "src"``.
        """
        package_dir = self._package_dir
        src_dir = os.path.join(self._root_dir, package_dir.get("", "src"))
        if not os.path.isdir(src_dir):
            return False

        log.debug(f"`src-layout` detected -- analysing {src_dir}")
        package_dir.setdefault("", os.path.basename(src_dir))
        self.dist.package_dir = package_dir  # persist eventual modifications
        self.dist.packages = PEP420PackageFinder.find(src_dir)
        self.dist.py_modules = ModuleFinder.find(src_dir)
        log.debug(f"discovered packages -- {self.dist.packages}")
        log.debug(f"discovered py_modules -- {self.dist.py_modules}")
        return True

    def _analyse_flat_layout(self) -> bool:
        """Try to find all packages and modules under the project root.

        Since the ``flat-layout`` is more dangerous in terms of accidentally including
        extra files/directories, this function is more conservative and will raise an
        error if multiple packages or modules are found.

        This assumes that multi-package dists are uncommon and refuse to support that
        use case in order to be able to prevent unintended errors.
        """
        log.debug(f"`flat-layout` detected -- analysing {self._root_dir}")
        return self._analyse_flat_packages() or self._analyse_flat_modules()

    def _analyse_flat_packages(self) -> bool:
        """Discover packages under the project root.

        Returns ``True`` when at least one top-level package (type stubs and
        nested packages not counted) was found.
        """
        self.dist.packages = FlatLayoutPackageFinder.find(self._root_dir)
        top_level = remove_nested_packages(remove_stubs(self.dist.packages))
        log.debug(f"discovered packages -- {self.dist.packages}")
        self._ensure_no_accidental_inclusion(top_level, "packages")
        return bool(top_level)

    def _analyse_flat_modules(self) -> bool:
        """Discover modules under the project root.

        Returns ``True`` when at least one module was found.
        """
        self.dist.py_modules = FlatLayoutModuleFinder.find(self._root_dir)
        log.debug(f"discovered py_modules -- {self.dist.py_modules}")
        self._ensure_no_accidental_inclusion(self.dist.py_modules, "modules")
        return bool(self.dist.py_modules)

    def _ensure_no_accidental_inclusion(self, detected: list[str], kind: str):
        """Raise ``PackageDiscoveryError`` if more than one item was detected.

        ``kind`` ("packages" or "modules") is only used in the error message.
        """
        if len(detected) > 1:
            from inspect import cleandoc

            from setuptools.errors import PackageDiscoveryError

            msg = f"""Multiple top-level {kind} discovered in a flat-layout: {detected}.

            To avoid accidental inclusion of unwanted files or directories,
            setuptools will not proceed with this build.

            If you are trying to create a single distribution with multiple {kind}
            on purpose, you should not rely on automatic discovery.
            Instead, consider the following options:

            1. set up custom discovery (`find` directive with `include` or `exclude`)
            2. use a `src-layout`
            3. explicitly set `py_modules` or `packages` with a list of names

            To find more information, look for "package discovery" on setuptools docs.
            """
            raise PackageDiscoveryError(cleandoc(msg))

    def analyse_name(self):
        """The packages/modules are the essential contribution of the author.
        Therefore the name of the distribution can be derived from them.
        """
        if self.dist.metadata.name or self.dist.name:
            # get_name() is not reliable (can return "UNKNOWN")
            return

        log.debug("No `name` configuration, performing automatic discovery")

        name = (
            self._find_name_single_package_or_module()
            or self._find_name_from_packages()
        )
        if name:
            self.dist.metadata.name = name

    def _find_name_single_package_or_module(self) -> str | None:
        """Exactly one module or package"""
        for field in ('packages', 'py_modules'):
            items = getattr(self.dist, field, None) or []
            if len(items) == 1:
                log.debug(f"Single module/package detected, name: {items[0]}")
                return items[0]

        return None

    def _find_name_from_packages(self) -> str | None:
        """Try to find the root package that is not a PEP 420 namespace"""
        if not self.dist.packages:
            return None

        packages = remove_stubs(sorted(self.dist.packages, key=len))
        package_dir = self.dist.package_dir or {}

        parent_pkg = find_parent_package(packages, package_dir, self._root_dir)
        if parent_pkg:
            log.debug(f"Common parent package detected, name: {parent_pkg}")
            return parent_pkg

        log.warn("No parent package detected, impossible to derive `name`")
        return None
517
518
def remove_nested_packages(packages: list[str]) -> list[str]:
    """Keep only the packages that are not nested inside another listed one.

    The result is ordered by name length (shortest first).

    >>> remove_nested_packages(["a", "a.b1", "a.b2", "a.b1.c1"])
    ['a']
    >>> remove_nested_packages(["a", "b", "c.d", "c.d.e.f", "g.h", "a.a1"])
    ['a', 'b', 'c.d', 'g.h']
    """
    ordered = sorted(packages, key=len)
    remaining = list(ordered)
    # Walk from the longest name to the shortest: deletions only affect
    # positions already visited, so each index still refers to the same item.
    for index in range(len(ordered) - 1, -1, -1):
        candidate = ordered[index]
        if any(candidate.startswith(f"{parent}.") for parent in remaining):
            del remaining[index]

    return remaining
535
536
def remove_stubs(packages: list[str]) -> list[str]:
    """Drop the type stub packages (:pep:`561`) from a list of packages.

    A package is a stub when its top-level component ends with ``-stubs``.

    >>> remove_stubs(["a", "a.b", "a-stubs", "a-stubs.b.c", "b", "c-stubs"])
    ['a', 'a.b', 'b']
    """
    kept = []
    for pkg in packages:
        top_level, _dot, _rest = pkg.partition(".")
        if not top_level.endswith("-stubs"):
            kept.append(pkg)
    return kept
544
545
def find_parent_package(
    packages: list[str], package_dir: Mapping[str, str], root_dir: StrPath
) -> str | None:
    """Return the shortest common parent package that has an ``__init__.py``
    file (i.e. that is not a namespace), or ``None`` when there is none.
    """
    ordered = sorted(packages, key=len)
    ancestors = []
    for position, candidate in enumerate(ordered):
        prefix = f"{candidate}."
        # Names are ordered by length, so a common ancestor must be a prefix
        # of every name that follows it. Collection stops at the first
        # divergence (e.g. multiple root packages leave the list empty).
        if any(not other.startswith(prefix) for other in ordered[position + 1 :]):
            break
        ancestors.append(candidate)

    for candidate in ancestors:
        location = find_package_path(candidate, package_dir, root_dir)
        if os.path.isfile(os.path.join(location, "__init__.py")):
            return candidate

    return None
568
569
def find_package_path(
    name: str, package_dir: Mapping[str, str], root_dir: StrPath
) -> str:
    """Compute the location on disk where the package ``name`` is expected,
    taking the ``package_dir`` option into account.

    >>> path = find_package_path("my.pkg", {"": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './root/is/nested/my/pkg'

    >>> path = find_package_path("my.pkg", {"my": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './root/is/nested/pkg'

    >>> path = find_package_path("my.pkg", {"my.pkg": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './root/is/nested'

    >>> path = find_package_path("other.pkg", {"my.pkg": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './other/pkg'
    """
    segments = name.split(".")
    # Try the longest (most specific) dotted prefix first.
    for cut in reversed(range(1, len(segments) + 1)):
        prefix = ".".join(segments[:cut])
        if prefix not in package_dir:
            continue
        return os.path.join(root_dir, package_dir[prefix], *segments[cut:])

    # No prefix is mapped: fall back to the root entry (``""``), if any.
    fallback = package_dir.get("") or ""
    return os.path.join(root_dir, *fallback.split("/"), *segments)
602
603
def construct_package_dir(packages: list[str], package_path: StrPath) -> dict[str, str]:
    """Build a ``package_dir`` mapping for the non-nested ``packages``.

    Each top-level package is mapped to a ``/``-separated path made of the
    components of ``package_path`` followed by the components of its name.
    """
    parent_pkgs = remove_nested_packages(packages)
    base_parts = Path(package_path).parts
    mapping = {}
    for pkg in parent_pkgs:
        mapping[pkg] = "/".join((*base_parts, *pkg.split(".")))
    return mapping