Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/smart_open/doctools.py: 72%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# Copyright (C) 2019 Radim Rehurek <me@radimrehurek.com>
3#
4# This code is distributed under the terms and conditions
5# from the MIT License (MIT).
6#
8"""Common functions for working with docstrings.
10For internal use only.
11"""
13# ruff: noqa: T201 # this module builds the open()/parse_uri() docstrings by writing to sys.stdout
15from __future__ import annotations
17import contextlib
18import inspect
19import io
20import sys
21from pathlib import Path
22from typing import TYPE_CHECKING, Any, cast
24from . import compression, transport
26if TYPE_CHECKING:
27 from collections.abc import Callable
#
# Python 3.13+ automatically trims docstrings (much like inspect.cleandoc),
# so on those versions neither the padding we emit nor the placeholder we
# search for carries a leading indent.
#
# NOTE(review): PLACEHOLDER must match the marker text inside the patched
# docstrings exactly, leading whitespace included -- confirm the width of the
# pre-3.13 padding against those docstrings.
#
LPAD = "" if sys.version_info >= (3, 13) else " "
PLACEHOLDER = LPAD + "smart_open/doctools.py magic goes here"
def extract_kwargs(docstring: str | None) -> list[list[Any]]:
    """Extract keyword argument documentation from a Google-style ``Args:`` section.

    Args:
        docstring: The docstring to extract keyword arguments from. ``None``
            and the empty string are accepted and yield an empty result.

    Returns:
        A list of ``[name, type, description_lines]`` triples. ``type`` is
        always an empty string since Google-style docstrings in this codebase
        don't carry argument types, and ``description_lines`` is a list of
        lines. The list is empty when there is no ``Args:`` header.

    Note:
        The implementation expects (after :func:`inspect.cleandoc`):

        1. The parameters are under an ``Args:`` header
        2. Argument lines start with 4 spaces of indent (``    name: desc``)
        3. Continuation lines for a description are indented with 8 spaces
        4. The ``Args:`` section ends with an empty line or another section
           header, i.e. any line that is back at column 0.

    Example:
        >>> docstring = '''The foo function.
        ... Args:
        ...     bar: This parameter is the bar.
        ...     baz: This parameter is the baz.
        ...
        ... '''
        >>> kwargs = extract_kwargs(docstring)
        >>> kwargs[0]
        ['bar', '', ['This parameter is the bar.']]
    """
    if not docstring:
        return []

    lines = inspect.cleandoc(docstring).split("\n")

    # Skip everything up to and including the ``Args:`` header.
    for idx, line in enumerate(lines):
        if line.rstrip() == "Args:":
            lines = lines[idx + 1 :]
            break
    else:
        return []

    # Spelled as products so the widths cannot be silently altered by tools
    # that normalise runs of whitespace inside literals.
    arg_indent = " " * 4
    continuation_indent = " " * 8

    kwargs: list[list[Any]] = []
    for line in lines:
        # The section ends at the first blank line, or at the first line that
        # is back at column 0 (the next section header, e.g. ``Returns:``).
        if not line.strip() or not line[0].isspace():
            break

        # Continuation line for the previous arg: 8-space (or deeper) indent.
        if line.startswith(continuation_indent):
            if kwargs:
                kwargs[-1][-1].append(line.strip())
            continue

        # Argument line: 4-space indent, then ``name: description``.
        if line.startswith(arg_indent):
            name, colon, desc = line[4:].partition(":")
            if not colon:
                continue
            desc = desc.strip()
            kwargs.append([name.strip(), "", [desc] if desc else []])

    return kwargs
def to_docstring(kwargs: list[Any], lpad: str = "") -> str:
    """Reconstruct a docstring from keyword argument info.

    Basically reverses :func:`extract_kwargs`.

    Args:
        kwargs: Output from the :func:`extract_kwargs` function: an iterable
            of ``(name, type, description_lines)`` triples.
        lpad: Padding string (from the left) prepended to every emitted line.

    Returns:
        The docstring snippet documenting the keyword arguments. Each
        argument produces a ``name: type`` line (just ``name:`` when the type
        is empty) followed by its description lines indented 4 spaces deeper.

    Example:
        >>> kwargs = [
        ...     ("bar", "str, optional", ["This parameter is the bar."]),
        ...     ("baz", "int, optional", ["This parameter is the baz."]),
        ... ]
        >>> print(to_docstring(kwargs), end="")
        bar: str, optional
            This parameter is the bar.
        baz: int, optional
            This parameter is the baz.
    """
    # Description lines sit one indent level (4 spaces) below the argument
    # name, which is the continuation indent extract_kwargs looks for when the
    # name itself is indented by 4.
    description_pad = lpad + " " * 4

    buf = io.StringIO()
    for name, type_, description in kwargs:
        if type_:
            buf.write(f"{lpad}{name}: {type_}\n")
        else:
            buf.write(f"{lpad}{name}:\n")
        for line in description:
            buf.write(f"{description_pad}{line}\n")
    return buf.getvalue()
def extract_examples_from_readme(indent: str | None = None) -> str:
    """Pull the usage examples out of this project's README.md file.

    Args:
        indent: String prepended to every returned line; expected to be some
            number of spaces. Defaults to the module-level ``LPAD``.

    Returns:
        The body of the fenced ``python`` code block found between the two
        doctools example markers, one indented line per source line, each
        terminated by a newline. If the README cannot be read or does not
        have the expected layout, ``indent + "See README.md"`` is returned
        instead.

    Note:
        Quite fragile: relies on the example markers and on the fenced code
        block inside README.md.
    """
    prefix = LPAD if indent is None else indent
    readme = Path(__file__).resolve().parent.parent / "README.md"
    try:
        contents = readme.read_text(encoding="utf-8")
        # Narrow down to the text between the two marker comments ...
        _, after_start = contents.split("<!-- doctools_before_examples -->", 1)
        between_markers = after_start.split("<!-- doctools_after_examples -->", 1)[0]
        # ... and then to the inside of the ```python ... ``` fence.
        _, after_fence = between_markers.split("```python", 1)
        code = after_fence.rsplit("```", 1)[0]
        pieces = [prefix + row + "\n" for row in code.strip("\n").split("\n")]
        return "".join(pieces)
    except Exception:  # noqa: BLE001 # README parsing is best-effort; any failure falls back gracefully
        return prefix + "See README.md"
def tweak_open_docstring(f: Callable[..., Any]) -> None:
    """Inject transport, compression and example sections into ``f``'s docstring.

    The generated text replaces the ``PLACEHOLDER`` marker in ``f.__doc__``.
    It lists every registered transport (heading, first line of the module
    docstring, documented ``open`` keyword arguments), the README examples
    and the supported compression extensions.

    Args:
        f: The function whose ``__doc__`` is rewritten in place. Nothing is
            done when it has no docstring.
    """
    #
    # The docstring can be None if -OO was passed to the interpreter; in that
    # case there is nothing to patch, so skip the registry walk and the
    # README read entirely.
    #
    if not f.__doc__:
        return

    buf = io.StringIO()
    seen = set()

    root_path = Path(__file__).parent.parent
    # NOTE(review): confirm the width of this extra indent against the
    # docstring this text is injected into.
    body_pad = LPAD + " "

    def emit(text: str = "") -> None:
        # Write to the buffer directly instead of redirecting sys.stdout:
        # the redirect is a process-wide side effect that would also capture
        # output printed concurrently by other threads.
        print(text, file=buf)

    emit(f"{LPAD}Transports:")
    emit()
    for scheme, submodule in sorted(transport._REGISTRY.items()):  # noqa: SLF001 # intra-package coupling
        # A submodule may be registered under several schemes; document it once.
        if scheme == transport.NO_SCHEME or submodule in seen:
            continue
        seen.add(submodule)

        try:
            schemes = submodule.SCHEMES
        except AttributeError:
            schemes = [scheme]

        relpath = Path(cast("str", submodule.__file__)).relative_to(root_path)
        heading = "{} ({})".format("/".join(schemes), relpath)
        summary = (submodule.__doc__ or "").split("\n")[0]
        emit(f"{body_pad}{heading}")
        emit(f"{body_pad}{'~' * len(heading)}")
        emit(f"{body_pad}{summary}")
        emit()

        kwargs = extract_kwargs(submodule.open.__doc__)
        if kwargs:
            emit(to_docstring(kwargs, lpad=body_pad))

    emit(f"{LPAD}Examples:")
    emit()
    emit(extract_examples_from_readme(indent=body_pad))

    emit(f"{LPAD}Codecs:")
    emit()
    emit(f"{body_pad}smart_open supports transparent compression and decompression for files")
    emit(f"{body_pad}with the following extensions:")
    emit()
    for extension in compression.get_supported_extensions():
        emit(f"{body_pad}* {extension}")
    emit()
    emit(f"{body_pad}The codec is selected based on the file extension.")

    f.__doc__ = f.__doc__.replace(PLACEHOLDER, buf.getvalue())
def tweak_parse_uri_docstring(f: Callable[..., Any]) -> None:
    """Inject supported schemes and example URIs into ``f``'s docstring.

    The generated text replaces the ``PLACEHOLDER`` marker in ``f.__doc__``.

    Args:
        f: The function whose ``__doc__`` is rewritten in place. Nothing is
            done when it has no docstring.
    """
    # The docstring can be None if -OO was passed to the interpreter; there
    # is nothing to patch in that case.
    if not f.__doc__:
        return

    buf = io.StringIO()
    seen = set()
    schemes = []
    examples = []

    for scheme, submodule in sorted(transport._REGISTRY.items()):  # noqa: SLF001 # intra-package coupling
        # A submodule may be registered under several schemes; visit it once.
        if scheme == transport.NO_SCHEME or submodule in seen:
            continue

        seen.add(submodule)

        # URI examples are optional on a transport submodule.
        with contextlib.suppress(AttributeError):
            examples.extend(submodule.URI_EXAMPLES)

        try:
            schemes.extend(submodule.SCHEMES)
        except AttributeError:
            schemes.append(scheme)

    # NOTE(review): confirm the width of this extra indent against the
    # docstring this text is injected into.
    body_pad = LPAD + " "

    # Print into the buffer directly rather than redirecting sys.stdout: the
    # redirect is a process-wide side effect that would also capture output
    # printed concurrently by other threads.
    print(f"{LPAD}Schemes:", file=buf)
    print(file=buf)
    for scheme in schemes:
        print(f"{body_pad}* {scheme}", file=buf)
    print(file=buf)
    print(f"{LPAD}Examples:", file=buf)
    print(file=buf)
    for example in examples:
        print(f"{body_pad}* {example}", file=buf)

    f.__doc__ = f.__doc__.replace(PLACEHOLDER, buf.getvalue())