Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/smart_open/doctools.py: 72%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

129 statements  

1# 

2# Copyright (C) 2019 Radim Rehurek <me@radimrehurek.com> 

3# 

4# This code is distributed under the terms and conditions 

5# from the MIT License (MIT). 

6# 

7 

8"""Common functions for working with docstrings. 

9 

10For internal use only. 

11""" 

12 

13# ruff: noqa: T201 # this module builds the open()/parse_uri() docstrings by writing to sys.stdout 

14 

15from __future__ import annotations 

16 

17import contextlib 

18import inspect 

19import io 

20import sys 

21from pathlib import Path 

22from typing import TYPE_CHECKING, Any, cast 

23 

24from . import compression, transport 

25 

26if TYPE_CHECKING: 

27 from collections.abc import Callable 

28 

#
# Python 3.13+ automatically trims docstrings (like inspect.cleandoc), so on
# those versions the placeholder appears without leading indentation and the
# generated text must not be left-padded either. Older versions keep the
# four-space indentation of the docstring body.
#
LPAD = "" if sys.version_info >= (3, 13) else "    "
PLACEHOLDER = f"{LPAD}smart_open/doctools.py magic goes here"

39 

40 

41def extract_kwargs(docstring: str | None) -> list[list[Any]]: 

42 """Extract keyword argument documentation from a Google-style ``Args:`` section. 

43 

44 Args: 

45 docstring: The docstring to extract keyword arguments from. 

46 

47 Returns: 

48 A list of ``[name, type, description_lines]`` triples. ``type`` is 

49 always an empty string since Google-style docstrings in this codebase 

50 don't carry argument types, and ``description_lines`` is a list of 

51 lines. 

52 

53 Note: 

54 The implementation expects: 

55 

56 1. The parameters are under an ``Args:`` header 

57 2. Argument lines start with 4 spaces of indent (`` name: desc``) 

58 3. Continuation lines for a description are indented with 8 spaces 

59 4. The ``Args:`` section ends with an empty line or another section header. 

60 

61 Example: 

62 >>> docstring = '''The foo function. 

63 ... Args: 

64 ... bar: This parameter is the bar. 

65 ... baz: This parameter is the baz. 

66 ... 

67 ... ''' 

68 >>> kwargs = extract_kwargs(docstring) 

69 >>> kwargs[0] 

70 ['bar', '', ['This parameter is the bar.']] 

71 """ 

72 if not docstring: 

73 return [] 

74 

75 lines = inspect.cleandoc(docstring).split("\n") 

76 

77 for idx, line in enumerate(lines): 

78 if line.rstrip() == "Args:": 

79 lines = lines[idx + 1 :] 

80 break 

81 else: 

82 return [] 

83 

84 kwargs: list[list[Any]] = [] 

85 for line in lines: 

86 if not line.strip(): # stop at the first empty line encountered 

87 break 

88 # Argument line: 4-space indent, then ``name: description``. 

89 if line.startswith(" ") and not line.startswith(" "): 

90 stripped = line[4:] 

91 if ":" not in stripped: 

92 continue 

93 name, desc = stripped.split(":", 1) 

94 kwargs.append([name.strip(), "", [desc.strip()] if desc.strip() else []]) 

95 continue 

96 # Continuation line for the previous arg: 8-space (or deeper) indent. 

97 if line.startswith(" ") and kwargs: 

98 kwargs[-1][-1].append(line.strip()) 

99 

100 return kwargs 

101 

102 

def to_docstring(kwargs: list[Any], lpad: str = "") -> str:
    """Render keyword argument triples back into docstring text.

    This is essentially the inverse of :func:`extract_kwargs`.

    Args:
        kwargs: An iterable of ``(name, type, description_lines)`` triples,
            such as the output of :func:`extract_kwargs`.
        lpad: String prepended to every emitted line (left padding).

    Returns:
        The docstring snippet documenting the keyword arguments. Each argument
        produces a ``name: type`` header line (just ``name:`` when the type is
        empty) followed by its description lines, indented four extra spaces.

    Example:
        >>> kwargs = [
        ...     ("bar", "str, optional", ["This parameter is the bar."]),
        ...     ("baz", "int, optional", ["This parameter is the baz."]),
        ... ]
        >>> print(to_docstring(kwargs), end="")
        bar: str, optional
            This parameter is the bar.
        baz: int, optional
            This parameter is the baz.
    """
    rendered: list[str] = []
    for name, type_, description in kwargs:
        header = f"{lpad}{name}: {type_}" if type_ else f"{lpad}{name}:"
        rendered.append(header + "\n")
        rendered.extend(f"{lpad}    {text}\n" for text in description)
    return "".join(rendered)

135 

136 

137def extract_examples_from_readme(indent: str | None = None) -> str: 

138 """Extract examples from this project's README.md file. 

139 

140 Args: 

141 indent: Prepend each line with this string. Should contain some number 

142 of spaces. 

143 

144 Returns: 

145 The examples as a single string. 

146 

147 Note: 

148 Quite fragile, depends on the example markers and the fenced code block 

149 inside the README.md file. 

150 """ 

151 if indent is None: 

152 indent = LPAD 

153 readme_path = Path(__file__).resolve().parent.parent / "README.md" 

154 try: 

155 text = readme_path.read_text(encoding="utf-8") 

156 body = text.split("<!-- doctools_before_examples -->", 1)[1] 

157 body = body.split("<!-- doctools_after_examples -->", 1)[0] 

158 # keep only the contents of the ```python ... ``` fenced code block 

159 body = body.split("```python", 1)[1].rsplit("```", 1)[0] 

160 lines = body.strip("\n").split("\n") 

161 return "".join(indent + line + "\n" for line in lines) 

162 except Exception: # noqa: BLE001 # README parsing is best-effort; any failure falls back gracefully 

163 return indent + "See README.md" 

164 

165 

def tweak_open_docstring(f: Callable[..., Any]) -> None:
    """Inject transport, compression and example sections into ``f``'s docstring.

    The generated text replaces the ``PLACEHOLDER`` marker inside
    ``f.__doc__``. It consists of:

    * a ``Transports:`` section with one entry per distinct submodule in the
      transport registry (heading, first line of the submodule docstring and
      the keyword arguments parsed from the submodule's ``open`` docstring);
    * an ``Examples:`` section taken from the README;
    * a ``Codecs:`` section listing the extensions reported by
      ``compression.get_supported_extensions()``.

    Args:
        f: The function whose docstring is rewritten in place. Nothing
            happens when the docstring is missing or empty.
    """
    out = io.StringIO()
    visited = set()

    package_root = Path(__file__).parent.parent
    body_pad = LPAD + "    "

    # The text is produced with plain print() calls captured into ``out``.
    with contextlib.redirect_stdout(out):
        print(f"{LPAD}Transports:")
        print()
        for scheme, submodule in sorted(transport._REGISTRY.items()):  # noqa: SLF001  # intra-package coupling
            # Several schemes may map to the same submodule; document each
            # submodule only once and skip the scheme-less entry.
            if scheme == transport.NO_SCHEME or submodule in visited:
                continue
            visited.add(submodule)

            # Submodules without a SCHEMES attribute are documented under the
            # scheme they were registered with.
            schemes = getattr(submodule, "SCHEMES", [scheme])

            relpath = Path(cast("str", submodule.__file__)).relative_to(package_root)
            heading = f"{'/'.join(schemes)} ({relpath})"
            underline = "~" * len(heading)
            print(f"{body_pad}{heading}")
            print(f"{body_pad}{underline}")
            summary = (submodule.__doc__ or "").split("\n")[0]
            print(f"{body_pad}{summary}")
            print()

            kwargs = extract_kwargs(submodule.open.__doc__)
            if kwargs:
                print(to_docstring(kwargs, lpad=body_pad))

        print(f"{LPAD}Examples:")
        print()
        print(extract_examples_from_readme(indent=body_pad))

        print(f"{LPAD}Codecs:")
        print()
        print(f"{body_pad}smart_open supports transparent compression and decompression for files")
        print(f"{body_pad}with the following extensions:")
        print()
        for extension in compression.get_supported_extensions():
            print(f"{body_pad}* {extension}")
        print()
        print(f"{body_pad}The codec is selected based on the file extension.")

    #
    # The docstring can be None if -OO was passed to the interpreter.
    #
    if not f.__doc__:
        return
    f.__doc__ = f.__doc__.replace(PLACEHOLDER, out.getvalue())

217 

218 

def tweak_parse_uri_docstring(f: Callable[..., Any]) -> None:
    """Inject supported schemes and example URIs into ``f``'s docstring.

    Walks the transport registry in sorted scheme order, visiting each
    distinct submodule once, and collects its ``SCHEMES`` (falling back to the
    registered scheme when the attribute is absent) and its ``URI_EXAMPLES``
    (skipped when absent). The resulting ``Schemes:`` and ``Examples:`` bullet
    lists replace the ``PLACEHOLDER`` marker inside ``f.__doc__``.

    Args:
        f: The function whose docstring is rewritten in place. Nothing
            happens when the docstring is missing or empty.
    """
    visited = set()
    all_schemes = []
    all_examples = []

    for scheme, submodule in sorted(transport._REGISTRY.items()):  # noqa: SLF001  # intra-package coupling
        if scheme == transport.NO_SCHEME or submodule in visited:
            continue
        visited.add(submodule)

        # URI examples are optional on a transport submodule.
        all_examples.extend(getattr(submodule, "URI_EXAMPLES", ()))
        # Without a SCHEMES attribute, list the scheme the submodule was
        # registered under.
        all_schemes.extend(getattr(submodule, "SCHEMES", [scheme]))

    body_pad = LPAD + "    "
    out = io.StringIO()

    with contextlib.redirect_stdout(out):
        print(f"{LPAD}Schemes:")
        print()
        for name in all_schemes:
            print(f"{body_pad}* {name}")
        print()
        print(f"{LPAD}Examples:")
        print()
        for uri in all_examples:
            print(f"{body_pad}* {uri}")

    # The docstring can be None if -OO was passed to the interpreter.
    if not f.__doc__:
        return
    f.__doc__ = f.__doc__.replace(PLACEHOLDER, out.getvalue())