1"""
2========
3numpydoc
4========
5
6Sphinx extension that handles docstrings in the Numpy standard format. [1]
7
8It will:
9
10- Convert Parameters etc. sections to field lists.
11- Convert See Also section to a See also entry.
12- Renumber references.
13- Extract the signature from the docstring, if it can't be determined
14 otherwise.
15
16.. [1] https://github.com/numpy/numpydoc
17
18"""

import hashlib
import inspect
import itertools
import pydoc
import re
from collections.abc import Callable
from copy import deepcopy

from docutils.nodes import Text, citation, comment, inline, reference, section
from sphinx.addnodes import desc_content, pending_xref
from sphinx.util import logging

from . import __version__
from .docscrape_sphinx import get_doc_object
from .validate import get_validation_checks, validate
from .xref import DEFAULT_LINKS

logger = logging.getLogger(__name__)

HASH_LEN = 12


def _traverse_or_findall(node, condition, **kwargs):
    """Triage node.traverse (docutils <0.18.1) vs node.findall.

    TODO: This check can be removed when the minimum supported docutils version
    for numpydoc is docutils>=0.18.1
    """
    return (
        node.findall(condition, **kwargs)
        if hasattr(node, "findall")
        else node.traverse(condition, **kwargs)
    )
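
# Illustrative call (assuming ``doc`` is a docutils document node):
#
#     _traverse_or_findall(doc, citation, descend=True)
#
# behaves like ``doc.findall(citation, descend=True)`` on docutils >= 0.18.1
# and falls back to ``doc.traverse`` on older versions.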


def rename_references(app, what, name, obj, options, lines):
    # decorate reference numbers so that there are no duplicates
    # these are later undecorated in the doctree, in relabel_references
    references = set()
    for line in lines:
        line = line.strip()
        m = re.match(
            r"^\.\. +\[(%s)\]" % app.config.numpydoc_citation_re, line, re.IGNORECASE
        )
        if m:
            references.add(m.group(1))

    if references:
        # we use a hash to mangle the reference name to avoid invalid names
        sha = hashlib.sha256()
        sha.update(name.encode("utf8"))
        prefix = "R" + sha.hexdigest()[:HASH_LEN]

        for r in references:
            new_r = prefix + "-" + r
            for i, line in enumerate(lines):
                lines[i] = lines[i].replace(f"[{r}]_", f"[{new_r}]_")
                lines[i] = lines[i].replace(f".. [{r}]", f".. [{new_r}]")
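
# Illustrative effect (sketch): for an object named ``pkg.func`` whose
# docstring defines ``.. [1] Some reference`` and cites it as ``[1]_``, both
# occurrences are rewritten to a prefixed label of the form
# ``R<12 hex digits>-1``, so citations coming from different docstrings cannot
# collide when Sphinx merges them into one document.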


def _is_cite_in_numpydoc_docstring(citation_node):
    # Find DEDUPLICATION_TAG in comment as last node of sibling section

    # XXX: I failed to use citation_node.traverse to do this:
    section_node = citation_node.parent

    def is_docstring_section(node):
        return isinstance(node, (section, desc_content))

    while not is_docstring_section(section_node):
        section_node = section_node.parent
        if section_node is None:
            return False

    sibling_sections = itertools.chain(
        _traverse_or_findall(
            section_node,
            is_docstring_section,
            include_self=True,
            descend=False,
            siblings=True,
        )
    )
    for sibling_section in sibling_sections:
        if not sibling_section.children:
            continue

        for child in sibling_section.children[::-1]:
            if not isinstance(child, comment):
                continue

            if child.rawsource.strip() == DEDUPLICATION_TAG.strip():
                return True

    return False


def relabel_references(app, doc):
    # Change 'hash-ref' to 'ref' in label text
    for citation_node in _traverse_or_findall(doc, citation):
        if not _is_cite_in_numpydoc_docstring(citation_node):
            continue
        label_node = citation_node[0]
        prefix, _, new_label = label_node[0].astext().partition("-")
        assert len(prefix) == HASH_LEN + 1
        new_text = Text(new_label)
        label_node.replace(label_node[0], new_text)

        for id_ in citation_node["backrefs"]:
            ref = doc.ids[id_]
            ref_text = ref[0]

            # Sphinx has created pending_xref nodes with [reftext] text.
            def matching_pending_xref(node):
                return (
                    isinstance(node, pending_xref)
                    and node[0].astext() == f"[{ref_text}]"
                )

            for xref_node in _traverse_or_findall(ref.parent, matching_pending_xref):
                xref_node.replace(xref_node[0], Text(f"[{new_text}]"))
            ref.replace(ref_text, new_text.copy())
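
# Illustrative effect (sketch): a citation label decorated by
# ``rename_references`` as ``R0123456789ab-1`` (hypothetical hash prefix) is
# displayed to the reader as plain ``1`` again once the doctree is read.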


def clean_backrefs(app, doc, docname):
    # only::latex directive has resulted in citation backrefs without reference
    known_ref_ids = set()
    for ref in _traverse_or_findall(doc, reference, descend=True):
        for id_ in ref["ids"]:
            known_ref_ids.add(id_)
    # some extensions produce backrefs to inline elements
    for ref in _traverse_or_findall(doc, inline, descend=True):
        for id_ in ref["ids"]:
            known_ref_ids.add(id_)
    for citation_node in _traverse_or_findall(doc, citation, descend=True):
        # remove backrefs to non-existent refs
        citation_node["backrefs"] = [
            id_ for id_ in citation_node["backrefs"] if id_ in known_ref_ids
        ]


DEDUPLICATION_TAG = " !! processed by numpydoc !!"
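# This tag is appended by ``mangle_docstrings`` (as an rst comment: a ".."
# line followed by the tag) to every docstring it has processed;
# ``_is_cite_in_numpydoc_docstring`` looks for the resulting comment node to
# decide which citations were produced by numpydoc.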


def mangle_docstrings(app, what, name, obj, options, lines):
    if DEDUPLICATION_TAG in lines:
        return
    show_inherited_class_members = app.config.numpydoc_show_inherited_class_members
    if isinstance(show_inherited_class_members, dict):
        try:
            show_inherited_class_members = show_inherited_class_members[name]
        except KeyError:
            show_inherited_class_members = True

    cfg = {
        "use_plots": app.config.numpydoc_use_plots,
        "show_class_members": app.config.numpydoc_show_class_members,
        "show_inherited_class_members": show_inherited_class_members,
        "class_members_toctree": app.config.numpydoc_class_members_toctree,
        "attributes_as_param_list": app.config.numpydoc_attributes_as_param_list,
        "xref_param_type": app.config.numpydoc_xref_param_type,
        "xref_aliases": app.config.numpydoc_xref_aliases_complete,
        "xref_ignore": app.config.numpydoc_xref_ignore,
    }

    cfg.update(options or {})
    u_NL = "\n"
    if what == "module":
        # Strip top title
        pattern = "^\\s*[#*=]{4,}\\n[a-z0-9 -]+\\n[#*=]{4,}\\s*"
        title_re = re.compile(pattern, re.IGNORECASE | re.DOTALL)
        lines[:] = title_re.sub("", u_NL.join(lines)).split(u_NL)
    else:
        try:
            doc = get_doc_object(
                obj, what, u_NL.join(lines), config=cfg, builder=app.builder
            )
            lines[:] = str(doc).split(u_NL)
        except Exception:
            logger.error("[numpydoc] While processing docstring for %r", name)
            raise

        if app.config.numpydoc_validation_checks:
            # If the user has supplied patterns to ignore via the
            # numpydoc_validation_exclude config option, skip validation for
            # any objs whose name matches any of the patterns
            excluder = app.config.numpydoc_validation_excluder
            exclude_from_validation = excluder.search(name) if excluder else False
            if not exclude_from_validation:
                # TODO: Currently, all validation checks are run and only those
                # selected via config are reported. It would be more efficient to
                # only run the selected checks.
                report = validate(doc)
                errors = [
                    err
                    for err in report["errors"]
                    if not (
                        (
                            overrides := app.config.numpydoc_validation_overrides.get(
                                err[0]
                            )
                        )
                        and re.search(overrides, report["docstring"])
                    )
                ]
                if {err[0] for err in errors} & app.config.numpydoc_validation_checks:
                    msg = (
                        f"[numpydoc] Validation warnings while processing "
                        f"docstring for {name!r}:\n"
                    )
                    for err in errors:
                        if err[0] in app.config.numpydoc_validation_checks:
                            msg += f"  {err[0]}: {err[1]}\n"
                    logger.warning(msg)

    # call function to replace reference numbers so that there are no
    # duplicates
    rename_references(app, what, name, obj, options, lines)

    lines += ["..", DEDUPLICATION_TAG]
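
# Illustrative conf.py values for the validation hooks above (example
# assumptions, not the defaults registered in ``setup``):
#
#     numpydoc_validation_checks = {"all", "EX01"}    # report every check except EX01
#     numpydoc_validation_exclude = {r"\.__repr__$"}  # skip objects whose name matches
#     numpydoc_validation_overrides = {
#         "SA01": [r"^Internal helper"],              # drop SA01 when the docstring matches
#     }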


def mangle_signature(app, what, name, obj, options, sig, retann):
    # Do not try to inspect classes that don't define `__init__`
    if inspect.isclass(obj) and (
        not hasattr(obj, "__init__")
        or "initializes x; see " in pydoc.getdoc(obj.__init__)
    ):
        return "", ""

    if not (isinstance(obj, Callable) or hasattr(obj, "__argspec_is_invalid_")):
        return None

    if not hasattr(obj, "__doc__"):
        return None
    doc = get_doc_object(obj, config={"show_class_members": False})
    sig = doc["Signature"] or _clean_text_signature(
        getattr(obj, "__text_signature__", None)
    )
    if sig:
        sig = re.sub("^[^(]*", "", sig)
        return sig, ""


def _clean_text_signature(sig):
    if sig is None:
        return None
    start_pattern = re.compile(r"^[^(]*\(")
    start, end = start_pattern.search(sig).span()
    start_sig = sig[start:end]
    sig = sig[end:-1]
    sig = re.sub(r"^\$(self|module|type)(,\s|$)", "", sig, count=1)
    sig = re.sub(r"(^|(?<=,\s))/,\s\*", "*", sig, count=1)
    return start_sig + sig + ")"
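
# Expected behaviour (sketch, derived from the substitutions above):
#
#     _clean_text_signature("func($self, a=1)")                 -> "func(a=1)"
#     _clean_text_signature("func($self, /, *args, **kwargs)")  -> "func(*args, **kwargs)"
#     _clean_text_signature(None)                               -> None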


def setup(app, get_doc_object_=get_doc_object):
    if not hasattr(app, "add_config_value"):
        return None  # probably called by nose, better bail out

    global get_doc_object
    get_doc_object = get_doc_object_

    app.setup_extension("sphinx.ext.autosummary")
    app.connect("config-inited", update_config)
    app.connect("autodoc-process-docstring", mangle_docstrings)
    app.connect("autodoc-process-signature", mangle_signature)
    app.connect("doctree-read", relabel_references)
    app.connect("doctree-resolved", clean_backrefs)
    app.add_config_value("numpydoc_use_plots", None, False)
    app.add_config_value("numpydoc_show_class_members", True, True)
    app.add_config_value(
        "numpydoc_show_inherited_class_members", True, True, types=(bool, dict)
    )
    app.add_config_value("numpydoc_class_members_toctree", True, True)
    app.add_config_value("numpydoc_citation_re", "[a-z0-9_.-]+", True)
    app.add_config_value("numpydoc_attributes_as_param_list", True, True)
    app.add_config_value("numpydoc_xref_param_type", False, True)
    app.add_config_value("numpydoc_xref_aliases", dict(), True)
    app.add_config_value("numpydoc_xref_ignore", set(), True)
    app.add_config_value("numpydoc_validation_checks", set(), True)
    app.add_config_value("numpydoc_validation_exclude", set(), False)
    app.add_config_value("numpydoc_validation_overrides", dict(), False)

    # Extra mangling domains
    app.add_domain(NumpyPythonDomain)
    app.add_domain(NumpyCDomain)

    metadata = {"version": __version__, "parallel_read_safe": True}
    return metadata


def update_config(app, config=None):
    """Update the configuration with default values."""
    if config is None:  # needed for testing and old Sphinx
        config = app.config
    # Do not simply overwrite the `app.config.numpydoc_xref_aliases`
    # otherwise the next sphinx-build will compare the incoming values (without
    # our additions) to the old values (with our additions) and trigger
    # a full rebuild!
    numpydoc_xref_aliases_complete = deepcopy(config.numpydoc_xref_aliases)
    for key, value in DEFAULT_LINKS.items():
        if key not in numpydoc_xref_aliases_complete:
            numpydoc_xref_aliases_complete[key] = value
    config.numpydoc_xref_aliases_complete = numpydoc_xref_aliases_complete

    # Processing to determine whether numpydoc_validation_checks is treated
    # as a blocklist or allowlist
    config.numpydoc_validation_checks = get_validation_checks(
        config.numpydoc_validation_checks
    )

    # Generate the regexp for docstrings to ignore during validation
    if isinstance(config.numpydoc_validation_exclude, str):
        raise ValueError(
            f"numpydoc_validation_exclude must be a container of strings, "
            f"e.g. [{config.numpydoc_validation_exclude!r}]."
        )
    config.numpydoc_validation_excluder = None
    if config.numpydoc_validation_exclude:
        exclude_expr = re.compile(
            r"|".join(exp for exp in config.numpydoc_validation_exclude)
        )
        config.numpydoc_validation_excluder = exclude_expr

    for check, patterns in config.numpydoc_validation_overrides.items():
        config.numpydoc_validation_overrides[check] = re.compile(
            r"|".join(exp for exp in patterns)
        )
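
# Illustrative conf.py value for the alias merging above (example assumption):
#
#     numpydoc_xref_aliases = {"dataframe": "pandas.DataFrame"}
#
# User-supplied aliases take precedence; DEFAULT_LINKS only fills in keys the
# user did not define.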


# ------------------------------------------------------------------------------
# Docstring-mangling domains
# ------------------------------------------------------------------------------

from docutils.statemachine import ViewList
from sphinx.domains.c import CDomain
from sphinx.domains.python import PythonDomain


class ManglingDomainBase:
    directive_mangling_map = {}

    def __init__(self, *a, **kw):
        super().__init__(*a, **kw)
        self.wrap_mangling_directives()

    def wrap_mangling_directives(self):
        for name, objtype in list(self.directive_mangling_map.items()):
            self.directives[name] = wrap_mangling_directive(
                self.directives[name], objtype
            )


class NumpyPythonDomain(ManglingDomainBase, PythonDomain):
    name = "np"
    directive_mangling_map = {
        "function": "function",
        "class": "class",
        "exception": "class",
        "method": "function",
        "classmethod": "function",
        "staticmethod": "function",
        "attribute": "attribute",
    }
    indices = []


class NumpyCDomain(ManglingDomainBase, CDomain):
    name = "np-c"
    directive_mangling_map = {
        "function": "function",
        "member": "attribute",
        "macro": "function",
        "type": "class",
        "var": "object",
    }
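
# Illustrative rst usage of the mangling domains (sketch; the object name is
# hypothetical):
#
#     .. np:function:: example_func(a, b=1)
#
#        Directive content here is passed through ``mangle_docstrings`` before
#        the wrapped Python-domain directive runs.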


def match_items(lines, content_old):
    """Create items for mangled lines.

    This function tries to match the lines in ``lines`` with the items (source
    file references and line numbers) in ``content_old``. The
    ``mangle_docstrings`` function changes the actual docstrings, but doesn't
    keep track of where each line came from. The mangling does many operations
    on the original lines, which are hard to track afterwards.

    Many of the line changes come from deleting or inserting blank lines. This
    function tries to match lines by ignoring blank lines. All other changes
    (such as inserting figures or changes in the references) are completely
    ignored, so the generated line numbers will be off if ``mangle_docstrings``
    does anything non-trivial.

    This is a best-effort function and the real fix would be to make
    ``mangle_docstrings`` actually keep track of the ``items`` together with
    the ``lines``.

    Examples
    --------
    >>> lines = ["", "A", "", "B", " ", "", "C", "D"]
    >>> lines_old = ["a", "", "", "b", "", "c"]
    >>> items_old = [
    ...     ("file1.py", 0),
    ...     ("file1.py", 1),
    ...     ("file1.py", 2),
    ...     ("file2.py", 0),
    ...     ("file2.py", 1),
    ...     ("file2.py", 2),
    ... ]
    >>> content_old = ViewList(lines_old, items=items_old)
    >>> match_items(lines, content_old)  # doctest: +NORMALIZE_WHITESPACE
    [('file1.py', 0), ('file1.py', 0), ('file2.py', 0), ('file2.py', 0),
     ('file2.py', 2), ('file2.py', 2), ('file2.py', 2), ('file2.py', 2)]
    >>> # first 2 ``lines`` are matched to 'a', second 2 to 'b', rest to 'c'
    >>> # actual content is completely ignored.

    Notes
    -----
    The algorithm tries to match any line in ``lines`` with one in
    ``lines_old``. It skips over all empty lines in ``lines_old`` and assigns
    this line number to all lines in ``lines``, unless a non-empty line is
    found in ``lines`` in which case it goes to the next line in ``lines_old``.

    """
    items_new = []
    lines_old = content_old.data
    items_old = content_old.items
    j = 0
    for i, line in enumerate(lines):
        # go to next non-empty line in old:
        # line.strip() checks whether the string is all whitespace
        while j < len(lines_old) - 1 and not lines_old[j].strip():
            j += 1
        items_new.append(items_old[j])
        if line.strip() and j < len(lines_old) - 1:
            j += 1
    assert len(items_new) == len(lines)
    return items_new


def wrap_mangling_directive(base_directive, objtype):
    class directive(base_directive):
        def run(self):
            env = self.state.document.settings.env

            name = None
            if self.arguments:
                m = re.match(r"^(.*\s+)?(.*?)(\(.*)?", self.arguments[0])
                name = m.group(2).strip()

            if not name:
                name = self.arguments[0]

            lines = list(self.content)
            mangle_docstrings(env.app, objtype, name, None, None, lines)
            if self.content:
                items = match_items(lines, self.content)
                self.content = ViewList(lines, items=items, parent=self.content.parent)

            return base_directive.run(self)

    return directive