Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/docutils/nodes.py: 47%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# $Id$
2# Author: David Goodger <goodger@python.org>
3# Maintainer: docutils-develop@lists.sourceforge.net
4# Copyright: This module has been placed in the public domain.
6"""
7Docutils document tree element class library.
9Classes in CamelCase are abstract base classes or auxiliary classes. The one
10exception is `Text`, for a text (PCDATA) node; uppercase is used to
11differentiate from element classes. Classes in lower_case_with_underscores
12are element classes, matching the XML element generic identifiers in the DTD_.
14The position of each node (the level at which it can occur) is significant and
15is represented by abstract base classes (`Root`, `Structural`, `Body`,
16`Inline`, etc.). Certain transformations will be easier because we can use
17``isinstance(node, base_class)`` to determine the position of the node in the
18hierarchy.
20.. _DTD: https://docutils.sourceforge.io/docs/ref/docutils.dtd
21"""
23from __future__ import annotations
25__docformat__ = 'reStructuredText'
27import os
28import re
29import sys
30import unicodedata
31import warnings
32from collections import Counter
33from typing import TYPE_CHECKING, overload
34# import xml.dom.minidom as dom # -> conditional import in Node.asdom()
35# and document.asdom()
37# import docutils.transforms # -> delayed import in document.__init__()
if TYPE_CHECKING:
    # Everything in this branch is only needed for annotations; it is
    # skipped at runtime because `TYPE_CHECKING` is false then.
    from collections.abc import (Callable, Iterable, Iterator,
                                 Mapping, Sequence)
    from types import ModuleType
    from typing import Any, ClassVar, Final, Literal, Self, SupportsIndex
    # `TypeAlias` is taken from the stdlib on Python >= 3.12 and from
    # the `typing_extensions` backport otherwise.
    if sys.version_info[:2] >= (3, 12):
        from typing import TypeAlias
    else:
        from typing_extensions import TypeAlias

    from xml.dom import minidom

    from docutils.frontend import Values
    from docutils.transforms import Transformer, Transform
    from docutils.utils import Reporter

    # Aliases describing `Element.content_model`: a tuple of
    # ``(category, quantifier)`` items.
    _ContentModelCategory: TypeAlias = tuple['Element' | tuple['Element', ...]]
    _ContentModelQuantifier = Literal['.', '?', '+', '*']
    _ContentModelItem: TypeAlias = tuple[_ContentModelCategory,
                                         _ContentModelQuantifier]
    _ContentModelTuple: TypeAlias = tuple[_ContentModelItem, ...]

    StrPath: TypeAlias = str | os.PathLike[str]
    """File system path. No bytes!"""

    # Signature of the attribute-update callbacks accepted by
    # `Element.update_all_atts()`: ``(attr, value, replace)``.
    _UpdateFun: TypeAlias = Callable[[str, Any, bool], None]
67# ==============================
68# Functional Node Base Classes
69# ==============================
class Node:
    """Abstract base class of nodes in a document tree."""

    parent: Element | None = None
    """The node that directly contains this node (None when detached)."""

    children: Sequence[Node] = ()
    """Child nodes in document order.

    Terminal nodes keep this empty default; subclasses whose instances
    can contain other nodes replace it on the instance.
    """

    source: StrPath | None = None
    """Path or description of the input source that produced this node."""

    line: int | None = None
    """1-based number of the line in `source` where this node begins."""

    tagname: str  # defined in subclasses
    """The element generic identifier."""

    _document: document | None = None

    @property
    def document(self) -> document | None:
        """Return the `document` root node of the tree containing this Node.

        The node's own reference wins; otherwise the parent is asked.
        Return None if neither is available.
        """
        own = self._document
        if own:
            return own
        try:
            return self.parent.document
        except AttributeError:  # no parent (or parent without `document`)
            return None

    @document.setter
    def document(self, value: document) -> None:
        self._document = value

    def __bool__(self) -> Literal[True]:
        """
        Always return True.

        A node is more than a container: it counts as "true" even when it
        has no subnodes. Use `len()` to find out whether it has children.
        """
        return True

    def asdom(self,
              dom: ModuleType | None = None,
              ) -> minidom.Document | minidom.Element | minidom.Text:
        # TODO: minidom.Document is only returned by document.asdom()
        #       (which overwrites this base-class implementation)
        """Return a DOM **fragment** representation of this Node.

        `dom` is the DOM implementation module to use; the standard
        library's ``xml.dom.minidom`` is imported on demand if it is None.
        """
        if dom is None:
            import xml.dom.minidom as dom
        return self._dom_node(dom.Document())

    def _dom_node(self, domroot: minidom.Document) -> minidom.Text:
        # Stub: creates an element named after the class.
        # Subclasses override this.
        return domroot.createElement(self.__class__.__name__)

    def shortrepr(self) -> str:
        # Compact representation for tests and debugging; the base
        # class simply defers to `repr()`.
        return repr(self)

    def pformat(self, indent: str = '    ', level: int = 0) -> str:
        """
        Return an indented pseudo-XML representation, for test purposes.

        Abstract here: subclasses must override.
        """
        raise NotImplementedError

    def copy(self) -> Self:
        """Return a copy of self. Abstract: subclasses must override."""
        raise NotImplementedError

    def deepcopy(self) -> Self:
        """Return a deep copy of self (children are copied as well).

        Abstract: subclasses must override.
        """
        raise NotImplementedError

    def astext(self) -> str:
        """Return a string representation of this Node.

        Abstract: subclasses must override.
        """
        raise NotImplementedError

    def setup_child(self, child: Node) -> None:
        """Register `self` as the parent of `child`.

        If `self` belongs to a document, also hand the document down to
        `child` and fill in the child's missing `source` and `line` from
        the document's current source and line.
        """
        child.parent = self
        doc = self.document
        if not doc:
            return
        child.document = doc
        if child.source is None:
            child.source = doc.current_source
        if child.line is None:
            child.line = doc.current_line

    def walk(self, visitor: NodeVisitor) -> bool:
        """
        Traverse the tree rooted at this node, calling the visitor's
        `dispatch_visit()` method on entering each node.

        (`walkabout()` does the same but additionally calls
        `dispatch_departure()` before leaving each node.)

        Limited in-place modification of the tree is supported during the
        traversal: a node may be removed or replaced by one or more nodes.
        If the removed or replaced node comes after the current node,
        however, the old node is still visited and the new ones are not.

        ``visit`` methods may raise `TreePruningException` subclasses
        (`SkipChildren`, `SkipSiblings`, `SkipNode`, `SkipDeparture`).

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` implementation for each `Node` subclass encountered.

        Return true if we should stop the traversal.
        """
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walk calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except (SkipChildren, SkipNode):
                return False
            except SkipDeparture:  # meaningless without departures
                pass
            try:
                # Walk a snapshot so that visitors may edit `children`.
                for subnode in self.children[:]:
                    if subnode.walk(visitor):
                        return True
            except SkipSiblings:
                pass
        except StopTraversal:
            return True
        return False

    def walkabout(self, visitor: NodeVisitor) -> bool:
        """
        Traverse the tree like `Node.walk()` (which see), but also call
        the visitor's `dispatch_departure()` method before leaving each
        node.

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` and ``depart`` implementation for each `Node`
        subclass encountered.

        Return true if we should stop the traversal.
        """
        depart = True
        halted = False
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walkabout calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except SkipNode:
                # Skip children and departure alike.
                return False
            except SkipDeparture:
                depart = False
            try:
                # Walk a snapshot so that visitors may edit `children`.
                for subnode in self.children[:]:
                    if subnode.walkabout(visitor):
                        halted = True
                        break
            except SkipSiblings:
                pass
        except SkipChildren:
            pass
        except StopTraversal:
            halted = True
        if depart:
            visitor.document.reporter.debug(
                'docutils.nodes.Node.walkabout calling dispatch_departure '
                'for %s' % self.__class__.__name__)
            visitor.dispatch_departure(self)
        return halted

    def _fast_findall(self, cls: type) -> Iterator[Node]:
        """Yield self and all descendants that are instances of `cls`."""
        if isinstance(self, cls):
            yield self
        for subnode in self.children:
            yield from subnode._fast_findall(cls)

    def _superfast_findall(self) -> Iterator[Node]:
        """Yield self and all descendants, without any filtering."""
        # Not the same as ``iter(self)``: the Element subclass implements
        # that via __getitem__() and __len__(), and it yields the direct
        # children only.
        yield self
        for subnode in self.children:
            yield from subnode._superfast_findall()

    def findall(self,
                condition: Callable[[Node], bool] | type | None = None,
                include_self: bool = True,
                descend: bool = True,
                siblings: bool = False,
                ascend: bool = False,
                ) -> Iterator[Node]:
        """
        Return an iterator yielding nodes following `self`:

        * self (if `include_self` is true)
        * all descendants in tree traversal order (if `descend` is true)
        * the following siblings (if `siblings` is true) and their
          descendants (if also `descend` is true)
        * the following siblings of the parent (if `ascend` is true) and
          their descendants (if also `descend` is true), and so on.

        With a `condition` other than None, only nodes for which
        ``condition(node)`` is true are yielded.  A type ``cls`` given as
        `condition` stands for ``lambda node: isinstance(node, cls)``.

        A true `ascend` implies a true `siblings`.

        The result is undefined if the tree structure is modified during
        iteration.

        For example, given the following tree::

            <paragraph>
                <emphasis>      <--- emphasis.findall() and
                    <strong>    <--- strong.findall() are called.
                        Foo
                    Bar
                <reference name="Baz" refid="baz">
                    Baz

        Then tuple(emphasis.findall()) equals ::

            (<emphasis>, <strong>, <#text: Foo>, <#text: Bar>)

        and list(strong.findall(ascend=True)) equals ::

            [<strong>, <#text: Foo>, <#text: Bar>, <reference>, <#text: Baz>]
        """
        if ascend:
            siblings = True
        # Plain "self plus subtree" requests are served by the optimized
        # helpers when the filter is absent or a class.
        if include_self and descend and not siblings:
            if condition is None:
                yield from self._superfast_findall()
                return
            if isinstance(condition, type):
                yield from self._fast_findall(condition)
                return
        # Turn a class into an equivalent predicate function.
        if isinstance(condition, type):
            def is_instance(node, node_class=condition):
                return isinstance(node, node_class)

            condition = is_instance

        if include_self and (condition is None or condition(self)):
            yield self
        if descend and len(self.children) > 0:
            for subnode in self:
                yield from subnode.findall(condition=condition,
                                           include_self=True, descend=True,
                                           siblings=False, ascend=False)
        if siblings:
            current = self
            while current.parent:
                container = current.parent
                position = container.index(current)
                # Text nodes compare by value, so make sure we have found
                # this very object and not merely an equal one.
                while container[position] is not current:
                    position = container.index(current, position + 1)
                for follower in container[position+1:]:
                    yield from follower.findall(
                        condition=condition,
                        include_self=True, descend=descend,
                        siblings=False, ascend=False)
                if not ascend:
                    break
                current = current.parent

    def traverse(self,
                 condition: Callable[[Node], bool] | type | None = None,
                 include_self: bool = True,
                 descend: bool = True,
                 siblings: bool = False,
                 ascend: bool = False,
                 ) -> list[Node]:
        """Return list of nodes following `self`.

        Deprecated: emits a `DeprecationWarning`.  For looping,
        Node.findall() is faster and more memory efficient.
        """
        # traverse() may be eventually removed:
        warnings.warn('nodes.Node.traverse() is obsoleted by Node.findall().',
                      DeprecationWarning, stacklevel=2)
        found = self.findall(condition, include_self, descend,
                             siblings, ascend)
        return list(found)

    def next_node(self,
                  condition: Callable[[Node], bool] | type | None = None,
                  include_self: bool = False,
                  descend: bool = True,
                  siblings: bool = False,
                  ascend: bool = False,
                  ) -> Node | None:
        """
        Return the first node that findall() would yield, or None if
        there is no such node.

        The parameters are those of `findall()`, except that
        `include_self` defaults to False here.
        """
        candidates = self.findall(condition, include_self,
                                  descend, siblings, ascend)
        return next(candidates, None)

    def validate(self, recursive: bool = True) -> None:
        """Raise ValidationError if this node is not valid.

        The base class defines no constraints; subclasses with validity
        constraints override this method.
        """

    def validate_position(self) -> None:
        """Hook for additional checks of the parent's content model.

        Raise ValidationError, if `self` is at an invalid position.

        The base class accepts any position; subclasses with complex
        validity constraints override this method.  See
        `subtitle.validate_position()` and `transition.validate_position()`.
        """
class Text(Node, str):  # NoQA: SLOT000 (Node doesn't define __slots__)
    """
    Terminal node (leaf) that holds text only: it has neither child nodes
    nor attributes.  Create one by passing a string to the constructor.

    ``str(<instance>)`` gives the raw (null-escaped) text,
    ``<instance>.astext()`` the unescaped text.
    """

    tagname: Final = '#text'

    children: Final = ()
    """Text nodes have no children, and cannot have children."""

    def __new__(cls, data: str, rawsource: None = None) -> Self:
        """Create the node from `data`.

        Raise `TypeError` if `data` is a `bytes` instance and emit a
        `DeprecationWarning` if the obsolete `rawsource` argument is used.
        """
        if isinstance(data, bytes):
            raise TypeError('expecting str data, not bytes')
        if rawsource is not None:
            warnings.warn('nodes.Text: initialization argument "rawsource" '
                          'is ignored and will be removed in Docutils 2.0.',
                          DeprecationWarning, stacklevel=2)
        return str.__new__(cls, data)

    def shortrepr(self, maxlen: int = 18) -> str:
        # Text longer than `maxlen` characters is cut and marked ' ...'.
        text = self
        if len(text) > maxlen:
            text = text[:maxlen-4] + ' ...'
        return '<%s: %r>' % (self.tagname, str(text))

    def __repr__(self) -> str:
        return self.shortrepr(maxlen=68)

    def astext(self) -> str:
        # Return the text as passed through unescape().
        return str(unescape(self))

    def _dom_node(self, domroot: minidom.Document) -> minidom.Text:
        return domroot.createTextNode(str(self))

    def copy(self) -> Self:
        # A new instance of the same class holding the same raw text.
        return self.__class__(str(self))

    def deepcopy(self) -> Self:
        # There are no children, so a plain copy is already "deep".
        return self.copy()

    def pformat(self, indent: str = '    ', level: int = 0) -> str:
        # Detailed mode (document setting `detailed`): a '<#text>' tag
        # followed by the repr() of every raw line.  Any AttributeError
        # while looking up the setting or formatting selects the plain
        # mode below.
        try:
            if self.document.settings.detailed:
                header = '%s%s' % (indent*level, '<#text>')
                body = (indent*(level+1) + repr(raw_line)
                        for raw_line in self.splitlines(True))
                return '\n'.join((header, *body)) + '\n'
        except AttributeError:
            pass
        # Plain mode: the unescaped text, each line indented.
        prefix = indent * level
        rows = [prefix+row for row in self.astext().splitlines()]
        if rows:
            return '\n'.join(rows) + '\n'
        return ''

    # Substitution definitions call rstrip() and lstrip() and expect a
    # Text instance back (UserString formerly took care of this).

    def rstrip(self, chars: str | None = None) -> Self:
        stripped = str.rstrip(self, chars)
        return self.__class__(stripped)

    def lstrip(self, chars: str | None = None) -> Self:
        stripped = str.lstrip(self, chars)
        return self.__class__(stripped)
484class Element(Node):
485 """
486 `Element` is the superclass to all specific elements.
488 Elements contain attributes and child nodes.
489 They can be described as a cross between a list and a dictionary.
491 Elements emulate dictionaries for external [#]_ attributes, indexing by
492 attribute name (a string). To set the attribute 'att' to 'value', do::
494 element['att'] = 'value'
496 .. [#] External attributes correspond to the XML element attributes.
497 From its `Node` superclass, Element also inherits "internal"
498 class attributes that are accessed using the standard syntax, e.g.
499 ``element.parent``.
501 There are two special attributes: 'ids' and 'names'. Both are
502 lists of unique identifiers: 'ids' conform to the regular expression
503 ``[a-z](-?[a-z0-9]+)*`` (see the make_id() function for rationale and
504 details). 'names' serve as user-friendly interfaces to IDs; they are
505 case- and whitespace-normalized (see the fully_normalize_name() function).
507 Elements emulate lists for child nodes (element nodes and/or text
508 nodes), indexing by integer. To get the first child node, use::
510 element[0]
512 to iterate over the child nodes (without descending), use::
514 for child in element:
515 ...
517 Elements may be constructed using the ``+=`` operator. To add one new
518 child node to element, do::
520 element += node
522 This is equivalent to ``element.append(node)``.
524 To add a list of multiple child nodes at once, use the same ``+=``
525 operator::
527 element += [node1, node2]
529 This is equivalent to ``element.extend([node1, node2])``.
530 """
532 list_attributes: Final = ('ids', 'classes', 'names', 'dupnames')
533 """Tuple of attributes that are initialized to empty lists.
535 NOTE: Derived classes should update this value when supporting
536 additional list attributes.
537 """
539 valid_attributes: Final = list_attributes + ('source',)
540 """Tuple of attributes that are valid for elements of this class.
542 NOTE: Derived classes should update this value when supporting
543 additional attributes.
544 """
546 common_attributes: Final = valid_attributes
547 """Tuple of `common attributes`__ known to all Doctree Element classes.
549 __ https://docutils.sourceforge.io/docs/ref/doctree.html#common-attributes
550 """
552 known_attributes: Final = common_attributes
553 """Alias for `common_attributes`. Will be removed in Docutils 2.0."""
555 basic_attributes: Final = list_attributes
556 """Common list attributes. Deprecated. Will be removed in Docutils 2.0."""
558 local_attributes: Final = ('backrefs',)
559 """Obsolete. Will be removed in Docutils 2.0."""
561 content_model: ClassVar[_ContentModelTuple] = ()
562 """Python representation of the element's content model (cf. docutils.dtd).
564 A tuple of ``(category, quantifier)`` tuples with
566 :category: class or tuple of classes that are expected at this place(s)
567 in the list of children
568 :quantifier: string representation stating how many elements
569 of `category` are expected. Value is one of:
570 '.' (exactly one), '?' (zero or one),
571 '+' (one or more), '*' (zero or more).
573 NOTE: The default describes the empty element. Derived classes should
574 update this value to match their content model.
576 Provisional.
577 """
579 tagname: str | None = None
580 """The element generic identifier.
582 If None, it is set as an instance attribute to the name of the class.
583 """
585 child_text_separator: Final = '\n\n'
586 """Separator for child nodes, used by `astext()` method."""
588 def __init__(self,
589 rawsource: str = '',
590 *children: Node,
591 **attributes: Any,
592 ) -> None:
593 self.rawsource = rawsource
594 """The raw text from which this element was constructed.
596 For informative and debugging purposes. Don't rely on its value!
598 NOTE: some elements do not set this value (default '').
599 """
600 if isinstance(rawsource, Element):
601 raise TypeError('First argument "rawsource" must be a string.')
603 self.children: list[Node] = []
604 """List of child nodes (elements and/or `Text`)."""
606 self.extend(children) # maintain parent info
608 self.attributes: dict[str, Any] = {}
609 """Dictionary of attribute {name: value}."""
611 # Initialize list attributes.
612 for att in self.list_attributes:
613 self.attributes[att] = []
615 for att, value in attributes.items():
616 att = att.lower() # normalize attribute name
617 if att in self.list_attributes:
618 # lists are mutable; make a copy for this node
619 self.attributes[att] = value[:]
620 else:
621 self.attributes[att] = value
623 if self.tagname is None:
624 self.tagname: str = self.__class__.__name__
626 def _dom_node(self, domroot: minidom.Document) -> minidom.Element:
627 element = domroot.createElement(self.tagname)
628 for attribute, value in self.attlist():
629 if isinstance(value, list):
630 value = ' '.join(serial_escape('%s' % (v,)) for v in value)
631 element.setAttribute(attribute, '%s' % value)
632 for child in self.children:
633 element.appendChild(child._dom_node(domroot))
634 return element
636 def __repr__(self) -> str:
637 data = ''
638 for c in self.children:
639 data += c.shortrepr()
640 if len(data) > 60:
641 data = data[:56] + ' ...'
642 break
643 if self['names']:
644 return '<%s "%s": %s>' % (self.tagname,
645 '; '.join(self['names']), data)
646 else:
647 return '<%s: %s>' % (self.tagname, data)
649 def shortrepr(self) -> str:
650 if self['names']:
651 return '<%s "%s"...>' % (self.tagname, '; '.join(self['names']))
652 else:
653 return '<%s...>' % self.tagname
655 def __str__(self) -> str:
656 if self.children:
657 return '%s%s%s' % (self.starttag(),
658 ''.join(str(c) for c in self.children),
659 self.endtag())
660 else:
661 return self.emptytag()
663 def starttag(self, quoteattr: Callable[[str], str] | None = None) -> str:
664 # the optional arg is used by the docutils_xml writer
665 if quoteattr is None:
666 quoteattr = pseudo_quoteattr
667 parts = [self.tagname]
668 for name, value in self.attlist():
669 if value is None: # boolean attribute
670 parts.append('%s="True"' % name)
671 continue
672 if isinstance(value, bool):
673 value = str(int(value))
674 if isinstance(value, list):
675 values = [serial_escape('%s' % (v,)) for v in value]
676 value = ' '.join(values)
677 else:
678 value = str(value)
679 value = quoteattr(value)
680 parts.append('%s=%s' % (name, value))
681 return '<%s>' % ' '.join(parts)
683 def endtag(self) -> str:
684 return '</%s>' % self.tagname
686 def emptytag(self) -> str:
687 attributes = ('%s="%s"' % (n, v) for n, v in self.attlist())
688 return '<%s/>' % ' '.join((self.tagname, *attributes))
690 def __len__(self) -> int:
691 return len(self.children)
693 def __contains__(self, key: str | Node) -> bool:
694 # Test for both, children and attributes with operator ``in``.
695 if isinstance(key, str):
696 return key in self.attributes
697 return key in self.children
699 @overload
700 def __getitem__(self, key: str) -> Any:
701 ...
703 @overload
704 def __getitem__(self, key: int) -> Node:
705 ...
707 @overload
708 def __getitem__(self, key: slice) -> list[Node]:
709 ...
711 def __getitem__(self,
712 key: str | int | slice,
713 ) -> Node | list[Node] | Any:
714 if isinstance(key, str):
715 return self.attributes[key]
716 elif isinstance(key, int):
717 return self.children[key]
718 elif isinstance(key, slice):
719 assert key.step in (None, 1), 'cannot handle slice with stride'
720 return self.children[key.start:key.stop]
721 else:
722 raise TypeError('element index must be an integer, a slice, or '
723 'an attribute name string')
725 @overload
726 def __setitem__(self, key: str, item: Any) -> None:
727 ...
729 @overload
730 def __setitem__(self, key: int, item: Node) -> None:
731 ...
733 @overload
734 def __setitem__(self, key: slice, item: Iterable[Node]) -> None:
735 ...
737 def __setitem__(self, key, item) -> None:
738 if isinstance(key, str):
739 self.attributes[str(key)] = item
740 elif isinstance(key, int):
741 self.setup_child(item)
742 self.children[key] = item
743 elif isinstance(key, slice):
744 assert key.step in (None, 1), 'cannot handle slice with stride'
745 for node in item:
746 self.setup_child(node)
747 self.children[key.start:key.stop] = item
748 else:
749 raise TypeError('element index must be an integer, a slice, or '
750 'an attribute name string')
752 def __delitem__(self, key: str | int | slice) -> None:
753 if isinstance(key, str):
754 del self.attributes[key]
755 elif isinstance(key, int):
756 del self.children[key]
757 elif isinstance(key, slice):
758 assert key.step in (None, 1), 'cannot handle slice with stride'
759 del self.children[key.start:key.stop]
760 else:
761 raise TypeError('element index must be an integer, a simple '
762 'slice, or an attribute name string')
764 def __add__(self, other: list[Node]) -> list[Node]:
765 return self.children + other
767 def __radd__(self, other: list[Node]) -> list[Node]:
768 return other + self.children
770 def __iadd__(self, other: Node | Iterable[Node]) -> Self:
771 """Append a node or a list of nodes to `self.children`."""
772 if isinstance(other, Node):
773 self.append(other)
774 elif other is not None:
775 self.extend(other)
776 return self
778 def astext(self) -> str:
779 return self.child_text_separator.join(
780 [child.astext() for child in self.children])
782 def non_default_attributes(self) -> dict[str, Any]:
783 atts = {key: value for key, value in self.attributes.items()
784 if self.is_not_default(key)}
785 return atts
787 def attlist(self) -> list[tuple[str, Any]]:
788 return sorted(self.non_default_attributes().items())
790 def get(self, key: str, failobj: Any | None = None) -> Any:
791 return self.attributes.get(key, failobj)
793 def hasattr(self, attr: str) -> bool:
794 return attr in self.attributes
796 def delattr(self, attr: str) -> None:
797 if attr in self.attributes:
798 del self.attributes[attr]
800 def setdefault(self, key: str, failobj: Any | None = None) -> Any:
801 return self.attributes.setdefault(key, failobj)
803 has_key = hasattr
805 def get_language_code(self, fallback: str = '') -> str:
806 """Return node's language tag.
808 Look iteratively in self and parents for a class argument
809 starting with ``language-`` and return the remainder of it
810 (which should be a `BCP49` language tag) or the `fallback`.
811 """
812 for cls in self.get('classes', []):
813 if cls.startswith('language-'):
814 return cls.removeprefix('language-')
815 try:
816 return self.parent.get_language_code(fallback)
817 except AttributeError:
818 return fallback
820 def append(self, item: Node) -> None:
821 self.setup_child(item)
822 self.children.append(item)
824 def extend(self, item: Iterable[Node]) -> None:
825 for node in item:
826 self.append(node)
828 def insert(self,
829 index: SupportsIndex,
830 item: Node | Iterable[Node],
831 ) -> None:
832 if isinstance(item, Node):
833 self.setup_child(item)
834 self.children.insert(index, item)
835 elif item is not None:
836 self[index:index] = item
838 def pop(self, i: int = -1) -> Node:
839 return self.children.pop(i)
841 def remove(self, item: Node) -> None:
842 self.children.remove(item)
844 def index(self,
845 item: Node,
846 start: int = 0,
847 stop: int = sys.maxsize,
848 ) -> int:
849 return self.children.index(item, start, stop)
851 def previous_sibling(self) -> Node | None:
852 """Return preceding sibling node or ``None``."""
853 try:
854 i = self.parent.index(self)
855 except (AttributeError):
856 return None
857 return self.parent[i-1] if i > 0 else None
859 def is_not_default(self, key: str) -> bool:
860 if self[key] == [] and key in self.list_attributes:
861 return False
862 else:
863 return True
865 def update_basic_atts(self, dict_: Mapping[str, Any] | Element) -> None:
866 """
867 Update basic attributes ('ids', 'names', 'classes',
868 'dupnames', but not 'source') from node or dictionary `dict_`.
870 Provisional.
871 """
872 if isinstance(dict_, Node):
873 dict_ = dict_.attributes
874 for att in self.basic_attributes:
875 self.append_attr_list(att, dict_.get(att, []))
877 def append_attr_list(self, attr: str, values: Iterable[Any]) -> None:
878 """
879 For each element in values, if it does not exist in self[attr], append
880 it.
882 NOTE: Requires self[attr] and values to be sequence type and the
883 former should specifically be a list.
884 """
885 # List Concatenation
886 for value in values:
887 if value not in self[attr]:
888 self[attr].append(value)
890 def coerce_append_attr_list(
891 self, attr: str, value: list[Any] | Any) -> None:
892 """
893 First, convert both self[attr] and value to a non-string sequence
894 type; if either is not already a sequence, convert it to a list of one
895 element. Then call append_attr_list.
897 NOTE: self[attr] and value both must not be None.
898 """
899 # List Concatenation
900 if not isinstance(self.get(attr), list):
901 self[attr] = [self[attr]]
902 if not isinstance(value, list):
903 value = [value]
904 self.append_attr_list(attr, value)
906 def replace_attr(self, attr: str, value: Any, force: bool = True) -> None:
907 """
908 If self[attr] does not exist or force is True or omitted, set
909 self[attr] to value, otherwise do nothing.
910 """
911 # One or the other
912 if force or self.get(attr) is None:
913 self[attr] = value
915 def copy_attr_convert(
916 self, attr: str, value: Any, replace: bool = True) -> None:
917 """
918 If attr is an attribute of self, set self[attr] to
919 [self[attr], value], otherwise set self[attr] to value.
921 NOTE: replace is not used by this function and is kept only for
922 compatibility with the other copy functions.
923 """
924 if self.get(attr) is not value:
925 self.coerce_append_attr_list(attr, value)
927 def copy_attr_coerce(self, attr: str, value: Any, replace: bool) -> None:
928 """
929 If attr is an attribute of self and either self[attr] or value is a
930 list, convert all non-sequence values to a sequence of 1 element and
931 then concatenate the two sequence, setting the result to self[attr].
932 If both self[attr] and value are non-sequences and replace is True or
933 self[attr] is None, replace self[attr] with value. Otherwise, do
934 nothing.
935 """
936 if self.get(attr) is not value:
937 if isinstance(self.get(attr), list) or \
938 isinstance(value, list):
939 self.coerce_append_attr_list(attr, value)
940 else:
941 self.replace_attr(attr, value, replace)
943 def copy_attr_concatenate(
944 self, attr: str, value: Any, replace: bool) -> None:
945 """
946 If attr is an attribute of self and both self[attr] and value are
947 lists, concatenate the two sequences, setting the result to
948 self[attr]. If either self[attr] or value are non-sequences and
949 replace is True or self[attr] is None, replace self[attr] with value.
950 Otherwise, do nothing.
951 """
952 if self.get(attr) is not value:
953 if isinstance(self.get(attr), list) and \
954 isinstance(value, list):
955 self.append_attr_list(attr, value)
956 else:
957 self.replace_attr(attr, value, replace)
959 def copy_attr_consistent(
960 self, attr: str, value: Any, replace: bool) -> None:
961 """
962 If replace is True or self[attr] is None, replace self[attr] with
963 value. Otherwise, do nothing.
964 """
965 if self.get(attr) is not value:
966 self.replace_attr(attr, value, replace)
def update_all_atts(self,
                    dict_: Mapping[str, Any] | Element,
                    update_fun: _UpdateFun = copy_attr_consistent,
                    replace: bool = True,
                    and_source: bool = False,
                    ) -> None:
    """
    Update all attributes from node or dictionary `dict_`.

    The basic attributes ('ids', 'names', 'classes', 'dupnames', but
    not 'source') are appended first.  Every remaining attribute of
    `dict_` is then merged into `self` by calling
    ``update_fun(self, att, value, replace)``.  Generally, when
    `replace` is True, values in `self` are replaced or merged with the
    values from `dict_`; otherwise the values in `self` may be
    preserved or merged.  When `and_source` is True, the 'source'
    attribute is included in the copy.

    NOTE: When replace is False, and self contains a 'source' attribute,
          'source' is not replaced even when dict_ has a 'source'
          attribute, though it may still be merged into a list depending
          on the value of update_fun.
    NOTE: It is easier to call the update-specific methods than to pass
          the update_fun method to this function.
    """
    # A node contributes its attribute dictionary.
    attributes = dict_.attributes if isinstance(dict_, Node) else dict_

    # Copy the basic attributes.
    self.update_basic_atts(attributes)

    # Pick the predicate selecting which of the remaining attributes are
    # copied: with `and_source` only the list attributes are skipped,
    # otherwise all commonly known attributes are skipped.
    if and_source:
        wanted = self.is_not_list_attribute
    else:
        wanted = self.is_not_known_attribute

    # Merge the other attributes (basic ones were handled above).
    for att in attributes:
        if wanted(att):
            update_fun(self, att, attributes[att], replace)
def update_all_atts_consistantly(self,
                                 dict_: Mapping[str, Any] | Element,
                                 replace: bool = True,
                                 and_source: bool = False,
                                 ) -> None:
    """
    Update all attributes from node or dictionary `dict_`.

    Shortcut for `update_all_atts()` with `Element.copy_attr_consistent`
    as update function: the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') are appended; for any other attribute
    present in both `self` and `dict_`, the value in `self` is replaced
    if `replace` is True and preserved otherwise.  When `and_source` is
    True, the 'source' attribute is included in the copy.

    NOTE: When replace is False, and self contains a 'source' attribute,
          'source' is not replaced even when dict_ has a 'source'
          attribute.
    """
    self.update_all_atts(dict_,
                         update_fun=Element.copy_attr_consistent,
                         replace=replace,
                         and_source=and_source)
def update_all_atts_concatenating(self,
                                  dict_: Mapping[str, Any] | Element,
                                  replace: bool = True,
                                  and_source: bool = False,
                                  ) -> None:
    """
    Update all attributes from node or dictionary `dict_`.

    Shortcut for `update_all_atts()` with
    `Element.copy_attr_concatenate` as update function: the basic
    attributes ('ids', 'names', 'classes', 'dupnames', but not
    'source') are appended; for any other attribute present in both
    `self` and `dict_`, two list values are concatenated and stored in
    `self`, while values that are not both lists are replaced if
    `replace` is True and preserved otherwise.  When `and_source` is
    True, the 'source' attribute is included in the copy.

    NOTE: When replace is False, and self contains a 'source' attribute,
          'source' is not replaced even when dict_ has a 'source'
          attribute, though it may still be merged into a list.
    """
    self.update_all_atts(dict_,
                         update_fun=Element.copy_attr_concatenate,
                         replace=replace,
                         and_source=and_source)
def update_all_atts_coercion(self,
                             dict_: Mapping[str, Any] | Element,
                             replace: bool = True,
                             and_source: bool = False,
                             ) -> None:
    """
    Update all attributes from node or dictionary `dict_`.

    Shortcut for `update_all_atts()` with `Element.copy_attr_coerce` as
    update function: the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') are appended; for any other attribute
    present in both `self` and `dict_`:

    - if neither value is a list and `replace` is True, the value in
      `self` is replaced with the value from `dict_`;
    - if either value is a list, non-lists are first converted to
      1-element lists, then the two lists are concatenated and the
      result is stored in `self`;
    - otherwise the value in `self` is preserved.

    When `and_source` is True, the 'source' attribute is included in
    the copy.

    NOTE: When replace is False, and self contains a 'source' attribute,
          'source' is not replaced even when dict_ has a 'source'
          attribute, though it may still be merged into a list.
    """
    self.update_all_atts(dict_,
                         update_fun=Element.copy_attr_coerce,
                         replace=replace,
                         and_source=and_source)
def update_all_atts_convert(self,
                            dict_: Mapping[str, Any] | Element,
                            and_source: bool = False,
                            ) -> None:
    """
    Update all attributes from node or dictionary `dict_`.

    Shortcut for `update_all_atts()` with `Element.copy_attr_convert`
    as update function: the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') are appended; for any other attribute
    present in both `self` and `dict_`, non-lists are first converted
    to 1-element lists, then the two lists are concatenated and the
    result is stored in `self`.  When `and_source` is True, the
    'source' attribute is included in the copy.

    This method has no `replace` argument; `update_all_atts()` is
    called with its default for `replace`.
    """
    converter = Element.copy_attr_convert
    self.update_all_atts(dict_, converter, and_source=and_source)
def clear(self) -> None:
    """Drop all children by binding `self.children` to a new empty list."""
    emptied = []
    self.children = emptied
def replace(self, old: Node, new: Node | Iterable[Node]) -> None:
    """
    Replace the child `old` with `new`.

    `new` may be a single `Node` or an iterable of nodes that takes the
    place of `old`.  If `new` is None, nothing is changed (the position
    of `old` is still looked up first).
    """
    position = self.index(old)
    if isinstance(new, Node):
        self.setup_child(new)
        self[position] = new
        return
    if new is None:
        return
    # Splice the sequence of nodes in place of the single old child.
    self[position:position + 1] = new
def replace_self(self, new: Node | Sequence[Node]) -> None:
    """
    Replace `self` in its parent with `new` (a node or a list of nodes).

    The basic attributes of `self` are transferred to `new` (or, for a
    list, to its first item) if that is an `Element`.  Otherwise it is
    asserted that no basic attribute of `self` holds a value that would
    get lost.

    Provisional: the handling of node attributes will be revised.
    """
    if isinstance(new, Node):
        heir = new
    else:
        # `new` is a list; its first item (if any) inherits attributes.
        try:
            heir = new[0]
        except IndexError:
            heir = None
    if isinstance(heir, Element):
        heir.update_basic_atts(self)
    else:
        # `heir` is a Text node or `new` is an empty list.
        # Assert that we aren't losing any attributes.
        for att in self.basic_attributes:
            assert not self[att], \
                'Losing "%s" attribute: %s' % (att, self[att])
    self.parent.replace(self, new)
def first_child_matching_class(self,
                               childclass: type[Element] | type[Text]
                               | tuple[type[Element] | type[Text], ...],
                               start: int = 0,
                               end: int = sys.maxsize,
                               ) -> int | None:
    """
    Return the index of the first child that is an instance of `childclass`.

    The check uses `isinstance()`, so instances of subclasses match, too.
    Return None if no child in the examined range matches.

    Parameters:

    - `childclass`: A `Node` subclass to search for, or a tuple of `Node`
      classes. If a tuple, any of the classes may match.
    - `start`: Initial index to check.
    - `end`: Initial index to *not* check.
    """
    wanted = childclass if isinstance(childclass, tuple) else (childclass,)
    stop = min(len(self), end)
    hits = (index for index in range(start, stop)
            if isinstance(self[index], wanted))
    return next(hits, None)
def first_child_not_matching_class(
        self,
        childclass: type[Element] | type[Text]
        | tuple[type[Element] | type[Text], ...],
        start: int = 0,
        end: int = sys.maxsize,
        ) -> int | None:
    """
    Return the index of the first child that is no instance of `childclass`.

    Return None if every child in the examined range is an instance of
    (one of) the given class(es).

    Parameters:

    - `childclass`: A `Node` subclass to skip, or a tuple of `Node`
      classes. If a tuple, none of the classes may match.
    - `start`: Initial index to check.
    - `end`: Initial index to *not* check.
    """
    skipped = childclass if isinstance(childclass, tuple) else (childclass,)
    stop = min(len(self), end)
    misses = (index for index in range(start, stop)
              if not isinstance(self.children[index], skipped))
    return next(misses, None)
def pformat(self, indent: str = ' ', level: int = 0) -> str:
    """
    Return an indented, multi-line representation of this subtree.

    The start tag is written on a line prefixed with `level` copies of
    `indent`; it is followed by the `pformat()` output of each child,
    formatted one level deeper.
    """
    pieces = [f'{indent * level}{self.starttag()}\n']
    for child in self.children:
        pieces.append(child.pformat(indent, level + 1))
    return ''.join(pieces)
def copy(self) -> Self:
    """
    Return a copy of this element without its children.

    A new instance of the same class is created from `rawsource` and the
    attributes; `_document`, `source` and `line` are carried over.
    """
    duplicate = type(self)(rawsource=self.rawsource, **self.attributes)
    for field in ('_document', 'source', 'line'):
        setattr(duplicate, field, getattr(self, field))
    return duplicate
def deepcopy(self) -> Self:
    """Return a copy of this element including deep copies of all children."""
    clone = self.copy()
    descendants = [node.deepcopy() for node in self.children]
    clone.extend(descendants)
    return clone
def note_referenced_by(self,
                       name: str | None = None,
                       id: str | None = None,
                       ) -> None:
    """
    Mark this Element as referenced by its name `name` or id `id`.

    Besides setting ``self.referenced``, propagate the mark to nodes
    registered in the optional ``expect_referenced_by_name`` and
    ``expect_referenced_by_id`` dictionaries.  These map names or ids
    to nodes whose ``referenced`` attribute is set to true as soon as
    this node is referenced by the given name or id (needed for target
    propagation).
    """
    self.referenced = True
    by_name = getattr(self, 'expect_referenced_by_name', {}).get(name)
    by_id = getattr(self, 'expect_referenced_by_id', {}).get(id)
    for dependent, key in ((by_name, name), (by_id, id)):
        if dependent:
            assert key is not None
            dependent.referenced = True
@classmethod
def is_not_list_attribute(cls, attr: str) -> bool:
    """
    Return True if and only if `attr` is NOT listed in
    ``cls.list_attributes`` (the basic list attributes of the class).
    """
    is_listed = attr in cls.list_attributes
    return not is_listed
@classmethod
def is_not_known_attribute(cls, attr: str) -> bool:
    """
    Return True if `attr` is NOT listed in ``cls.common_attributes``.

    Provisional. May be removed in Docutils 2.0.
    """
    is_common = attr in cls.common_attributes
    return not is_common
def validate_attributes(self) -> None:
    """Normalize and validate element attributes.

    Each attribute value is passed through the validator registered for
    its name in `ATTRIBUTE_VALIDATORS`; the returned (normalized) value
    is stored back.  Attributes whose name starts with "internal:" are
    skipped.

    Raise `ValidationError` listing all invalid attributes or attribute
    values found.

    Provisional.
    """
    problems = []
    for key, value in self.attributes.items():
        if key.startswith('internal:'):
            continue  # see docs/user/config.html#expose-internals
        if key in self.valid_attributes:
            try:
                self.attributes[key] = ATTRIBUTE_VALIDATORS[key](value)
            except (ValueError, TypeError, KeyError) as e:
                problems.append(
                    f'Attribute "{key}" has invalid value "{value}".\n {e}')
        else:
            va = '", "'.join(self.valid_attributes)
            problems.append(f'Attribute "{key}" not one of "{va}".')
    if not problems:
        return
    raise ValidationError(f'Element {self.starttag()} invalid:\n '
                          + '\n '.join(problems),
                          problematic_element=self)
def validate_content(self,
                     model: _ContentModelTuple | None = None,
                     elements: Sequence[Node] | None = None,
                     ) -> list[Node]:
    """Test compliance of `elements` with `model`.

    :model: content model description, default `self.content_model`,
    :elements: list of doctree elements, default `self.children`.

    The model is a sequence of ``(category, quantifier)`` pairs with
    the quantifiers '.' (exactly one), '?' (zero or one), '+' (one or
    more) and '*' (zero or more).

    Return list of children that do not fit in the model or raise
    `ValidationError` if the content does not comply with the `model`.

    Provisional.
    """
    if model is None:
        model = self.content_model
    if elements is None:
        elements = self.children
    pending = iter(elements)
    child = next(pending, None)
    for category, quantifier in model:
        if not isinstance(child, category):
            if quantifier in ('.', '+'):
                # a required child is missing or of the wrong category
                raise ValidationError(self._report_child(child, category),
                                      problematic_element=child)
            # quantifier in ('?', '*') -> optional child:
            # try same child with next part of content model
            continue
        # Check additional placement constraints (if applicable):
        child.validate_position()
        # advance:
        if quantifier in ('.', '?'):  # go to next element
            child = next(pending, None)
            continue
        # quantifier in ('*', '+'): pass all matching elements
        for child in pending:
            if not isinstance(child, category):
                break
            try:
                child.validate_position()
            except AttributeError:
                pass
        else:
            child = None  # all elements consumed
    if child is None:
        return []
    return [child, *pending]
def _report_child(self,
                  child: Node | None,
                  category: Element | Iterable[Element],
                  ) -> str:
    # Return a str reporting a missing child or child of wrong category.
    #
    # `category` is either a single class or an iterable of classes;
    # in the latter case the class names are joined with "> or <".
    if hasattr(category, '__name__'):
        _type = category.__name__
    else:
        _type = '> or <'.join(c.__name__ for c in category)
    msg = f'Element {self.starttag()} invalid:\n'
    if child is None:
        return f'{msg} Missing child of type <{_type}>.'
    expectation = f'{msg} Expecting child of type <{_type}>, '
    if isinstance(child, Text):
        return expectation + f'not text data "{child.astext()}".'
    return expectation + f'not {child.starttag()}.'
def validate(self, recursive: bool = True) -> None:
    """Validate Docutils Document Tree element ("doctree").

    The attributes are checked first, then the content.  Any child
    reported as left over by `validate_content()` makes the element
    invalid.

    Raise ValidationError if there are violations.
    If `recursive` is True, validate also the element's descendants.

    See `The Docutils Document Tree`__ for details of the
    Docutils Document Model.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html

    Provisional (work in progress).
    """
    self.validate_attributes()

    # The first left-over child (if any) is reported.
    for stray in self.validate_content():
        if isinstance(stray, Text):
            raise ValidationError(f'Element {self.starttag()} invalid:\n'
                                  f' Spurious text: "{stray.astext()}".',
                                  problematic_element=self)
        raise ValidationError(f'Element {self.starttag()} invalid:\n'
                              f' Child element {stray.starttag()} '
                              'not allowed at this position.',
                              problematic_element=stray)

    if not recursive:
        return
    for child in self:
        child.validate(recursive=recursive)
1377# ====================
1378# Element Categories
1379# ====================
1380#
1381# See https://docutils.sourceforge.io/docs/ref/doctree.html#element-hierarchy.
class Root:
    """Category class: element at the root of a document tree."""
class Structural:
    """Category class for `structural elements`__.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #structural-elements
    """
class SubStructural:
    """Category class for `structural subelements`__.

    Structural subelements are children of `Structural` elements.
    Most Structural elements accept only specific `SubStructural` elements.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #structural-subelements
    """
class Bibliographic:
    """Category class for `bibliographic elements`__
    (displayed document meta-data).

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #bibliographic-elements
    """
class Body:
    """Category class for `body elements`__.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#body-elements
    """
class Admonition(Body):
    """Body elements that are distinctive, self-contained notices."""

    # (%body.elements;)+  -- one or more body elements
    content_model: Final = ((Body, '+'),)
class Sequential(Body):
    """Body elements that are list-like."""
class General(Body):
    """Body elements of miscellaneous kind."""
class Special(Body):
    """Body elements for special, internal use."""
class Part:
    """Category class for `body subelements`__.

    Body subelements always occur within specific parent elements.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#body-subelements
    """
class Decorative:
    """Category class for the decorative elements `header` and `footer`.

    Decorative elements are children of `decoration`.
    """

    # (%body.elements;)+  -- one or more body elements
    content_model: Final = ((Body, '+'),)
class Inline:
    """Category class for inline elements.

    Inline elements contain text data and possibly other inline elements.
    """
1457# Orthogonal categories and Mixins
1458# ================================
class PreBibliographic:
    """Category class: elements allowed before Bibliographic Elements."""
class Invisible(Special, PreBibliographic):
    """Category class: internal elements that don't appear in output."""
class Labeled:
    """Category class: elements whose first child is a `label`."""
class Resolvable:
    """Mixin providing the `resolved` flag (False by default)."""

    resolved: bool = False
class BackLinkable:
    """Mixin for Elements that accept a "backrefs" attribute."""

    # Extend the attribute tuples of `Element` with "backrefs".
    list_attributes: Final = (*Element.list_attributes, 'backrefs')
    valid_attributes: Final = (*Element.valid_attributes, 'backrefs')

    def add_backref(self: Element, refid: str) -> None:
        """Append `refid` to the list stored in the "backrefs" attribute."""
        backrefs = self['backrefs']
        backrefs.append(refid)
class Referential(Resolvable):
    """Category class: elements holding a cross-reference
    (outgoing hyperlink)."""
class Targetable(Resolvable):
    """Category class: cross-reference targets (incoming hyperlink)."""

    # Class-level default; 0 until something else is assigned.
    referenced: int = 0

    indirect_reference_name: str | None = None
    """The whitespace_normalized_name (contains mixed case) of a target.

    Required for MoinMoin/reST compatibility.

    Provisional.
    """
class Titular:
    """Category class: title, sub-title, or informal heading (rubric)."""
class TextElement(Element):
    """
    An element which directly contains text.

    All children are `Text` or `Inline` subclass nodes.  To find out
    whether an element's context is inline, simply test whether its
    immediate parent is a `TextElement` instance (including subclasses).
    This is handy for nodes like `image` that can appear both inline and
    as standalone body elements.

    If passing children to `__init__()`, make sure to set `text` to
    ``''`` or some other suitable value.
    """

    # (#PCDATA | %inline.elements;)*
    content_model: Final = (((Text, Inline), '*'),)

    child_text_separator: Final = ''
    """Separator for child nodes, used by `astext()` method."""

    def __init__(self,
                 rawsource: str = '',
                 text: str = '',
                 *children: Node,
                 **attributes: Any,
                 ) -> None:
        """Initialize the element; a non-empty `text` becomes a leading
        `Text` child placed before `children`."""
        if text:
            children = (Text(text), *children)
        Element.__init__(self, rawsource, *children, **attributes)
class FixedTextElement(TextElement):
    """A `TextElement` whose text content is preformatted."""

    valid_attributes: Final = (*Element.valid_attributes, 'xml:space')

    def __init__(self,
                 rawsource: str = '',
                 text: str = '',
                 *children: Node,
                 **attributes: Any,
                 ) -> None:
        """Initialize like `TextElement`, then set the "xml:space"
        attribute to "preserve" (overriding any value passed in)."""
        super().__init__(rawsource, text, *children, **attributes)
        self.attributes.update({'xml:space': 'preserve'})
class PureTextElement(TextElement):
    """A `TextElement` that holds text only, no child elements."""

    # (#PCDATA)  -- at most one Text node
    content_model: Final = ((Text, '?'),)
1559# =================================
1560# Concrete Document Tree Elements
1561# =================================
1562#
1563# See https://docutils.sourceforge.io/docs/ref/doctree.html#element-reference
1565# Decorative Elements
1566# ===================
class header(Decorative, Element):
    # Decorative element; no additions to the inherited behaviour.
    pass
class footer(Decorative, Element):
    # Decorative element; no additions to the inherited behaviour.
    pass
1572# Structural Subelements
1573# ======================
class title(Titular, PreBibliographic, SubStructural, TextElement):
    """The title of a `document`, `section`, `topic`, or generic
    `admonition`.
    """

    # Accepts "auto" and "refid" in addition to the common attributes.
    valid_attributes: Final = (*Element.valid_attributes, 'auto', 'refid')
class subtitle(Titular, PreBibliographic, SubStructural, TextElement):
    """The sub-title of a `document`, `section`, or `sidebar`."""

    def validate_position(self) -> None:
        """Check position of subtitle: must follow a title.

        Raise `ValidationError` if the subtitle is the first child of
        its parent.
        """
        is_first = self.parent and self.parent.index(self) == 0
        if not is_first:
            return
        raise ValidationError(f'Element {self.parent.starttag()} invalid:'
                              '\n <subtitle> only allowed after <title>.',
                              problematic_element=self)
class meta(PreBibliographic, SubStructural, Element):
    """Container for "invisible" bibliographic data, or meta-data."""

    # Attributes accepted in addition to the common ones.
    valid_attributes: Final = (
        *Element.valid_attributes,
        'content', 'dir', 'http-equiv', 'lang', 'media', 'name', 'scheme',
    )
class docinfo(SubStructural, Element):
    """Container for the displayed document meta-data."""

    # (%bibliographic.elements;)+  -- one or more bibliographic elements
    content_model: Final = ((Bibliographic, '+'),)
class decoration(PreBibliographic, SubStructural, Element):
    """Container for `header` and `footer`."""

    # (header?, footer?)
    # An empty element doesn't make sense, but is simpler to define.
    content_model: Final = ((header, '?'),
                            (footer, '?'),
                            )

    def get_header(self) -> header:
        """Return the `header` child, inserting a new one at the front
        if the first child is no `header`."""
        kids = self.children
        if not (kids and isinstance(kids[0], header)):
            self.insert(0, header())
        return self.children[0]

    def get_footer(self) -> footer:
        """Return the `footer` child, appending a new one if the last
        child is no `footer`."""
        kids = self.children
        if not (kids and isinstance(kids[-1], footer)):
            self.append(footer())
        return self.children[-1]
class transition(SubStructural, Element):
    """Transitions__ are breaks between untitled text parts.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#transition
    """

    def validate_position(self) -> None:
        """Check additional constraints on `transition` placement.

        A transition may not begin or end a section or document,
        nor may two transitions be immediately adjacent.

        Raise `ValidationError` listing every violated constraint.
        """
        headline = f'Element {self.parent.starttag()} invalid:'
        predecessor = self.previous_sibling()
        problems = []
        # A transition following title, subtitle, meta, or decoration
        # still counts as "at the beginning of a document or section".
        leading = (title, subtitle, meta, decoration)
        if predecessor is None or isinstance(predecessor, leading):
            problems.append(
                '<transition> may not begin a section or document.')
        is_last = self.parent.index(self) == len(self.parent) - 1
        if is_last:
            problems.append('<transition> may not end a section or document.')
        if isinstance(predecessor, transition):
            problems.append(
                '<transition> may not directly follow another transition.')
        if problems:
            raise ValidationError('\n '.join([headline, *problems]),
                                  problematic_element=self)
1653# Structural Elements
1654# ===================
class topic(Structural, Element):
    """
    Topics__ are non-recursive, mini-sections.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#topic
    """

    # (title?, (%body.elements;)+)
    content_model: Final = (
        (title, '?'),
        (Body, '+'),
    )
class sidebar(Structural, Element):
    """
    Sidebars__ are like parallel documents providing related material.

    A sidebar is typically offset by a border and "floats" to the side
    of the page.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#sidebar
    """

    # ((title, subtitle?)?, (%body.elements; | topic)+)
    # "subtitle only after title" is ensured in `subtitle.validate_position()`.
    content_model: Final = (
        (title, '?'),
        (subtitle, '?'),
        ((topic, Body), '+'),
    )
class section(Structural, Element):
    """Document section__. The main unit of hierarchy.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#section
    """
    # The content model is recursive (sections may contain sections),
    # so it is assigned after the class statement.


# (title, subtitle?, %structure.model;)
# Correct transition placement is ensured in `transition.validate_position()`.
section.content_model = (
    (title, '.'),
    (subtitle, '?'),
    ((Body, topic, sidebar, transition), '*'),
    ((section, transition), '*'),
)
1700# Root Element
1701# ============
1703class document(Root, Element):
1704 """
1705 The document root element.
1707 Do not instantiate this class directly; use
1708 `docutils.utils.new_document()` instead.
1709 """
1710 valid_attributes: Final = Element.valid_attributes + ('title',)
1711 content_model: Final = ((title, '?'),
1712 (subtitle, '?'),
1713 (meta, '*'),
1714 (decoration, '?'),
1715 (docinfo, '?'),
1716 (transition, '?'),
1717 ((Body, topic, sidebar, transition), '*'),
1718 ((section, transition), '*'),
1719 )
1720 # ( (title, subtitle?)?,
1721 # meta*,
1722 # decoration?,
1723 # (docinfo, transition?)?,
1724 # %structure.model; )
1725 # Additional restrictions for `subtitle` and `transition` are tested
1726 # with the respective `validate_position()` methods.
def __init__(self,
             settings: Values,
             reporter: Reporter,
             *args: Node,
             **kwargs: Any,
             ) -> None:
    """Set up the bookkeeping attributes of the document root.

    `settings` and `reporter` are stored on the instance; the remaining
    positional and keyword arguments are passed on to
    `Element.__init__()`.
    """
    Element.__init__(self, *args, **kwargs)

    self.current_source: StrPath | None = None
    """Path to or description of the input source being processed."""

    self.current_line: int | None = None
    """Line number (1-based) of `current_source`."""

    self.settings: Values = settings
    """Runtime settings data record."""

    self.reporter: Reporter = reporter
    """System message generator."""

    self.indirect_targets: list[target] = []
    """List of indirect target nodes."""

    self.substitution_defs: dict[str, substitution_definition] = {}
    """Mapping of substitution names to substitution_definition nodes."""

    self.substitution_names: dict[str, str] = {}
    """Mapping of case-normalized to case-sensitive substitution names."""

    self.refnames: dict[str, list[Element]] = {}
    """Mapping of names to lists of referencing nodes."""

    self.refids: dict[str, list[Element]] = {}
    """Mapping of ids to lists of referencing nodes."""

    # Values may be None: `set_duplicate_name_id()` stores None for
    # names invalidated as duplicates.
    self.nameids: dict[str, str | None] = {}
    """Mapping of names to unique id's."""

    self.nametypes: dict[str, bool] = {}
    """Mapping of names to hyperlink type. True: explicit, False: implicit.
    """

    self.ids: dict[str, Element] = {}
    """Mapping of ids to nodes."""

    self.footnote_refs: dict[str, list[footnote_reference]] = {}
    """Mapping of footnote labels to lists of footnote_reference nodes."""

    self.citation_refs: dict[str, list[citation_reference]] = {}
    """Mapping of citation labels to lists of citation_reference nodes."""

    self.autofootnotes: list[footnote] = []
    """List of auto-numbered footnote nodes."""

    self.autofootnote_refs: list[footnote_reference] = []
    """List of auto-numbered footnote_reference nodes."""

    self.symbol_footnotes: list[footnote] = []
    """List of symbol footnote nodes."""

    self.symbol_footnote_refs: list[footnote_reference] = []
    """List of symbol footnote_reference nodes."""

    self.footnotes: list[footnote] = []
    """List of manually-numbered footnote nodes."""

    self.citations: list[citation] = []
    """List of citation nodes."""

    self.autofootnote_start: int = 1
    """Initial auto-numbered footnote number."""

    self.symbol_footnote_start: int = 0
    """Initial symbol footnote symbol index."""

    # Keyed by the ID prefix (a string), see `set_id()`.
    self.id_counter: Counter[str] = Counter()
    """Numbers added to otherwise identical IDs."""

    self.parse_messages: list[system_message] = []
    """System messages generated while parsing."""

    self.transform_messages: list[system_message] = []
    """System messages generated while applying transforms."""

    # Delayed import (see the note at the module-level imports).
    import docutils.transforms
    self.transformer: Transformer = docutils.transforms.Transformer(self)
    """Storage for transforms to be applied to this document."""

    self.include_log: list[tuple[StrPath, tuple]] = []
    """The current source's parents (to detect inclusion loops)."""

    self.decoration: decoration | None = None
    """Document's `decoration` node."""

    # The document root is its own document.
    self._document: document = self
1824 def __getstate__(self) -> dict[str, Any]:
1825 """
1826 Return dict with unpicklable references removed.
1827 """
1828 state = self.__dict__.copy()
1829 state['reporter'] = None
1830 state['transformer'] = None
1831 return state
def asdom(self, dom: ModuleType | None = None) -> minidom.Document:
    """Return a DOM representation of this document.

    `dom` is the DOM implementation module to use; it defaults to
    `xml.dom.minidom` (imported on demand).
    """
    if dom is None:
        import xml.dom.minidom as dom
    root = dom.Document()
    tree = self._dom_node(root)
    root.appendChild(tree)
    return root
def set_id(self,
           node: Element,
           msgnode: Element | None = None,
           suggested_prefix: str = '',
           ) -> str:
    """
    Register the IDs of `node` or generate, set, and register a new one.

    If `node` already has IDs, each is registered in `self.ids`; for an
    ID already registered to another node, an error is reported (and
    added to `msgnode`, if given).  The last ID is returned.

    Otherwise an ID is derived from the node's names or, failing that,
    built from a prefix and a running number, appended to
    ``node['ids']``, registered, and returned.
    """
    if node['ids']:
        # register and check for duplicates
        for id_ in node['ids']:
            owner = self.ids.setdefault(id_, node)
            if owner is node:
                continue
            msg = self.reporter.error(f'Duplicate ID: "{id_}" used by '
                                      f'{owner.starttag()} '
                                      f'and {node.starttag()}',
                                      base_node=node)
            if msgnode is not None:
                msgnode += msg
        return id_

    # generate and set id
    id_prefix = self.settings.id_prefix
    auto_id_prefix = self.settings.auto_id_prefix
    base_id = ''
    id_ = ''
    for name in node['names']:
        # A temporary leading "x" allows names starting with numbers
        # when an id-prefix is configured.
        base_id = make_id('x' + name)[1:] if id_prefix else make_id(name)
        # TODO: normalize id-prefix? (would make code simpler)
        id_ = id_prefix + base_id
        if base_id and id_ not in self.ids:
            break  # found an unused, name-derived ID
    else:
        if base_id and auto_id_prefix.endswith('%'):
            # disambiguate name-derived ID
            # TODO: remove second condition after announcing change
            prefix = id_ + '-'
        else:
            prefix = id_prefix + auto_id_prefix
            if prefix.endswith('%'):
                # A trailing "%" is replaced by the suggested prefix or
                # an ID made from the tag name.
                stem = suggested_prefix or make_id(node.tagname)
                prefix = f'{prefix[:-1]}{stem}-'
        # Append a running number until the ID is unused.
        while True:
            self.id_counter[prefix] += 1
            id_ = f'{prefix}{self.id_counter[prefix]}'
            if id_ not in self.ids:
                break
    node['ids'].append(id_)
    self.ids[id_] = node
    return id_
def set_name_id_map(self,
                    node: Element,
                    id: str,
                    msgnode: Element | None = None,
                    explicit: bool = False,
                    ) -> None:
    """
    Update the name-to-ID and name-to-type mappings for `node`.

    `self.nameids` maps names to IDs, while `self.nametypes` maps names to
    booleans representing hyperlink type (True==explicit,
    False==implicit).

    The following state transition table shows how `self.nameids` items
    ("id") and `self.nametypes` items ("type") change with new input
    (a call to this method), and what actions are performed
    ("implicit"-type system messages are INFO/1, and
    "explicit"-type system messages are ERROR/3):

    ==== ===== ======== ======== ======= ==== ===== =====
     Old State    Input          Action        New State   Notes
    ----------- -------- ----------------- ----------- -----
    id   type  new type sys.msg. dupname id   type
    ==== ===== ======== ======== ======= ==== ===== =====
    -    -     explicit -        -       new  True
    -    -     implicit -        -       new  False
    -    False explicit -        -       new  True
    old  False explicit implicit old     new  True
    -    True  explicit explicit new     -    True
    old  True  explicit explicit new,old -    True  [#]_
    -    False implicit implicit new     -    False
    old  False implicit implicit new,old -    False
    -    True  implicit implicit new     -    True
    old  True  implicit implicit new     old  True
    ==== ===== ======== ======== ======= ==== ===== =====

    .. [#] Do not clear the name-to-id map or invalidate the old target if
       both old and new targets are external and refer to identical URIs.
       The new target is invalidated regardless.
    """
    # Iterate over a snapshot: handling a duplicate modifies node['names'].
    for name in tuple(node['names']):
        if name not in self.nameids:
            self.nameids[name] = id
            self.nametypes[name] = explicit
            continue
        self.set_duplicate_name_id(node, id, name, msgnode, explicit)
def set_duplicate_name_id(self,
                          node: Element,
                          id: str,
                          name: str,
                          msgnode: Element | None,
                          explicit: bool,
                          ) -> None:
    """
    Handle a `name` of `node` that is already present in `self.nameids`.

    Updates `self.nameids` and `self.nametypes`, marks the affected
    nodes via `dupname()`, and generates system messages, which are
    added to `msgnode` unless it is None.

    :node: the new target carrying `name`.
    :id: ID of `node`.
    :name: the duplicate name.
    :msgnode: element collecting system messages, or None.
    :explicit: True if `node` is an explicit target.
    """
    old_id = self.nameids[name]
    old_explicit = self.nametypes[name]
    # A name once used by an explicit target stays "explicit".
    self.nametypes[name] = old_explicit or explicit
    if explicit:
        if old_explicit:
            # Two explicit targets share the name.
            level = 2
            if old_id is not None:
                old_node = self.ids[old_id]
                if 'refuri' in node:
                    refuri = node['refuri']
                    if (old_node['names']
                            and 'refuri' in old_node
                            and old_node['refuri'] == refuri):
                        level = 1  # just inform if refuri's identical
                if level > 1:
                    # Invalidate the old target and the name mapping.
                    dupname(old_node, name)
                    self.nameids[name] = None
            msg = self.reporter.system_message(
                level, 'Duplicate explicit target name: "%s".' % name,
                backrefs=[id], base_node=node)
            if msgnode is not None:
                msgnode += msg
            dupname(node, name)
        else:
            # The new explicit target overrides the old implicit one.
            self.nameids[name] = id
            if old_id is not None:
                old_node = self.ids[old_id]
                dupname(old_node, name)
    else:
        # New implicit target: an old implicit target is invalidated,
        # too; an old explicit target is kept.
        if old_id is not None and not old_explicit:
            self.nameids[name] = None
            old_node = self.ids[old_id]
            dupname(old_node, name)
        dupname(node, name)
    if not explicit or (not old_explicit and old_id is not None):
        msg = self.reporter.info(
            'Duplicate implicit target name: "%s".' % name,
            backrefs=[id], base_node=node)
        if msgnode is not None:
            msgnode += msg
def has_name(self, name: str) -> bool:
    """Tell whether `name` has an entry in `self.nameids`."""
    known_names = self.nameids
    return name in known_names
1988 # "note" here is an imperative verb: "take note of".
def note_implicit_target(
        self, target: Element, msgnode: Element | None = None) -> None:
    """Assign an ID to `target` and map its names as implicit.

    `msgnode` is handed on to `set_id()` and `set_name_id_map()`.
    """
    target_id = self.set_id(target, msgnode)
    self.set_name_id_map(target, target_id, msgnode, explicit=False)
def note_explicit_target(
        self, target: Element, msgnode: Element | None = None) -> None:
    """Assign an ID to `target` and map its names as explicit.

    `msgnode` is handed on to `set_id()` and `set_name_id_map()`.
    """
    target_id = self.set_id(target, msgnode)
    self.set_name_id_map(target, target_id, msgnode, explicit=True)
def note_refname(self, node: Element) -> None:
    """Append `node` to the list stored under its "refname"."""
    refname = node['refname']
    self.refnames.setdefault(refname, []).append(node)
def note_refid(self, node: Element) -> None:
    """Append `node` to the list stored under its "refid"."""
    refid = node['refid']
    self.refids.setdefault(refid, []).append(node)
def note_indirect_target(self, target: target) -> None:
    """Record `target` in `self.indirect_targets`.

    A target with a non-empty "names" value is also passed to
    `note_refname()`.
    """
    self.indirect_targets.append(target)
    if not target['names']:
        return
    self.note_refname(target)
def note_anonymous_target(self, target: target) -> None:
    """Assign an ID to `target` via `set_id()`; nothing else is stored."""
    self.set_id(target)
def note_autofootnote(self, footnote: footnote) -> None:
    """Assign an ID to `footnote` and add it to `self.autofootnotes`."""
    self.set_id(footnote)
    registry = self.autofootnotes
    registry.append(footnote)
def note_autofootnote_ref(self, ref: footnote_reference) -> None:
    """Assign an ID to `ref` and add it to `self.autofootnote_refs`."""
    self.set_id(ref)
    registry = self.autofootnote_refs
    registry.append(ref)
def note_symbol_footnote(self, footnote: footnote) -> None:
    """Assign an ID to `footnote` and add it to `self.symbol_footnotes`."""
    self.set_id(footnote)
    registry = self.symbol_footnotes
    registry.append(footnote)
def note_symbol_footnote_ref(self, ref: footnote_reference) -> None:
    """Assign an ID to `ref` and add it to `self.symbol_footnote_refs`."""
    self.set_id(ref)
    registry = self.symbol_footnote_refs
    registry.append(ref)
def note_footnote(self, footnote: footnote) -> None:
    """Assign an ID to `footnote` and add it to `self.footnotes`."""
    self.set_id(footnote)
    registry = self.footnotes
    registry.append(footnote)
def note_footnote_ref(self, ref: footnote_reference) -> None:
    """Register the footnote reference `ref`.

    Assigns an ID, files `ref` in `self.footnote_refs` under its
    "refname" and passes it to `note_refname()`.
    """
    self.set_id(ref)
    same_name_refs = self.footnote_refs.setdefault(ref['refname'], [])
    same_name_refs.append(ref)
    self.note_refname(ref)
def note_citation(self, citation: citation) -> None:
    """Add `citation` to `self.citations` (no ID is assigned here)."""
    registry = self.citations
    registry.append(citation)
def note_citation_ref(self, ref: citation_reference) -> None:
    """Register the citation reference `ref`.

    Assigns an ID, files `ref` in `self.citation_refs` under its
    "refname" and passes it to `note_refname()`.
    """
    self.set_id(ref)
    same_name_refs = self.citation_refs.setdefault(ref['refname'], [])
    same_name_refs.append(ref)
    self.note_refname(ref)
def note_substitution_def(self,
                          subdef: substitution_definition,
                          def_name: str,
                          msgnode: Element | None = None,
                          ) -> None:
    """Register the substitution definition `subdef` under `def_name`.

    The name is whitespace-normalized first.  If a definition with that
    name is already registered, an error is reported (and appended to
    `msgnode` unless that is None) and the earlier node is marked with
    `dupname()`.  The latest definition always replaces the earlier one.
    """
    name = whitespace_normalize_name(def_name)
    registry = self.substitution_defs
    if name in registry:
        error = self.reporter.error(
            'Duplicate substitution definition name: "%s".' % name,
            base_node=subdef)
        if msgnode is not None:
            msgnode += error
        dupname(registry[name], name)
    # keep only the last definition:
    registry[name] = subdef
    # case-insensitive mapping:
    self.substitution_names[fully_normalize_name(name)] = name
def note_substitution_ref(self,
                          subref: substitution_reference,
                          refname: str,
                          ) -> None:
    """Store the whitespace-normalized `refname` in `subref['refname']`."""
    normalized = whitespace_normalize_name(refname)
    subref['refname'] = normalized
def note_pending(
        self, pending: pending, priority: int | None = None) -> None:
    """Hand `pending` and `priority` to `self.transformer.add_pending()`."""
    transformer = self.transformer
    transformer.add_pending(pending, priority)
def note_parse_message(self, message: system_message) -> None:
    """Add `message` to `self.parse_messages`."""
    collected = self.parse_messages
    collected.append(message)
def note_transform_message(self, message: system_message) -> None:
    """Add `message` to `self.transform_messages`."""
    collected = self.transform_messages
    collected.append(message)
def note_source(self,
                source: StrPath | None,
                offset: int | None,
                ) -> None:
    """Remember the current source and line.

    A true `source` is stored after conversion with `os.fspath()`,
    a false one (None or empty) is stored unchanged.
    `self.current_line` becomes ``offset + 1``, or None if `offset`
    is None.
    """
    self.current_source = os.fspath(source) if source else source
    self.current_line = None if offset is None else offset + 1
def copy(self) -> Self:
    """Return a new instance of the same class without the children.

    The instance is created from `self.settings`, `self.reporter` and
    the attributes; `source` and `line` are carried over afterwards.
    """
    clone = self.__class__(
        self.settings, self.reporter, **self.attributes)
    clone.source, clone.line = self.source, self.line
    return clone
def get_decoration(self) -> decoration:
    """Return `self.decoration`, creating and inserting it if missing.

    A new `decoration` element is placed before the first child that is
    neither `Titular` nor `meta`; if there is no such child it is
    appended.
    """
    if self.decoration:
        return self.decoration
    self.decoration: decoration = decoration()
    position = self.first_child_not_matching_class((Titular, meta))
    if position is not None:
        self.insert(position, self.decoration)
    else:
        self.append(self.decoration)
    return self.decoration
2109# Bibliographic Elements
2110# ======================
class author(Bibliographic, TextElement):
    """Bibliographic text element ``author``."""


class organization(Bibliographic, TextElement):
    """Bibliographic text element ``organization``."""


class address(Bibliographic, FixedTextElement):
    """Bibliographic fixed-text element ``address``."""


class contact(Bibliographic, TextElement):
    """Bibliographic text element ``contact``."""


class version(Bibliographic, TextElement):
    """Bibliographic text element ``version``."""


class revision(Bibliographic, TextElement):
    """Bibliographic text element ``revision``."""


class status(Bibliographic, TextElement):
    """Bibliographic text element ``status``."""


class date(Bibliographic, TextElement):
    """Bibliographic text element ``date``."""


class copyright(Bibliographic, TextElement):  # NoQA: A001 (builtin name)
    """Bibliographic text element ``copyright``."""
class authors(Bibliographic, Element):
    """Group of author entries for a document with several authors."""

    content_model: Final = (
        (author, '+'),
        (organization, '?'),
        (address, '?'),
        (contact, '?'),
    )
    # (author, organization?, address?, contact?)+

    def validate_content(self,
                         model: _ContentModelTuple | None = None,
                         elements: Sequence[Node] | None = None,
                         ) -> list[Node]:
        """Apply the content model again and again to the children.

        The inherited validation is run once without arguments and then
        re-run on whatever it returned for as long as that result is
        non-empty.  The arguments `model` and `elements` are not used.

        Provisional.
        """
        leftover = super().validate_content()
        while leftover:
            leftover = super().validate_content(elements=leftover)
        return leftover
2147# Body Elements
2148# =============
2149#
2150# General
2151# -------
2152#
2153# Miscellaneous Body Elements and related Body Subelements (Part)
class paragraph(General, TextElement):
    """General text element ``paragraph``."""


class rubric(Titular, General, TextElement):
    """Text element ``rubric``; both `Titular` and `General`."""
class compound(General, Element):
    """General element holding one or more body elements."""

    content_model: Final = ((Body, '+'),)  # (%body.elements;)+


class container(General, Element):
    """General element holding one or more body elements."""

    content_model: Final = ((Body, '+'),)  # (%body.elements;)+
class attribution(Part, TextElement):
    """Visible statement of where a `block_quote` comes from."""


class block_quote(General, Element):
    """An extended quotation that is set off from the main text."""

    content_model: Final = (
        (Body, '+'),
        (attribution, '?'),
    )
    # ((%body.elements;)+, attribution?)
2177# Lists
2178# -----
2179#
2180# Lists (Sequential) and related Body Subelements (Part)
class list_item(Part, Element):
    """List entry; may hold any number of body elements."""

    content_model: Final = ((Body, '*'),)  # (%body.elements;)*


class bullet_list(Sequential, Element):
    """Sequential element made of one or more `list_item` children."""

    valid_attributes: Final = (*Element.valid_attributes, 'bullet')
    content_model: Final = ((list_item, '+'),)  # (list_item+)


class enumerated_list(Sequential, Element):
    """Sequential element made of one or more `list_item` children."""

    valid_attributes: Final = (
        *Element.valid_attributes,
        'enumtype', 'prefix', 'suffix', 'start',
    )
    content_model: Final = ((list_item, '+'),)  # (list_item+)
class term(Part, TextElement):
    """Text element ``term``, part of a `definition_list_item`."""


class classifier(Part, TextElement):
    """Text element ``classifier``, part of a `definition_list_item`."""


class definition(Part, Element):
    """The definition belonging to a `term` in a `definition_list`."""

    content_model: Final = ((Body, '+'),)  # (%body.elements;)+


class definition_list_item(Part, Element):
    """One entry of a `definition_list`: term(s) and their definition."""

    content_model: Final = (
        (term, '.'),
        ((classifier, term), '*'),
        (definition, '.'),
    )
    # ((term, classifier*)+, definition)


class definition_list(Sequential, Element):
    """A list of terms together with their definitions.

    Suitable for glossaries and dictionaries, for describing or
    classifying things, for dialogues, or for itemizing subtopics.
    """

    content_model: Final = ((definition_list_item, '+'),)
    # (definition_list_item+)
class field_name(Part, TextElement):
    """Text element ``field_name``, first child of a `field`."""


class field_body(Part, Element):
    """Second child of a `field`; any number of body elements."""

    content_model: Final = ((Body, '*'),)  # (%body.elements;)*


class field(Part, Bibliographic, Element):
    """A `field_name` followed by a `field_body`."""

    content_model: Final = (
        (field_name, '.'),
        (field_body, '.'),
    )
    # (field_name, field_body)


class field_list(Sequential, Element):
    """A list of label/data pairs.

    Usually rendered as a list with two columns.
    Also serves extension syntax and special processing.
    """

    content_model: Final = ((field, '+'),)  # (field+)
class option_string(Part, PureTextElement):
    """Literal text of a command-line option, usually set monospaced."""


class option_argument(Part, PureTextElement):
    """Placeholder text standing for the argument of an option."""

    valid_attributes: Final = (*Element.valid_attributes, 'delimiter')

    def astext(self) -> str:
        """Return the text prefixed by "delimiter" (default: a space)."""
        delimiter = self.get('delimiter', ' ')
        return delimiter + TextElement.astext(self)
class option(Part, Element):
    """One option inside an `option_list_item`.

    Consists of an option string and any number of placeholders for
    option arguments.
    """

    child_text_separator: Final = ''
    content_model: Final = (
        (option_string, '.'),
        (option_argument, '*'),
    )
    # (option_string, option_argument*)


class option_group(Part, Element):
    """One or more `option` elements that are synonyms of each other."""

    child_text_separator: Final = ', '
    content_model: Final = ((option, '+'),)  # (option+)
class description(Part, Element):
    """Description of a command-line option."""

    content_model: Final = ((Body, '+'),)  # (%body.elements;)+


class option_list_item(Part, Element):
    """Pairs an `option_group` with its `description`."""

    child_text_separator: Final = ' '
    content_model: Final = (
        (option_group, '.'),
        (description, '.'),
    )
    # (option_group, description)


class option_list(Sequential, Element):
    """List of command-line options and descriptions in two columns."""

    content_model: Final = ((option_list_item, '+'),)  # (option_list_item+)
2291# Pre-formatted text blocks
2292# -------------------------
class literal_block(General, FixedTextElement):
    """General fixed-text element ``literal_block``."""


class doctest_block(General, FixedTextElement):
    """General fixed-text element ``doctest_block``."""


class math_block(General, FixedTextElement, PureTextElement):
    """A display formula (mathematical notation)."""
class line(Part, TextElement):
    """One line of text inside a `line_block`."""

    indent: str | None = None


class line_block(General, Element):
    """A sequence of `line` elements and nested `line_block` elements."""
    # recursive content model: (line | line_block)+


# Assigned after the class statement because the model refers to
# `line_block` itself.
line_block.content_model = (((line, line_block), '+'),)
2316# Admonitions
2317# -----------
2318# distinctive and self-contained notices
class attention(Admonition, Element):
    """Admonition element ``attention``."""


class caution(Admonition, Element):
    """Admonition element ``caution``."""


class danger(Admonition, Element):
    """Admonition element ``danger``."""


class error(Admonition, Element):
    """Admonition element ``error``."""


class important(Admonition, Element):
    """Admonition element ``important``."""


class note(Admonition, Element):
    """Admonition element ``note``."""


class tip(Admonition, Element):
    """Admonition element ``tip``."""


class hint(Admonition, Element):
    """Admonition element ``hint``."""


class warning(Admonition, Element):
    """Admonition element ``warning``."""


class admonition(Admonition, Element):
    """Admonition with a `title` followed by one or more body elements."""

    content_model: Final = (
        (title, '.'),
        (Body, '+'),
    )
    # (title, (%body.elements;)+)
2336# Footnote and citation
2337# ---------------------
class label(Part, PureTextElement):
    """The visible identifier of a footnote or citation."""


class footnote(General, BackLinkable, Element, Labeled, Targetable):
    """A labelled note giving additional context (footnote or endnote)."""

    valid_attributes: Final = (*Element.valid_attributes, 'auto', 'backrefs')
    content_model: Final = (
        (label, '?'),
        (Body, '+'),
    )
    # (label?, (%body.elements;)+)
    # The label will become required in Docutils 1.0.


class citation(General, BackLinkable, Element, Labeled, Targetable):
    """A `label` followed by one or more body elements."""

    content_model: Final = (
        (label, '.'),
        (Body, '+'),
    )
    # (label, (%body.elements;)+)
2356# Graphical elements
2357# ------------------
class image(General, Inline, Element):
    """A reference to an image resource.

    Usable both as body element and as inline element.
    """

    valid_attributes: Final = (
        *Element.valid_attributes,
        'uri', 'alt', 'align', 'height', 'width', 'scale', 'loading',
    )

    def astext(self) -> str:
        """Return the "alt" attribute, or an empty string if unset."""
        alternative_text = self.get('alt', '')
        return alternative_text
class caption(Part, TextElement):
    """Text element ``caption``, optional part of a `figure`."""


class legend(Part, Element):
    """Wraps text that accompanies a `figure` but is not its caption."""

    content_model: Final = ((Body, '+'),)  # (%body.elements;)+


class figure(General, Element):
    """A formal figure: generally an illustration that has a title."""

    valid_attributes: Final = (*Element.valid_attributes, 'align', 'width')
    content_model: Final = (
        (image, '.'),
        (caption, '?'),
        (legend, '?'),
    )
    # (image, ((caption, legend?) | legend))
    # TODO: According to the DTD, a caption or legend is required
    # but rST allows "bare" figures which are formatted differently from
    # images (floating in LaTeX, nested in a <figure> in HTML). [bugs: #489]
2392# Tables
2393# ------
class entry(Part, Element):
    """A table cell: one entry of a `row`."""

    valid_attributes: Final = (
        *Element.valid_attributes,
        'align', 'char', 'charoff', 'colname', 'colsep', 'morecols',
        'morerows', 'namest', 'nameend', 'rowsep', 'valign',
    )
    content_model: Final = ((Body, '*'),)
    # %tbl.entry.mdl -> (%body.elements;)*


class row(Part, Element):
    """A row of table cells."""

    valid_attributes: Final = (*Element.valid_attributes, 'rowsep', 'valign')
    content_model: Final = ((entry, '+'),)  # (%tbl.row.mdl;) -> entry+
class colspec(Part, Element):
    """Specification of one column of a `tgroup`."""

    valid_attributes: Final = (
        *Element.valid_attributes,
        'align', 'char', 'charoff', 'colname', 'colnum',
        'colsep', 'colwidth', 'rowsep', 'stub',
    )

    def propwidth(self) -> int|float:
        """Return the "colwidth__" attribute as a number. Default 1.

        Raise ValueError if "colwidth" is zero, negative, or a
        *fixed value*.

        Provisional.

        __ https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth
        """
        # Move current implementation of validate_colwidth() here
        # in Docutils 1.0
        raw_width = self.get('colwidth', '')
        return validate_colwidth(raw_width)
class thead(Part, Element):
    """The row or rows forming the head of a `tgroup`."""

    valid_attributes: Final = (*Element.valid_attributes, 'valign')
    content_model: Final = ((row, '+'),)  # (row+)


class tbody(Part, Element):
    """The body of a `tgroup`."""

    valid_attributes: Final = (*Element.valid_attributes, 'valign')
    content_model: Final = ((row, '+'),)  # (row+)


class tgroup(Part, Element):
    """Part of a table. A table usually has exactly one `tgroup`."""

    valid_attributes: Final = (
        *Element.valid_attributes,
        'align', 'cols', 'colsep', 'rowsep',
    )
    content_model: Final = (
        (colspec, '*'),
        (thead, '?'),
        (tbody, '.'),
    )
    # (colspec*, thead?, tbody)


class table(General, Element):
    """Data arranged in rows and columns."""

    valid_attributes: Final = (
        *Element.valid_attributes,
        'align', 'colsep', 'frame', 'pgwide', 'rowsep', 'width',
    )
    content_model: Final = (
        (title, '?'),
        (tgroup, '+'),
    )
    # (title?, tgroup+)
2458# Special purpose elements
2459# ------------------------
2460# Body elements for internal use or special requests.
class comment(Invisible, FixedTextElement, PureTextElement):
    """Notes by the author that do not show up in the output."""


class substitution_definition(Invisible, TextElement):
    """Invisible text element ``substitution_definition``."""

    valid_attributes: Final = (*Element.valid_attributes, 'ltrim', 'rtrim')


class target(Invisible, Inline, TextElement, Targetable):
    """Invisible, targetable text element ``target``."""

    valid_attributes: Final = (
        *Element.valid_attributes,
        'anonymous', 'refid', 'refname', 'refuri',
    )
class system_message(Special, BackLinkable, PreBibliographic, Element):
    """
    Element carrying a system message.

    This class should not be instantiated directly; call
    ``document.reporter.info/warning/error/severe()`` instead.
    """

    valid_attributes: Final = (
        *BackLinkable.valid_attributes,
        'level', 'line', 'type',
    )
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+

    def __init__(self,
                 message: str | None = None,
                 *children: Node,
                 **attributes: Any,
                 ) -> None:
        """Set up the element.

        A non-empty `message` is wrapped in a `paragraph` that becomes
        the first child.  A "rawsource" keyword argument is removed from
        `attributes` and passed on as the raw source.
        """
        rawsource = attributes.pop('rawsource', '')
        if message:
            children = (paragraph('', message), *children)
        try:
            Element.__init__(self, rawsource, *children, **attributes)
        except:  # NoQA: E722 (catchall)
            # Show the offending children, then let the error propagate.
            print('system_message: children=%r' % (children,))
            raise

    def astext(self) -> str:
        """Return "source:line: (type/level) text" for this message.

        "line" defaults to an empty string; "source", "type" and "level"
        are looked up by subscription.
        """
        line = self.get('line', '')
        fields = (self['source'], line, self['type'],
                  self['level'], Element.astext(self))
        return '%s:%s: (%s/%s) %s' % fields
class pending(Invisible, Element):
    """
    Placeholder for an operation that is still to be done.

    A "pending" element encapsulates a pending operation: the operation
    (transform) itself, the place where it is to be applied, and the data
    it needs.  The public document tree records only the location (through
    the "pending" object itself); the operation and its data are kept in
    internal instance attributes of the "pending" object.

    Suppose you want a table of contents in your reStructuredText
    document.  The simplest way to say where it goes is a directive
    inside the document::

        .. contents::

    The "contents" directive, however, cannot do its job before the whole
    document has been parsed and possibly transformed to some degree.
    The directive code therefore leaves behind a placeholder that
    triggers the second phase of processing, roughly::

        <pending ...public attributes...> + internal attributes

    Use `document.note_pending()` so that the
    `docutils.transforms.Transformer` stage of processing can run all
    pending transforms.
    """

    def __init__(self,
                 transform: Transform,
                 details: Mapping[str, Any] | None = None,
                 rawsource: str = '',
                 *children: Node,
                 **attributes: Any,
                 ) -> None:
        Element.__init__(self, rawsource, *children, **attributes)

        self.transform: Transform = transform
        """The `docutils.transforms.Transform` class implementing the pending
        operation."""

        self.details: Mapping[str, Any] = details or {}
        """Detail data (dictionary) required by the pending operation."""

    def pformat(self, indent: str = ' ', level: int = 0) -> str:
        """Return the element's `pformat()` plus the internal attributes.

        The transform is listed as "module.name"; the detail items follow
        sorted by key.  Node values, and non-empty lists whose first item
        is a Node, are shown as nested `pformat()` output; every other
        value is shown with ``%r``.
        """
        def nested(node: Node) -> list[str]:
            # Lines of the node's own pformat(), shifted right.
            return ['%9s%s' % ('', text)
                    for text in node.pformat().splitlines()]

        internals = ['.. internal attributes:',
                     ' .transform: %s.%s' % (self.transform.__module__,
                                            self.transform.__name__),
                     ' .details:']
        for key, value in sorted(self.details.items()):
            if isinstance(value, Node):
                internals.append('%7s%s:' % ('', key))
                internals.extend(nested(value))
            elif (value
                  and isinstance(value, list)
                  and isinstance(value[0], Node)):
                internals.append('%7s%s:' % ('', key))
                for item in value:
                    internals.extend(nested(item))
            else:
                internals.append('%7s%s: %r' % ('', key, value))
        prefix = indent * level
        tail = ''.join(' %s%s\n' % (prefix, text) for text in internals)
        return Element.pformat(self, indent, level) + tail

    def copy(self) -> Self:
        """Return a childless copy sharing transform and details.

        The document reference, `source` and `line` are carried over.
        """
        clone = self.__class__(self.transform, self.details, self.rawsource,
                               **self.attributes)
        clone._document = self._document
        clone.source, clone.line = self.source, self.line
        return clone
class raw(Special, Inline, PreBibliographic,
          FixedTextElement, PureTextElement):
    """Raw data, to be handed to the Writer untouched.

    Usable both as Body element and as Inline element.
    """

    valid_attributes: Final = (
        *Element.valid_attributes,
        'format', 'xml:space',
    )
2595# Inline Elements
2596# ===============
class abbreviation(Inline, TextElement):
    """Inline text element ``abbreviation``."""


class acronym(Inline, TextElement):
    """Inline text element ``acronym``."""


class emphasis(Inline, TextElement):
    """Inline text element ``emphasis``."""


class generated(Inline, TextElement):
    """Inline text element ``generated``."""


class inline(Inline, TextElement):
    """Inline text element ``inline``."""


class literal(Inline, TextElement):
    """Inline text element ``literal``."""


class strong(Inline, TextElement):
    """Inline text element ``strong``."""


class subscript(Inline, TextElement):
    """Inline text element ``subscript``."""


class superscript(Inline, TextElement):
    """Inline text element ``superscript``."""


class title_reference(Inline, TextElement):
    """Inline text element ``title_reference``."""
class reference(General, Inline, Referential, TextElement):
    """Referential text element ``reference``."""

    valid_attributes: Final = (
        *Element.valid_attributes,
        'anonymous', 'name', 'refid', 'refname', 'refuri',
    )


class footnote_reference(Inline, Referential, PureTextElement):
    """Inline referential element ``footnote_reference``."""

    valid_attributes: Final = (
        *Element.valid_attributes,
        'auto', 'refid', 'refname',
    )


class citation_reference(Inline, Referential, PureTextElement):
    """Inline referential element ``citation_reference``."""

    valid_attributes: Final = (*Element.valid_attributes, 'refid', 'refname')


class substitution_reference(Inline, TextElement):
    """Inline text element ``substitution_reference``."""

    valid_attributes: Final = (*Element.valid_attributes, 'refname')


class math(Inline, PureTextElement):
    """Mathematical notation within running text."""


class problematic(Inline, TextElement):
    """Inline text element ``problematic``."""

    valid_attributes: Final = (
        *Element.valid_attributes,
        'refid', 'refname', 'refuri',
    )
2637# ========================================
2638# Auxiliary Classes, Functions, and Data
2639# ========================================
node_class_names: Sequence[str] = [
    'Text',
    'abbreviation', 'acronym', 'address', 'admonition', 'attention',
    'attribution', 'author', 'authors',
    'block_quote', 'bullet_list',
    'caption', 'caution', 'citation', 'citation_reference', 'classifier',
    'colspec', 'comment', 'compound', 'contact', 'container', 'copyright',
    'danger', 'date', 'decoration', 'definition', 'definition_list',
    'definition_list_item', 'description', 'docinfo', 'doctest_block',
    'document',
    'emphasis', 'entry', 'enumerated_list', 'error',
    'field', 'field_body', 'field_list', 'field_name', 'figure', 'footer',
    'footnote', 'footnote_reference',
    'generated',
    'header', 'hint',
    'image', 'important', 'inline',
    'label', 'legend', 'line', 'line_block', 'list_item', 'literal',
    'literal_block',
    'math', 'math_block', 'meta',
    'note',
    'option', 'option_argument', 'option_group', 'option_list',
    'option_list_item', 'option_string', 'organization',
    'paragraph', 'pending', 'problematic',
    'raw', 'reference', 'revision', 'row', 'rubric',
    'section', 'sidebar', 'status', 'strong', 'subscript',
    'substitution_definition', 'substitution_reference', 'subtitle',
    'superscript', 'system_message',
    'table', 'target', 'tbody', 'term', 'tgroup', 'thead', 'tip', 'title',
    'title_reference', 'topic', 'transition',
    'version',
    'warning',
]
"""A list of names of all concrete Node subclasses."""
class NodeVisitor:
    """
    Abstract superclass implementing the "Visitor" pattern [GoF95]_ for
    traversals of the document tree.

    For every node class there are corresponding methods that do nothing
    by default; override individual methods to get specific and useful
    behaviour.  `Node.walk()` calls `dispatch_visit()` when it enters a
    node.  `Node.walkabout()` additionally calls `dispatch_departure()`
    before it leaves a node.

    The dispatch methods call "``visit_`` + node class name" and
    "``depart_`` + node class name", respectively.

    This is the base class for visitors that must implement ``visit_...``
    & ``depart_...`` methods for *all* compulsory node types they meet
    (e.g. `docutils.writers.Writer` subclasses).
    Unimplemented methods raise exceptions (except for optional nodes).

    Use subclass `SparseNodeVisitor` for sparse traversals, where only
    some node types matter.  Subclass `GenericNodeVisitor` when (mostly
    or entirely) uniform processing is wanted.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    optional: ClassVar[tuple[str, ...]] = ('meta',)
    """
    Tuple of node class names (strings).

    Writers that lack visit or departure functions for these node
    classes do not trigger an exception.

    Ensures transitional compatibility with existing 3rd-party writers.
    """

    def __init__(self, document: document, /) -> None:
        self.document: document = document

    def dispatch_visit(self, node: Node) -> None:
        """
        Call self."``visit_`` + node class name" with `node` as the
        argument, or self.unknown_visit if there is no such method.
        """
        node_name = node.__class__.__name__
        handler_name = 'visit_' + node_name
        handler = getattr(self, handler_name, self.unknown_visit)
        self.document.reporter.debug(
            'docutils.nodes.NodeVisitor.dispatch_visit calling %s for %s'
            % (handler.__name__, node_name))
        return handler(node)

    def dispatch_departure(self, node: Node) -> None:
        """
        Call self."``depart_`` + node class name" with `node` as the
        argument, or self.unknown_departure if there is no such method.
        """
        node_name = node.__class__.__name__
        handler_name = 'depart_' + node_name
        handler = getattr(self, handler_name, self.unknown_departure)
        self.document.reporter.debug(
            'docutils.nodes.NodeVisitor.dispatch_departure calling %s for %s'
            % (handler.__name__, node_name))
        return handler(node)

    def unknown_visit(self, node: Node) -> None:
        """
        Called on entering a `Node` type without ``visit_...`` method.

        Raise NotImplementedError unless the "strict_visitor" setting is
        false and the node class is listed in `optional`.
        Meant to be overridden.
        """
        node_name = node.__class__.__name__
        if (not self.document.settings.strict_visitor
                and node_name in self.optional):
            return
        raise NotImplementedError(
            '%s visiting unknown node type: %s'
            % (self.__class__, node_name))

    def unknown_departure(self, node: Node) -> None:
        """
        Called before leaving a `Node` type without ``depart_...`` method.

        Raise NotImplementedError unless the "strict_visitor" setting is
        false and the node class is listed in `optional`.
        Meant to be overridden.
        """
        node_name = node.__class__.__name__
        if (not self.document.settings.strict_visitor
                and node_name in self.optional):
            return
        raise NotImplementedError(
            '%s departing unknown node type: %s'
            % (self.__class__, node_name))
class SparseNodeVisitor(NodeVisitor):
    """
    Base class for sparse traversals that care about some node types only.

    Subclass `NodeVisitor` instead when ``visit_...`` & ``depart_...``
    methods are to be implemented for *all* node types (as in
    `docutils.writers.Writer` subclasses).
    """
class GenericNodeVisitor(NodeVisitor):
    """
    Abstract superclass of generic "Visitors", for simple traversals.

    Every ``visit_...`` method calls `default_visit()` unless it is
    overridden; every ``depart_...`` method (used by `Node.walkabout()`)
    likewise calls `default_departure()`.  Subclasses must override
    `default_visit()` (and `default_departure()`).

    Override only `default_visit()` (and `default_departure()`) to get a
    fully generic visitor.  Override individual ``visit_...()`` (and
    ``depart_...()``) methods as well to get a semi-generic visitor.

    `NodeVisitor.unknown_visit()` (`NodeVisitor.unknown_departure()`) should
    be overridden for default behavior.
    """

    def default_visit(self, node: Node):
        """Hook for generic, uniform traversals; must be overridden."""
        raise NotImplementedError

    def default_departure(self, node: Node):
        """Hook for generic, uniform traversals; must be overridden."""
        raise NotImplementedError
2799def _call_default_visit(self: GenericNodeVisitor, node: Node) -> None:
2800 self.default_visit(node)
2803def _call_default_departure(self: GenericNodeVisitor, node: Node) -> None:
2804 self.default_departure(node)
2807def _nop(self: SparseNodeVisitor, node: Node) -> None:
2808 pass
2811def _add_node_class_names(names) -> None:
2812 """Save typing with dynamic assignments:"""
2813 for _name in names:
2814 setattr(GenericNodeVisitor, "visit_" + _name, _call_default_visit)
2815 setattr(GenericNodeVisitor, "depart_" + _name, _call_default_departure)
2816 setattr(SparseNodeVisitor, 'visit_' + _name, _nop)
2817 setattr(SparseNodeVisitor, 'depart_' + _name, _nop)
# Install the visit_*/depart_* methods for every name in `node_class_names`.
_add_node_class_names(node_class_names)
class TreeCopyVisitor(GenericNodeVisitor):
    """
    Copy a whole tree or branch, element attributes included.
    """

    def __init__(self, document: document) -> None:
        super().__init__(document)
        self.parent_stack: list[list[Node]] = []
        self.parent: list[Node] = []

    def get_tree_copy(self) -> Node:
        """Return the first item collected by the outermost parent."""
        return self.parent[0]

    def default_visit(self, node: Node) -> None:
        """Copy `node`; the copy becomes the acting parent."""
        duplicate = node.copy()
        self.parent.append(duplicate)
        # Remember the enclosing parent for `default_departure()`.
        self.parent_stack.append(self.parent)
        self.parent = duplicate

    def default_departure(self, node: Node) -> None:
        """Make the previous acting parent current again."""
        self.parent = self.parent_stack.pop()
2848# Custom Exceptions
2849# =================
class ValidationError(ValueError):
    """Invalid Docutils Document Tree Element.

    The optional `problematic_element` is stored on the exception as
    attribute of the same name (default: None).
    """
    def __init__(self,
                 msg: str,
                 problematic_element: Element | None = None) -> None:
        super().__init__(msg)
        self.problematic_element = problematic_element
class TreePruningException(Exception):
    """
    Base class of the tree pruning exceptions used with `NodeVisitor`.

    To prune the tree being traversed, raise a subclass from within a
    ``visit_...`` or ``depart_...`` method that is called from a
    `Node.walk()` or `Node.walkabout()` tree traversal.
    """


class SkipChildren(TreePruningException):
    """
    Skip all children of the current node.  The siblings of the current
    node and its ``depart_...`` method are not affected.
    """


class SkipSiblings(TreePruningException):
    """
    Skip all further siblings (to the right) of the current node.  The
    children of the current node and its ``depart_...`` method are not
    affected.
    """


class SkipNode(TreePruningException):
    """
    Skip the children of the current node and do not call its
    ``depart_...`` method either.
    """


class SkipDeparture(TreePruningException):
    """
    Skip the call of the current node's ``depart_...`` method.  The
    children and siblings of the current node are not affected.
    """


class NodeFound(TreePruningException):
    """
    Raise to signal that the target of a search has been found.  The
    traversal code does not catch this exception; the client must do so.
    """


class StopTraversal(TreePruningException):
    """
    Stop the traversal altogether.  The current node's ``depart_...``
    method is not affected.  The parent nodes' ``depart_...`` methods are
    also called as usual.  No other nodes are visited.  This is an
    alternative to NodeFound that does not cause exception handling to
    trickle up to the caller.
    """
2914# definition moved here from `utils` to avoid circular import dependency
def unescape(text: str,
             restore_backslashes: bool = False,
             respect_whitespace: bool = False,
             ) -> str:
    """
    Return `text` with null characters removed or turned into backslashes.

    With `restore_backslashes` true, every null becomes a backslash.
    Otherwise nulls are deleted, along with a space or newline that
    directly follows a null (a backslash-escaped space).
    """
    # `respect_whitespace` is ignored (since introduction 2016-12-16)
    if restore_backslashes:
        return text.replace('\x00', '\\')
    # Longer sequences first, so the whitespace goes with its null.
    for escaped in ('\x00 ', '\x00\n', '\x00'):
        text = text.replace(escaped, '')
    return text
def make_id(string: str) -> str:
    """
    Return an identifier derived from `string`.

    Docutils identifiers conform to the regular expression
    ``[a-z](-?[a-z0-9]+)*``.  To stay compatible with CSS, identifiers
    (the "class" and "id" attributes) should contain no underscores,
    colons, or periods.  Hyphens are allowed.

    - The `HTML 4.01 spec`_ bases identifiers on SGML tokens:

          ID and NAME tokens must begin with a letter ([A-Za-z]) and may be
          followed by any number of letters, digits ([0-9]), hyphens ("-"),
          underscores ("_"), colons (":"), and periods (".").

    - The `CSS1 spec`_, however, bases identifiers on the "name" token,
      which is a tighter interpretation ("flex" tokenizer notation;
      "latin1" and "escape" 8-bit characters have been replaced with
      entities)::

          unicode     \\[0-9a-f]{1,4}
          latin1      [¡-ÿ]
          escape      {unicode}|\\[ -~¡-ÿ]
          nmchar      [-a-z0-9]|{latin1}|{escape}
          name        {nmchar}+

    Underscores ("_"), colons (":"), and periods (".") are not part of
    the CSS1 "nmchar" rule, so "class" and "id" attributes should not
    contain them; hyphens ("-") should take their place.  Together with
    the HTML requirements (a letter as first character; no "unicode",
    "latin1", or "escape" characters), this leads to the
    ``[a-z](-?[a-z0-9]+)*`` pattern.

    .. _HTML 4.01 spec: https://www.w3.org/TR/html401
    .. _CSS1 spec: https://www.w3.org/TR/REC-CSS1
    """
    translated = (string.lower()
                  .translate(_non_id_translate_digraphs)
                  .translate(_non_id_translate))
    # get rid of non-ascii characters.
    # 'ascii' lowercase to prevent problems with turkish locale.
    decomposed = unicodedata.normalize('NFKD', translated)
    ascii_only = decomposed.encode('ascii', 'ignore').decode('ascii')
    # shrink runs of whitespace and replace by hyphen
    single_spaced = ' '.join(ascii_only.split())
    hyphenated = _non_id_chars.sub('-', single_spaced)
    trimmed = _non_id_at_ends.sub('', hyphenated)
    return str(trimmed)
# Helper patterns and translation tables for `make_id()`.

# One or more characters other than lowercase ASCII letters and digits.
_non_id_chars: re.Pattern[str] = re.compile('[^a-z0-9]+')
# Hyphens and digits at the start, or hyphens at the end, of a string.
_non_id_at_ends: re.Pattern[str] = re.compile('^[-0-9]+|-+$')
# Code point -> single ASCII letter (for use with `str.translate()`).
_non_id_translate: dict[int, str] = {
    0x00f8: 'o',       # o with stroke
    0x0111: 'd',       # d with stroke
    0x0127: 'h',       # h with stroke
    0x0131: 'i',       # dotless i
    0x0142: 'l',       # l with stroke
    0x0167: 't',       # t with stroke
    0x0180: 'b',       # b with stroke
    0x0183: 'b',       # b with topbar
    0x0188: 'c',       # c with hook
    0x018c: 'd',       # d with topbar
    0x0192: 'f',       # f with hook
    0x0199: 'k',       # k with hook
    0x019a: 'l',       # l with bar
    0x019e: 'n',       # n with long right leg
    0x01a5: 'p',       # p with hook
    0x01ab: 't',       # t with palatal hook
    0x01ad: 't',       # t with hook
    0x01b4: 'y',       # y with hook
    0x01b6: 'z',       # z with stroke
    0x01e5: 'g',       # g with stroke
    0x0225: 'z',       # z with hook
    0x0234: 'l',       # l with curl
    0x0235: 'n',       # n with curl
    0x0236: 't',       # t with curl
    0x0237: 'j',       # dotless j
    0x023c: 'c',       # c with stroke
    0x023f: 's',       # s with swash tail
    0x0240: 'z',       # z with swash tail
    0x0247: 'e',       # e with stroke
    0x0249: 'j',       # j with stroke
    0x024b: 'q',       # q with hook tail
    0x024d: 'r',       # r with stroke
    0x024f: 'y',       # y with stroke
}
# Code point -> two ASCII letters (for use with `str.translate()`).
_non_id_translate_digraphs: dict[int, str] = {
    0x00df: 'sz',      # ligature sz
    0x00e6: 'ae',      # ae
    0x0153: 'oe',      # ligature oe
    0x0238: 'db',      # db digraph
    0x0239: 'qp',      # qp digraph
}
def dupname(node: Element, name: str) -> None:
    """Move `name` from the "names" to the "dupnames" list of `node`.

    Also sets ``node.referenced`` to true.
    """
    duplicates = node['dupnames']
    duplicates.append(name)
    active_names = node['names']
    active_names.remove(name)
    # Assume that `node` is referenced, even though it isn't;
    # we don't want to throw unnecessary system_messages.
    node.referenced = True
def fully_normalize_name(name: str) -> str:
    """Return `name` lowercased, with whitespace runs collapsed to one space.

    Leading and trailing whitespace is removed.
    """
    words = name.lower().split()
    return ' '.join(words)
def whitespace_normalize_name(name: str) -> str:
    """Return `name` with whitespace runs collapsed to one space.

    Leading and trailing whitespace is removed; case is preserved.
    """
    words = name.split()
    return ' '.join(words)
def serial_escape(value: str) -> str:
    """Escape string values that are elements of a list, for serialization.

    Backslashes are doubled and spaces are prefixed with a backslash.
    """
    # Single pass: each backslash or space in the input is replaced once.
    escapes = {ord('\\'): '\\\\', ord(' '): '\\ '}
    return value.translate(escapes)
def split_name_list(s: str) -> list[str]:
    r"""Split a string at non-escaped spaces.

    Backslashes escape internal spaces (cf. `serial_escape()`).
    Return list of "names" (after removing escaping backslashes).

    The string is split at every single non-escaped space character,
    so consecutive spaces produce empty names.

    >>> split_name_list(r'a\ n\ame two\\ n\\ames')
    ['a name', 'two\\', 'n\\ames']

    Provisional.
    """
    nul = '\x00'
    masked = s.replace('\\', nul)  # escape with NULL char
    masked = masked.replace(nul + nul, '\\')  # unescape backslashes
    masked = masked.replace(nul + ' ', nul + nul)  # escaped spaces -> NULL NULL
    names = []
    for chunk in masked.split(' '):
        # restore internal spaces, drop other escaping characters
        names.append(chunk.replace(nul + nul, ' ').replace(nul, ''))
    return names
def pseudo_quoteattr(value: str) -> str:
    """Quote attributes for pseudo-xml.

    The value is wrapped in double quotes; nothing inside it is escaped.
    """
    return f'"{value}"'
def parse_measure(measure: str, unit_pattern: str = '[a-zA-Zµ]*|%?'
                  ) -> tuple[int|float, str]:
    """Parse a measure__, return value + unit.

    `unit_pattern` is a regular expression describing recognized units.
    The default is suited for (but not limited to) CSS3 units and SI units.
    It matches runs of ASCII letters or Greek mu, a single percent sign,
    or no unit.

    The number is returned as `int` if it can be converted to one,
    else as `float`.  Spaces between number and unit are discarded.

    Raise `ValueError` if `measure` does not consist of a number followed
    by an (optional) unit matching `unit_pattern`.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#measure

    Provisional.
    """
    match = re.fullmatch(f'(-?[0-9.]+) *({unit_pattern})', measure)
    # Test for "no match" explicitly instead of relying on the
    # AttributeError raised by ``None.group()``.
    if match is None:
        raise ValueError(f'"{measure}" is no valid measure.')
    number, unit = match.group(1), match.group(2)
    try:
        value = int(number)
    except ValueError:
        try:
            value = float(number)
        except ValueError:
            # The number pattern also matches strings like "." or "1.2.3".
            # Hide the internal conversion errors from the traceback.
            raise ValueError(f'"{measure}" is no valid measure.') from None
    return value, unit
# Methods to validate `Element attribute`__ values.
#
# Ensure the expected Python `data type`__, normalize, and check for
# restrictions.
#
# The methods can be used to convert `str` values (e.g. from an XML
# representation) or to validate an existing document tree or node.
#
# Cf. `Element.validate_attributes()`, `docutils.parsers.docutils_xml`,
# and the `ATTRIBUTE_VALIDATORS` mapping below.
#
# __ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-reference
# __ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-types
def create_keyword_validator(*keywords: str) -> Callable[[str], str]:
    """
    Return a function that validates a `str` against given `keywords`.

    The returned function returns its argument unchanged if it is one of
    `keywords` and raises `ValueError` otherwise.

    Provisional.
    """
    def validate_keywords(value: str) -> str:
        if value in keywords:
            return value
        allowed = '", "'.join(keywords)
        raise ValueError(f'"{value}" is not one of "{allowed}".')

    return validate_keywords
def validate_identifier(value: str) -> str:
    """
    Validate identifier key or class name.

    Return `value` unchanged if `make_id()` leaves it unchanged,
    raise `ValueError` otherwise.

    Used in `idref.type`__ and for the tokens in `validate_identifier_list()`.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#idref-type

    Provisional.
    """
    if make_id(value) == value:
        return value
    raise ValueError(f'"{value}" is no valid id or class name.')
def validate_identifier_list(value: str | list[str]) -> list[str]:
    """
    A (space-separated) list of ids or class names.

    `value` may be a `list` or a `str` with space separated
    ids or class names (cf. `validate_identifier()`).

    Return the list of tokens (a `list` argument is returned as-is);
    raise `ValueError` for the first invalid token.

    Used in `classnames.type`__, `ids.type`__, and `idrefs.type`__.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#classnames-type
    __ https://docutils.sourceforge.io/docs/ref/doctree.html#ids-type
    __ https://docutils.sourceforge.io/docs/ref/doctree.html#idrefs-type

    Provisional.
    """
    tokens = value.split() if isinstance(value, str) else value
    for token in tokens:
        validate_identifier(token)
    return tokens
def validate_measure(measure: str) -> str:
    """
    Validate a measure__ (number + optional unit).  Return normalized `str`.

    The result is the number immediately followed by the unit
    as returned by `parse_measure()`.
    See `parse_measure()` for a function returning a "number + unit" tuple.

    The unit may be a run of ASCII letters or Greek mu, a single percent sign,
    or the empty string.  Case is preserved.

    Provisional.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#measure
    """
    number, unit = parse_measure(measure)
    return str(number) + unit
def validate_colwidth(measure: str|int|float) -> int|float:
    """Validate the "colwidth__" attribute.

    Numbers are used as-is, the strings "*" and "" stand for 1, other
    strings are parsed as a number with optional unit "*".
    Return the number; raise `ValueError` if it is not positive or
    if `measure` cannot be parsed.

    Provisional:
      `measure` must be a `str` and will be returned as normalized `str`
      (with unit "*" for proportional values) in Docutils 1.0.

      The default unit will change to "pt" in Docutils 2.0.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth
    """
    if isinstance(measure, (int, float)):
        width = measure
    elif measure in ('*', ''):  # short for '1*'
        width = 1
    else:
        try:
            width = parse_measure(measure, unit_pattern='[*]?')[0]
        except ValueError:
            # Unparsable input is reported by the common check below.
            width = -1
    if width <= 0:
        raise ValueError(f'"{measure}" is no proportional measure.')
    return width
def validate_NMTOKEN(value: str) -> str:
    """
    Validate a "name token": a `str` of ASCII letters, digits, and [-._].

    Return `value` unchanged; raise `ValueError` if it is empty or
    contains any other character.

    Provisional.
    """
    if re.fullmatch('[-._A-Za-z0-9]+', value) is None:
        raise ValueError(f'"{value}" is no NMTOKEN.')
    return value
def validate_NMTOKENS(value: str | list[str]) -> list[str]:
    """
    Validate a list of "name tokens".

    `value` may be a `list` or a `str` of whitespace-separated tokens.
    Return the list of tokens (a `list` argument is returned as-is);
    `validate_NMTOKEN()` is called for every token.

    Provisional.
    """
    tokens = value.split() if isinstance(value, str) else value
    for token in tokens:
        validate_NMTOKEN(token)
    return tokens
def validate_refname_list(value: str | list[str]) -> list[str]:
    """
    Validate a list of `reference names`__.

    Reference names may contain all characters;
    whitespace is normalized (cf. `whitespace_normalize_name()`).

    `value` may be either a `list` of names or a `str` with
    space separated names (with internal spaces backslash escaped
    and literal backslashes doubled cf. `serial_escape()`).

    Return a list of whitespace-normalized, unescaped reference names.

    Provisional.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#reference-name
    """
    names = split_name_list(value) if isinstance(value, str) else value
    normalized = []
    for name in names:
        normalized.append(whitespace_normalize_name(name))
    return normalized
def validate_yesorno(value: str | int | bool) -> bool:
    """Validate a `%yesorno`__ (flag) value.

    The string literal "0" evaluates to ``False``, all other
    values are converted with `bool()`.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#yesorno
    """
    return False if value == "0" else bool(value)
# Each value is a callable that takes the attribute value and returns the
# validated (and possibly converted or normalized) value.  The validating
# functions defined in this module raise `ValueError` for invalid values.
ATTRIBUTE_VALIDATORS: dict[str, Callable[[str], Any]] = {
    'alt': str,  # CDATA
    'align': str,
    'anonymous': validate_yesorno,
    'auto': str,  # CDATA (only '1' or '*' are used in rST)
    'backrefs': validate_identifier_list,
    'bullet': str,  # CDATA (only '-', '+', or '*' are used in rST)
    'classes': validate_identifier_list,
    'char': str,  # from Exchange Table Model (CALS), currently ignored
    'charoff': validate_NMTOKEN,  # from CALS, currently ignored
    'colname': validate_NMTOKEN,  # from CALS, currently ignored
    'colnum': int,  # from CALS, currently ignored
    'cols': int,  # from CALS: "NMTOKEN, […] must be an integer > 0".
    'colsep': validate_yesorno,
    'colwidth': validate_colwidth,  # see docstring for pending changes
    'content': str,  # <meta>
    'delimiter': str,
    'dir': create_keyword_validator('ltr', 'rtl', 'auto'),  # <meta>
    'dupnames': validate_refname_list,
    'enumtype': create_keyword_validator('arabic', 'loweralpha', 'lowerroman',
                                         'upperalpha', 'upperroman'),
    'format': str,  # CDATA (space separated format names)
    'frame': create_keyword_validator('top', 'bottom', 'topbot', 'all',
                                      'sides', 'none'),  # from CALS, ignored
    'height': validate_measure,
    'http-equiv': str,  # <meta>
    'ids': validate_identifier_list,
    'lang': str,  # <meta>
    'level': int,
    'line': int,
    'ltrim': validate_yesorno,
    'loading': create_keyword_validator('embed', 'link', 'lazy'),
    'media': str,  # <meta>
    'morecols': int,
    'morerows': int,
    'name': whitespace_normalize_name,  # in <reference> (deprecated)
    # 'name': node_attributes.validate_NMTOKEN,  # in <meta>
    'names': validate_refname_list,
    'namest': validate_NMTOKEN,  # start of span, from CALS, currently ignored
    'nameend': validate_NMTOKEN,  # end of span, from CALS, currently ignored
    'pgwide': validate_yesorno,  # from CALS, currently ignored
    'prefix': str,
    'refid': validate_identifier,
    'refname': whitespace_normalize_name,
    'refuri': str,
    'rowsep': validate_yesorno,
    'rtrim': validate_yesorno,
    'scale': int,
    'scheme': str,
    'source': str,
    'start': int,
    'stub': validate_yesorno,
    'suffix': str,
    'title': str,
    'type': validate_NMTOKEN,
    'uri': str,
    'valign': create_keyword_validator('top', 'middle', 'bottom'),  # from CALS
    'width': validate_measure,
    'xml:space': create_keyword_validator('default', 'preserve'),
    }
"""
Mapping of `attribute names`__ to validating functions.

Provisional.

__ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-reference
"""