Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/docutils/nodes.py: 62%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# $Id$
2# Author: David Goodger <goodger@python.org>
3# Maintainer: docutils-develop@lists.sourceforge.net
4# Copyright: This module has been placed in the public domain.
6"""
7Docutils document tree element class library.
9The relationships and semantics of elements and attributes are documented in
10`The Docutils Document Tree`__.
12Classes in CamelCase are abstract base classes or auxiliary classes. The one
13exception is `Text`, for a text (PCDATA) node; uppercase is used to
14differentiate from element classes. Classes in lower_case_with_underscores
15are element classes, matching the XML element generic identifiers in the DTD_.
17The position of each node (the level at which it can occur) is significant and
18is represented by abstract base classes (`Root`, `Structural`, `Body`,
19`Inline`, etc.). Certain transformations will be easier because we can use
20``isinstance(node, base_class)`` to determine the position of the node in the
21hierarchy.
23__ https://docutils.sourceforge.io/docs/ref/doctree.html
24.. _DTD: https://docutils.sourceforge.io/docs/ref/docutils.dtd
25"""
27from __future__ import annotations
29__docformat__ = 'reStructuredText'
31import os
32import re
33import sys
34import unicodedata
35import warnings
36from collections import Counter
37# import xml.dom.minidom as dom # -> conditional import in Node.asdom()
38# and document.asdom()
40# import docutils.transforms # -> delayed import in document.__init__()
42TYPE_CHECKING = False
43if TYPE_CHECKING:
44 from collections.abc import (Callable, Iterable, Iterator,
45 Mapping, Sequence)
46 from types import ModuleType
47 from typing import Any, ClassVar, Final, Literal, Self, SupportsIndex
49 from docutils.utils._typing import TypeAlias
51 from xml.dom import minidom
53 from docutils.frontend import Values
54 from docutils.transforms import Transformer, Transform
55 from docutils.utils import Reporter
57 _ContentModelCategory: TypeAlias = tuple['Element' | tuple['Element', ...]]
58 _ContentModelQuantifier = Literal['.', '?', '+', '*']
59 _ContentModelItem: TypeAlias = tuple[_ContentModelCategory,
60 _ContentModelQuantifier]
61 _ContentModelTuple: TypeAlias = tuple[_ContentModelItem, ...]
63 StrPath: TypeAlias = str | os.PathLike[str]
64 """File system path. No bytes!"""
66 _UpdateFun: TypeAlias = Callable[[str, Any, bool], None]
69# ==============================
70# Functional Node Base Classes
71# ==============================
class Node:
    """Abstract base class of nodes in a document tree."""

    parent: Element | None = None
    """Back-reference to the Node immediately containing this Node."""

    children: Sequence  # defined in subclasses
    """List of child nodes (Elements or Text).

    Override in subclass instances that are not terminal nodes.
    """

    source: StrPath | None = None
    """Path or description of the input source which generated this Node."""

    line: int | None = None
    """The line number (1-based) of the beginning of this Node in `source`."""

    tagname: str  # defined in subclasses
    """The element generic identifier."""

    _document: document | None = None

    @property
    def document(self) -> document | None:
        """Return the `document` root node of the tree containing this Node.

        The node's own ``_document`` is preferred; otherwise the parent is
        asked.  Return None if neither provides a document.
        """
        own = self._document
        if own:
            return own
        try:
            return self.parent.document
        except AttributeError:
            # no parent (``None.document``) -> not attached to a document
            return None

    @document.setter
    def document(self, value: document) -> None:
        self._document = value

    def __bool__(self) -> Literal[True]:
        """
        Node instances are always true, even if they're empty.  A node is
        more than a simple container.  Its boolean "truth" does not depend
        on having one or more subnodes in the doctree.

        Use `len()` to check node length.
        """
        return True

    def asdom(self,
              dom: ModuleType | None = None,
              ) -> minidom.Document | minidom.Element | minidom.Text:
        # TODO: minidom.Document is only returned by document.asdom()
        #       (which overwrites this base-class implementation)
        """Return a DOM **fragment** representation of this Node.

        `dom` is the DOM implementation module to use; if None,
        `xml.dom.minidom` is imported on demand.
        """
        if dom is None:
            import xml.dom.minidom as dom
        return self._dom_node(dom.Document())

    def pformat(self, indent: str = '    ', level: int = 0) -> str:
        """
        Return an indented pseudo-XML representation, for test purposes.

        Override in subclasses.
        """
        raise NotImplementedError

    def copy(self) -> Self:
        """Return a copy of self.  Override in subclasses."""
        raise NotImplementedError

    def deepcopy(self) -> Self:
        """Return a deep copy of self (also copying children).

        Override in subclasses.
        """
        raise NotImplementedError

    def astext(self) -> str:
        """Return a string representation of this Node.

        Override in subclasses.
        """
        raise NotImplementedError

    def setup_child(self, child) -> None:
        """Register `self` as parent of `child`.

        If `self` belongs to a document, also hand the document to `child`
        and fill in the child's missing `source` and `line` from the
        document's current source and line.
        """
        child.parent = self
        doc = self.document
        if not doc:
            return
        child.document = doc
        if child.source is None:
            child.source = doc.current_source
        if child.line is None:
            child.line = doc.current_line

    def walk(self, visitor: NodeVisitor) -> bool:
        """
        Traverse a tree of `Node` objects, calling the
        `dispatch_visit()` method of `visitor` when entering each
        node.  (The `walkabout()` method is similar, except it also
        calls the `dispatch_departure()` method before exiting each
        node.)

        This tree traversal supports limited in-place tree
        modifications.  Replacing one node with one or more nodes is
        OK, as is removing an element.  However, if the node removed
        or replaced occurs after the current node, the old node will
        still be traversed, and any new nodes will not.

        Within ``visit`` methods (and ``depart`` methods for
        `walkabout()`), `TreePruningException` subclasses may be raised
        (`SkipChildren`, `SkipSiblings`, `SkipNode`, `SkipDeparture`).

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` implementation for each `Node` subclass encountered.

        Return true if we should stop the traversal.
        """
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walk calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except (SkipChildren, SkipNode):
                return False
            except SkipDeparture:  # not applicable; ignore
                pass
            # Iterate over a copy so that visitors may modify the tree.
            snapshot = self.children[:]
            try:
                # any() stops at the first child that requests a stop
                return any(child.walk(visitor) for child in snapshot)
            except SkipSiblings:
                return False
        except StopTraversal:
            return True

    def walkabout(self, visitor: NodeVisitor) -> bool:
        """
        Perform a tree traversal similarly to `Node.walk()` (which
        see), except also call the `dispatch_departure()` method
        before exiting each node.

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` and ``depart`` implementation for each `Node`
        subclass encountered.

        Return true if we should stop the traversal.
        """
        depart = True
        halted = False
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walkabout calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except SkipNode:
                # skip children *and* departure
                return False
            except SkipDeparture:
                depart = False
            # Iterate over a copy so that visitors may modify the tree.
            snapshot = self.children[:]
            try:
                halted = any(child.walkabout(visitor) for child in snapshot)
            except SkipSiblings:
                pass
        except SkipChildren:
            pass
        except StopTraversal:
            halted = True
        if depart:
            visitor.document.reporter.debug(
                'docutils.nodes.Node.walkabout calling dispatch_departure '
                'for %s' % self.__class__.__name__)
            visitor.dispatch_departure(self)
        return halted

    def _fast_findall(self, cls: type) -> Iterator:
        """Return iterator that only supports instance checks.

        Yield `self` and all descendants that are instances of `cls`.
        """
        if isinstance(self, cls):
            yield self
        for child in self.children:
            yield from child._fast_findall(cls)

    def _superfast_findall(self) -> Iterator:
        """Return iterator that doesn't check for a condition.

        Yield `self` and all descendants.
        """
        # This is different from ``iter(self)`` implemented via
        # __getitem__() and __len__() in the Element subclass,
        # which yields only the direct children.
        yield self
        for child in self.children:
            yield from child._superfast_findall()

    def findall(self,
                condition: type | Callable[[Node], bool] | None = None,
                include_self: bool = True,
                descend: bool = True,
                siblings: bool = False,
                ascend: bool = False,
                ) -> Iterator:
        """
        Return an iterator yielding nodes following `self`:

        * self (if `include_self` is true)
        * all descendants in tree traversal order (if `descend` is true)
        * the following siblings (if `siblings` is true) and their
          descendants (if also `descend` is true)
        * the following siblings of the parent (if `ascend` is true) and
          their descendants (if also `descend` is true), and so on.

        If `condition` is not None, the iterator yields only nodes
        for which ``condition(node)`` is true.  If `condition` is a
        type ``cls``, it is equivalent to a function consisting
        of ``return isinstance(node, cls)``.

        If `ascend` is true, assume `siblings` to be true as well.

        If the tree structure is modified during iteration, the result
        is undefined.

        For example, given the following tree::

            <paragraph>
                <emphasis>      <--- emphasis.findall() and
                    <strong>    <--- strong.findall() are called.
                        Foo
                    Bar
                <reference name="Baz" refid="baz">
                    Baz

        Then tuple(emphasis.findall()) equals ::

            (<emphasis>, <strong>, <#text: Foo>, <#text: Bar>)

        and list(strong.findall(ascend=True)) equals ::

            [<strong>, <#text: Foo>, <#text: Bar>, <reference>, <#text: Baz>]
        """
        siblings = siblings or ascend
        # Special argument combinations allow using an optimized traversal.
        if include_self and descend and not siblings:
            if condition is None:
                yield from self._superfast_findall()
                return
            if isinstance(condition, type):
                yield from self._fast_findall(condition)
                return
        # Turn a class into an equivalent predicate function.
        if isinstance(condition, type):
            node_class = condition

            def condition(node, node_class=node_class):
                return isinstance(node, node_class)

        if include_self and (condition is None or condition(self)):
            yield self
        if descend and len(self.children):
            for child in self:
                yield from child.findall(condition=condition,
                                         include_self=True, descend=True,
                                         siblings=False, ascend=False)
        if not siblings:
            return
        node = self
        while node.parent:
            container = node.parent
            position = container.index(node)
            # extra check since Text nodes have value-equality
            while container[position] is not node:
                position = container.index(node, position + 1)
            for sibling in container[position+1:]:
                yield from sibling.findall(
                    condition=condition,
                    include_self=True, descend=descend,
                    siblings=False, ascend=False)
            if not ascend:
                break
            node = node.parent

    def traverse(self,
                 condition: type | Callable[[Node], bool] | None = None,
                 include_self: bool = True,
                 descend: bool = True,
                 siblings: bool = False,
                 ascend: bool = False,
                 ) -> list:
        """Return list of nodes following `self`.

        Deprecated: emits a `DeprecationWarning`.
        For looping, Node.findall() is faster and more memory efficient.
        """
        # traverse() may be eventually removed:
        warnings.warn('nodes.Node.traverse() is obsoleted by Node.findall().',
                      DeprecationWarning, stacklevel=2)
        found = self.findall(condition, include_self, descend,
                             siblings, ascend)
        return list(found)

    def next_node(self,
                  condition: type | Callable[[Node], bool] | None = None,
                  include_self: bool = False,
                  descend: bool = True,
                  siblings: bool = False,
                  ascend: bool = False,
                  ) -> Node | None:
        """
        Return the first node in the iterator returned by findall(),
        or None if the iterable is empty.

        Parameter list is the same as of `findall()`.  Note that
        `include_self` defaults to False, though.
        """
        candidates = self.findall(condition, include_self,
                                  descend, siblings, ascend)
        return next(candidates, None)

    def validate(self, recursive: bool = True) -> None:
        """Raise ValidationError if this node is not valid.

        Override in subclasses that define validity constraints.
        """

    def validate_position(self) -> None:
        """Hook for additional checks of the parent's content model.

        Raise ValidationError, if `self` is at an invalid position.

        Override in subclasses with complex validity constraints.  See
        `subtitle.validate_position()` and `transition.validate_position()`.
        """
class Text(Node, str):  # NoQA: SLOT000 (Node doesn't define __slots__)
    """
    Instances are terminal nodes (leaves) containing text only; no child
    nodes or attributes.  Initialize by passing a string to the constructor.

    Access the raw (null-escaped) text with ``str(<instance>)``
    and unescaped text with ``<instance>.astext()``.
    """

    tagname: Final = '#text'

    children: Final = ()
    """Text nodes have no children, and cannot have children."""

    def __new__(cls, data: str, rawsource: None = None) -> Self:
        """Assert that `data` is not an array of bytes
        and warn if the deprecated `rawsource` argument is used.
        """
        if isinstance(data, bytes):
            raise TypeError('expecting str data, not bytes')
        if rawsource is not None:
            warnings.warn('nodes.Text: initialization argument "rawsource" '
                          'is ignored and will be removed in Docutils 2.0.',
                          DeprecationWarning, stacklevel=2)
        return str.__new__(cls, data)

    def shortrepr(self, maxlen: int = 18) -> str:
        """Return ``<#text: '...'>``, abbreviating text longer than `maxlen`.
        """
        text = str(self)
        if len(text) > maxlen:
            text = text[:maxlen-4] + ' ...'
        return '<%s: %r>' % (self.tagname, text)

    def __repr__(self) -> str:
        return self.shortrepr(maxlen=68)

    def astext(self) -> str:
        """Return the unescaped text as a plain `str`."""
        return str(unescape(self))

    def _dom_node(self, domroot: minidom.Document) -> minidom.Text:
        """Return a DOM text node holding the raw text."""
        return domroot.createTextNode(str(self))

    def copy(self) -> Self:
        """Return a new instance of the same class with the same text."""
        return self.__class__(str(self))

    def deepcopy(self) -> Self:
        """Same as `copy()`: Text nodes have no children."""
        return self.copy()

    def pformat(self, indent: str = '    ', level: int = 0) -> str:
        """Return an indented pseudo-XML representation of the text.

        If the document's ``detailed`` setting is true, show a ``<#text>``
        tag followed by the `repr()` of every raw line.  Otherwise (also if
        document or setting are missing), show the indented unescaped text.
        """
        try:
            if self.document.settings.detailed:
                child_pad = indent * (level+1)
                rows = ['%s%s' % (indent*level, '<#text>')]
                rows.extend(child_pad + repr(chunk)
                            for chunk in self.splitlines(True))
                return '\n'.join(rows) + '\n'
        except AttributeError:
            # no document, no settings, or no "detailed" setting
            pass
        pad = indent * level
        rows = [pad + row for row in self.astext().splitlines()]
        return '\n'.join(rows) + '\n' if rows else ''

    # rstrip and lstrip are used by substitution definitions where
    # they are expected to return a Text instance, this was formerly
    # taken care of by UserString.

    def rstrip(self, chars: str | None = None) -> Self:
        """Like `str.rstrip()` but return an instance of the same class."""
        stripped = str.rstrip(self, chars)
        return self.__class__(stripped)

    def lstrip(self, chars: str | None = None) -> Self:
        """Like `str.lstrip()` but return an instance of the same class."""
        stripped = str.lstrip(self, chars)
        return self.__class__(stripped)
478class Element(Node):
479 """
480 `Element` is the superclass to all specific elements.
482 Elements contain attributes and child nodes.
483 They can be described as a cross between a list and a dictionary.
485 Elements emulate dictionaries for external [#]_ attributes, indexing by
486 attribute name (a string). To set the attribute 'att' to 'value', do::
488 element['att'] = 'value'
490 .. [#] External attributes correspond to the XML element attributes.
491 From its `Node` superclass, Element also inherits "internal"
492 class attributes that are accessed using the standard syntax, e.g.
493 ``element.parent``.
495 There are two special attributes: 'ids' and 'names'. Both are
496 lists of unique identifiers: 'ids' conform to the regular expression
497 ``[a-z](-?[a-z0-9]+)*`` (see the make_id() function for rationale and
498 details). 'names' serve as user-friendly interfaces to IDs; they are
499 case- and whitespace-normalized (see the fully_normalize_name() function).
501 Elements emulate lists for child nodes (element nodes and/or text
502 nodes), indexing by integer. To get the first child node, use::
504 element[0]
506 to iterate over the child nodes (without descending), use::
508 for child in element:
509 ...
511 Elements may be constructed using the ``+=`` operator. To add one new
512 child node to element, do::
514 element += node
516 This is equivalent to ``element.append(node)``.
518 To add a list of multiple child nodes at once, use the same ``+=``
519 operator::
521 element += [node1, node2]
523 This is equivalent to ``element.extend([node1, node2])``.
524 """
526 list_attributes: Final = ('ids', 'classes', 'names', 'dupnames')
527 """Tuple of attributes that are initialized to empty lists.
529 NOTE: Derived classes should update this value when supporting
530 additional list attributes.
531 """
533 valid_attributes: Final = list_attributes + ('source',)
534 """Tuple of attributes that are valid for elements of this class.
536 NOTE: Derived classes should update this value when supporting
537 additional attributes.
538 """
540 common_attributes: Final = valid_attributes
541 """Tuple of `common attributes`__ known to all Doctree Element classes.
543 __ https://docutils.sourceforge.io/docs/ref/doctree.html#common-attributes
544 """
546 known_attributes: Final = common_attributes
547 """Alias for `common_attributes`. Will be removed in Docutils 2.0."""
549 basic_attributes: Final = list_attributes
550 """Common list attributes. Deprecated. Will be removed in Docutils 2.0."""
552 local_attributes: Final = ('backrefs',)
553 """Obsolete. Will be removed in Docutils 2.0."""
555 content_model: ClassVar[_ContentModelTuple] = ()
556 """Python representation of the element's content model (cf. docutils.dtd).
558 A tuple of ``(category, quantifier)`` tuples with
560 :category: class or tuple of classes that are expected at this place(s)
561 in the list of children
562 :quantifier: string representation stating how many elements
563 of `category` are expected. Value is one of:
564 '.' (exactly one), '?' (zero or one),
565 '+' (one or more), '*' (zero or more).
567 NOTE: The default describes the empty element. Derived classes should
568 update this value to match their content model.
570 Provisional.
571 """
573 tagname: str | None = None
574 """The element generic identifier.
576 If None, it is set as an instance attribute to the name of the class.
577 """
579 child_text_separator: Final = '\n\n'
580 """Separator for child nodes, used by `astext()` method."""
def __init__(self,
             rawsource: str = '',
             *children,
             **attributes: Any,
             ) -> None:
    """Set up raw source, child nodes, and attributes.

    :rawsource: the raw text from which this element was constructed.
        For informative and debugging purposes. Don't rely on its value!
        Some elements do not set this value (default '').
    :children: child nodes, added via `extend()`.
    :attributes: attribute values; names are converted to lower case.

    Raise TypeError if `rawsource` is an `Element`.
    """
    self.rawsource = rawsource
    if isinstance(rawsource, Element):
        raise TypeError('First argument "rawsource" must be a string.')

    # List of child nodes (elements and/or `Text`).
    self.children: list = []
    self.extend(children)  # maintain parent info

    # Dictionary of attribute {name: value};
    # list attributes start out as empty lists.
    self.attributes: dict[str, Any] = {
        name: [] for name in self.list_attributes}

    for name, value in attributes.items():
        name = name.lower()  # normalize attribute name
        # lists are mutable; make a copy for this node
        self.attributes[name] = (value[:] if name in self.list_attributes
                                 else value)

    if self.tagname is None:
        self.tagname: str = self.__class__.__name__
def _dom_node(self, domroot: minidom.Document) -> minidom.Element:
    """Return a DOM element for this node, including attributes and children.

    List values are written as space-separated items, each passed
    through `serial_escape()`.
    """
    dom_element = domroot.createElement(self.tagname)
    for name, value in self.attlist():
        if isinstance(value, list):
            escaped = [serial_escape('%s' % (item,)) for item in value]
            value = ' '.join(escaped)
        dom_element.setAttribute(name, '%s' % value)
    for child in self.children:
        dom_element.appendChild(child._dom_node(domroot))
    return dom_element
630 def __repr__(self) -> str:
631 data = ''
632 for c in self.children:
633 data += c.shortrepr()
634 if len(data) > 60:
635 data = data[:56] + ' ...'
636 break
637 if self['names']:
638 return '<%s "%s": %s>' % (self.tagname,
639 '; '.join(self['names']), data)
640 else:
641 return '<%s: %s>' % (self.tagname, data)
def shortrepr(self) -> str:
    """Return a short representation: tag name and names (if any)."""
    names = self['names']
    if not names:
        return '<%s...>' % self.tagname
    return '<%s "%s"...>' % (self.tagname, '; '.join(names))
649 def __str__(self) -> str:
650 if self.children:
651 return '%s%s%s' % (self.starttag(),
652 ''.join(str(c) for c in self.children),
653 self.endtag())
654 else:
655 return self.emptytag()
def starttag(self, quoteattr: Callable[[str], str] | None = None) -> str:
    """Return the start tag with all non-default attributes.

    `quoteattr` is a function that quotes an attribute value; the default
    is `pseudo_quoteattr`.  (The optional argument is used by the
    docutils_xml writer.)

    A value of None is written as ``name="True"`` (boolean attribute),
    booleans as "1" or "0", and lists as space-separated items, each
    passed through `serial_escape()`.
    """
    if quoteattr is None:
        quoteattr = pseudo_quoteattr
    parts = [self.tagname]
    for name, value in self.attlist():
        if value is None:  # boolean attribute
            parts.append('%s="True"' % name)
        else:
            if isinstance(value, list):
                text = ' '.join(serial_escape('%s' % (item,))
                                for item in value)
            elif isinstance(value, bool):
                text = str(int(value))
            else:
                text = str(value)
            parts.append('%s=%s' % (name, quoteattr(text)))
    return '<%s>' % ' '.join(parts)
def endtag(self) -> str:
    """Return the end tag, ``</tagname>``."""
    return '</%s>' % (self.tagname,)
def emptytag(self) -> str:
    """Return an empty-element tag with all non-default attributes.

    Values are inserted with ``%s`` formatting, without quoting
    or escaping.
    """
    parts = [self.tagname]
    for name, value in self.attlist():
        parts.append('%s="%s"' % (name, value))
    return '<%s/>' % ' '.join(parts)
684 def __len__(self) -> int:
685 return len(self.children)
687 def __contains__(self, key) -> bool:
688 # Test for both, children and attributes with operator ``in``.
689 if isinstance(key, str):
690 return key in self.attributes
691 return key in self.children
693 def __getitem__(self, key: str | int | slice) -> Any:
694 if isinstance(key, str):
695 return self.attributes[key]
696 elif isinstance(key, int):
697 return self.children[key]
698 elif isinstance(key, slice):
699 assert key.step in (None, 1), 'cannot handle slice with stride'
700 return self.children[key.start:key.stop]
701 else:
702 raise TypeError('element index must be an integer, a slice, or '
703 'an attribute name string')
705 def __setitem__(self, key, item) -> None:
706 if isinstance(key, str):
707 self.attributes[str(key)] = item
708 elif isinstance(key, int):
709 self.setup_child(item)
710 self.children[key] = item
711 elif isinstance(key, slice):
712 assert key.step in (None, 1), 'cannot handle slice with stride'
713 for node in item:
714 self.setup_child(node)
715 self.children[key.start:key.stop] = item
716 else:
717 raise TypeError('element index must be an integer, a slice, or '
718 'an attribute name string')
720 def __delitem__(self, key: str | int | slice) -> None:
721 if isinstance(key, str):
722 del self.attributes[key]
723 elif isinstance(key, int):
724 del self.children[key]
725 elif isinstance(key, slice):
726 assert key.step in (None, 1), 'cannot handle slice with stride'
727 del self.children[key.start:key.stop]
728 else:
729 raise TypeError('element index must be an integer, a simple '
730 'slice, or an attribute name string')
732 def __add__(self, other: list) -> list:
733 return self.children + other
735 def __radd__(self, other: list) -> list:
736 return other + self.children
def __iadd__(self, other) -> Self:
    """Append a node or a list of nodes to `self.children`.

    A `Node` instance is appended, any other value except None is
    passed to `extend()`.  None is ignored.
    """
    if isinstance(other, Node):
        self.append(other)
        return self
    if other is not None:
        self.extend(other)
    return self
def astext(self) -> str:
    """Return the text of all children, joined by `child_text_separator`."""
    texts = (child.astext() for child in self.children)
    return self.child_text_separator.join(texts)
def non_default_attributes(self) -> dict[str, Any]:
    """Return a new dict of the attributes accepted by `is_not_default()`."""
    selected = {}
    for name, value in self.attributes.items():
        if self.is_not_default(name):
            selected[name] = value
    return selected
def attlist(self) -> list[tuple[str, Any]]:
    """Return a sorted list of (name, value) pairs of non-default attributes.
    """
    pairs = list(self.non_default_attributes().items())
    pairs.sort()
    return pairs
def get(self, key: str, failobj: Any | None = None) -> Any:
    """Return the value of attribute `key`, or `failobj` if it is not set."""
    attributes = self.attributes
    return attributes.get(key, failobj)
def hasattr(self, attr: str) -> bool:
    """Return True if an attribute named `attr` is set, else False."""
    attributes = self.attributes
    return attr in attributes
def delattr(self, attr: str) -> None:
    """Remove the attribute named `attr`; do nothing if it is not set."""
    self.attributes.pop(attr, None)
def setdefault(self, key: str, failobj: Any | None = None) -> Any:
    """Return the value of attribute `key`; set it to `failobj` if missing.
    """
    attributes = self.attributes
    return attributes.setdefault(key, failobj)
771 has_key = hasattr
def get_language_code(self, fallback: str = '') -> str:
    """Return node's language tag.

    Look in self and, recursively, in the parents for a "classes" value
    starting with ``language-`` and return the remainder of it
    (which should be a `BCP 47` language tag) or the `fallback`.
    """
    prefix = 'language-'
    for class_value in self.get('classes', []):
        if class_value.startswith(prefix):
            return class_value.removeprefix(prefix)
    try:
        return self.parent.get_language_code(fallback)
    except AttributeError:
        # reached a node without parent (or a parent without this method)
        return fallback
def append(self, item) -> None:
    """Add `item` as last child node.

    The new child is passed to `setup_child()` before it is stored.
    """
    self.setup_child(item)
    child_nodes = self.children
    child_nodes.append(item)
def extend(self, item: Iterable) -> None:
    """Append every node from the iterable `item` via `append()`."""
    add = self.append
    for node in item:
        add(node)
def insert(self, index: SupportsIndex, item) -> None:
    """Insert `item` before the child node at position `index`.

    A `Node` instance is inserted as a single child.  Any other value
    except None is assigned to the empty slice at `index`, i.e. it is
    handled as a collection of nodes.  None is ignored.
    """
    if item is None:
        return
    if not isinstance(item, Node):
        self[index:index] = item
        return
    self.setup_child(item)
    self.children.insert(index, item)
def pop(self, i: int = -1):
    """Remove and return the child node at index `i` (default: the last)."""
    child_nodes = self.children
    return child_nodes.pop(i)
def remove(self, item) -> None:
    """Remove the first child node equal to `item`.

    Delegates to `list.remove()` of the list of children.
    """
    child_nodes = self.children
    child_nodes.remove(item)
def index(self, item, start: int = 0, stop: int = sys.maxsize) -> int:
    """Return the index of the first child node equal to `item`.

    The search is limited to the positions `start` <= i < `stop`.
    Delegates to `list.index()` of the list of children.
    """
    child_nodes = self.children
    return child_nodes.index(item, start, stop)
def previous_sibling(self):
    """Return preceding sibling node or ``None``.

    Return None if there is no parent or `self` is the first child.
    """
    try:
        position = self.parent.index(self)
    except AttributeError:
        # no parent: ``None.index`` does not exist
        return None
    if position > 0:
        return self.parent[position-1]
    return None
def section_hierarchy(self) -> list[section]:
    """Return the element's section hierarchy.

    Return a list of all <section> elements that contain `self`
    (including `self` if it is a <section>) and have a parent node.

    List item ``[i]`` is the parent <section> of level i+1
    (1: section, 2: subsection, 3: subsubsection, ...).
    The length of the list is the element's section level.

    See `docutils.parsers.rst.states.RSTState.check_subsection()`
    for a usage example.

    Provisional. May be changed or removed without warning.
    """
    hierarchy = []
    current = self
    # Walk up the tree; nodes without parent are not considered.
    while current.parent is not None:
        if isinstance(current, section):
            # prepend: outermost section comes first
            hierarchy.insert(0, current)
        current = current.parent
    return hierarchy
def is_not_default(self, key: str) -> bool:
    """Return False if attribute `key` is an empty list attribute, else True.

    Only the attributes named in `list_attributes` have a default
    (the empty list).
    """
    is_default = self[key] == [] and key in self.list_attributes
    return not is_default
def update_basic_atts(self, dict_: Mapping[str, Any] | Element) -> None:
    """
    Update basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') from node or dictionary `dict_`.

    Values from `dict_` that are not yet present are appended to the
    corresponding list attribute of `self`.

    Provisional.
    """
    source = dict_.attributes if isinstance(dict_, Node) else dict_
    for name in self.basic_attributes:
        self.append_attr_list(name, source.get(name, []))
def append_attr_list(self, attr: str, values: Iterable[Any]) -> None:
    """
    For each element in values, if it does not exist in self[attr], append
    it.

    NOTE: Requires self[attr] and values to be sequence type and the
    former should specifically be a list.
    """
    # List Concatenation (skipping values already present)
    for value in values:
        current = self[attr]
        if value in current:
            continue
        current.append(value)
def coerce_append_attr_list(
        self, attr: str, value: list[Any] | Any) -> None:
    """
    First, make sure that both self[attr] and value are lists:
    if either is not a list, wrap it in a list of one element.
    Then call append_attr_list.

    NOTE: self[attr] and value both must not be None.
    """
    # List Concatenation
    if not isinstance(self.get(attr), list):
        self[attr] = [self[attr]]
    values = value if isinstance(value, list) else [value]
    self.append_attr_list(attr, values)
def replace_attr(self, attr: str, value: Any, force: bool = True) -> None:
    """
    If self[attr] does not exist (or is None) or force is True or omitted,
    set self[attr] to value, otherwise do nothing.
    """
    # One or the other
    if not force and self.get(attr) is not None:
        return
    self[attr] = value
def copy_attr_convert(
        self, attr: str, value: Any, replace: bool = True) -> None:
    """
    Merge value into self[attr] via coerce_append_attr_list,
    unless self[attr] already is the very same object as value.

    NOTE: replace is not used by this function and is kept only for
          compatibility with the other copy functions.
    """
    if self.get(attr) is value:
        return
    self.coerce_append_attr_list(attr, value)
def copy_attr_coerce(self, attr: str, value: Any, replace: bool) -> None:
    """
    If either self[attr] or value is a list, merge them via
    coerce_append_attr_list (non-list values are wrapped in a list of
    one element).
    If both self[attr] and value are non-lists, call replace_attr, which
    sets self[attr] to value if replace is True or self[attr] is None.
    Do nothing if self[attr] already is the very same object as value.
    """
    current = self.get(attr)
    if current is value:
        return
    if isinstance(current, list) or isinstance(value, list):
        self.coerce_append_attr_list(attr, value)
    else:
        self.replace_attr(attr, value, replace)
def copy_attr_concatenate(
        self, attr: str, value: Any, replace: bool) -> None:
    """
    If both self[attr] and value are lists, append the values missing
    in self[attr] via append_attr_list.
    Otherwise call replace_attr, which sets self[attr] to value
    if replace is True or self[attr] is None.
    Do nothing if self[attr] already is the very same object as value.
    """
    current = self.get(attr)
    if current is value:
        return
    both_lists = isinstance(current, list) and isinstance(value, list)
    if both_lists:
        self.append_attr_list(attr, value)
    else:
        self.replace_attr(attr, value, replace)
def copy_attr_consistent(
        self, attr: str, value: Any, replace: bool) -> None:
    """
    If replace is True or self[attr] is None, replace self[attr] with
    value.  Otherwise, do nothing.

    Also do nothing if self[attr] already is the very same object as value.
    """
    if self.get(attr) is value:
        return
    self.replace_attr(attr, value, replace)
def update_all_atts(self,
                    dict_: Mapping[str, Any] | Element,
                    update_fun: _UpdateFun = copy_attr_consistent,
                    replace: bool = True,
                    and_source: bool = False,
                    ) -> None:
    """
    Updates all attributes from node or dictionary `dict_`.

    Appends the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') and then, for all other attributes in
    dict_, updates the same attribute in self.  When attributes with the
    same identifier appear in both self and dict_, the two values are
    merged based on the value of update_fun.  Generally, when replace is
    True, the values in self are replaced or merged with the values in
    dict_; otherwise, the values in self may be preserved or merged.  When
    and_source is True, the 'source' attribute is included in the copy.

    `update_fun` is called as ``update_fun(self, name, value, replace)``
    for every selected attribute.

    NOTE: When replace is False, and self contains a 'source' attribute,
          'source' is not replaced even when dict_ has a 'source'
          attribute, though it may still be merged into a list depending
          on the value of update_fun.
    NOTE: It is easier to call the update-specific methods than to pass
          the update_fun method to this function.
    """
    if isinstance(dict_, Node):
        dict_ = dict_.attributes

    # Include the source attribute when copying?
    # (The two filter methods are defined elsewhere in this class.)
    selected = (self.is_not_list_attribute if and_source
                else self.is_not_known_attribute)

    # Copy the basic attributes
    self.update_basic_atts(dict_)

    # Update the remaining attributes selected by the filter.
    # (All basic attributes should be copied already)
    for name in dict_:
        if selected(name):
            update_fun(self, name, dict_[name], replace)
def update_all_atts_consistantly(self,
                                 dict_: Mapping[str, Any] | Element,
                                 replace: bool = True,
                                 and_source: bool = False,
                                 ) -> None:
    """
    Merge all attributes of node or dictionary `dict_` into `self`,
    replacing or keeping colliding values as a whole.

    Shorthand for `update_all_atts()` with
    `Element.copy_attr_consistent` as merge strategy: the basic
    attributes ('ids', 'names', 'classes', 'dupnames', but not 'source')
    are appended; for any other attribute present in both `self` and
    `dict_`, the value from `dict_` wins if `replace` is True and the
    value in `self` is preserved otherwise.  With `and_source` set, the
    'source' attribute is included in the copy.

    NOTE: When replace is False, and self contains a 'source' attribute,
          'source' is not replaced even when dict_ has a 'source'
          attribute.
    """
    self.update_all_atts(dict_,
                         update_fun=Element.copy_attr_consistent,
                         replace=replace,
                         and_source=and_source)
def update_all_atts_concatenating(self,
                                  dict_: Mapping[str, Any] | Element,
                                  replace: bool = True,
                                  and_source: bool = False,
                                  ) -> None:
    """
    Merge all attributes of node or dictionary `dict_` into `self`,
    concatenating list values.

    Shorthand for `update_all_atts()` with
    `Element.copy_attr_concatenate` as merge strategy: the basic
    attributes ('ids', 'names', 'classes', 'dupnames', but not 'source')
    are appended.  For any other attribute present in both `self` and
    `dict_`:

    - if both values are lists, they are concatenated and the result is
      stored in `self`;
    - otherwise the value from `dict_` replaces the one in `self` if
      `replace` is True, and the value in `self` is preserved if not.

    With `and_source` set, the 'source' attribute is included in the copy.

    NOTE: When replace is False, and self contains a 'source' attribute,
          'source' is not replaced even when dict_ has a 'source'
          attribute, though it may still be merged into a list.
    """
    self.update_all_atts(dict_,
                         update_fun=Element.copy_attr_concatenate,
                         replace=replace,
                         and_source=and_source)
def update_all_atts_coercion(self,
                             dict_: Mapping[str, Any] | Element,
                             replace: bool = True,
                             and_source: bool = False,
                             ) -> None:
    """
    Merge all attributes of node or dictionary `dict_` into `self`,
    coercing to lists where one side already is a list.

    Shorthand for `update_all_atts()` with `Element.copy_attr_coerce` as
    merge strategy: the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') are appended.  For any other attribute
    present in both `self` and `dict_`:

    - if either value is a list, any non-list is first converted to a
      1-element list, then both lists are concatenated and the result is
      stored in `self`;
    - if neither value is a list, the value from `dict_` replaces the one
      in `self` if `replace` is True, and the value in `self` is preserved
      if not.

    With `and_source` set, the 'source' attribute is included in the copy.

    NOTE: When replace is False, and self contains a 'source' attribute,
          'source' is not replaced even when dict_ has a 'source'
          attribute, though it may still be merged into a list.
    """
    self.update_all_atts(dict_,
                         update_fun=Element.copy_attr_coerce,
                         replace=replace,
                         and_source=and_source)
def update_all_atts_convert(self,
                            dict_: Mapping[str, Any] | Element,
                            and_source: bool = False,
                            ) -> None:
    """
    Merge all attributes of node or dictionary `dict_` into `self`,
    always converting colliding values to lists.

    Shorthand for `update_all_atts()` with `Element.copy_attr_convert` as
    merge strategy: the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') are appended.  For any other attribute
    present in both `self` and `dict_`, non-list values are first
    converted to 1-element lists, then both lists are concatenated and the
    result is stored in `self`.  With `and_source` set, the 'source'
    attribute is included in the copy.

    This method has no `replace` parameter; `update_all_atts()` is called
    with its default for that argument.
    """
    self.update_all_atts(dict_,
                         update_fun=Element.copy_attr_convert,
                         and_source=and_source)
def clear(self) -> None:
    """Drop all children by binding `self.children` to a new empty list."""
    self.children = list()
def replace(self, old, new) -> None:
    """
    Replace the child `old` with `new`.

    `new` may be a single `Node` (set up as a child and stored at the
    position of `old`), a sequence of nodes (spliced in at that position),
    or None (nothing is changed).  The position of `old` is looked up
    first in all cases.
    """
    position = self.index(old)
    if isinstance(new, Node):
        self.setup_child(new)
        self[position] = new
        return
    if new is None:
        return
    self[position:position + 1] = new
def replace_self(self, new) -> None:
    """
    Substitute `new` (a node or a list of nodes) for `self` in the parent.

    The basic attributes of `self` are passed to `new` (or to the first
    item of `new`, if it is a list) when that is an `Element`.  Otherwise
    it is asserted that `self` has no non-empty basic attribute that would
    get lost.

    Provisional: the handling of node attributes will be revised.
    """
    if isinstance(new, Node):
        heir = new
    else:
        # `new` is a list; its first item inherits the attributes.
        try:
            heir = new[0]
        except IndexError:
            heir = None
    if isinstance(heir, Element):
        heir.update_basic_atts(self)
    else:
        # `heir` is a Text node or `new` is an empty list:
        # make sure that no attribute values are dropped.
        for att in self.basic_attributes:
            assert not self[att], \
                'Losing "%s" attribute: %s' % (att, self[att])
    self.parent.replace(self, new)
def first_child_matching_class(self,
                               childclass: type[Element] | type[Text]
                               | tuple[type[Element] | type[Text], ...],
                               start: int = 0,
                               end: int = sys.maxsize,
                               ) -> int | None:
    """
    Return the index of the first child that is an instance of `childclass`.

    The test uses ``isinstance()``, so instances of subclasses match, too.
    Return None if no child in the examined range matches.

    Parameters:

    - `childclass`: A `Node` subclass to search for, or a tuple of `Node`
      classes. If a tuple, any of the classes may match.
    - `start`: First index to check.
    - `end`: First index *not* to check.
    """
    wanted = childclass if isinstance(childclass, tuple) else (childclass,)
    stop = min(len(self), end)
    for position in range(start, stop):
        if isinstance(self[position], wanted):
            return position
    return None
def first_child_not_matching_class(
        self,
        childclass: type[Element] | type[Text]
        | tuple[type[Element] | type[Text], ...],
        start: int = 0,
        end: int = sys.maxsize,
        ) -> int | None:
    """
    Return the index of the first child that is *no* instance of `childclass`.

    Return None if all children in the examined range match.

    Parameters:

    - `childclass`: A `Node` subclass to skip, or a tuple of `Node`
      classes. If a tuple, none of the classes may match.
    - `start`: First index to check.
    - `end`: First index *not* to check.
    """
    skipped = childclass if isinstance(childclass, tuple) else (childclass,)
    stop = min(len(self), end)
    for position in range(start, stop):
        if not isinstance(self.children[position], skipped):
            return position
    return None
def pformat(self, indent: str = ' ', level: int = 0) -> str:
    """
    Return an indented pseudo-XML representation of `self` and its children.

    The start tag is written on a line of its own, prefixed with `level`
    copies of `indent`; the children follow, formatted one level deeper.
    """
    pieces = ['%s%s\n' % (indent * level, self.starttag())]
    pieces.extend(child.pformat(indent, level + 1)
                  for child in self.children)
    return ''.join(pieces)
def copy(self) -> Self:
    """
    Return a childless copy of `self`.

    The new instance is created from `rawsource` and the attributes of
    `self`; the `_document`, `source`, and `line` values are carried over.
    """
    clone = self.__class__(rawsource=self.rawsource, **self.attributes)
    for field in ('_document', 'source', 'line'):
        setattr(clone, field, getattr(self, field))
    return clone
def deepcopy(self) -> Self:
    """Return a copy of `self` with deep copies of all its children."""
    duplicate = self.copy()
    cloned_children = [child.deepcopy() for child in self.children]
    duplicate.extend(cloned_children)
    return duplicate
def note_referenced_by(self,
                       name: str | None = None,
                       id: str | None = None,
                       ) -> None:
    """
    Record that this Element has been referenced via `name` or `id`.

    Sets ``self.referenced`` to True.  If `self` has an
    ``expect_referenced_by_name`` or ``expect_referenced_by_id``
    dictionary with an entry for `name` or `id`, respectively, the node
    stored there is marked as referenced, too.  (Needed for target
    propagation.)
    """
    self.referenced = True
    # Both look-ups fall back to an empty dict if the registry is missing.
    by_name = getattr(self, 'expect_referenced_by_name', {}).get(name)
    by_id = getattr(self, 'expect_referenced_by_id', {}).get(id)
    for dependent, key in ((by_name, name), (by_id, id)):
        if dependent:
            assert key is not None
            dependent.referenced = True
@classmethod
def is_not_list_attribute(cls, attr: str) -> bool:
    """
    Tell whether `attr` is missing from ``cls.list_attributes``.

    Returns True if and only if `attr` is NOT one of the basic list
    attributes defined for all Elements.
    """
    is_list_attribute = attr in cls.list_attributes
    return not is_list_attribute
@classmethod
def is_not_known_attribute(cls, attr: str) -> bool:
    """
    Tell whether `attr` is missing from ``cls.common_attributes``.

    Returns True if `attr` is NOT one of the attributes defined for all
    Element instances.

    Provisional. May be removed in Docutils 2.0.
    """
    is_common_attribute = attr in cls.common_attributes
    return not is_common_attribute
def validate_attributes(self) -> None:
    """Normalize and validate the attributes of this element.

    Every attribute listed in `self.valid_attributes` is passed through
    its entry in `ATTRIBUTE_VALIDATORS`; the returned (converted and
    normalized) value is stored back.  Attributes whose name starts with
    "internal:" are skipped.

    All problems are collected; if there are any, a `ValidationError`
    listing unknown attributes and invalid values is raised at the end.

    Provisional.
    """
    problems = []
    for name, raw in self.attributes.items():
        if name.startswith('internal:'):
            continue  # see docs/user/config.html#expose-internals
        if name in self.valid_attributes:
            try:
                self.attributes[name] = ATTRIBUTE_VALIDATORS[name](raw)
            except (ValueError, TypeError, KeyError) as error:
                problems.append(
                    f'Attribute "{name}" has invalid value "{raw}".\n {error}')
        else:
            allowed = '", "'.join(self.valid_attributes)
            problems.append(f'Attribute "{name}" not one of "{allowed}".')
    if not problems:
        return
    heading = f'Element {self.starttag()} invalid:\n '
    raise ValidationError(heading + '\n '.join(problems),
                          problematic_element=self)
def validate_content(self,
                     model: _ContentModelTuple | None = None,
                     elements: Sequence | None = None,
                     ) -> list:
    """Match `elements` against the content `model`.

    :model: content model description, default `self.content_model`,
    :elements: list of doctree elements, default `self.children`.

    The model is a sequence of ``(category, quantifier)`` pairs that is
    walked in parallel with the elements.  The quantifiers are
    "." (exactly one), "?" (zero or one), "+" (one or more), and
    "*" (zero or more).

    Return the list of elements that remain after the model is used up.
    Raise `ValidationError` if a required ("." or "+") element is missing
    or of the wrong category.

    Provisional.
    """
    if model is None:
        model = self.content_model
    if elements is None:
        elements = self.children
    pending = iter(elements)
    exhausted = object()  # marks the end of `pending`
    current = next(pending, None)
    for category, quantifier in model:
        if not isinstance(current, category):
            if quantifier in ('.', '+'):
                raise ValidationError(
                    self._report_child(current, category),
                    problematic_element=current)
            # optional part ('?', '*'):
            # try the same element with the next part of the model
            continue
        # Check additional placement constraints (if applicable):
        current.validate_position()
        if quantifier in ('.', '?'):
            # at most one element: go to the next one
            current = next(pending, None)
            continue
        # quantifier in ('*', '+'): pass all following matching elements
        while True:
            current = next(pending, exhausted)
            if current is exhausted:
                current = None
                break
            if not isinstance(current, category):
                break
            try:
                current.validate_position()
            except AttributeError:
                pass
    if current is None:
        return []
    return [current, *pending]
def _report_child(self,
                  child,
                  category: Element | Iterable[Element],
                  ) -> str:
    # Build the message for a missing child or a child of wrong category.
    #
    # `category` is a class or an iterable of classes (alternatives are
    # joined with "or"); `child` is None if the child is missing.
    if hasattr(category, '__name__'):
        expected = category.__name__
    else:
        expected = '> or <'.join(c.__name__ for c in category)
    heading = f'Element {self.starttag()} invalid:\n'
    if child is None:
        return f'{heading} Missing child of type <{expected}>.'
    if isinstance(child, Text):
        found = f'text data "{child.astext()}"'
    else:
        found = child.starttag()
    return f'{heading} Expecting child of type <{expected}>, not {found}.'
def validate(self, recursive: bool = True) -> None:
    """Validate this Docutils Document Tree element ("doctree").

    Check the attributes first, then the content.  Raise
    `ValidationError` on the first violation found; any child left over
    by `validate_content()` counts as a violation.
    If `recursive` is True, validate also the element's descendants.

    See `The Docutils Document Tree`__ for details of the
    Docutils Document Model.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html

    Provisional (work in progress).
    """
    self.validate_attributes()

    # The first leftover child (if any) is reported; the loop body
    # never completes an iteration.
    for surplus in self.validate_content():
        heading = f'Element {self.starttag()} invalid:\n'
        if isinstance(surplus, Text):
            raise ValidationError(
                f'{heading} Spurious text: "{surplus.astext()}".',
                problematic_element=self)
        raise ValidationError(
            f'{heading} Child element {surplus.starttag()} '
            'not allowed at this position.',
            problematic_element=surplus)

    if not recursive:
        return
    for child in self:
        child.validate(recursive=recursive)
1362# ====================
1363# Element Categories
1364# ====================
1365#
1366# See https://docutils.sourceforge.io/docs/ref/doctree.html#element-hierarchy.
class Root:
    """Category class: the element at the root of a document tree."""
class Structural:
    """Category class for `structural elements`__.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #structural-elements
    """
class SubStructural:
    """Category class for `structural subelements`__.

    Structural subelements are children of `Structural` elements.
    Most Structural elements accept only specific `SubStructural` elements.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #structural-subelements
    """
class Bibliographic:
    """Category class for `bibliographic elements`__.

    Bibliographic elements hold displayed document meta-data.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #bibliographic-elements
    """
class Body:
    """Category class for `body elements`__.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#body-elements
    """


class Admonition(Body):
    """Body elements that are distinctive, self-contained notices.

    The content consists of one or more body elements.
    """
    content_model: Final = (
        (Body, '+'),
    )
    # (%body.elements;)+


class Sequential(Body):
    """Body elements that are list-like."""


class General(Body):
    """Body elements for miscellaneous purposes."""


class Special(Body):
    """Body elements for special, internal use."""
class Part:
    """Category class for `body subelements`__.

    Body subelements always occur within specific parent elements.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#body-subelements
    """
class Decorative:
    """Category class for decorative elements (`header` and `footer`).

    Decorative elements are children of `decoration`; they contain one or
    more body elements.
    """
    content_model: Final = (
        (Body, '+'),
    )
    # (%body.elements;)+
class Inline:
    """Category class for inline elements.

    Inline elements contain text data and possibly other inline elements.
    """
1442# Orthogonal categories and Mixins
1443# ================================
class PreBibliographic:
    """Marks elements that may be placed ahead of Bibliographic Elements."""
class Invisible(Special, PreBibliographic):
    """Marks internal elements that are not shown in the output."""
class Labeled:
    """Marks elements whose first child is a `label`."""
class Resolvable:
    """Mixin providing the `resolved` flag."""

    # Class-level default; presumably set to True on an instance once it
    # has been resolved -- confirm against the transforms using it.
    resolved: bool = False
class BackLinkable:
    """Mixin for Elements supporting the "backrefs" attribute.

    "backrefs" is added to both the list attributes and the valid
    attributes inherited from `Element`.
    """

    list_attributes: Final = (*Element.list_attributes, 'backrefs')
    valid_attributes: Final = (*Element.valid_attributes, 'backrefs')

    def add_backref(self: Element, refid: str) -> None:
        """Append `refid` to the "backrefs" attribute."""
        backrefs = self['backrefs']
        backrefs.append(refid)
class Referential(Resolvable):
    """Marks elements with a cross-reference (an outgoing hyperlink)."""
class Targetable(Resolvable):
    """Marks elements that can be the target of a cross-reference.

    In other words: the end point of an incoming hyperlink.
    """
    # Class-level default for the "has been referenced" marker.
    referenced: int = 0

    indirect_reference_name: str | None = None
    """The whitespace_normalized_name (contains mixed case) of a target.

    This was required for MoinMoin <= 1.9 compatibility.

    Deprecated, will be removed in Docutils 1.0.
    """
class Titular:
    """Marks titles, sub-titles, and informal headings (rubric)."""
class TextElement(Element):
    """
    An element with text as direct content.

    All children are `Text` nodes or `Inline` elements.  To find out
    whether an element is used in an inline context, test whether its
    immediate parent is a `TextElement` instance (subclasses included).
    This helps with nodes like `image` which may be used inline as well as
    as standalone body elements.

    If passing children to `__init__()`, make sure to set `text` to
    ``''`` or some other suitable value.
    """
    content_model: Final = (((Text, Inline), '*'),)
    # (#PCDATA | %inline.elements;)*

    child_text_separator: Final = ''
    """Separator for child nodes, used by `astext()` method."""

    def __init__(self,
                 rawsource: str = '',
                 text: str = '',
                 *children,
                 **attributes: Any,
                 ) -> None:
        """
        Set up the element; a non-empty `text` becomes the first child
        (wrapped in a `Text` node), followed by `children`.
        """
        leading = (Text(text),) if text else ()
        Element.__init__(self, rawsource, *leading, *children, **attributes)
class FixedTextElement(TextElement):
    """A text element whose direct content is preformatted text.

    Instances always get the attribute ``xml:space="preserve"``.
    """

    valid_attributes: Final = (*Element.valid_attributes, 'xml:space')

    def __init__(self,
                 rawsource: str = '',
                 text: str = '',
                 *children,
                 **attributes: Any,
                 ) -> None:
        """Initialize as `TextElement`, then set "xml:space"."""
        super().__init__(rawsource, text, *children, **attributes)
        # Set after the base initialization so that a value passed in
        # `attributes` is overridden.
        self.attributes.update({'xml:space': 'preserve'})
class PureTextElement(TextElement):
    """A text element that holds nothing but text (no child elements)."""
    content_model: Final = (
        (Text, '?'),
    )
    # (#PCDATA)
1545# =================================
1546# Concrete Document Tree Elements
1547# =================================
1548#
1549# See https://docutils.sourceforge.io/docs/ref/doctree.html#element-reference
1551# Decorative Elements
1552# ===================
class header(Decorative, Element):
    """Decorative element for the page header; a child of `decoration`."""
class footer(Decorative, Element):
    """Decorative element for the page footer; a child of `decoration`."""
1558# Structural Subelements
1559# ======================
class title(Titular, PreBibliographic, SubStructural, TextElement):
    """The title of a `document`, `section`, `topic`, or generic `admonition`.

    Accepts the attributes "auto" and "refid" in addition to the common
    ones.
    """
    valid_attributes: Final = (*Element.valid_attributes, 'auto', 'refid')
class subtitle(Titular, PreBibliographic, SubStructural, TextElement):
    """The sub-title of a `document`, `section`, or `sidebar`."""

    def validate_position(self) -> None:
        """Check the position of the subtitle: it must follow a title.

        Raise `ValidationError` if the subtitle is the first child of its
        parent.  Nothing is checked for a subtitle without parent.
        """
        parent = self.parent
        if not parent:
            return
        if parent.index(self) != 0:
            return
        raise ValidationError(f'Element {parent.starttag()} invalid:'
                              '\n <subtitle> only allowed after <title>.',
                              problematic_element=self)
class meta(PreBibliographic, SubStructural, Element):
    """Holds "invisible" bibliographic data, or meta-data."""
    valid_attributes: Final = (
        *Element.valid_attributes,
        'content', 'dir', 'http-equiv', 'lang', 'media', 'name', 'scheme',
    )
class docinfo(SubStructural, Element):
    """Holds the displayed document meta-data.

    The content consists of one or more `Bibliographic` elements.
    """
    content_model: Final = (
        (Bibliographic, '+'),
    )
    # (%bibliographic.elements;)+
class decoration(PreBibliographic, SubStructural, Element):
    """Holds the `header` and the `footer` of a document."""
    content_model: Final = (
        (header, '?'),  # Empty element doesn't make sense,
        (footer, '?'),  # but is simpler to define.
    )
    # (header?, footer?)

    def get_header(self) -> header:
        """Return the `header` child; create it if there is none.

        A new, empty `header` is inserted at index 0 unless the first
        child already is a `header`.
        """
        first = self.children[0] if self.children else None
        if not isinstance(first, header):
            self.insert(0, header())
        return self.children[0]

    def get_footer(self) -> footer:
        """Return the `footer` child; create it if there is none.

        A new, empty `footer` is appended unless the last child already
        is a `footer`.
        """
        last = self.children[-1] if self.children else None
        if not isinstance(last, footer):
            self.append(footer())
        return self.children[-1]
class transition(SubStructural, Element):
    """A transition__ separates untitled parts of the text.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#transition
    """

    def validate_position(self) -> None:
        """Check the additional constraints on `transition` placement.

        A transition may neither begin nor end a section or document,
        and it may not directly follow another transition.  All violated
        constraints are reported in a single `ValidationError`.
        """
        parent = self.parent
        problems = [f'Element {parent.starttag()} invalid:']
        before = self.previous_sibling()
        # A transition without predecessor (index == 0) or following one
        # of these elements counts as
        # "at the beginning of a document or section".
        leading = (title, subtitle, meta, decoration)
        if before is None or isinstance(before, leading):
            problems.append(
                '<transition> may not begin a section or document.')
        is_last = parent.index(self) == len(parent) - 1
        if is_last:
            problems.append(
                '<transition> may not end a section or document.')
        if isinstance(before, transition):
            problems.append(
                '<transition> may not directly follow another transition.')
        if len(problems) == 1:  # just the heading: nothing to complain about
            return
        raise ValidationError('\n '.join(problems),
                              problematic_element=self)
1639# Structural Elements
1640# ===================
class topic(Structural, Element):
    """
    A topic__ is a non-recursive mini-section.

    Content: an optional `title` followed by one or more body elements.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#topic
    """
    content_model: Final = (
        (title, '?'),
        (Body, '+'),
    )
    # (title?, (%body.elements;)+)
class sidebar(Structural, Element):
    """
    A sidebar__ is like a parallel document that provides related material.

    It is typically offset by a border and "floats" to the side
    of the page.

    Content: an optional `title`, an optional `subtitle`, and one or more
    topics or body elements.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#sidebar
    """
    content_model: Final = (
        (title, '?'),
        (subtitle, '?'),
        ((topic, Body), '+'),
    )
    # ((title, subtitle?)?, (%body.elements; | topic)+)
    # "subtitle only after title" is ensured in `subtitle.validate_position()`.
class section(Structural, Element):
    """A document section__, the main unit of hierarchy.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#section
    """
    # The content model is recursive (sections may contain sections);
    # it is attached after the class definition, see below.


# Assigned here because the model refers to `section` itself.
section.content_model = (
    (title, '.'),
    (subtitle, '?'),
    ((Body, topic, sidebar, transition), '*'),
    ((section, transition), '*'),
)
# (title, subtitle?, %structure.model;)
# Correct transition placement is ensured in `transition.validate_position()`.
1686# Root Element
1687# ============
1689class document(Root, Element):
1690 """
1691 The document root element.
1693 Do not instantiate this class directly; use
1694 `docutils.utils.new_document()` instead.
1695 """
1696 valid_attributes: Final = Element.valid_attributes + ('title',)
1697 content_model: Final = ((title, '?'),
1698 (subtitle, '?'),
1699 (meta, '*'),
1700 (decoration, '?'),
1701 (docinfo, '?'),
1702 (transition, '?'),
1703 ((Body, topic, sidebar, transition), '*'),
1704 ((section, transition), '*'),
1705 )
1706 # ( (title, subtitle?)?,
1707 # meta*,
1708 # decoration?,
1709 # (docinfo, transition?)?,
1710 # %structure.model; )
1711 # Additional restrictions for `subtitle` and `transition` are tested
1712 # with the respective `validate_position()` methods.
def __init__(self,
             settings: Values,
             reporter: Reporter,
             *args,
             **kwargs: Any,
             ) -> None:
    """
    Set up the document root and its (initially empty) bookkeeping data.

    - `settings`: runtime settings data record.
    - `reporter`: system message generator.
    - `args`, `kwargs`: passed on to `Element.__init__()`.
    """
    Element.__init__(self, *args, **kwargs)

    self.current_source: StrPath | None = None
    """Path to or description of the input source being processed."""

    self.current_line: int | None = None
    """Line number (1-based) of `current_source`."""

    self.settings: Values = settings
    """Runtime settings data record."""

    self.reporter: Reporter = reporter
    """System message generator."""

    self.indirect_targets: list[target] = []
    """List of indirect target nodes."""

    self.substitution_defs: dict[str, substitution_definition] = {}
    """Mapping of substitution names to substitution_definition nodes."""

    self.substitution_names: dict[str, str] = {}
    """Mapping of case-normalized to case-sensitive substitution names."""

    self.refnames: dict[str, list[Element]] = {}
    """Mapping of names to lists of referencing nodes."""

    self.refids: dict[str, list[Element]] = {}
    """Mapping of ids to lists of referencing nodes."""

    # The value is None for "dupnames", see `set_name_id_map()`.
    self.nameids: dict[str, str | None] = {}
    """Mapping of names to unique id's."""

    self.nametypes: dict[str, bool] = {}
    """Mapping of names to hyperlink type. True: explicit, False: implicit.
    """

    self.ids: dict[str, Element] = {}
    """Mapping of ids to nodes."""

    self.footnote_refs: dict[str, list[footnote_reference]] = {}
    """Mapping of footnote labels to lists of footnote_reference nodes."""

    self.citation_refs: dict[str, list[citation_reference]] = {}
    """Mapping of citation labels to lists of citation_reference nodes."""

    self.autofootnotes: list[footnote] = []
    """List of auto-numbered footnote nodes."""

    self.autofootnote_refs: list[footnote_reference] = []
    """List of auto-numbered footnote_reference nodes."""

    self.symbol_footnotes: list[footnote] = []
    """List of symbol footnote nodes."""

    self.symbol_footnote_refs: list[footnote_reference] = []
    """List of symbol footnote_reference nodes."""

    self.footnotes: list[footnote] = []
    """List of manually-numbered footnote nodes."""

    self.citations: list[citation] = []
    """List of citation nodes."""

    self.autofootnote_start: int = 1
    """Initial auto-numbered footnote number."""

    self.symbol_footnote_start: int = 0
    """Initial symbol footnote symbol index."""

    # Keyed by the ID prefix (a string), see `set_id()`.
    self.id_counter: Counter[str] = Counter()
    """Numbers added to otherwise identical IDs."""

    self.parse_messages: list[system_message] = []
    """System messages generated while parsing."""

    self.transform_messages: list[system_message] = []
    """System messages generated while applying transforms."""

    # Delayed import (not done at module level).
    import docutils.transforms
    self.transformer: Transformer = docutils.transforms.Transformer(self)
    """Storage for transforms to be applied to this document."""

    self.include_log: list[tuple[StrPath, tuple]] = []
    """The current source's parents (to detect inclusion loops)."""

    self.decoration: decoration | None = None
    """Document's `decoration` node."""

    self._document: document = self
def __getstate__(self) -> dict[str, Any]:
    """
    Return a copy of the instance dictionary fit for pickling.

    The unpicklable references 'reporter' and 'transformer' are replaced
    by None.
    """
    return {**self.__dict__, 'reporter': None, 'transformer': None}
def asdom(self, dom: ModuleType | None = None) -> minidom.Document:
    """
    Return a DOM representation of this document.

    `dom` is the DOM implementation module to use; the default is
    `xml.dom.minidom` (imported on demand).
    """
    if dom is None:
        import xml.dom.minidom as dom
    dom_document = dom.Document()
    top_node = self._dom_node(dom_document)
    dom_document.appendChild(top_node)
    return dom_document
def set_id(self,
           node: Element,
           msgnode: Element | None = None,
           suggested_prefix: str = '',
           ) -> str:
    """
    Register the IDs of `node`, or create one for it.  Return an ID.

    If `node` already has IDs, they are registered in `self.ids`; an
    error is reported for every ID that is registered for another node
    (the system message is also appended to `msgnode`, if given).
    The last ID of the node is returned.

    Otherwise, a new ID is derived from the first of the node's names
    that yields a non-empty, unused ID.  If there is none, the ID is made
    of a prefix and the next free number for that prefix.  A trailing "%"
    of the ``auto_id_prefix`` setting selects a name-derived prefix or, if
    there is none, a prefix made of `suggested_prefix` (default: the
    tagname of the node).
    """
    if node['ids']:
        # register and check for duplicates
        for ident in node['ids']:
            owner = self.ids.setdefault(ident, node)
            if owner is not node:
                msg = self.reporter.error(
                    f'Duplicate ID: "{ident}" used by {owner.starttag()} '
                    f'and {node.starttag()}',
                    base_node=node)
                if msgnode is not None:
                    msgnode += msg
        return ident

    # generate and set id
    id_prefix = self.settings.id_prefix
    auto_id_prefix = self.settings.auto_id_prefix
    stem = ''
    ident = ''
    for name in node['names']:
        # the temporary "x" allows names starting with numbers
        stem = make_id('x' + name)[1:] if id_prefix else make_id(name)
        # TODO: normalize id-prefix? (would make code simpler)
        ident = id_prefix + stem
        if stem and ident not in self.ids:
            break
    else:
        # no usable name-derived ID: number it
        if stem and auto_id_prefix.endswith('%'):
            # disambiguate name-derived ID
            # TODO: remove second condition after announcing change
            prefix = ident + '-'
        else:
            prefix = id_prefix + auto_id_prefix
            if prefix.endswith('%'):
                infix = suggested_prefix or make_id(node.tagname)
                prefix = f'{prefix[:-1]}{infix}-'
        while True:
            self.id_counter[prefix] += 1
            ident = f'{prefix}{self.id_counter[prefix]}'
            if ident not in self.ids:
                break
    node['ids'].append(ident)
    self.ids[ident] = node
    return ident
def set_name_id_map(self,
                    node: Element,
                    id: str,
                    msgnode: Element | None = None,
                    explicit: bool = False,
                    ) -> None:
    """
    Register the names of `node` in the name/id mappings.

    `self.nameids` maps names to IDs. The value ``None`` indicates
    that the name is a "dupname" (i.e. there are already at least
    two targets with the same name and type).

    `self.nametypes` maps names to booleans representing
    hyperlink target type (True==explicit, False==implicit).

    A name that is not yet registered is simply mapped to `id` and
    `explicit`.  Name clashes are handled by `set_duplicate_name_id()`.

    The following state transition table shows how `self.nameids` items
    ("id") and `self.nametypes` items ("type") change with new input
    (a call to this method), and what actions are performed:

    ========  ====  ========  ====  ========  ========  =======  ======
    Input     Old State       New State       Action             Notes
    --------  --------------  --------------  -----------------  ------
    type      id    type      id    type      dupname   report
    ========  ====  ========  ====  ========  ========  =======  ======
    explicit                  new   explicit
    implicit                  new   implicit
    explicit  old   explicit  None  explicit  new,old   WARNING  [#ex]_
    implicit  old   explicit  old   explicit  new       INFO     [#ex]_
    explicit  old   implicit  new   explicit  old       INFO     [#ex]_
    implicit  old   implicit  None  implicit  new,old   INFO     [#ex]_
    explicit  None  explicit  None  explicit  new       WARNING
    implicit  None  explicit  None  explicit  new       INFO
    explicit  None  implicit  new   explicit
    implicit  None  implicit  None  implicit  new       INFO
    ========  ====  ========  ====  ========  ========  =======  ======

    .. [#ex] Do not clear the name-to-id map or invalidate the old target
       if both old and new targets refer to identical URIs or reference
       names.  The new target is invalidated regardless.

    Provisional. There will be changes to prefer explicit reference names
    as base for an element's ID.
    """
    # Loop over a snapshot: handling a duplicate modifies node['names'].
    for name in tuple(node['names']):
        if name not in self.nameids:
            self.nameids[name] = id
            self.nametypes[name] = explicit
            continue
        self.set_duplicate_name_id(node, id, name, msgnode, explicit)
def set_duplicate_name_id(self,
                          node: Element,
                          id: str,
                          name: str,
                          msgnode: Element | None,
                          explicit: bool,
                          ) -> None:
    """
    Handle a `name` of `node` that is already registered in `self.nameids`.

    Updates `self.nameids` and `self.nametypes`, turns the name into a
    "dupname" on the new and/or old node as required, and reports the
    clash with a system message (level 1: INFO, level 2: WARNING).

    - `node`: the new target, `id`: its ID.
    - `name`: the duplicate reference name.
    - `msgnode`: element to which the system message is appended, if
      possible.  May be None.
    - `explicit`: hyperlink target type of `node`
      (True: explicit, False: implicit).
    """
    old_id = self.nameids[name]  # None if name is only dupname
    old_explicit = self.nametypes[name]
    old_node = self.ids.get(old_id)
    level = 0  # system message level: 1-info, 2-warning
    s = ''  # message text, set whenever `level` is set

    self.nametypes[name] = old_explicit or explicit

    if old_id is not None and (
        'refname' in node and node['refname'] == old_node.get('refname')
        or 'refuri' in node and node['refuri'] == old_node.get('refuri')
    ):
        # indirect targets with same reference -> keep old target
        level = 1
        ref = node.get('refuri') or node.get('refname')
        s = f'Duplicate name "{name}" for external target "{ref}".'
        dupname(node, name)
    elif explicit:
        if old_explicit:
            level = 2
            s = f'Duplicate explicit target name: "{name}".'
            dupname(node, name)
            if old_id is not None:
                dupname(old_node, name)
                self.nameids[name] = None
        else:  # new explicit, old implicit -> override
            self.nameids[name] = id
            if old_id is not None:
                level = 1
                s = f'Target name overrides implicit target name "{name}".'
                dupname(old_node, name)
    else:  # new name is implicit
        level = 1
        s = f'Duplicate implicit target name: "{name}".'
        dupname(node, name)
        if old_id is not None and not old_explicit:
            dupname(old_node, name)
            self.nameids[name] = None

    if level:
        backrefs = [id]
        # don't add backref id for empty targets (not shown in output)
        if isinstance(node, target) and 'refuri' in node:
            backrefs = []
        msg = self.reporter.system_message(level, s,
                                           backrefs=backrefs,
                                           base_node=node)
        # try appending near to the problem:
        if msgnode is not None:
            msgnode += msg
            try:
                msgnode.validate(recursive=False)
            except ValidationError:
                # detach -> will be handled by `Messages` transform
                msgnode.pop()
                msg.parent = None
def has_name(self, name: str) -> bool:
    """Return True if `name` is a key of the `self.nameids` mapping."""
    registered = self.nameids
    return name in registered
1995 # "note" here is an imperative verb: "take note of".
def note_implicit_target(
        self, target: Element, msgnode: Element | None = None) -> None:
    """Give `target` an ID and register its names as implicit names.

    The ID comes from `set_id()`; the names are recorded by
    `set_name_id_map()` with ``explicit=False``.
    """
    # TODO: Postpone ID creation. Register reference name instead of ID
    # to allow for IDs based on explicit target pointing to the same
    # element. https://github.com/sphinx-doc/sphinx/issues/1961
    new_id = self.set_id(target, msgnode)
    self.set_name_id_map(target, new_id, msgnode, explicit=False)
def note_explicit_target(
        self, target: Element, msgnode: Element | None = None) -> None:
    """Give `target` an ID and register its names as explicit names.

    The ID comes from `set_id()`; the names are recorded by
    `set_name_id_map()` with ``explicit=True``.
    """
    new_id = self.set_id(target, msgnode)
    self.set_name_id_map(target, new_id, msgnode, explicit=True)
def note_refname(self, node: Element) -> None:
    """Append `node` to the `self.refnames` list for its "refname"."""
    same_name = self.refnames.setdefault(node['refname'], [])
    same_name.append(node)
def note_refid(self, node: Element) -> None:
    """Append `node` to the `self.refids` list for its "refid"."""
    same_id = self.refids.setdefault(node['refid'], [])
    same_id.append(node)
def note_indirect_target(self, target: target) -> None:
    """Record `target` in `self.indirect_targets`.

    A target that has names is additionally passed to `note_refname()`.
    """
    self.indirect_targets.append(target)
    if not target['names']:
        return
    self.note_refname(target)
def note_anonymous_target(self, target: target) -> None:
    """Assign an ID to `target` via `set_id()`; no name is registered."""
    assign_id = self.set_id
    assign_id(target)
def note_autofootnote(self, footnote: footnote) -> None:
    """Assign an ID to `footnote` and add it to `self.autofootnotes`."""
    self.set_id(footnote)
    collected = self.autofootnotes
    collected.append(footnote)
def note_autofootnote_ref(self, ref: footnote_reference) -> None:
    """Assign an ID to `ref` and add it to `self.autofootnote_refs`."""
    self.set_id(ref)
    collected = self.autofootnote_refs
    collected.append(ref)
def note_symbol_footnote(self, footnote: footnote) -> None:
    """Assign an ID to `footnote` and add it to `self.symbol_footnotes`."""
    self.set_id(footnote)
    collected = self.symbol_footnotes
    collected.append(footnote)
def note_symbol_footnote_ref(self, ref: footnote_reference) -> None:
    """Assign an ID to `ref` and add it to `self.symbol_footnote_refs`."""
    self.set_id(ref)
    collected = self.symbol_footnote_refs
    collected.append(ref)
def note_footnote(self, footnote: footnote) -> None:
    """Assign an ID to `footnote` and add it to `self.footnotes`."""
    self.set_id(footnote)
    collected = self.footnotes
    collected.append(footnote)
def note_footnote_ref(self, ref: footnote_reference) -> None:
    """Register the footnote reference `ref`.

    Assign an ID, file `ref` under its "refname" in `self.footnote_refs`,
    then pass it to `note_refname()`.
    """
    self.set_id(ref)
    same_name = self.footnote_refs.setdefault(ref['refname'], [])
    same_name.append(ref)
    self.note_refname(ref)
def note_citation(self, citation: citation) -> None:
    """Add `citation` to the `self.citations` list."""
    collected = self.citations
    collected.append(citation)
def note_citation_ref(self, ref: citation_reference) -> None:
    """Register the citation reference `ref`.

    Assign an ID, file `ref` under its "refname" in `self.citation_refs`,
    then pass it to `note_refname()`.
    """
    self.set_id(ref)
    same_name = self.citation_refs.setdefault(ref['refname'], [])
    same_name.append(ref)
    self.note_refname(ref)
def note_substitution_def(self,
                          subdef: substitution_definition,
                          def_name: str,
                          msgnode: Element | None = None,
                          ) -> None:
    """Register the substitution definition `subdef` under `def_name`.

    The name is whitespace-normalized before use.  If a definition with
    that name exists already, an error is reported (and appended to
    `msgnode`, if given) and the older definition is passed to
    `dupname()`; the new definition replaces it in
    `self.substitution_defs`.
    """
    key = whitespace_normalize_name(def_name)
    if key in self.substitution_defs:
        report = self.reporter.error(
            'Duplicate substitution definition name: "%s".' % key,
            base_node=subdef)
        if msgnode is not None:
            msgnode += report
        previous = self.substitution_defs[key]
        dupname(previous, key)
    # keep only the last definition:
    self.substitution_defs[key] = subdef
    # case-insensitive mapping:
    folded_key = fully_normalize_name(key)
    self.substitution_names[folded_key] = key
def note_substitution_ref(self,
                          subref: substitution_reference,
                          refname: str,
                          ) -> None:
    """Store the whitespace-normalized `refname` in ``subref['refname']``."""
    normalized = whitespace_normalize_name(refname)
    subref['refname'] = normalized
def note_pending(
        self, pending: pending, priority: int | None = None) -> None:
    """Hand `pending` and `priority` to ``self.transformer.add_pending()``."""
    transformer = self.transformer
    transformer.add_pending(pending, priority)
def note_parse_message(self, message: system_message) -> None:
    """Add `message` to the `self.parse_messages` list."""
    collected = self.parse_messages
    collected.append(message)
def note_transform_message(self, message: system_message) -> None:
    """Add `message` to the `self.transform_messages` list."""
    collected = self.transform_messages
    collected.append(message)
def note_source(self,
                source: StrPath | None,
                offset: int | None,
                ) -> None:
    """Remember the current source and line number.

    A truthy `source` is converted with `os.fspath()`; a false value
    (None, empty string) is stored unchanged.  `self.current_line`
    becomes ``offset + 1``, or None if `offset` is None.
    """
    self.current_source = os.fspath(source) if source else source
    self.current_line = None if offset is None else offset + 1
def copy(self) -> Self:
    """Return a new instance of the same class without children.

    The copy is built from `self.settings`, `self.reporter`, and the
    attributes of `self`; `source` and `line` are carried over.
    """
    cls = self.__class__
    duplicate = cls(self.settings, self.reporter, **self.attributes)
    duplicate.source = self.source
    duplicate.line = self.line
    return duplicate
def get_decoration(self) -> decoration:
    """Return `self.decoration`, creating and inserting it if missing.

    A new `decoration` element is placed before the first child that is
    neither `Titular` nor `meta`, or appended if there is no such child.
    """
    if self.decoration:
        return self.decoration
    self.decoration: decoration = decoration()
    position = self.first_child_not_matching_class((Titular, meta))
    if position is not None:
        self.insert(position, self.decoration)
    else:
        self.append(self.decoration)
    return self.decoration
2119# Bibliographic Elements
2120# ======================
# Bibliographic elements without content model or attributes of their own.
class author(Bibliographic, TextElement):
    pass


class organization(Bibliographic, TextElement):
    pass


class address(Bibliographic, FixedTextElement):
    pass


class contact(Bibliographic, TextElement):
    pass


class version(Bibliographic, TextElement):
    pass


class revision(Bibliographic, TextElement):
    pass


class status(Bibliographic, TextElement):
    pass


class date(Bibliographic, TextElement):
    pass


class copyright(Bibliographic, TextElement):  # NoQA: A001 (builtin name)
    pass
class authors(Bibliographic, Element):
    """Container for the author information of a multi-author document."""

    # (author, organization?, address?, contact?)+
    content_model: Final = (
        (author, '+'),
        (organization, '?'),
        (address, '?'),
        (contact, '?'),
    )

    def validate_content(self,
                         model: _ContentModelTuple | None = None,
                         elements: Sequence | None = None,
                         ) -> list:
        """Apply the content model repeatedly until no children remain.

        The inherited validation is first run without arguments and then
        re-run on whatever it returns, as long as that is non-empty.
        The `model` and `elements` arguments are accepted for signature
        compatibility but not used by this override.

        Provisional.
        """
        leftover = super().validate_content()
        while leftover:
            leftover = super().validate_content(elements=leftover)
        return leftover
2157# Body Elements
2158# =============
2159#
2160# General
2161# -------
2162#
2163# Miscellaneous Body Elements and related Body Subelements (Part)
# General body elements with text content.
class paragraph(General, TextElement):
    pass


class rubric(Titular, General, TextElement):
    pass
class compound(General, Element):
    """General body element containing one or more body elements."""

    # (%body.elements;)+
    content_model: Final = (
        (Body, '+'),
    )
class container(General, Element):
    """General body element containing one or more body elements."""

    # (%body.elements;)+
    content_model: Final = (
        (Body, '+'),
    )
class attribution(Part, TextElement):
    """Visible mention of where a `block_quote` comes from."""
class block_quote(General, Element):
    """Extended quotation that is set off from the main text."""

    # ((%body.elements;)+, attribution?)
    content_model: Final = (
        (Body, '+'),
        (attribution, '?'),
    )
class reference(General, Inline, Referential, TextElement):
    """Reference element, usable as body element and as inline element.

    Adds "anonymous", "name", "refid", "refname", and "refuri" to the
    attributes valid for every `Element`.
    """

    valid_attributes: Final = (
        *Element.valid_attributes,
        'anonymous', 'name', 'refid', 'refname', 'refuri',
    )
2192# Lists
2193# -----
2194#
2195# Lists (Sequential) and related Body Subelements (Part)
class list_item(Part, Element):
    """List item; may contain any number of body elements (also none)."""

    # (%body.elements;)*
    content_model: Final = (
        (Body, '*'),
    )
class bullet_list(Sequential, Element):
    """List of one or more `list_item` elements; accepts "bullet"."""

    valid_attributes: Final = (*Element.valid_attributes, 'bullet')

    # (list_item+)
    content_model: Final = (
        (list_item, '+'),
    )
class enumerated_list(Sequential, Element):
    """List of one or more `list_item` elements.

    Accepts the attributes "enumtype", "prefix", "suffix", and "start".
    """

    valid_attributes: Final = (
        *Element.valid_attributes,
        'enumtype', 'prefix', 'suffix', 'start',
    )

    # (list_item+)
    content_model: Final = (
        (list_item, '+'),
    )
# Text parts of a `definition_list_item`.
class term(Part, TextElement):
    pass


class classifier(Part, TextElement):
    pass
class definition(Part, Element):
    """The definition belonging to a `term` in a `definition_list`."""

    # (%body.elements;)+
    content_model: Final = (
        (Body, '+'),
    )
class definition_list_item(Part, Element):
    """Item of a `definition_list`: term(s), classifiers, definition."""

    # ((term, classifier*)+, definition)
    content_model: Final = (
        (term, '.'),
        ((classifier, term), '*'),
        (definition, '.'),
    )
class definition_list(Sequential, Element):
    """List of terms together with their definitions.

    Suitable for glossaries and dictionaries, for describing or
    classifying things, for dialogues, or for itemizing subtopics.
    """

    # (definition_list_item+)
    content_model: Final = (
        (definition_list_item, '+'),
    )
class field_name(Part, TextElement):
    """Text part that comes first in a `field`."""
class field_body(Part, Element):
    """Second part of a `field`; any number of body elements (also none)."""

    # (%body.elements;)*
    content_model: Final = (
        (Body, '*'),
    )
class field(Part, Bibliographic, Element):
    """Pair of exactly one `field_name` and one `field_body`."""

    # (field_name, field_body)
    content_model: Final = (
        (field_name, '.'),
        (field_body, '.'),
    )
class field_list(Sequential, Element):
    """List of pairs of a label and its data.

    Usually rendered as a list with two columns.  Extension syntax and
    special processing make use of it as well.
    """

    # (field+)
    content_model: Final = (
        (field, '+'),
    )
class option_string(Part, PureTextElement):
    """Literal text of a command-line option; typically monospaced."""
class option_argument(Part, PureTextElement):
    """Placeholder text standing for the argument of an option."""

    valid_attributes: Final = (*Element.valid_attributes, 'delimiter')

    def astext(self) -> str:
        """Return the text content, preceded by the "delimiter" attribute.

        A missing "delimiter" attribute defaults to ``' '``.
        """
        prefix = self.get('delimiter', ' ')
        return prefix + TextElement.astext(self)
class option(Part, Element):
    """One option inside an `option_list_item`.

    Combines an option string with any number of option argument
    placeholders (also none).
    """

    child_text_separator: Final = ''

    # (option_string, option_argument*)
    content_model: Final = (
        (option_string, '.'),
        (option_argument, '*'),
    )
class option_group(Part, Element):
    """Group of one or more synonymous `option` elements."""

    child_text_separator: Final = ', '

    # (option+)
    content_model: Final = (
        (option, '+'),
    )
class description(Part, Element):
    """Description of a command-line option."""

    # (%body.elements;)+
    content_model: Final = (
        (Body, '+'),
    )
class option_list_item(Part, Element):
    """Holds one `option_group` together with its `description`."""

    # NOTE(review): literal reproduced as shown in the listing; runs of
    # spaces may have been collapsed there -- confirm against upstream.
    child_text_separator: Final = ' '

    # (option_group, description)
    content_model: Final = (
        (option_group, '.'),
        (description, '.'),
    )
class option_list(Sequential, Element):
    """List of command-line options and descriptions in two columns."""

    # (option_list_item+)
    content_model: Final = (
        (option_list_item, '+'),
    )
2306# Pre-formatted text blocks
2307# -------------------------
# General body elements based on `FixedTextElement`.
class literal_block(General, FixedTextElement):
    pass


class doctest_block(General, FixedTextElement):
    pass
class math_block(General, FixedTextElement, PureTextElement):
    """Display formula (mathematical notation as a body element)."""
class line(Part, TextElement):
    """One line of text inside a `line_block`."""

    # Class-level default; None until a value is assigned.
    indent: str | None = None
class line_block(General, Element):
    """A sequence of lines and of nested line blocks."""
    # The content model is recursive, (line | line_block)+, and can only
    # be assigned once the class itself exists (see below).


line_block.content_model = (
    ((line, line_block), '+'),
)
2331# Admonitions
2332# -----------
2333# distinctive and self-contained notices
# Specific admonitions without content model or attributes of their own.
class attention(Admonition, Element):
    pass


class caution(Admonition, Element):
    pass


class danger(Admonition, Element):
    pass


class error(Admonition, Element):
    pass


class important(Admonition, Element):
    pass


class note(Admonition, Element):
    pass


class tip(Admonition, Element):
    pass


class hint(Admonition, Element):
    pass


class warning(Admonition, Element):
    pass
class admonition(Admonition, Element):
    """Admonition with a `title` followed by one or more body elements."""

    # (title, (%body.elements;)+)
    content_model: Final = (
        (title, '.'),
        (Body, '+'),
    )
2351# Footnote and citation
2352# ---------------------
class label(Part, PureTextElement):
    """The visible identifier of a footnote or citation."""
class footnote(General, BackLinkable, Element, Labeled, Targetable):
    """A labelled note (footnote or endnote) giving additional context."""

    valid_attributes: Final = (*Element.valid_attributes, 'auto', 'backrefs')

    # (label?, (%body.elements;)+)
    # The label will become required in Docutils 1.0.
    content_model: Final = (
        (label, '?'),
        (Body, '+'),
    )
class citation(General, BackLinkable, Element, Labeled, Targetable):
    """Citation: a required `label` and one or more body elements."""

    # (label, (%body.elements;)+)
    content_model: Final = (
        (label, '.'),
        (Body, '+'),
    )
2371# Graphical elements
2372# ------------------
class image(General, Inline, Element):
    """Reference to an image resource.

    Can be used as body element and as inline element.
    """

    valid_attributes: Final = (
        *Element.valid_attributes,
        'uri', 'alt', 'align', 'height', 'width', 'scale', 'loading',
    )

    def astext(self) -> str:
        """Return the "alt" attribute, or an empty string if it is unset."""
        alternative_text = self.get('alt', '')
        return alternative_text
class caption(Part, TextElement):
    """Text part that may follow the image in a `figure`."""
class legend(Part, Element):
    """Wrapper for `figure` text that does not belong to the caption."""

    # (%body.elements;)+
    content_model: Final = (
        (Body, '+'),
    )
class figure(General, Element):
    """Formal figure (generally an illustration) with a title."""

    valid_attributes: Final = (*Element.valid_attributes, 'align', 'width')

    # (image, ((caption, legend?) | legend))
    # TODO: According to the DTD, a caption or legend is required
    # but rST allows "bare" figures which are formatted differently from
    # images (floating in LaTeX, nested in a <figure> in HTML). [bugs: #489]
    content_model: Final = (
        ((image, reference), '.'),
        (caption, '?'),
        (legend, '?'),
    )
2407# Tables
2408# ------
class entry(Part, Element):
    """A table cell, i.e. an entry in a `row`."""

    valid_attributes: Final = (
        *Element.valid_attributes,
        'align', 'char', 'charoff', 'colname', 'colsep', 'morecols',
        'morerows', 'namest', 'nameend', 'rowsep', 'valign',
    )

    # %tbl.entry.mdl -> (%body.elements;)*
    content_model: Final = (
        (Body, '*'),
    )
class row(Part, Element):
    """A row of table cells (one or more `entry` elements)."""

    valid_attributes: Final = (*Element.valid_attributes, 'rowsep', 'valign')

    # (%tbl.row.mdl;) -> entry+
    content_model: Final = (
        (entry, '+'),
    )
class colspec(Part, Element):
    """Specification of one column in a `tgroup`."""

    valid_attributes: Final = (
        *Element.valid_attributes,
        'align', 'char', 'charoff', 'colname', 'colnum',
        'colsep', 'colwidth', 'rowsep', 'stub',
    )

    def propwidth(self) -> int|float:
        """Return the "colwidth__" attribute as a number. Default 1.

        Raise ValueError if "colwidth" is zero, negative, or a
        *fixed value*.

        Provisional.

        __ https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth
        """
        # Move current implementation of validate_colwidth() here
        # in Docutils 1.0
        raw_width = self.get('colwidth', '')
        return validate_colwidth(raw_width)
class thead(Part, Element):
    """The head of a `tgroup`: one or more rows."""

    valid_attributes: Final = (*Element.valid_attributes, 'valign')

    # (row+)
    content_model: Final = (
        (row, '+'),
    )
class tbody(Part, Element):
    """The body of a `tgroup`: one or more rows."""

    valid_attributes: Final = (*Element.valid_attributes, 'valign')

    # (row+)
    content_model: Final = (
        (row, '+'),
    )
class tgroup(Part, Element):
    """Portion of a table; most tables consist of a single `tgroup`."""

    valid_attributes: Final = (
        *Element.valid_attributes,
        'align', 'cols', 'colsep', 'rowsep',
    )

    # (colspec*, thead?, tbody)
    content_model: Final = (
        (colspec, '*'),
        (thead, '?'),
        (tbody, '.'),
    )
class table(General, Element):
    """Data arranged in rows and columns."""

    valid_attributes: Final = (
        *Element.valid_attributes,
        'align', 'colsep', 'frame', 'pgwide', 'rowsep', 'width',
    )

    # (title?, tgroup+)
    content_model: Final = (
        (title, '?'),
        (tgroup, '+'),
    )
2473# Special purpose elements
2474# ------------------------
2475# Body elements for internal use or special requests.
class comment(Invisible, FixedTextElement, PureTextElement):
    """Notes by the author that do not show up in the output."""
class substitution_definition(Invisible, TextElement):
    """Invisible text element; accepts "ltrim" and "rtrim" attributes."""

    valid_attributes: Final = (*Element.valid_attributes, 'ltrim', 'rtrim')
class target(Invisible, Inline, TextElement, Targetable):
    """Target element.

    Adds "anonymous", "refid", "refname", and "refuri" to the attributes
    valid for every `Element`.
    """

    valid_attributes: Final = (
        *Element.valid_attributes,
        'anonymous', 'refid', 'refname', 'refuri',
    )
class system_message(Special, BackLinkable, PreBibliographic, Element):
    """
    Element representing a system message.

    Not meant to be instantiated directly; call
    ``document.reporter.info/warning/error/severe()`` instead.
    """

    valid_attributes: Final = (
        *BackLinkable.valid_attributes,
        'level', 'line', 'type',
    )

    # (%body.elements;)+
    content_model: Final = (
        (Body, '+'),
    )

    def __init__(self,
                 message: str | None = None,
                 *children,
                 **attributes: Any,
                 ) -> None:
        """Set up the element.

        A non-empty `message` is wrapped in a `paragraph` that becomes
        the first child.  A "rawsource" keyword argument is removed from
        `attributes` and passed on as raw source (default: '').
        """
        rawsource = attributes.pop('rawsource', '')
        if message:
            children = (paragraph('', message), *children)
        try:
            Element.__init__(self, rawsource, *children, **attributes)
        except:  # NoQA: E722 (catchall)
            # debugging aid: show the offending children, then re-raise
            print('system_message: children=%r' % (children,))
            raise

    def astext(self) -> str:
        """Return "source:line: (type/level) text" for this message.

        "line" may be missing (empty string is used); "source", "type",
        and "level" are accessed by subscription.
        """
        line = self.get('line', '')
        source = self['source']
        message_type = self['type']
        level = self['level']
        text = Element.astext(self)
        return '%s:%s: (%s/%s) %s' % (source, line, message_type,
                                      level, text)
class pending(Invisible, Element):
    """
    Placeholder for pending operations.

    The "pending" element is used to encapsulate a pending operation: the
    operation (transform), the point at which to apply it, and any data it
    requires.  Only the pending operation's location within the document is
    stored in the public document tree (by the "pending" object itself); the
    operation and its data are stored in the "pending" object's internal
    instance attributes.

    For example, say you want a table of contents in your reStructuredText
    document.  The easiest way to specify where to put it is from within the
    document, with a directive::

        .. contents::

    But the "contents" directive can't do its work until the entire document
    has been parsed and possibly transformed to some extent.  So the directive
    code leaves a placeholder behind that will trigger the second phase of its
    processing, something like this::

        <pending ...public attributes...> + internal attributes

    Use `document.note_pending()` so that the
    `docutils.transforms.Transformer` stage of processing can run all pending
    transforms.
    """

    def __init__(self,
                 transform: Transform,
                 details: Mapping[str, Any] | None = None,
                 rawsource: str = '',
                 *children,
                 **attributes: Any,
                 ) -> None:
        # `rawsource`, `children`, and `attributes` are handed to
        # `Element.__init__()` unchanged.
        Element.__init__(self, rawsource, *children, **attributes)

        self.transform: Transform = transform
        """The `docutils.transforms.Transform` class implementing the pending
        operation."""

        # A false `details` value (None, empty mapping) becomes a new dict.
        self.details: Mapping[str, Any] = details or {}
        """Detail data (dictionary) required by the pending operation."""

    def pformat(self, indent: str = ' ', level: int = 0) -> str:
        # Return `Element.pformat()` of this node followed by a listing of
        # the internal attributes (transform class and details).
        # NOTE(review): the string literals in this method are reproduced
        # as shown in the listing; runs of spaces inside them may have been
        # collapsed there -- confirm against the upstream file.
        internals = ['.. internal attributes:',
                     ' .transform: %s.%s' % (self.transform.__module__,
                                             self.transform.__name__),
                     ' .details:']
        # sorted by key
        details = sorted(self.details.items())
        for key, value in details:
            if isinstance(value, Node):
                # single node: key line, then the node's own pformat() lines
                internals.append('%7s%s:' % ('', key))
                internals.extend(['%9s%s' % ('', line)
                                  for line in value.pformat().splitlines()])
            elif (value
                  and isinstance(value, list)
                  and isinstance(value[0], Node)):
                # non-empty list whose first item is a node:
                # every item is formatted with its pformat()
                internals.append('%7s%s:' % ('', key))
                for v in value:
                    internals.extend(['%9s%s' % ('', line)
                                      for line in v.pformat().splitlines()])
            else:
                # anything else: show the repr()
                internals.append('%7s%s: %r' % ('', key, value))
        return (Element.pformat(self, indent, level)
                + ''.join((' %s%s\n' % (indent * level, line))
                          for line in internals))

    def copy(self) -> Self:
        # New instance of the same class with the same transform, details,
        # raw source, and attributes; `*children` is not passed on.
        # `details` is passed as is (the same object unless it is empty).
        obj = self.__class__(self.transform, self.details, self.rawsource,
                             **self.attributes)
        obj._document = self._document
        obj.source = self.source
        obj.line = self.line
        return obj
class raw(Special, Inline, PreBibliographic,
          FixedTextElement, PureTextElement):
    """Raw data handed to the Writer untouched.

    Usable both as Body element and as Inline element.
    """

    valid_attributes: Final = (
        *Element.valid_attributes,
        'format', 'xml:space',
    )
2610# Inline Elements
2611# ===============
# Inline text elements without content model or attributes of their own.
class abbreviation(Inline, TextElement):
    pass


class acronym(Inline, TextElement):
    pass


class emphasis(Inline, TextElement):
    pass


class generated(Inline, TextElement):
    pass


class inline(Inline, TextElement):
    pass


class literal(Inline, TextElement):
    pass


class strong(Inline, TextElement):
    pass


class subscript(Inline, TextElement):
    pass


class superscript(Inline, TextElement):
    pass


class title_reference(Inline, TextElement):
    pass
class footnote_reference(Inline, Referential, PureTextElement):
    """Inline reference; accepts "auto", "refid", and "refname"."""

    valid_attributes: Final = (
        *Element.valid_attributes,
        'auto', 'refid', 'refname',
    )
class citation_reference(Inline, Referential, PureTextElement):
    """Inline reference; accepts "refid" and "refname"."""

    valid_attributes: Final = (*Element.valid_attributes, 'refid', 'refname')
class substitution_reference(Inline, TextElement):
    """Inline text element; accepts the "refname" attribute."""

    valid_attributes: Final = (*Element.valid_attributes, 'refname')
class math(Inline, PureTextElement):
    """Inline formula (mathematical notation in running text)."""
class problematic(Inline, TextElement):
    """Inline text element; accepts "refid", "refname", and "refuri"."""

    valid_attributes: Final = (
        *Element.valid_attributes,
        'refid', 'refname', 'refuri',
    )
2647# ========================================
2648# Auxiliary Classes, Functions, and Data
2649# ========================================
# The whitespace-separated names inside the string are turned into a list
# by ``split()``, so line breaks and indentation there carry no meaning.
# The list is passed to `_add_node_class_names()` further down, which
# generates ``visit_...``/``depart_...`` methods for every name.
node_class_names: Sequence[str] = """
    Text
    abbreviation acronym address admonition attention attribution author
        authors
    block_quote bullet_list
    caption caution citation citation_reference classifier colspec comment
        compound contact container copyright
    danger date decoration definition definition_list definition_list_item
        description docinfo doctest_block document
    emphasis entry enumerated_list error
    field field_body field_list field_name figure footer
        footnote footnote_reference
    generated
    header hint
    image important inline
    label legend line line_block list_item literal literal_block
    math math_block meta
    note
    option option_argument option_group option_list option_list_item
        option_string organization
    paragraph pending problematic
    raw reference revision row rubric
    section sidebar status strong subscript substitution_definition
        substitution_reference subtitle superscript system_message
    table target tbody term tgroup thead tip title title_reference topic
        transition
    version
    warning""".split()
"""A list of names of all concrete Node subclasses."""
class NodeVisitor:
    """
    Abstract superclass implementing the "Visitor" pattern [GoF95]_ for
    document tree traversals.

    For every node class there are corresponding methods which do nothing
    by default; override individual methods to get specific and useful
    behaviour.  `Node.walk()` calls `dispatch_visit()` upon entering a
    node.  `Node.walkabout()` additionally calls `dispatch_departure()`
    before exiting a node.

    The dispatch methods call "``visit_`` + node class name" or
    "``depart_`` + node class name", resp.

    Use this base class for visitors that must implement ``visit_...`` &
    ``depart_...`` methods for *all* compulsory node types encountered
    (such as `docutils.writers.Writer` subclasses).
    Unimplemented methods raise exceptions (except for optional nodes).

    Subclass `SparseNodeVisitor` instead for sparse traversals, where only
    certain node types are of interest.  Subclass `GenericNodeVisitor` when
    (mostly or entirely) uniform processing is desired.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    optional: ClassVar[tuple[str, ...]] = ('meta',)
    """
    Tuple containing node class names (as strings).

    No exception will be raised if writers do not implement visit
    or departure functions for these node classes.

    Used to ensure transitional compatibility with existing 3rd-party writers.
    """

    def __init__(self, document: document, /) -> None:
        self.document: document = document

    def dispatch_visit(self, node) -> None:
        """
        Call the "``visit_`` + node class name" method of `self` with
        `node` as argument and return its result.  Fall back to
        `self.unknown_visit` if there is no such method.
        """
        kind = node.__class__.__name__
        handler = getattr(self, 'visit_' + kind, self.unknown_visit)
        report = self.document.reporter.debug
        report(
            'docutils.nodes.NodeVisitor.dispatch_visit calling %s for %s'
            % (handler.__name__, kind))
        return handler(node)

    def dispatch_departure(self, node) -> None:
        """
        Call the "``depart_`` + node class name" method of `self` with
        `node` as argument and return its result.  Fall back to
        `self.unknown_departure` if there is no such method.
        """
        kind = node.__class__.__name__
        handler = getattr(self, 'depart_' + kind, self.unknown_departure)
        report = self.document.reporter.debug
        report(
            'docutils.nodes.NodeVisitor.dispatch_departure calling %s for %s'
            % (handler.__name__, kind))
        return handler(node)

    def unknown_visit(self, node) -> None:
        """
        Called when entering unknown `Node` types.

        Raise NotImplementedError, unless the node class name is listed
        in `optional` and the "strict_visitor" setting is false.
        """
        strict = self.document.settings.strict_visitor
        if not strict and node.__class__.__name__ in self.optional:
            return
        raise NotImplementedError(
            '%s visiting unknown node type: %s'
            % (self.__class__, node.__class__.__name__))

    def unknown_departure(self, node) -> None:
        """
        Called before exiting unknown `Node` types.

        Raise NotImplementedError, unless the node class name is listed
        in `optional` and the "strict_visitor" setting is false.
        """
        strict = self.document.settings.strict_visitor
        if not strict and node.__class__.__name__ in self.optional:
            return
        raise NotImplementedError(
            '%s departing unknown node type: %s'
            % (self.__class__, node.__class__.__name__))
class SparseNodeVisitor(NodeVisitor):
    """
    Base class for sparse traversals, i.e. traversals where only certain
    node types are of interest.  Subclass `NodeVisitor` instead if
    ``visit_...`` & ``depart_...`` methods should be implemented for *all*
    node types (such as for `docutils.writers.Writer` subclasses).
    """
class GenericNodeVisitor(NodeVisitor):
    """
    Abstract superclass for simple traversals with a generic "Visitor".

    Each ``visit_...`` method calls `default_visit()` unless overridden,
    and each ``depart_...`` method (when using `Node.walkabout()`) calls
    `default_departure()`.  Subclasses must override `default_visit()`
    (and `default_departure()`).

    Override only `default_visit()` (and `default_departure()`) to define
    a fully generic visitor.  Also override individual ``visit_...()``
    (and ``depart_...()``) methods to define a semi-generic visitor.

    `NodeVisitor.unknown_visit()` (`NodeVisitor.unknown_departure()`) should
    be overridden for default behavior.
    """

    def default_visit(self, node):
        """Hook for generic, uniform traversals; must be overridden."""
        raise NotImplementedError

    def default_departure(self, node):
        """Hook for generic, uniform traversals; must be overridden."""
        raise NotImplementedError
2809def _call_default_visit(self: GenericNodeVisitor, node) -> None:
2810 self.default_visit(node)
2813def _call_default_departure(self: GenericNodeVisitor, node) -> None:
2814 self.default_departure(node)
2817def _nop(self: SparseNodeVisitor, node) -> None:
2818 pass
def _add_node_class_names(names) -> None:
    """Save typing with dynamic assignments:

    For every name in `names`, attach "visit_<name>" and "depart_<name>"
    methods to `GenericNodeVisitor` (calling the default hooks) and to
    `SparseNodeVisitor` (doing nothing).
    """
    for class_name in names:
        visit_attr = 'visit_' + class_name
        depart_attr = 'depart_' + class_name
        setattr(GenericNodeVisitor, visit_attr, _call_default_visit)
        setattr(GenericNodeVisitor, depart_attr, _call_default_departure)
        setattr(SparseNodeVisitor, visit_attr, _nop)
        setattr(SparseNodeVisitor, depart_attr, _nop)


_add_node_class_names(node_class_names)
class TreeCopyVisitor(GenericNodeVisitor):
    """
    Copy a tree or branch completely, element attributes included.
    """

    def __init__(self, document: document) -> None:
        super().__init__(document)
        # Acting parents that were replaced while descending (a stack).
        self.parent_stack: list[list] = []
        # Acting parent; starts out as a list receiving the copied root.
        self.parent: list = []

    def get_tree_copy(self):
        """Return the first item collected in the acting parent."""
        return self.parent[0]

    def default_visit(self, node) -> None:
        """Copy `node`, attach the copy, and make it the acting parent."""
        duplicate = node.copy()
        enclosing = self.parent
        enclosing.append(duplicate)
        self.parent_stack.append(enclosing)
        self.parent = duplicate

    def default_departure(self, node) -> None:
        """Make the previous acting parent the acting parent again."""
        self.parent = self.parent_stack.pop()
2858# Custom Exceptions
2859# =================
class ValidationError(ValueError):
    """Invalid Docutils Document Tree Element.

    :msg: description of the problem (the exception message).
    :problematic_element: the offending element, if known (default: None);
        stored in the attribute of the same name.
    """
    def __init__(self,
                 msg: str,
                 problematic_element: Element | None = None,
                 ) -> None:
        # The default is None, so the annotation must say so explicitly
        # (implicit Optional is rejected by type checkers).
        super().__init__(msg)
        self.problematic_element = problematic_element
class TreePruningException(Exception):
    """
    Common base of the tree pruning exceptions related to `NodeVisitor`.

    To prune the tree traversed by `Node.walk()` or `Node.walkabout()`,
    raise one of the subclasses from within a ``visit_...`` or
    ``depart_...`` method called during the traversal.
    """
class SkipChildren(TreePruningException):
    """
    Skip all children of the current node.  This affects neither the
    siblings of the current node nor its ``depart_...`` method.
    """
class SkipSiblings(TreePruningException):
    """
    Skip the remaining siblings (to the right) of the current node.  This
    affects neither the children of the current node nor its
    ``depart_...`` method.
    """
class SkipNode(TreePruningException):
    """
    Skip the children of the current node and also the call of the
    current node's ``depart_...`` method.
    """
class SkipDeparture(TreePruningException):
    """
    Skip the call of the current node's ``depart_...`` method.  This
    affects neither the children nor the siblings of the current node.
    """
class NodeFound(TreePruningException):
    """
    Signals that the target of a search has been found.  The traversal
    code does not catch this exception; the client must catch it.
    """
class StopTraversal(TreePruningException):
    """
    End the traversal altogether.  This does not affect the current
    node's ``depart_...`` method, and the ``depart_...`` methods of the
    parent nodes are called as usual, too.  No further nodes are visited.
    Unlike NodeFound, this does not cause exception handling to trickle
    up to the caller.
    """
2924# definition moved here from `utils` to avoid circular import dependency
# definition moved here from `utils` to avoid circular import dependency
def unescape(text: str,
             restore_backslashes: bool = False,
             respect_whitespace: bool = False,
             ) -> str:
    """
    Return `text` with null characters removed or turned into backslashes.

    With `restore_backslashes`, every null character is replaced by a
    backslash.  Otherwise null characters are removed, together with a
    directly following space or newline (backslash-escaped whitespace).
    """
    # `respect_whitespace` is ignored (since introduction 2016-12-16)
    if restore_backslashes:
        return text.replace('\x00', '\\')
    # Two-character sequences first, so that the whitespace following
    # a null character is dropped along with it.
    for escape_sequence in ('\x00 ', '\x00\n', '\x00'):
        text = text.replace(escape_sequence, '')
    return text
def make_id(string: str) -> str:
    """
    Convert `string` into an identifier and return it.

    Docutils identifiers will conform to the regular expression
    ``[a-z](-?[a-z0-9]+)*``.  For CSS compatibility, identifiers (the "class"
    and "id" attributes) should have no underscores, colons, or periods.
    Hyphens may be used.

    - The `HTML 4.01 spec`_ defines identifiers based on SGML tokens:

          ID and NAME tokens must begin with a letter ([A-Za-z]) and may be
          followed by any number of letters, digits ([0-9]), hyphens ("-"),
          underscores ("_"), colons (":"), and periods (".").

    - However the `CSS1 spec`_ defines identifiers based on the "name" token,
      a tighter interpretation ("flex" tokenizer notation; "latin1" and
      "escape" 8-bit characters have been replaced with entities)::

          unicode     \\[0-9a-f]{1,4}
          latin1      [&iexcl;-&yuml;]
          escape      {unicode}|\\[ -~&iexcl;-&yuml;]
          nmchar      [-a-z0-9]|{latin1}|{escape}
          name        {nmchar}+

    The CSS1 "nmchar" rule does not include underscores ("_"), colons (":"),
    or periods ("."), therefore "class" and "id" attributes should not contain
    these characters.  They should be replaced with hyphens ("-").  Combined
    with HTML's requirements (the first character must be a letter; no
    "unicode", "latin1", or "escape" characters), this results in the
    ``[a-z](-?[a-z0-9]+)*`` pattern.

    The result may be the empty string if `string` contains no usable
    characters.

    .. _HTML 4.01 spec: https://www.w3.org/TR/html401
    .. _CSS1 spec: https://www.w3.org/TR/REC-CSS1
    """
    ident = string.lower()
    # Replace special letters by ASCII look-alikes: digraphs first, then the
    # single-letter table (same order as the two tables are applied in).
    for table in (_non_id_translate_digraphs, _non_id_translate):
        ident = ident.translate(table)
    # Decompose the remaining characters (NFKD) and drop everything that
    # cannot be encoded as ASCII.
    decomposed = unicodedata.normalize('NFKD', ident)
    ident = decomposed.encode('ascii', 'ignore').decode('ascii')
    # Shrink runs of whitespace, then replace every run of non-id characters
    # by a single hyphen.
    ident = _non_id_chars.sub('-', ' '.join(ident.split()))
    # Strip leading hyphens/digits and trailing hyphens.
    return _non_id_at_ends.sub('', ident)
# Helper patterns and translation tables used by `make_id()`.

# Matches a run of characters other than ASCII lowercase letters and digits;
# `make_id()` replaces each such run by a single hyphen.
_non_id_chars: re.Pattern[str] = re.compile('[^a-z0-9]+')
# Matches leading hyphens/digits or trailing hyphens; `make_id()` removes
# them.
_non_id_at_ends: re.Pattern[str] = re.compile('^[-0-9]+|-+$')
# `str.translate()` table: code point of a special lowercase letter -> ASCII
# replacement letter.
# NOTE(review): presumably these are letters that Unicode NFKD normalization
# does not decompose into an ASCII base letter -- confirm.
_non_id_translate: dict[int, str] = {
    0x00f8: 'o',       # o with stroke
    0x0111: 'd',       # d with stroke
    0x0127: 'h',       # h with stroke
    0x0131: 'i',       # dotless i
    0x0142: 'l',       # l with stroke
    0x0167: 't',       # t with stroke
    0x0180: 'b',       # b with stroke
    0x0183: 'b',       # b with topbar
    0x0188: 'c',       # c with hook
    0x018c: 'd',       # d with topbar
    0x0192: 'f',       # f with hook
    0x0199: 'k',       # k with hook
    0x019a: 'l',       # l with bar
    0x019e: 'n',       # n with long right leg
    0x01a5: 'p',       # p with hook
    0x01ab: 't',       # t with palatal hook
    0x01ad: 't',       # t with hook
    0x01b4: 'y',       # y with hook
    0x01b6: 'z',       # z with stroke
    0x01e5: 'g',       # g with stroke
    0x0225: 'z',       # z with hook
    0x0234: 'l',       # l with curl
    0x0235: 'n',       # n with curl
    0x0236: 't',       # t with curl
    0x0237: 'j',       # dotless j
    0x023c: 'c',       # c with stroke
    0x023f: 's',       # s with swash tail
    0x0240: 'z',       # z with swash tail
    0x0247: 'e',       # e with stroke
    0x0249: 'j',       # j with stroke
    0x024b: 'q',       # q with hook tail
    0x024d: 'r',       # r with stroke
    0x024f: 'y',       # y with stroke
}
# `str.translate()` table: code point of a ligature/digraph -> two-letter
# ASCII replacement.  Applied by `make_id()` before `_non_id_translate`.
_non_id_translate_digraphs: dict[int, str] = {
    0x00df: 'sz',      # ligature sz
    0x00e6: 'ae',      # ae
    0x0153: 'oe',      # ligature oe
    0x0238: 'db',      # db digraph
    0x0239: 'qp',      # qp digraph
}
def dupname(node: Element, name: str) -> None:
    """
    Mark `name` as a duplicate name of `node`.

    Append `name` to ``node['dupnames']`` and remove it from
    ``node['names']`` (raising `ValueError` if it is not listed there).
    """
    duplicates = node['dupnames']
    duplicates.append(name)
    active_names = node['names']
    active_names.remove(name)
    # Pretend that `node` is referenced (even though it is not) so that no
    # unnecessary system_messages are generated for it.
    node.referenced = True
def fully_normalize_name(name: str) -> str:
    """Return `name` lowercased, with whitespace runs collapsed to one space.

    Leading and trailing whitespace is removed.
    """
    words = name.lower().split()
    return ' '.join(words)
def whitespace_normalize_name(name: str) -> str:
    """Return `name` with whitespace runs collapsed to a single space.

    Leading and trailing whitespace is removed; case is preserved.
    """
    words = name.split()
    return ' '.join(words)
def serial_escape(value: str) -> str:
    """Escape string values that are elements of a list, for serialization.

    Backslashes are doubled and spaces are prefixed with a backslash.
    """
    # Each character is mapped independently, so the backslash inserted
    # before a space is not escaped a second time.
    return value.translate({ord('\\'): '\\\\', ord(' '): '\\ '})
def split_name_list(s: str) -> list[str]:
    r"""Split a string at non-escaped spaces.

    Backslashes escape internal spaces (cf. `serial_escape()`).
    Return list of "names" (after removing escaping backslashes).

    The string is split at every single unescaped space character.

    >>> split_name_list(r'a\ n\ame two\\ n\\ames')
    ['a name', 'two\\', 'n\\ames']

    Provisional.
    """
    nul = '\x00'
    marked = (
        s.replace('\\', nul)             # escape with NULL char
        .replace(nul + nul, '\\')        # unescape (doubled) backslashes
        .replace(nul + ' ', nul + nul)   # escaped spaces -> NULL NULL
    )
    # Restore internal spaces, drop the remaining escaping characters.
    return [part.replace(nul + nul, ' ').replace(nul, '')
            for part in marked.split(' ')]
def pseudo_quoteattr(value: str) -> str:
    """Quote attributes for pseudo-xml.

    The value is wrapped in double quotes; no characters are escaped.
    """
    return f'"{value}"'
def parse_measure(measure: str, unit_pattern: str = '[a-zA-Zµ]*|%?'
                  ) -> tuple[int|float, str]:
    """Parse a measure__, return value + unit.

    `unit_pattern` is a regular expression describing recognized units.
    The default is suited for (but not limited to) CSS3 units and SI units.
    It matches runs of ASCII letters or Greek mu, a single percent sign,
    or no unit.

    The value is returned as `int` if its textual form is an integer,
    else as `float`.  Spaces between value and unit are allowed.

    Raise `ValueError` if `measure` does not consist of a number followed
    by a recognized unit.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#measure

    Provisional.
    """
    match = re.fullmatch(f'(-?[0-9.]+) *({unit_pattern})', measure)
    if match is None:
        # Explicit check instead of relying on an AttributeError from
        # ``None.group()``.
        raise ValueError(f'"{measure}" is no valid measure.')
    number = match.group(1)
    try:
        value = int(number)
    except ValueError:
        try:
            value = float(number)
        except ValueError:
            # The number pattern also admits strings like "1.2.3" or ".".
            # Suppress the internal conversion errors in the traceback.
            raise ValueError(f'"{measure}" is no valid measure.') from None
    return value, match.group(2)
# Functions to validate `Element attribute`__ values.

# Ensure the expected Python `data type`__, normalize, and check for
# restrictions.
#
# The functions can be used to convert `str` values (e.g. from an XML
# representation) or to validate an existing document tree or node.
#
# Cf. `Element.validate_attributes()`, `docutils.parsers.docutils_xml`,
# and the `ATTRIBUTE_VALIDATORS` mapping below.
#
# __ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-reference
# __ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-types
def create_keyword_validator(*keywords: str) -> Callable[[str], str]:
    """
    Return a function that validates a `str` against given `keywords`.

    The returned function returns its argument unchanged if it is one of
    the `keywords` and raises `ValueError` otherwise.

    Provisional.
    """
    def validate_keywords(value: str) -> str:
        if value in keywords:
            return value
        choices = '", "'.join(keywords)
        raise ValueError(f'"{value}" is not one of "{choices}".')

    return validate_keywords
def validate_identifier(value: str) -> str:
    """
    Validate identifier key or class name.

    `value` is valid if `make_id()` returns it unchanged; it is then
    returned as is.  Otherwise, `ValueError` is raised.

    Used in `idref.type`__ and for the tokens in `validate_identifier_list()`.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#idref-type

    Provisional.
    """
    if make_id(value) == value:
        return value
    raise ValueError(f'"{value}" is no valid id or class name.')
def validate_identifier_list(value: str | list[str]) -> list[str]:
    """
    A (space-separated) list of ids or class names.

    `value` may be a `list` or a `str` with space separated
    ids or class names (cf. `validate_identifier()`).
    A `str` is split at whitespace; a `list` is returned as is
    after all of its items passed validation.

    Used in `classnames.type`__, `ids.type`__, and `idrefs.type`__.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#classnames-type
    __ https://docutils.sourceforge.io/docs/ref/doctree.html#ids-type
    __ https://docutils.sourceforge.io/docs/ref/doctree.html#idrefs-type

    Provisional.
    """
    tokens = value.split() if isinstance(value, str) else value
    for token in tokens:
        validate_identifier(token)  # raises ValueError for invalid tokens
    return tokens
def validate_measure(measure: str) -> str:
    """
    Validate a measure__ (number + optional unit).  Return normalized `str`.

    The normalized form is the parsed number directly followed by the unit
    (without separating space).

    See `parse_measure()` for a function returning a "number + unit" tuple.

    The unit may be a run of ASCII letters or Greek mu, a single percent sign,
    or the empty string.  Case is preserved.

    Provisional.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#measure
    """
    number, unit = parse_measure(measure)
    return str(number) + unit
def validate_colwidth(measure: str|int|float) -> int|float:
    """Validate the "colwidth__" attribute.

    Numbers are taken as is, the strings "*" and "" stand for 1, and other
    strings are parsed as a number with optional unit "*".
    Raise `ValueError` unless the result is a positive number.

    Provisional:
      `measure` must be a `str` and will be returned as normalized `str`
      (with unit "*" for proportional values) in Docutils 1.0.

      The default unit will change to "pt" in Docutils 2.0.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth
    """
    if isinstance(measure, (int, float)):
        width = measure
    elif measure in ('*', ''):  # short for '1*'
        width = 1
    else:
        try:
            width = parse_measure(measure, unit_pattern='[*]?')[0]
        except ValueError:
            # Let the range check below report unparsable input.
            width = -1
    if width <= 0:
        raise ValueError(f'"{measure}" is no proportional measure.')
    return width
def validate_NMTOKEN(value: str) -> str:
    """
    Validate a "name token": a `str` of ASCII letters, digits, and [-._].

    Return `value` unchanged; raise `ValueError` if it is empty or contains
    any other character.

    Provisional.
    """
    if re.fullmatch('[-._A-Za-z0-9]+', value) is None:
        raise ValueError(f'"{value}" is no NMTOKEN.')
    return value
def validate_NMTOKENS(value: str | list[str]) -> list[str]:
    """
    Validate a list of "name tokens".

    `value` may be a `list` or a `str` with whitespace separated tokens
    (cf. `validate_NMTOKEN()`).  Return the list of tokens.

    Provisional.
    """
    tokens = value.split() if isinstance(value, str) else value
    for token in tokens:
        validate_NMTOKEN(token)  # raises ValueError for invalid tokens
    return tokens
def validate_refname_list(value: str | list[str]) -> list[str]:
    """
    Validate a list of `reference names`__.

    Reference names may contain all characters;
    whitespace is normalized (cf. `whitespace_normalize_name()`).

    `value` may be either a `list` of names or a `str` with
    space separated names (with internal spaces backslash escaped
    and literal backslashes doubled cf. `serial_escape()`).

    Return a list of whitespace-normalized, unescaped reference names.

    Provisional.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#reference-name
    """
    names = split_name_list(value) if isinstance(value, str) else value
    return list(map(whitespace_normalize_name, names))
def validate_yesorno(value: str | int | bool) -> bool:
    """Validate a `%yesorno`__ (flag) value.

    The string literal "0" evaluates to ``False``, all other
    values are converted with `bool()`.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#yesorno
    """
    return False if value == "0" else bool(value)
# Each key is an attribute name, each value a callable that takes the
# attribute value and returns the validated (and possibly converted) value.
# The ``validate_...`` functions of this module raise `ValueError` for
# invalid values.
ATTRIBUTE_VALIDATORS: dict[str, Callable[[str], Any]] = {
    'alt': str,  # CDATA
    'align': str,
    'anonymous': validate_yesorno,
    'auto': str,  # CDATA  (only '1' or '*' are used in rST)
    'backrefs': validate_identifier_list,
    'bullet': str,  # CDATA  (only '-', '+', or '*' are used in rST)
    'classes': validate_identifier_list,
    'char': str,  # from Exchange Table Model (CALS), currently ignored
    'charoff': validate_NMTOKEN,  # from CALS, currently ignored
    'colname': validate_NMTOKEN,  # from CALS, currently ignored
    'colnum': int,  # from CALS, currently ignored
    'cols': int,  # from CALS: "NMTOKEN, […] must be an integer > 0".
    'colsep': validate_yesorno,
    'colwidth': validate_colwidth,  # see docstring for pending changes
    'content': str,  # <meta>
    'delimiter': str,
    'dir': create_keyword_validator('ltr', 'rtl', 'auto'),  # <meta>
    'dupnames': validate_refname_list,
    'enumtype': create_keyword_validator('arabic', 'loweralpha', 'lowerroman',
                                         'upperalpha', 'upperroman'),
    'format': str,  # CDATA (space separated format names)
    'frame': create_keyword_validator('top', 'bottom', 'topbot', 'all',
                                      'sides', 'none'),  # from CALS, ignored
    'height': validate_measure,
    'http-equiv': str,  # <meta>
    'ids': validate_identifier_list,
    'lang': str,  # <meta>
    'level': int,
    'line': int,
    'ltrim': validate_yesorno,
    'loading': create_keyword_validator('embed', 'link', 'lazy'),
    'media': str,  # <meta>
    'morecols': int,
    'morerows': int,
    'name': whitespace_normalize_name,  # in <reference> (deprecated)
    # NOTE(review): alternative validator for "name" in <meta>, left
    # disabled -- presumably because a dict holds one entry per key; confirm.
    # 'name': node_attributes.validate_NMTOKEN,  # in <meta>
    'names': validate_refname_list,
    'namest': validate_NMTOKEN,  # start of span, from CALS, currently ignored
    'nameend': validate_NMTOKEN,  # end of span, from CALS, currently ignored
    'pgwide': validate_yesorno,  # from CALS, currently ignored
    'prefix': str,
    'refid': validate_identifier,
    'refname': whitespace_normalize_name,
    'refuri': str,
    'rowsep': validate_yesorno,
    'rtrim': validate_yesorno,
    'scale': int,
    'scheme': str,
    'source': str,
    'start': int,
    'stub': validate_yesorno,
    'suffix': str,
    'title': str,
    'type': validate_NMTOKEN,
    'uri': str,
    'valign': create_keyword_validator('top', 'middle', 'bottom'),  # from CALS
    'width': validate_measure,
    'xml:space': create_keyword_validator('default', 'preserve'),
    }
"""
Mapping of `attribute names`__ to validating functions.

Provisional.

__ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-reference
"""