Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/docutils/nodes.py: 62%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# $Id$
2# Author: David Goodger <goodger@python.org>
3# Maintainer: docutils-develop@lists.sourceforge.net
4# Copyright: This module has been placed in the public domain.
6"""
7Docutils document tree element class library.
9The relationships and semantics of elements and attributes are documented in
10`The Docutils Document Tree`__.
12Classes in CamelCase are abstract base classes or auxiliary classes. The one
13exception is `Text`, for a text (PCDATA) node; uppercase is used to
14differentiate from element classes. Classes in lower_case_with_underscores
15are element classes, matching the XML element generic identifiers in the DTD_.
17The position of each node (the level at which it can occur) is significant and
18is represented by abstract base classes (`Root`, `Structural`, `Body`,
19`Inline`, etc.). Certain transformations will be easier because we can use
20``isinstance(node, base_class)`` to determine the position of the node in the
21hierarchy.
23__ https://docutils.sourceforge.io/docs/ref/doctree.html
24.. _DTD: https://docutils.sourceforge.io/docs/ref/docutils.dtd
25"""
27from __future__ import annotations
29__docformat__ = 'reStructuredText'
31import os
32import re
33import sys
34import unicodedata
35import warnings
36from collections import Counter
37# import xml.dom.minidom as dom # -> conditional import in Node.asdom()
38# and document.asdom()
40# import docutils.transforms # -> delayed import in document.__init__()
42TYPE_CHECKING = False
43if TYPE_CHECKING:
44 from collections.abc import (Callable, Iterable, Iterator,
45 Mapping, Sequence)
46 from types import ModuleType
47 from typing import Any, ClassVar, Final, Literal, Self, SupportsIndex
49 from docutils.utils._typing import TypeAlias
51 from xml.dom import minidom
53 from docutils.frontend import Values
54 from docutils.transforms import Transformer, Transform
55 from docutils.utils import Reporter
57 _ContentModelCategory: TypeAlias = tuple['Element' | tuple['Element', ...]]
58 _ContentModelQuantifier = Literal['.', '?', '+', '*']
59 _ContentModelItem: TypeAlias = tuple[_ContentModelCategory,
60 _ContentModelQuantifier]
61 _ContentModelTuple: TypeAlias = tuple[_ContentModelItem, ...]
63 StrPath: TypeAlias = str | os.PathLike[str]
64 """File system path. No bytes!"""
66 _UpdateFun: TypeAlias = Callable[[str, Any, bool], None]
69# ==============================
70# Functional Node Base Classes
71# ==============================
class Node:
    """Abstract base class of nodes in a document tree."""

    parent: Element | None = None
    """Back-reference to the Node immediately containing this Node."""

    children: Sequence  # defined in subclasses
    """List of child nodes (Elements or Text).

    Override in subclass instances that are not terminal nodes.
    """

    source: StrPath | None = None
    """Path or description of the input source which generated this Node."""

    line: int | None = None
    """The line number (1-based) of the beginning of this Node in `source`."""

    tagname: str  # defined in subclasses
    """The element generic identifier."""

    _document: document | None = None

    @property
    def document(self) -> document | None:
        """Return the `document` root node of the tree containing this Node.

        The node's own ``_document`` is used when set; otherwise the
        parent's `document` is returned.  ``None`` is returned when an
        `AttributeError` occurs during the lookup (e.g. there is no parent).
        """
        try:
            own = self._document
            return own if own else self.parent.document
        except AttributeError:
            return None

    @document.setter
    def document(self, value: document) -> None:
        self._document = value

    def __bool__(self) -> Literal[True]:
        """
        Return True, unconditionally.

        A node is more than a simple container: its boolean "truth" does
        not depend on having one or more subnodes in the doctree.

        Use `len()` to check node length.
        """
        return True

    def asdom(self,
              dom: ModuleType | None = None,
              ) -> minidom.Document | minidom.Element | minidom.Text:
        # TODO: minidom.Document is only returned by document.asdom()
        #       (which overwrites this base-class implementation)
        """Return a DOM **fragment** representation of this Node.

        `dom` is the DOM implementation module to use; it defaults to
        `xml.dom.minidom` (imported on demand).
        """
        if dom is None:
            import xml.dom.minidom as dom
        return self._dom_node(dom.Document())

    def pformat(self, indent: str = '    ', level: int = 0) -> str:
        """
        Return an indented pseudo-XML representation, for test purposes.

        Abstract: override in subclasses.
        """
        raise NotImplementedError

    def copy(self) -> Self:
        """Return a copy of self.  Abstract: override in subclasses."""
        raise NotImplementedError

    def deepcopy(self) -> Self:
        """Return a deep copy of self (also copying children).

        Abstract: override in subclasses.
        """
        raise NotImplementedError

    def astext(self) -> str:
        """Return a string representation of this Node.

        Abstract: override in subclasses.
        """
        raise NotImplementedError

    def setup_child(self, child) -> None:
        """Register `self` as parent of `child`.

        If `self` belongs to a document, also hand the document down to
        `child` and fill in the child's missing `source`/`line` values from
        the document's current source and line.
        """
        child.parent = self
        doc = self.document
        if not doc:
            return
        child.document = doc
        if child.source is None:
            child.source = doc.current_source
        if child.line is None:
            child.line = doc.current_line

    def walk(self, visitor: NodeVisitor) -> bool:
        """
        Traverse a tree of `Node` objects, calling the
        `dispatch_visit()` method of `visitor` when entering each
        node.  (The `walkabout()` method is similar, except it also
        calls the `dispatch_departure()` method before exiting each
        node.)

        This tree traversal supports limited in-place tree
        modifications.  Replacing one node with one or more nodes is
        OK, as is removing an element.  However, if the node removed
        or replaced occurs after the current node, the old node will
        still be traversed, and any new nodes will not.

        Within ``visit`` methods (and ``depart`` methods for
        `walkabout()`), `TreePruningException` subclasses may be raised
        (`SkipChildren`, `SkipSiblings`, `SkipNode`, `SkipDeparture`).

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` implementation for each `Node` subclass encountered.

        Return true if we should stop the traversal.
        """
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walk calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except (SkipChildren, SkipNode):
                return False
            except SkipDeparture:  # not applicable; ignore
                pass
            # Iterate over a copy so that the visitor may modify the
            # list of children in place.
            snapshot = self.children[:]
            try:
                for subnode in snapshot:
                    if subnode.walk(visitor):
                        return True
            except SkipSiblings:
                pass
        except StopTraversal:
            return True
        return False

    def walkabout(self, visitor: NodeVisitor) -> bool:
        """
        Perform a tree traversal similarly to `Node.walk()` (which
        see), except also call the `dispatch_departure()` method
        before exiting each node.

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` and ``depart`` implementation for each `Node`
        subclass encountered.

        Return true if we should stop the traversal.
        """
        depart_wanted = True
        halt = False
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walkabout calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except SkipNode:
                # Skip children *and* the departure call.
                return halt
            except SkipDeparture:
                depart_wanted = False
            # Iterate over a copy so that the visitor may modify the
            # list of children in place.
            snapshot = self.children[:]
            try:
                for subnode in snapshot:
                    if subnode.walkabout(visitor):
                        halt = True
                        break
            except SkipSiblings:
                pass
        except SkipChildren:
            pass
        except StopTraversal:
            halt = True
        if depart_wanted:
            visitor.document.reporter.debug(
                'docutils.nodes.Node.walkabout calling dispatch_departure '
                'for %s' % self.__class__.__name__)
            visitor.dispatch_departure(self)
        return halt

    def _fast_findall(self, cls: type|tuple[type]) -> Iterator:
        """Return iterator that only supports instance checks."""
        if isinstance(self, cls):
            yield self
        for subnode in self.children:
            for match in subnode._fast_findall(cls):
                yield match

    def _superfast_findall(self) -> Iterator:
        """Return iterator that doesn't check for a condition."""
        # This is different from ``iter(self)`` implemented via
        # __getitem__() and __len__() in the Element subclass,
        # which yields only the direct children.
        yield self
        for subnode in self.children:
            for descendant in subnode._superfast_findall():
                yield descendant

    def findall(self,
                condition: type|tuple[type]|Callable[[Node], bool]|None = None,
                include_self: bool = True,
                descend: bool = True,
                siblings: bool = False,
                ascend: bool = False,
                ) -> Iterator:
        """
        Return an iterator yielding nodes following `self`:

        * self (if `include_self` is true)
        * all descendants in tree traversal order (if `descend` is true)
        * the following siblings (if `siblings` is true) and their
          descendants (if also `descend` is true)
        * the following siblings of the parent (if `ascend` is true) and
          their descendants (if also `descend` is true), and so on.

        If `condition` is not None, the iterator yields only nodes
        for which ``condition(node)`` is true.
        If `condition` is a type (or tuple of types) ``cls``, it is equivalent
        to a function consisting of ``return isinstance(node, cls)``.

        If `ascend` is true, assume `siblings` to be true as well.

        If the tree structure is modified during iteration, the result
        is undefined.

        For example, given the following tree::

            <paragraph>
                <emphasis>      <--- emphasis.findall() and
                    <strong>    <--- strong.findall() are called.
                        Foo
                    Bar
                <reference name="Baz" refid="baz">
                    Baz

        Then tuple(emphasis.findall()) equals ::

            (<emphasis>, <strong>, <#text: Foo>, <#text: Bar>)

        and list(strong.findall(ascend=True)) equals ::

            [<strong>, <#text: Foo>, <#text: Bar>, <reference>, <#text: Baz>]
        """
        if ascend:
            siblings = True
        by_class = isinstance(condition, (type, tuple))
        # Special argument combinations are served by the optimized
        # helper generators.
        if include_self and descend and not siblings:
            if condition is None:
                yield from self._superfast_findall()
                return
            if by_class:
                yield from self._fast_findall(condition)
                return
        # Turn a class (or tuple of classes) into a predicate function.
        if by_class:
            class_or_tuple = condition

            def condition(node, class_or_tuple=class_or_tuple):
                return isinstance(node, class_or_tuple)

        if include_self and (condition is None or condition(self)):
            yield self
        if descend and len(self.children):
            for subnode in self:
                yield from subnode.findall(condition=condition,
                                           include_self=True, descend=True,
                                           siblings=False, ascend=False)
        if not siblings:  # `ascend` implies `siblings` (see above)
            return
        current = self
        while current.parent:
            position = current.parent.index(current)
            # extra check since Text nodes have value-equality
            while current.parent[position] is not current:
                position = current.parent.index(current, position + 1)
            for follower in current.parent[position+1:]:
                yield from follower.findall(
                    condition=condition,
                    include_self=True, descend=descend,
                    siblings=False, ascend=False)
            if not ascend:
                break
            current = current.parent

    def traverse(
        self,
        condition: type|tuple[type]|Callable[[Node], bool]|None = None,
        include_self: bool = True,
        descend: bool = True,
        siblings: bool = False,
        ascend: bool = False,
    ) -> list:
        """Return list of nodes following `self`.

        Deprecated list-returning wrapper around `findall()` (emits a
        `DeprecationWarning`).
        For looping, Node.findall() is faster and more memory efficient.
        """
        # traverse() may be eventually removed:
        warnings.warn('nodes.Node.traverse() is obsoleted by Node.findall().',
                      DeprecationWarning, stacklevel=2)
        found = self.findall(condition, include_self, descend,
                             siblings, ascend)
        return list(found)

    def next_node(
        self,
        condition: type|tuple[type]|Callable[[Node], bool]|None = None,
        include_self: bool = False,
        descend: bool = True,
        siblings: bool = False,
        ascend: bool = False,
    ) -> Node | None:
        """
        Return the first node in the iterator returned by findall(),
        or None if the iterable is empty.

        Parameter list is the same as of `findall()`.  Note that `include_self`
        defaults to False, though.
        """
        candidates = self.findall(condition, include_self,
                                  descend, siblings, ascend)
        return next(candidates, None)

    def validate(self, recursive: bool = True) -> None:
        """Raise ValidationError if this node is not valid.

        The base implementation does nothing.
        Override in subclasses that define validity constraints.
        """

    def validate_position(self) -> None:
        """Hook for additional checks of the parent's content model.

        Raise ValidationError, if `self` is at an invalid position.

        The base implementation does nothing.
        Override in subclasses with complex validity constraints.  See
        `subtitle.validate_position()` and `transition.validate_position()`.
        """
class Text(Node, str):  # NoQA: SLOT000 (Node doesn't define __slots__)
    """
    Terminal node (leaf) of the document tree that holds text only.

    Text nodes have neither child nodes nor attributes.  Initialize by
    passing a string to the constructor.

    Access the raw (null-escaped) text with ``str(<instance>)``
    and unescaped text with ``<instance>.astext()``.
    """

    tagname: Final = '#text'

    children: Final = ()
    """Text nodes have no children, and cannot have children."""

    def __new__(cls, data: str, rawsource: None = None) -> Self:
        """Create the node from `data`.

        Raise `TypeError` if `data` is an array of bytes
        and warn if the deprecated `rawsource` argument is used.
        """
        if isinstance(data, bytes):
            raise TypeError('expecting str data, not bytes')
        if rawsource is not None:
            warnings.warn('nodes.Text: initialization argument "rawsource" '
                          'is ignored and will be removed in Docutils 2.0.',
                          DeprecationWarning, stacklevel=2)
        return str.__new__(cls, data)

    def shortrepr(self, maxlen: int = 18) -> str:
        """Return an abbreviated representation (text cut at `maxlen`)."""
        text = str(self)
        if len(text) > maxlen:
            text = text[:maxlen-4] + ' ...'
        return '<%s: %r>' % (self.tagname, text)

    def __repr__(self) -> str:
        return self.shortrepr(maxlen=68)

    def astext(self) -> str:
        """Return the text content passed through `unescape()`."""
        return str(unescape(self))

    def _dom_node(self, domroot: minidom.Document) -> minidom.Text:
        return domroot.createTextNode(str(self))

    def copy(self) -> Self:
        """Return a new instance of the same class with the same text."""
        return self.__class__(str(self))

    def deepcopy(self) -> Self:
        """Same as `copy()`; there are no children to copy."""
        return self.copy()

    def pformat(self, indent: str = '    ', level: int = 0) -> str:
        """Return an indented pseudo-XML representation of the text.

        With a true ``document.settings.detailed`` value, the output is a
        ``<#text>`` tag followed by the `repr()` of every line.  Otherwise
        (also when that setting cannot be looked up), the output is the
        indented lines of `astext()`.
        """
        try:
            if self.document.settings.detailed:
                tag = '%s%s' % (indent*level, '<#text>')
                inner = indent * (level+1)
                quoted = (inner + repr(chunk)
                          for chunk in self.splitlines(True))
                return '\n'.join((tag, *quoted)) + '\n'
        except AttributeError:
            pass  # no document or no `detailed` setting: plain output
        prefix = indent * level
        rows = [prefix + row for row in self.astext().splitlines()]
        return '\n'.join(rows) + '\n' if rows else ''

    # rstrip and lstrip are used by substitution definitions where
    # they are expected to return a Text instance, this was formerly
    # taken care of by UserString.

    def rstrip(self, chars: str | None = None) -> Self:
        stripped = str.rstrip(self, chars)
        return self.__class__(stripped)

    def lstrip(self, chars: str | None = None) -> Self:
        stripped = str.lstrip(self, chars)
        return self.__class__(stripped)
480class Element(Node):
481 """
482 `Element` is the superclass to all specific elements.
484 Elements contain attributes and child nodes.
485 They can be described as a cross between a list and a dictionary.
487 Elements emulate dictionaries for external [#]_ attributes, indexing by
488 attribute name (a string). To set the attribute 'att' to 'value', do::
490 element['att'] = 'value'
492 .. [#] External attributes correspond to the XML element attributes.
493 From its `Node` superclass, Element also inherits "internal"
494 class attributes that are accessed using the standard syntax, e.g.
495 ``element.parent``.
497 There are two special attributes: 'ids' and 'names'. Both are
498 lists of unique identifiers: 'ids' conform to the regular expression
499 ``[a-z](-?[a-z0-9]+)*`` (see the make_id() function for rationale and
500 details). 'names' serve as user-friendly interfaces to IDs; they are
501 case- and whitespace-normalized (see the fully_normalize_name() function).
503 Elements emulate lists for child nodes (element nodes and/or text
504 nodes), indexing by integer. To get the first child node, use::
506 element[0]
508 to iterate over the child nodes (without descending), use::
510 for child in element:
511 ...
513 Elements may be constructed using the ``+=`` operator. To add one new
514 child node to element, do::
516 element += node
518 This is equivalent to ``element.append(node)``.
520 To add a list of multiple child nodes at once, use the same ``+=``
521 operator::
523 element += [node1, node2]
525 This is equivalent to ``element.extend([node1, node2])``.
526 """
528 list_attributes: Final = ('ids', 'classes', 'names', 'dupnames')
529 """Tuple of attributes that are initialized to empty lists.
531 NOTE: Derived classes should update this value when supporting
532 additional list attributes.
533 """
535 valid_attributes: Final = list_attributes + ('source',)
536 """Tuple of attributes that are valid for elements of this class.
538 NOTE: Derived classes should update this value when supporting
539 additional attributes.
540 """
542 common_attributes: Final = valid_attributes
543 """Tuple of `common attributes`__ known to all Doctree Element classes.
545 __ https://docutils.sourceforge.io/docs/ref/doctree.html#common-attributes
546 """
548 known_attributes: Final = common_attributes
549 """Alias for `common_attributes`. Will be removed in Docutils 2.0."""
551 basic_attributes: Final = list_attributes
552 """Common list attributes. Deprecated. Will be removed in Docutils 2.0."""
554 local_attributes: Final = ('backrefs',)
555 """Obsolete. Will be removed in Docutils 2.0."""
557 content_model: ClassVar[_ContentModelTuple] = ()
558 """Python representation of the element's content model (cf. docutils.dtd).
560 A tuple of ``(category, quantifier)`` tuples with
562 :category: class or tuple of classes that are expected at this place(s)
563 in the list of children
564 :quantifier: string representation stating how many elements
565 of `category` are expected. Value is one of:
566 '.' (exactly one), '?' (zero or one),
567 '+' (one or more), '*' (zero or more).
569 NOTE: The default describes the empty element. Derived classes should
570 update this value to match their content model.
572 Provisional.
573 """
575 tagname: str | None = None
576 """The element generic identifier.
578 If None, it is set as an instance attribute to the name of the class.
579 """
581 child_text_separator: Final = '\n\n'
582 """Separator for child nodes, used by `astext()` method."""
584 def __init__(self,
585 rawsource: str = '',
586 *children,
587 **attributes: Any,
588 ) -> None:
589 self.rawsource = rawsource
590 """The raw text from which this element was constructed.
592 For informative and debugging purposes. Don't rely on its value!
594 NOTE: some elements do not set this value (default '').
595 """
596 if isinstance(rawsource, Element):
597 raise TypeError('First argument "rawsource" must be a string.')
599 self.children: list = []
600 """List of child nodes (elements and/or `Text`)."""
602 self.extend(children) # maintain parent info
604 self.attributes: dict[str, Any] = {}
605 """Dictionary of attribute {name: value}."""
607 # Initialize list attributes.
608 for att in self.list_attributes:
609 self.attributes[att] = []
611 for att, value in attributes.items():
612 att = att.lower() # normalize attribute name
613 if att in self.list_attributes:
614 # lists are mutable; make a copy for this node
615 self.attributes[att] = value[:]
616 else:
617 self.attributes[att] = value
619 if self.tagname is None:
620 self.tagname: str = self.__class__.__name__
def _dom_node(self, domroot: minidom.Document) -> minidom.Element:
    """Return a DOM element for `self` (and its children), made by `domroot`.

    List values are serialized as space-separated, `serial_escape()`\\ d items.
    """
    dom_element = domroot.createElement(self.tagname)
    for name, value in self.attlist():
        if isinstance(value, list):
            escaped = (serial_escape('%s' % (v,)) for v in value)
            value = ' '.join(escaped)
        dom_element.setAttribute(name, '%s' % value)
    for subnode in self.children:
        dom_element.appendChild(subnode._dom_node(domroot))
    return dom_element
632 def __repr__(self) -> str:
633 data = ''
634 for c in self.children:
635 data += c.shortrepr()
636 if len(data) > 60:
637 data = data[:56] + ' ...'
638 break
639 if self['names']:
640 return '<%s "%s": %s>' % (self.tagname,
641 '; '.join(self['names']), data)
642 else:
643 return '<%s: %s>' % (self.tagname, data)
645 def shortrepr(self) -> str:
646 if self['names']:
647 return '<%s "%s"...>' % (self.tagname, '; '.join(self['names']))
648 else:
649 return '<%s...>' % self.tagname
651 def __str__(self) -> str:
652 if self.children:
653 return '%s%s%s' % (self.starttag(),
654 ''.join(str(c) for c in self.children),
655 self.endtag())
656 else:
657 return self.emptytag()
659 def starttag(self, quoteattr: Callable[[str], str] | None = None) -> str:
660 # the optional arg is used by the docutils_xml writer
661 if quoteattr is None:
662 quoteattr = pseudo_quoteattr
663 parts = [self.tagname]
664 for name, value in self.attlist():
665 if value is None: # boolean attribute
666 parts.append('%s="True"' % name)
667 continue
668 if isinstance(value, bool):
669 value = str(int(value))
670 if isinstance(value, list):
671 values = [serial_escape('%s' % (v,)) for v in value]
672 value = ' '.join(values)
673 else:
674 value = str(value)
675 value = quoteattr(value)
676 parts.append('%s=%s' % (name, value))
677 return '<%s>' % ' '.join(parts)
679 def endtag(self) -> str:
680 return '</%s>' % self.tagname
682 def emptytag(self) -> str:
683 attributes = ('%s="%s"' % (n, v) for n, v in self.attlist())
684 return '<%s/>' % ' '.join((self.tagname, *attributes))
686 def __len__(self) -> int:
687 return len(self.children)
689 def __contains__(self, key) -> bool:
690 # Test for both, children and attributes with operator ``in``.
691 if isinstance(key, str):
692 return key in self.attributes
693 return key in self.children
695 def __getitem__(self, key: str | int | slice) -> Any:
696 if isinstance(key, str):
697 return self.attributes[key]
698 elif isinstance(key, int):
699 return self.children[key]
700 elif isinstance(key, slice):
701 assert key.step in (None, 1), 'cannot handle slice with stride'
702 return self.children[key.start:key.stop]
703 else:
704 raise TypeError('element index must be an integer, a slice, or '
705 'an attribute name string')
707 def __setitem__(self, key, item) -> None:
708 if isinstance(key, str):
709 self.attributes[str(key)] = item
710 elif isinstance(key, int):
711 self.setup_child(item)
712 self.children[key] = item
713 elif isinstance(key, slice):
714 assert key.step in (None, 1), 'cannot handle slice with stride'
715 for node in item:
716 self.setup_child(node)
717 self.children[key.start:key.stop] = item
718 else:
719 raise TypeError('element index must be an integer, a slice, or '
720 'an attribute name string')
722 def __delitem__(self, key: str | int | slice) -> None:
723 if isinstance(key, str):
724 del self.attributes[key]
725 elif isinstance(key, int):
726 del self.children[key]
727 elif isinstance(key, slice):
728 assert key.step in (None, 1), 'cannot handle slice with stride'
729 del self.children[key.start:key.stop]
730 else:
731 raise TypeError('element index must be an integer, a simple '
732 'slice, or an attribute name string')
734 def __add__(self, other: list) -> list:
735 return self.children + other
737 def __radd__(self, other: list) -> list:
738 return other + self.children
740 def __iadd__(self, other) -> Self:
741 """Append a node or a list of nodes to `self.children`."""
742 if isinstance(other, Node):
743 self.append(other)
744 elif other is not None:
745 self.extend(other)
746 return self
748 def astext(self) -> str:
749 return self.child_text_separator.join(
750 [child.astext() for child in self.children])
752 def non_default_attributes(self) -> dict[str, Any]:
753 atts = {key: value for key, value in self.attributes.items()
754 if self.is_not_default(key)}
755 return atts
757 def attlist(self) -> list[tuple[str, Any]]:
758 return sorted(self.non_default_attributes().items())
760 def get(self, key: str, failobj: Any | None = None) -> Any:
761 return self.attributes.get(key, failobj)
763 def hasattr(self, attr: str) -> bool:
764 return attr in self.attributes
766 def delattr(self, attr: str) -> None:
767 if attr in self.attributes:
768 del self.attributes[attr]
770 def setdefault(self, key: str, failobj: Any | None = None) -> Any:
771 return self.attributes.setdefault(key, failobj)
# Alias: ``has_key`` is bound to the same function as ``hasattr`` above.
has_key = hasattr
775 def get_language_code(self, fallback: str = '') -> str:
776 """Return node's language tag.
778 Look iteratively in self and parents for a class argument
779 starting with ``language-`` and return the remainder of it
780 (which should be a `BCP49` language tag) or the `fallback`.
781 """
782 for cls in self.get('classes', []):
783 if cls.startswith('language-'):
784 return cls.removeprefix('language-')
785 try:
786 return self.parent.get_language_code(fallback)
787 except AttributeError:
788 return fallback
790 def append(self, item) -> None:
791 self.setup_child(item)
792 self.children.append(item)
794 def extend(self, item: Iterable) -> None:
795 for node in item:
796 self.append(node)
798 def insert(self, index: SupportsIndex, item) -> None:
799 if isinstance(item, Node):
800 self.setup_child(item)
801 self.children.insert(index, item)
802 elif item is not None:
803 self[index:index] = item
805 def pop(self, i: int = -1):
806 return self.children.pop(i)
808 def remove(self, item) -> None:
809 self.children.remove(item)
811 def index(self, item, start: int = 0, stop: int = sys.maxsize) -> int:
812 return self.children.index(item, start, stop)
814 def previous_sibling(self):
815 """Return preceding sibling node or ``None``."""
816 try:
817 i = self.parent.index(self)
818 except (AttributeError):
819 return None
820 return self.parent[i-1] if i > 0 else None
822 def section_hierarchy(self) -> list[section]:
823 """Return the element's section anchestors.
825 Return a list of all <section> elements that contain `self`
826 (including `self` if it is a <section>) and have a parent node.
828 List item ``[i]`` is the parent <section> of level i+1
829 (1: section, 2: subsection, 3: subsubsection, ...).
830 The length of the list is the element's section level.
832 See `docutils.parsers.rst.states.RSTState.check_subsection()`
833 for a usage example.
835 Provisional. May be changed or removed without warning.
836 """
837 sections = []
838 node = self
839 while node.parent is not None:
840 if isinstance(node, section):
841 sections.append(node)
842 node = node.parent
843 sections.reverse()
844 return sections
846 def is_not_default(self, key: str) -> bool:
847 if self[key] == [] and key in self.list_attributes:
848 return False
849 else:
850 return True
852 def update_basic_atts(self, dict_: Mapping[str, Any] | Element) -> None:
853 """
854 Update basic attributes ('ids', 'names', 'classes',
855 'dupnames', but not 'source') from node or dictionary `dict_`.
857 Provisional.
858 """
859 if isinstance(dict_, Node):
860 dict_ = dict_.attributes
861 for att in self.basic_attributes:
862 self.append_attr_list(att, dict_.get(att, []))
864 def append_attr_list(self, attr: str, values: Iterable[Any]) -> None:
865 """
866 For each element in values, if it does not exist in self[attr], append
867 it.
869 NOTE: Requires self[attr] and values to be sequence type and the
870 former should specifically be a list.
871 """
872 # List Concatenation
873 for value in values:
874 if value not in self[attr]:
875 self[attr].append(value)
877 def coerce_append_attr_list(
878 self, attr: str, value: list[Any] | Any) -> None:
879 """
880 First, convert both self[attr] and value to a non-string sequence
881 type; if either is not already a sequence, convert it to a list of one
882 element. Then call append_attr_list.
884 NOTE: self[attr] and value both must not be None.
885 """
886 # List Concatenation
887 if not isinstance(self.get(attr), list):
888 self[attr] = [self[attr]]
889 if not isinstance(value, list):
890 value = [value]
891 self.append_attr_list(attr, value)
893 def replace_attr(self, attr: str, value: Any, force: bool = True) -> None:
894 """
895 If self[attr] does not exist or force is True or omitted, set
896 self[attr] to value, otherwise do nothing.
897 """
898 # One or the other
899 if force or self.get(attr) is None:
900 self[attr] = value
902 def copy_attr_convert(
903 self, attr: str, value: Any, replace: bool = True) -> None:
904 """
905 If attr is an attribute of self, set self[attr] to
906 [self[attr], value], otherwise set self[attr] to value.
908 NOTE: replace is not used by this function and is kept only for
909 compatibility with the other copy functions.
910 """
911 if self.get(attr) is not value:
912 self.coerce_append_attr_list(attr, value)
914 def copy_attr_coerce(self, attr: str, value: Any, replace: bool) -> None:
915 """
916 If attr is an attribute of self and either self[attr] or value is a
917 list, convert all non-sequence values to a sequence of 1 element and
918 then concatenate the two sequence, setting the result to self[attr].
919 If both self[attr] and value are non-sequences and replace is True or
920 self[attr] is None, replace self[attr] with value. Otherwise, do
921 nothing.
922 """
923 if self.get(attr) is not value:
924 if isinstance(self.get(attr), list) or \
925 isinstance(value, list):
926 self.coerce_append_attr_list(attr, value)
927 else:
928 self.replace_attr(attr, value, replace)
930 def copy_attr_concatenate(
931 self, attr: str, value: Any, replace: bool) -> None:
932 """
933 If attr is an attribute of self and both self[attr] and value are
934 lists, concatenate the two sequences, setting the result to
935 self[attr]. If either self[attr] or value are non-sequences and
936 replace is True or self[attr] is None, replace self[attr] with value.
937 Otherwise, do nothing.
938 """
939 if self.get(attr) is not value:
940 if isinstance(self.get(attr), list) and \
941 isinstance(value, list):
942 self.append_attr_list(attr, value)
943 else:
944 self.replace_attr(attr, value, replace)
946 def copy_attr_consistent(
947 self, attr: str, value: Any, replace: bool) -> None:
948 """
949 If replace is True or self[attr] is None, replace self[attr] with
950 value. Otherwise, do nothing.
951 """
952 if self.get(attr) is not value:
953 self.replace_attr(attr, value, replace)
def update_all_atts(self,
                    dict_: Mapping[str, Any] | Element,
                    update_fun: _UpdateFun = copy_attr_consistent,
                    replace: bool = True,
                    and_source: bool = False,
                    ) -> None:
    """
    Updates all attributes from node or dictionary `dict_`.

    Appends the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') and then, for all other attributes in
    dict_, updates the same attribute in self.  When attributes with the
    same identifier appear in both self and dict_, the two values are
    merged based on the value of update_fun.  Generally, when replace is
    True, the values in self are replaced or merged with the values in
    dict_; otherwise, the values in self may be preserved or merged.  When
    and_source is True, the 'source' attribute is included in the copy.

    NOTE: When replace is False, and self contains a 'source' attribute,
          'source' is not replaced even when dict_ has a 'source'
          attribute, though it may still be merged into a list depending
          on the value of update_fun.
    NOTE: It is easier to call the update-specific methods than to pass
          the update_fun method to this function.
    """
    if isinstance(dict_, Node):
        dict_ = dict_.attributes

    # Include the source attribute when copying?
    wanted = (self.is_not_list_attribute if and_source
              else self.is_not_known_attribute)

    # Copy the basic attributes
    self.update_basic_atts(dict_)

    # Pass each remaining attribute of dict_ accepted by the filter to
    # `update_fun`.  (All basic attributes should be copied already.)
    for name in dict_:
        if wanted(name):
            update_fun(self, name, dict_[name], replace)
def update_all_atts_consistantly(self,
                                 dict_: Mapping[str, Any] | Element,
                                 replace: bool = True,
                                 and_source: bool = False,
                                 ) -> None:
    """
    Update all attributes from node or dictionary `dict_`.

    Shortcut for `update_all_atts()` with `Element.copy_attr_consistent`
    as update function: the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') are appended, then all other attributes
    of `dict_` are copied.  For an attribute present in both `self` and
    `dict_`, the value in `self` is replaced if `replace` is True and
    preserved otherwise.  When `and_source` is True, the 'source'
    attribute is included in the copy.

    NOTE: When `replace` is False and `self` contains a 'source'
          attribute, 'source' is not replaced even when `dict_` has a
          'source' attribute.
    """
    self.update_all_atts(dict_,
                         update_fun=Element.copy_attr_consistent,
                         replace=replace,
                         and_source=and_source)
def update_all_atts_concatenating(self,
                                  dict_: Mapping[str, Any] | Element,
                                  replace: bool = True,
                                  and_source: bool = False,
                                  ) -> None:
    """
    Update all attributes from node or dictionary `dict_`.

    Shortcut for `update_all_atts()` with `Element.copy_attr_concatenate`
    as update function: the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') are appended, then all other attributes
    of `dict_` are copied.  For an attribute present in both `self` and
    `dict_`:

    - if both values are lists, the two lists are concatenated and the
      result is stored in `self`;
    - otherwise the value in `self` is replaced if `replace` is True and
      preserved if it is False.

    When `and_source` is True, the 'source' attribute is included in the
    copy.

    NOTE: When `replace` is False and `self` contains a 'source'
          attribute, 'source' is not replaced even when `dict_` has a
          'source' attribute, though two list values would still be
          concatenated.
    """
    self.update_all_atts(dict_,
                         update_fun=Element.copy_attr_concatenate,
                         replace=replace,
                         and_source=and_source)
def update_all_atts_coercion(self,
                             dict_: Mapping[str, Any] | Element,
                             replace: bool = True,
                             and_source: bool = False,
                             ) -> None:
    """
    Update all attributes from node or dictionary `dict_`.

    Shortcut for `update_all_atts()` with `Element.copy_attr_coerce` as
    update function: the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') are appended, then all other attributes
    of `dict_` are copied.  For an attribute present in both `self` and
    `dict_`:

    - if either value is a list, any non-list is first converted to a
      1-element list, then the two lists are concatenated and the result
      is stored in `self`;
    - if neither value is a list, the value in `self` is replaced if
      `replace` is True and preserved otherwise.

    When `and_source` is True, the 'source' attribute is included in the
    copy.

    NOTE: When `replace` is False and `self` contains a 'source'
          attribute, 'source' is not replaced even when `dict_` has a
          'source' attribute, though it may still be merged into a list.
    """
    self.update_all_atts(dict_,
                         update_fun=Element.copy_attr_coerce,
                         replace=replace,
                         and_source=and_source)
def update_all_atts_convert(self,
                            dict_: Mapping[str, Any] | Element,
                            and_source: bool = False,
                            ) -> None:
    """
    Update all attributes from node or dictionary `dict_`.

    Shortcut for `update_all_atts()` with `Element.copy_attr_convert` as
    update function and the default value of `replace`: the basic
    attributes ('ids', 'names', 'classes', 'dupnames', but not 'source')
    are appended, then all other attributes of `dict_` are copied.  For
    an attribute present in both `self` and `dict_`, any non-list value
    is first converted to a 1-element list, then the two lists are
    concatenated and the result is stored in `self`.  When `and_source`
    is True, the 'source' attribute is included in the copy.
    """
    self.update_all_atts(dict_,
                         update_fun=Element.copy_attr_convert,
                         and_source=and_source)
def clear(self) -> None:
    """Remove all children by binding `self.children` to a new empty list.

    The previous list object is left unmodified.
    """
    self.children = list()
def replace(self, old, new) -> None:
    """Replace child `old` with `new`.

    `new` may be a single `Node`, a sequence of nodes (spliced in at the
    position of `old`), or None, in which case nothing is changed.
    `old` is looked up with `self.index()` in all three cases.
    """
    position = self.index(old)
    if new is None:
        return
    if isinstance(new, Node):
        self.setup_child(new)
        self[position] = new
    else:
        self[position:position+1] = new
def replace_self(self, new) -> None:
    """
    Replace `self` node with `new`, where `new` is a node or a
    list of nodes.

    The basic attributes of `self` are transferred to `new` (or to the
    first item if `new` is a list) when that node is an `Element`.
    Otherwise an assertion checks that no basic attribute of `self` has
    a non-empty value that would be lost.

    Provisional: the handling of node attributes will be revised.
    """
    if isinstance(new, Node):
        recipient = new
    else:
        # `new` is a list; its first item receives the attributes.
        try:
            recipient = new[0]
        except IndexError:
            recipient = None
    if isinstance(recipient, Element):
        recipient.update_basic_atts(self)
    else:
        # `recipient` is a Text node or `new` is an empty list.
        # Assert that we aren't losing any attributes.
        for att in self.basic_attributes:
            assert not self[att], \
                'Losing "%s" attribute: %s' % (att, self[att])
    self.parent.replace(self, new)
def first_child_matching_class(self,
                               childclass: type[Element] | type[Text]
                               | tuple[type[Element] | type[Text], ...],
                               start: int = 0,
                               end: int = sys.maxsize,
                               ) -> int | None:
    """
    Return the index of the first child that is an instance of `childclass`.

    The test is done with `isinstance()`, so instances of subclasses
    match, too.  Return None if no child in the range matches.

    Parameters:

    - `childclass`: A `Node` subclass to search for, or a tuple of `Node`
      classes. If a tuple, any of the classes may match.
    - `start`: Initial index to check.
    - `end`: Initial index to *not* check.
    """
    if isinstance(childclass, tuple):
        wanted = childclass
    else:
        wanted = (childclass,)
    stop = min(len(self), end)
    for index in range(start, stop):
        if isinstance(self[index], wanted):
            return index
    return None
def first_child_not_matching_class(
        self,
        childclass: type[Element] | type[Text]
        | tuple[type[Element] | type[Text], ...],
        start: int = 0,
        end: int = sys.maxsize,
        ) -> int | None:
    """
    Return the index of the first child that is *not* an instance of
    `childclass`, or None if every child in the range is one.

    Parameters:

    - `childclass`: A `Node` subclass to skip, or a tuple of `Node`
      classes. If a tuple, none of the classes may match.
    - `start`: Initial index to check.
    - `end`: Initial index to *not* check.
    """
    if isinstance(childclass, tuple):
        skipped = childclass
    else:
        skipped = (childclass,)
    stop = min(len(self), end)
    for index in range(start, stop):
        if not isinstance(self.children[index], skipped):
            return index
    return None
def pformat(self, indent: str = ' ', level: int = 0) -> str:
    """Return an indented pseudo-XML representation of this element.

    The element's start tag is written on one line, prefixed with
    `indent` repeated `level` times, followed by the `pformat()` output
    of every child at ``level + 1``.
    """
    parts = ['%s%s\n' % (indent*level, self.starttag())]
    for child in self.children:
        parts.append(child.pformat(indent, level+1))
    return ''.join(parts)
def copy(self) -> Self:
    """Return a copy of this element without its children.

    A new instance of the same class is created from `rawsource` and the
    attributes; `_document`, `source` and `line` are carried over.
    """
    clone = self.__class__(rawsource=self.rawsource, **self.attributes)
    for attribute in ('_document', 'source', 'line'):
        setattr(clone, attribute, getattr(self, attribute))
    return clone
def deepcopy(self) -> Self:
    """Return a copy of this element including deep copies of all children.

    The element itself is duplicated with `copy()`; the deep copies of
    the children are collected first and then added with `extend()`.
    """
    clone = self.copy()
    copied_children = []
    for child in self.children:
        copied_children.append(child.deepcopy())
    clone.extend(copied_children)
    return clone
def note_referenced_by(self,
                       name: str | None = None,
                       id: str | None = None,
                       ) -> None:
    """Note that this Element has been referenced by its name
    `name` or id `id`.

    Sets ``self.referenced`` to True and does the same for the nodes
    registered under `name` or `id` in the optional
    `expect_referenced_by_name` / `expect_referenced_by_id`
    dictionaries of this element.
    """
    self.referenced = True
    # Element.expect_referenced_by_* dictionaries map names or ids
    # to nodes whose ``referenced`` attribute is set to true as
    # soon as this node is referenced by the given name or id.
    # Needed for target propagation.
    waiting_by_name = getattr(self, 'expect_referenced_by_name', {})
    waiting_by_id = getattr(self, 'expect_referenced_by_id', {})
    node_for_name = waiting_by_name.get(name)
    node_for_id = waiting_by_id.get(id)
    if node_for_name:
        assert name is not None
        node_for_name.referenced = True
    if node_for_id:
        assert id is not None
        node_for_id.referenced = True
@classmethod
def is_not_list_attribute(cls, attr: str) -> bool:
    """
    Return True if and only if `attr` is NOT listed in the class's
    `list_attributes` (the basic list attributes defined for all Elements).
    """
    is_list_attribute = attr in cls.list_attributes
    return not is_list_attribute
@classmethod
def is_not_known_attribute(cls, attr: str) -> bool:
    """
    Return True if `attr` is NOT listed in the class's `common_attributes`
    (the attributes defined for all Element instances).

    Provisional. May be removed in Docutils 2.0.
    """
    is_common_attribute = attr in cls.common_attributes
    return not is_common_attribute
def validate_attributes(self) -> None:
    """Normalize and validate element attributes.

    Every attribute whose name is listed in `self.valid_attributes` is
    passed through its converter from `ATTRIBUTE_VALIDATORS` and the
    result is stored back (string values are converted to the expected
    datatype, values are normalized).  Attributes whose name starts with
    "internal:" are skipped.

    Raise `ValidationError` for invalid attributes or attribute values.
    All problems found are reported in one exception.

    Provisional.
    """
    problems = []
    for key, value in self.attributes.items():
        if key.startswith('internal:'):
            continue  # see docs/user/config.html#expose-internals
        if key in self.valid_attributes:
            try:
                normalized = ATTRIBUTE_VALIDATORS[key](value)
            except (ValueError, TypeError, KeyError) as err:
                problems.append(
                    f'Attribute "{key}" has invalid value "{value}".\n {err}')
            else:
                # Same key, new value: safe while iterating over the items.
                self.attributes[key] = normalized
        else:
            allowed = '", "'.join(self.valid_attributes)
            problems.append(f'Attribute "{key}" not one of "{allowed}".')
    if not problems:
        return
    raise ValidationError(f'Element {self.starttag()} invalid:\n '
                          + '\n '.join(problems),
                          problematic_element=self)
def validate_content(self,
                     model: _ContentModelTuple | None = None,
                     elements: Sequence | None = None,
                     ) -> list:
    """Test compliance of `elements` with `model`.

    :model: content model description, default `self.content_model`.
            It is iterated as ``(category, quantifier)`` pairs that are
            matched in order against the elements; `category` is used as
            second argument of `isinstance()`.
    :elements: list of doctree elements, default `self.children`.

    Quantifiers as handled below: ``'.'`` exactly one, ``'?'`` zero or
    one, ``'+'`` one or more, ``'*'`` zero or more elements of the
    category.

    Return list of children that do not fit in the model or raise
    `ValidationError` if the content does not comply with the `model`.

    Provisional.
    """
    if model is None:
        model = self.content_model
    if elements is None:
        elements = self.children
    # `child` is the next element still to be matched,
    # None once the elements are used up.
    ichildren = iter(elements)
    child = next(ichildren, None)
    for category, quantifier in model:
        if not isinstance(child, category):
            if quantifier in ('.', '+'):
                # required element is missing or of the wrong category
                raise ValidationError(self._report_child(child, category),
                                      problematic_element=child)
            else:  # quantifier in ('?', '*') -> optional child
                continue  # try same child with next part of content model
        else:
            # Check additional placement constraints (if applicable):
            child.validate_position()
        # advance:
        if quantifier in ('.', '?'):  # go to next element
            child = next(ichildren, None)
        else:  # if quantifier in ('*', '+'):  # pass all matching elements
            for child in ichildren:
                if not isinstance(child, category):
                    # first non-matching element: keep it in `child`
                    # for the next part of the content model
                    break
                try:
                    child.validate_position()
                except AttributeError:
                    # NOTE(review): presumably for children without a
                    # `validate_position()` method; the call above the
                    # "advance" comment is not guarded this way -- confirm.
                    pass
            else:
                # all remaining elements matched `category`
                child = None
    # Everything not consumed by the model is returned to the caller.
    return [] if child is None else [child, *ichildren]
def _report_child(self,
                  child,
                  category: Element | Iterable[Element],
                  ) -> str:
    # Return a str reporting a missing child or child of wrong category.
    #
    # `category` is a class or an iterable of classes; `child` is None
    # (missing child), a `Text` node, or an element.
    try:
        expected = category.__name__
    except AttributeError:
        # no `__name__`: treat `category` as iterable of alternatives
        expected = '> or <'.join(c.__name__ for c in category)
    header = f'Element {self.starttag()} invalid:\n'
    if child is None:
        return f'{header} Missing child of type <{expected}>.'
    if isinstance(child, Text):
        found = f'text data "{child.astext()}"'
    else:
        found = child.starttag()
    return f'{header} Expecting child of type <{expected}>, not {found}.'
def validate(self, recursive: bool = True) -> None:
    """Validate Docutils Document Tree element ("doctree").

    The attributes are checked with `validate_attributes()`, the children
    with `validate_content()`.  The first child that `validate_content()`
    returns as not fitting into the content model is reported.

    Raise ValidationError if there are violations.
    If `recursive` is True, validate also the element's descendants.

    See `The Docutils Document Tree`__ for details of the
    Docutils Document Model.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html

    Provisional (work in progress).
    """
    self.validate_attributes()

    leftovers = self.validate_content()
    if leftovers:
        # Only the first leftover child is reported.
        offender = leftovers[0]
        if isinstance(offender, Text):
            raise ValidationError(f'Element {self.starttag()} invalid:\n'
                                  f' Spurious text: "{offender.astext()}".',
                                  problematic_element=self)
        raise ValidationError(f'Element {self.starttag()} invalid:\n'
                              f' Child element {offender.starttag()} '
                              'not allowed at this position.',
                              problematic_element=offender)

    if not recursive:
        return
    for child in self:
        child.validate(recursive=recursive)
1364# ====================
1365# Element Categories
1366# ====================
1367#
1368# See https://docutils.sourceforge.io/docs/ref/doctree.html#element-hierarchy.
class Root:
    """Element at the root of a document tree.

    Category class without attributes or methods of its own.
    """
class Structural:
    """`Structural elements`__.

    Category class without attributes or methods of its own;
    element classes list it as a base class.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #structural-elements
    """
class SubStructural:
    """`Structural subelements`__ are children of `Structural` elements.

    Most Structural elements accept only specific `SubStructural` elements.

    Category class without attributes or methods of its own.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #structural-subelements
    """
class Bibliographic:
    """`Bibliographic Elements`__ (displayed document meta-data).

    Category class without attributes or methods of its own.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #bibliographic-elements
    """
class Body:
    """`Body elements`__.

    Category class without attributes or methods of its own.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#body-elements
    """
class Admonition(Body):
    """Admonitions (distinctive and self-contained notices)."""
    # Content model: a tuple of (category, quantifier) pairs;
    # here: one or more body elements.
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+
class Sequential(Body):
    """List-like body elements.

    Sub-category of `Body` without attributes or methods of its own.
    """
class General(Body):
    """Miscellaneous body elements.

    Sub-category of `Body` without attributes or methods of its own.
    """
class Special(Body):
    """Special internal body elements.

    Sub-category of `Body` without attributes or methods of its own.
    """
class Part:
    """`Body Subelements`__ always occur within specific parent elements.

    Category class without attributes or methods of its own.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#body-subelements
    """
class Decorative:
    """Decorative elements (`header` and `footer`).

    Children of `decoration`.
    """
    # Content model: a tuple of (category, quantifier) pairs;
    # here: one or more body elements.
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+
class Inline:
    """Inline elements contain text data and possibly other inline elements.

    Category class without attributes or methods of its own.
    """
1444# Orthogonal categories and Mixins
1445# ================================
class PreBibliographic:
    """Elements which may occur before Bibliographic Elements.

    Category class without attributes or methods of its own.
    """
class Invisible(Special, PreBibliographic):
    """Internal elements that don't appear in output.

    Combines the `Special` and `PreBibliographic` categories;
    no attributes or methods of its own.
    """
class Labeled:
    """Contains a `label` as its first element.

    Category class without attributes or methods of its own.
    """
class Resolvable:
    """Mixin providing the `resolved` flag."""
    # Class-level default; base class of `Referential` and `Targetable`.
    resolved: bool = False
class BackLinkable:
    """Mixin for Elements that accept a "backrefs" attribute.

    Extends the `list_attributes` and `valid_attributes` of `Element`
    with 'backrefs'.
    """

    list_attributes: Final = Element.list_attributes + ('backrefs',)
    valid_attributes: Final = Element.valid_attributes + ('backrefs',)

    def add_backref(self: Element, refid: str) -> None:
        """Append `refid` to the element's "backrefs" list."""
        backrefs = self['backrefs']
        backrefs.append(refid)
class Referential(Resolvable):
    """Elements holding a cross-reference (outgoing hyperlink).

    Inherits the `resolved` flag from `Resolvable`.
    """
class Targetable(Resolvable):
    """Cross-reference targets (incoming hyperlink)."""
    # Class-level default; `Element.note_referenced_by()` sets the
    # instance attribute to True.
    referenced: int = 0

    indirect_reference_name: str | None = None
    """Holds the whitespace_normalized_name (contains mixed case) of a target.

    This was required for MoinMoin <= 1.9 compatibility.

    Deprecated, will be removed in Docutils 1.0.
    """
class Titular:
    """Title, sub-title, or informal heading (rubric).

    Category class without attributes or methods of its own.
    """
class TextElement(Element):
    """
    An element which directly contains text.

    Its children are all `Text` or `Inline` subclass nodes.  You can
    check whether an element's context is inline simply by checking whether
    its immediate parent is a `TextElement` instance (including subclasses).
    This is handy for nodes like `image` that can appear both inline and as
    standalone body elements.

    If passing children to `__init__()`, make sure to set `text` to
    ``''`` or some other suitable value.
    """
    content_model: Final = (((Text, Inline), '*'),)
    # (#PCDATA | %inline.elements;)*

    child_text_separator: Final = ''
    """Separator for child nodes, used by `astext()` method."""

    def __init__(self,
                 rawsource: str = '',
                 text: str = '',
                 *children,
                 **attributes: Any,
                 ) -> None:
        """Initialize the element.

        A non-empty `text` is wrapped in a `Text` node that becomes the
        first child, followed by `children`.
        """
        if text:
            children = (Text(text), *children)
        Element.__init__(self, rawsource, *children, **attributes)
class FixedTextElement(TextElement):
    """An element which directly contains preformatted text.

    Every instance gets the attribute ``xml:space="preserve"``.
    """

    valid_attributes: Final = Element.valid_attributes + ('xml:space',)

    def __init__(self,
                 rawsource: str = '',
                 text: str = '',
                 *children,
                 **attributes: Any,
                 ) -> None:
        """Initialize as `TextElement`, then set "xml:space" to "preserve".

        The assignment comes last, so an "xml:space" value passed in
        `attributes` is overwritten.
        """
        super().__init__(rawsource, text, *children, **attributes)
        self.attributes['xml:space'] = 'preserve'
class PureTextElement(TextElement):
    """An element which only contains text, no children."""
    # At most one `Text` node; no inline elements.
    content_model: Final = ((Text, '?'),)  # (#PCDATA)
1547# =================================
1548# Concrete Document Tree Elements
1549# =================================
1550#
1551# See https://docutils.sourceforge.io/docs/ref/doctree.html#element-reference
1553# Decorative Elements
1554# ===================
# Decorative element; optional first child of `decoration`.
class header(Decorative, Element): pass
# Decorative element; optional last child of `decoration`.
class footer(Decorative, Element): pass
1560# Structural Subelements
1561# ======================
class title(Titular, PreBibliographic, SubStructural, TextElement):
    """Title of `document`, `section`, `topic` and generic `admonition`.
    """
    # Accepts the attributes "auto" and "refid" in addition to the
    # attributes valid for every `Element`.
    valid_attributes: Final = Element.valid_attributes + ('auto', 'refid')
class subtitle(Titular, PreBibliographic, SubStructural, TextElement):
    """Sub-title of `document`, `section` and `sidebar`."""

    def validate_position(self) -> None:
        """Check position of subtitle: must follow a title.

        Raise `ValidationError` if the subtitle is the first child
        of its parent.
        """
        parent = self.parent
        if not parent or parent.index(self) != 0:
            return
        raise ValidationError(f'Element {parent.starttag()} invalid:'
                              '\n <subtitle> only allowed after <title>.',
                              problematic_element=self)
class meta(PreBibliographic, SubStructural, Element):
    """Container for "invisible" bibliographic data, or meta-data."""
    # Attributes accepted in addition to those valid for every `Element`.
    valid_attributes: Final = Element.valid_attributes + (
        'content', 'dir', 'http-equiv', 'lang', 'media', 'name', 'scheme')
class docinfo(SubStructural, Element):
    """Container for displayed document meta-data."""
    # One or more elements of the `Bibliographic` category.
    content_model: Final = ((Bibliographic, '+'),)
    # (%bibliographic.elements;)+
class decoration(PreBibliographic, SubStructural, Element):
    """Container for `header` and `footer`."""
    content_model: Final = ((header, '?'),  # Empty element doesn't make sense,
                            (footer, '?'),  # but is simpler to define.
                            )
    # (header?, footer?)

    def get_header(self) -> header:
        """Return the `header` child, inserting a new one at index 0
        if the first child is not a `header`."""
        first = self.children[0] if self.children else None
        if not isinstance(first, header):
            self.insert(0, header())
        return self.children[0]

    def get_footer(self) -> footer:
        """Return the `footer` child, appending a new one
        if the last child is not a `footer`."""
        last = self.children[-1] if self.children else None
        if not isinstance(last, footer):
            self.append(footer())
        return self.children[-1]
class transition(SubStructural, Element):
    """Transitions__ are breaks between untitled text parts.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#transition
    """

    def validate_position(self) -> None:
        """Check additional constraints on `transition` placement.

        A transition may not begin or end a section or document,
        nor may two transitions be immediately adjacent.

        Raise `ValidationError` listing all violated constraints.
        """
        header = f'Element {self.parent.starttag()} invalid:'
        problems = []
        predecessor = self.previous_sibling()
        # A transition following title, subtitle, meta, or decoration
        # still counts as "at the beginning of a document or section".
        leading = (title, subtitle, meta, decoration)
        if predecessor is None or isinstance(predecessor, leading):
            problems.append(
                '<transition> may not begin a section or document.')
        last_index = len(self.parent) - 1
        if self.parent.index(self) == last_index:
            problems.append('<transition> may not end a section or document.')
        if isinstance(predecessor, transition):
            problems.append(
                '<transition> may not directly follow another transition.')
        if problems:
            raise ValidationError('\n '.join([header, *problems]),
                                  problematic_element=self)
1641# Structural Elements
1642# ===================
class topic(Structural, Element):
    """
    Topics__ are non-recursive, mini-sections.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#topic
    """
    # Optional title followed by one or more body elements.
    content_model: Final = ((title, '?'), (Body, '+'))
    # (title?, (%body.elements;)+)
class sidebar(Structural, Element):
    """
    Sidebars__ are like parallel documents providing related material.

    A sidebar is typically offset by a border and "floats" to the side
    of the page

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#sidebar
    """
    # Optional title, optional subtitle, then one or more topics or
    # body elements.
    content_model: Final = ((title, '?'),
                            (subtitle, '?'),
                            ((topic, Body), '+'),
                            )
    # ((title, subtitle?)?, (%body.elements; | topic)+)
    # "subtitle only after title" is ensured in `subtitle.validate_position()`.
class section(Structural, Element):
    """Document section__. The main unit of hierarchy.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#section
    """
    # recursive content model, see below


# Assigned after the class statement because the model refers to
# `section` itself.
section.content_model = ((title, '.'),
                         (subtitle, '?'),
                         ((Body, topic, sidebar, transition), '*'),
                         ((section, transition), '*'),
                         )
# (title, subtitle?, %structure.model;)
# Correct transition placement is ensured in `transition.validate_position()`.
1688# Root Element
1689# ============
class document(Root, Element):
    """
    The document root element.

    Do not instantiate this class directly; use
    `docutils.utils.new_document()` instead.
    """
    # Accepts the attribute "title" in addition to the attributes valid
    # for every `Element`.
    valid_attributes: Final = Element.valid_attributes + ('title',)
    # Sequence of (category, quantifier) pairs, matched in order against
    # the children by `Element.validate_content()`.
    content_model: Final = ((title, '?'),
                            (subtitle, '?'),
                            (meta, '*'),
                            (decoration, '?'),
                            (docinfo, '?'),
                            (transition, '?'),
                            ((Body, topic, sidebar, transition), '*'),
                            ((section, transition), '*'),
                            )
    # ( (title, subtitle?)?,
    #   meta*,
    #   decoration?,
    #   (docinfo, transition?)?,
    #   %structure.model; )
    # Additional restrictions for `subtitle` and `transition` are tested
    # with the respective `validate_position()` methods.
def __init__(self,
             settings: Values,
             reporter: Reporter,
             *args,
             **kwargs: Any,
             ) -> None:
    """Set up the document root and its (initially empty) registries.

    `settings` and `reporter` are stored as instance attributes;
    `args` and `kwargs` are passed on to `Element.__init__()`.
    """
    Element.__init__(self, *args, **kwargs)

    self.current_source: StrPath | None = None
    """Path to or description of the input source being processed."""

    self.current_line: int | None = None
    """Line number (1-based) of `current_source`."""

    self.settings: Values = settings
    """Runtime settings data record."""

    self.reporter: Reporter = reporter
    """System message generator."""

    self.indirect_targets: list[target] = []
    """List of indirect target nodes."""

    self.substitution_defs: dict[str, substitution_definition] = {}
    """Mapping of substitution names to substitution_definition nodes."""

    self.substitution_names: dict[str, str] = {}
    """Mapping of case-normalized to case-sensitive substitution names."""

    self.refnames: dict[str, list[Element]] = {}
    """Mapping of names to lists of referencing nodes."""

    self.refids: dict[str, list[Element]] = {}
    """(Incomplete) Mapping of ids to lists of referencing nodes."""

    self.nameids: dict[str, str] = {}
    """Mapping of names to unique id's."""

    self.nametypes: dict[str, bool] = {}
    """Mapping of names to hyperlink type. True: explicit, False: implicit.
    """

    self.ids: dict[str, Element] = {}
    """Mapping of ids to nodes."""

    self.footnote_refs: dict[str, list[footnote_reference]] = {}
    """Mapping of footnote labels to lists of footnote_reference nodes."""

    self.citation_refs: dict[str, list[citation_reference]] = {}
    """Mapping of citation labels to lists of citation_reference nodes."""

    self.autofootnotes: list[footnote] = []
    """List of auto-numbered footnote nodes."""

    self.autofootnote_refs: list[footnote_reference] = []
    """List of auto-numbered footnote_reference nodes."""

    self.symbol_footnotes: list[footnote] = []
    """List of symbol footnote nodes."""

    self.symbol_footnote_refs: list[footnote_reference] = []
    """List of symbol footnote_reference nodes."""

    self.footnotes: list[footnote] = []
    """List of manually-numbered footnote nodes."""

    self.citations: list[citation] = []
    """List of citation nodes."""

    self.autofootnote_start: int = 1
    """Initial auto-numbered footnote number."""

    self.symbol_footnote_start: int = 0
    """Initial symbol footnote symbol index."""

    # Keys are ID prefixes (strings), values the last number used
    # with that prefix, see `set_id()`.
    self.id_counter: Counter[str] = Counter()
    """Numbers added to otherwise identical IDs."""

    self.parse_messages: list[system_message] = []
    """System messages generated while parsing."""

    self.transform_messages: list[system_message] = []
    """System messages generated while applying transforms."""

    # delayed import (cf. the note next to the module-level imports)
    import docutils.transforms
    self.transformer: Transformer = docutils.transforms.Transformer(self)
    """Storage for transforms to be applied to this document."""

    self.include_log: list[tuple[StrPath, tuple]] = []
    """The current source's parents (to detect inclusion loops)."""

    self.decoration: decoration | None = None
    """Document's `decoration` node."""

    # The root element is its own document.
    self._document: document = self
def __getstate__(self) -> dict[str, Any]:
    """
    Return a shallow copy of the instance dictionary with the
    unpicklable references ('reporter' and 'transformer') set to None.
    """
    return {**self.__dict__, 'reporter': None, 'transformer': None}
def asdom(self, dom: ModuleType | None = None) -> minidom.Document:
    """Return a DOM representation of this document.

    `dom` is the DOM implementation module to use; it defaults to
    `xml.dom.minidom`, which is only imported when needed.
    """
    if dom is None:
        from xml.dom import minidom
        dom = minidom
    dom_document = dom.Document()
    root_element = self._dom_node(dom_document)
    dom_document.appendChild(root_element)
    return dom_document
def set_id(self,
           node: Element,
           msgnode: Element | None = None,
           suggested_prefix: str = '',
           ) -> str:
    """Register the IDs of `node` or generate and register a new one.

    If `node` already has IDs, they are recorded in `self.ids`; an
    error message is generated (and appended to `msgnode` if given) for
    every ID that is already registered for another node.  The last of
    the node's IDs is returned.

    Otherwise, an ID is derived from the node's names or, failing that,
    from a prefix and a running number.  It is appended to
    ``node['ids']``, registered in `self.ids`, and returned.

    `suggested_prefix` replaces the node's tag name in auto-generated
    IDs if the prefix ends with "%".
    """
    if node['ids']:
        # register and check for duplicates
        for id in node['ids']:
            self.ids.setdefault(id, node)
            if self.ids[id] is not node:
                msg = self.reporter.error(f'Duplicate ID: "{id}" used by '
                                          f'{self.ids[id].starttag()} '
                                          f'and {node.starttag()}',
                                          base_node=node)
                if msgnode is not None:
                    msgnode += msg
        return id
    # generate and set id
    id_prefix = self.settings.id_prefix
    auto_id_prefix = self.settings.auto_id_prefix
    base_id = ''
    id = ''
    # Try the names in turn; stop at the first one that gives
    # a non-empty ID which is not yet in use.
    for name in node['names']:
        if id_prefix:  # allow names starting with numbers
            base_id = make_id('x'+name)[1:]
        else:
            base_id = make_id(name)
        # TODO: normalize id-prefix? (would make code simpler)
        id = id_prefix + base_id
        if base_id and id not in self.ids:
            break
    else:
        # No usable name-derived ID (or no names at all):
        # build the ID from a prefix and a running number.
        if base_id and auto_id_prefix.endswith('%'):
            # disambiguate name-derived ID
            # TODO: remove second condition after announcing change
            prefix = id + '-'
        else:
            prefix = id_prefix + auto_id_prefix
            if prefix.endswith('%'):
                # replace the trailing "%" with the suggested prefix or
                # the ID-normalized tag name
                prefix = f"""{prefix[:-1]}{suggested_prefix
                            or make_id(node.tagname)}-"""
        # Increment the counter for this prefix until the ID is unused.
        while True:
            self.id_counter[prefix] += 1
            id = f'{prefix}{self.id_counter[prefix]}'
            if id not in self.ids:
                break
    node['ids'].append(id)
    self.ids[id] = node
    return id
def set_name_id_map(self,
                    node: Element,
                    id: str,
                    msgnode: Element | None = None,
                    explicit: bool = False,
                    ) -> None:
    """
    Update the name/id mappings.

    `self.nameids` maps names to IDs. The value ``None`` indicates
    that the name is a "dupname" (i.e. there are already at least
    two targets with the same name and type).

    `self.nametypes` maps names to booleans representing
    hyperlink target type (True==explicit, False==implicit).

    A name that is not yet registered is simply recorded with `id` and
    `explicit`.  A name that is already registered is passed on to
    `set_duplicate_name_id()`.

    The following state transition table shows how `self.nameids` items
    ("id") and `self.nametypes` items ("type") change with new input
    (a call to this method), and what actions are performed:

    ========  ====  ========  ====  ========  ========  =======  ======
    Input     Old State       New State       Action             Notes
    --------  --------------  --------------  -----------------  ------
    type      id    type      id    type      dupname   report
    ========  ====  ========  ====  ========  ========  =======  ======
    explicit                  new   explicit
    implicit                  new   implicit
    explicit  old   explicit  None  explicit  new,old   WARNING  [#ex]_
    implicit  old   explicit  old   explicit  new       INFO     [#ex]_
    explicit  old   implicit  new   explicit  old       INFO     [#ex]_
    implicit  old   implicit  None  implicit  new,old   INFO     [#ex]_
    explicit  None  explicit  None  explicit  new       WARNING
    implicit  None  explicit  None  explicit  new       INFO
    explicit  None  implicit  new   explicit
    implicit  None  implicit  None  implicit  new       INFO
    ========  ====  ========  ====  ========  ========  =======  ======

    .. [#ex] Do not clear the name-to-id map or invalidate the old target
       if both old and new targets refer to identical URIs or reference
       names. The new target is invalidated regardless.

    Provisional. There will be changes to prefer explicit reference names
    as base for an element's ID.
    """
    # Iterate over a snapshot:
    # `set_duplicate_name_id()` modifies node['names'].
    names_snapshot = tuple(node['names'])
    for name in names_snapshot:
        if name not in self.nameids:
            self.nameids[name] = id
            self.nametypes[name] = explicit
            continue
        self.set_duplicate_name_id(node, id, name, msgnode, explicit)
def set_duplicate_name_id(self,
                          node: Element,
                          id: str,
                          name: str,
                          msgnode: Element | None,
                          explicit: bool,
                          ) -> None:
    """Handle a target `name` that is already registered in `self.nameids`.

    Update `self.nameids` and `self.nametypes` for `name`, invalidate the
    new and/or the old target with `dupname()`, and generate a system
    message (level 1 or 2) where appropriate.  The message is appended
    to `msgnode` if that is given and still validates afterwards.

    `node` is the new target, `id` its ID, and `explicit` tells whether
    it is an explicit target.
    """
    old_id = self.nameids[name]  # None if name is only dupname
    old_explicit = self.nametypes[name]
    # NOTE(review): `old_node` is None if `old_id` is None or not
    # registered in `self.ids`; the `old_node.get()` calls below seem to
    # assume that every non-None `old_id` is registered -- confirm.
    old_node = self.ids.get(old_id)
    level = 0  # system message level: 1-info, 2-warning

    # The name is explicit as soon as one of its targets is explicit.
    self.nametypes[name] = old_explicit or explicit

    if old_id is not None and (
          'refname' in node and node['refname'] == old_node.get('refname')
          or 'refuri' in node and node['refuri'] == old_node.get('refuri')
       ):
        # indirect targets with same reference -> keep old target
        level = 1
        ref = node.get('refuri') or node.get('refname')
        s = f'Duplicate name "{name}" for external target "{ref}".'
        dupname(node, name)
    elif explicit:
        if old_explicit:
            level = 2
            s = f'Duplicate explicit target name: "{name}".'
            dupname(node, name)
            if old_id is not None:
                dupname(old_node, name)
                self.nameids[name] = None
        else:  # new explicit, old implicit -> override
            self.nameids[name] = id
            if old_id is not None:
                level = 1
                s = f'Target name overrides implicit target name "{name}".'
                dupname(old_node, name)
    else:  # new name is implicit
        level = 1
        s = f'Duplicate implicit target name: "{name}".'
        dupname(node, name)
        if old_id is not None and not old_explicit:
            dupname(old_node, name)
            self.nameids[name] = None

    # `s` is bound in every branch that sets a non-zero `level`.
    if level:
        backrefs = [id]
        # don't add backref id for empty targets (not shown in output)
        if isinstance(node, target) and 'refuri' in node:
            backrefs = []
        msg = self.reporter.system_message(level, s,
                                           backrefs=backrefs,
                                           base_node=node)
        # try appending near to the problem:
        if msgnode is not None:
            msgnode += msg
            try:
                msgnode.validate(recursive=False)
            except ValidationError:
                # detach -> will be handled by `Messages` transform
                msgnode.pop()
                msg.parent = None
def has_name(self, name: str) -> bool:
    """Return True if `name` is a key of the `self.nameids` mapping."""
    return name in self.nameids
# "note" here is an imperative verb: "take note of".
def note_implicit_target(
        self, target: Element, msgnode: Element | None = None) -> None:
    """Assign an ID to `target` and register its names as implicit.

    `msgnode` is handed on to `set_id()` and `set_name_id_map()`.
    """
    # TODO: Postpone ID creation and register reference name instead of ID?
    target_id = self.set_id(target, msgnode)
    self.set_name_id_map(target, target_id, msgnode, explicit=False)
def note_explicit_target(
        self, target: Element, msgnode: Element | None = None) -> None:
    """Assign an ID to `target` and register its names as explicit.

    `msgnode` is handed on to `set_id()` and `set_name_id_map()`.
    """
    # TODO: if the id matching the name is applied to an implicit target,
    # transfer it to this target and put a "disambiguated" id on the other.
    target_id = self.set_id(target, msgnode)
    self.set_name_id_map(target, target_id, msgnode, explicit=True)
def note_refname(self, node: Element) -> None:
    """Record `node` in `self.refnames` under its "refname" attribute."""
    referencing_nodes = self.refnames.setdefault(node['refname'], [])
    referencing_nodes.append(node)
def note_refid(self, node: Element) -> None:
    """Record `node` in `self.refids` under its "refid" attribute."""
    referencing_nodes = self.refids.setdefault(node['refid'], [])
    referencing_nodes.append(node)
def note_indirect_target(self, target: target) -> None:
    """Append `target` to `self.indirect_targets`.

    A target with a non-empty "names" list is also passed to
    `note_refname()`.
    """
    self.indirect_targets.append(target)
    if not target['names']:
        return
    self.note_refname(target)
def note_anonymous_target(self, target: target) -> None:
    """Assign an ID to `target` via `set_id()`; nothing else is recorded."""
    self.set_id(target)
def note_autofootnote(self, footnote: footnote) -> None:
    """Assign an ID to `footnote` and append it to `self.autofootnotes`."""
    self.set_id(footnote)
    self.autofootnotes.append(footnote)
def note_autofootnote_ref(self, ref: footnote_reference) -> None:
    """Assign an ID to `ref` and append it to `self.autofootnote_refs`."""
    self.set_id(ref)
    self.autofootnote_refs.append(ref)
def note_symbol_footnote(self, footnote: footnote) -> None:
    """Assign an ID to `footnote` and append it to `self.symbol_footnotes`.
    """
    self.set_id(footnote)
    self.symbol_footnotes.append(footnote)
def note_symbol_footnote_ref(self, ref: footnote_reference) -> None:
    """Assign an ID to `ref` and append it to `self.symbol_footnote_refs`.
    """
    self.set_id(ref)
    self.symbol_footnote_refs.append(ref)
def note_footnote(self, footnote: footnote) -> None:
    """Assign an ID to `footnote` and append it to `self.footnotes`."""
    self.set_id(footnote)
    self.footnotes.append(footnote)
def note_footnote_ref(self, ref: footnote_reference) -> None:
    """Assign an ID to `ref` and file it under its "refname".

    The reference is stored in `self.footnote_refs` and also handed to
    `note_refname()`.
    """
    self.set_id(ref)
    same_name_refs = self.footnote_refs.setdefault(ref['refname'], [])
    same_name_refs.append(ref)
    self.note_refname(ref)
def note_citation(self, citation: citation) -> None:
    """Append `citation` to `self.citations` (no ID is assigned here)."""
    self.citations.append(citation)
def note_citation_ref(self, ref: citation_reference) -> None:
    """Assign an ID to `ref` and file it under its "refname".

    The reference is stored in `self.citation_refs` and also handed to
    `note_refname()`.
    """
    self.set_id(ref)
    same_name_refs = self.citation_refs.setdefault(ref['refname'], [])
    same_name_refs.append(ref)
    self.note_refname(ref)
def note_substitution_def(self,
                          subdef: substitution_definition,
                          def_name: str,
                          msgnode: Element | None = None,
                          ) -> None:
    """Register the substitution definition `subdef` as `def_name`.

    The name is whitespace-normalized before use.  If it is already
    registered, an error message is generated (and appended to `msgnode`
    if given) and the earlier definition is passed to `dupname()`.
    """
    name = whitespace_normalize_name(def_name)
    definitions = self.substitution_defs
    if name in definitions:
        msg = self.reporter.error(
            'Duplicate substitution definition name: "%s".' % name,
            base_node=subdef)
        if msgnode is not None:
            msgnode += msg
        dupname(definitions[name], name)
    # keep only the last definition:
    definitions[name] = subdef
    # case-insensitive mapping:
    normalized = fully_normalize_name(name)
    self.substitution_names[normalized] = name
def note_substitution_ref(self,
                          subref: substitution_reference,
                          refname: str,
                          ) -> None:
    """Store the whitespace-normalized `refname` in `subref['refname']`."""
    subref['refname'] = whitespace_normalize_name(refname)
def note_pending(
        self, pending: pending, priority: int | None = None) -> None:
    """Pass `pending` and `priority` to `self.transformer.add_pending()`."""
    self.transformer.add_pending(pending, priority)
def note_parse_message(self, message: system_message) -> None:
    """Append `message` to `self.parse_messages`."""
    self.parse_messages.append(message)
def note_transform_message(self, message: system_message) -> None:
    """Append `message` to `self.transform_messages`."""
    self.transform_messages.append(message)
def note_source(self,
                source: StrPath | None,
                offset: int | None,
                ) -> None:
    """Update `self.current_source` and `self.current_line`.

    A truthy `source` is converted with `os.fspath()`; a falsy one is
    stored unchanged.  `offset` is stored incremented by one (i.e.
    `offset` + 1), or as None if it is None.
    """
    self.current_source = source and os.fspath(source)
    self.current_line = None if offset is None else offset + 1
def copy(self) -> Self:
    """Return a new instance of the same class without the children.

    The new object is created from `self.settings`, `self.reporter` and
    the keyword arguments in `self.attributes`; `source` and `line` are
    taken over from the original.
    """
    duplicate = self.__class__(
        self.settings, self.reporter, **self.attributes)
    duplicate.source, duplicate.line = self.source, self.line
    return duplicate
def get_decoration(self) -> decoration:
    """Return `self.decoration`, creating and inserting it on demand.

    A new `decoration` element is inserted before the first child that
    is neither a `Titular` nor a `meta` instance, or appended if there is
    no such child.
    """
    if self.decoration:
        return self.decoration
    self.decoration: decoration = decoration()
    position = self.first_child_not_matching_class((Titular, meta))
    if position is not None:
        self.insert(position, self.decoration)
    else:
        self.append(self.decoration)
    return self.decoration
2121# Bibliographic Elements
2122# ======================
# Simple bibliographic elements without class-specific code.
# `address` derives from `FixedTextElement`, all others from `TextElement`.
class author(Bibliographic, TextElement): pass
class organization(Bibliographic, TextElement): pass
class address(Bibliographic, FixedTextElement): pass
class contact(Bibliographic, TextElement): pass
class version(Bibliographic, TextElement): pass
class revision(Bibliographic, TextElement): pass
class status(Bibliographic, TextElement): pass
class date(Bibliographic, TextElement): pass
class copyright(Bibliographic, TextElement): pass  # NoQA: A001 (builtin name)
class authors(Bibliographic, Element):
    """Container for author information for documents with multiple authors.
    """
    content_model: Final = ((author, '+'),
                            (organization, '?'),
                            (address, '?'),
                            (contact, '?'),
                            )
    # (author, organization?, address?, contact?)+

    def validate_content(self,
                         model: _ContentModelTuple | None = None,
                         elements: Sequence | None = None,
                         ) -> list:
        """Repeatedly test for children matching the content model.

        `model` and `elements` are forwarded to the base class
        implementation (which is also what receives the default None
        values).  Elements left over after one pass are validated against
        the model again until none remain.

        Return the (empty) list of remaining elements.

        Provisional.
        """
        # Forward the arguments instead of silently ignoring them.
        relics = super().validate_content(model=model, elements=elements)
        while relics:
            relics = super().validate_content(model=model, elements=relics)
        return relics
2159# Body Elements
2160# =============
2161#
2162# General
2163# -------
2164#
2165# Miscellaneous Body Elements and related Body Subelements (Part)
# Text elements without class-specific code.
class paragraph(General, TextElement): pass
class rubric(Titular, General, TextElement): pass


class compound(General, Element):
    """General body element holding one or more body elements."""
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+


class container(General, Element):
    """General body element holding one or more body elements."""
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+


class attribution(Part, TextElement):
    """Visible reference to the source of a `block_quote`."""


class block_quote(General, Element):
    """An extended quotation, set off from the main text."""
    content_model: Final = ((Body, '+'), (attribution, '?'))
    # ((%body.elements;)+, attribution?)


class reference(General, Inline, Referential, TextElement):
    """Text element usable as body element or inline element.

    Accepts the attributes "anonymous", "name", "refid", "refname", and
    "refuri" in addition to the common `Element` attributes.
    """
    valid_attributes: Final = Element.valid_attributes + (
        'anonymous', 'name', 'refid', 'refname', 'refuri')
2194# Lists
2195# -----
2196#
2197# Lists (Sequential) and related Body Subelements (Part)
class list_item(Part, Element):
    """Item of a `bullet_list` or `enumerated_list`.

    May hold any number of body elements (including none).
    """
    content_model: Final = ((Body, '*'),)  # (%body.elements;)*


class bullet_list(Sequential, Element):
    """List of one or more `list_item` elements; accepts "bullet"."""
    valid_attributes: Final = Element.valid_attributes + ('bullet',)
    content_model: Final = ((list_item, '+'),)  # (list_item+)


class enumerated_list(Sequential, Element):
    """List of one or more `list_item` elements.

    Accepts the attributes "enumtype", "prefix", "suffix", and "start".
    """
    valid_attributes: Final = Element.valid_attributes + (
        'enumtype', 'prefix', 'suffix', 'start')
    content_model: Final = ((list_item, '+'),)  # (list_item+)


# Text elements used in a `definition_list_item`.
class term(Part, TextElement): pass
class classifier(Part, TextElement): pass


class definition(Part, Element):
    """Definition of a `term` in a `definition_list`."""
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+


class definition_list_item(Part, Element):
    """Item of a `definition_list`: term(s), classifiers, and definition."""
    content_model: Final = ((term, '.'),
                            ((classifier, term), '*'),
                            (definition, '.'),
                            )
    # ((term, classifier*)+, definition)


class definition_list(Sequential, Element):
    """List of terms and their definitions.

    Can be used for glossaries or dictionaries, to describe or
    classify things, for dialogues, or to itemize subtopics.
    """
    content_model: Final = ((definition_list_item, '+'),)
    # (definition_list_item+)
# Text element; first child of a `field`.
class field_name(Part, TextElement): pass


class field_body(Part, Element):
    """Second child of a `field`; may hold any number of body elements."""
    content_model: Final = ((Body, '*'),)  # (%body.elements;)*


class field(Part, Bibliographic, Element):
    """Pair of exactly one `field_name` and one `field_body`."""
    content_model: Final = ((field_name, '.'), (field_body, '.'))
    # (field_name, field_body)


class field_list(Sequential, Element):
    """List of label & data pairs.

    Typically rendered as a two-column list.
    Also used for extension syntax or special processing.
    """
    content_model: Final = ((field, '+'),)  # (field+)
class option_string(Part, PureTextElement):
    """A literal command-line option. Typically monospaced."""


class option_argument(Part, PureTextElement):
    """Placeholder text for option arguments."""
    valid_attributes: Final = Element.valid_attributes + ('delimiter',)

    def astext(self) -> str:
        """Return the text content, preceded by the "delimiter" attribute.

        The fallback delimiter is used when the attribute is missing.
        """
        return self.get('delimiter', ' ') + TextElement.astext(self)


class option(Part, Element):
    """Option element in an `option_list_item`.

    Groups an option string with zero or more option argument placeholders.
    """
    child_text_separator: Final = ''
    content_model: Final = ((option_string, '.'), (option_argument, '*'))
    # (option_string, option_argument*)


class option_group(Part, Element):
    """Groups together one or more `option` elements, all synonyms."""
    child_text_separator: Final = ', '
    content_model: Final = ((option, '+'),)  # (option+)


class description(Part, Element):
    """Description of a command-line option."""
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+


class option_list_item(Part, Element):
    """Container for a pair of `option_group` and `description` elements.
    """
    child_text_separator: Final = ' '
    content_model: Final = ((option_group, '.'), (description, '.'))
    # (option_group, description)


class option_list(Sequential, Element):
    """Two-column list of command-line options and descriptions."""
    content_model: Final = ((option_list_item, '+'),)  # (option_list_item+)
2308# Pre-formatted text blocks
2309# -------------------------
# Body elements derived from `FixedTextElement`, no class-specific code.
class literal_block(General, FixedTextElement): pass
class doctest_block(General, FixedTextElement): pass


class math_block(General, FixedTextElement, PureTextElement):
    """Mathematical notation (display formula)."""


class line(Part, TextElement):
    """Single line of text in a `line_block`."""
    # class-level default; None unless set on the instance
    indent: str | None = None


class line_block(General, Element):
    """Sequence of lines and nested line blocks.
    """
    # recursive content model: (line | line_block)+


# Assigned after the class statement because the model refers to
# `line_block` itself.
line_block.content_model = (((line, line_block), '+'),)
2333# Admonitions
2334# -----------
2335# distinctive and self-contained notices
# Specific admonitions: no class-specific code, everything is inherited
# from `Admonition` and `Element`.
class attention(Admonition, Element): pass
class caution(Admonition, Element): pass
class danger(Admonition, Element): pass
class error(Admonition, Element): pass
class important(Admonition, Element): pass
class note(Admonition, Element): pass
class tip(Admonition, Element): pass
class hint(Admonition, Element): pass
class warning(Admonition, Element): pass


class admonition(Admonition, Element):
    """Admonition starting with a `title`, followed by body elements."""
    content_model: Final = ((title, '.'), (Body, '+'))
    # (title, (%body.elements;)+)
2353# Footnote and citation
2354# ---------------------
class label(Part, PureTextElement):
    """Visible identifier for footnotes and citations."""


class footnote(General, BackLinkable, Element, Labeled, Targetable):
    """Labelled note providing additional context (footnote or endnote)."""
    valid_attributes: Final = Element.valid_attributes + ('auto', 'backrefs')
    content_model: Final = ((label, '?'), (Body, '+'))
    # (label?, (%body.elements;)+)
    # The label will become required in Docutils 1.0.


class citation(General, BackLinkable, Element, Labeled, Targetable):
    """Labelled body element: a `label` followed by body elements."""
    content_model: Final = ((label, '.'), (Body, '+'))
    # (label, (%body.elements;)+)
2373# Graphical elements
2374# ------------------
class image(General, Inline, Element):
    """Reference to an image resource.

    May be body element or inline element.
    """
    valid_attributes: Final = Element.valid_attributes + (
        'uri', 'alt', 'align', 'height', 'width', 'scale', 'loading')

    def astext(self) -> str:
        """Return the "alt" attribute (empty string if not set)."""
        return self.get('alt', '')


# Text element; optional child of a `figure`.
class caption(Part, TextElement): pass


class legend(Part, Element):
    """A wrapper for text accompanying a `figure` that is not the caption."""
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+


class figure(General, Element):
    """A formal figure, generally an illustration, with a title."""
    valid_attributes: Final = Element.valid_attributes + ('align', 'width')
    # first child: an `image` or a `reference`; caption and legend optional
    content_model: Final = (((image, reference), '.'),
                            (caption, '?'),
                            (legend, '?'),
                            )
    # (image, ((caption, legend?) | legend))
    # TODO: According to the DTD, a caption or legend is required
    # but rST allows "bare" figures which are formatted differently from
    # images (floating in LaTeX, nested in a <figure> in HTML). [bugs: #489]
2409# Tables
2410# ------
class entry(Part, Element):
    """An entry in a `row` (a table cell)."""
    valid_attributes: Final = Element.valid_attributes + (
        'align', 'char', 'charoff', 'colname', 'colsep', 'morecols',
        'morerows', 'namest', 'nameend', 'rowsep', 'valign')
    content_model: Final = ((Body, '*'),)
    # %tbl.entry.mdl -> (%body.elements;)*


class row(Part, Element):
    """Row of table cells."""
    valid_attributes: Final = Element.valid_attributes + ('rowsep', 'valign')
    content_model: Final = ((entry, '+'),)  # (%tbl.row.mdl;) -> entry+


class colspec(Part, Element):
    """Specifications for a column in a `tgroup`."""
    valid_attributes: Final = Element.valid_attributes + (
        'align', 'char', 'charoff', 'colname', 'colnum',
        'colsep', 'colwidth', 'rowsep', 'stub')

    def propwidth(self) -> int|float:
        """Return numerical value of "colwidth__" attribute. Default 1.

        Raise ValueError if "colwidth" is zero, negative, or a *fixed value*.

        Provisional.

        __ https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth
        """
        # Move current implementation of validate_colwidth() here
        # in Docutils 1.0
        # A missing attribute is passed on as empty string.
        return validate_colwidth(self.get('colwidth', ''))


class thead(Part, Element):
    """Row(s) that form the head of a `tgroup`."""
    valid_attributes: Final = Element.valid_attributes + ('valign',)
    content_model: Final = ((row, '+'),)  # (row+)


class tbody(Part, Element):
    """Body of a `tgroup`."""
    valid_attributes: Final = Element.valid_attributes + ('valign',)
    content_model: Final = ((row, '+'),)  # (row+)


class tgroup(Part, Element):
    """A portion of a table. Most tables have just one `tgroup`."""
    valid_attributes: Final = Element.valid_attributes + (
        'align', 'cols', 'colsep', 'rowsep')
    content_model: Final = ((colspec, '*'), (thead, '?'), (tbody, '.'))
    # (colspec*, thead?, tbody)


class table(General, Element):
    """A data arrangement with rows and columns."""
    valid_attributes: Final = Element.valid_attributes + (
        'align', 'colsep', 'frame', 'pgwide', 'rowsep', 'width')
    content_model: Final = ((title, '?'), (tgroup, '+'))
    # (title?, tgroup+)
2475# Special purpose elements
2476# ------------------------
2477# Body elements for internal use or special requests.
class comment(Invisible, FixedTextElement, PureTextElement):
    """Author notes, hidden from the output."""


class substitution_definition(Invisible, TextElement):
    """Invisible text element; accepts the attributes "ltrim" and "rtrim".
    """
    valid_attributes: Final = Element.valid_attributes + ('ltrim', 'rtrim')


class target(Invisible, Inline, TextElement, Targetable):
    """Invisible, targetable text element.

    Accepts the attributes "anonymous", "refid", "refname", and "refuri"
    in addition to the common `Element` attributes.
    """
    valid_attributes: Final = Element.valid_attributes + (
        'anonymous', 'refid', 'refname', 'refuri')
class system_message(Special, BackLinkable, PreBibliographic, Element):
    """
    System message element.

    Do not instantiate this class directly; use
    ``document.reporter.info/warning/error/severe()`` instead.
    """
    valid_attributes: Final = BackLinkable.valid_attributes + (
        'level', 'line', 'type')
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+

    def __init__(self,
                 message: str | None = None,
                 *children,
                 **attributes: Any,
                 ) -> None:
        """Set up the element.

        A non-empty `message` is wrapped in a `paragraph` that becomes the
        first child.  A "rawsource" keyword argument is removed from
        `attributes` and passed on as the element's raw source.
        """
        rawsource = attributes.pop('rawsource', '')
        if message:
            p = paragraph('', message)
            children = (p,) + children
        try:
            Element.__init__(self, rawsource, *children, **attributes)
        except BaseException:  # catchall; the exception is re-raised
            # Debugging aid.  Write to stderr: stdout may carry the
            # generated document.
            print('system_message: children=%r' % (children,),
                  file=sys.stderr)
            raise

    def astext(self) -> str:
        """Return "source:line: (type/level) text".

        The "source", "type", and "level" attributes must be set;
        a missing "line" attribute is rendered as empty string.
        """
        line = self.get('line', '')
        return '%s:%s: (%s/%s) %s' % (self['source'], line, self['type'],
                                      self['level'], Element.astext(self))
class pending(Invisible, Element):
    """
    Placeholder for pending operations.

    The "pending" element is used to encapsulate a pending operation: the
    operation (transform), the point at which to apply it, and any data it
    requires.  Only the pending operation's location within the document is
    stored in the public document tree (by the "pending" object itself); the
    operation and its data are stored in the "pending" object's internal
    instance attributes.

    For example, say you want a table of contents in your reStructuredText
    document.  The easiest way to specify where to put it is from within the
    document, with a directive::

        .. contents::

    But the "contents" directive can't do its work until the entire document
    has been parsed and possibly transformed to some extent.  So the directive
    code leaves a placeholder behind that will trigger the second phase of its
    processing, something like this::

        <pending ...public attributes...> + internal attributes

    Use `document.note_pending()` so that the
    `docutils.transforms.Transformer` stage of processing can run all pending
    transforms.
    """

    def __init__(self,
                 transform: Transform,
                 details: Mapping[str, Any] | None = None,
                 rawsource: str = '',
                 *children,
                 **attributes: Any,
                 ) -> None:
        Element.__init__(self, rawsource, *children, **attributes)

        self.transform: Transform = transform
        """The `docutils.transforms.Transform` class implementing the pending
        operation."""

        # a missing (or empty) `details` argument becomes a new empty dict
        self.details: Mapping[str, Any] = details or {}
        """Detail data (dictionary) required by the pending operation."""

    def pformat(self, indent: str = ' ', level: int = 0) -> str:
        """Return the element's pretty-printed form plus internal attributes.

        The internal attributes (`transform` and the sorted `details`
        items) are listed in comment-like lines after the output of
        `Element.pformat()`.
        """
        # NOTE(review): whitespace inside the string literals of this method
        # is reproduced as found -- confirm the widths against the
        # reference implementation before relying on the exact layout.
        internals = ['.. internal attributes:',
                     ' .transform: %s.%s' % (self.transform.__module__,
                                             self.transform.__name__),
                     ' .details:']
        details = sorted(self.details.items())
        for key, value in details:
            if isinstance(value, Node):
                # a single node: key line + indented pformat() output
                internals.append('%7s%s:' % ('', key))
                internals.extend(['%9s%s' % ('', line)
                                  for line in value.pformat().splitlines()])
            elif (value
                  and isinstance(value, list)
                  and isinstance(value[0], Node)):
                # non-empty list whose first item is a node
                internals.append('%7s%s:' % ('', key))
                for v in value:
                    internals.extend(['%9s%s' % ('', line)
                                      for line in v.pformat().splitlines()])
            else:
                # anything else: show the repr() on the key line
                internals.append('%7s%s: %r' % ('', key, value))
        return (Element.pformat(self, indent, level)
                + ''.join((' %s%s\n' % (indent * level, line))
                          for line in internals))

    def copy(self) -> Self:
        """Return a new instance without children.

        `transform`, `details` (same object, not a copy), `rawsource`, and
        the attributes are passed to the constructor; `_document`,
        `source`, and `line` are taken over from the original.
        """
        obj = self.__class__(self.transform, self.details, self.rawsource,
                             **self.attributes)
        obj._document = self._document
        obj.source = self.source
        obj.line = self.line
        return obj
class raw(Special, Inline, PreBibliographic,
          FixedTextElement, PureTextElement):
    """Raw data that is to be passed untouched to the Writer.

    Can be used as Body element or Inline element.
    Accepts the attributes "format" and "xml:space" in addition to the
    common `Element` attributes.
    """
    valid_attributes: Final = Element.valid_attributes + (
        'format', 'xml:space')
2612# Inline Elements
2613# ===============
# Inline text elements without class-specific code.
class abbreviation(Inline, TextElement): pass
class acronym(Inline, TextElement): pass
class emphasis(Inline, TextElement): pass
class generated(Inline, TextElement): pass
class inline(Inline, TextElement): pass
class literal(Inline, TextElement): pass
class strong(Inline, TextElement): pass
class subscript(Inline, TextElement): pass
class superscript(Inline, TextElement): pass
class title_reference(Inline, TextElement): pass


class footnote_reference(Inline, Referential, PureTextElement):
    """Inline element; accepts "auto", "refid", and "refname"."""
    valid_attributes: Final = Element.valid_attributes + (
        'auto', 'refid', 'refname')


class citation_reference(Inline, Referential, PureTextElement):
    """Inline element; accepts "refid" and "refname"."""
    valid_attributes: Final = Element.valid_attributes + ('refid', 'refname')


class substitution_reference(Inline, TextElement):
    """Inline text element; accepts the attribute "refname"."""
    valid_attributes: Final = Element.valid_attributes + ('refname',)


class math(Inline, PureTextElement):
    """Mathematical notation in running text."""


class problematic(Inline, TextElement):
    """Inline text element; accepts "refid", "refname", and "refuri"."""
    valid_attributes: Final = Element.valid_attributes + (
        'refid', 'refname', 'refuri')
2649# ========================================
2650# Auxiliary Classes, Functions, and Data
2651# ========================================
# The names are given as one whitespace-separated string that is split
# into a list; layout and line breaks inside the string do not matter.
node_class_names: Sequence[str] = """
    Text
    abbreviation acronym address admonition attention attribution author
        authors
    block_quote bullet_list
    caption caution citation citation_reference classifier colspec comment
        compound contact container copyright
    danger date decoration definition definition_list definition_list_item
        description docinfo doctest_block document
    emphasis entry enumerated_list error
    field field_body field_list field_name figure footer
        footnote footnote_reference
    generated
    header hint
    image important inline
    label legend line line_block list_item literal literal_block
    math math_block meta
    note
    option option_argument option_group option_list option_list_item
        option_string organization
    paragraph pending problematic
    raw reference revision row rubric
    section sidebar status strong subscript substitution_definition
        substitution_reference subtitle superscript system_message
    table target tbody term tgroup thead tip title title_reference topic
        transition
    version
    warning""".split()
"""A list of names of all concrete Node subclasses."""
class NodeVisitor:
    """
    Abstract superclass implementing the "Visitor" pattern [GoF95]_ for
    document tree traversals.

    `Node.walk()` calls `dispatch_visit()` when it enters a node;
    `Node.walkabout()` additionally calls `dispatch_departure()` before it
    leaves the node.  The dispatch methods look up a method named
    "``visit_`` + node class name" or "``depart_`` + node class name",
    respectively, and call it with the node.

    This base class is meant for visitors that must implement the
    ``visit_...`` & ``depart_...`` methods for *all* compulsory node types
    they encounter (such as `docutils.writers.Writer` subclasses):
    a missing method raises an exception (except for optional nodes).

    Use `SparseNodeVisitor` if only some node types are of interest and
    `GenericNodeVisitor` for (mostly or entirely) uniform processing.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    optional: ClassVar[tuple[str, ...]] = ('meta',)
    """
    Tuple containing node class names (as strings).

    No exception will be raised if writers do not implement visit
    or departure functions for these node classes.

    Used to ensure transitional compatibility with existing 3rd-party writers.
    """

    def __init__(self, document: document, /) -> None:
        self.document: document = document

    def dispatch_visit(self, node) -> None:
        """
        Look up "``visit_`` + node class name" on self and call it with
        `node`; fall back to `self.unknown_visit` if there is no such
        method.  The call is announced via the reporter's `debug()`.
        """
        class_name = node.__class__.__name__
        handler = getattr(self, 'visit_' + class_name, self.unknown_visit)
        announcement = (
            'docutils.nodes.NodeVisitor.dispatch_visit calling %s for %s'
            % (handler.__name__, class_name))
        self.document.reporter.debug(announcement)
        return handler(node)

    def dispatch_departure(self, node) -> None:
        """
        Look up "``depart_`` + node class name" on self and call it with
        `node`; fall back to `self.unknown_departure` if there is no such
        method.  The call is announced via the reporter's `debug()`.
        """
        class_name = node.__class__.__name__
        handler = getattr(self, 'depart_' + class_name,
                          self.unknown_departure)
        announcement = (
            'docutils.nodes.NodeVisitor.dispatch_departure calling %s for %s'
            % (handler.__name__, class_name))
        self.document.reporter.debug(announcement)
        return handler(node)

    def unknown_visit(self, node) -> None:
        """
        Called when entering unknown `Node` types.

        Raise `NotImplementedError` unless the node class is listed in
        `optional` and the "strict_visitor" setting is false.
        """
        class_name = node.__class__.__name__
        if (not self.document.settings.strict_visitor
                and class_name in self.optional):
            return
        raise NotImplementedError(
            '%s visiting unknown node type: %s'
            % (self.__class__, class_name))

    def unknown_departure(self, node) -> None:
        """
        Called before exiting unknown `Node` types.

        Raise `NotImplementedError` unless the node class is listed in
        `optional` and the "strict_visitor" setting is false.
        """
        class_name = node.__class__.__name__
        if (not self.document.settings.strict_visitor
                and class_name in self.optional):
            return
        raise NotImplementedError(
            '%s departing unknown node type: %s'
            % (self.__class__, class_name))
class SparseNodeVisitor(NodeVisitor):
    """
    Base class for sparse traversals, where only certain node types are of
    interest.  When ``visit_...`` & ``depart_...`` methods should be
    implemented for *all* node types (such as for `docutils.writers.Writer`
    subclasses), subclass `NodeVisitor` instead.

    The class body is empty; do-nothing ``visit_...`` and ``depart_...``
    methods for the names in `node_class_names` are attached by
    `_add_node_class_names()`.
    """
class GenericNodeVisitor(NodeVisitor):
    """
    Generic "Visitor" abstract superclass, for simple traversals.

    Unless overridden, each ``visit_...`` method calls `default_visit()`, and
    each ``depart_...`` method (when using `Node.walkabout()`) calls
    `default_departure()`. `default_visit()` (and `default_departure()`) must
    be overridden in subclasses.

    Define fully generic visitors by overriding `default_visit()` (and
    `default_departure()`) only. Define semi-generic visitors by overriding
    individual ``visit_...()`` (and ``depart_...()``) methods also.

    `NodeVisitor.unknown_visit()` (`NodeVisitor.unknown_departure()`) should
    be overridden for default behavior.
    """

    def default_visit(self, node):
        """Override for generic, uniform traversals.

        This base implementation always raises `NotImplementedError`.
        """
        raise NotImplementedError

    def default_departure(self, node):
        """Override for generic, uniform traversals.

        This base implementation always raises `NotImplementedError`.
        """
        raise NotImplementedError
2811def _call_default_visit(self: GenericNodeVisitor, node) -> None:
2812 self.default_visit(node)
2815def _call_default_departure(self: GenericNodeVisitor, node) -> None:
2816 self.default_departure(node)
2819def _nop(self: SparseNodeVisitor, node) -> None:
2820 pass
def _add_node_class_names(names) -> None:
    """Attach default handlers for every node class name in `names`.

    `GenericNodeVisitor` gets ``visit_...``/``depart_...`` methods that
    delegate to `default_visit()`/`default_departure()`;
    `SparseNodeVisitor` gets methods that do nothing.
    """
    for class_name in names:
        visit_name = 'visit_' + class_name
        depart_name = 'depart_' + class_name
        setattr(GenericNodeVisitor, visit_name, _call_default_visit)
        setattr(GenericNodeVisitor, depart_name, _call_default_departure)
        setattr(SparseNodeVisitor, visit_name, _nop)
        setattr(SparseNodeVisitor, depart_name, _nop)
# Attach the default ``visit_...``/``depart_...`` methods for all names in
# `node_class_names` at import time.
_add_node_class_names(node_class_names)
class TreeCopyVisitor(GenericNodeVisitor):
    """
    Build a complete copy of a tree or branch, element attributes included.

    While walking, `parent` is the container that receives the copies of
    the nodes visited next and `parent_stack` holds the containers of the
    enclosing levels.
    """

    def __init__(self, document: document) -> None:
        super().__init__(document)
        self.parent_stack: list[list] = []
        self.parent: list = []

    def get_tree_copy(self):
        """Return the first item of `parent` (the copy of the root node)."""
        return self.parent[0]

    def default_visit(self, node) -> None:
        """Copy the current node, and make it the new acting parent."""
        clone = node.copy()
        acting_parent = self.parent
        acting_parent.append(clone)
        self.parent_stack.append(acting_parent)
        self.parent = clone

    def default_departure(self, node) -> None:
        """Restore the previous acting parent."""
        previous_parent = self.parent_stack.pop()
        self.parent = previous_parent
2860# Custom Exceptions
2861# =================
class ValidationError(ValueError):
    """Invalid Docutils Document Tree Element.

    `msg` is the error message; `problematic_element` (optional) is
    stored in the attribute of the same name (None if not given).
    """

    def __init__(self,
                 msg: str,
                 problematic_element: Element | None = None,
                 ) -> None:
        super().__init__(msg)
        self.problematic_element = problematic_element
class TreePruningException(Exception):
    """
    Base class for `NodeVisitor`-related tree pruning exceptions.

    Raise subclasses from within ``visit_...`` or ``depart_...`` methods
    called from `Node.walk()` and `Node.walkabout()` tree traversals to prune
    the tree traversed.
    """


class SkipChildren(TreePruningException):
    """
    Do not visit any children of the current node.  The current node's
    siblings and ``depart_...`` method are not affected.
    """


class SkipSiblings(TreePruningException):
    """
    Do not visit any more siblings (to the right) of the current node.  The
    current node's children and its ``depart_...`` method are not affected.
    """


class SkipNode(TreePruningException):
    """
    Do not visit the current node's children, and do not call the current
    node's ``depart_...`` method.
    """


class SkipDeparture(TreePruningException):
    """
    Do not call the current node's ``depart_...`` method.  The current node's
    children and siblings are not affected.
    """


class NodeFound(TreePruningException):
    """
    Raise to indicate that the target of a search has been found.  This
    exception must be caught by the client; it is not caught by the traversal
    code.
    """


class StopTraversal(TreePruningException):
    """
    Stop the traversal altogether.  The current node's ``depart_...`` method
    is not affected.  The parent nodes' ``depart_...`` methods are also called
    as usual.  No other nodes are visited.  This is an alternative to
    NodeFound that does not cause exception handling to trickle up to the
    caller.
    """
# definition moved here from `utils` to avoid circular import dependency
def unescape(text: str,
             restore_backslashes: bool = False,
             respect_whitespace: bool = False,
             ) -> str:
    """
    Return `text` with null characters removed or turned into backslashes.

    With `restore_backslashes`, every null character is replaced by a
    backslash.  Otherwise, null characters are dropped; a space or newline
    directly following a null character is dropped together with it.
    """
    # `respect_whitespace` is ignored (since introduction 2016-12-16)
    if restore_backslashes:
        return text.replace('\x00', '\\')
    # null+whitespace pairs first, so that the whitespace goes away, too
    for escaped in ('\x00 ', '\x00\n', '\x00'):
        text = text.replace(escaped, '')
    return text
def make_id(string: str) -> str:
    """
    Convert `string` into an identifier and return it.

    Docutils identifiers will conform to the regular expression
    ``[a-z](-?[a-z0-9]+)*``.  For CSS compatibility, identifiers (the "class"
    and "id" attributes) should have no underscores, colons, or periods.
    Hyphens may be used.

    - The `HTML 4.01 spec`_ defines identifiers based on SGML tokens:

          ID and NAME tokens must begin with a letter ([A-Za-z]) and may be
          followed by any number of letters, digits ([0-9]), hyphens ("-"),
          underscores ("_"), colons (":"), and periods (".").

    - However the `CSS1 spec`_ defines identifiers based on the "name" token,
      a tighter interpretation ("flex" tokenizer notation; "latin1" and
      "escape" 8-bit characters have been replaced with entities)::

          unicode     \\[0-9a-f]{1,4}
          latin1      [¡-ÿ]
          escape      {unicode}|\\[ -~¡-ÿ]
          nmchar      [-a-z0-9]|{latin1}|{escape}
          name        {nmchar}+

    The CSS1 "nmchar" rule does not include underscores ("_"), colons (":"),
    or periods ("."), therefore "class" and "id" attributes should not contain
    these characters.  They should be replaced with hyphens ("-").  Combined
    with HTML's requirements (the first character must be a letter; no
    "unicode", "latin1", or "escape" characters), this results in the
    ``[a-z](-?[a-z0-9]+)*`` pattern.

    .. _HTML 4.01 spec: https://www.w3.org/TR/html401
    .. _CSS1 spec: https://www.w3.org/TR/REC-CSS1
    """
    ident = string.lower()
    # Replace special letters by ASCII letters: digraphs first,
    # then the single-letter substitutions.
    for table in (_non_id_translate_digraphs, _non_id_translate):
        ident = ident.translate(table)
    # Get rid of non-ASCII characters: decompose (NFKD), then drop
    # everything that cannot be encoded as ASCII.
    decomposed = unicodedata.normalize('NFKD', ident)
    ident = decomposed.encode('ascii', 'ignore').decode('ascii')
    # Shrink runs of whitespace, then replace every run of characters
    # that are not allowed in an id by a single hyphen.
    collapsed = ' '.join(ident.split())
    ident = _non_id_chars.sub('-', collapsed)
    # Strip leading hyphens/digits and trailing hyphens.
    return str(_non_id_at_ends.sub('', ident))
# Patterns and translation tables used by `make_id()`.

# One or more characters other than ASCII lowercase letters and digits.
_non_id_chars: re.Pattern[str] = re.compile('[^a-z0-9]+')
# A leading run of hyphens and digits, or a trailing run of hyphens.
_non_id_at_ends: re.Pattern[str] = re.compile('^[-0-9]+|-+$')
# Code points (for `str.translate()`) that are replaced by a single
# ASCII letter.
# NOTE(review): presumably listed because the NFKD normalization in
# `make_id()` does not decompose these characters -- confirm.
_non_id_translate: dict[int, str] = {
    0x00f8: 'o',       # o with stroke
    0x0111: 'd',       # d with stroke
    0x0127: 'h',       # h with stroke
    0x0131: 'i',       # dotless i
    0x0142: 'l',       # l with stroke
    0x0167: 't',       # t with stroke
    0x0180: 'b',       # b with stroke
    0x0183: 'b',       # b with topbar
    0x0188: 'c',       # c with hook
    0x018c: 'd',       # d with topbar
    0x0192: 'f',       # f with hook
    0x0199: 'k',       # k with hook
    0x019a: 'l',       # l with bar
    0x019e: 'n',       # n with long right leg
    0x01a5: 'p',       # p with hook
    0x01ab: 't',       # t with palatal hook
    0x01ad: 't',       # t with hook
    0x01b4: 'y',       # y with hook
    0x01b6: 'z',       # z with stroke
    0x01e5: 'g',       # g with stroke
    0x0225: 'z',       # z with hook
    0x0234: 'l',       # l with curl
    0x0235: 'n',       # n with curl
    0x0236: 't',       # t with curl
    0x0237: 'j',       # dotless j
    0x023c: 'c',       # c with stroke
    0x023f: 's',       # s with swash tail
    0x0240: 'z',       # z with swash tail
    0x0247: 'e',       # e with stroke
    0x0249: 'j',       # j with stroke
    0x024b: 'q',       # q with hook tail
    0x024d: 'r',       # r with stroke
    0x024f: 'y',       # y with stroke
}
# Code points (for `str.translate()`) that are replaced by two
# ASCII letters.
_non_id_translate_digraphs: dict[int, str] = {
    0x00df: 'sz',      # ligature sz
    0x00e6: 'ae',      # ae
    0x0153: 'oe',      # ligature oe
    0x0238: 'db',      # db digraph
    0x0239: 'qp',      # qp digraph
}
def dupname(node: Element, name: str) -> None:
    """Move `name` from the "names" to the "dupnames" of `node`.

    `name` is appended to ``node['dupnames']`` and removed from
    ``node['names']``; `node` is also flagged as referenced.
    """
    node['dupnames'].append(name)
    node['names'].remove(name)
    # Assume that `node` is referenced, even though it isn't;
    # we don't want to throw unnecessary system_messages.
    node.referenced = True
def fully_normalize_name(name: str) -> str:
    """Return `name` in lowercase with whitespace runs collapsed.

    Leading and trailing whitespace is removed; every internal run of
    whitespace becomes a single space.
    """
    words = name.lower().split()
    return ' '.join(words)
def whitespace_normalize_name(name: str) -> str:
    """Return `name` with whitespace runs collapsed; case is preserved.

    Leading and trailing whitespace is removed; every internal run of
    whitespace becomes a single space.
    """
    words = name.split()
    return ' '.join(words)
def serial_escape(value: str) -> str:
    """Escape string values that are elements of a list, for serialization.

    Backslashes are doubled and spaces are prefixed with a backslash.
    """
    # Single pass: each backslash or space in the input is replaced
    # exactly once, so the inserted backslashes are never re-escaped.
    escapes = {ord('\\'): r'\\', ord(' '): r'\ '}
    return value.translate(escapes)
def split_name_list(s: str) -> list[str]:
    r"""Split a string at non-escaped spaces.

    Backslashes escape internal spaces (cf. `serial_escape()`).
    Return list of "names" (after removing escaping backslashes).

    >>> split_name_list(r'a\ n\ame two\\ n\\ames')
    ['a name', 'two\\', 'n\\ames']

    Provisional.
    """
    # Use the NULL character as a temporary escape marker:
    marked = s.replace('\\', '\x00')              # backslash -> NULL
    marked = marked.replace('\x00\x00', '\\')     # doubled -> literal backslash
    marked = marked.replace('\x00 ', '\x00\x00')  # escaped space -> NULL NULL
    names = []
    for chunk in marked.split(' '):
        # restore internal spaces, drop other escaping characters
        chunk = chunk.replace('\x00\x00', ' ')
        names.append(chunk.replace('\x00', ''))
    return names
def pseudo_quoteattr(value: str) -> str:
    """Quote attributes for pseudo-xml.

    The value is wrapped in double quotes; its content is not escaped.
    """
    return f'"{value}"'
3086def parse_measure(measure: str, unit_pattern: str = '[a-zA-Zµ]*|%?'
3087 ) -> tuple[int|float, str]:
3088 """Parse a measure__, return value + unit.
3090 `unit_pattern` is a regular expression describing recognized units.
3091 The default is suited for (but not limited to) CSS3 units and SI units.
3092 It matches runs of ASCII letters or Greek mu, a single percent sign,
3093 or no unit.
3095 __ https://docutils.sourceforge.io/docs/ref/doctree.html#measure
3097 Provisional.
3098 """
3099 match = re.fullmatch(f'(-?[0-9.]+) *({unit_pattern})', measure)
3100 try:
3101 try:
3102 value = int(match.group(1))
3103 except ValueError:
3104 value = float(match.group(1))
3105 unit = match.group(2)
3106 except (AttributeError, ValueError):
3107 raise ValueError(f'"{measure}" is no valid measure.')
3108 return value, unit
3111# Methods to validate `Element attribute`__ values.
3113# Ensure the expected Python `data type`__, normalize, and check for
3114# restrictions.
3115#
# The functions can be used to convert `str` values (e.g. from an XML
# representation) or to validate an existing document tree or node.
3118#
3119# Cf. `Element.validate_attributes()`, `docutils.parsers.docutils_xml`,
3120# and the `attribute_validating_functions` mapping below.
3121#
3122# __ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-reference
3123# __ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-types
def create_keyword_validator(*keywords: str) -> Callable[[str], str]:
    """
    Return a function that validates a `str` against given `keywords`.

    The returned function returns its argument unchanged if it is one of
    the `keywords` and raises `ValueError` otherwise.

    Provisional.
    """
    def validate_keywords(value: str) -> str:
        if value in keywords:
            return value
        choices = '", "'.join(keywords)
        raise ValueError(f'"{value}" is not one of "{choices}".')
    return validate_keywords
def validate_identifier(value: str) -> str:
    """
    Validate identifier key or class name.

    Return `value` unchanged if `make_id()` leaves it unchanged,
    raise `ValueError` otherwise.

    Used in `idref.type`__ and for the tokens in `validate_identifier_list()`.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#idref-type

    Provisional.
    """
    if value == make_id(value):
        return value
    raise ValueError(f'"{value}" is no valid id or class name.')
def validate_identifier_list(value: str | list[str]) -> list[str]:
    """
    A (space-separated) list of ids or class names.

    `value` may be a `list` or a `str` with space separated
    ids or class names (cf. `validate_identifier()`).
    A `str` is split at whitespace; a `list` is returned as is
    after its items have been checked.

    Used in `classnames.type`__, `ids.type`__, and `idrefs.type`__.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#classnames-type
    __ https://docutils.sourceforge.io/docs/ref/doctree.html#ids-type
    __ https://docutils.sourceforge.io/docs/ref/doctree.html#idrefs-type

    Provisional.
    """
    tokens = value.split() if isinstance(value, str) else value
    for token in tokens:
        validate_identifier(token)
    return tokens
def validate_measure(measure: str) -> str:
    """
    Validate a measure__ (number + optional unit).  Return normalized `str`.

    The result is the parsed number directly followed by the unit.
    See `parse_measure()` for a function returning a "number + unit" tuple.

    The unit may be a run of ASCII letters or Greek mu, a single percent sign,
    or the empty string. Case is preserved.

    Provisional.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#measure
    """
    return '{}{}'.format(*parse_measure(measure))
3193def validate_colwidth(measure: str|int|float) -> int|float:
3194 """Validate the "colwidth__" attribute.
3196 Provisional:
3197 `measure` must be a `str` and will be returned as normalized `str`
3198 (with unit "*" for proportional values) in Docutils 1.0.
3200 The default unit will change to "pt" in Docutils 2.0.
3202 __ https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth
3203 """
3204 if isinstance(measure, (int, float)):
3205 value = measure
3206 elif measure in ('*', ''): # short for '1*'
3207 value = 1
3208 else:
3209 try:
3210 value, _unit = parse_measure(measure, unit_pattern='[*]?')
3211 except ValueError:
3212 value = -1
3213 if value <= 0:
3214 raise ValueError(f'"{measure}" is no proportional measure.')
3215 return value
def validate_NMTOKEN(value: str) -> str:
    """
    Validate a "name token": a `str` of ASCII letters, digits, and [-._].

    Return `value` unchanged; raise `ValueError` if it is empty or
    contains any other character.

    Provisional.
    """
    if re.fullmatch('[-._A-Za-z0-9]+', value) is None:
        raise ValueError(f'"{value}" is no NMTOKEN.')
    return value
def validate_NMTOKENS(value: str | list[str]) -> list[str]:
    """
    Validate a list of "name tokens".

    `value` may be a `list` or a `str` of whitespace-separated tokens;
    every token is checked with `validate_NMTOKEN()`.
    A `str` is split at whitespace; a `list` is returned as is.

    Provisional.
    """
    tokens = value.split() if isinstance(value, str) else value
    for token in tokens:
        validate_NMTOKEN(token)
    return tokens
def validate_refname_list(value: str | list[str]) -> list[str]:
    """
    Validate a list of `reference names`__.

    Reference names may contain all characters;
    whitespace is normalized (cf. `whitespace_normalize_name()`).

    `value` may be either a `list` of names or a `str` with
    space separated names (with internal spaces backslash escaped
    and literal backslashes doubled cf. `serial_escape()`).

    Return a new list of whitespace-normalized, unescaped reference names.

    Provisional.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#reference-name
    """
    names = split_name_list(value) if isinstance(value, str) else value
    return list(map(whitespace_normalize_name, names))
3264def validate_yesorno(value: str | int | bool) -> bool:
3265 """Validate a `%yesorno`__ (flag) value.
3267 The string literal "0" evaluates to ``False``, all other
3268 values are converterd with `bool()`.
3270 __ https://docutils.sourceforge.io/docs/ref/doctree.html#yesorno
3271 """
3272 if value == "0":
3273 return False
3274 return bool(value)
# Each key is an attribute name, each value a callable that takes the
# attribute value and returns the validated (and possibly converted) value.
# The validating functions defined in this module raise `ValueError`
# for invalid values.
ATTRIBUTE_VALIDATORS: dict[str, Callable[[str], Any]] = {
    'alt': str,  # CDATA
    'align': str,
    'anonymous': validate_yesorno,
    'auto': str,  # CDATA (only '1' or '*' are used in rST)
    'backrefs': validate_identifier_list,
    'bullet': str,  # CDATA (only '-', '+', or '*' are used in rST)
    'classes': validate_identifier_list,
    'char': str,  # from Exchange Table Model (CALS), currently ignored
    'charoff': validate_NMTOKEN,  # from CALS, currently ignored
    'colname': validate_NMTOKEN,  # from CALS, currently ignored
    'colnum': int,  # from CALS, currently ignored
    'cols': int,  # from CALS: "NMTOKEN, […] must be an integer > 0".
    'colsep': validate_yesorno,
    'colwidth': validate_colwidth,  # see docstring for pending changes
    'content': str,  # <meta>
    'delimiter': str,
    'dir': create_keyword_validator('ltr', 'rtl', 'auto'),  # <meta>
    'dupnames': validate_refname_list,
    'enumtype': create_keyword_validator('arabic', 'loweralpha', 'lowerroman',
                                         'upperalpha', 'upperroman'),
    'format': str,  # CDATA (space separated format names)
    'frame': create_keyword_validator('top', 'bottom', 'topbot', 'all',
                                      'sides', 'none'),  # from CALS, ignored
    'height': validate_measure,
    'http-equiv': str,  # <meta>
    'ids': validate_identifier_list,
    'lang': str,  # <meta>
    'level': int,
    'line': int,
    'ltrim': validate_yesorno,
    'loading': create_keyword_validator('embed', 'link', 'lazy'),
    'media': str,  # <meta>
    'morecols': int,
    'morerows': int,
    'name': whitespace_normalize_name,  # in <reference> (deprecated)
    # 'name': node_attributes.validate_NMTOKEN,  # in <meta>
    'names': validate_refname_list,
    'namest': validate_NMTOKEN,  # start of span, from CALS, currently ignored
    'nameend': validate_NMTOKEN,  # end of span, from CALS, currently ignored
    'pgwide': validate_yesorno,  # from CALS, currently ignored
    'prefix': str,
    'refid': validate_identifier,
    'refname': whitespace_normalize_name,
    'refuri': str,
    'rowsep': validate_yesorno,
    'rtrim': validate_yesorno,
    'scale': int,
    'scheme': str,
    'source': str,
    'start': int,
    'stub': validate_yesorno,
    'suffix': str,
    'title': str,
    'type': validate_NMTOKEN,
    'uri': str,
    'valign': create_keyword_validator('top', 'middle', 'bottom'),  # from CALS
    'width': validate_measure,
    'xml:space': create_keyword_validator('default', 'preserve'),
    }
"""
Mapping of `attribute names`__ to validating functions.

Provisional.

__ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-reference
"""