Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/docutils/nodes.py: 56%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# $Id: nodes.py 10312 2026-04-17 17:54:14Z milde $
2# Author: David Goodger <goodger@python.org>
3# Maintainer: docutils-develop@lists.sourceforge.net
4# Copyright: This module has been placed in the public domain.
6"""
7Docutils document tree element class library.
9The relationships and semantics of elements and attributes are documented in
10`The Docutils Document Tree`__.
12Classes in CamelCase are abstract base classes or auxiliary classes. The one
13exception is `Text`, for a text (PCDATA) node; uppercase is used to
14differentiate from element classes. Classes in lower_case_with_underscores
15are element classes, matching the XML element generic identifiers in the DTD_.
17The position of each node (the level at which it can occur) is significant and
18is represented by abstract base classes (`Root`, `Structural`, `Body`,
19`Inline`, etc.). Certain transformations will be easier because we can use
20``isinstance(node, base_class)`` to determine the position of the node in the
21hierarchy.
23__ https://docutils.sourceforge.io/docs/ref/doctree.html
24.. _DTD: https://docutils.sourceforge.io/docs/ref/docutils.dtd
25"""
27from __future__ import annotations
29__docformat__ = 'reStructuredText'
31import os
32import re
33import sys
34import unicodedata
35import warnings
36from collections import Counter
37# import xml.dom.minidom as dom # -> conditional import in Node.asdom()
38# and document.asdom()
40# import docutils.transforms # -> delayed import in document.__init__()
42TYPE_CHECKING = False
43if TYPE_CHECKING:
44 from collections.abc import (Callable, Iterable, Iterator,
45 Mapping, Sequence)
46 from types import ModuleType
47 from typing import Any, ClassVar, Final, Literal, Self, SupportsIndex
49 from docutils.utils._typing import TypeAlias
51 from xml.dom import minidom
53 from docutils.frontend import Values
54 from docutils.transforms import Transformer, Transform
55 from docutils.utils import Reporter
57 _ContentModelCategory: TypeAlias = tuple['Element' | tuple['Element', ...]]
58 _ContentModelQuantifier = Literal['.', '?', '+', '*']
59 _ContentModelItem: TypeAlias = tuple[_ContentModelCategory,
60 _ContentModelQuantifier]
61 _ContentModelTuple: TypeAlias = tuple[_ContentModelItem, ...]
63 StrPath: TypeAlias = str | os.PathLike[str]
64 """File system path. No bytes!"""
66 _UpdateFun: TypeAlias = Callable[[str, Any, bool], None]
69# ==============================
70# Functional Node Base Classes
71# ==============================
class Node:
    """Abstract base class of nodes in a document tree."""

    parent: Element | None = None
    """Back-reference to the Node immediately containing this Node."""

    children: Sequence  # defined in subclasses
    """List of child nodes (Elements or Text).

    Override in subclass instances that are not terminal nodes.
    """

    source: StrPath | None = None
    """Path or description of the input source which generated this Node."""

    line: int | None = None
    """The line number (1-based) of the beginning of this Node in `source`."""

    tagname: str  # defined in subclasses
    """The element generic identifier."""

    _document: document | None = None

    @property
    def document(self) -> document | None:
        """Return the `document` root node of the tree containing this Node.

        The value stored on this node wins; otherwise the parent is asked.
        Return None if neither this node nor any ancestor knows a document.
        """
        try:
            return self._document or self.parent.document
        except AttributeError:
            # `self.parent` is None: we reached the top without a document.
            return None

    @document.setter
    def document(self, value: document) -> None:
        self._document = value

    def __bool__(self) -> Literal[True]:
        """
        Node instances are always true, even if they're empty.  A node is more
        than a simple container.  Its boolean "truth" does not depend on
        having one or more subnodes in the doctree.

        Use `len()` to check node length.
        """
        return True

    def asdom(self,
              dom: ModuleType | None = None,
              ) -> minidom.Document | minidom.Element | minidom.Text:
        # TODO: minidom.Document is only returned by document.asdom()
        #       (which overwrites this base-class implementation)
        """Return a DOM **fragment** representation of this Node.

        Parameter `dom`: DOM implementation module providing ``Document()``;
        defaults to `xml.dom.minidom` (imported on demand).
        """
        if dom is None:
            import xml.dom.minidom as dom
        domroot = dom.Document()
        return self._dom_node(domroot)

    # NOTE(review): this listing is a whitespace-collapsed dump; confirm the
    # width of the default `indent` string against the upstream file.
    def pformat(self, indent: str = ' ', level: int = 0) -> str:
        """
        Return an indented pseudo-XML representation, for test purposes.

        Override in subclasses.
        """
        raise NotImplementedError

    def copy(self) -> Self:
        """Return a copy of self."""
        raise NotImplementedError

    def deepcopy(self) -> Self:
        """Return a deep copy of self (also copying children)."""
        raise NotImplementedError

    def astext(self) -> str:
        """Return a string representation of this Node."""
        raise NotImplementedError

    def setup_child(self, child) -> None:
        """Register `self` as parent of `child` and pass on document info.

        If a document is known, it is stored on `child`; `child.source` and
        `child.line` are filled from the document's ``current_source`` and
        ``current_line`` only when they are still None.
        """
        child.parent = self
        # The `document` property may walk up the parent chain;
        # resolve it once instead of once per use.
        doc = self.document
        if doc:
            child.document = doc
            if child.source is None:
                child.source = doc.current_source
            if child.line is None:
                child.line = doc.current_line

    def walk(self, visitor: NodeVisitor) -> bool:
        """
        Traverse a tree of `Node` objects, calling the
        `dispatch_visit()` method of `visitor` when entering each
        node.  (The `walkabout()` method is similar, except it also
        calls the `dispatch_departure()` method before exiting each
        node.)

        This tree traversal supports limited in-place tree
        modifications.  Replacing one node with one or more nodes is
        OK, as is removing an element.  However, if the node removed
        or replaced occurs after the current node, the old node will
        still be traversed, and any new nodes will not.

        Within ``visit`` methods (and ``depart`` methods for
        `walkabout()`), `TreePruningException` subclasses may be raised
        (`SkipChildren`, `SkipSiblings`, `SkipNode`, `SkipDeparture`).

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` implementation for each `Node` subclass encountered.

        Return true if we should stop the traversal.
        """
        stop = False
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walk calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except (SkipChildren, SkipNode):
                return stop
            except SkipDeparture:  # not applicable; ignore
                pass
            children = self.children
            try:
                # Iterate over a copy: visitors may modify the child list.
                for child in children[:]:
                    if child.walk(visitor):
                        stop = True
                        break
            except SkipSiblings:
                pass
        except StopTraversal:
            stop = True
        return stop

    def walkabout(self, visitor: NodeVisitor) -> bool:
        """
        Perform a tree traversal similarly to `Node.walk()` (which
        see), except also call the `dispatch_departure()` method
        before exiting each node.

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` and ``depart`` implementation for each `Node`
        subclass encountered.

        Return true if we should stop the traversal.
        """
        call_depart = True
        stop = False
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walkabout calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except SkipNode:
                return stop
            except SkipDeparture:
                call_depart = False
            children = self.children
            try:
                # Iterate over a copy: visitors may modify the child list.
                for child in children[:]:
                    if child.walkabout(visitor):
                        stop = True
                        break
            except SkipSiblings:
                pass
        except SkipChildren:
            pass
        except StopTraversal:
            stop = True
        if call_depart:
            visitor.document.reporter.debug(
                'docutils.nodes.Node.walkabout calling dispatch_departure '
                'for %s' % self.__class__.__name__)
            visitor.dispatch_departure(self)
        return stop

    def _fast_findall(self, cls: type|tuple[type]) -> Iterator:
        """Return iterator that only supports instance checks."""
        if isinstance(self, cls):
            yield self
        for child in self.children:
            yield from child._fast_findall(cls)

    def _superfast_findall(self) -> Iterator:
        """Return iterator that doesn't check for a condition."""
        # This is different from ``iter(self)`` implemented via
        # __getitem__() and __len__() in the Element subclass,
        # which yields only the direct children.
        yield self
        for child in self.children:
            yield from child._superfast_findall()

    def findall(self,
                condition: type|tuple[type]|Callable[[Node], bool]|None = None,
                include_self: bool = True,
                descend: bool = True,
                siblings: bool = False,
                ascend: bool = False,
                ) -> Iterator:
        """
        Return an iterator yielding nodes following `self`:

        * self (if `include_self` is true)
        * all descendants in tree traversal order (if `descend` is true)
        * the following siblings (if `siblings` is true) and their
          descendants (if also `descend` is true)
        * the following siblings of the parent (if `ascend` is true) and
          their descendants (if also `descend` is true), and so on.

        If `condition` is not None, the iterator yields only nodes
        for which ``condition(node)`` is true.
        If `condition` is a type (or tuple of types) ``cls``, it is equivalent
        to a function consisting of ``return isinstance(node, cls)``.

        If `ascend` is true, assume `siblings` to be true as well.

        If the tree structure is modified during iteration, the result
        is undefined.

        For example, given the following tree::

            <paragraph>
                <emphasis>      <--- emphasis.findall() and
                    <strong>    <--- strong.findall() are called.
                        Foo
                        Bar
                <reference name="Baz" refid="baz">
                    Baz

        Then tuple(emphasis.findall()) equals ::

            (<emphasis>, <strong>, <#text: Foo>, <#text: Bar>)

        and list(strong.findall(ascend=True)) equals ::

            [<strong>, <#text: Foo>, <#text: Bar>, <reference>, <#text: Baz>]
        """
        if ascend:
            siblings = True
        # Check for special argument combinations that allow using an
        # optimized version of findall()
        if include_self and descend and not siblings:
            if condition is None:
                yield from self._superfast_findall()
                return
            elif isinstance(condition, (type, tuple)):
                yield from self._fast_findall(condition)
                return
        # Wrap a class (or tuple of classes) in a predicate function so that
        # the generic code below only has to deal with callables.
        if isinstance(condition, (type, tuple)):
            class_or_tuple = condition

            def condition(node, class_or_tuple=class_or_tuple):
                return isinstance(node, class_or_tuple)

        if include_self and (condition is None or condition(self)):
            yield self
        if descend and len(self.children):
            for child in self:
                yield from child.findall(condition=condition,
                                         include_self=True, descend=True,
                                         siblings=False, ascend=False)
        if siblings or ascend:
            node = self
            while node.parent:
                index = node.parent.index(node)
                # extra check since Text nodes have value-equality
                while node.parent[index] is not node:
                    index = node.parent.index(node, index + 1)
                for sibling in node.parent[index+1:]:
                    yield from sibling.findall(
                        condition=condition,
                        include_self=True, descend=descend,
                        siblings=False, ascend=False)
                if not ascend:
                    break
                else:
                    node = node.parent

    def traverse(
        self,
        condition: type|tuple[type]|Callable[[Node], bool]|None = None,
        include_self: bool = True,
        descend: bool = True,
        siblings: bool = False,
        ascend: bool = False,
    ) -> list:
        """Return list of nodes following `self`.

        For looping, Node.findall() is faster and more memory efficient.
        """
        # traverse() may be eventually removed:
        warnings.warn('nodes.Node.traverse() is obsoleted by Node.findall().',
                      DeprecationWarning, stacklevel=2)
        return list(self.findall(condition, include_self, descend,
                                 siblings, ascend))

    def next_node(
        self,
        condition: type|tuple[type]|Callable[[Node], bool]|None = None,
        include_self: bool = False,
        descend: bool = True,
        siblings: bool = False,
        ascend: bool = False,
    ) -> Node | None:
        """
        Return the first node in the iterator returned by findall(),
        or None if the iterable is empty.

        Parameter list is the same as of `findall()`.  Note that `include_self`
        defaults to False, though.
        """
        try:
            return next(self.findall(condition, include_self,
                                     descend, siblings, ascend))
        except StopIteration:
            return None

    def validate(self, recursive: bool = True) -> None:
        """Raise ValidationError if this node is not valid.

        Override in subclasses that define validity constraints.
        """

    def validate_position(self) -> None:
        """Hook for additional checks of the parent's content model.

        Raise ValidationError, if `self` is at an invalid position.

        Override in subclasses with complex validity constraints. See
        `subtitle.validate_position()` and `transition.validate_position()`.
        """
class Text(Node, str):  # NoQA: SLOT000 (Node doesn't define __slots__)
    """
    Leaf node of the document tree holding character data only.

    A `Text` instance is a `str` and therefore has neither attributes nor
    child nodes.  Create one by passing a string to the constructor.

    ``str(<instance>)`` gives the raw (null-escaped) text,
    ``<instance>.astext()`` the unescaped text.
    """

    tagname: Final = '#text'

    children: Final = ()
    """Text nodes have no children, and cannot have children."""

    def __new__(cls, data: str, rawsource: None = None) -> Self:
        """Create the string object.

        Raise TypeError for `bytes` input; emit a DeprecationWarning if the
        ignored `rawsource` argument is given.
        """
        if isinstance(data, bytes):
            raise TypeError('expecting str data, not bytes')
        if rawsource is not None:
            warnings.warn('nodes.Text: initialization argument "rawsource" '
                          'is ignored and will be removed in Docutils 2.0.',
                          DeprecationWarning, stacklevel=2)
        return str.__new__(cls, data)

    def shortrepr(self, maxlen: int = 18) -> str:
        """Return ``<#text: '...'>`` with the text cut to `maxlen` chars."""
        text = str(self)
        if len(text) > maxlen:
            text = text[:maxlen-4] + ' ...'
        return '<%s: %r>' % (self.tagname, text)

    def __repr__(self) -> str:
        return self.shortrepr(maxlen=68)

    def astext(self) -> str:
        """Return the unescaped text as plain `str`."""
        return str(unescape(self))

    def _dom_node(self, domroot: minidom.Document) -> minidom.Text:
        """Return a DOM text node created by `domroot`."""
        return domroot.createTextNode(str(self))

    def copy(self) -> Self:
        """Return a new instance of the same class with the same text."""
        return self.__class__(str(self))

    def deepcopy(self) -> Self:
        """Same as `copy()`: a Text node has nothing to descend into."""
        return self.copy()

    # NOTE(review): this listing is a whitespace-collapsed dump; confirm the
    # width of the default `indent` string against the upstream file.
    def pformat(self, indent: str = ' ', level: int = 0) -> str:
        """Return the pseudo-XML representation of the text.

        With a true ``document.settings.detailed`` setting, a ``<#text>``
        tag line is followed by the `repr()` of every (unstripped) line.
        Otherwise (also if no document or setting is available), the
        unescaped lines are returned, each prefixed by ``indent*level``.
        """
        try:
            if self.document.settings.detailed:
                header = '%s%s' % (indent*level, '<#text>')
                body = (indent*(level+1) + repr(chunk)
                        for chunk in self.splitlines(True))
                return '\n'.join((header, *body)) + '\n'
        except AttributeError:
            # no document / no settings / no "detailed" setting
            pass
        prefix = indent * level
        plain = [prefix+chunk for chunk in self.astext().splitlines()]
        if plain:
            return '\n'.join(plain) + '\n'
        return ''

    # rstrip and lstrip are used by substitution definitions where
    # they are expected to return a Text instance, this was formerly
    # taken care of by UserString.

    def rstrip(self, chars: str | None = None) -> Self:
        """Like `str.rstrip()` but return an instance of this class."""
        stripped = str.rstrip(self, chars)
        return self.__class__(stripped)

    def lstrip(self, chars: str | None = None) -> Self:
        """Like `str.lstrip()` but return an instance of this class."""
        stripped = str.lstrip(self, chars)
        return self.__class__(stripped)
480class Element(Node):
481 """
482 `Element` is the superclass to all specific elements.
484 Elements contain attributes and child nodes.
485 They can be described as a cross between a list and a dictionary.
487 Elements emulate dictionaries for external [#]_ attributes, indexing by
488 attribute name (a string). To set the attribute 'att' to 'value', do::
490 element['att'] = 'value'
492 .. [#] External attributes correspond to the XML element attributes.
493 From its `Node` superclass, Element also inherits "internal"
494 class attributes that are accessed using the standard syntax, e.g.
495 ``element.parent``.
497 There are two special attributes: 'ids' and 'names'. Both are
498 lists of unique identifiers: 'ids' conform to the regular expression
499 ``[a-z](-?[a-z0-9]+)*`` (see the make_id() function for rationale and
500 details). 'names' serve as user-friendly interfaces to IDs; they are
501 case- and whitespace-normalized (see the fully_normalize_name() function).
503 Elements emulate lists for child nodes (element nodes and/or text
504 nodes), indexing by integer. To get the first child node, use::
506 element[0]
508 to iterate over the child nodes (without descending), use::
510 for child in element:
511 ...
513 Elements may be constructed using the ``+=`` operator. To add one new
514 child node to element, do::
516 element += node
518 This is equivalent to ``element.append(node)``.
520 To add a list of multiple child nodes at once, use the same ``+=``
521 operator::
523 element += [node1, node2]
525 This is equivalent to ``element.extend([node1, node2])``.
526 """
528 list_attributes: Final = ('ids', 'classes', 'names', 'dupnames')
529 """Tuple of attributes that are initialized to empty lists.
531 NOTE: Derived classes should update this value when supporting
532 additional list attributes.
533 """
535 valid_attributes: Final = list_attributes + ('source',)
536 """Tuple of attributes that are valid for elements of this class.
538 NOTE: Derived classes should update this value when supporting
539 additional attributes.
540 """
542 common_attributes: Final = valid_attributes
543 """Tuple of `common attributes`__ known to all Doctree Element classes.
545 __ https://docutils.sourceforge.io/docs/ref/doctree.html#common-attributes
546 """
548 known_attributes: Final = common_attributes
549 """Alias for `common_attributes`. Will be removed in Docutils 2.0."""
551 basic_attributes: Final = list_attributes
552 """Common list attributes. Deprecated. Will be removed in Docutils 2.0."""
554 local_attributes: Final = ('backrefs',)
555 """Obsolete. Will be removed in Docutils 2.0."""
557 content_model: ClassVar[_ContentModelTuple] = ()
558 """Python representation of the element's content model (cf. docutils.dtd).
560 A tuple of ``(category, quantifier)`` tuples with
562 :category: class or tuple of classes that are expected at this place(s)
563 in the list of children
564 :quantifier: string representation stating how many elements
565 of `category` are expected. Value is one of:
566 '.' (exactly one), '?' (zero or one),
567 '+' (one or more), '*' (zero or more).
569 NOTE: The default describes the empty element. Derived classes should
570 update this value to match their content model.
572 Provisional.
573 """
575 tagname: str | None = None
576 """The element generic identifier.
578 If None, it is set as an instance attribute to the name of the class.
579 """
581 child_text_separator: Final = '\n\n'
582 """Separator for child nodes, used by `astext()` method."""
def __init__(self,
             rawsource: str = '',
             *children,
             **attributes: Any,
             ) -> None:
    """Set up raw source, child list, and attribute dictionary.

    Parameter `rawsource`: the raw text the element was constructed from
    (informative only; some elements leave the default '').
    Positional `children` are added via `extend()` so that parent
    information is maintained.  Keyword `attributes` are stored under
    their lower-cased name; values of list attributes are copied.

    Raise TypeError if an `Element` is passed as `rawsource`.
    """
    # For informative and debugging purposes. Don't rely on its value!
    self.rawsource = rawsource
    if isinstance(rawsource, Element):
        raise TypeError('First argument "rawsource" must be a string.')

    # List of child nodes (elements and/or `Text`).
    self.children: list = []
    self.extend(children)  # maintain parent info

    # Dictionary of attribute {name: value};
    # every list attribute starts out as an empty list.
    self.attributes: dict[str, Any] = {
        name: [] for name in self.list_attributes}

    for name, value in attributes.items():
        name = name.lower()  # normalize attribute name
        # lists are mutable; store a copy for this node
        self.attributes[name] = (value[:] if name in self.list_attributes
                                 else value)

    # Fall back to the class name if the class defines no tagname.
    if self.tagname is None:
        self.tagname: str = self.__class__.__name__
def _dom_node(self, domroot: minidom.Document) -> minidom.Element:
    """Return a DOM element for `self`, including all children.

    Attributes come from `attlist()`; list values are joined with spaces
    after passing every item through `serial_escape()`.
    """
    dom_element = domroot.createElement(self.tagname)
    for name, val in self.attlist():
        if isinstance(val, list):
            val = ' '.join(serial_escape('%s' % (item,)) for item in val)
        dom_element.setAttribute(name, '%s' % val)
    for node in self.children:
        dom_element.appendChild(node._dom_node(domroot))
    return dom_element
def __repr__(self) -> str:
    """Return ``<tagname "names": children...>`` (abbreviated).

    The short representations of the children are concatenated and cut
    off as soon as they exceed 60 characters.
    """
    summary = ''
    for child in self.children:
        summary += child.shortrepr()
        if len(summary) > 60:
            summary = summary[:56] + ' ...'
            break
    names = self['names']
    if not names:
        return '<%s: %s>' % (self.tagname, summary)
    return '<%s "%s": %s>' % (self.tagname, '; '.join(names), summary)
def shortrepr(self) -> str:
    """Return ``<tagname...>``, with the names included if there are any."""
    names = self['names']
    if not names:
        return '<%s...>' % self.tagname
    return '<%s "%s"...>' % (self.tagname, '; '.join(names))
def __str__(self) -> str:
    """Return start tag, stringified children, and end tag.

    An element without children is rendered as an empty tag.
    """
    if not self.children:
        return self.emptytag()
    content = ''.join(str(child) for child in self.children)
    return '%s%s%s' % (self.starttag(), content, self.endtag())
def starttag(self, quoteattr: Callable[[str], str] | None = None) -> str:
    """Return the start tag with all attributes from `attlist()`.

    Parameter `quoteattr`: function that quotes an attribute value
    (used by the docutils_xml writer); defaults to `pseudo_quoteattr`.

    A None value is written as ``name="True"`` (boolean attribute),
    booleans become "1"/"0", list items are passed through
    `serial_escape()` and joined with spaces.
    """
    if quoteattr is None:
        quoteattr = pseudo_quoteattr
    parts = [self.tagname]
    for name, value in self.attlist():
        if value is None:  # boolean attribute
            rendered = '%s="True"' % name
        else:
            if isinstance(value, bool):
                text = str(int(value))
            elif isinstance(value, list):
                text = ' '.join(serial_escape('%s' % (v,)) for v in value)
            else:
                text = str(value)
            rendered = '%s=%s' % (name, quoteattr(text))
        parts.append(rendered)
    return '<%s>' % ' '.join(parts)
def endtag(self) -> str:
    """Return the end tag ``</tagname>``."""
    name = self.tagname
    return '</%s>' % name
def emptytag(self) -> str:
    """Return an empty-element tag with all attributes from `attlist()`.

    Values are inserted with ``%s`` formatting between double quotes.
    """
    parts = [self.tagname]
    for name, value in self.attlist():
        parts.append('%s="%s"' % (name, value))
    return '<%s/>' % ' '.join(parts)
def __len__(self) -> int:
    """Return the number of direct child nodes."""
    child_nodes = self.children
    return len(child_nodes)
def __contains__(self, key) -> bool:
    """Support ``in`` for attribute names (strings) and for child nodes."""
    container = self.attributes if isinstance(key, str) else self.children
    return key in container
def __getitem__(self, key: str | int | slice) -> Any:
    """Return an attribute value (str key) or child node(s) (int/slice).

    Slices must not have a stride other than 1.
    Raise TypeError for any other kind of key.
    """
    if isinstance(key, str):
        return self.attributes[key]
    if isinstance(key, int):
        return self.children[key]
    if isinstance(key, slice):
        assert key.step in (None, 1), 'cannot handle slice with stride'
        return self.children[key.start:key.stop]
    raise TypeError('element index must be an integer, a slice, or '
                    'an attribute name string')
def __setitem__(self, key, item) -> None:
    """Set an attribute (str key) or replace child node(s) (int/slice).

    New child nodes are passed to `setup_child()` before they are stored.
    For a slice, `item` may be any iterable of nodes (including one-shot
    iterators); slices must not have a stride other than 1.
    Raise TypeError for any other kind of key.
    """
    if isinstance(key, str):
        self.attributes[str(key)] = item
    elif isinstance(key, int):
        self.setup_child(item)
        self.children[key] = item
    elif isinstance(key, slice):
        assert key.step in (None, 1), 'cannot handle slice with stride'
        # Materialize first: iterating a generator for setup_child()
        # would exhaust it and leave nothing for the slice assignment.
        nodes = list(item)
        for node in nodes:
            self.setup_child(node)
        self.children[key.start:key.stop] = nodes
    else:
        raise TypeError('element index must be an integer, a slice, or '
                        'an attribute name string')
def __delitem__(self, key: str | int | slice) -> None:
    """Delete an attribute (str key) or child node(s) (int/slice).

    Slices must not have a stride other than 1.
    Raise TypeError for any other kind of key.
    """
    if isinstance(key, str):
        del self.attributes[key]
        return
    if isinstance(key, int):
        del self.children[key]
        return
    if isinstance(key, slice):
        assert key.step in (None, 1), 'cannot handle slice with stride'
        del self.children[key.start:key.stop]
        return
    raise TypeError('element index must be an integer, a simple '
                    'slice, or an attribute name string')
def __add__(self, other: list) -> list:
    """Return a new list: the child nodes followed by `other`."""
    own = self.children
    return own + other
def __radd__(self, other: list) -> list:
    """Return a new list: `other` followed by the child nodes."""
    own = self.children
    return other + own
def __iadd__(self, other) -> Self:
    """Append a node or a list of nodes to `self.children`.

    A single `Node` is appended, anything else (except None, which is
    ignored) is handed to `extend()`.
    """
    if isinstance(other, Node):
        self.append(other)
        return self
    if other is not None:
        self.extend(other)
    return self
def astext(self) -> str:
    """Return the children's text, joined by `child_text_separator`."""
    texts = [node.astext() for node in self.children]
    return self.child_text_separator.join(texts)
def non_default_attributes(self) -> dict[str, Any]:
    """Return a dict of the attributes accepted by `is_not_default()`."""
    selected = {}
    for name, value in self.attributes.items():
        if self.is_not_default(name):
            selected[name] = value
    return selected
def attlist(self) -> list[tuple[str, Any]]:
    """Return the non-default attributes as sorted (name, value) pairs."""
    pairs = list(self.non_default_attributes().items())
    pairs.sort()
    return pairs
def get(self, key: str, failobj: Any | None = None) -> Any:
    """Return the value of attribute `key`, or `failobj` if it is not set."""
    attributes = self.attributes
    return attributes.get(key, failobj)
def hasattr(self, attr: str) -> bool:
    """Return True if the attribute `attr` is set on this element."""
    attributes = self.attributes
    return attr in attributes
def delattr(self, attr: str) -> None:
    """Remove the attribute `attr`; do nothing if it is not set."""
    self.attributes.pop(attr, None)
def setdefault(self, key: str, failobj: Any | None = None) -> Any:
    """Return attribute `key`, setting it to `failobj` first if missing."""
    attributes = self.attributes
    return attributes.setdefault(key, failobj)
773 has_key = hasattr
def get_language_code(self, fallback: str = '') -> str:
    """Return node's language tag.

    Search the "classes" of `self` for a value starting with
    ``language-`` and return the remainder of the first match (which
    should be a BCP 47 language tag).  Without a match, ask the parent;
    return `fallback` when there is no parent to ask.
    """
    prefix = 'language-'
    for name in self.get('classes', []):
        if name.startswith(prefix):
            return name.removeprefix(prefix)
    try:
        return self.parent.get_language_code(fallback)
    except AttributeError:
        # no parent (or parent without get_language_code())
        return fallback
def append(self, item) -> None:
    """Add `item` as last child, after passing it to `setup_child()`."""
    self.setup_child(item)
    child_nodes = self.children
    child_nodes.append(item)
def extend(self, item: Iterable) -> None:
    """Append every node of the iterable `item`, using `append()`."""
    add = self.append
    for child in item:
        add(child)
def insert(self, index: SupportsIndex, item) -> None:
    """Insert `item` before position `index`.

    A single `Node` is set up and inserted; any other value except None
    is treated as a collection of nodes and inserted via slice
    assignment.  None is ignored.
    """
    if item is None:
        return
    if not isinstance(item, Node):
        self[index:index] = item
        return
    self.setup_child(item)
    self.children.insert(index, item)
def pop(self, i: int = -1):
    """Remove and return the child at index `i` (default: the last one)."""
    child_nodes = self.children
    return child_nodes.pop(i)
def remove(self, item) -> None:
    """Remove the first child equal to `item` (as `list.remove()` does)."""
    child_nodes = self.children
    child_nodes.remove(item)
def index(self, item, start: int = 0, stop: int = sys.maxsize) -> int:
    """Return the position of `item` in the child list.

    `start` and `stop` limit the search like in `list.index()`.
    """
    child_nodes = self.children
    return child_nodes.index(item, start, stop)
def previous_sibling(self):
    """Return preceding sibling node or ``None``.

    None is returned for the first child and for a node whose parent
    cannot be asked for an index (e.g. no parent at all).
    """
    try:
        position = self.parent.index(self)
    except AttributeError:
        return None
    if position > 0:
        return self.parent[position - 1]
    return None
def section_hierarchy(self) -> list[section]:
    """Return the element's section ancestors.

    Return a list of all <section> elements that contain `self`
    (including `self` if it is a <section>) and have a parent node.

    List item ``[i]`` is the parent <section> of level i+1
    (1: section, 2: subsection, 3: subsubsection, ...).
    The length of the list is the element's section level.

    See `docutils.parsers.rst.states.RSTState.check_subsection()`
    for a usage example.

    Provisional. May be changed or removed without warning.
    """
    innermost_first = []
    current = self
    # Climb towards the root; a node without parent is never included.
    while current.parent is not None:
        if isinstance(current, section):
            innermost_first.append(current)
        current = current.parent
    return innermost_first[::-1]
def is_not_default(self, key: str) -> bool:
    """Return False only for a list attribute that is an empty list.

    Every other attribute value counts as "not default".
    """
    return not (self[key] == [] and key in self.list_attributes)
def update_basic_atts(self, dict_: Mapping[str, Any] | Element) -> None:
    """
    Merge the attributes named in `basic_attributes` from `dict_`.

    `dict_` is a mapping or a node (whose ``attributes`` mapping is used).
    Values not yet present in the corresponding list of `self` are
    appended; attributes missing in `dict_` count as empty.

    Provisional.
    """
    source_atts = dict_.attributes if isinstance(dict_, Node) else dict_
    for name in self.basic_attributes:
        self.append_attr_list(name, source_atts.get(name, []))
def append_attr_list(self, attr: str, values: Iterable[Any]) -> None:
    """
    Append every item of `values` that is not yet in self[attr].

    NOTE: Requires self[attr] and values to be sequence type and the
    former should specifically be a list.
    """
    target = self[attr]
    for candidate in values:
        if candidate not in target:
            target.append(candidate)
def coerce_append_attr_list(
        self, attr: str, value: list[Any] | Any) -> None:
    """
    Wrap self[attr] and `value` in a one-element list unless they
    already are lists, then merge them with `append_attr_list()`.

    NOTE: self[attr] and value both must not be None.
    """
    current = self.get(attr)
    if not isinstance(current, list):
        self[attr] = [self[attr]]
    new_values = value if isinstance(value, list) else [value]
    self.append_attr_list(attr, new_values)
def replace_attr(self, attr: str, value: Any, force: bool = True) -> None:
    """
    Set self[attr] to `value` if `force` is true (the default) or if
    ``self.get(attr)`` is None; otherwise leave the attribute alone.
    """
    if not force and self.get(attr) is not None:
        return
    self[attr] = value
def copy_attr_convert(
        self, attr: str, value: Any, replace: bool = True) -> None:
    """
    If attr is an attribute of self, merge `value` into it (converting
    non-list values to lists, see `coerce_append_attr_list()`),
    otherwise set self[attr] to value.

    Nothing is done if ``self.get(attr)`` already is the `value` object.

    NOTE: replace is not used by this function and is kept only for
          compatibility with the other copy functions.
    """
    if self.get(attr) is value:
        return
    if attr not in self.attributes:
        # coerce_append_attr_list() reads self[attr] and would raise
        # KeyError for an attribute that is not set yet.
        self[attr] = value
        return
    self.coerce_append_attr_list(attr, value)
def copy_attr_coerce(self, attr: str, value: Any, replace: bool) -> None:
    """
    Merge or replace self[attr], depending on the value types.

    If the current value or `value` is a list, both are converted to
    lists and merged (see `coerce_append_attr_list()`).  Otherwise
    `replace_attr()` decides: the attribute is set to `value` if
    `replace` is true or the current value is None.

    Nothing is done if ``self.get(attr)`` already is the `value` object.

    NOTE(review): with a list `value` and an attribute that is not set,
    coerce_append_attr_list() looks up self[attr] -- confirm that callers
    guarantee the attribute exists.
    """
    current = self.get(attr)
    if current is value:
        return
    if isinstance(current, list) or isinstance(value, list):
        self.coerce_append_attr_list(attr, value)
    else:
        self.replace_attr(attr, value, replace)
def copy_attr_concatenate(
        self, attr: str, value: Any, replace: bool) -> None:
    """
    Merge two lists or replace self[attr].

    If both the current value and `value` are lists, the new items are
    appended (see `append_attr_list()`).  Otherwise `replace_attr()`
    decides: the attribute is set to `value` if `replace` is true or the
    current value is None.

    Nothing is done if ``self.get(attr)`` already is the `value` object.
    """
    current = self.get(attr)
    if current is value:
        return
    if isinstance(current, list) and isinstance(value, list):
        self.append_attr_list(attr, value)
    else:
        self.replace_attr(attr, value, replace)
def copy_attr_consistent(
        self, attr: str, value: Any, replace: bool) -> None:
    """
    Hand `value` to `replace_attr()`: self[attr] is set to `value` if
    `replace` is true or the current value is None.

    Nothing is done if ``self.get(attr)`` already is the `value` object.
    """
    if self.get(attr) is value:
        return
    self.replace_attr(attr, value, replace)
def update_all_atts(self,
                    dict_: Mapping[str, Any] | Element,
                    update_fun: _UpdateFun = copy_attr_consistent,
                    replace: bool = True,
                    and_source: bool = False,
                    ) -> None:
    """
    Updates all attributes from node or dictionary `dict_`.

    First, the basic list attributes are merged with
    `update_basic_atts()`.  Then, for every remaining attribute name in
    `dict_` that passes the filter, ``update_fun(self, name, value,
    replace)`` is called; `update_fun` decides how a value that exists
    in both places is replaced or merged (see the ``copy_attr_*()``
    methods).

    The filter is `is_not_list_attribute()` if `and_source` is true,
    else `is_not_known_attribute()` -- presumably this is what includes
    or excludes the 'source' attribute; both are defined outside this
    method.

    NOTE: It is easier to call the update-specific methods than to pass
          the update_fun method to this function.
    """
    source_atts = dict_.attributes if isinstance(dict_, Node) else dict_

    # Include the source attribute when copying?
    accept = (self.is_not_list_attribute if and_source
              else self.is_not_known_attribute)

    # Copy the basic attributes
    self.update_basic_atts(source_atts)

    # Process the other attributes of `dict_`
    # (all basic attributes have been copied already).
    for name in source_atts:
        if accept(name):
            update_fun(self, name, source_atts[name], replace)
def update_all_atts_consistantly(self,
                                 dict_: Mapping[str, Any] | Element,
                                 replace: bool = True,
                                 and_source: bool = False,
                                 ) -> None:
    """
    Update all attributes from node or dictionary `dict_`.

    Shortcut for `update_all_atts()` with
    `Element.copy_attr_consistent` as update function.

    Appends the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') and then, for all other attributes in
    dict_, updates the same attribute in self.  When attributes with the
    same identifier appear in both self and dict_ and replace is True,
    the values in self are replaced with the values in dict_; otherwise,
    the values in self are preserved.  When and_source is True, the
    'source' attribute is included in the copy.

    NOTE: When replace is False, and self contains a 'source' attribute,
          'source' is not replaced even when dict_ has a 'source'
          attribute.
    """
    self.update_all_atts(dict_,
                         update_fun=Element.copy_attr_consistent,
                         replace=replace,
                         and_source=and_source)
def update_all_atts_concatenating(self,
                                  dict_: Mapping[str, Any] | Element,
                                  replace: bool = True,
                                  and_source: bool = False,
                                  ) -> None:
    """
    Update all attributes from node or dictionary `dict_`.

    Shortcut for `update_all_atts()` with
    `Element.copy_attr_concatenate` as update function.

    Appends the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') and then, for all other attributes in
    dict_, updates the same attribute in self.  For an attribute present
    in both self and dict_:

    - if both values are lists, the two lists are concatenated and the
      result is stored in self;
    - otherwise, if replace is True, the value in self is replaced with
      the value in dict_;
    - otherwise, the value in self is preserved.

    When and_source is True, the 'source' attribute is included in the
    copy.

    NOTE: When replace is False, and self contains a 'source' attribute,
          'source' is not replaced even when dict_ has a 'source'
          attribute, though it may still be merged into a list.
    """
    self.update_all_atts(dict_,
                         update_fun=Element.copy_attr_concatenate,
                         replace=replace,
                         and_source=and_source)
def update_all_atts_coercion(self,
                             dict_: Mapping[str, Any] | Element,
                             replace: bool = True,
                             and_source: bool = False,
                             ) -> None:
    """
    Update all attributes from node or dictionary `dict_`.

    Shortcut for `update_all_atts()` with `Element.copy_attr_coerce`
    as update function.

    Appends the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') and then, for all other attributes in
    dict_, updates the same attribute in self.  For an attribute present
    in both self and dict_:

    - if either value is a list, any non-list is first converted to a
      1-element list, then the two lists are concatenated and the
      result is stored in self;
    - otherwise (neither is a list), if replace is True, the value in
      self is replaced with the value in dict_;
    - otherwise, the value in self is preserved.

    When and_source is True, the 'source' attribute is included in the
    copy.

    NOTE: When replace is False, and self contains a 'source' attribute,
          'source' is not replaced even when dict_ has a 'source'
          attribute, though it may still be merged into a list.
    """
    self.update_all_atts(dict_,
                         update_fun=Element.copy_attr_coerce,
                         replace=replace,
                         and_source=and_source)
def update_all_atts_convert(self,
                            dict_: Mapping[str, Any] | Element,
                            and_source: bool = False,
                            ) -> None:
    """
    Update all attributes from node or dictionary `dict_`.

    Shortcut for `update_all_atts()` with `Element.copy_attr_convert`
    as update function.

    Appends the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') and then, for all other attributes in
    dict_, updates the same attribute in self.  When attributes with the
    same identifier appear in both self and dict_ then first any
    non-lists are converted to 1-element lists and then the two lists
    are concatenated and the result stored in self; otherwise, the
    values in self are preserved.  When and_source is True, the 'source'
    attribute is included in the copy.

    NOTE: This method has no `replace` parameter and does not pass one
          on, so `update_all_atts()` runs with its default for
          `replace`.
    """
    self.update_all_atts(dict_,
                         update_fun=Element.copy_attr_convert,
                         and_source=and_source)
def clear(self) -> None:
    """Drop all children by rebinding `self.children` to a new empty list.

    The previous list object is left untouched.
    """
    self.children = list()
def replace(self, old, new) -> None:
    """Replace one child `Node` with another child or children.

    `new` may be a single `Node`, a sequence that is spliced in at the
    position of `old`, or None (then nothing is changed).
    The position of `old` is looked up first in every case.
    """
    position = self.index(old)
    if isinstance(new, Node):
        self.setup_child(new)
        self[position] = new
        return
    if new is None:
        return
    # Splice the sequence in place of the single old child.
    self[position:position + 1] = new
def replace_self(self, new) -> None:
    """
    Replace `self` node with `new`, where `new` is a node or a
    list of nodes.

    The basic attributes of `self` are passed on to `new` (or to the
    first item of the list) if that is an `Element`.  Otherwise an
    assertion checks that every basic attribute of `self` is empty.

    Provisional: the handling of node attributes will be revised.
    """
    if isinstance(new, Node):
        heir = new
    else:
        # `new` is a list; the first child inherits the attributes.
        try:
            heir = new[0]
        except IndexError:
            heir = None
    if isinstance(heir, Element):
        heir.update_basic_atts(self)
    else:
        # `heir` is a Text node or `new` is an empty list.
        # Assert that we aren't losing any attributes.
        for att in self.basic_attributes:
            assert not self[att], \
                'Losing "%s" attribute: %s' % (att, self[att])
    self.parent.replace(self, new)
def first_child_matching_class(self,
                               childclass: type[Element] | type[Text]
                               | tuple[type[Element] | type[Text], ...],
                               start: int = 0,
                               end: int = sys.maxsize,
                               ) -> int | None:
    """
    Return the index of the first child that is an instance of
    `childclass`, or None if there is no such child.

    The test is done with `isinstance()`, so instances of subclasses
    match, too.

    Parameters:

    - `childclass`: A `Node` subclass to search for, or a tuple of `Node`
      classes. If a tuple, any of the classes may match.
    - `start`: Initial index to check.
    - `end`: Initial index to *not* check.
    """
    if not isinstance(childclass, tuple):
        childclass = (childclass,)
    stop = min(len(self), end)
    return next((position for position in range(start, stop)
                 if isinstance(self[position], childclass)),
                None)
def first_child_not_matching_class(
        self,
        childclass: type[Element] | type[Text]
        | tuple[type[Element] | type[Text], ...],
        start: int = 0,
        end: int = sys.maxsize,
        ) -> int | None:
    """
    Return the index of the first child that is *not* an instance of
    `childclass`, or None if all inspected children match.

    Parameters:

    - `childclass`: A `Node` subclass to skip, or a tuple of `Node`
      classes. If a tuple, none of the classes may match.
    - `start`: Initial index to check.
    - `end`: Initial index to *not* check.
    """
    if not isinstance(childclass, tuple):
        childclass = (childclass,)
    stop = min(len(self), end)
    for position in range(start, stop):
        if not isinstance(self.children[position], childclass):
            return position
    return None
def pformat(self, indent: str = ' ', level: int = 0) -> str:
    """Return an indented pseudo-XML representation of the element.

    The start tag is written at `level` times `indent`, followed by the
    `pformat()` output of every child at ``level + 1``.
    """
    # NOTE(review): the default for `indent` is reproduced as shown;
    # a longer run of spaces may have been collapsed -- confirm upstream.
    pieces = ['%s%s\n' % (indent*level, self.starttag())]
    pieces.extend(child.pformat(indent, level+1) for child in self.children)
    return ''.join(pieces)
def copy(self) -> Self:
    """Return a copy of the element without its children.

    A new instance of the same class is created from `rawsource` and
    the attributes; `_document`, `source`, and `line` are carried over.
    """
    duplicate = self.__class__(rawsource=self.rawsource, **self.attributes)
    for name in ('_document', 'source', 'line'):
        setattr(duplicate, name, getattr(self, name))
    return duplicate
def deepcopy(self) -> Self:
    """Return a copy of the element including deep copies of its children."""
    clone = self.copy()
    child_clones = [child.deepcopy() for child in self.children]
    clone.extend(child_clones)
    return clone
def note_referenced_by(self,
                       name: str | None = None,
                       id: str | None = None,
                       ) -> None:
    """Note that this Element has been referenced by its name
    `name` or id `id`.

    Sets ``self.referenced`` and also marks the nodes registered for
    `name` / `id` in the optional ``expect_referenced_by_name`` /
    ``expect_referenced_by_id`` dictionaries as referenced.
    """
    self.referenced = True
    # Element.expect_referenced_by_* dictionaries map names or ids
    # to nodes whose ``referenced`` attribute is set to true as
    # soon as this node is referenced by the given name or id.
    # Needed for target propagation.
    expected_by_name = getattr(self, 'expect_referenced_by_name', {})
    expected_by_id = getattr(self, 'expect_referenced_by_id', {})
    dependent_by_name = expected_by_name.get(name)
    dependent_by_id = expected_by_id.get(id)
    if dependent_by_name:
        assert name is not None
        dependent_by_name.referenced = True
    if dependent_by_id:
        assert id is not None
        dependent_by_id.referenced = True
@classmethod
def is_not_list_attribute(cls, attr: str) -> bool:
    """
    Return True if and only if `attr` is NOT listed in
    ``cls.list_attributes`` (the basic list attributes).
    """
    is_list_attribute = attr in cls.list_attributes
    return not is_list_attribute
@classmethod
def is_not_known_attribute(cls, attr: str) -> bool:
    """
    Return True if `attr` is NOT listed in ``cls.common_attributes``
    (the attributes defined for all Element instances).

    Provisional. May be removed in Docutils 2.0.
    """
    is_common = attr in cls.common_attributes
    return not is_common
def validate_attributes(self) -> None:
    """Normalize and validate element attributes.

    Every attribute whose name does not start with "internal:" must be
    listed in `self.valid_attributes`; its value is replaced by the
    result of the matching function in `ATTRIBUTE_VALIDATORS`.

    All problems are collected and reported together in a single
    `ValidationError`.

    Provisional.
    """
    # NOTE(review): whitespace inside the message literals is reproduced
    # as shown; longer runs may have been collapsed -- confirm upstream.
    problems = []
    for key, value in self.attributes.items():
        if key.startswith('internal:'):
            continue  # see docs/user/config.html#expose-internals
        if key in self.valid_attributes:
            try:
                self.attributes[key] = ATTRIBUTE_VALIDATORS[key](value)
            except (ValueError, TypeError, KeyError) as e:
                problems.append(
                    f'Attribute "{key}" has invalid value "{value}".\n {e}')
        else:
            va = '", "'.join(self.valid_attributes)
            problems.append(f'Attribute "{key}" not one of "{va}".')
    if not problems:
        return
    raise ValidationError(f'Element {self.starttag()} invalid:\n '
                          + '\n '.join(problems),
                          problematic_element=self)
1269 def validate_content(self,
1270 model: _ContentModelTuple | None = None,
1271 elements: Sequence | None = None,
1272 ) -> list:
1273 """Test compliance of `elements` with `model`.
1275 :model: content model description, default `self.content_model`,
1276 :elements: list of doctree elements, default `self.children`.
1278 Return list of children that do not fit in the model or raise
1279 `ValidationError` if the content does not comply with the `model`.
1281 Provisional.
1282 """
1283 if model is None:
1284 model = self.content_model
1285 if elements is None:
1286 elements = self.children
1287 ichildren = iter(elements)
1288 child = next(ichildren, None)
1289 for category, quantifier in model:
1290 if not isinstance(child, category):
1291 if quantifier in ('.', '+'):
1292 raise ValidationError(self._report_child(child, category),
1293 problematic_element=child)
1294 else: # quantifier in ('?', '*') -> optional child
1295 continue # try same child with next part of content model
1296 else:
1297 # Check additional placement constraints (if applicable):
1298 child.validate_position()
1299 # advance:
1300 if quantifier in ('.', '?'): # go to next element
1301 child = next(ichildren, None)
1302 else: # if quantifier in ('*', '+'): # pass all matching elements
1303 for child in ichildren:
1304 if not isinstance(child, category):
1305 break
1306 try:
1307 child.validate_position()
1308 except AttributeError:
1309 pass
1310 else:
1311 child = None
1312 return [] if child is None else [child, *ichildren]
def _report_child(self,
                  child,
                  category: Element | Iterable[Element],
                  ) -> str:
    # Return a str reporting a missing child or child of wrong category.
    #
    # `category` is a single class or an iterable of classes; their
    # ``__name__`` is used in the message.
    # NOTE(review): whitespace inside the message literals is reproduced
    # as shown; longer runs may have been collapsed -- confirm upstream.
    try:
        expected = category.__name__
    except AttributeError:
        expected = '> or <'.join(c.__name__ for c in category)
    head = f'Element {self.starttag()} invalid:\n'
    if child is None:
        found = f' Missing child of type <{expected}>.'
    elif isinstance(child, Text):
        found = (f' Expecting child of type <{expected}>, '
                 f'not text data "{child.astext()}".')
    else:
        found = (f' Expecting child of type <{expected}>, '
                 f'not {child.starttag()}.')
    return f'{head}{found}'
def validate(self, recursive: bool = True) -> None:
    """Validate Docutils Document Tree element ("doctree").

    Raise ValidationError if there are violations.
    If `recursive` is True, validate also the element's descendants.

    See `The Docutils Document Tree`__ for details of the
    Docutils Document Model.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html

    Provisional (work in progress).
    """
    self.validate_attributes()

    # Children left over by `validate_content()` do not fit the content
    # model; the first one is reported (the loop never runs twice).
    for leftover in self.validate_content():
        intro = f'Element {self.starttag()} invalid:\n'
        if isinstance(leftover, Text):
            raise ValidationError(
                intro + f' Spurious text: "{leftover.astext()}".',
                problematic_element=self)
        raise ValidationError(
            intro + f' Child element {leftover.starttag()} '
            'not allowed at this position.',
            problematic_element=leftover)

    if not recursive:
        return
    for child in self:
        child.validate(recursive=recursive)
1364# ====================
1365# Element Categories
1366# ====================
1367#
1368# See https://docutils.sourceforge.io/docs/ref/doctree.html#element-hierarchy.
class Root:
    """Category class: the element at the root of a document tree."""
class Structural:
    """Category class for `structural elements`__.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #structural-elements
    """
class SubStructural:
    """Category class for `structural subelements`__.

    Structural subelements are children of `Structural` elements.
    Most Structural elements accept only specific `SubStructural` elements.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #structural-subelements
    """
class Bibliographic:
    """Category class for `bibliographic elements`__
    (displayed document meta-data).

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #bibliographic-elements
    """
class Body:
    """Category class for `body elements`__.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#body-elements
    """
class Admonition(Body):
    """Category class for admonitions
    (distinctive and self-contained notices).

    The content consists of one or more `Body` elements.
    """
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+
class Sequential(Body):
    """Category class for list-like body elements."""
class General(Body):
    """Category class for miscellaneous body elements."""
class Special(Body):
    """Category class for special internal body elements."""
class Part:
    """Category class for `body subelements`__.

    Body subelements always occur within specific parent elements.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#body-subelements
    """
class Decorative:
    """Category class for decorative elements (`header` and `footer`).

    Decorative elements are children of `decoration`.
    Their content consists of one or more `Body` elements.
    """
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+
class Inline:
    """Category class for inline elements.

    Inline elements contain text data and possibly other inline elements.
    """
1444# Orthogonal categories and Mixins
1445# ================================
class PreBibliographic:
    """Category class for elements that may occur before
    Bibliographic Elements."""
class Invisible(Special, PreBibliographic):
    """Category class for internal elements that don't appear in output."""
class Labeled:
    """Category class for elements containing a `label` as first element."""
class Resolvable:
    """Base class providing a `resolved` flag that is False by default."""

    resolved: bool = False
class BackLinkable:
    """Mixin for Elements that accept a "backrefs" attribute.

    Extends the list attributes and the valid attributes of `Element`
    by 'backrefs'.
    """

    list_attributes: Final = Element.list_attributes + ('backrefs',)
    valid_attributes: Final = Element.valid_attributes + ('backrefs',)

    def add_backref(self: Element, refid: str) -> None:
        """Append `refid` to the list stored in the "backrefs" attribute."""
        backrefs = self['backrefs']
        backrefs.append(refid)
class Referential(Resolvable):
    """Category class for elements holding a cross-reference
    (outgoing hyperlink)."""
class Targetable(Resolvable):
    """Category class for cross-reference targets (incoming hyperlink)."""

    # Class-level default for the "has been referenced" marker.
    referenced: int = 0

    indirect_reference_name: str | None = None
    """Holds the whitespace_normalized_name (contains mixed case) of a target.

    This was required for MoinMoin <= 1.9 compatibility.

    Deprecated, will be removed in Docutils 1.0.
    """
class Titular:
    """Category class for a title, sub-title, or informal heading (rubric)."""
class TextElement(Element):
    """
    An element which directly contains text.

    Its children are all `Text` or `Inline` subclass nodes.  You can
    check whether an element's context is inline simply by checking whether
    its immediate parent is a `TextElement` instance (including subclasses).
    This is handy for nodes like `image` that can appear both inline and as
    standalone body elements.

    If passing children to `__init__()`, make sure to set `text` to
    ``''`` or some other suitable value.
    """
    content_model: Final = (((Text, Inline), '*'),)
    # (#PCDATA | %inline.elements;)*

    child_text_separator: Final = ''
    """Separator for child nodes, used by `astext()` method."""

    def __init__(self,
                 rawsource: str = '',
                 text: str = '',
                 *children,
                 **attributes: Any,
                 ) -> None:
        """Initialize the element.

        A non-empty `text` is wrapped in a `Text` node that becomes the
        first child, followed by `children`.
        """
        leading = (Text(text),) if text else ()
        Element.__init__(self, rawsource, *leading, *children, **attributes)
class FixedTextElement(TextElement):
    """An element which directly contains preformatted text.

    Accepts the additional attribute "xml:space"; every instance gets
    it set to "preserve" on initialization.
    """

    valid_attributes: Final = Element.valid_attributes + ('xml:space',)

    def __init__(self,
                 rawsource: str = '',
                 text: str = '',
                 *children,
                 **attributes: Any,
                 ) -> None:
        """Initialize via the parent class, then set "xml:space"."""
        super().__init__(rawsource, text, *children, **attributes)
        # Set after the parent initializer ran, so a value passed in
        # `attributes` is overwritten.
        self.attributes['xml:space'] = 'preserve'
class PureTextElement(TextElement):
    """An element which only contains text, no children.

    The content model allows at most one `Text` node.
    """
    content_model: Final = ((Text, '?'),)  # (#PCDATA)
1547# =================================
1548# Concrete Document Tree Elements
1549# =================================
1550#
1551# See https://docutils.sourceforge.io/docs/ref/doctree.html#element-reference
1553# Special purpose elements
1554# ========================
1555#
1556# Body elements for internal use or special requests.
class comment(Invisible, FixedTextElement, PureTextElement):
    """Element for author notes that are hidden from the output."""
class substitution_definition(Invisible, TextElement):
    """The `substitution_definition` element.

    Accepts the attributes "ltrim" and "rtrim" in addition to the
    valid attributes of `Element`.
    """
    valid_attributes: Final = Element.valid_attributes + ('ltrim', 'rtrim')
class target(Invisible, Inline, TextElement, Targetable):
    """The `target` element.

    Accepts the attributes "anonymous", "refid", "refname", and "refuri"
    in addition to the valid attributes of `Element`.
    """
    valid_attributes: Final = Element.valid_attributes + (
        'anonymous', 'refid', 'refname', 'refuri')
class system_message(Special, BackLinkable, PreBibliographic, Element):
    """
    System message element.

    Do not instantiate this class directly; use
    ``document.reporter.info/warning/error/severe()`` instead.
    """
    valid_attributes: Final = BackLinkable.valid_attributes + (
                                  'level', 'line', 'type')
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+

    def __init__(self,
                 message: str | None = None,
                 *children,
                 **attributes: Any,
                 ) -> None:
        """Initialize the element.

        A non-empty `message` is wrapped in a `paragraph` that becomes
        the first child.  The keyword argument "rawsource" is removed
        from `attributes` and passed on as raw source.
        """
        rawsource = attributes.pop('rawsource', '')
        if message:
            children = (paragraph('', message), *children)
        try:
            Element.__init__(self, rawsource, *children, **attributes)
        except:  # NoQA: E722 (catchall)
            # Debugging aid: show the offending children, then re-raise.
            print('system_message: children=%r' % (children,))
            raise

    def astext(self) -> str:
        """Return "source:line: (type/level) text".

        The "line" attribute is optional (default: empty string);
        "source", "type", and "level" are looked up with ``self[...]``.
        """
        line = self.get('line', '')
        fields = (self['source'], line, self['type'], self['level'],
                  Element.astext(self))
        return '%s:%s: (%s/%s) %s' % fields
class pending(Invisible, Element):
    """
    Placeholder for pending operations.

    The "pending" element is used to encapsulate a pending operation: the
    operation (transform), the point at which to apply it, and any data it
    requires.  Only the pending operation's location within the document is
    stored in the public document tree (by the "pending" object itself); the
    operation and its data are stored in the "pending" object's internal
    instance attributes.

    For example, say you want a table of contents in your reStructuredText
    document.  The easiest way to specify where to put it is from within the
    document, with a directive::

        .. contents::

    But the "contents" directive can't do its work until the entire document
    has been parsed and possibly transformed to some extent.  So the directive
    code leaves a placeholder behind that will trigger the second phase of its
    processing, something like this::

        <pending ...public attributes...> + internal attributes

    Use `document.note_pending()` so that the
    `docutils.transforms.Transformer` stage of processing can run all pending
    transforms.
    """

    def __init__(self,
                 transform: Transform,
                 details: Mapping[str, Any] | None = None,
                 rawsource: str = '',
                 *children,
                 **attributes: Any,
                 ) -> None:
        Element.__init__(self, rawsource, *children, **attributes)

        self.transform: Transform = transform
        """The `docutils.transforms.Transform` class implementing the pending
        operation."""

        self.details: Mapping[str, Any] = details or {}
        """Detail data (dictionary) required by the pending operation."""

    def pformat(self, indent: str = ' ', level: int = 0) -> str:
        """Return the pseudo-XML of the element plus its internal data.

        The output of `Element.pformat()` is followed by lines listing
        the transform class and the (sorted) items of `self.details`.
        Detail values that are nodes or non-empty lists starting with a
        node are rendered with their own `pformat()`.
        """
        # NOTE(review): whitespace inside the literals below is reproduced
        # as shown; longer runs may have been collapsed -- confirm upstream.
        internals = ['.. internal attributes:',
                     ' .transform: %s.%s' % (self.transform.__module__,
                                             self.transform.__name__),
                     ' .details:']
        for key, value in sorted(self.details.items()):
            if isinstance(value, Node):
                nodes = [value]
            elif (value
                  and isinstance(value, list)
                  and isinstance(value[0], Node)):
                nodes = value
            else:
                internals.append('%7s%s: %r' % ('', key, value))
                continue
            internals.append('%7s%s:' % ('', key))
            for node in nodes:
                internals.extend('%9s%s' % ('', line)
                                 for line in node.pformat().splitlines())
        public_part = Element.pformat(self, indent, level)
        internal_part = ''.join(' %s%s\n' % (indent * level, line)
                                for line in internals)
        return public_part + internal_part

    def copy(self) -> Self:
        """Return a copy without children.

        The copy shares `transform` and `details` with the original;
        `_document`, `source`, and `line` are carried over.
        """
        duplicate = self.__class__(self.transform, self.details,
                                   self.rawsource, **self.attributes)
        for name in ('_document', 'source', 'line'):
            setattr(duplicate, name, getattr(self, name))
        return duplicate
class raw(Special, Inline, PreBibliographic,
          FixedTextElement, PureTextElement):
    """Raw data that is to be passed untouched to the Writer.

    Can be used as Body element or Inline element.
    Accepts the attributes "format" and "xml:space" in addition to the
    valid attributes of `Element`.
    """
    valid_attributes: Final = Element.valid_attributes + (
                                  'format', 'xml:space')
1691# Decorative Elements
1692# ===================
class header(Decorative, Element):
    """The `header` element, a `Decorative` element."""
class footer(Decorative, Element):
    """The `footer` element, a `Decorative` element."""
1698# Structural Subelements
1699# ======================
class title(Titular, PreBibliographic, SubStructural, TextElement):
    """Title of `document`, `section`, `topic` and generic `admonition`.

    Accepts the attributes "auto" and "refid" in addition to the valid
    attributes of `Element`.
    """
    valid_attributes: Final = Element.valid_attributes + ('auto', 'refid')
class subtitle(Titular, PreBibliographic, SubStructural, TextElement):
    """Sub-title of `document`, `section` and `sidebar`."""

    def validate_position(self) -> None:
        """Check position of subtitle: must follow a title.

        Raise `ValidationError` if the subtitle is the first child of
        its parent.  Nothing is checked if `self.parent` is false.
        """
        parent = self.parent
        if not parent or parent.index(self) != 0:
            return
        raise ValidationError(f'Element {parent.starttag()} invalid:'
                              '\n <subtitle> only allowed after <title>.',
                              problematic_element=self)
class meta(PreBibliographic, SubStructural, Element):
    """Container for "invisible" bibliographic data, or meta-data.

    Accepts the attributes "content", "dir", "http-equiv", "lang",
    "media", "name", and "scheme" in addition to the valid attributes
    of `Element`.
    """
    valid_attributes: Final = Element.valid_attributes + (
        'content', 'dir', 'http-equiv', 'lang', 'media', 'name', 'scheme')
class docinfo(SubStructural, Element):
    """Container for displayed document meta-data.

    The content consists of one or more `Bibliographic` elements.
    """
    content_model: Final = ((Bibliographic, '+'),)
    # (%bibliographic.elements;)+
class decoration(PreBibliographic, SubStructural, Element):
    """Container for `header` and `footer`."""
    # An empty element doesn't make sense, but is simpler to define.
    content_model: Final = ((header, '?'),
                            (footer, '?'),
                            )
    # (header?, footer?)

    def get_header(self) -> header:
        """Return the `header` child, inserting a new one if required.

        A new `header` is inserted at index 0 unless the first child
        already is a `header`.
        """
        if not self.children or not isinstance(self.children[0], header):
            self.insert(0, header())
        return self.children[0]

    def get_footer(self) -> footer:
        """Return the `footer` child, appending a new one if required.

        A new `footer` is appended unless the last child already is a
        `footer`.
        """
        if not self.children or not isinstance(self.children[-1], footer):
            self.append(footer())
        return self.children[-1]
class transition(SubStructural, Element):
    """Transitions__ represent "semantic breaks".

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#transition
    """
    # Sibling nodes that are ignored when validating a transition's position
    # (titles plus moving and invisible elements except comments):
    ignored_siblings = (decoration, meta, pending, substitution_definition,
                        subtitle, target, title)

    def validate_position(self) -> None:
        """Check additional constraints on `transition` placement.

        A transition may not begin or end section or document text,
        nor may two transitions be immediately adjacent.

        All violations are reported together in one `ValidationError`.
        Siblings listed in `ignored_siblings` do not count as content
        before or after the transition.
        """
        parent = self.parent
        messages = [f'Element {parent.starttag()} invalid:']
        if isinstance(self.previous_sibling(), transition):
            messages.append(
                '<transition> may not directly follow another transition.')
        position = parent.index(self)
        ignored = self.ignored_siblings
        if all(isinstance(sibling, ignored)
               for sibling in parent[:position]):
            messages.append(
                '<transition> may not begin a section or document.')
        if all(isinstance(sibling, ignored)
               for sibling in parent[position+1:]):
            messages.append('<transition> may not end a section or document.')
        if len(messages) == 1:
            return  # only the headline: nothing to report
        raise ValidationError('\n '.join(messages),
                              problematic_element=self)
1783# Structural Elements
1784# ===================
class topic(Structural, Element):
    """
    Topics__ are non-recursive, mini-sections.

    The content is an optional `title` followed by one or more `Body`
    elements.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#topic
    """
    content_model: Final = ((title, '?'), (Body, '+'))
    # (title?, (%body.elements;)+)
class sidebar(Structural, Element):
    """
    Sidebars__ are like parallel documents providing related material.

    A sidebar is typically offset by a border and "floats" to the side
    of the page.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#sidebar
    """
    content_model: Final = ((title, '?'),
                            (subtitle, '?'),
                            ((topic, Body), '+'),
                            )
    # ((title, subtitle?)?, (%body.elements; | topic)+)
    # "subtitle only after title" is ensured in `subtitle.validate_position()`.
class section(Structural, Element):
    """Document section__. The main unit of hierarchy.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#section
    """
    # The content model refers to `section` itself and is therefore
    # assigned after the class definition.


section.content_model = ((title, '.'),
                         (subtitle, '?'),
                         ((Body, topic, sidebar, transition), '*'),
                         ((section, transition), '*'),
                         )
# (title, subtitle?, %structure.model;)
# Correct transition placement is ensured in `transition.validate_position()`.
1830# Root Element
1831# ============
1833class document(Root, Element):
1834 """
1835 The document root element.
1837 Do not instantiate this class directly; use
1838 `docutils.utils.new_document()` instead.
1839 """
1840 valid_attributes: Final = Element.valid_attributes + ('title',)
1841 content_model: Final = ((title, '?'),
1842 (subtitle, '?'),
1843 (meta, '*'),
1844 (decoration, '?'),
1845 (docinfo, '?'),
1846 (transition, '?'),
1847 ((Body, topic, sidebar, transition), '*'),
1848 ((section, transition), '*'),
1849 )
1850 # ( (title, subtitle?)?,
1851 # meta*,
1852 # decoration?,
1853 # (docinfo, transition?)?,
1854 # %structure.model; )
1855 # Additional restrictions for `subtitle` and `transition` are tested
1856 # with the respective `validate_position()` methods.
def __init__(self,
             settings: Values,
             reporter: Reporter,
             *args,
             **kwargs: Any,
             ) -> None:
    # Positional and keyword arguments after `reporter` are passed on
    # to `Element.__init__()`.
    Element.__init__(self, *args, **kwargs)

    # Input position
    # --------------

    self.current_source: StrPath | None = None
    """Path to or description of the input source being processed."""

    self.current_line: int | None = None
    """Line number (1-based) of `current_source`."""

    # Runtime objects
    # ---------------

    self.settings: Values = settings
    """Runtime settings data record."""

    self.reporter: Reporter = reporter
    """System message generator."""

    # Bookkeeping for targets, substitutions, and references
    # ------------------------------------------------------

    self.indirect_targets: list[target] = []
    """List of indirect target nodes."""

    self.substitution_defs: dict[str, substitution_definition] = {}
    """Mapping of substitution names to substitution_definition nodes."""

    self.substitution_names: dict[str, str] = {}
    """Mapping of case-normalized to case-sensitive substitution names."""

    self.refnames: dict[str, list[Element]] = {}
    """Mapping of names to lists of referencing nodes."""

    self.refids: dict[str, list[Element]] = {}
    """(Incomplete) Mapping of ids to lists of referencing nodes."""

    self.names: dict[str, Element|None] = {}
    """Mapping of names to nodes (or ``None`` if name is a duplicate)."""

    self.ids: dict[str, Element] = {}
    """Mapping of ids to nodes."""

    self.nameids: dict[str, str] = {}
    """Mapping of names to unique id's."""

    self.nametypes: dict[str, bool] = {}
    """Mapping of names to hyperlink type. True: explicit, False: implicit.
    """

    # Footnotes and citations
    # -----------------------

    self.footnote_refs: dict[str, list[footnote_reference]] = {}
    """Mapping of footnote labels to lists of footnote_reference nodes."""

    self.citation_refs: dict[str, list[citation_reference]] = {}
    """Mapping of citation labels to lists of citation_reference nodes."""

    self.autofootnotes: list[footnote] = []
    """List of auto-numbered footnote nodes."""

    self.autofootnote_refs: list[footnote_reference] = []
    """List of auto-numbered footnote_reference nodes."""

    self.symbol_footnotes: list[footnote] = []
    """List of symbol footnote nodes."""

    self.symbol_footnote_refs: list[footnote_reference] = []
    """List of symbol footnote_reference nodes."""

    self.footnotes: list[footnote] = []
    """List of manually-numbered footnote nodes."""

    self.citations: list[citation] = []
    """List of citation nodes."""

    self.autofootnote_start: int = 1
    """Initial auto-numbered footnote number."""

    self.symbol_footnote_start: int = 0
    """Initial symbol footnote symbol index."""

    self.id_counter: Counter[int] = Counter()
    """Numbers added to otherwise identical IDs."""

    # System messages
    # ---------------

    self.parse_messages: list[system_message] = []
    """System messages generated while parsing."""

    self.transform_messages: list[system_message] = []
    """System messages generated while applying transforms."""

    # Delayed import of docutils.transforms.
    import docutils.transforms
    self.transformer: Transformer = docutils.transforms.Transformer(self)
    """Storage for transforms to be applied to this document."""

    self.include_log: list[tuple[StrPath, tuple]] = []
    """The current source's parents (to detect inclusion loops)."""

    self.decoration: decoration | None = None
    """Document's `decoration` node."""

    # The document is its own `_document`.
    self._document: document = self
def __getstate__(self) -> dict[str, Any]:
    """
    Return a copy of the instance dictionary suitable for pickling.

    The "reporter" and "transformer" entries hold unpicklable references
    and are replaced by ``None``.
    """
    return {**self.__dict__, 'reporter': None, 'transformer': None}
def asdom(self, dom: ModuleType | None = None) -> minidom.Document:
    """Build and return a DOM document representing this document.

    `dom` is the DOM implementation module to use. The standard
    library's `xml.dom.minidom` is imported on demand if it is not given.
    """
    if dom is None:
        import xml.dom.minidom as dom
    dom_document = dom.Document()
    root_element = self._dom_node(dom_document)
    dom_document.appendChild(root_element)
    return dom_document
def set_id(self,
           node: Element,
           msgnode: Element | None = None,
           suggested_prefix: str = '',
           ) -> str:
    """
    Ensure `node` has an identifier and register all its identifiers.

    If the "ids" attribute of `node` is empty, a new identifier is
    generated with `self.create_id()` (passing on `suggested_prefix`).
    Every identifier is entered into `self.ids`. An identifier that is
    already registered for another element is reported as an error
    (the system message is added to `msgnode` if that is given).
    Finally, `self.nameids` maps every name of `node` to the last
    identifier.

    Return the last identifier of `node`.

    Provisional.
    """
    identifiers = node['ids']
    if not identifiers:
        identifiers.append(self.create_id(node, suggested_prefix))
    # register and check for duplicates
    for identifier in identifiers:
        owner = self.ids.setdefault(identifier, node)
        if owner is node:
            continue
        msg = self.reporter.error(f'Duplicate ID: "{identifier}" used by '
                                  f'{owner.starttag()} '
                                  f'and {node.starttag()}',
                                  base_node=node)
        if msgnode is not None:
            msgnode += msg
    last_id = identifiers[-1]
    for name in node['names']:
        self.nameids[name] = last_id
    return last_id
def create_id(self, node: Element, suggested_prefix: str = '') -> str:
    # Internal auxiliary method for set_id():
    # generate and return an identifier for `node`
    # that is not yet a key of `self.ids`.
    # See also make_id()
    id_prefix = self.settings.id_prefix
    auto_id_prefix = self.settings.auto_id_prefix
    base_id = ''
    candidate = ''
    # 1st choice: an ID derived from one of the node's names
    for name in node['names']:
        # the auxiliary "x" allows names starting with numbers
        base_id = make_id('x'+name)[1:] if id_prefix else make_id(name)
        # TODO: normalize id-prefix? (would make code simpler)
        candidate = id_prefix + base_id
        if base_id and candidate not in self.ids:
            return candidate
    # 2nd choice: a prefix followed by a running number
    if base_id and auto_id_prefix.endswith('%'):
        # disambiguate name-derived ID
        # TODO: remove second condition after announcing change
        prefix = candidate + '-'
    elif (node['dupnames'] and auto_id_prefix.endswith('%')
          and make_id(node['dupnames'][0])):
        prefix = make_id(node['dupnames'][0]) + '-'
    else:
        prefix = id_prefix + auto_id_prefix
        if prefix.endswith('%'):
            # a trailing "%" is a placeholder for the suggested prefix
            # or the ID-fied tag name
            substitute = suggested_prefix or make_id(node.tagname)
            prefix = f'{prefix[:-1]}{substitute}-'
    while True:
        self.id_counter[prefix] += 1
        candidate = f'{prefix}{self.id_counter[prefix]}'
        if candidate not in self.ids:
            return candidate
def set_name_id_map(self,
                    node: Element,
                    id: str,
                    msgnode: Element | None = None,
                    explicit: bool = False,
                    ) -> None:
    """Deprecated. Will be removed in Docutils 1.0.

    Issue a `DeprecationWarning`, register the names of `node` with
    `self.note_names()` and map each of them to `id` in `self.nameids`.
    """
    warnings.warn('nodes.document.set_name_id_map() will be removed'
                  ' in Docutils 1.0.', DeprecationWarning, stacklevel=2)
    self.note_names(node, msgnode, explicit)
    self.nameids.update(dict.fromkeys(node['names'], id))
def set_duplicate_name(self,
                       node: Element,
                       name: str,
                       msgnode: Element | None,
                       explicit: bool,
                       ) -> None:
    """
    Handle name conflicts according to the `rST specification`__.

    Called by `self.note_names()` when the reference name `name`
    of the element `node` is already registered in `self.names`.

    `self.names` maps names to elements. The value ``None`` indicates
    that the name is a "dupname" (i.e. the document contains two or
    more elements with the same name and target type).

    `self.nametypes` maps names to booleans representing the target type
    (True = "explicit", False = "implicit").

    The following state transition table shows how the values
    of `self.names` ("name") and `self.nametypes` ("type") items
    with key `name` change and which actions are performed.

    "Old" is the element with conflicting reference name,
    "new" is the element specified by the argument `node`.
    The "Input type" is specified by the argument `explicit`.

    ========  ====  ========  ====  ========  ===============  =======
    Input     Old State       New State       Action
    --------  --------------  --------------  ------------------------
    type      name  type      name  type      invalidate [#]_  report
    ========  ====  ========  ====  ========  ===============  =======
    explicit  old   explicit  None  explicit  new,old [#ex]_   WARNING
    implicit  old   explicit  old   explicit  new              INFO
    explicit  old   implicit  new   explicit  old              INFO
    implicit  old   implicit  None  implicit  new,old [#ex]_   INFO
    explicit  None  explicit  None  explicit  new              WARNING
    implicit  None  explicit  None  explicit  new              INFO
    explicit  None  implicit  new   explicit
    implicit  None  implicit  None  implicit  new              INFO
    ========  ====  ========  ====  ========  ===============  =======

    .. [#] When "invalidating" an element, `name` is transferred from
       the element's "names" attribute to its "dupnames" attribute.

    .. [#ex] If both "old" and "new" refer to identical URIs or
       reference names, keep the old state and only invalidate "new".

    The system message (if any) is appended to `msgnode` when that is
    given and its content model mentions "Body".

    __ https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html
       #implicit-hyperlink-targets

    Provisional.
    """
    # NOTE(review): the nesting below was reconstructed from a listing
    # with collapsed indentation -- confirm against the repository.
    old_node = self.names[name]  # None if name is only dupname
    old_explicit = self.nametypes[name]
    level = 0  # system message level: 1-info, 2-warning

    # the name counts as "explicit" if either target is explicit
    self.nametypes[name] = old_explicit or explicit

    if old_node is not None and (
        'refname' in node and node['refname'] == old_node.get('refname')
        or 'refuri' in node and node['refuri'] == old_node.get('refuri')
    ):
        # indirect targets with same reference -> keep old target
        level = 1
        ref = node.get('refuri') or node.get('refname')
        s = f'Duplicate name "{name}" for external target "{ref}".'
        dupname(node, name)
    elif explicit:
        if old_explicit:
            level = 2
            s = f'Duplicate explicit target name: "{name}".'
            dupname(node, name)
            if old_node is not None:
                dupname(old_node, name)
                self.names[name] = None
                self.nameids[name] = None
        else:  # new explicit, old implicit -> override
            self.names[name] = node
            if old_node is not None:
                level = 1
                s = f'Target name overrides implicit target name "{name}".'
                dupname(old_node, name)
    else:  # new name is implicit
        level = 1
        s = f'Duplicate implicit target name: "{name}".'
        dupname(node, name)
        if old_node is not None and not old_explicit:
            dupname(old_node, name)
            self.names[name] = None
            self.nameids[name] = None
            self.set_id(old_node)  # set id to get running numbers right
    if level:  # level 0: nothing to report
        # don't add backref id for empty targets (not shown in output)
        if isinstance(node, target) and not node.children:
            backrefs = []
        else:
            backrefs = [self.set_id(node)]
        msg = self.reporter.system_message(level, s, backrefs=backrefs,
                                           base_node=node)
        # try appending near to the problem:
        if msgnode is not None and 'Body' in repr(msgnode.content_model):
            msgnode += msg
def note_names(self,
               node: Element,
               msgnode: Element|None = None,
               explicit: bool = False,
               ) -> None:
    """
    Take note of the reference names of the element `node`.

    Each name in the "names" attribute of `node` is entered into
    `self.names`, and `self.nametypes` records `explicit` for names
    without a registered type. A name that is already registered for
    a different element is passed to `self.set_duplicate_name()` instead.
    """
    # iterate over a snapshot: set_duplicate_name() modifies node['names']
    for name in tuple(node['names']):
        conflict = name in self.names and self.names[name] != node
        if conflict:
            self.set_duplicate_name(node, name, msgnode, explicit)
            continue
        self.names[name] = node
        self.nametypes.setdefault(name, explicit)
def has_name(self, name: str) -> bool:
    """Return True if `name` is a key of `self.names`."""
    # TODO: deprecate? (use ``name in document.names``)
    return name in self.names
2182 # "note" here is an imperative verb: "take note of".
def note_implicit_target(self, target: Element,
                         msgnode: Element|None = None) -> None:
    """Register the names of `target` as implicit target names.

    Also call `self.set_id()` unless the setting "legacy_ids" is false
    (a missing setting counts as true).
    """
    self.note_names(target, msgnode, explicit=False)
    legacy_ids = getattr(self.settings, "legacy_ids", True)
    if legacy_ids:
        self.set_id(target, msgnode)
def note_explicit_target(self, target: Element,
                         msgnode: Element|None = None) -> None:
    """Register the names of `target` as explicit target names; set its ID.
    """
    self.note_names(target, msgnode, explicit=True)
    self.set_id(target, msgnode)
def note_refname(self, node: Element) -> None:
    """Append `node` to the `self.refnames` list for its "refname"."""
    same_refname = self.refnames.setdefault(node['refname'], [])
    same_refname.append(node)
def note_refid(self, node: Element) -> None:
    """Append `node` to the `self.refids` list for its "refid"."""
    same_refid = self.refids.setdefault(node['refid'], [])
    same_refid.append(node)
def note_indirect_target(self, target: target) -> None:
    """Append `target` to `self.indirect_targets`.

    A target with at least one name is also passed to `self.note_refname()`.
    """
    self.indirect_targets.append(target)
    if not target['names']:
        return
    self.note_refname(target)
def note_anonymous_target(self, target: target) -> None:
    """Check/set the identifiers of `target` (see `self.set_id()`)."""
    self.set_id(target)
def note_autofootnote(self, footnote: footnote) -> None:
    """Set the ID of `footnote` and append it to `self.autofootnotes`."""
    self.set_id(footnote)
    self.autofootnotes.append(footnote)
def note_autofootnote_ref(self, ref: footnote_reference) -> None:
    """Set the ID of `ref` and append it to `self.autofootnote_refs`."""
    self.set_id(ref)
    self.autofootnote_refs.append(ref)
def note_symbol_footnote(self, footnote: footnote) -> None:
    """Set the ID of `footnote` and append it to `self.symbol_footnotes`."""
    self.set_id(footnote)
    self.symbol_footnotes.append(footnote)
def note_symbol_footnote_ref(self, ref: footnote_reference) -> None:
    """Set the ID of `ref` and append it to `self.symbol_footnote_refs`."""
    self.set_id(ref)
    self.symbol_footnote_refs.append(ref)
def note_footnote(self, footnote: footnote) -> None:
    """Set the ID of `footnote` and append it to `self.footnotes`."""
    self.set_id(footnote)
    self.footnotes.append(footnote)
def note_footnote_ref(self, ref: footnote_reference) -> None:
    """Set the ID of `ref` and register it under its "refname".

    The reference is appended to the matching list in
    `self.footnote_refs` and passed to `self.note_refname()`.
    """
    self.set_id(ref)
    same_label = self.footnote_refs.setdefault(ref['refname'], [])
    same_label.append(ref)
    self.note_refname(ref)
def note_citation(self, citation: citation) -> None:
    """Append `citation` to the list `self.citations`."""
    self.citations.append(citation)
def note_citation_ref(self, ref: citation_reference) -> None:
    """Set the ID of `ref` and register it under its "refname".

    The reference is appended to the matching list in
    `self.citation_refs` and passed to `self.note_refname()`.
    """
    self.set_id(ref)
    same_label = self.citation_refs.setdefault(ref['refname'], [])
    same_label.append(ref)
    self.note_refname(ref)
def note_substitution_def(self,
                          subdef: substitution_definition,
                          def_name: str,
                          msgnode: Element | None = None,
                          ) -> None:
    """Register the substitution definition `subdef` under `def_name`.

    The whitespace-normalized name is the key in
    `self.substitution_defs`; `self.substitution_names` maps the fully
    normalized name to it. If the name is already defined, an error is
    reported (and added to `msgnode` if given), the name of the old
    definition is invalidated with `dupname()`, and the new definition
    replaces the old one.
    """
    name = whitespace_normalize_name(def_name)
    if name in self.substitution_defs:
        msg = self.reporter.error(
            f'Duplicate substitution definition name: "{name}".',
            base_node=subdef)
        if msgnode is not None:
            msgnode += msg
        dupname(self.substitution_defs[name], name)
    # keep only the last definition:
    self.substitution_defs[name] = subdef
    # case-insensitive mapping:
    self.substitution_names[fully_normalize_name(name)] = name
def note_substitution_ref(self,
                          subref: substitution_reference,
                          refname: str,
                          ) -> None:
    """Store the whitespace-normalized `refname` in ``subref['refname']``.
    """
    normalized = whitespace_normalize_name(refname)
    subref['refname'] = normalized
def note_pending(
        self, pending: pending, priority: int | None = None) -> None:
    """Pass `pending` and `priority` to `self.transformer.add_pending()`."""
    transformer = self.transformer
    transformer.add_pending(pending, priority)
def note_parse_message(self, message: system_message) -> None:
    """Append `message` to the list `self.parse_messages`."""
    self.parse_messages.append(message)
def note_transform_message(self, message: system_message) -> None:
    """Append `message` to the list `self.transform_messages`."""
    self.transform_messages.append(message)
def note_source(self,
                source: StrPath | None,
                offset: int | None,
                ) -> None:
    """Record the current source and line.

    `self.current_source` becomes ``os.fspath(source)``; a false `source`
    value (e.g. ``None``) is stored unchanged.
    `self.current_line` becomes ``offset + 1``, or ``None`` if `offset`
    is ``None``.
    """
    self.current_source = os.fspath(source) if source else source
    self.current_line = None if offset is None else offset + 1
def copy(self) -> Self:
    """Return a new instance of the same class without the children.

    The new object is created from this object's settings, reporter,
    and attributes; `source` and `line` are carried over.
    """
    duplicate = self.__class__(self.settings, self.reporter,
                               **self.attributes)
    duplicate.source = self.source
    duplicate.line = self.line
    return duplicate
def get_decoration(self) -> decoration:
    """Return the document's `decoration` element, creating it on demand.

    A new `decoration` element is inserted at the index returned by
    ``self.first_child_not_matching_class((Titular, meta))``
    or appended if that index is ``None``.
    """
    if self.decoration:
        return self.decoration
    self.decoration: decoration = decoration()
    index = self.first_child_not_matching_class((Titular, meta))
    if index is None:
        self.append(self.decoration)
    else:
        self.insert(index, self.decoration)
    return self.decoration
2304# Bibliographic Elements
2305# ======================
class author(Bibliographic, TextElement):
    """Bibliographic text element "author"."""


class organization(Bibliographic, TextElement):
    """Bibliographic text element "organization"."""


class address(Bibliographic, FixedTextElement):
    """Bibliographic element "address" (a `FixedTextElement`)."""


class contact(Bibliographic, TextElement):
    """Bibliographic text element "contact"."""


class version(Bibliographic, TextElement):
    """Bibliographic text element "version"."""


class revision(Bibliographic, TextElement):
    """Bibliographic text element "revision"."""


class status(Bibliographic, TextElement):
    """Bibliographic text element "status"."""


class date(Bibliographic, TextElement):
    """Bibliographic text element "date"."""


class copyright(Bibliographic, TextElement):  # NoQA: A001 (builtin name)
    """Bibliographic text element "copyright"."""
class authors(Bibliographic, Element):
    """Container for author information for documents with multiple authors.
    """
    # (author, organization?, address?, contact?)+
    content_model: Final = ((author, '+'),
                            (organization, '?'),
                            (address, '?'),
                            (contact, '?'),
                            )

    def validate_content(self,
                         model: _ContentModelTuple | None = None,
                         elements: Sequence | None = None,
                         ) -> list:
        """Repeatedly test for children matching the content model.

        The inherited validation is applied to the children and then
        again to each returned list of leftover elements until nothing
        is left. The arguments `model` and `elements` are not used.

        Provisional.
        """
        leftovers = super().validate_content()
        while leftovers:
            leftovers = super().validate_content(elements=leftovers)
        return leftovers
2342# Body Elements
2343# =============
2344#
2345# General
2346# -------
2347#
2348# Miscellaneous Body Elements and related Body Subelements (Part)
class paragraph(General, TextElement):
    """General body element with text content ("paragraph")."""


class rubric(Titular, General, TextElement):
    """Titular general body element with text content ("rubric")."""
class compound(General, Element):
    """General body element containing one or more body elements."""
    # (%body.elements;)+
    content_model: Final = ((Body, '+'),)
class container(General, Element):
    """General body element containing one or more body elements."""
    # (%body.elements;)+
    content_model: Final = ((Body, '+'),)
class attribution(Part, TextElement):
    """Visible reference to the source of a `block_quote` (text element)."""
class block_quote(General, Element):
    """An extended quotation, set off from the main text.

    Contains body elements, optionally followed by an `attribution`.
    """
    # ((%body.elements;)+, attribution?)
    content_model: Final = ((Body, '+'), (attribution, '?'))
class reference(General, Inline, Referential, TextElement):
    """Referential text element; may be used as body or inline element.

    Accepts the attributes "anonymous", "name", "refid", "refname",
    and "refuri" in addition to the common attributes.
    """
    valid_attributes: Final = Element.valid_attributes + (
        'anonymous', 'name', 'refid', 'refname', 'refuri')
2377# Lists
2378# -----
2379#
2380# Lists (Sequential) and related Body Subelements (Part)
class list_item(Part, Element):
    """List item containing zero or more body elements."""
    # (%body.elements;)*
    content_model: Final = ((Body, '*'),)
class bullet_list(Sequential, Element):
    """List of one or more `list_item` elements; accepts "bullet"."""
    valid_attributes: Final = Element.valid_attributes + ('bullet',)
    # (list_item+)
    content_model: Final = ((list_item, '+'),)
class enumerated_list(Sequential, Element):
    """List of one or more `list_item` elements.

    Accepts the attributes "enumtype", "prefix", "suffix", and "start"
    in addition to the common attributes.
    """
    valid_attributes: Final = Element.valid_attributes + (
        'enumtype', 'prefix', 'suffix', 'start')
    # (list_item+)
    content_model: Final = ((list_item, '+'),)
class term(Part, TextElement):
    """Text element "term" (part of a `definition_list_item`)."""


class classifier(Part, TextElement):
    """Text element "classifier" (part of a `definition_list_item`)."""
class definition(Part, Element):
    """Definition of a `term` in a `definition_list`.

    Contains one or more body elements.
    """
    # (%body.elements;)+
    content_model: Final = ((Body, '+'),)
class definition_list_item(Part, Element):
    """Terms (with optional classifiers) followed by a `definition`."""
    # ((term, classifier*)+, definition)
    content_model: Final = ((term, '.'),
                            ((classifier, term), '*'),
                            (definition, '.'),
                            )
class definition_list(Sequential, Element):
    """List of terms and their definitions.

    Suitable for glossaries and dictionaries, for describing or
    classifying things, for dialogues, and for itemizing subtopics.
    """
    # (definition_list_item+)
    content_model: Final = ((definition_list_item, '+'),)
class field_name(Part, TextElement):
    """Text element "field_name" (first part of a `field`)."""
class field_body(Part, Element):
    """Second part of a `field`: zero or more body elements."""
    # (%body.elements;)*
    content_model: Final = ((Body, '*'),)
class field(Part, Bibliographic, Element):
    """Pair of a `field_name` and a `field_body`."""
    # (field_name, field_body)
    content_model: Final = ((field_name, '.'), (field_body, '.'))
class field_list(Sequential, Element):
    """List of label & data pairs (one or more `field` elements).

    Typically rendered as a two-column list.
    Also used for extension syntax or special processing.
    """
    # (field+)
    content_model: Final = ((field, '+'),)
class option_string(Part, PureTextElement):
    """A literal command-line option. Typically rendered monospaced."""
class option_argument(Part, PureTextElement):
    """Placeholder text for option arguments."""
    valid_attributes: Final = Element.valid_attributes + ('delimiter',)

    def astext(self) -> str:
        """Return the text, preceded by the "delimiter" (default: a space).
        """
        delimiter = self.get('delimiter', ' ')
        return delimiter + TextElement.astext(self)
class option(Part, Element):
    """Option element in an `option_list_item`.

    Combines one `option_string` with any number of `option_argument`
    placeholders.
    """
    child_text_separator: Final = ''
    # (option_string, option_argument*)
    content_model: Final = ((option_string, '.'), (option_argument, '*'))
class option_group(Part, Element):
    """Group of one or more `option` elements that are all synonyms."""
    child_text_separator: Final = ', '
    # (option+)
    content_model: Final = ((option, '+'),)
class description(Part, Element):
    """Description of a command-line option (one or more body elements)."""
    # (%body.elements;)+
    content_model: Final = ((Body, '+'),)
class option_list_item(Part, Element):
    """Container for one `option_group` and its `description`."""
    child_text_separator: Final = '  '
    # (option_group, description)
    content_model: Final = ((option_group, '.'), (description, '.'))
class option_list(Sequential, Element):
    """Two-column list of command-line options and their descriptions."""
    # (option_list_item+)
    content_model: Final = ((option_list_item, '+'),)
2491# Pre-formatted text blocks
2492# -------------------------
class literal_block(General, FixedTextElement):
    """Pre-formatted text block "literal_block"."""


class doctest_block(General, FixedTextElement):
    """Pre-formatted text block "doctest_block"."""
class math_block(General, FixedTextElement, PureTextElement):
    """Mathematical notation set as display formula (body element)."""
class line(Part, TextElement):
    """One line of text inside a `line_block`."""
    # class-level default for the `indent` attribute
    indent: str | None = None
class line_block(General, Element):
    """Sequence of lines and nested line blocks."""


# The content model is recursive and can only be assigned
# after the class definition: (line | line_block)+
line_block.content_model = (((line, line_block), '+'),)
2516# Admonitions
2517# -----------
2518# distinctive and self-contained notices
class attention(Admonition, Element):
    """Admonition element "attention"."""


class caution(Admonition, Element):
    """Admonition element "caution"."""


class danger(Admonition, Element):
    """Admonition element "danger"."""


class error(Admonition, Element):
    """Admonition element "error"."""


class important(Admonition, Element):
    """Admonition element "important"."""


class note(Admonition, Element):
    """Admonition element "note"."""


class tip(Admonition, Element):
    """Admonition element "tip"."""


class hint(Admonition, Element):
    """Admonition element "hint"."""


class warning(Admonition, Element):
    """Admonition element "warning"."""
class admonition(Admonition, Element):
    """Admonition with a `title` followed by one or more body elements."""
    # (title, (%body.elements;)+)
    content_model: Final = ((title, '.'), (Body, '+'))
2536# Footnote and citation
2537# ---------------------
class label(Part, PureTextElement):
    """Visible identifier of a footnote or citation."""
class footnote(General, BackLinkable, Element, Labeled, Targetable):
    """Labelled note providing additional context (footnote or endnote).

    Accepts the attributes "auto" and "backrefs" in addition to the
    common attributes.
    """
    valid_attributes: Final = Element.valid_attributes + ('auto', 'backrefs')
    # (label?, (%body.elements;)+)
    # The label will become required in Docutils 1.0.
    content_model: Final = ((label, '?'), (Body, '+'))
class citation(General, BackLinkable, Element, Labeled, Targetable):
    """A `label` followed by one or more body elements."""
    # (label, (%body.elements;)+)
    content_model: Final = ((label, '.'), (Body, '+'))
2556# Graphical elements
2557# ------------------
class image(General, Inline, Element):
    """Reference to an image resource.

    Can be used as body element and as inline element.
    """
    valid_attributes: Final = Element.valid_attributes + (
        'uri', 'alt', 'align', 'height', 'width', 'scale', 'loading')

    def astext(self) -> str:
        """Return the value of the "alt" attribute (default: empty string).
        """
        alternative_text = self.get('alt', '')
        return alternative_text
class caption(Part, TextElement):
    """Text element "caption" (optional part of a `figure`)."""
class legend(Part, Element):
    """Wrapper for text accompanying a `figure` that is not the caption.

    Contains one or more body elements.
    """
    # (%body.elements;)+
    content_model: Final = ((Body, '+'),)
class figure(General, Element):
    """A formal figure, generally an illustration, with a title.

    Accepts the attributes "align" and "width" in addition to the
    common attributes.
    """
    valid_attributes: Final = Element.valid_attributes + ('align', 'width')
    # (image, ((caption, legend?) | legend))
    # TODO: According to the DTD, a caption or legend is required
    # but rST allows "bare" figures which are formatted differently from
    # images (floating in LaTeX, nested in a <figure> in HTML). [bugs: #489]
    content_model: Final = (((image, reference), '.'),
                            (caption, '?'),
                            (legend, '?'),
                            )
2592# Tables
2593# ------
class entry(Part, Element):
    """A table cell: an entry in a `row`.

    Contains zero or more body elements.
    """
    valid_attributes: Final = Element.valid_attributes + (
        'align', 'char', 'charoff', 'colname', 'colsep', 'morecols',
        'morerows', 'namest', 'nameend', 'rowsep', 'valign')
    # %tbl.entry.mdl -> (%body.elements;)*
    content_model: Final = ((Body, '*'),)
class row(Part, Element):
    """Row of table cells (one or more `entry` elements)."""
    valid_attributes: Final = Element.valid_attributes + ('rowsep', 'valign')
    # (%tbl.row.mdl;) -> entry+
    content_model: Final = ((entry, '+'),)
class colspec(Part, Element):
    """Specifications for a column in a `tgroup`."""
    valid_attributes: Final = Element.valid_attributes + (
        'align', 'char', 'charoff', 'colname', 'colnum',
        'colsep', 'colwidth', 'rowsep', 'stub')

    def propwidth(self) -> int|float:
        """Return numerical value of "colwidth__" attribute. Default 1.

        Raise ValueError if "colwidth" is zero, negative, or a *fixed value*.

        The check is delegated to `validate_colwidth()`; a missing
        attribute is passed on as empty string.

        Provisional.

        __ https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth
        """
        # Move current implementation of validate_colwidth() here
        # in Docutils 1.0
        colwidth = self.get('colwidth', '')
        return validate_colwidth(colwidth)
class thead(Part, Element):
    """Head of a `tgroup`: one or more `row` elements."""
    valid_attributes: Final = Element.valid_attributes + ('valign',)
    # (row+)
    content_model: Final = ((row, '+'),)
class tbody(Part, Element):
    """Body of a `tgroup`: one or more `row` elements."""
    valid_attributes: Final = Element.valid_attributes + ('valign',)
    # (row+)
    content_model: Final = ((row, '+'),)
class tgroup(Part, Element):
    """A portion of a table. Most tables have just one `tgroup`.

    Contains column specifications, an optional head, and a body.
    """
    valid_attributes: Final = Element.valid_attributes + (
        'align', 'cols', 'colsep', 'rowsep')
    # (colspec*, thead?, tbody)
    content_model: Final = ((colspec, '*'), (thead, '?'), (tbody, '.'))
class table(General, Element):
    """A data arrangement with rows and columns.

    Contains an optional `title` and one or more `tgroup` elements.
    """
    valid_attributes: Final = Element.valid_attributes + (
        'align', 'colsep', 'frame', 'pgwide', 'rowsep', 'width')
    # (title?, tgroup+)
    content_model: Final = ((title, '?'), (tgroup, '+'))
2658# Inline Elements
2659# ===============
class abbreviation(Inline, TextElement):
    """Inline text element "abbreviation"."""


class acronym(Inline, TextElement):
    """Inline text element "acronym"."""


class emphasis(Inline, TextElement):
    """Inline text element "emphasis"."""


class generated(Inline, TextElement):
    """Inline text element "generated"."""


class inline(Inline, TextElement):
    """Inline text element "inline"."""


class literal(Inline, TextElement):
    """Inline text element "literal"."""


class strong(Inline, TextElement):
    """Inline text element "strong"."""


class subscript(Inline, TextElement):
    """Inline text element "subscript"."""


class superscript(Inline, TextElement):
    """Inline text element "superscript"."""


class title_reference(Inline, TextElement):
    """Inline text element "title_reference"."""
class footnote_reference(Inline, Referential, PureTextElement):
    """Inline referential element.

    Accepts the attributes "auto", "refid", and "refname" in addition
    to the common attributes.
    """
    valid_attributes: Final = Element.valid_attributes + (
        'auto', 'refid', 'refname')
class citation_reference(Inline, Referential, PureTextElement):
    """Inline referential element; accepts "refid" and "refname"."""
    valid_attributes: Final = Element.valid_attributes + ('refid', 'refname')
class substitution_reference(Inline, TextElement):
    """Inline text element; accepts the attribute "refname"."""
    valid_attributes: Final = Element.valid_attributes + ('refname',)
class math(Inline, PureTextElement):
    """Mathematical notation in running text (inline formula)."""
class problematic(Inline, TextElement):
    """Inline text element; accepts "refid", "refname", and "refuri"."""
    valid_attributes: Final = Element.valid_attributes + (
        'refid', 'refname', 'refuri')
2695# ========================================
2696# Auxiliary Classes, Functions, and Data
2697# ========================================
node_class_names: Sequence[str] = [
    'Text',
    'abbreviation', 'acronym', 'address', 'admonition', 'attention',
    'attribution', 'author', 'authors',
    'block_quote', 'bullet_list',
    'caption', 'caution', 'citation', 'citation_reference', 'classifier',
    'colspec', 'comment', 'compound', 'contact', 'container', 'copyright',
    'danger', 'date', 'decoration', 'definition', 'definition_list',
    'definition_list_item', 'description', 'docinfo', 'doctest_block',
    'document',
    'emphasis', 'entry', 'enumerated_list', 'error',
    'field', 'field_body', 'field_list', 'field_name', 'figure', 'footer',
    'footnote', 'footnote_reference',
    'generated',
    'header', 'hint',
    'image', 'important', 'inline',
    'label', 'legend', 'line', 'line_block', 'list_item', 'literal',
    'literal_block',
    'math', 'math_block', 'meta',
    'note',
    'option', 'option_argument', 'option_group', 'option_list',
    'option_list_item', 'option_string', 'organization',
    'paragraph', 'pending', 'problematic',
    'raw', 'reference', 'revision', 'row', 'rubric',
    'section', 'sidebar', 'status', 'strong', 'subscript',
    'substitution_definition', 'substitution_reference', 'subtitle',
    'superscript', 'system_message',
    'table', 'target', 'tbody', 'term', 'tgroup', 'thead', 'tip', 'title',
    'title_reference', 'topic', 'transition',
    'version',
    'warning',
]
"""A list of names of all concrete Node subclasses."""
class NodeVisitor:
    """
    Abstract superclass implementing the "Visitor" pattern [GoF95]_
    for document tree traversals.

    For every node class there is a pair of handler methods named
    "``visit_`` + node class name" and "``depart_`` + node class name".
    `Node.walk()` calls `dispatch_visit()` upon entering a node;
    `Node.walkabout()` also calls `dispatch_departure()` before exiting
    a node. The dispatch methods look up and call the matching handler.

    Use this base class for visitors that must implement the
    ``visit_...`` & ``depart_...`` methods for *all* compulsory node
    types encountered (such as `docutils.writers.Writer` subclasses).
    Unimplemented methods will raise exceptions (except for optional
    nodes).

    Use the subclass `SparseNodeVisitor` for sparse traversals, where
    only certain node types are of interest. Subclass
    `GenericNodeVisitor` when (mostly or entirely) uniform processing
    is desired.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    optional: ClassVar[tuple[str, ...]] = ('meta',)
    """
    Names of node classes (strings) that writers may leave unimplemented.

    Missing visit or departure functions for these node classes do not
    raise an exception (unless the setting "strict_visitor" is true).

    Used to ensure transitional compatibility with existing 3rd-party writers.
    """

    def __init__(self, document: document, /) -> None:
        self.document: document = document

    def dispatch_visit(self, node) -> None:
        """
        Call the method "``visit_`` + node class name" with `node` as
        argument and return its result.

        Fall back to `self.unknown_visit` if there is no such method.
        """
        class_name = node.__class__.__name__
        handler = getattr(self, 'visit_' + class_name, self.unknown_visit)
        self.document.reporter.debug(
            'docutils.nodes.NodeVisitor.dispatch_visit calling '
            f'{handler.__name__} for {class_name}')
        return handler(node)

    def dispatch_departure(self, node) -> None:
        """
        Call the method "``depart_`` + node class name" with `node` as
        argument and return its result.

        Fall back to `self.unknown_departure` if there is no such method.
        """
        class_name = node.__class__.__name__
        handler = getattr(self, 'depart_' + class_name,
                          self.unknown_departure)
        self.document.reporter.debug(
            'docutils.nodes.NodeVisitor.dispatch_departure calling '
            f'{handler.__name__} for {class_name}')
        return handler(node)

    def unknown_visit(self, node) -> None:
        """
        Called when entering unknown `Node` types.

        Raise `NotImplementedError` unless the node class is listed in
        `self.optional` and the setting "strict_visitor" is false.
        Meant to be overridden.
        """
        class_name = node.__class__.__name__
        if (not self.document.settings.strict_visitor
                and class_name in self.optional):
            return
        raise NotImplementedError(
            f'{self.__class__} visiting unknown node type: {class_name}')

    def unknown_departure(self, node) -> None:
        """
        Called before exiting unknown `Node` types.

        Raise `NotImplementedError` unless the node class is listed in
        `self.optional` and the setting "strict_visitor" is false.
        Meant to be overridden.
        """
        class_name = node.__class__.__name__
        if (not self.document.settings.strict_visitor
                and class_name in self.optional):
            return
        raise NotImplementedError(
            f'{self.__class__} departing unknown node type: {class_name}')
class SparseNodeVisitor(NodeVisitor):
    """
    Base class for sparse traversals, i.e. traversals where only
    certain node types are of interest.

    Subclass `NodeVisitor` instead if ``visit_...`` & ``depart_...``
    methods should be implemented for *all* node types (such as for
    `docutils.writers.Writer` subclasses).
    """
class GenericNodeVisitor(NodeVisitor):
    """
    Generic "Visitor" abstract superclass, for simple traversals.

    Each ``visit_...`` method calls `default_visit()` and each
    ``depart_...`` method (when using `Node.walkabout()`) calls
    `default_departure()` unless it is overridden. Subclasses must
    override `default_visit()` (and `default_departure()`).

    For a fully generic visitor, override `default_visit()` (and
    `default_departure()`) only. For a semi-generic visitor, override
    individual ``visit_...()`` (and ``depart_...()``) methods as well.

    `NodeVisitor.unknown_visit()` (`NodeVisitor.unknown_departure()`) should
    be overridden for default behavior.
    """

    def default_visit(self, node):
        """Handle entering `node`. Override for generic, uniform traversals.
        """
        raise NotImplementedError

    def default_departure(self, node):
        """Handle leaving `node`. Override for generic, uniform traversals.
        """
        raise NotImplementedError
def _call_default_visit(self: GenericNodeVisitor, node) -> None:
    # Shared implementation of the ``visit_...`` methods
    # of `GenericNodeVisitor`.
    self.default_visit(node)
def _call_default_departure(self: GenericNodeVisitor, node) -> None:
    # Shared implementation of the ``depart_...`` methods
    # of `GenericNodeVisitor`.
    self.default_departure(node)
def _nop(self: SparseNodeVisitor, node) -> None:
    # Shared do-nothing implementation of the ``visit_...`` and
    # ``depart_...`` methods of `SparseNodeVisitor`.
    return None
def _add_node_class_names(names) -> None:
    """Save typing with dynamic assignments:

    For each name in `names`, add the methods "``visit_`` + name" and
    "``depart_`` + name" to `GenericNodeVisitor` (calling the default
    handlers) and to `SparseNodeVisitor` (doing nothing).
    """
    handlers = (
        (GenericNodeVisitor, _call_default_visit, _call_default_departure),
        (SparseNodeVisitor, _nop, _nop),
    )
    for _name in names:
        for visitor_class, on_visit, on_departure in handlers:
            setattr(visitor_class, 'visit_' + _name, on_visit)
            setattr(visitor_class, 'depart_' + _name, on_departure)


_add_node_class_names(node_class_names)
class TreeCopyVisitor(GenericNodeVisitor):
    """
    Visitor that builds a complete copy of a tree or branch,
    including element attributes.
    """

    def __init__(self, document: document) -> None:
        super().__init__(document)
        # stack of the enclosing acting parents
        self.parent_stack: list[list] = []
        # acting parent; starts as a plain list collecting the copied root
        self.parent: list = []

    def get_tree_copy(self):
        """Return the first item of the acting parent (`self.parent`)."""
        return self.parent[0]

    def default_visit(self, node) -> None:
        """Copy the current node, and make it the new acting parent."""
        duplicate = node.copy()
        self.parent.append(duplicate)
        self.parent_stack.append(self.parent)
        self.parent = duplicate

    def default_departure(self, node) -> None:
        """Restore the previous acting parent."""
        self.parent = self.parent_stack.pop()
2906# Custom Exceptions
2907# =================
class ValidationError(ValueError):
    """Invalid Docutils Document Tree Element.

    The attribute `problematic_element` holds the offending element
    passed to the constructor (``None`` if it was not specified).
    """
    def __init__(self,
                 msg: str,
                 problematic_element: Element | None = None,
                 ) -> None:
        super().__init__(msg)
        self.problematic_element = problematic_element
class TreePruningException(Exception):
    """
    Base class for `NodeVisitor`-related tree pruning exceptions.

    Raise subclasses from within ``visit_...`` or ``depart_...`` methods
    called from `Node.walk()` and `Node.walkabout()` tree traversals to prune
    the tree traversed.

    Subclasses defined in this module: `SkipChildren`, `SkipSiblings`,
    `SkipNode`, `SkipDeparture`, `NodeFound`, and `StopTraversal`.
    """
class SkipChildren(TreePruningException):
    """
    Do not visit any children of the current node.

    The current node's siblings and its ``depart_...`` method are not
    affected.
    """
class SkipSiblings(TreePruningException):
    """
    Do not visit any more siblings (to the right) of the current node.

    The current node's children and its ``depart_...`` method are not
    affected.
    """
class SkipNode(TreePruningException):
    """
    Skip the rest of the current node.

    Do not visit the current node's children, and do not call the current
    node's ``depart_...`` method.
    """
class SkipDeparture(TreePruningException):
    """
    Do not call the current node's ``depart_...`` method.

    The current node's children and siblings are not affected.
    """
class NodeFound(TreePruningException):
    """
    Raise to indicate that the target of a search has been found.

    This exception must be caught by the client; it is not caught by the
    traversal code.
    """
class StopTraversal(TreePruningException):
    """
    Stop the traversal altogether.

    The current node's ``depart_...`` method is not affected.  The parent
    nodes' ``depart_...`` methods are also called as usual.  No other nodes
    are visited.  This is an alternative to `NodeFound` that does not cause
    exception handling to trickle up to the caller.
    """
2972# definition moved here from `utils` to avoid circular import dependency
2973def unescape(text: str,
2974 restore_backslashes: bool = False,
2975 respect_whitespace: bool = False,
2976 ) -> str:
2977 """
2978 Return a string with nulls removed or restored to backslashes.
2979 Backslash-escaped spaces are also removed.
2980 """
2981 # `respect_whitespace` is ignored (since introduction 2016-12-16)
2982 if restore_backslashes:
2983 return text.replace('\x00', '\\')
2984 else:
2985 for sep in ['\x00 ', '\x00\n', '\x00']:
2986 text = ''.join(text.split(sep))
2987 return text
def make_id(string: str) -> str:
    """
    Convert `string` into an identifier and return it.

    Docutils identifiers will conform to the regular expression
    ``[a-z](-?[a-z0-9]+)*``.  For CSS compatibility, identifiers (the "class"
    and "id" attributes) should have no underscores, colons, or periods.
    Hyphens may be used.

    - The `HTML 4.01 spec`_ defines identifiers based on SGML tokens:

          ID and NAME tokens must begin with a letter ([A-Za-z]) and may be
          followed by any number of letters, digits ([0-9]), hyphens ("-"),
          underscores ("_"), colons (":"), and periods (".").

    - However the `CSS1 spec`_ defines identifiers based on the "name" token,
      a tighter interpretation ("flex" tokenizer notation; "latin1" and
      "escape" 8-bit characters have been replaced with entities)::

          unicode     \\[0-9a-f]{1,4}
          latin1      [&iexcl;-&yuml;]
          escape      {unicode}|\\[ -~&iexcl;-&yuml;]
          nmchar      [-a-z0-9]|{latin1}|{escape}
          name        {nmchar}+

    The CSS1 "nmchar" rule does not include underscores ("_"), colons (":"),
    or periods ("."), therefore "class" and "id" attributes should not contain
    these characters.  They should be replaced with hyphens ("-").  Combined
    with HTML's requirements (the first character must be a letter; no
    "unicode", "latin1", or "escape" characters), this results in the
    ``[a-z](-?[a-z0-9]+)*`` pattern.

    The result may be the empty string (e.g. if `string` contains no
    ASCII letters).

    .. _HTML 4.01 spec: https://www.w3.org/TR/html401
    .. _CSS1 spec: https://www.w3.org/TR/REC-CSS1
    """
    identifier = string.lower()
    # Replace letters that have no ASCII decomposition: first the ones that
    # expand to two characters, then the one-to-one replacements.
    for table in (_non_id_translate_digraphs, _non_id_translate):
        identifier = identifier.translate(table)
    # Get rid of the remaining non-ASCII characters: decompose accented
    # letters, then drop everything outside ASCII.
    decomposed = unicodedata.normalize('NFKD', identifier)
    identifier = decomposed.encode('ascii', 'ignore').decode('ascii')
    # Shrink runs of whitespace, then replace every run of characters
    # that are not allowed in identifiers by a single hyphen.
    collapsed = ' '.join(identifier.split())
    identifier = _non_id_chars.sub('-', collapsed)
    # Strip leading digits/hyphens and trailing hyphens.
    return str(_non_id_at_ends.sub('', identifier))


# One or more characters not allowed in an identifier.
_non_id_chars: re.Pattern[str] = re.compile('[^a-z0-9]+')
# Leading hyphens and digits or trailing hyphens.
_non_id_at_ends: re.Pattern[str] = re.compile('^[-0-9]+|-+$')
# Lowercase letters replaced by a single ASCII letter
# (code point -> replacement, for `str.translate()`).
_non_id_translate: dict[int, str] = {
    0x00f8: 'o',  # o with stroke
    0x0111: 'd',  # d with stroke
    0x0127: 'h',  # h with stroke
    0x0131: 'i',  # dotless i
    0x0142: 'l',  # l with stroke
    0x0167: 't',  # t with stroke
    0x0180: 'b',  # b with stroke
    0x0183: 'b',  # b with topbar
    0x0188: 'c',  # c with hook
    0x018c: 'd',  # d with topbar
    0x0192: 'f',  # f with hook
    0x0199: 'k',  # k with hook
    0x019a: 'l',  # l with bar
    0x019e: 'n',  # n with long right leg
    0x01a5: 'p',  # p with hook
    0x01ab: 't',  # t with palatal hook
    0x01ad: 't',  # t with hook
    0x01b4: 'y',  # y with hook
    0x01b6: 'z',  # z with stroke
    0x01e5: 'g',  # g with stroke
    0x0225: 'z',  # z with hook
    0x0234: 'l',  # l with curl
    0x0235: 'n',  # n with curl
    0x0236: 't',  # t with curl
    0x0237: 'j',  # dotless j
    0x023c: 'c',  # c with stroke
    0x023f: 's',  # s with swash tail
    0x0240: 'z',  # z with swash tail
    0x0247: 'e',  # e with stroke
    0x0249: 'j',  # j with stroke
    0x024b: 'q',  # q with hook tail
    0x024d: 'r',  # r with stroke
    0x024f: 'y',  # y with stroke
}
# Lowercase letters replaced by two ASCII letters
# (code point -> replacement, for `str.translate()`).
_non_id_translate_digraphs: dict[int, str] = {
    0x00df: 'sz',  # ligature sz
    0x00e6: 'ae',  # ae
    0x0153: 'oe',  # ligature oe
    0x0238: 'db',  # db digraph
    0x0239: 'qp',  # qp digraph
}
def dupname(node: Element, name: str) -> None:
    """Mark `name` as duplicate name of `node`.

    Move `name` from ``node['names']`` to ``node['dupnames']`` and flag
    `node` as referenced.  Both lists are modified in place.
    """
    node['dupnames'].append(name)
    node['names'].remove(name)
    # Assume that `node` is referenced, even though it isn't;
    # we don't want to throw unnecessary system_messages.
    node.referenced = True
def fully_normalize_name(name: str) -> str:
    """Return `name` in lowercase with whitespace normalized.

    Leading and trailing whitespace is removed, internal whitespace runs
    are replaced by a single space.
    """
    words = name.lower().split()
    return ' '.join(words)
def whitespace_normalize_name(name: str) -> str:
    """Return `name` with whitespace normalized (case is preserved).

    Leading and trailing whitespace is removed, internal whitespace runs
    are replaced by a single space.
    """
    words = name.split()
    return ' '.join(words)
def serial_escape(value: str) -> str:
    """Escape string values that are elements of a list, for serialization.

    Backslashes are doubled and spaces are prefixed with a backslash
    (cf. `split_name_list()` for the reverse operation).
    """
    escapes = str.maketrans({'\\': r'\\', ' ': r'\ '})
    return value.translate(escapes)
def split_name_list(s: str) -> list[str]:
    r"""Split a string at non-escaped spaces.

    Backslashes escape internal spaces (cf. `serial_escape()`).
    Return list of "names" (after removing escaping backslashes).

    >>> split_name_list(r'a\ n\ame two\\ n\\ames')
    ['a name', 'two\\', 'n\\ames']

    Provisional.
    """
    null = '\x00'
    # Mark all backslashes with a NULL char, ...
    marked = s.replace('\\', null)
    # ... turn doubled (i.e. escaped) backslashes back into one backslash,
    marked = marked.replace(null + null, '\\')
    # ... and protect escaped spaces from splitting (-> NULL NULL).
    marked = marked.replace(null + ' ', null + null)
    # Restore internal spaces, drop other escaping characters.
    return [part.replace(null + null, ' ').replace(null, '')
            for part in marked.split(' ')]
def pseudo_quoteattr(value: str) -> str:
    """Quote attributes for pseudo-xml.

    Wrap `value` in double quotes; the value itself is not escaped.
    """
    return f'"{value!s}"'
3132def parse_measure(measure: str, unit_pattern: str = '[a-zA-Zµ]*|%?'
3133 ) -> tuple[int|float, str]:
3134 """Parse a measure__, return value + unit.
3136 `unit_pattern` is a regular expression describing recognized units.
3137 The default is suited for (but not limited to) CSS3 units and SI units.
3138 It matches runs of ASCII letters or Greek mu, a single percent sign,
3139 or no unit.
3141 __ https://docutils.sourceforge.io/docs/ref/doctree.html#measure
3143 Provisional.
3144 """
3145 match = re.fullmatch(f'(-?[0-9.]+) *({unit_pattern})', measure)
3146 try:
3147 try:
3148 value = int(match.group(1))
3149 except ValueError:
3150 value = float(match.group(1))
3151 unit = match.group(2)
3152 except (AttributeError, ValueError):
3153 raise ValueError(f'"{measure}" is no valid measure.')
3154 return value, unit
3157# Methods to validate `Element attribute`__ values.
3159# Ensure the expected Python `data type`__, normalize, and check for
3160# restrictions.
3161#
3162# The methods can be used to convert `str` values (eg. from an XML
3163# representation) or to validate an existing document tree or node.
3164#
3165# Cf. `Element.validate_attributes()`, `docutils.parsers.docutils_xml`,
3166# and the `attribute_validating_functions` mapping below.
3167#
3168# __ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-reference
3169# __ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-types
def create_keyword_validator(*keywords: str) -> Callable[[str], str]:
    """
    Return a function that validates a `str` against given `keywords`.

    The returned function returns its argument unchanged if it is one of
    the `keywords` and raises a `ValueError` listing the allowed keywords
    otherwise.

    Provisional.
    """
    def validate_keywords(value: str) -> str:
        if value in keywords:
            return value
        allowed = '", "'.join(keywords)
        raise ValueError(f'"{value}" is not one of "{allowed}".')

    return validate_keywords
def validate_identifier(value: str) -> str:
    """
    Validate identifier key or class name.

    A valid identifier is left unchanged by `make_id()`.
    Return `value`; raise `ValueError` if it is not valid.

    Used in `idref.type`__ and for the tokens in `validate_identifier_list()`.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#idref-type

    Provisional.
    """
    if value == make_id(value):
        return value
    raise ValueError(f'"{value}" is no valid id or class name.')
def validate_identifier_list(value: str | list[str]) -> list[str]:
    """
    A (space-separated) list of ids or class names.

    `value` may be a `list` or a `str` with space separated
    ids or class names (cf. `validate_identifier()`).
    A `list` argument is returned as-is after validating its items;
    a `str` argument is split at whitespace.

    Used in `classnames.type`__, `ids.type`__, and `idrefs.type`__.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#classnames-type
    __ https://docutils.sourceforge.io/docs/ref/doctree.html#ids-type
    __ https://docutils.sourceforge.io/docs/ref/doctree.html#idrefs-type

    Provisional.
    """
    tokens = value.split() if isinstance(value, str) else value
    for token in tokens:
        validate_identifier(token)  # raises ValueError for invalid tokens
    return tokens
def validate_measure(measure: str) -> str:
    """
    Validate a measure__ (number + optional unit).  Return normalized `str`.

    The normalized form is the parsed number directly followed by the
    unit, i.e. without space between number and unit.

    See `parse_measure()` for a function returning a "number + unit" tuple.

    The unit may be a run of ASCII letters or Greek mu, a single percent sign,
    or the empty string.  Case is preserved.

    Provisional.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#measure
    """
    number, unit = parse_measure(measure)
    return str(number) + unit
def validate_colwidth(measure: str|int|float) -> int|float:
    """Validate the `colwidth`__ attribute.

    Numbers are accepted as-is, the strings "*" and "" stand for 1,
    other strings are parsed as a number with optional unit "*".
    Return the numerical value; raise `ValueError` if `measure` cannot
    be parsed or the value is not positive.

    Provisional:
      `measure` must be a `str` and will be returned as normalized `str`
      (with unit "*" for proportional values) in Docutils 1.0.

      The default unit will change to "pt" in Docutils 2.0.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth
    """
    width = -1  # invalid until proven otherwise
    if isinstance(measure, (int, float)):
        width = measure
    elif measure in ('*', ''):  # short for '1*'
        width = 1
    else:
        try:
            width = parse_measure(measure, unit_pattern='[*]?')[0]
        except ValueError:
            pass  # reported below
    if width <= 0:
        raise ValueError(f'"{measure}" is no proportional measure.')
    return width
def validate_NMTOKEN(value: str) -> str:
    """
    Validate a "name token": a `str` of ASCII letters, digits, and [-._].

    Return `value`; raise `ValueError` if it is empty or contains
    other characters.

    Provisional.
    """
    if re.fullmatch('[-._A-Za-z0-9]+', value):
        return value
    raise ValueError(f'"{value}" is no NMTOKEN.')
def validate_NMTOKENS(value: str | list[str]) -> list[str]:
    """
    Validate a list of "name tokens".

    `value` may be a `list` or a `str` with whitespace separated tokens
    (cf. `validate_NMTOKEN()`).
    A `list` argument is returned as-is after validating its items.

    Provisional.
    """
    tokens = value.split() if isinstance(value, str) else value
    for token in tokens:
        validate_NMTOKEN(token)  # raises ValueError for invalid tokens
    return tokens
3288def validate_refname_list(value: str | list[str]) -> list[str]:
3289 """
3290 Validate a list of `reference names`__.
3292 Reference names may contain all characters;
3293 whitespace is normalized (cf, `whitespace_normalize_name()`).
3295 `value` may be either a `list` of names or a `str` with
3296 space separated names (with internal spaces backslash escaped
3297 and literal backslashes doubled cf. `serial_escape()`).
3299 Return a list of whitespace-normalized, unescaped reference names.
3301 Provisional.
3303 __ https://docutils.sourceforge.io/docs/ref/doctree.html#reference-name
3304 """
3305 if isinstance(value, str):
3306 value = split_name_list(value)
3307 return [whitespace_normalize_name(name) for name in value]
def validate_yesorno(value: str | int | bool) -> bool:
    """Validate a `%yesorno`__ (flag) value.

    The string literal "0" evaluates to ``False``, all other
    values are converted with `bool()`.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#yesorno
    """
    return False if value == "0" else bool(value)
# Each value is a callable that takes an attribute value and returns the
# validated/normalized value (plain `str` and `int` conversions are used
# where no dedicated validating function exists).
ATTRIBUTE_VALIDATORS: dict[str, Callable[[str], Any]] = {
    'alt': str,  # CDATA
    'align': str,
    'anonymous': validate_yesorno,
    'auto': str,  # CDATA (only '1' or '*' are used in rST)
    'backrefs': validate_identifier_list,
    'bullet': str,  # CDATA (only '-', '+', or '*' are used in rST)
    'classes': validate_identifier_list,
    'char': str,  # from Exchange Table Model (CALS), currently ignored
    'charoff': validate_NMTOKEN,  # from CALS, currently ignored
    'colname': validate_NMTOKEN,  # from CALS, currently ignored
    'colnum': int,  # from CALS, currently ignored
    'cols': int,  # from CALS: "NMTOKEN, […] must be an integer > 0".
    'colsep': validate_yesorno,
    'colwidth': validate_colwidth,  # see docstring for pending changes
    'content': str,  # <meta>
    'delimiter': str,
    'dir': create_keyword_validator('ltr', 'rtl', 'auto'),  # <meta>
    'dupnames': validate_refname_list,
    'enumtype': create_keyword_validator('arabic', 'loweralpha', 'lowerroman',
                                         'upperalpha', 'upperroman'),
    'format': str,  # CDATA (space separated format names)
    'frame': create_keyword_validator('top', 'bottom', 'topbot', 'all',
                                      'sides', 'none'),  # from CALS, ignored
    'height': validate_measure,
    'http-equiv': str,  # <meta>
    'ids': validate_identifier_list,
    'lang': str,  # <meta>
    'level': int,
    'line': int,
    'ltrim': validate_yesorno,
    'loading': create_keyword_validator('embed', 'link', 'lazy'),
    'media': str,  # <meta>
    'morecols': int,
    'morerows': int,
    'name': whitespace_normalize_name,  # in <reference> (deprecated)
    # NOTE(review): the mapping has one entry per attribute name, so the
    # alternative validator for "name" in <meta> is only recorded here:
    # 'name': node_attributes.validate_NMTOKEN,  # in <meta>
    'names': validate_refname_list,
    'namest': validate_NMTOKEN,  # start of span, from CALS, currently ignored
    'nameend': validate_NMTOKEN,  # end of span, from CALS, currently ignored
    'pgwide': validate_yesorno,  # from CALS, currently ignored
    'prefix': str,
    'refid': validate_identifier,
    'refname': whitespace_normalize_name,
    'refuri': str,
    'rowsep': validate_yesorno,
    'rtrim': validate_yesorno,
    'scale': int,
    'scheme': str,
    'source': str,
    'start': int,
    'stub': validate_yesorno,
    'suffix': str,
    'title': str,
    'type': validate_NMTOKEN,
    'uri': str,
    'valign': create_keyword_validator('top', 'middle', 'bottom'),  # from CALS
    'width': validate_measure,
    'xml:space': create_keyword_validator('default', 'preserve'),
    }
"""
Mapping of `attribute names`__ to validating functions.

Provisional.

__ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-reference
"""