Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/docutils/nodes.py: 62%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# $Id$
2# Author: David Goodger <goodger@python.org>
3# Maintainer: docutils-develop@lists.sourceforge.net
4# Copyright: This module has been placed in the public domain.
6"""
Docutils document tree element class library.

The relationships and semantics of elements and attributes are documented in
`The Docutils Document Tree`__.

Classes in CamelCase are abstract base classes or auxiliary classes. The one
exception is `Text`, for a text (PCDATA) node; uppercase is used to
differentiate from element classes. Classes in lower_case_with_underscores
are element classes, matching the XML element generic identifiers in the DTD_.

The position of each node (the level at which it can occur) is significant and
is represented by abstract base classes (`Root`, `Structural`, `Body`,
`Inline`, etc.). Certain transformations will be easier because we can use
``isinstance(node, base_class)`` to determine the position of the node in the
hierarchy.

__ https://docutils.sourceforge.io/docs/ref/doctree.html
.. _DTD: https://docutils.sourceforge.io/docs/ref/docutils.dtd
25"""
27from __future__ import annotations
29__docformat__ = 'reStructuredText'
31import os
32import re
33import sys
34import unicodedata
35import warnings
36from collections import Counter
37# import xml.dom.minidom as dom # -> conditional import in Node.asdom()
38# and document.asdom()
40# import docutils.transforms # -> delayed import in document.__init__()
# Names needed only by static type checkers.  The flag is a plain ``False``
# (instead of ``typing.TYPE_CHECKING``) so that nothing below is imported or
# evaluated at run time.
TYPE_CHECKING = False
if TYPE_CHECKING:
    from collections.abc import (Callable, Iterable, Iterator,
                                 Mapping, Sequence)
    from types import ModuleType
    from typing import Any, ClassVar, Final, Literal, Self, SupportsIndex

    from docutils.utils._typing import TypeAlias

    from xml.dom import minidom

    from docutils.frontend import Values
    from docutils.transforms import Transformer, Transform
    from docutils.utils import Reporter

    # A content model category is "a class or a tuple of classes"
    # (cf. the `Element.content_model` documentation), i.e. something
    # that can be passed as second argument to ``isinstance()``.
    _ContentModelCategory: TypeAlias = type | tuple[type, ...]
    _ContentModelQuantifier = Literal['.', '?', '+', '*']
    _ContentModelItem: TypeAlias = tuple[_ContentModelCategory,
                                         _ContentModelQuantifier]
    _ContentModelTuple: TypeAlias = tuple[_ContentModelItem, ...]

    StrPath: TypeAlias = str | os.PathLike[str]
    """File system path. No bytes!"""

    _UpdateFun: TypeAlias = Callable[[str, Any, bool], None]
69# ==============================
70# Functional Node Base Classes
71# ==============================
class Node:
    """Abstract base class of nodes in a document tree."""

    parent: Element | None = None
    """Back-reference to the Node immediately containing this Node."""

    children: Sequence  # defined in subclasses
    """List of child nodes (Elements or Text).

    Override in subclass instances that are not terminal nodes.
    """

    source: StrPath | None = None
    """Path or description of the input source which generated this Node."""

    line: int | None = None
    """The line number (1-based) of the beginning of this Node in `source`."""

    tagname: str  # defined in subclasses
    """The element generic identifier."""

    _document: document | None = None

    @property
    def document(self) -> document | None:
        """Return the `document` root node of the tree containing this Node.

        The value stored on the node itself wins; otherwise the parent is
        asked.  Return None if no document can be found.
        """
        if self._document:
            return self._document
        try:
            return self.parent.document
        except AttributeError:  # no parent (or parent without `document`)
            return None

    @document.setter
    def document(self, value: document) -> None:
        self._document = value

    def __bool__(self) -> Literal[True]:
        """
        Return True: a Node is always true, even without child nodes.

        A node is more than a simple container; use `len()` to check
        whether it has children.
        """
        return True

    def asdom(self,
              dom: ModuleType | None = None,
              ) -> minidom.Document | minidom.Element | minidom.Text:
        # TODO: minidom.Document is only returned by document.asdom()
        #       (which overwrites this base-class implementation)
        """Return a DOM **fragment** representation of this Node.

        `dom` is the DOM implementation module to use
        (default: `xml.dom.minidom`, imported on demand).
        """
        if dom is None:
            import xml.dom.minidom as dom
        return self._dom_node(dom.Document())

    def pformat(self, indent: str = '    ', level: int = 0) -> str:
        """
        Return an indented pseudo-XML representation, for test purposes.

        Abstract: override in subclasses.
        """
        raise NotImplementedError

    def copy(self) -> Self:
        """Return a copy of self.  Abstract: override in subclasses."""
        raise NotImplementedError

    def deepcopy(self) -> Self:
        """Return a deep copy of self (also copying children).

        Abstract: override in subclasses.
        """
        raise NotImplementedError

    def astext(self) -> str:
        """Return a string representation of this Node.

        Abstract: override in subclasses.
        """
        raise NotImplementedError

    def setup_child(self, child) -> None:
        """Make `self` the parent of `child` and pass on document info.

        If `self` belongs to a document, `child` gets the same document and
        -- unless already set -- the document's current source and line.
        """
        child.parent = self
        doc = self.document
        if not doc:
            return
        child.document = doc
        if child.source is None:
            child.source = doc.current_source
        if child.line is None:
            child.line = doc.current_line

    def walk(self, visitor: NodeVisitor) -> bool:
        """
        Traverse a tree of `Node` objects, calling the
        `dispatch_visit()` method of `visitor` when entering each
        node.  (The `walkabout()` method is similar, except it also
        calls the `dispatch_departure()` method before exiting each
        node.)

        This tree traversal supports limited in-place tree
        modifications.  Replacing one node with one or more nodes is
        OK, as is removing an element.  However, if the node removed
        or replaced occurs after the current node, the old node will
        still be traversed, and any new nodes will not.

        Within ``visit`` methods (and ``depart`` methods for
        `walkabout()`), `TreePruningException` subclasses may be raised
        (`SkipChildren`, `SkipSiblings`, `SkipNode`, `SkipDeparture`).

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` implementation for each `Node` subclass encountered.

        Return true if we should stop the traversal.
        """
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walk calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except (SkipChildren, SkipNode):
                return False
            except SkipDeparture:  # not applicable; ignore
                pass
            try:
                # iterate over a copy, the visitor may modify the tree
                if any(child.walk(visitor) for child in self.children[:]):
                    return True
            except SkipSiblings:
                pass
        except StopTraversal:
            return True
        return False

    def walkabout(self, visitor: NodeVisitor) -> bool:
        """
        Perform a tree traversal similarly to `Node.walk()` (which
        see), except also call the `dispatch_departure()` method
        before exiting each node.

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` and ``depart`` implementation for each `Node`
        subclass encountered.

        Return true if we should stop the traversal.
        """
        depart = True
        stop = False
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walkabout calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except SkipNode:
                return stop
            except SkipDeparture:
                depart = False
            try:
                # iterate over a copy, the visitor may modify the tree
                stop = any(child.walkabout(visitor)
                           for child in self.children[:])
            except SkipSiblings:
                pass
        except SkipChildren:
            pass
        except StopTraversal:
            stop = True
        if depart:
            visitor.document.reporter.debug(
                'docutils.nodes.Node.walkabout calling dispatch_departure '
                'for %s' % self.__class__.__name__)
            visitor.dispatch_departure(self)
        return stop

    def _fast_findall(self, cls: type) -> Iterator:
        """Return iterator that only supports instance checks."""
        if isinstance(self, cls):
            yield self
        for child in self.children:
            yield from child._fast_findall(cls)

    def _superfast_findall(self) -> Iterator:
        """Return iterator that doesn't check for a condition."""
        # Not the same as ``iter(self)``: iterating over an Element
        # (via its __getitem__() and __len__()) yields the direct
        # children only, this yields `self` and all descendants.
        yield self
        for child in self.children:
            yield from child._superfast_findall()

    def findall(self,
                condition: type | Callable[[Node], bool] | None = None,
                include_self: bool = True,
                descend: bool = True,
                siblings: bool = False,
                ascend: bool = False,
                ) -> Iterator:
        """
        Return an iterator yielding nodes following `self`:

        * self (if `include_self` is true)
        * all descendants in tree traversal order (if `descend` is true)
        * the following siblings (if `siblings` is true) and their
          descendants (if also `descend` is true)
        * the following siblings of the parent (if `ascend` is true) and
          their descendants (if also `descend` is true), and so on.

        If `condition` is not None, the iterator yields only nodes
        for which ``condition(node)`` is true.  If `condition` is a
        type ``cls``, it is equivalent to a function consisting
        of ``return isinstance(node, cls)``.

        If `ascend` is true, assume `siblings` to be true as well.

        If the tree structure is modified during iteration, the result
        is undefined.

        For example, given the following tree::

            <paragraph>
                <emphasis>      <--- emphasis.findall() and
                    <strong>    <--- strong.findall() are called.
                        Foo
                    Bar
                <reference name="Baz" refid="baz">
                    Baz

        Then tuple(emphasis.findall()) equals ::

            (<emphasis>, <strong>, <#text: Foo>, <#text: Bar>)

        and list(strong.findall(ascend=True)) equals ::

            [<strong>, <#text: Foo>, <#text: Bar>, <reference>, <#text: Baz>]
        """
        if ascend:
            siblings = True
        # Shortcuts for the most common argument combinations:
        if include_self and descend and not siblings:
            if condition is None:
                yield from self._superfast_findall()
                return
            if isinstance(condition, type):
                yield from self._fast_findall(condition)
                return
        # Normalize a class `condition` to a test function.
        test = condition
        if isinstance(condition, type):
            def test(node, node_class=condition):
                return isinstance(node, node_class)

        if include_self and (test is None or test(self)):
            yield self
        if descend and len(self.children):
            for child in self:
                yield from child.findall(condition=test,
                                         include_self=True, descend=True,
                                         siblings=False, ascend=False)
        if siblings or ascend:
            node = self
            parent = node.parent
            while parent:
                position = parent.index(node)
                # extra check since Text nodes have value-equality
                while parent[position] is not node:
                    position = parent.index(node, position + 1)
                for sibling in parent[position+1:]:
                    yield from sibling.findall(
                        condition=test,
                        include_self=True, descend=descend,
                        siblings=False, ascend=False)
                if not ascend:
                    break
                node = parent
                parent = node.parent

    def traverse(self,
                 condition: type | Callable[[Node], bool] | None = None,
                 include_self: bool = True,
                 descend: bool = True,
                 siblings: bool = False,
                 ascend: bool = False,
                 ) -> list:
        """Return list of nodes following `self`.

        Deprecated wrapper around `findall()` (same arguments).
        For looping, Node.findall() is faster and more memory efficient.
        """
        # traverse() may be eventually removed:
        warnings.warn('nodes.Node.traverse() is obsoleted by Node.findall().',
                      DeprecationWarning, stacklevel=2)
        found = self.findall(condition, include_self, descend,
                             siblings, ascend)
        return list(found)

    def next_node(self,
                  condition: type | Callable[[Node], bool] | None = None,
                  include_self: bool = False,
                  descend: bool = True,
                  siblings: bool = False,
                  ascend: bool = False,
                  ) -> Node | None:
        """
        Return the first node in the iterator returned by findall(),
        or None if the iterable is empty.

        Parameter list is the same as of `findall()`.  Note that `include_self`
        defaults to False, though.
        """
        found = self.findall(condition, include_self,
                             descend, siblings, ascend)
        return next(found, None)

    def validate(self, recursive: bool = True) -> None:
        """Raise ValidationError if this node is not valid.

        The base implementation does nothing.
        Override in subclasses that define validity constraints.
        """

    def validate_position(self) -> None:
        """Hook for additional checks of the parent's content model.

        Raise ValidationError, if `self` is at an invalid position.
        The base implementation does nothing.

        Override in subclasses with complex validity constraints.  See
        `subtitle.validate_position()` and `transition.validate_position()`.
        """
class Text(Node, str):  # NoQA: SLOT000 (Node doesn't define __slots__)
    """
    Terminal node (leaf) of the document tree that holds text only.

    Text nodes have neither child nodes nor attributes.
    Initialize by passing a string to the constructor.

    Access the raw (null-escaped) text with ``str(<instance>)``
    and unescaped text with ``<instance>.astext()``.
    """

    tagname: Final = '#text'

    children: Final = ()
    """Text nodes have no children, and cannot have children."""

    def __new__(cls, data: str, rawsource: None = None) -> Self:
        """Create the node from the string `data`.

        Raise TypeError if `data` is a `bytes` instance and warn if the
        deprecated (and ignored) `rawsource` argument is used.
        """
        if isinstance(data, bytes):
            raise TypeError('expecting str data, not bytes')
        if rawsource is not None:
            warnings.warn('nodes.Text: initialization argument "rawsource" '
                          'is ignored and will be removed in Docutils 2.0.',
                          DeprecationWarning, stacklevel=2)
        return str.__new__(cls, data)

    def shortrepr(self, maxlen: int = 18) -> str:
        """Return a representation with the text cut to `maxlen` characters.
        """
        text = str(self)
        if len(text) > maxlen:
            text = text[:maxlen-4] + ' ...'
        return '<%s: %r>' % (self.tagname, text)

    def __repr__(self) -> str:
        return self.shortrepr(maxlen=68)

    def astext(self) -> str:
        """Return the unescaped text as plain `str`."""
        return str(unescape(self))

    def _dom_node(self, domroot: minidom.Document) -> minidom.Text:
        """Return a DOM text node with the raw text, created by `domroot`."""
        return domroot.createTextNode(str(self))

    def copy(self) -> Self:
        """Return a new instance of the same class with the same text."""
        return self.__class__(str(self))

    def deepcopy(self) -> Self:
        """Return a copy (there are no children to copy)."""
        return self.copy()

    def pformat(self, indent: str = '    ', level: int = 0) -> str:
        """Return an indented pseudo-XML representation of the text.

        If the document setting "detailed" is true, return a ``<#text>``
        tag followed by the `repr()` of every raw text line.
        Otherwise (also if there is no document or no such setting),
        return the indented lines of the unescaped text.
        """
        try:
            if self.document.settings.detailed:
                header = '%s%s' % (indent*level, '<#text>')
                body = (indent*(level+1) + repr(line)
                        for line in self.splitlines(True))
                return '\n'.join((header, *body)) + '\n'
        except AttributeError:  # no document or no "detailed" setting
            pass
        prefix = indent * level
        return ''.join('%s%s\n' % (prefix, line)
                       for line in self.astext().splitlines())

    # rstrip and lstrip are used by substitution definitions where
    # they are expected to return a Text instance, this was formerly
    # taken care of by UserString.

    def rstrip(self, chars: str | None = None) -> Self:
        """Return a `Text` copy with trailing `chars` removed."""
        stripped = str.rstrip(self, chars)
        return self.__class__(stripped)

    def lstrip(self, chars: str | None = None) -> Self:
        """Return a `Text` copy with leading `chars` removed."""
        stripped = str.lstrip(self, chars)
        return self.__class__(stripped)
478class Element(Node):
479 """
480 `Element` is the superclass to all specific elements.
482 Elements contain attributes and child nodes.
483 They can be described as a cross between a list and a dictionary.
485 Elements emulate dictionaries for external [#]_ attributes, indexing by
486 attribute name (a string). To set the attribute 'att' to 'value', do::
488 element['att'] = 'value'
490 .. [#] External attributes correspond to the XML element attributes.
491 From its `Node` superclass, Element also inherits "internal"
492 class attributes that are accessed using the standard syntax, e.g.
493 ``element.parent``.
495 There are two special attributes: 'ids' and 'names'. Both are
496 lists of unique identifiers: 'ids' conform to the regular expression
497 ``[a-z](-?[a-z0-9]+)*`` (see the make_id() function for rationale and
498 details). 'names' serve as user-friendly interfaces to IDs; they are
499 case- and whitespace-normalized (see the fully_normalize_name() function).
501 Elements emulate lists for child nodes (element nodes and/or text
502 nodes), indexing by integer. To get the first child node, use::
504 element[0]
506 to iterate over the child nodes (without descending), use::
508 for child in element:
509 ...
511 Elements may be constructed using the ``+=`` operator. To add one new
512 child node to element, do::
514 element += node
516 This is equivalent to ``element.append(node)``.
518 To add a list of multiple child nodes at once, use the same ``+=``
519 operator::
521 element += [node1, node2]
523 This is equivalent to ``element.extend([node1, node2])``.
524 """
526 list_attributes: Final = ('ids', 'classes', 'names', 'dupnames')
527 """Tuple of attributes that are initialized to empty lists.
529 NOTE: Derived classes should update this value when supporting
530 additional list attributes.
531 """
533 valid_attributes: Final = list_attributes + ('source',)
534 """Tuple of attributes that are valid for elements of this class.
536 NOTE: Derived classes should update this value when supporting
537 additional attributes.
538 """
540 common_attributes: Final = valid_attributes
541 """Tuple of `common attributes`__ known to all Doctree Element classes.
543 __ https://docutils.sourceforge.io/docs/ref/doctree.html#common-attributes
544 """
546 known_attributes: Final = common_attributes
547 """Alias for `common_attributes`. Will be removed in Docutils 2.0."""
549 basic_attributes: Final = list_attributes
550 """Common list attributes. Deprecated. Will be removed in Docutils 2.0."""
552 local_attributes: Final = ('backrefs',)
553 """Obsolete. Will be removed in Docutils 2.0."""
555 content_model: ClassVar[_ContentModelTuple] = ()
556 """Python representation of the element's content model (cf. docutils.dtd).
558 A tuple of ``(category, quantifier)`` tuples with
560 :category: class or tuple of classes that are expected at this place(s)
561 in the list of children
562 :quantifier: string representation stating how many elements
563 of `category` are expected. Value is one of:
564 '.' (exactly one), '?' (zero or one),
565 '+' (one or more), '*' (zero or more).
567 NOTE: The default describes the empty element. Derived classes should
568 update this value to match their content model.
570 Provisional.
571 """
573 tagname: str | None = None
574 """The element generic identifier.
576 If None, it is set as an instance attribute to the name of the class.
577 """
579 child_text_separator: Final = '\n\n'
580 """Separator for child nodes, used by `astext()` method."""
def __init__(self,
             rawsource: str = '',
             *children,
             **attributes: Any,
             ) -> None:
    """Set up `rawsource`, child nodes, and attributes.

    All positional arguments after `rawsource` become child nodes,
    keyword arguments become attributes (names are lowercased).
    Raise TypeError if `rawsource` is an `Element`.
    """
    self.rawsource = rawsource
    """The raw text from which this element was constructed.

    For informative and debugging purposes. Don't rely on its value!

    NOTE: some elements do not set this value (default '').
    """
    if isinstance(rawsource, Element):
        raise TypeError('First argument "rawsource" must be a string.')

    self.children: list = []
    """List of child nodes (elements and/or `Text`)."""

    self.extend(children)  # maintain parent info

    # Start with an empty list for every list attribute.
    self.attributes: dict[str, Any] = {name: []
                                       for name in self.list_attributes}
    """Dictionary of attribute {name: value}."""

    for name, value in attributes.items():
        name = name.lower()  # normalize attribute name
        if name in self.list_attributes:
            # lists are mutable; make a copy for this node
            value = value[:]
        self.attributes[name] = value

    if self.tagname is None:
        self.tagname: str = self.__class__.__name__
def _dom_node(self, domroot: minidom.Document) -> minidom.Element:
    """Return a DOM element for `self` and its children.

    `domroot` is the DOM document used to create the new nodes.
    List values are serialized as space-separated, escaped items.
    """
    dom_element = domroot.createElement(self.tagname)
    for name, value in self.attlist():
        if isinstance(value, list):
            items = (serial_escape('%s' % (item,)) for item in value)
            value = ' '.join(items)
        dom_element.setAttribute(name, '%s' % value)
    for child in self.children:
        dom_element.appendChild(child._dom_node(domroot))
    return dom_element
def __repr__(self) -> str:
    """Return tag name, names (if any), and an abbreviated child list."""
    summary = ''
    for child in self.children:
        summary += child.shortrepr()
        if len(summary) > 60:
            # too long: cut and skip the remaining children
            summary = summary[:56] + ' ...'
            break
    names = self['names']
    if not names:
        return '<%s: %s>' % (self.tagname, summary)
    return '<%s "%s": %s>' % (self.tagname, '; '.join(names), summary)
def shortrepr(self) -> str:
    """Return a short representation: tag name and names (if any)."""
    names = self['names']
    if not names:
        return '<%s...>' % self.tagname
    return '<%s "%s"...>' % (self.tagname, '; '.join(names))
def __str__(self) -> str:
    """Return the element as string: tags around the children's strings.

    An element without children is represented by its empty tag.
    """
    if not self.children:
        return self.emptytag()
    content = ''.join(str(child) for child in self.children)
    return '%s%s%s' % (self.starttag(), content, self.endtag())
def starttag(self, quoteattr: Callable[[str], str] | None = None) -> str:
    """Return the start tag with all non-default attributes.

    `quoteattr` is the function used to quote the attribute values
    (default: `pseudo_quoteattr`).
    The optional argument is used by the docutils_xml writer.
    """
    if quoteattr is None:
        quoteattr = pseudo_quoteattr

    def as_text(value):
        # lists -> space-separated escaped items, booleans -> "0"/"1"
        if isinstance(value, list):
            return ' '.join([serial_escape('%s' % (item,))
                             for item in value])
        if isinstance(value, bool):
            return str(int(value))
        return str(value)

    parts = [self.tagname]
    for name, value in self.attlist():
        if value is None:  # boolean attribute
            parts.append('%s="True"' % name)
        else:
            parts.append('%s=%s' % (name, quoteattr(as_text(value))))
    return '<%s>' % ' '.join(parts)
def endtag(self) -> str:
    """Return the end tag for this element."""
    return '</' + '%s>' % self.tagname
def emptytag(self) -> str:
    """Return an empty-element tag with all non-default attributes."""
    parts = [self.tagname]
    for name, value in self.attlist():
        parts.append('%s="%s"' % (name, value))
    return '<%s/>' % ' '.join(parts)
def __len__(self) -> int:
    """Return the number of direct child nodes."""
    child_count = len(self.children)
    return child_count
def __contains__(self, key) -> bool:
    """Support ``in`` for both attributes and children.

    A string `key` is looked up in the attribute names,
    anything else in the list of child nodes.
    """
    haystack = self.attributes if isinstance(key, str) else self.children
    return key in haystack
def __getitem__(self, key: str | int | slice) -> Any:
    """Return an attribute value, a child node, or a list of child nodes.

    :str key: return the value of the attribute `key`.
    :int key: return the child node at index `key`.
    :slice key: return a list of child nodes (a stride is not supported).

    Raise TypeError for any other `key` type.
    """
    if isinstance(key, str):
        return self.attributes[key]
    if isinstance(key, int):
        return self.children[key]
    if isinstance(key, slice):
        assert key.step in (None, 1), 'cannot handle slice with stride'
        return self.children[key.start:key.stop]
    raise TypeError('element index must be an integer, a slice, or '
                    'an attribute name string')
def __setitem__(self, key, item) -> None:
    """Set an attribute, replace a child node, or replace a slice of nodes.

    :str key: set the attribute `key` to `item`.
    :int key: replace the child node at index `key` with the node `item`.
    :slice key: replace the child nodes in the slice with the nodes
                from the iterable `item` (a stride is not supported).

    New child nodes are registered with `setup_child()`.
    Raise TypeError for any other `key` type.
    """
    if isinstance(key, str):
        self.attributes[str(key)] = item
    elif isinstance(key, int):
        self.setup_child(item)
        self.children[key] = item
    elif isinstance(key, slice):
        assert key.step in (None, 1), 'cannot handle slice with stride'
        # `item` may be a one-shot iterable (generator, iterator):
        # materialize it so that the nodes are still available for the
        # assignment after they have been set up.
        nodes = list(item)
        for node in nodes:
            self.setup_child(node)
        self.children[key.start:key.stop] = nodes
    else:
        raise TypeError('element index must be an integer, a slice, or '
                        'an attribute name string')
def __delitem__(self, key: str | int | slice) -> None:
    """Delete an attribute, a child node, or a slice of child nodes.

    :str key: delete the attribute `key`.
    :int key: delete the child node at index `key`.
    :slice key: delete the child nodes in the slice
                (a stride is not supported).

    Raise TypeError for any other `key` type.
    """
    if isinstance(key, str):
        del self.attributes[key]
        return
    if isinstance(key, int):
        del self.children[key]
        return
    if not isinstance(key, slice):
        raise TypeError('element index must be an integer, a simple '
                        'slice, or an attribute name string')
    assert key.step in (None, 1), 'cannot handle slice with stride'
    del self.children[key.start:key.stop]
def __add__(self, other: list) -> list:
    """Return a new list: the child nodes followed by the list `other`."""
    combined = self.children + other
    return combined
def __radd__(self, other: list) -> list:
    """Return a new list: the list `other` followed by the child nodes."""
    combined = other + self.children
    return combined
def __iadd__(self, other) -> Self:
    """Append a node or a list of nodes to `self.children`.

    A `Node` instance is appended, any other value except None is
    treated as an iterable of nodes.  None is ignored.
    """
    if isinstance(other, Node):
        self.append(other)
        return self
    if other is not None:
        self.extend(other)
    return self
def astext(self) -> str:
    """Return the text of all children, joined by `child_text_separator`.
    """
    texts = (child.astext() for child in self.children)
    return self.child_text_separator.join(texts)
def non_default_attributes(self) -> dict[str, Any]:
    """Return a dictionary of the attributes with non-default values.

    Cf. `is_not_default()`.
    """
    selected = {}
    for name, value in self.attributes.items():
        if self.is_not_default(name):
            selected[name] = value
    return selected
def attlist(self) -> list[tuple[str, Any]]:
    """Return the sorted list of non-default (name, value) pairs."""
    pairs = list(self.non_default_attributes().items())
    pairs.sort()
    return pairs
def get(self, key: str, failobj: Any | None = None) -> Any:
    """Return the value of attribute `key` or `failobj` if it is not set.
    """
    value = self.attributes.get(key, failobj)
    return value
def hasattr(self, attr: str) -> bool:
    """Return True if the attribute `attr` is set, else False."""
    is_set = attr in self.attributes
    return is_set
def delattr(self, attr: str) -> None:
    """Remove the attribute `attr`; do nothing if it is not set."""
    self.attributes.pop(attr, None)
def setdefault(self, key: str, failobj: Any | None = None) -> Any:
    """Return the value of attribute `key`; set it to `failobj` if missing.
    """
    value = self.attributes.setdefault(key, failobj)
    return value
771 has_key = hasattr
def get_language_code(self, fallback: str = '') -> str:
    """Return node's language tag.

    Look in self and, recursively, in the parents for a "classes" value
    starting with ``language-`` and return the remainder of it
    (which should be a `BCP 47` language tag) or the `fallback`.
    """
    prefix = 'language-'
    tag = next((cls.removeprefix(prefix)
                for cls in self.get('classes', [])
                if cls.startswith(prefix)), None)
    if tag is not None:
        return tag
    try:
        return self.parent.get_language_code(fallback)
    except AttributeError:  # no parent (or parent is not an Element)
        return fallback
def append(self, item) -> None:
    """Add the node `item` to the end of the list of children.

    The node is registered with `setup_child()` first.
    """
    self.setup_child(item)
    self.children += [item]
def extend(self, item: Iterable) -> None:
    """Append every node from the iterable `item` (cf. `append()`)."""
    nodes = iter(item)
    for node in nodes:
        self.append(node)
def insert(self, index: SupportsIndex, item) -> None:
    """Insert a node or a sequence of nodes before position `index`.

    A `Node` instance is inserted as single child, any other value
    except None is handled by slice assignment.  None is ignored.
    """
    if item is None:
        return
    if not isinstance(item, Node):
        self[index:index] = item
        return
    self.setup_child(item)
    self.children.insert(index, item)
def pop(self, i: int = -1):
    """Remove and return the child node at index `i` (default: last)."""
    removed = self.children.pop(i)
    return removed
def remove(self, item) -> None:
    """Remove the first child node equal to `item`.

    Raise ValueError (from `list.remove()`) if there is no such child.
    """
    children = self.children
    children.remove(item)
def index(self, item, start: int = 0, stop: int = sys.maxsize) -> int:
    """Return the index of the first child equal to `item`.

    The search is limited to the children in ``[start:stop]``.
    Raise ValueError (from `list.index()`) if `item` is not found.
    """
    position = self.children.index(item, start, stop)
    return position
def previous_sibling(self):
    """Return preceding sibling node or ``None``.

    None is returned for a node without parent and for a first child.
    """
    try:
        position = self.parent.index(self)
    except AttributeError:  # no parent
        return None
    if position > 0:
        return self.parent[position-1]
    return None
def section_hierarchy(self) -> list[section]:
    """Return the element's section hierarchy.

    Return a list of all <section> elements containing `self`
    (including `self` if it is a <section>).

    List item ``[i]`` is the parent <section> of level i+1
    (1: section, 2: subsection, 3: subsubsection, ...).
    The length of the list is the element's section level.

    Provisional. May be changed or removed without warning.
    """
    # Collect from the inside out, ...
    inner_to_outer = []
    node = self
    while node is not None:
        if isinstance(node, section):
            inner_to_outer.append(node)
        node = node.parent
    # ... return from the outside in.
    return inner_to_outer[::-1]
def is_not_default(self, key: str) -> bool:
    """Return False if attribute `key` has its default value, else True.

    Only an empty list as value of a list attribute
    (cf. `list_attributes`) counts as default value.
    """
    return not (self[key] == [] and key in self.list_attributes)
def update_basic_atts(self, dict_: Mapping[str, Any] | Element) -> None:
    """
    Update basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') from node or dictionary `dict_`.

    Values that are missing in `self` are appended (cf.
    `append_attr_list()`); existing values are kept.

    Provisional.
    """
    source = dict_.attributes if isinstance(dict_, Node) else dict_
    for name in self.basic_attributes:
        self.append_attr_list(name, source.get(name, []))
def append_attr_list(self, attr: str, values: Iterable[Any]) -> None:
    """
    Append every item of `values` that is not yet in self[attr]
    to self[attr].

    NOTE: Requires self[attr] and values to be sequence type and the
    former should specifically be a list.
    """
    # List concatenation without duplicates.  self[attr] is looked up
    # again for every item, like in a plain ``self[attr].append()`` loop.
    for candidate in values:
        if candidate in self[attr]:
            continue
        self[attr].append(candidate)
def coerce_append_attr_list(
        self, attr: str, value: list[Any] | Any) -> None:
    """
    First, convert both self[attr] and value to a non-string sequence
    type; if either is not already a sequence, convert it to a list of one
    element.  Then call append_attr_list.

    If `attr` is not set at all, it is initialized with an empty list
    (instead of failing with a KeyError), so that the method can be used
    to create an attribute, too.

    NOTE: self[attr] and value both must not be None.
    """
    # List Concatenation
    if attr not in self.attributes:
        # nothing to coerce: start with an empty list
        self[attr] = []
    elif not isinstance(self[attr], list):
        self[attr] = [self[attr]]
    if not isinstance(value, list):
        value = [value]
    self.append_attr_list(attr, value)
def replace_attr(self, attr: str, value: Any, force: bool = True) -> None:
    """
    Set self[attr] to `value` if `force` is true (the default) or if
    self[attr] is unset or None.  Otherwise do nothing.
    """
    # One or the other
    keep_current = not force and self.get(attr) is not None
    if not keep_current:
        self[attr] = value
def copy_attr_convert(
        self, attr: str, value: Any, replace: bool = True) -> None:
    """
    If attr is an attribute of self, set self[attr] to
    [self[attr], value], otherwise set self[attr] to value.

    Nothing is done if self[attr] already is the object `value`
    (this includes an unset attribute and a `value` of None).

    NOTE: replace is not used by this function and is kept only for
          compatibility with the other copy functions.
    """
    if self.get(attr) is value:
        return
    if attr in self.attributes:
        self.coerce_append_attr_list(attr, value)
    else:
        # New attribute: there is nothing to merge with
        # (and merging would fail with a KeyError).
        self[attr] = value
def copy_attr_coerce(self, attr: str, value: Any, replace: bool) -> None:
    """
    Merge `value` into self[attr], coercing to a list if required.

    If self[attr] or `value` is a list, convert the other one to a list
    of one element (if required) and concatenate both lists in self[attr]
    (cf. `coerce_append_attr_list()`).
    If neither is a list, set self[attr] to `value` if `replace` is true
    or self[attr] is None (cf. `replace_attr()`).
    Nothing is done if self[attr] already is the object `value`.
    """
    current = self.get(attr)
    if current is value:
        return
    if isinstance(current, list) or isinstance(value, list):
        self.coerce_append_attr_list(attr, value)
    else:
        self.replace_attr(attr, value, replace)
def copy_attr_concatenate(
        self, attr: str, value: Any, replace: bool) -> None:
    """
    Merge `value` into self[attr], concatenating lists.

    If both self[attr] and `value` are lists, append the missing items
    of `value` to self[attr] (cf. `append_attr_list()`).
    Otherwise, set self[attr] to `value` if `replace` is true
    or self[attr] is None (cf. `replace_attr()`).
    Nothing is done if self[attr] already is the object `value`.
    """
    current = self.get(attr)
    if current is value:
        return
    both_lists = isinstance(current, list) and isinstance(value, list)
    if both_lists:
        self.append_attr_list(attr, value)
    else:
        self.replace_attr(attr, value, replace)
def copy_attr_consistent(
        self, attr: str, value: Any, replace: bool) -> None:
    """
    If replace is True or self[attr] is None, replace self[attr] with
    value.  Otherwise, do nothing.

    Nothing is done if self[attr] already is the object `value`.
    """
    if self.get(attr) is value:
        return
    self.replace_attr(attr, value, replace)
def update_all_atts(self,
                    dict_: Mapping[str, Any] | Element,
                    update_fun: _UpdateFun = copy_attr_consistent,
                    replace: bool = True,
                    and_source: bool = False,
                    ) -> None:
    """
    Updates all attributes from node or dictionary `dict_`.

    Appends the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') and then, for all other attributes in
    dict_, updates the same attribute in self.  When attributes with the
    same identifier appear in both self and dict_, the two values are
    merged based on the value of update_fun.  Generally, when replace is
    True, the values in self are replaced or merged with the values in
    dict_; otherwise, the values in self may be preserved or merged.  When
    and_source is True, the 'source' attribute is included in the copy.

    `update_fun` is called as ``update_fun(self, att, value, replace)``.

    NOTE: When replace is False, and self contains a 'source' attribute,
          'source' is not replaced even when dict_ has a 'source'
          attribute, though it may still be merged into a list depending
          on the value of update_fun.
    NOTE: It is easier to call the update-specific methods than to pass
          the update_fun method to this function.
    """
    if isinstance(dict_, Node):
        dict_ = dict_.attributes

    # Include the source attribute when copying?
    if and_source:
        wanted = self.is_not_list_attribute
    else:
        wanted = self.is_not_known_attribute

    # Copy the basic attributes
    self.update_basic_atts(dict_)

    # Update the other attributes from dict_ that pass the filter.
    # (All basic attributes should be copied already.)
    for att in dict_:
        if wanted(att):
            update_fun(self, att, dict_[att], replace)
def update_all_atts_consistantly(self,
                                 dict_: Mapping[str, Any] | Element,
                                 replace: bool = True,
                                 and_source: bool = False,
                                 ) -> None:
    """
    Updates all attributes from node or dictionary `dict_`.

    Shortcut for `update_all_atts()` with `Element.copy_attr_consistent`
    as update function.

    Appends the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') and then, for all other attributes in
    dict_, updates the same attribute in self.  When attributes with the
    same identifier appear in both self and dict_ and replace is True, the
    values in self are replaced with the values in dict_; otherwise, the
    values in self are preserved.  When and_source is True, the 'source'
    attribute is included in the copy.

    NOTE: When replace is False, and self contains a 'source' attribute,
          'source' is not replaced even when dict_ has a 'source'
          attribute.
    """
    self.update_all_atts(dict_,
                         update_fun=Element.copy_attr_consistent,
                         replace=replace,
                         and_source=and_source)
def update_all_atts_concatenating(self,
                                  dict_: Mapping[str, Any] | Element,
                                  replace: bool = True,
                                  and_source: bool = False,
                                  ) -> None:
    """
    Updates all attributes from node or dictionary `dict_`.

    Shortcut for `update_all_atts()` with `Element.copy_attr_concatenate`
    as update function.

    Appends the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') and then, for all other attributes in
    dict_, updates the same attribute in self.  When attributes with the
    same identifier appear in both self and dict_ whose values aren't each
    lists and replace is True, the values in self are replaced with the
    values in dict_; if the values from self and dict_ for the given
    identifier are both of list type, then the two lists are concatenated
    and the result stored in self; otherwise, the values in self are
    preserved.  When and_source is True, the 'source' attribute is
    included in the copy.

    NOTE: When replace is False, and self contains a 'source' attribute,
          'source' is not replaced even when dict_ has a 'source'
          attribute, though it may still be merged into a list when both
          values are lists.
    """
    self.update_all_atts(dict_,
                         update_fun=Element.copy_attr_concatenate,
                         replace=replace,
                         and_source=and_source)
def update_all_atts_coercion(self,
                             dict_: Mapping[str, Any] | Element,
                             replace: bool = True,
                             and_source: bool = False,
                             ) -> None:
    """
    Updates all attributes from node or dictionary `dict_`.

    Shortcut for `update_all_atts()` with `Element.copy_attr_coerce`
    as update function.

    Appends the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') and then, for all other attributes in
    dict_, updates the same attribute in self.  When attributes with the
    same identifier appear in both self and dict_ whose values are both
    not lists and replace is True, the values in self are replaced with
    the values in dict_; if either of the values from self and dict_ for
    the given identifier are of list type, then first any non-lists are
    converted to 1-element lists and then the two lists are concatenated
    and the result stored in self; otherwise, the values in self are
    preserved.  When and_source is True, the 'source' attribute is
    included in the copy.

    NOTE: When replace is False, and self contains a 'source' attribute,
          'source' is not replaced even when dict_ has a 'source'
          attribute, though it may still be merged into a list if one of
          the two values is a list.
    """
    self.update_all_atts(dict_,
                         update_fun=Element.copy_attr_coerce,
                         replace=replace,
                         and_source=and_source)
def update_all_atts_convert(self,
                            dict_: Mapping[str, Any] | Element,
                            and_source: bool = False,
                            ) -> None:
    """
    Updates all attributes from node or dictionary `dict_`.

    Shortcut for `update_all_atts()` with `Element.copy_attr_convert`
    as update function.  There is no `replace` argument; the default of
    `update_all_atts()` is used.

    Appends the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') and then, for all other attributes in
    dict_, updates the same attribute in self.  When attributes with the
    same identifier appear in both self and dict_ then first any non-lists
    are converted to 1-element lists and then the two lists are
    concatenated and the result stored in self; otherwise, the values in
    self are preserved.  When and_source is True, the 'source' attribute
    is included in the copy.
    """
    self.update_all_atts(dict_,
                         update_fun=Element.copy_attr_convert,
                         and_source=and_source)
def clear(self) -> None:
    """Drop all children by binding `self.children` to a new empty list.

    The previous list object is left untouched; attributes are kept.
    """
    no_children = []
    self.children = no_children
def replace(self, old, new) -> None:
    """Replace one child `Node` with another child or children.

    `new` may be a single `Node`, an iterable of nodes (spliced in at
    the position of `old`), or None (in which case nothing is changed).
    The position of `old` is determined with `self.index()`.
    """
    position = self.index(old)
    if isinstance(new, Node):
        self.setup_child(new)
        self[position] = new
        return
    if new is None:
        return
    # Splice the sequence in place of the single old child.
    self[position:position + 1] = new
def replace_self(self, new) -> None:
    """
    Replace `self` node with `new`, where `new` is a node or a
    list of nodes.

    The basic attributes of `self` are passed on to `new` (or to the
    first item of `new`, if it is a list) when that is an `Element`.

    Provisional: the handling of node attributes will be revised.
    """
    if isinstance(new, Node):
        heir = new
    else:
        # `new` is a list; its first child inherits the attributes.
        try:
            heir = new[0]
        except IndexError:
            heir = None
    if isinstance(heir, Element):
        heir.update_basic_atts(self)
    else:
        # `heir` is a Text node or `new` is an empty list.
        # Assert that we aren't losing any attributes.
        for att in self.basic_attributes:
            assert not self[att], \
                'Losing "%s" attribute: %s' % (att, self[att])
    self.parent.replace(self, new)
def first_child_matching_class(self,
                               childclass: type[Element] | type[Text]
                               | tuple[type[Element] | type[Text], ...],
                               start: int = 0,
                               end: int = sys.maxsize,
                               ) -> int | None:
    """
    Return the index of the first child that is an instance of
    `childclass` (tested with `isinstance()`), or None if there is none.

    Parameters:

    - `childclass`: A `Node` subclass to search for, or a tuple of `Node`
      classes.  If a tuple, any of the classes may match.
    - `start`: Initial index to check.
    - `end`: Initial index to *not* check.
    """
    wanted = childclass if isinstance(childclass, tuple) else (childclass,)
    stop = min(len(self), end)
    hits = (index for index in range(start, stop)
            if isinstance(self[index], wanted))
    return next(hits, None)
def first_child_not_matching_class(
        self,
        childclass: type[Element] | type[Text]
        | tuple[type[Element] | type[Text], ...],
        start: int = 0,
        end: int = sys.maxsize,
        ) -> int | None:
    """
    Return the index of the first child that is *not* an instance of
    `childclass`, or None if every checked child is one.

    Parameters:

    - `childclass`: A `Node` subclass to skip, or a tuple of `Node`
      classes.  If a tuple, none of the classes may match.
    - `start`: Initial index to check.
    - `end`: Initial index to *not* check.
    """
    skipped = childclass if isinstance(childclass, tuple) else (childclass,)
    for index in range(start, min(len(self), end)):
        if not isinstance(self.children[index], skipped):
            return index
    return None
def pformat(self, indent: str = ' ', level: int = 0) -> str:
    """Return an indented pseudo-XML representation of the subtree.

    The start tag is written on its own line, prefixed with `indent`
    repeated `level` times; every child follows, formatted by its own
    `pformat()` one level deeper.
    """
    pieces = [f'{indent * level}{self.starttag()}\n']
    pieces.extend(child.pformat(indent, level + 1)
                  for child in self.children)
    return ''.join(pieces)
def copy(self) -> Self:
    """Return a childless copy of this element.

    A new instance of the same class is created from `rawsource` and
    the attributes; `_document`, `source` and `line` are carried over.
    Children are not copied (see `deepcopy()`).
    """
    clone = self.__class__(rawsource=self.rawsource, **self.attributes)
    for name in ('_document', 'source', 'line'):
        setattr(clone, name, getattr(self, name))
    return clone
def deepcopy(self) -> Self:
    """Return a copy of this element including copies of all descendants.

    The element itself is duplicated with `copy()`; each child is
    duplicated with its own `deepcopy()` and added with `extend()`.
    """
    duplicate = self.copy()
    child_copies = []
    for child in self.children:
        child_copies.append(child.deepcopy())
    duplicate.extend(child_copies)
    return duplicate
def note_referenced_by(self,
                       name: str | None = None,
                       id: str | None = None,
                       ) -> None:
    """Note that this Element has been referenced by its name
    `name` or id `id`.

    Sets ``self.referenced`` to True.  Nodes registered under `name`
    in ``self.expect_referenced_by_name`` or under `id` in
    ``self.expect_referenced_by_id`` (if these dictionaries exist) are
    marked as referenced, too.
    """
    self.referenced = True
    # Element.expect_referenced_by_* dictionaries map names or ids
    # to nodes whose ``referenced`` attribute is set to true as
    # soon as this node is referenced by the given name or id.
    # Needed for target propagation.
    lookups = (('expect_referenced_by_name', name),
               ('expect_referenced_by_id', id))
    dependents = [(getattr(self, registry, {}).get(key), key)
                  for registry, key in lookups]
    for dependent, key in dependents:
        if dependent:
            assert key is not None
            dependent.referenced = True
@classmethod
def is_not_list_attribute(cls, attr: str) -> bool:
    """
    Tell whether `attr` is absent from ``cls.list_attributes``.

    Returns True if and only if the given attribute is NOT one of the
    basic list attributes defined for all Elements.
    """
    is_list_attribute = attr in cls.list_attributes
    return not is_list_attribute
@classmethod
def is_not_known_attribute(cls, attr: str) -> bool:
    """
    Tell whether `attr` is absent from ``cls.common_attributes``.

    Return True if `attr` is NOT defined for all Element instances.

    Provisional. May be removed in Docutils 2.0.
    """
    is_common_attribute = attr in cls.common_attributes
    return not is_common_attribute
def validate_attributes(self) -> None:
    """Normalize and validate element attributes.

    Each attribute value is passed through the matching function in
    `ATTRIBUTE_VALIDATORS` and replaced by the result (converting
    strings to the expected datatype, normalizing values).
    Attributes whose name starts with "internal:" are skipped.

    Raise `ValidationError` for invalid attributes or attribute values.
    All problems found are collected and reported in one exception.

    Provisional.
    """
    problems = []
    for key, value in self.attributes.items():
        if key.startswith('internal:'):
            continue  # see docs/user/config.html#expose-internals
        if key in self.valid_attributes:
            try:
                # Re-assigning an existing key is safe while iterating.
                self.attributes[key] = ATTRIBUTE_VALIDATORS[key](value)
            except (ValueError, TypeError, KeyError) as e:
                problems.append(
                    f'Attribute "{key}" has invalid value "{value}".\n {e}')
        else:
            va = '", "'.join(self.valid_attributes)
            problems.append(f'Attribute "{key}" not one of "{va}".')
    if not problems:
        return
    raise ValidationError(f'Element {self.starttag()} invalid:\n '
                          + '\n '.join(problems),
                          problematic_element=self)
def validate_content(self,
                     model: _ContentModelTuple | None = None,
                     elements: Sequence | None = None,
                     ) -> list:
    """Test compliance of `elements` with `model`.

    :model: content model description, default `self.content_model`,
    :elements: list of doctree elements, default `self.children`.

    `model` is iterated as (category, quantifier) pairs.  `category` is
    passed to `isinstance()`; the quantifiers handled are "." (exactly
    one), "?" (at most one), "+" (one or more) and "*" (any number).
    The parts of the model are matched against `elements` in order.

    Return list of children that do not fit in the model or raise
    `ValidationError` if the content does not comply with the `model`.

    Provisional.
    """
    if model is None:
        model = self.content_model
    if elements is None:
        elements = self.children
    ichildren = iter(elements)
    # `child` is the next element still to be matched against the model;
    # it is None once the elements are used up.
    child = next(ichildren, None)
    for category, quantifier in model:
        if not isinstance(child, category):
            if quantifier in ('.', '+'):
                # A required part of the model is missing or mismatched.
                raise ValidationError(self._report_child(child, category),
                                      problematic_element=child)
            else:  # quantifier in ('?', '*') -> optional child
                continue  # try same child with next part of content model
        else:
            # Check additional placement constraints (if applicable):
            child.validate_position()
        # advance:
        if quantifier in ('.', '?'):  # go to next element
            child = next(ichildren, None)
        else:  # if quantifier in ('*', '+'):  # pass all matching elements
            for child in ichildren:
                if not isinstance(child, category):
                    # First non-matching element: keep it in `child`
                    # for the next part of the model.
                    break
                # NOTE(review): unlike the call above, a missing
                # `validate_position()` method is tolerated here --
                # confirm whether the difference is intended.
                try:
                    child.validate_position()
                except AttributeError:
                    pass
            else:
                # Iterator exhausted without a mismatch: nothing left.
                child = None
    # Whatever was not consumed by the model is returned to the caller.
    return [] if child is None else [child, *ichildren]
def _report_child(self,
                  child,
                  category: Element | Iterable[Element],
                  ) -> str:
    """Return a message reporting a missing or misplaced child.

    `category` is the expected class or an iterable of expected
    classes; `child` is the offending node or None (missing child).
    """
    try:
        expected = category.__name__
    except AttributeError:
        # Not a single class: list the names of all alternatives.
        expected = '> or <'.join(c.__name__ for c in category)
    intro = f'Element {self.starttag()} invalid:\n'
    if child is None:
        return f'{intro} Missing child of type <{expected}>.'
    if isinstance(child, Text):
        found = f'text data "{child.astext()}"'
    else:
        found = child.starttag()
    return f'{intro} Expecting child of type <{expected}>, not {found}.'
def validate(self, recursive: bool = True) -> None:
    """Validate Docutils Document Tree element ("doctree").

    Checks the attributes (`validate_attributes()`) and the content
    (`validate_content()`); any child left over by the content model
    is reported as an error.

    Raise ValidationError if there are violations.
    If `recursive` is True, validate also the element's descendants.

    See `The Docutils Document Tree`__ for details of the
    Docutils Document Model.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html

    Provisional (work in progress).
    """
    self.validate_attributes()

    # The first leftover child (if any) aborts validation.
    for stray in self.validate_content():
        intro = f'Element {self.starttag()} invalid:\n'
        if isinstance(stray, Text):
            raise ValidationError(
                f'{intro} Spurious text: "{stray.astext()}".',
                problematic_element=self)
        raise ValidationError(
            f'{intro} Child element {stray.starttag()} '
            'not allowed at this position.',
            problematic_element=stray)

    if not recursive:
        return
    for child in self:
        child.validate(recursive=recursive)
1359# ====================
1360# Element Categories
1361# ====================
1362#
1363# See https://docutils.sourceforge.io/docs/ref/doctree.html#element-hierarchy.
class Root:
    """Element at the root of a document tree.

    Category class without behaviour of its own; the `document` element
    class derives from it.
    """
class Structural:
    """`Structural elements`__.

    Category class without behaviour of its own; `topic`, `sidebar`,
    and `section` derive from it.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #structural-elements
    """
class SubStructural:
    """`Structural subelements`__ are children of `Structural` elements.

    Most Structural elements accept only specific `SubStructural` elements.

    Category class without behaviour of its own; e.g. `title`, `subtitle`,
    `meta`, `docinfo`, `decoration`, and `transition` derive from it.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #structural-subelements
    """
class Bibliographic:
    """`Bibliographic Elements`__ (displayed document meta-data).

    Category class; the content model of `docinfo` requires one or more
    instances of it.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #bibliographic-elements
    """
class Body:
    """`Body elements`__.

    Category class; base of the subcategories `Admonition`, `Sequential`,
    `General`, and `Special`.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#body-elements
    """
class Admonition(Body):
    """Admonitions (distinctive and self-contained notices).

    Content model: one or more `Body` elements.
    """
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+
class Sequential(Body):
    """List-like body elements (subcategory of `Body`)."""
class General(Body):
    """Miscellaneous body elements (subcategory of `Body`)."""
class Special(Body):
    """Special internal body elements (subcategory of `Body`).

    Base class of `Invisible`.
    """
class Part:
    """`Body Subelements`__ always occur within specific parent elements.

    Category class without behaviour of its own.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#body-subelements
    """
class Decorative:
    """Decorative elements (`header` and `footer`).

    Children of `decoration`.

    Content model: one or more `Body` elements.
    """
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+
class Inline:
    """Inline elements contain text data and possibly other inline elements.

    Category class; instances are accepted (besides `Text`) by the content
    model of `TextElement`.
    """
1439# Orthogonal categories and Mixins
1440# ================================
class PreBibliographic:
    """Elements which may occur before Bibliographic Elements.

    Orthogonal category class; e.g. `title`, `subtitle`, `meta`,
    `decoration`, and the `Invisible` category derive from it.
    """
class Invisible(Special, PreBibliographic):
    """Internal elements that don't appear in output.

    Combines the `Special` body category with `PreBibliographic`.
    """
class Labeled:
    """Contains a `label` as its first element.

    Category class without behaviour of its own.
    """
class Resolvable:
    """Mixin providing a `resolved` flag.

    Base class of `Referential` and `Targetable`.
    """
    # Class-level default; False until set on an instance.
    resolved: bool = False
class BackLinkable:
    """Mixin for Elements that accept a "backrefs" attribute."""

    # Extend the attribute tuples defined on `Element` with "backrefs".
    list_attributes: Final = Element.list_attributes + ('backrefs',)
    valid_attributes: Final = Element.valid_attributes + ('backrefs',)

    def add_backref(self: Element, refid: str) -> None:
        """Append `refid` to the "backrefs" attribute of this element."""
        self['backrefs'].append(refid)
class Referential(Resolvable):
    """Elements holding a cross-reference (outgoing hyperlink).

    Inherits the `resolved` flag from `Resolvable`.
    """
class Targetable(Resolvable):
    """Cross-reference targets (incoming hyperlink).

    Inherits the `resolved` flag from `Resolvable`.
    """
    # Class-level default (falsy).
    # NOTE(review): `Element.note_referenced_by()` stores True in an
    # attribute of the same name -- confirm whether `bool` is meant.
    referenced: int = 0

    indirect_reference_name: str | None = None
    """Holds the whitespace_normalized_name (contains mixed case) of a target.
    Required for MoinMoin/reST compatibility.

    Provisional.
    """
class Titular:
    """Title, sub-title, or informal heading (rubric).

    Category class; `title` and `subtitle` derive from it.
    """
class TextElement(Element):
    """
    An element which directly contains text.

    Its children are all `Text` or `Inline` subclass nodes.  You can
    check whether an element's context is inline simply by checking whether
    its immediate parent is a `TextElement` instance (including subclasses).
    This is handy for nodes like `image` that can appear both inline and as
    standalone body elements.

    If passing children to `__init__()`, make sure to set `text` to
    ``''`` or some other suitable value.
    """
    content_model: Final = (((Text, Inline), '*'),)
    # (#PCDATA | %inline.elements;)*

    child_text_separator: Final = ''
    """Separator for child nodes, used by `astext()` method."""

    def __init__(self,
                 rawsource: str = '',
                 text: str = '',
                 *children,
                 **attributes: Any,
                 ) -> None:
        """Set up the element; a non-empty `text` becomes the first child.

        `text` is wrapped in a `Text` node and placed before `children`.
        An empty `text` adds no node.
        """
        if text:
            children = (Text(text), *children)
        Element.__init__(self, rawsource, *children, **attributes)
class FixedTextElement(TextElement):
    """An element which directly contains preformatted text.

    Every instance gets the attribute "xml:space" with value "preserve".
    """

    # "xml:space" is accepted in addition to the common attributes.
    valid_attributes: Final = Element.valid_attributes + ('xml:space',)

    def __init__(self,
                 rawsource: str = '',
                 text: str = '',
                 *children,
                 **attributes: Any,
                 ) -> None:
        """Initialize like `TextElement`, then set "xml:space"."""
        super().__init__(rawsource, text, *children, **attributes)
        # Set after the base initialization, so a value passed in
        # `attributes` is overwritten.
        self.attributes['xml:space'] = 'preserve'
class PureTextElement(TextElement):
    """An element which only contains text, no children.

    Content model: at most one `Text` node.
    """
    content_model: Final = ((Text, '?'),)  # (#PCDATA)
1541# =================================
1542# Concrete Document Tree Elements
1543# =================================
1544#
1545# See https://docutils.sourceforge.io/docs/ref/doctree.html#element-reference
1547# Decorative Elements
1548# ===================
class header(Decorative, Element):
    """Decorative `header` element, a child of `decoration`."""
class footer(Decorative, Element):
    """Decorative `footer` element, a child of `decoration`."""
1554# Structural Subelements
1555# ======================
class title(Titular, PreBibliographic, SubStructural, TextElement):
    """Title of `document`, `section`, `topic` and generic `admonition`.
    """
    # "auto" and "refid" are accepted in addition to the common attributes.
    valid_attributes: Final = Element.valid_attributes + ('auto', 'refid')
class subtitle(Titular, PreBibliographic, SubStructural, TextElement):
    """Sub-title of `document`, `section` and `sidebar`."""

    def validate_position(self) -> None:
        """Check position of subtitle: must follow a title.

        Raise `ValidationError` if the subtitle is the first child of
        its parent.  A subtitle without a parent is not checked.
        """
        parent = self.parent
        if not parent or parent.index(self) != 0:
            return
        raise ValidationError(f'Element {parent.starttag()} invalid:'
                              '\n <subtitle> only allowed after <title>.',
                              problematic_element=self)
class meta(PreBibliographic, SubStructural, Element):
    """Container for "invisible" bibliographic data, or meta-data."""
    # Attributes accepted in addition to the common attributes.
    valid_attributes: Final = Element.valid_attributes + (
        'content', 'dir', 'http-equiv', 'lang', 'media', 'name', 'scheme')
class docinfo(SubStructural, Element):
    """Container for displayed document meta-data.

    Content model: one or more `Bibliographic` elements.
    """
    content_model: Final = ((Bibliographic, '+'),)
    # (%bibliographic.elements;)+
class decoration(PreBibliographic, SubStructural, Element):
    """Container for `header` and `footer`."""
    content_model: Final = ((header, '?'),  # Empty element doesn't make sense,
                            (footer, '?'),  # but is simpler to define.
                            )
    # (header?, footer?)

    def get_header(self) -> header:
        """Return the `header` child, creating it if necessary.

        A new `header` is inserted as first child unless the first child
        already is a `header`.
        """
        kids = self.children
        if not (kids and isinstance(kids[0], header)):
            self.insert(0, header())
        return self.children[0]

    def get_footer(self) -> footer:
        """Return the `footer` child, creating it if necessary.

        A new `footer` is appended unless the last child already is a
        `footer`.
        """
        kids = self.children
        if not (kids and isinstance(kids[-1], footer)):
            self.append(footer())
        return self.children[-1]
class transition(SubStructural, Element):
    """Transitions__ are breaks between untitled text parts.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#transition
    """

    def validate_position(self) -> None:
        """Check additional constraints on `transition` placement.

        A transition may not begin or end a section or document,
        nor may two transitions be immediately adjacent.

        All violations found are reported in one `ValidationError`.
        """
        parent = self.parent
        heading = f'Element {parent.starttag()} invalid:'
        before = self.previous_sibling()
        problems = []
        # A transition following title, subtitle, meta, or decoration
        # still counts as "at the beginning of a document or section".
        leading = (title, subtitle, meta, decoration)
        if before is None or isinstance(before, leading):
            problems.append(
                '<transition> may not begin a section or document.')
        if parent.index(self) == len(parent) - 1:
            problems.append('<transition> may not end a section or document.')
        if isinstance(before, transition):
            problems.append(
                '<transition> may not directly follow another transition.')
        if problems:
            raise ValidationError('\n '.join([heading, *problems]),
                                  problematic_element=self)
1635# Structural Elements
1636# ===================
class topic(Structural, Element):
    """
    Topics__ are non-recursive, mini-sections.

    Content model: an optional `title` followed by one or more `Body`
    elements.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#topic
    """
    content_model: Final = ((title, '?'), (Body, '+'))
    # (title?, (%body.elements;)+)
class sidebar(Structural, Element):
    """
    Sidebars__ are like parallel documents providing related material.

    A sidebar is typically offset by a border and "floats" to the side
    of the page.

    Content model: optional `title`, optional `subtitle`, then one or
    more `topic` or `Body` elements.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#sidebar
    """
    content_model: Final = ((title, '?'),
                            (subtitle, '?'),
                            ((topic, Body), '+'),
                            )
    # ((title, subtitle?)?, (%body.elements; | topic)+)
    # "subtitle only after title" is ensured in `subtitle.validate_position()`.
class section(Structural, Element):
    """Document section__. The main unit of hierarchy.

    The content model refers to `section` itself and is therefore
    assigned after the class definition.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#section
    """
    # recursive content model, see below


# A required title, an optional subtitle, any number of body-level
# elements, then any number of subsections and transitions.
section.content_model = ((title, '.'),
                         (subtitle, '?'),
                         ((Body, topic, sidebar, transition), '*'),
                         ((section, transition), '*'),
                         )
# (title, subtitle?, %structure.model;)
# Correct transition placement is ensured in `transition.validate_position()`.
1682# Root Element
1683# ============
1685class document(Root, Element):
1686 """
1687 The document root element.
1689 Do not instantiate this class directly; use
1690 `docutils.utils.new_document()` instead.
1691 """
1692 valid_attributes: Final = Element.valid_attributes + ('title',)
1693 content_model: Final = ((title, '?'),
1694 (subtitle, '?'),
1695 (meta, '*'),
1696 (decoration, '?'),
1697 (docinfo, '?'),
1698 (transition, '?'),
1699 ((Body, topic, sidebar, transition), '*'),
1700 ((section, transition), '*'),
1701 )
1702 # ( (title, subtitle?)?,
1703 # meta*,
1704 # decoration?,
1705 # (docinfo, transition?)?,
1706 # %structure.model; )
1707 # Additional restrictions for `subtitle` and `transition` are tested
1708 # with the respective `validate_position()` methods.
def __init__(self,
             settings: Values,
             reporter: Reporter,
             *args,
             **kwargs: Any,
             ) -> None:
    """Set up the document root and its bookkeeping attributes.

    `settings` and `reporter` are stored on the instance; all further
    arguments are passed on to `Element.__init__()`.
    """
    Element.__init__(self, *args, **kwargs)

    self.current_source: StrPath | None = None
    """Path to or description of the input source being processed."""

    self.current_line: int | None = None
    """Line number (1-based) of `current_source`."""

    self.settings: Values = settings
    """Runtime settings data record."""

    self.reporter: Reporter = reporter
    """System message generator."""

    self.indirect_targets: list[target] = []
    """List of indirect target nodes."""

    self.substitution_defs: dict[str, substitution_definition] = {}
    """Mapping of substitution names to substitution_definition nodes."""

    self.substitution_names: dict[str, str] = {}
    """Mapping of case-normalized to case-sensitive substitution names."""

    self.refnames: dict[str, list[Element]] = {}
    """Mapping of names to lists of referencing nodes."""

    self.refids: dict[str, list[Element]] = {}
    """Mapping of ids to lists of referencing nodes."""

    # Values are reset to None by `set_duplicate_name_id()` when a name
    # becomes ambiguous.
    self.nameids: dict[str, str | None] = {}
    """Mapping of names to unique id's."""

    self.nametypes: dict[str, bool] = {}
    """Mapping of names to hyperlink type. True: explicit, False: implicit.
    """

    self.ids: dict[str, Element] = {}
    """Mapping of ids to nodes."""

    self.footnote_refs: dict[str, list[footnote_reference]] = {}
    """Mapping of footnote labels to lists of footnote_reference nodes."""

    self.citation_refs: dict[str, list[citation_reference]] = {}
    """Mapping of citation labels to lists of citation_reference nodes."""

    self.autofootnotes: list[footnote] = []
    """List of auto-numbered footnote nodes."""

    self.autofootnote_refs: list[footnote_reference] = []
    """List of auto-numbered footnote_reference nodes."""

    self.symbol_footnotes: list[footnote] = []
    """List of symbol footnote nodes."""

    self.symbol_footnote_refs: list[footnote_reference] = []
    """List of symbol footnote_reference nodes."""

    self.footnotes: list[footnote] = []
    """List of manually-numbered footnote nodes."""

    self.citations: list[citation] = []
    """List of citation nodes."""

    self.autofootnote_start: int = 1
    """Initial auto-numbered footnote number."""

    self.symbol_footnote_start: int = 0
    """Initial symbol footnote symbol index."""

    # Keyed by the ID prefix (a string), see `set_id()`.
    self.id_counter: Counter[str] = Counter()
    """Numbers added to otherwise identical IDs."""

    self.parse_messages: list[system_message] = []
    """System messages generated while parsing."""

    self.transform_messages: list[system_message] = []
    """System messages generated while applying transforms."""

    # Delayed import (see the note at the module's import section).
    import docutils.transforms
    self.transformer: Transformer = docutils.transforms.Transformer(self)
    """Storage for transforms to be applied to this document."""

    self.include_log: list[tuple[StrPath, tuple]] = []
    """The current source's parents (to detect inclusion loops)."""

    self.decoration: decoration | None = None
    """Document's `decoration` node."""

    # The root element is its own document.
    self._document: document = self
def __getstate__(self) -> dict[str, Any]:
    """
    Return dict with unpicklable references removed.

    The result is a shallow copy of the instance dictionary in which
    'reporter' and 'transformer' are set to None.
    """
    state = dict(self.__dict__)
    for unpicklable in ('reporter', 'transformer'):
        state[unpicklable] = None
    return state
def asdom(self, dom: ModuleType | None = None) -> minidom.Document:
    """Return a DOM representation of this document.

    `dom` is the DOM implementation module to use; it defaults to
    `xml.dom.minidom` (imported on demand).
    """
    implementation = dom
    if implementation is None:
        import xml.dom.minidom as implementation
    dom_document = implementation.Document()
    root_element = self._dom_node(dom_document)
    dom_document.appendChild(root_element)
    return dom_document
def set_id(self,
           node: Element,
           msgnode: Element | None = None,
           suggested_prefix: str = '',
           ) -> str:
    """Register the IDs of `node` or generate a new ID; return an ID.

    If `node` already has IDs, each one is registered in `self.ids`
    unless it is taken.  An ID registered for a different node is
    reported with `self.reporter.error()`; the system message is added
    to `msgnode` (if given).  The last ID of `node` is returned.

    Otherwise a new ID is generated, appended to ``node['ids']``,
    registered in `self.ids`, and returned:

    * The first non-empty, unused ID derived with `make_id()` from one
      of ``node['names']`` and prefixed with the "id_prefix" setting.
    * Failing that, a prefix followed by a running number taken from
      `self.id_counter`.  A trailing "%" of the combined "id_prefix" and
      "auto_id_prefix" settings is replaced by `suggested_prefix` or,
      if that is empty, by ``make_id(node.tagname)``, plus a hyphen.
    """
    if node['ids']:
        # register and check for duplicates
        for id in node['ids']:
            # Keeps an existing registration, so a clash is detectable.
            self.ids.setdefault(id, node)
            if self.ids[id] is not node:
                msg = self.reporter.error(f'Duplicate ID: "{id}" used by '
                                          f'{self.ids[id].starttag()} '
                                          f'and {node.starttag()}',
                                          base_node=node)
                if msgnode is not None:
                    msgnode += msg
        return id
    # generate and set id
    id_prefix = self.settings.id_prefix
    auto_id_prefix = self.settings.auto_id_prefix
    base_id = ''
    id = ''
    for name in node['names']:
        if id_prefix:  # allow names starting with numbers
            base_id = make_id('x'+name)[1:]
        else:
            base_id = make_id(name)
        # TODO: normalize id-prefix? (would make code simpler)
        id = id_prefix + base_id
        if base_id and id not in self.ids:
            # Usable name-derived ID found; skips the `else` clause.
            break
    else:
        # No name yielded a free ID: append a running number.
        if base_id and auto_id_prefix.endswith('%'):
            # disambiguate name-derived ID
            # TODO: remove second condition after announcing change
            prefix = id + '-'
        else:
            prefix = id_prefix + auto_id_prefix
            if prefix.endswith('%'):
                prefix = f"""{prefix[:-1]}{suggested_prefix
                             or make_id(node.tagname)}-"""
        while True:
            # One counter per prefix; repeat until the ID is unused.
            self.id_counter[prefix] += 1
            id = f'{prefix}{self.id_counter[prefix]}'
            if id not in self.ids:
                break
    node['ids'].append(id)
    self.ids[id] = node
    return id
def set_name_id_map(self,
                    node: Element,
                    id: str,
                    msgnode: Element | None = None,
                    explicit: bool = False,
                    ) -> None:
    """
    `self.nameids` maps names to IDs, while `self.nametypes` maps names to
    booleans representing hyperlink target type (True==explicit,
    False==implicit).  This method updates the mappings.

    Names of `node` that are not yet registered are simply entered;
    clashes are passed on to `set_duplicate_name_id()`.

    The following state transition table shows how `self.nameids` items
    ("id") and `self.nametypes` items ("type") change with new input
    (a call to this method), and what actions are performed:

    ====  ========  ========  ========  =======  ====  ========  =====
      Old State      Input         Action         New State      Notes
    --------------  --------  -----------------  --------------  -----
    id    type      new type  sys.msg.  dupname  id    type
    ====  ========  ========  ========  =======  ====  ========  =====
    -     -         explicit  -         -        new   explicit
    -     -         implicit  -         -        new   implicit
    -     implicit  explicit  -         -        new   explicit
    old   implicit  explicit  INFO      old      new   explicit
    -     explicit  explicit  ERROR     new      -     explicit
    old   explicit  explicit  ERROR     new,old  -     explicit  [#]_
    -     implicit  implicit  INFO      new      -     implicit
    old   implicit  implicit  INFO      new,old  -     implicit
    -     explicit  implicit  INFO      new      -     explicit
    old   explicit  implicit  INFO      new      old   explicit
    ====  ========  ========  ========  =======  ====  ========  =====

    .. [#] Do not clear the name-to-id map or invalidate the old target if
       both old and new targets are external and refer to identical URIs.
       The new target is invalidated regardless.
    """
    # Iterate over a snapshot: handling duplicates modifies node['names'].
    for name in tuple(node['names']):
        if name not in self.nameids:
            self.nameids[name] = id
            self.nametypes[name] = explicit
            continue
        self.set_duplicate_name_id(node, id, name, msgnode, explicit)
def set_duplicate_name_id(self,
                          node: Element,
                          id: str,
                          name: str,
                          msgnode: Element | None,
                          explicit: bool,
                          ) -> None:
    """Handle a `name` of `node` that is already in `self.nameids`.

    Updates `self.nameids` and `self.nametypes`, invalidates names with
    `dupname()`, and generates system messages (added to `msgnode`, if
    given) according to the state transition table in the docstring of
    `set_name_id_map()`.

    `id` is the ID of `node`; `explicit` tells whether the new target
    is explicit (True) or implicit (False).
    """
    old_id = self.nameids[name]
    old_explicit = self.nametypes[name]
    # None if `old_id` is None or not registered in `self.ids`.
    old_node = self.ids.get(old_id)
    # A name once used explicitly stays "explicit".
    self.nametypes[name] = old_explicit or explicit
    if explicit:
        if old_explicit:
            level = 2
            if old_id is not None:
                if 'refuri' in node:
                    refuri = node['refuri']
                    if (old_node['names']
                        and 'refuri' in old_node
                        and old_node['refuri'] == refuri):
                        level = 1  # just inform if refuri is identical
                if level > 1:
                    dupname(old_node, name)
                    self.nameids[name] = None
            msg = self.reporter.system_message(
                level, 'Duplicate explicit target name: "%s".' % name,
                backrefs=[id], base_node=node)
            if msgnode is not None:
                # append <system_message> if valid at this place
                msgnode += msg
                try:
                    msgnode.validate(recursive=False)
                except ValidationError:
                    # Not valid here: take the message out again.
                    msgnode.pop()
                    msg.parent = None
            dupname(node, name)
        else:  # new explicit, old implicit -> silently overwrite
            self.nameids[name] = id
            if old_id is not None:
                dupname(old_node, name)
    else:  # new name is implicit
        if old_id is not None and not old_explicit:
            self.nameids[name] = None
            dupname(old_node, name)
        dupname(node, name)
    # INFO message for every case except "explicit meets explicit" (handled
    # above) and "explicit meets an already invalidated implicit name".
    if not explicit or (not old_explicit and old_id is not None):
        if explicit:
            s = f'Target name overrides implicit target name "{name}".'
        else:
            s = f'Duplicate implicit target name: "{name}".'
        msg = self.reporter.info(s, backrefs=[id], base_node=node)
        if msgnode is not None:
            msgnode += msg
1971 def has_name(self, name: str) -> bool:
1972 return name in self.nameids
1974 # "note" here is an imperative verb: "take note of".
1975 def note_implicit_target(
1976 self, target: Element, msgnode: Element | None = None) -> None:
1977 id = self.set_id(target, msgnode)
1978 self.set_name_id_map(target, id, msgnode, explicit=False)
1980 def note_explicit_target(
1981 self, target: Element, msgnode: Element | None = None) -> None:
1982 id = self.set_id(target, msgnode)
1983 self.set_name_id_map(target, id, msgnode, explicit=True)
1985 def note_refname(self, node: Element) -> None:
1986 self.refnames.setdefault(node['refname'], []).append(node)
1988 def note_refid(self, node: Element) -> None:
1989 self.refids.setdefault(node['refid'], []).append(node)
1991 def note_indirect_target(self, target: target) -> None:
1992 self.indirect_targets.append(target)
1993 if target['names']:
1994 self.note_refname(target)
1996 def note_anonymous_target(self, target: target) -> None:
1997 self.set_id(target)
1999 def note_autofootnote(self, footnote: footnote) -> None:
2000 self.set_id(footnote)
2001 self.autofootnotes.append(footnote)
2003 def note_autofootnote_ref(self, ref: footnote_reference) -> None:
2004 self.set_id(ref)
2005 self.autofootnote_refs.append(ref)
2007 def note_symbol_footnote(self, footnote: footnote) -> None:
2008 self.set_id(footnote)
2009 self.symbol_footnotes.append(footnote)
2011 def note_symbol_footnote_ref(self, ref: footnote_reference) -> None:
2012 self.set_id(ref)
2013 self.symbol_footnote_refs.append(ref)
2015 def note_footnote(self, footnote: footnote) -> None:
2016 self.set_id(footnote)
2017 self.footnotes.append(footnote)
2019 def note_footnote_ref(self, ref: footnote_reference) -> None:
2020 self.set_id(ref)
2021 self.footnote_refs.setdefault(ref['refname'], []).append(ref)
2022 self.note_refname(ref)
2024 def note_citation(self, citation: citation) -> None:
2025 self.citations.append(citation)
2027 def note_citation_ref(self, ref: citation_reference) -> None:
2028 self.set_id(ref)
2029 self.citation_refs.setdefault(ref['refname'], []).append(ref)
2030 self.note_refname(ref)
2032 def note_substitution_def(self,
2033 subdef: substitution_definition,
2034 def_name: str,
2035 msgnode: Element | None = None,
2036 ) -> None:
2037 name = whitespace_normalize_name(def_name)
2038 if name in self.substitution_defs:
2039 msg = self.reporter.error(
2040 'Duplicate substitution definition name: "%s".' % name,
2041 base_node=subdef)
2042 if msgnode is not None:
2043 msgnode += msg
2044 oldnode = self.substitution_defs[name]
2045 dupname(oldnode, name)
2046 # keep only the last definition:
2047 self.substitution_defs[name] = subdef
2048 # case-insensitive mapping:
2049 self.substitution_names[fully_normalize_name(name)] = name
2051 def note_substitution_ref(self,
2052 subref: substitution_reference,
2053 refname: str,
2054 ) -> None:
2055 subref['refname'] = whitespace_normalize_name(refname)
2057 def note_pending(
2058 self, pending: pending, priority: int | None = None) -> None:
2059 self.transformer.add_pending(pending, priority)
2061 def note_parse_message(self, message: system_message) -> None:
2062 self.parse_messages.append(message)
2064 def note_transform_message(self, message: system_message) -> None:
2065 self.transform_messages.append(message)
2067 def note_source(self,
2068 source: StrPath | None,
2069 offset: int | None,
2070 ) -> None:
2071 self.current_source = source and os.fspath(source)
2072 if offset is None:
2073 self.current_line = offset
2074 else:
2075 self.current_line = offset + 1
2077 def copy(self) -> Self:
2078 obj = self.__class__(self.settings, self.reporter,
2079 **self.attributes)
2080 obj.source = self.source
2081 obj.line = self.line
2082 return obj
2084 def get_decoration(self) -> decoration:
2085 if not self.decoration:
2086 self.decoration: decoration = decoration()
2087 index = self.first_child_not_matching_class((Titular, meta))
2088 if index is None:
2089 self.append(self.decoration)
2090 else:
2091 self.insert(index, self.decoration)
2092 return self.decoration
2095# Bibliographic Elements
2096# ======================
class author(Bibliographic, TextElement):
    """The ``author`` bibliographic element (a `TextElement`)."""


class organization(Bibliographic, TextElement):
    """The ``organization`` bibliographic element (a `TextElement`)."""


class address(Bibliographic, FixedTextElement):
    """The ``address`` bibliographic element (a `FixedTextElement`)."""


class contact(Bibliographic, TextElement):
    """The ``contact`` bibliographic element (a `TextElement`)."""


class version(Bibliographic, TextElement):
    """The ``version`` bibliographic element (a `TextElement`)."""


class revision(Bibliographic, TextElement):
    """The ``revision`` bibliographic element (a `TextElement`)."""


class status(Bibliographic, TextElement):
    """The ``status`` bibliographic element (a `TextElement`)."""


class date(Bibliographic, TextElement):
    """The ``date`` bibliographic element (a `TextElement`)."""


class copyright(Bibliographic, TextElement):  # NoQA: A001 (builtin name)
    """The ``copyright`` bibliographic element (a `TextElement`)."""
class authors(Bibliographic, Element):
    """Container for author information for documents with multiple authors.
    """

    # (author, organization?, address?, contact?)+
    content_model: Final = (
        (author, '+'),
        (organization, '?'),
        (address, '?'),
        (contact, '?'),
    )

    def validate_content(self,
                         model: _ContentModelTuple | None = None,
                         elements: Sequence | None = None,
                         ) -> list:
        """Repeatedly test for children matching the content model.

        The inherited check is run again on its own leftovers until
        nothing is left.  The arguments are not used.

        Provisional.
        """
        check = super().validate_content
        leftover = check()
        while leftover:
            leftover = check(elements=leftover)
        return leftover
2133# Body Elements
2134# =============
2135#
2136# General
2137# -------
2138#
2139# Miscellaneous Body Elements and related Body Subelements (Part)
class paragraph(General, TextElement):
    """The ``paragraph`` body element (a `TextElement`)."""


class rubric(Titular, General, TextElement):
    """The ``rubric`` element: `Titular` and `General`, with text content.
    """
class compound(General, Element):
    """The ``compound`` element; contains one or more body elements."""

    # (%body.elements;)+
    content_model: Final = (
        (Body, '+'),
    )
class container(General, Element):
    """The ``container`` element; contains one or more body elements."""

    # (%body.elements;)+
    content_model: Final = (
        (Body, '+'),
    )
class attribution(Part, TextElement):
    """Visible reference to the source of a `block_quote`.

    Optional last child of a `block_quote`.
    """
class block_quote(General, Element):
    """An extended quotation, set off from the main text.

    Contains one or more body elements, optionally followed by an
    `attribution`.
    """

    # ((%body.elements;)+, attribution?)
    content_model: Final = (
        (Body, '+'),
        (attribution, '?'),
    )
class reference(General, Inline, Referential, TextElement):
    """The ``reference`` element (a `Referential` `TextElement`).

    May be used as `General` or as `Inline` element.
    """

    valid_attributes: Final = Element.valid_attributes + (
        'anonymous',
        'name',
        'refid',
        'refname',
        'refuri',
    )
2168# Lists
2169# -----
2170#
2171# Lists (Sequential) and related Body Subelements (Part)
class list_item(Part, Element):
    """Item of a `bullet_list` or `enumerated_list`.

    Contains any number of body elements (may be empty).
    """

    # (%body.elements;)*
    content_model: Final = (
        (Body, '*'),
    )
class bullet_list(Sequential, Element):
    """The ``bullet_list`` element; contains one or more `list_item`\\ s."""

    valid_attributes: Final = Element.valid_attributes + ('bullet',)

    # (list_item+)
    content_model: Final = (
        (list_item, '+'),
    )
class enumerated_list(Sequential, Element):
    """The ``enumerated_list`` element; contains one or more `list_item`\\ s.
    """

    valid_attributes: Final = Element.valid_attributes + (
        'enumtype',
        'prefix',
        'suffix',
        'start',
    )

    # (list_item+)
    content_model: Final = (
        (list_item, '+'),
    )
class term(Part, TextElement):
    """The ``term`` element (a `TextElement`); see `definition_list_item`.
    """


class classifier(Part, TextElement):
    """The ``classifier`` element (a `TextElement`);
    see `definition_list_item`."""
class definition(Part, Element):
    """Definition of a `term` in a `definition_list`.

    Contains one or more body elements.
    """

    # (%body.elements;)+
    content_model: Final = (
        (Body, '+'),
    )
class definition_list_item(Part, Element):
    """Item of a `definition_list`.

    Contains a `term`, any number of further `classifier` or `term`
    elements, and a `definition`.
    """

    # ((term, classifier*)+, definition)
    content_model: Final = (
        (term, '.'),
        ((classifier, term), '*'),
        (definition, '.'),
    )
class definition_list(Sequential, Element):
    """List of terms and their definitions.

    Can be used for glossaries or dictionaries, to describe or
    classify things, for dialogues, or to itemize subtopics.

    Contains one or more `definition_list_item`\\ s.
    """

    # (definition_list_item+)
    content_model: Final = (
        (definition_list_item, '+'),
    )
class field_name(Part, TextElement):
    """The ``field_name`` element (a `TextElement`); first child of a
    `field`."""
class field_body(Part, Element):
    """The ``field_body`` element; second child of a `field`.

    Contains any number of body elements (may be empty).
    """

    # (%body.elements;)*
    content_model: Final = (
        (Body, '*'),
    )
class field(Part, Bibliographic, Element):
    """The ``field`` element: a `field_name` followed by a `field_body`."""

    # (field_name, field_body)
    content_model: Final = (
        (field_name, '.'),
        (field_body, '.'),
    )
class field_list(Sequential, Element):
    """List of label & data pairs.

    Typically rendered as a two-column list.
    Also used for extension syntax or special processing.

    Contains one or more `field`\\ s.
    """

    # (field+)
    content_model: Final = (
        (field, '+'),
    )
class option_string(Part, PureTextElement):
    """A literal command-line option. Typically monospaced.

    First child of an `option`.
    """
class option_argument(Part, PureTextElement):
    """Placeholder text for option arguments."""

    valid_attributes: Final = Element.valid_attributes + ('delimiter',)

    def astext(self) -> str:
        """Return the text content, preceded by the "delimiter" attribute
        (default: a space)."""
        delimiter = self.get('delimiter', ' ')
        return delimiter + TextElement.astext(self)
class option(Part, Element):
    """Option element in an `option_list_item`.

    Groups an option string with zero or more option argument placeholders.
    """

    child_text_separator: Final = ''

    # (option_string, option_argument*)
    content_model: Final = (
        (option_string, '.'),
        (option_argument, '*'),
    )
class option_group(Part, Element):
    """Groups together one or more `option` elements, all synonyms."""

    child_text_separator: Final = ', '

    # (option+)
    content_model: Final = (
        (option, '+'),
    )
class description(Part, Element):
    """Description of a command-line option.

    Contains one or more body elements.
    """

    # (%body.elements;)+
    content_model: Final = (
        (Body, '+'),
    )
class option_list_item(Part, Element):
    """Container for a pair of `option_group` and `description` elements.
    """

    child_text_separator: Final = ' '

    # (option_group, description)
    content_model: Final = (
        (option_group, '.'),
        (description, '.'),
    )
class option_list(Sequential, Element):
    """Two-column list of command-line options and descriptions.

    Contains one or more `option_list_item`\\ s.
    """

    # (option_list_item+)
    content_model: Final = (
        (option_list_item, '+'),
    )
2282# Pre-formatted text blocks
2283# -------------------------
class literal_block(General, FixedTextElement):
    """The ``literal_block`` body element (a `FixedTextElement`)."""


class doctest_block(General, FixedTextElement):
    """The ``doctest_block`` body element (a `FixedTextElement`)."""
class math_block(General, FixedTextElement, PureTextElement):
    """Mathematical notation (display formula).

    Body-level counterpart of the inline element `math`.
    """
class line(Part, TextElement):
    """Single line of text in a `line_block`."""

    # class-level default; None unless set on the instance
    indent: str | None = None
class line_block(General, Element):
    """Sequence of lines and nested line blocks.

    The content model is assigned after the class definition
    because it refers to the class itself.
    """
    # recursive content model: (line | line_block)+


line_block.content_model = (
    ((line, line_block), '+'),
)
2307# Admonitions
2308# -----------
2309# distinctive and self-contained notices
class attention(Admonition, Element):
    """The ``attention`` admonition element."""


class caution(Admonition, Element):
    """The ``caution`` admonition element."""


class danger(Admonition, Element):
    """The ``danger`` admonition element."""


class error(Admonition, Element):
    """The ``error`` admonition element."""


class important(Admonition, Element):
    """The ``important`` admonition element."""


class note(Admonition, Element):
    """The ``note`` admonition element."""


class tip(Admonition, Element):
    """The ``tip`` admonition element."""


class hint(Admonition, Element):
    """The ``hint`` admonition element."""


class warning(Admonition, Element):
    """The ``warning`` admonition element."""
class admonition(Admonition, Element):
    """The ``admonition`` element: a `title` followed by one or more
    body elements."""

    # (title, (%body.elements;)+)
    content_model: Final = (
        (title, '.'),
        (Body, '+'),
    )
2327# Footnote and citation
2328# ---------------------
class label(Part, PureTextElement):
    """Visible identifier for footnotes and citations.

    First child of a `footnote` or `citation`.
    """
class footnote(General, BackLinkable, Element, Labeled, Targetable):
    """Labelled note providing additional context (footnote or endnote).

    Contains an optional `label` followed by one or more body elements.
    """

    valid_attributes: Final = Element.valid_attributes + (
        'auto',
        'backrefs',
    )

    # (label?, (%body.elements;)+)
    # The label will become required in Docutils 1.0.
    content_model: Final = (
        (label, '?'),
        (Body, '+'),
    )
class citation(General, BackLinkable, Element, Labeled, Targetable):
    """The ``citation`` element: a `label` followed by one or more
    body elements."""

    # (label, (%body.elements;)+)
    content_model: Final = (
        (label, '.'),
        (Body, '+'),
    )
2347# Graphical elements
2348# ------------------
class image(General, Inline, Element):
    """Reference to an image resource.

    May be body element or inline element.
    """

    valid_attributes: Final = Element.valid_attributes + (
        'uri',
        'alt',
        'align',
        'height',
        'width',
        'scale',
        'loading',
    )

    def astext(self) -> str:
        """Return the "alt" attribute (default: empty string)."""
        alternative_text = self.get('alt', '')
        return alternative_text
class caption(Part, TextElement):
    """The ``caption`` element (a `TextElement`); optional child of a
    `figure`."""
class legend(Part, Element):
    """A wrapper for text accompanying a `figure` that is not the caption.

    Contains one or more body elements.
    """

    # (%body.elements;)+
    content_model: Final = (
        (Body, '+'),
    )
class figure(General, Element):
    """A formal figure, generally an illustration, with a title.

    Contains an `image` or `reference`, optionally followed by a
    `caption` and a `legend`.
    """

    valid_attributes: Final = Element.valid_attributes + (
        'align',
        'width',
    )

    # (image, ((caption, legend?) | legend))
    # TODO: According to the DTD, a caption or legend is required
    # but rST allows "bare" figures which are formatted differently from
    # images (floating in LaTeX, nested in a <figure> in HTML). [bugs: #489]
    content_model: Final = (
        ((image, reference), '.'),
        (caption, '?'),
        (legend, '?'),
    )
2383# Tables
2384# ------
class entry(Part, Element):
    """An entry in a `row` (a table cell).

    Contains any number of body elements (may be empty).
    """

    valid_attributes: Final = Element.valid_attributes + (
        'align',
        'char',
        'charoff',
        'colname',
        'colsep',
        'morecols',
        'morerows',
        'namest',
        'nameend',
        'rowsep',
        'valign',
    )

    # %tbl.entry.mdl -> (%body.elements;)*
    content_model: Final = (
        (Body, '*'),
    )
class row(Part, Element):
    """Row of table cells; contains one or more `entry` elements."""

    valid_attributes: Final = Element.valid_attributes + (
        'rowsep',
        'valign',
    )

    # (%tbl.row.mdl;) -> entry+
    content_model: Final = (
        (entry, '+'),
    )
class colspec(Part, Element):
    """Specifications for a column in a `tgroup`."""

    valid_attributes: Final = Element.valid_attributes + (
        'align',
        'char',
        'charoff',
        'colname',
        'colnum',
        'colsep',
        'colwidth',
        'rowsep',
        'stub',
    )

    def propwidth(self) -> int|float:
        """Return numerical value of "colwidth__" attribute. Default 1.

        Raise ValueError if "colwidth" is zero, negative, or a *fixed value*.

        Provisional.

        __ https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth
        """
        # Move current implementation of validate_colwidth() here
        # in Docutils 1.0
        raw_width = self.get('colwidth', '')
        return validate_colwidth(raw_width)
class thead(Part, Element):
    """Row(s) that form the head of a `tgroup`.

    Contains one or more `row`\\ s.
    """

    valid_attributes: Final = Element.valid_attributes + ('valign',)

    # (row+)
    content_model: Final = (
        (row, '+'),
    )
class tbody(Part, Element):
    """Body of a `tgroup`.

    Contains one or more `row`\\ s.
    """

    valid_attributes: Final = Element.valid_attributes + ('valign',)

    # (row+)
    content_model: Final = (
        (row, '+'),
    )
class tgroup(Part, Element):
    """A portion of a table. Most tables have just one `tgroup`.

    Contains any number of `colspec`\\ s, an optional `thead`,
    and a `tbody`.
    """

    valid_attributes: Final = Element.valid_attributes + (
        'align',
        'cols',
        'colsep',
        'rowsep',
    )

    # (colspec*, thead?, tbody)
    content_model: Final = (
        (colspec, '*'),
        (thead, '?'),
        (tbody, '.'),
    )
class table(General, Element):
    """A data arrangement with rows and columns.

    Contains an optional `title` followed by one or more `tgroup`\\ s.
    """

    valid_attributes: Final = Element.valid_attributes + (
        'align',
        'colsep',
        'frame',
        'pgwide',
        'rowsep',
        'width',
    )

    # (title?, tgroup+)
    content_model: Final = (
        (title, '?'),
        (tgroup, '+'),
    )
2449# Special purpose elements
2450# ------------------------
2451# Body elements for internal use or special requests.
class comment(Invisible, FixedTextElement, PureTextElement):
    """Author notes, hidden from the output.

    An `Invisible` element with fixed, pure text content.
    """
class substitution_definition(Invisible, TextElement):
    """The ``substitution_definition`` element
    (an `Invisible` `TextElement`)."""

    valid_attributes: Final = Element.valid_attributes + (
        'ltrim',
        'rtrim',
    )
class target(Invisible, Inline, TextElement, Targetable):
    """The ``target`` element
    (an `Invisible`, `Targetable` `TextElement`)."""

    valid_attributes: Final = Element.valid_attributes + (
        'anonymous',
        'refid',
        'refname',
        'refuri',
    )
class system_message(Special, BackLinkable, PreBibliographic, Element):
    """
    System message element.

    Do not instantiate this class directly; use
    ``document.reporter.info/warning/error/severe()`` instead.
    """

    valid_attributes: Final = BackLinkable.valid_attributes + (
        'level',
        'line',
        'type',
    )

    # (%body.elements;)+
    content_model: Final = (
        (Body, '+'),
    )

    def __init__(self,
                 message: str | None = None,
                 *children,
                 **attributes: Any,
                 ) -> None:
        """Set up the element.

        A non-empty `message` is wrapped in a `paragraph` and becomes the
        first child.  A "rawsource" keyword argument is not stored as
        attribute but passed on as the element's raw source.
        """
        rawsource = attributes.pop('rawsource', '')
        if message:
            children = (paragraph('', message), *children)
        try:
            Element.__init__(self, rawsource, *children, **attributes)
        except:  # NoQA: E722 (catchall)
            # show the offending children, then propagate the exception
            print('system_message: children=%r' % (children,))
            raise

    def astext(self) -> str:
        """Return "<source>:<line>: (<type>/<level>) <text content>".

        The "line" attribute is optional;
        "source", "type", and "level" are required.
        """
        line_number = self.get('line', '')
        parts = (self['source'], line_number, self['type'],
                 self['level'], Element.astext(self))
        return '%s:%s: (%s/%s) %s' % parts
class pending(Invisible, Element):
    """
    Placeholder for pending operations.

    The "pending" element is used to encapsulate a pending operation: the
    operation (transform), the point at which to apply it, and any data it
    requires.  Only the pending operation's location within the document is
    stored in the public document tree (by the "pending" object itself); the
    operation and its data are stored in the "pending" object's internal
    instance attributes.

    For example, say you want a table of contents in your reStructuredText
    document.  The easiest way to specify where to put it is from within the
    document, with a directive::

        .. contents::

    But the "contents" directive can't do its work until the entire document
    has been parsed and possibly transformed to some extent.  So the directive
    code leaves a placeholder behind that will trigger the second phase of its
    processing, something like this::

        <pending ...public attributes...> + internal attributes

    Use `document.note_pending()` so that the
    `docutils.transforms.Transformer` stage of processing can run all pending
    transforms.
    """

    def __init__(self,
                 transform: Transform,
                 details: Mapping[str, Any] | None = None,
                 rawsource: str = '',
                 *children,
                 **attributes: Any,
                 ) -> None:
        Element.__init__(self, rawsource, *children, **attributes)

        self.transform: Transform = transform
        """The `docutils.transforms.Transform` class implementing the pending
        operation."""

        self.details: Mapping[str, Any] = details or {}
        """Detail data (dictionary) required by the pending operation."""

    def pformat(self, indent: str = ' ', level: int = 0) -> str:
        """Return the element's `pformat()` output followed by a listing
        of the internal attributes (transform and sorted details)."""
        transform_path = '%s.%s' % (self.transform.__module__,
                                    self.transform.__name__)
        internals = ['.. internal attributes:',
                     ' .transform: %s' % transform_path,
                     ' .details:']
        for key, value in sorted(self.details.items()):
            if isinstance(value, Node):
                nodes = [value]
            elif (value
                  and isinstance(value, list)
                  and isinstance(value[0], Node)):
                nodes = value
            else:
                # plain value: show its representation on one line
                internals.append('%7s%s: %r' % ('', key, value))
                continue
            # node(s): key on its own line, followed by the nested listing
            internals.append('%7s%s:' % ('', key))
            for node in nodes:
                internals.extend('%9s%s' % ('', text)
                                 for text in node.pformat().splitlines())
        prefix = indent * level
        listing = ''.join(' %s%s\n' % (prefix, text) for text in internals)
        return Element.pformat(self, indent, level) + listing

    def copy(self) -> Self:
        """Return a childless copy sharing transform and details;
        document, source and line are carried over."""
        cls = self.__class__
        duplicate = cls(self.transform, self.details, self.rawsource,
                        **self.attributes)
        duplicate._document = self._document
        duplicate.source, duplicate.line = self.source, self.line
        return duplicate
class raw(Special, Inline, PreBibliographic,
          FixedTextElement, PureTextElement):
    """Raw data that is to be passed untouched to the Writer.

    Can be used as Body element or Inline element.
    """

    valid_attributes: Final = Element.valid_attributes + (
        'format',
        'xml:space',
    )
2586# Inline Elements
2587# ===============
class abbreviation(Inline, TextElement):
    """The ``abbreviation`` inline element (a `TextElement`)."""


class acronym(Inline, TextElement):
    """The ``acronym`` inline element (a `TextElement`)."""


class emphasis(Inline, TextElement):
    """The ``emphasis`` inline element (a `TextElement`)."""


class generated(Inline, TextElement):
    """The ``generated`` inline element (a `TextElement`)."""


class inline(Inline, TextElement):
    """The ``inline`` inline element (a `TextElement`)."""


class literal(Inline, TextElement):
    """The ``literal`` inline element (a `TextElement`)."""


class strong(Inline, TextElement):
    """The ``strong`` inline element (a `TextElement`)."""


class subscript(Inline, TextElement):
    """The ``subscript`` inline element (a `TextElement`)."""


class superscript(Inline, TextElement):
    """The ``superscript`` inline element (a `TextElement`)."""


class title_reference(Inline, TextElement):
    """The ``title_reference`` inline element (a `TextElement`)."""
class footnote_reference(Inline, Referential, PureTextElement):
    """The ``footnote_reference`` inline element
    (a `Referential` `PureTextElement`)."""

    valid_attributes: Final = Element.valid_attributes + (
        'auto',
        'refid',
        'refname',
    )
class citation_reference(Inline, Referential, PureTextElement):
    """The ``citation_reference`` inline element
    (a `Referential` `PureTextElement`)."""

    valid_attributes: Final = Element.valid_attributes + (
        'refid',
        'refname',
    )
class substitution_reference(Inline, TextElement):
    """The ``substitution_reference`` inline element (a `TextElement`).

    The "refname" attribute is set by `document.note_substitution_ref()`.
    """

    valid_attributes: Final = Element.valid_attributes + ('refname',)
class math(Inline, PureTextElement):
    """Mathematical notation in running text.

    Inline counterpart of the body element `math_block`.
    """
class problematic(Inline, TextElement):
    """The ``problematic`` inline element (a `TextElement`)."""

    valid_attributes: Final = Element.valid_attributes + (
        'refid',
        'refname',
        'refuri',
    )
2623# ========================================
2624# Auxiliary Classes, Functions, and Data
2625# ========================================
# Whitespace-separated names, one group per initial letter;
# split into a list at import time.
node_class_names: Sequence[str] = """
    Text
    abbreviation acronym address admonition attention attribution
    author authors
    block_quote bullet_list
    caption caution citation citation_reference classifier colspec
    comment compound contact container copyright
    danger date decoration definition definition_list
    definition_list_item description docinfo doctest_block document
    emphasis entry enumerated_list error
    field field_body field_list field_name figure footer footnote
    footnote_reference
    generated
    header hint
    image important inline
    label legend line line_block list_item literal literal_block
    math math_block meta
    note
    option option_argument option_group option_list
    option_list_item option_string organization
    paragraph pending problematic
    raw reference revision row rubric
    section sidebar status strong subscript
    substitution_definition substitution_reference subtitle
    superscript system_message
    table target tbody term tgroup thead tip title title_reference
    topic transition
    version
    warning""".split()
"""A list of names of all concrete Node subclasses."""
class NodeVisitor:
    """
    "Visitor" pattern [GoF95]_ abstract superclass implementation for
    document tree traversals.

    Each node class has corresponding methods, doing nothing by
    default; override individual methods for specific and useful
    behaviour.  The `dispatch_visit()` method is called by
    `Node.walk()` upon entering a node.  `Node.walkabout()` also calls
    the `dispatch_departure()` method before exiting a node.

    The dispatch methods call "``visit_`` + node class name" or
    "``depart_`` + node class name", resp.

    This is a base class for visitors whose ``visit_...`` & ``depart_...``
    methods must be implemented for *all* compulsory node types encountered
    (such as for `docutils.writers.Writer` subclasses).
    Unimplemented methods will raise exceptions (except for optional nodes).

    For sparse traversals, where only certain node types are of interest, use
    subclass `SparseNodeVisitor` instead.  When (mostly or entirely) uniform
    processing is desired, subclass `GenericNodeVisitor`.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    optional: ClassVar[tuple[str, ...]] = ('meta',)
    """
    Tuple containing node class names (as strings).

    No exception will be raised if writers do not implement visit
    or departure functions for these node classes.

    Used to ensure transitional compatibility with existing 3rd-party writers.
    """

    def __init__(self, document: document, /) -> None:
        self.document: document = document

    def dispatch_visit(self, node) -> None:
        """
        Call self."``visit_`` + node class name" with `node` as
        parameter.  If the ``visit_...`` method does not exist, call
        self.unknown_visit.
        """
        node_name = node.__class__.__name__
        handler = getattr(self, f'visit_{node_name}', self.unknown_visit)
        message = (
            'docutils.nodes.NodeVisitor.dispatch_visit calling %s for %s'
            % (handler.__name__, node_name))
        self.document.reporter.debug(message)
        return handler(node)

    def dispatch_departure(self, node) -> None:
        """
        Call self."``depart_`` + node class name" with `node` as
        parameter.  If the ``depart_...`` method does not exist, call
        self.unknown_departure.
        """
        node_name = node.__class__.__name__
        handler = getattr(self, f'depart_{node_name}',
                          self.unknown_departure)
        message = (
            'docutils.nodes.NodeVisitor.dispatch_departure calling %s for %s'
            % (handler.__name__, node_name))
        self.document.reporter.debug(message)
        return handler(node)

    def unknown_visit(self, node) -> None:
        """
        Called when entering unknown `Node` types.

        Raise an exception unless overridden.
        """
        node_name = node.__class__.__name__
        # tolerated: optional node type and no strict checking requested
        if (not self.document.settings.strict_visitor
                and node_name in self.optional):
            return
        raise NotImplementedError(
            '%s visiting unknown node type: %s'
            % (self.__class__, node_name))

    def unknown_departure(self, node) -> None:
        """
        Called before exiting unknown `Node` types.

        Raise exception unless overridden.
        """
        node_name = node.__class__.__name__
        # tolerated: optional node type and no strict checking requested
        if (not self.document.settings.strict_visitor
                and node_name in self.optional):
            return
        raise NotImplementedError(
            '%s departing unknown node type: %s'
            % (self.__class__, node_name))
class SparseNodeVisitor(NodeVisitor):
    """
    Base class for sparse traversals, where only certain node types are of
    interest.

    Do-nothing ``visit_...`` & ``depart_...`` methods for the names in
    `node_class_names` are added after the class definition.
    When these methods should be implemented for *all* node types (such as
    for `docutils.writers.Writer` subclasses), subclass `NodeVisitor`
    instead.
    """
class GenericNodeVisitor(NodeVisitor):
    """
    Generic "Visitor" abstract superclass, for simple traversals.

    Unless overridden, each ``visit_...`` method calls `default_visit()`, and
    each ``depart_...`` method (when using `Node.walkabout()`) calls
    `default_departure()`.  `default_visit()` (and `default_departure()`) must
    be overridden in subclasses.

    Define fully generic visitors by overriding `default_visit()` (and
    `default_departure()`) only.  Define semi-generic visitors by overriding
    individual ``visit_...()`` (and ``depart_...()``) methods also.

    `NodeVisitor.unknown_visit()` (`NodeVisitor.unknown_departure()`) should
    be overridden for default behavior.
    """

    def default_visit(self, node):
        """Override for generic, uniform traversals.

        Raise `NotImplementedError` unless overridden.
        """
        raise NotImplementedError

    def default_departure(self, node):
        """Override for generic, uniform traversals.

        Raise `NotImplementedError` unless overridden.
        """
        raise NotImplementedError
def _call_default_visit(self: GenericNodeVisitor, node) -> None:
    """Forward to `self.default_visit()`; the result is discarded."""
    handler = self.default_visit
    handler(node)
def _call_default_departure(self: GenericNodeVisitor, node) -> None:
    """Forward to `self.default_departure()`; the result is discarded."""
    handler = self.default_departure
    handler(node)
def _nop(self: SparseNodeVisitor, node) -> None:
    """Do nothing (visit/departure method for `SparseNodeVisitor`)."""
    return None
def _add_node_class_names(names) -> None:
    """Save typing with dynamic assignments:

    For every name, add ``visit_<name>`` and ``depart_<name>`` methods
    to `GenericNodeVisitor` (calling the default methods)
    and `SparseNodeVisitor` (doing nothing).
    """
    handlers = (
        (GenericNodeVisitor, _call_default_visit, _call_default_departure),
        (SparseNodeVisitor, _nop, _nop),
    )
    for _name in names:
        for visitor_class, on_visit, on_departure in handlers:
            setattr(visitor_class, 'visit_' + _name, on_visit)
            setattr(visitor_class, 'depart_' + _name, on_departure)


_add_node_class_names(node_class_names)
class TreeCopyVisitor(GenericNodeVisitor):
    """
    Make a complete copy of a tree or branch, including element attributes.

    Traverse the tree with `Node.walkabout()` (the departure methods are
    required) and fetch the result with `get_tree_copy()`.
    """

    def __init__(self, document: document) -> None:
        super().__init__(document)
        # saved "acting parents" of the nodes currently being visited
        self.parent_stack: list[list] = []
        # acting parent; initially a list that receives the copied root
        self.parent: list = []

    def get_tree_copy(self):
        """Return the first item of the current acting parent."""
        return self.parent[0]

    def default_visit(self, node) -> None:
        """Copy the current node, and make it the new acting parent."""
        duplicate = node.copy()
        acting_parent = self.parent
        acting_parent.append(duplicate)
        self.parent_stack.append(acting_parent)
        self.parent = duplicate

    def default_departure(self, node) -> None:
        """Restore the previous acting parent."""
        self.parent = self.parent_stack.pop()
2834# Custom Exceptions
2835# =================
class ValidationError(ValueError):
    """Invalid Docutils Document Tree Element.

    The element that caused the error (or None) is stored
    in the `problematic_element` attribute.
    """

    def __init__(self,
                 msg: str,
                 problematic_element: Element | None = None,
                 ) -> None:
        super().__init__(msg)
        self.problematic_element = problematic_element
class TreePruningException(Exception):
    """
    Base class for `NodeVisitor`-related tree pruning exceptions.

    Subclasses are raised from within ``visit_...`` or ``depart_...``
    methods called from `Node.walk()` and `Node.walkabout()` tree
    traversals in order to prune the tree traversed.
    """


class SkipChildren(TreePruningException):
    """
    Skip all children of the current node.

    The current node's siblings and ``depart_...`` method are not affected.
    """


class SkipSiblings(TreePruningException):
    """
    Skip all further siblings (to the right) of the current node.

    The current node's children and its ``depart_...`` method are not
    affected.
    """


class SkipNode(TreePruningException):
    """
    Skip the current node's children and do not call the current node's
    ``depart_...`` method.
    """


class SkipDeparture(TreePruningException):
    """
    Skip the call of the current node's ``depart_...`` method.

    The current node's children and siblings are not affected.
    """


class NodeFound(TreePruningException):
    """
    Raise to indicate that the target of a search has been found.

    This exception must be caught by the client; it is not caught by the
    traversal code.
    """


class StopTraversal(TreePruningException):
    """
    Stop the traversal altogether.

    The current node's ``depart_...`` method is not affected.  The parent
    nodes ``depart_...`` methods are also called as usual.  No other nodes
    are visited.  This is an alternative to NodeFound that does not cause
    exception handling to trickle up to the caller.
    """
2900# definition moved here from `utils` to avoid circular import dependency
def unescape(text: str,
             restore_backslashes: bool = False,
             respect_whitespace: bool = False,
             ) -> str:
    """
    Return `text` with null characters removed or restored to backslashes.

    With `restore_backslashes`, every null character is replaced by a
    backslash.  Otherwise, nulls are removed together with a directly
    following space or newline (backslash-escaped whitespace).
    """
    # `respect_whitespace` is ignored (since introduction 2016-12-16)
    if restore_backslashes:
        return text.replace('\x00', '\\')
    # escaped whitespace first, then any remaining null characters
    for escaped in ('\x00 ', '\x00\n', '\x00'):
        text = text.replace(escaped, '')
    return text
def make_id(string: str) -> str:
    """
    Convert `string` into an identifier and return it.

    Docutils identifiers will conform to the regular expression
    ``[a-z](-?[a-z0-9]+)*``.  For CSS compatibility, identifiers (the "class"
    and "id" attributes) should have no underscores, colons, or periods.
    Hyphens may be used.

    - The `HTML 4.01 spec`_ defines identifiers based on SGML tokens:

          ID and NAME tokens must begin with a letter ([A-Za-z]) and may be
          followed by any number of letters, digits ([0-9]), hyphens ("-"),
          underscores ("_"), colons (":"), and periods (".").

    - However the `CSS1 spec`_ defines identifiers based on the "name" token,
      a tighter interpretation ("flex" tokenizer notation; "latin1" and
      "escape" 8-bit characters have been replaced with entities)::

          unicode     \\[0-9a-f]{1,4}
          latin1      [&iexcl;-&yuml;]
          escape      {unicode}|\\[ -~&iexcl;-&yuml;]
          nmchar      [-a-z0-9]|{latin1}|{escape}
          name        {nmchar}+

    The CSS1 "nmchar" rule does not include underscores ("_"), colons (":"),
    or periods ("."), therefore "class" and "id" attributes should not contain
    these characters.  They should be replaced with hyphens ("-").  Combined
    with HTML's requirements (the first character must be a letter; no
    "unicode", "latin1", or "escape" characters), this results in the
    ``[a-z](-?[a-z0-9]+)*`` pattern.

    .. _HTML 4.01 spec: https://www.w3.org/TR/html401
    .. _CSS1 spec: https://www.w3.org/TR/REC-CSS1
    """
    ident = string.lower()
    # replace characters listed in the translation tables
    for table in (_non_id_translate_digraphs, _non_id_translate):
        ident = ident.translate(table)
    # get rid of non-ascii characters: decompose, then drop what is left
    decomposed = unicodedata.normalize('NFKD', ident)
    ident = decomposed.encode('ascii', 'ignore').decode('ascii')
    # shrink runs of whitespace and replace by hyphen
    single_spaced = ' '.join(ident.split())
    ident = _non_id_chars.sub('-', single_spaced)
    # strip leading hyphens/digits and trailing hyphens
    ident = _non_id_at_ends.sub('', ident)
    return str(ident)
# Patterns and translation tables used by `make_id()`.

# One or more characters other than ASCII lowercase letters and digits.
_non_id_chars: re.Pattern[str] = re.compile('[^a-z0-9]+')
# A leading run of hyphens and digits, or a trailing run of hyphens.
_non_id_at_ends: re.Pattern[str] = re.compile('^[-0-9]+|-+$')
# Mapping of code points to a single ASCII letter (for `str.translate()`).
_non_id_translate: dict[int, str] = {
    0x00f8: 'o',  # o with stroke
    0x0111: 'd',  # d with stroke
    0x0127: 'h',  # h with stroke
    0x0131: 'i',  # dotless i
    0x0142: 'l',  # l with stroke
    0x0167: 't',  # t with stroke
    0x0180: 'b',  # b with stroke
    0x0183: 'b',  # b with topbar
    0x0188: 'c',  # c with hook
    0x018c: 'd',  # d with topbar
    0x0192: 'f',  # f with hook
    0x0199: 'k',  # k with hook
    0x019a: 'l',  # l with bar
    0x019e: 'n',  # n with long right leg
    0x01a5: 'p',  # p with hook
    0x01ab: 't',  # t with palatal hook
    0x01ad: 't',  # t with hook
    0x01b4: 'y',  # y with hook
    0x01b6: 'z',  # z with stroke
    0x01e5: 'g',  # g with stroke
    0x0225: 'z',  # z with hook
    0x0234: 'l',  # l with curl
    0x0235: 'n',  # n with curl
    0x0236: 't',  # t with curl
    0x0237: 'j',  # dotless j
    0x023c: 'c',  # c with stroke
    0x023f: 's',  # s with swash tail
    0x0240: 'z',  # z with swash tail
    0x0247: 'e',  # e with stroke
    0x0249: 'j',  # j with stroke
    0x024b: 'q',  # q with hook tail
    0x024d: 'r',  # r with stroke
    0x024f: 'y',  # y with stroke
}
# Mapping of code points to two ASCII letters (for `str.translate()`).
_non_id_translate_digraphs: dict[int, str] = {
    0x00df: 'sz',  # ligature sz
    0x00e6: 'ae',  # ae
    0x0153: 'oe',  # ligature oe
    0x0238: 'db',  # db digraph
    0x0239: 'qp',  # qp digraph
}
def dupname(node: Element, name: str) -> None:
    """
    Mark `name` as a duplicate name of `node`.

    Append `name` to ``node['dupnames']``, remove it from ``node['names']``,
    and set ``node.referenced`` to true.
    """
    node['dupnames'].append(name)
    node['names'].remove(name)
    # Assume that `node` is referenced, even though it isn't;
    # we don't want to throw unnecessary system_messages.
    node.referenced = True
def fully_normalize_name(name: str) -> str:
    """
    Return `name` in lowercase with normalized whitespace.

    Leading and trailing whitespace is dropped; every internal run of
    whitespace is replaced by a single space.
    """
    words = name.lower().split()
    return ' '.join(words)
def whitespace_normalize_name(name: str) -> str:
    """
    Return `name` with normalized whitespace (case is preserved).

    Leading and trailing whitespace is dropped; every internal run of
    whitespace is replaced by a single space.
    """
    words = name.split()
    return ' '.join(words)
def serial_escape(value: str) -> str:
    """
    Escape a string that is an element of a list, for serialization.

    Backslashes are doubled and every space is prefixed with a backslash.
    """
    # Double the backslashes first, so that the backslashes added
    # in front of spaces are not doubled again.
    doubled = value.replace('\\', r'\\')
    return doubled.replace(' ', r'\ ')
def split_name_list(s: str) -> list[str]:
    r"""Split a string at non-escaped space characters.

    A backslash escapes a following space or backslash
    (cf. `serial_escape()`).
    Return the list of "names" with the escaping backslashes removed.

    >>> split_name_list(r'a\ n\ame two\\ n\\ames')
    ['a name', 'two\\', 'n\\ames']

    Provisional.
    """
    marked = (s.replace('\\', '\x00')           # escape with NULL char
               .replace('\x00\x00', '\\')       # unescape backslashes
               .replace('\x00 ', '\x00\x00'))   # escaped spaces -> NULL NULL
    # restore internal spaces, drop other escaping characters
    return [part.replace('\x00\x00', ' ').replace('\x00', '')
            for part in marked.split(' ')]
def pseudo_quoteattr(value: str) -> str:
    """
    Return `value` enclosed in double quotes, for pseudo-XML output.

    The content of `value` is not escaped.
    """
    return f'"{value}"'
def parse_measure(measure: str, unit_pattern: str = '[a-zA-Zµ]*|%?'
                  ) -> tuple[int|float, str]:
    """Parse a measure__, return value + unit.

    `unit_pattern` is a regular expression describing recognized units.
    The default is suited for (but not limited to) CSS3 units and SI units.
    It matches runs of ASCII letters or Greek mu, a single percent sign,
    or no unit.

    The value is returned as `int` if possible, else as `float`.
    Raise `ValueError` if `measure` is not a number followed by optional
    spaces and a recognized unit.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#measure

    Provisional.
    """
    match = re.fullmatch(f'(-?[0-9.]+) *({unit_pattern})', measure)
    if match is None:
        raise ValueError(f'"{measure}" is no valid measure.')
    number, unit = match.group(1), match.group(2)
    try:
        value = int(number)
    except ValueError:
        # Not an integer: may be a decimal fraction
        # or an invalid combination of digits and dots.
        try:
            value = float(number)
        except ValueError:
            raise ValueError(f'"{measure}" is no valid measure.')
    return value, unit
# Functions to validate `Element attribute`__ values.
#
# Ensure the expected Python `data type`__, normalize, and check for
# restrictions.
#
# The functions can be used to convert `str` values (e.g. from an XML
# representation) or to validate an existing document tree or node.
#
# Cf. `Element.validate_attributes()`, `docutils.parsers.docutils_xml`,
# and the `ATTRIBUTE_VALIDATORS` mapping below.
#
# __ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-reference
# __ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-types
def create_keyword_validator(*keywords: str) -> Callable[[str], str]:
    """
    Return a validating function for the given `keywords`.

    The returned function takes a `str`, returns it unchanged if it is
    one of `keywords`, and raises `ValueError` otherwise.

    Provisional.
    """
    def validate_keywords(value: str) -> str:
        if value in keywords:
            return value
        allowed = '", "'.join(keywords)
        raise ValueError(f'"{value}" is not one of "{allowed}".')

    return validate_keywords
def validate_identifier(value: str) -> str:
    """
    Validate an identifier key or class name and return it unchanged.

    Raise `ValueError` if `value` differs from ``make_id(value)``.

    Used in `idref.type`__ and for the tokens in `validate_identifier_list()`.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#idref-type

    Provisional.
    """
    if make_id(value) == value:
        return value
    raise ValueError(f'"{value}" is no valid id or class name.')
def validate_identifier_list(value: str | list[str]) -> list[str]:
    """
    Validate a (space-separated) list of ids or class names.

    `value` may be a `list` or a `str` with space separated
    ids or class names (cf. `validate_identifier()`).
    A `str` is split at whitespace, a `list` is returned as-is
    after all items passed the validation.

    Used in `classnames.type`__, `ids.type`__, and `idrefs.type`__.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#classnames-type
    __ https://docutils.sourceforge.io/docs/ref/doctree.html#ids-type
    __ https://docutils.sourceforge.io/docs/ref/doctree.html#idrefs-type

    Provisional.
    """
    tokens = value.split() if isinstance(value, str) else value
    for token in tokens:
        validate_identifier(token)
    return tokens
def validate_measure(measure: str) -> str:
    """
    Validate a measure__ (number + optional unit).  Return normalized `str`.

    The result is the number directly followed by the unit, as returned
    by `parse_measure()` (use that function to get a "number + unit" tuple).

    The unit may be a run of ASCII letters or Greek mu, a single percent sign,
    or the empty string.  Case is preserved.

    Provisional.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#measure
    """
    return '{}{}'.format(*parse_measure(measure))
def validate_colwidth(measure: str|int|float) -> int|float:
    """Validate the "colwidth__" attribute.

    Numbers are used as-is, the strings "*" and "" stand for 1,
    other strings must be a number optionally followed by "*".
    Raise `ValueError` unless the resulting value is greater than zero.

    Provisional:
      `measure` must be a `str` and will be returned as normalized `str`
      (with unit "*" for proportional values) in Docutils 1.0.

      The default unit will change to "pt" in Docutils 2.0.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth
    """
    if isinstance(measure, (int, float)):
        width = measure
    elif measure in ('*', ''):  # short for '1*'
        width = 1
    else:
        try:
            width = parse_measure(measure, unit_pattern='[*]?')[0]
        except ValueError:
            # report unparsable input with the message used below
            width = -1
    if width <= 0:
        raise ValueError(f'"{measure}" is no proportional measure.')
    return width
def validate_NMTOKEN(value: str) -> str:
    """
    Validate a "name token" and return it unchanged.

    A name token is a non-empty `str` of ASCII letters, digits, and [-._].
    Raise `ValueError` for any other value.

    Provisional.
    """
    if re.fullmatch('[-._A-Za-z0-9]+', value):
        return value
    raise ValueError(f'"{value}" is no NMTOKEN.')
def validate_NMTOKENS(value: str | list[str]) -> list[str]:
    """
    Validate a list of "name tokens" (cf. `validate_NMTOKEN()`).

    `value` may be a `list` or a `str` with whitespace separated tokens.
    A `str` is split at whitespace, a `list` is returned as-is
    after all items passed the validation.

    Provisional.
    """
    tokens = value.split() if isinstance(value, str) else value
    for token in tokens:
        validate_NMTOKEN(token)
    return tokens
3216def validate_refname_list(value: str | list[str]) -> list[str]:
3217 """
3218 Validate a list of `reference names`__.
3220 Reference names may contain all characters;
3221 whitespace is normalized (cf, `whitespace_normalize_name()`).
3223 `value` may be either a `list` of names or a `str` with
3224 space separated names (with internal spaces backslash escaped
3225 and literal backslashes doubled cf. `serial_escape()`).
3227 Return a list of whitespace-normalized, unescaped reference names.
3229 Provisional.
3231 __ https://docutils.sourceforge.io/docs/ref/doctree.html#reference-name
3232 """
3233 if isinstance(value, str):
3234 value = split_name_list(value)
3235 return [whitespace_normalize_name(name) for name in value]
def validate_yesorno(value: str | int | bool) -> bool:
    """Validate a `%yesorno`__ (flag) value.

    The string literal "0" evaluates to ``False``, all other
    values are converted with `bool()`.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#yesorno
    """
    return False if value == "0" else bool(value)
# Each value is a callable taking the attribute value; it returns the
# (converted or normalized) value or raises `ValueError`.
# `str` and `int` are used directly where a type conversion suffices.
ATTRIBUTE_VALIDATORS: dict[str, Callable[[str], Any]] = {
    'alt': str,  # CDATA
    'align': str,
    'anonymous': validate_yesorno,
    'auto': str,  # CDATA (only '1' or '*' are used in rST)
    'backrefs': validate_identifier_list,
    'bullet': str,  # CDATA (only '-', '+', or '*' are used in rST)
    'classes': validate_identifier_list,
    'char': str,  # from Exchange Table Model (CALS), currently ignored
    'charoff': validate_NMTOKEN,  # from CALS, currently ignored
    'colname': validate_NMTOKEN,  # from CALS, currently ignored
    'colnum': int,  # from CALS, currently ignored
    'cols': int,  # from CALS: "NMTOKEN, […] must be an integer > 0".
    'colsep': validate_yesorno,
    'colwidth': validate_colwidth,  # see docstring for pending changes
    'content': str,  # <meta>
    'delimiter': str,
    'dir': create_keyword_validator('ltr', 'rtl', 'auto'),  # <meta>
    'dupnames': validate_refname_list,
    'enumtype': create_keyword_validator('arabic', 'loweralpha', 'lowerroman',
                                         'upperalpha', 'upperroman'),
    'format': str,  # CDATA (space separated format names)
    'frame': create_keyword_validator('top', 'bottom', 'topbot', 'all',
                                      'sides', 'none'),  # from CALS, ignored
    'height': validate_measure,
    'http-equiv': str,  # <meta>
    'ids': validate_identifier_list,
    'lang': str,  # <meta>
    'level': int,
    'line': int,
    'ltrim': validate_yesorno,
    'loading': create_keyword_validator('embed', 'link', 'lazy'),
    'media': str,  # <meta>
    'morecols': int,
    'morerows': int,
    'name': whitespace_normalize_name,  # in <reference> (deprecated)
    # 'name': node_attributes.validate_NMTOKEN,  # in <meta>
    'names': validate_refname_list,
    'namest': validate_NMTOKEN,  # start of span, from CALS, currently ignored
    'nameend': validate_NMTOKEN,  # end of span, from CALS, currently ignored
    'pgwide': validate_yesorno,  # from CALS, currently ignored
    'prefix': str,
    'refid': validate_identifier,
    'refname': whitespace_normalize_name,
    'refuri': str,
    'rowsep': validate_yesorno,
    'rtrim': validate_yesorno,
    'scale': int,
    'scheme': str,
    'source': str,
    'start': int,
    'stub': validate_yesorno,
    'suffix': str,
    'title': str,
    'type': validate_NMTOKEN,
    'uri': str,
    'valign': create_keyword_validator('top', 'middle', 'bottom'),  # from CALS
    'width': validate_measure,
    'xml:space': create_keyword_validator('default', 'preserve'),
    }
"""
Mapping of `attribute names`__ to validating functions.

Provisional.

__ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-reference
"""