Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/docutils/nodes.py: 55%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# $Id$
2# Author: David Goodger <goodger@python.org>
3# Maintainer: docutils-develop@lists.sourceforge.net
4# Copyright: This module has been placed in the public domain.
6"""
7Docutils document tree element class library.
9Classes in CamelCase are abstract base classes or auxiliary classes. The one
10exception is `Text`, for a text (PCDATA) node; uppercase is used to
11differentiate from element classes. Classes in lower_case_with_underscores
12are element classes, matching the XML element generic identifiers in the DTD_.
14The position of each node (the level at which it can occur) is significant and
15is represented by abstract base classes (`Root`, `Structural`, `Body`,
16`Inline`, etc.). Certain transformations will be easier because we can use
17``isinstance(node, base_class)`` to determine the position of the node in the
18hierarchy.
20.. _DTD: https://docutils.sourceforge.io/docs/ref/docutils.dtd
21"""
23__docformat__ = 'reStructuredText'
25from collections import Counter
26import re
27import sys
28import unicodedata
29import warnings
30# import xml.dom.minidom as dom # -> conditional import in Node.asdom()
31# and document.asdom()
33# import docutils.transforms # -> conditional import in document.__init__()
36# ==============================
37# Functional Node Base Classes
38# ==============================
class Node:
    """Abstract base class of nodes in a document tree."""

    parent = None
    """Back-reference to the Node immediately containing this Node."""

    source = None
    """Path or description of the input source which generated this Node."""

    line = None
    """The line number (1-based) of the beginning of this Node in `source`."""

    _document = None

    @property
    def document(self):
        """Return the `document` root node of the tree containing this Node.

        The node's own reference is preferred; otherwise the parent is
        asked. Return None if neither is available.
        """
        own = self._document
        if own:
            return own
        try:
            return self.parent.document
        except AttributeError:  # no parent (or parent without `document`)
            return None

    @document.setter
    def document(self, value):
        self._document = value

    def __bool__(self):
        """
        Return True unconditionally.

        A node is more than a simple container: its boolean "truth" does
        not depend on having one or more subnodes in the doctree.

        Use `len()` to check node length.
        """
        return True

    def asdom(self, dom=None):
        """Return a DOM **fragment** representation of this Node.

        `dom` is the DOM implementation module to use; it defaults to
        `xml.dom.minidom` (imported on demand).
        """
        if dom is None:
            import xml.dom.minidom as dom
        return self._dom_node(dom.Document())

    def pformat(self, indent='    ', level=0):
        """
        Return an indented pseudo-XML representation, for test purposes.

        Abstract: subclasses must override.
        """
        raise NotImplementedError

    def copy(self):
        """Return a copy of self. Abstract: subclasses must override."""
        raise NotImplementedError

    def deepcopy(self):
        """Return a deep copy of self (also copying children).

        Abstract: subclasses must override.
        """
        raise NotImplementedError

    def astext(self):
        """Return a string representation of this Node.

        Abstract: subclasses must override.
        """
        raise NotImplementedError

    def setup_child(self, child):
        """Register `self` as parent of `child`.

        If `self` belongs to a document, also pass the document on to
        `child` and fill in a missing `source`/`line` from the document's
        current position.
        """
        child.parent = self
        doc = self.document
        if not doc:
            return
        child.document = doc
        if child.source is None:
            child.source = doc.current_source
        if child.line is None:
            child.line = doc.current_line

    def walk(self, visitor):
        """
        Traverse a tree of `Node` objects, calling the
        `dispatch_visit()` method of `visitor` when entering each
        node.  (The `walkabout()` method is similar, except it also
        calls the `dispatch_departure()` method before exiting each
        node.)

        This tree traversal supports limited in-place tree
        modifications.  Replacing one node with one or more nodes is
        OK, as is removing an element.  However, if the node removed
        or replaced occurs after the current node, the old node will
        still be traversed, and any new nodes will not.

        Within ``visit`` methods (and ``depart`` methods for
        `walkabout()`), `TreePruningException` subclasses may be raised
        (`SkipChildren`, `SkipSiblings`, `SkipNode`, `SkipDeparture`).

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` implementation for each `Node` subclass encountered.

        Return true if we should stop the traversal.
        """
        halted = False
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walk calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except (SkipChildren, SkipNode):
                return halted
            except SkipDeparture:  # not applicable; ignore
                pass
            try:
                # Iterate over a copy so that visitors may modify the tree.
                halted = any(child.walk(visitor)
                             for child in self.children[:])
            except SkipSiblings:
                pass
        except StopTraversal:
            halted = True
        return halted

    def walkabout(self, visitor):
        """
        Perform a tree traversal similarly to `Node.walk()` (which
        see), except also call the `dispatch_departure()` method
        before exiting each node.

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` and ``depart`` implementation for each `Node`
        subclass encountered.

        Return true if we should stop the traversal.
        """
        depart = True
        halted = False
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walkabout calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except SkipNode:
                return halted
            except SkipDeparture:
                depart = False
            try:
                # Iterate over a copy so that visitors may modify the tree.
                halted = any(child.walkabout(visitor)
                             for child in self.children[:])
            except SkipSiblings:
                pass
        except SkipChildren:
            pass
        except StopTraversal:
            halted = True
        if depart:
            visitor.document.reporter.debug(
                'docutils.nodes.Node.walkabout calling dispatch_departure '
                'for %s' % self.__class__.__name__)
            visitor.dispatch_departure(self)
        return halted

    def _fast_findall(self, cls):
        """Return iterator that only supports instance checks."""
        if isinstance(self, cls):
            yield self
        for child in self.children:
            yield from child._fast_findall(cls)

    def _superfast_findall(self):
        """Return iterator that doesn't check for a condition."""
        # Not the same as ``iter(self)``: iterating an Element (via
        # __getitem__() and __len__()) yields the direct children only.
        yield self
        for child in self.children:
            yield from child._superfast_findall()

    def findall(self, condition=None, include_self=True, descend=True,
                siblings=False, ascend=False):
        """
        Return an iterator yielding nodes following `self`:

        * self (if `include_self` is true)
        * all descendants in tree traversal order (if `descend` is true)
        * the following siblings (if `siblings` is true) and their
          descendants (if also `descend` is true)
        * the following siblings of the parent (if `ascend` is true) and
          their descendants (if also `descend` is true), and so on.

        If `condition` is not None, the iterator yields only nodes
        for which ``condition(node)`` is true.  If `condition` is a
        node class ``cls``, it is equivalent to a function consisting
        of ``return isinstance(node, cls)``.

        If `ascend` is true, assume `siblings` to be true as well.

        If the tree structure is modified during iteration, the result
        is undefined.

        For example, given the following tree::

            <paragraph>
                <emphasis>      <--- emphasis.findall() and
                    <strong>    <--- strong.findall() are called.
                        Foo
                    Bar
                <reference name="Baz" refid="baz">
                    Baz

        Then tuple(emphasis.findall()) equals ::

            (<emphasis>, <strong>, <#text: Foo>, <#text: Bar>)

        and list(strong.findall(ascend=True)) equals ::

            [<strong>, <#text: Foo>, <#text: Bar>, <reference>, <#text: Baz>]
        """
        if ascend:
            siblings = True
        # Shortcuts for the most common argument combinations.
        if include_self and descend and not siblings:
            if condition is None:
                yield from self._superfast_findall()
                return
            if isinstance(condition, type):
                yield from self._fast_findall(condition)
                return
        # Turn a class into the equivalent instance-check predicate.
        predicate = condition
        if isinstance(condition, type):
            def predicate(node, node_class=condition):
                return isinstance(node, node_class)

        if include_self and (predicate is None or predicate(self)):
            yield self
        if descend and len(self.children):
            for child in self:
                yield from child.findall(condition=predicate,
                                         include_self=True, descend=True,
                                         siblings=False, ascend=False)
        if siblings:  # always true if `ascend` is true
            current = self
            while current.parent:
                container = current.parent
                position = container.index(current)
                # extra check since Text nodes have value-equality
                while container[position] is not current:
                    position = container.index(current, position + 1)
                for sibling in container[position+1:]:
                    yield from sibling.findall(
                        condition=predicate,
                        include_self=True, descend=descend,
                        siblings=False, ascend=False)
                if not ascend:
                    break
                current = container

    def traverse(self, condition=None, include_self=True, descend=True,
                 siblings=False, ascend=False):
        """Return list of nodes following `self`.

        Same arguments as `findall()`; emits a PendingDeprecationWarning.
        For looping, Node.findall() is faster and more memory efficient.
        """
        # traverse() may be eventually removed:
        warnings.warn('nodes.Node.traverse() is obsoleted by Node.findall().',
                      PendingDeprecationWarning, stacklevel=2)
        found = self.findall(condition, include_self, descend,
                             siblings, ascend)
        return list(found)

    def next_node(self, condition=None, include_self=False, descend=True,
                  siblings=False, ascend=False):
        """
        Return the first node in the iterator returned by findall(),
        or None if the iterable is empty.

        Parameter list is the same as of `findall()`.  Note that `include_self`
        defaults to False, though.
        """
        found = self.findall(condition, include_self,
                             descend, siblings, ascend)
        return next(found, None)
class Text(Node, str):
    """
    Terminal node (leaf) holding text only; no child nodes or attributes.

    Create an instance by passing a string to the constructor.

    ``str(<instance>)`` gives the raw (null-escaped) text,
    ``<instance>.astext()`` the unescaped text.
    """

    tagname = '#text'

    children = ()
    """Text nodes have no children, and cannot have children."""

    def __new__(cls, data, rawsource=None):
        """Create the node from the string `data`.

        Raise TypeError if `data` is an array of bytes. Emit a
        DeprecationWarning if the ignored `rawsource` argument is given.
        """
        if isinstance(data, bytes):
            raise TypeError('expecting str data, not bytes')
        if rawsource is not None:
            warnings.warn('nodes.Text: initialization argument "rawsource" '
                          'is ignored and will be removed in Docutils 2.0.',
                          DeprecationWarning, stacklevel=2)
        return str.__new__(cls, data)

    def shortrepr(self, maxlen=18):
        """Return ``<#text: '...'>`` with the text cut to `maxlen` chars."""
        text = str(self)
        if len(text) > maxlen:
            text = text[:maxlen-4] + ' ...'
        return '<%s: %r>' % (self.tagname, text)

    def __repr__(self):
        return self.shortrepr(maxlen=68)

    def astext(self):
        """Return the unescaped text as a plain `str`."""
        return str(unescape(self))

    def _dom_node(self, domroot):
        """Return a DOM text node created by the DOM document `domroot`."""
        return domroot.createTextNode(str(self))

    def copy(self):
        """Return a new instance of the same class with the same text."""
        return self.__class__(str(self))

    def deepcopy(self):
        """Same as `copy()`: a Text node has no children to copy."""
        return self.copy()

    def pformat(self, indent='    ', level=0):
        """Return the text as indented lines of pseudo-XML.

        If the document settings have a true `detailed` value, return a
        ``<#text>`` tag followed by the `repr()` of each raw line instead.
        """
        try:
            if self.document.settings.detailed:
                tag = '%s%s' % (indent*level, '<#text>')
                detail = (indent*(level+1) + repr(raw_line)
                          for raw_line in self.splitlines(True))
                return '\n'.join((tag, *detail)) + '\n'
        except AttributeError:  # no document, settings, or `detailed`
            pass
        prefix = indent * level
        return ''.join(prefix + text_line + '\n'
                       for text_line in self.astext().splitlines())

    # Substitution definitions call rstrip() and lstrip() and expect
    # a Text instance back (formerly provided by UserString).

    def rstrip(self, chars=None):
        """Like `str.rstrip()`, but return an instance of this class."""
        stripped = str.rstrip(self, chars)
        return self.__class__(stripped)

    def lstrip(self, chars=None):
        """Like `str.lstrip()`, but return an instance of this class."""
        stripped = str.lstrip(self, chars)
        return self.__class__(stripped)

    def validate(self, recursive=True):
        """Validate Docutils Document Tree element ("doctree")."""
        # Nothing to check: Text nodes have no attributes and no children.

    def check_position(self):
        """Hook for additional checks of the parent's content model."""
        # Nothing to check: Text nodes may be placed anywhere.
410class Element(Node):
411 """
412 `Element` is the superclass to all specific elements.
414 Elements contain attributes and child nodes.
415 They can be described as a cross between a list and a dictionary.
417 Elements emulate dictionaries for external [#]_ attributes, indexing by
418 attribute name (a string). To set the attribute 'att' to 'value', do::
420 element['att'] = 'value'
422 .. [#] External attributes correspond to the XML element attributes.
423 From its `Node` superclass, Element also inherits "internal"
424 class attributes that are accessed using the standard syntax, e.g.
425 ``element.parent``.
427 There are two special attributes: 'ids' and 'names'. Both are
428 lists of unique identifiers: 'ids' conform to the regular expression
429 ``[a-z](-?[a-z0-9]+)*`` (see the make_id() function for rationale and
430 details). 'names' serve as user-friendly interfaces to IDs; they are
431 case- and whitespace-normalized (see the fully_normalize_name() function).
433 Elements emulate lists for child nodes (element nodes and/or text
434 nodes), indexing by integer. To get the first child node, use::
436 element[0]
438 to iterate over the child nodes (without descending), use::
440 for child in element:
441 ...
443 Elements may be constructed using the ``+=`` operator. To add one new
444 child node to element, do::
446 element += node
448 This is equivalent to ``element.append(node)``.
450 To add a list of multiple child nodes at once, use the same ``+=``
451 operator::
453 element += [node1, node2]
455 This is equivalent to ``element.extend([node1, node2])``.
456 """
458 list_attributes = ('ids', 'classes', 'names', 'dupnames')
459 """Tuple of attributes that are initialized to empty lists.
461 NOTE: Derived classes should update this value when supporting
462 additional list attributes.
463 """
465 valid_attributes = list_attributes + ('source',)
466 """Tuple of attributes that are valid for elements of this class.
468 NOTE: Derived classes should update this value when supporting
469 additional attributes.
470 """
472 common_attributes = valid_attributes
473 """Tuple of `common attributes`__ known to all Doctree Element classes.
475 __ https://docutils.sourceforge.io/docs/ref/doctree.html#common-attributes
476 """
478 known_attributes = common_attributes
479 """Alias for `common_attributes`. Will be removed in Docutils 2.0."""
481 basic_attributes = list_attributes
482 """Common list attributes. Deprecated. Will be removed in Docutils 2.0."""
484 local_attributes = ('backrefs',)
485 """Obsolete. Will be removed in Docutils 2.0."""
487 content_model = tuple()
488 """Python representation of the element's content model (cf. docutils.dtd).
490 A tuple of ``(category, quantifier)`` tuples with
492 :category: class or tuple of classes that are expected at this place(s)
493 in the list of children
494 :quantifier: string representation stating how many elements
495 of `category` are expected. Value is one of:
496 '.' (exactly one), '?' (zero or one),
497 '+' (one or more), '*' (zero or more).
499 NOTE: The default describes the empty element. Derived classes should
500 update this value to match their content model.
502 Provisional.
503 """
505 tagname = None
506 """The element generic identifier.
508 If None, it is set as an instance attribute to the name of the class.
509 """
511 child_text_separator = '\n\n'
512 """Separator for child nodes, used by `astext()` method."""
514 def __init__(self, rawsource='', *children, **attributes):
515 self.rawsource = rawsource
516 """The raw text from which this element was constructed.
518 For informative and debugging purposes. Don't rely on its value!
520 NOTE: some elements do not set this value (default '').
521 """
522 if isinstance(rawsource, Element):
523 raise TypeError('First argument "rawsource" must be a string.')
525 self.children = []
526 """List of child nodes (elements and/or `Text`)."""
528 self.extend(children) # maintain parent info
530 self.attributes = {}
531 """Dictionary of attribute {name: value}."""
533 # Initialize list attributes.
534 for att in self.list_attributes:
535 self.attributes[att] = []
537 for att, value in attributes.items():
538 att = att.lower() # normalize attribute name
539 if att in self.list_attributes:
540 # lists are mutable; make a copy for this node
541 self.attributes[att] = value[:]
542 else:
543 self.attributes[att] = value
545 if self.tagname is None:
546 self.tagname = self.__class__.__name__
548 def _dom_node(self, domroot):
549 element = domroot.createElement(self.tagname)
550 for attribute, value in self.attlist():
551 if isinstance(value, list):
552 value = ' '.join(serial_escape('%s' % (v,)) for v in value)
553 element.setAttribute(attribute, '%s' % value)
554 for child in self.children:
555 element.appendChild(child._dom_node(domroot))
556 return element
558 def __repr__(self):
559 data = ''
560 for c in self.children:
561 data += c.shortrepr()
562 if len(data) > 60:
563 data = data[:56] + ' ...'
564 break
565 if self['names']:
566 return '<%s "%s": %s>' % (self.__class__.__name__,
567 '; '.join(self['names']), data)
568 else:
569 return '<%s: %s>' % (self.__class__.__name__, data)
571 def shortrepr(self):
572 if self['names']:
573 return '<%s "%s"...>' % (self.__class__.__name__,
574 '; '.join(self['names']))
575 else:
576 return '<%s...>' % self.tagname
578 def __str__(self):
579 if self.children:
580 return '%s%s%s' % (self.starttag(),
581 ''.join(str(c) for c in self.children),
582 self.endtag())
583 else:
584 return self.emptytag()
586 def starttag(self, quoteattr=None):
587 # the optional arg is used by the docutils_xml writer
588 if quoteattr is None:
589 quoteattr = pseudo_quoteattr
590 parts = [self.tagname]
591 for name, value in self.attlist():
592 if value is None: # boolean attribute
593 parts.append('%s="True"' % name)
594 continue
595 if isinstance(value, bool):
596 value = str(int(value))
597 if isinstance(value, list):
598 values = [serial_escape('%s' % (v,)) for v in value]
599 value = ' '.join(values)
600 else:
601 value = str(value)
602 value = quoteattr(value)
603 parts.append('%s=%s' % (name, value))
604 return '<%s>' % ' '.join(parts)
606 def endtag(self):
607 return '</%s>' % self.tagname
609 def emptytag(self):
610 attributes = ('%s="%s"' % (n, v) for n, v in self.attlist())
611 return '<%s/>' % ' '.join((self.tagname, *attributes))
613 def __len__(self):
614 return len(self.children)
616 def __contains__(self, key):
617 # Test for both, children and attributes with operator ``in``.
618 if isinstance(key, str):
619 return key in self.attributes
620 return key in self.children
622 def __getitem__(self, key):
623 if isinstance(key, str):
624 return self.attributes[key]
625 elif isinstance(key, int):
626 return self.children[key]
627 elif isinstance(key, slice):
628 assert key.step in (None, 1), 'cannot handle slice with stride'
629 return self.children[key.start:key.stop]
630 else:
631 raise TypeError('element index must be an integer, a slice, or '
632 'an attribute name string')
634 def __setitem__(self, key, item):
635 if isinstance(key, str):
636 self.attributes[str(key)] = item
637 elif isinstance(key, int):
638 self.setup_child(item)
639 self.children[key] = item
640 elif isinstance(key, slice):
641 assert key.step in (None, 1), 'cannot handle slice with stride'
642 for node in item:
643 self.setup_child(node)
644 self.children[key.start:key.stop] = item
645 else:
646 raise TypeError('element index must be an integer, a slice, or '
647 'an attribute name string')
649 def __delitem__(self, key):
650 if isinstance(key, str):
651 del self.attributes[key]
652 elif isinstance(key, int):
653 del self.children[key]
654 elif isinstance(key, slice):
655 assert key.step in (None, 1), 'cannot handle slice with stride'
656 del self.children[key.start:key.stop]
657 else:
658 raise TypeError('element index must be an integer, a simple '
659 'slice, or an attribute name string')
661 def __add__(self, other):
662 return self.children + other
664 def __radd__(self, other):
665 return other + self.children
667 def __iadd__(self, other):
668 """Append a node or a list of nodes to `self.children`."""
669 if isinstance(other, Node):
670 self.append(other)
671 elif other is not None:
672 self.extend(other)
673 return self
675 def astext(self):
676 return self.child_text_separator.join(
677 [child.astext() for child in self.children])
679 def non_default_attributes(self):
680 atts = {}
681 for key, value in self.attributes.items():
682 if self.is_not_default(key):
683 atts[key] = value
684 return atts
686 def attlist(self):
687 return sorted(self.non_default_attributes().items())
689 def get(self, key, failobj=None):
690 return self.attributes.get(key, failobj)
692 def hasattr(self, attr):
693 return attr in self.attributes
695 def delattr(self, attr):
696 if attr in self.attributes:
697 del self.attributes[attr]
699 def setdefault(self, key, failobj=None):
700 return self.attributes.setdefault(key, failobj)
702 has_key = hasattr
704 def get_language_code(self, fallback=''):
705 """Return node's language tag.
707 Look iteratively in self and parents for a class argument
708 starting with ``language-`` and return the remainder of it
709 (which should be a `BCP49` language tag) or the `fallback`.
710 """
711 for cls in self.get('classes', []):
712 if cls.startswith('language-'):
713 return cls[9:]
714 try:
715 return self.parent.get_language(fallback)
716 except AttributeError:
717 return fallback
719 def append(self, item):
720 self.setup_child(item)
721 self.children.append(item)
723 def extend(self, item):
724 for node in item:
725 self.append(node)
727 def insert(self, index, item):
728 if isinstance(item, Node):
729 self.setup_child(item)
730 self.children.insert(index, item)
731 elif item is not None:
732 self[index:index] = item
734 def pop(self, i=-1):
735 return self.children.pop(i)
737 def remove(self, item):
738 self.children.remove(item)
740 def index(self, item, start=0, stop=sys.maxsize):
741 return self.children.index(item, start, stop)
743 def previous_sibling(self):
744 """Return preceding sibling node or ``None``."""
745 try:
746 i = self.parent.index(self)
747 except (AttributeError):
748 return None
749 return self.parent[i-1] if i > 0 else None
751 def is_not_default(self, key):
752 if self[key] == [] and key in self.list_attributes:
753 return 0
754 else:
755 return 1
757 def update_basic_atts(self, dict_):
758 """
759 Update basic attributes ('ids', 'names', 'classes',
760 'dupnames', but not 'source') from node or dictionary `dict_`.
762 Provisional.
763 """
764 if isinstance(dict_, Node):
765 dict_ = dict_.attributes
766 for att in self.basic_attributes:
767 self.append_attr_list(att, dict_.get(att, []))
769 def append_attr_list(self, attr, values):
770 """
771 For each element in values, if it does not exist in self[attr], append
772 it.
774 NOTE: Requires self[attr] and values to be sequence type and the
775 former should specifically be a list.
776 """
777 # List Concatenation
778 for value in values:
779 if value not in self[attr]:
780 self[attr].append(value)
782 def coerce_append_attr_list(self, attr, value):
783 """
784 First, convert both self[attr] and value to a non-string sequence
785 type; if either is not already a sequence, convert it to a list of one
786 element. Then call append_attr_list.
788 NOTE: self[attr] and value both must not be None.
789 """
790 # List Concatenation
791 if not isinstance(self.get(attr), list):
792 self[attr] = [self[attr]]
793 if not isinstance(value, list):
794 value = [value]
795 self.append_attr_list(attr, value)
797 def replace_attr(self, attr, value, force=True):
798 """
799 If self[attr] does not exist or force is True or omitted, set
800 self[attr] to value, otherwise do nothing.
801 """
802 # One or the other
803 if force or self.get(attr) is None:
804 self[attr] = value
806 def copy_attr_convert(self, attr, value, replace=True):
807 """
808 If attr is an attribute of self, set self[attr] to
809 [self[attr], value], otherwise set self[attr] to value.
811 NOTE: replace is not used by this function and is kept only for
812 compatibility with the other copy functions.
813 """
814 if self.get(attr) is not value:
815 self.coerce_append_attr_list(attr, value)
817 def copy_attr_coerce(self, attr, value, replace):
818 """
819 If attr is an attribute of self and either self[attr] or value is a
820 list, convert all non-sequence values to a sequence of 1 element and
821 then concatenate the two sequence, setting the result to self[attr].
822 If both self[attr] and value are non-sequences and replace is True or
823 self[attr] is None, replace self[attr] with value. Otherwise, do
824 nothing.
825 """
826 if self.get(attr) is not value:
827 if isinstance(self.get(attr), list) or \
828 isinstance(value, list):
829 self.coerce_append_attr_list(attr, value)
830 else:
831 self.replace_attr(attr, value, replace)
833 def copy_attr_concatenate(self, attr, value, replace):
834 """
835 If attr is an attribute of self and both self[attr] and value are
836 lists, concatenate the two sequences, setting the result to
837 self[attr]. If either self[attr] or value are non-sequences and
838 replace is True or self[attr] is None, replace self[attr] with value.
839 Otherwise, do nothing.
840 """
841 if self.get(attr) is not value:
842 if isinstance(self.get(attr), list) and \
843 isinstance(value, list):
844 self.append_attr_list(attr, value)
845 else:
846 self.replace_attr(attr, value, replace)
848 def copy_attr_consistent(self, attr, value, replace):
849 """
850 If replace is True or self[attr] is None, replace self[attr] with
851 value. Otherwise, do nothing.
852 """
853 if self.get(attr) is not value:
854 self.replace_attr(attr, value, replace)
856 def update_all_atts(self, dict_, update_fun=copy_attr_consistent,
857 replace=True, and_source=False):
858 """
859 Updates all attributes from node or dictionary `dict_`.
861 Appends the basic attributes ('ids', 'names', 'classes',
862 'dupnames', but not 'source') and then, for all other attributes in
863 dict_, updates the same attribute in self. When attributes with the
864 same identifier appear in both self and dict_, the two values are
865 merged based on the value of update_fun. Generally, when replace is
866 True, the values in self are replaced or merged with the values in
867 dict_; otherwise, the values in self may be preserved or merged. When
868 and_source is True, the 'source' attribute is included in the copy.
870 NOTE: When replace is False, and self contains a 'source' attribute,
871 'source' is not replaced even when dict_ has a 'source'
872 attribute, though it may still be merged into a list depending
873 on the value of update_fun.
874 NOTE: It is easier to call the update-specific methods then to pass
875 the update_fun method to this function.
876 """
877 if isinstance(dict_, Node):
878 dict_ = dict_.attributes
880 # Include the source attribute when copying?
881 if and_source:
882 filter_fun = self.is_not_list_attribute
883 else:
884 filter_fun = self.is_not_known_attribute
886 # Copy the basic attributes
887 self.update_basic_atts(dict_)
889 # Grab other attributes in dict_ not in self except the
890 # (All basic attributes should be copied already)
891 for att in filter(filter_fun, dict_):
892 update_fun(self, att, dict_[att], replace)
894 def update_all_atts_consistantly(self, dict_, replace=True,
895 and_source=False):
896 """
897 Updates all attributes from node or dictionary `dict_`.
899 Appends the basic attributes ('ids', 'names', 'classes',
900 'dupnames', but not 'source') and then, for all other attributes in
901 dict_, updates the same attribute in self. When attributes with the
902 same identifier appear in both self and dict_ and replace is True, the
903 values in self are replaced with the values in dict_; otherwise, the
904 values in self are preserved. When and_source is True, the 'source'
905 attribute is included in the copy.
907 NOTE: When replace is False, and self contains a 'source' attribute,
908 'source' is not replaced even when dict_ has a 'source'
909 attribute, though it may still be merged into a list depending
910 on the value of update_fun.
911 """
912 self.update_all_atts(dict_, Element.copy_attr_consistent, replace,
913 and_source)
915 def update_all_atts_concatenating(self, dict_, replace=True,
916 and_source=False):
917 """
918 Updates all attributes from node or dictionary `dict_`.
920 Appends the basic attributes ('ids', 'names', 'classes',
921 'dupnames', but not 'source') and then, for all other attributes in
922 dict_, updates the same attribute in self. When attributes with the
923 same identifier appear in both self and dict_ whose values aren't each
924 lists and replace is True, the values in self are replaced with the
925 values in dict_; if the values from self and dict_ for the given
926 identifier are both of list type, then the two lists are concatenated
927 and the result stored in self; otherwise, the values in self are
928 preserved. When and_source is True, the 'source' attribute is
929 included in the copy.
931 NOTE: When replace is False, and self contains a 'source' attribute,
932 'source' is not replaced even when dict_ has a 'source'
933 attribute, though it may still be merged into a list depending
934 on the value of update_fun.
935 """
936 self.update_all_atts(dict_, Element.copy_attr_concatenate, replace,
937 and_source)
939 def update_all_atts_coercion(self, dict_, replace=True,
940 and_source=False):
941 """
942 Updates all attributes from node or dictionary `dict_`.
944 Appends the basic attributes ('ids', 'names', 'classes',
945 'dupnames', but not 'source') and then, for all other attributes in
946 dict_, updates the same attribute in self. When attributes with the
947 same identifier appear in both self and dict_ whose values are both
948 not lists and replace is True, the values in self are replaced with
949 the values in dict_; if either of the values from self and dict_ for
950 the given identifier are of list type, then first any non-lists are
951 converted to 1-element lists and then the two lists are concatenated
952 and the result stored in self; otherwise, the values in self are
953 preserved. When and_source is True, the 'source' attribute is
954 included in the copy.
956 NOTE: When replace is False, and self contains a 'source' attribute,
957 'source' is not replaced even when dict_ has a 'source'
958 attribute, though it may still be merged into a list depending
959 on the value of update_fun.
960 """
961 self.update_all_atts(dict_, Element.copy_attr_coerce, replace,
962 and_source)
def update_all_atts_convert(self, dict_, and_source=False):
    """
    Update all attributes from node or dictionary `dict_`, converting
    values to lists.

    Delegates to `update_all_atts()` with `Element.copy_attr_convert`
    as the per-attribute update function.  No `replace` argument is
    passed, so `update_all_atts()` uses its own default for it.

    The basic attributes ('ids', 'names', 'classes', 'dupnames', but not
    'source') are appended.  For every other attribute in `dict_` that
    also exists in self, any non-list value is first converted to a
    1-element list; the two lists are then concatenated and the result
    is stored in self.  Otherwise, the values in self are preserved.

    When `and_source` is true, the 'source' attribute is included in
    the copy.
    """
    convert = Element.copy_attr_convert
    self.update_all_atts(dict_, convert, and_source=and_source)
def clear(self):
    """Drop all children by rebinding `self.children` to a new list.

    The previous list object is not modified.
    """
    self.children = list()
def replace(self, old, new):
    """Replace child `old` with `new`.

    `new` may be a single `Node`, a sequence of nodes (spliced in at
    the position of `old`), or None (in which case nothing is changed).
    The position of `old` is looked up first, so a missing `old` is
    reported by `self.index()` regardless of `new`.
    """
    position = self.index(old)
    if new is None:
        return
    if isinstance(new, Node):
        self.setup_child(new)
        self[position] = new
        return
    # anything else is treated as a sequence of replacement children
    self[position:position+1] = new
def replace_self(self, new):
    """
    Replace `self` node with `new`, where `new` is a node or a
    list of nodes.

    The basic attributes of `self` are transferred (via
    `update_basic_atts()`) to `new`, or to the first item of `new` if
    it is a list, provided that recipient is an `Element`.  Otherwise
    it is asserted that `self` carries no basic attributes that would
    get lost.

    Provisional: the handling of node attributes will be revised.
    """
    if isinstance(new, Node):
        recipient = new
    else:
        # `new` is a list; its first child receives the attributes.
        try:
            recipient = new[0]
        except IndexError:
            recipient = None
    if isinstance(recipient, Element):
        recipient.update_basic_atts(self)
    else:
        # `recipient` is a Text node or `new` is an empty list.
        # Assert that we aren't losing any attributes.
        for att in self.basic_attributes:
            assert not self[att], \
                   'Losing "%s" attribute: %s' % (att, self[att])
    self.parent.replace(self, new)
def first_child_matching_class(self, childclass, start=0, end=sys.maxsize):
    """
    Return the index of the first child that is an instance of
    `childclass`, or None if there is no such child.

    The test uses ``isinstance()``, so instances of subclasses match.

    Parameters:

    - `childclass`: A `Node` subclass to search for, or a tuple of `Node`
      classes. If a tuple, any of the classes may match.
    - `start`: Initial index to check.
    - `end`: Initial index to *not* check.
    """
    wanted = childclass if isinstance(childclass, tuple) else (childclass,)
    stop = min(len(self), end)
    for position in range(start, stop):
        if isinstance(self[position], wanted):
            return position
    return None
def first_child_not_matching_class(self, childclass, start=0,
                                   end=sys.maxsize):
    """
    Return the index of the first child that is *not* an instance of
    `childclass`, or None if all children in the range match.

    Parameters:

    - `childclass`: A `Node` subclass to skip, or a tuple of `Node`
      classes. If a tuple, none of the classes may match.
    - `start`: Initial index to check.
    - `end`: Initial index to *not* check.
    """
    skipped = childclass if isinstance(childclass, tuple) else (childclass,)
    stop = min(len(self), end)
    for position in range(start, stop):
        if not isinstance(self.children[position], skipped):
            return position
    return None
def pformat(self, indent=' ', level=0):
    """Return an indented text representation of self and its children.

    The first line is the start tag, prefixed by `indent` repeated
    `level` times; it is followed by the `pformat()` output of every
    child, rendered one level deeper.
    """
    pieces = ['%s%s\n' % (indent*level, self.starttag())]
    pieces.extend(c.pformat(indent, level+1) for c in self.children)
    return ''.join(pieces)
def copy(self):
    """Return a copy of self without children.

    The copy is a new instance of the same class, created from
    `rawsource` and the current attributes; `_document`, `source`
    and `line` are carried over.
    """
    duplicate = self.__class__(rawsource=self.rawsource, **self.attributes)
    for field in ('_document', 'source', 'line'):
        setattr(duplicate, field, getattr(self, field))
    return duplicate
def deepcopy(self):
    """Return a copy of self with recursively copied children."""
    duplicate = self.copy()
    cloned_children = [kid.deepcopy() for kid in self.children]
    duplicate.extend(cloned_children)
    return duplicate
def set_class(self, name):
    """Add a new class to the "classes" attribute.

    Deprecated: emits a `DeprecationWarning`.  Append to the
    ``Element['classes']`` list directly instead.

    `name` must not contain a space (checked with ``assert``); it is
    lower-cased before it is appended.
    """
    # The message parts are joined by implicit concatenation: every part
    # but the last must end with a space.
    warnings.warn('docutils.nodes.Element.set_class() is deprecated '
                  'and will be removed in Docutils 0.21 or later. '
                  "Append to Element['classes'] list attribute directly.",
                  DeprecationWarning, stacklevel=2)
    assert ' ' not in name
    self['classes'].append(name.lower())
def note_referenced_by(self, name=None, id=None):
    """Record that this Element has been referenced by its name
    `name` or id `id`.

    Sets ``self.referenced`` to True.  If a node is registered under
    `name` in ``self.expect_referenced_by_name`` or under `id` in
    ``self.expect_referenced_by_id``, that node is marked as referenced
    as well.
    """
    self.referenced = True
    # Element.expect_referenced_by_* dictionaries map names or ids
    # to nodes whose ``referenced`` attribute is set to true as
    # soon as this node is referenced by the given name or id.
    # Needed for target propagation.
    name_map = getattr(self, 'expect_referenced_by_name', {})
    id_map = getattr(self, 'expect_referenced_by_id', {})
    by_name = name_map.get(name)
    by_id = id_map.get(id)
    if by_name:
        assert name is not None
        by_name.referenced = True
    if by_id:
        assert id is not None
        by_id.referenced = True
@classmethod
def is_not_list_attribute(cls, attr):
    """
    Return True if `attr` is NOT listed in `cls.list_attributes`
    (the basic list attributes defined for all Elements),
    False otherwise.
    """
    is_list_attribute = attr in cls.list_attributes
    return not is_list_attribute
@classmethod
def is_not_known_attribute(cls, attr):
    """
    Return True if `attr` is NOT listed in `cls.common_attributes`
    (the attributes defined for all Element instances).

    Provisional. May be removed in Docutils 2.0.
    """
    is_common = attr in cls.common_attributes
    return not is_common
def validate_attributes(self):
    """Normalize and validate element attributes.

    Each attribute value is passed through the matching function in
    `ATTRIBUTE_VALIDATORS` and replaced by the result (conversion of
    string values to the expected datatype, normalization).
    Attributes whose name starts with "internal:" are skipped.

    All problems are collected; if there are any, a single
    `ValidationError` listing them is raised.

    Provisional.
    """
    problems = []
    for key, value in self.attributes.items():
        if key.startswith('internal:'):
            continue  # see docs/user/config.html#expose-internals
        if key in self.valid_attributes:
            try:
                self.attributes[key] = ATTRIBUTE_VALIDATORS[key](value)
            except (ValueError, TypeError, KeyError) as e:
                problems.append(
                    f'Attribute "{key}" has invalid value "{value}".\n {e}')
        else:
            va = '", "'.join(self.valid_attributes)
            problems.append(f'Attribute "{key}" not one of "{va}".')
    if not problems:
        return
    raise ValidationError(f'Element {self.starttag()} invalid:\n '
                          + '\n '.join(problems),
                          problematic_element=self)
def validate_content(self, model=None, elements=None):
    """Test compliance of `elements` with `model`.

    :model: content model description, default `self.content_model`,
    :elements: list of doctree elements, default `self.children`.

    The model is a sequence of ``(category, quantifier)`` pairs where
    `category` is a class or tuple of classes and `quantifier` is one
    of '.' (exactly one), '?' (zero or one), '+' (one or more),
    '*' (zero or more).  The pairs are matched in order against the
    elements; every matching element's `check_position()` is called.

    Return list of children that do not fit in the model or raise
    `ValidationError` if the content does not comply with the `model`.

    Provisional.
    """
    if model is None:
        model = self.content_model
    if elements is None:
        elements = self.children
    # One shared iterator: `child` is always the next element that has
    # not been matched yet (None once the elements are exhausted).
    ichildren = iter(elements)
    child = next(ichildren, None)
    for category, quantifier in model:
        if not isinstance(child, category):
            # also reached when `child` is None (no elements left)
            if quantifier in ('.', '+'):
                raise ValidationError(self._report_child(child, category),
                                      problematic_element=child)
            else:  # quantifier in ('?', '*') -> optional child
                continue  # try same child with next part of content model
        else:
            # Check additional placement constraints (if applicable):
            child.check_position()
        # advance:
        if quantifier in ('.', '?'):  # go to next element
            child = next(ichildren, None)
        else:  # if quantifier in ('*', '+'):  # pass all matching elements
            for child in ichildren:
                if not isinstance(child, category):
                    # first non-matching element: keep it in `child`
                    # for the next part of the model
                    break
                child.check_position()
            else:
                # iterator exhausted without a mismatch
                child = None
    # Whatever is left did not fit the model.
    return [] if child is None else [child, *ichildren]
1189 def _report_child(self, child, category):
1190 # Return a str reporting a missing child or child of wrong category.
1191 try:
1192 type = category.__name__
1193 except AttributeError:
1194 type = '> or <'.join(c.__name__ for c in category)
1195 msg = f'Element {self.starttag()} invalid:\n'
1196 if child is None:
1197 return f'{msg} Missing child of type <{type}>.'
1198 if isinstance(child, Text):
1199 return (f'{msg} Expecting child of type <{type}>, '
1200 f'not text data "{child.astext()}".')
1201 return (f'{msg} Expecting child of type <{type}>, '
1202 f'not {child.starttag()}.')
def check_position(self):
    """Hook for position checks beyond the parent's content model.

    The default implementation does nothing.  Overriding methods
    raise ValidationError, if `self` is at an invalid position.
    See `subtitle.check_position()` and `transition.check_position()`.
    """
def validate(self, recursive=True):
    """Validate Docutils Document Tree element ("doctree").

    Checks the attributes (`validate_attributes()`) and the content
    (`validate_content()`) of the element.  Any child left over by the
    content check is reported.

    Raise ValidationError if there are violations.
    If `recursive` is True, validate also the element's descendants.

    See `The Docutils Document Tree`__ for details of the
    Docutils Document Model.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html

    Provisional (work in progress).
    """
    self.validate_attributes()

    # Only the first leftover child is reported: both branches raise.
    for stray in self.validate_content():
        if isinstance(stray, Text):
            raise ValidationError(f'Element {self.starttag()} invalid:\n'
                                  f' Spurious text: "{stray.astext()}".',
                                  problematic_element=self)
        raise ValidationError(f'Element {self.starttag()} invalid:\n'
                              f' Child element {stray.starttag()} '
                              'not allowed at this position.',
                              problematic_element=stray)

    if not recursive:
        return
    for child in self:
        child.validate(recursive=recursive)
1243# ====================
1244# Element Categories
1245# ====================
1246#
1247# See https://docutils.sourceforge.io/docs/ref/doctree.html#element-hierarchy.
class Root:
    """Category class: element at the root of a document tree."""
class Structural:
    """Category class for `structural elements`__.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #structural-elements
    """
class SubStructural:
    """Category class for `structural subelements`__.

    Structural subelements are children of `Structural` elements.
    Most Structural elements accept only specific `SubStructural` elements.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #structural-subelements
    """
class Bibliographic:
    """Category class for `bibliographic elements`__
    (displayed document meta-data).

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #bibliographic-elements
    """
class Body:
    """Category class for `body elements`__.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#body-elements
    """
class Admonition(Body):
    """Category class for admonitions
    (distinctive and self-contained notices).
    """
    # One or more body elements.
    content_model = (
        (Body, '+'),  # (%body.elements;)+
    )
class Sequential(Body):
    """Category class for list-like body elements."""


class General(Body):
    """Category class for miscellaneous body elements."""


class Special(Body):
    """Category class for special internal body elements."""
class Part:
    """Category class for `body subelements`__.

    Body subelements always occur within specific parent elements.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#body-subelements
    """
class Decorative:
    """Category class for decorative elements (`header` and `footer`).

    Decorative elements are children of `decoration`.
    """
    # One or more body elements.
    content_model = (
        (Body, '+'),  # (%body.elements;)+
    )
class Inline:
    """Category class for inline elements.

    Inline elements contain text data and possibly other inline elements.
    """
1323# Orthogonal categories and Mixins
1324# ================================
class PreBibliographic:
    """Category class: elements which may occur before
    Bibliographic Elements.
    """
class Invisible(Special, PreBibliographic):
    """Category class: internal elements that don't appear in output."""
class Labeled:
    """Category class: element that contains a `label` as its first
    element.
    """
class Resolvable:
    """Mixin providing the `resolved` flag, which is False by default."""

    resolved = False
class BackLinkable:
    """Mixin for Elements that accept a "backrefs" attribute.

    Extends the list attributes and the valid attributes of `Element`
    by 'backrefs'.
    """

    list_attributes = (*Element.list_attributes, 'backrefs')
    valid_attributes = (*Element.valid_attributes, 'backrefs')

    def add_backref(self, refid):
        """Append `refid` to the "backrefs" list attribute."""
        backrefs = self['backrefs']
        backrefs.append(refid)
class Referential(Resolvable):
    """Category class: elements holding a cross-reference
    (outgoing hyperlink).
    """
class Targetable(Resolvable):
    """Category class: cross-reference targets (incoming hyperlink)."""

    # Class-level default of the "referenced" flag.
    referenced = 0

    indirect_reference_name = None
    """The whitespace_normalized_name (contains mixed case) of a target.

    Required for MoinMoin/reST compatibility.

    Provisional.
    """
class Titular:
    """Category class: title, sub-title, or informal heading (rubric)."""
class TextElement(Element):
    """
    An element which directly contains text.

    All children are `Text` or `Inline` subclass nodes.  To find out
    whether an element's context is inline, check whether its immediate
    parent is a `TextElement` instance (including subclasses).
    This is handy for nodes like `image` that can appear both inline and as
    standalone body elements.

    If passing children to `__init__()`, make sure to set `text` to
    ``''`` or some other suitable value.
    """
    content_model = (
        ((Text, Inline), '*'),  # (#PCDATA | %inline.elements;)*
    )

    child_text_separator = ''
    """Separator for child nodes, used by `astext()` method."""

    def __init__(self, rawsource='', text='', *children, **attributes):
        """Initialize the element.

        A non-empty `text` is wrapped in a `Text` node that becomes
        the first child, followed by `children`.
        """
        if text:
            children = (Text(text), *children)
        Element.__init__(self, rawsource, *children, **attributes)
class FixedTextElement(TextElement):
    """An element which directly contains preformatted text.

    Every instance gets the attribute ``xml:space="preserve"``.
    """

    valid_attributes = (*Element.valid_attributes, 'xml:space')

    def __init__(self, rawsource='', text='', *children, **attributes):
        """Initialize like `TextElement`, then force "xml:space"."""
        super().__init__(rawsource, text, *children, **attributes)
        # set after the base initializer, overriding any passed value
        self.attributes.update({'xml:space': 'preserve'})
class PureTextElement(TextElement):
    """An element which only contains text, no children."""
    # At most one text node.
    content_model = (
        (Text, '?'),  # (#PCDATA)
    )
1415# =================================
1416# Concrete Document Tree Elements
1417# =================================
1418#
1419# See https://docutils.sourceforge.io/docs/ref/doctree.html#element-reference
1421# Decorative Elements
1422# ===================
class header(Decorative, Element):
    """Decorative element `header`."""


class footer(Decorative, Element):
    """Decorative element `footer`."""
1428# Structural Subelements
1429# ======================
class title(Titular, PreBibliographic, SubStructural, TextElement):
    """Title of `document`, `section`, `topic` and generic `admonition`.

    Accepts the attributes "auto" and "refid" in addition to the
    attributes valid for all elements.
    """
    valid_attributes = (*Element.valid_attributes, 'auto', 'refid')
class subtitle(Titular, PreBibliographic, SubStructural, TextElement):
    """Sub-title of `document`, `section` and `sidebar`."""

    def check_position(self):
        """Check position of subtitle: must follow a title.

        Raise `ValidationError` if the subtitle is the first child of
        its parent.  Nothing is checked if there is no parent.
        """
        parent = self.parent
        if not parent:
            return
        if parent.index(self) != 0:
            return
        raise ValidationError(f'Element {self.parent.starttag()} invalid:'
                              '\n <subtitle> only allowed after <title>.',
                              problematic_element=self)
class meta(PreBibliographic, SubStructural, Element):
    """Container for "invisible" bibliographic data, or meta-data."""
    valid_attributes = (
        *Element.valid_attributes,
        'content', 'dir', 'http-equiv', 'lang', 'media', 'name', 'scheme',
    )
class docinfo(SubStructural, Element):
    """Container for displayed document meta-data."""
    # One or more bibliographic elements.
    content_model = (
        (Bibliographic, '+'),  # (%bibliographic.elements;)+
    )
class decoration(PreBibliographic, SubStructural, Element):
    """Container for `header` and `footer`."""
    content_model = (  # (header?, footer?)
        (header, '?'),
        (footer, '?'),
    )  # TODO: empty element does not make sense.

    def get_header(self):
        """Return the `header` child; insert a new one at the front
        if the first child is not a `header`.
        """
        children = self.children
        if not children or not isinstance(children[0], header):
            self.insert(0, header())
        return self.children[0]

    def get_footer(self):
        """Return the `footer` child; append a new one if the last
        child is not a `footer`.
        """
        children = self.children
        if not children or not isinstance(children[-1], footer):
            self.append(footer())
        return self.children[-1]
class transition(SubStructural, Element):
    """Transitions__ are breaks between untitled text parts.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#transition
    """

    def check_position(self):
        """Check additional constraints on `transition` placement.

        A transition may not begin or end a section or document,
        nor may two transitions be immediately adjacent.

        All violations are collected and reported in one
        `ValidationError`.
        """
        parent = self.parent
        heading = f'Element {parent.starttag()} invalid:'
        predecessor = self.previous_sibling()
        complaints = []
        # A transition following title, subtitle, meta, or decoration
        # still counts as "at the beginning of a document or section".
        leading = (title, subtitle, meta, decoration)
        if predecessor is None or isinstance(predecessor, leading):
            complaints.append(
                '<transition> may not begin a section or document.')
        if parent.index(self) == len(parent) - 1:
            complaints.append(
                '<transition> may not end a section or document.')
        if isinstance(predecessor, transition):
            complaints.append(
                '<transition> may not directly follow another transition.')
        if complaints:
            raise ValidationError('\n '.join([heading, *complaints]),
                                  problematic_element=self)
1508# Structural Elements
1509# ===================
class topic(Structural, Element):
    """
    Topics__ are non-recursive, mini-sections.

    Content: an optional `title` followed by one or more body elements.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#topic
    """
    content_model = (  # (title?, (%body.elements;)+)
        (title, '?'),
        (Body, '+'),
    )
class sidebar(Structural, Element):
    """
    Sidebars__ are like parallel documents providing related material.

    A sidebar is typically offset by a border and "floats" to the side
    of the page.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#sidebar
    """
    # "subtitle only after title" is ensured in `subtitle.check_position()`.
    content_model = (  # ((title, subtitle?)?, (%body.elements; | topic)+)
        (title, '?'),
        (subtitle, '?'),
        ((topic, Body), '+'),
    )
class section(Structural, Element):
    """Document section__. The main unit of hierarchy.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#section
    """
    # The content model is recursive (sections contain sections),
    # so it is assigned after the class definition.


# Correct transition placement is ensured in `transition.check_position()`.
section.content_model = (  # (title, subtitle?, %structure.model;)
    (title, '.'),
    (subtitle, '?'),
    ((Body, topic, sidebar, transition), '*'),
    ((section, transition), '*'),
)
1555# Root Element
1556# ============
class document(Root, Element):
    """
    The document root element.

    Besides being the root of the tree, a document keeps the
    bookkeeping needed while parsing and transforming: the name and ID
    registries, lists of footnotes/citations and their references,
    substitution definitions, collected system messages, and the
    `Transformer` holding pending transforms.

    Do not instantiate this class directly; use
    `docutils.utils.new_document()` instead.
    """
    valid_attributes = Element.valid_attributes + ('title',)
    content_model = (  # ( (title, subtitle?)?,
                       #   meta*,
                       #   decoration?,
                       #   (docinfo, transition?)?,
                       #   %structure.model; )
        (title, '?'),
        (subtitle, '?'),
        (meta, '*'),
        (decoration, '?'),
        (docinfo, '?'),
        (transition, '?'),
        ((Body, topic, sidebar, transition), '*'),
        ((section, transition), '*'),
        )
    # Additional restrictions for `subtitle` and `transition` are tested
    # with the respective `check_position()` methods.

    def __init__(self, settings, reporter, *args, **kwargs):
        """Initialize an empty document.

        `settings` and `reporter` are stored as `self.settings` and
        `self.reporter`; all other arguments are passed on to
        `Element.__init__()`.
        """
        Element.__init__(self, *args, **kwargs)

        self.current_source = None
        """Path to or description of the input source being processed."""

        self.current_line = None
        """Line number (1-based) of `current_source`."""

        self.settings = settings
        """Runtime settings data record."""

        self.reporter = reporter
        """System message generator."""

        self.indirect_targets = []
        """List of indirect target nodes."""

        self.substitution_defs = {}
        """Mapping of substitution names to substitution_definition nodes."""

        self.substitution_names = {}
        """Mapping of case-normalized substitution names to case-sensitive
        names."""

        self.refnames = {}
        """Mapping of names to lists of referencing nodes."""

        self.refids = {}
        """Mapping of ids to lists of referencing nodes."""

        self.nameids = {}
        """Mapping of names to unique id's."""

        self.nametypes = {}
        """Mapping of names to hyperlink type (boolean: True => explicit,
        False => implicit."""

        self.ids = {}
        """Mapping of ids to nodes."""

        self.footnote_refs = {}
        """Mapping of footnote labels to lists of footnote_reference nodes."""

        self.citation_refs = {}
        """Mapping of citation labels to lists of citation_reference nodes."""

        self.autofootnotes = []
        """List of auto-numbered footnote nodes."""

        self.autofootnote_refs = []
        """List of auto-numbered footnote_reference nodes."""

        self.symbol_footnotes = []
        """List of symbol footnote nodes."""

        self.symbol_footnote_refs = []
        """List of symbol footnote_reference nodes."""

        self.footnotes = []
        """List of manually-numbered footnote nodes."""

        self.citations = []
        """List of citation nodes."""

        self.autofootnote_start = 1
        """Initial auto-numbered footnote number."""

        self.symbol_footnote_start = 0
        """Initial symbol footnote symbol index."""

        self.id_counter = Counter()
        """Numbers added to otherwise identical IDs."""

        self.parse_messages = []
        """System messages generated while parsing."""

        self.transform_messages = []
        """System messages generated while applying transforms."""

        # imported here rather than at module level
        import docutils.transforms
        self.transformer = docutils.transforms.Transformer(self)
        """Storage for transforms to be applied to this document."""

        self.include_log = []
        """The current source's parents (to detect inclusion loops)."""

        self.decoration = None
        """Document's `decoration` node."""

        # the root element is its own document
        self._document = self

    def __getstate__(self):
        """
        Return dict with unpicklable references removed.
        """
        # work on a shallow copy; the instance itself is not changed
        state = self.__dict__.copy()
        state['reporter'] = None
        state['transformer'] = None
        return state

    def asdom(self, dom=None):
        """Return a DOM representation of this document.

        `dom` is the DOM implementation module to use;
        default: `xml.dom.minidom` (imported on demand).
        """
        if dom is None:
            import xml.dom.minidom as dom
        domroot = dom.Document()
        domroot.appendChild(self._dom_node(domroot))
        return domroot

    def set_id(self, node, msgnode=None, suggested_prefix=''):
        """Register `node` in `self.ids`; return an ID of the node.

        If `node` already has IDs, register all of them, report
        duplicates with a "severe" system message (appended to
        `msgnode`, if given), and return the last one.

        Otherwise generate a new ID, append it to ``node['ids']``,
        register it, and return it.  The ID is derived from the first
        of the node's names that yields a non-empty, unused ID.
        If there is none, a numbered ID is generated from the
        `id_prefix` and `auto_id_prefix` settings (a trailing "%" in
        the prefix is replaced by `suggested_prefix` or the ID-fied tag
        name) and a counter kept in `self.id_counter`.
        """
        if node['ids']:
            # register and check for duplicates
            for id in node['ids']:
                self.ids.setdefault(id, node)
                if self.ids[id] is not node:
                    msg = self.reporter.severe('Duplicate ID: "%s".' % id)
                    if msgnode is not None:
                        msgnode += msg
            return id
        # generate and set id
        id_prefix = self.settings.id_prefix
        auto_id_prefix = self.settings.auto_id_prefix
        base_id = ''
        id = ''
        for name in node['names']:
            if id_prefix:
                # allow names starting with numbers if `id_prefix`
                base_id = make_id('x'+name)[1:]
            else:
                base_id = make_id(name)
            # TODO: normalize id-prefix? (would make code simpler)
            id = id_prefix + base_id
            if base_id and id not in self.ids:
                break
        else:
            # no name yielded a usable ID -> generate a numbered one
            if base_id and auto_id_prefix.endswith('%'):
                # disambiguate name-derived ID
                # TODO: remove second condition after announcing change
                prefix = id + '-'
            else:
                prefix = id_prefix + auto_id_prefix
                if prefix.endswith('%'):
                    prefix = '%s%s-' % (prefix[:-1],
                                        suggested_prefix
                                        or make_id(node.tagname))
            # increase the counter until the ID is unused
            while True:
                self.id_counter[prefix] += 1
                id = '%s%d' % (prefix, self.id_counter[prefix])
                if id not in self.ids:
                    break
        node['ids'].append(id)
        self.ids[id] = node
        return id

    def set_name_id_map(self, node, id, msgnode=None, explicit=None):
        """
        `self.nameids` maps names to IDs, while `self.nametypes` maps names to
        booleans representing hyperlink type (True==explicit,
        False==implicit).  This method updates the mappings.

        The following state transition table shows how `self.nameids` items
        ("id") and `self.nametypes` items ("type") change with new input
        (a call to this method), and what actions are performed
        ("implicit"-type system messages are INFO/1, and
        "explicit"-type system messages are ERROR/3):

        ====  =====  ========  ========  =======  ====  =====  =====
         Old State    Input          Action        New State   Notes
        -----------  --------  -----------------  -----------  -----
        id    type   new type  sys.msg.  dupname  id    type
        ====  =====  ========  ========  =======  ====  =====  =====
        -     -      explicit  -         -        new   True
        -     -      implicit  -         -        new   False
        -     False  explicit  -         -        new   True
        old   False  explicit  implicit  old      new   True
        -     True   explicit  explicit  new      -     True
        old   True   explicit  explicit  new,old  -     True   [#]_
        -     False  implicit  implicit  new      -     False
        old   False  implicit  implicit  new,old  -     False
        -     True   implicit  implicit  new      -     True
        old   True   implicit  implicit  new      old   True
        ====  =====  ========  ========  =======  ====  =====  =====

        .. [#] Do not clear the name-to-id map or invalidate the old target if
           both old and new targets are external and refer to identical URIs.
           The new target is invalidated regardless.
        """
        # iterate over a copy: duplicate handling modifies node['names']
        for name in tuple(node['names']):
            if name in self.nameids:
                self.set_duplicate_name_id(node, id, name, msgnode, explicit)
                # attention: modifies node['names']
            else:
                self.nameids[name] = id
                self.nametypes[name] = explicit

    def set_duplicate_name_id(self, node, id, name, msgnode, explicit):
        """Update the name mappings for a `name` that is already known.

        Implements the rows of the state transition table in
        `set_name_id_map()` where the name has an old state:
        updates `self.nameids` and `self.nametypes`, calls `dupname()`
        on the new and/or old node, and generates a system message
        (appended to `msgnode`, if given).
        """
        old_id = self.nameids[name]
        old_explicit = self.nametypes[name]
        self.nametypes[name] = old_explicit or explicit
        if explicit:
            if old_explicit:
                level = 2
                if old_id is not None:
                    old_node = self.ids[old_id]
                    if 'refuri' in node:
                        refuri = node['refuri']
                        if (old_node['names']
                            and 'refuri' in old_node
                            and old_node['refuri'] == refuri):
                            level = 1  # just inform if refuri's identical
                    if level > 1:
                        dupname(old_node, name)
                        self.nameids[name] = None
                msg = self.reporter.system_message(
                    level, 'Duplicate explicit target name: "%s".' % name,
                    backrefs=[id], base_node=node)
                if msgnode is not None:
                    msgnode += msg
                dupname(node, name)
            else:
                # explicit target overrides implicit target
                self.nameids[name] = id
                if old_id is not None:
                    old_node = self.ids[old_id]
                    dupname(old_node, name)
        else:
            if old_id is not None and not old_explicit:
                self.nameids[name] = None
                old_node = self.ids[old_id]
                dupname(old_node, name)
            dupname(node, name)
        if not explicit or (not old_explicit and old_id is not None):
            msg = self.reporter.info(
                'Duplicate implicit target name: "%s".' % name,
                backrefs=[id], base_node=node)
            if msgnode is not None:
                msgnode += msg

    def has_name(self, name):
        """Return True if `name` is a key of `self.nameids`."""
        return name in self.nameids

    # "note" here is an imperative verb: "take note of".
    def note_implicit_target(self, target, msgnode=None):
        """Set an ID for `target` and register its names as implicit."""
        id = self.set_id(target, msgnode)
        self.set_name_id_map(target, id, msgnode, explicit=False)

    def note_explicit_target(self, target, msgnode=None):
        """Set an ID for `target` and register its names as explicit."""
        id = self.set_id(target, msgnode)
        self.set_name_id_map(target, id, msgnode, explicit=True)

    def note_refname(self, node):
        """Add `node` to the `self.refnames` list for its "refname"."""
        self.refnames.setdefault(node['refname'], []).append(node)

    def note_refid(self, node):
        """Add `node` to the `self.refids` list for its "refid"."""
        self.refids.setdefault(node['refid'], []).append(node)

    def note_indirect_target(self, target):
        """Append `target` to `self.indirect_targets`.

        If the target has names, it is also registered with
        `note_refname()`.
        """
        self.indirect_targets.append(target)
        if target['names']:
            self.note_refname(target)

    def note_anonymous_target(self, target):
        """Set an ID for `target`."""
        self.set_id(target)

    def note_autofootnote(self, footnote):
        """Set an ID for `footnote`; add it to `self.autofootnotes`."""
        self.set_id(footnote)
        self.autofootnotes.append(footnote)

    def note_autofootnote_ref(self, ref):
        """Set an ID for `ref`; add it to `self.autofootnote_refs`."""
        self.set_id(ref)
        self.autofootnote_refs.append(ref)

    def note_symbol_footnote(self, footnote):
        """Set an ID for `footnote`; add it to `self.symbol_footnotes`."""
        self.set_id(footnote)
        self.symbol_footnotes.append(footnote)

    def note_symbol_footnote_ref(self, ref):
        """Set an ID for `ref`; add it to `self.symbol_footnote_refs`."""
        self.set_id(ref)
        self.symbol_footnote_refs.append(ref)

    def note_footnote(self, footnote):
        """Set an ID for `footnote`; add it to `self.footnotes`."""
        self.set_id(footnote)
        self.footnotes.append(footnote)

    def note_footnote_ref(self, ref):
        """Set an ID for `ref` and register it under its "refname"
        in `self.footnote_refs` and `self.refnames`.
        """
        self.set_id(ref)
        self.footnote_refs.setdefault(ref['refname'], []).append(ref)
        self.note_refname(ref)

    def note_citation(self, citation):
        """Add `citation` to `self.citations` (no ID is set here)."""
        self.citations.append(citation)

    def note_citation_ref(self, ref):
        """Set an ID for `ref` and register it under its "refname"
        in `self.citation_refs` and `self.refnames`.
        """
        self.set_id(ref)
        self.citation_refs.setdefault(ref['refname'], []).append(ref)
        self.note_refname(ref)

    def note_substitution_def(self, subdef, def_name, msgnode=None):
        """Register substitution definition `subdef` under `def_name`.

        The name is whitespace-normalized.  If a definition with this
        name exists already, an error is reported (and appended to
        `msgnode`, if given), `dupname()` is called on the old
        definition, and the new definition replaces the old one.
        """
        name = whitespace_normalize_name(def_name)
        if name in self.substitution_defs:
            msg = self.reporter.error(
                  'Duplicate substitution definition name: "%s".' % name,
                  base_node=subdef)
            if msgnode is not None:
                msgnode += msg
            oldnode = self.substitution_defs[name]
            dupname(oldnode, name)
        # keep only the last definition:
        self.substitution_defs[name] = subdef
        # case-insensitive mapping:
        self.substitution_names[fully_normalize_name(name)] = name

    def note_substitution_ref(self, subref, refname):
        """Store the whitespace-normalized `refname` in
        ``subref['refname']``.
        """
        subref['refname'] = whitespace_normalize_name(refname)

    def note_pending(self, pending, priority=None):
        """Pass `pending` and `priority` on to
        `self.transformer.add_pending()`.
        """
        self.transformer.add_pending(pending, priority)

    def note_parse_message(self, message):
        """Append `message` to `self.parse_messages`."""
        self.parse_messages.append(message)

    def note_transform_message(self, message):
        """Append `message` to `self.transform_messages`."""
        self.transform_messages.append(message)

    def note_source(self, source, offset):
        """Update `self.current_source` and `self.current_line`.

        The line number is ``offset + 1``, or None if `offset` is None.
        """
        self.current_source = source
        if offset is None:
            self.current_line = offset
        else:
            self.current_line = offset + 1

    def copy(self):
        """Return a new document with the same settings, reporter,
        attributes, `source` and `line`, but without children.
        """
        obj = self.__class__(self.settings, self.reporter,
                             **self.attributes)
        obj.source = self.source
        obj.line = self.line
        return obj

    def get_decoration(self):
        """Return the document's `decoration` node.

        If `self.decoration` is not set yet, a new `decoration` element
        is created and inserted before the first child that is neither
        `Titular` nor `meta` (appended if there is no such child).
        """
        if not self.decoration:
            self.decoration = decoration()
            index = self.first_child_not_matching_class((Titular, meta))
            if index is None:
                self.append(self.decoration)
            else:
                self.insert(index, self.decoration)
        return self.decoration
1931# Bibliographic Elements
1932# ======================
class author(Bibliographic, TextElement):
    """Bibliographic text element `author`."""


class organization(Bibliographic, TextElement):
    """Bibliographic text element `organization`."""


class address(Bibliographic, FixedTextElement):
    """Bibliographic element `address` (fixed text element)."""


class contact(Bibliographic, TextElement):
    """Bibliographic text element `contact`."""


class version(Bibliographic, TextElement):
    """Bibliographic text element `version`."""


class revision(Bibliographic, TextElement):
    """Bibliographic text element `revision`."""


class status(Bibliographic, TextElement):
    """Bibliographic text element `status`."""


class date(Bibliographic, TextElement):
    """Bibliographic text element `date`."""


class copyright(Bibliographic, TextElement):
    """Bibliographic text element `copyright`."""
class authors(Bibliographic, Element):
    """Container for author information for documents with multiple authors.
    """
    content_model = (  # (author, organization?, address?, contact?)+
        (author, '+'),
        (organization, '?'),
        (address, '?'),
        (contact, '?'),
    )

    def validate_content(self):
        """Repeatedly test for children matching the content model.

        The inherited check is applied to all children first and then
        again to whatever it left over, until nothing is left.
        Returns the (empty) list of leftover children.

        Provisional.
        """
        check = super().validate_content
        leftovers = check()
        while leftovers:
            leftovers = check(elements=leftovers)
        return leftovers
1965# Body Elements
1966# =============
1967#
1968# General
1969# -------
1970#
1971# Miscellaneous Body Elements and related Body Subelements (Part)
class paragraph(General, TextElement):
    """General body element `paragraph` (a text element)."""


class rubric(Titular, General, TextElement):
    """Titular general body element `rubric` (a text element)."""
class compound(General, Element):
    """General body element `compound`: one or more body elements."""
    content_model = (
        (Body, '+'),  # (%body.elements;)+
    )


class container(General, Element):
    """General body element `container`: one or more body elements."""
    content_model = (
        (Body, '+'),  # (%body.elements;)+
    )
class attribution(Part, TextElement):
    """Visible reference to the source of a `block_quote`.

    A body subelement (`Part`) containing text.
    """
class block_quote(General, Element):
    """An extended quotation, set off from the main text.

    Content: one or more body elements, optionally followed by an
    `attribution`.
    """
    content_model = (  # ((%body.elements;)+, attribution?)
        (Body, '+'),
        (attribution, '?'),
    )
1996# Lists
1997# -----
1998#
1999# Lists (Sequential) and related Body Subelements (Part)
class list_item(Part, Element):
    """Body subelement `list_item`: zero or more body elements."""
    content_model = (
        (Body, '*'),  # (%body.elements;)*
    )
class bullet_list(Sequential, Element):
    """Sequential element `bullet_list`: one or more `list_item`.

    Accepts the attribute "bullet" in addition to the attributes valid
    for all elements.
    """
    valid_attributes = (*Element.valid_attributes, 'bullet')
    content_model = (
        (list_item, '+'),  # (list_item+)
    )
2010class enumerated_list(Sequential, Element):
2011 valid_attributes = Element.valid_attributes + (
2012 'enumtype', 'prefix', 'suffix', 'start')
2013 content_model = ((list_item, '+'),) # (list_item+)
2016class term(Part, TextElement): pass
2017class classifier(Part, TextElement): pass
2020class definition(Part, Element):
2021 """Definition of a `term` in a `definition_list`."""
2022 content_model = ((Body, '+'),) # (%body.elements;)+
2025class definition_list_item(Part, Element):
2026 content_model = ( # ((term, classifier*)+, definition)
2027 (term, '.'),
2028 ((classifier, term), '*'),
2029 (definition, '.'))
2032class definition_list(Sequential, Element):
2033 """List of terms and their definitions.
2035 Can be used for glossaries or dictionaries, to describe or
2036 classify things, for dialogues, or to itemize subtopics.
2037 """
2038 content_model = ((definition_list_item, '+'),) # (definition_list_item+)
class field_name(Part, TextElement): pass


class field_body(Part, Element):
    # Any number of `Body` children; an empty body is allowed ('*').
    content_model = ((Body, '*'),)  # (%body.elements;)*


class field(Part, Bibliographic, Element):
    # A `field_name` followed by a `field_body`, in that order.
    content_model = (  # (field_name, field_body)
        (field_name, '.'),
        (field_body, '.'))


class field_list(Sequential, Element):
    """List of label & data pairs.

    Typically rendered as a two-column list.
    Also used for extension syntax or special processing.
    """
    content_model = ((field, '+'),)  # (field+)
class option_string(Part, PureTextElement):
    """A literal command-line option. Typically monospaced."""


class option_argument(Part, PureTextElement):
    """Placeholder text for option arguments."""
    valid_attributes = Element.valid_attributes + ('delimiter',)

    def astext(self):
        """Return the text content, prefixed with the delimiter.

        The prefix is the value of the 'delimiter' attribute; a single
        space is used when the attribute is not set.
        """
        prefix = self.get('delimiter', ' ')
        return prefix + TextElement.astext(self)
class option(Part, Element):
    """Option element in an `option_list_item`.

    Groups an option string with zero or more option argument placeholders.
    """
    # Empty separator between children.  `option_argument.astext()` (as
    # defined in this module) prepends its own delimiter.
    child_text_separator = ''
    content_model = (  # (option_string, option_argument*)
        (option_string, '.'),
        (option_argument, '*'))


class option_group(Part, Element):
    """Groups together one or more `option` elements, all synonyms."""
    child_text_separator = ', '
    content_model = ((option, '+'),)  # (option+)
class description(Part, Element):
    """Description of a command-line option."""
    content_model = ((Body, '+'),)  # (%body.elements;)+
class option_list_item(Part, Element):
    """Container for a pair of `option_group` and `description` elements.
    """
    child_text_separator = ' '
    # An `option_group` followed by a `description`, in that order.
    content_model = (  # (option_group, description)
        (option_group, '.'),
        (description, '.'))


class option_list(Sequential, Element):
    """Two-column list of command-line options and descriptions."""
    content_model = ((option_list_item, '+'),)  # (option_list_item+)
2111# Pre-formatted text blocks
2112# -------------------------
class literal_block(General, FixedTextElement): pass
class doctest_block(General, FixedTextElement): pass


class math_block(General, FixedTextElement, PureTextElement):
    """Mathematical notation (display formula)."""


class line(Part, TextElement):
    """Single line of text in a `line_block`."""
    # Class-level default; NOTE(review): presumably set per instance
    # elsewhere -- no assignment is visible in this part of the file.
    indent = None


class line_block(General, Element):
    """Sequence of lines and nested line blocks.
    """
    # recursive content model: (line | line_block)+


# Assigned after the class statement because the model refers to
# `line_block` itself, which does not exist inside its own class body.
line_block.content_model = (((line, line_block), '+'),)
2136# Admonitions
2137# -----------
2138# distinctive and self-contained notices
# Specific admonitions: each combines the `Admonition` category class
# with `Element` and adds nothing of its own.
class attention(Admonition, Element): pass
class caution(Admonition, Element): pass
class danger(Admonition, Element): pass
class error(Admonition, Element): pass
class important(Admonition, Element): pass
class note(Admonition, Element): pass
class tip(Admonition, Element): pass
class hint(Admonition, Element): pass
class warning(Admonition, Element): pass


class admonition(Admonition, Element):
    # Unlike the specific admonitions above, this one defines its own
    # content model: a `title` followed by one or more body elements.
    content_model = (  # (title, (%body.elements;)+)
        (title, '.'),
        (Body, '+'))
2157# Footnote and citation
2158# ---------------------
class label(Part, PureTextElement):
    """Visible identifier for footnotes and citations."""


class footnote(General, BackLinkable, Element, Labeled, Targetable):
    """Labelled note providing additional context (footnote or endnote)."""
    valid_attributes = Element.valid_attributes + ('auto', 'backrefs')
    # Optional `label`, then one or more body elements.
    content_model = (  # (label?, (%body.elements;)+)
        (label, '?'),
        (Body, '+'))
    # TODO: Why is the label optional and content required?
    # The rST specification says: "Each footnote consists of an
    # explicit markup start (".. "), a left square bracket,
    # the footnote label, a right square bracket, and whitespace,
    # followed by indented body elements."
    #
    # The `Labeled` parent class' docstring says:
    # "Contains a `label` as its first element."
    #
    # docutils.dtd requires both label and content but the rST parser
    # allows empty footnotes (see test_writers/test_latex2e.py).
    # Should the rST parser complain (info, warning or error)?


class citation(General, BackLinkable, Element, Labeled, Targetable):
    # Unlike `footnote`, the `label` is required here ('.').
    content_model = (  # (label, (%body.elements;)+)
        (label, '.'),
        (Body, '+'))
    # TODO: docutils.dtd requires both label and content but the rST parser
    # allows empty citation (see test_rst/test_citations.py).
    # Is this sensible?
    # The rST specification says: "Citations are identical to footnotes
    # except that they use only non-numeric labels such as [note] …"
2195# Graphical elements
2196# ------------------
class image(General, Inline, Element):
    """Reference to an image resource.

    May be body element or inline element.
    """
    valid_attributes = Element.valid_attributes + (
        'uri', 'alt', 'align', 'height', 'width', 'scale', 'loading')

    def astext(self):
        """Return the 'alt' attribute, or an empty string if it is unset."""
        alt_text = self.get('alt', '')
        return alt_text
class caption(Part, TextElement): pass


class legend(Part, Element):
    """A wrapper for text accompanying a `figure` that is not the caption."""
    content_model = ((Body, '+'),)  # (%body.elements;)+


class figure(General, Element):
    """A formal figure, generally an illustration, with a title."""
    valid_attributes = Element.valid_attributes + ('align', 'width')
    # The model below is looser than the DTD expression in the comment:
    # both `caption` and `legend` are optional here (see the TODO).
    content_model = (  # (image, ((caption, legend?) | legend))
        (image, '.'),
        (caption, '?'),
        (legend, '?'))
    # TODO: According to the DTD, a caption or legend is required
    # but rST allows "bare" figures which are formatted differently from
    # images (floating in LaTeX, nested in a <figure> in HTML).
2230# Tables
2231# ------
class entry(Part, Element):
    """An entry in a `row` (a table cell)."""
    valid_attributes = Element.valid_attributes + (
        'align', 'char', 'charoff', 'colname', 'colsep', 'morecols',
        'morerows', 'namest', 'nameend', 'rowsep', 'valign')
    content_model = ((Body, '*'),)  # %tbl.entry.mdl -> (%body.elements;)*


class row(Part, Element):
    """Row of table cells."""
    valid_attributes = Element.valid_attributes + ('rowsep', 'valign')
    content_model = ((entry, '+'),)  # (%tbl.row.mdl;) -> entry+


class colspec(Part, Element):
    """Specifications for a column in a `tgroup`."""
    # No `content_model` is defined here; whatever the base classes
    # provide applies.
    valid_attributes = Element.valid_attributes + (
        'align', 'char', 'charoff', 'colname', 'colnum',
        'colsep', 'colwidth', 'rowsep', 'stub')


class thead(Part, Element):
    """Row(s) that form the head of a `tgroup`."""
    valid_attributes = Element.valid_attributes + ('valign',)
    content_model = ((row, '+'),)  # (row+)


class tbody(Part, Element):
    """Body of a `tgroup`."""
    valid_attributes = Element.valid_attributes + ('valign',)
    content_model = ((row, '+'),)  # (row+)


class tgroup(Part, Element):
    """A portion of a table. Most tables have just one `tgroup`."""
    valid_attributes = Element.valid_attributes + (
        'align', 'cols', 'colsep', 'rowsep')
    # Column specifications, an optional head, then the required body.
    content_model = (  # (colspec*, thead?, tbody)
        (colspec, '*'),
        (thead, '?'),
        (tbody, '.'))


class table(General, Element):
    """A data arrangement with rows and columns."""
    valid_attributes = Element.valid_attributes + (
        'align', 'colsep', 'frame', 'pgwide', 'rowsep', 'width')
    # Optional title followed by one or more `tgroup` elements.
    content_model = (  # (title?, tgroup+)
        (title, '?'),
        (tgroup, '+'))
2285# Special purpose elements
2286# ------------------------
2287# Body elements for internal use or special requests.
class comment(Invisible, FixedTextElement, PureTextElement):
    """Author notes, hidden from the output."""


class substitution_definition(Invisible, TextElement):
    # Accepts 'ltrim' and 'rtrim' on top of the common `Element` attributes.
    valid_attributes = Element.valid_attributes + ('ltrim', 'rtrim')


class target(Invisible, Inline, TextElement, Targetable):
    # Accepts the reference attributes on top of the common `Element` ones.
    valid_attributes = Element.valid_attributes + (
        'anonymous', 'refid', 'refname', 'refuri')
class system_message(Special, BackLinkable, PreBibliographic, Element):
    """
    System message element.

    Do not instantiate this class directly; use
    ``document.reporter.info/warning/error/severe()`` instead.
    """
    valid_attributes = BackLinkable.valid_attributes + (
        'level', 'line', 'type')
    content_model = ((Body, '+'),)  # (%body.elements;)+

    def __init__(self, message=None, *children, **attributes):
        """Set up the element, wrapping `message` in a leading paragraph.

        A truthy `message` becomes a `paragraph` placed before `children`.
        A 'rawsource' keyword is removed from `attributes` and passed on
        as the raw source; all other keywords become element attributes.
        """
        rawsource = attributes.pop('rawsource', '')
        if message:
            children = (paragraph('', message), *children)
        try:
            Element.__init__(self, rawsource, *children, **attributes)
        except:  # noqa catchall
            # Debugging aid: show the offending children, then re-raise.
            print('system_message: children=%r' % (children,))
            raise

    def astext(self):
        """Return "source:line: (type/level) text" for this message.

        The line part is empty when there is no 'line' attribute; the
        'source', 'type' and 'level' attributes are looked up directly.
        """
        line = self.get('line', '')
        fields = (self['source'], line, self['type'], self['level'],
                  Element.astext(self))
        return '%s:%s: (%s/%s) %s' % fields
class pending(Invisible, Element):
    """
    Placeholder for pending operations.

    The "pending" element is used to encapsulate a pending operation: the
    operation (transform), the point at which to apply it, and any data it
    requires. Only the pending operation's location within the document is
    stored in the public document tree (by the "pending" object itself); the
    operation and its data are stored in the "pending" object's internal
    instance attributes.

    For example, say you want a table of contents in your reStructuredText
    document. The easiest way to specify where to put it is from within the
    document, with a directive::

     .. contents::

    But the "contents" directive can't do its work until the entire document
    has been parsed and possibly transformed to some extent. So the directive
    code leaves a placeholder behind that will trigger the second phase of its
    processing, something like this::

     <pending ...public attributes...> + internal attributes

    Use `document.note_pending()` so that the
    `docutils.transforms.Transformer` stage of processing can run all pending
    transforms.
    """

    def __init__(self, transform, details=None,
                 rawsource='', *children, **attributes):
        Element.__init__(self, rawsource, *children, **attributes)

        self.transform = transform
        """The `docutils.transforms.Transform` class implementing the pending
        operation."""

        self.details = details or {}
        """Detail data (dictionary) required by the pending operation."""

    def pformat(self, indent=' ', level=0):
        """Return the element's pformat plus a dump of internal attributes.

        The transform's module and name come first, then the `details`
        items sorted by key.  A `Node` value, or a non-empty list whose
        first item is a `Node`, is shown via its own ``pformat()``;
        anything else is shown with ``repr()``.
        """
        internals = ['.. internal attributes:',
                     ' .transform: %s.%s' % (self.transform.__module__,
                                             self.transform.__name__),
                     ' .details:']
        for key, value in sorted(self.details.items()):
            if isinstance(value, Node):
                nodes = [value]
            elif (value
                  and isinstance(value, list)
                  and isinstance(value[0], Node)):
                nodes = value
            else:
                internals.append('%7s%s: %r' % ('', key, value))
                continue
            internals.append('%7s%s:' % ('', key))
            for node in nodes:
                internals.extend('%9s%s' % ('', line)
                                 for line in node.pformat().splitlines())
        head = Element.pformat(self, indent, level)
        prefix = indent * level
        tail = ''.join(' %s%s\n' % (prefix, line) for line in internals)
        return head + tail

    def copy(self):
        """Return a new instance sharing transform, details and attributes.

        The document reference, `source` and `line` are carried over.
        """
        clone = self.__class__(self.transform, self.details, self.rawsource,
                               **self.attributes)
        clone._document = self._document
        clone.source = self.source
        clone.line = self.line
        return clone
class raw(Special, Inline, PreBibliographic,
          FixedTextElement, PureTextElement):
    """Raw data that is to be passed untouched to the Writer.

    Can be used as Body element or Inline element.
    """
    # Accepts 'format' and 'xml:space' on top of the common attributes.
    valid_attributes = Element.valid_attributes + ('format', 'xml:space')
2412# Inline Elements
2413# ===============
# Inline elements without attributes or behaviour of their own: each
# combines the `Inline` category class with `TextElement`.
class abbreviation(Inline, TextElement): pass
class acronym(Inline, TextElement): pass
class emphasis(Inline, TextElement): pass
class generated(Inline, TextElement): pass
class inline(Inline, TextElement): pass
class literal(Inline, TextElement): pass
class strong(Inline, TextElement): pass
class subscript(Inline, TextElement): pass
class superscript(Inline, TextElement): pass
class title_reference(Inline, TextElement): pass


# The classes below each extend the common `Element` attributes with the
# names listed in their `valid_attributes` tuple.
class reference(General, Inline, Referential, TextElement):
    valid_attributes = Element.valid_attributes + (
        'anonymous', 'name', 'refid', 'refname', 'refuri')


class footnote_reference(Inline, Referential, PureTextElement):
    valid_attributes = Element.valid_attributes + ('auto', 'refid', 'refname')


class citation_reference(Inline, Referential, PureTextElement):
    valid_attributes = Element.valid_attributes + ('refid', 'refname')


class substitution_reference(Inline, TextElement):
    valid_attributes = Element.valid_attributes + ('refname',)


class math(Inline, PureTextElement):
    """Mathematical notation in running text."""


class problematic(Inline, TextElement):
    valid_attributes = Element.valid_attributes + (
        'refid', 'refname', 'refuri')
2453# ========================================
2454# Auxiliary Classes, Functions, and Data
2455# ========================================
# Whitespace-separated names, split into a list at import time.  The list
# is passed to `_add_node_class_names()` further down, which attaches
# ``visit_<name>`` / ``depart_<name>`` methods to the visitor classes.
node_class_names = """
    Text
    abbreviation acronym address admonition attention attribution author
        authors
    block_quote bullet_list
    caption caution citation citation_reference classifier colspec comment
        compound contact container copyright
    danger date decoration definition definition_list definition_list_item
        description docinfo doctest_block document
    emphasis entry enumerated_list error
    field field_body field_list field_name figure footer
        footnote footnote_reference
    generated
    header hint
    image important inline
    label legend line line_block list_item literal literal_block
    math math_block meta
    note
    option option_argument option_group option_list option_list_item
        option_string organization
    paragraph pending problematic
    raw reference revision row rubric
    section sidebar status strong subscript substitution_definition
        substitution_reference subtitle superscript system_message
    table target tbody term tgroup thead tip title title_reference topic
        transition
    version
    warning""".split()
"""A list of names of all concrete Node subclasses."""
class NodeVisitor:
    """
    "Visitor" pattern [GoF95]_ abstract superclass implementation for
    document tree traversals.

    Each node class has corresponding methods, doing nothing by
    default; override individual methods for specific and useful
    behaviour. The `dispatch_visit()` method is called by
    `Node.walk()` upon entering a node. `Node.walkabout()` also calls
    the `dispatch_departure()` method before exiting a node.

    The dispatch methods call "``visit_`` + node class name" or
    "``depart_`` + node class name", resp.

    This is a base class for visitors whose ``visit_...`` & ``depart_...``
    methods must be implemented for *all* compulsory node types encountered
    (such as for `docutils.writers.Writer` subclasses).
    Unimplemented methods will raise exceptions (except for optional nodes).

    For sparse traversals, where only certain node types are of interest, use
    subclass `SparseNodeVisitor` instead. When (mostly or entirely) uniform
    processing is desired, subclass `GenericNodeVisitor`.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    optional = ('meta',)
    """
    Tuple containing node class names (as strings).

    No exception will be raised if writers do not implement visit
    or departure functions for these node classes.

    Used to ensure transitional compatibility with existing 3rd-party writers.
    """

    def __init__(self, document):
        self.document = document

    def dispatch_visit(self, node):
        """
        Look up "``visit_`` + node class name" on this visitor and call it
        with `node`; fall back to `unknown_visit()` when no such method
        exists. The chosen method is reported via the document's reporter
        at debug level. Return whatever the called method returns.
        """
        node_name = node.__class__.__name__
        handler = getattr(self, 'visit_' + node_name, self.unknown_visit)
        message = ('docutils.nodes.NodeVisitor.dispatch_visit calling %s for %s'
                   % (handler.__name__, node_name))
        self.document.reporter.debug(message)
        return handler(node)

    def dispatch_departure(self, node):
        """
        Look up "``depart_`` + node class name" on this visitor and call it
        with `node`; fall back to `unknown_departure()` when no such method
        exists. The chosen method is reported via the document's reporter
        at debug level. Return whatever the called method returns.
        """
        node_name = node.__class__.__name__
        handler = getattr(self, 'depart_' + node_name, self.unknown_departure)
        message = (
            'docutils.nodes.NodeVisitor.dispatch_departure calling %s for %s'
            % (handler.__name__, node_name))
        self.document.reporter.debug(message)
        return handler(node)

    def unknown_visit(self, node):
        """
        Called when entering unknown `Node` types.

        Raise `NotImplementedError` unless the node class is listed in
        `optional` and the "strict_visitor" setting is off.
        """
        node_name = node.__class__.__name__
        if (not self.document.settings.strict_visitor
                and node_name in self.optional):
            return
        raise NotImplementedError(
            '%s visiting unknown node type: %s'
            % (self.__class__, node_name))

    def unknown_departure(self, node):
        """
        Called before exiting unknown `Node` types.

        Raise `NotImplementedError` unless the node class is listed in
        `optional` and the "strict_visitor" setting is off.
        """
        node_name = node.__class__.__name__
        if (not self.document.settings.strict_visitor
                and node_name in self.optional):
            return
        raise NotImplementedError(
            '%s departing unknown node type: %s'
            % (self.__class__, node_name))
class SparseNodeVisitor(NodeVisitor):
    """
    Base class for sparse traversals, where only certain node types are of
    interest. When ``visit_...`` & ``depart_...`` methods should be
    implemented for *all* node types (such as for `docutils.writers.Writer`
    subclasses), subclass `NodeVisitor` instead.
    """
    # The class body is intentionally empty: do-nothing ``visit_...`` and
    # ``depart_...`` methods are attached by `_add_node_class_names()`.
class GenericNodeVisitor(NodeVisitor):
    """
    Generic "Visitor" abstract superclass, for simple traversals.

    Unless overridden, each ``visit_...`` method calls `default_visit()`, and
    each ``depart_...`` method (when using `Node.walkabout()`) calls
    `default_departure()`. `default_visit()` (and `default_departure()`) must
    be overridden in subclasses.

    Define fully generic visitors by overriding `default_visit()` (and
    `default_departure()`) only. Define semi-generic visitors by overriding
    individual ``visit_...()`` (and ``depart_...()``) methods also.

    `NodeVisitor.unknown_visit()` (`NodeVisitor.unknown_departure()`) should
    be overridden for default behavior.
    """
    # The forwarding ``visit_...`` / ``depart_...`` methods are attached
    # by `_add_node_class_names()` after the class definition.

    def default_visit(self, node):
        """Override for generic, uniform traversals."""
        raise NotImplementedError

    def default_departure(self, node):
        """Override for generic, uniform traversals."""
        raise NotImplementedError
def _call_default_visit(self, node):
    """Forward a ``visit_...`` call to the visitor's `default_visit()`."""
    self.default_visit(node)


def _call_default_departure(self, node):
    """Forward a ``depart_...`` call to `default_departure()`."""
    self.default_departure(node)


def _nop(self, node):
    """Do nothing (visit/departure method for sparse visitors)."""


def _add_node_class_names(names):
    """Save typing with dynamic assignments:

    For every name, attach ``visit_<name>`` and ``depart_<name>`` to
    `GenericNodeVisitor` (forwarding to the default methods) and to
    `SparseNodeVisitor` (doing nothing).
    """
    bindings = (
        (GenericNodeVisitor, _call_default_visit, _call_default_departure),
        (SparseNodeVisitor, _nop, _nop),
    )
    for node_name in names:
        for visitor_class, on_visit, on_departure in bindings:
            setattr(visitor_class, 'visit_' + node_name, on_visit)
            setattr(visitor_class, 'depart_' + node_name, on_departure)


_add_node_class_names(node_class_names)
class TreeCopyVisitor(GenericNodeVisitor):
    """
    Make a complete copy of a tree or branch, including element attributes.
    """

    def __init__(self, document):
        GenericNodeVisitor.__init__(self, document)
        # Acting parents that were set aside while visiting their children.
        self.parent_stack = []
        # Current acting parent; starts as a plain list that receives
        # the copy of the first node visited.
        self.parent = []

    def get_tree_copy(self):
        """Return the first item of the current acting parent."""
        return self.parent[0]

    def default_visit(self, node):
        """Append a copy of `node` to the acting parent and descend into it."""
        clone = node.copy()
        self.parent.append(clone)
        self.parent_stack.append(self.parent)
        self.parent = clone

    def default_departure(self, node):
        """Make the most recently set-aside parent the acting parent again."""
        self.parent = self.parent_stack.pop()
2664# Custom Exceptions
2665# =================
class ValidationError(ValueError):
    """Invalid Docutils Document Tree Element.

    The optional `problematic_element` is stored on the exception as
    the attribute of the same name; `msg` is the exception's only argument.
    """

    def __init__(self, msg, problematic_element=None):
        self.problematic_element = problematic_element
        super().__init__(msg)
class TreePruningException(Exception):
    """
    Base class for `NodeVisitor`-related tree pruning exceptions.

    Raise subclasses from within ``visit_...`` or ``depart_...`` methods
    called from `Node.walk()` and `Node.walkabout()` tree traversals to prune
    the tree traversed.
    """


class SkipChildren(TreePruningException):
    """
    Do not visit any children of the current node. The current node's
    siblings and ``depart_...`` method are not affected.
    """


class SkipSiblings(TreePruningException):
    """
    Do not visit any more siblings (to the right) of the current node. The
    current node's children and its ``depart_...`` method are not affected.
    """


class SkipNode(TreePruningException):
    """
    Do not visit the current node's children, and do not call the current
    node's ``depart_...`` method.
    """


class SkipDeparture(TreePruningException):
    """
    Do not call the current node's ``depart_...`` method. The current node's
    children and siblings are not affected.
    """


class NodeFound(TreePruningException):
    """
    Raise to indicate that the target of a search has been found. This
    exception must be caught by the client; it is not caught by the traversal
    code.
    """


class StopTraversal(TreePruningException):
    """
    Stop the traversal altogether. The current node's ``depart_...`` method
    is not affected. The parent nodes' ``depart_...`` methods are also called
    as usual. No other nodes are visited. This is an alternative to
    `NodeFound` that does not cause exception handling to trickle up to the
    caller.
    """
2730# definition moved here from `utils` to avoid circular import dependency
def unescape(text, restore_backslashes=False, respect_whitespace=False):
    """
    Return `text` with null characters removed or turned into backslashes.

    With `restore_backslashes` set, every null character is replaced by a
    backslash. Otherwise null characters are dropped, together with a
    space or newline that directly follows one (backslash-escaped
    whitespace).
    """
    # `respect_whitespace` is ignored (since introduction 2016-12-16)
    if restore_backslashes:
        return text.replace('\x00', '\\')
    # Remove the two-character sequences first, then any remaining nulls.
    for escaped in ('\x00 ', '\x00\n', '\x00'):
        text = text.replace(escaped, '')
    return text
def make_id(string):
    """
    Convert `string` into an identifier and return it.

    Docutils identifiers will conform to the regular expression
    ``[a-z](-?[a-z0-9]+)*``. For CSS compatibility, identifiers (the "class"
    and "id" attributes) should have no underscores, colons, or periods.
    Hyphens may be used.

    - The `HTML 4.01 spec`_ defines identifiers based on SGML tokens:

          ID and NAME tokens must begin with a letter ([A-Za-z]) and may be
          followed by any number of letters, digits ([0-9]), hyphens ("-"),
          underscores ("_"), colons (":"), and periods (".").

    - However the `CSS1 spec`_ defines identifiers based on the "name" token,
      a tighter interpretation ("flex" tokenizer notation; "latin1" and
      "escape" 8-bit characters have been replaced with entities)::

          unicode     \\[0-9a-f]{1,4}
          latin1      [¡-ÿ]
          escape      {unicode}|\\[ -~¡-ÿ]
          nmchar      [-a-z0-9]|{latin1}|{escape}
          name        {nmchar}+

    The CSS1 "nmchar" rule does not include underscores ("_"), colons (":"),
    or periods ("."), therefore "class" and "id" attributes should not contain
    these characters. They should be replaced with hyphens ("-"). Combined
    with HTML's requirements (the first character must be a letter; no
    "unicode", "latin1", or "escape" characters), this results in the
    ``[a-z](-?[a-z0-9]+)*`` pattern.

    .. _HTML 4.01 spec: https://www.w3.org/TR/html401
    .. _CSS1 spec: https://www.w3.org/TR/REC-CSS1
    """
    ident = string.lower()
    # Map special letters to ASCII stand-ins: digraphs first, then
    # single characters.
    for table in (_non_id_translate_digraphs, _non_id_translate):
        ident = ident.translate(table)
    # get rid of non-ascii characters.
    # 'ascii' lowercase to prevent problems with turkish locale.
    decomposed = unicodedata.normalize('NFKD', ident)
    ident = decomposed.encode('ascii', 'ignore').decode('ascii')
    # shrink runs of whitespace and replace by hyphen
    collapsed = ' '.join(ident.split())
    ident = _non_id_chars.sub('-', collapsed)
    # Drop leading hyphens/digits and trailing hyphens.
    ident = _non_id_at_ends.sub('', ident)
    return str(ident)
# Helpers for `make_id()`.
# Runs of characters other than lowercase ASCII letters and digits:
_non_id_chars = re.compile('[^a-z0-9]+')
# A leading run of hyphens/digits, or a trailing run of hyphens:
_non_id_at_ends = re.compile('^[-0-9]+|-+$')
# Translation table for `str.translate()`, keyed by code point:
# lowercase letters replaced by a plain ASCII letter.
_non_id_translate = {
    0x00f8: 'o',       # o with stroke
    0x0111: 'd',       # d with stroke
    0x0127: 'h',       # h with stroke
    0x0131: 'i',       # dotless i
    0x0142: 'l',       # l with stroke
    0x0167: 't',       # t with stroke
    0x0180: 'b',       # b with stroke
    0x0183: 'b',       # b with topbar
    0x0188: 'c',       # c with hook
    0x018c: 'd',       # d with topbar
    0x0192: 'f',       # f with hook
    0x0199: 'k',       # k with hook
    0x019a: 'l',       # l with bar
    0x019e: 'n',       # n with long right leg
    0x01a5: 'p',       # p with hook
    0x01ab: 't',       # t with palatal hook
    0x01ad: 't',       # t with hook
    0x01b4: 'y',       # y with hook
    0x01b6: 'z',       # z with stroke
    0x01e5: 'g',       # g with stroke
    0x0225: 'z',       # z with hook
    0x0234: 'l',       # l with curl
    0x0235: 'n',       # n with curl
    0x0236: 't',       # t with curl
    0x0237: 'j',       # dotless j
    0x023c: 'c',       # c with stroke
    0x023f: 's',       # s with swash tail
    0x0240: 'z',       # z with swash tail
    0x0247: 'e',       # e with stroke
    0x0249: 'j',       # j with stroke
    0x024b: 'q',       # q with hook tail
    0x024d: 'r',       # r with stroke
    0x024f: 'y',       # y with stroke
}
# Translation table for characters replaced by two ASCII letters;
# `make_id()` applies it before `_non_id_translate`.
_non_id_translate_digraphs = {
    0x00df: 'sz',      # ligature sz
    0x00e6: 'ae',      # ae
    0x0153: 'oe',      # ligature oe
    0x0238: 'db',      # db digraph
    0x0239: 'qp',      # qp digraph
}
def dupname(node, name):
    """Move `name` from ``node['names']`` to ``node['dupnames']``.

    Also marks `node` as referenced.  Raises `ValueError` (from
    ``list.remove``) if `name` is not in ``node['names']``; by then the
    name has already been appended to ``node['dupnames']``.
    """
    node['dupnames'].append(name)
    node['names'].remove(name)
    # Assume that `node` is referenced, even though it isn't;
    # we don't want to throw unnecessary system_messages.
    node.referenced = True
def fully_normalize_name(name):
    """Return `name` lowercased, with whitespace runs as single spaces.

    Leading and trailing whitespace is removed.
    """
    words = name.lower().split()
    return ' '.join(words)
def whitespace_normalize_name(name):
    """Return `name` with whitespace runs replaced by single spaces.

    Leading and trailing whitespace is removed; case is kept.
    """
    words = name.split()
    return ' '.join(words)
def serial_escape(value):
    """Escape string values that are elements of a list, for serialization.

    Backslashes are doubled first, then every space gets a backslash
    prefix.
    """
    doubled = value.replace('\\', r'\\')
    return doubled.replace(' ', r'\ ')
def split_name_list(s):
    r"""Split a string at non-escaped spaces.

    Backslashes escape internal spaces (cf. `serial_escape()`).
    Return list of "names" (after removing escaping backslashes).

    >>> split_name_list(r'a\ n\ame two\\ n\\ames')
    ['a name', 'two\\', 'n\\ames']

    Provisional.
    """
    masked = s.replace('\\', '\x00')  # escape with NULL char
    masked = masked.replace('\x00\x00', '\\')  # unescape backslashes
    masked = masked.replace('\x00 ', '\x00\x00')  # escaped spaces -> NULL NULL
    names = []
    for token in masked.split(' '):
        # restore internal spaces, drop other escaping characters
        token = token.replace('\x00\x00', ' ')
        names.append(token.replace('\x00', ''))
    return names
def pseudo_quoteattr(value):
    """Quote attributes for pseudo-xml

    The value is wrapped in double quotes; nothing inside it is escaped.
    """
    template = '"%s"'
    return template % value
2887# Methods to validate `Element attribute`__ values.
2889# Ensure the expected Python `data type`__, normalize, and check for
2890# restrictions.
2891#
# The methods can be used to convert `str` values (e.g. from an XML
# representation) or to validate an existing document tree or node.
#
# Cf. `Element.validate_attributes()`, `docutils.parsers.docutils_xml`,
# and the `ATTRIBUTE_VALIDATORS` mapping below.
2897#
2898# __ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-reference
2899# __ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-types
def validate_enumerated_type(*keywords):
    """
    Return a function that validates a `str` against given `keywords`.

    The returned function hands back its argument unchanged if it is one
    of `keywords` and raises `ValueError` otherwise.

    Provisional.
    """
    def validate_keywords(value):
        if value in keywords:
            return value
        allowed = '", "'.join(keywords)
        raise ValueError(f'"{value}" is not one of "{allowed}".')

    return validate_keywords
def validate_identifier(value):
    """
    Validate identifier key or class name.

    `value` is valid if `make_id()` returns it unchanged; it is then
    returned as is. Otherwise a `ValueError` is raised.

    Used in `idref.type`__ and for the tokens in `validate_identifier_list()`.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#idref-type

    Provisional.
    """
    if value == make_id(value):
        return value
    raise ValueError(f'"{value}" is no valid id or class name.')
def validate_identifier_list(value):
    """
    A (space-separated) list of ids or class names.

    `value` may be a `list` or a `str` with space separated
    ids or class names (cf. `validate_identifier()`).
    A `str` is split at whitespace; every token is validated.
    Return the list of tokens (a passed-in list is returned as is).

    Used in `classnames.type`__, `ids.type`__, and `idrefs.type`__.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#classnames-type
    __ https://docutils.sourceforge.io/docs/ref/doctree.html#ids-type
    __ https://docutils.sourceforge.io/docs/ref/doctree.html#idrefs-type

    Provisional.
    """
    tokens = value.split() if isinstance(value, str) else value
    for token in tokens:
        validate_identifier(token)
    return tokens
def validate_measure(value):
    """
    Validate a length measure__ (number + optional recognized unit).

    The number is an optional minus sign followed by decimal digits with
    at most one decimal point ("12", "-1.5", ".5", "3."). Spaces may
    separate the number from the unit. Recognized units are
    em, ex, px, in, cm, mm, pt, pc, and %.

    Return `value` with all spaces removed.
    Raise `ValueError` if `value` is not a valid measure (this includes
    malformed numbers like "1.2.3" or "--" and unknown units).

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#measure

    Provisional.
    """
    units = 'em|ex|px|in|cm|mm|pt|pc|%'
    # At least one digit and at most one decimal point; a bare character
    # class like ``[-0-9.]+`` would also accept "--", "." or "1.2.3".
    number = r'-?(?:[0-9]+\.?[0-9]*|\.[0-9]+)'
    # The whole unit alternation is optional (explicit group, so that the
    # quantifier does not bind to the last alternative only).
    if not re.fullmatch(f'{number} *(?:{units})?', value):
        raise ValueError(f'"{value}" is no valid measure. '
                         f'Valid units: {units.replace("|", " ")}.')
    return value.replace(' ', '')
def validate_NMTOKEN(value):
    """
    Validate a "name token": a `str` of letters, digits, and [-._].

    Return `value` unchanged if it is a non-empty string of ASCII
    letters, digits, hyphens, periods, and underscores;
    raise `ValueError` otherwise.

    Provisional.
    """
    if re.fullmatch('[-._A-Za-z0-9]+', value):
        return value
    raise ValueError(f'"{value}" is no NMTOKEN.')
def validate_NMTOKENS(value):
    """
    Validate a list of "name tokens".

    `value` may be a list or a `str`; a `str` is split at whitespace.
    Every token is checked with `validate_NMTOKEN()`.
    Return the list of tokens (a passed-in list is returned as is).

    Provisional.
    """
    tokens = value.split() if isinstance(value, str) else value
    for token in tokens:
        validate_NMTOKEN(token)
    return tokens
def validate_refname_list(value):
    """
    Validate a list of `reference names`__.

    Reference names may contain all characters;
    whitespace is normalized (cf. `whitespace_normalize_name()`).

    `value` may be either a `list` of names or a `str` with
    space separated names (with internal spaces backslash escaped
    and literal backslashes doubled cf. `serial_escape()`).

    Return a list of whitespace-normalized, unescaped reference names.

    Provisional.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#reference-name
    """
    names = split_name_list(value) if isinstance(value, str) else value
    return list(map(whitespace_normalize_name, names))
def validate_yesorno(value):
    """
    Convert a "yes or no" attribute value to a `bool`.

    The string ``"0"`` evaluates to ``False``;
    every other value is converted with `bool()`.
    """
    return False if value == "0" else bool(value)
# Table of per-attribute validators, keyed by attribute name.
#
# Every value is a callable taking the attribute value as its only argument:
#
# * the builtins `str` and `int` are used as plain converters,
# * the `validate_*()` functions return the (possibly normalized) value;
#   several of them raise `ValueError` for invalid input,
# * `whitespace_normalize_name` is applied to single reference names.
#
# NOTE(review): `validate_enumerated_type()` is defined outside this section;
# presumably it returns a validator that accepts only the listed keywords
# -- confirm at its definition.
ATTRIBUTE_VALIDATORS = {
    'alt': str,  # CDATA
    'align': str,
    'anonymous': validate_yesorno,
    'auto': str,  # CDATA (only '1' or '*' are used in rST)
    'backrefs': validate_identifier_list,
    'bullet': str,  # CDATA (only '-', '+', or '*' are used in rST)
    'classes': validate_identifier_list,
    'char': str,  # from Exchange Table Model (CALS), currently ignored
    'charoff': validate_NMTOKEN,  # from CALS, currently ignored
    'colname': validate_NMTOKEN,  # from CALS, currently ignored
    'colnum': int,  # from CALS, currently ignored
    'cols': int,  # from CALS: "NMTOKEN, […] must be an integer > 0".
    'colsep': validate_yesorno,
    'colwidth': int,  # sic! CALS: CDATA (measure or number+'*')
    'content': str,  # <meta>
    'delimiter': str,
    'dir': validate_enumerated_type('ltr', 'rtl', 'auto'),  # <meta>
    'dupnames': validate_refname_list,
    'enumtype': validate_enumerated_type('arabic', 'loweralpha', 'lowerroman',
                                         'upperalpha', 'upperroman'),
    'format': str,  # CDATA (space separated format names)
    'frame': validate_enumerated_type('top', 'bottom', 'topbot', 'all',
                                      'sides', 'none'),  # from CALS, ignored
    'height': validate_measure,
    'http-equiv': str,  # <meta>
    'ids': validate_identifier_list,
    'lang': str,  # <meta>
    'level': int,
    'line': int,
    'ltrim': validate_yesorno,
    'loading': validate_enumerated_type('embed', 'link', 'lazy'),
    'media': str,  # <meta>
    'morecols': int,
    'morerows': int,
    # The key 'name' occurs in two element types; only one validator can be
    # registered per key, the alternative is kept as a comment below.
    'name': whitespace_normalize_name,  # in <reference> (deprecated)
    # 'name': node_attributes.validate_NMTOKEN,  # in <meta>
    'names': validate_refname_list,
    'namest': validate_NMTOKEN,  # start of span, from CALS, currently ignored
    'nameend': validate_NMTOKEN,  # end of span, from CALS, currently ignored
    'pgwide': validate_yesorno,  # from CALS, currently ignored
    'prefix': str,
    'refid': validate_identifier,
    'refname': whitespace_normalize_name,
    'refuri': str,
    'rowsep': validate_yesorno,
    'rtrim': validate_yesorno,
    'scale': int,
    'scheme': str,
    'source': str,
    'start': int,
    'stub': validate_yesorno,
    'suffix': str,
    'title': str,
    'type': validate_NMTOKEN,
    'uri': str,
    'valign': validate_enumerated_type('top', 'middle', 'bottom'),  # from CALS
    'width': validate_measure,
    'xml:space': validate_enumerated_type('default', 'preserve'),
    }
"""
Mapping of `attribute names`__ to validating functions.

Provisional.

__ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-reference
"""